2009-02-05 14:21:47 +00:00
|
|
|
/* This file is (c) 2008-2009 Konstantin Isakov <ikm@users.berlios.de>
|
2009-01-28 20:55:45 +00:00
|
|
|
* Part of GoldenDict. Licensed under GPLv3 or later, see the LICENSE file */
|
|
|
|
|
|
|
|
#ifndef __CHUNKEDSTORAGE_HH_INCLUDED__
|
|
|
|
#define __CHUNKEDSTORAGE_HH_INCLUDED__
|
|
|
|
|
|
|
|
#include "ex.hh"
|
|
|
|
#include "file.hh"
|
2009-04-29 23:18:26 +00:00
|
|
|
|
2009-01-28 20:55:45 +00:00
|
|
|
#include <vector>
|
|
|
|
|
2009-04-29 23:18:26 +00:00
|
|
|
#ifdef _MSC_VER
|
|
|
|
#include <stdint_msvc.h>
|
|
|
|
#endif
|
|
|
|
|
2009-01-28 20:55:45 +00:00
|
|
|
/// A chunked compression storage. We use this for articles' bodies. The idea
|
|
|
|
/// is to store data in separately-compressed chunks, much like in dictzip,
|
|
|
|
/// but without any fancy gzip-compatibility or whatever. Another difference
|
|
|
|
/// is that any block of data saved is always contained within one chunk,
|
|
|
|
/// even if its size does exceed its maximum allowed size. This is very
|
|
|
|
/// handy since we're retrieving the data by the same blocks we used to save
|
|
|
|
/// it as, that's the only kind of seek we support, really.
|
|
|
|
namespace ChunkedStorage {
|
|
|
|
|
|
|
|
using std::vector;
|
|
|
|
|
|
|
|
// Exception types used by the chunked storage. DEF_EX comes from "ex.hh";
// NOTE(review): it presumably declares an exception class named by the first
// argument, carrying the given message and deriving from the third argument
// -- confirm against ex.hh.

// Common base for the chunked storage exceptions declared below.
DEF_EX( Ex, "Chunked storage exception", std::exception )

// A chunk could not be compressed.
DEF_EX( exFailedToCompressChunk, "Failed to compress a chunk", Ex )

// A chunked address lies outside the valid range.
DEF_EX( exAddressOutOfRange, "The given chunked address is out of range", Ex )

// A chunk could not be decompressed.
DEF_EX( exFailedToDecompressChunk, "Failed to decompress a chunk", Ex )
|
|
|
|
|
|
|
|
/// This class writes data blocks in chunks.
|
|
|
|
class Writer
|
|
|
|
{
|
|
|
|
vector< uint32_t > offsets;
|
|
|
|
File::Class & file;
|
2009-04-14 16:35:47 +00:00
|
|
|
size_t scratchPadOffset, scratchPadSize;
|
2009-01-28 20:55:45 +00:00
|
|
|
|
|
|
|
public:
|
|
|
|
|
|
|
|
Writer( File::Class & );
|
|
|
|
|
|
|
|
/// Starts new block. Returns its address.
|
|
|
|
uint32_t startNewBlock();
|
|
|
|
|
|
|
|
/// Add data to the previously started block.
|
|
|
|
void addToBlock( void const * data, size_t size );
|
|
|
|
|
|
|
|
/// Finishes writing chunks and returns the offset to the chunk table which
|
|
|
|
/// gets written at the moment of finishing.
|
|
|
|
uint32_t finish();
|
|
|
|
|
|
|
|
private:
|
|
|
|
|
|
|
|
/// Indicates that an address was allocated, which would mean the writeout
|
|
|
|
/// of the pending chunk is required even if its size is zero.
|
|
|
|
bool chunkStarted;
|
|
|
|
|
|
|
|
// This buffer accumulates the chunk data until either enough data is
|
|
|
|
// stored (>=ChunkMaxSize), or there's no more data left to store.
|
|
|
|
vector< unsigned char > buffer;
|
|
|
|
|
|
|
|
// Here we compress the chunk before writing it out to file.
|
|
|
|
vector< unsigned char > bufferCompressed;
|
|
|
|
|
|
|
|
// The amount of data stored in buffer so far. We keep it separate
|
|
|
|
// from buffer.size() for performance reasons; the latter one only
|
|
|
|
// grows, but never shrinks.
|
|
|
|
size_t bufferUsed;
|
|
|
|
|
|
|
|
void saveCurrentChunk();
|
|
|
|
};
|
|
|
|
|
|
|
|
/// This class reads data blocks previously written by Writer.
|
|
|
|
class Reader
|
|
|
|
{
|
|
|
|
vector< uint32_t > offsets;
|
|
|
|
File::Class & file;
|
|
|
|
|
|
|
|
public:
|
|
|
|
/// Creates reader by giving it a file to read from and the offset returned
|
|
|
|
/// by Writer::finish().
|
|
|
|
Reader( File::Class &, uint32_t );
|
|
|
|
|
|
|
|
/// Reads the block previously written by Writer, identified by its address.
|
|
|
|
/// Uses the user-provided storage to load the entire chunk, and then to
|
|
|
|
/// return a pointer to the requested block inside it.
|
|
|
|
char * getBlock( uint32_t address, vector< char > & );
|
|
|
|
};
|
|
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
#endif
|