mirror of
https://github.com/xiaoyifang/goldendict-ng.git
synced 2024-11-27 15:24:05 +00:00
752b880f24
* fix: double release memory
852 lines
26 KiB
C
852 lines
26 KiB
C
/* Made up from data.c and other supplementary files of dictd-1.0.11 for the
|
|
* GoldenDict program.
|
|
*/
|
|
|
|
/* data.c --
|
|
* Created: Tue Jul 16 12:45:41 1996 by faith@dict.org
|
|
* Revised: Sat Mar 30 10:46:06 2002 by faith@dict.org
|
|
* Copyright 1996, 1997, 1998, 2000, 2002 Rickard E. Faith (faith@dict.org)
|
|
*
|
|
* This program is free software; you can redistribute it and/or modify it
|
|
* under the terms of the GNU General Public License as published by the
|
|
* Free Software Foundation; either version 1, or (at your option) any
|
|
* later version.
|
|
*
|
|
* This program is distributed in the hope that it will be useful, but
|
|
* WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
|
* General Public License for more details.
|
|
*
|
|
* You should have received a copy of the GNU General Public License along
|
|
* with this program; if not, write to the Free Software Foundation, Inc.,
|
|
* 51 Franklin Street, Suite 500, Boston, MA 02110, USA.
|
|
*/
|
|
|
|
#include <stdlib.h>
|
|
#include <time.h>
|
|
#include "dictzip.hh"
|
|
#include <limits.h>
|
|
#include <stdarg.h>
|
|
#include <errno.h>
|
|
#include <stdlib.h>
|
|
#include <string.h>
|
|
|
|
#include "ufile.hh"
|
|
|
|
#define BUFFERSIZE 10240
|
|
|
|
#define OUT_BUFFER_SIZE 0xffffL
|
|
|
|
#define IN_BUFFER_SIZE ( (unsigned long)( (double)( OUT_BUFFER_SIZE - 12 ) * 0.89 ) )
|
|
|
|
/* For gzip-compatible header, as defined in RFC 1952 */
|
|
|
|
/* Magic for GZIP (rfc1952) */
|
|
#define GZ_MAGIC1 0x1f /* First magic byte */
|
|
#define GZ_MAGIC2 0x8b /* Second magic byte */
|
|
|
|
/* FLaGs (bitmapped), from rfc1952 */
|
|
#define GZ_FTEXT 0x01 /* Set for ASCII text */
|
|
#define GZ_FHCRC 0x02 /* Header CRC16 */
|
|
#define GZ_FEXTRA 0x04 /* Optional field (random access index) */
|
|
#define GZ_FNAME 0x08 /* Original name */
|
|
#define GZ_COMMENT 0x10 /* Zero-terminated, human-readable comment */
|
|
#define GZ_MAX 2 /* Maximum compression */
|
|
#define GZ_FAST 4 /* Fastest compression */
|
|
|
|
/* These are from rfc1952 */
|
|
#define GZ_OS_FAT 0 /* FAT filesystem (MS-DOS, OS/2, NT/Win32) */
|
|
#define GZ_OS_AMIGA 1 /* Amiga */
|
|
#define GZ_OS_VMS 2 /* VMS (or OpenVMS) */
|
|
#define GZ_OS_UNIX 3 /* Unix */
|
|
#define GZ_OS_VMCMS 4 /* VM/CMS */
|
|
#define GZ_OS_ATARI 5 /* Atari TOS */
|
|
#define GZ_OS_HPFS 6 /* HPFS filesystem (OS/2, NT) */
|
|
#define GZ_OS_MAC 7 /* Macintosh */
|
|
#define GZ_OS_Z 8 /* Z-System */
|
|
#define GZ_OS_CPM 9 /* CP/M */
|
|
#define GZ_OS_TOPS20 10 /* TOPS-20 */
|
|
#define GZ_OS_NTFS 11 /* NTFS filesystem (NT) */
|
|
#define GZ_OS_QDOS 12 /* QDOS */
|
|
#define GZ_OS_ACORN 13 /* Acorn RISCOS */
|
|
#define GZ_OS_UNKNOWN 255 /* unknown */
|
|
|
|
#define GZ_RND_S1 'R' /* First magic for random access format */
|
|
#define GZ_RND_S2 'A' /* Second magic for random access format */
|
|
|
|
#define GZ_ID1 0 /* GZ_MAGIC1 */
|
|
#define GZ_ID2 1 /* GZ_MAGIC2 */
|
|
#define GZ_CM 2 /* Compression Method (Z_DEFLATED) */
|
|
#define GZ_FLG 3 /* FLaGs (see above) */
|
|
#define GZ_MTIME 4 /* Modification TIME */
|
|
#define GZ_XFL 8 /* eXtra FLags (GZ_MAX or GZ_FAST) */
|
|
#define GZ_OS 9 /* Operating System */
|
|
#define GZ_XLEN 10 /* eXtra LENgth (16bit) */
|
|
#define GZ_FEXTRA_START 12 /* Start of extra fields */
|
|
#define GZ_SI1 12 /* Subfield ID1 */
|
|
#define GZ_SI2 13 /* Subfield ID2 */
|
|
#define GZ_SUBLEN 14 /* Subfield length (16bit) */
|
|
#define GZ_VERSION 16 /* Version for subfield format */
|
|
#define GZ_CHUNKLEN 18 /* Chunk length (16bit) */
|
|
#define GZ_CHUNKCNT 20 /* Number of chunks (16bit) */
|
|
#define GZ_RNDDATA 22 /* Random access data (16bit) */
|
|
|
|
|
|
#define DBG_VERBOSE ( 0 << 30 | 1 << 0 ) /* Verbose */
|
|
#define DBG_ZIP ( 0 << 30 | 1 << 1 ) /* Zip */
|
|
#define DBG_UNZIP ( 0 << 30 | 1 << 2 ) /* Unzip */
|
|
#define DBG_SEARCH ( 0 << 30 | 1 << 3 ) /* Search */
|
|
#define DBG_SCAN ( 0 << 30 | 1 << 4 ) /* Config file scan */
|
|
#define DBG_PARSE ( 0 << 30 | 1 << 5 ) /* Config file parse */
|
|
#define DBG_INIT ( 0 << 30 | 1 << 6 ) /* Database initialization */
|
|
#define DBG_PORT ( 0 << 30 | 1 << 7 ) /* Log port number for connections */
|
|
#define DBG_LEV ( 0 << 30 | 1 << 8 ) /* Levenshtein matching */
|
|
#define DBG_AUTH ( 0 << 30 | 1 << 9 ) /* Debug authentication */
|
|
#define DBG_NODETACH ( 0 << 30 | 1 << 10 ) /* Don't detach as a background proc. */
|
|
#define DBG_NOFORK ( 0 << 30 | 1 << 11 ) /* Don't fork (single threaded) */
|
|
#define DBG_ALT ( 0 << 30 | 1 << 12 ) /* altcompare() */
|
|
|
|
#define LOG_SERVER ( 0 << 30 | 1 << 0 ) /* Log server diagnostics */
|
|
#define LOG_CONNECT ( 0 << 30 | 1 << 1 ) /* Log connection information */
|
|
#define LOG_STATS ( 0 << 30 | 1 << 2 ) /* Log termination information */
|
|
#define LOG_COMMAND ( 0 << 30 | 1 << 3 ) /* Log commands */
|
|
#define LOG_FOUND ( 0 << 30 | 1 << 4 ) /* Log words found */
|
|
#define LOG_NOTFOUND ( 0 << 30 | 1 << 5 ) /* Log words not found */
|
|
#define LOG_CLIENT ( 0 << 30 | 1 << 6 ) /* Log client */
|
|
#define LOG_HOST ( 0 << 30 | 1 << 7 ) /* Log remote host name */
|
|
#define LOG_TIMESTAMP ( 0 << 30 | 1 << 8 ) /* Log with timestamps */
|
|
#define LOG_MIN ( 0 << 30 | 1 << 9 ) /* Log a few minimal things */
|
|
#define LOG_AUTH ( 0 << 30 | 1 << 10 ) /* Log authentication denials */
|
|
|
|
#define DICT_LOG_TERM 0
|
|
#define DICT_LOG_DEFINE 1
|
|
#define DICT_LOG_MATCH 2
|
|
#define DICT_LOG_NOMATCH 3
|
|
#define DICT_LOG_CLIENT 4
|
|
#define DICT_LOG_TRACE 5
|
|
#define DICT_LOG_COMMAND 6
|
|
#define DICT_LOG_AUTH 7
|
|
#define DICT_LOG_CONNECT 8
|
|
|
|
#define DICT_UNKNOWN 0
|
|
#define DICT_TEXT 1
|
|
#define DICT_GZIP 2
|
|
#define DICT_DZIP 3
|
|
|
|
#include <ctype.h>
|
|
#include <fcntl.h>
|
|
#include <assert.h>
|
|
|
|
#include <sys/stat.h>
|
|
|
|
#define USE_CACHE 1
|
|
|
|
#define dict_data_filter( ... )
|
|
#define PRINTF( ... )
|
|
|
|
#define xmalloc malloc
|
|
#define xfree free
|
|
|
|
static const char * _err_programName = "GoldenDict";
|
|
|
|
#define log_error( ... )
|
|
#define log_error_va( ... )
|
|
|
|
static void err_fatal( const char * routine, const char * format, ... )
|
|
{
|
|
va_list ap;
|
|
|
|
fflush( stdout );
|
|
if ( _err_programName ) {
|
|
if ( routine )
|
|
fprintf( stderr, "%s (%s): ", _err_programName, routine );
|
|
else
|
|
fprintf( stderr, "%s: ", _err_programName );
|
|
}
|
|
else {
|
|
if ( routine )
|
|
fprintf( stderr, "%s: ", routine );
|
|
}
|
|
|
|
va_start( ap, format );
|
|
vfprintf( stderr, format, ap );
|
|
log_error_va( routine, format, ap );
|
|
va_end( ap );
|
|
|
|
fflush( stderr );
|
|
fflush( stdout );
|
|
// exit ( 1 );
|
|
}
|
|
|
|
/* \doc |err_fatal_errno| flushes "stdout", prints a fatal error report on
|
|
"stderr", prints the system error corresponding to |errno|, flushes
|
|
"stderr" and "stdout", and calls |exit|. |routine| is the name of the
|
|
routine in which the error took place. */
|
|
|
|
static void err_fatal_errno( const char * routine, const char * format, ... )
|
|
{
|
|
va_list ap;
|
|
int errorno = errno;
|
|
|
|
fflush( stdout );
|
|
if ( _err_programName ) {
|
|
if ( routine )
|
|
fprintf( stderr, "%s (%s): ", _err_programName, routine );
|
|
else
|
|
fprintf( stderr, "%s: ", _err_programName );
|
|
}
|
|
else {
|
|
if ( routine )
|
|
fprintf( stderr, "%s: ", routine );
|
|
}
|
|
|
|
va_start( ap, format );
|
|
vfprintf( stderr, format, ap );
|
|
log_error_va( routine, format, ap );
|
|
va_end( ap );
|
|
|
|
#if HAVE_STRERROR
|
|
fprintf( stderr, "%s: %s\n", routine, strerror( errorno ) );
|
|
log_error( routine, "%s: %s\n", routine, strerror( errorno ) );
|
|
#else
|
|
errno = errorno;
|
|
perror( routine );
|
|
log_error( routine, "%s: errno = %d\n", routine, errorno );
|
|
#endif
|
|
|
|
fflush( stderr );
|
|
fflush( stdout );
|
|
// exit( 1 );
|
|
}
|
|
|
|
/* \doc |err_internal| flushes "stdout", prints the fatal error message,
|
|
flushes "stderr" and "stdout", and calls |abort| so that a core dump is
|
|
generated. */
|
|
|
|
static void err_internal( const char * routine, const char * format, ... )
|
|
{
|
|
va_list ap;
|
|
|
|
fflush( stdout );
|
|
if ( _err_programName ) {
|
|
if ( routine )
|
|
fprintf( stderr, "%s (%s): Internal error\n ", _err_programName, routine );
|
|
else
|
|
fprintf( stderr, "%s: Internal error\n ", _err_programName );
|
|
}
|
|
else {
|
|
if ( routine )
|
|
fprintf( stderr, "%s: Internal error\n ", routine );
|
|
else
|
|
fprintf( stderr, "Internal error\n " );
|
|
}
|
|
|
|
va_start( ap, format );
|
|
vfprintf( stderr, format, ap );
|
|
log_error( routine, format, ap );
|
|
va_end( ap );
|
|
|
|
if ( _err_programName )
|
|
fprintf( stderr, "Aborting %s...\n", _err_programName );
|
|
else
|
|
fprintf( stderr, "Aborting...\n" );
|
|
fflush( stderr );
|
|
fflush( stdout );
|
|
// abort();
|
|
}
|
|
|
|
/* __func__ is a predefined identifier (not a macro) from C99 onwards, so it
 * cannot be detected with #ifdef; testing it that way always "fails" and
 * would replace the real function name by __FILE__.  Only provide a
 * substitute for compilers that do not announce C99 support. */
#if !defined( __STDC_VERSION__ ) || __STDC_VERSION__ < 199901L
#if defined( __GNUC__ ) || defined( _MSC_VER )
#define __func__ __FUNCTION__
#else
#define __func__ __FILE__
#endif
#endif
|
|
|
|
/* Decodes a 16-bit little-endian value from two raw bytes. */
static int dz_le16( const unsigned char * p )
{
  return (int)p[ 0 ] | ( (int)p[ 1 ] << 8 );
}

/* Decodes a 32-bit little-endian value from four raw bytes.  The arithmetic
 * is unsigned so the top byte can never sign-extend into a wider field. */
static unsigned long dz_le32( const unsigned char * p )
{
  return (unsigned long)p[ 0 ] | ( (unsigned long)p[ 1 ] << 8 ) | ( (unsigned long)p[ 2 ] << 16 )
    | ( (unsigned long)p[ 3 ] << 24 );
}

/* Reads exactly |n| bytes into |dst|; returns 0 on EOF or read error. */
static int dz_read_exact( FILE * str, unsigned char * dst, size_t n )
{
  return fread( dst, 1, n, str ) == n;
}

/* Skips a NUL-terminated header string.  Returns the number of characters
 * before the terminator (or before EOF), or -1 once |limit| characters
 * have been seen without finding the terminator. */
static long dz_skip_string( FILE * str, long limit )
{
  long length = 0;
  int c;

  while ( ( c = getc( str ) ) != EOF && c != 0 ) {
    if ( ++length >= limit )
      return -1;
  }
  return length;
}

/* Parses the header of the dictionary data file |filename| into |header|.
 *
 * A file that does not start with the gzip magic bytes is classified as
 * DICT_TEXT; its size and mtime come from fstat() and, when |computeCRC| is
 * non-zero, a CRC-32 of the whole file is computed.  Otherwise the gzip
 * header is decoded: the type becomes DICT_DZIP when the extra field starts
 * with the "RA" random-access subfield (chunk table and per-chunk file
 * offsets are then allocated into header->chunks / header->offsets), and
 * DICT_GZIP in every other case.  crc and length are taken from the gzip
 * trailer.  filename, origFilename and comment are always set to NULL.
 *
 * |header| is expected to be zero-initialised by the caller, as
 * dict_data_open() does.
 *
 * Returns DZ_NOERROR on success.  On failure the file is closed,
 * header->chunks is released and reset to NULL, and one of
 * DZ_ERR_OPENFILE, DZ_ERR_READFILE, DZ_ERR_INVALID_FORMAT,
 * DZ_ERR_UNSUPPORTED_FORMAT or DZ_ERR_NOMEMORY is returned.
 */
static enum DZ_ERRORS dict_read_header( const char * filename, dictData * header, int computeCRC )
{
  FILE * str;
  int id1, id2;
  char buffer[ BUFFERSIZE ];
  unsigned char raw[ 8 ];
  int extraLength;
  int i;
  long fieldLength;
  struct stat sb;
  unsigned long crc = crc32( 0L, Z_NULL, 0 );
  size_t count;
  unsigned long offset;
  enum DZ_ERRORS result = DZ_ERR_INVALID_FORMAT; /* default for "goto fail" */

  if ( !( str = gd_fopen( filename, "rb" ) ) ) {
    err_fatal_errno( __func__, "Cannot open data file \"%s\" for read\n", filename );
    return DZ_ERR_OPENFILE;
  }

  header->filename     = NULL; //str_find( filename );
  header->headerLength = GZ_XLEN - 1;
  header->type         = DICT_UNKNOWN;

  id1 = getc( str );
  id2 = getc( str );

  if ( id1 != GZ_MAGIC1 || id2 != GZ_MAGIC2 ) {
    /* No gzip magic: plain, uncompressed text. */
    header->type = DICT_TEXT;
    if ( fstat( fileno( str ), &sb ) != 0 ) {
      err_fatal_errno( __func__, "Cannot stat data file \"%s\"\n", filename );
      fclose( str );
      return DZ_ERR_READFILE;
    }
    header->compressedLength = header->length = sb.st_size;
    header->origFilename                      = NULL; //str_find( filename );
    header->mtime                             = sb.st_mtime;
    if ( computeCRC ) {
      rewind( str );
      /* Loop on the fread() result: looping on feof() never terminates
         when the stream enters an error state. */
      while ( ( count = fread( buffer, 1, BUFFERSIZE, str ) ) > 0 )
        crc = crc32( crc, (Bytef *)buffer, (uInt)count );
      if ( ferror( str ) ) {
        fclose( str );
        return DZ_ERR_READFILE;
      }
    }
    header->crc = crc;
    fclose( str );
    return DZ_NOERROR;
  }
  header->type = DICT_GZIP;

  /* Rest of the fixed gzip header: CM, FLG, MTIME (4 bytes), XFL, OS. */
  if ( !dz_read_exact( str, raw, 8 ) )
    goto fail;
  header->method     = raw[ 0 ];
  header->flags      = raw[ 1 ];
  header->mtime      = dz_le32( raw + 2 );
  header->extraFlags = raw[ 6 ];
  header->os         = raw[ 7 ];

  if ( header->flags & GZ_FEXTRA ) {
    /* XLEN followed by the first subfield id (SI1, SI2). */
    if ( !dz_read_exact( str, raw, 4 ) )
      goto fail;
    extraLength = dz_le16( raw );
    header->headerLength += extraLength + 2;

    if ( raw[ 2 ] == GZ_RND_S1 && raw[ 3 ] == GZ_RND_S2 ) {
      /* Subfield length (not needed) and format version. */
      if ( !dz_read_exact( str, raw, 4 ) )
        goto fail;
      header->version = dz_le16( raw + 2 );

      if ( header->version != 1 ) {
        err_internal( __func__, "dzip header version %d not supported\n", header->version );
        result = DZ_ERR_UNSUPPORTED_FORMAT;
        goto fail;
      }

      if ( !dz_read_exact( str, raw, 4 ) )
        goto fail;
      header->chunkLength = dz_le16( raw );
      header->chunkCount  = dz_le16( raw + 2 );

      /* chunkLength is later used as a divisor, so zero must be refused
         here together with an empty chunk table. */
      if ( header->chunkLength <= 0 || header->chunkCount <= 0 )
        goto fail;

      header->chunks = xmalloc( sizeof( header->chunks[ 0 ] ) * header->chunkCount );
      if ( header->chunks == 0 ) {
        result = DZ_ERR_NOMEMORY;
        goto fail;
      }

      for ( i = 0; i < header->chunkCount; i++ ) {
        if ( !dz_read_exact( str, raw, 2 ) )
          goto fail;
        header->chunks[ i ] = dz_le16( raw );
      }
      header->type = DICT_DZIP;
    }
    else {
      /* Unknown extra field: skip it.  The first byte after the extra
         field is at headerLength + 1 (headerLength counts from zero). */
      if ( fseek( str, header->headerLength + 1, SEEK_SET ) != 0 )
        goto fail;
    }
  }

  header->origFilename = NULL; //str_find( buffer );
  if ( header->flags & GZ_FNAME ) { /* FIXME! Add checking against header len */
    fieldLength = dz_skip_string( str, BUFFERSIZE );
    if ( fieldLength < 0 ) {
      err_fatal( __func__, "too long FNAME field in dzip file \"%s\"\n", filename );
      goto fail;
    }
    header->headerLength += fieldLength + 1;
  }

  header->comment = NULL; //str_find( buffer );
  if ( header->flags & GZ_COMMENT ) { /* FIXME! Add checking for header len */
    fieldLength = dz_skip_string( str, BUFFERSIZE );
    if ( fieldLength < 0 ) {
      err_fatal( __func__, "too long COMMENT field in dzip file \"%s\"\n", filename );
      goto fail;
    }
    header->headerLength += fieldLength + 1;
  }

  if ( header->flags & GZ_FHCRC ) {
    getc( str );
    getc( str );
    header->headerLength += 2;
  }

  /* Also catches every truncated string/CRC field above. */
  if ( ftell( str ) != header->headerLength + 1 ) {
    err_internal( __func__, "File position (%ld) != header length + 1 (%d)\n", ftell( str ), header->headerLength + 1 );
    goto fail;
  }

  /* gzip trailer: CRC-32 and uncompressed size, 4 bytes each. */
  if ( fseek( str, -8, SEEK_END ) != 0 || !dz_read_exact( str, raw, 8 ) )
    goto fail;
  header->crc              = dz_le32( raw );
  header->length           = dz_le32( raw + 4 );
  header->compressedLength = ftell( str );

  /* Compute offsets */
  if ( header->chunkCount > 0 ) {
    header->offsets = xmalloc( sizeof( header->offsets[ 0 ] ) * header->chunkCount );
    if ( header->offsets == 0 ) {
      result = DZ_ERR_NOMEMORY;
      goto fail;
    }

    for ( offset = header->headerLength + 1, i = 0; i < header->chunkCount; i++ ) {
      header->offsets[ i ] = offset;
      offset += header->chunks[ i ];
    }
  }
  else {
    /* Plain gzip has no chunk table; avoid malloc( 0 ), whose NULL result
       would be mistaken for an out-of-memory condition. */
    header->offsets = NULL;
  }

  fclose( str );
  return DZ_NOERROR;

fail:
  fclose( str );
  /* Reset to NULL so that dict_data_close() does not free it again. */
  xfree( header->chunks );
  header->chunks = NULL;
  return result;
}
|
|
|
|
dictData * dict_data_open( const char * filename, enum DZ_ERRORS * error, int computeCRC )
|
|
{
|
|
dictData * h = NULL;
|
|
// struct stat sb;
|
|
int j;
|
|
|
|
if ( !filename ) {
|
|
*error = DZ_ERR_OPENFILE;
|
|
return NULL;
|
|
}
|
|
|
|
h = xmalloc( sizeof( struct dictData ) );
|
|
if ( h == 0 ) {
|
|
*error = DZ_ERR_NOMEMORY;
|
|
return 0;
|
|
}
|
|
|
|
memset( h, 0, sizeof( struct dictData ) );
|
|
#ifdef __WIN32
|
|
h->fd = INVALID_HANDLE_VALUE;
|
|
#endif
|
|
h->initialized = 0;
|
|
|
|
for ( ;; ) {
|
|
#ifdef __WIN32
|
|
wchar_t wname[ 16384 ];
|
|
#endif
|
|
*error = dict_read_header( filename, h, computeCRC );
|
|
if ( *error != DZ_NOERROR ) {
|
|
break; /*
|
|
err_fatal( __func__,
|
|
"\"%s\" not in text or dzip format\n", filename );*/
|
|
}
|
|
|
|
#ifdef __WIN32
|
|
if ( MultiByteToWideChar( CP_UTF8, 0, filename, -1, wname, 16384 ) == 0 ) {
|
|
*error = DZ_ERR_OPENFILE;
|
|
break;
|
|
}
|
|
|
|
h->fd = CreateFileW( wname,
|
|
GENERIC_READ,
|
|
FILE_SHARE_READ | FILE_SHARE_WRITE,
|
|
0,
|
|
OPEN_EXISTING,
|
|
FILE_FLAG_RANDOM_ACCESS,
|
|
0 );
|
|
if ( h->fd == INVALID_HANDLE_VALUE ) {
|
|
*error = DZ_ERR_OPENFILE;
|
|
break;
|
|
}
|
|
|
|
h->size = GetFileSize( h->fd, 0 );
|
|
#else
|
|
h->fd = gd_fopen( filename, "rb" );
|
|
|
|
if ( !h->fd ) {
|
|
*error = DZ_ERR_OPENFILE;
|
|
break;
|
|
/*err_fatal_errno( __func__,
|
|
"Cannot open data file \"%s\"\n", filename );*/
|
|
}
|
|
|
|
fseek( h->fd, 0, SEEK_END );
|
|
|
|
h->size = ftell( h->fd );
|
|
#endif
|
|
|
|
for ( j = 0; j < DICT_CACHE_SIZE; j++ ) {
|
|
h->cache[ j ].chunk = -1;
|
|
h->cache[ j ].stamp = -1;
|
|
h->cache[ j ].inBuffer = NULL;
|
|
h->cache[ j ].count = 0;
|
|
}
|
|
|
|
*error = DZ_NOERROR;
|
|
return h;
|
|
}
|
|
dict_data_close( h );
|
|
return ( 0 );
|
|
}
|
|
|
|
/* Releases everything owned by |header|: the open file, the chunk and
 * offset tables, the inflate state (if it was initialised), every cached
 * chunk buffer and finally the handle itself.  A NULL |header| is ignored.
 * The handle must not be used after this call.
 */
void dict_data_close( dictData * header )
{
  int slot;

  if ( header == NULL )
    return;

#ifdef __WIN32
  if ( header->fd != INVALID_HANDLE_VALUE )
    CloseHandle( header->fd );
#else
  if ( header->fd != NULL )
    fclose( header->fd );
#endif

  /* xfree is free(), which accepts NULL, so no guards are needed. */
  xfree( header->chunks );
  xfree( header->offsets );

  if ( header->initialized && inflateEnd( &header->zStream ) != 0 )
    err_internal( __func__, "Cannot shut down inflation engine: %s\n", header->zStream.msg );

  for ( slot = 0; slot < DICT_CACHE_SIZE; ++slot )
    xfree( header->cache[ slot ].inBuffer );

  xfree( header );
}
|
|
|
|
char * dict_data_read_(
|
|
dictData * h, unsigned long start, unsigned long size, const char * preFilter, const char * postFilter )
|
|
{
|
|
char * buffer;
|
|
char * pt;
|
|
unsigned long end;
|
|
int count;
|
|
char * inBuffer;
|
|
char outBuffer[ OUT_BUFFER_SIZE ];
|
|
int firstChunk, lastChunk;
|
|
int firstOffset, lastOffset;
|
|
int i, j;
|
|
int found, target, lastStamp;
|
|
(void)preFilter;
|
|
(void)postFilter;
|
|
|
|
end = start + size;
|
|
|
|
buffer = xmalloc( size + 1 );
|
|
if ( !buffer ) {
|
|
strcpy( h->errorString, dz_error_str( DZ_ERR_NOMEMORY ) );
|
|
return 0;
|
|
}
|
|
|
|
if ( !size ) {
|
|
*buffer = 0;
|
|
return buffer;
|
|
}
|
|
|
|
PRINTF( DBG_UNZIP, ( "dict_data_read( %p, %lu, %lu, %s, %s )\n", h, start, size, preFilter, postFilter ) );
|
|
|
|
assert( h != NULL );
|
|
switch ( h->type ) {
|
|
case DICT_GZIP:
|
|
/*
|
|
err_fatal( __func__,
|
|
"Cannot seek on pure gzip format files.\n"
|
|
"Use plain text (for performance)"
|
|
" or dzip format (for space savings).\n" );
|
|
break;
|
|
*/
|
|
strcpy( h->errorString, "Cannot seek on pure gzip format files" );
|
|
xfree( buffer );
|
|
return 0;
|
|
case DICT_TEXT: {
|
|
#ifdef __WIN32
|
|
long hiPtr = 0;
|
|
DWORD pos = SetFilePointer( h->fd, start, &hiPtr, FILE_BEGIN );
|
|
DWORD readed = 0;
|
|
if ( pos != INVALID_SET_FILE_POINTER || GetLastError() != NO_ERROR )
|
|
ReadFile( h->fd, buffer, size, &readed, 0 );
|
|
if ( size != readed )
|
|
#else
|
|
if ( fseek( h->fd, start, SEEK_SET ) != 0 || fread( buffer, size, 1, h->fd ) != 1 )
|
|
#endif
|
|
{
|
|
strcpy( h->errorString, dz_error_str( DZ_ERR_READFILE ) );
|
|
xfree( buffer );
|
|
return 0;
|
|
}
|
|
|
|
buffer[ size ] = '\0';
|
|
} break;
|
|
case DICT_DZIP:
|
|
if ( !h->initialized ) {
|
|
h->zStream.zalloc = NULL;
|
|
h->zStream.zfree = NULL;
|
|
h->zStream.opaque = NULL;
|
|
h->zStream.next_in = 0;
|
|
h->zStream.avail_in = 0;
|
|
h->zStream.next_out = NULL;
|
|
h->zStream.avail_out = 0;
|
|
if ( inflateInit2( &h->zStream, -15 ) != Z_OK )
|
|
/*
|
|
err_internal( __func__,
|
|
"Cannot initialize inflation engine: %s\n",
|
|
h->zStream.msg );
|
|
*/
|
|
{
|
|
sprintf( h->errorString, "Cannot initialize inflation engine: %s", h->zStream.msg );
|
|
xfree( buffer );
|
|
return 0;
|
|
}
|
|
++h->initialized;
|
|
}
|
|
firstChunk = start / h->chunkLength;
|
|
firstOffset = start - firstChunk * h->chunkLength;
|
|
lastChunk = end / h->chunkLength;
|
|
lastOffset = end - lastChunk * h->chunkLength;
|
|
PRINTF( DBG_UNZIP,
|
|
( " start = %lu, end = %lu\n"
|
|
"firstChunk = %d, firstOffset = %d,"
|
|
" lastChunk = %d, lastOffset = %d\n",
|
|
start,
|
|
end,
|
|
firstChunk,
|
|
firstOffset,
|
|
lastChunk,
|
|
lastOffset ) );
|
|
for ( pt = buffer, i = firstChunk; i <= lastChunk; i++ ) {
|
|
|
|
/* Access cache */
|
|
found = 0;
|
|
target = 0;
|
|
lastStamp = INT_MAX;
|
|
for ( j = 0; j < DICT_CACHE_SIZE; j++ ) {
|
|
#if USE_CACHE
|
|
if ( h->cache[ j ].chunk == i ) {
|
|
found = 1;
|
|
target = j;
|
|
break;
|
|
}
|
|
#endif
|
|
if ( h->cache[ j ].stamp < lastStamp ) {
|
|
lastStamp = h->cache[ j ].stamp;
|
|
target = j;
|
|
}
|
|
}
|
|
|
|
h->cache[ target ].stamp = ++h->stamp;
|
|
if ( h->stamp < 0 ) {
|
|
h->stamp = 0;
|
|
for ( j = 0; j < DICT_CACHE_SIZE; j++ )
|
|
h->cache[ j ].stamp = -1;
|
|
}
|
|
if ( found ) {
|
|
count = h->cache[ target ].count;
|
|
inBuffer = h->cache[ target ].inBuffer;
|
|
}
|
|
else {
|
|
#ifdef __WIN32
|
|
DWORD pos;
|
|
DWORD readed;
|
|
#endif
|
|
h->cache[ target ].chunk = -1;
|
|
if ( !h->cache[ target ].inBuffer )
|
|
h->cache[ target ].inBuffer = xmalloc( h->chunkLength );
|
|
inBuffer = h->cache[ target ].inBuffer;
|
|
if ( !inBuffer ) {
|
|
strcpy( h->errorString, dz_error_str( DZ_ERR_NOMEMORY ) );
|
|
xfree( buffer );
|
|
return 0;
|
|
}
|
|
|
|
if ( h->chunks[ i ] >= OUT_BUFFER_SIZE ) {
|
|
/*
|
|
err_internal( __func__,
|
|
"h->chunks[%d] = %d >= %ld (OUT_BUFFER_SIZE)\n",
|
|
i, h->chunks[i], OUT_BUFFER_SIZE );
|
|
*/
|
|
sprintf( h->errorString,
|
|
"h->chunks[%d] = %d >= %ld (OUT_BUFFER_SIZE)\n",
|
|
i,
|
|
h->chunks[ i ],
|
|
OUT_BUFFER_SIZE );
|
|
xfree( buffer );
|
|
return 0;
|
|
}
|
|
|
|
#ifdef __WIN32
|
|
pos = SetFilePointer( h->fd, h->offsets[ i ], 0, FILE_BEGIN );
|
|
readed = 0;
|
|
if ( pos != INVALID_SET_FILE_POINTER || GetLastError() != NO_ERROR )
|
|
ReadFile( h->fd, outBuffer, h->chunks[ i ], &readed, 0 );
|
|
if ( h->chunks[ i ] != (int)readed )
|
|
#else
|
|
if ( fseek( h->fd, h->offsets[ i ], SEEK_SET ) != 0 || fread( outBuffer, h->chunks[ i ], 1, h->fd ) != 1 )
|
|
#endif
|
|
{
|
|
strcpy( h->errorString, dz_error_str( DZ_ERR_READFILE ) );
|
|
xfree( buffer );
|
|
return 0;
|
|
}
|
|
|
|
dict_data_filter( outBuffer, &count, OUT_BUFFER_SIZE, preFilter );
|
|
|
|
h->zStream.next_in = (Bytef *)outBuffer;
|
|
h->zStream.avail_in = h->chunks[ i ];
|
|
h->zStream.next_out = (Bytef *)inBuffer;
|
|
h->zStream.avail_out = h->chunkLength;
|
|
if ( inflate( &h->zStream, Z_PARTIAL_FLUSH ) != Z_OK ) {
|
|
// err_fatal( __func__, "inflate: %s\n", h->zStream.msg );
|
|
sprintf( h->errorString, "inflate: %s\n", h->zStream.msg );
|
|
xfree( buffer );
|
|
return 0;
|
|
}
|
|
if ( h->zStream.avail_in )
|
|
/*
|
|
err_internal( __func__,
|
|
"inflate did not flush (%d pending, %d avail)\n",
|
|
h->zStream.avail_in, h->zStream.avail_out );
|
|
*/
|
|
{
|
|
sprintf( h->errorString,
|
|
"inflate did not flush (%d pending, %d avail)\n",
|
|
h->zStream.avail_in,
|
|
h->zStream.avail_out );
|
|
xfree( buffer );
|
|
return 0;
|
|
}
|
|
|
|
count = h->chunkLength - h->zStream.avail_out;
|
|
dict_data_filter( inBuffer, &count, h->chunkLength, postFilter );
|
|
|
|
h->cache[ target ].count = count;
|
|
h->cache[ target ].chunk = i;
|
|
}
|
|
|
|
if ( i == firstChunk ) {
|
|
if ( i == lastChunk ) {
|
|
memcpy( pt, inBuffer + firstOffset, lastOffset - firstOffset );
|
|
pt += lastOffset - firstOffset;
|
|
}
|
|
else {
|
|
if ( count != h->chunkLength )
|
|
/*
|
|
err_internal( __func__,
|
|
"Length = %d instead of %d\n",
|
|
count, h->chunkLength );
|
|
*/
|
|
{
|
|
sprintf( h->errorString, "Length = %d instead of %d\n", count, h->chunkLength );
|
|
xfree( buffer );
|
|
return 0;
|
|
}
|
|
memcpy( pt, inBuffer + firstOffset, h->chunkLength - firstOffset );
|
|
pt += h->chunkLength - firstOffset;
|
|
}
|
|
}
|
|
else if ( i == lastChunk ) {
|
|
memcpy( pt, inBuffer, lastOffset );
|
|
pt += lastOffset;
|
|
}
|
|
else {
|
|
assert( count == h->chunkLength );
|
|
memcpy( pt, inBuffer, h->chunkLength );
|
|
pt += h->chunkLength;
|
|
}
|
|
}
|
|
*pt = '\0';
|
|
break;
|
|
case DICT_UNKNOWN:
|
|
// err_fatal( __func__, "Cannot read unknown file type\n" );
|
|
strcpy( h->errorString, "Cannot read unknown file type" );
|
|
xfree( buffer );
|
|
return 0;
|
|
}
|
|
h->errorString[ 0 ] = 0;
|
|
return buffer;
|
|
}
|
|
|
|
/* Returns the error text stored in |data| (its errorString member).  The
 * text belongs to the handle and must not be freed.  |data| must not be
 * NULL. */
char * dict_error_str( dictData * data )
{
  char * message = data->errorString;

  return message;
}
|
|
|
|
const char * dz_error_str( enum DZ_ERRORS error )
|
|
{
|
|
switch ( error ) {
|
|
case DZ_NOERROR:
|
|
return "No error";
|
|
case DZ_ERR_OPENFILE:
|
|
return "Open file error";
|
|
case DZ_ERR_READFILE:
|
|
return "Read file error";
|
|
case DZ_ERR_INVALID_FORMAT:
|
|
return "Invalid file format";
|
|
case DZ_ERR_UNSUPPORTED_FORMAT:
|
|
return "Unsupported file format";
|
|
case DZ_ERR_NOMEMORY:
|
|
return "Memory allocation error";
|
|
case DZ_ERR_INTERNAL:
|
|
return "Internal error";
|
|
}
|
|
return "Unknown error";
|
|
}
|