2009-01-28 20:55:45 +00:00
|
|
|
/* Made up from data.c and other supplementary files of dictd-1.0.11 for the
|
|
|
|
* GoldenDict program.
|
|
|
|
*/
|
|
|
|
|
|
|
|
/* data.c --
|
|
|
|
* Created: Tue Jul 16 12:45:41 1996 by faith@dict.org
|
|
|
|
* Revised: Sat Mar 30 10:46:06 2002 by faith@dict.org
|
|
|
|
* Copyright 1996, 1997, 1998, 2000, 2002 Rickard E. Faith (faith@dict.org)
|
|
|
|
*
|
|
|
|
* This program is free software; you can redistribute it and/or modify it
|
|
|
|
* under the terms of the GNU General Public License as published by the
|
|
|
|
* Free Software Foundation; either version 1, or (at your option) any
|
|
|
|
* later version.
|
|
|
|
*
|
|
|
|
* This program is distributed in the hope that it will be useful, but
|
|
|
|
* WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
|
|
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
|
|
|
* General Public License for more details.
|
|
|
|
*
|
|
|
|
* You should have received a copy of the GNU General Public License along
|
|
|
|
* with this program; if not, write to the Free Software Foundation, Inc.,
|
|
|
|
* 675 Mass Ave, Cambridge, MA 02139, USA.
|
|
|
|
*/
|
|
|
|
|
2009-04-27 12:25:15 +00:00
|
|
|
#include <stdlib.h>
|
2009-01-28 20:55:45 +00:00
|
|
|
#include "dictzip.h"
|
|
|
|
#include <limits.h>
|
|
|
|
#include <stdarg.h>
|
|
|
|
#include <errno.h>
|
|
|
|
#include <stdlib.h>
|
|
|
|
#include <string.h>
|
|
|
|
|
|
|
|
#define BUFFERSIZE 10240
|
|
|
|
|
|
|
|
#define OUT_BUFFER_SIZE 0xffffL
|
|
|
|
|
|
|
|
#define IN_BUFFER_SIZE ((unsigned long)((double)(OUT_BUFFER_SIZE - 12) * 0.89))
|
|
|
|
|
|
|
|
/* For gzip-compatible header, as defined in RFC 1952 */
|
|
|
|
|
|
|
|
/* Magic for GZIP (rfc1952) */
|
|
|
|
#define GZ_MAGIC1 0x1f /* First magic byte */
|
|
|
|
#define GZ_MAGIC2 0x8b /* Second magic byte */
|
|
|
|
|
|
|
|
/* FLaGs (bitmapped), from rfc1952 */
|
|
|
|
#define GZ_FTEXT 0x01 /* Set for ASCII text */
|
|
|
|
#define GZ_FHCRC 0x02 /* Header CRC16 */
|
|
|
|
#define GZ_FEXTRA 0x04 /* Optional field (random access index) */
|
|
|
|
#define GZ_FNAME 0x08 /* Original name */
|
|
|
|
#define GZ_COMMENT 0x10 /* Zero-terminated, human-readable comment */
|
|
|
|
#define GZ_MAX 2 /* Maximum compression */
|
|
|
|
#define GZ_FAST 4 /* Fastest compression */
|
|
|
|
|
|
|
|
/* These are from rfc1952 */
|
|
|
|
#define GZ_OS_FAT 0 /* FAT filesystem (MS-DOS, OS/2, NT/Win32) */
|
|
|
|
#define GZ_OS_AMIGA 1 /* Amiga */
|
|
|
|
#define GZ_OS_VMS 2 /* VMS (or OpenVMS) */
|
|
|
|
#define GZ_OS_UNIX 3 /* Unix */
|
|
|
|
#define GZ_OS_VMCMS 4 /* VM/CMS */
|
|
|
|
#define GZ_OS_ATARI 5 /* Atari TOS */
|
|
|
|
#define GZ_OS_HPFS 6 /* HPFS filesystem (OS/2, NT) */
|
|
|
|
#define GZ_OS_MAC 7 /* Macintosh */
|
|
|
|
#define GZ_OS_Z 8 /* Z-System */
|
|
|
|
#define GZ_OS_CPM 9 /* CP/M */
|
|
|
|
#define GZ_OS_TOPS20 10 /* TOPS-20 */
|
|
|
|
#define GZ_OS_NTFS 11 /* NTFS filesystem (NT) */
|
|
|
|
#define GZ_OS_QDOS 12 /* QDOS */
|
|
|
|
#define GZ_OS_ACORN 13 /* Acorn RISCOS */
|
|
|
|
#define GZ_OS_UNKNOWN 255 /* unknown */
|
|
|
|
|
|
|
|
#define GZ_RND_S1 'R' /* First magic for random access format */
|
|
|
|
#define GZ_RND_S2 'A' /* Second magic for random access format */
|
|
|
|
|
|
|
|
#define GZ_ID1 0 /* GZ_MAGIC1 */
|
|
|
|
#define GZ_ID2 1 /* GZ_MAGIC2 */
|
|
|
|
#define GZ_CM 2 /* Compression Method (Z_DEFLATED) */
|
|
|
|
#define GZ_FLG 3 /* FLaGs (see above) */
|
|
|
|
#define GZ_MTIME 4 /* Modification TIME */
|
|
|
|
#define GZ_XFL 8 /* eXtra FLags (GZ_MAX or GZ_FAST) */
|
|
|
|
#define GZ_OS 9 /* Operating System */
|
|
|
|
#define GZ_XLEN 10 /* eXtra LENgth (16bit) */
|
|
|
|
#define GZ_FEXTRA_START 12 /* Start of extra fields */
|
|
|
|
#define GZ_SI1 12 /* Subfield ID1 */
|
|
|
|
#define GZ_SI2 13 /* Subfield ID2 */
|
|
|
|
#define GZ_SUBLEN 14 /* Subfield length (16bit) */
|
|
|
|
#define GZ_VERSION 16 /* Version for subfield format */
|
|
|
|
#define GZ_CHUNKLEN 18 /* Chunk length (16bit) */
|
|
|
|
#define GZ_CHUNKCNT 20 /* Number of chunks (16bit) */
|
|
|
|
#define GZ_RNDDATA 22 /* Random access data (16bit) */
|
|
|
|
|
|
|
|
|
|
|
|
#define DBG_VERBOSE (0<<30|1<< 0) /* Verbose */
|
|
|
|
#define DBG_ZIP (0<<30|1<< 1) /* Zip */
|
|
|
|
#define DBG_UNZIP (0<<30|1<< 2) /* Unzip */
|
|
|
|
#define DBG_SEARCH (0<<30|1<< 3) /* Search */
|
|
|
|
#define DBG_SCAN (0<<30|1<< 4) /* Config file scan */
|
|
|
|
#define DBG_PARSE (0<<30|1<< 5) /* Config file parse */
|
|
|
|
#define DBG_INIT (0<<30|1<< 6) /* Database initialization */
|
|
|
|
#define DBG_PORT (0<<30|1<< 7) /* Log port number for connections */
|
|
|
|
#define DBG_LEV (0<<30|1<< 8) /* Levenshtein matching */
|
|
|
|
#define DBG_AUTH (0<<30|1<< 9) /* Debug authentication */
|
|
|
|
#define DBG_NODETACH (0<<30|1<<10) /* Don't detach as a background proc. */
|
|
|
|
#define DBG_NOFORK (0<<30|1<<11) /* Don't fork (single threaded) */
|
|
|
|
#define DBG_ALT (0<<30|1<<12) /* altcompare() */
|
|
|
|
|
|
|
|
#define LOG_SERVER (0<<30|1<< 0) /* Log server diagnostics */
|
|
|
|
#define LOG_CONNECT (0<<30|1<< 1) /* Log connection information */
|
|
|
|
#define LOG_STATS (0<<30|1<< 2) /* Log termination information */
|
|
|
|
#define LOG_COMMAND (0<<30|1<< 3) /* Log commands */
|
|
|
|
#define LOG_FOUND (0<<30|1<< 4) /* Log words found */
|
|
|
|
#define LOG_NOTFOUND (0<<30|1<< 5) /* Log words not found */
|
|
|
|
#define LOG_CLIENT (0<<30|1<< 6) /* Log client */
|
|
|
|
#define LOG_HOST (0<<30|1<< 7) /* Log remote host name */
|
|
|
|
#define LOG_TIMESTAMP (0<<30|1<< 8) /* Log with timestamps */
|
|
|
|
#define LOG_MIN (0<<30|1<< 9) /* Log a few minimal things */
|
|
|
|
#define LOG_AUTH (0<<30|1<<10) /* Log authentication denials */
|
|
|
|
|
|
|
|
#define DICT_LOG_TERM 0
|
|
|
|
#define DICT_LOG_DEFINE 1
|
|
|
|
#define DICT_LOG_MATCH 2
|
|
|
|
#define DICT_LOG_NOMATCH 3
|
|
|
|
#define DICT_LOG_CLIENT 4
|
|
|
|
#define DICT_LOG_TRACE 5
|
|
|
|
#define DICT_LOG_COMMAND 6
|
|
|
|
#define DICT_LOG_AUTH 7
|
|
|
|
#define DICT_LOG_CONNECT 8
|
|
|
|
|
|
|
|
#define DICT_UNKNOWN 0
|
|
|
|
#define DICT_TEXT 1
|
|
|
|
#define DICT_GZIP 2
|
|
|
|
#define DICT_DZIP 3
|
|
|
|
|
2009-02-02 00:44:38 +00:00
|
|
|
/* Always enable the mmap mode -- else it reads the whole file into memory! */
|
2009-01-28 20:55:45 +00:00
|
|
|
#define HAVE_MMAP
|
|
|
|
|
|
|
|
#include <sys/stat.h>
|
2009-02-02 00:44:38 +00:00
|
|
|
|
|
|
|
#ifndef __WIN32
|
2009-01-28 20:55:45 +00:00
|
|
|
#ifdef HAVE_MMAP
|
|
|
|
#include <sys/mman.h>
|
|
|
|
#endif
|
2009-02-02 00:44:38 +00:00
|
|
|
#endif
|
|
|
|
|
2009-01-28 20:55:45 +00:00
|
|
|
#include <ctype.h>
|
|
|
|
#include <fcntl.h>
|
|
|
|
#include <assert.h>
|
|
|
|
|
|
|
|
#include <sys/stat.h>
|
|
|
|
|
|
|
|
#define USE_CACHE 1
|
|
|
|
|
|
|
|
#ifdef HAVE_MMAP
|
|
|
|
int mmap_mode = 1; /* dictd uses mmap() function (the default) */
|
|
|
|
#else
|
|
|
|
int mmap_mode = 0;
|
|
|
|
#endif
|
|
|
|
|
|
|
|
#define dict_data_filter( ... )
|
|
|
|
#define PRINTF( ... )
|
|
|
|
|
|
|
|
#define xmalloc malloc
|
|
|
|
#define xfree free
|
|
|
|
|
|
|
|
static const char * _err_programName = "GoldenDict";
|
|
|
|
|
|
|
|
#define log_error( ... )
|
|
|
|
#define log_error_va( ... )
|
|
|
|
|
|
|
|
static void err_fatal( const char *routine, const char *format, ... )
|
|
|
|
{
|
|
|
|
va_list ap;
|
|
|
|
|
|
|
|
fflush( stdout );
|
|
|
|
if (_err_programName) {
|
|
|
|
if (routine)
|
|
|
|
fprintf( stderr, "%s (%s): ", _err_programName, routine );
|
|
|
|
else
|
|
|
|
fprintf( stderr, "%s: ", _err_programName );
|
|
|
|
} else {
|
|
|
|
if (routine) fprintf( stderr, "%s: ", routine );
|
|
|
|
}
|
|
|
|
|
|
|
|
va_start( ap, format );
|
|
|
|
vfprintf( stderr, format, ap );
|
|
|
|
log_error_va( routine, format, ap );
|
|
|
|
va_end( ap );
|
|
|
|
|
|
|
|
fflush( stderr );
|
|
|
|
fflush( stdout );
|
|
|
|
exit ( 1 );
|
|
|
|
}
|
|
|
|
|
|
|
|
/* \doc |err_fatal_errno| flushes "stdout", prints a fatal error report on
|
|
|
|
"stderr", prints the system error corresponding to |errno|, flushes
|
|
|
|
"stderr" and "stdout", and calls |exit|. |routine| is the name of the
|
|
|
|
routine in which the error took place. */
|
|
|
|
|
|
|
|
static void err_fatal_errno( const char *routine, const char *format, ... )
|
|
|
|
{
|
|
|
|
va_list ap;
|
|
|
|
int errorno = errno;
|
|
|
|
|
|
|
|
fflush( stdout );
|
|
|
|
if (_err_programName) {
|
|
|
|
if (routine)
|
|
|
|
fprintf( stderr, "%s (%s): ", _err_programName, routine );
|
|
|
|
else
|
|
|
|
fprintf( stderr, "%s: ", _err_programName );
|
|
|
|
} else {
|
|
|
|
if (routine) fprintf( stderr, "%s: ", routine );
|
|
|
|
}
|
|
|
|
|
|
|
|
va_start( ap, format );
|
|
|
|
vfprintf( stderr, format, ap );
|
|
|
|
log_error_va( routine, format, ap );
|
|
|
|
va_end( ap );
|
|
|
|
|
|
|
|
#if HAVE_STRERROR
|
|
|
|
fprintf( stderr, "%s: %s\n", routine, strerror( errorno ) );
|
|
|
|
log_error( routine, "%s: %s\n", routine, strerror( errorno ) );
|
|
|
|
#else
|
|
|
|
errno = errorno;
|
|
|
|
perror( routine );
|
|
|
|
log_error( routine, "%s: errno = %d\n", routine, errorno );
|
|
|
|
#endif
|
|
|
|
|
|
|
|
fflush( stderr );
|
|
|
|
fflush( stdout );
|
|
|
|
exit( 1 );
|
|
|
|
}
|
|
|
|
|
|
|
|
/* \doc |err_internal| flushes "stdout", prints the fatal error message,
|
|
|
|
flushes "stderr" and "stdout", and calls |abort| so that a core dump is
|
|
|
|
generated. */
|
|
|
|
|
|
|
|
static void err_internal( const char *routine, const char *format, ... )
|
|
|
|
{
|
|
|
|
va_list ap;
|
|
|
|
|
|
|
|
fflush( stdout );
|
|
|
|
if (_err_programName) {
|
|
|
|
if (routine)
|
|
|
|
fprintf( stderr, "%s (%s): Internal error\n ",
|
|
|
|
_err_programName, routine );
|
|
|
|
else
|
|
|
|
fprintf( stderr, "%s: Internal error\n ", _err_programName );
|
|
|
|
} else {
|
|
|
|
if (routine) fprintf( stderr, "%s: Internal error\n ", routine );
|
|
|
|
else fprintf( stderr, "Internal error\n " );
|
|
|
|
}
|
|
|
|
|
|
|
|
va_start( ap, format );
|
|
|
|
vfprintf( stderr, format, ap );
|
|
|
|
log_error( routine, format, ap );
|
|
|
|
va_end( ap );
|
|
|
|
|
|
|
|
if (_err_programName)
|
|
|
|
fprintf( stderr, "Aborting %s...\n", _err_programName );
|
|
|
|
else
|
|
|
|
fprintf( stderr, "Aborting...\n" );
|
|
|
|
fflush( stderr );
|
|
|
|
fflush( stdout );
|
|
|
|
abort();
|
|
|
|
}
|
|
|
|
|
2009-04-29 23:18:26 +00:00
|
|
|
/* Provide __func__ on compilers that lack it.
 *
 * __func__ (C99 / C++11) and GCC's __FUNCTION__ are predefined identifiers,
 * not preprocessor macros, so they cannot be detected with #ifdef: testing
 * them that way always falls through to the __FILE__ fallback and every
 * diagnostic would carry the file name instead of the function name.
 * Detect support from the language version / compiler instead.  The outer
 * #ifndef still lets a build override __func__ from the command line.
 */
#ifndef __func__
# if (defined(__STDC_VERSION__) && __STDC_VERSION__ >= 199901L) \
     || (defined(__cplusplus) && __cplusplus >= 201103L)
   /* __func__ is provided by the language; nothing to define. */
# elif defined(__GNUC__) || defined(_MSC_VER)
#  define __func__ __FUNCTION__
# else
#  define __func__ __FILE__
# endif
#endif
|
|
|
|
|
2009-01-28 20:55:45 +00:00
|
|
|
static int dict_read_header( const char *filename,
|
|
|
|
dictData *header, int computeCRC )
|
|
|
|
{
|
|
|
|
FILE *str;
|
|
|
|
int id1, id2, si1, si2;
|
|
|
|
char buffer[BUFFERSIZE];
|
|
|
|
int extraLength, subLength;
|
|
|
|
int i;
|
|
|
|
char *pt;
|
|
|
|
int c;
|
|
|
|
struct stat sb;
|
|
|
|
unsigned long crc = crc32( 0L, Z_NULL, 0 );
|
|
|
|
int count;
|
|
|
|
unsigned long offset;
|
|
|
|
|
2009-02-02 00:44:38 +00:00
|
|
|
if (!(str = fopen( filename, "rb" )))
|
2009-01-28 20:55:45 +00:00
|
|
|
err_fatal_errno( __func__,
|
|
|
|
"Cannot open data file \"%s\" for read\n", filename );
|
|
|
|
|
|
|
|
header->filename = NULL;//str_find( filename );
|
|
|
|
header->headerLength = GZ_XLEN - 1;
|
|
|
|
header->type = DICT_UNKNOWN;
|
|
|
|
|
|
|
|
id1 = getc( str );
|
|
|
|
id2 = getc( str );
|
|
|
|
|
|
|
|
if (id1 != GZ_MAGIC1 || id2 != GZ_MAGIC2) {
|
|
|
|
header->type = DICT_TEXT;
|
|
|
|
fstat( fileno( str ), &sb );
|
|
|
|
header->compressedLength = header->length = sb.st_size;
|
|
|
|
header->origFilename = NULL;//str_find( filename );
|
|
|
|
header->mtime = sb.st_mtime;
|
|
|
|
if (computeCRC) {
|
|
|
|
rewind( str );
|
|
|
|
while (!feof( str )) {
|
|
|
|
if ((count = fread( buffer, 1, BUFFERSIZE, str ))) {
|
|
|
|
crc = crc32( crc, buffer, count );
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
header->crc = crc;
|
|
|
|
fclose( str );
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
header->type = DICT_GZIP;
|
|
|
|
|
|
|
|
header->method = getc( str );
|
|
|
|
header->flags = getc( str );
|
|
|
|
header->mtime = getc( str ) << 0;
|
|
|
|
header->mtime |= getc( str ) << 8;
|
|
|
|
header->mtime |= getc( str ) << 16;
|
|
|
|
header->mtime |= getc( str ) << 24;
|
|
|
|
header->extraFlags = getc( str );
|
|
|
|
header->os = getc( str );
|
|
|
|
|
|
|
|
if (header->flags & GZ_FEXTRA) {
|
|
|
|
extraLength = getc( str ) << 0;
|
|
|
|
extraLength |= getc( str ) << 8;
|
|
|
|
header->headerLength += extraLength + 2;
|
|
|
|
si1 = getc( str );
|
|
|
|
si2 = getc( str );
|
|
|
|
|
|
|
|
if (si1 == GZ_RND_S1 && si2 == GZ_RND_S2) {
|
|
|
|
subLength = getc( str ) << 0;
|
|
|
|
subLength |= getc( str ) << 8;
|
|
|
|
header->version = getc( str ) << 0;
|
|
|
|
header->version |= getc( str ) << 8;
|
|
|
|
|
|
|
|
if (header->version != 1)
|
|
|
|
err_internal( __func__,
|
|
|
|
"dzip header version %d not supported\n",
|
|
|
|
header->version );
|
|
|
|
|
|
|
|
header->chunkLength = getc( str ) << 0;
|
|
|
|
header->chunkLength |= getc( str ) << 8;
|
|
|
|
header->chunkCount = getc( str ) << 0;
|
|
|
|
header->chunkCount |= getc( str ) << 8;
|
|
|
|
|
|
|
|
if (header->chunkCount <= 0) {
|
|
|
|
fclose( str );
|
|
|
|
return 5;
|
|
|
|
}
|
|
|
|
header->chunks = xmalloc( sizeof( header->chunks[0] )
|
|
|
|
* header->chunkCount );
|
|
|
|
for (i = 0; i < header->chunkCount; i++) {
|
|
|
|
header->chunks[i] = getc( str ) << 0;
|
|
|
|
header->chunks[i] |= getc( str ) << 8;
|
|
|
|
}
|
|
|
|
header->type = DICT_DZIP;
|
|
|
|
} else {
|
|
|
|
fseek( str, header->headerLength, SEEK_SET );
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
if (header->flags & GZ_FNAME) { /* FIXME! Add checking against header len */
|
|
|
|
pt = buffer;
|
|
|
|
while ((c = getc( str )) && c != EOF){
|
|
|
|
*pt++ = c;
|
|
|
|
|
|
|
|
if (pt == buffer + sizeof (buffer)){
|
|
|
|
err_fatal (
|
|
|
|
__func__,
|
|
|
|
"too long FNAME field in dzip file \"%s\"\n", filename);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
*pt = '\0';
|
|
|
|
header->origFilename = NULL;//str_find( buffer );
|
|
|
|
header->headerLength += strlen( buffer ) + 1;
|
|
|
|
} else {
|
|
|
|
header->origFilename = NULL;
|
|
|
|
}
|
|
|
|
|
|
|
|
if (header->flags & GZ_COMMENT) { /* FIXME! Add checking for header len */
|
|
|
|
pt = buffer;
|
|
|
|
while ((c = getc( str )) && c != EOF){
|
|
|
|
*pt++ = c;
|
|
|
|
|
|
|
|
if (pt == buffer + sizeof (buffer)){
|
|
|
|
err_fatal (
|
|
|
|
__func__,
|
|
|
|
"too long COMMENT field in dzip file \"%s\"\n", filename);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
*pt = '\0';
|
|
|
|
header->comment = NULL;//str_find( buffer );
|
|
|
|
header->headerLength += strlen( header->comment ) + 1;
|
|
|
|
} else {
|
|
|
|
header->comment = NULL;
|
|
|
|
}
|
|
|
|
|
|
|
|
if (header->flags & GZ_FHCRC) {
|
|
|
|
getc( str );
|
|
|
|
getc( str );
|
|
|
|
header->headerLength += 2;
|
|
|
|
}
|
|
|
|
|
|
|
|
if (ftell( str ) != header->headerLength + 1)
|
|
|
|
err_internal( __func__,
|
|
|
|
"File position (%lu) != header length + 1 (%d)\n",
|
|
|
|
ftell( str ), header->headerLength + 1 );
|
|
|
|
|
|
|
|
fseek( str, -8, SEEK_END );
|
|
|
|
header->crc = getc( str ) << 0;
|
|
|
|
header->crc |= getc( str ) << 8;
|
|
|
|
header->crc |= getc( str ) << 16;
|
|
|
|
header->crc |= getc( str ) << 24;
|
|
|
|
header->length = getc( str ) << 0;
|
|
|
|
header->length |= getc( str ) << 8;
|
|
|
|
header->length |= getc( str ) << 16;
|
|
|
|
header->length |= getc( str ) << 24;
|
|
|
|
header->compressedLength = ftell( str );
|
|
|
|
|
|
|
|
/* Compute offsets */
|
|
|
|
header->offsets = xmalloc( sizeof( header->offsets[0] )
|
|
|
|
* header->chunkCount );
|
|
|
|
for (offset = header->headerLength + 1, i = 0;
|
|
|
|
i < header->chunkCount;
|
|
|
|
i++)
|
|
|
|
{
|
|
|
|
header->offsets[i] = offset;
|
|
|
|
offset += header->chunks[i];
|
|
|
|
}
|
|
|
|
|
|
|
|
fclose( str );
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
dictData *dict_data_open( const char *filename, int computeCRC )
|
|
|
|
{
|
|
|
|
dictData *h = NULL;
|
|
|
|
struct stat sb;
|
|
|
|
int j;
|
|
|
|
|
|
|
|
if (!filename)
|
|
|
|
return NULL;
|
|
|
|
|
|
|
|
h = xmalloc( sizeof( struct dictData ) );
|
|
|
|
|
|
|
|
memset( h, 0, sizeof( struct dictData ) );
|
|
|
|
h->initialized = 0;
|
|
|
|
|
|
|
|
if (dict_read_header( filename, h, computeCRC )) {
|
|
|
|
err_fatal( __func__,
|
|
|
|
"\"%s\" not in text or dzip format\n", filename );
|
|
|
|
}
|
2009-02-02 00:44:38 +00:00
|
|
|
|
|
|
|
#ifdef __WIN32
|
|
|
|
|
|
|
|
h->fileHandle = CreateFileA( filename, GENERIC_READ, FILE_SHARE_READ, 0,
|
|
|
|
OPEN_EXISTING, 0, 0 );
|
|
|
|
|
|
|
|
if ( h->fileHandle == INVALID_HANDLE_VALUE )
|
|
|
|
{
|
|
|
|
err_fatal_errno( __func__,
|
|
|
|
"Cannot open data file \"%s\"\n", filename );
|
|
|
|
|
|
|
|
xfree( h );
|
|
|
|
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
h->size = GetFileSize( h->fileHandle, 0 );
|
|
|
|
|
|
|
|
h->mappingHandle = CreateFileMapping( h->fileHandle, 0,
|
|
|
|
PAGE_READONLY, 0, h->size, 0 );
|
|
|
|
|
|
|
|
if ( !h->mappingHandle )
|
|
|
|
{
|
|
|
|
err_fatal_errno( __func__,
|
|
|
|
"Cannot create file mapping for data file \"%s\"\n", filename );
|
|
|
|
|
|
|
|
CloseHandle( h->fileHandle );
|
|
|
|
xfree( h );
|
|
|
|
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
h->start = MapViewOfFile( h->mappingHandle, FILE_MAP_READ, 0, 0, h->size );
|
|
|
|
|
|
|
|
if ( !h->start )
|
|
|
|
{
|
|
|
|
err_fatal_errno( __func__,
|
|
|
|
"Cannot map view of data file \"%s\"\n", filename );
|
|
|
|
|
|
|
|
CloseHandle( h->mappingHandle );
|
|
|
|
CloseHandle( h->fileHandle );
|
|
|
|
xfree( h );
|
|
|
|
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
#else
|
|
|
|
|
2009-01-28 20:55:45 +00:00
|
|
|
if ((h->fd = open( filename, O_RDONLY )) < 0)
|
|
|
|
err_fatal_errno( __func__,
|
|
|
|
"Cannot open data file \"%s\"\n", filename );
|
|
|
|
if (fstat( h->fd, &sb ))
|
|
|
|
err_fatal_errno( __func__,
|
|
|
|
"Cannot stat data file \"%s\"\n", filename );
|
|
|
|
h->size = sb.st_size;
|
|
|
|
|
|
|
|
if (mmap_mode){
|
|
|
|
#ifdef HAVE_MMAP
|
|
|
|
h->start = mmap( NULL, h->size, PROT_READ, MAP_SHARED, h->fd, 0 );
|
|
|
|
if ((void *)h->start == (void *)(-1))
|
|
|
|
err_fatal_errno(
|
|
|
|
__func__,
|
|
|
|
"Cannot mmap data file \"%s\"\n", filename );
|
|
|
|
#else
|
|
|
|
err_fatal (__func__, "This should not happen");
|
|
|
|
#endif
|
|
|
|
}else{
|
|
|
|
h->start = xmalloc (h->size);
|
|
|
|
if (-1 == read (h->fd, (char *) h->start, h->size))
|
|
|
|
err_fatal_errno (
|
|
|
|
__func__,
|
|
|
|
"Cannot read data file \"%s\"\n", filename );
|
|
|
|
|
|
|
|
close (h -> fd);
|
|
|
|
h -> fd = 0;
|
|
|
|
}
|
2009-02-02 00:44:38 +00:00
|
|
|
#endif
|
2009-01-28 20:55:45 +00:00
|
|
|
|
|
|
|
h->end = h->start + h->size;
|
|
|
|
|
|
|
|
for (j = 0; j < DICT_CACHE_SIZE; j++) {
|
|
|
|
h->cache[j].chunk = -1;
|
|
|
|
h->cache[j].stamp = -1;
|
|
|
|
h->cache[j].inBuffer = NULL;
|
|
|
|
h->cache[j].count = 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
return h;
|
|
|
|
}
|
|
|
|
|
|
|
|
/* Release everything owned by a handle returned by dict_data_open().
 *
 * Unmaps (or frees) the file contents, closes the underlying file, frees
 * the chunk and offset tables, shuts down the inflate stream if it was
 * ever initialized, frees all cached chunk buffers, and finally wipes and
 * frees the handle itself.  A NULL |header| is ignored.
 */
void dict_data_close( dictData *header )
{
   int slot;

   if (header == NULL)
      return;

#ifdef __WIN32
   UnmapViewOfFile( header->start );
   CloseHandle( header->mappingHandle );
   CloseHandle( header->fileHandle );
#else
   if (header->fd >= 0 && mmap_mode) {
#ifdef HAVE_MMAP
      munmap( (void *)header->start, header->size );
      close( header->fd );
      header->fd = 0;
      header->start = header->end = NULL;
#else
      err_fatal (__func__, "This should not happen");
#endif
   } else if (header->fd >= 0) {
      /* Contents were read into a heap buffer; free(NULL) is a no-op. */
      xfree ((char *) header->start);
   }
#endif

   xfree( header->chunks );
   xfree( header->offsets );

   if (header->initialized && inflateEnd( &header->zStream ))
      err_internal( __func__,
                    "Cannot shut down inflation engine: %s\n",
                    header->zStream.msg );

   for (slot = 0; slot < DICT_CACHE_SIZE; ++slot)
      xfree (header->cache[slot].inBuffer);

   memset( header, 0, sizeof( struct dictData ) );
   xfree( header );
}
|
|
|
|
|
|
|
|
char *dict_data_read_ (
|
|
|
|
dictData *h, unsigned long start, unsigned long size,
|
|
|
|
const char *preFilter, const char *postFilter )
|
|
|
|
{
|
|
|
|
char *buffer, *pt;
|
|
|
|
unsigned long end;
|
|
|
|
int count;
|
|
|
|
char *inBuffer;
|
|
|
|
char outBuffer[OUT_BUFFER_SIZE];
|
|
|
|
int firstChunk, lastChunk;
|
|
|
|
int firstOffset, lastOffset;
|
|
|
|
int i, j;
|
|
|
|
int found, target, lastStamp;
|
|
|
|
static int stamp = 0;
|
|
|
|
|
|
|
|
end = start + size;
|
|
|
|
|
|
|
|
buffer = xmalloc( size + 1 );
|
|
|
|
|
|
|
|
PRINTF(DBG_UNZIP,
|
|
|
|
("dict_data_read( %p, %lu, %lu, %s, %s )\n",
|
|
|
|
h, start, size, preFilter, postFilter ));
|
|
|
|
|
|
|
|
assert( h != NULL);
|
|
|
|
switch (h->type) {
|
|
|
|
case DICT_GZIP:
|
|
|
|
err_fatal( __func__,
|
|
|
|
"Cannot seek on pure gzip format files.\n"
|
|
|
|
"Use plain text (for performance)"
|
|
|
|
" or dzip format (for space savings).\n" );
|
|
|
|
break;
|
|
|
|
case DICT_TEXT:
|
|
|
|
memcpy( buffer, h->start + start, size );
|
|
|
|
buffer[size] = '\0';
|
|
|
|
break;
|
|
|
|
case DICT_DZIP:
|
|
|
|
if (!h->initialized) {
|
|
|
|
++h->initialized;
|
|
|
|
h->zStream.zalloc = NULL;
|
|
|
|
h->zStream.zfree = NULL;
|
|
|
|
h->zStream.opaque = NULL;
|
|
|
|
h->zStream.next_in = 0;
|
|
|
|
h->zStream.avail_in = 0;
|
|
|
|
h->zStream.next_out = NULL;
|
|
|
|
h->zStream.avail_out = 0;
|
|
|
|
if (inflateInit2( &h->zStream, -15 ) != Z_OK)
|
|
|
|
err_internal( __func__,
|
|
|
|
"Cannot initialize inflation engine: %s\n",
|
|
|
|
h->zStream.msg );
|
|
|
|
}
|
|
|
|
firstChunk = start / h->chunkLength;
|
|
|
|
firstOffset = start - firstChunk * h->chunkLength;
|
|
|
|
lastChunk = end / h->chunkLength;
|
|
|
|
lastOffset = end - lastChunk * h->chunkLength;
|
|
|
|
PRINTF(DBG_UNZIP,
|
|
|
|
(" start = %lu, end = %lu\n"
|
|
|
|
"firstChunk = %d, firstOffset = %d,"
|
|
|
|
" lastChunk = %d, lastOffset = %d\n",
|
|
|
|
start, end, firstChunk, firstOffset, lastChunk, lastOffset ));
|
|
|
|
for (pt = buffer, i = firstChunk; i <= lastChunk; i++) {
|
|
|
|
|
|
|
|
/* Access cache */
|
|
|
|
found = 0;
|
|
|
|
target = 0;
|
|
|
|
lastStamp = INT_MAX;
|
|
|
|
for (j = 0; j < DICT_CACHE_SIZE; j++) {
|
|
|
|
#if USE_CACHE
|
|
|
|
if (h->cache[j].chunk == i) {
|
|
|
|
found = 1;
|
|
|
|
target = j;
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
#endif
|
|
|
|
if (h->cache[j].stamp < lastStamp) {
|
|
|
|
lastStamp = h->cache[j].stamp;
|
|
|
|
target = j;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
h->cache[target].stamp = ++stamp;
|
|
|
|
if (found) {
|
|
|
|
count = h->cache[target].count;
|
|
|
|
inBuffer = h->cache[target].inBuffer;
|
|
|
|
} else {
|
|
|
|
h->cache[target].chunk = i;
|
|
|
|
if (!h->cache[target].inBuffer)
|
|
|
|
h->cache[target].inBuffer = xmalloc( IN_BUFFER_SIZE );
|
|
|
|
inBuffer = h->cache[target].inBuffer;
|
|
|
|
|
|
|
|
if (h->chunks[i] >= OUT_BUFFER_SIZE ) {
|
|
|
|
err_internal( __func__,
|
|
|
|
"h->chunks[%d] = %d >= %ld (OUT_BUFFER_SIZE)\n",
|
|
|
|
i, h->chunks[i], OUT_BUFFER_SIZE );
|
|
|
|
}
|
|
|
|
memcpy( outBuffer, h->start + h->offsets[i], h->chunks[i] );
|
|
|
|
dict_data_filter( outBuffer, &count, OUT_BUFFER_SIZE, preFilter );
|
|
|
|
|
|
|
|
h->zStream.next_in = outBuffer;
|
|
|
|
h->zStream.avail_in = h->chunks[i];
|
|
|
|
h->zStream.next_out = inBuffer;
|
|
|
|
h->zStream.avail_out = IN_BUFFER_SIZE;
|
|
|
|
if (inflate( &h->zStream, Z_PARTIAL_FLUSH ) != Z_OK)
|
|
|
|
err_fatal( __func__, "inflate: %s\n", h->zStream.msg );
|
|
|
|
if (h->zStream.avail_in)
|
|
|
|
err_internal( __func__,
|
|
|
|
"inflate did not flush (%d pending, %d avail)\n",
|
|
|
|
h->zStream.avail_in, h->zStream.avail_out );
|
|
|
|
|
|
|
|
count = IN_BUFFER_SIZE - h->zStream.avail_out;
|
|
|
|
dict_data_filter( inBuffer, &count, IN_BUFFER_SIZE, postFilter );
|
|
|
|
|
|
|
|
h->cache[target].count = count;
|
|
|
|
}
|
|
|
|
|
|
|
|
if (i == firstChunk) {
|
|
|
|
if (i == lastChunk) {
|
|
|
|
memcpy( pt, inBuffer + firstOffset, lastOffset-firstOffset);
|
|
|
|
pt += lastOffset - firstOffset;
|
|
|
|
} else {
|
|
|
|
if (count != h->chunkLength )
|
|
|
|
err_internal( __func__,
|
|
|
|
"Length = %d instead of %d\n",
|
|
|
|
count, h->chunkLength );
|
|
|
|
memcpy( pt, inBuffer + firstOffset,
|
|
|
|
h->chunkLength - firstOffset );
|
|
|
|
pt += h->chunkLength - firstOffset;
|
|
|
|
}
|
|
|
|
} else if (i == lastChunk) {
|
|
|
|
memcpy( pt, inBuffer, lastOffset );
|
|
|
|
pt += lastOffset;
|
|
|
|
} else {
|
|
|
|
assert( count == h->chunkLength );
|
|
|
|
memcpy( pt, inBuffer, h->chunkLength );
|
|
|
|
pt += h->chunkLength;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
*pt = '\0';
|
|
|
|
break;
|
|
|
|
case DICT_UNKNOWN:
|
|
|
|
err_fatal( __func__, "Cannot read unknown file type\n" );
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
|
|
|
|
return buffer;
|
|
|
|
}
|