mirror of
https://github.com/xiaoyifang/goldendict-ng.git
synced 2024-11-27 15:24:05 +00:00
Change libhunspell to last official version 1.3.2
This commit is contained in:
parent
e41f8e9384
commit
a3e3ab2ad8
|
@ -1,13 +1,15 @@
|
|||
#ifndef _AFFIX_HXX_
|
||||
#define _AFFIX_HXX_
|
||||
|
||||
#include "hunvisapi.h"
|
||||
|
||||
#include "atypes.hxx"
|
||||
#include "baseaffix.hxx"
|
||||
#include "affixmgr.hxx"
|
||||
|
||||
/* A Prefix Entry */
|
||||
|
||||
class PfxEntry : public AffEntry
|
||||
class LIBHUNSPELL_DLL_EXPORTED PfxEntry : protected AffEntry
|
||||
{
|
||||
AffixMgr* pmyMgr;
|
||||
|
||||
|
@ -63,7 +65,7 @@ public:
|
|||
|
||||
/* A Suffix Entry */
|
||||
|
||||
class SfxEntry : public AffEntry
|
||||
class LIBHUNSPELL_DLL_EXPORTED SfxEntry : protected AffEntry
|
||||
{
|
||||
AffixMgr* pmyMgr;
|
||||
char * rappnd;
|
||||
|
@ -84,16 +86,16 @@ public:
|
|||
|
||||
inline bool allowCross() { return ((opts & aeXPRODUCT) != 0); }
|
||||
struct hentry * checkword(const char * word, int len, int optflags,
|
||||
AffEntry* ppfx, char ** wlst, int maxSug, int * ns,
|
||||
PfxEntry* ppfx, char ** wlst, int maxSug, int * ns,
|
||||
// const FLAG cclass = FLAG_NULL, const FLAG needflag = FLAG_NULL, char in_compound=IN_CPD_NOT);
|
||||
const FLAG cclass = FLAG_NULL, const FLAG needflag = FLAG_NULL, const FLAG badflag = 0);
|
||||
|
||||
struct hentry * check_twosfx(const char * word, int len, int optflags, AffEntry* ppfx, const FLAG needflag = NULL);
|
||||
struct hentry * check_twosfx(const char * word, int len, int optflags, PfxEntry* ppfx, const FLAG needflag = NULL);
|
||||
|
||||
char * check_twosfx_morph(const char * word, int len, int optflags,
|
||||
AffEntry* ppfx, const FLAG needflag = FLAG_NULL);
|
||||
PfxEntry* ppfx, const FLAG needflag = FLAG_NULL);
|
||||
struct hentry * get_next_homonym(struct hentry * he);
|
||||
struct hentry * get_next_homonym(struct hentry * word, int optflags, AffEntry* ppfx,
|
||||
struct hentry * get_next_homonym(struct hentry * word, int optflags, PfxEntry* ppfx,
|
||||
const FLAG cclass, const FLAG needflag);
|
||||
|
||||
|
||||
|
|
|
@ -1,14 +1,9 @@
|
|||
#ifndef _AFFIXMGR_HXX_
|
||||
#define _AFFIXMGR_HXX_
|
||||
|
||||
#ifdef MOZILLA_CLIENT
|
||||
#ifdef __SUNPRO_CC // for SunONE Studio compiler
|
||||
using namespace std;
|
||||
#endif
|
||||
#include "hunvisapi.h"
|
||||
|
||||
#include <stdio.h>
|
||||
#else
|
||||
#include <cstdio>
|
||||
#endif
|
||||
|
||||
#include "atypes.hxx"
|
||||
#include "baseaffix.hxx"
|
||||
|
@ -20,13 +15,16 @@ using namespace std;
|
|||
#define dupSFX (1 << 0)
|
||||
#define dupPFX (1 << 1)
|
||||
|
||||
class AffixMgr
|
||||
class PfxEntry;
|
||||
class SfxEntry;
|
||||
|
||||
class LIBHUNSPELL_DLL_EXPORTED AffixMgr
|
||||
{
|
||||
|
||||
AffEntry * pStart[SETSIZE];
|
||||
AffEntry * sStart[SETSIZE];
|
||||
AffEntry * pFlag[SETSIZE];
|
||||
AffEntry * sFlag[SETSIZE];
|
||||
PfxEntry * pStart[SETSIZE];
|
||||
SfxEntry * sStart[SETSIZE];
|
||||
PfxEntry * pFlag[SETSIZE];
|
||||
SfxEntry * sFlag[SETSIZE];
|
||||
HashMgr * pHMgr;
|
||||
HashMgr ** alldic;
|
||||
int * maxdic;
|
||||
|
@ -50,6 +48,7 @@ class AffixMgr
|
|||
int simplifiedtriple;
|
||||
FLAG forbiddenword;
|
||||
FLAG nosuggest;
|
||||
FLAG nongramsuggest;
|
||||
FLAG needaffix;
|
||||
int cpdmin;
|
||||
int numrep;
|
||||
|
@ -67,6 +66,9 @@ class AffixMgr
|
|||
flagentry * defcpdtable;
|
||||
phonetable * phone;
|
||||
int maxngramsugs;
|
||||
int maxcpdsugs;
|
||||
int maxdiff;
|
||||
int onlymaxdiff;
|
||||
int nosplitsugs;
|
||||
int sugswithdots;
|
||||
int cpdwordmax;
|
||||
|
@ -79,8 +81,8 @@ class AffixMgr
|
|||
const char * sfxappnd; // BUG: not stateless
|
||||
FLAG sfxflag; // BUG: not stateless
|
||||
char * derived; // BUG: not stateless
|
||||
AffEntry * sfx; // BUG: not stateless
|
||||
AffEntry * pfx; // BUG: not stateless
|
||||
SfxEntry * sfx; // BUG: not stateless
|
||||
PfxEntry * pfx; // BUG: not stateless
|
||||
int checknum;
|
||||
char * wordchars;
|
||||
unsigned short * wordchars_utf16;
|
||||
|
@ -95,6 +97,9 @@ class AffixMgr
|
|||
FLAG circumfix;
|
||||
FLAG onlyincompound;
|
||||
FLAG keepcase;
|
||||
FLAG forceucase;
|
||||
FLAG warn;
|
||||
int forbidwarn;
|
||||
FLAG substandard;
|
||||
int checksharps;
|
||||
int fullstrip;
|
||||
|
@ -117,24 +122,24 @@ public:
|
|||
char in_compound, const FLAG needflag = FLAG_NULL);
|
||||
inline int isRevSubset(const char * s1, const char * end_of_s2, int len);
|
||||
struct hentry * suffix_check(const char * word, int len, int sfxopts,
|
||||
AffEntry* ppfx, char ** wlst, int maxSug, int * ns,
|
||||
PfxEntry* ppfx, char ** wlst, int maxSug, int * ns,
|
||||
const FLAG cclass = FLAG_NULL, const FLAG needflag = FLAG_NULL,
|
||||
char in_compound = IN_CPD_NOT);
|
||||
struct hentry * suffix_check_twosfx(const char * word, int len,
|
||||
int sfxopts, AffEntry* ppfx, const FLAG needflag = FLAG_NULL);
|
||||
int sfxopts, PfxEntry* ppfx, const FLAG needflag = FLAG_NULL);
|
||||
|
||||
char * affix_check_morph(const char * word, int len,
|
||||
const FLAG needflag = FLAG_NULL, char in_compound = IN_CPD_NOT);
|
||||
char * prefix_check_morph(const char * word, int len,
|
||||
char in_compound, const FLAG needflag = FLAG_NULL);
|
||||
char * suffix_check_morph (const char * word, int len, int sfxopts,
|
||||
AffEntry * ppfx, const FLAG cclass = FLAG_NULL,
|
||||
PfxEntry * ppfx, const FLAG cclass = FLAG_NULL,
|
||||
const FLAG needflag = FLAG_NULL, char in_compound = IN_CPD_NOT);
|
||||
|
||||
char * prefix_check_twosfx_morph(const char * word, int len,
|
||||
char in_compound, const FLAG needflag = FLAG_NULL);
|
||||
char * suffix_check_twosfx_morph(const char * word, int len,
|
||||
int sfxopts, AffEntry * ppfx, const FLAG needflag = FLAG_NULL);
|
||||
int sfxopts, PfxEntry * ppfx, const FLAG needflag = FLAG_NULL);
|
||||
|
||||
char * morphgen(char * ts, int wl, const unsigned short * ap,
|
||||
unsigned short al, char * morph, char * targetmorph, int level);
|
||||
|
@ -145,7 +150,8 @@ public:
|
|||
|
||||
short get_syllable (const char * word, int wlen);
|
||||
int cpdrep_check(const char * word, int len);
|
||||
int cpdpat_check(const char * word, int len, hentry * r1, hentry * r2);
|
||||
int cpdpat_check(const char * word, int len, hentry * r1, hentry * r2,
|
||||
const char affixed);
|
||||
int defcpd_check(hentry *** words, short wnum, hentry * rv,
|
||||
hentry ** rwords, char all);
|
||||
int cpdcase_check(const char * word, int len);
|
||||
|
@ -153,56 +159,62 @@ public:
|
|||
void setcminmax(int * cmin, int * cmax, const char * word, int len);
|
||||
struct hentry * compound_check(const char * word, int len, short wordnum,
|
||||
short numsyllable, short maxwordnum, short wnum, hentry ** words,
|
||||
char hu_mov_rule, char is_sug);
|
||||
char hu_mov_rule, char is_sug, int * info);
|
||||
|
||||
int compound_check_morph(const char * word, int len, short wordnum,
|
||||
short numsyllable, short maxwordnum, short wnum, hentry ** words,
|
||||
char hu_mov_rule, char ** result, char * partresult);
|
||||
|
||||
struct hentry * lookup(const char * word);
|
||||
int get_numrep();
|
||||
struct replentry * get_reptable();
|
||||
RepList * get_iconvtable();
|
||||
RepList * get_oconvtable();
|
||||
struct phonetable * get_phonetable();
|
||||
int get_nummap();
|
||||
struct mapentry * get_maptable();
|
||||
int get_numbreak();
|
||||
char ** get_breaktable();
|
||||
int get_numrep() const;
|
||||
struct replentry * get_reptable() const;
|
||||
RepList * get_iconvtable() const;
|
||||
RepList * get_oconvtable() const;
|
||||
struct phonetable * get_phonetable() const;
|
||||
int get_nummap() const;
|
||||
struct mapentry * get_maptable() const;
|
||||
int get_numbreak() const;
|
||||
char ** get_breaktable() const;
|
||||
char * get_encoding();
|
||||
int get_langnum();
|
||||
int get_langnum() const;
|
||||
char * get_key_string();
|
||||
char * get_try_string();
|
||||
const char * get_wordchars();
|
||||
unsigned short * get_wordchars_utf16(int * len);
|
||||
char * get_ignore();
|
||||
unsigned short * get_ignore_utf16(int * len);
|
||||
int get_compound();
|
||||
FLAG get_compoundflag();
|
||||
FLAG get_compoundbegin();
|
||||
FLAG get_forbiddenword();
|
||||
FLAG get_nosuggest();
|
||||
FLAG get_needaffix();
|
||||
FLAG get_onlyincompound();
|
||||
FLAG get_compoundroot();
|
||||
FLAG get_lemma_present();
|
||||
int get_checknum();
|
||||
char * get_possible_root();
|
||||
const char * get_prefix();
|
||||
const char * get_suffix();
|
||||
const char * get_derived();
|
||||
const char * get_version();
|
||||
const int have_contclass();
|
||||
int get_utf8();
|
||||
int get_complexprefixes();
|
||||
char * get_suffixed(char );
|
||||
int get_maxngramsugs();
|
||||
int get_nosplitsugs();
|
||||
int get_sugswithdots(void);
|
||||
FLAG get_keepcase(void);
|
||||
int get_checksharps(void);
|
||||
char * encode_flag(unsigned short aflag);
|
||||
int get_fullstrip();
|
||||
char * get_try_string() const;
|
||||
const char * get_wordchars() const;
|
||||
unsigned short * get_wordchars_utf16(int * len) const;
|
||||
char * get_ignore() const;
|
||||
unsigned short * get_ignore_utf16(int * len) const;
|
||||
int get_compound() const;
|
||||
FLAG get_compoundflag() const;
|
||||
FLAG get_compoundbegin() const;
|
||||
FLAG get_forbiddenword() const;
|
||||
FLAG get_nosuggest() const;
|
||||
FLAG get_nongramsuggest() const;
|
||||
FLAG get_needaffix() const;
|
||||
FLAG get_onlyincompound() const;
|
||||
FLAG get_compoundroot() const;
|
||||
FLAG get_lemma_present() const;
|
||||
int get_checknum() const;
|
||||
const char * get_prefix() const;
|
||||
const char * get_suffix() const;
|
||||
const char * get_derived() const;
|
||||
const char * get_version() const;
|
||||
int have_contclass() const;
|
||||
int get_utf8() const;
|
||||
int get_complexprefixes() const;
|
||||
char * get_suffixed(char ) const;
|
||||
int get_maxngramsugs() const;
|
||||
int get_maxcpdsugs() const;
|
||||
int get_maxdiff() const;
|
||||
int get_onlymaxdiff() const;
|
||||
int get_nosplitsugs() const;
|
||||
int get_sugswithdots(void) const;
|
||||
FLAG get_keepcase(void) const;
|
||||
FLAG get_forceucase(void) const;
|
||||
FLAG get_warn(void) const;
|
||||
int get_forbidwarn(void) const;
|
||||
int get_checksharps(void) const;
|
||||
char * encode_flag(unsigned short aflag) const;
|
||||
int get_fullstrip() const;
|
||||
|
||||
private:
|
||||
int parse_file(const char * affpath, const char * key);
|
||||
|
@ -221,13 +233,13 @@ private:
|
|||
void reverse_condition(char *);
|
||||
void debugflag(char * result, unsigned short flag);
|
||||
int condlen(char *);
|
||||
int encodeit(struct affentry * ptr, char * cs);
|
||||
int build_pfxtree(AffEntry* pfxptr);
|
||||
int build_sfxtree(AffEntry* sfxptr);
|
||||
int encodeit(affentry &entry, char * cs);
|
||||
int build_pfxtree(PfxEntry* pfxptr);
|
||||
int build_sfxtree(SfxEntry* sfxptr);
|
||||
int process_pfx_order();
|
||||
int process_sfx_order();
|
||||
AffEntry * process_pfx_in_order(AffEntry * ptr, AffEntry * nptr);
|
||||
AffEntry * process_sfx_in_order(AffEntry * ptr, AffEntry * nptr);
|
||||
PfxEntry * process_pfx_in_order(PfxEntry * ptr, PfxEntry * nptr);
|
||||
SfxEntry * process_sfx_in_order(SfxEntry * ptr, SfxEntry * nptr);
|
||||
int process_pfx_tree_to_list();
|
||||
int process_sfx_tree_to_list();
|
||||
int redundant_condition(char, char * strip, int stripl,
|
||||
|
|
|
@ -2,6 +2,7 @@
|
|||
#define _ATYPES_HXX_
|
||||
|
||||
#ifndef HUNSPELL_WARNING
|
||||
#include <stdio.h>
|
||||
#ifdef HUNSPELL_WARNING_ON
|
||||
#define HUNSPELL_WARNING fprintf
|
||||
#else
|
||||
|
@ -34,6 +35,15 @@ static inline void HUNSPELL_WARNING(FILE *, const char *, ...) {}
|
|||
#define IN_CPD_END 2
|
||||
#define IN_CPD_OTHER 3
|
||||
|
||||
// info options
|
||||
#define SPELL_COMPOUND (1 << 0)
|
||||
#define SPELL_FORBIDDEN (1 << 1)
|
||||
#define SPELL_ALLCAP (1 << 2)
|
||||
#define SPELL_NOCAP (1 << 3)
|
||||
#define SPELL_INITCAP (1 << 4)
|
||||
#define SPELL_ORIGCAP (1 << 5)
|
||||
#define SPELL_WARN (1 << 6)
|
||||
|
||||
#define MAXLNLEN 8192
|
||||
|
||||
#define MINCPDLEN 3
|
||||
|
@ -77,8 +87,7 @@ struct guessword {
|
|||
};
|
||||
|
||||
struct mapentry {
|
||||
char * set;
|
||||
w_char * set_utf16;
|
||||
char ** set;
|
||||
int len;
|
||||
};
|
||||
|
||||
|
|
|
@ -1,10 +1,10 @@
|
|||
#ifndef _BASEAFF_HXX_
|
||||
#define _BASEAFF_HXX_
|
||||
|
||||
class AffEntry
|
||||
{
|
||||
public:
|
||||
#include "hunvisapi.h"
|
||||
|
||||
class LIBHUNSPELL_DLL_EXPORTED AffEntry
|
||||
{
|
||||
protected:
|
||||
char * appnd;
|
||||
char * strip;
|
||||
|
|
|
@ -1,9 +1,17 @@
|
|||
#ifndef __CSUTILHXX__
|
||||
#define __CSUTILHXX__
|
||||
|
||||
#include "hunvisapi.h"
|
||||
|
||||
// First some base level utility routines
|
||||
|
||||
#include <string.h>
|
||||
#include "w_char.hxx"
|
||||
#include "htypes.hxx"
|
||||
|
||||
#ifdef MOZILLA_CLIENT
|
||||
#include "nscore.h" // for mozalloc headers
|
||||
#endif
|
||||
|
||||
// casing
|
||||
#define NOCAP 0
|
||||
|
@ -44,72 +52,62 @@
|
|||
#define FORBIDDENWORD 65510
|
||||
#define ONLYUPCASEFLAG 65511
|
||||
|
||||
// hash entry macros
|
||||
#define HENTRY_DATA(h) (h->var ? ((h->var & H_OPT_ALIASM) ? \
|
||||
get_stored_pointer(&(h->word) + h->blen + 1) : &(h->word) + h->blen + 1) : NULL)
|
||||
// NULL-free version for warning-free OOo build
|
||||
#define HENTRY_DATA2(h) (h->var ? ((h->var & H_OPT_ALIASM) ? \
|
||||
get_stored_pointer(&(h->word) + h->blen + 1) : &(h->word) + h->blen + 1) : "")
|
||||
#define HENTRY_FIND(h,p) (HENTRY_DATA(h) ? strstr(HENTRY_DATA(h), p) : NULL)
|
||||
|
||||
#define w_char_eq(a,b) (((a).l == (b).l) && ((a).h == (b).h))
|
||||
|
||||
// convert UTF-16 characters to UTF-8
|
||||
char * u16_u8(char * dest, int size, const w_char * src, int srclen);
|
||||
LIBHUNSPELL_DLL_EXPORTED char * u16_u8(char * dest, int size, const w_char * src, int srclen);
|
||||
|
||||
// convert UTF-8 characters to UTF-16
|
||||
int u8_u16(w_char * dest, int size, const char * src);
|
||||
LIBHUNSPELL_DLL_EXPORTED int u8_u16(w_char * dest, int size, const char * src);
|
||||
|
||||
// sort 2-byte vector
|
||||
void flag_qsort(unsigned short flags[], int begin, int end);
|
||||
LIBHUNSPELL_DLL_EXPORTED void flag_qsort(unsigned short flags[], int begin, int end);
|
||||
|
||||
// binary search in 2-byte vector
|
||||
int flag_bsearch(unsigned short flags[], unsigned short flag, int right);
|
||||
LIBHUNSPELL_DLL_EXPORTED int flag_bsearch(unsigned short flags[], unsigned short flag, int right);
|
||||
|
||||
// remove end of line char(s)
|
||||
void mychomp(char * s);
|
||||
LIBHUNSPELL_DLL_EXPORTED void mychomp(char * s);
|
||||
|
||||
// duplicate string
|
||||
char * mystrdup(const char * s);
|
||||
LIBHUNSPELL_DLL_EXPORTED char * mystrdup(const char * s);
|
||||
|
||||
// strcat for limited length destination string
|
||||
char * mystrcat(char * dest, const char * st, int max);
|
||||
LIBHUNSPELL_DLL_EXPORTED char * mystrcat(char * dest, const char * st, int max);
|
||||
|
||||
// duplicate reverse of string
|
||||
char * myrevstrdup(const char * s);
|
||||
LIBHUNSPELL_DLL_EXPORTED char * myrevstrdup(const char * s);
|
||||
|
||||
// parse into tokens with char delimiter
|
||||
char * mystrsep(char ** sptr, const char delim);
|
||||
LIBHUNSPELL_DLL_EXPORTED char * mystrsep(char ** sptr, const char delim);
|
||||
// parse into tokens with char delimiter
|
||||
char * mystrsep2(char ** sptr, const char delim);
|
||||
LIBHUNSPELL_DLL_EXPORTED char * mystrsep2(char ** sptr, const char delim);
|
||||
|
||||
// replace substrings in a string (NOTE(review): presumably pattern -> replacement; confirm against csutil.cxx)
|
||||
char * mystrrep(char *, const char *, const char *);
|
||||
LIBHUNSPELL_DLL_EXPORTED char * mystrrep(char *, const char *, const char *);
|
||||
|
||||
// append s to ends of every lines in text
|
||||
void strlinecat(char * lines, const char * s);
|
||||
LIBHUNSPELL_DLL_EXPORTED void strlinecat(char * lines, const char * s);
|
||||
|
||||
// tokenize into lines with new line
|
||||
int line_tok(const char * text, char *** lines, char breakchar);
|
||||
LIBHUNSPELL_DLL_EXPORTED int line_tok(const char * text, char *** lines, char breakchar);
|
||||
|
||||
// tokenize into lines with new line and uniq in place
|
||||
char * line_uniq(char * text, char breakchar);
|
||||
char * line_uniq_app(char ** text, char breakchar);
|
||||
LIBHUNSPELL_DLL_EXPORTED char * line_uniq(char * text, char breakchar);
|
||||
LIBHUNSPELL_DLL_EXPORTED char * line_uniq_app(char ** text, char breakchar);
|
||||
|
||||
// change oldchar to newchar in place
|
||||
char * tr(char * text, char oldc, char newc);
|
||||
LIBHUNSPELL_DLL_EXPORTED char * tr(char * text, char oldc, char newc);
|
||||
|
||||
// reverse word
|
||||
int reverseword(char *);
|
||||
LIBHUNSPELL_DLL_EXPORTED int reverseword(char *);
|
||||
|
||||
// reverse word (UTF-8 variant of reverseword)
|
||||
int reverseword_utf(char *);
|
||||
LIBHUNSPELL_DLL_EXPORTED int reverseword_utf(char *);
|
||||
|
||||
// remove duplicates
|
||||
int uniqlist(char ** list, int n);
|
||||
LIBHUNSPELL_DLL_EXPORTED int uniqlist(char ** list, int n);
|
||||
|
||||
// free character array list
|
||||
void freelist(char *** list, int n);
|
||||
LIBHUNSPELL_DLL_EXPORTED void freelist(char *** list, int n);
|
||||
|
||||
// character encoding information
|
||||
struct cs_info {
|
||||
|
@ -118,100 +116,105 @@ struct cs_info {
|
|||
unsigned char cupper;
|
||||
};
|
||||
|
||||
// Unicode character encoding information
|
||||
struct unicode_info {
|
||||
unsigned short c;
|
||||
unsigned short cupper;
|
||||
unsigned short clower;
|
||||
};
|
||||
LIBHUNSPELL_DLL_EXPORTED int initialize_utf_tbl();
|
||||
LIBHUNSPELL_DLL_EXPORTED void free_utf_tbl();
|
||||
LIBHUNSPELL_DLL_EXPORTED unsigned short unicodetoupper(unsigned short c, int langnum);
|
||||
LIBHUNSPELL_DLL_EXPORTED unsigned short unicodetolower(unsigned short c, int langnum);
|
||||
LIBHUNSPELL_DLL_EXPORTED int unicodeisalpha(unsigned short c);
|
||||
|
||||
struct unicode_info2 {
|
||||
char cletter;
|
||||
unsigned short cupper;
|
||||
unsigned short clower;
|
||||
};
|
||||
|
||||
int initialize_utf_tbl();
|
||||
void free_utf_tbl();
|
||||
unsigned short unicodetoupper(unsigned short c, int langnum);
|
||||
unsigned short unicodetolower(unsigned short c, int langnum);
|
||||
int unicodeisalpha(unsigned short c);
|
||||
|
||||
struct enc_entry {
|
||||
const char * enc_name;
|
||||
struct cs_info * cs_table;
|
||||
};
|
||||
|
||||
// language to encoding default map
|
||||
|
||||
struct lang_map {
|
||||
const char * lang;
|
||||
const char * def_enc;
|
||||
int num;
|
||||
};
|
||||
|
||||
struct cs_info * get_current_cs(const char * es);
|
||||
|
||||
const char * get_default_enc(const char * lang);
|
||||
LIBHUNSPELL_DLL_EXPORTED struct cs_info * get_current_cs(const char * es);
|
||||
|
||||
// get language identifiers of language codes
|
||||
int get_lang_num(const char * lang);
|
||||
LIBHUNSPELL_DLL_EXPORTED int get_lang_num(const char * lang);
|
||||
|
||||
// get characters of the given 8bit encoding with lower- and uppercase forms
|
||||
char * get_casechars(const char * enc);
|
||||
LIBHUNSPELL_DLL_EXPORTED char * get_casechars(const char * enc);
|
||||
|
||||
// convert null terminated string to all caps using encoding
|
||||
void enmkallcap(char * d, const char * p, const char * encoding);
|
||||
LIBHUNSPELL_DLL_EXPORTED void enmkallcap(char * d, const char * p, const char * encoding);
|
||||
|
||||
// convert null terminated string to all little using encoding
|
||||
void enmkallsmall(char * d, const char * p, const char * encoding);
|
||||
LIBHUNSPELL_DLL_EXPORTED void enmkallsmall(char * d, const char * p, const char * encoding);
|
||||
|
||||
// convert null terminated string to have intial capital using encoding
|
||||
void enmkinitcap(char * d, const char * p, const char * encoding);
|
||||
// convert null terminated string to have initial capital using encoding
|
||||
LIBHUNSPELL_DLL_EXPORTED void enmkinitcap(char * d, const char * p, const char * encoding);
|
||||
|
||||
// convert null terminated string to all caps
|
||||
void mkallcap(char * p, const struct cs_info * csconv);
|
||||
LIBHUNSPELL_DLL_EXPORTED void mkallcap(char * p, const struct cs_info * csconv);
|
||||
|
||||
// convert null terminated string to all little
|
||||
void mkallsmall(char * p, const struct cs_info * csconv);
|
||||
LIBHUNSPELL_DLL_EXPORTED void mkallsmall(char * p, const struct cs_info * csconv);
|
||||
|
||||
// convert null terminated string to have intial capital
|
||||
void mkinitcap(char * p, const struct cs_info * csconv);
|
||||
// convert null terminated string to have initial capital
|
||||
LIBHUNSPELL_DLL_EXPORTED void mkinitcap(char * p, const struct cs_info * csconv);
|
||||
|
||||
// convert first nc characters of UTF-8 string to little
|
||||
void mkallsmall_utf(w_char * u, int nc, int langnum);
|
||||
LIBHUNSPELL_DLL_EXPORTED void mkallsmall_utf(w_char * u, int nc, int langnum);
|
||||
|
||||
// convert first nc characters of UTF-8 string to capital
|
||||
void mkallcap_utf(w_char * u, int nc, int langnum);
|
||||
LIBHUNSPELL_DLL_EXPORTED void mkallcap_utf(w_char * u, int nc, int langnum);
|
||||
|
||||
// get type of capitalization
|
||||
int get_captype(char * q, int nl, cs_info *);
|
||||
LIBHUNSPELL_DLL_EXPORTED int get_captype(char * q, int nl, cs_info *);
|
||||
|
||||
// get type of capitalization (UTF-8)
|
||||
int get_captype_utf8(w_char * q, int nl, int langnum);
|
||||
LIBHUNSPELL_DLL_EXPORTED int get_captype_utf8(w_char * q, int nl, int langnum);
|
||||
|
||||
// strip all ignored characters in the string
|
||||
void remove_ignored_chars_utf(char * word, unsigned short ignored_chars[], int ignored_len);
|
||||
LIBHUNSPELL_DLL_EXPORTED void remove_ignored_chars_utf(char * word, unsigned short ignored_chars[], int ignored_len);
|
||||
|
||||
// strip all ignored characters in the string
|
||||
void remove_ignored_chars(char * word, char * ignored_chars);
|
||||
LIBHUNSPELL_DLL_EXPORTED void remove_ignored_chars(char * word, char * ignored_chars);
|
||||
|
||||
int parse_string(char * line, char ** out, int ln);
|
||||
LIBHUNSPELL_DLL_EXPORTED int parse_string(char * line, char ** out, int ln);
|
||||
|
||||
int parse_array(char * line, char ** out, unsigned short ** out_utf16,
|
||||
LIBHUNSPELL_DLL_EXPORTED int parse_array(char * line, char ** out, unsigned short ** out_utf16,
|
||||
int * out_utf16_len, int utf8, int ln);
|
||||
|
||||
int fieldlen(const char * r);
|
||||
char * copy_field(char * dest, const char * morph, const char * var);
|
||||
LIBHUNSPELL_DLL_EXPORTED int fieldlen(const char * r);
|
||||
LIBHUNSPELL_DLL_EXPORTED char * copy_field(char * dest, const char * morph, const char * var);
|
||||
|
||||
int morphcmp(const char * s, const char * t);
|
||||
LIBHUNSPELL_DLL_EXPORTED int morphcmp(const char * s, const char * t);
|
||||
|
||||
int get_sfxcount(const char * morph);
|
||||
LIBHUNSPELL_DLL_EXPORTED int get_sfxcount(const char * morph);
|
||||
|
||||
// conversion function for protected memory
|
||||
void store_pointer(char * dest, char * source);
|
||||
LIBHUNSPELL_DLL_EXPORTED void store_pointer(char * dest, char * source);
|
||||
|
||||
// conversion function for protected memory
|
||||
char * get_stored_pointer(char * s);
|
||||
LIBHUNSPELL_DLL_EXPORTED char * get_stored_pointer(const char * s);
|
||||
|
||||
// hash entry macros
|
||||
// Return the additional data attached to a hash entry.
//   - h->var == 0                 : the entry carries no data, NULL is returned
//   - H_OPT_ALIASM set in h->var  : the location at offset blen + 1 from the
//                                   start of the word is decoded with
//                                   get_stored_pointer() and that result is
//                                   returned
//   - otherwise                   : the address at offset blen + 1 from the
//                                   start of the word is returned directly
// h must not be NULL (it is dereferenced unconditionally).
LIBHUNSPELL_DLL_EXPORTED inline char* HENTRY_DATA(struct hentry *h)
{
    if (!h->var)
        return NULL;

    char * const after_word = HENTRY_WORD(h) + h->blen + 1;
    return (h->var & H_OPT_ALIASM) ? get_stored_pointer(after_word)
                                   : after_word;
}
|
||||
|
||||
// NULL-free version for warning-free OOo build
|
||||
// Const flavour of HENTRY_DATA that never yields NULL: when the entry has no
// additional data (h->var == 0) an empty string literal is returned instead.
// With H_OPT_ALIASM set in h->var the location at offset blen + 1 from the
// start of the word is decoded with get_stored_pointer(); otherwise the
// address at that offset is returned as-is.
// h must not be NULL (it is dereferenced unconditionally).
LIBHUNSPELL_DLL_EXPORTED inline const char* HENTRY_DATA2(const struct hentry *h)
{
    if (!h->var)
        return "";

    const char * const after_word = HENTRY_WORD(h) + h->blen + 1;
    if (h->var & H_OPT_ALIASM)
        return get_stored_pointer(after_word);
    return after_word;
}
|
||||
|
||||
// Search the additional data of a hash entry for the substring p.
// Returns a pointer to the first occurrence of p inside the data returned by
// HENTRY_DATA(h), or NULL when the entry has no data or p does not occur.
// h and p must not be NULL.
LIBHUNSPELL_DLL_EXPORTED inline char* HENTRY_FIND(struct hentry *h, const char *p)
{
    // Fetch the data pointer once: HENTRY_DATA may have to decode a stored
    // alias pointer, so evaluating it twice repeats that work for no benefit.
    char * const data = HENTRY_DATA(h);
    return data ? strstr(data, p) : NULL;
}
|
||||
|
||||
#define w_char_eq(a,b) (((a).l == (b).l) && ((a).h == (b).h))
|
||||
|
||||
#endif
|
||||
|
|
|
@ -1,6 +1,8 @@
|
|||
#ifndef _DICTMGR_HXX_
|
||||
#define _DICTMGR_HXX_
|
||||
|
||||
#include "hunvisapi.h"
|
||||
|
||||
#define MAXDICTIONARIES 100
|
||||
#define MAXDICTENTRYLEN 1024
|
||||
|
||||
|
@ -11,7 +13,7 @@ struct dictentry {
|
|||
};
|
||||
|
||||
|
||||
class DictMgr
|
||||
class LIBHUNSPELL_DLL_EXPORTED DictMgr
|
||||
{
|
||||
|
||||
int numdict;
|
||||
|
|
|
@ -1,9 +1,13 @@
|
|||
/* file manager class - read lines of files [filename] OR [filename.hz] */
|
||||
#ifndef _FILEMGR_HXX_
|
||||
#define _FILEMGR_HXX_
|
||||
#include "hunzip.hxx"
|
||||
|
||||
class FileMgr
|
||||
#include "hunvisapi.h"
|
||||
|
||||
#include "hunzip.hxx"
|
||||
#include <stdio.h>
|
||||
|
||||
class LIBHUNSPELL_DLL_EXPORTED FileMgr
|
||||
{
|
||||
protected:
|
||||
FILE * fin;
|
||||
|
|
|
@ -1,18 +1,16 @@
|
|||
#ifndef _HASHMGR_HXX_
|
||||
#define _HASHMGR_HXX_
|
||||
|
||||
#ifndef MOZILLA_CLIENT
|
||||
#include <cstdio>
|
||||
#else
|
||||
#include "hunvisapi.h"
|
||||
|
||||
#include <stdio.h>
|
||||
#endif
|
||||
|
||||
#include "htypes.hxx"
|
||||
#include "filemgr.hxx"
|
||||
|
||||
enum flag { FLAG_CHAR, FLAG_LONG, FLAG_NUM, FLAG_UNI };
|
||||
|
||||
class HashMgr
|
||||
class LIBHUNSPELL_DLL_EXPORTED HashMgr
|
||||
{
|
||||
int tablesize;
|
||||
struct hentry ** tableptr;
|
||||
|
|
|
@ -12,7 +12,7 @@
|
|||
#define H_OPT_PHON (1 << 2)
|
||||
|
||||
// see also csutil.hxx
|
||||
#define HENTRY_WORD(h) &(h->word)
|
||||
#define HENTRY_WORD(h) &(h->word[0])
|
||||
|
||||
// approx. number of user defined words
|
||||
#define USERWORD 1000
|
||||
|
@ -26,7 +26,7 @@ struct hentry
|
|||
struct hentry * next; // next word with same hash code
|
||||
struct hentry * next_homonym; // next homonym word (with same hash code)
|
||||
char var; // variable fields (only for special pronounciation yet)
|
||||
char word; // variable-length word (8-bit or UTF-8 encoding)
|
||||
char word[1]; // variable-length word (8-bit or UTF-8 encoding)
|
||||
};
|
||||
|
||||
#endif
|
||||
|
|
|
@ -1,32 +1,27 @@
|
|||
#ifndef _MYSPELLMGR_H_
|
||||
#define _MYSPELLMGR_H_
|
||||
|
||||
#include "hunvisapi.h"
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
typedef struct Hunhandle Hunhandle;
|
||||
|
||||
#ifdef _MSC_VER
|
||||
#define DLL __declspec ( dllexport )
|
||||
#else
|
||||
#define DLL
|
||||
#endif
|
||||
LIBHUNSPELL_DLL_EXPORTED Hunhandle *Hunspell_create(const char * affpath, const char * dpath);
|
||||
|
||||
|
||||
DLL Hunhandle *Hunspell_create(const char * affpath, const char * dpath);
|
||||
|
||||
DLL Hunhandle *Hunspell_create_key(const char * affpath, const char * dpath,
|
||||
LIBHUNSPELL_DLL_EXPORTED Hunhandle *Hunspell_create_key(const char * affpath, const char * dpath,
|
||||
const char * key);
|
||||
|
||||
DLL void Hunspell_destroy(Hunhandle *pHunspell);
|
||||
LIBHUNSPELL_DLL_EXPORTED void Hunspell_destroy(Hunhandle *pHunspell);
|
||||
|
||||
/* spell(word) - spellcheck word
|
||||
* output: 0 = bad word, not 0 = good word
|
||||
*/
|
||||
DLL int Hunspell_spell(Hunhandle *pHunspell, const char *);
|
||||
LIBHUNSPELL_DLL_EXPORTED int Hunspell_spell(Hunhandle *pHunspell, const char *);
|
||||
|
||||
DLL char *Hunspell_get_dic_encoding(Hunhandle *pHunspell);
|
||||
LIBHUNSPELL_DLL_EXPORTED char *Hunspell_get_dic_encoding(Hunhandle *pHunspell);
|
||||
|
||||
/* suggest(suggestions, word) - search suggestions
|
||||
* input: pointer to an array of strings pointer and the (bad) word
|
||||
|
@ -35,17 +30,17 @@ DLL char *Hunspell_get_dic_encoding(Hunhandle *pHunspell);
|
|||
* a newly allocated array of strings (*slts will be NULL when number
|
||||
* of suggestion equals 0.)
|
||||
*/
|
||||
DLL int Hunspell_suggest(Hunhandle *pHunspell, char*** slst, const char * word);
|
||||
LIBHUNSPELL_DLL_EXPORTED int Hunspell_suggest(Hunhandle *pHunspell, char*** slst, const char * word);
|
||||
|
||||
/* morphological functions */
|
||||
|
||||
/* analyze(result, word) - morphological analysis of the word */
|
||||
|
||||
DLL int Hunspell_analyze(Hunhandle *pHunspell, char*** slst, const char * word);
|
||||
LIBHUNSPELL_DLL_EXPORTED int Hunspell_analyze(Hunhandle *pHunspell, char*** slst, const char * word);
|
||||
|
||||
/* stem(result, word) - stemmer function */
|
||||
|
||||
DLL int Hunspell_stem(Hunhandle *pHunspell, char*** slst, const char * word);
|
||||
LIBHUNSPELL_DLL_EXPORTED int Hunspell_stem(Hunhandle *pHunspell, char*** slst, const char * word);
|
||||
|
||||
/* stem(result, analysis, n) - get stems from a morph. analysis
|
||||
* example:
|
||||
|
@ -54,11 +49,11 @@ DLL int Hunspell_stem(Hunhandle *pHunspell, char*** slst, const char * word);
|
|||
* int n2 = Hunspell_stem2(result2, result, n1);
|
||||
*/
|
||||
|
||||
DLL int Hunspell_stem2(Hunhandle *pHunspell, char*** slst, char** desc, int n);
|
||||
LIBHUNSPELL_DLL_EXPORTED int Hunspell_stem2(Hunhandle *pHunspell, char*** slst, char** desc, int n);
|
||||
|
||||
/* generate(result, word, word2) - morphological generation by example(s) */
|
||||
|
||||
DLL int Hunspell_generate(Hunhandle *pHunspell, char*** slst, const char * word,
|
||||
LIBHUNSPELL_DLL_EXPORTED int Hunspell_generate(Hunhandle *pHunspell, char*** slst, const char * word,
|
||||
const char * word2);
|
||||
|
||||
/* generate(result, word, desc, n) - generation by morph. description(s)
|
||||
|
@ -69,29 +64,29 @@ DLL int Hunspell_generate(Hunhandle *pHunspell, char*** slst, const char * word,
|
|||
* for (int i = 0; i < n; i++) printf("%s\n", result[i]);
|
||||
*/
|
||||
|
||||
DLL int Hunspell_generate2(Hunhandle *pHunspell, char*** slst, const char * word,
|
||||
LIBHUNSPELL_DLL_EXPORTED int Hunspell_generate2(Hunhandle *pHunspell, char*** slst, const char * word,
|
||||
char** desc, int n);
|
||||
|
||||
/* functions for run-time modification of the dictionary */
|
||||
|
||||
/* add word to the run-time dictionary */
|
||||
|
||||
DLL int Hunspell_add(Hunhandle *pHunspell, const char * word);
|
||||
LIBHUNSPELL_DLL_EXPORTED int Hunspell_add(Hunhandle *pHunspell, const char * word);
|
||||
|
||||
/* add word to the run-time dictionary with affix flags of
|
||||
* the example (a dictionary word): Hunspell will recognize
|
||||
* affixed forms of the new word, too.
|
||||
*/
|
||||
|
||||
DLL int Hunspell_add_with_affix(Hunhandle *pHunspell, const char * word, const char * example);
|
||||
LIBHUNSPELL_DLL_EXPORTED int Hunspell_add_with_affix(Hunhandle *pHunspell, const char * word, const char * example);
|
||||
|
||||
/* remove word from the run-time dictionary */
|
||||
|
||||
DLL int Hunspell_remove(Hunhandle *pHunspell, const char * word);
|
||||
LIBHUNSPELL_DLL_EXPORTED int Hunspell_remove(Hunhandle *pHunspell, const char * word);
|
||||
|
||||
/* free suggestion lists */
|
||||
|
||||
DLL void Hunspell_free_list(Hunhandle *pHunspell, char *** slst, int n);
|
||||
LIBHUNSPELL_DLL_EXPORTED void Hunspell_free_list(Hunhandle *pHunspell, char *** slst, int n);
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
|
|
|
@ -1,41 +1,23 @@
|
|||
#include "hunvisapi.h"
|
||||
|
||||
#include "hashmgr.hxx"
|
||||
#include "affixmgr.hxx"
|
||||
#include "suggestmgr.hxx"
|
||||
#include "langnum.hxx"
|
||||
|
||||
#define SPELL_COMPOUND (1 << 0)
|
||||
#define SPELL_FORBIDDEN (1 << 1)
|
||||
#define SPELL_ALLCAP (1 << 2)
|
||||
#define SPELL_NOCAP (1 << 3)
|
||||
#define SPELL_INITCAP (1 << 4)
|
||||
|
||||
#define SPELL_XML "<?xml?>"
|
||||
|
||||
#define MAXDIC 20
|
||||
#define MAXSUGGESTION 15
|
||||
#define MAXSHARPS 5
|
||||
|
||||
#define HUNSPELL_OK (1 << 0)
|
||||
#define HUNSPELL_OK_WARN (1 << 1)
|
||||
|
||||
#ifndef _MYSPELLMGR_HXX_
|
||||
#define _MYSPELLMGR_HXX_
|
||||
|
||||
#ifdef HUNSPELL_STATIC
|
||||
#define DLLEXPORT
|
||||
#else
|
||||
#ifdef HUNSPELL_EXPORTS
|
||||
#define DLLEXPORT __declspec( dllexport )
|
||||
#else
|
||||
#define DLLEXPORT __declspec( dllimport )
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#undef DLLEXPORT
|
||||
#define DLLEXPORT
|
||||
|
||||
#ifdef WIN32
|
||||
class DLLEXPORT Hunspell
|
||||
#else
|
||||
class Hunspell
|
||||
#endif
|
||||
class LIBHUNSPELL_DLL_EXPORTED Hunspell
|
||||
{
|
||||
AffixMgr* pAMgr;
|
||||
HashMgr* pHMgr[MAXDIC];
|
||||
|
@ -147,6 +129,8 @@ public:
|
|||
|
||||
struct cs_info * get_csconv();
|
||||
const char * get_version();
|
||||
|
||||
int get_langnum() const;
|
||||
|
||||
/* experimental and deprecated functions */
|
||||
|
||||
|
@ -158,7 +142,6 @@ public:
|
|||
/* spec. suggestions */
|
||||
int suggest_auto(char*** slst, const char * word);
|
||||
int suggest_pos_stems(char*** slst, const char * word);
|
||||
char * get_possible_root();
|
||||
#endif
|
||||
|
||||
private:
|
||||
|
|
18
winlibs/include/hunspell/hunvisapi.h
Normal file
18
winlibs/include/hunspell/hunvisapi.h
Normal file
|
@ -0,0 +1,18 @@
|
|||
#ifndef _HUNSPELL_VISIBILITY_H_
|
||||
#define _HUNSPELL_VISIBILITY_H_
|
||||
|
||||
#if defined(HUNSPELL_STATIC)
|
||||
# define LIBHUNSPELL_DLL_EXPORTED
|
||||
#elif defined(_MSC_VER)
|
||||
# if defined(BUILDING_LIBHUNSPELL)
|
||||
# define LIBHUNSPELL_DLL_EXPORTED __declspec(dllexport)
|
||||
# else
|
||||
# define LIBHUNSPELL_DLL_EXPORTED __declspec(dllimport)
|
||||
# endif
|
||||
#elif BUILDING_LIBHUNSPELL && 1
|
||||
# define LIBHUNSPELL_DLL_EXPORTED __attribute__((__visibility__("default")))
|
||||
#else
|
||||
# define LIBHUNSPELL_DLL_EXPORTED
|
||||
#endif
|
||||
|
||||
#endif
|
|
@ -4,6 +4,10 @@
|
|||
#ifndef _HUNZIP_HXX_
|
||||
#define _HUNZIP_HXX_
|
||||
|
||||
#include "hunvisapi.h"
|
||||
|
||||
#include <stdio.h>
|
||||
|
||||
#define BUFSIZE 65536
|
||||
#define HZIP_EXTENSION ".hz"
|
||||
|
||||
|
@ -17,7 +21,7 @@ struct bit {
|
|||
int v[2];
|
||||
};
|
||||
|
||||
class Hunzip
|
||||
class LIBHUNSPELL_DLL_EXPORTED Hunzip
|
||||
{
|
||||
|
||||
protected:
|
||||
|
|
|
@ -34,6 +34,8 @@
|
|||
#define MAXPHONETLEN 256
|
||||
#define MAXPHONETUTF8LEN (MAXPHONETLEN * 4)
|
||||
|
||||
#include "hunvisapi.h"
|
||||
|
||||
struct phonetable {
|
||||
char utf8;
|
||||
cs_info * lang;
|
||||
|
@ -42,9 +44,9 @@ struct phonetable {
|
|||
int hash[HASHSIZE];
|
||||
};
|
||||
|
||||
void init_phonet_hash(phonetable & parms);
|
||||
LIBHUNSPELL_DLL_EXPORTED void init_phonet_hash(phonetable & parms);
|
||||
|
||||
int phonet (const char * inword, char * target,
|
||||
LIBHUNSPELL_DLL_EXPORTED int phonet (const char * inword, char * target,
|
||||
int len, phonetable & phone);
|
||||
|
||||
#endif
|
||||
|
|
|
@ -1,9 +1,12 @@
|
|||
/* string replacement list class */
|
||||
#ifndef _REPLIST_HXX_
|
||||
#define _REPLIST_HXX_
|
||||
|
||||
#include "hunvisapi.h"
|
||||
|
||||
#include "w_char.hxx"
|
||||
|
||||
class RepList
|
||||
class LIBHUNSPELL_DLL_EXPORTED RepList
|
||||
{
|
||||
protected:
|
||||
replentry ** dat;
|
||||
|
|
|
@ -8,6 +8,7 @@
|
|||
#define MAX_GUESS 200
|
||||
#define MAXNGRAMSUGS 4
|
||||
#define MAXPHONSUGS 2
|
||||
#define MAXCOMPOUNDSUGS 3
|
||||
|
||||
// timelimit: max ~1/4 sec (process time on Linux) for a time consuming function
|
||||
#define TIMELIMIT (CLOCKS_PER_SEC >> 2)
|
||||
|
@ -17,6 +18,9 @@
|
|||
#define NGRAM_LONGER_WORSE (1 << 0)
|
||||
#define NGRAM_ANY_MISMATCH (1 << 1)
|
||||
#define NGRAM_LOWERING (1 << 2)
|
||||
#define NGRAM_WEIGHTED (1 << 3)
|
||||
|
||||
#include "hunvisapi.h"
|
||||
|
||||
#include "atypes.hxx"
|
||||
#include "affixmgr.hxx"
|
||||
|
@ -26,7 +30,7 @@
|
|||
|
||||
enum { LCS_UP, LCS_LEFT, LCS_UPLEFT };
|
||||
|
||||
class SuggestMgr
|
||||
class LIBHUNSPELL_DLL_EXPORTED SuggestMgr
|
||||
{
|
||||
char * ckey;
|
||||
int ckeyl;
|
||||
|
@ -43,6 +47,7 @@ class SuggestMgr
|
|||
int langnum;
|
||||
int nosplitsugs;
|
||||
int maxngramsugs;
|
||||
int maxcpdsugs;
|
||||
int complexprefixes;
|
||||
|
||||
|
||||
|
@ -90,8 +95,7 @@ private:
|
|||
int movechar_utf(char **, const w_char *, int, int, int);
|
||||
|
||||
int mapchars(char**, const char *, int, int);
|
||||
int map_related(const char *, int, char ** wlst, int, int, const mapentry*, int, int *, clock_t *);
|
||||
int map_related_utf(w_char *, int, int, int, char ** wlst, int, const mapentry*, int, int *, clock_t *);
|
||||
int map_related(const char *, char *, int, int, char ** wlst, int, int, const mapentry*, int, int *, clock_t *);
|
||||
int ngram(int n, char * s1, const char * s2, int opt);
|
||||
int mystrlen(const char * word);
|
||||
int leftcommonsubstring(char * s1, const char * s2);
|
||||
|
|
|
@ -14,6 +14,8 @@ typedef struct __attribute__ ((packed)) {
|
|||
struct replentry {
|
||||
char * pattern;
|
||||
char * pattern2;
|
||||
bool start;
|
||||
bool end;
|
||||
};
|
||||
|
||||
#endif
|
||||
|
|
Binary file not shown.
BIN
winlibs/lib/libhunspell-1.3.2.a
Normal file
BIN
winlibs/lib/libhunspell-1.3.2.a
Normal file
Binary file not shown.
Loading…
Reference in a new issue