mirror of
https://github.com/xiaoyifang/goldendict-ng.git
synced 2024-11-27 23:34:06 +00:00
Mac-specific: Update hunspell library to version 1.6.1
This commit is contained in:
parent
f2a30c29cc
commit
46a4509101
|
@ -191,7 +191,7 @@ mac {
|
||||||
-lvorbisfile \
|
-lvorbisfile \
|
||||||
-lvorbis \
|
-lvorbis \
|
||||||
-logg \
|
-logg \
|
||||||
-lhunspell-1.2 \
|
-lhunspell-1.6.1 \
|
||||||
-llzo2
|
-llzo2
|
||||||
isEmpty(DISABLE_INTERNAL_PLAYER) {
|
isEmpty(DISABLE_INTERNAL_PLAYER) {
|
||||||
LIBS += -lao \
|
LIBS += -lao \
|
||||||
|
|
|
@ -1,7 +1,75 @@
|
||||||
#ifndef _AFFIX_HXX_
|
/* ***** BEGIN LICENSE BLOCK *****
|
||||||
#define _AFFIX_HXX_
|
* Version: MPL 1.1/GPL 2.0/LGPL 2.1
|
||||||
|
*
|
||||||
|
* Copyright (C) 2002-2017 Németh László
|
||||||
|
*
|
||||||
|
* The contents of this file are subject to the Mozilla Public License Version
|
||||||
|
* 1.1 (the "License"); you may not use this file except in compliance with
|
||||||
|
* the License. You may obtain a copy of the License at
|
||||||
|
* http://www.mozilla.org/MPL/
|
||||||
|
*
|
||||||
|
* Software distributed under the License is distributed on an "AS IS" basis,
|
||||||
|
* WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
|
||||||
|
* for the specific language governing rights and limitations under the
|
||||||
|
* License.
|
||||||
|
*
|
||||||
|
* Hunspell is based on MySpell which is Copyright (C) 2002 Kevin Hendricks.
|
||||||
|
*
|
||||||
|
* Contributor(s): David Einstein, Davide Prina, Giuseppe Modugno,
|
||||||
|
* Gianluca Turconi, Simon Brouwer, Noll János, Bíró Árpád,
|
||||||
|
* Goldman Eleonóra, Sarlós Tamás, Bencsáth Boldizsár, Halácsy Péter,
|
||||||
|
* Dvornik László, Gefferth András, Nagy Viktor, Varga Dániel, Chris Halls,
|
||||||
|
* Rene Engelhard, Bram Moolenaar, Dafydd Jones, Harri Pitkänen
|
||||||
|
*
|
||||||
|
* Alternatively, the contents of this file may be used under the terms of
|
||||||
|
* either the GNU General Public License Version 2 or later (the "GPL"), or
|
||||||
|
* the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
|
||||||
|
* in which case the provisions of the GPL or the LGPL are applicable instead
|
||||||
|
* of those above. If you wish to allow use of your version of this file only
|
||||||
|
* under the terms of either the GPL or the LGPL, and not to allow others to
|
||||||
|
* use your version of this file under the terms of the MPL, indicate your
|
||||||
|
* decision by deleting the provisions above and replace them with the notice
|
||||||
|
* and other provisions required by the GPL or the LGPL. If you do not delete
|
||||||
|
* the provisions above, a recipient may use your version of this file under
|
||||||
|
* the terms of any one of the MPL, the GPL or the LGPL.
|
||||||
|
*
|
||||||
|
* ***** END LICENSE BLOCK ***** */
|
||||||
|
/*
|
||||||
|
* Copyright 2002 Kevin B. Hendricks, Stratford, Ontario, Canada
|
||||||
|
* And Contributors. All rights reserved.
|
||||||
|
*
|
||||||
|
* Redistribution and use in source and binary forms, with or without
|
||||||
|
* modification, are permitted provided that the following conditions
|
||||||
|
* are met:
|
||||||
|
*
|
||||||
|
* 1. Redistributions of source code must retain the above copyright
|
||||||
|
* notice, this list of conditions and the following disclaimer.
|
||||||
|
*
|
||||||
|
* 2. Redistributions in binary form must reproduce the above copyright
|
||||||
|
* notice, this list of conditions and the following disclaimer in the
|
||||||
|
* documentation and/or other materials provided with the distribution.
|
||||||
|
*
|
||||||
|
* 3. All modifications to the source code must be clearly marked as
|
||||||
|
* such. Binary redistributions based on modified source code
|
||||||
|
* must be clearly marked as modified versions in the documentation
|
||||||
|
* and/or other materials provided with the distribution.
|
||||||
|
*
|
||||||
|
* THIS SOFTWARE IS PROVIDED BY KEVIN B. HENDRICKS AND CONTRIBUTORS
|
||||||
|
* ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||||
|
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
|
||||||
|
* FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL
|
||||||
|
* KEVIN B. HENDRICKS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
|
||||||
|
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
|
||||||
|
* BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
|
||||||
|
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
|
||||||
|
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
||||||
|
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
|
||||||
|
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
|
||||||
|
* SUCH DAMAGE.
|
||||||
|
*/
|
||||||
|
|
||||||
#include "hunvisapi.h"
|
#ifndef AFFIX_HXX_
|
||||||
|
#define AFFIX_HXX_
|
||||||
|
|
||||||
#include "atypes.hxx"
|
#include "atypes.hxx"
|
||||||
#include "baseaffix.hxx"
|
#include "baseaffix.hxx"
|
||||||
|
@ -9,128 +77,147 @@
|
||||||
|
|
||||||
/* A Prefix Entry */
|
/* A Prefix Entry */
|
||||||
|
|
||||||
class LIBHUNSPELL_DLL_EXPORTED PfxEntry : protected AffEntry
|
class PfxEntry : public AffEntry {
|
||||||
{
|
private:
|
||||||
AffixMgr* pmyMgr;
|
PfxEntry(const PfxEntry&);
|
||||||
|
PfxEntry& operator=(const PfxEntry&);
|
||||||
|
|
||||||
PfxEntry * next;
|
private:
|
||||||
PfxEntry * nexteq;
|
AffixMgr* pmyMgr;
|
||||||
PfxEntry * nextne;
|
|
||||||
PfxEntry * flgnxt;
|
|
||||||
|
|
||||||
public:
|
PfxEntry* next;
|
||||||
|
PfxEntry* nexteq;
|
||||||
|
PfxEntry* nextne;
|
||||||
|
PfxEntry* flgnxt;
|
||||||
|
|
||||||
PfxEntry(AffixMgr* pmgr, affentry* dp );
|
public:
|
||||||
~PfxEntry();
|
explicit PfxEntry(AffixMgr* pmgr);
|
||||||
|
|
||||||
inline bool allowCross() { return ((opts & aeXPRODUCT) != 0); }
|
bool allowCross() const { return ((opts & aeXPRODUCT) != 0); }
|
||||||
struct hentry * checkword(const char * word, int len, char in_compound,
|
struct hentry* checkword(const char* word,
|
||||||
const FLAG needflag = FLAG_NULL);
|
int len,
|
||||||
|
char in_compound,
|
||||||
|
const FLAG needflag = FLAG_NULL);
|
||||||
|
|
||||||
struct hentry * check_twosfx(const char * word, int len, char in_compound, const FLAG needflag = NULL);
|
struct hentry* check_twosfx(const char* word,
|
||||||
|
int len,
|
||||||
|
char in_compound,
|
||||||
|
const FLAG needflag = FLAG_NULL);
|
||||||
|
|
||||||
char * check_morph(const char * word, int len, char in_compound,
|
std::string check_morph(const char* word,
|
||||||
const FLAG needflag = FLAG_NULL);
|
int len,
|
||||||
|
char in_compound,
|
||||||
|
const FLAG needflag = FLAG_NULL);
|
||||||
|
|
||||||
char * check_twosfx_morph(const char * word, int len,
|
std::string check_twosfx_morph(const char* word,
|
||||||
char in_compound, const FLAG needflag = FLAG_NULL);
|
int len,
|
||||||
|
char in_compound,
|
||||||
|
const FLAG needflag = FLAG_NULL);
|
||||||
|
|
||||||
inline FLAG getFlag() { return aflag; }
|
FLAG getFlag() { return aflag; }
|
||||||
inline const char * getKey() { return appnd; }
|
const char* getKey() { return appnd.c_str(); }
|
||||||
char * add(const char * word, int len);
|
std::string add(const char* word, size_t len);
|
||||||
|
|
||||||
inline short getKeyLen() { return appndl; }
|
inline short getKeyLen() { return appnd.size(); }
|
||||||
|
|
||||||
inline const char * getMorph() { return morphcode; }
|
inline const char* getMorph() { return morphcode; }
|
||||||
|
|
||||||
inline const unsigned short * getCont() { return contclass; }
|
inline const unsigned short* getCont() { return contclass; }
|
||||||
inline short getContLen() { return contclasslen; }
|
inline short getContLen() { return contclasslen; }
|
||||||
|
|
||||||
inline PfxEntry * getNext() { return next; }
|
inline PfxEntry* getNext() { return next; }
|
||||||
inline PfxEntry * getNextNE() { return nextne; }
|
inline PfxEntry* getNextNE() { return nextne; }
|
||||||
inline PfxEntry * getNextEQ() { return nexteq; }
|
inline PfxEntry* getNextEQ() { return nexteq; }
|
||||||
inline PfxEntry * getFlgNxt() { return flgnxt; }
|
inline PfxEntry* getFlgNxt() { return flgnxt; }
|
||||||
|
|
||||||
inline void setNext(PfxEntry * ptr) { next = ptr; }
|
inline void setNext(PfxEntry* ptr) { next = ptr; }
|
||||||
inline void setNextNE(PfxEntry * ptr) { nextne = ptr; }
|
inline void setNextNE(PfxEntry* ptr) { nextne = ptr; }
|
||||||
inline void setNextEQ(PfxEntry * ptr) { nexteq = ptr; }
|
inline void setNextEQ(PfxEntry* ptr) { nexteq = ptr; }
|
||||||
inline void setFlgNxt(PfxEntry * ptr) { flgnxt = ptr; }
|
inline void setFlgNxt(PfxEntry* ptr) { flgnxt = ptr; }
|
||||||
|
|
||||||
inline char * nextchar(char * p);
|
inline char* nextchar(char* p);
|
||||||
inline int test_condition(const char * st);
|
inline int test_condition(const char* st);
|
||||||
};
|
};
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
/* A Suffix Entry */
|
/* A Suffix Entry */
|
||||||
|
|
||||||
class LIBHUNSPELL_DLL_EXPORTED SfxEntry : protected AffEntry
|
class SfxEntry : public AffEntry {
|
||||||
{
|
private:
|
||||||
AffixMgr* pmyMgr;
|
SfxEntry(const SfxEntry&);
|
||||||
char * rappnd;
|
SfxEntry& operator=(const SfxEntry&);
|
||||||
|
|
||||||
SfxEntry * next;
|
private:
|
||||||
SfxEntry * nexteq;
|
AffixMgr* pmyMgr;
|
||||||
SfxEntry * nextne;
|
std::string rappnd;
|
||||||
SfxEntry * flgnxt;
|
|
||||||
|
|
||||||
SfxEntry * l_morph;
|
|
||||||
SfxEntry * r_morph;
|
|
||||||
SfxEntry * eq_morph;
|
|
||||||
|
|
||||||
public:
|
SfxEntry* next;
|
||||||
|
SfxEntry* nexteq;
|
||||||
|
SfxEntry* nextne;
|
||||||
|
SfxEntry* flgnxt;
|
||||||
|
|
||||||
SfxEntry(AffixMgr* pmgr, affentry* dp );
|
SfxEntry* l_morph;
|
||||||
~SfxEntry();
|
SfxEntry* r_morph;
|
||||||
|
SfxEntry* eq_morph;
|
||||||
|
|
||||||
inline bool allowCross() { return ((opts & aeXPRODUCT) != 0); }
|
public:
|
||||||
struct hentry * checkword(const char * word, int len, int optflags,
|
explicit SfxEntry(AffixMgr* pmgr);
|
||||||
PfxEntry* ppfx, char ** wlst, int maxSug, int * ns,
|
|
||||||
// const FLAG cclass = FLAG_NULL, const FLAG needflag = FLAG_NULL, char in_compound=IN_CPD_NOT);
|
|
||||||
const FLAG cclass = FLAG_NULL, const FLAG needflag = FLAG_NULL, const FLAG badflag = 0);
|
|
||||||
|
|
||||||
struct hentry * check_twosfx(const char * word, int len, int optflags, PfxEntry* ppfx, const FLAG needflag = NULL);
|
bool allowCross() const { return ((opts & aeXPRODUCT) != 0); }
|
||||||
|
struct hentry* checkword(const char* word,
|
||||||
|
int len,
|
||||||
|
int optflags,
|
||||||
|
PfxEntry* ppfx,
|
||||||
|
const FLAG cclass,
|
||||||
|
const FLAG needflag,
|
||||||
|
const FLAG badflag);
|
||||||
|
|
||||||
char * check_twosfx_morph(const char * word, int len, int optflags,
|
struct hentry* check_twosfx(const char* word,
|
||||||
PfxEntry* ppfx, const FLAG needflag = FLAG_NULL);
|
int len,
|
||||||
struct hentry * get_next_homonym(struct hentry * he);
|
int optflags,
|
||||||
struct hentry * get_next_homonym(struct hentry * word, int optflags, PfxEntry* ppfx,
|
PfxEntry* ppfx,
|
||||||
const FLAG cclass, const FLAG needflag);
|
const FLAG needflag = FLAG_NULL);
|
||||||
|
|
||||||
|
std::string check_twosfx_morph(const char* word,
|
||||||
|
int len,
|
||||||
|
int optflags,
|
||||||
|
PfxEntry* ppfx,
|
||||||
|
const FLAG needflag = FLAG_NULL);
|
||||||
|
struct hentry* get_next_homonym(struct hentry* he);
|
||||||
|
struct hentry* get_next_homonym(struct hentry* word,
|
||||||
|
int optflags,
|
||||||
|
PfxEntry* ppfx,
|
||||||
|
const FLAG cclass,
|
||||||
|
const FLAG needflag);
|
||||||
|
|
||||||
inline FLAG getFlag() { return aflag; }
|
FLAG getFlag() { return aflag; }
|
||||||
inline const char * getKey() { return rappnd; }
|
const char* getKey() { return rappnd.c_str(); }
|
||||||
char * add(const char * word, int len);
|
std::string add(const char* word, size_t len);
|
||||||
|
|
||||||
|
inline const char* getMorph() { return morphcode; }
|
||||||
|
|
||||||
inline const char * getMorph() { return morphcode; }
|
inline const unsigned short* getCont() { return contclass; }
|
||||||
|
inline short getContLen() { return contclasslen; }
|
||||||
|
inline const char* getAffix() { return appnd.c_str(); }
|
||||||
|
|
||||||
inline const unsigned short * getCont() { return contclass; }
|
inline short getKeyLen() { return appnd.size(); }
|
||||||
inline short getContLen() { return contclasslen; }
|
|
||||||
inline const char * getAffix() { return appnd; }
|
|
||||||
|
|
||||||
inline short getKeyLen() { return appndl; }
|
inline SfxEntry* getNext() { return next; }
|
||||||
|
inline SfxEntry* getNextNE() { return nextne; }
|
||||||
|
inline SfxEntry* getNextEQ() { return nexteq; }
|
||||||
|
|
||||||
inline SfxEntry * getNext() { return next; }
|
inline SfxEntry* getLM() { return l_morph; }
|
||||||
inline SfxEntry * getNextNE() { return nextne; }
|
inline SfxEntry* getRM() { return r_morph; }
|
||||||
inline SfxEntry * getNextEQ() { return nexteq; }
|
inline SfxEntry* getEQM() { return eq_morph; }
|
||||||
|
inline SfxEntry* getFlgNxt() { return flgnxt; }
|
||||||
|
|
||||||
inline SfxEntry * getLM() { return l_morph; }
|
inline void setNext(SfxEntry* ptr) { next = ptr; }
|
||||||
inline SfxEntry * getRM() { return r_morph; }
|
inline void setNextNE(SfxEntry* ptr) { nextne = ptr; }
|
||||||
inline SfxEntry * getEQM() { return eq_morph; }
|
inline void setNextEQ(SfxEntry* ptr) { nexteq = ptr; }
|
||||||
inline SfxEntry * getFlgNxt() { return flgnxt; }
|
inline void setFlgNxt(SfxEntry* ptr) { flgnxt = ptr; }
|
||||||
|
void initReverseWord();
|
||||||
inline void setNext(SfxEntry * ptr) { next = ptr; }
|
|
||||||
inline void setNextNE(SfxEntry * ptr) { nextne = ptr; }
|
|
||||||
inline void setNextEQ(SfxEntry * ptr) { nexteq = ptr; }
|
|
||||||
inline void setFlgNxt(SfxEntry * ptr) { flgnxt = ptr; }
|
|
||||||
|
|
||||||
inline char * nextchar(char * p);
|
|
||||||
inline int test_condition(const char * st, const char * begin);
|
|
||||||
|
|
||||||
|
inline char* nextchar(char* p);
|
||||||
|
inline int test_condition(const char* st, const char* begin);
|
||||||
};
|
};
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
|
||||||
|
|
|
@ -1,10 +1,81 @@
|
||||||
#ifndef _AFFIXMGR_HXX_
|
/* ***** BEGIN LICENSE BLOCK *****
|
||||||
#define _AFFIXMGR_HXX_
|
* Version: MPL 1.1/GPL 2.0/LGPL 2.1
|
||||||
|
*
|
||||||
|
* Copyright (C) 2002-2017 Németh László
|
||||||
|
*
|
||||||
|
* The contents of this file are subject to the Mozilla Public License Version
|
||||||
|
* 1.1 (the "License"); you may not use this file except in compliance with
|
||||||
|
* the License. You may obtain a copy of the License at
|
||||||
|
* http://www.mozilla.org/MPL/
|
||||||
|
*
|
||||||
|
* Software distributed under the License is distributed on an "AS IS" basis,
|
||||||
|
* WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
|
||||||
|
* for the specific language governing rights and limitations under the
|
||||||
|
* License.
|
||||||
|
*
|
||||||
|
* Hunspell is based on MySpell which is Copyright (C) 2002 Kevin Hendricks.
|
||||||
|
*
|
||||||
|
* Contributor(s): David Einstein, Davide Prina, Giuseppe Modugno,
|
||||||
|
* Gianluca Turconi, Simon Brouwer, Noll János, Bíró Árpád,
|
||||||
|
* Goldman Eleonóra, Sarlós Tamás, Bencsáth Boldizsár, Halácsy Péter,
|
||||||
|
* Dvornik László, Gefferth András, Nagy Viktor, Varga Dániel, Chris Halls,
|
||||||
|
* Rene Engelhard, Bram Moolenaar, Dafydd Jones, Harri Pitkänen
|
||||||
|
*
|
||||||
|
* Alternatively, the contents of this file may be used under the terms of
|
||||||
|
* either the GNU General Public License Version 2 or later (the "GPL"), or
|
||||||
|
* the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
|
||||||
|
* in which case the provisions of the GPL or the LGPL are applicable instead
|
||||||
|
* of those above. If you wish to allow use of your version of this file only
|
||||||
|
* under the terms of either the GPL or the LGPL, and not to allow others to
|
||||||
|
* use your version of this file under the terms of the MPL, indicate your
|
||||||
|
* decision by deleting the provisions above and replace them with the notice
|
||||||
|
* and other provisions required by the GPL or the LGPL. If you do not delete
|
||||||
|
* the provisions above, a recipient may use your version of this file under
|
||||||
|
* the terms of any one of the MPL, the GPL or the LGPL.
|
||||||
|
*
|
||||||
|
* ***** END LICENSE BLOCK ***** */
|
||||||
|
/*
|
||||||
|
* Copyright 2002 Kevin B. Hendricks, Stratford, Ontario, Canada
|
||||||
|
* And Contributors. All rights reserved.
|
||||||
|
*
|
||||||
|
* Redistribution and use in source and binary forms, with or without
|
||||||
|
* modification, are permitted provided that the following conditions
|
||||||
|
* are met:
|
||||||
|
*
|
||||||
|
* 1. Redistributions of source code must retain the above copyright
|
||||||
|
* notice, this list of conditions and the following disclaimer.
|
||||||
|
*
|
||||||
|
* 2. Redistributions in binary form must reproduce the above copyright
|
||||||
|
* notice, this list of conditions and the following disclaimer in the
|
||||||
|
* documentation and/or other materials provided with the distribution.
|
||||||
|
*
|
||||||
|
* 3. All modifications to the source code must be clearly marked as
|
||||||
|
* such. Binary redistributions based on modified source code
|
||||||
|
* must be clearly marked as modified versions in the documentation
|
||||||
|
* and/or other materials provided with the distribution.
|
||||||
|
*
|
||||||
|
* THIS SOFTWARE IS PROVIDED BY KEVIN B. HENDRICKS AND CONTRIBUTORS
|
||||||
|
* ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||||
|
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
|
||||||
|
* FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL
|
||||||
|
* KEVIN B. HENDRICKS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
|
||||||
|
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
|
||||||
|
* BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
|
||||||
|
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
|
||||||
|
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
||||||
|
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
|
||||||
|
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
|
||||||
|
* SUCH DAMAGE.
|
||||||
|
*/
|
||||||
|
|
||||||
#include "hunvisapi.h"
|
#ifndef AFFIXMGR_HXX_
|
||||||
|
#define AFFIXMGR_HXX_
|
||||||
|
|
||||||
#include <stdio.h>
|
#include <stdio.h>
|
||||||
|
|
||||||
|
#include <string>
|
||||||
|
#include <vector>
|
||||||
|
|
||||||
#include "atypes.hxx"
|
#include "atypes.hxx"
|
||||||
#include "baseaffix.hxx"
|
#include "baseaffix.hxx"
|
||||||
#include "hashmgr.hxx"
|
#include "hashmgr.hxx"
|
||||||
|
@ -12,239 +83,287 @@
|
||||||
#include "replist.hxx"
|
#include "replist.hxx"
|
||||||
|
|
||||||
// check flag duplication
|
// check flag duplication
|
||||||
#define dupSFX (1 << 0)
|
#define dupSFX (1 << 0)
|
||||||
#define dupPFX (1 << 1)
|
#define dupPFX (1 << 1)
|
||||||
|
|
||||||
class PfxEntry;
|
class PfxEntry;
|
||||||
class SfxEntry;
|
class SfxEntry;
|
||||||
|
|
||||||
class LIBHUNSPELL_DLL_EXPORTED AffixMgr
|
class AffixMgr {
|
||||||
{
|
PfxEntry* pStart[SETSIZE];
|
||||||
|
SfxEntry* sStart[SETSIZE];
|
||||||
|
PfxEntry* pFlag[SETSIZE];
|
||||||
|
SfxEntry* sFlag[SETSIZE];
|
||||||
|
const std::vector<HashMgr*>& alldic;
|
||||||
|
const HashMgr* pHMgr;
|
||||||
|
std::string keystring;
|
||||||
|
std::string trystring;
|
||||||
|
std::string encoding;
|
||||||
|
struct cs_info* csconv;
|
||||||
|
int utf8;
|
||||||
|
int complexprefixes;
|
||||||
|
FLAG compoundflag;
|
||||||
|
FLAG compoundbegin;
|
||||||
|
FLAG compoundmiddle;
|
||||||
|
FLAG compoundend;
|
||||||
|
FLAG compoundroot;
|
||||||
|
FLAG compoundforbidflag;
|
||||||
|
FLAG compoundpermitflag;
|
||||||
|
int compoundmoresuffixes;
|
||||||
|
int checkcompounddup;
|
||||||
|
int checkcompoundrep;
|
||||||
|
int checkcompoundcase;
|
||||||
|
int checkcompoundtriple;
|
||||||
|
int simplifiedtriple;
|
||||||
|
FLAG forbiddenword;
|
||||||
|
FLAG nosuggest;
|
||||||
|
FLAG nongramsuggest;
|
||||||
|
FLAG needaffix;
|
||||||
|
int cpdmin;
|
||||||
|
bool parsedrep;
|
||||||
|
std::vector<replentry> reptable;
|
||||||
|
RepList* iconvtable;
|
||||||
|
RepList* oconvtable;
|
||||||
|
bool parsedmaptable;
|
||||||
|
std::vector<mapentry> maptable;
|
||||||
|
bool parsedbreaktable;
|
||||||
|
std::vector<std::string> breaktable;
|
||||||
|
bool parsedcheckcpd;
|
||||||
|
std::vector<patentry> checkcpdtable;
|
||||||
|
int simplifiedcpd;
|
||||||
|
bool parseddefcpd;
|
||||||
|
std::vector<flagentry> defcpdtable;
|
||||||
|
phonetable* phone;
|
||||||
|
int maxngramsugs;
|
||||||
|
int maxcpdsugs;
|
||||||
|
int maxdiff;
|
||||||
|
int onlymaxdiff;
|
||||||
|
int nosplitsugs;
|
||||||
|
int sugswithdots;
|
||||||
|
int cpdwordmax;
|
||||||
|
int cpdmaxsyllable;
|
||||||
|
std::string cpdvowels; // vowels (for calculating of Hungarian compounding limit,
|
||||||
|
std::vector<w_char> cpdvowels_utf16; //vowels for UTF-8 encoding
|
||||||
|
std::string cpdsyllablenum; // syllable count incrementing flag
|
||||||
|
const char* pfxappnd; // BUG: not stateless
|
||||||
|
const char* sfxappnd; // BUG: not stateless
|
||||||
|
int sfxextra; // BUG: not stateless
|
||||||
|
FLAG sfxflag; // BUG: not stateless
|
||||||
|
char* derived; // BUG: not stateless
|
||||||
|
SfxEntry* sfx; // BUG: not stateless
|
||||||
|
PfxEntry* pfx; // BUG: not stateless
|
||||||
|
int checknum;
|
||||||
|
std::string wordchars; // letters + spec. word characters
|
||||||
|
std::vector<w_char> wordchars_utf16;
|
||||||
|
std::string ignorechars; // letters + spec. word characters
|
||||||
|
std::vector<w_char> ignorechars_utf16;
|
||||||
|
std::string version; // affix and dictionary file version string
|
||||||
|
std::string lang; // language
|
||||||
|
int langnum;
|
||||||
|
FLAG lemma_present;
|
||||||
|
FLAG circumfix;
|
||||||
|
FLAG onlyincompound;
|
||||||
|
FLAG keepcase;
|
||||||
|
FLAG forceucase;
|
||||||
|
FLAG warn;
|
||||||
|
int forbidwarn;
|
||||||
|
FLAG substandard;
|
||||||
|
int checksharps;
|
||||||
|
int fullstrip;
|
||||||
|
|
||||||
PfxEntry * pStart[SETSIZE];
|
int havecontclass; // boolean variable
|
||||||
SfxEntry * sStart[SETSIZE];
|
char contclasses[CONTSIZE]; // flags of possible continuing classes (twofold
|
||||||
PfxEntry * pFlag[SETSIZE];
|
// affix)
|
||||||
SfxEntry * sFlag[SETSIZE];
|
|
||||||
HashMgr * pHMgr;
|
|
||||||
HashMgr ** alldic;
|
|
||||||
int * maxdic;
|
|
||||||
char * keystring;
|
|
||||||
char * trystring;
|
|
||||||
char * encoding;
|
|
||||||
struct cs_info * csconv;
|
|
||||||
int utf8;
|
|
||||||
int complexprefixes;
|
|
||||||
FLAG compoundflag;
|
|
||||||
FLAG compoundbegin;
|
|
||||||
FLAG compoundmiddle;
|
|
||||||
FLAG compoundend;
|
|
||||||
FLAG compoundroot;
|
|
||||||
FLAG compoundforbidflag;
|
|
||||||
FLAG compoundpermitflag;
|
|
||||||
int checkcompounddup;
|
|
||||||
int checkcompoundrep;
|
|
||||||
int checkcompoundcase;
|
|
||||||
int checkcompoundtriple;
|
|
||||||
int simplifiedtriple;
|
|
||||||
FLAG forbiddenword;
|
|
||||||
FLAG nosuggest;
|
|
||||||
FLAG nongramsuggest;
|
|
||||||
FLAG needaffix;
|
|
||||||
int cpdmin;
|
|
||||||
int numrep;
|
|
||||||
replentry * reptable;
|
|
||||||
RepList * iconvtable;
|
|
||||||
RepList * oconvtable;
|
|
||||||
int nummap;
|
|
||||||
mapentry * maptable;
|
|
||||||
int numbreak;
|
|
||||||
char ** breaktable;
|
|
||||||
int numcheckcpd;
|
|
||||||
patentry * checkcpdtable;
|
|
||||||
int simplifiedcpd;
|
|
||||||
int numdefcpd;
|
|
||||||
flagentry * defcpdtable;
|
|
||||||
phonetable * phone;
|
|
||||||
int maxngramsugs;
|
|
||||||
int maxcpdsugs;
|
|
||||||
int maxdiff;
|
|
||||||
int onlymaxdiff;
|
|
||||||
int nosplitsugs;
|
|
||||||
int sugswithdots;
|
|
||||||
int cpdwordmax;
|
|
||||||
int cpdmaxsyllable;
|
|
||||||
char * cpdvowels;
|
|
||||||
w_char * cpdvowels_utf16;
|
|
||||||
int cpdvowels_utf16_len;
|
|
||||||
char * cpdsyllablenum;
|
|
||||||
const char * pfxappnd; // BUG: not stateless
|
|
||||||
const char * sfxappnd; // BUG: not stateless
|
|
||||||
FLAG sfxflag; // BUG: not stateless
|
|
||||||
char * derived; // BUG: not stateless
|
|
||||||
SfxEntry * sfx; // BUG: not stateless
|
|
||||||
PfxEntry * pfx; // BUG: not stateless
|
|
||||||
int checknum;
|
|
||||||
char * wordchars;
|
|
||||||
unsigned short * wordchars_utf16;
|
|
||||||
int wordchars_utf16_len;
|
|
||||||
char * ignorechars;
|
|
||||||
unsigned short * ignorechars_utf16;
|
|
||||||
int ignorechars_utf16_len;
|
|
||||||
char * version;
|
|
||||||
char * lang;
|
|
||||||
int langnum;
|
|
||||||
FLAG lemma_present;
|
|
||||||
FLAG circumfix;
|
|
||||||
FLAG onlyincompound;
|
|
||||||
FLAG keepcase;
|
|
||||||
FLAG forceucase;
|
|
||||||
FLAG warn;
|
|
||||||
int forbidwarn;
|
|
||||||
FLAG substandard;
|
|
||||||
int checksharps;
|
|
||||||
int fullstrip;
|
|
||||||
|
|
||||||
int havecontclass; // boolean variable
|
public:
|
||||||
char contclasses[CONTSIZE]; // flags of possible continuing classes (twofold affix)
|
AffixMgr(const char* affpath, const std::vector<HashMgr*>& ptr, const char* key = NULL);
|
||||||
|
|
||||||
public:
|
|
||||||
|
|
||||||
AffixMgr(const char * affpath, HashMgr** ptr, int * md,
|
|
||||||
const char * key = NULL);
|
|
||||||
~AffixMgr();
|
~AffixMgr();
|
||||||
struct hentry * affix_check(const char * word, int len,
|
struct hentry* affix_check(const char* word,
|
||||||
const unsigned short needflag = (unsigned short) 0,
|
int len,
|
||||||
char in_compound = IN_CPD_NOT);
|
const unsigned short needflag = (unsigned short)0,
|
||||||
struct hentry * prefix_check(const char * word, int len,
|
char in_compound = IN_CPD_NOT);
|
||||||
char in_compound, const FLAG needflag = FLAG_NULL);
|
struct hentry* prefix_check(const char* word,
|
||||||
inline int isSubset(const char * s1, const char * s2);
|
int len,
|
||||||
struct hentry * prefix_check_twosfx(const char * word, int len,
|
char in_compound,
|
||||||
char in_compound, const FLAG needflag = FLAG_NULL);
|
const FLAG needflag = FLAG_NULL);
|
||||||
inline int isRevSubset(const char * s1, const char * end_of_s2, int len);
|
inline int isSubset(const char* s1, const char* s2);
|
||||||
struct hentry * suffix_check(const char * word, int len, int sfxopts,
|
struct hentry* prefix_check_twosfx(const char* word,
|
||||||
PfxEntry* ppfx, char ** wlst, int maxSug, int * ns,
|
int len,
|
||||||
const FLAG cclass = FLAG_NULL, const FLAG needflag = FLAG_NULL,
|
char in_compound,
|
||||||
char in_compound = IN_CPD_NOT);
|
const FLAG needflag = FLAG_NULL);
|
||||||
struct hentry * suffix_check_twosfx(const char * word, int len,
|
inline int isRevSubset(const char* s1, const char* end_of_s2, int len);
|
||||||
int sfxopts, PfxEntry* ppfx, const FLAG needflag = FLAG_NULL);
|
struct hentry* suffix_check(const char* word,
|
||||||
|
int len,
|
||||||
|
int sfxopts,
|
||||||
|
PfxEntry* ppfx,
|
||||||
|
const FLAG cclass = FLAG_NULL,
|
||||||
|
const FLAG needflag = FLAG_NULL,
|
||||||
|
char in_compound = IN_CPD_NOT);
|
||||||
|
struct hentry* suffix_check_twosfx(const char* word,
|
||||||
|
int len,
|
||||||
|
int sfxopts,
|
||||||
|
PfxEntry* ppfx,
|
||||||
|
const FLAG needflag = FLAG_NULL);
|
||||||
|
|
||||||
char * affix_check_morph(const char * word, int len,
|
std::string affix_check_morph(const char* word,
|
||||||
const FLAG needflag = FLAG_NULL, char in_compound = IN_CPD_NOT);
|
int len,
|
||||||
char * prefix_check_morph(const char * word, int len,
|
const FLAG needflag = FLAG_NULL,
|
||||||
char in_compound, const FLAG needflag = FLAG_NULL);
|
char in_compound = IN_CPD_NOT);
|
||||||
char * suffix_check_morph (const char * word, int len, int sfxopts,
|
std::string prefix_check_morph(const char* word,
|
||||||
PfxEntry * ppfx, const FLAG cclass = FLAG_NULL,
|
int len,
|
||||||
const FLAG needflag = FLAG_NULL, char in_compound = IN_CPD_NOT);
|
char in_compound,
|
||||||
|
const FLAG needflag = FLAG_NULL);
|
||||||
|
std::string suffix_check_morph(const char* word,
|
||||||
|
int len,
|
||||||
|
int sfxopts,
|
||||||
|
PfxEntry* ppfx,
|
||||||
|
const FLAG cclass = FLAG_NULL,
|
||||||
|
const FLAG needflag = FLAG_NULL,
|
||||||
|
char in_compound = IN_CPD_NOT);
|
||||||
|
|
||||||
char * prefix_check_twosfx_morph(const char * word, int len,
|
std::string prefix_check_twosfx_morph(const char* word,
|
||||||
char in_compound, const FLAG needflag = FLAG_NULL);
|
int len,
|
||||||
char * suffix_check_twosfx_morph(const char * word, int len,
|
char in_compound,
|
||||||
int sfxopts, PfxEntry * ppfx, const FLAG needflag = FLAG_NULL);
|
const FLAG needflag = FLAG_NULL);
|
||||||
|
std::string suffix_check_twosfx_morph(const char* word,
|
||||||
|
int len,
|
||||||
|
int sfxopts,
|
||||||
|
PfxEntry* ppfx,
|
||||||
|
const FLAG needflag = FLAG_NULL);
|
||||||
|
|
||||||
char * morphgen(char * ts, int wl, const unsigned short * ap,
|
std::string morphgen(const char* ts,
|
||||||
unsigned short al, char * morph, char * targetmorph, int level);
|
int wl,
|
||||||
|
const unsigned short* ap,
|
||||||
|
unsigned short al,
|
||||||
|
const char* morph,
|
||||||
|
const char* targetmorph,
|
||||||
|
int level);
|
||||||
|
|
||||||
int expand_rootword(struct guessword * wlst, int maxn, const char * ts,
|
int expand_rootword(struct guessword* wlst,
|
||||||
int wl, const unsigned short * ap, unsigned short al, char * bad,
|
int maxn,
|
||||||
int, char *);
|
const char* ts,
|
||||||
|
int wl,
|
||||||
|
const unsigned short* ap,
|
||||||
|
unsigned short al,
|
||||||
|
const char* bad,
|
||||||
|
int,
|
||||||
|
const char*);
|
||||||
|
|
||||||
short get_syllable (const char * word, int wlen);
|
short get_syllable(const std::string& word);
|
||||||
int cpdrep_check(const char * word, int len);
|
int cpdrep_check(const char* word, int len);
|
||||||
int cpdpat_check(const char * word, int len, hentry * r1, hentry * r2,
|
int cpdpat_check(const char* word,
|
||||||
const char affixed);
|
int len,
|
||||||
int defcpd_check(hentry *** words, short wnum, hentry * rv,
|
hentry* r1,
|
||||||
hentry ** rwords, char all);
|
hentry* r2,
|
||||||
int cpdcase_check(const char * word, int len);
|
const char affixed);
|
||||||
inline int candidate_check(const char * word, int len);
|
int defcpd_check(hentry*** words,
|
||||||
void setcminmax(int * cmin, int * cmax, const char * word, int len);
|
short wnum,
|
||||||
struct hentry * compound_check(const char * word, int len, short wordnum,
|
hentry* rv,
|
||||||
short numsyllable, short maxwordnum, short wnum, hentry ** words,
|
hentry** rwords,
|
||||||
char hu_mov_rule, char is_sug, int * info);
|
char all);
|
||||||
|
int cpdcase_check(const char* word, int len);
|
||||||
|
inline int candidate_check(const char* word, int len);
|
||||||
|
void setcminmax(int* cmin, int* cmax, const char* word, int len);
|
||||||
|
struct hentry* compound_check(const std::string& word,
|
||||||
|
short wordnum,
|
||||||
|
short numsyllable,
|
||||||
|
short maxwordnum,
|
||||||
|
short wnum,
|
||||||
|
hentry** words,
|
||||||
|
hentry** rwords,
|
||||||
|
char hu_mov_rule,
|
||||||
|
char is_sug,
|
||||||
|
int* info);
|
||||||
|
|
||||||
int compound_check_morph(const char * word, int len, short wordnum,
|
int compound_check_morph(const char* word,
|
||||||
short numsyllable, short maxwordnum, short wnum, hentry ** words,
|
int len,
|
||||||
char hu_mov_rule, char ** result, char * partresult);
|
short wordnum,
|
||||||
|
short numsyllable,
|
||||||
|
short maxwordnum,
|
||||||
|
short wnum,
|
||||||
|
hentry** words,
|
||||||
|
hentry** rwords,
|
||||||
|
char hu_mov_rule,
|
||||||
|
std::string& result,
|
||||||
|
const std::string* partresult);
|
||||||
|
|
||||||
struct hentry * lookup(const char * word);
|
std::vector<std::string> get_suffix_words(short unsigned* suff,
|
||||||
int get_numrep() const;
|
int len,
|
||||||
struct replentry * get_reptable() const;
|
const char* root_word);
|
||||||
RepList * get_iconvtable() const;
|
|
||||||
RepList * get_oconvtable() const;
|
|
||||||
struct phonetable * get_phonetable() const;
|
|
||||||
int get_nummap() const;
|
|
||||||
struct mapentry * get_maptable() const;
|
|
||||||
int get_numbreak() const;
|
|
||||||
char ** get_breaktable() const;
|
|
||||||
char * get_encoding();
|
|
||||||
int get_langnum() const;
|
|
||||||
char * get_key_string();
|
|
||||||
char * get_try_string() const;
|
|
||||||
const char * get_wordchars() const;
|
|
||||||
unsigned short * get_wordchars_utf16(int * len) const;
|
|
||||||
char * get_ignore() const;
|
|
||||||
unsigned short * get_ignore_utf16(int * len) const;
|
|
||||||
int get_compound() const;
|
|
||||||
FLAG get_compoundflag() const;
|
|
||||||
FLAG get_compoundbegin() const;
|
|
||||||
FLAG get_forbiddenword() const;
|
|
||||||
FLAG get_nosuggest() const;
|
|
||||||
FLAG get_nongramsuggest() const;
|
|
||||||
FLAG get_needaffix() const;
|
|
||||||
FLAG get_onlyincompound() const;
|
|
||||||
FLAG get_compoundroot() const;
|
|
||||||
FLAG get_lemma_present() const;
|
|
||||||
int get_checknum() const;
|
|
||||||
const char * get_prefix() const;
|
|
||||||
const char * get_suffix() const;
|
|
||||||
const char * get_derived() const;
|
|
||||||
const char * get_version() const;
|
|
||||||
int have_contclass() const;
|
|
||||||
int get_utf8() const;
|
|
||||||
int get_complexprefixes() const;
|
|
||||||
char * get_suffixed(char ) const;
|
|
||||||
int get_maxngramsugs() const;
|
|
||||||
int get_maxcpdsugs() const;
|
|
||||||
int get_maxdiff() const;
|
|
||||||
int get_onlymaxdiff() const;
|
|
||||||
int get_nosplitsugs() const;
|
|
||||||
int get_sugswithdots(void) const;
|
|
||||||
FLAG get_keepcase(void) const;
|
|
||||||
FLAG get_forceucase(void) const;
|
|
||||||
FLAG get_warn(void) const;
|
|
||||||
int get_forbidwarn(void) const;
|
|
||||||
int get_checksharps(void) const;
|
|
||||||
char * encode_flag(unsigned short aflag) const;
|
|
||||||
int get_fullstrip() const;
|
|
||||||
|
|
||||||
private:
|
struct hentry* lookup(const char* word);
|
||||||
int parse_file(const char * affpath, const char * key);
|
const std::vector<replentry>& get_reptable() const;
|
||||||
int parse_flag(char * line, unsigned short * out, FileMgr * af);
|
RepList* get_iconvtable() const;
|
||||||
int parse_num(char * line, int * out, FileMgr * af);
|
RepList* get_oconvtable() const;
|
||||||
int parse_cpdsyllable(char * line, FileMgr * af);
|
struct phonetable* get_phonetable() const;
|
||||||
int parse_reptable(char * line, FileMgr * af);
|
const std::vector<mapentry>& get_maptable() const;
|
||||||
int parse_convtable(char * line, FileMgr * af, RepList ** rl, const char * keyword);
|
const std::vector<std::string>& get_breaktable() const;
|
||||||
int parse_phonetable(char * line, FileMgr * af);
|
const std::string& get_encoding();
|
||||||
int parse_maptable(char * line, FileMgr * af);
|
int get_langnum() const;
|
||||||
int parse_breaktable(char * line, FileMgr * af);
|
char* get_key_string();
|
||||||
int parse_checkcpdtable(char * line, FileMgr * af);
|
char* get_try_string() const;
|
||||||
int parse_defcpdtable(char * line, FileMgr * af);
|
const std::string& get_wordchars() const;
|
||||||
int parse_affix(char * line, const char at, FileMgr * af, char * dupflags);
|
const std::vector<w_char>& get_wordchars_utf16() const;
|
||||||
|
const char* get_ignore() const;
|
||||||
|
const std::vector<w_char>& get_ignore_utf16() const;
|
||||||
|
int get_compound() const;
|
||||||
|
FLAG get_compoundflag() const;
|
||||||
|
FLAG get_forbiddenword() const;
|
||||||
|
FLAG get_nosuggest() const;
|
||||||
|
FLAG get_nongramsuggest() const;
|
||||||
|
FLAG get_needaffix() const;
|
||||||
|
FLAG get_onlyincompound() const;
|
||||||
|
const char* get_derived() const;
|
||||||
|
const std::string& get_version() const;
|
||||||
|
int have_contclass() const;
|
||||||
|
int get_utf8() const;
|
||||||
|
int get_complexprefixes() const;
|
||||||
|
char* get_suffixed(char) const;
|
||||||
|
int get_maxngramsugs() const;
|
||||||
|
int get_maxcpdsugs() const;
|
||||||
|
int get_maxdiff() const;
|
||||||
|
int get_onlymaxdiff() const;
|
||||||
|
int get_nosplitsugs() const;
|
||||||
|
int get_sugswithdots(void) const;
|
||||||
|
FLAG get_keepcase(void) const;
|
||||||
|
FLAG get_forceucase(void) const;
|
||||||
|
FLAG get_warn(void) const;
|
||||||
|
int get_forbidwarn(void) const;
|
||||||
|
int get_checksharps(void) const;
|
||||||
|
char* encode_flag(unsigned short aflag) const;
|
||||||
|
int get_fullstrip() const;
|
||||||
|
|
||||||
void reverse_condition(char *);
|
private:
|
||||||
void debugflag(char * result, unsigned short flag);
|
int parse_file(const char* affpath, const char* key);
|
||||||
int condlen(char *);
|
bool parse_flag(const std::string& line, unsigned short* out, FileMgr* af);
|
||||||
int encodeit(affentry &entry, char * cs);
|
bool parse_num(const std::string& line, int* out, FileMgr* af);
|
||||||
|
bool parse_cpdsyllable(const std::string& line, FileMgr* af);
|
||||||
|
bool parse_reptable(const std::string& line, FileMgr* af);
|
||||||
|
bool parse_convtable(const std::string& line,
|
||||||
|
FileMgr* af,
|
||||||
|
RepList** rl,
|
||||||
|
const std::string& keyword);
|
||||||
|
bool parse_phonetable(const std::string& line, FileMgr* af);
|
||||||
|
bool parse_maptable(const std::string& line, FileMgr* af);
|
||||||
|
bool parse_breaktable(const std::string& line, FileMgr* af);
|
||||||
|
bool parse_checkcpdtable(const std::string& line, FileMgr* af);
|
||||||
|
bool parse_defcpdtable(const std::string& line, FileMgr* af);
|
||||||
|
bool parse_affix(const std::string& line, const char at, FileMgr* af, char* dupflags);
|
||||||
|
|
||||||
|
void reverse_condition(std::string&);
|
||||||
|
std::string& debugflag(std::string& result, unsigned short flag);
|
||||||
|
int condlen(const char*);
|
||||||
|
int encodeit(AffEntry& entry, const char* cs);
|
||||||
int build_pfxtree(PfxEntry* pfxptr);
|
int build_pfxtree(PfxEntry* pfxptr);
|
||||||
int build_sfxtree(SfxEntry* sfxptr);
|
int build_sfxtree(SfxEntry* sfxptr);
|
||||||
int process_pfx_order();
|
int process_pfx_order();
|
||||||
int process_sfx_order();
|
int process_sfx_order();
|
||||||
PfxEntry * process_pfx_in_order(PfxEntry * ptr, PfxEntry * nptr);
|
PfxEntry* process_pfx_in_order(PfxEntry* ptr, PfxEntry* nptr);
|
||||||
SfxEntry * process_sfx_in_order(SfxEntry * ptr, SfxEntry * nptr);
|
SfxEntry* process_sfx_in_order(SfxEntry* ptr, SfxEntry* nptr);
|
||||||
int process_pfx_tree_to_list();
|
int process_pfx_tree_to_list();
|
||||||
int process_sfx_tree_to_list();
|
int process_sfx_tree_to_list();
|
||||||
int redundant_condition(char, char * strip, int stripl,
|
int redundant_condition(char, const char* strip, int stripl, const char* cond, int);
|
||||||
const char * cond, int);
|
void finishFileMgr(FileMgr* afflst);
|
||||||
};
|
};
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
|
|
@ -1,107 +1,119 @@
|
||||||
#ifndef _ATYPES_HXX_
|
/* ***** BEGIN LICENSE BLOCK *****
|
||||||
#define _ATYPES_HXX_
|
* Version: MPL 1.1/GPL 2.0/LGPL 2.1
|
||||||
|
*
|
||||||
|
* Copyright (C) 2002-2017 Németh László
|
||||||
|
*
|
||||||
|
* The contents of this file are subject to the Mozilla Public License Version
|
||||||
|
* 1.1 (the "License"); you may not use this file except in compliance with
|
||||||
|
* the License. You may obtain a copy of the License at
|
||||||
|
* http://www.mozilla.org/MPL/
|
||||||
|
*
|
||||||
|
* Software distributed under the License is distributed on an "AS IS" basis,
|
||||||
|
* WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
|
||||||
|
* for the specific language governing rights and limitations under the
|
||||||
|
* License.
|
||||||
|
*
|
||||||
|
* Hunspell is based on MySpell which is Copyright (C) 2002 Kevin Hendricks.
|
||||||
|
*
|
||||||
|
* Contributor(s): David Einstein, Davide Prina, Giuseppe Modugno,
|
||||||
|
* Gianluca Turconi, Simon Brouwer, Noll János, Bíró Árpád,
|
||||||
|
* Goldman Eleonóra, Sarlós Tamás, Bencsáth Boldizsár, Halácsy Péter,
|
||||||
|
* Dvornik László, Gefferth András, Nagy Viktor, Varga Dániel, Chris Halls,
|
||||||
|
* Rene Engelhard, Bram Moolenaar, Dafydd Jones, Harri Pitkänen
|
||||||
|
*
|
||||||
|
* Alternatively, the contents of this file may be used under the terms of
|
||||||
|
* either the GNU General Public License Version 2 or later (the "GPL"), or
|
||||||
|
* the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
|
||||||
|
* in which case the provisions of the GPL or the LGPL are applicable instead
|
||||||
|
* of those above. If you wish to allow use of your version of this file only
|
||||||
|
* under the terms of either the GPL or the LGPL, and not to allow others to
|
||||||
|
* use your version of this file under the terms of the MPL, indicate your
|
||||||
|
* decision by deleting the provisions above and replace them with the notice
|
||||||
|
* and other provisions required by the GPL or the LGPL. If you do not delete
|
||||||
|
* the provisions above, a recipient may use your version of this file under
|
||||||
|
* the terms of any one of the MPL, the GPL or the LGPL.
|
||||||
|
*
|
||||||
|
* ***** END LICENSE BLOCK ***** */
|
||||||
|
|
||||||
|
#ifndef ATYPES_HXX_
|
||||||
|
#define ATYPES_HXX_
|
||||||
|
|
||||||
#ifndef HUNSPELL_WARNING
|
#ifndef HUNSPELL_WARNING
|
||||||
#include <stdio.h>
|
#include <stdio.h>
|
||||||
#ifdef HUNSPELL_WARNING_ON
|
#ifdef HUNSPELL_WARNING_ON
|
||||||
#define HUNSPELL_WARNING fprintf
|
#define HUNSPELL_WARNING fprintf
|
||||||
#else
|
#else
|
||||||
// empty inline function to switch off warnings (instead of the C99 standard variadic macros)
|
// empty inline function to switch off warnings (instead of the C99 standard
|
||||||
static inline void HUNSPELL_WARNING(FILE *, const char *, ...) {}
|
// variadic macros)
|
||||||
|
static inline void HUNSPELL_WARNING(FILE*, const char*, ...) {}
|
||||||
#endif
|
#endif
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
// HUNSTEM def.
|
// HUNSTEM def.
|
||||||
#define HUNSTEM
|
#define HUNSTEM
|
||||||
|
|
||||||
#include "hashmgr.hxx"
|
|
||||||
#include "w_char.hxx"
|
#include "w_char.hxx"
|
||||||
|
#include <algorithm>
|
||||||
|
#include <string>
|
||||||
|
#include <vector>
|
||||||
|
|
||||||
#define SETSIZE 256
|
#define SETSIZE 256
|
||||||
#define CONTSIZE 65536
|
#define CONTSIZE 65536
|
||||||
#define MAXWORDLEN 100
|
|
||||||
#define MAXWORDUTF8LEN 256
|
|
||||||
|
|
||||||
// affentry options
|
// AffEntry options
|
||||||
#define aeXPRODUCT (1 << 0)
|
#define aeXPRODUCT (1 << 0)
|
||||||
#define aeUTF8 (1 << 1)
|
#define aeUTF8 (1 << 1)
|
||||||
#define aeALIASF (1 << 2)
|
#define aeALIASF (1 << 2)
|
||||||
#define aeALIASM (1 << 3)
|
#define aeALIASM (1 << 3)
|
||||||
#define aeLONGCOND (1 << 4)
|
#define aeLONGCOND (1 << 4)
|
||||||
|
|
||||||
// compound options
|
// compound options
|
||||||
#define IN_CPD_NOT 0
|
#define IN_CPD_NOT 0
|
||||||
#define IN_CPD_BEGIN 1
|
#define IN_CPD_BEGIN 1
|
||||||
#define IN_CPD_END 2
|
#define IN_CPD_END 2
|
||||||
#define IN_CPD_OTHER 3
|
#define IN_CPD_OTHER 3
|
||||||
|
|
||||||
// info options
|
// info options
|
||||||
#define SPELL_COMPOUND (1 << 0)
|
#define SPELL_COMPOUND (1 << 0)
|
||||||
#define SPELL_FORBIDDEN (1 << 1)
|
#define SPELL_FORBIDDEN (1 << 1)
|
||||||
#define SPELL_ALLCAP (1 << 2)
|
#define SPELL_ALLCAP (1 << 2)
|
||||||
#define SPELL_NOCAP (1 << 3)
|
#define SPELL_NOCAP (1 << 3)
|
||||||
#define SPELL_INITCAP (1 << 4)
|
#define SPELL_INITCAP (1 << 4)
|
||||||
#define SPELL_ORIGCAP (1 << 5)
|
#define SPELL_ORIGCAP (1 << 5)
|
||||||
#define SPELL_WARN (1 << 6)
|
#define SPELL_WARN (1 << 6)
|
||||||
|
|
||||||
#define MAXLNLEN 8192
|
#define MINCPDLEN 3
|
||||||
|
#define MAXCOMPOUND 10
|
||||||
|
#define MAXCONDLEN 20
|
||||||
|
#define MAXCONDLEN_1 (MAXCONDLEN - sizeof(char*))
|
||||||
|
|
||||||
#define MINCPDLEN 3
|
#define MAXACC 1000
|
||||||
#define MAXCOMPOUND 10
|
|
||||||
#define MAXCONDLEN 20
|
|
||||||
#define MAXCONDLEN_1 (MAXCONDLEN - sizeof(char *))
|
|
||||||
|
|
||||||
#define MAXACC 1000
|
|
||||||
|
|
||||||
#define FLAG unsigned short
|
#define FLAG unsigned short
|
||||||
#define FLAG_NULL 0x00
|
#define FLAG_NULL 0x00
|
||||||
#define FREE_FLAG(a) a = 0
|
#define FREE_FLAG(a) a = 0
|
||||||
|
|
||||||
#define TESTAFF( a, b , c ) flag_bsearch((unsigned short *) a, (unsigned short) b, c)
|
#define TESTAFF(a, b, c) (std::binary_search(a, a + c, b))
|
||||||
|
|
||||||
struct affentry
|
|
||||||
{
|
|
||||||
char * strip;
|
|
||||||
char * appnd;
|
|
||||||
unsigned char stripl;
|
|
||||||
unsigned char appndl;
|
|
||||||
char numconds;
|
|
||||||
char opts;
|
|
||||||
unsigned short aflag;
|
|
||||||
unsigned short * contclass;
|
|
||||||
short contclasslen;
|
|
||||||
union {
|
|
||||||
char conds[MAXCONDLEN];
|
|
||||||
struct {
|
|
||||||
char conds1[MAXCONDLEN_1];
|
|
||||||
char * conds2;
|
|
||||||
} l;
|
|
||||||
} c;
|
|
||||||
char * morphcode;
|
|
||||||
};
|
|
||||||
|
|
||||||
struct guessword {
|
struct guessword {
|
||||||
char * word;
|
char* word;
|
||||||
bool allow;
|
bool allow;
|
||||||
char * orig;
|
char* orig;
|
||||||
};
|
};
|
||||||
|
|
||||||
struct mapentry {
|
typedef std::vector<std::string> mapentry;
|
||||||
char ** set;
|
typedef std::vector<FLAG> flagentry;
|
||||||
int len;
|
|
||||||
};
|
|
||||||
|
|
||||||
struct flagentry {
|
|
||||||
FLAG * def;
|
|
||||||
int len;
|
|
||||||
};
|
|
||||||
|
|
||||||
struct patentry {
|
struct patentry {
|
||||||
char * pattern;
|
std::string pattern;
|
||||||
char * pattern2;
|
std::string pattern2;
|
||||||
char * pattern3;
|
std::string pattern3;
|
||||||
FLAG cond;
|
FLAG cond;
|
||||||
FLAG cond2;
|
FLAG cond2;
|
||||||
|
patentry()
|
||||||
|
: cond(FLAG_NULL)
|
||||||
|
, cond2(FLAG_NULL) {
|
||||||
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
|
|
@ -1,28 +1,74 @@
|
||||||
#ifndef _BASEAFF_HXX_
|
/* ***** BEGIN LICENSE BLOCK *****
|
||||||
#define _BASEAFF_HXX_
|
* Version: MPL 1.1/GPL 2.0/LGPL 2.1
|
||||||
|
*
|
||||||
|
* Copyright (C) 2002-2017 Németh László
|
||||||
|
*
|
||||||
|
* The contents of this file are subject to the Mozilla Public License Version
|
||||||
|
* 1.1 (the "License"); you may not use this file except in compliance with
|
||||||
|
* the License. You may obtain a copy of the License at
|
||||||
|
* http://www.mozilla.org/MPL/
|
||||||
|
*
|
||||||
|
* Software distributed under the License is distributed on an "AS IS" basis,
|
||||||
|
* WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
|
||||||
|
* for the specific language governing rights and limitations under the
|
||||||
|
* License.
|
||||||
|
*
|
||||||
|
* Hunspell is based on MySpell which is Copyright (C) 2002 Kevin Hendricks.
|
||||||
|
*
|
||||||
|
* Contributor(s): David Einstein, Davide Prina, Giuseppe Modugno,
|
||||||
|
* Gianluca Turconi, Simon Brouwer, Noll János, Bíró Árpád,
|
||||||
|
* Goldman Eleonóra, Sarlós Tamás, Bencsáth Boldizsár, Halácsy Péter,
|
||||||
|
* Dvornik László, Gefferth András, Nagy Viktor, Varga Dániel, Chris Halls,
|
||||||
|
* Rene Engelhard, Bram Moolenaar, Dafydd Jones, Harri Pitkänen
|
||||||
|
*
|
||||||
|
* Alternatively, the contents of this file may be used under the terms of
|
||||||
|
* either the GNU General Public License Version 2 or later (the "GPL"), or
|
||||||
|
* the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
|
||||||
|
* in which case the provisions of the GPL or the LGPL are applicable instead
|
||||||
|
* of those above. If you wish to allow use of your version of this file only
|
||||||
|
* under the terms of either the GPL or the LGPL, and not to allow others to
|
||||||
|
* use your version of this file under the terms of the MPL, indicate your
|
||||||
|
* decision by deleting the provisions above and replace them with the notice
|
||||||
|
* and other provisions required by the GPL or the LGPL. If you do not delete
|
||||||
|
* the provisions above, a recipient may use your version of this file under
|
||||||
|
* the terms of any one of the MPL, the GPL or the LGPL.
|
||||||
|
*
|
||||||
|
* ***** END LICENSE BLOCK ***** */
|
||||||
|
|
||||||
#include "hunvisapi.h"
|
#ifndef BASEAFF_HXX_
|
||||||
|
#define BASEAFF_HXX_
|
||||||
|
|
||||||
class LIBHUNSPELL_DLL_EXPORTED AffEntry
|
#include <string>
|
||||||
{
|
|
||||||
protected:
|
class AffEntry {
|
||||||
char * appnd;
|
private:
|
||||||
char * strip;
|
AffEntry(const AffEntry&);
|
||||||
unsigned char appndl;
|
AffEntry& operator=(const AffEntry&);
|
||||||
unsigned char stripl;
|
|
||||||
char numconds;
|
public:
|
||||||
char opts;
|
AffEntry()
|
||||||
unsigned short aflag;
|
: numconds(0),
|
||||||
union {
|
opts(0),
|
||||||
char conds[MAXCONDLEN];
|
aflag(0),
|
||||||
struct {
|
morphcode(0),
|
||||||
char conds1[MAXCONDLEN_1];
|
contclass(NULL),
|
||||||
char * conds2;
|
contclasslen(0) {}
|
||||||
} l;
|
virtual ~AffEntry();
|
||||||
} c;
|
std::string appnd;
|
||||||
char * morphcode;
|
std::string strip;
|
||||||
unsigned short * contclass;
|
unsigned char numconds;
|
||||||
short contclasslen;
|
char opts;
|
||||||
|
unsigned short aflag;
|
||||||
|
union {
|
||||||
|
char conds[MAXCONDLEN];
|
||||||
|
struct {
|
||||||
|
char conds1[MAXCONDLEN_1];
|
||||||
|
char* conds2;
|
||||||
|
} l;
|
||||||
|
} c;
|
||||||
|
char* morphcode;
|
||||||
|
unsigned short* contclass;
|
||||||
|
short contclasslen;
|
||||||
};
|
};
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
|
|
@ -1,113 +1,178 @@
|
||||||
#ifndef __CSUTILHXX__
|
/* ***** BEGIN LICENSE BLOCK *****
|
||||||
#define __CSUTILHXX__
|
* Version: MPL 1.1/GPL 2.0/LGPL 2.1
|
||||||
|
*
|
||||||
|
* Copyright (C) 2002-2017 Németh László
|
||||||
|
*
|
||||||
|
* The contents of this file are subject to the Mozilla Public License Version
|
||||||
|
* 1.1 (the "License"); you may not use this file except in compliance with
|
||||||
|
* the License. You may obtain a copy of the License at
|
||||||
|
* http://www.mozilla.org/MPL/
|
||||||
|
*
|
||||||
|
* Software distributed under the License is distributed on an "AS IS" basis,
|
||||||
|
* WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
|
||||||
|
* for the specific language governing rights and limitations under the
|
||||||
|
* License.
|
||||||
|
*
|
||||||
|
* Hunspell is based on MySpell which is Copyright (C) 2002 Kevin Hendricks.
|
||||||
|
*
|
||||||
|
* Contributor(s): David Einstein, Davide Prina, Giuseppe Modugno,
|
||||||
|
* Gianluca Turconi, Simon Brouwer, Noll János, Bíró Árpád,
|
||||||
|
* Goldman Eleonóra, Sarlós Tamás, Bencsáth Boldizsár, Halácsy Péter,
|
||||||
|
* Dvornik László, Gefferth András, Nagy Viktor, Varga Dániel, Chris Halls,
|
||||||
|
* Rene Engelhard, Bram Moolenaar, Dafydd Jones, Harri Pitkänen
|
||||||
|
*
|
||||||
|
* Alternatively, the contents of this file may be used under the terms of
|
||||||
|
* either the GNU General Public License Version 2 or later (the "GPL"), or
|
||||||
|
* the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
|
||||||
|
* in which case the provisions of the GPL or the LGPL are applicable instead
|
||||||
|
* of those above. If you wish to allow use of your version of this file only
|
||||||
|
* under the terms of either the GPL or the LGPL, and not to allow others to
|
||||||
|
* use your version of this file under the terms of the MPL, indicate your
|
||||||
|
* decision by deleting the provisions above and replace them with the notice
|
||||||
|
* and other provisions required by the GPL or the LGPL. If you do not delete
|
||||||
|
* the provisions above, a recipient may use your version of this file under
|
||||||
|
* the terms of any one of the MPL, the GPL or the LGPL.
|
||||||
|
*
|
||||||
|
* ***** END LICENSE BLOCK ***** */
|
||||||
|
/*
|
||||||
|
* Copyright 2002 Kevin B. Hendricks, Stratford, Ontario, Canada
|
||||||
|
* And Contributors. All rights reserved.
|
||||||
|
*
|
||||||
|
* Redistribution and use in source and binary forms, with or without
|
||||||
|
* modification, are permitted provided that the following conditions
|
||||||
|
* are met:
|
||||||
|
*
|
||||||
|
* 1. Redistributions of source code must retain the above copyright
|
||||||
|
* notice, this list of conditions and the following disclaimer.
|
||||||
|
*
|
||||||
|
* 2. Redistributions in binary form must reproduce the above copyright
|
||||||
|
* notice, this list of conditions and the following disclaimer in the
|
||||||
|
* documentation and/or other materials provided with the distribution.
|
||||||
|
*
|
||||||
|
* 3. All modifications to the source code must be clearly marked as
|
||||||
|
* such. Binary redistributions based on modified source code
|
||||||
|
* must be clearly marked as modified versions in the documentation
|
||||||
|
* and/or other materials provided with the distribution.
|
||||||
|
*
|
||||||
|
* THIS SOFTWARE IS PROVIDED BY KEVIN B. HENDRICKS AND CONTRIBUTORS
|
||||||
|
* ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||||
|
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
|
||||||
|
* FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL
|
||||||
|
* KEVIN B. HENDRICKS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
|
||||||
|
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
|
||||||
|
* BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
|
||||||
|
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
|
||||||
|
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
||||||
|
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
|
||||||
|
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
|
||||||
|
* SUCH DAMAGE.
|
||||||
|
*/
|
||||||
|
|
||||||
|
#ifndef CSUTIL_HXX_
|
||||||
|
#define CSUTIL_HXX_
|
||||||
|
|
||||||
#include "hunvisapi.h"
|
#include "hunvisapi.h"
|
||||||
|
|
||||||
// First some base level utility routines
|
// First some base level utility routines
|
||||||
|
|
||||||
|
#include <fstream>
|
||||||
|
#include <string>
|
||||||
|
#include <vector>
|
||||||
#include <string.h>
|
#include <string.h>
|
||||||
#include "w_char.hxx"
|
#include "w_char.hxx"
|
||||||
#include "htypes.hxx"
|
#include "htypes.hxx"
|
||||||
|
|
||||||
#ifdef MOZILLA_CLIENT
|
#ifdef MOZILLA_CLIENT
|
||||||
#include "nscore.h" // for mozalloc headers
|
#include "nscore.h" // for mozalloc headers
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
// casing
|
// casing
|
||||||
#define NOCAP 0
|
#define NOCAP 0
|
||||||
#define INITCAP 1
|
#define INITCAP 1
|
||||||
#define ALLCAP 2
|
#define ALLCAP 2
|
||||||
#define HUHCAP 3
|
#define HUHCAP 3
|
||||||
#define HUHINITCAP 4
|
#define HUHINITCAP 4
|
||||||
|
|
||||||
// default encoding and keystring
|
// default encoding and keystring
|
||||||
#define SPELL_ENCODING "ISO8859-1"
|
#define SPELL_ENCODING "ISO8859-1"
|
||||||
#define SPELL_KEYSTRING "qwertyuiop|asdfghjkl|zxcvbnm"
|
#define SPELL_KEYSTRING "qwertyuiop|asdfghjkl|zxcvbnm"
|
||||||
|
|
||||||
// default morphological fields
|
// default morphological fields
|
||||||
#define MORPH_STEM "st:"
|
#define MORPH_STEM "st:"
|
||||||
#define MORPH_ALLOMORPH "al:"
|
#define MORPH_ALLOMORPH "al:"
|
||||||
#define MORPH_POS "po:"
|
#define MORPH_POS "po:"
|
||||||
#define MORPH_DERI_PFX "dp:"
|
#define MORPH_DERI_PFX "dp:"
|
||||||
#define MORPH_INFL_PFX "ip:"
|
#define MORPH_INFL_PFX "ip:"
|
||||||
#define MORPH_TERM_PFX "tp:"
|
#define MORPH_TERM_PFX "tp:"
|
||||||
#define MORPH_DERI_SFX "ds:"
|
#define MORPH_DERI_SFX "ds:"
|
||||||
#define MORPH_INFL_SFX "is:"
|
#define MORPH_INFL_SFX "is:"
|
||||||
#define MORPH_TERM_SFX "ts:"
|
#define MORPH_TERM_SFX "ts:"
|
||||||
#define MORPH_SURF_PFX "sp:"
|
#define MORPH_SURF_PFX "sp:"
|
||||||
#define MORPH_FREQ "fr:"
|
#define MORPH_FREQ "fr:"
|
||||||
#define MORPH_PHON "ph:"
|
#define MORPH_PHON "ph:"
|
||||||
#define MORPH_HYPH "hy:"
|
#define MORPH_HYPH "hy:"
|
||||||
#define MORPH_PART "pa:"
|
#define MORPH_PART "pa:"
|
||||||
#define MORPH_FLAG "fl:"
|
#define MORPH_FLAG "fl:"
|
||||||
#define MORPH_HENTRY "_H:"
|
#define MORPH_HENTRY "_H:"
|
||||||
#define MORPH_TAG_LEN strlen(MORPH_STEM)
|
#define MORPH_TAG_LEN strlen(MORPH_STEM)
|
||||||
|
|
||||||
#define MSEP_FLD ' '
|
#define MSEP_FLD ' '
|
||||||
#define MSEP_REC '\n'
|
#define MSEP_REC '\n'
|
||||||
#define MSEP_ALT '\v'
|
#define MSEP_ALT '\v'
|
||||||
|
|
||||||
// default flags
|
// default flags
|
||||||
#define DEFAULTFLAGS 65510
|
#define DEFAULTFLAGS 65510
|
||||||
#define FORBIDDENWORD 65510
|
#define FORBIDDENWORD 65510
|
||||||
#define ONLYUPCASEFLAG 65511
|
#define ONLYUPCASEFLAG 65511
|
||||||
|
|
||||||
|
// fix long pathname problem of WIN32 by using w_char std::fstream::open override
|
||||||
|
LIBHUNSPELL_DLL_EXPORTED void myopen(std::ifstream& stream, const char* path,
|
||||||
|
std::ios_base::openmode mode);
|
||||||
|
|
||||||
// convert UTF-16 characters to UTF-8
|
// convert UTF-16 characters to UTF-8
|
||||||
LIBHUNSPELL_DLL_EXPORTED char * u16_u8(char * dest, int size, const w_char * src, int srclen);
|
LIBHUNSPELL_DLL_EXPORTED std::string& u16_u8(std::string& dest,
|
||||||
|
const std::vector<w_char>& src);
|
||||||
|
|
||||||
// convert UTF-8 characters to UTF-16
|
// convert UTF-8 characters to UTF-16
|
||||||
LIBHUNSPELL_DLL_EXPORTED int u8_u16(w_char * dest, int size, const char * src);
|
LIBHUNSPELL_DLL_EXPORTED int u8_u16(std::vector<w_char>& dest,
|
||||||
|
const std::string& src);
|
||||||
// sort 2-byte vector
|
|
||||||
LIBHUNSPELL_DLL_EXPORTED void flag_qsort(unsigned short flags[], int begin, int end);
|
|
||||||
|
|
||||||
// binary search in 2-byte vector
|
|
||||||
LIBHUNSPELL_DLL_EXPORTED int flag_bsearch(unsigned short flags[], unsigned short flag, int right);
|
|
||||||
|
|
||||||
// remove end of line char(s)
|
// remove end of line char(s)
|
||||||
LIBHUNSPELL_DLL_EXPORTED void mychomp(char * s);
|
LIBHUNSPELL_DLL_EXPORTED void mychomp(std::string& s);
|
||||||
|
|
||||||
// duplicate string
|
// duplicate string
|
||||||
LIBHUNSPELL_DLL_EXPORTED char * mystrdup(const char * s);
|
LIBHUNSPELL_DLL_EXPORTED char* mystrdup(const char* s);
|
||||||
|
|
||||||
// strcat for limited length destination string
|
|
||||||
LIBHUNSPELL_DLL_EXPORTED char * mystrcat(char * dest, const char * st, int max);
|
|
||||||
|
|
||||||
// duplicate reverse of string
|
|
||||||
LIBHUNSPELL_DLL_EXPORTED char * myrevstrdup(const char * s);
|
|
||||||
|
|
||||||
// parse into tokens with char delimiter
|
// parse into tokens with char delimiter
|
||||||
LIBHUNSPELL_DLL_EXPORTED char * mystrsep(char ** sptr, const char delim);
|
LIBHUNSPELL_DLL_EXPORTED std::string::const_iterator mystrsep(const std::string &str,
|
||||||
// parse into tokens with char delimiter
|
std::string::const_iterator& start);
|
||||||
LIBHUNSPELL_DLL_EXPORTED char * mystrsep2(char ** sptr, const char delim);
|
|
||||||
|
|
||||||
// parse into tokens with char delimiter
|
// replace every occurrence of search by replace in str and return str
|
||||||
LIBHUNSPELL_DLL_EXPORTED char * mystrrep(char *, const char *, const char *);
|
LIBHUNSPELL_DLL_EXPORTED std::string& mystrrep(std::string& str,
|
||||||
|
const std::string& search,
|
||||||
|
const std::string& replace);
|
||||||
|
|
||||||
// append s to ends of every lines in text
|
// append apd to the end of every line in str
|
||||||
LIBHUNSPELL_DLL_EXPORTED void strlinecat(char * lines, const char * s);
|
LIBHUNSPELL_DLL_EXPORTED std::string& strlinecat(std::string& str,
|
||||||
|
const std::string& apd);
|
||||||
|
|
||||||
// tokenize into lines with new line
|
// tokenize into lines with new line
|
||||||
LIBHUNSPELL_DLL_EXPORTED int line_tok(const char * text, char *** lines, char breakchar);
|
LIBHUNSPELL_DLL_EXPORTED std::vector<std::string> line_tok(const std::string& text,
|
||||||
|
char breakchar);
|
||||||
|
|
||||||
// tokenize into lines with new line and uniq in place
|
// tokenize into lines with new line and uniq in place
|
||||||
LIBHUNSPELL_DLL_EXPORTED char * line_uniq(char * text, char breakchar);
|
LIBHUNSPELL_DLL_EXPORTED void line_uniq(std::string& text, char breakchar);
|
||||||
LIBHUNSPELL_DLL_EXPORTED char * line_uniq_app(char ** text, char breakchar);
|
|
||||||
|
|
||||||
// change oldchar to newchar in place
|
LIBHUNSPELL_DLL_EXPORTED void line_uniq_app(std::string& text, char breakchar);
|
||||||
LIBHUNSPELL_DLL_EXPORTED char * tr(char * text, char oldc, char newc);
|
|
||||||
|
|
||||||
// reverse word
|
// reverse word
|
||||||
LIBHUNSPELL_DLL_EXPORTED int reverseword(char *);
|
LIBHUNSPELL_DLL_EXPORTED size_t reverseword(std::string& word);
|
||||||
|
|
||||||
// reverse word
|
// reverse word
|
||||||
LIBHUNSPELL_DLL_EXPORTED int reverseword_utf(char *);
|
LIBHUNSPELL_DLL_EXPORTED size_t reverseword_utf(std::string&);
|
||||||
|
|
||||||
// remove duplicates
|
// remove duplicates
|
||||||
LIBHUNSPELL_DLL_EXPORTED int uniqlist(char ** list, int n);
|
LIBHUNSPELL_DLL_EXPORTED void uniqlist(std::vector<std::string>& list);
|
||||||
|
|
||||||
// free character array list
|
|
||||||
LIBHUNSPELL_DLL_EXPORTED void freelist(char *** list, int n);
|
|
||||||
|
|
||||||
// character encoding information
|
// character encoding information
|
||||||
struct cs_info {
|
struct cs_info {
|
||||||
|
@ -116,105 +181,134 @@ struct cs_info {
|
||||||
unsigned char cupper;
|
unsigned char cupper;
|
||||||
};
|
};
|
||||||
|
|
||||||
LIBHUNSPELL_DLL_EXPORTED int initialize_utf_tbl();
|
LIBHUNSPELL_DLL_EXPORTED void initialize_utf_tbl();
|
||||||
LIBHUNSPELL_DLL_EXPORTED void free_utf_tbl();
|
LIBHUNSPELL_DLL_EXPORTED void free_utf_tbl();
|
||||||
LIBHUNSPELL_DLL_EXPORTED unsigned short unicodetoupper(unsigned short c, int langnum);
|
LIBHUNSPELL_DLL_EXPORTED unsigned short unicodetoupper(unsigned short c,
|
||||||
LIBHUNSPELL_DLL_EXPORTED unsigned short unicodetolower(unsigned short c, int langnum);
|
int langnum);
|
||||||
|
LIBHUNSPELL_DLL_EXPORTED w_char upper_utf(w_char u, int langnum);
|
||||||
|
LIBHUNSPELL_DLL_EXPORTED w_char lower_utf(w_char u, int langnum);
|
||||||
|
LIBHUNSPELL_DLL_EXPORTED unsigned short unicodetolower(unsigned short c,
|
||||||
|
int langnum);
|
||||||
LIBHUNSPELL_DLL_EXPORTED int unicodeisalpha(unsigned short c);
|
LIBHUNSPELL_DLL_EXPORTED int unicodeisalpha(unsigned short c);
|
||||||
|
|
||||||
LIBHUNSPELL_DLL_EXPORTED struct cs_info * get_current_cs(const char * es);
|
LIBHUNSPELL_DLL_EXPORTED struct cs_info* get_current_cs(const std::string& es);
|
||||||
|
|
||||||
// get language identifiers of language codes
|
// get language identifiers of language codes
|
||||||
LIBHUNSPELL_DLL_EXPORTED int get_lang_num(const char * lang);
|
LIBHUNSPELL_DLL_EXPORTED int get_lang_num(const std::string& lang);
|
||||||
|
|
||||||
// get characters of the given 8bit encoding with lower- and uppercase forms
|
// get characters of the given 8bit encoding with lower- and uppercase forms
|
||||||
LIBHUNSPELL_DLL_EXPORTED char * get_casechars(const char * enc);
|
LIBHUNSPELL_DLL_EXPORTED std::string get_casechars(const char* enc);
|
||||||
|
|
||||||
// convert null terminated string to all caps using encoding
|
// convert std::string to all caps
|
||||||
LIBHUNSPELL_DLL_EXPORTED void enmkallcap(char * d, const char * p, const char * encoding);
|
LIBHUNSPELL_DLL_EXPORTED std::string& mkallcap(std::string& s,
|
||||||
|
const struct cs_info* csconv);
|
||||||
// convert null terminated string to all little using encoding
|
|
||||||
LIBHUNSPELL_DLL_EXPORTED void enmkallsmall(char * d, const char * p, const char * encoding);
|
|
||||||
|
|
||||||
// convert null terminated string to have initial capital using encoding
|
|
||||||
LIBHUNSPELL_DLL_EXPORTED void enmkinitcap(char * d, const char * p, const char * encoding);
|
|
||||||
|
|
||||||
// convert null terminated string to all caps
|
|
||||||
LIBHUNSPELL_DLL_EXPORTED void mkallcap(char * p, const struct cs_info * csconv);
|
|
||||||
|
|
||||||
// convert null terminated string to all little
|
// convert null terminated string to all little
|
||||||
LIBHUNSPELL_DLL_EXPORTED void mkallsmall(char * p, const struct cs_info * csconv);
|
LIBHUNSPELL_DLL_EXPORTED std::string& mkallsmall(std::string& s,
|
||||||
|
const struct cs_info* csconv);
|
||||||
|
|
||||||
// convert null terminated string to have initial capital
|
// convert first letter of string to little
|
||||||
LIBHUNSPELL_DLL_EXPORTED void mkinitcap(char * p, const struct cs_info * csconv);
|
LIBHUNSPELL_DLL_EXPORTED std::string& mkinitsmall(std::string& s,
|
||||||
|
const struct cs_info* csconv);
|
||||||
|
|
||||||
// convert first nc characters of UTF-8 string to little
|
// convert first letter of string to capital
|
||||||
LIBHUNSPELL_DLL_EXPORTED void mkallsmall_utf(w_char * u, int nc, int langnum);
|
LIBHUNSPELL_DLL_EXPORTED std::string& mkinitcap(std::string& s,
|
||||||
|
const struct cs_info* csconv);
|
||||||
|
|
||||||
// convert first nc characters of UTF-8 string to capital
|
// convert first letter of UTF-8 string to capital
|
||||||
LIBHUNSPELL_DLL_EXPORTED void mkallcap_utf(w_char * u, int nc, int langnum);
|
LIBHUNSPELL_DLL_EXPORTED std::vector<w_char>&
|
||||||
|
mkinitcap_utf(std::vector<w_char>& u, int langnum);
|
||||||
|
|
||||||
|
// convert UTF-8 string to little
|
||||||
|
LIBHUNSPELL_DLL_EXPORTED std::vector<w_char>&
|
||||||
|
mkallsmall_utf(std::vector<w_char>& u, int langnum);
|
||||||
|
|
||||||
|
// convert first letter of UTF-8 string to little
|
||||||
|
LIBHUNSPELL_DLL_EXPORTED std::vector<w_char>&
|
||||||
|
mkinitsmall_utf(std::vector<w_char>& u, int langnum);
|
||||||
|
|
||||||
|
// convert UTF-8 string to capital
|
||||||
|
LIBHUNSPELL_DLL_EXPORTED std::vector<w_char>&
|
||||||
|
mkallcap_utf(std::vector<w_char>& u, int langnum);
|
||||||
|
|
||||||
// get type of capitalization
|
// get type of capitalization
|
||||||
LIBHUNSPELL_DLL_EXPORTED int get_captype(char * q, int nl, cs_info *);
|
LIBHUNSPELL_DLL_EXPORTED int get_captype(const std::string& q, cs_info*);
|
||||||
|
|
||||||
// get type of capitalization (UTF-8)
|
// get type of capitalization (UTF-8)
|
||||||
LIBHUNSPELL_DLL_EXPORTED int get_captype_utf8(w_char * q, int nl, int langnum);
|
LIBHUNSPELL_DLL_EXPORTED int get_captype_utf8(const std::vector<w_char>& q, int langnum);
|
||||||
|
|
||||||
// strip all ignored characters in the string
|
// strip all ignored characters in the string
|
||||||
LIBHUNSPELL_DLL_EXPORTED void remove_ignored_chars_utf(char * word, unsigned short ignored_chars[], int ignored_len);
|
LIBHUNSPELL_DLL_EXPORTED size_t remove_ignored_chars_utf(
|
||||||
|
std::string& word,
|
||||||
|
const std::vector<w_char>& ignored_chars);
|
||||||
|
|
||||||
// strip all ignored characters in the string
|
// strip all ignored characters in the string
|
||||||
LIBHUNSPELL_DLL_EXPORTED void remove_ignored_chars(char * word, char * ignored_chars);
|
LIBHUNSPELL_DLL_EXPORTED size_t remove_ignored_chars(
|
||||||
|
std::string& word,
|
||||||
|
const std::string& ignored_chars);
|
||||||
|
|
||||||
LIBHUNSPELL_DLL_EXPORTED int parse_string(char * line, char ** out, int ln);
|
LIBHUNSPELL_DLL_EXPORTED bool parse_string(const std::string& line,
|
||||||
|
std::string& out,
|
||||||
|
int ln);
|
||||||
|
|
||||||
LIBHUNSPELL_DLL_EXPORTED int parse_array(char * line, char ** out, unsigned short ** out_utf16,
|
LIBHUNSPELL_DLL_EXPORTED bool parse_array(const std::string& line,
|
||||||
int * out_utf16_len, int utf8, int ln);
|
std::string& out,
|
||||||
|
std::vector<w_char>& out_utf16,
|
||||||
|
int utf8,
|
||||||
|
int ln);
|
||||||
|
|
||||||
LIBHUNSPELL_DLL_EXPORTED int fieldlen(const char * r);
|
LIBHUNSPELL_DLL_EXPORTED int fieldlen(const char* r);
|
||||||
LIBHUNSPELL_DLL_EXPORTED char * copy_field(char * dest, const char * morph, const char * var);
|
|
||||||
|
|
||||||
LIBHUNSPELL_DLL_EXPORTED int morphcmp(const char * s, const char * t);
|
LIBHUNSPELL_DLL_EXPORTED bool copy_field(std::string& dest,
|
||||||
|
const std::string& morph,
|
||||||
LIBHUNSPELL_DLL_EXPORTED int get_sfxcount(const char * morph);
|
const std::string& var);
|
||||||
|
|
||||||
// conversion function for protected memory
|
// conversion function for protected memory
|
||||||
LIBHUNSPELL_DLL_EXPORTED void store_pointer(char * dest, char * source);
|
LIBHUNSPELL_DLL_EXPORTED void store_pointer(char* dest, char* source);
|
||||||
|
|
||||||
// conversion function for protected memory
|
// conversion function for protected memory
|
||||||
LIBHUNSPELL_DLL_EXPORTED char * get_stored_pointer(const char * s);
|
LIBHUNSPELL_DLL_EXPORTED char* get_stored_pointer(const char* s);
|
||||||
|
|
||||||
// hash entry macros
|
// hash entry macros
|
||||||
LIBHUNSPELL_DLL_EXPORTED inline char* HENTRY_DATA(struct hentry *h)
|
LIBHUNSPELL_DLL_EXPORTED inline char* HENTRY_DATA(struct hentry* h) {
|
||||||
{
|
char* ret;
|
||||||
char *ret;
|
if (!h->var)
|
||||||
if (!h->var)
|
ret = NULL;
|
||||||
ret = NULL;
|
else if (h->var & H_OPT_ALIASM)
|
||||||
else if (h->var & H_OPT_ALIASM)
|
ret = get_stored_pointer(HENTRY_WORD(h) + h->blen + 1);
|
||||||
ret = get_stored_pointer(HENTRY_WORD(h) + h->blen + 1);
|
else
|
||||||
else
|
ret = HENTRY_WORD(h) + h->blen + 1;
|
||||||
ret = HENTRY_WORD(h) + h->blen + 1;
|
return ret;
|
||||||
return ret;
|
}
|
||||||
|
|
||||||
|
LIBHUNSPELL_DLL_EXPORTED inline const char* HENTRY_DATA(
|
||||||
|
const struct hentry* h) {
|
||||||
|
const char* ret;
|
||||||
|
if (!h->var)
|
||||||
|
ret = NULL;
|
||||||
|
else if (h->var & H_OPT_ALIASM)
|
||||||
|
ret = get_stored_pointer(HENTRY_WORD(h) + h->blen + 1);
|
||||||
|
else
|
||||||
|
ret = HENTRY_WORD(h) + h->blen + 1;
|
||||||
|
return ret;
|
||||||
}
|
}
|
||||||
|
|
||||||
// NULL-free version for warning-free OOo build
|
// NULL-free version for warning-free OOo build
|
||||||
LIBHUNSPELL_DLL_EXPORTED inline const char* HENTRY_DATA2(const struct hentry *h)
|
LIBHUNSPELL_DLL_EXPORTED inline const char* HENTRY_DATA2(
|
||||||
{
|
const struct hentry* h) {
|
||||||
const char *ret;
|
const char* ret;
|
||||||
if (!h->var)
|
if (!h->var)
|
||||||
ret = "";
|
ret = "";
|
||||||
else if (h->var & H_OPT_ALIASM)
|
else if (h->var & H_OPT_ALIASM)
|
||||||
ret = get_stored_pointer(HENTRY_WORD(h) + h->blen + 1);
|
ret = get_stored_pointer(HENTRY_WORD(h) + h->blen + 1);
|
||||||
else
|
else
|
||||||
ret = HENTRY_WORD(h) + h->blen + 1;
|
ret = HENTRY_WORD(h) + h->blen + 1;
|
||||||
return ret;
|
return ret;
|
||||||
}
|
}
|
||||||
|
|
||||||
LIBHUNSPELL_DLL_EXPORTED inline char* HENTRY_FIND(struct hentry *h, const char *p)
|
LIBHUNSPELL_DLL_EXPORTED inline char* HENTRY_FIND(struct hentry* h,
|
||||||
{
|
const char* p) {
|
||||||
return (HENTRY_DATA(h) ? strstr(HENTRY_DATA(h), p) : NULL);
|
return (HENTRY_DATA(h) ? strstr(HENTRY_DATA(h), p) : NULL);
|
||||||
}
|
}
|
||||||
|
|
||||||
#define w_char_eq(a,b) (((a).l == (b).l) && ((a).h == (b).h))
|
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
|
|
@ -1,36 +0,0 @@
|
||||||
#ifndef _DICTMGR_HXX_
|
|
||||||
#define _DICTMGR_HXX_
|
|
||||||
|
|
||||||
#include "hunvisapi.h"
|
|
||||||
|
|
||||||
#define MAXDICTIONARIES 100
|
|
||||||
#define MAXDICTENTRYLEN 1024
|
|
||||||
|
|
||||||
struct dictentry {
|
|
||||||
char * filename;
|
|
||||||
char * lang;
|
|
||||||
char * region;
|
|
||||||
};
|
|
||||||
|
|
||||||
|
|
||||||
class LIBHUNSPELL_DLL_EXPORTED DictMgr
|
|
||||||
{
|
|
||||||
|
|
||||||
int numdict;
|
|
||||||
dictentry * pdentry;
|
|
||||||
|
|
||||||
public:
|
|
||||||
|
|
||||||
DictMgr(const char * dictpath, const char * etype);
|
|
||||||
~DictMgr();
|
|
||||||
int get_list(dictentry** ppentry);
|
|
||||||
|
|
||||||
private:
|
|
||||||
int parse_file(const char * dictpath, const char * etype);
|
|
||||||
char * mystrsep(char ** stringp, const char delim);
|
|
||||||
char * mystrdup(const char * s);
|
|
||||||
void mychomp(char * s);
|
|
||||||
|
|
||||||
};
|
|
||||||
|
|
||||||
#endif
|
|
|
@ -1,25 +1,98 @@
|
||||||
/* file manager class - read lines of files [filename] OR [filename.hz] */
|
/* ***** BEGIN LICENSE BLOCK *****
|
||||||
#ifndef _FILEMGR_HXX_
|
* Version: MPL 1.1/GPL 2.0/LGPL 2.1
|
||||||
#define _FILEMGR_HXX_
|
*
|
||||||
|
* Copyright (C) 2002-2017 Németh László
|
||||||
|
*
|
||||||
|
* The contents of this file are subject to the Mozilla Public License Version
|
||||||
|
* 1.1 (the "License"); you may not use this file except in compliance with
|
||||||
|
* the License. You may obtain a copy of the License at
|
||||||
|
* http://www.mozilla.org/MPL/
|
||||||
|
*
|
||||||
|
* Software distributed under the License is distributed on an "AS IS" basis,
|
||||||
|
* WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
|
||||||
|
* for the specific language governing rights and limitations under the
|
||||||
|
* License.
|
||||||
|
*
|
||||||
|
* Hunspell is based on MySpell which is Copyright (C) 2002 Kevin Hendricks.
|
||||||
|
*
|
||||||
|
* Contributor(s): David Einstein, Davide Prina, Giuseppe Modugno,
|
||||||
|
* Gianluca Turconi, Simon Brouwer, Noll János, Bíró Árpád,
|
||||||
|
* Goldman Eleonóra, Sarlós Tamás, Bencsáth Boldizsár, Halácsy Péter,
|
||||||
|
* Dvornik László, Gefferth András, Nagy Viktor, Varga Dániel, Chris Halls,
|
||||||
|
* Rene Engelhard, Bram Moolenaar, Dafydd Jones, Harri Pitkänen
|
||||||
|
*
|
||||||
|
* Alternatively, the contents of this file may be used under the terms of
|
||||||
|
* either the GNU General Public License Version 2 or later (the "GPL"), or
|
||||||
|
* the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
|
||||||
|
* in which case the provisions of the GPL or the LGPL are applicable instead
|
||||||
|
* of those above. If you wish to allow use of your version of this file only
|
||||||
|
* under the terms of either the GPL or the LGPL, and not to allow others to
|
||||||
|
* use your version of this file under the terms of the MPL, indicate your
|
||||||
|
* decision by deleting the provisions above and replace them with the notice
|
||||||
|
* and other provisions required by the GPL or the LGPL. If you do not delete
|
||||||
|
* the provisions above, a recipient may use your version of this file under
|
||||||
|
* the terms of any one of the MPL, the GPL or the LGPL.
|
||||||
|
*
|
||||||
|
* ***** END LICENSE BLOCK ***** */
|
||||||
|
/*
|
||||||
|
* Copyright 2002 Kevin B. Hendricks, Stratford, Ontario, Canada
|
||||||
|
* And Contributors. All rights reserved.
|
||||||
|
*
|
||||||
|
* Redistribution and use in source and binary forms, with or without
|
||||||
|
* modification, are permitted provided that the following conditions
|
||||||
|
* are met:
|
||||||
|
*
|
||||||
|
* 1. Redistributions of source code must retain the above copyright
|
||||||
|
* notice, this list of conditions and the following disclaimer.
|
||||||
|
*
|
||||||
|
* 2. Redistributions in binary form must reproduce the above copyright
|
||||||
|
* notice, this list of conditions and the following disclaimer in the
|
||||||
|
* documentation and/or other materials provided with the distribution.
|
||||||
|
*
|
||||||
|
* 3. All modifications to the source code must be clearly marked as
|
||||||
|
* such. Binary redistributions based on modified source code
|
||||||
|
* must be clearly marked as modified versions in the documentation
|
||||||
|
* and/or other materials provided with the distribution.
|
||||||
|
*
|
||||||
|
* THIS SOFTWARE IS PROVIDED BY KEVIN B. HENDRICKS AND CONTRIBUTORS
|
||||||
|
* ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||||
|
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
|
||||||
|
* FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL
|
||||||
|
* KEVIN B. HENDRICKS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
|
||||||
|
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
|
||||||
|
* BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
|
||||||
|
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
|
||||||
|
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
||||||
|
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
|
||||||
|
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
|
||||||
|
* SUCH DAMAGE.
|
||||||
|
*/
|
||||||
|
|
||||||
#include "hunvisapi.h"
|
/* file manager class - read lines of files [filename] OR [filename.hz] */
|
||||||
|
#ifndef FILEMGR_HXX_
|
||||||
|
#define FILEMGR_HXX_
|
||||||
|
|
||||||
#include "hunzip.hxx"
|
#include "hunzip.hxx"
|
||||||
#include <stdio.h>
|
#include <stdio.h>
|
||||||
|
#include <string>
|
||||||
|
#include <fstream>
|
||||||
|
|
||||||
class LIBHUNSPELL_DLL_EXPORTED FileMgr
|
class FileMgr {
|
||||||
{
|
private:
|
||||||
protected:
|
FileMgr(const FileMgr&);
|
||||||
FILE * fin;
|
FileMgr& operator=(const FileMgr&);
|
||||||
Hunzip * hin;
|
|
||||||
char in[BUFSIZE + 50]; // input buffer
|
|
||||||
int fail(const char * err, const char * par);
|
|
||||||
int linenum;
|
|
||||||
|
|
||||||
public:
|
protected:
|
||||||
FileMgr(const char * filename, const char * key = NULL);
|
std::ifstream fin;
|
||||||
~FileMgr();
|
Hunzip* hin;
|
||||||
char * getline();
|
char in[BUFSIZE + 50]; // input buffer
|
||||||
int getlinenum();
|
int fail(const char* err, const char* par);
|
||||||
|
int linenum;
|
||||||
|
|
||||||
|
public:
|
||||||
|
FileMgr(const char* filename, const char* key = NULL);
|
||||||
|
~FileMgr();
|
||||||
|
bool getline(std::string&);
|
||||||
|
int getlinenum();
|
||||||
};
|
};
|
||||||
#endif
|
#endif
|
||||||
|
|
|
@ -1,69 +1,145 @@
|
||||||
#ifndef _HASHMGR_HXX_
|
/* ***** BEGIN LICENSE BLOCK *****
|
||||||
#define _HASHMGR_HXX_
|
* Version: MPL 1.1/GPL 2.0/LGPL 2.1
|
||||||
|
*
|
||||||
|
* Copyright (C) 2002-2017 Németh László
|
||||||
|
*
|
||||||
|
* The contents of this file are subject to the Mozilla Public License Version
|
||||||
|
* 1.1 (the "License"); you may not use this file except in compliance with
|
||||||
|
* the License. You may obtain a copy of the License at
|
||||||
|
* http://www.mozilla.org/MPL/
|
||||||
|
*
|
||||||
|
* Software distributed under the License is distributed on an "AS IS" basis,
|
||||||
|
* WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
|
||||||
|
* for the specific language governing rights and limitations under the
|
||||||
|
* License.
|
||||||
|
*
|
||||||
|
* Hunspell is based on MySpell which is Copyright (C) 2002 Kevin Hendricks.
|
||||||
|
*
|
||||||
|
* Contributor(s): David Einstein, Davide Prina, Giuseppe Modugno,
|
||||||
|
* Gianluca Turconi, Simon Brouwer, Noll János, Bíró Árpád,
|
||||||
|
* Goldman Eleonóra, Sarlós Tamás, Bencsáth Boldizsár, Halácsy Péter,
|
||||||
|
* Dvornik László, Gefferth András, Nagy Viktor, Varga Dániel, Chris Halls,
|
||||||
|
* Rene Engelhard, Bram Moolenaar, Dafydd Jones, Harri Pitkänen
|
||||||
|
*
|
||||||
|
* Alternatively, the contents of this file may be used under the terms of
|
||||||
|
* either the GNU General Public License Version 2 or later (the "GPL"), or
|
||||||
|
* the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
|
||||||
|
* in which case the provisions of the GPL or the LGPL are applicable instead
|
||||||
|
* of those above. If you wish to allow use of your version of this file only
|
||||||
|
* under the terms of either the GPL or the LGPL, and not to allow others to
|
||||||
|
* use your version of this file under the terms of the MPL, indicate your
|
||||||
|
* decision by deleting the provisions above and replace them with the notice
|
||||||
|
* and other provisions required by the GPL or the LGPL. If you do not delete
|
||||||
|
* the provisions above, a recipient may use your version of this file under
|
||||||
|
* the terms of any one of the MPL, the GPL or the LGPL.
|
||||||
|
*
|
||||||
|
* ***** END LICENSE BLOCK ***** */
|
||||||
|
/*
|
||||||
|
* Copyright 2002 Kevin B. Hendricks, Stratford, Ontario, Canada
|
||||||
|
* And Contributors. All rights reserved.
|
||||||
|
*
|
||||||
|
* Redistribution and use in source and binary forms, with or without
|
||||||
|
* modification, are permitted provided that the following conditions
|
||||||
|
* are met:
|
||||||
|
*
|
||||||
|
* 1. Redistributions of source code must retain the above copyright
|
||||||
|
* notice, this list of conditions and the following disclaimer.
|
||||||
|
*
|
||||||
|
* 2. Redistributions in binary form must reproduce the above copyright
|
||||||
|
* notice, this list of conditions and the following disclaimer in the
|
||||||
|
* documentation and/or other materials provided with the distribution.
|
||||||
|
*
|
||||||
|
* 3. All modifications to the source code must be clearly marked as
|
||||||
|
* such. Binary redistributions based on modified source code
|
||||||
|
* must be clearly marked as modified versions in the documentation
|
||||||
|
* and/or other materials provided with the distribution.
|
||||||
|
*
|
||||||
|
* THIS SOFTWARE IS PROVIDED BY KEVIN B. HENDRICKS AND CONTRIBUTORS
|
||||||
|
* ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||||
|
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
|
||||||
|
* FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL
|
||||||
|
* KEVIN B. HENDRICKS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
|
||||||
|
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
|
||||||
|
* BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
|
||||||
|
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
|
||||||
|
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
||||||
|
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
|
||||||
|
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
|
||||||
|
* SUCH DAMAGE.
|
||||||
|
*/
|
||||||
|
|
||||||
#include "hunvisapi.h"
|
#ifndef HASHMGR_HXX_
|
||||||
|
#define HASHMGR_HXX_
|
||||||
|
|
||||||
#include <stdio.h>
|
#include <stdio.h>
|
||||||
|
#include <string>
|
||||||
|
#include <vector>
|
||||||
|
|
||||||
#include "htypes.hxx"
|
#include "htypes.hxx"
|
||||||
#include "filemgr.hxx"
|
#include "filemgr.hxx"
|
||||||
|
#include "w_char.hxx"
|
||||||
|
|
||||||
enum flag { FLAG_CHAR, FLAG_LONG, FLAG_NUM, FLAG_UNI };
|
enum flag { FLAG_CHAR, FLAG_LONG, FLAG_NUM, FLAG_UNI };
|
||||||
|
|
||||||
class LIBHUNSPELL_DLL_EXPORTED HashMgr
|
class HashMgr {
|
||||||
{
|
int tablesize;
|
||||||
int tablesize;
|
struct hentry** tableptr;
|
||||||
struct hentry ** tableptr;
|
flag flag_mode;
|
||||||
int userword;
|
int complexprefixes;
|
||||||
flag flag_mode;
|
int utf8;
|
||||||
int complexprefixes;
|
unsigned short forbiddenword;
|
||||||
int utf8;
|
int langnum;
|
||||||
unsigned short forbiddenword;
|
std::string enc;
|
||||||
int langnum;
|
std::string lang;
|
||||||
char * enc;
|
struct cs_info* csconv;
|
||||||
char * lang;
|
std::string ignorechars;
|
||||||
struct cs_info * csconv;
|
std::vector<w_char> ignorechars_utf16;
|
||||||
char * ignorechars;
|
int numaliasf; // flag vector `compression' with aliases
|
||||||
unsigned short * ignorechars_utf16;
|
unsigned short** aliasf;
|
||||||
int ignorechars_utf16_len;
|
unsigned short* aliasflen;
|
||||||
int numaliasf; // flag vector `compression' with aliases
|
int numaliasm; // morphological desciption `compression' with aliases
|
||||||
unsigned short ** aliasf;
|
char** aliasm;
|
||||||
unsigned short * aliasflen;
|
|
||||||
int numaliasm; // morphological desciption `compression' with aliases
|
|
||||||
char ** aliasm;
|
|
||||||
|
|
||||||
|
public:
|
||||||
public:
|
HashMgr(const char* tpath, const char* apath, const char* key = NULL);
|
||||||
HashMgr(const char * tpath, const char * apath, const char * key = NULL);
|
|
||||||
~HashMgr();
|
~HashMgr();
|
||||||
|
|
||||||
struct hentry * lookup(const char *) const;
|
struct hentry* lookup(const char*) const;
|
||||||
int hash(const char *) const;
|
int hash(const char*) const;
|
||||||
struct hentry * walk_hashtable(int & col, struct hentry * hp) const;
|
struct hentry* walk_hashtable(int& col, struct hentry* hp) const;
|
||||||
|
|
||||||
int add(const char * word);
|
int add(const std::string& word);
|
||||||
int add_with_affix(const char * word, const char * pattern);
|
int add_with_affix(const std::string& word, const std::string& pattern);
|
||||||
int remove(const char * word);
|
int remove(const std::string& word);
|
||||||
int decode_flags(unsigned short ** result, char * flags, FileMgr * af);
|
int decode_flags(unsigned short** result, const std::string& flags, FileMgr* af) const;
|
||||||
unsigned short decode_flag(const char * flag);
|
bool decode_flags(std::vector<unsigned short>& result, const std::string& flags, FileMgr* af) const;
|
||||||
char * encode_flag(unsigned short flag);
|
unsigned short decode_flag(const char* flag) const;
|
||||||
int is_aliasf();
|
char* encode_flag(unsigned short flag) const;
|
||||||
int get_aliasf(int index, unsigned short ** fvec, FileMgr * af);
|
int is_aliasf() const;
|
||||||
int is_aliasm();
|
int get_aliasf(int index, unsigned short** fvec, FileMgr* af) const;
|
||||||
char * get_aliasm(int index);
|
int is_aliasm() const;
|
||||||
|
char* get_aliasm(int index) const;
|
||||||
private:
|
|
||||||
int get_clen_and_captype(const char * word, int wbl, int * captype);
|
|
||||||
int load_tables(const char * tpath, const char * key);
|
|
||||||
int add_word(const char * word, int wbl, int wcl, unsigned short * ap,
|
|
||||||
int al, const char * desc, bool onlyupcase);
|
|
||||||
int load_config(const char * affpath, const char * key);
|
|
||||||
int parse_aliasf(char * line, FileMgr * af);
|
|
||||||
int add_hidden_capitalized_word(char * word, int wbl, int wcl,
|
|
||||||
unsigned short * flags, int al, char * dp, int captype);
|
|
||||||
int parse_aliasm(char * line, FileMgr * af);
|
|
||||||
int remove_forbidden_flag(const char * word);
|
|
||||||
|
|
||||||
|
private:
|
||||||
|
int get_clen_and_captype(const std::string& word, int* captype);
|
||||||
|
int get_clen_and_captype(const std::string& word, int* captype, std::vector<w_char> &workbuf);
|
||||||
|
int load_tables(const char* tpath, const char* key);
|
||||||
|
int add_word(const std::string& word,
|
||||||
|
int wcl,
|
||||||
|
unsigned short* ap,
|
||||||
|
int al,
|
||||||
|
const std::string* desc,
|
||||||
|
bool onlyupcase);
|
||||||
|
int load_config(const char* affpath, const char* key);
|
||||||
|
bool parse_aliasf(const std::string& line, FileMgr* af);
|
||||||
|
int add_hidden_capitalized_word(const std::string& word,
|
||||||
|
int wcl,
|
||||||
|
unsigned short* flags,
|
||||||
|
int al,
|
||||||
|
const std::string* dp,
|
||||||
|
int captype);
|
||||||
|
bool parse_aliasm(const std::string& line, FileMgr* af);
|
||||||
|
int remove_forbidden_flag(const std::string& word);
|
||||||
};
|
};
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
|
|
@ -1,15 +1,52 @@
|
||||||
#ifndef _HTYPES_HXX_
|
/* ***** BEGIN LICENSE BLOCK *****
|
||||||
#define _HTYPES_HXX_
|
* Version: MPL 1.1/GPL 2.0/LGPL 2.1
|
||||||
|
*
|
||||||
|
* Copyright (C) 2002-2017 Németh László
|
||||||
|
*
|
||||||
|
* The contents of this file are subject to the Mozilla Public License Version
|
||||||
|
* 1.1 (the "License"); you may not use this file except in compliance with
|
||||||
|
* the License. You may obtain a copy of the License at
|
||||||
|
* http://www.mozilla.org/MPL/
|
||||||
|
*
|
||||||
|
* Software distributed under the License is distributed on an "AS IS" basis,
|
||||||
|
* WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
|
||||||
|
* for the specific language governing rights and limitations under the
|
||||||
|
* License.
|
||||||
|
*
|
||||||
|
* Hunspell is based on MySpell which is Copyright (C) 2002 Kevin Hendricks.
|
||||||
|
*
|
||||||
|
* Contributor(s): David Einstein, Davide Prina, Giuseppe Modugno,
|
||||||
|
* Gianluca Turconi, Simon Brouwer, Noll János, Bíró Árpád,
|
||||||
|
* Goldman Eleonóra, Sarlós Tamás, Bencsáth Boldizsár, Halácsy Péter,
|
||||||
|
* Dvornik László, Gefferth András, Nagy Viktor, Varga Dániel, Chris Halls,
|
||||||
|
* Rene Engelhard, Bram Moolenaar, Dafydd Jones, Harri Pitkänen
|
||||||
|
*
|
||||||
|
* Alternatively, the contents of this file may be used under the terms of
|
||||||
|
* either the GNU General Public License Version 2 or later (the "GPL"), or
|
||||||
|
* the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
|
||||||
|
* in which case the provisions of the GPL or the LGPL are applicable instead
|
||||||
|
* of those above. If you wish to allow use of your version of this file only
|
||||||
|
* under the terms of either the GPL or the LGPL, and not to allow others to
|
||||||
|
* use your version of this file under the terms of the MPL, indicate your
|
||||||
|
* decision by deleting the provisions above and replace them with the notice
|
||||||
|
* and other provisions required by the GPL or the LGPL. If you do not delete
|
||||||
|
* the provisions above, a recipient may use your version of this file under
|
||||||
|
* the terms of any one of the MPL, the GPL or the LGPL.
|
||||||
|
*
|
||||||
|
* ***** END LICENSE BLOCK ***** */
|
||||||
|
|
||||||
#define ROTATE_LEN 5
|
#ifndef HTYPES_HXX_
|
||||||
|
#define HTYPES_HXX_
|
||||||
|
|
||||||
#define ROTATE(v,q) \
|
#define ROTATE_LEN 5
|
||||||
(v) = ((v) << (q)) | (((v) >> (32 - q)) & ((1 << (q))-1));
|
|
||||||
|
#define ROTATE(v, q) \
|
||||||
|
(v) = ((v) << (q)) | (((v) >> (32 - q)) & ((1 << (q)) - 1));
|
||||||
|
|
||||||
// hentry options
|
// hentry options
|
||||||
#define H_OPT (1 << 0)
|
#define H_OPT (1 << 0)
|
||||||
#define H_OPT_ALIASM (1 << 1)
|
#define H_OPT_ALIASM (1 << 1)
|
||||||
#define H_OPT_PHON (1 << 2)
|
#define H_OPT_PHON (1 << 2)
|
||||||
|
|
||||||
// see also csutil.hxx
|
// see also csutil.hxx
|
||||||
#define HENTRY_WORD(h) &(h->word[0])
|
#define HENTRY_WORD(h) &(h->word[0])
|
||||||
|
@ -17,16 +54,15 @@
|
||||||
// approx. number of user defined words
|
// approx. number of user defined words
|
||||||
#define USERWORD 1000
|
#define USERWORD 1000
|
||||||
|
|
||||||
struct hentry
|
struct hentry {
|
||||||
{
|
unsigned char blen; // word length in bytes
|
||||||
unsigned char blen; // word length in bytes
|
unsigned char clen; // word length in characters (different for UTF-8 enc.)
|
||||||
unsigned char clen; // word length in characters (different for UTF-8 enc.)
|
short alen; // length of affix flag vector
|
||||||
short alen; // length of affix flag vector
|
unsigned short* astr; // affix flag vector
|
||||||
unsigned short * astr; // affix flag vector
|
struct hentry* next; // next word with same hash code
|
||||||
struct hentry * next; // next word with same hash code
|
struct hentry* next_homonym; // next homonym word (with same hash code)
|
||||||
struct hentry * next_homonym; // next homonym word (with same hash code)
|
char var; // variable fields (only for special pronounciation yet)
|
||||||
char var; // variable fields (only for special pronounciation yet)
|
char word[1]; // variable-length word (8-bit or UTF-8 encoding)
|
||||||
char word[1]; // variable-length word (8-bit or UTF-8 encoding)
|
|
||||||
};
|
};
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
|
|
@ -1,5 +1,45 @@
|
||||||
#ifndef _MYSPELLMGR_H_
|
/* ***** BEGIN LICENSE BLOCK *****
|
||||||
#define _MYSPELLMGR_H_
|
* Version: MPL 1.1/GPL 2.0/LGPL 2.1
|
||||||
|
*
|
||||||
|
* The contents of this file are subject to the Mozilla Public License Version
|
||||||
|
* 1.1 (the "License"); you may not use this file except in compliance with
|
||||||
|
* the License. You may obtain a copy of the License at
|
||||||
|
* http://www.mozilla.org/MPL/
|
||||||
|
*
|
||||||
|
* Software distributed under the License is distributed on an "AS IS" basis,
|
||||||
|
* WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
|
||||||
|
* for the specific language governing rights and limitations under the
|
||||||
|
* License.
|
||||||
|
*
|
||||||
|
* The Original Code is Hunspell, based on MySpell.
|
||||||
|
*
|
||||||
|
* The Initial Developers of the Original Code are
|
||||||
|
* Kevin Hendricks (MySpell) and Németh László (Hunspell).
|
||||||
|
* Portions created by the Initial Developers are Copyright (C) 2002-2005
|
||||||
|
* the Initial Developers. All Rights Reserved.
|
||||||
|
*
|
||||||
|
* Contributor(s): David Einstein, Davide Prina, Giuseppe Modugno,
|
||||||
|
* Gianluca Turconi, Simon Brouwer, Noll János, Bíró Árpád,
|
||||||
|
* Goldman Eleonóra, Sarlós Tamás, Bencsáth Boldizsár, Halácsy Péter,
|
||||||
|
* Dvornik László, Gefferth András, Nagy Viktor, Varga Dániel, Chris Halls,
|
||||||
|
* Rene Engelhard, Bram Moolenaar, Dafydd Jones, Harri Pitkänen
|
||||||
|
*
|
||||||
|
* Alternatively, the contents of this file may be used under the terms of
|
||||||
|
* either the GNU General Public License Version 2 or later (the "GPL"), or
|
||||||
|
* the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
|
||||||
|
* in which case the provisions of the GPL or the LGPL are applicable instead
|
||||||
|
* of those above. If you wish to allow use of your version of this file only
|
||||||
|
* under the terms of either the GPL or the LGPL, and not to allow others to
|
||||||
|
* use your version of this file under the terms of the MPL, indicate your
|
||||||
|
* decision by deleting the provisions above and replace them with the notice
|
||||||
|
* and other provisions required by the GPL or the LGPL. If you do not delete
|
||||||
|
* the provisions above, a recipient may use your version of this file under
|
||||||
|
* the terms of any one of the MPL, the GPL or the LGPL.
|
||||||
|
*
|
||||||
|
* ***** END LICENSE BLOCK ***** */
|
||||||
|
|
||||||
|
#ifndef MYSPELLMGR_H_
|
||||||
|
#define MYSPELLMGR_H_
|
||||||
|
|
||||||
#include "hunvisapi.h"
|
#include "hunvisapi.h"
|
||||||
|
|
||||||
|
@ -9,19 +49,26 @@ extern "C" {
|
||||||
|
|
||||||
typedef struct Hunhandle Hunhandle;
|
typedef struct Hunhandle Hunhandle;
|
||||||
|
|
||||||
LIBHUNSPELL_DLL_EXPORTED Hunhandle *Hunspell_create(const char * affpath, const char * dpath);
|
LIBHUNSPELL_DLL_EXPORTED Hunhandle* Hunspell_create(const char* affpath,
|
||||||
|
const char* dpath);
|
||||||
|
|
||||||
LIBHUNSPELL_DLL_EXPORTED Hunhandle *Hunspell_create_key(const char * affpath, const char * dpath,
|
LIBHUNSPELL_DLL_EXPORTED Hunhandle* Hunspell_create_key(const char* affpath,
|
||||||
const char * key);
|
const char* dpath,
|
||||||
|
const char* key);
|
||||||
|
|
||||||
LIBHUNSPELL_DLL_EXPORTED void Hunspell_destroy(Hunhandle *pHunspell);
|
LIBHUNSPELL_DLL_EXPORTED void Hunspell_destroy(Hunhandle* pHunspell);
|
||||||
|
|
||||||
|
/* load extra dictionaries (only dic files)
|
||||||
|
* output: 0 = additional dictionary slots available, 1 = slots are now full*/
|
||||||
|
LIBHUNSPELL_DLL_EXPORTED int Hunspell_add_dic(Hunhandle* pHunspell,
|
||||||
|
const char* dpath);
|
||||||
|
|
||||||
/* spell(word) - spellcheck word
|
/* spell(word) - spellcheck word
|
||||||
* output: 0 = bad word, not 0 = good word
|
* output: 0 = bad word, not 0 = good word
|
||||||
*/
|
*/
|
||||||
LIBHUNSPELL_DLL_EXPORTED int Hunspell_spell(Hunhandle *pHunspell, const char *);
|
LIBHUNSPELL_DLL_EXPORTED int Hunspell_spell(Hunhandle* pHunspell, const char*);
|
||||||
|
|
||||||
LIBHUNSPELL_DLL_EXPORTED char *Hunspell_get_dic_encoding(Hunhandle *pHunspell);
|
LIBHUNSPELL_DLL_EXPORTED char* Hunspell_get_dic_encoding(Hunhandle* pHunspell);
|
||||||
|
|
||||||
/* suggest(suggestions, word) - search suggestions
|
/* suggest(suggestions, word) - search suggestions
|
||||||
* input: pointer to an array of strings pointer and the (bad) word
|
* input: pointer to an array of strings pointer and the (bad) word
|
||||||
|
@ -30,63 +77,83 @@ LIBHUNSPELL_DLL_EXPORTED char *Hunspell_get_dic_encoding(Hunhandle *pHunspell);
|
||||||
* a newly allocated array of strings (*slst will be NULL when number
|
* a newly allocated array of strings (*slst will be NULL when number
|
||||||
* of suggestions equals 0.)
|
* of suggestions equals 0.)
|
||||||
*/
|
*/
|
||||||
LIBHUNSPELL_DLL_EXPORTED int Hunspell_suggest(Hunhandle *pHunspell, char*** slst, const char * word);
|
LIBHUNSPELL_DLL_EXPORTED int Hunspell_suggest(Hunhandle* pHunspell,
|
||||||
|
char*** slst,
|
||||||
|
const char* word);
|
||||||
|
|
||||||
/* morphological functions */
|
/* morphological functions */
|
||||||
|
|
||||||
/* analyze(result, word) - morphological analysis of the word */
|
/* analyze(result, word) - morphological analysis of the word */
|
||||||
|
|
||||||
LIBHUNSPELL_DLL_EXPORTED int Hunspell_analyze(Hunhandle *pHunspell, char*** slst, const char * word);
|
LIBHUNSPELL_DLL_EXPORTED int Hunspell_analyze(Hunhandle* pHunspell,
|
||||||
|
char*** slst,
|
||||||
|
const char* word);
|
||||||
|
|
||||||
/* stem(result, word) - stemmer function */
|
/* stem(result, word) - stemmer function */
|
||||||
|
|
||||||
LIBHUNSPELL_DLL_EXPORTED int Hunspell_stem(Hunhandle *pHunspell, char*** slst, const char * word);
|
LIBHUNSPELL_DLL_EXPORTED int Hunspell_stem(Hunhandle* pHunspell,
|
||||||
|
char*** slst,
|
||||||
|
const char* word);
|
||||||
|
|
||||||
/* stem(result, analysis, n) - get stems from a morph. analysis
|
/* stem(result, analysis, n) - get stems from a morph. analysis
|
||||||
* example:
|
* example:
|
||||||
* char ** result, result2;
|
* char ** result, result2;
|
||||||
* int n1 = Hunspell_analyze(result, "words");
|
* int n1 = Hunspell_analyze(result, "words");
|
||||||
* int n2 = Hunspell_stem2(result2, result, n1);
|
* int n2 = Hunspell_stem2(result2, result, n1);
|
||||||
*/
|
*/
|
||||||
|
|
||||||
LIBHUNSPELL_DLL_EXPORTED int Hunspell_stem2(Hunhandle *pHunspell, char*** slst, char** desc, int n);
|
LIBHUNSPELL_DLL_EXPORTED int Hunspell_stem2(Hunhandle* pHunspell,
|
||||||
|
char*** slst,
|
||||||
|
char** desc,
|
||||||
|
int n);
|
||||||
|
|
||||||
/* generate(result, word, word2) - morphological generation by example(s) */
|
/* generate(result, word, word2) - morphological generation by example(s) */
|
||||||
|
|
||||||
LIBHUNSPELL_DLL_EXPORTED int Hunspell_generate(Hunhandle *pHunspell, char*** slst, const char * word,
|
LIBHUNSPELL_DLL_EXPORTED int Hunspell_generate(Hunhandle* pHunspell,
|
||||||
const char * word2);
|
char*** slst,
|
||||||
|
const char* word,
|
||||||
|
const char* word2);
|
||||||
|
|
||||||
/* generate(result, word, desc, n) - generation by morph. description(s)
|
/* generate(result, word, desc, n) - generation by morph. description(s)
|
||||||
* example:
|
* example:
|
||||||
* char ** result;
|
* char ** result;
|
||||||
* char * affix = "is:plural"; // description depends on the dictionaries, too
|
* char * affix = "is:plural"; // description depends on the dictionaries, too
|
||||||
* int n = Hunspell_generate2(result, "word", &affix, 1);
|
* int n = Hunspell_generate2(result, "word", &affix, 1);
|
||||||
* for (int i = 0; i < n; i++) printf("%s\n", result[i]);
|
* for (int i = 0; i < n; i++) printf("%s\n", result[i]);
|
||||||
*/
|
*/
|
||||||
|
|
||||||
LIBHUNSPELL_DLL_EXPORTED int Hunspell_generate2(Hunhandle *pHunspell, char*** slst, const char * word,
|
LIBHUNSPELL_DLL_EXPORTED int Hunspell_generate2(Hunhandle* pHunspell,
|
||||||
char** desc, int n);
|
char*** slst,
|
||||||
|
const char* word,
|
||||||
|
char** desc,
|
||||||
|
int n);
|
||||||
|
|
||||||
/* functions for run-time modification of the dictionary */
|
/* functions for run-time modification of the dictionary */
|
||||||
|
|
||||||
/* add word to the run-time dictionary */
|
/* add word to the run-time dictionary */
|
||||||
|
|
||||||
LIBHUNSPELL_DLL_EXPORTED int Hunspell_add(Hunhandle *pHunspell, const char * word);
|
|
||||||
|
|
||||||
/* add word to the run-time dictionary with affix flags of
|
LIBHUNSPELL_DLL_EXPORTED int Hunspell_add(Hunhandle* pHunspell,
|
||||||
* the example (a dictionary word): Hunspell will recognize
|
const char* word);
|
||||||
* affixed forms of the new word, too.
|
|
||||||
*/
|
|
||||||
|
|
||||||
LIBHUNSPELL_DLL_EXPORTED int Hunspell_add_with_affix(Hunhandle *pHunspell, const char * word, const char * example);
|
|
||||||
|
|
||||||
/* remove word from the run-time dictionary */
|
/* add word to the run-time dictionary with affix flags of
|
||||||
|
* the example (a dictionary word): Hunspell will recognize
|
||||||
|
* affixed forms of the new word, too.
|
||||||
|
*/
|
||||||
|
|
||||||
LIBHUNSPELL_DLL_EXPORTED int Hunspell_remove(Hunhandle *pHunspell, const char * word);
|
LIBHUNSPELL_DLL_EXPORTED int Hunspell_add_with_affix(Hunhandle* pHunspell,
|
||||||
|
const char* word,
|
||||||
|
const char* example);
|
||||||
|
|
||||||
/* free suggestion lists */
|
/* remove word from the run-time dictionary */
|
||||||
|
|
||||||
LIBHUNSPELL_DLL_EXPORTED void Hunspell_free_list(Hunhandle *pHunspell, char *** slst, int n);
|
LIBHUNSPELL_DLL_EXPORTED int Hunspell_remove(Hunhandle* pHunspell,
|
||||||
|
const char* word);
|
||||||
|
|
||||||
|
/* free suggestion lists */
|
||||||
|
|
||||||
|
LIBHUNSPELL_DLL_EXPORTED void Hunspell_free_list(Hunhandle* pHunspell,
|
||||||
|
char*** slst,
|
||||||
|
int n);
|
||||||
|
|
||||||
#ifdef __cplusplus
|
#ifdef __cplusplus
|
||||||
}
|
}
|
||||||
|
|
|
@ -1,59 +1,134 @@
|
||||||
|
/* ***** BEGIN LICENSE BLOCK *****
|
||||||
|
* Version: MPL 1.1/GPL 2.0/LGPL 2.1
|
||||||
|
*
|
||||||
|
* Copyright (C) 2002-2017 Németh László
|
||||||
|
*
|
||||||
|
* The contents of this file are subject to the Mozilla Public License Version
|
||||||
|
* 1.1 (the "License"); you may not use this file except in compliance with
|
||||||
|
* the License. You may obtain a copy of the License at
|
||||||
|
* http://www.mozilla.org/MPL/
|
||||||
|
*
|
||||||
|
* Software distributed under the License is distributed on an "AS IS" basis,
|
||||||
|
* WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
|
||||||
|
* for the specific language governing rights and limitations under the
|
||||||
|
* License.
|
||||||
|
*
|
||||||
|
* Hunspell is based on MySpell which is Copyright (C) 2002 Kevin Hendricks.
|
||||||
|
*
|
||||||
|
* Contributor(s): David Einstein, Davide Prina, Giuseppe Modugno,
|
||||||
|
* Gianluca Turconi, Simon Brouwer, Noll János, Bíró Árpád,
|
||||||
|
* Goldman Eleonóra, Sarlós Tamás, Bencsáth Boldizsár, Halácsy Péter,
|
||||||
|
* Dvornik László, Gefferth András, Nagy Viktor, Varga Dániel, Chris Halls,
|
||||||
|
* Rene Engelhard, Bram Moolenaar, Dafydd Jones, Harri Pitkänen
|
||||||
|
*
|
||||||
|
* Alternatively, the contents of this file may be used under the terms of
|
||||||
|
* either the GNU General Public License Version 2 or later (the "GPL"), or
|
||||||
|
* the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
|
||||||
|
* in which case the provisions of the GPL or the LGPL are applicable instead
|
||||||
|
* of those above. If you wish to allow use of your version of this file only
|
||||||
|
* under the terms of either the GPL or the LGPL, and not to allow others to
|
||||||
|
* use your version of this file under the terms of the MPL, indicate your
|
||||||
|
* decision by deleting the provisions above and replace them with the notice
|
||||||
|
* and other provisions required by the GPL or the LGPL. If you do not delete
|
||||||
|
* the provisions above, a recipient may use your version of this file under
|
||||||
|
* the terms of any one of the MPL, the GPL or the LGPL.
|
||||||
|
*
|
||||||
|
* ***** END LICENSE BLOCK ***** */
|
||||||
|
/*
|
||||||
|
* Copyright 2002 Kevin B. Hendricks, Stratford, Ontario, Canada
|
||||||
|
* And Contributors. All rights reserved.
|
||||||
|
*
|
||||||
|
* Redistribution and use in source and binary forms, with or without
|
||||||
|
* modification, are permitted provided that the following conditions
|
||||||
|
* are met:
|
||||||
|
*
|
||||||
|
* 1. Redistributions of source code must retain the above copyright
|
||||||
|
* notice, this list of conditions and the following disclaimer.
|
||||||
|
*
|
||||||
|
* 2. Redistributions in binary form must reproduce the above copyright
|
||||||
|
* notice, this list of conditions and the following disclaimer in the
|
||||||
|
* documentation and/or other materials provided with the distribution.
|
||||||
|
*
|
||||||
|
* 3. All modifications to the source code must be clearly marked as
|
||||||
|
* such. Binary redistributions based on modified source code
|
||||||
|
* must be clearly marked as modified versions in the documentation
|
||||||
|
* and/or other materials provided with the distribution.
|
||||||
|
*
|
||||||
|
* THIS SOFTWARE IS PROVIDED BY KEVIN B. HENDRICKS AND CONTRIBUTORS
|
||||||
|
* ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||||
|
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
|
||||||
|
* FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL
|
||||||
|
* KEVIN B. HENDRICKS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
|
||||||
|
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
|
||||||
|
* BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
|
||||||
|
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
|
||||||
|
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
||||||
|
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
|
||||||
|
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
|
||||||
|
* SUCH DAMAGE.
|
||||||
|
*/
|
||||||
|
#ifndef MYSPELLMGR_HXX_
|
||||||
|
#define MYSPELLMGR_HXX_
|
||||||
|
|
||||||
#include "hunvisapi.h"
|
#include "hunvisapi.h"
|
||||||
|
#include "w_char.hxx"
|
||||||
|
#include "atypes.hxx"
|
||||||
|
#include <string>
|
||||||
|
#include <vector>
|
||||||
|
|
||||||
#include "hashmgr.hxx"
|
#define SPELL_XML "<?xml?>"
|
||||||
#include "affixmgr.hxx"
|
|
||||||
#include "suggestmgr.hxx"
|
|
||||||
#include "langnum.hxx"
|
|
||||||
|
|
||||||
#define SPELL_XML "<?xml?>"
|
|
||||||
|
|
||||||
#define MAXDIC 20
|
|
||||||
#define MAXSUGGESTION 15
|
#define MAXSUGGESTION 15
|
||||||
#define MAXSHARPS 5
|
#define MAXSHARPS 5
|
||||||
|
|
||||||
#define HUNSPELL_OK (1 << 0)
|
#ifndef MAXWORDLEN
|
||||||
#define HUNSPELL_OK_WARN (1 << 1)
|
#define MAXWORDLEN 100
|
||||||
|
#endif
|
||||||
|
|
||||||
#ifndef _MYSPELLMGR_HXX_
|
#if defined __GNUC__ && (__GNUC__ > 3 || (__GNUC__ == 3 && __GNUC_MINOR__ >= 1))
|
||||||
#define _MYSPELLMGR_HXX_
|
# define H_DEPRECATED __attribute__((__deprecated__))
|
||||||
|
#elif defined(_MSC_VER) && (_MSC_VER >= 1300)
|
||||||
|
# define H_DEPRECATED __declspec(deprecated)
|
||||||
|
#else
|
||||||
|
# define H_DEPRECATED
|
||||||
|
#endif
|
||||||
|
|
||||||
class LIBHUNSPELL_DLL_EXPORTED Hunspell
|
class HunspellImpl;
|
||||||
{
|
|
||||||
AffixMgr* pAMgr;
|
|
||||||
HashMgr* pHMgr[MAXDIC];
|
|
||||||
int maxdic;
|
|
||||||
SuggestMgr* pSMgr;
|
|
||||||
char * affixpath;
|
|
||||||
char * encoding;
|
|
||||||
struct cs_info * csconv;
|
|
||||||
int langnum;
|
|
||||||
int utf8;
|
|
||||||
int complexprefixes;
|
|
||||||
char** wordbreak;
|
|
||||||
|
|
||||||
public:
|
class LIBHUNSPELL_DLL_EXPORTED Hunspell {
|
||||||
|
private:
|
||||||
|
Hunspell(const Hunspell&);
|
||||||
|
Hunspell& operator=(const Hunspell&);
|
||||||
|
|
||||||
|
private:
|
||||||
|
HunspellImpl* m_Impl;
|
||||||
|
|
||||||
|
public:
|
||||||
/* Hunspell(aff, dic) - constructor of Hunspell class
|
/* Hunspell(aff, dic) - constructor of Hunspell class
|
||||||
* input: path of affix file and dictionary file
|
* input: path of affix file and dictionary file
|
||||||
|
*
|
||||||
|
* In WIN32 environment, use UTF-8 encoded paths started with the long path
|
||||||
|
* prefix \\\\?\\ to handle system-independent character encoding and very
|
||||||
|
* long path names (without the long path prefix Hunspell will use fopen()
|
||||||
|
* with system-dependent character encoding instead of _wfopen()).
|
||||||
*/
|
*/
|
||||||
|
Hunspell(const char* affpath, const char* dpath, const char* key = NULL);
|
||||||
Hunspell(const char * affpath, const char * dpath, const char * key = NULL);
|
|
||||||
~Hunspell();
|
~Hunspell();
|
||||||
|
|
||||||
/* load extra dictionaries (only dic files) */
|
/* load extra dictionaries (only dic files) */
|
||||||
int add_dic(const char * dpath, const char * key = NULL);
|
int add_dic(const char* dpath, const char* key = NULL);
|
||||||
|
|
||||||
/* spell(word) - spellcheck word
|
/* spell(word) - spellcheck word
|
||||||
* output: 0 = bad word, not 0 = good word
|
* output: false = bad word, true = good word
|
||||||
*
|
*
|
||||||
* plus output:
|
* plus output:
|
||||||
* info: information bit array, fields:
|
* info: information bit array, fields:
|
||||||
* SPELL_COMPOUND = a compound word
|
* SPELL_COMPOUND = a compound word
|
||||||
* SPELL_FORBIDDEN = an explicit forbidden word
|
* SPELL_FORBIDDEN = an explicit forbidden word
|
||||||
* root: root (stem), when input is a word with affix(es)
|
* root: root (stem), when input is a word with affix(es)
|
||||||
*/
|
*/
|
||||||
|
bool spell(const std::string& word, int* info = NULL, std::string* root = NULL);
|
||||||
int spell(const char * word, int * info = NULL, char ** root = NULL);
|
H_DEPRECATED int spell(const char* word, int* info = NULL, char** root = NULL);
|
||||||
|
|
||||||
/* suggest(suggestions, word) - search suggestions
|
/* suggest(suggestions, word) - search suggestions
|
||||||
* input: pointer to an array of strings pointer and the (bad) word
|
* input: pointer to an array of strings pointer and the (bad) word
|
||||||
|
@ -62,111 +137,93 @@ public:
|
||||||
* a newly allocated array of strings (*slst will be NULL when number
|
* a newly allocated array of strings (*slst will be NULL when number
|
||||||
* of suggestions equals 0.)
|
* of suggestions equals 0.)
|
||||||
*/
|
*/
|
||||||
|
std::vector<std::string> suggest(const std::string& word);
|
||||||
|
H_DEPRECATED int suggest(char*** slst, const char* word);
|
||||||
|
|
||||||
int suggest(char*** slst, const char * word);
|
/* Suggest words from suffix rules
|
||||||
|
* suffix_suggest(suggestions, root_word)
|
||||||
|
* input: pointer to an array of strings pointer and the word
|
||||||
|
* array of strings pointer (here *slst) may not be initialized
|
||||||
|
* output: number of suggestions in string array, and suggestions in
|
||||||
|
* a newly allocated array of strings (*slst will be NULL when number
|
||||||
|
* of suggestions equals 0.)
|
||||||
|
*/
|
||||||
|
std::vector<std::string> suffix_suggest(const std::string& root_word);
|
||||||
|
H_DEPRECATED int suffix_suggest(char*** slst, const char* root_word);
|
||||||
|
|
||||||
/* deallocate suggestion lists */
|
/* deallocate suggestion lists */
|
||||||
|
H_DEPRECATED void free_list(char*** slst, int n);
|
||||||
|
|
||||||
void free_list(char *** slst, int n);
|
const std::string& get_dict_encoding() const;
|
||||||
|
char* get_dic_encoding();
|
||||||
|
|
||||||
char * get_dic_encoding();
|
/* morphological functions */
|
||||||
|
|
||||||
/* morphological functions */
|
/* analyze(result, word) - morphological analysis of the word */
|
||||||
|
std::vector<std::string> analyze(const std::string& word);
|
||||||
|
H_DEPRECATED int analyze(char*** slst, const char* word);
|
||||||
|
|
||||||
/* analyze(result, word) - morphological analysis of the word */
|
/* stem(word) - stemmer function */
|
||||||
|
std::vector<std::string> stem(const std::string& word);
|
||||||
int analyze(char*** slst, const char * word);
|
H_DEPRECATED int stem(char*** slst, const char* word);
|
||||||
|
|
||||||
/* stem(result, word) - stemmer function */
|
/* stem(analysis, n) - get stems from a morph. analysis
|
||||||
|
* example:
|
||||||
int stem(char*** slst, const char * word);
|
* char ** result, result2;
|
||||||
|
* int n1 = analyze(&result, "words");
|
||||||
/* stem(result, analysis, n) - get stems from a morph. analysis
|
* int n2 = stem(&result2, result, n1);
|
||||||
* example:
|
*/
|
||||||
* char ** result, result2;
|
std::vector<std::string> stem(const std::vector<std::string>& morph);
|
||||||
* int n1 = analyze(&result, "words");
|
H_DEPRECATED int stem(char*** slst, char** morph, int n);
|
||||||
* int n2 = stem(&result2, result, n1);
|
|
||||||
*/
|
|
||||||
|
|
||||||
int stem(char*** slst, char ** morph, int n);
|
|
||||||
|
|
||||||
/* generate(result, word, word2) - morphological generation by example(s) */
|
/* generate(result, word, word2) - morphological generation by example(s) */
|
||||||
|
std::vector<std::string> generate(const std::string& word, const std::string& word2);
|
||||||
|
H_DEPRECATED int generate(char*** slst, const char* word, const char* word2);
|
||||||
|
|
||||||
int generate(char*** slst, const char * word, const char * word2);
|
/* generate(result, word, desc, n) - generation by morph. description(s)
|
||||||
|
* example:
|
||||||
/* generate(result, word, desc, n) - generation by morph. description(s)
|
* char ** result;
|
||||||
* example:
|
* char * affix = "is:plural"; // description depends on the dictionaries, too
|
||||||
* char ** result;
|
* int n = generate(&result, "word", &affix, 1);
|
||||||
* char * affix = "is:plural"; // description depends on the dictionaries, too
|
* for (int i = 0; i < n; i++) printf("%s\n", result[i]);
|
||||||
* int n = generate(&result, "word", &affix, 1);
|
*/
|
||||||
* for (int i = 0; i < n; i++) printf("%s\n", result[i]);
|
std::vector<std::string> generate(const std::string& word, const std::vector<std::string>& pl);
|
||||||
*/
|
H_DEPRECATED int generate(char*** slst, const char* word, char** desc, int n);
|
||||||
|
|
||||||
int generate(char*** slst, const char * word, char ** desc, int n);
|
|
||||||
|
|
||||||
/* functions for run-time modification of the dictionary */
|
/* functions for run-time modification of the dictionary */
|
||||||
|
|
||||||
/* add word to the run-time dictionary */
|
/* add word to the run-time dictionary */
|
||||||
|
|
||||||
int add(const char * word);
|
int add(const std::string& word);
|
||||||
|
|
||||||
/* add word to the run-time dictionary with affix flags of
|
/* add word to the run-time dictionary with affix flags of
|
||||||
* the example (a dictionary word): Hunspell will recognize
|
* the example (a dictionary word): Hunspell will recognize
|
||||||
* affixed forms of the new word, too.
|
* affixed forms of the new word, too.
|
||||||
*/
|
*/
|
||||||
|
|
||||||
int add_with_affix(const char * word, const char * example);
|
int add_with_affix(const std::string& word, const std::string& example);
|
||||||
|
|
||||||
/* remove word from the run-time dictionary */
|
/* remove word from the run-time dictionary */
|
||||||
|
|
||||||
int remove(const char * word);
|
int remove(const std::string& word);
|
||||||
|
|
||||||
/* other */
|
/* other */
|
||||||
|
|
||||||
/* get extra word characters defined in affix file for tokenization */
|
/* get extra word characters defined in affix file for tokenization */
|
||||||
const char * get_wordchars();
|
const char* get_wordchars() const;
|
||||||
unsigned short * get_wordchars_utf16(int * len);
|
const std::string& get_wordchars_cpp() const;
|
||||||
|
const std::vector<w_char>& get_wordchars_utf16() const;
|
||||||
|
|
||||||
struct cs_info * get_csconv();
|
struct cs_info* get_csconv();
|
||||||
const char * get_version();
|
|
||||||
|
const char* get_version() const;
|
||||||
|
const std::string& get_version_cpp() const;
|
||||||
|
|
||||||
int get_langnum() const;
|
int get_langnum() const;
|
||||||
|
|
||||||
/* experimental and deprecated functions */
|
|
||||||
|
|
||||||
#ifdef HUNSPELL_EXPERIMENTAL
|
|
||||||
/* suffix is an affix flag string, similarly in dictionary files */
|
|
||||||
int put_word_suffix(const char * word, const char * suffix);
|
|
||||||
char * morph_with_correction(const char * word);
|
|
||||||
|
|
||||||
/* spec. suggestions */
|
|
||||||
int suggest_auto(char*** slst, const char * word);
|
|
||||||
int suggest_pos_stems(char*** slst, const char * word);
|
|
||||||
#endif
|
|
||||||
|
|
||||||
private:
|
|
||||||
int cleanword(char *, const char *, int * pcaptype, int * pabbrev);
|
|
||||||
int cleanword2(char *, const char *, w_char *, int * w_len, int * pcaptype, int * pabbrev);
|
|
||||||
void mkinitcap(char *);
|
|
||||||
int mkinitcap2(char * p, w_char * u, int nc);
|
|
||||||
int mkinitsmall2(char * p, w_char * u, int nc);
|
|
||||||
void mkallcap(char *);
|
|
||||||
int mkallcap2(char * p, w_char * u, int nc);
|
|
||||||
void mkallsmall(char *);
|
|
||||||
int mkallsmall2(char * p, w_char * u, int nc);
|
|
||||||
struct hentry * checkword(const char *, int * info, char **root);
|
|
||||||
char * sharps_u8_l1(char * dest, char * source);
|
|
||||||
hentry * spellsharps(char * base, char *, int, int, char * tmp, int * info, char **root);
|
|
||||||
int is_keepcase(const hentry * rv);
|
|
||||||
int insert_sug(char ***slst, char * word, int ns);
|
|
||||||
void cat_result(char * result, char * st);
|
|
||||||
char * stem_description(const char * desc);
|
|
||||||
int spellml(char*** slst, const char * word);
|
|
||||||
int get_xml_par(char * dest, const char * par, int maxl);
|
|
||||||
const char * get_xml_pos(const char * s, const char * attr);
|
|
||||||
int get_xml_list(char ***slst, char * list, const char * tag);
|
|
||||||
int check_xml_par(const char * q, const char * attr, const char * value);
|
|
||||||
|
|
||||||
|
/* need for putdic */
|
||||||
|
bool input_conv(const std::string& word, std::string& dest);
|
||||||
|
H_DEPRECATED int input_conv(const char* word, char* dest, size_t destsize);
|
||||||
};
|
};
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
|
|
@ -1,5 +1,5 @@
|
||||||
#ifndef _HUNSPELL_VISIBILITY_H_
|
#ifndef HUNSPELL_VISIBILITY_H_
|
||||||
#define _HUNSPELL_VISIBILITY_H_
|
#define HUNSPELL_VISIBILITY_H_
|
||||||
|
|
||||||
#if defined(HUNSPELL_STATIC)
|
#if defined(HUNSPELL_STATIC)
|
||||||
# define LIBHUNSPELL_DLL_EXPORTED
|
# define LIBHUNSPELL_DLL_EXPORTED
|
||||||
|
@ -9,7 +9,7 @@
|
||||||
# else
|
# else
|
||||||
# define LIBHUNSPELL_DLL_EXPORTED __declspec(dllimport)
|
# define LIBHUNSPELL_DLL_EXPORTED __declspec(dllimport)
|
||||||
# endif
|
# endif
|
||||||
#elif BUILDING_LIBHUNSPELL && 1
|
#elif defined(BUILDING_LIBHUNSPELL) && 1
|
||||||
# define LIBHUNSPELL_DLL_EXPORTED __attribute__((__visibility__("default")))
|
# define LIBHUNSPELL_DLL_EXPORTED __attribute__((__visibility__("default")))
|
||||||
#else
|
#else
|
||||||
# define LIBHUNSPELL_DLL_EXPORTED
|
# define LIBHUNSPELL_DLL_EXPORTED
|
||||||
|
|
|
@ -1,45 +1,87 @@
|
||||||
|
/* ***** BEGIN LICENSE BLOCK *****
|
||||||
|
* Version: MPL 1.1/GPL 2.0/LGPL 2.1
|
||||||
|
*
|
||||||
|
* Copyright (C) 2002-2017 Németh László
|
||||||
|
*
|
||||||
|
* The contents of this file are subject to the Mozilla Public License Version
|
||||||
|
* 1.1 (the "License"); you may not use this file except in compliance with
|
||||||
|
* the License. You may obtain a copy of the License at
|
||||||
|
* http://www.mozilla.org/MPL/
|
||||||
|
*
|
||||||
|
* Software distributed under the License is distributed on an "AS IS" basis,
|
||||||
|
* WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
|
||||||
|
* for the specific language governing rights and limitations under the
|
||||||
|
* License.
|
||||||
|
*
|
||||||
|
* Hunspell is based on MySpell which is Copyright (C) 2002 Kevin Hendricks.
|
||||||
|
*
|
||||||
|
* Contributor(s): David Einstein, Davide Prina, Giuseppe Modugno,
|
||||||
|
* Gianluca Turconi, Simon Brouwer, Noll János, Bíró Árpád,
|
||||||
|
* Goldman Eleonóra, Sarlós Tamás, Bencsáth Boldizsár, Halácsy Péter,
|
||||||
|
* Dvornik László, Gefferth András, Nagy Viktor, Varga Dániel, Chris Halls,
|
||||||
|
* Rene Engelhard, Bram Moolenaar, Dafydd Jones, Harri Pitkänen
|
||||||
|
*
|
||||||
|
* Alternatively, the contents of this file may be used under the terms of
|
||||||
|
* either the GNU General Public License Version 2 or later (the "GPL"), or
|
||||||
|
* the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
|
||||||
|
* in which case the provisions of the GPL or the LGPL are applicable instead
|
||||||
|
* of those above. If you wish to allow use of your version of this file only
|
||||||
|
* under the terms of either the GPL or the LGPL, and not to allow others to
|
||||||
|
* use your version of this file under the terms of the MPL, indicate your
|
||||||
|
* decision by deleting the provisions above and replace them with the notice
|
||||||
|
* and other provisions required by the GPL or the LGPL. If you do not delete
|
||||||
|
* the provisions above, a recipient may use your version of this file under
|
||||||
|
* the terms of any one of the MPL, the GPL or the LGPL.
|
||||||
|
*
|
||||||
|
* ***** END LICENSE BLOCK ***** */
|
||||||
|
|
||||||
/* hunzip: file decompression for sorted dictionaries with optional encryption,
|
/* hunzip: file decompression for sorted dictionaries with optional encryption,
|
||||||
* algorithm: prefix-suffix encoding and 16-bit Huffman encoding */
|
* algorithm: prefix-suffix encoding and 16-bit Huffman encoding */
|
||||||
|
|
||||||
#ifndef _HUNZIP_HXX_
|
#ifndef HUNZIP_HXX_
|
||||||
#define _HUNZIP_HXX_
|
#define HUNZIP_HXX_
|
||||||
|
|
||||||
#include "hunvisapi.h"
|
#include "hunvisapi.h"
|
||||||
|
|
||||||
#include <stdio.h>
|
#include <stdio.h>
|
||||||
|
#include <fstream>
|
||||||
|
#include <vector>
|
||||||
|
|
||||||
#define BUFSIZE 65536
|
#define BUFSIZE 65536
|
||||||
#define HZIP_EXTENSION ".hz"
|
#define HZIP_EXTENSION ".hz"
|
||||||
|
|
||||||
#define MSG_OPEN "error: %s: cannot open\n"
|
#define MSG_OPEN "error: %s: cannot open\n"
|
||||||
#define MSG_FORMAT "error: %s: not in hzip format\n"
|
#define MSG_FORMAT "error: %s: not in hzip format\n"
|
||||||
#define MSG_MEMORY "error: %s: missing memory\n"
|
#define MSG_MEMORY "error: %s: missing memory\n"
|
||||||
#define MSG_KEY "error: %s: missing or bad password\n"
|
#define MSG_KEY "error: %s: missing or bad password\n"
|
||||||
|
|
||||||
struct bit {
|
struct bit {
|
||||||
unsigned char c[2];
|
unsigned char c[2];
|
||||||
int v[2];
|
int v[2];
|
||||||
};
|
};
|
||||||
|
|
||||||
class LIBHUNSPELL_DLL_EXPORTED Hunzip
|
class LIBHUNSPELL_DLL_EXPORTED Hunzip {
|
||||||
{
|
private:
|
||||||
|
Hunzip(const Hunzip&);
|
||||||
|
Hunzip& operator=(const Hunzip&);
|
||||||
|
|
||||||
protected:
|
protected:
|
||||||
char * filename;
|
char* filename;
|
||||||
FILE * fin;
|
std::ifstream fin;
|
||||||
int bufsiz, lastbit, inc, inbits, outc;
|
int bufsiz, lastbit, inc, inbits, outc;
|
||||||
struct bit * dec; // code table
|
std::vector<bit> dec; // code table
|
||||||
char in[BUFSIZE]; // input buffer
|
char in[BUFSIZE]; // input buffer
|
||||||
char out[BUFSIZE + 1]; // Huffman-decoded buffer
|
char out[BUFSIZE + 1]; // Huffman-decoded buffer
|
||||||
char line[BUFSIZE + 50]; // decoded line
|
char line[BUFSIZE + 50]; // decoded line
|
||||||
int getcode(const char * key);
|
int getcode(const char* key);
|
||||||
int getbuf();
|
int getbuf();
|
||||||
int fail(const char * err, const char * par);
|
int fail(const char* err, const char* par);
|
||||||
|
|
||||||
public:
|
public:
|
||||||
Hunzip(const char * filename, const char * key = NULL);
|
Hunzip(const char* filename, const char* key = NULL);
|
||||||
~Hunzip();
|
~Hunzip();
|
||||||
const char * getline();
|
bool is_open() { return fin.is_open(); }
|
||||||
|
bool getline(std::string& dest);
|
||||||
};
|
};
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
|
|
@ -1,38 +1,75 @@
|
||||||
#ifndef _LANGNUM_HXX_
|
/* ***** BEGIN LICENSE BLOCK *****
|
||||||
#define _LANGNUM_HXX_
|
* Version: MPL 1.1/GPL 2.0/LGPL 2.1
|
||||||
|
*
|
||||||
|
* Copyright (C) 2002-2017 Németh László
|
||||||
|
*
|
||||||
|
* The contents of this file are subject to the Mozilla Public License Version
|
||||||
|
* 1.1 (the "License"); you may not use this file except in compliance with
|
||||||
|
* the License. You may obtain a copy of the License at
|
||||||
|
* http://www.mozilla.org/MPL/
|
||||||
|
*
|
||||||
|
* Software distributed under the License is distributed on an "AS IS" basis,
|
||||||
|
* WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
|
||||||
|
* for the specific language governing rights and limitations under the
|
||||||
|
* License.
|
||||||
|
*
|
||||||
|
* Hunspell is based on MySpell which is Copyright (C) 2002 Kevin Hendricks.
|
||||||
|
*
|
||||||
|
* Contributor(s): David Einstein, Davide Prina, Giuseppe Modugno,
|
||||||
|
* Gianluca Turconi, Simon Brouwer, Noll János, Bíró Árpád,
|
||||||
|
* Goldman Eleonóra, Sarlós Tamás, Bencsáth Boldizsár, Halácsy Péter,
|
||||||
|
* Dvornik László, Gefferth András, Nagy Viktor, Varga Dániel, Chris Halls,
|
||||||
|
* Rene Engelhard, Bram Moolenaar, Dafydd Jones, Harri Pitkänen
|
||||||
|
*
|
||||||
|
* Alternatively, the contents of this file may be used under the terms of
|
||||||
|
* either the GNU General Public License Version 2 or later (the "GPL"), or
|
||||||
|
* the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
|
||||||
|
* in which case the provisions of the GPL or the LGPL are applicable instead
|
||||||
|
* of those above. If you wish to allow use of your version of this file only
|
||||||
|
* under the terms of either the GPL or the LGPL, and not to allow others to
|
||||||
|
* use your version of this file under the terms of the MPL, indicate your
|
||||||
|
* decision by deleting the provisions above and replace them with the notice
|
||||||
|
* and other provisions required by the GPL or the LGPL. If you do not delete
|
||||||
|
* the provisions above, a recipient may use your version of this file under
|
||||||
|
* the terms of any one of the MPL, the GPL or the LGPL.
|
||||||
|
*
|
||||||
|
* ***** END LICENSE BLOCK ***** */
|
||||||
|
|
||||||
|
#ifndef LANGNUM_HXX_
|
||||||
|
#define LANGNUM_HXX_
|
||||||
|
|
||||||
/*
|
/*
|
||||||
language numbers for language specific codes
|
language numbers for language specific codes
|
||||||
see http://l10n.openoffice.org/languages.html
|
see https://wiki.openoffice.org/w/index.php?title=Languages&oldid=230199
|
||||||
*/
|
*/
|
||||||
|
|
||||||
enum {
|
enum {
|
||||||
LANG_ar=96,
|
LANG_ar = 96,
|
||||||
LANG_az=100, // custom number
|
LANG_az = 100, // custom number
|
||||||
LANG_bg=41,
|
LANG_bg = 41,
|
||||||
LANG_ca=37,
|
LANG_ca = 37,
|
||||||
LANG_cs=42,
|
LANG_cs = 42,
|
||||||
LANG_da=45,
|
LANG_da = 45,
|
||||||
LANG_de=49,
|
LANG_de = 49,
|
||||||
LANG_el=30,
|
LANG_el = 30,
|
||||||
LANG_en=01,
|
LANG_en = 01,
|
||||||
LANG_es=34,
|
LANG_es = 34,
|
||||||
LANG_eu=10,
|
LANG_eu = 10,
|
||||||
LANG_fr=02,
|
LANG_fr = 02,
|
||||||
LANG_gl=38,
|
LANG_gl = 38,
|
||||||
LANG_hr=78,
|
LANG_hr = 78,
|
||||||
LANG_hu=36,
|
LANG_hu = 36,
|
||||||
LANG_it=39,
|
LANG_it = 39,
|
||||||
LANG_la=99, // custom number
|
LANG_la = 99, // custom number
|
||||||
LANG_lv=101, // custom number
|
LANG_lv = 101, // custom number
|
||||||
LANG_nl=31,
|
LANG_nl = 31,
|
||||||
LANG_pl=48,
|
LANG_pl = 48,
|
||||||
LANG_pt=03,
|
LANG_pt = 03,
|
||||||
LANG_ru=07,
|
LANG_ru = 07,
|
||||||
LANG_sv=50,
|
LANG_sv = 50,
|
||||||
LANG_tr=90,
|
LANG_tr = 90,
|
||||||
LANG_uk=80,
|
LANG_uk = 80,
|
||||||
LANG_xx=999
|
LANG_xx = 999
|
||||||
};
|
};
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
|
|
@ -4,12 +4,12 @@
|
||||||
This library is free software; you can redistribute it and/or
|
This library is free software; you can redistribute it and/or
|
||||||
modify it under the terms of the GNU Lesser General Public
|
modify it under the terms of the GNU Lesser General Public
|
||||||
License version 2.1 as published by the Free Software Foundation;
|
License version 2.1 as published by the Free Software Foundation;
|
||||||
|
|
||||||
This library is distributed in the hope that it will be useful,
|
This library is distributed in the hope that it will be useful,
|
||||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||||
Lesser General Public License for more details.
|
Lesser General Public License for more details.
|
||||||
|
|
||||||
You should have received a copy of the GNU Lesser General Public
|
You should have received a copy of the GNU Lesser General Public
|
||||||
License along with this library; If not, see
|
License along with this library; If not, see
|
||||||
<http://www.gnu.org/licenses/>.
|
<http://www.gnu.org/licenses/>.
|
||||||
|
@ -21,32 +21,30 @@
|
||||||
transformations out of c't 25/1999
|
transformations out of c't 25/1999
|
||||||
|
|
||||||
2007-07-26 Bjoern Jacke <bjoern at j3e.de>
|
2007-07-26 Bjoern Jacke <bjoern at j3e.de>
|
||||||
Released under MPL/GPL/LGPL tri-license for Hunspell
|
Released under MPL/GPL/LGPL tri-license for Hunspell
|
||||||
|
|
||||||
2007-08-23 Laszlo Nemeth <nemeth at OOo>
|
2007-08-23 Laszlo Nemeth <nemeth at OOo>
|
||||||
Porting from Aspell to Hunspell using C-like structs
|
Porting from Aspell to Hunspell using C-like structs
|
||||||
*/
|
*/
|
||||||
|
|
||||||
#ifndef __PHONETHXX__
|
#ifndef PHONET_HXX_
|
||||||
#define __PHONETHXX__
|
#define PHONET_HXX_
|
||||||
|
|
||||||
#define HASHSIZE 256
|
#define HASHSIZE 256
|
||||||
#define MAXPHONETLEN 256
|
#define MAXPHONETLEN 256
|
||||||
#define MAXPHONETUTF8LEN (MAXPHONETLEN * 4)
|
#define MAXPHONETUTF8LEN (MAXPHONETLEN * 4)
|
||||||
|
|
||||||
#include "hunvisapi.h"
|
#include "hunvisapi.h"
|
||||||
|
|
||||||
struct phonetable {
|
struct phonetable {
|
||||||
char utf8;
|
char utf8;
|
||||||
cs_info * lang;
|
std::vector<std::string> rules;
|
||||||
int num;
|
|
||||||
char * * rules;
|
|
||||||
int hash[HASHSIZE];
|
int hash[HASHSIZE];
|
||||||
};
|
};
|
||||||
|
|
||||||
LIBHUNSPELL_DLL_EXPORTED void init_phonet_hash(phonetable & parms);
|
LIBHUNSPELL_DLL_EXPORTED void init_phonet_hash(phonetable& parms);
|
||||||
|
|
||||||
LIBHUNSPELL_DLL_EXPORTED int phonet (const char * inword, char * target,
|
LIBHUNSPELL_DLL_EXPORTED std::string phonet(const std::string& inword,
|
||||||
int len, phonetable & phone);
|
phonetable& phone);
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
|
|
@ -1,27 +1,100 @@
|
||||||
/* string replacement list class */
|
/* ***** BEGIN LICENSE BLOCK *****
|
||||||
#ifndef _REPLIST_HXX_
|
* Version: MPL 1.1/GPL 2.0/LGPL 2.1
|
||||||
#define _REPLIST_HXX_
|
*
|
||||||
|
* Copyright (C) 2002-2017 Németh László
|
||||||
|
*
|
||||||
|
* The contents of this file are subject to the Mozilla Public License Version
|
||||||
|
* 1.1 (the "License"); you may not use this file except in compliance with
|
||||||
|
* the License. You may obtain a copy of the License at
|
||||||
|
* http://www.mozilla.org/MPL/
|
||||||
|
*
|
||||||
|
* Software distributed under the License is distributed on an "AS IS" basis,
|
||||||
|
* WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
|
||||||
|
* for the specific language governing rights and limitations under the
|
||||||
|
* License.
|
||||||
|
*
|
||||||
|
* Hunspell is based on MySpell which is Copyright (C) 2002 Kevin Hendricks.
|
||||||
|
*
|
||||||
|
* Contributor(s): David Einstein, Davide Prina, Giuseppe Modugno,
|
||||||
|
* Gianluca Turconi, Simon Brouwer, Noll János, Bíró Árpád,
|
||||||
|
* Goldman Eleonóra, Sarlós Tamás, Bencsáth Boldizsár, Halácsy Péter,
|
||||||
|
* Dvornik László, Gefferth András, Nagy Viktor, Varga Dániel, Chris Halls,
|
||||||
|
* Rene Engelhard, Bram Moolenaar, Dafydd Jones, Harri Pitkänen
|
||||||
|
*
|
||||||
|
* Alternatively, the contents of this file may be used under the terms of
|
||||||
|
* either the GNU General Public License Version 2 or later (the "GPL"), or
|
||||||
|
* the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
|
||||||
|
* in which case the provisions of the GPL or the LGPL are applicable instead
|
||||||
|
* of those above. If you wish to allow use of your version of this file only
|
||||||
|
* under the terms of either the GPL or the LGPL, and not to allow others to
|
||||||
|
* use your version of this file under the terms of the MPL, indicate your
|
||||||
|
* decision by deleting the provisions above and replace them with the notice
|
||||||
|
* and other provisions required by the GPL or the LGPL. If you do not delete
|
||||||
|
* the provisions above, a recipient may use your version of this file under
|
||||||
|
* the terms of any one of the MPL, the GPL or the LGPL.
|
||||||
|
*
|
||||||
|
* ***** END LICENSE BLOCK ***** */
|
||||||
|
/*
|
||||||
|
* Copyright 2002 Kevin B. Hendricks, Stratford, Ontario, Canada
|
||||||
|
* And Contributors. All rights reserved.
|
||||||
|
*
|
||||||
|
* Redistribution and use in source and binary forms, with or without
|
||||||
|
* modification, are permitted provided that the following conditions
|
||||||
|
* are met:
|
||||||
|
*
|
||||||
|
* 1. Redistributions of source code must retain the above copyright
|
||||||
|
* notice, this list of conditions and the following disclaimer.
|
||||||
|
*
|
||||||
|
* 2. Redistributions in binary form must reproduce the above copyright
|
||||||
|
* notice, this list of conditions and the following disclaimer in the
|
||||||
|
* documentation and/or other materials provided with the distribution.
|
||||||
|
*
|
||||||
|
* 3. All modifications to the source code must be clearly marked as
|
||||||
|
* such. Binary redistributions based on modified source code
|
||||||
|
* must be clearly marked as modified versions in the documentation
|
||||||
|
* and/or other materials provided with the distribution.
|
||||||
|
*
|
||||||
|
* THIS SOFTWARE IS PROVIDED BY KEVIN B. HENDRICKS AND CONTRIBUTORS
|
||||||
|
* ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||||
|
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
|
||||||
|
* FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL
|
||||||
|
* KEVIN B. HENDRICKS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
|
||||||
|
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
|
||||||
|
* BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
|
||||||
|
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
|
||||||
|
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
||||||
|
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
|
||||||
|
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
|
||||||
|
* SUCH DAMAGE.
|
||||||
|
*/
|
||||||
|
|
||||||
#include "hunvisapi.h"
|
/* string replacement list class */
|
||||||
|
#ifndef REPLIST_HXX_
|
||||||
|
#define REPLIST_HXX_
|
||||||
|
|
||||||
#include "w_char.hxx"
|
#include "w_char.hxx"
|
||||||
|
|
||||||
class LIBHUNSPELL_DLL_EXPORTED RepList
|
#include <string>
|
||||||
{
|
#include <vector>
|
||||||
protected:
|
|
||||||
replentry ** dat;
|
|
||||||
int size;
|
|
||||||
int pos;
|
|
||||||
|
|
||||||
public:
|
class RepList {
|
||||||
RepList(int n);
|
private:
|
||||||
~RepList();
|
RepList(const RepList&);
|
||||||
|
RepList& operator=(const RepList&);
|
||||||
|
|
||||||
int get_pos();
|
protected:
|
||||||
int add(char * pat1, char * pat2);
|
replentry** dat;
|
||||||
replentry * item(int n);
|
int size;
|
||||||
int near(const char * word);
|
int pos;
|
||||||
int match(const char * word, int n);
|
|
||||||
int conv(const char * word, char * dest);
|
public:
|
||||||
|
explicit RepList(int n);
|
||||||
|
~RepList();
|
||||||
|
|
||||||
|
int add(const std::string& pat1, const std::string& pat2);
|
||||||
|
replentry* item(int n);
|
||||||
|
int find(const char* word);
|
||||||
|
std::string replace(const char* word, int n, bool atstart);
|
||||||
|
bool conv(const std::string& word, std::string& dest);
|
||||||
};
|
};
|
||||||
#endif
|
#endif
|
||||||
|
|
|
@ -1,8 +1,76 @@
|
||||||
#ifndef _SUGGESTMGR_HXX_
|
/* ***** BEGIN LICENSE BLOCK *****
|
||||||
#define _SUGGESTMGR_HXX_
|
* Version: MPL 1.1/GPL 2.0/LGPL 2.1
|
||||||
|
*
|
||||||
|
* Copyright (C) 2002-2017 Németh László
|
||||||
|
*
|
||||||
|
* The contents of this file are subject to the Mozilla Public License Version
|
||||||
|
* 1.1 (the "License"); you may not use this file except in compliance with
|
||||||
|
* the License. You may obtain a copy of the License at
|
||||||
|
* http://www.mozilla.org/MPL/
|
||||||
|
*
|
||||||
|
* Software distributed under the License is distributed on an "AS IS" basis,
|
||||||
|
* WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
|
||||||
|
* for the specific language governing rights and limitations under the
|
||||||
|
* License.
|
||||||
|
*
|
||||||
|
* Hunspell is based on MySpell which is Copyright (C) 2002 Kevin Hendricks.
|
||||||
|
*
|
||||||
|
* Contributor(s): David Einstein, Davide Prina, Giuseppe Modugno,
|
||||||
|
* Gianluca Turconi, Simon Brouwer, Noll János, Bíró Árpád,
|
||||||
|
* Goldman Eleonóra, Sarlós Tamás, Bencsáth Boldizsár, Halácsy Péter,
|
||||||
|
* Dvornik László, Gefferth András, Nagy Viktor, Varga Dániel, Chris Halls,
|
||||||
|
* Rene Engelhard, Bram Moolenaar, Dafydd Jones, Harri Pitkänen
|
||||||
|
*
|
||||||
|
* Alternatively, the contents of this file may be used under the terms of
|
||||||
|
* either the GNU General Public License Version 2 or later (the "GPL"), or
|
||||||
|
* the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
|
||||||
|
* in which case the provisions of the GPL or the LGPL are applicable instead
|
||||||
|
* of those above. If you wish to allow use of your version of this file only
|
||||||
|
* under the terms of either the GPL or the LGPL, and not to allow others to
|
||||||
|
* use your version of this file under the terms of the MPL, indicate your
|
||||||
|
* decision by deleting the provisions above and replace them with the notice
|
||||||
|
* and other provisions required by the GPL or the LGPL. If you do not delete
|
||||||
|
* the provisions above, a recipient may use your version of this file under
|
||||||
|
* the terms of any one of the MPL, the GPL or the LGPL.
|
||||||
|
*
|
||||||
|
* ***** END LICENSE BLOCK ***** */
|
||||||
|
/*
|
||||||
|
* Copyright 2002 Kevin B. Hendricks, Stratford, Ontario, Canada
|
||||||
|
* And Contributors. All rights reserved.
|
||||||
|
*
|
||||||
|
* Redistribution and use in source and binary forms, with or without
|
||||||
|
* modification, are permitted provided that the following conditions
|
||||||
|
* are met:
|
||||||
|
*
|
||||||
|
* 1. Redistributions of source code must retain the above copyright
|
||||||
|
* notice, this list of conditions and the following disclaimer.
|
||||||
|
*
|
||||||
|
* 2. Redistributions in binary form must reproduce the above copyright
|
||||||
|
* notice, this list of conditions and the following disclaimer in the
|
||||||
|
* documentation and/or other materials provided with the distribution.
|
||||||
|
*
|
||||||
|
* 3. All modifications to the source code must be clearly marked as
|
||||||
|
* such. Binary redistributions based on modified source code
|
||||||
|
* must be clearly marked as modified versions in the documentation
|
||||||
|
* and/or other materials provided with the distribution.
|
||||||
|
*
|
||||||
|
* THIS SOFTWARE IS PROVIDED BY KEVIN B. HENDRICKS AND CONTRIBUTORS
|
||||||
|
* ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||||
|
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
|
||||||
|
* FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL
|
||||||
|
* KEVIN B. HENDRICKS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
|
||||||
|
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
|
||||||
|
* BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
|
||||||
|
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
|
||||||
|
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
||||||
|
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
|
||||||
|
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
|
||||||
|
* SUCH DAMAGE.
|
||||||
|
*/
|
||||||
|
|
||||||
|
#ifndef SUGGESTMGR_HXX_
|
||||||
|
#define SUGGESTMGR_HXX_
|
||||||
|
|
||||||
#define MAXSWL 100
|
|
||||||
#define MAXSWUTF8L (MAXSWL * 4)
|
|
||||||
#define MAX_ROOTS 100
|
#define MAX_ROOTS 100
|
||||||
#define MAX_WORDS 100
|
#define MAX_WORDS 100
|
||||||
#define MAX_GUESS 200
|
#define MAX_GUESS 200
|
||||||
|
@ -15,12 +83,10 @@
|
||||||
#define MINTIMER 100
|
#define MINTIMER 100
|
||||||
#define MAXPLUSTIMER 100
|
#define MAXPLUSTIMER 100
|
||||||
|
|
||||||
#define NGRAM_LONGER_WORSE (1 << 0)
|
#define NGRAM_LONGER_WORSE (1 << 0)
|
||||||
#define NGRAM_ANY_MISMATCH (1 << 1)
|
#define NGRAM_ANY_MISMATCH (1 << 1)
|
||||||
#define NGRAM_LOWERING (1 << 2)
|
#define NGRAM_LOWERING (1 << 2)
|
||||||
#define NGRAM_WEIGHTED (1 << 3)
|
#define NGRAM_WEIGHTED (1 << 3)
|
||||||
|
|
||||||
#include "hunvisapi.h"
|
|
||||||
|
|
||||||
#include "atypes.hxx"
|
#include "atypes.hxx"
|
||||||
#include "affixmgr.hxx"
|
#include "affixmgr.hxx"
|
||||||
|
@ -30,82 +96,93 @@
|
||||||
|
|
||||||
enum { LCS_UP, LCS_LEFT, LCS_UPLEFT };
|
enum { LCS_UP, LCS_LEFT, LCS_UPLEFT };
|
||||||
|
|
||||||
class LIBHUNSPELL_DLL_EXPORTED SuggestMgr
|
class SuggestMgr {
|
||||||
{
|
private:
|
||||||
char * ckey;
|
SuggestMgr(const SuggestMgr&);
|
||||||
int ckeyl;
|
SuggestMgr& operator=(const SuggestMgr&);
|
||||||
w_char * ckey_utf;
|
|
||||||
|
|
||||||
char * ctry;
|
private:
|
||||||
int ctryl;
|
char* ckey;
|
||||||
w_char * ctry_utf;
|
size_t ckeyl;
|
||||||
|
std::vector<w_char> ckey_utf;
|
||||||
|
|
||||||
AffixMgr* pAMgr;
|
char* ctry;
|
||||||
int maxSug;
|
size_t ctryl;
|
||||||
struct cs_info * csconv;
|
std::vector<w_char> ctry_utf;
|
||||||
int utf8;
|
|
||||||
int langnum;
|
|
||||||
int nosplitsugs;
|
|
||||||
int maxngramsugs;
|
|
||||||
int maxcpdsugs;
|
|
||||||
int complexprefixes;
|
|
||||||
|
|
||||||
|
AffixMgr* pAMgr;
|
||||||
|
unsigned int maxSug;
|
||||||
|
struct cs_info* csconv;
|
||||||
|
int utf8;
|
||||||
|
int langnum;
|
||||||
|
int nosplitsugs;
|
||||||
|
int maxngramsugs;
|
||||||
|
int maxcpdsugs;
|
||||||
|
int complexprefixes;
|
||||||
|
|
||||||
public:
|
public:
|
||||||
SuggestMgr(const char * tryme, int maxn, AffixMgr *aptr);
|
SuggestMgr(const char* tryme, unsigned int maxn, AffixMgr* aptr);
|
||||||
~SuggestMgr();
|
~SuggestMgr();
|
||||||
|
|
||||||
int suggest(char*** slst, const char * word, int nsug, int * onlycmpdsug);
|
void suggest(std::vector<std::string>& slst, const char* word, int* onlycmpdsug);
|
||||||
int ngsuggest(char ** wlst, char * word, int ns, HashMgr** pHMgr, int md);
|
void ngsuggest(std::vector<std::string>& slst, const char* word, const std::vector<HashMgr*>& rHMgr);
|
||||||
int suggest_auto(char*** slst, const char * word, int nsug);
|
|
||||||
int suggest_stems(char*** slst, const char * word, int nsug);
|
|
||||||
int suggest_pos_stems(char*** slst, const char * word, int nsug);
|
|
||||||
|
|
||||||
char * suggest_morph(const char * word);
|
std::string suggest_morph(const std::string& word);
|
||||||
char * suggest_gen(char ** pl, int pln, char * pattern);
|
std::string suggest_gen(const std::vector<std::string>& pl, const std::string& pattern);
|
||||||
char * suggest_morph_for_spelling_error(const char * word);
|
|
||||||
|
|
||||||
private:
|
private:
|
||||||
int testsug(char** wlst, const char * candidate, int wl, int ns, int cpdsuggest,
|
void testsug(std::vector<std::string>& wlst,
|
||||||
int * timer, clock_t * timelimit);
|
const std::string& candidate,
|
||||||
int checkword(const char *, int, int, int *, clock_t *);
|
int cpdsuggest,
|
||||||
int check_forbidden(const char *, int);
|
int* timer,
|
||||||
|
clock_t* timelimit);
|
||||||
|
int checkword(const std::string& word, int, int*, clock_t*);
|
||||||
|
int check_forbidden(const char*, int);
|
||||||
|
|
||||||
int capchars(char **, const char *, int, int);
|
void capchars(std::vector<std::string>&, const char*, int);
|
||||||
int replchars(char**, const char *, int, int);
|
int replchars(std::vector<std::string>&, const char*, int);
|
||||||
int doubletwochars(char**, const char *, int, int);
|
int doubletwochars(std::vector<std::string>&, const char*, int);
|
||||||
int forgotchar(char **, const char *, int, int);
|
int forgotchar(std::vector<std::string>&, const char*, int);
|
||||||
int swapchar(char **, const char *, int, int);
|
int swapchar(std::vector<std::string>&, const char*, int);
|
||||||
int longswapchar(char **, const char *, int, int);
|
int longswapchar(std::vector<std::string>&, const char*, int);
|
||||||
int movechar(char **, const char *, int, int);
|
int movechar(std::vector<std::string>&, const char*, int);
|
||||||
int extrachar(char **, const char *, int, int);
|
int extrachar(std::vector<std::string>&, const char*, int);
|
||||||
int badcharkey(char **, const char *, int, int);
|
int badcharkey(std::vector<std::string>&, const char*, int);
|
||||||
int badchar(char **, const char *, int, int);
|
int badchar(std::vector<std::string>&, const char*, int);
|
||||||
int twowords(char **, const char *, int, int);
|
int twowords(std::vector<std::string>&, const char*, int);
|
||||||
int fixstems(char **, const char *, int);
|
|
||||||
|
|
||||||
int capchars_utf(char **, const w_char *, int wl, int, int);
|
void capchars_utf(std::vector<std::string>&, const w_char*, int wl, int);
|
||||||
int doubletwochars_utf(char**, const w_char *, int wl, int, int);
|
int doubletwochars_utf(std::vector<std::string>&, const w_char*, int wl, int);
|
||||||
int forgotchar_utf(char**, const w_char *, int wl, int, int);
|
int forgotchar_utf(std::vector<std::string>&, const w_char*, int wl, int);
|
||||||
int extrachar_utf(char**, const w_char *, int wl, int, int);
|
int extrachar_utf(std::vector<std::string>&, const w_char*, int wl, int);
|
||||||
int badcharkey_utf(char **, const w_char *, int wl, int, int);
|
int badcharkey_utf(std::vector<std::string>&, const w_char*, int wl, int);
|
||||||
int badchar_utf(char **, const w_char *, int wl, int, int);
|
int badchar_utf(std::vector<std::string>&, const w_char*, int wl, int);
|
||||||
int swapchar_utf(char **, const w_char *, int wl, int, int);
|
int swapchar_utf(std::vector<std::string>&, const w_char*, int wl, int);
|
||||||
int longswapchar_utf(char **, const w_char *, int, int, int);
|
int longswapchar_utf(std::vector<std::string>&, const w_char*, int, int);
|
||||||
int movechar_utf(char **, const w_char *, int, int, int);
|
int movechar_utf(std::vector<std::string>&, const w_char*, int, int);
|
||||||
|
|
||||||
int mapchars(char**, const char *, int, int);
|
|
||||||
int map_related(const char *, char *, int, int, char ** wlst, int, int, const mapentry*, int, int *, clock_t *);
|
|
||||||
int ngram(int n, char * s1, const char * s2, int opt);
|
|
||||||
int mystrlen(const char * word);
|
|
||||||
int leftcommonsubstring(char * s1, const char * s2);
|
|
||||||
int commoncharacterpositions(char * s1, const char * s2, int * is_swap);
|
|
||||||
void bubblesort( char ** rwd, char ** rwd2, int * rsc, int n);
|
|
||||||
void lcs(const char * s, const char * s2, int * l1, int * l2, char ** result);
|
|
||||||
int lcslen(const char * s, const char* s2);
|
|
||||||
char * suggest_hentry_gen(hentry * rv, char * pattern);
|
|
||||||
|
|
||||||
|
int mapchars(std::vector<std::string>&, const char*, int);
|
||||||
|
int map_related(const char*,
|
||||||
|
std::string&,
|
||||||
|
int,
|
||||||
|
std::vector<std::string>& wlst,
|
||||||
|
int,
|
||||||
|
const std::vector<mapentry>&,
|
||||||
|
int*,
|
||||||
|
clock_t*);
|
||||||
|
int ngram(int n, const std::vector<w_char>& su1,
|
||||||
|
const std::vector<w_char>& su2, int opt);
|
||||||
|
int ngram(int n, const std::string& s1, const std::string& s2, int opt);
|
||||||
|
int mystrlen(const char* word);
|
||||||
|
int leftcommonsubstring(const std::vector<w_char>& su1,
|
||||||
|
const std::vector<w_char>& su2);
|
||||||
|
int leftcommonsubstring(const char* s1, const char* s2);
|
||||||
|
int commoncharacterpositions(const char* s1, const char* s2, int* is_swap);
|
||||||
|
void bubblesort(char** rwd, char** rwd2, int* rsc, int n);
|
||||||
|
void lcs(const char* s, const char* s2, int* l1, int* l2, char** result);
|
||||||
|
int lcslen(const char* s, const char* s2);
|
||||||
|
int lcslen(const std::string& s, const std::string& s2);
|
||||||
|
std::string suggest_hentry_gen(hentry* rv, const char* pattern);
|
||||||
};
|
};
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
|
|
@ -1,21 +1,72 @@
|
||||||
#ifndef __WCHARHXX__
|
/* ***** BEGIN LICENSE BLOCK *****
|
||||||
#define __WCHARHXX__
|
* Version: MPL 1.1/GPL 2.0/LGPL 2.1
|
||||||
|
*
|
||||||
|
* Copyright (C) 2002-2017 Németh László
|
||||||
|
*
|
||||||
|
* The contents of this file are subject to the Mozilla Public License Version
|
||||||
|
* 1.1 (the "License"); you may not use this file except in compliance with
|
||||||
|
* the License. You may obtain a copy of the License at
|
||||||
|
* http://www.mozilla.org/MPL/
|
||||||
|
*
|
||||||
|
* Software distributed under the License is distributed on an "AS IS" basis,
|
||||||
|
* WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
|
||||||
|
* for the specific language governing rights and limitations under the
|
||||||
|
* License.
|
||||||
|
*
|
||||||
|
* Hunspell is based on MySpell which is Copyright (C) 2002 Kevin Hendricks.
|
||||||
|
*
|
||||||
|
* Contributor(s): David Einstein, Davide Prina, Giuseppe Modugno,
|
||||||
|
* Gianluca Turconi, Simon Brouwer, Noll János, Bíró Árpád,
|
||||||
|
* Goldman Eleonóra, Sarlós Tamás, Bencsáth Boldizsár, Halácsy Péter,
|
||||||
|
* Dvornik László, Gefferth András, Nagy Viktor, Varga Dániel, Chris Halls,
|
||||||
|
* Rene Engelhard, Bram Moolenaar, Dafydd Jones, Harri Pitkänen
|
||||||
|
*
|
||||||
|
* Alternatively, the contents of this file may be used under the terms of
|
||||||
|
* either the GNU General Public License Version 2 or later (the "GPL"), or
|
||||||
|
* the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
|
||||||
|
* in which case the provisions of the GPL or the LGPL are applicable instead
|
||||||
|
* of those above. If you wish to allow use of your version of this file only
|
||||||
|
* under the terms of either the GPL or the LGPL, and not to allow others to
|
||||||
|
* use your version of this file under the terms of the MPL, indicate your
|
||||||
|
* decision by deleting the provisions above and replace them with the notice
|
||||||
|
* and other provisions required by the GPL or the LGPL. If you do not delete
|
||||||
|
* the provisions above, a recipient may use your version of this file under
|
||||||
|
* the terms of any one of the MPL, the GPL or the LGPL.
|
||||||
|
*
|
||||||
|
* ***** END LICENSE BLOCK ***** */
|
||||||
|
|
||||||
|
#ifndef W_CHAR_HXX_
|
||||||
|
#define W_CHAR_HXX_
|
||||||
|
|
||||||
|
#include <string>
|
||||||
|
|
||||||
#ifndef GCC
|
#ifndef GCC
|
||||||
typedef struct {
|
struct w_char {
|
||||||
#else
|
#else
|
||||||
typedef struct __attribute__ ((packed)) {
|
struct __attribute__((packed)) w_char {
|
||||||
#endif
|
#endif
|
||||||
unsigned char l;
|
unsigned char l;
|
||||||
unsigned char h;
|
unsigned char h;
|
||||||
} w_char;
|
|
||||||
|
friend bool operator<(const w_char a, const w_char b) {
|
||||||
|
unsigned short a_idx = (a.h << 8) + a.l;
|
||||||
|
unsigned short b_idx = (b.h << 8) + b.l;
|
||||||
|
return a_idx < b_idx;
|
||||||
|
}
|
||||||
|
|
||||||
|
friend bool operator==(const w_char a, const w_char b) {
|
||||||
|
return (((a).l == (b).l) && ((a).h == (b).h));
|
||||||
|
}
|
||||||
|
|
||||||
|
friend bool operator!=(const w_char a, const w_char b) {
|
||||||
|
return !(a == b);;
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
// two character arrays
|
// two character arrays
|
||||||
struct replentry {
|
struct replentry {
|
||||||
char * pattern;
|
std::string pattern;
|
||||||
char * pattern2;
|
std::string outstrings[4]; // med, ini, fin, isol
|
||||||
bool start;
|
|
||||||
bool end;
|
|
||||||
};
|
};
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
|
Binary file not shown.
BIN
maclibs/lib/libhunspell-1.6.1.dylib
Normal file
BIN
maclibs/lib/libhunspell-1.6.1.dylib
Normal file
Binary file not shown.
Loading…
Reference in a new issue