mirror of
https://github.com/xiaoyifang/goldendict-ng.git
synced 2024-11-27 19:24:08 +00:00
Add OpenCC libraries and data for Windows
This commit is contained in:
parent
b5ac249062
commit
9f32149a30
BIN
opencc/HKVariants.ocd
Normal file
BIN
opencc/HKVariants.ocd
Normal file
Binary file not shown.
BIN
opencc/HKVariantsPhrases.ocd
Normal file
BIN
opencc/HKVariantsPhrases.ocd
Normal file
Binary file not shown.
BIN
opencc/STCharacters.ocd
Normal file
BIN
opencc/STCharacters.ocd
Normal file
Binary file not shown.
BIN
opencc/STPhrases.ocd
Normal file
BIN
opencc/STPhrases.ocd
Normal file
Binary file not shown.
BIN
opencc/TSCharacters.ocd
Normal file
BIN
opencc/TSCharacters.ocd
Normal file
Binary file not shown.
BIN
opencc/TSPhrases.ocd
Normal file
BIN
opencc/TSPhrases.ocd
Normal file
Binary file not shown.
BIN
opencc/TWVariants.ocd
Normal file
BIN
opencc/TWVariants.ocd
Normal file
Binary file not shown.
33
opencc/s2hk.json
Normal file
33
opencc/s2hk.json
Normal file
|
@ -0,0 +1,33 @@
|
||||||
|
{
|
||||||
|
"name": "Simplified Chinese to Traditional Chinese (Hong Kong standard)",
|
||||||
|
"segmentation": {
|
||||||
|
"type": "mmseg",
|
||||||
|
"dict": {
|
||||||
|
"type": "ocd",
|
||||||
|
"file": "STPhrases.ocd"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"conversion_chain": [{
|
||||||
|
"dict": {
|
||||||
|
"type": "group",
|
||||||
|
"dicts": [{
|
||||||
|
"type": "ocd",
|
||||||
|
"file": "STPhrases.ocd"
|
||||||
|
}, {
|
||||||
|
"type": "ocd",
|
||||||
|
"file": "STCharacters.ocd"
|
||||||
|
}]
|
||||||
|
}
|
||||||
|
}, {
|
||||||
|
"dict": {
|
||||||
|
"type": "group",
|
||||||
|
"dicts": [{
|
||||||
|
"type": "ocd",
|
||||||
|
"file": "HKVariantsPhrases.ocd"
|
||||||
|
}, {
|
||||||
|
"type": "ocd",
|
||||||
|
"file": "HKVariants.ocd"
|
||||||
|
}]
|
||||||
|
}
|
||||||
|
}]
|
||||||
|
}
|
27
opencc/s2tw.json
Normal file
27
opencc/s2tw.json
Normal file
|
@ -0,0 +1,27 @@
|
||||||
|
{
|
||||||
|
"name": "Simplified Chinese to Traditional Chinese (Taiwan standard)",
|
||||||
|
"segmentation": {
|
||||||
|
"type": "mmseg",
|
||||||
|
"dict": {
|
||||||
|
"type": "ocd",
|
||||||
|
"file": "STPhrases.ocd"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"conversion_chain": [{
|
||||||
|
"dict": {
|
||||||
|
"type": "group",
|
||||||
|
"dicts": [{
|
||||||
|
"type": "ocd",
|
||||||
|
"file": "STPhrases.ocd"
|
||||||
|
}, {
|
||||||
|
"type": "ocd",
|
||||||
|
"file": "STCharacters.ocd"
|
||||||
|
}]
|
||||||
|
}
|
||||||
|
}, {
|
||||||
|
"dict": {
|
||||||
|
"type": "ocd",
|
||||||
|
"file": "TWVariants.ocd"
|
||||||
|
}
|
||||||
|
}]
|
||||||
|
}
|
22
opencc/t2s.json
Normal file
22
opencc/t2s.json
Normal file
|
@ -0,0 +1,22 @@
|
||||||
|
{
|
||||||
|
"name": "Traditional Chinese to Simplified Chinese",
|
||||||
|
"segmentation": {
|
||||||
|
"type": "mmseg",
|
||||||
|
"dict": {
|
||||||
|
"type": "ocd",
|
||||||
|
"file": "TSPhrases.ocd"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"conversion_chain": [{
|
||||||
|
"dict": {
|
||||||
|
"type": "group",
|
||||||
|
"dicts": [{
|
||||||
|
"type": "ocd",
|
||||||
|
"file": "TSPhrases.ocd"
|
||||||
|
}, {
|
||||||
|
"type": "ocd",
|
||||||
|
"file": "TSCharacters.ocd"
|
||||||
|
}]
|
||||||
|
}
|
||||||
|
}]
|
||||||
|
}
|
53
winlibs/include/opencc/BinaryDict.hpp
Normal file
53
winlibs/include/opencc/BinaryDict.hpp
Normal file
|
@ -0,0 +1,53 @@
|
||||||
|
/*
|
||||||
|
* Open Chinese Convert
|
||||||
|
*
|
||||||
|
* Copyright 2010-2014 BYVoid <byvoid@byvoid.com>
|
||||||
|
*
|
||||||
|
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
* you may not use this file except in compliance with the License.
|
||||||
|
* You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
|
||||||
|
#pragma once
|
||||||
|
|
||||||
|
#include "Common.hpp"
|
||||||
|
#include "SerializableDict.hpp"
|
||||||
|
|
||||||
|
namespace opencc {
|
||||||
|
/**
|
||||||
|
* Binary dictionary for faster deserialization
|
||||||
|
* @ingroup opencc_cpp_api
|
||||||
|
*/
|
||||||
|
class OPENCC_EXPORT BinaryDict : public SerializableDict {
|
||||||
|
public:
|
||||||
|
BinaryDict(const LexiconPtr& _lexicon) : lexicon(_lexicon) {}
|
||||||
|
|
||||||
|
virtual ~BinaryDict() {}
|
||||||
|
|
||||||
|
virtual void SerializeToFile(FILE* fp) const;
|
||||||
|
|
||||||
|
static BinaryDictPtr NewFromFile(FILE* fp);
|
||||||
|
|
||||||
|
const LexiconPtr& GetLexicon() const { return lexicon; }
|
||||||
|
|
||||||
|
size_t KeyMaxLength() const;
|
||||||
|
|
||||||
|
private:
|
||||||
|
LexiconPtr lexicon;
|
||||||
|
string keyBuffer;
|
||||||
|
string valueBuffer;
|
||||||
|
|
||||||
|
void ConstructBuffer(string& keyBuffer, vector<size_t>& keyOffset,
|
||||||
|
size_t& keyTotalLength, string& valueBuffer,
|
||||||
|
vector<size_t>& valueOffset,
|
||||||
|
size_t& valueTotalLength) const;
|
||||||
|
};
|
||||||
|
}
|
92
winlibs/include/opencc/Common.hpp
Normal file
92
winlibs/include/opencc/Common.hpp
Normal file
|
@ -0,0 +1,92 @@
|
||||||
|
/*
|
||||||
|
* Open Chinese Convert
|
||||||
|
*
|
||||||
|
* Copyright 2010-2014 BYVoid <byvoid@byvoid.com>
|
||||||
|
*
|
||||||
|
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
* you may not use this file except in compliance with the License.
|
||||||
|
* You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
|
||||||
|
#pragma once
|
||||||
|
|
||||||
|
// Microsoft Visual C++ only: switch off a fixed set of compiler warnings.
// NOTE(review): there is no matching push/pop in this header, so the
// suppression stays in effect for whatever is compiled after the include.
#if defined(_MSC_VER) && (_MSC_VER >= 1020)
#pragma warning(disable : 4251 4266 4350 4503 4512 4514 4710 4820)
#endif
|
||||||
|
|
||||||
|
#include <algorithm>
|
||||||
|
#include <fstream>
|
||||||
|
#include <functional>
|
||||||
|
#include <iostream>
|
||||||
|
#include <list>
|
||||||
|
#include <map>
|
||||||
|
#include <memory>
|
||||||
|
#include <sstream>
|
||||||
|
#include <string>
|
||||||
|
#include <vector>
|
||||||
|
|
||||||
|
#include <cassert>
|
||||||
|
#include <cstddef>
|
||||||
|
#include <cstdio>
|
||||||
|
#include <cstring>
|
||||||
|
#include <ctime>
|
||||||
|
|
||||||
|
#include "Exception.hpp"
|
||||||
|
#include "Export.hpp"
|
||||||
|
#include "Optional.hpp"
|
||||||
|
|
||||||
|
using std::list;
|
||||||
|
using std::string;
|
||||||
|
using std::vector;
|
||||||
|
|
||||||
|
// Forward declarations and aliases
|
||||||
|
namespace opencc {

// Forward declarations of the library classes, so headers can refer to them
// without including each other's full definitions.
class BinaryDict;
class Config;
class Conversion;
class ConversionChain;
class Converter;
class DartsDict;
class Dict;
class DictEntry;
class DictGroup;
class Lexicon;
class MultiValueDictEntry;
class NoValueDictEntry;
class Segmentation;
class Segments;
class SerializableDict;
class SingleValueDictEntry;
class TextDict;

// Shared-ownership handle types used throughout the public API.
using BinaryDictPtr = std::shared_ptr<BinaryDict>;
using ConversionPtr = std::shared_ptr<Conversion>;
using ConversionChainPtr = std::shared_ptr<ConversionChain>;
using ConverterPtr = std::shared_ptr<Converter>;
using DartsDictPtr = std::shared_ptr<DartsDict>;
using DictPtr = std::shared_ptr<Dict>;
using DictGroupPtr = std::shared_ptr<DictGroup>;
using LexiconPtr = std::shared_ptr<Lexicon>;
using SegmentationPtr = std::shared_ptr<Segmentation>;
using SegmentsPtr = std::shared_ptr<Segments>;
using SerializableDictPtr = std::shared_ptr<SerializableDict>;
using TextDictPtr = std::shared_ptr<TextDict>;

} // namespace opencc
|
||||||
|
|
||||||
|
// PACKAGE_DATA_DIRECTORY is the build-supplied PKGDATADIR string literal with
// a trailing "/", or the empty string when PKGDATADIR is not defined.
#ifdef PKGDATADIR
const string PACKAGE_DATA_DIRECTORY = PKGDATADIR "/";
#else  // PKGDATADIR not defined
const string PACKAGE_DATA_DIRECTORY = "";
#endif // ifdef PKGDATADIR
|
||||||
|
|
||||||
|
// Fallback version string, used only when the build did not define VERSION.
#if !defined(VERSION)
#define VERSION "1.0.*"
#endif // !defined(VERSION)
|
41
winlibs/include/opencc/Config.hpp
Normal file
41
winlibs/include/opencc/Config.hpp
Normal file
|
@ -0,0 +1,41 @@
|
||||||
|
/*
|
||||||
|
* Open Chinese Convert
|
||||||
|
*
|
||||||
|
* Copyright 2010-2014 BYVoid <byvoid@byvoid.com>
|
||||||
|
*
|
||||||
|
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
* you may not use this file except in compliance with the License.
|
||||||
|
* You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
|
||||||
|
#pragma once
|
||||||
|
|
||||||
|
#include "Common.hpp"
|
||||||
|
|
||||||
|
namespace opencc {
|
||||||
|
/**
|
||||||
|
* Configuration loader
|
||||||
|
* @ingroup opencc_cpp_api
|
||||||
|
*/
|
||||||
|
class OPENCC_EXPORT Config {
|
||||||
|
public:
|
||||||
|
Config();
|
||||||
|
|
||||||
|
virtual ~Config();
|
||||||
|
|
||||||
|
ConverterPtr NewFromString(const string& json, const string& configDirectory);
|
||||||
|
|
||||||
|
ConverterPtr NewFromFile(const string& fileName);
|
||||||
|
|
||||||
|
private:
|
||||||
|
void* internal;
|
||||||
|
};
|
||||||
|
}
|
47
winlibs/include/opencc/Conversion.hpp
Normal file
47
winlibs/include/opencc/Conversion.hpp
Normal file
|
@ -0,0 +1,47 @@
|
||||||
|
/*
|
||||||
|
* Open Chinese Convert
|
||||||
|
*
|
||||||
|
* Copyright 2010-2014 BYVoid <byvoid@byvoid.com>
|
||||||
|
*
|
||||||
|
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
* you may not use this file except in compliance with the License.
|
||||||
|
* You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
|
||||||
|
#pragma once
|
||||||
|
|
||||||
|
#include "Common.hpp"
|
||||||
|
#include "Segmentation.hpp"
|
||||||
|
|
||||||
|
namespace opencc {
|
||||||
|
/**
|
||||||
|
* Conversion interface
|
||||||
|
* @ingroup opencc_cpp_api
|
||||||
|
*/
|
||||||
|
class OPENCC_EXPORT Conversion {
|
||||||
|
public:
|
||||||
|
Conversion(DictPtr _dict) : dict(_dict) {}
|
||||||
|
|
||||||
|
// Convert single phrase
|
||||||
|
string Convert(const string& phrase) const;
|
||||||
|
|
||||||
|
// Convert single phrase
|
||||||
|
string Convert(const char* phrase) const;
|
||||||
|
|
||||||
|
// Convert segmented text
|
||||||
|
SegmentsPtr Convert(const SegmentsPtr& input) const;
|
||||||
|
|
||||||
|
const DictPtr GetDict() const { return dict; }
|
||||||
|
|
||||||
|
private:
|
||||||
|
const DictPtr dict;
|
||||||
|
};
|
||||||
|
}
|
41
winlibs/include/opencc/ConversionChain.hpp
Normal file
41
winlibs/include/opencc/ConversionChain.hpp
Normal file
|
@ -0,0 +1,41 @@
|
||||||
|
/*
|
||||||
|
* Open Chinese Convert
|
||||||
|
*
|
||||||
|
* Copyright 2010-2014 BYVoid <byvoid@byvoid.com>
|
||||||
|
*
|
||||||
|
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
* you may not use this file except in compliance with the License.
|
||||||
|
* You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
|
||||||
|
#pragma once
|
||||||
|
|
||||||
|
#include "Common.hpp"
|
||||||
|
#include "Conversion.hpp"
|
||||||
|
|
||||||
|
namespace opencc {
|
||||||
|
/**
|
||||||
|
* Chain of conversions
|
||||||
|
* Consists of a list of conversions. Converts input in sequence.
|
||||||
|
* @ingroup opencc_cpp_api
|
||||||
|
*/
|
||||||
|
class OPENCC_EXPORT ConversionChain {
|
||||||
|
public:
|
||||||
|
ConversionChain(const list<ConversionPtr> _conversions);
|
||||||
|
|
||||||
|
SegmentsPtr Convert(const SegmentsPtr& input) const;
|
||||||
|
|
||||||
|
const list<ConversionPtr> GetConversions() const { return conversions; }
|
||||||
|
|
||||||
|
private:
|
||||||
|
const list<ConversionPtr> conversions;
|
||||||
|
};
|
||||||
|
}
|
51
winlibs/include/opencc/Converter.hpp
Normal file
51
winlibs/include/opencc/Converter.hpp
Normal file
|
@ -0,0 +1,51 @@
|
||||||
|
/*
|
||||||
|
* Open Chinese Convert
|
||||||
|
*
|
||||||
|
* Copyright 2010-2014 BYVoid <byvoid@byvoid.com>
|
||||||
|
*
|
||||||
|
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
* you may not use this file except in compliance with the License.
|
||||||
|
* You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
|
||||||
|
#pragma once
|
||||||
|
|
||||||
|
#include "Common.hpp"
|
||||||
|
#include "Segmentation.hpp"
|
||||||
|
|
||||||
|
namespace opencc {
|
||||||
|
/**
|
||||||
|
* Controller of segmentation and conversion
|
||||||
|
* @ingroup opencc_cpp_api
|
||||||
|
*/
|
||||||
|
class OPENCC_EXPORT Converter {
|
||||||
|
public:
|
||||||
|
Converter(const string& _name, SegmentationPtr _segmentation,
|
||||||
|
ConversionChainPtr _conversionChain)
|
||||||
|
: name(_name), segmentation(_segmentation),
|
||||||
|
conversionChain(_conversionChain) {}
|
||||||
|
|
||||||
|
string Convert(const string& text) const;
|
||||||
|
|
||||||
|
size_t Convert(const char* input, char* output) const;
|
||||||
|
|
||||||
|
const SegmentationPtr GetSegmentation() const { return segmentation; }
|
||||||
|
|
||||||
|
const ConversionChainPtr GetConversionChain() const {
|
||||||
|
return conversionChain;
|
||||||
|
}
|
||||||
|
|
||||||
|
private:
|
||||||
|
const string name;
|
||||||
|
const SegmentationPtr segmentation;
|
||||||
|
const ConversionChainPtr conversionChain;
|
||||||
|
};
|
||||||
|
}
|
59
winlibs/include/opencc/DartsDict.hpp
Normal file
59
winlibs/include/opencc/DartsDict.hpp
Normal file
|
@ -0,0 +1,59 @@
|
||||||
|
/*
|
||||||
|
* Open Chinese Convert
|
||||||
|
*
|
||||||
|
* Copyright 2010-2014 BYVoid <byvoid@byvoid.com>
|
||||||
|
*
|
||||||
|
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
* you may not use this file except in compliance with the License.
|
||||||
|
* You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
|
||||||
|
#pragma once
|
||||||
|
|
||||||
|
#include "Common.hpp"
|
||||||
|
#include "SerializableDict.hpp"
|
||||||
|
|
||||||
|
namespace opencc {
|
||||||
|
/**
|
||||||
|
* Darts dictionary
|
||||||
|
* @ingroup opencc_cpp_api
|
||||||
|
*/
|
||||||
|
class OPENCC_EXPORT DartsDict : public Dict, public SerializableDict {
|
||||||
|
public:
|
||||||
|
virtual ~DartsDict();
|
||||||
|
|
||||||
|
virtual size_t KeyMaxLength() const;
|
||||||
|
|
||||||
|
virtual Optional<const DictEntry*> Match(const char* word) const;
|
||||||
|
|
||||||
|
virtual Optional<const DictEntry*> MatchPrefix(const char* word) const;
|
||||||
|
|
||||||
|
virtual LexiconPtr GetLexicon() const;
|
||||||
|
|
||||||
|
virtual void SerializeToFile(FILE* fp) const;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Constructs a DartsDict from another dictionary.
|
||||||
|
*/
|
||||||
|
static DartsDictPtr NewFromDict(const Dict& thatDict);
|
||||||
|
|
||||||
|
static DartsDictPtr NewFromFile(FILE* fp);
|
||||||
|
|
||||||
|
private:
|
||||||
|
DartsDict();
|
||||||
|
|
||||||
|
size_t maxLength;
|
||||||
|
LexiconPtr lexicon;
|
||||||
|
|
||||||
|
class DartsInternal;
|
||||||
|
DartsInternal* internal;
|
||||||
|
};
|
||||||
|
}
|
81
winlibs/include/opencc/Dict.hpp
Normal file
81
winlibs/include/opencc/Dict.hpp
Normal file
|
@ -0,0 +1,81 @@
|
||||||
|
/*
|
||||||
|
* Open Chinese Convert
|
||||||
|
*
|
||||||
|
* Copyright 2010-2014 BYVoid <byvoid@byvoid.com>
|
||||||
|
*
|
||||||
|
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
* you may not use this file except in compliance with the License.
|
||||||
|
* You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
|
||||||
|
#pragma once
|
||||||
|
|
||||||
|
#include "Common.hpp"
|
||||||
|
#include "DictEntry.hpp"
|
||||||
|
|
||||||
|
namespace opencc {
|
||||||
|
/**
|
||||||
|
* Abstract class of dictionary
|
||||||
|
* @ingroup opencc_cpp_api
|
||||||
|
*/
|
||||||
|
class OPENCC_EXPORT Dict {
|
||||||
|
public:
|
||||||
|
/**
|
||||||
|
* Matches a word exactly and returns the DictEntry or Optional::Null().
|
||||||
|
*/
|
||||||
|
virtual Optional<const DictEntry*> Match(const char* word) const = 0;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Matches a word exactly and returns the DictEntry or Optional::Null().
|
||||||
|
*/
|
||||||
|
Optional<const DictEntry*> Match(const string& word) const {
|
||||||
|
return Match(word.c_str());
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Matches the longest matched prefix of a word.
|
||||||
|
* For example given a dictionary having "a", "an", "b", "ba", "ban", "bana",
|
||||||
|
* the longest prefix of "banana" matched is "bana".
|
||||||
|
*/
|
||||||
|
virtual Optional<const DictEntry*> MatchPrefix(const char* word) const;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Matches the longest matched prefix of a word.
|
||||||
|
*/
|
||||||
|
Optional<const DictEntry*> MatchPrefix(const string& word) const {
|
||||||
|
return MatchPrefix(word.c_str());
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Returns all matched prefixes of a word, sorted by the length (desc).
|
||||||
|
* For example given a dictionary having "a", "an", "b", "ba", "ban", "bana",
|
||||||
|
* all the matched prefixes of "banana" are "bana", "ban", "ba", "b".
|
||||||
|
*/
|
||||||
|
virtual vector<const DictEntry*> MatchAllPrefixes(const char* word) const;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Returns all matched prefixes of a word, sorted by the length (desc).
|
||||||
|
*/
|
||||||
|
vector<const DictEntry*> MatchAllPrefixes(const string& word) const {
|
||||||
|
return MatchAllPrefixes(word.c_str());
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Returns the length of the longest key in the dictionary.
|
||||||
|
*/
|
||||||
|
virtual size_t KeyMaxLength() const = 0;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Returns all entries in the dictionary.
|
||||||
|
*/
|
||||||
|
virtual LexiconPtr GetLexicon() const = 0;
|
||||||
|
};
|
||||||
|
}
|
197
winlibs/include/opencc/DictEntry.hpp
Normal file
197
winlibs/include/opencc/DictEntry.hpp
Normal file
|
@ -0,0 +1,197 @@
|
||||||
|
/*
|
||||||
|
* Open Chinese Convert
|
||||||
|
*
|
||||||
|
* Copyright 2010-2014 BYVoid <byvoid@byvoid.com>
|
||||||
|
*
|
||||||
|
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
* you may not use this file except in compliance with the License.
|
||||||
|
* You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
|
||||||
|
#pragma once
|
||||||
|
|
||||||
|
#include "Common.hpp"
|
||||||
|
#include "UTF8Util.hpp"
|
||||||
|
#include "Segments.hpp"
|
||||||
|
|
||||||
|
namespace opencc {
|
||||||
|
/**
|
||||||
|
* Key-values pair entry
|
||||||
|
* @ingroup opencc_cpp_api
|
||||||
|
*/
|
||||||
|
class OPENCC_EXPORT DictEntry {
|
||||||
|
public:
|
||||||
|
virtual ~DictEntry() {}
|
||||||
|
|
||||||
|
virtual const char* Key() const = 0;
|
||||||
|
|
||||||
|
virtual vector<const char*> Values() const = 0;
|
||||||
|
|
||||||
|
virtual const char* GetDefault() const = 0;
|
||||||
|
|
||||||
|
virtual size_t NumValues() const = 0;
|
||||||
|
|
||||||
|
virtual string ToString() const = 0;
|
||||||
|
|
||||||
|
size_t KeyLength() const { return strlen(Key()); }
|
||||||
|
|
||||||
|
bool operator<(const DictEntry& that) const {
|
||||||
|
return strcmp(Key(), that.Key()) < 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
bool operator==(const DictEntry& that) const {
|
||||||
|
return strcmp(Key(), that.Key()) == 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
static bool PtrLessThan(const DictEntry* a, const DictEntry* b) {
|
||||||
|
return *a < *b;
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
/**
 * Entry that owns a key and has no values.
 * Its default "value" is the key itself.
 */
class OPENCC_EXPORT NoValueDictEntry : public DictEntry {
public:
  /** Copies @p _key into the entry. */
  NoValueDictEntry(const string& _key) : key(_key) {}

  virtual ~NoValueDictEntry() {}

  virtual const char* Key() const { return key.c_str(); }

  /** Always an empty vector. */
  virtual vector<const char*> Values() const {
    vector<const char*> none;
    return none;
  }

  /** Falls back to the key because there is no value. */
  virtual const char* GetDefault() const { return Key(); }

  virtual size_t NumValues() const { return 0; }

  /** The textual form is just the key. */
  virtual string ToString() const { return key; }

private:
  string key;
};
|
||||||
|
|
||||||
|
/**
 * Abstract entry with exactly one value.
 * Subclasses provide Key() and Value(); everything else derives from them.
 */
class OPENCC_EXPORT SingleValueDictEntry : public DictEntry {
public:
  /** The single value of the entry. */
  virtual const char* Value() const = 0;

  /** One-element vector holding Value(). */
  virtual vector<const char*> Values() const {
    vector<const char*> single(1, Value());
    return single;
  }

  virtual const char* GetDefault() const { return Value(); }

  virtual size_t NumValues() const { return 1; }

  /** Key and value separated by a single tab character. */
  virtual string ToString() const {
    string text(Key());
    text += "\t";
    text += Value();
    return text;
  }
};
|
||||||
|
|
||||||
|
/**
 * Single-value entry that owns copies of its key and value strings.
 */
class OPENCC_EXPORT StrSingleValueDictEntry : public SingleValueDictEntry {
public:
  /** Copies both @p _key and @p _value into the entry. */
  StrSingleValueDictEntry(const string& _key, const string& _value)
      : key(_key),
        value(_value) {}

  virtual ~StrSingleValueDictEntry() {}

  // The returned pointers refer to the strings owned by this entry.
  virtual const char* Key() const { return key.c_str(); }

  virtual const char* Value() const { return value.c_str(); }

private:
  string key;
  string value;
};
|
||||||
|
|
||||||
|
/**
 * Abstract entry that may hold any number of values.
 */
class OPENCC_EXPORT MultiValueDictEntry : public DictEntry {
public:
  /** First value when there is at least one, otherwise the key. */
  virtual const char* GetDefault() const {
    return NumValues() > 0 ? Values().at(0) : Key();
  }

  /** Textual form; defined in the library, not in this header. */
  virtual string ToString() const;
};
|
||||||
|
|
||||||
|
/**
 * Multi-value entry that owns copies of its key and of every value string.
 */
class OPENCC_EXPORT StrMultiValueDictEntry : public MultiValueDictEntry {
public:
  /** Copies the key and the whole value list. */
  StrMultiValueDictEntry(const string& _key, const vector<string>& _values)
      : key(_key), values(_values) {}

  /** Copies the key and turns each C string in @p _values into an owned string. */
  StrMultiValueDictEntry(const string& _key, const vector<const char*>& _values)
      : key(_key) {
    values.reserve(_values.size());
    for (const char* str : _values) {
      values.push_back(str);
    }
  }

  virtual ~StrMultiValueDictEntry() {}

  virtual const char* Key() const { return key.c_str(); }

  size_t NumValues() const { return values.size(); }

  /**
   * Returns one pointer per stored value, in storage order.
   * The pointers refer to the strings owned by this entry.
   */
  vector<const char*> Values() const {
    // Named differently from the member so the local no longer shadows it.
    vector<const char*> pointers;
    // The final size is known up front, so allocate once.
    pointers.reserve(values.size());
    for (const string& value : values) {
      pointers.push_back(value.c_str());
    }
    return pointers;
  }

private:
  string key;
  vector<string> values;
};
|
||||||
|
|
||||||
|
/**
 * Multi-value entry that stores raw pointers instead of owned strings.
 *
 * Neither the key nor the values are copied: only the pointers are kept, so
 * the character data they point at must stay valid for as long as the entry
 * is used.
 */
class OPENCC_EXPORT PtrDictEntry : public MultiValueDictEntry {
public:
  /** Stores @p _key and a copy of the pointer list @p _values. */
  PtrDictEntry(const char* _key, const vector<const char*>& _values)
      : key(_key),
        values(_values) {}

  virtual ~PtrDictEntry() {}

  virtual const char* Key() const { return key; }

  size_t NumValues() const { return values.size(); }

  /** Returns a copy of the stored pointer list. */
  vector<const char*> Values() const { return values; }

private:
  const char* key;
  vector<const char*> values;
};
|
||||||
|
|
||||||
|
class OPENCC_EXPORT DictEntryFactory {
|
||||||
|
public:
|
||||||
|
static DictEntry* New(const string& key) { return new NoValueDictEntry(key); }
|
||||||
|
|
||||||
|
static DictEntry* New(const string& key, const string& value) {
|
||||||
|
return new StrSingleValueDictEntry(key, value);
|
||||||
|
}
|
||||||
|
|
||||||
|
static DictEntry* New(const string& key, const vector<string>& values) {
|
||||||
|
return new StrMultiValueDictEntry(key, values);
|
||||||
|
}
|
||||||
|
|
||||||
|
static DictEntry* New(const DictEntry* entry) {
|
||||||
|
if (entry->NumValues() == 0) {
|
||||||
|
return new NoValueDictEntry(entry->Key());
|
||||||
|
} else if (entry->NumValues() == 1) {
|
||||||
|
const auto svEntry = static_cast<const SingleValueDictEntry*>(entry);
|
||||||
|
return new StrSingleValueDictEntry(svEntry->Key(), svEntry->Value());
|
||||||
|
} else {
|
||||||
|
const auto mvEntry = static_cast<const MultiValueDictEntry*>(entry);
|
||||||
|
return new StrMultiValueDictEntry(mvEntry->Key(), mvEntry->Values());
|
||||||
|
}
|
||||||
|
}
|
||||||
|
};
|
||||||
|
}
|
53
winlibs/include/opencc/DictGroup.hpp
Normal file
53
winlibs/include/opencc/DictGroup.hpp
Normal file
|
@ -0,0 +1,53 @@
|
||||||
|
/*
|
||||||
|
* Open Chinese Convert
|
||||||
|
*
|
||||||
|
* Copyright 2010-2014 BYVoid <byvoid@byvoid.com>
|
||||||
|
*
|
||||||
|
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
* you may not use this file except in compliance with the License.
|
||||||
|
* You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
|
||||||
|
#pragma once
|
||||||
|
|
||||||
|
#include "Common.hpp"
|
||||||
|
#include "Dict.hpp"
|
||||||
|
|
||||||
|
namespace opencc {
|
||||||
|
/**
|
||||||
|
* Group of dictionaries
|
||||||
|
* @ingroup opencc_cpp_api
|
||||||
|
*/
|
||||||
|
class OPENCC_EXPORT DictGroup : public Dict {
|
||||||
|
public:
|
||||||
|
DictGroup(const list<DictPtr>& dicts);
|
||||||
|
|
||||||
|
static DictGroupPtr NewFromDict(const Dict& dict);
|
||||||
|
|
||||||
|
virtual ~DictGroup();
|
||||||
|
|
||||||
|
virtual size_t KeyMaxLength() const;
|
||||||
|
|
||||||
|
virtual Optional<const DictEntry*> Match(const char* word) const;
|
||||||
|
|
||||||
|
virtual Optional<const DictEntry*> MatchPrefix(const char* word) const;
|
||||||
|
|
||||||
|
virtual vector<const DictEntry*> MatchAllPrefixes(const char* word) const;
|
||||||
|
|
||||||
|
virtual LexiconPtr GetLexicon() const;
|
||||||
|
|
||||||
|
const list<DictPtr> GetDicts() const { return dicts; }
|
||||||
|
|
||||||
|
private:
|
||||||
|
const size_t keyMaxLength;
|
||||||
|
const list<DictPtr> dicts;
|
||||||
|
};
|
||||||
|
}
|
88
winlibs/include/opencc/Exception.hpp
Normal file
88
winlibs/include/opencc/Exception.hpp
Normal file
|
@ -0,0 +1,88 @@
|
||||||
|
/*
|
||||||
|
* Open Chinese Convert
|
||||||
|
*
|
||||||
|
* Copyright 2010-2014 BYVoid <byvoid@byvoid.com>
|
||||||
|
*
|
||||||
|
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
* you may not use this file except in compliance with the License.
|
||||||
|
* You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
|
||||||
|
#pragma once
|
||||||
|
|
||||||
|
#include <sstream>
|
||||||
|
#include <stdexcept>
|
||||||
|
#include <string>
|
||||||
|
|
||||||
|
#include "Export.hpp"
|
||||||
|
|
||||||
|
#if defined(_MSC_VER) && (_MSC_VER < 1900)

// Until Visual Studio 2013 (12.0), C++ 11 "noexcept" qualifier is not
// supported, so it is defined away for those compilers only. Visual Studio
// 2015 (_MSC_VER 1900) and later understand the keyword; turning it into an
// empty macro there would break any later code that uses noexcept.
#define noexcept

#endif // defined(_MSC_VER) && (_MSC_VER < 1900)
|
||||||
|
|
||||||
|
namespace opencc {
|
||||||
|
|
||||||
|
/**
 * Base class of the exceptions declared in this header.
 * Carries a message string that what() exposes.
 */
class OPENCC_EXPORT Exception : public std::exception {
public:
  /** Creates an exception with an empty message. */
  Exception() {}

  virtual ~Exception() throw() {}

  /** Creates an exception carrying a copy of @p _message. */
  Exception(const std::string& _message) : message(_message) {}

  /** Returns the stored message; the pointer refers to this object's string. */
  virtual const char* what() const noexcept { return message.c_str(); }

protected:
  // Writable by subclasses, which may replace it after construction.
  std::string message;
};
|
||||||
|
|
||||||
|
/**
 * Exception whose message is the file name followed by
 * " not found or not accessible.".
 */
class OPENCC_EXPORT FileNotFound : public Exception {
public:
  FileNotFound(const std::string& fileName)
      : Exception(std::string(fileName).append(" not found or not accessible.")) {}
};
|
||||||
|
|
||||||
|
/**
 * Exception whose message is the file name followed by " not writable.".
 */
class OPENCC_EXPORT FileNotWritable : public Exception {
public:
  FileNotWritable(const std::string& fileName)
      : Exception(std::string(fileName).append(" not writable.")) {}
};
|
||||||
|
|
||||||
|
/**
 * Exception whose message is "Invalid format: " followed by the given text.
 */
class OPENCC_EXPORT InvalidFormat : public Exception {
public:
  InvalidFormat(const std::string& message)
      : Exception(std::string("Invalid format: ").append(message)) {}
};
|
||||||
|
|
||||||
|
/**
 * Exception that reports a line number together with a description.
 * The final message reads
 * "Invalid text dictionary at line <lineNum>: <_message>".
 */
class OPENCC_EXPORT InvalidTextDictionary : public InvalidFormat {
public:
  InvalidTextDictionary(const std::string& _message, size_t lineNum)
      : InvalidFormat("") {
    // The base constructor has already stored its own prefix; the inherited
    // message is replaced wholesale by the text assembled below.
    std::ostringstream text;
    text << "Invalid text dictionary at line " << lineNum;
    text << ": " << _message;
    message = text.str();
  }
};
|
||||||
|
|
||||||
|
/**
 * Exception whose message is "Invalid UTF8: " followed by the given text.
 */
class OPENCC_EXPORT InvalidUTF8 : public Exception {
public:
  InvalidUTF8(const std::string& _message)
      : Exception(std::string("Invalid UTF8: ").append(_message)) {}
};
|
||||||
|
|
||||||
|
/**
 * Exception with the fixed message "ShouldNotBeHere! This must be a bug.".
 */
class OPENCC_EXPORT ShouldNotBeHere : public Exception {
public:
  ShouldNotBeHere()
      : Exception("ShouldNotBeHere! This must be a bug.") {}
};
|
||||||
|
|
||||||
|
} // namespace opencc
|
40
winlibs/include/opencc/Export.hpp
Normal file
40
winlibs/include/opencc/Export.hpp
Normal file
|
@ -0,0 +1,40 @@
|
||||||
|
/*
|
||||||
|
* Open Chinese Convert
|
||||||
|
*
|
||||||
|
* Copyright 2010-2014 BYVoid <byvoid@byvoid.com>
|
||||||
|
*
|
||||||
|
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
* you may not use this file except in compliance with the License.
|
||||||
|
* You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
|
||||||
|
#pragma once
|
||||||
|
|
||||||
|
#if !defined(Opencc_BUILT_AS_STATIC) && defined(_WIN32)

// Windows build that is not marked as static: pick the DLL storage attribute,
// unless the including code has already defined OPENCC_EXPORT itself.
#if !defined(OPENCC_EXPORT)
#if defined(libopencc_EXPORTS)
/* We are building this library */
#define OPENCC_EXPORT __declspec(dllexport)
#else // if defined(libopencc_EXPORTS)
/* We are using this library */
#define OPENCC_EXPORT __declspec(dllimport)
#endif // if defined(libopencc_EXPORTS)
#endif // if !defined(OPENCC_EXPORT)

#if !defined(OPENCC_NO_EXPORT)
#define OPENCC_NO_EXPORT
#endif // if !defined(OPENCC_NO_EXPORT)

#else // if !defined(Opencc_BUILT_AS_STATIC) && defined(_WIN32)

// Static build or non-Windows platform: both markers expand to nothing.
#define OPENCC_EXPORT
#define OPENCC_NO_EXPORT

#endif // if !defined(Opencc_BUILT_AS_STATIC) && defined(_WIN32)
|
56
winlibs/include/opencc/Lexicon.hpp
Normal file
56
winlibs/include/opencc/Lexicon.hpp
Normal file
|
@ -0,0 +1,56 @@
|
||||||
|
/*
|
||||||
|
* Open Chinese Convert
|
||||||
|
*
|
||||||
|
* Copyright 2010-2014 BYVoid <byvoid@byvoid.com>
|
||||||
|
*
|
||||||
|
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
* you may not use this file except in compliance with the License.
|
||||||
|
* You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
|
||||||
|
#pragma once
|
||||||
|
|
||||||
|
#include "Common.hpp"
|
||||||
|
#include "DictEntry.hpp"
|
||||||
|
|
||||||
|
namespace opencc {
|
||||||
|
/**
|
||||||
|
* Storage of all entries
|
||||||
|
* @ingroup opencc_cpp_api
|
||||||
|
*/
|
||||||
|
class OPENCC_EXPORT Lexicon {
|
||||||
|
public:
|
||||||
|
Lexicon() {}
|
||||||
|
|
||||||
|
~Lexicon() {
|
||||||
|
for (DictEntry* entry : entries) {
|
||||||
|
delete entry;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
void Add(DictEntry* entry) { entries.push_back(entry); }
|
||||||
|
|
||||||
|
void Sort() {
|
||||||
|
std::sort(entries.begin(), entries.end(), DictEntry::PtrLessThan);
|
||||||
|
}
|
||||||
|
|
||||||
|
const DictEntry* At(size_t index) const { return entries.at(index); }
|
||||||
|
|
||||||
|
size_t Length() const { return entries.size(); }
|
||||||
|
|
||||||
|
vector<DictEntry*>::const_iterator begin() const { return entries.begin(); }
|
||||||
|
|
||||||
|
vector<DictEntry*>::const_iterator end() const { return entries.end(); }
|
||||||
|
|
||||||
|
private:
|
||||||
|
vector<DictEntry*> entries;
|
||||||
|
};
|
||||||
|
}
|
43
winlibs/include/opencc/MaxMatchSegmentation.hpp
Normal file
43
winlibs/include/opencc/MaxMatchSegmentation.hpp
Normal file
|
@ -0,0 +1,43 @@
|
||||||
|
/*
|
||||||
|
* Open Chinese Convert
|
||||||
|
*
|
||||||
|
* Copyright 2010-2014 BYVoid <byvoid@byvoid.com>
|
||||||
|
*
|
||||||
|
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
* you may not use this file except in compliance with the License.
|
||||||
|
* You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
|
||||||
|
#pragma once
|
||||||
|
|
||||||
|
#include "Common.hpp"
|
||||||
|
#include "DictGroup.hpp"
|
||||||
|
#include "Segmentation.hpp"
|
||||||
|
|
||||||
|
namespace opencc {
|
||||||
|
/**
|
||||||
|
* Implementation of maximal match segmentation
|
||||||
|
* @ingroup opencc_cpp_api
|
||||||
|
*/
|
||||||
|
class OPENCC_EXPORT MaxMatchSegmentation : public Segmentation {
public:
  /** Stores _dict; it is later available through GetDict(). */
  MaxMatchSegmentation(const DictPtr _dict) : dict(_dict) {}

  virtual ~MaxMatchSegmentation() {}

  /** Segments text; the definition is not part of this header. */
  virtual SegmentsPtr Segment(const string& text) const;

  /** Returns the dictionary pointer given at construction. */
  const DictPtr GetDict() const { return dict; }

private:
  const DictPtr dict;
};
|
||||||
|
}
|
76
winlibs/include/opencc/Optional.hpp
Normal file
76
winlibs/include/opencc/Optional.hpp
Normal file
|
@ -0,0 +1,76 @@
|
||||||
|
/*
|
||||||
|
* Open Chinese Convert
|
||||||
|
*
|
||||||
|
* Copyright 2010-2014 BYVoid <byvoid@byvoid.com>
|
||||||
|
*
|
||||||
|
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
* you may not use this file except in compliance with the License.
|
||||||
|
* You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
|
||||||
|
#pragma once
|
||||||
|
|
||||||
|
namespace opencc {
|
||||||
|
/**
|
||||||
|
* A class that wraps type T into a nullable type.
|
||||||
|
* @ingroup opencc_cpp_api
|
||||||
|
*/
|
||||||
|
template <typename T> class Optional {
public:
  /**
   * The constructor of Optional.
   * Wraps a copy of actual; the resulting instance is not null.
   */
  Optional(T actual) : isNull(false), data(actual) {}

  /**
   * Returns true if the instance is null.
   */
  bool IsNull() const { return isNull; }

  /**
   * Returns the containing data of the instance.
   * For a null instance this is a value-initialized T.
   */
  const T& Get() const { return data; }

  /**
   * Constructs a null instance.
   */
  static Optional<T> Null() { return Optional(); }

private:
  // data is value-initialized so that copying a null instance, or calling
  // Get() on it, never reads an indeterminate value when T is a scalar type.
  Optional() : isNull(true), data() {}

  bool isNull;
  T data;
};
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Specialization of Optional for pointers.
|
||||||
|
*
|
||||||
|
* A null pointer represents the null state, so no separate bool flag is stored.
|
||||||
|
*/
|
||||||
|
template <typename T> class Optional<T*> {
private:
  // Declared first because the public signatures below refer to it.
  typedef T* TPtr;

public:
  /** Wraps the pointer actual. */
  Optional(TPtr actual) : data(actual) {}

  /** Returns true when the wrapped pointer is null. */
  bool IsNull() const { return !data; }

  /** Returns the wrapped pointer. */
  const TPtr& Get() const { return data; }

  /** Constructs an instance that wraps a null pointer. */
  static Optional<TPtr> Null() { return Optional(); }

private:
  Optional() : data(nullptr) {}

  TPtr data;
};
|
||||||
|
}
|
32
winlibs/include/opencc/Segmentation.hpp
Normal file
32
winlibs/include/opencc/Segmentation.hpp
Normal file
|
@ -0,0 +1,32 @@
|
||||||
|
/*
|
||||||
|
* Open Chinese Convert
|
||||||
|
*
|
||||||
|
* Copyright 2010-2014 BYVoid <byvoid@byvoid.com>
|
||||||
|
*
|
||||||
|
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
* you may not use this file except in compliance with the License.
|
||||||
|
* You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
|
||||||
|
#pragma once
|
||||||
|
|
||||||
|
#include "Common.hpp"
|
||||||
|
|
||||||
|
namespace opencc {
|
||||||
|
/**
|
||||||
|
* Abstract segmentation
|
||||||
|
* @ingroup opencc_cpp_api
|
||||||
|
*/
|
||||||
|
class OPENCC_EXPORT Segmentation {
public:
  /**
   * Produces the segments of text. Pure virtual: subclasses supply the
   * algorithm.
   */
  // NOTE(review): no virtual destructor is declared, so deleting a subclass
  // through a Segmentation* would be undefined behavior. Confirm that owners
  // only hold instances through smart pointers created from the concrete type.
  virtual SegmentsPtr Segment(const string& text) const = 0;
};
|
||||||
|
}
|
112
winlibs/include/opencc/Segments.hpp
Normal file
112
winlibs/include/opencc/Segments.hpp
Normal file
|
@ -0,0 +1,112 @@
|
||||||
|
/*
|
||||||
|
* Open Chinese Convert
|
||||||
|
*
|
||||||
|
* Copyright 2010-2014 BYVoid <byvoid@byvoid.com>
|
||||||
|
*
|
||||||
|
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
* you may not use this file except in compliance with the License.
|
||||||
|
* You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
|
||||||
|
#pragma once
|
||||||
|
|
||||||
|
#include "Common.hpp"
|
||||||
|
|
||||||
|
namespace opencc {
|
||||||
|
/**
|
||||||
|
* Segmented text
|
||||||
|
* @ingroup opencc_cpp_api
|
||||||
|
*/
|
||||||
|
class OPENCC_EXPORT Segments {
|
||||||
|
public:
|
||||||
|
Segments() {}
|
||||||
|
|
||||||
|
Segments(std::initializer_list<const char*> initList) {
|
||||||
|
for (const string& item : initList) {
|
||||||
|
AddSegment(item);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
Segments(std::initializer_list<string> initList) {
|
||||||
|
for (const string& item : initList) {
|
||||||
|
AddSegment(item);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
void AddSegment(const char* unmanagedString) {
|
||||||
|
indexes.push_back(std::make_pair(unmanaged.size(), false));
|
||||||
|
unmanaged.push_back(unmanagedString);
|
||||||
|
}
|
||||||
|
|
||||||
|
void AddSegment(const string& str) {
|
||||||
|
indexes.push_back(std::make_pair(managed.size(), true));
|
||||||
|
managed.push_back(str);
|
||||||
|
}
|
||||||
|
|
||||||
|
class iterator : public std::iterator<std::input_iterator_tag, const char*> {
|
||||||
|
public:
|
||||||
|
iterator(const Segments* const _segments, size_t _cursor)
|
||||||
|
: segments(_segments), cursor(_cursor) {}
|
||||||
|
|
||||||
|
iterator& operator++() {
|
||||||
|
cursor++;
|
||||||
|
return *this;
|
||||||
|
}
|
||||||
|
|
||||||
|
bool operator==(const iterator& that) const {
|
||||||
|
return cursor == that.cursor && segments == that.segments;
|
||||||
|
}
|
||||||
|
|
||||||
|
bool operator!=(const iterator& that) const {
|
||||||
|
return !this->operator==(that);
|
||||||
|
}
|
||||||
|
|
||||||
|
const char* operator*() const { return segments->At(cursor); }
|
||||||
|
|
||||||
|
private:
|
||||||
|
const Segments* const segments;
|
||||||
|
size_t cursor;
|
||||||
|
};
|
||||||
|
|
||||||
|
const char* At(size_t cursor) const {
|
||||||
|
const auto& index = indexes[cursor];
|
||||||
|
if (index.second) {
|
||||||
|
return managed[index.first].c_str();
|
||||||
|
} else {
|
||||||
|
return unmanaged[index.first];
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
size_t Length() const { return indexes.size(); }
|
||||||
|
|
||||||
|
iterator begin() const { return iterator(this, 0); }
|
||||||
|
|
||||||
|
iterator end() const { return iterator(this, indexes.size()); }
|
||||||
|
|
||||||
|
string ToString() const {
|
||||||
|
// TODO implement a nested structure to reduce concatenation,
|
||||||
|
// like a purely functional differential list
|
||||||
|
std::ostringstream buffer;
|
||||||
|
for (const char* segment : *this) {
|
||||||
|
buffer << segment;
|
||||||
|
}
|
||||||
|
return buffer.str();
|
||||||
|
}
|
||||||
|
|
||||||
|
private:
|
||||||
|
Segments(const Segments&) {}
|
||||||
|
|
||||||
|
vector<const char*> unmanaged;
|
||||||
|
vector<string> managed;
|
||||||
|
// index, managed
|
||||||
|
vector<std::pair<size_t, bool>> indexes;
|
||||||
|
};
|
||||||
|
}
|
69
winlibs/include/opencc/SerializableDict.hpp
Normal file
69
winlibs/include/opencc/SerializableDict.hpp
Normal file
|
@ -0,0 +1,69 @@
|
||||||
|
/*
|
||||||
|
* Open Chinese Convert
|
||||||
|
*
|
||||||
|
* Copyright 2010-2014 BYVoid <byvoid@byvoid.com>
|
||||||
|
*
|
||||||
|
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
* you may not use this file except in compliance with the License.
|
||||||
|
* You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
|
||||||
|
#pragma once
|
||||||
|
|
||||||
|
#include "Dict.hpp"
|
||||||
|
|
||||||
|
namespace opencc {
|
||||||
|
/**
|
||||||
|
* Serializable dictionary interface
|
||||||
|
* @ingroup opencc_cpp_api
|
||||||
|
*/
|
||||||
|
class OPENCC_EXPORT SerializableDict {
|
||||||
|
public:
|
||||||
|
/**
|
||||||
|
* Serializes the dictionary and writes in to a file.
|
||||||
|
*/
|
||||||
|
virtual void SerializeToFile(FILE* fp) const = 0;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Serializes the dictionary and writes in to a file.
|
||||||
|
*/
|
||||||
|
virtual void SerializeToFile(const string& fileName) const {
|
||||||
|
FILE* fp = fopen(fileName.c_str(), "wb");
|
||||||
|
if (fp == NULL) {
|
||||||
|
throw FileNotWritable(fileName);
|
||||||
|
}
|
||||||
|
SerializeToFile(fp);
|
||||||
|
fclose(fp);
|
||||||
|
}
|
||||||
|
|
||||||
|
template <typename DICT>
|
||||||
|
static bool TryLoadFromFile(const string& fileName,
|
||||||
|
std::shared_ptr<DICT>* dict) {
|
||||||
|
FILE* fp = fopen(fileName.c_str(), "rb");
|
||||||
|
if (fp == NULL) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
std::shared_ptr<DICT> loadedDict = DICT::NewFromFile(fp);
|
||||||
|
fclose(fp);
|
||||||
|
*dict = loadedDict;
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
template <typename DICT>
|
||||||
|
static std::shared_ptr<DICT> NewFromFile(const string& fileName) {
|
||||||
|
std::shared_ptr<DICT> dict;
|
||||||
|
if (!TryLoadFromFile<DICT>(fileName, &dict)) {
|
||||||
|
throw FileNotFound(fileName);
|
||||||
|
}
|
||||||
|
return dict;
|
||||||
|
}
|
||||||
|
};
|
||||||
|
}
|
88
winlibs/include/opencc/SimpleConverter.hpp
Normal file
88
winlibs/include/opencc/SimpleConverter.hpp
Normal file
|
@ -0,0 +1,88 @@
|
||||||
|
/*
|
||||||
|
* Open Chinese Convert
|
||||||
|
*
|
||||||
|
* Copyright 2010-2014 BYVoid <byvoid@byvoid.com>
|
||||||
|
*
|
||||||
|
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
* you may not use this file except in compliance with the License.
|
||||||
|
* You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
|
||||||
|
#ifndef __OPENCC_SIMPLECONVERTER_HPP_
|
||||||
|
#define __OPENCC_SIMPLECONVERTER_HPP_
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @defgroup opencc_simple_api OpenCC C++ Simple API
|
||||||
|
*
|
||||||
|
* Simple API in C++ language
|
||||||
|
*/
|
||||||
|
|
||||||
|
namespace opencc {
|
||||||
|
/**
|
||||||
|
* A high level converter
|
||||||
|
* This interface does not require C++11 to compile.
|
||||||
|
* @ingroup opencc_simple_api
|
||||||
|
*/
|
||||||
|
class OPENCC_EXPORT SimpleConverter {
|
||||||
|
public:
|
||||||
|
/**
|
||||||
|
* Constructor of SimpleConverter
|
||||||
|
* @param configFileName File name of configuration.
|
||||||
|
*/
|
||||||
|
SimpleConverter(const std::string& configFileName);
|
||||||
|
|
||||||
|
~SimpleConverter();
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Converts a text
|
||||||
|
* @param input Text to be converted.
|
||||||
|
*/
|
||||||
|
std::string Convert(const std::string& input) const;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Converts a text
|
||||||
|
* @param input A C-Style string (terminated by '\0') to be converted.
|
||||||
|
*/
|
||||||
|
std::string Convert(const char* input) const;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Converts a text
|
||||||
|
* @param input A C-Style string limited by a given length to be converted.
|
||||||
|
* @param length Maximal length in byte of the input string.
|
||||||
|
*/
|
||||||
|
std::string Convert(const char* input, size_t length) const;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Converts a text and writes to an allocated buffer
|
||||||
|
* Please make sure the buffer has sufficent space.
|
||||||
|
* @param input A C-Style string (terminated by '\0') to be converted.
|
||||||
|
* @param output Buffer to write the converted text.
|
||||||
|
* @return Length of converted text.
|
||||||
|
*/
|
||||||
|
size_t Convert(const char* input, char* output) const;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Converts a text and writes to an allocated buffer
|
||||||
|
* Please make sure the buffer has sufficent space.
|
||||||
|
* @param input A C-Style string limited by a given length to be converted.
|
||||||
|
* @param length Maximal length in byte of the input string.
|
||||||
|
* @param output Buffer to write the converted text.
|
||||||
|
* @return Length of converted text.
|
||||||
|
*/
|
||||||
|
size_t Convert(const char* input, size_t length, char* output) const;
|
||||||
|
|
||||||
|
private:
|
||||||
|
const void* internalData;
|
||||||
|
};
|
||||||
|
|
||||||
|
} // namespace opencc
|
||||||
|
|
||||||
|
#endif
|
60
winlibs/include/opencc/TextDict.hpp
Normal file
60
winlibs/include/opencc/TextDict.hpp
Normal file
|
@ -0,0 +1,60 @@
|
||||||
|
/*
|
||||||
|
* Open Chinese Convert
|
||||||
|
*
|
||||||
|
* Copyright 2010-2014 BYVoid <byvoid@byvoid.com>
|
||||||
|
*
|
||||||
|
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
* you may not use this file except in compliance with the License.
|
||||||
|
* You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
|
||||||
|
#pragma once
|
||||||
|
|
||||||
|
#include "Common.hpp"
|
||||||
|
#include "SerializableDict.hpp"
|
||||||
|
|
||||||
|
namespace opencc {
|
||||||
|
/**
|
||||||
|
* Text dictionary
|
||||||
|
* @ingroup opencc_cpp_api
|
||||||
|
*/
|
||||||
|
class OPENCC_EXPORT TextDict : public Dict, public SerializableDict {
public:
  /**
   * Builds a TextDict on top of _lexicon.
   * The lexicon has to be sorted already.
   */
  TextDict(const LexiconPtr& _lexicon);

  virtual ~TextDict();

  virtual size_t KeyMaxLength() const;

  virtual Optional<const DictEntry*> Match(const char* word) const;

  virtual LexiconPtr GetLexicon() const;

  virtual void SerializeToFile(FILE* fp) const;

  /**
   * Creates a TextDict from another dictionary.
   */
  static TextDictPtr NewFromDict(const Dict& dict);

  /** Creates a TextDict from an opened file. */
  static TextDictPtr NewFromFile(FILE* fp);

  // NOTE(review): by its name this variant expects entries that are already
  // sorted; confirm against the implementation, which is not in this header.
  static TextDictPtr NewFromSortedFile(FILE* fp);

private:
  const size_t maxLength;
  const LexiconPtr lexicon;
};
|
||||||
|
}
|
245
winlibs/include/opencc/UTF8Util.hpp
Normal file
245
winlibs/include/opencc/UTF8Util.hpp
Normal file
|
@ -0,0 +1,245 @@
|
||||||
|
/*
|
||||||
|
* Open Chinese Convert
|
||||||
|
*
|
||||||
|
* Copyright 2013 BYVoid <byvoid@byvoid.com>
|
||||||
|
*
|
||||||
|
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
* you may not use this file except in compliance with the License.
|
||||||
|
* You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
|
||||||
|
#pragma once
|
||||||
|
|
||||||
|
#include "Common.hpp"
|
||||||
|
|
||||||
|
namespace opencc {
|
||||||
|
/**
|
||||||
|
* UTF8 string utilities
|
||||||
|
* @ingroup opencc_cpp_api
|
||||||
|
*/
|
||||||
|
class OPENCC_EXPORT UTF8Util {
|
||||||
|
public:
|
||||||
|
/**
|
||||||
|
* Detect UTF8 BOM and skip it.
|
||||||
|
*/
|
||||||
|
static void SkipUtf8Bom(FILE* fp);
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Returns the length in byte for the next UTF8 character.
|
||||||
|
* On error returns 0.
|
||||||
|
*/
|
||||||
|
static size_t NextCharLengthNoException(const char* str) {
|
||||||
|
char ch = *str;
|
||||||
|
if ((ch & 0xF0) == 0xE0) {
|
||||||
|
return 3;
|
||||||
|
} else if ((ch & 0x80) == 0x00) {
|
||||||
|
return 1;
|
||||||
|
} else if ((ch & 0xE0) == 0xC0) {
|
||||||
|
return 2;
|
||||||
|
} else if ((ch & 0xF8) == 0xF0) {
|
||||||
|
return 4;
|
||||||
|
} else if ((ch & 0xFC) == 0xF8) {
|
||||||
|
return 5;
|
||||||
|
} else if ((ch & 0xFE) == 0xFC) {
|
||||||
|
return 6;
|
||||||
|
}
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Returns the length in byte for the next UTF8 character.
|
||||||
|
*/
|
||||||
|
static size_t NextCharLength(const char* str) {
|
||||||
|
size_t length = NextCharLengthNoException(str);
|
||||||
|
if (length == 0) {
|
||||||
|
throw InvalidUTF8(str);
|
||||||
|
}
|
||||||
|
return length;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Returns the length in byte for the previous UTF8 character.
|
||||||
|
*/
|
||||||
|
static size_t PrevCharLength(const char* str) {
|
||||||
|
{
|
||||||
|
const size_t length = NextCharLengthNoException(str - 3);
|
||||||
|
if (length == 3) {
|
||||||
|
return length;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
{
|
||||||
|
const size_t length = NextCharLengthNoException(str - 1);
|
||||||
|
if (length == 1) {
|
||||||
|
return length;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
{
|
||||||
|
const size_t length = NextCharLengthNoException(str - 2);
|
||||||
|
if (length == 2) {
|
||||||
|
return length;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
for (size_t i = 4; i <= 6; i++) {
|
||||||
|
const size_t length = NextCharLengthNoException(str - i);
|
||||||
|
if (length == i) {
|
||||||
|
return length;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
throw InvalidUTF8(str);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Returns the char* pointer over the next UTF8 character.
|
||||||
|
*/
|
||||||
|
static const char* NextChar(const char* str) {
|
||||||
|
return str + NextCharLength(str);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Move the char* pointer before the previous UTF8 character.
|
||||||
|
*/
|
||||||
|
static const char* PrevChar(const char* str) {
|
||||||
|
return str - PrevCharLength(str);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Returns the UTF8 length of a valid UTF8 string.
|
||||||
|
*/
|
||||||
|
static size_t Length(const char* str) {
|
||||||
|
size_t length = 0;
|
||||||
|
while (*str != '\0') {
|
||||||
|
str = NextChar(str);
|
||||||
|
length++;
|
||||||
|
}
|
||||||
|
return length;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Finds a character in the same line.
|
||||||
|
* @param str The text to be searched in.
|
||||||
|
* @param ch The character to find.
|
||||||
|
* @return The pointer that points to the found chacter in str or EOL/EOF.
|
||||||
|
*/
|
||||||
|
static const char* FindNextInline(const char* str, const char ch) {
|
||||||
|
while (!IsLineEndingOrFileEnding(*str) && *str != ch) {
|
||||||
|
str = NextChar(str);
|
||||||
|
}
|
||||||
|
return str;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Returns ture if the character is a line ending or end of file.
|
||||||
|
*/
|
||||||
|
static bool IsLineEndingOrFileEnding(const char ch) {
|
||||||
|
return ch == '\0' || ch == '\n' || ch == '\r';
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Copies a substring with given length to a new std::string.
|
||||||
|
*/
|
||||||
|
static string FromSubstr(const char* str, size_t length) {
|
||||||
|
string newStr;
|
||||||
|
newStr.resize(length);
|
||||||
|
strncpy(const_cast<char*>(newStr.c_str()), str, length);
|
||||||
|
return newStr;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Returns true if the given string is longer or as long as the given length.
|
||||||
|
*/
|
||||||
|
static bool NotShorterThan(const char* str, size_t byteLength) {
|
||||||
|
while (byteLength > 0) {
|
||||||
|
if (*str == '\0') {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
byteLength--;
|
||||||
|
str++;
|
||||||
|
}
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Truncates a string with a maximal length in byte.
|
||||||
|
* No UTF8 character will be broken.
|
||||||
|
*/
|
||||||
|
static string TruncateUTF8(const char* str, size_t maxByteLength) {
|
||||||
|
string wordTrunc;
|
||||||
|
if (NotShorterThan(str, maxByteLength)) {
|
||||||
|
size_t len = 0;
|
||||||
|
const char* pStr = str;
|
||||||
|
for (;;) {
|
||||||
|
const size_t charLength = NextCharLength(pStr);
|
||||||
|
if (len + charLength > maxByteLength) {
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
pStr += charLength;
|
||||||
|
len += charLength;
|
||||||
|
}
|
||||||
|
wordTrunc = FromSubstr(str, len);
|
||||||
|
} else {
|
||||||
|
wordTrunc = str;
|
||||||
|
}
|
||||||
|
return wordTrunc;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Replaces all patterns in a string in place.
|
||||||
|
*/
|
||||||
|
static void ReplaceAll(string& str, const char* from, const char* to) {
|
||||||
|
string::size_type pos = 0;
|
||||||
|
string::size_type fromLen = strlen(from);
|
||||||
|
string::size_type toLen = strlen(to);
|
||||||
|
while ((pos = str.find(from, pos)) != string::npos) {
|
||||||
|
str.replace(pos, fromLen, to);
|
||||||
|
pos += toLen;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Joins a string vector in to a string with a separator.
|
||||||
|
*/
|
||||||
|
static string Join(const vector<string>& strings, const string& separator) {
|
||||||
|
std::ostringstream buffer;
|
||||||
|
bool first = true;
|
||||||
|
for (const auto& str : strings) {
|
||||||
|
if (!first) {
|
||||||
|
buffer << separator;
|
||||||
|
}
|
||||||
|
buffer << str;
|
||||||
|
first = false;
|
||||||
|
}
|
||||||
|
return buffer.str();
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Joins a string vector in to a string.
|
||||||
|
*/
|
||||||
|
static string Join(const vector<string>& strings) {
|
||||||
|
std::ostringstream buffer;
|
||||||
|
for (const auto& str : strings) {
|
||||||
|
buffer << str;
|
||||||
|
}
|
||||||
|
return buffer.str();
|
||||||
|
}
|
||||||
|
|
||||||
|
static void GetByteMap(const char* str, const size_t utf8Length,
|
||||||
|
vector<size_t>* byteMap) {
|
||||||
|
if (byteMap->size() < utf8Length) {
|
||||||
|
byteMap->resize(utf8Length);
|
||||||
|
}
|
||||||
|
const char* pstr = str;
|
||||||
|
for (size_t i = 0; i < utf8Length; i++) {
|
||||||
|
(*byteMap)[i] = pstr - str;
|
||||||
|
pstr = NextChar(pstr);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
};
|
||||||
|
}
|
150
winlibs/include/opencc/opencc.h
Normal file
150
winlibs/include/opencc/opencc.h
Normal file
|
@ -0,0 +1,150 @@
|
||||||
|
/*
|
||||||
|
* Open Chinese Convert
|
||||||
|
*
|
||||||
|
* Copyright 2010-2014 BYVoid <byvoid@byvoid.com>
|
||||||
|
*
|
||||||
|
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
* you may not use this file except in compliance with the License.
|
||||||
|
* You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
|
||||||
|
#ifndef __OPENCC_H_
|
||||||
|
#define __OPENCC_H_
|
||||||
|
|
||||||
|
#ifdef __cplusplus
|
||||||
|
|
||||||
|
#include <string>
|
||||||
|
#include "Export.hpp"
|
||||||
|
#include "SimpleConverter.hpp"
|
||||||
|
|
||||||
|
extern "C" {
|
||||||
|
#else
|
||||||
|
#include <stddef.h>
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#ifndef OPENCC_EXPORT
|
||||||
|
#define OPENCC_EXPORT
|
||||||
|
#endif
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @defgroup opencc_c_api OpenCC C API
|
||||||
|
*
|
||||||
|
* API in C language
|
||||||
|
*/
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Filename of default Simplified to Traditional configuration
|
||||||
|
*
|
||||||
|
* @ingroup opencc_c_api
|
||||||
|
*/
|
||||||
|
#define OPENCC_DEFAULT_CONFIG_SIMP_TO_TRAD "s2t.json"
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Filename of default Traditional to Simplified configuration
|
||||||
|
*
|
||||||
|
* @ingroup opencc_c_api
|
||||||
|
*/
|
||||||
|
#define OPENCC_DEFAULT_CONFIG_TRAD_TO_SIMP "t2s.json"
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Type of opencc descriptor
|
||||||
|
*
|
||||||
|
* @ingroup opencc_c_api
|
||||||
|
*/
|
||||||
|
typedef void* opencc_t;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Makes an instance of opencc
|
||||||
|
*
|
||||||
|
* @param configFileName Location of configuration file. If this is set to NULL,
|
||||||
|
* OPENCC_DEFAULT_CONFIG_SIMP_TO_TRAD will be loaded.
|
||||||
|
* @return A descriptor pointer to the newly allocated instance of
|
||||||
|
* opencc. On error the return value will be (opencc_t) -1.
|
||||||
|
* @ingroup opencc_c_api
|
||||||
|
*/
|
||||||
|
OPENCC_EXPORT opencc_t opencc_open(const char* configFileName);
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Destroys an instance of opencc
|
||||||
|
*
|
||||||
|
* @param opencc The descriptor pointer.
|
||||||
|
* @return 0 on success or non-zero number on failure.
|
||||||
|
* @ingroup opencc_c_api
|
||||||
|
*/
|
||||||
|
OPENCC_EXPORT int opencc_close(opencc_t opencc);
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Converts UTF-8 string
|
||||||
|
*
|
||||||
|
* @param opencc The opencc descriptor pointer.
|
||||||
|
* @param input The UTF-8 encoded string.
|
||||||
|
* @param length The maximum length in bytes to convert. If length is (size_t)-1,
|
||||||
|
* the whole string (terminated by '\0') will be converted.
|
||||||
|
* @param output The buffer to store converted text. You MUST make sure this
|
||||||
|
* buffer has sufficient space.
|
||||||
|
*
|
||||||
|
* @return The length of converted string or (size_t)-1 on error.
|
||||||
|
*
|
||||||
|
* @ingroup opencc_c_api
|
||||||
|
*/
|
||||||
|
OPENCC_EXPORT size_t opencc_convert_utf8_to_buffer(opencc_t opencc,
|
||||||
|
const char* input,
|
||||||
|
size_t length,
|
||||||
|
char* output);
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Converts UTF-8 string
|
||||||
|
* This function returns an allocated C-Style string, which stores
|
||||||
|
* the converted string.
|
||||||
|
* You MUST call opencc_convert_utf8_free() to release allocated memory.
|
||||||
|
*
|
||||||
|
* @param opencc The opencc descriptor pointer.
|
||||||
|
* @param input The UTF-8 encoded string.
|
||||||
|
* @param length The maximum length in bytes to convert. If length is (size_t)-1,
|
||||||
|
* the whole string (terminated by '\0') will be converted.
|
||||||
|
*
|
||||||
|
* @return The newly allocated UTF-8 string that stores the converted text,
|
||||||
|
* or NULL on error.
|
||||||
|
* @ingroup opencc_c_api
|
||||||
|
*/
|
||||||
|
OPENCC_EXPORT char* opencc_convert_utf8(opencc_t opencc,
|
||||||
|
const char* input,
|
||||||
|
size_t length);
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Releases the buffer allocated by opencc_convert_utf8
|
||||||
|
*
|
||||||
|
* @param str Pointer to the string buffer allocated by opencc_convert_utf8.
|
||||||
|
*
|
||||||
|
* @ingroup opencc_c_api
|
||||||
|
*/
|
||||||
|
OPENCC_EXPORT void opencc_convert_utf8_free(char* str);
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Returns the last error message
|
||||||
|
*
|
||||||
|
* Note that this function is the only one which is NOT thread-safe.
|
||||||
|
*
|
||||||
|
* @ingroup opencc_c_api
|
||||||
|
*/
|
||||||
|
OPENCC_EXPORT const char* opencc_error(void);
|
||||||
|
|
||||||
|
#ifdef __cplusplus
|
||||||
|
} // extern "C"
|
||||||
|
#endif
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @defgroup opencc_cpp_api OpenCC C++ Comprehensive API
|
||||||
|
*
|
||||||
|
* Comprehensive API in C++ language
|
||||||
|
*/
|
||||||
|
|
||||||
|
#endif
|
BIN
winlibs/lib/libopencc.dll.a
Normal file
BIN
winlibs/lib/libopencc.dll.a
Normal file
Binary file not shown.
BIN
winlibs/lib/opencc.dll
Normal file
BIN
winlibs/lib/opencc.dll
Normal file
Binary file not shown.
Loading…
Reference in a new issue