Win Specific: upgrade opencc (#840)

* Win Specific: upgrade opencc

* opencc shared configuration file
This commit is contained in:
xiaoyifang 2023-06-10 09:40:13 +08:00 committed by GitHub
parent 7aedba4e2a
commit fe1c609b49
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
60 changed files with 715 additions and 481 deletions

View file

@ -21,6 +21,7 @@ set(THIRD_PARTY_LIBARY
debug ${CMAKE_SOURCE_DIR}/winlibs/lib/dbg/xapian.lib optimized ${CMAKE_SOURCE_DIR}/winlibs/lib/xapian.lib
debug ${CMAKE_SOURCE_DIR}/winlibs/lib/dbg/hunspell-1.7.lib optimized ${CMAKE_SOURCE_DIR}/winlibs/lib/hunspell-1.7.lib
debug ${CMAKE_SOURCE_DIR}/winlibs/lib/dbg/zim.lib optimized ${CMAKE_SOURCE_DIR}/winlibs/lib/zim.lib
debug ${CMAKE_SOURCE_DIR}/winlibs/lib/dbg/opencc.lib optimized ${CMAKE_SOURCE_DIR}/winlibs/lib/opencc.lib
)
# Link the prebuilt third-party libraries listed above. Each entry pairs a
# `debug` path with an `optimized` path, so Debug builds pick the library from
# winlibs/lib/dbg and every other configuration picks the one from winlibs/lib.
target_link_libraries(${GOLDENDICT} PRIVATE ${THIRD_PARTY_LIBARY})

View file

@ -592,9 +592,16 @@ CONFIG( chinese_conversion_support ) {
src/ui/chineseconversion.hh
SOURCES += src/dict/chinese.cc \
src/ui/chineseconversion.cc
# Link OpenCC. On Windows the library shipped in winlibs/lib is used, with the
# debug build taken from the dbg/ subdirectory; on other platforms the library
# is resolved through the default linker search path.
win32 {
    # CONFIG(debug, debug|release) tests which of the two mutually exclusive
    # values was set last. Unlike the bare Debug:/Release: scopes, it still
    # selects a branch when debug_and_release is disabled, so -lopencc is
    # always added.
    CONFIG( debug, debug|release ) {
        LIBS += -L$$PWD/winlibs/lib/dbg/ -lopencc
    } else {
        LIBS += -L$$PWD/winlibs/lib -lopencc
    }
} else {
    LIBS += -lopencc
}
}
RESOURCES += resources.qrc \
src/scripts/scripts.qrc \
icons/flags.qrc \

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

View file

@ -1,5 +1,5 @@
{
"name": "Traditional Chinese (Hong Kong standard) to Simplified Chinese",
"name": "Traditional Chinese (Hong Kong variant) to Simplified Chinese",
"segmentation": {
"type": "mmseg",
"dict": {

22
opencc/hk2t.json Normal file
View file

@ -0,0 +1,22 @@
{
"name": "Traditional Chinese (Hong Kong variant) to Traditional Chinese",
"segmentation": {
"type": "mmseg",
"dict": {
"type": "ocd2",
"file": "HKVariantsRevPhrases.ocd2"
}
},
"conversion_chain": [{
"dict": {
"type": "group",
"dicts": [{
"type": "ocd2",
"file": "HKVariantsRevPhrases.ocd2"
}, {
"type": "ocd2",
"file": "HKVariantsRev.ocd2"
}]
}
}]
}

View file

@ -1,5 +1,5 @@
{
"name": "Simplified Chinese to Traditional Chinese (Hong Kong standard)",
"name": "Simplified Chinese to Traditional Chinese (Hong Kong variant)",
"segmentation": {
"type": "mmseg",
"dict": {
@ -20,14 +20,8 @@
}
}, {
"dict": {
"type": "group",
"dicts": [{
"type": "ocd2",
"file": "HKVariantsPhrases.ocd2"
}, {
"type": "ocd2",
"file": "HKVariants.ocd2"
}]
"type": "ocd2",
"file": "HKVariants.ocd2"
}
}]
}

View file

@ -1,22 +1,16 @@
{
"name": "Traditional Chinese to Traditional Chinese (Hong Kong standard)",
"name": "Traditional Chinese to Traditional Chinese (Hong Kong variant)",
"segmentation": {
"type": "mmseg",
"dict": {
"type": "ocd2",
"file": "HKVariantsPhrases.ocd2"
"file": "HKVariants.ocd2"
}
},
"conversion_chain": [{
"dict": {
"type": "group",
"dicts": [{
"type": "ocd2",
"file": "HKVariantsPhrases.ocd2"
}, {
"type": "ocd2",
"file": "HKVariants.ocd2"
}]
"type": "ocd2",
"file": "HKVariants.ocd2"
}
}]
}

22
opencc/tw2t.json Normal file
View file

@ -0,0 +1,22 @@
{
"name": "Traditional Chinese (Taiwan standard) to Traditional Chinese",
"segmentation": {
"type": "mmseg",
"dict": {
"type": "ocd2",
"file": "TWVariantsRevPhrases.ocd2"
}
},
"conversion_chain": [{
"dict": {
"type": "group",
"dicts": [{
"type": "ocd2",
"file": "TWVariantsRevPhrases.ocd2"
}, {
"type": "ocd2",
"file": "TWVariantsRev.ocd2"
}]
}
}]
}

137
opencc/vcpkg.spdx.json Normal file
View file

@ -0,0 +1,137 @@
{
"$schema": "https://raw.githubusercontent.com/spdx/spdx-spec/v2.2.1/schemas/spdx-schema.json",
"spdxVersion": "SPDX-2.2",
"dataLicense": "CC0-1.0",
"SPDXID": "SPDXRef-DOCUMENT",
"documentNamespace": "https://spdx.org/spdxdocs/opencc-x64-windows-1.1.6#1-77e9ab4c-a73d-4494-a5a7-8c16f4c63181",
"name": "opencc:x64-windows@1.1.6#1 c2ced06b2a1f9dafc206a7e56d003ca36dc5b07462881e03cdfe6b0b9bec8ecb",
"creationInfo": {
"creators": [
"Tool: vcpkg-bedcba5172f5e4b91caac660ab7afe92c27a9895"
],
"created": "2023-06-09T15:52:16Z"
},
"relationships": [
{
"spdxElementId": "SPDXRef-port",
"relationshipType": "GENERATES",
"relatedSpdxElement": "SPDXRef-binary"
},
{
"spdxElementId": "SPDXRef-port",
"relationshipType": "CONTAINS",
"relatedSpdxElement": "SPDXRef-file-0"
},
{
"spdxElementId": "SPDXRef-port",
"relationshipType": "CONTAINS",
"relatedSpdxElement": "SPDXRef-file-1"
},
{
"spdxElementId": "SPDXRef-port",
"relationshipType": "CONTAINS",
"relatedSpdxElement": "SPDXRef-file-2"
},
{
"spdxElementId": "SPDXRef-binary",
"relationshipType": "GENERATED_FROM",
"relatedSpdxElement": "SPDXRef-port"
},
{
"spdxElementId": "SPDXRef-file-0",
"relationshipType": "CONTAINED_BY",
"relatedSpdxElement": "SPDXRef-port"
},
{
"spdxElementId": "SPDXRef-file-1",
"relationshipType": "CONTAINED_BY",
"relatedSpdxElement": "SPDXRef-port"
},
{
"spdxElementId": "SPDXRef-file-2",
"relationshipType": "CONTAINED_BY",
"relatedSpdxElement": "SPDXRef-port"
},
{
"spdxElementId": "SPDXRef-file-2",
"relationshipType": "DEPENDENCY_MANIFEST_OF",
"relatedSpdxElement": "SPDXRef-port"
}
],
"packages": [
{
"name": "opencc",
"SPDXID": "SPDXRef-port",
"versionInfo": "1.1.6#1",
"downloadLocation": "git+https://github.com/Microsoft/vcpkg#ports/opencc",
"homepage": "https://github.com/BYVoid/OpenCC",
"licenseConcluded": "Apache-2.0",
"licenseDeclared": "NOASSERTION",
"copyrightText": "NOASSERTION",
"description": "A project for conversions between Traditional Chinese, Simplified Chinese and Japanese Kanji (Shinjitai)",
"comment": "This is the port (recipe) consumed by vcpkg."
},
{
"name": "opencc:x64-windows",
"SPDXID": "SPDXRef-binary",
"versionInfo": "c2ced06b2a1f9dafc206a7e56d003ca36dc5b07462881e03cdfe6b0b9bec8ecb",
"downloadLocation": "NONE",
"licenseConcluded": "Apache-2.0",
"licenseDeclared": "NOASSERTION",
"copyrightText": "NOASSERTION",
"comment": "This is a binary package built by vcpkg."
},
{
"SPDXID": "SPDXRef-resource-1",
"name": "BYVoid/OpenCC",
"downloadLocation": "git+https://github.com/BYVoid/OpenCC@ver.${VERSION}",
"licenseConcluded": "NOASSERTION",
"licenseDeclared": "NOASSERTION",
"copyrightText": "NOASSERTION",
"checksums": [
{
"algorithm": "SHA512",
"checksumValue": "bfc40bdf1348e6a265b3304ab1e8acee2f4b6ac9c377ff3d8c996435a92dee98c3758503186b4fd424653faf44db339f8a90300e3290c59942ccf04b1bbb2a30"
}
]
}
],
"files": [
{
"fileName": "./fix-dependencies.patch",
"SPDXID": "SPDXRef-file-0",
"checksums": [
{
"algorithm": "SHA256",
"checksumValue": "495ac3be36e9d2a350d909e120ceb92dc7f5fc7edf10ffb274be803e8a8b79e5"
}
],
"licenseConcluded": "NOASSERTION",
"copyrightText": "NOASSERTION"
},
{
"fileName": "./portfile.cmake",
"SPDXID": "SPDXRef-file-1",
"checksums": [
{
"algorithm": "SHA256",
"checksumValue": "b2292bfc8abd9ef70e09513a207a6351f9c887035206359ba89ee94fe0c5457c"
}
],
"licenseConcluded": "NOASSERTION",
"copyrightText": "NOASSERTION"
},
{
"fileName": "./vcpkg.json",
"SPDXID": "SPDXRef-file-2",
"checksums": [
{
"algorithm": "SHA256",
"checksumValue": "4a3967777ea499bc78ee425ab5a513b94e4cad3add9c9358cc08dd44e355a69b"
}
],
"licenseConcluded": "NOASSERTION",
"copyrightText": "NOASSERTION"
}
]
}

View file

@ -1,18 +1,26 @@
cmake 3.21.1
cmake 3.25.1
darts-clone 89a71c3602a9a640e2a4c623e4b86a9c08cbe49c17e892e8b925fecbe9ff45fa
features core
portfile.cmake 9863613dceb7a5268bbb2d767d9098c35400b8a881a0815bfe7dbe36e793868f
ports.cmake e01bcbe535b11a2fbda56cffd590218ddeb18cb899c00afb2ac22a2301df2b09
fix-dependencies.patch 495ac3be36e9d2a350d909e120ceb92dc7f5fc7edf10ffb274be803e8a8b79e5
marisa-trie 344ec03e241b42e674d4b138d50aa1e2ceea7e40029ed3212936b16cd4348cfb
pkgconf b5677ec2a00cf34862a66513ae97571b40a8eb1e4600af08ac0a3ac5ff384d28
portfile.cmake b2292bfc8abd9ef70e09513a207a6351f9c887035206359ba89ee94fe0c5457c
ports.cmake 5a8e00cedff0c898b1f90f7d129329d0288801bc9056562b039698caf31ff3f3
post_build_checks 2
powershell 7.2.1
triplet x64-windows-rel
triplet_abi 4970dad5b87bdaa6176cb4f981bb32f09d8957ff9babdaf321bdb98198621b56-b569c9954a47274946415ff01b1a344c8549f3fc19cb931d50bc09d1e5630c1d-cc5606e89ff68c454d07a2425977151c05eb9158
vcpkg.json d7125dc7d159ce1190e81cbc30003b28a55e53e7d88a455903040003606a0976
vcpkg_add_to_path 5d7b62ed9fa23a49d3d842206e95e4211ab25b41321e30b6ddd9208febed9d18
vcpkg_configure_cmake 0b91a87ccb0659953d3be8d9b5b82dc9bf680b7b84b50eba8f8790c6d27b03d3
vcpkg_copy_pdbs 59e4c0d2321697848a899ba9537394cdee81725b11965c6d1341df53a6850380
vcpkg_copy_tool_dependencies 4655cdf0d283d2e6bce5e2aee3745ab0bfd7e4fb048c532274de53ef56389ea9
vcpkg_find_acquire_program a1240f46d8e1cb7eaacb8ac882ce597e5370be9b387d8a048d178d3b842b3520
vcpkg_fixup_pkgconfig ff54f8b06c83b54a1af5e35286a6c36ea9e69376bed1d11b5521c254987c123f
vcpkg_from_git 05d446731ca8f6cefe0e4dc04c17776e54b39d4f2bfeeec7952ced1cfe8bf89f
vcpkg_from_github 1929b9ee1417dbf59f8a25ac321ef9ca792b6d67aee38bda69ee3700ea256b73
vcpkg_install_cmake 6430f4795e65c4c44c545c590d431fe1a68d7444255d0da58362a267bbf6408d
powershell 7.3.4
rapidjson 28ccfa9831e8dc1ae50215d843daca2f9b2caec53ca86f769188f60229d45ba6
tclap 50c1e4611a8a04be4a47c2539e14b605ff5930955b2edbabefd1496c453089e9
triplet x64-windows
triplet_abi 4556164a2cd3dd6f4742101eabb46def7e71b6e5856faa88e5d005aac12a803c-36b818778ba6f2c16962495caedb9a7b221d5be4c60de1cd3060f549319a9931-37ddf335dc10d14fa90c13a22cc1ec1cdbd1efcd
vcpkg-cmake 9f162a8d81a98883f391b63412d4947ca06fc2fb875ff027f1e59e08ae945726
vcpkg-cmake-config 71416eefbbd89d780c6a08a08da3b3a3699f2f4cfe8092a67b8df1821a96eff9
vcpkg.json 4a3967777ea499bc78ee425ab5a513b94e4cad3add9c9358cc08dd44e355a69b
vcpkg_add_to_path 5f5ae75cf37b2a58d1a8561ca96496b64cd91ec9a0afab0b976c3e5d59030bfe
vcpkg_copy_pdbs d57e4f196c82dc562a9968c6155073094513c31e2de475694143d3aa47954b1c
vcpkg_copy_tools 3d45ff761bddbabe8923b52330168dc3abd295fa469d3f2e47cb14dce85332d5
vcpkg_find_acquire_program 722996708543bc3e10f7ae54acc4a6468478c28adb52c84a930d096ff56f76dd
vcpkg_find_acquire_program(PYTHON3) bf78099b5a097427eb6fbaa1dabe3eccea50abbcfe37bfc89e6c9ad8c6c9cb90
vcpkg_fixup_pkgconfig 588d833ff057d3ca99c14616c7ecfb5948b5e2a9e4fc02517dceb8b803473457
vcpkg_from_git 8f27bff0d01c6d15a3e691758df52bfbb0b1b929da45c4ebba02ef76b54b1881
vcpkg_from_github b743742296a114ea1b18ae99672e02f142c4eb2bef7f57d36c038bedbfb0502f
vcpkg_install_copyright ba6c169ab4e59fa05682e530cdeb883767de22c8391f023d4e6844a7ec5dd3d2

View file

@ -1,7 +1,7 @@
/*
* Open Chinese Convert
*
* Copyright 2010-2014 BYVoid <byvoid@byvoid.com>
* Copyright 2010-2014 Carbo Kuo <byvoid@byvoid.com>
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
@ -23,9 +23,9 @@
namespace opencc {
/**
* Binary dictionary for faster deserialization
* @ingroup opencc_cpp_api
*/
* Binary dictionary for faster deserialization
* @ingroup opencc_cpp_api
*/
class OPENCC_EXPORT BinaryDict : public SerializableDict {
public:
BinaryDict(const LexiconPtr& _lexicon) : lexicon(_lexicon) {}
@ -42,12 +42,12 @@ public:
private:
LexiconPtr lexicon;
string keyBuffer;
string valueBuffer;
std::string keyBuffer;
std::string valueBuffer;
void ConstructBuffer(string& keyBuffer, vector<size_t>& keyOffset,
size_t& keyTotalLength, string& valueBuffer,
vector<size_t>& valueOffset,
void ConstructBuffer(std::string& keyBuffer, std::vector<size_t>& keyOffset,
size_t& keyTotalLength, std::string& valueBuffer,
std::vector<size_t>& valueOffset,
size_t& valueTotalLength) const;
};
}
} // namespace opencc

View file

@ -1,7 +1,7 @@
/*
* Open Chinese Convert
*
* Copyright 2010-2014 BYVoid <byvoid@byvoid.com>
* Copyright 2010-2014 Carbo Kuo <byvoid@byvoid.com>
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
@ -23,30 +23,14 @@
#pragma warning(disable : 4251 4266 4350 4503 4512 4514 4710 4820)
#endif
#include <algorithm>
#include <fstream>
#include <functional>
#include <iostream>
#include <list>
#include <map>
#include <cstddef>
#include <memory>
#include <sstream>
#include <string>
#include <vector>
#include <cassert>
#include <cstddef>
#include <cstdio>
#include <cstring>
#include <ctime>
#include "Exception.hpp"
#include "Export.hpp"
#include "Optional.hpp"
using std::list;
using std::string;
using std::vector;
#include "opencc_config.h"
// Forward declarations and aliases
namespace opencc {
@ -78,7 +62,7 @@ typedef std::shared_ptr<Segments> SegmentsPtr;
typedef std::shared_ptr<SerializableDict> SerializableDictPtr;
typedef std::shared_ptr<TextDict> TextDictPtr;
#ifdef ENABLE_DARTS
#ifdef OPENCC_ENABLE_DARTS
class BinaryDict;
class DartsDict;
typedef std::shared_ptr<BinaryDict> BinaryDictPtr;
@ -88,14 +72,11 @@ typedef std::shared_ptr<DartsDict> DartsDictPtr;
} // namespace opencc
#ifndef PKGDATADIR
const string PACKAGE_DATA_DIRECTORY = "";
const std::string PACKAGE_DATA_DIRECTORY = "";
#else // ifndef PKGDATADIR
const string PACKAGE_DATA_DIRECTORY = PKGDATADIR "/";
const std::string PACKAGE_DATA_DIRECTORY = PKGDATADIR "/";
#endif // ifndef PKGDATADIR
#ifndef VERSION
#define VERSION "1.0.*"
#endif // ifndef VERSION
// The following definitions are provided by CMake
// #define ENABLE_DARTS

View file

@ -1,7 +1,7 @@
/*
* Open Chinese Convert
*
* Copyright 2010-2014 BYVoid <byvoid@byvoid.com>
* Copyright 2010-2014 Carbo Kuo <byvoid@byvoid.com>
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
@ -22,20 +22,21 @@
namespace opencc {
/**
* Configuration loader
* @ingroup opencc_cpp_api
*/
* Configuration loader
* @ingroup opencc_cpp_api
*/
class OPENCC_EXPORT Config {
public:
Config();
virtual ~Config();
ConverterPtr NewFromString(const string& json, const string& configDirectory);
ConverterPtr NewFromString(const std::string& json,
const std::string& configDirectory);
ConverterPtr NewFromFile(const string& fileName);
ConverterPtr NewFromFile(const std::string& fileName);
private:
void* internal;
};
}
} // namespace opencc

View file

@ -1,7 +1,7 @@
/*
* Open Chinese Convert
*
* Copyright 2010-2014 BYVoid <byvoid@byvoid.com>
* Copyright 2010-2014 Carbo Kuo <byvoid@byvoid.com>
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
@ -23,18 +23,18 @@
namespace opencc {
/**
* Conversion interface
* @ingroup opencc_cpp_api
*/
* Conversion interface
* @ingroup opencc_cpp_api
*/
class OPENCC_EXPORT Conversion {
public:
Conversion(DictPtr _dict) : dict(_dict) {}
// Convert single phrase
string Convert(const string& phrase) const;
std::string Convert(const std::string& phrase) const;
// Convert single phrase
string Convert(const char* phrase) const;
std::string Convert(const char* phrase) const;
// Convert segmented text
SegmentsPtr Convert(const SegmentsPtr& input) const;
@ -44,4 +44,4 @@ public:
private:
const DictPtr dict;
};
}
} // namespace opencc

View file

@ -1,7 +1,7 @@
/*
* Open Chinese Convert
*
* Copyright 2010-2014 BYVoid <byvoid@byvoid.com>
* Copyright 2010-2014 Carbo Kuo <byvoid@byvoid.com>
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
@ -18,24 +18,26 @@
#pragma once
#include <list>
#include "Common.hpp"
#include "Conversion.hpp"
namespace opencc {
/**
* Chain of conversions
* Consists of a list of conversions. Converts input in sequence.
* @ingroup opencc_cpp_api
*/
* Chain of conversions
* Consists of a list of conversions. Converts input in sequence.
* @ingroup opencc_cpp_api
*/
class OPENCC_EXPORT ConversionChain {
public:
ConversionChain(const list<ConversionPtr> _conversions);
ConversionChain(const std::list<ConversionPtr> _conversions);
SegmentsPtr Convert(const SegmentsPtr& input) const;
const list<ConversionPtr> GetConversions() const { return conversions; }
const std::list<ConversionPtr> GetConversions() const { return conversions; }
private:
const list<ConversionPtr> conversions;
const std::list<ConversionPtr> conversions;
};
}
} // namespace opencc

View file

@ -1,7 +1,7 @@
/*
* Open Chinese Convert
*
* Copyright 2010-2014 BYVoid <byvoid@byvoid.com>
* Copyright 2010-2014 Carbo Kuo <byvoid@byvoid.com>
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
@ -23,17 +23,17 @@
namespace opencc {
/**
* Controller of segmentation and conversion
* @ingroup opencc_cpp_api
*/
* Controller of segmentation and conversion
* @ingroup opencc_cpp_api
*/
class OPENCC_EXPORT Converter {
public:
Converter(const string& _name, SegmentationPtr _segmentation,
Converter(const std::string& _name, SegmentationPtr _segmentation,
ConversionChainPtr _conversionChain)
: name(_name), segmentation(_segmentation),
conversionChain(_conversionChain) {}
string Convert(const string& text) const;
std::string Convert(const std::string& text) const;
size_t Convert(const char* input, char* output) const;
@ -44,8 +44,8 @@ public:
}
private:
const string name;
const std::string name;
const SegmentationPtr segmentation;
const ConversionChainPtr conversionChain;
};
}
} // namespace opencc

View file

@ -1,7 +1,7 @@
/*
* Open Chinese Convert
*
* Copyright 2010-2014 BYVoid <byvoid@byvoid.com>
* Copyright 2010-2014 Carbo Kuo <byvoid@byvoid.com>
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
@ -23,26 +23,27 @@
namespace opencc {
/**
* Darts dictionary
* @ingroup opencc_cpp_api
*/
* Darts dictionary
* @ingroup opencc_cpp_api
*/
class OPENCC_EXPORT DartsDict : public Dict, public SerializableDict {
public:
virtual ~DartsDict();
virtual size_t KeyMaxLength() const;
virtual Optional<const DictEntry*> Match(const char* word) const;
virtual Optional<const DictEntry*> Match(const char* word, size_t len) const;
virtual Optional<const DictEntry*> MatchPrefix(const char* word) const;
virtual Optional<const DictEntry*> MatchPrefix(const char* word,
size_t len) const;
virtual LexiconPtr GetLexicon() const;
virtual void SerializeToFile(FILE* fp) const;
/**
* Constructs a DartsDict from another dictionary.
*/
* Constructs a DartsDict from another dictionary.
*/
static DartsDictPtr NewFromDict(const Dict& thatDict);
static DartsDictPtr NewFromFile(FILE* fp);
@ -56,4 +57,4 @@ private:
class DartsInternal;
DartsInternal* internal;
};
}
} // namespace opencc

View file

@ -1,7 +1,7 @@
/*
* Open Chinese Convert
*
* Copyright 2010-2014 BYVoid <byvoid@byvoid.com>
* Copyright 2010-2020 Carbo Kuo <byvoid@byvoid.com>
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
@ -23,59 +23,70 @@
namespace opencc {
/**
* Abstract class of dictionary
* @ingroup opencc_cpp_api
*/
* Abstract class of dictionary
* @ingroup opencc_cpp_api
*/
class OPENCC_EXPORT Dict {
public:
/**
* Matches a word exactly and returns the DictEntry or Optional::Null().
*/
virtual Optional<const DictEntry*> Match(const char* word) const = 0;
* Matches a word exactly and returns the DictEntry or Optional::Null().
*/
virtual Optional<const DictEntry*> Match(const char* word,
size_t len) const = 0;
/**
* Matches a word exactly and returns the DictEntry or Optional::Null().
*/
Optional<const DictEntry*> Match(const string& word) const {
return Match(word.c_str());
* Matches a word exactly and returns the DictEntry or Optional::Null().
*/
Optional<const DictEntry*> Match(const std::string& word) const {
return Match(word.c_str(), word.length());
}
/**
* Matches the longest matched prefix of a word.
* For example given a dictionary having "a", "an", "b", "ba", "ban", "bana",
* the longest prefix of "banana" matched is "bana".
*/
virtual Optional<const DictEntry*> MatchPrefix(const char* word) const;
* Matches the longest matched prefix of a word.
* For example given a dictionary having "a", "an", "b", "ba", "ban", "bana",
* the longest prefix of "banana" matched is "bana".
*/
virtual Optional<const DictEntry*> MatchPrefix(const char* word,
size_t len) const;
/**
* Matches the longest matched prefix of a word.
*/
Optional<const DictEntry*> MatchPrefix(const string& word) const {
return MatchPrefix(word.c_str());
* Matches the longest matched prefix of a word.
*/
Optional<const DictEntry*> MatchPrefix(const char* word) const {
return MatchPrefix(word, KeyMaxLength());
}
/**
* Returns all matched prefixes of a word, sorted by the length (desc).
* For example given a dictionary having "a", "an", "b", "ba", "ban", "bana",
* all the matched prefixes of "banana" are "bana", "ban", "ba", "b".
*/
virtual vector<const DictEntry*> MatchAllPrefixes(const char* word) const;
/**
* Returns all matched prefixes of a word, sorted by the length (desc).
*/
vector<const DictEntry*> MatchAllPrefixes(const string& word) const {
return MatchAllPrefixes(word.c_str());
* Matches the longest matched prefix of a word.
*/
Optional<const DictEntry*> MatchPrefix(const std::string& word) const {
return MatchPrefix(word.c_str(), word.length());
}
/**
* Returns the length of the longest key in the dictionary.
*/
* Returns all matched prefixes of a word, sorted by the length (desc).
* For example given a dictionary having "a", "an", "b", "ba", "ban", "bana",
* all the matched prefixes of "banana" are "bana", "ban", "ba", "b".
*/
virtual std::vector<const DictEntry*> MatchAllPrefixes(const char* word,
size_t len) const;
/**
* Returns all matched prefixes of a word, sorted by the length (desc).
*/
std::vector<const DictEntry*>
MatchAllPrefixes(const std::string& word) const {
return MatchAllPrefixes(word.c_str(), word.length());
}
/**
* Returns the length of the longest key in the dictionary.
*/
virtual size_t KeyMaxLength() const = 0;
/**
* Returns all entries in the dictionary.
*/
* Returns all entries in the dictionary.
*/
virtual LexiconPtr GetLexicon() const = 0;
};
}
} // namespace opencc

View file

@ -1,7 +1,7 @@
/*
* Open Chinese Convert
*
* Copyright 2010-2017 BYVoid <byvoid@byvoid.com>
* Copyright 2010-2017 Carbo Kuo <byvoid@byvoid.com>
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
@ -22,9 +22,11 @@
namespace opencc {
/**
* Converts a dictionary from a format to another.
* @ingroup opencc_cpp_api
*/
OPENCC_EXPORT void ConvertDictionary(const string inputFileName, const string outputFileName,
const string formatFrom, const string formatTo);
}
* Converts a dictionary from a format to another.
* @ingroup opencc_cpp_api
*/
OPENCC_EXPORT void ConvertDictionary(const std::string& inputFileName,
const std::string& outputFileName,
const std::string& formatFrom,
const std::string& formatTo);
} // namespace opencc

View file

@ -1,7 +1,7 @@
/*
* Open Chinese Convert
*
* Copyright 2010-2020 BYVoid <byvoid@byvoid.com>
* Copyright 2010-2020 Carbo Kuo <byvoid@byvoid.com>
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
@ -33,13 +33,13 @@ public:
virtual std::string Key() const = 0;
virtual vector<std::string> Values() const = 0;
virtual std::vector<std::string> Values() const = 0;
virtual std::string GetDefault() const = 0;
virtual size_t NumValues() const = 0;
virtual string ToString() const = 0;
virtual std::string ToString() const = 0;
size_t KeyLength() const { return Key().length(); }
@ -55,42 +55,46 @@ public:
class OPENCC_EXPORT NoValueDictEntry : public DictEntry {
public:
NoValueDictEntry(const string& _key) : key(_key) {}
NoValueDictEntry(const std::string& _key) : key(_key) {}
virtual ~NoValueDictEntry() {}
virtual std::string Key() const { return key; }
virtual vector<std::string> Values() const { return vector<std::string>(); }
virtual std::vector<std::string> Values() const {
return std::vector<std::string>();
}
virtual std::string GetDefault() const { return key; }
virtual size_t NumValues() const { return 0; }
virtual string ToString() const { return key; }
virtual std::string ToString() const { return key; }
private:
string key;
std::string key;
};
class OPENCC_EXPORT SingleValueDictEntry : public DictEntry {
public:
virtual std::string Value() const = 0;
virtual vector<std::string> Values() const {
return vector<std::string>{Value()};
virtual std::vector<std::string> Values() const {
return std::vector<std::string>{Value()};
}
virtual std::string GetDefault() const { return Value(); }
virtual size_t NumValues() const { return 1; }
virtual string ToString() const { return string(Key()) + "\t" + Value(); }
virtual std::string ToString() const {
return std::string(Key()) + "\t" + Value();
}
};
class OPENCC_EXPORT StrSingleValueDictEntry : public SingleValueDictEntry {
public:
StrSingleValueDictEntry(const string& _key, const string& _value)
StrSingleValueDictEntry(const std::string& _key, const std::string& _value)
: key(_key), value(_value) {}
virtual ~StrSingleValueDictEntry() {}
@ -100,8 +104,8 @@ public:
virtual std::string Value() const { return value; }
private:
string key;
string value;
std::string key;
std::string value;
};
class OPENCC_EXPORT MultiValueDictEntry : public DictEntry {
@ -114,12 +118,13 @@ public:
}
}
virtual string ToString() const;
virtual std::string ToString() const;
};
class OPENCC_EXPORT StrMultiValueDictEntry : public MultiValueDictEntry {
public:
StrMultiValueDictEntry(const string& _key, const vector<std::string>& _values)
StrMultiValueDictEntry(const std::string& _key,
const std::vector<std::string>& _values)
: key(_key), values(_values) {}
virtual ~StrMultiValueDictEntry() {}
@ -128,22 +133,25 @@ public:
size_t NumValues() const { return values.size(); }
vector<std::string> Values() const { return values; }
std::vector<std::string> Values() const { return values; }
private:
string key;
vector<string> values;
std::string key;
std::vector<std::string> values;
};
class OPENCC_EXPORT DictEntryFactory {
public:
static DictEntry* New(const string& key) { return new NoValueDictEntry(key); }
static DictEntry* New(const std::string& key) {
return new NoValueDictEntry(key);
}
static DictEntry* New(const string& key, const string& value) {
static DictEntry* New(const std::string& key, const std::string& value) {
return new StrSingleValueDictEntry(key, value);
}
static DictEntry* New(const string& key, const vector<string>& values) {
static DictEntry* New(const std::string& key,
const std::vector<std::string>& values) {
if (values.size() == 0) {
return New(key);
} else if (values.size() == 1) {
@ -156,11 +164,9 @@ public:
if (entry->NumValues() == 0) {
return new NoValueDictEntry(entry->Key());
} else if (entry->NumValues() == 1) {
const auto svEntry = static_cast<const SingleValueDictEntry*>(entry);
return new StrSingleValueDictEntry(svEntry->Key(), svEntry->Value());
return new StrSingleValueDictEntry(entry->Key(), entry->Values().front());
} else {
const auto mvEntry = static_cast<const MultiValueDictEntry*>(entry);
return new StrMultiValueDictEntry(mvEntry->Key(), mvEntry->Values());
return new StrMultiValueDictEntry(entry->Key(), entry->Values());
}
}
};

View file

@ -1,7 +1,7 @@
/*
* Open Chinese Convert
*
* Copyright 2010-2014 BYVoid <byvoid@byvoid.com>
* Copyright 2010-2014 Carbo Kuo <byvoid@byvoid.com>
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
@ -18,17 +18,19 @@
#pragma once
#include <list>
#include "Common.hpp"
#include "Dict.hpp"
namespace opencc {
/**
* Group of dictionaries
* @ingroup opencc_cpp_api
*/
* Group of dictionaries
* @ingroup opencc_cpp_api
*/
class OPENCC_EXPORT DictGroup : public Dict {
public:
DictGroup(const list<DictPtr>& dicts);
DictGroup(const std::list<DictPtr>& dicts);
static DictGroupPtr NewFromDict(const Dict& dict);
@ -36,18 +38,20 @@ public:
virtual size_t KeyMaxLength() const;
virtual Optional<const DictEntry*> Match(const char* word) const;
virtual Optional<const DictEntry*> Match(const char* word, size_t len) const;
virtual Optional<const DictEntry*> MatchPrefix(const char* word) const;
virtual Optional<const DictEntry*> MatchPrefix(const char* word,
size_t len) const;
virtual vector<const DictEntry*> MatchAllPrefixes(const char* word) const;
virtual std::vector<const DictEntry*> MatchAllPrefixes(const char* word,
size_t len) const;
virtual LexiconPtr GetLexicon() const;
const list<DictPtr> GetDicts() const { return dicts; }
const std::list<DictPtr> GetDicts() const { return dicts; }
private:
const size_t keyMaxLength;
const list<DictPtr> dicts;
const std::list<DictPtr> dicts;
};
}
} // namespace opencc

View file

@ -1,7 +1,7 @@
/*
* Open Chinese Convert
*
* Copyright 2010-2014 BYVoid <byvoid@byvoid.com>
* Copyright 2010-2014 Carbo Kuo <byvoid@byvoid.com>
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
@ -25,7 +25,8 @@
#include "Export.hpp"
#if defined(_MSC_VER) && _MSC_VER < 1900
// Before Visual Studio 2015 (14.0), C++ 11 "noexcept" qualifier is not supported
// Before Visual Studio 2015 (14.0), C++ 11 "noexcept" qualifier is not
// supported
#define noexcept
#endif // ifdef _MSC_VER

View file

@ -1,7 +1,7 @@
/*
* Open Chinese Convert
*
* Copyright 2010-2014 BYVoid <byvoid@byvoid.com>
* Copyright 2010-2014 Carbo Kuo <byvoid@byvoid.com>
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.

View file

@ -1,7 +1,7 @@
/*
* Open Chinese Convert
*
* Copyright 2010-2014 BYVoid <byvoid@byvoid.com>
* Copyright 2010-2014 Carbo Kuo <byvoid@byvoid.com>
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
@ -29,7 +29,7 @@ namespace opencc {
class OPENCC_EXPORT Lexicon {
public:
Lexicon() {}
Lexicon(vector<std::unique_ptr<DictEntry>> entries_)
Lexicon(std::vector<std::unique_ptr<DictEntry>> entries_)
: entries(std::move(entries_)) {}
Lexicon(const Lexicon&) = delete;
Lexicon& operator=(const Lexicon&) = delete;
@ -41,28 +41,28 @@ public:
entries.push_back(std::move(entry));
}
void Sort() {
std::sort(entries.begin(), entries.end(), DictEntry::UPtrLessThan);
}
void Sort();
bool IsSorted() {
return std::is_sorted(entries.begin(), entries.end(),
DictEntry::UPtrLessThan);
}
// Returns true if the lexicon is sorted by key.
bool IsSorted();
// Returns true if every key is unique (after sorting).
// When dupkey is set, it is set to the duplicate key.
bool IsUnique(std::string* dupkey = nullptr);
const DictEntry* At(size_t index) const { return entries.at(index).get(); }
size_t Length() const { return entries.size(); }
vector<std::unique_ptr<DictEntry>>::const_iterator begin() const {
std::vector<std::unique_ptr<DictEntry>>::const_iterator begin() const {
return entries.begin();
}
vector<std::unique_ptr<DictEntry>>::const_iterator end() const {
std::vector<std::unique_ptr<DictEntry>>::const_iterator end() const {
return entries.end();
}
private:
vector<std::unique_ptr<DictEntry>> entries;
std::vector<std::unique_ptr<DictEntry>> entries;
};
} // namespace opencc

View file

@ -1,7 +1,7 @@
/*
* Open Chinese Convert
*
* Copyright 2020 BYVoid <byvoid@byvoid.com>
* Copyright 2020 Carbo Kuo <byvoid@byvoid.com>
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
@ -32,11 +32,13 @@ public:
virtual size_t KeyMaxLength() const;
virtual Optional<const DictEntry*> Match(const char* word) const;
virtual Optional<const DictEntry*> Match(const char* word, size_t len) const;
virtual Optional<const DictEntry*> MatchPrefix(const char* word) const;
virtual Optional<const DictEntry*> MatchPrefix(const char* word,
size_t len) const;
virtual vector<const DictEntry*> MatchAllPrefixes(const char* word) const;
virtual std::vector<const DictEntry*> MatchAllPrefixes(const char* word,
size_t len) const;
virtual LexiconPtr GetLexicon() const;

View file

@ -1,7 +1,7 @@
/*
* Open Chinese Convert
*
* Copyright 2010-2014 BYVoid <byvoid@byvoid.com>
* Copyright 2010-2014 Carbo Kuo <byvoid@byvoid.com>
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
@ -24,20 +24,20 @@
namespace opencc {
/**
* Implementation of maximal match segmentation
* @ingroup opencc_cpp_api
*/
* Implementation of maximal match segmentation
* @ingroup opencc_cpp_api
*/
class OPENCC_EXPORT MaxMatchSegmentation : public Segmentation {
public:
MaxMatchSegmentation(const DictPtr _dict) : dict(_dict) {}
virtual ~MaxMatchSegmentation() {}
virtual SegmentsPtr Segment(const string& text) const;
virtual SegmentsPtr Segment(const std::string& text) const;
const DictPtr GetDict() const { return dict; }
private:
const DictPtr dict;
};
}
} // namespace opencc

View file

@ -1,7 +1,7 @@
/*
* Open Chinese Convert
*
* Copyright 2010-2014 BYVoid <byvoid@byvoid.com>
* Copyright 2010-2014 Carbo Kuo <byvoid@byvoid.com>
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
@ -20,29 +20,29 @@
namespace opencc {
/**
* A class that wraps type T into a nullable type.
* @ingroup opencc_cpp_api
*/
* A class that wraps type T into a nullable type.
* @ingroup opencc_cpp_api
*/
template <typename T> class Optional {
public:
/**
* The constructor of Optional.
*/
* The constructor of Optional.
*/
Optional(T actual) : isNull(false), data(actual) {}
/**
* Returns true if the instance is null.
*/
* Returns true if the instance is null.
*/
bool IsNull() const { return isNull; }
/**
* Returns the containing data of the instance.
*/
* Returns the containing data of the instance.
*/
const T& Get() const { return data; }
/**
* Constructs a null instance.
*/
* Constructs a null instance.
*/
static Optional<T> Null() { return Optional(); }
private:
@ -53,10 +53,10 @@ private:
};
/**
* Specialization of Optional for pointers.
*
* Reduce a bool.
*/
* Specialization of Optional for pointers.
*
* Reduce a bool.
*/
template <typename T> class Optional<T*> {
private:
Optional() : data(nullptr) {}
@ -73,4 +73,4 @@ public:
static Optional<TPtr> Null() { return Optional(); }
};
}
} // namespace opencc

View file

@ -1,7 +1,7 @@
/*
* Open Chinese Convert
*
* Copyright 2015 BYVoid <byvoid@byvoid.com>
* Copyright 2015 Carbo Kuo <byvoid@byvoid.com>
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
@ -18,6 +18,7 @@
#pragma once
#include <functional>
#include <unordered_map>
#include "Common.hpp"
@ -35,7 +36,7 @@ public:
virtual ~PhraseExtract();
void Extract(const string& text) {
void Extract(const std::string& text) {
SetFullText(text);
ExtractSuffixes();
CalculateFrequency();
@ -49,7 +50,7 @@ public:
SelectWords();
}
void SetFullText(const string& fullText) {
void SetFullText(const std::string& fullText) {
utf8FullText = UTF8StringSlice(fullText.c_str());
}
@ -76,23 +77,25 @@ public:
}
// PreCalculationFilter is called after frequencies statistics.
void SetPreCalculationFilter(const std::function<
bool(const PhraseExtract&, const UTF8StringSlice8Bit&)>& filter) {
void SetPreCalculationFilter(
const std::function<bool(const PhraseExtract&,
const UTF8StringSlice8Bit&)>& filter) {
preCalculationFilter = filter;
}
void SetPostCalculationFilter(const std::function<
bool(const PhraseExtract&, const UTF8StringSlice8Bit&)>& filter) {
void SetPostCalculationFilter(
const std::function<bool(const PhraseExtract&,
const UTF8StringSlice8Bit&)>& filter) {
postCalculationFilter = filter;
}
void ReleaseSuffixes() { vector<UTF8StringSlice8Bit>().swap(suffixes); }
void ReleaseSuffixes() { std::vector<UTF8StringSlice8Bit>().swap(suffixes); }
void ReleasePrefixes() { vector<UTF8StringSlice8Bit>().swap(prefixes); }
void ReleasePrefixes() { std::vector<UTF8StringSlice8Bit>().swap(prefixes); }
const vector<UTF8StringSlice8Bit>& Words() const { return words; }
const std::vector<UTF8StringSlice8Bit>& Words() const { return words; }
const vector<UTF8StringSlice8Bit>& WordCandidates() const {
const std::vector<UTF8StringSlice8Bit>& WordCandidates() const {
return wordCandidates;
}
@ -155,8 +158,9 @@ private:
double CalculateCohesion(const UTF8StringSlice8Bit& wordCandidate) const;
double CalculateEntropy(const std::unordered_map<
UTF8StringSlice8Bit, size_t, UTF8StringSlice8Bit::Hasher>& choices) const;
double CalculateEntropy(
const std::unordered_map<UTF8StringSlice8Bit, size_t,
UTF8StringSlice8Bit::Hasher>& choices) const;
LengthType wordMinLength;
LengthType wordMaxLength;
@ -179,10 +183,10 @@ private:
UTF8StringSlice utf8FullText;
size_t totalOccurrence;
double logTotalOccurrence;
vector<UTF8StringSlice8Bit> prefixes;
vector<UTF8StringSlice8Bit> suffixes;
vector<UTF8StringSlice8Bit> wordCandidates;
vector<UTF8StringSlice8Bit> words;
std::vector<UTF8StringSlice8Bit> prefixes;
std::vector<UTF8StringSlice8Bit> suffixes;
std::vector<UTF8StringSlice8Bit> wordCandidates;
std::vector<UTF8StringSlice8Bit> words;
DictType* signals;
friend class PhraseExtractTest;

View file

@ -1,7 +1,7 @@
/*
* Open Chinese Convert
*
* Copyright 2010-2014 BYVoid <byvoid@byvoid.com>
* Copyright 2010-2014 Carbo Kuo <byvoid@byvoid.com>
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
@ -22,11 +22,11 @@
namespace opencc {
/**
* Abstract segmentation
* @ingroup opencc_cpp_api
*/
* Abstract segmentation
* @ingroup opencc_cpp_api
*/
class OPENCC_EXPORT Segmentation {
public:
virtual SegmentsPtr Segment(const string& text) const = 0;
virtual SegmentsPtr Segment(const std::string& text) const = 0;
};
}
} // namespace opencc

View file

@ -1,7 +1,7 @@
/*
* Open Chinese Convert
*
* Copyright 2010-2014 BYVoid <byvoid@byvoid.com>
* Copyright 2010-2014 Carbo Kuo <byvoid@byvoid.com>
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
@ -18,25 +18,27 @@
#pragma once
#include <sstream>
#include "Common.hpp"
namespace opencc {
/**
* Segmented text
* @ingroup opencc_cpp_api
*/
* Segmented text
* @ingroup opencc_cpp_api
*/
class OPENCC_EXPORT Segments {
public:
Segments() {}
Segments(std::initializer_list<const char*> initList) {
for (const string& item : initList) {
for (const std::string& item : initList) {
AddSegment(item);
}
}
Segments(std::initializer_list<string> initList) {
for (const string& item : initList) {
Segments(std::initializer_list<std::string> initList) {
for (const std::string& item : initList) {
AddSegment(item);
}
}
@ -46,7 +48,7 @@ public:
unmanaged.push_back(unmanagedString);
}
void AddSegment(const string& str) {
void AddSegment(const std::string& str) {
indexes.push_back(std::make_pair(managed.size(), true));
managed.push_back(str);
}
@ -91,7 +93,7 @@ public:
iterator end() const { return iterator(this, indexes.size()); }
string ToString() const {
std::string ToString() const {
// TODO implement a nested structure to reduce concatenation,
// like a purely functional differential list
std::ostringstream buffer;
@ -104,9 +106,9 @@ public:
private:
Segments(const Segments&) {}
vector<const char*> unmanaged;
vector<string> managed;
std::vector<const char*> unmanaged;
std::vector<std::string> managed;
// index, managed
vector<std::pair<size_t, bool>> indexes;
std::vector<std::pair<size_t, bool>> indexes;
};
}
} // namespace opencc

View file

@ -1,7 +1,7 @@
/*
* Open Chinese Convert
*
* Copyright 2010-2014 BYVoid <byvoid@byvoid.com>
* Copyright 2010-2014 Carbo Kuo <byvoid@byvoid.com>
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
@ -22,20 +22,20 @@
namespace opencc {
/**
* Serializable dictionary interface
* @ingroup opencc_cpp_api
*/
* Serializable dictionary interface
* @ingroup opencc_cpp_api
*/
class OPENCC_EXPORT SerializableDict {
public:
/**
* Serializes the dictionary and writes in to a file.
*/
* Serializes the dictionary and writes in to a file.
*/
virtual void SerializeToFile(FILE* fp) const = 0;
/**
* Serializes the dictionary and writes in to a file.
*/
virtual void SerializeToFile(const string& fileName) const {
* Serializes the dictionary and writes in to a file.
*/
virtual void SerializeToFile(const std::string& fileName) const {
FILE* fp = fopen(fileName.c_str(), "wb");
if (fp == NULL) {
throw FileNotWritable(fileName);
@ -45,16 +45,16 @@ public:
}
template <typename DICT>
static bool TryLoadFromFile(const string& fileName,
static bool TryLoadFromFile(const std::string& fileName,
std::shared_ptr<DICT>* dict) {
FILE* fp =
FILE* fp =
#ifdef _MSC_VER
// well, the 'GetPlatformString' shall return a 'wstring'
_wfopen(UTF8Util::GetPlatformString(fileName).c_str(), L"rb")
// well, the 'GetPlatformString' shall return a 'wstring'
_wfopen(UTF8Util::GetPlatformString(fileName).c_str(), L"rb")
#else
fopen(UTF8Util::GetPlatformString(fileName).c_str(), "rb")
fopen(UTF8Util::GetPlatformString(fileName).c_str(), "rb")
#endif // _MSC_VER
;
;
if (fp == NULL) {
return false;
@ -66,7 +66,7 @@ public:
}
template <typename DICT>
static std::shared_ptr<DICT> NewFromFile(const string& fileName) {
static std::shared_ptr<DICT> NewFromFile(const std::string& fileName) {
std::shared_ptr<DICT> dict;
if (!TryLoadFromFile<DICT>(fileName, &dict)) {
throw FileNotFound(fileName);
@ -74,4 +74,4 @@ public:
return dict;
}
};
}
} // namespace opencc

View file

@ -1,7 +1,7 @@
/*
* Open Chinese Convert
*
* Copyright 2020 BYVoid <byvoid@byvoid.com>
* Copyright 2020 Carbo Kuo <byvoid@byvoid.com>
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
@ -43,7 +43,8 @@ public:
private:
LexiconPtr lexicon;
void ConstructBuffer(string* valueBuffer, vector<uint16_t>* valueBytes,
void ConstructBuffer(std::string* valueBuffer,
std::vector<uint16_t>* valueBytes,
uint32_t* valueTotalLength) const;
};
} // namespace opencc

View file

@ -1,7 +1,7 @@
/*
* Open Chinese Convert
*
* Copyright 2010-2014 BYVoid <byvoid@byvoid.com>
* Copyright 2010-2014 Carbo Kuo <byvoid@byvoid.com>
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
@ -16,67 +16,72 @@
* limitations under the License.
*/
#include "Export.hpp"
#include <string>
#ifndef __OPENCC_SIMPLECONVERTER_HPP_
#define __OPENCC_SIMPLECONVERTER_HPP_
/**
* @defgroup opencc_simple_api OpenCC C++ Simple API
*
* Simple API in C++ language
*/
* @defgroup opencc_simple_api OpenCC C++ Simple API
*
* Simple API in C++ language
*/
namespace opencc {
/**
* A high level converter
* This interface does not require C++11 to compile.
* @ingroup opencc_simple_api
*/
* A high level converter
* This interface does not require C++11 to compile.
* @ingroup opencc_simple_api
*/
class OPENCC_EXPORT SimpleConverter {
public:
/**
* Constructor of SimpleConverter
* @param configFileName File name of configuration.
*/
* Constructor of SimpleConverter
* @param configFileName File name of configuration.
*/
SimpleConverter(const std::string& configFileName);
~SimpleConverter();
/**
* Converts a text
* @param input Text to be converted.
*/
* Converts a text
* @param input Text to be converted.
*/
std::string Convert(const std::string& input) const;
/**
* Converts a text
* @param input A C-Style string (terminated by '\0') to be converted.
*/
* Converts a text
* @param input A C-Style string (terminated by '\0') to be converted.
*/
std::string Convert(const char* input) const;
/**
* Converts a text
* @param input A C-Style string limited by a given length to be converted.
* @param length Maximal length in byte of the input string.
*/
* Converts a text
* @param input A C-Style string limited by a given length to be
* converted.
* @param length Maximal length in bytes of the input string.
*/
std::string Convert(const char* input, size_t length) const;
/**
* Converts a text and writes to an allocated buffer
* Please make sure the buffer has sufficent space.
* @param input A C-Style string (terminated by '\0') to be converted.
* @param output Buffer to write the converted text.
* @return Length of converted text.
*/
* Converts a text and writes to an allocated buffer
* Please make sure the buffer has sufficient space.
* @param input A C-Style string (terminated by '\0') to be converted.
* @param output Buffer to write the converted text.
* @return Length of converted text.
*/
size_t Convert(const char* input, char* output) const;
/**
* Converts a text and writes to an allocated buffer
* Please make sure the buffer has sufficent space.
* @param input A C-Style string limited by a given length to be converted.
* @param length Maximal length in byte of the input string.
* @param output Buffer to write the converted text.
* @return Length of converted text.
*/
* Converts a text and writes to an allocated buffer
* Please make sure the buffer has sufficient space.
* @param input A C-Style string limited by a given length to be
* converted.
* @param length Maximal length in bytes of the input string.
* @param output Buffer to write the converted text.
* @return Length of converted text.
*/
size_t Convert(const char* input, size_t length, char* output) const;
private:

View file

@ -1,7 +1,7 @@
/*
* Open Chinese Convert
*
* Copyright 2010-2014 BYVoid <byvoid@byvoid.com>
* Copyright 2010-2020 Carbo Kuo <byvoid@byvoid.com>
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
@ -23,30 +23,30 @@
namespace opencc {
/**
* Text dictionary
* @ingroup opencc_cpp_api
*/
* Text dictionary
* @ingroup opencc_cpp_api
*/
class OPENCC_EXPORT TextDict : public Dict, public SerializableDict {
public:
/**
* Constructor of TextDict.
* _lexicon must be sorted.
*/
* Constructor of TextDict.
* _lexicon must be sorted.
*/
TextDict(const LexiconPtr& _lexicon);
virtual ~TextDict();
virtual size_t KeyMaxLength() const;
virtual Optional<const DictEntry*> Match(const char* word) const;
virtual Optional<const DictEntry*> Match(const char* word, size_t len) const;
virtual LexiconPtr GetLexicon() const;
virtual void SerializeToFile(FILE* fp) const;
/**
* Constructs a TextDict from another dictionary.
*/
* Constructs a TextDict from another dictionary.
*/
static TextDictPtr NewFromDict(const Dict& dict);
static TextDictPtr NewFromFile(FILE* fp);
@ -57,4 +57,4 @@ private:
const size_t maxLength;
const LexiconPtr lexicon;
};
}
} // namespace opencc

View file

@ -1,7 +1,7 @@
/*
* Open Chinese Convert
*
* Copyright 2015 BYVoid <byvoid@byvoid.com>
* Copyright 2015 Carbo Kuo <byvoid@byvoid.com>
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
@ -16,6 +16,8 @@
* limitations under the License.
*/
#include <cstring>
#include "Common.hpp"
#include "UTF8Util.hpp"
@ -105,13 +107,13 @@ public:
}
}
string ToString() const { return string(str, str + byteLength); }
std::string ToString() const { return std::string(str, str + byteLength); }
const char* CString() const { return str; }
LengthType CommonPrefixLength(const UTF8StringSliceBase& that) const {
if (str == that.str) {
return std::min(utf8Length, that.utf8Length);
return (std::min)(utf8Length, that.utf8Length);
} else {
const char* pstr1 = str;
const char* pstr2 = that.str;
@ -149,13 +151,13 @@ public:
int ReverseCompare(const UTF8StringSliceBase& that) const {
const char* pstr1 = str + byteLength;
const char* pstr2 = that.str + that.byteLength;
const size_t length = std::min(utf8Length, that.utf8Length);
const size_t length = (std::min)(utf8Length, that.utf8Length);
for (size_t i = 0; i < length; i++) {
const size_t charLen1 = UTF8Util::PrevCharLength(pstr1);
const size_t charLen2 = UTF8Util::PrevCharLength(pstr2);
pstr1 -= charLen1;
pstr2 -= charLen2;
const int cmp = strncmp(pstr1, pstr2, std::min(charLen1, charLen2));
const int cmp = strncmp(pstr1, pstr2, (std::min)(charLen1, charLen2));
if (cmp < 0) {
return -1;
} else if (cmp > 0) {
@ -207,7 +209,7 @@ public:
private:
inline int Compare(const UTF8StringSliceBase& that) const {
int cmp = strncmp(str, that.str, std::min(byteLength, that.byteLength));
int cmp = strncmp(str, that.str, (std::min)(byteLength, that.byteLength));
if (cmp == 0) {
if (utf8Length < that.utf8Length) {
cmp = -1;

View file

@ -1,7 +1,7 @@
/*
* Open Chinese Convert
*
* Copyright 2013 BYVoid <byvoid@byvoid.com>
* Copyright 2013 Carbo Kuo <byvoid@byvoid.com>
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
@ -24,24 +24,27 @@
#undef NOMINMAX
#endif // _MSC_VER
#include <cstring>
#include "Common.hpp"
#include "Exception.hpp"
namespace opencc {
/**
* UTF8 string utilities
* @ingroup opencc_cpp_api
*/
* UTF8 string utilities
* @ingroup opencc_cpp_api
*/
class OPENCC_EXPORT UTF8Util {
public:
/**
* Detect UTF8 BOM and skip it.
*/
* Detect UTF8 BOM and skip it.
*/
static void SkipUtf8Bom(FILE* fp);
/**
* Returns the length in byte for the next UTF8 character.
* On error returns 0.
*/
* Returns the length in byte for the next UTF8 character.
* On error returns 0.
*/
static size_t NextCharLengthNoException(const char* str) {
char ch = *str;
if ((ch & 0xF0) == 0xE0) {
@ -72,8 +75,8 @@ public:
}
/**
* Returns the length in byte for the previous UTF8 character.
*/
* Returns the length in byte for the previous UTF8 character.
*/
static size_t PrevCharLength(const char* str) {
{
const size_t length = NextCharLengthNoException(str - 3);
@ -103,21 +106,21 @@ public:
}
/**
* Returns the char* pointer over the next UTF8 character.
*/
* Returns the char* pointer over the next UTF8 character.
*/
static const char* NextChar(const char* str) {
  // Advance by the byte width of the character that starts at str.
  const auto charBytes = NextCharLength(str);
  return str + charBytes;
}
/**
* Move the char* pointer before the previous UTF8 character.
*/
* Move the char* pointer before the previous UTF8 character.
*/
static const char* PrevChar(const char* str) {
  // Step back by the byte width of the character that ends just before str.
  const size_t charBytes = PrevCharLength(str);
  return str - charBytes;
}
/**
* Returns the UTF8 length of a valid UTF8 string.
* Returns the UTF8 length of a valid UTF8 string.
*/
static size_t Length(const char* str) {
size_t length = 0;
@ -129,11 +132,11 @@ public:
}
/**
* Finds a character in the same line.
* @param str The text to be searched in.
* @param ch The character to find.
* @return The pointer that points to the found character in str or EOL/EOF.
*/
* Finds a character in the same line.
* @param str The text to be searched in.
* @param ch The character to find.
* @return The pointer that points to the found character in str or EOL/EOF.
*/
static const char* FindNextInline(const char* str, const char ch) {
while (!IsLineEndingOrFileEnding(*str) && *str != ch) {
str = NextChar(str);
@ -142,25 +145,26 @@ public:
}
/**
* Returns ture if the character is a line ending or end of file.
*/
* Returns true if the character is a line ending or end of file.
*/
static bool IsLineEndingOrFileEnding(const char ch) {
  // NUL is treated as end of input; LF and CR each terminate a line.
  switch (ch) {
  case '\0':
  case '\n':
  case '\r':
    return true;
  default:
    return false;
  }
}
/**
* Copies a substring with given length to a new std::string.
*/
static string FromSubstr(const char* str, size_t length) {
string newStr;
* Copies a substring with given length to a new string.
*/
static std::string FromSubstr(const char* str, size_t length) {
std::string newStr;
newStr.resize(length);
strncpy(const_cast<char*>(newStr.c_str()), str, length);
return newStr;
}
/**
* Returns true if the given string is longer or as long as the given length.
*/
* Returns true if the given string is at least as long as the given
* length.
*/
static bool NotShorterThan(const char* str, size_t byteLength) {
while (byteLength > 0) {
if (*str == '\0') {
@ -173,11 +177,11 @@ public:
}
/**
* Truncates a string with a maximal length in byte.
* No UTF8 character will be broken.
*/
static string TruncateUTF8(const char* str, size_t maxByteLength) {
string wordTrunc;
* Truncates a string to a maximal length in bytes.
* No UTF8 character will be broken.
*/
static std::string TruncateUTF8(const char* str, size_t maxByteLength) {
std::string wordTrunc;
if (NotShorterThan(str, maxByteLength)) {
size_t len = 0;
const char* pStr = str;
@ -197,22 +201,23 @@ public:
}
/**
* Replaces all patterns in a string in place.
*/
static void ReplaceAll(string& str, const char* from, const char* to) {
string::size_type pos = 0;
string::size_type fromLen = strlen(from);
string::size_type toLen = strlen(to);
while ((pos = str.find(from, pos)) != string::npos) {
* Replaces all patterns in a std::string in place.
*/
static void ReplaceAll(std::string& str, const char* from, const char* to) {
std::string::size_type pos = 0;
std::string::size_type fromLen = strlen(from);
std::string::size_type toLen = strlen(to);
while ((pos = str.find(from, pos)) != std::string::npos) {
str.replace(pos, fromLen, to);
pos += toLen;
}
}
/**
* Joins a string vector in to a string with a separator.
*/
static string Join(const vector<string>& strings, const string& separator) {
* Joins a std::string vector in to a std::string with a separator.
*/
static std::string Join(const std::vector<std::string>& strings,
const std::string& separator) {
std::ostringstream buffer;
bool first = true;
for (const auto& str : strings) {
@ -226,9 +231,9 @@ public:
}
/**
* Joins a string vector in to a string.
*/
static string Join(const vector<string>& strings) {
* Joins a std::string vector in to a std::string.
*/
static std::string Join(const std::vector<std::string>& strings) {
std::ostringstream buffer;
for (const auto& str : strings) {
buffer << str;
@ -237,7 +242,7 @@ public:
}
static void GetByteMap(const char* str, const size_t utf8Length,
vector<size_t>* byteMap) {
std::vector<size_t>* byteMap) {
if (byteMap->size() < utf8Length) {
byteMap->resize(utf8Length);
}
@ -253,20 +258,19 @@ public:
return U8ToU16(str);
}
#else
static std::string GetPlatformString(const std::string& str) {
return str;
}
static std::string GetPlatformString(const std::string& str) { return str; }
#endif // _MSC_VER
#ifdef _MSC_VER
static std::string U16ToU8(const std::wstring& wstr) {
std::string ret;
int length = static_cast<int>(wstr.length());
int convcnt = WideCharToMultiByte(CP_UTF8, 0, wstr.c_str(), length, NULL, 0, NULL, NULL);
int convcnt = WideCharToMultiByte(CP_UTF8, 0, wstr.c_str(), length, NULL, 0,
NULL, NULL);
if (convcnt > 0) {
ret.resize(convcnt);
WideCharToMultiByte(CP_UTF8, 0, wstr.c_str(), length, &ret[0], convcnt, NULL, NULL);
WideCharToMultiByte(CP_UTF8, 0, wstr.c_str(), length, &ret[0], convcnt,
NULL, NULL);
}
return ret;
}
@ -283,4 +287,4 @@ public:
}
#endif // _MSC_VER
};
}
} // namespace opencc

View file

@ -1,7 +1,7 @@
/*
* Open Chinese Convert
*
* Copyright 2010-2014 BYVoid <byvoid@byvoid.com>
* Copyright 2010-2014 Carbo Kuo <byvoid@byvoid.com>
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
@ -21,9 +21,9 @@
#ifdef __cplusplus
#include <string>
#include "Export.hpp"
#include "SimpleConverter.hpp"
#include <string>
extern "C" {
#else
@ -35,118 +35,117 @@ extern "C" {
#endif
/**
* @defgroup opencc_c_api OpenCC C API
*
* API in C language
*/
* @defgroup opencc_c_api OpenCC C API
*
* API in C language
*/
/**
* Filename of default Simplified to Traditional configuration
*
* @ingroup opencc_c_api
*/
* Filename of default Simplified to Traditional configuration
*
* @ingroup opencc_c_api
*/
#define OPENCC_DEFAULT_CONFIG_SIMP_TO_TRAD "s2t.json"
/**
* Filename of default Traditional to Simplified configuration
*
* @ingroup opencc_c_api
*/
* Filename of default Traditional to Simplified configuration
*
* @ingroup opencc_c_api
*/
#define OPENCC_DEFAULT_CONFIG_TRAD_TO_SIMP "t2s.json"
/**
* Type of opencc descriptor
*
* @ingroup opencc_c_api
*/
* Type of opencc descriptor
*
* @ingroup opencc_c_api
*/
typedef void* opencc_t;
/**
* Makes an instance of opencc
*
* @param configFileName Location of configuration file. If this is set to NULL,
* OPENCC_DEFAULT_CONFIG_SIMP_TO_TRAD will be loaded.
* @return A description pointer of the newly allocated instance of
* opencc. On error the return value will be (opencc_t) -1.
* @ingroup opencc_c_api
*/
* Makes an instance of opencc
*
* @param configFileName Location of configuration file. If this is set to NULL,
* OPENCC_DEFAULT_CONFIG_SIMP_TO_TRAD will be loaded.
* @return A description pointer of the newly allocated instance of
* opencc. On error the return value will be (opencc_t) -1.
* @ingroup opencc_c_api
*/
OPENCC_EXPORT opencc_t opencc_open(const char* configFileName);
#ifdef _MSC_VER
/**
* Makes an instance of opencc (wide char / Unicode)
*
* @param configFileName Location of configuration file. If this is set to NULL,
* OPENCC_DEFAULT_CONFIG_SIMP_TO_TRAD will be loaded.
* @return A description pointer of the newly allocated instance of
* opencc. On error the return value will be (opencc_t) -1.
* @ingroup opencc_c_api
*/
* Makes an instance of opencc (wide char / Unicode)
*
* @param configFileName Location of configuration file. If this is set to NULL,
* OPENCC_DEFAULT_CONFIG_SIMP_TO_TRAD will be loaded.
* @return A description pointer of the newly allocated instance of
* opencc. On error the return value will be (opencc_t) -1.
* @ingroup opencc_c_api
*/
OPENCC_EXPORT opencc_t opencc_open_w(const wchar_t* configFileName);
#endif /* _MSC_VER */
/**
* Destroys an instance of opencc
*
* @param opencc The description pointer.
* @return 0 on success or non-zero number on failure.
* @ingroup opencc_c_api
*/
* Destroys an instance of opencc
*
* @param opencc The description pointer.
* @return 0 on success or non-zero number on failure.
* @ingroup opencc_c_api
*/
OPENCC_EXPORT int opencc_close(opencc_t opencc);
/**
* Converts UTF-8 string
*
* @param opencc The opencc description pointer.
* @param input The UTF-8 encoded string.
* @param length The maximum length in byte to convert. If length is (size_t)-1,
* the whole string (terminated by '\0') will be converted.
* @param output The buffer to store converted text. You MUST make sure this
* buffer has sufficient space.
*
* @return The length of converted string or (size_t)-1 on error.
*
* @ingroup opencc_c_api
*/
* Converts UTF-8 string
*
* @param opencc The opencc description pointer.
* @param input The UTF-8 encoded string.
* @param length The maximum length in bytes to convert. If length is (size_t)-1,
* the whole string (terminated by '\0') will be converted.
* @param output The buffer to store converted text. You MUST make sure this
* buffer has sufficient space.
*
* @return The length of converted string or (size_t)-1 on error.
*
* @ingroup opencc_c_api
*/
OPENCC_EXPORT size_t opencc_convert_utf8_to_buffer(opencc_t opencc,
const char* input,
size_t length,
char* output);
size_t length, char* output);
/**
* Converts UTF-8 string
* This function returns an allocated C-Style string, which stores
* the converted string.
* You MUST call opencc_convert_utf8_free() to release allocated memory.
*
* @param opencc The opencc description pointer.
* @param input The UTF-8 encoded string.
* @param length The maximum length in byte to convert. If length is (size_t)-1,
* the whole string (terminated by '\0') will be converted.
*
* @return The newly allocated UTF-8 string that stores text converted,
* or NULL on error.
* @ingroup opencc_c_api
*/
OPENCC_EXPORT char* opencc_convert_utf8(opencc_t opencc,
const char* input,
* Converts UTF-8 string
* This function returns an allocated C-Style string, which stores
* the converted string.
* You MUST call opencc_convert_utf8_free() to release allocated memory.
*
* @param opencc The opencc description pointer.
* @param input The UTF-8 encoded string.
* @param length The maximum length in bytes to convert. If length is (size_t)-1,
* the whole string (terminated by '\0') will be converted.
*
* @return The newly allocated UTF-8 string that stores the converted
* text, or NULL on error.
* @ingroup opencc_c_api
*/
OPENCC_EXPORT char* opencc_convert_utf8(opencc_t opencc, const char* input,
size_t length);
/**
* Releases allocated buffer by opencc_convert_utf8
*
* @param str Pointer to the allocated string buffer by opencc_convert_utf8.
*
* @ingroup opencc_c_api
*/
* Releases allocated buffer by opencc_convert_utf8
*
* @param str Pointer to the string buffer allocated by
* opencc_convert_utf8.
*
* @ingroup opencc_c_api
*/
OPENCC_EXPORT void opencc_convert_utf8_free(char* str);
/**
* Returns the last error message
*
* Note that this function is the only one which is NOT thread-safe.
*
* @ingroup opencc_c_api
*/
* Returns the last error message
*
* Note that this function is the only one which is NOT thread-safe.
*
* @ingroup opencc_c_api
*/
OPENCC_EXPORT const char* opencc_error(void);
#ifdef __cplusplus
@ -154,9 +153,9 @@ OPENCC_EXPORT const char* opencc_error(void);
#endif
/**
* @defgroup opencc_cpp_api OpenCC C++ Comprehensive API
*
* Comprehensive API in C++ language
*/
* @defgroup opencc_cpp_api OpenCC C++ Comprehensive API
*
* Comprehensive API in C++ language
*/
#endif

View file

@ -0,0 +1,21 @@
/*
* Open Chinese Convert
*
* Copyright 2021 Carbo Kuo <byvoid@byvoid.com>
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#pragma once
// Feature macro defined unconditionally by this header; code that includes it
// can test for it with #ifdef.
// NOTE(review): presumably switches on the Darts-backed dictionary support --
// confirm against the library's build configuration.
#define OPENCC_ENABLE_DARTS

BIN
winlibs/lib/dbg/opencc.dll Normal file

Binary file not shown.

BIN
winlibs/lib/dbg/opencc.lib Normal file

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

BIN
winlibs/lib/opencc.dll Normal file

Binary file not shown.

BIN
winlibs/lib/opencc.lib Normal file

Binary file not shown.