mirror of
https://github.com/xiaoyifang/goldendict-ng.git
synced 2024-11-23 16:04:06 +00:00
winlib: update opencc to 2020-04-26
This commit is contained in:
parent
d788ab9338
commit
b7c2169695
|
@ -552,8 +552,7 @@ CONFIG( chinese_conversion_support ) {
|
|||
SOURCES += chinese.cc \
|
||||
chineseconversion.cc
|
||||
win32-msvc* {
|
||||
Debug: LIBS += -lopenccd
|
||||
Release: LIBS += -lopencc
|
||||
LIBS += -lopencc
|
||||
} else {
|
||||
mac {
|
||||
LIBS += -lopencc.2
|
||||
|
|
Binary file not shown.
BIN
opencc/HKVariants.ocd2
Normal file
BIN
opencc/HKVariants.ocd2
Normal file
Binary file not shown.
Binary file not shown.
BIN
opencc/HKVariantsPhrases.ocd2
Normal file
BIN
opencc/HKVariantsPhrases.ocd2
Normal file
Binary file not shown.
BIN
opencc/HKVariantsRev.ocd2
Normal file
BIN
opencc/HKVariantsRev.ocd2
Normal file
Binary file not shown.
BIN
opencc/HKVariantsRevPhrases.ocd2
Normal file
BIN
opencc/HKVariantsRevPhrases.ocd2
Normal file
Binary file not shown.
BIN
opencc/JPShinjitaiCharacters.ocd2
Normal file
BIN
opencc/JPShinjitaiCharacters.ocd2
Normal file
Binary file not shown.
BIN
opencc/JPShinjitaiPhrases.ocd2
Normal file
BIN
opencc/JPShinjitaiPhrases.ocd2
Normal file
Binary file not shown.
BIN
opencc/JPVariants.ocd2
Normal file
BIN
opencc/JPVariants.ocd2
Normal file
Binary file not shown.
BIN
opencc/JPVariantsRev.ocd2
Normal file
BIN
opencc/JPVariantsRev.ocd2
Normal file
Binary file not shown.
Binary file not shown.
BIN
opencc/STCharacters.ocd2
Normal file
BIN
opencc/STCharacters.ocd2
Normal file
Binary file not shown.
Binary file not shown.
BIN
opencc/STPhrases.ocd2
Normal file
BIN
opencc/STPhrases.ocd2
Normal file
Binary file not shown.
Binary file not shown.
BIN
opencc/TSCharacters.ocd2
Normal file
BIN
opencc/TSCharacters.ocd2
Normal file
Binary file not shown.
Binary file not shown.
BIN
opencc/TSPhrases.ocd2
Normal file
BIN
opencc/TSPhrases.ocd2
Normal file
Binary file not shown.
BIN
opencc/TWPhrases.ocd2
Normal file
BIN
opencc/TWPhrases.ocd2
Normal file
Binary file not shown.
BIN
opencc/TWPhrasesRev.ocd2
Normal file
BIN
opencc/TWPhrasesRev.ocd2
Normal file
Binary file not shown.
Binary file not shown.
BIN
opencc/TWVariants.ocd2
Normal file
BIN
opencc/TWVariants.ocd2
Normal file
Binary file not shown.
BIN
opencc/TWVariantsRev.ocd2
Normal file
BIN
opencc/TWVariantsRev.ocd2
Normal file
Binary file not shown.
BIN
opencc/TWVariantsRevPhrases.ocd2
Normal file
BIN
opencc/TWVariantsRevPhrases.ocd2
Normal file
Binary file not shown.
56
opencc/copyright
Normal file
56
opencc/copyright
Normal file
|
@ -0,0 +1,56 @@
|
|||
Apache License
|
||||
Version 2.0, January 2004
|
||||
http://www.apache.org/licenses/
|
||||
|
||||
TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
|
||||
|
||||
1. Definitions.
|
||||
|
||||
"License" shall mean the terms and conditions for use, reproduction, and distribution as defined by Sections 1 through 9 of this document.
|
||||
|
||||
"Licensor" shall mean the copyright owner or entity authorized by the copyright owner that is granting the License.
|
||||
|
||||
"Legal Entity" shall mean the union of the acting entity and all other entities that control, are controlled by, or are under common control with that entity. For the purposes of this definition, "control" means (i) the power, direct or indirect, to cause the direction or management of such entity, whether by contract or otherwise, or (ii) ownership of fifty percent (50%) or more of the outstanding shares, or (iii) beneficial ownership of such entity.
|
||||
|
||||
"You" (or "Your") shall mean an individual or Legal Entity exercising permissions granted by this License.
|
||||
|
||||
"Source" form shall mean the preferred form for making modifications, including but not limited to software source code, documentation source, and configuration files.
|
||||
|
||||
"Object" form shall mean any form resulting from mechanical transformation or translation of a Source form, including but not limited to compiled object code, generated documentation, and conversions to other media types.
|
||||
|
||||
"Work" shall mean the work of authorship, whether in Source or Object form, made available under the License, as indicated by a copyright notice that is included in or attached to the work (an example is provided in the Appendix below).
|
||||
|
||||
"Derivative Works" shall mean any work, whether in Source or Object form, that is based on (or derived from) the Work and for which the editorial revisions, annotations, elaborations, or other modifications represent, as a whole, an original work of authorship. For the purposes of this License, Derivative Works shall not include works that remain separable from, or merely link (or bind by name) to the interfaces of, the Work and Derivative Works thereof.
|
||||
|
||||
"Contribution" shall mean any work of authorship, including the original version of the Work and any modifications or additions to that Work or Derivative Works thereof, that is intentionally submitted to Licensor for inclusion in the Work by the copyright owner or by an individual or Legal Entity authorized to submit on behalf of the copyright owner. For the purposes of this definition, "submitted" means any form of electronic, verbal, or written communication sent to the Licensor or its representatives, including but not limited to communication on electronic mailing lists, source code control systems, and issue tracking systems that are managed by, or on behalf of, the Licensor for the purpose of discussing and improving the Work, but excluding communication that is conspicuously marked or otherwise designated in writing by the copyright owner as "Not a Contribution."
|
||||
|
||||
"Contributor" shall mean Licensor and any individual or Legal Entity on behalf of whom a Contribution has been received by Licensor and subsequently incorporated within the Work.
|
||||
|
||||
2. Grant of Copyright License. Subject to the terms and conditions of this License, each Contributor hereby grants to You a perpetual, worldwide, non-exclusive, no-charge, royalty-free, irrevocable copyright license to reproduce, prepare Derivative Works of, publicly display, publicly perform, sublicense, and distribute the Work and such Derivative Works in Source or Object form.
|
||||
|
||||
3. Grant of Patent License. Subject to the terms and conditions of this License, each Contributor hereby grants to You a perpetual, worldwide, non-exclusive, no-charge, royalty-free, irrevocable (except as stated in this section) patent license to make, have made, use, offer to sell, sell, import, and otherwise transfer the Work, where such license applies only to those patent claims licensable by such Contributor that are necessarily infringed by their Contribution(s) alone or by combination of their Contribution(s) with the Work to which such Contribution(s) was submitted. If You institute patent litigation against any entity (including a cross-claim or counterclaim in a lawsuit) alleging that the Work or a Contribution incorporated within the Work constitutes direct or contributory patent infringement, then any patent licenses granted to You under this License for that Work shall terminate as of the date such litigation is filed.
|
||||
|
||||
4. Redistribution. You may reproduce and distribute copies of the Work or Derivative Works thereof in any medium, with or without modifications, and in Source or Object form, provided that You meet the following conditions:
|
||||
|
||||
1. You must give any other recipients of the Work or Derivative Works a copy of this License; and
|
||||
|
||||
2. You must cause any modified files to carry prominent notices stating that You changed the files; and
|
||||
|
||||
3. You must retain, in the Source form of any Derivative Works that You distribute, all copyright, patent, trademark, and attribution notices from the Source form of the Work, excluding those notices that do not pertain to any part of the Derivative Works; and
|
||||
|
||||
4. If the Work includes a "NOTICE" text file as part of its distribution, then any Derivative Works that You distribute must include a readable copy of the attribution notices contained within such NOTICE file, excluding those notices that do not pertain to any part of the Derivative Works, in at least one of the following places: within a NOTICE text file distributed as part of the Derivative Works; within the Source form or documentation, if provided along with the Derivative Works; or, within a display generated by the Derivative Works, if and wherever such third-party notices normally appear. The contents of the NOTICE file are for informational purposes only and do not modify the License. You may add Your own attribution notices within Derivative Works that You distribute, alongside or as an addendum to the NOTICE text from the Work, provided that such additional attribution notices cannot be construed as modifying the License.
|
||||
|
||||
You may add Your own copyright statement to Your modifications and may provide additional or different license terms and conditions for use, reproduction, or distribution of Your modifications, or for any such Derivative Works as a whole, provided Your use, reproduction, and distribution of the Work otherwise complies with the conditions stated in this License.
|
||||
|
||||
5. Submission of Contributions. Unless You explicitly state otherwise, any Contribution intentionally submitted for inclusion in the Work by You to the Licensor shall be under the terms and conditions of this License, without any additional terms or conditions. Notwithstanding the above, nothing herein shall supersede or modify the terms of any separate license agreement you may have executed with Licensor regarding such Contributions.
|
||||
|
||||
6. Trademarks. This License does not grant permission to use the trade names, trademarks, service marks, or product names of the Licensor, except as required for reasonable and customary use in describing the origin of the Work and reproducing the content of the NOTICE file.
|
||||
|
||||
7. Disclaimer of Warranty. Unless required by applicable law or agreed to in writing, Licensor provides the Work (and each Contributor provides its Contributions) on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied, including, without limitation, any warranties or conditions of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A PARTICULAR PURPOSE. You are solely responsible for determining the appropriateness of using or redistributing the Work and assume any risks associated with Your exercise of permissions under this License.
|
||||
|
||||
8. Limitation of Liability. In no event and under no legal theory, whether in tort (including negligence), contract, or otherwise, unless required by applicable law (such as deliberate and grossly negligent acts) or agreed to in writing, shall any Contributor be liable to You for damages, including any direct, indirect, special, incidental, or consequential damages of any character arising as a result of this License or out of the use or inability to use the Work (including but not limited to damages for loss of goodwill, work stoppage, computer failure or malfunction, or any and all other commercial damages or losses), even if such Contributor has been advised of the possibility of such damages.
|
||||
|
||||
9. Accepting Warranty or Additional Liability. While redistributing the Work or Derivative Works thereof, You may choose to offer, and charge a fee for, acceptance of support, warranty, indemnity, or other liability obligations and/or rights consistent with this License. However, in accepting such obligations, You may act only on Your own behalf and on Your sole responsibility, not on behalf of any other Contributor, and only if You agree to indemnify, defend, and hold each Contributor harmless for any liability incurred by, or claims asserted against, such Contributor by reason of your accepting any such warranty or additional liability.
|
||||
|
||||
END OF TERMS AND CONDITIONS
|
||||
|
33
opencc/hk2s.json
Normal file
33
opencc/hk2s.json
Normal file
|
@ -0,0 +1,33 @@
|
|||
{
|
||||
"name": "Traditional Chinese (Hong Kong standard) to Simplified Chinese",
|
||||
"segmentation": {
|
||||
"type": "mmseg",
|
||||
"dict": {
|
||||
"type": "ocd2",
|
||||
"file": "TSPhrases.ocd2"
|
||||
}
|
||||
},
|
||||
"conversion_chain": [{
|
||||
"dict": {
|
||||
"type": "group",
|
||||
"dicts": [{
|
||||
"type": "ocd2",
|
||||
"file": "HKVariantsRevPhrases.ocd2"
|
||||
}, {
|
||||
"type": "ocd2",
|
||||
"file": "HKVariantsRev.ocd2"
|
||||
}]
|
||||
}
|
||||
}, {
|
||||
"dict": {
|
||||
"type": "group",
|
||||
"dicts": [{
|
||||
"type": "ocd2",
|
||||
"file": "TSPhrases.ocd2"
|
||||
}, {
|
||||
"type": "ocd2",
|
||||
"file": "TSCharacters.ocd2"
|
||||
}]
|
||||
}
|
||||
}]
|
||||
}
|
25
opencc/jp2t.json
Normal file
25
opencc/jp2t.json
Normal file
|
@ -0,0 +1,25 @@
|
|||
{
|
||||
"name": "New Japanese Kanji (Shinjitai) to Traditional Chinese Characters (Kyūjitai)",
|
||||
"segmentation": {
|
||||
"type": "mmseg",
|
||||
"dict": {
|
||||
"type": "ocd2",
|
||||
"file": "JPShinjitaiPhrases.ocd2"
|
||||
}
|
||||
},
|
||||
"conversion_chain": [{
|
||||
"dict": {
|
||||
"type": "group",
|
||||
"dicts": [{
|
||||
"type": "ocd2",
|
||||
"file": "JPShinjitaiPhrases.ocd2"
|
||||
}, {
|
||||
"type": "ocd2",
|
||||
"file": "JPShinjitaiCharacters.ocd2"
|
||||
}, {
|
||||
"type": "ocd2",
|
||||
"file": "JPVariantsRev.ocd2"
|
||||
}]
|
||||
}
|
||||
}]
|
||||
}
|
|
@ -3,30 +3,30 @@
|
|||
"segmentation": {
|
||||
"type": "mmseg",
|
||||
"dict": {
|
||||
"type": "ocd",
|
||||
"file": "STPhrases.ocd"
|
||||
"type": "ocd2",
|
||||
"file": "STPhrases.ocd2"
|
||||
}
|
||||
},
|
||||
"conversion_chain": [{
|
||||
"dict": {
|
||||
"type": "group",
|
||||
"dicts": [{
|
||||
"type": "ocd",
|
||||
"file": "STPhrases.ocd"
|
||||
"type": "ocd2",
|
||||
"file": "STPhrases.ocd2"
|
||||
}, {
|
||||
"type": "ocd",
|
||||
"file": "STCharacters.ocd"
|
||||
"type": "ocd2",
|
||||
"file": "STCharacters.ocd2"
|
||||
}]
|
||||
}
|
||||
}, {
|
||||
"dict": {
|
||||
"type": "group",
|
||||
"dicts": [{
|
||||
"type": "ocd",
|
||||
"file": "HKVariantsPhrases.ocd"
|
||||
"type": "ocd2",
|
||||
"file": "HKVariantsPhrases.ocd2"
|
||||
}, {
|
||||
"type": "ocd",
|
||||
"file": "HKVariants.ocd"
|
||||
"type": "ocd2",
|
||||
"file": "HKVariants.ocd2"
|
||||
}]
|
||||
}
|
||||
}]
|
||||
|
|
22
opencc/s2t.json
Normal file
22
opencc/s2t.json
Normal file
|
@ -0,0 +1,22 @@
|
|||
{
|
||||
"name": "Simplified Chinese to Traditional Chinese",
|
||||
"segmentation": {
|
||||
"type": "mmseg",
|
||||
"dict": {
|
||||
"type": "ocd2",
|
||||
"file": "STPhrases.ocd2"
|
||||
}
|
||||
},
|
||||
"conversion_chain": [{
|
||||
"dict": {
|
||||
"type": "group",
|
||||
"dicts": [{
|
||||
"type": "ocd2",
|
||||
"file": "STPhrases.ocd2"
|
||||
}, {
|
||||
"type": "ocd2",
|
||||
"file": "STCharacters.ocd2"
|
||||
}]
|
||||
}
|
||||
}]
|
||||
}
|
|
@ -3,25 +3,25 @@
|
|||
"segmentation": {
|
||||
"type": "mmseg",
|
||||
"dict": {
|
||||
"type": "ocd",
|
||||
"file": "STPhrases.ocd"
|
||||
"type": "ocd2",
|
||||
"file": "STPhrases.ocd2"
|
||||
}
|
||||
},
|
||||
"conversion_chain": [{
|
||||
"dict": {
|
||||
"type": "group",
|
||||
"dicts": [{
|
||||
"type": "ocd",
|
||||
"file": "STPhrases.ocd"
|
||||
"type": "ocd2",
|
||||
"file": "STPhrases.ocd2"
|
||||
}, {
|
||||
"type": "ocd",
|
||||
"file": "STCharacters.ocd"
|
||||
"type": "ocd2",
|
||||
"file": "STCharacters.ocd2"
|
||||
}]
|
||||
}
|
||||
}, {
|
||||
"dict": {
|
||||
"type": "ocd",
|
||||
"file": "TWVariants.ocd"
|
||||
"type": "ocd2",
|
||||
"file": "TWVariants.ocd2"
|
||||
}
|
||||
}]
|
||||
}
|
||||
|
|
32
opencc/s2twp.json
Normal file
32
opencc/s2twp.json
Normal file
|
@ -0,0 +1,32 @@
|
|||
{
|
||||
"name": "Simplified Chinese to Traditional Chinese (Taiwan standard, with phrases)",
|
||||
"segmentation": {
|
||||
"type": "mmseg",
|
||||
"dict": {
|
||||
"type": "ocd2",
|
||||
"file": "STPhrases.ocd2"
|
||||
}
|
||||
},
|
||||
"conversion_chain": [{
|
||||
"dict": {
|
||||
"type": "group",
|
||||
"dicts": [{
|
||||
"type": "ocd2",
|
||||
"file": "STPhrases.ocd2"
|
||||
}, {
|
||||
"type": "ocd2",
|
||||
"file": "STCharacters.ocd2"
|
||||
}]
|
||||
}
|
||||
}, {
|
||||
"dict": {
|
||||
"type": "ocd2",
|
||||
"file": "TWPhrases.ocd2"
|
||||
}
|
||||
}, {
|
||||
"dict": {
|
||||
"type": "ocd2",
|
||||
"file": "TWVariants.ocd2"
|
||||
}
|
||||
}]
|
||||
}
|
22
opencc/t2hk.json
Normal file
22
opencc/t2hk.json
Normal file
|
@ -0,0 +1,22 @@
|
|||
{
|
||||
"name": "Traditional Chinese to Traditional Chinese (Hong Kong standard)",
|
||||
"segmentation": {
|
||||
"type": "mmseg",
|
||||
"dict": {
|
||||
"type": "ocd2",
|
||||
"file": "HKVariantsPhrases.ocd2"
|
||||
}
|
||||
},
|
||||
"conversion_chain": [{
|
||||
"dict": {
|
||||
"type": "group",
|
||||
"dicts": [{
|
||||
"type": "ocd2",
|
||||
"file": "HKVariantsPhrases.ocd2"
|
||||
}, {
|
||||
"type": "ocd2",
|
||||
"file": "HKVariants.ocd2"
|
||||
}]
|
||||
}
|
||||
}]
|
||||
}
|
16
opencc/t2jp.json
Normal file
16
opencc/t2jp.json
Normal file
|
@ -0,0 +1,16 @@
|
|||
{
|
||||
"name": "Traditional Chinese Characters (Kyūjitai) to New Japanese Kanji (Shinjitai)",
|
||||
"segmentation": {
|
||||
"type": "mmseg",
|
||||
"dict": {
|
||||
"type": "ocd2",
|
||||
"file": "JPVariants.ocd2"
|
||||
}
|
||||
},
|
||||
"conversion_chain": [{
|
||||
"dict": {
|
||||
"type": "ocd2",
|
||||
"file": "JPVariants.ocd2"
|
||||
}
|
||||
}]
|
||||
}
|
|
@ -3,19 +3,19 @@
|
|||
"segmentation": {
|
||||
"type": "mmseg",
|
||||
"dict": {
|
||||
"type": "ocd",
|
||||
"file": "TSPhrases.ocd"
|
||||
"type": "ocd2",
|
||||
"file": "TSPhrases.ocd2"
|
||||
}
|
||||
},
|
||||
"conversion_chain": [{
|
||||
"dict": {
|
||||
"type": "group",
|
||||
"dicts": [{
|
||||
"type": "ocd",
|
||||
"file": "TSPhrases.ocd"
|
||||
"type": "ocd2",
|
||||
"file": "TSPhrases.ocd2"
|
||||
}, {
|
||||
"type": "ocd",
|
||||
"file": "TSCharacters.ocd"
|
||||
"type": "ocd2",
|
||||
"file": "TSCharacters.ocd2"
|
||||
}]
|
||||
}
|
||||
}]
|
||||
|
|
16
opencc/t2tw.json
Normal file
16
opencc/t2tw.json
Normal file
|
@ -0,0 +1,16 @@
|
|||
{
|
||||
"name": "Traditional Chinese to Traditional Chinese (Taiwan standard)",
|
||||
"segmentation": {
|
||||
"type": "mmseg",
|
||||
"dict": {
|
||||
"type": "ocd2",
|
||||
"file": "TWVariants.ocd2"
|
||||
}
|
||||
},
|
||||
"conversion_chain": [{
|
||||
"dict": {
|
||||
"type": "ocd2",
|
||||
"file": "TWVariants.ocd2"
|
||||
}
|
||||
}]
|
||||
}
|
33
opencc/tw2s.json
Normal file
33
opencc/tw2s.json
Normal file
|
@ -0,0 +1,33 @@
|
|||
{
|
||||
"name": "Traditional Chinese (Taiwan standard) to Simplified Chinese",
|
||||
"segmentation": {
|
||||
"type": "mmseg",
|
||||
"dict": {
|
||||
"type": "ocd2",
|
||||
"file": "TSPhrases.ocd2"
|
||||
}
|
||||
},
|
||||
"conversion_chain": [{
|
||||
"dict": {
|
||||
"type": "group",
|
||||
"dicts": [{
|
||||
"type": "ocd2",
|
||||
"file": "TWVariantsRevPhrases.ocd2"
|
||||
}, {
|
||||
"type": "ocd2",
|
||||
"file": "TWVariantsRev.ocd2"
|
||||
}]
|
||||
}
|
||||
}, {
|
||||
"dict": {
|
||||
"type": "group",
|
||||
"dicts": [{
|
||||
"type": "ocd2",
|
||||
"file": "TSPhrases.ocd2"
|
||||
}, {
|
||||
"type": "ocd2",
|
||||
"file": "TSCharacters.ocd2"
|
||||
}]
|
||||
}
|
||||
}]
|
||||
}
|
36
opencc/tw2sp.json
Normal file
36
opencc/tw2sp.json
Normal file
|
@ -0,0 +1,36 @@
|
|||
{
|
||||
"name": "Traditional Chinese (Taiwan standard) to Simplified Chinese (with phrases)",
|
||||
"segmentation": {
|
||||
"type": "mmseg",
|
||||
"dict": {
|
||||
"type": "ocd2",
|
||||
"file": "TSPhrases.ocd2"
|
||||
}
|
||||
},
|
||||
"conversion_chain": [{
|
||||
"dict": {
|
||||
"type": "group",
|
||||
"dicts": [{
|
||||
"type": "ocd2",
|
||||
"file": "TWPhrasesRev.ocd2"
|
||||
}, {
|
||||
"type": "ocd2",
|
||||
"file": "TWVariantsRevPhrases.ocd2"
|
||||
}, {
|
||||
"type": "ocd2",
|
||||
"file": "TWVariantsRev.ocd2"
|
||||
}]
|
||||
}
|
||||
}, {
|
||||
"dict": {
|
||||
"type": "group",
|
||||
"dicts": [{
|
||||
"type": "ocd2",
|
||||
"file": "TSPhrases.ocd2"
|
||||
}, {
|
||||
"type": "ocd2",
|
||||
"file": "TSCharacters.ocd2"
|
||||
}]
|
||||
}
|
||||
}]
|
||||
}
|
18
opencc/vcpkg_abi_info.txt
Normal file
18
opencc/vcpkg_abi_info.txt
Normal file
|
@ -0,0 +1,18 @@
|
|||
cmake 3.21.1
|
||||
features core
|
||||
portfile.cmake 9863613dceb7a5268bbb2d767d9098c35400b8a881a0815bfe7dbe36e793868f
|
||||
ports.cmake e01bcbe535b11a2fbda56cffd590218ddeb18cb899c00afb2ac22a2301df2b09
|
||||
post_build_checks 2
|
||||
powershell 7.2.1
|
||||
triplet x64-windows-rel
|
||||
triplet_abi 4970dad5b87bdaa6176cb4f981bb32f09d8957ff9babdaf321bdb98198621b56-b569c9954a47274946415ff01b1a344c8549f3fc19cb931d50bc09d1e5630c1d-cc5606e89ff68c454d07a2425977151c05eb9158
|
||||
vcpkg.json d7125dc7d159ce1190e81cbc30003b28a55e53e7d88a455903040003606a0976
|
||||
vcpkg_add_to_path 5d7b62ed9fa23a49d3d842206e95e4211ab25b41321e30b6ddd9208febed9d18
|
||||
vcpkg_configure_cmake 0b91a87ccb0659953d3be8d9b5b82dc9bf680b7b84b50eba8f8790c6d27b03d3
|
||||
vcpkg_copy_pdbs 59e4c0d2321697848a899ba9537394cdee81725b11965c6d1341df53a6850380
|
||||
vcpkg_copy_tool_dependencies 4655cdf0d283d2e6bce5e2aee3745ab0bfd7e4fb048c532274de53ef56389ea9
|
||||
vcpkg_find_acquire_program a1240f46d8e1cb7eaacb8ac882ce597e5370be9b387d8a048d178d3b842b3520
|
||||
vcpkg_fixup_pkgconfig ff54f8b06c83b54a1af5e35286a6c36ea9e69376bed1d11b5521c254987c123f
|
||||
vcpkg_from_git 05d446731ca8f6cefe0e4dc04c17776e54b39d4f2bfeeec7952ced1cfe8bf89f
|
||||
vcpkg_from_github 1929b9ee1417dbf59f8a25ac321ef9ca792b6d67aee38bda69ee3700ea256b73
|
||||
vcpkg_install_cmake 6430f4795e65c4c44c545c590d431fe1a68d7444255d0da58362a267bbf6408d
|
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
|
@ -1,33 +0,0 @@
|
|||
{
|
||||
"name": "Simplified Chinese to Traditional Chinese (Hong Kong standard)",
|
||||
"segmentation": {
|
||||
"type": "mmseg",
|
||||
"dict": {
|
||||
"type": "ocd",
|
||||
"file": "STPhrases.ocd"
|
||||
}
|
||||
},
|
||||
"conversion_chain": [{
|
||||
"dict": {
|
||||
"type": "group",
|
||||
"dicts": [{
|
||||
"type": "ocd",
|
||||
"file": "STPhrases.ocd"
|
||||
}, {
|
||||
"type": "ocd",
|
||||
"file": "STCharacters.ocd"
|
||||
}]
|
||||
}
|
||||
}, {
|
||||
"dict": {
|
||||
"type": "group",
|
||||
"dicts": [{
|
||||
"type": "ocd",
|
||||
"file": "HKVariantsPhrases.ocd"
|
||||
}, {
|
||||
"type": "ocd",
|
||||
"file": "HKVariants.ocd"
|
||||
}]
|
||||
}
|
||||
}]
|
||||
}
|
|
@ -1,27 +0,0 @@
|
|||
{
|
||||
"name": "Simplified Chinese to Traditional Chinese (Taiwan standard)",
|
||||
"segmentation": {
|
||||
"type": "mmseg",
|
||||
"dict": {
|
||||
"type": "ocd",
|
||||
"file": "STPhrases.ocd"
|
||||
}
|
||||
},
|
||||
"conversion_chain": [{
|
||||
"dict": {
|
||||
"type": "group",
|
||||
"dicts": [{
|
||||
"type": "ocd",
|
||||
"file": "STPhrases.ocd"
|
||||
}, {
|
||||
"type": "ocd",
|
||||
"file": "STCharacters.ocd"
|
||||
}]
|
||||
}
|
||||
}, {
|
||||
"dict": {
|
||||
"type": "ocd",
|
||||
"file": "TWVariants.ocd"
|
||||
}
|
||||
}]
|
||||
}
|
|
@ -1,22 +0,0 @@
|
|||
{
|
||||
"name": "Traditional Chinese to Simplified Chinese",
|
||||
"segmentation": {
|
||||
"type": "mmseg",
|
||||
"dict": {
|
||||
"type": "ocd",
|
||||
"file": "TSPhrases.ocd"
|
||||
}
|
||||
},
|
||||
"conversion_chain": [{
|
||||
"dict": {
|
||||
"type": "group",
|
||||
"dicts": [{
|
||||
"type": "ocd",
|
||||
"file": "TSPhrases.ocd"
|
||||
}, {
|
||||
"type": "ocd",
|
||||
"file": "TSCharacters.ocd"
|
||||
}]
|
||||
}
|
||||
}]
|
||||
}
|
|
@ -50,16 +50,15 @@ using std::vector;
|
|||
|
||||
// Forward declarations and aliases
|
||||
namespace opencc {
|
||||
class BinaryDict;
|
||||
class Config;
|
||||
class Conversion;
|
||||
class ConversionChain;
|
||||
class Converter;
|
||||
class DartsDict;
|
||||
class Dict;
|
||||
class DictEntry;
|
||||
class DictGroup;
|
||||
class Lexicon;
|
||||
class MarisaDict;
|
||||
class MultiValueDictEntry;
|
||||
class NoValueDictEntry;
|
||||
class Segmentation;
|
||||
|
@ -67,26 +66,36 @@ class Segments;
|
|||
class SerializableDict;
|
||||
class SingleValueDictEntry;
|
||||
class TextDict;
|
||||
typedef std::shared_ptr<BinaryDict> BinaryDictPtr;
|
||||
typedef std::shared_ptr<Conversion> ConversionPtr;
|
||||
typedef std::shared_ptr<ConversionChain> ConversionChainPtr;
|
||||
typedef std::shared_ptr<Converter> ConverterPtr;
|
||||
typedef std::shared_ptr<DartsDict> DartsDictPtr;
|
||||
typedef std::shared_ptr<Dict> DictPtr;
|
||||
typedef std::shared_ptr<DictGroup> DictGroupPtr;
|
||||
typedef std::shared_ptr<Lexicon> LexiconPtr;
|
||||
typedef std::shared_ptr<MarisaDict> MarisaDictPtr;
|
||||
typedef std::shared_ptr<Segmentation> SegmentationPtr;
|
||||
typedef std::shared_ptr<Segments> SegmentsPtr;
|
||||
typedef std::shared_ptr<SerializableDict> SerializableDictPtr;
|
||||
typedef std::shared_ptr<TextDict> TextDictPtr;
|
||||
}
|
||||
|
||||
#ifdef ENABLE_DARTS
|
||||
class BinaryDict;
|
||||
class DartsDict;
|
||||
typedef std::shared_ptr<BinaryDict> BinaryDictPtr;
|
||||
typedef std::shared_ptr<DartsDict> DartsDictPtr;
|
||||
#endif
|
||||
|
||||
} // namespace opencc
|
||||
|
||||
#ifndef PKGDATADIR
|
||||
const string PACKAGE_DATA_DIRECTORY = "";
|
||||
#else // ifndef PKGDATADIR
|
||||
#else // ifndef PKGDATADIR
|
||||
const string PACKAGE_DATA_DIRECTORY = PKGDATADIR "/";
|
||||
#endif // ifndef PKGDATADIR
|
||||
|
||||
#ifndef VERSION
|
||||
#define VERSION "1.0.*"
|
||||
#endif // ifndef VERSION
|
||||
|
||||
// The following definitions are provided by CMake
|
||||
// #define ENABLE_DARTS
|
||||
|
|
30
winlibs/include/opencc/DictConverter.hpp
Normal file
30
winlibs/include/opencc/DictConverter.hpp
Normal file
|
@ -0,0 +1,30 @@
|
|||
/*
|
||||
* Open Chinese Convert
|
||||
*
|
||||
* Copyright 2010-2017 BYVoid <byvoid@byvoid.com>
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "Common.hpp"
|
||||
|
||||
namespace opencc {
|
||||
/**
|
||||
* Converts a dictionary from one format to another.
|
||||
* @ingroup opencc_cpp_api
|
||||
*/
|
||||
OPENCC_EXPORT void ConvertDictionary(const string inputFileName, const string outputFileName,
|
||||
const string formatFrom, const string formatTo);
|
||||
}
|
|
@ -1,7 +1,7 @@
|
|||
/*
|
||||
* Open Chinese Convert
|
||||
*
|
||||
* Copyright 2010-2014 BYVoid <byvoid@byvoid.com>
|
||||
* Copyright 2010-2020 BYVoid <byvoid@byvoid.com>
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
|
@ -19,39 +19,36 @@
|
|||
#pragma once
|
||||
|
||||
#include "Common.hpp"
|
||||
#include "UTF8Util.hpp"
|
||||
#include "Segments.hpp"
|
||||
#include "UTF8Util.hpp"
|
||||
|
||||
namespace opencc {
|
||||
/**
|
||||
* Key-values pair entry
|
||||
* @ingroup opencc_cpp_api
|
||||
*/
|
||||
* Key-values pair entry
|
||||
* @ingroup opencc_cpp_api
|
||||
*/
|
||||
class OPENCC_EXPORT DictEntry {
|
||||
public:
|
||||
virtual ~DictEntry() {}
|
||||
|
||||
virtual const char* Key() const = 0;
|
||||
virtual std::string Key() const = 0;
|
||||
|
||||
virtual vector<const char*> Values() const = 0;
|
||||
virtual vector<std::string> Values() const = 0;
|
||||
|
||||
virtual const char* GetDefault() const = 0;
|
||||
virtual std::string GetDefault() const = 0;
|
||||
|
||||
virtual size_t NumValues() const = 0;
|
||||
|
||||
virtual string ToString() const = 0;
|
||||
|
||||
size_t KeyLength() const { return strlen(Key()); }
|
||||
size_t KeyLength() const { return Key().length(); }
|
||||
|
||||
bool operator<(const DictEntry& that) const {
|
||||
return strcmp(Key(), that.Key()) < 0;
|
||||
}
|
||||
bool operator<(const DictEntry& that) const { return Key() < that.Key(); }
|
||||
|
||||
bool operator==(const DictEntry& that) const {
|
||||
return strcmp(Key(), that.Key()) == 0;
|
||||
}
|
||||
bool operator==(const DictEntry& that) const { return Key() == that.Key(); }
|
||||
|
||||
static bool PtrLessThan(const DictEntry* a, const DictEntry* b) {
|
||||
static bool UPtrLessThan(const std::unique_ptr<DictEntry>& a,
|
||||
const std::unique_ptr<DictEntry>& b) {
|
||||
return *a < *b;
|
||||
}
|
||||
};
|
||||
|
@ -62,11 +59,11 @@ public:
|
|||
|
||||
virtual ~NoValueDictEntry() {}
|
||||
|
||||
virtual const char* Key() const { return key.c_str(); }
|
||||
virtual std::string Key() const { return key; }
|
||||
|
||||
virtual vector<const char*> Values() const { return vector<const char*>(); }
|
||||
virtual vector<std::string> Values() const { return vector<std::string>(); }
|
||||
|
||||
virtual const char* GetDefault() const { return Key(); }
|
||||
virtual std::string GetDefault() const { return key; }
|
||||
|
||||
virtual size_t NumValues() const { return 0; }
|
||||
|
||||
|
@ -78,13 +75,13 @@ private:
|
|||
|
||||
class OPENCC_EXPORT SingleValueDictEntry : public DictEntry {
|
||||
public:
|
||||
virtual const char* Value() const = 0;
|
||||
virtual std::string Value() const = 0;
|
||||
|
||||
virtual vector<const char*> Values() const {
|
||||
return vector<const char*>{Value()};
|
||||
virtual vector<std::string> Values() const {
|
||||
return vector<std::string>{Value()};
|
||||
}
|
||||
|
||||
virtual const char* GetDefault() const { return Value(); }
|
||||
virtual std::string GetDefault() const { return Value(); }
|
||||
|
||||
virtual size_t NumValues() const { return 1; }
|
||||
|
||||
|
@ -98,9 +95,9 @@ public:
|
|||
|
||||
virtual ~StrSingleValueDictEntry() {}
|
||||
|
||||
virtual const char* Key() const { return key.c_str(); }
|
||||
virtual std::string Key() const { return key; }
|
||||
|
||||
virtual const char* Value() const { return value.c_str(); }
|
||||
virtual std::string Value() const { return value; }
|
||||
|
||||
private:
|
||||
string key;
|
||||
|
@ -109,7 +106,7 @@ private:
|
|||
|
||||
class OPENCC_EXPORT MultiValueDictEntry : public DictEntry {
|
||||
public:
|
||||
virtual const char* GetDefault() const {
|
||||
virtual std::string GetDefault() const {
|
||||
if (NumValues() > 0) {
|
||||
return Values().at(0);
|
||||
} else {
|
||||
|
@ -122,54 +119,22 @@ public:
|
|||
|
||||
class OPENCC_EXPORT StrMultiValueDictEntry : public MultiValueDictEntry {
|
||||
public:
|
||||
StrMultiValueDictEntry(const string& _key, const vector<string>& _values)
|
||||
StrMultiValueDictEntry(const string& _key, const vector<std::string>& _values)
|
||||
: key(_key), values(_values) {}
|
||||
|
||||
StrMultiValueDictEntry(const string& _key, const vector<const char*>& _values)
|
||||
: key(_key) {
|
||||
values.reserve(_values.size());
|
||||
for (const char* str : _values) {
|
||||
values.push_back(str);
|
||||
}
|
||||
}
|
||||
|
||||
virtual ~StrMultiValueDictEntry() {}
|
||||
|
||||
virtual const char* Key() const { return key.c_str(); }
|
||||
virtual std::string Key() const { return key; }
|
||||
|
||||
size_t NumValues() const { return values.size(); }
|
||||
|
||||
vector<const char*> Values() const {
|
||||
vector<const char*> retsult;
|
||||
for (const string& value : this->values) {
|
||||
retsult.push_back(value.c_str());
|
||||
}
|
||||
return retsult;
|
||||
}
|
||||
vector<std::string> Values() const { return values; }
|
||||
|
||||
private:
|
||||
string key;
|
||||
vector<string> values;
|
||||
};
|
||||
|
||||
class OPENCC_EXPORT PtrDictEntry : public MultiValueDictEntry {
|
||||
public:
|
||||
PtrDictEntry(const char* _key, const vector<const char*>& _values)
|
||||
: key(_key), values(_values) {}
|
||||
|
||||
virtual ~PtrDictEntry() {}
|
||||
|
||||
virtual const char* Key() const { return key; }
|
||||
|
||||
size_t NumValues() const { return values.size(); }
|
||||
|
||||
vector<const char*> Values() const { return values; }
|
||||
|
||||
private:
|
||||
const char* key;
|
||||
vector<const char*> values;
|
||||
};
|
||||
|
||||
class OPENCC_EXPORT DictEntryFactory {
|
||||
public:
|
||||
static DictEntry* New(const string& key) { return new NoValueDictEntry(key); }
|
||||
|
@ -179,6 +144,11 @@ public:
|
|||
}
|
||||
|
||||
static DictEntry* New(const string& key, const vector<string>& values) {
|
||||
if (values.size() == 0) {
|
||||
return New(key);
|
||||
} else if (values.size() == 1) {
|
||||
return New(key, values.front());
|
||||
}
|
||||
return new StrMultiValueDictEntry(key, values);
|
||||
}
|
||||
|
||||
|
@ -194,4 +164,4 @@ public:
|
|||
}
|
||||
}
|
||||
};
|
||||
}
|
||||
} // namespace opencc
|
||||
|
|
|
@ -24,9 +24,8 @@
|
|||
|
||||
#include "Export.hpp"
|
||||
|
||||
#ifdef _MSC_VER
|
||||
|
||||
// Until Visual Studio 2013 (12.0), C++ 11 "noexcept" qualifier is not supported
|
||||
#if defined(_MSC_VER) && _MSC_VER < 1900
|
||||
// Before Visual Studio 2015 (14.0), C++ 11 "noexcept" qualifier is not supported
|
||||
#define noexcept
|
||||
#endif // ifdef _MSC_VER
|
||||
|
||||
|
|
|
@ -23,34 +23,46 @@
|
|||
|
||||
namespace opencc {
|
||||
/**
|
||||
* Storage of all entries
|
||||
* @ingroup opencc_cpp_api
|
||||
*/
|
||||
* Storage of all entries
|
||||
* @ingroup opencc_cpp_api
|
||||
*/
|
||||
class OPENCC_EXPORT Lexicon {
|
||||
public:
|
||||
Lexicon() {}
|
||||
Lexicon(vector<std::unique_ptr<DictEntry>> entries_)
|
||||
: entries(std::move(entries_)) {}
|
||||
Lexicon(const Lexicon&) = delete;
|
||||
Lexicon& operator=(const Lexicon&) = delete;
|
||||
|
||||
~Lexicon() {
|
||||
for (DictEntry* entry : entries) {
|
||||
delete entry;
|
||||
}
|
||||
// Lexicon will take the ownership of the entry.
|
||||
void Add(DictEntry* entry) { entries.emplace_back(entry); }
|
||||
|
||||
void Add(std::unique_ptr<DictEntry> entry) {
|
||||
entries.push_back(std::move(entry));
|
||||
}
|
||||
|
||||
void Add(DictEntry* entry) { entries.push_back(entry); }
|
||||
|
||||
void Sort() {
|
||||
std::sort(entries.begin(), entries.end(), DictEntry::PtrLessThan);
|
||||
std::sort(entries.begin(), entries.end(), DictEntry::UPtrLessThan);
|
||||
}
|
||||
|
||||
const DictEntry* At(size_t index) const { return entries.at(index); }
|
||||
bool IsSorted() {
|
||||
return std::is_sorted(entries.begin(), entries.end(),
|
||||
DictEntry::UPtrLessThan);
|
||||
}
|
||||
|
||||
const DictEntry* At(size_t index) const { return entries.at(index).get(); }
|
||||
|
||||
size_t Length() const { return entries.size(); }
|
||||
|
||||
vector<DictEntry*>::const_iterator begin() const { return entries.begin(); }
|
||||
vector<std::unique_ptr<DictEntry>>::const_iterator begin() const {
|
||||
return entries.begin();
|
||||
}
|
||||
|
||||
vector<DictEntry*>::const_iterator end() const { return entries.end(); }
|
||||
vector<std::unique_ptr<DictEntry>>::const_iterator end() const {
|
||||
return entries.end();
|
||||
}
|
||||
|
||||
private:
|
||||
vector<DictEntry*> entries;
|
||||
vector<std::unique_ptr<DictEntry>> entries;
|
||||
};
|
||||
}
|
||||
} // namespace opencc
|
||||
|
|
61
winlibs/include/opencc/MarisaDict.hpp
Normal file
61
winlibs/include/opencc/MarisaDict.hpp
Normal file
|
@ -0,0 +1,61 @@
|
|||
/*
|
||||
* Open Chinese Convert
|
||||
*
|
||||
* Copyright 2020 BYVoid <byvoid@byvoid.com>
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "Common.hpp"
|
||||
#include "SerializableDict.hpp"
|
||||
|
||||
namespace opencc {
|
||||
/**
|
||||
* Marisa dictionary
|
||||
* @ingroup opencc_cpp_api
|
||||
*/
|
||||
// MarisaDict derives from both Dict and SerializableDict. Every member
// function is only declared in this header; the definitions live elsewhere.
class OPENCC_EXPORT MarisaDict : public Dict, public SerializableDict {
|
||||
public:
|
||||
virtual ~MarisaDict();
|
||||
|
||||
// NOTE(review): presumably reports the `maxLength` member below - confirm
// against the implementation.
virtual size_t KeyMaxLength() const;
|
||||
|
||||
// Lookup entry points taking a NUL-terminated `word`. Match and MatchPrefix
// return an Optional holding a const DictEntry pointer; MatchAllPrefixes
// returns a vector of such pointers.
virtual Optional<const DictEntry*> Match(const char* word) const;
|
||||
|
||||
virtual Optional<const DictEntry*> MatchPrefix(const char* word) const;
|
||||
|
||||
virtual vector<const DictEntry*> MatchAllPrefixes(const char* word) const;
|
||||
|
||||
// NOTE(review): presumably hands back the `lexicon` member - confirm.
virtual LexiconPtr GetLexicon() const;
|
||||
|
||||
// Writes this dictionary to the already-open stream `fp`.
virtual void SerializeToFile(FILE* fp) const;
|
||||
|
||||
/**
|
||||
* Constructs a MarisaDict from another dictionary.
|
||||
*/
|
||||
static MarisaDictPtr NewFromDict(const Dict& thatDict);
|
||||
|
||||
// Counterpart factory that builds a MarisaDict from the stream `fp`.
static MarisaDictPtr NewFromFile(FILE* fp);
|
||||
|
||||
private:
|
||||
// The constructor is private, so code outside the class cannot construct a
// MarisaDict directly; the static NewFrom* functions above are the public
// way to obtain one.
MarisaDict();
|
||||
|
||||
size_t maxLength;
|
||||
LexiconPtr lexicon;
|
||||
|
||||
// Implementation type that is only forward-declared here and held through
// a unique_ptr, so its definition is not exposed by this header.
class MarisaInternal;
|
||||
std::unique_ptr<MarisaInternal> internal;
|
||||
};
|
||||
} // namespace opencc
|
191
winlibs/include/opencc/PhraseExtract.hpp
Normal file
191
winlibs/include/opencc/PhraseExtract.hpp
Normal file
|
@ -0,0 +1,191 @@
|
|||
/*
|
||||
* Open Chinese Convert
|
||||
*
|
||||
* Copyright 2015 BYVoid <byvoid@byvoid.com>
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <unordered_map>
|
||||
|
||||
#include "Common.hpp"
|
||||
#include "UTF8StringSlice.hpp"
|
||||
|
||||
namespace opencc {
|
||||
|
||||
class OPENCC_EXPORT PhraseExtract {
|
||||
public:
|
||||
typedef UTF8StringSlice::LengthType LengthType;
|
||||
|
||||
typedef UTF8StringSliceBase<unsigned char> UTF8StringSlice8Bit;
|
||||
|
||||
PhraseExtract();
|
||||
|
||||
virtual ~PhraseExtract();
|
||||
|
||||
void Extract(const string& text) {
|
||||
SetFullText(text);
|
||||
ExtractSuffixes();
|
||||
CalculateFrequency();
|
||||
CalculateSuffixEntropy();
|
||||
ReleaseSuffixes();
|
||||
ExtractPrefixes();
|
||||
CalculatePrefixEntropy();
|
||||
ReleasePrefixes();
|
||||
ExtractWordCandidates();
|
||||
CalculateCohesions();
|
||||
SelectWords();
|
||||
}
|
||||
|
||||
void SetFullText(const string& fullText) {
|
||||
utf8FullText = UTF8StringSlice(fullText.c_str());
|
||||
}
|
||||
|
||||
void SetFullText(const char* fullText) {
|
||||
utf8FullText = UTF8StringSlice(fullText);
|
||||
}
|
||||
|
||||
void SetFullText(const UTF8StringSlice& fullText) { utf8FullText = fullText; }
|
||||
|
||||
void SetWordMinLength(const LengthType _wordMinLength) {
|
||||
wordMinLength = _wordMinLength;
|
||||
}
|
||||
|
||||
void SetWordMaxLength(const LengthType _wordMaxLength) {
|
||||
wordMaxLength = _wordMaxLength;
|
||||
}
|
||||
|
||||
void SetPrefixSetLength(const LengthType _prefixSetLength) {
|
||||
prefixSetLength = _prefixSetLength;
|
||||
}
|
||||
|
||||
void SetSuffixSetLength(const LengthType _suffixSetLength) {
|
||||
suffixSetLength = _suffixSetLength;
|
||||
}
|
||||
|
||||
// PreCalculationFilter is called after frequencies statistics.
|
||||
void SetPreCalculationFilter(const std::function<
|
||||
bool(const PhraseExtract&, const UTF8StringSlice8Bit&)>& filter) {
|
||||
preCalculationFilter = filter;
|
||||
}
|
||||
|
||||
void SetPostCalculationFilter(const std::function<
|
||||
bool(const PhraseExtract&, const UTF8StringSlice8Bit&)>& filter) {
|
||||
postCalculationFilter = filter;
|
||||
}
|
||||
|
||||
void ReleaseSuffixes() { vector<UTF8StringSlice8Bit>().swap(suffixes); }
|
||||
|
||||
void ReleasePrefixes() { vector<UTF8StringSlice8Bit>().swap(prefixes); }
|
||||
|
||||
const vector<UTF8StringSlice8Bit>& Words() const { return words; }
|
||||
|
||||
const vector<UTF8StringSlice8Bit>& WordCandidates() const {
|
||||
return wordCandidates;
|
||||
}
|
||||
|
||||
struct Signals {
|
||||
size_t frequency;
|
||||
double cohesion;
|
||||
double suffixEntropy;
|
||||
double prefixEntropy;
|
||||
};
|
||||
|
||||
const Signals& Signal(const UTF8StringSlice8Bit& wordCandidate) const;
|
||||
|
||||
double Cohesion(const UTF8StringSlice8Bit& wordCandidate) const;
|
||||
|
||||
double Entropy(const UTF8StringSlice8Bit& wordCandidate) const;
|
||||
|
||||
double SuffixEntropy(const UTF8StringSlice8Bit& wordCandidate) const;
|
||||
|
||||
double PrefixEntropy(const UTF8StringSlice8Bit& wordCandidate) const;
|
||||
|
||||
size_t Frequency(const UTF8StringSlice8Bit& word) const;
|
||||
|
||||
double Probability(const UTF8StringSlice8Bit& word) const;
|
||||
|
||||
double LogProbability(const UTF8StringSlice8Bit& word) const;
|
||||
|
||||
void Reset();
|
||||
|
||||
void ExtractSuffixes();
|
||||
|
||||
void ExtractPrefixes();
|
||||
|
||||
void ExtractWordCandidates();
|
||||
|
||||
void CalculateFrequency();
|
||||
|
||||
void CalculateCohesions();
|
||||
|
||||
void CalculateSuffixEntropy();
|
||||
|
||||
void CalculatePrefixEntropy();
|
||||
|
||||
void SelectWords();
|
||||
|
||||
static bool
|
||||
DefaultPreCalculationFilter(const PhraseExtract&,
|
||||
const PhraseExtract::UTF8StringSlice8Bit&);
|
||||
|
||||
static bool
|
||||
DefaultPostCalculationFilter(const PhraseExtract&,
|
||||
const PhraseExtract::UTF8StringSlice8Bit&);
|
||||
|
||||
private:
|
||||
class DictType;
|
||||
|
||||
// Pointwise Mutual Information
|
||||
double PMI(const UTF8StringSlice8Bit& wordCandidate,
|
||||
const UTF8StringSlice8Bit& part1,
|
||||
const UTF8StringSlice8Bit& part2) const;
|
||||
|
||||
double CalculateCohesion(const UTF8StringSlice8Bit& wordCandidate) const;
|
||||
|
||||
double CalculateEntropy(const std::unordered_map<
|
||||
UTF8StringSlice8Bit, size_t, UTF8StringSlice8Bit::Hasher>& choices) const;
|
||||
|
||||
LengthType wordMinLength;
|
||||
LengthType wordMaxLength;
|
||||
LengthType prefixSetLength;
|
||||
LengthType suffixSetLength;
|
||||
std::function<bool(const PhraseExtract&, const UTF8StringSlice8Bit&)>
|
||||
preCalculationFilter;
|
||||
std::function<bool(const PhraseExtract&, const UTF8StringSlice8Bit&)>
|
||||
postCalculationFilter;
|
||||
|
||||
bool prefixesExtracted;
|
||||
bool suffixesExtracted;
|
||||
bool frequenciesCalculated;
|
||||
bool wordCandidatesExtracted;
|
||||
bool cohesionsCalculated;
|
||||
bool prefixEntropiesCalculated;
|
||||
bool suffixEntropiesCalculated;
|
||||
bool wordsSelected;
|
||||
|
||||
UTF8StringSlice utf8FullText;
|
||||
size_t totalOccurrence;
|
||||
double logTotalOccurrence;
|
||||
vector<UTF8StringSlice8Bit> prefixes;
|
||||
vector<UTF8StringSlice8Bit> suffixes;
|
||||
vector<UTF8StringSlice8Bit> wordCandidates;
|
||||
vector<UTF8StringSlice8Bit> words;
|
||||
DictType* signals;
|
||||
|
||||
friend class PhraseExtractTest;
|
||||
};
|
||||
|
||||
} // namespace opencc
|
49
winlibs/include/opencc/SerializedValues.hpp
Normal file
49
winlibs/include/opencc/SerializedValues.hpp
Normal file
|
@ -0,0 +1,49 @@
|
|||
/*
|
||||
* Open Chinese Convert
|
||||
*
|
||||
* Copyright 2020 BYVoid <byvoid@byvoid.com>
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "Common.hpp"
|
||||
#include "SerializableDict.hpp"
|
||||
|
||||
namespace opencc {
|
||||
/**
|
||||
* Binary format for dictionary values serialization.
|
||||
* @ingroup opencc_cpp_api
|
||||
*/
|
||||
class OPENCC_EXPORT SerializedValues : public SerializableDict {
public:
  /** Stores a copy of the given lexicon pointer. */
  SerializedValues(const LexiconPtr& _lexicon) : lexicon(_lexicon) {
  }

  virtual ~SerializedValues() {
  }

  /** Writes the values to the already-open stream `fp` (defined elsewhere). */
  virtual void SerializeToFile(FILE* fp) const;

  /** Counterpart factory that builds an instance from the stream `fp`. */
  static std::shared_ptr<SerializedValues> NewFromFile(FILE* fp);

  /** Returns a reference to the lexicon pointer held by this object. */
  const LexiconPtr& GetLexicon() const {
    return this->lexicon;
  }

  size_t KeyMaxLength() const;

private:
  LexiconPtr lexicon;

  // Helper with three output parameters; implemented out of line.
  void ConstructBuffer(string* valueBuffer, vector<uint16_t>* valueBytes,
                       uint32_t* valueTotalLength) const;
};
|
||||
} // namespace opencc
|
244
winlibs/include/opencc/UTF8StringSlice.hpp
Normal file
244
winlibs/include/opencc/UTF8StringSlice.hpp
Normal file
|
@ -0,0 +1,244 @@
|
|||
/*
|
||||
* Open Chinese Convert
|
||||
*
|
||||
* Copyright 2015 BYVoid <byvoid@byvoid.com>
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#include "Common.hpp"
|
||||
#include "UTF8Util.hpp"
|
||||
|
||||
namespace opencc {
|
||||
|
||||
namespace internal {

/**
 * FNV-1a hash (xor each octet into the state, then multiply by the prime).
 *
 * @param text             Start of the byte range to hash; it does not need
 *                         to be NUL-terminated.
 * @param byteLength       Number of bytes to hash.
 * @param FNV_prime        Multiplier applied after every octet.
 * @param FNV_offset_basis Initial hash state; returned unchanged for an
 *                         empty range.
 * @return The hash, computed in `size_t` arithmetic.
 */
inline size_t FNVHash(const char* text, const size_t byteLength,
                      const size_t FNV_prime, const size_t FNV_offset_basis) {
  size_t hash = FNV_offset_basis;
  for (const char* pstr = text; pstr < text + byteLength; pstr++) {
    // Fold the byte in as an unsigned octet. Where plain `char` is signed, a
    // byte >= 0x80 (every byte of a multi-byte UTF-8 sequence) would
    // otherwise be sign-extended and flip all the upper bits of the state.
    hash ^= static_cast<unsigned char>(*pstr);
    hash *= FNV_prime;
  }
  return hash;
}

// Selects the FNV parameters by the width of size_t in bytes; only the
// specialisations below are defined.
template <int> size_t FNVHash(const char* text, const size_t byteLength);

// 32-bit parameters.
template <>
inline size_t FNVHash<4>(const char* text, const size_t byteLength) {
  return FNVHash(text, byteLength, 16777619UL, 2166136261UL);
}

#if SIZE_MAX == 0xffffffffffffffff
// 64-bit parameters; only available when size_t is 64 bits wide.
template <>
inline size_t FNVHash<8>(const char* text, const size_t byteLength) {
  return FNVHash(text, byteLength, 1099511628211UL, 14695981039346656037UL);
}
#endif

} // namespace internal
|
||||
|
||||
template <typename LENGTH_TYPE> class UTF8StringSliceBase {
|
||||
public:
|
||||
typedef LENGTH_TYPE LengthType;
|
||||
|
||||
UTF8StringSliceBase(const char* _str)
|
||||
: str(_str), utf8Length(static_cast<LengthType>(UTF8Util::Length(_str))),
|
||||
byteLength(static_cast<LengthType>(strlen(_str))) {}
|
||||
|
||||
UTF8StringSliceBase(const char* _str, const LengthType _utf8Length)
|
||||
: str(_str), utf8Length(_utf8Length) {
|
||||
CalculateByteLength();
|
||||
}
|
||||
|
||||
UTF8StringSliceBase(const char* _str, const LengthType _utf8Length,
|
||||
const LengthType _byteLength)
|
||||
: str(_str), utf8Length(_utf8Length), byteLength(_byteLength) {
|
||||
CalculateByteLength();
|
||||
}
|
||||
|
||||
LengthType UTF8Length() const { return utf8Length; }
|
||||
|
||||
LengthType ByteLength() const { return byteLength; }
|
||||
|
||||
UTF8StringSliceBase Left(const LengthType numberOfCharacters) const {
|
||||
if (numberOfCharacters == UTF8Length()) {
|
||||
return *this;
|
||||
} else {
|
||||
return UTF8StringSliceBase(str, numberOfCharacters);
|
||||
}
|
||||
}
|
||||
|
||||
UTF8StringSliceBase Right(const LengthType numberOfCharacters) const {
|
||||
if (numberOfCharacters == UTF8Length()) {
|
||||
return *this;
|
||||
} else {
|
||||
const char* pstr = str + byteLength;
|
||||
for (size_t i = 0; i < numberOfCharacters; i++) {
|
||||
pstr = UTF8Util::PrevChar(pstr);
|
||||
}
|
||||
return UTF8StringSliceBase(pstr, numberOfCharacters);
|
||||
}
|
||||
}
|
||||
|
||||
UTF8StringSliceBase SubString(const LengthType offset,
|
||||
const LengthType numberOfCharacters) const {
|
||||
if (offset == 0) {
|
||||
return Left(numberOfCharacters);
|
||||
} else {
|
||||
const char* pstr = str;
|
||||
for (size_t i = 0; i < offset; i++) {
|
||||
pstr = UTF8Util::NextChar(pstr);
|
||||
}
|
||||
return UTF8StringSliceBase(pstr, numberOfCharacters);
|
||||
}
|
||||
}
|
||||
|
||||
string ToString() const { return string(str, str + byteLength); }
|
||||
|
||||
const char* CString() const { return str; }
|
||||
|
||||
LengthType CommonPrefixLength(const UTF8StringSliceBase& that) const {
|
||||
if (str == that.str) {
|
||||
return std::min(utf8Length, that.utf8Length);
|
||||
} else {
|
||||
const char* pstr1 = str;
|
||||
const char* pstr2 = that.str;
|
||||
for (size_t length = 0; length < utf8Length && length < that.utf8Length;
|
||||
length++) {
|
||||
size_t charLen1 = UTF8Util::NextCharLength(pstr1);
|
||||
size_t charLen2 = UTF8Util::NextCharLength(pstr2);
|
||||
if (charLen1 != charLen2 || strncmp(pstr1, pstr2, charLen1) != 0) {
|
||||
return length;
|
||||
}
|
||||
pstr1 += charLen1;
|
||||
pstr2 += charLen2;
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
}
|
||||
|
||||
void MoveRight() {
|
||||
if (utf8Length > 0) {
|
||||
const size_t charLen = UTF8Util::NextCharLength(str);
|
||||
str += charLen;
|
||||
utf8Length--;
|
||||
byteLength -= charLen;
|
||||
}
|
||||
}
|
||||
|
||||
void MoveLeft() {
|
||||
if (utf8Length > 0) {
|
||||
const size_t charLen = UTF8Util::PrevCharLength(str + byteLength);
|
||||
utf8Length--;
|
||||
byteLength -= charLen;
|
||||
}
|
||||
}
|
||||
|
||||
int ReverseCompare(const UTF8StringSliceBase& that) const {
|
||||
const char* pstr1 = str + byteLength;
|
||||
const char* pstr2 = that.str + that.byteLength;
|
||||
const size_t length = std::min(utf8Length, that.utf8Length);
|
||||
for (size_t i = 0; i < length; i++) {
|
||||
const size_t charLen1 = UTF8Util::PrevCharLength(pstr1);
|
||||
const size_t charLen2 = UTF8Util::PrevCharLength(pstr2);
|
||||
pstr1 -= charLen1;
|
||||
pstr2 -= charLen2;
|
||||
const int cmp = strncmp(pstr1, pstr2, std::min(charLen1, charLen2));
|
||||
if (cmp < 0) {
|
||||
return -1;
|
||||
} else if (cmp > 0) {
|
||||
return 1;
|
||||
} else if (charLen1 < charLen2) {
|
||||
return -1;
|
||||
} else if (charLen1 > charLen2) {
|
||||
return 1;
|
||||
}
|
||||
}
|
||||
if (utf8Length < that.utf8Length) {
|
||||
return -1;
|
||||
} else if (utf8Length > that.utf8Length) {
|
||||
return 1;
|
||||
} else {
|
||||
return 0;
|
||||
}
|
||||
}
|
||||
|
||||
LengthType FindBytePosition(const UTF8StringSliceBase& pattern) const {
|
||||
return static_cast<LengthType>(
|
||||
ToString().find(pattern.str, 0, pattern.byteLength));
|
||||
}
|
||||
|
||||
bool operator<(const UTF8StringSliceBase& that) const {
|
||||
return Compare(that) < 0;
|
||||
}
|
||||
|
||||
bool operator>(const UTF8StringSliceBase& that) const {
|
||||
return Compare(that) > 0;
|
||||
}
|
||||
|
||||
bool operator==(const UTF8StringSliceBase& that) const {
|
||||
return (str == that.str && utf8Length == that.utf8Length) ||
|
||||
Compare(that) == 0;
|
||||
}
|
||||
|
||||
bool operator!=(const UTF8StringSliceBase& that) const {
|
||||
return !this->operator==(that);
|
||||
}
|
||||
|
||||
class Hasher {
|
||||
public:
|
||||
size_t operator()(const UTF8StringSliceBase& text) const {
|
||||
return internal::FNVHash<sizeof(size_t)>(text.CString(),
|
||||
text.ByteLength());
|
||||
}
|
||||
};
|
||||
|
||||
private:
|
||||
inline int Compare(const UTF8StringSliceBase& that) const {
|
||||
int cmp = strncmp(str, that.str, std::min(byteLength, that.byteLength));
|
||||
if (cmp == 0) {
|
||||
if (utf8Length < that.utf8Length) {
|
||||
cmp = -1;
|
||||
} else if (utf8Length > that.utf8Length) {
|
||||
cmp = 1;
|
||||
} else {
|
||||
cmp = 0;
|
||||
}
|
||||
}
|
||||
return cmp;
|
||||
}
|
||||
|
||||
void CalculateByteLength() {
|
||||
const char* pstr = str;
|
||||
for (size_t i = 0; i < utf8Length; i++) {
|
||||
pstr = UTF8Util::NextChar(pstr);
|
||||
}
|
||||
byteLength = static_cast<LengthType>(pstr - str);
|
||||
}
|
||||
|
||||
const char* str;
|
||||
LengthType utf8Length;
|
||||
LengthType byteLength;
|
||||
};
|
||||
|
||||
// Default slice type: both lengths are stored as size_t.
using UTF8StringSlice = UTF8StringSliceBase<size_t>;
|
||||
|
||||
template <typename LENGTH_TYPE>
|
||||
std::ostream& operator<<(::std::ostream& os,
|
||||
const UTF8StringSliceBase<LENGTH_TYPE>& str) {
|
||||
return os << str.ToString();
|
||||
}
|
||||
|
||||
} // namespace opencc
|
Binary file not shown.
Binary file not shown.
Loading…
Reference in a new issue