gd-tools/res/mandarin_dict/dicrc
2024-02-04 14:24:04 -04:00

42 lines
1.2 KiB
Plaintext

; MeCab dictionary configuration (dicrc) for the Mandarin dictionary.
; Lines beginning with ';' are comments; every other line is a "key = value" setting.
;
; List of features (referenced as %f[N] in the format strings below)
; f[0]: pos1
; f[1]: pos2
; f[2]: pos3
; f[3]: pos4
; f[4]: pinyin pronunciation
; f[5]: traditional Chinese character form
; f[6]: simplified Chinese character form
; f[7]: definition
;
; Cost scaling factor (see the MeCab dictionary documentation for its exact role).
cost-factor = 700
; Feature string assigned to the beginning-of-sentence / end-of-sentence nodes.
; NOTE(review): this lists 17 comma-separated fields, while only 8 feature columns
; (f[0]..f[7]) are documented above -- confirm whether the extra fields are intentional.
bos-feature = BOS/EOS,*,*,*,*,*,*,*,*,*,*,*,*,*,*,*,*
; eval-size is the number of leading feature fields that must match for an analysis
; of a known word to count as correct.
; Unlike Japanese, Chinese has no conjugation, so only the four pos fields
; (f[0]..f[3]) are compared; the remaining fields, such as pronunciation, are ignored.
eval-size = 4
; Same criterion, applied to unknown words.
unk-eval-size = 4
; Character encoding of this configuration.
config-charset = utf8
; Default output format family. Each family below is defined by four keys:
;   node-format-NAME  format for a known word
;   unk-format-NAME   format for an unknown word
;   bos-format-NAME   text emitted at the beginning of a sentence (empty = nothing)
;   eos-format-NAME   text emitted at the end of a sentence
; In format strings, %m is the surface form of the token, %f[N] is feature column N,
; \t is a tab and \n is a newline.
output-format-type = cedict
;
; "cedict": surface form, a tab, then the feature columns separated by commas;
; each sentence is terminated by a line containing "EOS".
node-format-cedict = %m\t%f[0],%f[1],%f[2],%f[3],%f[4],%f[5],%f[6],%f[7]\n
; Unknown words print only f[0]..f[5].
; NOTE(review): presumably unknown-word entries carry fewer feature columns than
; dictionary entries -- verify against unk.def.
unk-format-cedict = %m\t%f[0],%f[1],%f[2],%f[3],%f[4],%f[5]\n
bos-format-cedict =
eos-format-cedict = EOS\n
;
; "traditional": prints only the traditional form (f[5]) of each token, with no
; separator between tokens and a newline at the end of the sentence.
node-format-traditional = %f[5]
unk-format-traditional = %f[5]
bos-format-traditional =
eos-format-traditional = \n
;
; "simplified": prints only the simplified form (f[6]) of each token, with no
; separator between tokens and a newline at the end of the sentence.
node-format-simplified = %f[6]
; NOTE(review): this references %f[6], but unk-format-cedict above stops at %f[5];
; confirm that unknown-word entries actually provide f[6].
unk-format-simplified = %f[6]
bos-format-simplified =
eos-format-simplified = \n
;
; "gdmandarin": wraps the traditional form (f[5]) of each token in an HTML anchor
; whose href is "bword:" followed by that same form.
; NOTE(review): presumably "bword:" is the dictionary-lookup link scheme of the
; consuming application -- confirm against the caller.
node-format-gdmandarin = <a href="bword:%f[5]">%f[5]</a>
unk-format-gdmandarin = <a href="bword:%f[5]">%f[5]</a>
bos-format-gdmandarin =
eos-format-gdmandarin = \n