Update logic for adding variant kanji forms
Prior to this commit, the program added variant kanji forms in one direction only. For example, an additional search key for 掴む would be added if a 摑む headword was found in a dictionary, but a search key for 摑む would not be added if a 掴む headword was found. Search keys are now added in both directions (old-to-new and new-to-old).
This commit is contained in:
parent
d37c3aca5b
commit
8f30f9419d
|
@ -31,11 +31,14 @@ def add_fullwidth(expressions):
|
||||||
|
|
||||||
def add_variant_kanji(expressions):
|
def add_variant_kanji(expressions):
|
||||||
variant_kanji = load_variant_kanji()
|
variant_kanji = load_variant_kanji()
|
||||||
for old_kanji, new_kanji in variant_kanji.items():
|
for kyuuji, shinji in variant_kanji.items():
|
||||||
new_exps = []
|
new_exps = []
|
||||||
for expression in expressions:
|
for expression in expressions:
|
||||||
if old_kanji in expression:
|
if kyuuji in expression:
|
||||||
new_exp = expression.replace(old_kanji, new_kanji)
|
new_exp = expression.replace(kyuuji, shinji)
|
||||||
|
new_exps.append(new_exp)
|
||||||
|
if shinji in expression:
|
||||||
|
new_exp = expression.replace(shinji, kyuuji)
|
||||||
new_exps.append(new_exp)
|
new_exps.append(new_exp)
|
||||||
for new_exp in new_exps:
|
for new_exp in new_exps:
|
||||||
if new_exp not in expressions:
|
if new_exp not in expressions:
|
||||||
|
|
|
@ -34,8 +34,8 @@ class TestExpressions(unittest.TestCase):
|
||||||
self.assertIn("凶々しい", exps)
|
self.assertIn("凶々しい", exps)
|
||||||
self.assertIn("凶凶しい", exps)
|
self.assertIn("凶凶しい", exps)
|
||||||
|
|
||||||
def test_add_variant_kanji(self):
|
def test_add_variant_kanji1(self):
|
||||||
exps = ["剝く", "掴む", "摑む"]
|
exps = ["剥く", "摑む"]
|
||||||
Expressions.add_variant_kanji(exps)
|
Expressions.add_variant_kanji(exps)
|
||||||
self.assertEqual(len(exps), 4)
|
self.assertEqual(len(exps), 4)
|
||||||
self.assertIn("剥く", exps)
|
self.assertIn("剥く", exps)
|
||||||
|
@ -44,6 +44,15 @@ class TestExpressions(unittest.TestCase):
|
||||||
self.assertIn("摑む", exps)
|
self.assertIn("摑む", exps)
|
||||||
|
|
||||||
def test_add_variant_kanji2(self):
|
def test_add_variant_kanji2(self):
|
||||||
|
exps = ["剝く", "掴む", "摑む"]
|
||||||
|
Expressions.add_variant_kanji(exps)
|
||||||
|
self.assertEqual(len(exps), 4)
|
||||||
|
self.assertIn("剥く", exps)
|
||||||
|
self.assertIn("剝く", exps)
|
||||||
|
self.assertIn("掴む", exps)
|
||||||
|
self.assertIn("摑む", exps)
|
||||||
|
|
||||||
|
def test_add_variant_kanji3(self):
|
||||||
exps = ["剝摑"]
|
exps = ["剝摑"]
|
||||||
Expressions.add_variant_kanji(exps)
|
Expressions.add_variant_kanji(exps)
|
||||||
self.assertEqual(len(exps), 4)
|
self.assertEqual(len(exps), 4)
|
||||||
|
@ -52,6 +61,15 @@ class TestExpressions(unittest.TestCase):
|
||||||
self.assertIn("剥掴", exps)
|
self.assertIn("剥掴", exps)
|
||||||
self.assertIn("剥摑", exps)
|
self.assertIn("剥摑", exps)
|
||||||
|
|
||||||
|
def test_add_variant_kanji4(self):
|
||||||
|
exps = ["剥掴"]
|
||||||
|
Expressions.add_variant_kanji(exps)
|
||||||
|
self.assertEqual(len(exps), 4)
|
||||||
|
self.assertIn("剝摑", exps)
|
||||||
|
self.assertIn("剝掴", exps)
|
||||||
|
self.assertIn("剥掴", exps)
|
||||||
|
self.assertIn("剥摑", exps)
|
||||||
|
|
||||||
def test_expand_abbreviation(self):
|
def test_expand_abbreviation(self):
|
||||||
text = "有(り)合(わ)せ"
|
text = "有(り)合(わ)せ"
|
||||||
abbrs = Expressions.expand_abbreviation(text)
|
abbrs = Expressions.expand_abbreviation(text)
|
||||||
|
|
Loading…
Reference in a new issue