Update logic for adding variant kanji forms

Prior to this commit, the program would only add variant kanji forms
in one direction. For example, an additional search key for 掴む would
be added if a 摑む headword was found in a dictionary, but a search
key for 摑む would not be added if a 掴む headword was found.

Search keys are now added in both directions (old-to-new and
new-to-old).
This commit is contained in:
stephenmk 2023-07-28 18:35:22 -05:00
parent d37c3aca5b
commit 8f30f9419d
No known key found for this signature in database
GPG key ID: B6DA730DB06235F1
2 changed files with 26 additions and 5 deletions

View file

@ -31,11 +31,14 @@ def add_fullwidth(expressions):
def add_variant_kanji(expressions):
variant_kanji = load_variant_kanji()
for old_kanji, new_kanji in variant_kanji.items():
for kyuuji, shinji in variant_kanji.items():
new_exps = []
for expression in expressions:
if old_kanji in expression:
new_exp = expression.replace(old_kanji, new_kanji)
if kyuuji in expression:
new_exp = expression.replace(kyuuji, shinji)
new_exps.append(new_exp)
if shinji in expression:
new_exp = expression.replace(shinji, kyuuji)
new_exps.append(new_exp)
for new_exp in new_exps:
if new_exp not in expressions:

View file

@ -34,8 +34,8 @@ class TestExpressions(unittest.TestCase):
self.assertIn("凶々しい", exps)
self.assertIn("凶凶しい", exps)
def test_add_variant_kanji(self):
exps = ["剝く", "掴む", "摑む"]
def test_add_variant_kanji1(self):
exps = ["剥く", "摑む"]
Expressions.add_variant_kanji(exps)
self.assertEqual(len(exps), 4)
self.assertIn("剥く", exps)
@ -44,6 +44,15 @@ class TestExpressions(unittest.TestCase):
self.assertIn("摑む", exps)
def test_add_variant_kanji2(self):
exps = ["剝く", "掴む", "摑む"]
Expressions.add_variant_kanji(exps)
self.assertEqual(len(exps), 4)
self.assertIn("剥く", exps)
self.assertIn("剝く", exps)
self.assertIn("掴む", exps)
self.assertIn("摑む", exps)
def test_add_variant_kanji3(self):
exps = ["剝摑"]
Expressions.add_variant_kanji(exps)
self.assertEqual(len(exps), 4)
@ -52,6 +61,15 @@ class TestExpressions(unittest.TestCase):
self.assertIn("剥掴", exps)
self.assertIn("剥摑", exps)
def test_add_variant_kanji4(self):
exps = ["剥掴"]
Expressions.add_variant_kanji(exps)
self.assertEqual(len(exps), 4)
self.assertIn("剝摑", exps)
self.assertIn("剝掴", exps)
self.assertIn("剥掴", exps)
self.assertIn("剥摑", exps)
def test_expand_abbreviation(self):
text = "有(り)合(わ)せ"
abbrs = Expressions.expand_abbreviation(text)