From 8f30f9419d0403952bfc4ae7c81fc999693dcd43 Mon Sep 17 00:00:00 2001 From: stephenmk Date: Fri, 28 Jul 2023 18:35:22 -0500 Subject: [PATCH] Update logic for adding variant kanji forms MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Prior to this commit, the program would only add variant kanji forms in one direction. For example, an additional search key for 掴む would be added if a 摑む headword was found in a dictionary, but a search key for 摑む would not be added to 掴む. Search keys are now added in both directions (old-to-new and new-to-old). --- bot/entries/base/expressions.py | 9 ++++++--- tests/test_expressions.py | 22 ++++++++++++++++++++-- 2 files changed, 26 insertions(+), 5 deletions(-) diff --git a/bot/entries/base/expressions.py b/bot/entries/base/expressions.py index 7d20891..8049a99 100644 --- a/bot/entries/base/expressions.py +++ b/bot/entries/base/expressions.py @@ -31,11 +31,14 @@ def add_fullwidth(expressions): def add_variant_kanji(expressions): variant_kanji = load_variant_kanji() - for old_kanji, new_kanji in variant_kanji.items(): + for kyuuji, shinji in variant_kanji.items(): new_exps = [] for expression in expressions: - if old_kanji in expression: - new_exp = expression.replace(old_kanji, new_kanji) + if kyuuji in expression: + new_exp = expression.replace(kyuuji, shinji) + new_exps.append(new_exp) + if shinji in expression: + new_exp = expression.replace(shinji, kyuuji) new_exps.append(new_exp) for new_exp in new_exps: if new_exp not in expressions: diff --git a/tests/test_expressions.py b/tests/test_expressions.py index 5d90ce1..9091dda 100644 --- a/tests/test_expressions.py +++ b/tests/test_expressions.py @@ -34,8 +34,8 @@ class TestExpressions(unittest.TestCase): self.assertIn("凶々しい", exps) self.assertIn("凶凶しい", exps) - def test_add_variant_kanji(self): - exps = ["剝く", "掴む", "摑む"] + def test_add_variant_kanji1(self): + exps = ["剥く", "摑む"] Expressions.add_variant_kanji(exps) self.assertEqual(len(exps), 4) self.assertIn("剥く", exps) @@ -44,6 +44,15 @@ class TestExpressions(unittest.TestCase): self.assertIn("摑む", exps) def test_add_variant_kanji2(self): + exps = ["剝く", "掴む", "摑む"] + Expressions.add_variant_kanji(exps) + self.assertEqual(len(exps), 4) + self.assertIn("剥く", exps) + self.assertIn("剝く", exps) + self.assertIn("掴む", exps) + self.assertIn("摑む", exps) + + def test_add_variant_kanji3(self): exps = ["剝摑"] Expressions.add_variant_kanji(exps) self.assertEqual(len(exps), 4) @@ -52,6 +61,15 @@ class TestExpressions(unittest.TestCase): self.assertIn("剥掴", exps) self.assertIn("剥摑", exps) + def test_add_variant_kanji4(self): + exps = ["剥掴"] + Expressions.add_variant_kanji(exps) + self.assertEqual(len(exps), 4) + self.assertIn("剝摑", exps) + self.assertIn("剝掴", exps) + self.assertIn("剥掴", exps) + self.assertIn("剥摑", exps) + def test_expand_abbreviation(self): text = "有(り)合(わ)せ" abbrs = Expressions.expand_abbreviation(text)