jitenbot/bot/entries/base/expressions.py

91 lines
2.7 KiB
Python
Raw Permalink Normal View History

import re
2023-05-07 01:07:07 +00:00
from bot.data import load_variant_kanji
# Translation table for str.translate(): each code point in the two ranges
# below (U+30A1..U+30F6 and U+30FD..U+30FE) maps to the code point 96 (0x60)
# lower, which is where the corresponding hiragana sits.
__KATA_TO_HIRA_MAP = dict(
    (codepoint, codepoint - 96)
    for span in (range(0x30A1, 0x30F7), range(0x30FD, 0x30FF))
    for codepoint in span
)
# Translation table for str.translate(): every code point in 0x21..0x7E maps
# to the code point 0xFEE0 higher (0xFF01..0xFF5E). The space character
# (0x20) is not part of the table.
__HALFWIDTH_TO_FULLWIDTH_MAP = dict(
    zip(range(0x21, 0x7F), range(0x21 + 0xFEE0, 0x7F + 0xFEE0))
)
def kata_to_hira(text):
    """Return `text` with katakana translated to hiragana.

    Only code points present in the module-level katakana table are
    changed; every other character is passed through untouched.
    """
    return text.translate(__KATA_TO_HIRA_MAP)
def add_fullwidth(expressions):
    """Append the fullwidth form of each expression to `expressions`.

    The list is modified in place and nothing is returned. A converted
    string is appended only when it is not already in the list.
    """
    # The list grows while it is being walked, so appended entries are
    # visited too; translating them again yields the same string, which is
    # already present, so the loop terminates.
    for candidate in expressions:
        widened = candidate.translate(__HALFWIDTH_TO_FULLWIDTH_MAP)
        if widened in expressions:
            continue
        expressions.append(widened)
2023-05-07 01:07:07 +00:00
def add_variant_kanji(expressions):
    """Append kanji-variant spellings of each expression, in place.

    `load_variant_kanji()` supplies a mapping whose keys and values are
    swapped for one another in both directions (presumably old-form ->
    new-form character pairs, going by the kyuuji/shinji naming; verify
    against the data file). Nothing is returned.
    """
    for kyuuji, shinji in load_variant_kanji().items():
        # Collect the candidates for this pair first, then extend the list,
        # so the inner scan never walks a list that is growing under it.
        candidates = []
        for text in expressions:
            if kyuuji in text:
                candidates.append(text.replace(kyuuji, shinji))
            if shinji in text:
                candidates.append(text.replace(shinji, kyuuji))
        for candidate in candidates:
            if candidate not in expressions:
                expressions.append(candidate)
def remove_iteration_mark(expressions):
    """Append spellings in which the iteration mark 々 is written out.

    For every "X々" found in an expression, a variant with "XX" in its
    place is appended to `expressions` unless it is already present. The
    list is modified in place and nothing is returned.
    """
    iterated_kanji = r"(.)々"
    # Appended variants are visited as well, so an expression containing
    # several marks ends up with every combination of expanded marks.
    for expression in expressions:
        for char in re.findall(iterated_kanji, expression):
            # Replace the character *together with* its mark. Replacing the
            # bare character would leave the mark in place and double every
            # occurrence, producing ever-longer strings without end.
            new_exp = expression.replace(f"{char}々", f"{char}{char}")
            if new_exp not in expressions:
                expressions.append(new_exp)
def add_iteration_mark(expressions):
    """Append spellings in which a doubled character uses the mark 々.

    For every immediately repeated character "XX" matched by the pattern
    below, a variant with "X々" in its place is appended to `expressions`
    unless it is already present. The list is modified in place and
    nothing is returned.
    """
    # The character class excludes '0'..'z', the literal '-' and the range
    # 'ぁ'..'ヿ' (hiragana and katakana), so only other characters qualify.
    # NOTE(review): the "-z" following "0-z" is parsed as two literals; it
    # may be the remnant of a second range - confirm against upstream.
    repeat_kanji = r"([^0-z-zぁ-ヿ])\1"
    # Appended variants are visited as well, so an expression with several
    # doubled characters ends up with every combination of marks.
    for expression in expressions:
        for char in re.findall(repeat_kanji, expression):
            # The second copy becomes the iteration mark; dropping it
            # instead would silently delete a character from the word.
            new_exp = expression.replace(f"{char}{char}", f"{char}々")
            if new_exp not in expressions:
                expressions.append(new_exp)
def expand_abbreviation(abbreviated_expression):
    """Return a list of words described by a 省略 notation.

    Text enclosed in fullwidth parentheses is treated as optional: the
    result contains every spelling obtained by keeping or dropping each
    parenthesised part. For one optional part, the spelling that keeps it
    comes first. A string without parentheses yields a one-element list.
    """
    # Each match is (required text, parenthesised part, its inner text).
    # The fullwidth parentheses are spelled as \uFF08 / \uFF09 escapes so
    # the pattern survives tools that mangle non-ASCII punctuation.
    groups = re.findall(
        r"([^\uFF08]*)(\uFF08([^\uFF08\uFF09]+)\uFF09)?",
        abbreviated_expression,
    )
    expressions = [""]
    for required, _, optional in groups:
        expressions = [prefix + required for prefix in expressions]
        if not optional:
            continue
        # Branch: spellings that keep the optional part, then those that
        # omit it.
        with_optional = [prefix + optional for prefix in expressions]
        expressions = with_optional + expressions
    return expressions
def expand_abbreviation_list(expressions):
    """Expand every expression and return the distinct results as a list.

    Each input is passed through `expand_abbreviation`; duplicates are
    dropped while keeping the order of first appearance. The input list
    itself is not modified.
    """
    # dict keys keep insertion order, which gives an ordered de-duplication.
    return list(dict.fromkeys(
        expanded
        for abbreviated in expressions
        for expanded in expand_abbreviation(abbreviated)
    ))