jitenbot/bot/expressions.py

89 lines
2.6 KiB
Python
Raw Normal View History

import re
# Translation table for str.translate(): each katakana code point maps to the
# code point 96 (0x60) positions below it, i.e. the matching hiragana.
# Covered: U+30A1..U+30F5 and U+30FD.
# NOTE(review): range() ends are exclusive, so U+30F6 and U+30FE are NOT
# mapped -- confirm that this is intended.
__KATA_TO_HIRA_MAP = {
    code_point: code_point - 96
    for code_point in (*range(0x30A1, 0x30F6), *range(0x30FD, 0x30FE))
}
# Translation table for str.translate(): every code point from 0x21 through
# 0x7E (the range end 0x7F is exclusive) maps to the code point 0xFEE0 above
# it, i.e. U+FF01..U+FF5E.  The space character (0x20) is not included.
__HALFWIDTH_TO_FULLWIDTH_MAP = {
    code_point: code_point + 0xFEE0 for code_point in range(0x21, 0x7F)
}
def kata_to_hira(text):
    """Return *text* translated through the katakana-to-hiragana table.

    Characters that have no entry in ``__KATA_TO_HIRA_MAP`` are returned
    unchanged.
    """
    return text.translate(__KATA_TO_HIRA_MAP)
def add_fullwidth(expressions):
    """Append the fullwidth form of each expression to *expressions*.

    Every entry is translated through ``__HALFWIDTH_TO_FULLWIDTH_MAP``; the
    result is appended unless the list already contains it.  The list is
    modified in place and nothing is returned.
    """
    # The list grows while it is being walked, so appended entries are
    # visited by this loop as well.
    for entry in expressions:
        widened = entry.translate(__HALFWIDTH_TO_FULLWIDTH_MAP)
        if widened in expressions:
            continue
        expressions.append(widened)
def add_variant_kanji(expressions, variant_kanji):
    """Append spellings of *expressions* that use variant characters.

    *variant_kanji* maps a substring to its replacement.  For each mapping
    entry, every expression containing the substring yields a copy with all
    occurrences replaced; copies not already in the list are appended.
    Copies produced for one mapping entry are in the list when the next
    entry is processed, so replacements can combine.

    The list is modified in place and nothing is returned.
    """
    for source_form, variant_form in variant_kanji.items():
        # Build the candidates first so the list is not extended while it is
        # being scanned for this mapping entry.
        candidates = [
            entry.replace(source_form, variant_form)
            for entry in expressions
            if source_form in entry
        ]
        for candidate in candidates:
            if candidate in expressions:
                continue
            expressions.append(candidate)
def remove_iteration_mark(expressions):
    """Add variants in which the iteration mark 々 is written out in full.

    For every "X々" pair found in an expression, a variant with "XX" in its
    place (e.g. "時々" -> "時時") is appended to *expressions* unless the list
    already contains it.  The list is modified in place and nothing is
    returned.
    """
    iterated_kanji = r"(.)々"
    # Appended variants are visited by this same loop, so an expression with
    # several marks also gains the variants with more than one mark expanded.
    for expression in expressions:
        for char in re.findall(iterated_kanji, expression):
            # Replace the character together with its mark.  Replacing the
            # character alone would keep the mark, double the character, and
            # produce a new match on every pass (the list would never stop
            # growing).
            new_exp = expression.replace(f"{char}々", f"{char}{char}")
            if new_exp not in expressions:
                expressions.append(new_exp)
def add_iteration_mark(expressions):
    """Add variants in which a doubled character is written with 々.

    For every immediately repeated character "XX" found in an expression, a
    variant with "X々" in its place (e.g. "時時" -> "時々") is appended to
    *expressions* unless the list already contains it.  The list is modified
    in place and nothing is returned.

    Characters that are never abbreviated this way: ASCII '0' through 'z',
    the hyphen, the fullwidth counterparts of '0' through 'z'
    (U+FF10..U+FF5A, i.e. the same range shifted by 0xFEE0), and the kana
    range ぁ..ヿ (U+3041..U+30FF).
    """
    repeat_kanji = r"([^0-z\-\uFF10-\uFF5Aぁ-ヿ])\1"
    # Appended variants are visited by this same loop, so an expression with
    # several doubled characters also gains the variants with more than one
    # of them abbreviated.
    for expression in expressions:
        for char in re.findall(repeat_kanji, expression):
            # The second occurrence becomes the iteration mark; without the
            # mark in the replacement the character would simply be lost.
            new_exp = expression.replace(f"{char}{char}", f"{char}々")
            if new_exp not in expressions:
                expressions.append(new_exp)
def expand_abbreviation(abbreviated_expression):
    """Return a list of words described by a 省略 (abbreviation) notation.

    Parenthesised segments are optional: the result contains every
    combination of keeping or dropping each of them.  Both ASCII "()" and
    fullwidth "（）" parentheses are recognised.  For each optional segment
    the forms that keep it are listed before the forms that drop it.

    eg. "有(り)合(わ)せ" -> [
        "有り合わせ", "有合わせ", "有り合せ", "有合せ"
    ]

    A string without parentheses yields a one-element list containing the
    string itself; the empty string yields [""].
    """
    # Group 1: literal text up to the next opening parenthesis.
    # Group 2: contents of the parenthesised segment that follows, if any.
    # \uFF08 / \uFF09 are the fullwidth opening / closing parentheses.
    pattern = r"([^(\uFF08]*)(?:[(\uFF08]([^(\uFF08)\uFF09]+)[)\uFF09])?"
    expressions = [""]
    for literal, optional in re.findall(pattern, abbreviated_expression):
        expressions = [prefix + literal for prefix in expressions]
        if optional:
            # Forms that include the optional segment come first.
            with_optional = [prefix + optional for prefix in expressions]
            expressions = with_optional + expressions
    return expressions
def expand_abbreviation_list(expressions):
    """Return the expansions of all *expressions* as one duplicate-free list.

    Each expression is passed to ``expand_abbreviation``; the results are
    concatenated in order, keeping only the first occurrence of each string.
    The input list is not modified.
    """
    expanded = (
        variant
        for abbreviated in expressions
        for variant in expand_abbreviation(abbreviated)
    )
    # dict keys keep insertion order, so this drops later duplicates while
    # preserving first-seen order.
    return list(dict.fromkeys(expanded))