jitenbot/bot/entries/expressions.py

125 lines
3.7 KiB
Python
Raw Normal View History

import re
2023-05-07 01:07:07 +00:00
from bot.data import load_variant_kanji
# Translation table for str.translate(): each katakana code point is mapped
# to the hiragana code point located 0x60 positions below it.
__KATA_TO_HIRA_MAP = {
    code: code - 0x60
    for code in (
        *range(0x30A1, 0x30F7),  # U+30A1 .. U+30F6
        *range(0x30FD, 0x30FF),  # U+30FD and U+30FE (kana iteration marks)
    )
}

# Translation table for str.translate(): printable ASCII (U+0021 .. U+007E)
# is mapped to the fullwidth form located 0xFEE0 positions above it.
__HALFWIDTH_TO_FULLWIDTH_MAP = {
    code: code + 0xFEE0 for code in range(0x21, 0x7F)
}
def kata_to_hira(text):
    """Return *text* with katakana replaced by the corresponding hiragana.

    Characters that are not keys of the module's katakana table are left
    unchanged.
    """
    return text.translate(__KATA_TO_HIRA_MAP)
def add_fullwidth(expressions):
    """Append the fullwidth spelling of every expression to *expressions*.

    The list is extended in place and nothing is returned. A spelling that
    is already present (including an expression that contains no halfwidth
    characters) is not added again.
    """
    # Iterate over a snapshot rather than the list being appended to.
    # Translating an already-fullwidth string is a no-op, so re-visiting the
    # appended variants would add nothing.
    for expression in list(expressions):
        new_exp = expression.translate(__HALFWIDTH_TO_FULLWIDTH_MAP)
        if new_exp not in expressions:
            expressions.append(new_exp)
2023-05-07 01:07:07 +00:00
def add_variant_kanji(expressions):
    """Append spellings that use variant kanji to *expressions*.

    For each key/value pair of the mapping returned by
    ``load_variant_kanji()``, every expression containing the key gets a
    copy with the key replaced by the value. The list is extended in place,
    so later pairs also apply to spellings added by earlier pairs.
    """
    for source_char, target_char in load_variant_kanji().items():
        # Collect the candidates first, then append them, so the list is not
        # modified while this pair is being applied to it.
        candidates = [
            spelling.replace(source_char, target_char)
            for spelling in expressions
            if source_char in spelling
        ]
        for candidate in candidates:
            if candidate not in expressions:
                expressions.append(candidate)
def remove_iteration_mark(expressions):
    """Add spellings in which an iteration mark is written out in full.

    For every character followed by the ideographic iteration mark
    (U+3005), a variant with the character doubled instead is appended,
    e.g. U+4EBA U+3005 gains U+4EBA U+4EBA. The list is extended in place
    and nothing is returned.
    """
    mark = "\u3005"  # ideographic iteration mark
    iterated_char = re.compile(f"(.){mark}")
    # The list is deliberately iterated while it grows: appended variants are
    # visited too, so an expression with several marks ends up with every
    # combination of expanded and unexpanded marks.
    for expression in expressions:
        for char in iterated_char.findall(expression):
            # Replace the character together with its mark. Replacing the
            # bare character would double it but leave the mark in place.
            new_exp = expression.replace(f"{char}{mark}", f"{char}{char}")
            if new_exp not in expressions:
                expressions.append(new_exp)
def add_iteration_mark(expressions):
    """Add spellings in which a doubled character uses the iteration mark.

    For every character that is immediately repeated, a variant is appended
    in which the second occurrence is replaced by the ideographic iteration
    mark (U+3005), e.g. U+4EBA U+4EBA gains U+4EBA U+3005. ASCII characters
    from "0" to "z", their fullwidth counterparts, and kana are never
    treated as repeatable. The list is extended in place and nothing is
    returned.
    """
    mark = "\u3005"  # ideographic iteration mark
    # NOTE(review): the character class previously read "0-z-z", which looks
    # like a fullwidth "0"-"z" range (U+FF10 .. U+FF5A) whose endpoints were
    # lost. It is restored here with escapes; confirm against upstream.
    repeated_char = re.compile(r"([^0-z\uFF10-\uFF5Aぁ-ヿ])\1")
    # The list is deliberately iterated while it grows, so expressions with
    # several doubled characters receive every combination of variants.
    for expression in expressions:
        for char in repeated_char.findall(expression):
            # The replacement must contain the mark; dropping it would just
            # delete one of the two characters.
            new_exp = expression.replace(f"{char}{char}", f"{char}{mark}")
            if new_exp not in expressions:
                expressions.append(new_exp)
def expand_abbreviation(abbreviated_expression):
    """Return the list of words described by an abbreviation (省略) notation.

    Text wrapped in fullwidth parentheses (U+FF08 / U+FF09) is optional.
    Every combination of keeping or dropping each optional part is
    returned; for each optional part the spellings that keep it come before
    the spellings that drop it. Input without parentheses is returned as a
    one-element list.
    """
    # Groups: (text before the parenthesis, whole parenthesised part, the
    # text inside the parentheses).
    # NOTE(review): the parentheses are written as escapes because the
    # literal delimiters were missing from the pattern, which did not compile
    # without them. Fullwidth parentheses are assumed; confirm against the
    # dictionary data.
    optional_part = re.compile(r"([^\uFF08]*)(\uFF08([^\uFF09]+)\uFF09)?")
    expressions = [""]
    for prefix, _, optional in optional_part.findall(abbreviated_expression):
        expressions = [exp + prefix for exp in expressions]
        if optional:
            with_optional = [exp + optional for exp in expressions]
            expressions = with_optional + expressions
    return expressions
def expand_abbreviation_list(expressions):
    """Expand every abbreviated expression and return the distinct results.

    Each item of *expressions* is passed through ``expand_abbreviation``.
    Results are returned in the order they are first produced, without
    repeats. The input list is not modified.
    """
    expanded = (
        candidate
        for abbreviated in expressions
        for candidate in expand_abbreviation(abbreviated)
    )
    # Dict keys keep insertion order and silently drop repeats.
    return list(dict.fromkeys(expanded))
2023-05-07 01:07:07 +00:00
def expand_smk_alternatives(text):
    """Return a list of strings described by △ notation.

    The notation is a "△" followed by a base spelling and a parenthesised
    list of alternative spellings. The result contains *text* with the
    whole notation replaced by the base spelling, followed by one copy of
    *text* per alternative. Text without the notation is returned unchanged
    as a one-element list.
    """
    # NOTE(review): the delimiter characters were missing from the pattern
    # and from the split() separator, so the pattern did not compile and
    # split("") raised ValueError. They are restored here as fullwidth
    # parentheses (U+FF08 / U+FF09) and the katakana middle dot (U+30FB);
    # confirm against the dictionary data.
    notation = re.compile(r"△([^\uFF08]+)\uFF08([^\uFF09]+)\uFF09")
    m = notation.search(text)
    if m is None:
        return [text]
    alt_parts = [m.group(1), *m.group(2).split("\u30FB")]
    # A callable replacement inserts each alternative literally, so
    # backslashes in the dictionary text are not read as escapes.
    return [
        notation.sub(lambda _match, part=alt_part: part, text)
        for alt_part in alt_parts
    ]
def expand_daijirin_alternatives(text):
    """Return a list of strings described by the fullwidth "=" notation.

    A group consists of leading text, a marker, a first alternative and a
    parenthesised list of further alternatives. Each group multiplies the
    spellings built so far: the spellings using the first alternative come
    first, followed by the spellings using the further alternatives. Text
    without the notation is returned as a one-element list.
    """
    # Groups: (leading text, whole alternative block, first alternative,
    # further alternatives joined by the separator).
    # NOTE(review): the delimiter characters were missing from the pattern
    # and from the split() separator, so the pattern did not compile and
    # split("") raised ValueError. They are restored here as the fullwidth
    # equals sign (U+FF1D), fullwidth parentheses (U+FF08 / U+FF09) and the
    # katakana middle dot (U+30FB); confirm against the dictionary data.
    group_pattern = re.compile(
        r"([^\uFF1D]+)"
        r"(\uFF1D([^\uFF08]+)\uFF08\uFF1D([^\uFF09]+)\uFF09)?"
    )
    expressions = [""]
    for prefix, whole, first_alt, other_alts in group_pattern.findall(text):
        expressions = [exp + prefix for exp in expressions]
        if not whole:
            continue
        expanded = [exp + first_alt for exp in expressions]
        expanded += [
            exp + alt
            for exp in expressions
            for alt in other_alts.split("\u30FB")
        ]
        expressions = expanded
    return expressions