jitenbot/bot/entries/expressions.py
2023-05-06 20:07:07 -05:00

125 lines
3.7 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

import re
from bot.data import load_variant_kanji
# Code-point table for str.translate: katakana -> hiragana.
# Keys cover U+30A1..U+30F6 and the two marks U+30FD..U+30FE; every value is
# the key shifted down by 0x60 (96).
__KATA_TO_HIRA_MAP = {
    code_point: code_point - 0x60
    for code_points in (range(0x30A1, 0x30F7), range(0x30FD, 0x30FF))
    for code_point in code_points
}


# Code-point table for str.translate: ASCII U+0021..U+007E -> the code point
# 0xFEE0 higher (U+FF01..U+FF5E). The space character (U+0020) has no entry.
__HALFWIDTH_TO_FULLWIDTH_MAP = {
    code_point: code_point + 0xFEE0 for code_point in range(0x21, 0x7F)
}
def kata_to_hira(text):
    """Return *text* with katakana replaced by the matching hiragana.

    The conversion is a single ``str.translate`` pass through
    ``__KATA_TO_HIRA_MAP``; characters without an entry in that table are
    returned unchanged.
    """
    return text.translate(__KATA_TO_HIRA_MAP)
def add_fullwidth(expressions):
    """Append the fullwidth form of every expression to *expressions*.

    Each string is translated through ``__HALFWIDTH_TO_FULLWIDTH_MAP`` and
    the result is appended when it is not already in the list. The list is
    modified in place and nothing is returned.
    """
    # Iterate over a snapshot rather than the list being appended to. The
    # outcome is the same as walking the growing list, because translating an
    # already-translated string yields that string again.
    for expression in list(expressions):
        fullwidth = expression.translate(__HALFWIDTH_TO_FULLWIDTH_MAP)
        if fullwidth not in expressions:
            expressions.append(fullwidth)
def add_variant_kanji(expressions):
    """Append variant-kanji spellings of the expressions, in place.

    ``load_variant_kanji()`` supplies a mapping whose items are iterated as
    ``(old_kanji, new_kanji)`` pairs. For each pair, every expression that
    contains ``old_kanji`` yields a copy with all occurrences replaced by
    ``new_kanji``; copies not yet in the list are appended. Strings added for
    one pair are visible when the next pair is processed.
    """
    for old_kanji, new_kanji in load_variant_kanji().items():
        # Collect first, append afterwards, so the list is not extended
        # while it is being scanned for this pair.
        candidates = [
            current.replace(old_kanji, new_kanji)
            for current in expressions
            if old_kanji in current
        ]
        for candidate in candidates:
            if candidate in expressions:
                continue
            expressions.append(candidate)
def remove_iteration_mark(expressions):
    """Append spellings with the iteration mark 々 written out, in place.

    For every character that is directly followed by 々, a copy of the
    expression is made in which that ``<char>々`` pair becomes
    ``<char><char>``. Copies not yet in the list are appended. Nothing is
    returned.
    """
    iterated_kanji = r"(.)々"
    # The loop deliberately walks the list while it grows: strings appended
    # here are examined in turn, so an expression with several marks ends up
    # with every combination of expanded and unexpanded marks.
    for expression in expressions:
        for char in re.findall(iterated_kanji, expression):
            # Replace the character together with its mark. Replacing the
            # bare character would leave 々 in place and produce an
            # ever-longer string on each pass.
            new_exp = expression.replace(f"{char}々", f"{char}{char}")
            if new_exp not in expressions:
                expressions.append(new_exp)
def add_iteration_mark(expressions):
    """Append spellings that use the iteration mark 々, in place.

    For every immediately repeated character matched by the pattern below, a
    copy of the expression is made in which ``<char><char>`` becomes
    ``<char>々``. Copies not yet in the list are appended. Nothing is
    returned.
    """
    # The class excludes ASCII '0'..'z', '-', and U+3041..U+30FF (the
    # hiragana and katakana blocks), so only other characters qualify.
    # NOTE(review): the stray "-z" suggests a second range lost its
    # endpoints (possibly fullwidth alphanumerics) - confirm against the
    # upstream file before relying on the exact exclusions.
    repeat_kanji = r"([^0-z-zぁ-ヿ])\1"
    # The loop deliberately walks the list while it grows, so expressions
    # with several doubled characters get every combination.
    for expression in expressions:
        for char in re.findall(repeat_kanji, expression):
            # The second occurrence becomes the mark; dropping it instead
            # would shorten the word rather than abbreviate it.
            new_exp = expression.replace(f"{char}{char}", f"{char}々")
            if new_exp not in expressions:
                expressions.append(new_exp)
def expand_abbreviation(abbreviated_expression):
    """Return a list of words described by a 省略 notation.

    A parenthesised segment is optional: the result contains the expression
    both with and without each such segment. For every optional segment the
    spellings that include it are listed before those that omit it. An
    expression without any parenthesised segment comes back as a one-item
    list.
    """
    # The delimiters are the fullwidth parentheses U+FF08 / U+FF09, written
    # as escapes so they cannot be mistaken for ASCII parentheses or lost by
    # tools that normalise look-alike characters.
    # NOTE(review): the delimiters were missing from the pattern (it did not
    # compile); fullwidth parentheses are a reconstruction - confirm against
    # the dictionary data.
    segment_pattern = r"([^\uFF08]*)(\uFF08([^\uFF08]+)\uFF09)?"
    expressions = [""]
    for prefix, _, optional in re.findall(segment_pattern, abbreviated_expression):
        expressions = [expression + prefix for expression in expressions]
        if optional == "":
            continue
        expressions = [
            expression + optional for expression in expressions
        ] + expressions
    return expressions
def expand_abbreviation_list(expressions):
    """Expand every expression and return the distinct results as a new list.

    Each item is passed to ``expand_abbreviation``. The expansions are
    concatenated in input order, and only the first occurrence of each
    string is kept. The input list is not modified.
    """
    # dict.fromkeys keeps first-seen order, giving an ordered de-duplication.
    unique = dict.fromkeys(
        expanded
        for abbreviated in expressions
        for expanded in expand_abbreviation(abbreviated)
    )
    return list(unique)
def expand_smk_alternatives(text):
    """Return a list of strings described by △ notation.

    The notation handled is ``△A（B・C）``: the segment after △ may be
    replaced by any alternative listed in the following parentheses, with
    alternatives separated by a middle dot. The result has one string per
    choice, the unparenthesised segment first. Text without the notation is
    returned as a one-item list.
    """
    # Delimiters are written as escapes: U+FF08 / U+FF09 (fullwidth
    # parentheses) in the pattern and U+30FB (katakana middle dot) as the
    # separator, so they survive tools that normalise look-alike characters.
    # NOTE(review): these characters were missing from the literals (the
    # pattern had one group and the separator was empty); the choice is a
    # reconstruction - confirm against the dictionary data.
    notation = re.compile(r"△([^\uFF08]+)\uFF08([^\uFF08]+)\uFF09")
    m = notation.search(text)
    if m is None:
        return [text]
    alt_parts = [m.group(1), *m.group(2).split("\u30fb")]
    # A callable replacement inserts the alternative verbatim; a plain string
    # would have backslashes interpreted as escapes by re.sub. As before,
    # every occurrence of the notation is replaced by the same alternative.
    return [
        notation.sub(lambda _match, alt_part=alt_part: alt_part, text)
        for alt_part in alt_parts
    ]
def expand_daijirin_alternatives(text):
    """Return a list of strings described by ＝ notation.

    The notation handled is ``＝A（＝B・C）``: the segment ``A`` may be
    replaced by any alternative listed in the parentheses, with alternatives
    separated by a middle dot. Every combination across the whole text is
    returned. For each notation the spellings using ``A`` come first,
    followed by the alternatives grouped per preceding spelling. Text without
    the notation is returned as a one-item list.
    """
    # Delimiters are written as escapes so they survive tools that normalise
    # look-alike characters: U+FF1D (fullwidth equals sign), U+FF08 / U+FF09
    # (fullwidth parentheses), U+30FB (katakana middle dot).
    # NOTE(review): these characters were missing from the literals (the
    # pattern did not compile and the separator was empty); the choice is a
    # reconstruction - confirm against the dictionary data.
    group_pattern = (
        r"([^\uFF1D]+)"
        r"(\uFF1D([^\uFF08]+)\uFF08\uFF1D([^\uFF08]+)\uFF09)?"
    )
    separator = "\u30fb"
    expressions = [""]
    for prefix, notation, first, others in re.findall(group_pattern, text):
        expressions = [expression + prefix for expression in expressions]
        if notation == "":
            continue
        with_first = [expression + first for expression in expressions]
        with_others = [
            expression + alt
            for expression in expressions
            for alt in others.split(separator)
        ]
        expressions = with_first + with_others
    return expressions