"""Helpers that generate spelling variants of Japanese dictionary expressions."""

import re

from bot.data import load_variant_kanji


# Katakana code points (U+30A1..U+30F6 and the iteration marks U+30FD..U+30FE)
# mapped to the code point 96 (0x60) lower, i.e. the matching hiragana.
__KATA_TO_HIRA_MAP = {
    i: i - 96 for i in [
        *range(0x30A1, 0x30F7),
        *range(0x30FD, 0x30FF),
    ]
}

# Printable ASCII (U+0021..U+007E) mapped to the fullwidth forms
# (U+FF01..U+FF5E), which sit exactly 0xFEE0 higher.
__HALFWIDTH_TO_FULLWIDTH_MAP = {
    i: i + 0xFEE0 for i in [
        *range(0x21, 0x7F),
    ]
}

# Delimiters of the dictionary notations handled below.  Both the ASCII and the
# fullwidth form are accepted.  The fullwidth code points are written as
# escapes so that width-folding of this source file cannot silently turn them
# into regex metacharacters.
_OPEN = r"[(\uFF08]"          # "(" or FULLWIDTH LEFT PARENTHESIS
_CLOSE = r"[)\uFF09]"         # ")" or FULLWIDTH RIGHT PARENTHESIS
_NOT_OPEN = r"[^(\uFF08]"
_EQUALS = r"[=\uFF1D]"        # "=" or FULLWIDTH EQUALS SIGN
_NOT_EQUALS = r"[^=\uFF1D]"

# Any character followed by the iteration mark.
_ITERATION_MARK_RE = re.compile(r"(.)々")

# A doubled character that is neither ASCII "0".."z", fullwidth "0".."z"
# (U+FF10..U+FF5A), nor in the hiragana/katakana range.
_REPEATED_CHAR_RE = re.compile(r"([^0-z\uFF10-\uFF5Aぁ-ヿ])\1")

# <text>(<optional part>) -- groups: leading text, optional part.
_ABBREVIATION_RE = re.compile(
    rf"({_NOT_OPEN}*)(?:{_OPEN}({_NOT_OPEN}+){_CLOSE})?"
)

# △<first>(<alt>・<alt>...) -- groups: first alternative, remaining ones.
_SMK_ALTERNATIVES_RE = re.compile(
    rf"△({_NOT_OPEN}+){_OPEN}({_NOT_OPEN}+){_CLOSE}"
)

# <text>=<first>(=<alt>・<alt>...) -- groups: leading text, first alternative,
# remaining alternatives.  The "=..." part is optional.
_DAIJIRIN_ALTERNATIVES_RE = re.compile(
    rf"({_NOT_EQUALS}+)"
    rf"(?:{_EQUALS}({_NOT_OPEN}+){_OPEN}{_EQUALS}({_NOT_OPEN}+){_CLOSE})?"
)


def kata_to_hira(text):
    """Return *text* with katakana replaced by the corresponding hiragana."""
    return text.translate(__KATA_TO_HIRA_MAP)


def add_fullwidth(expressions):
    """Append the fullwidth spelling of each expression to *expressions*.

    The list is modified in place; a variant is appended only when it is not
    already present.  Nothing is returned.
    """
    # Iterating the list while appending is deliberate: appended variants are
    # visited too (they translate to themselves, so nothing more is added).
    for expression in expressions:
        new_exp = expression.translate(__HALFWIDTH_TO_FULLWIDTH_MAP)
        if new_exp not in expressions:
            expressions.append(new_exp)


def add_variant_kanji(expressions):
    """Append kanji-variant spellings to *expressions* in place.

    ``load_variant_kanji()`` is iterated with ``.items()``; presumably it
    yields ``old kanji -> new kanji`` pairs (confirm against ``bot.data``).
    For each pair, every expression containing the old kanji gets a copy with
    all occurrences replaced, appended if not already present.  Variants added
    for one pair are visible when later pairs are processed.
    """
    variant_kanji = load_variant_kanji()
    for old_kanji, new_kanji in variant_kanji.items():
        # Build the candidates first so the list is not grown while it is
        # being scanned for this pair.
        new_exps = [
            expression.replace(old_kanji, new_kanji)
            for expression in expressions
            if old_kanji in expression
        ]
        for new_exp in new_exps:
            if new_exp not in expressions:
                expressions.append(new_exp)


def remove_iteration_mark(expressions):
    """Append variants with the iteration mark spelled out, in place.

    For every character directly followed by "々", a variant in which that
    pair is replaced by the doubled character is appended (if new).
    """
    # The list grows during iteration on purpose: variants that still contain
    # an iteration mark are expanded further when the loop reaches them.
    for expression in expressions:
        for char in _ITERATION_MARK_RE.findall(expression):
            new_exp = expression.replace(f"{char}々", f"{char}{char}")
            if new_exp not in expressions:
                expressions.append(new_exp)


def add_iteration_mark(expressions):
    """Append variants that use the iteration mark "々", in place.

    A doubled character is rewritten as the character plus "々" unless it is
    an ASCII or fullwidth character in the "0".."z" range or a kana.
    """
    # As in remove_iteration_mark, appended variants are processed as well.
    for expression in expressions:
        for char in _REPEATED_CHAR_RE.findall(expression):
            new_exp = expression.replace(f"{char}{char}", f"{char}々")
            if new_exp not in expressions:
                expressions.append(new_exp)


def expand_abbreviation(abbreviated_expression):
    """Return the words described by an abbreviation notation.

    Parts wrapped in parentheses (ASCII or fullwidth) are optional, e.g.
    "有(り)難う" yields ["有り難う", "有難う"].  For each optional part the
    forms that include it come before the forms that omit it.
    """
    expressions = [""]
    for prefix, optional in _ABBREVIATION_RE.findall(abbreviated_expression):
        expressions = [expression + prefix for expression in expressions]
        if optional:
            with_optional = [expression + optional for expression in expressions]
            expressions = with_optional + expressions
    return expressions


def expand_abbreviation_list(expressions):
    """Expand every expression and return the results without duplicates.

    The order of first appearance is preserved.
    """
    # dict.fromkeys keeps insertion order and drops repeated keys.
    return list(dict.fromkeys(
        new_exp
        for expression in expressions
        for new_exp in expand_abbreviation(expression)
    ))


def expand_smk_alternatives(text):
    """Return the strings described by the "△" alternatives notation.

    "△X(Y・Z)" stands for any one of X, Y or Z.  The alternatives are read from
    the first such group in *text*; each returned string is *text* with every
    "△...(...)" group replaced by one alternative.  When *text* has no such
    group, ``[text]`` is returned.
    """
    m = _SMK_ALTERNATIVES_RE.search(text)
    if m is None:
        return [text]
    alt_parts = [m.group(1), *m.group(2).split("・")]
    # A function replacement inserts the alternative literally, so a backslash
    # in it is not interpreted as a replacement-template escape.
    return [
        _SMK_ALTERNATIVES_RE.sub(lambda _match, alt=alt_part: alt, text)
        for alt_part in alt_parts
    ]


def expand_daijirin_alternatives(text):
    """Return the strings described by the "=" alternatives notation.

    "A=X(=Y・Z)B" yields ["AXB", "AYB", "AZB"].  The equals signs and
    parentheses may be ASCII or fullwidth.  An empty *text* yields ``[""]``.
    """
    expressions = [""]
    for prefix, first, others in _DAIJIRIN_ALTERNATIVES_RE.findall(text):
        expressions = [expression + prefix for expression in expressions]
        if not first:
            # Plain text with no alternatives attached.
            continue
        with_first = [expression + first for expression in expressions]
        with_others = [
            expression + alt
            for expression in expressions
            for alt in others.split("・")
        ]
        expressions = with_first + with_others
    return expressions