jitenbot/bot/yomichan/grammar.py

61 lines
2.2 KiB
Python
Raw Normal View History

2023-04-10 20:20:33 +00:00
from sudachipy import tokenizer
from sudachipy import dictionary
2023-04-22 17:03:00 +00:00
import bot.data as Data
2023-04-10 20:20:33 +00:00
2023-04-22 17:03:00 +00:00
# Hiragana of the "u" column (plain, voiced and semi-voiced forms).
# tags_to_rules() compares the final character of an expression against
# this list before tagging it with the godan rule "v5".
__U_KANA_LIST = ["う", "く", "す", "つ", "ぬ", "ふ", "む",
                 "ゆ", "る", "ぐ", "ず", "づ", "ぶ", "ぷ"]

# Both values are filled in by sudachi_rules() on its first call and
# reused afterwards.
__SUDACHI_DICTIONARY = None
__SUDACHI_INFLECTION_TYPES = None
def sudachi_rules(expression):
    """Derive inflection rules for *expression* from Sudachi's analysis.

    The expression is tokenized in split mode A.  The part-of-speech
    entry at index 4 of the last token is split on "-" into tags, and
    those tags are passed to ``tags_to_rules`` together with the
    "sudachi" entry of the Yomichan inflection categories.

    The tokenizer and the inflection categories are created on the first
    call and stored in module-level globals for later calls.

    Returns whatever ``tags_to_rules`` returns, or an empty string when
    tokenization produces no tokens.
    """
    global __SUDACHI_DICTIONARY, __SUDACHI_INFLECTION_TYPES

    # One-time setup of the tokenizer and the category table.
    if __SUDACHI_DICTIONARY is None:
        __SUDACHI_DICTIONARY = dictionary.Dictionary(dict="full").create()
    if __SUDACHI_INFLECTION_TYPES is None:
        all_categories = Data.yomichan_inflection_categories()
        __SUDACHI_INFLECTION_TYPES = all_categories["sudachi"]

    mode = tokenizer.Tokenizer.SplitMode.A
    morphemes = __SUDACHI_DICTIONARY.tokenize(expression, mode)
    if len(morphemes) == 0:
        return ""

    # Only the final token is inspected.
    # NOTE(review): index 4 is presumably the conjugation type in
    # Sudachi's part-of-speech tuple -- confirm against the SudachiPy docs.
    final_morpheme = morphemes[len(morphemes) - 1]
    tags = final_morpheme.part_of_speech()[4].split("-")
    return tags_to_rules(expression, tags, __SUDACHI_INFLECTION_TYPES)
2023-04-22 17:03:00 +00:00
def tags_to_rules(expression, tags, inflection_types):
    """Translate inflection tags into a space-separated rule string.

    Each tag is looked up in the categories of *inflection_types*.  A
    rule is added only when the tag belongs to a category AND the
    expression ends with a suffix that fits that category:

    * "sahen":     "する" / "為る" -> "vs", "ずる" -> "vz", "す" -> "v5"
    * "godan":     final character in ``__U_KANA_LIST`` -> "v5"
    * "ichidan":   "る" -> "v1"
    * "keiyoushi": "い" -> "adj-i"
    * "kahen":     "くる" / "来る" -> "vk"

    If a tag belongs to the "sudachi" category, the function returns
    ``sudachi_rules(expression)`` immediately and discards any rules
    collected up to that point.

    Args:
        expression: The headword whose ending is inspected.
        tags: Iterable of tag strings to classify.
        inflection_types: Mapping with the keys "sahen", "godan",
            "ichidan", "keiyoushi", "kahen" and "sudachi", each holding
            a container of tags.

    Returns:
        The collected rules joined by single spaces in sorted order, or
        an empty string when no rule applies.
    """
    rules = set()
    # Slicing (rather than indexing) yields "" for an empty expression
    # instead of raising IndexError.
    exp_final_character = expression[-1:]
    for tag in tags:
        if tag in inflection_types["sahen"]:
            if expression.endswith(("する", "為る")):
                rules.add("vs")
            elif expression.endswith("ずる"):
                rules.add("vz")
            elif expression.endswith("す"):
                rules.add("v5")
        if tag in inflection_types["godan"]:
            if exp_final_character in __U_KANA_LIST:
                rules.add("v5")
        if tag in inflection_types["ichidan"]:
            if expression.endswith("る"):
                rules.add("v1")
        if tag in inflection_types["keiyoushi"]:
            if expression.endswith("い"):
                rules.add("adj-i")
        if tag in inflection_types["kahen"]:
            if expression.endswith(("くる", "来る")):
                rules.add("vk")
        if tag in inflection_types["sudachi"]:
            # Defer entirely to the tokenizer-based analysis.
            return sudachi_rules(expression)
    # Sorted so the output does not depend on set iteration order.
    return " ".join(sorted(rules))