2023-04-10 20:20:33 +00:00
|
|
|
from sudachipy import tokenizer
|
|
|
|
from sudachipy import dictionary
|
|
|
|
|
2023-04-22 17:03:00 +00:00
|
|
|
import bot.data as Data
|
2023-04-10 20:20:33 +00:00
|
|
|
|
2023-04-22 17:03:00 +00:00
|
|
|
# Hiragana with a "u" vowel. tags_to_rules compares the final character of
# an expression against this list before adding the "v5" rule for a tag in
# the "godan" category.
__U_KANA_LIST = [
    "う", "く", "す", "つ", "ぬ", "ふ", "む", "ゆ", "る",
    "ぐ", "ず", "づ", "ぶ", "ぷ",
]

# Module-level caches, filled in by sudachi_rules on its first call.
__SUDACHI_DICTIONARY = None
__SUDACHI_INFLECTION_TYPES = None
|
|
|
|
|
|
|
|
|
|
|
|
def sudachi_rules(expression):
    """Derive a rule string for ``expression`` from Sudachi's analysis.

    On first use this creates a tokenizer from the "full" Sudachi dictionary
    and loads the "sudachi" entry of
    ``Data.yomichan_inflection_categories()``; both are kept in module
    globals so later calls reuse them.

    The expression is tokenized in split mode A. Element 4 of the last
    token's ``part_of_speech()`` is split on "-" and the resulting tags are
    handed to ``tags_to_rules``.
    NOTE(review): element 4 is presumably Sudachi's conjugation type --
    confirm against the SudachiPy documentation.

    Args:
        expression: Text to analyse.

    Returns:
        Whatever ``tags_to_rules`` returns for the derived tags, or "" when
        tokenization yields no tokens.
    """
    global __SUDACHI_DICTIONARY
    global __SUDACHI_INFLECTION_TYPES

    # Build the expensive resources once and cache them at module level.
    if __SUDACHI_DICTIONARY is None:
        __SUDACHI_DICTIONARY = dictionary.Dictionary(dict="full").create()
    if __SUDACHI_INFLECTION_TYPES is None:
        all_categories = Data.yomichan_inflection_categories()
        __SUDACHI_INFLECTION_TYPES = all_categories["sudachi"]

    mode_a = tokenizer.Tokenizer.SplitMode.A
    morphemes = __SUDACHI_DICTIONARY.tokenize(expression, mode_a)
    if len(morphemes) == 0:
        return ""

    # Only the final token is considered when classifying the expression.
    final_morpheme = morphemes[len(morphemes) - 1]
    inflection_tags = final_morpheme.part_of_speech()[4].split("-")
    return tags_to_rules(
        expression, inflection_tags, __SUDACHI_INFLECTION_TYPES)
|
|
|
|
|
|
|
|
|
2023-04-22 17:03:00 +00:00
|
|
|
def tags_to_rules(expression, tags, inflection_types):
    """Translate part-of-speech tags into a space-separated rule string.

    Every tag is checked against the categories in ``inflection_types``. A
    matching category contributes a rule only when ``expression`` also has
    an ending that fits that category.

    Args:
        expression: Term whose ending is inspected.
        tags: Iterable of tag strings to classify.
        inflection_types: Mapping with the keys "sahen", "godan",
            "ichidan", "keiyoushi", "kahen" and "sudachi"; each value must
            support ``in`` tests against a tag.

    Returns:
        The collected rule identifiers ("vs", "vz", "v5", "v1", "adj-i",
        "vk") joined by single spaces in sorted order, or "" when nothing
        matched. If a tag belongs to the "sudachi" category, the result of
        ``sudachi_rules(expression)`` is returned immediately instead and
        any rules collected so far are discarded.

    Raises:
        KeyError: If ``inflection_types`` lacks one of the category keys
            and ``tags`` is not empty.
    """
    rules = set()
    for tag in tags:
        if tag in inflection_types["sahen"]:
            if expression.endswith(("する", "為る")):
                rules.add("vs")
            elif expression.endswith("ずる"):
                rules.add("vz")
            elif expression.endswith("す"):
                rules.add("v5")
        if tag in inflection_types["godan"]:
            # expression[-1:] is "" for an empty expression, so this never
            # raises and simply fails the membership test.
            if expression[-1:] in __U_KANA_LIST:
                rules.add("v5")
        if tag in inflection_types["ichidan"]:
            if expression.endswith("る"):
                rules.add("v1")
        if tag in inflection_types["keiyoushi"]:
            if expression.endswith("い"):
                rules.add("adj-i")
        if tag in inflection_types["kahen"]:
            if expression.endswith(("くる", "来る")):
                rules.add("vk")
        if tag in inflection_types["sudachi"]:
            # Delegate the whole decision to Sudachi's analysis.
            return sudachi_rules(expression)
    # Sort before joining: set iteration order for strings varies between
    # interpreter runs, which made the output order unpredictable.
    return " ".join(sorted(rules))
|