# jitenbot/bot/yomichan/grammar.py
# 2023-04-11 12:01:23 -05:00
#
# 39 lines
# 1.4 KiB
# Python

from sudachipy import tokenizer
from sudachipy import dictionary
# Sudachi dictionary shared by every call to sudachi_rules(). It is built
# lazily on first use so that importing this module stays cheap and the
# dictionary is constructed only once per process instead of once per call.
_SUDACHI_DICTIONARY = None


def sudachi_rules(expression, reading):
    """Return the space-separated rule string for *expression*.

    The expression is tokenized with Sudachi in split mode A, and field 4
    of the last token's part-of-speech tuple is split on "-" into tags.
    Those tags, together with the expression and its reading, are passed
    to ``__sudachi_tags_to_rules``, whose result is returned unchanged.

    Args:
        expression: Headword text to analyze.
        reading: Reading of the headword; forwarded to the tag mapper.

    Returns:
        The string produced by ``__sudachi_tags_to_rules``, or ``""`` when
        there is nothing to analyze (empty expression or no tokens).
    """
    global _SUDACHI_DICTIONARY
    if not expression:
        # Nothing to tokenize; avoids indexing into an empty token list.
        return ""
    if _SUDACHI_DICTIONARY is None:
        _SUDACHI_DICTIONARY = dictionary.Dictionary()
    # A fresh tokenizer per call, as before; only the dictionary is shared.
    tokenizer_obj = _SUDACHI_DICTIONARY.create()
    splitmode = tokenizer.Tokenizer.SplitMode.A
    tokens = tokenizer_obj.tokenize(expression, splitmode)
    if len(tokens) == 0:
        return ""
    # Only the final token is inspected: its field 4 carries the
    # hyphen-separated tags that the mapper matches against.
    pos = tokens[-1].part_of_speech()[4]
    return __sudachi_tags_to_rules(pos.split("-"), expression, reading)
def __sudachi_tags_to_rules(tags, expression, reading):
    """Translate Sudachi tags into a space-separated rule string.

    Each tag is checked against the expression's final characters to
    decide which of the rule identifiers "adj-i", "v1", "v5", "vs" and
    "vz" apply. An expression ending in 来る whose reading ends in くる
    overrides everything else and yields only "vk".

    NOTE(review): the single-character kana/kanji literals below were
    empty strings in the reviewed copy of this file, which made the
    ``endswith``/``in`` tests vacuously true. They are restored here from
    Sudachi's conjugation-type names (e.g. 五段, 上一段, サ行変格) --
    confirm against the upstream repository.

    Args:
        tags: Iterable of tag strings (pieces of a hyphen-split tag field).
        expression: Headword text whose ending is inspected.
        reading: Reading of the headword; used only for the 来る check.

    Returns:
        The matching rule identifiers joined by single spaces, in sorted
        order, or ``""`` when no rule applies.
    """
    # Dictionary-form endings of godan (u-row) verbs. A tuple lets
    # str.endswith test all of them in one call.
    u_endings = ("う", "く", "す", "つ", "ぬ", "ふ", "む",
                 "ゆ", "る", "ぐ", "ず", "づ", "ぶ", "ぷ")
    rules = set()
    for tag in tags:
        if expression.endswith("い"):
            # i-adjectives, plus the auxiliaries that inflect like them.
            if tag == "形容詞" or "ナイ" in tag or "タイ" in tag:
                rules.add("adj-i")
        if expression.endswith("る"):
            # 上一段 / 下一段 conjugation types both contain 一.
            if "一" in tag or tag == "レル":
                rules.add("v1")
        # 二段 / 四段 / 五段 types are all treated as "v5", provided the
        # expression really ends in a u-row kana.
        if "二" in tag or "四" in tag or "五" in tag:
            if expression.endswith(u_endings):
                rules.add("v5")
        if "サ" in tag and (expression.endswith("する") or expression == "為る"):
            rules.add("vs")
        if "サ" in tag and expression.endswith("ずる"):
            rules.add("vz")
    if expression.endswith("来る") and reading.endswith("くる"):
        # The irregular verb 来る replaces whatever the tags suggested.
        rules = {"vk"}
    # Sorted so the output is stable across runs; joining a set directly
    # gives an order that can vary with string hash randomization.
    return " ".join(sorted(rules))