# jitenbot/bot/yomichan/grammar.py

from sudachipy import tokenizer
from sudachipy import dictionary


# Lazily-created Sudachi tokenizer shared by every call to sudachi_rules().
_tokenizer_obj = None


def _get_tokenizer():
    """Return the shared Sudachi tokenizer, creating it on first use."""
    global _tokenizer_obj
    if _tokenizer_obj is None:
        # Building a Dictionary is costly, so do it once instead of once
        # per expression.
        _tokenizer_obj = dictionary.Dictionary().create()
    return _tokenizer_obj


def sudachi_rules(expression, reading):
    """Return the rule identifiers that Sudachi's analysis suggests.

    The expression is tokenized with Sudachi split mode A. The field at
    index 4 of the last token's part-of-speech tuple (the conjugation
    type, per the SudachiPy documentation) is split on "-" and the
    resulting tags are mapped to rule names.

    Args:
        expression: Headword text to analyse.
        reading: Reading of the headword; only forwarded to the tag
            mapper.

    Returns:
        A space-separated string of rule names, or an empty string when
        no rule applies.
    """
    splitmode = tokenizer.Tokenizer.SplitMode.A
    tokens = _get_tokenizer().tokenize(expression, splitmode)
    if len(tokens) == 0:
        # Nothing was tokenized (e.g. empty expression): there is no last
        # token to inspect, so proceed with no tags instead of raising.
        tags = []
    else:
        pos = tokens[len(tokens) - 1].part_of_speech()[4]
        tags = pos.split("-")
    return __sudachi_tags_to_rules(tags, expression, reading)


def __sudachi_tags_to_rules(tags, expression, reading):
    """Map Sudachi tags plus the word's ending to rule names.

    Args:
        tags: Iterable of tag strings (the "-"-separated pieces of a
            Sudachi part-of-speech field).
        expression: Headword text; its final characters gate each rule.
        reading: Reading of the headword; used only for the 来る check.

    Returns:
        A space-separated string of rule names in sorted order
        ("adj-i", "v1", "v5", "vs", "vz"), the single rule "vk" for
        来る read as くる, or an empty string when nothing matches.
    """
    # 来る read as くる overrides whatever the tags would have produced.
    if expression.endswith("来る") and reading.endswith("くる"):
        return "vk"

    u_endings = ("う", "く", "す", "つ", "ぬ", "ふ", "む",
                 "ゆ", "る", "ぐ", "ず", "づ", "ぶ", "ぷ")

    # The ending checks do not depend on the tag, so evaluate them once.
    ends_with_i = expression.endswith("い")
    ends_with_ru = expression.endswith("る")
    ends_with_u_kana = expression.endswith(u_endings)
    is_suru = expression.endswith("する") or expression == "為る"
    is_zuru = expression.endswith("ずる")

    rules = set()
    for tag in tags:
        if ends_with_i and (tag == "形容詞" or "ナイ" in tag or "タイ" in tag):
            rules.add("adj-i")
        if ends_with_ru and ("一" in tag or tag == "レル"):
            rules.add("v1")
        if ends_with_u_kana and ("二" in tag or "四" in tag or "五" in tag):
            rules.add("v5")
        if "サ" in tag:
            if is_suru:
                rules.add("vs")
            if is_zuru:
                rules.add("vz")

    # Sort so the output does not depend on set iteration order, which
    # varies between interpreter runs for strings.
    return " ".join(sorted(rules))
Reorganize file structure 2023-04-10 20:20:33 +00:00			`from sudachipy import tokenizer`
			`from sudachipy import dictionary`


			`def sudachi_rules(expression, reading):`
			`tokenizer_obj = dictionary.Dictionary().create()`
			`splitmode = tokenizer.Tokenizer.SplitMode.A`
			`tokens = tokenizer_obj.tokenize(expression, splitmode)`
			`pos = tokens[len(tokens)-1].part_of_speech()[4]`
			`tags = pos.split("-")`
			`rules = __sudachi_tags_to_rules(tags, expression, reading)`
			`return rules`


			`def __sudachi_tags_to_rules(tags, expression, reading):`
			`u_endings = ["う", "く", "す", "つ", "ぬ", "ふ", "む",`
			`"ゆ", "る", "ぐ", "ず", "づ", "ぶ", "ぷ"]`
			`rules = set()`
			`for tag in tags:`
			`if expression.endswith("い"):`
			`if tag == "形容詞" or "ナイ" in tag or "タイ" in tag:`
			`rules.add("adj-i")`
			`if expression.endswith("る"):`
			`if "一" in tag or tag == "レル":`
			`rules.add("v1")`
			`if "二" in tag or "四" in tag or "五" in tag:`
			`for u_ending in u_endings:`
			`if expression.endswith(u_ending):`
			`rules.add("v5")`
			`break`
			`if "サ" in tag and (expression.endswith("する") or expression == "為る"):`
			`rules.add("vs")`
			`if "サ" in tag and expression.endswith("ずる"):`
			`rules.add("vz")`
			`if expression.endswith("来る") and reading.endswith("くる"):`
			`rules = set()`
			`rules.add("vk")`
			`return " ".join(list(rules))`