jitenbot/bot/entries/smk8/base_entry.py
2023-07-26 19:28:50 -05:00

74 lines
2.7 KiB
Python
Raw Permalink Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

import bot.soup as Soup
import bot.entries.base.expressions as Expressions
from bot.entries.base.sanseido_entry import SanseidoEntry
class BaseEntry(SanseidoEntry):
    """Shared parsing logic for the smk8 entry classes.

    Extends ``SanseidoEntry`` with the tag names used by this
    dictionary's markup and keeps separate lists for the three kinds
    of sub-entries (children, phrases, kanji) attached to an entry.
    """

    def __init__(self, target, entry_id):
        """Initialise the parent entry and start with no sub-entries."""
        super().__init__(target, entry_id)
        self.children, self.phrases, self.kanjis = [], [], []

    def get_part_of_speech_tags(self):
        """Return the entry's part-of-speech tags, computing them once.

        The tags are the texts of the "品詞M" elements found inside the
        "見出要素" element of the page soup, de-duplicated while keeping
        their first-seen order. The result is stored on the instance
        and returned as-is on later calls; it is an empty list when the
        page has no "見出要素" element.
        """
        if self._part_of_speech_tags is None:
            # Store the list before parsing, as the cache marker.
            self._part_of_speech_tags = []
            headword_info = self.get_page_soup().find("見出要素")
            if headword_info is not None:
                pos_nodes = headword_info.find_all("品詞M")
                # dict.fromkeys drops duplicates but keeps insertion order.
                self._part_of_speech_tags.extend(
                    dict.fromkeys(node.text for node in pos_nodes)
                )
        return self._part_of_speech_tags

    def _find_reading(self, soup):
        """Return the text of the "見出仮名" element with filler removed.

        Raises AttributeError if the soup has no "見出仮名" element.
        """
        reading = soup.find("見出仮名").text
        # NOTE(review): some of these literals may contain characters
        # that do not render in every editor -- confirm before editing.
        for filler in [" ", ""]:
            reading = reading.replace(filler, "")
        return reading

    def _find_expressions(self, soup):
        """Collect the cleaned "標準表記" texts and expand abbreviations."""
        cleaned = [
            self._clean_expression(node.text)
            for node in soup.find_all("標準表記")
        ]
        return Expressions.expand_abbreviation_list(cleaned)

    def _get_subentry_parameters(self):
        """Describe how each kind of sub-entry is located and stored.

        Returns a list of ``[entry class, tag names, destination list]``
        triples, one per sub-entry kind.
        """
        # Imported here rather than at module level, as in the original
        # (presumably to avoid a circular import -- TODO confirm).
        from bot.entries.smk8.child_entry import ChildEntry
        from bot.entries.smk8.phrase_entry import PhraseEntry
        from bot.entries.smk8.kanji_entry import KanjiEntry

        child_spec = [ChildEntry, ["子項目F", "子項目"], self.children]
        phrase_spec = [PhraseEntry, ["句項目F", "句項目"], self.phrases]
        kanji_spec = [KanjiEntry, ["造語成分項目"], self.kanjis]
        return [child_spec, phrase_spec, kanji_spec]

    @staticmethod
    def _delete_unused_nodes(soup):
        """Strip markup from the headword line that is not headword text.

        Every element named in the list below is removed from ``soup``
        via ``Soup.delete_soup_nodes``.
        """
        for tag_name in ("表音表記", "表外音訓マーク", "表外字マーク", "ルビG"):
            Soup.delete_soup_nodes(soup, tag_name)

    @staticmethod
    def _clean_expression(expression):
        """Return ``expression`` with each listed substring removed."""
        # NOTE(review): some of these literals may contain characters
        # that do not render in every editor -- confirm before editing.
        for unwanted in ["", "", "", "", "", " "]:
            expression = expression.replace(unwanted, "")
        return expression

    @staticmethod
    def _fill_alts(soup):
        """Replace element contents with their ``alt`` attribute text.

        "親見出仮名" and "親見出表記" elements take their own ``alt``
        value; "外字" elements take the ``alt`` of their ``img`` child.
        """
        for parent_headword in soup.find_all(["親見出仮名", "親見出表記"]):
            parent_headword.string = parent_headword.attrs["alt"]
        for gaiji_node in soup.find_all("外字"):
            gaiji_node.string = gaiji_node.img.attrs["alt"]