jitenbot/bot/entries/smk8/base_entry.py

74 lines
2.7 KiB
Python
Raw Normal View History

import bot.soup as Soup
import bot.entries.base.expressions as Expressions
from bot.entries.base.sanseido_entry import SanseidoEntry
class BaseEntry(SanseidoEntry):
def __init__(self, target, entry_id):
super().__init__(target, entry_id)
self.children = []
self.phrases = []
self.kanjis = []
def get_part_of_speech_tags(self):
if self._part_of_speech_tags is not None:
return self._part_of_speech_tags
self._part_of_speech_tags = []
soup = self.get_page_soup()
headword_info = soup.find("見出要素")
if headword_info is None:
return self._part_of_speech_tags
for tag in headword_info.find_all("品詞M"):
if tag.text not in self._part_of_speech_tags:
self._part_of_speech_tags.append(tag.text)
return self._part_of_speech_tags
def _find_reading(self, soup):
midasi_kana = soup.find("見出仮名")
reading = midasi_kana.text
for x in [" ", ""]:
reading = reading.replace(x, "")
return reading
def _find_expressions(self, soup):
clean_expressions = []
for expression in soup.find_all("標準表記"):
clean_expression = self._clean_expression(expression.text)
clean_expressions.append(clean_expression)
expressions = Expressions.expand_abbreviation_list(clean_expressions)
return expressions
def _get_subentry_parameters(self):
from bot.entries.smk8.child_entry import ChildEntry
from bot.entries.smk8.phrase_entry import PhraseEntry
from bot.entries.smk8.kanji_entry import KanjiEntry
subentry_parameters = [
[ChildEntry, ["子項目F", "子項目"], self.children],
[PhraseEntry, ["句項目F", "句項目"], self.phrases],
[KanjiEntry, ["造語成分項目"], self.kanjis],
]
return subentry_parameters
@staticmethod
def _delete_unused_nodes(soup):
"""Remove extra markup elements that appear in the entry
headword line which are not part of the entry headword"""
unused_nodes = [
"表音表記", "表外音訓マーク", "表外字マーク", "ルビG"
]
for name in unused_nodes:
Soup.delete_soup_nodes(soup, name)
@staticmethod
def _clean_expression(expression):
for x in ["", "", "", "", "", " "]:
expression = expression.replace(x, "")
return expression
@staticmethod
def _fill_alts(soup):
for elm in soup.find_all(["親見出仮名", "親見出表記"]):
elm.string = elm.attrs["alt"]
for gaiji in soup.find_all("外字"):
gaiji.string = gaiji.img.attrs["alt"]