51 lines
1.7 KiB
Python
51 lines
1.7 KiB
Python
import bot.entries.base.expressions as Expressions
|
|
from bot.entries.daijirin2.base_entry import BaseEntry
|
|
from bot.entries.daijirin2.preprocess import preprocess_page
|
|
|
|
|
|
class Entry(BaseEntry):
    """Dictionary entry built from a single page.

    The entry id passed to the base class is always ``(page_id, 0)``.
    Headword extraction dispatches on which tags are present in the
    page soup (kanji entry, acronym entry, or anything else).
    """

    def __init__(self, target, page_id):
        """Create the entry for ``page_id``.

        Args:
            target: Forwarded unchanged to ``BaseEntry.__init__``.
            page_id: Page identifier; combined with ``0`` to form the
                entry id tuple ``(page_id, 0)``.
        """
        entry_id = (page_id, 0)
        super().__init__(target, entry_id)

    def set_page(self, page):
        """Run ``preprocess_page`` on ``page`` and hand the result to the base class."""
        page = preprocess_page(page)
        super().set_page(page)

    def _get_headwords(self):
        """Return the headwords of this entry.

        The page soup is first passed through ``_delete_unused_nodes``.
        Then the extraction strategy is chosen by tag presence:

        * a ``漢字見出`` tag -> kanji headwords,
        * otherwise a ``略語G`` tag -> acronym headwords,
        * otherwise the regular extraction (``_get_regular_headwords``,
          not defined in this class).

        Returns:
            For the kanji and acronym cases, a dict mapping a reading
            string to a list of expression strings.
        """
        soup = self.get_page_soup()
        self._delete_unused_nodes(soup)
        if soup.find("漢字見出") is not None:
            return self._get_kanji_headwords(soup)
        if soup.find("略語G") is not None:
            return self._get_acronym_headwords(soup)
        return self._get_regular_headwords(soup)

    def _get_kanji_headwords(self, soup):
        """Build headwords for a kanji entry.

        Readings are the texts of all ``漢字音`` tags, each passed through
        ``Expressions.kata_to_hira``; when there are none, the single
        reading ``""`` is used. Expressions are the texts of all
        ``漢字見出`` tags.

        Returns:
            A dict mapping every reading to a list of all expressions.
            Each reading gets its own list object, so modifying the list
            of one reading in place does not affect the others.
        """
        readings = [
            Expressions.kata_to_hira(el.text)
            for el in soup.find_all("漢字音")
        ]
        if not readings:
            # No reading tags at all: file the expressions under "".
            readings.append("")
        expressions = [el.text for el in soup.find_all("漢字見出")]
        # Copy per key so the readings do not alias one mutable list.
        return {reading: list(expressions) for reading in readings}

    def _get_acronym_headwords(self, soup):
        """Build headwords for an acronym entry.

        For every ``略語`` tag, the texts of its ``欧字`` and ``和字``
        descendants are concatenated (in the order ``find_all`` yields
        them) into one expression.

        Returns:
            A dict with the single key ``""`` mapped to the list of
            expressions.
        """
        expressions = [
            "".join(part.text for part in el.find_all(["欧字", "和字"]))
            for el in soup.find_all("略語")
        ]
        return {"": expressions}
|