Organize yomichan term creation logic into separate classes
This commit is contained in:
parent
7d7e32ba45
commit
13f07c9000
|
@ -3,10 +3,10 @@ from bs4 import BeautifulSoup
|
||||||
|
|
||||||
import bot.scraper as Scraper
|
import bot.scraper as Scraper
|
||||||
|
|
||||||
from bot.entries.jitenon_kotowaza import JitenonKotowazaEntry
|
from bot.entries.jitenon import JitenonKotowazaEntry
|
||||||
from bot.yomichan.export import JitenonKotowazaExporter
|
from bot.yomichan.export import JitenonKotowazaExporter
|
||||||
|
|
||||||
from bot.entries.jitenon_yoji import JitenonYojiEntry
|
from bot.entries.jitenon import JitenonYojiEntry
|
||||||
from bot.yomichan.export import JitenonYojiExporter
|
from bot.yomichan.export import JitenonYojiExporter
|
||||||
|
|
||||||
|
|
||||||
|
@ -15,14 +15,14 @@ class Crawler():
|
||||||
self._crawl_map = {}
|
self._crawl_map = {}
|
||||||
self.__entries = []
|
self.__entries = []
|
||||||
|
|
||||||
def make_entries(self):
|
def read_entries(self):
|
||||||
entries_len = len(self._crawl_map)
|
entries_len = len(self._crawl_map)
|
||||||
items = self._crawl_map.items()
|
items = self._crawl_map.items()
|
||||||
for idx, (entry_id, entry_path) in enumerate(items):
|
for idx, (entry_id, entry_path) in enumerate(items):
|
||||||
update = f"Reading entry {idx+1}/{entries_len}"
|
update = f"Reading entry {idx+1}/{entries_len}"
|
||||||
print(update, end='\r', flush=True)
|
print(update, end='\r', flush=True)
|
||||||
entry = self._entry_class(entry_id)
|
entry = self._entry_class(entry_id)
|
||||||
entry.add_document(entry_path)
|
entry.set_markup(entry_path)
|
||||||
self.__entries.append(entry)
|
self.__entries.append(entry)
|
||||||
print()
|
print()
|
||||||
|
|
||||||
|
|
|
@ -2,35 +2,52 @@ import re
|
||||||
from datetime import datetime, date
|
from datetime import datetime, date
|
||||||
from bs4 import BeautifulSoup
|
from bs4 import BeautifulSoup
|
||||||
|
|
||||||
import bot.yomichan.html_gloss as YomichanGloss
|
|
||||||
import bot.util as Util
|
import bot.util as Util
|
||||||
|
|
||||||
|
|
||||||
class JitenonEntry:
|
class JitenonEntry:
|
||||||
def __init__(self, sequence):
|
def __init__(self, entry_id):
|
||||||
self.sequence = sequence
|
self.entry_id = entry_id
|
||||||
self.yomichan_glossary = [""]
|
self.markup = ""
|
||||||
self.modified_date = date(1970, 1, 1)
|
self.modified_date = date(1970, 1, 1)
|
||||||
self.attribution = ""
|
self.attribution = ""
|
||||||
for column in self.columns.values():
|
for column in self.COLUMNS.values():
|
||||||
setattr(self, column[0], column[1])
|
setattr(self, column[0], column[1])
|
||||||
|
self._headwords = None
|
||||||
|
|
||||||
def add_document(self, path):
|
def set_markup(self, path):
|
||||||
with open(path, "r") as f:
|
with open(path, "r") as f:
|
||||||
html = f.read()
|
html = f.read()
|
||||||
yoji_soup = BeautifulSoup(html, features="html5lib")
|
soup = BeautifulSoup(html, features="html5lib")
|
||||||
self.__set_modified_date(html)
|
self.__set_modified_date(html)
|
||||||
self.attribution = yoji_soup.find(class_="copyright").text
|
self.attribution = soup.find(class_="copyright").text
|
||||||
table = yoji_soup.find(class_="kanjirighttb")
|
table = soup.find(class_="kanjirighttb")
|
||||||
rows = table.find("tbody").find_all("tr")
|
rows = table.find("tbody").find_all("tr")
|
||||||
colname = ""
|
colname = ""
|
||||||
for row in rows:
|
for row in rows:
|
||||||
colname = row.th.text if row.th is not None else colname
|
colname = row.th.text if row.th is not None else colname
|
||||||
colval = self.__clean(row.td.text)
|
colval = self.__clean_text(row.td.text)
|
||||||
self.__set_column(colname, colval)
|
self.__set_column(colname, colval)
|
||||||
self.__prepare_yomichan_soup(table)
|
self.markup = table.decode()
|
||||||
gloss = YomichanGloss.make_gloss(table)
|
|
||||||
self.yomichan_glossary = [gloss]
|
def get_headwords(self):
|
||||||
|
if self._headwords is not None:
|
||||||
|
return self._headwords
|
||||||
|
self._set_headwords()
|
||||||
|
return self._headwords
|
||||||
|
|
||||||
|
def _set_headwords(self):
|
||||||
|
headwords = {}
|
||||||
|
for yomikata in self.__yomikatas():
|
||||||
|
headwords[yomikata] = [self.expression]
|
||||||
|
ikei_headwords = self.__ikei_headwords()
|
||||||
|
for reading, expressions in ikei_headwords.items():
|
||||||
|
if reading not in headwords:
|
||||||
|
headwords[reading] = []
|
||||||
|
for expression in expressions:
|
||||||
|
if expression not in headwords[reading]:
|
||||||
|
headwords[reading].append(expression)
|
||||||
|
self._headwords = headwords
|
||||||
|
|
||||||
def __set_modified_date(self, html):
|
def __set_modified_date(self, html):
|
||||||
m = re.search(r"\"dateModified\": \"(\d{4}-\d{2}-\d{2})", html)
|
m = re.search(r"\"dateModified\": \"(\d{4}-\d{2}-\d{2})", html)
|
||||||
|
@ -39,15 +56,8 @@ class JitenonEntry:
|
||||||
date = datetime.strptime(m.group(1), '%Y-%m-%d').date()
|
date = datetime.strptime(m.group(1), '%Y-%m-%d').date()
|
||||||
self.modified_date = date
|
self.modified_date = date
|
||||||
|
|
||||||
def __clean(self, text):
|
|
||||||
text = text.replace("\n", "")
|
|
||||||
text = text.replace(",", "、")
|
|
||||||
text = text.replace(" ", "")
|
|
||||||
text = text.strip()
|
|
||||||
return text
|
|
||||||
|
|
||||||
def __set_column(self, colname, colval):
|
def __set_column(self, colname, colval):
|
||||||
attr_name = self.columns[colname][0]
|
attr_name = self.COLUMNS[colname][0]
|
||||||
attr_value = getattr(self, attr_name)
|
attr_value = getattr(self, attr_name)
|
||||||
if isinstance(attr_value, str):
|
if isinstance(attr_value, str):
|
||||||
setattr(self, attr_name, colval)
|
setattr(self, attr_name, colval)
|
||||||
|
@ -57,35 +67,6 @@ class JitenonEntry:
|
||||||
else:
|
else:
|
||||||
attr_value.append(colval)
|
attr_value.append(colval)
|
||||||
|
|
||||||
def __prepare_yomichan_soup(self, soup):
|
|
||||||
patterns = [
|
|
||||||
r"^(.+)([ぁ-ヿ、\s]+)$",
|
|
||||||
r"^(.+)([ぁ-ヿ、\s]+([ぁ-ヿ、\s])[ぁ-ヿ、\s]+)$"
|
|
||||||
]
|
|
||||||
for a in soup.find_all("a"):
|
|
||||||
for pattern in patterns:
|
|
||||||
m = re.search(pattern, a.text)
|
|
||||||
if m:
|
|
||||||
a['href'] = f"?query={m.group(1)}&wildcards=off"
|
|
||||||
break
|
|
||||||
for p in soup.find_all("p"):
|
|
||||||
p.name = "span"
|
|
||||||
for th in soup.find_all("th"):
|
|
||||||
th['style'] = "vertical-align: middle; text-align: center;"
|
|
||||||
|
|
||||||
def _headwords(self):
|
|
||||||
words = []
|
|
||||||
for yomikata in self.__yomikatas():
|
|
||||||
headword = [self.expression, yomikata]
|
|
||||||
if headword in words:
|
|
||||||
words.remove(headword)
|
|
||||||
words.append(headword)
|
|
||||||
for headword in self.__ikei_headwords():
|
|
||||||
if headword in words:
|
|
||||||
words.remove(headword)
|
|
||||||
words.append(headword)
|
|
||||||
return words
|
|
||||||
|
|
||||||
def __yomikatas(self):
|
def __yomikatas(self):
|
||||||
yomikata = self.yomikata
|
yomikata = self.yomikata
|
||||||
m = re.search(r"^[ぁ-ヿ、]+$", yomikata)
|
m = re.search(r"^[ぁ-ヿ、]+$", yomikata)
|
||||||
|
@ -108,22 +89,73 @@ class JitenonEntry:
|
||||||
return [""]
|
return [""]
|
||||||
|
|
||||||
def __ikei_headwords(self):
|
def __ikei_headwords(self):
|
||||||
ikei_headwords = []
|
ikei_headwords = {}
|
||||||
for val in self.ikei:
|
for val in self.ikei:
|
||||||
m = re.search(r"^([^(]+)(([ぁ-ヿ、]+))$", val)
|
m = re.search(r"^([^(]+)(([ぁ-ヿ、]+))$", val)
|
||||||
if m:
|
if not m:
|
||||||
headword = [m.group(1), m.group(2)]
|
|
||||||
ikei_headwords.append(headword)
|
|
||||||
else:
|
|
||||||
print(f"Invalid 異形 format: {val}\n{self}\n")
|
print(f"Invalid 異形 format: {val}\n{self}\n")
|
||||||
|
continue
|
||||||
|
expression = m.group(1)
|
||||||
|
reading = m.group(2)
|
||||||
|
if reading not in ikei_headwords:
|
||||||
|
ikei_headwords[reading] = []
|
||||||
|
if expression not in ikei_headwords[reading]:
|
||||||
|
ikei_headwords[reading].append(expression)
|
||||||
return ikei_headwords
|
return ikei_headwords
|
||||||
|
|
||||||
|
@staticmethod
|
||||||
|
def __clean_text(text):
|
||||||
|
text = text.replace("\n", "")
|
||||||
|
text = text.replace(",", "、")
|
||||||
|
text = text.replace(" ", "")
|
||||||
|
text = text.strip()
|
||||||
|
return text
|
||||||
|
|
||||||
def __str__(self):
|
def __str__(self):
|
||||||
colvals = [str(self.sequence)]
|
colvals = [str(self.entry_id)]
|
||||||
for attr in self.columns.values():
|
for attr in self.COLUMNS.values():
|
||||||
attr_val = getattr(self, attr[0])
|
attr_val = getattr(self, attr[0])
|
||||||
if isinstance(attr_val, str):
|
if isinstance(attr_val, str):
|
||||||
colvals.append(attr_val)
|
colvals.append(attr_val)
|
||||||
elif isinstance(attr_val, list):
|
elif isinstance(attr_val, list):
|
||||||
colvals.append(";".join(attr_val))
|
colvals.append(";".join(attr_val))
|
||||||
return ",".join(colvals)
|
return ",".join(colvals)
|
||||||
|
|
||||||
|
|
||||||
|
class JitenonYojiEntry(JitenonEntry):
    """Entry parsed from a four-character idiom (四字熟語) page.

    COLUMNS maps the header text of each table row to a two-item list:
    the attribute name the value is stored under, and the default value
    that attribute is initialised with by JitenonEntry.__init__.
    """

    # NOTE(review): the list defaults below are assigned to each instance
    # by reference; confirm the column setter never appends to the shared
    # default object.
    COLUMNS = {
        "四字熟語": ["expression", ""],
        "読み方": ["yomikata", ""],
        "意味": ["imi", ""],
        "出典": ["shutten", ""],
        "漢検級": ["kankenkyuu", ""],
        "場面用途": ["bamenyouto", ""],
        "異形": ["ikei", []],
        "類義語": ["ruigigo", []],
    }

    def __init__(self, sequence):
        """Create the entry; `sequence` is forwarded unchanged to the base class."""
        super().__init__(sequence)
|
||||||
|
|
||||||
|
|
||||||
|
class JitenonKotowazaEntry(JitenonEntry):
    """Entry parsed from a proverb (ことわざ) page.

    COLUMNS maps the header text of each table row to a two-item list:
    the attribute name the value is stored under, and the default value
    that attribute is initialised with by JitenonEntry.__init__.
    """

    # NOTE(review): the list defaults below are assigned to each instance
    # by reference; confirm the column setter never appends to the shared
    # default object.
    COLUMNS = {
        "言葉": ["expression", ""],
        "読み方": ["yomikata", ""],
        "意味": ["imi", ""],
        "出典": ["shutten", ""],
        "例文": ["reibun", ""],
        "異形": ["ikei", []],
        "類句": ["ruiku", []],
    }

    def __init__(self, sequence):
        """Create the entry; `sequence` is forwarded unchanged to the base class."""
        super().__init__(sequence)

    def _set_headwords(self):
        """Populate self._headwords as a {reading: [expressions]} mapping.

        One entry lists two expressions joined by "・" in a single field,
        so its headwords are written out by hand; every other entry uses
        the base-class logic.
        """
        if self.expression != "金棒引き・鉄棒引き":
            super()._set_headwords()
            return
        self._headwords = {
            "かなぼうひき": ["金棒引き", "鉄棒引き"]
        }
|
||||||
|
|
|
@ -1,41 +0,0 @@
|
||||||
from bot.entries.jitenon import JitenonEntry
|
|
||||||
import bot.yomichan.grammar as Grammar
|
|
||||||
|
|
||||||
|
|
||||||
class JitenonKotowazaEntry(JitenonEntry):
|
|
||||||
columns = {
|
|
||||||
"言葉": ["expression", ""],
|
|
||||||
"読み方": ["yomikata", ""],
|
|
||||||
"意味": ["imi", ""],
|
|
||||||
"出典": ["shutten", ""],
|
|
||||||
"例文": ["reibun", ""],
|
|
||||||
"異形": ["ikei", []],
|
|
||||||
"類句": ["ruiku", []],
|
|
||||||
}
|
|
||||||
|
|
||||||
def __init__(self, sequence):
|
|
||||||
super().__init__(sequence)
|
|
||||||
|
|
||||||
def yomichan_terms(self):
|
|
||||||
terms = []
|
|
||||||
for idx, headword in enumerate(self._headwords()):
|
|
||||||
(expression, reading) = headword
|
|
||||||
definition_tags = None
|
|
||||||
inflection_rules = Grammar.sudachi_rules(expression)
|
|
||||||
score = -idx
|
|
||||||
glossary = self.yomichan_glossary
|
|
||||||
sequence = self.sequence
|
|
||||||
term_tags = ""
|
|
||||||
term = [
|
|
||||||
expression, reading, definition_tags, inflection_rules,
|
|
||||||
score, glossary, sequence, term_tags
|
|
||||||
]
|
|
||||||
terms.append(term)
|
|
||||||
return terms
|
|
||||||
|
|
||||||
def _headwords(self):
|
|
||||||
if self.expression == "金棒引き・鉄棒引き":
|
|
||||||
return [["金棒引き", "かなぼうひき"],
|
|
||||||
["鉄棒引き", "かなぼうひき"]]
|
|
||||||
else:
|
|
||||||
return super()._headwords()
|
|
|
@ -1,38 +0,0 @@
|
||||||
from bot.entries.jitenon import JitenonEntry
|
|
||||||
|
|
||||||
|
|
||||||
class JitenonYojiEntry(JitenonEntry):
|
|
||||||
columns = {
|
|
||||||
"四字熟語": ["expression", ""],
|
|
||||||
"読み方": ["yomikata", ""],
|
|
||||||
"意味": ["imi", ""],
|
|
||||||
"出典": ["shutten", ""],
|
|
||||||
"漢検級": ["kankenkyuu", ""],
|
|
||||||
"場面用途": ["bamenyouto", ""],
|
|
||||||
"異形": ["ikei", []],
|
|
||||||
"類義語": ["ruigigo", []],
|
|
||||||
}
|
|
||||||
|
|
||||||
def __init__(self, sequence):
|
|
||||||
super().__init__(sequence)
|
|
||||||
|
|
||||||
def yomichan_terms(self):
|
|
||||||
terms = []
|
|
||||||
for idx, headword in enumerate(self._headwords()):
|
|
||||||
(expression, reading) = headword
|
|
||||||
definition_tags = None
|
|
||||||
inflection_rules = ""
|
|
||||||
score = -idx
|
|
||||||
glossary = self.yomichan_glossary
|
|
||||||
sequence = self.sequence
|
|
||||||
term_tags = self.__term_tags()
|
|
||||||
term = [
|
|
||||||
expression, reading, definition_tags, inflection_rules,
|
|
||||||
score, glossary, sequence, term_tags
|
|
||||||
]
|
|
||||||
terms.append(term)
|
|
||||||
return terms
|
|
||||||
|
|
||||||
def __term_tags(self):
|
|
||||||
tags = self.kankenkyuu.replace(" ", "").split("/")
|
|
||||||
return " ".join(tags)
|
|
|
@ -5,7 +5,10 @@ from pathlib import Path
|
||||||
from datetime import datetime
|
from datetime import datetime
|
||||||
from platformdirs import user_documents_dir, user_cache_dir
|
from platformdirs import user_documents_dir, user_cache_dir
|
||||||
|
|
||||||
import bot.data as Data
|
from bot.data import yomichan_metadata
|
||||||
|
|
||||||
|
from bot.yomichan.terms.jitenon import JitenonYojiTerminator
|
||||||
|
from bot.yomichan.terms.jitenon import JitenonKotowazaTerminator
|
||||||
|
|
||||||
|
|
||||||
class Exporter:
|
class Exporter:
|
||||||
|
@ -14,7 +17,7 @@ class Exporter:
|
||||||
self._terms_per_file = 2000
|
self._terms_per_file = 2000
|
||||||
|
|
||||||
def export(self, entries):
|
def export(self, entries):
|
||||||
meta = Data.yomichan_metadata()
|
meta = yomichan_metadata()
|
||||||
index = meta[self._name]["index"]
|
index = meta[self._name]["index"]
|
||||||
index["revision"] = self._get_revision(entries)
|
index["revision"] = self._get_revision(entries)
|
||||||
index["attribution"] = self._get_attribution(entries)
|
index["attribution"] = self._get_attribution(entries)
|
||||||
|
@ -40,7 +43,8 @@ class Exporter:
|
||||||
for idx, entry in enumerate(entries):
|
for idx, entry in enumerate(entries):
|
||||||
update = f"Creating Yomichan terms for entry {idx+1}/{entries_len}"
|
update = f"Creating Yomichan terms for entry {idx+1}/{entries_len}"
|
||||||
print(update, end='\r', flush=True)
|
print(update, end='\r', flush=True)
|
||||||
for term in entry.yomichan_terms():
|
new_terms = self._terminator.make_terms(entry)
|
||||||
|
for term in new_terms:
|
||||||
terms.append(term)
|
terms.append(term)
|
||||||
print()
|
print()
|
||||||
return terms
|
return terms
|
||||||
|
@ -120,9 +124,11 @@ class JitenonYojiExporter(JitenonExporter):
|
||||||
def __init__(self):
|
def __init__(self):
|
||||||
super().__init__()
|
super().__init__()
|
||||||
self._name = "jitenon-yoji"
|
self._name = "jitenon-yoji"
|
||||||
|
self._terminator = JitenonYojiTerminator()
|
||||||
|
|
||||||
|
|
||||||
class JitenonKotowazaExporter(JitenonExporter):
|
class JitenonKotowazaExporter(JitenonExporter):
|
||||||
def __init__(self):
|
def __init__(self):
|
||||||
super().__init__()
|
super().__init__()
|
||||||
self._name = "jitenon-kotowaza"
|
self._name = "jitenon-kotowaza"
|
||||||
|
self._terminator = JitenonKotowazaTerminator()
|
||||||
|
|
25
bot/yomichan/glossary/jitenon.py
Normal file
25
bot/yomichan/glossary/jitenon.py
Normal file
|
@ -0,0 +1,25 @@
|
||||||
|
import re
|
||||||
|
from bs4 import BeautifulSoup
|
||||||
|
|
||||||
|
from bot.yomichan.glossary.gloss import make_gloss
|
||||||
|
|
||||||
|
|
||||||
|
def make_glossary(entry):
    """Build the Yomichan glossary list for a Jitenon entry.

    The entry's stored table markup (entry.markup) is re-parsed, adjusted
    for display, and handed to make_gloss. The result is a one-element
    list containing that gloss.
    """
    soup = BeautifulSoup(entry.markup, "html5lib")

    # Link text of the form "expression(reading)": group 1 captures the
    # expression part, which becomes the search query of the link.
    link_patterns = (
        r"^(.+)([ぁ-ヿ、\s]+)$",
        r"^(.+)([ぁ-ヿ、\s]+([ぁ-ヿ、\s])[ぁ-ヿ、\s]+)$",
    )
    for anchor in soup.find_all("a"):
        attempts = (re.search(pattern, anchor.text)
                    for pattern in link_patterns)
        # Patterns are tried in order and the first one that matches wins.
        hit = next((m for m in attempts if m), None)
        if hit is not None:
            anchor['href'] = f"?query={hit.group(1)}&wildcards=off"

    # Paragraphs are turned into inline spans.
    for paragraph in soup.find_all("p"):
        paragraph.name = "span"

    # Header cells get centred both vertically and horizontally.
    for header_cell in soup.find_all("th"):
        header_cell['style'] = "vertical-align: middle; text-align: center;"

    return [make_gloss(soup.table)]
|
50
bot/yomichan/terms/jitenon.py
Normal file
50
bot/yomichan/terms/jitenon.py
Normal file
|
@ -0,0 +1,50 @@
|
||||||
|
import bot.yomichan.grammar as Grammar
|
||||||
|
from bot.yomichan.terms.terminator import Terminator
|
||||||
|
from bot.yomichan.glossary.jitenon import make_glossary
|
||||||
|
|
||||||
|
|
||||||
|
class JitenonTerminator(Terminator):
    """Term-building hooks shared by the Jitenon dictionaries."""

    def __init__(self):
        super().__init__()

    def _definition_tags(self, entry):
        """Jitenon terms carry no definition tags."""
        return None

    def _glossary(self, entry):
        """Return the glossary for `entry`, building it at most once.

        Results are memoised in self.glossary_cache, keyed by entry_id.
        """
        key = entry.entry_id
        try:
            return self.glossary_cache[key]
        except KeyError:
            pass
        glossary = make_glossary(entry)
        self.glossary_cache[key] = glossary
        return glossary

    def _sequence(self, entry):
        """Use the entry id as the term's sequence number."""
        return entry.entry_id

    def _link_glossary_parameters(self, entry):
        """Jitenon entries produce no link-glossary terms."""
        return []

    def _subentry_lists(self, entry):
        """Jitenon entries have no subentries."""
        return []
|
||||||
|
|
||||||
|
|
||||||
|
class JitenonYojiTerminator(JitenonTerminator):
    """Term-building hooks for the four-character idiom dictionary."""

    def __init__(self):
        super().__init__()

    def _inflection_rules(self, entry, expression):
        """No inflection rules are assigned to these terms."""
        return ""

    def _term_tags(self, entry):
        """Return the entry's kankenkyuu values as space-separated tags.

        Spaces inside the field are dropped first, then the "/"-separated
        parts are re-joined with single spaces.
        """
        compact = entry.kankenkyuu.replace(" ", "")
        return " ".join(compact.split("/"))
|
||||||
|
|
||||||
|
|
||||||
|
class JitenonKotowazaTerminator(JitenonTerminator):
    """Term-building hooks for the proverb dictionary."""

    def __init__(self):
        super().__init__()

    def _inflection_rules(self, entry, expression):
        """Delegate to Grammar.sudachi_rules for the given expression."""
        rules = Grammar.sudachi_rules(expression)
        return rules

    def _term_tags(self, entry):
        """Proverb terms carry no term tags."""
        return ""
|
54
bot/yomichan/terms/terminator.py
Normal file
54
bot/yomichan/terms/terminator.py
Normal file
|
@ -0,0 +1,54 @@
|
||||||
|
class Terminator:
    """Base class that turns a dictionary entry into Yomichan term rows.

    Subclasses provide the per-dictionary hooks that make_terms calls:
    _definition_tags, _inflection_rules, _glossary, _sequence,
    _term_tags, _link_glossary_parameters and _subentry_lists.
    """

    def __init__(self):
        # Available to subclasses' _glossary hooks for memoising results.
        self.glossary_cache = {}

    def make_terms(self, entry):
        """Return the list of term rows generated for `entry`.

        Each row is the list
        [expression, reading, definition_tags, inflection_rules,
         score, glossary, sequence, term_tags].
        The score is the negated count of rows already produced, so rows
        created earlier score higher.

        Rows are produced in three stages: one per (reading, expression)
        headword pair, then one per non-empty link-glossary parameter,
        then the rows of every subentry (recursively).
        """
        terms = []

        # Fields of the most recent headword row. Link-glossary rows are
        # attached to that headword, so they are tracked explicitly
        # instead of relying on variables leaking out of the loop.
        last_headword = None
        for reading, expressions in entry.get_headwords().items():
            for expression in expressions:
                definition_tags = self._definition_tags(entry)
                inflection_rules = self._inflection_rules(entry, expression)
                score = -len(terms)
                glossary = self._glossary(entry)
                sequence = self._sequence(entry)
                # Ask the subclass hook for tags; a hard-coded "" here
                # would discard every dictionary-specific term tag.
                term_tags = self._term_tags(entry)
                terms.append([
                    expression, reading, definition_tags, inflection_rules,
                    score, glossary, sequence, term_tags
                ])
                last_headword = (
                    expression, reading, inflection_rules,
                    sequence, term_tags
                )

        for subentries, definition_tags in \
                self._link_glossary_parameters(entry):
            # Without any headword there is nothing to attach links to.
            if len(subentries) == 0 or last_headword is None:
                continue
            (expression, reading, inflection_rules,
             sequence, term_tags) = last_headword
            score = -len(terms)
            glossary = self.__links_glossary(subentries)
            terms.append([
                expression, reading, definition_tags, inflection_rules,
                score, glossary, sequence, term_tags
            ])

        for subentries in self._subentry_lists(entry):
            for subentry in subentries:
                terms.extend(self.make_terms(subentry))
        return terms

    def _term_tags(self, entry):
        """Default hook: no term tags. Subclasses override as needed."""
        return ""

    @staticmethod
    def __links_glossary(subentries):
        """Return one structured-content link gloss per subentry.

        Each link displays the subentry's first expression and points to
        a non-wildcard query for that same expression.
        """
        glossary = []
        for subentry in subentries:
            exp = subentry.get_first_expression()
            glossary.append({
                "type": "structured-content",
                "content": {
                    "tag": "a",
                    "href": f"?query={exp}&wildcards=off",
                    "content": exp,
                }
            })
        return glossary
|
|
@ -44,7 +44,7 @@ def main():
|
||||||
crawler_class = crawlers[args.target]
|
crawler_class = crawlers[args.target]
|
||||||
crawler = crawler_class()
|
crawler = crawler_class()
|
||||||
crawler.crawl()
|
crawler.crawl()
|
||||||
crawler.make_entries()
|
crawler.read_entries()
|
||||||
crawler.make_yomichan_dictionary()
|
crawler.make_yomichan_dictionary()
|
||||||
|
|
||||||
|
|
||||||
|
|
Loading…
Reference in a new issue