Add entry and term factories
This commit is contained in:
parent
3d795ab49f
commit
6dbc8b90ce
|
@ -1,28 +1,23 @@
|
||||||
import os
|
import os
|
||||||
import re
|
import re
|
||||||
|
from abc import ABC, abstractmethod
|
||||||
from bs4 import BeautifulSoup
|
from bs4 import BeautifulSoup
|
||||||
|
|
||||||
import bot.scraper as Scraper
|
import bot.scraper as Scraper
|
||||||
|
from bot.entries.factory import new_entry
|
||||||
from bot.entries.jitenon import JitenonKokugoEntry
|
from bot.yomichan.exporters.factory import new_exporter
|
||||||
from bot.entries.jitenon import JitenonKotowazaEntry
|
|
||||||
from bot.entries.jitenon import JitenonYojiEntry
|
|
||||||
from bot.entries.smk8 import Smk8Entry
|
|
||||||
from bot.entries.daijirin2 import Daijirin2Entry
|
|
||||||
|
|
||||||
from bot.yomichan.export import JitenonKokugoExporter
|
|
||||||
from bot.yomichan.export import JitenonKotowazaExporter
|
|
||||||
from bot.yomichan.export import JitenonYojiExporter
|
|
||||||
from bot.yomichan.export import Smk8Exporter
|
|
||||||
from bot.yomichan.export import Daijirin2Exporter
|
|
||||||
|
|
||||||
|
|
||||||
class _Crawler():
|
class Crawler(ABC):
|
||||||
def __init__(self, args):
|
def __init__(self, target):
|
||||||
self._page_dir = args.page_dir
|
self._target = target
|
||||||
self._image_dir = args.image_dir
|
|
||||||
self._page_map = {}
|
self._page_map = {}
|
||||||
self._entries = []
|
self._entries = []
|
||||||
|
self._page_id_pattern = None
|
||||||
|
|
||||||
|
@abstractmethod
|
||||||
|
def collect_pages(self, page_dir):
|
||||||
|
pass
|
||||||
|
|
||||||
def read_pages(self):
|
def read_pages(self):
|
||||||
pages_len = len(self._page_map)
|
pages_len = len(self._page_map)
|
||||||
|
@ -30,19 +25,20 @@ class _Crawler():
|
||||||
for idx, (page_id, page_path) in enumerate(items):
|
for idx, (page_id, page_path) in enumerate(items):
|
||||||
update = f"Reading page {idx+1}/{pages_len}"
|
update = f"Reading page {idx+1}/{pages_len}"
|
||||||
print(update, end='\r', flush=True)
|
print(update, end='\r', flush=True)
|
||||||
entry = self._entry_class(page_id)
|
entry = new_entry(self._target, page_id)
|
||||||
with open(page_path, "r", encoding="utf-8") as f:
|
with open(page_path, "r", encoding="utf-8") as f:
|
||||||
page = f.read()
|
page = f.read()
|
||||||
entry.set_page(page)
|
entry.set_page(page)
|
||||||
self._entries.append(entry)
|
self._entries.append(entry)
|
||||||
print()
|
print()
|
||||||
|
|
||||||
def make_yomichan_dictionary(self):
|
def make_yomichan_dictionary(self, image_dir):
|
||||||
self._yomi_exporter.export(self._entries, self._image_dir)
|
exporter = new_exporter(self._target)
|
||||||
|
exporter.export(self._entries, image_dir)
|
||||||
|
|
||||||
def _parse_page_id(self, page_link):
|
def _parse_page_id(self, page_link):
|
||||||
m = re.search(self._page_id_pattern, page_link)
|
m = re.search(self._page_id_pattern, page_link)
|
||||||
if not m:
|
if m is None:
|
||||||
return None
|
return None
|
||||||
page_id = int(m.group(1))
|
page_id = int(m.group(1))
|
||||||
if page_id in self._page_map:
|
if page_id in self._page_map:
|
||||||
|
@ -50,15 +46,13 @@ class _Crawler():
|
||||||
return page_id
|
return page_id
|
||||||
|
|
||||||
|
|
||||||
class JitenonKokugoCrawler(_Crawler):
|
class JitenonKokugoCrawler(Crawler):
|
||||||
def __init__(self, args):
|
def __init__(self, target):
|
||||||
super().__init__(args)
|
super().__init__(target)
|
||||||
self._entry_class = JitenonKokugoEntry
|
|
||||||
self._yomi_exporter = JitenonKokugoExporter(args.target)
|
|
||||||
self._gojuon_url = "https://kokugo.jitenon.jp/cat/gojuonindex.php"
|
self._gojuon_url = "https://kokugo.jitenon.jp/cat/gojuonindex.php"
|
||||||
self._page_id_pattern = r"word/p([0-9]+)$"
|
self._page_id_pattern = r"word/p([0-9]+)$"
|
||||||
|
|
||||||
def collect_pages(self):
|
def collect_pages(self, page_dir):
|
||||||
jitenon = Scraper.Jitenon()
|
jitenon = Scraper.Jitenon()
|
||||||
gojuon_doc, _ = jitenon.scrape(self._gojuon_url)
|
gojuon_doc, _ = jitenon.scrape(self._gojuon_url)
|
||||||
gojuon_soup = BeautifulSoup(gojuon_doc, features="html.parser")
|
gojuon_soup = BeautifulSoup(gojuon_doc, features="html.parser")
|
||||||
|
@ -85,11 +79,12 @@ class JitenonKokugoCrawler(_Crawler):
|
||||||
print(f"Finished scraping {pages_len} pages")
|
print(f"Finished scraping {pages_len} pages")
|
||||||
|
|
||||||
|
|
||||||
class _JitenonCrawler(_Crawler):
|
class _JitenonCrawler(Crawler):
|
||||||
def __init__(self, args):
|
def __init__(self, target):
|
||||||
super().__init__(args)
|
super().__init__(target)
|
||||||
|
self._gojuon_url = None
|
||||||
|
|
||||||
def collect_pages(self):
|
def collect_pages(self, page_dir):
|
||||||
print("Scraping jitenon.jp")
|
print("Scraping jitenon.jp")
|
||||||
jitenon = Scraper.Jitenon()
|
jitenon = Scraper.Jitenon()
|
||||||
gojuon_doc, _ = jitenon.scrape(self._gojuon_url)
|
gojuon_doc, _ = jitenon.scrape(self._gojuon_url)
|
||||||
|
@ -110,49 +105,41 @@ class _JitenonCrawler(_Crawler):
|
||||||
|
|
||||||
|
|
||||||
class JitenonYojiCrawler(_JitenonCrawler):
|
class JitenonYojiCrawler(_JitenonCrawler):
|
||||||
def __init__(self, args):
|
def __init__(self, target):
|
||||||
super().__init__(args)
|
super().__init__(target)
|
||||||
self._entry_class = JitenonYojiEntry
|
|
||||||
self._yomi_exporter = JitenonYojiExporter(args.target)
|
|
||||||
self._gojuon_url = "https://yoji.jitenon.jp/cat/gojuon.html"
|
self._gojuon_url = "https://yoji.jitenon.jp/cat/gojuon.html"
|
||||||
self._page_id_pattern = r"([0-9]+)\.html$"
|
self._page_id_pattern = r"([0-9]+)\.html$"
|
||||||
|
|
||||||
|
|
||||||
class JitenonKotowazaCrawler(_JitenonCrawler):
|
class JitenonKotowazaCrawler(_JitenonCrawler):
|
||||||
def __init__(self, args):
|
def __init__(self, target):
|
||||||
super().__init__(args)
|
super().__init__(target)
|
||||||
self._entry_class = JitenonKotowazaEntry
|
|
||||||
self._yomi_exporter = JitenonKotowazaExporter(args.target)
|
|
||||||
self._gojuon_url = "https://kotowaza.jitenon.jp/cat/gojuon.php"
|
self._gojuon_url = "https://kotowaza.jitenon.jp/cat/gojuon.php"
|
||||||
self._page_id_pattern = r"([0-9]+)\.php$"
|
self._page_id_pattern = r"([0-9]+)\.php$"
|
||||||
|
|
||||||
|
|
||||||
class _MonokakidoCrawler(_Crawler):
|
class _MonokakidoCrawler(Crawler):
|
||||||
def __init__(self, args):
|
def __init__(self, target):
|
||||||
super().__init__(args)
|
super().__init__(target)
|
||||||
self._page_id_pattern = r"^([0-9]+)\.xml$"
|
self._page_id_pattern = r"^([0-9]+)\.xml$"
|
||||||
|
|
||||||
def collect_pages(self):
|
def collect_pages(self, page_dir):
|
||||||
print(f"Searching for page files in `{self._page_dir}`")
|
print(f"Searching for page files in `{page_dir}`")
|
||||||
for pagefile in os.listdir(self._page_dir):
|
for pagefile in os.listdir(page_dir):
|
||||||
page_id = self._parse_page_id(pagefile)
|
page_id = self._parse_page_id(pagefile)
|
||||||
if page_id is None or page_id == 0:
|
if page_id is None or page_id == 0:
|
||||||
continue
|
continue
|
||||||
path = os.path.join(self._page_dir, pagefile)
|
path = os.path.join(page_dir, pagefile)
|
||||||
self._page_map[page_id] = path
|
self._page_map[page_id] = path
|
||||||
pages_len = len(self._page_map)
|
pages_len = len(self._page_map)
|
||||||
print(f"Found {pages_len} page files for processing")
|
print(f"Found {pages_len} page files for processing")
|
||||||
|
|
||||||
|
|
||||||
class Smk8Crawler(_MonokakidoCrawler):
|
class Smk8Crawler(_MonokakidoCrawler):
|
||||||
def __init__(self, args):
|
def __init__(self, target):
|
||||||
super().__init__(args)
|
super().__init__(target)
|
||||||
self._entry_class = Smk8Entry
|
|
||||||
self._yomi_exporter = Smk8Exporter(args.target)
|
|
||||||
|
|
||||||
|
|
||||||
class Daijirin2Crawler(_MonokakidoCrawler):
|
class Daijirin2Crawler(_MonokakidoCrawler):
|
||||||
def __init__(self, args):
|
def __init__(self, target):
|
||||||
super().__init__(args)
|
super().__init__(target)
|
||||||
self._entry_class = Daijirin2Entry
|
|
||||||
self._yomi_exporter = Daijirin2Exporter(args.target)
|
|
||||||
|
|
|
@ -7,7 +7,7 @@ from bot.crawlers.crawlers import Smk8Crawler
|
||||||
from bot.crawlers.crawlers import Daijirin2Crawler
|
from bot.crawlers.crawlers import Daijirin2Crawler
|
||||||
|
|
||||||
|
|
||||||
def new_crawler(target, args):
|
def new_crawler(target):
|
||||||
crawler_map = {
|
crawler_map = {
|
||||||
Targets.JITENON_KOKUGO: JitenonKokugoCrawler,
|
Targets.JITENON_KOKUGO: JitenonKokugoCrawler,
|
||||||
Targets.JITENON_YOJI: JitenonYojiCrawler,
|
Targets.JITENON_YOJI: JitenonYojiCrawler,
|
||||||
|
@ -15,4 +15,4 @@ def new_crawler(target, args):
|
||||||
Targets.SMK8: Smk8Crawler,
|
Targets.SMK8: Smk8Crawler,
|
||||||
Targets.DAIJIRIN2: Daijirin2Crawler,
|
Targets.DAIJIRIN2: Daijirin2Crawler,
|
||||||
}
|
}
|
||||||
return crawler_map[target](args)
|
return crawler_map[target](target)
|
||||||
|
|
18
bot/entries/factory.py
Normal file
18
bot/entries/factory.py
Normal file
|
@ -0,0 +1,18 @@
|
||||||
|
from bot.targets import Targets
|
||||||
|
|
||||||
|
from bot.entries.jitenon import JitenonKokugoEntry
|
||||||
|
from bot.entries.jitenon import JitenonYojiEntry
|
||||||
|
from bot.entries.jitenon import JitenonKotowazaEntry
|
||||||
|
from bot.entries.smk8 import Smk8Entry
|
||||||
|
from bot.entries.daijirin2 import Daijirin2Entry
|
||||||
|
|
||||||
|
|
||||||
|
def new_entry(target, page_id):
    """Create the entry object that corresponds to a dictionary target.

    The entry class is selected by ``target`` (a ``Targets`` member) and is
    instantiated with ``page_id`` as its only argument. A ``target`` that is
    not present in the table raises ``KeyError``.
    """
    # One entry class per supported target.
    entry_classes = {
        Targets.JITENON_KOKUGO: JitenonKokugoEntry,
        Targets.JITENON_YOJI: JitenonYojiEntry,
        Targets.JITENON_KOTOWAZA: JitenonKotowazaEntry,
        Targets.SMK8: Smk8Entry,
        Targets.DAIJIRIN2: Daijirin2Entry,
    }
    entry_class = entry_classes[target]
    return entry_class(page_id)
|
|
@ -6,27 +6,23 @@ from datetime import datetime
|
||||||
from platformdirs import user_documents_dir, user_cache_dir
|
from platformdirs import user_documents_dir, user_cache_dir
|
||||||
|
|
||||||
from bot.data import load_yomichan_metadata
|
from bot.data import load_yomichan_metadata
|
||||||
|
from bot.yomichan.terms.factory import new_terminator
|
||||||
from bot.yomichan.terms.jitenon import JitenonKokugoTerminator
|
|
||||||
from bot.yomichan.terms.jitenon import JitenonYojiTerminator
|
|
||||||
from bot.yomichan.terms.jitenon import JitenonKotowazaTerminator
|
|
||||||
from bot.yomichan.terms.smk8 import Smk8Terminator
|
|
||||||
from bot.yomichan.terms.daijirin2 import Daijirin2Terminator
|
|
||||||
|
|
||||||
|
|
||||||
class Exporter:
|
class Exporter:
|
||||||
def __init__(self, name):
|
def __init__(self, target):
|
||||||
self._name = name
|
self._target = target
|
||||||
|
self._terminator = new_terminator(target)
|
||||||
self._build_dir = None
|
self._build_dir = None
|
||||||
self._terms_per_file = 2000
|
self._terms_per_file = 2000
|
||||||
|
|
||||||
def export(self, entries, image_dir):
|
def export(self, entries, image_dir):
|
||||||
self.__init_build_image_dir(image_dir)
|
self.__init_build_image_dir(image_dir)
|
||||||
meta = load_yomichan_metadata()
|
meta = load_yomichan_metadata()
|
||||||
index = meta[self._name]["index"]
|
index = meta[self._target.value]["index"]
|
||||||
index["revision"] = self._get_revision(entries)
|
index["revision"] = self._get_revision(entries)
|
||||||
index["attribution"] = self._get_attribution(entries)
|
index["attribution"] = self._get_attribution(entries)
|
||||||
tags = meta[self._name]["tags"]
|
tags = meta[self._target.value]["tags"]
|
||||||
terms = self.__get_terms(entries)
|
terms = self.__get_terms(entries)
|
||||||
self.__make_dictionary(terms, index, tags)
|
self.__make_dictionary(terms, index, tags)
|
||||||
|
|
||||||
|
@ -43,7 +39,7 @@ class Exporter:
|
||||||
|
|
||||||
def __init_build_image_dir(self, image_dir):
|
def __init_build_image_dir(self, image_dir):
|
||||||
build_dir = self._get_build_dir()
|
build_dir = self._get_build_dir()
|
||||||
build_img_dir = os.path.join(build_dir, self._name)
|
build_img_dir = os.path.join(build_dir, self._target.value)
|
||||||
if image_dir is not None:
|
if image_dir is not None:
|
||||||
print("Copying image files to build directory...")
|
print("Copying image files to build directory...")
|
||||||
shutil.copytree(image_dir, build_img_dir)
|
shutil.copytree(image_dir, build_img_dir)
|
||||||
|
@ -115,15 +111,15 @@ class Exporter:
|
||||||
|
|
||||||
|
|
||||||
class JitenonExporter(Exporter):
|
class JitenonExporter(Exporter):
|
||||||
def __init__(self, name):
|
def __init__(self, target):
|
||||||
super().__init__(name)
|
super().__init__(target)
|
||||||
|
|
||||||
def _get_revision(self, entries):
|
def _get_revision(self, entries):
|
||||||
modified_date = None
|
modified_date = None
|
||||||
for entry in entries:
|
for entry in entries:
|
||||||
if modified_date is None or entry.modified_date > modified_date:
|
if modified_date is None or entry.modified_date > modified_date:
|
||||||
modified_date = entry.modified_date
|
modified_date = entry.modified_date
|
||||||
revision = f"{self._name};{modified_date}"
|
revision = f"{self._target.value};{modified_date}"
|
||||||
return revision
|
return revision
|
||||||
|
|
||||||
def _get_attribution(self, entries):
|
def _get_attribution(self, entries):
|
||||||
|
@ -135,44 +131,39 @@ class JitenonExporter(Exporter):
|
||||||
|
|
||||||
|
|
||||||
class JitenonKokugoExporter(JitenonExporter):
|
class JitenonKokugoExporter(JitenonExporter):
|
||||||
def __init__(self, name):
|
def __init__(self, target):
|
||||||
super().__init__(name)
|
super().__init__(target)
|
||||||
self._terminator = JitenonKokugoTerminator(name)
|
|
||||||
|
|
||||||
|
|
||||||
class JitenonYojiExporter(JitenonExporter):
|
class JitenonYojiExporter(JitenonExporter):
|
||||||
def __init__(self, name):
|
def __init__(self, target):
|
||||||
super().__init__(name)
|
super().__init__(target)
|
||||||
self._terminator = JitenonYojiTerminator(name)
|
|
||||||
|
|
||||||
|
|
||||||
class JitenonKotowazaExporter(JitenonExporter):
|
class JitenonKotowazaExporter(JitenonExporter):
|
||||||
def __init__(self, name):
|
def __init__(self, target):
|
||||||
super().__init__(name)
|
super().__init__(target)
|
||||||
self._terminator = JitenonKotowazaTerminator(name)
|
|
||||||
|
|
||||||
|
|
||||||
class Smk8Exporter(Exporter):
|
class Smk8Exporter(Exporter):
|
||||||
def __init__(self, name):
|
def __init__(self, target):
|
||||||
super().__init__(name)
|
super().__init__(target)
|
||||||
self._terminator = Smk8Terminator(name)
|
|
||||||
|
|
||||||
def _get_revision(self, entries):
|
def _get_revision(self, entries):
|
||||||
timestamp = datetime.now().strftime("%Y-%m-%d")
|
timestamp = datetime.now().strftime("%Y-%m-%d")
|
||||||
return f"{self._name};{timestamp}"
|
return f"{self._target.value};{timestamp}"
|
||||||
|
|
||||||
def _get_attribution(self, entries):
|
def _get_attribution(self, entries):
|
||||||
return "© Sanseido Co., LTD. 2020"
|
return "© Sanseido Co., LTD. 2020"
|
||||||
|
|
||||||
|
|
||||||
class Daijirin2Exporter(Exporter):
|
class Daijirin2Exporter(Exporter):
|
||||||
def __init__(self, name):
|
def __init__(self, target):
|
||||||
super().__init__(name)
|
super().__init__(target)
|
||||||
self._terminator = Daijirin2Terminator(name)
|
|
||||||
|
|
||||||
def _get_revision(self, entries):
|
def _get_revision(self, entries):
|
||||||
timestamp = datetime.now().strftime("%Y-%m-%d")
|
timestamp = datetime.now().strftime("%Y-%m-%d")
|
||||||
return f"{self._name};{timestamp}"
|
return f"{self._target.value};{timestamp}"
|
||||||
|
|
||||||
def _get_attribution(self, entries):
|
def _get_attribution(self, entries):
|
||||||
return "© Sanseido Co., LTD. 2019"
|
return "© Sanseido Co., LTD. 2019"
|
18
bot/yomichan/exporters/factory.py
Normal file
18
bot/yomichan/exporters/factory.py
Normal file
|
@ -0,0 +1,18 @@
|
||||||
|
from bot.targets import Targets
|
||||||
|
|
||||||
|
from bot.yomichan.exporters.export import JitenonKokugoExporter
|
||||||
|
from bot.yomichan.exporters.export import JitenonYojiExporter
|
||||||
|
from bot.yomichan.exporters.export import JitenonKotowazaExporter
|
||||||
|
from bot.yomichan.exporters.export import Smk8Exporter
|
||||||
|
from bot.yomichan.exporters.export import Daijirin2Exporter
|
||||||
|
|
||||||
|
|
||||||
|
def new_exporter(target):
    """Create the exporter object that corresponds to a dictionary target.

    The exporter class is selected by ``target`` (a ``Targets`` member) and
    is instantiated with that same ``target`` as its only argument. A
    ``target`` that is not present in the table raises ``KeyError``.
    """
    # One exporter class per supported target.
    exporter_classes = {
        Targets.JITENON_KOKUGO: JitenonKokugoExporter,
        Targets.JITENON_YOJI: JitenonYojiExporter,
        Targets.JITENON_KOTOWAZA: JitenonKotowazaExporter,
        Targets.SMK8: Smk8Exporter,
        Targets.DAIJIRIN2: Daijirin2Exporter,
    }
    exporter_class = exporter_classes[target]
    return exporter_class(target)
|
|
@ -1,5 +1,3 @@
|
||||||
from bot.data import load_yomichan_inflection_categories
|
|
||||||
|
|
||||||
from bot.entries.daijirin2 import Daijirin2PhraseEntry as PhraseEntry
|
from bot.entries.daijirin2 import Daijirin2PhraseEntry as PhraseEntry
|
||||||
|
|
||||||
from bot.yomichan.terms.terminator import Terminator
|
from bot.yomichan.terms.terminator import Terminator
|
||||||
|
@ -8,10 +6,8 @@ from bot.yomichan.grammar import sudachi_rules, tags_to_rules
|
||||||
|
|
||||||
|
|
||||||
class Daijirin2Terminator(Terminator):
|
class Daijirin2Terminator(Terminator):
|
||||||
def __init__(self, name):
|
def __init__(self, target):
|
||||||
super().__init__(name)
|
super().__init__(target)
|
||||||
categories = load_yomichan_inflection_categories()
|
|
||||||
self._inflection_categories = categories[name]
|
|
||||||
|
|
||||||
def _definition_tags(self, entry):
|
def _definition_tags(self, entry):
|
||||||
return ""
|
return ""
|
||||||
|
|
18
bot/yomichan/terms/factory.py
Normal file
18
bot/yomichan/terms/factory.py
Normal file
|
@ -0,0 +1,18 @@
|
||||||
|
from bot.targets import Targets
|
||||||
|
|
||||||
|
from bot.yomichan.terms.jitenon import JitenonKokugoTerminator
|
||||||
|
from bot.yomichan.terms.jitenon import JitenonYojiTerminator
|
||||||
|
from bot.yomichan.terms.jitenon import JitenonKotowazaTerminator
|
||||||
|
from bot.yomichan.terms.smk8 import Smk8Terminator
|
||||||
|
from bot.yomichan.terms.daijirin2 import Daijirin2Terminator
|
||||||
|
|
||||||
|
|
||||||
|
def new_terminator(target):
    """Create the terminator object that corresponds to a dictionary target.

    The terminator class is selected by ``target`` (a ``Targets`` member)
    and is instantiated with that same ``target`` as its only argument. A
    ``target`` that is not present in the table raises ``KeyError``.
    """
    # One terminator class per supported target.
    terminator_classes = {
        Targets.JITENON_KOKUGO: JitenonKokugoTerminator,
        Targets.JITENON_YOJI: JitenonYojiTerminator,
        Targets.JITENON_KOTOWAZA: JitenonKotowazaTerminator,
        Targets.SMK8: Smk8Terminator,
        Targets.DAIJIRIN2: Daijirin2Terminator,
    }
    terminator_class = terminator_classes[target]
    return terminator_class(target)
|
|
@ -7,8 +7,8 @@ from bot.yomichan.glossary.jitenon import JitenonKotowazaGlossary
|
||||||
|
|
||||||
|
|
||||||
class JitenonTerminator(Terminator):
|
class JitenonTerminator(Terminator):
|
||||||
def __init__(self, name):
|
def __init__(self, target):
|
||||||
super().__init__(name)
|
super().__init__(target)
|
||||||
|
|
||||||
def _definition_tags(self, entry):
|
def _definition_tags(self, entry):
|
||||||
return None
|
return None
|
||||||
|
@ -31,8 +31,8 @@ class JitenonTerminator(Terminator):
|
||||||
|
|
||||||
|
|
||||||
class JitenonKokugoTerminator(JitenonTerminator):
|
class JitenonKokugoTerminator(JitenonTerminator):
|
||||||
def __init__(self, name):
|
def __init__(self, target):
|
||||||
super().__init__(name)
|
super().__init__(target)
|
||||||
self._glossary_maker = JitenonKokugoGlossary()
|
self._glossary_maker = JitenonKokugoGlossary()
|
||||||
|
|
||||||
def _inflection_rules(self, entry, expression):
|
def _inflection_rules(self, entry, expression):
|
||||||
|
@ -43,8 +43,8 @@ class JitenonKokugoTerminator(JitenonTerminator):
|
||||||
|
|
||||||
|
|
||||||
class JitenonYojiTerminator(JitenonTerminator):
|
class JitenonYojiTerminator(JitenonTerminator):
|
||||||
def __init__(self, name):
|
def __init__(self, target):
|
||||||
super().__init__(name)
|
super().__init__(target)
|
||||||
self._glossary_maker = JitenonYojiGlossary()
|
self._glossary_maker = JitenonYojiGlossary()
|
||||||
|
|
||||||
def _inflection_rules(self, entry, expression):
|
def _inflection_rules(self, entry, expression):
|
||||||
|
@ -56,8 +56,8 @@ class JitenonYojiTerminator(JitenonTerminator):
|
||||||
|
|
||||||
|
|
||||||
class JitenonKotowazaTerminator(JitenonTerminator):
|
class JitenonKotowazaTerminator(JitenonTerminator):
|
||||||
def __init__(self, name):
|
def __init__(self, target):
|
||||||
super().__init__(name)
|
super().__init__(target)
|
||||||
self._glossary_maker = JitenonKotowazaGlossary()
|
self._glossary_maker = JitenonKotowazaGlossary()
|
||||||
|
|
||||||
def _inflection_rules(self, entry, expression):
|
def _inflection_rules(self, entry, expression):
|
||||||
|
|
|
@ -1,5 +1,3 @@
|
||||||
from bot.data import load_yomichan_inflection_categories
|
|
||||||
|
|
||||||
from bot.entries.smk8 import Smk8KanjiEntry as KanjiEntry
|
from bot.entries.smk8 import Smk8KanjiEntry as KanjiEntry
|
||||||
from bot.entries.smk8 import Smk8PhraseEntry as PhraseEntry
|
from bot.entries.smk8 import Smk8PhraseEntry as PhraseEntry
|
||||||
|
|
||||||
|
@ -9,10 +7,8 @@ from bot.yomichan.grammar import sudachi_rules, tags_to_rules
|
||||||
|
|
||||||
|
|
||||||
class Smk8Terminator(Terminator):
|
class Smk8Terminator(Terminator):
|
||||||
def __init__(self, name):
|
def __init__(self, target):
|
||||||
super().__init__(name)
|
super().__init__(target)
|
||||||
categories = load_yomichan_inflection_categories()
|
|
||||||
self._inflection_categories = categories[name]
|
|
||||||
|
|
||||||
def _definition_tags(self, entry):
|
def _definition_tags(self, entry):
|
||||||
if isinstance(entry, KanjiEntry):
|
if isinstance(entry, KanjiEntry):
|
||||||
|
|
|
@ -1,8 +1,13 @@
|
||||||
|
from bot.data import load_yomichan_inflection_categories
|
||||||
|
|
||||||
|
|
||||||
class Terminator:
|
class Terminator:
|
||||||
def __init__(self, name):
|
def __init__(self, target):
|
||||||
self._name = name
|
self._target = target
|
||||||
self._glossary_cache = {}
|
self._glossary_cache = {}
|
||||||
self._image_dir = None
|
self._image_dir = None
|
||||||
|
categories = load_yomichan_inflection_categories()
|
||||||
|
self._inflection_categories = categories[target.value]
|
||||||
|
|
||||||
def set_image_dir(self, image_dir):
|
def set_image_dir(self, image_dir):
|
||||||
self._image_dir = image_dir
|
self._image_dir = image_dir
|
||||||
|
|
|
@ -7,6 +7,9 @@
|
||||||
"kahen": ["カ行変格"],
|
"kahen": ["カ行変格"],
|
||||||
"sudachi": []
|
"sudachi": []
|
||||||
},
|
},
|
||||||
|
"jitenon-kokugo": {},
|
||||||
|
"jitenon-yoji": {},
|
||||||
|
"jitenon-kotowaza": {},
|
||||||
"smk8": {
|
"smk8": {
|
||||||
"sahen": ["サ", "サ変型"],
|
"sahen": ["サ", "サ変型"],
|
||||||
"godan": ["上二", "下二", "四", "五", "上二型", "下二型", "四段型", "五型", "特殊型"],
|
"godan": ["上二", "下二", "四", "五", "上二型", "下二型", "四段型", "五型", "特殊型"],
|
||||||
|
|
|
@ -59,10 +59,10 @@ def main():
|
||||||
target_names = [x.value for x in Targets]
|
target_names = [x.value for x in Targets]
|
||||||
args = parse_args(target_names)
|
args = parse_args(target_names)
|
||||||
selected_target = Targets(args.target)
|
selected_target = Targets(args.target)
|
||||||
crawler = new_crawler(selected_target, args)
|
crawler = new_crawler(selected_target)
|
||||||
crawler.collect_pages()
|
crawler.collect_pages(args.page_dir)
|
||||||
crawler.read_pages()
|
crawler.read_pages()
|
||||||
crawler.make_yomichan_dictionary()
|
crawler.make_yomichan_dictionary(args.image_dir)
|
||||||
|
|
||||||
|
|
||||||
if __name__ == "__main__":
|
if __name__ == "__main__":
|
||||||
|
|
Loading…
Reference in a new issue