From 7b2ba96db9770c6ef5e827b8dc2161d52cf3e872 Mon Sep 17 00:00:00 2001 From: stephenmk Date: Wed, 26 Jul 2023 23:48:24 -0500 Subject: [PATCH] Reorganize file structure of all other modules --- bot/crawlers/base/crawler.py | 54 ++++++ bot/crawlers/base/jitenon.py | 29 ++++ bot/crawlers/base/monokakido.py | 19 +++ bot/crawlers/crawlers.py | 158 ------------------ bot/crawlers/daijirin2.py | 5 + bot/crawlers/factory.py | 20 --- bot/crawlers/jitenon_kokugo.py | 38 +++++ bot/crawlers/jitenon_kotowaza.py | 8 + bot/crawlers/jitenon_yoji.py | 8 + bot/crawlers/sankoku8.py | 5 + bot/crawlers/scrapers/jitenon.py | 10 ++ bot/crawlers/{ => scrapers}/scraper.py | 24 ++- bot/crawlers/smk8.py | 5 + bot/entries/base/entry.py | 12 +- bot/entries/base/jitenon_entry.py | 2 +- bot/entries/base/sanseido_entry.py | 2 +- bot/entries/factory.py | 7 - bot/factory.py | 37 ++++ .../exporters/{export.py => base/exporter.py} | 66 +------- bot/mdict/exporters/base/jitenon.py | 18 ++ bot/mdict/exporters/base/monokakido.py | 8 + bot/mdict/exporters/daijirin2.py | 6 + bot/mdict/exporters/factory.py | 20 --- bot/mdict/exporters/jitenon_kokugo.py | 5 + bot/mdict/exporters/jitenon_kotowaza.py | 5 + bot/mdict/exporters/jitenon_yoji.py | 5 + bot/mdict/exporters/sankoku8.py | 6 + bot/mdict/exporters/smk8.py | 6 + bot/mdict/terms/base/jitenon.py | 20 +++ bot/mdict/terms/{ => base}/terminator.py | 8 +- bot/mdict/terms/daijirin2.py | 4 +- bot/mdict/terms/factory.py | 20 --- bot/mdict/terms/jitenon.py | 42 ----- bot/mdict/terms/jitenon_kokugo.py | 8 + bot/mdict/terms/jitenon_kotowaza.py | 8 + bot/mdict/terms/jitenon_yoji.py | 8 + bot/mdict/terms/sankoku8.py | 4 +- bot/mdict/terms/smk8.py | 4 +- .../exporters/{export.py => base/exporter.py} | 74 ++------ bot/yomichan/exporters/base/jitenon.py | 18 ++ bot/yomichan/exporters/base/monokakido.py | 8 + bot/yomichan/exporters/daijirin2.py | 6 + bot/yomichan/exporters/factory.py | 20 --- bot/yomichan/exporters/jitenon_kokugo.py | 5 + bot/yomichan/exporters/jitenon_kotowaza.py | 5 + bot/yomichan/exporters/jitenon_yoji.py | 5 + bot/yomichan/exporters/sankoku8.py | 6 + bot/yomichan/exporters/smk8.py | 6 + bot/yomichan/glossary/daijirin2.py | 3 +- bot/yomichan/terms/base/jitenon.py | 26 +++ bot/yomichan/terms/{ => base}/terminator.py | 16 +- bot/yomichan/terms/daijirin2.py | 5 +- bot/yomichan/terms/factory.py | 20 --- bot/yomichan/terms/jitenon.py | 68 -------- bot/yomichan/terms/jitenon_kokugo.py | 15 ++ bot/yomichan/terms/jitenon_kotowaza.py | 15 ++ bot/yomichan/terms/jitenon_yoji.py | 15 ++ bot/yomichan/terms/sankoku8.py | 5 +- bot/yomichan/terms/smk8.py | 5 +- jitenbot.py | 2 +- run_all.sh | 2 + 61 files changed, 517 insertions(+), 547 deletions(-) create mode 100644 bot/crawlers/base/crawler.py create mode 100644 bot/crawlers/base/jitenon.py create mode 100644 bot/crawlers/base/monokakido.py delete mode 100644 bot/crawlers/crawlers.py create mode 100644 bot/crawlers/daijirin2.py delete mode 100644 bot/crawlers/factory.py create mode 100644 bot/crawlers/jitenon_kokugo.py create mode 100644 bot/crawlers/jitenon_kotowaza.py create mode 100644 bot/crawlers/jitenon_yoji.py create mode 100644 bot/crawlers/sankoku8.py create mode 100644 bot/crawlers/scrapers/jitenon.py rename bot/crawlers/{ => scrapers}/scraper.py (93%) create mode 100644 bot/crawlers/smk8.py delete mode 100644 bot/entries/factory.py create mode 100644 bot/factory.py rename bot/mdict/exporters/{export.py => base/exporter.py} (79%) create mode 100644 bot/mdict/exporters/base/jitenon.py create mode 100644 bot/mdict/exporters/base/monokakido.py create mode 100644 bot/mdict/exporters/daijirin2.py delete mode 100644 bot/mdict/exporters/factory.py create mode 100644 bot/mdict/exporters/jitenon_kokugo.py create mode 100644 bot/mdict/exporters/jitenon_kotowaza.py create mode 100644 bot/mdict/exporters/jitenon_yoji.py create mode 100644 bot/mdict/exporters/sankoku8.py create mode 100644 bot/mdict/exporters/smk8.py create mode 100644 bot/mdict/terms/base/jitenon.py rename bot/mdict/terms/{ => base}/terminator.py (95%) delete mode 100644 bot/mdict/terms/factory.py delete mode 100644 bot/mdict/terms/jitenon.py create mode 100644 bot/mdict/terms/jitenon_kokugo.py create mode 100644 bot/mdict/terms/jitenon_kotowaza.py create mode 100644 bot/mdict/terms/jitenon_yoji.py rename bot/yomichan/exporters/{export.py => base/exporter.py} (76%) create mode 100644 bot/yomichan/exporters/base/jitenon.py create mode 100644 bot/yomichan/exporters/base/monokakido.py create mode 100644 bot/yomichan/exporters/daijirin2.py delete mode 100644 bot/yomichan/exporters/factory.py create mode 100644 bot/yomichan/exporters/jitenon_kokugo.py create mode 100644 bot/yomichan/exporters/jitenon_kotowaza.py create mode 100644 bot/yomichan/exporters/jitenon_yoji.py create mode 100644 bot/yomichan/exporters/sankoku8.py create mode 100644 bot/yomichan/exporters/smk8.py create mode 100644 bot/yomichan/terms/base/jitenon.py rename bot/yomichan/terms/{ => base}/terminator.py (91%) delete mode 100644 bot/yomichan/terms/factory.py delete mode 100644 bot/yomichan/terms/jitenon.py create mode 100644 bot/yomichan/terms/jitenon_kokugo.py create mode 100644 bot/yomichan/terms/jitenon_kotowaza.py create mode 100644 bot/yomichan/terms/jitenon_yoji.py diff --git a/bot/crawlers/base/crawler.py b/bot/crawlers/base/crawler.py new file mode 100644 index 0000000..31c3bdc --- /dev/null +++ b/bot/crawlers/base/crawler.py @@ -0,0 +1,54 @@ +import re +from abc import ABC, abstractmethod + +from bot.factory import new_entry +from bot.factory import new_yomichan_exporter +from bot.factory import new_mdict_exporter + + +class BaseCrawler(ABC): + def __init__(self, target): + self._target = target + self._page_map = {} + self._entries = [] + self._page_id_pattern = None + + @abstractmethod + def collect_pages(self, page_dir): + raise NotImplementedError + + def read_pages(self): + pages_len = len(self._page_map) + items = self._page_map.items() + for idx, (page_id, page_path) in enumerate(items): + update = f"Reading page {idx+1}/{pages_len}" + print(update, end='\r', flush=True) + entry = new_entry(self._target, page_id) + with open(page_path, "r", encoding="utf-8") as f: + page = f.read() + try: + entry.set_page(page) + except ValueError as err: + print(err) + print("Try deleting and redownloading file:") + print(f"\t{page_path}\n") + continue + self._entries.append(entry) + print() + + def make_yomichan_dictionary(self, media_dir, validate): + exporter = new_yomichan_exporter(self._target) + exporter.export(self._entries, media_dir, validate) + + def make_mdict_dictionary(self, media_dir, icon_file): + exporter = new_mdict_exporter(self._target) + exporter.export(self._entries, media_dir, icon_file) + + def _parse_page_id(self, page_link): + m = re.search(self._page_id_pattern, page_link) + if m is None: + return None + page_id = int(m.group(1)) + if page_id in self._page_map: + return None + return page_id diff --git a/bot/crawlers/base/jitenon.py b/bot/crawlers/base/jitenon.py new file mode 100644 index 0000000..ddbf3e5 --- /dev/null +++ b/bot/crawlers/base/jitenon.py @@ -0,0 +1,29 @@ +from bs4 import BeautifulSoup + +from bot.crawlers.scrapers.jitenon import Jitenon as JitenonScraper +from bot.crawlers.base.crawler import BaseCrawler + + +class JitenonCrawler(BaseCrawler): + def __init__(self, target): + super().__init__(target) + self._gojuon_url = None + + def collect_pages(self, page_dir): + print("Scraping jitenon.jp") + jitenon = JitenonScraper() + gojuon_doc, _ = jitenon.scrape(self._gojuon_url) + gojuon_soup = BeautifulSoup(gojuon_doc, features="html.parser") + for gojuon_a in gojuon_soup.select(".kana_area a", href=True): + gojuon_href = gojuon_a['href'] + kana_doc, _ = jitenon.scrape(gojuon_href) + kana_soup = BeautifulSoup(kana_doc, features="html.parser") + for kana_a in kana_soup.select(".word_box a", href=True): + page_link = kana_a['href'] + page_id = self._parse_page_id(page_link) + if page_id is None: + continue + _, page_path = jitenon.scrape(page_link) + self._page_map[page_id] = page_path + pages_len = len(self._page_map) + print(f"Finished scraping {pages_len} pages") diff --git a/bot/crawlers/base/monokakido.py b/bot/crawlers/base/monokakido.py new file mode 100644 index 0000000..057f8d4 --- /dev/null +++ b/bot/crawlers/base/monokakido.py @@ -0,0 +1,19 @@ +import os +from bot.crawlers.base.crawler import BaseCrawler + + +class MonokakidoCrawler(BaseCrawler): + def __init__(self, target): + super().__init__(target) + self._page_id_pattern = r"^([0-9]+)\.xml$" + + def collect_pages(self, page_dir): + print(f"Searching for page files in `{page_dir}`") + for pagefile in os.listdir(page_dir): + page_id = self._parse_page_id(pagefile) + if page_id is None or page_id == 0: + continue + path = os.path.join(page_dir, pagefile) + self._page_map[page_id] = path + pages_len = len(self._page_map) + print(f"Found {pages_len} page files for processing") diff --git a/bot/crawlers/crawlers.py b/bot/crawlers/crawlers.py deleted file mode 100644 index 51e0552..0000000 --- a/bot/crawlers/crawlers.py +++ /dev/null @@ -1,158 +0,0 @@ -import os -import re -from abc import ABC, abstractmethod -from bs4 import BeautifulSoup - -import bot.crawlers.scraper as Scraper -from bot.entries.factory import new_entry -from bot.yomichan.exporters.factory import new_yomi_exporter -from bot.mdict.exporters.factory import new_mdict_exporter - - -class Crawler(ABC): - def __init__(self, target): - self._target = target - self._page_map = {} - self._entries = [] - self._page_id_pattern = None - - @abstractmethod - def collect_pages(self, page_dir): - pass - - def read_pages(self): - pages_len = len(self._page_map) - items = self._page_map.items() - for idx, (page_id, page_path) in enumerate(items): - update = f"Reading page {idx+1}/{pages_len}" - print(update, end='\r', flush=True) - entry = new_entry(self._target, page_id) - with open(page_path, "r", encoding="utf-8") as f: - page = f.read() - try: - entry.set_page(page) - except ValueError as err: - print(err) - print("Try deleting and redownloading file:") - print(f"\t{page_path}\n") - continue - self._entries.append(entry) - print() - - def make_yomichan_dictionary(self, media_dir, validate): - exporter = new_yomi_exporter(self._target) - exporter.export(self._entries, media_dir, validate) - - def make_mdict_dictionary(self, media_dir, icon_file): - exporter = new_mdict_exporter(self._target) - exporter.export(self._entries, media_dir, icon_file) - - def _parse_page_id(self, page_link): - m = re.search(self._page_id_pattern, page_link) - if m is None: - return None - page_id = int(m.group(1)) - if page_id in self._page_map: - return None - return page_id - - -class JitenonKokugoCrawler(Crawler): - def __init__(self, target): - super().__init__(target) - self._gojuon_url = "https://kokugo.jitenon.jp/cat/gojuonindex.php" - self._page_id_pattern = r"word/p([0-9]+)$" - - def collect_pages(self, page_dir): - jitenon = Scraper.Jitenon() - gojuon_doc, _ = jitenon.scrape(self._gojuon_url) - gojuon_soup = BeautifulSoup(gojuon_doc, features="html.parser") - for gojuon_a in gojuon_soup.select(".kana_area a", href=True): - gojuon_href = gojuon_a['href'] - max_kana_page = 1 - current_kana_page = 1 - while current_kana_page <= max_kana_page: - kana_doc, _ = jitenon.scrape(f"{gojuon_href}&page={current_kana_page}") - current_kana_page += 1 - kana_soup = BeautifulSoup(kana_doc, features="html.parser") - page_total = kana_soup.find(class_="page_total").text - m = re.search(r"全([0-9]+)件", page_total) - if m: - max_kana_page = int(m.group(1)) - for kana_a in kana_soup.select(".word_box a", href=True): - page_link = kana_a['href'] - page_id = self._parse_page_id(page_link) - if page_id is None: - continue - _, page_path = jitenon.scrape(page_link) - self._page_map[page_id] = page_path - pages_len = len(self._page_map) - print(f"Finished scraping {pages_len} pages") - - -class _JitenonCrawler(Crawler): - def __init__(self, target): - super().__init__(target) - self._gojuon_url = None - - def collect_pages(self, page_dir): - print("Scraping jitenon.jp") - jitenon = Scraper.Jitenon() - gojuon_doc, _ = jitenon.scrape(self._gojuon_url) - gojuon_soup = BeautifulSoup(gojuon_doc, features="html.parser") - for gojuon_a in gojuon_soup.select(".kana_area a", href=True): - gojuon_href = gojuon_a['href'] - kana_doc, _ = jitenon.scrape(gojuon_href) - kana_soup = BeautifulSoup(kana_doc, features="html.parser") - for kana_a in kana_soup.select(".word_box a", href=True): - page_link = kana_a['href'] - page_id = self._parse_page_id(page_link) - if page_id is None: - continue - _, page_path = jitenon.scrape(page_link) - self._page_map[page_id] = page_path - pages_len = len(self._page_map) - print(f"Finished scraping {pages_len} pages") - - -class JitenonYojiCrawler(_JitenonCrawler): - def __init__(self, target): - super().__init__(target) - self._gojuon_url = "https://yoji.jitenon.jp/cat/gojuon.html" - self._page_id_pattern = r"([0-9]+)\.html$" - - -class JitenonKotowazaCrawler(_JitenonCrawler): - def __init__(self, target): - super().__init__(target) - self._gojuon_url = "https://kotowaza.jitenon.jp/cat/gojuon.php" - self._page_id_pattern = r"([0-9]+)\.php$" - - -class _MonokakidoCrawler(Crawler): - def __init__(self, target): - super().__init__(target) - self._page_id_pattern = r"^([0-9]+)\.xml$" - - def collect_pages(self, page_dir): - print(f"Searching for page files in `{page_dir}`") - for pagefile in os.listdir(page_dir): - page_id = self._parse_page_id(pagefile) - if page_id is None or page_id == 0: - continue - path = os.path.join(page_dir, pagefile) - self._page_map[page_id] = path - pages_len = len(self._page_map) - print(f"Found {pages_len} page files for processing") - - -class Smk8Crawler(_MonokakidoCrawler): - pass - - -class Daijirin2Crawler(_MonokakidoCrawler): - pass - - -class Sankoku8Crawler(_MonokakidoCrawler): - pass diff --git a/bot/crawlers/daijirin2.py b/bot/crawlers/daijirin2.py new file mode 100644 index 0000000..a9c711b --- /dev/null +++ b/bot/crawlers/daijirin2.py @@ -0,0 +1,5 @@ +from bot.crawlers.base.monokakido import MonokakidoCrawler + + +class Crawler(MonokakidoCrawler): + pass diff --git a/bot/crawlers/factory.py b/bot/crawlers/factory.py deleted file mode 100644 index d7450ea..0000000 --- a/bot/crawlers/factory.py +++ /dev/null @@ -1,20 +0,0 @@ -from bot.targets import Targets - -from bot.crawlers.crawlers import JitenonKokugoCrawler -from bot.crawlers.crawlers import JitenonYojiCrawler -from bot.crawlers.crawlers import JitenonKotowazaCrawler -from bot.crawlers.crawlers import Smk8Crawler -from bot.crawlers.crawlers import Daijirin2Crawler -from bot.crawlers.crawlers import Sankoku8Crawler - - -def new_crawler(target): - crawler_map = { - Targets.JITENON_KOKUGO: JitenonKokugoCrawler, - Targets.JITENON_YOJI: JitenonYojiCrawler, - Targets.JITENON_KOTOWAZA: JitenonKotowazaCrawler, - Targets.SMK8: Smk8Crawler, - Targets.DAIJIRIN2: Daijirin2Crawler, - Targets.SANKOKU8: Sankoku8Crawler, - } - return crawler_map[target](target) diff --git a/bot/crawlers/jitenon_kokugo.py b/bot/crawlers/jitenon_kokugo.py new file mode 100644 index 0000000..6d5cd66 --- /dev/null +++ b/bot/crawlers/jitenon_kokugo.py @@ -0,0 +1,38 @@ +import re +from bs4 import BeautifulSoup + +from bot.crawlers.base.crawler import BaseCrawler +from bot.crawlers.scrapers.jitenon import Jitenon as JitenonScraper + + +class Crawler(BaseCrawler): + def __init__(self, target): + super().__init__(target) + self._gojuon_url = "https://kokugo.jitenon.jp/cat/gojuonindex.php" + self._page_id_pattern = r"word/p([0-9]+)$" + + def collect_pages(self, page_dir): + jitenon = JitenonScraper() + gojuon_doc, _ = jitenon.scrape(self._gojuon_url) + gojuon_soup = BeautifulSoup(gojuon_doc, features="html.parser") + for gojuon_a in gojuon_soup.select(".kana_area a", href=True): + gojuon_href = gojuon_a['href'] + max_kana_page = 1 + current_kana_page = 1 + while current_kana_page <= max_kana_page: + kana_doc, _ = jitenon.scrape(f"{gojuon_href}&page={current_kana_page}") + current_kana_page += 1 + kana_soup = BeautifulSoup(kana_doc, features="html.parser") + page_total = kana_soup.find(class_="page_total").text + m = re.search(r"全([0-9]+)件", page_total) + if m: + max_kana_page = int(m.group(1)) + for kana_a in kana_soup.select(".word_box a", href=True): + page_link = kana_a['href'] + page_id = self._parse_page_id(page_link) + if page_id is None: + continue + _, page_path = jitenon.scrape(page_link) + self._page_map[page_id] = page_path + pages_len = len(self._page_map) + print(f"Finished scraping {pages_len} pages") diff --git a/bot/crawlers/jitenon_kotowaza.py b/bot/crawlers/jitenon_kotowaza.py new file mode 100644 index 0000000..693fa52 --- /dev/null +++ b/bot/crawlers/jitenon_kotowaza.py @@ -0,0 +1,8 @@ +from bot.crawlers.base.jitenon import JitenonCrawler + + +class Crawler(JitenonCrawler): + def __init__(self, target): + super().__init__(target) + self._gojuon_url = "https://kotowaza.jitenon.jp/cat/gojuon.php" + self._page_id_pattern = r"([0-9]+)\.php$" diff --git a/bot/crawlers/jitenon_yoji.py b/bot/crawlers/jitenon_yoji.py new file mode 100644 index 0000000..5b89875 --- /dev/null +++ b/bot/crawlers/jitenon_yoji.py @@ -0,0 +1,8 @@ +from bot.crawlers.base.jitenon import JitenonCrawler + + +class Crawler(JitenonCrawler): + def __init__(self, target): + super().__init__(target) + self._gojuon_url = "https://yoji.jitenon.jp/cat/gojuon.html" + self._page_id_pattern = r"([0-9]+)\.html$" diff --git a/bot/crawlers/sankoku8.py b/bot/crawlers/sankoku8.py new file mode 100644 index 0000000..a9c711b --- /dev/null +++ b/bot/crawlers/sankoku8.py @@ -0,0 +1,5 @@ +from bot.crawlers.base.monokakido import MonokakidoCrawler + + +class Crawler(MonokakidoCrawler): + pass diff --git a/bot/crawlers/scrapers/jitenon.py b/bot/crawlers/scrapers/jitenon.py new file mode 100644 index 0000000..e4163d9 --- /dev/null +++ b/bot/crawlers/scrapers/jitenon.py @@ -0,0 +1,10 @@ +import re +from bot.crawlers.scrapers.scraper import BaseScraper + + +class Jitenon(BaseScraper): + def _get_netloc_re(self): + domain = r"jitenon\.jp" + pattern = r"^(?:([A-Za-z0-9.\-]+)\.)?" + domain + r"$" + netloc_re = re.compile(pattern) + return netloc_re diff --git a/bot/crawlers/scraper.py b/bot/crawlers/scrapers/scraper.py similarity index 93% rename from bot/crawlers/scraper.py rename to bot/crawlers/scrapers/scraper.py index 577f602..113d090 100644 --- a/bot/crawlers/scraper.py +++ b/bot/crawlers/scrapers/scraper.py @@ -1,24 +1,24 @@ import time -import requests import re import os import hashlib from datetime import datetime -from pathlib import Path - -from platformdirs import user_cache_dir from urllib.parse import urlparse +from pathlib import Path +from abc import ABC, abstractmethod + +import requests from requests.adapters import HTTPAdapter from requests.packages.urllib3.util.retry import Retry +from platformdirs import user_cache_dir from bot.data import load_config -class Scraper(): +class BaseScraper(ABC): def __init__(self): self._config = load_config() - pattern = r"^(?:([A-Za-z0-9.\-]+)\.)?" + self.domain + r"$" - self.netloc_re = re.compile(pattern) + self.netloc_re = self._get_netloc_re() self.__set_session() def scrape(self, urlstring): @@ -34,6 +34,10 @@ class Scraper(): print("Discovering cached files...", end='\r', flush=True) return html, cache_path + @abstractmethod + def _get_netloc_re(self): + raise NotImplementedError + def __set_session(self): retry_strategy = Retry( total=3, @@ -99,9 +103,3 @@ class Scraper(): self.__set_session() response = self.session.get(urlstring, timeout=10) return response.text - - -class Jitenon(Scraper): - def __init__(self): - self.domain = r"jitenon\.jp" - super().__init__() diff --git a/bot/crawlers/smk8.py b/bot/crawlers/smk8.py new file mode 100644 index 0000000..a9c711b --- /dev/null +++ b/bot/crawlers/smk8.py @@ -0,0 +1,5 @@ +from bot.crawlers.base.monokakido import MonokakidoCrawler + + +class Crawler(MonokakidoCrawler): + pass diff --git a/bot/entries/base/entry.py b/bot/entries/base/entry.py index 3811a77..60d4f16 100644 --- a/bot/entries/base/entry.py +++ b/bot/entries/base/entry.py @@ -18,15 +18,15 @@ class Entry(ABC): @abstractmethod def get_global_identifier(self): - pass + raise NotImplementedError @abstractmethod def set_page(self, page): - pass + raise NotImplementedError @abstractmethod def get_page_soup(self): - pass + raise NotImplementedError def get_headwords(self): if self._headwords is not None: @@ -38,15 +38,15 @@ class Entry(ABC): @abstractmethod def _get_headwords(self): - pass + raise NotImplementedError @abstractmethod def _add_variant_expressions(self, headwords): - pass + raise NotImplementedError @abstractmethod def get_part_of_speech_tags(self): - pass + raise NotImplementedError def get_parent(self): if self.entry_id in self.SUBENTRY_ID_TO_ENTRY_ID: diff --git a/bot/entries/base/jitenon_entry.py b/bot/entries/base/jitenon_entry.py index 7af845b..43d6005 100644 --- a/bot/entries/base/jitenon_entry.py +++ b/bot/entries/base/jitenon_entry.py @@ -58,7 +58,7 @@ class JitenonEntry(Entry): @abstractmethod def _get_column_map(self): - pass + raise NotImplementedError def __set_modified_date(self, page): m = re.search(r"\"dateModified\": \"(\d{4}-\d{2}-\d{2})", page) diff --git a/bot/entries/base/sanseido_entry.py b/bot/entries/base/sanseido_entry.py index 4e1098d..bb52431 100644 --- a/bot/entries/base/sanseido_entry.py +++ b/bot/entries/base/sanseido_entry.py @@ -39,7 +39,7 @@ class SanseidoEntry(Entry): @abstractmethod def _get_subentry_parameters(self): - pass + raise NotImplementedError def _add_variant_expressions(self, headwords): for expressions in headwords.values(): diff --git a/bot/entries/factory.py b/bot/entries/factory.py deleted file mode 100644 index 594762f..0000000 --- a/bot/entries/factory.py +++ /dev/null @@ -1,7 +0,0 @@ -import importlib - - -def new_entry(target, page_id): - module_path = f"bot.entries.{target.name.lower()}.entry" - module = importlib.import_module(module_path) - return module.Entry(target, page_id) diff --git a/bot/factory.py b/bot/factory.py new file mode 100644 index 0000000..7b025d4 --- /dev/null +++ b/bot/factory.py @@ -0,0 +1,37 @@ +import importlib + + +def new_crawler(target): + module_path = f"bot.crawlers.{target.name.lower()}" + module = importlib.import_module(module_path) + return module.Crawler(target) + + +def new_entry(target, page_id): + module_path = f"bot.entries.{target.name.lower()}.entry" + module = importlib.import_module(module_path) + return module.Entry(target, page_id) + + +def new_yomichan_exporter(target): + module_path = f"bot.yomichan.exporters.{target.name.lower()}" + module = importlib.import_module(module_path) + return module.Exporter(target) + + +def new_yomichan_terminator(target): + module_path = f"bot.yomichan.terms.{target.name.lower()}" + module = importlib.import_module(module_path) + return module.Terminator(target) + + +def new_mdict_exporter(target): + module_path = f"bot.mdict.exporters.{target.name.lower()}" + module = importlib.import_module(module_path) + return module.Exporter(target) + + +def new_mdict_terminator(target): + module_path = f"bot.mdict.terms.{target.name.lower()}" + module = importlib.import_module(module_path) + return module.Terminator(target) diff --git a/bot/mdict/exporters/export.py b/bot/mdict/exporters/base/exporter.py similarity index 79% rename from bot/mdict/exporters/export.py rename to bot/mdict/exporters/base/exporter.py index b8e8347..26dc662 100644 --- a/bot/mdict/exporters/export.py +++ b/bot/mdict/exporters/base/exporter.py @@ -1,20 +1,18 @@ -# pylint: disable=too-few-public-methods - -import subprocess import os import shutil +import subprocess from abc import ABC, abstractmethod from pathlib import Path -from datetime import datetime + from platformdirs import user_documents_dir, user_cache_dir -from bot.mdict.terms.factory import new_terminator +from bot.factory import new_mdict_terminator -class Exporter(ABC): +class BaseExporter(ABC): def __init__(self, target): self._target = target - self._terminator = new_terminator(target) + self._terminator = new_mdict_terminator(target) self._build_dir = None self._build_media_dir = None self._description_file = None @@ -168,58 +166,8 @@ class Exporter(ABC): @abstractmethod def _get_revision(self, entries): - pass + raise NotImplementedError @abstractmethod def _get_attribution(self, entries): - pass - - -class _JitenonExporter(Exporter): - def _get_revision(self, entries): - modified_date = None - for entry in entries: - if modified_date is None or entry.modified_date > modified_date: - modified_date = entry.modified_date - revision = modified_date.strftime("%Y年%m月%d日閲覧") - return revision - - def _get_attribution(self, entries): - modified_date = None - for entry in entries: - if modified_date is None or entry.modified_date > modified_date: - attribution = entry.attribution - return attribution - - -class JitenonKokugoExporter(_JitenonExporter): - pass - - -class JitenonYojiExporter(_JitenonExporter): - pass - - -class JitenonKotowazaExporter(_JitenonExporter): - pass - - -class _MonokakidoExporter(Exporter): - def _get_revision(self, entries): - timestamp = datetime.now().strftime("%Y年%m月%d日作成") - return timestamp - - -class Smk8Exporter(_MonokakidoExporter): - def _get_attribution(self, entries): - return "© Sanseido Co., LTD. 2020" - - -class Daijirin2Exporter(_MonokakidoExporter): - def _get_attribution(self, entries): - return "© Sanseido Co., LTD. 2019" - - -class Sankoku8Exporter(_MonokakidoExporter): - def _get_attribution(self, entries): - return "© Sanseido Co., LTD. 2021" + raise NotImplementedError diff --git a/bot/mdict/exporters/base/jitenon.py b/bot/mdict/exporters/base/jitenon.py new file mode 100644 index 0000000..2e6b1df --- /dev/null +++ b/bot/mdict/exporters/base/jitenon.py @@ -0,0 +1,18 @@ +from bot.mdict.exporters.base.exporter import BaseExporter + + +class JitenonExporter(BaseExporter): + def _get_revision(self, entries): + modified_date = None + for entry in entries: + if modified_date is None or entry.modified_date > modified_date: + modified_date = entry.modified_date + revision = modified_date.strftime("%Y年%m月%d日閲覧") + return revision + + def _get_attribution(self, entries): + modified_date = None + for entry in entries: + if modified_date is None or entry.modified_date > modified_date: + attribution = entry.attribution + return attribution diff --git a/bot/mdict/exporters/base/monokakido.py b/bot/mdict/exporters/base/monokakido.py new file mode 100644 index 0000000..b9b9629 --- /dev/null +++ b/bot/mdict/exporters/base/monokakido.py @@ -0,0 +1,8 @@ +from datetime import datetime +from bot.mdict.exporters.base.exporter import BaseExporter + + +class MonokakidoExporter(BaseExporter): + def _get_revision(self, entries): + timestamp = datetime.now().strftime("%Y年%m月%d日作成") + return timestamp diff --git a/bot/mdict/exporters/daijirin2.py b/bot/mdict/exporters/daijirin2.py new file mode 100644 index 0000000..4692470 --- /dev/null +++ b/bot/mdict/exporters/daijirin2.py @@ -0,0 +1,6 @@ +from bot.mdict.exporters.base.monokakido import MonokakidoExporter + + +class Exporter(MonokakidoExporter): + def _get_attribution(self, entries): + return "© Sanseido Co., LTD. 2019" diff --git a/bot/mdict/exporters/factory.py b/bot/mdict/exporters/factory.py deleted file mode 100644 index 5417493..0000000 --- a/bot/mdict/exporters/factory.py +++ /dev/null @@ -1,20 +0,0 @@ -from bot.targets import Targets - -from bot.mdict.exporters.export import JitenonKokugoExporter -from bot.mdict.exporters.export import JitenonYojiExporter -from bot.mdict.exporters.export import JitenonKotowazaExporter -from bot.mdict.exporters.export import Smk8Exporter -from bot.mdict.exporters.export import Daijirin2Exporter -from bot.mdict.exporters.export import Sankoku8Exporter - - -def new_mdict_exporter(target): - exporter_map = { - Targets.JITENON_KOKUGO: JitenonKokugoExporter, - Targets.JITENON_YOJI: JitenonYojiExporter, - Targets.JITENON_KOTOWAZA: JitenonKotowazaExporter, - Targets.SMK8: Smk8Exporter, - Targets.DAIJIRIN2: Daijirin2Exporter, - Targets.SANKOKU8: Sankoku8Exporter, - } - return exporter_map[target](target) diff --git a/bot/mdict/exporters/jitenon_kokugo.py b/bot/mdict/exporters/jitenon_kokugo.py new file mode 100644 index 0000000..5689fa8 --- /dev/null +++ b/bot/mdict/exporters/jitenon_kokugo.py @@ -0,0 +1,5 @@ +from bot.mdict.exporters.base.jitenon import JitenonExporter + + +class Exporter(JitenonExporter): + pass diff --git a/bot/mdict/exporters/jitenon_kotowaza.py b/bot/mdict/exporters/jitenon_kotowaza.py new file mode 100644 index 0000000..5689fa8 --- /dev/null +++ b/bot/mdict/exporters/jitenon_kotowaza.py @@ -0,0 +1,5 @@ +from bot.mdict.exporters.base.jitenon import JitenonExporter + + +class Exporter(JitenonExporter): + pass diff --git a/bot/mdict/exporters/jitenon_yoji.py b/bot/mdict/exporters/jitenon_yoji.py new file mode 100644 index 0000000..5689fa8 --- /dev/null +++ b/bot/mdict/exporters/jitenon_yoji.py @@ -0,0 +1,5 @@ +from bot.mdict.exporters.base.jitenon import JitenonExporter + + +class Exporter(JitenonExporter): + pass diff --git a/bot/mdict/exporters/sankoku8.py b/bot/mdict/exporters/sankoku8.py new file mode 100644 index 0000000..6063864 --- /dev/null +++ b/bot/mdict/exporters/sankoku8.py @@ -0,0 +1,6 @@ +from bot.mdict.exporters.base.monokakido import MonokakidoExporter + + +class Exporter(MonokakidoExporter): + def _get_attribution(self, entries): + return "© Sanseido Co., LTD. 2021" diff --git a/bot/mdict/exporters/smk8.py b/bot/mdict/exporters/smk8.py new file mode 100644 index 0000000..a030b4b --- /dev/null +++ b/bot/mdict/exporters/smk8.py @@ -0,0 +1,6 @@ +from bot.mdict.exporters.base.monokakido import MonokakidoExporter + + +class Exporter(MonokakidoExporter): + def _get_attribution(self, entries): + return "© Sanseido Co., LTD. 2020" diff --git a/bot/mdict/terms/base/jitenon.py b/bot/mdict/terms/base/jitenon.py new file mode 100644 index 0000000..4f255bf --- /dev/null +++ b/bot/mdict/terms/base/jitenon.py @@ -0,0 +1,20 @@ +from bot.mdict.terms.base.terminator import BaseTerminator + + +class JitenonTerminator(BaseTerminator): + def __init__(self, target): + super().__init__(target) + self._glossary_maker = None + + def _glossary(self, entry): + if entry.entry_id in self._glossary_cache: + return self._glossary_cache[entry.entry_id] + glossary = self._glossary_maker.make_glossary(entry, self._media_dir) + self._glossary_cache[entry.entry_id] = glossary + return glossary + + def _link_glossary_parameters(self, entry): + return [] + + def _subentry_lists(self, entry): + return [] diff --git a/bot/mdict/terms/terminator.py b/bot/mdict/terms/base/terminator.py similarity index 95% rename from bot/mdict/terms/terminator.py rename to bot/mdict/terms/base/terminator.py index ee62411..945a65b 100644 --- a/bot/mdict/terms/terminator.py +++ b/bot/mdict/terms/base/terminator.py @@ -2,7 +2,7 @@ import re from abc import abstractmethod, ABC -class Terminator(ABC): +class BaseTerminator(ABC): def __init__(self, target): self._target = target self._glossary_cache = {} @@ -72,12 +72,12 @@ class Terminator(ABC): @abstractmethod def _glossary(self, entry): - pass + raise NotImplementedError @abstractmethod def _link_glossary_parameters(self, entry): - pass + raise NotImplementedError @abstractmethod def _subentry_lists(self, entry): - pass + raise NotImplementedError diff --git a/bot/mdict/terms/daijirin2.py b/bot/mdict/terms/daijirin2.py index 3b5ce68..640b520 100644 --- a/bot/mdict/terms/daijirin2.py +++ b/bot/mdict/terms/daijirin2.py @@ -1,8 +1,8 @@ -from bot.mdict.terms.terminator import Terminator +from bot.mdict.terms.base.terminator import BaseTerminator from bot.mdict.glossary.daijirin2 import make_glossary -class Daijirin2Terminator(Terminator): +class Terminator(BaseTerminator): def _glossary(self, entry): if entry.entry_id in self._glossary_cache: return self._glossary_cache[entry.entry_id] diff --git a/bot/mdict/terms/factory.py b/bot/mdict/terms/factory.py deleted file mode 100644 index 8cee8e7..0000000 --- a/bot/mdict/terms/factory.py +++ /dev/null @@ -1,20 +0,0 @@ -from bot.targets import Targets - -from bot.mdict.terms.jitenon import JitenonKokugoTerminator -from bot.mdict.terms.jitenon import JitenonYojiTerminator -from bot.mdict.terms.jitenon import JitenonKotowazaTerminator -from bot.mdict.terms.smk8 import Smk8Terminator -from bot.mdict.terms.daijirin2 import Daijirin2Terminator -from bot.mdict.terms.sankoku8 import Sankoku8Terminator - - -def new_terminator(target): - terminator_map = { - Targets.JITENON_KOKUGO: JitenonKokugoTerminator, - Targets.JITENON_YOJI: JitenonYojiTerminator, - Targets.JITENON_KOTOWAZA: JitenonKotowazaTerminator, - Targets.SMK8: Smk8Terminator, - Targets.DAIJIRIN2: Daijirin2Terminator, - Targets.SANKOKU8: Sankoku8Terminator, - } - return terminator_map[target](target) diff --git a/bot/mdict/terms/jitenon.py b/bot/mdict/terms/jitenon.py deleted file mode 100644 index 3f9cfc1..0000000 --- a/bot/mdict/terms/jitenon.py +++ /dev/null @@ -1,42 +0,0 @@ -from bot.mdict.terms.terminator import Terminator - -from bot.mdict.glossary.jitenon import JitenonKokugoGlossary -from bot.mdict.glossary.jitenon import JitenonYojiGlossary -from bot.mdict.glossary.jitenon import JitenonKotowazaGlossary - - -class JitenonTerminator(Terminator): - def __init__(self, target): - super().__init__(target) - self._glossary_maker = None - - def _glossary(self, entry): - if entry.entry_id in self._glossary_cache: - return self._glossary_cache[entry.entry_id] - glossary = self._glossary_maker.make_glossary(entry, self._media_dir) - self._glossary_cache[entry.entry_id] = glossary - return glossary - - def _link_glossary_parameters(self, entry): - return [] - - def _subentry_lists(self, entry): - return [] - - -class JitenonKokugoTerminator(JitenonTerminator): - def __init__(self, target): - super().__init__(target) - self._glossary_maker = JitenonKokugoGlossary() - - -class JitenonYojiTerminator(JitenonTerminator): - def __init__(self, target): - super().__init__(target) - self._glossary_maker = JitenonYojiGlossary() - - -class JitenonKotowazaTerminator(JitenonTerminator): - def __init__(self, target): - super().__init__(target) - self._glossary_maker = JitenonKotowazaGlossary() diff --git a/bot/mdict/terms/jitenon_kokugo.py b/bot/mdict/terms/jitenon_kokugo.py new file mode 100644 index 0000000..2a44b7b --- /dev/null +++ b/bot/mdict/terms/jitenon_kokugo.py @@ -0,0 +1,8 @@ +from bot.mdict.terms.base.jitenon import JitenonTerminator +from bot.mdict.glossary.jitenon import JitenonKokugoGlossary + + +class Terminator(JitenonTerminator): + def __init__(self, target): + super().__init__(target) + self._glossary_maker = JitenonKokugoGlossary() diff --git a/bot/mdict/terms/jitenon_kotowaza.py b/bot/mdict/terms/jitenon_kotowaza.py new file mode 100644 index 0000000..3492a49 --- /dev/null +++ b/bot/mdict/terms/jitenon_kotowaza.py @@ -0,0 +1,8 @@ +from bot.mdict.terms.base.jitenon import JitenonTerminator +from bot.mdict.glossary.jitenon import JitenonKotowazaGlossary + + +class Terminator(JitenonTerminator): + def __init__(self, target): + super().__init__(target) + self._glossary_maker = JitenonKotowazaGlossary() diff --git a/bot/mdict/terms/jitenon_yoji.py b/bot/mdict/terms/jitenon_yoji.py new file mode 100644 index 0000000..a4175a1 --- /dev/null +++ b/bot/mdict/terms/jitenon_yoji.py @@ -0,0 +1,8 @@ +from bot.mdict.terms.base.jitenon import JitenonTerminator +from bot.mdict.glossary.jitenon import JitenonYojiGlossary + + +class Terminator(JitenonTerminator): + def __init__(self, target): + super().__init__(target) + self._glossary_maker = JitenonYojiGlossary() diff --git a/bot/mdict/terms/sankoku8.py b/bot/mdict/terms/sankoku8.py index 5c1bfb7..71a3b8f 100644 --- a/bot/mdict/terms/sankoku8.py +++ b/bot/mdict/terms/sankoku8.py @@ -1,8 +1,8 @@ -from bot.mdict.terms.terminator import Terminator +from bot.mdict.terms.base.terminator import BaseTerminator from bot.mdict.glossary.sankoku8 import make_glossary -class Sankoku8Terminator(Terminator): +class Terminator(BaseTerminator): def _glossary(self, entry): if entry.entry_id in self._glossary_cache: return self._glossary_cache[entry.entry_id] diff --git a/bot/mdict/terms/smk8.py b/bot/mdict/terms/smk8.py index 22275d5..ef2b7a2 100644 --- a/bot/mdict/terms/smk8.py +++ b/bot/mdict/terms/smk8.py @@ -1,8 +1,8 @@ -from bot.mdict.terms.terminator import Terminator +from bot.mdict.terms.base.terminator import BaseTerminator from bot.mdict.glossary.smk8 import make_glossary -class Smk8Terminator(Terminator): +class Terminator(BaseTerminator): def _glossary(self, entry): if entry.entry_id in self._glossary_cache: return self._glossary_cache[entry.entry_id] diff --git a/bot/yomichan/exporters/export.py b/bot/yomichan/exporters/base/exporter.py similarity index 76% rename from bot/yomichan/exporters/export.py rename to bot/yomichan/exporters/base/exporter.py index d348fed..9389202 100644 --- a/bot/yomichan/exporters/export.py +++ b/bot/yomichan/exporters/base/exporter.py @@ -1,24 +1,22 @@ -# pylint: disable=too-few-public-methods - import json import os import shutil import copy from pathlib import Path -from datetime import datetime from abc import ABC, abstractmethod -from platformdirs import user_documents_dir, user_cache_dir import fastjsonschema +from platformdirs import user_documents_dir, user_cache_dir + from bot.data import load_yomichan_metadata -from bot.yomichan.terms.factory import new_terminator from bot.data import load_yomichan_term_schema +from bot.factory import new_yomichan_terminator -class Exporter(ABC): +class BaseExporter(ABC): def __init__(self, target): self._target = target - self._terminator = new_terminator(target) + self._terminator = new_yomichan_terminator(target) self._build_dir = None self._terms_per_file = 2000 @@ -36,11 +34,11 @@ class Exporter(ABC): @abstractmethod def _get_revision(self, entries): - pass + raise NotImplementedError @abstractmethod def _get_attribution(self, entries): - pass + raise NotImplementedError def _get_build_dir(self): if self._build_dir is not None: @@ -118,10 +116,10 @@ class Exporter(ABC): build_dir = self._get_build_dir() max_i = int(len(terms) / self._terms_per_file) + 1 for i in range(max_i): + update = f"Writing terms to term bank {i+1}/{max_i}" + print(update, end='\r', flush=True) start = self._terms_per_file * i end = self._terms_per_file * (i + 1) - update = f"Writing terms to term banks {start} - {end}" - print(update, end='\r', flush=True) term_file = os.path.join(build_dir, f"term_bank_{i+1}.json") with open(term_file, "w", encoding='utf8') as f: json.dump(terms[start:end], f, indent=4, ensure_ascii=False) @@ -142,8 +140,8 @@ class Exporter(ABC): json.dump(tags, f, indent=4, ensure_ascii=False) def __write_archive(self, filename): - print("Archiving data to ZIP file...") archive_format = "zip" + print(f"Archiving data to {archive_format.upper()} file...") out_dir = os.path.join(user_documents_dir(), "jitenbot", "yomichan") if not Path(out_dir).is_dir(): os.makedirs(out_dir) @@ -154,58 +152,8 @@ class Exporter(ABC): base_filename = os.path.join(out_dir, filename) build_dir = self._get_build_dir() shutil.make_archive(base_filename, archive_format, build_dir) - print(f"Dictionary file saved to {out_filepath}") + print(f"Dictionary file saved to `{out_filepath}`") def __rm_build_dir(self): build_dir = self._get_build_dir() shutil.rmtree(build_dir) - - -class _JitenonExporter(Exporter): - def _get_revision(self, entries): - modified_date = None - for entry in entries: - if modified_date is None or entry.modified_date > modified_date: - modified_date = entry.modified_date - revision = f"{self._target.value};{modified_date}" - return revision - - def _get_attribution(self, entries): - modified_date = None - for entry in entries: - if modified_date is None or entry.modified_date > modified_date: - attribution = entry.attribution - return attribution - - -class JitenonKokugoExporter(_JitenonExporter): - pass - - -class JitenonYojiExporter(_JitenonExporter): - pass - - -class JitenonKotowazaExporter(_JitenonExporter): - pass - - -class _MonokakidoExporter(Exporter): - def _get_revision(self, entries): - timestamp = datetime.now().strftime("%Y-%m-%d") - return f"{self._target.value};{timestamp}" - - -class Smk8Exporter(_MonokakidoExporter): - def _get_attribution(self, entries): - return "© Sanseido Co., LTD. 2020" - - -class Daijirin2Exporter(_MonokakidoExporter): - def _get_attribution(self, entries): - return "© Sanseido Co., LTD. 2019" - - -class Sankoku8Exporter(_MonokakidoExporter): - def _get_attribution(self, entries): - return "© Sanseido Co., LTD. 2021" diff --git a/bot/yomichan/exporters/base/jitenon.py b/bot/yomichan/exporters/base/jitenon.py new file mode 100644 index 0000000..80f0175 --- /dev/null +++ b/bot/yomichan/exporters/base/jitenon.py @@ -0,0 +1,18 @@ +from bot.yomichan.exporters.base.exporter import BaseExporter + + +class JitenonExporter(BaseExporter): + def _get_revision(self, entries): + modified_date = None + for entry in entries: + if modified_date is None or entry.modified_date > modified_date: + modified_date = entry.modified_date + revision = f"{self._target.value};{modified_date}" + return revision + + def _get_attribution(self, entries): + modified_date = None + for entry in entries: + if modified_date is None or entry.modified_date > modified_date: + attribution = entry.attribution + return attribution diff --git a/bot/yomichan/exporters/base/monokakido.py b/bot/yomichan/exporters/base/monokakido.py new file mode 100644 index 0000000..5c5f3fa --- /dev/null +++ b/bot/yomichan/exporters/base/monokakido.py @@ -0,0 +1,8 @@ +from datetime import datetime +from bot.yomichan.exporters.base.exporter import BaseExporter + + +class MonokakidoExporter(BaseExporter): + def _get_revision(self, entries): + timestamp = datetime.now().strftime("%Y-%m-%d") + return f"{self._target.value};{timestamp}" diff --git a/bot/yomichan/exporters/daijirin2.py b/bot/yomichan/exporters/daijirin2.py new file mode 100644 index 0000000..7115342 --- /dev/null +++ b/bot/yomichan/exporters/daijirin2.py @@ -0,0 +1,6 @@ +from bot.yomichan.exporters.base.monokakido import MonokakidoExporter + + +class Exporter(MonokakidoExporter): + def _get_attribution(self, entries): + return "© Sanseido Co., LTD. 2019" diff --git a/bot/yomichan/exporters/factory.py b/bot/yomichan/exporters/factory.py deleted file mode 100644 index afed7fd..0000000 --- a/bot/yomichan/exporters/factory.py +++ /dev/null @@ -1,20 +0,0 @@ -from bot.targets import Targets - -from bot.yomichan.exporters.export import JitenonKokugoExporter -from bot.yomichan.exporters.export import JitenonYojiExporter -from bot.yomichan.exporters.export import JitenonKotowazaExporter -from bot.yomichan.exporters.export import Smk8Exporter -from bot.yomichan.exporters.export import Daijirin2Exporter -from bot.yomichan.exporters.export import Sankoku8Exporter - - -def new_yomi_exporter(target): - exporter_map = { - Targets.JITENON_KOKUGO: JitenonKokugoExporter, - Targets.JITENON_YOJI: JitenonYojiExporter, - Targets.JITENON_KOTOWAZA: JitenonKotowazaExporter, - Targets.SMK8: Smk8Exporter, - Targets.DAIJIRIN2: Daijirin2Exporter, - Targets.SANKOKU8: Sankoku8Exporter, - } - return exporter_map[target](target) diff --git a/bot/yomichan/exporters/jitenon_kokugo.py b/bot/yomichan/exporters/jitenon_kokugo.py new file mode 100644 index 0000000..0a3ef7a --- /dev/null +++ b/bot/yomichan/exporters/jitenon_kokugo.py @@ -0,0 +1,5 @@ +from bot.yomichan.exporters.base.jitenon import JitenonExporter + + +class Exporter(JitenonExporter): + pass diff --git a/bot/yomichan/exporters/jitenon_kotowaza.py b/bot/yomichan/exporters/jitenon_kotowaza.py new file mode 100644 index 0000000..0a3ef7a --- /dev/null +++ b/bot/yomichan/exporters/jitenon_kotowaza.py @@ -0,0 +1,5 @@ +from bot.yomichan.exporters.base.jitenon import JitenonExporter + + +class Exporter(JitenonExporter): + pass diff --git a/bot/yomichan/exporters/jitenon_yoji.py b/bot/yomichan/exporters/jitenon_yoji.py new file mode 100644 index 0000000..0a3ef7a --- /dev/null +++ b/bot/yomichan/exporters/jitenon_yoji.py @@ -0,0 +1,5 @@ +from bot.yomichan.exporters.base.jitenon import JitenonExporter + + +class Exporter(JitenonExporter): + pass diff --git a/bot/yomichan/exporters/sankoku8.py b/bot/yomichan/exporters/sankoku8.py new file mode 100644 index 0000000..b33c389 --- /dev/null +++ b/bot/yomichan/exporters/sankoku8.py @@ -0,0 +1,6 @@ +from bot.yomichan.exporters.base.monokakido import MonokakidoExporter + + +class Exporter(MonokakidoExporter): + def _get_attribution(self, entries): + return "© Sanseido Co., LTD. 2021" diff --git a/bot/yomichan/exporters/smk8.py b/bot/yomichan/exporters/smk8.py new file mode 100644 index 0000000..7f71aa3 --- /dev/null +++ b/bot/yomichan/exporters/smk8.py @@ -0,0 +1,6 @@ +from bot.yomichan.exporters.base.monokakido import MonokakidoExporter + + +class Exporter(MonokakidoExporter): + def _get_attribution(self, entries): + return "© Sanseido Co., LTD. 2020" diff --git a/bot/yomichan/glossary/daijirin2.py b/bot/yomichan/glossary/daijirin2.py index 0adaa96..178de00 100644 --- a/bot/yomichan/glossary/daijirin2.py +++ b/bot/yomichan/glossary/daijirin2.py @@ -1,9 +1,10 @@ import re import os -from bs4 import BeautifulSoup from functools import cache from pathlib import Path +from bs4 import BeautifulSoup + import bot.yomichan.glossary.icons as Icons from bot.soup import delete_soup_nodes from bot.data import load_yomichan_name_conversion diff --git a/bot/yomichan/terms/base/jitenon.py b/bot/yomichan/terms/base/jitenon.py new file mode 100644 index 0000000..d0d5388 --- /dev/null +++ b/bot/yomichan/terms/base/jitenon.py @@ -0,0 +1,26 @@ +from bot.yomichan.terms.base.terminator import BaseTerminator + + +class JitenonTerminator(BaseTerminator): + def __init__(self, target): + super().__init__(target) + self._glossary_maker = None + + def _definition_tags(self, entry): + return None + + def _glossary(self, entry): + if entry.entry_id in self._glossary_cache: + return self._glossary_cache[entry.entry_id] + glossary = self._glossary_maker.make_glossary(entry, self._image_dir) + self._glossary_cache[entry.entry_id] = glossary + return glossary + + def _sequence(self, entry): + return entry.entry_id + + def _link_glossary_parameters(self, entry): + return [] + + def _subentry_lists(self, entry): + return [] diff --git a/bot/yomichan/terms/terminator.py b/bot/yomichan/terms/base/terminator.py similarity index 91% rename from bot/yomichan/terms/terminator.py rename to bot/yomichan/terms/base/terminator.py index dd0c02d..f57c4cc 100644 --- a/bot/yomichan/terms/terminator.py +++ b/bot/yomichan/terms/base/terminator.py @@ -2,7 +2,7 @@ from abc import abstractmethod, ABC from bot.data import load_yomichan_inflection_categories -class Terminator(ABC): +class BaseTerminator(ABC): def __init__(self, target): self._target = target self._glossary_cache = {} @@ -66,28 +66,28 @@ class Terminator(ABC): @abstractmethod def _definition_tags(self, entry): - pass + raise NotImplementedError @abstractmethod def _inflection_rules(self, entry, expression): - pass + raise NotImplementedError @abstractmethod def _glossary(self, entry): - pass + raise NotImplementedError @abstractmethod def _sequence(self, entry): - pass + raise NotImplementedError @abstractmethod def _term_tags(self, entry): - pass + raise NotImplementedError @abstractmethod def _link_glossary_parameters(self, entry): - pass + raise NotImplementedError @abstractmethod def _subentry_lists(self, entry): - pass + raise NotImplementedError diff --git a/bot/yomichan/terms/daijirin2.py b/bot/yomichan/terms/daijirin2.py index 281fac4..7cf06fb 100644 --- a/bot/yomichan/terms/daijirin2.py +++ b/bot/yomichan/terms/daijirin2.py @@ -1,11 +1,10 @@ from bot.entries.daijirin2.phrase_entry import PhraseEntry - -from bot.yomichan.terms.terminator import Terminator +from bot.yomichan.terms.base.terminator import BaseTerminator from bot.yomichan.glossary.daijirin2 import make_glossary from bot.yomichan.grammar import sudachi_rules, tags_to_rules -class Daijirin2Terminator(Terminator): +class Terminator(BaseTerminator): def _definition_tags(self, entry): return "" diff --git a/bot/yomichan/terms/factory.py b/bot/yomichan/terms/factory.py deleted file mode 100644 index 8c596cb..0000000 --- a/bot/yomichan/terms/factory.py +++ /dev/null @@ -1,20 +0,0 @@ -from bot.targets import Targets - -from bot.yomichan.terms.jitenon import JitenonKokugoTerminator -from bot.yomichan.terms.jitenon import JitenonYojiTerminator -from bot.yomichan.terms.jitenon import JitenonKotowazaTerminator -from bot.yomichan.terms.smk8 import Smk8Terminator -from bot.yomichan.terms.daijirin2 import Daijirin2Terminator -from bot.yomichan.terms.sankoku8 import Sankoku8Terminator - - -def new_terminator(target): - terminator_map = { - Targets.JITENON_KOKUGO: JitenonKokugoTerminator, - Targets.JITENON_YOJI: JitenonYojiTerminator, - Targets.JITENON_KOTOWAZA: JitenonKotowazaTerminator, - Targets.SMK8: Smk8Terminator, - Targets.DAIJIRIN2: Daijirin2Terminator, - Targets.SANKOKU8: Sankoku8Terminator, - } - return terminator_map[target](target) diff --git a/bot/yomichan/terms/jitenon.py b/bot/yomichan/terms/jitenon.py deleted file mode 100644 index 66bbed7..0000000 --- a/bot/yomichan/terms/jitenon.py +++ /dev/null @@ -1,68 +0,0 @@ -from bot.yomichan.grammar import sudachi_rules -from bot.yomichan.terms.terminator import Terminator - -from bot.yomichan.glossary.jitenon import JitenonKokugoGlossary -from bot.yomichan.glossary.jitenon import JitenonYojiGlossary -from bot.yomichan.glossary.jitenon import JitenonKotowazaGlossary - - -class JitenonTerminator(Terminator): - def __init__(self, target): - super().__init__(target) - self._glossary_maker = None - - def _definition_tags(self, entry): - return None - - def _glossary(self, entry): - if entry.entry_id in self._glossary_cache: - return self._glossary_cache[entry.entry_id] - glossary = self._glossary_maker.make_glossary(entry, self._image_dir) - self._glossary_cache[entry.entry_id] = glossary - return glossary - - def _sequence(self, entry): - return entry.entry_id - - def _link_glossary_parameters(self, entry): - return [] - - def _subentry_lists(self, entry): - return [] - - -class JitenonKokugoTerminator(JitenonTerminator): - def __init__(self, target): - super().__init__(target) - self._glossary_maker = JitenonKokugoGlossary() - - def _inflection_rules(self, entry, expression): - return sudachi_rules(expression) - - def _term_tags(self, entry): - return "" - - -class JitenonYojiTerminator(JitenonTerminator): - def __init__(self, target): - super().__init__(target) - self._glossary_maker = JitenonYojiGlossary() - - def _inflection_rules(self, entry, expression): - return "" - - def _term_tags(self, entry): - tags = entry.kanken_level.split("/") - return " ".join(tags) - - -class JitenonKotowazaTerminator(JitenonTerminator): - def __init__(self, target): - super().__init__(target) - self._glossary_maker = JitenonKotowazaGlossary() - - def _inflection_rules(self, entry, expression): - return sudachi_rules(expression) - - def _term_tags(self, entry): - return "" diff --git a/bot/yomichan/terms/jitenon_kokugo.py b/bot/yomichan/terms/jitenon_kokugo.py new file mode 100644 index 0000000..3e33b77 --- /dev/null +++ b/bot/yomichan/terms/jitenon_kokugo.py @@ -0,0 +1,15 @@ +from bot.yomichan.grammar import sudachi_rules +from bot.yomichan.glossary.jitenon import JitenonKokugoGlossary +from bot.yomichan.terms.base.jitenon import JitenonTerminator + + +class Terminator(JitenonTerminator): + def __init__(self, target): + super().__init__(target) + self._glossary_maker = JitenonKokugoGlossary() + + def _inflection_rules(self, entry, expression): + return sudachi_rules(expression) + + def _term_tags(self, entry): + return "" diff --git a/bot/yomichan/terms/jitenon_kotowaza.py b/bot/yomichan/terms/jitenon_kotowaza.py new file mode 100644 index 0000000..a0651b9 --- /dev/null +++ b/bot/yomichan/terms/jitenon_kotowaza.py @@ -0,0 +1,15 @@ +from bot.yomichan.grammar import sudachi_rules +from bot.yomichan.glossary.jitenon import JitenonKotowazaGlossary +from bot.yomichan.terms.base.jitenon import JitenonTerminator + + +class Terminator(JitenonTerminator): + def __init__(self, target): + super().__init__(target) + self._glossary_maker = JitenonKotowazaGlossary() + + def _inflection_rules(self, entry, expression): + return sudachi_rules(expression) + + def _term_tags(self, entry): + return "" diff --git a/bot/yomichan/terms/jitenon_yoji.py b/bot/yomichan/terms/jitenon_yoji.py new file mode 100644 index 0000000..5087539 --- /dev/null +++ b/bot/yomichan/terms/jitenon_yoji.py @@ -0,0 +1,15 @@ +from bot.yomichan.glossary.jitenon import JitenonYojiGlossary +from bot.yomichan.terms.base.jitenon import JitenonTerminator + + +class Terminator(JitenonTerminator): + def __init__(self, target): + super().__init__(target) + self._glossary_maker = JitenonYojiGlossary() + + def _inflection_rules(self, entry, expression): + return "" + + def _term_tags(self, entry): + tags = entry.kanken_level.split("/") + return " ".join(tags) diff --git a/bot/yomichan/terms/sankoku8.py b/bot/yomichan/terms/sankoku8.py index cff264f..d6e6afd 100644 --- a/bot/yomichan/terms/sankoku8.py +++ b/bot/yomichan/terms/sankoku8.py @@ -1,11 +1,10 @@ from bot.entries.sankoku8.phrase_entry import PhraseEntry - -from bot.yomichan.terms.terminator import Terminator +from bot.yomichan.terms.base.terminator import BaseTerminator from bot.yomichan.glossary.sankoku8 import make_glossary from bot.yomichan.grammar import sudachi_rules, tags_to_rules -class Sankoku8Terminator(Terminator): +class Terminator(BaseTerminator): def _definition_tags(self, entry): return "" diff --git a/bot/yomichan/terms/smk8.py b/bot/yomichan/terms/smk8.py index 766f4a0..9e85c17 100644 --- a/bot/yomichan/terms/smk8.py +++ b/bot/yomichan/terms/smk8.py @@ -1,12 +1,11 @@ from bot.entries.smk8.kanji_entry import KanjiEntry from bot.entries.smk8.phrase_entry import PhraseEntry - -from bot.yomichan.terms.terminator import Terminator +from bot.yomichan.terms.base.terminator import BaseTerminator from bot.yomichan.glossary.smk8 import make_glossary from bot.yomichan.grammar import sudachi_rules, tags_to_rules -class Smk8Terminator(Terminator): +class Terminator(BaseTerminator): def __init__(self, target): super().__init__(target) diff --git a/jitenbot.py b/jitenbot.py index da44905..f0a2719 100644 --- a/jitenbot.py +++ b/jitenbot.py @@ -21,7 +21,7 @@ import sys import argparse import subprocess from bot.targets import Targets -from bot.crawlers.factory import new_crawler +from bot.factory import new_crawler def filename(f): diff --git a/run_all.sh b/run_all.sh index 706a911..9dcdfda 100755 --- a/run_all.sh +++ b/run_all.sh @@ -1,5 +1,7 @@ #!/bin/sh +export PYTHONPYCACHEPREFIX=/tmp/pycache + python -m unittest discover -s tests python jitenbot.py jitenon-kokugo