Reorganize file structure of all other modules
This commit is contained in:
parent
9b3fdc86d1
commit
7b2ba96db9
54
bot/crawlers/base/crawler.py
Normal file
54
bot/crawlers/base/crawler.py
Normal file
|
@ -0,0 +1,54 @@
|
||||||
|
import re
|
||||||
|
from abc import ABC, abstractmethod
|
||||||
|
|
||||||
|
from bot.factory import new_entry
|
||||||
|
from bot.factory import new_yomichan_exporter
|
||||||
|
from bot.factory import new_mdict_exporter
|
||||||
|
|
||||||
|
|
||||||
|
class BaseCrawler(ABC):
    """Abstract base for crawlers that collect pages and build entries.

    Subclasses implement ``collect_pages`` to populate ``_page_map``
    (page id -> page file path) and assign ``_page_id_pattern``, a regular
    expression whose first group captures the numeric page id.
    """

    def __init__(self, target):
        """Remember *target* and start with no pages and no entries."""
        self._page_id_pattern = None  # assigned by concrete subclasses
        self._entries = []
        self._page_map = {}
        self._target = target

    @abstractmethod
    def collect_pages(self, page_dir):
        """Populate ``self._page_map``; must be provided by subclasses."""
        raise NotImplementedError

    def read_pages(self):
        """Load every collected page file into an entry object.

        Progress is printed on a single, carriage-return-updated line.
        A page whose entry raises ``ValueError`` from ``set_page`` is
        reported and left out of ``self._entries``.
        """
        total = len(self._page_map)
        numbered = enumerate(self._page_map.items(), start=1)
        for count, (page_id, page_path) in numbered:
            print(f"Reading page {count}/{total}", end='\r', flush=True)
            entry = new_entry(self._target, page_id)
            with open(page_path, "r", encoding="utf-8") as page_file:
                page = page_file.read()
            try:
                entry.set_page(page)
            except ValueError as err:
                print(err)
                print("Try deleting and redownloading file:")
                print(f"\t{page_path}\n")
            else:
                self._entries.append(entry)
        # Terminate the progress line.
        print()

    def make_yomichan_dictionary(self, media_dir, validate):
        """Export the loaded entries with the target's Yomichan exporter."""
        new_yomichan_exporter(self._target).export(
            self._entries, media_dir, validate)

    def make_mdict_dictionary(self, media_dir, icon_file):
        """Export the loaded entries with the target's MDict exporter."""
        new_mdict_exporter(self._target).export(
            self._entries, media_dir, icon_file)

    def _parse_page_id(self, page_link):
        """Return the integer page id found in *page_link*.

        Returns ``None`` when the pattern does not match or when the id
        is already a key of ``self._page_map``.
        """
        match = re.search(self._page_id_pattern, page_link)
        if match is not None:
            candidate = int(match.group(1))
            if candidate not in self._page_map:
                return candidate
        return None
|
29
bot/crawlers/base/jitenon.py
Normal file
29
bot/crawlers/base/jitenon.py
Normal file
|
@ -0,0 +1,29 @@
|
||||||
|
from bs4 import BeautifulSoup
|
||||||
|
|
||||||
|
from bot.crawlers.scrapers.jitenon import Jitenon as JitenonScraper
|
||||||
|
from bot.crawlers.base.crawler import BaseCrawler
|
||||||
|
|
||||||
|
|
||||||
|
class JitenonCrawler(BaseCrawler):
    """Shared crawler for jitenon.jp dictionaries with a single-page index.

    Subclasses assign ``_gojuon_url`` (the index page) and
    ``_page_id_pattern`` (regex capturing the numeric page id).
    """

    def __init__(self, target):
        super().__init__(target)
        self._gojuon_url = None  # assigned by concrete subclasses

    def collect_pages(self, page_dir):
        """Scrape the index, every kana page and every word page.

        Each newly seen word page is scraped and the path returned by the
        scraper is stored in ``self._page_map`` under its page id.
        *page_dir* is not used by this implementation.
        """
        print("Scraping jitenon.jp")
        jitenon = JitenonScraper()
        gojuon_doc, _ = jitenon.scrape(self._gojuon_url)
        gojuon_soup = BeautifulSoup(gojuon_doc, features="html.parser")
        # `href=True` is a find_all() filter and is not applied by select();
        # the attribute selector skips anchors that lack an href instead of
        # failing with KeyError on the subscript below.
        for gojuon_a in gojuon_soup.select(".kana_area a[href]"):
            gojuon_href = gojuon_a['href']
            kana_doc, _ = jitenon.scrape(gojuon_href)
            kana_soup = BeautifulSoup(kana_doc, features="html.parser")
            for kana_a in kana_soup.select(".word_box a[href]"):
                page_link = kana_a['href']
                page_id = self._parse_page_id(page_link)
                if page_id is None:
                    # Not a word page, or already collected.
                    continue
                _, page_path = jitenon.scrape(page_link)
                self._page_map[page_id] = page_path
        pages_len = len(self._page_map)
        print(f"Finished scraping {pages_len} pages")
|
19
bot/crawlers/base/monokakido.py
Normal file
19
bot/crawlers/base/monokakido.py
Normal file
|
@ -0,0 +1,19 @@
|
||||||
|
import os
|
||||||
|
from bot.crawlers.base.crawler import BaseCrawler
|
||||||
|
|
||||||
|
|
||||||
|
class MonokakidoCrawler(BaseCrawler):
    """Crawler that picks up numbered ``.xml`` page files from a directory."""

    def __init__(self, target):
        super().__init__(target)
        # File names consisting solely of digits followed by ".xml".
        self._page_id_pattern = r"^([0-9]+)\.xml$"

    def collect_pages(self, page_dir):
        """Map each usable page id to its file path inside *page_dir*.

        Files whose name does not match the pattern, whose id is already
        mapped, or whose id is 0 are ignored.
        """
        print(f"Searching for page files in `{page_dir}`")
        for filename in os.listdir(page_dir):
            page_id = self._parse_page_id(filename)
            # page_id is an int or None, so falsiness covers None and 0.
            if not page_id:
                continue
            self._page_map[page_id] = os.path.join(page_dir, filename)
        print(f"Found {len(self._page_map)} page files for processing")
|
|
@ -1,158 +0,0 @@
|
||||||
import os
|
|
||||||
import re
|
|
||||||
from abc import ABC, abstractmethod
|
|
||||||
from bs4 import BeautifulSoup
|
|
||||||
|
|
||||||
import bot.crawlers.scraper as Scraper
|
|
||||||
from bot.entries.factory import new_entry
|
|
||||||
from bot.yomichan.exporters.factory import new_yomi_exporter
|
|
||||||
from bot.mdict.exporters.factory import new_mdict_exporter
|
|
||||||
|
|
||||||
|
|
||||||
class Crawler(ABC):
    """Abstract base for crawlers that collect pages and build entries.

    ``_page_map`` maps page ids to page file paths; ``_page_id_pattern`` is
    a regex (set by subclasses) whose first group captures the page id.
    """

    def __init__(self, target):
        """Remember *target* and start with no pages and no entries."""
        self._page_id_pattern = None  # assigned by concrete subclasses
        self._entries = []
        self._page_map = {}
        self._target = target

    @abstractmethod
    def collect_pages(self, page_dir):
        """Populate ``self._page_map``; must be provided by subclasses."""

    def read_pages(self):
        """Load every collected page file into an entry object.

        A page whose entry raises ``ValueError`` from ``set_page`` is
        reported and left out of ``self._entries``.
        """
        total = len(self._page_map)
        numbered = enumerate(self._page_map.items(), start=1)
        for count, (page_id, page_path) in numbered:
            print(f"Reading page {count}/{total}", end='\r', flush=True)
            entry = new_entry(self._target, page_id)
            with open(page_path, "r", encoding="utf-8") as page_file:
                page = page_file.read()
            try:
                entry.set_page(page)
            except ValueError as err:
                print(err)
                print("Try deleting and redownloading file:")
                print(f"\t{page_path}\n")
            else:
                self._entries.append(entry)
        # Terminate the progress line.
        print()

    def make_yomichan_dictionary(self, media_dir, validate):
        """Export the loaded entries with the target's Yomichan exporter."""
        new_yomi_exporter(self._target).export(
            self._entries, media_dir, validate)

    def make_mdict_dictionary(self, media_dir, icon_file):
        """Export the loaded entries with the target's MDict exporter."""
        new_mdict_exporter(self._target).export(
            self._entries, media_dir, icon_file)

    def _parse_page_id(self, page_link):
        """Return the integer page id in *page_link*, or ``None``.

        ``None`` is returned when the pattern does not match or when the
        id is already a key of ``self._page_map``.
        """
        match = re.search(self._page_id_pattern, page_link)
        if match is not None:
            candidate = int(match.group(1))
            if candidate not in self._page_map:
                return candidate
        return None
|
|
||||||
|
|
||||||
|
|
||||||
class JitenonKokugoCrawler(Crawler):
    """Crawler for kokugo.jitenon.jp, whose kana listings are paginated."""

    def __init__(self, target):
        super().__init__(target)
        self._gojuon_url = "https://kokugo.jitenon.jp/cat/gojuonindex.php"
        self._page_id_pattern = r"word/p([0-9]+)$"

    def collect_pages(self, page_dir):
        """Scrape the index, each paginated kana listing and each word page.

        Each newly seen word page is scraped and the path returned by the
        scraper is stored in ``self._page_map`` under its page id.
        *page_dir* is not used by this implementation.
        """
        jitenon = Scraper.Jitenon()
        gojuon_doc, _ = jitenon.scrape(self._gojuon_url)
        gojuon_soup = BeautifulSoup(gojuon_doc, features="html.parser")
        # `href=True` is a find_all() filter and is not applied by select();
        # the attribute selector skips anchors that lack an href instead of
        # failing with KeyError on the subscript below.
        for gojuon_a in gojuon_soup.select(".kana_area a[href]"):
            gojuon_href = gojuon_a['href']
            max_kana_page = 1
            current_kana_page = 1
            while current_kana_page <= max_kana_page:
                kana_doc, _ = jitenon.scrape(f"{gojuon_href}&page={current_kana_page}")
                current_kana_page += 1
                kana_soup = BeautifulSoup(kana_doc, features="html.parser")
                page_total = kana_soup.find(class_="page_total").text
                # NOTE(review): the captured number is used as the last page
                # index; confirm that it is a page count, not an item count.
                m = re.search(r"全([0-9]+)件", page_total)
                if m:
                    max_kana_page = int(m.group(1))
                for kana_a in kana_soup.select(".word_box a[href]"):
                    page_link = kana_a['href']
                    page_id = self._parse_page_id(page_link)
                    if page_id is None:
                        # Not a word page, or already collected.
                        continue
                    _, page_path = jitenon.scrape(page_link)
                    self._page_map[page_id] = page_path
        pages_len = len(self._page_map)
        print(f"Finished scraping {pages_len} pages")
|
|
||||||
|
|
||||||
|
|
||||||
class _JitenonCrawler(Crawler):
    """Shared crawler for jitenon.jp dictionaries with a single-page index.

    Subclasses assign ``_gojuon_url`` and ``_page_id_pattern``.
    """

    def __init__(self, target):
        super().__init__(target)
        self._gojuon_url = None  # assigned by concrete subclasses

    def collect_pages(self, page_dir):
        """Scrape the index, every kana page and every word page.

        Each newly seen word page is scraped and the path returned by the
        scraper is stored in ``self._page_map`` under its page id.
        *page_dir* is not used by this implementation.
        """
        print("Scraping jitenon.jp")
        jitenon = Scraper.Jitenon()
        gojuon_doc, _ = jitenon.scrape(self._gojuon_url)
        gojuon_soup = BeautifulSoup(gojuon_doc, features="html.parser")
        # `href=True` is a find_all() filter and is not applied by select();
        # the attribute selector skips anchors that lack an href instead of
        # failing with KeyError on the subscript below.
        for gojuon_a in gojuon_soup.select(".kana_area a[href]"):
            gojuon_href = gojuon_a['href']
            kana_doc, _ = jitenon.scrape(gojuon_href)
            kana_soup = BeautifulSoup(kana_doc, features="html.parser")
            for kana_a in kana_soup.select(".word_box a[href]"):
                page_link = kana_a['href']
                page_id = self._parse_page_id(page_link)
                if page_id is None:
                    # Not a word page, or already collected.
                    continue
                _, page_path = jitenon.scrape(page_link)
                self._page_map[page_id] = page_path
        pages_len = len(self._page_map)
        print(f"Finished scraping {pages_len} pages")
|
|
||||||
|
|
||||||
|
|
||||||
class JitenonYojiCrawler(_JitenonCrawler):
    """Jitenon crawler configured for yoji.jitenon.jp."""

    def __init__(self, target):
        super().__init__(target)
        # Word pages end in "<digits>.html".
        self._page_id_pattern = r"([0-9]+)\.html$"
        self._gojuon_url = "https://yoji.jitenon.jp/cat/gojuon.html"
|
|
||||||
|
|
||||||
|
|
||||||
class JitenonKotowazaCrawler(_JitenonCrawler):
    """Jitenon crawler configured for kotowaza.jitenon.jp."""

    def __init__(self, target):
        super().__init__(target)
        # Word pages end in "<digits>.php".
        self._page_id_pattern = r"([0-9]+)\.php$"
        self._gojuon_url = "https://kotowaza.jitenon.jp/cat/gojuon.php"
|
|
||||||
|
|
||||||
|
|
||||||
class _MonokakidoCrawler(Crawler):
    """Crawler that picks up numbered ``.xml`` page files from a directory."""

    def __init__(self, target):
        super().__init__(target)
        # File names consisting solely of digits followed by ".xml".
        self._page_id_pattern = r"^([0-9]+)\.xml$"

    def collect_pages(self, page_dir):
        """Map each usable page id to its file path inside *page_dir*.

        Files whose name does not match the pattern, whose id is already
        mapped, or whose id is 0 are ignored.
        """
        print(f"Searching for page files in `{page_dir}`")
        for filename in os.listdir(page_dir):
            page_id = self._parse_page_id(filename)
            # page_id is an int or None, so falsiness covers None and 0.
            if not page_id:
                continue
            self._page_map[page_id] = os.path.join(page_dir, filename)
        print(f"Found {len(self._page_map)} page files for processing")
|
|
||||||
|
|
||||||
|
|
||||||
class Smk8Crawler(_MonokakidoCrawler):
    """Crawler for the SMK8 target; adds nothing to _MonokakidoCrawler."""
|
|
||||||
|
|
||||||
|
|
||||||
class Daijirin2Crawler(_MonokakidoCrawler):
    """Crawler for the Daijirin2 target; adds nothing to _MonokakidoCrawler."""
|
|
||||||
|
|
||||||
|
|
||||||
class Sankoku8Crawler(_MonokakidoCrawler):
    """Crawler for the Sankoku8 target; adds nothing to _MonokakidoCrawler."""
|
|
5
bot/crawlers/daijirin2.py
Normal file
5
bot/crawlers/daijirin2.py
Normal file
|
@ -0,0 +1,5 @@
|
||||||
|
from bot.crawlers.base.monokakido import MonokakidoCrawler
|
||||||
|
|
||||||
|
|
||||||
|
class Crawler(MonokakidoCrawler):
    """Daijirin2 crawler; all behaviour is inherited from MonokakidoCrawler."""
|
|
@ -1,20 +0,0 @@
|
||||||
from bot.targets import Targets
|
|
||||||
|
|
||||||
from bot.crawlers.crawlers import JitenonKokugoCrawler
|
|
||||||
from bot.crawlers.crawlers import JitenonYojiCrawler
|
|
||||||
from bot.crawlers.crawlers import JitenonKotowazaCrawler
|
|
||||||
from bot.crawlers.crawlers import Smk8Crawler
|
|
||||||
from bot.crawlers.crawlers import Daijirin2Crawler
|
|
||||||
from bot.crawlers.crawlers import Sankoku8Crawler
|
|
||||||
|
|
||||||
|
|
||||||
def new_crawler(target):
    """Instantiate the crawler class registered for *target*.

    Raises ``KeyError`` when *target* has no registered crawler.
    """
    crawler_classes = {
        Targets.JITENON_KOKUGO: JitenonKokugoCrawler,
        Targets.JITENON_YOJI: JitenonYojiCrawler,
        Targets.JITENON_KOTOWAZA: JitenonKotowazaCrawler,
        Targets.SMK8: Smk8Crawler,
        Targets.DAIJIRIN2: Daijirin2Crawler,
        Targets.SANKOKU8: Sankoku8Crawler,
    }
    crawler_class = crawler_classes[target]
    return crawler_class(target)
|
|
38
bot/crawlers/jitenon_kokugo.py
Normal file
38
bot/crawlers/jitenon_kokugo.py
Normal file
|
@ -0,0 +1,38 @@
|
||||||
|
import re
|
||||||
|
from bs4 import BeautifulSoup
|
||||||
|
|
||||||
|
from bot.crawlers.base.crawler import BaseCrawler
|
||||||
|
from bot.crawlers.scrapers.jitenon import Jitenon as JitenonScraper
|
||||||
|
|
||||||
|
|
||||||
|
class Crawler(BaseCrawler):
    """Crawler for kokugo.jitenon.jp, whose kana listings are paginated."""

    def __init__(self, target):
        super().__init__(target)
        self._gojuon_url = "https://kokugo.jitenon.jp/cat/gojuonindex.php"
        self._page_id_pattern = r"word/p([0-9]+)$"

    def collect_pages(self, page_dir):
        """Scrape the index, each paginated kana listing and each word page.

        Each newly seen word page is scraped and the path returned by the
        scraper is stored in ``self._page_map`` under its page id.
        *page_dir* is not used by this implementation.
        """
        jitenon = JitenonScraper()
        gojuon_doc, _ = jitenon.scrape(self._gojuon_url)
        gojuon_soup = BeautifulSoup(gojuon_doc, features="html.parser")
        # `href=True` is a find_all() filter and is not applied by select();
        # the attribute selector skips anchors that lack an href instead of
        # failing with KeyError on the subscript below.
        for gojuon_a in gojuon_soup.select(".kana_area a[href]"):
            gojuon_href = gojuon_a['href']
            max_kana_page = 1
            current_kana_page = 1
            while current_kana_page <= max_kana_page:
                kana_doc, _ = jitenon.scrape(f"{gojuon_href}&page={current_kana_page}")
                current_kana_page += 1
                kana_soup = BeautifulSoup(kana_doc, features="html.parser")
                page_total = kana_soup.find(class_="page_total").text
                # NOTE(review): the captured number is used as the last page
                # index; confirm that it is a page count, not an item count.
                m = re.search(r"全([0-9]+)件", page_total)
                if m:
                    max_kana_page = int(m.group(1))
                for kana_a in kana_soup.select(".word_box a[href]"):
                    page_link = kana_a['href']
                    page_id = self._parse_page_id(page_link)
                    if page_id is None:
                        # Not a word page, or already collected.
                        continue
                    _, page_path = jitenon.scrape(page_link)
                    self._page_map[page_id] = page_path
        pages_len = len(self._page_map)
        print(f"Finished scraping {pages_len} pages")
|
8
bot/crawlers/jitenon_kotowaza.py
Normal file
8
bot/crawlers/jitenon_kotowaza.py
Normal file
|
@ -0,0 +1,8 @@
|
||||||
|
from bot.crawlers.base.jitenon import JitenonCrawler
|
||||||
|
|
||||||
|
|
||||||
|
class Crawler(JitenonCrawler):
    """Jitenon crawler configured for kotowaza.jitenon.jp."""

    def __init__(self, target):
        super().__init__(target)
        # Word pages end in "<digits>.php".
        self._page_id_pattern = r"([0-9]+)\.php$"
        self._gojuon_url = "https://kotowaza.jitenon.jp/cat/gojuon.php"
|
8
bot/crawlers/jitenon_yoji.py
Normal file
8
bot/crawlers/jitenon_yoji.py
Normal file
|
@ -0,0 +1,8 @@
|
||||||
|
from bot.crawlers.base.jitenon import JitenonCrawler
|
||||||
|
|
||||||
|
|
||||||
|
class Crawler(JitenonCrawler):
    """Jitenon crawler configured for yoji.jitenon.jp."""

    def __init__(self, target):
        super().__init__(target)
        # Word pages end in "<digits>.html".
        self._page_id_pattern = r"([0-9]+)\.html$"
        self._gojuon_url = "https://yoji.jitenon.jp/cat/gojuon.html"
|
5
bot/crawlers/sankoku8.py
Normal file
5
bot/crawlers/sankoku8.py
Normal file
|
@ -0,0 +1,5 @@
|
||||||
|
from bot.crawlers.base.monokakido import MonokakidoCrawler
|
||||||
|
|
||||||
|
|
||||||
|
class Crawler(MonokakidoCrawler):
    """Sankoku8 crawler; all behaviour is inherited from MonokakidoCrawler."""
|
10
bot/crawlers/scrapers/jitenon.py
Normal file
10
bot/crawlers/scrapers/jitenon.py
Normal file
|
@ -0,0 +1,10 @@
|
||||||
|
import re
|
||||||
|
from bot.crawlers.scrapers.scraper import BaseScraper
|
||||||
|
|
||||||
|
|
||||||
|
class Jitenon(BaseScraper):
    """Scraper whose network-location pattern targets jitenon.jp."""

    def _get_netloc_re(self):
        """Return a compiled regex for ``jitenon.jp`` network locations.

        The pattern matches the bare domain or the domain preceded by a
        dotted prefix, which is captured in group 1.
        """
        return re.compile(
            r"^(?:([A-Za-z0-9.\-]+)\.)?"
            r"jitenon\.jp"
            r"$"
        )
|
|
@ -1,24 +1,24 @@
|
||||||
import time
|
import time
|
||||||
import requests
|
|
||||||
import re
|
import re
|
||||||
import os
|
import os
|
||||||
import hashlib
|
import hashlib
|
||||||
from datetime import datetime
|
from datetime import datetime
|
||||||
from pathlib import Path
|
|
||||||
|
|
||||||
from platformdirs import user_cache_dir
|
|
||||||
from urllib.parse import urlparse
|
from urllib.parse import urlparse
|
||||||
|
from pathlib import Path
|
||||||
|
from abc import ABC, abstractmethod
|
||||||
|
|
||||||
|
import requests
|
||||||
from requests.adapters import HTTPAdapter
|
from requests.adapters import HTTPAdapter
|
||||||
from requests.packages.urllib3.util.retry import Retry
|
from requests.packages.urllib3.util.retry import Retry
|
||||||
|
from platformdirs import user_cache_dir
|
||||||
|
|
||||||
from bot.data import load_config
|
from bot.data import load_config
|
||||||
|
|
||||||
|
|
||||||
class Scraper():
|
class BaseScraper(ABC):
|
||||||
def __init__(self):
|
def __init__(self):
|
||||||
self._config = load_config()
|
self._config = load_config()
|
||||||
pattern = r"^(?:([A-Za-z0-9.\-]+)\.)?" + self.domain + r"$"
|
self.netloc_re = self._get_netloc_re()
|
||||||
self.netloc_re = re.compile(pattern)
|
|
||||||
self.__set_session()
|
self.__set_session()
|
||||||
|
|
||||||
def scrape(self, urlstring):
|
def scrape(self, urlstring):
|
||||||
|
@ -34,6 +34,10 @@ class Scraper():
|
||||||
print("Discovering cached files...", end='\r', flush=True)
|
print("Discovering cached files...", end='\r', flush=True)
|
||||||
return html, cache_path
|
return html, cache_path
|
||||||
|
|
||||||
|
@abstractmethod
|
||||||
|
def _get_netloc_re(self):
|
||||||
|
raise NotImplementedError
|
||||||
|
|
||||||
def __set_session(self):
|
def __set_session(self):
|
||||||
retry_strategy = Retry(
|
retry_strategy = Retry(
|
||||||
total=3,
|
total=3,
|
||||||
|
@ -99,9 +103,3 @@ class Scraper():
|
||||||
self.__set_session()
|
self.__set_session()
|
||||||
response = self.session.get(urlstring, timeout=10)
|
response = self.session.get(urlstring, timeout=10)
|
||||||
return response.text
|
return response.text
|
||||||
|
|
||||||
|
|
||||||
class Jitenon(Scraper):
|
|
||||||
def __init__(self):
|
|
||||||
self.domain = r"jitenon\.jp"
|
|
||||||
super().__init__()
|
|
5
bot/crawlers/smk8.py
Normal file
5
bot/crawlers/smk8.py
Normal file
|
@ -0,0 +1,5 @@
|
||||||
|
from bot.crawlers.base.monokakido import MonokakidoCrawler
|
||||||
|
|
||||||
|
|
||||||
|
class Crawler(MonokakidoCrawler):
    """SMK8 crawler; all behaviour is inherited from MonokakidoCrawler."""
|
|
@ -18,15 +18,15 @@ class Entry(ABC):
|
||||||
|
|
||||||
@abstractmethod
|
@abstractmethod
|
||||||
def get_global_identifier(self):
|
def get_global_identifier(self):
|
||||||
pass
|
raise NotImplementedError
|
||||||
|
|
||||||
@abstractmethod
|
@abstractmethod
|
||||||
def set_page(self, page):
|
def set_page(self, page):
|
||||||
pass
|
raise NotImplementedError
|
||||||
|
|
||||||
@abstractmethod
|
@abstractmethod
|
||||||
def get_page_soup(self):
|
def get_page_soup(self):
|
||||||
pass
|
raise NotImplementedError
|
||||||
|
|
||||||
def get_headwords(self):
|
def get_headwords(self):
|
||||||
if self._headwords is not None:
|
if self._headwords is not None:
|
||||||
|
@ -38,15 +38,15 @@ class Entry(ABC):
|
||||||
|
|
||||||
@abstractmethod
|
@abstractmethod
|
||||||
def _get_headwords(self):
|
def _get_headwords(self):
|
||||||
pass
|
raise NotImplementedError
|
||||||
|
|
||||||
@abstractmethod
|
@abstractmethod
|
||||||
def _add_variant_expressions(self, headwords):
|
def _add_variant_expressions(self, headwords):
|
||||||
pass
|
raise NotImplementedError
|
||||||
|
|
||||||
@abstractmethod
|
@abstractmethod
|
||||||
def get_part_of_speech_tags(self):
|
def get_part_of_speech_tags(self):
|
||||||
pass
|
raise NotImplementedError
|
||||||
|
|
||||||
def get_parent(self):
|
def get_parent(self):
|
||||||
if self.entry_id in self.SUBENTRY_ID_TO_ENTRY_ID:
|
if self.entry_id in self.SUBENTRY_ID_TO_ENTRY_ID:
|
||||||
|
|
|
@ -58,7 +58,7 @@ class JitenonEntry(Entry):
|
||||||
|
|
||||||
@abstractmethod
|
@abstractmethod
|
||||||
def _get_column_map(self):
|
def _get_column_map(self):
|
||||||
pass
|
raise NotImplementedError
|
||||||
|
|
||||||
def __set_modified_date(self, page):
|
def __set_modified_date(self, page):
|
||||||
m = re.search(r"\"dateModified\": \"(\d{4}-\d{2}-\d{2})", page)
|
m = re.search(r"\"dateModified\": \"(\d{4}-\d{2}-\d{2})", page)
|
||||||
|
|
|
@ -39,7 +39,7 @@ class SanseidoEntry(Entry):
|
||||||
|
|
||||||
@abstractmethod
|
@abstractmethod
|
||||||
def _get_subentry_parameters(self):
|
def _get_subentry_parameters(self):
|
||||||
pass
|
raise NotImplementedError
|
||||||
|
|
||||||
def _add_variant_expressions(self, headwords):
|
def _add_variant_expressions(self, headwords):
|
||||||
for expressions in headwords.values():
|
for expressions in headwords.values():
|
||||||
|
|
|
@ -1,7 +0,0 @@
|
||||||
import importlib
|
|
||||||
|
|
||||||
|
|
||||||
def new_entry(target, page_id):
    """Build the ``Entry`` for *target* and *page_id*.

    The class is looked up in ``bot.entries.<target name, lowercased>.entry``,
    which is imported on demand.
    """
    module_name = "bot.entries." + target.name.lower() + ".entry"
    entry_class = importlib.import_module(module_name).Entry
    return entry_class(target, page_id)
|
|
37
bot/factory.py
Normal file
37
bot/factory.py
Normal file
|
@ -0,0 +1,37 @@
|
||||||
|
import importlib
|
||||||
|
|
||||||
|
|
||||||
|
def _target_module(package, target, submodule=None):
    """Import the module named after *target* inside *package*.

    The module name is ``target.name`` lowercased; *submodule*, when given,
    is appended as a further dotted component.
    """
    module_path = f"{package}.{target.name.lower()}"
    if submodule is not None:
        module_path = f"{module_path}.{submodule}"
    return importlib.import_module(module_path)


def new_crawler(target):
    """Return ``Crawler(target)`` from ``bot.crawlers.<target>``."""
    return _target_module("bot.crawlers", target).Crawler(target)


def new_entry(target, page_id):
    """Return ``Entry(target, page_id)`` from ``bot.entries.<target>.entry``."""
    return _target_module("bot.entries", target, "entry").Entry(target, page_id)


def new_yomichan_exporter(target):
    """Return ``Exporter(target)`` from ``bot.yomichan.exporters.<target>``."""
    return _target_module("bot.yomichan.exporters", target).Exporter(target)


def new_yomichan_terminator(target):
    """Return ``Terminator(target)`` from ``bot.yomichan.terms.<target>``."""
    return _target_module("bot.yomichan.terms", target).Terminator(target)


def new_mdict_exporter(target):
    """Return ``Exporter(target)`` from ``bot.mdict.exporters.<target>``."""
    return _target_module("bot.mdict.exporters", target).Exporter(target)


def new_mdict_terminator(target):
    """Return ``Terminator(target)`` from ``bot.mdict.terms.<target>``."""
    return _target_module("bot.mdict.terms", target).Terminator(target)
|
|
@ -1,20 +1,18 @@
|
||||||
# pylint: disable=too-few-public-methods
|
|
||||||
|
|
||||||
import subprocess
|
|
||||||
import os
|
import os
|
||||||
import shutil
|
import shutil
|
||||||
|
import subprocess
|
||||||
from abc import ABC, abstractmethod
|
from abc import ABC, abstractmethod
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
from datetime import datetime
|
|
||||||
from platformdirs import user_documents_dir, user_cache_dir
|
from platformdirs import user_documents_dir, user_cache_dir
|
||||||
|
|
||||||
from bot.mdict.terms.factory import new_terminator
|
from bot.factory import new_mdict_terminator
|
||||||
|
|
||||||
|
|
||||||
class Exporter(ABC):
|
class BaseExporter(ABC):
|
||||||
def __init__(self, target):
|
def __init__(self, target):
|
||||||
self._target = target
|
self._target = target
|
||||||
self._terminator = new_terminator(target)
|
self._terminator = new_mdict_terminator(target)
|
||||||
self._build_dir = None
|
self._build_dir = None
|
||||||
self._build_media_dir = None
|
self._build_media_dir = None
|
||||||
self._description_file = None
|
self._description_file = None
|
||||||
|
@ -168,58 +166,8 @@ class Exporter(ABC):
|
||||||
|
|
||||||
@abstractmethod
|
@abstractmethod
|
||||||
def _get_revision(self, entries):
|
def _get_revision(self, entries):
|
||||||
pass
|
raise NotImplementedError
|
||||||
|
|
||||||
@abstractmethod
|
@abstractmethod
|
||||||
def _get_attribution(self, entries):
|
def _get_attribution(self, entries):
|
||||||
pass
|
raise NotImplementedError
|
||||||
|
|
||||||
|
|
||||||
class _JitenonExporter(Exporter):
    """Exporter whose revision and attribution come from the newest entry."""

    @staticmethod
    def _latest_entry(entries):
        """Return the entry with the greatest ``modified_date``.

        The first such entry wins on ties. Raises ``ValueError`` when
        *entries* is empty.
        """
        return max(entries, key=lambda entry: entry.modified_date)

    def _get_revision(self, entries):
        """Return the newest modification date formatted as a revision."""
        modified_date = self._latest_entry(entries).modified_date
        return modified_date.strftime("%Y年%m月%d日閲覧")

    def _get_attribution(self, entries):
        """Return the attribution of the most recently modified entry.

        The previous loop never updated its running date, so its condition
        was always true and the last entry's attribution was returned
        regardless of dates.
        """
        return self._latest_entry(entries).attribution
|
|
||||||
|
|
||||||
|
|
||||||
class JitenonKokugoExporter(_JitenonExporter):
    """Exporter for Jitenon Kokugo; adds nothing to _JitenonExporter."""
|
|
||||||
|
|
||||||
|
|
||||||
class JitenonYojiExporter(_JitenonExporter):
    """Exporter for Jitenon Yoji; adds nothing to _JitenonExporter."""
|
|
||||||
|
|
||||||
|
|
||||||
class JitenonKotowazaExporter(_JitenonExporter):
    """Exporter for Jitenon Kotowaza; adds nothing to _JitenonExporter."""
|
|
||||||
|
|
||||||
|
|
||||||
class _MonokakidoExporter(Exporter):
    """Exporter whose revision is the date on which the export runs."""

    def _get_revision(self, entries):
        """Return the current local date as a revision; *entries* is unused."""
        return datetime.now().strftime("%Y年%m月%d日作成")
|
|
||||||
|
|
||||||
|
|
||||||
class Smk8Exporter(_MonokakidoExporter):
    """Exporter for the SMK8 target."""

    def _get_attribution(self, entries):
        """Return the fixed copyright notice; *entries* is unused."""
        notice = "© Sanseido Co., LTD. 2020"
        return notice
|
|
||||||
|
|
||||||
|
|
||||||
class Daijirin2Exporter(_MonokakidoExporter):
    """Exporter for the Daijirin2 target."""

    def _get_attribution(self, entries):
        """Return the fixed copyright notice; *entries* is unused."""
        notice = "© Sanseido Co., LTD. 2019"
        return notice
|
|
||||||
|
|
||||||
|
|
||||||
class Sankoku8Exporter(_MonokakidoExporter):
    """Exporter for the Sankoku8 target."""

    def _get_attribution(self, entries):
        """Return the fixed copyright notice; *entries* is unused."""
        notice = "© Sanseido Co., LTD. 2021"
        return notice
|
|
18
bot/mdict/exporters/base/jitenon.py
Normal file
18
bot/mdict/exporters/base/jitenon.py
Normal file
|
@ -0,0 +1,18 @@
|
||||||
|
from bot.mdict.exporters.base.exporter import BaseExporter
|
||||||
|
|
||||||
|
|
||||||
|
class JitenonExporter(BaseExporter):
    """Exporter whose revision and attribution come from the newest entry."""

    @staticmethod
    def _latest_entry(entries):
        """Return the entry with the greatest ``modified_date``.

        The first such entry wins on ties. Raises ``ValueError`` when
        *entries* is empty.
        """
        return max(entries, key=lambda entry: entry.modified_date)

    def _get_revision(self, entries):
        """Return the newest modification date formatted as a revision."""
        modified_date = self._latest_entry(entries).modified_date
        return modified_date.strftime("%Y年%m月%d日閲覧")

    def _get_attribution(self, entries):
        """Return the attribution of the most recently modified entry.

        The previous loop never updated its running date, so its condition
        was always true and the last entry's attribution was returned
        regardless of dates.
        """
        return self._latest_entry(entries).attribution
|
8
bot/mdict/exporters/base/monokakido.py
Normal file
8
bot/mdict/exporters/base/monokakido.py
Normal file
|
@ -0,0 +1,8 @@
|
||||||
|
from datetime import datetime
|
||||||
|
from bot.mdict.exporters.base.exporter import BaseExporter
|
||||||
|
|
||||||
|
|
||||||
|
class MonokakidoExporter(BaseExporter):
    """Exporter whose revision is the date on which the export runs."""

    def _get_revision(self, entries):
        """Return the current local date as a revision; *entries* is unused."""
        return datetime.now().strftime("%Y年%m月%d日作成")
|
6
bot/mdict/exporters/daijirin2.py
Normal file
6
bot/mdict/exporters/daijirin2.py
Normal file
|
@ -0,0 +1,6 @@
|
||||||
|
from bot.mdict.exporters.base.monokakido import MonokakidoExporter
|
||||||
|
|
||||||
|
|
||||||
|
class Exporter(MonokakidoExporter):
    """MDict exporter for the Daijirin2 target."""

    def _get_attribution(self, entries):
        """Return the fixed copyright notice; *entries* is unused."""
        notice = "© Sanseido Co., LTD. 2019"
        return notice
|
|
@ -1,20 +0,0 @@
|
||||||
from bot.targets import Targets
|
|
||||||
|
|
||||||
from bot.mdict.exporters.export import JitenonKokugoExporter
|
|
||||||
from bot.mdict.exporters.export import JitenonYojiExporter
|
|
||||||
from bot.mdict.exporters.export import JitenonKotowazaExporter
|
|
||||||
from bot.mdict.exporters.export import Smk8Exporter
|
|
||||||
from bot.mdict.exporters.export import Daijirin2Exporter
|
|
||||||
from bot.mdict.exporters.export import Sankoku8Exporter
|
|
||||||
|
|
||||||
|
|
||||||
def new_mdict_exporter(target):
    """Instantiate the MDict exporter class registered for *target*.

    Raises ``KeyError`` when *target* has no registered exporter.
    """
    exporter_classes = {
        Targets.JITENON_KOKUGO: JitenonKokugoExporter,
        Targets.JITENON_YOJI: JitenonYojiExporter,
        Targets.JITENON_KOTOWAZA: JitenonKotowazaExporter,
        Targets.SMK8: Smk8Exporter,
        Targets.DAIJIRIN2: Daijirin2Exporter,
        Targets.SANKOKU8: Sankoku8Exporter,
    }
    exporter_class = exporter_classes[target]
    return exporter_class(target)
|
|
5
bot/mdict/exporters/jitenon_kokugo.py
Normal file
5
bot/mdict/exporters/jitenon_kokugo.py
Normal file
|
@ -0,0 +1,5 @@
|
||||||
|
from bot.mdict.exporters.base.jitenon import JitenonExporter
|
||||||
|
|
||||||
|
|
||||||
|
class Exporter(JitenonExporter):
    """MDict exporter for Jitenon Kokugo; inherits everything as-is."""
|
5
bot/mdict/exporters/jitenon_kotowaza.py
Normal file
5
bot/mdict/exporters/jitenon_kotowaza.py
Normal file
|
@ -0,0 +1,5 @@
|
||||||
|
from bot.mdict.exporters.base.jitenon import JitenonExporter
|
||||||
|
|
||||||
|
|
||||||
|
class Exporter(JitenonExporter):
    """MDict exporter for Jitenon Kotowaza; inherits everything as-is."""
|
5
bot/mdict/exporters/jitenon_yoji.py
Normal file
5
bot/mdict/exporters/jitenon_yoji.py
Normal file
|
@ -0,0 +1,5 @@
|
||||||
|
from bot.mdict.exporters.base.jitenon import JitenonExporter
|
||||||
|
|
||||||
|
|
||||||
|
class Exporter(JitenonExporter):
    """MDict exporter for Jitenon Yoji; inherits everything as-is."""
|
6
bot/mdict/exporters/sankoku8.py
Normal file
6
bot/mdict/exporters/sankoku8.py
Normal file
|
@ -0,0 +1,6 @@
|
||||||
|
from bot.mdict.exporters.base.monokakido import MonokakidoExporter
|
||||||
|
|
||||||
|
|
||||||
|
class Exporter(MonokakidoExporter):
    """MDict exporter for the Sankoku8 target."""

    def _get_attribution(self, entries):
        """Return the fixed copyright notice; *entries* is unused."""
        notice = "© Sanseido Co., LTD. 2021"
        return notice
|
6
bot/mdict/exporters/smk8.py
Normal file
6
bot/mdict/exporters/smk8.py
Normal file
|
@ -0,0 +1,6 @@
|
||||||
|
from bot.mdict.exporters.base.monokakido import MonokakidoExporter
|
||||||
|
|
||||||
|
|
||||||
|
class Exporter(MonokakidoExporter):
    """MDict exporter for the SMK8 target."""

    def _get_attribution(self, entries):
        """Return the fixed copyright notice; *entries* is unused."""
        notice = "© Sanseido Co., LTD. 2020"
        return notice
|
20
bot/mdict/terms/base/jitenon.py
Normal file
20
bot/mdict/terms/base/jitenon.py
Normal file
|
@ -0,0 +1,20 @@
|
||||||
|
from bot.mdict.terms.base.terminator import BaseTerminator
|
||||||
|
|
||||||
|
|
||||||
|
class JitenonTerminator(BaseTerminator):
    """Terminator for Jitenon targets.

    Subclasses are expected to assign ``_glossary_maker``; this base
    leaves it as ``None``.
    """

    def __init__(self, target):
        super().__init__(target)
        self._glossary_maker = None

    def _glossary(self, entry):
        """Return the glossary for *entry*, building it at most once.

        Results are cached in ``self._glossary_cache`` by ``entry.entry_id``.
        """
        cache_key = entry.entry_id
        try:
            return self._glossary_cache[cache_key]
        except KeyError:
            pass
        built = self._glossary_maker.make_glossary(entry, self._media_dir)
        self._glossary_cache[cache_key] = built
        return built

    def _link_glossary_parameters(self, entry):
        """Return an empty list for every entry."""
        return []

    def _subentry_lists(self, entry):
        """Return an empty list for every entry."""
        return []
|
|
@ -2,7 +2,7 @@ import re
|
||||||
from abc import abstractmethod, ABC
|
from abc import abstractmethod, ABC
|
||||||
|
|
||||||
|
|
||||||
class Terminator(ABC):
|
class BaseTerminator(ABC):
|
||||||
def __init__(self, target):
|
def __init__(self, target):
|
||||||
self._target = target
|
self._target = target
|
||||||
self._glossary_cache = {}
|
self._glossary_cache = {}
|
||||||
|
@ -72,12 +72,12 @@ class Terminator(ABC):
|
||||||
|
|
||||||
@abstractmethod
|
@abstractmethod
|
||||||
def _glossary(self, entry):
|
def _glossary(self, entry):
|
||||||
pass
|
raise NotImplementedError
|
||||||
|
|
||||||
@abstractmethod
|
@abstractmethod
|
||||||
def _link_glossary_parameters(self, entry):
|
def _link_glossary_parameters(self, entry):
|
||||||
pass
|
raise NotImplementedError
|
||||||
|
|
||||||
@abstractmethod
|
@abstractmethod
|
||||||
def _subentry_lists(self, entry):
|
def _subentry_lists(self, entry):
|
||||||
pass
|
raise NotImplementedError
|
|
@ -1,8 +1,8 @@
|
||||||
from bot.mdict.terms.terminator import Terminator
|
from bot.mdict.terms.base.terminator import BaseTerminator
|
||||||
from bot.mdict.glossary.daijirin2 import make_glossary
|
from bot.mdict.glossary.daijirin2 import make_glossary
|
||||||
|
|
||||||
|
|
||||||
class Daijirin2Terminator(Terminator):
|
class Terminator(BaseTerminator):
|
||||||
def _glossary(self, entry):
|
def _glossary(self, entry):
|
||||||
if entry.entry_id in self._glossary_cache:
|
if entry.entry_id in self._glossary_cache:
|
||||||
return self._glossary_cache[entry.entry_id]
|
return self._glossary_cache[entry.entry_id]
|
||||||
|
|
|
@ -1,20 +0,0 @@
|
||||||
from bot.targets import Targets
|
|
||||||
|
|
||||||
from bot.mdict.terms.jitenon import JitenonKokugoTerminator
|
|
||||||
from bot.mdict.terms.jitenon import JitenonYojiTerminator
|
|
||||||
from bot.mdict.terms.jitenon import JitenonKotowazaTerminator
|
|
||||||
from bot.mdict.terms.smk8 import Smk8Terminator
|
|
||||||
from bot.mdict.terms.daijirin2 import Daijirin2Terminator
|
|
||||||
from bot.mdict.terms.sankoku8 import Sankoku8Terminator
|
|
||||||
|
|
||||||
|
|
||||||
def new_terminator(target):
|
|
||||||
terminator_map = {
|
|
||||||
Targets.JITENON_KOKUGO: JitenonKokugoTerminator,
|
|
||||||
Targets.JITENON_YOJI: JitenonYojiTerminator,
|
|
||||||
Targets.JITENON_KOTOWAZA: JitenonKotowazaTerminator,
|
|
||||||
Targets.SMK8: Smk8Terminator,
|
|
||||||
Targets.DAIJIRIN2: Daijirin2Terminator,
|
|
||||||
Targets.SANKOKU8: Sankoku8Terminator,
|
|
||||||
}
|
|
||||||
return terminator_map[target](target)
|
|
|
@ -1,42 +0,0 @@
|
||||||
from bot.mdict.terms.terminator import Terminator
|
|
||||||
|
|
||||||
from bot.mdict.glossary.jitenon import JitenonKokugoGlossary
|
|
||||||
from bot.mdict.glossary.jitenon import JitenonYojiGlossary
|
|
||||||
from bot.mdict.glossary.jitenon import JitenonKotowazaGlossary
|
|
||||||
|
|
||||||
|
|
||||||
class JitenonTerminator(Terminator):
|
|
||||||
def __init__(self, target):
|
|
||||||
super().__init__(target)
|
|
||||||
self._glossary_maker = None
|
|
||||||
|
|
||||||
def _glossary(self, entry):
|
|
||||||
if entry.entry_id in self._glossary_cache:
|
|
||||||
return self._glossary_cache[entry.entry_id]
|
|
||||||
glossary = self._glossary_maker.make_glossary(entry, self._media_dir)
|
|
||||||
self._glossary_cache[entry.entry_id] = glossary
|
|
||||||
return glossary
|
|
||||||
|
|
||||||
def _link_glossary_parameters(self, entry):
|
|
||||||
return []
|
|
||||||
|
|
||||||
def _subentry_lists(self, entry):
|
|
||||||
return []
|
|
||||||
|
|
||||||
|
|
||||||
class JitenonKokugoTerminator(JitenonTerminator):
|
|
||||||
def __init__(self, target):
|
|
||||||
super().__init__(target)
|
|
||||||
self._glossary_maker = JitenonKokugoGlossary()
|
|
||||||
|
|
||||||
|
|
||||||
class JitenonYojiTerminator(JitenonTerminator):
|
|
||||||
def __init__(self, target):
|
|
||||||
super().__init__(target)
|
|
||||||
self._glossary_maker = JitenonYojiGlossary()
|
|
||||||
|
|
||||||
|
|
||||||
class JitenonKotowazaTerminator(JitenonTerminator):
|
|
||||||
def __init__(self, target):
|
|
||||||
super().__init__(target)
|
|
||||||
self._glossary_maker = JitenonKotowazaGlossary()
|
|
8
bot/mdict/terms/jitenon_kokugo.py
Normal file
8
bot/mdict/terms/jitenon_kokugo.py
Normal file
|
@ -0,0 +1,8 @@
|
||||||
|
from bot.mdict.terms.base.jitenon import JitenonTerminator
|
||||||
|
from bot.mdict.glossary.jitenon import JitenonKokugoGlossary
|
||||||
|
|
||||||
|
|
||||||
|
class Terminator(JitenonTerminator):
|
||||||
|
def __init__(self, target):
|
||||||
|
super().__init__(target)
|
||||||
|
self._glossary_maker = JitenonKokugoGlossary()
|
8
bot/mdict/terms/jitenon_kotowaza.py
Normal file
8
bot/mdict/terms/jitenon_kotowaza.py
Normal file
|
@ -0,0 +1,8 @@
|
||||||
|
from bot.mdict.terms.base.jitenon import JitenonTerminator
|
||||||
|
from bot.mdict.glossary.jitenon import JitenonKotowazaGlossary
|
||||||
|
|
||||||
|
|
||||||
|
class Terminator(JitenonTerminator):
|
||||||
|
def __init__(self, target):
|
||||||
|
super().__init__(target)
|
||||||
|
self._glossary_maker = JitenonKotowazaGlossary()
|
8
bot/mdict/terms/jitenon_yoji.py
Normal file
8
bot/mdict/terms/jitenon_yoji.py
Normal file
|
@ -0,0 +1,8 @@
|
||||||
|
from bot.mdict.terms.base.jitenon import JitenonTerminator
|
||||||
|
from bot.mdict.glossary.jitenon import JitenonYojiGlossary
|
||||||
|
|
||||||
|
|
||||||
|
class Terminator(JitenonTerminator):
|
||||||
|
def __init__(self, target):
|
||||||
|
super().__init__(target)
|
||||||
|
self._glossary_maker = JitenonYojiGlossary()
|
|
@ -1,8 +1,8 @@
|
||||||
from bot.mdict.terms.terminator import Terminator
|
from bot.mdict.terms.base.terminator import BaseTerminator
|
||||||
from bot.mdict.glossary.sankoku8 import make_glossary
|
from bot.mdict.glossary.sankoku8 import make_glossary
|
||||||
|
|
||||||
|
|
||||||
class Sankoku8Terminator(Terminator):
|
class Terminator(BaseTerminator):
|
||||||
def _glossary(self, entry):
|
def _glossary(self, entry):
|
||||||
if entry.entry_id in self._glossary_cache:
|
if entry.entry_id in self._glossary_cache:
|
||||||
return self._glossary_cache[entry.entry_id]
|
return self._glossary_cache[entry.entry_id]
|
||||||
|
|
|
@ -1,8 +1,8 @@
|
||||||
from bot.mdict.terms.terminator import Terminator
|
from bot.mdict.terms.base.terminator import BaseTerminator
|
||||||
from bot.mdict.glossary.smk8 import make_glossary
|
from bot.mdict.glossary.smk8 import make_glossary
|
||||||
|
|
||||||
|
|
||||||
class Smk8Terminator(Terminator):
|
class Terminator(BaseTerminator):
|
||||||
def _glossary(self, entry):
|
def _glossary(self, entry):
|
||||||
if entry.entry_id in self._glossary_cache:
|
if entry.entry_id in self._glossary_cache:
|
||||||
return self._glossary_cache[entry.entry_id]
|
return self._glossary_cache[entry.entry_id]
|
||||||
|
|
|
@ -1,24 +1,22 @@
|
||||||
# pylint: disable=too-few-public-methods
|
|
||||||
|
|
||||||
import json
|
import json
|
||||||
import os
|
import os
|
||||||
import shutil
|
import shutil
|
||||||
import copy
|
import copy
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
from datetime import datetime
|
|
||||||
from abc import ABC, abstractmethod
|
from abc import ABC, abstractmethod
|
||||||
from platformdirs import user_documents_dir, user_cache_dir
|
|
||||||
|
|
||||||
import fastjsonschema
|
import fastjsonschema
|
||||||
|
from platformdirs import user_documents_dir, user_cache_dir
|
||||||
|
|
||||||
from bot.data import load_yomichan_metadata
|
from bot.data import load_yomichan_metadata
|
||||||
from bot.yomichan.terms.factory import new_terminator
|
|
||||||
from bot.data import load_yomichan_term_schema
|
from bot.data import load_yomichan_term_schema
|
||||||
|
from bot.factory import new_yomichan_terminator
|
||||||
|
|
||||||
|
|
||||||
class Exporter(ABC):
|
class BaseExporter(ABC):
|
||||||
def __init__(self, target):
|
def __init__(self, target):
|
||||||
self._target = target
|
self._target = target
|
||||||
self._terminator = new_terminator(target)
|
self._terminator = new_yomichan_terminator(target)
|
||||||
self._build_dir = None
|
self._build_dir = None
|
||||||
self._terms_per_file = 2000
|
self._terms_per_file = 2000
|
||||||
|
|
||||||
|
@ -36,11 +34,11 @@ class Exporter(ABC):
|
||||||
|
|
||||||
@abstractmethod
|
@abstractmethod
|
||||||
def _get_revision(self, entries):
|
def _get_revision(self, entries):
|
||||||
pass
|
raise NotImplementedError
|
||||||
|
|
||||||
@abstractmethod
|
@abstractmethod
|
||||||
def _get_attribution(self, entries):
|
def _get_attribution(self, entries):
|
||||||
pass
|
raise NotImplementedError
|
||||||
|
|
||||||
def _get_build_dir(self):
|
def _get_build_dir(self):
|
||||||
if self._build_dir is not None:
|
if self._build_dir is not None:
|
||||||
|
@ -118,10 +116,10 @@ class Exporter(ABC):
|
||||||
build_dir = self._get_build_dir()
|
build_dir = self._get_build_dir()
|
||||||
max_i = int(len(terms) / self._terms_per_file) + 1
|
max_i = int(len(terms) / self._terms_per_file) + 1
|
||||||
for i in range(max_i):
|
for i in range(max_i):
|
||||||
|
update = f"Writing terms to term bank {i+1}/{max_i}"
|
||||||
|
print(update, end='\r', flush=True)
|
||||||
start = self._terms_per_file * i
|
start = self._terms_per_file * i
|
||||||
end = self._terms_per_file * (i + 1)
|
end = self._terms_per_file * (i + 1)
|
||||||
update = f"Writing terms to term banks {start} - {end}"
|
|
||||||
print(update, end='\r', flush=True)
|
|
||||||
term_file = os.path.join(build_dir, f"term_bank_{i+1}.json")
|
term_file = os.path.join(build_dir, f"term_bank_{i+1}.json")
|
||||||
with open(term_file, "w", encoding='utf8') as f:
|
with open(term_file, "w", encoding='utf8') as f:
|
||||||
json.dump(terms[start:end], f, indent=4, ensure_ascii=False)
|
json.dump(terms[start:end], f, indent=4, ensure_ascii=False)
|
||||||
|
@ -142,8 +140,8 @@ class Exporter(ABC):
|
||||||
json.dump(tags, f, indent=4, ensure_ascii=False)
|
json.dump(tags, f, indent=4, ensure_ascii=False)
|
||||||
|
|
||||||
def __write_archive(self, filename):
|
def __write_archive(self, filename):
|
||||||
print("Archiving data to ZIP file...")
|
|
||||||
archive_format = "zip"
|
archive_format = "zip"
|
||||||
|
print(f"Archiving data to {archive_format.upper()} file...")
|
||||||
out_dir = os.path.join(user_documents_dir(), "jitenbot", "yomichan")
|
out_dir = os.path.join(user_documents_dir(), "jitenbot", "yomichan")
|
||||||
if not Path(out_dir).is_dir():
|
if not Path(out_dir).is_dir():
|
||||||
os.makedirs(out_dir)
|
os.makedirs(out_dir)
|
||||||
|
@ -154,58 +152,8 @@ class Exporter(ABC):
|
||||||
base_filename = os.path.join(out_dir, filename)
|
base_filename = os.path.join(out_dir, filename)
|
||||||
build_dir = self._get_build_dir()
|
build_dir = self._get_build_dir()
|
||||||
shutil.make_archive(base_filename, archive_format, build_dir)
|
shutil.make_archive(base_filename, archive_format, build_dir)
|
||||||
print(f"Dictionary file saved to {out_filepath}")
|
print(f"Dictionary file saved to `{out_filepath}`")
|
||||||
|
|
||||||
def __rm_build_dir(self):
|
def __rm_build_dir(self):
|
||||||
build_dir = self._get_build_dir()
|
build_dir = self._get_build_dir()
|
||||||
shutil.rmtree(build_dir)
|
shutil.rmtree(build_dir)
|
||||||
|
|
||||||
|
|
||||||
class _JitenonExporter(Exporter):
|
|
||||||
def _get_revision(self, entries):
|
|
||||||
modified_date = None
|
|
||||||
for entry in entries:
|
|
||||||
if modified_date is None or entry.modified_date > modified_date:
|
|
||||||
modified_date = entry.modified_date
|
|
||||||
revision = f"{self._target.value};{modified_date}"
|
|
||||||
return revision
|
|
||||||
|
|
||||||
def _get_attribution(self, entries):
|
|
||||||
modified_date = None
|
|
||||||
for entry in entries:
|
|
||||||
if modified_date is None or entry.modified_date > modified_date:
|
|
||||||
attribution = entry.attribution
|
|
||||||
return attribution
|
|
||||||
|
|
||||||
|
|
||||||
class JitenonKokugoExporter(_JitenonExporter):
|
|
||||||
pass
|
|
||||||
|
|
||||||
|
|
||||||
class JitenonYojiExporter(_JitenonExporter):
|
|
||||||
pass
|
|
||||||
|
|
||||||
|
|
||||||
class JitenonKotowazaExporter(_JitenonExporter):
|
|
||||||
pass
|
|
||||||
|
|
||||||
|
|
||||||
class _MonokakidoExporter(Exporter):
|
|
||||||
def _get_revision(self, entries):
|
|
||||||
timestamp = datetime.now().strftime("%Y-%m-%d")
|
|
||||||
return f"{self._target.value};{timestamp}"
|
|
||||||
|
|
||||||
|
|
||||||
class Smk8Exporter(_MonokakidoExporter):
|
|
||||||
def _get_attribution(self, entries):
|
|
||||||
return "© Sanseido Co., LTD. 2020"
|
|
||||||
|
|
||||||
|
|
||||||
class Daijirin2Exporter(_MonokakidoExporter):
|
|
||||||
def _get_attribution(self, entries):
|
|
||||||
return "© Sanseido Co., LTD. 2019"
|
|
||||||
|
|
||||||
|
|
||||||
class Sankoku8Exporter(_MonokakidoExporter):
|
|
||||||
def _get_attribution(self, entries):
|
|
||||||
return "© Sanseido Co., LTD. 2021"
|
|
18
bot/yomichan/exporters/base/jitenon.py
Normal file
18
bot/yomichan/exporters/base/jitenon.py
Normal file
|
@ -0,0 +1,18 @@
|
||||||
|
from bot.yomichan.exporters.base.exporter import BaseExporter
|
||||||
|
|
||||||
|
|
||||||
|
class JitenonExporter(BaseExporter):
|
||||||
|
def _get_revision(self, entries):
|
||||||
|
modified_date = None
|
||||||
|
for entry in entries:
|
||||||
|
if modified_date is None or entry.modified_date > modified_date:
|
||||||
|
modified_date = entry.modified_date
|
||||||
|
revision = f"{self._target.value};{modified_date}"
|
||||||
|
return revision
|
||||||
|
|
||||||
|
def _get_attribution(self, entries):
|
||||||
|
modified_date = None
|
||||||
|
for entry in entries:
|
||||||
|
if modified_date is None or entry.modified_date > modified_date:
|
||||||
|
attribution = entry.attribution
|
||||||
|
return attribution
|
8
bot/yomichan/exporters/base/monokakido.py
Normal file
8
bot/yomichan/exporters/base/monokakido.py
Normal file
|
@ -0,0 +1,8 @@
|
||||||
|
from datetime import datetime
|
||||||
|
from bot.yomichan.exporters.base.exporter import BaseExporter
|
||||||
|
|
||||||
|
|
||||||
|
class MonokakidoExporter(BaseExporter):
|
||||||
|
def _get_revision(self, entries):
|
||||||
|
timestamp = datetime.now().strftime("%Y-%m-%d")
|
||||||
|
return f"{self._target.value};{timestamp}"
|
6
bot/yomichan/exporters/daijirin2.py
Normal file
6
bot/yomichan/exporters/daijirin2.py
Normal file
|
@ -0,0 +1,6 @@
|
||||||
|
from bot.yomichan.exporters.base.monokakido import MonokakidoExporter
|
||||||
|
|
||||||
|
|
||||||
|
class Exporter(MonokakidoExporter):
|
||||||
|
def _get_attribution(self, entries):
|
||||||
|
return "© Sanseido Co., LTD. 2019"
|
|
@ -1,20 +0,0 @@
|
||||||
from bot.targets import Targets
|
|
||||||
|
|
||||||
from bot.yomichan.exporters.export import JitenonKokugoExporter
|
|
||||||
from bot.yomichan.exporters.export import JitenonYojiExporter
|
|
||||||
from bot.yomichan.exporters.export import JitenonKotowazaExporter
|
|
||||||
from bot.yomichan.exporters.export import Smk8Exporter
|
|
||||||
from bot.yomichan.exporters.export import Daijirin2Exporter
|
|
||||||
from bot.yomichan.exporters.export import Sankoku8Exporter
|
|
||||||
|
|
||||||
|
|
||||||
def new_yomi_exporter(target):
|
|
||||||
exporter_map = {
|
|
||||||
Targets.JITENON_KOKUGO: JitenonKokugoExporter,
|
|
||||||
Targets.JITENON_YOJI: JitenonYojiExporter,
|
|
||||||
Targets.JITENON_KOTOWAZA: JitenonKotowazaExporter,
|
|
||||||
Targets.SMK8: Smk8Exporter,
|
|
||||||
Targets.DAIJIRIN2: Daijirin2Exporter,
|
|
||||||
Targets.SANKOKU8: Sankoku8Exporter,
|
|
||||||
}
|
|
||||||
return exporter_map[target](target)
|
|
5
bot/yomichan/exporters/jitenon_kokugo.py
Normal file
5
bot/yomichan/exporters/jitenon_kokugo.py
Normal file
|
@ -0,0 +1,5 @@
|
||||||
|
from bot.yomichan.exporters.base.jitenon import JitenonExporter
|
||||||
|
|
||||||
|
|
||||||
|
class Exporter(JitenonExporter):
|
||||||
|
pass
|
5
bot/yomichan/exporters/jitenon_kotowaza.py
Normal file
5
bot/yomichan/exporters/jitenon_kotowaza.py
Normal file
|
@ -0,0 +1,5 @@
|
||||||
|
from bot.yomichan.exporters.base.jitenon import JitenonExporter
|
||||||
|
|
||||||
|
|
||||||
|
class Exporter(JitenonExporter):
|
||||||
|
pass
|
5
bot/yomichan/exporters/jitenon_yoji.py
Normal file
5
bot/yomichan/exporters/jitenon_yoji.py
Normal file
|
@ -0,0 +1,5 @@
|
||||||
|
from bot.yomichan.exporters.base.jitenon import JitenonExporter
|
||||||
|
|
||||||
|
|
||||||
|
class Exporter(JitenonExporter):
|
||||||
|
pass
|
6
bot/yomichan/exporters/sankoku8.py
Normal file
6
bot/yomichan/exporters/sankoku8.py
Normal file
|
@ -0,0 +1,6 @@
|
||||||
|
from bot.yomichan.exporters.base.monokakido import MonokakidoExporter
|
||||||
|
|
||||||
|
|
||||||
|
class Exporter(MonokakidoExporter):
|
||||||
|
def _get_attribution(self, entries):
|
||||||
|
return "© Sanseido Co., LTD. 2021"
|
6
bot/yomichan/exporters/smk8.py
Normal file
6
bot/yomichan/exporters/smk8.py
Normal file
|
@ -0,0 +1,6 @@
|
||||||
|
from bot.yomichan.exporters.base.monokakido import MonokakidoExporter
|
||||||
|
|
||||||
|
|
||||||
|
class Exporter(MonokakidoExporter):
|
||||||
|
def _get_attribution(self, entries):
|
||||||
|
return "© Sanseido Co., LTD. 2020"
|
|
@ -1,9 +1,10 @@
|
||||||
import re
|
import re
|
||||||
import os
|
import os
|
||||||
from bs4 import BeautifulSoup
|
|
||||||
from functools import cache
|
from functools import cache
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
|
|
||||||
|
from bs4 import BeautifulSoup
|
||||||
|
|
||||||
import bot.yomichan.glossary.icons as Icons
|
import bot.yomichan.glossary.icons as Icons
|
||||||
from bot.soup import delete_soup_nodes
|
from bot.soup import delete_soup_nodes
|
||||||
from bot.data import load_yomichan_name_conversion
|
from bot.data import load_yomichan_name_conversion
|
||||||
|
|
26
bot/yomichan/terms/base/jitenon.py
Normal file
26
bot/yomichan/terms/base/jitenon.py
Normal file
|
@ -0,0 +1,26 @@
|
||||||
|
from bot.yomichan.terms.base.terminator import BaseTerminator
|
||||||
|
|
||||||
|
|
||||||
|
class JitenonTerminator(BaseTerminator):
|
||||||
|
def __init__(self, target):
|
||||||
|
super().__init__(target)
|
||||||
|
self._glossary_maker = None
|
||||||
|
|
||||||
|
def _definition_tags(self, entry):
|
||||||
|
return None
|
||||||
|
|
||||||
|
def _glossary(self, entry):
|
||||||
|
if entry.entry_id in self._glossary_cache:
|
||||||
|
return self._glossary_cache[entry.entry_id]
|
||||||
|
glossary = self._glossary_maker.make_glossary(entry, self._image_dir)
|
||||||
|
self._glossary_cache[entry.entry_id] = glossary
|
||||||
|
return glossary
|
||||||
|
|
||||||
|
def _sequence(self, entry):
|
||||||
|
return entry.entry_id
|
||||||
|
|
||||||
|
def _link_glossary_parameters(self, entry):
|
||||||
|
return []
|
||||||
|
|
||||||
|
def _subentry_lists(self, entry):
|
||||||
|
return []
|
|
@ -2,7 +2,7 @@ from abc import abstractmethod, ABC
|
||||||
from bot.data import load_yomichan_inflection_categories
|
from bot.data import load_yomichan_inflection_categories
|
||||||
|
|
||||||
|
|
||||||
class Terminator(ABC):
|
class BaseTerminator(ABC):
|
||||||
def __init__(self, target):
|
def __init__(self, target):
|
||||||
self._target = target
|
self._target = target
|
||||||
self._glossary_cache = {}
|
self._glossary_cache = {}
|
||||||
|
@ -66,28 +66,28 @@ class Terminator(ABC):
|
||||||
|
|
||||||
@abstractmethod
|
@abstractmethod
|
||||||
def _definition_tags(self, entry):
|
def _definition_tags(self, entry):
|
||||||
pass
|
raise NotImplementedError
|
||||||
|
|
||||||
@abstractmethod
|
@abstractmethod
|
||||||
def _inflection_rules(self, entry, expression):
|
def _inflection_rules(self, entry, expression):
|
||||||
pass
|
raise NotImplementedError
|
||||||
|
|
||||||
@abstractmethod
|
@abstractmethod
|
||||||
def _glossary(self, entry):
|
def _glossary(self, entry):
|
||||||
pass
|
raise NotImplementedError
|
||||||
|
|
||||||
@abstractmethod
|
@abstractmethod
|
||||||
def _sequence(self, entry):
|
def _sequence(self, entry):
|
||||||
pass
|
raise NotImplementedError
|
||||||
|
|
||||||
@abstractmethod
|
@abstractmethod
|
||||||
def _term_tags(self, entry):
|
def _term_tags(self, entry):
|
||||||
pass
|
raise NotImplementedError
|
||||||
|
|
||||||
@abstractmethod
|
@abstractmethod
|
||||||
def _link_glossary_parameters(self, entry):
|
def _link_glossary_parameters(self, entry):
|
||||||
pass
|
raise NotImplementedError
|
||||||
|
|
||||||
@abstractmethod
|
@abstractmethod
|
||||||
def _subentry_lists(self, entry):
|
def _subentry_lists(self, entry):
|
||||||
pass
|
raise NotImplementedError
|
|
@ -1,11 +1,10 @@
|
||||||
from bot.entries.daijirin2.phrase_entry import PhraseEntry
|
from bot.entries.daijirin2.phrase_entry import PhraseEntry
|
||||||
|
from bot.yomichan.terms.base.terminator import BaseTerminator
|
||||||
from bot.yomichan.terms.terminator import Terminator
|
|
||||||
from bot.yomichan.glossary.daijirin2 import make_glossary
|
from bot.yomichan.glossary.daijirin2 import make_glossary
|
||||||
from bot.yomichan.grammar import sudachi_rules, tags_to_rules
|
from bot.yomichan.grammar import sudachi_rules, tags_to_rules
|
||||||
|
|
||||||
|
|
||||||
class Daijirin2Terminator(Terminator):
|
class Terminator(BaseTerminator):
|
||||||
def _definition_tags(self, entry):
|
def _definition_tags(self, entry):
|
||||||
return ""
|
return ""
|
||||||
|
|
||||||
|
|
|
@ -1,20 +0,0 @@
|
||||||
from bot.targets import Targets
|
|
||||||
|
|
||||||
from bot.yomichan.terms.jitenon import JitenonKokugoTerminator
|
|
||||||
from bot.yomichan.terms.jitenon import JitenonYojiTerminator
|
|
||||||
from bot.yomichan.terms.jitenon import JitenonKotowazaTerminator
|
|
||||||
from bot.yomichan.terms.smk8 import Smk8Terminator
|
|
||||||
from bot.yomichan.terms.daijirin2 import Daijirin2Terminator
|
|
||||||
from bot.yomichan.terms.sankoku8 import Sankoku8Terminator
|
|
||||||
|
|
||||||
|
|
||||||
def new_terminator(target):
|
|
||||||
terminator_map = {
|
|
||||||
Targets.JITENON_KOKUGO: JitenonKokugoTerminator,
|
|
||||||
Targets.JITENON_YOJI: JitenonYojiTerminator,
|
|
||||||
Targets.JITENON_KOTOWAZA: JitenonKotowazaTerminator,
|
|
||||||
Targets.SMK8: Smk8Terminator,
|
|
||||||
Targets.DAIJIRIN2: Daijirin2Terminator,
|
|
||||||
Targets.SANKOKU8: Sankoku8Terminator,
|
|
||||||
}
|
|
||||||
return terminator_map[target](target)
|
|
|
@ -1,68 +0,0 @@
|
||||||
from bot.yomichan.grammar import sudachi_rules
|
|
||||||
from bot.yomichan.terms.terminator import Terminator
|
|
||||||
|
|
||||||
from bot.yomichan.glossary.jitenon import JitenonKokugoGlossary
|
|
||||||
from bot.yomichan.glossary.jitenon import JitenonYojiGlossary
|
|
||||||
from bot.yomichan.glossary.jitenon import JitenonKotowazaGlossary
|
|
||||||
|
|
||||||
|
|
||||||
class JitenonTerminator(Terminator):
|
|
||||||
def __init__(self, target):
|
|
||||||
super().__init__(target)
|
|
||||||
self._glossary_maker = None
|
|
||||||
|
|
||||||
def _definition_tags(self, entry):
|
|
||||||
return None
|
|
||||||
|
|
||||||
def _glossary(self, entry):
|
|
||||||
if entry.entry_id in self._glossary_cache:
|
|
||||||
return self._glossary_cache[entry.entry_id]
|
|
||||||
glossary = self._glossary_maker.make_glossary(entry, self._image_dir)
|
|
||||||
self._glossary_cache[entry.entry_id] = glossary
|
|
||||||
return glossary
|
|
||||||
|
|
||||||
def _sequence(self, entry):
|
|
||||||
return entry.entry_id
|
|
||||||
|
|
||||||
def _link_glossary_parameters(self, entry):
|
|
||||||
return []
|
|
||||||
|
|
||||||
def _subentry_lists(self, entry):
|
|
||||||
return []
|
|
||||||
|
|
||||||
|
|
||||||
class JitenonKokugoTerminator(JitenonTerminator):
|
|
||||||
def __init__(self, target):
|
|
||||||
super().__init__(target)
|
|
||||||
self._glossary_maker = JitenonKokugoGlossary()
|
|
||||||
|
|
||||||
def _inflection_rules(self, entry, expression):
|
|
||||||
return sudachi_rules(expression)
|
|
||||||
|
|
||||||
def _term_tags(self, entry):
|
|
||||||
return ""
|
|
||||||
|
|
||||||
|
|
||||||
class JitenonYojiTerminator(JitenonTerminator):
|
|
||||||
def __init__(self, target):
|
|
||||||
super().__init__(target)
|
|
||||||
self._glossary_maker = JitenonYojiGlossary()
|
|
||||||
|
|
||||||
def _inflection_rules(self, entry, expression):
|
|
||||||
return ""
|
|
||||||
|
|
||||||
def _term_tags(self, entry):
|
|
||||||
tags = entry.kanken_level.split("/")
|
|
||||||
return " ".join(tags)
|
|
||||||
|
|
||||||
|
|
||||||
class JitenonKotowazaTerminator(JitenonTerminator):
|
|
||||||
def __init__(self, target):
|
|
||||||
super().__init__(target)
|
|
||||||
self._glossary_maker = JitenonKotowazaGlossary()
|
|
||||||
|
|
||||||
def _inflection_rules(self, entry, expression):
|
|
||||||
return sudachi_rules(expression)
|
|
||||||
|
|
||||||
def _term_tags(self, entry):
|
|
||||||
return ""
|
|
15
bot/yomichan/terms/jitenon_kokugo.py
Normal file
15
bot/yomichan/terms/jitenon_kokugo.py
Normal file
|
@ -0,0 +1,15 @@
|
||||||
|
from bot.yomichan.grammar import sudachi_rules
|
||||||
|
from bot.yomichan.glossary.jitenon import JitenonKokugoGlossary
|
||||||
|
from bot.yomichan.terms.base.jitenon import JitenonTerminator
|
||||||
|
|
||||||
|
|
||||||
|
class Terminator(JitenonTerminator):
|
||||||
|
def __init__(self, target):
|
||||||
|
super().__init__(target)
|
||||||
|
self._glossary_maker = JitenonKokugoGlossary()
|
||||||
|
|
||||||
|
def _inflection_rules(self, entry, expression):
|
||||||
|
return sudachi_rules(expression)
|
||||||
|
|
||||||
|
def _term_tags(self, entry):
|
||||||
|
return ""
|
15
bot/yomichan/terms/jitenon_kotowaza.py
Normal file
15
bot/yomichan/terms/jitenon_kotowaza.py
Normal file
|
@ -0,0 +1,15 @@
|
||||||
|
from bot.yomichan.grammar import sudachi_rules
|
||||||
|
from bot.yomichan.glossary.jitenon import JitenonKotowazaGlossary
|
||||||
|
from bot.yomichan.terms.base.jitenon import JitenonTerminator
|
||||||
|
|
||||||
|
|
||||||
|
class Terminator(JitenonTerminator):
|
||||||
|
def __init__(self, target):
|
||||||
|
super().__init__(target)
|
||||||
|
self._glossary_maker = JitenonKotowazaGlossary()
|
||||||
|
|
||||||
|
def _inflection_rules(self, entry, expression):
|
||||||
|
return sudachi_rules(expression)
|
||||||
|
|
||||||
|
def _term_tags(self, entry):
|
||||||
|
return ""
|
15
bot/yomichan/terms/jitenon_yoji.py
Normal file
15
bot/yomichan/terms/jitenon_yoji.py
Normal file
|
@ -0,0 +1,15 @@
|
||||||
|
from bot.yomichan.glossary.jitenon import JitenonYojiGlossary
|
||||||
|
from bot.yomichan.terms.base.jitenon import JitenonTerminator
|
||||||
|
|
||||||
|
|
||||||
|
class Terminator(JitenonTerminator):
|
||||||
|
def __init__(self, target):
|
||||||
|
super().__init__(target)
|
||||||
|
self._glossary_maker = JitenonYojiGlossary()
|
||||||
|
|
||||||
|
def _inflection_rules(self, entry, expression):
|
||||||
|
return ""
|
||||||
|
|
||||||
|
def _term_tags(self, entry):
|
||||||
|
tags = entry.kanken_level.split("/")
|
||||||
|
return " ".join(tags)
|
|
@ -1,11 +1,10 @@
|
||||||
from bot.entries.sankoku8.phrase_entry import PhraseEntry
|
from bot.entries.sankoku8.phrase_entry import PhraseEntry
|
||||||
|
from bot.yomichan.terms.base.terminator import BaseTerminator
|
||||||
from bot.yomichan.terms.terminator import Terminator
|
|
||||||
from bot.yomichan.glossary.sankoku8 import make_glossary
|
from bot.yomichan.glossary.sankoku8 import make_glossary
|
||||||
from bot.yomichan.grammar import sudachi_rules, tags_to_rules
|
from bot.yomichan.grammar import sudachi_rules, tags_to_rules
|
||||||
|
|
||||||
|
|
||||||
class Sankoku8Terminator(Terminator):
|
class Terminator(BaseTerminator):
|
||||||
def _definition_tags(self, entry):
|
def _definition_tags(self, entry):
|
||||||
return ""
|
return ""
|
||||||
|
|
||||||
|
|
|
@ -1,12 +1,11 @@
|
||||||
from bot.entries.smk8.kanji_entry import KanjiEntry
|
from bot.entries.smk8.kanji_entry import KanjiEntry
|
||||||
from bot.entries.smk8.phrase_entry import PhraseEntry
|
from bot.entries.smk8.phrase_entry import PhraseEntry
|
||||||
|
from bot.yomichan.terms.base.terminator import BaseTerminator
|
||||||
from bot.yomichan.terms.terminator import Terminator
|
|
||||||
from bot.yomichan.glossary.smk8 import make_glossary
|
from bot.yomichan.glossary.smk8 import make_glossary
|
||||||
from bot.yomichan.grammar import sudachi_rules, tags_to_rules
|
from bot.yomichan.grammar import sudachi_rules, tags_to_rules
|
||||||
|
|
||||||
|
|
||||||
class Smk8Terminator(Terminator):
|
class Terminator(BaseTerminator):
|
||||||
def __init__(self, target):
|
def __init__(self, target):
|
||||||
super().__init__(target)
|
super().__init__(target)
|
||||||
|
|
||||||
|
|
|
@ -21,7 +21,7 @@ import sys
|
||||||
import argparse
|
import argparse
|
||||||
import subprocess
|
import subprocess
|
||||||
from bot.targets import Targets
|
from bot.targets import Targets
|
||||||
from bot.crawlers.factory import new_crawler
|
from bot.factory import new_crawler
|
||||||
|
|
||||||
|
|
||||||
def filename(f):
|
def filename(f):
|
||||||
|
|
|
@ -1,5 +1,7 @@
|
||||||
#!/bin/sh
|
#!/bin/sh
|
||||||
|
|
||||||
|
export PYTHONPYCACHEPREFIX=/tmp/pycache
|
||||||
|
|
||||||
python -m unittest discover -s tests
|
python -m unittest discover -s tests
|
||||||
|
|
||||||
python jitenbot.py jitenon-kokugo
|
python jitenbot.py jitenon-kokugo
|
||||||
|
|
Loading…
Reference in a new issue