diff --git a/bot/data.py b/bot/data.py
new file mode 100644
index 0000000..ec85287
--- /dev/null
+++ b/bot/data.py
@@ -0,0 +1,47 @@
+import os
+import sys
+import json
+from pathlib import Path
+
+from platformdirs import user_config_dir
+
+
+def config():
+    """Load the user's config.json, creating it from defaults if absent."""
+    config_dir = user_config_dir("jitenbot")
+    if not Path(config_dir).is_dir():
+        os.makedirs(config_dir)
+    config_file = os.path.join(config_dir, "config.json")
+    if Path(config_file).is_file():
+        with open(config_file, "r", encoding="utf-8") as f:
+            config = json.load(f)
+    else:
+        config = __default_config()
+        with open(config_file, "w", encoding="utf-8") as f:
+            json.dump(config, f, indent=4)
+    return config
+
+
+def yomichan_inflection_categories():
+    """Return the parsed yomichan_inflection_categories.json data file."""
+    file_name = "yomichan_inflection_categories.json"
+    data = __load_json(file_name)
+    return data
+
+
+def __default_config():
+    """Return the parsed default_config.json data file."""
+    file_name = "default_config.json"
+    data = __load_json(file_name)
+    return data
+
+
+def __load_json(file_name):
+    """Parse a JSON file from the relative 'data' directory; exit if absent."""
+    file_path = os.path.join("data", file_name)
+    if not Path(file_path).is_file():
+        print(f"Missing data file: {file_path}")
+        sys.exit(1)
+    with open(file_path, "r", encoding="utf-8") as f:
+        data = json.load(f)
+    return data
diff --git a/bot/scraper.py b/bot/scraper.py
index a8de968..a8c4905 100644
--- a/bot/scraper.py
+++ b/bot/scraper.py
@@ -2,18 +2,24 @@ import time
 import requests
 import re
 import os
-import json
 import hashlib
-
+from datetime import datetime
 from pathlib import Path
+
+from platformdirs import user_cache_dir
 from urllib.parse import urlparse
 from requests.adapters import HTTPAdapter
 from requests.packages.urllib3.util.retry import Retry
-from datetime import datetime
+
+import bot.data as Data
 
 
 class Scraper():
+    __CONFIG = None
+
     def __init__(self):
+        if self.__CONFIG is None:
+            Scraper.__CONFIG = Data.config()
         pattern = r"^(?:([A-Za-z0-9.\-]+)\.)?" + self.domain + r"$"
         self.netloc_re = re.compile(pattern)
         self.__set_session()
@@ -38,9 +44,7 @@ class Scraper():
             allowed_methods=["HEAD", "GET", "OPTIONS"]
         )
         adapter = HTTPAdapter(max_retries=retry_strategy)
-        with open("config.json", "r") as f:
-            config = json.load(f)
-        headers = config["http-request-headers"]
+        headers = self.__CONFIG["http-request-headers"]
         self.session = requests.Session()
         self.session.mount("https://", adapter)
         self.session.headers.update(headers)
@@ -54,7 +58,9 @@ class Scraper():
             raise Exception(f"Invalid URL: {url.geturl()}")
 
     def __cache_path(self, url):
-        cache_dir = os.path.join("webcache", self.__class__.__name__.lower())
+        class_name = self.__class__.__name__.lower()
+        cache_dir = user_cache_dir("jitenbot")
+        cache_dir = os.path.join(cache_dir, class_name)
         netloc_match = self.netloc_re.match(url.netloc)
         if netloc_match.group(1) is not None:
             subdomain = netloc_match.group(1)
diff --git a/bot/yomichan/export.py b/bot/yomichan/export.py
index ef30db9..54bfc9e 100644
--- a/bot/yomichan/export.py
+++ b/bot/yomichan/export.py
@@ -3,6 +3,9 @@ import os
 import shutil
 import uuid
 from pathlib import Path
+from datetime import datetime
+
+from platformdirs import user_documents_dir, user_cache_dir
 
 
 def jitenon_yoji(entries):
@@ -54,8 +57,12 @@ def __terms(entries):
 
 
 def __create_zip(terms, index, tags=[]):
-    build_directory = str(uuid.uuid4())
-    os.mkdir(build_directory)
+    cache_dir = user_cache_dir("jitenbot")
+    timestamp = datetime.now().strftime("%Y%m%d%H%M%S")
+    build_directory = os.path.join(cache_dir, f"build_{timestamp}")
+    if Path(build_directory).is_dir():
+        shutil.rmtree(build_directory)
+    os.makedirs(build_directory)
 
     terms_per_file = 1000
     max_i = int(len(terms) / terms_per_file) + 1
@@ -78,7 +85,8 @@ def __create_zip(terms, index, tags=[]):
     zip_filename = index["title"]
     zip_file = f"{zip_filename}.zip"
     shutil.make_archive(zip_filename, "zip", build_directory)
-    out_dir = "output"
+
+    out_dir = os.path.join(user_documents_dir(), "jitenbot")
     out_file = os.path.join(out_dir, zip_file)
     if not Path(out_dir).is_dir():
         os.mkdir(out_dir)
diff --git a/config.json b/data/default_config.json
similarity index 100%
rename from config.json
rename to data/default_config.json
diff --git a/data/yomichan_inflection_categories.json b/data/yomichan_inflection_categories.json
new file mode 100644
index 0000000..7f346f5
--- /dev/null
+++ b/data/yomichan_inflection_categories.json
@@ -0,0 +1,10 @@
+{
+    "sudachi": {
+        "sahen": ["サ行", "サ行変格", "ザ行変格", "文語サ行変格"],
+        "godan": ["五段", "文語四段", "文語上二段", "文語下二段", "マス", "ヤス", "デス"],
+        "ichidan": ["上一段", "下一段", "文語上一段", "文語下一段", "レル"],
+        "keiyoushi": ["形容詞", "ナイ", "タイ", "ラシイ"],
+        "kahen": ["カ行変格"],
+        "sudachi": []
+    }
+}