Use standard platform directories for cache, config, and output data

stephenmk 2023-04-22 13:37:34 -05:00
parent 071144c808
commit 90eb5dc285
No known key found for this signature in database
GPG key ID: B6DA730DB06235F1
5 changed files with 77 additions and 10 deletions

bot/data.py (new file)

@@ -0,0 +1,43 @@
+import os
+import sys
+import json
+from pathlib import Path
+from platformdirs import user_config_dir
+
+
+def config():
+    config_dir = user_config_dir("jitenbot")
+    if not Path(config_dir).is_dir():
+        os.makedirs(config_dir)
+    config_file = os.path.join(config_dir, "config.json")
+    if Path(config_file).is_file():
+        with open(config_file, "r") as f:
+            config = json.load(f)
+    else:
+        config = __default_config()
+        with open(config_file, "w") as f:
+            json.dump(config, f, indent=4)
+    return config
+
+
+def yomichan_inflection_categories():
+    file_name = "yomichan_inflection_categories.json"
+    data = __load_json(file_name)
+    return data
+
+
+def __default_config():
+    file_name = "default_config.json"
+    data = __load_json(file_name)
+    return data
+
+
+def __load_json(file_name):
+    file_path = os.path.join("data", file_name)
+    if not Path(file_path).is_file():
+        print(f"Missing data file: {file_path}")
+        sys.exit(1)
+    with open(file_path, "r", encoding="utf-8") as f:
+        data = json.load(f)
+    return data
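
The new config() helper is self-healing: on first run it copies the bundled
defaults from data/default_config.json into the platform config directory, and
on later runs it reads back whatever the user has saved there. A minimal sketch
of the behavior, assuming platformdirs' documented defaults (actual paths vary
by OS and environment; run from the repository root so the bundled data/ files
resolve):

    from platformdirs import user_config_dir
    import bot.data as Data

    # platformdirs resolves the conventional per-user location, e.g.
    #   Linux: ~/.config/jitenbot (honors XDG_CONFIG_HOME)
    #   macOS: ~/Library/Application Support/jitenbot
    print(user_config_dir("jitenbot"))

    # First call writes config.json from the defaults; later calls
    # return the user's (possibly edited) copy.
    config = Data.config()
    print(config["http-request-headers"])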


@@ -2,18 +2,24 @@ import time
 import requests
 import re
 import os
 import json
 import hashlib
+from datetime import datetime
+from pathlib import Path
+from platformdirs import user_cache_dir
 from urllib.parse import urlparse
 from requests.adapters import HTTPAdapter
 from requests.packages.urllib3.util.retry import Retry
-from datetime import datetime
+
+import bot.data as Data
 
 
 class Scraper():
+    __CONFIG = None
+
     def __init__(self):
+        if self.__CONFIG is None:
+            self.__CONFIG = Data.config()
         pattern = r"^(?:([A-Za-z0-9.\-]+)\.)?" + self.domain + r"$"
         self.netloc_re = re.compile(pattern)
         self.__set_session()
@@ -38,9 +44,7 @@ class Scraper():
             allowed_methods=["HEAD", "GET", "OPTIONS"]
         )
         adapter = HTTPAdapter(max_retries=retry_strategy)
-        with open("config.json", "r") as f:
-            config = json.load(f)
-        headers = config["http-request-headers"]
+        headers = self.__CONFIG["http-request-headers"]
         self.session = requests.Session()
         self.session.mount("https://", adapter)
         self.session.headers.update(headers)
@@ -54,7 +58,9 @@ class Scraper():
             raise Exception(f"Invalid URL: {url.geturl()}")
 
     def __cache_path(self, url):
-        cache_dir = os.path.join("webcache", self.__class__.__name__.lower())
+        class_name = self.__class__.__name__.lower()
+        cache_dir = user_cache_dir("jitenbot")
+        cache_dir = os.path.join(cache_dir, class_name)
         netloc_match = self.netloc_re.match(url.netloc)
         if netloc_match.group(1) is not None:
             subdomain = netloc_match.group(1)
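
With __cache_path now rooted in user_cache_dir, downloaded pages move out of
the relative webcache folder and into a per-user cache tree namespaced by
scraper subclass. A rough sketch of where files land (the subclass name below
is hypothetical):

    import os
    from platformdirs import user_cache_dir

    # A hypothetical Jitenon(Scraper) subclass would cache under e.g.
    # ~/.cache/jitenbot/jitenon on Linux or
    # ~/Library/Caches/jitenbot/jitenon on macOS.
    class_name = "jitenon"  # stands in for self.__class__.__name__.lower()
    print(os.path.join(user_cache_dir("jitenbot"), class_name))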


@@ -3,6 +3,9 @@ import os
 import shutil
 import uuid
 from pathlib import Path
+from datetime import datetime
+from platformdirs import user_documents_dir, user_cache_dir
+
 
 
 def jitenon_yoji(entries):
@@ -54,8 +57,12 @@ def __terms(entries):
 def __create_zip(terms, index, tags=[]):
-    build_directory = str(uuid.uuid4())
-    os.mkdir(build_directory)
+    cache_dir = user_cache_dir("jitenbot")
+    timestamp = datetime.now().strftime("%Y%m%d%H%M%S")
+    build_directory = os.path.join(cache_dir, f"build_{timestamp}")
+    if Path(build_directory).is_dir():
+        shutil.rmtree(build_directory)
+    os.makedirs(build_directory)
     terms_per_file = 1000
     max_i = int(len(terms) / terms_per_file) + 1
@@ -78,7 +85,8 @@ def __create_zip(terms, index, tags=[]):
     zip_filename = index["title"]
     zip_file = f"{zip_filename}.zip"
     shutil.make_archive(zip_filename, "zip", build_directory)
-    out_dir = "output"
+    out_dir = os.path.join(user_documents_dir(), "jitenbot")
     out_file = os.path.join(out_dir, zip_file)
     if not Path(out_dir).is_dir():
         os.mkdir(out_dir)
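
__create_zip now builds into a timestamped directory under the per-user cache
tree, deleting any leftover directory of the same name first, and the finished
archive is written beneath the user's documents folder rather than a relative
output directory. A small sketch of the destination path, using a hypothetical
dictionary title:

    import os
    from platformdirs import user_documents_dir

    zip_file = "example-title.zip"  # hypothetical index["title"] + ".zip"
    out_dir = os.path.join(user_documents_dir(), "jitenbot")
    # e.g. ~/Documents/jitenbot/example-title.zip on Linux
    print(os.path.join(out_dir, zip_file))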

data/yomichan_inflection_categories.json (new file)

@@ -0,0 +1,10 @@
+{
+    "sudachi": {
+        "sahen": ["サ行", "サ行変格", "ザ行変格", "文語サ行変格"],
+        "godan": ["五段", "文語四段", "文語上二段", "文語下二段", "マス", "ヤス", "デス"],
+        "ichidan": ["上一段", "下一段", "文語上一段", "文語下一段", "レル"],
+        "keiyoushi": ["形容詞", "ナイ", "タイ", "ラシイ"],
+        "kahen": ["カ行変格"],
+        "sudachi": []
+    }
+}
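
This new data file groups Sudachi conjugation-type labels under Yomichan
inflection categories, and bot/data.py exposes it through
yomichan_inflection_categories(). For example (run from the repository root so
the data/ path resolves):

    import bot.data as Data

    categories = Data.yomichan_inflection_categories()
    # -> ["五段", "文語四段", "文語上二段", "文語下二段", "マス", "ヤス", "デス"]
    print(categories["sudachi"]["godan"])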