Use standard platform directories for cache, config, and output data
This commit is contained in:
parent
071144c808
commit
90eb5dc285
43
bot/data.py
Normal file
43
bot/data.py
Normal file
|
@ -0,0 +1,43 @@
|
|||
import os
|
||||
import sys
|
||||
import json
|
||||
from pathlib import Path
|
||||
|
||||
from platformdirs import user_config_dir
|
||||
|
||||
|
||||
def config():
    """Load the user configuration, creating it from defaults on first run.

    Reads ``config.json`` from the platform-specific user config directory
    for "jitenbot" (via platformdirs).  If the file does not exist yet, the
    bundled default configuration is written there and returned.

    Returns:
        dict: the parsed configuration data.
    """
    config_dir = user_config_dir("jitenbot")
    # exist_ok=True avoids the race between an is_dir() check and makedirs()
    os.makedirs(config_dir, exist_ok=True)
    config_file = os.path.join(config_dir, "config.json")
    if Path(config_file).is_file():
        # Explicit encoding for consistency with __load_json
        with open(config_file, "r", encoding="utf-8") as f:
            config = json.load(f)
    else:
        config = __default_config()
        with open(config_file, "w", encoding="utf-8") as f:
            json.dump(config, f, indent=4)
    return config
|
||||
|
||||
|
||||
def yomichan_inflection_categories():
    """Return the Yomichan inflection-category data bundled with jitenbot."""
    return __load_json("yomichan_inflection_categories.json")
|
||||
|
||||
|
||||
def __default_config():
    """Return the default configuration shipped in data/default_config.json."""
    return __load_json("default_config.json")
|
||||
|
||||
|
||||
def __load_json(file_name):
    """Read and parse a bundled JSON file from the ./data directory.

    Args:
        file_name: base name of the file inside the ``data`` directory.

    Returns:
        The parsed JSON data.

    Exits the program with status 1 if the file is missing.
    NOTE(review): the "data" path is relative to the current working
    directory — assumes the tool is run from the project root; confirm.
    """
    file_path = os.path.join("data", file_name)
    if not Path(file_path).is_file():
        # Report to stderr so the error is visible even when stdout is piped
        print(f"Missing data file: {file_path}", file=sys.stderr)
        sys.exit(1)
    with open(file_path, "r", encoding="utf-8") as f:
        data = json.load(f)
    return data
|
|
@ -2,18 +2,24 @@ import time
|
|||
import requests
|
||||
import re
|
||||
import os
|
||||
import json
|
||||
import hashlib
|
||||
|
||||
from datetime import datetime
|
||||
from pathlib import Path
|
||||
|
||||
from platformdirs import user_cache_dir
|
||||
from urllib.parse import urlparse
|
||||
from requests.adapters import HTTPAdapter
|
||||
from requests.packages.urllib3.util.retry import Retry
|
||||
from datetime import datetime
|
||||
|
||||
import bot.data as Data
|
||||
|
||||
|
||||
class Scraper():
|
||||
__CONFIG = None
|
||||
|
||||
def __init__(self):
    """Initialize the scraper: load shared config, compile the netloc
    matcher for this scraper's domain, and set up the HTTP session."""
    # Cache the configuration on the class so it is loaded only once no
    # matter how many scraper instances are created.  Assigning through
    # `self` would create an instance attribute and leave the class-level
    # __CONFIG forever None, defeating the cache.
    if Scraper.__CONFIG is None:
        Scraper.__CONFIG = Data.config()
    # Optional capture group 1 holds the subdomain, if any.
    # NOTE(review): assumes self.domain is set by the subclass — confirm.
    pattern = r"^(?:([A-Za-z0-9.\-]+)\.)?" + self.domain + r"$"
    self.netloc_re = re.compile(pattern)
    self.__set_session()
|
||||
|
@ -38,9 +44,7 @@ class Scraper():
|
|||
allowed_methods=["HEAD", "GET", "OPTIONS"]
|
||||
)
|
||||
adapter = HTTPAdapter(max_retries=retry_strategy)
|
||||
with open("config.json", "r") as f:
|
||||
config = json.load(f)
|
||||
headers = config["http-request-headers"]
|
||||
headers = self.__CONFIG["http-request-headers"]
|
||||
self.session = requests.Session()
|
||||
self.session.mount("https://", adapter)
|
||||
self.session.headers.update(headers)
|
||||
|
@ -54,7 +58,9 @@ class Scraper():
|
|||
raise Exception(f"Invalid URL: {url.geturl()}")
|
||||
|
||||
def __cache_path(self, url):
|
||||
cache_dir = os.path.join("webcache", self.__class__.__name__.lower())
|
||||
class_name = self.__class__.__name__.lower()
|
||||
cache_dir = user_cache_dir("jitenbot")
|
||||
cache_dir = os.path.join(cache_dir, class_name)
|
||||
netloc_match = self.netloc_re.match(url.netloc)
|
||||
if netloc_match.group(1) is not None:
|
||||
subdomain = netloc_match.group(1)
|
||||
|
|
|
@ -3,6 +3,9 @@ import os
|
|||
import shutil
|
||||
import uuid
|
||||
from pathlib import Path
|
||||
from datetime import datetime
|
||||
|
||||
from platformdirs import user_documents_dir, user_cache_dir
|
||||
|
||||
|
||||
def jitenon_yoji(entries):
|
||||
|
@ -54,8 +57,12 @@ def __terms(entries):
|
|||
|
||||
|
||||
def __create_zip(terms, index, tags=[]):
|
||||
build_directory = str(uuid.uuid4())
|
||||
os.mkdir(build_directory)
|
||||
cache_dir = user_cache_dir("jitenbot")
|
||||
timestamp = datetime.now().strftime("%Y%m%d%H%M%S")
|
||||
build_directory = os.path.join(cache_dir, f"build_{timestamp}")
|
||||
if Path(build_directory).is_dir():
|
||||
shutil.rmtree(build_directory)
|
||||
os.makedirs(build_directory)
|
||||
|
||||
terms_per_file = 1000
|
||||
max_i = int(len(terms) / terms_per_file) + 1
|
||||
|
@ -78,7 +85,8 @@ def __create_zip(terms, index, tags=[]):
|
|||
zip_filename = index["title"]
|
||||
zip_file = f"{zip_filename}.zip"
|
||||
shutil.make_archive(zip_filename, "zip", build_directory)
|
||||
out_dir = "output"
|
||||
|
||||
out_dir = os.path.join(user_documents_dir(), "jitenbot")
|
||||
out_file = os.path.join(out_dir, zip_file)
|
||||
if not Path(out_dir).is_dir():
|
||||
os.mkdir(out_dir)
|
||||
|
|
10
data/yomichan_inflection_categories.json
Normal file
10
data/yomichan_inflection_categories.json
Normal file
|
@ -0,0 +1,10 @@
|
|||
{
|
||||
"sudachi": {
|
||||
"sahen": ["サ行", "サ行変格", "ザ行変格", "文語サ行変格"],
|
||||
"godan": ["五段", "文語四段", "文語上二段", "文語下二段", "マス", "ヤス", "デス"],
|
||||
"ichidan": ["上一段", "下一段", "文語上一段", "文語下一段", "レル"],
|
||||
"keiyoushi": ["形容詞", "ナイ", "タイ", "ラシイ"],
|
||||
"kahen": ["カ行変格"],
|
||||
"sudachi": []
|
||||
}
|
||||
}
|
Loading…
Reference in a new issue