# jitenbot/bot/yomichan/export.py
#
# Captured from a repository web view (97 lines, 3.4 KiB, Python);
# last change shown: 2023-04-10 20:20:33 +00:00.

import json
import os
import shutil
import uuid
from pathlib import Path
from datetime import datetime
from platformdirs import user_documents_dir, user_cache_dir
2023-04-10 20:20:33 +00:00
def jitenon_yoji(entries):
    """Export the yoji.jitenon.jp entries as a Yomichan dictionary archive.

    Gathers the terms and the most recent metadata from ``entries``, builds
    the dictionary index plus one tag row per kanji-kentei level, and passes
    all three to ``__create_zip`` for packaging.
    """
    yoji_terms, latest_date, credit = __terms(entries)

    # Every tag row shares the same fixed fields; only the level name and
    # the description that embeds it vary from row to row.
    kanken_levels = ("1級", "準1級", "2級", "準2級", "3級", "4級", "5級")
    level_tags = [
        [level, "frequent", 0, f"漢字検定(漢検){level}の四字熟語", 0]
        for level in kanken_levels
    ]

    dictionary_index = {
        "title": "四字熟語辞典オンライン",
        "revision": f"jitenon-yoji.{latest_date}",
        "sequenced": True,
        "format": 3,
        "url": "https://yoji.jitenon.jp/",
        "attribution": credit,
    }
    __create_zip(yoji_terms, dictionary_index, level_tags)
def jitenon_kotowaza(entries):
    """Export the kotowaza.jitenon.jp entries as a Yomichan dictionary archive.

    Gathers the terms and the most recent metadata from ``entries`` and
    passes them, together with the dictionary index, to ``__create_zip``.
    No tag bank is supplied for this dictionary.
    """
    kotowaza_terms, latest_date, credit = __terms(entries)
    __create_zip(
        kotowaza_terms,
        {
            "title": "故事・ことわざ・慣用句オンライン",
            "revision": f"jitenon-kotowaza.{latest_date}",
            "sequenced": True,
            "format": 3,
            "url": "https://kotowaza.jitenon.jp/",
            "attribution": credit,
        },
    )
def __terms(entries):
    """Collect all Yomichan terms and the freshest metadata from ``entries``.

    Args:
        entries: Iterable of entry objects exposing ``modified_date``,
            ``attribution`` and a ``yomichan_terms()`` method that returns
            an iterable of terms.

    Returns:
        A ``(terms, modified_date, attribution)`` tuple. ``terms`` is the
        concatenation of every entry's terms in input order.
        ``modified_date`` and ``attribution`` come from the entry with the
        greatest ``modified_date``; for empty input they are ``None`` and
        ``""`` respectively.
    """
    terms = []
    modified_date = None
    attribution = ""
    for entry in entries:
        # Strict ">" means the first of several equally recent entries wins.
        if modified_date is None or entry.modified_date > modified_date:
            modified_date = entry.modified_date
            attribution = entry.attribution
        terms.extend(entry.yomichan_terms())
    return terms, modified_date, attribution
def __create_zip(terms, index, tags=None):
    """Package a dictionary as ``<title>.zip`` in the user's documents folder.

    The files are assembled in a timestamped build directory under the
    jitenbot cache directory, zipped, and the archive is placed in
    ``<documents>/jitenbot``, replacing any archive of the same name.

    Args:
        terms: List of JSON-serialisable terms; written out in banks of
            1000 terms as ``term_bank_<n>.json``.
        index: JSON-serialisable dict written to ``index.json``. Its
            ``"title"`` value names the resulting zip file.
        tags: Optional list of tag rows written to ``tag_bank_1.json``.
            The file is omitted when ``tags`` is ``None`` or empty.

    Raises:
        OSError: If any of the directories or files cannot be written.
    """
    # Avoid a shared mutable default; None and [] both mean "no tag bank".
    tags = [] if tags is None else tags
    terms_per_file = 1000

    cache_dir = user_cache_dir("jitenbot")
    timestamp = datetime.now().strftime("%Y%m%d%H%M%S")
    build_directory = os.path.join(cache_dir, f"build_{timestamp}")
    if Path(build_directory).is_dir():
        shutil.rmtree(build_directory)
    os.makedirs(build_directory)

    try:
        # Ceiling division, so an exact multiple of terms_per_file does not
        # produce a trailing empty bank. An empty term list still yields a
        # single (empty) term bank.
        bank_count, remainder = divmod(len(terms), terms_per_file)
        if remainder or bank_count == 0:
            bank_count += 1
        for i in range(bank_count):
            term_file = os.path.join(build_directory, f"term_bank_{i+1}.json")
            start = terms_per_file * i
            end = start + terms_per_file
            with open(term_file, "w", encoding="utf8") as f:
                json.dump(terms[start:end], f, indent=4, ensure_ascii=False)

        index_file = os.path.join(build_directory, "index.json")
        with open(index_file, "w", encoding="utf8") as f:
            json.dump(index, f, indent=4, ensure_ascii=False)

        if tags:
            tag_file = os.path.join(build_directory, "tag_bank_1.json")
            with open(tag_file, "w", encoding="utf8") as f:
                json.dump(tags, f, indent=4, ensure_ascii=False)

        out_dir = os.path.join(user_documents_dir(), "jitenbot")
        # makedirs also creates a missing documents directory.
        os.makedirs(out_dir, exist_ok=True)
        out_file = os.path.join(out_dir, f"{index['title']}.zip")

        # Stage the archive next to the build directory instead of in the
        # current working directory, which may be read-only or unrelated.
        staged_base = os.path.join(cache_dir, f"build_{timestamp}_archive")
        staged_archive = shutil.make_archive(staged_base, "zip", build_directory)

        if Path(out_file).is_file():
            os.remove(out_file)
        shutil.move(staged_archive, out_file)
    finally:
        # Always discard the intermediate files, even when a step failed.
        shutil.rmtree(build_directory)