2023-04-10 20:20:33 +00:00
|
|
|
import json
|
|
|
|
import os
|
|
|
|
import shutil
|
2023-07-18 05:43:38 +00:00
|
|
|
import copy
|
2023-04-10 20:20:33 +00:00
|
|
|
from pathlib import Path
|
2023-07-08 21:49:03 +00:00
|
|
|
from abc import ABC, abstractmethod
|
2023-04-10 20:20:33 +00:00
|
|
|
|
2023-07-18 05:43:38 +00:00
|
|
|
import fastjsonschema
|
2023-07-27 04:48:24 +00:00
|
|
|
from platformdirs import user_documents_dir, user_cache_dir
|
|
|
|
|
2023-04-23 17:33:42 +00:00
|
|
|
from bot.data import load_yomichan_metadata
|
2023-07-18 05:43:38 +00:00
|
|
|
from bot.data import load_yomichan_term_schema
|
2023-07-27 04:48:24 +00:00
|
|
|
from bot.factory import new_yomichan_terminator
|
2023-04-22 19:14:28 +00:00
|
|
|
|
2023-04-10 20:20:33 +00:00
|
|
|
|
2023-07-27 04:48:24 +00:00
|
|
|
class BaseExporter(ABC):
    """Abstract base class that builds and archives a Yomichan dictionary.

    Subclasses supply the dictionary revision and attribution strings via
    ``_get_revision`` and ``_get_attribution``. Everything else (building
    terms, optional schema validation, writing the JSON banks and zipping
    the result) is handled here.
    """

    def __init__(self, target):
        """Store the export target and create its term generator.

        Args:
            target: Object identifying the dictionary being exported. Its
                ``value`` attribute is used as the metadata key and as the
                name of the image sub-directory (presumably an enum
                member -- confirm against callers).
        """
        self._target = target
        self._terminator = new_yomichan_terminator(target)
        # Lazily created by _get_build_dir().
        self._build_dir = None
        # Maximum number of terms written to a single term_bank_N.json file.
        self._terms_per_file = 2000

    def export(self, entries, image_dir, validate):
        """Build the dictionary archive for ``entries``.

        Args:
            entries: Sized iterable of entries handed to the terminator's
                ``make_terms`` one at a time.
            image_dir: Directory of media files to copy into the build
                directory, or ``None`` when there are no media files.
            validate: When truthy, every generated term is checked against
                the Yomichan term schema. Invalid terms are written to a
                log directory for debugging; the export still proceeds.
        """
        self.__init_build_image_dir(image_dir)
        meta = load_yomichan_metadata()
        target_meta = meta[self._target.value]
        index = target_meta["index"]
        index["revision"] = self._get_revision(entries)
        index["attribution"] = self._get_attribution(entries)
        tags = target_meta["tags"]
        terms = self.__get_terms(entries)
        if validate:
            self.__validate_terms(terms)
        self.__make_dictionary(terms, index, tags)

    @abstractmethod
    def _get_revision(self, entries):
        """Return the value stored under ``"revision"`` in index.json."""
        raise NotImplementedError

    @abstractmethod
    def _get_attribution(self, entries):
        """Return the value stored under ``"attribution"`` in index.json."""
        raise NotImplementedError

    def _get_build_dir(self):
        """Return the build directory path, creating it on first use.

        On the first call any existing ``yomichan_build`` directory in the
        user cache is deleted and recreated empty; later calls return the
        remembered path without touching the filesystem.
        """
        if self._build_dir is not None:
            return self._build_dir
        cache_dir = user_cache_dir("jitenbot")
        build_directory = os.path.join(cache_dir, "yomichan_build")
        print(f"Initializing build directory `{build_directory}`")
        if Path(build_directory).is_dir():
            shutil.rmtree(build_directory)
        os.makedirs(build_directory)
        self._build_dir = build_directory
        return self._build_dir

    def __get_invalid_term_dir(self):
        """Return a freshly emptied directory for logging invalid terms."""
        cache_dir = user_cache_dir("jitenbot")
        log_dir = os.path.join(cache_dir, "invalid_yomichan_terms")
        # Start from a clean slate so stale logs from earlier runs are gone.
        if Path(log_dir).is_dir():
            shutil.rmtree(log_dir)
        os.makedirs(log_dir)
        return log_dir

    def __init_build_image_dir(self, image_dir):
        """Prepare the image directory inside the build directory.

        Copies ``image_dir`` into place when given, otherwise creates an
        empty directory, then tells the terminator where the images live.
        """
        build_dir = self._get_build_dir()
        build_img_dir = os.path.join(build_dir, self._target.value)
        if image_dir is not None:
            print("Copying media files to build directory...")
            shutil.copytree(image_dir, build_img_dir)
        else:
            os.makedirs(build_img_dir)
        self._terminator.set_image_dir(build_img_dir)

    def __get_terms(self, entries):
        """Return the flat list of terms produced for all ``entries``."""
        terms = []
        entries_len = len(entries)
        for idx, entry in enumerate(entries):
            update = f"Creating Yomichan terms for entry {idx+1}/{entries_len}"
            print(update, end='\r', flush=True)
            terms.extend(self._terminator.make_terms(entry))
        # Move past the carriage-return progress line.
        print()
        return terms

    def __validate_terms(self, terms):
        """Validate each term against the Yomichan term schema.

        Terms that fail validation are dumped as ``<index>.json`` files in
        the invalid-term log directory; no exception is propagated for
        schema failures.
        """
        print("Making a copy of term data for validation...")
        terms_copy = copy.deepcopy(terms)  # because validator will alter data!
        term_count = len(terms_copy)
        log_dir = self.__get_invalid_term_dir()
        schema = load_yomichan_term_schema()
        validator = fastjsonschema.compile(schema)
        failure_count = 0
        for idx, term in enumerate(terms_copy):
            update = f"Validating term {idx+1}/{term_count}"
            print(update, end='\r', flush=True)
            try:
                # Each term is wrapped in a list before being validated.
                validator([term])
            except fastjsonschema.JsonSchemaException:
                failure_count += 1
                term_file = os.path.join(log_dir, f"{idx}.json")
                with open(term_file, "w", encoding='utf8') as f:
                    json.dump([term], f, indent=4, ensure_ascii=False)
        print(f"\nFinished validating with {failure_count} error{'' if failure_count == 1 else 's'}")
        if failure_count > 0:
            print(f"Invalid terms saved to `{log_dir}` for debugging")

    def __make_dictionary(self, terms, index, tags):
        """Write all dictionary files, archive them, and clean up."""
        self.__write_term_banks(terms)
        self.__write_index(index)
        self.__write_tag_bank(tags)
        self.__write_archive(index["title"])
        self.__rm_build_dir()

    def __write_term_banks(self, terms):
        """Write ``terms`` to numbered ``term_bank_N.json`` files.

        Each file holds at most ``self._terms_per_file`` terms. At least one
        bank is always written, so an empty ``terms`` list still produces
        ``term_bank_1.json`` containing ``[]``.
        """
        term_count = len(terms)
        print(f"Exporting {term_count} JSON terms")
        build_dir = self._get_build_dir()
        per_file = self._terms_per_file
        # Ceiling division: the previous `int(n / per_file) + 1` emitted an
        # extra, empty bank whenever n was an exact multiple of per_file.
        bank_count = max(1, (term_count + per_file - 1) // per_file)
        for i in range(bank_count):
            update = f"Writing terms to term bank {i+1}/{bank_count}"
            print(update, end='\r', flush=True)
            start = per_file * i
            end = per_file * (i + 1)
            term_file = os.path.join(build_dir, f"term_bank_{i+1}.json")
            with open(term_file, "w", encoding='utf8') as f:
                json.dump(terms[start:end], f, indent=4, ensure_ascii=False)
        # Move past the carriage-return progress line.
        print()

    def __write_index(self, index):
        """Write ``index`` to ``index.json`` in the build directory."""
        build_dir = self._get_build_dir()
        index_file = os.path.join(build_dir, "index.json")
        with open(index_file, 'w', encoding='utf8') as f:
            json.dump(index, f, indent=4, ensure_ascii=False)

    def __write_tag_bank(self, tags):
        """Write ``tags`` to ``tag_bank_1.json``; skipped when there are none."""
        if not tags:
            return
        build_dir = self._get_build_dir()
        tag_file = os.path.join(build_dir, "tag_bank_1.json")
        with open(tag_file, 'w', encoding='utf8') as f:
            json.dump(tags, f, indent=4, ensure_ascii=False)

    def __write_archive(self, filename):
        """Zip the build directory into the user's documents folder.

        The archive is saved as ``<documents>/jitenbot/yomichan/<filename>.zip``
        and replaces any existing file of the same name.
        """
        archive_format = "zip"
        print(f"Archiving data to {archive_format.upper()} file...")
        out_dir = os.path.join(user_documents_dir(), "jitenbot", "yomichan")
        os.makedirs(out_dir, exist_ok=True)
        out_file = f"{filename}.{archive_format}"
        out_filepath = os.path.join(out_dir, out_file)
        # Remove a previous export so the new archive replaces it.
        if Path(out_filepath).is_file():
            os.remove(out_filepath)
        base_filename = os.path.join(out_dir, filename)
        build_dir = self._get_build_dir()
        shutil.make_archive(base_filename, archive_format, build_dir)
        print(f"Dictionary file saved to `{out_filepath}`")

    def __rm_build_dir(self):
        """Delete the build directory and everything in it."""
        build_dir = self._get_build_dir()
        shutil.rmtree(build_dir)
|