jitenbot/bot/data.py
2023-07-18 00:43:38 -05:00

175 lines
4.5 KiB
Python

import os
import sys
import json
import csv
from functools import cache
from pathlib import Path
from platformdirs import user_config_dir
@cache
def get_adobe_glyph(code):
    """Return the glyph registered for an Adobe glyph code.

    An entry in the override table wins over the main glyph table.
    Results are memoized per ``code`` by ``functools.cache``.

    Args:
        code: Key used to look up both glyph tables.

    Raises:
        KeyError: ``code`` has no override and is absent from the main
            glyph table.
        Exception: ``code`` has no override and the main glyph table
            lists more than one candidate for it.
    """
    glyph_table = __load_adobe_glyphs()
    overrides = __load_override_adobe_glyphs()
    if code not in overrides:
        candidates = glyph_table[code]
        # Ambiguous codes must be resolved through the override table.
        if len(candidates) > 1:
            raise Exception(f"Multiple glyphs available for code {code}")
        return candidates[0]
    return overrides[code]
@cache
def load_config():
    """Load the jitenbot configuration, creating it when it is missing.

    The configuration is read from ``config.json`` inside the directory
    returned by ``user_config_dir("jitenbot")``. When that file does not
    exist, the default configuration is loaded and written to that path
    (JSON, 4-space indent) before being returned.

    Returns:
        The parsed configuration. The result is memoized by
        ``functools.cache``, so repeated calls return the same object.
    """
    config_dir = user_config_dir("jitenbot")
    # exist_ok=True avoids the check-then-create race of testing
    # is_dir() first: another process may create the directory between
    # the check and the makedirs() call.
    os.makedirs(config_dir, exist_ok=True)
    config_file = os.path.join(config_dir, "config.json")
    if Path(config_file).is_file():
        with open(config_file, "r", encoding="utf-8") as f:
            return json.load(f)
    # No user configuration yet: persist the defaults.
    config = __load_default_config()
    with open(config_file, "w", encoding="utf-8") as f:
        json.dump(config, f, indent=4)
    return config
@cache
def load_yomichan_inflection_categories():
    """Return the parsed ``yomichan/inflection_categories.json`` data file.

    The result is memoized by ``functools.cache``.
    """
    relative_path = os.path.join("yomichan", "inflection_categories.json")
    return __load_json(relative_path)
@cache
def load_yomichan_metadata():
    """Return the parsed ``yomichan/index.json`` data file.

    The result is memoized by ``functools.cache``.
    """
    relative_path = os.path.join("yomichan", "index.json")
    return __load_json(relative_path)
@cache
def load_variant_kanji():
    """Return a dict built from ``entries/variant_kanji.csv``.

    Each row contributes one item: the first column is the key and the
    second column is the value. Later rows overwrite earlier rows that
    share a key. The result is memoized by ``functools.cache``.
    """
    def store_pair(table, fields):
        key, value = fields[0], fields[1]
        table[key] = value

    relative_path = os.path.join("entries", "variant_kanji.csv")
    table = {}
    __load_csv(relative_path, store_pair, table)
    return table
@cache
def load_phrase_readings(target):
    """Return phrase readings from ``entries/<target.value>/phrase_readings.csv``.

    Args:
        target: Object whose ``value`` attribute names the subdirectory
            to read from (presumably an enum member -- confirm against
            callers). It must be hashable because results are memoized
            per ``target`` by ``functools.cache``.

    Returns:
        A dict keyed by ``(int(column 0), int(column 1))`` whose values
        are the third column of each row.
    """
    def store_reading(table, fields):
        key = (int(fields[0]), int(fields[1]))
        table[key] = fields[2]

    relative_path = os.path.join(
        "entries", target.value, "phrase_readings.csv")
    table = {}
    __load_csv(relative_path, store_reading, table)
    return table
@cache
def load_daijirin2_kana_abbreviations():
    """Return abbreviations from ``entries/daijirin2/kana_abbreviations.csv``.

    Returns:
        A dict keyed by ``(int(column 0), int(column 1))``. Each value is
        the list of remaining columns of that row, skipping columns that
        are empty or whitespace-only. Kept columns are stored unstripped.
        The result is memoized by ``functools.cache``.
    """
    def store_abbreviations(table, fields):
        key = (int(fields[0]), int(fields[1]))
        table[key] = [
            field for field in fields[2:] if field.strip() != ""
        ]

    relative_path = os.path.join(
        "entries", "daijirin2", "kana_abbreviations.csv")
    table = {}
    __load_csv(relative_path, store_abbreviations, table)
    return table
@cache
def load_yomichan_name_conversion(target):
    """Return the parsed ``yomichan/name_conversion/<target.value>.json`` file.

    Args:
        target: Object whose ``value`` attribute names the JSON file to
            read. It must be hashable because results are memoized per
            ``target`` by ``functools.cache``.
    """
    relative_path = os.path.join(
        "yomichan", "name_conversion", f"{target.value}.json")
    return __load_json(relative_path)
@cache
def load_yomichan_term_schema():
    """Return the parsed ``yomichan/dictionary-term-bank-v3-schema.json`` file.

    The result is memoized by ``functools.cache``.
    """
    relative_path = os.path.join(
        "yomichan", "dictionary-term-bank-v3-schema.json")
    return __load_json(relative_path)
@cache
def load_mdict_name_conversion(target):
    """Return the parsed ``mdict/name_conversion/<target.value>.json`` file.

    Args:
        target: Object whose ``value`` attribute names the JSON file to
            read. It must be hashable because results are memoized per
            ``target`` by ``functools.cache``.
    """
    relative_path = os.path.join(
        "mdict", "name_conversion", f"{target.value}.json")
    return __load_json(relative_path)
@cache
def __load_default_config():
    """Return the parsed ``default_config.json`` data file.

    The result is memoized by ``functools.cache``.
    """
    return __load_json("default_config.json")
@cache
def __load_adobe_glyphs():
    """Build the glyph table from ``entries/adobe/Adobe-Japan1_sequences.txt``.

    The file is read as ``;``-delimited rows. Rows whose first field
    starts with ``#`` are skipped. For every other row, the first
    space-separated token of the first field is parsed as a hexadecimal
    code point, and the third field (after removing a leading
    ``" CID+"``) is parsed as the integer code.

    Returns:
        A dict mapping each integer code to the list of distinct
        characters seen for it, in first-seen order. The result is
        memoized by ``functools.cache``.
    """
    def collect(table, fields):
        first_field = fields[0]
        if first_field.startswith("#"):
            return
        codepoint_hex = first_field.split(" ")[0]
        character = chr(int(codepoint_hex, 16))
        cid = int(fields[2].removeprefix(" CID+"))
        known = table.setdefault(cid, [])
        # Several rows can map the same character to one code.
        if character not in known:
            known.append(character)

    relative_path = os.path.join(
        "entries", "adobe", "Adobe-Japan1_sequences.txt")
    table = {}
    __load_csv(relative_path, collect, table, delim=';')
    return table
@cache
def __load_override_adobe_glyphs():
    """Return the glyph overrides from ``entries/adobe/override_glyphs.json``.

    JSON object keys are always strings, so each key is converted with
    ``int()``; the values are kept as loaded. The result is memoized by
    ``functools.cache``.
    """
    relative_path = os.path.join(
        "entries", "adobe", "override_glyphs.json")
    raw_overrides = __load_json(relative_path)
    return {int(code): glyph for code, glyph in raw_overrides.items()}
def __load_json(file_name):
    """Parse a UTF-8 JSON file located under the ``data`` directory.

    The path ``data/<file_name>`` is relative, so it is resolved against
    the current working directory.

    Args:
        file_name: Path of the file relative to ``data``.

    Returns:
        The object produced by ``json.load``.

    Raises:
        SystemExit: With exit status 1, after printing a message, when
            the file does not exist.
    """
    file_path = os.path.join("data", file_name)
    if Path(file_path).is_file():
        with open(file_path, "r", encoding="utf-8") as f:
            return json.load(f)
    print(f"Missing data file: {file_path}")
    sys.exit(1)
def __load_csv(file_name, loader, data, delim=',', quote='"'):
    """Feed every row of a delimited UTF-8 file under ``data`` to ``loader``.

    The path ``data/<file_name>`` is relative, so it is resolved against
    the current working directory.

    Args:
        file_name: Path of the file relative to ``data``.
        loader: Callable invoked as ``loader(data, row)`` for each row,
            where ``row`` is the list of fields produced by
            ``csv.reader``.
        data: Accumulator handed to ``loader`` on every call.
        delim: Field delimiter passed to ``csv.reader``.
        quote: Quote character passed to ``csv.reader``.

    Returns:
        The same ``data`` object that was passed in.

    Raises:
        SystemExit: With exit status 1, after printing a message, when
            the file does not exist.
    """
    file_path = os.path.join("data", file_name)
    if not Path(file_path).is_file():
        print(f"Missing data file: {file_path}")
        sys.exit(1)
    # The csv module requires files to be opened with newline="".
    # Otherwise universal-newline translation rewrites line breaks that
    # are embedded inside quoted fields before the reader sees them.
    with open(file_path, "r", encoding="utf-8", newline="") as f:
        reader = csv.reader(f, delimiter=delim, quotechar=quote)
        for row in reader:
            loader(data, row)
    return data