Add support for sankoku8
This commit is contained in:
parent
b0a9ab5cae
commit
e85d0a1625
7
TODO.md
7
TODO.md
|
@ -1,10 +1,13 @@
|
|||
### Todo
|
||||
|
||||
- [x] Add factory classes to reduce the amount of class import statements
|
||||
- [ ] Add dynamic import functionality to factory classes to reduce boilerplate
|
||||
- [x] Support exporting to MDict (.MDX) dictionary format
|
||||
- [x] Validate JSON schema of Yomichan terms during export
|
||||
- [ ] Add support for monokakido search keys from index files
|
||||
- [ ] Delete unneeded media from temp build directory before final export
|
||||
- [ ] Add test suite
|
||||
- [ ] Add documentation (docstrings, etc.)
|
||||
- [ ] Validate JSON schema of Yomichan terms during export
|
||||
- [ ] Add build scripts for producing program binaries
|
||||
- [ ] Validate scraped webpages after downloading
|
||||
- [ ] Log non-fatal failures to a log file instead of raising exceptions
|
||||
|
@ -13,7 +16,7 @@
|
|||
- [ ] [Yoji-Jukugo.com](https://yoji-jukugo.com/)
|
||||
- [ ] [実用日本語表現辞典](https://www.weblio.jp/cat/dictionary/jtnhj)
|
||||
- [ ] Support more Monokakido dictionaries
|
||||
- [ ] 三省堂国語辞典 第8版 (SANKOKU8)
|
||||
- [x] 三省堂国語辞典 第8版 (SANKOKU8)
|
||||
- [ ] 精選版 日本国語大辞典 (NDS)
|
||||
- [ ] 大辞泉 第2版 (DAIJISEN2)
|
||||
- [ ] 明鏡国語辞典 第3版 (MK3)
|
||||
|
|
|
@ -39,9 +39,9 @@ class Crawler(ABC):
|
|||
self._entries.append(entry)
|
||||
print()
|
||||
|
||||
def make_yomichan_dictionary(self, media_dir):
|
||||
def make_yomichan_dictionary(self, media_dir, validate):
|
||||
exporter = new_yomi_exporter(self._target)
|
||||
exporter.export(self._entries, media_dir)
|
||||
exporter.export(self._entries, media_dir, validate)
|
||||
|
||||
def make_mdict_dictionary(self, media_dir, icon_file):
|
||||
exporter = new_mdict_exporter(self._target)
|
||||
|
@ -152,3 +152,7 @@ class Smk8Crawler(_MonokakidoCrawler):
|
|||
|
||||
class Daijirin2Crawler(_MonokakidoCrawler):
|
||||
pass
|
||||
|
||||
|
||||
class Sankoku8Crawler(_MonokakidoCrawler):
    """Crawler registered for the SANKOKU8 target.

    Adds nothing of its own; all behaviour comes from _MonokakidoCrawler.
    """
|
||||
|
|
|
@ -5,6 +5,7 @@ from bot.crawlers.crawlers import JitenonYojiCrawler
|
|||
from bot.crawlers.crawlers import JitenonKotowazaCrawler
|
||||
from bot.crawlers.crawlers import Smk8Crawler
|
||||
from bot.crawlers.crawlers import Daijirin2Crawler
|
||||
from bot.crawlers.crawlers import Sankoku8Crawler
|
||||
|
||||
|
||||
def new_crawler(target):
|
||||
|
@ -14,5 +15,6 @@ def new_crawler(target):
|
|||
Targets.JITENON_KOTOWAZA: JitenonKotowazaCrawler,
|
||||
Targets.SMK8: Smk8Crawler,
|
||||
Targets.DAIJIRIN2: Daijirin2Crawler,
|
||||
Targets.SANKOKU8: Sankoku8Crawler,
|
||||
}
|
||||
return crawler_map[target](target)
|
||||
|
|
49
bot/data.py
49
bot/data.py
|
@ -37,14 +37,16 @@ def load_config():
|
|||
|
||||
@cache
|
||||
def load_yomichan_inflection_categories():
|
||||
file_name = os.path.join("yomichan", "inflection_categories.json")
|
||||
file_name = os.path.join(
|
||||
"yomichan", "inflection_categories.json")
|
||||
data = __load_json(file_name)
|
||||
return data
|
||||
|
||||
|
||||
@cache
|
||||
def load_yomichan_metadata():
|
||||
file_name = os.path.join("yomichan", "index.json")
|
||||
file_name = os.path.join(
|
||||
"yomichan", "index.json")
|
||||
data = __load_json(file_name)
|
||||
return data
|
||||
|
||||
|
@ -53,31 +55,21 @@ def load_yomichan_metadata():
|
|||
def load_variant_kanji():
|
||||
def loader(data, row):
|
||||
data[row[0]] = row[1]
|
||||
file_name = os.path.join("entries", "variant_kanji.csv")
|
||||
file_name = os.path.join(
|
||||
"entries", "variant_kanji.csv")
|
||||
data = {}
|
||||
__load_csv(file_name, loader, data)
|
||||
return data
|
||||
|
||||
|
||||
@cache
|
||||
def load_smk8_phrase_readings():
|
||||
def load_phrase_readings(target):
|
||||
def loader(data, row):
|
||||
entry_id = (int(row[0]), int(row[1]))
|
||||
reading = row[2]
|
||||
data[entry_id] = reading
|
||||
file_name = os.path.join("entries", "smk8", "phrase_readings.csv")
|
||||
data = {}
|
||||
__load_csv(file_name, loader, data)
|
||||
return data
|
||||
|
||||
|
||||
@cache
|
||||
def load_daijirin2_phrase_readings():
|
||||
def loader(data, row):
|
||||
entry_id = (int(row[0]), int(row[1]))
|
||||
reading = row[2]
|
||||
data[entry_id] = reading
|
||||
file_name = os.path.join("entries", "daijirin2", "phrase_readings.csv")
|
||||
file_name = os.path.join(
|
||||
"entries", target.value, "phrase_readings.csv")
|
||||
data = {}
|
||||
__load_csv(file_name, loader, data)
|
||||
return data
|
||||
|
@ -92,7 +84,8 @@ def load_daijirin2_kana_abbreviations():
|
|||
if abbr.strip() != "":
|
||||
abbreviations.append(abbr)
|
||||
data[entry_id] = abbreviations
|
||||
file_name = os.path.join("entries", "daijirin2", "kana_abbreviations.csv")
|
||||
file_name = os.path.join(
|
||||
"entries", "daijirin2", "kana_abbreviations.csv")
|
||||
data = {}
|
||||
__load_csv(file_name, loader, data)
|
||||
return data
|
||||
|
@ -100,14 +93,24 @@ def load_daijirin2_kana_abbreviations():
|
|||
|
||||
@cache
|
||||
def load_yomichan_name_conversion(target):
|
||||
file_name = os.path.join("yomichan", "name_conversion", f"{target.value}.json")
|
||||
file_name = os.path.join(
|
||||
"yomichan", "name_conversion", f"{target.value}.json")
|
||||
data = __load_json(file_name)
|
||||
return data
|
||||
|
||||
|
||||
@cache
def load_yomichan_term_schema():
    """Load the Yomichan term-bank (v3) JSON schema.

    Wrapped in ``cache`` so repeated calls reuse the first result
    (assuming ``cache`` is ``functools.cache`` -- TODO confirm).
    """
    schema_path = os.path.join(
        "yomichan", "dictionary-term-bank-v3-schema.json")
    return __load_json(schema_path)
|
||||
|
||||
|
||||
@cache
|
||||
def load_mdict_name_conversion(target):
|
||||
file_name = os.path.join("mdict", "name_conversion", f"{target.value}.json")
|
||||
file_name = os.path.join(
|
||||
"mdict", "name_conversion", f"{target.value}.json")
|
||||
data = __load_json(file_name)
|
||||
return data
|
||||
|
||||
|
@ -131,7 +134,8 @@ def __load_adobe_glyphs():
|
|||
data[code].append(character)
|
||||
else:
|
||||
data[code] = [character]
|
||||
file_name = os.path.join("entries", "adobe", "Adobe-Japan1_sequences.txt")
|
||||
file_name = os.path.join(
|
||||
"entries", "adobe", "Adobe-Japan1_sequences.txt")
|
||||
data = {}
|
||||
__load_csv(file_name, loader, data, delim=';')
|
||||
return data
|
||||
|
@ -139,7 +143,8 @@ def __load_adobe_glyphs():
|
|||
|
||||
@cache
|
||||
def __load_override_adobe_glyphs():
|
||||
file_name = os.path.join("entries", "adobe", "override_glyphs.json")
|
||||
file_name = os.path.join(
|
||||
"entries", "adobe", "override_glyphs.json")
|
||||
json_data = __load_json(file_name)
|
||||
data = {}
|
||||
for key, val in json_data.items():
|
||||
|
|
|
@ -2,7 +2,7 @@ from bs4 import BeautifulSoup
|
|||
|
||||
import bot.entries.expressions as Expressions
|
||||
import bot.soup as Soup
|
||||
from bot.data import load_daijirin2_phrase_readings
|
||||
from bot.data import load_phrase_readings
|
||||
from bot.data import load_daijirin2_kana_abbreviations
|
||||
from bot.entries.entry import Entry
|
||||
from bot.entries.daijirin2_preprocess import preprocess_page
|
||||
|
@ -221,7 +221,7 @@ class Daijirin2PhraseEntry(_BaseDaijirin2Entry):
|
|||
return expressions
|
||||
|
||||
def _find_readings(self):
|
||||
phrase_readings = load_daijirin2_phrase_readings()
|
||||
phrase_readings = load_phrase_readings(self.target)
|
||||
text = phrase_readings[self.entry_id]
|
||||
alternatives = Expressions.expand_daijirin_alternatives(text)
|
||||
readings = []
|
||||
|
|
|
@ -5,6 +5,7 @@ from bot.entries.jitenon import JitenonYojiEntry
|
|||
from bot.entries.jitenon import JitenonKotowazaEntry
|
||||
from bot.entries.smk8 import Smk8Entry
|
||||
from bot.entries.daijirin2 import Daijirin2Entry
|
||||
from bot.entries.sankoku8 import Sankoku8Entry
|
||||
|
||||
|
||||
def new_entry(target, page_id):
|
||||
|
@ -14,5 +15,6 @@ def new_entry(target, page_id):
|
|||
Targets.JITENON_KOTOWAZA: JitenonKotowazaEntry,
|
||||
Targets.SMK8: Smk8Entry,
|
||||
Targets.DAIJIRIN2: Daijirin2Entry,
|
||||
Targets.SANKOKU8: Sankoku8Entry,
|
||||
}
|
||||
return entry_map[target](target, page_id)
|
||||
|
|
260
bot/entries/sankoku8.py
Normal file
260
bot/entries/sankoku8.py
Normal file
|
@ -0,0 +1,260 @@
|
|||
from bs4 import BeautifulSoup
|
||||
import bot.entries.expressions as Expressions
|
||||
import bot.soup as Soup
|
||||
from bot.entries.entry import Entry
|
||||
from bot.data import load_phrase_readings
|
||||
from bot.entries.sankoku8_preprocess import preprocess_page
|
||||
|
||||
|
||||
class _BaseSankoku8Entry(Entry):
|
||||
def __init__(self, target, entry_id):
    """Initialise empty subentry lists and the tag names used for lookups."""
    super().__init__(target, entry_id)
    # Filled by __decompose_subentries when a page is set.
    self.children, self.phrases = [], []
    # Element names searched when extracting headwords. The two midashi
    # names stay unset here; Sankoku8Entry and Sankoku8ChildEntry assign
    # them in their own constructors.
    self._hyouki_name = "表記"
    self._midashi_name = self._midashi_kana_name = None
|
||||
|
||||
def get_global_identifier(self):
    """Return the identifier string used to link to this entry.

    The format is ``@<target>-<parent>-<child>`` where the parent id is
    zero-padded to six decimal digits and the child id is zero-padded to
    four uppercase hexadecimal digits.
    """
    parent_id = self.entry_id[0]
    child_id = self.entry_id[1]
    # A format spec is used instead of hex().lstrip('0x'): lstrip strips
    # a *set* of characters rather than a prefix, which only happened to
    # give the right answer.
    return f"@{self.target.value}-{parent_id:06}-{child_id:04X}"
|
||||
|
||||
def set_page(self, page):
    """Store ``page`` after its child and phrase subentries are split off."""
    self._page = self.__decompose_subentries(page)
|
||||
|
||||
def get_page_soup(self):
    """Parse the stored page markup as XML and return a new soup object."""
    return BeautifulSoup(self._page, "xml")
|
||||
|
||||
def _get_headwords(self):
    """Map every reading of this entry to its list of written forms.

    Readings and expressions are paired according to how many of each
    were found:

    * one reading: every expression belongs to it;
    * several readings, no expression: each reading is its own form;
    * several readings, one expression: the expression is shared;
    * equal counts (more than one): paired by position.

    In all but the "no expression" case, each reading is additionally
    listed as its own written form when the headword element contains no
    表記 element.

    Raises:
        Exception: if the counts fit none of the cases above.
    """
    soup = self.get_page_soup()
    self._delete_unused_nodes(soup)
    readings = self._find_readings(soup)
    expressions = self._find_expressions(soup)
    headwords = {reading: [] for reading in readings}

    def seed_with_readings():
        # Only evaluated in the branches that need it, as before.
        midashi = soup.find(self._midashi_name)
        if midashi.find(self._hyouki_name) is None:
            for kana in readings:
                headwords[kana].append(kana)

    def add_unique(kana, written):
        if written not in headwords[kana]:
            headwords[kana].append(written)

    reading_count = len(readings)
    expression_count = len(expressions)
    if reading_count == 1:
        seed_with_readings()
        for written in expressions:
            add_unique(readings[0], written)
    elif reading_count > 1 and expression_count == 0:
        for kana in readings:
            headwords[kana].append(kana)
    elif reading_count > 1 and expression_count == 1:
        seed_with_readings()
        for kana in readings:
            add_unique(kana, expressions[0])
    elif reading_count > 1 and expression_count == reading_count:
        seed_with_readings()
        for kana, written in zip(readings, expressions):
            add_unique(kana, written)
    else:
        raise Exception()  # shouldn't happen
    return headwords
|
||||
|
||||
def _add_variant_expressions(self, headwords):
    """Run the variant-spelling helpers over every expression list.

    The helpers' return values are discarded, so they presumably update
    each list in place -- verify against bot.entries.expressions.
    """
    for expressions in headwords.values():
        for expand in (
            Expressions.add_variant_kanji,
            Expressions.add_fullwidth,
            Expressions.remove_iteration_mark,
            Expressions.add_iteration_mark,
        ):
            expand(expressions)
|
||||
|
||||
def get_part_of_speech_tags(self):
    """Return the distinct part-of-speech tags of this entry.

    The tags are the texts of the ``a`` elements inside each 品詞G group
    of the headword elements, in order of first appearance. The list is
    computed on the first call and stored on the instance afterwards.
    """
    if self._part_of_speech_tags is None:
        # Bind the list to the instance first, then fill it in place.
        tags = self._part_of_speech_tags = []
        soup = self.get_page_soup()
        for midashi in soup.find_all([self._midashi_name, "見出部要素"]):
            pos_group = midashi.find("品詞G")
            if pos_group is not None:
                for anchor in pos_group.find_all("a"):
                    if anchor.text not in tags:
                        tags.append(anchor.text)
    return self._part_of_speech_tags
|
||||
|
||||
def _find_expressions(self, soup):
    """Collect the written forms encoded by every 表記 element in ``soup``."""
    return [
        expression
        for hyouki in soup.find_all(self._hyouki_name)
        for expression in parse_hyouki_soup(hyouki, [""])
    ]
|
||||
|
||||
def _find_readings(self, soup):
    """Return the readings encoded by the kana-headword element of ``soup``."""
    kana_element = soup.find(self._midashi_kana_name)
    return parse_hyouki_soup(kana_element, [""])
|
||||
|
||||
def __decompose_subentries(self, page):
    """Move child and phrase subentries out of ``page`` into entry objects.

    Every 子項目 element becomes a Sankoku8ChildEntry in ``self.children``
    and every 句項目 element a Sankoku8PhraseEntry in ``self.phrases``.
    Each extracted element is renamed to 項目, handed to the new subentry
    as its page, recorded in ``SUBENTRY_ID_TO_ENTRY_ID`` and removed from
    the tree.

    Returns:
        The remaining markup, serialised back to a string.
    """
    soup = BeautifulSoup(page, features="xml")
    extraction_plan = (
        (Sankoku8ChildEntry, "子項目", self.children),
        (Sankoku8PhraseEntry, "句項目", self.phrases),
    )
    for subentry_class, tag_name, bucket in extraction_plan:
        # Search again after each removal: the tree shrinks as we go.
        while (node := soup.find(tag_name)) is not None:
            node.name = "項目"
            subentry_id = self.id_string_to_entry_id(node.attrs["id"])
            self.SUBENTRY_ID_TO_ENTRY_ID[subentry_id] = self.entry_id
            subentry = subentry_class(self.target, subentry_id)
            subentry.set_page(node.decode())
            bucket.append(subentry)
            node.decompose()
    return soup.decode()
|
||||
|
||||
@staticmethod
|
||||
def id_string_to_entry_id(id_string):
|
||||
parts = id_string.split("-")
|
||||
if len(parts) == 1:
|
||||
return (int(parts[0]), 0)
|
||||
elif len(parts) == 2:
|
||||
# subentries have a hexadecimal part
|
||||
return (int(parts[0]), int(parts[1], 16))
|
||||
else:
|
||||
raise Exception(f"Invalid entry ID: {id_string}")
|
||||
|
||||
@staticmethod
def _delete_unused_nodes(soup):
    """Strip markup from the headword line that is not headword text.

    Each listed element name is passed to ``Soup.delete_soup_nodes``.
    """
    for node_name in (
        "語構成", "平板", "アクセント", "表外字マーク", "表外音訓マーク",
        "アクセント分節", "活用分節", "ルビG", "分書",
    ):
        Soup.delete_soup_nodes(soup, node_name)
|
||||
|
||||
|
||||
class Sankoku8Entry(_BaseSankoku8Entry):
    """Top-level entry built from a whole dictionary page."""

    def __init__(self, target, page_id):
        # A top-level entry uses 0 as the subentry half of its id.
        super().__init__(target, (page_id, 0))
        self._midashi_name = "見出部"
        self._midashi_kana_name = "見出仮名"

    def set_page(self, page):
        """Preprocess the raw page, then store it via the base class."""
        super().set_page(preprocess_page(page))
|
||||
|
||||
|
||||
class Sankoku8ChildEntry(_BaseSankoku8Entry):
    """Child subentry; its headword lives in the 子見出部 element."""

    def __init__(self, target, page_id):
        # Despite its name, ``page_id`` is forwarded unchanged as the
        # entry id of the base class.
        super().__init__(target, page_id)
        self._midashi_name = "子見出部"
        self._midashi_kana_name = "子見出仮名"
|
||||
|
||||
|
||||
class Sankoku8PhraseEntry(_BaseSankoku8Entry):
    """Phrase subentry whose readings come from the phrase-readings data."""

    def get_part_of_speech_tags(self):
        # phrases do not contain these tags
        return []

    def _get_headwords(self):
        """Pair each phrase expression with the reading at the same index.

        Raises:
            Exception: if the numbers of expressions and readings differ;
                the message is the entry id.
        """
        soup = self.get_page_soup()
        self._delete_unused_nodes(soup)
        expressions = self._find_expressions(soup)
        readings = self._find_readings(soup)
        if len(expressions) != len(readings):
            raise Exception(f"{self.entry_id[0]}-{self.entry_id[1]}")
        headwords = {}
        for reading, expression in zip(readings, expressions):
            headwords.setdefault(reading, []).append(expression)
        return headwords

    def _find_expressions(self, soup):
        """Return the spellings encoded by the 句表記 element."""
        return parse_hyouki_soup(soup.find("句表記"), [""])

    def _find_readings(self, soup):
        """Return the readings from this entry's stored reading pattern.

        ``soup`` is not consulted; the pattern is looked up by entry id
        in the data returned by ``load_phrase_readings``.
        """
        pattern = load_phrase_readings(self.target)[self.entry_id]
        return parse_hyouki_pattern(pattern)
|
||||
|
||||
|
||||
def parse_hyouki_soup(soup, base_exps):
|
||||
omitted_characters = [
|
||||
"/", "〈", "〉", "(", ")", "⦅", "⦆", ":", "…"
|
||||
]
|
||||
exps = base_exps.copy()
|
||||
for child in soup.children:
|
||||
new_exps = []
|
||||
if child.name == "言換G":
|
||||
for alt in child.find_all("言換"):
|
||||
parts = parse_hyouki_soup(alt, [""])
|
||||
for exp in exps:
|
||||
for part in parts:
|
||||
new_exps.append(exp + part)
|
||||
elif child.name == "補足表記":
|
||||
alt1 = child.find("表記対象")
|
||||
alt2 = child.find("表記内容G")
|
||||
parts1 = parse_hyouki_soup(alt1, [""])
|
||||
parts2 = parse_hyouki_soup(alt2, [""])
|
||||
for exp in exps:
|
||||
for part in parts1:
|
||||
new_exps.append(exp + part)
|
||||
for part in parts2:
|
||||
new_exps.append(exp + part)
|
||||
elif child.name == "省略":
|
||||
parts = parse_hyouki_soup(child, [""])
|
||||
for exp in exps:
|
||||
new_exps.append(exp)
|
||||
for part in parts:
|
||||
new_exps.append(exp + part)
|
||||
elif child.name is not None:
|
||||
new_exps = parse_hyouki_soup(child, exps)
|
||||
else:
|
||||
text = child.text
|
||||
for char in omitted_characters:
|
||||
text = text.replace(char, "")
|
||||
for exp in exps:
|
||||
new_exps.append(exp + text)
|
||||
exps = new_exps.copy()
|
||||
return exps
|
||||
|
||||
|
||||
def parse_hyouki_pattern(pattern):
    """Expand a compact reading pattern into every reading it encodes.

    The bracket notation of ``pattern`` is rewritten into the element
    structure understood by ``parse_hyouki_soup`` (省略, 補足表記, 言換G),
    wrapped in a ``span`` element, parsed as XML and expanded.

    Args:
        pattern: reading pattern string using the symbols listed in
            ``replacements`` below.

    Returns:
        The list of expanded readings.
    """
    replacements = {
        "(": "<省略>(",
        ")": ")</省略>",
        "{": "<補足表記><表記対象>",
        "・": "</表記対象><表記内容G>(<表記内容>",
        "}": "</表記内容>)</表記内容G></補足表記>",
        "〈": "<言換G>〈<言換>",
        "/": "</言換>/<言換>",
        "〉": "</言換>〉</言換G>",
        "⦅": "<補足表記><表記対象>",
        # The backslash must be escaped: a bare "\" would swallow the
        # closing quote and leave the literal unterminated.
        # NOTE(review): confirm against the phrase_readings data whether
        # a fullwidth reverse solidus is the intended separator.
        "\\": "</表記対象><表記内容G>⦅<表記内容>",
        "⦆": "</表記内容>⦆</表記内容G></補足表記>",
    }
    # Substitute in one pass over the pattern, so that characters inside
    # markup already inserted (e.g. the "/" of a closing tag) are never
    # themselves mistaken for pattern symbols.
    body = "".join(replacements.get(char, char) for char in pattern)
    soup = BeautifulSoup(f"<span>{body}</span>", "xml")
    return parse_hyouki_soup(soup.find("span"), [""])
|
28
bot/entries/sankoku8_preprocess.py
Normal file
28
bot/entries/sankoku8_preprocess.py
Normal file
|
@ -0,0 +1,28 @@
|
|||
import re
|
||||
from bs4 import BeautifulSoup
|
||||
|
||||
from bot.data import get_adobe_glyph
|
||||
|
||||
|
||||
def preprocess_page(page):
    """Return the primary 項目 markup of ``page`` with glyph codes resolved."""
    soup = BeautifulSoup(page, features="xml")
    __replace_glyph_codes(soup)
    return __strip_page(soup)
|
||||
|
||||
|
||||
def __replace_glyph_codes(soup):
    """Swap each 〓 placeholder inside a ``glyph`` element for its glyph.

    The numeric code is read from the element's ``style`` attribute
    (``glyph:<digits>`` with an optional trailing semicolon) and resolved
    through ``get_adobe_glyph``.
    """
    for glyph_node in soup.find_all("glyph"):
        style_match = re.search(
            r"^glyph:([0-9]+);?$", glyph_node.attrs["style"])
        glyph_code = int(style_match.group(1))
        for placeholder in glyph_node.find_all(string="〓"):
            placeholder.replace_with(get_adobe_glyph(glyph_code))
|
||||
|
||||
|
||||
def __strip_page(soup):
    """Return the first 項目 element of ``soup`` serialised as a string.

    Raises:
        Exception: if ``soup`` contains no 項目 element.
    """
    koumoku = soup.find(["項目"])
    if koumoku is None:
        raise Exception(f"Primary 項目 not found in page:\n{soup.prettify()}")
    return koumoku.decode()
|
|
@ -2,7 +2,7 @@ from bs4 import BeautifulSoup
|
|||
|
||||
import bot.entries.expressions as Expressions
|
||||
import bot.soup as Soup
|
||||
from bot.data import load_smk8_phrase_readings
|
||||
from bot.data import load_phrase_readings
|
||||
from bot.entries.entry import Entry
|
||||
from bot.entries.smk8_preprocess import preprocess_page
|
||||
|
||||
|
@ -163,7 +163,7 @@ class Smk8ChildEntry(_BaseSmk8Entry):
|
|||
class Smk8PhraseEntry(_BaseSmk8Entry):
|
||||
def __init__(self, target, entry_id):
|
||||
super().__init__(target, entry_id)
|
||||
self.__phrase_readings = load_smk8_phrase_readings()
|
||||
self.__phrase_readings = load_phrase_readings(self.target)
|
||||
|
||||
def get_part_of_speech_tags(self):
|
||||
# phrases do not contain these tags
|
||||
|
|
|
@ -218,3 +218,8 @@ class Smk8Exporter(_MonokakidoExporter):
|
|||
class Daijirin2Exporter(_MonokakidoExporter):
|
||||
def _get_attribution(self, entries):
|
||||
return "© Sanseido Co., LTD. 2019"
|
||||
|
||||
|
||||
class Sankoku8Exporter(_MonokakidoExporter):
    """MDict exporter registered for the SANKOKU8 target."""

    def _get_attribution(self, entries):
        """Return the fixed copyright notice; ``entries`` is not consulted."""
        return "© Sanseido Co., LTD. 2021"
|
||||
|
|
|
@ -5,6 +5,7 @@ from bot.mdict.exporters.export import JitenonYojiExporter
|
|||
from bot.mdict.exporters.export import JitenonKotowazaExporter
|
||||
from bot.mdict.exporters.export import Smk8Exporter
|
||||
from bot.mdict.exporters.export import Daijirin2Exporter
|
||||
from bot.mdict.exporters.export import Sankoku8Exporter
|
||||
|
||||
|
||||
def new_mdict_exporter(target):
|
||||
|
@ -14,5 +15,6 @@ def new_mdict_exporter(target):
|
|||
Targets.JITENON_KOTOWAZA: JitenonKotowazaExporter,
|
||||
Targets.SMK8: Smk8Exporter,
|
||||
Targets.DAIJIRIN2: Daijirin2Exporter,
|
||||
Targets.SANKOKU8: Sankoku8Exporter,
|
||||
}
|
||||
return exporter_map[target](target)
|
||||
|
|
137
bot/mdict/glossary/sankoku8.py
Normal file
137
bot/mdict/glossary/sankoku8.py
Normal file
|
@ -0,0 +1,137 @@
|
|||
import re
|
||||
from bs4 import BeautifulSoup
|
||||
from bot.data import load_mdict_name_conversion
|
||||
from bot.name_conversion import convert_names
|
||||
|
||||
|
||||
def make_glossary(entry, media_dir):
    """Build the MDict glossary markup for ``entry``.

    The entry's page soup is run through the transformation steps below
    and the resulting ``span`` element is returned as a string.
    ``media_dir`` is accepted but not used in this function.
    """
    soup = entry.get_page_soup()
    # Soup-only steps. Links lacking an href are demoted to spans before
    # __convert_links reads the href of every remaining <a>.
    for soup_step in (
        __reposition_marks,
        __remove_appendix_links,
        __convert_images,
        __remove_links_without_href,
    ):
        soup_step(soup)
    # Steps that also need the entry to resolve link targets.
    for entry_step in (
        __convert_links,
        __add_parent_link,
        __add_homophone_links,
    ):
        entry_step(soup, entry)

    convert_names(soup, load_mdict_name_conversion(entry.target))
    return soup.span.decode()
|
||||
|
||||
|
||||
def __reposition_marks(soup):
    """Move each mark element to the end of its parent element.

    These 表外字マーク / 表外音訓マーク symbols will be converted to rubies
    later, so they need to be positioned after the corresponding text in
    order to appear correctly.

    Elements that contain no mark are left untouched; previously they
    caused ``append(None)`` to be called, which fails.
    """
    mark_names = (
        ("表外字", "表外字マーク"),
        ("表外音訓", "表外音訓マーク"),
    )
    for parent_name, mark_name in mark_names:
        for elm in soup.find_all(parent_name):
            mark = elm.find(mark_name)
            if mark is not None:
                # Appending a node already in the tree relocates it.
                elm.append(mark)
|
||||
|
||||
|
||||
def __remove_appendix_links(soup):
    """Turn links into appendix files into plain ``span`` elements.

    This info would be useful and nice to have, but jitenbot currently
    isn't designed to fetch and process these appendix files. It probably
    wouldn't be possible to include them in Yomichan, but it would
    definitely be possible for Mdict.

    The original tag name and href are kept in ``data-name`` and
    ``data-href`` attributes.
    """
    for elm in soup.find_all("a"):
        if elm.has_attr("href") and elm.attrs["href"].startswith("appendix"):
            elm.attrs["data-name"] = "a"
            elm.attrs["data-href"] = elm.attrs.pop("href")
            elm.name = "span"
|
||||
|
||||
|
||||
def __convert_images(soup):
    """Replace known ``img`` elements with ``span`` elements holding text.

    For every image whose ``src`` is listed below, the element becomes a
    ``span`` containing the paired text; the original tag name and source
    are kept in ``data-name`` and ``data-src`` attributes.
    """
    text_by_image = (
        ("svg-logo/重要語.svg", "*"),
        ("svg-logo/最重要語.svg", "**"),
        ("svg-logo/一般常識語.svg", "☆☆"),
        ("svg-logo/追い込み.svg", ""),
        ("svg-special/区切り線.svg", "|"),
        ("svg-accent/平板.svg", "⎺"),
        ("svg-accent/アクセント.svg", "⌝"),
        ("svg-logo/アク.svg", "アク"),
        ("svg-logo/丁寧.svg", "丁寧"),
        ("svg-logo/可能.svg", "可能"),
        ("svg-logo/尊敬.svg", "尊敬"),
        ("svg-logo/接尾.svg", "接尾"),
        ("svg-logo/接頭.svg", "接頭"),
        ("svg-logo/表記.svg", "表記"),
        ("svg-logo/謙譲.svg", "謙譲"),
        ("svg-logo/区別.svg", "区別"),
        ("svg-logo/由来.svg", "由来"),
    )
    for filename, text in text_by_image:
        for elm in soup.find_all("img", attrs={"src": filename}):
            elm.attrs["data-name"] = elm.name
            elm.attrs["data-src"] = elm.attrs.pop("src")
            elm.name = "span"
            elm.string = text
|
||||
|
||||
|
||||
def __remove_links_without_href(soup):
    """Rename every ``a`` element lacking an ``href`` to ``span``.

    The original tag name is kept in a ``data-name`` attribute.
    """
    for elm in soup.find_all("a"):
        if not elm.has_attr("href"):
            elm.attrs["data-name"] = elm.name
            elm.name = "span"
|
||||
|
||||
|
||||
def __convert_links(soup, entry):
    """Rewrite entry-id links in ``soup`` to ``entry://`` links.

    Only the part of each href before the first space is considered.
    Hrefs of the form ``[#]<digits>[-<4 hex digits>]`` are resolved through
    ``entry.ID_TO_ENTRY``; ``entry:`` and ``http(s):`` hrefs are left
    alone.

    Raises:
        Exception: if an href matches none of the recognised forms.
    """
    for elm in soup.find_all("a"):
        href = elm.attrs["href"].split(" ")[0]
        if re.match(r"^#?[0-9]+(?:-[0-9A-F]{4})?$", href):
            ref_entry_id = entry.id_string_to_entry_id(
                href.removeprefix("#"))
            if ref_entry_id not in entry.ID_TO_ENTRY:
                # Unknown subentry id: link to its top-level entry.
                ref_entry_id = (ref_entry_id[0], 0)
            ref_entry = entry.ID_TO_ENTRY[ref_entry_id]
            elm.attrs["href"] = f"entry://{ref_entry.get_global_identifier()}"
        elif re.match(r"^entry:", href) or re.match(r"^https?:[\w\W]*", href):
            continue
        else:
            raise Exception(f"Invalid href format: {href}")
|
||||
|
||||
|
||||
def __add_parent_link(soup, entry):
    """Turn the 親見出相当部 element, if any, into a link to the parent entry.

    The original tag name is kept in a ``data-name`` attribute.
    """
    elm = soup.find("親見出相当部")
    if elm is None:
        return
    gid = entry.get_parent().get_global_identifier()
    elm.attrs["href"] = f"entry://{gid}"
    elm.attrs["data-name"] = elm.name
    elm.name = "a"
|
||||
|
||||
|
||||
def __add_homophone_links(soup, entry):
    """Replace homophone marker images with links to adjacent entries.

    Each ``homophone1/2/3.svg`` image is replaced by one or two ``a``
    elements pointing at the top-level entries whose page id is one
    above and/or one below this entry's.

    NOTE(review): "←" is attached to page id + 1 and "→" to page id - 1;
    confirm that this is the intended direction.
    """
    page_id = entry.entry_id[0]
    forward_link = ("←", page_id + 1)
    backward_link = ("→", page_id - 1)
    links_by_image = {
        "svg-logo/homophone1.svg": (forward_link,),
        "svg-logo/homophone2.svg": (forward_link, backward_link),
        "svg-logo/homophone3.svg": (backward_link,),
    }
    for filename, link_info in links_by_image.items():
        for elm in soup.find_all("img", attrs={"src": filename}):
            for text, link_id in link_info:
                link_entry = entry.ID_TO_ENTRY[(link_id, 0)]
                link = BeautifulSoup("<a/>", "xml").a
                link.string = text
                link.attrs["href"] = (
                    f"entry://{link_entry.get_global_identifier()}")
                elm.append(link)
            # Drop the img element itself, leaving the new links in place.
            elm.unwrap()
|
|
@ -5,6 +5,7 @@ from bot.mdict.terms.jitenon import JitenonYojiTerminator
|
|||
from bot.mdict.terms.jitenon import JitenonKotowazaTerminator
|
||||
from bot.mdict.terms.smk8 import Smk8Terminator
|
||||
from bot.mdict.terms.daijirin2 import Daijirin2Terminator
|
||||
from bot.mdict.terms.sankoku8 import Sankoku8Terminator
|
||||
|
||||
|
||||
def new_terminator(target):
|
||||
|
@ -14,5 +15,6 @@ def new_terminator(target):
|
|||
Targets.JITENON_KOTOWAZA: JitenonKotowazaTerminator,
|
||||
Targets.SMK8: Smk8Terminator,
|
||||
Targets.DAIJIRIN2: Daijirin2Terminator,
|
||||
Targets.SANKOKU8: Sankoku8Terminator,
|
||||
}
|
||||
return terminator_map[target](target)
|
||||
|
|
23
bot/mdict/terms/sankoku8.py
Normal file
23
bot/mdict/terms/sankoku8.py
Normal file
|
@ -0,0 +1,23 @@
|
|||
from bot.mdict.terms.terminator import Terminator
|
||||
from bot.mdict.glossary.sankoku8 import make_glossary
|
||||
|
||||
|
||||
class Sankoku8Terminator(Terminator):
    """MDict terminator registered for the SANKOKU8 target."""

    def _glossary(self, entry):
        """Return the glossary for ``entry``, building it on first use."""
        cache, key = self._glossary_cache, entry.entry_id
        if key not in cache:
            cache[key] = make_glossary(entry, self._media_dir)
        return cache[key]

    def _link_glossary_parameters(self, entry):
        """Return each subentry list paired with its element name."""
        return [
            [subentries, label]
            for subentries, label in (
                (entry.children, "子項目"),
                (entry.phrases, "句項目"),
            )
        ]

    def _subentry_lists(self, entry):
        """Return the child and phrase subentry lists of ``entry``."""
        return [entry.children, entry.phrases]
|
|
@ -7,3 +7,4 @@ class Targets(Enum):
|
|||
JITENON_KOTOWAZA = "jitenon-kotowaza"
|
||||
SMK8 = "smk8"
|
||||
DAIJIRIN2 = "daijirin2"
|
||||
SANKOKU8 = "sankoku8"
|
||||
|
|
|
@ -3,13 +3,16 @@
|
|||
import json
|
||||
import os
|
||||
import shutil
|
||||
import copy
|
||||
from pathlib import Path
|
||||
from datetime import datetime
|
||||
from abc import ABC, abstractmethod
|
||||
from platformdirs import user_documents_dir, user_cache_dir
|
||||
|
||||
import fastjsonschema
|
||||
from bot.data import load_yomichan_metadata
|
||||
from bot.yomichan.terms.factory import new_terminator
|
||||
from bot.data import load_yomichan_term_schema
|
||||
|
||||
|
||||
class Exporter(ABC):
|
||||
|
@ -19,7 +22,7 @@ class Exporter(ABC):
|
|||
self._build_dir = None
|
||||
self._terms_per_file = 2000
|
||||
|
||||
def export(self, entries, image_dir):
|
||||
def export(self, entries, image_dir, validate):
|
||||
self.__init_build_image_dir(image_dir)
|
||||
meta = load_yomichan_metadata()
|
||||
index = meta[self._target.value]["index"]
|
||||
|
@ -27,6 +30,8 @@ class Exporter(ABC):
|
|||
index["attribution"] = self._get_attribution(entries)
|
||||
tags = meta[self._target.value]["tags"]
|
||||
terms = self.__get_terms(entries)
|
||||
if validate:
|
||||
self.__validate_terms(terms)
|
||||
self.__make_dictionary(terms, index, tags)
|
||||
|
||||
@abstractmethod
|
||||
|
@ -49,6 +54,14 @@ class Exporter(ABC):
|
|||
self._build_dir = build_directory
|
||||
return self._build_dir
|
||||
|
||||
def __get_invalid_term_dir(self):
    """Return a freshly created, empty directory for invalid-term dumps.

    The directory sits under the ``jitenbot`` user cache directory; any
    existing directory at that path is deleted first.
    """
    log_dir = os.path.join(
        user_cache_dir("jitenbot"), "invalid_yomichan_terms")
    if Path(log_dir).is_dir():
        shutil.rmtree(log_dir)
    os.makedirs(log_dir)
    return log_dir
|
||||
|
||||
def __init_build_image_dir(self, image_dir):
|
||||
build_dir = self._get_build_dir()
|
||||
build_img_dir = os.path.join(build_dir, self._target.value)
|
||||
|
@ -71,8 +84,29 @@ class Exporter(ABC):
|
|||
print()
|
||||
return terms
|
||||
|
||||
def __validate_terms(self, terms):
    """Check every term against the Yomichan term schema and report.

    Each term that fails validation is written as ``<index>.json`` into
    the invalid-term directory. Failures are counted and printed; they
    are not raised.
    """
    print("Making a copy of term data for validation...")
    terms_copy = copy.deepcopy(terms)  # because validator will alter data!
    term_count = len(terms_copy)
    log_dir = self.__get_invalid_term_dir()
    validator = fastjsonschema.compile(load_yomichan_term_schema())
    failure_count = 0
    for number, term in enumerate(terms_copy, start=1):
        print(f"Validating term {number}/{term_count}", end='\r', flush=True)
        try:
            # Each term is wrapped in a one-element list for validation.
            validator([term])
        except fastjsonschema.JsonSchemaException:
            failure_count += 1
            # File names use the zero-based position of the term.
            term_file = os.path.join(log_dir, f"{number - 1}.json")
            with open(term_file, "w", encoding='utf8') as f:
                json.dump([term], f, indent=4, ensure_ascii=False)
    plural = '' if failure_count == 1 else 's'
    print(f"\nFinished validating with {failure_count} error{plural}")
    if failure_count > 0:
        print(f"Invalid terms saved to `{log_dir}` for debugging")
|
||||
|
||||
def __make_dictionary(self, terms, index, tags):
|
||||
print(f"Exporting {len(terms)} Yomichan terms...")
|
||||
self.__write_term_banks(terms)
|
||||
self.__write_index(index)
|
||||
self.__write_tag_bank(tags)
|
||||
|
@ -80,14 +114,18 @@ class Exporter(ABC):
|
|||
self.__rm_build_dir()
|
||||
|
||||
def __write_term_banks(self, terms):
|
||||
print(f"Exporting {len(terms)} JSON terms")
|
||||
build_dir = self._get_build_dir()
|
||||
max_i = int(len(terms) / self._terms_per_file) + 1
|
||||
for i in range(max_i):
|
||||
start = self._terms_per_file * i
|
||||
end = self._terms_per_file * (i + 1)
|
||||
update = f"Writing terms to term banks {start} - {end}"
|
||||
print(update, end='\r', flush=True)
|
||||
term_file = os.path.join(build_dir, f"term_bank_{i+1}.json")
|
||||
with open(term_file, "w", encoding='utf8') as f:
|
||||
start = self._terms_per_file * i
|
||||
end = self._terms_per_file * (i + 1)
|
||||
json.dump(terms[start:end], f, indent=4, ensure_ascii=False)
|
||||
print()
|
||||
|
||||
def __write_index(self, index):
|
||||
build_dir = self._get_build_dir()
|
||||
|
@ -104,6 +142,7 @@ class Exporter(ABC):
|
|||
json.dump(tags, f, indent=4, ensure_ascii=False)
|
||||
|
||||
def __write_archive(self, filename):
|
||||
print("Archiving data to ZIP file...")
|
||||
archive_format = "zip"
|
||||
out_dir = os.path.join(user_documents_dir(), "jitenbot", "yomichan")
|
||||
if not Path(out_dir).is_dir():
|
||||
|
@ -151,19 +190,22 @@ class JitenonKotowazaExporter(_JitenonExporter):
|
|||
pass
|
||||
|
||||
|
||||
class Smk8Exporter(Exporter):
|
||||
class _MonokakidoExporter(Exporter):
|
||||
def _get_revision(self, entries):
|
||||
timestamp = datetime.now().strftime("%Y-%m-%d")
|
||||
return f"{self._target.value};{timestamp}"
|
||||
|
||||
|
||||
class Smk8Exporter(_MonokakidoExporter):
|
||||
def _get_attribution(self, entries):
|
||||
return "© Sanseido Co., LTD. 2020"
|
||||
|
||||
|
||||
class Daijirin2Exporter(Exporter):
|
||||
def _get_revision(self, entries):
|
||||
timestamp = datetime.now().strftime("%Y-%m-%d")
|
||||
return f"{self._target.value};{timestamp}"
|
||||
|
||||
class Daijirin2Exporter(_MonokakidoExporter):
|
||||
def _get_attribution(self, entries):
|
||||
return "© Sanseido Co., LTD. 2019"
|
||||
|
||||
|
||||
class Sankoku8Exporter(_MonokakidoExporter):
    """Yomichan exporter registered for the SANKOKU8 target."""

    def _get_attribution(self, entries):
        """Return the fixed copyright notice; ``entries`` is not consulted."""
        return "© Sanseido Co., LTD. 2021"
|
||||
|
|
|
@ -5,6 +5,7 @@ from bot.yomichan.exporters.export import JitenonYojiExporter
|
|||
from bot.yomichan.exporters.export import JitenonKotowazaExporter
|
||||
from bot.yomichan.exporters.export import Smk8Exporter
|
||||
from bot.yomichan.exporters.export import Daijirin2Exporter
|
||||
from bot.yomichan.exporters.export import Sankoku8Exporter
|
||||
|
||||
|
||||
def new_yomi_exporter(target):
|
||||
|
@ -14,5 +15,6 @@ def new_yomi_exporter(target):
|
|||
Targets.JITENON_KOTOWAZA: JitenonKotowazaExporter,
|
||||
Targets.SMK8: Smk8Exporter,
|
||||
Targets.DAIJIRIN2: Daijirin2Exporter,
|
||||
Targets.SANKOKU8: Sankoku8Exporter,
|
||||
}
|
||||
return exporter_map[target](target)
|
||||
|
|
|
@ -26,6 +26,27 @@ def make_monochrome_fill_rectangle(path, text):
|
|||
f.write(svg)
|
||||
|
||||
|
||||
@cache
def make_accent(path):
    """Write the accent-marker SVG to ``path`` as UTF-8 text.

    The function is wrapped in ``cache``, so repeated calls with the same
    ``path`` within one process write the file only once.
    """
    markup = __svg_accent()
    with open(path, "w", encoding="utf-8") as svg_file:
        svg_file.write(markup)
|
||||
|
||||
|
||||
@cache
def make_heiban(path):
    """Write the heiban-marker SVG to ``path`` as UTF-8 text.

    The function is wrapped in ``cache``, so repeated calls with the same
    ``path`` within one process write the file only once.
    """
    markup = __svg_heiban()
    with open(path, "w", encoding="utf-8") as svg_file:
        svg_file.write(markup)
|
||||
|
||||
|
||||
@cache
def make_red_char(path, char):
    """Write an SVG showing ``char`` in red to ``path`` as UTF-8 text.

    The function is wrapped in ``cache``, so a given ``(path, char)`` pair
    is written only once per process.
    """
    markup = __svg_red_character(char)
    with open(path, "w", encoding="utf-8") as svg_file:
        svg_file.write(markup)
|
||||
|
||||
|
||||
def __calculate_svg_ratio(path):
|
||||
with open(path, "r", encoding="utf-8") as f:
|
||||
xml = f.read()
|
||||
|
@ -82,3 +103,30 @@ def __svg_masked_rectangle(text):
|
|||
fill='black' mask='url(#a)'/>
|
||||
</svg>"""
|
||||
return svg.strip()
|
||||
|
||||
|
||||
def __svg_heiban():
    """Return SVG markup for the heiban marker.

    The image is a 210x300 viewBox containing one red 210x30 rectangle
    at the default (top-left) position. Surrounding whitespace is stripped.
    """
    markup = """
<svg viewBox='0 0 210 300' xmlns='http://www.w3.org/2000/svg' version='1.1'>
<rect width='210' height='30' fill='red'/>
</svg>"""
    return markup.strip()
|
||||
|
||||
|
||||
def __svg_accent():
    """Return SVG markup for the accent marker.

    The image is a 150x300 viewBox with a red 150x30 bar at the default
    position plus a red 30x150 bar offset to x=120. Surrounding whitespace
    is stripped.
    """
    markup = """
<svg viewBox='0 0 150 300' xmlns='http://www.w3.org/2000/svg' version='1.1'>
<rect width='150' height='30' fill='red'/>
<rect width='30' height='150' x='120' fill='red'/>
</svg>"""
    return markup.strip()
|
||||
|
||||
|
||||
def __svg_red_character(char):
    """Return SVG markup that renders ``char`` as red, centered text.

    ``char`` is interpolated into the markup verbatim (no XML escaping).
    Surrounding whitespace of the result is stripped.
    """
    markup = f"""
<svg viewBox='0 0 300 300' xmlns='http://www.w3.org/2000/svg' version='1.1'>
<text text-anchor='middle' x='50%' y='50%' dy='.37em'
font-family='sans-serif' font-size='300px'
fill='red'>{char}</text>
</svg>"""
    return markup.strip()
|
||||
|
|
344
bot/yomichan/glossary/sankoku8.py
Normal file
344
bot/yomichan/glossary/sankoku8.py
Normal file
|
@ -0,0 +1,344 @@
|
|||
import re
|
||||
import os
|
||||
from bs4 import BeautifulSoup
|
||||
|
||||
import bot.yomichan.glossary.icons as Icons
|
||||
from bot.data import load_yomichan_name_conversion
|
||||
from bot.yomichan.glossary.gloss import make_gloss
|
||||
from bot.name_conversion import convert_names
|
||||
|
||||
|
||||
def make_glossary(entry, media_dir):
    """Build the glossary list for ``entry``.

    The entry's page soup is rewritten in place by a fixed sequence of
    passes, then tag names are converted using the name-conversion table
    loaded for ``entry.target``. The result is a one-item list holding the
    gloss produced from ``soup.span``.

    The pass order is significant: for example, anchors without an href
    are turned into spans before the passes that read ``href`` run.
    """
    soup = entry.get_page_soup()

    # (pass, extra positional arguments after ``soup``), applied in order.
    passes = [
        (__remove_glyph_styles, ()),
        (__reposition_marks, ()),
        (__remove_links_without_href, ()),
        (__remove_appendix_links, ()),
        (__convert_links, (entry,)),
        (__add_parent_link, (entry,)),
        (__add_homophone_links, (entry,)),
        (__convert_images_to_text, ()),
        (__text_parens_to_images, (media_dir,)),
        (__replace_icons, (media_dir,)),
        (__replace_accent_symbols, (media_dir,)),
        (__convert_gaiji, (media_dir,)),
        (__convert_graphics, (media_dir,)),
        (__convert_number_icons, (media_dir,)),
    ]
    for apply_pass, extra_args in passes:
        apply_pass(soup, *extra_args)

    convert_names(soup, load_yomichan_name_conversion(entry.target))

    return [make_gloss(soup.span)]
|
||||
|
||||
|
||||
def __remove_glyph_styles(soup):
    """Move the ``style`` attribute of every <glyph> element to ``data-style``.

    The css_parser library would otherwise emit annoying warning messages
    later when it sees these glyph character styles.
    """
    for glyph in soup.find_all("glyph"):
        if not glyph.has_attr("style"):
            continue
        # pop() removes "style" and hands back its value in one step.
        glyph["data-style"] = glyph.attrs.pop("style")
|
||||
|
||||
|
||||
def __reposition_marks(soup):
    """Move each マーク child to the end of its parent element.

    These マーク symbols will be converted to rubies later, so they need to
    be positioned after the corresponding text in order to appear correctly.

    Parents that contain no mark element are left untouched; passing the
    ``None`` returned by ``find`` to ``append`` would raise instead.
    """
    # parent tag name -> name of the mark tag nested inside it
    mark_by_parent = {
        "表外字": "表外字マーク",
        "表外音訓": "表外音訓マーク",
    }
    for parent_name, mark_name in mark_by_parent.items():
        for elm in soup.find_all(parent_name):
            mark = elm.find(mark_name)
            if mark is None:
                continue
            # Appending an element already in the tree relocates it.
            elm.append(mark)
|
||||
|
||||
|
||||
def __remove_links_without_href(soup):
    """Rename every <a> that has no ``href`` to <span>.

    The original tag name is recorded in the ``data-name`` attribute.
    """
    for anchor in soup.find_all("a"):
        if not anchor.has_attr("href"):
            anchor.attrs["data-name"] = anchor.name
            anchor.name = "span"
|
||||
|
||||
|
||||
def __remove_appendix_links(soup):
    """Unwrap every <a> whose ``href`` starts with ``appendix``.

    Every <a> in ``soup`` is expected to carry an ``href`` here; a missing
    one raises ``KeyError``.
    """
    for anchor in soup.find_all("a"):
        target = anchor.attrs["href"]
        if target.startswith("appendix"):
            # unwrap() keeps the link's contents and drops only the tag.
            anchor.unwrap()
|
||||
|
||||
|
||||
def __convert_links(soup, entry):
    """Rewrite every <a> href into a ``?query=...&wildcards=off`` link.

    The first space-separated token of the href, minus a leading ``#``,
    must be digits optionally followed by ``-`` and four uppercase hex
    digits; anything else raises ``Exception``. The token is converted
    with ``entry.id_string_to_entry_id``; when that id is not in
    ``entry.ID_TO_ENTRY`` the lookup falls back to ``(id[0], 0)``. The
    query is the referenced entry's first expression.
    """
    href_pattern = re.compile(r"^[0-9]+(?:-[0-9A-F]{4})?$")
    for anchor in soup.find_all("a"):
        href = anchor.attrs["href"].split(" ")[0].removeprefix("#")
        if href_pattern.match(href) is None:
            raise Exception(f"Invalid href format: {href}")
        ref_id = entry.id_string_to_entry_id(href)
        lookup_key = ref_id if ref_id in entry.ID_TO_ENTRY else (ref_id[0], 0)
        expression = entry.ID_TO_ENTRY[lookup_key].get_first_expression()
        anchor.attrs["href"] = f"?query={expression}&wildcards=off"
|
||||
|
||||
|
||||
def __add_parent_link(soup, entry):
    """Turn the first 親見出相当部 element, if any, into a link to the parent.

    The link queries the first expression of ``entry.get_parent()``.
    Nothing happens when the element is absent.
    """
    parent_ref = soup.find("親見出相当部")
    if parent_ref is None:
        return
    expression = entry.get_parent().get_first_expression()
    parent_ref.attrs["href"] = f"?query={expression}&wildcards=off"
    parent_ref.name = "a"
|
||||
|
||||
|
||||
def __add_homophone_links(soup, entry):
    """Replace homophone icon images with arrow links to adjacent entries.

    ``←`` links to the entry whose main id is one greater than this
    entry's, ``→`` to the one whose main id is one less (sub-id 0 in both
    cases). Which arrows are emitted depends on the icon file name.
    """
    main_id = entry.entry_id[0]
    plus_one = ("←", main_id + 1)
    minus_one = ("→", main_id - 1)
    links_by_icon = {
        "svg-logo/homophone1.svg": (plus_one,),
        "svg-logo/homophone2.svg": (plus_one, minus_one),
        "svg-logo/homophone3.svg": (minus_one,),
    }
    for icon_src, targets in links_by_icon.items():
        for icon in soup.find_all("img", attrs={"src": icon_src}):
            for label, target_id in targets:
                target = entry.ID_TO_ENTRY[(target_id, 0)]
                expression = target.get_first_expression()
                anchor = BeautifulSoup("<a/>", "xml").a
                anchor.string = label
                anchor.attrs["href"] = f"?query={expression}&wildcards=off"
                icon.append(anchor)
            # Drop the <img> itself but keep the links just placed inside it.
            icon.unwrap()
|
||||
|
||||
|
||||
def __convert_images_to_text(soup):
    """Replace selected icon images with plain-text spans.

    Each matching <img> becomes a <span> containing the replacement text,
    with the old tag name and source kept in ``data-name`` / ``data-src``
    and an optional inline style. Icons whose replacement text is empty
    are unwrapped instead.
    """
    superscript = "vertical-align: super; font-size: 0.6em"
    # (image src, replacement text, inline style)
    conversions = (
        ("svg-logo/重要語.svg", "*", superscript),
        ("svg-logo/最重要語.svg", "**", superscript),
        ("svg-logo/一般常識語.svg", "☆☆", superscript),
        ("svg-logo/追い込み.svg", "", ""),
        ("svg-special/区切り線.svg", "|", ""),
    )
    for src, replacement, css in conversions:
        for img in soup.find_all("img", attrs={"src": src}):
            if replacement == "":
                img.unwrap()
                continue
            if css != "":
                img.attrs["style"] = css
            img.attrs["data-name"] = img.name
            img.attrs["data-src"] = img.attrs.pop("src")
            img.name = "span"
            img.string = replacement
|
||||
|
||||
|
||||
def __text_parens_to_images(soup, media_dir):
    """Swap red full-width parentheses for generated SVG images.

    For every <red> element whose text is exactly ``(`` or ``)``, a
    ``red_<char>.svg`` file is produced in ``media_dir`` and the element
    becomes a <span> wrapping an <img> that points at that file.
    """
    media_name = os.path.basename(media_dir)
    for elm in soup.find_all("red"):
        char = elm.text
        if char not in ("(", ")"):
            continue
        filename = f"red_{char}.svg"
        svg_path = os.path.join(media_dir, filename)
        Icons.make_red_char(svg_path, char)
        img = BeautifulSoup("<img/>", "xml").img
        img.attrs = {
            "height": 1.0,
            "width": Icons.calculate_ratio(svg_path),
            "sizeUnits": "em",
            "collapsible": False,
            "collapsed": False,
            "background": False,
            "appearance": "auto",
            "path": f"{media_name}/{filename}",
        }
        elm.attrs["data-name"] = elm.name
        elm.name = "span"
        # Clear the text before inserting the image in its place.
        elm.string = ""
        elm.append(img)
        elm.attrs["style"] = "vertical-align: text-bottom;"
|
||||
|
||||
|
||||
def __replace_icons(soup, media_dir):
    """Replace known ``svg-logo`` icons with Yomichan-style image spans.

    For each listed icon, every matching <img> is renamed to <span>,
    emptied, and given a fresh <img> child whose ``path`` points into the
    media directory. Icons with a label and a drawing class get their SVG
    generated via ``__make_rectangle``; class ``"none"`` generates nothing.
    """
    cls_to_appearance = {
        "default": "monochrome",
        "fill": "monochrome",
        "red": "auto",
        "redfill": "auto",
        "none": "monochrome",
    }
    outlined = ("アク", "丁寧", "可能", "尊敬", "接尾", "接頭", "表記", "謙譲")
    red_filled = ("区別", "由来")
    untouched = ("人", "他", "動", "名", "句", "派", "自", "連",
                 "造", "造2", "造3", "百科")
    # (image src, rectangle label, drawing class), in processing order.
    icon_info_list = (
        [(f"svg-logo/{name}.svg", name, "default") for name in outlined]
        + [(f"svg-logo/{name}.svg", name, "redfill") for name in red_filled]
        + [(f"svg-logo/{name}.svg", "", "none") for name in untouched]
    )
    media_name = os.path.basename(media_dir)
    for src, label, cls in icon_info_list:
        svg_path = os.path.join(media_dir, *src.split("/"))
        for elm in soup.find_all("img", attrs={"src": src}):
            __make_rectangle(svg_path, label, cls)
            img = BeautifulSoup("<img/>", "xml").img
            img.attrs = {
                "height": 1.0,
                "width": Icons.calculate_ratio(svg_path),
                "sizeUnits": "em",
                "collapsible": False,
                "collapsed": False,
                "background": False,
                "appearance": cls_to_appearance[cls],
                "title": elm.attrs.get("alt", ""),
                "path": f"{media_name}/{src}",
            }
            elm.name = "span"
            elm.clear()
            elm.append(img)
            elm.attrs["style"] = "vertical-align: text-bottom; margin-right: 0.25em;"
|
||||
|
||||
|
||||
def __replace_accent_symbols(soup, media_dir):
    """Replace the two ``svg-accent`` images with generated SVG image spans.

    Each matching <img> triggers the corresponding ``Icons`` writer for
    the file under ``media_dir``, then is renamed to <span>, emptied, and
    given a fresh <img> child pointing at that file.
    """
    writers_by_src = {
        "svg-accent/平板.svg": Icons.make_heiban,
        "svg-accent/アクセント.svg": Icons.make_accent,
    }
    media_name = os.path.basename(media_dir)
    for src, write_svg in writers_by_src.items():
        svg_path = os.path.join(media_dir, *src.split("/"))
        for elm in soup.find_all("img", attrs={"src": src}):
            write_svg(svg_path)
            img = BeautifulSoup("<img/>", "xml").img
            img.attrs = {
                "height": 1.0,
                "width": Icons.calculate_ratio(svg_path),
                "sizeUnits": "em",
                "collapsible": False,
                "collapsed": False,
                "background": False,
                "appearance": "auto",
                "path": f"{media_name}/{src}",
            }
            elm.name = "span"
            elm.clear()
            elm.append(img)
            elm.attrs["style"] = "vertical-align: text-bottom;"
|
||||
|
||||
|
||||
def __convert_gaiji(soup, media_dir):
    """Wrap every remaining non-``graphics`` <img> in a 1em-high image span.

    Images without a ``src`` and those whose ``src`` starts with
    ``graphics`` are skipped. The width ratio is read from the file at
    ``media_dir`` joined with the non-blank components of ``src``.
    """
    media_name = os.path.basename(media_dir)
    for elm in soup.find_all("img"):
        if not elm.has_attr("src"):
            continue
        src = elm.attrs["src"]
        if src.startswith("graphics"):
            continue
        # Blank path components (e.g. from doubled slashes) are ignored.
        parts = [part for part in src.split("/") if part.strip() != ""]
        file_path = os.path.join(media_dir, *parts)
        img = BeautifulSoup("<img/>", "xml").img
        img.attrs = {
            "height": 1.0,
            "width": Icons.calculate_ratio(file_path),
            "sizeUnits": "em",
            "collapsible": False,
            "collapsed": False,
            "background": False,
            "appearance": "monochrome",
            "title": elm.attrs.get("alt", ""),
            "path": f"{media_name}/{src}",
        }
        elm.name = "span"
        elm.clear()
        elm.append(img)
        elm.attrs["style"] = "vertical-align: text-bottom;"
|
||||
|
||||
|
||||
def __convert_graphics(soup, media_dir):
    """Reset the attributes of every <img> whose ``src`` starts with ``graphics``.

    The element keeps its ``src`` and gains ``collapsible``/``collapsed``
    flags, a ``title`` taken from ``alt`` (empty when absent) and a
    ``path`` inside the media directory; all other attributes are dropped.
    """
    media_name = os.path.basename(media_dir)
    for elm in soup.find_all("img"):
        if not elm.has_attr("src") or not elm.attrs["src"].startswith("graphics"):
            continue
        src = elm.attrs["src"]
        title = elm.attrs.get("alt", "")
        elm.attrs = {
            "collapsible": True,
            "collapsed": True,
            "title": title,
            "path": f"{media_name}/{src}",
            "src": src,
        }
|
||||
|
||||
|
||||
def __convert_number_icons(soup, media_dir):
    """Replace each 大語義番号 element with a generated rectangle icon.

    Numbers outside any <a> use the ``fill`` rectangle with monochrome
    appearance; numbers inside a link use ``bluefill`` with ``auto``
    appearance. The element text is used as the icon label, the file-name
    stem and the image title.
    """
    media_name = os.path.basename(media_dir)
    for elm in soup.find_all("大語義番号"):
        label = elm.text
        if elm.find_parent("a") is None:
            cls, appearance = "fill", "monochrome"
        else:
            cls, appearance = "bluefill", "auto"
        filename = f"{label}-{cls}.svg"
        svg_path = os.path.join(media_dir, filename)
        __make_rectangle(svg_path, label, cls)
        img = BeautifulSoup("<img/>", "xml").img
        img.attrs = {
            "height": 1.0,
            "width": Icons.calculate_ratio(svg_path),
            "sizeUnits": "em",
            "collapsible": False,
            "collapsed": False,
            "background": False,
            "appearance": appearance,
            "title": label,
            "path": f"{media_name}/{filename}",
        }
        elm.name = "span"
        elm.clear()
        elm.append(img)
        elm.attrs["style"] = "vertical-align: text-bottom; margin-right: 0.25em;"
|
||||
|
||||
|
||||
def __make_rectangle(path, text, cls):
    """Generate the rectangle icon for ``text`` at ``path`` according to ``cls``.

    ``"none"`` generates nothing and ``"fill"`` uses the monochrome fill
    writer. ``"red"``, ``"redfill"`` and ``"bluefill"`` select a colour
    triple for ``Icons.make_rectangle``; any other class gets the
    black / transparent / black triple.
    """
    if cls == "none":
        return
    if cls == "fill":
        Icons.make_monochrome_fill_rectangle(path, text)
        return
    # Colour triples passed positionally to Icons.make_rectangle.
    palette = {
        "red": ("red", "white", "red"),
        "redfill": ("red", "red", "white"),
        "bluefill": ("blue", "blue", "white"),
    }
    colors = palette.get(cls, ("black", "transparent", "black"))
    Icons.make_rectangle(path, text, *colors)
|
|
@ -5,6 +5,7 @@ from bot.yomichan.terms.jitenon import JitenonYojiTerminator
|
|||
from bot.yomichan.terms.jitenon import JitenonKotowazaTerminator
|
||||
from bot.yomichan.terms.smk8 import Smk8Terminator
|
||||
from bot.yomichan.terms.daijirin2 import Daijirin2Terminator
|
||||
from bot.yomichan.terms.sankoku8 import Sankoku8Terminator
|
||||
|
||||
|
||||
def new_terminator(target):
|
||||
|
@ -14,5 +15,6 @@ def new_terminator(target):
|
|||
Targets.JITENON_KOTOWAZA: JitenonKotowazaTerminator,
|
||||
Targets.SMK8: Smk8Terminator,
|
||||
Targets.DAIJIRIN2: Daijirin2Terminator,
|
||||
Targets.SANKOKU8: Sankoku8Terminator,
|
||||
}
|
||||
return terminator_map[target](target)
|
||||
|
|
47
bot/yomichan/terms/sankoku8.py
Normal file
47
bot/yomichan/terms/sankoku8.py
Normal file
|
@ -0,0 +1,47 @@
|
|||
from bot.entries.sankoku8 import Sankoku8PhraseEntry as PhraseEntry
|
||||
|
||||
from bot.yomichan.terms.terminator import Terminator
|
||||
from bot.yomichan.glossary.sankoku8 import make_glossary
|
||||
from bot.yomichan.grammar import sudachi_rules, tags_to_rules
|
||||
|
||||
|
||||
class Sankoku8Terminator(Terminator):
    """Terminator subclass that supplies the Sankoku8-specific term hooks."""

    def __init__(self, target):
        super().__init__(target)

    def _definition_tags(self, entry):
        """Return the definition tags string (always empty here)."""
        return ""

    def _inflection_rules(self, entry, expression):
        """Return inflection rules for ``expression``.

        Phrase entries, and entries without part-of-speech tags, are
        handled by ``sudachi_rules``; all others by ``tags_to_rules``.
        """
        if not isinstance(entry, PhraseEntry):
            pos_tags = entry.get_part_of_speech_tags()
            if len(pos_tags) != 0:
                return tags_to_rules(
                    expression, pos_tags, self._inflection_categories)
        return sudachi_rules(expression)

    def _glossary(self, entry):
        """Return the glossary for ``entry``, building it once per entry id."""
        key = entry.entry_id
        if key not in self._glossary_cache:
            self._glossary_cache[key] = make_glossary(entry, self._image_dir)
        return self._glossary_cache[key]

    def _sequence(self, entry):
        """Return a sequence number: first id part * 100000 + second id part."""
        entry_id = entry.entry_id
        return entry_id[0] * 100000 + entry_id[1]

    def _term_tags(self, entry):
        """Return the term tags string (always empty here)."""
        return ""

    def _link_glossary_parameters(self, entry):
        """Return ``[subentries, label]`` pairs for child and phrase entries."""
        child_links = [entry.children, "子"]
        phrase_links = [entry.phrases, "句"]
        return [child_links, phrase_links]

    def _subentry_lists(self, entry):
        """Return the entry's children and phrases lists, in that order."""
        return [entry.children, entry.phrases]
|
3573
data/entries/sankoku8/phrase_readings.csv
Normal file
3573
data/entries/sankoku8/phrase_readings.csv
Normal file
File diff suppressed because it is too large
Load diff
611
data/mdict/css/sankoku8.css
Normal file
611
data/mdict/css/sankoku8.css
Normal file
|
@ -0,0 +1,611 @@
|
|||
|
||||
@font-face {
|
||||
font-family: jpgothic;
|
||||
src: local("Noto Sans CJK JP"), local("IPAexGothic"), local("Source Han Sans JP");
|
||||
}
|
||||
|
||||
@font-face {
|
||||
font-family: jpmincho;
|
||||
src: local("Noto Serif CJK JP"), local("IPAexMincho"), local("IPAmjMincho"), local("Source Han Serif JP"), local("HanaMinA"), local("HanaMinB");
|
||||
}
|
||||
|
||||
@font-face {
|
||||
font-family: jpkyokasho;
|
||||
src: local("A-OTF Kyoukasho ICA Pro R"), local("DFKyoKaSho-W4");
|
||||
}
|
||||
|
||||
body {
|
||||
margin: 0em 1em;
|
||||
line-height: 1.5em;
|
||||
font-family: jpmincho, serif;
|
||||
font-size: 1.2em;
|
||||
}
|
||||
|
||||
span[data-name="entry-index"] > a {
|
||||
display: none;
|
||||
}
|
||||
|
||||
span[data-name="項目"] {
|
||||
display: block;
|
||||
/*max-width: 39em;*/
|
||||
}
|
||||
|
||||
span[data-name="見出部"] {
|
||||
display: block;
|
||||
}
|
||||
|
||||
span[data-name="見出仮名"] {
|
||||
font-family: jpgothic, sans-serif;
|
||||
font-weight: bold;
|
||||
}
|
||||
|
||||
span[data-name="見出仮名"].アンチック {
|
||||
font-family: jpmincho, serif;
|
||||
}
|
||||
|
||||
span[data-name="表記G"] {
|
||||
margin-left: 0.25em;
|
||||
}
|
||||
|
||||
span[data-name="専門G"] {
|
||||
margin-right: 0.25em;
|
||||
}
|
||||
|
||||
span[data-name="常用漢字"] {
|
||||
font-family: jpmincho, serif;
|
||||
}
|
||||
|
||||
span[data-name="教育漢字"] {
|
||||
font-family: jpkyokasho, jpmincho, serif;
|
||||
color: green;
|
||||
}
|
||||
|
||||
span[data-name="解説部"],
|
||||
span[data-name="子解説部"],
|
||||
span[data-name="句解説部"] {
|
||||
display: block;
|
||||
margin-left: 1em;
|
||||
}
|
||||
|
||||
span[data-name="大語義"] {
|
||||
display: block;
|
||||
}
|
||||
|
||||
span[data-name="大語義"] + span[data-name="大語義"] {
|
||||
margin-top: 0.5em;
|
||||
}
|
||||
|
||||
span[data-name="大語義番号"] {
|
||||
margin-right: 0.25em;
|
||||
padding: 0.1em;
|
||||
font-family: jpgothic, sans-serif;
|
||||
font-size: 0.8em;
|
||||
font-weight: bold;
|
||||
color: white;
|
||||
background-color: gray;
|
||||
border-radius: 0.2em;
|
||||
}
|
||||
|
||||
a span[data-name="大語義番号"] {
|
||||
background-color: blue;
|
||||
text-decoration-color: blue;
|
||||
}
|
||||
|
||||
span[data-name="語義番号"] {
|
||||
margin-right: 0.25em;
|
||||
}
|
||||
|
||||
span[data-name="参照語義番号"] {
|
||||
margin-left: 0.1em;
|
||||
}
|
||||
|
||||
span[data-name="参照語義番号"]>span[data-name="語義番号"] {
|
||||
margin-right: 0.1em;
|
||||
}
|
||||
|
||||
span[data-name="参照語義番号"]:first-child {
|
||||
margin-left: 0em;
|
||||
}
|
||||
|
||||
span[data-name="語義"] {
|
||||
display: block;
|
||||
}
|
||||
|
||||
span[data-name="副義"] {
|
||||
display: block;
|
||||
margin-left: 1.0em;
|
||||
}
|
||||
|
||||
span[data-name="注記語義"] {
|
||||
margin-left: 0.5em;
|
||||
}
|
||||
|
||||
span[data-name="語釈"] {
|
||||
}
|
||||
|
||||
span[data-name="用例G"] {
|
||||
display: block;
|
||||
/*margin-left: 1.25em;*/
|
||||
}
|
||||
|
||||
span[data-name="百科"] span[data-name="用例G"] {
|
||||
display: inline;
|
||||
}
|
||||
|
||||
span[data-name="注記"] span[data-name="用例G"] {
|
||||
display: inline;
|
||||
}
|
||||
|
||||
span[data-name="用例"] {
|
||||
}
|
||||
|
||||
span[data-name="見出相当部"] {
|
||||
margin-left: 0.125em;
|
||||
margin-right: 0.125em;
|
||||
}
|
||||
|
||||
span[data-name="ルビG"] {
|
||||
font-size: 0.7em;
|
||||
font-weight: normal;
|
||||
vertical-align: 0.5em;
|
||||
-webkit-user-select: none;
|
||||
}
|
||||
|
||||
span[data-name="名詞形G"],
|
||||
span[data-name="動詞形G"],
|
||||
span[data-name="自動詞形G"],
|
||||
span[data-name="他動詞形G"],
|
||||
span[data-name="可能形G"],
|
||||
span[data-name="人G"],
|
||||
span[data-name="名詞人形G"] {
|
||||
display: block;
|
||||
}
|
||||
|
||||
span[data-name="語義"] span[data-name="名詞形G"],
|
||||
span[data-name="語義"] span[data-name="動詞形G"],
|
||||
span[data-name="語義"] span[data-name="自動詞形G"],
|
||||
span[data-name="語義"] span[data-name="他動詞形G"],
|
||||
span[data-name="語義"] span[data-name="可能形G"],
|
||||
span[data-name="語義"] span[data-name="人G"] {
|
||||
display: inline;
|
||||
}
|
||||
|
||||
span[data-name="副義"] span[data-name="名詞形G"],
|
||||
span[data-name="副義"] span[data-name="動詞形G"],
|
||||
span[data-name="副義"] span[data-name="自動詞形G"],
|
||||
span[data-name="副義"] span[data-name="他動詞形G"],
|
||||
span[data-name="副義"] span[data-name="可能形G"],
|
||||
span[data-name="副義"] span[data-name="人G"] {
|
||||
display: inline;
|
||||
}
|
||||
|
||||
span[data-name="注記"] span[data-name="名詞形G"],
|
||||
span[data-name="注記"] span[data-name="動詞形G"],
|
||||
span[data-name="注記"] span[data-name="自動詞形G"],
|
||||
span[data-name="注記"] span[data-name="他動詞形G"],
|
||||
span[data-name="注記"] span[data-name="可能形G"],
|
||||
span[data-name="注記"] span[data-name="人G"] {
|
||||
display: inline;
|
||||
}
|
||||
|
||||
span[data-name="共通"] span[data-name="名詞形G"],
|
||||
span[data-name="共通"] span[data-name="動詞形G"],
|
||||
span[data-name="共通"] span[data-name="自動詞形G"],
|
||||
span[data-name="共通"] span[data-name="他動詞形G"],
|
||||
span[data-name="共通"] span[data-name="可能形G"],
|
||||
span[data-name="共通"] span[data-name="人G"] {
|
||||
display: inline;
|
||||
}
|
||||
|
||||
span[data-name="名詞形G"] span[data-name="用例G"],
|
||||
span[data-name="動詞形G"] span[data-name="用例G"],
|
||||
span[data-name="自動詞形G"] span[data-name="用例G"],
|
||||
span[data-name="他動詞形G"] span[data-name="用例G"],
|
||||
span[data-name="可能形G"] span[data-name="用例G"],
|
||||
span[data-name="人G"] span[data-name="用例G"] {
|
||||
display: inline;
|
||||
}
|
||||
|
||||
span[data-name="参照G"] {
|
||||
display: inline;
|
||||
}
|
||||
|
||||
span[data-name="参照矢印"] {
|
||||
margin-right: 0.25em;
|
||||
}
|
||||
|
||||
span[data-name="参照"] {
|
||||
}
|
||||
|
||||
span[data-name="子項目"] {
|
||||
display: block;
|
||||
margin-top: 0.5em;
|
||||
}
|
||||
|
||||
span[data-name="子見出部"] {
|
||||
display: block;
|
||||
/* text-indent: -1em; */
|
||||
}
|
||||
|
||||
span[data-name="子見出仮名"] {
|
||||
font-family: jpgothic, sans-serif;
|
||||
font-weight: bold;
|
||||
}
|
||||
|
||||
span[data-name="親見出省略"] {
|
||||
font-family: jpgothic, sans-serif;
|
||||
}
|
||||
|
||||
span[data-name="句項目"] {
|
||||
display: block;
|
||||
margin-top: 0.5em;
|
||||
}
|
||||
|
||||
span[data-name="句見出部"] {
|
||||
display: block;
|
||||
/* text-indent: -1em; */
|
||||
}
|
||||
|
||||
span[data-name="句表記"] {
|
||||
font-family: jpgothic, sans-serif;
|
||||
font-weight: bold;
|
||||
}
|
||||
|
||||
span[data-name="対義語G"] {
|
||||
}
|
||||
|
||||
span[data-name="派生語G"] {
|
||||
display: block;
|
||||
}
|
||||
|
||||
span[data-name="謙譲形G"] {
|
||||
display: block;
|
||||
}
|
||||
|
||||
span[data-name="共通"] {
|
||||
display: block;
|
||||
}
|
||||
|
||||
span[data-name="共通ロゴ"] {
|
||||
color: red;
|
||||
margin-right: 0.25em;
|
||||
}
|
||||
|
||||
span[data-name="rank1"] a,
|
||||
span[data-name="rank2"] a,
|
||||
span[data-name="rank3"] a,
|
||||
span[data-name="表外字マーク"] a,
|
||||
span[data-name="表外音訓マーク"] a,
|
||||
span[data-name="省略形"] a,
|
||||
span[data-name="熟字訓"] a,
|
||||
span[data-name="原籍"] a,
|
||||
span[data-name="品詞"] a,
|
||||
span[data-name="専門"] a,
|
||||
span[data-name="使用域"] a,
|
||||
span[data-name="rect"] a {
|
||||
color: black;
|
||||
border-top-style: none;
|
||||
}
|
||||
|
||||
span[data-name="共通ロゴ"] a {
|
||||
color: red;
|
||||
border-top-style: none;
|
||||
}
|
||||
|
||||
span[data-name="rect"].red a {
|
||||
color: red;
|
||||
}
|
||||
|
||||
a {
|
||||
text-decoration: none;
|
||||
padding-top: 0.04em;
|
||||
/* border-top: solid 1px blue; */
|
||||
}
|
||||
|
||||
a.appendix {
|
||||
color: black;
|
||||
text-decoration: none;
|
||||
border-top-style: none;
|
||||
}
|
||||
|
||||
a.black {
|
||||
color: black;
|
||||
}
|
||||
|
||||
span[data-name="カット"] {
|
||||
display: block;
|
||||
width: 75%;
|
||||
margin-top: 1em;
|
||||
margin-left: auto;
|
||||
margin-right: auto;
|
||||
}
|
||||
|
||||
span[data-name="カット"] img {
|
||||
max-height: 200px;
|
||||
max-width: 600px;
|
||||
}
|
||||
|
||||
span[data-name="イタリック"] {
|
||||
font-style: italic;
|
||||
}
|
||||
|
||||
span[data-name="ボールド"] {
|
||||
font-weight: bold;
|
||||
}
|
||||
|
||||
span[data-name="色版"] {
|
||||
color: red;
|
||||
}
|
||||
|
||||
/* 独自定義 */
|
||||
|
||||
span[data-name="rect"] {
|
||||
margin-left: 0.25em;
|
||||
margin-right: 0.25em;
|
||||
padding: 0.1em;
|
||||
font-size: 0.8em;
|
||||
border-width: 0.04em;
|
||||
border-style: solid;
|
||||
border-color: black;
|
||||
word-break: keep-all;
|
||||
border-radius: 0.1em;
|
||||
}
|
||||
|
||||
span[data-name="rect"].fill {
|
||||
color: white;
|
||||
border-style: none;
|
||||
background-color: gray;
|
||||
}
|
||||
|
||||
span[data-name="rect"].red {
|
||||
color: red;
|
||||
border-color: red;
|
||||
}
|
||||
|
||||
span[data-name="red"] {
|
||||
color: red;
|
||||
}
|
||||
|
||||
span[data-name="glyph"] {
|
||||
font-family: jpmincho, serif;
|
||||
}
|
||||
|
||||
span[data-name="gaiji"] {
|
||||
width: 1em;
|
||||
}
|
||||
|
||||
span[data-name="frac"] {
|
||||
width: 2em;
|
||||
}
|
||||
|
||||
img.logo {
|
||||
display: gaiji;
|
||||
margin-right: 0.25em;
|
||||
height: 1em;
|
||||
text-combine-horizontal: all;
|
||||
}
|
||||
|
||||
.logo-red {
|
||||
height: 1em;
|
||||
color: red;
|
||||
}
|
||||
|
||||
span[data-name="平板"] .logo-red {
|
||||
margin-left: 0.1em;
|
||||
}
|
||||
|
||||
img.区切り線 {
|
||||
display: gaiji;
|
||||
height: 1em;
|
||||
padding: 0 0.3em 0 0.05em;
|
||||
color: gray;
|
||||
text-combine-horizontal: all;
|
||||
}
|
||||
|
||||
/* *, **, and ☆☆ symbols */
|
||||
span[data-src^="svg-logo"].rank {
|
||||
font-size: 0.65em;
|
||||
vertical-align: super;
|
||||
}
|
||||
|
||||
img.gaiji {
|
||||
display: gaiji;
|
||||
height: 1em;
|
||||
text-combine-horizontal: all;
|
||||
}
|
||||
|
||||
img.svg {
|
||||
zoom: 250%;
|
||||
}
|
||||
|
||||
span[data-name="表外字マーク"] {
|
||||
font-size: 0.5em;
|
||||
vertical-align: 1em;
|
||||
-webkit-user-select: none;
|
||||
}
|
||||
|
||||
span[data-name="表外音訓マーク"] {
|
||||
font-size: 0.5em;
|
||||
vertical-align: 1em;
|
||||
-webkit-user-select: none;
|
||||
}
|
||||
|
||||
span[data-name="表外字ロゴ"],
|
||||
span[data-name="表外音訓ロゴ"] {
|
||||
margin: 0em 0.5em;
|
||||
font-size: 0.5em;
|
||||
}
|
||||
|
||||
span[data-name="アクセント"] {
|
||||
}
|
||||
|
||||
span[data-name="アクセント表記"] {
|
||||
font-family: jpgothic, sans-serif;
|
||||
font-weight: bold;
|
||||
font-size: 0.85em;
|
||||
}
|
||||
|
||||
span[data-name="横"] {
|
||||
text-combine-horizontal: all 1;
|
||||
}
|
||||
|
||||
span[data-name="縦中横"] {
|
||||
text-combine-horizontal: all;
|
||||
}
|
||||
|
||||
span[data-name="分子"],
|
||||
span[data-name="分母"] {
|
||||
text-combine-horizontal: all;
|
||||
}
|
||||
|
||||
span[data-name="英"],
|
||||
span[data-name="回転"] {
|
||||
writing-mode: horizontal-tb;
|
||||
}
|
||||
|
||||
span[data-name="i"] {
|
||||
font-family: "Times New Roman";
|
||||
font-style: italic;
|
||||
}
|
||||
|
||||
span[data-name="横"] span[data-name="sub"] {
|
||||
font-size: 0.7em;
|
||||
vertical-align: 0.35em;
|
||||
}
|
||||
|
||||
span[data-name="kanbun"] {
|
||||
font-size: 0.5em;
|
||||
vertical-align: -1em;
|
||||
}
|
||||
|
||||
span[data-name="歴史仮名"] {
|
||||
font-size: 0.7em;
|
||||
vertical-align: 0.5em;
|
||||
-webkit-user-select: none;
|
||||
}
|
||||
|
||||
span[data-name="品詞G"] {
|
||||
/* margin-left: 0.25em; */
|
||||
}
|
||||
|
||||
span[data-name="ロゴ"] {
|
||||
margin-right: 0.25em;
|
||||
}
|
||||
|
||||
span[data-name="割書"] {
|
||||
/*display: warichu;*/
|
||||
/*font-size: 0.5em;*/
|
||||
}
|
||||
|
||||
span[data-name="尊敬形G"],
|
||||
span[data-name="謙譲形G"],
|
||||
span[data-name="丁寧形G"] {
|
||||
display: block;
|
||||
}
|
||||
|
||||
span[data-name="百科"],
|
||||
span[data-name="由来"],
|
||||
span[data-name="区別"],
|
||||
span[data-name="アクセント注記"] {
|
||||
display: block;
|
||||
}
|
||||
|
||||
span[data-name="表記情報"] {
|
||||
display: block;
|
||||
}
|
||||
|
||||
span[data-name="別見出"] {
|
||||
font-family: jpmincho, serif;
|
||||
font-weight: bold;
|
||||
}
|
||||
|
||||
span[data-name="読み"] {
|
||||
font-size: 0.7em;
|
||||
}
|
||||
|
||||
span[data-name="歴史仮名"]:before,
|
||||
span[data-name="ルビ"]:before {
|
||||
content: "(";
|
||||
}
|
||||
|
||||
span[data-name="歴史仮名"]:after,
|
||||
span[data-name="ルビ"]:after {
|
||||
content: ")";
|
||||
}
|
||||
|
||||
div[data-child-links] {
|
||||
padding-left: 1em;
|
||||
}
|
||||
|
||||
div[data-child-links] ul {
|
||||
margin: 0;
|
||||
padding-left: 2em;
|
||||
}
|
||||
|
||||
div[data-child-links] span {
|
||||
padding: 0.1em;
|
||||
font-family: jpgothic, sans-serif;
|
||||
font-size: 0.8em;
|
||||
color: white;
|
||||
border-width: 0.05em;
|
||||
border-style: none;
|
||||
border-color: black;
|
||||
border-radius: 0.2em;
|
||||
word-break: keep-all;
|
||||
}
|
||||
|
||||
div[data-child-links="子項目"] span {
|
||||
background-color: rgb(153, 42, 103);
|
||||
}
|
||||
|
||||
div[data-child-links="句項目"] span {
|
||||
background-color: rgb(176, 127, 57);
|
||||
}
|
||||
|
||||
|
||||
/* Replacements for vertical SVG icons */
|
||||
span[data-src="svg-logo/区別.svg"],
|
||||
span[data-src="svg-logo/由来.svg"] {
|
||||
font-family: jpgothic, sans-serif;
|
||||
margin-left: 0.25em;
|
||||
margin-right: 0.25em;
|
||||
padding: 0em 0.1em 0.1em 0.1em;
|
||||
font-size: 0.8em;
|
||||
word-break: keep-all;
|
||||
border-radius: 0.2em;
|
||||
border-style: none;
|
||||
color: white;
|
||||
background-color: red;
|
||||
}
|
||||
|
||||
span[data-src="svg-logo/アク.svg"],
|
||||
span[data-src="svg-logo/丁寧.svg"],
|
||||
span[data-src="svg-logo/可能.svg"],
|
||||
span[data-src="svg-logo/尊敬.svg"],
|
||||
span[data-src="svg-logo/表記.svg"],
|
||||
span[data-src="svg-logo/謙譲.svg"],
|
||||
span[data-src="svg-logo/接尾.svg"],
|
||||
span[data-src="svg-logo/接頭.svg"] {
|
||||
margin-left: 0.25em;
|
||||
margin-right: 0.25em;
|
||||
padding: 0em 0.1em 0.1em 0.1em;
|
||||
font-size: 0.8em;
|
||||
word-break: keep-all;
|
||||
border-width: 0.1em;
|
||||
border-style: solid;
|
||||
border-radius: 0.2em;
|
||||
}
|
||||
|
||||
span[data-src="svg-logo/アク.svg"],
|
||||
span[data-src="svg-logo/丁寧.svg"],
|
||||
span[data-src="svg-logo/可能.svg"],
|
||||
span[data-src="svg-logo/尊敬.svg"],
|
||||
span[data-src="svg-logo/表記.svg"],
|
||||
span[data-src="svg-logo/謙譲.svg"] {
|
||||
font-family: jpgothic, sans-serif;
|
||||
}
|
||||
|
||||
span[data-src="svg-logo/接尾.svg"],
|
||||
span[data-src="svg-logo/接頭.svg"] {
|
||||
font-family: jpmincho, serif;
|
||||
}
|
7
data/mdict/description/sankoku8.mdx.description.html
Normal file
7
data/mdict/description/sankoku8.mdx.description.html
Normal file
|
@ -0,0 +1,7 @@
|
|||
三省堂国語辞典 第八版
|
||||
<br><br>
|
||||
https://www.monokakido.jp/ja/dictionaries/sankoku8/index.html
|
||||
<br><br>
|
||||
{{revision}}
|
||||
<br><br>
|
||||
{{attribution}}
|
22
data/mdict/name_conversion/sankoku8.json
Normal file
22
data/mdict/name_conversion/sankoku8.json
Normal file
|
@ -0,0 +1,22 @@
|
|||
{
|
||||
"a": {},
|
||||
"br": {},
|
||||
"img": {},
|
||||
"div": {},
|
||||
"span": {},
|
||||
"small": {},
|
||||
"sup": {},
|
||||
"sub": {},
|
||||
"表外字": {
|
||||
"name": "ruby"
|
||||
},
|
||||
"表外字マーク": {
|
||||
"name": "rt"
|
||||
},
|
||||
"表外音訓": {
|
||||
"name": "ruby"
|
||||
},
|
||||
"表外音訓マーク": {
|
||||
"name": "rt"
|
||||
}
|
||||
}
|
1
data/mdict/title/sankoku8.mdx.title.html
Normal file
1
data/mdict/title/sankoku8.mdx.title.html
Normal file
|
@ -0,0 +1 @@
|
|||
三省堂国語辞典 第八版
|
474
data/yomichan/dictionary-term-bank-v3-schema.json
Normal file
474
data/yomichan/dictionary-term-bank-v3-schema.json
Normal file
|
@ -0,0 +1,474 @@
|
|||
{
|
||||
"$schema": "http://json-schema.org/draft-07/schema#",
|
||||
"definitions": {
|
||||
"structuredContent": {
|
||||
"oneOf": [
|
||||
{
|
||||
"type": "string",
|
||||
"description": "Represents a text node."
|
||||
},
|
||||
{
|
||||
"type": "array",
|
||||
"items": {
|
||||
"$ref": "#/definitions/structuredContent",
|
||||
"description": "An array of child content."
|
||||
}
|
||||
},
|
||||
{
|
||||
"type": "object",
|
||||
"oneOf": [
|
||||
{
|
||||
"type": "object",
|
||||
"description": "Empty tags.",
|
||||
"required": [
|
||||
"tag"
|
||||
],
|
||||
"additionalProperties": false,
|
||||
"properties": {
|
||||
"tag": {
|
||||
"type": "string",
|
||||
"const": "br"
|
||||
},
|
||||
"data": {
|
||||
"$ref": "#/definitions/structuredContentData"
|
||||
}
|
||||
}
|
||||
},
|
||||
{
|
||||
"type": "object",
|
||||
"description": "Generic container tags.",
|
||||
"required": [
|
||||
"tag"
|
||||
],
|
||||
"additionalProperties": false,
|
||||
"properties": {
|
||||
"tag": {
|
||||
"type": "string",
|
||||
"enum": ["ruby", "rt", "rp", "table", "thead", "tbody", "tfoot", "tr"]
|
||||
},
|
||||
"content": {
|
||||
"$ref": "#/definitions/structuredContent"
|
||||
},
|
||||
"data": {
|
||||
"$ref": "#/definitions/structuredContentData"
|
||||
},
|
||||
"lang": {
|
||||
"type": "string",
|
||||
"description": "Defines the language of an element in the format defined by RFC 5646."
|
||||
}
|
||||
}
|
||||
},
|
||||
{
|
||||
"type": "object",
|
||||
"description": "Table tags.",
|
||||
"required": [
|
||||
"tag"
|
||||
],
|
||||
"additionalProperties": false,
|
||||
"properties": {
|
||||
"tag": {
|
||||
"type": "string",
|
||||
"enum": ["td", "th"]
|
||||
},
|
||||
"content": {
|
||||
"$ref": "#/definitions/structuredContent"
|
||||
},
|
||||
"data": {
|
||||
"$ref": "#/definitions/structuredContentData"
|
||||
},
|
||||
"colSpan": {
|
||||
"type": "integer",
|
||||
"minimum": 1
|
||||
},
|
||||
"rowSpan": {
|
||||
"type": "integer",
|
||||
"minimum": 1
|
||||
},
|
||||
"style": {
|
||||
"$ref": "#/definitions/structuredContentStyle"
|
||||
},
|
||||
"lang": {
|
||||
"type": "string",
|
||||
"description": "Defines the language of an element in the format defined by RFC 5646."
|
||||
}
|
||||
}
|
||||
},
|
||||
{
|
||||
"type": "object",
|
||||
"description": "Container tags supporting configurable styles.",
|
||||
"required": [
|
||||
"tag"
|
||||
],
|
||||
"additionalProperties": false,
|
||||
"properties": {
|
||||
"tag": {
|
||||
"type": "string",
|
||||
"enum": ["span", "div", "ol", "ul", "li"]
|
||||
},
|
||||
"content": {
|
||||
"$ref": "#/definitions/structuredContent"
|
||||
},
|
||||
"data": {
|
||||
"$ref": "#/definitions/structuredContentData"
|
||||
},
|
||||
"style": {
|
||||
"$ref": "#/definitions/structuredContentStyle"
|
||||
},
|
||||
"lang": {
|
||||
"type": "string",
|
||||
"description": "Defines the language of an element in the format defined by RFC 5646."
|
||||
}
|
||||
}
|
||||
},
|
||||
{
|
||||
"type": "object",
|
||||
"description": "Image tag.",
|
||||
"required": [
|
||||
"tag",
|
||||
"path"
|
||||
],
|
||||
"additionalProperties": false,
|
||||
"properties": {
|
||||
"tag": {
|
||||
"type": "string",
|
||||
"const": "img"
|
||||
},
|
||||
"data": {
|
||||
"$ref": "#/definitions/structuredContentData"
|
||||
},
|
||||
"path": {
|
||||
"type": "string",
|
||||
"description": "Path to the image file in the archive."
|
||||
},
|
||||
"width": {
|
||||
"type": "number",
|
||||
"description": "Preferred width of the image.",
|
||||
"minimum": 0
|
||||
},
|
||||
"height": {
|
||||
"type": "number",
|
||||
"description": "Preferred height of the image.",
|
||||
"minimum": 0
|
||||
},
|
||||
"title": {
|
||||
"type": "string",
|
||||
"description": "Hover text for the image."
|
||||
},
|
||||
"pixelated": {
|
||||
"type": "boolean",
|
||||
"description": "Whether or not the image should appear pixelated at sizes larger than the image's native resolution.",
|
||||
"default": false
|
||||
},
|
||||
"imageRendering": {
|
||||
"type": "string",
|
||||
"description": "Controls how the image is rendered. The value of this field supersedes the pixelated field.",
|
||||
"enum": ["auto", "pixelated", "crisp-edges"],
|
||||
"default": "auto"
|
||||
},
|
||||
"appearance": {
|
||||
"type": "string",
|
||||
"description": "Controls the appearance of the image. The \"monochrome\" value will mask the opaque parts of the image using the current text color.",
|
||||
"enum": ["auto", "monochrome"],
|
||||
"default": "auto"
|
||||
},
|
||||
"background": {
|
||||
"type": "boolean",
|
||||
"description": "Whether or not a background color is displayed behind the image.",
|
||||
"default": true
|
||||
},
|
||||
"collapsed": {
|
||||
"type": "boolean",
|
||||
"description": "Whether or not the image is collapsed by default.",
|
||||
"default": false
|
||||
},
|
||||
"collapsible": {
|
||||
"type": "boolean",
|
||||
"description": "Whether or not the image can be collapsed.",
|
||||
"default": false
|
||||
},
|
||||
"verticalAlign": {
|
||||
"type": "string",
|
||||
"description": "The vertical alignment of the image.",
|
||||
"enum": ["baseline", "sub", "super", "text-top", "text-bottom", "middle", "top", "bottom"]
|
||||
},
|
||||
"sizeUnits": {
|
||||
"type": "string",
|
||||
"description": "The units for the width and height.",
|
||||
"enum": ["px", "em"]
|
||||
}
|
||||
}
|
||||
},
|
||||
{
|
||||
"type": "object",
|
||||
"description": "Link tag.",
|
||||
"required": [
|
||||
"tag",
|
||||
"href"
|
||||
],
|
||||
"additionalProperties": false,
|
||||
"properties": {
|
||||
"tag": {
|
||||
"type": "string",
|
||||
"const": "a"
|
||||
},
|
||||
"content": {
|
||||
"$ref": "#/definitions/structuredContent"
|
||||
},
|
||||
"href": {
|
||||
"type": "string",
|
||||
"description": "The URL for the link. URLs starting with a ? are treated as internal links to other dictionary content.",
|
||||
"pattern": "^(?:https?:|\\?)[\\w\\W]*"
|
||||
},
|
||||
"lang": {
|
||||
"type": "string",
|
||||
"description": "Defines the language of an element in the format defined by RFC 5646."
|
||||
}
|
||||
}
|
||||
}
|
||||
]
|
||||
}
|
||||
]
|
||||
},
|
||||
"structuredContentData": {
|
||||
"type": "object",
|
||||
"description": "Generic data attributes that should be added to the element.",
|
||||
"additionalProperties": {
|
||||
"type": "string"
|
||||
}
|
||||
},
|
||||
"structuredContentStyle": {
|
||||
"type": "object",
|
||||
"additionalProperties": false,
|
||||
"properties": {
|
||||
"fontStyle": {
|
||||
"type": "string",
|
||||
"enum": ["normal", "italic"],
|
||||
"default": "normal"
|
||||
},
|
||||
"fontWeight": {
|
||||
"type": "string",
|
||||
"enum": ["normal", "bold"],
|
||||
"default": "normal"
|
||||
},
|
||||
"fontSize": {
|
||||
"type": "string",
|
||||
"default": "medium"
|
||||
},
|
||||
"textDecorationLine": {
|
||||
"oneOf": [
|
||||
{
|
||||
"type": "string",
|
||||
"enum": ["none", "underline", "overline", "line-through"],
|
||||
"default": "none"
|
||||
},
|
||||
{
|
||||
"type": "array",
|
||||
"items": {
|
||||
"type": "string",
|
||||
"enum": ["underline", "overline", "line-through"],
|
||||
"default": "none"
|
||||
}
|
||||
}
|
||||
]
|
||||
},
|
||||
"verticalAlign": {
|
||||
"type": "string",
|
||||
"enum": ["baseline", "sub", "super", "text-top", "text-bottom", "middle", "top", "bottom"],
|
||||
"default": "baseline"
|
||||
},
|
||||
"textAlign": {
|
||||
"type": "string",
|
||||
"enum": ["start", "end", "left", "right", "center", "justify", "justify-all", "match-parent"],
|
||||
"default": "start"
|
||||
},
|
||||
"marginTop": {
|
||||
"type": "number",
|
||||
"default": 0
|
||||
},
|
||||
"marginLeft": {
|
||||
"type": "number",
|
||||
"default": 0
|
||||
},
|
||||
"marginRight": {
|
||||
"type": "number",
|
||||
"default": 0
|
||||
},
|
||||
"marginBottom": {
|
||||
"type": "number",
|
||||
"default": 0
|
||||
},
|
||||
"listStyleType": {
|
||||
"type": "string",
|
||||
"default": "disc"
|
||||
}
|
||||
}
|
||||
}
|
||||
},
|
||||
"type": "array",
|
||||
"description": "Data file containing term information.",
|
||||
"items": {
|
||||
"type": "array",
|
||||
"description": "Information about a single term.",
|
||||
"minItems": 8,
|
||||
"additionalItems": false,
|
||||
"items": [
|
||||
{
|
||||
"type": "string",
|
||||
"description": "The text for the term."
|
||||
},
|
||||
{
|
||||
"type": "string",
|
||||
"description": "Reading of the term, or an empty string if the reading is the same as the term."
|
||||
},
|
||||
{
|
||||
"type": ["string", "null"],
|
||||
"description": "String of space-separated tags for the definition. An empty string is treated as no tags."
|
||||
},
|
||||
{
|
||||
"type": "string",
|
||||
"description": "String of space-separated rule identifiers for the definition which is used to validate deinflection. Valid rule identifiers are: v1: ichidan verb; v5: godan verb; vs: suru verb; vk: kuru verb; adj-i: i-adjective. An empty string corresponds to words which aren't inflected, such as nouns."
|
||||
},
|
||||
{
|
||||
"type": "number",
|
||||
"description": "Score used to determine popularity. Negative values are more rare and positive values are more frequent. This score is also used to sort search results."
|
||||
},
|
||||
{
|
||||
"type": "array",
|
||||
"description": "Array of definitions for the term.",
|
||||
"items": {
|
||||
"oneOf": [
|
||||
{
|
||||
"type": "string",
|
||||
"description": "Single definition for the term."
|
||||
},
|
||||
{
|
||||
"type": "object",
|
||||
"description": "Single detailed definition for the term.",
|
||||
"required": [
|
||||
"type"
|
||||
],
|
||||
"properties": {
|
||||
"type": {
|
||||
"type": "string",
|
||||
"description": "The type of the data for this definition.",
|
||||
"enum": ["text", "image", "structured-content"]
|
||||
}
|
||||
},
|
||||
"oneOf": [
|
||||
{
|
||||
"required": [
|
||||
"type",
|
||||
"text"
|
||||
],
|
||||
"additionalProperties": false,
|
||||
"properties": {
|
||||
"type": {
|
||||
"type": "string",
|
||||
"const": "text"
|
||||
},
|
||||
"text": {
|
||||
"type": "string",
|
||||
"description": "Single definition for the term."
|
||||
}
|
||||
}
|
||||
},
|
||||
{
|
||||
"required": [
|
||||
"type",
|
||||
"content"
|
||||
],
|
||||
"additionalProperties": false,
|
||||
"properties": {
|
||||
"type": {
|
||||
"type": "string",
|
||||
"const": "structured-content"
|
||||
},
|
||||
"content": {
|
||||
"$ref": "#/definitions/structuredContent",
|
||||
"description": "Single definition for the term using a structured content object."
|
||||
}
|
||||
}
|
||||
},
|
||||
{
|
||||
"required": [
|
||||
"type",
|
||||
"path"
|
||||
],
|
||||
"additionalProperties": false,
|
||||
"properties": {
|
||||
"type": {
|
||||
"type": "string",
|
||||
"const": "image"
|
||||
},
|
||||
"path": {
|
||||
"type": "string",
|
||||
"description": "Path to the image file in the archive."
|
||||
},
|
||||
"width": {
|
||||
"type": "integer",
|
||||
"description": "Preferred width of the image.",
|
||||
"minimum": 1
|
||||
},
|
||||
"height": {
|
||||
"type": "integer",
|
||||
"description": "Preferred height of the image.",
|
||||
"minimum": 1
|
||||
},
|
||||
"title": {
|
||||
"type": "string",
|
||||
"description": "Hover text for the image."
|
||||
},
|
||||
"description": {
|
||||
"type": "string",
|
||||
"description": "Description of the image."
|
||||
},
|
||||
"pixelated": {
|
||||
"type": "boolean",
|
||||
"description": "Whether or not the image should appear pixelated at sizes larger than the image's native resolution.",
|
||||
"default": false
|
||||
},
|
||||
"imageRendering": {
|
||||
"type": "string",
|
||||
"description": "Controls how the image is rendered. The value of this field supersedes the pixelated field.",
|
||||
"enum": ["auto", "pixelated", "crisp-edges"],
|
||||
"default": "auto"
|
||||
},
|
||||
"appearance": {
|
||||
"type": "string",
|
||||
"description": "Controls the appearance of the image. The \"monochrome\" value will mask the opaque parts of the image using the current text color.",
|
||||
"enum": ["auto", "monochrome"],
|
||||
"default": "auto"
|
||||
},
|
||||
"background": {
|
||||
"type": "boolean",
|
||||
"description": "Whether or not a background color is displayed behind the image.",
|
||||
"default": true
|
||||
},
|
||||
"collapsed": {
|
||||
"type": "boolean",
|
||||
"description": "Whether or not the image is collapsed by default.",
|
||||
"default": false
|
||||
},
|
||||
"collapsible": {
|
||||
"type": "boolean",
|
||||
"description": "Whether or not the image can be collapsed.",
|
||||
"default": true
|
||||
}
|
||||
}
|
||||
}
|
||||
]
|
||||
}
|
||||
]
|
||||
}
|
||||
},
|
||||
{
|
||||
"type": "integer",
|
||||
"description": "Sequence number for the term. Terms with the same sequence number can be shown together when the \"resultOutputMode\" option is set to \"merge\"."
|
||||
},
|
||||
{
|
||||
"type": "string",
|
||||
"description": "String of space-separated tags for the term. An empty string is treated as no tags."
|
||||
}
|
||||
]
|
||||
}
|
||||
}
|
|
@ -56,5 +56,16 @@
|
|||
["子", "name", 0, "子項目", 0],
|
||||
["句", "expression", 0, "句項目", 0]
|
||||
]
|
||||
},
|
||||
"sankoku8": {
|
||||
"index": {
|
||||
"title": "三省堂国語辞典 第八版",
|
||||
"sequenced": true,
|
||||
"format": 3
|
||||
},
|
||||
"tags": [
|
||||
["子", "name", 0, "子項目", 0],
|
||||
["句", "expression", 0, "句項目", 0]
|
||||
]
|
||||
}
|
||||
}
|
||||
|
|
|
@ -25,5 +25,13 @@
|
|||
"keiyoushi": ["形"],
|
||||
"kahen": ["カ変"],
|
||||
"sudachi": ["助動", "接尾", "枕詞", "連体", "連語"]
|
||||
},
|
||||
"sankoku8": {
|
||||
"sahen": ["サ", "サ型"],
|
||||
"godan": ["上二", "下二", "下二型", "四","四型", "五", "五型", "特殊型", "マス","マス型"],
|
||||
"ichidan": ["上一", "下一", "下一型"],
|
||||
"keiyoushi": ["形", "形型"],
|
||||
"kahen": ["カ"],
|
||||
"sudachi": []
|
||||
}
|
||||
}
|
||||
|
|
495
data/yomichan/name_conversion/sankoku8.json
Normal file
495
data/yomichan/name_conversion/sankoku8.json
Normal file
|
@ -0,0 +1,495 @@
|
|||
{
|
||||
"a": {},
|
||||
"br": {},
|
||||
"img": {},
|
||||
"div": {},
|
||||
"span": {},
|
||||
"ruby": {},
|
||||
"rt": {},
|
||||
"small": {
|
||||
"name": "span",
|
||||
"style": "vertical-align: super; font-size: 0.65em; font-weight: normal; margin-right: 0.25em;",
|
||||
"procedures": [
|
||||
{
|
||||
"procedure_name": "wrap",
|
||||
"parameters": {
|
||||
"l_wrap": "(",
|
||||
"r_wrap": ")"
|
||||
}
|
||||
}
|
||||
]
|
||||
},
|
||||
"sup": {
|
||||
"name": "span",
|
||||
"style": "vertical-align: super; font-size: 0.65em;"
|
||||
},
|
||||
"sub": {
|
||||
"name": "span",
|
||||
"style": "vertical-align: sub; font-size: 0.65em;"
|
||||
},
|
||||
"表外字": {
|
||||
"name": "ruby"
|
||||
},
|
||||
"表外字マーク": {
|
||||
"name": "rt"
|
||||
},
|
||||
"表外音訓": {
|
||||
"name": "ruby"
|
||||
},
|
||||
"表外音訓マーク": {
|
||||
"name": "rt"
|
||||
},
|
||||
"語構成": {
|
||||
"name": "span",
|
||||
"style": "margin-right: 0.5em;"
|
||||
},
|
||||
"分書": {
|
||||
"name": "span",
|
||||
"style": "margin-right: 0.5em;"
|
||||
},
|
||||
"見出仮名": {
|
||||
"name": "span",
|
||||
"style": "font-weight: bold;"
|
||||
},
|
||||
"解説部": {
|
||||
"name": "div"
|
||||
},
|
||||
"子解説部": {
|
||||
"name": "div"
|
||||
},
|
||||
"句解説部": {
|
||||
"name": "div"
|
||||
},
|
||||
"大語義": {
|
||||
"name": "div"
|
||||
},
|
||||
"語義": {
|
||||
"name": "div"
|
||||
},
|
||||
"副義": {
|
||||
"name": "div",
|
||||
"style": "margin-left: 1.0em;"
|
||||
},
|
||||
"注記語義": {
|
||||
"name": "span",
|
||||
"style": "margin-left: 0.5em;"
|
||||
},
|
||||
"用例G": {
|
||||
"name": "div",
|
||||
"procedures": [
|
||||
{
|
||||
"procedure_name": "has_parent",
|
||||
"parameters": {
|
||||
"parent_name": "百科",
|
||||
"key": "name",
|
||||
"value": "span"
|
||||
}
|
||||
},
|
||||
{
|
||||
"procedure_name": "has_parent",
|
||||
"parameters": {
|
||||
"parent_name": "注記",
|
||||
"key": "name",
|
||||
"value": "span"
|
||||
}
|
||||
},
|
||||
{
|
||||
"procedure_name": "has_parent",
|
||||
"parameters": {
|
||||
"parent_name": "名詞形G",
|
||||
"key": "name",
|
||||
"value": "span"
|
||||
}
|
||||
},
|
||||
{
|
||||
"procedure_name": "has_parent",
|
||||
"parameters": {
|
||||
"parent_name": "動詞形G",
|
||||
"key": "name",
|
||||
"value": "span"
|
||||
}
|
||||
},
|
||||
{
|
||||
"procedure_name": "has_parent",
|
||||
"parameters": {
|
||||
"parent_name": "自動詞形G",
|
||||
"key": "name",
|
||||
"value": "span"
|
||||
}
|
||||
},
|
||||
{
|
||||
"procedure_name": "has_parent",
|
||||
"parameters": {
|
||||
"parent_name": "他動詞形G",
|
||||
"key": "name",
|
||||
"value": "span"
|
||||
}
|
||||
},
|
||||
{
|
||||
"procedure_name": "has_parent",
|
||||
"parameters": {
|
||||
"parent_name": "可能形G",
|
||||
"key": "name",
|
||||
"value": "span"
|
||||
}
|
||||
},
|
||||
{
|
||||
"procedure_name": "has_parent",
|
||||
"parameters": {
|
||||
"parent_name": "人G",
|
||||
"key": "name",
|
||||
"value": "span"
|
||||
}
|
||||
}
|
||||
]
|
||||
},
|
||||
"見出相当部": {
|
||||
"name": "span",
|
||||
"style": "margin-left: 0.125em; margin-right: 0.125em;"
|
||||
},
|
||||
"ルビG": {
|
||||
"name": "span",
|
||||
"style": "vertical-align: super; font-size: 0.65em; font-weight: normal;"
|
||||
},
|
||||
"名詞人形G": {
|
||||
"name": "div"
|
||||
},
|
||||
"名詞形G": {
|
||||
"name": "div",
|
||||
"procedures": [
|
||||
{
|
||||
"procedure_name": "has_parent",
|
||||
"parameters": {
|
||||
"parent_name": "語義",
|
||||
"key": "name",
|
||||
"value": "span"
|
||||
}
|
||||
},
|
||||
{
|
||||
"procedure_name": "has_parent",
|
||||
"parameters": {
|
||||
"parent_name": "副義",
|
||||
"key": "name",
|
||||
"value": "span"
|
||||
}
|
||||
},
|
||||
{
|
||||
"procedure_name": "has_parent",
|
||||
"parameters": {
|
||||
"parent_name": "注記",
|
||||
"key": "name",
|
||||
"value": "span"
|
||||
}
|
||||
},
|
||||
{
|
||||
"procedure_name": "has_parent",
|
||||
"parameters": {
|
||||
"parent_name": "共通",
|
||||
"key": "name",
|
||||
"value": "span"
|
||||
}
|
||||
}
|
||||
]
|
||||
},
|
||||
"動詞形G": {
|
||||
"name": "div",
|
||||
"procedures": [
|
||||
{
|
||||
"procedure_name": "has_parent",
|
||||
"parameters": {
|
||||
"parent_name": "語義",
|
||||
"key": "name",
|
||||
"value": "span"
|
||||
}
|
||||
},
|
||||
{
|
||||
"procedure_name": "has_parent",
|
||||
"parameters": {
|
||||
"parent_name": "副義",
|
||||
"key": "name",
|
||||
"value": "span"
|
||||
}
|
||||
},
|
||||
{
|
||||
"procedure_name": "has_parent",
|
||||
"parameters": {
|
||||
"parent_name": "注記",
|
||||
"key": "name",
|
||||
"value": "span"
|
||||
}
|
||||
},
|
||||
{
|
||||
"procedure_name": "has_parent",
|
||||
"parameters": {
|
||||
"parent_name": "共通",
|
||||
"key": "name",
|
||||
"value": "span"
|
||||
}
|
||||
}
|
||||
]
|
||||
},
|
||||
"自動詞形G": {
|
||||
"name": "div",
|
||||
"procedures": [
|
||||
{
|
||||
"procedure_name": "has_parent",
|
||||
"parameters": {
|
||||
"parent_name": "語義",
|
||||
"key": "name",
|
||||
"value": "span"
|
||||
}
|
||||
},
|
||||
{
|
||||
"procedure_name": "has_parent",
|
||||
"parameters": {
|
||||
"parent_name": "副義",
|
||||
"key": "name",
|
||||
"value": "span"
|
||||
}
|
||||
},
|
||||
{
|
||||
"procedure_name": "has_parent",
|
||||
"parameters": {
|
||||
"parent_name": "注記",
|
||||
"key": "name",
|
||||
"value": "span"
|
||||
}
|
||||
},
|
||||
{
|
||||
"procedure_name": "has_parent",
|
||||
"parameters": {
|
||||
"parent_name": "共通",
|
||||
"key": "name",
|
||||
"value": "span"
|
||||
}
|
||||
}
|
||||
]
|
||||
},
|
||||
"他動詞形G": {
|
||||
"name": "div",
|
||||
"procedures": [
|
||||
{
|
||||
"procedure_name": "has_parent",
|
||||
"parameters": {
|
||||
"parent_name": "語義",
|
||||
"key": "name",
|
||||
"value": "span"
|
||||
}
|
||||
},
|
||||
{
|
||||
"procedure_name": "has_parent",
|
||||
"parameters": {
|
||||
"parent_name": "副義",
|
||||
"key": "name",
|
||||
"value": "span"
|
||||
}
|
||||
},
|
||||
{
|
||||
"procedure_name": "has_parent",
|
||||
"parameters": {
|
||||
"parent_name": "注記",
|
||||
"key": "name",
|
||||
"value": "span"
|
||||
}
|
||||
},
|
||||
{
|
||||
"procedure_name": "has_parent",
|
||||
"parameters": {
|
||||
"parent_name": "共通",
|
||||
"key": "name",
|
||||
"value": "span"
|
||||
}
|
||||
}
|
||||
]
|
||||
},
|
||||
"可能形G": {
|
||||
"name": "div",
|
||||
"procedures": [
|
||||
{
|
||||
"procedure_name": "has_parent",
|
||||
"parameters": {
|
||||
"parent_name": "語義",
|
||||
"key": "name",
|
||||
"value": "span"
|
||||
}
|
||||
},
|
||||
{
|
||||
"procedure_name": "has_parent",
|
||||
"parameters": {
|
||||
"parent_name": "副義",
|
||||
"key": "name",
|
||||
"value": "span"
|
||||
}
|
||||
},
|
||||
{
|
||||
"procedure_name": "has_parent",
|
||||
"parameters": {
|
||||
"parent_name": "注記",
|
||||
"key": "name",
|
||||
"value": "span"
|
||||
}
|
||||
},
|
||||
{
|
||||
"procedure_name": "has_parent",
|
||||
"parameters": {
|
||||
"parent_name": "共通",
|
||||
"key": "name",
|
||||
"value": "span"
|
||||
}
|
||||
}
|
||||
]
|
||||
},
|
||||
"人G": {
|
||||
"name": "div",
|
||||
"procedures": [
|
||||
{
|
||||
"procedure_name": "has_parent",
|
||||
"parameters": {
|
||||
"parent_name": "語義",
|
||||
"key": "name",
|
||||
"value": "span"
|
||||
}
|
||||
},
|
||||
{
|
||||
"procedure_name": "has_parent",
|
||||
"parameters": {
|
||||
"parent_name": "副義",
|
||||
"key": "name",
|
||||
"value": "span"
|
||||
}
|
||||
},
|
||||
{
|
||||
"procedure_name": "has_parent",
|
||||
"parameters": {
|
||||
"parent_name": "注記",
|
||||
"key": "name",
|
||||
"value": "span"
|
||||
}
|
||||
},
|
||||
{
|
||||
"procedure_name": "has_parent",
|
||||
"parameters": {
|
||||
"parent_name": "共通",
|
||||
"key": "name",
|
||||
"value": "span"
|
||||
}
|
||||
}
|
||||
]
|
||||
},
|
||||
"参照矢印": {
|
||||
"name": "span",
|
||||
"style": "margin-right: 0.25em;"
|
||||
},
|
||||
"子見出仮名": {
|
||||
"name": "span",
|
||||
"style": "font-weight: bold;"
|
||||
},
|
||||
"句表記": {
|
||||
"name": "span",
|
||||
"style": "font-weight: bold;"
|
||||
},
|
||||
"派生語G": {
|
||||
"name": "div"
|
||||
},
|
||||
"謙譲形G": {
|
||||
"name": "div"
|
||||
},
|
||||
"共通": {
|
||||
"name": "div"
|
||||
},
|
||||
"イタリック": {
|
||||
"name": "span",
|
||||
"style": "font-style: italic;"
|
||||
},
|
||||
"ボールド": {
|
||||
"name": "span",
|
||||
"style": "font-weight: bold;"
|
||||
},
|
||||
"アクセント表記": {
|
||||
"name": "span",
|
||||
"style": "font-weight: bold; font-size: 0.85em;"
|
||||
},
|
||||
"i": {
|
||||
"name": "span",
|
||||
"style": "font-style: italic;"
|
||||
},
|
||||
"sub": {
|
||||
"name": "span",
|
||||
"style": "font-size: 0.7em; vertical-align: sub;",
|
||||
"procedures": [
|
||||
{
|
||||
"procedure_name": "has_parent",
|
||||
"parameters": {
|
||||
"parent_name": "横",
|
||||
"key": "style",
|
||||
"value": "font-size: 0.7em; vertical-align: super;"
|
||||
}
|
||||
}
|
||||
]
|
||||
},
|
||||
"kanbun": {
|
||||
"name": "span",
|
||||
"style": "font-size: 0.5em; vertical-align: sub;"
|
||||
},
|
||||
"ロゴ": {
|
||||
"name": "span",
|
||||
"style": "margin-right: 0.25em"
|
||||
},
|
||||
"尊敬形G": {
|
||||
"name": "div"
|
||||
},
|
||||
"謙譲形G": {
|
||||
"name": "div"
|
||||
},
|
||||
"丁寧形G": {
|
||||
"name": "div"
|
||||
},
|
||||
"百科": {
|
||||
"name": "div"
|
||||
},
|
||||
"由来": {
|
||||
"name": "div"
|
||||
},
|
||||
"区別": {
|
||||
"name": "div"
|
||||
},
|
||||
"アクセント注記": {
|
||||
"name": "div"
|
||||
},
|
||||
"表記情報": {
|
||||
"name": "div"
|
||||
},
|
||||
"別見出": {
|
||||
"name": "span",
|
||||
"style": "font-weight: bold;"
|
||||
},
|
||||
"読み": {
|
||||
"name": "span",
|
||||
"style": "font-size: 0.7em;"
|
||||
},
|
||||
"歴史仮名": {
|
||||
"name": "span",
|
||||
"style": "vertical-align: super; font-size: 0.65em; font-weight: normal;",
|
||||
"procedures": [
|
||||
{
|
||||
"procedure_name": "wrap",
|
||||
"parameters": {
|
||||
"l_wrap": "(",
|
||||
"r_wrap": ")"
|
||||
}
|
||||
}
|
||||
]
|
||||
},
|
||||
"ルビ": {
|
||||
"name": "span",
|
||||
"procedures": [
|
||||
{
|
||||
"procedure_name": "wrap",
|
||||
"parameters": {
|
||||
"l_wrap": "(",
|
||||
"r_wrap": ")"
|
||||
}
|
||||
}
|
||||
]
|
||||
}
|
||||
}
|
15
jitenbot.py
15
jitenbot.py
|
@ -68,14 +68,19 @@ def parse_args(target_names):
|
|||
help="path to icon file to be used with MDict",
|
||||
type=filename,
|
||||
)
|
||||
parser.add_argument(
|
||||
"--no-mdict-export",
|
||||
help="skip export of dictionary data to MDict format",
|
||||
action='store_true',
|
||||
)
|
||||
parser.add_argument(
|
||||
"--no-yomichan-export",
|
||||
help="skip export of dictionary data to Yomichan format",
|
||||
action='store_true',
|
||||
)
|
||||
parser.add_argument(
|
||||
"--no-mdict-export",
|
||||
help="skip export of dictionary data to MDict format",
|
||||
"--validate-yomichan-terms",
|
||||
help="validate JSON structure of exported Yomichan dictionary terms",
|
||||
action='store_true',
|
||||
)
|
||||
args = parser.parse_args()
|
||||
|
@ -108,9 +113,11 @@ def main():
|
|||
crawler.collect_pages(args.page_dir)
|
||||
crawler.read_pages()
|
||||
if not args.no_yomichan_export:
|
||||
crawler.make_yomichan_dictionary(args.media_dir)
|
||||
crawler.make_yomichan_dictionary(
|
||||
args.media_dir, args.validate_yomichan_terms)
|
||||
if not args.no_mdict_export:
|
||||
crawler.make_mdict_dictionary(args.media_dir, args.mdict_icon)
|
||||
crawler.make_mdict_dictionary(
|
||||
args.media_dir, args.mdict_icon)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
|
|
|
@ -1,15 +1,20 @@
|
|||
attrs==23.1.0
|
||||
beautifulsoup4==4.12.2
|
||||
bs4==0.0.1
|
||||
certifi==2022.12.7
|
||||
charset-normalizer==3.1.0
|
||||
css-parser==1.0.8
|
||||
fastjsonschema==2.17.1
|
||||
html5lib==1.1
|
||||
idna==3.4
|
||||
jsonschema-specifications==2023.6.1
|
||||
lxml==4.9.2
|
||||
mdict-utils==1.3.12
|
||||
Pillow==9.5.0
|
||||
platformdirs==3.5.0
|
||||
referencing==0.29.1
|
||||
requests==2.29.0
|
||||
rpds-py==0.8.10
|
||||
six==1.16.0
|
||||
soupsieve==2.4.1
|
||||
SudachiDict-full==20230110
|
||||
|
|
9
run_all.sh
Normal file → Executable file
9
run_all.sh
Normal file → Executable file
|
@ -1,3 +1,7 @@
|
|||
#!/bin/sh
|
||||
|
||||
python -m unittest discover -s tests
|
||||
|
||||
python jitenbot.py jitenon-kokugo
|
||||
python jitenbot.py jitenon-yoji
|
||||
python jitenbot.py jitenon-kotowaza
|
||||
|
@ -11,3 +15,8 @@ python jitenbot.py daijirin2 \
|
|||
--media-dir monokakido/DAIJIRIN2/media \
|
||||
--page-dir monokakido/DAIJIRIN2/pages \
|
||||
--mdict-icon monokakido/DAIJIRIN2/DAIJIRIN2-76@3x.png
|
||||
|
||||
python jitenbot.py sankoku8 \
|
||||
--media-dir monokakido/SANKOKU8/media \
|
||||
--page-dir monokakido/SANKOKU8/pages \
|
||||
--mdict-icon monokakido/SANKOKU8/SANKOKU8-76@3x.png
|
||||
|
|
47
tests/test_sankoku_phrases.py
Normal file
47
tests/test_sankoku_phrases.py
Normal file
|
@ -0,0 +1,47 @@
|
|||
import unittest
|
||||
from bot.entries.sankoku8 import parse_hyouki_pattern
|
||||
from bs4 import BeautifulSoup
|
||||
|
||||
|
||||
class TestSankokuPhrases(unittest.TestCase):
    """Exercise ``parse_hyouki_pattern`` on phrase-headword patterns.

    Each test passes a single pattern string to ``parse_hyouki_pattern`` and
    checks both the number of expressions returned and that every expected
    expression is among them.
    """

    def _assert_expansions(self, pattern, expected):
        """Assert that *pattern* expands to the *expected* expressions.

        The result must have exactly ``len(expected)`` items and must
        contain each expected expression.
        """
        exps = parse_hyouki_pattern(pattern)
        self.assertEqual(len(exps), len(expected))
        for expression in expected:
            self.assertIn(expression, exps)

    def test_sankoku_phrases1(self):
        """A parenthesized segment yields forms with and without it."""
        self._assert_expansions(
            '耳にたこ(ができる)',
            [
                "耳にたこ",
                "耳にたこができる",
            ],
        )

    def test_sankoku_phrases2(self):
        """Two 〈a/b〉 groups yield every combination of alternatives."""
        self._assert_expansions(
            '一斑を〈見て/もって〉全豹を〈卜す/推す〉',
            [
                "一斑を見て全豹を卜す",
                "一斑を見て全豹を推す",
                "一斑をもって全豹を卜す",
                "一斑をもって全豹を推す",
            ],
        )

    def test_sankoku_phrases3(self):
        """A {a・b} group yields one form per alternative."""
        self._assert_expansions(
            '{かじ・舵}を切る',
            [
                "かじを切る",
                "舵を切る",
            ],
        )

    def test_sankoku_phrases4(self):
        """An optional group nested with alternatives is fully expanded."""
        # Raw string: the pattern contains a literal backslash, and "\楊" in
        # a normal string literal is an invalid escape sequence that newer
        # Python versions warn about. The runtime value is unchanged.
        self._assert_expansions(
            r'重箱の隅を(⦅ようじ\楊枝⦆で)〈つつく/ほじくる〉',
            [
                "重箱の隅をつつく",
                "重箱の隅をようじでつつく",
                "重箱の隅を楊枝でつつく",
                "重箱の隅をほじくる",
                "重箱の隅をようじでほじくる",
                "重箱の隅を楊枝でほじくる",
            ],
        )

    def test_sankoku_phrases5(self):
        """A {a・b} group nested inside a 〈a/b〉 group is expanded."""
        self._assert_expansions(
            '群盲象を〈{な・撫}でる/評する〉',
            [
                "群盲象をなでる",
                "群盲象を撫でる",
                "群盲象を評する",
            ],
        )
|
Loading…
Reference in a new issue