Add export support for the MDict dictionary format
This commit is contained in:
parent
e4a2e75d82
commit
4c837cd72d
1
.gitignore
vendored
1
.gitignore
vendored
|
@ -1,6 +1,7 @@
|
||||||
webcache/
|
webcache/
|
||||||
output/
|
output/
|
||||||
notes/
|
notes/
|
||||||
|
monokakido/
|
||||||
|
|
||||||
# Byte-compiled / optimized / DLL files
|
# Byte-compiled / optimized / DLL files
|
||||||
__pycache__/
|
__pycache__/
|
||||||
|
|
64
README.md
64
README.md
|
@ -49,7 +49,8 @@ compiling the scraped data into compact dictionary file formats.
|
||||||
|
|
||||||
# Usage
|
# Usage
|
||||||
```
|
```
|
||||||
usage: jitenbot [-h] [-p PAGE_DIR] [-i IMAGE_DIR]
|
usage: jitenbot [-h] [-p PAGE_DIR] [-m MEDIA_DIR] [-i MDICT_ICON]
|
||||||
|
[--no-yomichan-export] [--no-mdict-export]
|
||||||
{jitenon-kokugo,jitenon-yoji,jitenon-kotowaza,smk8,daijirin2}
|
{jitenon-kokugo,jitenon-yoji,jitenon-kotowaza,smk8,daijirin2}
|
||||||
|
|
||||||
Convert Japanese dictionary files to new formats.
|
Convert Japanese dictionary files to new formats.
|
||||||
|
@ -62,9 +63,15 @@ options:
|
||||||
-h, --help show this help message and exit
|
-h, --help show this help message and exit
|
||||||
-p PAGE_DIR, --page-dir PAGE_DIR
|
-p PAGE_DIR, --page-dir PAGE_DIR
|
||||||
path to directory containing XML page files
|
path to directory containing XML page files
|
||||||
-i IMAGE_DIR, --image-dir IMAGE_DIR
|
-m MEDIA_DIR, --media-dir MEDIA_DIR
|
||||||
path to directory containing image folders (gaiji,
|
path to directory containing media folders (gaiji,
|
||||||
graphics, etc.)
|
graphics, audio, etc.)
|
||||||
|
-i MDICT_ICON, --mdict-icon MDICT_ICON
|
||||||
|
path to icon file to be used with MDict
|
||||||
|
--no-yomichan-export skip export of dictionary data to Yomichan format
|
||||||
|
--no-mdict-export skip export of dictionary data to MDict format
|
||||||
|
|
||||||
|
See README.md for details regarding media directory structures
|
||||||
```
|
```
|
||||||
### Online Targets
|
### Online Targets
|
||||||
Jitenbot will scrape the target website and save the pages to the [user cache directory](https://pypi.org/project/platformdirs/).
|
Jitenbot will scrape the target website and save the pages to the [user cache directory](https://pypi.org/project/platformdirs/).
|
||||||
|
@ -75,8 +82,55 @@ HTTP request headers (user agent string, etc.) may be customized by editing the
|
||||||
[user config directory](https://pypi.org/project/platformdirs/).
|
[user config directory](https://pypi.org/project/platformdirs/).
|
||||||
|
|
||||||
### Offline Targets
|
### Offline Targets
|
||||||
Page data and image data must be procured by the user
|
Page data and media data must be [procured by the user](https://github.com/golddranks/monokakido/)
|
||||||
and passed to jitenbot via the appropriate command line flags.
|
and passed to jitenbot via the appropriate command line flags.
|
||||||
|
|
||||||
|
<details>
|
||||||
|
<summary>smk8 media directory</summary>
|
||||||
|
|
||||||
|
Since Yomichan does not support audio files from imported
|
||||||
|
dictionaries, the `audio/` directory may be omitted to save
|
||||||
|
space in the output ZIP file if desired.
|
||||||
|
|
||||||
|
```
|
||||||
|
media
|
||||||
|
├── Audio.png
|
||||||
|
├── audio
|
||||||
|
│ ├── 00001.aac
|
||||||
|
│ ├── 00002.aac
|
||||||
|
│ ├── 00003.aac
|
||||||
|
│ │ ...
|
||||||
|
│ └── 82682.aac
|
||||||
|
└── gaiji
|
||||||
|
├── 1d110.svg
|
||||||
|
├── 1d15d.svg
|
||||||
|
├── 1d15e.svg
|
||||||
|
│ ...
|
||||||
|
└── xbunnoa.svg
|
||||||
|
```
|
||||||
|
</details>
|
||||||
|
|
||||||
|
<details>
|
||||||
|
<summary>daijirin2 media directory</summary>
|
||||||
|
|
||||||
|
The `graphics/` directory may be omitted to save space if desired.
|
||||||
|
|
||||||
|
```
|
||||||
|
media
|
||||||
|
├── gaiji
|
||||||
|
│ ├── 1D10B.svg
|
||||||
|
│ ├── 1D110.svg
|
||||||
|
│ ├── 1D12A.svg
|
||||||
|
│ │ ...
|
||||||
|
│ └── vectorOB.svg
|
||||||
|
└── graphics
|
||||||
|
├── 3djr_0002.png
|
||||||
|
├── 3djr_0004.png
|
||||||
|
├── 3djr_0005.png
|
||||||
|
│ ...
|
||||||
|
└── 4djr_yahazu.png
|
||||||
|
```
|
||||||
|
</details>
|
||||||
|
|
||||||
# Attribution
|
# Attribution
|
||||||
`Adobe-Japan1_sequences.txt` is provided by [The Adobe-Japan1-7 Character Collection](https://github.com/adobe-type-tools/Adobe-Japan1).
|
`Adobe-Japan1_sequences.txt` is provided by [The Adobe-Japan1-7 Character Collection](https://github.com/adobe-type-tools/Adobe-Japan1).
|
||||||
|
|
4
TODO.md
4
TODO.md
|
@ -1,11 +1,11 @@
|
||||||
### Todo
|
### Todo
|
||||||
|
|
||||||
|
- [x] Add factory classes to reduce the amount of class import statements
|
||||||
|
- [x] Support exporting to MDict (.MDX) dictionary format
|
||||||
- [ ] Add test suite
|
- [ ] Add test suite
|
||||||
- [ ] Add documentation (docstrings, etc.)
|
- [ ] Add documentation (docstrings, etc.)
|
||||||
- [ ] Validate JSON schema of Yomichan terms during export
|
- [ ] Validate JSON schema of Yomichan terms during export
|
||||||
- [ ] Add factory classes to reduce the amount of class import statements
|
|
||||||
- [ ] Add build scripts for producing program binaries
|
- [ ] Add build scripts for producing program binaries
|
||||||
- [ ] Support exporting to MDict (.MDX) dictionary format
|
|
||||||
- [ ] Validate scraped webpages after downloading
|
- [ ] Validate scraped webpages after downloading
|
||||||
- [ ] Log non-fatal failures to a log file instead of raising exceptions
|
- [ ] Log non-fatal failures to a log file instead of raising exceptions
|
||||||
- [ ] Support more dictionary websites
|
- [ ] Support more dictionary websites
|
||||||
|
|
|
@ -5,7 +5,8 @@ from bs4 import BeautifulSoup
|
||||||
|
|
||||||
import bot.scraper as Scraper
|
import bot.scraper as Scraper
|
||||||
from bot.entries.factory import new_entry
|
from bot.entries.factory import new_entry
|
||||||
from bot.yomichan.exporters.factory import new_exporter
|
from bot.yomichan.exporters.factory import new_yomi_exporter
|
||||||
|
from bot.mdict.exporters.factory import new_mdict_exporter
|
||||||
|
|
||||||
|
|
||||||
class Crawler(ABC):
|
class Crawler(ABC):
|
||||||
|
@ -38,9 +39,13 @@ class Crawler(ABC):
|
||||||
self._entries.append(entry)
|
self._entries.append(entry)
|
||||||
print()
|
print()
|
||||||
|
|
||||||
def make_yomichan_dictionary(self, image_dir):
|
def make_yomichan_dictionary(self, media_dir):
|
||||||
exporter = new_exporter(self._target)
|
exporter = new_yomi_exporter(self._target)
|
||||||
exporter.export(self._entries, image_dir)
|
exporter.export(self._entries, media_dir)
|
||||||
|
|
||||||
|
def make_mdict_dictionary(self, media_dir, icon_file):
|
||||||
|
exporter = new_mdict_exporter(self._target)
|
||||||
|
exporter.export(self._entries, media_dir, icon_file)
|
||||||
|
|
||||||
def _parse_page_id(self, page_link):
|
def _parse_page_id(self, page_link):
|
||||||
m = re.search(self._page_id_pattern, page_link)
|
m = re.search(self._page_id_pattern, page_link)
|
||||||
|
@ -142,10 +147,8 @@ class _MonokakidoCrawler(Crawler):
|
||||||
|
|
||||||
|
|
||||||
class Smk8Crawler(_MonokakidoCrawler):
|
class Smk8Crawler(_MonokakidoCrawler):
|
||||||
def __init__(self, target):
|
pass
|
||||||
super().__init__(target)
|
|
||||||
|
|
||||||
|
|
||||||
class Daijirin2Crawler(_MonokakidoCrawler):
|
class Daijirin2Crawler(_MonokakidoCrawler):
|
||||||
def __init__(self, target):
|
pass
|
||||||
super().__init__(target)
|
|
||||||
|
|
|
@ -99,15 +99,15 @@ def load_daijirin2_kana_abbreviations():
|
||||||
|
|
||||||
|
|
||||||
@cache
|
@cache
|
||||||
def load_smk8_yomichan_name_conversion():
|
def load_yomichan_name_conversion(target):
|
||||||
file_name = os.path.join("smk8", "yomichan_name_conversion.json")
|
file_name = os.path.join(target.value, "yomichan_name_conversion.json")
|
||||||
data = __load_json(file_name)
|
data = __load_json(file_name)
|
||||||
return data
|
return data
|
||||||
|
|
||||||
|
|
||||||
@cache
|
@cache
|
||||||
def load_daijirin2_yomichan_name_conversion():
|
def load_mdict_name_conversion(target):
|
||||||
file_name = os.path.join("daijirin2", "yomichan_name_conversion.json")
|
file_name = os.path.join(target.value, "mdict_name_conversion.json")
|
||||||
data = __load_json(file_name)
|
data = __load_json(file_name)
|
||||||
return data
|
return data
|
||||||
|
|
||||||
|
|
|
@ -1,4 +1,3 @@
|
||||||
import re
|
|
||||||
from bs4 import BeautifulSoup
|
from bs4 import BeautifulSoup
|
||||||
|
|
||||||
import bot.entries.expressions as Expressions
|
import bot.entries.expressions as Expressions
|
||||||
|
@ -10,19 +9,17 @@ from bot.entries.daijirin2_preprocess import preprocess_page
|
||||||
|
|
||||||
|
|
||||||
class _BaseDaijirin2Entry(Entry):
|
class _BaseDaijirin2Entry(Entry):
|
||||||
ID_TO_ENTRY = {}
|
def __init__(self, target, entry_id):
|
||||||
SUBENTRY_ID_TO_ENTRY_ID = {}
|
super().__init__(target, entry_id)
|
||||||
|
|
||||||
def __init__(self, entry_id):
|
|
||||||
super().__init__(entry_id)
|
|
||||||
if entry_id not in self.ID_TO_ENTRY:
|
|
||||||
self.ID_TO_ENTRY[entry_id] = self
|
|
||||||
else:
|
|
||||||
raise Exception(f"Duplicate entry ID: {entry_id}")
|
|
||||||
self.children = []
|
self.children = []
|
||||||
self.phrases = []
|
self.phrases = []
|
||||||
self._kana_abbreviations = load_daijirin2_kana_abbreviations()
|
self._kana_abbreviations = load_daijirin2_kana_abbreviations()
|
||||||
|
|
||||||
|
def get_global_identifier(self):
|
||||||
|
parent_part = format(self.entry_id[0], '06')
|
||||||
|
child_part = hex(self.entry_id[1]).lstrip('0x').zfill(4).upper()
|
||||||
|
return f"@{self.target.value}-{parent_part}-{child_part}"
|
||||||
|
|
||||||
def set_page(self, page):
|
def set_page(self, page):
|
||||||
page = self.__decompose_subentries(page)
|
page = self.__decompose_subentries(page)
|
||||||
self._page = page
|
self._page = page
|
||||||
|
@ -57,14 +54,7 @@ class _BaseDaijirin2Entry(Entry):
|
||||||
else:
|
else:
|
||||||
self._part_of_speech_tags.append(pos)
|
self._part_of_speech_tags.append(pos)
|
||||||
|
|
||||||
def get_headwords(self):
|
def _get_regular_headwords(self, soup):
|
||||||
if self._headwords is not None:
|
|
||||||
return self._headwords
|
|
||||||
self._set_headwords()
|
|
||||||
self._set_variant_headwords()
|
|
||||||
return self._headwords
|
|
||||||
|
|
||||||
def _set_regular_headwords(self, soup):
|
|
||||||
self._fill_alts(soup)
|
self._fill_alts(soup)
|
||||||
reading = soup.find("見出仮名").text
|
reading = soup.find("見出仮名").text
|
||||||
expressions = []
|
expressions = []
|
||||||
|
@ -78,10 +68,11 @@ class _BaseDaijirin2Entry(Entry):
|
||||||
expressions = Expressions.expand_abbreviation_list(expressions)
|
expressions = Expressions.expand_abbreviation_list(expressions)
|
||||||
if len(expressions) == 0:
|
if len(expressions) == 0:
|
||||||
expressions.append(reading)
|
expressions.append(reading)
|
||||||
self._headwords = {reading: expressions}
|
headwords = {reading: expressions}
|
||||||
|
return headwords
|
||||||
|
|
||||||
def _set_variant_headwords(self):
|
def _add_variant_expressions(self, headwords):
|
||||||
for expressions in self._headwords.values():
|
for expressions in headwords.values():
|
||||||
Expressions.add_variant_kanji(expressions)
|
Expressions.add_variant_kanji(expressions)
|
||||||
Expressions.add_fullwidth(expressions)
|
Expressions.add_fullwidth(expressions)
|
||||||
Expressions.remove_iteration_mark(expressions)
|
Expressions.remove_iteration_mark(expressions)
|
||||||
|
@ -101,7 +92,7 @@ class _BaseDaijirin2Entry(Entry):
|
||||||
tag_soup.name = "項目"
|
tag_soup.name = "項目"
|
||||||
subentry_id = self.id_string_to_entry_id(tag_soup.attrs["id"])
|
subentry_id = self.id_string_to_entry_id(tag_soup.attrs["id"])
|
||||||
self.SUBENTRY_ID_TO_ENTRY_ID[subentry_id] = self.entry_id
|
self.SUBENTRY_ID_TO_ENTRY_ID[subentry_id] = self.entry_id
|
||||||
subentry = subentry_class(subentry_id)
|
subentry = subentry_class(self.target, subentry_id)
|
||||||
page = tag_soup.decode()
|
page = tag_soup.decode()
|
||||||
subentry.set_page(page)
|
subentry.set_page(page)
|
||||||
subentry_list.append(subentry)
|
subentry_list.append(subentry)
|
||||||
|
@ -122,6 +113,8 @@ class _BaseDaijirin2Entry(Entry):
|
||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
def _delete_unused_nodes(soup):
|
def _delete_unused_nodes(soup):
|
||||||
|
"""Remove extra markup elements that appear in the entry
|
||||||
|
headword line which are not part of the entry headword"""
|
||||||
unused_nodes = [
|
unused_nodes = [
|
||||||
"漢字音logo", "活用分節", "連語句活用分節", "語構成",
|
"漢字音logo", "活用分節", "連語句活用分節", "語構成",
|
||||||
"表外字マーク", "表外字マーク", "ルビG"
|
"表外字マーク", "表外字マーク", "ルビG"
|
||||||
|
@ -144,25 +137,26 @@ class _BaseDaijirin2Entry(Entry):
|
||||||
|
|
||||||
|
|
||||||
class Daijirin2Entry(_BaseDaijirin2Entry):
|
class Daijirin2Entry(_BaseDaijirin2Entry):
|
||||||
def __init__(self, page_id):
|
def __init__(self, target, page_id):
|
||||||
entry_id = (page_id, 0)
|
entry_id = (page_id, 0)
|
||||||
super().__init__(entry_id)
|
super().__init__(target, entry_id)
|
||||||
|
|
||||||
def set_page(self, page):
|
def set_page(self, page):
|
||||||
page = preprocess_page(page)
|
page = preprocess_page(page)
|
||||||
super().set_page(page)
|
super().set_page(page)
|
||||||
|
|
||||||
def _set_headwords(self):
|
def _get_headwords(self):
|
||||||
soup = self.get_page_soup()
|
soup = self.get_page_soup()
|
||||||
self._delete_unused_nodes(soup)
|
self._delete_unused_nodes(soup)
|
||||||
if soup.find("漢字見出") is not None:
|
if soup.find("漢字見出") is not None:
|
||||||
self._set_kanji_headwords(soup)
|
headwords = self._get_kanji_headwords(soup)
|
||||||
elif soup.find("略語G") is not None:
|
elif soup.find("略語G") is not None:
|
||||||
self._set_acronym_headwords(soup)
|
headwords = self._get_acronym_headwords(soup)
|
||||||
else:
|
else:
|
||||||
self._set_regular_headwords(soup)
|
headwords = self._get_regular_headwords(soup)
|
||||||
|
return headwords
|
||||||
|
|
||||||
def _set_kanji_headwords(self, soup):
|
def _get_kanji_headwords(self, soup):
|
||||||
readings = []
|
readings = []
|
||||||
for el in soup.find_all("漢字音"):
|
for el in soup.find_all("漢字音"):
|
||||||
hira = Expressions.kata_to_hira(el.text)
|
hira = Expressions.kata_to_hira(el.text)
|
||||||
|
@ -172,11 +166,12 @@ class Daijirin2Entry(_BaseDaijirin2Entry):
|
||||||
expressions = []
|
expressions = []
|
||||||
for el in soup.find_all("漢字見出"):
|
for el in soup.find_all("漢字見出"):
|
||||||
expressions.append(el.text)
|
expressions.append(el.text)
|
||||||
self._headwords = {}
|
headwords = {}
|
||||||
for reading in readings:
|
for reading in readings:
|
||||||
self._headwords[reading] = expressions
|
headwords[reading] = expressions
|
||||||
|
return headwords
|
||||||
|
|
||||||
def _set_acronym_headwords(self, soup):
|
def _get_acronym_headwords(self, soup):
|
||||||
expressions = []
|
expressions = []
|
||||||
for el in soup.find_all("略語"):
|
for el in soup.find_all("略語"):
|
||||||
expression_parts = []
|
expression_parts = []
|
||||||
|
@ -184,29 +179,24 @@ class Daijirin2Entry(_BaseDaijirin2Entry):
|
||||||
expression_parts.append(part.text)
|
expression_parts.append(part.text)
|
||||||
expression = "".join(expression_parts)
|
expression = "".join(expression_parts)
|
||||||
expressions.append(expression)
|
expressions.append(expression)
|
||||||
self._headwords = {"": expressions}
|
headwords = {"": expressions}
|
||||||
|
return headwords
|
||||||
|
|
||||||
|
|
||||||
class Daijirin2ChildEntry(_BaseDaijirin2Entry):
|
class Daijirin2ChildEntry(_BaseDaijirin2Entry):
|
||||||
def __init__(self, entry_id):
|
def _get_headwords(self):
|
||||||
super().__init__(entry_id)
|
|
||||||
|
|
||||||
def _set_headwords(self):
|
|
||||||
soup = self.get_page_soup()
|
soup = self.get_page_soup()
|
||||||
self._delete_unused_nodes(soup)
|
self._delete_unused_nodes(soup)
|
||||||
self._set_regular_headwords(soup)
|
headwords = self._get_regular_headwords(soup)
|
||||||
|
return headwords
|
||||||
|
|
||||||
|
|
||||||
class Daijirin2PhraseEntry(_BaseDaijirin2Entry):
|
class Daijirin2PhraseEntry(_BaseDaijirin2Entry):
|
||||||
def __init__(self, entry_id):
|
|
||||||
super().__init__(entry_id)
|
|
||||||
self.__phrase_readings = load_daijirin2_phrase_readings()
|
|
||||||
|
|
||||||
def get_part_of_speech_tags(self):
|
def get_part_of_speech_tags(self):
|
||||||
# phrases do not contain these tags
|
# phrases do not contain these tags
|
||||||
return []
|
return []
|
||||||
|
|
||||||
def _set_headwords(self):
|
def _get_headwords(self):
|
||||||
soup = self.get_page_soup()
|
soup = self.get_page_soup()
|
||||||
headwords = {}
|
headwords = {}
|
||||||
expressions = self._find_expressions(soup)
|
expressions = self._find_expressions(soup)
|
||||||
|
@ -217,7 +207,7 @@ class Daijirin2PhraseEntry(_BaseDaijirin2Entry):
|
||||||
headwords[reading].append(expression)
|
headwords[reading].append(expression)
|
||||||
else:
|
else:
|
||||||
headwords[reading] = [expression]
|
headwords[reading] = [expression]
|
||||||
self._headwords = headwords
|
return headwords
|
||||||
|
|
||||||
def _find_expressions(self, soup):
|
def _find_expressions(self, soup):
|
||||||
self._delete_unused_nodes(soup)
|
self._delete_unused_nodes(soup)
|
||||||
|
@ -231,7 +221,8 @@ class Daijirin2PhraseEntry(_BaseDaijirin2Entry):
|
||||||
return expressions
|
return expressions
|
||||||
|
|
||||||
def _find_readings(self):
|
def _find_readings(self):
|
||||||
text = self.__phrase_readings[self.entry_id]
|
phrase_readings = load_daijirin2_phrase_readings()
|
||||||
|
text = phrase_readings[self.entry_id]
|
||||||
alternatives = Expressions.expand_daijirin_alternatives(text)
|
alternatives = Expressions.expand_daijirin_alternatives(text)
|
||||||
readings = []
|
readings = []
|
||||||
for alt in alternatives:
|
for alt in alternatives:
|
||||||
|
|
|
@ -2,12 +2,24 @@ from abc import ABC, abstractmethod
|
||||||
|
|
||||||
|
|
||||||
class Entry(ABC):
|
class Entry(ABC):
|
||||||
def __init__(self, entry_id):
|
ID_TO_ENTRY = {}
|
||||||
|
SUBENTRY_ID_TO_ENTRY_ID = {}
|
||||||
|
|
||||||
|
def __init__(self, target, entry_id):
|
||||||
|
if entry_id not in self.ID_TO_ENTRY:
|
||||||
|
self.ID_TO_ENTRY[entry_id] = self
|
||||||
|
else:
|
||||||
|
raise Exception(f"Duplicate entry ID: {entry_id}")
|
||||||
|
self.target = target
|
||||||
self.entry_id = entry_id
|
self.entry_id = entry_id
|
||||||
self._page = None
|
self._page = None
|
||||||
self._headwords = None
|
self._headwords = None
|
||||||
self._part_of_speech_tags = None
|
self._part_of_speech_tags = None
|
||||||
|
|
||||||
|
@abstractmethod
|
||||||
|
def get_global_identifier(self):
|
||||||
|
pass
|
||||||
|
|
||||||
@abstractmethod
|
@abstractmethod
|
||||||
def set_page(self, page):
|
def set_page(self, page):
|
||||||
pass
|
pass
|
||||||
|
@ -16,14 +28,34 @@ class Entry(ABC):
|
||||||
def get_page_soup(self):
|
def get_page_soup(self):
|
||||||
pass
|
pass
|
||||||
|
|
||||||
@abstractmethod
|
|
||||||
def get_headwords(self):
|
def get_headwords(self):
|
||||||
|
if self._headwords is not None:
|
||||||
|
return self._headwords
|
||||||
|
headwords = self._get_headwords()
|
||||||
|
self._add_variant_expressions(headwords)
|
||||||
|
self._headwords = headwords
|
||||||
|
return headwords
|
||||||
|
|
||||||
|
@abstractmethod
|
||||||
|
def _get_headwords(self):
|
||||||
|
pass
|
||||||
|
|
||||||
|
@abstractmethod
|
||||||
|
def _add_variant_expressions(self, headwords):
|
||||||
pass
|
pass
|
||||||
|
|
||||||
@abstractmethod
|
@abstractmethod
|
||||||
def get_part_of_speech_tags(self):
|
def get_part_of_speech_tags(self):
|
||||||
pass
|
pass
|
||||||
|
|
||||||
|
def get_parent(self):
|
||||||
|
if self.entry_id in self.SUBENTRY_ID_TO_ENTRY_ID:
|
||||||
|
parent_id = self.SUBENTRY_ID_TO_ENTRY_ID[self.entry_id]
|
||||||
|
parent = self.ID_TO_ENTRY[parent_id]
|
||||||
|
else:
|
||||||
|
parent = None
|
||||||
|
return parent
|
||||||
|
|
||||||
def get_first_expression(self):
|
def get_first_expression(self):
|
||||||
headwords = self.get_headwords()
|
headwords = self.get_headwords()
|
||||||
expressions = next(iter(headwords.values()))
|
expressions = next(iter(headwords.values()))
|
||||||
|
|
|
@ -15,4 +15,4 @@ def new_entry(target, page_id):
|
||||||
Targets.SMK8: Smk8Entry,
|
Targets.SMK8: Smk8Entry,
|
||||||
Targets.DAIJIRIN2: Daijirin2Entry,
|
Targets.DAIJIRIN2: Daijirin2Entry,
|
||||||
}
|
}
|
||||||
return entry_map[target](page_id)
|
return entry_map[target](target, page_id)
|
||||||
|
|
|
@ -1,4 +1,5 @@
|
||||||
import re
|
import re
|
||||||
|
from abc import abstractmethod
|
||||||
from datetime import datetime, date
|
from datetime import datetime, date
|
||||||
from bs4 import BeautifulSoup
|
from bs4 import BeautifulSoup
|
||||||
|
|
||||||
|
@ -7,18 +8,17 @@ import bot.entries.expressions as Expressions
|
||||||
|
|
||||||
|
|
||||||
class _JitenonEntry(Entry):
|
class _JitenonEntry(Entry):
|
||||||
ID_TO_ENTRY = {}
|
def __init__(self, target, entry_id):
|
||||||
|
super().__init__(target, entry_id)
|
||||||
def __init__(self, entry_id):
|
self.expression = ""
|
||||||
super().__init__(entry_id)
|
self.yomikata = ""
|
||||||
if entry_id not in self.ID_TO_ENTRY:
|
self.definition = ""
|
||||||
self.ID_TO_ENTRY[entry_id] = self
|
self.other_forms = []
|
||||||
else:
|
|
||||||
raise Exception(f"Duplicate entry ID: {entry_id}")
|
|
||||||
self.modified_date = date(1970, 1, 1)
|
self.modified_date = date(1970, 1, 1)
|
||||||
self.attribution = ""
|
self.attribution = ""
|
||||||
for column in self._COLUMNS.values():
|
|
||||||
setattr(self, column[0], column[1])
|
def get_global_identifier(self):
|
||||||
|
return f"@{self.target.value}-{format(self.entry_id, '06')}"
|
||||||
|
|
||||||
def set_page(self, page):
|
def set_page(self, page):
|
||||||
soup = BeautifulSoup(page, features="html5lib")
|
soup = BeautifulSoup(page, features="html5lib")
|
||||||
|
@ -39,36 +39,33 @@ class _JitenonEntry(Entry):
|
||||||
soup = BeautifulSoup(self._page, "html5lib")
|
soup = BeautifulSoup(self._page, "html5lib")
|
||||||
return soup
|
return soup
|
||||||
|
|
||||||
def get_headwords(self):
|
|
||||||
if self._headwords is not None:
|
|
||||||
return self._headwords
|
|
||||||
self._set_headwords()
|
|
||||||
self._set_variant_headwords()
|
|
||||||
return self._headwords
|
|
||||||
|
|
||||||
def get_part_of_speech_tags(self):
|
def get_part_of_speech_tags(self):
|
||||||
# Jitenon doesn't have any
|
# Jitenon doesn't have any
|
||||||
return []
|
return []
|
||||||
|
|
||||||
def _set_headwords(self):
|
def _get_headwords(self):
|
||||||
headwords = {}
|
headwords = {}
|
||||||
for yomikata in self._yomikatas():
|
for reading in self._get_readings():
|
||||||
headwords[yomikata] = [self.expression]
|
headwords[reading] = [self.expression]
|
||||||
ikei_headwords = self._ikei_headwords()
|
other_form_headwords = self._other_form_headwords()
|
||||||
for reading, expressions in ikei_headwords.items():
|
for reading, expressions in other_form_headwords.items():
|
||||||
if reading not in headwords:
|
if reading not in headwords:
|
||||||
headwords[reading] = []
|
headwords[reading] = []
|
||||||
for expression in expressions:
|
for expression in expressions:
|
||||||
if expression not in headwords[reading]:
|
if expression not in headwords[reading]:
|
||||||
headwords[reading].append(expression)
|
headwords[reading].append(expression)
|
||||||
self._headwords = headwords
|
return headwords
|
||||||
|
|
||||||
|
@abstractmethod
|
||||||
|
def _get_column_map(self):
|
||||||
|
pass
|
||||||
|
|
||||||
def __set_modified_date(self, page):
|
def __set_modified_date(self, page):
|
||||||
m = re.search(r"\"dateModified\": \"(\d{4}-\d{2}-\d{2})", page)
|
m = re.search(r"\"dateModified\": \"(\d{4}-\d{2}-\d{2})", page)
|
||||||
if m is None:
|
if m is None:
|
||||||
return
|
return
|
||||||
date = datetime.strptime(m.group(1), '%Y-%m-%d').date()
|
modified_date = datetime.strptime(m.group(1), '%Y-%m-%d').date()
|
||||||
self.modified_date = date
|
self.modified_date = modified_date
|
||||||
|
|
||||||
def __set_attribution(self, soup):
|
def __set_attribution(self, soup):
|
||||||
attribution = soup.find(class_="copyright")
|
attribution = soup.find(class_="copyright")
|
||||||
|
@ -78,7 +75,8 @@ class _JitenonEntry(Entry):
|
||||||
self.attribution = ""
|
self.attribution = ""
|
||||||
|
|
||||||
def __set_column(self, colname, colval):
|
def __set_column(self, colname, colval):
|
||||||
attr_name = self._COLUMNS[colname][0]
|
column_map = self._get_column_map()
|
||||||
|
attr_name = column_map[colname]
|
||||||
attr_value = getattr(self, attr_name)
|
attr_value = getattr(self, attr_name)
|
||||||
if isinstance(attr_value, str):
|
if isinstance(attr_value, str):
|
||||||
setattr(self, attr_name, colval)
|
setattr(self, attr_name, colval)
|
||||||
|
@ -88,7 +86,7 @@ class _JitenonEntry(Entry):
|
||||||
else:
|
else:
|
||||||
attr_value.append(colval)
|
attr_value.append(colval)
|
||||||
|
|
||||||
def _yomikatas(self):
|
def _get_readings(self):
|
||||||
yomikata = self.yomikata
|
yomikata = self.yomikata
|
||||||
m = re.search(r"^[ぁ-ヿ、]+$", yomikata)
|
m = re.search(r"^[ぁ-ヿ、]+$", yomikata)
|
||||||
if m:
|
if m:
|
||||||
|
@ -109,20 +107,20 @@ class _JitenonEntry(Entry):
|
||||||
print(f"Invalid 読み方 format: {self.yomikata}\n{self}\n")
|
print(f"Invalid 読み方 format: {self.yomikata}\n{self}\n")
|
||||||
return [""]
|
return [""]
|
||||||
|
|
||||||
def _ikei_headwords(self):
|
def _other_form_headwords(self):
|
||||||
ikei_headwords = {}
|
other_form_headwords = {}
|
||||||
for val in self.ikei:
|
for val in self.other_forms:
|
||||||
m = re.search(r"^([^(]+)(([ぁ-ヿ、]+))$", val)
|
m = re.search(r"^([^(]+)(([ぁ-ヿ、]+))$", val)
|
||||||
if not m:
|
if not m:
|
||||||
print(f"Invalid 異形 format: {val}\n{self}\n")
|
print(f"Invalid 異形 format: {val}\n{self}\n")
|
||||||
continue
|
continue
|
||||||
expression = m.group(1)
|
expression = m.group(1)
|
||||||
reading = m.group(2)
|
reading = m.group(2)
|
||||||
if reading not in ikei_headwords:
|
if reading not in other_form_headwords:
|
||||||
ikei_headwords[reading] = []
|
other_form_headwords[reading] = []
|
||||||
if expression not in ikei_headwords[reading]:
|
if expression not in other_form_headwords[reading]:
|
||||||
ikei_headwords[reading].append(expression)
|
other_form_headwords[reading].append(expression)
|
||||||
return ikei_headwords
|
return other_form_headwords
|
||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
def __clean_text(text):
|
def __clean_text(text):
|
||||||
|
@ -133,9 +131,10 @@ class _JitenonEntry(Entry):
|
||||||
return text
|
return text
|
||||||
|
|
||||||
def __str__(self):
|
def __str__(self):
|
||||||
|
column_map = self._get_column_map()
|
||||||
colvals = [str(self.entry_id)]
|
colvals = [str(self.entry_id)]
|
||||||
for attr in self._COLUMNS.values():
|
for attr_name in column_map.values():
|
||||||
attr_val = getattr(self, attr[0])
|
attr_val = getattr(self, attr_name)
|
||||||
if isinstance(attr_val, str):
|
if isinstance(attr_val, str):
|
||||||
colvals.append(attr_val)
|
colvals.append(attr_val)
|
||||||
elif isinstance(attr_val, list):
|
elif isinstance(attr_val, list):
|
||||||
|
@ -144,83 +143,100 @@ class _JitenonEntry(Entry):
|
||||||
|
|
||||||
|
|
||||||
class JitenonYojiEntry(_JitenonEntry):
|
class JitenonYojiEntry(_JitenonEntry):
|
||||||
_COLUMNS = {
|
def __init__(self, target, entry_id):
|
||||||
"四字熟語": ["expression", ""],
|
super().__init__(target, entry_id)
|
||||||
"読み方": ["yomikata", ""],
|
self.origin = ""
|
||||||
"意味": ["imi", ""],
|
self.kanken_level = ""
|
||||||
"出典": ["shutten", ""],
|
self.category = ""
|
||||||
"漢検級": ["kankenkyuu", ""],
|
self.related_expressions = []
|
||||||
"場面用途": ["bamenyouto", ""],
|
|
||||||
"異形": ["ikei", []],
|
|
||||||
"類義語": ["ruigigo", []],
|
|
||||||
}
|
|
||||||
|
|
||||||
def __init__(self, entry_id):
|
def _get_column_map(self):
|
||||||
super().__init__(entry_id)
|
return {
|
||||||
|
"四字熟語": "expression",
|
||||||
|
"読み方": "yomikata",
|
||||||
|
"意味": "definition",
|
||||||
|
"異形": "other_forms",
|
||||||
|
"出典": "origin",
|
||||||
|
"漢検級": "kanken_level",
|
||||||
|
"場面用途": "category",
|
||||||
|
"類義語": "related_expressions",
|
||||||
|
}
|
||||||
|
|
||||||
def _set_variant_headwords(self):
|
def _add_variant_expressions(self, headwords):
|
||||||
for expressions in self._headwords.values():
|
for expressions in headwords.values():
|
||||||
Expressions.add_variant_kanji(expressions)
|
Expressions.add_variant_kanji(expressions)
|
||||||
|
|
||||||
|
|
||||||
class JitenonKotowazaEntry(_JitenonEntry):
|
class JitenonKotowazaEntry(_JitenonEntry):
|
||||||
_COLUMNS = {
|
def __init__(self, target, entry_id):
|
||||||
"言葉": ["expression", ""],
|
super().__init__(target, entry_id)
|
||||||
"読み方": ["yomikata", ""],
|
self.origin = ""
|
||||||
"意味": ["imi", ""],
|
self.example = ""
|
||||||
"出典": ["shutten", ""],
|
self.related_expressions = []
|
||||||
"例文": ["reibun", ""],
|
|
||||||
"異形": ["ikei", []],
|
|
||||||
"類句": ["ruiku", []],
|
|
||||||
}
|
|
||||||
|
|
||||||
def __init__(self, entry_id):
|
def _get_column_map(self):
|
||||||
super().__init__(entry_id)
|
return {
|
||||||
|
"言葉": "expression",
|
||||||
|
"読み方": "yomikata",
|
||||||
|
"意味": "definition",
|
||||||
|
"異形": "other_forms",
|
||||||
|
"出典": "origin",
|
||||||
|
"例文": "example",
|
||||||
|
"類句": "related_expressions",
|
||||||
|
}
|
||||||
|
|
||||||
def _set_headwords(self):
|
def _get_headwords(self):
|
||||||
if self.expression == "金棒引き・鉄棒引き":
|
if self.expression == "金棒引き・鉄棒引き":
|
||||||
self._headwords = {
|
headwords = {
|
||||||
"かなぼうひき": ["金棒引き", "鉄棒引き"]
|
"かなぼうひき": ["金棒引き", "鉄棒引き"]
|
||||||
}
|
}
|
||||||
else:
|
else:
|
||||||
super()._set_headwords()
|
headwords = super()._get_headwords()
|
||||||
|
return headwords
|
||||||
|
|
||||||
def _set_variant_headwords(self):
|
def _add_variant_expressions(self, headwords):
|
||||||
for expressions in self._headwords.values():
|
for expressions in headwords.values():
|
||||||
Expressions.add_variant_kanji(expressions)
|
Expressions.add_variant_kanji(expressions)
|
||||||
Expressions.add_fullwidth(expressions)
|
Expressions.add_fullwidth(expressions)
|
||||||
|
|
||||||
|
|
||||||
class JitenonKokugoEntry(_JitenonEntry):
|
class JitenonKokugoEntry(_JitenonEntry):
|
||||||
_COLUMNS = {
|
def __init__(self, target, entry_id):
|
||||||
"言葉": ["expression", ""],
|
super().__init__(target, entry_id)
|
||||||
"読み方": ["yomikata", ""],
|
self.example = ""
|
||||||
"意味": ["imi", ""],
|
self.alt_expression = ""
|
||||||
"例文": ["reibun", ""],
|
self.antonym = ""
|
||||||
"別表記": ["betsuhyouki", ""],
|
self.attachments = ""
|
||||||
"対義語": ["taigigo", ""],
|
self.compounds = ""
|
||||||
"活用": ["katsuyou", ""],
|
self.related_words = ""
|
||||||
"用例": ["yourei", ""],
|
|
||||||
"類語": ["ruigo", ""],
|
|
||||||
}
|
|
||||||
|
|
||||||
def __init__(self, entry_id):
|
def _get_column_map(self):
|
||||||
super().__init__(entry_id)
|
return {
|
||||||
|
"言葉": "expression",
|
||||||
|
"読み方": "yomikata",
|
||||||
|
"意味": "definition",
|
||||||
|
"例文": "example",
|
||||||
|
"別表記": "alt_expression",
|
||||||
|
"対義語": "antonym",
|
||||||
|
"活用": "attachments",
|
||||||
|
"用例": "compounds",
|
||||||
|
"類語": "related_words",
|
||||||
|
}
|
||||||
|
|
||||||
def _set_headwords(self):
|
def _get_headwords(self):
|
||||||
headwords = {}
|
headwords = {}
|
||||||
for reading in self.yomikata.split("・"):
|
for reading in self.yomikata.split("・"):
|
||||||
if reading not in headwords:
|
if reading not in headwords:
|
||||||
headwords[reading] = []
|
headwords[reading] = []
|
||||||
for expression in self.expression.split("・"):
|
for expression in self.expression.split("・"):
|
||||||
headwords[reading].append(expression)
|
headwords[reading].append(expression)
|
||||||
if self.betsuhyouki.strip() != "":
|
if self.alt_expression.strip() != "":
|
||||||
for expression in self.betsuhyouki.split("・"):
|
for expression in self.alt_expression.split("・"):
|
||||||
headwords[reading].append(expression)
|
headwords[reading].append(expression)
|
||||||
self._headwords = headwords
|
return headwords
|
||||||
|
|
||||||
def _set_variant_headwords(self):
|
def _add_variant_expressions(self, headwords):
|
||||||
for expressions in self._headwords.values():
|
for expressions in headwords.values():
|
||||||
Expressions.add_variant_kanji(expressions)
|
Expressions.add_variant_kanji(expressions)
|
||||||
Expressions.add_fullwidth(expressions)
|
Expressions.add_fullwidth(expressions)
|
||||||
Expressions.remove_iteration_mark(expressions)
|
Expressions.remove_iteration_mark(expressions)
|
||||||
|
|
|
@ -1,4 +1,3 @@
|
||||||
import re
|
|
||||||
from bs4 import BeautifulSoup
|
from bs4 import BeautifulSoup
|
||||||
|
|
||||||
import bot.entries.expressions as Expressions
|
import bot.entries.expressions as Expressions
|
||||||
|
@ -9,19 +8,17 @@ from bot.entries.smk8_preprocess import preprocess_page
|
||||||
|
|
||||||
|
|
||||||
class _BaseSmk8Entry(Entry):
|
class _BaseSmk8Entry(Entry):
|
||||||
ID_TO_ENTRY = {}
|
def __init__(self, target, entry_id):
|
||||||
SUBENTRY_ID_TO_ENTRY_ID = {}
|
super().__init__(target, entry_id)
|
||||||
|
|
||||||
def __init__(self, entry_id):
|
|
||||||
super().__init__(entry_id)
|
|
||||||
if entry_id not in self.ID_TO_ENTRY:
|
|
||||||
self.ID_TO_ENTRY[entry_id] = self
|
|
||||||
else:
|
|
||||||
raise Exception(f"Duplicate entry ID: {entry_id}")
|
|
||||||
self.children = []
|
self.children = []
|
||||||
self.phrases = []
|
self.phrases = []
|
||||||
self.kanjis = []
|
self.kanjis = []
|
||||||
|
|
||||||
|
def get_global_identifier(self):
    """Return the identifier string used to address this entry.

    The result has the form ``@<target>-<parent>-<child>`` where
    ``<target>`` is ``self.target.value``, ``<parent>`` is the first
    element of ``self.entry_id`` zero-padded to six decimal digits and
    ``<child>`` is the second element zero-padded to four uppercase
    hexadecimal digits.
    """
    parent_id = self.entry_id[0]
    child_id = self.entry_id[1]
    # A format spec pads and upper-cases in one step.  The previous
    # hex(...).lstrip('0x') idiom strips a *set of characters* rather than
    # the "0x" prefix and only worked because zfill() restored the zeros.
    return f"@{self.target.value}-{parent_id:06}-{child_id:04X}"
|
||||||
|
|
||||||
def set_page(self, page):
|
def set_page(self, page):
|
||||||
page = self.__decompose_subentries(page)
|
page = self.__decompose_subentries(page)
|
||||||
self._page = page
|
self._page = page
|
||||||
|
@ -30,13 +27,6 @@ class _BaseSmk8Entry(Entry):
|
||||||
soup = BeautifulSoup(self._page, "xml")
|
soup = BeautifulSoup(self._page, "xml")
|
||||||
return soup
|
return soup
|
||||||
|
|
||||||
def get_headwords(self):
|
|
||||||
if self._headwords is not None:
|
|
||||||
return self._headwords
|
|
||||||
self._set_headwords()
|
|
||||||
self._set_variant_headwords()
|
|
||||||
return self._headwords
|
|
||||||
|
|
||||||
def get_part_of_speech_tags(self):
|
def get_part_of_speech_tags(self):
|
||||||
if self._part_of_speech_tags is not None:
|
if self._part_of_speech_tags is not None:
|
||||||
return self._part_of_speech_tags
|
return self._part_of_speech_tags
|
||||||
|
@ -50,8 +40,8 @@ class _BaseSmk8Entry(Entry):
|
||||||
self._part_of_speech_tags.append(tag.text)
|
self._part_of_speech_tags.append(tag.text)
|
||||||
return self._part_of_speech_tags
|
return self._part_of_speech_tags
|
||||||
|
|
||||||
def _set_variant_headwords(self):
|
def _add_variant_expressions(self, headwords):
|
||||||
for expressions in self._headwords.values():
|
for expressions in headwords.values():
|
||||||
Expressions.add_variant_kanji(expressions)
|
Expressions.add_variant_kanji(expressions)
|
||||||
Expressions.add_fullwidth(expressions)
|
Expressions.add_fullwidth(expressions)
|
||||||
Expressions.remove_iteration_mark(expressions)
|
Expressions.remove_iteration_mark(expressions)
|
||||||
|
@ -87,7 +77,7 @@ class _BaseSmk8Entry(Entry):
|
||||||
tag_soup.name = "項目"
|
tag_soup.name = "項目"
|
||||||
subentry_id = self.id_string_to_entry_id(tag_soup.attrs["id"])
|
subentry_id = self.id_string_to_entry_id(tag_soup.attrs["id"])
|
||||||
self.SUBENTRY_ID_TO_ENTRY_ID[subentry_id] = self.entry_id
|
self.SUBENTRY_ID_TO_ENTRY_ID[subentry_id] = self.entry_id
|
||||||
subentry = subentry_class(subentry_id)
|
subentry = subentry_class(self.target, subentry_id)
|
||||||
page = tag_soup.decode()
|
page = tag_soup.decode()
|
||||||
subentry.set_page(page)
|
subentry.set_page(page)
|
||||||
subentry_list.append(subentry)
|
subentry_list.append(subentry)
|
||||||
|
@ -106,6 +96,16 @@ class _BaseSmk8Entry(Entry):
|
||||||
else:
|
else:
|
||||||
raise Exception(f"Invalid entry ID: {id_string}")
|
raise Exception(f"Invalid entry ID: {id_string}")
|
||||||
|
|
||||||
|
@staticmethod
def _delete_unused_nodes(soup):
    """Strip headword-line markup that is not part of the headword itself.

    Every node named 表音表記, 表外音訓マーク, 表外字マーク or ルビG is
    removed from ``soup`` through ``Soup.delete_soup_nodes``.
    """
    for node_name in ("表音表記", "表外音訓マーク", "表外字マーク", "ルビG"):
        Soup.delete_soup_nodes(soup, node_name)
|
||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
def _clean_expression(expression):
|
def _clean_expression(expression):
|
||||||
for x in ["〈", "〉", "{", "}", "…", " "]:
|
for x in ["〈", "〉", "{", "}", "…", " "]:
|
||||||
|
@ -114,24 +114,24 @@ class _BaseSmk8Entry(Entry):
|
||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
def _fill_alts(soup):
|
def _fill_alts(soup):
|
||||||
for e in soup.find_all(["親見出仮名", "親見出表記"]):
|
for el in soup.find_all(["親見出仮名", "親見出表記"]):
|
||||||
e.string = e.attrs["alt"]
|
el.string = el.attrs["alt"]
|
||||||
for gaiji in soup.find_all("外字"):
|
for gaiji in soup.find_all("外字"):
|
||||||
gaiji.string = gaiji.img.attrs["alt"]
|
gaiji.string = gaiji.img.attrs["alt"]
|
||||||
|
|
||||||
|
|
||||||
class Smk8Entry(_BaseSmk8Entry):
|
class Smk8Entry(_BaseSmk8Entry):
|
||||||
def __init__(self, page_id):
|
def __init__(self, target, page_id):
|
||||||
entry_id = (page_id, 0)
|
entry_id = (page_id, 0)
|
||||||
super().__init__(entry_id)
|
super().__init__(target, entry_id)
|
||||||
|
|
||||||
def set_page(self, page):
|
def set_page(self, page):
|
||||||
page = preprocess_page(page)
|
page = preprocess_page(page)
|
||||||
super().set_page(page)
|
super().set_page(page)
|
||||||
|
|
||||||
def _set_headwords(self):
|
def _get_headwords(self):
|
||||||
soup = self.get_page_soup()
|
soup = self.get_page_soup()
|
||||||
Soup.delete_soup_nodes(soup, "表音表記")
|
self._delete_unused_nodes(soup)
|
||||||
self._fill_alts(soup)
|
self._fill_alts(soup)
|
||||||
reading = self._find_reading(soup)
|
reading = self._find_reading(soup)
|
||||||
expressions = []
|
expressions = []
|
||||||
|
@ -140,16 +140,14 @@ class Smk8Entry(_BaseSmk8Entry):
|
||||||
for expression in self._find_expressions(soup):
|
for expression in self._find_expressions(soup):
|
||||||
if expression not in expressions:
|
if expression not in expressions:
|
||||||
expressions.append(expression)
|
expressions.append(expression)
|
||||||
self._headwords = {reading: expressions}
|
headwords = {reading: expressions}
|
||||||
|
return headwords
|
||||||
|
|
||||||
|
|
||||||
class Smk8ChildEntry(_BaseSmk8Entry):
|
class Smk8ChildEntry(_BaseSmk8Entry):
|
||||||
def __init__(self, entry_id):
|
def _get_headwords(self):
|
||||||
super().__init__(entry_id)
|
|
||||||
|
|
||||||
def _set_headwords(self):
|
|
||||||
soup = self.get_page_soup()
|
soup = self.get_page_soup()
|
||||||
Soup.delete_soup_nodes(soup, "表音表記")
|
self._delete_unused_nodes(soup)
|
||||||
self._fill_alts(soup)
|
self._fill_alts(soup)
|
||||||
reading = self._find_reading(soup)
|
reading = self._find_reading(soup)
|
||||||
expressions = []
|
expressions = []
|
||||||
|
@ -158,19 +156,20 @@ class Smk8ChildEntry(_BaseSmk8Entry):
|
||||||
for expression in self._find_expressions(soup):
|
for expression in self._find_expressions(soup):
|
||||||
if expression not in expressions:
|
if expression not in expressions:
|
||||||
expressions.append(expression)
|
expressions.append(expression)
|
||||||
self._headwords = {reading: expressions}
|
headwords = {reading: expressions}
|
||||||
|
return headwords
|
||||||
|
|
||||||
|
|
||||||
class Smk8PhraseEntry(_BaseSmk8Entry):
|
class Smk8PhraseEntry(_BaseSmk8Entry):
|
||||||
def __init__(self, entry_id):
|
def __init__(self, target, entry_id):
|
||||||
super().__init__(entry_id)
|
super().__init__(target, entry_id)
|
||||||
self.__phrase_readings = load_smk8_phrase_readings()
|
self.__phrase_readings = load_smk8_phrase_readings()
|
||||||
|
|
||||||
def get_part_of_speech_tags(self):
|
def get_part_of_speech_tags(self):
|
||||||
# phrases do not contain these tags
|
# phrases do not contain these tags
|
||||||
return []
|
return []
|
||||||
|
|
||||||
def _set_headwords(self):
|
def _get_headwords(self):
|
||||||
soup = self.get_page_soup()
|
soup = self.get_page_soup()
|
||||||
headwords = {}
|
headwords = {}
|
||||||
expressions = self._find_expressions(soup)
|
expressions = self._find_expressions(soup)
|
||||||
|
@ -181,10 +180,10 @@ class Smk8PhraseEntry(_BaseSmk8Entry):
|
||||||
headwords[reading].append(expression)
|
headwords[reading].append(expression)
|
||||||
else:
|
else:
|
||||||
headwords[reading] = [expression]
|
headwords[reading] = [expression]
|
||||||
self._headwords = headwords
|
return headwords
|
||||||
|
|
||||||
def _find_expressions(self, soup):
|
def _find_expressions(self, soup):
|
||||||
Soup.delete_soup_nodes(soup, "ルビG")
|
self._delete_unused_nodes(soup)
|
||||||
self._fill_alts(soup)
|
self._fill_alts(soup)
|
||||||
text = soup.find("標準表記").text
|
text = soup.find("標準表記").text
|
||||||
text = self._clean_expression(text)
|
text = self._clean_expression(text)
|
||||||
|
@ -206,15 +205,14 @@ class Smk8PhraseEntry(_BaseSmk8Entry):
|
||||||
|
|
||||||
|
|
||||||
class Smk8KanjiEntry(_BaseSmk8Entry):
|
class Smk8KanjiEntry(_BaseSmk8Entry):
|
||||||
def __init__(self, entry_id):
|
def _get_headwords(self):
|
||||||
super().__init__(entry_id)
|
|
||||||
|
|
||||||
def _set_headwords(self):
|
|
||||||
soup = self.get_page_soup()
|
soup = self.get_page_soup()
|
||||||
|
self._delete_unused_nodes(soup)
|
||||||
self._fill_alts(soup)
|
self._fill_alts(soup)
|
||||||
reading = self.__get_parent_reading()
|
reading = self.__get_parent_reading()
|
||||||
expressions = self._find_expressions(soup)
|
expressions = self._find_expressions(soup)
|
||||||
self._headwords = {reading: expressions}
|
headwords = {reading: expressions}
|
||||||
|
return headwords
|
||||||
|
|
||||||
def __get_parent_reading(self):
|
def __get_parent_reading(self):
|
||||||
parent_id = self.SUBENTRY_ID_TO_ENTRY_ID[self.entry_id]
|
parent_id = self.SUBENTRY_ID_TO_ENTRY_ID[self.entry_id]
|
||||||
|
|
|
@ -15,6 +15,7 @@ def preprocess_page(page):
|
||||||
page = __strip_page(page)
|
page = __strip_page(page)
|
||||||
page = __replace_glyph_codes(page)
|
page = __replace_glyph_codes(page)
|
||||||
page = __format_hyougai_marks(page)
|
page = __format_hyougai_marks(page)
|
||||||
|
page = __remove_pronunciation_parentheses(page)
|
||||||
return page
|
return page
|
||||||
|
|
||||||
|
|
||||||
|
@ -64,6 +65,7 @@ def __format_hyougai_marks(page):
|
||||||
for x in ["\n", "\t", " "]:
|
for x in ["\n", "\t", " "]:
|
||||||
text = text.replace(x, "")
|
text = text.replace(x, "")
|
||||||
text = re.sub(r"〈([^〈]+)〉", r"\1", text)
|
text = re.sub(r"〈([^〈]+)〉", r"\1", text)
|
||||||
|
|
||||||
page = re.sub(r"〈([^〈]+)〉", r"␂\1␃", page)
|
page = re.sub(r"〈([^〈]+)〉", r"␂\1␃", page)
|
||||||
for mark in re.findall(r"《.", text):
|
for mark in re.findall(r"《.", text):
|
||||||
if mark[1] == "〓":
|
if mark[1] == "〓":
|
||||||
|
@ -79,13 +81,29 @@ def __format_hyougai_marks(page):
|
||||||
page = re.sub(f"〈([^{mark[1]}]*)({mark[1]})",
|
page = re.sub(f"〈([^{mark[1]}]*)({mark[1]})",
|
||||||
r"\1<表外字>\2</表外字>",
|
r"\1<表外字>\2</表外字>",
|
||||||
page, count=1)
|
page, count=1)
|
||||||
|
|
||||||
page = page.replace("␂", "〈")
|
page = page.replace("␂", "〈")
|
||||||
page = page.replace("␃", "〉")
|
page = page.replace("␃", "〉")
|
||||||
soup = BeautifulSoup(page, features="xml")
|
soup = BeautifulSoup(page, features="xml")
|
||||||
|
|
||||||
for el in soup.find_all("表外音訓"):
|
for el in soup.find_all("表外音訓"):
|
||||||
if el.text == "":
|
if el.text == "":
|
||||||
el.append(el.next_sibling)
|
el.append(el.next_sibling)
|
||||||
|
mark_xml = "<表外音訓マーク>︽</表外音訓マーク>"
|
||||||
|
mark_soup = BeautifulSoup(mark_xml, "xml")
|
||||||
|
el.append(mark_soup.表外音訓マーク)
|
||||||
|
|
||||||
for el in soup.find_all("表外字"):
|
for el in soup.find_all("表外字"):
|
||||||
if el.text == "":
|
if el.text == "":
|
||||||
el.append(el.next_sibling)
|
el.append(el.next_sibling)
|
||||||
|
mark_xml = "<表外字マーク>︿</表外字マーク>"
|
||||||
|
mark_soup = BeautifulSoup(mark_xml, "xml")
|
||||||
|
el.append(mark_soup.表外字マーク)
|
||||||
|
|
||||||
return soup.decode()
|
return soup.decode()
|
||||||
|
|
||||||
|
|
||||||
|
def __remove_pronunciation_parentheses(page):
    """Return ``page`` without the parentheses wrapping 表音表記 text.

    Only an opening parenthesis directly after the start tag and a closing
    parenthesis directly before the end tag are dropped; all other text is
    left as it is.
    """
    return (
        page
        .replace("<表音表記>(", "<表音表記>")
        .replace(")</表音表記>", "</表音表記>")
    )
|
||||||
|
|
204
bot/mdict/exporters/export.py
Normal file
204
bot/mdict/exporters/export.py
Normal file
|
@ -0,0 +1,204 @@
|
||||||
|
# pylint: disable=too-few-public-methods
|
||||||
|
|
||||||
|
import subprocess
|
||||||
|
import os
|
||||||
|
import shutil
|
||||||
|
from abc import ABC, abstractmethod
|
||||||
|
from pathlib import Path
|
||||||
|
from datetime import datetime
|
||||||
|
from platformdirs import user_documents_dir, user_cache_dir
|
||||||
|
|
||||||
|
from bot.targets import Targets
|
||||||
|
from bot.mdict.terms.factory import new_terminator
|
||||||
|
|
||||||
|
|
||||||
|
class Exporter(ABC):
    """Build the MDict output files (.mdx, .mdd and icon) for one target.

    Intermediate files are staged in a build directory below the user cache
    directory; results are written below the user documents directory.
    Subclasses provide the revision and attribution strings that are
    substituted into the dictionary description.
    """

    def __init__(self, target):
        """Create an exporter for ``target`` together with its terminator."""
        self._target = target
        self._terminator = new_terminator(target)
        self._build_dir = None
        self._build_media_dir = None
        self._description_file = None
        self._out_dir = None

    def export(self, entries, media_dir, icon_file):
        """Write the .mdx, .mdd and icon files for ``entries``.

        Args:
            entries: sized collection of dictionary entries to export.
            media_dir: directory of media files to bundle, or None.
            icon_file: path of a custom icon, or None.

        Raises:
            subprocess.CalledProcessError: if the ``mdict`` command fails.
        """
        try:
            self._init_build_media_dir(media_dir)
            self._init_description_file(entries)
            terms = self._get_terms(entries)
            print(f"Exporting {len(terms)} Mdict keys...")
            self._write_mdx_file(terms)
            self._write_mdd_file()
            self._write_icon_file(icon_file)
        finally:
            # Discard the staged files even when a step above failed, so a
            # failed run does not leave copied media behind in the cache.
            self._rm_build_dir()

    def _get_build_dir(self):
        """Return the build directory, creating it empty on first use."""
        if self._build_dir is not None:
            return self._build_dir
        cache_dir = user_cache_dir("jitenbot")
        build_directory = os.path.join(cache_dir, "mdict_build")
        # Start from a clean slate: leftovers of an earlier run are removed.
        if Path(build_directory).is_dir():
            shutil.rmtree(build_directory)
        os.makedirs(build_directory)
        self._build_dir = build_directory
        return self._build_dir

    def _init_build_media_dir(self, media_dir):
        """Stage the media files and the target's CSS file for the .mdd.

        ``media_dir`` is copied into the build directory when given;
        otherwise an empty media directory is created.  The terminator is
        told where the staged media lives.
        """
        build_dir = self._get_build_dir()
        build_media_dir = os.path.join(build_dir, self._target.value)
        if media_dir is not None:
            print("Copying media files to build directory...")
            shutil.copytree(media_dir, build_media_dir)
        else:
            os.makedirs(build_media_dir)
        css_file = self._get_css_file()
        shutil.copy(css_file, build_media_dir)
        self._terminator.set_media_dir(build_media_dir)
        self._build_media_dir = build_media_dir

    def _init_description_file(self, entries):
        """Write the description HTML with its placeholders filled in.

        The template is read from ``data/mdict/description`` and the
        ``{{revision}}`` and ``{{attribution}}`` placeholders are replaced
        before the result is written to the build directory.
        """
        filename = f"{self._target.value}.mdx.description.html"
        original_file = os.path.join(
            "data", "mdict", "description", filename)
        with open(original_file, "r", encoding="utf8") as f:
            description = f.read()
        description = description.replace(
            "{{revision}}", self._get_revision(entries))
        description = description.replace(
            "{{attribution}}", self._get_attribution(entries))
        build_dir = self._get_build_dir()
        description_file = os.path.join(build_dir, filename)
        with open(description_file, "w", encoding="utf8") as f:
            f.write(description)
        self._description_file = description_file

    def _get_terms(self, entries):
        """Return the terms the terminator makes for all ``entries``.

        Progress is printed on a single, repeatedly overwritten line.
        """
        terms = []
        entries_len = len(entries)
        for idx, entry in enumerate(entries):
            update = f"Creating Mdict terms for entry {idx+1}/{entries_len}"
            print(update, end='\r', flush=True)
            terms.extend(self._terminator.make_terms(entry))
        print()
        return terms

    def _run_mdict(self, source, out_file):
        """Run the ``mdict`` command to pack ``source`` into ``out_file``."""
        params = [
            "mdict",
            "-a", source,
            "--title", self._get_title_file(),
            "--description", self._description_file,
            out_file
        ]
        subprocess.run(params, check=True)

    def _write_mdx_file(self, terms):
        """Write ``terms`` to the target's .mdx file."""
        out_dir = self._get_out_dir()
        out_file = os.path.join(out_dir, f"{self._target.value}.mdx")
        self._run_mdict(self._get_term_file(terms), out_file)

    def _write_mdd_file(self):
        """Pack the staged media directory into the target's .mdd file."""
        out_dir = self._get_out_dir()
        out_file = os.path.join(out_dir, f"{self._target.value}.mdd")
        self._run_mdict(self._build_media_dir, out_file)

    def _write_icon_file(self, icon_file):
        """Copy an icon next to the dictionary files.

        ``icon_file`` is used when it names an existing file; otherwise the
        premade icon for the target is used if present.  When neither
        exists no icon is written.
        """
        premade_icon_file = f"data/mdict/icon/{self._target.value}.png"
        out_dir = self._get_out_dir()
        out_file = os.path.join(out_dir, f"{self._target.value}.png")
        if icon_file is not None and Path(icon_file).is_file():
            shutil.copy(icon_file, out_file)
        elif Path(premade_icon_file).is_file():
            shutil.copy(premade_icon_file, out_file)

    def _get_out_dir(self):
        """Return the output directory, recreating it empty on first use.

        Any existing output directory for this target is deleted first.
        """
        if self._out_dir is not None:
            return self._out_dir
        out_dir = os.path.join(
            user_documents_dir(), "jitenbot", "mdict", self._target.value)
        if Path(out_dir).is_dir():
            shutil.rmtree(out_dir)
        os.makedirs(out_dir)
        self._out_dir = out_dir
        return out_dir

    def _get_term_file(self, terms):
        """Write ``terms`` to a text file in the build directory.

        Each term is a sequence of strings written one per line and
        followed by a ``</>`` separator line.  Returns the file path.
        """
        build_dir = self._get_build_dir()
        term_file = os.path.join(build_dir, f"{self._target.value}.mdx.txt")
        with open(term_file, "w", encoding="utf8") as f:
            for term in terms:
                f.write("\n".join(term))
                f.write("\n</>\n")
        return term_file

    def _get_title_file(self):
        """Return the path of the target's title HTML file."""
        return os.path.join(
            "data", "mdict", "title",
            f"{self._target.value}.mdx.title.html")

    def _get_css_file(self):
        """Return the path of the target's CSS file."""
        return os.path.join(
            "data", "mdict", "css",
            f"{self._target.value}.css")

    def _rm_build_dir(self):
        """Delete the build directory, if one was created, and forget it."""
        build_dir = self._build_dir
        if build_dir is None:
            return
        shutil.rmtree(build_dir)
        # Forget the removed paths so a later export() stages a fresh
        # directory instead of reusing a path that no longer exists.
        self._build_dir = None
        self._build_media_dir = None

    @abstractmethod
    def _get_revision(self, entries):
        """Return the revision text for the dictionary description."""

    @abstractmethod
    def _get_attribution(self, entries):
        """Return the attribution text for the dictionary description."""
|
||||||
|
|
||||||
|
|
||||||
|
class _JitenonExporter(Exporter):
    """Exporter base for the jitenon targets.

    Revision and attribution are both taken from the entry with the most
    recent ``modified_date``.
    """

    @staticmethod
    def _latest_entry(entries):
        """Return the entry with the greatest ``modified_date``.

        When several entries share the greatest date the first of them is
        returned.

        Raises:
            ValueError: if ``entries`` is empty.
        """
        latest = None
        for entry in entries:
            if latest is None or entry.modified_date > latest.modified_date:
                latest = entry
        if latest is None:
            raise ValueError("Cannot export a dictionary without entries")
        return latest

    def _get_revision(self, entries):
        """Return the most recent modification date as the revision text."""
        modified_date = self._latest_entry(entries).modified_date
        return modified_date.strftime("%Y年%m月%d日閲覧")

    def _get_attribution(self, entries):
        """Return the attribution of the most recently modified entry."""
        # The previous loop compared against a maximum that was never
        # updated, so it always ended up with the *last* entry's
        # attribution and left the result unbound for an empty input.
        return self._latest_entry(entries).attribution
|
||||||
|
|
||||||
|
|
||||||
|
class JitenonKokugoExporter(_JitenonExporter):
    """MDict exporter for the jitenon kokugo target; adds no behaviour."""
|
||||||
|
|
||||||
|
|
||||||
|
class JitenonYojiExporter(_JitenonExporter):
    """MDict exporter for the jitenon yoji target; adds no behaviour."""
|
||||||
|
|
||||||
|
|
||||||
|
class JitenonKotowazaExporter(_JitenonExporter):
    """MDict exporter for the jitenon kotowaza target; adds no behaviour."""
|
||||||
|
|
||||||
|
|
||||||
|
class _MonokakidoExporter(Exporter):
    """Exporter base whose revision text is the date of the export run."""

    def _get_revision(self, entries):
        """Return the current local date as the revision text.

        ``entries`` is not consulted.
        """
        return f"{datetime.now():%Y年%m月%d日作成}"
|
||||||
|
|
||||||
|
|
||||||
|
class Smk8Exporter(_MonokakidoExporter):
    """MDict exporter for the SMK8 target."""

    _ATTRIBUTION = "© Sanseido Co., LTD. 2020"

    def _get_attribution(self, entries):
        """Return the fixed attribution line; ``entries`` is not consulted."""
        return self._ATTRIBUTION
|
||||||
|
|
||||||
|
|
||||||
|
class Daijirin2Exporter(_MonokakidoExporter):
    """MDict exporter for the Daijirin2 target."""

    _ATTRIBUTION = "© Sanseido Co., LTD. 2019"

    def _get_attribution(self, entries):
        """Return the fixed attribution line; ``entries`` is not consulted."""
        return self._ATTRIBUTION
|
18
bot/mdict/exporters/factory.py
Normal file
18
bot/mdict/exporters/factory.py
Normal file
|
@ -0,0 +1,18 @@
|
||||||
|
from bot.targets import Targets
|
||||||
|
|
||||||
|
from bot.mdict.exporters.export import JitenonKokugoExporter
|
||||||
|
from bot.mdict.exporters.export import JitenonYojiExporter
|
||||||
|
from bot.mdict.exporters.export import JitenonKotowazaExporter
|
||||||
|
from bot.mdict.exporters.export import Smk8Exporter
|
||||||
|
from bot.mdict.exporters.export import Daijirin2Exporter
|
||||||
|
|
||||||
|
|
||||||
|
def new_mdict_exporter(target):
    """Instantiate the MDict exporter class registered for ``target``.

    Raises:
        KeyError: if no exporter is registered for ``target``.
    """
    exporter_class = {
        Targets.JITENON_KOKUGO: JitenonKokugoExporter,
        Targets.JITENON_YOJI: JitenonYojiExporter,
        Targets.JITENON_KOTOWAZA: JitenonKotowazaExporter,
        Targets.SMK8: Smk8Exporter,
        Targets.DAIJIRIN2: Daijirin2Exporter,
    }[target]
    return exporter_class(target)
|
77
bot/mdict/glossary/daijirin2.py
Normal file
77
bot/mdict/glossary/daijirin2.py
Normal file
|
@ -0,0 +1,77 @@
|
||||||
|
import re
|
||||||
|
import os
|
||||||
|
from functools import cache
|
||||||
|
from pathlib import Path
|
||||||
|
|
||||||
|
from bot.soup import delete_soup_nodes
|
||||||
|
from bot.data import load_mdict_name_conversion
|
||||||
|
from bot.name_conversion import convert_names
|
||||||
|
|
||||||
|
|
||||||
|
def make_glossary(entry, media_dir):
    """Return the MDict glossary markup for a Daijirin2 ``entry``.

    The entry's page soup is given ruby annotations, parent-entry links,
    pruned of unused nodes, has its links converted, and its element names
    converted with the target's MDict name-conversion table.  The first
    ``span`` element of the result is returned as a string.
    """
    soup = entry.get_page_soup()
    __add_rubies(soup)
    __hyperlink_parent_expression(soup, entry)
    __delete_unused_nodes(soup, media_dir)
    __convert_links(soup, entry)

    convert_names(soup, load_mdict_name_conversion(entry.target))

    return soup.span.decode()
|
||||||
|
|
||||||
|
|
||||||
|
def __add_rubies(soup):
    """Turn 表外音訓 and 表外字 elements into ``ruby`` annotations.

    The mark element inside each one is renamed to ``rt`` and moved to the
    end of its parent.  Elements without a mark are left untouched.
    """
    for name in ["表外音訓", "表外字"]:
        for ruby in soup.find_all(name):
            rt = ruby.find("表外字マーク")
            if rt is None:
                # NOTE(review): the SMK8 preprocessor names the mark inside
                # 表外音訓 elements 表外音訓マーク; presumably Daijirin2
                # pages can carry the same name - confirm.
                rt = ruby.find("表外音訓マーク")
            if rt is None:
                # No mark to annotate with; previously this dereferenced
                # None and aborted the whole export.
                continue
            ruby.name = "ruby"
            rt.name = "rt"
            ruby.append(rt)  # needs to be positioned after the text
|
||||||
|
|
||||||
|
|
||||||
|
def __hyperlink_parent_expression(soup, entry):
    """Rewrite every 親表記 element as a link to the parent entry.

    Nothing happens, and the parent is not looked up, when the soup has no
    such element.
    """
    parent_tags = soup.find_all("親表記")
    if not parent_tags:
        return
    gid = entry.get_parent().get_global_identifier()
    for tag in parent_tags:
        tag.name = "a"
        tag.attrs["href"] = f"entry://{gid}"
|
||||||
|
|
||||||
|
|
||||||
|
def __delete_unused_nodes(soup, media_dir):
    """Prune nodes that should not appear in the glossary.

    * カットG nodes are deleted when ``media_dir`` has no graphics folder.
    * A ``logo`` element directly followed by 漢字見出G or 漢字音G is
      removed.
    * Text nodes equal to "・" inside 漢字音G are emptied.
    """
    if not __graphics_directory_exists(media_dir):
        delete_soup_nodes(soup, "カットG")
    for logo in soup.find_all("logo"):
        follower = logo.next_sibling
        if follower is not None and follower.name in ("漢字見出G", "漢字音G"):
            logo.decompose()
    for reading_group in soup.find_all("漢字音G"):
        for separator in reading_group.find_all(string="・"):
            separator.replace_with("")
|
||||||
|
|
||||||
|
|
||||||
|
@cache
def __graphics_directory_exists(media_dir):
    """Return whether ``media_dir`` contains a ``graphics`` directory.

    The answer is memoised per ``media_dir`` value by ``functools.cache``.
    """
    return Path(os.path.join(media_dir, "graphics")).is_dir()
|
||||||
|
|
||||||
|
|
||||||
|
def __convert_links(soup, entry):
    """Point entry-ID links at MDict ``entry://`` addresses.

    An ``href`` that is a bare entry ID is replaced by the global
    identifier of the referenced entry.  ``entry:`` and ``http(s):`` links
    are kept as they are.

    Raises:
        Exception: if an ``href`` has any other format.
    """
    for anchor in soup.find_all("a"):
        href = anchor.attrs["href"]
        if re.match(r"^[0-9]+(?:-[0-9A-F]{4})?$", href):
            ref_entry_id = entry.id_string_to_entry_id(href)
            gid = entry.ID_TO_ENTRY[ref_entry_id].get_global_identifier()
            anchor.attrs["href"] = f"entry://{gid}"
            continue
        if re.match(r"^entry:", href) or re.match(r"^https?:[\w\W]*", href):
            continue
        raise Exception(f"Invalid href format: {href}")
|
141
bot/mdict/glossary/jitenon.py
Normal file
141
bot/mdict/glossary/jitenon.py
Normal file
|
@ -0,0 +1,141 @@
|
||||||
|
# pylint: disable=too-few-public-methods
|
||||||
|
|
||||||
|
import re
|
||||||
|
|
||||||
|
|
||||||
|
class JitenonGlossary():
    """Shared glossary-building steps for the jitenon dictionaries.

    Subclasses set ``_id_pattern`` and ``_expression_header`` and combine
    the helper methods below in their ``make_glossary``.
    """

    def __init__(self):
        # Regex whose first group captures the numeric entry ID inside an
        # internal link; None until a subclass sets it.
        self._id_pattern = None
        # Text of the table header cell that holds the headword
        # expression; None until a subclass sets it.
        self._expression_header = None

    def _replace_punctuation(self, soup):
        """Replace the mapped punctuation characters in every text node."""
        # NOTE(review): the first mapping is an identity as written here -
        # confirm whether its key was meant to be a full-width solidus.
        punctuation = {
            "/": "/",
            ",": "、",
        }
        for el in soup.find_all(string=True):
            original = el.text
            text = original
            for old, new in punctuation.items():
                text = text.replace(old, new)
            # Only touch nodes that actually change: rewriting every node
            # is wasted work and swaps special string nodes for plain text
            # even when nothing was replaced.
            if text != original:
                el.replace_with(text)

    def _add_internal_links(self, soup, entry):
        """Point links to other entries at MDict ``entry://`` addresses.

        An ``href`` matching ``_id_pattern`` is replaced by the global
        identifier of the referenced entry.  ``http(s):`` and ``?`` links
        are kept; anchors without an ``href`` are skipped.

        Raises:
            Exception: if an ``href`` has any other format.
        """
        for el in soup.find_all("a"):
            href = el.attrs.get("href")
            if href is None:
                # An anchor without href is not a link; there is nothing
                # to convert (this used to raise KeyError).
                continue
            m = re.search(self._id_pattern, href)
            if m is not None:
                ref_entry_id = int(m.group(1))
                ref_entry = entry.ID_TO_ENTRY[ref_entry_id]
                gid = ref_entry.get_global_identifier()
                el.attrs["href"] = f"entry://{gid}"
            elif re.match(r"^(?:https?:|\?)[\w\W]*", href):
                pass
            else:
                raise Exception(f"Invalid href format: {href}")

    def _decompose_table_rows(self, soup, entry):
        """Remove table rows that are shown elsewhere in the glossary.

        * The row whose header equals ``_expression_header`` is removed.
        * The 読み方 row is removed when the reading is displayed in the
          headword line.
        * The 意味 row's cell becomes a ``div`` of class 意味 inserted at
          the start of the body, and the row is removed.

        The table itself is removed when no rows are left.
        """
        for tr in soup.find_all("tr"):
            if tr.find("th") is None:
                continue
            header = tr.th.text
            if header == self._expression_header:
                tr.decompose()
            elif header == "読み方":
                if self._do_display_yomikata_in_headword(entry):
                    tr.decompose()
            elif header == "意味":
                definition = tr.td
                definition.name = "div"
                definition.attrs["class"] = "意味"
                soup.body.insert(0, definition)
                tr.decompose()
        # Guard against pages that never had a table to begin with.
        if soup.find("tr") is None and soup.table is not None:
            soup.table.decompose()

    def _insert_headword_line(self, soup, entry):
        """Insert a 見出し ``div`` at the start of the body.

        It holds the reading (only when it is to be displayed in the
        headword) followed by the expression wrapped in 【】.
        """
        headword_line = soup.new_tag("div")
        headword_line.attrs["class"] = "見出し"
        if self._do_display_yomikata_in_headword(entry):
            reading = soup.new_tag("span")
            reading.attrs["class"] = "読み方"
            reading.string = entry.yomikata
            headword_line.append(reading)
        expression = soup.new_tag("span")
        expression.attrs["class"] = self._expression_header
        expression.string = f"【{entry.expression}】"
        headword_line.append(expression)
        soup.body.insert(0, headword_line)

    def _do_display_yomikata_in_headword(self, entry):
        """Return whether the reading belongs in the headword line.

        True when ``entry.yomikata`` matches the kana pattern below and is
        at most ten characters long.
        """
        if not re.match(r"^[ぁ-ヿ、]+$", entry.yomikata):
            return False
        return len(entry.yomikata) <= 10
|
||||||
|
|
||||||
|
|
||||||
|
class JitenonKokugoGlossary(JitenonGlossary):
    """Glossary builder for the jitenon kokugo dictionary."""

    def __init__(self):
        super().__init__()
        self._expression_header = "言葉"
        # Dots in the host name are escaped so they match literally
        # instead of matching any character.
        self._id_pattern = r"kokugo\.jitenon\.jp/word/p([0-9]+)$"

    def make_glossary(self, entry, media_dir):
        """Return the prettified body markup for ``entry``."""
        soup = entry.get_page_soup()
        self._remove_antonym_list_item(soup)
        self._replace_number_icons(soup, media_dir)
        self._replace_punctuation(soup)
        self._add_internal_links(soup, entry)
        self._decompose_table_rows(soup, entry)
        self._insert_headword_line(soup, entry)
        glossary = soup.body.prettify()
        return glossary

    def _remove_antonym_list_item(self, soup):
        """Remove every list item whose text is exactly 対義語辞典."""
        for el in soup.find_all("li"):
            if el.text == "対義語辞典":
                el.decompose()

    def _replace_number_icons(self, soup, media_dir):
        """Replace each image by a ``span`` holding the number in its alt.

        The ``src`` and ``alt`` attributes are dropped.  ``media_dir`` is
        not used.
        """
        for el in soup.find_all("img"):
            alt = el.attrs["alt"]
            text = re.search(r"[0-9]+", alt).group(0)
            el.name = "span"
            el.string = text
            del el.attrs["src"]
            del el.attrs["alt"]

    def _do_display_yomikata_in_headword(self, entry):
        """Return True when the reading is at most ten characters long."""
        return len(entry.yomikata) <= 10
|
||||||
|
|
||||||
|
|
||||||
|
class JitenonYojiGlossary(JitenonGlossary):
    """Glossary builder for the jitenon yoji dictionary."""

    def __init__(self):
        super().__init__()
        self._expression_header = "四字熟語"
        # Dots in the host name are escaped so they match literally; the
        # ``.?`` after "yoji" in the path is kept as an optional wildcard.
        self._id_pattern = r"yoji\.jitenon\.jp/yoji.?/([0-9]+)\.html$"

    def make_glossary(self, entry, media_dir):
        """Return the prettified body markup for ``entry``.

        ``media_dir`` is not used.
        """
        soup = entry.get_page_soup()
        self._replace_punctuation(soup)
        self._add_internal_links(soup, entry)
        self._decompose_table_rows(soup, entry)
        self._insert_headword_line(soup, entry)
        glossary = soup.body.prettify()
        return glossary
|
||||||
|
|
||||||
|
|
||||||
|
class JitenonKotowazaGlossary(JitenonGlossary):
    """Glossary builder for the jitenon kotowaza dictionary."""

    def __init__(self):
        super().__init__()
        self._expression_header = "言葉"
        # Dots in the host name are escaped so they match literally
        # instead of matching any character.
        self._id_pattern = r"kotowaza\.jitenon\.jp/kotowaza/([0-9]+)\.php$"

    def make_glossary(self, entry, media_dir):
        """Return the prettified body markup for ``entry``.

        ``media_dir`` is not used.
        """
        soup = entry.get_page_soup()
        self._replace_punctuation(soup)
        self._add_internal_links(soup, entry)
        self._decompose_table_rows(soup, entry)
        self._insert_headword_line(soup, entry)
        glossary = soup.body.prettify()
        return glossary
|
67
bot/mdict/glossary/smk8.py
Normal file
67
bot/mdict/glossary/smk8.py
Normal file
|
@ -0,0 +1,67 @@
|
||||||
|
import re
|
||||||
|
|
||||||
|
from bot.soup import delete_soup_nodes
|
||||||
|
from bot.data import load_mdict_name_conversion
|
||||||
|
from bot.name_conversion import convert_names
|
||||||
|
|
||||||
|
|
||||||
|
def make_glossary(entry, media_dir):
    """Return the MDict glossary markup for an SMK8 ``entry``.

    The entry's page soup gets its parent links filled in, unused nodes
    removed, links and priority markers converted, and element names
    converted with the target's MDict name-conversion table.  The first
    ``span`` element of the result is returned as a string.  ``media_dir``
    is not used.
    """
    soup = entry.get_page_soup()
    __fill_alts(soup, entry)
    __delete_unused_nodes(soup)
    __convert_links(soup, entry)
    __convert_priority_markers(soup)

    convert_names(soup, load_mdict_name_conversion(entry.target))

    return soup.span.decode()
|
||||||
|
|
||||||
|
|
||||||
|
def __fill_alts(soup, entry):
    """Turn 親見出仮名 / 親見出表記 elements into links to the parent.

    Each element becomes an ``a`` whose text is its former ``alt``
    attribute and whose ``href`` addresses the parent entry.  Nothing
    happens, and the parent is not looked up, when no such element exists.
    """
    parent_tags = soup.find_all(["親見出仮名", "親見出表記"])
    if not parent_tags:
        return
    gid = entry.get_parent().get_global_identifier()
    for tag in parent_tags:
        tag.name = "a"
        tag.string = tag.attrs["alt"]
        tag.attrs["href"] = f"entry://{gid}"
        del tag.attrs["alt"]
|
||||||
|
|
||||||
|
|
||||||
|
def __delete_unused_nodes(soup):
    """Delete the 連濁 nodes from ``soup``."""
    unused_names = ("連濁",)
    for node_name in unused_names:
        delete_soup_nodes(soup, node_name)
|
||||||
|
|
||||||
|
|
||||||
|
def __convert_links(soup, entry):
    """Convert SMK8 link targets to their MDict form.

    * ``$``-prefixed links are unwrapped, keeping only their contents.
    * A bare entry ID becomes the ``entry://`` address of that entry.
    * An ``.aac`` file name becomes a ``sound://audio/`` address.
    * ``entry:`` and ``http(s):`` links are kept as they are.

    Raises:
        Exception: if an ``href`` has any other format.
    """
    for anchor in soup.find_all("a"):
        href = anchor.attrs["href"]
        if href.startswith("$"):
            anchor.unwrap()
            continue
        if re.match(r"^[0-9]+(?:-[0-9A-F]{4})?$", href):
            ref_entry_id = entry.id_string_to_entry_id(href)
            gid = entry.ID_TO_ENTRY[ref_entry_id].get_global_identifier()
            anchor.attrs["href"] = f"entry://{gid}"
            continue
        if re.match(r"^[0-9]+[ab]?\.aac$", href):
            anchor.attrs["href"] = f"sound://audio/{href}"
            continue
        if re.match(r"^entry:", href) or re.match(r"^https?:[\w\W]*", href):
            continue
        raise Exception(f"Invalid href format: {href}")
|
||||||
|
|
||||||
|
|
||||||
|
def __convert_priority_markers(soup):
    """Replace priority-marker images with text-bearing ``span`` elements.

    An ``img`` whose ``alt`` is ``*`` becomes a span containing ``*``; an
    ``img`` whose ``alt`` is ``⁑`` becomes a span containing ``**``.
    """
    # (alt attribute value, replacement text), processed in this order.
    replacements = (("*", "*"), ("⁑", "**"))
    for alt_text, marker in replacements:
        for image in soup.find_all("img", attrs={"alt": alt_text}):
            image.name = "span"
            image.string = marker
|
23
bot/mdict/terms/daijirin2.py
Normal file
23
bot/mdict/terms/daijirin2.py
Normal file
|
@ -0,0 +1,23 @@
|
||||||
|
from bot.mdict.terms.terminator import Terminator
|
||||||
|
from bot.mdict.glossary.daijirin2 import make_glossary
|
||||||
|
|
||||||
|
|
||||||
|
class Daijirin2Terminator(Terminator):
    """Terminator for the Daijirin2 target.

    Glossaries are produced by the imported ``make_glossary`` and memoized
    per ``entry.entry_id``. Children and phrases are both listed as link
    sections and expanded as subentries.
    """

    def _glossary(self, entry):
        """Return the glossary for ``entry``, building it on first request."""
        cache = self._glossary_cache
        key = entry.entry_id
        if key not in cache:
            cache[key] = make_glossary(entry, self._media_dir)
        return cache[key]

    def _link_glossary_parameters(self, entry):
        """Return ``[subentries, section title]`` pairs for the link lists."""
        children_section = [entry.children, "子項目"]
        phrases_section = [entry.phrases, "句項目"]
        return [children_section, phrases_section]

    def _subentry_lists(self, entry):
        """Return the subentry collections that get terms of their own."""
        return [entry.children, entry.phrases]
|
18
bot/mdict/terms/factory.py
Normal file
18
bot/mdict/terms/factory.py
Normal file
|
@ -0,0 +1,18 @@
|
||||||
|
from bot.targets import Targets
|
||||||
|
|
||||||
|
from bot.mdict.terms.jitenon import JitenonKokugoTerminator
|
||||||
|
from bot.mdict.terms.jitenon import JitenonYojiTerminator
|
||||||
|
from bot.mdict.terms.jitenon import JitenonKotowazaTerminator
|
||||||
|
from bot.mdict.terms.smk8 import Smk8Terminator
|
||||||
|
from bot.mdict.terms.daijirin2 import Daijirin2Terminator
|
||||||
|
|
||||||
|
|
||||||
|
def new_terminator(target):
    """Instantiate the MDict terminator class registered for ``target``.

    Raises:
        KeyError: if ``target`` has no entry in the registry below.
    """
    registry = {
        Targets.JITENON_KOKUGO: JitenonKokugoTerminator,
        Targets.JITENON_YOJI: JitenonYojiTerminator,
        Targets.JITENON_KOTOWAZA: JitenonKotowazaTerminator,
        Targets.SMK8: Smk8Terminator,
        Targets.DAIJIRIN2: Daijirin2Terminator,
    }
    terminator_class = registry[target]
    return terminator_class(target)
|
42
bot/mdict/terms/jitenon.py
Normal file
42
bot/mdict/terms/jitenon.py
Normal file
|
@ -0,0 +1,42 @@
|
||||||
|
from bot.mdict.terms.terminator import Terminator
|
||||||
|
|
||||||
|
from bot.mdict.glossary.jitenon import JitenonKokugoGlossary
|
||||||
|
from bot.mdict.glossary.jitenon import JitenonYojiGlossary
|
||||||
|
from bot.mdict.glossary.jitenon import JitenonKotowazaGlossary
|
||||||
|
|
||||||
|
|
||||||
|
class JitenonTerminator(Terminator):
    """Shared terminator behaviour for the Jitenon targets.

    ``_glossary_maker`` is initialised to ``None`` here; ``_glossary`` calls
    ``make_glossary`` on it, so it must be replaced with a real glossary
    maker before glossaries are requested. No link sections or subentries
    are produced.
    """

    def __init__(self, target):
        super().__init__(target)
        self._glossary_maker = None

    def _glossary(self, entry):
        """Return the glossary for ``entry``, building it on first request."""
        cache = self._glossary_cache
        key = entry.entry_id
        if key not in cache:
            cache[key] = self._glossary_maker.make_glossary(
                entry, self._media_dir)
        return cache[key]

    def _link_glossary_parameters(self, entry):
        """Return an empty list: no link sections are generated."""
        return []

    def _subentry_lists(self, entry):
        """Return an empty list: no subentries are expanded."""
        return []
|
||||||
|
|
||||||
|
|
||||||
|
class JitenonKokugoTerminator(JitenonTerminator):
    """Jitenon terminator whose glossaries come from JitenonKokugoGlossary."""

    def __init__(self, target):
        super().__init__(target)
        # Set after the parent constructor runs so its value is not reset.
        maker = JitenonKokugoGlossary()
        self._glossary_maker = maker
|
||||||
|
|
||||||
|
|
||||||
|
class JitenonYojiTerminator(JitenonTerminator):
    """Jitenon terminator whose glossaries come from JitenonYojiGlossary."""

    def __init__(self, target):
        super().__init__(target)
        # Set after the parent constructor runs so its value is not reset.
        maker = JitenonYojiGlossary()
        self._glossary_maker = maker
|
||||||
|
|
||||||
|
|
||||||
|
class JitenonKotowazaTerminator(JitenonTerminator):
    """Jitenon terminator whose glossaries come from JitenonKotowazaGlossary."""

    def __init__(self, target):
        super().__init__(target)
        # Set after the parent constructor runs so its value is not reset.
        maker = JitenonKotowazaGlossary()
        self._glossary_maker = maker
|
24
bot/mdict/terms/smk8.py
Normal file
24
bot/mdict/terms/smk8.py
Normal file
|
@ -0,0 +1,24 @@
|
||||||
|
from bot.mdict.terms.terminator import Terminator
|
||||||
|
from bot.mdict.glossary.smk8 import make_glossary
|
||||||
|
|
||||||
|
|
||||||
|
class Smk8Terminator(Terminator):
    """Terminator for the SMK8 target.

    Glossaries are produced by the imported ``make_glossary`` and memoized
    per ``entry.entry_id``. Children and phrases are listed as link
    sections; children, phrases and kanjis are all expanded as subentries.
    """

    def _glossary(self, entry):
        """Return the glossary for ``entry``, building it on first request."""
        cache = self._glossary_cache
        key = entry.entry_id
        if key not in cache:
            cache[key] = make_glossary(entry, self._media_dir)
        return cache[key]

    def _link_glossary_parameters(self, entry):
        """Return ``[subentries, section title]`` pairs for the link lists."""
        children_section = [entry.children, "子項目"]
        phrases_section = [entry.phrases, "句項目"]
        return [children_section, phrases_section]

    def _subentry_lists(self, entry):
        """Return the subentry collections that get terms of their own."""
        return [entry.children, entry.phrases, entry.kanjis]
|
73
bot/mdict/terms/terminator.py
Normal file
73
bot/mdict/terms/terminator.py
Normal file
|
@ -0,0 +1,73 @@
|
||||||
|
from abc import abstractmethod, ABC
|
||||||
|
|
||||||
|
|
||||||
|
class Terminator(ABC):
    """Base class that turns dictionary entries into MDict term records.

    A term record is a two-item list ``[key, value]``. For each entry one
    record maps the entry's global identifier to its full glossary HTML,
    and one ``@@@LINK=`` record per search key redirects to that
    identifier. Subclasses supply the glossary body, the link sections to
    append to it, and the subentry collections to expand.
    """

    def __init__(self, target):
        self._target = target
        # Available to subclasses for memoizing glossaries.
        self._glossary_cache = {}
        self._media_dir = None

    def set_media_dir(self, media_dir):
        """Store the media directory for use by glossary builders."""
        self._media_dir = media_dir

    def make_terms(self, entry):
        """Return the term records for ``entry`` and all of its subentries.

        The first record is ``[gid, glossary]``. It is followed by one
        ``[key, "@@@LINK=<gid>"]`` record per distinct non-blank search key,
        in the order the keys are first produced from
        ``entry.get_headwords()``, and then by the records of every
        subentry returned by ``_subentry_lists`` (recursively).
        """
        gid = entry.get_global_identifier()
        terms = [[gid, self.__full_glossary(entry)]]

        # A dict is used as an insertion-ordered set: it removes duplicate
        # keys while keeping the output order reproducible between runs.
        # Iterating a set of str follows hash order, which varies with
        # hash randomisation.
        keys = {}
        for reading, expressions in entry.get_headwords().items():
            if not expressions:
                keys[reading] = None
            for expression in expressions:
                if not expression.strip():
                    # Blank expression: the reading alone is the key.
                    keys[reading] = None
                    continue
                keys[expression] = None
                if not reading.strip():
                    continue
                if reading != expression:
                    keys[f"{reading}【{expression}】"] = None
                else:
                    keys[reading] = None

        link = f"@@@LINK={gid}"
        terms.extend([key, link] for key in keys if key.strip())

        for subentries in self._subentry_lists(entry):
            for subentry in subentries:
                terms.extend(self.make_terms(subentry))
        return terms

    def __full_glossary(self, entry):
        """Return the complete glossary HTML for ``entry``.

        The result is the stylesheet link for the target, the subclass
        glossary, and one link section per non-empty subentry collection
        from ``_link_glossary_parameters``, joined with newlines.
        """
        # Imported here so the class does not depend on a file-level import.
        import html

        parts = [
            f"<link rel='stylesheet' href='{self._target.value}.css' type='text/css'>",
            self._glossary(entry),
        ]
        for subentries, list_title in self._link_glossary_parameters(entry):
            if not subentries:
                continue
            items = []
            for subentry in subentries:
                # Escape text and attribute values so characters such as
                # "&", "<" or quotes cannot break the generated markup.
                exp = html.escape(str(subentry.get_first_expression()))
                sub_gid = html.escape(str(subentry.get_global_identifier()))
                items.append(f"<li><a href='entry://{sub_gid}'>{exp}</a></li>")
            parts.append(
                f"<div data-child-links='{list_title}'>"
                f"<span>{list_title}</span>"
                f"<ul>{''.join(items)}</ul></div>"
            )
        return "\n".join(parts)

    @abstractmethod
    def _glossary(self, entry):
        """Return the glossary HTML body for ``entry``."""

    @abstractmethod
    def _link_glossary_parameters(self, entry):
        """Return ``[subentries, section title]`` pairs to list as links."""

    @abstractmethod
    def _subentry_lists(self, entry):
        """Return the subentry collections that get terms of their own."""
|
|
@ -30,7 +30,7 @@ def __apply_name_conversion_procedures(soup, procedures):
|
||||||
"has_previous_sibling": __has_previous_sibling,
|
"has_previous_sibling": __has_previous_sibling,
|
||||||
"replace": __replace,
|
"replace": __replace,
|
||||||
"wrap": __wrap,
|
"wrap": __wrap,
|
||||||
"add_ruby_text": __add_ruby_text,
|
"insert_span": __insert_span,
|
||||||
}
|
}
|
||||||
for procedure in procedures:
|
for procedure in procedures:
|
||||||
function = functions[procedure["procedure_name"]]
|
function = functions[procedure["procedure_name"]]
|
||||||
|
@ -92,10 +92,9 @@ def __wrap(soup, l_wrap, r_wrap):
|
||||||
soup.string = f"{l_wrap}{soup.text}{r_wrap}"
|
soup.string = f"{l_wrap}{soup.text}{r_wrap}"
|
||||||
|
|
||||||
|
|
||||||
def __add_ruby_text(soup, mark, style):
|
def __insert_span(soup, attr_name, attr_val):
|
||||||
if style.strip() != "":
|
span_markup = f"<span {attr_name}='{attr_val}'></span>"
|
||||||
markup = f"<rt><span style='{style}'>{mark}</span></rt>"
|
span_soup = BeautifulSoup(span_markup, "xml")
|
||||||
else:
|
for content in reversed(soup.contents):
|
||||||
markup = f"<rt>{mark}</rt>"
|
span_soup.span.insert(0, content.extract())
|
||||||
rt_soup = BeautifulSoup(markup, "xml")
|
soup.append(span_soup.span)
|
||||||
soup.append(rt_soup.rt)
|
|
|
@ -1,15 +1,18 @@
|
||||||
|
# pylint: disable=too-few-public-methods
|
||||||
|
|
||||||
import json
|
import json
|
||||||
import os
|
import os
|
||||||
import shutil
|
import shutil
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
from datetime import datetime
|
from datetime import datetime
|
||||||
|
from abc import ABC, abstractmethod
|
||||||
from platformdirs import user_documents_dir, user_cache_dir
|
from platformdirs import user_documents_dir, user_cache_dir
|
||||||
|
|
||||||
from bot.data import load_yomichan_metadata
|
from bot.data import load_yomichan_metadata
|
||||||
from bot.yomichan.terms.factory import new_terminator
|
from bot.yomichan.terms.factory import new_terminator
|
||||||
|
|
||||||
|
|
||||||
class Exporter:
|
class Exporter(ABC):
|
||||||
def __init__(self, target):
|
def __init__(self, target):
|
||||||
self._target = target
|
self._target = target
|
||||||
self._terminator = new_terminator(target)
|
self._terminator = new_terminator(target)
|
||||||
|
@ -26,6 +29,14 @@ class Exporter:
|
||||||
terms = self.__get_terms(entries)
|
terms = self.__get_terms(entries)
|
||||||
self.__make_dictionary(terms, index, tags)
|
self.__make_dictionary(terms, index, tags)
|
||||||
|
|
||||||
|
@abstractmethod
|
||||||
|
def _get_revision(self, entries):
|
||||||
|
pass
|
||||||
|
|
||||||
|
@abstractmethod
|
||||||
|
def _get_attribution(self, entries):
|
||||||
|
pass
|
||||||
|
|
||||||
def _get_build_dir(self):
|
def _get_build_dir(self):
|
||||||
if self._build_dir is not None:
|
if self._build_dir is not None:
|
||||||
return self._build_dir
|
return self._build_dir
|
||||||
|
@ -41,7 +52,7 @@ class Exporter:
|
||||||
build_dir = self._get_build_dir()
|
build_dir = self._get_build_dir()
|
||||||
build_img_dir = os.path.join(build_dir, self._target.value)
|
build_img_dir = os.path.join(build_dir, self._target.value)
|
||||||
if image_dir is not None:
|
if image_dir is not None:
|
||||||
print("Copying image files to build directory...")
|
print("Copying media files to build directory...")
|
||||||
shutil.copytree(image_dir, build_img_dir)
|
shutil.copytree(image_dir, build_img_dir)
|
||||||
else:
|
else:
|
||||||
os.makedirs(build_img_dir)
|
os.makedirs(build_img_dir)
|
||||||
|
@ -93,7 +104,7 @@ class Exporter:
|
||||||
|
|
||||||
def __write_archive(self, filename):
|
def __write_archive(self, filename):
|
||||||
archive_format = "zip"
|
archive_format = "zip"
|
||||||
out_dir = os.path.join(user_documents_dir(), "jitenbot")
|
out_dir = os.path.join(user_documents_dir(), "jitenbot", "yomichan")
|
||||||
if not Path(out_dir).is_dir():
|
if not Path(out_dir).is_dir():
|
||||||
os.makedirs(out_dir)
|
os.makedirs(out_dir)
|
||||||
out_file = f"{filename}.{archive_format}"
|
out_file = f"{filename}.{archive_format}"
|
||||||
|
@ -110,10 +121,7 @@ class Exporter:
|
||||||
shutil.rmtree(build_dir)
|
shutil.rmtree(build_dir)
|
||||||
|
|
||||||
|
|
||||||
class JitenonExporter(Exporter):
|
class _JitenonExporter(Exporter):
|
||||||
def __init__(self, target):
|
|
||||||
super().__init__(target)
|
|
||||||
|
|
||||||
def _get_revision(self, entries):
|
def _get_revision(self, entries):
|
||||||
modified_date = None
|
modified_date = None
|
||||||
for entry in entries:
|
for entry in entries:
|
||||||
|
@ -130,25 +138,19 @@ class JitenonExporter(Exporter):
|
||||||
return attribution
|
return attribution
|
||||||
|
|
||||||
|
|
||||||
class JitenonKokugoExporter(JitenonExporter):
|
class JitenonKokugoExporter(_JitenonExporter):
|
||||||
def __init__(self, target):
|
pass
|
||||||
super().__init__(target)
|
|
||||||
|
|
||||||
|
|
||||||
class JitenonYojiExporter(JitenonExporter):
|
class JitenonYojiExporter(_JitenonExporter):
|
||||||
def __init__(self, target):
|
pass
|
||||||
super().__init__(target)
|
|
||||||
|
|
||||||
|
|
||||||
class JitenonKotowazaExporter(JitenonExporter):
|
class JitenonKotowazaExporter(_JitenonExporter):
|
||||||
def __init__(self, target):
|
pass
|
||||||
super().__init__(target)
|
|
||||||
|
|
||||||
|
|
||||||
class Smk8Exporter(Exporter):
|
class Smk8Exporter(Exporter):
|
||||||
def __init__(self, target):
|
|
||||||
super().__init__(target)
|
|
||||||
|
|
||||||
def _get_revision(self, entries):
|
def _get_revision(self, entries):
|
||||||
timestamp = datetime.now().strftime("%Y-%m-%d")
|
timestamp = datetime.now().strftime("%Y-%m-%d")
|
||||||
return f"{self._target.value};{timestamp}"
|
return f"{self._target.value};{timestamp}"
|
||||||
|
@ -158,9 +160,6 @@ class Smk8Exporter(Exporter):
|
||||||
|
|
||||||
|
|
||||||
class Daijirin2Exporter(Exporter):
|
class Daijirin2Exporter(Exporter):
|
||||||
def __init__(self, target):
|
|
||||||
super().__init__(target)
|
|
||||||
|
|
||||||
def _get_revision(self, entries):
|
def _get_revision(self, entries):
|
||||||
timestamp = datetime.now().strftime("%Y-%m-%d")
|
timestamp = datetime.now().strftime("%Y-%m-%d")
|
||||||
return f"{self._target.value};{timestamp}"
|
return f"{self._target.value};{timestamp}"
|
||||||
|
|
|
@ -7,7 +7,7 @@ from bot.yomichan.exporters.export import Smk8Exporter
|
||||||
from bot.yomichan.exporters.export import Daijirin2Exporter
|
from bot.yomichan.exporters.export import Daijirin2Exporter
|
||||||
|
|
||||||
|
|
||||||
def new_exporter(target):
|
def new_yomi_exporter(target):
|
||||||
exporter_map = {
|
exporter_map = {
|
||||||
Targets.JITENON_KOKUGO: JitenonKokugoExporter,
|
Targets.JITENON_KOKUGO: JitenonKokugoExporter,
|
||||||
Targets.JITENON_YOJI: JitenonYojiExporter,
|
Targets.JITENON_YOJI: JitenonYojiExporter,
|
||||||
|
|
|
@ -6,9 +6,9 @@ from pathlib import Path
|
||||||
|
|
||||||
import bot.icons as Icons
|
import bot.icons as Icons
|
||||||
from bot.soup import delete_soup_nodes
|
from bot.soup import delete_soup_nodes
|
||||||
from bot.data import load_daijirin2_yomichan_name_conversion
|
from bot.data import load_yomichan_name_conversion
|
||||||
from bot.yomichan.glossary.gloss import make_gloss
|
from bot.yomichan.glossary.gloss import make_gloss
|
||||||
from bot.yomichan.glossary.name_conversion import convert_names
|
from bot.name_conversion import convert_names
|
||||||
|
|
||||||
|
|
||||||
def make_glossary(entry, image_dir):
|
def make_glossary(entry, image_dir):
|
||||||
|
@ -26,7 +26,7 @@ def make_glossary(entry, image_dir):
|
||||||
__convert_daigoginum(soup, image_dir)
|
__convert_daigoginum(soup, image_dir)
|
||||||
__convert_jundaigoginum(soup, image_dir)
|
__convert_jundaigoginum(soup, image_dir)
|
||||||
|
|
||||||
name_conversion = load_daijirin2_yomichan_name_conversion()
|
name_conversion = load_yomichan_name_conversion(entry.target)
|
||||||
convert_names(soup, name_conversion)
|
convert_names(soup, name_conversion)
|
||||||
|
|
||||||
gloss = make_gloss(soup.span)
|
gloss = make_gloss(soup.span)
|
||||||
|
|
|
@ -58,9 +58,9 @@ class JitenonGlossary():
|
||||||
if self._do_display_yomikata_in_headword(entry):
|
if self._do_display_yomikata_in_headword(entry):
|
||||||
tr.decompose()
|
tr.decompose()
|
||||||
elif tr.th.text == "意味":
|
elif tr.th.text == "意味":
|
||||||
imi = tr.td
|
definition = tr.td
|
||||||
imi.name = "div"
|
definition.name = "div"
|
||||||
soup.body.insert(0, imi)
|
soup.body.insert(0, definition)
|
||||||
tr.decompose()
|
tr.decompose()
|
||||||
if soup.find("tr") is None:
|
if soup.find("tr") is None:
|
||||||
soup.table.decompose()
|
soup.table.decompose()
|
||||||
|
|
|
@ -4,9 +4,9 @@ from bs4 import BeautifulSoup
|
||||||
|
|
||||||
import bot.icons as Icons
|
import bot.icons as Icons
|
||||||
from bot.soup import delete_soup_nodes
|
from bot.soup import delete_soup_nodes
|
||||||
from bot.data import load_smk8_yomichan_name_conversion
|
from bot.data import load_yomichan_name_conversion
|
||||||
from bot.yomichan.glossary.gloss import make_gloss
|
from bot.yomichan.glossary.gloss import make_gloss
|
||||||
from bot.yomichan.glossary.name_conversion import convert_names
|
from bot.name_conversion import convert_names
|
||||||
|
|
||||||
|
|
||||||
def make_glossary(entry, image_dir):
|
def make_glossary(entry, image_dir):
|
||||||
|
@ -20,7 +20,7 @@ def make_glossary(entry, image_dir):
|
||||||
__convert_gaiji(soup, image_dir)
|
__convert_gaiji(soup, image_dir)
|
||||||
__convert_rectangles(soup, image_dir)
|
__convert_rectangles(soup, image_dir)
|
||||||
|
|
||||||
name_conversion = load_smk8_yomichan_name_conversion()
|
name_conversion = load_yomichan_name_conversion(entry.target)
|
||||||
convert_names(soup, name_conversion)
|
convert_names(soup, name_conversion)
|
||||||
|
|
||||||
gloss = make_gloss(soup.span)
|
gloss = make_gloss(soup.span)
|
||||||
|
|
|
@ -9,6 +9,7 @@ from bot.yomichan.glossary.jitenon import JitenonKotowazaGlossary
|
||||||
class JitenonTerminator(Terminator):
|
class JitenonTerminator(Terminator):
|
||||||
def __init__(self, target):
|
def __init__(self, target):
|
||||||
super().__init__(target)
|
super().__init__(target)
|
||||||
|
self._glossary_maker = None
|
||||||
|
|
||||||
def _definition_tags(self, entry):
|
def _definition_tags(self, entry):
|
||||||
return None
|
return None
|
||||||
|
@ -51,7 +52,7 @@ class JitenonYojiTerminator(JitenonTerminator):
|
||||||
return ""
|
return ""
|
||||||
|
|
||||||
def _term_tags(self, entry):
|
def _term_tags(self, entry):
|
||||||
tags = entry.kankenkyuu.split("/")
|
tags = entry.kanken_level.split("/")
|
||||||
return " ".join(tags)
|
return " ".join(tags)
|
||||||
|
|
||||||
|
|
||||||
|
|
|
@ -1,7 +1,8 @@
|
||||||
|
from abc import abstractmethod, ABC
|
||||||
from bot.data import load_yomichan_inflection_categories
|
from bot.data import load_yomichan_inflection_categories
|
||||||
|
|
||||||
|
|
||||||
class Terminator:
|
class Terminator(ABC):
|
||||||
def __init__(self, target):
|
def __init__(self, target):
|
||||||
self._target = target
|
self._target = target
|
||||||
self._glossary_cache = {}
|
self._glossary_cache = {}
|
||||||
|
@ -62,3 +63,31 @@ class Terminator:
|
||||||
}
|
}
|
||||||
glossary.append(gloss)
|
glossary.append(gloss)
|
||||||
return glossary
|
return glossary
|
||||||
|
|
||||||
|
@abstractmethod
|
||||||
|
def _definition_tags(self, entry):
|
||||||
|
pass
|
||||||
|
|
||||||
|
@abstractmethod
|
||||||
|
def _inflection_rules(self, entry, expression):
|
||||||
|
pass
|
||||||
|
|
||||||
|
@abstractmethod
|
||||||
|
def _glossary(self, entry):
|
||||||
|
pass
|
||||||
|
|
||||||
|
@abstractmethod
|
||||||
|
def _sequence(self, entry):
|
||||||
|
pass
|
||||||
|
|
||||||
|
@abstractmethod
|
||||||
|
def _term_tags(self, entry):
|
||||||
|
pass
|
||||||
|
|
||||||
|
@abstractmethod
|
||||||
|
def _link_glossary_parameters(self, entry):
|
||||||
|
pass
|
||||||
|
|
||||||
|
@abstractmethod
|
||||||
|
def _subentry_lists(self, entry):
|
||||||
|
pass
|
||||||
|
|
12
data/daijirin2/mdict_name_conversion.json
Normal file
12
data/daijirin2/mdict_name_conversion.json
Normal file
|
@ -0,0 +1,12 @@
|
||||||
|
{
|
||||||
|
"a": {},
|
||||||
|
"br": {},
|
||||||
|
"img": {},
|
||||||
|
"div": {},
|
||||||
|
"span": {},
|
||||||
|
"ruby": {},
|
||||||
|
"rt": {},
|
||||||
|
"p": {},
|
||||||
|
"漢字音G": {"name": "ul"},
|
||||||
|
"漢字音": {"name": "li"}
|
||||||
|
}
|
414
data/mdict/css/daijirin2.css
Normal file
414
data/mdict/css/daijirin2.css
Normal file
|
@ -0,0 +1,414 @@
|
||||||
|
|
||||||
|
body {
|
||||||
|
margin: 1em 44px 1em 1em;
|
||||||
|
line-height: 1.5em;
|
||||||
|
font-family: serif;
|
||||||
|
font-size: 1.2em;
|
||||||
|
color: black;
|
||||||
|
}
|
||||||
|
|
||||||
|
body.ABC {
|
||||||
|
margin: 0.5em 0.5em 2em 0.5em;
|
||||||
|
}
|
||||||
|
|
||||||
|
a {
|
||||||
|
text-decoration: none;
|
||||||
|
}
|
||||||
|
|
||||||
|
img.gaiji {
|
||||||
|
height: 1em;
|
||||||
|
}
|
||||||
|
|
||||||
|
img.cut {
|
||||||
|
max-height: 100px;
|
||||||
|
max-width: 600px;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Vertical spacing for paragraphs. */
p {
    margin: 0.5em 0;
}
|
||||||
|
|
||||||
|
span[data-name="i"] {
|
||||||
|
font-style: italic;
|
||||||
|
}
|
||||||
|
|
||||||
|
span[data-name="h1"] {
|
||||||
|
font-family: sans-serif;
|
||||||
|
font-size: 1em;
|
||||||
|
font-weight: bold;
|
||||||
|
}
|
||||||
|
|
||||||
|
span[data-name="image"] {
|
||||||
|
display: block;
|
||||||
|
}
|
||||||
|
|
||||||
|
span[data-name="ref"] a {
|
||||||
|
text-decoration: none;
|
||||||
|
}
|
||||||
|
|
||||||
|
span[data-name="sl"] {
|
||||||
|
text-decoration: accent;
|
||||||
|
}
|
||||||
|
|
||||||
|
span[data-name="sm"] {
|
||||||
|
font-size: 0.7em;
|
||||||
|
}
|
||||||
|
|
||||||
|
span[data-name="small"] {
|
||||||
|
font-size: 0.7em;
|
||||||
|
vertical-align: 0.35em;
|
||||||
|
}
|
||||||
|
|
||||||
|
span[data-name="sub"] {
|
||||||
|
font-size: 0.7em;
|
||||||
|
vertical-align: -0.35em;
|
||||||
|
}
|
||||||
|
|
||||||
|
span[data-name="ty2"] span[data-name="sub"] {
|
||||||
|
vertical-align: 0em;
|
||||||
|
}
|
||||||
|
|
||||||
|
span[data-name="ty2"] span[data-name="sup"] {
|
||||||
|
vertical-align: 0.5em;
|
||||||
|
}
|
||||||
|
|
||||||
|
span[data-name="文語形"] {
|
||||||
|
display: block;
|
||||||
|
}
|
||||||
|
|
||||||
|
span[data-name="用例"] {
|
||||||
|
display: block;
|
||||||
|
}
|
||||||
|
|
||||||
|
span[data-name="補説G"] {
|
||||||
|
display: block;
|
||||||
|
}
|
||||||
|
|
||||||
|
span[data-name="語義Gnum"] + span[data-name="補説G"] {
|
||||||
|
display: inline;
|
||||||
|
}
|
||||||
|
|
||||||
|
span[data-name="アクセントG"] + span[data-name="補説G"] {
|
||||||
|
display: inline;
|
||||||
|
}
|
||||||
|
|
||||||
|
span[data-name="補説G"] + span[data-name="語釈"] {
|
||||||
|
display: block;
|
||||||
|
}
|
||||||
|
|
||||||
|
span[data-name="アクセントG"] {
|
||||||
|
font-size: 0.7em;
|
||||||
|
vertical-align: super;
|
||||||
|
margin-left: 0.25em;
|
||||||
|
margin-right: 0.25em;
|
||||||
|
}
|
||||||
|
|
||||||
|
span[data-name="カット"] {
|
||||||
|
display: block;
|
||||||
|
}
|
||||||
|
|
||||||
|
span[data-name="カットG"] {
|
||||||
|
display: block;
|
||||||
|
margin-top: 0.5em;
|
||||||
|
margin-bottom: 0.5em;
|
||||||
|
margin-left: 1em;
|
||||||
|
}
|
||||||
|
|
||||||
|
span[data-name="キャプション"] {
|
||||||
|
display: block;
|
||||||
|
}
|
||||||
|
|
||||||
|
span[data-name="ルビG"] {
|
||||||
|
font-family: sans-serif;
|
||||||
|
font-size: 0.7em;
|
||||||
|
font-weight: normal;
|
||||||
|
vertical-align: 0.35em;
|
||||||
|
}
|
||||||
|
|
||||||
|
.warichu span[data-name="ルビG"] {
|
||||||
|
font-family: serif;
|
||||||
|
font-size: 0.5em;
|
||||||
|
font-weight: normal;
|
||||||
|
vertical-align: 0em;
|
||||||
|
}
|
||||||
|
|
||||||
|
span[data-name="中語義"] {
|
||||||
|
display: block;
|
||||||
|
}
|
||||||
|
|
||||||
|
span[data-name="付記"] {
|
||||||
|
font-size: 0.7em;
|
||||||
|
vertical-align: 0.35em;
|
||||||
|
}
|
||||||
|
|
||||||
|
span[data-name="副義"] {
|
||||||
|
display: block;
|
||||||
|
margin-left: 1em;
|
||||||
|
}
|
||||||
|
|
||||||
|
span[data-name="単位名"] {
|
||||||
|
font-size: 0.5em;
|
||||||
|
}
|
||||||
|
|
||||||
|
span[data-name="原籍"] {
|
||||||
|
font-size: 0.7em;
|
||||||
|
vertical-align: 0.35em;
|
||||||
|
}
|
||||||
|
|
||||||
|
span[data-name="句仮名"] {
|
||||||
|
font-size: 0.7em;
|
||||||
|
vertical-align: 0.35em;
|
||||||
|
}
|
||||||
|
|
||||||
|
span[data-name="句項目"] {
|
||||||
|
margin-top: 0.5em;
|
||||||
|
margin-left: 1em;
|
||||||
|
display: block;
|
||||||
|
}
|
||||||
|
|
||||||
|
span[data-name="和字"] {
|
||||||
|
font-family: sans-serif;
|
||||||
|
}
|
||||||
|
|
||||||
|
span[data-name="品詞行"] {
|
||||||
|
font-size: 0.7em;
|
||||||
|
vertical-align: 0.35em;
|
||||||
|
}
|
||||||
|
|
||||||
|
span[data-name="品詞用法"] {
|
||||||
|
font-size: 0.7em;
|
||||||
|
vertical-align: 0.35em;
|
||||||
|
}
|
||||||
|
|
||||||
|
span[data-name="大語義"] {
|
||||||
|
display: block;
|
||||||
|
}
|
||||||
|
|
||||||
|
span[data-name="大語義num"] {
|
||||||
|
margin: 0.025em;
|
||||||
|
padding: 0.1em;
|
||||||
|
font-family: sans-serif;
|
||||||
|
font-size: 0.8em;
|
||||||
|
color: white;
|
||||||
|
background-color: black;
|
||||||
|
}
|
||||||
|
|
||||||
|
span[data-name="子項目"] {
|
||||||
|
display: block;
|
||||||
|
margin-top: 0.5em;
|
||||||
|
margin-left: 1em;
|
||||||
|
}
|
||||||
|
|
||||||
|
span[data-name="慣用G"] {
|
||||||
|
display: block;
|
||||||
|
margin-top: 0.5em;
|
||||||
|
}
|
||||||
|
|
||||||
|
span[data-name="欧字"] {
|
||||||
|
font-family: sans-serif;
|
||||||
|
}
|
||||||
|
|
||||||
|
span[data-name="歴史仮名"] {
|
||||||
|
font-size: 0.7em;
|
||||||
|
vertical-align: 0.35em;
|
||||||
|
}
|
||||||
|
|
||||||
|
span[data-name="派生G"] {
|
||||||
|
display: block;
|
||||||
|
margin-top: 0.5em;
|
||||||
|
}
|
||||||
|
|
||||||
|
span[data-name="準大語義"] {
|
||||||
|
display: block;
|
||||||
|
}
|
||||||
|
|
||||||
|
span[data-name="準大語義num"] {
|
||||||
|
margin: 0.025em;
|
||||||
|
padding: 0.1em;
|
||||||
|
font-family: sans-serif;
|
||||||
|
font-size: 0.8em;
|
||||||
|
border: solid 1px black;
|
||||||
|
}
|
||||||
|
|
||||||
|
span[data-name="漢字音logo"] {
|
||||||
|
margin: 0.025em;
|
||||||
|
padding: 0.1em;
|
||||||
|
font-family: sans-serif;
|
||||||
|
font-size: 0.8em;
|
||||||
|
border: solid 0.5px black;
|
||||||
|
border-radius: 1em;
|
||||||
|
}
|
||||||
|
|
||||||
|
span[data-name="漢字音G"] {
|
||||||
|
font-size: 0.7em;
|
||||||
|
font-weight: normal;
|
||||||
|
vertical-align: 0.35em;
|
||||||
|
}
|
||||||
|
|
||||||
|
span[data-name="生没年"] {
|
||||||
|
margin-left: 0.25em;
|
||||||
|
margin-right: 0.25em;
|
||||||
|
font-size: 0.7em;
|
||||||
|
vertical-align: 0.35em;
|
||||||
|
}
|
||||||
|
|
||||||
|
span[data-name="生没年"]:first-child {
|
||||||
|
margin-left: 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
span[data-name="用法"] {
|
||||||
|
font-size: 0.7em;
|
||||||
|
vertical-align: 0.35em;
|
||||||
|
}
|
||||||
|
|
||||||
|
span[data-name="異字同訓"] {
|
||||||
|
display: block;
|
||||||
|
margin-top: 0.5em;
|
||||||
|
}
|
||||||
|
|
||||||
|
span[data-name="異字同訓仮名"] {
|
||||||
|
font-family: sans-serif;
|
||||||
|
font-weight: bold;
|
||||||
|
}
|
||||||
|
|
||||||
|
span[data-name="異字同訓漢字"] {
|
||||||
|
font-family: serif;
|
||||||
|
font-weight: normal;
|
||||||
|
}
|
||||||
|
|
||||||
|
span[data-name="異字同訓表記"] {
|
||||||
|
font-family: sans-serif;
|
||||||
|
font-weight: bold;
|
||||||
|
}
|
||||||
|
|
||||||
|
span[data-name="異字同訓解説"] {
|
||||||
|
display: block;
|
||||||
|
}
|
||||||
|
|
||||||
|
span[data-name="異字同訓語義G"] {
|
||||||
|
display: block;
|
||||||
|
}
|
||||||
|
|
||||||
|
span[data-name="細義"] {
|
||||||
|
display: block;
|
||||||
|
}
|
||||||
|
|
||||||
|
span[data-name="表外字マーク"] {
|
||||||
|
font-size: 0.5em;
|
||||||
|
vertical-align: 0.5em;
|
||||||
|
}
|
||||||
|
|
||||||
|
span[data-name="見出仮名"] {
|
||||||
|
font-family: sans-serif;
|
||||||
|
font-weight: bold;
|
||||||
|
}
|
||||||
|
|
||||||
|
span[data-name="見出相当部"] {
|
||||||
|
font-family: sans-serif;
|
||||||
|
font-weight: bold;
|
||||||
|
}
|
||||||
|
|
||||||
|
span[data-name="見出部"] {
|
||||||
|
display: block;
|
||||||
|
}
|
||||||
|
|
||||||
|
span[data-name="解説部"] {
|
||||||
|
display: block;
|
||||||
|
margin-left: 1em;
|
||||||
|
}
|
||||||
|
|
||||||
|
span[data-name="語義G"] {
|
||||||
|
display: block;
|
||||||
|
}
|
||||||
|
|
||||||
|
span[data-name="語義区切"] {
|
||||||
|
font-size: 0.7em;
|
||||||
|
vertical-align: 0.35em;
|
||||||
|
}
|
||||||
|
|
||||||
|
span[data-name="返り点"] {
|
||||||
|
font-size: 0.5em;
|
||||||
|
font-weight: normal;
|
||||||
|
vertical-align: 1em;
|
||||||
|
}
|
||||||
|
|
||||||
|
span[data-name="返り点"].熟語記号 {
|
||||||
|
vertical-align: 0em;
|
||||||
|
}
|
||||||
|
|
||||||
|
span[data-name="項目"] {
|
||||||
|
display: block;
|
||||||
|
}
|
||||||
|
|
||||||
|
span[data-name="logo"] {
|
||||||
|
margin: 0.025em 0.25em;
|
||||||
|
padding: 0.1em;
|
||||||
|
font-size: 0.8em;
|
||||||
|
border: solid 1px black;
|
||||||
|
border-radius: 0.2em;
|
||||||
|
}
|
||||||
|
|
||||||
|
.gothic {
|
||||||
|
font-family: sans-serif;
|
||||||
|
font-weight: bold;
|
||||||
|
}
|
||||||
|
|
||||||
|
.warichu {
|
||||||
|
font-size: 1em;
|
||||||
|
}
|
||||||
|
|
||||||
|
.refnum {
|
||||||
|
font-size: 0.7em;
|
||||||
|
vertical-align: 0.35em;
|
||||||
|
}
|
||||||
|
|
||||||
|
#index {
|
||||||
|
display: none;
|
||||||
|
}
|
||||||
|
|
||||||
|
span[data-name="歴史仮名"]:before,
|
||||||
|
span[data-name="ルビG"]:before,
|
||||||
|
span[data-name="品詞行"]:before,
|
||||||
|
span[data-name="原籍"]:before,
|
||||||
|
span[data-name="品詞用法"]:before,
|
||||||
|
span[data-name="付記"]:before {
|
||||||
|
content: "(";
|
||||||
|
}
|
||||||
|
|
||||||
|
span[data-name="歴史仮名"]:after,
|
||||||
|
span[data-name="ルビG"]:after,
|
||||||
|
span[data-name="品詞行"]:after,
|
||||||
|
span[data-name="原籍"]:after,
|
||||||
|
span[data-name="品詞用法"]:after,
|
||||||
|
span[data-name="付記"]:after {
|
||||||
|
content: ")";
|
||||||
|
}
|
||||||
|
|
||||||
|
div[data-child-links] {
|
||||||
|
padding-top: 1em;
|
||||||
|
}
|
||||||
|
|
||||||
|
div[data-child-links] ul {
|
||||||
|
margin: 0;
|
||||||
|
padding-left: 2em;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Title badge shown above each list of subentry links. */
div[data-child-links] span {
    padding: 0.1em;
    font-family: sans-serif;
    font-size: 0.8em;
    color: white;
    border-width: 0.05em;
    border-style: none;
    border-color: black;
    word-break: keep-all;
    /* Prefixed form kept for older WebKit views; the standard property
       follows so other rendering engines also round the badge corners. */
    -webkit-border-radius: 0.2em;
    border-radius: 0.2em;
}
|
||||||
|
|
||||||
|
div[data-child-links="子項目"] span {
|
||||||
|
background-color: rgb(153, 42, 103);
|
||||||
|
}
|
||||||
|
|
||||||
|
div[data-child-links="句項目"] span {
|
||||||
|
background-color: rgb(176, 127, 57);
|
||||||
|
}
|
56
data/mdict/css/jitenon-kokugo.css
Normal file
56
data/mdict/css/jitenon-kokugo.css
Normal file
|
@ -0,0 +1,56 @@
|
||||||
|
|
||||||
|
body {
|
||||||
|
font-family: serif;
|
||||||
|
margin: 1em 44px 1em 1.5em;
|
||||||
|
line-height: 1.5em;
|
||||||
|
font-size: 1.2em;
|
||||||
|
color: black;
|
||||||
|
}
|
||||||
|
|
||||||
|
table, th, td {
|
||||||
|
border: 1px solid;
|
||||||
|
border-collapse: collapse;
|
||||||
|
padding: 0.5em;
|
||||||
|
}
|
||||||
|
|
||||||
|
th {
|
||||||
|
font-family: sans-serif;
|
||||||
|
color: black;
|
||||||
|
background-color: lightgray;
|
||||||
|
font-weight: normal;
|
||||||
|
white-space: nowrap;
|
||||||
|
}
|
||||||
|
|
||||||
|
a {
|
||||||
|
text-decoration: none;
|
||||||
|
}
|
||||||
|
|
||||||
|
td ul {
|
||||||
|
margin: -0.1em 0em -0.1em -1em;
|
||||||
|
}
|
||||||
|
|
||||||
|
.見出し {
|
||||||
|
}
|
||||||
|
|
||||||
|
.読み方 {
|
||||||
|
font-family: sans-serif;
|
||||||
|
font-weight: bold;
|
||||||
|
}
|
||||||
|
|
||||||
|
.意味 {
|
||||||
|
margin-left: 1.0em;
|
||||||
|
margin-bottom: 0.5em;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Numbered badge element. */
.num_icon {
    font-family: sans-serif;
    padding-left: 0.25em;
    margin-right: 0.5em;
    font-size: 0.8em;
    word-break: keep-all;
    color: white;
    background-color: gray;
    border-style: none;
    /* Prefixed form kept for older WebKit views; the standard property
       follows so other rendering engines also round the badge corners. */
    -webkit-border-radius: 0.1em;
    border-radius: 0.1em;
}
|
||||||
|
|
40
data/mdict/css/jitenon-kotowaza.css
Normal file
40
data/mdict/css/jitenon-kotowaza.css
Normal file
|
@ -0,0 +1,40 @@
|
||||||
|
|
||||||
|
body {
|
||||||
|
font-family: serif;
|
||||||
|
margin: 1em 44px 1em 1.5em;
|
||||||
|
line-height: 1.5em;
|
||||||
|
font-size: 1.2em;
|
||||||
|
color: black;
|
||||||
|
}
|
||||||
|
|
||||||
|
table, th, td {
|
||||||
|
border: 1px solid;
|
||||||
|
border-collapse: collapse;
|
||||||
|
padding: 0.5em;
|
||||||
|
}
|
||||||
|
|
||||||
|
th {
|
||||||
|
font-family: sans-serif;
|
||||||
|
color: black;
|
||||||
|
background-color: lightgray;
|
||||||
|
font-weight: normal;
|
||||||
|
white-space: nowrap;
|
||||||
|
}
|
||||||
|
|
||||||
|
a {
|
||||||
|
text-decoration: none;
|
||||||
|
}
|
||||||
|
|
||||||
|
.見出し {
|
||||||
|
}
|
||||||
|
|
||||||
|
.読み方 {
|
||||||
|
font-family: sans-serif;
|
||||||
|
font-weight: bold;
|
||||||
|
}
|
||||||
|
|
||||||
|
.意味 {
|
||||||
|
margin-left: 1.0em;
|
||||||
|
margin-bottom: 0.5em;
|
||||||
|
}
|
||||||
|
|
40
data/mdict/css/jitenon-yoji.css
Normal file
40
data/mdict/css/jitenon-yoji.css
Normal file
|
@ -0,0 +1,40 @@
|
||||||
|
|
||||||
|
body {
|
||||||
|
font-family: serif;
|
||||||
|
margin: 1em 44px 1em 1.5em;
|
||||||
|
line-height: 1.5em;
|
||||||
|
font-size: 1.2em;
|
||||||
|
color: black;
|
||||||
|
}
|
||||||
|
|
||||||
|
table, th, td {
|
||||||
|
border: 1px solid;
|
||||||
|
border-collapse: collapse;
|
||||||
|
padding: 0.5em;
|
||||||
|
}
|
||||||
|
|
||||||
|
th {
|
||||||
|
font-family: sans-serif;
|
||||||
|
color: black;
|
||||||
|
background-color: lightgray;
|
||||||
|
font-weight: normal;
|
||||||
|
white-space: nowrap;
|
||||||
|
}
|
||||||
|
|
||||||
|
a {
|
||||||
|
text-decoration: none;
|
||||||
|
}
|
||||||
|
|
||||||
|
.見出し {
|
||||||
|
}
|
||||||
|
|
||||||
|
.読み方 {
|
||||||
|
font-family: sans-serif;
|
||||||
|
font-weight: bold;
|
||||||
|
}
|
||||||
|
|
||||||
|
.意味 {
|
||||||
|
margin-left: 1.0em;
|
||||||
|
margin-bottom: 0.5em;
|
||||||
|
}
|
||||||
|
|
449
data/mdict/css/smk8.css
Normal file
449
data/mdict/css/smk8.css
Normal file
|
@ -0,0 +1,449 @@
|
||||||
|
|
||||||
|
body {
|
||||||
|
margin: 1em 44px 1em 1.5em;
|
||||||
|
line-height: 1.5em;
|
||||||
|
font-family: serif;
|
||||||
|
font-size: 1.2em;
|
||||||
|
color: black;
|
||||||
|
}
|
||||||
|
|
||||||
|
span[data-name="項目"] {
|
||||||
|
display: block;
|
||||||
|
}
|
||||||
|
|
||||||
|
span[data-name="見出部"] {
|
||||||
|
display: block;
|
||||||
|
}
|
||||||
|
|
||||||
|
span[data-name="見出部"].pri {
|
||||||
|
margin-left: -0.4em;
|
||||||
|
}
|
||||||
|
|
||||||
|
span[data-name="見出仮名"] {
|
||||||
|
font-family: sans-serif;
|
||||||
|
font-weight: bold;
|
||||||
|
}
|
||||||
|
|
||||||
|
rt[data-name="表音表記"] {
|
||||||
|
font-size: 0.65em;
|
||||||
|
}
|
||||||
|
|
||||||
|
rt[data-name="表外音訓マーク"] {
|
||||||
|
font-size: 0.65em;
|
||||||
|
}
|
||||||
|
|
||||||
|
rt[data-name="表外字マーク"] {
|
||||||
|
font-size: 0.65em;
|
||||||
|
}
|
||||||
|
|
||||||
|
span[data-name="解説部"] {
|
||||||
|
display: block;
|
||||||
|
margin-left: 1em;
|
||||||
|
}
|
||||||
|
|
||||||
|
span[data-name="大語義"] {
|
||||||
|
display: block;
|
||||||
|
}
|
||||||
|
|
||||||
|
span[data-name="語義"] {
|
||||||
|
display: block;
|
||||||
|
}
|
||||||
|
|
||||||
|
span[data-name="副義"] {
|
||||||
|
display: block;
|
||||||
|
}
|
||||||
|
|
||||||
|
span[data-name="用例G"] {
|
||||||
|
display: block;
|
||||||
|
}
|
||||||
|
|
||||||
|
span[data-name="注記"] span[data-name="用例G"] {
|
||||||
|
display: inline;
|
||||||
|
}
|
||||||
|
|
||||||
|
span[data-name="用例"] {
|
||||||
|
display: block;
|
||||||
|
}
|
||||||
|
|
||||||
|
span[data-name="注記"] span[data-name="用例"] {
|
||||||
|
display: inline;
|
||||||
|
}
|
||||||
|
|
||||||
|
span[data-name="見出語省略"] {
|
||||||
|
margin-left: 0.125em;
|
||||||
|
margin-right: 0.125em;
|
||||||
|
}
|
||||||
|
|
||||||
|
span[data-name="教育漢字"] {
|
||||||
|
color: green;
|
||||||
|
}
|
||||||
|
|
||||||
|
span[data-name="ルビ"] {
|
||||||
|
font-size: 0.7em;
|
||||||
|
vertical-align: 0.5em;
|
||||||
|
}
|
||||||
|
|
||||||
|
span[data-name="ルビ区切"] {
|
||||||
|
font-size: 0.7em;
|
||||||
|
vertical-align: 0.65em;
|
||||||
|
}
|
||||||
|
|
||||||
|
span[data-name="名詞形G"] {
|
||||||
|
display: block;
|
||||||
|
}
|
||||||
|
|
||||||
|
span[data-name="可能形G"] {
|
||||||
|
display: block;
|
||||||
|
}
|
||||||
|
|
||||||
|
span[data-name="参照G"] {
|
||||||
|
display: block;
|
||||||
|
}
|
||||||
|
|
||||||
|
span[data-name="参照"] {
|
||||||
|
color: blue;
|
||||||
|
}
|
||||||
|
|
||||||
|
span[data-name="子項目"],
|
||||||
|
span[data-name="句項目"] {
|
||||||
|
display: block;
|
||||||
|
margin-bottom: 0.5em;
|
||||||
|
}
|
||||||
|
|
||||||
|
span[data-name="子項目F"],
|
||||||
|
span[data-name="句項目F"] {
|
||||||
|
display: block;
|
||||||
|
margin-bottom: 0.5em;
|
||||||
|
margin-top: 0.5em;
|
||||||
|
}
|
||||||
|
|
||||||
|
span[data-name="子見出部"] {
|
||||||
|
display: block;
|
||||||
|
}
|
||||||
|
|
||||||
|
span[data-name="子解説部"] {
|
||||||
|
display: block;
|
||||||
|
margin-left: 1em;
|
||||||
|
}
|
||||||
|
|
||||||
|
span[data-name="句見出部"] {
|
||||||
|
display: block;
|
||||||
|
}
|
||||||
|
|
||||||
|
span[data-name="句解説部"] {
|
||||||
|
display: block;
|
||||||
|
margin-left: 1em;
|
||||||
|
}
|
||||||
|
|
||||||
|
span[data-name="運用解説"] {
|
||||||
|
display: block;
|
||||||
|
}
|
||||||
|
|
||||||
|
span[data-name="表記解説"] {
|
||||||
|
display: block;
|
||||||
|
}
|
||||||
|
|
||||||
|
span[data-name="文法解説"] {
|
||||||
|
display: block;
|
||||||
|
}
|
||||||
|
|
||||||
|
span[data-name="かぞえ方解説"] {
|
||||||
|
display: block;
|
||||||
|
}
|
||||||
|
|
||||||
|
span[data-name="派生"] {
|
||||||
|
display: block;
|
||||||
|
margin-left: 1.25em;
|
||||||
|
}
|
||||||
|
|
||||||
|
span[data-name="派生SubGF"] {
|
||||||
|
display: block;
|
||||||
|
text-indent: -1.25em;
|
||||||
|
}
|
||||||
|
|
||||||
|
span[data-name="派生SubG"] {
|
||||||
|
display: block;
|
||||||
|
}
|
||||||
|
|
||||||
|
span[data-name="派生SubGF"] span[data-name="用例G"] {
|
||||||
|
text-indent: 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
span[data-name="派生見出"] {
|
||||||
|
font-weight: bold;
|
||||||
|
}
|
||||||
|
|
||||||
|
span[data-name="派生見出"].normal {
|
||||||
|
font-weight: normal
|
||||||
|
}
|
||||||
|
|
||||||
|
span[data-name="造語成分項目"] {
|
||||||
|
display: block;
|
||||||
|
margin-top: 1em;
|
||||||
|
}
|
||||||
|
|
||||||
|
span[data-name="造語成分見出"] {
|
||||||
|
font-size:1.4em;
|
||||||
|
}
|
||||||
|
|
||||||
|
span[data-name="EM"] {
|
||||||
|
font-weight: bold;
|
||||||
|
}
|
||||||
|
|
||||||
|
span[data-name="アクセント"] {
|
||||||
|
font-size: 0.7em;
|
||||||
|
vertical-align: super;
|
||||||
|
}
|
||||||
|
|
||||||
|
span[data-name="アクセント組M"] {
|
||||||
|
vertical-align: 0.1em;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
span[data-name="反意語M"],
|
||||||
|
span[data-name="同意語M"] {
|
||||||
|
vertical-align: 0.15em;
|
||||||
|
}
|
||||||
|
|
||||||
|
span[data-name="B"] {
|
||||||
|
font-weight: bold;
|
||||||
|
}
|
||||||
|
|
||||||
|
span[data-name="IT"] {
|
||||||
|
font-family: "Times New Roman";
|
||||||
|
font-style: italic;
|
||||||
|
}
|
||||||
|
|
||||||
|
span[data-name="EXCLAMATION"] {
|
||||||
|
font-family: "Times New Roman";
|
||||||
|
font-style: italic;
|
||||||
|
font-size: 1.2em;
|
||||||
|
}
|
||||||
|
|
||||||
|
span[data-name="歴史仮名"] {
|
||||||
|
font-family: serif;
|
||||||
|
font-size: 0.7em;
|
||||||
|
font-weight: normal;
|
||||||
|
vertical-align: 0.35em;
|
||||||
|
-webkit-user-select: nocopy;
|
||||||
|
}
|
||||||
|
|
||||||
|
span[data-name="出現形"] {
|
||||||
|
font-weight: bold;
|
||||||
|
}
|
||||||
|
|
||||||
|
span[data-name="品詞用法"] {
|
||||||
|
font-size: 0.7em;
|
||||||
|
}
|
||||||
|
|
||||||
|
span[data-name="品詞用法"] span[data-name="品詞G"] {
|
||||||
|
font-size: 1.2em;
|
||||||
|
}
|
||||||
|
|
||||||
|
span[data-name="基本構文型"] {
|
||||||
|
font-size: 0.8em;
|
||||||
|
}
|
||||||
|
|
||||||
|
span[data-name="基本構文em"] {
|
||||||
|
font-weight: bold;
|
||||||
|
}
|
||||||
|
|
||||||
|
span[data-name="ウ濁音参照"] {
|
||||||
|
font-family: sans-serif;
|
||||||
|
font-weight: bold;
|
||||||
|
}
|
||||||
|
|
||||||
|
span[data-name="rect"] {
|
||||||
|
padding: 0.1em;
|
||||||
|
font-family: sans-serif;
|
||||||
|
font-size: 0.8em;
|
||||||
|
border-width: 0.05em;
|
||||||
|
border-style: solid;
|
||||||
|
border-color: black;
|
||||||
|
word-break: keep-all;
|
||||||
|
-webkit-border-radius: 0.1em;
|
||||||
|
}
|
||||||
|
|
||||||
|
span[data-name="rect"].fill {
|
||||||
|
color: white;
|
||||||
|
border-style: none;
|
||||||
|
background-color: gray;
|
||||||
|
}
|
||||||
|
|
||||||
|
span[data-name="rect"].red {
|
||||||
|
color: red;
|
||||||
|
border-color: red;
|
||||||
|
}
|
||||||
|
|
||||||
|
span[data-name="rect"].redfill {
|
||||||
|
color: white;
|
||||||
|
border-style: none;
|
||||||
|
background-color: red;
|
||||||
|
}
|
||||||
|
|
||||||
|
span[data-name="red"] {
|
||||||
|
color: red;
|
||||||
|
}
|
||||||
|
|
||||||
|
span[data-name="大語義番号"],
|
||||||
|
span[data-name="語義番号"],
|
||||||
|
span[data-name="副義番号"] {
|
||||||
|
margin-right: 0.25em;
|
||||||
|
font-family: sans-serif;
|
||||||
|
}
|
||||||
|
|
||||||
|
span[data-name="ref"] span[data-name="大語義番号"],
|
||||||
|
span[data-name="ref"] span[data-name="語義番号"],
|
||||||
|
span[data-name="ref"] span[data-name="副義番号"] {
|
||||||
|
font-size: 0.8em;
|
||||||
|
margin-right: 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
span[data-name="表外字マーク"] {
|
||||||
|
vertical-align: 0.5em;
|
||||||
|
}
|
||||||
|
|
||||||
|
span[data-name="表外音訓マーク"] {
|
||||||
|
font-size: 0.5em;
|
||||||
|
vertical-align: 0.5em;
|
||||||
|
}
|
||||||
|
|
||||||
|
span[data-name="言換M"] {
|
||||||
|
font-size: 0.5em;
|
||||||
|
}
|
||||||
|
|
||||||
|
span[data-name="字音語参照項目"] {
|
||||||
|
display: block;
|
||||||
|
}
|
||||||
|
|
||||||
|
span[data-name="本文項目M"] {
|
||||||
|
font-size: 0.7em;
|
||||||
|
}
|
||||||
|
|
||||||
|
span[data-name="運用解説M"],
|
||||||
|
span[data-name="表記解説M"],
|
||||||
|
span[data-name="文法解説M"],
|
||||||
|
span[data-name="かぞえ方解説M"],
|
||||||
|
span[data-name="派生M"] {
|
||||||
|
margin-right: 0.25em;
|
||||||
|
font-family: sans-serif;
|
||||||
|
}
|
||||||
|
|
||||||
|
span[data-name="派生ロゴ"] {
|
||||||
|
margin-left: 0.1em;
|
||||||
|
margin-right: 0.1em;
|
||||||
|
}
|
||||||
|
|
||||||
|
span[data-name="文字"] {
|
||||||
|
margin: 0 0.2em;
|
||||||
|
}
|
||||||
|
|
||||||
|
span[data-name="二分"] {
|
||||||
|
font-size: 0.5em;
|
||||||
|
}
|
||||||
|
|
||||||
|
span[data-name="四分"] {
|
||||||
|
font-size: 0.25em;
|
||||||
|
}
|
||||||
|
|
||||||
|
span[data-name="ref"] {
|
||||||
|
margin-left: 0.1em;
|
||||||
|
margin-right: 0.1em;
|
||||||
|
}
|
||||||
|
|
||||||
|
span[data-name="ref-small"] {
|
||||||
|
font-size: 0.7em;
|
||||||
|
}
|
||||||
|
|
||||||
|
span[data-name="sup"] {
|
||||||
|
font-size: 0.6em;
|
||||||
|
}
|
||||||
|
|
||||||
|
span[data-name="外字"] img {
|
||||||
|
height: 1em;
|
||||||
|
}
|
||||||
|
|
||||||
|
img.audio {
|
||||||
|
height: 1em;
|
||||||
|
margin: 0 0.25em;
|
||||||
|
}
|
||||||
|
|
||||||
|
img.外字 {
|
||||||
|
height: 1em;
|
||||||
|
}
|
||||||
|
|
||||||
|
img.外字欧 {
|
||||||
|
height: 1em;
|
||||||
|
}
|
||||||
|
|
||||||
|
span[data-name="レ点M"] {
|
||||||
|
font-size: 0.6em;
|
||||||
|
vertical-align: -0.7em;
|
||||||
|
}
|
||||||
|
|
||||||
|
a {
|
||||||
|
text-decoration: none;
|
||||||
|
}
|
||||||
|
|
||||||
|
span[data-name="audio"] a {
|
||||||
|
padding-bottom: 0;
|
||||||
|
border-bottom: none;
|
||||||
|
}
|
||||||
|
|
||||||
|
span[data-name="アクセント"] a,
|
||||||
|
span[data-name="古語M"] a,
|
||||||
|
span[data-name="雅語M"] a,
|
||||||
|
span[data-name="派生M"] a,
|
||||||
|
span[data-name="原籍M"] a,
|
||||||
|
span[data-name="品詞M"] a {
|
||||||
|
color: black;
|
||||||
|
border-bottom-style: none;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
span[data-name="歴史仮名"]:before,
|
||||||
|
span[data-name="ルビ"]:before {
|
||||||
|
content: "(";
|
||||||
|
}
|
||||||
|
|
||||||
|
span[data-name="歴史仮名"]:after,
|
||||||
|
span[data-name="ルビ"]:after {
|
||||||
|
content: ")";
|
||||||
|
}
|
||||||
|
|
||||||
|
div[data-child-links] {
|
||||||
|
padding-top: 1em;
|
||||||
|
}
|
||||||
|
|
||||||
|
div[data-child-links] ul {
|
||||||
|
margin: 0;
|
||||||
|
padding-left: 2em;
|
||||||
|
}
|
||||||
|
|
||||||
|
div[data-child-links] span {
|
||||||
|
padding: 0.1em;
|
||||||
|
font-family: sans-serif;
|
||||||
|
font-size: 0.8em;
|
||||||
|
color: white;
|
||||||
|
border-width: 0.05em;
|
||||||
|
border-style: none;
|
||||||
|
border-color: black;
|
||||||
|
word-break: keep-all;
|
||||||
|
-webkit-border-radius: 0.2em;
|
||||||
|
}
|
||||||
|
|
||||||
|
div[data-child-links="子項目"] span {
|
||||||
|
background-color: rgb(153, 42, 103);
|
||||||
|
}
|
||||||
|
|
||||||
|
div[data-child-links="句項目"] span {
|
||||||
|
background-color: rgb(176, 127, 57);
|
||||||
|
}
|
||||||
|
|
||||||
|
span.pri > span.外字 {
|
||||||
|
font-size: 0.65em;
|
||||||
|
vertical-align: super;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
7
data/mdict/description/daijirin2.mdx.description.html
Normal file
7
data/mdict/description/daijirin2.mdx.description.html
Normal file
|
@ -0,0 +1,7 @@
|
||||||
|
大辞林 第四版
|
||||||
|
<br><br>
|
||||||
|
https://www.monokakido.jp/ja/dictionaries/daijirin2/index.html
|
||||||
|
<br><br>
|
||||||
|
{{revision}}
|
||||||
|
<br><br>
|
||||||
|
{{attribution}}
|
|
@ -0,0 +1,7 @@
|
||||||
|
国語辞典オンライン
|
||||||
|
<br><br>
|
||||||
|
https://kokugo.jitenon.jp/
|
||||||
|
<br><br>
|
||||||
|
{{revision}}
|
||||||
|
<br><br>
|
||||||
|
{{attribution}}
|
|
@ -0,0 +1,7 @@
|
||||||
|
故事・ことわざ・慣用句オンライン
|
||||||
|
<br><br>
|
||||||
|
https://kotowaza.jitenon.jp/
|
||||||
|
<br><br>
|
||||||
|
{{revision}}
|
||||||
|
<br><br>
|
||||||
|
{{attribution}}
|
7
data/mdict/description/jitenon-yoji.mdx.description.html
Normal file
7
data/mdict/description/jitenon-yoji.mdx.description.html
Normal file
|
@ -0,0 +1,7 @@
|
||||||
|
四字熟語辞典オンライン
|
||||||
|
<br><br>
|
||||||
|
https://yoji.jitenon.jp/
|
||||||
|
<br><br>
|
||||||
|
{{revision}}
|
||||||
|
<br><br>
|
||||||
|
{{attribution}}
|
7
data/mdict/description/smk8.mdx.description.html
Normal file
7
data/mdict/description/smk8.mdx.description.html
Normal file
|
@ -0,0 +1,7 @@
|
||||||
|
新明解国語辞典 第八版
|
||||||
|
<br><br>
|
||||||
|
https://www.monokakido.jp/ja/dictionaries/smk8/index.html
|
||||||
|
<br><br>
|
||||||
|
{{revision}}
|
||||||
|
<br><br>
|
||||||
|
{{attribution}}
|
BIN
data/mdict/icon/jitenon-kokugo.png
Normal file
BIN
data/mdict/icon/jitenon-kokugo.png
Normal file
Binary file not shown.
After Width: | Height: | Size: 2.3 KiB |
BIN
data/mdict/icon/jitenon-kotowaza.png
Normal file
BIN
data/mdict/icon/jitenon-kotowaza.png
Normal file
Binary file not shown.
After Width: | Height: | Size: 5.3 KiB |
BIN
data/mdict/icon/jitenon-yoji.png
Normal file
BIN
data/mdict/icon/jitenon-yoji.png
Normal file
Binary file not shown.
After Width: | Height: | Size: 2.6 KiB |
1
data/mdict/title/daijirin2.mdx.title.html
Normal file
1
data/mdict/title/daijirin2.mdx.title.html
Normal file
|
@ -0,0 +1 @@
|
||||||
|
大辞林 第四版
|
1
data/mdict/title/jitenon-kokugo.mdx.title.html
Normal file
1
data/mdict/title/jitenon-kokugo.mdx.title.html
Normal file
|
@ -0,0 +1 @@
|
||||||
|
国語辞典オンライン
|
1
data/mdict/title/jitenon-kotowaza.mdx.title.html
Normal file
1
data/mdict/title/jitenon-kotowaza.mdx.title.html
Normal file
|
@ -0,0 +1 @@
|
||||||
|
故事・ことわざ・慣用句オンライン
|
1
data/mdict/title/jitenon-yoji.mdx.title.html
Normal file
1
data/mdict/title/jitenon-yoji.mdx.title.html
Normal file
|
@ -0,0 +1 @@
|
||||||
|
四字熟語辞典オンライン
|
1
data/mdict/title/smk8.mdx.title.html
Normal file
1
data/mdict/title/smk8.mdx.title.html
Normal file
|
@ -0,0 +1 @@
|
||||||
|
新明解国語辞典 第八版
|
25
data/smk8/mdict_name_conversion.json
Normal file
25
data/smk8/mdict_name_conversion.json
Normal file
|
@ -0,0 +1,25 @@
|
||||||
|
{
|
||||||
|
"a": {},
|
||||||
|
"br": {},
|
||||||
|
"img": {},
|
||||||
|
"div": {},
|
||||||
|
"span": {},
|
||||||
|
"表外字": {
|
||||||
|
"name": "ruby"
|
||||||
|
},
|
||||||
|
"表外字マーク": {
|
||||||
|
"name": "rt"
|
||||||
|
},
|
||||||
|
"表外音訓": {
|
||||||
|
"name": "ruby"
|
||||||
|
},
|
||||||
|
"表外音訓マーク": {
|
||||||
|
"name": "rt"
|
||||||
|
},
|
||||||
|
"表音式": {
|
||||||
|
"name": "ruby"
|
||||||
|
},
|
||||||
|
"表音表記": {
|
||||||
|
"name": "rt"
|
||||||
|
}
|
||||||
|
}
|
|
@ -121,25 +121,31 @@
|
||||||
"style": "font-weight: bold;"
|
"style": "font-weight: bold;"
|
||||||
},
|
},
|
||||||
"表外字": {
|
"表外字": {
|
||||||
"name": "ruby",
|
"name": "ruby"
|
||||||
|
},
|
||||||
|
"表外字マーク": {
|
||||||
|
"name": "rt",
|
||||||
"procedures": [
|
"procedures": [
|
||||||
{
|
{
|
||||||
"procedure_name": "add_ruby_text",
|
"procedure_name": "insert_span",
|
||||||
"parameters": {
|
"parameters": {
|
||||||
"mark": "︿",
|
"attr_name": "style",
|
||||||
"style": "font-size: 2em;"
|
"attr_val": "font-size: 2em;"
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
"表外音訓": {
|
"表外音訓": {
|
||||||
"name": "ruby",
|
"name": "ruby"
|
||||||
|
},
|
||||||
|
"表外音訓マーク": {
|
||||||
|
"name": "rt",
|
||||||
"procedures": [
|
"procedures": [
|
||||||
{
|
{
|
||||||
"procedure_name": "add_ruby_text",
|
"procedure_name": "insert_span",
|
||||||
"parameters": {
|
"parameters": {
|
||||||
"mark": "︽",
|
"attr_name": "style",
|
||||||
"style": "font-size: 2em;"
|
"attr_val": "font-size: 2em;"
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
]
|
]
|
||||||
|
@ -148,23 +154,7 @@
|
||||||
"name": "ruby"
|
"name": "ruby"
|
||||||
},
|
},
|
||||||
"表音表記": {
|
"表音表記": {
|
||||||
"name": "rt",
|
"name": "rt"
|
||||||
"procedures": [
|
|
||||||
{
|
|
||||||
"procedure_name": "replace",
|
|
||||||
"parameters": {
|
|
||||||
"old": "(",
|
|
||||||
"new": ""
|
|
||||||
}
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"procedure_name": "replace",
|
|
||||||
"parameters": {
|
|
||||||
"old": ")",
|
|
||||||
"new": ""
|
|
||||||
}
|
|
||||||
}
|
|
||||||
]
|
|
||||||
},
|
},
|
||||||
"派生見出": {
|
"派生見出": {
|
||||||
"name": "span",
|
"name": "span",
|
||||||
|
|
60
jitenbot.py
60
jitenbot.py
|
@ -17,11 +17,22 @@ along with this program. If not, see <https://www.gnu.org/licenses/>.
|
||||||
"""
|
"""
|
||||||
|
|
||||||
import os
|
import os
|
||||||
|
import sys
|
||||||
import argparse
|
import argparse
|
||||||
|
import subprocess
|
||||||
from bot.targets import Targets
|
from bot.targets import Targets
|
||||||
from bot.crawlers.factory import new_crawler
|
from bot.crawlers.factory import new_crawler
|
||||||
|
|
||||||
|
|
||||||
|
def filename(f):
    """Validate a command-line argument as a path to a readable file.

    Intended for use as an argparse ``type=`` callable.

    Args:
        f: Path string supplied on the command line.

    Returns:
        The same path string, unchanged, when it names an existing
        regular file that the current process may read.

    Raises:
        argparse.ArgumentTypeError: If the path is not an existing
            regular file, or if the file is not readable.
    """
    # Guard clauses: each failure raises immediately, so no elif/else
    # chaining is needed after a raise.
    if not os.path.isfile(f):
        raise argparse.ArgumentTypeError(f"`{f}` is not a valid filename")
    if not os.access(f, os.R_OK):
        raise argparse.ArgumentTypeError(f"Cannot access file `{f}`")
    return f
|
||||||
|
|
||||||
|
|
||||||
def directory(d):
|
def directory(d):
|
||||||
if not os.path.isdir(d):
|
if not os.path.isdir(d):
|
||||||
raise argparse.ArgumentTypeError(f"`{d}` is not a valid directory")
|
raise argparse.ArgumentTypeError(f"`{d}` is not a valid directory")
|
||||||
|
@ -35,34 +46,71 @@ def parse_args(target_names):
|
||||||
parser = argparse.ArgumentParser(
|
parser = argparse.ArgumentParser(
|
||||||
prog="jitenbot",
|
prog="jitenbot",
|
||||||
description="Convert Japanese dictionary files to new formats.",
|
description="Convert Japanese dictionary files to new formats.",
|
||||||
|
epilog="See README.md for details regarding media directory structures",
|
||||||
)
|
)
|
||||||
parser.add_argument(
|
parser.add_argument(
|
||||||
"target",
|
"target",
|
||||||
choices=target_names,
|
choices=target_names,
|
||||||
help="name of dictionary to convert"
|
help="name of dictionary to convert",
|
||||||
)
|
)
|
||||||
parser.add_argument(
|
parser.add_argument(
|
||||||
"-p", "--page-dir",
|
"-p", "--page-dir",
|
||||||
help="path to directory containing XML page files",
|
help="path to directory containing XML page files",
|
||||||
type=directory
|
type=directory,
|
||||||
)
|
)
|
||||||
parser.add_argument(
|
parser.add_argument(
|
||||||
"-i", "--image-dir",
|
"-m", "--media-dir",
|
||||||
help="path to directory containing image folders (gaiji, graphics, etc.)",
|
help="path to directory containing media folders (gaiji, graphics, audio, etc.)",
|
||||||
type=directory
|
type=directory,
|
||||||
|
)
|
||||||
|
parser.add_argument(
|
||||||
|
"-i", "--mdict-icon",
|
||||||
|
help="path to icon file to be used with MDict",
|
||||||
|
type=filename,
|
||||||
|
)
|
||||||
|
parser.add_argument(
|
||||||
|
"--no-yomichan-export",
|
||||||
|
help="skip export of dictionary data to Yomichan format",
|
||||||
|
action='store_true',
|
||||||
|
)
|
||||||
|
parser.add_argument(
|
||||||
|
"--no-mdict-export",
|
||||||
|
help="skip export of dictionary data to MDict format",
|
||||||
|
action='store_true',
|
||||||
)
|
)
|
||||||
args = parser.parse_args()
|
args = parser.parse_args()
|
||||||
return args
|
return args
|
||||||
|
|
||||||
|
|
||||||
|
def test_mdict():
    """Check that the `mdict` pack tool can be launched.

    Runs ``mdict --version`` with its standard output discarded. If the
    executable cannot be found, prints guidance for the user and
    terminates the program with a non-zero exit status so that calling
    shells and scripts can detect the failure.

    Raises:
        subprocess.CalledProcessError: If the tool is found but exits
            with a non-zero status (a consequence of ``check=True``).
        SystemExit: With status 1 if the executable is not found.
    """
    try:
        subprocess.run(
            ["mdict", "--version"],
            check=True,
            stdout=subprocess.DEVNULL,
        )
    except FileNotFoundError:
        print("Could not find `mdict` pack tool.")
        print("Ensure that mdict-utils is installed and")
        print("included in the environment PATH.\n")
        print("Mdict export functionality may also be")
        print("disabled with the --no-mdict-export flag.")
        # A bare sys.exit() exits with status 0 (success); a missing
        # required tool is an error, so report a failing status.
        sys.exit(1)
|
||||||
|
|
||||||
|
|
||||||
def main():
|
def main():
|
||||||
target_names = [x.value for x in Targets]
|
target_names = [x.value for x in Targets]
|
||||||
args = parse_args(target_names)
|
args = parse_args(target_names)
|
||||||
|
if not args.no_mdict_export:
|
||||||
|
test_mdict()
|
||||||
selected_target = Targets(args.target)
|
selected_target = Targets(args.target)
|
||||||
crawler = new_crawler(selected_target)
|
crawler = new_crawler(selected_target)
|
||||||
crawler.collect_pages(args.page_dir)
|
crawler.collect_pages(args.page_dir)
|
||||||
crawler.read_pages()
|
crawler.read_pages()
|
||||||
crawler.make_yomichan_dictionary(args.image_dir)
|
if not args.no_yomichan_export:
|
||||||
|
crawler.make_yomichan_dictionary(args.media_dir)
|
||||||
|
if not args.no_mdict_export:
|
||||||
|
crawler.make_mdict_dictionary(args.media_dir, args.mdict_icon)
|
||||||
|
|
||||||
|
|
||||||
if __name__ == "__main__":
|
if __name__ == "__main__":
|
||||||
|
|
|
@ -6,6 +6,7 @@ css-parser==1.0.8
|
||||||
html5lib==1.1
|
html5lib==1.1
|
||||||
idna==3.4
|
idna==3.4
|
||||||
lxml==4.9.2
|
lxml==4.9.2
|
||||||
|
mdict-utils==1.3.12
|
||||||
Pillow==9.5.0
|
Pillow==9.5.0
|
||||||
platformdirs==3.5.0
|
platformdirs==3.5.0
|
||||||
requests==2.29.0
|
requests==2.29.0
|
||||||
|
@ -13,5 +14,7 @@ six==1.16.0
|
||||||
soupsieve==2.4.1
|
soupsieve==2.4.1
|
||||||
SudachiDict-full==20230110
|
SudachiDict-full==20230110
|
||||||
SudachiPy==0.6.7
|
SudachiPy==0.6.7
|
||||||
|
tqdm==4.65.0
|
||||||
urllib3==1.26.15
|
urllib3==1.26.15
|
||||||
webencodings==0.5.1
|
webencodings==0.5.1
|
||||||
|
xxhash==3.2.0
|
||||||
|
|
13
run_all.sh
Normal file
13
run_all.sh
Normal file
|
@ -0,0 +1,13 @@
|
||||||
|
python jitenbot.py jitenon-kokugo
|
||||||
|
python jitenbot.py jitenon-yoji
|
||||||
|
python jitenbot.py jitenon-kotowaza
|
||||||
|
|
||||||
|
python jitenbot.py smk8 \
|
||||||
|
--media-dir monokakido/SMK8/media \
|
||||||
|
--page-dir monokakido/SMK8/pages \
|
||||||
|
--mdict-icon monokakido/SMK8/SMK8-76@3x.png
|
||||||
|
|
||||||
|
python jitenbot.py daijirin2 \
|
||||||
|
--media-dir monokakido/DAIJIRIN2/media \
|
||||||
|
--page-dir monokakido/DAIJIRIN2/pages \
|
||||||
|
--mdict-icon monokakido/DAIJIRIN2/DAIJIRIN2-76@3x.png
|
Loading…
Reference in a new issue