dbf0cf0eb8
For images that have greater height than width values, the image proportions for Yomichan were being calculated incorrectly. This can be observed for example in the SMK8 entry for くの字点 and the DAIJIRIN2 entry for 定積分.
239 lines
7.7 KiB
Python
239 lines
7.7 KiB
Python
import re
|
|
import os
|
|
from bs4 import BeautifulSoup
|
|
from functools import cache
|
|
from pathlib import Path
|
|
|
|
import bot.yomichan.glossary.icons as Icons
|
|
from bot.soup import delete_soup_nodes
|
|
from bot.data import load_yomichan_name_conversion
|
|
from bot.yomichan.glossary.gloss import make_gloss
|
|
from bot.name_conversion import convert_names
|
|
|
|
|
|
def make_glossary(entry, image_dir):
    """Build the glossary list for a single dictionary entry.

    The entry's page soup is rewritten in place by a fixed sequence of
    helper passes (the order is kept exactly as is, since later passes
    look at what earlier ones produced), then handed to ``make_gloss``.

    Args:
        entry: Object providing ``get_page_soup()`` and ``target``; it is
            also passed to the link-related passes.
        image_dir: Directory passed on to every image-related pass.

    Returns:
        A one-element list holding the result of ``make_gloss(soup.span)``.
    """
    soup = entry.get_page_soup()

    # Markup clean-up passes.
    __add_rubies(soup)
    __hyperlink_parent_expression(soup, entry)
    __delete_unused_nodes(soup, image_dir)
    for tidy in (__clear_styles, __set_data_class):
        tidy(soup)
    __convert_links(soup, entry)

    # Image passes; all share the (soup, image_dir) signature.
    image_passes = (
        __convert_gaiji,
        __convert_graphics,
        __convert_logos,
        __convert_kanjion_logos,
        __convert_daigoginum,
        __convert_jundaigoginum,
    )
    for convert in image_passes:
        convert(soup, image_dir)

    convert_names(soup, load_yomichan_name_conversion(entry.target))

    return [make_gloss(soup.span)]
|
|
|
|
|
|
def __add_rubies(soup):
    """Turn ``表外音訓`` / ``表外字`` elements into ``ruby`` elements.

    Each matching element is renamed to ``ruby``. If it contains a
    ``表外字マーク`` child, that child is renamed to ``rt`` and moved to the
    end of the element.

    An element without a ``表外字マーク`` child is still renamed to
    ``ruby`` but otherwise left alone; previously this case raised
    ``AttributeError`` because ``find`` returned ``None``.

    Args:
        soup: Parsed page; modified in place.
    """
    for tag_name in ("表外音訓", "表外字"):
        for ruby in soup.find_all(tag_name):
            ruby.name = "ruby"
            mark = ruby.find("表外字マーク")
            if mark is None:
                # No annotation mark to convert; nothing more to do here.
                continue
            mark.name = "rt"
            # The rt needs to be positioned after the text it annotates.
            ruby.append(mark)
|
|
|
|
|
|
def __hyperlink_parent_expression(soup, entry):
    """Convert every ``親表記`` element into a query link to the parent entry.

    Does nothing when the soup has no ``親表記`` element. Otherwise the
    parent entry is looked up via ``entry.SUBENTRY_ID_TO_ENTRY_ID`` and
    ``entry.ID_TO_ENTRY``, and each ``親表記`` element is renamed to ``a``
    with an ``href`` built from the parent's first expression.

    Args:
        soup: Parsed page; modified in place.
        entry: Entry object providing ``entry_id`` and the two lookup tables.
    """
    parent_marks = soup.find_all("親表記")
    if not parent_marks:
        return

    owner_id = entry.SUBENTRY_ID_TO_ENTRY_ID[entry.entry_id]
    owner_expression = entry.ID_TO_ENTRY[owner_id].get_first_expression()
    link = f"?query={owner_expression}&wildcards=off"

    for mark in parent_marks:
        mark.name = "a"
        mark.attrs["href"] = link
|
|
|
|
|
|
def __delete_unused_nodes(soup, image_dir):
    """Remove nodes that should not appear in the output.

    Three independent clean-ups are applied, in this order:

    1. When ``image_dir`` has no ``graphics`` subdirectory, ``カットG``
       nodes are removed through ``delete_soup_nodes``.
    2. A ``logo`` element is decomposed when its next sibling is a
       ``漢字見出G`` or ``漢字音G`` element.
    3. Inside every ``漢字音G`` element, strings equal to ``・`` are
       replaced with the empty string.

    Args:
        soup: Parsed page; modified in place.
        image_dir: Directory checked for a ``graphics`` subdirectory.
    """
    if not __graphics_directory_exists(image_dir):
        delete_soup_nodes(soup, "カットG")

    for logo in soup.find_all("logo"):
        follower = logo.next_sibling
        # A logo with no following sibling is always kept.
        if follower is not None and follower.name in ["漢字見出G", "漢字音G"]:
            logo.decompose()

    for group in soup.find_all("漢字音G"):
        for separator in group.find_all(string="・"):
            separator.replace_with("")
|
|
|
|
|
|
@cache
def __graphics_directory_exists(image_dir):
    """Return whether ``image_dir`` contains a ``graphics`` directory.

    The answer is memoised per ``image_dir`` by ``functools.cache``, so the
    file system is only consulted the first time a given value is seen.
    """
    return (Path(image_dir) / "graphics").is_dir()
|
|
|
|
|
|
def __clear_styles(soup):
    """Drop the ``style`` attribute from every element selected by ``[style]``.

    Args:
        soup: Parsed page; modified in place.
    """
    for styled in soup.select("[style]"):
        styled.attrs.pop("style")
|
|
|
|
|
|
def __set_data_class(soup):
    """Copy each element's ``class`` attribute into a ``data-class`` attribute.

    The original ``class`` attribute is kept; ``data-class`` receives the
    very same value object.

    Args:
        soup: Parsed page; modified in place.
    """
    for node in soup.select("[class]"):
        attributes = node.attrs
        attributes["data-class"] = attributes["class"]
|
|
|
|
|
|
def __convert_links(soup, entry):
    """Rewrite entry-id hrefs on ``a`` elements into query links.

    * An href made of digits, optionally followed by ``-`` and four
      uppercase hex digits, is resolved through
      ``entry.id_string_to_entry_id`` and ``entry.ID_TO_ENTRY`` and replaced
      with a ``?query=...&wildcards=off`` link to that entry's first
      expression.
    * An href starting with ``http:``, ``https:`` or ``?`` is left as is.

    Args:
        soup: Parsed page; modified in place.
        entry: Entry object used to resolve referenced entries.

    Raises:
        Exception: If an href matches neither form.
    """
    entry_id_href = re.compile(r"^[0-9]+(?:-[0-9A-F]{4})?$")
    passthrough_href = re.compile(r"^(?:https?:|\?)[\w\W]*")

    for anchor in soup.find_all("a"):
        target = anchor.attrs["href"]

        if entry_id_href.match(target):
            linked_id = entry.id_string_to_entry_id(target)
            linked_entry = entry.ID_TO_ENTRY[linked_id]
            headword = linked_entry.get_first_expression()
            anchor.attrs["href"] = f"?query={headword}&wildcards=off"
            continue

        if passthrough_href.match(target):
            continue

        raise Exception(f"Invalid href format: {target}")
|
|
|
|
|
|
def __convert_gaiji(soup, image_dir):
    """Wrap every ``img`` whose ``src`` starts with ``gaiji`` in a styled span.

    The original element is renamed to ``span``, emptied, and given a newly
    created ``img`` child. The child's height is fixed at 1.0 em and its
    width is whatever ``Icons.calculate_ratio`` returns for the image file.
    Images with any other ``src`` are skipped.

    Args:
        soup: Parsed page; modified in place.
        image_dir: Directory under which the ``src`` path is resolved; its
            base name also prefixes the ``path`` attribute.
    """
    for holder in soup.find_all("img"):
        source = holder.attrs["src"]
        if not source.startswith("gaiji"):
            continue

        # Resolve src below image_dir, skipping empty or blank segments.
        segments = [seg for seg in source.split("/") if seg.strip() != ""]
        image_path = os.path.join(image_dir, *segments)
        width_em = Icons.calculate_ratio(image_path)

        glyph = BeautifulSoup("<img/>", "xml").img
        glyph.attrs = {
            "height": 1.0,
            "width": width_em,
            "sizeUnits": "em",
            "collapsible": False,
            "collapsed": False,
            "background": False,
            "appearance": "monochrome",
            "title": holder.attrs.get("alt", ""),
            "path": f"{os.path.basename(image_dir)}/{source}",
            # src is kept because __convert_graphics reads it on every img.
            "src": source,
        }

        holder.name = "span"
        holder.clear()
        holder.append(glyph)
        holder.attrs["style"] = "vertical-align: text-bottom;"
|
|
|
|
|
|
def __convert_graphics(soup, image_dir):
    """Replace the attributes of every ``img`` whose ``src`` starts with ``graphics``.

    The element keeps its ``src``, gains a ``path`` prefixed with the base
    name of ``image_dir``, takes its ``title`` from the old ``alt`` (empty
    string when absent), and is marked collapsible and collapsed. All other
    previous attributes are discarded. Images with any other ``src`` are
    left untouched.

    Args:
        soup: Parsed page; modified in place.
        image_dir: Directory whose base name prefixes the ``path`` attribute.
    """
    for image in soup.find_all("img"):
        source = image.attrs["src"]
        if source.startswith("graphics"):
            caption = image.attrs.get("alt", "")
            image.attrs = {
                "collapsible": True,
                "collapsed": True,
                "title": caption,
                "path": f"{os.path.basename(image_dir)}/{source}",
                "src": source,
            }
|
|
|
|
|
|
def __convert_logos(soup, image_dir):
    """Replace the text of every ``logo`` element with an icon image.

    For each element, ``Icons.make_rectangle`` is called for the file
    ``<text>-default.svg`` in ``image_dir`` (presumably creating it; see
    ``Icons``). The element is then renamed to ``span``, emptied, and given
    an ``img`` child pointing at that file, with height 1.0 em and width
    taken from ``Icons.calculate_ratio``.

    Args:
        soup: Parsed page; modified in place.
        image_dir: Directory for the icon file; its base name also prefixes
            the ``path`` attribute.
    """
    for logo in soup.find_all("logo"):
        label = logo.text
        filename = f"{label}-default.svg"
        icon_path = os.path.join(image_dir, filename)
        Icons.make_rectangle(icon_path, label, "black", "transparent", "black")
        width_em = Icons.calculate_ratio(icon_path)

        icon = BeautifulSoup("<img/>", "xml").img
        icon.attrs = {
            "height": 1.0,
            "width": width_em,
            "sizeUnits": "em",
            "collapsible": False,
            "collapsed": False,
            "background": False,
            "appearance": "monochrome",
            "title": label,
            "path": f"{os.path.basename(image_dir)}/{filename}",
        }

        logo.name = "span"
        logo.clear()
        logo.append(icon)
        logo.attrs["style"] = "vertical-align: text-bottom; margin-right: 0.25em;"
|
|
|
|
|
|
def __convert_kanjion_logos(soup, image_dir):
    """Replace the text of every ``漢字音logo`` element with an icon image.

    For each element, ``Icons.make_rectangle`` is called for the file
    ``<text>-default.svg`` in ``image_dir`` (presumably creating it; see
    ``Icons``). The element is then renamed to ``span``, emptied, and given
    an ``img`` child pointing at that file, with height 1.0 em and width
    taken from ``Icons.calculate_ratio``. The span gets a left margin.

    Args:
        soup: Parsed page; modified in place.
        image_dir: Directory for the icon file; its base name also prefixes
            the ``path`` attribute.
    """
    for badge in soup.find_all("漢字音logo"):
        label = badge.text
        filename = f"{label}-default.svg"
        icon_path = os.path.join(image_dir, filename)
        Icons.make_rectangle(icon_path, label, "black", "transparent", "black")
        width_em = Icons.calculate_ratio(icon_path)

        icon = BeautifulSoup("<img/>", "xml").img
        icon.attrs = {
            "height": 1.0,
            "width": width_em,
            "sizeUnits": "em",
            "collapsible": False,
            "collapsed": False,
            "background": False,
            "appearance": "monochrome",
            "title": label,
            "path": f"{os.path.basename(image_dir)}/{filename}",
        }

        badge.name = "span"
        badge.clear()
        badge.append(icon)
        badge.attrs["style"] = "vertical-align: text-bottom; margin-left: 0.25em;"
|
|
|
|
|
|
def __convert_daigoginum(soup, image_dir):
    """Replace the text of every ``大語義num`` element with an icon image.

    For each element, ``Icons.make_monochrome_fill_rectangle`` is called
    for the file ``<text>-fill.svg`` in ``image_dir`` (presumably creating
    it; see ``Icons``). The element is then renamed to ``span``, emptied,
    and given an ``img`` child pointing at that file, with height 1.0 em
    and width taken from ``Icons.calculate_ratio``.

    Args:
        soup: Parsed page; modified in place.
        image_dir: Directory for the icon file; its base name also prefixes
            the ``path`` attribute.
    """
    for marker in soup.find_all("大語義num"):
        label = marker.text
        filename = f"{label}-fill.svg"
        icon_path = os.path.join(image_dir, filename)
        Icons.make_monochrome_fill_rectangle(icon_path, label)
        width_em = Icons.calculate_ratio(icon_path)

        icon = BeautifulSoup("<img/>", "xml").img
        icon.attrs = {
            "height": 1.0,
            "width": width_em,
            "sizeUnits": "em",
            "collapsible": False,
            "collapsed": False,
            "background": False,
            "appearance": "monochrome",
            "title": label,
            "path": f"{os.path.basename(image_dir)}/{filename}",
        }

        marker.name = "span"
        marker.clear()
        marker.append(icon)
        marker.attrs["style"] = "vertical-align: text-bottom;"
|
|
|
|
|
|
def __convert_jundaigoginum(soup, image_dir):
    """Replace the text of every ``準大語義num`` element with an icon image.

    For each element, ``Icons.make_rectangle`` is called for the file
    ``<text>-default.svg`` in ``image_dir`` (presumably creating it; see
    ``Icons``). The element is then renamed to ``span``, emptied, and given
    an ``img`` child pointing at that file, with height 1.0 em and width
    taken from ``Icons.calculate_ratio``.

    Args:
        soup: Parsed page; modified in place.
        image_dir: Directory for the icon file; its base name also prefixes
            the ``path`` attribute.
    """
    for marker in soup.find_all("準大語義num"):
        label = marker.text
        filename = f"{label}-default.svg"
        icon_path = os.path.join(image_dir, filename)
        Icons.make_rectangle(icon_path, label, "black", "transparent", "black")
        width_em = Icons.calculate_ratio(icon_path)

        icon = BeautifulSoup("<img/>", "xml").img
        icon.attrs = {
            "height": 1.0,
            "width": width_em,
            "sizeUnits": "em",
            "collapsible": False,
            "collapsed": False,
            "background": False,
            "appearance": "monochrome",
            "title": label,
            "path": f"{os.path.basename(image_dir)}/{filename}",
        }

        marker.name = "span"
        marker.clear()
        marker.append(icon)
        marker.attrs["style"] = "vertical-align: text-bottom;"
|