2023-07-18 05:43:38 +00:00
|
|
|
|
import re
|
|
|
|
|
import os
|
|
|
|
|
from bs4 import BeautifulSoup
|
|
|
|
|
|
|
|
|
|
import bot.yomichan.glossary.icons as Icons
|
|
|
|
|
from bot.data import load_yomichan_name_conversion
|
|
|
|
|
from bot.yomichan.glossary.gloss import make_gloss
|
|
|
|
|
from bot.name_conversion import convert_names
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def make_glossary(entry, media_dir):
    """Build the glossary list for a single dictionary entry.

    The entry's page soup is rewritten in place by a fixed sequence of
    passes, names are converted with the table loaded for
    ``entry.target``, and the soup's first ``span`` is handed to
    ``make_gloss``.

    The pass order matters: later passes operate on whatever the earlier
    ones left behind (for example, anchors without an ``href`` are turned
    into spans before the passes that index ``href`` directly, and the
    icon-specific image passes rename their images before the catch-all
    image passes run).

    Args:
        entry: entry object providing ``get_page_soup()`` and ``target``,
            plus whatever the individual passes read from it.
        media_dir: directory in which generated image files are written.

    Returns:
        A list containing exactly one gloss.
    """
    soup = entry.get_page_soup()

    # Passes that look only at the markup.
    for markup_pass in (
        __remove_glyph_styles,
        __reposition_marks,
        __remove_links_without_href,
        __remove_appendix_links,
    ):
        markup_pass(soup)

    # Passes that need the entry to resolve link targets.
    for link_pass in (
        __convert_links,
        __add_parent_link,
        __add_homophone_links,
    ):
        link_pass(soup, entry)

    __convert_images_to_text(soup)

    # Passes that create or reference image files under media_dir.
    for media_pass in (
        __text_parens_to_images,
        __replace_icons,
        __replace_accent_symbols,
        __convert_gaiji,
        __convert_graphics,
        __convert_number_icons,
    ):
        media_pass(soup, media_dir)

    convert_names(soup, load_yomichan_name_conversion(entry.target))

    return [make_gloss(soup.span)]
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def __remove_glyph_styles(soup):
    """Move the ``style`` attribute of every ``glyph`` to ``data-style``.

    The css_parser library will emit annoying warning messages later if
    it sees these glyph character styles, so the value is kept under a
    different attribute name instead.
    """
    for glyph in soup.find_all("glyph"):
        if not glyph.has_attr("style"):
            continue
        # pop() removes "style" and hands back its value in one step.
        glyph.attrs["data-style"] = glyph.attrs.pop("style")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def __reposition_marks(soup):
    """Move each mark (マーク) symbol to the end of its enclosing element.

    These mark symbols will be converted to rubies later, so they need to
    be positioned after the corresponding text in order to appear
    correctly.

    For every ``表外字`` element the first ``表外字マーク`` descendant is
    appended to the element, and likewise for ``表外音訓`` and
    ``表外音訓マーク``. An element that contains no mark is left untouched;
    previously the ``None`` returned by ``find`` was passed straight to
    ``append``.
    """
    mark_by_container = (
        ("表外字", "表外字マーク"),
        ("表外音訓", "表外音訓マーク"),
    )
    for container_name, mark_name in mark_by_container:
        for elm in soup.find_all(container_name):
            mark = elm.find(mark_name)
            if mark is None:
                # Nothing to reposition in this element.
                continue
            elm.append(mark)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def __remove_links_without_href(soup):
    """Turn every ``a`` element that has no ``href`` into a ``span``.

    The original tag name is recorded in a ``data-name`` attribute before
    the element is renamed.
    """
    anchors_without_href = [
        anchor for anchor in soup.find_all("a") if not anchor.has_attr("href")
    ]
    for anchor in anchors_without_href:
        anchor.attrs["data-name"] = anchor.name
        anchor.name = "span"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def __remove_appendix_links(soup):
    """Unwrap every ``a`` element whose ``href`` starts with ``appendix``.

    Unwrapping removes the anchor tag itself while keeping its contents
    in place. Anchors without an ``href`` attribute are ignored rather
    than raising ``KeyError``, so this pass does not depend on such
    anchors having been removed beforehand.
    """
    for elm in soup.find_all("a"):
        if elm.attrs.get("href", "").startswith("appendix"):
            elm.unwrap()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def __convert_links(soup, entry):
    """Rewrite the ``href`` of every ``a`` element as a search query.

    Only the part of the ``href`` before the first space is used, with a
    leading ``#`` stripped. It must consist of digits, optionally followed
    by ``-`` and four uppercase hexadecimal digits. The resulting id
    string is converted with ``entry.id_string_to_entry_id`` and looked
    up in ``entry.ID_TO_ENTRY``; when the exact id is missing, the key
    ``(id[0], 0)`` is used instead. The link then points at a query for
    the referenced entry's first expression.

    Raises:
        Exception: if an ``href`` does not have the expected format.
        KeyError: if an ``a`` element has no ``href``, or if neither the
            exact id nor the fallback key is present in
            ``entry.ID_TO_ENTRY``.
    """
    id_pattern = re.compile(r"^[0-9]+(?:-[0-9A-F]{4})?$")
    for anchor in soup.find_all("a"):
        href = anchor.attrs["href"].split(" ")[0].removeprefix("#")
        if id_pattern.match(href) is None:
            raise Exception(f"Invalid href format: {href}")

        target_id = entry.id_string_to_entry_id(href)
        if target_id not in entry.ID_TO_ENTRY:
            # Fall back to the entry registered under (first component, 0).
            target_id = (target_id[0], 0)

        expression = entry.ID_TO_ENTRY[target_id].get_first_expression()
        anchor.attrs["href"] = f"?query={expression}&wildcards=off"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def __add_parent_link(soup, entry):
    """Turn the first ``親見出相当部`` element into a link to the parent entry.

    If the soup contains no such element, nothing happens. Otherwise the
    element is renamed to ``a`` and given an ``href`` that queries the
    first expression of ``entry.get_parent()``.
    """
    parent_heading = soup.find("親見出相当部")
    if parent_heading is None:
        return

    expression = entry.get_parent().get_first_expression()
    parent_heading.attrs["href"] = f"?query={expression}&wildcards=off"
    parent_heading.name = "a"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def __add_homophone_links(soup, entry):
    """Replace the homophone icon images with arrow links.

    Each of the three ``svg-logo/homophoneN.svg`` images is replaced by
    one or two ``a`` elements:

    * ``←`` links to the entry keyed ``(entry.entry_id[0] + 1, 0)``
    * ``→`` links to the entry keyed ``(entry.entry_id[0] - 1, 0)``

    ``homophone1`` gets only the ``←`` link, ``homophone3`` only the
    ``→`` link, and ``homophone2`` gets both (``←`` first). The links are
    appended to the image element, which is then unwrapped so that only
    the links remain in the tree.

    Raises:
        KeyError: if a neighbouring entry is missing from
            ``entry.ID_TO_ENTRY``.
    """
    main_id = entry.entry_id[0]
    forward = ("←", main_id + 1)
    backward = ("→", main_id - 1)
    links_by_icon = {
        "svg-logo/homophone1.svg": (forward,),
        "svg-logo/homophone2.svg": (forward, backward),
        "svg-logo/homophone3.svg": (backward,),
    }

    for icon_src, links in links_by_icon.items():
        for icon in soup.find_all("img", attrs={"src": icon_src}):
            for arrow, target_id in links:
                target = entry.ID_TO_ENTRY[(target_id, 0)]
                expression = target.get_first_expression()
                anchor = BeautifulSoup("<a/>", "xml").a
                anchor.string = arrow
                anchor.attrs["href"] = f"?query={expression}&wildcards=off"
                icon.append(anchor)
            # Drop the image tag itself, leaving the links behind.
            icon.unwrap()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def __convert_images_to_text(soup):
    """Replace certain icon images with plain-text spans.

    Each listed image source maps to a replacement text and an optional
    inline style. An image whose replacement text is empty is simply
    unwrapped. Any other matching image is renamed to ``span``, its
    content is set to the replacement text, its original tag name and
    source are kept in ``data-name`` / ``data-src``, and its ``src``
    attribute is removed.
    """
    superscript = "vertical-align: super; font-size: 0.6em"
    text_by_src = {
        "svg-logo/重要語.svg": ("*", superscript),
        "svg-logo/最重要語.svg": ("**", superscript),
        "svg-logo/一般常識語.svg": ("☆☆", superscript),
        "svg-logo/追い込み.svg": ("", ""),
        "svg-special/区切り線.svg": ("|", ""),
    }

    for src, (text, style) in text_by_src.items():
        for img in soup.find_all("img", attrs={"src": src}):
            if not text:
                img.unwrap()
                continue
            if style:
                img.attrs["style"] = style
            img.attrs["data-name"] = img.name
            # Move the source over to data-src in a single step.
            img.attrs["data-src"] = img.attrs.pop("src")
            img.name = "span"
            img.string = text
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def __text_parens_to_images(soup, media_dir):
    """Replace ``red`` elements holding a single parenthesis with an image.

    Only ``red`` elements whose text is exactly ``(`` or ``)`` are
    affected. For each one, ``Icons.make_red_char`` is called with the
    path ``<media_dir>/red_<char>.svg`` and the width of the image is
    taken from ``Icons.calculate_ratio`` for that path. The element is
    renamed to ``span`` (original name kept in ``data-name``), emptied,
    and given a single ``img`` child describing the image.

    Args:
        soup: parsed entry markup, modified in place.
        media_dir: directory receiving the generated SVG files; its base
            name is used as the prefix of the image's ``path`` attribute.
    """
    parentheses = {"(", ")"}
    for red in soup.find_all("red"):
        char = red.text
        if char not in parentheses:
            continue

        filename = f"red_{char}.svg"
        svg_path = os.path.join(media_dir, filename)
        Icons.make_red_char(svg_path, char)
        ratio = Icons.calculate_ratio(svg_path)

        img = BeautifulSoup("<img/>", "xml").img
        img.attrs = {
            "height": 1.0,
            "width": ratio,
            "sizeUnits": "em",
            "collapsible": False,
            "collapsed": False,
            "background": False,
            "appearance": "auto",
            "path": f"{os.path.basename(media_dir)}/{filename}",
        }

        red.attrs["data-name"] = red.name
        red.name = "span"
        red.string = ""
        red.append(img)
        red.attrs["style"] = "vertical-align: text-bottom;"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def __replace_icons(soup, media_dir):
    """Replace the known ``svg-logo`` icon images with generated icons.

    Every table row gives an image source, the text to draw and an icon
    class. For each matching ``img`` the icon file path is built under
    ``media_dir`` from the source's path components, ``__make_rectangle``
    is called for that path (it does nothing for class ``"none"``), and
    the width is taken from ``Icons.calculate_ratio``. The original
    element is renamed to ``span``, emptied, and given a new ``img``
    child whose attributes describe the icon. The new ``img`` carries a
    ``path`` attribute and no ``src`` attribute.

    Args:
        soup: parsed entry markup, modified in place.
        media_dir: directory holding the icon files; its base name is
            used as the prefix of the image's ``path`` attribute.
    """
    appearance_by_cls = {
        "default": "monochrome",
        "fill": "monochrome",
        "red": "auto",
        "redfill": "auto",
        "none": "monochrome",
    }
    # (image source, text drawn on the icon, icon class)
    icon_table = (
        ("svg-logo/アク.svg", "アク", "default"),
        ("svg-logo/丁寧.svg", "丁寧", "default"),
        ("svg-logo/可能.svg", "可能", "default"),
        ("svg-logo/尊敬.svg", "尊敬", "default"),
        ("svg-logo/接尾.svg", "接尾", "default"),
        ("svg-logo/接頭.svg", "接頭", "default"),
        ("svg-logo/表記.svg", "表記", "default"),
        ("svg-logo/謙譲.svg", "謙譲", "default"),
        ("svg-logo/区別.svg", "区別", "redfill"),
        ("svg-logo/由来.svg", "由来", "redfill"),
        ("svg-logo/人.svg", "", "none"),
        ("svg-logo/他.svg", "", "none"),
        ("svg-logo/動.svg", "", "none"),
        ("svg-logo/名.svg", "", "none"),
        ("svg-logo/句.svg", "", "none"),
        ("svg-logo/派.svg", "", "none"),
        ("svg-logo/自.svg", "", "none"),
        ("svg-logo/連.svg", "", "none"),
        ("svg-logo/造.svg", "", "none"),
        ("svg-logo/造2.svg", "", "none"),
        ("svg-logo/造3.svg", "", "none"),
        ("svg-logo/百科.svg", "", "none"),
    )

    for src, text, cls in icon_table:
        for icon in soup.find_all("img", attrs={"src": src}):
            icon_path = os.path.join(media_dir, *src.split("/"))
            __make_rectangle(icon_path, text, cls)
            ratio = Icons.calculate_ratio(icon_path)

            img = BeautifulSoup("<img/>", "xml").img
            img.attrs = {
                "height": 1.0,
                "width": ratio,
                "sizeUnits": "em",
                "collapsible": False,
                "collapsed": False,
                "background": False,
                "appearance": appearance_by_cls[cls],
                "title": icon.attrs.get("alt", ""),
                "path": f"{os.path.basename(media_dir)}/{src}",
            }

            icon.name = "span"
            icon.clear()
            icon.append(img)
            icon.attrs["style"] = "vertical-align: text-bottom; margin-right: 0.25em;"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def __replace_accent_symbols(soup, media_dir):
    """Replace the two ``svg-accent`` images with generated icons.

    For each matching ``img`` the icon file path is built under
    ``media_dir`` from the source's path components, the ``Icons``
    writer associated with that source is called with the path, and the
    width is taken from ``Icons.calculate_ratio``. The original element
    is renamed to ``span``, emptied, and given a new ``img`` child whose
    attributes describe the icon.

    Args:
        soup: parsed entry markup, modified in place.
        media_dir: directory holding the icon files; its base name is
            used as the prefix of the image's ``path`` attribute.
    """
    writer_by_src = {
        "svg-accent/平板.svg": Icons.make_heiban,
        "svg-accent/アクセント.svg": Icons.make_accent,
    }

    for src, write_svg in writer_by_src.items():
        for symbol in soup.find_all("img", attrs={"src": src}):
            svg_path = os.path.join(media_dir, *src.split("/"))
            write_svg(svg_path)
            ratio = Icons.calculate_ratio(svg_path)

            img = BeautifulSoup("<img/>", "xml").img
            img.attrs = {
                "height": 1.0,
                "width": ratio,
                "sizeUnits": "em",
                "collapsible": False,
                "collapsed": False,
                "background": False,
                "appearance": "auto",
                "path": f"{os.path.basename(media_dir)}/{src}",
            }

            symbol.name = "span"
            symbol.clear()
            symbol.append(img)
            symbol.attrs["style"] = "vertical-align: super; margin-left: -0.5em;"
|
2023-07-18 05:43:38 +00:00
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def __convert_gaiji(soup, media_dir):
    """Wrap every non-graphics image in a span with structured attributes.

    Applies to each ``img`` that has a ``src`` not starting with
    ``graphics``. The file path is built under ``media_dir`` from the
    non-blank path components of ``src`` and passed to
    ``Icons.calculate_ratio`` to obtain the width (presumably the file
    is expected to exist already; nothing is written here). The original
    element is renamed to ``span``, emptied, and given a new ``img``
    child whose attributes describe the image.

    Args:
        soup: parsed entry markup, modified in place.
        media_dir: directory holding the image files; its base name is
            used as the prefix of the image's ``path`` attribute.
    """
    for gaiji in soup.find_all("img"):
        if not gaiji.has_attr("src") or gaiji.attrs["src"].startswith("graphics"):
            continue

        src = gaiji.attrs["src"]
        # Blank components (e.g. from a leading or doubled "/") are dropped.
        components = [part for part in src.split("/") if part.strip() != ""]
        image_path = os.path.join(media_dir, *components)
        ratio = Icons.calculate_ratio(image_path)

        img = BeautifulSoup("<img/>", "xml").img
        img.attrs = {
            "height": 1.0,
            "width": ratio,
            "sizeUnits": "em",
            "collapsible": False,
            "collapsed": False,
            "background": False,
            "appearance": "monochrome",
            "title": gaiji.attrs.get("alt", ""),
            "path": f"{os.path.basename(media_dir)}/{src}",
        }

        gaiji.name = "span"
        gaiji.clear()
        gaiji.append(img)
        gaiji.attrs["style"] = "vertical-align: text-bottom;"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def __convert_graphics(soup, media_dir):
    """Replace the attributes of every ``graphics`` image.

    Applies to each ``img`` whose ``src`` starts with ``graphics``. All
    existing attributes are discarded in favour of a fixed set: the
    image is marked collapsible and collapsed, ``title`` is taken from
    the old ``alt`` (empty if absent), ``path`` is the source prefixed
    with the base name of ``media_dir``, and ``src`` is kept as is.
    """
    for graphic in soup.find_all("img"):
        if not graphic.has_attr("src"):
            continue
        src = graphic.attrs["src"]
        if not src.startswith("graphics"):
            continue

        # Read the old alt text before the attributes are replaced.
        title = graphic.attrs.get("alt", "")
        graphic.attrs = {
            "collapsible": True,
            "collapsed": True,
            "title": title,
            "path": f"{os.path.basename(media_dir)}/{src}",
            "src": src,
        }
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def __convert_number_icons(soup, media_dir):
    """Replace every ``大語義番号`` element's text with a generated icon.

    An element that is not inside an ``a`` gets a ``fill`` icon with
    monochrome appearance; one inside an ``a`` gets a ``bluefill`` icon
    with ``auto`` appearance. The icon file is named
    ``<text>-<class>.svg`` and created in ``media_dir`` through
    ``__make_rectangle``; its width comes from ``Icons.calculate_ratio``.
    The element is renamed to ``span``, emptied, and given a new ``img``
    child whose attributes describe the icon.

    Args:
        soup: parsed entry markup, modified in place.
        media_dir: directory receiving the icon files; its base name is
            used as the prefix of the image's ``path`` attribute.
    """
    for number in soup.find_all("大語義番号"):
        label = number.text
        if number.find_parent("a") is None:
            cls, appearance = "fill", "monochrome"
        else:
            cls, appearance = "bluefill", "auto"

        filename = f"{label}-{cls}.svg"
        icon_path = os.path.join(media_dir, filename)
        __make_rectangle(icon_path, label, cls)
        ratio = Icons.calculate_ratio(icon_path)

        img = BeautifulSoup("<img/>", "xml").img
        img.attrs = {
            "height": 1.0,
            "width": ratio,
            "sizeUnits": "em",
            "collapsible": False,
            "collapsed": False,
            "background": False,
            "appearance": appearance,
            "title": label,
            "path": f"{os.path.basename(media_dir)}/{filename}",
        }

        number.name = "span"
        number.clear()
        number.append(img)
        number.attrs["style"] = "vertical-align: text-bottom; margin-right: 0.25em;"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def __make_rectangle(path, text, cls):
    """Write the rectangle icon for ``cls`` to ``path``.

    * ``"none"``: nothing is written.
    * ``"fill"``: delegates to ``Icons.make_monochrome_fill_rectangle``.
    * ``"red"``, ``"redfill"``, ``"bluefill"``: delegate to
      ``Icons.make_rectangle`` with the class's three colour arguments.
    * anything else (including ``"default"``): ``Icons.make_rectangle``
      with ``"black"``, ``"transparent"``, ``"black"``.

    Args:
        path: destination file path, passed through to ``Icons``.
        text: text passed through to ``Icons``.
        cls: icon class name selecting one of the variants above.
    """
    if cls == "none":
        return
    if cls == "fill":
        Icons.make_monochrome_fill_rectangle(path, text)
        return

    colors_by_cls = {
        "red": ("red", "white", "red"),
        "redfill": ("red", "red", "white"),
        "bluefill": ("blue", "blue", "white"),
    }
    fallback_colors = ("black", "transparent", "black")
    Icons.make_rectangle(path, text, *colors_by_cls.get(cls, fallback_colors))
|