jitenbot/bot/yomichan/glossary/sankoku8.py

345 lines
12 KiB
Python
Raw Normal View History

2023-07-18 05:43:38 +00:00
import re
import os
from bs4 import BeautifulSoup
import bot.yomichan.glossary.icons as Icons
from bot.data import load_yomichan_name_conversion
from bot.yomichan.glossary.gloss import make_gloss
from bot.name_conversion import convert_names
def make_glossary(entry, media_dir):
    """Build the glossary list for a single dictionary entry.

    The entry's page soup is transformed in place by a fixed sequence of
    passes (the order matters, since later passes rely on the markup left
    behind by earlier ones), then names are converted and the root
    ``span`` is turned into a gloss.

    Args:
        entry: Entry object providing ``get_page_soup()`` and ``target``,
            plus whatever the individual link passes read from it.
        media_dir: Directory holding the media files referenced by the
            generated image nodes.

    Returns:
        A one-element list containing the gloss made from ``soup.span``.
    """
    page = entry.get_page_soup()

    # Passes that only need the markup itself.
    for tidy in (
        __remove_glyph_styles,
        __reposition_marks,
        __remove_links_without_href,
        __remove_appendix_links,
    ):
        tidy(page)

    # Passes that resolve links against other entries.
    for link_pass in (
        __convert_links,
        __add_parent_link,
        __add_homophone_links,
    ):
        link_pass(page, entry)

    __convert_images_to_text(page)

    # Passes that create or reference files under media_dir.
    for media_pass in (
        __text_parens_to_images,
        __replace_icons,
        __replace_accent_symbols,
        __convert_gaiji,
        __convert_graphics,
        __convert_number_icons,
    ):
        media_pass(page, media_dir)

    convert_names(page, load_yomichan_name_conversion(entry.target))
    return [make_gloss(page.span)]
def __remove_glyph_styles(soup):
    """Move the ``style`` attribute of each <glyph> to ``data-style``.

    The css_parser library will emit annoying warning messages later if
    it sees these glyph character styles, so the value is kept under a
    different attribute name instead.
    """
    for glyph in soup.find_all("glyph"):
        if not glyph.has_attr("style"):
            continue
        # pop() removes "style" and hands back its value in one step.
        glyph["data-style"] = glyph.attrs.pop("style")
def __reposition_marks(soup):
    """Move each マーク child to the end of its parent element.

    These マーク symbols will be converted to rubies later, so they need
    to be positioned after the corresponding text in order to appear
    correctly.
    """
    # (parent tag name, mark tag name), processed in this order.
    mark_pairs = (
        ("表外字", "表外字マーク"),
        ("表外音訓", "表外音訓マーク"),
    )
    for parent_name, mark_name in mark_pairs:
        for parent in soup.find_all(parent_name):
            # Appending an element that is already in the tree relocates it.
            parent.append(parent.find(mark_name))
def __remove_links_without_href(soup):
    """Turn every <a> that has no ``href`` into a <span>.

    The original tag name is recorded in a ``data-name`` attribute before
    the element is renamed.
    """
    for anchor in soup.find_all("a"):
        if not anchor.has_attr("href"):
            anchor.attrs["data-name"] = anchor.name
            anchor.name = "span"
def __remove_appendix_links(soup):
    """Unwrap every <a> whose ``href`` starts with "appendix".

    The link element is removed while its contents stay in place.
    Every <a> is expected to carry an ``href`` at this point; one
    without it raises ``KeyError``.
    """
    for anchor in soup.find_all("a"):
        target = anchor.attrs["href"]
        if target.startswith("appendix"):
            anchor.unwrap()
def __convert_links(soup, entry):
    """Rewrite every <a> ``href`` into a query link for the referenced entry.

    Only the part of the ``href`` before the first space is used, with a
    leading "#" removed. It must be a decimal number optionally followed
    by "-" and four uppercase hex digits. The referenced entry is looked
    up in ``entry.ID_TO_ENTRY``; if the exact ID is absent, the entry
    keyed by ``(first ID component, 0)`` is used instead.

    Raises:
        Exception: If an ``href`` does not match the expected ID format.
    """
    id_format = re.compile(r"^[0-9]+(?:-[0-9A-F]{4})?$")
    for anchor in soup.find_all("a"):
        first_token, *_ = anchor.attrs["href"].split(" ")
        href = first_token.removeprefix("#")
        if id_format.match(href) is None:
            raise Exception(f"Invalid href format: {href}")
        ref_id = entry.id_string_to_entry_id(href)
        # Fall back to sub-ID 0 when the exact ID is unknown.
        if ref_id in entry.ID_TO_ENTRY:
            lookup_key = ref_id
        else:
            lookup_key = (ref_id[0], 0)
        expression = entry.ID_TO_ENTRY[lookup_key].get_first_expression()
        anchor.attrs["href"] = f"?query={expression}&wildcards=off"
def __add_parent_link(soup, entry):
    """Turn the first 親見出相当部 element into a link to the parent entry.

    Does nothing when the soup contains no such element. Otherwise the
    element is renamed to <a> and given a query ``href`` built from the
    first expression of ``entry.get_parent()``.
    """
    heading = soup.find("親見出相当部")
    if heading is None:
        return
    expression = entry.get_parent().get_first_expression()
    heading.attrs["href"] = f"?query={expression}&wildcards=off"
    heading.name = "a"
def __add_homophone_links(soup, entry):
    """Replace homophone marker images with links to neighbouring entries.

    The "forward" link targets the entry numbered ``entry.entry_id[0] + 1``
    and the "backward" link the one numbered ``entry.entry_id[0] - 1``,
    both with sub-ID 0. homophone1 gets the forward link, homophone3 the
    backward link, and homophone2 gets both (forward first).
    """
    # NOTE(review): both link labels are empty strings as seen here; they may
    # have held characters that were lost in extraction - confirm.
    entry_number = entry.entry_id[0]
    forward = ("", entry_number + 1)
    backward = ("", entry_number - 1)
    links_for_icon = {
        "svg-logo/homophone1.svg": (forward,),
        "svg-logo/homophone2.svg": (forward, backward),
        "svg-logo/homophone3.svg": (backward,),
    }
    for icon_src, links in links_for_icon.items():
        for icon in soup.find_all("img", attrs={"src": icon_src}):
            for label, target_number in links:
                target = entry.ID_TO_ENTRY[(target_number, 0)]
                expression = target.get_first_expression()
                anchor = BeautifulSoup("<a/>", "xml").a
                anchor.string = label
                anchor.attrs["href"] = f"?query={expression}&wildcards=off"
                icon.append(anchor)
            # Drop the <img> itself, leaving the links in its place.
            icon.unwrap()
def __convert_images_to_text(soup):
    """Replace certain logo images with plain text spans, or remove them.

    Each table row is (image src, replacement text, inline style). An
    empty replacement text means the image is simply unwrapped. Otherwise
    the <img> becomes a <span> containing the text, its old tag name and
    src are kept in ``data-name`` / ``data-src``, and the inline style is
    applied when non-empty.
    """
    # NOTE(review): the first two rows have empty replacement text as seen
    # here, so those images are just removed; the text may have held
    # characters that were lost in extraction - confirm.
    superscript = "vertical-align: super; font-size: 0.6em"
    replacements = (
        ("svg-logo/重要語.svg", "", superscript),
        ("svg-logo/最重要語.svg", "", superscript),
        ("svg-logo/一般常識語.svg", "☆☆", superscript),
        ("svg-logo/追い込み.svg", "", ""),
        ("svg-special/区切り線.svg", "|", ""),
    )
    for src, text, style in replacements:
        for image in soup.find_all("img", attrs={"src": src}):
            if not text:
                image.unwrap()
                continue
            if style:
                image.attrs["style"] = style
            image.attrs["data-name"] = image.name
            # Move the src value over to data-src.
            image.attrs["data-src"] = image.attrs.pop("src")
            image.name = "span"
            image.string = text
def __text_parens_to_images(soup, media_dir):
    """Replace the text of matching <red> elements with a generated image.

    For each <red> element whose text is one of the target characters, an
    SVG named ``red_<char>.svg`` is written into ``media_dir`` via
    ``Icons.make_red_char``. The element is then renamed to <span> (old
    name kept in ``data-name``), emptied, and given an image node that
    points at that file.
    """
    # NOTE(review): both target characters are empty strings as seen here;
    # they may have held characters that were lost in extraction - confirm.
    target_chars = ["", ""]
    for red in soup.find_all("red"):
        char = red.text
        if char not in target_chars:
            continue

        icon_name = f"red_{char}.svg"
        icon_path = os.path.join(media_dir, icon_name)
        Icons.make_red_char(icon_path, char)

        image = BeautifulSoup("<img/>", "xml").img
        image.attrs = {
            "height": 1.0,
            "width": Icons.calculate_ratio(icon_path),
            "sizeUnits": "em",
            "collapsible": False,
            "collapsed": False,
            "background": False,
            "appearance": "auto",
            "path": f"{os.path.basename(media_dir)}/{icon_name}",
        }

        red.attrs["data-name"] = red.name
        red.name = "span"
        red.string = ""
        red.append(image)
        red.attrs["style"] = "vertical-align: text-bottom;"
def __replace_icons(soup, media_dir):
    """Replace known logo images with spans wrapping a sized image node.

    Each table row is (image src, icon text, rectangle class). For every
    matching <img>, ``__make_rectangle`` is called on the icon's path
    under ``media_dir`` (it does nothing for class "none"). The width
    ratio is then read from that file, and the element becomes a <span>
    containing a new image node whose ``appearance`` depends on the class.
    """
    appearance_for = {
        "default": "monochrome",
        "fill": "monochrome",
        "red": "auto",
        "redfill": "auto",
        "none": "monochrome",
    }
    icons = (
        ("svg-logo/アク.svg", "アク", "default"),
        ("svg-logo/丁寧.svg", "丁寧", "default"),
        ("svg-logo/可能.svg", "可能", "default"),
        ("svg-logo/尊敬.svg", "尊敬", "default"),
        ("svg-logo/接尾.svg", "接尾", "default"),
        ("svg-logo/接頭.svg", "接頭", "default"),
        ("svg-logo/表記.svg", "表記", "default"),
        ("svg-logo/謙譲.svg", "謙譲", "default"),
        ("svg-logo/区別.svg", "区別", "redfill"),
        ("svg-logo/由来.svg", "由来", "redfill"),
        ("svg-logo/人.svg", "", "none"),
        ("svg-logo/他.svg", "", "none"),
        ("svg-logo/動.svg", "", "none"),
        ("svg-logo/名.svg", "", "none"),
        ("svg-logo/句.svg", "", "none"),
        ("svg-logo/派.svg", "", "none"),
        ("svg-logo/自.svg", "", "none"),
        ("svg-logo/連.svg", "", "none"),
        ("svg-logo/造.svg", "", "none"),
        ("svg-logo/造2.svg", "", "none"),
        ("svg-logo/造3.svg", "", "none"),
        ("svg-logo/百科.svg", "", "none"),
    )
    for src, text, cls in icons:
        for icon in soup.find_all("img", attrs={"src": src}):
            # src uses "/" separators; rebuild it with the OS separator.
            icon_path = os.path.join(media_dir, *src.split("/"))
            __make_rectangle(icon_path, text, cls)

            image = BeautifulSoup("<img/>", "xml").img
            image.attrs = {
                "height": 1.0,
                "width": Icons.calculate_ratio(icon_path),
                "sizeUnits": "em",
                "collapsible": False,
                "collapsed": False,
                "background": False,
                "appearance": appearance_for[cls],
                "title": icon.attrs.get("alt", ""),
                "path": f"{os.path.basename(media_dir)}/{src}",
            }

            icon.name = "span"
            icon.clear()
            icon.append(image)
            icon.attrs["style"] = "vertical-align: text-bottom; margin-right: 0.25em;"
def __replace_accent_symbols(soup, media_dir):
    """Replace accent marker images with spans wrapping a generated SVG.

    For every matching <img>, the corresponding ``Icons`` writer is called
    with the symbol's path under ``media_dir``. The element then becomes a
    <span> containing a new image node that points at that file.
    """
    svg_writers = {
        "svg-accent/平板.svg": Icons.make_heiban,
        "svg-accent/アクセント.svg": Icons.make_accent,
    }
    for src, write_svg in svg_writers.items():
        for symbol in soup.find_all("img", attrs={"src": src}):
            # src uses "/" separators; rebuild it with the OS separator.
            svg_path = os.path.join(media_dir, *src.split("/"))
            write_svg(svg_path)

            image = BeautifulSoup("<img/>", "xml").img
            image.attrs = {
                "height": 1.0,
                "width": Icons.calculate_ratio(svg_path),
                "sizeUnits": "em",
                "collapsible": False,
                "collapsed": False,
                "background": False,
                "appearance": "auto",
                "path": f"{os.path.basename(media_dir)}/{src}",
            }

            symbol.name = "span"
            symbol.clear()
            symbol.append(image)
            symbol.attrs["style"] = "vertical-align: super; margin-left: -0.5em;"
2023-07-18 05:43:38 +00:00
def __convert_gaiji(soup, media_dir):
    """Wrap every remaining non-graphics image in a span with a sized image.

    Any <img> that has a ``src`` not starting with "graphics" is handled.
    The width ratio is read from the file at the matching path under
    ``media_dir``, and the element becomes a <span> containing a new
    monochrome image node. Images without ``src`` are left alone.
    """
    for glyph in soup.find_all("img"):
        if not glyph.has_attr("src"):
            continue
        src = glyph.attrs["src"]
        if src.startswith("graphics"):
            continue

        # Ignore blank path segments (e.g. from a leading or doubled "/").
        segments = [seg for seg in src.split("/") if seg.strip() != ""]
        glyph_path = os.path.join(media_dir, *segments)

        image = BeautifulSoup("<img/>", "xml").img
        image.attrs = {
            "height": 1.0,
            "width": Icons.calculate_ratio(glyph_path),
            "sizeUnits": "em",
            "collapsible": False,
            "collapsed": False,
            "background": False,
            "appearance": "monochrome",
            "title": glyph.attrs.get("alt", ""),
            "path": f"{os.path.basename(media_dir)}/{src}",
        }

        glyph.name = "span"
        glyph.clear()
        glyph.append(image)
        glyph.attrs["style"] = "vertical-align: text-bottom;"
def __convert_graphics(soup, media_dir):
    """Rewrite the attributes of every image whose ``src`` starts with "graphics".

    The element stays an <img>, but its attributes are replaced wholesale
    with a collapsible, initially collapsed image description. The
    original ``src`` is kept and the ``alt`` text (or "") becomes the
    title.
    """
    for graphic in soup.find_all("img"):
        is_graphic = graphic.has_attr("src") and graphic.attrs["src"].startswith(
            "graphics"
        )
        if not is_graphic:
            continue
        src = graphic.attrs["src"]
        # Build the new attributes first, since they read from the old ones.
        replacement = {
            "collapsible": True,
            "collapsed": True,
            "title": graphic.attrs.get("alt", ""),
            "path": f"{os.path.basename(media_dir)}/{src}",
            "src": src,
        }
        graphic.attrs = replacement
def __convert_number_icons(soup, media_dir):
    """Replace each 大語義番号 element's text with a generated rectangle icon.

    Numbers outside any <a> get a "fill" rectangle rendered monochrome;
    numbers inside an <a> get a "bluefill" rectangle rendered with
    appearance "auto". The icon file is named ``<text>-<class>.svg`` in
    ``media_dir`` and the element becomes a <span> wrapping the image.
    """
    for number in soup.find_all("大語義番号"):
        label = number.text
        if number.find_parent("a") is None:
            cls, appearance = "fill", "monochrome"
        else:
            cls, appearance = "bluefill", "auto"

        filename = f"{label}-{cls}.svg"
        icon_path = os.path.join(media_dir, filename)
        __make_rectangle(icon_path, label, cls)

        image = BeautifulSoup("<img/>", "xml").img
        image.attrs = {
            "height": 1.0,
            "width": Icons.calculate_ratio(icon_path),
            "sizeUnits": "em",
            "collapsible": False,
            "collapsed": False,
            "background": False,
            "appearance": appearance,
            "title": label,
            "path": f"{os.path.basename(media_dir)}/{filename}",
        }

        number.name = "span"
        number.clear()
        number.append(image)
        number.attrs["style"] = "vertical-align: text-bottom; margin-right: 0.25em;"
def __make_rectangle(path, text, cls):
    """Write the rectangle icon for ``text`` to ``path`` in the style ``cls``.

    "none" writes nothing, "fill" uses the monochrome filled rectangle,
    and "red", "redfill" and "bluefill" select fixed colour sets. Any
    other class falls back to black with a transparent centre.
    """
    if cls == "none":
        return
    if cls == "fill":
        Icons.make_monochrome_fill_rectangle(path, text)
        return

    # Colour arguments in the order Icons.make_rectangle takes them after
    # (path, text).
    colour_sets = {
        "red": ("red", "white", "red"),
        "redfill": ("red", "red", "white"),
        "bluefill": ("blue", "blue", "white"),
    }
    colours = colour_sets.get(cls, ("black", "transparent", "black"))
    Icons.make_rectangle(path, text, *colours)