jitenbot/bot/yomichan/glossary/sankoku8.py

345 lines
12 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

import re
import os
from bs4 import BeautifulSoup
import bot.yomichan.glossary.icons as Icons
from bot.data import load_yomichan_name_conversion
from bot.yomichan.glossary.gloss import make_gloss
from bot.name_conversion import convert_names
def make_glossary(entry, media_dir):
    """Build the glossary list for a single dictionary entry.

    The entry's page soup is fetched and rewritten in place by a fixed
    sequence of passes, the name conversion loaded for ``entry.target`` is
    applied, and the gloss made from ``soup.span`` is returned wrapped in a
    one-element list.

    Args:
        entry: Entry object providing ``get_page_soup()`` and ``target``.
        media_dir: Directory that the media-producing passes write into.

    Returns:
        A list containing exactly one gloss.
    """
    soup = entry.get_page_soup()

    # Structural clean-up.  Anchors without an href are demoted before the
    # link passes run, because those passes index attrs["href"] directly.
    __remove_glyph_styles(soup)
    __reposition_marks(soup)
    __remove_links_without_href(soup)
    __remove_appendix_links(soup)

    # Passes that need the entry to resolve link targets.
    for link_pass in (
        __convert_links,
        __add_parent_link,
        __add_homophone_links,
    ):
        link_pass(soup, entry)

    __convert_images_to_text(soup)

    # Passes that need the media directory.
    for media_pass in (
        __text_parens_to_images,
        __replace_icons,
        __replace_accent_symbols,
        __convert_gaiji,
        __convert_graphics,
        __convert_number_icons,
    ):
        media_pass(soup, media_dir)

    convert_names(soup, load_yomichan_name_conversion(entry.target))
    return [make_gloss(soup.span)]
def __remove_glyph_styles(soup):
    """Move the ``style`` attribute of every <glyph> to ``data-style``.

    The css_parser library will emit annoying warning messages later if it
    sees these glyph character styles, so the value is kept under another
    attribute name instead.
    """
    for glyph in soup.find_all("glyph"):
        if not glyph.has_attr("style"):
            continue
        # pop() removes "style" and hands back its value in one step.
        glyph["data-style"] = glyph.attrs.pop("style")
def __reposition_marks(soup):
    """Move each mark element to the end of the element that contains it.

    These mark symbols will be converted to rubies later, so they need to
    be positioned after the corresponding text in order to appear
    correctly.
    """
    # (container tag name, mark tag name looked up inside the container)
    container_and_mark = (
        ("表外字", "表外字マーク"),
        ("表外音訓", "表外音訓マーク"),
    )
    for container_name, mark_name in container_and_mark:
        for container in soup.find_all(container_name):
            container.append(container.find(mark_name))
def __remove_links_without_href(soup):
    """Turn every <a> that has no ``href`` attribute into a <span>.

    The original tag name is recorded in the ``data-name`` attribute
    before the element is renamed.  Anchors with an ``href`` are left
    untouched.
    """
    for anchor in soup.find_all("a"):
        if not anchor.has_attr("href"):
            anchor.attrs["data-name"] = anchor.name
            anchor.name = "span"
def __remove_appendix_links(soup):
    """Unwrap every <a> whose ``href`` starts with ``"appendix"``.

    Unwrapping removes the anchor tag itself; other anchors are left
    untouched.  An anchor with no ``href`` attribute is treated as a
    non-appendix link and skipped, so this pass does not raise ``KeyError``
    when it meets one.
    """
    for anchor in soup.find_all("a"):
        # .get() with an empty-string default: a missing href simply fails
        # the prefix test instead of raising.
        if anchor.attrs.get("href", "").startswith("appendix"):
            anchor.unwrap()
def __convert_links(soup, entry):
    """Rewrite the ``href`` of every <a> into a query for the linked entry.

    Only the part of the href before the first space is used, with a
    leading ``#`` removed.  That string is converted to an entry ID via
    ``entry.id_string_to_entry_id``; if the ID is not a key of
    ``entry.ID_TO_ENTRY``, the ID ``(first component, 0)`` is used instead.
    The href becomes a query on the referenced entry's first expression.

    Raises:
        Exception: If the cleaned href does not match the expected
            ``digits`` or ``digits-XXXX`` (four uppercase hex digits)
            format.
    """
    for anchor in soup.find_all("a"):
        target = anchor.attrs["href"].split(" ")[0].removeprefix("#")
        if re.match(r"^[0-9]+(?:-[0-9A-F]{4})?$", target) is None:
            raise Exception(f"Invalid href format: {target}")

        entry_id = entry.id_string_to_entry_id(target)
        if entry_id not in entry.ID_TO_ENTRY:
            # Unknown ID: fall back to the ID with second component 0.
            entry_id = (entry_id[0], 0)

        expression = entry.ID_TO_ENTRY[entry_id].get_first_expression()
        anchor.attrs["href"] = f"?query={expression}&wildcards=off"
def __add_parent_link(soup, entry):
    """Turn the first <親見出相当部> element into a link to the parent entry.

    If the soup contains such an element, it receives an ``href`` that
    queries the first expression of ``entry.get_parent()`` and is renamed
    to <a>.  If no such element exists, nothing happens and the entry is
    not consulted.
    """
    heading = soup.find("親見出相当部")
    if heading is None:
        return
    expression = entry.get_parent().get_first_expression()
    heading.attrs["href"] = f"?query={expression}&wildcards=off"
    heading.name = "a"
def __add_homophone_links(soup, entry):
    """Replace homophone icon images with links to neighbouring entries.

    The "forward" target is the entry whose ID is
    ``(entry.entry_id[0] + 1, 0)`` and the "backward" target is
    ``(entry.entry_id[0] - 1, 0)``.  Which links an image receives depends
    on its ``src``.  Each link is appended to the image element, which is
    then unwrapped so that only the links remain.
    """
    base_id = entry.entry_id[0]
    # NOTE(review): both link labels are empty strings as written here --
    # confirm no label characters were lost from the file.
    forward = ("", base_id + 1)
    backward = ("", base_id - 1)
    links_by_src = {
        "svg-logo/homophone1.svg": (forward,),
        "svg-logo/homophone2.svg": (forward, backward),
        "svg-logo/homophone3.svg": (backward,),
    }
    for src, targets in links_by_src.items():
        for elm in soup.find_all("img", attrs={"src": src}):
            for label, target_id in targets:
                target_entry = entry.ID_TO_ENTRY[(target_id, 0)]
                expression = target_entry.get_first_expression()
                anchor = BeautifulSoup("<a/>", "xml").a
                anchor.string = label
                anchor.attrs["href"] = f"?query={expression}&wildcards=off"
                elm.append(anchor)
            # Drop the <img> itself, keeping the links just appended to it.
            elm.unwrap()
def __convert_images_to_text(soup):
    """Replace selected logo images with plain text, or drop them.

    For every <img> whose ``src`` is listed below: when the replacement
    text is empty the image is unwrapped; otherwise the element becomes a
    <span> holding the text, with the listed inline style (if any), the
    old tag name kept in ``data-name`` and the old ``src`` moved to
    ``data-src``.
    """
    superscript = "vertical-align: super; font-size: 0.6em"
    # (image src, replacement text, inline style)
    # NOTE(review): the first two replacement texts are empty strings as
    # written here, so those images are simply unwrapped -- confirm no
    # characters were lost from the file.
    replacements = (
        ("svg-logo/重要語.svg", "", superscript),
        ("svg-logo/最重要語.svg", "", superscript),
        ("svg-logo/一般常識語.svg", "☆☆", superscript),
        ("svg-logo/追い込み.svg", "", ""),
        ("svg-special/区切り線.svg", "|", ""),
    )
    for src, text, style in replacements:
        for image in soup.find_all("img", attrs={"src": src}):
            if not text:
                image.unwrap()
                continue
            if style:
                image.attrs["style"] = style
            image.attrs["data-name"] = image.name
            # Move the src value to data-src; the original key goes away.
            image.attrs["data-src"] = image.attrs.pop("src")
            image.name = "span"
            image.string = text
def __text_parens_to_images(soup, media_dir):
    """Replace the text of matching <red> elements with a generated image.

    A <red> element is processed only when its text is one of the accepted
    values.  For those, ``Icons.make_red_char`` writes ``red_<text>.svg``
    into ``media_dir``, and the element becomes a <span> (old tag name
    kept in ``data-name``) whose only content is an <img> pointing at that
    file.
    """
    # NOTE(review): both accepted values are empty strings as written
    # here, although the function name suggests parenthesis characters --
    # confirm no characters were lost from the file.
    accepted = ["", ""]
    for elm in soup.find_all("red"):
        char = elm.text
        if char in accepted:
            filename = f"red_{char}.svg"
            path = os.path.join(media_dir, filename)
            Icons.make_red_char(path, char)
            ratio = Icons.calculate_ratio(path)

            img = BeautifulSoup("<img/>", "xml").img
            img.attrs = {
                "height": 1.0,
                "width": ratio,
                "sizeUnits": "em",
                "collapsible": False,
                "collapsed": False,
                "background": False,
                "appearance": "auto",
                "path": f"{os.path.basename(media_dir)}/{filename}",
            }

            elm.attrs["data-name"] = elm.name
            elm.name = "span"
            elm.string = ""
            elm.append(img)
            elm.attrs["style"] = "vertical-align: text-bottom;"
def __replace_icons(soup, media_dir):
    """Replace known logo images with <span>-wrapped icon images.

    For every <img> whose ``src`` is listed below, the icon file path is
    formed by joining ``media_dir`` with the ``/``-separated parts of the
    src, ``__make_rectangle`` is called for that path with the icon's
    label and class, and the element becomes a <span> whose only child is
    a new <img> describing the icon.  The ``alt`` text of the original
    image, if any, is carried over as the new image's ``title``.
    """
    appearance_by_cls = {
        "default": "monochrome",
        "fill": "monochrome",
        "red": "auto",
        "redfill": "auto",
        "none": "monochrome",
    }
    # (image src, label text, rectangle class)
    icons = (
        ("svg-logo/アク.svg", "アク", "default"),
        ("svg-logo/丁寧.svg", "丁寧", "default"),
        ("svg-logo/可能.svg", "可能", "default"),
        ("svg-logo/尊敬.svg", "尊敬", "default"),
        ("svg-logo/接尾.svg", "接尾", "default"),
        ("svg-logo/接頭.svg", "接頭", "default"),
        ("svg-logo/表記.svg", "表記", "default"),
        ("svg-logo/謙譲.svg", "謙譲", "default"),
        ("svg-logo/区別.svg", "区別", "redfill"),
        ("svg-logo/由来.svg", "由来", "redfill"),
        ("svg-logo/人.svg", "", "none"),
        ("svg-logo/他.svg", "", "none"),
        ("svg-logo/動.svg", "", "none"),
        ("svg-logo/名.svg", "", "none"),
        ("svg-logo/句.svg", "", "none"),
        ("svg-logo/派.svg", "", "none"),
        ("svg-logo/自.svg", "", "none"),
        ("svg-logo/連.svg", "", "none"),
        ("svg-logo/造.svg", "", "none"),
        ("svg-logo/造2.svg", "", "none"),
        ("svg-logo/造3.svg", "", "none"),
        ("svg-logo/百科.svg", "", "none"),
    )
    for src, label, cls in icons:
        for elm in soup.find_all("img", attrs={"src": src}):
            path = os.path.join(media_dir, *src.split("/"))
            __make_rectangle(path, label, cls)
            ratio = Icons.calculate_ratio(path)

            img = BeautifulSoup("<img/>", "xml").img
            img.attrs = {
                "height": 1.0,
                "width": ratio,
                "sizeUnits": "em",
                "collapsible": False,
                "collapsed": False,
                "background": False,
                "appearance": appearance_by_cls[cls],
                "title": elm.attrs.get("alt", ""),
                "path": f"{os.path.basename(media_dir)}/{src}",
            }

            elm.name = "span"
            elm.clear()
            elm.append(img)
            elm.attrs["style"] = (
                "vertical-align: text-bottom; margin-right: 0.25em;"
            )
def __replace_accent_symbols(soup, media_dir):
    """Replace the two accent images with <span>-wrapped icon images.

    For every <img> whose ``src`` is listed below, the paired ``Icons``
    function is called with the icon's path under ``media_dir``, and the
    element becomes a <span> whose only child is a new <img> describing
    that icon.
    """
    # image src -> Icons function called with the output path
    writer_by_src = {
        "svg-accent/平板.svg": Icons.make_heiban,
        "svg-accent/アクセント.svg": Icons.make_accent,
    }
    for src, write_svg in writer_by_src.items():
        for elm in soup.find_all("img", attrs={"src": src}):
            path = os.path.join(media_dir, *src.split("/"))
            write_svg(path)
            ratio = Icons.calculate_ratio(path)

            img = BeautifulSoup("<img/>", "xml").img
            img.attrs = {
                "height": 1.0,
                "width": ratio,
                "sizeUnits": "em",
                "collapsible": False,
                "collapsed": False,
                "background": False,
                "appearance": "auto",
                "path": f"{os.path.basename(media_dir)}/{src}",
            }

            elm.name = "span"
            elm.clear()
            elm.append(img)
            elm.attrs["style"] = "vertical-align: super; margin-left: -0.5em;"
def __convert_gaiji(soup, media_dir):
    """Wrap every remaining non-graphics image in a <span>.

    Images without a ``src``, and images whose ``src`` starts with
    ``"graphics"``, are skipped.  For the rest, the width ratio is
    calculated from the file found by joining ``media_dir`` with the
    non-blank ``/``-separated parts of the src, and the element becomes a
    <span> whose only child is a new <img>.  The ``alt`` text of the
    original image, if any, is carried over as the new image's ``title``.
    """
    for elm in soup.find_all("img"):
        if not elm.has_attr("src") or elm.attrs["src"].startswith("graphics"):
            continue
        src = elm.attrs["src"]

        # Blank path components are ignored when locating the file.
        parts = [part for part in src.split("/") if part.strip() != ""]
        ratio = Icons.calculate_ratio(os.path.join(media_dir, *parts))

        img = BeautifulSoup("<img/>", "xml").img
        img.attrs = {
            "height": 1.0,
            "width": ratio,
            "sizeUnits": "em",
            "collapsible": False,
            "collapsed": False,
            "background": False,
            "appearance": "monochrome",
            "title": elm.attrs.get("alt", ""),
            "path": f"{os.path.basename(media_dir)}/{src}",
        }

        elm.name = "span"
        elm.clear()
        elm.append(img)
        elm.attrs["style"] = "vertical-align: text-bottom;"
def __convert_graphics(soup, media_dir):
    """Replace the attributes of every graphics image.

    Only <img> elements whose ``src`` starts with ``"graphics"`` are
    touched.  Their attributes are replaced wholesale: the image is marked
    collapsible and collapsed, ``title`` takes the old ``alt`` text (empty
    if there was none), ``path`` is the src prefixed with the base name of
    ``media_dir``, and ``src`` is kept.
    """
    media_name = None
    for image in soup.find_all("img"):
        if not image.has_attr("src"):
            continue
        src = image.attrs["src"]
        if src.startswith("graphics"):
            if media_name is None:
                media_name = os.path.basename(media_dir)
            title = image.attrs.get("alt", "")
            image.attrs = {
                "collapsible": True,
                "collapsed": True,
                "title": title,
                "path": f"{media_name}/{src}",
                "src": src,
            }
def __convert_number_icons(soup, media_dir):
    """Replace every <大語義番号> element with a <span>-wrapped icon image.

    The element's text is used as the icon label.  An element with no <a>
    ancestor gets a ``fill`` rectangle with ``monochrome`` appearance; an
    element inside an <a> gets a ``bluefill`` rectangle with ``auto``
    appearance.  The icon file is named ``<text>-<class>.svg`` inside
    ``media_dir``.
    """
    for elm in soup.find_all("大語義番号"):
        inside_link = elm.find_parent("a") is not None
        if inside_link:
            cls, appearance = "bluefill", "auto"
        else:
            cls, appearance = "fill", "monochrome"

        label = elm.text
        filename = f"{label}-{cls}.svg"
        path = os.path.join(media_dir, filename)
        __make_rectangle(path, label, cls)
        ratio = Icons.calculate_ratio(path)

        img = BeautifulSoup("<img/>", "xml").img
        img.attrs = {
            "height": 1.0,
            "width": ratio,
            "sizeUnits": "em",
            "collapsible": False,
            "collapsed": False,
            "background": False,
            "appearance": appearance,
            "title": label,
            "path": f"{os.path.basename(media_dir)}/{filename}",
        }

        elm.name = "span"
        elm.clear()
        elm.append(img)
        elm.attrs["style"] = (
            "vertical-align: text-bottom; margin-right: 0.25em;"
        )
def __make_rectangle(path, text, cls):
    """Dispatch to the ``Icons`` function matching the icon class ``cls``.

    * ``"none"``: nothing is called.
    * ``"fill"``: ``Icons.make_monochrome_fill_rectangle(path, text)``.
    * ``"red"``, ``"redfill"``, ``"bluefill"``: ``Icons.make_rectangle``
      with the three colour arguments listed below.
    * anything else: ``Icons.make_rectangle`` with the default colours.
    """
    if cls == "none":
        return
    if cls == "fill":
        Icons.make_monochrome_fill_rectangle(path, text)
        return

    # The three colour arguments, in the order make_rectangle takes them.
    colors_by_cls = {
        "red": ("red", "white", "red"),
        "redfill": ("red", "red", "white"),
        "bluefill": ("blue", "blue", "white"),
    }
    colors = colors_by_cls.get(cls, ("black", "transparent", "black"))
    Icons.make_rectangle(path, text, *colors)