import re
import os

from bs4 import BeautifulSoup

import bot.yomichan.glossary.icons as Icons
from bot.data import load_yomichan_name_conversion
from bot.yomichan.glossary.gloss import make_gloss
from bot.name_conversion import convert_names


def make_glossary(entry, media_dir):
    """Build the glossary list for a dictionary entry.

    The entry's page soup is rewritten in place by a fixed sequence of
    passes (the order matters: e.g. links without an href are turned
    into spans before the link passes index ``attrs["href"]``), names
    are converted using the conversion table loaded for
    ``entry.target``, and the soup's first ``span`` is handed to
    ``make_gloss``.

    Args:
        entry: Entry object providing ``get_page_soup()``, ``target``
            and the lookups used by the link passes.
        media_dir: Directory in which icon SVG files are written and
            from which image ratios are read.

    Returns:
        A one-element list containing the gloss.
    """
    soup = entry.get_page_soup()
    __remove_glyph_styles(soup)
    __reposition_marks(soup)
    __remove_links_without_href(soup)
    __remove_appendix_links(soup)
    __convert_links(soup, entry)
    __add_parent_link(soup, entry)
    __add_homophone_links(soup, entry)
    __convert_images_to_text(soup)
    __text_parens_to_images(soup, media_dir)
    __replace_icons(soup, media_dir)
    __replace_accent_symbols(soup, media_dir)
    __convert_gaiji(soup, media_dir)
    __convert_graphics(soup, media_dir)
    __convert_number_icons(soup, media_dir)
    name_conversion = load_yomichan_name_conversion(entry.target)
    convert_names(soup, name_conversion)
    gloss = make_gloss(soup.span)
    glossary = [gloss]
    return glossary


def __new_tag(name):
    """Return a fresh, detached, empty XML element called ``name``."""
    # The parsed markup must actually contain the element: looking up
    # a tag on an empty document yields None, not a new tag.
    return BeautifulSoup(f"<{name}/>", "xml").find(name)


def __query_href(expression):
    """Return the search href used for links to ``expression``."""
    return f"?query={expression}&wildcards=off"


def __make_inline_img(media_dir, rel_path, ratio, appearance, title=None):
    """Return a detached ``img`` element describing a 1em-high image.

    ``rel_path`` is the image location relative to ``media_dir``; the
    stored "path" attribute is prefixed with the base name of
    ``media_dir``. The "title" attribute is only set when ``title`` is
    not None (an empty string is still stored).
    """
    attrs = {
        "height": 1.0,
        "width": ratio,
        "sizeUnits": "em",
        "collapsible": False,
        "collapsed": False,
        "background": False,
        "appearance": appearance,
    }
    if title is not None:
        attrs["title"] = title
    # "path" is added last so the key order matches the attribute
    # order that was previously written out by hand at each call site.
    attrs["path"] = f"{os.path.basename(media_dir)}/{rel_path}"
    img = __new_tag("img")
    img.attrs = attrs
    return img


def __remove_glyph_styles(soup):
    """Move the ``style`` attribute of glyph elements to ``data-style``.

    The css_parser library will emit annoying warning messages later
    if it sees these glyph character styles.
    """
    for elm in soup.find_all("glyph"):
        if elm.has_attr("style"):
            elm["data-style"] = elm.attrs["style"]
            del elm.attrs["style"]


def __reposition_marks(soup):
    """Move each mark element to the end of its parent element.

    These マーク symbols will be converted to rubies later, so they
    need to be positioned after the corresponding text in order to
    appear correctly.
    """
    mark_names = (
        ("表外字", "表外字マーク"),
        ("表外音訓", "表外音訓マーク"),
    )
    for parent_name, mark_name in mark_names:
        for elm in soup.find_all(parent_name):
            # NOTE(review): assumes every such element contains its
            # mark; find() returns None otherwise -- confirm with data.
            mark = elm.find(mark_name)
            elm.append(mark)


def __remove_links_without_href(soup):
    """Turn ``a`` elements lacking an href into ``span`` elements.

    The original tag name is kept in the ``data-name`` attribute.
    """
    for elm in soup.find_all("a"):
        if elm.has_attr("href"):
            continue
        elm.attrs["data-name"] = elm.name
        elm.name = "span"


def __remove_appendix_links(soup):
    """Unwrap links whose href starts with "appendix"."""
    for elm in soup.find_all("a"):
        if elm.attrs["href"].startswith("appendix"):
            elm.unwrap()


def __convert_links(soup, entry):
    """Rewrite every link href into a search query for its target.

    The part of the href before the first space, minus a leading "#",
    must be an entry ID string. It is resolved through
    ``entry.ID_TO_ENTRY``, falling back to sub-entry 0 of the same main
    ID when the exact ID is not present.

    Raises:
        Exception: If an href does not have the expected ID format.
    """
    for elm in soup.find_all("a"):
        href = elm.attrs["href"].split(" ")[0]
        href = href.removeprefix("#")
        if not re.match(r"^[0-9]+(?:-[0-9A-F]{4})?$", href):
            raise Exception(f"Invalid href format: {href}")
        ref_entry_id = entry.id_string_to_entry_id(href)
        if ref_entry_id in entry.ID_TO_ENTRY:
            ref_entry = entry.ID_TO_ENTRY[ref_entry_id]
        else:
            ref_entry = entry.ID_TO_ENTRY[(ref_entry_id[0], 0)]
        expression = ref_entry.get_first_expression()
        elm.attrs["href"] = __query_href(expression)


def __add_parent_link(soup, entry):
    """Turn the first 親見出相当部 element into a link to the parent entry."""
    elm = soup.find("親見出相当部")
    if elm is not None:
        parent_entry = entry.get_parent()
        expression = parent_entry.get_first_expression()
        elm.attrs["href"] = __query_href(expression)
        elm.name = "a"


def __add_homophone_links(soup, entry):
    """Replace homophone icons with arrow links to neighbouring entries.

    Each homophone image is replaced by one or two ``a`` elements
    pointing at the entries whose main ID is one above and/or one
    below the main ID of ``entry``.
    """
    forward_link = ("←", entry.entry_id[0] + 1)
    backward_link = ("→", entry.entry_id[0] - 1)
    homophone_info_list = (
        ("svg-logo/homophone1.svg", (forward_link,)),
        ("svg-logo/homophone2.svg", (forward_link, backward_link)),
        ("svg-logo/homophone3.svg", (backward_link,)),
    )
    for filename, link_info in homophone_info_list:
        for elm in soup.find_all("img", attrs={"src": filename}):
            for text, link_id in link_info:
                link_entry = entry.ID_TO_ENTRY[(link_id, 0)]
                expression = link_entry.get_first_expression()
                link = __new_tag("a")
                link.string = text
                link.attrs["href"] = __query_href(expression)
                elm.append(link)
            # Drop the img itself while keeping the links just added.
            elm.unwrap()


def __convert_images_to_text(soup):
    """Replace certain images with styled text spans, or drop them.

    Images mapped to an empty text are removed. The others become
    ``span`` elements holding the text, with the original tag name and
    src preserved in ``data-name`` and ``data-src``.
    """
    conversions = (
        ("svg-logo/重要語.svg", "*",
         "vertical-align: super; font-size: 0.6em"),
        ("svg-logo/最重要語.svg", "**",
         "vertical-align: super; font-size: 0.6em"),
        ("svg-logo/一般常識語.svg", "☆☆",
         "vertical-align: super; font-size: 0.6em"),
        ("svg-logo/追い込み.svg", "", ""),
        ("svg-special/区切り線.svg", "|", ""),
    )
    for filename, text, style in conversions:
        for elm in soup.find_all("img", attrs={"src": filename}):
            if text == "":
                elm.unwrap()
                continue
            if style != "":
                elm.attrs["style"] = style
            elm.attrs["data-name"] = elm.name
            elm.attrs["data-src"] = elm.attrs["src"]
            elm.name = "span"
            elm.string = text
            del elm.attrs["src"]


def __text_parens_to_images(soup, media_dir):
    """Replace ``red`` elements holding a single paren with an SVG image.

    The SVG is written to ``media_dir`` and the element becomes a
    ``span`` wrapping an ``img`` that references it.
    """
    for elm in soup.find_all("red"):
        char = elm.text
        if char not in ["(", ")"]:
            continue
        filename = f"red_{char}.svg"
        path = os.path.join(media_dir, filename)
        Icons.make_red_char(path, char)
        ratio = Icons.calculate_ratio(path)
        img = __make_inline_img(media_dir, filename, ratio, "auto")
        elm.attrs["data-name"] = elm.name
        elm.name = "span"
        elm.string = ""
        elm.append(img)
        elm.attrs["style"] = "vertical-align: text-bottom;"


def __replace_icons(soup, media_dir):
    """Replace known logo images with spans wrapping generated icons.

    For each listed src, a rectangle icon is generated according to
    its class (nothing is generated for class "none") and the ``img``
    element becomes a ``span`` containing a new ``img`` that points
    into the media directory.
    """
    cls_to_appearance = {
        "default": "monochrome",
        "fill": "monochrome",
        "red": "auto",
        "redfill": "auto",
        "none": "monochrome",
    }
    icon_info_list = (
        ("svg-logo/アク.svg", "アク", "default"),
        ("svg-logo/丁寧.svg", "丁寧", "default"),
        ("svg-logo/可能.svg", "可能", "default"),
        ("svg-logo/尊敬.svg", "尊敬", "default"),
        ("svg-logo/接尾.svg", "接尾", "default"),
        ("svg-logo/接頭.svg", "接頭", "default"),
        ("svg-logo/表記.svg", "表記", "default"),
        ("svg-logo/謙譲.svg", "謙譲", "default"),
        ("svg-logo/区別.svg", "区別", "redfill"),
        ("svg-logo/由来.svg", "由来", "redfill"),
        ("svg-logo/人.svg", "", "none"),
        ("svg-logo/他.svg", "", "none"),
        ("svg-logo/動.svg", "", "none"),
        ("svg-logo/名.svg", "", "none"),
        ("svg-logo/句.svg", "", "none"),
        ("svg-logo/派.svg", "", "none"),
        ("svg-logo/自.svg", "", "none"),
        ("svg-logo/連.svg", "", "none"),
        ("svg-logo/造.svg", "", "none"),
        ("svg-logo/造2.svg", "", "none"),
        ("svg-logo/造3.svg", "", "none"),
        ("svg-logo/百科.svg", "", "none"),
    )
    for src, text, cls in icon_info_list:
        for elm in soup.find_all("img", attrs={"src": src}):
            path = os.path.join(media_dir, *src.split("/"))
            __make_rectangle(path, text, cls)
            ratio = Icons.calculate_ratio(path)
            title = elm.attrs["alt"] if elm.has_attr("alt") else ""
            img = __make_inline_img(
                media_dir, src, ratio, cls_to_appearance[cls], title)
            elm.name = "span"
            elm.clear()
            elm.append(img)
            elm.attrs["style"] = \
                "vertical-align: text-bottom; margin-right: 0.25em;"


def __replace_accent_symbols(soup, media_dir):
    """Replace accent images with spans wrapping generated SVG icons."""
    accent_info_list = (
        ("svg-accent/平板.svg", Icons.make_heiban),
        ("svg-accent/アクセント.svg", Icons.make_accent),
    )
    for src, write_svg_function in accent_info_list:
        for elm in soup.find_all("img", attrs={"src": src}):
            path = os.path.join(media_dir, *src.split("/"))
            write_svg_function(path)
            ratio = Icons.calculate_ratio(path)
            img = __make_inline_img(media_dir, src, ratio, "auto")
            elm.name = "span"
            elm.clear()
            elm.append(img)
            elm.attrs["style"] = "vertical-align: text-bottom;"


def __convert_gaiji(soup, media_dir):
    """Wrap every remaining non-graphics image in a styled span.

    Images without a src, or whose src starts with "graphics", are
    left untouched. The image ratio is read from the file found under
    ``media_dir`` at the src path.
    """
    for elm in soup.find_all("img"):
        if not elm.has_attr("src"):
            continue
        src = elm.attrs["src"]
        if src.startswith("graphics"):
            continue
        # Blank path components are skipped when building the path.
        parts = [part for part in src.split("/") if part.strip() != ""]
        path = os.path.join(media_dir, *parts)
        ratio = Icons.calculate_ratio(path)
        title = elm.attrs["alt"] if elm.has_attr("alt") else ""
        img = __make_inline_img(media_dir, src, ratio, "monochrome", title)
        elm.name = "span"
        elm.clear()
        elm.append(img)
        elm.attrs["style"] = "vertical-align: text-bottom;"


def __convert_graphics(soup, media_dir):
    """Replace the attributes of "graphics" images with collapsible ones."""
    for elm in soup.find_all("img"):
        if not elm.has_attr("src"):
            continue
        src = elm.attrs["src"]
        if not src.startswith("graphics"):
            continue
        # The right-hand side is fully evaluated before the old
        # attributes are replaced, so reading "alt" here is safe.
        elm.attrs = {
            "collapsible": True,
            "collapsed": True,
            "title": elm.attrs["alt"] if elm.has_attr("alt") else "",
            "path": f"{os.path.basename(media_dir)}/{src}",
            "src": src,
        }


def __convert_number_icons(soup, media_dir):
    """Replace 大語義番号 elements with spans wrapping number icons.

    A blue-filled icon is generated for numbers located inside a link,
    and a monochrome filled icon otherwise.
    """
    for elm in soup.find_all("大語義番号"):
        # Read the text up front: the element is cleared further down.
        text = elm.text
        if elm.find_parent("a") is None:
            filename = f"{text}-fill.svg"
            appearance = "monochrome"
            path = os.path.join(media_dir, filename)
            __make_rectangle(path, text, "fill")
        else:
            filename = f"{text}-bluefill.svg"
            appearance = "auto"
            path = os.path.join(media_dir, filename)
            __make_rectangle(path, text, "bluefill")
        ratio = Icons.calculate_ratio(path)
        img = __make_inline_img(media_dir, filename, ratio, appearance, text)
        elm.name = "span"
        elm.clear()
        elm.append(img)
        elm.attrs["style"] = \
            "vertical-align: text-bottom; margin-right: 0.25em;"


def __make_rectangle(path, text, cls):
    """Write the rectangle icon for class ``cls`` to ``path``.

    Nothing is written for class "none"; any class not listed below
    gets a black rectangle on a transparent background.
    """
    if cls == "none":
        pass
    elif cls == "fill":
        Icons.make_monochrome_fill_rectangle(path, text)
    elif cls == "red":
        Icons.make_rectangle(path, text, "red", "white", "red")
    elif cls == "redfill":
        Icons.make_rectangle(path, text, "red", "red", "white")
    elif cls == "bluefill":
        Icons.make_rectangle(path, text, "blue", "blue", "white")
    else:
        Icons.make_rectangle(path, text, "black", "transparent", "black")