Redesign search key logic for mdict

https://github.com/stephenmk/jitenbot/issues/1#issuecomment-1631583836
This commit is contained in:
stephenmk 2023-07-12 19:02:07 -05:00
parent c9ab0aea46
commit d51de0b3dc
No known key found for this signature in database
GPG key ID: B6DA730DB06235F1

View file

@@ -1,3 +1,4 @@
import re
from abc import abstractmethod, ABC from abc import abstractmethod, ABC
@@ -12,35 +13,20 @@ class Terminator(ABC):
def make_terms(self, entry): def make_terms(self, entry):
gid = entry.get_global_identifier() gid = entry.get_global_identifier()
glossary = self.__full_glossary(entry) glossary = self.__get_full_glossary(entry)
terms = [[gid, glossary]] terms = [[gid, glossary]]
keys = set() keys = self.__get_keys(entry)
headwords = entry.get_headwords()
for reading, expressions in headwords.items():
if len(expressions) == 0:
keys.add(reading)
for expression in expressions:
if expression.strip() == "":
keys.add(reading)
continue
keys.add(expression)
if reading.strip() == "":
continue
if reading != expression:
keys.add(f"{reading}{expression}")
else:
keys.add(reading)
link = f"@@@LINK={gid}" link = f"@@@LINK={gid}"
for key in keys: for key in keys:
if key.strip() != "": if key.strip() != "":
terms.append([key, link]) terms.append([key, link])
for subentries in self._subentry_lists(entry): for subentry_list in self._subentry_lists(entry):
for subentry in subentries: for subentry in subentry_list:
for term in self.make_terms(subentry): for term in self.make_terms(subentry):
terms.append(term) terms.append(term)
return terms return terms
def __full_glossary(self, entry): def __get_full_glossary(self, entry):
glossary = [] glossary = []
style_link = f"<link rel='stylesheet' href='{self._target.value}.css' type='text/css'>" style_link = f"<link rel='stylesheet' href='{self._target.value}.css' type='text/css'>"
glossary.append(style_link) glossary.append(style_link)
@@ -60,6 +46,30 @@ class Terminator(ABC):
glossary.append(link_glossary) glossary.append(link_glossary)
return "\n".join(glossary) return "\n".join(glossary)
def __get_keys(self, entry):
    """Build the set of search keys for the headwords of ``entry``.

    ``entry.get_headwords()`` is read as a mapping of reading ->
    iterable of expressions. For every reading the result contains:

    * the whitespace-stripped reading itself;
    * every whitespace-stripped expression;
    * ``<reading>【∅】`` when the reading consists solely of kana
      (or the ``、`` separator) and either has no expressions, or has
      an expression that is blank or identical to the reading;
    * ``<reading><expression>`` for every expression that is neither
      blank nor identical to the reading.

    The returned set may contain the empty string (blank readings or
    expressions, or a non-kana reading in the "kana only" cases), so
    consumers should skip blank keys.
    """
    kana_reading = re.compile(r"^[ぁ-ヿ、]+$")
    found = set()
    for raw_reading, raw_expressions in entry.get_headwords().items():
        reading_key = raw_reading.strip()
        found.add(reading_key)

        # Marker key for headwords that have no distinct written form;
        # left empty when the reading is not purely kana.
        if kana_reading.match(reading_key):
            bare_key = f"{reading_key}【∅】"
        else:
            bare_key = ""

        if len(raw_expressions) == 0:
            found.add(bare_key)

        for raw_expression in raw_expressions:
            expression_key = raw_expression.strip()
            found.add(expression_key)
            if expression_key in ("", reading_key):
                # Blank or same-as-reading expression: kana-only entry.
                found.add(bare_key)
            else:
                found.add(f"{reading_key}{expression_key}")
    return found
@abstractmethod @abstractmethod
def _glossary(self, entry): def _glossary(self, entry):
pass pass