diff --git a/TODO.md b/TODO.md
index 043a2f3..30c860d 100644
--- a/TODO.md
+++ b/TODO.md
@@ -6,8 +6,10 @@
 - [ ] Add factory classes to reduce the amount of class import statements
 - [ ] Add build scripts for producing program binaries
 - [ ] Support exporting to MDict (.MDX) dictionary format
+- [ ] Validate scraped webpages after downloading
+- [ ] Log non-fatal failures to a log file instead of raising exceptions
 - [ ] Support more dictionary websites
-  - [ ] [国語辞典オンライン](https://kokugo.jitenon.jp/)
+  - [x] [国語辞典オンライン](https://kokugo.jitenon.jp/)
   - [ ] [Yoji-Jukugo.com](https://yoji-jukugo.com/)
   - [ ] [実用日本語表現辞典](https://www.weblio.jp/cat/dictionary/jtnhj)
 - [ ] Support more Monokakido dictionaries
diff --git a/bot/crawlers.py b/bot/crawlers/crawlers.py
similarity index 100%
rename from bot/crawlers.py
rename to bot/crawlers/crawlers.py
diff --git a/bot/crawlers/factory.py b/bot/crawlers/factory.py
new file mode 100644
index 0000000..f2af6d1
--- /dev/null
+++ b/bot/crawlers/factory.py
@@ -0,0 +1,18 @@
+from bot.targets import Targets
+
+from bot.crawlers.crawlers import JitenonKokugoCrawler
+from bot.crawlers.crawlers import JitenonYojiCrawler
+from bot.crawlers.crawlers import JitenonKotowazaCrawler
+from bot.crawlers.crawlers import Smk8Crawler
+from bot.crawlers.crawlers import Daijirin2Crawler
+
+
+def new_crawler(target, args):
+    crawler_map = {
+        Targets.JITENON_KOKUGO: JitenonKokugoCrawler,
+        Targets.JITENON_YOJI: JitenonYojiCrawler,
+        Targets.JITENON_KOTOWAZA: JitenonKotowazaCrawler,
+        Targets.SMK8: Smk8Crawler,
+        Targets.DAIJIRIN2: Daijirin2Crawler,
+    }
+    return crawler_map[target](args)
diff --git a/bot/targets.py b/bot/targets.py
new file mode 100644
index 0000000..3c4c571
--- /dev/null
+++ b/bot/targets.py
@@ -0,0 +1,9 @@
+from enum import Enum
+
+
+class Targets(Enum):
+    JITENON_KOKUGO = "jitenon-kokugo"
+    JITENON_YOJI = "jitenon-yoji"
+    JITENON_KOTOWAZA = "jitenon-kotowaza"
+    SMK8 = "smk8"
+    DAIJIRIN2 = "daijirin2"
diff --git a/jitenbot.py b/jitenbot.py
index be42f5b..950ab16 100644
--- a/jitenbot.py
+++ b/jitenbot.py
@@ -18,11 +18,8 @@ along with this program. If not, see <https://www.gnu.org/licenses/>.
 import os
 import argparse
 
-from bot.crawlers import JitenonKokugoCrawler
-from bot.crawlers import JitenonYojiCrawler
-from bot.crawlers import JitenonKotowazaCrawler
-from bot.crawlers import Smk8Crawler
-from bot.crawlers import Daijirin2Crawler
+from bot.targets import Targets
+from bot.crawlers.factory import new_crawler
 
 
 def directory(d):
@@ -34,14 +31,14 @@ def directory(d):
     return d
 
 
-def parse_args(targets):
+def parse_args(target_names):
     parser = argparse.ArgumentParser(
         prog="jitenbot",
         description="Convert Japanese dictionary files to new formats.",
     )
     parser.add_argument(
         "target",
-        choices=targets,
+        choices=target_names,
         help="name of dictionary to convert"
     )
     parser.add_argument(
@@ -59,16 +56,10 @@ def parse_args(targets):
 
 
 def main():
-    crawlers = {
-        "jitenon-kokugo": JitenonKokugoCrawler,
-        "jitenon-yoji": JitenonYojiCrawler,
-        "jitenon-kotowaza": JitenonKotowazaCrawler,
-        "smk8": Smk8Crawler,
-        "daijirin2": Daijirin2Crawler,
-    }
-    args = parse_args(crawlers.keys())
-    crawler_class = crawlers[args.target]
-    crawler = crawler_class(args)
+    target_names = [x.value for x in Targets]
+    args = parse_args(target_names)
+    selected_target = Targets(args.target)
+    crawler = new_crawler(selected_target, args)
     crawler.collect_pages()
     crawler.read_pages()
     crawler.make_yomichan_dictionary()
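
The old single-file wiring in main() is now split across bot/targets.py (the Targets enum), bot/crawlers/factory.py (the new_crawler factory), and jitenbot.py (argument parsing only). Below is a minimal sketch of how the three pieces fit together once the patch is applied; it is not part of the patch, assumes it is run from the repository root, uses an argparse.Namespace as a stand-in for real parsed CLI arguments, and the crawler constructors may expect more attributes than the single field shown.

```python
# Sketch only: mirrors the new main() wiring, not an official entry point.
from argparse import Namespace

from bot.targets import Targets
from bot.crawlers.factory import new_crawler

# CLI choices are derived from the enum values ("smk8", "daijirin2", ...).
target_names = [t.value for t in Targets]
print(target_names)

# Stand-in for parse_args(); a real run passes the full argparse result.
args = Namespace(target="jitenon-yoji")

# Map the CLI string back to an enum member, then build the matching crawler.
crawler = new_crawler(Targets(args.target), args)
print(type(crawler).__name__)  # JitenonYojiCrawler
```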