Add crawler factory

This commit is contained in:
stephenmk 2023-05-06 13:15:38 -05:00
parent 68949dde6c
commit 3d795ab49f
No known key found for this signature in database
GPG key ID: B6DA730DB06235F1
5 changed files with 38 additions and 18 deletions

View file

@ -6,8 +6,10 @@
- [ ] Add factory classes to reduce the amount of class import statements - [ ] Add factory classes to reduce the amount of class import statements
- [ ] Add build scripts for producing program binaries - [ ] Add build scripts for producing program binaries
- [ ] Support exporting to MDict (.MDX) dictionary format - [ ] Support exporting to MDict (.MDX) dictionary format
- [ ] Validate scraped webpages after downloading
- [ ] Log non-fatal failures to a log file instead of raising exceptions
- [ ] Support more dictionary websites - [ ] Support more dictionary websites
- [ ] [国語辞典オンライン](https://kokugo.jitenon.jp/) - [x] [国語辞典オンライン](https://kokugo.jitenon.jp/)
- [ ] [Yoji-Jukugo.com](https://yoji-jukugo.com/) - [ ] [Yoji-Jukugo.com](https://yoji-jukugo.com/)
- [ ] [実用日本語表現辞典](https://www.weblio.jp/cat/dictionary/jtnhj) - [ ] [実用日本語表現辞典](https://www.weblio.jp/cat/dictionary/jtnhj)
- [ ] Support more Monokakido dictionaries - [ ] Support more Monokakido dictionaries

18
bot/crawlers/factory.py Normal file
View file

@ -0,0 +1,18 @@
from bot.targets import Targets
from bot.crawlers.crawlers import JitenonKokugoCrawler
from bot.crawlers.crawlers import JitenonYojiCrawler
from bot.crawlers.crawlers import JitenonKotowazaCrawler
from bot.crawlers.crawlers import Smk8Crawler
from bot.crawlers.crawlers import Daijirin2Crawler
def new_crawler(target, args):
    """Build the crawler that corresponds to the given target.

    Args:
        target: The ``Targets`` member used as the lookup key.
        args: Passed unchanged as the sole argument to the selected
            crawler class.

    Returns:
        A new instance of the crawler class registered for ``target``.

    Raises:
        KeyError: If ``target`` has no entry in the lookup table.
    """
    # One (target, crawler class) pair per supported dictionary.
    registry = dict([
        (Targets.JITENON_KOKUGO, JitenonKokugoCrawler),
        (Targets.JITENON_YOJI, JitenonYojiCrawler),
        (Targets.JITENON_KOTOWAZA, JitenonKotowazaCrawler),
        (Targets.SMK8, Smk8Crawler),
        (Targets.DAIJIRIN2, Daijirin2Crawler),
    ])
    crawler_class = registry[target]
    return crawler_class(args)

9
bot/targets.py Normal file
View file

@ -0,0 +1,9 @@
from enum import Enum
class Targets(Enum):
    """Enumeration of the dictionary targets, one member per dictionary.

    Every member's value is a lowercase, hyphenated string, so a member
    can be recovered from its string form with ``Targets(value)``.
    """

    # NOTE(review): these values appear to double as the names accepted
    # for the command-line "target" argument - confirm against the caller
    # before renaming any of them.
    JITENON_KOKUGO = "jitenon-kokugo"
    JITENON_YOJI = "jitenon-yoji"
    JITENON_KOTOWAZA = "jitenon-kotowaza"
    SMK8 = "smk8"
    DAIJIRIN2 = "daijirin2"

View file

@ -18,11 +18,8 @@ along with this program. If not, see <https://www.gnu.org/licenses/>.
import os import os
import argparse import argparse
from bot.crawlers import JitenonKokugoCrawler from bot.targets import Targets
from bot.crawlers import JitenonYojiCrawler from bot.crawlers.factory import new_crawler
from bot.crawlers import JitenonKotowazaCrawler
from bot.crawlers import Smk8Crawler
from bot.crawlers import Daijirin2Crawler
def directory(d): def directory(d):
@ -34,14 +31,14 @@ def directory(d):
return d return d
def parse_args(targets): def parse_args(target_names):
parser = argparse.ArgumentParser( parser = argparse.ArgumentParser(
prog="jitenbot", prog="jitenbot",
description="Convert Japanese dictionary files to new formats.", description="Convert Japanese dictionary files to new formats.",
) )
parser.add_argument( parser.add_argument(
"target", "target",
choices=targets, choices=target_names,
help="name of dictionary to convert" help="name of dictionary to convert"
) )
parser.add_argument( parser.add_argument(
@ -59,16 +56,10 @@ def parse_args(targets):
def main(): def main():
crawlers = { target_names = [x.value for x in Targets]
"jitenon-kokugo": JitenonKokugoCrawler, args = parse_args(target_names)
"jitenon-yoji": JitenonYojiCrawler, selected_target = Targets(args.target)
"jitenon-kotowaza": JitenonKotowazaCrawler, crawler = new_crawler(selected_target, args)
"smk8": Smk8Crawler,
"daijirin2": Daijirin2Crawler,
}
args = parse_args(crawlers.keys())
crawler_class = crawlers[args.target]
crawler = crawler_class(args)
crawler.collect_pages() crawler.collect_pages()
crawler.read_pages() crawler.read_pages()
crawler.make_yomichan_dictionary() crawler.make_yomichan_dictionary()