Add crawler factory
This commit is contained in:
parent
68949dde6c
commit
3d795ab49f
4
TODO.md
4
TODO.md
|
@ -6,8 +6,10 @@
|
|||
- [ ] Add factory classes to reduce the amount of class import statements
|
||||
- [ ] Add build scripts for producing program binaries
|
||||
- [ ] Support exporting to MDict (.MDX) dictionary format
|
||||
- [ ] Validate scraped webpages after downloading
|
||||
- [ ] Log non-fatal failures to a log file instead of raising exceptions
|
||||
- [ ] Support more dictionary websites
|
||||
- [ ] [国語辞典オンライン](https://kokugo.jitenon.jp/)
|
||||
- [x] [国語辞典オンライン](https://kokugo.jitenon.jp/)
|
||||
- [ ] [Yoji-Jukugo.com](https://yoji-jukugo.com/)
|
||||
- [ ] [実用日本語表現辞典](https://www.weblio.jp/cat/dictionary/jtnhj)
|
||||
- [ ] Support more Monokakido dictionaries
|
||||
|
|
18
bot/crawlers/factory.py
Normal file
18
bot/crawlers/factory.py
Normal file
|
@ -0,0 +1,18 @@
|
|||
from bot.targets import Targets
|
||||
|
||||
from bot.crawlers.crawlers import JitenonKokugoCrawler
|
||||
from bot.crawlers.crawlers import JitenonYojiCrawler
|
||||
from bot.crawlers.crawlers import JitenonKotowazaCrawler
|
||||
from bot.crawlers.crawlers import Smk8Crawler
|
||||
from bot.crawlers.crawlers import Daijirin2Crawler
|
||||
|
||||
|
||||
def new_crawler(target, args):
    """Build the crawler that handles the given dictionary target.

    `target` is a member of `Targets`; `args` is handed unchanged to the
    selected crawler class as its only constructor argument. A target
    with no entry in the table below raises `KeyError`.
    """
    crawler_classes = {
        Targets.JITENON_KOKUGO: JitenonKokugoCrawler,
        Targets.JITENON_YOJI: JitenonYojiCrawler,
        Targets.JITENON_KOTOWAZA: JitenonKotowazaCrawler,
        Targets.SMK8: Smk8Crawler,
        Targets.DAIJIRIN2: Daijirin2Crawler,
    }
    # Look up the class first, then instantiate it with the caller's args.
    crawler_class = crawler_classes[target]
    return crawler_class(args)
|
9
bot/targets.py
Normal file
9
bot/targets.py
Normal file
|
@ -0,0 +1,9 @@
|
|||
from enum import Enum
|
||||
|
||||
|
||||
class Targets(Enum):
    """Dictionary targets known to the program.

    Each member's value is the target's string name, so a member can be
    recovered from that name with `Targets(name)`.
    """

    # Jitenon targets
    JITENON_KOKUGO = "jitenon-kokugo"
    JITENON_YOJI = "jitenon-yoji"
    JITENON_KOTOWAZA = "jitenon-kotowaza"

    # Remaining targets
    SMK8 = "smk8"
    DAIJIRIN2 = "daijirin2"
|
25
jitenbot.py
25
jitenbot.py
|
@ -18,11 +18,8 @@ along with this program. If not, see <https://www.gnu.org/licenses/>.
|
|||
|
||||
import os
|
||||
import argparse
|
||||
from bot.crawlers import JitenonKokugoCrawler
|
||||
from bot.crawlers import JitenonYojiCrawler
|
||||
from bot.crawlers import JitenonKotowazaCrawler
|
||||
from bot.crawlers import Smk8Crawler
|
||||
from bot.crawlers import Daijirin2Crawler
|
||||
from bot.targets import Targets
|
||||
from bot.crawlers.factory import new_crawler
|
||||
|
||||
|
||||
def directory(d):
|
||||
|
@ -34,14 +31,14 @@ def directory(d):
|
|||
return d
|
||||
|
||||
|
||||
def parse_args(targets):
|
||||
def parse_args(target_names):
|
||||
parser = argparse.ArgumentParser(
|
||||
prog="jitenbot",
|
||||
description="Convert Japanese dictionary files to new formats.",
|
||||
)
|
||||
parser.add_argument(
|
||||
"target",
|
||||
choices=targets,
|
||||
choices=target_names,
|
||||
help="name of dictionary to convert"
|
||||
)
|
||||
parser.add_argument(
|
||||
|
@ -59,16 +56,10 @@ def parse_args(targets):
|
|||
|
||||
|
||||
def main():
    """Convert the dictionary named on the command line.

    Parses the command-line arguments, builds the matching crawler
    through `new_crawler`, then runs the crawler's three stages in order:
    `collect_pages`, `read_pages` and `make_yomichan_dictionary`.
    """
    # The accepted target names are the values of the Targets enum.
    target_names = [target.value for target in Targets]
    args = parse_args(target_names)
    # parse_args passes target_names to argparse as the `choices` for the
    # "target" argument, so args.target is expected to be a valid value.
    selected_target = Targets(args.target)
    # The factory owns the target -> crawler class mapping; the local
    # dict of crawler classes is no longer built or consulted here.
    crawler = new_crawler(selected_target, args)
    crawler.collect_pages()
    crawler.read_pages()
    crawler.make_yomichan_dictionary()
|
||||
|
|
Loading…
Reference in a new issue