Add crawler factory
This commit is contained in:
parent
68949dde6c
commit
3d795ab49f
4
TODO.md
4
TODO.md
|
@@ -6,8 +6,10 @@
|
||||||
- [ ] Add factory classes to reduce the amount of class import statements
|
- [ ] Add factory classes to reduce the amount of class import statements
|
||||||
- [ ] Add build scripts for producing program binaries
|
- [ ] Add build scripts for producing program binaries
|
||||||
- [ ] Support exporting to MDict (.MDX) dictionary format
|
- [ ] Support exporting to MDict (.MDX) dictionary format
|
||||||
|
- [ ] Validate scraped webpages after downloading
|
||||||
|
- [ ] Log non-fatal failures to a log file instead of raising exceptions
|
||||||
- [ ] Support more dictionary websites
|
- [ ] Support more dictionary websites
|
||||||
- [ ] [国語辞典オンライン](https://kokugo.jitenon.jp/)
|
- [x] [国語辞典オンライン](https://kokugo.jitenon.jp/)
|
||||||
- [ ] [Yoji-Jukugo.com](https://yoji-jukugo.com/)
|
- [ ] [Yoji-Jukugo.com](https://yoji-jukugo.com/)
|
||||||
- [ ] [実用日本語表現辞典](https://www.weblio.jp/cat/dictionary/jtnhj)
|
- [ ] [実用日本語表現辞典](https://www.weblio.jp/cat/dictionary/jtnhj)
|
||||||
- [ ] Support more Monokakido dictionaries
|
- [ ] Support more Monokakido dictionaries
|
||||||
|
|
18
bot/crawlers/factory.py
Normal file
18
bot/crawlers/factory.py
Normal file
|
@@ -0,0 +1,18 @@
|
||||||
|
from bot.targets import Targets
|
||||||
|
|
||||||
|
from bot.crawlers.crawlers import JitenonKokugoCrawler
|
||||||
|
from bot.crawlers.crawlers import JitenonYojiCrawler
|
||||||
|
from bot.crawlers.crawlers import JitenonKotowazaCrawler
|
||||||
|
from bot.crawlers.crawlers import Smk8Crawler
|
||||||
|
from bot.crawlers.crawlers import Daijirin2Crawler
|
||||||
|
|
||||||
|
|
||||||
|
def new_crawler(target, args):
    """Create the crawler that handles the given target.

    Args:
        target: A ``Targets`` member identifying which dictionary to crawl.
        args: Passed unchanged as the sole argument to the selected
            crawler class's constructor.

    Returns:
        A new instance of the crawler class registered for ``target``.

    Raises:
        KeyError: If ``target`` has no entry in the lookup table.
    """
    # One entry per supported target; extend this table when a new
    # crawler class is added.
    crawler_classes = {
        Targets.JITENON_KOKUGO: JitenonKokugoCrawler,
        Targets.JITENON_YOJI: JitenonYojiCrawler,
        Targets.JITENON_KOTOWAZA: JitenonKotowazaCrawler,
        Targets.SMK8: Smk8Crawler,
        Targets.DAIJIRIN2: Daijirin2Crawler,
    }
    crawler_class = crawler_classes[target]
    return crawler_class(args)
|
9
bot/targets.py
Normal file
9
bot/targets.py
Normal file
|
@@ -0,0 +1,9 @@
|
||||||
|
from enum import Enum
|
||||||
|
|
||||||
|
|
||||||
|
class Targets(Enum):
    """Closed set of dictionary targets.

    Each member's value is the target's string name, so a member can be
    recovered from its name with ``Targets("<name>")``.
    """

    # Targets named with the "jitenon-" prefix
    JITENON_KOKUGO = "jitenon-kokugo"
    JITENON_YOJI = "jitenon-yoji"
    JITENON_KOTOWAZA = "jitenon-kotowaza"

    # Remaining targets
    SMK8 = "smk8"
    DAIJIRIN2 = "daijirin2"
|
25
jitenbot.py
25
jitenbot.py
|
@@ -18,11 +18,8 @@ along with this program. If not, see <https://www.gnu.org/licenses/>.
|
||||||
|
|
||||||
import os
|
import os
|
||||||
import argparse
|
import argparse
|
||||||
from bot.crawlers import JitenonKokugoCrawler
|
from bot.targets import Targets
|
||||||
from bot.crawlers import JitenonYojiCrawler
|
from bot.crawlers.factory import new_crawler
|
||||||
from bot.crawlers import JitenonKotowazaCrawler
|
|
||||||
from bot.crawlers import Smk8Crawler
|
|
||||||
from bot.crawlers import Daijirin2Crawler
|
|
||||||
|
|
||||||
|
|
||||||
def directory(d):
|
def directory(d):
|
||||||
|
@@ -34,14 +31,14 @@ def directory(d):
|
||||||
return d
|
return d
|
||||||
|
|
||||||
|
|
||||||
def parse_args(targets):
|
def parse_args(target_names):
|
||||||
parser = argparse.ArgumentParser(
|
parser = argparse.ArgumentParser(
|
||||||
prog="jitenbot",
|
prog="jitenbot",
|
||||||
description="Convert Japanese dictionary files to new formats.",
|
description="Convert Japanese dictionary files to new formats.",
|
||||||
)
|
)
|
||||||
parser.add_argument(
|
parser.add_argument(
|
||||||
"target",
|
"target",
|
||||||
choices=targets,
|
choices=target_names,
|
||||||
help="name of dictionary to convert"
|
help="name of dictionary to convert"
|
||||||
)
|
)
|
||||||
parser.add_argument(
|
parser.add_argument(
|
||||||
|
@@ -59,16 +56,10 @@ def parse_args(targets):
|
||||||
|
|
||||||
|
|
||||||
def main():
|
def main():
|
||||||
crawlers = {
|
target_names = [x.value for x in Targets]
|
||||||
"jitenon-kokugo": JitenonKokugoCrawler,
|
args = parse_args(target_names)
|
||||||
"jitenon-yoji": JitenonYojiCrawler,
|
selected_target = Targets(args.target)
|
||||||
"jitenon-kotowaza": JitenonKotowazaCrawler,
|
crawler = new_crawler(selected_target, args)
|
||||||
"smk8": Smk8Crawler,
|
|
||||||
"daijirin2": Daijirin2Crawler,
|
|
||||||
}
|
|
||||||
args = parse_args(crawlers.keys())
|
|
||||||
crawler_class = crawlers[args.target]
|
|
||||||
crawler = crawler_class(args)
|
|
||||||
crawler.collect_pages()
|
crawler.collect_pages()
|
||||||
crawler.read_pages()
|
crawler.read_pages()
|
||||||
crawler.make_yomichan_dictionary()
|
crawler.make_yomichan_dictionary()
|
||||||
|
|
Loading…
Reference in a new issue