# jitenbot/jitenbot.py

""" jitenbot
Copyright (C) 2023 Stephen Kraus

This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.

This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with this program.  If not, see <https://www.gnu.org/licenses/>.

"""

import os
import argparse
from bot.crawlers import JitenonKokugoCrawler
from bot.crawlers import JitenonYojiCrawler
from bot.crawlers import JitenonKotowazaCrawler
from bot.crawlers import Smk8Crawler
from bot.crawlers import Daijirin2Crawler


def directory(d):
    """Validate a directory path given on the command line.

    Intended for use as an argparse ``type`` callable.

    Args:
        d: Path string to check.

    Returns:
        ``d`` unchanged when it names an existing, readable directory.

    Raises:
        argparse.ArgumentTypeError: if ``d`` is not a directory, or if the
            directory exists but is not readable by the current process.
    """
    if os.path.isdir(d):
        if os.access(d, os.R_OK):
            return d
        # The path exists as a directory but read permission is missing.
        raise argparse.ArgumentTypeError(f"Cannot access directory `{d}`")
    raise argparse.ArgumentTypeError(f"`{d}` is not a valid directory")


def parse_args(targets, argv=None):
    """Parse the jitenbot command-line arguments.

    Args:
        targets: Iterable of dictionary names accepted for the positional
            ``target`` argument.
        argv: Optional list of argument strings to parse instead of the
            process command line. The default of None keeps the original
            behavior (argparse reads ``sys.argv[1:]``).

    Returns:
        The ``argparse.Namespace`` holding ``target``, ``page_dir`` and
        ``image_dir``; the two directory options are None when omitted.

    Raises:
        SystemExit: raised by argparse when the arguments are invalid or
            when help is requested.
    """
    parser = argparse.ArgumentParser(
        prog="jitenbot",
        description="Convert Japanese dictionary files to new formats.",
    )
    parser.add_argument(
        "target",
        # Materialize the choices so a one-shot iterable is not exhausted
        # by help formatting before the membership check runs.
        choices=tuple(targets),
        help="name of dictionary to convert"
    )
    parser.add_argument(
        "-p", "--page-dir",
        help="path to directory containing XML page files",
        type=directory
    )
    parser.add_argument(
        "-i", "--image-dir",
        help="path to directory containing image folders (gaiji, graphics, etc.)",
        type=directory
    )
    return parser.parse_args(argv)


def main():
    """Run the crawler selected by the command-line ``target`` argument.

    Maps each supported target name to its crawler class, parses the
    command line against those names, instantiates the chosen crawler
    with the parsed arguments, and then calls ``collect_pages``,
    ``read_pages`` and ``make_yomichan_dictionary`` on it in that order.
    """
    crawler_by_target = {
        "jitenon-kokugo": JitenonKokugoCrawler,
        "jitenon-yoji": JitenonYojiCrawler,
        "jitenon-kotowaza": JitenonKotowazaCrawler,
        "smk8": Smk8Crawler,
        "daijirin2": Daijirin2Crawler,
    }
    # The dictionary keys double as the allowed values for `target`.
    args = parse_args(crawler_by_target.keys())
    crawler = crawler_by_target[args.target](args)

    crawler.collect_pages()
    crawler.read_pages()
    crawler.make_yomichan_dictionary()


# Run the conversion only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()
First version Support for Jitenon's yoji dictionary 2023-04-08 03:05:36 +00:00			`""" jitenbot`
			`Copyright (C) 2023 Stephen Kraus`

			`This program is free software: you can redistribute it and/or modify`
			`it under the terms of the GNU General Public License as published by`
			`the Free Software Foundation, either version 3 of the License, or`
			`(at your option) any later version.`

			`This program is distributed in the hope that it will be useful,`
			`but WITHOUT ANY WARRANTY; without even the implied warranty of`
			`MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the`
			`GNU General Public License for more details.`

			`You should have received a copy of the GNU General Public License`
			`along with this program. If not, see <https://www.gnu.org/licenses/>.`

			`"""`

Add support for Shinmeikai 8th edition & Daijirin 4th edition 2023-05-01 22:31:28 +00:00			`import os`
Add support Jitenon Kotowaza 2023-04-10 16:14:52 +00:00			`import argparse`
Add support for jitenon-kokugo 2023-05-06 03:53:17 +00:00			`from bot.crawlers import JitenonKokugoCrawler`
Organize crawler logic into classes 2023-04-22 22:56:52 +00:00			`from bot.crawlers import JitenonYojiCrawler`
			`from bot.crawlers import JitenonKotowazaCrawler`
Add support for Shinmeikai 8th edition & Daijirin 4th edition 2023-05-01 22:31:28 +00:00			`from bot.crawlers import Smk8Crawler`
			`from bot.crawlers import Daijirin2Crawler`


			`def directory(d):`
			`if not os.path.isdir(d):`
			raise argparse.ArgumentTypeError(f"`{d}` is not a valid directory")
			`elif not os.access(d, os.R_OK):`
			raise argparse.ArgumentTypeError(f"Cannot access directory `{d}`")
			`else:`
			`return d`


			`def parse_args(targets):`
			`parser = argparse.ArgumentParser(`
			`prog="jitenbot",`
			`description="Convert Japanese dictionary files to new formats.",`
			`)`
			`parser.add_argument(`
			`"target",`
			`choices=targets,`
			`help="name of dictionary to convert"`
			`)`
			`parser.add_argument(`
			`"-p", "--page-dir",`
			`help="path to directory containing XML page files",`
			`type=directory`
			`)`
			`parser.add_argument(`
			`"-i", "--image-dir",`
Update jitenbot.py 2023-05-01 23:25:42 +00:00			`help="path to directory containing image folders (gaiji, graphics, etc.)",`
Add support for Shinmeikai 8th edition & Daijirin 4th edition 2023-05-01 22:31:28 +00:00			`type=directory`
			`)`
Add support Jitenon Kotowaza 2023-04-10 16:14:52 +00:00			`args = parser.parse_args()`
			`return args`


			`def main():`
Add support for Shinmeikai 8th edition & Daijirin 4th edition 2023-05-01 22:31:28 +00:00			`crawlers = {`
Add support for jitenon-kokugo 2023-05-06 03:53:17 +00:00			`"jitenon-kokugo": JitenonKokugoCrawler,`
Add support for Shinmeikai 8th edition & Daijirin 4th edition 2023-05-01 22:31:28 +00:00			`"jitenon-yoji": JitenonYojiCrawler,`
			`"jitenon-kotowaza": JitenonKotowazaCrawler,`
			`"smk8": Smk8Crawler,`
			`"daijirin2": Daijirin2Crawler,`
			`}`
			`args = parse_args(crawlers.keys())`
Organize crawler logic into classes 2023-04-22 22:56:52 +00:00			`crawler_class = crawlers[args.target]`
Add support for Shinmeikai 8th edition & Daijirin 4th edition 2023-05-01 22:31:28 +00:00			`crawler = crawler_class(args)`
			`crawler.collect_pages()`
			`crawler.read_pages()`
Organize crawler logic into classes 2023-04-22 22:56:52 +00:00			`crawler.make_yomichan_dictionary()`
Add support Jitenon Kotowaza 2023-04-10 16:14:52 +00:00
First version Support for Jitenon's yoji dictionary 2023-04-08 03:05:36 +00:00
			`if __name__ == "__main__":`
Add support Jitenon Kotowaza 2023-04-10 16:14:52 +00:00			`main()`