2023-04-08 03:05:36 +00:00
|
|
|
""" jitenbot
Copyright (C) 2023 Stephen Kraus

This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.

This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with this program. If not, see <https://www.gnu.org/licenses/>.

"""
|
|
|
|
|
2023-05-01 22:31:28 +00:00
|
|
|
import os
|
2023-07-08 21:49:03 +00:00
|
|
|
import sys
|
2023-04-10 16:14:52 +00:00
|
|
|
import argparse
|
2023-07-08 21:49:03 +00:00
|
|
|
import subprocess
|
2023-05-06 18:15:38 +00:00
|
|
|
from bot.targets import Targets
|
2023-07-27 04:48:24 +00:00
|
|
|
from bot.factory import new_crawler
|
2023-05-01 22:31:28 +00:00
|
|
|
|
|
|
|
|
2023-07-08 21:49:03 +00:00
|
|
|
def filename(f):
    """Argparse ``type`` callable accepting a path to a readable file.

    Returns ``f`` unchanged when it names an existing regular file that
    passes an ``os.R_OK`` access check.

    Raises:
        argparse.ArgumentTypeError: If ``f`` is not an existing file, or
            if the file fails the read-access check.
    """
    if os.path.isfile(f):
        if os.access(f, os.R_OK):
            return f
        raise argparse.ArgumentTypeError(f"Cannot access file `{f}`")
    raise argparse.ArgumentTypeError(f"`{f}` is not a valid filename")
|
|
|
|
|
|
|
|
|
2023-05-01 22:31:28 +00:00
|
|
|
def directory(d):
    """Argparse ``type`` callable accepting a path to a readable directory.

    Returns ``d`` unchanged when it names an existing directory that
    passes an ``os.R_OK`` access check.

    Raises:
        argparse.ArgumentTypeError: If ``d`` is not an existing directory,
            or if the directory fails the read-access check.
    """
    if os.path.isdir(d):
        if os.access(d, os.R_OK):
            return d
        raise argparse.ArgumentTypeError(f"Cannot access directory `{d}`")
    raise argparse.ArgumentTypeError(f"`{d}` is not a valid directory")
|
|
|
|
|
|
|
|
|
2023-05-06 18:15:38 +00:00
|
|
|
def parse_args(target_names, argv=None):
    """Build the jitenbot command-line parser and parse the arguments.

    Args:
        target_names: Allowed values for the positional ``target``
            argument (passed to argparse as ``choices``).
        argv: Optional list of argument strings to parse. When ``None``
            (the default), argparse reads the arguments from
            ``sys.argv[1:]``, which is the behavior existing callers get.

    Returns:
        The ``argparse.Namespace`` holding ``target``, ``page_dir``,
        ``media_dir``, ``mdict_icon``, ``no_mdict_export``,
        ``no_yomichan_export`` and ``validate_yomichan_terms``.
    """
    parser = argparse.ArgumentParser(
        prog="jitenbot",
        description="Convert Japanese dictionary files to new formats.",
        epilog="See README.md for details regarding media directory structures",
    )
    parser.add_argument(
        "target",
        choices=target_names,
        help="name of dictionary to convert",
    )
    parser.add_argument(
        "-p", "--page-dir",
        help="path to directory containing XML page files",
        type=directory,
    )
    parser.add_argument(
        "-m", "--media-dir",
        help="path to directory containing media folders (gaiji, graphics, audio, etc.)",
        type=directory,
    )
    parser.add_argument(
        "-i", "--mdict-icon",
        help="path to icon file to be used with MDict",
        type=filename,
    )
    parser.add_argument(
        "--no-mdict-export",
        help="skip export of dictionary data to MDict format",
        action='store_true',
    )
    parser.add_argument(
        "--no-yomichan-export",
        help="skip export of dictionary data to Yomichan format",
        action='store_true',
    )
    parser.add_argument(
        "--validate-yomichan-terms",
        help="validate JSON structure of exported Yomichan dictionary terms",
        action='store_true',
    )
    # argv=None makes argparse fall back to sys.argv[1:]; an explicit list
    # lets the parser be driven programmatically.
    return parser.parse_args(argv)
|
|
|
|
|
|
|
|
|
2023-07-08 21:49:03 +00:00
|
|
|
def test_mdict():
    """Check that the ``mdict`` pack tool can be launched.

    Runs ``mdict --version`` with its standard output discarded. If the
    executable cannot be found, guidance is printed to standard error and
    the process exits with status 1.

    Raises:
        subprocess.CalledProcessError: If ``mdict`` starts but returns a
            non-zero exit status (a consequence of ``check=True``).
        SystemExit: With code 1 when the ``mdict`` executable is missing.
    """
    try:
        subprocess.run(
            ["mdict", "--version"],
            check=True,
            stdout=subprocess.DEVNULL,
        )
    except FileNotFoundError:
        print("Could not find `mdict` pack tool.", file=sys.stderr)
        print("Ensure that mdict-utils is installed and", file=sys.stderr)
        print("included in the environment PATH.\n", file=sys.stderr)
        print("Mdict export functionality may also be", file=sys.stderr)
        print("disabled with the --no-mdict-export flag.", file=sys.stderr)
        # A bare sys.exit() exits with status 0, which would report
        # success to the calling shell despite the missing dependency.
        sys.exit(1)
|
|
|
|
|
|
|
|
|
2023-04-10 16:14:52 +00:00
|
|
|
def main():
    """Run the conversion selected on the command line.

    Parses the arguments, checks for the ``mdict`` tool unless MDict
    export is disabled, creates the crawler for the chosen target, has it
    collect and read pages, and then invokes the Yomichan and MDict
    dictionary builders unless the corresponding ``--no-*-export`` flag
    was given.
    """
    args = parse_args([target.value for target in Targets])
    export_mdict = not args.no_mdict_export
    export_yomichan = not args.no_yomichan_export

    # Check for the external pack tool before any crawling work starts.
    if export_mdict:
        test_mdict()

    crawler = new_crawler(Targets(args.target))
    crawler.collect_pages(args.page_dir)
    crawler.read_pages()

    if export_yomichan:
        crawler.make_yomichan_dictionary(
            args.media_dir,
            args.validate_yomichan_terms,
        )
    if export_mdict:
        crawler.make_mdict_dictionary(
            args.media_dir,
            args.mdict_icon,
        )
|
2023-04-10 16:14:52 +00:00
|
|
|
|
2023-04-08 03:05:36 +00:00
|
|
|
|
|
|
|
# Run the command-line entry point only when this file is executed as a
# script, not when it is imported as a module.
if __name__ == "__main__":
    main()
|