From 5e90bbc38d76d00ea66fb13c89e686c92ff6f514 Mon Sep 17 00:00:00 2001 From: Jimmy-Z Date: Sat, 19 Aug 2023 14:05:57 +0800 Subject: [PATCH] WIP --- .gitignore | 1 + Cargo.lock | 407 +++++++++++++++++++++++++++++++++++++++++++++++++++ Cargo.toml | 13 ++ README.md | 1 + rustfmt.toml | 1 + src/main.rs | 187 +++++++++++++++++++++++ src/utils.rs | 29 ++++ 7 files changed, 639 insertions(+) create mode 100644 .gitignore create mode 100644 Cargo.lock create mode 100644 Cargo.toml create mode 100644 README.md create mode 100644 rustfmt.toml create mode 100644 src/main.rs create mode 100644 src/utils.rs diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..ea8c4bf --- /dev/null +++ b/.gitignore @@ -0,0 +1 @@ +/target diff --git a/Cargo.lock b/Cargo.lock new file mode 100644 index 0000000..3100fc1 --- /dev/null +++ b/Cargo.lock @@ -0,0 +1,407 @@ +# This file is automatically @generated by Cargo. +# It is not intended for manual editing. +version = 3 + +[[package]] +name = "adler" +version = "1.0.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f26201604c87b1e01bd3d98f8d5d9a8fcbb815e8cedb41ffccbeb4bf593a35fe" + +[[package]] +name = "anstream" +version = "0.3.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0ca84f3628370c59db74ee214b3263d58f9aadd9b4fe7e711fd87dc452b7f163" +dependencies = [ + "anstyle", + "anstyle-parse", + "anstyle-query", + "anstyle-wincon", + "colorchoice", + "is-terminal", + "utf8parse", +] + +[[package]] +name = "anstyle" +version = "1.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3a30da5c5f2d5e72842e00bcb57657162cdabef0931f40e2deb9b4140440cecd" + +[[package]] +name = "anstyle-parse" +version = "0.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "938874ff5980b03a87c5524b3ae5b59cf99b1d6bc836848df7bc5ada9643c333" +dependencies = [ + "utf8parse", +] + +[[package]] +name = "anstyle-query" +version = "1.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5ca11d4be1bab0c8bc8734a9aa7bf4ee8316d462a08c6ac5052f888fef5b494b" +dependencies = [ + "windows-sys", +] + +[[package]] +name = "anstyle-wincon" +version = "1.0.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c677ab05e09154296dd37acecd46420c17b9713e8366facafa8fc0885167cf4c" +dependencies = [ + "anstyle", + "windows-sys", +] + +[[package]] +name = "bitflags" +version = "2.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b4682ae6287fcf752ecaabbfcc7b6f9b72aa33933dc23a554d853aea8eea8635" + +[[package]] +name = "cc" +version = "1.0.82" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "305fe645edc1442a0fa8b6726ba61d422798d37a52e12eaecf4b022ebbb88f01" +dependencies = [ + "libc", +] + +[[package]] +name = "clap" +version = "4.3.23" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "03aef18ddf7d879c15ce20f04826ef8418101c7e528014c3eeea13321047dca3" +dependencies = [ + "clap_builder", + "clap_derive", + "once_cell", +] + +[[package]] +name = "clap_builder" +version = "4.3.23" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f8ce6fffb678c9b80a70b6b6de0aad31df727623a70fd9a842c30cd573e2fa98" +dependencies = [ + "anstream", + "anstyle", + "clap_lex", + "strsim", +] + +[[package]] +name = "clap_derive" +version = "4.3.12" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "54a9bb5758fc5dfe728d1019941681eccaf0cf8a4189b692a0ee2f2ecf90a050" +dependencies = [ + "heck", + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "clap_lex" +version = "0.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2da6da31387c7e4ef160ffab6d5e7f00c42626fe39aea70a7b0f1773f7dd6c1b" + +[[package]] +name = "colorchoice" +version = "1.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "acbf1af155f9b9ef647e42cdc158db4b64a1b61f743629225fde6f3e0be2a7c7" + +[[package]] +name = "errno" +version = "0.3.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6b30f669a7961ef1631673d2766cc92f52d64f7ef354d4fe0ddfd30ed52f0f4f" +dependencies = [ + "errno-dragonfly", + "libc", + "windows-sys", +] + +[[package]] +name = "errno-dragonfly" +version = "0.1.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "aa68f1b12764fab894d2755d2518754e71b4fd80ecfb822714a1206c2aab39bf" +dependencies = [ + "cc", + "libc", +] + +[[package]] +name = "heck" +version = "0.4.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "95505c38b4572b2d910cecb0281560f54b440a19336cbbcb27bf6ce6adc6f5a8" + +[[package]] +name = "hermit-abi" +version = "0.3.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "443144c8cdadd93ebf52ddb4056d257f5b52c04d3c804e657d19eb73fc33668b" + +[[package]] +name = "is-terminal" +version = "0.4.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cb0889898416213fab133e1d33a0e5858a48177452750691bde3666d0fdbaf8b" +dependencies = [ + "hermit-abi", + "rustix", + "windows-sys", +] + +[[package]] +name = "itoa" +version = "1.0.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "af150ab688ff2122fcef229be89cb50dd66af9e01a4ff320cc137eecc9bacc38" + +[[package]] +name = "libc" +version = "0.2.147" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b4668fb0ea861c1df094127ac5f1da3409a82116a4ba74fca2e58ef927159bb3" + +[[package]] +name = "linux-raw-sys" +version = "0.4.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "57bcfdad1b858c2db7c38303a6d2ad4dfaf5eb53dfeb0910128b2c26d6158503" + +[[package]] +name = "mini-internal" +version = "0.1.32" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "03b49c03b26cd68fcc1b1c2b11c953f600793c879bee590c1cd0b00f75365784" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "miniserde" +version = "0.1.32" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c27b472d15e3ad6a6b1cf68999107089db752222830338f0cf7a89a4dc81570e" +dependencies = [ + "itoa", + "mini-internal", + "ryu", +] + +[[package]] +name = "miniz_oxide" +version = "0.7.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e7810e0be55b428ada41041c41f32c9f1a42817901b4ccf45fa3d4b6561e74c7" +dependencies = [ + "adler", +] + +[[package]] +name = "mkd-utils" +version = "0.1.0" +dependencies = [ + "clap", + "monokakido", + "serde", + "serde_json", +] + +[[package]] +name = "monokakido" +version = "0.3.2" +dependencies = [ + "miniserde", + "miniz_oxide", + "xmlparser", +] + +[[package]] +name = "once_cell" +version = "1.18.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "dd8b5dd2ae5ed71462c540258bedcb51965123ad7e7ccf4b9a8cafaa4a63576d" + +[[package]] +name = "proc-macro2" +version = "1.0.66" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "18fb31db3f9bddb2ea821cde30a9f70117e3f119938b5ee630b7403aa6e2ead9" +dependencies = [ + "unicode-ident", +] + +[[package]] +name = "quote" +version = "1.0.32" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "50f3b39ccfb720540debaa0164757101c08ecb8d326b15358ce76a62c7e85965" +dependencies = [ + "proc-macro2", +] + +[[package]] +name = "rustix" +version = "0.38.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "19ed4fa021d81c8392ce04db050a3da9a60299050b7ae1cf482d862b54a7218f" +dependencies = [ + "bitflags", + "errno", + "libc", + "linux-raw-sys", + "windows-sys", +] + +[[package]] +name = "ryu" +version = "1.0.15" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1ad4cc8da4ef723ed60bced201181d83791ad433213d8c24efffda1eec85d741" + +[[package]] +name = "serde" +version = "1.0.183" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "32ac8da02677876d532745a130fc9d8e6edfa81a269b107c5b00829b91d8eb3c" +dependencies = [ + "serde_derive", +] + +[[package]] +name = "serde_derive" +version = "1.0.183" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "aafe972d60b0b9bee71a91b92fee2d4fb3c9d7e8f6b179aa99f27203d99a4816" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "serde_json" +version = "1.0.104" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "076066c5f1078eac5b722a31827a8832fe108bed65dfa75e233c89f8206e976c" +dependencies = [ + "itoa", + "ryu", + "serde", +] + +[[package]] +name = "strsim" +version = "0.10.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "73473c0e59e6d5812c5dfe2a064a6444949f089e20eec9a2e5506596494e4623" + +[[package]] +name = "syn" +version = "2.0.28" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "04361975b3f5e348b2189d8dc55bc942f278b2d482a6a0365de5bdd62d351567" +dependencies = [ + "proc-macro2", + "quote", + "unicode-ident", +] + +[[package]] +name = "unicode-ident" +version = "1.0.11" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "301abaae475aa91687eb82514b328ab47a211a533026cb25fc3e519b86adfc3c" + +[[package]] +name = "utf8parse" +version = "0.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "711b9620af191e0cdc7468a8d14e709c3dcdb115b36f838e601583af800a370a" + +[[package]] +name = "windows-sys" +version = "0.48.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "677d2418bec65e3338edb076e806bc1ec15693c5d0104683f2efe857f61056a9" +dependencies = [ + "windows-targets", +] + +[[package]] +name = "windows-targets" +version = "0.48.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9a2fa6e2155d7247be68c096456083145c183cbbbc2764150dda45a87197940c" +dependencies = [ + "windows_aarch64_gnullvm", + "windows_aarch64_msvc", + "windows_i686_gnu", + "windows_i686_msvc", + "windows_x86_64_gnu", + "windows_x86_64_gnullvm", + "windows_x86_64_msvc", +] + +[[package]] +name = "windows_aarch64_gnullvm" +version = "0.48.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2b38e32f0abccf9987a4e3079dfb67dcd799fb61361e53e2882c3cbaf0d905d8" + +[[package]] +name = "windows_aarch64_msvc" +version = "0.48.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "dc35310971f3b2dbbf3f0690a219f40e2d9afcf64f9ab7cc1be722937c26b4bc" + +[[package]] +name = "windows_i686_gnu" +version = "0.48.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a75915e7def60c94dcef72200b9a8e58e5091744960da64ec734a6c6e9b3743e" + +[[package]] +name = "windows_i686_msvc" +version = "0.48.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8f55c233f70c4b27f66c523580f78f1004e8b5a8b659e05a4eb49d4166cca406" + +[[package]] +name = "windows_x86_64_gnu" +version = "0.48.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "53d40abd2583d23e4718fddf1ebec84dbff8381c07cae67ff7768bbf19c6718e" + +[[package]] +name = "windows_x86_64_gnullvm" +version = "0.48.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0b7b52767868a23d5bab768e390dc5f5c55825b6d30b86c844ff2dc7414044cc" + +[[package]] +name = "windows_x86_64_msvc" +version = "0.48.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ed94fce61571a4006852b7389a063ab983c02eb1bb37b47f8272ce92d06d9538" + +[[package]] +name = "xmlparser" +version = "0.13.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4d25c75bf9ea12c4040a97f829154768bbbce366287e2dc044af160cd79a13fd" diff --git a/Cargo.toml b/Cargo.toml new file mode 100644 index 0000000..7f004af --- /dev/null +++ b/Cargo.toml @@ -0,0 +1,13 @@ +[package] +name = "mkd-utils" +version = "0.1.0" +edition = "2021" + +# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html + +[dependencies] +serde = {version = "1", features = ["derive"]} +serde_json = "1" +clap = { version = "4", features = ["derive"] } + +monokakido = { path = "../monokakido" } \ No newline at end of file diff --git a/README.md b/README.md new file mode 100644 index 0000000..6d3f665 --- /dev/null +++ b/README.md @@ -0,0 +1 @@ +WIP \ No newline at end of file diff --git a/rustfmt.toml b/rustfmt.toml new file mode 100644 index 0000000..18d655e --- /dev/null +++ b/rustfmt.toml @@ -0,0 +1 @@ +hard_tabs = true \ No newline at end of file diff --git a/src/main.rs b/src/main.rs new file mode 100644 index 0000000..9f23a90 --- /dev/null +++ b/src/main.rs @@ -0,0 +1,187 @@ +use std::{ + collections::HashMap, + ffi::OsStr, + fs, + path::{Path, PathBuf}, +}; + +use clap::{Parser, Subcommand}; +use serde::Deserialize; + +use monokakido as mkt; + +mod utils; +use utils::*; + +#[derive(Parser)] +#[command(version)] +struct Args { + #[command(subcommand)] + cmd: Cmds, +} + +#[derive(Subcommand)] +enum Cmds { + scan_base { dir: String }, + scan_dict { dir: String }, + scan_contents { dir: String }, +} + +#[derive(Deserialize, Debug)] +struct MkdProduct { + #[serde(rename = "DSProductTitle")] + title: HashMap, + #[serde(rename = "DSProductContents")] + contents: Vec, +} + +#[derive(Deserialize, Debug)] +struct MkdContent { + #[serde(rename = "DSContentTitle")] + title: HashMap, + #[serde(rename = "DSContentDirectory")] + dir: String, +} + +fn main() { + let args = Args::parse(); + match args.cmd { + Cmds::scan_base { dir } => { + scan_base(&dir); + } + Cmds::scan_dict { dir } => { + scan_dict(&dir) + } + Cmds::scan_contents { dir } => { + scan_contents(&dir) + } + } +} + +// base: the base dir for all dicts +// each sub-dir should have a "Contents" sub-dir +// for example: +// mac: "/Library/Application Support/AppStoreContent/jp.monokakido.Dictionaries/Products/" +fn scan_base>(dir: P) { + for e in fs::read_dir(dir).unwrap() { + let e = e.unwrap(); + let p = e.path(); + if !p.is_dir() { + continue; + } + scan_dict(p); + } +} + +fn scan_dict>(dir: P) { + let mut p: PathBuf = dir.as_ref().into(); + let dir_name = p.file_name().unwrap().to_str().unwrap().to_string(); + // main JSON + p.push("Contents"); + let json = find_file_with_ext(&p, OsStr::new("json")); + if json.len() != 1 { + println!( + "{} JSON file in {}, which is unexpected", + json.len(), + p.as_os_str().to_str().unwrap() + ); + return; + } + let json = json[0].to_str().unwrap(); + // println!("{}", json); + let json: MkdProduct = serde_json::from_reader(fs::File::open(json).unwrap()).unwrap(); + println!( + "{} [{}]", + fmt_ml_str(&json.title), + dir_name, + ); + for c in json.contents { + println!("\t{} [{}]", fmt_ml_str(&c.title), &c.dir); + p.push(c.dir); + scan_contents(&p); + p.pop(); + } +} + +// dir: the content directory of a single dict +// should be a sub-dir of the "Contents" dir mentioned above +// should contain sub-dirs like "key" +fn scan_contents>(dir: P) { + for d in fs::read_dir(dir).unwrap() { + let d = d.unwrap(); + let dp = d.path(); + if !dp.is_dir() { + continue; + } + let dn = d.file_name(); + let dn = dn.to_str().unwrap(); + // counters + let mut c_idx = 0; + let mut c_nidx = 0; + let mut c_keys = 0; + let mut c_not_file = 0; + let mut c_no_ext = 0; + let mut c_other = HashMap::::new(); + // counter helper + let mut c_other_mod = |e: &str, m: isize| { + *c_other.entry(e.to_string()).or_insert(0) += m; + }; + for f in fs::read_dir(&dp).unwrap() { + let f = f.unwrap(); + let fp = f.path(); + if !fp.is_file() { + c_not_file += 1; + continue; + } + let fname = f.file_name(); + let fname = fname.to_str().unwrap(); + let fext = match fp.extension() { + Some(e) => e, + None => { + c_no_ext += 1; + continue; + } + }; + match fext.to_str().unwrap() { + "idx" => { + let mp = fp.with_extension("map"); + if mp.exists() && mp.is_file() { + println!("\t\t{}: {}|map", dn, fname); + // prevent the corresponding map file from showing up in c_other + c_other_mod("map", -1); + } else { + println!( + "\t\t{}: {} without corresponding map file, unexpected", + dn, fname + ) + } + } + "nidx" => { + println!("\t\t{}: {}", dn, fname); + } + "keystore" => { + println!("\t\t{}: {}", dn, fname); + } + e => { + c_other_mod(e, 1); + } + }; + } + // collect others and print them in a single line + let mut r = Vec::with_capacity(c_other.keys().len() + 2); + for (e, c) in c_other.iter() { + if *c > 0 { + r.push(format!("{}: {}", e, c)); + } + } + if c_no_ext > 0 { + r.push(format!("no ext: {}", c_no_ext)); + } + if c_not_file > 0 { + r.push(format!("not file: {}", c_not_file)); + } + if r.len() > 0 { + println!("\t\t{}: {}", dn, r.join(", ")); + } + } +} diff --git a/src/utils.rs b/src/utils.rs new file mode 100644 index 0000000..275acf5 --- /dev/null +++ b/src/utils.rs @@ -0,0 +1,29 @@ +use std::{ + collections::{HashMap, HashSet}, + ffi::OsStr, + fs, + path::{Path, PathBuf}, +}; + +pub fn fmt_ml_str(h: &HashMap) -> String { + h.values() + .map(|e| e.as_str()) + .collect::>() + .into_iter() + .collect::>() + .join(" | ") +} + +pub fn find_file_with_ext>(path: P, ext: &OsStr) -> Vec { + fs::read_dir(path) + .unwrap() + .filter_map(|e| { + let p = e.unwrap().path(); + if p.is_file() && Some(ext) == p.extension() { + Some(p) + } else { + None + } + }) + .collect() +}