diff --git a/Cargo.lock b/Cargo.lock
index a695f01..ec5c4ba 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -51,6 +51,7 @@ version = "0.2.0"
dependencies = [
"miniserde",
"miniz_oxide",
+ "xmlparser",
]
[[package]]
@@ -93,3 +94,9 @@ name = "unicode-ident"
version = "1.0.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "84a22b9f218b40614adcb3f4ff08b703773ad44fa9423e4e0d346d5db86e4ebc"
+
+[[package]]
+name = "xmlparser"
+version = "0.13.5"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "4d25c75bf9ea12c4040a97f829154768bbbce366287e2dc044af160cd79a13fd"
diff --git a/Cargo.toml b/Cargo.toml
index 68388ea..5528b90 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -8,3 +8,4 @@ license = "MIT"
[dependencies]
miniz_oxide = { version = "0.6", default-features = false }
miniserde = "0.1"
+xmlparser = "0.13.5"
diff --git a/src/bin/cli.rs b/src/bin/cli.rs
index 4e9a12b..7920c05 100644
--- a/src/bin/cli.rs
+++ b/src/bin/cli.rs
@@ -1,184 +1,92 @@
-use std::{
-    io::{stdout, Write},
-    ops::Neg,
-};
-
use monokakido::{Error, MonokakidoDict};
-fn get_first_audio_id(page: &str) -> Result<&str, Error> {
-    if let Some((_, sound_tail)) = page.split_once("<sound>") {
-        if let Some((sound, _)) = sound_tail.split_once("</sound>") {
-            if let Some((head_id, _)) = sound.split_once(".aac") {
-                if let Some((_, id)) = head_id.split_once("href=\"") {
-                    return Ok(id);
+fn print_help() {
+ println!("Monokakido CLI. Supported subcommands:");
+ println!("list - lists all dictionaries installed in the standard path");
+ println!("list_items {{dict}} {{keyword}} - lists all items");
+ println!("list_audio {{dict}} {{keyword}} - lists all audio files");
+ println!("help - this help");
+}
+
+fn list_items(dict_name: &str, keyword: &str) -> Result<(), Error> {
+    let mut dict = MonokakidoDict::open(dict_name)?;
+    let (_, items) = dict.keys.search_exact(keyword)?;
+
+    for id in items {
+        let item = dict.pages.get_item(id)?;
+        println!("{item}");
+    }
+    Ok(())
+}
+
+fn list_pages(dict_name: &str, keyword: &str) -> Result<(), Error> {
+    let mut dict = MonokakidoDict::open(dict_name)?;
+    let (_, items) = dict.keys.search_exact(keyword)?;
+
+    for id in items {
+        let page = dict.pages.get_page(id)?;
+        println!("{page}");
+    }
+    Ok(())
+}
+
+fn list_audio(dict_name: &str, keyword: &str) -> Result<(), Error> {
+    let mut dict = MonokakidoDict::open(dict_name)?;
+    let (_, items) = dict.keys.search_exact(keyword)?;
+
+    for id in items {
+        for audio in dict.pages.get_item_audio(id)? {
+            if let Some((_, audio)) = audio?.split_once("href=\"") {
+                if let Some((id, _)) = audio.split_once('"') {
+                    println!("{id}");
                }
            }
        }
    }
-    Err(Error::NotFound)
+    Ok(())
}
-fn get_first_accent(page: &str) -> Result<i8, Error> {
-    if let Some((_, accent_tail)) = page.split_once("<accent>") {
-        if let Some((mut accent, _)) = accent_tail.split_once("</accent>") {
-            if let Some((a, _)) = accent.split_once("") {
-                accent = a;
-            }
-            if let Some(pos) = accent.find("＼") {
-                let endpos = pos + "＼".len();
-                let before = &accent[..pos];
-                let after = &accent[endpos..];
-                let is_mora = |&c: &char| {
-                    (matches!(c, 'ぁ'..='ん' | 'ァ'..='ン' | 'ー')
-                        && !matches!(c, 'ゃ'..='ょ' | 'ャ'..='ョ'))
-                };
-                return Ok((before.chars().filter(is_mora).count() as i8));
-            }
-            if let Some(_) = accent.find("━") {
-                return Ok(0);
-            }
-        }
+fn list_dicts() -> Result<(), Error> {
+    for dict in MonokakidoDict::list()? {
+        println!("{}", dict?);
    }
-    Err(Error::NotFound)
-}
-
-fn get_accents(page: &str) -> Result<(i8, Option<i8>), Error> {
-    if let Some((first, tail)) = page.split_once("") {
-        return Ok((get_first_accent(first)?, get_first_accent(tail).ok()));
-    }
-    Err(Error::NotFound)
+    Ok(())
}
fn main() {
-    let Some(key) = std::env::args().nth(1) else {
-        return;
+    let mut args = std::env::args();
+    let res = match args.nth(1).as_deref() {
+        Some("list_audio") => {
+            if let (Some(dict_name), Some(keyword)) = (args.next(), args.next()) {
+                list_audio(&dict_name, &keyword)
+            } else {
+                Err(Error::InvalidArg)
+            }
+        }
+        Some("list_items") => {
+            if let (Some(dict_name), Some(keyword)) = (args.next(), args.next()) {
+                list_items(&dict_name, &keyword)
+            } else {
+                Err(Error::InvalidArg)
+            }
+        }
+        Some("list_pages") => {
+            if let (Some(dict_name), Some(keyword)) = (args.next(), args.next()) {
+                list_pages(&dict_name, &keyword)
+            } else {
+                Err(Error::InvalidArg)
+            }
+        }
+        Some("list") => list_dicts(),
+        None | Some("help") => {
+            print_help();
+            Ok(())
+        }
+        _ => Err(Error::InvalidArg),
    };
-    for dict in MonokakidoDict::list().unwrap() {
-        dbg!(dict.unwrap());
+    if let Err(e) = res {
+        eprintln!("Error: {e:?}");
+        std::process::exit(1)
    }
-
- let mut dict = MonokakidoDict::open("NHKACCENT2").unwrap();
- // let mut accents = vec![];
- let result = dict.keys.search_exact(&key);
-
- match result {
- Ok((_, pages)) => {
- for id in pages {
- let page = dict.pages.get(id.page).unwrap();
- println!("{page}");
- /*
- if let Ok(accent) = get_accents(page) {
- accents.push(accent);
- } */
- /*
- let id = get_first_audio_id(page).unwrap();
- let audio = dict.audio.get(id).unwrap();
- let mut stdout = stdout().lock();
- stdout.write_all(audio).unwrap();
- */
- }
- }
- Err(e) => {
- println!("{:?}", e);
- return;
- }
- }
- /*
- print!("{key}\t");
- accents.sort();
- accents.dedup();
- if accents.is_empty() {
- print!("N/A");
- } else {
- for (accent_main, accent_sub) in accents {
- print!("{accent_main}");
- if let Some(accent_sub) = accent_sub {
- if accent_main != accent_sub {
- print!("/{accent_sub}");
- }
- }
- print!(" ");
- }
- } */
-
- /*
- let idx_list = [
- 0,
- 1,
- 2,
- 3,
- 4,
- 5,
- 6,
- 7,
- 8,
- 9,
- 10,
- 11,
- 12,
- 13,
- 14,
- 15,
- 16,
- 17,
- 18,
- 19,
- 20,
- 46200,
- 46201,
- 46202,
- 46203,
- 46204,
- 46205,
- 46206,
- 46207,
- 46208,
- 46209,
- 46210,
- 46211,
- 70000,
- dict.keys.count() - 1,
- ];
-
- println!("Index: length order");
- for idx in idx_list {
- let (word, pages) = dict.keys.get_index_len(idx).unwrap();
- println!("\n{}", word);
- for id in pages {
- println!("{}", dict.pages.get(id).unwrap());
- }
- }
-
- println!("Index: prefix order");
- for idx in idx_list {
- let (word, pages) = dict.keys.get_index_prefix(idx).unwrap();
- println!("\n{}", word);
- for id in pages {
- println!("{}", dict.pages.get(id).unwrap());
- }
- }
-
- println!("Index: suffix order");
- for idx in idx_list {
- let (word, pages) = dict.keys.get_index_suffix(idx).unwrap();
- println!("\n{}", word);
- for id in pages {
- println!("{}", dict.pages.get(id).unwrap());
- }
- }
-
- println!("Index: ?");
- for idx in idx_list {
- let (word, pages) = dict.keys.get_index_d(idx).unwrap();
- println!("\n{}", word);
- for id in pages {
- println!("{}", dict.pages.get(id).unwrap());
- }
- }
- */
- let mut audio_rsc = dict.audio.unwrap();
- let audio = audio_rsc.get("jee").unwrap();
- let mut stdout = stdout().lock();
- stdout.write_all(audio).unwrap();
}
diff --git a/src/bin/explode.rs b/src/bin/explode.rs
index 6fb2706..ea3cd7d 100644
--- a/src/bin/explode.rs
+++ b/src/bin/explode.rs
@@ -18,7 +18,7 @@ fn write_index(dict: &MonokakidoDict, index: &KeyIndex, tsv_fname: &str) -> Resu
        for PageItemId { page, item } in pages {
            write!(&mut index_tsv, "\t{page:0>10}")?;
            if item > 0 {
-                write!(&mut index_tsv, ":{item:0>3}")?;
+                write!(&mut index_tsv, "-{item:0>3}")?;
            }
        }
        index_tsv.write_all(b"\n")?;
@@ -37,7 +37,7 @@ fn explode() -> Result<(), Error> {
    create_dir_all(&pages_dir)?;
    let mut path = String::from(&pages_dir);
    for idx in dict.pages.idx_iter()? {
-        let (id, page) = dict.pages.get_by_idx(idx)?;
+        let (id, page) = dict.pages.page_by_idx(idx)?;
        write!(&mut path, "{id:0>10}.xml")?;
        let mut file = File::create(&path)?;
        path.truncate(pages_dir.len());
diff --git a/src/error.rs b/src/error.rs
index cede223..61809cf 100644
--- a/src/error.rs
+++ b/src/error.rs
@@ -24,6 +24,7 @@ pub enum Error {
    InvalidArg,
    FmtError,
    IndexDoesntExist,
+    XmlError,
}
impl From<std::io::Error> for Error {
@@ -43,3 +44,9 @@ impl From<std::fmt::Error> for Error {
        Error::FmtError
    }
}
+
+impl From<xmlparser::Error> for Error {
+    fn from(_: xmlparser::Error) -> Self {
+        Error::XmlError
+    }
+}
diff --git a/src/key.rs b/src/key.rs
index 2909e4b..4aaa585 100644
--- a/src/key.rs
+++ b/src/key.rs
@@ -342,6 +342,7 @@ impl<'a> Iterator for PageIter<'a> {
    }
}
+#[derive(Debug, Clone, Copy)]
pub struct PageItemId {
    pub page: u32,
    pub item: u8,
diff --git a/src/lib.rs b/src/lib.rs
index 6afd41c..f81d773 100644
--- a/src/lib.rs
+++ b/src/lib.rs
@@ -10,4 +10,4 @@ pub use audio::Audio;
pub use dict::MonokakidoDict;
pub use error::Error;
pub use key::{KeyIndex, Keys, PageItemId};
-pub use pages::Pages;
+pub use pages::{Pages, XmlParser};
diff --git a/src/pages.rs b/src/pages.rs
index 2ebe60c..0ca6999 100644
--- a/src/pages.rs
+++ b/src/pages.rs
@@ -1,6 +1,6 @@
use std::{ops::Range, path::PathBuf};
-use crate::{dict::Paths, resource::Rsc, Error};
+use crate::{dict::Paths, resource::Rsc, Error, PageItemId};
const RSC_NAME: &str = "contents";
@@ -9,6 +9,75 @@ pub struct Pages {
    res: Option<Rsc>,
}
+pub struct XmlParser<'a> {
+    xml: &'a str,
+    tokens: xmlparser::Tokenizer<'a>,
+    target_level: Option<usize>,
+    tag_stack: Vec<(&'a str, usize)>,
+}
+
+impl<'a> XmlParser<'a> {
+    pub fn from(xml: &'a str) -> Self {
+        Self {
+            xml,
+            tokens: xmlparser::Tokenizer::from(xml),
+            target_level: None,
+            tag_stack: Vec::new(),
+        }
+    }
+
+    pub fn next_fragment_by(
+        &mut self,
+        elem_cond: impl Fn(&str) -> bool,
+        attr_cond: impl Fn(&str, &str) -> bool,
+    ) -> Result