CLI works!
This commit is contained in:
parent
280787b7db
commit
40a27dc355
7
Cargo.lock
generated
7
Cargo.lock
generated
|
@ -51,6 +51,7 @@ version = "0.2.0"
|
|||
dependencies = [
|
||||
"miniserde",
|
||||
"miniz_oxide",
|
||||
"xmlparser",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
|
@ -93,3 +94,9 @@ name = "unicode-ident"
|
|||
version = "1.0.6"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "84a22b9f218b40614adcb3f4ff08b703773ad44fa9423e4e0d346d5db86e4ebc"
|
||||
|
||||
[[package]]
|
||||
name = "xmlparser"
|
||||
version = "0.13.5"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "4d25c75bf9ea12c4040a97f829154768bbbce366287e2dc044af160cd79a13fd"
|
||||
|
|
|
@ -8,3 +8,4 @@ license = "MIT"
|
|||
[dependencies]
|
||||
miniz_oxide = { version = "0.6", default-features = false }
|
||||
miniserde = "0.1"
|
||||
xmlparser = "0.13.5"
|
||||
|
|
246
src/bin/cli.rs
246
src/bin/cli.rs
|
@ -1,184 +1,92 @@
|
|||
use std::{
|
||||
io::{stdout, Write},
|
||||
ops::Neg,
|
||||
};
|
||||
|
||||
use monokakido::{Error, MonokakidoDict};
|
||||
|
||||
fn get_first_audio_id(page: &str) -> Result<&str, Error> {
|
||||
if let Some((_, sound_tail)) = page.split_once("<sound>") {
|
||||
if let Some((sound, _)) = sound_tail.split_once("</sound>") {
|
||||
if let Some((head_id, _)) = sound.split_once(".aac") {
|
||||
if let Some((_, id)) = head_id.split_once("href=\"") {
|
||||
return Ok(id);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
Err(Error::NotFound)
|
||||
/// Prints the list of supported subcommands to standard output.
///
/// Every subcommand dispatched by `main` is listed here, including
/// `list_pages`, so that the help text stays in sync with the dispatcher.
fn print_help() {
    println!("Monokakido CLI. Supported subcommands:");
    println!("list - lists all dictionaries installed in the standard path");
    println!("list_items {{dict}} {{keyword}} - lists all items");
    println!("list_pages {{dict}} {{keyword}} - lists all pages");
    println!("list_audio {{dict}} {{keyword}} - lists all audio files");
    println!("help - this help");
}
|
||||
|
||||
fn get_first_accent(page: &str) -> Result<i8, Error> {
|
||||
if let Some((_, accent_tail)) = page.split_once("<accent_text>") {
|
||||
if let Some((mut accent, _)) = accent_tail.split_once("</accent_text>") {
|
||||
if let Some((a, _)) = accent.split_once("<sound>") {
|
||||
accent = a;
|
||||
fn list_items(dict_name: &str, keyword: &str) -> Result<(), Error> {
|
||||
let mut dict = MonokakidoDict::open(dict_name)?;
|
||||
let (_, items) = dict.keys.search_exact(keyword)?;
|
||||
|
||||
for id in items {
|
||||
let item = dict.pages.get_item(id)?;
|
||||
println!("{item}");
|
||||
}
|
||||
if let Some(pos) = accent.find("<symbol_backslash>\</symbol_backslash>") {
|
||||
let endpos = pos + "<symbol_backslash>\</symbol_backslash>".len();
|
||||
let before = &accent[..pos];
|
||||
let after = &accent[endpos..];
|
||||
let is_mora = |&c: &char| {
|
||||
(matches!(c, 'ぁ'..='ん' | 'ァ'..='ン' | 'ー')
|
||||
&& !matches!(c, 'ゃ'..='ょ' | 'ャ'..='ョ'))
|
||||
};
|
||||
return Ok((before.chars().filter(is_mora).count() as i8));
|
||||
}
|
||||
if let Some(_) = accent.find("<symbol_macron>━</symbol_macron>") {
|
||||
return Ok(0);
|
||||
}
|
||||
}
|
||||
}
|
||||
Err(Error::NotFound)
|
||||
Ok(())
|
||||
}
|
||||
|
||||
fn get_accents(page: &str) -> Result<(i8, Option<i8>), Error> {
|
||||
if let Some((first, tail)) = page.split_once("</accent>") {
|
||||
return Ok((get_first_accent(first)?, get_first_accent(tail).ok()));
|
||||
fn list_pages(dict_name: &str, keyword: &str) -> Result<(), Error> {
|
||||
let mut dict = MonokakidoDict::open(dict_name)?;
|
||||
let (_, items) = dict.keys.search_exact(keyword)?;
|
||||
|
||||
for id in items {
|
||||
let page = dict.pages.get_page(id)?;
|
||||
println!("{page}");
|
||||
}
|
||||
Err(Error::NotFound)
|
||||
Ok(())
|
||||
}
|
||||
|
||||
fn list_audio(dict_name: &str, keyword: &str) -> Result<(), Error> {
|
||||
let mut dict = MonokakidoDict::open(dict_name)?;
|
||||
let (_, items) = dict.keys.search_exact(keyword)?;
|
||||
|
||||
for id in items {
|
||||
for audio in dict.pages.get_item_audio(id)? {
|
||||
if let Some((_, audio)) = audio?.split_once("href=\"") {
|
||||
if let Some((id, _)) = audio.split_once('"') {
|
||||
println!("{id}");
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
|
||||
fn list_dicts() -> Result<(), Error> {
|
||||
for dict in MonokakidoDict::list()? {
|
||||
println!("{}", dict?);
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
|
||||
fn main() {
|
||||
let Some(key) = std::env::args().nth(1) else {
|
||||
return;
|
||||
let mut args = std::env::args();
|
||||
let res = match args.nth(1).as_deref() {
|
||||
Some("list_audio") => {
|
||||
if let (Some(dict_name), Some(keyword)) = (args.next(), args.next()) {
|
||||
list_audio(&dict_name, &keyword)
|
||||
} else {
|
||||
Err(Error::InvalidArg)
|
||||
}
|
||||
}
|
||||
Some("list_items") => {
|
||||
if let (Some(dict_name), Some(keyword)) = (args.next(), args.next()) {
|
||||
list_items(&dict_name, &keyword)
|
||||
} else {
|
||||
Err(Error::InvalidArg)
|
||||
}
|
||||
}
|
||||
Some("list_pages") => {
|
||||
if let (Some(dict_name), Some(keyword)) = (args.next(), args.next()) {
|
||||
list_pages(&dict_name, &keyword)
|
||||
} else {
|
||||
Err(Error::InvalidArg)
|
||||
}
|
||||
}
|
||||
Some("list") => list_dicts(),
|
||||
None | Some("help") => {
|
||||
print_help();
|
||||
Ok(())
|
||||
}
|
||||
_ => Err(Error::InvalidArg),
|
||||
};
|
||||
|
||||
for dict in MonokakidoDict::list().unwrap() {
|
||||
dbg!(dict.unwrap());
|
||||
if let Err(e) = res {
|
||||
eprintln!("Error: {e:?}");
|
||||
std::process::exit(1)
|
||||
}
|
||||
|
||||
let mut dict = MonokakidoDict::open("NHKACCENT2").unwrap();
|
||||
// let mut accents = vec![];
|
||||
let result = dict.keys.search_exact(&key);
|
||||
|
||||
match result {
|
||||
Ok((_, pages)) => {
|
||||
for id in pages {
|
||||
let page = dict.pages.get(id.page).unwrap();
|
||||
println!("{page}");
|
||||
/*
|
||||
if let Ok(accent) = get_accents(page) {
|
||||
accents.push(accent);
|
||||
} */
|
||||
/*
|
||||
let id = get_first_audio_id(page).unwrap();
|
||||
let audio = dict.audio.get(id).unwrap();
|
||||
let mut stdout = stdout().lock();
|
||||
stdout.write_all(audio).unwrap();
|
||||
*/
|
||||
}
|
||||
}
|
||||
Err(e) => {
|
||||
println!("{:?}", e);
|
||||
return;
|
||||
}
|
||||
}
|
||||
/*
|
||||
print!("{key}\t");
|
||||
accents.sort();
|
||||
accents.dedup();
|
||||
if accents.is_empty() {
|
||||
print!("N/A");
|
||||
} else {
|
||||
for (accent_main, accent_sub) in accents {
|
||||
print!("{accent_main}");
|
||||
if let Some(accent_sub) = accent_sub {
|
||||
if accent_main != accent_sub {
|
||||
print!("/{accent_sub}");
|
||||
}
|
||||
}
|
||||
print!(" ");
|
||||
}
|
||||
} */
|
||||
|
||||
/*
|
||||
let idx_list = [
|
||||
0,
|
||||
1,
|
||||
2,
|
||||
3,
|
||||
4,
|
||||
5,
|
||||
6,
|
||||
7,
|
||||
8,
|
||||
9,
|
||||
10,
|
||||
11,
|
||||
12,
|
||||
13,
|
||||
14,
|
||||
15,
|
||||
16,
|
||||
17,
|
||||
18,
|
||||
19,
|
||||
20,
|
||||
46200,
|
||||
46201,
|
||||
46202,
|
||||
46203,
|
||||
46204,
|
||||
46205,
|
||||
46206,
|
||||
46207,
|
||||
46208,
|
||||
46209,
|
||||
46210,
|
||||
46211,
|
||||
70000,
|
||||
dict.keys.count() - 1,
|
||||
];
|
||||
|
||||
println!("Index: length order");
|
||||
for idx in idx_list {
|
||||
let (word, pages) = dict.keys.get_index_len(idx).unwrap();
|
||||
println!("\n{}", word);
|
||||
for id in pages {
|
||||
println!("{}", dict.pages.get(id).unwrap());
|
||||
}
|
||||
}
|
||||
|
||||
println!("Index: prefix order");
|
||||
for idx in idx_list {
|
||||
let (word, pages) = dict.keys.get_index_prefix(idx).unwrap();
|
||||
println!("\n{}", word);
|
||||
for id in pages {
|
||||
println!("{}", dict.pages.get(id).unwrap());
|
||||
}
|
||||
}
|
||||
|
||||
println!("Index: suffix order");
|
||||
for idx in idx_list {
|
||||
let (word, pages) = dict.keys.get_index_suffix(idx).unwrap();
|
||||
println!("\n{}", word);
|
||||
for id in pages {
|
||||
println!("{}", dict.pages.get(id).unwrap());
|
||||
}
|
||||
}
|
||||
|
||||
println!("Index: ?");
|
||||
for idx in idx_list {
|
||||
let (word, pages) = dict.keys.get_index_d(idx).unwrap();
|
||||
println!("\n{}", word);
|
||||
for id in pages {
|
||||
println!("{}", dict.pages.get(id).unwrap());
|
||||
}
|
||||
}
|
||||
*/
|
||||
let mut audio_rsc = dict.audio.unwrap();
|
||||
let audio = audio_rsc.get("jee").unwrap();
|
||||
let mut stdout = stdout().lock();
|
||||
stdout.write_all(audio).unwrap();
|
||||
}
|
||||
|
|
|
@ -18,7 +18,7 @@ fn write_index(dict: &MonokakidoDict, index: &KeyIndex, tsv_fname: &str) -> Resu
|
|||
for PageItemId { page, item } in pages {
|
||||
write!(&mut index_tsv, "\t{page:0>10}")?;
|
||||
if item > 0 {
|
||||
write!(&mut index_tsv, ":{item:0>3}")?;
|
||||
write!(&mut index_tsv, "-{item:0>3}")?;
|
||||
}
|
||||
}
|
||||
index_tsv.write_all(b"\n")?;
|
||||
|
@ -37,7 +37,7 @@ fn explode() -> Result<(), Error> {
|
|||
create_dir_all(&pages_dir)?;
|
||||
let mut path = String::from(&pages_dir);
|
||||
for idx in dict.pages.idx_iter()? {
|
||||
let (id, page) = dict.pages.get_by_idx(idx)?;
|
||||
let (id, page) = dict.pages.page_by_idx(idx)?;
|
||||
write!(&mut path, "{id:0>10}.xml")?;
|
||||
let mut file = File::create(&path)?;
|
||||
path.truncate(pages_dir.len());
|
||||
|
|
|
@ -24,6 +24,7 @@ pub enum Error {
|
|||
InvalidArg,
|
||||
FmtError,
|
||||
IndexDoesntExist,
|
||||
XmlError,
|
||||
}
|
||||
|
||||
impl From<IoError> for Error {
|
||||
|
@ -43,3 +44,9 @@ impl From<FmtError> for Error {
|
|||
Error::FmtError
|
||||
}
|
||||
}
|
||||
|
||||
impl From<xmlparser::Error> for Error {
|
||||
fn from(_: xmlparser::Error) -> Self {
|
||||
Error::XmlError
|
||||
}
|
||||
}
|
||||
|
|
|
@ -342,6 +342,7 @@ impl<'a> Iterator for PageIter<'a> {
|
|||
}
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, Copy)]
|
||||
pub struct PageItemId {
|
||||
pub page: u32,
|
||||
pub item: u8,
|
||||
|
|
|
@ -10,4 +10,4 @@ pub use audio::Audio;
|
|||
pub use dict::MonokakidoDict;
|
||||
pub use error::Error;
|
||||
pub use key::{KeyIndex, Keys, PageItemId};
|
||||
pub use pages::Pages;
|
||||
pub use pages::{Pages, XmlParser};
|
||||
|
|
124
src/pages.rs
124
src/pages.rs
|
@ -1,6 +1,6 @@
|
|||
use std::{ops::Range, path::PathBuf};
|
||||
|
||||
use crate::{dict::Paths, resource::Rsc, Error};
|
||||
use crate::{dict::Paths, resource::Rsc, Error, PageItemId};
|
||||
|
||||
const RSC_NAME: &str = "contents";
|
||||
|
||||
|
@ -9,6 +9,75 @@ pub struct Pages {
|
|||
res: Option<Rsc>,
|
||||
}
|
||||
|
||||
pub struct XmlParser<'a> {
|
||||
xml: &'a str,
|
||||
tokens: xmlparser::Tokenizer<'a>,
|
||||
target_level: Option<usize>,
|
||||
tag_stack: Vec<(&'a str, usize)>,
|
||||
}
|
||||
|
||||
impl<'a> XmlParser<'a> {
|
||||
pub fn from(xml: &'a str) -> Self {
|
||||
Self {
|
||||
xml,
|
||||
tokens: xmlparser::Tokenizer::from(xml),
|
||||
target_level: None,
|
||||
tag_stack: Vec::new(),
|
||||
}
|
||||
}
|
||||
|
||||
pub fn next_fragment_by(
|
||||
&mut self,
|
||||
elem_cond: impl Fn(&str) -> bool,
|
||||
attr_cond: impl Fn(&str, &str) -> bool,
|
||||
) -> Result<Option<&'a str>, Error> {
|
||||
use xmlparser::{
|
||||
ElementEnd::{Close, Empty},
|
||||
Token::{Attribute, ElementEnd, ElementStart},
|
||||
};
|
||||
|
||||
for token in &mut self.tokens {
|
||||
let mut popped = None;
|
||||
let token = token?;
|
||||
match token {
|
||||
ElementStart { local, span, .. } => {
|
||||
self.tag_stack.push((local.as_str(), span.start()));
|
||||
if elem_cond(&local) && self.target_level.is_none() {
|
||||
self.target_level = Some(self.tag_stack.len());
|
||||
}
|
||||
}
|
||||
Attribute { local, value, .. } => {
|
||||
if attr_cond(&local, &value) && self.target_level.is_none() {
|
||||
self.target_level = Some(self.tag_stack.len());
|
||||
}
|
||||
}
|
||||
ElementEnd {
|
||||
end: Close(_, tag),
|
||||
span,
|
||||
} => {
|
||||
if Some(&*tag) == self.tag_stack.last().map(|(t, _)| *t) {
|
||||
popped = self.tag_stack.pop().map(|(_, start)| (start, span.end()));
|
||||
} else {
|
||||
return Err(Error::XmlError);
|
||||
}
|
||||
}
|
||||
ElementEnd { end: Empty, span } => {
|
||||
popped = self.tag_stack.pop().map(|(_, start)| (start, span.end()));
|
||||
}
|
||||
_ => continue,
|
||||
}
|
||||
if let Some((start, end)) = popped {
|
||||
if Some(self.tag_stack.len()) < self.target_level {
|
||||
self.target_level = None;
|
||||
return Ok(Some(&self.xml[start..end]));
|
||||
}
|
||||
}
|
||||
}
|
||||
// No body fragment or item fragment with suitable ID found
|
||||
Ok(None)
|
||||
}
|
||||
}
|
||||
|
||||
impl Pages {
|
||||
pub fn new(paths: &Paths) -> Result<Self, Error> {
|
||||
Ok(Pages {
|
||||
|
@ -24,13 +93,43 @@ impl Pages {
|
|||
Ok(())
|
||||
}
|
||||
|
||||
pub fn get(&mut self, id: u32) -> Result<&str, Error> {
|
||||
pub fn get_page(&mut self, id: PageItemId) -> Result<&str, Error> {
|
||||
self.init()?;
|
||||
let Some(res) = self.res.as_mut() else { unreachable!() };
|
||||
std::str::from_utf8(res.get(id)?).map_err(|_| Error::Utf8Error)
|
||||
let xml = std::str::from_utf8(res.get(id.page)?).map_err(|_| Error::Utf8Error)?;
|
||||
Ok(xml)
|
||||
}
|
||||
|
||||
pub fn get_by_idx(&mut self, idx: usize) -> Result<(u32, &str), Error> {
|
||||
pub fn get_item(&mut self, id: PageItemId) -> Result<&str, Error> {
|
||||
let xml = self.get_page(id)?;
|
||||
let mut parser = XmlParser::from(xml);
|
||||
if id.item == 0 {
|
||||
parser.next_fragment_by(|tag| tag == "body", |_, _| false)
|
||||
} else {
|
||||
parser.next_fragment_by(
|
||||
|_| false,
|
||||
|name, value| {
|
||||
if name == "id" {
|
||||
if let Some((page, item)) = value.split_once('-') {
|
||||
if page.parse() == Ok(id.page) && item.parse() == Ok(id.item) {
|
||||
return true;
|
||||
}
|
||||
}
|
||||
}
|
||||
false
|
||||
},
|
||||
)
|
||||
}?
|
||||
.ok_or(Error::XmlError)
|
||||
}
|
||||
|
||||
pub fn get_item_audio(&mut self, id: PageItemId) -> Result<AudioIter, Error> {
|
||||
let xml = self.get_item(id)?;
|
||||
let parser = XmlParser::from(xml);
|
||||
Ok(AudioIter { parser })
|
||||
}
|
||||
|
||||
pub fn page_by_idx(&mut self, idx: usize) -> Result<(u32, &str), Error> {
|
||||
self.init()?;
|
||||
let Some(res) = self.res.as_mut() else { unreachable!() };
|
||||
let (id, page) = res.get_by_idx(idx)?;
|
||||
|
@ -43,3 +142,20 @@ impl Pages {
|
|||
Ok(0..res.len())
|
||||
}
|
||||
}
|
||||
|
||||
pub struct AudioIter<'a> {
|
||||
parser: XmlParser<'a>,
|
||||
}
|
||||
|
||||
impl<'a> Iterator for AudioIter<'a> {
|
||||
type Item = Result<&'a str, Error>;
|
||||
|
||||
fn next(&mut self) -> Option<Self::Item> {
|
||||
self.parser
|
||||
.next_fragment_by(
|
||||
|_| false,
|
||||
|name, value| name == "href" && value.ends_with(".aac"),
|
||||
)
|
||||
.transpose()
|
||||
}
|
||||
}
|
||||
|
|
Loading…
Reference in a new issue