CLI works!

This commit is contained in:
Pyry Kontio 2023-02-03 03:43:23 +09:00
parent 280787b7db
commit 40a27dc355
No known key found for this signature in database
8 changed files with 215 additions and 175 deletions

7
Cargo.lock generated
View file

@ -51,6 +51,7 @@ version = "0.2.0"
dependencies = [
"miniserde",
"miniz_oxide",
"xmlparser",
]
[[package]]
@ -93,3 +94,9 @@ name = "unicode-ident"
version = "1.0.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "84a22b9f218b40614adcb3f4ff08b703773ad44fa9423e4e0d346d5db86e4ebc"
[[package]]
name = "xmlparser"
version = "0.13.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "4d25c75bf9ea12c4040a97f829154768bbbce366287e2dc044af160cd79a13fd"

View file

@ -8,3 +8,4 @@ license = "MIT"
[dependencies]
miniz_oxide = { version = "0.6", default-features = false }
miniserde = "0.1"
xmlparser = "0.13.5"

View file

@ -1,184 +1,92 @@
use std::{
io::{stdout, Write},
ops::Neg,
};
use monokakido::{Error, MonokakidoDict};
fn get_first_audio_id(page: &str) -> Result<&str, Error> {
if let Some((_, sound_tail)) = page.split_once("<sound>") {
if let Some((sound, _)) = sound_tail.split_once("</sound>") {
if let Some((head_id, _)) = sound.split_once(".aac") {
if let Some((_, id)) = head_id.split_once("href=\"") {
return Ok(id);
/// Prints the list of supported subcommands to standard output.
///
/// Every subcommand dispatched by `main` is listed here, including
/// `list_pages`.
fn print_help() {
    println!("Monokakido CLI. Supported subcommands:");
    println!("list - lists all dictionaries installed in the standard path");
    println!("list_items {{dict}} {{keyword}} - lists all items");
    println!("list_pages {{dict}} {{keyword}} - lists all pages");
    println!("list_audio {{dict}} {{keyword}} - lists all audio files");
    println!("help - this help");
}
/// Opens the dictionary `dict_name`, looks `keyword` up with
/// `keys.search_exact`, and prints the item fragment of every hit, one per
/// line.
///
/// # Errors
///
/// Propagates any error from opening the dictionary, from the key search, or
/// from fetching an item.
fn list_items(dict_name: &str, keyword: &str) -> Result<(), Error> {
    let mut dictionary = MonokakidoDict::open(dict_name)?;
    let (_, hits) = dictionary.keys.search_exact(keyword)?;
    for item_id in hits {
        // The lookup is evaluated (and may bail out) before anything is printed.
        println!("{}", dictionary.pages.get_item(item_id)?);
    }
    Ok(())
}
/// Opens the dictionary `dict_name`, looks `keyword` up with
/// `keys.search_exact`, and prints the whole page of every hit, one per line.
///
/// # Errors
///
/// Propagates any error from opening the dictionary, from the key search, or
/// from fetching a page.
fn list_pages(dict_name: &str, keyword: &str) -> Result<(), Error> {
    let mut dictionary = MonokakidoDict::open(dict_name)?;
    let (_, hits) = dictionary.keys.search_exact(keyword)?;
    for page_id in hits {
        // The lookup is evaluated (and may bail out) before anything is printed.
        println!("{}", dictionary.pages.get_page(page_id)?);
    }
    Ok(())
}
/// Opens the dictionary `dict_name`, looks `keyword` up with
/// `keys.search_exact`, and prints the `href` value of every audio fragment
/// found in the matching items, one per line.
///
/// Fragments that do not contain a double-quoted `href="..."` attribute are
/// skipped silently.
///
/// # Errors
///
/// Propagates any error from opening the dictionary, from the key search,
/// from fetching an item, or from iterating over its audio fragments.
fn list_audio(dict_name: &str, keyword: &str) -> Result<(), Error> {
    let mut dict = MonokakidoDict::open(dict_name)?;
    let (_, items) = dict.keys.search_exact(keyword)?;
    for id in items {
        for audio in dict.pages.get_item_audio(id)? {
            let fragment = audio?;
            // Keep only the text between `href="` and the next double quote.
            let href = fragment
                .split_once("href=\"")
                .and_then(|(_, tail)| tail.split_once('"'));
            if let Some((href, _)) = href {
                println!("{href}");
            }
        }
    }
    Ok(())
}
fn get_first_accent(page: &str) -> Result<i8, Error> {
if let Some((_, accent_tail)) = page.split_once("<accent_text>") {
if let Some((mut accent, _)) = accent_tail.split_once("</accent_text>") {
if let Some((a, _)) = accent.split_once("<sound>") {
accent = a;
}
if let Some(pos) = accent.find("<symbol_backslash></symbol_backslash>") {
let endpos = pos + "<symbol_backslash></symbol_backslash>".len();
let before = &accent[..pos];
let after = &accent[endpos..];
let is_mora = |&c: &char| {
(matches!(c, 'ぁ'..='ん' | 'ァ'..='ン' | 'ー')
&& !matches!(c, 'ゃ'..='ょ' | 'ャ'..='ョ'))
};
return Ok((before.chars().filter(is_mora).count() as i8));
}
if let Some(_) = accent.find("<symbol_macron>━</symbol_macron>") {
return Ok(0);
}
}
fn list_dicts() -> Result<(), Error> {
for dict in MonokakidoDict::list()? {
println!("{}", dict?);
}
Err(Error::NotFound)
}
fn get_accents(page: &str) -> Result<(i8, Option<i8>), Error> {
if let Some((first, tail)) = page.split_once("</accent>") {
return Ok((get_first_accent(first)?, get_first_accent(tail).ok()));
}
Err(Error::NotFound)
Ok(())
}
fn main() {
let Some(key) = std::env::args().nth(1) else {
return;
let mut args = std::env::args();
let res = match args.nth(1).as_deref() {
Some("list_audio") => {
if let (Some(dict_name), Some(keyword)) = (args.next(), args.next()) {
list_audio(&dict_name, &keyword)
} else {
Err(Error::InvalidArg)
}
}
Some("list_items") => {
if let (Some(dict_name), Some(keyword)) = (args.next(), args.next()) {
list_items(&dict_name, &keyword)
} else {
Err(Error::InvalidArg)
}
}
Some("list_pages") => {
if let (Some(dict_name), Some(keyword)) = (args.next(), args.next()) {
list_pages(&dict_name, &keyword)
} else {
Err(Error::InvalidArg)
}
}
Some("list") => list_dicts(),
None | Some("help") => {
print_help();
Ok(())
}
_ => Err(Error::InvalidArg),
};
for dict in MonokakidoDict::list().unwrap() {
dbg!(dict.unwrap());
if let Err(e) = res {
eprintln!("Error: {e:?}");
std::process::exit(1)
}
let mut dict = MonokakidoDict::open("NHKACCENT2").unwrap();
// let mut accents = vec![];
let result = dict.keys.search_exact(&key);
match result {
Ok((_, pages)) => {
for id in pages {
let page = dict.pages.get(id.page).unwrap();
println!("{page}");
/*
if let Ok(accent) = get_accents(page) {
accents.push(accent);
} */
/*
let id = get_first_audio_id(page).unwrap();
let audio = dict.audio.get(id).unwrap();
let mut stdout = stdout().lock();
stdout.write_all(audio).unwrap();
*/
}
}
Err(e) => {
println!("{:?}", e);
return;
}
}
/*
print!("{key}\t");
accents.sort();
accents.dedup();
if accents.is_empty() {
print!("N/A");
} else {
for (accent_main, accent_sub) in accents {
print!("{accent_main}");
if let Some(accent_sub) = accent_sub {
if accent_main != accent_sub {
print!("/{accent_sub}");
}
}
print!(" ");
}
} */
/*
let idx_list = [
0,
1,
2,
3,
4,
5,
6,
7,
8,
9,
10,
11,
12,
13,
14,
15,
16,
17,
18,
19,
20,
46200,
46201,
46202,
46203,
46204,
46205,
46206,
46207,
46208,
46209,
46210,
46211,
70000,
dict.keys.count() - 1,
];
println!("Index: length order");
for idx in idx_list {
let (word, pages) = dict.keys.get_index_len(idx).unwrap();
println!("\n{}", word);
for id in pages {
println!("{}", dict.pages.get(id).unwrap());
}
}
println!("Index: prefix order");
for idx in idx_list {
let (word, pages) = dict.keys.get_index_prefix(idx).unwrap();
println!("\n{}", word);
for id in pages {
println!("{}", dict.pages.get(id).unwrap());
}
}
println!("Index: suffix order");
for idx in idx_list {
let (word, pages) = dict.keys.get_index_suffix(idx).unwrap();
println!("\n{}", word);
for id in pages {
println!("{}", dict.pages.get(id).unwrap());
}
}
println!("Index: ?");
for idx in idx_list {
let (word, pages) = dict.keys.get_index_d(idx).unwrap();
println!("\n{}", word);
for id in pages {
println!("{}", dict.pages.get(id).unwrap());
}
}
*/
let mut audio_rsc = dict.audio.unwrap();
let audio = audio_rsc.get("jee").unwrap();
let mut stdout = stdout().lock();
stdout.write_all(audio).unwrap();
}

View file

@ -18,7 +18,7 @@ fn write_index(dict: &MonokakidoDict, index: &KeyIndex, tsv_fname: &str) -> Resu
for PageItemId { page, item } in pages {
write!(&mut index_tsv, "\t{page:0>10}")?;
if item > 0 {
write!(&mut index_tsv, ":{item:0>3}")?;
write!(&mut index_tsv, "-{item:0>3}")?;
}
}
index_tsv.write_all(b"\n")?;
@ -37,7 +37,7 @@ fn explode() -> Result<(), Error> {
create_dir_all(&pages_dir)?;
let mut path = String::from(&pages_dir);
for idx in dict.pages.idx_iter()? {
let (id, page) = dict.pages.get_by_idx(idx)?;
let (id, page) = dict.pages.page_by_idx(idx)?;
write!(&mut path, "{id:0>10}.xml")?;
let mut file = File::create(&path)?;
path.truncate(pages_dir.len());

View file

@ -24,6 +24,7 @@ pub enum Error {
InvalidArg,
FmtError,
IndexDoesntExist,
XmlError,
}
impl From<IoError> for Error {
@ -43,3 +44,9 @@ impl From<FmtError> for Error {
Error::FmtError
}
}
impl From<xmlparser::Error> for Error {
fn from(_: xmlparser::Error) -> Self {
Error::XmlError
}
}

View file

@ -342,6 +342,7 @@ impl<'a> Iterator for PageIter<'a> {
}
}
#[derive(Debug, Clone, Copy)]
pub struct PageItemId {
pub page: u32,
pub item: u8,

View file

@ -10,4 +10,4 @@ pub use audio::Audio;
pub use dict::MonokakidoDict;
pub use error::Error;
pub use key::{KeyIndex, Keys, PageItemId};
pub use pages::Pages;
pub use pages::{Pages, XmlParser};

View file

@ -1,6 +1,6 @@
use std::{ops::Range, path::PathBuf};
use crate::{dict::Paths, resource::Rsc, Error};
use crate::{dict::Paths, resource::Rsc, Error, PageItemId};
const RSC_NAME: &str = "contents";
@ -9,6 +9,75 @@ pub struct Pages {
res: Option<Rsc>,
}
pub struct XmlParser<'a> {
xml: &'a str,
tokens: xmlparser::Tokenizer<'a>,
target_level: Option<usize>,
tag_stack: Vec<(&'a str, usize)>,
}
impl<'a> XmlParser<'a> {
pub fn from(xml: &'a str) -> Self {
Self {
xml,
tokens: xmlparser::Tokenizer::from(xml),
target_level: None,
tag_stack: Vec::new(),
}
}
pub fn next_fragment_by(
&mut self,
elem_cond: impl Fn(&str) -> bool,
attr_cond: impl Fn(&str, &str) -> bool,
) -> Result<Option<&'a str>, Error> {
use xmlparser::{
ElementEnd::{Close, Empty},
Token::{Attribute, ElementEnd, ElementStart},
};
for token in &mut self.tokens {
let mut popped = None;
let token = token?;
match token {
ElementStart { local, span, .. } => {
self.tag_stack.push((local.as_str(), span.start()));
if elem_cond(&local) && self.target_level.is_none() {
self.target_level = Some(self.tag_stack.len());
}
}
Attribute { local, value, .. } => {
if attr_cond(&local, &value) && self.target_level.is_none() {
self.target_level = Some(self.tag_stack.len());
}
}
ElementEnd {
end: Close(_, tag),
span,
} => {
if Some(&*tag) == self.tag_stack.last().map(|(t, _)| *t) {
popped = self.tag_stack.pop().map(|(_, start)| (start, span.end()));
} else {
return Err(Error::XmlError);
}
}
ElementEnd { end: Empty, span } => {
popped = self.tag_stack.pop().map(|(_, start)| (start, span.end()));
}
_ => continue,
}
if let Some((start, end)) = popped {
if Some(self.tag_stack.len()) < self.target_level {
self.target_level = None;
return Ok(Some(&self.xml[start..end]));
}
}
}
// No body fragment or item fragment with suitable ID found
Ok(None)
}
}
impl Pages {
pub fn new(paths: &Paths) -> Result<Self, Error> {
Ok(Pages {
@ -24,13 +93,43 @@ impl Pages {
Ok(())
}
pub fn get(&mut self, id: u32) -> Result<&str, Error> {
pub fn get_page(&mut self, id: PageItemId) -> Result<&str, Error> {
self.init()?;
let Some(res) = self.res.as_mut() else { unreachable!() };
std::str::from_utf8(res.get(id)?).map_err(|_| Error::Utf8Error)
let xml = std::str::from_utf8(res.get(id.page)?).map_err(|_| Error::Utf8Error)?;
Ok(xml)
}
pub fn get_by_idx(&mut self, idx: usize) -> Result<(u32, &str), Error> {
pub fn get_item(&mut self, id: PageItemId) -> Result<&str, Error> {
let xml = self.get_page(id)?;
let mut parser = XmlParser::from(xml);
if id.item == 0 {
parser.next_fragment_by(|tag| tag == "body", |_, _| false)
} else {
parser.next_fragment_by(
|_| false,
|name, value| {
if name == "id" {
if let Some((page, item)) = value.split_once('-') {
if page.parse() == Ok(id.page) && item.parse() == Ok(id.item) {
return true;
}
}
}
false
},
)
}?
.ok_or(Error::XmlError)
}
/// Returns an iterator over the audio fragments inside the item `id`.
///
/// The item is fetched with `get_item` and a fresh `XmlParser` over it is
/// wrapped in an `AudioIter`.
///
/// # Errors
///
/// Propagates any error from `get_item`.
pub fn get_item_audio(&mut self, id: PageItemId) -> Result<AudioIter, Error> {
    self.get_item(id).map(|item_xml| AudioIter {
        parser: XmlParser::from(item_xml),
    })
}
pub fn page_by_idx(&mut self, idx: usize) -> Result<(u32, &str), Error> {
self.init()?;
let Some(res) = self.res.as_mut() else { unreachable!() };
let (id, page) = res.get_by_idx(idx)?;
@ -43,3 +142,20 @@ impl Pages {
Ok(0..res.len())
}
}
/// Iterator over the elements of an XML fragment that carry an `href`
/// attribute whose value ends with `.aac`.
pub struct AudioIter<'a> {
    /// Scanner over the fragment being searched.
    parser: XmlParser<'a>,
}

impl<'a> Iterator for AudioIter<'a> {
    type Item = Result<&'a str, Error>;

    /// Yields the next matching element, `Some(Err(_))` when scanning fails,
    /// or `None` when the scanner finds no further match.
    fn next(&mut self) -> Option<Self::Item> {
        let is_aac_link = |name: &str, value: &str| name == "href" && value.ends_with(".aac");
        match self.parser.next_fragment_by(|_| false, is_aac_link) {
            Ok(Some(fragment)) => Some(Ok(fragment)),
            Ok(None) => None,
            Err(err) => Some(Err(err)),
        }
    }
}