Compare commits
1 commit
Author | SHA1 | Date | |
---|---|---|---|
7d503e8aba |
17
README.md
17
README.md
|
@ -1,14 +1,6 @@
|
|||
# monokakido.rs
|
||||
A Rust library for parsing and interpreting the [Monokakido](https://www.monokakido.jp/en/dictionaries/app/) dictionary format.
|
||||
Aiming for full test coverage and efficient implementation with minimal dependencies.
|
||||
|
||||
## Notice
|
||||
|
||||
This library started as a personal project driven by curiosity.
|
||||
It is ABSOLUTELY NOT intended to support piracy;
|
||||
I strongly condemn making unauthorized copies of Monokakido's dictionaries,
|
||||
and take no part or responsibility in that kind of activity.
|
||||
Please buy your own dictionaries directly from Monokakido to show your love and support.
|
||||
A Rust library for parsing and interpreting the [Monokakido](https://www.monokakido.jp/en/dictionaries/app/) dictionary format. Aiming for full test coverage and efficient implementation with minimal dependencies.
|
||||
|
||||
## TODO:
|
||||
- Add headline support
|
||||
|
@ -47,5 +39,12 @@ Please buy your own dictionaries directly from Monokakido to show your love and
|
|||
## Planned to support:
|
||||
- WISDOM3
|
||||
- SMK8
|
||||
- RHEJ
|
||||
- OLT
|
||||
- OLEX
|
||||
- OLDAE
|
||||
- OCD
|
||||
- OALD10
|
||||
- NHKACCENT2
|
||||
- DAIJISEN2
|
||||
- CCCAD
|
||||
|
|
|
@ -33,6 +33,7 @@ fn explode() -> Result<(), Error> {
|
|||
|
||||
let pages_dir = out_dir(&dict) + "pages/";
|
||||
let audio_dir = out_dir(&dict) + "audio/";
|
||||
let graphics_dir = out_dir(&dict) + "graphics/";
|
||||
|
||||
create_dir_all(&pages_dir)?;
|
||||
let mut path = String::from(&pages_dir);
|
||||
|
@ -56,6 +57,18 @@ fn explode() -> Result<(), Error> {
|
|||
}
|
||||
}
|
||||
|
||||
if let Some(graphics) = &mut dict.graphics {
|
||||
create_dir_all(&graphics_dir)?;
|
||||
let mut path = String::from(&graphics_dir);
|
||||
for idx in graphics.idx_iter()? {
|
||||
let (id, graphics) = graphics.get_by_idx(idx)?;
|
||||
write!(&mut path, "{id}")?;
|
||||
let mut file = File::create(&path)?;
|
||||
path.truncate(graphics_dir.len());
|
||||
file.write_all(graphics)?;
|
||||
}
|
||||
}
|
||||
|
||||
write_index(&dict, &dict.keys.index_len, "index_len.tsv")?;
|
||||
write_index(&dict, &dict.keys.index_prefix, "index_prefix.tsv")?;
|
||||
write_index(&dict, &dict.keys.index_suffix, "index_suffix.tsv")?;
|
||||
|
|
19
src/dict.rs
19
src/dict.rs
|
@ -5,12 +5,13 @@ use std::{
|
|||
path::{Path, PathBuf},
|
||||
};
|
||||
|
||||
use crate::{audio::Audio, key::Keys, pages::Pages, Error};
|
||||
use crate::{media::Media, key::Keys, pages::Pages, Error};
|
||||
|
||||
pub struct MonokakidoDict {
|
||||
paths: Paths,
|
||||
pub pages: Pages,
|
||||
pub audio: Option<Audio>,
|
||||
pub audio: Option<Media>,
|
||||
pub graphics: Option<Media>,
|
||||
pub keys: Keys,
|
||||
}
|
||||
|
||||
|
@ -60,7 +61,9 @@ impl Paths {
|
|||
}
|
||||
|
||||
pub(crate) fn key_path(&self) -> PathBuf {
|
||||
let mut pb = self.contents_path();
|
||||
let mut pb = PathBuf::from(&self.base_path);
|
||||
pb.push("Contents");
|
||||
pb.push(&self.contents_dir);
|
||||
pb.push("key");
|
||||
pb
|
||||
}
|
||||
|
@ -72,7 +75,9 @@ impl Paths {
|
|||
}
|
||||
|
||||
pub(crate) fn headline_path(&self) -> PathBuf {
|
||||
let mut pb = self.contents_path();
|
||||
let mut pb = PathBuf::from(&self.base_path);
|
||||
pb.push("Contents");
|
||||
pb.push(&self.contents_dir);
|
||||
pb.push("headline");
|
||||
pb
|
||||
}
|
||||
|
@ -131,13 +136,15 @@ impl MonokakidoDict {
|
|||
contents_dir: contents.dir,
|
||||
};
|
||||
let pages = Pages::new(&paths)?;
|
||||
let audio = Audio::new(&paths)?;
|
||||
let keys = Keys::new(paths.key_headword_path())?;
|
||||
let audio = Media::new(&paths)?;
|
||||
let graphics = Media::new(&paths)?;
|
||||
let keys = Keys::new(&paths)?;
|
||||
|
||||
Ok(MonokakidoDict {
|
||||
paths,
|
||||
pages,
|
||||
audio,
|
||||
graphics,
|
||||
keys,
|
||||
})
|
||||
}
|
||||
|
|
57
src/key.rs
57
src/key.rs
|
@ -4,12 +4,12 @@ use std::{
|
|||
fs::File,
|
||||
io::{Read, Seek},
|
||||
mem::size_of,
|
||||
path::Path,
|
||||
str::from_utf8,
|
||||
};
|
||||
|
||||
use crate::{
|
||||
abi_utils::{read_vec, TransmuteSafe, LE32},
|
||||
abi_utils::{TransmuteSafe, LE32, read_vec},
|
||||
dict::Paths,
|
||||
Error,
|
||||
};
|
||||
|
||||
|
@ -19,42 +19,27 @@ mod abi {
|
|||
#[repr(C)]
|
||||
#[derive(Debug, Clone, Copy, Default)]
|
||||
pub(super) struct FileHeader {
|
||||
pub ver: LE32,
|
||||
magic1: LE32,
|
||||
magic2: LE32,
|
||||
pub words_offset: LE32,
|
||||
pub idx_offset: LE32,
|
||||
// Jimmy-Z: no idea what this is
|
||||
// present on (not limited to) OALD10, SANKOKU8
|
||||
pub next_offset: LE32,
|
||||
magic3: LE32,
|
||||
magic4: LE32,
|
||||
magic5: LE32,
|
||||
magic6: LE32,
|
||||
magic7: LE32,
|
||||
}
|
||||
|
||||
impl FileHeader {
|
||||
pub(super) fn from(r: &mut impl Read) -> Result<Self, Error> {
|
||||
let mut h = FileHeader::default();
|
||||
r.read_exact(&mut h.as_bytes_mut()[..0x10])?;
|
||||
if h.ver.read() == 0x10000 && h.words_offset.read() == 0x10{
|
||||
} else if h.ver.read() == 0x20000 && h.words_offset.read() == 0x20 {
|
||||
r.read_exact(&mut h.as_bytes_mut()[0x10..])?;
|
||||
} else {
|
||||
return Err(Error::KeyFileHeaderValidate)
|
||||
}
|
||||
if h.ver.read() == 0x10000
|
||||
&& h.magic1.read() == 0
|
||||
&& h.words_offset.us() < h.idx_offset.us()
|
||||
pub(super) fn validate(&self) -> Result<(), Error> {
|
||||
if self.magic1.read() == 0x20000
|
||||
&& self.magic2.read() == 0
|
||||
&& self.magic3.read() == 0
|
||||
&& self.magic4.read() == 0
|
||||
&& self.magic5.read() == 0
|
||||
&& self.magic6.read() == 0
|
||||
&& self.words_offset.us() < self.idx_offset.us()
|
||||
{
|
||||
Ok(h)
|
||||
} else if h.ver.read() == 0x20000
|
||||
&& h.magic1.read() == 0
|
||||
&& h.magic5.read() == 0
|
||||
&& h.magic6.read() == 0
|
||||
&& h.magic7.read() == 0
|
||||
&& h.words_offset.us() < h.idx_offset.us()
|
||||
&& (h.next_offset.read() == 0 || h.idx_offset.us() < h.next_offset.us())
|
||||
{
|
||||
Ok(h)
|
||||
Ok(())
|
||||
} else {
|
||||
Err(Error::KeyFileHeaderValidate)
|
||||
}
|
||||
|
@ -134,20 +119,18 @@ impl Keys {
|
|||
Ok(())
|
||||
}
|
||||
|
||||
pub fn new<P: AsRef<Path>>(path: P) -> Result<Keys, Error> {
|
||||
let mut file = File::open(path)?;
|
||||
pub fn new(paths: &Paths) -> Result<Keys, Error> {
|
||||
let mut file = File::open(paths.key_headword_path())?;
|
||||
let file_size = file.metadata()?.len() as usize;
|
||||
let hdr = FileHeader::from(&mut file)?;
|
||||
let mut hdr = FileHeader::default();
|
||||
file.read_exact(hdr.as_bytes_mut())?;
|
||||
hdr.validate()?;
|
||||
|
||||
file.seek(std::io::SeekFrom::Start(hdr.words_offset.read() as u64))?;
|
||||
let words = read_vec(&mut file, hdr.words_offset.us(), hdr.idx_offset.us())?;
|
||||
let Some(words) = words else { return Err(Error::InvalidIndex); };
|
||||
|
||||
let idx_end = (if hdr.next_offset.us() == 0 {
|
||||
file_size
|
||||
} else {
|
||||
hdr.next_offset.us()
|
||||
}) - hdr.idx_offset.us();
|
||||
let idx_end = file_size - hdr.idx_offset.us();
|
||||
let mut ihdr = IndexHeader::default();
|
||||
file.seek(std::io::SeekFrom::Start(hdr.idx_offset.read() as u64))?;
|
||||
file.read_exact(ihdr.as_bytes_mut())?;
|
||||
|
|
|
@ -1,13 +1,13 @@
|
|||
mod abi_utils;
|
||||
mod audio;
|
||||
mod media;
|
||||
mod dict;
|
||||
mod error;
|
||||
mod key;
|
||||
mod pages;
|
||||
pub mod resource;
|
||||
mod resource;
|
||||
mod headline;
|
||||
|
||||
pub use audio::Audio;
|
||||
pub use media::Media;
|
||||
pub use dict::MonokakidoDict;
|
||||
pub use error::Error;
|
||||
pub use key::{KeyIndex, Keys, PageItemId};
|
||||
|
|
|
@ -8,22 +8,22 @@ use crate::{
|
|||
|
||||
const RSC_NAME: &str = "audio";
|
||||
|
||||
pub struct Audio {
|
||||
pub struct Media {
|
||||
path: PathBuf,
|
||||
res: Option<AudioResource>,
|
||||
res: Option<MediaResource>,
|
||||
}
|
||||
|
||||
enum AudioResource {
|
||||
enum MediaResource {
|
||||
Rsc(Rsc),
|
||||
Nrsc(Nrsc),
|
||||
}
|
||||
|
||||
impl Audio {
|
||||
impl Media {
|
||||
pub fn new(paths: &Paths) -> Result<Option<Self>, Error> {
|
||||
let mut path = paths.contents_path();
|
||||
path.push(RSC_NAME);
|
||||
Ok(if path.exists() {
|
||||
Some(Audio { path, res: None })
|
||||
Some(Media { path, res: None })
|
||||
} else {
|
||||
None
|
||||
})
|
||||
|
@ -35,9 +35,9 @@ impl Audio {
|
|||
let nrsc_index_exists = self.path.exists();
|
||||
self.path.pop();
|
||||
self.res = Some(if nrsc_index_exists {
|
||||
AudioResource::Nrsc(Nrsc::new(&self.path)?)
|
||||
MediaResource::Nrsc(Nrsc::new(&self.path)?)
|
||||
} else {
|
||||
AudioResource::Rsc(Rsc::new(&self.path, RSC_NAME)?)
|
||||
MediaResource::Rsc(Rsc::new(&self.path, RSC_NAME)?)
|
||||
});
|
||||
}
|
||||
Ok(())
|
||||
|
@ -47,22 +47,22 @@ impl Audio {
|
|||
self.init()?;
|
||||
let Some(res) = self.res.as_mut() else { unreachable!() };
|
||||
match res {
|
||||
AudioResource::Rsc(rsc) => rsc.get(id.parse::<u32>().map_err(|_| Error::InvalidIndex)?),
|
||||
AudioResource::Nrsc(nrsc) => nrsc.get(id),
|
||||
MediaResource::Rsc(rsc) => rsc.get(id.parse::<u32>().map_err(|_| Error::InvalidIndex)?),
|
||||
MediaResource::Nrsc(nrsc) => nrsc.get(id),
|
||||
}
|
||||
}
|
||||
|
||||
pub fn get_by_idx(&mut self, idx: usize) -> Result<(AudioId, &[u8]), Error> {
|
||||
pub fn get_by_idx(&mut self, idx: usize) -> Result<(MediaId, &[u8]), Error> {
|
||||
self.init()?;
|
||||
let Some(res) = self.res.as_mut() else { unreachable!() };
|
||||
Ok(match res {
|
||||
AudioResource::Rsc(rsc) => {
|
||||
MediaResource::Rsc(rsc) => {
|
||||
let (id, page) = rsc.get_by_idx(idx)?;
|
||||
(AudioId::Num(id), page)
|
||||
(MediaId::Num(id), page)
|
||||
}
|
||||
AudioResource::Nrsc(nrsc) => {
|
||||
MediaResource::Nrsc(nrsc) => {
|
||||
let (id, page) = nrsc.get_by_idx(idx)?;
|
||||
(AudioId::Str(id), page)
|
||||
(MediaId::Str(id), page)
|
||||
}
|
||||
})
|
||||
}
|
||||
|
@ -71,19 +71,19 @@ impl Audio {
|
|||
self.init()?;
|
||||
let Some(res) = self.res.as_ref() else { unreachable!() };
|
||||
Ok(0..match res {
|
||||
AudioResource::Rsc(rsc) => rsc.len(),
|
||||
AudioResource::Nrsc(nrsc) => nrsc.len(),
|
||||
MediaResource::Rsc(rsc) => rsc.len(),
|
||||
MediaResource::Nrsc(nrsc) => nrsc.len(),
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Debug)]
|
||||
pub enum AudioId<'a> {
|
||||
pub enum MediaId<'a> {
|
||||
Str(&'a str),
|
||||
Num(u32),
|
||||
}
|
||||
|
||||
impl Display for AudioId<'_> {
|
||||
impl Display for MediaId<'_> {
|
||||
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
|
||||
match self {
|
||||
Self::Str(str) => f.write_str(str),
|
|
@ -210,7 +210,7 @@ impl Nrsc {
|
|||
Ok(files)
|
||||
}
|
||||
|
||||
pub fn new(path: &Path) -> Result<Self, Error> {
|
||||
pub(crate) fn new(path: &Path) -> Result<Self, Error> {
|
||||
let files = Nrsc::files(path)?;
|
||||
let index = NrscIndex::new(path)?;
|
||||
Ok(Nrsc {
|
||||
|
|
|
@ -280,7 +280,7 @@ impl Rsc {
|
|||
Ok(files)
|
||||
}
|
||||
|
||||
pub fn new(path: &Path, rsc_name: &str) -> Result<Self, Error> {
|
||||
pub(crate) fn new(path: &Path, rsc_name: &str) -> Result<Self, Error> {
|
||||
let files = Rsc::files(path, rsc_name)?;
|
||||
let index = RscIndex::new(path, rsc_name)?;
|
||||
Ok(Self {
|
||||
|
|
Loading…
Reference in a new issue