Compare commits
1 commit
Author | SHA1 | Date | |
---|---|---|---|
7d503e8aba |
17
README.md
17
README.md
|
@ -1,14 +1,6 @@
|
||||||
# monokakido.rs
|
# monokakido.rs
|
||||||
A Rust library for parsing and interpreting the [Monokakido](https://www.monokakido.jp/en/dictionaries/app/) dictionary format.
|
|
||||||
Aiming for full test coverage and efficient implementation with minimal dependencies.
|
|
||||||
|
|
||||||
## Notice
|
A Rust library for parsing and interpreting the [Monokakido](https://www.monokakido.jp/en/dictionaries/app/) dictionary format. Aiming for full test coverage and efficient implementation with minimal dependencies.
|
||||||
|
|
||||||
This library started as a personal project driven by curiosity.
|
|
||||||
It is ABSOLUTELY NOT intended to support piracy;
|
|
||||||
I strongly condemn making unauthorized copies of Monokakido's dictionaries,
|
|
||||||
and take no part or responsibility in that kind of activity.
|
|
||||||
Please buy your own dictionaries directly from Monokakido to show your love and support.
|
|
||||||
|
|
||||||
## TODO:
|
## TODO:
|
||||||
- Add headline support
|
- Add headline support
|
||||||
|
@ -47,5 +39,12 @@ Please buy your own dictionaries directly from Monokakido to show your love and
|
||||||
## Planned to support:
|
## Planned to support:
|
||||||
- WISDOM3
|
- WISDOM3
|
||||||
- SMK8
|
- SMK8
|
||||||
|
- RHEJ
|
||||||
|
- OLT
|
||||||
|
- OLEX
|
||||||
|
- OLDAE
|
||||||
|
- OCD
|
||||||
|
- OALD10
|
||||||
- NHKACCENT2
|
- NHKACCENT2
|
||||||
- DAIJISEN2
|
- DAIJISEN2
|
||||||
|
- CCCAD
|
||||||
|
|
|
@ -33,6 +33,7 @@ fn explode() -> Result<(), Error> {
|
||||||
|
|
||||||
let pages_dir = out_dir(&dict) + "pages/";
|
let pages_dir = out_dir(&dict) + "pages/";
|
||||||
let audio_dir = out_dir(&dict) + "audio/";
|
let audio_dir = out_dir(&dict) + "audio/";
|
||||||
|
let graphics_dir = out_dir(&dict) + "graphics/";
|
||||||
|
|
||||||
create_dir_all(&pages_dir)?;
|
create_dir_all(&pages_dir)?;
|
||||||
let mut path = String::from(&pages_dir);
|
let mut path = String::from(&pages_dir);
|
||||||
|
@ -56,6 +57,18 @@ fn explode() -> Result<(), Error> {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if let Some(graphics) = &mut dict.graphics {
|
||||||
|
create_dir_all(&graphics_dir)?;
|
||||||
|
let mut path = String::from(&graphics_dir);
|
||||||
|
for idx in graphics.idx_iter()? {
|
||||||
|
let (id, graphics) = graphics.get_by_idx(idx)?;
|
||||||
|
write!(&mut path, "{id}")?;
|
||||||
|
let mut file = File::create(&path)?;
|
||||||
|
path.truncate(graphics_dir.len());
|
||||||
|
file.write_all(graphics)?;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
write_index(&dict, &dict.keys.index_len, "index_len.tsv")?;
|
write_index(&dict, &dict.keys.index_len, "index_len.tsv")?;
|
||||||
write_index(&dict, &dict.keys.index_prefix, "index_prefix.tsv")?;
|
write_index(&dict, &dict.keys.index_prefix, "index_prefix.tsv")?;
|
||||||
write_index(&dict, &dict.keys.index_suffix, "index_suffix.tsv")?;
|
write_index(&dict, &dict.keys.index_suffix, "index_suffix.tsv")?;
|
||||||
|
|
19
src/dict.rs
19
src/dict.rs
|
@ -5,12 +5,13 @@ use std::{
|
||||||
path::{Path, PathBuf},
|
path::{Path, PathBuf},
|
||||||
};
|
};
|
||||||
|
|
||||||
use crate::{audio::Audio, key::Keys, pages::Pages, Error};
|
use crate::{media::Media, key::Keys, pages::Pages, Error};
|
||||||
|
|
||||||
pub struct MonokakidoDict {
|
pub struct MonokakidoDict {
|
||||||
paths: Paths,
|
paths: Paths,
|
||||||
pub pages: Pages,
|
pub pages: Pages,
|
||||||
pub audio: Option<Audio>,
|
pub audio: Option<Media>,
|
||||||
|
pub graphics: Option<Media>,
|
||||||
pub keys: Keys,
|
pub keys: Keys,
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -60,7 +61,9 @@ impl Paths {
|
||||||
}
|
}
|
||||||
|
|
||||||
pub(crate) fn key_path(&self) -> PathBuf {
|
pub(crate) fn key_path(&self) -> PathBuf {
|
||||||
let mut pb = self.contents_path();
|
let mut pb = PathBuf::from(&self.base_path);
|
||||||
|
pb.push("Contents");
|
||||||
|
pb.push(&self.contents_dir);
|
||||||
pb.push("key");
|
pb.push("key");
|
||||||
pb
|
pb
|
||||||
}
|
}
|
||||||
|
@ -72,7 +75,9 @@ impl Paths {
|
||||||
}
|
}
|
||||||
|
|
||||||
pub(crate) fn headline_path(&self) -> PathBuf {
|
pub(crate) fn headline_path(&self) -> PathBuf {
|
||||||
let mut pb = self.contents_path();
|
let mut pb = PathBuf::from(&self.base_path);
|
||||||
|
pb.push("Contents");
|
||||||
|
pb.push(&self.contents_dir);
|
||||||
pb.push("headline");
|
pb.push("headline");
|
||||||
pb
|
pb
|
||||||
}
|
}
|
||||||
|
@ -131,13 +136,15 @@ impl MonokakidoDict {
|
||||||
contents_dir: contents.dir,
|
contents_dir: contents.dir,
|
||||||
};
|
};
|
||||||
let pages = Pages::new(&paths)?;
|
let pages = Pages::new(&paths)?;
|
||||||
let audio = Audio::new(&paths)?;
|
let audio = Media::new(&paths)?;
|
||||||
let keys = Keys::new(paths.key_headword_path())?;
|
let graphics = Media::new(&paths)?;
|
||||||
|
let keys = Keys::new(&paths)?;
|
||||||
|
|
||||||
Ok(MonokakidoDict {
|
Ok(MonokakidoDict {
|
||||||
paths,
|
paths,
|
||||||
pages,
|
pages,
|
||||||
audio,
|
audio,
|
||||||
|
graphics,
|
||||||
keys,
|
keys,
|
||||||
})
|
})
|
||||||
}
|
}
|
||||||
|
|
57
src/key.rs
57
src/key.rs
|
@ -4,12 +4,12 @@ use std::{
|
||||||
fs::File,
|
fs::File,
|
||||||
io::{Read, Seek},
|
io::{Read, Seek},
|
||||||
mem::size_of,
|
mem::size_of,
|
||||||
path::Path,
|
|
||||||
str::from_utf8,
|
str::from_utf8,
|
||||||
};
|
};
|
||||||
|
|
||||||
use crate::{
|
use crate::{
|
||||||
abi_utils::{read_vec, TransmuteSafe, LE32},
|
abi_utils::{TransmuteSafe, LE32, read_vec},
|
||||||
|
dict::Paths,
|
||||||
Error,
|
Error,
|
||||||
};
|
};
|
||||||
|
|
||||||
|
@ -19,42 +19,27 @@ mod abi {
|
||||||
#[repr(C)]
|
#[repr(C)]
|
||||||
#[derive(Debug, Clone, Copy, Default)]
|
#[derive(Debug, Clone, Copy, Default)]
|
||||||
pub(super) struct FileHeader {
|
pub(super) struct FileHeader {
|
||||||
pub ver: LE32,
|
|
||||||
magic1: LE32,
|
magic1: LE32,
|
||||||
|
magic2: LE32,
|
||||||
pub words_offset: LE32,
|
pub words_offset: LE32,
|
||||||
pub idx_offset: LE32,
|
pub idx_offset: LE32,
|
||||||
// Jimmy-Z: no idea what this is
|
magic3: LE32,
|
||||||
// present on (not limited to) OALD10, SANKOKU8
|
magic4: LE32,
|
||||||
pub next_offset: LE32,
|
|
||||||
magic5: LE32,
|
magic5: LE32,
|
||||||
magic6: LE32,
|
magic6: LE32,
|
||||||
magic7: LE32,
|
|
||||||
}
|
}
|
||||||
|
|
||||||
impl FileHeader {
|
impl FileHeader {
|
||||||
pub(super) fn from(r: &mut impl Read) -> Result<Self, Error> {
|
pub(super) fn validate(&self) -> Result<(), Error> {
|
||||||
let mut h = FileHeader::default();
|
if self.magic1.read() == 0x20000
|
||||||
r.read_exact(&mut h.as_bytes_mut()[..0x10])?;
|
&& self.magic2.read() == 0
|
||||||
if h.ver.read() == 0x10000 && h.words_offset.read() == 0x10{
|
&& self.magic3.read() == 0
|
||||||
} else if h.ver.read() == 0x20000 && h.words_offset.read() == 0x20 {
|
&& self.magic4.read() == 0
|
||||||
r.read_exact(&mut h.as_bytes_mut()[0x10..])?;
|
&& self.magic5.read() == 0
|
||||||
} else {
|
&& self.magic6.read() == 0
|
||||||
return Err(Error::KeyFileHeaderValidate)
|
&& self.words_offset.us() < self.idx_offset.us()
|
||||||
}
|
|
||||||
if h.ver.read() == 0x10000
|
|
||||||
&& h.magic1.read() == 0
|
|
||||||
&& h.words_offset.us() < h.idx_offset.us()
|
|
||||||
{
|
{
|
||||||
Ok(h)
|
Ok(())
|
||||||
} else if h.ver.read() == 0x20000
|
|
||||||
&& h.magic1.read() == 0
|
|
||||||
&& h.magic5.read() == 0
|
|
||||||
&& h.magic6.read() == 0
|
|
||||||
&& h.magic7.read() == 0
|
|
||||||
&& h.words_offset.us() < h.idx_offset.us()
|
|
||||||
&& (h.next_offset.read() == 0 || h.idx_offset.us() < h.next_offset.us())
|
|
||||||
{
|
|
||||||
Ok(h)
|
|
||||||
} else {
|
} else {
|
||||||
Err(Error::KeyFileHeaderValidate)
|
Err(Error::KeyFileHeaderValidate)
|
||||||
}
|
}
|
||||||
|
@ -134,20 +119,18 @@ impl Keys {
|
||||||
Ok(())
|
Ok(())
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn new<P: AsRef<Path>>(path: P) -> Result<Keys, Error> {
|
pub fn new(paths: &Paths) -> Result<Keys, Error> {
|
||||||
let mut file = File::open(path)?;
|
let mut file = File::open(paths.key_headword_path())?;
|
||||||
let file_size = file.metadata()?.len() as usize;
|
let file_size = file.metadata()?.len() as usize;
|
||||||
let hdr = FileHeader::from(&mut file)?;
|
let mut hdr = FileHeader::default();
|
||||||
|
file.read_exact(hdr.as_bytes_mut())?;
|
||||||
|
hdr.validate()?;
|
||||||
|
|
||||||
file.seek(std::io::SeekFrom::Start(hdr.words_offset.read() as u64))?;
|
file.seek(std::io::SeekFrom::Start(hdr.words_offset.read() as u64))?;
|
||||||
let words = read_vec(&mut file, hdr.words_offset.us(), hdr.idx_offset.us())?;
|
let words = read_vec(&mut file, hdr.words_offset.us(), hdr.idx_offset.us())?;
|
||||||
let Some(words) = words else { return Err(Error::InvalidIndex); };
|
let Some(words) = words else { return Err(Error::InvalidIndex); };
|
||||||
|
|
||||||
let idx_end = (if hdr.next_offset.us() == 0 {
|
let idx_end = file_size - hdr.idx_offset.us();
|
||||||
file_size
|
|
||||||
} else {
|
|
||||||
hdr.next_offset.us()
|
|
||||||
}) - hdr.idx_offset.us();
|
|
||||||
let mut ihdr = IndexHeader::default();
|
let mut ihdr = IndexHeader::default();
|
||||||
file.seek(std::io::SeekFrom::Start(hdr.idx_offset.read() as u64))?;
|
file.seek(std::io::SeekFrom::Start(hdr.idx_offset.read() as u64))?;
|
||||||
file.read_exact(ihdr.as_bytes_mut())?;
|
file.read_exact(ihdr.as_bytes_mut())?;
|
||||||
|
|
|
@ -1,13 +1,13 @@
|
||||||
mod abi_utils;
|
mod abi_utils;
|
||||||
mod audio;
|
mod media;
|
||||||
mod dict;
|
mod dict;
|
||||||
mod error;
|
mod error;
|
||||||
mod key;
|
mod key;
|
||||||
mod pages;
|
mod pages;
|
||||||
pub mod resource;
|
mod resource;
|
||||||
mod headline;
|
mod headline;
|
||||||
|
|
||||||
pub use audio::Audio;
|
pub use media::Media;
|
||||||
pub use dict::MonokakidoDict;
|
pub use dict::MonokakidoDict;
|
||||||
pub use error::Error;
|
pub use error::Error;
|
||||||
pub use key::{KeyIndex, Keys, PageItemId};
|
pub use key::{KeyIndex, Keys, PageItemId};
|
||||||
|
|
|
@ -8,22 +8,22 @@ use crate::{
|
||||||
|
|
||||||
const RSC_NAME: &str = "audio";
|
const RSC_NAME: &str = "audio";
|
||||||
|
|
||||||
pub struct Audio {
|
pub struct Media {
|
||||||
path: PathBuf,
|
path: PathBuf,
|
||||||
res: Option<AudioResource>,
|
res: Option<MediaResource>,
|
||||||
}
|
}
|
||||||
|
|
||||||
enum AudioResource {
|
enum MediaResource {
|
||||||
Rsc(Rsc),
|
Rsc(Rsc),
|
||||||
Nrsc(Nrsc),
|
Nrsc(Nrsc),
|
||||||
}
|
}
|
||||||
|
|
||||||
impl Audio {
|
impl Media {
|
||||||
pub fn new(paths: &Paths) -> Result<Option<Self>, Error> {
|
pub fn new(paths: &Paths) -> Result<Option<Self>, Error> {
|
||||||
let mut path = paths.contents_path();
|
let mut path = paths.contents_path();
|
||||||
path.push(RSC_NAME);
|
path.push(RSC_NAME);
|
||||||
Ok(if path.exists() {
|
Ok(if path.exists() {
|
||||||
Some(Audio { path, res: None })
|
Some(Media { path, res: None })
|
||||||
} else {
|
} else {
|
||||||
None
|
None
|
||||||
})
|
})
|
||||||
|
@ -35,9 +35,9 @@ impl Audio {
|
||||||
let nrsc_index_exists = self.path.exists();
|
let nrsc_index_exists = self.path.exists();
|
||||||
self.path.pop();
|
self.path.pop();
|
||||||
self.res = Some(if nrsc_index_exists {
|
self.res = Some(if nrsc_index_exists {
|
||||||
AudioResource::Nrsc(Nrsc::new(&self.path)?)
|
MediaResource::Nrsc(Nrsc::new(&self.path)?)
|
||||||
} else {
|
} else {
|
||||||
AudioResource::Rsc(Rsc::new(&self.path, RSC_NAME)?)
|
MediaResource::Rsc(Rsc::new(&self.path, RSC_NAME)?)
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
Ok(())
|
Ok(())
|
||||||
|
@ -47,22 +47,22 @@ impl Audio {
|
||||||
self.init()?;
|
self.init()?;
|
||||||
let Some(res) = self.res.as_mut() else { unreachable!() };
|
let Some(res) = self.res.as_mut() else { unreachable!() };
|
||||||
match res {
|
match res {
|
||||||
AudioResource::Rsc(rsc) => rsc.get(id.parse::<u32>().map_err(|_| Error::InvalidIndex)?),
|
MediaResource::Rsc(rsc) => rsc.get(id.parse::<u32>().map_err(|_| Error::InvalidIndex)?),
|
||||||
AudioResource::Nrsc(nrsc) => nrsc.get(id),
|
MediaResource::Nrsc(nrsc) => nrsc.get(id),
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn get_by_idx(&mut self, idx: usize) -> Result<(AudioId, &[u8]), Error> {
|
pub fn get_by_idx(&mut self, idx: usize) -> Result<(MediaId, &[u8]), Error> {
|
||||||
self.init()?;
|
self.init()?;
|
||||||
let Some(res) = self.res.as_mut() else { unreachable!() };
|
let Some(res) = self.res.as_mut() else { unreachable!() };
|
||||||
Ok(match res {
|
Ok(match res {
|
||||||
AudioResource::Rsc(rsc) => {
|
MediaResource::Rsc(rsc) => {
|
||||||
let (id, page) = rsc.get_by_idx(idx)?;
|
let (id, page) = rsc.get_by_idx(idx)?;
|
||||||
(AudioId::Num(id), page)
|
(MediaId::Num(id), page)
|
||||||
}
|
}
|
||||||
AudioResource::Nrsc(nrsc) => {
|
MediaResource::Nrsc(nrsc) => {
|
||||||
let (id, page) = nrsc.get_by_idx(idx)?;
|
let (id, page) = nrsc.get_by_idx(idx)?;
|
||||||
(AudioId::Str(id), page)
|
(MediaId::Str(id), page)
|
||||||
}
|
}
|
||||||
})
|
})
|
||||||
}
|
}
|
||||||
|
@ -71,19 +71,19 @@ impl Audio {
|
||||||
self.init()?;
|
self.init()?;
|
||||||
let Some(res) = self.res.as_ref() else { unreachable!() };
|
let Some(res) = self.res.as_ref() else { unreachable!() };
|
||||||
Ok(0..match res {
|
Ok(0..match res {
|
||||||
AudioResource::Rsc(rsc) => rsc.len(),
|
MediaResource::Rsc(rsc) => rsc.len(),
|
||||||
AudioResource::Nrsc(nrsc) => nrsc.len(),
|
MediaResource::Nrsc(nrsc) => nrsc.len(),
|
||||||
})
|
})
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
#[derive(Debug)]
|
#[derive(Debug)]
|
||||||
pub enum AudioId<'a> {
|
pub enum MediaId<'a> {
|
||||||
Str(&'a str),
|
Str(&'a str),
|
||||||
Num(u32),
|
Num(u32),
|
||||||
}
|
}
|
||||||
|
|
||||||
impl Display for AudioId<'_> {
|
impl Display for MediaId<'_> {
|
||||||
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
|
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
|
||||||
match self {
|
match self {
|
||||||
Self::Str(str) => f.write_str(str),
|
Self::Str(str) => f.write_str(str),
|
|
@ -210,7 +210,7 @@ impl Nrsc {
|
||||||
Ok(files)
|
Ok(files)
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn new(path: &Path) -> Result<Self, Error> {
|
pub(crate) fn new(path: &Path) -> Result<Self, Error> {
|
||||||
let files = Nrsc::files(path)?;
|
let files = Nrsc::files(path)?;
|
||||||
let index = NrscIndex::new(path)?;
|
let index = NrscIndex::new(path)?;
|
||||||
Ok(Nrsc {
|
Ok(Nrsc {
|
||||||
|
|
|
@ -280,7 +280,7 @@ impl Rsc {
|
||||||
Ok(files)
|
Ok(files)
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn new(path: &Path, rsc_name: &str) -> Result<Self, Error> {
|
pub(crate) fn new(path: &Path, rsc_name: &str) -> Result<Self, Error> {
|
||||||
let files = Rsc::files(path, rsc_name)?;
|
let files = Rsc::files(path, rsc_name)?;
|
||||||
let index = RscIndex::new(path, rsc_name)?;
|
let index = RscIndex::new(path, rsc_name)?;
|
||||||
Ok(Self {
|
Ok(Self {
|
||||||
|
|
Loading…
Reference in a new issue