Compare commits

..

1 commit

Author SHA1 Message Date
stephenmk 7d503e8aba
Add support for exploding graphics files 2023-05-02 08:45:13 -05:00
8 changed files with 77 additions and 75 deletions

View file

@ -1,14 +1,6 @@
# monokakido.rs
A Rust library for parsing and interpreting the [Monokakido](https://www.monokakido.jp/en/dictionaries/app/) dictionary format.
Aiming for full test coverage and efficient implementation with minimal dependencies.
## Notice
This library started as a personal project driven by curiosity.
It is ABSOLUTELY NOT intended to support piracy;
I strongly condemn making unauthorized copies of Monokakido's dictionaries,
and take no part or responsibility in that kind of activity.
Please buy your own dictionaries directly from Monokakido to show your love and support.
A Rust library for parsing and interpreting the [Monokakido](https://www.monokakido.jp/en/dictionaries/app/) dictionary format. Aiming for full test coverage and efficient implementation with minimal dependencies.
## TODO:
- Add headline support
@ -47,5 +39,12 @@ Please buy your own dictionaries directly from Monokakido to show your love and
## Planned to support:
- WISDOM3
- SMK8
- RHEJ
- OLT
- OLEX
- OLDAE
- OCD
- OALD10
- NHKACCENT2
- DAIJISEN2
- CCCAD

View file

@ -33,6 +33,7 @@ fn explode() -> Result<(), Error> {
let pages_dir = out_dir(&dict) + "pages/";
let audio_dir = out_dir(&dict) + "audio/";
let graphics_dir = out_dir(&dict) + "graphics/";
create_dir_all(&pages_dir)?;
let mut path = String::from(&pages_dir);
@ -56,6 +57,18 @@ fn explode() -> Result<(), Error> {
}
}
if let Some(graphics) = &mut dict.graphics {
create_dir_all(&graphics_dir)?;
let mut path = String::from(&graphics_dir);
for idx in graphics.idx_iter()? {
let (id, graphics) = graphics.get_by_idx(idx)?;
write!(&mut path, "{id}")?;
let mut file = File::create(&path)?;
path.truncate(graphics_dir.len());
file.write_all(graphics)?;
}
}
write_index(&dict, &dict.keys.index_len, "index_len.tsv")?;
write_index(&dict, &dict.keys.index_prefix, "index_prefix.tsv")?;
write_index(&dict, &dict.keys.index_suffix, "index_suffix.tsv")?;

View file

@ -5,12 +5,13 @@ use std::{
path::{Path, PathBuf},
};
use crate::{audio::Audio, key::Keys, pages::Pages, Error};
use crate::{media::Media, key::Keys, pages::Pages, Error};
pub struct MonokakidoDict {
paths: Paths,
pub pages: Pages,
pub audio: Option<Audio>,
pub audio: Option<Media>,
pub graphics: Option<Media>,
pub keys: Keys,
}
@ -60,7 +61,9 @@ impl Paths {
}
pub(crate) fn key_path(&self) -> PathBuf {
let mut pb = self.contents_path();
let mut pb = PathBuf::from(&self.base_path);
pb.push("Contents");
pb.push(&self.contents_dir);
pb.push("key");
pb
}
@ -72,7 +75,9 @@ impl Paths {
}
pub(crate) fn headline_path(&self) -> PathBuf {
let mut pb = self.contents_path();
let mut pb = PathBuf::from(&self.base_path);
pb.push("Contents");
pb.push(&self.contents_dir);
pb.push("headline");
pb
}
@ -131,13 +136,15 @@ impl MonokakidoDict {
contents_dir: contents.dir,
};
let pages = Pages::new(&paths)?;
let audio = Audio::new(&paths)?;
let keys = Keys::new(paths.key_headword_path())?;
let audio = Media::new(&paths)?;
let graphics = Media::new(&paths)?;
let keys = Keys::new(&paths)?;
Ok(MonokakidoDict {
paths,
pages,
audio,
graphics,
keys,
})
}

View file

@ -4,12 +4,12 @@ use std::{
fs::File,
io::{Read, Seek},
mem::size_of,
path::Path,
str::from_utf8,
};
use crate::{
abi_utils::{read_vec, TransmuteSafe, LE32},
abi_utils::{TransmuteSafe, LE32, read_vec},
dict::Paths,
Error,
};
@ -19,42 +19,27 @@ mod abi {
#[repr(C)]
#[derive(Debug, Clone, Copy, Default)]
pub(super) struct FileHeader {
pub ver: LE32,
magic1: LE32,
magic2: LE32,
pub words_offset: LE32,
pub idx_offset: LE32,
// Jimmy-Z: no idea what this is
// present on (not limited to) OALD10, SANKOKU8
pub next_offset: LE32,
magic3: LE32,
magic4: LE32,
magic5: LE32,
magic6: LE32,
magic7: LE32,
}
impl FileHeader {
pub(super) fn from(r: &mut impl Read) -> Result<Self, Error> {
let mut h = FileHeader::default();
r.read_exact(&mut h.as_bytes_mut()[..0x10])?;
if h.ver.read() == 0x10000 && h.words_offset.read() == 0x10{
} else if h.ver.read() == 0x20000 && h.words_offset.read() == 0x20 {
r.read_exact(&mut h.as_bytes_mut()[0x10..])?;
} else {
return Err(Error::KeyFileHeaderValidate)
}
if h.ver.read() == 0x10000
&& h.magic1.read() == 0
&& h.words_offset.us() < h.idx_offset.us()
pub(super) fn validate(&self) -> Result<(), Error> {
if self.magic1.read() == 0x20000
&& self.magic2.read() == 0
&& self.magic3.read() == 0
&& self.magic4.read() == 0
&& self.magic5.read() == 0
&& self.magic6.read() == 0
&& self.words_offset.us() < self.idx_offset.us()
{
Ok(h)
} else if h.ver.read() == 0x20000
&& h.magic1.read() == 0
&& h.magic5.read() == 0
&& h.magic6.read() == 0
&& h.magic7.read() == 0
&& h.words_offset.us() < h.idx_offset.us()
&& (h.next_offset.read() == 0 || h.idx_offset.us() < h.next_offset.us())
{
Ok(h)
Ok(())
} else {
Err(Error::KeyFileHeaderValidate)
}
@ -134,20 +119,18 @@ impl Keys {
Ok(())
}
pub fn new<P: AsRef<Path>>(path: P) -> Result<Keys, Error> {
let mut file = File::open(path)?;
pub fn new(paths: &Paths) -> Result<Keys, Error> {
let mut file = File::open(paths.key_headword_path())?;
let file_size = file.metadata()?.len() as usize;
let hdr = FileHeader::from(&mut file)?;
let mut hdr = FileHeader::default();
file.read_exact(hdr.as_bytes_mut())?;
hdr.validate()?;
file.seek(std::io::SeekFrom::Start(hdr.words_offset.read() as u64))?;
let words = read_vec(&mut file, hdr.words_offset.us(), hdr.idx_offset.us())?;
let Some(words) = words else { return Err(Error::InvalidIndex); };
let idx_end = (if hdr.next_offset.us() == 0 {
file_size
} else {
hdr.next_offset.us()
}) - hdr.idx_offset.us();
let idx_end = file_size - hdr.idx_offset.us();
let mut ihdr = IndexHeader::default();
file.seek(std::io::SeekFrom::Start(hdr.idx_offset.read() as u64))?;
file.read_exact(ihdr.as_bytes_mut())?;

View file

@ -1,13 +1,13 @@
mod abi_utils;
mod audio;
mod media;
mod dict;
mod error;
mod key;
mod pages;
pub mod resource;
mod resource;
mod headline;
pub use audio::Audio;
pub use media::Media;
pub use dict::MonokakidoDict;
pub use error::Error;
pub use key::{KeyIndex, Keys, PageItemId};

View file

@ -8,22 +8,22 @@ use crate::{
const RSC_NAME: &str = "audio";
pub struct Audio {
pub struct Media {
path: PathBuf,
res: Option<AudioResource>,
res: Option<MediaResource>,
}
enum AudioResource {
enum MediaResource {
Rsc(Rsc),
Nrsc(Nrsc),
}
impl Audio {
impl Media {
pub fn new(paths: &Paths) -> Result<Option<Self>, Error> {
let mut path = paths.contents_path();
path.push(RSC_NAME);
Ok(if path.exists() {
Some(Audio { path, res: None })
Some(Media { path, res: None })
} else {
None
})
@ -35,9 +35,9 @@ impl Audio {
let nrsc_index_exists = self.path.exists();
self.path.pop();
self.res = Some(if nrsc_index_exists {
AudioResource::Nrsc(Nrsc::new(&self.path)?)
MediaResource::Nrsc(Nrsc::new(&self.path)?)
} else {
AudioResource::Rsc(Rsc::new(&self.path, RSC_NAME)?)
MediaResource::Rsc(Rsc::new(&self.path, RSC_NAME)?)
});
}
Ok(())
@ -47,22 +47,22 @@ impl Audio {
self.init()?;
let Some(res) = self.res.as_mut() else { unreachable!() };
match res {
AudioResource::Rsc(rsc) => rsc.get(id.parse::<u32>().map_err(|_| Error::InvalidIndex)?),
AudioResource::Nrsc(nrsc) => nrsc.get(id),
MediaResource::Rsc(rsc) => rsc.get(id.parse::<u32>().map_err(|_| Error::InvalidIndex)?),
MediaResource::Nrsc(nrsc) => nrsc.get(id),
}
}
pub fn get_by_idx(&mut self, idx: usize) -> Result<(AudioId, &[u8]), Error> {
pub fn get_by_idx(&mut self, idx: usize) -> Result<(MediaId, &[u8]), Error> {
self.init()?;
let Some(res) = self.res.as_mut() else { unreachable!() };
Ok(match res {
AudioResource::Rsc(rsc) => {
MediaResource::Rsc(rsc) => {
let (id, page) = rsc.get_by_idx(idx)?;
(AudioId::Num(id), page)
(MediaId::Num(id), page)
}
AudioResource::Nrsc(nrsc) => {
MediaResource::Nrsc(nrsc) => {
let (id, page) = nrsc.get_by_idx(idx)?;
(AudioId::Str(id), page)
(MediaId::Str(id), page)
}
})
}
@ -71,19 +71,19 @@ impl Audio {
self.init()?;
let Some(res) = self.res.as_ref() else { unreachable!() };
Ok(0..match res {
AudioResource::Rsc(rsc) => rsc.len(),
AudioResource::Nrsc(nrsc) => nrsc.len(),
MediaResource::Rsc(rsc) => rsc.len(),
MediaResource::Nrsc(nrsc) => nrsc.len(),
})
}
}
#[derive(Debug)]
pub enum AudioId<'a> {
pub enum MediaId<'a> {
Str(&'a str),
Num(u32),
}
impl Display for AudioId<'_> {
impl Display for MediaId<'_> {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
match self {
Self::Str(str) => f.write_str(str),

View file

@ -210,7 +210,7 @@ impl Nrsc {
Ok(files)
}
pub fn new(path: &Path) -> Result<Self, Error> {
pub(crate) fn new(path: &Path) -> Result<Self, Error> {
let files = Nrsc::files(path)?;
let index = NrscIndex::new(path)?;
Ok(Nrsc {

View file

@ -280,7 +280,7 @@ impl Rsc {
Ok(files)
}
pub fn new(path: &Path, rsc_name: &str) -> Result<Self, Error> {
pub(crate) fn new(path: &Path, rsc_name: &str) -> Result<Self, Error> {
let files = Rsc::files(path, rsc_name)?;
let index = RscIndex::new(path, rsc_name)?;
Ok(Self {