Compare commits

..

1 commit

Author SHA1 Message Date
stephenmk 7d503e8aba
Add support for exploding graphics files 2023-05-02 08:45:13 -05:00
8 changed files with 77 additions and 75 deletions

View file

@ -1,14 +1,6 @@
# monokakido.rs # monokakido.rs
A Rust library for parsing and interpreting the [Monokakido](https://www.monokakido.jp/en/dictionaries/app/) dictionary format.
Aiming for full test coverage and efficient implementation with minimal dependencies.
## Notice A Rust library for parsing and interpreting the [Monokakido](https://www.monokakido.jp/en/dictionaries/app/) dictionary format. Aiming for full test coverage and efficient implementation with minimal dependencies.
This library started as a personal project driven by curiosity.
It is ABSOLUTELY NOT intended to support piracy;
I strongly condemn making unauthorized copies of Monokakido's dictionaries,
and take no part or responsibility in that kind of activity.
Please buy your own dictionaries directly from Monokakido to show your love and support.
## TODO: ## TODO:
- Add headline support - Add headline support
@ -47,5 +39,12 @@ Please buy your own dictionaries directly from Monokakido to show your love and
## Planned to support: ## Planned to support:
- WISDOM3 - WISDOM3
- SMK8 - SMK8
- RHEJ
- OLT
- OLEX
- OLDAE
- OCD
- OALD10
- NHKACCENT2 - NHKACCENT2
- DAIJISEN2 - DAIJISEN2
- CCCAD

View file

@ -33,6 +33,7 @@ fn explode() -> Result<(), Error> {
let pages_dir = out_dir(&dict) + "pages/"; let pages_dir = out_dir(&dict) + "pages/";
let audio_dir = out_dir(&dict) + "audio/"; let audio_dir = out_dir(&dict) + "audio/";
let graphics_dir = out_dir(&dict) + "graphics/";
create_dir_all(&pages_dir)?; create_dir_all(&pages_dir)?;
let mut path = String::from(&pages_dir); let mut path = String::from(&pages_dir);
@ -56,6 +57,18 @@ fn explode() -> Result<(), Error> {
} }
} }
if let Some(graphics) = &mut dict.graphics {
create_dir_all(&graphics_dir)?;
let mut path = String::from(&graphics_dir);
for idx in graphics.idx_iter()? {
let (id, graphics) = graphics.get_by_idx(idx)?;
write!(&mut path, "{id}")?;
let mut file = File::create(&path)?;
path.truncate(graphics_dir.len());
file.write_all(graphics)?;
}
}
write_index(&dict, &dict.keys.index_len, "index_len.tsv")?; write_index(&dict, &dict.keys.index_len, "index_len.tsv")?;
write_index(&dict, &dict.keys.index_prefix, "index_prefix.tsv")?; write_index(&dict, &dict.keys.index_prefix, "index_prefix.tsv")?;
write_index(&dict, &dict.keys.index_suffix, "index_suffix.tsv")?; write_index(&dict, &dict.keys.index_suffix, "index_suffix.tsv")?;

View file

@ -5,12 +5,13 @@ use std::{
path::{Path, PathBuf}, path::{Path, PathBuf},
}; };
use crate::{audio::Audio, key::Keys, pages::Pages, Error}; use crate::{media::Media, key::Keys, pages::Pages, Error};
pub struct MonokakidoDict { pub struct MonokakidoDict {
paths: Paths, paths: Paths,
pub pages: Pages, pub pages: Pages,
pub audio: Option<Audio>, pub audio: Option<Media>,
pub graphics: Option<Media>,
pub keys: Keys, pub keys: Keys,
} }
@ -60,7 +61,9 @@ impl Paths {
} }
pub(crate) fn key_path(&self) -> PathBuf { pub(crate) fn key_path(&self) -> PathBuf {
let mut pb = self.contents_path(); let mut pb = PathBuf::from(&self.base_path);
pb.push("Contents");
pb.push(&self.contents_dir);
pb.push("key"); pb.push("key");
pb pb
} }
@ -72,7 +75,9 @@ impl Paths {
} }
pub(crate) fn headline_path(&self) -> PathBuf { pub(crate) fn headline_path(&self) -> PathBuf {
let mut pb = self.contents_path(); let mut pb = PathBuf::from(&self.base_path);
pb.push("Contents");
pb.push(&self.contents_dir);
pb.push("headline"); pb.push("headline");
pb pb
} }
@ -131,13 +136,15 @@ impl MonokakidoDict {
contents_dir: contents.dir, contents_dir: contents.dir,
}; };
let pages = Pages::new(&paths)?; let pages = Pages::new(&paths)?;
let audio = Audio::new(&paths)?; let audio = Media::new(&paths)?;
let keys = Keys::new(paths.key_headword_path())?; let graphics = Media::new(&paths)?;
let keys = Keys::new(&paths)?;
Ok(MonokakidoDict { Ok(MonokakidoDict {
paths, paths,
pages, pages,
audio, audio,
graphics,
keys, keys,
}) })
} }

View file

@ -4,12 +4,12 @@ use std::{
fs::File, fs::File,
io::{Read, Seek}, io::{Read, Seek},
mem::size_of, mem::size_of,
path::Path,
str::from_utf8, str::from_utf8,
}; };
use crate::{ use crate::{
abi_utils::{read_vec, TransmuteSafe, LE32}, abi_utils::{TransmuteSafe, LE32, read_vec},
dict::Paths,
Error, Error,
}; };
@ -19,42 +19,27 @@ mod abi {
#[repr(C)] #[repr(C)]
#[derive(Debug, Clone, Copy, Default)] #[derive(Debug, Clone, Copy, Default)]
pub(super) struct FileHeader { pub(super) struct FileHeader {
pub ver: LE32,
magic1: LE32, magic1: LE32,
magic2: LE32,
pub words_offset: LE32, pub words_offset: LE32,
pub idx_offset: LE32, pub idx_offset: LE32,
// Jimmy-Z: no idea what this is magic3: LE32,
// present on (not limited to) OALD10, SANKOKU8 magic4: LE32,
pub next_offset: LE32,
magic5: LE32, magic5: LE32,
magic6: LE32, magic6: LE32,
magic7: LE32,
} }
impl FileHeader { impl FileHeader {
pub(super) fn from(r: &mut impl Read) -> Result<Self, Error> { pub(super) fn validate(&self) -> Result<(), Error> {
let mut h = FileHeader::default(); if self.magic1.read() == 0x20000
r.read_exact(&mut h.as_bytes_mut()[..0x10])?; && self.magic2.read() == 0
if h.ver.read() == 0x10000 && h.words_offset.read() == 0x10{ && self.magic3.read() == 0
} else if h.ver.read() == 0x20000 && h.words_offset.read() == 0x20 { && self.magic4.read() == 0
r.read_exact(&mut h.as_bytes_mut()[0x10..])?; && self.magic5.read() == 0
} else { && self.magic6.read() == 0
return Err(Error::KeyFileHeaderValidate) && self.words_offset.us() < self.idx_offset.us()
}
if h.ver.read() == 0x10000
&& h.magic1.read() == 0
&& h.words_offset.us() < h.idx_offset.us()
{ {
Ok(h) Ok(())
} else if h.ver.read() == 0x20000
&& h.magic1.read() == 0
&& h.magic5.read() == 0
&& h.magic6.read() == 0
&& h.magic7.read() == 0
&& h.words_offset.us() < h.idx_offset.us()
&& (h.next_offset.read() == 0 || h.idx_offset.us() < h.next_offset.us())
{
Ok(h)
} else { } else {
Err(Error::KeyFileHeaderValidate) Err(Error::KeyFileHeaderValidate)
} }
@ -134,20 +119,18 @@ impl Keys {
Ok(()) Ok(())
} }
pub fn new<P: AsRef<Path>>(path: P) -> Result<Keys, Error> { pub fn new(paths: &Paths) -> Result<Keys, Error> {
let mut file = File::open(path)?; let mut file = File::open(paths.key_headword_path())?;
let file_size = file.metadata()?.len() as usize; let file_size = file.metadata()?.len() as usize;
let hdr = FileHeader::from(&mut file)?; let mut hdr = FileHeader::default();
file.read_exact(hdr.as_bytes_mut())?;
hdr.validate()?;
file.seek(std::io::SeekFrom::Start(hdr.words_offset.read() as u64))?; file.seek(std::io::SeekFrom::Start(hdr.words_offset.read() as u64))?;
let words = read_vec(&mut file, hdr.words_offset.us(), hdr.idx_offset.us())?; let words = read_vec(&mut file, hdr.words_offset.us(), hdr.idx_offset.us())?;
let Some(words) = words else { return Err(Error::InvalidIndex); }; let Some(words) = words else { return Err(Error::InvalidIndex); };
let idx_end = (if hdr.next_offset.us() == 0 { let idx_end = file_size - hdr.idx_offset.us();
file_size
} else {
hdr.next_offset.us()
}) - hdr.idx_offset.us();
let mut ihdr = IndexHeader::default(); let mut ihdr = IndexHeader::default();
file.seek(std::io::SeekFrom::Start(hdr.idx_offset.read() as u64))?; file.seek(std::io::SeekFrom::Start(hdr.idx_offset.read() as u64))?;
file.read_exact(ihdr.as_bytes_mut())?; file.read_exact(ihdr.as_bytes_mut())?;

View file

@ -1,13 +1,13 @@
mod abi_utils; mod abi_utils;
mod audio; mod media;
mod dict; mod dict;
mod error; mod error;
mod key; mod key;
mod pages; mod pages;
pub mod resource; mod resource;
mod headline; mod headline;
pub use audio::Audio; pub use media::Media;
pub use dict::MonokakidoDict; pub use dict::MonokakidoDict;
pub use error::Error; pub use error::Error;
pub use key::{KeyIndex, Keys, PageItemId}; pub use key::{KeyIndex, Keys, PageItemId};

View file

@ -8,22 +8,22 @@ use crate::{
const RSC_NAME: &str = "audio"; const RSC_NAME: &str = "audio";
pub struct Audio { pub struct Media {
path: PathBuf, path: PathBuf,
res: Option<AudioResource>, res: Option<MediaResource>,
} }
enum AudioResource { enum MediaResource {
Rsc(Rsc), Rsc(Rsc),
Nrsc(Nrsc), Nrsc(Nrsc),
} }
impl Audio { impl Media {
pub fn new(paths: &Paths) -> Result<Option<Self>, Error> { pub fn new(paths: &Paths) -> Result<Option<Self>, Error> {
let mut path = paths.contents_path(); let mut path = paths.contents_path();
path.push(RSC_NAME); path.push(RSC_NAME);
Ok(if path.exists() { Ok(if path.exists() {
Some(Audio { path, res: None }) Some(Media { path, res: None })
} else { } else {
None None
}) })
@ -35,9 +35,9 @@ impl Audio {
let nrsc_index_exists = self.path.exists(); let nrsc_index_exists = self.path.exists();
self.path.pop(); self.path.pop();
self.res = Some(if nrsc_index_exists { self.res = Some(if nrsc_index_exists {
AudioResource::Nrsc(Nrsc::new(&self.path)?) MediaResource::Nrsc(Nrsc::new(&self.path)?)
} else { } else {
AudioResource::Rsc(Rsc::new(&self.path, RSC_NAME)?) MediaResource::Rsc(Rsc::new(&self.path, RSC_NAME)?)
}); });
} }
Ok(()) Ok(())
@ -47,22 +47,22 @@ impl Audio {
self.init()?; self.init()?;
let Some(res) = self.res.as_mut() else { unreachable!() }; let Some(res) = self.res.as_mut() else { unreachable!() };
match res { match res {
AudioResource::Rsc(rsc) => rsc.get(id.parse::<u32>().map_err(|_| Error::InvalidIndex)?), MediaResource::Rsc(rsc) => rsc.get(id.parse::<u32>().map_err(|_| Error::InvalidIndex)?),
AudioResource::Nrsc(nrsc) => nrsc.get(id), MediaResource::Nrsc(nrsc) => nrsc.get(id),
} }
} }
pub fn get_by_idx(&mut self, idx: usize) -> Result<(AudioId, &[u8]), Error> { pub fn get_by_idx(&mut self, idx: usize) -> Result<(MediaId, &[u8]), Error> {
self.init()?; self.init()?;
let Some(res) = self.res.as_mut() else { unreachable!() }; let Some(res) = self.res.as_mut() else { unreachable!() };
Ok(match res { Ok(match res {
AudioResource::Rsc(rsc) => { MediaResource::Rsc(rsc) => {
let (id, page) = rsc.get_by_idx(idx)?; let (id, page) = rsc.get_by_idx(idx)?;
(AudioId::Num(id), page) (MediaId::Num(id), page)
} }
AudioResource::Nrsc(nrsc) => { MediaResource::Nrsc(nrsc) => {
let (id, page) = nrsc.get_by_idx(idx)?; let (id, page) = nrsc.get_by_idx(idx)?;
(AudioId::Str(id), page) (MediaId::Str(id), page)
} }
}) })
} }
@ -71,19 +71,19 @@ impl Audio {
self.init()?; self.init()?;
let Some(res) = self.res.as_ref() else { unreachable!() }; let Some(res) = self.res.as_ref() else { unreachable!() };
Ok(0..match res { Ok(0..match res {
AudioResource::Rsc(rsc) => rsc.len(), MediaResource::Rsc(rsc) => rsc.len(),
AudioResource::Nrsc(nrsc) => nrsc.len(), MediaResource::Nrsc(nrsc) => nrsc.len(),
}) })
} }
} }
#[derive(Debug)] #[derive(Debug)]
pub enum AudioId<'a> { pub enum MediaId<'a> {
Str(&'a str), Str(&'a str),
Num(u32), Num(u32),
} }
impl Display for AudioId<'_> { impl Display for MediaId<'_> {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
match self { match self {
Self::Str(str) => f.write_str(str), Self::Str(str) => f.write_str(str),

View file

@ -210,7 +210,7 @@ impl Nrsc {
Ok(files) Ok(files)
} }
pub fn new(path: &Path) -> Result<Self, Error> { pub(crate) fn new(path: &Path) -> Result<Self, Error> {
let files = Nrsc::files(path)?; let files = Nrsc::files(path)?;
let index = NrscIndex::new(path)?; let index = NrscIndex::new(path)?;
Ok(Nrsc { Ok(Nrsc {

View file

@ -280,7 +280,7 @@ impl Rsc {
Ok(files) Ok(files)
} }
pub fn new(path: &Path, rsc_name: &str) -> Result<Self, Error> { pub(crate) fn new(path: &Path, rsc_name: &str) -> Result<Self, Error> {
let files = Rsc::files(path, rsc_name)?; let files = Rsc::files(path, rsc_name)?;
let index = RscIndex::new(path, rsc_name)?; let index = RscIndex::new(path, rsc_name)?;
Ok(Self { Ok(Self {