Implement basic functionality (#2)

Pyry Kontio, 2022-10-11 10:50:31 +09:00 (committed by GitHub)
parent f7e9379eff
commit fcfbbf3c5c
10 changed files with 1419 additions and 3 deletions

Cargo.lock (generated)

@@ -2,6 +2,94 @@
# It is not intended for manual editing.
version = 3
[[package]]
name = "adler"
version = "1.0.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f26201604c87b1e01bd3d98f8d5d9a8fcbb815e8cedb41ffccbeb4bf593a35fe"
[[package]]
name = "itoa"
version = "1.0.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "4217ad341ebadf8d8e724e264f13e593e0648f5b3e94b3896a5df283be015ecc"
[[package]]
name = "mini-internal"
version = "0.1.27"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "9a63337614a1d280fdb2880599af563c99e9f388757f8d6515d785d85d14fb01"
dependencies = [
"proc-macro2",
"quote",
"syn",
]
[[package]]
name = "miniserde"
version = "0.1.27"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5f4313e4a66a442473e181963daf8c1e9def85c2d9fb0bb2ae59444260b28285"
dependencies = [
"itoa",
"mini-internal",
"ryu",
]
[[package]]
name = "miniz_oxide"
version = "0.6.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b275950c28b37e794e8c55d88aeb5e139d0ce23fdbbeda68f8d7174abdf9e8fa"
dependencies = [
"adler",
]
[[package]]
name = "monokakido"
version = "0.1.0"
dependencies = [
"miniserde",
"miniz_oxide",
]
[[package]]
name = "proc-macro2"
version = "1.0.46"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "94e2ef8dbfc347b10c094890f778ee2e36ca9bb4262e86dc99cd217e35f3470b"
dependencies = [
"unicode-ident",
]
[[package]]
name = "quote"
version = "1.0.21"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "bbe448f377a7d6961e30f5955f9b8d106c3f5e449d493ee1b125c1d43c2b5179"
dependencies = [
"proc-macro2",
]
[[package]]
name = "ryu"
version = "1.0.11"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "4501abdff3ae82a1c1b477a17252eb69cee9e66eb915c1abaa4f44d873df9f09"
[[package]]
name = "syn"
version = "1.0.102"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "3fcd952facd492f9be3ef0d0b7032a6e442ee9b361d4acc2b1d0c4aaa5f613a1"
dependencies = [
"proc-macro2",
"quote",
"unicode-ident",
]
[[package]]
name = "unicode-ident"
version = "1.0.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "6ceab39d59e4c9499d4e5a8ee0e2735b891bb7308ac83dfb4e80cad195c9f6f3"

Cargo.toml

@@ -3,6 +3,6 @@ name = "monokakido"
version = "0.1.0"
edition = "2021"

# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html

[dependencies]
miniz_oxide = { version = "0.6", default-features = false }
miniserde = "0.1"

src/abi.rs (new file)

@@ -0,0 +1,85 @@
use core::{
mem::{align_of, size_of},
slice,
};
use crate::Error;
#[repr(transparent)]
#[derive(Debug, Clone, Copy, Default, PartialEq, Eq, PartialOrd, Ord)]
pub(crate) struct LE32(u32);
impl LE32 {
pub fn read(self) -> u32 {
u32::from_le(self.0)
}
pub fn us(self) -> usize {
self.read() as usize
}
pub fn from(slice: &[u8]) -> Result<(Self, &[u8]), Error> {
if slice.len() < size_of::<LE32>() {
return Err(Error::BufferTooSmall);
}
let (le32, tail) = slice.split_at(size_of::<LE32>());
Ok((LE32(u32::from_ne_bytes(le32.try_into().unwrap())), tail))
}
}
impl From<u32> for LE32 {
fn from(value: u32) -> Self {
Self(u32::from_le(value))
}
}
unsafe impl TransmuteSafe for LE32 {}
pub(crate) unsafe trait TransmuteSafe: Default + Clone {
fn from_buf(buf: &[u8]) -> Result<(&Self, &[u8]), Error> {
if buf.len() < size_of::<Self>() {
return Err(Error::Transmute);
}
if buf.as_ptr() as usize % align_of::<Self>() != 0 {
return Err(Error::Transmute);
}
let (me, tail) = buf.split_at(size_of::<Self>());
let me = unsafe { &*(me.as_ptr() as *const Self) };
Ok((me, tail))
}
fn slice_from_buf(buf: &[u8], n: usize) -> Result<(&[Self], &[u8]), Error> {
if buf.len() < n * size_of::<Self>() {
return Err(Error::Transmute);
}
if buf.as_ptr() as usize % align_of::<Self>() != 0 {
return Err(Error::Transmute);
}
let tail = &buf[n * size_of::<Self>()..];
let us: &[Self] = unsafe { slice::from_raw_parts(buf.as_ptr() as *const Self, n) };
Ok((us, tail))
}
fn slice_as_bytes_mut(slice: &mut [Self]) -> &mut [u8] {
unsafe {
slice::from_raw_parts_mut(
slice.as_mut_ptr() as *mut u8,
slice.len() * size_of::<Self>(),
)
}
}
fn slice_as_bytes(slice: &[Self]) -> &[u8] {
unsafe {
slice::from_raw_parts(slice.as_ptr() as *const u8, slice.len() * size_of::<Self>())
}
}
fn as_bytes_mut(&mut self) -> &mut [u8] {
Self::slice_as_bytes_mut(slice::from_mut(self))
}
fn as_bytes(&self) -> &[u8] {
Self::slice_as_bytes(slice::from_ref(self))
}
}
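
For illustration, here is a minimal sketch (not part of the commit) of how `LE32` and `TransmuteSafe` compose: a `#[repr(C)]` record built from `LE32` fields opts in with an `unsafe impl`, and can then be borrowed directly out of a byte buffer. The `Header` type is hypothetical, and since the trait is crate-private, code like this would live inside the crate.

use crate::{abi::{TransmuteSafe, LE32}, Error};

// Hypothetical record type, for illustration only.
#[repr(C)]
#[derive(Debug, Clone, Copy, Default)]
struct Header {
    magic: LE32,
    len: LE32,
}

// Safety: #[repr(C)] with only LE32 fields: no padding, and every bit pattern is valid.
unsafe impl TransmuteSafe for Header {}

fn parse(buf: &[u8]) -> Result<(), Error> {
    // Borrows a Header view over the front of `buf`; `tail` is the remainder.
    let (hdr, tail) = Header::from_buf(buf)?;
    println!("magic = {:#x}, {} bytes remain", hdr.magic.read(), tail.len());
    Ok(())
}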

src/audio.rs (new file)

@@ -0,0 +1,215 @@
use core::{mem::size_of, ops::Not};
use std::{
ffi::OsStr,
fs::{self, File},
io::{Read, Seek, SeekFrom},
};
use miniz_oxide::inflate::core as zlib;
use crate::{abi::TransmuteSafe, decompress, dict::Paths, ContentsFile, Error};
#[derive(Debug, Clone)]
pub(crate) struct AudioIndex {
idx: Vec<AudioIdxRecord>,
ids: String, // contains null bytes as substring separators
}
mod abi {
use crate::{audio::AudioFormat, Error};
#[repr(C)]
#[derive(Debug, Clone, Copy, Default, PartialEq, Eq)]
pub(crate) struct AudioIdxRecord {
format: u16,
fileseq: u16,
id_str_offset: u32,
file_offset: u32,
len: u32,
}
impl AudioIdxRecord {
pub fn id_str_offset(&self) -> usize {
u32::from_le(self.id_str_offset) as usize
}
pub(super) fn format(&self) -> Result<AudioFormat, Error> {
match u16::from_le(self.format) {
0 => Ok(AudioFormat::Acc),
1 => Ok(AudioFormat::ZlibAcc),
_ => Err(Error::InvalidAudioFormat),
}
}
pub fn fileseq(&self) -> usize {
u16::from_le(self.fileseq) as usize
}
pub fn file_offset(&self) -> u64 {
u32::from_le(self.file_offset) as u64
}
pub fn len(&self) -> usize {
u32::from_le(self.len) as usize
}
}
#[test]
fn test_audio_index() {
use crate::audio::AudioIndex;
let air = |id_str_offset| AudioIdxRecord {
format: 0,
fileseq: 0,
id_str_offset,
file_offset: 0,
len: 0,
};
let mut audio_idx = AudioIndex {
idx: vec![air(0), air(1), air(3), air(6), air(10)],
ids: "\0a\0bb\0ccc\0dddd".to_owned(),
};
assert_eq!(audio_idx.get_id_at(0).unwrap(), "");
assert_eq!(audio_idx.get_id_at(1).unwrap(), "a");
assert_eq!(audio_idx.get_id_at(3).unwrap(), "bb");
assert_eq!(audio_idx.get_id_at(4), Err(Error::InvalidIndex));
assert_eq!(audio_idx.get_id_at(6).unwrap(), "ccc");
assert_eq!(audio_idx.get_id_at(10), Err(Error::InvalidIndex));
audio_idx.ids = "\0a\0bb\0ccc\0dddd\0".to_owned();
assert_eq!(audio_idx.get_by_id("").unwrap(), air(0));
assert_eq!(audio_idx.get_by_id("a").unwrap(), air(1));
assert_eq!(audio_idx.get_by_id("bb").unwrap(), air(3));
assert_eq!(audio_idx.get_by_id("ccc").unwrap(), air(6));
assert_eq!(audio_idx.get_by_id("dddd").unwrap(), air(10));
assert_eq!(audio_idx.get_by_id("ddd"), Err(Error::NotFound));
}
}
pub(crate) use abi::AudioIdxRecord;
enum AudioFormat {
Acc,
ZlibAcc,
}
unsafe impl TransmuteSafe for AudioIdxRecord {}
impl AudioIndex {
pub(crate) fn new(paths: &Paths) -> Result<Self, Error> {
let mut file = File::open(paths.audio_idx_path()).map_err(|_| Error::FopenError)?;
let mut len = [0; 8];
file.read_exact(&mut len).map_err(|_| Error::IOError)?;
let len = u32::from_le_bytes(len[4..8].try_into().unwrap()) as usize;
let file_size = file.metadata().map_err(|_| Error::IOError)?.len() as usize;
let idx_expected_size = size_of::<AudioIdxRecord>() * len + 8;
let mut idx = vec![AudioIdxRecord::default(); len];
let mut ids = String::with_capacity(file_size - idx_expected_size);
file.read_exact(AudioIdxRecord::slice_as_bytes_mut(idx.as_mut_slice()))
.map_err(|_| Error::IOError)?;
file.read_to_string(&mut ids).map_err(|_| Error::IOError)?;
Ok(Self { idx, ids })
}
fn get_id_at(&self, offset: usize) -> Result<&str, Error> {
let offset = offset - (size_of::<AudioIdxRecord>() * self.idx.len() + 8);
if offset > 0 && &self.ids[offset - 1..offset] != "\0" {
return Err(Error::InvalidIndex);
}
let tail = &self.ids[offset..];
let len = tail.find('\0').ok_or(Error::InvalidIndex)?;
Ok(&tail[..len])
}
pub fn get_by_id(&self, id: &str) -> Result<AudioIdxRecord, Error> {
let mut idx_err = Ok(());
let i = self
.idx
.binary_search_by_key(&id, |idx| match self.get_id_at(idx.id_str_offset()) {
Ok(ok) => ok,
Err(err) => {
idx_err = Err(err);
""
}
})
.map_err(|_| Error::NotFound)?;
idx_err?;
Ok(self.idx[i])
}
}
pub struct Audio {
index: AudioIndex,
audio: Vec<ContentsFile>,
read_buf: Vec<u8>,
decomp_buf: Vec<u8>,
zlib_state: zlib::DecompressorOxide,
}
impl Audio {
fn parse_fname(fname: &OsStr) -> Option<u32> {
let fname = fname.to_str()?;
if fname.ends_with(".nrsc").not() {
return None;
}
u32::from_str_radix(&fname[..5], 10).ok()
}
pub(crate) fn new(paths: &Paths) -> Result<Self, Error> {
let mut audio = Vec::new();
for entry in fs::read_dir(&paths.audio_path()).map_err(|_| Error::IOError)? {
let entry = entry.map_err(|_| Error::IOError)?;
let seqnum = Audio::parse_fname(&entry.file_name());
if let Some(seqnum) = seqnum {
audio.push(ContentsFile {
seqnum,
len: entry.metadata().map_err(|_| Error::IOError)?.len() as usize,
offset: 0,
file: File::open(entry.path()).map_err(|_| Error::IOError)?,
});
}
}
audio.sort_by_key(|f| f.seqnum);
if Some(audio.len()) != audio.last().map(|a| a.seqnum as usize + 1) {
return Err(Error::NoContentFilesFound);
}
let index = AudioIndex::new(&paths)?;
Ok(Audio {
index,
audio,
read_buf: Vec::new(),
decomp_buf: Vec::new(),
zlib_state: zlib::DecompressorOxide::new(),
})
}
fn get_by_idx(&mut self, idx: AudioIdxRecord) -> Result<&[u8], Error> {
let file = &mut self.audio[idx.fileseq() as usize];
file.file
.seek(SeekFrom::Start(idx.file_offset()))
.map_err(|_| Error::IOError)?;
if self.read_buf.len() < idx.len() {
self.read_buf.resize(idx.len(), 0);
}
file.file
.read_exact(&mut self.read_buf[..idx.len()])
.map_err(|_| Error::IOError)?;
match idx.format()? {
AudioFormat::Acc => Ok(&self.read_buf[..idx.len()]),
AudioFormat::ZlibAcc => {
let n_out = decompress(
&mut self.zlib_state,
&self.read_buf[..idx.len()],
&mut self.decomp_buf,
)?;
Ok(&self.decomp_buf[..n_out])
}
}
}
pub fn get(&mut self, id: &str) -> Result<&[u8], Error> {
self.get_by_idx(self.index.get_by_id(id)?)
}
}
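
A usage sketch (not part of the commit); the dictionary name and audio ID below are made-up examples:

// Sketch: fetch one audio clip by its ID and dump it to disk.
fn dump_clip() -> Result<(), monokakido::Error> {
    let mut dict = monokakido::MonokakidoDict::open("NHKACCENT2")?;
    let aac = dict.audio.get("20170504130614")?; // hypothetical ID string
    std::fs::write("clip.aac", aac)?; // io::Error converts via From<IoError>
    Ok(())
}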

src/dict.rs (new file)

@@ -0,0 +1,155 @@
use miniserde::{json, Deserialize};
use std::{
ffi::OsStr,
fs,
ops::Not,
path::{Path, PathBuf},
};
use crate::{audio::Audio, key::Keys, pages::Pages, Error};
pub struct MonokakidoDict {
paths: Paths,
pub pages: Pages,
pub audio: Audio,
pub keys: Keys,
}
#[derive(Deserialize, Debug)]
struct DictJson {
#[serde(rename = "DSProductContents")]
contents: Vec<DSProductContents>,
}
#[derive(Deserialize, Debug)]
struct DSProductContents {
#[serde(rename = "DSContentDirectory")]
dir: String,
}
pub(crate) struct Paths {
base_path: PathBuf,
name: String,
contents_dir: String,
}
impl Paths {
fn std_list_path() -> PathBuf {
PathBuf::from(
"/Library/Application Support/AppStoreContent/jp.monokakido.Dictionaries/Products/",
)
}
fn std_dict_path(name: &str) -> PathBuf {
let mut path = Paths::std_list_path();
path.push(format!("jp.monokakido.Dictionaries.{name}"));
path
}
fn json_path(path: &Path, name: &str) -> PathBuf {
let mut pb = PathBuf::from(path);
pb.push("Contents");
pb.push(format!("{name}.json"));
pb
}
pub(crate) fn contents_path(&self) -> PathBuf {
let mut pb = PathBuf::from(&self.base_path);
pb.push("Contents");
pb.push(&self.contents_dir);
pb.push("contents");
pb
}
pub(crate) fn audio_path(&self) -> PathBuf {
let mut pb = PathBuf::from(&self.base_path);
pb.push("Contents");
pb.push(&self.contents_dir);
pb.push("audio");
pb
}
pub(crate) fn contents_idx_path(&self) -> PathBuf {
let mut pb = self.contents_path();
pb.push("contents.idx");
pb
}
pub(crate) fn contents_map_path(&self) -> PathBuf {
let mut pb = self.contents_path();
pb.push("contents.map");
pb
}
pub(crate) fn audio_idx_path(&self) -> PathBuf {
let mut pb = self.audio_path();
pb.push("index.nidx");
pb
}
pub(crate) fn key_path(&self) -> PathBuf {
let mut pb = PathBuf::from(&self.base_path);
pb.push("Contents");
pb.push(&self.contents_dir);
pb.push("key");
pb
}
pub(crate) fn headword_key_path(&self) -> PathBuf {
let mut pb = self.key_path();
pb.push("headword.keystore");
pb
}
}
fn parse_dict_name(fname: &OsStr) -> Option<&str> {
let fname = fname.to_str()?;
if fname.starts_with("jp.monokakido.Dictionaries.").not() {
return None;
}
Some(&fname[27..])
}
impl MonokakidoDict {
pub fn list() -> Result<impl Iterator<Item = Result<String, Error>>, Error> {
let iter = fs::read_dir(&Paths::std_list_path()).map_err(|_| Error::IOError)?;
Ok(iter.filter_map(|entry| {
entry
.map_err(|_| Error::IOError)
.map(|e| parse_dict_name(&e.file_name()).map(ToOwned::to_owned))
.transpose()
}))
}
pub fn open(name: &str) -> Result<Self, Error> {
let std_path = Paths::std_dict_path(name);
Self::open_with_path(&std_path, name)
}
pub fn name(&self) -> &str {
&self.paths.name
}
pub fn open_with_path(path: impl Into<PathBuf>, name: &str) -> Result<Self, Error> {
let base_path = path.into();
let json_path = Paths::json_path(&base_path, name);
let json = fs::read_to_string(json_path).map_err(|_| Error::NoDictJsonFound)?;
let mut json: DictJson = json::from_str(&json).map_err(|_| Error::InvalidDictJson)?;
let contents = json.contents.pop().ok_or(Error::InvalidDictJson)?;
let paths = Paths {
base_path,
name: name.to_owned(),
contents_dir: contents.dir,
};
let pages = Pages::new(&paths)?;
let audio = Audio::new(&paths)?;
let keys = Keys::new(&paths)?;
Ok(MonokakidoDict {
paths,
pages,
audio,
keys,
})
}
}
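
A sketch of the discovery API (the dictionary name is an example):

// Sketch: list installed dictionaries, then open one by name.
fn demo() -> Result<(), monokakido::Error> {
    for name in monokakido::MonokakidoDict::list()? {
        println!("found: {}", name?);
    }
    // `open` looks under the standard macOS install path;
    // `open_with_path` takes an explicit base directory instead.
    let dict = monokakido::MonokakidoDict::open("NHKACCENT2")?;
    println!("opened: {}", dict.name());
    Ok(())
}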

src/error.rs (new file)

@@ -0,0 +1,35 @@
use std::{io::Error as IoError, str::Utf8Error};
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum Error {
Transmute,
Validate,
FopenError,
FstatError,
MmapError,
ZlibError,
Utf8Error,
RecordTooLarge,
IncorrectStreamLength,
BufferTooSmall,
IndexMismach,
NotFound,
NoDictJsonFound,
InvalidDictJson,
IOError,
NoContentFilesFound,
InvalidIndex,
InvalidAudioFormat,
}
impl From<IoError> for Error {
fn from(_: IoError) -> Self {
Error::IOError
}
}
impl From<Utf8Error> for Error {
fn from(_: Utf8Error) -> Self {
Error::Utf8Error
}
}
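
The two `From` impls are what let `?` be applied directly to `std::io` and UTF-8 results elsewhere in the crate. A hypothetical helper for illustration:

fn read_text(path: &std::path::Path) -> Result<String, Error> {
    let bytes = std::fs::read(path)?;           // io::Error -> Error::IOError
    Ok(std::str::from_utf8(&bytes)?.to_owned()) // Utf8Error -> Error::Utf8Error
}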

src/key.rs (new file)

@@ -0,0 +1,249 @@
use std::{
fs::File,
io::{Read, Seek},
mem::size_of,
str::from_utf8,
};
use crate::{
abi::{TransmuteSafe, LE32},
dict::Paths,
Error,
};
mod abi {
use super::*;
#[repr(C)]
#[derive(Debug, Clone, Copy, Default)]
pub(super) struct FileHeader {
magic1: LE32,
magic2: LE32,
pub words_offset: LE32,
pub idx_offset: LE32,
magic3: LE32,
magic4: LE32,
magic5: LE32,
magic6: LE32,
}
impl FileHeader {
pub(super) fn validate(&self) -> Result<(), Error> {
if self.magic1.read() == 0x20000
&& self.magic2.read() == 0
&& self.magic3.read() == 0
&& self.magic4.read() == 0
&& self.magic5.read() == 0
&& self.magic6.read() == 0
&& self.words_offset.us() < self.idx_offset.us()
{
Ok(())
} else {
Err(Error::Validate)
}
}
}
unsafe impl TransmuteSafe for FileHeader {}
#[repr(C)]
#[derive(Debug, Clone, Copy, Default)]
pub(super) struct IndexHeader {
magic1: LE32,
pub index_a_offset: LE32,
pub index_b_offset: LE32,
pub index_c_offset: LE32,
pub index_d_offset: LE32,
}
impl IndexHeader {
pub(super) fn validate(&self, file_end: usize) -> Result<(), Error> {
if self.magic1.read() == 0x04
&& self.index_a_offset.us() < self.index_b_offset.us()
&& self.index_b_offset.us() < self.index_c_offset.us()
&& self.index_c_offset.us() < self.index_d_offset.us()
&& self.index_d_offset.us() < file_end
{
Ok(())
} else {
Err(Error::Validate)
}
}
}
unsafe impl TransmuteSafe for IndexHeader {}
}
use abi::{FileHeader, IndexHeader};
pub struct Keys {
words: Vec<LE32>,
index_a: Vec<LE32>,
index_b: Vec<LE32>,
index_c: Vec<LE32>,
index_d: Vec<LE32>,
}
impl Keys {
fn read_vec(file: &mut File, start: usize, end: usize) -> Result<Vec<LE32>, Error> {
let size = (end - start + size_of::<LE32>() - 1) / size_of::<LE32>();
let mut buf = vec![LE32::default(); size];
file.read_exact(LE32::slice_as_bytes_mut(&mut buf))?;
Ok(buf)
}
fn check_vec_len(buf: &Vec<LE32>) -> Result<(), Error> {
if buf.get(0).ok_or(Error::InvalidIndex)?.us() + 1 != buf.len() {
return Err(Error::InvalidIndex);
}
Ok(())
}
pub(crate) fn new(paths: &Paths) -> Result<Keys, Error> {
let mut file = File::open(paths.headword_key_path())?;
let file_size = file.metadata()?.len() as usize;
let mut hdr = FileHeader::default();
file.read_exact(hdr.as_bytes_mut())?;
hdr.validate()?;
file.seek(std::io::SeekFrom::Start(hdr.words_offset.read() as u64))?;
let words = Self::read_vec(&mut file, hdr.words_offset.us(), hdr.idx_offset.us())?;
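// ESTABLISH INVARIANT A: `words[0]` stores the entry count; reject the file
// unless `words[0] + 1 < words.len()`, so `count()` and later lookups stay in bounds.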
if words.get(0).ok_or(Error::InvalidIndex)?.us() + 1 >= words.len() {
return Err(Error::InvalidIndex);
}
let file_end = file_size - hdr.idx_offset.us();
let mut ihdr = IndexHeader::default();
file.seek(std::io::SeekFrom::Start(hdr.idx_offset.read() as u64))?;
file.read_exact(ihdr.as_bytes_mut())?;
ihdr.validate(file_end)?;
let index_a = Self::read_vec(
&mut file,
ihdr.index_a_offset.us(),
ihdr.index_b_offset.us(),
)?;
Self::check_vec_len(&index_a)?;
let index_b = Self::read_vec(
&mut file,
ihdr.index_b_offset.us(),
ihdr.index_c_offset.us(),
)?;
Self::check_vec_len(&index_b)?;
let index_c = Self::read_vec(
&mut file,
ihdr.index_c_offset.us(),
ihdr.index_d_offset.us(),
)?;
Self::check_vec_len(&index_c)?;
let index_d = Self::read_vec(&mut file, ihdr.index_d_offset.us(), file_end)?;
Self::check_vec_len(&index_d)?;
Ok(Keys {
words,
index_a,
index_b,
index_c,
index_d,
})
}
pub fn count(&self) -> usize {
// USE INVARIANT A
self.words[0].us()
}
fn get_page_iter(&self, pages_offset: usize) -> Result<PageIter, Error> {
let pages = &LE32::slice_as_bytes(&self.words)[pages_offset..];
PageIter::new(pages)
}
pub(crate) fn get_word_span(&self, offset: usize) -> Result<(&str, usize), Error> {
let words_bytes = LE32::slice_as_bytes(&self.words);
if words_bytes.len() < offset + 2 * size_of::<LE32>() {
return Err(Error::InvalidIndex);
}
let (pages_offset, word_bytes) = LE32::from(&words_bytes[offset..])?;
if let Some(word) = word_bytes[1..].split(|b| *b == b'\0').next() {
Ok((from_utf8(word)?, pages_offset.us()))
} else {
Err(Error::InvalidIndex)
}
}
fn get_inner(&self, index: &[LE32], idx: usize) -> Result<(&str, PageIter<'_>), Error> {
if idx >= self.count() {
return Err(Error::NotFound);
}
let word_offset = index[idx + 1].us();
let (word, pages_offset) = self.get_word_span(word_offset)?;
let pages = self.get_page_iter(pages_offset)?;
Ok((word, pages))
}
pub fn get_index_a(&self, idx: usize) -> Result<(&str, PageIter<'_>), Error> {
self.get_inner(&self.index_a, idx)
}
pub fn get_index_b(&self, idx: usize) -> Result<(&str, PageIter<'_>), Error> {
self.get_inner(&self.index_b, idx)
}
pub fn get_index_c(&self, idx: usize) -> Result<(&str, PageIter<'_>), Error> {
self.get_inner(&self.index_c, idx)
}
pub fn get_index_d(&self, idx: usize) -> Result<(&str, PageIter<'_>), Error> {
self.get_inner(&self.index_d, idx)
}
}
#[derive(Debug, Clone)]
pub struct PageIter<'a> {
count: u16,
span: &'a [u8],
}
impl<'a> PageIter<'a> {
fn new(pages: &'a [u8]) -> Result<Self, Error> {
let (count, pages) = pages.split_at(2);
let count = u16::from_le_bytes(count.try_into().unwrap());
// CHECK INVARIANT B: loop through the entries `count` times and check that each has the expected shape
let mut tail = pages;
for _ in 0..count {
match tail {
&[2, _, _, ref t @ ..] => tail = t,
&[4, _, _, _, ref t @ ..] => tail = t,
_ => return Err(Error::InvalidIndex),
}
}
let span_len = pages.len() - tail.len();
Ok(PageIter {
span: &pages[..span_len],
count,
})
}
}
impl<'a> Iterator for PageIter<'a> {
type Item = u32;
fn next(&mut self) -> Option<Self::Item> {
// USE INVARIANT B: `self.span` was validated to conform to this shape,
// so the `unreachable!()` arm is never taken. `self.count` was checked to
// match the number of entries, so the decrement below never underflows.
let (id, tail) = match self.span {
&[2, hi, lo, ref tail @ ..] => (u32::from_be_bytes([0, 0, hi, lo]), tail),
&[4, hi, mid, lo, ref tail @ ..] => (u32::from_be_bytes([0, hi, mid, lo]), tail),
&[] => return None,
_ => unreachable!(),
};
self.count -= 1;
self.span = tail;
Some(id)
}
}
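
A consumption sketch (judging by src/main.rs below, index a is length order, b prefix order, c suffix order):

// Sketch: print the first entry of the prefix-ordered index and its page IDs.
fn first_entry(dict: &monokakido::MonokakidoDict) -> Result<(), monokakido::Error> {
    let (word, pages) = dict.keys.get_index_b(0)?;
    println!("{word}");
    for page_id in pages {
        println!("  page {page_id}");
    }
    Ok(())
}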

src/lib.rs (new file)

@@ -0,0 +1,61 @@
use std::fs;
use miniz_oxide::inflate::{core as zlib, TINFLStatus as ZStatus};
mod abi;
mod audio;
mod dict;
mod error;
mod key;
mod pages;
pub use dict::MonokakidoDict;
pub use error::Error;
pub use pages::Pages;
pub use audio::Audio;
pub use key::Keys;
fn decompress(
zlib_state: &mut zlib::DecompressorOxide,
in_buf: &[u8],
out_buf: &mut Vec<u8>,
) -> Result<usize, Error> {
use zlib::inflate_flags as flg;
use ZStatus::{Done, HasMoreOutput};
let flags = flg::TINFL_FLAG_PARSE_ZLIB_HEADER | flg::TINFL_FLAG_USING_NON_WRAPPING_OUTPUT_BUF;
let mut n_in_total = 0;
let mut n_out_total = 0;
zlib_state.init();
loop {
let (status, n_in, n_out) = zlib::decompress(
zlib_state,
&in_buf[n_in_total..],
out_buf,
n_out_total,
flags,
);
n_out_total += n_out;
n_in_total += n_in;
match status {
HasMoreOutput => {
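// The output buffer was too small: grow it geometrically and resume
// decompression from where the previous call stopped.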
out_buf.resize(out_buf.len() * 2 + 1, 0);
continue;
}
Done => break,
_ => return Err(Error::ZlibError),
}
}
if n_in_total != in_buf.len() {
return Err(Error::IncorrectStreamLength);
}
Ok(n_out_total)
}
#[derive(Debug)]
struct ContentsFile {
seqnum: u32,
len: usize,
offset: usize,
file: fs::File,
}
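
A round-trip sketch of the `decompress` helper, written as a unit test around a hand-rolled zlib stream (one stored block), so it needs nothing beyond the inflate core already imported:

#[test]
fn decompress_stored_block() {
    // zlib header, stored-block marker + LEN/NLEN, payload "hi", Adler-32.
    let stream = [
        0x78, 0x01, // zlib header (CMF/FLG)
        0x01, 0x02, 0x00, 0xFD, 0xFF, // BFINAL=1, BTYPE=stored, LEN=2, NLEN=!LEN
        b'h', b'i', // payload
        0x01, 0x3B, 0x00, 0xD2, // Adler-32 of "hi"
    ];
    let mut state = zlib::DecompressorOxide::new();
    // Start with an empty buffer to exercise the grow-and-resume loop.
    let mut out = Vec::new();
    let n = decompress(&mut state, &stream, &mut out).unwrap();
    assert_eq!(&out[..n], b"hi");
}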

src/main.rs

@@ -1,3 +1,87 @@
use monokakido::MonokakidoDict;
fn main() {
/*
for dict in MonokakidoDict::list().unwrap() {
dbg!(dict.unwrap());
}
*/
let mut dict = MonokakidoDict::open("NHKACCENT2").unwrap();
let idx_list = [
0,
1,
2,
3,
4,
5,
6,
7,
8,
9,
10,
11,
12,
13,
14,
15,
16,
17,
18,
19,
20,
46200,
46201,
46202,
46203,
46204,
46205,
46206,
46207,
46208,
46209,
46210,
46211,
70000,
dict.keys.count() - 1,
];
println!("Index: length order");
for idx in idx_list {
let (word, pages) = dict.keys.get_index_a(idx).unwrap();
println!("\n{}", word);
for id in pages {
println!("{}", dict.pages.get(id).unwrap());
}
}
println!("Index: prefix order");
for idx in idx_list {
let (word, pages) = dict.keys.get_index_b(idx).unwrap();
println!("\n{}", word);
for id in pages {
println!("{}", dict.pages.get(id).unwrap());
}
}
println!("Index: suffix order");
for idx in idx_list {
let (word, pages) = dict.keys.get_index_c(idx).unwrap();
println!("\n{}", word);
for id in pages {
println!("{}", dict.pages.get(id).unwrap());
}
}
println!("Index: ?");
for idx in idx_list {
let (word, pages) = dict.keys.get_index_d(idx).unwrap();
println!("\n{}", word);
for id in pages {
println!("{}", dict.pages.get(id).unwrap());
}
}
//let mut stdout = stdout().lock();
//stdout.write_all(audio).unwrap();
}

src/pages.rs (new file)

@@ -0,0 +1,444 @@
use core::{cmp::min, mem::size_of, ops::Not};
use miniz_oxide::inflate::core as zlib;
use std::{
ffi::OsStr,
fs::{self, File},
io::{Read, Seek, SeekFrom},
};
use crate::{
abi::{TransmuteSafe, LE32},
decompress,
dict::Paths,
ContentsFile, Error,
};
mod abi {
use crate::abi::LE32;
#[repr(C)]
#[derive(Debug, Clone, Copy, Default)]
pub(crate) struct TextIdxRecord {
pub dic_item_id: LE32,
pub map_idx: LE32,
}
#[repr(C)]
#[derive(Debug, Clone, Copy, Default, PartialEq, Eq, PartialOrd, Ord)]
pub(crate) struct TextMapRecord {
pub zoffset: LE32,
pub ioffset: LE32,
}
#[test]
fn test_get_by_id() {
use crate::{pages::PageIndex, Error};
fn idx(id: u32, idx: u32) -> TextIdxRecord {
TextIdxRecord {
dic_item_id: id.into(),
map_idx: idx.into(),
}
}
fn map(z: u32, i: u32) -> TextMapRecord {
TextMapRecord {
zoffset: z.into(),
ioffset: i.into(),
}
}
assert_eq!(
PageIndex {
idx: vec![],
map: vec![],
}
.get_by_id(500),
Err(Error::NotFound)
);
assert_eq!(
PageIndex {
idx: vec![idx(1, 0)],
map: vec![map(0, 0)],
}
.get_by_id(500),
Err(Error::NotFound)
);
assert_eq!(
PageIndex {
idx: vec![idx(1, 0), idx(2, 1)],
map: vec![map(0, 0), map(0, 10)],
}
.get_by_id(500),
Err(Error::NotFound)
);
assert_eq!(
PageIndex {
idx: vec![idx(1, 0), idx(2, 1), idx(1000, 2)],
map: vec![map(0, 0), map(0, 10), map(0, 20)],
}
.get_by_id(500),
Err(Error::NotFound)
);
assert_eq!(
PageIndex {
idx: vec![idx(1, 0), idx(2, 1), idx(500, 2), idx(1000, 3)],
map: vec![map(0, 0), map(0, 10), map(0, 20), map(10, 0)],
}
.get_by_id(500),
Ok(map(0, 20))
);
assert_eq!(
PageIndex {
idx: vec![
idx(1, 0),
idx(2, 1),
idx(499, 2),
idx(500, 3),
idx(501, 4),
idx(1000, 5)
],
map: vec![
map(0, 0),
map(0, 10),
map(0, 20),
map(10, 0),
map(10, 0),
map(10, 0)
],
}
.get_by_id(500),
Ok(map(10, 0))
);
}
}
pub(crate) use abi::{TextIdxRecord, TextMapRecord};
#[derive(Debug, Clone)]
pub(crate) struct PageIndex {
idx: Vec<TextIdxRecord>,
map: Vec<TextMapRecord>,
}
unsafe impl TransmuteSafe for TextMapRecord {}
unsafe impl TransmuteSafe for TextIdxRecord {}
impl PageIndex {
pub(crate) fn new(paths: &Paths) -> Result<Self, Error> {
let mut idx_file = File::open(paths.contents_idx_path())?;
let mut map_file = File::open(paths.contents_map_path())?;
let mut len = [0; 4];
idx_file.read_exact(&mut len)?;
let len = u32::from_le_bytes(len) as usize;
idx_file.seek(SeekFrom::Start(8))?;
map_file.seek(SeekFrom::Start(8))?;
let idx_size = idx_file.metadata().map_err(|_| Error::IOError)?.len();
let map_size = map_file.metadata().map_err(|_| Error::IOError)?.len();
let idx_expected_size = (size_of::<TextIdxRecord>() * len + 8) as u64;
let map_expected_size = (size_of::<TextMapRecord>() * len + 8) as u64;
if idx_size != idx_expected_size || map_size != map_expected_size {
return Err(Error::IncorrectStreamLength);
}
let mut idx = vec![TextIdxRecord::default(); len];
let mut map = vec![TextMapRecord::default(); len];
idx_file
.read_exact(TextIdxRecord::slice_as_bytes_mut(idx.as_mut_slice()))
.map_err(|_| Error::IOError)?;
map_file
.read_exact(TextMapRecord::slice_as_bytes_mut(map.as_mut_slice()))
.map_err(|_| Error::IOError)?;
Ok(PageIndex { idx, map })
}
fn get_idx_by_id(&self, id: u32) -> Option<usize> {
if self.idx.is_empty() {
return None;
}
// Let's guess first, since usually the IDs are completely predictable, without gaps.
let idx_list = self.idx.as_slice();
let idx = min(id as usize, idx_list.len() - 1);
let guess = idx_list[idx].dic_item_id.read();
if id == guess {
return Some(idx);
}
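// Second guess: if the IDs happen to start from 1 rather than 0,
// the record for `id` sits at index `id - 1`.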
let idx = min(id.saturating_sub(1) as usize, idx_list.len() - 1);
let guess = idx_list[idx].dic_item_id.read();
if id == guess {
return Some(idx);
}
return idx_list
.binary_search_by_key(&id, |r| r.dic_item_id.read())
.ok();
}
pub fn get_by_id(&self, id: u32) -> Result<TextMapRecord, Error> {
if let Some(idx) = self.get_idx_by_id(id) {
let record = self.map[self.idx[idx].map_idx.us()];
Ok(record)
} else {
Err(Error::NotFound)
}
}
}
pub struct Pages {
index: PageIndex,
contents: Vec<ContentsFile>,
zlib_buf: Vec<u8>,
zlib_state: zlib::DecompressorOxide,
contents_buf: Vec<u8>,
current_offset: usize,
current_len: usize,
}
impl Pages {
fn parse_fname(fname: &OsStr) -> Option<u32> {
let fname = fname.to_str()?;
if (fname.starts_with("contents-") && fname.ends_with(".rsc")).not() {
return None;
}
u32::from_str_radix(&fname[9..13], 10).ok()
}
pub(crate) fn new(paths: &Paths) -> Result<Self, Error> {
let mut contents = Vec::new();
for entry in fs::read_dir(&paths.contents_path()).map_err(|_| Error::IOError)? {
let entry = entry.map_err(|_| Error::IOError)?;
let seqnum = Pages::parse_fname(&entry.file_name());
if let Some(seqnum) = seqnum {
contents.push(ContentsFile {
seqnum,
len: entry.metadata().map_err(|_| Error::IOError)?.len() as usize,
offset: 0,
file: File::open(entry.path()).map_err(|_| Error::IOError)?,
});
}
}
contents.sort_by_key(|f| f.seqnum);
let mut offset = 0;
for (i, cf) in contents.iter_mut().enumerate() {
if cf.seqnum != i as u32 + 1 {
return Err(Error::NoContentFilesFound);
}
cf.offset = offset;
offset += cf.len;
}
let index = PageIndex::new(&paths)?;
Ok(Pages {
index,
contents,
zlib_buf: Vec::new(),
zlib_state: zlib::DecompressorOxide::new(),
contents_buf: Vec::new(),
current_offset: 0,
current_len: 0,
})
}
fn load_contents(&mut self, zoffset: usize) -> Result<(), Error> {
let (file, file_offset) = file_offset(&mut self.contents, zoffset)?;
let mut len = [0_u8; 4];
file.seek(SeekFrom::Start(file_offset))
.map_err(|_| Error::IOError)?;
file.read_exact(&mut len).map_err(|_| Error::IOError)?;
let len = u32::from_le_bytes(len) as usize;
if self.zlib_buf.len() < len {
self.zlib_buf.resize(len, 0);
}
file.read_exact(&mut self.zlib_buf[..len])
.map_err(|_| Error::IOError)?;
let n_out = decompress(
&mut self.zlib_state,
&self.zlib_buf[..len],
&mut self.contents_buf,
)?;
self.current_len = n_out;
self.current_offset = zoffset;
Ok(())
}
pub fn get(&mut self, id: u32) -> Result<&str, Error> {
self.get_by_idx(self.index.get_by_id(id)?)
}
fn get_by_idx(&mut self, idx: TextMapRecord) -> Result<&str, Error> {
if self.contents_buf.is_empty() || idx.zoffset.us() != self.current_offset {
self.load_contents(idx.zoffset.us())?;
}
let contents = &self.contents_buf[idx.ioffset.us()..self.current_len];
let (len, contents_tail) = LE32::from(contents)?;
Ok(std::str::from_utf8(&contents_tail[..len.us()]).map_err(|_| Error::Utf8Error)?)
}
}
fn file_offset(contents: &mut [ContentsFile], offset: usize) -> Result<(&mut File, u64), Error> {
let file_idx = contents
.binary_search_by(|cf| cmp_range(offset, cf.offset..cf.offset + cf.len).reverse())
.map_err(|_| Error::InvalidIndex)?;
let cf = &mut contents[file_idx];
let file = &mut cf.file;
let file_offset = (offset - cf.offset) as u64;
Ok((file, file_offset))
}
#[test]
fn test_file_offset() {
use std::os::unix::prelude::AsRawFd;
assert_eq!(file_offset(&mut [], 0).err(), Some(Error::InvalidIndex));
let mock_file = || {
let f = File::open("/dev/zero").unwrap();
let fd = f.as_raw_fd();
(f, fd)
};
let (f1, f1_fd) = mock_file();
let one_file = &mut vec![ContentsFile {
seqnum: 1,
len: 100,
offset: 0,
file: f1,
}];
let result = file_offset(one_file, 101);
assert_eq!(result.err(), Some(Error::InvalidIndex));
let result = file_offset(one_file, 100);
assert_eq!(result.err(), Some(Error::InvalidIndex));
let result = file_offset(one_file, 0);
assert_eq!(result.as_ref().map(|f| f.0.as_raw_fd()), Ok(f1_fd));
assert_eq!(result.as_ref().map(|f| f.1), Ok(0));
let result = file_offset(one_file, 99);
assert_eq!(result.as_ref().map(|f| f.0.as_raw_fd()), Ok(f1_fd));
assert_eq!(result.as_ref().map(|f| f.1), Ok(99));
let (f1, f1_fd) = mock_file();
let (f2, f2_fd) = mock_file();
let two_files = &mut vec![
ContentsFile {
seqnum: 1,
len: 100,
offset: 0,
file: f1,
},
ContentsFile {
seqnum: 2,
len: 200,
offset: 100,
file: f2,
},
];
let result = file_offset(two_files, 301);
assert_eq!(result.err(), Some(Error::InvalidIndex));
let result = file_offset(two_files, 300);
assert_eq!(result.err(), Some(Error::InvalidIndex));
let result = file_offset(two_files, 0);
assert_eq!(result.as_ref().map(|f| f.0.as_raw_fd()), Ok(f1_fd));
assert_eq!(result.as_ref().map(|f| f.1), Ok(0));
let result = file_offset(two_files, 99);
assert_eq!(result.as_ref().map(|f| f.0.as_raw_fd()), Ok(f1_fd));
assert_eq!(result.as_ref().map(|f| f.1), Ok(99));
let result = file_offset(two_files, 100);
assert_eq!(result.as_ref().map(|f| f.0.as_raw_fd()), Ok(f2_fd));
assert_eq!(result.as_ref().map(|f| f.1), Ok(0));
let result = file_offset(two_files, 299);
assert_eq!(result.as_ref().map(|f| f.0.as_raw_fd()), Ok(f2_fd));
assert_eq!(result.as_ref().map(|f| f.1), Ok(199));
let (f1, f1_fd) = mock_file();
let (f2, f2_fd) = mock_file();
let (f3, f3_fd) = mock_file();
let three_files = &mut vec![
ContentsFile {
seqnum: 1,
len: 100,
offset: 0,
file: f1,
},
ContentsFile {
seqnum: 2,
len: 200,
offset: 100,
file: f2,
},
ContentsFile {
seqnum: 3,
len: 100,
offset: 300,
file: f3,
},
];
let result = file_offset(three_files, 401);
assert_eq!(result.err(), Some(Error::InvalidIndex));
let result = file_offset(three_files, 400);
assert_eq!(result.err(), Some(Error::InvalidIndex));
let result = file_offset(three_files, 0);
assert_eq!(result.as_ref().map(|f| f.0.as_raw_fd()), Ok(f1_fd));
assert_eq!(result.as_ref().map(|f| f.1), Ok(0));
let result = file_offset(three_files, 99);
assert_eq!(result.as_ref().map(|f| f.0.as_raw_fd()), Ok(f1_fd));
assert_eq!(result.as_ref().map(|f| f.1), Ok(99));
let result = file_offset(three_files, 100);
assert_eq!(result.as_ref().map(|f| f.0.as_raw_fd()), Ok(f2_fd));
assert_eq!(result.as_ref().map(|f| f.1), Ok(0));
let result = file_offset(three_files, 299);
assert_eq!(result.as_ref().map(|f| f.0.as_raw_fd()), Ok(f2_fd));
assert_eq!(result.as_ref().map(|f| f.1), Ok(199));
let result = file_offset(three_files, 300);
assert_eq!(result.as_ref().map(|f| f.0.as_raw_fd()), Ok(f3_fd));
assert_eq!(result.as_ref().map(|f| f.1), Ok(0));
let result = file_offset(three_files, 399);
assert_eq!(result.as_ref().map(|f| f.0.as_raw_fd()), Ok(f3_fd));
assert_eq!(result.as_ref().map(|f| f.1), Ok(99));
}
fn cmp_range(num: usize, range: core::ops::Range<usize>) -> core::cmp::Ordering {
use core::cmp::Ordering;
if num < range.start {
Ordering::Less
} else if range.end <= num {
Ordering::Greater
} else {
Ordering::Equal
}
}
#[test]
fn test_cmp_to_range() {
use core::cmp::Ordering;
assert_eq!(cmp_range(0, 0..0), Ordering::Greater);
assert_eq!(cmp_range(0, 0..1), Ordering::Equal);
assert_eq!(cmp_range(0, 0..100), Ordering::Equal);
assert_eq!(cmp_range(1, 0..100), Ordering::Equal);
assert_eq!(cmp_range(99, 0..100), Ordering::Equal);
assert_eq!(cmp_range(100, 0..100), Ordering::Greater);
assert_eq!(cmp_range(101, 0..100), Ordering::Greater);
assert_eq!(cmp_range(0, 1..100), Ordering::Less);
assert_eq!(cmp_range(99, 100..100), Ordering::Less);
assert_eq!(cmp_range(100, 100..100), Ordering::Greater);
}