Refactored rsc and nrsc into their own implementations, started implementing explode functionality.

This commit is contained in:
Pyry Kontio 2023-01-31 04:54:26 +09:00
parent dac55a38c9
commit 5287f1b493
No known key found for this signature in database
13 changed files with 1182 additions and 840 deletions

View file

@ -1,3 +1,24 @@
# monokakido.rs
A Rust library for parsing and interpreting the [Monokakido](https://www.monokakido.jp/en/dictionaries/app/) dictionary format. It aims for full test coverage and an efficient implementation with minimal dependencies.
## TODO:
- Refactor code for generic "rsc" and "nrsc" support
- Audio using "rsc" (CCCAD, WISDOM3)
- Audio using "nrsc" (DAIJISEN2, NHKACCENT2, OALD10, OLDAE, OLEX, OLT, RHEJ, SMK8)
- Multiple contents (WISDOM3, OLEX)
- Document the rsc, nrsc and keystore formats
- Split main.rs into "dict exploder" and "dict cli"
## Planned to support:
- WISDOM3
- SMK8
- RHEJ
- OLT
- OLEX
- OLDAE
- OCD
- OALD10
- NHKACCENT2
- DAIJISEN2
- CCCAD

View file

@ -1,226 +1,85 @@
use core::{mem::size_of, ops::Not};
use std::{
ffi::OsStr,
fs::{self, File},
io::{Read, Seek, SeekFrom},
use std::{path::PathBuf, ops::Range};
use crate::{
dict::Paths,
resource::{Nrsc, Rsc},
Error,
};
use miniz_oxide::inflate::core as zlib;
use crate::{abi::TransmuteSafe, decompress, dict::Paths, ContentsFile, Error};
#[derive(Debug, Clone)]
pub(crate) struct AudioIndex {
idx: Vec<AudioIdxRecord>,
ids: String, // contains null bytes as substring separators
}
mod abi {
use std::mem::size_of;
use crate::{audio::AudioFormat, Error};
#[repr(C)]
#[derive(Debug, Clone, Copy, Default, PartialEq, Eq)]
pub(crate) struct AudioIdxRecord {
format: u16,
fileseq: u16,
id_str_offset: u32,
file_offset: u32,
len: u32,
}
impl AudioIdxRecord {
pub fn id_str_offset(&self) -> usize {
u32::from_le(self.id_str_offset) as usize
}
pub(super) fn format(&self) -> Result<AudioFormat, Error> {
match u16::from_le(self.format) {
0 => Ok(AudioFormat::Acc),
1 => Ok(AudioFormat::ZlibAcc),
_ => Err(Error::InvalidAudioFormat),
}
}
pub fn fileseq(&self) -> usize {
u16::from_le(self.fileseq) as usize
}
pub fn file_offset(&self) -> u64 {
u32::from_le(self.file_offset) as u64
}
pub fn len(&self) -> usize {
u32::from_le(self.len) as usize
}
}
#[test]
fn test_audio_index() {
use crate::audio::AudioIndex;
let air = |id_str_offset| AudioIdxRecord {
format: 0,
fileseq: 0,
id_str_offset,
file_offset: 0,
len: 0,
};
let mut audio_idx = AudioIndex {
idx: vec![air(0), air(1), air(3), air(6), air(10)],
ids: "\0a\0bb\0ccc\0dddd".to_owned(),
};
let diff = 8 + audio_idx.idx.len() * size_of::<AudioIdxRecord>();
// Fix offsets now that they are known
for air in audio_idx.idx.iter_mut() {
air.id_str_offset += diff as u32;
}
dbg!(&audio_idx);
assert_eq!(audio_idx.get_id_at(diff + 0).unwrap(), "");
assert_eq!(audio_idx.get_id_at(diff + 1).unwrap(), "a");
assert_eq!(audio_idx.get_id_at(diff + 3).unwrap(), "bb");
assert_eq!(audio_idx.get_id_at(diff + 4), Err(Error::InvalidIndex));
assert_eq!(audio_idx.get_id_at(diff + 6).unwrap(), "ccc");
assert_eq!(audio_idx.get_id_at(diff + 10), Err(Error::InvalidIndex));
audio_idx.ids = "\0a\0bb\0ccc\0dddd\0".to_owned();
let diff = diff as u32;
assert_eq!(audio_idx.get_by_id("").unwrap(), air(diff + 0));
assert_eq!(audio_idx.get_by_id("a").unwrap(), air(diff + 1));
assert_eq!(audio_idx.get_by_id("bb").unwrap(), air(diff + 3));
assert_eq!(audio_idx.get_by_id("ccc").unwrap(), air(diff + 6));
assert_eq!(audio_idx.get_by_id("dddd").unwrap(), air(diff + 10));
assert_eq!(audio_idx.get_by_id("ddd"), Err(Error::NotFound));
}
}
pub(crate) use abi::AudioIdxRecord;
enum AudioFormat {
Acc,
ZlibAcc,
}
unsafe impl TransmuteSafe for AudioIdxRecord {}
impl AudioIndex {
pub(crate) fn new(paths: &Paths) -> Result<Self, Error> {
let mut file = File::open(paths.audio_idx_path()).map_err(|_| Error::FopenError)?;
let mut len = [0; 8];
file.read_exact(&mut len).map_err(|_| Error::IOError)?;
let len = u32::from_le_bytes(len[4..8].try_into().unwrap()) as usize;
let file_size = file.metadata().map_err(|_| Error::IOError)?.len() as usize;
let idx_expected_size = size_of::<AudioIdxRecord>() * len + 8;
let mut idx = vec![AudioIdxRecord::default(); len];
let mut ids = String::with_capacity(file_size - idx_expected_size);
file.read_exact(AudioIdxRecord::slice_as_bytes_mut(idx.as_mut_slice()))
.map_err(|_| Error::IOError)?;
file.read_to_string(&mut ids).map_err(|_| Error::IOError)?;
Ok(Self { idx, ids })
}
fn get_id_at(&self, offset: usize) -> Result<&str, Error> {
let offset = offset - (size_of::<AudioIdxRecord>() * self.idx.len() + 8);
if offset > 0 && &self.ids[offset - 1..offset] != "\0" {
return Err(Error::InvalidIndex);
}
let tail = &self.ids[offset..];
let len = tail.find('\0').ok_or(Error::InvalidIndex)?;
Ok(&tail[..len])
}
pub fn get_by_id(&self, id: &str) -> Result<AudioIdxRecord, Error> {
let mut idx_err = Ok(());
let i = self
.idx
.binary_search_by_key(&id, |idx| match self.get_id_at(idx.id_str_offset()) {
Ok(ok) => ok,
Err(err) => {
idx_err = Err(err);
""
}
})
.map_err(|_| Error::NotFound)?;
idx_err?;
Ok(self.idx[i])
}
}
const RSC_NAME: &str = "audio";
pub struct Audio {
index: AudioIndex,
audio: Vec<ContentsFile>,
read_buf: Vec<u8>,
decomp_buf: Vec<u8>,
zlib_state: zlib::DecompressorOxide,
path: PathBuf,
res: Option<AudioResource>,
}
/// The resource backend that serves the audio data.
///
/// `Audio::init` selects `Nrsc` when an `index.nidx` file exists inside the
/// audio directory and falls back to `Rsc` otherwise.
enum AudioResource {
// Audio stored in an `Rsc` resource; `Audio::get` parses ids as decimal `u32`.
Rsc(Rsc),
// Audio stored in an `Nrsc` resource; `Audio::get` passes ids through as strings.
Nrsc(Nrsc),
}
impl Audio {
fn parse_fname(fname: &OsStr) -> Option<u32> {
let fname = fname.to_str()?;
if fname.ends_with(".nrsc").not() {
return None;
}
u32::from_str_radix(&fname[..5], 10).ok()
}
pub(crate) fn new(paths: &Paths) -> Result<Self, Error> {
let mut audio = Vec::new();
for entry in fs::read_dir(&paths.audio_path()).map_err(|_| Error::IOError)? {
let entry = entry.map_err(|_| Error::IOError)?;
let seqnum = Audio::parse_fname(&entry.file_name());
if let Some(seqnum) = seqnum {
audio.push(ContentsFile {
seqnum,
len: entry.metadata().map_err(|_| Error::IOError)?.len() as usize,
offset: 0,
file: File::open(entry.path()).map_err(|_| Error::IOError)?,
});
}
}
audio.sort_by_key(|f| f.seqnum);
if Some(audio.len()) != audio.last().map(|a| a.seqnum as usize + 1) {
return Err(Error::NoContentFilesFound);
}
let index = AudioIndex::new(&paths)?;
Ok(Audio {
index,
audio,
read_buf: Vec::new(),
decomp_buf: Vec::new(),
zlib_state: zlib::DecompressorOxide::new(),
/// Locates the audio resource directory of a dictionary.
///
/// The directory is `RSC_NAME` inside `paths.contents_path()`. Returns
/// `Ok(None)` when that path does not exist. Nothing is opened here: the
/// resource itself is loaded lazily by `init`.
pub fn new(paths: &Paths) -> Result<Option<Self>, Error> {
    let path = paths.contents_path().join(RSC_NAME);
    if !path.exists() {
        return Ok(None);
    }
    Ok(Some(Audio { path, res: None }))
}
fn get_by_idx(&mut self, idx: AudioIdxRecord) -> Result<&[u8], Error> {
let file = &mut self.audio[idx.fileseq() as usize];
file.file
.seek(SeekFrom::Start(idx.file_offset()))
.map_err(|_| Error::IOError)?;
if self.read_buf.len() < idx.len() {
self.read_buf.resize(idx.len(), 0);
}
file.file
.read_exact(&mut self.read_buf[..idx.len()])
.map_err(|_| Error::IOError)?;
match idx.format()? {
AudioFormat::Acc => Ok(&self.read_buf[..idx.len()]),
AudioFormat::ZlibAcc => {
let n_out = decompress(
&mut self.zlib_state,
&self.read_buf[..idx.len()],
&mut self.decomp_buf,
)?;
Ok(&self.decomp_buf[..n_out])
}
/// Opens the audio resource if it has not been opened yet.
///
/// The backend is chosen by probing for `index.nidx` in the audio directory:
/// when it exists the directory is opened as an `Nrsc`, otherwise as an `Rsc`
/// named `RSC_NAME`. Calling this again after a successful call is a no-op.
///
/// # Errors
/// Propagates whatever error `Nrsc::new` / `Rsc::new` returns.
pub fn init(&mut self) -> Result<(), Error> {
    if self.res.is_some() {
        return Ok(());
    }
    let resource = if self.path.join("index.nidx").exists() {
        AudioResource::Nrsc(Nrsc::new(&self.path)?)
    } else {
        AudioResource::Rsc(Rsc::new(&self.path, RSC_NAME)?)
    };
    self.res = Some(resource);
    Ok(())
}
pub fn get(&mut self, id: &str) -> Result<&[u8], Error> {
self.get_by_idx(self.index.get_by_id(id)?)
self.init()?;
let Some(res) = self.res.as_mut() else { unreachable!() };
match res {
AudioResource::Rsc(rsc) => {
rsc.get(u32::from_str_radix(id, 10).map_err(|_| Error::InvalidIndex)?)
}
AudioResource::Nrsc(nrsc) => nrsc.get(id),
}
}
/// Returns the id and raw bytes of the audio entry at position `idx`.
///
/// The resource is initialised on first use. The id is reported as
/// `AudioId::Num` for an `Rsc` backend and `AudioId::Str` for an `Nrsc`
/// backend.
///
/// # Errors
/// Propagates errors from `init` and from the backend's `get_by_idx`.
pub fn get_by_idx(&mut self, idx: usize) -> Result<(AudioId, &[u8]), Error> {
    self.init()?;
    match self.res.as_mut() {
        Some(AudioResource::Rsc(rsc)) => {
            let (num, data) = rsc.get_by_idx(idx)?;
            Ok((AudioId::Num(num), data))
        }
        Some(AudioResource::Nrsc(nrsc)) => {
            let (name, data) = nrsc.get_by_idx(idx)?;
            Ok((AudioId::Str(name), data))
        }
        // `init` either fails or leaves `res` populated.
        None => unreachable!(),
    }
}
/// Returns the range `0..len` of positions accepted by `get_by_idx`, where
/// `len` is the length reported by the underlying resource.
///
/// The resource is initialised on first use, so this can fail with any error
/// `init` produces.
pub fn idx_iter(&mut self) -> Result<Range<usize>, Error> {
    self.init()?;
    let count = match self.res.as_ref() {
        Some(AudioResource::Rsc(rsc)) => rsc.len(),
        Some(AudioResource::Nrsc(nrsc)) => nrsc.len(),
        // `init` either fails or leaves `res` populated.
        None => unreachable!(),
    };
    Ok(0..count)
}
}
/// Identifier of an audio entry, as returned by `Audio::get_by_idx`.
///
/// `Rsc`-backed dictionaries report numeric ids (`Num`), while `Nrsc`-backed
/// dictionaries report string ids (`Str`) borrowed from the resource index.
///
/// The type implements `Display` by forwarding to the inner value, including
/// width, fill and alignment, so it can be used directly in format strings
/// such as `{id:0>10}` when building file names.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum AudioId<'a> {
    /// String id borrowed from the resource.
    Str(&'a str),
    /// Numeric id.
    Num(u32),
}

impl std::fmt::Display for AudioId<'_> {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        match self {
            // `pad` applies the caller's width/fill/alignment to the string.
            AudioId::Str(id) => f.pad(id),
            AudioId::Num(id) => std::fmt::Display::fmt(id, f),
        }
    }
}

View file

@ -1,6 +1,9 @@
use std::{io::{stdout, Write}, ops::Neg};
use std::{
io::{stdout, Write},
ops::Neg,
};
use monokakido::{MonokakidoDict, Error};
use monokakido::{Error, MonokakidoDict};
fn get_first_audio_id(page: &str) -> Result<&str, Error> {
if let Some((_, sound_tail)) = page.split_once("<sound>") {
@ -25,7 +28,10 @@ fn get_first_accent(page: &str) -> Result<i8, Error> {
let endpos = pos + "<symbol_backslash></symbol_backslash>".len();
let before = &accent[..pos];
let after = &accent[endpos..];
let is_mora = |&c: &char| (matches!(c, 'ぁ'..='ん' | 'ァ'..='ン' | 'ー') && !matches!(c, 'ゃ'..='ょ' | 'ャ'..='ョ'));
let is_mora = |&c: &char| {
(matches!(c, 'ぁ'..='ん' | 'ァ'..='ン' | 'ー')
&& !matches!(c, 'ゃ'..='ょ' | 'ャ'..='ョ'))
};
return Ok((before.chars().filter(is_mora).count() as i8));
}
if let Some(_) = accent.find("<symbol_macron>━</symbol_macron>") {
@ -44,25 +50,27 @@ fn get_accents(page: &str) -> Result<(i8, Option<i8>), Error> {
}
fn main() {
let Some(key) = std::env::args().nth(1) else {
return;
};
/*
for dict in MonokakidoDict::list().unwrap() {
dbg!(dict.unwrap());
}
*/
let mut dict = MonokakidoDict::open("NHKACCENT2").unwrap();
let mut accents = vec![];
// let mut accents = vec![];
let result = dict.keys.search_exact(&key);
match result {
Ok((_, pages)) => {
for id in pages {
let page = dict.pages.get(id).unwrap();
println!("{page}");
/*
if let Ok(accent) = get_accents(page) {
accents.push(accent);
}
} */
/*
let id = get_first_audio_id(page).unwrap();
let audio = dict.audio.get(id).unwrap();
@ -70,12 +78,13 @@ fn main() {
stdout.write_all(audio).unwrap();
*/
}
},
}
Err(e) => {
println!("{:?}", e);
return;
},
}
}
/*
print!("{key}\t");
accents.sort();
accents.dedup();
@ -91,7 +100,7 @@ fn main() {
}
print!(" ");
}
}
} */
println!()
/*

49
src/bin/explode.rs Normal file
View file

@ -0,0 +1,49 @@
use std::{
fmt::Write as _,
fs::{create_dir_all, File},
io::Write,
path::Path,
};
use monokakido::{Error, MonokakidoDict};
/// Dumps every page, and every audio entry if the dictionary has audio, into
/// individual files under the current directory.
///
/// The first command-line argument selects the dictionary: if it names an
/// existing filesystem path it is opened with `MonokakidoDict::open_with_path`,
/// otherwise it is treated as a dictionary name for `MonokakidoDict::open`.
///
/// Output layout:
/// - `./pages/<page id, zero-padded to 10>.xml`
/// - `./audio/<entry index, zero-padded to 10>.aac`
///
/// # Errors
/// Returns `Error::InvalidArg` when no argument is given, and otherwise
/// propagates dictionary, formatting and I/O errors.
fn explode() -> Result<(), Error> {
    let arg = std::env::args().nth(1).ok_or(Error::InvalidArg)?;

    let mut dict = if Path::new(&arg).exists() {
        MonokakidoDict::open_with_path(Path::new(&arg))
    } else {
        MonokakidoDict::open(&arg)
    }?;

    let pages_dir = "./pages/";
    create_dir_all(pages_dir)?;
    // One reusable buffer: append the file name, use it, cut back to the directory prefix.
    let mut path = String::from(pages_dir);
    for idx in dict.pages.idx_iter()? {
        let (id, page) = dict.pages.get_by_idx(idx)?;
        write!(&mut path, "{id:0>10}.xml")?;
        let mut file = File::create(&path)?;
        path.truncate(pages_dir.len());
        file.write_all(page.as_bytes())?;
    }

    if let Some(audio) = &mut dict.audio {
        let audio_dir = "./audio/";
        create_dir_all(audio_dir)?;
        let mut path = String::from(audio_dir);
        for idx in audio.idx_iter()? {
            // Read from the audio resource itself; the indices produced by
            // `audio.idx_iter()` are not valid positions in `dict.pages`.
            let (_id, data) = audio.get_by_idx(idx)?;
            // NOTE(review): the entry id is not used for the file name because,
            // as far as this revision shows, the id type cannot be formatted
            // from this binary. Files are named by index until it can be.
            write!(&mut path, "{idx:0>10}.aac")?;
            let mut file = File::create(&path)?;
            path.truncate(audio_dir.len());
            file.write_all(data)?;
        }
    }
    Ok(())
}
/// Entry point: runs `explode` and prints any error to stderr in `Debug` form.
fn main() {
    match explode() {
        Ok(()) => {}
        Err(err) => eprintln!("{err:?}"),
    }
}

View file

@ -2,7 +2,6 @@ use miniserde::{json, Deserialize};
use std::{
ffi::OsStr,
fs,
ops::Not,
path::{Path, PathBuf},
};
@ -11,7 +10,7 @@ use crate::{audio::Audio, key::Keys, pages::Pages, Error};
pub struct MonokakidoDict {
paths: Paths,
pub pages: Pages,
pub audio: Audio,
pub audio: Option<Audio>,
pub keys: Keys,
}
@ -27,7 +26,7 @@ struct DSProductContents {
dir: String,
}
pub(crate) struct Paths {
pub struct Paths {
base_path: PathBuf,
name: String,
contents_dir: String,
@ -57,33 +56,6 @@ impl Paths {
let mut pb = PathBuf::from(&self.base_path);
pb.push("Contents");
pb.push(&self.contents_dir);
pb.push("contents");
pb
}
pub(crate) fn audio_path(&self) -> PathBuf {
let mut pb = PathBuf::from(&self.base_path);
pb.push("Contents");
pb.push(&self.contents_dir);
pb.push("audio");
pb
}
pub(crate) fn contents_idx_path(&self) -> PathBuf {
let mut pb = self.contents_path();
pb.push("contents.idx");
pb
}
pub(crate) fn contents_map_path(&self) -> PathBuf {
let mut pb = self.contents_path();
pb.push("contents.map");
pb
}
pub(crate) fn audio_idx_path(&self) -> PathBuf {
let mut pb = self.audio_path();
pb.push("index.nidx");
pb
}
@ -104,10 +76,12 @@ impl Paths {
fn parse_dict_name(fname: &OsStr) -> Option<&str> {
let fname = fname.to_str()?;
if fname.starts_with("jp.monokakido.Dictionaries.").not() {
return None;
let dict_prefix = "jp.monokakido.Dictionaries.";
if fname.starts_with(dict_prefix) {
Some(&fname[dict_prefix.len()..])
} else {
None
}
Some(&fname[27..])
}
impl MonokakidoDict {
@ -123,14 +97,23 @@ impl MonokakidoDict {
pub fn open(name: &str) -> Result<Self, Error> {
let std_path = Paths::std_dict_path(name);
Self::open_with_path(&std_path, name)
Self::open_with_path_name(&std_path, name)
}
pub fn name(&self) -> &str {
&self.paths.name
}
pub fn open_with_path(path: impl Into<PathBuf>, name: &str) -> Result<Self, Error> {
pub fn open_with_path(path: impl Into<PathBuf>) -> Result<Self, Error> {
let path: PathBuf = path.into();
let dir_name = path.file_name().ok_or(Error::FopenError)?.to_string_lossy();
let dict_name = dir_name.rsplit_once(".").ok_or(Error::FopenError)?.0;
Self::open_with_path_name(&path, dict_name)
}
fn open_with_path_name(path: impl Into<PathBuf>, name: &str) -> Result<Self, Error> {
let base_path = path.into();
let json_path = Paths::json_path(&base_path, name);
let json = fs::read_to_string(json_path).map_err(|_| Error::NoDictJsonFound)?;

View file

@ -1,9 +1,10 @@
use std::{io::Error as IoError, str::Utf8Error};
use std::{fmt::Error as FmtError, io::Error as IoError, str::Utf8Error};
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum Error {
Transmute,
Validate,
KeyIndexHeaderValidate,
KeyFileHeaderValidate,
FopenError,
FstatError,
MmapError,
@ -17,9 +18,12 @@ pub enum Error {
NoDictJsonFound,
InvalidDictJson,
IOError,
NoContentFilesFound,
MissingResourceFile,
InvalidIndex,
InvalidAudioFormat,
InvalidArg,
FmtError,
IndexDoesntExist,
}
impl From<IoError> for Error {
@ -33,3 +37,9 @@ impl From<Utf8Error> for Error {
Error::Utf8Error
}
}
impl From<FmtError> for Error {
fn from(_: FmtError) -> Self {
Error::FmtError
}
}

View file

@ -1,13 +1,14 @@
use std::{
borrow::Cow,
cmp::Ordering,
fs::File,
io::{Read, Seek},
mem::size_of,
str::from_utf8,
cmp::Ordering, borrow::Cow,
};
use crate::{
abi::{TransmuteSafe, LE32},
abi_utils::{TransmuteSafe, LE32},
dict::Paths,
Error,
};
@ -40,7 +41,7 @@ mod abi {
{
Ok(())
} else {
Err(Error::Validate)
Err(Error::KeyFileHeaderValidate)
}
}
}
@ -59,15 +60,20 @@ mod abi {
impl IndexHeader {
pub(super) fn validate(&self, file_end: usize) -> Result<(), Error> {
let a = self.index_a_offset.us();
let b = self.index_b_offset.us();
let c = self.index_c_offset.us();
let d = self.index_d_offset.us();
let check_order = |l, r| l < r || r == 0;
if self.magic1.read() == 0x04
&& self.index_a_offset.us() < self.index_b_offset.us()
&& self.index_b_offset.us() < self.index_c_offset.us()
&& self.index_c_offset.us() < self.index_d_offset.us()
&& self.index_d_offset.us() < file_end
&& check_order(a, b)
&& check_order(b, c)
&& check_order(c, d)
&& check_order(d, file_end)
{
Ok(())
} else {
Err(Error::Validate)
Err(Error::KeyIndexHeaderValidate)
}
}
}
@ -78,21 +84,26 @@ use abi::{FileHeader, IndexHeader};
pub struct Keys {
words: Vec<LE32>,
index_len: Vec<LE32>,
index_prefix: Vec<LE32>,
index_suffix: Vec<LE32>,
index_d: Vec<LE32>,
index_len: Option<Vec<LE32>>,
index_prefix: Option<Vec<LE32>>,
index_suffix: Option<Vec<LE32>>,
index_d: Option<Vec<LE32>>,
}
impl Keys {
fn read_vec(file: &mut File, start: usize, end: usize) -> Result<Vec<LE32>, Error> {
fn read_vec(file: &mut File, start: usize, end: usize) -> Result<Option<Vec<LE32>>, Error> {
if start == 0 || end == 0 {
return Ok(None);
}
// Replace this with div_ceil once it stabilizes
let size = (end - start + size_of::<LE32>() - 1) / size_of::<LE32>();
let mut buf = vec![LE32::default(); size];
file.read_exact(LE32::slice_as_bytes_mut(&mut buf))?;
Ok(buf)
Ok(Some(buf))
}
fn check_vec_len(buf: &Vec<LE32>) -> Result<(), Error> {
fn check_vec_len(buf: &Option<Vec<LE32>>) -> Result<(), Error> {
let Some(buf) = buf else { return Ok(()) };
if buf.get(0).ok_or(Error::InvalidIndex)?.us() + 1 != buf.len() {
return Err(Error::InvalidIndex);
}
@ -108,10 +119,7 @@ impl Keys {
file.seek(std::io::SeekFrom::Start(hdr.words_offset.read() as u64))?;
let words = Self::read_vec(&mut file, hdr.words_offset.us(), hdr.idx_offset.us())?;
if words.get(0).ok_or(Error::InvalidIndex)?.us() + 1 >= words.len() {
return Err(Error::InvalidIndex);
}
let Some(words) = words else { return Err(Error::InvalidIndex); };
let file_end = file_size - hdr.idx_offset.us();
let mut ihdr = IndexHeader::default();
@ -177,26 +185,26 @@ impl Keys {
}
pub(crate) fn cmp_key(&self, target: &str, idx: usize) -> Result<Ordering, Error> {
let offset = self.index_prefix[idx + 1].us() + size_of::<LE32>() + 1;
let Some(index) = &self.index_len else { return Err(Error::IndexDoesntExist) };
let offset = index[idx + 1].us() + size_of::<LE32>() + 1;
let words_bytes = LE32::slice_as_bytes(&self.words);
if words_bytes.len() < offset + target.len() + 1 {
return Err(Error::InvalidIndex); // Maybe just return Ordering::Less instead?
}
let found_tail = &words_bytes[offset..];
let found = &found_tail[..target.len()];
Ok(match found.cmp(target.as_bytes()) {
Ordering::Equal => if found_tail[target.len()] == b'\0'
{
Ordering::Equal => {
if found_tail[target.len()] == b'\0' {
Ordering::Equal
} else {
Ordering::Greater
},
}
}
ord => ord,
})
}
fn get_inner(&self, index: &[LE32], idx: usize) -> Result<(&str, PageIter<'_>), Error> {
if idx >= self.count() {
return Err(Error::NotFound);
@ -208,19 +216,23 @@ impl Keys {
}
pub fn get_index_len(&self, idx: usize) -> Result<(&str, PageIter<'_>), Error> {
self.get_inner(&self.index_len, idx)
let Some(index) = &self.index_len else { return Err(Error::IndexDoesntExist) };
self.get_inner(index, idx)
}
pub fn get_index_prefix(&self, idx: usize) -> Result<(&str, PageIter<'_>), Error> {
self.get_inner(&self.index_prefix, idx)
let Some(index) = &self.index_prefix else { return Err(Error::IndexDoesntExist) };
self.get_inner(index, idx)
}
pub fn get_index_suffix(&self, idx: usize) -> Result<(&str, PageIter<'_>), Error> {
self.get_inner(&self.index_suffix, idx)
let Some(index) = &self.index_suffix else { return Err(Error::IndexDoesntExist) };
self.get_inner(index, idx)
}
pub fn get_index_d(&self, idx: usize) -> Result<(&str, PageIter<'_>), Error> {
self.get_inner(&self.index_d, idx)
let Some(index) = &self.index_d else { return Err(Error::IndexDoesntExist) };
self.get_inner(index, idx)
}
pub fn search_exact(&self, target_key: &str) -> Result<(usize, PageIter<'_>), Error> {
@ -300,7 +312,7 @@ impl<'a> PageIter<'a> {
e => {
dbg!("hmm", &e[..100]);
return Err(Error::InvalidIndex);
},
}
}
}
let span_len = pages.len() - tail.len();

View file

@ -1,61 +1,13 @@
use std::fs;
use miniz_oxide::inflate::{core as zlib, TINFLStatus as ZStatus};
mod abi;
mod abi_utils;
mod audio;
mod dict;
mod error;
mod key;
mod pages;
mod resource;
pub use audio::Audio;
pub use dict::MonokakidoDict;
pub use error::Error;
pub use pages::Pages;
pub use audio::Audio;
pub use key::Keys;
fn decompress(
zlib_state: &mut zlib::DecompressorOxide,
in_buf: &[u8],
out_buf: &mut Vec<u8>,
) -> Result<usize, Error> {
use zlib::inflate_flags as flg;
use ZStatus::{Done, HasMoreOutput};
let flags = flg::TINFL_FLAG_PARSE_ZLIB_HEADER | flg::TINFL_FLAG_USING_NON_WRAPPING_OUTPUT_BUF;
let mut n_in_total = 0;
let mut n_out_total = 0;
zlib_state.init();
loop {
let (status, n_in, n_out) = zlib::decompress(
zlib_state,
&in_buf[n_in_total..],
out_buf,
n_out_total,
flags,
);
n_out_total += n_out;
n_in_total += n_in;
match status {
HasMoreOutput => {
out_buf.resize(out_buf.len() * 2 + 1, 0);
continue;
}
Done => break,
_ => return Err(Error::ZlibError),
}
}
if n_in_total != in_buf.len() {
return Err(Error::IncorrectStreamLength);
}
Ok(n_out_total)
}
#[derive(Debug)]
struct ContentsFile {
seqnum: u32,
len: usize,
offset: usize,
file: fs::File,
}
pub use pages::Pages;

View file

@ -1,444 +1,45 @@
use core::{cmp::min, mem::size_of, ops::Not};
use miniz_oxide::inflate::core as zlib;
use std::{
ffi::OsStr,
fs::{self, File},
io::{Read, Seek, SeekFrom},
};
use std::{ops::Range, path::PathBuf};
use crate::{
abi::{TransmuteSafe, LE32},
decompress,
dict::Paths,
ContentsFile, Error,
};
use crate::{dict::Paths, resource::Rsc, Error};
mod abi {
use crate::abi::LE32;
#[repr(C)]
#[derive(Debug, Clone, Copy, Default)]
pub(crate) struct TextIdxRecord {
pub dic_item_id: LE32,
pub map_idx: LE32,
}
#[repr(C)]
#[derive(Debug, Clone, Copy, Default, PartialEq, Eq, PartialOrd, Ord)]
pub(crate) struct TextMapRecord {
pub zoffset: LE32,
pub ioffset: LE32,
}
#[test]
fn test_get_by_id() {
use crate::{pages::PageIndex, Error};
fn idx(id: u32, idx: u32) -> TextIdxRecord {
TextIdxRecord {
dic_item_id: id.into(),
map_idx: idx.into(),
}
}
fn map(z: u32, i: u32) -> TextMapRecord {
TextMapRecord {
zoffset: z.into(),
ioffset: i.into(),
}
}
assert_eq!(
PageIndex {
idx: vec![],
map: vec![],
}
.get_by_id(500),
Err(Error::NotFound)
);
assert_eq!(
PageIndex {
idx: vec![idx(1, 0)],
map: vec![map(0, 0)],
}
.get_by_id(500),
Err(Error::NotFound)
);
assert_eq!(
PageIndex {
idx: vec![idx(1, 0), idx(2, 1)],
map: vec![map(0, 0), map(0, 10)],
}
.get_by_id(500),
Err(Error::NotFound)
);
assert_eq!(
PageIndex {
idx: vec![idx(1, 0), idx(2, 1), idx(1000, 2)],
map: vec![map(0, 0), map(0, 10), map(0, 20)],
}
.get_by_id(500),
Err(Error::NotFound)
);
assert_eq!(
PageIndex {
idx: vec![idx(1, 0), idx(2, 1), idx(500, 2), idx(1000, 3)],
map: vec![map(0, 0), map(0, 10), map(0, 20), map(10, 0)],
}
.get_by_id(500),
Ok(map(0, 20))
);
assert_eq!(
PageIndex {
idx: vec![
idx(1, 0),
idx(2, 1),
idx(499, 2),
idx(500, 3),
idx(501, 4),
idx(1000, 5)
],
map: vec![
map(0, 0),
map(0, 10),
map(0, 20),
map(10, 0),
map(10, 0),
map(10, 0)
],
}
.get_by_id(500),
Ok(map(10, 0))
);
}
}
pub(crate) use abi::{TextIdxRecord, TextMapRecord};
#[derive(Debug, Clone)]
pub(crate) struct PageIndex {
idx: Vec<TextIdxRecord>,
map: Vec<TextMapRecord>,
}
unsafe impl TransmuteSafe for TextMapRecord {}
unsafe impl TransmuteSafe for TextIdxRecord {}
impl PageIndex {
pub(crate) fn new(paths: &Paths) -> Result<Self, Error> {
let mut idx_file = File::open(paths.contents_idx_path())?;
let mut map_file = File::open(paths.contents_map_path())?;
let mut len = [0; 4];
idx_file.read_exact(&mut len)?;
let len = u32::from_le_bytes(len) as usize;
idx_file.seek(SeekFrom::Start(8))?;
map_file.seek(SeekFrom::Start(8))?;
let idx_size = idx_file.metadata().map_err(|_| Error::IOError)?.len();
let map_size = map_file.metadata().map_err(|_| Error::IOError)?.len();
let idx_expected_size = (size_of::<TextIdxRecord>() * len + 8) as u64;
let map_expected_size = (size_of::<TextMapRecord>() * len + 8) as u64;
if idx_size != idx_expected_size || map_size != map_expected_size {
return Err(Error::IncorrectStreamLength);
}
let mut idx = vec![TextIdxRecord::default(); len];
let mut map = vec![TextMapRecord::default(); len];
idx_file
.read_exact(TextIdxRecord::slice_as_bytes_mut(idx.as_mut_slice()))
.map_err(|_| Error::IOError)?;
map_file
.read_exact(TextMapRecord::slice_as_bytes_mut(map.as_mut_slice()))
.map_err(|_| Error::IOError)?;
Ok(PageIndex { idx, map })
}
fn get_idx_by_id(&self, id: u32) -> Option<usize> {
if self.idx.is_empty() {
return None;
}
// Let's guess first, since usually the IDs are completely predictable, without gaps.
let idx_list = self.idx.as_slice();
let idx = min(id as usize, idx_list.len() - 1);
let guess = idx_list[idx].dic_item_id.read();
if id == guess {
return Some(idx);
}
let idx = min(id.saturating_sub(1) as usize, idx_list.len() - 1);
let guess = idx_list[idx].dic_item_id.read();
if id == guess {
return Some(idx);
}
return idx_list
.binary_search_by_key(&id, |r| r.dic_item_id.read())
.ok();
}
pub fn get_by_id(&self, id: u32) -> Result<TextMapRecord, Error> {
if let Some(idx) = self.get_idx_by_id(id) {
let record = self.map[self.idx[idx].map_idx.us()];
Ok(record)
} else {
Err(Error::NotFound)
}
}
}
const RSC_NAME: &str = "contents";
pub struct Pages {
index: PageIndex,
contents: Vec<ContentsFile>,
zlib_buf: Vec<u8>,
zlib_state: zlib::DecompressorOxide,
contents_buf: Vec<u8>,
current_offset: usize,
current_len: usize,
path: PathBuf,
res: Option<Rsc>,
}
impl Pages {
fn parse_fname(fname: &OsStr) -> Option<u32> {
let fname = fname.to_str()?;
if (fname.starts_with("contents-") && fname.ends_with(".rsc")).not() {
return None;
}
u32::from_str_radix(&fname[9..13], 10).ok()
}
pub(crate) fn new(paths: &Paths) -> Result<Self, Error> {
let mut contents = Vec::new();
for entry in fs::read_dir(&paths.contents_path()).map_err(|_| Error::IOError)? {
let entry = entry.map_err(|_| Error::IOError)?;
let seqnum = Pages::parse_fname(&entry.file_name());
if let Some(seqnum) = seqnum {
contents.push(ContentsFile {
seqnum,
len: entry.metadata().map_err(|_| Error::IOError)?.len() as usize,
offset: 0,
file: File::open(entry.path()).map_err(|_| Error::IOError)?,
});
}
}
contents.sort_by_key(|f| f.seqnum);
let mut offset = 0;
for (i, cf) in contents.iter_mut().enumerate() {
if cf.seqnum != i as u32 + 1 {
return Err(Error::NoContentFilesFound);
}
cf.offset = offset;
offset += cf.len;
}
let index = PageIndex::new(&paths)?;
pub fn new(paths: &Paths) -> Result<Self, Error> {
Ok(Pages {
index,
contents,
zlib_buf: Vec::new(),
zlib_state: zlib::DecompressorOxide::new(),
contents_buf: Vec::new(),
current_offset: 0,
current_len: 0,
path: paths.contents_path().join(RSC_NAME),
res: None,
})
}
fn load_contents(&mut self, zoffset: usize) -> Result<(), Error> {
let (file, file_offset) = file_offset(&mut self.contents, zoffset)?;
let mut len = [0_u8; 4];
file.seek(SeekFrom::Start(file_offset))
.map_err(|_| Error::IOError)?;
file.read_exact(&mut len).map_err(|_| Error::IOError)?;
let len = u32::from_le_bytes(len) as usize;
if self.zlib_buf.len() < len {
self.zlib_buf.resize(len, 0);
pub fn init(&mut self) -> Result<(), Error> {
if self.res.is_none() {
self.res = Some(Rsc::new(&self.path, RSC_NAME)?);
}
file.read_exact(&mut self.zlib_buf[..len])
.map_err(|_| Error::IOError)?;
let n_out = decompress(
&mut self.zlib_state,
&self.zlib_buf[..len],
&mut self.contents_buf,
)?;
self.current_len = n_out;
self.current_offset = zoffset;
Ok(())
}
pub fn get(&mut self, id: u32) -> Result<&str, Error> {
self.get_by_idx(self.index.get_by_id(id)?)
self.init()?;
let Some(res) = self.res.as_mut() else { unreachable!() };
Ok(std::str::from_utf8(res.get(id)?).map_err(|_| Error::Utf8Error)?)
}
fn get_by_idx(&mut self, idx: TextMapRecord) -> Result<&str, Error> {
if self.contents_buf.is_empty() || idx.zoffset.us() != self.current_offset {
self.load_contents(idx.zoffset.us())?;
pub fn get_by_idx(&mut self, idx: usize) -> Result<(u32, &str), Error> {
self.init()?;
let Some(res) = self.res.as_mut() else { unreachable!() };
let (id, page) = res.get_by_idx(idx)?;
Ok((id, std::str::from_utf8(page).map_err(|_| Error::Utf8Error)?))
}
let contents = &self.contents_buf[idx.ioffset.us()..self.current_len];
let (len, contents_tail) = LE32::from(contents)?;
Ok(std::str::from_utf8(&contents_tail[..len.us()]).map_err(|_| Error::Utf8Error)?)
/// Returns the range `0..len` of positions accepted by `get_by_idx`, where
/// `len` is the length reported by the underlying resource.
///
/// The resource is initialised on first use, so this can fail with any error
/// `init` produces.
pub fn idx_iter(&mut self) -> Result<Range<usize>, Error> {
    self.init()?;
    match &self.res {
        Some(rsc) => Ok(0..rsc.len()),
        // `init` either fails or leaves `res` populated.
        None => unreachable!(),
    }
}
}
fn file_offset(contents: &mut [ContentsFile], offset: usize) -> Result<(&mut File, u64), Error> {
let file_idx = contents
.binary_search_by(|cf| cmp_range(offset, cf.offset..cf.offset + cf.len).reverse())
.map_err(|_| Error::InvalidIndex)?;
let cf = &mut contents[file_idx];
let file = &mut cf.file;
let file_offset = (offset - cf.offset) as u64;
Ok((file, file_offset))
}
#[test]
fn test_file_offset() {
use std::os::unix::prelude::AsRawFd;
assert_eq!(file_offset(&mut [], 0).err(), Some(Error::InvalidIndex));
let mock_file = || {
let f = File::open("/dev/zero").unwrap();
let fd = f.as_raw_fd();
(f, fd)
};
let (f1, f1_fd) = mock_file();
let one_file = &mut vec![ContentsFile {
seqnum: 1,
len: 100,
offset: 0,
file: f1,
}];
let result = file_offset(one_file, 101);
assert_eq!(result.err(), Some(Error::InvalidIndex));
let result = file_offset(one_file, 100);
assert_eq!(result.err(), Some(Error::InvalidIndex));
let result = file_offset(one_file, 0);
assert_eq!(result.as_ref().map(|f| f.0.as_raw_fd()), Ok(f1_fd));
assert_eq!(result.as_ref().map(|f| f.1), Ok(0));
let result = file_offset(one_file, 99);
assert_eq!(result.as_ref().map(|f| f.0.as_raw_fd()), Ok(f1_fd));
assert_eq!(result.as_ref().map(|f| f.1), Ok(99));
let (f1, f1_fd) = mock_file();
let (f2, f2_fd) = mock_file();
let two_files = &mut vec![
ContentsFile {
seqnum: 1,
len: 100,
offset: 0,
file: f1,
},
ContentsFile {
seqnum: 2,
len: 200,
offset: 100,
file: f2,
},
];
let result = file_offset(two_files, 301);
assert_eq!(result.err(), Some(Error::InvalidIndex));
let result = file_offset(two_files, 300);
assert_eq!(result.err(), Some(Error::InvalidIndex));
let result = file_offset(two_files, 0);
assert_eq!(result.as_ref().map(|f| f.0.as_raw_fd()), Ok(f1_fd));
assert_eq!(result.as_ref().map(|f| f.1), Ok(0));
let result = file_offset(two_files, 99);
assert_eq!(result.as_ref().map(|f| f.0.as_raw_fd()), Ok(f1_fd));
assert_eq!(result.as_ref().map(|f| f.1), Ok(99));
let result = file_offset(two_files, 100);
assert_eq!(result.as_ref().map(|f| f.0.as_raw_fd()), Ok(f2_fd));
assert_eq!(result.as_ref().map(|f| f.1), Ok(0));
let result = file_offset(two_files, 299);
assert_eq!(result.as_ref().map(|f| f.0.as_raw_fd()), Ok(f2_fd));
assert_eq!(result.as_ref().map(|f| f.1), Ok(199));
let (f1, f1_fd) = mock_file();
let (f2, f2_fd) = mock_file();
let (f3, f3_fd) = mock_file();
let three_files = &mut vec![
ContentsFile {
seqnum: 1,
len: 100,
offset: 0,
file: f1,
},
ContentsFile {
seqnum: 2,
len: 200,
offset: 100,
file: f2,
},
ContentsFile {
seqnum: 3,
len: 100,
offset: 300,
file: f3,
},
];
let result = file_offset(three_files, 401);
assert_eq!(result.err(), Some(Error::InvalidIndex));
let result = file_offset(three_files, 400);
assert_eq!(result.err(), Some(Error::InvalidIndex));
let result = file_offset(three_files, 0);
assert_eq!(result.as_ref().map(|f| f.0.as_raw_fd()), Ok(f1_fd));
assert_eq!(result.as_ref().map(|f| f.1), Ok(0));
let result = file_offset(three_files, 99);
assert_eq!(result.as_ref().map(|f| f.0.as_raw_fd()), Ok(f1_fd));
assert_eq!(result.as_ref().map(|f| f.1), Ok(99));
let result = file_offset(three_files, 100);
assert_eq!(result.as_ref().map(|f| f.0.as_raw_fd()), Ok(f2_fd));
assert_eq!(result.as_ref().map(|f| f.1), Ok(0));
let result = file_offset(three_files, 299);
assert_eq!(result.as_ref().map(|f| f.0.as_raw_fd()), Ok(f2_fd));
assert_eq!(result.as_ref().map(|f| f.1), Ok(199));
let result = file_offset(three_files, 300);
assert_eq!(result.as_ref().map(|f| f.0.as_raw_fd()), Ok(f3_fd));
assert_eq!(result.as_ref().map(|f| f.1), Ok(0));
let result = file_offset(three_files, 399);
assert_eq!(result.as_ref().map(|f| f.0.as_raw_fd()), Ok(f3_fd));
assert_eq!(result.as_ref().map(|f| f.1), Ok(99));
}
/// Orders `num` relative to the half-open interval `range`.
///
/// Yields `Less` when `num` lies before `range.start`, `Greater` when it lies
/// at or past `range.end`, and `Equal` when it falls inside. The start check
/// takes precedence, so for an empty range a value below `start` is `Less`.
fn cmp_range(num: usize, range: core::ops::Range<usize>) -> core::cmp::Ordering {
    use core::cmp::Ordering::{Equal, Greater, Less};
    match (num < range.start, num >= range.end) {
        (true, _) => Less,
        (false, true) => Greater,
        (false, false) => Equal,
    }
}
#[test]
fn test_cmp_to_range() {
    use core::cmp::Ordering::{Equal, Greater, Less};
    // (value, range, expected ordering of the value relative to the range)
    let cases = [
        (0, 0..0, Greater),
        (0, 0..1, Equal),
        (0, 0..100, Equal),
        (1, 0..100, Equal),
        (99, 0..100, Equal),
        (100, 0..100, Greater),
        (101, 0..100, Greater),
        (0, 1..100, Less),
        (99, 100..100, Less),
        (100, 100..100, Greater),
    ];
    for (num, range, expected) in cases {
        assert_eq!(
            cmp_range(num, range.clone()),
            expected,
            "cmp_range({num}, {range:?})"
        );
    }
}

56
src/resource.rs Normal file
View file

@ -0,0 +1,56 @@
mod nrsc;
mod rsc;
use std::fs;
pub use nrsc::Nrsc;
pub use rsc::Rsc;
use crate::Error;
use miniz_oxide::inflate::{core as zlib, TINFLStatus as ZStatus};
/// One on-disk data file of a resource that may be split across several numbered files,
/// kept open for reading.
#[derive(Debug)]
struct ResourceFile {
/// Sequence number parsed from the file name.
seqnum: u32,
/// Size of the file in bytes, as reported by its metadata when it was opened.
len: usize,
/// Combined size of all files with a lower sequence number, i.e. where this file starts
/// when the files are viewed as one concatenated stream.
offset: usize,
/// Open handle used for seeking and reading.
file: fs::File,
}
fn decompress(
zlib_state: &mut zlib::DecompressorOxide,
in_buf: &[u8],
out_buf: &mut Vec<u8>,
) -> Result<usize, Error> {
use zlib::inflate_flags as flg;
use ZStatus::{Done, HasMoreOutput};
let flags = flg::TINFL_FLAG_PARSE_ZLIB_HEADER | flg::TINFL_FLAG_USING_NON_WRAPPING_OUTPUT_BUF;
let mut n_in_total = 0;
let mut n_out_total = 0;
zlib_state.init();
loop {
let (status, n_in, n_out) = zlib::decompress(
zlib_state,
&in_buf[n_in_total..],
out_buf,
n_out_total,
flags,
);
n_out_total += n_out;
n_in_total += n_in;
match status {
HasMoreOutput => {
out_buf.resize(out_buf.len() * 2 + 1, 0);
continue;
}
Done => break,
_ => return Err(Error::ZlibError),
}
}
if n_in_total != in_buf.len() {
return Err(Error::IncorrectStreamLength);
}
Ok(n_out_total)
}

269
src/resource/nrsc.rs Normal file
View file

@ -0,0 +1,269 @@
use core::mem::size_of;
use std::{
ffi::OsStr,
fs::{self, File},
io::{Read, Seek, SeekFrom},
path::Path,
};
use miniz_oxide::inflate::core as zlib;
use crate::{abi_utils::TransmuteSafe, resource::decompress, Error};
/// In-memory copy of an `index.nidx` file: the fixed-size records followed by the
/// identifier strings they point into.
#[derive(Debug, Clone)]
pub(crate) struct NrscIndex {
/// Index records, read from the file right after its 8-byte header.
idx: Vec<NrscIdxRecord>,
/// Everything in the file after the records; holds the item identifiers.
ids: String, // contains null bytes as substring separators
}
/// Raw record layout of the nrsc index. The fields are private to this module, so code
/// outside it reads them through the accessor methods.
mod abi {
use super::Format;
use crate::Error;
// TODO: Use LE16 & LE32?
/// One fixed-size record of the index. Fields hold the values exactly as stored in the
/// file; the accessors convert them from little-endian.
#[repr(C)]
#[derive(Debug, Clone, Copy, Default, PartialEq, Eq)]
pub(crate) struct NrscIdxRecord {
// Storage format code, decoded by `format()`.
format: u16,
// Number of the data file that holds the item.
fileseq: u16,
// Position of the item's identifier string; see `id_str_offset()`.
id_str_offset: u32,
// Position of the item inside its data file.
file_offset: u32,
// Number of bytes the item occupies in the data file.
len: u32,
}
impl NrscIdxRecord {
/// Offset of the identifier string. `NrscIndex::get_id_at` subtracts the header and
/// record sizes from it, so it is counted from the start of the index file.
pub fn id_str_offset(&self) -> usize {
u32::from_le(self.id_str_offset) as usize
}
/// Decodes the storage format: 0 is uncompressed, 1 is zlib; any other value yields
/// `Error::InvalidAudioFormat`.
pub(super) fn format(&self) -> Result<Format, Error> {
match u16::from_le(self.format) {
0 => Ok(Format::Uncompressed),
1 => Ok(Format::Zlib),
_ => Err(Error::InvalidAudioFormat),
}
}
/// Number of the data file holding the item, used as an index into the file list.
pub fn fileseq(&self) -> usize {
u16::from_le(self.fileseq) as usize
}
/// Byte position of the item within its data file.
pub fn file_offset(&self) -> u64 {
u32::from_le(self.file_offset) as u64
}
/// Number of bytes stored for the item in the data file.
pub fn len(&self) -> usize {
u32::from_le(self.len) as usize
}
}
// Exercises `NrscIndex::get_id_at` and `NrscIndex::get_by_id` on a hand-built index.
// NOTE: the record fields are filled with native-endian values and read back through
// `from_le`, so this test presumes a little-endian host.
#[test]
fn test_audio_index() {
use super::NrscIndex;
use std::mem::size_of;
// Builds a record that only carries an identifier offset.
let air = |id_str_offset| NrscIdxRecord {
format: 0,
fileseq: 0,
id_str_offset,
file_offset: 0,
len: 0,
};
let mut audio_idx = NrscIndex {
idx: vec![air(0), air(1), air(3), air(6), air(10)],
ids: "\0a\0bb\0ccc\0dddd".to_owned(),
};
// Size of everything that precedes the strings: 8-byte header plus the records.
let diff = 8 + audio_idx.idx.len() * size_of::<NrscIdxRecord>();
// Fix offsets now that they are known
for air in audio_idx.idx.iter_mut() {
air.id_str_offset += diff as u32;
}
assert_eq!(audio_idx.get_id_at(diff + 0).unwrap(), "");
assert_eq!(audio_idx.get_id_at(diff + 1).unwrap(), "a");
assert_eq!(audio_idx.get_id_at(diff + 3).unwrap(), "bb");
// Offset 4 points into the middle of "bb", not at the start of an identifier.
assert_eq!(audio_idx.get_id_at(diff + 4), Err(Error::InvalidIndex));
assert_eq!(audio_idx.get_id_at(diff + 6).unwrap(), "ccc");
// "dddd" has no terminating null byte yet, so it is rejected.
assert_eq!(audio_idx.get_id_at(diff + 10), Err(Error::InvalidIndex));
// With the final terminator in place every identifier can be looked up.
audio_idx.ids = "\0a\0bb\0ccc\0dddd\0".to_owned();
let diff = diff as u32;
assert_eq!(audio_idx.get_by_id("").unwrap(), air(diff + 0));
assert_eq!(audio_idx.get_by_id("a").unwrap(), air(diff + 1));
assert_eq!(audio_idx.get_by_id("bb").unwrap(), air(diff + 3));
assert_eq!(audio_idx.get_by_id("ccc").unwrap(), air(diff + 6));
assert_eq!(audio_idx.get_by_id("dddd").unwrap(), air(diff + 10));
assert_eq!(audio_idx.get_by_id("ddd"), Err(Error::NotFound));
}
}
pub(crate) use abi::NrscIdxRecord;
use super::ResourceFile;
/// How an item's bytes are stored inside a data file.
enum Format {
/// Stored as-is: the bytes read from the file are returned unchanged.
Uncompressed,
/// Zlib-compressed: the bytes read from the file are inflated before being returned.
Zlib,
}
// SAFETY: `NrscIdxRecord` is `#[repr(C)]` and consists solely of `u16`/`u32` fields
// (2 + 2 + 4 + 4 + 4 bytes, so it contains no padding).
// NOTE(review): the exact contract of `TransmuteSafe` is defined elsewhere — confirm that
// this is everything it requires.
unsafe impl TransmuteSafe for NrscIdxRecord {}
impl NrscIndex {
pub(crate) fn new(path: &Path) -> Result<Self, Error> {
let path = path.join("index.nidx");
let mut file = File::open(path).map_err(|_| Error::FopenError)?;
let mut len = [0; 8];
file.read_exact(&mut len).map_err(|_| Error::IOError)?;
let len = u32::from_le_bytes(len[4..8].try_into().unwrap()) as usize;
let file_size = file.metadata().map_err(|_| Error::IOError)?.len() as usize;
let idx_expected_size = size_of::<NrscIdxRecord>() * len + 8;
let mut idx = vec![NrscIdxRecord::default(); len];
let mut ids = String::with_capacity(file_size - idx_expected_size);
file.read_exact(NrscIdxRecord::slice_as_bytes_mut(idx.as_mut_slice()))
.map_err(|_| Error::IOError)?;
file.read_to_string(&mut ids).map_err(|_| Error::IOError)?;
Ok(Self { idx, ids })
}
fn get_id_at(&self, offset: usize) -> Result<&str, Error> {
let offset = offset - (size_of::<NrscIdxRecord>() * self.idx.len() + 8);
if offset > 0 && &self.ids[offset - 1..offset] != "\0" {
return Err(Error::InvalidIndex);
}
let tail = &self.ids[offset..];
let len = tail.find('\0').ok_or(Error::InvalidIndex)?;
Ok(&tail[..len])
}
pub fn get_by_id(&self, id: &str) -> Result<NrscIdxRecord, Error> {
let mut idx_err = Ok(());
let i = self
.idx
.binary_search_by_key(&id, |idx| match self.get_id_at(idx.id_str_offset()) {
Ok(ok) => ok,
Err(err) => {
idx_err = Err(err);
""
}
})
.map_err(|_| Error::NotFound)?;
idx_err?;
Ok(self.idx[i])
}
pub fn get_by_idx(&self, idx: usize) -> Result<(&str, NrscIdxRecord), Error> {
let idx_rec = self.idx.get(idx).copied().ok_or(Error::InvalidIndex)?;
let item_id = self.get_id_at(idx_rec.id_str_offset())?;
Ok((item_id, idx_rec))
}
}
/// A resource stored in the "nrsc" layout: an `index.nidx` index plus numbered `.nrsc`
/// data files in the same directory.
pub struct Nrsc {
/// Maps item identifiers to their location in the data files.
index: NrscIndex,
/// The open data files together with the buffers used to read from them.
data: NrscData,
}
/// Open data files plus reusable buffers for reading and inflating items.
struct NrscData {
/// Data files, indexed by the file number stored in an index record.
files: Vec<ResourceFile>,
/// Receives the bytes of an item exactly as stored in the data file.
read_buf: Vec<u8>,
/// Receives the inflated bytes of a zlib-compressed item.
decomp_buf: Vec<u8>,
/// Decompressor state, reused between items.
zlib_state: zlib::DecompressorOxide,
}
impl Nrsc {
fn parse_fname(fname: &OsStr) -> Option<u32> {
let fname = fname.to_str()?;
if fname.ends_with(".nrsc") {
let secnum_end = fname.len() - ".nrsc".len();
u32::from_str_radix(&fname[..secnum_end], 10).ok()
} else {
None
}
}
fn files(path: &Path) -> Result<Vec<ResourceFile>, Error> {
let mut files = Vec::new();
for entry in fs::read_dir(path).map_err(|_| Error::IOError)? {
let entry = entry.map_err(|_| Error::IOError)?;
let seqnum = Nrsc::parse_fname(&entry.file_name());
if let Some(seqnum) = seqnum {
files.push(ResourceFile {
seqnum,
len: entry.metadata().map_err(|_| Error::IOError)?.len() as usize,
offset: 0,
file: File::open(entry.path()).map_err(|_| Error::IOError)?,
});
}
}
let mut offset = 0;
files.sort_by_key(|f| f.seqnum);
for (i, cf) in files.iter_mut().enumerate() {
if cf.seqnum != i as u32 {
return Err(Error::MissingResourceFile);
}
cf.offset = offset;
offset += cf.len;
}
Ok(files)
}
pub(crate) fn new(path: &Path) -> Result<Self, Error> {
let files = Nrsc::files(path)?;
let index = NrscIndex::new(path)?;
Ok(Nrsc {
index,
data: NrscData {
files,
read_buf: Vec::new(),
decomp_buf: Vec::new(),
zlib_state: zlib::DecompressorOxide::new(),
},
})
}
pub fn get_by_idx(&mut self, idx: usize) -> Result<(&str, &[u8]), Error> {
let (id, nidx_rec) = self.index.get_by_idx(idx)?;
let item = self.data.get_by_nidx_rec(nidx_rec)?;
Ok((id, item))
}
pub fn get(&mut self, id: &str) -> Result<&[u8], Error> {
self.data.get_by_nidx_rec(self.index.get_by_id(id)?)
}
pub fn len(&self) -> usize {
self.index.idx.len()
}
}
impl NrscData {
fn get_by_nidx_rec(&mut self, idx: NrscIdxRecord) -> Result<&[u8], Error> {
let file = &mut self.files[idx.fileseq() as usize];
file.file
.seek(SeekFrom::Start(idx.file_offset()))
.map_err(|_| Error::IOError)?;
if self.read_buf.len() < idx.len() {
self.read_buf.resize(idx.len(), 0);
}
file.file
.read_exact(&mut self.read_buf[..idx.len()])
.map_err(|_| Error::IOError)?;
match idx.format()? {
Format::Uncompressed => Ok(&self.read_buf[..idx.len()]),
Format::Zlib => {
let n_out = decompress(
&mut self.zlib_state,
&self.read_buf[..idx.len()],
&mut self.decomp_buf,
)?;
Ok(&self.decomp_buf[..n_out])
}
}
}
}

521
src/resource/rsc.rs Normal file
View file

@ -0,0 +1,521 @@
use core::{cmp::min, mem::size_of, ops::Not, slice};
use miniz_oxide::inflate::core as zlib;
use std::{
ffi::OsStr,
fs::{self, File},
io::{Read, Seek, SeekFrom},
path::Path,
};
use crate::{
abi_utils::{TransmuteSafe, LE32},
resource::decompress,
Error,
};
/// Raw record layouts of the rsc `.idx` and `.map` files.
mod abi {
use crate::abi_utils::LE32;
/// One record of the `.idx` file: ties an item id to a position in the map.
#[repr(C)]
#[derive(Debug, Clone, Copy, Default)]
pub(crate) struct IdxRecord {
// Id of the item; lookups binary-search the records on this field.
pub item_id: LE32,
// Position of the item's record in the map.
pub map_idx: LE32,
}
/// One record of the `.map` file: where an item's data is located.
#[repr(C)]
#[derive(Debug, Clone, Copy, Default, PartialEq, Eq, PartialOrd, Ord)]
pub struct MapRecord {
// Offset of the compressed chunk holding the item, counted across all data files
// as if they were concatenated.
pub(crate) zoffset: LE32,
// Offset of the item inside the decompressed chunk.
pub(crate) ioffset: LE32,
}
// Looks up id 500 in progressively larger indexes; only the last two contain it.
#[test]
fn test_get_by_id() {
use super::RscIndex;
use crate::Error;
// Shorthand constructors for the two record types.
fn idx(id: u32, idx: u32) -> IdxRecord {
IdxRecord {
item_id: id.into(),
map_idx: idx.into(),
}
}
fn map(z: u32, i: u32) -> MapRecord {
MapRecord {
zoffset: z.into(),
ioffset: i.into(),
}
}
// Empty idx list.
assert_eq!(
RscIndex {
idx: Some(vec![]),
map: vec![],
}
.get_by_id(500),
Err(Error::NotFound)
);
// One, two and three records, none of them with id 500.
assert_eq!(
RscIndex {
idx: Some(vec![idx(1, 0)]),
map: vec![map(0, 0)],
}
.get_by_id(500),
Err(Error::NotFound)
);
assert_eq!(
RscIndex {
idx: Some(vec![idx(1, 0), idx(2, 1)]),
map: vec![map(0, 0), map(0, 10)],
}
.get_by_id(500),
Err(Error::NotFound)
);
assert_eq!(
RscIndex {
idx: Some(vec![idx(1, 0), idx(2, 1), idx(1000, 2)]),
map: vec![map(0, 0), map(0, 10), map(0, 20)],
}
.get_by_id(500),
Err(Error::NotFound)
);
// Id 500 is present and maps to the third map record.
assert_eq!(
RscIndex {
idx: Some(vec![idx(1, 0), idx(2, 1), idx(500, 2), idx(1000, 3)]),
map: vec![map(0, 0), map(0, 10), map(0, 20), map(10, 0)],
}
.get_by_id(500),
Ok(map(0, 20))
);
// Id 500 is surrounded by its direct neighbours 499 and 501.
assert_eq!(
RscIndex {
idx: Some(vec![
idx(1, 0),
idx(2, 1),
idx(499, 2),
idx(500, 3),
idx(501, 4),
idx(1000, 5)
]),
map: vec![
map(0, 0),
map(0, 10),
map(0, 20),
map(10, 0),
map(10, 0),
map(10, 0)
],
}
.get_by_id(500),
Ok(map(10, 0))
);
}
}
pub(crate) use abi::{IdxRecord, MapRecord};
use super::ResourceFile;
/// Index of an rsc resource: an optional id lookup table plus the map of item locations.
#[derive(Debug, Clone)]
pub(crate) struct RscIndex {
/// Records of the `.idx` file, or `None` when that file does not exist. Without it an
/// item id is used directly as the position in `map`.
idx: Option<Vec<IdxRecord>>,
/// Records of the `.map` file.
map: Vec<MapRecord>,
}
// SAFETY: both record types are `#[repr(C)]` structs made up of two `LE32` fields each.
// NOTE(review): this presumes `LE32` is a plain 4-byte value valid for any bit pattern and
// that this is what `TransmuteSafe` requires — both are defined elsewhere, please confirm.
unsafe impl TransmuteSafe for MapRecord {}
unsafe impl TransmuteSafe for IdxRecord {}
impl RscIndex {
fn load_idx(path: &Path) -> Result<Option<Vec<IdxRecord>>, Error> {
let path = path.with_extension("idx");
if path.exists().not() {
return Ok(None);
};
let mut idx_file = File::open(path)?;
let mut len = [0; 4];
idx_file.read_exact(&mut len)?;
let len = u32::from_le_bytes(len) as usize;
idx_file.seek(SeekFrom::Start(8))?;
let idx_size = idx_file.metadata().map_err(|_| Error::IOError)?.len();
let idx_expected_size = (size_of::<IdxRecord>() * len + 8) as u64;
if idx_size != idx_expected_size {
return Err(Error::IncorrectStreamLength);
}
let mut idx = vec![IdxRecord::default(); len];
idx_file
.read_exact(IdxRecord::slice_as_bytes_mut(idx.as_mut_slice()))
.map_err(|_| Error::IOError)?;
Ok(Some(idx))
}
fn load_map(path: &Path) -> Result<Vec<MapRecord>, Error> {
let path = path.with_extension("map");
let mut map_file = File::open(path)?;
let mut len = [0; 4];
map_file.seek(SeekFrom::Start(4))?;
map_file.read_exact(&mut len)?;
let len = u32::from_le_bytes(len) as usize;
map_file.seek(SeekFrom::Start(8))?;
let map_size = map_file.metadata().map_err(|_| Error::IOError)?.len();
let map_expected_size = (size_of::<MapRecord>() * len + 8) as u64;
if map_size != map_expected_size {
return Err(Error::IncorrectStreamLength);
}
let mut map = vec![MapRecord::default(); len];
map_file
.read_exact(MapRecord::slice_as_bytes_mut(map.as_mut_slice()))
.map_err(|_| Error::IOError)?;
Ok(map)
}
pub(crate) fn new(path: &Path, rsc_name: &str) -> Result<Self, Error> {
let path = path.join(rsc_name); // filename stem
let idx = Self::load_idx(&path)?;
let map = Self::load_map(&path)?;
Ok(RscIndex { idx, map })
}
fn get_map_idx_by_id(&self, id: u32) -> Result<usize, Error> {
let Some(idx_list) = &self.idx else {
return Ok(id as usize);
};
if idx_list.is_empty() {
return Err(Error::NotFound);
}
// Let's guess first, since usually the IDs are completely predictable, without gaps.
let idx = min(id as usize, idx_list.len() - 1);
let guess = idx_list[idx].item_id.read();
if id == guess {
return Ok(idx);
}
let idx = min(id.saturating_sub(1) as usize, idx_list.len() - 1);
let guess = idx_list[idx].item_id.read();
if id == guess {
return Ok(idx);
}
let map_idx = idx_list
.binary_search_by_key(&id, |r| r.item_id.read())
.map(|idx| idx_list[idx].map_idx.us())
.map_err(|_| Error::NotFound)?;
if map_idx >= self.map.len() {
return Err(Error::IndexMismach);
}
Ok(map_idx)
}
pub fn get_by_id(&self, id: u32) -> Result<MapRecord, Error> {
let idx = self.get_map_idx_by_id(id)?;
let record = self.map[idx];
Ok(record)
}
pub fn get_by_idx(&self, idx: usize) -> Result<(u32, MapRecord), Error> {
let item_id = if let Some(indexes) = &self.idx {
let idx_rec = indexes.get(idx).copied().ok_or(Error::InvalidIndex)?;
if idx_rec.map_idx.us() != idx {
return Err(Error::InvalidIndex);
};
idx_rec.item_id.read()
} else {
idx as u32
};
let map_rec = self.map.get(idx).copied().ok_or(Error::InvalidIndex)?;
Ok((item_id, map_rec))
}
}
/// A resource stored in the "rsc" layout: index files plus numbered `.rsc` data files that
/// hold zlib-compressed chunks, each containing one or more items.
pub struct Rsc {
/// Maps item ids to chunk and in-chunk offsets.
index: RscIndex,
/// Data files ordered by sequence number.
files: Vec<ResourceFile>,
/// Receives the compressed bytes of a chunk as read from disk.
zlib_buf: Vec<u8>,
/// Decompressor state, reused between chunks.
zlib_state: zlib::DecompressorOxide,
/// Receives the decompressed chunk; acts as a one-chunk cache.
contents_buf: Vec<u8>,
/// Chunk offset (`zoffset`) of the chunk currently held in `contents_buf`.
current_offset: usize,
/// Number of valid bytes in `contents_buf`.
current_len: usize,
}
impl Rsc {
fn parse_fname(rsc_name: &str, fname: &OsStr) -> Option<u32> {
let fname = fname.to_str()?;
let ext = ".rsc";
let min_len = rsc_name.len() + 1 + ext.len();
if fname.starts_with(rsc_name) && fname.ends_with(ext) && fname.len() > min_len {
let seqnum_start = rsc_name.len() + 1;
let seqnum_end = fname.len() - ext.len();
u32::from_str_radix(&fname[seqnum_start..seqnum_end], 10).ok()
} else {
None
}
}
fn files(path: &Path, rsc_name: &str) -> Result<Vec<ResourceFile>, Error> {
let mut files = Vec::new();
for entry in fs::read_dir(path).map_err(|_| Error::IOError)? {
let entry = entry.map_err(|_| Error::IOError)?;
let seqnum = Self::parse_fname(rsc_name, &entry.file_name());
if let Some(seqnum) = seqnum {
files.push(ResourceFile {
seqnum,
len: entry.metadata().map_err(|_| Error::IOError)?.len() as usize,
offset: 0,
file: File::open(entry.path()).map_err(|_| Error::IOError)?,
});
}
}
files.sort_by_key(|f| f.seqnum);
let mut offset = 0;
for (i, cf) in files.iter_mut().enumerate() {
if cf.seqnum != i as u32 + 1 {
return Err(Error::MissingResourceFile);
}
cf.offset = offset;
offset += cf.len;
}
Ok(files)
}
pub(crate) fn new(path: &Path, rsc_name: &str) -> Result<Self, Error> {
let files = Rsc::files(path, rsc_name)?;
let index = RscIndex::new(path, rsc_name)?;
Ok(Self {
index,
files,
zlib_buf: Vec::new(),
zlib_state: zlib::DecompressorOxide::new(),
contents_buf: Vec::new(),
current_offset: 0,
current_len: 0,
})
}
fn load_contents(&mut self, zoffset: usize) -> Result<(), Error> {
let (file, file_offset) = file_offset(&mut self.files, zoffset)?;
let mut len = [0_u8; 4];
file.seek(SeekFrom::Start(file_offset))
.map_err(|_| Error::IOError)?;
file.read_exact(&mut len).map_err(|_| Error::IOError)?;
let len = u32::from_le_bytes(len) as usize;
if self.zlib_buf.len() < len {
self.zlib_buf.resize(len, 0);
}
file.read_exact(&mut self.zlib_buf[..len])
.map_err(|_| Error::IOError)?;
let n_out = decompress(
&mut self.zlib_state,
&self.zlib_buf[..len],
&mut self.contents_buf,
)?;
self.current_len = n_out;
self.current_offset = zoffset;
Ok(())
}
pub fn get(&mut self, id: u32) -> Result<&[u8], Error> {
self.get_by_map(self.index.get_by_id(id)?)
}
pub fn get_by_idx(&mut self, idx: usize) -> Result<(u32, &[u8]), Error> {
let (id, map_rec) = self.index.get_by_idx(idx)?;
let item = self.get_by_map(map_rec)?;
Ok((id, item))
}
fn get_by_map(&mut self, idx: MapRecord) -> Result<&[u8], Error> {
if self.contents_buf.is_empty() || idx.zoffset.us() != self.current_offset {
self.load_contents(idx.zoffset.us())?;
}
let contents = &self.contents_buf[idx.ioffset.us()..self.current_len];
let (len, contents_tail) = LE32::from(contents)?;
Ok(&contents_tail[..len.us()])
}
pub fn len(&self) -> usize {
self.index.map.len()
}
}
fn file_offset(contents: &mut [ResourceFile], offset: usize) -> Result<(&mut File, u64), Error> {
let file_idx = contents
.binary_search_by(|cf| cmp_range(offset, cf.offset..cf.offset + cf.len).reverse())
.map_err(|_| Error::InvalidIndex)?;
let cf = &mut contents[file_idx];
let file = &mut cf.file;
let file_offset = (offset - cf.offset) as u64;
Ok((file, file_offset))
}
#[test]
fn test_file_offset() {
    use std::os::unix::prelude::AsRawFd;

    // An empty file list cannot contain any offset.
    assert_eq!(file_offset(&mut [], 0).err(), Some(Error::InvalidIndex));

    // Builds back-to-back mock files of the given lengths (all backed by /dev/zero) and
    // returns them together with their raw file descriptors, which identify each file.
    let mock_files = |lens: &[usize]| {
        let mut files = Vec::new();
        let mut fds = Vec::new();
        let mut offset = 0;
        for (seqnum, &len) in (1_u32..).zip(lens) {
            let file = File::open("/dev/zero").unwrap();
            fds.push(file.as_raw_fd());
            files.push(ResourceFile {
                seqnum,
                len,
                offset,
                file,
            });
            offset += len;
        }
        (files, fds)
    };

    // One, two and three files.
    let layouts: [&[usize]; 3] = [&[100], &[100, 200], &[100, 200, 100]];
    for lens in layouts {
        let (mut files, fds) = mock_files(lens);
        let total: usize = lens.iter().sum();

        // Offsets at or beyond the combined length are rejected.
        for past_end in [total + 1, total] {
            let result = file_offset(&mut files, past_end);
            assert_eq!(result.err(), Some(Error::InvalidIndex));
        }

        // The first and last byte of every file resolve to that file and the right position.
        let mut start = 0;
        for (&len, &fd) in lens.iter().zip(&fds) {
            for local in [0, len - 1] {
                let result = file_offset(&mut files, start + local);
                assert_eq!(result.as_ref().map(|f| f.0.as_raw_fd()), Ok(fd));
                assert_eq!(result.as_ref().map(|f| f.1), Ok(local as u64));
            }
            start += len;
        }
    }
}
/// Orders `num` relative to the half-open interval `range`.
///
/// Returns `Less` when `num` lies before `range.start`, `Greater` when it is at or past
/// `range.end`, and `Equal` when it falls inside the range. An empty range never yields
/// `Equal`.
fn cmp_range(num: usize, range: core::ops::Range<usize>) -> core::cmp::Ordering {
    use core::cmp::Ordering::{Equal, Greater, Less};
    match (num < range.start, num >= range.end) {
        (true, _) => Less,
        (false, true) => Greater,
        (false, false) => Equal,
    }
}
#[test]
fn test_cmp_to_range() {
    use core::cmp::Ordering::{Equal, Greater, Less};
    // (value, range, expected ordering of the value relative to the range)
    let cases = [
        (0, 0..0, Greater),
        (0, 0..1, Equal),
        (0, 0..100, Equal),
        (1, 0..100, Equal),
        (99, 0..100, Equal),
        (100, 0..100, Greater),
        (101, 0..100, Greater),
        (0, 1..100, Less),
        (99, 100..100, Less),
        (100, 100..100, Greater),
    ];
    for (num, range, expected) in cases {
        assert_eq!(
            cmp_range(num, range.clone()),
            expected,
            "cmp_range({num}, {range:?})"
        );
    }
}
/// Iterator that yields copies of the `MapRecord`s of a borrowed slice, in slice order.
pub struct RscIter<'a> {
/// Underlying slice iterator over the records.
map: slice::Iter<'a, MapRecord>,
}
impl<'a> Iterator for RscIter<'a> {
    type Item = MapRecord;

    /// Advances the underlying slice iterator and hands back a copy of the next record, or
    /// `None` once the slice is exhausted.
    fn next(&mut self) -> Option<Self::Item> {
        let record = self.map.next()?;
        Some(*record)
    }
}