From b4f806f5d575045b2ab61cbe40257c6c2d72474b Mon Sep 17 00:00:00 2001 From: lifegpc Date: Wed, 11 Jun 2025 17:42:21 +0800 Subject: [PATCH] Add basic BGI unpack support --- Cargo.toml | 7 +- check_features.py | 1 + msg_tool_macro/src/lib.rs | 15 +++ src/scripts/bgi/archive/mod.rs | 2 + src/scripts/bgi/archive/v1.rs | 224 ++++++++++++++++++++++++++++++++ src/scripts/bgi/archive/v2.rs | 226 +++++++++++++++++++++++++++++++++ src/scripts/bgi/mod.rs | 2 + src/scripts/escude/lzw.rs | 74 +---------- src/scripts/mod.rs | 4 + src/types.rs | 8 ++ src/utils/bit_stream.rs | 76 +++++++++++ src/utils/mod.rs | 2 + 12 files changed, 566 insertions(+), 75 deletions(-) create mode 100644 src/scripts/bgi/archive/mod.rs create mode 100644 src/scripts/bgi/archive/v1.rs create mode 100644 src/scripts/bgi/archive/v2.rs create mode 100644 src/utils/bit_stream.rs diff --git a/Cargo.toml b/Cargo.toml index 56e4336..72219e6 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -17,13 +17,16 @@ serde_json = "1" unicode-segmentation = "1.12" [features] -default = ["bgi", "circus", "escude", "escude-arc", "yaneurao", "yaneurao-itufuru"] +default = ["bgi", "bgi-arc", "circus", "escude", "escude-arc", "yaneurao", "yaneurao-itufuru"] bgi = [] +bgi-arc = ["bgi", "utils-bit-stream"] circus = [] escude = ["int-enum"] -escude-arc = ["escude", "rand"] +escude-arc = ["escude", "rand", "utils-bit-stream"] yaneurao = [] yaneurao-itufuru = ["yaneurao"] +# utils feature +utils-bit-stream = [] [target.'cfg(windows)'.dependencies] windows-sys = { version = "0", features = ["Win32_Globalization", "Win32_System_Diagnostics_Debug"] } diff --git a/check_features.py b/check_features.py index 2115a7b..c8be0fd 100644 --- a/check_features.py +++ b/check_features.py @@ -31,6 +31,7 @@ def main(): features = cargo_toml.get("features", {}) feature_names = list(features.keys()) + feature_names = [name for name in feature_names if not name.startswith("utils-")] if not feature_names: print("No features defined in Cargo.toml.") diff 
--git a/msg_tool_macro/src/lib.rs b/msg_tool_macro/src/lib.rs index c8e86d4..3f7255b 100644 --- a/msg_tool_macro/src/lib.rs +++ b/msg_tool_macro/src/lib.rs @@ -60,6 +60,13 @@ pub fn struct_unpack_impl_for_num(item: TokenStream) -> TokenStream { } /// Macro to derive `StructPack` trait for structs. +/// +/// Make sure the following imports are in scope where the derive is used: +/// ```ignore +/// use crate::ext::io::*; +/// use crate::utils::struct_pack::*; +/// use std::io::{Read, Seek, Write}; +/// ``` /// /// * `skip_pack` attribute can be used to skip fields from packing. /// * `fstring = ` attribute can be used to specify a fixed string length for String fields. @@ -302,6 +309,14 @@ pub fn struct_pack_derive(input: TokenStream) -> TokenStream { } /// Macro to derive `StructUnpack` trait for structs. +/// +/// Make sure the following imports are in scope where the derive is used: +/// ```ignore +/// use crate::ext::io::*; +/// use crate::utils::struct_pack::*; +/// use std::io::{Read, Seek, Write}; +/// ``` +/// /// * `skip_unpack` attribute can be used to skip fields from unpacking. /// * `fstring = ` attribute can be used to specify a fixed string length for String fields. /// * `fstring_no_trim` attribute can be used to disable trimming of fixed strings. 
diff --git a/src/scripts/bgi/archive/mod.rs b/src/scripts/bgi/archive/mod.rs new file mode 100644 index 0000000..ae6adc7 --- /dev/null +++ b/src/scripts/bgi/archive/mod.rs @@ -0,0 +1,2 @@ +pub mod v1; +pub mod v2; diff --git a/src/scripts/bgi/archive/v1.rs b/src/scripts/bgi/archive/v1.rs new file mode 100644 index 0000000..a609543 --- /dev/null +++ b/src/scripts/bgi/archive/v1.rs @@ -0,0 +1,224 @@ +use crate::ext::io::*; +use crate::scripts::base::*; +use crate::types::*; +use crate::utils::encoding::encode_string; +use crate::utils::struct_pack::*; +use anyhow::Result; +use msg_tool_macro::*; +use std::io::{Read, Seek, Write}; +use std::sync::{Arc, Mutex}; + +#[derive(Debug)] +pub struct BgiArchiveBuilder {} + +impl BgiArchiveBuilder { + pub const fn new() -> Self { + BgiArchiveBuilder {} + } +} + +impl ScriptBuilder for BgiArchiveBuilder { + fn default_encoding(&self) -> Encoding { + Encoding::Cp932 + } + + fn default_archive_encoding(&self) -> Option { + Some(Encoding::Cp932) + } + + fn build_script( + &self, + data: Vec, + _filename: &str, + _encoding: Encoding, + archive_encoding: Encoding, + config: &ExtraConfig, + ) -> Result> { + Ok(Box::new(BgiArchive::new( + MemReader::new(data), + archive_encoding, + config, + )?)) + } + + fn build_script_from_file( + &self, + _filename: &str, + _encoding: Encoding, + archive_encoding: Encoding, + config: &ExtraConfig, + ) -> Result> { + if _filename == "-" { + let data = crate::utils::files::read_file(_filename)?; + Ok(Box::new(BgiArchive::new( + MemReader::new(data), + archive_encoding, + config, + )?)) + } else { + let f = std::fs::File::open(_filename)?; + let reader = std::io::BufReader::new(f); + Ok(Box::new(BgiArchive::new(reader, archive_encoding, config)?)) + } + } + + fn build_script_from_reader( + &self, + reader: Box, + _filename: &str, + _encoding: Encoding, + archive_encoding: Encoding, + config: &ExtraConfig, + ) -> Result> { + Ok(Box::new(BgiArchive::new(reader, archive_encoding, config)?)) + } + + fn 
extensions(&self) -> &'static [&'static str] { + &["arc"] + } + + fn script_type(&self) -> &'static ScriptType { + &ScriptType::BGIArcV1 + } + + fn is_this_format(&self, _filename: &str, buf: &[u8], buf_len: usize) -> Option { + if buf_len >= 12 && buf.starts_with(b"PackFile ") { + return Some(1); + } + None + } + + fn is_archive(&self) -> bool { + true + } +} + +#[derive(Clone, Debug, StructPack, StructUnpack)] +struct BgiFileHeader { + #[fstring = 16] + filename: String, + offset: u32, + size: u32, + #[fvec = 8] + _padding: Vec, +} + +struct Entry { + header: BgiFileHeader, + reader: Arc>, + pos: usize, + base_offset: u64, +} + +impl ArchiveContent for Entry { + fn name(&self) -> &str { + &self.header.filename + } +} + +impl Read for Entry { + fn read(&mut self, buf: &mut [u8]) -> std::io::Result { + let mut reader = self.reader.lock().map_err(|e| { + std::io::Error::new( + std::io::ErrorKind::Other, + format!("Failed to lock mutex: {}", e), + ) + })?; + reader.seek(std::io::SeekFrom::Start( + self.base_offset + self.header.offset as u64 + self.pos as u64, + ))?; + let bytes_read = buf.len().min(self.header.size as usize - self.pos); + if bytes_read == 0 { + return Ok(0); + } + let bytes_read = reader.read(&mut buf[..bytes_read])?; + self.pos += bytes_read; + Ok(bytes_read) + } +} + +#[derive(Debug)] +pub struct BgiArchive { + reader: Arc>, + file_count: u32, + entries: Vec, +} + +impl BgiArchive { + pub fn new(mut reader: T, archive_encoding: Encoding, _config: &ExtraConfig) -> Result { + let mut header = [0u8; 12]; + reader.read_exact(&mut header)?; + if !header.starts_with(b"PackFile ") { + return Err(anyhow::anyhow!("Invalid BGI archive header")); + } + + let file_count = reader.read_u32()?; + let mut entries = Vec::with_capacity(file_count as usize); + for _ in 0..file_count { + let entry = BgiFileHeader::unpack(&mut reader, false, archive_encoding)?; + entries.push(entry); + } + + Ok(BgiArchive { + reader: Arc::new(Mutex::new(reader)), + file_count, + 
entries, + }) + } +} + +impl Script for BgiArchive { + fn default_output_script_type(&self) -> OutputScriptType { + OutputScriptType::Json + } + + fn default_format_type(&self) -> FormatOptions { + FormatOptions::None + } + + fn is_archive(&self) -> bool { + true + } + + fn iter_archive<'a>(&'a mut self) -> Result> + 'a>> { + Ok(Box::new( + self.entries.iter().map(|e| Ok(e.filename.clone())), + )) + } + + fn iter_archive_mut<'a>( + &'a mut self, + ) -> Result>> + 'a>> { + Ok(Box::new(BgiArchiveIter { + entries: self.entries.iter(), + reader: self.reader.clone(), + base_offset: 16 + (self.file_count as u64 * 32), + })) + } +} + +struct BgiArchiveIter<'a, T: Iterator, R: Read + Seek> { + entries: T, + reader: Arc>, + base_offset: u64, +} + +impl<'a, T: Iterator, R: Read + Seek + 'static> Iterator + for BgiArchiveIter<'a, T, R> +{ + type Item = Result>; + + fn next(&mut self) -> Option { + let entry = match self.entries.next() { + Some(e) => e, + None => return None, + }; + let entry = Entry { + header: entry.clone(), + reader: self.reader.clone(), + pos: 0, + base_offset: self.base_offset, + }; + Some(Ok(Box::new(entry))) + } +} diff --git a/src/scripts/bgi/archive/v2.rs b/src/scripts/bgi/archive/v2.rs new file mode 100644 index 0000000..d6ff50e --- /dev/null +++ b/src/scripts/bgi/archive/v2.rs @@ -0,0 +1,226 @@ +use crate::ext::io::*; +use crate::scripts::base::*; +use crate::types::*; +use crate::utils::encoding::encode_string; +use crate::utils::struct_pack::*; +use anyhow::Result; +use msg_tool_macro::*; +use std::io::{Read, Seek, Write}; +use std::sync::{Arc, Mutex}; + +#[derive(Debug)] +pub struct BgiArchiveBuilder {} + +impl BgiArchiveBuilder { + pub const fn new() -> Self { + BgiArchiveBuilder {} + } +} + +impl ScriptBuilder for BgiArchiveBuilder { + fn default_encoding(&self) -> Encoding { + Encoding::Cp932 + } + + fn default_archive_encoding(&self) -> Option { + Some(Encoding::Cp932) + } + + fn build_script( + &self, + data: Vec, + _filename: &str, + 
_encoding: Encoding, + archive_encoding: Encoding, + config: &ExtraConfig, + ) -> Result> { + Ok(Box::new(BgiArchive::new( + MemReader::new(data), + archive_encoding, + config, + )?)) + } + + fn build_script_from_file( + &self, + _filename: &str, + _encoding: Encoding, + archive_encoding: Encoding, + config: &ExtraConfig, + ) -> Result> { + if _filename == "-" { + let data = crate::utils::files::read_file(_filename)?; + Ok(Box::new(BgiArchive::new( + MemReader::new(data), + archive_encoding, + config, + )?)) + } else { + let f = std::fs::File::open(_filename)?; + let reader = std::io::BufReader::new(f); + Ok(Box::new(BgiArchive::new(reader, archive_encoding, config)?)) + } + } + + fn build_script_from_reader( + &self, + reader: Box, + _filename: &str, + _encoding: Encoding, + archive_encoding: Encoding, + config: &ExtraConfig, + ) -> Result> { + Ok(Box::new(BgiArchive::new(reader, archive_encoding, config)?)) + } + + fn extensions(&self) -> &'static [&'static str] { + &["arc"] + } + + fn script_type(&self) -> &'static ScriptType { + &ScriptType::BGIArcV2 + } + + fn is_this_format(&self, _filename: &str, buf: &[u8], buf_len: usize) -> Option { + if buf_len >= 12 && buf.starts_with(b"BURIKO ARC20") { + return Some(1); + } + None + } + + fn is_archive(&self) -> bool { + true + } +} + +#[derive(Clone, Debug, StructPack, StructUnpack)] +struct BgiFileHeader { + #[fstring = 0x60] + filename: String, + offset: u32, + size: u32, + #[fvec = 8] + _unk: Vec, + #[fvec = 16] + _padding: Vec, +} + +struct Entry { + header: BgiFileHeader, + reader: Arc>, + pos: usize, + base_offset: u64, +} + +impl ArchiveContent for Entry { + fn name(&self) -> &str { + &self.header.filename + } +} + +impl Read for Entry { + fn read(&mut self, buf: &mut [u8]) -> std::io::Result { + let mut reader = self.reader.lock().map_err(|e| { + std::io::Error::new( + std::io::ErrorKind::Other, + format!("Failed to lock mutex: {}", e), + ) + })?; + reader.seek(std::io::SeekFrom::Start( + self.base_offset + 
self.header.offset as u64 + self.pos as u64, + ))?; + let bytes_read = buf.len().min(self.header.size as usize - self.pos); + if bytes_read == 0 { + return Ok(0); + } + let bytes_read = reader.read(&mut buf[..bytes_read])?; + self.pos += bytes_read; + Ok(bytes_read) + } +} + +#[derive(Debug)] +pub struct BgiArchive { + reader: Arc>, + file_count: u32, + entries: Vec, +} + +impl BgiArchive { + pub fn new(mut reader: T, archive_encoding: Encoding, _config: &ExtraConfig) -> Result { + let mut header = [0u8; 12]; + reader.read_exact(&mut header)?; + if !header.starts_with(b"BURIKO ARC20") { + return Err(anyhow::anyhow!("Invalid BGI archive header")); + } + + let file_count = reader.read_u32()?; + let mut entries = Vec::with_capacity(file_count as usize); + for _ in 0..file_count { + let entry = BgiFileHeader::unpack(&mut reader, false, archive_encoding)?; + entries.push(entry); + } + + Ok(BgiArchive { + reader: Arc::new(Mutex::new(reader)), + file_count, + entries, + }) + } +} + +impl Script for BgiArchive { + fn default_output_script_type(&self) -> OutputScriptType { + OutputScriptType::Json + } + + fn default_format_type(&self) -> FormatOptions { + FormatOptions::None + } + + fn is_archive(&self) -> bool { + true + } + + fn iter_archive<'a>(&'a mut self) -> Result> + 'a>> { + Ok(Box::new( + self.entries.iter().map(|e| Ok(e.filename.clone())), + )) + } + + fn iter_archive_mut<'a>( + &'a mut self, + ) -> Result>> + 'a>> { + Ok(Box::new(BgiArchiveIter { + entries: self.entries.iter(), + reader: self.reader.clone(), + base_offset: 16 + (self.file_count as u64 * 0x80), /* file data starts after the 16-byte archive header and the index; each v2 index entry is 0x80 bytes (0x60 name + 4 offset + 4 size + 8 + 16), not the 32 bytes of the v1 layout */ + })) + } +} + +struct BgiArchiveIter<'a, T: Iterator, R: Read + Seek> { + entries: T, + reader: Arc>, + base_offset: u64, +} + +impl<'a, T: Iterator, R: Read + Seek + 'static> Iterator + for BgiArchiveIter<'a, T, R> +{ + type Item = Result>; + + fn next(&mut self) -> Option { + let entry = match self.entries.next() { + Some(e) => e, + None => return None, + }; + let entry = Entry { + header: entry.clone(), 
reader: self.reader.clone(), + pos: 0, + base_offset: self.base_offset, + }; + Some(Ok(Box::new(entry))) + } +} diff --git a/src/scripts/bgi/mod.rs b/src/scripts/bgi/mod.rs index b494910..09001c5 100644 --- a/src/scripts/bgi/mod.rs +++ b/src/scripts/bgi/mod.rs @@ -1,3 +1,5 @@ +#[cfg(feature = "bgi-arc")] +pub mod archive; pub mod bp; pub mod bsi; mod parser; diff --git a/src/scripts/escude/lzw.rs b/src/scripts/escude/lzw.rs index 4d428fa..c0f48b8 100644 --- a/src/scripts/escude/lzw.rs +++ b/src/scripts/escude/lzw.rs @@ -1,35 +1,8 @@ use crate::ext::io::*; +use crate::utils::bit_stream::*; use anyhow::Result; use std::io::Write; -pub struct BitStream<'a> { - m_input: MemReaderRef<'a>, - m_bits: u32, - m_cached_bits: u32, -} - -impl<'a> BitStream<'a> { - pub fn new(input: MemReaderRef<'a>) -> Self { - BitStream { - m_input: input, - m_bits: 0, - m_cached_bits: 0, - } - } - - pub fn get_bits(&mut self, count: u32) -> Result { - while self.m_cached_bits < count { - let byte = self.m_input.read_u8()?; - self.m_bits = (self.m_bits << 8) | byte as u32; - self.m_cached_bits += 8; - } - let mask = (1 << count) - 1; - self.m_cached_bits -= count; - let result = (self.m_bits >> self.m_cached_bits) & mask; - Ok(result) - } -} - pub struct LZWDecoder<'a> { m_input: BitStream<'a>, m_output_size: u32, @@ -98,51 +71,6 @@ impl<'a> LZWDecoder<'a> { } } -pub struct BitWriter<'a, T: Write> { - writer: &'a mut T, - buffer: u32, - buffer_size: u32, -} - -impl<'a, T: Write> BitWriter<'a, T> { - pub fn new(writer: &'a mut T) -> Self { - BitWriter { - writer, - buffer: 0, - buffer_size: 0, - } - } - - pub fn flush(&mut self) -> Result<()> { - if self.buffer_size > 0 { - self.writer.write_u8((self.buffer & 0xFF) as u8)?; - self.buffer = 0; - self.buffer_size = 0; - } - Ok(()) - } - - pub fn put_bits(&mut self, byte: u32, token_width: u8) -> Result<()> { - for i in 0..token_width { - self.put_bit((byte & (1 << (token_width - 1 - i))) != 0)?; - } - Ok(()) - } - - pub fn put_bit(&mut self, 
bit: bool) -> Result<()> { - self.buffer <<= 1; - if bit { - self.buffer |= 1; - } - self.buffer_size += 1; - if self.buffer_size == 8 { - self.writer.write_u8((self.buffer & 0xFF) as u8)?; - self.buffer_size -= 8; - } - Ok(()) - } -} - pub struct LZWEncoder { buf: MemWriter, } diff --git a/src/scripts/mod.rs b/src/scripts/mod.rs index 071faeb..ec15405 100644 --- a/src/scripts/mod.rs +++ b/src/scripts/mod.rs @@ -20,6 +20,10 @@ lazy_static::lazy_static! { Box::new(bgi::bsi::BGIBsiScriptBuilder::new()), #[cfg(feature = "bgi")] Box::new(bgi::bp::BGIBpScriptBuilder::new()), + #[cfg(feature = "bgi-arc")] + Box::new(bgi::archive::v1::BgiArchiveBuilder::new()), + #[cfg(feature = "bgi-arc")] + Box::new(bgi::archive::v2::BgiArchiveBuilder::new()), #[cfg(feature = "escude-arc")] Box::new(escude::archive::EscudeBinArchiveBuilder::new()), #[cfg(feature = "escude")] diff --git a/src/types.rs b/src/types.rs index ebf6fca..8dd9012 100644 --- a/src/types.rs +++ b/src/types.rs @@ -217,6 +217,14 @@ pub enum ScriptType { #[value(alias("ethornell-bp"))] /// Buriko General Interpreter/Ethornell bp script (._bp) BGIBp, + #[cfg(feature = "bgi-arc")] + #[value(alias = "ethornell-arc-v1")] + /// Buriko General Interpreter/Ethornell archive v1 + BGIArcV1, + #[cfg(feature = "bgi-arc")] + #[value(alias = "ethornell-arc-v2", alias = "bgi-arc", alias = "ethornell-arc")] + /// Buriko General Interpreter/Ethornell archive v2 + BGIArcV2, #[cfg(feature = "escude-arc")] /// Escude bin archive EscudeArc, diff --git a/src/utils/bit_stream.rs b/src/utils/bit_stream.rs new file mode 100644 index 0000000..b835c7e --- /dev/null +++ b/src/utils/bit_stream.rs @@ -0,0 +1,76 @@ +use crate::ext::io::*; +use anyhow::Result; +use std::io::Write; + +pub struct BitStream<'a> { + m_input: MemReaderRef<'a>, + m_bits: u32, + m_cached_bits: u32, +} + +impl<'a> BitStream<'a> { + pub fn new(input: MemReaderRef<'a>) -> Self { + BitStream { + m_input: input, + m_bits: 0, + m_cached_bits: 0, + } + } + + pub fn 
get_bits(&mut self, count: u32) -> Result { + while self.m_cached_bits < count { + let byte = self.m_input.read_u8()?; + self.m_bits = (self.m_bits << 8) | byte as u32; + self.m_cached_bits += 8; + } + let mask = (1 << count) - 1; + self.m_cached_bits -= count; + let result = (self.m_bits >> self.m_cached_bits) & mask; + Ok(result) + } +} + +pub struct BitWriter<'a, T: Write> { + writer: &'a mut T, + buffer: u32, + buffer_size: u32, +} + +impl<'a, T: Write> BitWriter<'a, T> { + pub fn new(writer: &'a mut T) -> Self { + BitWriter { + writer, + buffer: 0, + buffer_size: 0, + } + } + + pub fn flush(&mut self) -> Result<()> { + if self.buffer_size > 0 { + self.writer.write_u8((self.buffer & 0xFF) as u8)?; + self.buffer = 0; + self.buffer_size = 0; + } + Ok(()) + } + + pub fn put_bits(&mut self, byte: u32, token_width: u8) -> Result<()> { + for i in 0..token_width { + self.put_bit((byte & (1 << (token_width - 1 - i))) != 0)?; + } + Ok(()) + } + + pub fn put_bit(&mut self, bit: bool) -> Result<()> { + self.buffer <<= 1; + if bit { + self.buffer |= 1; + } + self.buffer_size += 1; + if self.buffer_size == 8 { + self.writer.write_u8((self.buffer & 0xFF) as u8)?; + self.buffer_size -= 8; + } + Ok(()) + } +} diff --git a/src/utils/mod.rs b/src/utils/mod.rs index 6a3297b..8bc3833 100644 --- a/src/utils/mod.rs +++ b/src/utils/mod.rs @@ -1,3 +1,5 @@ +#[cfg(feature = "utils-bit-stream")] +pub mod bit_stream; pub mod counter; pub mod encoding; #[cfg(windows)]