Add basic BGI unpack support

This commit is contained in:
2025-06-11 17:42:21 +08:00
parent a64d0e0380
commit b4f806f5d5
12 changed files with 566 additions and 75 deletions

View File

@@ -17,13 +17,16 @@ serde_json = "1"
unicode-segmentation = "1.12"
[features]
default = ["bgi", "circus", "escude", "escude-arc", "yaneurao", "yaneurao-itufuru"]
default = ["bgi", "bgi-arc", "circus", "escude", "escude-arc", "yaneurao", "yaneurao-itufuru"]
bgi = []
bgi-arc = ["bgi", "utils-bit-stream"]
circus = []
escude = ["int-enum"]
escude-arc = ["escude", "rand"]
escude-arc = ["escude", "rand", "utils-bit-stream"]
yaneurao = []
yaneurao-itufuru = ["yaneurao"]
# utils feature
utils-bit-stream = []
[target.'cfg(windows)'.dependencies]
windows-sys = { version = "0", features = ["Win32_Globalization", "Win32_System_Diagnostics_Debug"] }

View File

@@ -31,6 +31,7 @@ def main():
features = cargo_toml.get("features", {})
feature_names = list(features.keys())
feature_names = [name for name in feature_names if not name.startswith("utils-")]
if not feature_names:
print("No features defined in Cargo.toml.")

View File

@@ -60,6 +60,13 @@ pub fn struct_unpack_impl_for_num(item: TokenStream) -> TokenStream {
}
/// Macro to derive `StructPack` trait for structs.
///
/// Make sure the following items are in scope wherever this derive is used:
/// ```ignore
/// use crate::ext::io::*;
/// use crate::utils::struct_pack::*;
/// use std::io::{Read, Seek, Write};
/// ```
///
/// * `skip_pack` attribute can be used to skip fields from packing.
/// * `fstring = <len>` attribute can be used to specify a fixed string length for String fields.
@@ -302,6 +309,14 @@ pub fn struct_pack_derive(input: TokenStream) -> TokenStream {
}
/// Macro to derive `StructUnpack` trait for structs.
///
/// Make sure the following items are in scope wherever this derive is used:
/// ```ignore
/// use crate::ext::io::*;
/// use crate::utils::struct_pack::*;
/// use std::io::{Read, Seek, Write};
/// ```
///
/// * `skip_unpack` attribute can be used to skip fields from unpacking.
/// * `fstring = <len>` attribute can be used to specify a fixed string length for String fields.
/// * `fstring_no_trim` attribute can be used to disable trimming of fixed strings.

View File

@@ -0,0 +1,2 @@
pub mod v1;
pub mod v2;

View File

@@ -0,0 +1,224 @@
use crate::ext::io::*;
use crate::scripts::base::*;
use crate::types::*;
use crate::utils::encoding::encode_string;
use crate::utils::struct_pack::*;
use anyhow::Result;
use msg_tool_macro::*;
use std::io::{Read, Seek, Write};
use std::sync::{Arc, Mutex};
/// Stateless builder that detects BGI `PackFile` (v1) archives and opens
/// them as [`BgiArchive`] scripts.
#[derive(Debug)]
pub struct BgiArchiveBuilder {}

impl BgiArchiveBuilder {
    /// Creates the builder. It carries no state, so it can be built in
    /// constant contexts.
    pub const fn new() -> Self {
        Self {}
    }
}
impl ScriptBuilder for BgiArchiveBuilder {
    /// Default text encoding reported for this format: CP932.
    fn default_encoding(&self) -> Encoding {
        Encoding::Cp932
    }

    /// Default archive encoding reported for this format: CP932.
    fn default_archive_encoding(&self) -> Option<Encoding> {
        Some(Encoding::Cp932)
    }

    /// Opens an archive that is already fully loaded into memory.
    ///
    /// Only `archive_encoding` and `config` are forwarded to
    /// [`BgiArchive::new`]; the file name and script encoding are unused.
    fn build_script(
        &self,
        data: Vec<u8>,
        _filename: &str,
        _encoding: Encoding,
        archive_encoding: Encoding,
        config: &ExtraConfig,
    ) -> Result<Box<dyn Script>> {
        let archive = BgiArchive::new(MemReader::new(data), archive_encoding, config)?;
        Ok(Box::new(archive))
    }

    /// Opens an archive by path.
    ///
    /// Regular paths are opened as a buffered file so entry data is read on
    /// demand. The special name `-` is loaded completely through
    /// `crate::utils::files::read_file` (presumably standard input — confirm)
    /// and then served from memory.
    fn build_script_from_file(
        &self,
        _filename: &str,
        _encoding: Encoding,
        archive_encoding: Encoding,
        config: &ExtraConfig,
    ) -> Result<Box<dyn Script>> {
        if _filename != "-" {
            let file = std::fs::File::open(_filename)?;
            let buffered = std::io::BufReader::new(file);
            let archive = BgiArchive::new(buffered, archive_encoding, config)?;
            return Ok(Box::new(archive));
        }
        let data = crate::utils::files::read_file(_filename)?;
        let archive = BgiArchive::new(MemReader::new(data), archive_encoding, config)?;
        Ok(Box::new(archive))
    }

    /// Opens an archive on top of a caller-supplied seekable reader.
    fn build_script_from_reader(
        &self,
        reader: Box<dyn ReadSeek>,
        _filename: &str,
        _encoding: Encoding,
        archive_encoding: Encoding,
        config: &ExtraConfig,
    ) -> Result<Box<dyn Script>> {
        let archive = BgiArchive::new(reader, archive_encoding, config)?;
        Ok(Box::new(archive))
    }

    /// File extensions associated with this builder.
    fn extensions(&self) -> &'static [&'static str] {
        &["arc"]
    }

    /// The script type handled by this builder.
    fn script_type(&self) -> &'static ScriptType {
        &ScriptType::BGIArcV1
    }

    /// Returns `Some(1)` when at least 12 bytes are available and the buffer
    /// begins with `PackFile `; otherwise `None`.
    fn is_this_format(&self, _filename: &str, buf: &[u8], buf_len: usize) -> Option<u8> {
        let has_magic = buf_len >= 12 && buf.starts_with(b"PackFile ");
        if has_magic {
            Some(1)
        } else {
            None
        }
    }

    /// This builder always produces archives.
    fn is_archive(&self) -> bool {
        true
    }
}
// One index record of a v1 archive; `BgiArchive::new` decodes one of these per
// entry through the derived `unpack`.
// Assuming `fvec = n` occupies exactly n bytes (TODO confirm), a record is
// 16 + 4 + 4 + 8 = 32 bytes long.
#[derive(Clone, Debug, StructPack, StructUnpack)]
struct BgiFileHeader {
// Entry name, stored as a fixed-length string of 16 bytes (`fstring = 16`).
#[fstring = 16]
filename: String,
// Start of the entry data; `Entry::read` adds it to the archive's base offset.
offset: u32,
// Number of readable bytes in the entry; `Entry::read` never reads past it.
size: u32,
// Trailing bytes of the record that this module never interprets
// (presumably padding, as the field name suggests — confirm).
#[fvec = 8]
_padding: Vec<u8>,
}
/// A single archive entry that reads its bytes through the reader shared
/// with the owning archive.
struct Entry<T: Read + Seek> {
    /// Index record describing this entry.
    header: BgiFileHeader,
    /// Reader shared with the archive and with every other entry.
    reader: Arc<Mutex<T>>,
    /// Number of bytes of this entry already handed out.
    pos: usize,
    /// Position in the underlying reader that entry offsets are relative to.
    base_offset: u64,
}

impl<T: Read + Seek> ArchiveContent for Entry<T> {
    /// Name of the entry as stored in its index record.
    fn name(&self) -> &str {
        self.header.filename.as_str()
    }
}

impl<T: Read + Seek> Read for Entry<T> {
    /// Reads up to `buf.len()` bytes of the entry, never going past its
    /// recorded size, and returns `Ok(0)` once the entry is exhausted.
    ///
    /// The shared reader is re-positioned on every call because other
    /// entries may have moved it in the meantime.
    fn read(&mut self, buf: &mut [u8]) -> std::io::Result<usize> {
        let mut source = match self.reader.lock() {
            Ok(guard) => guard,
            Err(e) => {
                return Err(std::io::Error::new(
                    std::io::ErrorKind::Other,
                    format!("Failed to lock mutex: {}", e),
                ));
            }
        };
        let absolute = self.base_offset + self.header.offset as u64 + self.pos as u64;
        source.seek(std::io::SeekFrom::Start(absolute))?;
        let remaining = self.header.size as usize - self.pos;
        let wanted = remaining.min(buf.len());
        if wanted == 0 {
            return Ok(0);
        }
        let got = source.read(&mut buf[..wanted])?;
        self.pos += got;
        Ok(got)
    }
}
/// An opened BGI `PackFile` (v1) archive: the decoded index plus the reader
/// that entry data is later pulled from.
#[derive(Debug)]
pub struct BgiArchive<T: Read + Seek + std::fmt::Debug> {
    /// Underlying reader, shared with every entry handed out.
    reader: Arc<Mutex<T>>,
    /// Entry count as read from the archive header.
    file_count: u32,
    /// Index records in the order they appear in the archive.
    entries: Vec<BgiFileHeader>,
}

impl<T: Read + Seek + std::fmt::Debug> BgiArchive<T> {
    /// Parses the archive header and index from `reader`.
    ///
    /// The reader must be positioned at the start of the archive. The first
    /// 12 bytes must begin with `PackFile `; they are followed by a `u32`
    /// entry count and that many index records, whose names are decoded with
    /// `archive_encoding`.
    ///
    /// # Errors
    ///
    /// Fails when the magic does not match, or when the header or any index
    /// record cannot be read.
    pub fn new(mut reader: T, archive_encoding: Encoding, _config: &ExtraConfig) -> Result<Self> {
        // The entry count comes straight from the file. Cap the up-front
        // allocation so a corrupt or hostile header cannot request gigabytes
        // before a single record has been validated; the vector still grows
        // as needed while records are actually read.
        const MAX_PREALLOCATED_ENTRIES: usize = 4096;

        let mut magic = [0u8; 12];
        reader.read_exact(&mut magic)?;
        if !magic.starts_with(b"PackFile ") {
            return Err(anyhow::anyhow!("Invalid BGI archive header"));
        }
        let file_count = reader.read_u32()?;
        let mut entries = Vec::with_capacity((file_count as usize).min(MAX_PREALLOCATED_ENTRIES));
        for _ in 0..file_count {
            // `false` is forwarded unchanged to the derived `unpack`
            // (presumably the big-endian flag — confirm).
            let entry = BgiFileHeader::unpack(&mut reader, false, archive_encoding)?;
            entries.push(entry);
        }
        Ok(BgiArchive {
            reader: Arc::new(Mutex::new(reader)),
            file_count,
            entries,
        })
    }
}
impl<T: Read + Seek + std::fmt::Debug + 'static> Script for BgiArchive<T> {
    /// Output script type used by default: JSON.
    fn default_output_script_type(&self) -> OutputScriptType {
        OutputScriptType::Json
    }

    /// No formatting is applied by default.
    fn default_format_type(&self) -> FormatOptions {
        FormatOptions::None
    }

    /// This script type is an archive.
    fn is_archive(&self) -> bool {
        true
    }

    /// Yields the name of every entry, in index order.
    fn iter_archive<'a>(&'a mut self) -> Result<Box<dyn Iterator<Item = Result<String>> + 'a>> {
        let names = self
            .entries
            .iter()
            .map(|header| -> Result<String> { Ok(header.filename.clone()) });
        Ok(Box::new(names))
    }

    /// Yields a readable handle for every entry, in index order.
    ///
    /// Entry offsets are relative to the end of the index, which sits after
    /// the 16-byte header (12-byte magic + `u32` count) and one 32-byte
    /// record per entry.
    fn iter_archive_mut<'a>(
        &'a mut self,
    ) -> Result<Box<dyn Iterator<Item = Result<Box<dyn ArchiveContent>>> + 'a>> {
        let base_offset = 16 + 32 * (self.file_count as u64);
        let contents = BgiArchiveIter {
            entries: self.entries.iter(),
            reader: Arc::clone(&self.reader),
            base_offset,
        };
        Ok(Box::new(contents))
    }
}
/// Iterator that turns index records into readable [`Entry`] handles.
struct BgiArchiveIter<'a, T: Iterator<Item = &'a BgiFileHeader>, R: Read + Seek> {
    /// Remaining index records.
    entries: T,
    /// Reader handed to every produced entry.
    reader: Arc<Mutex<R>>,
    /// Position that entry offsets are relative to.
    base_offset: u64,
}

impl<'a, T: Iterator<Item = &'a BgiFileHeader>, R: Read + Seek + 'static> Iterator
    for BgiArchiveIter<'a, T, R>
{
    type Item = Result<Box<dyn ArchiveContent>>;

    /// Produces the next entry, starting at read position 0; never yields an
    /// error itself.
    fn next(&mut self) -> Option<Self::Item> {
        let header = self.entries.next()?.clone();
        let content = Entry {
            header,
            reader: Arc::clone(&self.reader),
            pos: 0,
            base_offset: self.base_offset,
        };
        Some(Ok(Box::new(content)))
    }
}

View File

@@ -0,0 +1,226 @@
use crate::ext::io::*;
use crate::scripts::base::*;
use crate::types::*;
use crate::utils::encoding::encode_string;
use crate::utils::struct_pack::*;
use anyhow::Result;
use msg_tool_macro::*;
use std::io::{Read, Seek, Write};
use std::sync::{Arc, Mutex};
/// Stateless builder that detects BGI `BURIKO ARC20` (v2) archives and opens
/// them as [`BgiArchive`] scripts.
#[derive(Debug)]
pub struct BgiArchiveBuilder {}

impl BgiArchiveBuilder {
    /// Creates the builder. It carries no state, so it can be built in
    /// constant contexts.
    pub const fn new() -> Self {
        Self {}
    }
}
impl ScriptBuilder for BgiArchiveBuilder {
    /// Default text encoding reported for this format: CP932.
    fn default_encoding(&self) -> Encoding {
        Encoding::Cp932
    }

    /// Default archive encoding reported for this format: CP932.
    fn default_archive_encoding(&self) -> Option<Encoding> {
        Some(Encoding::Cp932)
    }

    /// Opens an archive that is already fully loaded into memory.
    ///
    /// Only `archive_encoding` and `config` are forwarded to
    /// [`BgiArchive::new`]; the file name and script encoding are unused.
    fn build_script(
        &self,
        data: Vec<u8>,
        _filename: &str,
        _encoding: Encoding,
        archive_encoding: Encoding,
        config: &ExtraConfig,
    ) -> Result<Box<dyn Script>> {
        let archive = BgiArchive::new(MemReader::new(data), archive_encoding, config)?;
        Ok(Box::new(archive))
    }

    /// Opens an archive by path.
    ///
    /// Regular paths are opened as a buffered file so entry data is read on
    /// demand. The special name `-` is loaded completely through
    /// `crate::utils::files::read_file` (presumably standard input — confirm)
    /// and then served from memory.
    fn build_script_from_file(
        &self,
        _filename: &str,
        _encoding: Encoding,
        archive_encoding: Encoding,
        config: &ExtraConfig,
    ) -> Result<Box<dyn Script>> {
        if _filename != "-" {
            let file = std::fs::File::open(_filename)?;
            let buffered = std::io::BufReader::new(file);
            let archive = BgiArchive::new(buffered, archive_encoding, config)?;
            return Ok(Box::new(archive));
        }
        let data = crate::utils::files::read_file(_filename)?;
        let archive = BgiArchive::new(MemReader::new(data), archive_encoding, config)?;
        Ok(Box::new(archive))
    }

    /// Opens an archive on top of a caller-supplied seekable reader.
    fn build_script_from_reader(
        &self,
        reader: Box<dyn ReadSeek>,
        _filename: &str,
        _encoding: Encoding,
        archive_encoding: Encoding,
        config: &ExtraConfig,
    ) -> Result<Box<dyn Script>> {
        let archive = BgiArchive::new(reader, archive_encoding, config)?;
        Ok(Box::new(archive))
    }

    /// File extensions associated with this builder.
    fn extensions(&self) -> &'static [&'static str] {
        &["arc"]
    }

    /// The script type handled by this builder.
    fn script_type(&self) -> &'static ScriptType {
        &ScriptType::BGIArcV2
    }

    /// Returns `Some(1)` when at least 12 bytes are available and the buffer
    /// begins with `BURIKO ARC20`; otherwise `None`.
    fn is_this_format(&self, _filename: &str, buf: &[u8], buf_len: usize) -> Option<u8> {
        let has_magic = buf_len >= 12 && buf.starts_with(b"BURIKO ARC20");
        if has_magic {
            Some(1)
        } else {
            None
        }
    }

    /// This builder always produces archives.
    fn is_archive(&self) -> bool {
        true
    }
}
// One index record of a v2 archive; `BgiArchive::new` decodes one of these per
// entry through the derived `unpack`.
// Assuming `fvec = n` occupies exactly n bytes (TODO confirm), a record is
// 0x60 + 4 + 4 + 8 + 16 = 0x80 bytes long.
#[derive(Clone, Debug, StructPack, StructUnpack)]
struct BgiFileHeader {
// Entry name, stored as a fixed-length string of 0x60 bytes (`fstring = 0x60`).
#[fstring = 0x60]
filename: String,
// Start of the entry data; `Entry::read` adds it to the archive's base offset.
offset: u32,
// Number of readable bytes in the entry; `Entry::read` never reads past it.
size: u32,
// Bytes of the record that this module never interprets (meaning unknown,
// as the field name indicates).
#[fvec = 8]
_unk: Vec<u8>,
// Trailing bytes of the record that this module never interprets
// (presumably padding, as the field name suggests — confirm).
#[fvec = 16]
_padding: Vec<u8>,
}
/// A single archive entry that reads its bytes through the reader shared
/// with the owning archive.
struct Entry<T: Read + Seek> {
    /// Index record describing this entry.
    header: BgiFileHeader,
    /// Reader shared with the archive and with every other entry.
    reader: Arc<Mutex<T>>,
    /// Number of bytes of this entry already handed out.
    pos: usize,
    /// Position in the underlying reader that entry offsets are relative to.
    base_offset: u64,
}

impl<T: Read + Seek> ArchiveContent for Entry<T> {
    /// Name of the entry as stored in its index record.
    fn name(&self) -> &str {
        self.header.filename.as_str()
    }
}

impl<T: Read + Seek> Read for Entry<T> {
    /// Reads up to `buf.len()` bytes of the entry, never going past its
    /// recorded size, and returns `Ok(0)` once the entry is exhausted.
    ///
    /// The shared reader is re-positioned on every call because other
    /// entries may have moved it in the meantime.
    fn read(&mut self, buf: &mut [u8]) -> std::io::Result<usize> {
        let mut source = match self.reader.lock() {
            Ok(guard) => guard,
            Err(e) => {
                return Err(std::io::Error::new(
                    std::io::ErrorKind::Other,
                    format!("Failed to lock mutex: {}", e),
                ));
            }
        };
        let absolute = self.base_offset + self.header.offset as u64 + self.pos as u64;
        source.seek(std::io::SeekFrom::Start(absolute))?;
        let remaining = self.header.size as usize - self.pos;
        let wanted = remaining.min(buf.len());
        if wanted == 0 {
            return Ok(0);
        }
        let got = source.read(&mut buf[..wanted])?;
        self.pos += got;
        Ok(got)
    }
}
/// An opened BGI `BURIKO ARC20` (v2) archive: the decoded index plus the
/// reader that entry data is later pulled from.
#[derive(Debug)]
pub struct BgiArchive<T: Read + Seek + std::fmt::Debug> {
    /// Underlying reader, shared with every entry handed out.
    reader: Arc<Mutex<T>>,
    /// Entry count as read from the archive header.
    file_count: u32,
    /// Index records in the order they appear in the archive.
    entries: Vec<BgiFileHeader>,
}

impl<T: Read + Seek + std::fmt::Debug> BgiArchive<T> {
    /// Parses the archive header and index from `reader`.
    ///
    /// The reader must be positioned at the start of the archive. The first
    /// 12 bytes must be `BURIKO ARC20`; they are followed by a `u32` entry
    /// count and that many index records, whose names are decoded with
    /// `archive_encoding`.
    ///
    /// # Errors
    ///
    /// Fails when the magic does not match, or when the header or any index
    /// record cannot be read.
    pub fn new(mut reader: T, archive_encoding: Encoding, _config: &ExtraConfig) -> Result<Self> {
        // The entry count comes straight from the file. Cap the up-front
        // allocation so a corrupt or hostile header cannot request gigabytes
        // before a single record has been validated; the vector still grows
        // as needed while records are actually read.
        const MAX_PREALLOCATED_ENTRIES: usize = 4096;

        let mut magic = [0u8; 12];
        reader.read_exact(&mut magic)?;
        if !magic.starts_with(b"BURIKO ARC20") {
            return Err(anyhow::anyhow!("Invalid BGI archive header"));
        }
        let file_count = reader.read_u32()?;
        let mut entries = Vec::with_capacity((file_count as usize).min(MAX_PREALLOCATED_ENTRIES));
        for _ in 0..file_count {
            // `false` is forwarded unchanged to the derived `unpack`
            // (presumably the big-endian flag — confirm).
            let entry = BgiFileHeader::unpack(&mut reader, false, archive_encoding)?;
            entries.push(entry);
        }
        Ok(BgiArchive {
            reader: Arc::new(Mutex::new(reader)),
            file_count,
            entries,
        })
    }
}
impl<T: Read + Seek + std::fmt::Debug + 'static> Script for BgiArchive<T> {
    /// Output script type used by default: JSON.
    fn default_output_script_type(&self) -> OutputScriptType {
        OutputScriptType::Json
    }

    /// No formatting is applied by default.
    fn default_format_type(&self) -> FormatOptions {
        FormatOptions::None
    }

    /// This script type is an archive.
    fn is_archive(&self) -> bool {
        true
    }

    /// Yields the name of every entry, in index order.
    fn iter_archive<'a>(&'a mut self) -> Result<Box<dyn Iterator<Item = Result<String>> + 'a>> {
        Ok(Box::new(
            self.entries.iter().map(|e| Ok(e.filename.clone())),
        ))
    }

    /// Yields a readable handle for every entry, in index order.
    ///
    /// Entry offsets are relative to the end of the index, so the base
    /// offset is the archive header plus one index record per entry.
    fn iter_archive_mut<'a>(
        &'a mut self,
    ) -> Result<Box<dyn Iterator<Item = Result<Box<dyn ArchiveContent>>> + 'a>> {
        // 12-byte magic followed by the `u32` entry count.
        const ARCHIVE_HEADER_SIZE: u64 = 12 + 4;
        // A v2 index record is name (0x60) + offset (4) + size (4) +
        // unknown (8) + padding (16) = 0x80 bytes. Using the 32-byte v1
        // record size here would make every entry read from inside the index.
        const INDEX_ENTRY_SIZE: u64 = 0x60 + 4 + 4 + 8 + 16;

        Ok(Box::new(BgiArchiveIter {
            entries: self.entries.iter(),
            reader: self.reader.clone(),
            base_offset: ARCHIVE_HEADER_SIZE + u64::from(self.file_count) * INDEX_ENTRY_SIZE,
        }))
    }
}
/// Iterator that turns index records into readable [`Entry`] handles.
struct BgiArchiveIter<'a, T: Iterator<Item = &'a BgiFileHeader>, R: Read + Seek> {
    /// Remaining index records.
    entries: T,
    /// Reader handed to every produced entry.
    reader: Arc<Mutex<R>>,
    /// Position that entry offsets are relative to.
    base_offset: u64,
}

impl<'a, T: Iterator<Item = &'a BgiFileHeader>, R: Read + Seek + 'static> Iterator
    for BgiArchiveIter<'a, T, R>
{
    type Item = Result<Box<dyn ArchiveContent>>;

    /// Produces the next entry, starting at read position 0; never yields an
    /// error itself.
    fn next(&mut self) -> Option<Self::Item> {
        let header = self.entries.next()?.clone();
        let content = Entry {
            header,
            reader: Arc::clone(&self.reader),
            pos: 0,
            base_offset: self.base_offset,
        };
        Some(Ok(Box::new(content)))
    }
}

View File

@@ -1,3 +1,5 @@
#[cfg(feature = "bgi-arc")]
pub mod archive;
pub mod bp;
pub mod bsi;
mod parser;

View File

@@ -1,35 +1,8 @@
use crate::ext::io::*;
use crate::utils::bit_stream::*;
use anyhow::Result;
use std::io::Write;
pub struct BitStream<'a> {
m_input: MemReaderRef<'a>,
m_bits: u32,
m_cached_bits: u32,
}
impl<'a> BitStream<'a> {
pub fn new(input: MemReaderRef<'a>) -> Self {
BitStream {
m_input: input,
m_bits: 0,
m_cached_bits: 0,
}
}
pub fn get_bits(&mut self, count: u32) -> Result<u32> {
while self.m_cached_bits < count {
let byte = self.m_input.read_u8()?;
self.m_bits = (self.m_bits << 8) | byte as u32;
self.m_cached_bits += 8;
}
let mask = (1 << count) - 1;
self.m_cached_bits -= count;
let result = (self.m_bits >> self.m_cached_bits) & mask;
Ok(result)
}
}
pub struct LZWDecoder<'a> {
m_input: BitStream<'a>,
m_output_size: u32,
@@ -98,51 +71,6 @@ impl<'a> LZWDecoder<'a> {
}
}
pub struct BitWriter<'a, T: Write> {
writer: &'a mut T,
buffer: u32,
buffer_size: u32,
}
impl<'a, T: Write> BitWriter<'a, T> {
pub fn new(writer: &'a mut T) -> Self {
BitWriter {
writer,
buffer: 0,
buffer_size: 0,
}
}
pub fn flush(&mut self) -> Result<()> {
if self.buffer_size > 0 {
self.writer.write_u8((self.buffer & 0xFF) as u8)?;
self.buffer = 0;
self.buffer_size = 0;
}
Ok(())
}
pub fn put_bits(&mut self, byte: u32, token_width: u8) -> Result<()> {
for i in 0..token_width {
self.put_bit((byte & (1 << (token_width - 1 - i))) != 0)?;
}
Ok(())
}
pub fn put_bit(&mut self, bit: bool) -> Result<()> {
self.buffer <<= 1;
if bit {
self.buffer |= 1;
}
self.buffer_size += 1;
if self.buffer_size == 8 {
self.writer.write_u8((self.buffer & 0xFF) as u8)?;
self.buffer_size -= 8;
}
Ok(())
}
}
pub struct LZWEncoder {
buf: MemWriter,
}

View File

@@ -20,6 +20,10 @@ lazy_static::lazy_static! {
Box::new(bgi::bsi::BGIBsiScriptBuilder::new()),
#[cfg(feature = "bgi")]
Box::new(bgi::bp::BGIBpScriptBuilder::new()),
#[cfg(feature = "bgi-arc")]
Box::new(bgi::archive::v1::BgiArchiveBuilder::new()),
#[cfg(feature = "bgi-arc")]
Box::new(bgi::archive::v2::BgiArchiveBuilder::new()),
#[cfg(feature = "escude-arc")]
Box::new(escude::archive::EscudeBinArchiveBuilder::new()),
#[cfg(feature = "escude")]

View File

@@ -217,6 +217,14 @@ pub enum ScriptType {
#[value(alias("ethornell-bp"))]
/// Buriko General Interpreter/Ethornell bp script (._bp)
BGIBp,
#[cfg(feature = "bgi-arc")]
#[value(alias = "ethornell-arc-v1")]
/// Buriko General Interpreter/Ethornell archive v1
BGIArcV1,
#[cfg(feature = "bgi-arc")]
#[value(alias = "ethornell-arc-v2", alias = "bgi-arc", alias = "ethornell-arc")]
/// Buriko General Interpreter/Ethornell archive v2
BGIArcV2,
#[cfg(feature = "escude-arc")]
/// Escude bin archive
EscudeArc,

76
src/utils/bit_stream.rs Normal file
View File

@@ -0,0 +1,76 @@
use crate::ext::io::*;
use anyhow::Result;
use std::io::Write;
/// Reads values of arbitrary bit width from a byte source, most-significant
/// bit first.
pub struct BitStream<'a> {
    /// Byte source the bits are pulled from.
    m_input: MemReaderRef<'a>,
    /// Bit cache; the newest byte occupies the low 8 bits.
    m_bits: u32,
    /// Number of low bits in `m_bits` that have not been consumed yet.
    m_cached_bits: u32,
}

impl<'a> BitStream<'a> {
    /// Wraps `input` with an empty bit cache.
    pub fn new(input: MemReaderRef<'a>) -> Self {
        Self {
            m_input: input,
            m_bits: 0,
            m_cached_bits: 0,
        }
    }

    /// Returns the next `count` bits as an unsigned value.
    ///
    /// Bytes are pulled from the input only when the cache holds fewer than
    /// `count` bits. After a refill the 32-bit cache may hold up to
    /// `count + 7` bits, so `count` must not exceed 25 for the result to be
    /// exact.
    ///
    /// # Errors
    ///
    /// Propagates the input's error when a needed byte cannot be read.
    pub fn get_bits(&mut self, count: u32) -> Result<u32> {
        loop {
            if self.m_cached_bits >= count {
                break;
            }
            let next = self.m_input.read_u8()?;
            self.m_bits = (self.m_bits << 8) | next as u32;
            self.m_cached_bits += 8;
        }
        let mask = (1 << count) - 1;
        self.m_cached_bits -= count;
        let value = self.m_bits >> self.m_cached_bits;
        Ok(value & mask)
    }
}
/// Writes individual bits to an underlying byte writer, most-significant bit
/// first within each byte.
pub struct BitWriter<'a, T: Write> {
    /// Destination for completed bytes.
    writer: &'a mut T,
    /// Bit accumulator; the most recently written bit is the lowest bit.
    buffer: u32,
    /// Number of low bits in `buffer` not yet written out (0..=7 between calls).
    buffer_size: u32,
}

impl<'a, T: Write> BitWriter<'a, T> {
    /// Creates a bit writer on top of `writer` with nothing pending.
    pub fn new(writer: &'a mut T) -> Self {
        BitWriter {
            writer,
            buffer: 0,
            buffer_size: 0,
        }
    }

    /// Writes out any pending bits as one final byte.
    ///
    /// The pending bits are placed in the high bits of the byte and the rest
    /// is zero-filled, matching the MSB-first order `put_bit` uses for full
    /// bytes. Does nothing when the output is already byte-aligned.
    ///
    /// # Errors
    ///
    /// Propagates the underlying writer's error.
    pub fn flush(&mut self) -> Result<()> {
        if self.buffer_size > 0 {
            // Left-align the pending bits. Shifting also pushes stale bits of
            // the previously written byte above bit 7, where the mask drops them.
            let pad = 8 - self.buffer_size;
            let last = ((self.buffer << pad) & 0xFF) as u8;
            self.writer.write_u8(last)?;
            self.buffer = 0;
            self.buffer_size = 0;
        }
        Ok(())
    }

    /// Writes the low `token_width` bits of `byte`, highest of those bits first.
    ///
    /// `token_width` must not exceed 32, the width of `byte`.
    pub fn put_bits(&mut self, byte: u32, token_width: u8) -> Result<()> {
        for shift in (0..token_width).rev() {
            self.put_bit((byte >> shift) & 1 != 0)?;
        }
        Ok(())
    }

    /// Appends a single bit and emits a byte once eight bits have accumulated.
    pub fn put_bit(&mut self, bit: bool) -> Result<()> {
        self.buffer = (self.buffer << 1) | u32::from(bit);
        self.buffer_size += 1;
        if self.buffer_size == 8 {
            self.writer.write_u8((self.buffer & 0xFF) as u8)?;
            self.buffer_size -= 8;
        }
        Ok(())
    }
}

View File

@@ -1,3 +1,5 @@
#[cfg(feature = "utils-bit-stream")]
pub mod bit_stream;
pub mod counter;
pub mod encoding;
#[cfg(windows)]