From 71cdebadd39dd330dc9e23a1e0cedf111167ddd5 Mon Sep 17 00:00:00 2001 From: lifegpc Date: Thu, 12 Jun 2025 11:49:08 +0800 Subject: [PATCH] Add DSC decompress support --- src/ext/mod.rs | 1 + src/ext/vec.rs | 22 ++++ src/scripts/bgi/archive/dsc.rs | 206 +++++++++++++++++++++++++++++++++ src/scripts/bgi/archive/mod.rs | 1 + src/scripts/bgi/archive/v2.rs | 79 ++++++++++++- src/utils/bit_stream.rs | 13 ++- 6 files changed, 319 insertions(+), 3 deletions(-) create mode 100644 src/ext/vec.rs create mode 100644 src/scripts/bgi/archive/dsc.rs diff --git a/src/ext/mod.rs b/src/ext/mod.rs index af514a1..904f814 100644 --- a/src/ext/mod.rs +++ b/src/ext/mod.rs @@ -1 +1,2 @@ pub mod io; +pub mod vec; diff --git a/src/ext/vec.rs b/src/ext/vec.rs new file mode 100644 index 0000000..db33a9d --- /dev/null +++ b/src/ext/vec.rs @@ -0,0 +1,22 @@ +pub trait VecExt { + /// Copy potentially overlapping sequence of elements from `src` to `dst`. + fn copy_overlapped(&mut self, src: usize, dst: usize, len: usize); +} + +impl VecExt for Vec { + fn copy_overlapped(&mut self, src: usize, dst: usize, len: usize) { + let src = src.min(self.len()); + let dst = dst.min(self.len()); + if src < dst { + let max_count = len.min(dst - src); + for i in 0..max_count { + self[dst + i] = self[src + i]; + } + } else { + let max_count = len.min(src - dst); + for i in (0..max_count).rev() { + self[dst + i] = self[src + i]; + } + } + } +} diff --git a/src/scripts/bgi/archive/dsc.rs b/src/scripts/bgi/archive/dsc.rs new file mode 100644 index 0000000..e2e8b95 --- /dev/null +++ b/src/scripts/bgi/archive/dsc.rs @@ -0,0 +1,206 @@ +use crate::ext::io::*; +use crate::ext::vec::*; +use crate::utils::bit_stream::*; +use anyhow::Result; + +#[derive(Debug)] +struct HuffmanCode { + code: u16, + depth: u8, +} + +impl std::cmp::PartialEq for HuffmanCode { + fn eq(&self, other: &Self) -> bool { + self.code == other.code && self.depth == other.depth + } +} + +impl std::cmp::Eq for HuffmanCode {} + +impl std::cmp::PartialOrd for HuffmanCode { + fn partial_cmp(&self, other: &Self) -> Option { + let cmp = self.depth.cmp(&other.depth); + if cmp == std::cmp::Ordering::Equal { + Some(self.code.cmp(&other.code)) + } else { + Some(cmp) + } + } +} + +impl std::cmp::Ord for HuffmanCode { + fn cmp(&self, other: &Self) -> std::cmp::Ordering { + let cmp = self.depth.cmp(&other.depth); + if cmp == std::cmp::Ordering::Equal { + self.code.cmp(&other.code) + } else { + cmp + } + } +} + +#[derive(Clone, Debug)] +struct HuffmanNode { + is_parent: bool, + code: Option, + left_index: usize, + right_index: usize, +} + +pub struct DscDecoder<'a> { + stream: MsbBitStream<'a>, + key: u32, + magic: u32, + output_size: u32, + dec_count: u32, +} + +impl<'a> DscDecoder<'a> { + pub fn new(data: &'a [u8]) -> Result { + let mut reader = MemReaderRef::new(data); + let magic = (reader.read_u16()? as u32) << 16; + reader.pos = 0x10; + let key = reader.read_u32()?; + let output_size = reader.read_u32()?; + let dec_count = reader.read_u32()?; + let stream = MsbBitStream::new(reader); + Ok(DscDecoder { + stream, + key, + magic, + output_size, + dec_count, + }) + } + + pub fn unpack(mut self) -> Result> { + self.stream.m_input.pos = 0x20; + let mut codes = Vec::new(); + for i in 0..512 { + let src = self.stream.m_input.read_u8()?; + let depth = src.overflowing_sub(self.update_key()).0; + if depth > 0 { + codes.push(HuffmanCode { code: i, depth }) + } + } + codes.sort(); + let root = Self::create_huffman_tree(codes); + self.huffman_decompress(root) + } + + fn create_huffman_tree(codes: Vec) -> Vec { + let mut trees = Vec::with_capacity(1024); + trees.resize( + 1024, + HuffmanNode { + is_parent: false, + code: None, + left_index: 0, + right_index: 0, + }, + ); + let mut left_index = vec![0usize; 512]; + let mut right_index = vec![0usize; 512]; + let mut next_node_index = 1usize; + let mut depth_nodes = 1usize; + let mut depth = 0u8; + let mut left_child = true; + let mut n = 0; + while n < codes.len() { + let huffman_node_index = left_child; + left_child = !left_child; + let mut depth_existed_nodes = 0; + while n < codes.len() && codes[n].depth == depth { + let index = if huffman_node_index { + left_index[depth_existed_nodes] + } else { + right_index[depth_existed_nodes] + }; + trees[index].code = Some(codes[n].code); + n += 1; + depth_existed_nodes += 1; + } + let depth_nodes_to_create = depth_nodes - depth_existed_nodes; + for i in 0..depth_nodes_to_create { + let index = if huffman_node_index { + left_index[depth_existed_nodes + i] + } else { + right_index[depth_existed_nodes + i] + }; + let node = &mut trees[index]; + node.is_parent = true; + if left_child { + left_index[i * 2] = next_node_index; + node.left_index = next_node_index; + next_node_index += 1; + left_index[i * 2 + 1] = next_node_index; + node.right_index = next_node_index; + next_node_index += 1; + } else { + right_index[i * 2] = next_node_index; + node.left_index = next_node_index; + next_node_index += 1; + right_index[i * 2 + 1] = next_node_index; + node.right_index = next_node_index; + next_node_index += 1; + } + } + depth += 1; + depth_nodes = depth_nodes_to_create * 2; + } + trees + } + + fn huffman_decompress(&mut self, nodes: Vec) -> Result> { + let output_size = self.output_size as usize; + let mut output = Vec::with_capacity(output_size); + let mut dst = 0; + output.resize(output_size, 0); + for _ in 0..self.dec_count { + let mut current_node = &nodes[0]; + loop { + let bit = self.stream.get_next_bit()?; + if !bit { + current_node = &nodes[current_node.left_index] + } else { + current_node = &nodes[current_node.right_index] + } + if !current_node.is_parent { + break; + } + } + let code = *current_node.code.as_ref().unwrap(); + if code >= 256 { + let mut offset = self.stream.get_bits(12)?; + let count = ((code & 0xFF) + 2) as usize; + offset += 2; + output.copy_overlapped(dst - offset as usize, dst, count); + dst += count; + } else { + output[dst] = code as u8; + dst += 1; + } + } + if dst != output_size { + eprintln!( + "Warning: Output size mismatch, expected {}, got {}", + self.output_size, dst + ); + crate::COUNTER.inc_warning(); + } + Ok(output) + } + + fn update_key(&mut self) -> u8 { + let v0 = 20021 * (self.key & 0xffff); + let mut v1 = self.magic | (self.key >> 16); + v1 = v1 + .overflowing_mul(20021) + .0 + .overflowing_add(self.key.overflowing_mul(346).0) + .0; + v1 = (v1 + (v0 >> 16)) & 0xffff; + self.key = (v1 << 16) + (v0 & 0xffff) + 1; + v1 as u8 + } +} diff --git a/src/scripts/bgi/archive/mod.rs b/src/scripts/bgi/archive/mod.rs index ae6adc7..59a6efe 100644 --- a/src/scripts/bgi/archive/mod.rs +++ b/src/scripts/bgi/archive/mod.rs @@ -1,2 +1,3 @@ +mod dsc; pub mod v1; pub mod v2; diff --git a/src/scripts/bgi/archive/v2.rs b/src/scripts/bgi/archive/v2.rs index d6ff50e..ec53bc5 100644 --- a/src/scripts/bgi/archive/v2.rs +++ b/src/scripts/bgi/archive/v2.rs @@ -1,3 +1,4 @@ +use super::dsc::*; use crate::ext::io::*; use crate::scripts::base::*; use crate::types::*; @@ -139,6 +140,31 @@ impl Read for Entry { } } +struct MemEntry { + name: String, + data: MemReader, +} + +impl Read for MemEntry { + fn read(&mut self, buf: &mut [u8]) -> std::io::Result { + self.data.read(buf) + } +} + +impl ArchiveContent for MemEntry { + fn name(&self) -> &str { + &self.name + } + + fn data(&mut self) -> Result> { + Ok(self.data.data.clone()) + } + + fn to_data<'a>(&'a mut self) -> Result> { + Ok(Box::new(&mut self.data)) + } +} + #[derive(Debug)] pub struct BgiArchive { reader: Arc>, @@ -194,7 +220,7 @@ impl Script for BgiArchive { Ok(Box::new(BgiArchiveIter { entries: self.entries.iter(), reader: self.reader.clone(), - base_offset: 16 + (self.file_count as u64 * 32), + base_offset: 16 + (self.file_count as u64 * 0x80), })) } } @@ -215,12 +241,61 @@ impl<'a, T: Iterator, R: Read + Seek + 'static> Iterat Some(e) => e, None => return None, }; - let entry = Entry { + let mut entry = Entry { header: entry.clone(), reader: self.reader.clone(), pos: 0, base_offset: self.base_offset, }; + let mut buf = [0u8; 16]; + match entry.read(&mut buf) { + Ok(_) => {} + Err(e) => { + return Some(Err(anyhow::anyhow!( + "Failed to read entry '{}': {}", + entry.header.filename, + e + ))); + } + } + entry.pos = 0; + if buf.starts_with(b"DSC FORMAT 1.00") { + let data = match entry.data() { + Ok(data) => data, + Err(e) => { + return Some(Err(anyhow::anyhow!( + "Failed to read DSC data for '{}': {}", + entry.header.filename, + e + ))); + } + }; + entry.pos = 0; + let dsc = match DscDecoder::new(&data) { + Ok(dsc) => dsc, + Err(e) => { + return Some(Err(anyhow::anyhow!( + "Failed to create DSC decoder for '{}': {}", + entry.header.filename, + e + ))); + } + }; + let decoded = match dsc.unpack() { + Ok(decoded) => decoded, + Err(e) => { + return Some(Err(anyhow::anyhow!( + "Failed to unpack DSC data for '{}': {}", + entry.header.filename, + e + ))); + } + }; + return Some(Ok(Box::new(MemEntry { + name: entry.header.filename.clone(), + data: MemReader::new(decoded), + }))); + } Some(Ok(Box::new(entry))) } } diff --git a/src/utils/bit_stream.rs b/src/utils/bit_stream.rs index 506ea28..d0136ce 100644 --- a/src/utils/bit_stream.rs +++ b/src/utils/bit_stream.rs @@ -3,7 +3,7 @@ use anyhow::Result; use std::io::Write; pub struct MsbBitStream<'a> { - m_input: MemReaderRef<'a>, + pub m_input: MemReaderRef<'a>, m_bits: u32, m_cached_bits: u32, } @@ -28,6 +28,17 @@ impl<'a> MsbBitStream<'a> { let result = (self.m_bits >> self.m_cached_bits) & mask; Ok(result) } + + pub fn get_next_bit(&mut self) -> Result { + if self.m_cached_bits == 0 { + let byte = self.m_input.read_u8()?; + self.m_bits = (self.m_bits << 8) | byte as u32; + self.m_cached_bits += 8; + } + self.m_cached_bits -= 1; + let bit = (self.m_bits >> self.m_cached_bits) & 1 != 0; + Ok(bit) + } } pub struct MsbBitWriter<'a, T: Write> {