//! Buriko General Interpreter/Ethornell compressed file in archive use crate::ext::io::*; use crate::ext::vec::*; use crate::scripts::base::*; use crate::types::*; use crate::utils::bit_stream::*; use crate::utils::num_range::*; use anyhow::Result; use rand::RngExt; use std::collections::BinaryHeap; use std::io::{Seek, Write}; #[derive(Debug)] struct HuffmanCode { code: u16, depth: u8, } impl std::cmp::PartialEq for HuffmanCode { fn eq(&self, other: &Self) -> bool { self.code == other.code && self.depth == other.depth } } impl std::cmp::Eq for HuffmanCode {} impl std::cmp::PartialOrd for HuffmanCode { fn partial_cmp(&self, other: &Self) -> Option { let cmp = self.depth.cmp(&other.depth); if cmp == std::cmp::Ordering::Equal { Some(self.code.cmp(&other.code)) } else { Some(cmp) } } } impl std::cmp::Ord for HuffmanCode { fn cmp(&self, other: &Self) -> std::cmp::Ordering { let cmp = self.depth.cmp(&other.depth); if cmp == std::cmp::Ordering::Equal { self.code.cmp(&other.code) } else { cmp } } } #[derive(Clone, Debug)] struct HuffmanNode { is_parent: bool, code: Option, left_index: usize, right_index: usize, } /// Decoder for Buriko General Interpreter/Ethornell compressed files (DSC format). pub struct DscDecoder<'a> { stream: MsbBitStream>, key: u32, magic: u32, output_size: u32, dec_count: u32, } impl<'a> DscDecoder<'a> { /// Creates a new DscDecoder from the given data slice. pub fn new(data: &'a [u8]) -> Result { let mut reader = MemReaderRef::new(data); let magic = (reader.read_u16()? as u32) << 16; reader.pos = 0x10; let key = reader.read_u32()?; let output_size = reader.read_u32()?; let dec_count = reader.read_u32()?; let stream = MsbBitStream::new(reader); Ok(DscDecoder { stream, key, magic, output_size, dec_count, }) } /// Unpacks the DSC file and returns the decompressed data. pub fn unpack(mut self) -> Result> { self.stream.m_input.pos = 0x20; let mut codes = Vec::new(); for i in 0..512 { let src = self.stream.m_input.read_u8()?; let depth = src.overflowing_sub(self.update_key()).0; if depth > 0 { codes.push(HuffmanCode { code: i, depth }) } } codes.sort(); let root = Self::create_huffman_tree(codes); self.huffman_decompress(root) } fn create_huffman_tree(codes: Vec) -> Vec { let mut trees = Vec::with_capacity(1024); trees.resize( 1024, HuffmanNode { is_parent: false, code: None, left_index: 0, right_index: 0, }, ); let mut left_index = vec![0usize; 512]; let mut right_index = vec![0usize; 512]; let mut next_node_index = 1usize; let mut depth_nodes = 1usize; let mut depth = 0u8; let mut left_child = true; let mut n = 0; while n < codes.len() { let huffman_node_index = left_child; left_child = !left_child; let mut depth_existed_nodes = 0; while n < codes.len() && codes[n].depth == depth { let index = if huffman_node_index { left_index[depth_existed_nodes] } else { right_index[depth_existed_nodes] }; trees[index].code = Some(codes[n].code); n += 1; depth_existed_nodes += 1; } let depth_nodes_to_create = depth_nodes - depth_existed_nodes; for i in 0..depth_nodes_to_create { let index = if huffman_node_index { left_index[depth_existed_nodes + i] } else { right_index[depth_existed_nodes + i] }; let node = &mut trees[index]; node.is_parent = true; if left_child { left_index[i * 2] = next_node_index; node.left_index = next_node_index; next_node_index += 1; left_index[i * 2 + 1] = next_node_index; node.right_index = next_node_index; next_node_index += 1; } else { right_index[i * 2] = next_node_index; node.left_index = next_node_index; next_node_index += 1; right_index[i * 2 + 1] = next_node_index; node.right_index = next_node_index; next_node_index += 1; } } depth += 1; depth_nodes = depth_nodes_to_create * 2; } trees } fn huffman_decompress(&mut self, nodes: Vec) -> Result> { let output_size = self.output_size as usize; let mut output = Vec::with_capacity(output_size); let mut dst = 0; output.resize(output_size, 0); for _ in 0..self.dec_count { let mut current_node = &nodes[0]; loop { let bit = self.stream.get_next_bit()?; if !bit { current_node = &nodes[current_node.left_index] } else { current_node = &nodes[current_node.right_index] } if !current_node.is_parent { break; } } let code = *current_node.code.as_ref().unwrap(); if code >= 256 { let mut offset = self.stream.get_bits(12)?; let count = ((code & 0xFF) + 2) as usize; offset += 2; output.copy_overlapped(dst - offset as usize, dst, count); dst += count; } else { output[dst] = code as u8; dst += 1; } } if dst != output_size { eprintln!( "Warning: Output size mismatch, expected {}, got {}", self.output_size, dst ); crate::COUNTER.inc_warning(); } Ok(output) } fn update_key(&mut self) -> u8 { let v0 = 20021 * (self.key & 0xffff); let mut v1 = self.magic | (self.key >> 16); v1 = v1 .overflowing_mul(20021) .0 .overflowing_add(self.key.overflowing_mul(346).0) .0; v1 = overf::wrapping!(v1 + (v0 >> 16)) & 0xffff; self.key = (v1 << 16) + (v0 & 0xffff) + 1; v1 as u8 } } #[derive(Debug, Clone, Copy)] enum LzssOp { Literal(u8), Match { len: u16, offset: u16 }, } #[derive(Debug)] struct FreqNode { freq: u32, symbol: Option, left: Option>, right: Option>, } impl PartialEq for FreqNode { fn eq(&self, other: &Self) -> bool { self.freq == other.freq } } impl Eq for FreqNode {} impl PartialOrd for FreqNode { fn partial_cmp(&self, other: &Self) -> Option { Some(self.cmp(other)) } } impl Ord for FreqNode { fn cmp(&self, other: &Self) -> std::cmp::Ordering { other.freq.cmp(&self.freq) } } fn calculate_huffman_depths(freqs: &[u32]) -> Vec { const MAX_DEPTH: u8 = 9; // 收集所有非零频率的符号 let mut symbols_with_freq: Vec<(u16, u32)> = freqs .iter() .enumerate() .filter_map(|(symbol, &freq)| { if freq > 0 { Some((symbol as u16, freq)) } else { None } }) .collect(); let mut depths = vec![0u8; 512]; if symbols_with_freq.is_empty() { return depths; } if symbols_with_freq.len() == 1 { depths[symbols_with_freq[0].0 as usize] = 1; return depths; } // 使用受限Huffman算法 loop { let current_depths = build_huffman_tree(&symbols_with_freq); let max_depth = current_depths.iter().max().copied().unwrap_or(0); if max_depth <= MAX_DEPTH { // 将深度映射回原始数组 for &(symbol, _) in &symbols_with_freq { let symbol_index = symbols_with_freq .iter() .position(|(s, _)| *s == symbol) .unwrap(); depths[symbol as usize] = current_depths[symbol_index]; } break; } // 如果深度超限,调整频率 adjust_frequencies_for_depth_limit(&mut symbols_with_freq); } depths } fn build_huffman_tree(symbols_with_freq: &[(u16, u32)]) -> Vec { let mut heap = BinaryHeap::new(); // 添加所有叶子节点 for &(symbol, freq) in symbols_with_freq { heap.push(FreqNode { freq, symbol: Some(symbol), left: None, right: None, }); } // 构建Huffman树 while heap.len() > 1 { let node1 = heap.pop().unwrap(); let node2 = heap.pop().unwrap(); let new_node = FreqNode { freq: node1.freq + node2.freq, symbol: None, left: Some(Box::new(node1)), right: Some(Box::new(node2)), }; heap.push(new_node); } // 计算深度 let mut depths = vec![0u8; symbols_with_freq.len()]; if let Some(root) = heap.pop() { calculate_depths(&root, 0, symbols_with_freq, &mut depths); } depths } fn calculate_depths( node: &FreqNode, depth: u8, symbols_with_freq: &[(u16, u32)], depths: &mut [u8], ) { if let Some(symbol) = node.symbol { let symbol_index = symbols_with_freq .iter() .position(|(s, _)| *s == symbol) .unwrap(); depths[symbol_index] = if depth == 0 { 1 } else { depth }; } else { if let Some(ref left) = node.left { calculate_depths(left, depth + 1, symbols_with_freq, depths); } if let Some(ref right) = node.right { calculate_depths(right, depth + 1, symbols_with_freq, depths); } } } fn adjust_frequencies_for_depth_limit(symbols_with_freq: &mut [(u16, u32)]) { // 按频率排序 symbols_with_freq.sort_by(|a, b| a.1.cmp(&b.1)); // 使用Package-Merge算法的简化版本 // 这里使用一个启发式方法:增加低频符号的频率 let min_freq = symbols_with_freq[0].1; let adjustment = (min_freq as f64 * 0.1).max(1.0) as u32; // 找到频率最低的几个符号并调整它们的频率 let num_to_adjust = (symbols_with_freq.len() / 4).max(1); for i in 0..num_to_adjust.min(symbols_with_freq.len()) { symbols_with_freq[i].1 += adjustment; } } fn generate_canonical_codes(depths: &[u8]) -> Vec> { let mut codes_with_depths = vec![]; for (symbol, &depth) in depths.iter().enumerate() { if depth > 0 { codes_with_depths.push((symbol as u16, depth)); } } codes_with_depths.sort_by(|a, b| { let depth_cmp = a.1.cmp(&b.1); if depth_cmp == std::cmp::Ordering::Equal { a.0.cmp(&b.0) } else { depth_cmp } }); let mut huffman_codes = vec![None; 512]; let mut current_code = 0u16; let mut last_depth = 0u8; for &(symbol, depth) in &codes_with_depths { if last_depth != 0 { current_code <<= depth - last_depth; } huffman_codes[symbol as usize] = Some((current_code, depth)); current_code += 1; last_depth = depth; } huffman_codes } /// Encoder for Buriko General Interpreter/Ethornell compressed files (DSC format). pub struct DscEncoder<'a, T: Write + Seek> { stream: MsbBitWriter<'a, T>, magic: u32, key: u32, dec_count: u32, min_len: usize, } impl<'a, T: Write + Seek> DscEncoder<'a, T> { /// Creates a new DscEncoder with the given writer and minimum length for LZSS compression. pub fn new(writer: &'a mut T, min_len: usize) -> Self { let stream = MsbBitWriter::new(writer); DscEncoder { stream, magic: 0x5344 << 16, // "DS" key: rand::rng().random(), dec_count: 0, min_len, } } /// Packs the given data into the DSC format using LZSS compression. pub fn pack(mut self, data: &[u8]) -> Result<()> { // LZSS compression let mut ops = vec![]; let mut pos = 0; const MAX_LEN: usize = 257; const WINDOW_SIZE: usize = 4097; let mut head: Vec = vec![-1; 1 << 16]; let mut prev: Vec = vec![-1; data.len()]; while pos < data.len() { let max_len = (data.len() - pos).min(MAX_LEN); let mut best_len = 0; let mut best_offset = 0; if max_len >= self.min_len { let limit = pos.saturating_sub(WINDOW_SIZE); let key = (data[pos] as u16) << 8 | data[pos + 1] as u16; let mut match_pos_i32 = head[key as usize]; while match_pos_i32 != -1 { let match_pos = match_pos_i32 as usize; if match_pos < limit { break; } if data.get(match_pos + best_len) == data.get(pos + best_len) { let mut current_len = 0; for i in 0..max_len { if data.get(pos + i) != data.get(match_pos + i) { break; } current_len += 1; } if current_len > best_len { best_len = current_len; best_offset = pos - match_pos; if best_len >= max_len { break; } } } match_pos_i32 = prev[match_pos]; } } if best_len >= self.min_len && best_offset >= 2 { ops.push(LzssOp::Match { len: best_len as u16, offset: best_offset as u16, }); for i in 0..best_len { if pos + i + 1 < data.len() { let key = (data[pos + i] as u16) << 8 | data[pos + i + 1] as u16; let current_pos = pos + i; prev[current_pos] = head[key as usize]; head[key as usize] = current_pos as i32; } } pos += best_len; } else { ops.push(LzssOp::Literal(data[pos])); if pos + 1 < data.len() { let key = (data[pos] as u16) << 8 | data[pos + 1] as u16; prev[pos] = head[key as usize]; head[key as usize] = pos as i32; } pos += 1; } } let symbols: Vec = ops .iter() .map(|op| match op { LzssOp::Literal(byte) => *byte as u16, LzssOp::Match { len, .. } => 256 + (len - 2), }) .collect(); self.dec_count = symbols.len() as u32; let mut freqs = vec![0u32; 512]; for &s in &symbols { freqs[s as usize] += 1; } let depths = calculate_huffman_depths(&freqs); let huffman_codes = generate_canonical_codes(&depths); self.stream.writer.write_all(b"DSC FORMAT 1.00\0")?; self.stream.writer.seek(std::io::SeekFrom::Start(0x10))?; self.stream.writer.write_u32(self.key)?; self.stream.writer.write_u32(data.len() as u32)?; self.stream.writer.write_u32(self.dec_count)?; self.stream.writer.seek(std::io::SeekFrom::Start(0x20))?; for depth in depths.iter() { let key = self.update_key(); self.stream.writer.write_u8(depth.overflowing_add(key).0)?; } for op in &ops { match op { LzssOp::Literal(byte) => { let symbol = *byte as u16; let (code, len) = huffman_codes[symbol as usize].unwrap(); self.stream.put_bits(code as u32, len)?; } LzssOp::Match { len, offset } => { let symbol = 256 + (len - 2); let (code, huff_len) = huffman_codes[symbol as usize].unwrap(); self.stream.put_bits(code as u32, huff_len)?; self.stream.put_bits((*offset - 2) as u32, 12)?; } } } self.stream.flush()?; Ok(()) } fn update_key(&mut self) -> u8 { let v0 = 20021 * (self.key & 0xffff); let mut v1 = self.magic | (self.key >> 16); v1 = v1 .overflowing_mul(20021) .0 .overflowing_add(self.key.overflowing_mul(346).0) .0; v1 = (v1 + (v0 >> 16)) & 0xffff; self.key = (v1 << 16) + (v0 & 0xffff) + 1; v1 as u8 } } #[derive(Debug)] /// Builder for DSC scripts. pub struct DscBuilder {} impl DscBuilder { /// Creates a new instance of `DscBuilder`. pub fn new() -> Self { DscBuilder {} } } impl ScriptBuilder for DscBuilder { fn default_encoding(&self) -> Encoding { Encoding::Cp932 } fn default_archive_encoding(&self) -> Option { Some(Encoding::Cp932) } fn build_script( &self, buf: Vec, _filename: &str, _encoding: Encoding, _archive_encoding: Encoding, config: &ExtraConfig, _archive: Option<&Box>, ) -> Result> { Ok(Box::new(Dsc::new(buf, config)?)) } fn extensions(&self) -> &'static [&'static str] { &[] } fn script_type(&self) -> &'static ScriptType { &ScriptType::BGIDsc } fn is_this_format(&self, _filename: &str, buf: &[u8], buf_len: usize) -> Option { if buf_len >= 16 && buf.starts_with(b"DSC FORMAT 1.00\0") { return Some(255); } None } fn can_create_file(&self) -> bool { true } fn create_file<'a>( &'a self, filename: &'a str, mut writer: Box, _encoding: Encoding, _file_encoding: Encoding, config: &ExtraConfig, ) -> Result<()> { let encoder = DscEncoder::new(&mut writer, config.bgi_compress_min_len); let data = crate::utils::files::read_file(filename)?; encoder.pack(&data)?; Ok(()) } } #[derive(Debug)] /// DSC script pub struct Dsc { data: Vec, min_len: usize, } impl Dsc { /// Creates a new Dsc script /// /// * `buf` - The buffer containing the DSC data. /// * `config` - Extra configuration options. pub fn new(buf: Vec, config: &ExtraConfig) -> Result { if buf.len() < 16 || !buf.starts_with(b"DSC FORMAT 1.00\0") { return Err(anyhow::anyhow!("Invalid DSC format")); } let decoder = DscDecoder::new(&buf)?; let data = decoder.unpack()?; Ok(Dsc { data, min_len: config.bgi_compress_min_len, }) } } impl Script for Dsc { fn default_output_script_type(&self) -> OutputScriptType { OutputScriptType::Custom } fn is_output_supported(&self, output: OutputScriptType) -> bool { matches!(output, OutputScriptType::Custom) } fn default_format_type(&self) -> FormatOptions { FormatOptions::None } fn custom_output_extension(&self) -> &'static str { "" } fn custom_export(&self, filename: &std::path::Path, _encoding: Encoding) -> Result<()> { let mut f = std::fs::File::create(filename)?; f.write_all(&self.data)?; Ok(()) } fn custom_import<'a>( &'a self, custom_filename: &'a str, mut file: Box, _encoding: Encoding, _output_encoding: Encoding, ) -> Result<()> { let encoder = DscEncoder::new(&mut file, self.min_len); let data = crate::utils::files::read_file(custom_filename)?; encoder.pack(&data)?; Ok(()) } } /// Parses the minimum length for LZSS compression from a string. pub fn parse_min_length(len: &str) -> Result { number_range(len, 2, 256) }