diff --git a/src/args.rs b/src/args.rs index d9df66a..a9473aa 100644 --- a/src/args.rs +++ b/src/args.rs @@ -318,9 +318,9 @@ pub struct Arg { /// Whether to compress files in BGI archive when packing BGI archive. pub bgi_compress_file: bool, #[cfg(feature = "bgi-arc")] - #[arg(long, global = true, default_value_t = 3, value_parser = crate::scripts::bgi::archive::dsc::parse_min_length)] - /// Minimum length of match size for DSC compression. Possible values are 2-256. - pub bgi_compress_min_len: usize, + #[arg(long, global = true, default_value_t = 9, value_parser = crate::scripts::bgi::archive::dsc::parse_compress_level)] + /// Compress level for BGI Dsc file. 0 means store, 9 means best compression. + pub bgi_compress_level: u8, #[cfg(feature = "emote-img")] #[arg(long, global = true)] /// Whether to overlay PIMG images. (By default, true if all layers are not group layers.) diff --git a/src/main.rs b/src/main.rs index 0feddb4..7b12276 100644 --- a/src/main.rs +++ b/src/main.rs @@ -3225,7 +3225,7 @@ fn main() { #[cfg(feature = "bgi-arc")] bgi_compress_file: arg.bgi_compress_file, #[cfg(feature = "bgi-arc")] - bgi_compress_min_len: arg.bgi_compress_min_len, + bgi_compress_level: arg.bgi_compress_level, #[cfg(feature = "emote-img")] emote_pimg_overlay: arg.emote_pimg_overlay, #[cfg(feature = "artemis-arc")] diff --git a/src/scripts/bgi/archive/dsc.rs b/src/scripts/bgi/archive/dsc.rs index 505f7f9..663ed4b 100644 --- a/src/scripts/bgi/archive/dsc.rs +++ b/src/scripts/bgi/archive/dsc.rs @@ -221,17 +221,111 @@ enum LzssOp { Match { len: u16, offset: u16 }, } +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +pub enum MatchMode { + Store, + Rle, + NonLazy, + Lazy, +} + +#[derive(Debug, Clone, Copy)] +pub struct CompressConfig { + pub good_length: usize, + pub max_lazy: usize, + pub nice_length: usize, + pub max_chain: usize, + pub mode: MatchMode, +} + +pub const COMPRESS_CONFIGS: [CompressConfig; 10] = [ + // 0: Store (No compression) + CompressConfig { + good_length: 0,
max_lazy: 0, + nice_length: 0, + max_chain: 0, + mode: MatchMode::Store, + }, + // 1: RLE (Fastest) - Matches repeated patterns directly + CompressConfig { + good_length: 4, + max_lazy: 0, + nice_length: 8, + max_chain: 0, + mode: MatchMode::Rle, + }, + // 2: RLE (behaves like level 1: RLE matching never reads the length or chain fields) + CompressConfig { + good_length: 4, + max_lazy: 0, + nice_length: 16, + max_chain: 0, + mode: MatchMode::Rle, + }, + // 3: Non-lazy match + CompressConfig { + good_length: 4, + max_lazy: 0, + nice_length: 32, + max_chain: 8, + mode: MatchMode::NonLazy, + }, + // 4: Non-lazy match + CompressConfig { + good_length: 4, + max_lazy: 0, + nice_length: 64, + max_chain: 16, + mode: MatchMode::NonLazy, + }, + // 5: Lazy match + CompressConfig { + good_length: 8, + max_lazy: 16, + nice_length: 32, + max_chain: 32, + mode: MatchMode::Lazy, + }, + // 6: Lazy match + CompressConfig { + good_length: 8, + max_lazy: 16, + nice_length: 128, + max_chain: 128, + mode: MatchMode::Lazy, + }, + // 7: Lazy match + CompressConfig { + good_length: 8, + max_lazy: 32, + nice_length: 128, + max_chain: 256, + mode: MatchMode::Lazy, + }, + // 8: Lazy match + CompressConfig { + good_length: 32, + max_lazy: 128, + nice_length: 258, + max_chain: 1024, + mode: MatchMode::Lazy, + }, + // 9: Lazy match (Best) + CompressConfig { + good_length: 32, + max_lazy: 258, + nice_length: 258, + max_chain: 4096, + mode: MatchMode::Lazy, + }, +]; + /// Computes optimal length-limited Huffman code depths using the Package-Merge algorithm. -/// -/// The Package-Merge algorithm solves the length-limited Huffman coding problem -/// optimally in O(nL) time, where n is the number of symbols and L is the maximum -/// code length. fn package_merge(freqs: &[u32], max_len: u8) -> Vec { let max_len = max_len as usize; let mut depths = vec![0u8; freqs.len()]; - // 1.
收集非零频率的符号 - // 注意:权重使用 u64 以防止多层打包导致的加法溢出 (u32 最大 42亿,对于大文件可能会溢出) let mut symbols: Vec<(u64, Vec)> = freqs .iter() .enumerate() @@ -244,55 +338,38 @@ fn package_merge(freqs: &[u32], max_len: u8) -> Vec { return depths; } if n == 1 { - // 如果只有一个符号,分配 1 位深 depths[symbols[0].1[0]] = 1; return depths; } - // 按权重升序排序 symbols.sort_by_key(|x| x.0); - - // 2. 迭代构建 Package 列表 let mut prev_list = symbols.clone(); for _ in 1..max_len { - let mut current_list = symbols.clone(); // 每一层都要混入基础叶子节点 - - // 将前一层列表中的元素两两打包 (Package) + let mut current_list = symbols.clone(); for p in 0..(prev_list.len() / 2) { let left = &prev_list[p * 2]; let right = &prev_list[p * 2 + 1]; - + let combined_weight = left.0 + right.0; - - // 合并符号:保留重复项,绝对不能用 dedup()! - // 因为被选中的包会使其内部所有符号的树深 +1 let mut combined_indices = Vec::with_capacity(left.1.len() + right.1.len()); combined_indices.extend_from_slice(&left.1); combined_indices.extend_from_slice(&right.1); current_list.push((combined_weight, combined_indices)); } - - // 按权重重新排序(Rust 的 sort_by_key 是稳定排序,满足要求) current_list.sort_by_key(|x| x.0); prev_list = current_list; } - // 3. 
Merge 选择阶段 - // 根据硬币收集模型,一棵有 n 个叶子的合法二叉树,必须恰好选中 2n-2 个节点/包 let items_to_select = (2 * n).saturating_sub(2); - - // 安全检查,正常情况下 L 足够大时,列表长度必然 >= 2n-2 let take_count = std::cmp::min(items_to_select, prev_list.len()); - // 统计各符号在被选中的前 2n-2 个包中出现的次数,次数就是它的深度 for i in 0..take_count { for &sym in &prev_list[i].1 { depths[sym] += 1; } } - depths } @@ -329,102 +406,192 @@ fn generate_canonical_codes(depths: &[u8]) -> Vec> { current_code += 1; last_depth = depth; } - huffman_codes } +/// Finds the longest earlier match for the bytes at `pos`, searching as configured by `config`. +/// +/// Returns `(length, offset)`; `(0, 0)` means no usable match was found. +/// Matches are capped at 257 bytes; offsets are at least 2 and at most 4097. +#[inline(always)] +fn find_match( + data: &[u8], + pos: usize, + head: &[i32], + prev: &[i32], + config: &CompressConfig, +) -> (usize, usize) { + if config.mode == MatchMode::Store || pos + 2 > data.len() { + return (0, 0); + } + + let max_len = (data.len() - pos).min(257); + + // Low-level RLE: only probe the smallest offset the format permits (pos - 2) + if config.mode == MatchMode::Rle { + let mut best_len = 0; + if pos >= 2 { + while best_len < max_len && data.get(pos + best_len) == data.get(pos - 2 + best_len) { + best_len += 1; + } + } + if best_len >= 2 { + return (best_len, 2); + } + return (0, 0); + } + + let limit = pos.saturating_sub(4097); + let key = ((data[pos] as u16) << 8) | (data[pos + 1] as u16); + let mut match_pos_i32 = head[key as usize]; + let mut chain_length = config.max_chain; + + let mut best_len = 0; + let mut best_offset = 0; + + while match_pos_i32 != -1 && chain_length > 0 { + let match_pos = match_pos_i32 as usize; + if match_pos < limit { + break; + } + + // The format requires offset >= 2 + if pos - match_pos < 2 { + match_pos_i32 = prev[match_pos]; + chain_length -= 1; + continue; + } + + // Quick pruning: skip candidates that cannot beat the current best length + if best_len < max_len { + if data.get(match_pos + best_len) != data.get(pos + best_len) { + match_pos_i32 = prev[match_pos]; + chain_length -= 1; + continue; + } + } + + let mut current_len = 0; + while current_len < max_len + && data.get(pos + current_len) == data.get(match_pos + current_len) + { + current_len += 1; + } + + if current_len > best_len { + best_len = current_len; + best_offset = pos - match_pos;
if current_len >= config.nice_length || current_len >= max_len { + break; + } + if current_len >= config.good_length { + chain_length >>= 2; + } + } + + match_pos_i32 = prev[match_pos]; + chain_length = chain_length.saturating_sub(1); // `>>= 2` above may already have zeroed the budget + } + + if best_len >= 2 { + (best_len, best_offset) + } else { + (0, 0) + } +} + /// Encoder for Buriko General Interpreter/Ethornell compressed files (DSC format). pub struct DscEncoder<'a, T: Write + Seek> { stream: MsbBitWriter<'a, T>, magic: u32, key: u32, dec_count: u32, - min_len: usize, + level: u8, } impl<'a, T: Write + Seek> DscEncoder<'a, T> { - /// Creates a new DscEncoder with the given writer and minimum length for LZSS compression. - pub fn new(writer: &'a mut T, min_len: usize) -> Self { + /// Creates a new DscEncoder with the given writer and compression level (0-9). + pub fn new(writer: &'a mut T, level: u8) -> Self { let stream = MsbBitWriter::new(writer); DscEncoder { stream, magic: 0x5344 << 16, // "DS" key: rand::rng().random(), dec_count: 0, - min_len, + level: level.min(9), } } - /// Packs the given data into the DSC format using LZSS compression. + /// Packs the given data into the DSC format using configured LZSS compression.
pub fn pack(mut self, data: &[u8]) -> Result<()> { - // LZSS compression let mut ops = vec![]; let mut pos = 0; - - const MAX_LEN: usize = 257; - const WINDOW_SIZE: usize = 4097; + let config = &COMPRESS_CONFIGS[self.level as usize]; let mut head: Vec = vec![-1; 1 << 16]; let mut prev: Vec = vec![-1; data.len()]; + let insert_dict = |p: usize, head: &mut [i32], prev: &mut [i32]| { + if config.mode != MatchMode::Rle && p + 1 < data.len() { + let key = ((data[p] as u16) << 8) | (data[p + 1] as u16); + prev[p] = head[key as usize]; + head[key as usize] = p as i32; + } + }; + while pos < data.len() { - let max_len = (data.len() - pos).min(MAX_LEN); - let mut best_len = 0; - let mut best_offset = 0; - - if max_len >= self.min_len { - let limit = pos.saturating_sub(WINDOW_SIZE); - let key = (data[pos] as u16) << 8 | data[pos + 1] as u16; - let mut match_pos_i32 = head[key as usize]; - - while match_pos_i32 != -1 { - let match_pos = match_pos_i32 as usize; - if match_pos < limit { - break; - } - - if data.get(match_pos + best_len) == data.get(pos + best_len) { - let mut current_len = 0; - for i in 0..max_len { - if data.get(pos + i) != data.get(match_pos + i) { - break; - } - current_len += 1; - } - - if current_len > best_len { - best_len = current_len; - best_offset = pos - match_pos; - if best_len >= max_len { - break; - } - } - } - match_pos_i32 = prev[match_pos]; - } + if config.mode == MatchMode::Store { + ops.push(LzssOp::Literal(data[pos])); + pos += 1; + continue; } - if best_len >= self.min_len && best_offset >= 2 { - ops.push(LzssOp::Match { - len: best_len as u16, - offset: best_offset as u16, - }); - for i in 0..best_len { - if pos + i + 1 < data.len() { - let key = (data[pos + i] as u16) << 8 | data[pos + i + 1] as u16; - let current_pos = pos + i; - prev[current_pos] = head[key as usize]; - head[key as usize] = current_pos as i32; + let (match_len, match_offset) = find_match(data, pos, &head, &prev, config); + + if match_len >= 2 { + let mut lazy_match =
false; + + // Lazy matching: prefer a literal here when the next position yields a longer match + if config.mode == MatchMode::Lazy + && match_len <= config.max_lazy + && pos + 1 < data.len() + { + insert_dict(pos, &mut head, &mut prev); + + let (next_len, _) = find_match(data, pos + 1, &head, &prev, config); + + if next_len > match_len { + lazy_match = true; } } - pos += best_len; + + if lazy_match { + ops.push(LzssOp::Literal(data[pos])); + pos += 1; + continue; + } + + ops.push(LzssOp::Match { + len: match_len as u16, + offset: match_offset as u16, + }); + + let start_insert = if config.mode == MatchMode::Lazy + && match_len <= config.max_lazy + && pos + 1 < data.len() + { + 1 // pos was already inserted by the lazy check above + } else { + 0 + }; + + for i in start_insert..match_len { + insert_dict(pos + i, &mut head, &mut prev); + } + pos += match_len; } else { ops.push(LzssOp::Literal(data[pos])); - if pos + 1 < data.len() { - let key = (data[pos] as u16) << 8 | data[pos + 1] as u16; - prev[pos] = head[key as usize]; - head[key as usize] = pos as i32; - } + insert_dict(pos, &mut head, &mut prev); pos += 1; } } @@ -550,7 +717,7 @@ impl ScriptBuilder for DscBuilder { _file_encoding: Encoding, config: &ExtraConfig, ) -> Result<()> { - let encoder = DscEncoder::new(&mut writer, config.bgi_compress_min_len); + let encoder = DscEncoder::new(&mut writer, config.bgi_compress_level); let data = crate::utils::files::read_file(filename)?; encoder.pack(&data)?; Ok(()) @@ -561,7 +728,7 @@ impl ScriptBuilder for DscBuilder { /// DSC script pub struct Dsc { data: Vec, - min_len: usize, + level: u8, } impl Dsc { @@ -577,7 +744,7 @@ impl Dsc { let data = decoder.unpack()?; Ok(Dsc { data, - min_len: config.bgi_compress_min_len, + level: config.bgi_compress_level, }) } } @@ -612,14 +779,14 @@ impl Script for Dsc { _encoding: Encoding, _output_encoding: Encoding, ) -> Result<()> { - let encoder = DscEncoder::new(&mut file, self.min_len); + let encoder = DscEncoder::new(&mut file, self.level); let data = crate::utils::files::read_file(custom_filename)?; encoder.pack(&data)?; Ok(())
} } -/// Parses the minimum length for LZSS compression from a string. -pub fn parse_min_length(len: &str) -> Result { - number_range(len, 2, 256) +/// Parses the compression level for LZSS compression from a string. +pub fn parse_compress_level(level: &str) -> Result { + number_range(level, 0, 9).map(|v| v as u8) } diff --git a/src/scripts/bgi/archive/v1.rs b/src/scripts/bgi/archive/v1.rs index 0530cac..44f525c 100644 --- a/src/scripts/bgi/archive/v1.rs +++ b/src/scripts/bgi/archive/v1.rs @@ -534,7 +534,7 @@ pub struct BgiArchiveWriter { headers: HashMap, compress_file: bool, encoding: Encoding, - min_len: usize, + compress_level: u8, } impl BgiArchiveWriter { @@ -569,7 +569,7 @@ impl BgiArchiveWriter { headers, compress_file: config.bgi_compress_file, encoding, - min_len: config.bgi_compress_min_len, + compress_level: config.bgi_compress_level, }) } } @@ -595,7 +595,7 @@ impl Archive for BgiArchiveWriter { pos: 0, }; Ok(if self.compress_file { - Box::new(BgiArchiveFileWithDsc::new(file, self.min_len)) + Box::new(BgiArchiveFileWithDsc::new(file, self.compress_level)) } else { Box::new(file) }) @@ -676,7 +676,7 @@ impl<'a, T: Write + Seek> Seek for BgiArchiveFile<'a, T> { pub struct BgiArchiveFileWithDsc<'a, T: Write + Seek> { writer: BgiArchiveFile<'a, T>, buf: MemWriter, - min_len: usize, + compress_level: u8, } impl<'a, T: Write + Seek> BgiArchiveFileWithDsc<'a, T> { @@ -684,11 +684,11 @@ impl<'a, T: Write + Seek> BgiArchiveFileWithDsc<'a, T> { /// /// * `writer` - The writer to write the archive file to. - /// * `min_len` - The minimum length for LZSS compression. + /// * `compress_level` - The DSC compression level (0-9).
- pub fn new(writer: BgiArchiveFile<'a, T>, min_len: usize) -> Self { + pub fn new(writer: BgiArchiveFile<'a, T>, compress_level: u8) -> Self { BgiArchiveFileWithDsc { writer, buf: MemWriter::new(), - min_len, + compress_level, } } } @@ -720,7 +720,7 @@ impl<'a, T: Write + Seek> Seek for BgiArchiveFileWithDsc<'a, T> { impl<'a, T: Write + Seek> Drop for BgiArchiveFileWithDsc<'a, T> { fn drop(&mut self) { let buf = self.buf.as_slice(); - let encoder = DscEncoder::new(&mut self.writer, self.min_len); + let encoder = DscEncoder::new(&mut self.writer, self.compress_level); if let Err(e) = encoder.pack(&buf) { eprintln!("Failed to write DSC data: {}", e); crate::COUNTER.inc_error(); diff --git a/src/scripts/bgi/archive/v2.rs b/src/scripts/bgi/archive/v2.rs index 7c8be16..ca5467c 100644 --- a/src/scripts/bgi/archive/v2.rs +++ b/src/scripts/bgi/archive/v2.rs @@ -536,7 +536,7 @@ pub struct BgiArchiveWriter { headers: HashMap, compress_file: bool, encoding: Encoding, - min_len: usize, + compress_level: u8, } impl BgiArchiveWriter { @@ -572,7 +572,7 @@ impl BgiArchiveWriter { headers, compress_file: config.bgi_compress_file, encoding, - min_len: config.bgi_compress_min_len, + compress_level: config.bgi_compress_level, }) } } @@ -598,7 +598,7 @@ impl Archive for BgiArchiveWriter { pos: 0, }; Ok(if self.compress_file { - Box::new(BgiArchiveFileWithDsc::new(file, self.min_len)) + Box::new(BgiArchiveFileWithDsc::new(file, self.compress_level)) } else { Box::new(file) }) @@ -679,7 +679,7 @@ impl<'a, T: Write + Seek> Seek for BgiArchiveFile<'a, T> { pub struct BgiArchiveFileWithDsc<'a, T: Write + Seek> { writer: BgiArchiveFile<'a, T>, buf: MemWriter, - min_len: usize, + compress_level: u8, } impl<'a, T: Write + Seek> BgiArchiveFileWithDsc<'a, T> { @@ -687,11 +687,11 @@ impl<'a, T: Write + Seek> BgiArchiveFileWithDsc<'a, T> { /// /// * `writer` - The writer to write the archive file to. - /// * `min_len` - The minimum length for LZSS compression. + /// * `compress_level` - The DSC compression level (0-9).
- pub fn new(writer: BgiArchiveFile<'a, T>, min_len: usize) -> Self { + pub fn new(writer: BgiArchiveFile<'a, T>, compress_level: u8) -> Self { BgiArchiveFileWithDsc { writer, buf: MemWriter::new(), - min_len, + compress_level, } } } @@ -723,7 +723,7 @@ impl<'a, T: Write + Seek> Seek for BgiArchiveFileWithDsc<'a, T> { impl<'a, T: Write + Seek> Drop for BgiArchiveFileWithDsc<'a, T> { fn drop(&mut self) { let buf = self.buf.as_slice(); - let encoder = DscEncoder::new(&mut self.writer, self.min_len); + let encoder = DscEncoder::new(&mut self.writer, self.compress_level); if let Err(e) = encoder.pack(&buf) { eprintln!("Failed to write DSC data: {}", e); crate::COUNTER.inc_error(); diff --git a/src/types.rs b/src/types.rs index b4ea60a..262d13b 100644 --- a/src/types.rs +++ b/src/types.rs @@ -349,9 +349,9 @@ pub struct ExtraConfig { /// Whether to compress files in BGI archive when packing BGI archive. pub bgi_compress_file: bool, #[cfg(feature = "bgi-arc")] - #[default(3)] - /// Minimum length of match size for DSC compression. Possible values are 2-256. - pub bgi_compress_min_len: usize, + #[default(9)] + /// Compress level for BGI Dsc file. 0 means store, 9 means best compression. + pub bgi_compress_level: u8, #[cfg(feature = "emote-img")] /// Whether to overlay PIMG images. (By default, true if all layers are not group layers.) pub emote_pimg_overlay: Option,