Add LZW Support

This commit is contained in:
2025-06-04 01:02:44 +08:00
parent dead6a0b18
commit a734d8ac5b
8 changed files with 395 additions and 22 deletions

View File

@@ -73,6 +73,7 @@ pub trait ScriptBuilder: std::fmt::Debug {
_filename: &str,
_files: &[&str],
_encoding: Encoding,
_config: &ExtraConfig,
) -> Result<Box<dyn Archive>> {
Err(anyhow::anyhow!(
"This script type does not support creating an archive."

View File

@@ -105,10 +105,11 @@ impl ScriptBuilder for EscudeBinArchiveBuilder {
filename: &str,
files: &[&str],
encoding: Encoding,
config: &ExtraConfig,
) -> Result<Box<dyn Archive>> {
let f = std::fs::File::create(filename)?;
let writer = std::io::BufWriter::new(f);
let archive = EscudeBinArchiveWriter::new(writer, files, encoding)?;
let archive = EscudeBinArchiveWriter::new(writer, files, encoding, config)?;
Ok(Box::new(archive))
}
}
@@ -284,7 +285,7 @@ impl<'a, T: Iterator<Item = &'a BinEntry>, R: Read + Seek> Iterator
};
data = match decoder.unpack() {
Ok(unpacked_data) => unpacked_data,
Err(e) => return Some(Err(anyhow::anyhow!("Failed to unpack LZW data: {}", e))),
Err(e) => return Some(Err(e)),
};
}
Some(Ok(Box::new(Entry { name, data })))
@@ -295,10 +296,16 @@ pub struct EscudeBinArchiveWriter<T: Write + Seek> {
writer: T,
headers: HashMap<String, BinEntry>,
name_tbl_len: u32,
fake: bool,
}
impl<T: Write + Seek> EscudeBinArchiveWriter<T> {
pub fn new(mut writer: T, files: &[&str], encoding: Encoding) -> Result<Self> {
pub fn new(
mut writer: T,
files: &[&str],
encoding: Encoding,
config: &ExtraConfig,
) -> Result<Self> {
writer.write_all(b"ESC-ARC2")?;
let header_len = 0xC + 0xC * files.len();
let header = vec![0u8; header_len];
@@ -324,6 +331,7 @@ impl<T: Write + Seek> EscudeBinArchiveWriter<T> {
writer,
headers,
name_tbl_len,
fake: config.escude_fake_compress,
})
}
}
@@ -338,11 +346,11 @@ impl<T: Write + Seek> Archive for EscudeBinArchiveWriter<T> {
return Err(anyhow::anyhow!("File '{}' already exists in archive", name));
}
entry.data_offset = self.writer.stream_position()? as u32;
Ok(Box::new(EscudeBinArchiveFile {
header: entry,
writer: &mut self.writer,
pos: 0,
}))
Ok(Box::new(EscudeBinArchiveFileWithLzw::new(
entry,
&mut self.writer,
self.fake,
)?))
}
fn write_header(&mut self) -> Result<()> {
@@ -361,6 +369,75 @@ impl<T: Write + Seek> Archive for EscudeBinArchiveWriter<T> {
}
}
/// Archive entry writer that stages everything written to it in memory.
///
/// The staged bytes are passed through the LZW encoder and forwarded to the
/// wrapped entry writer only when this value is dropped.
pub struct EscudeBinArchiveFileWithLzw<'a, T: Write + Seek> {
// Underlying entry writer; receives the encoded bytes on drop.
writer: EscudeBinArchiveFile<'a, T>,
// In-memory staging area for the caller's (unencoded) writes and seeks.
buf: MemWriter,
// Forwarded as the `fake` argument of `LZWEncoder::encode` on drop.
fake: bool,
}
impl<'a, T: Write + Seek> EscudeBinArchiveFileWithLzw<'a, T> {
    /// Wraps `header`/`writer` in a raw entry writer positioned at offset 0 and
    /// pairs it with an empty staging buffer.
    ///
    /// `fake` is stored and handed to the LZW encoder when the value is dropped.
    /// The constructor itself has no failure path; it returns `Result` to match
    /// how it is called.
    fn new(header: &'a mut BinEntry, writer: &'a mut T, fake: bool) -> Result<Self> {
        Ok(Self {
            writer: EscudeBinArchiveFile {
                header,
                writer,
                pos: 0,
            },
            buf: MemWriter::new(),
            fake,
        })
    }
}
impl<'a, T: Write + Seek> Write for EscudeBinArchiveFileWithLzw<'a, T> {
    /// Appends `buf` to the in-memory staging buffer; nothing is sent to the
    /// wrapped entry writer here.
    fn write(&mut self, buf: &[u8]) -> std::io::Result<usize> {
        Write::write(&mut self.buf, buf)
    }

    /// Flushes the staging buffer only; the wrapped entry writer is untouched.
    fn flush(&mut self) -> std::io::Result<()> {
        Write::flush(&mut self.buf)
    }
}
impl<'a, T: Write + Seek> Seek for EscudeBinArchiveFileWithLzw<'a, T> {
    /// Moves the cursor of the in-memory staging buffer.
    fn seek(&mut self, pos: SeekFrom) -> std::io::Result<u64> {
        Seek::seek(&mut self.buf, pos)
    }

    /// Reports the cursor position of the staging buffer.
    fn stream_position(&mut self) -> std::io::Result<u64> {
        Seek::stream_position(&mut self.buf)
    }

    /// Returns the staging buffer's cursor to its start.
    fn rewind(&mut self) -> std::io::Result<()> {
        Seek::rewind(&mut self.buf)
    }
}
impl<'a, T: Write + Seek> Drop for EscudeBinArchiveFileWithLzw<'a, T> {
    /// Encodes the staged bytes and writes the result to the wrapped entry writer.
    ///
    /// `drop` cannot return an error, so failures are printed to stderr and
    /// recorded on the global error counter instead. The entry's `length` is
    /// raised to the encoded size only after a successful write.
    fn drop(&mut self) {
        let staged = self.buf.as_slice();
        let encoded = match super::lzw::LZWEncoder::new().encode(staged, self.fake) {
            Ok(bytes) => bytes,
            Err(e) => {
                eprintln!("Failed to encode LZW data: {}", e);
                crate::COUNTER.inc_error();
                return;
            }
        };

        if let Err(e) = self.writer.write_all(&encoded) {
            eprintln!("Failed to write LZW data: {}", e);
            crate::COUNTER.inc_error();
            return;
        }

        // Never shrink a length that is already at least the encoded size.
        let encoded_len = encoded.len() as u32;
        self.writer.header.length = self.writer.header.length.max(encoded_len);
    }
}
pub struct EscudeBinArchiveFile<'a, T: Write + Seek> {
header: &'a mut BinEntry,
writer: &'a mut T,

View File

@@ -1,5 +1,6 @@
use crate::ext::io::*;
use anyhow::Result;
use std::io::Write;
pub struct BitStream<'a> {
m_input: MemReaderRef<'a>,
@@ -96,3 +97,129 @@ impl<'a> LZWDecoder<'a> {
Ok(output)
}
}
/// Packs individual bits into bytes, most significant bit first, and forwards
/// every completed byte to the wrapped writer.
pub struct BitWriter<'a, T: Write> {
    writer: &'a mut T,
    /// Bits collected for the byte currently being assembled (low `buffer_size` bits).
    buffer: u32,
    /// Number of valid bits in `buffer`; always below 8 between calls.
    buffer_size: u32,
}

impl<'a, T: Write> BitWriter<'a, T> {
    /// Creates a bit writer with no pending bits on top of `writer`.
    pub fn new(writer: &'a mut T) -> Self {
        BitWriter {
            writer,
            buffer: 0,
            buffer_size: 0,
        }
    }

    /// Emits the partially filled byte, if any, padded with zero bits on the
    /// low side.
    ///
    /// Complete bytes are written with their first bit in the most significant
    /// position, so the pending bits must be shifted up to the top of the byte
    /// to stay in stream order. This does not flush the wrapped writer itself.
    ///
    /// # Errors
    /// Returns the error reported by the wrapped writer.
    pub fn flush(&mut self) -> Result<()> {
        if self.buffer_size > 0 {
            // Left-align the pending bits; the mask drops anything above bit 7.
            let padded = (self.buffer << (8 - self.buffer_size)) & 0xFF;
            self.writer.write_u8(padded as u8)?;
            self.buffer = 0;
            self.buffer_size = 0;
        }
        Ok(())
    }

    /// Writes the low `token_width` bits of `byte`, most significant bit first.
    ///
    /// Widths above 16 are accepted: the extra leading bits are written as
    /// zeros instead of overflowing a 16-bit shift.
    ///
    /// # Errors
    /// Returns the error reported by the wrapped writer.
    pub fn put_bits(&mut self, byte: u16, token_width: u8) -> Result<()> {
        let value = u32::from(byte);
        for shift in (0..token_width).rev() {
            // `checked_shr` yields `None` for shifts of 32 or more; those bits are zero.
            let bit = value.checked_shr(u32::from(shift)).unwrap_or(0) & 1;
            self.put_bit(bit != 0)?;
        }
        Ok(())
    }

    /// Appends a single bit, writing a byte to the wrapped writer each time
    /// eight bits have accumulated.
    ///
    /// # Errors
    /// Returns the error reported by the wrapped writer.
    pub fn put_bit(&mut self, bit: bool) -> Result<()> {
        self.buffer = (self.buffer << 1) | u32::from(bit);
        self.buffer_size += 1;
        if self.buffer_size == 8 {
            self.writer.write_u8((self.buffer & 0xFF) as u8)?;
            // Start the next byte clean so no stale bits can leak into `flush`.
            self.buffer = 0;
            self.buffer_size = 0;
        }
        Ok(())
    }
}
/// Encoder producing an `acp\0`-tagged token stream: a 4-byte tag, the
/// big-endian input length, then variable-width tokens packed MSB first.
pub struct LZWEncoder {
    buf: MemWriter,
}

impl LZWEncoder {
    /// Creates an encoder with an empty output buffer.
    pub fn new() -> Self {
        LZWEncoder {
            buf: MemWriter::new(),
        }
    }

    /// Encodes `input` and returns the complete stream (header included).
    ///
    /// With `fake` set, every byte is emitted as a 9-bit literal and a
    /// dictionary-reset token is inserted every 0x4000 bytes; no matching is
    /// performed. Otherwise a greedy LZW match is used, with control tokens
    /// 0x100 (end of stream), 0x101 (token width + 1) and 0x102 (reset).
    ///
    /// # Errors
    /// Fails if `input` is longer than `u32::MAX` bytes (the header cannot
    /// represent it) or if writing to the output buffer fails.
    pub fn encode(mut self, input: &[u8], fake: bool) -> Result<Vec<u8>> {
        const END_OF_STREAM: u16 = 0x100;
        const WIDEN: u16 = 0x101;
        const RESET: u16 = 0x102;
        const FIRST_CODE: u16 = 0x103;
        const INITIAL_WIDTH: u8 = 9;
        const MAX_WIDTH: u8 = 24;
        // Reset threshold: 0x8900 entries counting the 256 implicit single-byte
        // entries, i.e. 0x8800 learned phrases.
        const MAX_PHRASES: usize = 0x8900 - 0x100;
        const FAKE_RESET_INTERVAL: usize = 0x4000;

        // Refuse inputs whose length would be silently truncated in the header.
        let declared_len: u32 = std::convert::TryFrom::try_from(input.len())?;
        self.buf.write_all(b"acp\0")?;
        self.buf.write_u32_be(declared_len)?;
        let mut writer = BitWriter::new(&mut self.buf);
        if fake {
            for (index, &byte) in input.iter().enumerate() {
                if index > 0 && index % FAKE_RESET_INTERVAL == 0 {
                    writer.put_bits(RESET, INITIAL_WIDTH)?;
                }
                writer.put_bits(u16::from(byte), INITIAL_WIDTH)?;
            }
            writer.put_bits(END_OF_STREAM, INITIAL_WIDTH)?;
        } else {
            // Phrase table keyed by (code of the prefix, next byte). Single
            // bytes are implicit: their code is the byte value itself. This is
            // equivalent to keying by the full byte string, because every
            // learned phrase is an already-known phrase plus one byte.
            let mut phrases: std::collections::HashMap<(u16, u8), u16> =
                std::collections::HashMap::with_capacity(MAX_PHRASES);
            let mut next_code = FIRST_CODE;
            let mut token_width = INITIAL_WIDTH;
            let mut pos = 0;
            while pos < input.len() {
                // Greedily extend the current match as far as the table allows.
                let mut code = u16::from(input[pos]);
                pos += 1;
                while pos < input.len() {
                    match phrases.get(&(code, input[pos])) {
                        Some(&longer) => {
                            code = longer;
                            pos += 1;
                        }
                        None => break,
                    }
                }
                writer.put_bits(code, token_width)?;

                if let Some(&following) = input.get(pos) {
                    phrases.insert((code, following), next_code);
                    next_code += 1;
                    // Compare in u32: codes exceed 0x8000, so the width reaches
                    // 16 and `1u16 << 16` would overflow.
                    if u32::from(next_code) >= (1u32 << token_width) && token_width < MAX_WIDTH {
                        writer.put_bits(WIDEN, token_width)?;
                        token_width += 1;
                    }
                    if phrases.len() >= MAX_PHRASES {
                        writer.put_bits(RESET, token_width)?;
                        phrases.clear();
                        next_code = FIRST_CODE;
                        token_width = INITIAL_WIDTH;
                    }
                }
            }
            writer.put_bits(END_OF_STREAM, token_width)?;
        }
        writer.flush()?;
        Ok(self.buf.into_inner())
    }
}

View File

@@ -135,7 +135,7 @@ impl Script for EscudeBinScript {
s = s.replace(from, to);
}
}
let encoded = encode_string(encoding, &s, true)?;
let encoded = encode_string(encoding, &s, false)?;
len += encoded.len() as u32 + 1;
strs.push(CString::new(encoded)?);
}
@@ -169,20 +169,20 @@ impl StrReplacer {
let mut s = StrReplacer {
replacements: HashMap::new(),
};
s.add("!?。「」、・ヲァィゥェォャュョッーアイウエオカキクケコサシスセソタチツテトナニヌネノハヒフヘホマミムメモヤユヨラリルレロワン゙゚", "!? 。「」、…をぁぃぅぇぉゃゅょっーあいうえおかきくけこさしすせそたちつてとなにぬねのはひふへほまみむめもやゆよらりるれろわん゛゜")?;
// 0xa0 to 0xde: Half-width katakana in CP932
let half_width_katakana = "!? 。「」、…をぁぃぅぇぉゃゅょっーあいうえおかきくけこさしすせそたちつてとなにぬねのはひふへほまみむめもやゆよらりるれろわん゛゜";
let mut bytes: Vec<u8> = (0xa0..=0xde).collect();
bytes.insert(0, 0x21);
bytes.insert(1, 0x22);
s.add(&bytes, half_width_katakana)?;
Ok(s)
}
fn add(&mut self, from: &str, to: &str) -> Result<()> {
fn add(&mut self, from: &[u8], to: &str) -> Result<()> {
let encoding = Encoding::Cp932; // Default encoding, can be changed as needed
let froms = UnicodeSegmentation::graphemes(from, true);
let tos = UnicodeSegmentation::graphemes(to, true);
for (from, to) in froms.zip(tos) {
let from_bytes = if from == "" {
vec![0xa0]
} else {
encode_string(encoding, from, true)?
};
for (from, to) in from.into_iter().zip(tos) {
let from_bytes = vec![from.clone()];
let to_bytes = encode_string(encoding, to, true)?;
self.replacements.insert(from_bytes, to_bytes);
}