Add LZW Support

This commit is contained in:
2025-06-04 01:02:44 +08:00
parent dead6a0b18
commit a734d8ac5b
8 changed files with 395 additions and 22 deletions

View File

@@ -66,13 +66,16 @@ pub struct Arg {
#[arg(global = true, action = ArgAction::SetTrue, short, long)]
/// Print backtrace on error
pub backtrace: bool,
#[arg(long, action = ArgAction::SetTrue, global = true)]
/// Whether to use fake compression for Escude archive
pub escude_fake_compress: bool,
#[command(subcommand)]
/// Command
pub command: Command,
}
#[derive(Parser, Debug)]
#[clap(group = ArgGroup::new("patched_encodingg").multiple(false))]
#[clap(group = ArgGroup::new("patched_encodingg").multiple(false), group = ArgGroup::new("patched_archive_encodingg").multiple(false))]
pub struct ImportArgs {
/// Input script file or directory
pub input: String,
@@ -87,6 +90,13 @@ pub struct ImportArgs {
#[arg(short = 'P', long, group = "patched_encodingg")]
/// Patched script code page
pub patched_code_page: Option<u32>,
#[arg(long, value_enum, group = "patched_archive_encodingg", alias = "pa")]
/// Patched archive filename encoding
pub patched_archive_encoding: Option<TextEncoding>,
#[cfg(windows)]
#[arg(long, value_enum, group = "patched_archive_encodingg", alias = "PA")]
/// Patched archive code page
pub patched_archive_code_page: Option<u32>,
#[arg(long)]
/// Patched script format type
pub patched_format: Option<FormatType>,

View File

@@ -678,3 +678,90 @@ impl<'a> Seek for MemReaderRef<'a> {
Ok(())
}
}
/// Growable in-memory byte sink implementing [`Write`] and [`Seek`].
///
/// Bytes are written at the current cursor position. Writing past the end of
/// the buffer grows it, and any gap left by seeking beyond the end is filled
/// with zero bytes.
pub struct MemWriter {
    /// Bytes written so far.
    data: Vec<u8>,
    /// Cursor position; may point past `data.len()` after a seek.
    pos: usize,
}

impl MemWriter {
    /// Creates an empty writer with the cursor at offset 0.
    pub fn new() -> Self {
        MemWriter {
            data: Vec::new(),
            pos: 0,
        }
    }

    /// Consumes the writer and returns the accumulated bytes.
    pub fn into_inner(self) -> Vec<u8> {
        self.data
    }

    /// Borrows the accumulated bytes.
    pub fn as_slice(&self) -> &[u8] {
        &self.data
    }

    /// Builds the error returned when a position cannot be represented.
    fn position_overflow() -> std::io::Error {
        std::io::Error::new(
            std::io::ErrorKind::InvalidInput,
            "Seek position overflows",
        )
    }

    /// Applies a signed `delta` to `base` without wrapping.
    ///
    /// The sum is computed in `i128`, which holds every `usize + i64` value,
    /// so neither the addition nor the sign check can overflow.
    fn offset_position(base: usize, delta: i64, negative_msg: &'static str) -> Result<usize> {
        let target = base as i128 + i128::from(delta);
        if target < 0 {
            return Err(std::io::Error::new(
                std::io::ErrorKind::InvalidInput,
                negative_msg,
            ));
        }
        usize::try_from(target).map_err(|_| Self::position_overflow())
    }
}

impl Default for MemWriter {
    fn default() -> Self {
        Self::new()
    }
}

impl std::fmt::Debug for MemWriter {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        // Only the cursor and the length are printed; the payload is omitted.
        f.debug_struct("MemWriter")
            .field("pos", &self.pos)
            .field("data_length", &self.data.len())
            .finish_non_exhaustive()
    }
}

impl Write for MemWriter {
    /// Copies `buf` to the cursor position, growing the buffer when needed.
    ///
    /// # Errors
    /// Returns an error instead of panicking when the end position overflows
    /// `usize` or the buffer cannot be grown to the required size.
    fn write(&mut self, buf: &[u8]) -> Result<usize> {
        let end = self.pos.checked_add(buf.len()).ok_or_else(|| {
            std::io::Error::new(
                std::io::ErrorKind::InvalidInput,
                "Write position overflows",
            )
        })?;
        if end > self.data.len() {
            let missing = end - self.data.len();
            // Reserve fallibly so an absurd cursor position is reported as an
            // error rather than aborting inside `resize`.
            self.data
                .try_reserve(missing)
                .map_err(|e| std::io::Error::new(std::io::ErrorKind::OutOfMemory, e))?;
            self.data.resize(end, 0);
        }
        self.data[self.pos..end].copy_from_slice(buf);
        self.pos = end;
        Ok(buf.len())
    }

    /// Nothing is buffered outside `data`, so flushing is a no-op.
    fn flush(&mut self) -> Result<()> {
        Ok(())
    }
}

impl Seek for MemWriter {
    /// Moves the cursor. Seeking past the end is allowed.
    ///
    /// # Errors
    /// Fails with `InvalidInput` when the resulting position would be
    /// negative or does not fit in `usize`; the cursor is left unchanged.
    fn seek(&mut self, pos: SeekFrom) -> Result<u64> {
        let target = match pos {
            SeekFrom::Start(offset) => {
                usize::try_from(offset).map_err(|_| Self::position_overflow())?
            }
            SeekFrom::End(offset) => Self::offset_position(
                self.data.len(),
                offset,
                "Seek from end resulted in negative position",
            )?,
            SeekFrom::Current(offset) => {
                Self::offset_position(self.pos, offset, "Seek position is negative")?
            }
        };
        self.pos = target;
        Ok(target as u64)
    }

    fn stream_position(&mut self) -> Result<u64> {
        Ok(self.pos as u64)
    }

    fn rewind(&mut self) -> Result<()> {
        self.pos = 0;
        Ok(())
    }
}

View File

@@ -112,6 +112,35 @@ fn get_patched_encoding(
builder.default_patched_encoding()
}
/// Picks the filename encoding used when writing the patched archive.
///
/// Precedence: an explicit `patched_archive_encoding` argument, then (on
/// Windows only) an explicit `patched_archive_code_page`, then the builder's
/// default archive encoding, and finally the supplied `encoding`.
fn get_patched_archive_encoding(
    arg: &args::ImportArgs,
    builder: &Box<dyn scripts::ScriptBuilder + Send + Sync>,
    encoding: types::Encoding,
) -> types::Encoding {
    if let Some(requested) = &arg.patched_archive_encoding {
        return match requested {
            types::TextEncoding::Default => {
                builder.default_archive_encoding().unwrap_or(encoding)
            }
            // `Auto` has nothing to detect from when writing, so it maps to UTF-8.
            types::TextEncoding::Auto | types::TextEncoding::Utf8 => types::Encoding::Utf8,
            types::TextEncoding::Cp932 => types::Encoding::Cp932,
            types::TextEncoding::Gb2312 => types::Encoding::Gb2312,
        };
    }
    #[cfg(windows)]
    if let Some(code_page) = arg.patched_archive_code_page {
        return types::Encoding::CodePage(code_page);
    }
    builder.default_archive_encoding().unwrap_or(encoding)
}
pub fn parse_script(
filename: &str,
arg: &args::Arg,
@@ -575,9 +604,9 @@ pub fn import_script(
imp_cfg.patched.clone()
};
let files: Vec<_> = files.iter().map(|s| s.as_str()).collect();
let encoding = get_encoding(arg, builder);
let enc = get_archived_encoding(arg, builder, encoding);
let mut arch = builder.create_archive(&patched_f, &files, enc)?;
let pencoding = get_patched_encoding(imp_cfg, builder);
let enc = get_patched_archive_encoding(imp_cfg, builder, pencoding);
let mut arch = builder.create_archive(&patched_f, &files, enc, config)?;
for f in script.iter_archive_mut()? {
let f = f?;
let mut writer = arch.new_file(f.name())?;
@@ -597,6 +626,46 @@ pub fn import_script(
of.as_ref()
};
out_path.set_extension(ext);
if !out_path.exists() {
out_path = std::path::PathBuf::from(&odir).join(f.name());
if !out_path.exists() {
eprintln!(
"Warning: File {} does not exist, using file from original archive.",
out_path.display()
);
COUNTER.inc_warning();
match writer.write_all(f.data()) {
Ok(_) => {}
Err(e) => {
eprintln!("Error writing to file {}: {}", out_path.display(), e);
COUNTER.inc_error();
continue;
}
}
COUNTER.inc(types::ScriptResult::Ok);
continue;
} else {
let file = match std::fs::File::open(&out_path) {
Ok(f) => f,
Err(e) => {
eprintln!("Error opening file {}: {}", out_path.display(), e);
COUNTER.inc_error();
continue;
}
};
let mut f = std::io::BufReader::new(file);
match std::io::copy(&mut f, &mut writer) {
Ok(_) => {}
Err(e) => {
eprintln!("Error writing to file {}: {}", out_path.display(), e);
COUNTER.inc_error();
continue;
}
}
COUNTER.inc(types::ScriptResult::Ok);
continue;
}
}
let mut mes = match of {
types::OutputScriptType::Json => {
let enc = get_output_encoding(arg);
@@ -838,6 +907,7 @@ fn main() {
}
let cfg = types::ExtraConfig {
circus_mes_type: arg.circus_mes_type.clone(),
escude_fake_compress: arg.escude_fake_compress.clone(),
};
match &arg.command {
args::Command::Export { input, output } => {

View File

@@ -73,6 +73,7 @@ pub trait ScriptBuilder: std::fmt::Debug {
_filename: &str,
_files: &[&str],
_encoding: Encoding,
_config: &ExtraConfig,
) -> Result<Box<dyn Archive>> {
Err(anyhow::anyhow!(
"This script type does not support creating an archive."

View File

@@ -105,10 +105,11 @@ impl ScriptBuilder for EscudeBinArchiveBuilder {
filename: &str,
files: &[&str],
encoding: Encoding,
config: &ExtraConfig,
) -> Result<Box<dyn Archive>> {
let f = std::fs::File::create(filename)?;
let writer = std::io::BufWriter::new(f);
let archive = EscudeBinArchiveWriter::new(writer, files, encoding)?;
let archive = EscudeBinArchiveWriter::new(writer, files, encoding, config)?;
Ok(Box::new(archive))
}
}
@@ -284,7 +285,7 @@ impl<'a, T: Iterator<Item = &'a BinEntry>, R: Read + Seek> Iterator
};
data = match decoder.unpack() {
Ok(unpacked_data) => unpacked_data,
Err(e) => return Some(Err(anyhow::anyhow!("Failed to unpack LZW data: {}", e))),
Err(e) => return Some(Err(e)),
};
}
Some(Ok(Box::new(Entry { name, data })))
@@ -295,10 +296,16 @@ pub struct EscudeBinArchiveWriter<T: Write + Seek> {
writer: T,
headers: HashMap<String, BinEntry>,
name_tbl_len: u32,
fake: bool,
}
impl<T: Write + Seek> EscudeBinArchiveWriter<T> {
pub fn new(mut writer: T, files: &[&str], encoding: Encoding) -> Result<Self> {
pub fn new(
mut writer: T,
files: &[&str],
encoding: Encoding,
config: &ExtraConfig,
) -> Result<Self> {
writer.write_all(b"ESC-ARC2")?;
let header_len = 0xC + 0xC * files.len();
let header = vec![0u8; header_len];
@@ -324,6 +331,7 @@ impl<T: Write + Seek> EscudeBinArchiveWriter<T> {
writer,
headers,
name_tbl_len,
fake: config.escude_fake_compress,
})
}
}
@@ -338,11 +346,11 @@ impl<T: Write + Seek> Archive for EscudeBinArchiveWriter<T> {
return Err(anyhow::anyhow!("File '{}' already exists in archive", name));
}
entry.data_offset = self.writer.stream_position()? as u32;
Ok(Box::new(EscudeBinArchiveFile {
header: entry,
writer: &mut self.writer,
pos: 0,
}))
Ok(Box::new(EscudeBinArchiveFileWithLzw::new(
entry,
&mut self.writer,
self.fake,
)?))
}
fn write_header(&mut self) -> Result<()> {
@@ -361,6 +369,75 @@ impl<T: Write + Seek> Archive for EscudeBinArchiveWriter<T> {
}
}
/// Archive entry writer that stages everything in memory and stores it
/// LZW-encoded.
///
/// All `Write`/`Seek` calls operate on the in-memory staging buffer. The
/// encoded payload is handed to the wrapped [`EscudeBinArchiveFile`] only when
/// this value is dropped.
pub struct EscudeBinArchiveFileWithLzw<'a, T: Write + Seek> {
    /// Destination entry inside the archive.
    writer: EscudeBinArchiveFile<'a, T>,
    /// Staging buffer holding the unencoded entry contents.
    buf: MemWriter,
    /// Passed through to the encoder as its `fake` flag.
    fake: bool,
}

impl<'a, T: Write + Seek> EscudeBinArchiveFileWithLzw<'a, T> {
    /// Wraps `writer`/`header` in a plain entry writer starting at position 0
    /// and pairs it with an empty staging buffer.
    fn new(header: &'a mut BinEntry, writer: &'a mut T, fake: bool) -> Result<Self> {
        Ok(Self {
            writer: EscudeBinArchiveFile {
                header,
                writer,
                pos: 0,
            },
            buf: MemWriter::new(),
            fake,
        })
    }
}

impl<'a, T: Write + Seek> Write for EscudeBinArchiveFileWithLzw<'a, T> {
    /// Appends to the staging buffer; nothing reaches the archive yet.
    fn write(&mut self, buf: &[u8]) -> std::io::Result<usize> {
        let staging = &mut self.buf;
        staging.write(buf)
    }

    /// Flushes the staging buffer only.
    fn flush(&mut self) -> std::io::Result<()> {
        let staging = &mut self.buf;
        staging.flush()
    }
}

impl<'a, T: Write + Seek> Seek for EscudeBinArchiveFileWithLzw<'a, T> {
    /// Seeks within the staging buffer.
    fn seek(&mut self, pos: SeekFrom) -> std::io::Result<u64> {
        let staging = &mut self.buf;
        staging.seek(pos)
    }

    fn stream_position(&mut self) -> std::io::Result<u64> {
        let staging = &mut self.buf;
        staging.stream_position()
    }

    fn rewind(&mut self) -> std::io::Result<()> {
        let staging = &mut self.buf;
        staging.rewind()
    }
}

impl<'a, T: Write + Seek> Drop for EscudeBinArchiveFileWithLzw<'a, T> {
    /// Encodes the staged bytes and writes them to the archive entry.
    ///
    /// `drop` cannot return an error, so failures are printed to stderr and
    /// recorded on the global error counter instead.
    fn drop(&mut self) {
        let packed = match super::lzw::LZWEncoder::new().encode(self.buf.as_slice(), self.fake) {
            Ok(bytes) => bytes,
            Err(e) => {
                eprintln!("Failed to encode LZW data: {}", e);
                crate::COUNTER.inc_error();
                return;
            }
        };
        if let Err(e) = self.writer.write_all(&packed) {
            eprintln!("Failed to write LZW data: {}", e);
            crate::COUNTER.inc_error();
            return;
        }
        // Make sure the recorded entry length covers the encoded payload.
        let packed_len = packed.len() as u32;
        self.writer.header.length = self.writer.header.length.max(packed_len);
    }
}
pub struct EscudeBinArchiveFile<'a, T: Write + Seek> {
header: &'a mut BinEntry,
writer: &'a mut T,

View File

@@ -1,5 +1,6 @@
use crate::ext::io::*;
use anyhow::Result;
use std::io::Write;
pub struct BitStream<'a> {
m_input: MemReaderRef<'a>,
@@ -96,3 +97,129 @@ impl<'a> LZWDecoder<'a> {
Ok(output)
}
}
/// Packs individual bits, most significant bit first, into bytes written to
/// an underlying [`Write`] sink.
pub struct BitWriter<'a, T: Write> {
    writer: &'a mut T,
    /// Bits collected for the byte currently being assembled (low bits).
    buffer: u32,
    /// Number of valid bits in `buffer`; always below 8 between calls.
    buffer_size: u32,
}

impl<'a, T: Write> BitWriter<'a, T> {
    /// Creates a bit writer with no pending bits.
    pub fn new(writer: &'a mut T) -> Self {
        BitWriter {
            writer,
            buffer: 0,
            buffer_size: 0,
        }
    }

    /// Writes out a partially filled byte, if any.
    ///
    /// The pending bits are placed in the high bits of the byte, matching the
    /// MSB-first layout of full bytes, and the remainder is zero-padded.
    pub fn flush(&mut self) -> Result<()> {
        if self.buffer_size > 0 {
            let padded = (self.buffer << (8 - self.buffer_size)) & 0xFF;
            self.writer.write_all(&[padded as u8])?;
            self.buffer = 0;
            self.buffer_size = 0;
        }
        Ok(())
    }

    /// Writes the lowest `token_width` bits of `byte`, highest bit first.
    ///
    /// Widths above 16 are padded with leading zero bits.
    pub fn put_bits(&mut self, byte: u16, token_width: u8) -> Result<()> {
        let value = u32::from(byte);
        for shift in (0..u32::from(token_width)).rev() {
            // `checked_shr` yields `None` for shifts >= 32, i.e. a zero bit.
            let bit = value.checked_shr(shift).map_or(false, |v| v & 1 != 0);
            self.put_bit(bit)?;
        }
        Ok(())
    }

    /// Appends one bit and emits a byte once eight bits are collected.
    pub fn put_bit(&mut self, bit: bool) -> Result<()> {
        self.buffer = (self.buffer << 1) | u32::from(bit);
        self.buffer_size += 1;
        if self.buffer_size == 8 {
            self.writer.write_all(&[(self.buffer & 0xFF) as u8])?;
            self.buffer = 0;
            self.buffer_size = 0;
        }
        Ok(())
    }
}

/// Encoder for the `acp` LZW container.
///
/// Output layout: the 4-byte magic `acp\0`, the input length as a big-endian
/// `u32`, then a stream of MSB-first variable-width tokens.
pub struct LZWEncoder {
    buf: Vec<u8>,
}

impl Default for LZWEncoder {
    fn default() -> Self {
        Self::new()
    }
}

impl LZWEncoder {
    /// Creates an encoder with an empty output buffer.
    pub fn new() -> Self {
        LZWEncoder { buf: Vec::new() }
    }

    /// Encodes `input` and returns the complete container.
    ///
    /// With `fake` set, no compression is attempted: every byte is stored as
    /// a 9-bit literal, with a dictionary-reset token every 0x4000 bytes.
    /// Otherwise the input is LZW-compressed with tokens that start 9 bits
    /// wide and grow as the dictionary fills.
    ///
    /// # Errors
    /// Fails when `input` is longer than the 32-bit length header can express
    /// or when writing to the output buffer fails.
    pub fn encode(mut self, input: &[u8], fake: bool) -> Result<Vec<u8>> {
        /// Token marking the end of the stream.
        const END_OF_STREAM: u16 = 0x100;
        /// Token telling the reader that tokens are one bit wider from now on.
        const WIDEN_TOKENS: u16 = 0x101;
        /// Token telling the reader to reset its dictionary and token width.
        const CLEAR_DICTIONARY: u16 = 0x102;
        /// First code assigned to a multi-byte phrase.
        const FIRST_PHRASE_CODE: u16 = 0x103;
        const INITIAL_WIDTH: u8 = 9;
        const MAX_WIDTH: u8 = 24;
        /// Single-byte phrases are implicit; their code is the byte value.
        const ROOT_ENTRIES: usize = 256;
        /// Dictionary size (roots included) at which a reset is emitted.
        const DICTIONARY_LIMIT: usize = 0x8900;
        /// Literal count between resets in fake mode.
        const FAKE_RESET_INTERVAL: usize = 0x4000;

        let declared_len = u32::try_from(input.len()).map_err(|_| {
            std::io::Error::new(
                std::io::ErrorKind::InvalidInput,
                "Input is too large for the 32-bit LZW length header",
            )
        })?;
        self.buf.write_all(b"acp\0")?;
        self.buf.write_all(&declared_len.to_be_bytes())?;

        let mut writer = BitWriter::new(&mut self.buf);
        if fake {
            for (index, &byte) in input.iter().enumerate() {
                if index > 0 && index % FAKE_RESET_INTERVAL == 0 {
                    writer.put_bits(CLEAR_DICTIONARY, INITIAL_WIDTH)?;
                }
                writer.put_bits(u16::from(byte), INITIAL_WIDTH)?;
            }
            writer.put_bits(END_OF_STREAM, INITIAL_WIDTH)?;
        } else {
            // Maps (code of a known phrase, next byte) to the code of the
            // extended phrase. This holds the same information as a
            // byte-string keyed dictionary without copying phrases around.
            let mut phrases: std::collections::HashMap<(u16, u8), u16> =
                std::collections::HashMap::new();
            let mut next_code = FIRST_PHRASE_CODE;
            let mut token_width = INITIAL_WIDTH;
            let mut i = 0;
            while i < input.len() {
                // Greedily extend the current phrase while it is known.
                let mut code = u16::from(input[i]);
                i += 1;
                while i < input.len() {
                    match phrases.get(&(code, input[i])) {
                        Some(&longer) => {
                            code = longer;
                            i += 1;
                        }
                        None => break,
                    }
                }
                writer.put_bits(code, token_width)?;

                if let Some(&following) = input.get(i) {
                    phrases.insert((code, following), next_code);
                    next_code += 1;
                    // Compare in u32: with 16-bit tokens `1 << 16` does not
                    // fit in u16 and used to overflow the shift.
                    if u32::from(next_code) >= (1u32 << token_width) && token_width < MAX_WIDTH {
                        writer.put_bits(WIDEN_TOKENS, token_width)?;
                        token_width += 1;
                    }
                    if phrases.len() + ROOT_ENTRIES >= DICTIONARY_LIMIT {
                        writer.put_bits(CLEAR_DICTIONARY, token_width)?;
                        phrases.clear();
                        next_code = FIRST_PHRASE_CODE;
                        token_width = INITIAL_WIDTH;
                    }
                }
            }
            writer.put_bits(END_OF_STREAM, token_width)?;
        }
        writer.flush()?;
        Ok(self.buf)
    }
}

View File

@@ -135,7 +135,7 @@ impl Script for EscudeBinScript {
s = s.replace(from, to);
}
}
let encoded = encode_string(encoding, &s, true)?;
let encoded = encode_string(encoding, &s, false)?;
len += encoded.len() as u32 + 1;
strs.push(CString::new(encoded)?);
}
@@ -169,20 +169,20 @@ impl StrReplacer {
let mut s = StrReplacer {
replacements: HashMap::new(),
};
s.add("!?。「」、・ヲァィゥェォャュョッーアイウエオカキクケコサシスセソタチツテトナニヌネノハヒフヘホマミムメモヤユヨラリルレロワン゙゚", "!? 。「」、…をぁぃぅぇぉゃゅょっーあいうえおかきくけこさしすせそたちつてとなにぬねのはひふへほまみむめもやゆよらりるれろわん゛゜")?;
// 0xa0 to 0xde: Half-width katakana in CP932
let half_width_katakana = "!? 。「」、…をぁぃぅぇぉゃゅょっーあいうえおかきくけこさしすせそたちつてとなにぬねのはひふへほまみむめもやゆよらりるれろわん゛゜";
let mut bytes: Vec<u8> = (0xa0..=0xde).collect();
bytes.insert(0, 0x21);
bytes.insert(1, 0x22);
s.add(&bytes, half_width_katakana)?;
Ok(s)
}
fn add(&mut self, from: &str, to: &str) -> Result<()> {
fn add(&mut self, from: &[u8], to: &str) -> Result<()> {
let encoding = Encoding::Cp932; // Default encoding, can be changed as needed
let froms = UnicodeSegmentation::graphemes(from, true);
let tos = UnicodeSegmentation::graphemes(to, true);
for (from, to) in froms.zip(tos) {
let from_bytes = if from == "" {
vec![0xa0]
} else {
encode_string(encoding, from, true)?
};
for (from, to) in from.into_iter().zip(tos) {
let from_bytes = vec![from.clone()];
let to_bytes = encode_string(encoding, to, true)?;
self.replacements.insert(from_bytes, to_bytes);
}

View File

@@ -186,6 +186,7 @@ impl AsRef<str> for CircusMesType {
/// Extra, format-specific options handed to script builders.
pub struct ExtraConfig {
    /// Circus MES type, if one was specified.
    // NOTE(review): exact meaning of an absent value is not visible here — confirm.
    pub circus_mes_type: Option<CircusMesType>,
    /// Whether to use fake compression when writing an Escude archive.
    pub escude_fake_compress: bool,
}
#[derive(Clone, Copy, Debug, ValueEnum, PartialEq, Eq, PartialOrd, Ord)]