diff --git a/src/args.rs b/src/args.rs index bd2390f..c803333 100644 --- a/src/args.rs +++ b/src/args.rs @@ -66,13 +66,16 @@ pub struct Arg { #[arg(global = true, action = ArgAction::SetTrue, short, long)] /// Print backtrace on error pub backtrace: bool, + #[arg(long, action = ArgAction::SetTrue, global = true)] + /// Whether to use fake compression for Escude archive + pub escude_fake_compress: bool, #[command(subcommand)] /// Command pub command: Command, } #[derive(Parser, Debug)] -#[clap(group = ArgGroup::new("patched_encodingg").multiple(false))] +#[clap(group = ArgGroup::new("patched_encodingg").multiple(false), group = ArgGroup::new("patched_archive_encodingg").multiple(false))] pub struct ImportArgs { /// Input script file or directory pub input: String, @@ -87,6 +90,13 @@ pub struct ImportArgs { #[arg(short = 'P', long, group = "patched_encodingg")] /// Patched script code page pub patched_code_page: Option, + #[arg(long, value_enum, group = "patched_archive_encodingg", alias = "pa")] + /// Patched archive filename encoding + pub patched_archive_encoding: Option, + #[cfg(windows)] + #[arg(long, value_enum, group = "patched_archive_encodingg", alias = "PA")] + /// Patched archive code page + pub patched_archive_code_page: Option, #[arg(long)] /// Patched script format type pub patched_format: Option, diff --git a/src/ext/io.rs b/src/ext/io.rs index d27247e..0126a46 100644 --- a/src/ext/io.rs +++ b/src/ext/io.rs @@ -678,3 +678,90 @@ impl<'a> Seek for MemReaderRef<'a> { Ok(()) } } + +pub struct MemWriter { + data: Vec, + pos: usize, +} + +impl MemWriter { + pub fn new() -> Self { + MemWriter { + data: Vec::new(), + pos: 0, + } + } + + pub fn into_inner(self) -> Vec { + self.data + } + + pub fn as_slice(&self) -> &[u8] { + &self.data + } +} + +impl std::fmt::Debug for MemWriter { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + f.debug_struct("MemWriter") + .field("pos", &self.pos) + .field("data_length", &self.data.len()) + .finish_non_exhaustive() + } +} + +impl Write for MemWriter { + fn write(&mut self, buf: &[u8]) -> Result { + if self.pos + buf.len() > self.data.len() { + self.data.resize(self.pos + buf.len(), 0); + } + let bytes_written = buf.len(); + self.data[self.pos..self.pos + bytes_written].copy_from_slice(buf); + self.pos += bytes_written; + Ok(bytes_written) + } + + fn flush(&mut self) -> Result<()> { + Ok(()) + } +} + +impl Seek for MemWriter { + fn seek(&mut self, pos: SeekFrom) -> Result { + match pos { + SeekFrom::Start(offset) => { + self.pos = offset as usize; + } + SeekFrom::End(offset) => { + let end_pos = self.data.len() as i64 + offset; + if end_pos < 0 { + return Err(std::io::Error::new( + std::io::ErrorKind::InvalidInput, + "Seek from end resulted in negative position", + )); + } + self.pos = end_pos as usize; + } + SeekFrom::Current(offset) => { + let new_pos = self.pos as i64 + offset; + if new_pos < 0 { + return Err(std::io::Error::new( + std::io::ErrorKind::InvalidInput, + "Seek position is negative", + )); + } + self.pos = new_pos as usize; + } + } + Ok(self.pos as u64) + } + + fn stream_position(&mut self) -> Result { + Ok(self.pos as u64) + } + + fn rewind(&mut self) -> Result<()> { + self.pos = 0; + Ok(()) + } +} diff --git a/src/main.rs b/src/main.rs index 42f7982..9e8d5c2 100644 --- a/src/main.rs +++ b/src/main.rs @@ -112,6 +112,35 @@ fn get_patched_encoding( builder.default_patched_encoding() } +fn get_patched_archive_encoding( + arg: &args::ImportArgs, + builder: &Box, + encoding: types::Encoding, +) -> types::Encoding { + match &arg.patched_archive_encoding { + Some(enc) => { + return match enc { + &types::TextEncoding::Default => { + builder.default_archive_encoding().unwrap_or(encoding) + } + &types::TextEncoding::Auto => types::Encoding::Utf8, + &types::TextEncoding::Cp932 => types::Encoding::Cp932, + &types::TextEncoding::Utf8 => types::Encoding::Utf8, + &types::TextEncoding::Gb2312 => types::Encoding::Gb2312, + }; + } + None => {} + } + #[cfg(windows)] + match &arg.patched_archive_code_page { + Some(code_page) => { + return types::Encoding::CodePage(*code_page); + } + None => {} + } + builder.default_archive_encoding().unwrap_or(encoding) +} + pub fn parse_script( filename: &str, arg: &args::Arg, @@ -575,9 +604,9 @@ pub fn import_script( imp_cfg.patched.clone() }; let files: Vec<_> = files.iter().map(|s| s.as_str()).collect(); - let encoding = get_encoding(arg, builder); - let enc = get_archived_encoding(arg, builder, encoding); - let mut arch = builder.create_archive(&patched_f, &files, enc)?; + let pencoding = get_patched_encoding(imp_cfg, builder); + let enc = get_patched_archive_encoding(imp_cfg, builder, pencoding); + let mut arch = builder.create_archive(&patched_f, &files, enc, config)?; for f in script.iter_archive_mut()? { let f = f?; let mut writer = arch.new_file(f.name())?; @@ -597,6 +626,46 @@ pub fn import_script( of.as_ref() }; out_path.set_extension(ext); + if !out_path.exists() { + out_path = std::path::PathBuf::from(&odir).join(f.name()); + if !out_path.exists() { + eprintln!( + "Warning: File {} does not exist, using file from original archive.", + out_path.display() + ); + COUNTER.inc_warning(); + match writer.write_all(f.data()) { + Ok(_) => {} + Err(e) => { + eprintln!("Error writing to file {}: {}", out_path.display(), e); + COUNTER.inc_error(); + continue; + } + } + COUNTER.inc(types::ScriptResult::Ok); + continue; + } else { + let file = match std::fs::File::open(&out_path) { + Ok(f) => f, + Err(e) => { + eprintln!("Error opening file {}: {}", out_path.display(), e); + COUNTER.inc_error(); + continue; + } + }; + let mut f = std::io::BufReader::new(file); + match std::io::copy(&mut f, &mut writer) { + Ok(_) => {} + Err(e) => { + eprintln!("Error writing to file {}: {}", out_path.display(), e); + COUNTER.inc_error(); + continue; + } + } + COUNTER.inc(types::ScriptResult::Ok); + continue; + } + } let mut mes = match of { types::OutputScriptType::Json => { let enc = get_output_encoding(arg); @@ -838,6 +907,7 @@ fn main() { } let cfg = types::ExtraConfig { circus_mes_type: arg.circus_mes_type.clone(), + escude_fake_compress: arg.escude_fake_compress.clone(), }; match &arg.command { args::Command::Export { input, output } => { diff --git a/src/scripts/base.rs b/src/scripts/base.rs index 21fbbef..fba6d16 100644 --- a/src/scripts/base.rs +++ b/src/scripts/base.rs @@ -73,6 +73,7 @@ pub trait ScriptBuilder: std::fmt::Debug { _filename: &str, _files: &[&str], _encoding: Encoding, + _config: &ExtraConfig, ) -> Result> { Err(anyhow::anyhow!( "This script type does not support creating an archive." diff --git a/src/scripts/escude/archive.rs b/src/scripts/escude/archive.rs index fbc61c1..62e4dca 100644 --- a/src/scripts/escude/archive.rs +++ b/src/scripts/escude/archive.rs @@ -105,10 +105,11 @@ impl ScriptBuilder for EscudeBinArchiveBuilder { filename: &str, files: &[&str], encoding: Encoding, + config: &ExtraConfig, ) -> Result> { let f = std::fs::File::create(filename)?; let writer = std::io::BufWriter::new(f); - let archive = EscudeBinArchiveWriter::new(writer, files, encoding)?; + let archive = EscudeBinArchiveWriter::new(writer, files, encoding, config)?; Ok(Box::new(archive)) } } @@ -284,7 +285,7 @@ impl<'a, T: Iterator, R: Read + Seek> Iterator }; data = match decoder.unpack() { Ok(unpacked_data) => unpacked_data, - Err(e) => return Some(Err(anyhow::anyhow!("Failed to unpack LZW data: {}", e))), + Err(e) => return Some(Err(e)), }; } Some(Ok(Box::new(Entry { name, data }))) @@ -295,10 +296,16 @@ pub struct EscudeBinArchiveWriter { writer: T, headers: HashMap, name_tbl_len: u32, + fake: bool, } impl EscudeBinArchiveWriter { - pub fn new(mut writer: T, files: &[&str], encoding: Encoding) -> Result { + pub fn new( + mut writer: T, + files: &[&str], + encoding: Encoding, + config: &ExtraConfig, + ) -> Result { writer.write_all(b"ESC-ARC2")?; let header_len = 0xC + 0xC * files.len(); let header = vec![0u8; header_len]; @@ -324,6 +331,7 @@ impl EscudeBinArchiveWriter { writer, headers, name_tbl_len, + fake: config.escude_fake_compress, }) } } @@ -338,11 +346,11 @@ impl Archive for EscudeBinArchiveWriter { return Err(anyhow::anyhow!("File '{}' already exists in archive", name)); } entry.data_offset = self.writer.stream_position()? as u32; - Ok(Box::new(EscudeBinArchiveFile { - header: entry, - writer: &mut self.writer, - pos: 0, - })) + Ok(Box::new(EscudeBinArchiveFileWithLzw::new( + entry, + &mut self.writer, + self.fake, + )?)) } fn write_header(&mut self) -> Result<()> { @@ -361,6 +369,75 @@ impl Archive for EscudeBinArchiveWriter { } } +pub struct EscudeBinArchiveFileWithLzw<'a, T: Write + Seek> { + writer: EscudeBinArchiveFile<'a, T>, + buf: MemWriter, + fake: bool, +} + +impl<'a, T: Write + Seek> EscudeBinArchiveFileWithLzw<'a, T> { + fn new(header: &'a mut BinEntry, writer: &'a mut T, fake: bool) -> Result { + let writer = EscudeBinArchiveFile { + header, + writer, + pos: 0, + }; + Ok(EscudeBinArchiveFileWithLzw { + writer, + buf: MemWriter::new(), + fake, + }) + } +} + +impl<'a, T: Write + Seek> Write for EscudeBinArchiveFileWithLzw<'a, T> { + fn write(&mut self, buf: &[u8]) -> std::io::Result { + self.buf.write(buf) + } + + fn flush(&mut self) -> std::io::Result<()> { + self.buf.flush() + } +} + +impl<'a, T: Write + Seek> Seek for EscudeBinArchiveFileWithLzw<'a, T> { + fn seek(&mut self, pos: SeekFrom) -> std::io::Result { + self.buf.seek(pos) + } + + fn stream_position(&mut self) -> std::io::Result { + self.buf.stream_position() + } + + fn rewind(&mut self) -> std::io::Result<()> { + self.buf.rewind() + } +} + +impl<'a, T: Write + Seek> Drop for EscudeBinArchiveFileWithLzw<'a, T> { + fn drop(&mut self) { + let buf = self.buf.as_slice(); + let encoder = super::lzw::LZWEncoder::new(); + let data = match encoder.encode(buf, self.fake) { + Ok(data) => data, + Err(e) => { + eprintln!("Failed to encode LZW data: {}", e); + crate::COUNTER.inc_error(); + return; + } + }; + match self.writer.write_all(&data) { + Ok(_) => { + self.writer.header.length = self.writer.header.length.max(data.len() as u32); + } + Err(e) => { + eprintln!("Failed to write LZW data: {}", e); + crate::COUNTER.inc_error(); + } + } + } +} + pub struct EscudeBinArchiveFile<'a, T: Write + Seek> { header: &'a mut BinEntry, writer: &'a mut T, diff --git a/src/scripts/escude/lzw.rs b/src/scripts/escude/lzw.rs index 2c96d8f..44a4081 100644 --- a/src/scripts/escude/lzw.rs +++ b/src/scripts/escude/lzw.rs @@ -1,5 +1,6 @@ use crate::ext::io::*; use anyhow::Result; +use std::io::Write; pub struct BitStream<'a> { m_input: MemReaderRef<'a>, @@ -96,3 +97,129 @@ impl<'a> LZWDecoder<'a> { Ok(output) } } + +pub struct BitWriter<'a, T: Write> { + writer: &'a mut T, + buffer: u32, + buffer_size: u32, +} + +impl<'a, T: Write> BitWriter<'a, T> { + pub fn new(writer: &'a mut T) -> Self { + BitWriter { + writer, + buffer: 0, + buffer_size: 0, + } + } + + pub fn flush(&mut self) -> Result<()> { + if self.buffer_size > 0 { + self.writer.write_u8((self.buffer & 0xFF) as u8)?; + self.buffer = 0; + self.buffer_size = 0; + } + Ok(()) + } + + pub fn put_bits(&mut self, byte: u16, token_width: u8) -> Result<()> { + for i in 0..token_width { + self.put_bit((byte & (1 << (token_width - 1 - i))) != 0)?; + } + Ok(()) + } + + pub fn put_bit(&mut self, bit: bool) -> Result<()> { + self.buffer <<= 1; + if bit { + self.buffer |= 1; + } + self.buffer_size += 1; + if self.buffer_size == 8 { + self.writer.write_u8((self.buffer & 0xFF) as u8)?; + self.buffer_size -= 8; + } + Ok(()) + } +} + +pub struct LZWEncoder { + buf: MemWriter, +} + +impl LZWEncoder { + pub fn new() -> Self { + LZWEncoder { + buf: MemWriter::new(), + } + } + + pub fn encode(mut self, input: &[u8], fake: bool) -> Result> { + self.buf.write_all(b"acp\0")?; + self.buf.write_u32_be(input.len() as u32)?; + let mut writer = BitWriter::new(&mut self.buf); + if fake { + for i in 0..input.len() { + if i > 0 && i % 0x4000 == 0 { + writer.put_bits(0x102, 9)?; + } + writer.put_bits(input[i] as u16, 9)?; + } + writer.put_bits(0x100, 9)?; // End of stream + writer.flush()?; + } else { + let mut dict = std::collections::HashMap::new(); + for i in 0..256 { + dict.insert(vec![i as u8], i as u16); + } + let mut next_code = 0x103u16; + let mut token_width = 9; + + let mut i = 0; + while i < input.len() { + let mut current = vec![input[i]]; + i += 1; + + while i < input.len() + && dict.contains_key(&{ + let mut temp = current.clone(); + temp.push(input[i]); + temp + }) + { + current.push(input[i]); + i += 1; + } + + let code = dict[¤t]; + writer.put_bits(code, token_width)?; + + if i < input.len() { + let mut new_entry = current.clone(); + new_entry.push(input[i]); + dict.insert(new_entry, next_code); + next_code += 1; + + if next_code >= (1 << token_width) && token_width < 24 { + writer.put_bits(0x101, token_width)?; // Increase token width + token_width += 1; + } + + if dict.len() >= 0x8900 { + writer.put_bits(0x102, token_width)?; // Clear dictionary + dict.clear(); + for j in 0..256 { + dict.insert(vec![j as u8], j as u16); + } + next_code = 0x103; + token_width = 9; + } + } + } + writer.put_bits(0x100, token_width)?; // End of stream + writer.flush()?; + } + + Ok(self.buf.into_inner()) + } +} diff --git a/src/scripts/escude/script.rs b/src/scripts/escude/script.rs index d01a1c9..026e813 100644 --- a/src/scripts/escude/script.rs +++ b/src/scripts/escude/script.rs @@ -135,7 +135,7 @@ impl Script for EscudeBinScript { s = s.replace(from, to); } } - let encoded = encode_string(encoding, &s, true)?; + let encoded = encode_string(encoding, &s, false)?; len += encoded.len() as u32 + 1; strs.push(CString::new(encoded)?); } @@ -169,20 +169,20 @@ impl StrReplacer { let mut s = StrReplacer { replacements: HashMap::new(), }; - s.add("!?。「」、・ヲァィゥェォャュョッーアイウエオカキクケコサシスセソタチツテトナニヌネノハヒフヘホマミムメモヤユヨラリルレロワン゙゚", "!? 。「」、…をぁぃぅぇぉゃゅょっーあいうえおかきくけこさしすせそたちつてとなにぬねのはひふへほまみむめもやゆよらりるれろわん゛゜")?; + // 0xa0 to 0xde: Half-width katakana in CP932 + let half_width_katakana = "!? 。「」、…をぁぃぅぇぉゃゅょっーあいうえおかきくけこさしすせそたちつてとなにぬねのはひふへほまみむめもやゆよらりるれろわん゛゜"; + let mut bytes: Vec = (0xa0..=0xde).collect(); + bytes.insert(0, 0x21); + bytes.insert(1, 0x22); + s.add(&bytes, half_width_katakana)?; Ok(s) } - fn add(&mut self, from: &str, to: &str) -> Result<()> { + fn add(&mut self, from: &[u8], to: &str) -> Result<()> { let encoding = Encoding::Cp932; // Default encoding, can be changed as needed - let froms = UnicodeSegmentation::graphemes(from, true); let tos = UnicodeSegmentation::graphemes(to, true); - for (from, to) in froms.zip(tos) { - let from_bytes = if from == "" { - vec![0xa0] - } else { - encode_string(encoding, from, true)? - }; + for (from, to) in from.into_iter().zip(tos) { + let from_bytes = vec![from.clone()]; let to_bytes = encode_string(encoding, to, true)?; self.replacements.insert(from_bytes, to_bytes); } diff --git a/src/types.rs b/src/types.rs index 52e7970..c069e58 100644 --- a/src/types.rs +++ b/src/types.rs @@ -186,6 +186,7 @@ impl AsRef for CircusMesType { pub struct ExtraConfig { pub circus_mes_type: Option, + pub escude_fake_compress: bool, } #[derive(Clone, Copy, Debug, ValueEnum, PartialEq, Eq, PartialOrd, Ord)]