mirror of
https://github.com/lifegpc/msg-tool.git
synced 2026-06-16 10:04:20 +08:00
Add LZW Support
This commit is contained in:
12
src/args.rs
12
src/args.rs
@@ -66,13 +66,16 @@ pub struct Arg {
|
||||
#[arg(global = true, action = ArgAction::SetTrue, short, long)]
|
||||
/// Print backtrace on error
|
||||
pub backtrace: bool,
|
||||
#[arg(long, action = ArgAction::SetTrue, global = true)]
|
||||
/// Whether to use fake compression for Escude archive
|
||||
pub escude_fake_compress: bool,
|
||||
#[command(subcommand)]
|
||||
/// Command
|
||||
pub command: Command,
|
||||
}
|
||||
|
||||
#[derive(Parser, Debug)]
|
||||
#[clap(group = ArgGroup::new("patched_encodingg").multiple(false))]
|
||||
#[clap(group = ArgGroup::new("patched_encodingg").multiple(false), group = ArgGroup::new("patched_archive_encodingg").multiple(false))]
|
||||
pub struct ImportArgs {
|
||||
/// Input script file or directory
|
||||
pub input: String,
|
||||
@@ -87,6 +90,13 @@ pub struct ImportArgs {
|
||||
#[arg(short = 'P', long, group = "patched_encodingg")]
|
||||
/// Patched script code page
|
||||
pub patched_code_page: Option<u32>,
|
||||
#[arg(long, value_enum, group = "patched_archive_encodingg", alias = "pa")]
|
||||
/// Patched archive filename encoding
|
||||
pub patched_archive_encoding: Option<TextEncoding>,
|
||||
#[cfg(windows)]
|
||||
#[arg(long, value_enum, group = "patched_archive_encodingg", alias = "PA")]
|
||||
/// Patched archive code page
|
||||
pub patched_archive_code_page: Option<u32>,
|
||||
#[arg(long)]
|
||||
/// Patched script format type
|
||||
pub patched_format: Option<FormatType>,
|
||||
|
||||
@@ -678,3 +678,90 @@ impl<'a> Seek for MemReaderRef<'a> {
|
||||
Ok(())
|
||||
}
|
||||
}
|
||||
|
||||
/// Growable in-memory byte sink with an independent, seekable write cursor.
///
/// Writing past the current end grows the buffer; any gap between the old end
/// and the cursor is filled with zero bytes.
pub struct MemWriter {
    /// Bytes written so far.
    data: Vec<u8>,
    /// Offset at which the next write starts.
    pos: usize,
}

impl MemWriter {
    /// Creates an empty writer with the cursor at offset 0.
    pub fn new() -> Self {
        MemWriter {
            data: Vec::new(),
            pos: 0,
        }
    }

    /// Consumes the writer and returns the accumulated bytes.
    pub fn into_inner(self) -> Vec<u8> {
        self.data
    }

    /// Borrows the accumulated bytes.
    pub fn as_slice(&self) -> &[u8] {
        &self.data
    }

    /// Builds the `InvalidInput` error used for every rejected seek/write position.
    fn invalid_position(message: &'static str) -> std::io::Error {
        std::io::Error::new(std::io::ErrorKind::InvalidInput, message)
    }

    /// Applies the signed `delta` to `base` without ever overflowing.
    ///
    /// Returns `negative_message` as an `InvalidInput` error when the result
    /// would be below zero, and a generic overflow error when it would not fit
    /// in `usize`.
    fn offset_from(base: usize, delta: i64, negative_message: &'static str) -> Result<usize> {
        let magnitude = delta.unsigned_abs();
        // `None` when the magnitude itself cannot be represented on this target.
        let magnitude = if magnitude > usize::MAX as u64 {
            None
        } else {
            Some(magnitude as usize)
        };
        if delta < 0 {
            magnitude
                .and_then(|m| base.checked_sub(m))
                .ok_or_else(|| Self::invalid_position(negative_message))
        } else {
            magnitude
                .and_then(|m| base.checked_add(m))
                .ok_or_else(|| Self::invalid_position("Seek position overflows"))
        }
    }
}

impl std::fmt::Debug for MemWriter {
    /// Prints the cursor and the buffer length rather than the buffer contents.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        f.debug_struct("MemWriter")
            .field("pos", &self.pos)
            .field("data_length", &self.data.len())
            .finish_non_exhaustive()
    }
}

impl Write for MemWriter {
    /// Copies all of `buf` at the cursor, growing the buffer when needed, and
    /// advances the cursor by `buf.len()`.
    fn write(&mut self, buf: &[u8]) -> Result<usize> {
        let end = self
            .pos
            .checked_add(buf.len())
            .ok_or_else(|| Self::invalid_position("Write position overflows"))?;
        if end > self.data.len() {
            // Also zero-fills the gap left by a seek beyond the previous end.
            self.data.resize(end, 0);
        }
        self.data[self.pos..end].copy_from_slice(buf);
        self.pos = end;
        Ok(buf.len())
    }

    /// Nothing is buffered outside `data`, so there is nothing to flush.
    fn flush(&mut self) -> Result<()> {
        Ok(())
    }
}

impl Seek for MemWriter {
    /// Moves the cursor; positions beyond the end are allowed.
    ///
    /// Fails with `InvalidInput` (leaving the cursor untouched) when the target
    /// would be negative or does not fit in `usize`.
    fn seek(&mut self, pos: SeekFrom) -> Result<u64> {
        let target = match pos {
            SeekFrom::Start(offset) => {
                if offset > usize::MAX as u64 {
                    return Err(Self::invalid_position("Seek position overflows"));
                }
                offset as usize
            }
            SeekFrom::End(offset) => Self::offset_from(
                self.data.len(),
                offset,
                "Seek from end resulted in negative position",
            )?,
            SeekFrom::Current(offset) => {
                Self::offset_from(self.pos, offset, "Seek position is negative")?
            }
        };
        self.pos = target;
        Ok(target as u64)
    }

    /// Returns the cursor without moving it.
    fn stream_position(&mut self) -> Result<u64> {
        Ok(self.pos as u64)
    }

    /// Moves the cursor back to offset 0.
    fn rewind(&mut self) -> Result<()> {
        self.pos = 0;
        Ok(())
    }
}
|
||||
|
||||
76
src/main.rs
76
src/main.rs
@@ -112,6 +112,35 @@ fn get_patched_encoding(
|
||||
builder.default_patched_encoding()
|
||||
}
|
||||
|
||||
fn get_patched_archive_encoding(
|
||||
arg: &args::ImportArgs,
|
||||
builder: &Box<dyn scripts::ScriptBuilder + Send + Sync>,
|
||||
encoding: types::Encoding,
|
||||
) -> types::Encoding {
|
||||
match &arg.patched_archive_encoding {
|
||||
Some(enc) => {
|
||||
return match enc {
|
||||
&types::TextEncoding::Default => {
|
||||
builder.default_archive_encoding().unwrap_or(encoding)
|
||||
}
|
||||
&types::TextEncoding::Auto => types::Encoding::Utf8,
|
||||
&types::TextEncoding::Cp932 => types::Encoding::Cp932,
|
||||
&types::TextEncoding::Utf8 => types::Encoding::Utf8,
|
||||
&types::TextEncoding::Gb2312 => types::Encoding::Gb2312,
|
||||
};
|
||||
}
|
||||
None => {}
|
||||
}
|
||||
#[cfg(windows)]
|
||||
match &arg.patched_archive_code_page {
|
||||
Some(code_page) => {
|
||||
return types::Encoding::CodePage(*code_page);
|
||||
}
|
||||
None => {}
|
||||
}
|
||||
builder.default_archive_encoding().unwrap_or(encoding)
|
||||
}
|
||||
|
||||
pub fn parse_script(
|
||||
filename: &str,
|
||||
arg: &args::Arg,
|
||||
@@ -575,9 +604,9 @@ pub fn import_script(
|
||||
imp_cfg.patched.clone()
|
||||
};
|
||||
let files: Vec<_> = files.iter().map(|s| s.as_str()).collect();
|
||||
let encoding = get_encoding(arg, builder);
|
||||
let enc = get_archived_encoding(arg, builder, encoding);
|
||||
let mut arch = builder.create_archive(&patched_f, &files, enc)?;
|
||||
let pencoding = get_patched_encoding(imp_cfg, builder);
|
||||
let enc = get_patched_archive_encoding(imp_cfg, builder, pencoding);
|
||||
let mut arch = builder.create_archive(&patched_f, &files, enc, config)?;
|
||||
for f in script.iter_archive_mut()? {
|
||||
let f = f?;
|
||||
let mut writer = arch.new_file(f.name())?;
|
||||
@@ -597,6 +626,46 @@ pub fn import_script(
|
||||
of.as_ref()
|
||||
};
|
||||
out_path.set_extension(ext);
|
||||
if !out_path.exists() {
|
||||
out_path = std::path::PathBuf::from(&odir).join(f.name());
|
||||
if !out_path.exists() {
|
||||
eprintln!(
|
||||
"Warning: File {} does not exist, using file from original archive.",
|
||||
out_path.display()
|
||||
);
|
||||
COUNTER.inc_warning();
|
||||
match writer.write_all(f.data()) {
|
||||
Ok(_) => {}
|
||||
Err(e) => {
|
||||
eprintln!("Error writing to file {}: {}", out_path.display(), e);
|
||||
COUNTER.inc_error();
|
||||
continue;
|
||||
}
|
||||
}
|
||||
COUNTER.inc(types::ScriptResult::Ok);
|
||||
continue;
|
||||
} else {
|
||||
let file = match std::fs::File::open(&out_path) {
|
||||
Ok(f) => f,
|
||||
Err(e) => {
|
||||
eprintln!("Error opening file {}: {}", out_path.display(), e);
|
||||
COUNTER.inc_error();
|
||||
continue;
|
||||
}
|
||||
};
|
||||
let mut f = std::io::BufReader::new(file);
|
||||
match std::io::copy(&mut f, &mut writer) {
|
||||
Ok(_) => {}
|
||||
Err(e) => {
|
||||
eprintln!("Error writing to file {}: {}", out_path.display(), e);
|
||||
COUNTER.inc_error();
|
||||
continue;
|
||||
}
|
||||
}
|
||||
COUNTER.inc(types::ScriptResult::Ok);
|
||||
continue;
|
||||
}
|
||||
}
|
||||
let mut mes = match of {
|
||||
types::OutputScriptType::Json => {
|
||||
let enc = get_output_encoding(arg);
|
||||
@@ -838,6 +907,7 @@ fn main() {
|
||||
}
|
||||
let cfg = types::ExtraConfig {
|
||||
circus_mes_type: arg.circus_mes_type.clone(),
|
||||
escude_fake_compress: arg.escude_fake_compress.clone(),
|
||||
};
|
||||
match &arg.command {
|
||||
args::Command::Export { input, output } => {
|
||||
|
||||
@@ -73,6 +73,7 @@ pub trait ScriptBuilder: std::fmt::Debug {
|
||||
_filename: &str,
|
||||
_files: &[&str],
|
||||
_encoding: Encoding,
|
||||
_config: &ExtraConfig,
|
||||
) -> Result<Box<dyn Archive>> {
|
||||
Err(anyhow::anyhow!(
|
||||
"This script type does not support creating an archive."
|
||||
|
||||
@@ -105,10 +105,11 @@ impl ScriptBuilder for EscudeBinArchiveBuilder {
|
||||
filename: &str,
|
||||
files: &[&str],
|
||||
encoding: Encoding,
|
||||
config: &ExtraConfig,
|
||||
) -> Result<Box<dyn Archive>> {
|
||||
let f = std::fs::File::create(filename)?;
|
||||
let writer = std::io::BufWriter::new(f);
|
||||
let archive = EscudeBinArchiveWriter::new(writer, files, encoding)?;
|
||||
let archive = EscudeBinArchiveWriter::new(writer, files, encoding, config)?;
|
||||
Ok(Box::new(archive))
|
||||
}
|
||||
}
|
||||
@@ -284,7 +285,7 @@ impl<'a, T: Iterator<Item = &'a BinEntry>, R: Read + Seek> Iterator
|
||||
};
|
||||
data = match decoder.unpack() {
|
||||
Ok(unpacked_data) => unpacked_data,
|
||||
Err(e) => return Some(Err(anyhow::anyhow!("Failed to unpack LZW data: {}", e))),
|
||||
Err(e) => return Some(Err(e)),
|
||||
};
|
||||
}
|
||||
Some(Ok(Box::new(Entry { name, data })))
|
||||
@@ -295,10 +296,16 @@ pub struct EscudeBinArchiveWriter<T: Write + Seek> {
|
||||
writer: T,
|
||||
headers: HashMap<String, BinEntry>,
|
||||
name_tbl_len: u32,
|
||||
fake: bool,
|
||||
}
|
||||
|
||||
impl<T: Write + Seek> EscudeBinArchiveWriter<T> {
|
||||
pub fn new(mut writer: T, files: &[&str], encoding: Encoding) -> Result<Self> {
|
||||
pub fn new(
|
||||
mut writer: T,
|
||||
files: &[&str],
|
||||
encoding: Encoding,
|
||||
config: &ExtraConfig,
|
||||
) -> Result<Self> {
|
||||
writer.write_all(b"ESC-ARC2")?;
|
||||
let header_len = 0xC + 0xC * files.len();
|
||||
let header = vec![0u8; header_len];
|
||||
@@ -324,6 +331,7 @@ impl<T: Write + Seek> EscudeBinArchiveWriter<T> {
|
||||
writer,
|
||||
headers,
|
||||
name_tbl_len,
|
||||
fake: config.escude_fake_compress,
|
||||
})
|
||||
}
|
||||
}
|
||||
@@ -338,11 +346,11 @@ impl<T: Write + Seek> Archive for EscudeBinArchiveWriter<T> {
|
||||
return Err(anyhow::anyhow!("File '{}' already exists in archive", name));
|
||||
}
|
||||
entry.data_offset = self.writer.stream_position()? as u32;
|
||||
Ok(Box::new(EscudeBinArchiveFile {
|
||||
header: entry,
|
||||
writer: &mut self.writer,
|
||||
pos: 0,
|
||||
}))
|
||||
Ok(Box::new(EscudeBinArchiveFileWithLzw::new(
|
||||
entry,
|
||||
&mut self.writer,
|
||||
self.fake,
|
||||
)?))
|
||||
}
|
||||
|
||||
fn write_header(&mut self) -> Result<()> {
|
||||
@@ -361,6 +369,75 @@ impl<T: Write + Seek> Archive for EscudeBinArchiveWriter<T> {
|
||||
}
|
||||
}
|
||||
|
||||
pub struct EscudeBinArchiveFileWithLzw<'a, T: Write + Seek> {
|
||||
writer: EscudeBinArchiveFile<'a, T>,
|
||||
buf: MemWriter,
|
||||
fake: bool,
|
||||
}
|
||||
|
||||
impl<'a, T: Write + Seek> EscudeBinArchiveFileWithLzw<'a, T> {
|
||||
fn new(header: &'a mut BinEntry, writer: &'a mut T, fake: bool) -> Result<Self> {
|
||||
let writer = EscudeBinArchiveFile {
|
||||
header,
|
||||
writer,
|
||||
pos: 0,
|
||||
};
|
||||
Ok(EscudeBinArchiveFileWithLzw {
|
||||
writer,
|
||||
buf: MemWriter::new(),
|
||||
fake,
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
impl<'a, T: Write + Seek> Write for EscudeBinArchiveFileWithLzw<'a, T> {
|
||||
fn write(&mut self, buf: &[u8]) -> std::io::Result<usize> {
|
||||
self.buf.write(buf)
|
||||
}
|
||||
|
||||
fn flush(&mut self) -> std::io::Result<()> {
|
||||
self.buf.flush()
|
||||
}
|
||||
}
|
||||
|
||||
impl<'a, T: Write + Seek> Seek for EscudeBinArchiveFileWithLzw<'a, T> {
|
||||
fn seek(&mut self, pos: SeekFrom) -> std::io::Result<u64> {
|
||||
self.buf.seek(pos)
|
||||
}
|
||||
|
||||
fn stream_position(&mut self) -> std::io::Result<u64> {
|
||||
self.buf.stream_position()
|
||||
}
|
||||
|
||||
fn rewind(&mut self) -> std::io::Result<()> {
|
||||
self.buf.rewind()
|
||||
}
|
||||
}
|
||||
|
||||
impl<'a, T: Write + Seek> Drop for EscudeBinArchiveFileWithLzw<'a, T> {
|
||||
fn drop(&mut self) {
|
||||
let buf = self.buf.as_slice();
|
||||
let encoder = super::lzw::LZWEncoder::new();
|
||||
let data = match encoder.encode(buf, self.fake) {
|
||||
Ok(data) => data,
|
||||
Err(e) => {
|
||||
eprintln!("Failed to encode LZW data: {}", e);
|
||||
crate::COUNTER.inc_error();
|
||||
return;
|
||||
}
|
||||
};
|
||||
match self.writer.write_all(&data) {
|
||||
Ok(_) => {
|
||||
self.writer.header.length = self.writer.header.length.max(data.len() as u32);
|
||||
}
|
||||
Err(e) => {
|
||||
eprintln!("Failed to write LZW data: {}", e);
|
||||
crate::COUNTER.inc_error();
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
pub struct EscudeBinArchiveFile<'a, T: Write + Seek> {
|
||||
header: &'a mut BinEntry,
|
||||
writer: &'a mut T,
|
||||
|
||||
@@ -1,5 +1,6 @@
|
||||
use crate::ext::io::*;
|
||||
use anyhow::Result;
|
||||
use std::io::Write;
|
||||
|
||||
pub struct BitStream<'a> {
|
||||
m_input: MemReaderRef<'a>,
|
||||
@@ -96,3 +97,129 @@ impl<'a> LZWDecoder<'a> {
|
||||
Ok(output)
|
||||
}
|
||||
}
|
||||
|
||||
/// Packs bits into bytes, most-significant bit first, and forwards every
/// completed byte to the wrapped writer.
pub struct BitWriter<'a, T: Write> {
    /// Destination for completed bytes.
    writer: &'a mut T,
    /// Pending bits, right-aligned; holds at most 7 bits between calls.
    buffer: u32,
    /// Number of pending bits in `buffer`.
    buffer_size: u32,
}

impl<'a, T: Write> BitWriter<'a, T> {
    /// Creates a bit writer with no pending bits.
    pub fn new(writer: &'a mut T) -> Self {
        BitWriter {
            writer,
            buffer: 0,
            buffer_size: 0,
        }
    }

    /// Emits the pending bits, if any, as one final byte.
    ///
    /// The pending bits are moved to the top of the byte and the unused low
    /// bits are zero, so the byte continues the MSB-first order that
    /// `put_bits` produces.
    pub fn flush(&mut self) -> Result<()> {
        if self.buffer_size > 0 {
            // `buffer_size` is 1..=7 here because `put_bit` drains at 8.
            let padded = (self.buffer << (8 - self.buffer_size)) & 0xFF;
            self.writer.write_all(&[padded as u8])?;
            self.buffer = 0;
            self.buffer_size = 0;
        }
        Ok(())
    }

    /// Writes the low `token_width` bits of `byte`, highest bit first.
    ///
    /// Bit positions above 15 do not exist in `byte` and are written as zero.
    pub fn put_bits(&mut self, byte: u16, token_width: u8) -> Result<()> {
        let value = u32::from(byte);
        for shift in (0..token_width).rev() {
            // `checked_shr` keeps widths of 32 or more from overflowing the shift.
            let bit = value.checked_shr(u32::from(shift)).unwrap_or(0) & 1;
            self.put_bit(bit != 0)?;
        }
        Ok(())
    }

    /// Appends one bit and emits a byte once eight bits are pending.
    pub fn put_bit(&mut self, bit: bool) -> Result<()> {
        self.buffer = (self.buffer << 1) | u32::from(bit);
        self.buffer_size += 1;
        if self.buffer_size == 8 {
            self.writer.write_all(&[(self.buffer & 0xFF) as u8])?;
            // Drop the emitted bits so `flush` only ever sees pending ones.
            self.buffer = 0;
            self.buffer_size = 0;
        }
        Ok(())
    }
}
|
||||
|
||||
pub struct LZWEncoder {
|
||||
buf: MemWriter,
|
||||
}
|
||||
|
||||
impl LZWEncoder {
|
||||
pub fn new() -> Self {
|
||||
LZWEncoder {
|
||||
buf: MemWriter::new(),
|
||||
}
|
||||
}
|
||||
|
||||
pub fn encode(mut self, input: &[u8], fake: bool) -> Result<Vec<u8>> {
|
||||
self.buf.write_all(b"acp\0")?;
|
||||
self.buf.write_u32_be(input.len() as u32)?;
|
||||
let mut writer = BitWriter::new(&mut self.buf);
|
||||
if fake {
|
||||
for i in 0..input.len() {
|
||||
if i > 0 && i % 0x4000 == 0 {
|
||||
writer.put_bits(0x102, 9)?;
|
||||
}
|
||||
writer.put_bits(input[i] as u16, 9)?;
|
||||
}
|
||||
writer.put_bits(0x100, 9)?; // End of stream
|
||||
writer.flush()?;
|
||||
} else {
|
||||
let mut dict = std::collections::HashMap::new();
|
||||
for i in 0..256 {
|
||||
dict.insert(vec![i as u8], i as u16);
|
||||
}
|
||||
let mut next_code = 0x103u16;
|
||||
let mut token_width = 9;
|
||||
|
||||
let mut i = 0;
|
||||
while i < input.len() {
|
||||
let mut current = vec![input[i]];
|
||||
i += 1;
|
||||
|
||||
while i < input.len()
|
||||
&& dict.contains_key(&{
|
||||
let mut temp = current.clone();
|
||||
temp.push(input[i]);
|
||||
temp
|
||||
})
|
||||
{
|
||||
current.push(input[i]);
|
||||
i += 1;
|
||||
}
|
||||
|
||||
let code = dict[¤t];
|
||||
writer.put_bits(code, token_width)?;
|
||||
|
||||
if i < input.len() {
|
||||
let mut new_entry = current.clone();
|
||||
new_entry.push(input[i]);
|
||||
dict.insert(new_entry, next_code);
|
||||
next_code += 1;
|
||||
|
||||
if next_code >= (1 << token_width) && token_width < 24 {
|
||||
writer.put_bits(0x101, token_width)?; // Increase token width
|
||||
token_width += 1;
|
||||
}
|
||||
|
||||
if dict.len() >= 0x8900 {
|
||||
writer.put_bits(0x102, token_width)?; // Clear dictionary
|
||||
dict.clear();
|
||||
for j in 0..256 {
|
||||
dict.insert(vec![j as u8], j as u16);
|
||||
}
|
||||
next_code = 0x103;
|
||||
token_width = 9;
|
||||
}
|
||||
}
|
||||
}
|
||||
writer.put_bits(0x100, token_width)?; // End of stream
|
||||
writer.flush()?;
|
||||
}
|
||||
|
||||
Ok(self.buf.into_inner())
|
||||
}
|
||||
}
|
||||
|
||||
@@ -135,7 +135,7 @@ impl Script for EscudeBinScript {
|
||||
s = s.replace(from, to);
|
||||
}
|
||||
}
|
||||
let encoded = encode_string(encoding, &s, true)?;
|
||||
let encoded = encode_string(encoding, &s, false)?;
|
||||
len += encoded.len() as u32 + 1;
|
||||
strs.push(CString::new(encoded)?);
|
||||
}
|
||||
@@ -169,20 +169,20 @@ impl StrReplacer {
|
||||
let mut s = StrReplacer {
|
||||
replacements: HashMap::new(),
|
||||
};
|
||||
s.add("!?。「」、・ヲァィゥェォャュョッーアイウエオカキクケコサシスセソタチツテトナニヌネノハヒフヘホマミムメモヤユヨラリルレロワン゙゚", "!? 。「」、…をぁぃぅぇぉゃゅょっーあいうえおかきくけこさしすせそたちつてとなにぬねのはひふへほまみむめもやゆよらりるれろわん゛゜")?;
|
||||
// 0xa0 to 0xde: Half-width katakana in CP932
|
||||
let half_width_katakana = "!? 。「」、…をぁぃぅぇぉゃゅょっーあいうえおかきくけこさしすせそたちつてとなにぬねのはひふへほまみむめもやゆよらりるれろわん゛゜";
|
||||
let mut bytes: Vec<u8> = (0xa0..=0xde).collect();
|
||||
bytes.insert(0, 0x21);
|
||||
bytes.insert(1, 0x22);
|
||||
s.add(&bytes, half_width_katakana)?;
|
||||
Ok(s)
|
||||
}
|
||||
|
||||
fn add(&mut self, from: &str, to: &str) -> Result<()> {
|
||||
fn add(&mut self, from: &[u8], to: &str) -> Result<()> {
|
||||
let encoding = Encoding::Cp932; // Default encoding, can be changed as needed
|
||||
let froms = UnicodeSegmentation::graphemes(from, true);
|
||||
let tos = UnicodeSegmentation::graphemes(to, true);
|
||||
for (from, to) in froms.zip(tos) {
|
||||
let from_bytes = if from == "" {
|
||||
vec![0xa0]
|
||||
} else {
|
||||
encode_string(encoding, from, true)?
|
||||
};
|
||||
for (from, to) in from.into_iter().zip(tos) {
|
||||
let from_bytes = vec![from.clone()];
|
||||
let to_bytes = encode_string(encoding, to, true)?;
|
||||
self.replacements.insert(from_bytes, to_bytes);
|
||||
}
|
||||
|
||||
@@ -186,6 +186,7 @@ impl AsRef<str> for CircusMesType {
|
||||
|
||||
pub struct ExtraConfig {
|
||||
pub circus_mes_type: Option<CircusMesType>,
|
||||
pub escude_fake_compress: bool,
|
||||
}
|
||||
|
||||
#[derive(Clone, Copy, Debug, ValueEnum, PartialEq, Eq, PartialOrd, Ord)]
|
||||
|
||||
Reference in New Issue
Block a user