From 99210a19cf6f6b8c039a788bcaf4ee053b423d79 Mon Sep 17 00:00:00 2001 From: lifegpc Date: Wed, 21 May 2025 10:57:14 +0800 Subject: [PATCH] Add format support --- Cargo.lock | 7 +++++ Cargo.toml | 3 +- src/args.rs | 9 ++++++ src/format/fixed.rs | 54 ++++++++++++++++++++++++++++++++++++ src/format/mod.rs | 18 ++++++++++++ src/main.rs | 18 ++++++++++-- src/scripts/base.rs | 2 ++ src/scripts/circus/script.rs | 42 +++++++++++++++++++++++++--- src/types.rs | 33 ++++++++++++++++++++++ src/utils/encoding.rs | 40 ++++++++++++++++++-------- src/utils/encoding_win.rs | 35 +++++++++++++++++++---- 11 files changed, 235 insertions(+), 26 deletions(-) create mode 100644 src/format/fixed.rs create mode 100644 src/format/mod.rs diff --git a/Cargo.lock b/Cargo.lock index 512a5fa..b95de43 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -159,6 +159,7 @@ dependencies = [ "lazy_static", "serde", "serde_json", + "unicode-segmentation", "windows-sys", ] @@ -247,6 +248,12 @@ version = "1.0.18" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "5a5f39404a5da50712a4c1eecf25e90dd62b613502b7e925fd4e4d19b5c96512" +[[package]] +name = "unicode-segmentation" +version = "1.12.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f6ccf251212114b54433ec949fd6a7841275f9ada20dddd2f29e9ceea4501493" + [[package]] name = "utf8parse" version = "0.2.2" diff --git a/Cargo.toml b/Cargo.toml index 0e28782..851a74d 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -9,7 +9,8 @@ clap = { version = "4.5", features = ["derive"] } encoding_rs = "0.8" lazy_static = "1.5.0" serde = { version = "1", features = ["derive"] } -serde_json = "1.0.140" +serde_json = "1" +unicode-segmentation = "1.12" [target.'cfg(windows)'.dependencies] windows-sys = { version = "0", features = ["Win32_Globalization", "Win32_System_Diagnostics_Debug"] } diff --git a/src/args.rs b/src/args.rs index ccf930b..9befcd9 100644 --- a/src/args.rs +++ b/src/args.rs @@ -68,6 +68,15 @@ pub struct ImportArgs { #[arg(short = 'P', long, group = "patched_encodingg")] /// Patched script code page pub patched_code_page: Option, + #[arg(long)] + /// Patched script format type + pub patched_format: Option, + #[arg(long)] + /// Fixed length of one line in patched script (for fixed format) + pub patched_fixed_length: Option, + #[arg(long, action = ArgAction::SetTrue)] + /// Keep original line breaks in patched script (for fixed format) + pub patched_keep_original: bool, } #[derive(Subcommand, Debug)] diff --git a/src/format/fixed.rs b/src/format/fixed.rs new file mode 100644 index 0000000..0ab1baa --- /dev/null +++ b/src/format/fixed.rs @@ -0,0 +1,54 @@ +use unicode_segmentation::UnicodeSegmentation; + +pub struct FixedFormatter { + length: usize, + keep_original: bool, +} + +impl FixedFormatter { + pub fn new(length: usize, keep_original: bool) -> Self { + FixedFormatter { + length, + keep_original, + } + } + + pub fn format(&self, message: &str) -> String { + let mut result = String::new(); + let vec: Vec<_> = UnicodeSegmentation::graphemes(message, true).collect(); + let mut current_length = 0; + for grapheme in vec { + if grapheme == "\n" { + if self.keep_original { + result.push('\n'); + current_length = 0; + } + continue; + } + if current_length >= self.length { + result.push('\n'); + current_length = 0; + } + result.push_str(grapheme); + current_length += 1; + } + return result; + } +} + +#[test] +fn test_format() { + let formatter = FixedFormatter::new(10, false); + let message = "This is a test message.\nThis is another line."; + let formatted_message = formatter.format(message); + assert_eq!( + formatted_message, + "This is a \ntest messa\nge.This is\n another l\nine." + ); + assert_eq!(formatter.format("● This is a test."), "● This is \na test."); + let fommater2 = FixedFormatter::new(10, true); + assert_eq!( + fommater2.format("● Th\nis is a test."), + "● Th\nis is a te\nst." + ); +} diff --git a/src/format/mod.rs b/src/format/mod.rs new file mode 100644 index 0000000..ccae528 --- /dev/null +++ b/src/format/mod.rs @@ -0,0 +1,18 @@ +mod fixed; + +use crate::types::*; + +pub fn fmt_message(mes: &mut Vec, opt: FormatOptions) { + match opt { + FormatOptions::Fixed { + length, + keep_original, + } => { + let formatter = fixed::FixedFormatter::new(length, keep_original); + for message in mes.iter_mut() { + message.message = formatter.format(&message.message); + } + } + FormatOptions::None => {} + } +} diff --git a/src/main.rs b/src/main.rs index 48509e5..32bfc5e 100644 --- a/src/main.rs +++ b/src/main.rs @@ -1,4 +1,5 @@ pub mod args; +pub mod format; pub mod output_scripts; pub mod scripts; pub mod types; @@ -160,14 +161,14 @@ pub fn export_script( types::OutputScriptType::Json => { let enc = get_output_encoding(arg); let s = serde_json::to_string_pretty(&mes)?; - let b = utils::encoding::encode_string(enc, &s)?; + let b = utils::encoding::encode_string(enc, &s, false)?; let mut f = utils::files::write_file(&f)?; f.write_all(&b)?; } types::OutputScriptType::M3t => { let enc = get_output_encoding(arg); let s = output_scripts::m3t::M3tDumper::dump(&mes); - let b = utils::encoding::encode_string(enc, &s)?; + let b = utils::encoding::encode_string(enc, &s, false)?; let mut f = utils::files::write_file(&f)?; f.write_all(&b)?; } @@ -203,7 +204,7 @@ pub fn import_script( eprintln!("Output file does not exist"); return Ok(types::ScriptResult::Ignored); } - let mes = match of { + let mut mes = match of { types::OutputScriptType::Json => { let enc = get_output_encoding(arg); let b = utils::files::read_file(&out_f)?; @@ -234,6 +235,17 @@ pub fn import_script( } else { imp_cfg.patched.clone() }; + let fmt = match imp_cfg.patched_format { + Some(fmt) => match fmt { + types::FormatType::Fixed => types::FormatOptions::Fixed { + length: imp_cfg.patched_fixed_length.unwrap_or(32), + keep_original: imp_cfg.patched_keep_original, + }, + types::FormatType::None => types::FormatOptions::None, + }, + None => script.default_format_type(), + }; + format::fmt_message(&mut mes, fmt); script.import_messages(mes, &patched_f, encoding)?; Ok(types::ScriptResult::Ok) } diff --git a/src/scripts/base.rs b/src/scripts/base.rs index 712a199..068d558 100644 --- a/src/scripts/base.rs +++ b/src/scripts/base.rs @@ -23,6 +23,8 @@ pub trait ScriptBuilder { pub trait Script: std::fmt::Debug { fn default_output_script_type(&self) -> OutputScriptType; + fn default_format_type(&self) -> FormatOptions; + fn extract_messages(&self) -> Result>; fn import_messages( diff --git a/src/scripts/circus/script.rs b/src/scripts/circus/script.rs index a166727..d05fdce 100644 --- a/src/scripts/circus/script.rs +++ b/src/scripts/circus/script.rs @@ -181,6 +181,13 @@ impl Script for CircusMesScript { OutputScriptType::Json } + fn default_format_type(&self) -> FormatOptions { + FormatOptions::Fixed { + length: 32, + keep_original: false, + } + } + fn extract_messages(&self) -> Result> { let mut mes = vec![]; let mut name = None; @@ -222,6 +229,27 @@ impl Script for CircusMesScript { filename: &str, encoding: Encoding, ) -> Result<()> { + let mut repls = Vec::new(); + if !encoding.is_jis() { + fn insert_repl( + repls: &mut Vec<(&'static str, String)>, + s: &'static str, + encoding: Encoding, + ) -> Result<()> { + let jis = encode_string(Encoding::Cp932, s, true)?; + let out = decode_to_string(encoding, &jis)?; + repls.push((s, out)); + Ok(()) + } + let _ = insert_repl(&mut repls, "{", encoding); + let _ = insert_repl(&mut repls, "/", encoding); + let _ = insert_repl(&mut repls, "}", encoding); + if repls.len() < 3 { + println!( + "Warning: Some replacements cannot used in current encoding. Ruby text may be broken." + ); + } + } let mut buffer = Vec::with_capacity(self.data.len()); buffer.extend_from_slice(&self.data[..self.asm_bin_offset]); let mut nmes = Vec::with_capacity(messages.len()); @@ -246,7 +274,7 @@ impl Script for CircusMesScript { return Err(anyhow::anyhow!("No more messages to import")); } } - let s = if token.value == self.info.nameopcode { + let mut s = if token.value == self.info.nameopcode { match mes.as_mut().unwrap().name.take() { Some(s) => s, None => { @@ -260,7 +288,10 @@ impl Script for CircusMesScript { mes = None; t }; - let mut text = encode_string(encoding, &s)?; + for i in repls.iter() { + s = s.replace(i.0, i.1.as_str()); + } + let mut text = encode_string(encoding, &s, false)?; buffer.push(token.value); for t in text.iter_mut() { *t = (*t).overflowing_sub(self.info.deckey).0; @@ -276,7 +307,7 @@ impl Script for CircusMesScript { return Err(anyhow::anyhow!("No more messages to import")); } } - let s = if token.value == self.info.nameopcode { + let mut s = if token.value == self.info.nameopcode { match mes.as_mut().unwrap().name.take() { Some(s) => s, None => { @@ -290,8 +321,11 @@ impl Script for CircusMesScript { mes = None; t }; + for i in repls.iter() { + s = s.replace(i.0, i.1.as_str()); + } buffer.push(token.value); - let text = encode_string(encoding, &s)?; + let text = encode_string(encoding, &s, false)?; buffer.extend_from_slice(&text); buffer.push(0x00); continue; diff --git a/src/types.rs b/src/types.rs index 8400621..6ddd6f1 100644 --- a/src/types.rs +++ b/src/types.rs @@ -24,6 +24,17 @@ impl Default for Encoding { } } +impl Encoding { + pub fn is_jis(&self) -> bool { + match self { + Self::Cp932 => true, + #[cfg(windows)] + Self::CodePage(code_page) => *code_page == 932, + _ => false, + } + } +} + #[derive(Clone, Copy, Debug, ValueEnum, PartialEq, Eq, PartialOrd, Ord)] /// Text Encoding pub enum TextEncoding { @@ -189,3 +200,25 @@ pub enum ScriptResult { Ok, Ignored, } + +#[derive(Clone, Copy, Debug, ValueEnum, PartialEq, Eq, PartialOrd, Ord)] +/// Format type +pub enum FormatType { + /// Wrap line with fixed length + Fixed, + /// Do not wrap line + None, +} + +/// Format options +pub enum FormatOptions { + /// Wrap line with fixed length + Fixed { + /// Fixed length + length: usize, + /// Whether to keep original line breaks + keep_original: bool, + }, + /// Do not wrap line + None, +} diff --git a/src/utils/encoding.rs b/src/utils/encoding.rs index d1778da..8b3cebd 100644 --- a/src/utils/encoding.rs +++ b/src/utils/encoding.rs @@ -29,28 +29,44 @@ pub fn decode_to_string(encoding: Encoding, data: &[u8]) -> Result Result, anyhow::Error> { +pub fn encode_string( + encoding: Encoding, + data: &str, + check: bool, +) -> Result, anyhow::Error> { match encoding { Encoding::Auto => Ok(data.as_bytes().to_vec()), Encoding::Utf8 => Ok(data.as_bytes().to_vec()), Encoding::Cp932 => { let result = encoding_rs::SHIFT_JIS.encode(data); if result.2 { - Err(anyhow::anyhow!("Failed to encode Shift-JIS")) - } else { - Ok(result.0.to_vec()) + if check { + return Err(anyhow::anyhow!("Failed to encode Shift-JIS")); + } + eprintln!( + "Warning: Some characters could not be encoded in Shift-JIS: {}", + data + ); } + Ok(result.0.to_vec()) } Encoding::Gb2312 => { let result = encoding_rs::GBK.encode(data); if result.2 { - Err(anyhow::anyhow!("Failed to encode GB2312")) - } else { - Ok(result.0.to_vec()) + if check { + return Err(anyhow::anyhow!("Failed to encode GB2312")); + } + eprintln!( + "Warning: Some characters could not be encoded in GB2312: {}", + data + ); } + Ok(result.0.to_vec()) } #[cfg(windows)] - Encoding::CodePage(code_page) => Ok(super::encoding_win::encode_string(code_page, data)?), + Encoding::CodePage(code_page) => { + Ok(super::encoding_win::encode_string(code_page, data, check)?) + } } } @@ -106,22 +122,22 @@ fn test_decode_to_string() { #[test] fn test_encode_string() { assert_eq!( - encode_string(Encoding::Utf8, "中文测试").unwrap(), + encode_string(Encoding::Utf8, "中文测试", true).unwrap(), vec![228, 184, 173, 230, 150, 135, 230, 181, 139, 232, 175, 149] ); assert_eq!( - encode_string(Encoding::Cp932, "きゃべつそふと").unwrap(), + encode_string(Encoding::Cp932, "きゃべつそふと", true).unwrap(), vec![ 130, 171, 130, 225, 130, 215, 130, 194, 130, 187, 130, 211, 130, 198 ] ); assert_eq!( - encode_string(Encoding::Gb2312, "中文").unwrap(), + encode_string(Encoding::Gb2312, "中文", true).unwrap(), vec![214, 208, 206, 196] ); #[cfg(windows)] assert_eq!( - encode_string(Encoding::CodePage(936), "中文").unwrap(), + encode_string(Encoding::CodePage(936), "中文", true).unwrap(), vec![214, 208, 206, 196] ); } diff --git a/src/utils/encoding_win.rs b/src/utils/encoding_win.rs index 5fe75f5..75756a3 100644 --- a/src/utils/encoding_win.rs +++ b/src/utils/encoding_win.rs @@ -1,6 +1,6 @@ use windows_sys::Win32::Foundation::GetLastError; use windows_sys::Win32::Globalization::{ - MB_ERR_INVALID_CHARS, MultiByteToWideChar, WideCharToMultiByte, + CP_UTF7, CP_UTF8, MB_ERR_INVALID_CHARS, MultiByteToWideChar, WideCharToMultiByte, }; use windows_sys::Win32::System::Diagnostics::Debug::{ FORMAT_MESSAGE_FROM_SYSTEM, FORMAT_MESSAGE_IGNORE_INSERTS, FormatMessageW, @@ -79,7 +79,7 @@ pub fn decode_to_string(cp: u32, data: &[u8]) -> Result { Ok(String::from_utf16_lossy(&wc)) } -pub fn encode_string(cp: u32, data: &str) -> Result, WinError> { +pub fn encode_string(cp: u32, data: &str, check: bool) -> Result, WinError> { let wstr = data.encode_utf16().collect::>(); let needed_len = unsafe { WideCharToMultiByte( @@ -98,6 +98,7 @@ pub fn encode_string(cp: u32, data: &str) -> Result, WinError> { } let mut mb = Vec::with_capacity(needed_len as usize); mb.resize(needed_len as usize, 0); + let mut used_default_char = 0; let result = unsafe { WideCharToMultiByte( cp, @@ -107,9 +108,23 @@ pub fn encode_string(cp: u32, data: &str) -> Result, WinError> { mb.as_mut_ptr(), needed_len, std::ptr::null_mut(), - std::ptr::null_mut(), + if cp == CP_UTF7 || cp == CP_UTF8 { + std::ptr::null_mut() + } else { + &mut used_default_char + }, ) }; + if used_default_char != 0 { + if check { + return Err(WinError::new(0)); + } else { + eprintln!( + "Warning: Some characters could not be encoded in code page {}: {}", + cp, data + ); + } + } if result == 0 { return Err(WinError::from_last_error()); } @@ -145,17 +160,25 @@ fn test_decode_to_string() { #[test] fn test_encode_string() { assert_eq!( - encode_string(65001, "中文测试").unwrap(), + encode_string(65001, "中文测试", true).unwrap(), vec![228, 184, 173, 230, 150, 135, 230, 181, 139, 232, 175, 149] ); assert_eq!( - encode_string(932, "きゃべつそふと").unwrap(), + encode_string(932, "きゃべつそふと", true).unwrap(), vec![ 130, 171, 130, 225, 130, 215, 130, 194, 130, 187, 130, 211, 130, 198 ] ); assert_eq!( - encode_string(936, "中文").unwrap(), + encode_string(936, "中文", true).unwrap(), vec![214, 208, 206, 196] ); + assert!( + encode_string( + 936, + "「あ、こーら、逃げちゃダメだよー? 起きちゃうのも、まだダメだけ\nどね♪」", + true + ) + .is_err() + ); }