Add format support

This commit is contained in:
2025-05-21 10:57:14 +08:00
parent a2747d29b9
commit 99210a19cf
11 changed files with 235 additions and 26 deletions

7
Cargo.lock generated
View File

@@ -159,6 +159,7 @@ dependencies = [
"lazy_static",
"serde",
"serde_json",
"unicode-segmentation",
"windows-sys",
]
@@ -247,6 +248,12 @@ version = "1.0.18"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5a5f39404a5da50712a4c1eecf25e90dd62b613502b7e925fd4e4d19b5c96512"
[[package]]
name = "unicode-segmentation"
version = "1.12.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f6ccf251212114b54433ec949fd6a7841275f9ada20dddd2f29e9ceea4501493"
[[package]]
name = "utf8parse"
version = "0.2.2"

View File

@@ -9,7 +9,8 @@ clap = { version = "4.5", features = ["derive"] }
encoding_rs = "0.8"
lazy_static = "1.5.0"
serde = { version = "1", features = ["derive"] }
serde_json = "1.0.140"
serde_json = "1"
unicode-segmentation = "1.12"
[target.'cfg(windows)'.dependencies]
windows-sys = { version = "0", features = ["Win32_Globalization", "Win32_System_Diagnostics_Debug"] }

View File

@@ -68,6 +68,15 @@ pub struct ImportArgs {
#[arg(short = 'P', long, group = "patched_encodingg")]
/// Patched script code page
pub patched_code_page: Option<u32>,
#[arg(long)]
/// Patched script format type
pub patched_format: Option<FormatType>,
#[arg(long)]
/// Fixed length of one line in patched script (for fixed format)
pub patched_fixed_length: Option<usize>,
#[arg(long, action = ArgAction::SetTrue)]
/// Keep original line breaks in patched script (for fixed format)
pub patched_keep_original: bool,
}
#[derive(Subcommand, Debug)]

54
src/format/fixed.rs Normal file
View File

@@ -0,0 +1,54 @@
use unicode_segmentation::UnicodeSegmentation;
/// Re-wraps message text so that every output line holds at most a fixed
/// number of grapheme clusters.
#[derive(Debug, Clone)]
pub struct FixedFormatter {
    /// Maximum number of grapheme clusters placed on one output line.
    length: usize,
    /// When `true`, line breaks already present in the message are kept
    /// (and restart the column count); when `false` they are dropped.
    keep_original: bool,
}

impl FixedFormatter {
    /// Creates a formatter that wraps after `length` grapheme clusters.
    ///
    /// `keep_original` selects whether line breaks found in the input are
    /// preserved or removed before re-wrapping.
    pub fn new(length: usize, keep_original: bool) -> Self {
        FixedFormatter {
            length,
            keep_original,
        }
    }

    /// Returns `message` re-wrapped to the configured line length.
    ///
    /// Width is counted in extended grapheme clusters, each counting as one
    /// column. A `'\n'` is inserted before the grapheme that would exceed
    /// `length`; no trailing break is added. Existing `"\n"` and `"\r\n"`
    /// breaks are either copied through unchanged (resetting the column
    /// count) or dropped, depending on `keep_original`.
    ///
    /// With a `length` of 0 the limit is always reached, so a break is
    /// emitted before every grapheme.
    pub fn format(&self, message: &str) -> String {
        // Capacity is only a hint: inserted breaks may grow the result a bit.
        let mut result = String::with_capacity(message.len());
        let mut current_length = 0;
        for grapheme in UnicodeSegmentation::graphemes(message, true) {
            // CRLF is a single extended grapheme cluster, so it never equals
            // "\n" and has to be recognised explicitly as a line break.
            if grapheme == "\n" || grapheme == "\r\n" {
                if self.keep_original {
                    // Copy the break exactly as written in the input.
                    result.push_str(grapheme);
                    current_length = 0;
                }
                continue;
            }
            if current_length >= self.length {
                result.push('\n');
                current_length = 0;
            }
            result.push_str(grapheme);
            current_length += 1;
        }
        result
    }
}
/// Checks fixed-width wrapping both with original line breaks dropped and
/// with original line breaks kept.
#[test]
fn test_format() {
    // Original breaks are discarded; text is re-wrapped every 10 graphemes.
    let wrap_only = FixedFormatter::new(10, false);
    assert_eq!(
        wrap_only.format("This is a test message.\nThis is another line."),
        "This is a \ntest messa\nge.This is\n another l\nine."
    );
    assert_eq!(wrap_only.format("● This is a test."), "● This is \na test.");

    // Original breaks are kept and restart the column count.
    let keep_breaks = FixedFormatter::new(10, true);
    assert_eq!(
        keep_breaks.format("● Th\nis is a test."),
        "● Th\nis is a te\nst."
    );
}

18
src/format/mod.rs Normal file
View File

@@ -0,0 +1,18 @@
mod fixed;
use crate::types::*;
/// Applies the requested line formatting to every message in place.
///
/// * `mes` - messages whose `message` text is rewritten.
/// * `opt` - formatting to apply; `FormatOptions::None` leaves `mes` untouched.
pub fn fmt_message(mes: &mut Vec<Message>, opt: FormatOptions) {
    // Resolve the options to a concrete formatter first; bail out early when
    // no formatting was requested.
    let formatter = match opt {
        FormatOptions::None => return,
        FormatOptions::Fixed {
            length,
            keep_original,
        } => fixed::FixedFormatter::new(length, keep_original),
    };
    mes.iter_mut()
        .for_each(|entry| entry.message = formatter.format(&entry.message));
}

View File

@@ -1,4 +1,5 @@
pub mod args;
pub mod format;
pub mod output_scripts;
pub mod scripts;
pub mod types;
@@ -160,14 +161,14 @@ pub fn export_script(
types::OutputScriptType::Json => {
let enc = get_output_encoding(arg);
let s = serde_json::to_string_pretty(&mes)?;
let b = utils::encoding::encode_string(enc, &s)?;
let b = utils::encoding::encode_string(enc, &s, false)?;
let mut f = utils::files::write_file(&f)?;
f.write_all(&b)?;
}
types::OutputScriptType::M3t => {
let enc = get_output_encoding(arg);
let s = output_scripts::m3t::M3tDumper::dump(&mes);
let b = utils::encoding::encode_string(enc, &s)?;
let b = utils::encoding::encode_string(enc, &s, false)?;
let mut f = utils::files::write_file(&f)?;
f.write_all(&b)?;
}
@@ -203,7 +204,7 @@ pub fn import_script(
eprintln!("Output file does not exist");
return Ok(types::ScriptResult::Ignored);
}
let mes = match of {
let mut mes = match of {
types::OutputScriptType::Json => {
let enc = get_output_encoding(arg);
let b = utils::files::read_file(&out_f)?;
@@ -234,6 +235,17 @@ pub fn import_script(
} else {
imp_cfg.patched.clone()
};
let fmt = match imp_cfg.patched_format {
Some(fmt) => match fmt {
types::FormatType::Fixed => types::FormatOptions::Fixed {
length: imp_cfg.patched_fixed_length.unwrap_or(32),
keep_original: imp_cfg.patched_keep_original,
},
types::FormatType::None => types::FormatOptions::None,
},
None => script.default_format_type(),
};
format::fmt_message(&mut mes, fmt);
script.import_messages(mes, &patched_f, encoding)?;
Ok(types::ScriptResult::Ok)
}

View File

@@ -23,6 +23,8 @@ pub trait ScriptBuilder {
pub trait Script: std::fmt::Debug {
fn default_output_script_type(&self) -> OutputScriptType;
fn default_format_type(&self) -> FormatOptions;
fn extract_messages(&self) -> Result<Vec<Message>>;
fn import_messages(

View File

@@ -181,6 +181,13 @@ impl Script for CircusMesScript {
OutputScriptType::Json
}
/// Formatting used for this script type when the caller does not pick one:
/// fixed-length wrapping at 32 per line, with original line breaks dropped.
fn default_format_type(&self) -> FormatOptions {
    const DEFAULT_LINE_LENGTH: usize = 32;
    FormatOptions::Fixed {
        keep_original: false,
        length: DEFAULT_LINE_LENGTH,
    }
}
fn extract_messages(&self) -> Result<Vec<Message>> {
let mut mes = vec![];
let mut name = None;
@@ -222,6 +229,27 @@ impl Script for CircusMesScript {
filename: &str,
encoding: Encoding,
) -> Result<()> {
let mut repls = Vec::new();
if !encoding.is_jis() {
/// Records what `s` turns into after a CP932 round trip through `encoding`.
///
/// `s` is encoded as CP932 with checking enabled, the resulting bytes are
/// decoded using `encoding`, and the pair `(s, decoded)` is appended to
/// `repls`. If either step fails the error is returned and nothing is
/// appended.
fn insert_repl(
    repls: &mut Vec<(&'static str, String)>,
    s: &'static str,
    encoding: Encoding,
) -> Result<()> {
    let decoded = decode_to_string(encoding, &encode_string(Encoding::Cp932, s, true)?)?;
    repls.push((s, decoded));
    Ok(())
}
let _ = insert_repl(&mut repls, "", encoding);
let _ = insert_repl(&mut repls, "", encoding);
let _ = insert_repl(&mut repls, "", encoding);
if repls.len() < 3 {
println!(
"Warning: Some replacements cannot used in current encoding. Ruby text may be broken."
);
}
}
let mut buffer = Vec::with_capacity(self.data.len());
buffer.extend_from_slice(&self.data[..self.asm_bin_offset]);
let mut nmes = Vec::with_capacity(messages.len());
@@ -246,7 +274,7 @@ impl Script for CircusMesScript {
return Err(anyhow::anyhow!("No more messages to import"));
}
}
let s = if token.value == self.info.nameopcode {
let mut s = if token.value == self.info.nameopcode {
match mes.as_mut().unwrap().name.take() {
Some(s) => s,
None => {
@@ -260,7 +288,10 @@ impl Script for CircusMesScript {
mes = None;
t
};
let mut text = encode_string(encoding, &s)?;
for i in repls.iter() {
s = s.replace(i.0, i.1.as_str());
}
let mut text = encode_string(encoding, &s, false)?;
buffer.push(token.value);
for t in text.iter_mut() {
*t = (*t).overflowing_sub(self.info.deckey).0;
@@ -276,7 +307,7 @@ impl Script for CircusMesScript {
return Err(anyhow::anyhow!("No more messages to import"));
}
}
let s = if token.value == self.info.nameopcode {
let mut s = if token.value == self.info.nameopcode {
match mes.as_mut().unwrap().name.take() {
Some(s) => s,
None => {
@@ -290,8 +321,11 @@ impl Script for CircusMesScript {
mes = None;
t
};
for i in repls.iter() {
s = s.replace(i.0, i.1.as_str());
}
buffer.push(token.value);
let text = encode_string(encoding, &s)?;
let text = encode_string(encoding, &s, false)?;
buffer.extend_from_slice(&text);
buffer.push(0x00);
continue;

View File

@@ -24,6 +24,17 @@ impl Default for Encoding {
}
}
impl Encoding {
    /// Returns `true` when this encoding is Shift-JIS: the `Cp932` variant,
    /// or (on Windows builds) a `CodePage` whose number is 932.
    pub fn is_jis(&self) -> bool {
        // The `CodePage` variant only exists on Windows builds.
        #[cfg(windows)]
        {
            if let Self::CodePage(code_page) = self {
                return *code_page == 932;
            }
        }
        matches!(self, Self::Cp932)
    }
}
#[derive(Clone, Copy, Debug, ValueEnum, PartialEq, Eq, PartialOrd, Ord)]
/// Text Encoding
pub enum TextEncoding {
@@ -189,3 +200,25 @@ pub enum ScriptResult {
Ok,
Ignored,
}
// Selector for the kind of line formatting, without its parameters; the
// variants are the same two names used by `FormatOptions`.
// NOTE(review): because of the `ValueEnum` derive, the `///` texts below are
// presumably surfaced by clap as command-line help — treat them as
// user-facing strings when editing.
#[derive(Clone, Copy, Debug, ValueEnum, PartialEq, Eq, PartialOrd, Ord)]
/// Format type
pub enum FormatType {
/// Wrap line with fixed length
Fixed,
/// Do not wrap line
None,
}
/// Fully resolved formatting options: the kind of line formatting to apply
/// together with the parameters that kind needs.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub enum FormatOptions {
    /// Wrap lines at a fixed length.
    Fixed {
        /// Maximum length of one line.
        length: usize,
        /// Whether to keep the line breaks already present in the text.
        keep_original: bool,
    },
    /// Do not wrap lines.
    None,
}

View File

@@ -29,28 +29,44 @@ pub fn decode_to_string(encoding: Encoding, data: &[u8]) -> Result<String, anyho
}
}
pub fn encode_string(encoding: Encoding, data: &str) -> Result<Vec<u8>, anyhow::Error> {
pub fn encode_string(
encoding: Encoding,
data: &str,
check: bool,
) -> Result<Vec<u8>, anyhow::Error> {
match encoding {
Encoding::Auto => Ok(data.as_bytes().to_vec()),
Encoding::Utf8 => Ok(data.as_bytes().to_vec()),
Encoding::Cp932 => {
let result = encoding_rs::SHIFT_JIS.encode(data);
if result.2 {
Err(anyhow::anyhow!("Failed to encode Shift-JIS"))
} else {
Ok(result.0.to_vec())
if check {
return Err(anyhow::anyhow!("Failed to encode Shift-JIS"));
}
eprintln!(
"Warning: Some characters could not be encoded in Shift-JIS: {}",
data
);
}
Ok(result.0.to_vec())
}
Encoding::Gb2312 => {
let result = encoding_rs::GBK.encode(data);
if result.2 {
Err(anyhow::anyhow!("Failed to encode GB2312"))
} else {
Ok(result.0.to_vec())
if check {
return Err(anyhow::anyhow!("Failed to encode GB2312"));
}
eprintln!(
"Warning: Some characters could not be encoded in GB2312: {}",
data
);
}
Ok(result.0.to_vec())
}
#[cfg(windows)]
Encoding::CodePage(code_page) => Ok(super::encoding_win::encode_string(code_page, data)?),
Encoding::CodePage(code_page) => {
Ok(super::encoding_win::encode_string(code_page, data, check)?)
}
}
}
@@ -106,22 +122,22 @@ fn test_decode_to_string() {
#[test]
fn test_encode_string() {
assert_eq!(
encode_string(Encoding::Utf8, "中文测试").unwrap(),
encode_string(Encoding::Utf8, "中文测试", true).unwrap(),
vec![228, 184, 173, 230, 150, 135, 230, 181, 139, 232, 175, 149]
);
assert_eq!(
encode_string(Encoding::Cp932, "きゃべつそふと").unwrap(),
encode_string(Encoding::Cp932, "きゃべつそふと", true).unwrap(),
vec![
130, 171, 130, 225, 130, 215, 130, 194, 130, 187, 130, 211, 130, 198
]
);
assert_eq!(
encode_string(Encoding::Gb2312, "中文").unwrap(),
encode_string(Encoding::Gb2312, "中文", true).unwrap(),
vec![214, 208, 206, 196]
);
#[cfg(windows)]
assert_eq!(
encode_string(Encoding::CodePage(936), "中文").unwrap(),
encode_string(Encoding::CodePage(936), "中文", true).unwrap(),
vec![214, 208, 206, 196]
);
}

View File

@@ -1,6 +1,6 @@
use windows_sys::Win32::Foundation::GetLastError;
use windows_sys::Win32::Globalization::{
MB_ERR_INVALID_CHARS, MultiByteToWideChar, WideCharToMultiByte,
CP_UTF7, CP_UTF8, MB_ERR_INVALID_CHARS, MultiByteToWideChar, WideCharToMultiByte,
};
use windows_sys::Win32::System::Diagnostics::Debug::{
FORMAT_MESSAGE_FROM_SYSTEM, FORMAT_MESSAGE_IGNORE_INSERTS, FormatMessageW,
@@ -79,7 +79,7 @@ pub fn decode_to_string(cp: u32, data: &[u8]) -> Result<String, WinError> {
Ok(String::from_utf16_lossy(&wc))
}
pub fn encode_string(cp: u32, data: &str) -> Result<Vec<u8>, WinError> {
pub fn encode_string(cp: u32, data: &str, check: bool) -> Result<Vec<u8>, WinError> {
let wstr = data.encode_utf16().collect::<Vec<u16>>();
let needed_len = unsafe {
WideCharToMultiByte(
@@ -98,6 +98,7 @@ pub fn encode_string(cp: u32, data: &str) -> Result<Vec<u8>, WinError> {
}
let mut mb = Vec::with_capacity(needed_len as usize);
mb.resize(needed_len as usize, 0);
let mut used_default_char = 0;
let result = unsafe {
WideCharToMultiByte(
cp,
@@ -107,9 +108,23 @@ pub fn encode_string(cp: u32, data: &str) -> Result<Vec<u8>, WinError> {
mb.as_mut_ptr(),
needed_len,
std::ptr::null_mut(),
std::ptr::null_mut(),
if cp == CP_UTF7 || cp == CP_UTF8 {
std::ptr::null_mut()
} else {
&mut used_default_char
},
)
};
if used_default_char != 0 {
if check {
return Err(WinError::new(0));
} else {
eprintln!(
"Warning: Some characters could not be encoded in code page {}: {}",
cp, data
);
}
}
if result == 0 {
return Err(WinError::from_last_error());
}
@@ -145,17 +160,25 @@ fn test_decode_to_string() {
#[test]
fn test_encode_string() {
assert_eq!(
encode_string(65001, "中文测试").unwrap(),
encode_string(65001, "中文测试", true).unwrap(),
vec![228, 184, 173, 230, 150, 135, 230, 181, 139, 232, 175, 149]
);
assert_eq!(
encode_string(932, "きゃべつそふと").unwrap(),
encode_string(932, "きゃべつそふと", true).unwrap(),
vec![
130, 171, 130, 225, 130, 215, 130, 194, 130, 187, 130, 211, 130, 198
]
);
assert_eq!(
encode_string(936, "中文").unwrap(),
encode_string(936, "中文", true).unwrap(),
vec![214, 208, 206, 196]
);
assert!(
encode_string(
936,
"「あ、こーら、逃げちゃダメだよー? 起きちゃうのも、まだダメだけ\nどね♪」",
true
)
.is_err()
);
}