mirror of
https://github.com/lifegpc/msg-tool.git
synced 2026-06-06 12:58:45 +08:00
Add format support
This commit is contained in:
7
Cargo.lock
generated
7
Cargo.lock
generated
@@ -159,6 +159,7 @@ dependencies = [
|
||||
"lazy_static",
|
||||
"serde",
|
||||
"serde_json",
|
||||
"unicode-segmentation",
|
||||
"windows-sys",
|
||||
]
|
||||
|
||||
@@ -247,6 +248,12 @@ version = "1.0.18"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "5a5f39404a5da50712a4c1eecf25e90dd62b613502b7e925fd4e4d19b5c96512"
|
||||
|
||||
[[package]]
|
||||
name = "unicode-segmentation"
|
||||
version = "1.12.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "f6ccf251212114b54433ec949fd6a7841275f9ada20dddd2f29e9ceea4501493"
|
||||
|
||||
[[package]]
|
||||
name = "utf8parse"
|
||||
version = "0.2.2"
|
||||
|
||||
@@ -9,7 +9,8 @@ clap = { version = "4.5", features = ["derive"] }
|
||||
encoding_rs = "0.8"
|
||||
lazy_static = "1.5.0"
|
||||
serde = { version = "1", features = ["derive"] }
|
||||
serde_json = "1.0.140"
|
||||
serde_json = "1"
|
||||
unicode-segmentation = "1.12"
|
||||
|
||||
[target.'cfg(windows)'.dependencies]
|
||||
windows-sys = { version = "0", features = ["Win32_Globalization", "Win32_System_Diagnostics_Debug"] }
|
||||
|
||||
@@ -68,6 +68,15 @@ pub struct ImportArgs {
|
||||
#[arg(short = 'P', long, group = "patched_encodingg")]
|
||||
/// Patched script code page
|
||||
pub patched_code_page: Option<u32>,
|
||||
#[arg(long)]
|
||||
/// Patched script format type
|
||||
pub patched_format: Option<FormatType>,
|
||||
#[arg(long)]
|
||||
/// Fixed length of one line in patched script (for fixed format)
|
||||
pub patched_fixed_length: Option<usize>,
|
||||
#[arg(long, action = ArgAction::SetTrue)]
|
||||
/// Keep original line breaks in patched script (for fixed format)
|
||||
pub patched_keep_original: bool,
|
||||
}
|
||||
|
||||
#[derive(Subcommand, Debug)]
|
||||
|
||||
54
src/format/fixed.rs
Normal file
54
src/format/fixed.rs
Normal file
@@ -0,0 +1,54 @@
|
||||
use unicode_segmentation::UnicodeSegmentation;
|
||||
|
||||
pub struct FixedFormatter {
|
||||
length: usize,
|
||||
keep_original: bool,
|
||||
}
|
||||
|
||||
impl FixedFormatter {
|
||||
pub fn new(length: usize, keep_original: bool) -> Self {
|
||||
FixedFormatter {
|
||||
length,
|
||||
keep_original,
|
||||
}
|
||||
}
|
||||
|
||||
pub fn format(&self, message: &str) -> String {
|
||||
let mut result = String::new();
|
||||
let vec: Vec<_> = UnicodeSegmentation::graphemes(message, true).collect();
|
||||
let mut current_length = 0;
|
||||
for grapheme in vec {
|
||||
if grapheme == "\n" {
|
||||
if self.keep_original {
|
||||
result.push('\n');
|
||||
current_length = 0;
|
||||
}
|
||||
continue;
|
||||
}
|
||||
if current_length >= self.length {
|
||||
result.push('\n');
|
||||
current_length = 0;
|
||||
}
|
||||
result.push_str(grapheme);
|
||||
current_length += 1;
|
||||
}
|
||||
return result;
|
||||
}
|
||||
}
|
||||
|
||||
// Covers plain wrapping (input breaks dropped) and wrapping that keeps input breaks.
#[test]
fn test_format() {
    let wrap_only = FixedFormatter::new(10, false);
    assert_eq!(
        wrap_only.format("This is a test message.\nThis is another line."),
        "This is a \ntest messa\nge.This is\n another l\nine."
    );
    assert_eq!(wrap_only.format("● This is a test."), "● This is \na test.");

    let keep_breaks = FixedFormatter::new(10, true);
    assert_eq!(
        keep_breaks.format("● Th\nis is a test."),
        "● Th\nis is a te\nst."
    );
}
|
||||
18
src/format/mod.rs
Normal file
18
src/format/mod.rs
Normal file
@@ -0,0 +1,18 @@
|
||||
mod fixed;
|
||||
|
||||
use crate::types::*;
|
||||
|
||||
pub fn fmt_message(mes: &mut Vec<Message>, opt: FormatOptions) {
|
||||
match opt {
|
||||
FormatOptions::Fixed {
|
||||
length,
|
||||
keep_original,
|
||||
} => {
|
||||
let formatter = fixed::FixedFormatter::new(length, keep_original);
|
||||
for message in mes.iter_mut() {
|
||||
message.message = formatter.format(&message.message);
|
||||
}
|
||||
}
|
||||
FormatOptions::None => {}
|
||||
}
|
||||
}
|
||||
18
src/main.rs
18
src/main.rs
@@ -1,4 +1,5 @@
|
||||
pub mod args;
|
||||
pub mod format;
|
||||
pub mod output_scripts;
|
||||
pub mod scripts;
|
||||
pub mod types;
|
||||
@@ -160,14 +161,14 @@ pub fn export_script(
|
||||
types::OutputScriptType::Json => {
|
||||
let enc = get_output_encoding(arg);
|
||||
let s = serde_json::to_string_pretty(&mes)?;
|
||||
let b = utils::encoding::encode_string(enc, &s)?;
|
||||
let b = utils::encoding::encode_string(enc, &s, false)?;
|
||||
let mut f = utils::files::write_file(&f)?;
|
||||
f.write_all(&b)?;
|
||||
}
|
||||
types::OutputScriptType::M3t => {
|
||||
let enc = get_output_encoding(arg);
|
||||
let s = output_scripts::m3t::M3tDumper::dump(&mes);
|
||||
let b = utils::encoding::encode_string(enc, &s)?;
|
||||
let b = utils::encoding::encode_string(enc, &s, false)?;
|
||||
let mut f = utils::files::write_file(&f)?;
|
||||
f.write_all(&b)?;
|
||||
}
|
||||
@@ -203,7 +204,7 @@ pub fn import_script(
|
||||
eprintln!("Output file does not exist");
|
||||
return Ok(types::ScriptResult::Ignored);
|
||||
}
|
||||
let mes = match of {
|
||||
let mut mes = match of {
|
||||
types::OutputScriptType::Json => {
|
||||
let enc = get_output_encoding(arg);
|
||||
let b = utils::files::read_file(&out_f)?;
|
||||
@@ -234,6 +235,17 @@ pub fn import_script(
|
||||
} else {
|
||||
imp_cfg.patched.clone()
|
||||
};
|
||||
let fmt = match imp_cfg.patched_format {
|
||||
Some(fmt) => match fmt {
|
||||
types::FormatType::Fixed => types::FormatOptions::Fixed {
|
||||
length: imp_cfg.patched_fixed_length.unwrap_or(32),
|
||||
keep_original: imp_cfg.patched_keep_original,
|
||||
},
|
||||
types::FormatType::None => types::FormatOptions::None,
|
||||
},
|
||||
None => script.default_format_type(),
|
||||
};
|
||||
format::fmt_message(&mut mes, fmt);
|
||||
script.import_messages(mes, &patched_f, encoding)?;
|
||||
Ok(types::ScriptResult::Ok)
|
||||
}
|
||||
|
||||
@@ -23,6 +23,8 @@ pub trait ScriptBuilder {
|
||||
pub trait Script: std::fmt::Debug {
|
||||
fn default_output_script_type(&self) -> OutputScriptType;
|
||||
|
||||
fn default_format_type(&self) -> FormatOptions;
|
||||
|
||||
fn extract_messages(&self) -> Result<Vec<Message>>;
|
||||
|
||||
fn import_messages(
|
||||
|
||||
@@ -181,6 +181,13 @@ impl Script for CircusMesScript {
|
||||
OutputScriptType::Json
|
||||
}
|
||||
|
||||
fn default_format_type(&self) -> FormatOptions {
|
||||
FormatOptions::Fixed {
|
||||
length: 32,
|
||||
keep_original: false,
|
||||
}
|
||||
}
|
||||
|
||||
fn extract_messages(&self) -> Result<Vec<Message>> {
|
||||
let mut mes = vec![];
|
||||
let mut name = None;
|
||||
@@ -222,6 +229,27 @@ impl Script for CircusMesScript {
|
||||
filename: &str,
|
||||
encoding: Encoding,
|
||||
) -> Result<()> {
|
||||
let mut repls = Vec::new();
|
||||
if !encoding.is_jis() {
|
||||
fn insert_repl(
|
||||
repls: &mut Vec<(&'static str, String)>,
|
||||
s: &'static str,
|
||||
encoding: Encoding,
|
||||
) -> Result<()> {
|
||||
let jis = encode_string(Encoding::Cp932, s, true)?;
|
||||
let out = decode_to_string(encoding, &jis)?;
|
||||
repls.push((s, out));
|
||||
Ok(())
|
||||
}
|
||||
let _ = insert_repl(&mut repls, "{", encoding);
|
||||
let _ = insert_repl(&mut repls, "/", encoding);
|
||||
let _ = insert_repl(&mut repls, "}", encoding);
|
||||
if repls.len() < 3 {
|
||||
println!(
|
||||
"Warning: Some replacements cannot used in current encoding. Ruby text may be broken."
|
||||
);
|
||||
}
|
||||
}
|
||||
let mut buffer = Vec::with_capacity(self.data.len());
|
||||
buffer.extend_from_slice(&self.data[..self.asm_bin_offset]);
|
||||
let mut nmes = Vec::with_capacity(messages.len());
|
||||
@@ -246,7 +274,7 @@ impl Script for CircusMesScript {
|
||||
return Err(anyhow::anyhow!("No more messages to import"));
|
||||
}
|
||||
}
|
||||
let s = if token.value == self.info.nameopcode {
|
||||
let mut s = if token.value == self.info.nameopcode {
|
||||
match mes.as_mut().unwrap().name.take() {
|
||||
Some(s) => s,
|
||||
None => {
|
||||
@@ -260,7 +288,10 @@ impl Script for CircusMesScript {
|
||||
mes = None;
|
||||
t
|
||||
};
|
||||
let mut text = encode_string(encoding, &s)?;
|
||||
for i in repls.iter() {
|
||||
s = s.replace(i.0, i.1.as_str());
|
||||
}
|
||||
let mut text = encode_string(encoding, &s, false)?;
|
||||
buffer.push(token.value);
|
||||
for t in text.iter_mut() {
|
||||
*t = (*t).overflowing_sub(self.info.deckey).0;
|
||||
@@ -276,7 +307,7 @@ impl Script for CircusMesScript {
|
||||
return Err(anyhow::anyhow!("No more messages to import"));
|
||||
}
|
||||
}
|
||||
let s = if token.value == self.info.nameopcode {
|
||||
let mut s = if token.value == self.info.nameopcode {
|
||||
match mes.as_mut().unwrap().name.take() {
|
||||
Some(s) => s,
|
||||
None => {
|
||||
@@ -290,8 +321,11 @@ impl Script for CircusMesScript {
|
||||
mes = None;
|
||||
t
|
||||
};
|
||||
for i in repls.iter() {
|
||||
s = s.replace(i.0, i.1.as_str());
|
||||
}
|
||||
buffer.push(token.value);
|
||||
let text = encode_string(encoding, &s)?;
|
||||
let text = encode_string(encoding, &s, false)?;
|
||||
buffer.extend_from_slice(&text);
|
||||
buffer.push(0x00);
|
||||
continue;
|
||||
|
||||
33
src/types.rs
33
src/types.rs
@@ -24,6 +24,17 @@ impl Default for Encoding {
|
||||
}
|
||||
}
|
||||
|
||||
impl Encoding {
|
||||
pub fn is_jis(&self) -> bool {
|
||||
match self {
|
||||
Self::Cp932 => true,
|
||||
#[cfg(windows)]
|
||||
Self::CodePage(code_page) => *code_page == 932,
|
||||
_ => false,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Clone, Copy, Debug, ValueEnum, PartialEq, Eq, PartialOrd, Ord)]
|
||||
/// Text Encoding
|
||||
pub enum TextEncoding {
|
||||
@@ -189,3 +200,25 @@ pub enum ScriptResult {
|
||||
Ok,
|
||||
Ignored,
|
||||
}
|
||||
|
||||
#[derive(Clone, Copy, Debug, ValueEnum, PartialEq, Eq, PartialOrd, Ord)]
|
||||
/// Format type
|
||||
pub enum FormatType {
|
||||
/// Wrap line with fixed length
|
||||
Fixed,
|
||||
/// Do not wrap line
|
||||
None,
|
||||
}
|
||||
|
||||
/// Format options
///
/// Carries the concrete settings for a format, unlike `FormatType`, which only
/// names the format.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub enum FormatOptions {
    /// Wrap line with fixed length
    Fixed {
        /// Fixed length
        length: usize,
        /// Whether to keep original line breaks
        keep_original: bool,
    },
    /// Do not wrap line
    None,
}
|
||||
|
||||
@@ -29,28 +29,44 @@ pub fn decode_to_string(encoding: Encoding, data: &[u8]) -> Result<String, anyho
|
||||
}
|
||||
}
|
||||
|
||||
pub fn encode_string(encoding: Encoding, data: &str) -> Result<Vec<u8>, anyhow::Error> {
|
||||
pub fn encode_string(
|
||||
encoding: Encoding,
|
||||
data: &str,
|
||||
check: bool,
|
||||
) -> Result<Vec<u8>, anyhow::Error> {
|
||||
match encoding {
|
||||
Encoding::Auto => Ok(data.as_bytes().to_vec()),
|
||||
Encoding::Utf8 => Ok(data.as_bytes().to_vec()),
|
||||
Encoding::Cp932 => {
|
||||
let result = encoding_rs::SHIFT_JIS.encode(data);
|
||||
if result.2 {
|
||||
Err(anyhow::anyhow!("Failed to encode Shift-JIS"))
|
||||
} else {
|
||||
Ok(result.0.to_vec())
|
||||
if check {
|
||||
return Err(anyhow::anyhow!("Failed to encode Shift-JIS"));
|
||||
}
|
||||
eprintln!(
|
||||
"Warning: Some characters could not be encoded in Shift-JIS: {}",
|
||||
data
|
||||
);
|
||||
}
|
||||
Ok(result.0.to_vec())
|
||||
}
|
||||
Encoding::Gb2312 => {
|
||||
let result = encoding_rs::GBK.encode(data);
|
||||
if result.2 {
|
||||
Err(anyhow::anyhow!("Failed to encode GB2312"))
|
||||
} else {
|
||||
Ok(result.0.to_vec())
|
||||
if check {
|
||||
return Err(anyhow::anyhow!("Failed to encode GB2312"));
|
||||
}
|
||||
eprintln!(
|
||||
"Warning: Some characters could not be encoded in GB2312: {}",
|
||||
data
|
||||
);
|
||||
}
|
||||
Ok(result.0.to_vec())
|
||||
}
|
||||
#[cfg(windows)]
|
||||
Encoding::CodePage(code_page) => Ok(super::encoding_win::encode_string(code_page, data)?),
|
||||
Encoding::CodePage(code_page) => {
|
||||
Ok(super::encoding_win::encode_string(code_page, data, check)?)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -106,22 +122,22 @@ fn test_decode_to_string() {
|
||||
#[test]
|
||||
fn test_encode_string() {
|
||||
assert_eq!(
|
||||
encode_string(Encoding::Utf8, "中文测试").unwrap(),
|
||||
encode_string(Encoding::Utf8, "中文测试", true).unwrap(),
|
||||
vec![228, 184, 173, 230, 150, 135, 230, 181, 139, 232, 175, 149]
|
||||
);
|
||||
assert_eq!(
|
||||
encode_string(Encoding::Cp932, "きゃべつそふと").unwrap(),
|
||||
encode_string(Encoding::Cp932, "きゃべつそふと", true).unwrap(),
|
||||
vec![
|
||||
130, 171, 130, 225, 130, 215, 130, 194, 130, 187, 130, 211, 130, 198
|
||||
]
|
||||
);
|
||||
assert_eq!(
|
||||
encode_string(Encoding::Gb2312, "中文").unwrap(),
|
||||
encode_string(Encoding::Gb2312, "中文", true).unwrap(),
|
||||
vec![214, 208, 206, 196]
|
||||
);
|
||||
#[cfg(windows)]
|
||||
assert_eq!(
|
||||
encode_string(Encoding::CodePage(936), "中文").unwrap(),
|
||||
encode_string(Encoding::CodePage(936), "中文", true).unwrap(),
|
||||
vec![214, 208, 206, 196]
|
||||
);
|
||||
}
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
use windows_sys::Win32::Foundation::GetLastError;
|
||||
use windows_sys::Win32::Globalization::{
|
||||
MB_ERR_INVALID_CHARS, MultiByteToWideChar, WideCharToMultiByte,
|
||||
CP_UTF7, CP_UTF8, MB_ERR_INVALID_CHARS, MultiByteToWideChar, WideCharToMultiByte,
|
||||
};
|
||||
use windows_sys::Win32::System::Diagnostics::Debug::{
|
||||
FORMAT_MESSAGE_FROM_SYSTEM, FORMAT_MESSAGE_IGNORE_INSERTS, FormatMessageW,
|
||||
@@ -79,7 +79,7 @@ pub fn decode_to_string(cp: u32, data: &[u8]) -> Result<String, WinError> {
|
||||
Ok(String::from_utf16_lossy(&wc))
|
||||
}
|
||||
|
||||
pub fn encode_string(cp: u32, data: &str) -> Result<Vec<u8>, WinError> {
|
||||
pub fn encode_string(cp: u32, data: &str, check: bool) -> Result<Vec<u8>, WinError> {
|
||||
let wstr = data.encode_utf16().collect::<Vec<u16>>();
|
||||
let needed_len = unsafe {
|
||||
WideCharToMultiByte(
|
||||
@@ -98,6 +98,7 @@ pub fn encode_string(cp: u32, data: &str) -> Result<Vec<u8>, WinError> {
|
||||
}
|
||||
let mut mb = Vec::with_capacity(needed_len as usize);
|
||||
mb.resize(needed_len as usize, 0);
|
||||
let mut used_default_char = 0;
|
||||
let result = unsafe {
|
||||
WideCharToMultiByte(
|
||||
cp,
|
||||
@@ -107,9 +108,23 @@ pub fn encode_string(cp: u32, data: &str) -> Result<Vec<u8>, WinError> {
|
||||
mb.as_mut_ptr(),
|
||||
needed_len,
|
||||
std::ptr::null_mut(),
|
||||
std::ptr::null_mut(),
|
||||
if cp == CP_UTF7 || cp == CP_UTF8 {
|
||||
std::ptr::null_mut()
|
||||
} else {
|
||||
&mut used_default_char
|
||||
},
|
||||
)
|
||||
};
|
||||
if used_default_char != 0 {
|
||||
if check {
|
||||
return Err(WinError::new(0));
|
||||
} else {
|
||||
eprintln!(
|
||||
"Warning: Some characters could not be encoded in code page {}: {}",
|
||||
cp, data
|
||||
);
|
||||
}
|
||||
}
|
||||
if result == 0 {
|
||||
return Err(WinError::from_last_error());
|
||||
}
|
||||
@@ -145,17 +160,25 @@ fn test_decode_to_string() {
|
||||
#[test]
|
||||
fn test_encode_string() {
|
||||
assert_eq!(
|
||||
encode_string(65001, "中文测试").unwrap(),
|
||||
encode_string(65001, "中文测试", true).unwrap(),
|
||||
vec![228, 184, 173, 230, 150, 135, 230, 181, 139, 232, 175, 149]
|
||||
);
|
||||
assert_eq!(
|
||||
encode_string(932, "きゃべつそふと").unwrap(),
|
||||
encode_string(932, "きゃべつそふと", true).unwrap(),
|
||||
vec![
|
||||
130, 171, 130, 225, 130, 215, 130, 194, 130, 187, 130, 211, 130, 198
|
||||
]
|
||||
);
|
||||
assert_eq!(
|
||||
encode_string(936, "中文").unwrap(),
|
||||
encode_string(936, "中文", true).unwrap(),
|
||||
vec![214, 208, 206, 196]
|
||||
);
|
||||
assert!(
|
||||
encode_string(
|
||||
936,
|
||||
"「あ、こーら、逃げちゃダメだよー? 起きちゃうのも、まだダメだけ\nどね♪」",
|
||||
true
|
||||
)
|
||||
.is_err()
|
||||
);
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user