From 99210a19cf6f6b8c039a788bcaf4ee053b423d79 Mon Sep 17 00:00:00 2001
From: lifegpc <root@lifegpc.com>
Date: Wed, 21 May 2025 10:57:14 +0800
Subject: [PATCH] Add format support

---
 Cargo.lock                   |  7 +++++
 Cargo.toml                   |  3 +-
 src/args.rs                  |  9 ++++++
 src/format/fixed.rs          | 54 ++++++++++++++++++++++++++++++++++++
 src/format/mod.rs            | 18 ++++++++++++
 src/main.rs                  | 18 ++++++++++--
 src/scripts/base.rs          |  2 ++
 src/scripts/circus/script.rs | 42 +++++++++++++++++++++++++---
 src/types.rs                 | 33 ++++++++++++++++++++++
 src/utils/encoding.rs        | 40 ++++++++++++++++++--------
 src/utils/encoding_win.rs    | 35 +++++++++++++++++++----
 11 files changed, 235 insertions(+), 26 deletions(-)
 create mode 100644 src/format/fixed.rs
 create mode 100644 src/format/mod.rs
diff --git a/Cargo.lock b/Cargo.lock
index 512a5fa..b95de43 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -159,6 +159,7 @@ dependencies = [
  "lazy_static",
  "serde",
  "serde_json",
+ "unicode-segmentation",
  "windows-sys",
 ]
 
@@ -247,6 +248,12 @@ version = "1.0.18"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "5a5f39404a5da50712a4c1eecf25e90dd62b613502b7e925fd4e4d19b5c96512"
 
+[[package]]
+name = "unicode-segmentation"
+version = "1.12.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "f6ccf251212114b54433ec949fd6a7841275f9ada20dddd2f29e9ceea4501493"
+
 [[package]]
 name = "utf8parse"
 version = "0.2.2"
diff --git a/Cargo.toml b/Cargo.toml
index 0e28782..851a74d 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -9,7 +9,8 @@ clap = { version = "4.5", features = ["derive"] }
 encoding_rs = "0.8"
 lazy_static = "1.5.0"
 serde = { version = "1", features = ["derive"] }
-serde_json = "1.0.140"
+serde_json = "1"
+unicode-segmentation = "1.12"
 
 [target.'cfg(windows)'.dependencies]
 windows-sys = { version = "0", features = ["Win32_Globalization", "Win32_System_Diagnostics_Debug"] }
diff --git a/src/args.rs b/src/args.rs
index ccf930b..9befcd9 100644
--- a/src/args.rs
+++ b/src/args.rs
@@ -68,6 +68,15 @@ pub struct ImportArgs {
     #[arg(short = 'P', long, group = "patched_encodingg")]
     /// Patched script code page
     pub patched_code_page: Option<u32>,
+    #[arg(long)]
+    /// Patched script format type
+    pub patched_format: Option<FormatType>,
+    #[arg(long)]
+    /// Fixed length of one line in patched script (for fixed format)
+    pub patched_fixed_length: Option<usize>,
+    #[arg(long, action = ArgAction::SetTrue)]
+    /// Keep original line breaks in patched script (for fixed format)
+    pub patched_keep_original: bool,
 }
 
 #[derive(Subcommand, Debug)]
diff --git a/src/format/fixed.rs b/src/format/fixed.rs
new file mode 100644
index 0000000..0ab1baa
--- /dev/null
+++ b/src/format/fixed.rs
@@ -0,0 +1,54 @@
+use unicode_segmentation::UnicodeSegmentation;
+
+pub struct FixedFormatter { // wraps message text at a fixed number of graphemes per line
+    length: usize, // a break is inserted once this many graphemes are on the current line
+    keep_original: bool, // true: "\n" in the input is kept and restarts the count
+}
+
+impl FixedFormatter {
+    /// Creates a formatter that breaks a line once it holds `length` graphemes.
+    pub fn new(length: usize, keep_original: bool) -> Self {
+        FixedFormatter {
+            length,
+            keep_original,
+        }
+    }
+
+    /// Wraps every `length` graphemes; input breaks are kept (as `\n`) only with `keep_original`.
+    pub fn format(&self, message: &str) -> String {
+        let mut result = String::with_capacity(message.len());
+        let mut current_length = 0;
+        for grapheme in message.graphemes(true) {
+            let is_break = matches!(grapheme, "\n" | "\r\n"); // CRLF is one grapheme cluster
+            if is_break && !self.keep_original {
+                continue;
+            }
+            if is_break || current_length >= self.length {
+                result.push('\n');
+                current_length = 0;
+            }
+            if !is_break {
+                result.push_str(grapheme);
+                current_length += 1;
+            }
+        }
+        result
+    }
+}
+
+#[test]
+fn test_format() {
+    // Without keep_original, input breaks vanish and text is re-wrapped every 10 graphemes.
+    let rewrap = FixedFormatter::new(10, false);
+    let input = "This is a test message.\nThis is another line.";
+    let expected = "This is a \ntest messa\nge.This is\n another l\nine.";
+    assert_eq!(rewrap.format(input), expected);
+    assert_eq!(rewrap.format("● This is a test."), "● This is \na test.");
+
+    // With keep_original, an existing break is kept and restarts the count.
+    let keep_breaks = FixedFormatter::new(10, true);
+    assert_eq!(
+        keep_breaks.format("● Th\nis is a test."),
+        "● Th\nis is a te\nst."
+    );
+}
diff --git a/src/format/mod.rs b/src/format/mod.rs
new file mode 100644
index 0000000..ccae528
--- /dev/null
+++ b/src/format/mod.rs
@@ -0,0 +1,18 @@
+mod fixed;
+
+use crate::types::*;
+
+/// Rewrites the text of every message in `mes` according to `opt`; `None` changes nothing.
+pub fn fmt_message(mes: &mut Vec<Message>, opt: FormatOptions) {
+    match opt {
+        FormatOptions::None => {}
+        FormatOptions::Fixed {
+            length,
+            keep_original,
+        } => {
+            let wrapper = fixed::FixedFormatter::new(length, keep_original);
+            mes.iter_mut()
+                .for_each(|entry| entry.message = wrapper.format(&entry.message));
+        }
+    }
+}
diff --git a/src/main.rs b/src/main.rs
index 48509e5..32bfc5e 100644
--- a/src/main.rs
+++ b/src/main.rs
@@ -1,4 +1,5 @@
 pub mod args;
+pub mod format;
 pub mod output_scripts;
 pub mod scripts;
 pub mod types;
@@ -160,14 +161,14 @@ pub fn export_script(
         types::OutputScriptType::Json => {
             let enc = get_output_encoding(arg);
             let s = serde_json::to_string_pretty(&mes)?;
-            let b = utils::encoding::encode_string(enc, &s)?;
+            let b = utils::encoding::encode_string(enc, &s, false)?;
             let mut f = utils::files::write_file(&f)?;
             f.write_all(&b)?;
         }
         types::OutputScriptType::M3t => {
             let enc = get_output_encoding(arg);
             let s = output_scripts::m3t::M3tDumper::dump(&mes);
-            let b = utils::encoding::encode_string(enc, &s)?;
+            let b = utils::encoding::encode_string(enc, &s, false)?;
             let mut f = utils::files::write_file(&f)?;
             f.write_all(&b)?;
         }
@@ -203,7 +204,7 @@ pub fn import_script(
         eprintln!("Output file does not exist");
         return Ok(types::ScriptResult::Ignored);
     }
-    let mes = match of {
+    let mut mes = match of {
         types::OutputScriptType::Json => {
             let enc = get_output_encoding(arg);
             let b = utils::files::read_file(&out_f)?;
@@ -234,6 +235,17 @@ pub fn import_script(
     } else {
         imp_cfg.patched.clone()
     };
+    let fmt = match imp_cfg.patched_format {
+        Some(fmt) => match fmt {
+            types::FormatType::Fixed => types::FormatOptions::Fixed {
+                length: imp_cfg.patched_fixed_length.unwrap_or(32),
+                keep_original: imp_cfg.patched_keep_original,
+            },
+            types::FormatType::None => types::FormatOptions::None,
+        },
+        None => script.default_format_type(),
+    };
+    format::fmt_message(&mut mes, fmt);
     script.import_messages(mes, &patched_f, encoding)?;
     Ok(types::ScriptResult::Ok)
 }
diff --git a/src/scripts/base.rs b/src/scripts/base.rs
index 712a199..068d558 100644
--- a/src/scripts/base.rs
+++ b/src/scripts/base.rs
@@ -23,6 +23,8 @@ pub trait ScriptBuilder {
 pub trait Script: std::fmt::Debug {
     fn default_output_script_type(&self) -> OutputScriptType;
 
+    fn default_format_type(&self) -> FormatOptions;
+
     fn extract_messages(&self) -> Result<Vec<Message>>;
 
     fn import_messages(
diff --git a/src/scripts/circus/script.rs b/src/scripts/circus/script.rs
index a166727..d05fdce 100644
--- a/src/scripts/circus/script.rs
+++ b/src/scripts/circus/script.rs
@@ -181,6 +181,13 @@ impl Script for CircusMesScript {
         OutputScriptType::Json
     }
 
+    fn default_format_type(&self) -> FormatOptions {
+        FormatOptions::Fixed { // used by the importer when no `--patched-format` is given
+            length: 32, // same value the importer falls back to for `--patched-fixed-length`
+            keep_original: false,
+        }
+    }
+
     fn extract_messages(&self) -> Result<Vec<Message>> {
         let mut mes = vec![];
         let mut name = None;
@@ -222,6 +229,27 @@ impl Script for CircusMesScript {
         filename: &str,
         encoding: Encoding,
     ) -> Result<()> {
+        let mut repls = Vec::new();
+        if !encoding.is_jis() { // Shift-JIS targets need no marker remapping
+            fn insert_repl(
+                repls: &mut Vec<(&'static str, String)>,
+                s: &'static str,
+                encoding: Encoding,
+            ) -> Result<()> {
+                let jis = encode_string(Encoding::Cp932, s, true)?;
+                let out = decode_to_string(encoding, &jis)?; // reinterpret the Shift-JIS bytes
+                repls.push((s, out));
+                Ok(())
+            }
+            let _ = insert_repl(&mut repls, "｛", encoding); // best effort: failures counted below
+            let _ = insert_repl(&mut repls, "／", encoding);
+            let _ = insert_repl(&mut repls, "｝", encoding);
+            if repls.len() < 3 {
+                eprintln!(
+                    "Warning: Some replacements cannot be used in the current encoding. Ruby text may be broken."
+                );
+            }
+        }
         let mut buffer = Vec::with_capacity(self.data.len());
         buffer.extend_from_slice(&self.data[..self.asm_bin_offset]);
         let mut nmes = Vec::with_capacity(messages.len());
@@ -246,7 +274,7 @@ impl Script for CircusMesScript {
                         return Err(anyhow::anyhow!("No more messages to import"));
                     }
                 }
-                let s = if token.value == self.info.nameopcode {
+                let mut s = if token.value == self.info.nameopcode {
                     match mes.as_mut().unwrap().name.take() {
                         Some(s) => s,
                         None => {
@@ -260,7 +288,10 @@ impl Script for CircusMesScript {
                     mes = None;
                     t
                 };
-                let mut text = encode_string(encoding, &s)?;
+                for i in repls.iter() {
+                    s = s.replace(i.0, i.1.as_str());
+                }
+                let mut text = encode_string(encoding, &s, false)?;
                 buffer.push(token.value);
                 for t in text.iter_mut() {
                     *t = (*t).overflowing_sub(self.info.deckey).0;
@@ -276,7 +307,7 @@ impl Script for CircusMesScript {
                         return Err(anyhow::anyhow!("No more messages to import"));
                     }
                 }
-                let s = if token.value == self.info.nameopcode {
+                let mut s = if token.value == self.info.nameopcode {
                     match mes.as_mut().unwrap().name.take() {
                         Some(s) => s,
                         None => {
@@ -290,8 +321,11 @@ impl Script for CircusMesScript {
                     mes = None;
                     t
                 };
+                for i in repls.iter() {
+                    s = s.replace(i.0, i.1.as_str());
+                }
                 buffer.push(token.value);
-                let text = encode_string(encoding, &s)?;
+                let text = encode_string(encoding, &s, false)?;
                 buffer.extend_from_slice(&text);
                 buffer.push(0x00);
                 continue;
diff --git a/src/types.rs b/src/types.rs
index 8400621..6ddd6f1 100644
--- a/src/types.rs
+++ b/src/types.rs
@@ -24,6 +24,17 @@ impl Default for Encoding {
     }
 }
 
+impl Encoding {
+    /// Reports whether this is Shift-JIS: `Cp932`, or code page 932 on Windows builds.
+    pub fn is_jis(&self) -> bool {
+        #[cfg(windows)]
+        let code_page_932 = matches!(self, Self::CodePage(932));
+        #[cfg(not(windows))]
+        let code_page_932 = false;
+        code_page_932 || matches!(self, Self::Cp932)
+    }
+}
+
 #[derive(Clone, Copy, Debug, ValueEnum, PartialEq, Eq, PartialOrd, Ord)]
 /// Text Encoding
 pub enum TextEncoding {
@@ -189,3 +200,25 @@ pub enum ScriptResult {
     Ok,
     Ignored,
 }
+
+#[derive(Clone, Copy, Debug, ValueEnum, PartialEq, Eq, PartialOrd, Ord)]
+/// Format type
+pub enum FormatType { // CLI-selectable via `ValueEnum`; the importer resolves it to `FormatOptions`
+    /// Wrap line with fixed length
+    Fixed,
+    /// Do not wrap line
+    None,
+}
+
+/// Resolved formatting settings passed to `format::fmt_message`
+pub enum FormatOptions {
+    /// Wrap lines at a fixed length
+    Fixed {
+        /// Number of graphemes after which a line break is inserted
+        length: usize,
+        /// Whether to keep the line breaks already present in the message
+        keep_original: bool,
+    },
+    /// Leave messages unchanged
+    None,
+}
diff --git a/src/utils/encoding.rs b/src/utils/encoding.rs
index d1778da..8b3cebd 100644
--- a/src/utils/encoding.rs
+++ b/src/utils/encoding.rs
@@ -29,28 +29,44 @@ pub fn decode_to_string(encoding: Encoding, data: &[u8]) -> Result<String, anyho
     }
 }
 
-pub fn encode_string(encoding: Encoding, data: &str) -> Result<Vec<u8>, anyhow::Error> {
+pub fn encode_string(
+    encoding: Encoding,
+    data: &str,
+    check: bool, // true: unmappable characters are an error; false: warn on stderr and continue
+) -> Result<Vec<u8>, anyhow::Error> {
     match encoding {
         Encoding::Auto => Ok(data.as_bytes().to_vec()),
         Encoding::Utf8 => Ok(data.as_bytes().to_vec()),
         Encoding::Cp932 => {
             let result = encoding_rs::SHIFT_JIS.encode(data);
             if result.2 {
-                Err(anyhow::anyhow!("Failed to encode Shift-JIS"))
-            } else {
-                Ok(result.0.to_vec())
+                if check {
+                    return Err(anyhow::anyhow!("Failed to encode Shift-JIS"));
+                }
+                eprintln!(
+                    "Warning: Some characters could not be encoded in Shift-JIS: {}",
+                    data
+                );
             }
+            Ok(result.0.to_vec()) // also reached in lenient mode after the warning above
         }
         Encoding::Gb2312 => {
             let result = encoding_rs::GBK.encode(data);
             if result.2 {
-                Err(anyhow::anyhow!("Failed to encode GB2312"))
-            } else {
-                Ok(result.0.to_vec())
+                if check {
+                    return Err(anyhow::anyhow!("Failed to encode GB2312"));
+                }
+                eprintln!(
+                    "Warning: Some characters could not be encoded in GB2312: {}",
+                    data
+                );
             }
+            Ok(result.0.to_vec()) // also reached in lenient mode after the warning above
         }
         #[cfg(windows)]
-        Encoding::CodePage(code_page) => Ok(super::encoding_win::encode_string(code_page, data)?),
+        Encoding::CodePage(code_page) => {
+            Ok(super::encoding_win::encode_string(code_page, data, check)?)
+        }
     }
 }
 
@@ -106,22 +122,22 @@ fn test_decode_to_string() {
 #[test]
 fn test_encode_string() {
     assert_eq!(
-        encode_string(Encoding::Utf8, "中文测试").unwrap(),
+        encode_string(Encoding::Utf8, "中文测试", true).unwrap(),
         vec![228, 184, 173, 230, 150, 135, 230, 181, 139, 232, 175, 149]
     );
     assert_eq!(
-        encode_string(Encoding::Cp932, "きゃべつそふと").unwrap(),
+        encode_string(Encoding::Cp932, "きゃべつそふと", true).unwrap(),
         vec![
             130, 171, 130, 225, 130, 215, 130, 194, 130, 187, 130, 211, 130, 198
         ]
     );
     assert_eq!(
-        encode_string(Encoding::Gb2312, "中文").unwrap(),
+        encode_string(Encoding::Gb2312, "中文", true).unwrap(),
         vec![214, 208, 206, 196]
     );
     #[cfg(windows)]
     assert_eq!(
-        encode_string(Encoding::CodePage(936), "中文").unwrap(),
+        encode_string(Encoding::CodePage(936), "中文", true).unwrap(),
         vec![214, 208, 206, 196]
     );
 }
diff --git a/src/utils/encoding_win.rs b/src/utils/encoding_win.rs
index 5fe75f5..75756a3 100644
--- a/src/utils/encoding_win.rs
+++ b/src/utils/encoding_win.rs
@@ -1,6 +1,6 @@
 use windows_sys::Win32::Foundation::GetLastError;
 use windows_sys::Win32::Globalization::{
-    MB_ERR_INVALID_CHARS, MultiByteToWideChar, WideCharToMultiByte,
+    CP_UTF7, CP_UTF8, MB_ERR_INVALID_CHARS, MultiByteToWideChar, WideCharToMultiByte,
 };
 use windows_sys::Win32::System::Diagnostics::Debug::{
     FORMAT_MESSAGE_FROM_SYSTEM, FORMAT_MESSAGE_IGNORE_INSERTS, FormatMessageW,
@@ -79,7 +79,7 @@ pub fn decode_to_string(cp: u32, data: &[u8]) -> Result<String, WinError> {
     Ok(String::from_utf16_lossy(&wc))
 }
 
-pub fn encode_string(cp: u32, data: &str) -> Result<Vec<u8>, WinError> {
+pub fn encode_string(cp: u32, data: &str, check: bool) -> Result<Vec<u8>, WinError> {
     let wstr = data.encode_utf16().collect::<Vec<u16>>();
     let needed_len = unsafe {
         WideCharToMultiByte(
@@ -98,6 +98,7 @@ pub fn encode_string(cp: u32, data: &str) -> Result<Vec<u8>, WinError> {
     }
     let mut mb = Vec::with_capacity(needed_len as usize);
     mb.resize(needed_len as usize, 0);
+    let mut used_default_char = 0;
     let result = unsafe {
         WideCharToMultiByte(
             cp,
@@ -107,9 +108,23 @@ pub fn encode_string(cp: u32, data: &str) -> Result<Vec<u8>, WinError> {
             mb.as_mut_ptr(),
             needed_len,
             std::ptr::null_mut(),
-            std::ptr::null_mut(),
+            if cp == CP_UTF7 || cp == CP_UTF8 {
+                std::ptr::null_mut()
+            } else {
+                &mut used_default_char
+            },
         )
     };
+    if used_default_char != 0 {
+        if check {
+            return Err(WinError::new(0));
+        } else {
+            eprintln!(
+                "Warning: Some characters could not be encoded in code page {}: {}",
+                cp, data
+            );
+        }
+    }
     if result == 0 {
         return Err(WinError::from_last_error());
     }
@@ -145,17 +160,25 @@ fn test_decode_to_string() {
 #[test]
 fn test_encode_string() {
     assert_eq!(
-        encode_string(65001, "中文测试").unwrap(),
+        encode_string(65001, "中文测试", true).unwrap(),
         vec![228, 184, 173, 230, 150, 135, 230, 181, 139, 232, 175, 149]
     );
     assert_eq!(
-        encode_string(932, "きゃべつそふと").unwrap(),
+        encode_string(932, "きゃべつそふと", true).unwrap(),
         vec![
             130, 171, 130, 225, 130, 215, 130, 194, 130, 187, 130, 211, 130, 198
         ]
     );
     assert_eq!(
-        encode_string(936, "中文").unwrap(),
+        encode_string(936, "中文", true).unwrap(),
         vec![214, 208, 206, 196]
     );
+    assert!(
+        encode_string(
+            936,
+            "「あ、こーら、逃げちゃダメだよー？　起きちゃうのも、まだダメだけ\nどね♪」",
+            true
+        )
+        .is_err()
+    );
 }