From bb2737845087fbba949336daf00553808d2e9cc1 Mon Sep 17 00:00:00 2001 From: lifegpc Date: Wed, 20 Aug 2025 23:05:46 +0800 Subject: [PATCH] Add new argument for fixed text formatter --- src/args.rs | 3 ++ src/format/fixed.rs | 102 ++++++++++++++++++++++++++++++++--- src/format/mod.rs | 4 +- src/main.rs | 2 + src/scripts/bgi/script.rs | 1 + src/scripts/circus/script.rs | 1 + src/types.rs | 2 + 7 files changed, 106 insertions(+), 9 deletions(-) diff --git a/src/args.rs b/src/args.rs index 95cf443..db429ce 100644 --- a/src/args.rs +++ b/src/args.rs @@ -401,6 +401,9 @@ pub struct ImportArgs { #[arg(long, action = ArgAction::SetTrue)] /// Keep original line breaks in patched script (for fixed format) pub patched_keep_original: bool, + #[arg(long, action = ArgAction::SetTrue)] + /// Break words in patched script (for fixed format) + pub patched_break_words: bool, #[arg(long)] /// Name table file pub name_csv: Option, diff --git a/src/format/fixed.rs b/src/format/fixed.rs index eb941aa..57c68eb 100644 --- a/src/format/fixed.rs +++ b/src/format/fixed.rs @@ -6,15 +6,23 @@ const SPACE_STR_LIST: [&str; 2] = [" ", " "]; pub struct FixedFormatter { length: usize, keep_original: bool, + /// Whether to break words (ASCII only) at the end of the line. + break_words: bool, #[allow(unused)] typ: Option, } impl FixedFormatter { - pub fn new(length: usize, keep_original: bool, typ: Option) -> Self { + pub fn new( + length: usize, + keep_original: bool, + break_words: bool, + typ: Option, + ) -> Self { FixedFormatter { length, keep_original, + break_words, typ, } } @@ -38,7 +46,11 @@ impl FixedFormatter { let mut is_ruby = false; let mut is_ruby_rt = false; let mut last_command = None; - for grapheme in vec { + let mut i = 0; + + while i < vec.len() { + let grapheme = vec[i]; + if grapheme == "\n" { if self.keep_original || (self.is_circus() && last_command.as_ref().is_some_and(|cmd| cmd == "@n")) @@ -47,16 +59,57 @@ impl FixedFormatter { current_length = 0; } pre_is_lf = true; + i += 1; continue; } + + // Check if we need to break and handle word breaking if current_length >= self.length { - result.push('\n'); - current_length = 0; + if !self.break_words && !is_command && !is_ruby_rt { + // Look back to find a good break point (space or non-ASCII) + let mut break_pos = None; + let mut temp_length = current_length; + let mut j = result.len(); + + // Find the last space or non-ASCII character position + for ch in result.chars().rev() { + if ch == ' ' || ch == ' ' || !ch.is_ascii() { + break_pos = Some(j); + break; + } + if ch.is_ascii_alphabetic() { + temp_length -= 1; + if temp_length == 0 { + break; + } + } + j -= ch.len_utf8(); + } + + // If we found a good break point, move content after it to next line + if let Some(pos) = break_pos { + let remaining = result[pos..].trim_start().to_string(); + result.truncate(pos); + result.push('\n'); + result.push_str(&remaining); + current_length = remaining.chars().count(); + } else { + result.push('\n'); + current_length = 0; + } + } else { + result.push('\n'); + current_length = 0; + } } + if (current_length == 0 || pre_is_lf) && SPACE_STR_LIST.contains(&grapheme) { + i += 1; continue; } + result.push_str(grapheme); + if self.is_circus() { if grapheme == "@" { is_command = true; @@ -75,29 +128,36 @@ impl FixedFormatter { is_ruby_rt = true; } else if is_ruby && grapheme == "/" { is_ruby_rt = false; + i += 1; continue; } else if is_ruby && grapheme == "}" { is_ruby = false; + i += 1; continue; } } + if is_command { if let Some(ref mut cmd) = last_command { cmd.push_str(grapheme); } } + if !is_command && !is_ruby_rt { current_length += 1; } + pre_is_lf = false; + i += 1; } + return result; } } #[test] fn test_format() { - let formatter = FixedFormatter::new(10, false, None); + let formatter = FixedFormatter::new(10, false, true, None); let message = "This is a test message.\nThis is another line."; let formatted_message = formatter.format(message); assert_eq!( @@ -109,14 +169,40 @@ fn test_format() { formatter.format("● This is  a test."), "● This is \na test." ); - let fommater2 = FixedFormatter::new(10, true, None); + let fommater2 = FixedFormatter::new(10, true, true, None); assert_eq!( fommater2.format("● Th\n is is a te st."), "● Th\nis is a te\nst." ); + + // Test break_words = false + let no_break_formatter = FixedFormatter::new(10, false, false, None); + assert_eq!( + no_break_formatter.format("Example text."), + "Example \ntext." + ); + + let no_break_formatter2 = FixedFormatter::new(6, false, false, None); + assert_eq!( + no_break_formatter2.format("Example text."), + "Exampl\ne \ntext." + ); + + let no_break_formatter3 = FixedFormatter::new(7, false, false, None); + assert_eq!( + no_break_formatter3.format("Example text."), + "Example\ntext." + ); + + let real_world_no_break_formatter = FixedFormatter::new(32, false, false, None); + assert_eq!( + real_world_no_break_formatter.format("○咕噜咕噜(Temporary Magnetic Pattern Linkage)"), + "○咕噜咕噜(Temporary Magnetic \nPattern Linkage)" + ); + #[cfg(feature = "circus")] { - let circus_formatter = FixedFormatter::new(10, false, Some(ScriptType::Circus)); + let circus_formatter = FixedFormatter::new(10, false, true, Some(ScriptType::Circus)); assert_eq!( circus_formatter.format("● @cmd1@cmd2@cmd3中文字数是一\n 二三 四五六七八九十"), "● @cmd1@cmd2@cmd3中文字数是一二三\n四五六七八九十" @@ -126,7 +212,7 @@ fn test_format() { .format("● @cmd1@cmd2@cmd3{rubyText/中文}字数是一\n 二三 四五六七八九十"), "● @cmd1@cmd2@cmd3{rubyText/中文}字数是一二三\n四五六七八九十" ); - let circus_formatter2 = FixedFormatter::new(32, false, Some(ScriptType::Circus)); + let circus_formatter2 = FixedFormatter::new(32, false, true, Some(ScriptType::Circus)); assert_eq!( circus_formatter2.format("@re1@re2@b1@t30@w1「当然现在我很幸福哦?\n 因为有你在身边」@n\n「@b1@t38@w1当然现在我很幸福哦?\n 因为有敦也君在身边」"), "@re1@re2@b1@t30@w1「当然现在我很幸福哦?因为有你在身边」@n\n「@b1@t38@w1当然现在我很幸福哦?因为有敦也君在身边」" diff --git a/src/format/mod.rs b/src/format/mod.rs index 0464d09..a7ee855 100644 --- a/src/format/mod.rs +++ b/src/format/mod.rs @@ -9,8 +9,10 @@ pub fn fmt_message(mes: &mut Vec, opt: FormatOptions, typ: ScriptType) FormatOptions::Fixed { length, keep_original, + break_words, } => { - let formatter = fixed::FixedFormatter::new(length, keep_original, Some(typ)); + let formatter = + fixed::FixedFormatter::new(length, keep_original, break_words, Some(typ)); for message in mes.iter_mut() { message.message = formatter.format(&message.message); } diff --git a/src/main.rs b/src/main.rs index 65b3f0c..2a35dd6 100644 --- a/src/main.rs +++ b/src/main.rs @@ -1171,6 +1171,7 @@ pub fn import_script( types::FormatType::Fixed => types::FormatOptions::Fixed { length: imp_cfg.patched_fixed_length.unwrap_or(32), keep_original: imp_cfg.patched_keep_original, + break_words: imp_cfg.patched_break_words, }, types::FormatType::None => types::FormatOptions::None, }, @@ -1361,6 +1362,7 @@ pub fn import_script( types::FormatType::Fixed => types::FormatOptions::Fixed { length: imp_cfg.patched_fixed_length.unwrap_or(32), keep_original: imp_cfg.patched_keep_original, + break_words: imp_cfg.patched_break_words, }, types::FormatType::None => types::FormatOptions::None, }, diff --git a/src/scripts/bgi/script.rs b/src/scripts/bgi/script.rs index 496c4c6..75513a7 100644 --- a/src/scripts/bgi/script.rs +++ b/src/scripts/bgi/script.rs @@ -167,6 +167,7 @@ impl Script for BGIScript { FormatOptions::Fixed { length: 32, keep_original: false, + break_words: false, } } } diff --git a/src/scripts/circus/script.rs b/src/scripts/circus/script.rs index 3a3bf4c..7914e9f 100644 --- a/src/scripts/circus/script.rs +++ b/src/scripts/circus/script.rs @@ -193,6 +193,7 @@ impl Script for CircusMesScript { FormatOptions::Fixed { length: 32, keep_original: false, + break_words: false, } } diff --git a/src/types.rs b/src/types.rs index afaf7fe..386ef1b 100644 --- a/src/types.rs +++ b/src/types.rs @@ -545,6 +545,8 @@ pub enum FormatOptions { length: usize, /// Whether to keep original line breaks keep_original: bool, + /// Whether to break words(ASCII only) at the end of the line + break_words: bool, }, /// Do not wrap line None,