From 3469f6babbc2de542d2478098965e9fb66cfc04b Mon Sep 17 00:00:00 2001 From: lifegpc Date: Mon, 1 Sep 2025 18:50:27 +0800 Subject: [PATCH] feat: add new fixed formatter option --- src/args.rs | 3 + src/format/fixed.rs | 146 ++++++++++++++++++++++++++++++++--- src/format/mod.rs | 10 ++- src/main.rs | 4 + src/scripts/bgi/script.rs | 1 + src/scripts/circus/script.rs | 1 + src/types.rs | 2 + 7 files changed, 153 insertions(+), 14 deletions(-) diff --git a/src/args.rs b/src/args.rs index 0d96f30..5de5cf5 100644 --- a/src/args.rs +++ b/src/args.rs @@ -434,6 +434,9 @@ pub struct ImportArgs { #[arg(long, action = ArgAction::SetTrue)] /// Break words in patched script (for fixed format) pub patched_break_words: bool, + #[arg(long, action = ArgAction::SetTrue)] + /// Insert fullwidth space at the start of line in patched script (for fixed format) + pub patched_insert_fullwidth_space_at_line_start: bool, #[arg(long)] /// Name table file pub name_csv: Option, diff --git a/src/format/fixed.rs b/src/format/fixed.rs index 57c68eb..1a7a68c 100644 --- a/src/format/fixed.rs +++ b/src/format/fixed.rs @@ -2,12 +2,44 @@ use crate::types::*; use unicode_segmentation::UnicodeSegmentation; const SPACE_STR_LIST: [&str; 2] = [" ", " "]; +const QUOTE_LIST: [(&str, &str); 4] = [("「", "」"), ("『", "』"), ("(", ")"), ("【", "】")]; + +fn check_is_ascii_alphanumeric(s: &str) -> bool { + for c in s.chars() { + if !c.is_ascii_alphanumeric() { + return false; + } + } + true +} + +fn check_need_fullwidth_space(s: &str) -> bool { + let has_start_quote = QUOTE_LIST.iter().any(|(open, _)| s.starts_with(open)); + if !has_start_quote { + return false; + } + for (open, close) in QUOTE_LIST.iter() { + let open_index = s.rfind(open); + if let Some(open_index) = open_index { + let index = s.rfind(close); + match index { + Some(idx) => { + return idx < open_index; + } + None => return true, + } + } + } + false +} pub struct FixedFormatter { length: usize, keep_original: bool, /// Whether to break words (ASCII only) at the end of the line. break_words: bool, + /// Whether to insert a full-width space after a line break when a sentence starts with a full-width quotation mark. + insert_fullwidth_space_at_line_start: bool, #[allow(unused)] typ: Option, } @@ -17,12 +49,14 @@ impl FixedFormatter { length: usize, keep_original: bool, break_words: bool, + insert_fullwidth_space_at_line_start: bool, typ: Option, ) -> Self { FixedFormatter { length, keep_original, break_words, + insert_fullwidth_space_at_line_start, typ, } } @@ -47,6 +81,10 @@ impl FixedFormatter { let mut is_ruby_rt = false; let mut last_command = None; let mut i = 0; + // Store main content of the line (excluding commands and ruby) + let mut main_content = String::new(); + let mut first_line = true; + let mut need_insert_fullwidth_space = false; while i < vec.len() { let grapheme = vec[i]; @@ -57,6 +95,19 @@ impl FixedFormatter { { result.push('\n'); current_length = 0; + if first_line { + if self.insert_fullwidth_space_at_line_start { + if check_need_fullwidth_space(&main_content) { + need_insert_fullwidth_space = true; + } + } + } + if need_insert_fullwidth_space { + result.push(' '); + current_length += 1; + } + main_content.clear(); + first_line = false; } pre_is_lf = true; i += 1; @@ -65,7 +116,11 @@ impl FixedFormatter { // Check if we need to break and handle word breaking if current_length >= self.length { - if !self.break_words && !is_command && !is_ruby_rt { + if !self.break_words + && !is_command + && !is_ruby_rt + && check_is_ascii_alphanumeric(grapheme) + { // Look back to find a good break point (space or non-ASCII) let mut break_pos = None; let mut temp_length = current_length; @@ -91,15 +146,58 @@ impl FixedFormatter { let remaining = result[pos..].trim_start().to_string(); result.truncate(pos); result.push('\n'); + current_length = 0; + if first_line { + if self.insert_fullwidth_space_at_line_start { + if check_need_fullwidth_space(&main_content) { + need_insert_fullwidth_space = true; + } + } + first_line = false; + } + if need_insert_fullwidth_space { + result.push(' '); + current_length += 1; + } result.push_str(&remaining); - current_length = remaining.chars().count(); + current_length += remaining.chars().count(); + main_content.clear(); + pre_is_lf = true; } else { result.push('\n'); current_length = 0; + if first_line { + if self.insert_fullwidth_space_at_line_start { + if check_need_fullwidth_space(&main_content) { + need_insert_fullwidth_space = true; + } + } + first_line = false; + } + if need_insert_fullwidth_space { + result.push(' '); + current_length += 1; + } + main_content.clear(); + pre_is_lf = true; } } else { result.push('\n'); current_length = 0; + if first_line { + if self.insert_fullwidth_space_at_line_start { + if check_need_fullwidth_space(&main_content) { + need_insert_fullwidth_space = true; + } + } + first_line = false; + } + if need_insert_fullwidth_space { + result.push(' '); + current_length += 1; + } + main_content.clear(); + pre_is_lf = true; } } @@ -145,6 +243,7 @@ impl FixedFormatter { if !is_command && !is_ruby_rt { current_length += 1; + main_content.push_str(grapheme); } pre_is_lf = false; @@ -157,7 +256,7 @@ impl FixedFormatter { #[test] fn test_format() { - let formatter = FixedFormatter::new(10, false, true, None); + let formatter = FixedFormatter::new(10, false, true, false, None); let message = "This is a test message.\nThis is another line."; let formatted_message = formatter.format(message); assert_eq!( @@ -169,40 +268,62 @@ fn test_format() { formatter.format("● This is  a test."), "● This is \na test." ); - let fommater2 = FixedFormatter::new(10, true, true, None); + let fommater2 = FixedFormatter::new(10, true, true, false, None); assert_eq!( fommater2.format("● Th\n is is a te st."), "● Th\nis is a te\nst." ); // Test break_words = false - let no_break_formatter = FixedFormatter::new(10, false, false, None); + let no_break_formatter = FixedFormatter::new(10, false, false, false, None); assert_eq!( no_break_formatter.format("Example text."), "Example \ntext." ); - let no_break_formatter2 = FixedFormatter::new(6, false, false, None); + let no_break_formatter2 = FixedFormatter::new(6, false, false, false, None); assert_eq!( no_break_formatter2.format("Example text."), - "Exampl\ne \ntext." + "Exampl\ne text\n." ); - let no_break_formatter3 = FixedFormatter::new(7, false, false, None); + let no_break_formatter3 = FixedFormatter::new(7, false, false, false, None); assert_eq!( no_break_formatter3.format("Example text."), "Example\ntext." ); - let real_world_no_break_formatter = FixedFormatter::new(32, false, false, None); + let real_world_no_break_formatter = FixedFormatter::new(32, false, false, false, None); assert_eq!( real_world_no_break_formatter.format("○咕噜咕噜(Temporary Magnetic Pattern Linkage)"), - "○咕噜咕噜(Temporary Magnetic \nPattern Linkage)" + "○咕噜咕噜(Temporary Magnetic Pattern\nLinkage)" + ); + + let formatter3 = FixedFormatter::new(10, false, false, true, None); + assert_eq!( + formatter3.format("「This is a test."), + "「This is a\n\u{3000}test." + ); + + assert_eq!( + formatter3.format("(This) is a test."), + "(This) is \na test." + ); + + assert_eq!( + formatter3.format("(long text test here, test 1234"), + "(long text\n\u{3000}test here\n\u{3000}, test \n\u{3000}1234" + ); + + assert_eq!( + formatter3.format("(This) 「is a test."), + "(This) 「is\n\u{3000}a test." ); #[cfg(feature = "circus")] { - let circus_formatter = FixedFormatter::new(10, false, true, Some(ScriptType::Circus)); + let circus_formatter = + FixedFormatter::new(10, false, true, false, Some(ScriptType::Circus)); assert_eq!( circus_formatter.format("● @cmd1@cmd2@cmd3中文字数是一\n 二三 四五六七八九十"), "● @cmd1@cmd2@cmd3中文字数是一二三\n四五六七八九十" @@ -212,7 +333,8 @@ fn test_format() { .format("● @cmd1@cmd2@cmd3{rubyText/中文}字数是一\n 二三 四五六七八九十"), "● @cmd1@cmd2@cmd3{rubyText/中文}字数是一二三\n四五六七八九十" ); - let circus_formatter2 = FixedFormatter::new(32, false, true, Some(ScriptType::Circus)); + let circus_formatter2 = + FixedFormatter::new(32, false, true, false, Some(ScriptType::Circus)); assert_eq!( circus_formatter2.format("@re1@re2@b1@t30@w1「当然现在我很幸福哦?\n 因为有你在身边」@n\n「@b1@t38@w1当然现在我很幸福哦?\n 因为有敦也君在身边」"), "@re1@re2@b1@t30@w1「当然现在我很幸福哦?因为有你在身边」@n\n「@b1@t38@w1当然现在我很幸福哦?因为有敦也君在身边」" diff --git a/src/format/mod.rs b/src/format/mod.rs index a7ee855..f208f82 100644 --- a/src/format/mod.rs +++ b/src/format/mod.rs @@ -10,9 +10,15 @@ pub fn fmt_message(mes: &mut Vec, opt: FormatOptions, typ: ScriptType) length, keep_original, break_words, + insert_fullwidth_space_at_line_start, } => { - let formatter = - fixed::FixedFormatter::new(length, keep_original, break_words, Some(typ)); + let formatter = fixed::FixedFormatter::new( + length, + keep_original, + break_words, + insert_fullwidth_space_at_line_start, + Some(typ), + ); for message in mes.iter_mut() { message.message = formatter.format(&message.message); } diff --git a/src/main.rs b/src/main.rs index a44f16e..030f2ff 100644 --- a/src/main.rs +++ b/src/main.rs @@ -1172,6 +1172,8 @@ pub fn import_script( length: imp_cfg.patched_fixed_length.unwrap_or(32), keep_original: imp_cfg.patched_keep_original, break_words: imp_cfg.patched_break_words, + insert_fullwidth_space_at_line_start: imp_cfg + .patched_insert_fullwidth_space_at_line_start, }, types::FormatType::None => types::FormatOptions::None, }, @@ -1363,6 +1365,8 @@ pub fn import_script( length: imp_cfg.patched_fixed_length.unwrap_or(32), keep_original: imp_cfg.patched_keep_original, break_words: imp_cfg.patched_break_words, + insert_fullwidth_space_at_line_start: imp_cfg + .patched_insert_fullwidth_space_at_line_start, }, types::FormatType::None => types::FormatOptions::None, }, diff --git a/src/scripts/bgi/script.rs b/src/scripts/bgi/script.rs index 75513a7..bf193fe 100644 --- a/src/scripts/bgi/script.rs +++ b/src/scripts/bgi/script.rs @@ -168,6 +168,7 @@ impl Script for BGIScript { length: 32, keep_original: false, break_words: false, + insert_fullwidth_space_at_line_start: true, } } } diff --git a/src/scripts/circus/script.rs b/src/scripts/circus/script.rs index 7914e9f..3329731 100644 --- a/src/scripts/circus/script.rs +++ b/src/scripts/circus/script.rs @@ -194,6 +194,7 @@ impl Script for CircusMesScript { length: 32, keep_original: false, break_words: false, + insert_fullwidth_space_at_line_start: true, } } diff --git a/src/types.rs b/src/types.rs index 10c1d6e..3c055ce 100644 --- a/src/types.rs +++ b/src/types.rs @@ -583,6 +583,8 @@ pub enum FormatOptions { keep_original: bool, /// Whether to break words(ASCII only) at the end of the line break_words: bool, + /// Whether to insert a full-width space after a line break when a sentence starts with a full-width quotation mark. + insert_fullwidth_space_at_line_start: bool, }, /// Do not wrap line None,