diff --git a/src/args.rs b/src/args.rs index c3363b3..61598ec 100644 --- a/src/args.rs +++ b/src/args.rs @@ -513,6 +513,9 @@ pub struct ImportArgs { #[arg(long, action = ArgAction::SetTrue)] /// Insert fullwidth space at the start of line in patched script (for fixed format) pub patched_insert_fullwidth_space_at_line_start: bool, + #[arg(long, action = ArgAction::SetTrue)] + /// If a line break occurs in the middle of some symbols, bring the sentence to next line (for fixed format) + pub patched_break_with_sentence: bool, #[arg(long)] /// Name table file pub name_csv: Option, diff --git a/src/format/fixed.rs b/src/format/fixed.rs index d81550b..4790886 100644 --- a/src/format/fixed.rs +++ b/src/format/fixed.rs @@ -3,6 +3,7 @@ use unicode_segmentation::UnicodeSegmentation; const SPACE_STR_LIST: [&str; 2] = [" ", " "]; const QUOTE_LIST: [(&str, &str); 4] = [("「", "」"), ("『", "』"), ("(", ")"), ("【", "】")]; +const BREAK_SENTENCE_SYMBOLS: [&str; 5] = ["…", ",", "。", "?", "!"]; fn check_is_ascii_alphanumeric(s: &str) -> bool { for c in s.chars() { @@ -33,6 +34,17 @@ fn check_need_fullwidth_space(s: &str) -> bool { false } +fn check_is_end_quote(segs: &[&str], pos: usize) -> bool { + for p in pos..segs.len() { + let d = segs[p]; + let is_end_quote = QUOTE_LIST.iter().any(|(_, close)| d == *close); + if !is_end_quote { + return false; + } + } + true +} + pub struct FixedFormatter { length: usize, keep_original: bool, @@ -40,6 +52,8 @@ pub struct FixedFormatter { break_words: bool, /// Whether to insert a full-width space after a line break when a sentence starts with a full-width quotation mark. insert_fullwidth_space_at_line_start: bool, + /// If a line break occurs in the middle of some symbols, bring the sentence to next line + break_with_sentence: bool, #[allow(unused)] typ: Option, } @@ -50,6 +64,7 @@ impl FixedFormatter { keep_original: bool, break_words: bool, insert_fullwidth_space_at_line_start: bool, + break_with_sentence: bool, typ: Option, ) -> Self { FixedFormatter { @@ -57,10 +72,53 @@ impl FixedFormatter { keep_original, break_words, insert_fullwidth_space_at_line_start, + break_with_sentence, typ, } } + #[cfg(test)] + fn builder(length: usize) -> Self { + FixedFormatter { + length, + keep_original: false, + break_words: true, + insert_fullwidth_space_at_line_start: false, + break_with_sentence: false, + typ: None, + } + } + + #[cfg(test)] + fn keep_original(mut self, keep: bool) -> Self { + self.keep_original = keep; + self + } + + #[cfg(test)] + fn break_words(mut self, break_words: bool) -> Self { + self.break_words = break_words; + self + } + + #[cfg(test)] + fn insert_fullwidth_space_at_line_start(mut self, insert: bool) -> Self { + self.insert_fullwidth_space_at_line_start = insert; + self + } + + #[cfg(test)] + fn break_with_sentence(mut self, break_with_sentence: bool) -> Self { + self.break_with_sentence = break_with_sentence; + self + } + + #[cfg(test)] + fn typ(mut self, typ: Option) -> Self { + self.typ = typ; + self + } + #[cfg(feature = "circus")] fn is_circus(&self) -> bool { matches!(self.typ, Some(ScriptType::Circus)) @@ -126,7 +184,76 @@ impl FixedFormatter { // Check if we need to break and handle word breaking if current_length >= self.length { - if !self.break_words + if self.break_with_sentence + && !is_command + && !is_ruby_rt + && ((BREAK_SENTENCE_SYMBOLS.contains(&grapheme) + && i > 1 + && BREAK_SENTENCE_SYMBOLS.contains(&vec[i - 1])) + || check_is_end_quote(&vec, i)) + { + let mut break_pos = None; + let segs = result.graphemes(true).collect::>(); + let is_end_quote = check_is_end_quote(&vec, i); + let mut end = segs.len(); + for (j, ch) in segs.iter().enumerate().rev() { + if BREAK_SENTENCE_SYMBOLS.contains(ch) { + end = j; + if !is_end_quote { + break_pos = Some(j); + } + } + break; + } + for (j, ch) in segs[..end].iter().enumerate().rev() { + if j >= end { + continue; + } + if BREAK_SENTENCE_SYMBOLS.contains(ch) { + break_pos = Some(j + 1); + break; + } + } + if let Some(pos) = break_pos { + let remaining = segs[pos..].concat().trim_start().to_string(); + result = segs[..pos].concat(); + result.push('\n'); + current_length = 0; + if first_line { + if self.insert_fullwidth_space_at_line_start { + if check_need_fullwidth_space(&main_content) { + need_insert_fullwidth_space = true; + } + } + first_line = false; + } + if need_insert_fullwidth_space { + result.push(' '); + current_length += 1; + } + result.push_str(&remaining); + current_length += remaining.graphemes(true).count(); + main_content.clear(); + pre_is_lf = true; + } else { + result.push('\n'); + current_length = 0; + if first_line { + if self.insert_fullwidth_space_at_line_start { + if check_need_fullwidth_space(&main_content) { + need_insert_fullwidth_space = true; + } + } + first_line = false; + } + if need_insert_fullwidth_space { + result.push(' '); + current_length += 1; + } + main_content.clear(); + pre_is_lf = true; + } + } else if !self.break_words && !is_command && !is_ruby_rt && check_is_ascii_alphanumeric(grapheme) @@ -287,7 +414,7 @@ impl FixedFormatter { #[test] fn test_format() { - let formatter = FixedFormatter::new(10, false, true, false, None); + let formatter = FixedFormatter::builder(10); let message = "This is a test message.\nThis is another line."; let formatted_message = formatter.format(message); assert_eq!( @@ -299,38 +426,40 @@ fn test_format() { formatter.format("● This is  a test."), "● This is \na test." ); - let fommater2 = FixedFormatter::new(10, true, true, false, None); + let fommater2 = FixedFormatter::builder(10).keep_original(true); assert_eq!( fommater2.format("● Th\n is is a te st."), "● Th\nis is a te\nst." ); // Test break_words = false - let no_break_formatter = FixedFormatter::new(10, false, false, false, None); + let no_break_formatter = FixedFormatter::builder(10).break_words(false); assert_eq!( no_break_formatter.format("Example text."), "Example \ntext." ); - let no_break_formatter2 = FixedFormatter::new(6, false, false, false, None); + let no_break_formatter2 = FixedFormatter::builder(6).break_words(false); assert_eq!( no_break_formatter2.format("Example text."), "Exampl\ne text\n." ); - let no_break_formatter3 = FixedFormatter::new(7, false, false, false, None); + let no_break_formatter3 = FixedFormatter::builder(7).break_words(false); assert_eq!( no_break_formatter3.format("Example text."), "Example\ntext." ); - let real_world_no_break_formatter = FixedFormatter::new(32, false, false, false, None); + let real_world_no_break_formatter = FixedFormatter::builder(32).break_words(false); assert_eq!( real_world_no_break_formatter.format("○咕噜咕噜(Temporary Magnetic Pattern Linkage)"), "○咕噜咕噜(Temporary Magnetic Pattern\nLinkage)" ); - let formatter3 = FixedFormatter::new(10, false, false, true, None); + let formatter3 = FixedFormatter::builder(10) + .break_words(false) + .insert_fullwidth_space_at_line_start(true); assert_eq!( formatter3.format("「This is a test."), "「This is a\n\u{3000}test." @@ -351,10 +480,32 @@ fn test_format() { "(This) 「is\n\u{3000}a test." ); + let formatter4 = FixedFormatter::builder(10) + .break_words(false) + .break_with_sentence(true); + assert_eq!( + formatter4.format("『打断测,测试一下……』"), + "『打断测,\n测试一下……』" + ); + + assert_eq!( + formatter4.format("『打断测,测试一下。』"), + "『打断测,\n测试一下。』" + ); + + assert_eq!( + formatter4.format("『打断是测试一下哦……』"), + "『打断是测试一下哦\n……』" + ); + + assert_eq!( + formatter4.format("『打断测是测试一下。』"), + "『打断测是测试一下。\n』" + ); + #[cfg(feature = "circus")] { - let circus_formatter = - FixedFormatter::new(10, false, true, false, Some(ScriptType::Circus)); + let circus_formatter = FixedFormatter::builder(10).typ(Some(ScriptType::Circus)); assert_eq!( circus_formatter.format("● @cmd1@cmd2@cmd3中文字数是一\n 二三 四五六七八九十"), "● @cmd1@cmd2@cmd3中文字数是一二三\n四五六七八九十" @@ -364,8 +515,7 @@ fn test_format() { .format("● @cmd1@cmd2@cmd3{rubyText/中文}字数是一\n 二三 四五六七八九十"), "● @cmd1@cmd2@cmd3{rubyText/中文}字数是一二三\n四五六七八九十" ); - let circus_formatter2 = - FixedFormatter::new(32, false, true, false, Some(ScriptType::Circus)); + let circus_formatter2 = FixedFormatter::builder(32).typ(Some(ScriptType::Circus)); assert_eq!( circus_formatter2.format("@re1@re2@b1@t30@w1「当然现在我很幸福哦?\n 因为有你在身边」@n\n「@b1@t38@w1当然现在我很幸福哦?\n 因为有敦也君在身边」"), "@re1@re2@b1@t30@w1「当然现在我很幸福哦?因为有你在身边」@n\n「@b1@t38@w1当然现在我很幸福哦?因为有敦也君在身边」" @@ -374,8 +524,9 @@ fn test_format() { #[cfg(feature = "kirikiri")] { - let scn_formatter = - FixedFormatter::new(3, false, false, false, Some(ScriptType::KirikiriScn)); + let scn_formatter = FixedFormatter::builder(3) + .break_words(false) + .typ(Some(ScriptType::KirikiriScn)); assert_eq!( scn_formatter.format("%test;[ruby]测[test]试打断。"), "%test;[ruby]测[test]试打\n断。" diff --git a/src/format/mod.rs b/src/format/mod.rs index f208f82..70fc7b1 100644 --- a/src/format/mod.rs +++ b/src/format/mod.rs @@ -11,12 +11,14 @@ pub fn fmt_message(mes: &mut Vec, opt: FormatOptions, typ: ScriptType) keep_original, break_words, insert_fullwidth_space_at_line_start, + break_with_sentence, } => { let formatter = fixed::FixedFormatter::new( length, keep_original, break_words, insert_fullwidth_space_at_line_start, + break_with_sentence, Some(typ), ); for message in mes.iter_mut() { diff --git a/src/main.rs b/src/main.rs index 3b047fc..9927ac5 100644 --- a/src/main.rs +++ b/src/main.rs @@ -1372,6 +1372,7 @@ pub fn import_script( break_words: imp_cfg.patched_break_words, insert_fullwidth_space_at_line_start: imp_cfg .patched_insert_fullwidth_space_at_line_start, + break_with_sentence: imp_cfg.patched_break_with_sentence, }, types::FormatType::None => types::FormatOptions::None, }, @@ -1590,6 +1591,7 @@ pub fn import_script( break_words: imp_cfg.patched_break_words, insert_fullwidth_space_at_line_start: imp_cfg .patched_insert_fullwidth_space_at_line_start, + break_with_sentence: imp_cfg.patched_break_with_sentence, }, types::FormatType::None => types::FormatOptions::None, }, diff --git a/src/scripts/bgi/script.rs b/src/scripts/bgi/script.rs index bf193fe..86223a4 100644 --- a/src/scripts/bgi/script.rs +++ b/src/scripts/bgi/script.rs @@ -169,6 +169,7 @@ impl Script for BGIScript { keep_original: false, break_words: false, insert_fullwidth_space_at_line_start: true, + break_with_sentence: true, } } } diff --git a/src/scripts/circus/script.rs b/src/scripts/circus/script.rs index 19ad744..0eeefef 100644 --- a/src/scripts/circus/script.rs +++ b/src/scripts/circus/script.rs @@ -219,6 +219,7 @@ impl Script for CircusMesScript { keep_original: false, break_words: false, insert_fullwidth_space_at_line_start: true, + break_with_sentence: true, } } diff --git a/src/types.rs b/src/types.rs index 82fe121..33e4d4c 100644 --- a/src/types.rs +++ b/src/types.rs @@ -662,6 +662,8 @@ pub enum FormatOptions { break_words: bool, /// Whether to insert a full-width space after a line break when a sentence starts with a full-width quotation mark. insert_fullwidth_space_at_line_start: bool, + /// If a line break occurs in the middle of some symbols, bring the sentence to next line + break_with_sentence: bool, }, /// Do not wrap line None,