From f0037b2c922bb11a4e146d556e10d7a01ffa9f42 Mon Sep 17 00:00:00 2001
From: lifegpc
Date: Mon, 15 Sep 2025 14:15:17 +0800
Subject: [PATCH] Add more check for break with sentence

---
 src/format/fixed.rs | 29 ++++++++++++++++++++++++++++-
 1 file changed, 28 insertions(+), 1 deletion(-)

diff --git a/src/format/fixed.rs b/src/format/fixed.rs
index 96e5db1..ee5b8c9 100644
--- a/src/format/fixed.rs
+++ b/src/format/fixed.rs
@@ -48,6 +48,18 @@ fn check_is_end_quote(segs: &[&str], pos: usize) -> bool {
     true
 }
 
+fn check_is_end_quote_or_symbol(segs: &[&str], pos: usize) -> bool {
+    for p in pos..segs.len() {
+        let d = segs[p];
+        let is_end_quote =
+            QUOTE_LIST.iter().any(|(_, close)| d == *close) || BREAK_SENTENCE_SYMBOLS.contains(&d);
+        if !is_end_quote {
+            return false;
+        }
+    }
+    true
+}
+
 #[cfg(feature = "jieba")]
 fn check_chinese_word_is_break(segs: &[&str], pos: usize, jieba: &Jieba) -> bool {
     let s = segs.join("");
@@ -258,7 +270,7 @@ impl FixedFormatter {
                     && ((BREAK_SENTENCE_SYMBOLS.contains(&grapheme)
                         && i > 1
                         && BREAK_SENTENCE_SYMBOLS.contains(&vec[i - 1]))
-                        || check_is_end_quote(&vec, i))
+                        || check_is_end_quote_or_symbol(&vec, i))
                 {
                     let mut break_pos = None;
                     let segs = result.graphemes(true).collect::<Vec<_>>();
@@ -646,6 +658,21 @@ fn test_format() {
         "『打断测是测试一下。\n』"
     );
 
+    assert_eq!(
+        formatter4.format("『打断测试,测试一下。』"),
+        "『打断测试,\n测试一下。』"
+    );
+
+    assert_eq!(
+        formatter4.format("这打断测试,测试一下。"),
+        "这打断测试,\n测试一下。"
+    );
+
+    assert_eq!(
+        formatter4.format("这打断测试哦测试一下。。"),
+        "这打断测试哦测试一下\n。。"
+    );
+
     #[cfg(feature = "circus")]
     {
         let circus_formatter = FixedFormatter::builder(10).typ(Some(ScriptType::Circus));