msg-tool/src/format/fixed.rs

use crate::types::*;
use anyhow::Result;
#[cfg(feature = "jieba")]
use jieba_rs::Jieba;
use unicode_segmentation::UnicodeSegmentation;

/// Space graphemes (ASCII space and full-width space) that the formatter drops
/// when they would otherwise start an output line.
const SPACE_STR_LIST: [&str; 2] = [" ", "　"];
/// `(opening, closing)` pairs of full-width quotation marks and brackets.
const QUOTE_LIST: [(&str, &str); 4] = [("「", "」"), ("『", "』"), ("（", "）"), ("【", "】")];
/// Punctuation graphemes treated as sentence break points when
/// `break_with_sentence` is enabled.
const BREAK_SENTENCE_SYMBOLS: [&str; 6] = ["…", "，", "。", "？", "！", "—"];

/// Returns `true` when every character of `s` is an ASCII letter or digit.
///
/// An empty string yields `true`, because it contains no offending character.
fn check_is_ascii_alphanumeric(s: &str) -> bool {
    s.chars().all(|ch| ch.is_ascii_alphanumeric())
}

/// Decides whether continuation lines of `s` should be indented with a
/// full-width space.
///
/// Returns `false` unless `s` starts with one of the opening marks in
/// [`QUOTE_LIST`]. Otherwise the first pair (in `QUOTE_LIST` order) whose
/// opening mark occurs anywhere in `s` is examined: the result is `true` when
/// that pair's closing mark is absent, or when its last occurrence lies before
/// the last occurrence of the opening mark (i.e. the quote is still open).
fn check_need_fullwidth_space(s: &str) -> bool {
    let starts_with_quote = QUOTE_LIST.iter().any(|(open, _)| s.starts_with(open));
    if !starts_with_quote {
        return false;
    }
    QUOTE_LIST
        .iter()
        .find_map(|(open, close)| {
            // Pairs whose opening mark never occurs are skipped.
            let last_open = s.rfind(open)?;
            let still_open = match s.rfind(close) {
                Some(last_close) => last_close < last_open,
                None => true,
            };
            Some(still_open)
        })
        .unwrap_or(false)
}

/// Returns `true` when the grapheme at `segs[pos]` is one of the closing marks
/// in [`QUOTE_LIST`].
///
/// # Panics
///
/// Panics if `pos` is out of bounds for `segs`.
fn check_is_end_quote(segs: &[&str], pos: usize) -> bool {
    let current = segs[pos];
    for (_, close) in QUOTE_LIST.iter() {
        if *close == current {
            return true;
        }
    }
    false
}

/// Returns `true` when the grapheme at `segs[pos]` is either a closing mark
/// from [`QUOTE_LIST`] or one of the [`BREAK_SENTENCE_SYMBOLS`].
///
/// # Panics
///
/// Panics if `pos` is out of bounds for `segs`.
fn check_is_end_quote_or_symbol(segs: &[&str], pos: usize) -> bool {
    let current = segs[pos];
    if QUOTE_LIST.iter().any(|&(_, close)| close == current) {
        return true;
    }
    BREAK_SENTENCE_SYMBOLS
        .iter()
        .any(|&symbol| symbol == current)
}

/// Returns `true` when `s` is exactly one of the opening marks in
/// [`QUOTE_LIST`].
fn check_is_start_quote(s: &str) -> bool {
    QUOTE_LIST
        .iter()
        .map(|pair| pair.0)
        .any(|open| open == s)
}

/// Removes the run of opening quotation marks at the end of `buffer` and
/// returns it (in original order).
///
/// `buffer` keeps everything before that run; when it does not end with an
/// opening mark it is left unchanged and an empty string is returned.
fn take_trailing_start_quotes(buffer: &mut String) -> String {
    let split_at = {
        let pieces: Vec<&str> = buffer.graphemes(true).collect();
        // Number of leading graphemes to keep: one past the last grapheme
        // that is not an opening quote (zero if all of them are).
        let kept = pieces
            .iter()
            .rposition(|piece| !check_is_start_quote(piece))
            .map_or(0, |idx| idx + 1);
        let kept_bytes: usize = pieces[..kept].iter().map(|piece| piece.len()).sum();
        kept_bytes
    };
    // Grapheme boundaries are char boundaries, so the split cannot panic.
    buffer.split_off(split_at)
}

/// Returns `true` when a line break before grapheme index `pos` would fall
/// inside a word, according to jieba's segmentation of the joined `segs`.
///
/// The text is cut into words and the grapheme offset at the end of each word
/// is accumulated; `pos` is considered safe only if it equals one of those
/// offsets.
#[cfg(feature = "jieba")]
fn check_chinese_word_is_break(segs: &[&str], pos: usize, jieba: &Jieba) -> bool {
    let text = segs.concat();
    let mut consumed = 0usize;
    let on_word_end = jieba.cut(&text, false).iter().any(|word| {
        consumed += word.graphemes(true).count();
        consumed == pos
    });
    !on_word_end
}

/// Fallback used when the `jieba` feature is disabled: no segmentation is
/// available, so a break is never reported as splitting a word.
#[cfg(not(feature = "jieba"))]
fn check_chinese_word_is_break(_segs: &[&str], _pos: usize, _jieba: &()) -> bool {
    false
}

/// Formatter that re-wraps a message into lines of a fixed maximum length,
/// measured in grapheme clusters.
pub struct FixedFormatter {
    /// Number of counted graphemes after which `format` inserts a line break.
    length: usize,
    /// Whether line breaks already present in the message are kept in the output.
    keep_original: bool,
    /// Whether to break words (ASCII only) at the end of the line.
    break_words: bool,
    /// Whether to insert a full-width space after a line break when a sentence starts with a full-width quotation mark.
    insert_fullwidth_space_at_line_start: bool,
    /// If a line break occurs in the middle of some symbols, bring the sentence to next line
    break_with_sentence: bool,
    #[cfg(feature = "jieba")]
    /// Jieba instance for Chinese word segmentation.
    jieba: Option<Jieba>,
    // Placeholder so the field exists when the `jieba` feature is disabled;
    // it is always `None` in that configuration.
    #[cfg(not(feature = "jieba"))]
    jieba: Option<()>,
    /// Script type; selects the Circus / Kirikiri SCN specific handling in `format`
    /// when the corresponding feature is enabled.
    #[allow(unused)]
    typ: Option<ScriptType>,
}

impl FixedFormatter {
    pub fn new(
        length: usize,
        keep_original: bool,
        break_words: bool,
        insert_fullwidth_space_at_line_start: bool,
        break_with_sentence: bool,
        #[cfg(feature = "jieba")] break_chinese_words: bool,
        #[cfg(feature = "jieba")] jieba_dict: Option<String>,
        typ: Option<ScriptType>,
    ) -> Result<Self> {
        #[cfg(feature = "jieba")]
        let jieba = if !break_chinese_words {
            let mut jieba = Jieba::new();
            if let Some(dict) = jieba_dict {
                let file = std::fs::File::open(dict)?;
                let mut reader = std::io::BufReader::new(file);
                jieba.load_dict(&mut reader)?;
            }
            Some(jieba)
        } else {
            None
        };
        Ok(FixedFormatter {
            length,
            keep_original,
            break_words,
            insert_fullwidth_space_at_line_start,
            break_with_sentence,
            #[cfg(feature = "jieba")]
            jieba,
            #[cfg(not(feature = "jieba"))]
            jieba: None,
            typ,
        })
    }

    /// Test helper: a formatter with the given line length, word breaking
    /// enabled and every other option off.
    #[cfg(test)]
    fn builder(length: usize) -> Self {
        Self {
            typ: None,
            jieba: None,
            break_with_sentence: false,
            insert_fullwidth_space_at_line_start: false,
            break_words: true,
            keep_original: false,
            length,
        }
    }

    /// Test helper: returns the formatter with `keep_original` set to `keep`.
    #[cfg(test)]
    fn keep_original(self, keep: bool) -> Self {
        Self {
            keep_original: keep,
            ..self
        }
    }

    /// Test helper: returns the formatter with `break_words` replaced.
    #[cfg(test)]
    fn break_words(self, break_words: bool) -> Self {
        Self {
            break_words,
            ..self
        }
    }

    /// Test helper: returns the formatter with
    /// `insert_fullwidth_space_at_line_start` set to `insert`.
    #[cfg(test)]
    fn insert_fullwidth_space_at_line_start(self, insert: bool) -> Self {
        Self {
            insert_fullwidth_space_at_line_start: insert,
            ..self
        }
    }

    /// Test helper: returns the formatter with `break_with_sentence` replaced.
    #[cfg(test)]
    fn break_with_sentence(self, break_with_sentence: bool) -> Self {
        Self {
            break_with_sentence,
            ..self
        }
    }

    /// Test helper: installs a default jieba segmenter when Chinese words must
    /// not be broken, and removes it otherwise.
    #[cfg(all(feature = "jieba", test))]
    fn break_chinese_words(mut self, break_chinese_words: bool) -> Result<Self> {
        self.jieba = (!break_chinese_words).then(Jieba::new);
        Ok(self)
    }

    /// Test helper: adds `dict` as a word to the segmenter, if one is installed.
    #[cfg(all(feature = "jieba", test))]
    fn add_dict(mut self, dict: &str, freq: Option<usize>, tag: Option<&str>) -> Self {
        if let Some(segmenter) = self.jieba.as_mut() {
            segmenter.add_word(dict, freq, tag);
        }
        self
    }

    /// Test helper: returns the formatter with its script type replaced.
    #[cfg(test)]
    #[allow(dead_code)]
    fn typ(self, typ: Option<ScriptType>) -> Self {
        Self { typ, ..self }
    }

    /// Whether the formatter was configured for Circus scripts.
    #[cfg(feature = "circus")]
    fn is_circus(&self) -> bool {
        self.typ
            .as_ref()
            .is_some_and(|typ| matches!(typ, ScriptType::Circus))
    }

    /// Without the `circus` feature the Circus handling is never active.
    #[cfg(not(feature = "circus"))]
    fn is_circus(&self) -> bool {
        false
    }

    /// Whether the formatter was configured for Kirikiri SCN scripts.
    #[cfg(feature = "kirikiri")]
    fn is_scn(&self) -> bool {
        self.typ
            .as_ref()
            .is_some_and(|typ| matches!(typ, ScriptType::KirikiriScn))
    }

    /// Without the `kirikiri` feature the SCN handling is never active.
    #[cfg(not(feature = "kirikiri"))]
    fn is_scn(&self) -> bool {
        false
    }

    pub fn format(&self, message: &str) -> String {
        let mut result = String::new();
        let vec: Vec<_> = UnicodeSegmentation::graphemes(message, true).collect();
        let mut current_length = 0;
        let mut is_command = false;
        let mut pre_is_lf = false;
        let mut is_ruby = false;
        let mut is_ruby_rt = false;
        let mut last_command = None;
        let mut i = 0;
        // Store main content of the line (excluding commands and ruby)
        let mut main_content = String::new();
        let mut first_line = true;
        let mut need_insert_fullwidth_space = false;

        while i < vec.len() {
            let grapheme = vec[i];

            if grapheme == "\n" {
                if self.keep_original
                    || (self.is_circus() && last_command.as_ref().is_some_and(|cmd| cmd == "@n"))
                {
                    result.push('\n');
                    current_length = 0;
                    if first_line {
                        if self.insert_fullwidth_space_at_line_start {
                            if check_need_fullwidth_space(&main_content) {
                                need_insert_fullwidth_space = true;
                            }
                        }
                    }
                    if need_insert_fullwidth_space {
                        result.push('　');
                        current_length += 1;
                    }
                    main_content.clear();
                    first_line = false;
                }
                pre_is_lf = true;
                i += 1;
                continue;
            }

            // Check if we need to break and handle word breaking
            if current_length >= self.length {
                if self.break_with_sentence
                    && !is_command
                    && !is_ruby_rt
                    && ((BREAK_SENTENCE_SYMBOLS.contains(&grapheme)
                        && i > 1
                        && BREAK_SENTENCE_SYMBOLS.contains(&vec[i - 1]))
                        || check_is_end_quote_or_symbol(&vec, i))
                {
                    let mut break_pos = None;
                    let segs = result.graphemes(true).collect::<Vec<_>>();
                    let is_end_quote = check_is_end_quote(&vec, i);
                    let mut end = segs.len();
                    for (j, ch) in segs.iter().enumerate().rev() {
                        if BREAK_SENTENCE_SYMBOLS.contains(ch) {
                            end = j;
                            if !is_end_quote {
                                break_pos = Some(j);
                            }
                        }
                        break;
                    }
                    for (j, ch) in segs[..end].iter().enumerate().rev() {
                        if j >= end {
                            continue;
                        }
                        if BREAK_SENTENCE_SYMBOLS.contains(ch) {
                            break_pos = Some(j + 1);
                            break;
                        }
                    }
                    if let Some(pos) = break_pos {
                        let mut head = segs[..pos].concat();
                        let mut remaining = segs[pos..].concat();
                        if self.break_with_sentence {
                            let trailing = take_trailing_start_quotes(&mut head);
                            if !trailing.is_empty() {
                                remaining.insert_str(0, &trailing);
                            }
                        }
                        let remaining = remaining.trim_start().to_string();
                        result = head;
                        result.push('\n');
                        current_length = 0;
                        if first_line {
                            if self.insert_fullwidth_space_at_line_start {
                                if check_need_fullwidth_space(&main_content) {
                                    need_insert_fullwidth_space = true;
                                }
                            }
                            first_line = false;
                        }
                        if need_insert_fullwidth_space {
                            result.push('　');
                            current_length += 1;
                        }
                        result.push_str(&remaining);
                        current_length += remaining.graphemes(true).count();
                        main_content.clear();
                        pre_is_lf = true;
                    } else {
                        let trailing = if self.break_with_sentence {
                            take_trailing_start_quotes(&mut result)
                        } else {
                            String::new()
                        };
                        result.push('\n');
                        current_length = 0;
                        if first_line {
                            if self.insert_fullwidth_space_at_line_start {
                                if check_need_fullwidth_space(&main_content) {
                                    need_insert_fullwidth_space = true;
                                }
                            }
                            first_line = false;
                        }
                        if need_insert_fullwidth_space {
                            result.push('　');
                            current_length += 1;
                        }
                        main_content.clear();
                        if !trailing.is_empty() {
                            result.push_str(&trailing);
                            current_length += trailing.graphemes(true).count();
                            main_content.push_str(&trailing);
                        }
                        pre_is_lf = true;
                    }
                } else if !self.break_words
                    && !is_command
                    && !is_ruby_rt
                    && check_is_ascii_alphanumeric(grapheme)
                {
                    // Look back to find a good break point (space or non-ASCII)
                    let mut break_pos = None;
                    let mut temp_length = current_length;
                    let mut j = result.len();

                    // Find the last space or non-ASCII character position
                    for ch in result.chars().rev() {
                        if ch == ' ' || ch == '　' || !ch.is_ascii() {
                            break_pos = Some(j);
                            break;
                        }
                        if ch.is_ascii_alphabetic() {
                            temp_length -= 1;
                            if temp_length == 0 {
                                break;
                            }
                        }
                        j -= ch.len_utf8();
                    }

                    // If we found a good break point, move content after it to next line
                    if let Some(pos) = break_pos {
                        let mut remaining = result[pos..].to_string();
                        result.truncate(pos);
                        if self.break_with_sentence {
                            let trailing = take_trailing_start_quotes(&mut result);
                            if !trailing.is_empty() {
                                remaining.insert_str(0, &trailing);
                            }
                        }
                        let remaining = remaining.trim_start().to_string();
                        result.push('\n');
                        current_length = 0;
                        if first_line {
                            if self.insert_fullwidth_space_at_line_start {
                                if check_need_fullwidth_space(&main_content) {
                                    need_insert_fullwidth_space = true;
                                }
                            }
                            first_line = false;
                        }
                        if need_insert_fullwidth_space {
                            result.push('　');
                            current_length += 1;
                        }
                        result.push_str(&remaining);
                        current_length += remaining.chars().count();
                        main_content.clear();
                        pre_is_lf = true;
                    } else {
                        let trailing = if self.break_with_sentence {
                            take_trailing_start_quotes(&mut result)
                        } else {
                            String::new()
                        };
                        result.push('\n');
                        current_length = 0;
                        if first_line {
                            if self.insert_fullwidth_space_at_line_start {
                                if check_need_fullwidth_space(&main_content) {
                                    need_insert_fullwidth_space = true;
                                }
                            }
                            first_line = false;
                        }
                        if need_insert_fullwidth_space {
                            result.push('　');
                            current_length += 1;
                        }
                        main_content.clear();
                        if !trailing.is_empty() {
                            result.push_str(&trailing);
                            current_length += trailing.graphemes(true).count();
                            main_content.push_str(&trailing);
                        }
                        pre_is_lf = true;
                    }
                } else if self
                    .jieba
                    .as_ref()
                    .is_some_and(|s| check_chinese_word_is_break(&vec, i, s))
                    && !is_command
                    && !is_ruby_rt
                {
                    #[cfg(feature = "jieba")]
                    {
                        let jieba = self.jieba.as_ref().unwrap();
                        let s = vec.join("");
                        let mut breaked = jieba
                            .cut(&s, false)
                            .iter()
                            .map(|s| s.graphemes(true).count())
                            .collect::<Vec<_>>();
                        let mut sum = 0;
                        for i in breaked.iter_mut() {
                            sum += *i;
                            *i = sum;
                        }
                        let break_pos = match breaked.binary_search(&i) {
                            Ok(pos) => Some(pos),
                            Err(pos) => {
                                if pos == 0 {
                                    None
                                } else {
                                    Some(pos - 1)
                                }
                            }
                        };
                        if let Some(break_pos) = break_pos {
                            let pos = breaked[break_pos];
                            let segs = result.graphemes(true).collect::<Vec<_>>();
                            let remain_count = i - pos;
                            let pos = segs.len() - remain_count;
                            let mut head = segs[..pos].concat();
                            let mut remaining = segs[pos..].concat();
                            if self.break_with_sentence {
                                let trailing = take_trailing_start_quotes(&mut head);
                                if !trailing.is_empty() {
                                    remaining.insert_str(0, &trailing);
                                }
                            }
                            let remaining = remaining.trim_start().to_string();
                            result = head;
                            result.push('\n');
                            current_length = 0;
                            if first_line {
                                if self.insert_fullwidth_space_at_line_start {
                                    if check_need_fullwidth_space(&main_content) {
                                        need_insert_fullwidth_space = true;
                                    }
                                }
                                first_line = false;
                            }
                            if need_insert_fullwidth_space {
                                result.push('　');
                                current_length += 1;
                            }
                            result.push_str(&remaining);
                            current_length += remaining.graphemes(true).count();
                            main_content.clear();
                            pre_is_lf = true;
                        } else {
                            let trailing = if self.break_with_sentence {
                                take_trailing_start_quotes(&mut result)
                            } else {
                                String::new()
                            };
                            result.push('\n');
                            current_length = 0;
                            if first_line {
                                if self.insert_fullwidth_space_at_line_start {
                                    if check_need_fullwidth_space(&main_content) {
                                        need_insert_fullwidth_space = true;
                                    }
                                }
                                first_line = false;
                            }
                            if need_insert_fullwidth_space {
                                result.push('　');
                                current_length += 1;
                            }
                            main_content.clear();
                            if !trailing.is_empty() {
                                result.push_str(&trailing);
                                current_length += trailing.graphemes(true).count();
                                main_content.push_str(&trailing);
                            }
                            pre_is_lf = true;
                        }
                    }
                } else {
                    let trailing = if self.break_with_sentence {
                        take_trailing_start_quotes(&mut result)
                    } else {
                        String::new()
                    };
                    result.push('\n');
                    current_length = 0;
                    if first_line {
                        if self.insert_fullwidth_space_at_line_start {
                            if check_need_fullwidth_space(&main_content) {
                                need_insert_fullwidth_space = true;
                            }
                        }
                        first_line = false;
                    }
                    if need_insert_fullwidth_space {
                        result.push('　');
                        current_length += 1;
                    }
                    main_content.clear();
                    if !trailing.is_empty() {
                        result.push_str(&trailing);
                        current_length += trailing.graphemes(true).count();
                        main_content.push_str(&trailing);
                    }
                    pre_is_lf = true;
                }
            }

            if (current_length == 0 || pre_is_lf) && SPACE_STR_LIST.contains(&grapheme) {
                i += 1;
                continue;
            }

            result.push_str(grapheme);

            #[cfg(feature = "kirikiri")]
            if self.is_scn() {
                if grapheme == "#" {
                    i += 1;
                    while i < vec.len() && vec[i] != ";" {
                        result.push_str(vec[i]);
                        i += 1;
                    }
                    if i < vec.len() {
                        result.push_str(vec[i]);
                        i += 1;
                    }
                    continue;
                }
                if grapheme == "%" && i + 1 < vec.len() && vec[i + 1] == "r" {
                    result.push('r');
                    i += 2;
                    continue;
                }
            }

            if self.is_circus() {
                if grapheme == "@" {
                    is_command = true;
                    last_command = Some(String::new());
                } else if is_command && grapheme.len() != 1
                    || !grapheme
                        .chars()
                        .next()
                        .unwrap_or(' ')
                        .is_ascii_alphanumeric()
                {
                    is_command = false;
                }
                if grapheme == "｛" {
                    is_ruby = true;
                    is_ruby_rt = true;
                } else if is_ruby && grapheme == "／" {
                    is_ruby_rt = false;
                    i += 1;
                    continue;
                } else if is_ruby && grapheme == "｝" {
                    is_ruby = false;
                    i += 1;
                    continue;
                }
            }

            if self.is_scn() {
                if grapheme == "%" {
                    is_command = true;
                } else if is_command && grapheme == ";" {
                    is_command = false;
                    i += 1;
                    continue;
                }
                if grapheme == "[" {
                    is_ruby = true;
                    is_ruby_rt = true;
                    i += 1;
                    continue;
                } else if is_ruby && grapheme == "]" {
                    is_ruby = false;
                    is_ruby_rt = false;
                    i += 1;
                    continue;
                }
            }

            if is_command {
                if let Some(ref mut cmd) = last_command {
                    cmd.push_str(grapheme);
                }
            }

            if !is_command && !is_ruby_rt {
                current_length += 1;
                main_content.push_str(grapheme);
            }

            pre_is_lf = false;
            i += 1;
        }

        result
    }
}

/// Exercises `FixedFormatter::format` across the option combinations:
/// plain wrapping, `keep_original`, `break_words = false`, full-width indent,
/// sentence-aware breaking, and the feature-gated Circus, Kirikiri SCN and
/// jieba behaviours.
#[test]
fn test_format() {
    // Plain wrapping: ASCII words may be split, existing '\n' is dropped.
    let formatter = FixedFormatter::builder(10);
    let message = "This is a test message.\nThis is another line.";
    let formatted_message = formatter.format(message);
    assert_eq!(
        formatted_message,
        "This is a \ntest messa\nge.This is\nanother li\nne."
    );
    assert_eq!(formatter.format("● This is a test."), "● This is \na test.");
    assert_eq!(
        formatter.format("● This is 　a test."),
        "● This is \na test."
    );
    // keep_original = true keeps the line break that is already in the message.
    let fommater2 = FixedFormatter::builder(10).keep_original(true);
    assert_eq!(
        fommater2.format("● Th\n is is a te st."),
        "● Th\nis is a te\nst."
    );

    // Test break_words = false
    let no_break_formatter = FixedFormatter::builder(10).break_words(false);
    assert_eq!(
        no_break_formatter.format("Example text."),
        "Example \ntext."
    );

    let no_break_formatter2 = FixedFormatter::builder(6).break_words(false);
    assert_eq!(
        no_break_formatter2.format("Example text."),
        "Exampl\ne text\n."
    );

    let no_break_formatter3 = FixedFormatter::builder(7).break_words(false);
    assert_eq!(
        no_break_formatter3.format("Example text."),
        "Example\ntext."
    );

    let real_world_no_break_formatter = FixedFormatter::builder(32).break_words(false);
    assert_eq!(
        real_world_no_break_formatter.format("○咕噜咕噜（Temporary Magnetic Pattern Linkage）"),
        "○咕噜咕噜（Temporary Magnetic Pattern\nLinkage）"
    );

    // Full-width indent on continuation lines while the leading quote is open.
    let formatter3 = FixedFormatter::builder(10)
        .break_words(false)
        .insert_fullwidth_space_at_line_start(true);
    assert_eq!(
        formatter3.format("「This is a test."),
        "「This is a\n\u{3000}test."
    );

    assert_eq!(
        formatter3.format("（This） is a test."),
        "（This） is \na test."
    );

    assert_eq!(
        formatter3.format("（long text test here, test 1234"),
        "（long text\n\u{3000}test here\n\u{3000}, test \n\u{3000}1234"
    );

    assert_eq!(
        formatter3.format("（This） 「is a test."),
        "（This） 「is\n\u{3000}a test."
    );

    // Sentence-aware breaking.
    let formatter4 = FixedFormatter::builder(10)
        .break_words(false)
        .break_with_sentence(true);
    assert_eq!(
        formatter4.format("『打断测，测试一下……』"),
        "『打断测，\n测试一下……』"
    );

    assert_eq!(
        formatter4.format("『打断测，测试一下。』"),
        "『打断测，\n测试一下。』"
    );

    assert_eq!(
        formatter4.format("『打断是测试一下哦……』"),
        "『打断是测试一下哦\n……』"
    );

    assert_eq!(
        formatter4.format("『打断测是测试一下。』"),
        "『打断测是测试一下。\n』"
    );

    assert_eq!(
        formatter4.format("『打断测试，测试一下。』"),
        "『打断测试，\n测试一下。』"
    );

    assert_eq!(
        formatter4.format("这打断测试，测试一下。"),
        "这打断测试，\n测试一下。"
    );

    assert_eq!(
        formatter4.format("这打断测试哦测试一下。。"),
        "这打断测试哦测试一下\n。。"
    );

    // Sentence-aware breaking combined with the full-width indent.
    let formatter5 = FixedFormatter::builder(10)
        .break_words(false)
        .insert_fullwidth_space_at_line_start(true)
        .break_with_sentence(true);
    assert_eq!(
        formatter5.format("「一二三四『whatthe』"),
        "「一二三四\n\u{3000}『whatthe』"
    );

    let real_break_formatter = FixedFormatter::builder(27)
        .break_words(false)
        .break_with_sentence(true);
    assert_eq!(
        real_break_formatter.format("「他们就是想和阳见待在一个社团，在里面表现表现、耍耍帅，这样不就和她套上近乎了嘛！算盘珠子都打到我脸上了……」"),
        "「他们就是想和阳见待在一个社团，\n在里面表现表现、耍耍帅，这样不就和她套上近乎了嘛！算盘\n珠子都打到我脸上了……」"
    );

    assert_eq!(
        real_break_formatter
            .format("「在英山的话或许可以看看『moon river』『Lavir』或是『Patisserie Yuzuru』」"),
        "「在英山的话或许可以看看『moon river』\n『Lavir』或是『Patisserie Yuzuru\n』」"
    );

    // Circus scripts: `@…` commands and the first part of `｛…／…｝` ruby are not counted.
    #[cfg(feature = "circus")]
    {
        let circus_formatter = FixedFormatter::builder(10).typ(Some(ScriptType::Circus));
        assert_eq!(
            circus_formatter.format("● @cmd1@cmd2@cmd3中文字数是一\n　二三　四五六七八九十"),
            "● @cmd1@cmd2@cmd3中文字数是一二三\n四五六七八九十"
        );
        assert_eq!(
            circus_formatter
                .format("● @cmd1@cmd2@cmd3｛rubyText／中文｝字数是一\n　二三　四五六七八九十"),
            "● @cmd1@cmd2@cmd3｛rubyText／中文｝字数是一二三\n四五六七八九十"
        );
        let circus_formatter2 = FixedFormatter::builder(32).typ(Some(ScriptType::Circus));
        assert_eq!(
            circus_formatter2.format("@re1@re2@b1@t30@w1「当然现在我很幸福哦？\n　因为有你在身边」@n\n「@b1@t38@w1当然现在我很幸福哦？\n　因为有敦也君在身边」"),
            "@re1@re2@b1@t30@w1「当然现在我很幸福哦？因为有你在身边」@n\n「@b1@t38@w1当然现在我很幸福哦？因为有敦也君在身边」"
        );
    }

    // Kirikiri SCN scripts: `%…;` commands, `#…;` sequences and `[…]` ruby are not counted.
    #[cfg(feature = "kirikiri")]
    {
        let scn_formatter = FixedFormatter::builder(3)
            .break_words(false)
            .typ(Some(ScriptType::KirikiriScn));
        assert_eq!(
            scn_formatter.format("%test;[ruby]测[test]试打断。"),
            "%test;[ruby]测[test]试打\n断。"
        );
        assert_eq!(
            scn_formatter.format("%f$ハート$;#00ffadd6;♥%r打断测试"),
            "%f$ハート$;#00ffadd6;♥%r打断\n测试"
        )
    }
    // jieba: line breaks are moved to word boundaries; a custom word changes the boundary.
    #[cfg(feature = "jieba")]
    {
        let jieba_formatter = FixedFormatter::builder(8)
            .break_words(false)
            .break_chinese_words(false)
            .unwrap();
        assert_eq!(
            jieba_formatter.format("测试分词，我们中出了一个叛徒。"),
            "测试分词，我们中\n出了一个叛徒。"
        );
        let jieba_formatter2 = FixedFormatter::builder(8)
            .break_words(false)
            .break_chinese_words(false)
            .unwrap()
            .add_dict("中出", Some(114514), None);
        assert_eq!(
            jieba_formatter2
                .jieba
                .as_ref()
                .is_some_and(|s| s.has_word("中出")),
            true
        );
        assert_eq!(
            jieba_formatter2.format("测试分词，我们中出了一个叛徒。"),
            "测试分词，我们\n中出了一个叛徒。"
        );
    }
}