Add support for not breaking Chinese words in the fixed formatter

This commit is contained in:
2025-09-15 11:40:21 +08:00
parent 1567c16273
commit 56a79106c4
9 changed files with 426 additions and 20 deletions

214
Cargo.lock generated
View File

@@ -14,6 +14,24 @@ version = "2.0.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "320119579fcad9c21884f5c4861d16174d0e06250625266f50fe6898340abefa"
[[package]]
name = "adler32"
version = "1.2.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "aae1277d39aeec15cb388266ecc24b11c80469deae6067e17a1a7aa9e5c1f234"
[[package]]
name = "ahash"
version = "0.8.12"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5a15f179cd60c4584b8a8c596927aadc462e27f2ca70c04e0071964a73ba7a75"
dependencies = [
"cfg-if",
"once_cell",
"version_check",
"zerocopy",
]
[[package]]
name = "aho-corasick"
version = "1.1.3"
@@ -23,6 +41,12 @@ dependencies = [
"memchr",
]
[[package]]
name = "allocator-api2"
version = "0.2.21"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "683d7910e743518b0e34f1186f92494becacb047c7b6bf616c96772180fef923"
[[package]]
name = "anstream"
version = "0.6.20"
@@ -187,6 +211,15 @@ dependencies = [
"shlex",
]
[[package]]
name = "cedarwood"
version = "0.4.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "6d910bedd62c24733263d0bed247460853c9d22e8956bd4cd964302095e04e90"
dependencies = [
"smallvec",
]
[[package]]
name = "cfg-if"
version = "1.0.3"
@@ -326,6 +359,15 @@ dependencies = [
"tiny-keccak",
]
[[package]]
name = "core2"
version = "0.4.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b49ba7ef1ad6107f8824dbe97de947cbaac53c44e7f9756a1fba0d37c1eec505"
dependencies = [
"memchr",
]
[[package]]
name = "cpufeatures"
version = "0.2.17"
@@ -426,6 +468,12 @@ dependencies = [
"windows-sys 0.61.0",
]
[[package]]
name = "dary_heap"
version = "0.3.7"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "04d2cd9c18b9f454ed67da600630b021a8a80bf33f8c95896ab33aaf1c26b728"
[[package]]
name = "dataview"
version = "1.0.1"
@@ -620,6 +668,12 @@ dependencies = [
"regex-syntax",
]
[[package]]
name = "fastrand"
version = "2.3.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "37909eebbb50d72f9059c3b6d82c0463f2ff062c9e95845c43a6c9c0355411be"
[[package]]
name = "fdeflate"
version = "0.3.7"
@@ -754,6 +808,10 @@ name = "hashbrown"
version = "0.14.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e5274423e17b7c9fc20b6e7e208532f9b19825d82dfd615708b70edd83df41f1"
dependencies = [
"ahash",
"allocator-api2",
]
[[package]]
name = "hashbrown"
@@ -922,6 +980,43 @@ dependencies = [
"winapi-util",
]
[[package]]
name = "include-flate"
version = "0.3.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e01b7cb6ca682a621e7cda1c358c9724b53a7b4409be9be1dd443b7f3a26f998"
dependencies = [
"include-flate-codegen",
"include-flate-compress",
"libflate",
"zstd",
]
[[package]]
name = "include-flate-codegen"
version = "0.3.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "4f49bf5274aebe468d6e6eba14a977eaf1efa481dc173f361020de70c1c48050"
dependencies = [
"include-flate-compress",
"libflate",
"proc-macro-error",
"proc-macro2",
"quote",
"syn 2.0.106",
"zstd",
]
[[package]]
name = "include-flate-compress"
version = "0.3.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "eae6a40e716bcd5931f5dbb79cd921512a4f647e2e9413fded3171fca3824dbc"
dependencies = [
"libflate",
"zstd",
]
[[package]]
name = "indexmap"
version = "1.9.3"
@@ -975,6 +1070,29 @@ version = "1.0.15"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "4a5f13b858c8d314ee3e8f639011f7ccefe71f97f96e50151fb991f267928e2c"
[[package]]
name = "jieba-macros"
version = "0.8.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "348294e44ee7e3c42685da656490f8febc7359632544019621588902216da95c"
dependencies = [
"phf_codegen 0.13.1",
]
[[package]]
name = "jieba-rs"
version = "0.8.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "766bd7012aa5ba49411ebdf4e93bddd59b182d2918e085d58dec5bb9b54b7105"
dependencies = [
"cedarwood",
"include-flate",
"jieba-macros",
"phf 0.13.1",
"regex",
"rustc-hash",
]
[[package]]
name = "jobserver"
version = "0.1.34"
@@ -1013,6 +1131,30 @@ dependencies = [
"libc",
]
[[package]]
name = "libflate"
version = "2.1.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "45d9dfdc14ea4ef0900c1cddbc8dcd553fbaacd8a4a282cf4018ae9dd04fb21e"
dependencies = [
"adler32",
"core2",
"crc32fast",
"dary_heap",
"libflate_lz77",
]
[[package]]
name = "libflate_lz77"
version = "2.1.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e6e0d73b369f386f1c44abd9c570d5318f55ccde816ff4b562fa452e5182863d"
dependencies = [
"core2",
"hashbrown 0.14.5",
"rle-decode-fast",
]
[[package]]
name = "libtlg-rs"
version = "0.2.2"
@@ -1169,6 +1311,7 @@ dependencies = [
"fancy-regex",
"flate2",
"int-enum",
"jieba-rs",
"json",
"lazy_static",
"libflac-sys",
@@ -1195,7 +1338,7 @@ dependencies = [
"url",
"utf16string",
"webp",
"windows-sys 0.59.0",
"windows-sys 0.61.0",
"xml5ever",
"zstd",
]
@@ -1358,7 +1501,17 @@ version = "0.11.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1fd6780a80ae0c52cc120a26a1a42c1ae51b247a253e4e06113d23d2c2edd078"
dependencies = [
"phf_shared",
"phf_shared 0.11.3",
]
[[package]]
name = "phf"
version = "0.13.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c1562dc717473dbaa4c1f85a36410e03c047b2e7df7f45ee938fbef64ae7fadf"
dependencies = [
"phf_shared 0.13.1",
"serde",
]
[[package]]
@@ -1367,8 +1520,18 @@ version = "0.11.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "aef8048c789fa5e851558d709946d6d79a8ff88c0440c587967f8e94bfb1216a"
dependencies = [
"phf_generator",
"phf_shared",
"phf_generator 0.11.3",
"phf_shared 0.11.3",
]
[[package]]
name = "phf_codegen"
version = "0.13.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "49aa7f9d80421bca176ca8dbfebe668cc7a2684708594ec9f3c0db0805d5d6e1"
dependencies = [
"phf_generator 0.13.1",
"phf_shared 0.13.1",
]
[[package]]
@@ -1377,10 +1540,20 @@ version = "0.11.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "3c80231409c20246a13fddb31776fb942c38553c51e871f8cbd687a4cfb5843d"
dependencies = [
"phf_shared",
"phf_shared 0.11.3",
"rand 0.8.5",
]
[[package]]
name = "phf_generator"
version = "0.13.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "135ace3a761e564ec88c03a77317a7c6b80bb7f7135ef2544dbe054243b89737"
dependencies = [
"fastrand",
"phf_shared 0.13.1",
]
[[package]]
name = "phf_shared"
version = "0.11.3"
@@ -1390,6 +1563,15 @@ dependencies = [
"siphasher",
]
[[package]]
name = "phf_shared"
version = "0.13.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e57fef6bc5981e38c2ce2d63bfa546861309f875b8a75f092d1d54ae2d64f266"
dependencies = [
"siphasher",
]
[[package]]
name = "pkg-config"
version = "0.3.32"
@@ -1584,6 +1766,12 @@ dependencies = [
"bytemuck",
]
[[package]]
name = "rle-decode-fast"
version = "1.0.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "3582f63211428f83597b51b2ddb88e2a91a9d52d12831f9d08f5e624e8977422"
[[package]]
name = "rust-ini"
version = "0.21.3"
@@ -1594,6 +1782,12 @@ dependencies = [
"ordered-multimap",
]
[[package]]
name = "rustc-hash"
version = "2.1.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "357703d41365b4b27c590e3ed91eabb1b663f07c4c084095e60cbed4362dff0d"
[[package]]
name = "ryu"
version = "1.0.20"
@@ -1737,7 +1931,7 @@ checksum = "bf776ba3fa74f83bf4b63c3dcbbf82173db2632ed8452cb2d891d33f459de70f"
dependencies = [
"new_debug_unreachable",
"parking_lot",
"phf_shared",
"phf_shared 0.11.3",
"precomputed-hash",
"serde",
]
@@ -1748,8 +1942,8 @@ version = "0.5.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c711928715f1fe0fe509c53b43e993a9a557babc2d0a3567d0a3006f1ac931a0"
dependencies = [
"phf_generator",
"phf_shared",
"phf_generator 0.11.3",
"phf_shared 0.11.3",
"proc-macro2",
"quote",
]
@@ -2062,8 +2256,8 @@ version = "0.1.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "57ffde1dc01240bdf9992e3205668b235e59421fd085e8a317ed98da0178d414"
dependencies = [
"phf",
"phf_codegen",
"phf 0.11.3",
"phf_codegen 0.11.3",
"string_cache",
"string_cache_codegen",
]

View File

@@ -18,6 +18,7 @@ encoding = "0.2"
fancy-regex = { version = "0.16", optional = true }
flate2 = { version = "1.1", optional = true }
int-enum = { version = "1.2", optional = true }
jieba-rs = { version = "0.8", optional = true }
json = { version = "0.12", optional = true }
jpegxl-sys = { package = "msg-tool-jpegxl-sys", version = "0.11", optional = true, features = ["vendored"] }
lazy_static = "1.5.0"
@@ -47,7 +48,7 @@ xml5ever = { version = "0.35", optional = true }
zstd = { version = "0.13", optional = true }
[features]
default = ["all-fmt", "image-jpg", "image-jxl", "image-webp", "audio-flac"]
default = ["all-fmt", "image-jpg", "image-jxl", "image-webp", "audio-flac", "jieba"]
all-fmt = ["all-script", "all-img", "all-arc", "all-audio"]
all-script = ["artemis", "artemis-panmimisoft", "bgi", "cat-system", "circus", "entis-gls", "escude", "ex-hibit", "favorite", "hexen-haus", "kirikiri", "silky", "softpal", "will-plus", "yaneurao", "yaneurao-itufuru"]
all-img = ["bgi-img", "cat-system-img", "circus-img", "emote-img", "kirikiri-img"]
@@ -89,6 +90,7 @@ image-webp = ["webp"]
lossless-audio = ["utils-pcm"]
audio-flac = ["libflac-sys", "utils-pcm"]
unstable = ["msg_tool_macro/unstable"]
jieba = ["jieba-rs"]
# utils feature
utils-bit-stream = []
utils-blowfish = ["byteorder"]

View File

@@ -460,6 +460,10 @@ pub struct Arg {
/// Workers count for encode images in parallel. Default is half of CPU cores.
/// Set this to 1 to disable parallel encoding. 0 means same as 1.
pub image_workers: usize,
#[cfg(feature = "jieba")]
#[arg(long, global = true)]
/// Path to custom jieba dictionary
pub jieba_dict: Option<String>,
#[command(subcommand)]
/// Command
pub command: Command,
@@ -516,6 +520,10 @@ pub struct ImportArgs {
#[arg(long, action = ArgAction::SetTrue)]
/// If a line break occurs in the middle of some symbols, bring the sentence to next line (for fixed format)
pub patched_break_with_sentence: bool,
#[cfg(feature = "jieba")]
#[arg(long, action = ArgAction::SetTrue)]
/// Whether to disable break Chinese words at the end of the line.
pub patched_no_break_chinese_words: bool,
#[arg(long)]
/// Name table file
pub name_csv: Option<String>,

View File

@@ -1,9 +1,12 @@
use crate::types::*;
use anyhow::Result;
#[cfg(feature = "jieba")]
use jieba_rs::Jieba;
use unicode_segmentation::UnicodeSegmentation;
const SPACE_STR_LIST: [&str; 2] = [" ", " "];
const QUOTE_LIST: [(&str, &str); 4] = [("", ""), ("", ""), ("", ""), ("", "")];
const BREAK_SENTENCE_SYMBOLS: [&str; 5] = ["", "", "", "", ""];
const BREAK_SENTENCE_SYMBOLS: [&str; 6] = ["", "", "", "", "", ""];
fn check_is_ascii_alphanumeric(s: &str) -> bool {
for c in s.chars() {
@@ -45,6 +48,27 @@ fn check_is_end_quote(segs: &[&str], pos: usize) -> bool {
true
}
#[cfg(feature = "jieba")]
/// Reports whether a line break placed before grapheme index `pos` would fall
/// inside a word, according to jieba segmentation.
///
/// `segs` is joined into one string and segmented with `jieba.cut` (HMM
/// disabled). Each word's grapheme count is accumulated into a running total,
/// giving the list of grapheme offsets at which a word ends. The function
/// returns `true` when `pos` is not one of those end offsets.
///
/// Note that offset `0` is never the end offset of a non-empty word, so it is
/// reported as `true` as well.
fn check_chinese_word_is_break(segs: &[&str], pos: usize, jieba: &Jieba) -> bool {
    let text = segs.join("");
    // Running totals of word lengths == offsets where each word ends.
    let word_ends: Vec<usize> = jieba
        .cut(&text, false)
        .iter()
        .scan(0, |total, word| {
            *total += word.graphemes(true).count();
            Some(*total)
        })
        .collect();
    // The offsets are ascending, so a binary search is a valid membership test.
    word_ends.binary_search(&pos).is_err()
}
#[cfg(not(feature = "jieba"))]
/// Stand-in used when the `jieba` feature is disabled.
///
/// No word segmentation is available in this configuration, so every
/// position is reported as not splitting a word; all arguments are ignored.
fn check_chinese_word_is_break(_segs: &[&str], _pos: usize, _jieba: &()) -> bool {
    false
}
pub struct FixedFormatter {
length: usize,
keep_original: bool,
@@ -54,6 +78,11 @@ pub struct FixedFormatter {
insert_fullwidth_space_at_line_start: bool,
/// If a line break occurs in the middle of some symbols, bring the sentence to next line
break_with_sentence: bool,
#[cfg(feature = "jieba")]
/// Jieba instance for Chinese word segmentation.
jieba: Option<Jieba>,
#[cfg(not(feature = "jieba"))]
jieba: Option<()>,
#[allow(unused)]
typ: Option<ScriptType>,
}
@@ -65,16 +94,34 @@ impl FixedFormatter {
break_words: bool,
insert_fullwidth_space_at_line_start: bool,
break_with_sentence: bool,
#[cfg(feature = "jieba")] break_chinese_words: bool,
#[cfg(feature = "jieba")] jieba_dict: Option<String>,
typ: Option<ScriptType>,
) -> Self {
FixedFormatter {
) -> Result<Self> {
#[cfg(feature = "jieba")]
let jieba = if !break_chinese_words {
let mut jieba = Jieba::new();
if let Some(dict) = jieba_dict {
let file = std::fs::File::open(dict)?;
let mut reader = std::io::BufReader::new(file);
jieba.load_dict(&mut reader)?;
}
Some(jieba)
} else {
None
};
Ok(FixedFormatter {
length,
keep_original,
break_words,
insert_fullwidth_space_at_line_start,
break_with_sentence,
#[cfg(feature = "jieba")]
jieba,
#[cfg(not(feature = "jieba"))]
jieba: None,
typ,
}
})
}
#[cfg(test)]
@@ -85,6 +132,7 @@ impl FixedFormatter {
break_words: true,
insert_fullwidth_space_at_line_start: false,
break_with_sentence: false,
jieba: None,
typ: None,
}
}
@@ -113,7 +161,27 @@ impl FixedFormatter {
self
}
#[cfg(all(feature = "jieba", test))]
/// Test-only builder step selecting whether Chinese words may be split.
///
/// Passing `false` installs a freshly constructed `Jieba` instance on the
/// formatter; passing `true` clears any existing instance. The visible body
/// always returns `Ok`.
fn break_chinese_words(mut self, break_chinese_words: bool) -> Result<Self> {
    // A segmenter is only needed when words must be kept intact.
    self.jieba = (!break_chinese_words).then(Jieba::new);
    Ok(self)
}
#[cfg(all(feature = "jieba", test))]
/// Test-only builder step that registers an extra word with the segmenter.
///
/// `dict`, `freq` and `tag` are forwarded unchanged to `Jieba::add_word`.
/// When the formatter holds no `Jieba` instance the call is a no-op.
fn add_dict(mut self, dict: &str, freq: Option<usize>, tag: Option<&str>) -> Self {
    if let Some(segmenter) = self.jieba.as_mut() {
        segmenter.add_word(dict, freq, tag);
    }
    self
}
#[cfg(test)]
#[allow(dead_code)]
fn typ(mut self, typ: Option<ScriptType>) -> Self {
self.typ = typ;
self
@@ -318,6 +386,81 @@ impl FixedFormatter {
main_content.clear();
pre_is_lf = true;
}
} else if self
.jieba
.as_ref()
.is_some_and(|s| check_chinese_word_is_break(&vec, i, s))
&& !is_command
&& !is_ruby_rt
{
#[cfg(feature = "jieba")]
{
let jieba = self.jieba.as_ref().unwrap();
let s = vec.join("");
let mut breaked = jieba
.cut(&s, false)
.iter()
.map(|s| s.graphemes(true).count())
.collect::<Vec<_>>();
let mut sum = 0;
for i in breaked.iter_mut() {
sum += *i;
*i = sum;
}
let break_pos = match breaked.binary_search(&i) {
Ok(pos) => Some(pos),
Err(pos) => {
if pos == 0 {
None
} else {
Some(pos - 1)
}
}
};
if let Some(break_pos) = break_pos {
let pos = breaked[break_pos];
let segs = result.graphemes(true).collect::<Vec<_>>();
let remain_count = i - pos;
let pos = segs.len() - remain_count;
let remaining = segs[pos..].concat().trim_start().to_string();
result = segs[..pos].concat();
result.push('\n');
current_length = 0;
if first_line {
if self.insert_fullwidth_space_at_line_start {
if check_need_fullwidth_space(&main_content) {
need_insert_fullwidth_space = true;
}
}
first_line = false;
}
if need_insert_fullwidth_space {
result.push(' ');
current_length += 1;
}
result.push_str(&remaining);
current_length += remaining.graphemes(true).count();
main_content.clear();
pre_is_lf = true;
} else {
result.push('\n');
current_length = 0;
if first_line {
if self.insert_fullwidth_space_at_line_start {
if check_need_fullwidth_space(&main_content) {
need_insert_fullwidth_space = true;
}
}
first_line = false;
}
if need_insert_fullwidth_space {
result.push(' ');
current_length += 1;
}
main_content.clear();
pre_is_lf = true;
}
}
} else {
result.push('\n');
current_length = 0;
@@ -408,7 +551,7 @@ impl FixedFormatter {
i += 1;
}
return result;
result
}
}
@@ -532,4 +675,31 @@ fn test_format() {
"%test;[ruby]测[test]试打\n断。"
);
}
#[cfg(feature = "jieba")]
{
let jieba_formatter = FixedFormatter::builder(8)
.break_words(false)
.break_chinese_words(false)
.unwrap();
assert_eq!(
jieba_formatter.format("测试分词,我们中出了一个叛徒。"),
"测试分词,我们中\n出了一个叛徒。"
);
let jieba_formatter2 = FixedFormatter::builder(8)
.break_words(false)
.break_chinese_words(false)
.unwrap()
.add_dict("中出", Some(114514), None);
assert_eq!(
jieba_formatter2
.jieba
.as_ref()
.is_some_and(|s| s.has_word("中出")),
true
);
assert_eq!(
jieba_formatter2.format("测试分词,我们中出了一个叛徒。"),
"测试分词,我们\n中出了一个叛徒。"
);
}
}

View File

@@ -2,9 +2,10 @@
mod fixed;
use crate::types::*;
use anyhow::Result;
/// Formats messages with the given options.
pub fn fmt_message(mes: &mut Vec<Message>, opt: FormatOptions, typ: ScriptType) {
pub fn fmt_message(mes: &mut Vec<Message>, opt: FormatOptions, typ: ScriptType) -> Result<()> {
match opt {
FormatOptions::Fixed {
length,
@@ -12,6 +13,10 @@ pub fn fmt_message(mes: &mut Vec<Message>, opt: FormatOptions, typ: ScriptType)
break_words,
insert_fullwidth_space_at_line_start,
break_with_sentence,
#[cfg(feature = "jieba")]
break_chinese_words,
#[cfg(feature = "jieba")]
jieba_dict,
} => {
let formatter = fixed::FixedFormatter::new(
length,
@@ -19,12 +24,17 @@ pub fn fmt_message(mes: &mut Vec<Message>, opt: FormatOptions, typ: ScriptType)
break_words,
insert_fullwidth_space_at_line_start,
break_with_sentence,
#[cfg(feature = "jieba")]
break_chinese_words,
#[cfg(feature = "jieba")]
jieba_dict,
Some(typ),
);
)?;
for message in mes.iter_mut() {
message.message = formatter.format(&message.message);
}
}
FormatOptions::None => {}
}
Ok(())
}

View File

@@ -1373,6 +1373,10 @@ pub fn import_script(
insert_fullwidth_space_at_line_start: imp_cfg
.patched_insert_fullwidth_space_at_line_start,
break_with_sentence: imp_cfg.patched_break_with_sentence,
#[cfg(feature = "jieba")]
break_chinese_words: !imp_cfg.patched_no_break_chinese_words,
#[cfg(feature = "jieba")]
jieba_dict: arg.jieba_dict.clone(),
},
types::FormatType::None => types::FormatOptions::None,
},
@@ -1384,7 +1388,7 @@ pub fn import_script(
}
None => {}
}
format::fmt_message(&mut mes, fmt, *builder.script_type());
format::fmt_message(&mut mes, fmt, *builder.script_type())?;
if let Err(e) = script_file.import_messages(
mes,
writer,
@@ -1592,6 +1596,10 @@ pub fn import_script(
insert_fullwidth_space_at_line_start: imp_cfg
.patched_insert_fullwidth_space_at_line_start,
break_with_sentence: imp_cfg.patched_break_with_sentence,
#[cfg(feature = "jieba")]
break_chinese_words: !imp_cfg.patched_no_break_chinese_words,
#[cfg(feature = "jieba")]
jieba_dict: arg.jieba_dict.clone(),
},
types::FormatType::None => types::FormatOptions::None,
},
@@ -1603,7 +1611,7 @@ pub fn import_script(
}
None => {}
}
format::fmt_message(&mut mes, fmt, *builder.script_type());
format::fmt_message(&mut mes, fmt, *builder.script_type())?;
script.import_messages_filename(mes, &patched_f, encoding, repl)?;
Ok(types::ScriptResult::Ok)

View File

@@ -170,6 +170,10 @@ impl Script for BGIScript {
break_words: false,
insert_fullwidth_space_at_line_start: true,
break_with_sentence: true,
#[cfg(feature = "jieba")]
break_chinese_words: true,
#[cfg(feature = "jieba")]
jieba_dict: None,
}
}
}

View File

@@ -220,6 +220,10 @@ impl Script for CircusMesScript {
break_words: false,
insert_fullwidth_space_at_line_start: true,
break_with_sentence: true,
#[cfg(feature = "jieba")]
break_chinese_words: true,
#[cfg(feature = "jieba")]
jieba_dict: None,
}
}

View File

@@ -664,6 +664,12 @@ pub enum FormatOptions {
insert_fullwidth_space_at_line_start: bool,
/// If a line break occurs in the middle of some symbols, bring the sentence to next line
break_with_sentence: bool,
#[cfg(feature = "jieba")]
/// Whether to break Chinese words at the end of the line.
break_chinese_words: bool,
#[cfg(feature = "jieba")]
/// Path to custom jieba dictionary
jieba_dict: Option<String>,
},
/// Do not wrap line
None,