Files
msg-tool/src/format/fixed.rs

344 lines
12 KiB
Rust

use crate::types::*;
use unicode_segmentation::UnicodeSegmentation;
/// Graphemes that `FixedFormatter::format` treats as a space and drops at the start of a line:
/// the ASCII space and the full-width (ideographic) space U+3000.
///
/// The full-width space is written as an escape so it cannot be confused with, or silently
/// degraded into, a plain ASCII space.
const SPACE_STR_LIST: [&str; 2] = [" ", "\u{3000}"];
// NOTE(review): every pair below is an empty string. `str::starts_with("")` is always true and
// `str::rfind("")` returns `Some(s.len())` for both members of a pair, so
// `check_need_fullwidth_space` returns `false` for every input while the list looks like this.
// The tests in this file expect at least `「…」` and `(…)` to act as quote pairs, with `「`
// consulted before `(` — presumably the original characters were lost; restore and confirm.
/// Opening/closing quotation-mark pairs consulted, in order, by `check_need_fullwidth_space`.
const QUOTE_LIST: [(&str, &str); 4] = [("", ""), ("", ""), ("", ""), ("", "")];
/// Returns `true` when every character of `s` is an ASCII letter or digit.
///
/// An empty string has no offending character and therefore yields `true`.
fn check_is_ascii_alphanumeric(s: &str) -> bool {
    s.chars().all(|c| c.is_ascii_alphanumeric())
}
/// Decides whether continuation lines should be indented, based on the text of the first line.
///
/// Returns `false` unless `s` starts with one of the opening marks in `QUOTE_LIST`. Otherwise
/// the first pair (in list order) whose opening mark occurs anywhere in `s` decides the result:
/// `true` when its closing mark is absent or its last occurrence lies before the last opening
/// mark, `false` otherwise. If no opening mark is found at all the result is `false`.
fn check_need_fullwidth_space(s: &str) -> bool {
    let starts_with_quote = QUOTE_LIST.iter().any(|&(open, _)| s.starts_with(open));
    if !starts_with_quote {
        return false;
    }
    QUOTE_LIST
        .iter()
        .find_map(|&(open, close)| {
            // Pairs whose opening mark does not occur are skipped.
            let last_open = s.rfind(open)?;
            Some(
                s.rfind(close)
                    .map_or(true, |last_close| last_close < last_open),
            )
        })
        .unwrap_or(false)
}
/// Re-wraps a message into lines of a fixed width; see `FixedFormatter::format`.
pub struct FixedFormatter {
/// Line width: `format` starts a new line once this many counted graphemes are on the current line.
length: usize,
/// When true, `\n` graphemes already present in the message are kept as line breaks;
/// otherwise `format` drops them (except after a Circus `@n` command).
keep_original: bool,
/// Whether to break words (ASCII only) at the end of the line.
/// When false, `format` tries to move the trailing ASCII word to the next line instead.
break_words: bool,
/// Whether to insert a full-width space after a line break when a sentence starts with a full-width quotation mark.
insert_fullwidth_space_at_line_start: bool,
/// Script type; only read by `is_circus`, and only when the `circus` feature is enabled,
/// which is why the field may be unused in other builds.
#[allow(unused)]
typ: Option<ScriptType>,
}
impl FixedFormatter {
    /// Full-width (ideographic) space used to indent continuation lines.
    ///
    /// Spelled as an escape so it cannot be mistaken for an ASCII space.
    const FULLWIDTH_SPACE: char = '\u{3000}';

    /// Creates a formatter.
    ///
    /// * `length` - line width in counted graphemes.
    /// * `keep_original` - keep line breaks that are already in the message.
    /// * `break_words` - allow a line break in the middle of an ASCII word.
    /// * `insert_fullwidth_space_at_line_start` - indent continuation lines with U+3000 when
    ///   `check_need_fullwidth_space` accepts the first line.
    /// * `typ` - script type; `Some(ScriptType::Circus)` enables command/ruby handling when the
    ///   `circus` feature is compiled in.
    pub fn new(
        length: usize,
        keep_original: bool,
        break_words: bool,
        insert_fullwidth_space_at_line_start: bool,
        typ: Option<ScriptType>,
    ) -> Self {
        FixedFormatter {
            length,
            keep_original,
            break_words,
            insert_fullwidth_space_at_line_start,
            typ,
        }
    }

    /// Whether the formatter was created for a Circus script.
    #[cfg(feature = "circus")]
    fn is_circus(&self) -> bool {
        matches!(self.typ, Some(ScriptType::Circus))
    }

    /// Without the `circus` feature no script is ever treated as Circus.
    #[cfg(not(feature = "circus"))]
    fn is_circus(&self) -> bool {
        false
    }

    /// Ends the current output line and returns the counted length of the new one.
    ///
    /// The first time this runs, `main_content` (the counted text of the first line) decides
    /// whether every following line is indented with a full-width space; the decision is stored
    /// in `need_indent`. The returned length is 1 when an indent was written, otherwise 0.
    fn start_new_line(
        &self,
        result: &mut String,
        main_content: &mut String,
        first_line: &mut bool,
        need_indent: &mut bool,
    ) -> usize {
        result.push('\n');
        if *first_line {
            // Must be evaluated before `main_content` is cleared below.
            if self.insert_fullwidth_space_at_line_start
                && check_need_fullwidth_space(main_content)
            {
                *need_indent = true;
            }
            *first_line = false;
        }
        main_content.clear();
        if *need_indent {
            result.push(Self::FULLWIDTH_SPACE);
            1
        } else {
            0
        }
    }

    /// Looks backwards through `result` for a byte position where the current line may be
    /// broken so that a trailing ASCII word moves to the next line as a whole.
    ///
    /// The position returned lies directly after the last space or non-ASCII character.
    /// The search gives up (returns `None`) when it
    /// * reaches a `'\n'`, so that an earlier, already finished line is never split, or
    /// * has passed `current_length` ASCII letters without finding a break point.
    fn find_break_pos(result: &str, current_length: usize) -> Option<usize> {
        let mut letters_left = current_length;
        let mut end = result.len();
        for ch in result.chars().rev() {
            if ch == '\n' {
                return None;
            }
            // U+3000 and every other non-ASCII character are covered by `!is_ascii()`.
            if ch == ' ' || !ch.is_ascii() {
                return Some(end);
            }
            if ch.is_ascii_alphabetic() {
                // Saturating: `current_length` can be 0 when `length` is 0.
                letters_left = letters_left.saturating_sub(1);
                if letters_left == 0 {
                    return None;
                }
            }
            end -= ch.len_utf8();
        }
        None
    }

    /// Re-wraps `message` so that a new line is started once `length` counted graphemes are on
    /// the current line.
    ///
    /// * `\n` in the input is dropped unless `keep_original` is set or, for Circus scripts, the
    ///   most recent command is `@n`.
    /// * Spaces (see `SPACE_STR_LIST`) at the start of a line or directly after an input `\n`
    ///   are dropped.
    /// * With `break_words == false`, a trailing ASCII alphanumeric word is moved to the next
    ///   line instead of being split, when a break point exists on the current line.
    /// * For Circus scripts, `@command` graphemes and the annotation part of `{annotation/base}`
    ///   ruby markup (as well as the `/` and `}` delimiters) are emitted but not counted.
    pub fn format(&self, message: &str) -> String {
        let circus = self.is_circus();
        let mut result = String::new();
        let mut current_length: usize = 0;
        let mut is_command = false;
        let mut pre_is_lf = false;
        let mut is_ruby = false;
        let mut is_ruby_rt = false;
        let mut last_command: Option<String> = None;
        // Counted content of the current line (excluding commands and ruby annotations).
        let mut main_content = String::new();
        let mut first_line = true;
        let mut need_indent = false;

        for grapheme in UnicodeSegmentation::graphemes(message, true) {
            if grapheme == "\n" {
                let keep_break =
                    self.keep_original || (circus && last_command.as_deref() == Some("@n"));
                if keep_break {
                    current_length = self.start_new_line(
                        &mut result,
                        &mut main_content,
                        &mut first_line,
                        &mut need_indent,
                    );
                }
                pre_is_lf = true;
                continue;
            }

            // The line is full: wrap before emitting this grapheme.
            if current_length >= self.length {
                let keep_word_whole = !self.break_words
                    && !is_command
                    && !is_ruby_rt
                    && check_is_ascii_alphanumeric(grapheme);
                let break_pos = if keep_word_whole {
                    Self::find_break_pos(&result, current_length)
                } else {
                    None
                };
                // Detach the unfinished word so it can be re-attached after the line break.
                let carried = match break_pos {
                    Some(pos) => {
                        let tail = result[pos..].trim_start().to_string();
                        result.truncate(pos);
                        Some(tail)
                    }
                    None => None,
                };
                current_length = self.start_new_line(
                    &mut result,
                    &mut main_content,
                    &mut first_line,
                    &mut need_indent,
                );
                if let Some(tail) = carried {
                    result.push_str(&tail);
                    current_length += tail.chars().count();
                }
                pre_is_lf = true;
            }

            if (current_length == 0 || pre_is_lf) && SPACE_STR_LIST.contains(&grapheme) {
                continue;
            }

            result.push_str(grapheme);

            if circus {
                if grapheme == "@" {
                    is_command = true;
                    last_command = Some(String::new());
                } else if (is_command && grapheme.len() != 1)
                    || !grapheme
                        .chars()
                        .next()
                        .unwrap_or(' ')
                        .is_ascii_alphanumeric()
                {
                    // A command name consists of single ASCII alphanumeric characters only.
                    is_command = false;
                }
                // NOTE(review): these delimiters were empty strings, which can never equal a
                // grapheme, so ruby markup was never recognised. They are restored from the
                // Circus test in this file (`{rubyText/中文}`); confirm the exact characters
                // (ASCII vs. full-width) against real scripts.
                match grapheme {
                    "{" => {
                        is_ruby = true;
                        is_ruby_rt = true;
                    }
                    "/" if is_ruby => {
                        is_ruby_rt = false;
                        continue;
                    }
                    "}" if is_ruby => {
                        is_ruby = false;
                        continue;
                    }
                    _ => {}
                }
            }

            if is_command {
                if let Some(cmd) = last_command.as_mut() {
                    cmd.push_str(grapheme);
                }
            }
            if !is_command && !is_ruby_rt {
                current_length += 1;
                main_content.push_str(grapheme);
            }
            pre_is_lf = false;
        }
        result
    }
}
/// Exercises `FixedFormatter::format` with the different option combinations.
#[test]
fn test_format() {
// Width 10, input newlines dropped, words may be split, no full-width indent.
let formatter = FixedFormatter::new(10, false, true, false, None);
let message = "This is a test message.\nThis is another line.";
let formatted_message = formatter.format(message);
assert_eq!(
formatted_message,
"This is a \ntest messa\nge.This is\nanother li\nne."
);
assert_eq!(formatter.format("● This is a test."), "● This is \na test.");
// A space that would start the wrapped line is dropped.
assert_eq!(
formatter.format("● This is  a test."),
"● This is \na test."
);
// keep_original = true: the newline already in the input is kept and the space after it is dropped.
let fommater2 = FixedFormatter::new(10, true, true, false, None);
assert_eq!(
fommater2.format("● Th\n is is a te st."),
"● Th\nis is a te\nst."
);
// Test break_words = false
let no_break_formatter = FixedFormatter::new(10, false, false, false, None);
assert_eq!(
no_break_formatter.format("Example text."),
"Example \ntext."
);
// A word longer than the width still has to be split.
let no_break_formatter2 = FixedFormatter::new(6, false, false, false, None);
assert_eq!(
no_break_formatter2.format("Example text."),
"Exampl\ne text\n."
);
let no_break_formatter3 = FixedFormatter::new(7, false, false, false, None);
assert_eq!(
no_break_formatter3.format("Example text."),
"Example\ntext."
);
let real_world_no_break_formatter = FixedFormatter::new(32, false, false, false, None);
assert_eq!(
real_world_no_break_formatter.format("○咕噜咕噜(Temporary Magnetic Pattern Linkage)"),
"○咕噜咕噜(Temporary Magnetic Pattern\nLinkage)"
);
// insert_fullwidth_space_at_line_start = true: continuation lines are indented with U+3000
// when `check_need_fullwidth_space` accepts the first line.
let formatter3 = FixedFormatter::new(10, false, false, true, None);
assert_eq!(
formatter3.format("「This is a test."),
"「This is a\n\u{3000}test."
);
assert_eq!(
formatter3.format("(This) is a test."),
"(This) is \na test."
);
assert_eq!(
formatter3.format("(long text test here, test 1234"),
"(long text\n\u{3000}test here\n\u{3000}, test \n\u{3000}1234"
);
assert_eq!(
formatter3.format("(This) 「is a test."),
"(This) 「is\n\u{3000}a test."
);
// Circus scripts: `@…` commands and ruby annotations are emitted but do not count towards the width.
#[cfg(feature = "circus")]
{
let circus_formatter =
FixedFormatter::new(10, false, true, false, Some(ScriptType::Circus));
assert_eq!(
circus_formatter.format("● @cmd1@cmd2@cmd3中文字数是一\n 二三 四五六七八九十"),
"● @cmd1@cmd2@cmd3中文字数是一二三\n四五六七八九十"
);
assert_eq!(
circus_formatter
.format("● @cmd1@cmd2@cmd3{rubyText/中文}字数是一\n 二三 四五六七八九十"),
"● @cmd1@cmd2@cmd3{rubyText/中文}字数是一二三\n四五六七八九十"
);
// The newline directly after an `@n` command is kept even though keep_original is false.
let circus_formatter2 =
FixedFormatter::new(32, false, true, false, Some(ScriptType::Circus));
assert_eq!(
circus_formatter2.format("@re1@re2@b1@t30@w1「当然现在我很幸福哦?\n 因为有你在身边」@n\n「@b1@t38@w1当然现在我很幸福哦?\n 因为有敦也君在身边」"),
"@re1@re2@b1@t30@w1「当然现在我很幸福哦?因为有你在身边」@n\n「@b1@t38@w1当然现在我很幸福哦?因为有敦也君在身边」"
);
}
}