From f738bbf25b5d42a49beb46585172ff38f20c3c88 Mon Sep 17 00:00:00 2001 From: lifegpc Date: Wed, 23 Jul 2025 21:03:21 +0800 Subject: [PATCH] Fix lua parse/dump bug --- src/args.rs | 5 ++ src/main.rs | 2 + src/scripts/artemis/ast/dump.rs | 115 +++++++++++++++++++++++------- src/scripts/artemis/ast/mod.rs | 29 ++++++-- src/scripts/artemis/ast/parser.rs | 54 ++++++++++++-- src/types.rs | 2 + src/utils/escape.rs | 65 +++++++++++++++++ 7 files changed, 237 insertions(+), 35 deletions(-) diff --git a/src/args.rs b/src/args.rs index 3036ece..a8a66b0 100644 --- a/src/args.rs +++ b/src/args.rs @@ -174,6 +174,11 @@ pub struct Arg { #[arg(long, global = true, default_value_t = 100)] /// Max line width in Artemis script, used to format Artemis script. pub artemis_max_line_width: usize, + #[cfg(feature = "artemis")] + #[arg(long, global = true)] + /// Specify the language of Artemis AST script. + /// If not specified, the first language will be used. + pub artemis_ast_lang: Option, #[command(subcommand)] /// Command pub command: Command, diff --git a/src/main.rs b/src/main.rs index dd8473e..fea2f67 100644 --- a/src/main.rs +++ b/src/main.rs @@ -1440,6 +1440,8 @@ fn main() { artemis_no_indent: arg.artemis_no_indent, #[cfg(feature = "artemis")] artemis_max_line_width: arg.artemis_max_line_width, + #[cfg(feature = "artemis")] + artemis_ast_lang: arg.artemis_ast_lang.clone(), }; match &arg.command { args::Command::Export { input, output } => { diff --git a/src/scripts/artemis/ast/dump.rs b/src/scripts/artemis/ast/dump.rs index 83e4f00..1d83eb8 100644 --- a/src/scripts/artemis/ast/dump.rs +++ b/src/scripts/artemis/ast/dump.rs @@ -1,4 +1,5 @@ use super::types::*; +use crate::utils::escape::*; use std::io::Write; struct LenChecker { @@ -24,9 +25,21 @@ impl LenChecker { } } Value::Int(i) => self.current_len += format!("{}", i).len(), - Value::Str(s) => self.current_len += s.len() + 2, + Value::Str(s) => { + self.current_len += s.len() + + if lua_str_contains_need_escape(s) { + 4 + } else { + 2 + } + } Value::KeyVal((k, v)) => { - self.current_len += k.as_bytes().len() + 3; + self.current_len += k.as_bytes().len() + + if lua_key_contains_need_escape(k) { + 7 + } else { + 3 + }; if !self.check(v) { return false; } @@ -96,19 +109,35 @@ impl<'a> Dumper<'a> { self.writer.write(b"astver=")?; self.dump_f64(&ast.astver)?; if let Some(astname) = &ast.astname { - self.writer.write(b"\nastname = \"")?; - self.writer.write(astname.as_bytes())?; + self.writer.write(b"\nastname = ")?; + if lua_str_contains_need_escape(astname) { + self.writer.write(b"[[")?; + self.writer.write(astname.as_bytes())?; + self.writer.write(b"]]")?; + } else { + self.writer.write(b"\"")?; + self.writer.write(astname.as_bytes())?; + self.writer.write(b"\"")?; + } }; - self.writer.write(b"\"\nast=")?; + self.writer.write(b"\nast=")?; self.dump_value(&ast.ast)?; } else { self.writer.write(b"astver = ")?; self.dump_f64(&ast.astver)?; if let Some(astname) = &ast.astname { - self.writer.write(b"\nastname = \"")?; - self.writer.write(astname.as_bytes())?; + self.writer.write(b"\nastname = ")?; + if lua_str_contains_need_escape(&astname) { + self.writer.write(b"[[")?; + self.writer.write(astname.as_bytes())?; + self.writer.write(b"]]")?; + } else { + self.writer.write(b"\"")?; + self.writer.write(astname.as_bytes())?; + self.writer.write(b"\"")?; + } }; - self.writer.write(b"\"\nast = ")?; + self.writer.write(b"\nast = ")?; self.current_line_width = 6; self.dump_value(&ast.ast)?; } @@ -122,13 +151,25 @@ impl<'a> Dumper<'a> { Value::Float(f) => self.dump_f64(f)?, Value::Int(i) => write!(self.writer, "{}", i)?, Value::Str(s) => { - self.writer.write(b"\"")?; - self.writer.write(s.as_bytes())?; - self.writer.write(b"\"")?; + if lua_str_contains_need_escape(s) { + self.writer.write(b"[[")?; + self.writer.write(s.as_bytes())?; + self.writer.write(b"]]")?; + } else { + self.writer.write(b"\"")?; + self.writer.write(s.as_bytes())?; + self.writer.write(b"\"")?; + } } Value::KeyVal((k, v)) => { - self.writer.write(k.as_bytes())?; - self.writer.write(b"=")?; + if lua_key_contains_need_escape(k) { + self.writer.write(b"[\"")?; + self.writer.write(k.as_bytes())?; + self.writer.write(b"\"]=")?; + } else { + self.writer.write(k.as_bytes())?; + self.writer.write(b"=")?; + } self.dump_value(v)?; } Value::Array(arr) => { @@ -150,15 +191,28 @@ impl<'a> Dumper<'a> { Value::Float(f) => self.dump_f64(f)?, Value::Int(i) => write!(self.writer, "{}", i)?, Value::Str(s) => { - self.writer.write(b"\"")?; - self.writer.write(s.as_bytes())?; - self.writer.write(b"\"")?; + if lua_str_contains_need_escape(s) { + self.writer.write(b"[[")?; + self.writer.write(s.as_bytes())?; + self.writer.write(b"]]")?; + } else { + self.writer.write(b"\"")?; + self.writer.write(s.as_bytes())?; + self.writer.write(b"\"")?; + } } Value::KeyVal((k, v)) => { let bytes = k.as_bytes(); - self.writer.write(bytes)?; - self.writer.write(b" = ")?; - self.current_line_width += bytes.len() + 3; + if lua_key_contains_need_escape(k) { + self.writer.write(b"[\"")?; + self.writer.write(bytes)?; + self.writer.write(b"\"] = ")?; + self.current_line_width += bytes.len() + 7; + } else { + self.writer.write(bytes)?; + self.writer.write(b" = ")?; + self.current_line_width += bytes.len() + 3; + } if v.is_array() { let tlen = self.current_line_width + self.current_indent; if tlen < self.max_line_width { @@ -215,14 +269,25 @@ impl<'a> Dumper<'a> { Value::Float(f) => self.dump_f64(f)?, Value::Int(i) => write!(self.writer, "{}", i)?, Value::Str(s) => { - self.writer.write(b"\"")?; - self.writer.write(s.as_bytes())?; - self.writer.write(b"\"")?; + if lua_str_contains_need_escape(s) { + self.writer.write(b"[[")?; + self.writer.write(s.as_bytes())?; + self.writer.write(b"]]")?; + } else { + self.writer.write(b"\"")?; + self.writer.write(s.as_bytes())?; + self.writer.write(b"\"")?; + } } Value::KeyVal((k, v)) => { - let bytes = k.as_bytes(); - self.writer.write(bytes)?; - self.writer.write(b"=")?; + if lua_key_contains_need_escape(k) { + self.writer.write(b"[\"")?; + self.writer.write(k.as_bytes())?; + self.writer.write(b"\"]=")?; + } else { + self.writer.write(k.as_bytes())?; + self.writer.write(b"=")?; + } self.dump_value_in_one(v)?; } Value::Array(arr) => { diff --git a/src/scripts/artemis/ast/mod.rs b/src/scripts/artemis/ast/mod.rs index c2daa96..45659dd 100644 --- a/src/scripts/artemis/ast/mod.rs +++ b/src/scripts/artemis/ast/mod.rs @@ -50,6 +50,7 @@ pub struct AstScript { indent: Option, max_line_width: usize, no_indent: bool, + lang: Option, } impl AstScript { @@ -61,6 +62,7 @@ impl AstScript { indent: config.artemis_indent, max_line_width: config.artemis_max_line_width, no_indent: config.artemis_no_indent, + lang: config.artemis_ast_lang.clone(), }) } } @@ -81,13 +83,28 @@ impl Script for AstScript { .as_str() .ok_or(anyhow::anyhow!("Missing top block name"))?; let mut block = &ast[block_name]; - let mut lang = None; + let mut lang: Option<&str> = self.lang.as_ref().map(|s| s.as_str()); loop { - if let Some(save_title) = block[Key("savetitle")]["text"].as_str() { - messages.push(Message { - name: None, - message: save_title.to_string(), - }); + let savetitle = &block[Key("savetitle")]; + if savetitle.is_array() { + if let Some(lang) = lang { + if let Some(title) = savetitle[lang].as_str() { + messages.push(Message { + name: None, + message: title.to_string(), + }); + } else if let Some(title) = savetitle["text"].as_str() { + messages.push(Message { + name: None, + message: title.to_string(), + }); + } + } else if let Some(title) = savetitle["text"].as_str() { + messages.push(Message { + name: None, + message: title.to_string(), + }); + } } let text = &block["text"]; if text.is_array() { diff --git a/src/scripts/artemis/ast/parser.rs b/src/scripts/artemis/ast/parser.rs index a87ed3c..2ab381b 100644 --- a/src/scripts/artemis/ast/parser.rs +++ b/src/scripts/artemis/ast/parser.rs @@ -1,6 +1,7 @@ use super::types::*; use crate::types::*; use crate::utils::encoding::*; +use crate::utils::escape::unescape_lua_str; use anyhow::Result; pub struct Parser<'a> { @@ -61,6 +62,19 @@ impl<'a> Parser<'a> { match self.peek() { Some(t) => match t { b'"' => return self.parse_str().map(|x| Value::Str(x.to_string())), + b'[' => { + self.eat_char(); + match self.peek().ok_or(self.error2("unexpected eof"))? { + b'[' => { + self.pos -= 1; // Rewind to the first '[' + self.parse_raw_str().map(|x| Value::Str(x)) + } + _ => { + self.pos -= 1; + self.parse_key_val() + } + } + } b'-' | b'.' | b'0'..=b'9' => return self.parse_any_number(), b'n' => { if self.is_indent(b"nil") { @@ -70,7 +84,7 @@ impl<'a> Parser<'a> { self.parse_key_val() } } - b'_' | b'a'..=b'z' | b'A'..=b'Z' | b'[' | b']' => return self.parse_key_val(), + b'_' | b'a'..=b'z' | b'A'..=b'Z' | b']' => return self.parse_key_val(), b'{' => return self.parse_array(), _ => return self.error(format!("unexpected token: {}", t)), }, @@ -146,12 +160,40 @@ impl<'a> Parser<'a> { self.erase_whitespace(); self.parse_indent(b"\"")?; let start = self.pos; + let mut pc = None; let end = loop { match self.next() { Some(c) => { if c == b'"' { - break self.pos - 1; + if pc.is_none_or(|x| x != b'\\') { + break self.pos - 1; + } } + pc = Some(c); + } + None => return self.error("unexpected eof"), + } + }; + Ok(unescape_lua_str( + &decode_to_string(self.encoding, &self.str[start..end], true) + .map_err(|e| self.error2(e))?, + )) + } + + fn parse_raw_str(&mut self) -> Result { + self.erase_whitespace(); + self.parse_indent(b"[[")?; + let start = self.pos; + let mut pc = None; + let end = loop { + match self.next() { + Some(c) => { + if c == b']' { + if pc.is_some_and(|x| x == b']') { + break self.pos - 2; + } + } + pc = Some(c); } None => return self.error("unexpected eof"), } @@ -213,7 +255,7 @@ impl<'a> Parser<'a> { let end = loop { match self.peek() { Some(t) => match t { - b'_' | b'a'..=b'z' | b'A'..=b'Z' | b'[' | b']' => self.eat_char(), + b'_' | b'a'..=b'z' | b'A'..=b'Z' | b'[' | b']' | b'"' => self.eat_char(), b'0'..=b'9' => { if is_first { return self.error("unexpected digit"); @@ -227,7 +269,11 @@ impl<'a> Parser<'a> { } is_first = false; }; - decode_to_string(self.encoding, &self.str[start..end], true).map_err(|e| self.error2(e)) + let mut data = &self.str[start..end]; + if data.starts_with(b"[\"") && data.ends_with(b"\"]") { + data = &data[2..data.len() - 2]; + } + decode_to_string(self.encoding, data, true).map_err(|e| self.error2(e)) } fn is_indent(&self, indent: &[u8]) -> bool { diff --git a/src/types.rs b/src/types.rs index 499ad7e..843f444 100644 --- a/src/types.rs +++ b/src/types.rs @@ -231,6 +231,8 @@ pub struct ExtraConfig { pub artemis_no_indent: bool, #[cfg(feature = "artemis")] pub artemis_max_line_width: usize, + #[cfg(feature = "artemis")] + pub artemis_ast_lang: Option, } #[derive(Clone, Copy, Debug, ValueEnum, PartialEq, Eq, PartialOrd, Ord)] diff --git a/src/utils/escape.rs b/src/utils/escape.rs index 787561f..4d862a6 100644 --- a/src/utils/escape.rs +++ b/src/utils/escape.rs @@ -32,6 +32,9 @@ pub fn escape_xml_text_value(s: &str) -> String { lazy_static::lazy_static! { static ref XML_NCR_BASE10_REGEX: Regex = Regex::new(r"&#(\d+);").unwrap(); static ref XML_NCR_BASE16_REGEX: Regex = Regex::new(r"&#x([0-9a-fA-F]+);").unwrap(); + static ref LUA_NCR_BASE10_REGEX: Regex = Regex::new(r"\\(\d{3})").unwrap(); + static ref LUA_NCR_BASE16_REGEX: Regex = Regex::new(r"\\x([0-9a-fA-F]{2})").unwrap(); + static ref LUA_NCR_BASE16_U_REGEX: Regex = Regex::new(r"\\u([0-9a-fA-F]{4})").unwrap(); } pub fn unescape_xml(s: &str) -> String { @@ -55,6 +58,55 @@ pub fn unescape_xml(s: &str) -> String { .replace("'", "'") } +pub fn unescape_lua_str(s: &str) -> String { + let mut s = s.to_owned(); + s = s + .replace("\\n", "\n") + .replace("\\r", "\r") + .replace("\\t", "\t") + .replace("\\v", "\x0A") + .replace("\\b", "\x08") + .replace("\\f", "\x0C") + .replace("\\'", "'") + .replace("\\\"", "\""); + s = LUA_NCR_BASE10_REGEX + .replace_all(&s, |caps: &fancy_regex::Captures| { + let codepoint = caps[1].parse::().unwrap_or(0); + char::from_u32(codepoint).map_or("�".to_string(), |c| c.to_string()) + }) + .to_string(); + s = s.replace("\\0", "\0"); + s = LUA_NCR_BASE16_REGEX + .replace_all(&s, |caps: &fancy_regex::Captures| { + let codepoint = u32::from_str_radix(&caps[1], 16).unwrap_or(0); + char::from_u32(codepoint).map_or("�".to_string(), |c| c.to_string()) + }) + .to_string(); + s = LUA_NCR_BASE16_U_REGEX + .replace_all(&s, |caps: &fancy_regex::Captures| { + let codepoint = u32::from_str_radix(&caps[1], 16).unwrap_or(0); + char::from_u32(codepoint).map_or("�".to_string(), |c| c.to_string()) + }) + .to_string(); + s.replace("\\\\", "\\") +} + +pub fn lua_str_contains_need_escape(s: &str) -> bool { + s.contains('\\') + || s.contains('\n') + || s.contains('\r') + || s.contains('\t') + || s.contains('\x0A') + || s.contains('\x08') + || s.contains('\x0C') + || s.contains('\'') + || s.contains('"') +} + +pub fn lua_key_contains_need_escape(s: &str) -> bool { + s.chars().next().map_or(false, |c| c.is_ascii_digit()) +} + #[test] fn test_unescape_xml() { assert_eq!( @@ -63,3 +115,16 @@ fn test_unescape_xml() { ); assert_eq!(unescape_xml("你TEST "), "你TEST "); } + +#[test] +fn test_unescape_lua_str() { + assert_eq!(unescape_lua_str(r"Hello\nWorld"), "Hello\nWorld"); + assert_eq!(unescape_lua_str(r"Tab:\tEnd"), "Tab:\tEnd"); + assert_eq!(unescape_lua_str("Quote: \\' and \\\""), "Quote: ' and \""); + assert_eq!(unescape_lua_str(r"Backslash:\\Test"), "Backslash:\\Test"); + assert_eq!(unescape_lua_str(r"\065\066\067"), "ABC"); + assert_eq!(unescape_lua_str(r"\x41\x42\x43"), "ABC"); + assert_eq!(unescape_lua_str(r"\u4F60\u597D"), "你好"); + assert_eq!(unescape_lua_str(r"Null:\0End"), "Null:\0End"); + assert_eq!(unescape_lua_str(r"Mix:\n\x41\065\u4F60"), "Mix:\nAA你"); +}