Fix scn extract

This commit is contained in:
2025-07-02 11:38:22 +08:00
parent efb3d24442
commit 8c32dfb57e
3 changed files with 92 additions and 9 deletions

67
src/ext/fancy_regex.rs Normal file
View File

@@ -0,0 +1,67 @@
use anyhow::Result;
use fancy_regex::Regex;
/// Extension methods for regex types; in this file it is implemented for
/// `fancy_regex::Regex`.
pub trait FancyRegexExt {
/// Splits `input` around the matches of the regex pattern.
///
/// Modeled on Python's `re.split()` used with a pattern that is wrapped in a
/// single capture group, but returns an iterator ([`PySplit`]) of slices
/// borrowed from `input` instead of a list. The iterator yields, from left to
/// right, the text found between matches and the matched text itself.
///
/// Differences from `re.split()`:
/// - the whole match is yielded (match start/end offsets are used), not the
///   contents of capture groups;
/// - empty pieces are never yielded.
///
/// # Errors
///
/// Returns an error if the regex engine reports a failure while searching
/// `input` for matches.
fn py_split<'a>(&'a self, input: &'a str) -> Result<PySplit<'a>>;
}
/// Iterator over the pieces of a string split by regex matches.
///
/// Walking the text from left to right, it yields every non-empty stretch of
/// text that lies between two matches and every non-empty match itself, in
/// the order they appear. Empty pieces are never yielded.
pub struct PySplit<'a> {
    /// The text being split.
    str: &'a str,
    /// Byte ranges `(start, end)` of the matches, in ascending order and
    /// non-overlapping. Entries whose `end` is at or before `start` (the
    /// cursor below) have already been consumed.
    pos: Vec<(usize, usize)>,
    /// Byte offset of the first byte of `str` that has not been yielded yet.
    start: usize,
}

impl<'a> Iterator for PySplit<'a> {
    type Item = &'a str;

    /// Returns the next non-empty piece, or `None` once the whole text has
    /// been yielded.
    fn next(&mut self) -> Option<Self::Item> {
        let text = self.str;
        if self.start >= text.len() {
            return None;
        }

        // The matches are sorted, so those that end at or before the cursor
        // form a prefix of `pos`. Skipping that prefix also skips empty
        // matches sitting exactly at the cursor, which have nothing to yield,
        // and avoids repeatedly removing the first element of the vector.
        let next_match = self.pos.partition_point(|&(_, end)| end <= self.start);

        let stop = match self.pos.get(next_match) {
            // There is plain text in front of the next match: yield it first.
            Some(&(match_start, _)) if self.start < match_start => match_start,
            // The cursor is at the beginning of the match: yield the match.
            Some(&(_, match_end)) => match_end,
            // No matches left: yield the remaining tail.
            None => text.len(),
        };

        // `stop` is strictly greater than `self.start` in every arm above, so
        // the piece is never empty and the iterator always makes progress.
        let piece = &text[self.start..stop];
        self.start = stop;
        Some(piece)
    }
}
impl FancyRegexExt for Regex {
    /// Finds every match of the pattern in `input` up front and hands the
    /// resulting byte ranges to a [`PySplit`] positioned at the start of the
    /// text.
    ///
    /// The first error reported while searching stops the scan and is
    /// returned to the caller.
    fn py_split<'a>(&'a self, input: &'a str) -> Result<PySplit<'a>> {
        let spans = self
            .find_iter(input)
            .map(|found| found.map(|m| (m.start(), m.end())))
            .collect::<Result<Vec<_>, _>>()?;
        Ok(PySplit {
            str: input,
            pos: spans,
            start: 0,
        })
    }
}

View File

@@ -1,4 +1,6 @@
pub mod atomic; pub mod atomic;
#[cfg(feature = "fancy-regex")]
pub mod fancy_regex;
pub mod io; pub mod io;
#[cfg(feature = "emote-psb")] #[cfg(feature = "emote-psb")]
pub mod psb; pub mod psb;

View File

@@ -1,3 +1,4 @@
use crate::ext::fancy_regex::*;
use crate::scripts::base::*; use crate::scripts::base::*;
use crate::types::*; use crate::types::*;
use crate::utils::encoding::*; use crate::utils::encoding::*;
@@ -308,7 +309,7 @@ impl Node for ParsedScript {
lazy_static::lazy_static! { lazy_static::lazy_static! {
static ref LINE_SPLIT_RE: Regex = Regex::new(r"(\[.*?\])").unwrap(); static ref LINE_SPLIT_RE: Regex = Regex::new(r"(\[.*?\])").unwrap();
static ref ATTR_RE: Regex = Regex::new("([a-zA-Z0-9_]+)(?:=(\"[^\"]*\" |'[^']*' |[^\\s\\]]+))?").unwrap(); static ref ATTR_RE: Regex = Regex::new("([a-zA-Z0-9_]+)(?:=(\"[^\"]*\"|'[^']*'|[^\\s\\]]+))?").unwrap();
} }
struct Parser { struct Parser {
@@ -333,7 +334,7 @@ impl Parser {
let value = cap let value = cap
.get(2) .get(2)
.map(|v| { .map(|v| {
let mut s = v.as_str().to_string(); let mut s = v.as_str().trim().to_string();
if s.starts_with("\"") && s.ends_with("\"") { if s.starts_with("\"") && s.ends_with("\"") {
s = s[1..s.len() - 1].to_string(); s = s[1..s.len() - 1].to_string();
} else if s.starts_with("'") && s.ends_with("'") { } else if s.starts_with("'") && s.ends_with("'") {
@@ -430,8 +431,8 @@ impl Parser {
} }
} }
let mut parsed_line_nodes = Vec::new(); let mut parsed_line_nodes = Vec::new();
for part in LINE_SPLIT_RE.split(&full_line) { for part in LINE_SPLIT_RE.py_split(&full_line)? {
let part = part?; let part = part.trim();
if part.is_empty() { if part.is_empty() {
continue; continue;
} }
@@ -448,6 +449,8 @@ impl Parser {
&part[1..part.len() - 1], &part[1..part.len() - 1],
)?)); )?));
} }
} else {
parsed_line_nodes.push(ParsedLineNode::Text(TextNode(part.to_string())));
} }
} }
if !parsed_line_nodes.is_empty() { if !parsed_line_nodes.is_empty() {
@@ -492,12 +495,20 @@ impl Script for KsScript {
fn extract_messages(&self) -> Result<Vec<Message>> { fn extract_messages(&self) -> Result<Vec<Message>> {
let mut messages = Vec::new(); let mut messages = Vec::new();
let mut name = None; let mut name = None;
let mut message = String::new();
for obj in self.tree.iter() { for obj in self.tree.iter() {
match obj { match obj {
ParsedScriptNode::Line(line) => messages.push(Message { ParsedScriptNode::Label(_) => {
name: name.take(), if !message.is_empty() {
message: line.to_xml(), messages.push(Message {
}), name: name.clone(),
message: message.clone(),
});
message.clear();
name = None;
}
}
ParsedScriptNode::Line(line) => message.push_str(&line.to_xml()),
ParsedScriptNode::Command(cmd) => { ParsedScriptNode::Command(cmd) => {
if self.name_commands.contains(&cmd.name) { if self.name_commands.contains(&cmd.name) {
for attr in &cmd.attributes { for attr in &cmd.attributes {
@@ -513,7 +524,7 @@ impl Script for KsScript {
if let TagAttr::Str(value) = &attr.1 { if let TagAttr::Str(value) = &attr.1 {
if !value.is_empty() && !value.is_ascii() { if !value.is_empty() && !value.is_ascii() {
messages.push(Message { messages.push(Message {
name: name.take(), name: None,
message: value.clone(), message: value.clone(),
}); });
break; // Only take the first message found break; // Only take the first message found
@@ -525,6 +536,9 @@ impl Script for KsScript {
_ => {} _ => {}
} }
} }
if !message.is_empty() {
messages.push(Message { name, message });
}
Ok(messages) Ok(messages)
} }