diff --git a/src/ext/fancy_regex.rs b/src/ext/fancy_regex.rs
new file mode 100644
index 0000000..5a9bba8
--- /dev/null
+++ b/src/ext/fancy_regex.rs
@@ -0,0 +1,82 @@
+use anyhow::Result;
+use fancy_regex::Regex;
+
+/// Extension methods for [`Regex`].
+pub trait FancyRegexExt {
+    /// Splits the input string by the regex pattern.
+    ///
+    /// Like python's `re.split()`, but returns an iterator. The matched text is
+    /// yielded as well (as python does when the whole pattern is a capturing
+    /// group), and empty pieces are skipped instead of being returned.
+    ///
+    /// # Errors
+    ///
+    /// Returns an error if the regex engine fails while searching `input`.
+    fn py_split<'a>(&'a self, input: &'a str) -> Result<PySplit<'a>>;
+}
+
+/// Iterator returned by [`FancyRegexExt::py_split`].
+///
+/// Yields the non-empty text between matches and the non-empty matches
+/// themselves, in the order they appear in the input.
+pub struct PySplit<'a> {
+    /// The string being split.
+    str: &'a str,
+    /// Byte ranges `(start, end)` of the matches, in the order they were found.
+    pos: Vec<(usize, usize)>,
+    /// Index into `pos` of the first match that has not been consumed yet.
+    next_pos: usize,
+    /// Byte offset in `str` up to which everything has already been handled.
+    start: usize,
+}
+
+impl<'a> Iterator for PySplit<'a> {
+    type Item = &'a str;
+
+    fn next(&mut self) -> Option<Self::Item> {
+        while let Some(&(start, end)) = self.pos.get(self.next_pos) {
+            if self.start < start {
+                // Plain text in front of the next match.
+                let result = &self.str[self.start..start];
+                self.start = start;
+                return Some(result);
+            }
+            // The cursor has reached this match, so it is consumed either way.
+            // Advancing an index avoids `Vec::remove(0)`, which shifts every
+            // remaining element on each call.
+            self.next_pos += 1;
+            if self.start < end {
+                let result = &self.str[self.start..end];
+                self.start = end;
+                return Some(result);
+            }
+            // Zero-width match at the cursor (e.g. `x*` at offset 0): there is
+            // nothing to yield, so skip it instead of panicking.
+        }
+        // No matches left: whatever remains is the final piece.
+        if self.start < self.str.len() {
+            let result = &self.str[self.start..];
+            self.start = self.str.len();
+            Some(result)
+        } else {
+            None
+        }
+    }
+}
+
+impl FancyRegexExt for Regex {
+    fn py_split<'a>(&'a self, input: &'a str) -> Result<PySplit<'a>> {
+        // All matches are collected up front; the first search error aborts
+        // the split.
+        let pos = self
+            .find_iter(input)
+            .map(|m| m.map(|m| (m.start(), m.end())))
+            .collect::<std::result::Result<Vec<_>, _>>()?;
+        Ok(PySplit {
+            str: input,
+            pos,
+            next_pos: 0,
+            start: 0,
+        })
+    }
+}
diff --git a/src/ext/mod.rs b/src/ext/mod.rs
index f5c8574..7162ed3 100644
--- a/src/ext/mod.rs
+++ b/src/ext/mod.rs
@@ -1,4 +1,6 @@
 pub mod atomic;
+#[cfg(feature = "fancy-regex")]
+pub mod fancy_regex;
 pub mod io;
 #[cfg(feature = "emote-psb")]
 pub mod psb;
diff --git a/src/scripts/kirikiri/ks.rs b/src/scripts/kirikiri/ks.rs
index 8167e5b..fb91d01 100644
--- a/src/scripts/kirikiri/ks.rs
+++ b/src/scripts/kirikiri/ks.rs
@@ -1,3 +1,4 @@
+use crate::ext::fancy_regex::*;
 use crate::scripts::base::*;
 use crate::types::*;
 use crate::utils::encoding::*;
@@ -308,7 +309,7 @@ impl Node for ParsedScript {
 
 lazy_static::lazy_static! {
     static ref LINE_SPLIT_RE: Regex = Regex::new(r"(\[.*?\])").unwrap();
-    static ref ATTR_RE: Regex = Regex::new("([a-zA-Z0-9_]+)(?:=(\"[^\"]*\" |'[^']*' |[^\\s\\]]+))?").unwrap();
+    static ref ATTR_RE: Regex = Regex::new("([a-zA-Z0-9_]+)(?:=(\"[^\"]*\"|'[^']*'|[^\\s\\]]+))?").unwrap();
 }
 
 struct Parser {
@@ -333,7 +334,7 @@ impl Parser {
             let value = cap
                 .get(2)
                 .map(|v| {
-                    let mut s = v.as_str().to_string();
+                    let mut s = v.as_str().trim().to_string();
                     if s.starts_with("\"") && s.ends_with("\"") {
                         s = s[1..s.len() - 1].to_string();
                     } else if s.starts_with("'") && s.ends_with("'") {
@@ -430,8 +431,8 @@ impl Parser {
                 }
             }
             let mut parsed_line_nodes = Vec::new();
-            for part in LINE_SPLIT_RE.split(&full_line) {
-                let part = part?;
+            for part in LINE_SPLIT_RE.py_split(&full_line)? {
+                let part = part.trim();
                 if part.is_empty() {
                     continue;
                 }
@@ -448,6 +449,8 @@ impl Parser {
                             &part[1..part.len() - 1],
                         )?));
                     }
+                } else {
+                    parsed_line_nodes.push(ParsedLineNode::Text(TextNode(part.to_string())));
                 }
             }
             if !parsed_line_nodes.is_empty() {
@@ -492,12 +495,20 @@ impl Script for KsScript {
     fn extract_messages(&self) -> Result<Vec<Message>> {
         let mut messages = Vec::new();
         let mut name = None;
+        let mut message = String::new();
         for obj in self.tree.iter() {
             match obj {
-                ParsedScriptNode::Line(line) => messages.push(Message {
-                    name: name.take(),
-                    message: line.to_xml(),
-                }),
+                ParsedScriptNode::Label(_) => {
+                    if !message.is_empty() {
+                        messages.push(Message {
+                            name: name.clone(),
+                            message: message.clone(),
+                        });
+                        message.clear();
+                        name = None;
+                    }
+                }
+                ParsedScriptNode::Line(line) => message.push_str(&line.to_xml()),
                 ParsedScriptNode::Command(cmd) => {
                     if self.name_commands.contains(&cmd.name) {
                         for attr in &cmd.attributes {
@@ -513,7 +524,7 @@ impl Script for KsScript {
                             if let TagAttr::Str(value) = &attr.1 {
                                 if !value.is_empty() && !value.is_ascii() {
                                     messages.push(Message {
-                                        name: name.take(),
+                                        name: None,
                                         message: value.clone(),
                                     });
                                     break; // Only take the first message found
@@ -525,6 +536,9 @@ impl Script for KsScript {
                 _ => {}
             }
         }
+        if !message.is_empty() {
+            messages.push(Message { name, message });
+        }
         Ok(messages)
     }
 