diff --git a/src/args.rs b/src/args.rs index 72fbfea..ff3cb8e 100644 --- a/src/args.rs +++ b/src/args.rs @@ -667,6 +667,18 @@ pub struct Arg { #[arg(long, global = true, action = ArgAction::SetTrue)] /// Export Emote PIMG images as PSD files. pub emote_pimg_psd: bool, + #[cfg(feature = "kirikiri")] + #[arg(long, global = true)] + /// Whether to only extract message between Talk and Hitret command in Kirikiri KS script. Auto detect if not specified. + pub kirikiri_ks_hitret: Option, + #[cfg(feature = "kirikiri")] + #[arg(long, global = true)] + /// The line feed character used in Kirikiri KS script. + pub kirikiri_ks_lf: Option, + #[cfg(feature = "kirikiri")] + #[arg(long, global = true, value_delimiter = ',', default_value = "macCmd")] + /// Kirikiri message tags, used to extract more message from ks script. + pub kirikiri_message_tags: Vec, #[command(subcommand)] /// Command pub command: Command, diff --git a/src/main.rs b/src/main.rs index a6d735b..bde8590 100644 --- a/src/main.rs +++ b/src/main.rs @@ -3375,6 +3375,14 @@ fn main() { psd_compress: !arg.psd_no_compress, #[cfg(feature = "emote-img")] emote_pimg_psd: arg.emote_pimg_psd, + #[cfg(feature = "kirikiri")] + kirikiri_ks_hitret: arg.kirikiri_ks_hitret, + #[cfg(feature = "kirikiri")] + kirikiri_ks_lf: arg.kirikiri_ks_lf.clone(), + #[cfg(feature = "kirikiri")] + kirikiri_message_tags: std::sync::Arc::new(std::collections::HashSet::from_iter( + arg.kirikiri_message_tags.iter().cloned(), + )), }); match &arg.command { args::Command::Export { input, output } => { diff --git a/src/scripts/kirikiri/ks.rs b/src/scripts/kirikiri/ks.rs index 2b61f85..788975c 100644 --- a/src/scripts/kirikiri/ks.rs +++ b/src/scripts/kirikiri/ks.rs @@ -6,6 +6,7 @@ use crate::utils::encoding::*; use crate::utils::escape::*; use anyhow::Result; use fancy_regex::Regex; +use std::collections::BTreeSet; use std::collections::HashSet; use std::io::Write; use std::ops::{Deref, DerefMut, Index, IndexMut}; @@ -166,6 +167,14 @@ impl TagNode { } } + /// Get an attribute for the tag + pub fn get_attr(&self, key: &str) -> Option<&TagAttr> { + self.attributes + .iter() + .find(|(k, _)| k == key) + .map(|(_, v)| v) + } + fn to_xml_tag(&self) -> String { let attr_str = self.ser_attributes_xml(); if attr_str.is_empty() { @@ -264,6 +273,16 @@ impl ParsedLineNode { fn is_np(&self) -> bool { matches!(self, ParsedLineNode::Tag(tag) if tag.name == "np") } + + fn is_tag(&self, name: &str) -> bool { + matches!(self, ParsedLineNode::Tag(tag) if tag.name == name) + } + + fn set_attr(&mut self, key: &str, value: String) { + if let ParsedLineNode::Tag(tag) = self { + tag.set_attr(key, value); + } + } } impl Node for ParsedLineNode { @@ -568,16 +587,26 @@ impl Parser { struct XMLTextParser { str: String, pos: usize, + lf: String, + no_np: bool, } impl XMLTextParser { - pub fn new(text: &str) -> Self { + pub fn new(text: &str, lf: Option) -> Self { + let lf = lf.unwrap_or_else(|| String::from("")); Self { - str: text.replace("\n", ""), + str: text.replace("\n", &lf), pos: 0, + lf, + no_np: false, } } + fn no_np(mut self) -> Self { + self.no_np = true; + self + } + fn parse_tag(&mut self) -> Result { let mut name = String::new(); let mut attributes = Vec::new(); @@ -673,16 +702,28 @@ impl XMLTextParser { current_line = Vec::new(); } } - _ => text.push(c), + _ => { + text.push(c); + if text.ends_with(&self.lf) { + if !text.is_empty() { + current_line.push(ParsedLineNode::Text(TextNode(unescape_xml(&text)))); + text.clear(); + } + lines.push(ParsedLine(current_line)); + current_line = Vec::new(); + } + } } } if !text.is_empty() { current_line.push(ParsedLineNode::Text(TextNode(unescape_xml(&text)))); } - current_line.push(ParsedLineNode::Tag(TagNode { - name: "np".to_string(), - attributes: Vec::new(), - })); + if !self.no_np { + current_line.push(ParsedLineNode::Tag(TagNode { + name: "np".to_string(), + attributes: Vec::new(), + })); + } lines.push(ParsedLine(current_line)); Ok(lines) } @@ -706,6 +747,9 @@ pub struct KsScript { name_commands: Arc>, message_commands: Arc>, remove_empty_lines: bool, + hitret: bool, + lf: Option, + message_tags: Arc>, } impl KsScript { @@ -718,12 +762,38 @@ impl KsScript { let (text, bom) = decode_with_bom_detect(encoding, &reader, true)?; let parser = Parser::new(&text); let tree = parser.parse(!config.kirikiri_remove_empty_lines)?; + let hitret = if let Some(hitret) = config.kirikiri_ks_hitret { + hitret + } else { + let mut talk_count = 0u64; + let mut hitret_count = 0u64; + for node in tree.iter() { + match node { + ParsedScriptNode::Line(line) => { + for node in line.iter() { + if let ParsedLineNode::Tag(node) = node { + if node.name == "Talk" { + talk_count += 1; + } else if node.name == "Hitret" { + hitret_count += 1; + } + } + } + } + _ => {} + } + } + talk_count == hitret_count && talk_count > 3 + }; Ok(Self { bom, tree, name_commands: config.kirikiri_name_commands.clone(), message_commands: config.kirikiri_message_commands.clone(), remove_empty_lines: config.kirikiri_remove_empty_lines, + hitret, + lf: config.kirikiri_ks_lf.clone(), + message_tags: config.kirikiri_message_tags.clone(), }) } } @@ -741,10 +811,80 @@ impl Script for KsScript { let mut messages = Vec::new(); let mut name = None; let mut message = String::new(); + if self.hitret { + for obj in self.tree.iter() { + match obj { + ParsedScriptNode::Line(line) => { + for node in line.iter() { + if node.is_tag("Talk") { + if let ParsedLineNode::Tag(tag) = &node { + if let Some(attr) = tag.get_attr("name") { + if let TagAttr::Str(s) = attr { + name = Some(s.to_string()); + } + } + if name.is_none() { + for attr in &tag.attributes { + if let TagAttr::Str(value) = &attr.1 { + if !value.is_empty() && !value.is_ascii() { + name = Some(value.clone()); + break; + } + } + } + } + if name.is_none() { + anyhow::bail!("name not found for Talk command"); + } + continue; + } + } else if node.is_tag("Hitret") { + if let Some(lf) = self.lf.as_ref() { + message = message.replace(lf, "\n"); + } + messages.push(Message { + name: if name.as_ref().is_some_and(|name| name == "心の声") { + None + } else { + name.clone() + }, + message: message.trim_end_matches("").to_owned(), + }); + message.clear(); + name = None; + continue; + } else if let ParsedLineNode::Tag(tag) = &node { + if self.message_tags.contains(&tag.name) { + for attr in &tag.attributes { + if let TagAttr::Str(value) = &attr.1 { + if !value.is_empty() && !value.is_ascii() { + messages.push(Message { + name: None, + message: value.clone(), + }); + break; + } + } + } + } + } + if name.is_some() { + message.push_str(&node.to_xml()); + } + } + } + _ => {} + } + } + return Ok(messages); + } for obj in self.tree.iter() { match obj { ParsedScriptNode::Label(_) => { if !message.is_empty() { + if let Some(lf) = self.lf.as_ref() { + message = message.replace(lf, "\n"); + } messages.push(Message { name: name.clone(), message: message.trim_end_matches("").to_owned(), @@ -786,6 +926,9 @@ impl Script for KsScript { } } if !message.is_empty() { + if let Some(lf) = self.lf.as_ref() { + message = message.replace(lf, "\n"); + } messages.push(Message { name, message: message.trim_end_matches("").to_owned(), @@ -805,143 +948,295 @@ impl Script for KsScript { let mut mes = messages.iter(); let mut cur_mes = None; let mut tree = self.tree.clone(); - let mut message_lines = Vec::new(); let mut i = 0; - let mut is_end = false; - let mut name_command_block_line: Option<(usize, String)> = None; - while i < tree.len() { - match tree[i].clone() { - ParsedScriptNode::Label(_) => { - if !message_lines.is_empty() { - let m: &Message = cur_mes - .take() - .ok_or(anyhow::anyhow!("Not enough messages"))?; - if let Some((line, key)) = name_command_block_line.take() { - let name = m - .name - .as_ref() - .ok_or(anyhow::anyhow!("Name not found in message"))?; - let mut name = name.clone(); - if let Some(replacement) = replacement { - for (key, value) in replacement.map.iter() { - name = name.replace(key, value); - } - } - tree[line].set_attr(&key, name); - } - let mut text = m.message.to_owned(); - if let Some(replacement) = replacement { - for (key, value) in replacement.map.iter() { - text = text.replace(key, value); - } - } - let mess = XMLTextParser::new(&text).parse()?; - let diff = mess.len() as isize - message_lines.len() as isize; - let common_lines = message_lines.len().min(mess.len()); - let mut last_index = message_lines.last().cloned().unwrap_or(0); - for j in 0..common_lines { - tree[message_lines[j]] = ParsedScriptNode::Line(mess[j].clone()); - } - for j in common_lines..message_lines.len() { - tree.remove(message_lines[j] - (j - common_lines)); - } - for i in common_lines..mess.len() { - let new_line = ParsedScriptNode::Line(mess[i].clone()); - if last_index < tree.len() { - tree.insert(last_index + 1, new_line); - last_index += 1; - } else { - tree.push(new_line); - } - } - i = (i as isize + diff) as usize; - } - message_lines.clear(); - is_end = false; - if cur_mes.is_none() { - cur_mes = mes.next(); - } - } - ParsedScriptNode::Line(line) => { - if !is_end { - message_lines.push(i); - is_end = line.last().map(|e| e.is_np()).unwrap_or(false); - } - } - ParsedScriptNode::Command(cmd) => { - if self.name_commands.contains(&cmd.name) { - for attr in &cmd.attributes { - if let TagAttr::Str(value) = &attr.1 { - if !value.is_empty() && !value.is_ascii() { - name_command_block_line = Some((i, attr.0.clone())); - break; // Only update the first name found - } - } - } - } else if self.message_commands.contains(&cmd.name) { - for attr in &cmd.attributes { - if let TagAttr::Str(value) = &attr.1 { - if !value.is_empty() && !value.is_ascii() { - let m = cur_mes - .take() - .ok_or(anyhow::anyhow!("Not enough messages"))?; - let mut text = m.message.clone(); - if let Some(replacement) = replacement { - for (key, value) in replacement.map.iter() { - text = text.replace(key, value); + if self.hitret { + let mut name_tag_loc = None; + let mut message_blocks = Vec::new(); + let mut in_block = false; + cur_mes = mes.next(); + while i < tree.len() { + match tree[i].clone() { + ParsedScriptNode::Line(line) => { + let mut j = 0; + while j < line.len() { + let node = line[j].clone(); + if node.is_tag("Talk") { + if let ParsedLineNode::Tag(tag) = &node { + if let Some(attr) = tag.get_attr("name") { + if let TagAttr::Str(_) = attr { + name_tag_loc = Some((i, j, "name".to_string())); } } - tree[i].set_attr(&attr.0, text); - cur_mes = mes.next(); - break; // Only update the first message found + if name_tag_loc.is_none() { + for attr in &tag.attributes { + if let TagAttr::Str(value) = &attr.1 { + if !value.is_empty() && !value.is_ascii() { + name_tag_loc = Some((i, j, attr.0.clone())); + break; + } + } + } + } + } + in_block = true; + j += 1; + continue; + } else if node.is_tag("Hitret") { + in_block = false; + let m = cur_mes + .take() + .ok_or(anyhow::anyhow!("Not enough messages"))?; + let (x, y, key) = name_tag_loc.take().ok_or(anyhow::anyhow!( + "Name tag not found for Talk command" + ))?; + let mut name = + m.name.clone().unwrap_or_else(|| "心の声".to_string()); + if let Some(replacement) = replacement { + for (key, value) in replacement.map.iter() { + name = name.replace(key, value); + } + } + let mut li = &mut tree[x]; + if let ParsedScriptNode::Line(line) = &mut li { + line[y].set_attr(&key, name); + } + let mut text = m.message.clone(); + if let Some(replacement) = replacement { + for (key, value) in replacement.map.iter() { + text = text.replace(key, value); + } + } + cur_mes = mes.next(); + let mut mess = + XMLTextParser::new(&text, self.lf.clone()).no_np().parse()?; + let first_line_len = mess[0].len(); + let line_indexs = + BTreeSet::from_iter(message_blocks.iter().map(|(i, _)| *i)); + let (start_x, start_y) = + message_blocks.first().cloned().unwrap_or((i, j)); + let (end_x, end_y) = + message_blocks.last().cloned().unwrap_or((x, y)); + { + let li = &mut mess[0]; + let source_line = &tree[start_x]; + let source_line = match source_line { + ParsedScriptNode::Line(line) => line, + _ => return Err(anyhow::anyhow!("Expected line node")), + }; + for z in 0..start_y { + li.insert(z, source_line[z].clone()); + } + } + { + let loc = mess.len() - 1; + let li = &mut mess[loc]; + let source_line = &tree[end_x]; + let source_line = match source_line { + ParsedScriptNode::Line(line) => line, + _ => return Err(anyhow::anyhow!("Expected line node")), + }; + for z in end_y + 1..source_line.len() { + li.push(source_line[z].clone()); + } + } + let diff = mess.len() as isize - line_indexs.len() as isize; + let common_lines = line_indexs.len().min(mess.len()); + let mut last_index = end_x; + let message_lines = Vec::from_iter(line_indexs.iter().cloned()); + for z in 0..common_lines { + tree[message_lines[z]] = + ParsedScriptNode::Line(mess[z].clone()); + } + for z in common_lines..message_lines.len() { + tree.remove(message_lines[z] - (z - common_lines)); + } + for z in common_lines..mess.len() { + let new_line = ParsedScriptNode::Line(mess[z].clone()); + if last_index < tree.len() { + tree.insert(last_index + 1, new_line); + last_index += 1; + } else { + tree.push(new_line); + } + } + i = (i as isize + diff) as usize; + j = start_y + first_line_len + 2; + message_blocks.clear(); + continue; + } else if let ParsedLineNode::Tag(tag) = &node { + if self.message_tags.contains(&tag.name) { + for attr in &tag.attributes { + if let TagAttr::Str(value) = &attr.1 { + if !value.is_empty() && !value.is_ascii() { + let m = cur_mes.take().ok_or(anyhow::anyhow!( + "No enough messages." + ))?; + cur_mes = mes.next(); + let mut text = m.message.clone(); + if let Some(replacement) = replacement { + for (key, value) in replacement.map.iter() { + text = text.replace(key, value); + } + } + let li = &mut tree[i]; + if let ParsedScriptNode::Line(line) = li { + line[j].set_attr(&attr.0, text); + } + j += 1; + continue; + } + } + } + } + } + if in_block { + message_blocks.push((i, j)); + } + j += 1; + } + } + _ => {} + } + i += 1; + } + } else { + let mut is_end = false; + let mut name_command_block_line: Option<(usize, String)> = None; + let mut message_lines = Vec::new(); + while i < tree.len() { + match tree[i].clone() { + ParsedScriptNode::Label(_) => { + if !message_lines.is_empty() { + let m: &Message = cur_mes + .take() + .ok_or(anyhow::anyhow!("Not enough messages"))?; + if let Some((line, key)) = name_command_block_line.take() { + let name = m + .name + .as_ref() + .ok_or(anyhow::anyhow!("Name not found in message"))?; + let mut name = name.clone(); + if let Some(replacement) = replacement { + for (key, value) in replacement.map.iter() { + name = name.replace(key, value); + } + } + tree[line].set_attr(&key, name); + } + let mut text = m.message.to_owned(); + if let Some(replacement) = replacement { + for (key, value) in replacement.map.iter() { + text = text.replace(key, value); + } + } + let mess = XMLTextParser::new(&text, self.lf.clone()).parse()?; + let diff = mess.len() as isize - message_lines.len() as isize; + let common_lines = message_lines.len().min(mess.len()); + let mut last_index = message_lines.last().cloned().unwrap_or(0); + for j in 0..common_lines { + tree[message_lines[j]] = ParsedScriptNode::Line(mess[j].clone()); + } + for j in common_lines..message_lines.len() { + tree.remove(message_lines[j] - (j - common_lines)); + } + for i in common_lines..mess.len() { + let new_line = ParsedScriptNode::Line(mess[i].clone()); + if last_index < tree.len() { + tree.insert(last_index + 1, new_line); + last_index += 1; + } else { + tree.push(new_line); + } + } + i = (i as isize + diff) as usize; + } + message_lines.clear(); + is_end = false; + if cur_mes.is_none() { + cur_mes = mes.next(); + } + } + ParsedScriptNode::Line(line) => { + if !is_end { + message_lines.push(i); + is_end = line.last().map(|e| e.is_np()).unwrap_or(false); + } + } + ParsedScriptNode::Command(cmd) => { + if self.name_commands.contains(&cmd.name) { + for attr in &cmd.attributes { + if let TagAttr::Str(value) = &attr.1 { + if !value.is_empty() && !value.is_ascii() { + name_command_block_line = Some((i, attr.0.clone())); + break; // Only update the first name found + } + } + } + } else if self.message_commands.contains(&cmd.name) { + for attr in &cmd.attributes { + if let TagAttr::Str(value) = &attr.1 { + if !value.is_empty() && !value.is_ascii() { + let m = cur_mes + .take() + .ok_or(anyhow::anyhow!("Not enough messages"))?; + let mut text = m.message.clone(); + if let Some(replacement) = replacement { + for (key, value) in replacement.map.iter() { + text = text.replace(key, value); + } + } + tree[i].set_attr(&attr.0, text); + cur_mes = mes.next(); + break; // Only update the first message found + } } } } } + _ => {} } - _ => {} + i += 1; } - i += 1; - } - if !message_lines.is_empty() { - let m: &Message = cur_mes - .take() - .ok_or(anyhow::anyhow!("Not enough messages"))?; - if let Some((line, key)) = name_command_block_line.take() { - let name = m - .name - .as_ref() - .ok_or(anyhow::anyhow!("Name not found in message"))?; - let mut name = name.clone(); + if !message_lines.is_empty() { + let m: &Message = cur_mes + .take() + .ok_or(anyhow::anyhow!("Not enough messages"))?; + if let Some((line, key)) = name_command_block_line.take() { + let name = m + .name + .as_ref() + .ok_or(anyhow::anyhow!("Name not found in message"))?; + let mut name = name.clone(); + if let Some(replacement) = replacement { + for (key, value) in replacement.map.iter() { + name = name.replace(key, value); + } + } + tree[line].set_attr(&key, name); + } + let mut text = m.message.to_owned(); if let Some(replacement) = replacement { for (key, value) in replacement.map.iter() { - name = name.replace(key, value); + text = text.replace(key, value); } } - tree[line].set_attr(&key, name); - } - let mut text = m.message.to_owned(); - if let Some(replacement) = replacement { - for (key, value) in replacement.map.iter() { - text = text.replace(key, value); + let mess = XMLTextParser::new(&text, self.lf.clone()).parse()?; + let common_lines = message_lines.len().min(mess.len()); + let mut last_index = message_lines.last().cloned().unwrap_or(0); + for j in 0..common_lines { + tree[message_lines[j]] = ParsedScriptNode::Line(mess[j].clone()); } - } - let mess = XMLTextParser::new(&text).parse()?; - let common_lines = message_lines.len().min(mess.len()); - let mut last_index = message_lines.last().cloned().unwrap_or(0); - for j in 0..common_lines { - tree[message_lines[j]] = ParsedScriptNode::Line(mess[j].clone()); - } - for j in common_lines..message_lines.len() { - tree.remove(message_lines[j] - (j - common_lines)); - } - for i in common_lines..mess.len() { - let new_line = ParsedScriptNode::Line(mess[i].clone()); - if last_index < tree.len() { - tree.insert(last_index + 1, new_line); - last_index += 1; - } else { - tree.push(new_line); + for j in common_lines..message_lines.len() { + tree.remove(message_lines[j] - (j - common_lines)); + } + for i in common_lines..mess.len() { + let new_line = ParsedScriptNode::Line(mess[i].clone()); + if last_index < tree.len() { + tree.insert(last_index + 1, new_line); + last_index += 1; + } else { + tree.push(new_line); + } } } } diff --git a/src/types.rs b/src/types.rs index aca55f7..c2f49fe 100644 --- a/src/types.rs +++ b/src/types.rs @@ -616,6 +616,15 @@ pub struct ExtraConfig { #[cfg(feature = "emote-img")] /// Export Emote PIMG images as PSD files. pub emote_pimg_psd: bool, + #[cfg(feature = "kirikiri")] + /// Whether to only extract message between Talk and Hitret command in Kirikiri KS script. Auto detect if not specified. + pub kirikiri_ks_hitret: Option, + #[cfg(feature = "kirikiri")] + /// The line feed character used in Kirikiri KS script. + pub kirikiri_ks_lf: Option, + #[cfg(feature = "kirikiri")] + /// Kirikiri message tags, used to extract more message from ks script. + pub kirikiri_message_tags: std::sync::Arc>, } #[derive(Clone, Copy, Debug, ValueEnum, PartialEq, Eq, PartialOrd, Ord)]