mirror of
https://github.com/lifegpc/msg-tool.git
synced 2026-06-06 21:08:48 +08:00
Add kirikiri ks script extract support
This commit is contained in:
53
Cargo.lock
generated
53
Cargo.lock
generated
@@ -14,6 +14,15 @@ version = "2.0.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "320119579fcad9c21884f5c4861d16174d0e06250625266f50fe6898340abefa"
|
||||
|
||||
[[package]]
|
||||
name = "aho-corasick"
|
||||
version = "1.1.3"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "8e60d3430d3a69478ad0993f19238d2df97c507009a52b3c10addcd7f6bcb916"
|
||||
dependencies = [
|
||||
"memchr",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "anstream"
|
||||
version = "0.6.18"
|
||||
@@ -70,6 +79,21 @@ version = "1.0.98"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "e16d2d3311acee920a9eb8d33b8cbc1787ce4a264e85f964c2404b969bdcd487"
|
||||
|
||||
[[package]]
|
||||
name = "bit-set"
|
||||
version = "0.8.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "08807e080ed7f9d5433fa9b275196cfc35414f66a0c79d864dc51a0d825231a3"
|
||||
dependencies = [
|
||||
"bit-vec",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "bit-vec"
|
||||
version = "0.8.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "5e764a1d40d510daf35e07be9eb06e75770908c27d411ee6c92109c9840eaaf7"
|
||||
|
||||
[[package]]
|
||||
name = "bitflags"
|
||||
version = "1.3.2"
|
||||
@@ -291,6 +315,17 @@ dependencies = [
|
||||
"cfg-if",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "fancy-regex"
|
||||
version = "0.14.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "6e24cb5a94bcae1e5408b0effca5cd7172ea3c5755049c5f3af4cd283a165298"
|
||||
dependencies = [
|
||||
"bit-set",
|
||||
"regex-automata",
|
||||
"regex-syntax",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "fdeflate"
|
||||
version = "0.3.7"
|
||||
@@ -418,6 +453,7 @@ dependencies = [
|
||||
"csv",
|
||||
"emote-psb",
|
||||
"encoding_rs",
|
||||
"fancy-regex",
|
||||
"flate2",
|
||||
"int-enum",
|
||||
"lazy_static",
|
||||
@@ -544,6 +580,23 @@ dependencies = [
|
||||
"getrandom",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "regex-automata"
|
||||
version = "0.4.9"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "809e8dc61f6de73b46c85f4c96486310fe304c434cfa43669d7b40f711150908"
|
||||
dependencies = [
|
||||
"aho-corasick",
|
||||
"memchr",
|
||||
"regex-syntax",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "regex-syntax"
|
||||
version = "0.8.5"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "2b15c43186be67a4fd63bee50d0303afffcef381492ebe2c5d87f324e1b8815c"
|
||||
|
||||
[[package]]
|
||||
name = "ryu"
|
||||
version = "1.0.20"
|
||||
|
||||
@@ -10,6 +10,7 @@ clap = { version = "4.5", features = ["derive"] }
|
||||
csv = "1.3"
|
||||
emote-psb = { version = "0.5", optional = true }
|
||||
encoding_rs = "0.8"
|
||||
fancy-regex = { version = "0.14", optional = true }
|
||||
flate2 = { version = "1.1", optional = true }
|
||||
int-enum = { version = "1.2", optional = true }
|
||||
lazy_static = "1.5.0"
|
||||
@@ -33,7 +34,7 @@ cat-system-img = ["cat-system", "flate2", "image", "utils-bit-stream"]
|
||||
circus = []
|
||||
escude = ["int-enum"]
|
||||
escude-arc = ["escude", "rand", "utils-bit-stream"]
|
||||
kirikiri = ["emote-psb", "flate2"]
|
||||
kirikiri = ["emote-psb", "fancy-regex", "flate2", "utils-escape"]
|
||||
yaneurao = []
|
||||
yaneurao-itufuru = ["yaneurao"]
|
||||
# basic feature
|
||||
@@ -41,6 +42,7 @@ image = ["png"]
|
||||
# utils feature
|
||||
utils-bit-stream = []
|
||||
utils-crc32 = []
|
||||
utils-escape = ["fancy-regex"]
|
||||
|
||||
[target.'cfg(windows)'.dependencies]
|
||||
windows-sys = { version = "0", features = ["Win32_Globalization", "Win32_System_Diagnostics_Debug"] }
|
||||
|
||||
22
src/args.rs
22
src/args.rs
@@ -123,6 +123,28 @@ pub struct Arg {
|
||||
#[arg(long, global = true)]
|
||||
/// Kirikiri COMU message translation file. (Map<String, String>, key is original text, value is translated text.)
|
||||
pub kirikiri_comumode_json: Option<String>,
|
||||
#[cfg(feature = "kirikiri")]
|
||||
#[arg(long, global = true, action = ArgAction::SetTrue, alias = "kr-no-empty-lines", alias = "kirikiri-no-empty-lines")]
|
||||
/// Remove empty lines in Kirikiri KS script.
|
||||
pub kirikiri_remove_empty_lines: bool,
|
||||
#[cfg(feature = "kirikiri")]
|
||||
#[arg(
|
||||
long,
|
||||
global = true,
|
||||
value_delimiter = ',',
|
||||
default_value = "nm,set_title,speaker,Talk,talk,cn,name,名前"
|
||||
)]
|
||||
/// Kirikiri name commands, used to extract names from ks script.
|
||||
pub kirikiri_name_commands: Vec<String>,
|
||||
#[cfg(feature = "kirikiri")]
|
||||
#[arg(
|
||||
long,
|
||||
global = true,
|
||||
value_delimiter = ',',
|
||||
default_value = "sel01,sel02,sel03,sel04,AddSelect,ruby,exlink,e_xlink"
|
||||
)]
|
||||
/// Kirikiri message commands, used to extract more message from ks script.
|
||||
pub kirikiri_message_commands: Vec<String>,
|
||||
#[command(subcommand)]
|
||||
/// Command
|
||||
pub command: Command,
|
||||
|
||||
10
src/main.rs
10
src/main.rs
@@ -1365,6 +1365,16 @@ fn main() {
|
||||
.kirikiri_comumode_json
|
||||
.as_ref()
|
||||
.map(|s| scripts::kirikiri::read_kirikiri_comu_json(s).unwrap()),
|
||||
#[cfg(feature = "kirikiri")]
|
||||
kirikiri_remove_empty_lines: arg.kirikiri_remove_empty_lines,
|
||||
#[cfg(feature = "kirikiri")]
|
||||
kirikiri_name_commands: std::sync::Arc::new(std::collections::HashSet::from_iter(
|
||||
arg.kirikiri_name_commands.iter().cloned(),
|
||||
)),
|
||||
#[cfg(feature = "kirikiri")]
|
||||
kirikiri_message_commands: std::sync::Arc::new(std::collections::HashSet::from_iter(
|
||||
arg.kirikiri_message_commands.iter().cloned(),
|
||||
)),
|
||||
};
|
||||
match &arg.command {
|
||||
args::Command::Export { input, output } => {
|
||||
|
||||
551
src/scripts/kirikiri/ks.rs
Normal file
551
src/scripts/kirikiri/ks.rs
Normal file
@@ -0,0 +1,551 @@
|
||||
use crate::scripts::base::*;
|
||||
use crate::types::*;
|
||||
use crate::utils::encoding::*;
|
||||
use crate::utils::escape::*;
|
||||
use anyhow::Result;
|
||||
use fancy_regex::Regex;
|
||||
use std::collections::HashSet;
|
||||
use std::io::Write;
|
||||
use std::ops::{Deref, DerefMut};
|
||||
use std::sync::Arc;
|
||||
|
||||
/// Stateless factory for [`KsScript`]; it is the [`ScriptBuilder`] entry
/// point for Kirikiri `.ks` files.
#[derive(Debug, Default)]
pub struct KsBuilder {}

impl KsBuilder {
    /// Creates a builder. Equivalent to [`KsBuilder::default`].
    pub fn new() -> Self {
        Self::default()
    }
}
|
||||
|
||||
impl ScriptBuilder for KsBuilder {
|
||||
fn default_encoding(&self) -> Encoding {
|
||||
Encoding::Cp932
|
||||
}
|
||||
|
||||
fn build_script(
|
||||
&self,
|
||||
buf: Vec<u8>,
|
||||
_filename: &str,
|
||||
encoding: Encoding,
|
||||
_archive_encoding: Encoding,
|
||||
config: &ExtraConfig,
|
||||
) -> Result<Box<dyn Script>> {
|
||||
Ok(Box::new(KsScript::new(buf, encoding, config)?))
|
||||
}
|
||||
|
||||
fn extensions(&self) -> &'static [&'static str] {
|
||||
&["ks"]
|
||||
}
|
||||
|
||||
fn script_type(&self) -> &'static ScriptType {
|
||||
&ScriptType::Kirikiri
|
||||
}
|
||||
}
|
||||
|
||||
/// A piece of a parsed KS script that can be rendered back to script text.
trait Node {
    /// Returns the KS source text for this node.
    fn serialize(&self) -> String;
}
|
||||
|
||||
#[derive(Clone, Debug)]
|
||||
struct CommentNode(String);
|
||||
|
||||
impl Node for CommentNode {
|
||||
fn serialize(&self) -> String {
|
||||
format!("; {}", self.0)
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Clone, Debug)]
|
||||
struct LabelNode {
|
||||
name: String,
|
||||
page: Option<String>,
|
||||
}
|
||||
|
||||
impl Node for LabelNode {
|
||||
fn serialize(&self) -> String {
|
||||
if let Some(page) = &self.page {
|
||||
format!("*{}|{}", self.name, page)
|
||||
} else {
|
||||
format!("*{}", self.name)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Clone, Debug)]
|
||||
struct TextNode(String);
|
||||
|
||||
impl Node for TextNode {
|
||||
fn serialize(&self) -> String {
|
||||
// In KAG, [ is escaped as [[
|
||||
self.0.replace("[", "[[")
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Clone, Debug)]
|
||||
struct EmptyLineNode;
|
||||
|
||||
impl Node for EmptyLineNode {
|
||||
fn serialize(&self) -> String {
|
||||
String::new()
|
||||
}
|
||||
}
|
||||
|
||||
/// Value of a single tag or command attribute.
#[derive(Clone, Debug)]
enum TagAttr {
    /// The attribute was written as a bare key, without `=value`.
    True,
    /// The attribute was written as `key=value`; holds the value text.
    Str(String),
}
|
||||
|
||||
#[derive(Clone, Debug)]
|
||||
struct TagNode {
|
||||
name: String,
|
||||
attributes: Vec<(String, TagAttr)>,
|
||||
}
|
||||
|
||||
impl TagNode {
|
||||
fn serialize_attributes(&self) -> String {
|
||||
let mut parts = Vec::new();
|
||||
for (key, value) in self.attributes.iter() {
|
||||
match value {
|
||||
TagAttr::True => {
|
||||
parts.push(key.clone());
|
||||
}
|
||||
TagAttr::Str(val) => {
|
||||
if val.contains(" ") || val.contains("=") {
|
||||
parts.push(format!("{}=\"{}\"", key, val));
|
||||
} else {
|
||||
parts.push(format!("{}={}", key, val));
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
parts.join(" ")
|
||||
}
|
||||
|
||||
fn ser_attributes_xml(&self) -> String {
|
||||
let mut parts = Vec::new();
|
||||
for (key, value) in self.attributes.iter() {
|
||||
match value {
|
||||
TagAttr::True => {
|
||||
parts.push(key.clone());
|
||||
}
|
||||
TagAttr::Str(val) => {
|
||||
parts.push(format!("{}=\"{}\"", key, escape_xml_attr_value(val)));
|
||||
}
|
||||
}
|
||||
}
|
||||
parts.join(" ")
|
||||
}
|
||||
|
||||
fn to_xml_tag(&self) -> String {
|
||||
let attr_str = self.ser_attributes_xml();
|
||||
if attr_str.is_empty() {
|
||||
format!("<{}>", self.name)
|
||||
} else {
|
||||
format!("<{} {}>", self.name, attr_str)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl Node for TagNode {
|
||||
fn serialize(&self) -> String {
|
||||
let attr_str = self.serialize_attributes();
|
||||
if attr_str.is_empty() {
|
||||
format!("[{}]", self.name)
|
||||
} else {
|
||||
format!("[{} {}]", self.name, attr_str)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Clone)]
|
||||
struct CommandNode {
|
||||
inner: TagNode,
|
||||
}
|
||||
|
||||
impl Deref for CommandNode {
|
||||
type Target = TagNode;
|
||||
|
||||
fn deref(&self) -> &Self::Target {
|
||||
&self.inner
|
||||
}
|
||||
}
|
||||
|
||||
impl DerefMut for CommandNode {
|
||||
fn deref_mut(&mut self) -> &mut Self::Target {
|
||||
&mut self.inner
|
||||
}
|
||||
}
|
||||
|
||||
impl std::fmt::Debug for CommandNode {
|
||||
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
|
||||
f.debug_struct("CommandNode")
|
||||
.field("name", &self.inner.name)
|
||||
.field("attributes", &self.inner.attributes)
|
||||
.finish()
|
||||
}
|
||||
}
|
||||
|
||||
impl Node for CommandNode {
|
||||
fn serialize(&self) -> String {
|
||||
let attr_str = self.inner.serialize_attributes();
|
||||
if attr_str.is_empty() {
|
||||
format!("@{}", self.inner.name)
|
||||
} else {
|
||||
format!("@{} {}", self.inner.name, attr_str)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Clone, Debug)]
|
||||
struct ScriptBlockNode(String);
|
||||
|
||||
impl Node for ScriptBlockNode {
|
||||
fn serialize(&self) -> String {
|
||||
format!("[iscript]\n{}\n[endscript]", self.0)
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Clone, Debug)]
|
||||
enum ParsedLineNode {
|
||||
Text(TextNode),
|
||||
Tag(TagNode),
|
||||
}
|
||||
|
||||
impl ParsedLineNode {
|
||||
fn to_xml(&self) -> String {
|
||||
match self {
|
||||
ParsedLineNode::Text(text_node) => escape_xml_text_value(&text_node.0),
|
||||
ParsedLineNode::Tag(tag_node) => {
|
||||
if tag_node.name == "r" && tag_node.attributes.is_empty() {
|
||||
"\n".to_string()
|
||||
} else {
|
||||
tag_node.to_xml_tag()
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl Node for ParsedLineNode {
|
||||
fn serialize(&self) -> String {
|
||||
match self {
|
||||
ParsedLineNode::Text(text_node) => text_node.serialize(),
|
||||
ParsedLineNode::Tag(tag_node) => tag_node.serialize(),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Clone, Debug)]
|
||||
struct ParsedLine(Vec<ParsedLineNode>);
|
||||
|
||||
impl ParsedLine {
|
||||
fn to_xml(&self) -> String {
|
||||
let mut s = String::new();
|
||||
for node in &self.0 {
|
||||
s.push_str(&node.to_xml());
|
||||
}
|
||||
s
|
||||
}
|
||||
}
|
||||
|
||||
impl Node for ParsedLine {
|
||||
fn serialize(&self) -> String {
|
||||
self.0
|
||||
.iter()
|
||||
.map(|node| node.serialize())
|
||||
.collect::<Vec<_>>()
|
||||
.join("")
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Clone, Debug)]
|
||||
enum ParsedScriptNode {
|
||||
Comment(CommentNode),
|
||||
Label(LabelNode),
|
||||
Command(CommandNode),
|
||||
ScriptBlock(ScriptBlockNode),
|
||||
Line(ParsedLine),
|
||||
EmptyLine(EmptyLineNode),
|
||||
}
|
||||
|
||||
impl Node for ParsedScriptNode {
|
||||
fn serialize(&self) -> String {
|
||||
match self {
|
||||
ParsedScriptNode::Comment(comment) => comment.serialize(),
|
||||
ParsedScriptNode::Label(label) => label.serialize(),
|
||||
ParsedScriptNode::Command(command) => command.serialize(),
|
||||
ParsedScriptNode::ScriptBlock(script_block) => script_block.serialize(),
|
||||
ParsedScriptNode::Line(line) => line.serialize(),
|
||||
ParsedScriptNode::EmptyLine(empty_line) => empty_line.serialize(),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Clone, Debug)]
|
||||
struct ParsedScript(Vec<ParsedScriptNode>);
|
||||
|
||||
impl ParsedScript {
|
||||
fn iter(&self) -> impl Iterator<Item = &ParsedScriptNode> {
|
||||
self.0.iter()
|
||||
}
|
||||
|
||||
fn iter_mut(&mut self) -> impl Iterator<Item = &mut ParsedScriptNode> {
|
||||
self.0.iter_mut()
|
||||
}
|
||||
}
|
||||
|
||||
impl Node for ParsedScript {
|
||||
fn serialize(&self) -> String {
|
||||
self.0
|
||||
.iter()
|
||||
.map(|node| node.serialize())
|
||||
.collect::<Vec<_>>()
|
||||
.join("\n")
|
||||
}
|
||||
}
|
||||
|
||||
lazy_static::lazy_static! {
|
||||
static ref LINE_SPLIT_RE: Regex = Regex::new(r"(\[.*?\])").unwrap();
|
||||
static ref ATTR_RE: Regex = Regex::new("([a-zA-Z0-9_]+)(?:=(\"[^\"]*\" |'[^']*' |[^\\s\\]]+))?").unwrap();
|
||||
}
|
||||
|
||||
struct Parser {
|
||||
lines: Vec<String>,
|
||||
}
|
||||
|
||||
impl Parser {
|
||||
pub fn new(script: &str) -> Self {
|
||||
let lines = script.lines().map(|s| s.to_string()).collect();
|
||||
Self { lines }
|
||||
}
|
||||
|
||||
fn parse_attributes(attr_str: &str) -> Result<Vec<(String, TagAttr)>> {
|
||||
let mut attributes = Vec::new();
|
||||
for cap in ATTR_RE.captures_iter(attr_str) {
|
||||
let cap = cap?;
|
||||
let key = cap
|
||||
.get(1)
|
||||
.ok_or(anyhow::anyhow!("Invalid attribute key"))?
|
||||
.as_str()
|
||||
.to_string();
|
||||
let value = cap
|
||||
.get(2)
|
||||
.map(|v| {
|
||||
let mut s = v.as_str().to_string();
|
||||
if s.starts_with("\"") && s.ends_with("\"") {
|
||||
s = s[1..s.len() - 1].to_string();
|
||||
} else if s.starts_with("'") && s.ends_with("'") {
|
||||
s = s[1..s.len() - 1].to_string();
|
||||
}
|
||||
s = s.replace("`", "");
|
||||
TagAttr::Str(s)
|
||||
})
|
||||
.unwrap_or(TagAttr::True);
|
||||
attributes.push((key, value));
|
||||
}
|
||||
Ok(attributes)
|
||||
}
|
||||
|
||||
fn parse_tag_or_command(content: &str) -> Result<TagNode> {
|
||||
let parts = content.trim().split_ascii_whitespace().collect::<Vec<_>>();
|
||||
let tag_name = parts[0].to_string();
|
||||
let attr_string = parts[1..].join(" ");
|
||||
let attrs = Self::parse_attributes(&attr_string)?;
|
||||
Ok(TagNode {
|
||||
name: tag_name,
|
||||
attributes: attrs,
|
||||
})
|
||||
}
|
||||
|
||||
fn parse(&self, preserve_empty_lines: bool) -> Result<ParsedScript> {
|
||||
let mut parsed_scripts = Vec::new();
|
||||
let mut in_script_block = false;
|
||||
let mut script_buffer = Vec::new();
|
||||
let mut i = 0;
|
||||
let line_count = self.lines.len();
|
||||
while i < line_count {
|
||||
let line = self.lines[i].trim();
|
||||
i += 1;
|
||||
if line.is_empty() {
|
||||
if preserve_empty_lines {
|
||||
parsed_scripts.push(ParsedScriptNode::EmptyLine(EmptyLineNode));
|
||||
} else {
|
||||
continue;
|
||||
}
|
||||
}
|
||||
if in_script_block {
|
||||
if line == "[endscript]" {
|
||||
in_script_block = false;
|
||||
parsed_scripts.push(ParsedScriptNode::ScriptBlock(ScriptBlockNode(
|
||||
script_buffer.join("\n"),
|
||||
)));
|
||||
script_buffer.clear();
|
||||
} else {
|
||||
script_buffer.push(line.to_string());
|
||||
}
|
||||
continue;
|
||||
}
|
||||
if line == "[iscript]" {
|
||||
in_script_block = true;
|
||||
continue;
|
||||
}
|
||||
if line.starts_with(";") {
|
||||
parsed_scripts.push(ParsedScriptNode::Comment(CommentNode(
|
||||
line[1..].trim().to_string(),
|
||||
)));
|
||||
continue;
|
||||
}
|
||||
if line.starts_with("*") {
|
||||
let parts: Vec<&str> = line.split('|').collect();
|
||||
let label_name = parts[0][1..].trim().to_string();
|
||||
let page = if parts.len() > 1 {
|
||||
Some(parts[1..].join("|"))
|
||||
} else {
|
||||
None
|
||||
};
|
||||
parsed_scripts.push(ParsedScriptNode::Label(LabelNode {
|
||||
name: label_name,
|
||||
page,
|
||||
}));
|
||||
continue;
|
||||
}
|
||||
if line.starts_with("@") {
|
||||
let content = &line[1..];
|
||||
let tag_node = Self::parse_tag_or_command(content)?;
|
||||
parsed_scripts.push(ParsedScriptNode::Command(CommandNode { inner: tag_node }));
|
||||
continue;
|
||||
}
|
||||
let mut full_line = line.to_string();
|
||||
while full_line.ends_with("\\") {
|
||||
full_line.pop(); // Remove the trailing backslash
|
||||
full_line = full_line.trim_end().to_string();
|
||||
if i < line_count {
|
||||
full_line.push(' ');
|
||||
full_line.push_str(&self.lines[i].trim());
|
||||
i += 1;
|
||||
} else {
|
||||
break; // No more lines to append
|
||||
}
|
||||
}
|
||||
let mut parsed_line_nodes = Vec::new();
|
||||
for part in LINE_SPLIT_RE.split(&full_line) {
|
||||
let part = part?;
|
||||
if part.is_empty() {
|
||||
continue;
|
||||
}
|
||||
if part.starts_with("[") && part.ends_with("]") {
|
||||
if part == "[[r]]" {
|
||||
parsed_line_nodes.push(ParsedLineNode::Text(TextNode("[r]".to_string())));
|
||||
} else if part == "[[[[" {
|
||||
parsed_line_nodes.push(ParsedLineNode::Text(TextNode("[[".to_string())));
|
||||
} else if part.starts_with("[[") {
|
||||
parsed_line_nodes
|
||||
.push(ParsedLineNode::Text(TextNode(part[1..].to_string())))
|
||||
} else {
|
||||
parsed_line_nodes.push(ParsedLineNode::Tag(Self::parse_tag_or_command(
|
||||
&part[1..part.len() - 1],
|
||||
)?));
|
||||
}
|
||||
}
|
||||
}
|
||||
if !parsed_line_nodes.is_empty() {
|
||||
parsed_scripts.push(ParsedScriptNode::Line(ParsedLine(parsed_line_nodes)));
|
||||
}
|
||||
}
|
||||
Ok(ParsedScript(parsed_scripts))
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Debug)]
|
||||
pub struct KsScript {
|
||||
bom: BomType,
|
||||
tree: ParsedScript,
|
||||
name_commands: Arc<HashSet<String>>,
|
||||
message_commands: Arc<HashSet<String>>,
|
||||
}
|
||||
|
||||
impl KsScript {
|
||||
pub fn new(reader: Vec<u8>, encoding: Encoding, config: &ExtraConfig) -> Result<Self> {
|
||||
let (text, bom) = decode_with_bom_detect(encoding, &reader)?;
|
||||
let parser = Parser::new(&text);
|
||||
let tree = parser.parse(!config.kirikiri_remove_empty_lines)?;
|
||||
Ok(Self {
|
||||
bom,
|
||||
tree,
|
||||
name_commands: config.kirikiri_name_commands.clone(),
|
||||
message_commands: config.kirikiri_message_commands.clone(),
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
impl Script for KsScript {
|
||||
fn default_output_script_type(&self) -> OutputScriptType {
|
||||
OutputScriptType::Json
|
||||
}
|
||||
|
||||
fn default_format_type(&self) -> FormatOptions {
|
||||
FormatOptions::None
|
||||
}
|
||||
|
||||
fn extract_messages(&self) -> Result<Vec<Message>> {
|
||||
let mut messages = Vec::new();
|
||||
let mut name = None;
|
||||
for obj in self.tree.iter() {
|
||||
match obj {
|
||||
ParsedScriptNode::Line(line) => messages.push(Message {
|
||||
name: name.take(),
|
||||
message: line.to_xml(),
|
||||
}),
|
||||
ParsedScriptNode::Command(cmd) => {
|
||||
if self.name_commands.contains(&cmd.name) {
|
||||
for attr in &cmd.attributes {
|
||||
if let TagAttr::Str(value) = &attr.1 {
|
||||
if !value.is_empty() && !value.is_ascii() {
|
||||
name = Some(value.clone());
|
||||
break; // Only take the first name found
|
||||
}
|
||||
}
|
||||
}
|
||||
} else if self.message_commands.contains(&cmd.name) {
|
||||
for attr in &cmd.attributes {
|
||||
if let TagAttr::Str(value) = &attr.1 {
|
||||
if !value.is_empty() && !value.is_ascii() {
|
||||
messages.push(Message {
|
||||
name: name.take(),
|
||||
message: value.clone(),
|
||||
});
|
||||
break; // Only take the first message found
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
_ => {}
|
||||
}
|
||||
}
|
||||
Ok(messages)
|
||||
}
|
||||
|
||||
fn import_messages<'a>(
|
||||
&'a self,
|
||||
messages: Vec<Message>,
|
||||
mut file: Box<dyn WriteSeek + 'a>,
|
||||
encoding: Encoding,
|
||||
_replacement: Option<&'a ReplacementTable>,
|
||||
) -> Result<()> {
|
||||
let mut mes = messages.iter();
|
||||
let mut _cur_mes = mes.next();
|
||||
let mut tree = self.tree.clone();
|
||||
for obj in tree.iter_mut() {
|
||||
match obj {
|
||||
_ => {}
|
||||
}
|
||||
}
|
||||
let s = tree.serialize();
|
||||
let data = encode_string_with_bom(encoding, &s, false, self.bom)?;
|
||||
file.write_all(&data)?;
|
||||
Ok(())
|
||||
}
|
||||
}
|
||||
@@ -1,3 +1,4 @@
|
||||
pub mod ks;
|
||||
pub mod scn;
|
||||
pub mod simple_crypt;
|
||||
use std::collections::HashMap;
|
||||
|
||||
10
src/types.rs
10
src/types.rs
@@ -213,6 +213,12 @@ pub struct ExtraConfig {
|
||||
pub kirikiri_export_comumode: bool,
|
||||
#[cfg(feature = "kirikiri")]
|
||||
pub kirikiri_comumode_json: Option<std::sync::Arc<HashMap<String, String>>>,
|
||||
#[cfg(feature = "kirikiri")]
|
||||
pub kirikiri_remove_empty_lines: bool,
|
||||
#[cfg(feature = "kirikiri")]
|
||||
pub kirikiri_name_commands: std::sync::Arc<std::collections::HashSet<String>>,
|
||||
#[cfg(feature = "kirikiri")]
|
||||
pub kirikiri_message_commands: std::sync::Arc<std::collections::HashSet<String>>,
|
||||
}
|
||||
|
||||
#[derive(Clone, Copy, Debug, ValueEnum, PartialEq, Eq, PartialOrd, Ord)]
|
||||
@@ -276,6 +282,10 @@ pub enum ScriptType {
|
||||
#[value(alias("kr-simple-crypt"))]
|
||||
/// Kirikiri SimpleCrypt's text file
|
||||
KirikiriSimpleCrypt,
|
||||
#[cfg(feature = "kirikiri")]
|
||||
#[value(alias = "kr", alias = "kr-ks", alias = "kirikiri-ks")]
|
||||
/// Kirikiri script
|
||||
Kirikiri,
|
||||
#[cfg(feature = "yaneurao-itufuru")]
|
||||
#[value(alias("itufuru"))]
|
||||
/// Yaneurao Itufuru script
|
||||
|
||||
65
src/utils/escape.rs
Normal file
65
src/utils/escape.rs
Normal file
@@ -0,0 +1,65 @@
|
||||
use fancy_regex::Regex;
|
||||
|
||||
/// Escapes `s` for use inside a quoted XML attribute value.
///
/// `&`, `<`, `"` and `'` are replaced with `&amp;`, `&lt;`, `&quot;` and
/// `&apos;`; every other character (including `>`) is copied unchanged.
pub fn escape_xml_attr_value(s: &str) -> String {
    let mut escaped = String::with_capacity(s.len());
    for c in s.chars() {
        match c {
            '&' => escaped.push_str("&amp;"),
            '<' => escaped.push_str("&lt;"),
            '"' => escaped.push_str("&quot;"),
            '\'' => escaped.push_str("&apos;"),
            _ => escaped.push(c),
        }
    }
    escaped
}
|
||||
|
||||
/// Escapes `s` for use as XML text content.
///
/// `&`, `<`, `>`, `"` and `'` are replaced with `&amp;`, `&lt;`, `&gt;`,
/// `&quot;` and `&apos;`; every other character is copied unchanged.
pub fn escape_xml_text_value(s: &str) -> String {
    let mut escaped = String::with_capacity(s.len());
    for c in s.chars() {
        match c {
            '&' => escaped.push_str("&amp;"),
            '<' => escaped.push_str("&lt;"),
            '>' => escaped.push_str("&gt;"),
            '"' => escaped.push_str("&quot;"),
            '\'' => escaped.push_str("&apos;"),
            _ => escaped.push(c),
        }
    }
    escaped
}
|
||||
|
||||
/// Decodes the body of a numeric character reference, i.e. the text between
/// `&#` and `;` (`20320` or `x4F60`).
///
/// Returns `None` when `body` is not a well-formed decimal or `x`-prefixed
/// hexadecimal number. A well-formed number that is not a valid Unicode
/// scalar value (or overflows `u32`) decodes to U+FFFD.
fn decode_numeric_reference(body: &str) -> Option<char> {
    let code = match body.strip_prefix('x') {
        Some(hex) if !hex.is_empty() && hex.bytes().all(|b| b.is_ascii_hexdigit()) => {
            u32::from_str_radix(hex, 16)
        }
        None if !body.is_empty() && body.bytes().all(|b| b.is_ascii_digit()) => {
            body.parse::<u32>()
        }
        _ => return None,
    };
    Some(code.ok().and_then(char::from_u32).unwrap_or('\u{FFFD}'))
}

/// Replaces XML entity and character references in `s` with the characters
/// they denote.
///
/// Recognized: `&amp;`, `&lt;`, `&gt;`, `&quot;`, `&apos;`, decimal
/// references (`&#20320;`) and hexadecimal references (`&#x4F60;`).
/// Anything else starting with `&` is copied through unchanged.
///
/// The input is scanned once from left to right and decoded output is never
/// rescanned, so exactly one level of escaping is removed: `&amp;lt;`
/// becomes `&lt;`, not `<`.
pub fn unescape_xml(s: &str) -> String {
    let mut out = String::with_capacity(s.len());
    let mut rest = s;
    while let Some(amp) = rest.find('&') {
        out.push_str(&rest[..amp]);
        let tail = &rest[amp + 1..];
        // A reference ends at the first `;`. Reaching another `&` first means
        // this `&` is literal; stopping there keeps the whole scan linear.
        let decoded = tail
            .find(|c: char| c == ';' || c == '&')
            .filter(|&end| tail.as_bytes()[end] == b';')
            .and_then(|semi| {
                let name = &tail[..semi];
                let ch = match name {
                    "amp" => Some('&'),
                    "lt" => Some('<'),
                    "gt" => Some('>'),
                    "quot" => Some('"'),
                    "apos" => Some('\''),
                    _ => name.strip_prefix('#').and_then(decode_numeric_reference),
                }?;
                Some((ch, semi))
            });
        match decoded {
            Some((ch, semi)) => {
                out.push(ch);
                rest = &tail[semi + 1..];
            }
            None => {
                out.push('&');
                rest = tail;
            }
        }
    }
    out.push_str(rest);
    out
}
|
||||
|
||||
/// Checks named entities and numeric character references in `unescape_xml`.
#[test]
fn test_unescape_xml() {
    // Named entities; `&amp;amp;` loses exactly one level of escaping.
    assert_eq!(
        unescape_xml("Hello &amp;amp; World &lt;script&gt;alert(&apos;XSS&apos;)&lt;/script&gt;"),
        "Hello &amp; World <script>alert('XSS')</script>"
    );
    // Decimal (U+4F60) and hexadecimal (U+0020) character references.
    assert_eq!(unescape_xml("&#20320;TEST&#x20;"), "你TEST ");
}
|
||||
@@ -6,6 +6,8 @@ pub mod crc32;
|
||||
pub mod encoding;
|
||||
#[cfg(windows)]
|
||||
mod encoding_win;
|
||||
#[cfg(feature = "utils-escape")]
|
||||
pub mod escape;
|
||||
pub mod files;
|
||||
#[cfg(feature = "image")]
|
||||
pub mod img;
|
||||
|
||||
Reference in New Issue
Block a user