diff --git a/src/args.rs b/src/args.rs index b7d6677..f4f7af1 100644 --- a/src/args.rs +++ b/src/args.rs @@ -669,11 +669,24 @@ pub fn get_artemis_panmimisoft_txt_blacklist_names( #[cfg(feature = "kirikiri")] pub fn load_kirikiri_chat_json( arg: &Arg, -) -> anyhow::Result>>> { +) -> anyhow::Result< + Option< + std::sync::Arc< + std::collections::HashMap>, + >, + >, +> { if let Some(path) = &arg.kirikiri_chat_json { - return Ok(Some(crate::scripts::kirikiri::read_kirikiri_comu_json( - path, - )?)); + return Ok(Some(std::sync::Arc::new( + crate::scripts::kirikiri::read_kirikiri_comu_json(path)? + .into_iter() + .map(|(k, v)| { + let v: std::collections::HashMap<_, _> = + v.into_iter().map(|(k, v)| (k, (v, 1))).collect(); + (k, v) + }) + .collect(), + ))); } if let Some(dir) = &arg.kirikiri_chat_dir { let mut outt = arg.output_type.unwrap_or(OutputScriptType::M3t); @@ -690,6 +703,7 @@ pub fn load_kirikiri_chat_json( let files = crate::utils::files::find_ext_files(dir, arg.recursive, &[outt.as_ref()])?; if !files.is_empty() { let mut map = std::collections::HashMap::new(); + let mut global = std::collections::HashMap::new(); for file in files { let f = crate::utils::files::read_file(&file)?; let data = crate::utils::encoding::decode_to_string( @@ -702,18 +716,40 @@ pub fn load_kirikiri_chat_json( &data, arg.llm_trans_mark.as_ref().map(|s| s.as_str()), ) - .parse_as_map()? + .parse_as_vec()? } else { crate::output_scripts::po::PoParser::new( &data, arg.llm_trans_mark.as_ref().map(|s| s.as_str()), ) - .parse_as_map()? + .parse_as_vec()? }; + let current_key = std::path::Path::new(&file) + .file_stem() + .and_then(|s| s.to_str()) + .unwrap_or("unknown") + .to_string(); + let mut entry = std::collections::HashMap::new(); for (k, v) in m3t { - map.insert(k.replace("\\[", "["), v.replace("\\[", "[")); + if v.is_empty() { + continue; + } + let k = k.replace("\\[", "["); + let v = v.replace("\\[", "["); + if let Some((_, count)) = entry.get_mut(&k) { + *count += 1; + } else { + entry.insert(k.clone(), (v.clone(), 1)); + } + if let Some((_, count)) = global.get_mut(&k) { + *count += 1; + } else { + global.insert(k, (v, 1)); + } } + map.insert(current_key, entry); } + map.insert("global".to_string(), global); return Ok(Some(std::sync::Arc::new(map))); } } diff --git a/src/output_scripts/m3t.rs b/src/output_scripts/m3t.rs index 523395e..30b03be 100644 --- a/src/output_scripts/m3t.rs +++ b/src/output_scripts/m3t.rs @@ -8,8 +8,6 @@ //! △ LLM message //! ● Translated message //! ``` -use std::collections::HashMap; - use crate::types::Message; use anyhow::Result; @@ -50,8 +48,8 @@ impl<'a> M3tParser<'a> { } } - pub fn parse_as_map(&mut self) -> Result> { - let mut map = HashMap::new(); + pub fn parse_as_vec(&mut self) -> Result> { + let mut map = Vec::new(); let mut ori = None; let mut llm = None; while let Some(line) = self.next_line() { @@ -103,7 +101,7 @@ impl<'a> M3tParser<'a> { tmp.replace("\\n", "\n") }; if let Some(ori) = ori.take() { - map.insert(ori, message); + map.push((ori, message)); } else { return Err(anyhow::anyhow!( "Missing original message before translated message at line {}", @@ -215,6 +213,6 @@ fn test_zero_width_space() { let mut parser = M3tParser::new(input, None); let messages = parser.parse().unwrap(); assert_eq!(messages.len(), 1); - let map = M3tParser::new(input, None).parse_as_map().unwrap(); + let map = M3tParser::new(input, None).parse_as_vec().unwrap(); assert_eq!(map.len(), 1); } diff --git a/src/output_scripts/po.rs b/src/output_scripts/po.rs index c6041eb..8d5aab4 100644 --- a/src/output_scripts/po.rs +++ b/src/output_scripts/po.rs @@ -589,8 +589,8 @@ impl<'a> PoParser<'a> { r } - pub fn parse_as_map(&mut self) -> Result> { - let mut map = HashMap::new(); + pub fn parse_as_vec(&mut self) -> Result> { + let mut map = Vec::new(); let mut llm = None; for (i, entry) in self.parse_entries()?.into_iter().enumerate() { if entry.msgid.is_empty() && i == 0 { @@ -637,7 +637,7 @@ impl<'a> PoParser<'a> { return Err(anyhow!("Plural msgstr not supported in this context")); } }; - map.insert(entry.msgid, message); + map.push((entry.msgid, message)); } Ok(map) } diff --git a/src/scripts/kirikiri/mod.rs b/src/scripts/kirikiri/mod.rs index bb1777e..294ef3a 100644 --- a/src/scripts/kirikiri/mod.rs +++ b/src/scripts/kirikiri/mod.rs @@ -8,11 +8,12 @@ pub mod simple_crypt; pub mod tjs2; pub mod tjs_ns0; use std::collections::HashMap; -use std::sync::Arc; /// Read a Kirikiri Comu JSON file. (For CIRCUS games) -pub fn read_kirikiri_comu_json(path: &str) -> anyhow::Result>> { +pub fn read_kirikiri_comu_json( + path: &str, +) -> anyhow::Result>> { let mut reader = std::fs::File::open(path)?; let data = serde_json::from_reader(&mut reader)?; - Ok(Arc::new(data)) + Ok(data) } diff --git a/src/scripts/kirikiri/scn.rs b/src/scripts/kirikiri/scn.rs index d89f905..81d1906 100644 --- a/src/scripts/kirikiri/scn.rs +++ b/src/scripts/kirikiri/scn.rs @@ -1,6 +1,7 @@ //! Kirikiri Scene File (.scn) use super::mdf::Mdf; use crate::ext::io::*; +use crate::ext::mutex::*; use crate::ext::psb::*; use crate::scripts::base::*; use crate::types::*; @@ -11,7 +12,7 @@ use fancy_regex::Regex; use std::collections::{HashMap, HashSet}; use std::io::{Read, Seek}; use std::path::Path; -use std::sync::Arc; +use std::sync::{Arc, Mutex}; #[derive(Debug)] /// Kirikiri Scene Script Builder @@ -114,7 +115,7 @@ pub struct ScnScript { export_chat: bool, filename: String, chat_key: Option, - chat_json: Option>>, + chat_json: Option>>>, custom_yaml: bool, title: bool, chat_multilang: bool, @@ -452,6 +453,7 @@ impl Script for ScnScript { } else { None }, + self.filename.clone(), ) }); for (i, scene) in scenes.members_mut().enumerate() { @@ -878,29 +880,89 @@ impl ExportMes { } } +lazy_static::lazy_static! { + static ref DUP_WARN_SHOWN: Mutex> = Mutex::new(HashSet::new()); + static ref NOT_FOUND_WARN_SHOWN: Mutex> = Mutex::new(HashSet::new()); +} + +fn warn_dup(original: String, count: usize, filename: String) { + let mut guard = DUP_WARN_SHOWN.lock_blocking(); + if guard.contains(&(original.clone(), count, filename.clone())) { + return; + } + eprintln!( + "Warning: chat message '{}' has {} duplicates in translation table '{}'. Using the first one.", + original, count, filename + ); + crate::COUNTER.inc_warning(); + guard.insert((original.clone(), count, filename.clone())); +} + +fn warn_not_found(original: String) { + let mut guard = NOT_FOUND_WARN_SHOWN.lock_blocking(); + if guard.contains(&original) { + return; + } + eprintln!( + "Warning: chat message '{}' not found in translation table.", + original + ); + crate::COUNTER.inc_warning(); + guard.insert(original); +} + #[derive(Debug)] struct ImportMes<'a> { - messages: &'a Arc>, + messages: &'a Arc>>, replacement: Option<&'a ReplacementTable>, key: String, text_key: String, + filename: String, } impl<'a> ImportMes<'a> { pub fn new( - messages: &'a Arc>, + messages: &'a Arc>>, replacement: Option<&'a ReplacementTable>, key: String, lang: Option, + filename: String, ) -> Self { Self { messages, replacement, key: key, text_key: lang.map_or_else(|| String::from("text"), |s| format!("text_{}", s)), + filename: std::path::Path::new(&filename) + .file_stem() + .map(|s| s.to_string_lossy().to_string()) + .unwrap_or_else(|| "global".to_string()), } } + fn get_message(&self, original: &str) -> Option { + if let Some(global) = self.messages.get(&self.filename) { + if let Some(text) = global.get(original) { + if text.1 > 1 { + warn_dup(original.to_string(), text.1, self.filename.clone()); + } + return Some(text.0.clone()); + } + } + if self.filename == "global" { + return None; + } + if let Some(file) = self.messages.get("global") { + if let Some(text) = file.get(original) { + if text.1 > 1 { + warn_dup(original.to_string(), text.1, "global".to_string()); + } + return Some(text.0.clone()); + } + } + None + } + pub fn import(&self, value: &mut PsbValueFixed) { match value { PsbValueFixed::Object(obj) => { @@ -908,7 +970,7 @@ impl<'a> ImportMes<'a> { if k == &self.key { for obj in v.members_mut() { if let Some(text) = obj[&self.text_key].as_str() { - if let Some(replace_text) = self.messages.get(text) { + if let Some(replace_text) = self.get_message(text) { let mut text = replace_text.clone(); if let Some(replacement) = self.replacement { for (key, value) in replacement.map.iter() { @@ -918,15 +980,11 @@ impl<'a> ImportMes<'a> { obj[&self.text_key].set_string(text.replace("\n", "\\n")); continue; } else { - eprintln!( - "Warning: chat message '{}' not found in translation table.", - text - ); - crate::COUNTER.inc_warning(); + warn_not_found(text.to_string()); } } if let Some(text) = obj["text"].as_str() { - if let Some(replace_text) = self.messages.get(text) { + if let Some(replace_text) = self.get_message(text) { let mut text = replace_text.clone(); if let Some(replacement) = self.replacement { for (key, value) in replacement.map.iter() { @@ -935,11 +993,7 @@ impl<'a> ImportMes<'a> { } obj[&self.text_key].set_string(text.replace("\n", "\\n")); } else { - eprintln!( - "Warning: chat message '{}' not found in translation table.", - text - ); - crate::COUNTER.inc_warning(); + warn_not_found(text.to_string()); } } } @@ -954,7 +1008,7 @@ impl<'a> ImportMes<'a> { for i in 1..list.len() { if list[i - 1] == self.text_key { if let Some(text) = list[i].as_str() { - if let Some(replace_text) = self.messages.get(text) { + if let Some(replace_text) = self.get_message(text) { let mut text = replace_text.clone(); if let Some(replacement) = self.replacement { for (key, value) in replacement.map.iter() { @@ -964,11 +1018,7 @@ impl<'a> ImportMes<'a> { list[i].set_string(text.replace("\n", "\\n")); return; } else { - eprintln!( - "Warning: chat message '{}' not found in translation table.", - text - ); - crate::COUNTER.inc_warning(); + warn_not_found(text.to_string()); } } } @@ -979,7 +1029,7 @@ impl<'a> ImportMes<'a> { for i in 1..list.len() { if list[i - 1] == "text" { if let Some(text) = list[i].as_str() { - if let Some(replace_text) = self.messages.get(text) { + if let Some(replace_text) = self.get_message(text) { let mut text = replace_text.clone(); if let Some(replacement) = self.replacement { for (key, value) in replacement.map.iter() { @@ -991,11 +1041,7 @@ impl<'a> ImportMes<'a> { list[len + 1].set_string(text.replace("\n", "\\n")); return; } else { - eprintln!( - "Warning: chat message '{}' not found in translation table.", - text - ); - crate::COUNTER.inc_warning(); + warn_not_found(text.to_string()); } } } diff --git a/src/types.rs b/src/types.rs index d7b8333..d684139 100644 --- a/src/types.rs +++ b/src/types.rs @@ -304,8 +304,10 @@ pub struct ExtraConfig { /// If not specified, "comumode" will be used. pub kirikiri_chat_key: Option, #[cfg(feature = "kirikiri")] - /// Kirikiri chat message translation. key is original text, value is translated text. - pub kirikiri_chat_json: Option>>, + /// Kirikiri chat message translation. The outter object's key is filename(`global` is a special key). + /// The inner object: key is original text, value is (translated text, original text count). + pub kirikiri_chat_json: + Option>>>, #[cfg(feature = "kirikiri")] /// Kirikiri language list. First language code is code for language index 1. pub kirikiri_languages: Option>>,