From 61b183d9402c52d4af9226bdaa575b19d04cdc77 Mon Sep 17 00:00:00 2001 From: lifegpc Date: Wed, 3 Sep 2025 23:33:42 +0800 Subject: [PATCH] Add load m3t dir support --- src/args.rs | 46 +++++++++++++++++++++++++ src/main.rs | 6 ++-- src/output_scripts/m3t.rs | 71 +++++++++++++++++++++++++++++++++++++++ src/utils/files.rs | 57 +++++++++++++++++++++++++++++++ 4 files changed, 176 insertions(+), 4 deletions(-) diff --git a/src/args.rs b/src/args.rs index 14e4ece..68e923a 100644 --- a/src/args.rs +++ b/src/args.rs @@ -203,6 +203,10 @@ pub struct Arg { /// Kirikiri chat message translation file. (Map, key is original text, value is translated text.) pub kirikiri_chat_json: Option, #[cfg(feature = "kirikiri")] + #[arg(long, global = true, group = "kirikiri_chat_jsong")] + /// Kirikiri chat message translation directory. All m3t files in this directory will be merged. (Only m3t files are supported.) + pub kirikiri_chat_dir: Option, + #[cfg(feature = "kirikiri")] #[arg(long, global = true, action = ArgAction::SetTrue, alias = "kr-no-empty-lines", alias = "kirikiri-no-empty-lines")] /// Remove empty lines in Kirikiri KS script.
/// Loads the Kirikiri chat-message translation table selected by the command line.
///
/// Sources are tried in this order:
///
/// 1. `arg.kirikiri_chat_json` — the file is read with `read_kirikiri_comu_json`.
/// 2. `arg.kirikiri_chat_dir` — every file in the directory whose name ends with the
///    extension of the selected output type is parsed with `M3tParser::parse_as_map`
///    and merged into one map. Only `M3t`, `M3ta` and `M3tTxt` are accepted as output
///    type here; anything else (or no output type) falls back to `M3t`.
///
/// Files are merged in sorted path order, so when the same original text appears in
/// several files the entry from the last path (in sort order) wins on every run.
///
/// Returns `Ok(None)` when neither option is set, or when the directory contains no
/// matching file.
///
/// # Errors
///
/// Returns an error if the JSON file cannot be loaded, if the directory cannot be
/// listed, or if any matching file cannot be read, decoded or parsed. Per-file errors
/// carry the offending path as context.
#[cfg(feature = "kirikiri")]
pub fn load_kirikiri_chat_json(
    arg: &Arg,
) -> anyhow::Result<Option<std::sync::Arc<std::collections::HashMap<String, String>>>> {
    if let Some(path) = &arg.kirikiri_chat_json {
        return Ok(Some(crate::scripts::kirikiri::read_kirikiri_comu_json(
            path,
        )?));
    }
    if let Some(dir) = &arg.kirikiri_chat_dir {
        // Only the M3T family can be parsed by `M3tParser`; ignore any other output type.
        let outt = arg
            .output_type
            .filter(|t| {
                matches!(
                    t,
                    OutputScriptType::M3t | OutputScriptType::M3ta | OutputScriptType::M3tTxt
                )
            })
            .unwrap_or(OutputScriptType::M3t);
        let mut files =
            crate::utils::files::find_ext_files(dir, arg.recursive, &[outt.as_ref()])?;
        if files.is_empty() {
            return Ok(None);
        }
        // Directory listing order is platform dependent; sort so that duplicate keys
        // are always resolved the same way.
        files.sort();
        let mut map = std::collections::HashMap::new();
        for file in files {
            let f = crate::utils::files::read_file(&file).map_err(|e| {
                anyhow::Error::from(e).context(format!("Failed to read {}", file))
            })?;
            let data = crate::utils::encoding::decode_to_string(
                crate::get_output_encoding(arg),
                &f,
                true,
            )
            .map_err(|e| anyhow::Error::from(e).context(format!("Failed to decode {}", file)))?;
            let m3t = crate::output_scripts::m3t::M3tParser::new(
                &data,
                arg.llm_trans_mark.as_deref(),
            )
            .parse_as_map()
            .map_err(|e| anyhow::Error::from(e).context(format!("Failed to parse {}", file)))?;
            map.extend(m3t);
        }
        return Ok(Some(std::sync::Arc::new(map)));
    }
    Ok(None)
}
+8,8 @@ //! △ LLM message //! ● Translated message //! ``` +use std::collections::HashMap; + use crate::types::Message; use anyhow::Result; @@ -48,6 +50,75 @@ impl<'a> M3tParser<'a> { } } + pub fn parse_as_map(&mut self) -> Result> { + let mut map = HashMap::new(); + let mut ori = None; + let mut llm = None; + while let Some(line) = self.next_line() { + if line.is_empty() { + continue; + } + if line.starts_with("○") { + let line = line[3..].trim(); + if !line.starts_with("NAME:") { + ori = Some(line.to_string()); + } + } else if line.starts_with("△") { + let line = line[3..].trim(); + llm = Some(line); + } else if line.starts_with("●") { + let message = line[3..].trim(); + let message = if message + .trim_start_matches("「") + .trim_end_matches("」") + .is_empty() + { + llm.take() + .map(|s| { + let mut s = s.to_string(); + if let Some(mark) = self.llm_mark { + s.push_str(mark); + } + s + }) + .unwrap_or_else(|| { + String::from(if message.starts_with("「") { + "「」" + } else { + "" + }) + }) + .replace("\\n", "\n") + } else { + let mut tmp = message.replace("\\n", "\n"); + if let Some(llm) = llm.take() { + if tmp == llm { + if let Some(mark) = self.llm_mark { + tmp.push_str(mark); + } + } + } + tmp + }; + if let Some(ori) = ori.take() { + map.insert(ori, message); + } else { + return Err(anyhow::anyhow!( + "Missing original message before translated message at line {}", + self.line + )); + } + } else { + return Err(anyhow::anyhow!( + "Invalid line format at line {}: {}", + self.line, + line + )); + } + } + Ok(map) + } + /// Parses the M3T format and returns a vector of messages. pub fn parse(&mut self) -> Result> { let mut messages = Vec::new(); diff --git a/src/utils/files.rs b/src/utils/files.rs index a9010c8..756215a 100644 --- a/src/utils/files.rs +++ b/src/utils/files.rs @@ -139,6 +139,63 @@ pub fn collect_files( )) } +/// Finds all files with specific extensions in the specified directory and its subdirectories. 
/// Finds all files whose name ends with one of the given extensions.
///
/// * `path` — directory to scan. If it is not a directory an empty list is returned.
/// * `recursive` — also scan subdirectories, applying the same extension filter.
/// * `exts` — extensions without the leading dot (e.g. `"m3t"`); matching is
///   case-insensitive and done on the end of the file name, so multi-part
///   extensions such as `"tar.gz"` work too.
///
/// Files without any of the extensions (including files with no extension at all)
/// are skipped, as are paths that are not valid UTF-8.
///
/// # Errors
///
/// Returns any I/O error raised while listing a directory.
pub fn find_ext_files(path: &str, recursive: bool, exts: &[&str]) -> io::Result<Vec<String>> {
    let mut result = Vec::new();
    let dir_path = Path::new(path);
    if dir_path.is_dir() {
        // Build the lower-cased ".ext" suffixes once instead of once per file.
        let suffixes: Vec<String> = exts
            .iter()
            .map(|ext| format!(".{}", ext.to_lowercase()))
            .collect();
        find_ext_files_in(dir_path, recursive, &suffixes, &mut result)?;
    }
    Ok(result)
}

/// Appends to `out` every file under `dir` whose lower-cased name ends with one of `suffixes`.
fn find_ext_files_in(
    dir: &Path,
    recursive: bool,
    suffixes: &[String],
    out: &mut Vec<String>,
) -> io::Result<()> {
    for entry in fs::read_dir(dir)? {
        let path = entry?.path();
        if path.is_file() {
            let matched = path.file_name().map_or(false, |name| {
                let name = name.to_string_lossy().to_lowercase();
                suffixes.iter().any(|suffix| name.ends_with(suffix.as_str()))
            });
            if matched {
                if let Some(path_str) = path.to_str() {
                    out.push(path_str.to_string());
                }
            }
        } else if recursive && path.is_dir() {
            // Recurse with the same filter (not the archive finder).
            find_ext_files_in(&path, recursive, suffixes, out)?;
        }
    }
    Ok(())
}

/// Collects files with specific extensions from `path`, which may be a directory or a single file.
///
/// Returns the file list together with a flag telling whether `path` was a directory.
/// A directory is scanned with [`find_ext_files`]; a single file is returned as is,
/// without checking its extension.
///
/// # Errors
///
/// Returns `io::ErrorKind::NotFound` when `path` is neither a file nor a directory,
/// or any I/O error raised while scanning the directory.
pub fn collect_ext_files(
    path: &str,
    recursive: bool,
    exts: &[&str],
) -> io::Result<(Vec<String>, bool)> {
    let pa = Path::new(path);
    if pa.is_dir() {
        Ok((find_ext_files(path, recursive, exts)?, true))
    } else if pa.is_file() {
        Ok((vec![path.to_string()], false))
    } else {
        Err(io::Error::new(
            io::ErrorKind::NotFound,
            format!("Path {} is neither a file nor a directory", pa.display()),
        ))
    }
}