Revised patch: adds the `convert` subcommand (conversion between output script formats).

Notes for the reviewer:
  * Generic arguments missing from the received text (e.g. `Option<String>`,
    `Vec<types::Message>`, `AsRef<str>`) were restored by inference from how the values are
    used; confirm them against the tree before applying.
  * The `EXIT_LISTENER` context line in src/main.rs is kept exactly as received; its type
    arguments could not be recovered.
  * The `index` blob hashes are those of the original patch and do not reflect the revisions.

diff --git a/src/args.rs b/src/args.rs
index 98d889a..d6e830d 100644
--- a/src/args.rs
+++ b/src/args.rs
@@ -716,6 +716,17 @@ pub enum Command {
         /// Output dependency file path. This file will contain a list of all files packed in the archive.
         dep_file: Option<String>,
     },
+    /// Convert output script to another format
+    Convert {
+        /// Input script format type
+        input_type: OutputScriptType,
+        /// Output script format type
+        output_type: OutputScriptType,
+        /// Input script file or directory
+        input: String,
+        /// Output script file or directory. Defaults to the input path with the new extension.
+        output: Option<String>,
+    },
 }
 
 pub fn parse_args() -> Arg {
diff --git a/src/main.rs b/src/main.rs
index bf9ecf8..e77226f 100644
--- a/src/main.rs
+++ b/src/main.rs
@@ -69,6 +69,29 @@ fn get_archived_encoding(
     builder.default_archive_encoding().unwrap_or(encoding)
 }
 
+/// Returns the encoding used to decode an existing output script passed as input.
+///
+/// An explicit `encoding` argument takes precedence (`Default` and `Auto` both resolve to
+/// UTF-8); on Windows the `code_page` argument is consulted next; UTF-8 is the fallback.
+fn get_input_output_script_encoding(arg: &args::Arg) -> types::Encoding {
+    if let Some(enc) = &arg.encoding {
+        return match enc {
+            types::TextEncoding::Default
+            | types::TextEncoding::Auto
+            | types::TextEncoding::Utf8 => types::Encoding::Utf8,
+            types::TextEncoding::Cp932 => types::Encoding::Cp932,
+            types::TextEncoding::Gb2312 => types::Encoding::Gb2312,
+        };
+    }
+    // `#[cfg]` is not accepted on an `if let` expression, so this stays a `match` statement.
+    #[cfg(windows)]
+    match &arg.code_page {
+        Some(code_page) => return types::Encoding::CodePage(*code_page),
+        None => {}
+    }
+    types::Encoding::Utf8
+}
+
 fn get_output_encoding(arg: &args::Arg) -> types::Encoding {
     match &arg.output_encoding {
         Some(enc) => {
@@ -2850,6 +2873,216 @@ pub fn create_file(
     Ok(())
 }
 
+/// Reads `input` and decodes it with the encoding chosen for input output-scripts.
+fn read_output_script_text(input: &str, arg: &args::Arg) -> anyhow::Result<String> {
+    let enc = get_input_output_script_encoding(arg);
+    let b = utils::files::read_file(input)?;
+    let s = utils::encoding::decode_to_string(enc, &b, true)?;
+    Ok(s)
+}
+
+/// Encodes `text` with the configured output encoding and writes it to `output`.
+fn write_output_script_text(output: &str, text: &str, arg: &args::Arg) -> anyhow::Result<()> {
+    let enc = get_output_encoding(arg);
+    let b = utils::encoding::encode_string(enc, text, false)?;
+    utils::files::write_file(output)?.write_all(&b)?;
+    Ok(())
+}
+
+/// Parses an output script into extended messages (name, source, translation, LLM text).
+///
+/// Only the M3T family and PO/POT are accepted; any other `typ` returns an error.
+pub fn parse_output_script_as_extend(
+    input: &str,
+    typ: types::OutputScriptType,
+    arg: &args::Arg,
+) -> anyhow::Result<Vec<types::ExtendedMessage>> {
+    match typ {
+        types::OutputScriptType::M3t
+        | types::OutputScriptType::M3ta
+        | types::OutputScriptType::M3tTxt => {
+            let s = read_output_script_text(input, arg)?;
+            let mut parser =
+                output_scripts::m3t::M3tParser::new(&s, arg.llm_trans_mark.as_deref());
+            let mes = parser.parse_as_extend()?;
+            Ok(mes)
+        }
+        types::OutputScriptType::Po | types::OutputScriptType::Pot => {
+            let s = read_output_script_text(input, arg)?;
+            let mut parser =
+                output_scripts::po::PoParser::new(&s, arg.llm_trans_mark.as_deref());
+            let mes = parser.parse_as_extend()?;
+            Ok(mes)
+        }
+        _ => Err(anyhow::anyhow!(
+            "Output script type {:?} does not support extended messages",
+            typ
+        )),
+    }
+}
+
+/// Parses an output script into plain messages.
+///
+/// Accepts the M3T family, PO/POT, JSON and YAML; any other `typ` returns an error.
+pub fn parse_output_script(
+    input: &str,
+    typ: types::OutputScriptType,
+    arg: &args::Arg,
+) -> anyhow::Result<Vec<types::Message>> {
+    match typ {
+        types::OutputScriptType::M3t
+        | types::OutputScriptType::M3ta
+        | types::OutputScriptType::M3tTxt => {
+            let s = read_output_script_text(input, arg)?;
+            let mut parser =
+                output_scripts::m3t::M3tParser::new(&s, arg.llm_trans_mark.as_deref());
+            let mes = parser.parse()?;
+            Ok(mes)
+        }
+        types::OutputScriptType::Po | types::OutputScriptType::Pot => {
+            let s = read_output_script_text(input, arg)?;
+            let mut parser =
+                output_scripts::po::PoParser::new(&s, arg.llm_trans_mark.as_deref());
+            let mes = parser.parse()?;
+            Ok(mes)
+        }
+        types::OutputScriptType::Json => {
+            let s = read_output_script_text(input, arg)?;
+            let mes = serde_json::from_str::<Vec<types::Message>>(&s)?;
+            Ok(mes)
+        }
+        types::OutputScriptType::Yaml => {
+            let s = read_output_script_text(input, arg)?;
+            let mes = serde_yaml_ng::from_str::<Vec<types::Message>>(&s)?;
+            Ok(mes)
+        }
+        _ => Err(anyhow::anyhow!(
+            "Output script type {:?} does not support message parsing",
+            typ
+        )),
+    }
+}
+
+/// Writes extended messages to `output` in the given script format.
+///
+/// Only the M3T family and PO/POT are accepted; any other `typ` returns an error.
+pub fn dump_output_script_as_extend(
+    output: &str,
+    typ: types::OutputScriptType,
+    mes: &[types::ExtendedMessage],
+    arg: &args::Arg,
+) -> anyhow::Result<()> {
+    match typ {
+        types::OutputScriptType::M3t
+        | types::OutputScriptType::M3ta
+        | types::OutputScriptType::M3tTxt => {
+            let s = output_scripts::m3t::M3tDumper::dump_extended(mes);
+            write_output_script_text(output, &s, arg)
+        }
+        types::OutputScriptType::Po | types::OutputScriptType::Pot => {
+            // The PO dumper takes the encoding to build its header entry.
+            let enc = get_output_encoding(arg);
+            let s = output_scripts::po::PoDumper::new().dump_extended(mes, enc)?;
+            write_output_script_text(output, &s, arg)
+        }
+        _ => Err(anyhow::anyhow!(
+            "Output script type {:?} does not support extended messages",
+            typ
+        )),
+    }
+}
+
+/// Writes plain messages to `output` in the given script format.
+///
+/// Accepts the M3T family, PO/POT, JSON and YAML; any other `typ` returns an error.
+pub fn dump_output_script(
+    output: &str,
+    typ: types::OutputScriptType,
+    mes: &[types::Message],
+    arg: &args::Arg,
+) -> anyhow::Result<()> {
+    match typ {
+        types::OutputScriptType::M3t
+        | types::OutputScriptType::M3ta
+        | types::OutputScriptType::M3tTxt => {
+            let s = output_scripts::m3t::M3tDumper::dump(mes, arg.m3t_no_quote);
+            write_output_script_text(output, &s, arg)
+        }
+        types::OutputScriptType::Po | types::OutputScriptType::Pot => {
+            // The PO dumper takes the encoding to build its header entry.
+            let enc = get_output_encoding(arg);
+            let s = output_scripts::po::PoDumper::new().dump(mes, enc)?;
+            write_output_script_text(output, &s, arg)
+        }
+        types::OutputScriptType::Json => {
+            let s = serde_json::to_string_pretty(mes)?;
+            write_output_script_text(output, &s, arg)
+        }
+        types::OutputScriptType::Yaml => {
+            let s = serde_yaml_ng::to_string(mes)?;
+            write_output_script_text(output, &s, arg)
+        }
+        _ => Err(anyhow::anyhow!(
+            "Output script type {:?} does not support message dumping",
+            typ
+        )),
+    }
+}
+
+/// Converts one output script from `input_type` to `output_type`.
+///
+/// With `output` and no `root_dir`, the script is written to `output` as given. With both, it
+/// goes under `output` at the input's path relative to `root_dir`. Without `output`, it is
+/// written next to the input. In the last two cases the extension becomes that of
+/// `output_type`. Extended messages are used when both formats support source messages.
+pub fn convert_file(
+    input: &str,
+    input_type: types::OutputScriptType,
+    output: Option<&str>,
+    output_type: types::OutputScriptType,
+    arg: &args::Arg,
+    root_dir: Option<&std::path::Path>,
+) -> anyhow::Result<types::ScriptResult> {
+    let output = match (output, root_dir) {
+        // Single input file with an explicit destination: use it verbatim.
+        (Some(output), None) => output.to_string(),
+        // Directory input: mirror the input's relative location under the output directory.
+        (Some(output), Some(root_dir)) => {
+            let f = std::path::PathBuf::from(input);
+            let mut pb = std::path::PathBuf::from(output);
+            let rpath = utils::files::relative_path(root_dir, &f);
+            if let Some(parent) = rpath.parent() {
+                pb.push(parent);
+            }
+            if let Some(fname) = f.file_name() {
+                pb.push(fname);
+            }
+            if arg.output_no_extra_ext {
+                pb.remove_all_extensions();
+            }
+            pb.set_extension(output_type.as_ref());
+            pb.to_string_lossy().into_owned()
+        }
+        // No destination given: write next to the input with the new extension.
+        (None, _) => {
+            let mut pb = std::path::PathBuf::from(input);
+            if arg.output_no_extra_ext {
+                pb.remove_all_extensions();
+            }
+            pb.set_extension(output_type.as_ref());
+            pb.to_string_lossy().into_owned()
+        }
+    };
+    if input_type.is_src_supported() && output_type.is_src_supported() {
+        let input_mes = parse_output_script_as_extend(input, input_type, arg)?;
+        dump_output_script_as_extend(&output, output_type, &input_mes, arg)?;
+    } else {
+        let input_mes = parse_output_script(input, input_type, arg)?;
+        dump_output_script(&output, output_type, &input_mes, arg)?;
+    }
+    Ok(types::ScriptResult::Ok)
+}
+
 lazy_static::lazy_static! {
     static ref COUNTER: utils::counter::Counter = utils::counter::Counter::new();
     static ref EXIT_LISTENER: std::sync::Mutex>> = std::sync::Mutex::new(std::collections::BTreeMap::new());
@@ -3423,6 +3656,72 @@ fn main() {
                 eprintln!("No input files specified for packing.");
             }
         }
+        args::Command::Convert {
+            input_type,
+            output_type,
+            input,
+            output,
+        } => {
+            if input_type.is_custom() {
+                eprintln!("Custom input type is not supported for conversion.");
+                std::process::exit(argn.exit_code_all_failed.unwrap_or(argn.exit_code));
+            }
+            if output_type.is_custom() {
+                eprintln!("Custom output type is not supported for conversion.");
+                std::process::exit(argn.exit_code_all_failed.unwrap_or(argn.exit_code));
+            }
+            // Report I/O failures through the exit code instead of panicking on `unwrap`.
+            let (scripts, is_dir) = match utils::files::collect_ext_files(
+                input,
+                arg.recursive,
+                &[input_type.as_ref()],
+            ) {
+                Ok(found) => found,
+                Err(e) => {
+                    eprintln!("Error collecting input files from {}: {:?}", input, e);
+                    std::process::exit(argn.exit_code_all_failed.unwrap_or(argn.exit_code));
+                }
+            };
+            if is_dir {
+                if let Some(output) = &output {
+                    let op = std::path::Path::new(output);
+                    if op.exists() {
+                        if !op.is_dir() {
+                            eprintln!("Output path is not a directory");
+                            std::process::exit(
+                                argn.exit_code_all_failed.unwrap_or(argn.exit_code),
+                            );
+                        }
+                    } else if let Err(e) = std::fs::create_dir_all(op) {
+                        eprintln!("Error creating output directory {}: {}", output, e);
+                        std::process::exit(argn.exit_code_all_failed.unwrap_or(argn.exit_code));
+                    }
+                }
+            }
+            let root_dir = is_dir.then(|| std::path::Path::new(input));
+            for script in scripts.iter() {
+                let re = convert_file(
+                    &script,
+                    *input_type,
+                    output.as_deref(),
+                    *output_type,
+                    &arg,
+                    root_dir,
+                );
+                match re {
+                    Ok(s) => {
+                        COUNTER.inc(s);
+                    }
+                    Err(e) => {
+                        COUNTER.inc_error();
+                        eprintln!("Error converting {}: {}", script, e);
+                        if arg.backtrace {
+                            eprintln!("Backtrace: {}", e.backtrace());
+                        }
+                    }
+                }
+            }
+        }
     }
     let counter = std::ops::Deref::deref(&COUNTER);
     eprintln!("{}", counter);
diff --git a/src/output_scripts/m3t.rs b/src/output_scripts/m3t.rs
index 75ca24f..41673c4 100644
--- a/src/output_scripts/m3t.rs
+++ b/src/output_scripts/m3t.rs
@@ -8,7 +8,7 @@
 //! △ LLM message
 //! ● Translated message
 //! ```
-use crate::types::Message;
+use crate::types::{ExtendedMessage, Message};
 use anyhow::Result;
 
 /// A parser for the M3T format.
@@ -183,6 +183,52 @@ impl<'a> M3tParser<'a> {
         }
         Ok(messages)
     }
+
+    /// Parses the script into extended messages, keeping the source and LLM text.
+    ///
+    /// A `○ NAME:` line sets the name of the next message, any other `○` line sets its
+    /// source, a `△` line sets its LLM text and a `●` line completes the message.
+    ///
+    /// # Errors
+    ///
+    /// Returns an error when a `●` line has no preceding `○` source line.
+    pub fn parse_as_extend(&mut self) -> Result<Vec<ExtendedMessage>> {
+        let mut messages = Vec::new();
+        let mut name = None;
+        let mut llm = None;
+        let mut source = None;
+        while let Some(line) = self.next_line() {
+            if line.is_empty() {
+                continue;
+            }
+            // Remove zero-width space characters
+            let line = line.trim().trim_matches('\u{200b}');
+            // `strip_prefix` avoids hard-coding the UTF-8 byte length of each marker.
+            if let Some(rest) = line.strip_prefix("○") {
+                let rest = rest.trim();
+                match rest.strip_prefix("NAME:") {
+                    Some(n) => name = Some(n.trim().to_string()),
+                    None => source = Some(rest.to_string()),
+                }
+            } else if let Some(rest) = line.strip_prefix("△") {
+                llm = Some(rest.trim().to_string());
+            } else if let Some(rest) = line.strip_prefix("●") {
+                let source = source.take().ok_or_else(|| {
+                    anyhow::anyhow!(
+                        "Missing original message before translated message at line {}",
+                        self.line
+                    )
+                })?;
+                messages.push(ExtendedMessage {
+                    name: name.take(),
+                    source,
+                    translated: rest.trim().to_string(),
+                    llm: llm.take(),
+                });
+            }
+        }
+        Ok(messages)
+    }
 }
 
 /// A dumper for the M3T format.
@@ -205,6 +251,27 @@ impl M3tDumper {
         }
         result
     }
+
+    /// Dumps the extended messages in M3T format.
+    ///
+    /// Line breaks inside the source, LLM and translated text are written as a literal `\n`.
+    pub fn dump_extended(messages: &[ExtendedMessage]) -> String {
+        let mut result = String::new();
+        for message in messages {
+            if let Some(name) = &message.name {
+                result.push_str(&format!("○ NAME: {}\n\n", name));
+            }
+            result.push_str(&format!("○ {}\n", message.source.replace("\n", "\\n")));
+            if let Some(llm) = &message.llm {
+                result.push_str(&format!("△ {}\n", llm.replace("\n", "\\n")));
+            }
+            result.push_str(&format!(
+                "● {}\n\n",
+                message.translated.replace("\n", "\\n")
+            ));
+        }
+        result
+    }
 }
 
 #[test]
diff --git a/src/output_scripts/po.rs b/src/output_scripts/po.rs
index acca3c5..a3b79a1 100644
--- a/src/output_scripts/po.rs
+++ b/src/output_scripts/po.rs
@@ -258,6 +258,65 @@ impl PoDumper {
         result
     }
 
+    /// Dumps extended messages as a PO document, preceded by a generated header entry.
+    ///
+    /// The name and LLM text of each message are stored as translator comments (`NAME: ...`,
+    /// `LLM: ...`). A `msgctxt` built from the name and an occurrence counter is added
+    /// whenever the same source text was already emitted.
+    pub fn dump_extended(
+        mut self,
+        entries: &[ExtendedMessage],
+        encoding: Encoding,
+    ) -> Result<String> {
+        self.add_entry(PoEntry {
+            comments: vec![
+                Comment::Translator(String::from("Generated by msg-tool")),
+                Comment::Flag(vec![String::from("fuzzy")]),
+            ],
+            msgctxt: None,
+            msgid: String::new(),
+            msgid_plural: None,
+            msgstr: MsgStr::Single(Self::gen_start_str(encoding)),
+        });
+        let mut added = HashSet::new();
+        let mut added_messages: HashMap<(&String, &Option<String>), usize> = HashMap::new();
+        for entry in entries {
+            // Occurrences so far of this exact (source, name) pair.
+            let seen = added_messages
+                .entry((&entry.source, &entry.name))
+                .or_insert(0);
+            let count = *seen;
+            *seen += 1;
+            // `insert` returns false when the source text was already emitted under any name.
+            let inadded = !added.insert(&entry.source);
+            let mut comments = Vec::new();
+            if let Some(name) = &entry.name {
+                comments.push(Comment::Translator(format!("NAME: {}", name)));
+            }
+            if let Some(llm) = &entry.llm {
+                comments.push(Comment::Translator(format!(
+                    "LLM: {}",
+                    llm.replace("\n", "\\n")
+                )));
+            }
+            let msgctxt = (count > 0 || inadded)
+                .then(|| format!("{}{}", entry.name.as_deref().unwrap_or(""), count));
+            self.add_entry(PoEntry {
+                comments,
+                msgctxt,
+                msgid: entry.source.clone(),
+                msgid_plural: None,
+                msgstr: MsgStr::Single(entry.translated.clone()),
+            });
+        }
+        let mut result = String::new();
+        for line in &self.entries {
+            result.push_str(&line.dump()?);
+            result.push('\n');
+        }
+        Ok(result)
+    }
+
     pub fn dump(mut self, entries: &[Message], encoding: Encoding) -> Result<String> {
         self.add_entry(PoEntry {
             comments: vec![
@@ -708,6 +767,50 @@ impl<'a> PoParser<'a> {
         }
         Ok(messages)
     }
+
+    /// Parses the PO document into extended messages.
+    ///
+    /// The leading header entry (empty `msgid` at index 0) is skipped. `NAME:` and `LLM:`
+    /// translator comments populate the name and LLM text; `\n` escapes in the LLM text are
+    /// turned back into line breaks.
+    ///
+    /// # Errors
+    ///
+    /// Returns an error if parsing the entries fails or an entry has a plural `msgstr`.
+    pub fn parse_as_extend(&mut self) -> Result<Vec<ExtendedMessage>> {
+        let mut messages = Vec::new();
+        for (i, entry) in self.parse_entries()?.into_iter().enumerate() {
+            if entry.msgid.is_empty() && i == 0 {
+                // This is the header entry, skip it
+                continue;
+            }
+            let mut name = None;
+            let mut llm = None;
+            for comment in &entry.comments {
+                if let Comment::Translator(s) = comment {
+                    let s = s.trim();
+                    if let Some(rest) = s.strip_prefix("NAME:") {
+                        name = Some(rest.trim().to_string());
+                    } else if let Some(rest) = s.strip_prefix("LLM:") {
+                        llm = Some(rest.trim().replace("\\n", "\n"));
+                    }
+                }
+            }
+            let translated = match entry.msgstr {
+                MsgStr::Single(s) => s,
+                MsgStr::Plural(_) => {
+                    return Err(anyhow!("Plural msgstr not supported in this context"));
+                }
+            };
+            messages.push(ExtendedMessage {
+                name,
+                source: entry.msgid,
+                translated,
+                llm,
+            });
+        }
+        Ok(messages)
+    }
 }
 
 // --- Unit Tests ---
diff --git a/src/types.rs b/src/types.rs
index 0e062c4..b7ae827 100644
--- a/src/types.rs
+++ b/src/types.rs
@@ -130,6 +130,18 @@ impl OutputScriptType {
             OutputScriptType::M3t | OutputScriptType::M3ta | OutputScriptType::M3tTxt
         )
     }
+
+    /// Returns true if the script type supports source messages.
+    pub fn is_src_supported(&self) -> bool {
+        matches!(
+            self,
+            OutputScriptType::M3t
+                | OutputScriptType::M3ta
+                | OutputScriptType::M3tTxt
+                | OutputScriptType::Po
+                | OutputScriptType::Pot
+        )
+    }
 }
 
 impl AsRef<str> for OutputScriptType {
@@ -764,6 +776,19 @@ impl Message {
     }
 }
 
+/// Extended message structure for scripts
+#[derive(Clone, Debug)]
+pub struct ExtendedMessage {
+    /// Optional name for the message, used in some scripts.
+    pub name: Option<String>,
+    /// Original source text.
+    pub source: String,
+    /// Translated text.
+    pub translated: String,
+    /// Optional LLM translated text.
+    pub llm: Option<String>,
+}
+
 /// Result of script operation.
 pub enum ScriptResult {
     /// Operation completed successfully.