Add load m3t dir support

This commit is contained in:
2025-09-03 23:33:42 +08:00
parent 59798fe697
commit 61b183d940
4 changed files with 176 additions and 4 deletions

View File

@@ -203,6 +203,10 @@ pub struct Arg {
/// Kirikiri chat message translation file. (Map<String, String>, key is original text, value is translated text.)
pub kirikiri_chat_json: Option<String>,
#[cfg(feature = "kirikiri")]
#[arg(long, global = true, group = "kirikiri_chat_jsong")]
/// Kirikiri chat message translation directory. All m3t files in this directory will be merged. (Only m3t files are supported.)
pub kirikiri_chat_dir: Option<String>,
#[cfg(feature = "kirikiri")]
#[arg(long, global = true, action = ArgAction::SetTrue, alias = "kr-no-empty-lines", alias = "kirikiri-no-empty-lines")]
/// Remove empty lines in Kirikiri KS script.
pub kirikiri_remove_empty_lines: bool,
@@ -571,3 +575,45 @@ pub fn get_artemis_panmimisoft_txt_blacklist_names(
.collect()),
}
}
#[cfg(feature = "kirikiri")]
/// Loads the Kirikiri chat translation map selected by the command-line arguments.
///
/// Resolution order:
/// 1. `kirikiri_chat_json` — the file is read via `read_kirikiri_comu_json`.
/// 2. `kirikiri_chat_dir` — every file in the directory whose extension matches the
///    chosen M3T flavour is decoded, parsed with `M3tParser::parse_as_map` and merged
///    into one map. When the same key occurs in several files, the file visited last wins.
///
/// The M3T flavour is `arg.output_type` when it is one of `M3t`, `M3ta` or `M3tTxt`;
/// any other value (or no value) falls back to `M3t`.
///
/// Returns `Ok(None)` when neither option is set or the directory holds no matching file.
///
/// # Errors
///
/// Propagates any error from reading, decoding or parsing a file.
pub fn load_kirikiri_chat_json(
    arg: &Arg,
) -> anyhow::Result<Option<std::sync::Arc<std::collections::HashMap<String, String>>>> {
    // An explicit JSON file always takes precedence over the directory option.
    if let Some(json_path) = &arg.kirikiri_chat_json {
        let table = crate::scripts::kirikiri::read_kirikiri_comu_json(json_path)?;
        return Ok(Some(table));
    }
    let dir = match &arg.kirikiri_chat_dir {
        Some(dir) => dir,
        None => return Ok(None),
    };
    // Only the M3T family can be loaded; anything else degrades to plain M3T.
    let script_type = arg
        .output_type
        .filter(|t| {
            matches!(
                t,
                OutputScriptType::M3t | OutputScriptType::M3ta | OutputScriptType::M3tTxt
            )
        })
        .unwrap_or(OutputScriptType::M3t);
    let files =
        crate::utils::files::find_ext_files(dir, arg.recursive, &[script_type.as_ref()])?;
    if files.is_empty() {
        return Ok(None);
    }
    let mut merged = std::collections::HashMap::new();
    for file in files {
        let raw = crate::utils::files::read_file(&file)?;
        let text = crate::utils::encoding::decode_to_string(
            crate::get_output_encoding(arg),
            &raw,
            true,
        )?;
        let entries = crate::output_scripts::m3t::M3tParser::new(
            &text,
            arg.llm_trans_mark.as_deref(),
        )
        .parse_as_map()?;
        merged.extend(entries);
    }
    Ok(Some(std::sync::Arc::new(merged)))
}

View File

@@ -1748,10 +1748,8 @@ fn main() {
#[cfg(feature = "kirikiri")]
kirikiri_chat_key: arg.kirikiri_chat_key.clone(),
#[cfg(feature = "kirikiri")]
kirikiri_chat_json: arg
.kirikiri_chat_json
.as_ref()
.map(|s| scripts::kirikiri::read_kirikiri_comu_json(s).unwrap()),
kirikiri_chat_json: args::load_kirikiri_chat_json(&arg)
.expect("Failed to load Kirikiri chat JSON"),
#[cfg(feature = "kirikiri")]
kirikiri_remove_empty_lines: arg.kirikiri_remove_empty_lines,
#[cfg(feature = "kirikiri")]

View File

@@ -8,6 +8,8 @@
//! △ LLM message
//! ● Translated message
//! ```
use std::collections::HashMap;
use crate::types::Message;
use anyhow::Result;
@@ -48,6 +50,75 @@ impl<'a> M3tParser<'a> {
}
}
/// Parses the M3T text into a map from original text to translated text.
///
/// Line markers:
/// * `○` — original message. Lines of the form `○ NAME: …` are ignored.
/// * `△` — LLM-produced translation for the pending original message.
/// * `●` — final translation; this line completes the entry for the pending original.
///
/// When the `●` line is empty (or contains only `「`/`」` brackets) the pending LLM text
/// is used instead, with `llm_mark` appended if one is configured. If there is no LLM
/// text either, the value is `「」` when the line started with `「`, otherwise the empty
/// string. A non-empty `●` line that is identical to the LLM text also gets `llm_mark`
/// appended. Literal `\n` sequences in the value are turned into real newlines; the key
/// is stored as written.
///
/// # Errors
///
/// Fails when a `●` line has no preceding `○` line, or when a non-empty line does not
/// start with one of the three markers.
pub fn parse_as_map(&mut self) -> Result<HashMap<String, String>> {
    let mut map = HashMap::new();
    let mut ori = None;
    let mut llm = None;
    while let Some(line) = self.next_line() {
        if line.is_empty() {
            continue;
        }
        // `strip_prefix` removes the (multi-byte) marker without byte-index slicing,
        // so a short or oddly encoded line can never cause a panic.
        if let Some(rest) = line.strip_prefix("○") {
            let rest = rest.trim();
            // Speaker names carry no translatable chat text for the map.
            if !rest.starts_with("NAME:") {
                ori = Some(rest.to_string());
            }
        } else if let Some(rest) = line.strip_prefix("△") {
            llm = Some(rest.trim());
        } else if let Some(rest) = line.strip_prefix("●") {
            let message = rest.trim();
            let untranslated = message
                .trim_start_matches("「")
                .trim_end_matches("」")
                .is_empty();
            let message = if untranslated {
                // Nothing was translated by hand: fall back to the LLM suggestion.
                llm.take()
                    .map(|s| {
                        let mut s = s.to_string();
                        if let Some(mark) = self.llm_mark {
                            s.push_str(mark);
                        }
                        s
                    })
                    .unwrap_or_else(|| {
                        String::from(if message.starts_with("「") {
                            "「」"
                        } else {
                            ""
                        })
                    })
                    .replace("\\n", "\n")
            } else {
                let mut tmp = message.replace("\\n", "\n");
                // A translation copied verbatim from the LLM line is still marked.
                if let Some(llm) = llm.take() {
                    if tmp == llm {
                        if let Some(mark) = self.llm_mark {
                            tmp.push_str(mark);
                        }
                    }
                }
                tmp
            };
            if let Some(ori) = ori.take() {
                map.insert(ori, message);
            } else {
                return Err(anyhow::anyhow!(
                    "Missing original message before translated message at line {}",
                    self.line
                ));
            }
        } else {
            return Err(anyhow::anyhow!(
                "Invalid line format at line {}: {}",
                self.line,
                line
            ));
        }
    }
    Ok(map)
}
/// Parses the M3T format and returns a vector of messages.
pub fn parse(&mut self) -> Result<Vec<Message>> {
let mut messages = Vec::new();

View File

@@ -139,6 +139,63 @@ pub fn collect_files(
))
}
/// Finds all files with specific extensions in the specified directory and, when
/// `recursive` is set, in its subdirectories.
///
/// * `path` - Directory to scan. If it is not a directory an empty list is returned.
/// * `recursive` - Whether to descend into subdirectories.
/// * `exts` - Extensions without the leading dot (for example `"m3t"`). Matching is
///   case-insensitive and done on the end of the file name, so compound extensions
///   such as `"m3t.txt"` work as well.
///
/// Paths that are not valid UTF-8 are skipped because the result is a list of `String`s.
///
/// # Errors
///
/// Returns any I/O error raised while reading a directory.
pub fn find_ext_files(path: &str, recursive: bool, exts: &[&str]) -> io::Result<Vec<String>> {
    let mut result = Vec::new();
    let dir_path = Path::new(path);
    if !dir_path.is_dir() {
        return Ok(result);
    }
    // Build the lowercase ".ext" suffixes once instead of once per file.
    let suffixes: Vec<String> = exts
        .iter()
        .map(|ext| format!(".{}", ext.to_lowercase()))
        .collect();
    for entry in fs::read_dir(dir_path)? {
        let entry = entry?;
        let path = entry.path();
        if path.is_file() {
            // Files without a matching suffix (including extensionless files) are ignored.
            let matched = path.file_name().map_or(false, |name| {
                let name = name.to_string_lossy().to_lowercase();
                suffixes.iter().any(|suffix| name.ends_with(suffix.as_str()))
            });
            if matched {
                if let Some(path_str) = path.to_str() {
                    result.push(path_str.to_string());
                }
            }
        } else if recursive && path.is_dir() {
            if let Some(path_str) = path.to_str() {
                // Recurse with the same extension filter.
                result.extend(find_ext_files(path_str, recursive, exts)?);
            }
        }
    }
    Ok(result)
}
/// Gathers files for a path that may be either a directory or a single file.
///
/// * Directory: returns the files found by `find_ext_files` together with `true`.
/// * Regular file: returns that single path (no extension check is applied) together
///   with `false`.
///
/// # Errors
///
/// Returns `io::ErrorKind::NotFound` when `path` is neither a file nor a directory, and
/// forwards any error from scanning the directory.
pub fn collect_ext_files(
    path: &str,
    recursive: bool,
    exts: &[&str],
) -> io::Result<(Vec<String>, bool)> {
    let target = Path::new(path);
    if target.is_dir() {
        let found = find_ext_files(path, recursive, exts)?;
        Ok((found, true))
    } else if target.is_file() {
        Ok((vec![path.to_owned()], false))
    } else {
        let message = format!(
            "Path {} is neither a file nor a directory",
            target.display()
        );
        Err(io::Error::new(io::ErrorKind::NotFound, message))
    }
}
/// Collects archive files from the specified path, either as a directory or a single file.
pub fn collect_arc_files(path: &str, recursive: bool) -> io::Result<(Vec<String>, bool)> {
let pa = Path::new(path);