mirror of
https://github.com/lifegpc/msg-tool.git
synced 2026-06-08 22:08:47 +08:00
Add load m3t dir support
This commit is contained in:
46
src/args.rs
46
src/args.rs
@@ -203,6 +203,10 @@ pub struct Arg {
|
||||
/// Kirikiri chat message translation file. (Map<String, String>, key is original text, value is translated text.)
|
||||
pub kirikiri_chat_json: Option<String>,
|
||||
#[cfg(feature = "kirikiri")]
|
||||
#[arg(long, global = true, group = "kirikiri_chat_jsong")]
|
||||
/// Kirikiri chat message translation directory. All m3t files in this directory will be merged. (Only m3t files are supported.)
|
||||
pub kirikiri_chat_dir: Option<String>,
|
||||
#[cfg(feature = "kirikiri")]
|
||||
#[arg(long, global = true, action = ArgAction::SetTrue, alias = "kr-no-empty-lines", alias = "kirikiri-no-empty-lines")]
|
||||
/// Remove empty lines in Kirikiri KS script.
|
||||
pub kirikiri_remove_empty_lines: bool,
|
||||
@@ -571,3 +575,45 @@ pub fn get_artemis_panmimisoft_txt_blacklist_names(
|
||||
.collect()),
|
||||
}
|
||||
}
|
||||
|
||||
/// Loads the Kirikiri chat translation table selected by the command line arguments.
///
/// Sources are tried in this order:
/// 1. `arg.kirikiri_chat_json` — read through
///    `crate::scripts::kirikiri::read_kirikiri_comu_json` and returned as is.
/// 2. `arg.kirikiri_chat_dir` — every file in the directory whose extension matches the
///    selected M3T flavour is decoded with `crate::get_output_encoding(arg)`, parsed with
///    `M3tParser::parse_as_map`, and merged into one map. When several files define the
///    same key, the file processed last wins (processing order is whatever
///    `find_ext_files` returns).
///
/// The M3T flavour is `arg.output_type` when that is `M3t`, `M3ta` or `M3tTxt`;
/// any other value (or no value) falls back to `M3t`.
///
/// # Returns
/// `Ok(None)` when neither option is set, or when the directory contains no matching
/// files.
///
/// # Errors
/// Propagates any failure from reading, decoding or parsing a file.
#[cfg(feature = "kirikiri")]
pub fn load_kirikiri_chat_json(
    arg: &Arg,
) -> anyhow::Result<Option<std::sync::Arc<std::collections::HashMap<String, String>>>> {
    // An explicit JSON file takes precedence over a directory.
    if let Some(json_path) = &arg.kirikiri_chat_json {
        let table = crate::scripts::kirikiri::read_kirikiri_comu_json(json_path)?;
        return Ok(Some(table));
    }

    let Some(dir) = &arg.kirikiri_chat_dir else {
        return Ok(None);
    };

    // Only the M3T family can be loaded here; anything else is treated as plain M3T.
    let script_type = arg
        .output_type
        .filter(|t| {
            matches!(
                t,
                OutputScriptType::M3t | OutputScriptType::M3ta | OutputScriptType::M3tTxt
            )
        })
        .unwrap_or(OutputScriptType::M3t);

    let files = crate::utils::files::find_ext_files(dir, arg.recursive, &[script_type.as_ref()])?;
    if files.is_empty() {
        return Ok(None);
    }

    let mut merged = std::collections::HashMap::new();
    for file in files {
        let bytes = crate::utils::files::read_file(&file)?;
        let text =
            crate::utils::encoding::decode_to_string(crate::get_output_encoding(arg), &bytes, true)?;
        let entries = crate::output_scripts::m3t::M3tParser::new(
            &text,
            arg.llm_trans_mark.as_ref().map(|s| s.as_str()),
        )
        .parse_as_map()?;
        merged.extend(entries);
    }
    Ok(Some(std::sync::Arc::new(merged)))
}
|
||||
|
||||
@@ -1748,10 +1748,8 @@ fn main() {
|
||||
#[cfg(feature = "kirikiri")]
|
||||
kirikiri_chat_key: arg.kirikiri_chat_key.clone(),
|
||||
#[cfg(feature = "kirikiri")]
|
||||
kirikiri_chat_json: arg
|
||||
.kirikiri_chat_json
|
||||
.as_ref()
|
||||
.map(|s| scripts::kirikiri::read_kirikiri_comu_json(s).unwrap()),
|
||||
kirikiri_chat_json: args::load_kirikiri_chat_json(&arg)
|
||||
.expect("Failed to load Kirikiri chat JSON"),
|
||||
#[cfg(feature = "kirikiri")]
|
||||
kirikiri_remove_empty_lines: arg.kirikiri_remove_empty_lines,
|
||||
#[cfg(feature = "kirikiri")]
|
||||
|
||||
@@ -8,6 +8,8 @@
|
||||
//! △ LLM message
|
||||
//! ● Translated message
|
||||
//! ```
|
||||
use std::collections::HashMap;
|
||||
|
||||
use crate::types::Message;
|
||||
use anyhow::Result;
|
||||
|
||||
@@ -48,6 +50,75 @@ impl<'a> M3tParser<'a> {
|
||||
}
|
||||
}
|
||||
|
||||
pub fn parse_as_map(&mut self) -> Result<HashMap<String, String>> {
|
||||
let mut map = HashMap::new();
|
||||
let mut ori = None;
|
||||
let mut llm = None;
|
||||
while let Some(line) = self.next_line() {
|
||||
if line.is_empty() {
|
||||
continue;
|
||||
}
|
||||
if line.starts_with("○") {
|
||||
let line = line[3..].trim();
|
||||
if !line.starts_with("NAME:") {
|
||||
ori = Some(line.to_string());
|
||||
}
|
||||
} else if line.starts_with("△") {
|
||||
let line = line[3..].trim();
|
||||
llm = Some(line);
|
||||
} else if line.starts_with("●") {
|
||||
let message = line[3..].trim();
|
||||
let message = if message
|
||||
.trim_start_matches("「")
|
||||
.trim_end_matches("」")
|
||||
.is_empty()
|
||||
{
|
||||
llm.take()
|
||||
.map(|s| {
|
||||
let mut s = s.to_string();
|
||||
if let Some(mark) = self.llm_mark {
|
||||
s.push_str(mark);
|
||||
}
|
||||
s
|
||||
})
|
||||
.unwrap_or_else(|| {
|
||||
String::from(if message.starts_with("「") {
|
||||
"「」"
|
||||
} else {
|
||||
""
|
||||
})
|
||||
})
|
||||
.replace("\\n", "\n")
|
||||
} else {
|
||||
let mut tmp = message.replace("\\n", "\n");
|
||||
if let Some(llm) = llm.take() {
|
||||
if tmp == llm {
|
||||
if let Some(mark) = self.llm_mark {
|
||||
tmp.push_str(mark);
|
||||
}
|
||||
}
|
||||
}
|
||||
tmp
|
||||
};
|
||||
if let Some(ori) = ori.take() {
|
||||
map.insert(ori, message);
|
||||
} else {
|
||||
return Err(anyhow::anyhow!(
|
||||
"Missing original message before translated message at line {}",
|
||||
self.line
|
||||
));
|
||||
}
|
||||
} else {
|
||||
return Err(anyhow::anyhow!(
|
||||
"Invalid line format at line {}: {}",
|
||||
self.line,
|
||||
line
|
||||
));
|
||||
}
|
||||
}
|
||||
Ok(map)
|
||||
}
|
||||
|
||||
/// Parses the M3T format and returns a vector of messages.
|
||||
pub fn parse(&mut self) -> Result<Vec<Message>> {
|
||||
let mut messages = Vec::new();
|
||||
|
||||
@@ -139,6 +139,63 @@ pub fn collect_files(
|
||||
))
|
||||
}
|
||||
|
||||
/// Finds all files with specific extensions in the specified directory and, when
/// `recursive` is set, in all of its subdirectories.
///
/// # Arguments
/// * `path` - Directory to search. If it is not a directory, an empty list is returned.
/// * `recursive` - Whether to descend into subdirectories.
/// * `exts` - Extensions to look for, without the leading dot (for example `"m3t"`).
///   Matching is case-insensitive and is done against the end of the file name, so
///   multi-part extensions such as `"tar.gz"` work as well.
///
/// # Returns
/// The paths of all matching files. Entries whose path is not valid UTF-8 are skipped.
/// The order follows `fs::read_dir` and is therefore platform dependent.
///
/// # Errors
/// Returns any I/O error raised while listing a directory.
pub fn find_ext_files(path: &str, recursive: bool, exts: &[&str]) -> io::Result<Vec<String>> {
    let mut result = Vec::new();
    let dir_path = Path::new(path);
    if !dir_path.is_dir() {
        return Ok(result);
    }

    // File names are compared in lowercase, so the suffixes must be lowercase too.
    let suffixes: Vec<String> = exts
        .iter()
        .map(|ext| format!(".{}", ext.to_lowercase()))
        .collect();

    for entry in fs::read_dir(dir_path)? {
        let entry = entry?;
        let path = entry.path();

        if path.is_file() {
            // A file without any of the requested suffixes (including files with no
            // extension at all) is not a match.
            let wanted = path.file_name().map_or(false, |name| {
                let name = name.to_string_lossy().to_lowercase();
                suffixes.iter().any(|suffix| name.ends_with(suffix.as_str()))
            });
            if wanted {
                if let Some(path_str) = path.to_str() {
                    result.push(path_str.to_string());
                }
            }
        } else if recursive && path.is_dir() {
            if let Some(path_str) = path.to_str() {
                // Recurse with the same extension filter.
                result.extend(find_ext_files(path_str, recursive, exts)?);
            }
        }
    }

    Ok(result)
}
|
||||
|
||||
/// Collects files with specific extensions from the specified path, either as a directory or a single file.
|
||||
pub fn collect_ext_files(
|
||||
path: &str,
|
||||
recursive: bool,
|
||||
exts: &[&str],
|
||||
) -> io::Result<(Vec<String>, bool)> {
|
||||
let pa = Path::new(path);
|
||||
if pa.is_dir() {
|
||||
return Ok((find_ext_files(path, recursive, exts)?, true));
|
||||
}
|
||||
if pa.is_file() {
|
||||
return Ok((vec![path.to_string()], false));
|
||||
}
|
||||
Err(io::Error::new(
|
||||
io::ErrorKind::NotFound,
|
||||
format!("Path {} is neither a file nor a directory", pa.display()),
|
||||
))
|
||||
}
|
||||
|
||||
/// Collects archive files from the specified path, either as a directory or a single file.
|
||||
pub fn collect_arc_files(path: &str, recursive: bool) -> io::Result<(Vec<String>, bool)> {
|
||||
let pa = Path::new(path);
|
||||
|
||||
Reference in New Issue
Block a user