commit e2e739983221406f49053c3c9655abd7966c8788 Author: lifegpc Date: Tue May 20 17:47:04 2025 +0800 Add export support diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..a92ca28 --- /dev/null +++ b/.gitignore @@ -0,0 +1,3 @@ +/target +/testscripts +/output diff --git a/Cargo.lock b/Cargo.lock new file mode 100644 index 0000000..512a5fa --- /dev/null +++ b/Cargo.lock @@ -0,0 +1,327 @@ +# This file is automatically @generated by Cargo. +# It is not intended for manual editing. +version = 4 + +[[package]] +name = "anstream" +version = "0.6.18" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8acc5369981196006228e28809f761875c0327210a891e941f4c683b3a99529b" +dependencies = [ + "anstyle", + "anstyle-parse", + "anstyle-query", + "anstyle-wincon", + "colorchoice", + "is_terminal_polyfill", + "utf8parse", +] + +[[package]] +name = "anstyle" +version = "1.0.10" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "55cc3b69f167a1ef2e161439aa98aed94e6028e5f9a59be9a6ffb47aef1651f9" + +[[package]] +name = "anstyle-parse" +version = "0.2.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3b2d16507662817a6a20a9ea92df6652ee4f94f914589377d69f3b21bc5798a9" +dependencies = [ + "utf8parse", +] + +[[package]] +name = "anstyle-query" +version = "1.1.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "79947af37f4177cfead1110013d678905c37501914fba0efea834c3fe9a8d60c" +dependencies = [ + "windows-sys", +] + +[[package]] +name = "anstyle-wincon" +version = "3.0.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ca3534e77181a9cc07539ad51f2141fe32f6c3ffd4df76db8ad92346b003ae4e" +dependencies = [ + "anstyle", + "once_cell", + "windows-sys", +] + +[[package]] +name = "anyhow" +version = "1.0.98" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e16d2d3311acee920a9eb8d33b8cbc1787ce4a264e85f964c2404b969bdcd487" + +[[package]] +name = "cfg-if" +version = "1.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd" + +[[package]] +name = "clap" +version = "4.5.38" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ed93b9805f8ba930df42c2590f05453d5ec36cbb85d018868a5b24d31f6ac000" +dependencies = [ + "clap_builder", + "clap_derive", +] + +[[package]] +name = "clap_builder" +version = "4.5.38" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "379026ff283facf611b0ea629334361c4211d1b12ee01024eec1591133b04120" +dependencies = [ + "anstream", + "anstyle", + "clap_lex", + "strsim", +] + +[[package]] +name = "clap_derive" +version = "4.5.32" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "09176aae279615badda0765c0c0b3f6ed53f4709118af73cf4655d85d1530cd7" +dependencies = [ + "heck", + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "clap_lex" +version = "0.7.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f46ad14479a25103f283c0f10005961cf086d8dc42205bb44c46ac563475dca6" + +[[package]] +name = "colorchoice" +version = "1.0.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5b63caa9aa9397e2d9480a9b13673856c78d8ac123288526c37d7839f2a86990" + +[[package]] +name = "encoding_rs" +version = "0.8.35" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "75030f3c4f45dafd7586dd6780965a8c7e8e285a5ecb86713e63a79c5b2766f3" +dependencies = [ + "cfg-if", +] + +[[package]] +name = "heck" +version = "0.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2304e00983f87ffb38b55b444b5e3b60a884b5d30c0fca7d82fe33449bbe55ea" + +[[package]] +name = "is_terminal_polyfill" +version = "1.70.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7943c866cc5cd64cbc25b2e01621d07fa8eb2a1a23160ee81ce38704e97b8ecf" + +[[package]] +name = "itoa" +version = "1.0.15" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4a5f13b858c8d314ee3e8f639011f7ccefe71f97f96e50151fb991f267928e2c" + +[[package]] +name = "lazy_static" +version = "1.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bbd2bcb4c963f2ddae06a2efc7e9f3591312473c50c6685e1f298068316e66fe" + +[[package]] +name = "memchr" +version = "2.7.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "78ca9ab1a0babb1e7d5695e3530886289c18cf2f87ec19a575a0abdce112e3a3" + +[[package]] +name = "msg_tool" +version = "0.1.0" +dependencies = [ + "anyhow", + "clap", + "encoding_rs", + "lazy_static", + "serde", + "serde_json", + "windows-sys", +] + +[[package]] +name = "once_cell" +version = "1.21.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "42f5e15c9953c5e4ccceeb2e7382a716482c34515315f7b03532b8b4e8393d2d" + +[[package]] +name = "proc-macro2" +version = "1.0.95" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "02b3e5e68a3a1a02aad3ec490a98007cbc13c37cbe84a3cd7b8e406d76e7f778" +dependencies = [ + "unicode-ident", +] + +[[package]] +name = "quote" +version = "1.0.40" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1885c039570dc00dcb4ff087a89e185fd56bae234ddc7f056a945bf36467248d" +dependencies = [ + "proc-macro2", +] + +[[package]] +name = "ryu" +version = "1.0.20" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "28d3b2b1366ec20994f1fd18c3c594f05c5dd4bc44d8bb0c1c632c8d6829481f" + +[[package]] +name = "serde" +version = "1.0.219" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5f0e2c6ed6606019b4e29e69dbaba95b11854410e5347d525002456dbbb786b6" +dependencies = [ + "serde_derive", +] + +[[package]] +name = "serde_derive" +version = "1.0.219" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5b0276cf7f2c73365f7157c8123c21cd9a50fbbd844757af28ca1f5925fc2a00" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "serde_json" +version = "1.0.140" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "20068b6e96dc6c9bd23e01df8827e6c7e1f2fddd43c21810382803c136b99373" +dependencies = [ + "itoa", + "memchr", + "ryu", + "serde", +] + +[[package]] +name = "strsim" +version = "0.11.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7da8b5736845d9f2fcb837ea5d9e2628564b3b043a70948a3f0b778838c5fb4f" + +[[package]] +name = "syn" +version = "2.0.101" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8ce2b7fc941b3a24138a0a7cf8e858bfc6a992e7978a068a5c760deb0ed43caf" +dependencies = [ + "proc-macro2", + "quote", + "unicode-ident", +] + +[[package]] +name = "unicode-ident" +version = "1.0.18" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5a5f39404a5da50712a4c1eecf25e90dd62b613502b7e925fd4e4d19b5c96512" + +[[package]] +name = "utf8parse" +version = "0.2.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "06abde3611657adf66d383f00b093d7faecc7fa57071cce2578660c9f1010821" + +[[package]] +name = "windows-sys" +version = "0.59.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1e38bc4d79ed67fd075bcc251a1c39b32a1776bbe92e5bef1f0bf1f8c531853b" +dependencies = [ + "windows-targets", +] + +[[package]] +name = "windows-targets" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9b724f72796e036ab90c1021d4780d4d3d648aca59e491e6b98e725b84e99973" +dependencies = [ + "windows_aarch64_gnullvm", + "windows_aarch64_msvc", + "windows_i686_gnu", + "windows_i686_gnullvm", + "windows_i686_msvc", + "windows_x86_64_gnu", + "windows_x86_64_gnullvm", + "windows_x86_64_msvc", +] + +[[package]] +name = "windows_aarch64_gnullvm" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "32a4622180e7a0ec044bb555404c800bc9fd9ec262ec147edd5989ccd0c02cd3" + +[[package]] +name = "windows_aarch64_msvc" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "09ec2a7bb152e2252b53fa7803150007879548bc709c039df7627cabbd05d469" + +[[package]] +name = "windows_i686_gnu" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8e9b5ad5ab802e97eb8e295ac6720e509ee4c243f69d781394014ebfe8bbfa0b" + +[[package]] +name = "windows_i686_gnullvm" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0eee52d38c090b3caa76c563b86c3a4bd71ef1a819287c19d586d7334ae8ed66" + +[[package]] +name = "windows_i686_msvc" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "240948bc05c5e7c6dabba28bf89d89ffce3e303022809e73deaefe4f6ec56c66" + +[[package]] +name = "windows_x86_64_gnu" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "147a5c80aabfbf0c7d901cb5895d1de30ef2907eb21fbbab29ca94c5b08b1a78" + +[[package]] +name = "windows_x86_64_gnullvm" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "24d5b23dc417412679681396f2b49f3de8c1473deb516bd34410872eff51ed0d" + +[[package]] +name = "windows_x86_64_msvc" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "589f6da84c646204747d1270a2a5661ea66ed1cced2631d546fdfb155959f9ec" diff --git a/Cargo.toml b/Cargo.toml new file mode 100644 index 0000000..d27efa1 --- /dev/null +++ b/Cargo.toml @@ -0,0 +1,15 @@ +[package] +name = "msg_tool" +version = "0.1.0" +edition = "2024" + +[dependencies] +anyhow = "1" +clap = { version = "4.5", features = ["derive"] } +encoding_rs = "0.8" +lazy_static = "1.5.0" +serde = { version = "1", features = ["derive"] } +serde_json = "1.0.140" + +[target.'cfg(windows)'.dependencies] +windows-sys = { version = "0", features = ["Win32_Globalization"] } diff --git a/src/args.rs b/src/args.rs new file mode 100644 index 0000000..f978890 --- /dev/null +++ b/src/args.rs @@ -0,0 +1,69 @@ +use crate::types::*; +use clap::{ArgAction, ArgGroup, Parser, Subcommand}; + +/// Tools for export and import scripts +#[derive(Parser, Debug)] +#[clap(group = ArgGroup::new("encodingg").multiple(false), group = ArgGroup::new("output_encodingg").multiple(false))] +#[command(version, about, long_about = None)] +pub struct Arg { + #[arg(short = 't', long, value_enum, global = true)] + /// Script type + pub script_type: Option, + #[arg(short = 'T', long, value_enum, global = true)] + /// Output script type + pub output_type: Option, + #[arg(short = 'e', long, value_enum, global = true, group = "encodingg")] + /// Script encoding + pub encoding: Option, + #[cfg(windows)] + #[arg(short = 'c', long, value_enum, global = true, group = "encodingg")] + /// Script code page + pub code_page: Option, + #[arg( + short = 'E', + long, + value_enum, + global = true, + group = "output_encodingg" + )] + /// Output text encoding + pub output_encoding: Option, + #[cfg(windows)] + #[arg( + short = 'C', + long, + value_enum, + global = true, + group = "output_encodingg" + )] + /// Output code page + pub output_code_page: Option, + #[arg(long, value_enum, global = true)] + /// Circus Game + pub circus_mes_type: Option, + #[arg(short, long, action = ArgAction::SetTrue, global = true)] + /// Search for script files in the directory recursively + pub recursive: bool, + #[arg(global = true, action = ArgAction::SetTrue, short, long)] + /// Print backtrace on error + pub backtrace: bool, + #[command(subcommand)] + /// Command + pub command: Command, +} + +#[derive(Subcommand, Debug)] +/// Commands +pub enum Command { + /// Extract from script + Export { + /// Input script file or directory + input: String, + /// Output file or directory + output: Option, + }, +} + +pub fn parse_args() -> Arg { + Arg::parse() +} diff --git a/src/main.rs b/src/main.rs new file mode 100644 index 0000000..076b30f --- /dev/null +++ b/src/main.rs @@ -0,0 +1,200 @@ +pub mod args; +pub mod scripts; +pub mod types; +pub mod utils; + +fn get_encoding(arg: &args::Arg, builder: &Box) -> types::Encoding { + match &arg.encoding { + Some(enc) => { + return match enc { + &types::TextEncoding::Default => { + builder.default_encoding() + } + &types::TextEncoding::Auto => { + types::Encoding::Auto + } + &types::TextEncoding::Cp932 => { + types::Encoding::Cp932 + } + &types::TextEncoding::Utf8 => { + types::Encoding::Utf8 + } + &types::TextEncoding::Gb2312 => { + types::Encoding::Gb2312 + } + }; + } + None => {} + } + #[cfg(windows)] + match &arg.code_page { + Some(code_page) => { + return types::Encoding::CodePage(*code_page); + } + None => {} + } + builder.default_encoding() +} + +fn get_output_encoding(arg: &args::Arg) -> types::Encoding { + match &arg.output_encoding { + Some(enc) => { + return match enc { + &types::TextEncoding::Default => { + types::Encoding::Utf8 + } + &types::TextEncoding::Auto => { + types::Encoding::Utf8 + } + &types::TextEncoding::Cp932 => { + types::Encoding::Cp932 + } + &types::TextEncoding::Utf8 => { + types::Encoding::Utf8 + } + &types::TextEncoding::Gb2312 => { + types::Encoding::Gb2312 + } + }; + } + None => {} + } + #[cfg(windows)] + match &arg.code_page { + Some(code_page) => { + return types::Encoding::CodePage(*code_page); + } + None => {} + } + types::Encoding::Utf8 +} + +pub fn parse_script(filename: &str, arg: &args::Arg, config: &types::ExtraConfig) -> anyhow::Result> { + match &arg.script_type { + Some(typ) => { + for builder in scripts::BUILDER.iter() { + if typ == builder.script_type() { + let encoding = get_encoding(arg, builder); + return Ok(builder.build_script(filename, encoding, config)?); + } + } + } + _ => {} + } + for builder in scripts::BUILDER.iter() { + let exts = builder.extensions(); + for ext in exts { + if filename.to_lowercase().ends_with(ext) { + let encoding = get_encoding(arg, builder); + return Ok(builder.build_script(filename, encoding, config)?); + } + } + } + Err(anyhow::anyhow!("Unsupported script type")) +} + +pub fn export_script( + filename: &str, + arg: &args::Arg, + config: &types::ExtraConfig, + output: &Option, + is_dir: bool, +) -> anyhow::Result<()> { + eprintln!("Exporting {}", filename); + let script = parse_script(filename, arg, config)?; + // println!("{:?}", script); + let mes = script.extract_messages()?; + // for m in mes.iter() { + // println!("{:?}", m); + // } + if mes.is_empty() { + eprintln!("No messages found"); + return Ok(()); + } + let of = match &arg.output_type { + Some(t) => t.clone(), + None => script.default_output_script_type(), + }; + let f = if filename == "-" { + String::from("-") + } else { + match output.as_ref() { + Some(output) => { + if is_dir { + let f = std::path::PathBuf::from(filename); + let mut pb = std::path::PathBuf::from(output); + if let Some(fname) = f.file_name() { + pb.push(fname); + } + pb.set_extension(of.as_ref()); + pb.to_string_lossy().into_owned() + } else { + output.clone() + } + } + None => { + let mut pb = std::path::PathBuf::from(filename); + pb.set_extension(of.as_ref()); + pb.to_string_lossy().into_owned() + } + } + }; + match of { + types::OutputScriptType::Json => { + let enc = get_output_encoding(arg); + let s = serde_json::to_string_pretty(&mes)?; + let b = utils::encoding::encode_string(enc, &s)?; + let mut f = utils::files::write_file(&f)?; + f.write_all(&b)?; + } + _ => {} + } + Ok(()) +} + +fn main() { + let arg = args::parse_args(); + if arg.backtrace { + unsafe { std::env::set_var("RUST_LIB_BACKTRACE", "1") }; + } + let cfg = types::ExtraConfig { + circus_mes_type: arg.circus_mes_type.clone(), + }; + match &arg.command { + args::Command::Export { input, output } => { + let (scripts, is_dir) = utils::files::collect_files(input, arg.recursive).unwrap(); + if is_dir { + match &output { + Some(output) => { + let op = std::path::Path::new(output); + if op.exists() { + if !op.is_dir() { + eprintln!("Output path is not a directory"); + return; + } + } else { + std::fs::create_dir_all(op).unwrap(); + } + } + None => { + eprintln!("Output path is not specified"); + return; + } + } + } + for script in scripts.iter() { + let re = export_script(&script, &arg, &cfg, output, is_dir); + match re { + Ok(_) => { + } + Err(e) => { + eprintln!("Error exporting {}: {}", script, e); + if arg.backtrace { + eprintln!("Backtrace: {:?}", e.backtrace()); + } + } + } + } + } + } +} diff --git a/src/scripts/base.rs b/src/scripts/base.rs new file mode 100644 index 0000000..d0e21b6 --- /dev/null +++ b/src/scripts/base.rs @@ -0,0 +1,23 @@ +use crate::types::*; +use anyhow::Result; + +pub trait ScriptBuilder { + fn default_encoding(&self) -> Encoding; + + fn build_script( + &self, + filename: &str, + encoding: Encoding, + config: &ExtraConfig, + ) -> Result>; + + fn extensions(&self) -> &'static [&'static str]; + + fn script_type(&self) -> &'static ScriptType; +} + +pub trait Script: std::fmt::Debug { + fn default_output_script_type(&self) -> OutputScriptType; + + fn extract_messages(&self) -> Result>; +} diff --git a/src/scripts/circus/info.rs b/src/scripts/circus/info.rs new file mode 100644 index 0000000..9c561a5 --- /dev/null +++ b/src/scripts/circus/info.rs @@ -0,0 +1,465 @@ +pub struct Section { + beg: u8, + end: u8, +} + +impl std::fmt::Debug for Section { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + f.debug_tuple("Section") + .field(&self.beg) + .field(&self.end) + .finish() + } +} + +impl Section { + pub const fn new(beg: u8, end: u8) -> Self { + Section { beg, end } + } + + pub fn its(&self, key: u8) -> bool { + return (!(self.beg == self.end && self.beg == 0xFF)) + && (key >= self.beg && key <= self.end); + } +} + +#[derive(Debug)] +pub struct ScriptInfo { + pub name: &'static str, + pub version: u16, + /// \[op: byte\] \[arg1: uint8\] \[arg2: uint8\] + pub uint8x2: Section, + /// \[op: byte\] \[arg1: uint8\] \[arg2: string\] + pub uint8str: Section, + /// \[op: byte\] \[arg1: string\] + pub string: Section, + /// \[op: byte\] \[arg1: encstr\] + pub encstr: Section, + /// \[op: byte\] \[arg1: uint16\] \[arg2: uint16\] \[arg3: uint16\] \[arg4: uint16\] + pub uint16x4: Section, + /// the opcode for unencrypted strings in scene text + pub optunenc: u8, + pub deckey: u8, + pub nameopcode: u8, +} + +const SCRIPT_INFO: [ScriptInfo; 31] = [ + ScriptInfo::new( + "ffexa", + 0x7B69, + (0x00, 0x28), + (0x29, 0x2E), + (0x2F, 0x49), + (0x4A, 0x4D), + (0x4E, 0xFF), + 0x43, + 0x20, + 0xFF, + ), + ScriptInfo::new( + "ffexs", + 0x7B6B, + (0x00, 0x28), + (0x29, 0x2E), + (0x2F, 0x4B), + (0x4c, 0x4F), + (0x50, 0xFF), + 0x43, + 0x20, + 0xFF, + ), + ScriptInfo::new( + "ef", + 0x466A, + (0x00, 0x28), + (0x2A, 0x2F), + (0x30, 0x4A), + (0x4B, 0x4E), + (0x4F, 0xFF), + 0x46, + 0x20, + 0xFF, + ), + ScriptInfo::new( + "dcos", + 0x315D, + (0x00, 0x2B), + (0xFF, 0xFF), + (0x2C, 0x45), + (0x46, 0x49), + (0x4A, 0xFF), + 0x42, + 0x20, + 0xFF, + ), + ScriptInfo::new( + "ktlep", + 0x6E69, + (0x00, 0x28), + (0x29, 0x2E), + (0x2F, 0x49), + (0x4A, 0x4D), + (0x4E, 0xFF), + 0x45, + 0x20, + 0xFF, + ), + ScriptInfo::new( + "dcws", + 0x656C, + (0x00, 0x2B), + (0x2C, 0x31), + (0x32, 0x4C), + (0x4D, 0x50), + (0x51, 0xFF), + 0x48, + 0x20, + 0xFF, + ), + ScriptInfo::new( + "dcsv", + 0x636C, + (0x00, 0x2B), + (0x2C, 0x31), + (0x32, 0x4C), + (0x4D, 0x50), + (0x51, 0xFF), + 0x46, + 0x20, + 0xFF, + ), + ScriptInfo::new( + "dcpc", + 0x3D63, + (0x00, 0x2C), + (0xFF, 0xFF), + (0x2D, 0x49), + (0x4A, 0x4D), + (0x4E, 0xFF), + 0x44, + 0x20, + 0xFF, + ), + ScriptInfo::new( + "dcmems", + 0x315D, + (0x00, 0x2B), + (0xFF, 0xFF), + (0x2C, 0x45), + (0x46, 0x49), + (0x4A, 0xFF), + 0x42, + 0x20, + 0xFF, + ), + ScriptInfo::new( + "dcdx", + 0x7769, + (0x00, 0x28), + (0x29, 0x2E), + (0x2F, 0x49), + (0x4A, 0x4D), + (0x4E, 0xFF), + 0x45, + 0x20, + 0xFF, + ), + ScriptInfo::new( + "dcas", + 0x4E69, + (0x00, 0x28), + (0x29, 0x2E), + (0x2F, 0x49), + (0x4A, 0x4D), + (0x4E, 0xFF), + 0x43, + 0x20, + 0xFF, + ), + ScriptInfo::new( + "dcbs", + 0x3163, + (0x00, 0x2B), + (0xFF, 0xFF), + (0x2C, 0x48), + (0x49, 0x4C), + (0x4D, 0xFF), + 0xFF, + 0x20, + 0xFF, + ), + ScriptInfo::new( + "dc2fl", + 0x9C69, + (0x00, 0x28), + (0x29, 0x2E), + (0x2F, 0x49), + (0x4A, 0x4D), + (0x4E, 0xFF), + 0x45, + 0x20, + 0xFF, + ), + ScriptInfo::new( + "dc2bs", + 0x316C, + (0x00, 0x2B), + (0x2C, 0x31), + (0x32, 0x4C), + (0x4D, 0x50), + (0x51, 0xFF), + 0xFF, + 0x20, + 0xFF, + ), + ScriptInfo::new( + "dc2dm", + 0x9D72, + (0x00, 0x29), + (0x2A, 0x31), + (0x32, 0x4C), + (0x4D, 0x50), + (0x51, 0xFF), + 0x44, + 0x20, + 0xFF, + ), + ScriptInfo::new( + "dc2fy", + 0x3866, + (0x00, 0x2E), + (0xFF, 0xFF), + (0x2F, 0x4B), + (0x4C, 0x4F), + (0x50, 0xFF), + 0x48, + 0x20, + 0xFF, + ), + ScriptInfo::new( + "dc2cckko", + 0x026C, + (0x00, 0x2B), + (0x2C, 0x31), + (0x32, 0x4C), + (0x4D, 0x50), + (0x51, 0xFF), + 0xFF, + 0x20, + 0xFF, + ), + ScriptInfo::new( + "dc2ccotm", + 0x016C, + (0x00, 0x2B), + (0x2C, 0x31), + (0x32, 0x4C), + (0x4D, 0x50), + (0x51, 0xFF), + 0xFF, + 0x20, + 0xFF, + ), + ScriptInfo::new( + "dc2sc", + 0x3B69, + (0x00, 0x28), + (0x29, 0x2E), + (0x2F, 0x49), + (0x4A, 0x4D), + (0x4E, 0xFF), + 0x45, + 0x20, + 0xFF, + ), + ScriptInfo::new( + "dc2ty", + 0x5F69, + (0x00, 0x28), + (0x29, 0x2E), + (0x2F, 0x49), + (0x4A, 0x4D), + (0x4E, 0xFF), + 0xFF, + 0x20, + 0xFF, + ), + ScriptInfo::new( + "dc2pc", + 0x5769, + (0x00, 0x28), + (0x29, 0x2E), + (0x2F, 0x49), + (0x4A, 0x4D), + (0x4E, 0xFF), + 0x45, + 0x20, + 0xFF, + ), + ScriptInfo::new( + "dc3rx", + 0x9772, + (0x00, 0x2B), + (0x2C, 0x33), + (0x34, 0x4E), + (0x4F, 0x52), + (0x53, 0xFF), + 0x45, + 0x20, + 0xFF, + ), + ScriptInfo::new( + "dc3pp", + 0x9872, + (0x00, 0x2A), + (0x2B, 0x32), + (0x33, 0x4E), + (0x4F, 0x51), + (0x52, 0xFF), + 0x45, + 0x20, + 0xFF, + ), + ScriptInfo::new( + "dc3wy", + 0xA09F, + (0x00, 0x38), + (0x39, 0x41), + (0x42, 0x5F), + (0x60, 0x63), + (0x64, 0xFF), + 0x55, + 0x20, + 0xFF, + ), + ScriptInfo::new( + "dc3dd", + 0xA5A8, + (0x00, 0x38), + (0x39, 0x43), + (0x44, 0x62), + (0x63, 0x67), + (0x68, 0xFF), + 0x58, + 0x20, + 0xFF, + ), + ScriptInfo::new( + "dc4", + 0xAAB6, + (0x00, 0x3A), + (0x3B, 0x47), + (0x48, 0x68), + (0x69, 0x6D), + (0x6E, 0xFF), + 0x5D, + 0x20, + 0xFF, + ), + ScriptInfo::new( + "dc4ph", + 0xABB6, + (0x00, 0x3A), + (0x3B, 0x47), + (0x48, 0x68), + (0x69, 0x6D), + (0x6E, 0xFF), + 0x5D, + 0x20, + 0xFF, + ), + ScriptInfo::new( + "ds", + 0x9F9A, + (0x00, 0x38), + (0x39, 0x4A), + (0x41, 0x5E), + (0x5F, 0x62), + (0x63, 0xFF), + 0x54, + 0x20, + 0xFF, + ), + ScriptInfo::new( + "dsif", + 0xA1A1, + (0x00, 0x39), + (0x3A, 0x42), + (0x43, 0x60), + (0x61, 0x64), + (0x65, 0xFF), + 0x56, + 0x20, + 0x62, + ), + ScriptInfo::new( + "tmpl", + 0xA6B4, + (0x00, 0x3B), + (0x3A, 0x46), + (0x46, 0x67), + (0x68, 0x6E), + (0x6D, 0xFF), + 0x5C, + 0x20, + 0x69, + ), + ScriptInfo::new( + "nightshade", + 0x0871, + (0x00, 0x2B), + (0x2C, 0x33), + (0x34, 0x4E), + (0x4F, 0x52), + (0x53, 0xFF), + 0x43, + 0x01, + 0xFF, + ), +]; + +impl ScriptInfo { + pub const fn new( + name: &'static str, + version: u16, + uint8x2: (u8, u8), + uint8str: (u8, u8), + string: (u8, u8), + encstr: (u8, u8), + uint16x4: (u8, u8), + optunenc: u8, + deckey: u8, + nameopcode: u8, + ) -> Self { + ScriptInfo { + name, + version, + uint8x2: Section::new(uint8x2.0, uint8x2.1), + uint8str: Section::new(uint8str.0, uint8str.1), + string: Section::new(string.0, string.1), + encstr: Section::new(encstr.0, encstr.1), + uint16x4: Section::new(uint16x4.0, uint16x4.1), + optunenc, + deckey, + nameopcode, + } + } + + pub fn query(name: &str) -> Option<&'static ScriptInfo> { + for info in SCRIPT_INFO.iter() { + if info.name == name { + return Some(info); + } + } + None + } + + pub fn query_by_version(version: u16) -> Option<&'static ScriptInfo> { + for info in SCRIPT_INFO.iter() { + if info.version == version { + return Some(info); + } + } + None + } +} diff --git a/src/scripts/circus/mod.rs b/src/scripts/circus/mod.rs new file mode 100644 index 0000000..438a8b3 --- /dev/null +++ b/src/scripts/circus/mod.rs @@ -0,0 +1,2 @@ +mod info; +pub mod script; diff --git a/src/scripts/circus/script.rs b/src/scripts/circus/script.rs new file mode 100644 index 0000000..c986ed8 --- /dev/null +++ b/src/scripts/circus/script.rs @@ -0,0 +1,218 @@ +use super::info::*; +use crate::scripts::base::*; +use crate::types::*; +use crate::utils::encoding::decode_to_string; +use anyhow::Result; + +pub struct CircusMesScriptBuilder {} + +impl CircusMesScriptBuilder { + pub const fn new() -> Self { + CircusMesScriptBuilder {} + } +} + +impl ScriptBuilder for CircusMesScriptBuilder { + fn default_encoding(&self) -> Encoding { + Encoding::Cp932 + } + + fn build_script( + &self, + filename: &str, + encoding: Encoding, + config: &ExtraConfig, + ) -> Result> { + Ok(Box::new(CircusMesScript::new( + filename.as_ref(), + encoding, + config, + )?)) + } + + fn extensions(&self) -> &'static [&'static str] { + &["mes"] + } + + fn script_type(&self) -> &'static ScriptType { + &ScriptType::Circus + } +} + +#[derive(Debug)] +struct Token { + offset: usize, + length: usize, + value: u8, +} + +pub struct CircusMesScript { + data: Vec, + encoding: Encoding, + is_new_ver: bool, + version: u16, + info: &'static ScriptInfo, + asm_bin_offset: usize, + blocks_offset: usize, + tokens: Vec, +} + +impl CircusMesScript { + pub fn new(filename: &str, encoding: Encoding, config: &ExtraConfig) -> Result { + let data = crate::utils::files::read_file(filename)?; + let head0 = i32::from_le_bytes(data[0..4].try_into()?); + let head1 = i32::from_le_bytes(data[4..8].try_into()?); + let mut is_new_ver = false; + let mut version = 0; + let mut info = config + .circus_mes_type + .as_ref() + .and_then(|name| ScriptInfo::query(name.as_ref())); + let mut asm_bin_offset = 0; + let mut blocks_offset = 0; + if head1 == 0x3 { + let offset = head0 * 0x6 + 0x4; + if data.len() > offset as usize { + if data.len() > offset as usize + 3 { + version = + u16::from_le_bytes(data[offset as usize..offset as usize + 2].try_into()?); + if info.is_none() { + info = ScriptInfo::query_by_version(version); + } + asm_bin_offset = offset as usize + 3; + } + blocks_offset = 8; + } + is_new_ver = true; + } else { + let offset = head0 * 0x4 + 0x4; + if data.len() > offset as usize { + if data.len() > offset as usize + 2 { + version = + u16::from_le_bytes(data[offset as usize..offset as usize + 2].try_into()?); + if info.is_none() { + info = ScriptInfo::query_by_version(version); + } + asm_bin_offset = offset as usize + 2; + } + blocks_offset = 4; + } + } + let info = info.ok_or(anyhow::anyhow!("Failed to detect version."))?; + let mut tokens = Vec::new(); + let mut offset = 0; + let asm_bin_size = if asm_bin_offset == 0 { + 0 + } else { + data.len() - asm_bin_offset + }; + while offset < asm_bin_size { + let value = data[asm_bin_offset + offset]; + let length = if info.uint8x2.its(value) { + 0x03 + } else if info.uint8str.its(value) { + let mut len = 0x3; + let mut temp = data[asm_bin_offset + offset + len - 1]; + while temp != 0x00 { + len += 0x1; + if asm_bin_offset + offset + len >= data.len() { + break; + } + temp = data[asm_bin_offset + offset + len - 1]; + } + len + } else if info.string.its(value) || info.encstr.its(value) { + let mut len = 1; + let mut temp = data[asm_bin_offset + offset + len - 1]; + while temp != 0x00 { + len += 0x1; + if asm_bin_offset + offset + len >= data.len() { + break; + } + temp = data[asm_bin_offset + offset + len - 1]; + } + len + } else if info.uint16x4.its(value) { + 0x09 + } else { + return Err(anyhow::anyhow!(format!( + "Unknown token type: 0x{:02X} at offset {}", + value, + asm_bin_offset + offset + ))); + }; + let token = Token { + offset, + length, + value, + }; + offset += length; + tokens.push(token); + } + Ok(CircusMesScript { + data, + encoding, + is_new_ver, + version, + info, + asm_bin_offset, + blocks_offset, + tokens, + }) + } +} + +impl std::fmt::Debug for CircusMesScript { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + f.debug_struct("CircusMesScript") + .field("encoding", &self.encoding) + .field("is_new_ver", &self.is_new_ver) + .field("version", &self.version) + .field("info", &self.info) + .field("asm_bin_offset", &self.asm_bin_offset) + .field("blocks_offset", &self.blocks_offset) + .field("tokens", &self.tokens) + .finish_non_exhaustive() + } +} + +impl Script for CircusMesScript { + fn default_output_script_type(&self) -> OutputScriptType { + OutputScriptType::Json + } + + fn extract_messages(&self) -> Result> { + let mut mes = vec![]; + let mut name = None; + for token in self.tokens.iter() { + let mut t = None; + if self.info.encstr.its(token.value) { + let mut text = self.data[self.asm_bin_offset + token.offset + 1 + ..self.asm_bin_offset + token.offset + token.length] + .to_vec(); + for t in text.iter_mut() { + *t = (*t).overflowing_add(self.info.deckey).0; + } + t = Some(decode_to_string(self.encoding, &text)?); + // println!("Token(enc): {:?}, {}", token, t.as_ref().unwrap()); + } else if token.value == self.info.optunenc { + let text = &self.data[self.asm_bin_offset + token.offset + 1 + ..self.asm_bin_offset + token.offset + token.length]; + t = Some(decode_to_string(self.encoding, text)?); + // println!("Token: {:?}, {}", token, t.as_ref().unwrap()); + } + match t { + Some(t) => { + if token.value == self.info.nameopcode { + name = Some(t); + } else { + let message = Message::new(t, name.take()); + mes.push(message); + } + } + None => {} + } + } + Ok(mes) + } +} diff --git a/src/scripts/mod.rs b/src/scripts/mod.rs new file mode 100644 index 0000000..a6d5cbd --- /dev/null +++ b/src/scripts/mod.rs @@ -0,0 +1,12 @@ +pub mod base; +pub mod circus; + +pub use base::{Script, ScriptBuilder}; + +lazy_static::lazy_static! { + pub static ref BUILDER: Vec> = vec![ + Box::new(circus::script::CircusMesScriptBuilder::new()), + ]; + pub static ref ALL_EXTS: Vec = + BUILDER.iter().flat_map(|b| b.extensions()).map(|s| s.to_string()).collect(); +} diff --git a/src/types.rs b/src/types.rs new file mode 100644 index 0000000..66b664f --- /dev/null +++ b/src/types.rs @@ -0,0 +1,187 @@ +use clap::ValueEnum; +use serde::{Deserialize, Serialize}; + +#[derive(Copy, Clone, Serialize, Deserialize, Debug)] +#[serde(untagged, rename_all = "camelCase")] +/// Text Encoding +pub enum Encoding { + /// Automatically detect encoding + Auto, + /// UTF-8 encoding + Utf8, + /// Shift-JIS encoding + Cp932, + /// GB2312 encoding + Gb2312, + /// Code page encoding (Windows only) + #[cfg(windows)] + CodePage(u32), +} + +impl Default for Encoding { + fn default() -> Self { + Encoding::Utf8 + } +} + +#[derive(Clone, Copy, Debug, ValueEnum, PartialEq, Eq, PartialOrd, Ord)] +/// Text Encoding +pub enum TextEncoding { + /// Use script's default encoding + Default, + /// Automatically detect encoding + Auto, + /// UTF-8 encoding + Utf8, + /// Shift-JIS encoding + Cp932, + /// GB2312 encoding + Gb2312, +} + +#[derive(Clone, Copy, Debug, ValueEnum, PartialEq, Eq, PartialOrd, Ord)] +/// Script type +pub enum OutputScriptType { + /// Text script + Txt, + /// JSON which can be used for GalTransl + Json, +} + +impl AsRef for OutputScriptType { + fn as_ref(&self) -> &str { + match self { + OutputScriptType::Txt => "txt", + OutputScriptType::Json => "json", + } + } +} + +#[derive(Clone, Copy, Debug, ValueEnum, PartialEq, Eq, PartialOrd, Ord)] +pub enum CircusMesType { + /// fortissimo//Akkord:Bsusvier + Ffexa, + /// fortissimo EXS//Akkord:nächsten Phase + Ffexs, + /// Eternal Fantasy + Ef, + /// D.C.〜ダ・カーポ〜 温泉編 + Dcos, + /// ことり Love Ex P + Ktlep, + /// D.C.WhiteSeason + Dcws, + /// D.C. Summer Vacation + Dcsv, + /// D.C.P.C.(Vista) + Dcpc, + /// D.C.〜ダ・カーポ〜 MEMORIES DISC + Dcmems, + /// D.C. Dream X’mas + Dcdx, + /// D.C.A.S. 〜ダ・カーポ〜アフターシーズンズ + Dcas, + /// D.C.II 春風のアルティメットバトル! + Dcbs, + /// D.C.II Fall in Love + Dc2fl, + /// D.C.II 春風のアルティメットバトル! + Dc2bs, + /// D.C.II Dearest Marriage + Dc2dm, + /// D.C.II 〜featuring Yun2〜 + Dc2fy, + /// D.C.II C.C. 月島小恋のらぶらぶバスルーム + Dc2cckko, + /// D.C.II C.C. 音姫先生のどきどき特別授業 + Dc2ccotm, + /// D.C.II Spring Celebration + Dc2sc, + /// D.C.II To You + Dc2ty, + /// D.C.II P.C. + Dc2pc, + /// D.C.III RX-rated + Dc3rx, + /// D.C.III P.P.~ダ・カーポIII プラチナパートナー~ + Dc3pp, + /// D.C.III WithYou + Dc3wy, + /// D.C.III DreamDays + Dc3dd, + /// D.C.4 ~ダ・カーポ4~ + Dc4, + /// D.C.4 Plus Harmony 〜ダ・カーポ4〜 プラスハーモニー + Dc4ph, + /// D.S. -Dal Segno- + Ds, + /// D.S.i.F. -Dal Segno- in Future + Dsif, + /// てんぷれ! + Tmpl, + /// 百花百狼/Hyakka Hyakurou + Nightshade, +} + +impl AsRef for CircusMesType { + fn as_ref(&self) -> &str { + match self { + CircusMesType::Ffexa => "ffexa", + CircusMesType::Ffexs => "ffexs", + CircusMesType::Ef => "ef", + CircusMesType::Dcos => "dcos", + CircusMesType::Ktlep => "ktlep", + CircusMesType::Dcws => "dcws", + CircusMesType::Dcsv => "dcsv", + CircusMesType::Dcpc => "dcpc", + CircusMesType::Dcmems => "dcmems", + CircusMesType::Dcdx => "dcdx", + CircusMesType::Dcas => "dcas", + CircusMesType::Dcbs => "dcbs", + CircusMesType::Dc2fl => "dc2fl", + CircusMesType::Dc2bs => "dc2bs", + CircusMesType::Dc2dm => "dc2dm", + CircusMesType::Dc2fy => "dc2fy", + CircusMesType::Dc2cckko => "dc2cckko", + CircusMesType::Dc2ccotm => "dc2ccotm", + CircusMesType::Dc2sc => "dc2sc", + CircusMesType::Dc2ty => "dc2ty", + CircusMesType::Dc2pc => "dc2pc", + CircusMesType::Dc3rx => "dc3rx", + CircusMesType::Dc3pp => "dc3pp", + CircusMesType::Dc3wy => "dc3wy", + CircusMesType::Dc3dd => "dc3dd", + CircusMesType::Dc4 => "dc4", + CircusMesType::Dc4ph => "dc4ph", + CircusMesType::Ds => "ds", + CircusMesType::Dsif => "dsif", + CircusMesType::Tmpl => "tmpl", + CircusMesType::Nightshade => "nightshade", + } + } +} + +pub struct ExtraConfig { + pub circus_mes_type: Option, +} + +#[derive(Clone, Copy, Debug, ValueEnum, PartialEq, Eq, PartialOrd, Ord)] +/// Script type +pub enum ScriptType { + /// Circus MES script + Circus, +} + +#[derive(Debug, Serialize, Deserialize)] +pub struct Message { + #[serde(skip_serializing_if = "Option::is_none")] + pub name: Option, + pub message: String, +} + +impl Message { + pub fn new(message: String, name: Option) -> Self { + Message { message, name } + } +} + diff --git a/src/utils/encoding.rs b/src/utils/encoding.rs new file mode 100644 index 0000000..1470aa9 --- /dev/null +++ b/src/utils/encoding.rs @@ -0,0 +1,106 @@ +use crate::types::*; + +pub fn decode_to_string(encoding: Encoding, data: &[u8]) -> Result { + match encoding { + Encoding::Auto => decode_to_string(Encoding::Utf8, data) + .or_else(|_| decode_to_string(Encoding::Cp932, data)) + .or_else(|_| decode_to_string(Encoding::Gb2312, data)), + Encoding::Utf8 => Ok(String::from_utf8(data.to_vec())?), + Encoding::Cp932 => { + let result = encoding_rs::SHIFT_JIS.decode(data); + if result.2 { + Err(anyhow::anyhow!("Failed to decode Shift-JIS")) + } else { + Ok(result.0.to_string()) + } + } + Encoding::Gb2312 => { + let result = encoding_rs::GBK.decode(data); + if result.2 { + Err(anyhow::anyhow!("Failed to decode GB2312")) + } else { + Ok(result.0.to_string()) + } + } + #[cfg(windows)] + Encoding::CodePage(code_page) => { + Ok(super::encoding_win::decode_to_string(code_page, data)?) + } + } +} + +pub fn encode_string(encoding: Encoding, data: &str) -> Result, anyhow::Error> { + match encoding { + Encoding::Auto => Ok(data.as_bytes().to_vec()), + Encoding::Utf8 => Ok(data.as_bytes().to_vec()), + Encoding::Cp932 => { + let result = encoding_rs::SHIFT_JIS.encode(data); + if result.2 { + Err(anyhow::anyhow!("Failed to encode Shift-JIS")) + } else { + Ok(result.0.to_vec()) + } + } + Encoding::Gb2312 => { + let result = encoding_rs::GBK.encode(data); + if result.2 { + Err(anyhow::anyhow!("Failed to encode GB2312")) + } else { + Ok(result.0.to_vec()) + } + } + #[cfg(windows)] + Encoding::CodePage(code_page) => { + Ok(super::encoding_win::encode_string(code_page, data)?) + } + } +} + +#[test] +fn test_decode_to_string() { + assert_eq!( + decode_to_string( + Encoding::Utf8, + &[228, 184, 173, 230, 150, 135, 230, 181, 139, 232, 175, 149] + ) + .unwrap(), + "中文测试".to_string() + ); + assert_eq!( + decode_to_string( + Encoding::Cp932, + &[ + 130, 171, 130, 225, 130, 215, 130, 194, 130, 187, 130, 211, 130, 198 + ] + ) + .unwrap(), + "きゃべつそふと".to_string() + ); + assert_eq!( + decode_to_string(Encoding::Gb2312, &[214, 208, 206, 196]).unwrap(), + "中文".to_string() + ); + assert_eq!( + decode_to_string( + Encoding::Auto, + &[228, 184, 173, 230, 150, 135, 230, 181, 139, 232, 175, 149] + ) + .unwrap(), + "中文测试".to_string() + ); + assert_eq!( + decode_to_string( + Encoding::Auto, + &[ + 130, 171, 130, 225, 130, 215, 130, 194, 130, 187, 130, 211, 130, 198 + ] + ) + .unwrap(), + "きゃべつそふと".to_string() + ); + #[cfg(windows)] + assert_eq!( + decode_to_string(Encoding::CodePage(936), &[214, 208, 206, 196]).unwrap(), + "中文".to_string() + ); +} diff --git a/src/utils/encoding_win.rs b/src/utils/encoding_win.rs new file mode 100644 index 0000000..eace9d8 --- /dev/null +++ b/src/utils/encoding_win.rs @@ -0,0 +1,121 @@ +use windows_sys::Win32::Foundation::GetLastError; +use windows_sys::Win32::Globalization::{MB_ERR_INVALID_CHARS, MultiByteToWideChar, WideCharToMultiByte}; + +#[derive(Debug)] +pub struct WinError { + pub code: u32, +} + +impl WinError { + pub fn new(code: u32) -> Self { + WinError { code } + } + + pub fn from_last_error() -> Self { + let code = unsafe { GetLastError() }; + WinError::new(code) + } +} + +impl std::error::Error for WinError {} + +impl std::fmt::Display for WinError { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + write!(f, "Windows error code: {}", self.code) + } +} + +pub fn decode_to_string(cp: u32, data: &[u8]) -> Result { + let needed_len = unsafe { + MultiByteToWideChar( + cp, + MB_ERR_INVALID_CHARS, + data.as_ptr() as _, + data.len() as i32, + std::ptr::null_mut(), + 0, + ) + }; + if needed_len == 0 { + return Err(WinError::from_last_error()); + } + let mut wc = Vec::with_capacity(needed_len as usize); + wc.resize(needed_len as usize, 0); + let result = unsafe { + MultiByteToWideChar( + cp, + MB_ERR_INVALID_CHARS, + data.as_ptr() as _, + data.len() as i32, + wc.as_mut_ptr(), + needed_len, + ) + }; + if result == 0 { + return Err(WinError::from_last_error()); + } + Ok(String::from_utf16_lossy(&wc)) +} + +pub fn encode_string(cp: u32, data: &str) -> Result, WinError> { + let wstr = data.encode_utf16().collect::>(); + let needed_len = unsafe { + WideCharToMultiByte( + cp, + 0, + wstr.as_ptr(), + wstr.len() as i32, + std::ptr::null_mut(), + 0, + std::ptr::null_mut(), + std::ptr::null_mut(), + ) + }; + if needed_len == 0 { + return Err(WinError::from_last_error()); + } + let mut mb = Vec::with_capacity(needed_len as usize); + mb.resize(needed_len as usize, 0); + let result = unsafe { + WideCharToMultiByte( + cp, + 0, + wstr.as_ptr(), + wstr.len() as i32, + mb.as_mut_ptr(), + needed_len, + std::ptr::null_mut(), + std::ptr::null_mut(), + ) + }; + if result == 0 { + return Err(WinError::from_last_error()); + } + Ok(mb) +} + +#[test] +fn test_decode_to_string() { + assert_eq!( + decode_to_string( + 65001, + &[228, 184, 173, 230, 150, 135, 230, 181, 139, 232, 175, 149] + ) + .unwrap(), + "中文测试".to_string() + ); + assert_eq!( + decode_to_string( + 932, + &[ + 130, 171, 130, 225, 130, 215, 130, 194, 130, 187, 130, 211, 130, 198 + ] + ) + .unwrap(), + "きゃべつそふと".to_string() + ); + assert_eq!( + decode_to_string(936, &[214, 208, 206, 196]).unwrap(), + "中文".to_string() + ); +} diff --git a/src/utils/files.rs b/src/utils/files.rs new file mode 100644 index 0000000..4e766be --- /dev/null +++ b/src/utils/files.rs @@ -0,0 +1,67 @@ +use std::fs; +use std::io; +use std::path::Path; +use std::io::{Read, Write}; + +use crate::scripts::ALL_EXTS; + +pub fn find_files(path: &String, recursive: bool) -> io::Result> { + let mut result = Vec::new(); + let dir_path = Path::new(&path); + + if dir_path.is_dir() { + for entry in fs::read_dir(dir_path)? { + let entry = entry?; + let path = entry.path(); + + if path.is_file() + && path.extension().map_or(true, |ext| { + ALL_EXTS.contains(&ext.to_string_lossy().to_lowercase()) + }) + { + if let Some(path_str) = path.to_str() { + result.push(path_str.to_string()); + } + } else if recursive && path.is_dir() { + if let Some(path_str) = path.to_str() { + let mut sub_files = find_files(&path_str.to_string(), recursive)?; + result.append(&mut sub_files); + } + } + } + } + + Ok(result) +} + +pub fn collect_files(path: &String, recursive: bool) -> io::Result<(Vec, bool)> { + let pa = Path::new(path); + if pa.is_dir() { + return Ok((find_files(path, recursive)?, true)); + } + if pa.is_file() { + return Ok((vec![path.clone()], false)); + } + Err(io::Error::new( + io::ErrorKind::NotFound, + format!("Path {} is neither a file nor a directory", pa.display()), + )) +} + +pub fn read_file + ?Sized>(f: &F) -> io::Result> { + let mut content = Vec::new(); + if f.as_ref() == Path::new("-") { + io::stdin().read_to_end(&mut content)?; + } else { + content = fs::read(f)?; + } + Ok(content) +} + +pub fn write_file + ?Sized>(f: &F) -> io::Result> { + Ok(if f.as_ref() == Path::new("-") { + Box::new(io::stdout()) + } else { + Box::new(fs::File::create(f)?) + }) +} diff --git a/src/utils/mod.rs b/src/utils/mod.rs new file mode 100644 index 0000000..e3b35d8 --- /dev/null +++ b/src/utils/mod.rs @@ -0,0 +1,4 @@ +pub mod encoding; +#[cfg(windows)] +mod encoding_win; +pub mod files;