13 Commits

20 changed files with 1751 additions and 60 deletions

35
Cargo.lock generated
View File

@@ -674,6 +674,12 @@ dependencies = [
"regex-syntax",
]
[[package]]
name = "fastcdc"
version = "3.2.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "bf51ceb43e96afbfe4dd5c6f6082af5dfd60e220820b8123792d61963f2ce6bc"
[[package]]
name = "fastrand"
version = "2.3.0"
@@ -1323,8 +1329,9 @@ dependencies = [
[[package]]
name = "msg_tool"
version = "0.2.9"
version = "0.2.10"
dependencies = [
"adler",
"anyhow",
"base64",
"byteorder",
@@ -1334,6 +1341,7 @@ dependencies = [
"emote-psb",
"encoding",
"fancy-regex",
"fastcdc",
"flate2",
"int-enum",
"jieba-rs",
@@ -1359,6 +1367,7 @@ dependencies = [
"serde_json",
"serde_yaml_ng",
"sha1",
"sha2",
"stylua",
"unicode-segmentation",
"url",
@@ -1366,6 +1375,7 @@ dependencies = [
"webp",
"windows-sys 0.61.2",
"xml5ever",
"xp3",
"zstd",
]
@@ -1912,6 +1922,17 @@ dependencies = [
"digest",
]
[[package]]
name = "sha2"
version = "0.10.9"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a7507d819769d01a365ab707794a4084392c824f54a7a6a7862f8c3d0892b283"
dependencies = [
"cfg-if",
"cpufeatures",
"digest",
]
[[package]]
name = "shlex"
version = "1.3.0"
@@ -2534,6 +2555,18 @@ dependencies = [
"markup5ever",
]
[[package]]
name = "xp3"
version = "0.3.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "61c728da4ef7d98958a2d42fd957e82dd96723ec9c6255ccb3e743142d556ab6"
dependencies = [
"adler32",
"byteorder",
"encoding",
"flate2",
]
[[package]]
name = "yoke"
version = "0.8.0"

View File

@@ -1,6 +1,6 @@
[package]
name = "msg_tool"
version = "0.2.9"
version = "0.2.10"
edition = "2024"
repository = "https://github.com/lifegpc/msg-tool"
description = "A command-line tool for exporting, importing, packing, and unpacking script files."
@@ -8,6 +8,7 @@ license = "GPL-3.0-or-later"
exclude = [".github", "*.py", "AGENTS.md"]
[dependencies]
adler = { version = "1", optional = true }
anyhow = "1"
base64 = { version = "0.22", optional = true }
byteorder = { version = "1.5", default-features = false, optional = true}
@@ -17,6 +18,7 @@ ctrlc = "3.4"
emote-psb = { version = "0.5", optional = true , features = ["serde"] }
encoding = "0.2"
fancy-regex = { version = "0.16", optional = true }
fastcdc = { version = "3.2", optional = true }
flate2 = { version = "1.1", optional = true }
int-enum = { version = "1.2", optional = true }
jieba-rs = { version = "0.8", optional = true }
@@ -31,8 +33,9 @@ markup5ever_rcdom = { version = "0.35", optional = true }
memchr = { version = "2.7", optional = true }
mozjpeg = { version = "0.10", optional = true }
msg_tool_macro = { version = "0.2.9" }
num_cpus = { version = "1.17", optional = true }
num_cpus = "1.17"
overf = "0.1"
parse-size = { version = "1.1", optional = true }
pelite = { version = "0.10", optional = true }
png = { version = "0.18", optional = true }
rand = { version = "0.9", optional = true }
@@ -41,12 +44,14 @@ serde = { version = "1", features = ["derive"] }
serde_json = "1"
serde_yaml_ng = "0.10"
sha1 = { version = "0.10", optional = true }
sha2 = { version = "0.10", optional = true }
stylua = { version = "2.1", optional = true, default-features = false}
unicode-segmentation = "1.12"
url = { version = "2.5", optional = true }
utf16string = "0.2"
webp = { version = "0.3", default-features = false, optional = true }
xml5ever = { version = "0.35", optional = true }
xp3 = { version = "0.3", optional = true}
zstd = { version = "0.13", optional = true }
[features]
@@ -54,7 +59,7 @@ default = ["all-fmt", "image-jpg", "image-jxl", "image-webp", "audio-flac", "jie
all-fmt = ["all-script", "all-img", "all-arc", "all-audio"]
all-script = ["artemis", "artemis-panmimisoft", "bgi", "cat-system", "circus", "entis-gls", "escude", "ex-hibit", "favorite", "hexen-haus", "kirikiri", "silky", "softpal", "will-plus", "yaneurao", "yaneurao-itufuru"]
all-img = ["bgi-img", "cat-system-img", "circus-img", "emote-img", "hexen-haus-img", "kirikiri-img", "softpal-img", "will-plus-img"]
all-arc = ["artemis-arc", "bgi-arc", "cat-system-arc", "circus-arc", "escude-arc", "ex-hibit-arc", "hexen-haus-arc", "softpal-arc"]
all-arc = ["artemis-arc", "bgi-arc", "cat-system-arc", "circus-arc", "escude-arc", "ex-hibit-arc", "hexen-haus-arc", "kirikiri-arc", "softpal-arc"]
all-audio = ["bgi-audio", "circus-audio"]
artemis = ["stylua", "utils-escape"]
artemis-panmimisoft = ["artemis", "rust-ini"]
@@ -62,7 +67,7 @@ artemis-arc = ["artemis", "msg_tool_macro/artemis-arc", "sha1"]
bgi = ["fancy-regex"]
bgi-arc = ["bgi", "rand", "utils-bit-stream"]
bgi-audio = ["bgi"]
bgi-img = ["bgi", "image", "rand", "utils-threadpool", "utils-bit-stream"]
bgi-img = ["bgi", "image", "rand", "utils-bit-stream"]
cat-system = ["fancy-regex", "flate2", "int-enum"]
cat-system-arc = ["cat-system", "pelite", "utils-blowfish", "utils-crc32"]
cat-system-img = ["cat-system", "flate2", "image", "mozjpeg", "utils-bit-stream"]
@@ -81,6 +86,7 @@ hexen-haus = ["memchr", "utils-str"]
hexen-haus-arc = ["hexen-haus"]
hexen-haus-img = ["hexen-haus", "image"]
kirikiri = ["emote-psb", "fancy-regex", "flate2", "json", "lz4", "utils-escape"]
kirikiri-arc = ["kirikiri", "adler", "fastcdc", "flate2", "parse-size", "sha2", "xp3", "zstd"]
kirikiri-img = ["kirikiri", "image", "libtlg-rs"]
silky = []
softpal = ["int-enum"]
@@ -91,9 +97,9 @@ will-plus-img = ["will-plus", "image"]
yaneurao = []
yaneurao-itufuru = ["yaneurao"]
# basic feature
image = ["png", "utils-threadpool"]
image = ["png"]
image-jpg = ["mozjpeg"]
image-jxl = ["image", "jpegxl-sys", "utils-threadpool"]
image-jxl = ["image", "jpegxl-sys"]
image-webp = ["webp"]
lossless-audio = ["utils-pcm"]
audio-flac = ["libflac-sys", "utils-pcm"]
@@ -106,7 +112,6 @@ utils-crc32 = []
utils-escape = ["fancy-regex"]
utils-pcm = []
utils-str = []
utils-threadpool = ["num_cpus"]
[target.'cfg(windows)'.dependencies]
windows-sys = { version = "0.61", features = ["Win32_Globalization", "Win32_System_Diagnostics_Debug"] }

View File

@@ -75,7 +75,7 @@ msg-tool create -t <script-type> <input> <output>
| Archive Type | Feature Name | Name | Unpack | Pack | Remarks |
|---|---|---|---|---|---|
| `artemis-arc`/`pfs` | `artemis-arc` | Artemis Engine archive file (.pfs) | ✔️ | ✔️ | |
| `artemis-pf2`/`pfs` | `artemis-arc` | Artemis Engine Archive File (.pfs) (pf2) | ✔️ | ✔️ | |
| `artemis-pf2`/`pf2` | `artemis-arc` | Artemis Engine Archive File (.pfs) (pf2) | ✔️ | ✔️ | |
### Buriko General Interpreter / Ethornell
| Script Type | Feature Name | Name | Export | Import | Export Multiple | Import Multiple | Custom Export | Custom Import | Create | Remarks |
|---|---|---|---|---|---|---|---|---|---|---|
@@ -187,6 +187,10 @@ msg-tool create -t <script-type> <input> <output>
| `kirikiri-tjs-ns0`/`kr-tjs-ns0` | `kirikiri` | Kirikiri TJS NS0 binary encoded script | ❌ | ❌ | ❌ | ❌ | ✔️ | ✔️ | ✔️ | |
| `kirikiri-tjs2`/`kr-tjs2` | `kirikiri` | Kirikiri compiled TJS2 script | ✔️ | ✔️ | ❌ | ❌ | ✔️ | ✔️ | ❌ | |
| Archive Type | Feature Name | Name | Unpack | Pack | Remarks |
|---|---|---|---|---|---|
| `kirikiri-xp3`/`kr-xp3`/`xp3` | `kirikiri-arc` | Kirikiri XP3 Archive File (.xp3) | ✔️ | ✔️ | |
| Image Type | Feature Name | Name | Export | Import | Export Multiple | Import Multiple | Create | Remarks |
|---|---|---|---|---|---|---|---|---|
| `kirikiri-tlg`/`kr-tlg` | `kirikiri-img` | Kirikiri TLG Image File (.tlg) | ✔️ | ✔️ | ❌ | ❌ | ✔️ | tlg6 is not supported when importing/creating image |

View File

@@ -72,7 +72,7 @@ fn parse_jxl_distance(s: &str) -> Result<f32, String> {
}
/// Tools for export and import scripts
#[derive(Parser, Debug)]
#[derive(Parser, Debug, Clone)]
#[clap(
group = ArgGroup::new("encodingg").multiple(false),
group = ArgGroup::new("output_encodingg").multiple(false),
@@ -403,7 +403,7 @@ pub struct Arg {
/// Path to the ExHibit rld def keys file, which contains the keys in BINARY format.
pub ex_hibit_rld_def_keys: Option<String>,
#[cfg(feature = "mozjpeg")]
#[arg(short = 'j', long, global = true, default_value_t = 80, value_parser = parse_jpeg_quality)]
#[arg(long, global = true, default_value_t = 80, value_parser = parse_jpeg_quality)]
/// JPEG quality for output images, 0-100. 100 means best quality.
pub jpeg_quality: u8,
#[cfg(feature = "webp")]
@@ -484,12 +484,53 @@ pub struct Arg {
#[arg(long, global = true, visible_alias = "softpal-idx")]
/// Whether to add message index to Softpal src script when exporting.
pub softpal_add_message_index: bool,
#[cfg(feature = "kirikiri-arc")]
#[arg(long, global = true)]
/// Disable decrypting SimpleCrypt files in Kirikiri XP3 archive when extracting.
pub xp3_no_simple_crypt: bool,
#[cfg(feature = "kirikiri-arc")]
#[arg(long, global = true)]
/// Disable decompressing mdf files in Kirikiri XP3 archive when extracting.
pub xp3_no_mdf_decompress: bool,
#[cfg(feature = "kirikiri-arc")]
#[arg(long, global = true, default_value = "cdc:32KiB:256KiB:8MiB", value_parser = crate::scripts::kirikiri::archive::xp3::parse_segmenter_config)]
/// Configuration for Kirikiri XP3 segmenter when creating XP3 archive.
/// none segmenter - none
/// fastcdc segmenter - cdc:<min>:<avg>:<max>
/// fixed segmenter - fixed:<size>
pub xp3_segmenter: crate::scripts::kirikiri::archive::xp3::SegmenterConfig,
#[cfg(feature = "kirikiri-arc")]
#[arg(long, global = true)]
/// Disable compressing files in Kirikiri XP3 archive when creating XP3 archive.
pub xp3_no_compress_files: bool,
#[cfg(feature = "kirikiri-arc")]
#[arg(long, global = true)]
/// Disable compressing index in Kirikiri XP3 archive when creating XP3 archive.
pub xp3_no_compress_index: bool,
#[cfg(feature = "kirikiri-arc")]
#[arg(long, global = true, default_value_t = num_cpus::get(), visible_alias = "xp3-compress-jobs")]
/// Number of workers used to compress files in parallel when creating a Kirikiri XP3 archive.
pub xp3_compress_workers: usize,
#[cfg(feature = "kirikiri-arc")]
#[arg(long, global = true)]
/// Use zstd compression for files in Kirikiri XP3 archive when creating. (Warning: the Kirikiri engine doesn't support this; a hook is required.)
pub xp3_zstd: bool,
#[cfg(feature = "kirikiri-arc")]
#[arg(
long,
global = true,
default_value_t = 1,
visible_alias = "xp3-pack-jobs"
)]
/// Workers count for packing file in Kirikiri XP3 archive in parallel.
/// This does not work when the segmenter is disabled.
pub xp3_pack_workers: usize,
#[command(subcommand)]
/// Command
pub command: Command,
}
#[derive(Parser, Debug)]
#[derive(Parser, Debug, Clone)]
#[clap(group = ArgGroup::new("patched_encodingg").multiple(false), group = ArgGroup::new("patched_archive_encodingg").multiple(false))]
pub struct ImportArgs {
/// Input script file or directory
@@ -552,9 +593,12 @@ pub struct ImportArgs {
pub replacement_json: Option<String>,
#[arg(long, action = ArgAction::SetTrue)]
pub warn_when_output_file_not_found: bool,
#[arg(short = 'j', long, default_value_t = 1)]
/// Workers count for import scripts in parallel.
pub jobs: usize,
}
#[derive(Subcommand, Debug)]
#[derive(Subcommand, Debug, Clone)]
/// Commands
pub enum Command {
/// Extract from script

View File

@@ -4,7 +4,7 @@ use crate::utils::encoding::decode_to_string;
use crate::utils::struct_pack::{StructPack, StructUnpack};
use std::ffi::CString;
use std::io::*;
use std::sync::Mutex;
use std::sync::{Arc, Mutex};
/// A trait to help to peek data from a reader.
pub trait Peek {
@@ -1719,6 +1719,7 @@ impl CPeek for MemWriter {
}
/// A region of a stream that can be read/write and seeked within a specified range.
#[derive(Debug)]
pub struct StreamRegion<T: Seek> {
stream: T,
start_pos: u64,
@@ -1999,3 +2000,169 @@ impl<R: Read + Seek, W: Write + Seek, A: Fn(u64) -> Result<u64>, O: Fn(u64) -> R
Ok(())
}
}
/// A thread-safe wrapper around a Mutex-protected writer/reader.
#[derive(Debug)]
pub struct MutexWrapper<T> {
inner: Arc<Mutex<T>>,
pos: u64,
}
impl<T> MutexWrapper<T> {
/// Creates a new `MutexWrapper` with the given inner value.
pub fn new(inner: Arc<Mutex<T>>, pos: u64) -> Self {
MutexWrapper { inner, pos }
}
}
impl<T: Read + Seek> Read for MutexWrapper<T> {
fn read(&mut self, buf: &mut [u8]) -> Result<usize> {
let mut lock = self.inner.lock().map_err(|_| {
std::io::Error::new(std::io::ErrorKind::Other, "Failed to lock the mutex")
})?;
lock.seek(SeekFrom::Start(self.pos))?;
let readed = lock.read(buf)?;
self.pos += readed as u64;
Ok(readed)
}
}
impl<T: Read + Seek> Seek for MutexWrapper<T> {
fn seek(&mut self, pos: SeekFrom) -> Result<u64> {
let mut lock = self.inner.lock().map_err(|_| {
std::io::Error::new(std::io::ErrorKind::Other, "Failed to lock the mutex")
})?;
let new_pos = match pos {
SeekFrom::Start(offset) => offset,
SeekFrom::End(offset) => {
let len = lock.stream_length()?;
(len as i64 + offset as i64) as u64
}
SeekFrom::Current(offset) => (self.pos as i64 + offset as i64) as u64,
};
if new_pos > lock.stream_length()? {
return Err(std::io::Error::new(
std::io::ErrorKind::InvalidInput,
"Seek position is beyond the end of the stream",
));
}
self.pos = new_pos;
Ok(self.pos)
}
fn stream_position(&mut self) -> Result<u64> {
Ok(self.pos)
}
fn rewind(&mut self) -> Result<()> {
self.pos = 0;
Ok(())
}
}
/// A writer that discards everything written to it and always succeeds.
///
/// Useful as a sink when data only has to be consumed from a reader, e.g. to
/// skip forward in a non-seekable decoder via `std::io::copy`.
#[derive(Debug, Default, Clone, Copy)]
pub struct EmptyWriter;

impl EmptyWriter {
    /// Creates a new `EmptyWriter`.
    pub fn new() -> Self {
        Self
    }
}

impl Write for EmptyWriter {
    /// Reports the whole buffer as written without storing it.
    fn write(&mut self, buf: &[u8]) -> Result<usize> {
        Ok(buf.len())
    }

    /// Nothing is buffered, so flushing is a no-op.
    fn flush(&mut self) -> Result<()> {
        Ok(())
    }
}
/// A readable stream that starts with a given prefix before the actual data.
///
/// Logical offsets `0..prefix.len()` address the prefix; every offset after
/// that addresses the inner stream at `offset - prefix.len()`.
#[derive(Debug)]
pub struct PrefixStream<T> {
    prefix: Vec<u8>,
    /// Number of prefix bytes already consumed (`0..=prefix.len()`).
    pos: usize,
    inner: T,
}

impl<T> PrefixStream<T> {
    /// Creates a new `PrefixStream` with the given prefix and inner stream.
    pub fn new(prefix: Vec<u8>, inner: T) -> Self {
        PrefixStream {
            prefix,
            pos: 0,
            inner,
        }
    }
}

impl<T: Read> Read for PrefixStream<T> {
    /// Serves the remaining prefix bytes first, then delegates to the inner
    /// stream. A single call never mixes prefix and inner data.
    fn read(&mut self, buf: &mut [u8]) -> Result<usize> {
        if self.pos < self.prefix.len() {
            let bytes_to_read = std::cmp::min(buf.len(), self.prefix.len() - self.pos);
            buf[..bytes_to_read].copy_from_slice(&self.prefix[self.pos..self.pos + bytes_to_read]);
            self.pos += bytes_to_read;
            Ok(bytes_to_read)
        } else {
            self.inner.read(buf)
        }
    }
}

impl<T: Seek> Seek for PrefixStream<T> {
    /// Seeks within the combined (prefix + inner) stream and returns the new
    /// logical position.
    ///
    /// # Errors
    /// Returns `InvalidInput` when the target lies before the start of the
    /// stream or does not fit in `u64`; inner seek errors are propagated.
    fn seek(&mut self, pos: SeekFrom) -> Result<u64> {
        let prefix_len = self.prefix.len() as u64;
        let out_of_range = |offset: i64| {
            std::io::Error::new(
                std::io::ErrorKind::InvalidInput,
                if offset < 0 {
                    "Seek position is before the start of the stream"
                } else {
                    "Seek position overflows the stream offset range"
                },
            )
        };
        let new_pos = match pos {
            SeekFrom::Start(offset) => offset,
            SeekFrom::End(offset) => {
                let total_len = self.inner.stream_length()? + prefix_len;
                total_len
                    .checked_add_signed(offset)
                    .ok_or_else(|| out_of_range(offset))?
            }
            SeekFrom::Current(offset) => {
                // `stream_position` is already the logical position including
                // the consumed prefix, so the prefix length must not be added
                // a second time.
                let current_pos = self.stream_position()?;
                current_pos
                    .checked_add_signed(offset)
                    .ok_or_else(|| out_of_range(offset))?
            }
        };
        if new_pos < prefix_len {
            // Inside the prefix: the inner stream has to restart from 0 once
            // the prefix is exhausted.
            self.pos = new_pos as usize;
            self.inner.rewind()?;
        } else {
            self.pos = self.prefix.len();
            self.inner.seek(SeekFrom::Start(new_pos - prefix_len))?;
        }
        Ok(new_pos)
    }

    /// Logical position: consumed prefix bytes plus the inner stream position.
    fn stream_position(&mut self) -> Result<u64> {
        Ok(self.pos as u64 + self.inner.stream_position()?)
    }

    /// Returns to the first prefix byte and rewinds the inner stream.
    fn rewind(&mut self) -> Result<()> {
        self.pos = 0;
        self.inner.rewind()?;
        Ok(())
    }
}

View File

@@ -1486,8 +1486,8 @@ pub fn import_script(
continue;
}
};
let mut writer = arch.new_file(f.name())?;
if arg.force_script || f.is_script() {
let mut writer = arch.new_file(f.name())?;
let (script_file, _) =
match parse_script_from_archive(&mut f, arg, config.clone(), &script) {
Ok(s) => s,
@@ -1952,6 +1952,7 @@ pub fn import_script(
continue;
}
} else {
let mut writer = arch.new_file_non_seek(f.name())?;
let out_path = std::path::PathBuf::from(&odir).join(f.name());
if out_path.is_file() {
let f = match std::fs::File::open(&out_path) {
@@ -2356,7 +2357,7 @@ pub fn pack_archive(
continue;
}
};
let mut wf = match archive.new_file(name) {
let mut wf = match archive.new_file_non_seek(name) {
Ok(f) => f,
Err(e) => {
eprintln!("Error creating file {} in archive: {}", name, e);
@@ -2602,6 +2603,7 @@ fn main() {
std::process::exit(1);
});
let arg = args::parse_args();
let argn = std::sync::Arc::new(arg.clone());
if arg.backtrace {
unsafe { std::env::set_var("RUST_LIB_BACKTRACE", "1") };
}
@@ -2744,6 +2746,22 @@ fn main() {
softpal_add_message_index: arg.softpal_add_message_index,
#[cfg(feature = "kirikiri")]
kirikiri_chat_multilang: !arg.kirikiri_chat_no_multilang,
#[cfg(feature = "kirikiri-arc")]
xp3_simple_crypt: !arg.xp3_no_simple_crypt,
#[cfg(feature = "kirikiri-arc")]
xp3_mdf_decompress: !arg.xp3_no_mdf_decompress,
#[cfg(feature = "kirikiri-arc")]
xp3_segmenter: arg.xp3_segmenter,
#[cfg(feature = "kirikiri-arc")]
xp3_compress_files: !arg.xp3_no_compress_files,
#[cfg(feature = "kirikiri-arc")]
xp3_compress_index: !arg.xp3_no_compress_index,
#[cfg(feature = "kirikiri-arc")]
xp3_compress_workers: arg.xp3_compress_workers,
#[cfg(feature = "kirikiri-arc")]
xp3_zstd: arg.xp3_zstd,
#[cfg(feature = "kirikiri-arc")]
xp3_pack_workers: arg.xp3_pack_workers,
});
match &arg.command {
args::Command::Export { input, output } => {
@@ -2853,7 +2871,7 @@ fn main() {
}
None => None,
};
let repl = match &args.replacement_json {
let repl = std::sync::Arc::new(match &args.replacement_json {
Some(replacement_json) => {
let b = utils::files::read_file(replacement_json).unwrap();
let s = String::from_utf8(b).unwrap();
@@ -2861,7 +2879,7 @@ fn main() {
Some(table)
}
None => None,
};
});
let (scripts, is_dir) =
utils::files::collect_files(&args.input, arg.recursive, false).unwrap();
if is_dir {
@@ -2880,25 +2898,76 @@ fn main() {
} else {
None
};
let workers = if args.jobs > 1 {
Some(
utils::threadpool::ThreadPool::<()>::new(
args.jobs,
Some("import-worker-"),
true,
)
.unwrap(),
)
} else {
None
};
for script in scripts.iter() {
let re = import_script(
&script,
&arg,
cfg.clone(),
args,
root_dir,
name_csv.as_ref(),
repl.as_ref(),
);
match re {
Ok(s) => {
COUNTER.inc(s);
}
Err(e) => {
if let Some(workers) = workers.as_ref() {
let arg = argn.clone();
let cfg = cfg.clone();
let script = script.clone();
let name_csv = name_csv.as_ref().map(|s| s.clone());
let repl = repl.clone();
let root_dir = root_dir.map(|s| s.to_path_buf());
let args = args.clone();
if let Err(e) = workers.execute(
move |_| {
let re = import_script(
&script,
&arg,
cfg,
&args,
root_dir.as_ref().map(|s| s.as_path()),
name_csv.as_ref(),
(*repl).as_ref(),
);
match re {
Ok(s) => {
COUNTER.inc(s);
}
Err(e) => {
COUNTER.inc_error();
eprintln!("Error exporting {}: {}", script, e);
if arg.backtrace {
eprintln!("Backtrace: {}", e.backtrace());
}
}
}
},
true,
) {
COUNTER.inc_error();
eprintln!("Error exporting {}: {}", script, e);
if arg.backtrace {
eprintln!("Backtrace: {}", e.backtrace());
eprintln!("Error executing import worker: {}", e);
}
} else {
let re = import_script(
&script,
&arg,
cfg.clone(),
args,
root_dir,
name_csv.as_ref(),
(*repl).as_ref(),
);
match re {
Ok(s) => {
COUNTER.inc(s);
}
Err(e) => {
COUNTER.inc_error();
eprintln!("Error exporting {}: {}", script, e);
if arg.backtrace {
eprintln!("Backtrace: {}", e.backtrace());
}
}
}
}

View File

@@ -586,6 +586,11 @@ pub trait Script: std::fmt::Debug + std::any::Any {
pub trait Archive {
    /// Creates a new file in the archive.
    fn new_file<'a>(&'a mut self, name: &str) -> Result<Box<dyn WriteSeek + 'a>>;
    /// Creates a new file in the archive for callers that only write
    /// sequentially. The default implementation boxes the seekable writer
    /// returned by `new_file`.
    fn new_file_non_seek<'a>(&'a mut self, name: &str) -> Result<Box<dyn Write + 'a>> {
        let seekable = self.new_file(name)?;
        Ok(Box::new(seekable))
    }
    /// Writes the header of the archive. (Must be called after writing all files.)
    fn write_header(&mut self) -> Result<()>;
}

View File

@@ -242,6 +242,7 @@ impl Script for CstScript {
let mut mes = mess.next();
let strings_address_offset = 0x10 + self.data.cpeek_u32_at(0x8)? as usize;
let strings_offset = 0x10 + self.data.cpeek_u32_at(0xC)? as usize;
let mut name_index = None;
for (i, s) in self.strings.iter().enumerate() {
match s.typ {
CstStringType::Message => {
@@ -254,6 +255,28 @@ impl Script for CstScript {
return Err(anyhow::anyhow!("No enough messages."));
}
};
if let Some(name_idx) = name_index.take() {
let mut name = match &m.name {
Some(n) => n.clone(),
None => {
return Err(anyhow::anyhow!("Message has no name.",));
}
};
if let Some(replacement) = replacement {
for (k, v) in &replacement.map {
name = name.replace(k, v);
}
}
let data = encode_string(encoding, &name, true)?;
let s = &self.strings[name_idx];
let pos = writer.write_patched_string(s, &data)?;
if pos != s.address {
writer.write_u32_at(
strings_address_offset as u64 + name_idx as u64 * 4,
(pos - strings_offset) as u32,
)?;
}
}
let mut message = m.message.clone();
if let Some(replacement) = replacement {
for (k, v) in &replacement.map {
@@ -272,29 +295,7 @@ impl Script for CstScript {
mes = mess.next();
}
CstStringType::Character => {
let m = match mes {
Some(m) => m,
None => {
return Err(anyhow::anyhow!("No enough messages."));
}
};
let mut name = match &m.name {
Some(name) => name.to_owned(),
None => return Err(anyhow::anyhow!("Message without name.")),
};
if let Some(replacement) = replacement {
for (k, v) in &replacement.map {
name = name.replace(k, v);
}
}
let data = encode_string(encoding, &name, true)?;
let pos = writer.write_patched_string(s, &data)?;
if pos != s.address {
writer.write_u32_at(
strings_address_offset as u64 + i as u64 * 4,
(pos - strings_offset) as u32,
)?;
}
name_index = Some(i);
}
CstStringType::Command => {
if let Some(caps) = CST_COMMAND_REGEX.captures(&s.text)? {

View File

@@ -0,0 +1,2 @@
pub mod xp3;
mod xp3pack;

View File

@@ -0,0 +1,600 @@
use super::xp3pack::*;
use crate::ext::io::*;
use crate::scripts::base::*;
use crate::types::*;
use anyhow::Result;
use flate2::read::ZlibDecoder;
use overf::wrapping;
use std::io::{Read, Seek, SeekFrom, Take};
use std::sync::{Arc, Mutex};
use xp3::XP3Reader;
use xp3::index::file::{IndexSegmentFlag, XP3FileIndex};
pub use super::xp3pack::SegmenterConfig;
/// Parses the value of the `--xp3-segmenter` option into a [`SegmenterConfig`].
///
/// Accepted forms (the segmenter name is case-insensitive, sizes are parsed
/// with `parse_size::parse_size`):
/// * `none` - no segmentation
/// * `cdc:<min>:<avg>:<max>` - FastCDC with `min <= avg <= max`, all non-zero
/// * `fixed:<size>` - fixed-size segments, non-zero
///
/// An empty string yields `SegmenterConfig::default()`.
///
/// # Errors
/// Returns an error for an unknown segmenter name, a wrong number of fields,
/// an unparsable size, a zero size, unordered FastCDC sizes, or a size that
/// does not fit the target integer type.
pub fn parse_segmenter_config(str: &str) -> Result<SegmenterConfig> {
    // `split` always yields at least one item, so the emptiness check has to
    // be made on the input itself to ever take effect.
    if str.is_empty() {
        return Ok(SegmenterConfig::default());
    }
    let parts: Vec<&str> = str.split(':').collect();
    match parts[0].to_lowercase().as_str() {
        "none" => Ok(SegmenterConfig::None),
        "cdc" => {
            if parts.len() != 4 {
                return Err(anyhow::anyhow!(
                    "Invalid FastCDC segmenter config. Expected format: cdc:<min>:<avg>:<max>"
                ));
            }
            let min_size = parse_size::parse_size(parts[1])?;
            let avg_size = parse_size::parse_size(parts[2])?;
            let max_size = parse_size::parse_size(parts[3])?;
            if min_size == 0 || avg_size == 0 || max_size == 0 {
                return Err(anyhow::anyhow!(
                    "Invalid FastCDC segmenter config. Sizes must be greater than 0."
                ));
            }
            if !(min_size <= avg_size && avg_size <= max_size) {
                return Err(anyhow::anyhow!(
                    "Invalid FastCDC segmenter config. Expected min_size <= avg_size <= max_size."
                ));
            }
            // Reject sizes that do not fit instead of silently truncating them.
            let to_u32 = |value: u64| {
                u32::try_from(value).map_err(|_| {
                    anyhow::anyhow!(
                        "Invalid FastCDC segmenter config. Size {} does not fit in 32 bits.",
                        value
                    )
                })
            };
            Ok(SegmenterConfig::FastCdc {
                min_size: to_u32(min_size)?,
                avg_size: to_u32(avg_size)?,
                max_size: to_u32(max_size)?,
            })
        }
        "fixed" => {
            if parts.len() != 2 {
                return Err(anyhow::anyhow!(
                    "Invalid Fixed segmenter config. Expected format: fixed:<size>"
                ));
            }
            let size = parse_size::parse_size(parts[1])?;
            if size == 0 {
                return Err(anyhow::anyhow!(
                    "Invalid Fixed segmenter config. Size must be greater than 0."
                ));
            }
            let size = usize::try_from(size).map_err(|_| {
                anyhow::anyhow!(
                    "Invalid Fixed segmenter config. Size {} is too large for this platform.",
                    size
                )
            })?;
            Ok(SegmenterConfig::Fixed(size))
        }
        _ => Err(anyhow::anyhow!("Unknown segmenter type: {}", parts[0])),
    }
}
/// Builder for Kirikiri XP3 archives. It carries no state of its own.
#[derive(Debug)]
pub struct Xp3ArchiveBuilder {}

impl Xp3ArchiveBuilder {
    /// Create a new Kirikiri XP3 Archive Builder
    pub fn new() -> Self {
        Xp3ArchiveBuilder {}
    }
}
impl ScriptBuilder for Xp3ArchiveBuilder {
    /// Text encoding used when none is specified.
    fn default_encoding(&self) -> Encoding {
        Encoding::Utf8
    }

    /// Archive (file name) encoding used when none is specified.
    fn default_archive_encoding(&self) -> Option<Encoding> {
        Some(Encoding::Utf8)
    }

    /// Opens an XP3 archive held entirely in memory.
    fn build_script(
        &self,
        buf: Vec<u8>,
        _filename: &str,
        _encoding: Encoding,
        _archive_encoding: Encoding,
        config: &ExtraConfig,
        _archive: Option<&Box<dyn Script>>,
    ) -> Result<Box<dyn Script>> {
        let archive = Xp3Archive::new(MemReader::new(buf), config)?;
        Ok(Box::new(archive))
    }

    /// Opens an XP3 archive directly from a file on disk.
    fn build_script_from_file(
        &self,
        filename: &str,
        _encoding: Encoding,
        _archive_encoding: Encoding,
        config: &ExtraConfig,
        _archive: Option<&Box<dyn Script>>,
    ) -> Result<Box<dyn Script>> {
        let file = std::fs::File::open(filename)?;
        let archive = Xp3Archive::new(file, config)?;
        Ok(Box::new(archive))
    }

    /// Opens an XP3 archive from an arbitrary seekable reader.
    fn build_script_from_reader(
        &self,
        reader: Box<dyn ReadSeek>,
        _filename: &str,
        _encoding: Encoding,
        _archive_encoding: Encoding,
        config: &ExtraConfig,
        _archive: Option<&Box<dyn Script>>,
    ) -> Result<Box<dyn Script>> {
        let archive = Xp3Archive::new(reader, config)?;
        Ok(Box::new(archive))
    }

    /// File extensions handled by this builder.
    fn extensions(&self) -> &'static [&'static str] {
        &["xp3"]
    }

    fn script_type(&self) -> &'static ScriptType {
        &ScriptType::KirikiriXp3
    }

    fn is_archive(&self) -> bool {
        true
    }

    /// Creates a writer that packs `files` into a new XP3 archive at `filename`.
    fn create_archive(
        &self,
        filename: &str,
        files: &[&str],
        _encoding: Encoding,
        config: &ExtraConfig,
    ) -> Result<Box<dyn Archive>> {
        let writer = Xp3ArchiveWriter::new(filename, files, config)?;
        Ok(Box::new(writer))
    }
}
/// Kirikiri XP3 Archive
#[derive(Debug)]
pub struct Xp3Archive<T: Read + Seek + std::fmt::Debug> {
    /// Underlying archive stream, shared with every opened entry.
    reader: Arc<Mutex<T>>,
    /// `(name, index)` pairs of the files exposed by this archive.
    entries: Vec<(String, XP3FileIndex)>,
    /// Whether SimpleCrypt-encoded files are decoded when opened.
    decrypt_simple_crypt: bool,
    /// Whether `mdf` files are decompressed when opened.
    decompress_mdf: bool,
}

impl<T: Read + Seek + std::fmt::Debug> Xp3Archive<T> {
    /// Create a new Kirikiri XP3 Archive
    ///
    /// Reads the archive index from `reader` and keeps every entry except the
    /// known garbage ones (the "protected archive" marker and empty `.nene`
    /// files).
    pub fn new(reader: T, config: &ExtraConfig) -> Result<Self> {
        let xp3_reader = XP3Reader::open_archive(reader)
            .map_err(|e| anyhow::anyhow!("Failed to open XP3 archive: {:?}", e))?;
        let mut entries = Vec::new();
        for (name, index) in xp3_reader.entries() {
            // Skip garbage files
            let is_garbage = name.contains("$$$ This is a protected archive. $$$")
                || (name.to_lowercase().ends_with(".nene") && index.info().file_size() == 0);
            if !is_garbage {
                entries.push((name.clone(), index.clone()));
            }
        }
        let stream = xp3_reader.close().1;
        Ok(Self {
            reader: Arc::new(Mutex::new(stream)),
            entries,
            decrypt_simple_crypt: config.xp3_simple_crypt,
            decompress_mdf: config.xp3_mdf_decompress,
        })
    }
}
impl<T: Read + Seek + std::fmt::Debug + 'static> Script for Xp3Archive<T> {
    fn default_output_script_type(&self) -> OutputScriptType {
        OutputScriptType::Json
    }

    fn default_format_type(&self) -> FormatOptions {
        FormatOptions::None
    }

    fn is_archive(&self) -> bool {
        true
    }

    /// Iterates over the names of the files kept from the archive index.
    fn iter_archive_filename<'a>(
        &'a self,
    ) -> Result<Box<dyn Iterator<Item = Result<String>> + 'a>> {
        Ok(Box::new(
            self.entries.iter().map(|entry| Ok(entry.0.clone())),
        ))
    }

    /// Opens the file at `index` (position in `iter_archive_filename` order).
    ///
    /// The first bytes of the file are inspected to detect its script type
    /// and, when enabled in the configuration, to transparently decode
    /// SimpleCrypt files (`FE FE <mode> FF FE` header) and `mdf\0` files.
    ///
    /// # Errors
    /// Fails when `index` is out of bounds or the entry cannot be read.
    fn open_file<'a>(&'a self, index: usize) -> Result<Box<dyn ArchiveContent + 'a>> {
        let file_index = self
            .entries
            .get(index)
            .ok_or_else(|| anyhow::anyhow!("Index out of bounds: {}", index))?
            .1
            .clone();
        let mut entry = Entry::new(self.reader.clone(), file_index);
        // A single `read` may return fewer bytes than requested (an entry read
        // never crosses a segment boundary), so keep reading until the sniff
        // buffer is full or the entry ends.
        let mut header = [0u8; 16];
        let mut header_len = 0;
        while header_len < header.len() {
            let readed = entry.read(&mut header[header_len..])?;
            if readed == 0 {
                break;
            }
            header_len += readed;
        }
        entry.rewind()?;
        entry.script_type = detect_script_type(entry.index.info().name(), &header, header_len);
        if self.decrypt_simple_crypt
            && header_len >= 5
            && header[0] == 0xFE
            && header[1] == 0xFE
            && header[3] == 0xFF
            && header[4] == 0xFE
        {
            // Byte 2 of the SimpleCrypt header selects the encoding mode.
            let crypt = header[2];
            if crypt == 2 {
                let index = entry.index.clone();
                return Ok(Box::new(SimpleCryptZlib::new(entry, index)?));
            }
            if matches!(crypt, 0 | 1) {
                let index = entry.index.clone();
                return Ok(Box::new(SimpleCrypt::new(entry, index, crypt)?));
            }
        }
        if self.decompress_mdf
            && header_len >= 4
            && &header[0..4] == b"mdf\0"
            && entry.index.info().file_size() > 8
        {
            let index = entry.index.clone();
            return Ok(Box::new(MdfEntry::new(entry, index)?));
        }
        Ok(Box::new(entry))
    }
}
/// Guesses the script type of an archive member.
///
/// Magic bytes at the start of `buf` are checked first (`buf_len` is the
/// number of bytes that were actually read into it); if none match, the file
/// extension of `filename` decides. Returns `None` for unrecognised files.
fn detect_script_type(filename: &str, buf: &[u8], buf_len: usize) -> Option<ScriptType> {
    #[cfg(feature = "kirikiri-img")]
    if buf_len >= 11 && libtlg_rs::is_valid_tlg(buf) {
        return Some(ScriptType::KirikiriTlg);
    }
    if buf_len >= 8 {
        if buf.starts_with(b"TJS/ns0\0") || buf.starts_with(b"TJS/4s0\0") {
            return Some(ScriptType::KirikiriTjsNs0);
        }
        if buf.starts_with(b"TJS2100\0") {
            return Some(ScriptType::KirikiriTjs2);
        }
    }
    // No known magic: fall back to the lower-cased file extension.
    let extension = std::path::Path::new(filename)
        .extension()
        .and_then(|ext| ext.to_str())
        .map(str::to_lowercase)
        .unwrap_or_default();
    match extension.as_str() {
        "ks" => Some(ScriptType::Kirikiri),
        "scn" => Some(ScriptType::KirikiriScn),
        #[cfg(feature = "emote-img")]
        "dref" => Some(ScriptType::EmoteDref),
        #[cfg(feature = "emote-img")]
        "pimg" => Some(ScriptType::EmotePimg),
        _ => None,
    }
}
/// A single file inside an XP3 archive, readable and seekable as one
/// contiguous stream even though it is stored as a list of segments.
#[derive(Debug)]
struct Entry<T: Read + Seek + std::fmt::Debug> {
    /// Archive stream shared with the owning archive and other entries.
    reader: Arc<Mutex<T>>,
    /// Index record describing this file and its segments.
    index: XP3FileIndex,
    /// Decoder of the compressed segment currently being read, if any.
    cache: Option<ZlibDecoder<Take<MutexWrapper<T>>>>,
    /// Current logical (uncompressed) position within the file.
    pos: u64,
    /// Logical start offset of every segment, in segment order.
    entries_pos: Vec<u64>,
    script_type: Option<ScriptType>,
}

impl<T: Read + Seek + std::fmt::Debug> Entry<T> {
    /// Creates an entry positioned at offset 0 and precomputes the logical
    /// start offset of each segment as the running sum of original sizes.
    fn new(reader: Arc<Mutex<T>>, index: XP3FileIndex) -> Self {
        let entries_pos = index
            .segments()
            .iter()
            .scan(0u64, |next_start, seg| {
                let start = *next_start;
                *next_start += seg.original_size();
                Some(start)
            })
            .collect();
        Self {
            reader,
            index,
            cache: None,
            pos: 0,
            entries_pos,
            script_type: None,
        }
    }
}

impl<T: Read + Seek + std::fmt::Debug> ArchiveContent for Entry<T> {
    /// Name of the file as recorded in the archive index.
    fn name(&self) -> &str {
        self.index.info().name()
    }

    /// Exposes the entry itself as the seekable data stream.
    fn to_data<'a>(&'a mut self) -> Result<Box<dyn ReadSeek + 'a>> {
        Ok(Box::new(self))
    }

    /// Script type detected when the entry was opened, if any.
    fn script_type(&self) -> Option<&ScriptType> {
        self.script_type.as_ref()
    }
}
impl<T: Read + Seek + std::fmt::Debug> Entry<T> {
    /// Returns the index of the segment that contains logical offset `pos`.
    ///
    /// This is the last segment whose start offset is `<= pos`. When several
    /// segments share a start offset (zero-length segments) the last of them
    /// is chosen, so a zero-length segment is never selected ahead of the
    /// segment that actually holds the data. Returns 0 when there are no
    /// segments; callers must check the index before using it.
    fn segment_index_at(&self, pos: u64) -> usize {
        self.entries_pos
            .partition_point(|&start| start <= pos)
            .saturating_sub(1)
    }
}

impl<T: Read + Seek + std::fmt::Debug> Read for Entry<T> {
    /// Reads from the current logical position. A single call never crosses a
    /// segment boundary, so it may return fewer bytes than requested.
    ///
    /// # Errors
    /// Propagates I/O and decompression errors; returns `InvalidData` when the
    /// index has no segment for the current position.
    fn read(&mut self, buf: &mut [u8]) -> std::io::Result<usize> {
        if buf.is_empty() {
            // Nothing to do; in particular keep the decoder of the current
            // segment alive instead of mistaking the 0-byte read for its end.
            return Ok(0);
        }
        if self.pos >= self.index.info().file_size() {
            self.cache.take();
            return Ok(0);
        }
        if let Some(cache) = self.cache.as_mut() {
            let readed = cache.read(buf)?;
            if readed > 0 {
                self.pos += readed as u64;
                return Ok(readed);
            }
            // The cached segment is exhausted; continue with the next one.
            self.cache.take();
        }
        let seg_index = self.segment_index_at(self.pos);
        let Some(seg) = self.index.segments().get(seg_index) else {
            return Err(std::io::Error::new(
                std::io::ErrorKind::InvalidData,
                "XP3 entry has no segment covering the current position",
            ));
        };
        let start_pos = seg.data_offset();
        let seg_pos = self.entries_pos[seg_index];
        // Bytes of this segment that lie before the current position.
        let skip_pos = self.pos - seg_pos;
        let read_size = seg.saved_size();
        match seg.flag() {
            IndexSegmentFlag::UnCompressed => {
                // Stored data can be addressed directly in the archive.
                let mut limited = MutexWrapper::new(self.reader.clone(), start_pos + skip_pos)
                    .take(read_size.saturating_sub(skip_pos));
                let readed = limited.read(buf)?;
                self.pos += readed as u64;
                Ok(readed)
            }
            IndexSegmentFlag::Compressed => {
                let mut cache = ZlibDecoder::new(
                    MutexWrapper::new(self.reader.clone(), start_pos).take(read_size),
                );
                if skip_pos != 0 {
                    // A compressed stream cannot be seeked: decode and discard
                    // everything before the current position.
                    let mut e = EmptyWriter::new();
                    std::io::copy(&mut (&mut cache).take(skip_pos), &mut e)?; // skip
                }
                let readed = cache.read(buf)?;
                self.pos += readed as u64;
                self.cache = Some(cache);
                Ok(readed)
            }
        }
    }
}

impl<T: Read + Seek + std::fmt::Debug> Seek for Entry<T> {
    /// Moves the logical position. Seeking past the end is allowed; seeking
    /// before the start (or overflowing `u64`) is an `InvalidInput` error.
    fn seek(&mut self, pos: SeekFrom) -> std::io::Result<u64> {
        let invalid =
            |msg: &'static str| std::io::Error::new(std::io::ErrorKind::InvalidInput, msg);
        let new_pos = match pos {
            SeekFrom::Start(p) => p,
            SeekFrom::End(offset) => self
                .index
                .info()
                .file_size()
                .checked_add_signed(offset)
                .ok_or_else(|| invalid("Seek from end exceeds file length"))?,
            SeekFrom::Current(offset) => self
                .pos
                .checked_add_signed(offset)
                .ok_or_else(|| invalid("Seek from current exceeds file start"))?,
        };
        if self.cache.is_some() {
            let same_segment =
                self.segment_index_at(self.pos) == self.segment_index_at(new_pos);
            if same_segment && new_pos >= self.pos {
                // Forward seek inside the cached segment: keep the decoder and
                // discard the bytes in between.
                let skip_pos = new_pos - self.pos;
                let skipped = match self.cache.as_mut() {
                    Some(cache) => {
                        let mut e = EmptyWriter::new();
                        std::io::copy(&mut cache.take(skip_pos), &mut e) // skip
                    }
                    None => Ok(0),
                };
                if let Err(err) = skipped {
                    // The decoder is now at an unknown offset; drop it so the
                    // next read starts the segment afresh.
                    self.cache = None;
                    return Err(err);
                }
            } else {
                // Different segment or backward seek: the decoder cannot be
                // reused.
                self.cache = None;
            }
        }
        self.pos = new_pos;
        Ok(self.pos)
    }

    /// Returns to offset 0 and drops any cached decoder.
    fn rewind(&mut self) -> std::io::Result<()> {
        self.pos = 0;
        self.cache.take();
        Ok(())
    }

    /// Current logical position within the file.
    fn stream_position(&mut self) -> std::io::Result<u64> {
        Ok(self.pos)
    }
}
/// Entry content whose payload is zlib-compressed after a 0x15-byte header.
///
/// Reading yields the two bytes `FF FE` (the UTF-16LE byte-order mark value)
/// followed by the decompressed payload.
struct SimpleCryptZlib<T: Read + Seek + std::fmt::Debug> {
    /// Prefix bytes followed by the decompressed payload.
    inner: PrefixStream<ZlibDecoder<StreamRegion<Entry<T>>>>,
    /// Index record of the entry; supplies the name.
    index: XP3FileIndex,
}

impl<T: Read + Seek + std::fmt::Debug> SimpleCryptZlib<T> {
    /// Wraps `entry`, skipping its first 0x15 bytes and inflating the rest.
    ///
    /// The region handed to the decoder runs from offset 0x15 to the file
    /// size recorded in `index`.
    fn new(mut entry: Entry<T>, index: XP3FileIndex) -> Result<Self> {
        entry.seek(SeekFrom::Start(0x15))?;
        let payload = StreamRegion::new(entry, 0x15, index.info().file_size())?;
        let decoder = ZlibDecoder::new(payload);
        Ok(Self {
            inner: PrefixStream::new(vec![0xFF, 0xFE], decoder),
            index,
        })
    }
}

impl<T: Read + Seek + std::fmt::Debug> ArchiveContent for SimpleCryptZlib<T> {
    /// Name of the entry as stored in the archive index.
    fn name(&self) -> &str {
        let info = self.index.info();
        info.name()
    }
}

impl<T: Read + Seek + std::fmt::Debug> Read for SimpleCryptZlib<T> {
    /// Reads from the prefixed, decompressed stream.
    fn read(&mut self, out: &mut [u8]) -> std::io::Result<usize> {
        let stream = &mut self.inner;
        stream.read(out)
    }
}
/// Raw SimpleCrypt payload (everything after the 5-byte header) that is
/// descrambled on the fly while being read.
#[derive(Debug)]
struct SimpleCryptInner<T: Read + Seek + std::fmt::Debug> {
    /// The entry restricted to the bytes after the header.
    inner: StreamRegion<Entry<T>>,
    /// Scramble mode: `0` and `1` select a transform, anything else is passed through.
    crypt: u8,
}

impl<T: Read + Seek + std::fmt::Debug> SimpleCryptInner<T> {
    /// Positions `entry` just past the 5-byte header and limits it to the
    /// range from offset 5 to the file size recorded in the entry's index.
    fn new(mut entry: Entry<T>, crypt: u8) -> Result<Self> {
        entry.seek(SeekFrom::Start(5))?;
        let end = entry.index.info().file_size();
        let inner = StreamRegion::new(entry, 5, end)?;
        Ok(Self { inner, crypt })
    }
}

impl<T: Read + Seek + std::fmt::Debug> Read for SimpleCryptInner<T> {
    /// Reads from the underlying region and descrambles the bytes in place.
    ///
    /// * mode `0`: every byte `>= 20` has its lowest bit flipped;
    /// * mode `1`: adjacent bit pairs of every byte are swapped;
    /// * any other mode: bytes are returned untouched.
    fn read(&mut self, buf: &mut [u8]) -> std::io::Result<usize> {
        let count = self.inner.read(buf)?;
        let filled = &mut buf[..count];
        match self.crypt {
            0 => {
                // NOTE(review): the formula this derives from,
                // `ch ^ (((ch & 0xfe) << 8) ^ 1)`, only affects the low byte
                // when applied to a single byte, i.e. it reduces to `b ^ 1`.
                // The upstream scheme appears to work on 16-bit characters
                // with a 0x20 threshold - confirm whether per-byte handling
                // with threshold 20 is intended.
                filled
                    .iter_mut()
                    .filter(|b| **b >= 20)
                    .for_each(|b| *b ^= 1);
            }
            1 => {
                // Swap each even/odd bit pair.
                filled
                    .iter_mut()
                    .for_each(|b| *b = ((*b & 0xaa) >> 1) | ((*b & 0x55) << 1));
            }
            _ => {}
        }
        Ok(count)
    }
}

impl<T: Read + Seek + std::fmt::Debug> Seek for SimpleCryptInner<T> {
    /// Seeks within the payload region.
    fn seek(&mut self, target: SeekFrom) -> std::io::Result<u64> {
        let region = &mut self.inner;
        region.seek(target)
    }

    /// Returns to the start of the payload region.
    fn rewind(&mut self) -> std::io::Result<()> {
        let region = &mut self.inner;
        region.rewind()
    }

    /// Current position as reported by the payload region.
    fn stream_position(&mut self) -> std::io::Result<u64> {
        let region = &mut self.inner;
        region.stream_position()
    }
}
/// Seekable, descrambled view of a SimpleCrypt entry.
///
/// The stream starts with the two bytes `FF FE` followed by the descrambled
/// payload produced by [`SimpleCryptInner`].
#[derive(Debug)]
struct SimpleCrypt<T: Read + Seek + std::fmt::Debug> {
    /// Prefix bytes followed by the descrambled payload.
    inner: PrefixStream<SimpleCryptInner<T>>,
    /// Index record of the entry; supplies the name.
    index: XP3FileIndex,
}

impl<T: Read + Seek + std::fmt::Debug> SimpleCrypt<T> {
    /// Builds the view for `entry` using scramble mode `crypt`.
    fn new(entry: Entry<T>, index: XP3FileIndex, crypt: u8) -> Result<Self> {
        let payload = SimpleCryptInner::new(entry, crypt)?;
        Ok(Self {
            inner: PrefixStream::new(vec![0xFF, 0xFE], payload),
            index,
        })
    }
}

impl<T: Read + Seek + std::fmt::Debug> ArchiveContent for SimpleCrypt<T> {
    /// Name of the entry as stored in the archive index.
    fn name(&self) -> &str {
        let info = self.index.info();
        info.name()
    }

    /// Exposes this object itself as the seekable data stream.
    fn to_data<'a>(&'a mut self) -> Result<Box<dyn ReadSeek + 'a>> {
        let stream: Box<dyn ReadSeek + 'a> = Box::new(self);
        Ok(stream)
    }
}

impl<T: Read + Seek + std::fmt::Debug> Read for SimpleCrypt<T> {
    /// Reads from the prefixed, descrambled stream.
    fn read(&mut self, out: &mut [u8]) -> std::io::Result<usize> {
        let stream = &mut self.inner;
        stream.read(out)
    }
}

impl<T: Read + Seek + std::fmt::Debug> Seek for SimpleCrypt<T> {
    /// Seeks within the prefixed stream.
    fn seek(&mut self, target: SeekFrom) -> std::io::Result<u64> {
        let stream = &mut self.inner;
        stream.seek(target)
    }

    /// Returns to the start of the prefixed stream.
    fn rewind(&mut self) -> std::io::Result<()> {
        let stream = &mut self.inner;
        stream.rewind()
    }

    /// Current position as reported by the prefixed stream.
    fn stream_position(&mut self) -> std::io::Result<u64> {
        let stream = &mut self.inner;
        stream.stream_position()
    }
}
/// Entry content that is zlib-compressed after an 8-byte header.
#[derive(Debug)]
struct MdfEntry<T: Read + Seek + std::fmt::Debug> {
    /// Decoder over the bytes that follow the header.
    inner: ZlibDecoder<StreamRegion<Entry<T>>>,
    /// Index record of the entry; supplies the name.
    index: XP3FileIndex,
}

impl<T: Read + Seek + std::fmt::Debug> MdfEntry<T> {
    /// Wraps `entry`, skipping its first 8 bytes and inflating the rest.
    ///
    /// The region handed to the decoder runs from offset 8 to the file size
    /// recorded in `index`.
    fn new(mut entry: Entry<T>, index: XP3FileIndex) -> Result<Self> {
        entry.seek(SeekFrom::Start(8))?;
        let payload = StreamRegion::new(entry, 8, index.info().file_size())?;
        Ok(Self {
            inner: ZlibDecoder::new(payload),
            index,
        })
    }
}

impl<T: Read + Seek + std::fmt::Debug> ArchiveContent for MdfEntry<T> {
    /// Name of the entry as stored in the archive index.
    fn name(&self) -> &str {
        let info = self.index.info();
        info.name()
    }
}

impl<T: Read + Seek + std::fmt::Debug> Read for MdfEntry<T> {
    /// Reads decompressed bytes.
    fn read(&mut self, out: &mut [u8]) -> std::io::Result<usize> {
        let decoder = &mut self.inner;
        decoder.read(out)
    }
}

View File

@@ -0,0 +1,24 @@
/// One contiguous piece of a file's data as stored in the archive.
///
/// A file may consist of several segments, each stored (and optionally
/// compressed) independently.
///
/// The derived ordering compares fields in declaration order, so the field
/// order below is significant.
#[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord)]
pub struct Segment {
    /// Whether the stored bytes are compressed.
    pub is_compressed: bool,
    /// Offset of the segment's stored bytes within the archive file.
    pub start: u64,
    /// Offset of this segment within the original, uncompressed file.
    pub offset_in_file: u64,
    /// Size of the segment once decompressed.
    pub original_size: u64,
    /// Size of the segment as stored in the archive (possibly compressed).
    pub archived_size: u64,
}

/// Index record for a single file stored in the XP3 archive.
///
/// The derived ordering compares fields in declaration order, so the field
/// order below is significant.
#[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord)]
pub struct ArchiveItem {
    /// Name of the file inside the archive.
    pub name: String,
    /// Checksum of the file contents.
    pub file_hash: u32,
    /// Total size of the file before compression.
    pub original_size: u64,
    /// Total size of the file's segments as stored in the archive.
    pub archived_size: u64,
    /// Segments that make up the file.
    pub segments: Vec<Segment>,
}

View File

@@ -0,0 +1,22 @@
/// XP3 file header signature: `XP3\r\n \n\x1a\x8b\x67\x01` (11 bytes).
/// The archive writer emits it at the very start of the file, immediately
/// followed by the 64-bit index offset.
pub const XP3_MAGIC: &[u8; 11] = b"XP3\r\n \n\x1a\x8b\x67\x01";
// Four-byte chunk names used in the archive index. The writer emits one
// `File` chunk per file, containing `info`, `segm` and `adlr` chunks.
pub const CHUNK_FILE: &[u8; 4] = b"File";
pub const CHUNK_INFO: &[u8; 4] = b"info";
pub const CHUNK_SEGM: &[u8; 4] = b"segm";
pub const CHUNK_ADLR: &[u8; 4] = b"adlr";
// Index entry flags: the flag byte written in front of the index. The low
// three bits (see the mask) select the encoding of the index data.
pub const TVP_XP3_INDEX_ENCODE_METHOD_MASK: u8 = 0x07;
pub const TVP_XP3_INDEX_ENCODE_RAW: u8 = 0;
pub const TVP_XP3_INDEX_ENCODE_ZLIB: u8 = 1;
pub const TVP_XP3_INDEX_CONTINUE: u8 = 0x80;
// File entry flags (bit 31 marks a protected file).
pub const TVP_XP3_FILE_PROTECTED: u32 = 1 << 31;
// Segment entry flags: the 32-bit flag stored with every segment record.
// The low three bits (see the mask) select the encoding of the segment data.
pub const TVP_XP3_SEGM_ENCODE_METHOD_MASK: u32 = 0x07;
pub const TVP_XP3_SEGM_ENCODE_RAW: u32 = 0;
pub const TVP_XP3_SEGM_ENCODE_ZLIB: u32 = 1;

View File

@@ -0,0 +1,9 @@
mod archive;
#[allow(dead_code)]
mod consts;
mod reader;
mod segmenter;
mod writer;
pub use segmenter::SegmenterConfig;
pub use writer::Xp3ArchiveWriter;

View File

@@ -0,0 +1,28 @@
use adler::Adler32;
use std::io::{PipeReader, Read};
/// Pipe reader that keeps a running Adler-32 checksum of every byte read
/// through it.
pub struct Reader {
    /// Read end of the pipe the data arrives on.
    inner: PipeReader,
    /// Checksum state, updated on every successful read.
    adler: Adler32,
}

impl Reader {
    /// Wraps `inner` with a fresh checksum state.
    pub fn new(inner: PipeReader) -> Self {
        let adler = Adler32::new();
        Self { inner, adler }
    }

    /// Consumes the reader and returns the checksum of all bytes read so far.
    pub fn into_checksum(self) -> u32 {
        let Self { adler, .. } = self;
        adler.checksum()
    }
}

impl Read for Reader {
    /// Reads from the pipe and feeds the bytes just read into the checksum.
    fn read(&mut self, buf: &mut [u8]) -> std::io::Result<usize> {
        let count = self.inner.read(buf)?;
        let fresh = &buf[..count];
        self.adler.write_slice(fresh);
        Ok(count)
    }
}

View File

@@ -0,0 +1,105 @@
use super::reader::Reader;
use anyhow::Result;
use fastcdc::v2020::StreamCDC;
use std::io::Read;
/// Selects how file data is split into segments when an archive is created.
#[derive(Copy, Clone, Debug)]
pub enum SegmenterConfig {
    /// Keep every file as a single piece.
    None,
    /// Content-defined chunking with FastCDC, bounded by the given minimum,
    /// average and maximum chunk sizes.
    FastCdc {
        min_size: u32,
        avg_size: u32,
        max_size: u32,
    },
    /// Cut the data into pieces of the given fixed size.
    Fixed(usize),
}

impl Default for SegmenterConfig {
    /// FastCDC with 32 KiB minimum, 256 KiB average and 8 MiB maximum chunks.
    fn default() -> Self {
        const KIB: u32 = 1024;
        Self::FastCdc {
            min_size: 32 * KIB,
            avg_size: 256 * KIB,
            max_size: 8 * KIB * KIB,
        }
    }
}

impl SegmenterConfig {
    /// Returns `true` when segmentation is turned off.
    pub fn is_none(&self) -> bool {
        match self {
            Self::None => true,
            Self::FastCdc { .. } | Self::Fixed(_) => false,
        }
    }
}
/// A strategy for splitting the bytes read from a [`Reader`] into one or more segments.
pub trait Segmenter {
/// Returns an iterator over the segments read from `data`.
///
/// Each item is either the bytes of the next segment, in stream order, or
/// the error that interrupted reading. The iterator borrows both the
/// segmenter and `data` for its whole lifetime.
fn segment<'a>(
&'a self,
data: &'a mut Reader,
) -> Box<dyn Iterator<Item = Result<Vec<u8>>> + 'a>;
}
pub struct FastCdcSegmenter {
min_size: u32,
avg_size: u32,
max_size: u32,
}
impl Segmenter for FastCdcSegmenter {
fn segment<'a>(
&'a self,
data: &'a mut Reader,
) -> Box<dyn Iterator<Item = Result<Vec<u8>>> + 'a> {
let cdc = StreamCDC::new(data, self.min_size, self.avg_size, self.max_size);
Box::new(cdc.map(|chunk| Ok(chunk?.data)))
}
}
pub struct FixedSizeSegmenter {
size: usize,
}
impl Segmenter for FixedSizeSegmenter {
fn segment<'a>(
&'a self,
data: &'a mut Reader,
) -> Box<dyn Iterator<Item = Result<Vec<u8>>> + 'a> {
let size = self.size;
let mut buf = vec![0; size];
Box::new(std::iter::from_fn(move || {
let nbuf = &mut buf;
let mut total_read = 0;
while total_read < size {
match data.read(&mut nbuf[total_read..]) {
Ok(0) => break, // EOF
Ok(n) => total_read += n,
Err(e) => return Some(Err(e.into())),
}
}
if total_read == 0 {
None // No more data to read
} else {
Some(Ok(buf[..total_read].to_vec()))
}
}))
}
}
/// Builds the segmenter described by `config`.
///
/// Returns `None` for [`SegmenterConfig::None`], i.e. when files are not to
/// be split.
pub fn create_segmenter(config: SegmenterConfig) -> Option<Box<dyn Segmenter + Send + Sync>> {
    let segmenter: Box<dyn Segmenter + Send + Sync> = match config {
        SegmenterConfig::None => return None,
        SegmenterConfig::Fixed(size) => Box::new(FixedSizeSegmenter { size }),
        SegmenterConfig::FastCdc {
            min_size,
            avg_size,
            max_size,
        } => Box::new(FastCdcSegmenter {
            min_size,
            avg_size,
            max_size,
        }),
    };
    Some(segmenter)
}

View File

@@ -0,0 +1,536 @@
use super::archive::*;
use super::consts::*;
use super::reader::*;
use super::segmenter::*;
use crate::ext::io::*;
use crate::ext::mutex::*;
use crate::scripts::base::*;
use crate::types::*;
use crate::utils::encoding::*;
use crate::utils::threadpool::ThreadPool;
use anyhow::Result;
use sha2::{Digest, Sha256};
use std::collections::{BTreeMap, HashMap, HashSet};
use std::io::{Seek, Write};
use std::sync::atomic::{AtomicU64, AtomicUsize, Ordering};
use std::sync::{Arc, Mutex};
/// Where a unique segment ended up in the archive.
///
/// Stored per content hash so that later segments with identical content can
/// point at the same bytes.
#[derive(Clone)]
struct WrittenSegment {
    /// Whether the stored bytes are compressed.
    is_compressed: bool,
    /// Offset of the stored bytes within the archive file.
    start: u64,
    /// Size of the segment before compression.
    original_size: u64,
    /// Size of the segment as stored in the archive.
    archived_size: u64,
}
/// Counters collected while an archive is written; safe to update from
/// several threads because every field is an atomic.
#[derive(Default)]
struct Stats {
    /// Sum of the uncompressed sizes of all segments seen.
    total_original_size: AtomicU64,
    /// Sum of the stored sizes of all segments actually written.
    final_archive_size: AtomicU64,
    /// Number of segments seen, duplicates included.
    total_segments: AtomicUsize,
    /// Number of segments actually written.
    unique_segments: AtomicUsize,
    /// Sum of the stored sizes of segments that were reused instead of written.
    deduplication_savings: AtomicU64,
}

impl std::fmt::Display for Stats {
    /// Renders the counters as a five-line, human-readable report.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        let bytes = |counter: &AtomicU64| counter.load(Ordering::Relaxed);
        let count = |counter: &AtomicUsize| counter.load(Ordering::Relaxed);
        // Take the whole snapshot before formatting anything.
        let original = bytes(&self.total_original_size);
        let archived = bytes(&self.final_archive_size);
        let segments = count(&self.total_segments);
        let unique = count(&self.unique_segments);
        let saved = bytes(&self.deduplication_savings);
        write!(
            f,
            "Total Original Size: {} bytes\nFinal Archive Size: {} bytes\nTotal Segments: {}\nUnique Segments: {}\nDeduplication Savings: {} bytes",
            original, archived, segments, unique, saved
        )
    }
}
/// Writer that packs files into an XP3 archive.
///
/// File data is consumed on background threads; the index is written by
/// `write_header` once all queued files have been processed.
pub struct Xp3ArchiveWriter<T: Write + Seek> {
// Archive output, shared with the packing and compression threads.
file: Arc<Mutex<T>>,
// Unique segments keyed by the SHA-256 of their content; used to reuse identical segments.
segments: Arc<Mutex<HashMap<[u8; 32], WrittenSegment>>>,
// Index records keyed by file name.
items: Arc<Mutex<BTreeMap<String, ArchiveItem>>>,
// Pool that runs one packing task per file.
runner: ThreadPool<Result<()>>,
// Whether file data is compressed.
compress_files: bool,
// Whether the index is compressed.
compress_index: bool,
// Level used for zlib compression.
zlib_compression_level: u32,
// Segmentation strategy; `None` stores every file as a single segment.
segmenter: Option<Arc<Box<dyn Segmenter + Send + Sync>>>,
// Counters printed after the index has been written.
stats: Arc<Stats>,
// Size of the per-file pool that compresses segments.
compress_workers: usize,
// Hashes of segments that are registered in `segments` but not yet fully written.
processing_segments: Arc<Mutex<HashSet<[u8; 32]>>>,
// Use zstd instead of zlib wherever data is compressed.
use_zstd: bool,
// Level used for zstd compression.
zstd_compression_level: i32,
}
impl Xp3ArchiveWriter<std::io::BufWriter<std::fs::File>> {
pub fn new(filename: &str, files: &[&str], config: &ExtraConfig) -> Result<Self> {
let file = std::fs::File::create(filename)?;
let mut file = std::io::BufWriter::new(file);
let mut items = BTreeMap::new();
for file in files {
let item = ArchiveItem {
name: file.to_string(),
file_hash: 0,
original_size: 0,
archived_size: 0,
segments: Vec::new(),
};
items.insert(file.to_string(), item);
}
let segmenter = create_segmenter(config.xp3_segmenter).map(|s| Arc::new(s));
file.write_all(XP3_MAGIC)?;
file.write_u64(0)?; // Placeholder for index offset
Ok(Self {
file: Arc::new(Mutex::new(file)),
segments: Arc::new(Mutex::new(HashMap::new())),
items: Arc::new(Mutex::new(items)),
runner: ThreadPool::new(
if config.xp3_segmenter.is_none() {
1
} else {
config.xp3_pack_workers.max(1)
},
Some("xp3-writer"),
false,
)?,
compress_files: config.xp3_compress_files,
compress_index: config.xp3_compress_index,
zlib_compression_level: config.zlib_compression_level,
segmenter,
stats: Arc::new(Stats::default()),
compress_workers: config.xp3_compress_workers.max(1),
processing_segments: Arc::new(Mutex::new(HashSet::new())),
use_zstd: config.xp3_zstd,
zstd_compression_level: config.zstd_compression_level,
})
}
}
/// Seekable staging buffer in front of a sink that cannot seek.
///
/// All writes and seeks go to an in-memory buffer; the buffered bytes are
/// handed to `inner` in one go when the writer is dropped.
struct Writer<'a> {
    /// Destination that receives the buffered bytes on drop.
    inner: Box<dyn Write + 'a>,
    /// In-memory buffer that absorbs every write and seek.
    mem: MemWriter,
}

impl std::fmt::Debug for Writer<'_> {
    /// Shows only the in-memory buffer; the boxed sink is left out.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        let mut out = f.debug_struct("Writer");
        out.field("mem", &self.mem);
        out.finish()
    }
}

impl Write for Writer<'_> {
    /// Buffers `data` in memory.
    fn write(&mut self, data: &[u8]) -> std::io::Result<usize> {
        let staging = &mut self.mem;
        staging.write(data)
    }

    /// Flushes the in-memory buffer only; nothing reaches the sink yet.
    fn flush(&mut self) -> std::io::Result<()> {
        let staging = &mut self.mem;
        staging.flush()
    }
}

impl Seek for Writer<'_> {
    /// Seeks within the in-memory buffer.
    fn seek(&mut self, target: std::io::SeekFrom) -> std::io::Result<u64> {
        let staging = &mut self.mem;
        staging.seek(target)
    }

    /// Current position within the in-memory buffer.
    fn stream_position(&mut self) -> std::io::Result<u64> {
        let staging = &mut self.mem;
        staging.stream_position()
    }

    /// Returns to the start of the in-memory buffer.
    fn rewind(&mut self) -> std::io::Result<()> {
        let staging = &mut self.mem;
        staging.rewind()
    }
}

impl Drop for Writer<'_> {
    /// Hands the buffered bytes to the sink and flushes it.
    ///
    /// Best effort: `drop` cannot report failures, so errors are ignored and
    /// the flush is attempted even if the write failed.
    fn drop(&mut self) {
        let staged = &self.mem.data;
        let _ = self.inner.write_all(staged);
        let _ = self.inner.flush();
    }
}
impl<T: Write + Seek + Sync + Send + 'static> Archive for Xp3ArchiveWriter<T> {
fn new_file<'a>(&'a mut self, name: &str) -> Result<Box<dyn WriteSeek + 'a>> {
let inner = self.new_file_non_seek(name)?;
Ok(Box::new(Writer {
inner,
mem: MemWriter::new(),
}))
}
fn new_file_non_seek<'a>(&'a mut self, name: &str) -> Result<Box<dyn Write + 'a>> {
if self.segmenter.is_none() {
self.runner.join();
}
for err in self.runner.take_results() {
err?;
}
let item = {
let items = self.items.lock_blocking();
Arc::new(Mutex::new(
items
.get(name)
.ok_or_else(|| anyhow::anyhow!("File not found in archive: {}", name))?
.clone(),
))
};
let (reader, writer) = std::io::pipe()?;
let reader = Reader::new(reader);
{
let file = self.file.clone();
let segments = self.segments.clone();
let items = self.items.clone();
let segmenter = self.segmenter.clone();
let stats = self.stats.clone();
let is_compressed = self.compress_files;
let zlib_compression_level = self.zlib_compression_level;
let workers = if self.segmenter.is_some() {
Some(Arc::new(ThreadPool::<Result<()>>::new(
self.compress_workers,
Some("xp3-compress"),
false,
)?))
} else {
None
};
let processiong_segments = self.processing_segments.clone();
let use_zstd = self.use_zstd;
let zstd_compression_level = self.zstd_compression_level;
self.runner.execute(
move |_| {
let mut reader = reader;
let mut offset_in_file = 0u64;
if let Some(segmenter) = segmenter {
for seg in segmenter.segment(&mut reader) {
let seg = seg?;
let hash: [u8; 32] = Sha256::digest(&seg).into();
let seg_offset_in_file = offset_in_file;
offset_in_file += seg.len() as u64;
let fseg = match {
let mut segments = segments.lock_blocking();
if let Some(old_seg) = segments.get(&hash) {
Err(old_seg.clone())
} else {
let seg_data = WrittenSegment {
is_compressed,
start: 0,
original_size: seg.len() as u64,
archived_size: seg.len() as u64,
};
segments.insert(hash, seg_data.clone());
Ok(seg_data)
}
} {
Ok(mut info) => {
if let Some(workers) = workers.as_ref() {
{
let mut processing =
processiong_segments.lock_blocking();
processing.insert(hash);
}
let file = file.clone();
let segments = segments.clone();
let stats = stats.clone();
let item = item.clone();
let processiong_segments = processiong_segments.clone();
workers.execute(
move |_| {
let data = {
if use_zstd {
let mut e = zstd::stream::Encoder::new(
Vec::new(),
zstd_compression_level,
)?;
e.write_all(&seg)?;
e.finish()?
} else {
let mut e = flate2::write::ZlibEncoder::new(
Vec::new(),
flate2::Compression::new(
zlib_compression_level,
),
);
e.write_all(&seg)?;
e.finish()?
}
};
let mut file = file.lock_blocking();
let start = file.seek(std::io::SeekFrom::End(0))?;
file.write_all(&data)?;
info.start = start;
info.archived_size = data.len() as u64;
let stats = stats.clone();
stats.total_original_size.fetch_add(
info.original_size,
Ordering::Relaxed,
);
stats.final_archive_size.fetch_add(
info.archived_size,
Ordering::Relaxed,
);
stats
.total_segments
.fetch_add(1, Ordering::Relaxed);
stats
.unique_segments
.fetch_add(1, Ordering::Relaxed);
let mut segments = segments.lock_blocking();
segments.insert(hash, info.clone());
let ninfo = Segment {
is_compressed: info.is_compressed,
start: info.start,
offset_in_file: seg_offset_in_file,
original_size: info.original_size,
archived_size: info.archived_size,
};
let mut item = item.lock_blocking();
item.original_size += ninfo.original_size;
item.archived_size += ninfo.archived_size;
item.segments.push(ninfo);
let mut processing =
processiong_segments.lock_blocking();
processing.remove(&hash);
Ok(())
},
true,
)?;
None
} else {
{
let mut processing =
processiong_segments.lock_blocking();
processing.insert(hash);
}
let data = seg;
let mut file = file.lock_blocking();
let start = file.seek(std::io::SeekFrom::End(0))?;
file.write_all(&data)?;
info.start = start;
info.archived_size = data.len() as u64;
let stats = stats.clone();
stats
.total_original_size
.fetch_add(info.original_size, Ordering::Relaxed);
stats
.final_archive_size
.fetch_add(info.archived_size, Ordering::Relaxed);
stats.total_segments.fetch_add(1, Ordering::Relaxed);
stats.unique_segments.fetch_add(1, Ordering::Relaxed);
let mut segments = segments.lock_blocking();
segments.insert(hash, info.clone());
let ninfo = Segment {
is_compressed: info.is_compressed,
start: info.start,
offset_in_file: seg_offset_in_file,
original_size: info.original_size,
archived_size: info.archived_size,
};
{
let mut processing =
processiong_segments.lock_blocking();
processing.remove(&hash);
}
Some(ninfo)
}
}
Err(mut seg_info) => {
let mut need_update = false;
loop {
if {
let processing = processiong_segments.lock_blocking();
!processing.contains(&hash)
} {
break;
}
need_update = true;
std::thread::sleep(std::time::Duration::from_millis(10));
}
if need_update {
seg_info = {
let segments = segments.lock_blocking();
segments
.get(&hash)
.ok_or(anyhow::anyhow!(
"Failed to get latest segment info."
))?
.clone()
};
}
let stats = stats.clone();
stats
.total_original_size
.fetch_add(seg_info.original_size, Ordering::Relaxed);
stats
.deduplication_savings
.fetch_add(seg_info.archived_size, Ordering::Relaxed);
stats.total_segments.fetch_add(1, Ordering::Relaxed);
let ninfo = Segment {
is_compressed: seg_info.is_compressed,
start: seg_info.start,
offset_in_file: seg_offset_in_file,
original_size: seg_info.original_size,
archived_size: seg_info.archived_size,
};
Some(ninfo)
}
};
if let Some(fseg) = fseg {
let mut item = item.lock_blocking();
item.original_size += fseg.original_size;
item.archived_size += fseg.archived_size;
item.segments.push(fseg);
}
}
} else {
let mut file = file.lock_blocking();
let start = file.seek(std::io::SeekFrom::End(0))?;
let size = {
let mut writer = if is_compressed {
if use_zstd {
let e = zstd::stream::Encoder::new(
&mut *file,
zstd_compression_level,
)?;
Box::new(e) as Box<dyn Write>
} else {
let e = flate2::write::ZlibEncoder::new(
&mut *file,
flate2::Compression::new(zlib_compression_level),
);
Box::new(e) as Box<dyn Write>
}
} else {
Box::new(&mut *file) as Box<dyn Write>
};
std::io::copy(&mut reader, &mut writer)?
};
let ninfo = Segment {
is_compressed,
start,
offset_in_file: 0,
original_size: size,
archived_size: if is_compressed {
file.stream_position()? - start
} else {
size
},
};
let mut item = item.lock_blocking();
item.original_size += ninfo.original_size;
item.archived_size += ninfo.archived_size;
let stats = stats.clone();
stats
.total_original_size
.fetch_add(ninfo.original_size, Ordering::Relaxed);
stats
.final_archive_size
.fetch_add(ninfo.archived_size, Ordering::Relaxed);
stats.total_segments.fetch_add(1, Ordering::Relaxed);
stats.unique_segments.fetch_add(1, Ordering::Relaxed);
item.segments.push(ninfo);
}
if let Some(workers) = workers {
workers.join();
for err in workers.take_results() {
err?;
}
}
let mut item = item.lock_blocking().to_owned();
item.file_hash = reader.into_checksum();
item.segments.sort_by_key(|s| s.offset_in_file);
let mut items = items.lock_blocking();
items.insert(item.name.clone(), item);
Ok(())
},
true,
)?;
}
Ok(Box::new(writer))
}
fn write_header(&mut self) -> Result<()> {
self.runner.join();
for err in self.runner.take_results() {
err?;
}
let mut file = self.file.lock_blocking();
let index_offset = file.seek(std::io::SeekFrom::End(0))?;
let mut index_data = MemWriter::new();
let items = self.items.lock_blocking();
for (_, item) in items.iter() {
let mut file_chunk = MemWriter::new();
let name = encode_string(Encoding::Utf16LE, &item.name, false)?;
let info_data_size = name.len() as u64 + 22;
file_chunk.write_all(CHUNK_INFO)?;
file_chunk.write_u64(info_data_size)?;
file_chunk.write_u32(0)?; // flags
file_chunk.write_u64(item.original_size)?;
file_chunk.write_u64(item.archived_size)?;
file_chunk.write_u16(name.len() as u16 / 2)?;
file_chunk.write_all(&name)?;
let segm_data_size = item.segments.len() as u64 * 28;
file_chunk.write_all(CHUNK_SEGM)?;
file_chunk.write_u64(segm_data_size)?;
for seg in &item.segments {
let flag = if seg.is_compressed {
TVP_XP3_SEGM_ENCODE_ZLIB
} else {
TVP_XP3_SEGM_ENCODE_RAW
};
file_chunk.write_u32(flag)?;
file_chunk.write_u64(seg.start)?;
file_chunk.write_u64(seg.original_size)?;
file_chunk.write_u64(seg.archived_size)?;
}
let adlr_data_size = 4;
file_chunk.write_all(CHUNK_ADLR)?;
file_chunk.write_u64(adlr_data_size)?;
file_chunk.write_u32(item.file_hash)?;
index_data.write_all(CHUNK_FILE)?;
let file_chunk = file_chunk.into_inner();
index_data.write_u64(file_chunk.len() as u64)?;
index_data.write_all(&file_chunk)?;
}
let index_data = index_data.into_inner();
if self.compress_index {
let compressed_index = if self.use_zstd {
let mut e = zstd::stream::Encoder::new(Vec::new(), self.zstd_compression_level)?;
e.write_all(&index_data)?;
e.finish()?
} else {
let mut e = flate2::write::ZlibEncoder::new(
Vec::new(),
flate2::Compression::new(self.zlib_compression_level),
);
e.write_all(&index_data)?;
e.finish()?
};
file.write_u8(TVP_XP3_INDEX_ENCODE_ZLIB)?;
file.write_u64(compressed_index.len() as u64)?;
file.write_u64(index_data.len() as u64)?;
file.write_all(&compressed_index)?;
} else {
file.write_u8(TVP_XP3_INDEX_ENCODE_RAW)?;
file.write_u64(index_data.len() as u64)?;
file.write_all(&index_data)?;
}
file.write_u64_at(11, index_offset)?; // Write index offset to header
file.flush()?;
eprintln!("XP3 Archive Statistics:\n{}", self.stats);
Ok(())
}
}

View File

@@ -1,4 +1,6 @@
//! Kirikiri Scripts
#[cfg(feature = "kirikiri-arc")]
pub mod archive;
#[cfg(feature = "kirikiri-img")]
pub mod image;
pub mod ks;

View File

@@ -154,6 +154,8 @@ lazy_static::lazy_static! {
Box::new(will_plus::img::wip::WillPlusWipImageBuilder::new()),
#[cfg(feature = "artemis")]
Box::new(artemis::txt::ArtemisTxtBuilder::new()),
#[cfg(feature = "kirikiri-arc")]
Box::new(kirikiri::archive::xp3::Xp3ArchiveBuilder::new()),
];
/// A list of all script extensions.
pub static ref ALL_EXTS: Vec<String> =

View File

@@ -472,6 +472,37 @@ pub struct ExtraConfig {
/// Enable multi-language support for Kirikiri chat messages. Default is true.
/// Note: This requires [Self::kirikiri_language_index] and [Self::kirikiri_languages] to be set correctly.
pub kirikiri_chat_multilang: bool,
#[cfg(feature = "kirikiri-arc")]
#[default(true)]
/// Decrypt SimpleCrypt files in Kirikiri XP3 archive when extracting. Default is true.
pub xp3_simple_crypt: bool,
#[cfg(feature = "kirikiri-arc")]
#[default(true)]
/// Decompress mdf files in Kirikiri XP3 archive when extracting. Default is true.
pub xp3_mdf_decompress: bool,
#[cfg(feature = "kirikiri-arc")]
/// Configuration for Kirikiri XP3 segmenter when creating XP3 archive.
pub xp3_segmenter: crate::scripts::kirikiri::archive::xp3::SegmenterConfig,
#[cfg(feature = "kirikiri-arc")]
#[default(true)]
/// Compress files in Kirikiri XP3 archive when creating. Default is true.
pub xp3_compress_files: bool,
#[cfg(feature = "kirikiri-arc")]
#[default(true)]
/// Compress index in Kirikiri XP3 archive when creating. Default is true.
pub xp3_compress_index: bool,
#[cfg(feature = "kirikiri-arc")]
#[default(num_cpus::get())]
/// Number of worker threads used to compress files in parallel when creating a Kirikiri XP3 archive. Default is the number of CPU cores.
pub xp3_compress_workers: usize,
#[cfg(feature = "kirikiri-arc")]
/// Use zstd compression for files in Kirikiri XP3 archive when creating. (Warning: the Kirikiri engine doesn't support this; a hook is required.)
pub xp3_zstd: bool,
#[cfg(feature = "kirikiri-arc")]
#[default(1)]
/// Number of worker threads used to pack files in parallel when creating a Kirikiri XP3 archive. Default is 1.
/// This has no effect when segmentation is disabled.
pub xp3_pack_workers: usize,
}
#[derive(Clone, Copy, Debug, ValueEnum, PartialEq, Eq, PartialOrd, Ord)]
@@ -626,6 +657,10 @@ pub enum ScriptType {
#[value(alias = "kr", alias = "kr-ks", alias = "kirikiri-ks")]
/// Kirikiri script
Kirikiri,
#[cfg(feature = "kirikiri-arc")]
#[value(alias = "kr-xp3", alias = "xp3")]
/// Kirikiri XP3 archive
KirikiriXp3,
#[cfg(feature = "kirikiri-img")]
#[value(alias("kr-tlg"))]
/// Kirikiri TLG image
@@ -1028,7 +1063,6 @@ impl AsRef<str> for LosslessAudioFormat {
}
}
#[cfg(feature = "utils-threadpool")]
#[allow(unused)]
pub(crate) fn get_default_threads() -> usize {
num_cpus::get().max(2) / 2

View File

@@ -28,7 +28,6 @@ pub mod pcm;
#[cfg(feature = "utils-str")]
pub mod str;
pub mod struct_pack;
#[cfg(feature = "utils-threadpool")]
pub mod threadpool;
#[cfg(windows)]