diff --git a/Cargo.lock b/Cargo.lock index 5df9e40..adec989 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -674,6 +674,12 @@ dependencies = [ "regex-syntax", ] +[[package]] +name = "fastcdc" +version = "3.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bf51ceb43e96afbfe4dd5c6f6082af5dfd60e220820b8123792d61963f2ce6bc" + [[package]] name = "fastrand" version = "2.3.0" @@ -1325,6 +1331,7 @@ dependencies = [ name = "msg_tool" version = "0.2.9" dependencies = [ + "adler", "anyhow", "base64", "byteorder", @@ -1334,6 +1341,7 @@ dependencies = [ "emote-psb", "encoding", "fancy-regex", + "fastcdc", "flate2", "int-enum", "jieba-rs", @@ -1359,6 +1367,7 @@ dependencies = [ "serde_json", "serde_yaml_ng", "sha1", + "sha2", "stylua", "unicode-segmentation", "url", @@ -1913,6 +1922,17 @@ dependencies = [ "digest", ] +[[package]] +name = "sha2" +version = "0.10.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a7507d819769d01a365ab707794a4084392c824f54a7a6a7862f8c3d0892b283" +dependencies = [ + "cfg-if", + "cpufeatures", + "digest", +] + [[package]] name = "shlex" version = "1.3.0" diff --git a/Cargo.toml b/Cargo.toml index efefdec..7f2fbdb 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -8,6 +8,7 @@ license = "GPL-3.0-or-later" exclude = [".github", "*.py", "AGENTS.md"] [dependencies] +adler = { version = "1", optional = true } anyhow = "1" base64 = { version = "0.22", optional = true } byteorder = { version = "1.5", default-features = false, optional = true} @@ -17,6 +18,7 @@ ctrlc = "3.4" emote-psb = { version = "0.5", optional = true , features = ["serde"] } encoding = "0.2" fancy-regex = { version = "0.16", optional = true } +fastcdc = { version = "3.2", optional = true } flate2 = { version = "1.1", optional = true } int-enum = { version = "1.2", optional = true } jieba-rs = { version = "0.8", optional = true } @@ -33,6 +35,7 @@ mozjpeg = { version = "0.10", optional = true } msg_tool_macro = { version = "0.2.9" } 
num_cpus = { version = "1.17", optional = true } overf = "0.1" +parse-size = { version = "1.1", optional = true } pelite = { version = "0.10", optional = true } png = { version = "0.18", optional = true } rand = { version = "0.9", optional = true } @@ -41,6 +44,7 @@ serde = { version = "1", features = ["derive"] } serde_json = "1" serde_yaml_ng = "0.10" sha1 = { version = "0.10", optional = true } +sha2 = { version = "0.10", optional = true } stylua = { version = "2.1", optional = true, default-features = false} unicode-segmentation = "1.12" url = { version = "2.5", optional = true } @@ -82,7 +86,7 @@ hexen-haus = ["memchr", "utils-str"] hexen-haus-arc = ["hexen-haus"] hexen-haus-img = ["hexen-haus", "image"] kirikiri = ["emote-psb", "fancy-regex", "flate2", "json", "lz4", "utils-escape"] -kirikiri-arc = ["kirikiri", "xp3"] +kirikiri-arc = ["kirikiri", "adler", "fastcdc", "flate2", "parse-size", "sha2", "xp3", "utils-threadpool"] kirikiri-img = ["kirikiri", "image", "libtlg-rs"] silky = [] softpal = ["int-enum"] diff --git a/README.md b/README.md index 9714106..d2938cf 100644 --- a/README.md +++ b/README.md @@ -189,7 +189,7 @@ msg-tool create -t | Archive Type | Feature Name | Name | Unpack | Pack | Remarks | |---|---|---|---|---|---| -| `kirikiri-xp3`/`kr-xp3`/`xp3` | `kirikiri-arc` | Kirikiri XP3 Archive File (.xp3) | ✔️ | ❌ | | +| `kirikiri-xp3`/`kr-xp3`/`xp3` | `kirikiri-arc` | Kirikiri XP3 Archive File (.xp3) | ✔️ | ✔️ | | | Image Type | Feature Name | Name | Export | Import | Export Multiple | Import Multiple | Create | Remarks | |---|---|---|---|---|---|---|---|---| diff --git a/src/args.rs b/src/args.rs index fc24107..4b7939c 100644 --- a/src/args.rs +++ b/src/args.rs @@ -492,6 +492,21 @@ pub struct Arg { #[arg(long, global = true)] /// Disable decompressing mdf files in Kirikiri XP3 archive when extracting. 
pub xp3_no_mdf_decompress: bool, + #[cfg(feature = "kirikiri-arc")] + #[arg(long, global = true, default_value = "cdc:32KiB:256KiB:8MiB", value_parser = crate::scripts::kirikiri::archive::xp3::parse_segmenter_config)] + /// Configuration for Kirikiri XP3 segmenter when creating XP3 archive. + /// none segmenter - none + /// fastcdc segmenter - cdc:<min_size>:<avg_size>:<max_size> + /// fixed segmenter - fixed:<size> + pub xp3_segmenter: crate::scripts::kirikiri::archive::xp3::SegmenterConfig, + #[cfg(feature = "kirikiri-arc")] + #[arg(long, global = true)] + /// Disable compressing files in Kirikiri XP3 archive when creating XP3 archive. + pub xp3_no_compress_files: bool, + #[cfg(feature = "kirikiri-arc")] + #[arg(long, global = true)] + /// Disable compressing index in Kirikiri XP3 archive when creating XP3 archive. + pub xp3_no_compress_index: bool, #[command(subcommand)] /// Command pub command: Command, diff --git a/src/main.rs b/src/main.rs index 680355c..c0406ed 100644 --- a/src/main.rs +++ b/src/main.rs @@ -1486,8 +1486,8 @@ pub fn import_script( continue; } }; - let mut writer = arch.new_file(f.name())?; if arg.force_script || f.is_script() { + let mut writer = arch.new_file(f.name())?; let (script_file, _) = match parse_script_from_archive(&mut f, arg, config.clone(), &script) { Ok(s) => s, @@ -1952,6 +1952,7 @@ pub fn import_script( continue; } } else { + let mut writer = arch.new_file_non_seek(f.name())?; let out_path = std::path::PathBuf::from(&odir).join(f.name()); if out_path.is_file() { let f = match std::fs::File::open(&out_path) { @@ -2356,7 +2357,7 @@ pub fn pack_archive( continue; } }; - let mut wf = match archive.new_file(name) { + let mut wf = match archive.new_file_non_seek(name) { Ok(f) => f, Err(e) => { eprintln!("Error creating file {} in archive: {}", name, e); @@ -2748,6 +2749,12 @@ fn main() { xp3_simple_crypt: !arg.xp3_no_simple_crypt, #[cfg(feature = "kirikiri-arc")] xp3_mdf_decompress: !arg.xp3_no_mdf_decompress, + #[cfg(feature = "kirikiri-arc")] + xp3_segmenter:
arg.xp3_segmenter, + #[cfg(feature = "kirikiri-arc")] + xp3_compress_files: !arg.xp3_no_compress_files, + #[cfg(feature = "kirikiri-arc")] + xp3_compress_index: !arg.xp3_no_compress_index, }); match &arg.command { args::Command::Export { input, output } => { diff --git a/src/scripts/base.rs b/src/scripts/base.rs index 90d7810..9560d5f 100644 --- a/src/scripts/base.rs +++ b/src/scripts/base.rs @@ -586,6 +586,11 @@ pub trait Script: std::fmt::Debug + std::any::Any { pub trait Archive { /// Creates a new file in the archive. fn new_file<'a>(&'a mut self, name: &str) -> Result>; + /// Creates a new file in the archive that does not require seeking. + fn new_file_non_seek<'a>(&'a mut self, name: &str) -> Result> { + self.new_file(name) + .map(|f| Box::new(f) as Box) + } /// Writes the header of the archive. (Must be called after writing all files.) fn write_header(&mut self) -> Result<()>; } diff --git a/src/scripts/kirikiri/archive/mod.rs b/src/scripts/kirikiri/archive/mod.rs index 402fbea..bdec2f4 100644 --- a/src/scripts/kirikiri/archive/mod.rs +++ b/src/scripts/kirikiri/archive/mod.rs @@ -1 +1,2 @@ pub mod xp3; +mod xp3pack; diff --git a/src/scripts/kirikiri/archive/xp3.rs b/src/scripts/kirikiri/archive/xp3.rs index 0f24a90..6a88db3 100644 --- a/src/scripts/kirikiri/archive/xp3.rs +++ b/src/scripts/kirikiri/archive/xp3.rs @@ -1,3 +1,4 @@ +use super::xp3pack::*; use crate::ext::io::*; use crate::scripts::base::*; use crate::types::*; @@ -9,6 +10,58 @@ use std::sync::{Arc, Mutex}; use xp3::XP3Reader; use xp3::index::file::{IndexSegmentFlag, XP3FileIndex}; +pub use super::xp3pack::SegmenterConfig; + +pub fn parse_segmenter_config(str: &str) -> Result { + let parts: Vec<&str> = str.split(':').collect(); + if parts.is_empty() { + return Ok(SegmenterConfig::default()); + } + match parts[0].to_lowercase().as_str() { + "none" => Ok(SegmenterConfig::None), + "cdc" => { + if parts.len() != 4 { + return Err(anyhow::anyhow!( + "Invalid FastCDC segmenter config. 
Expected format: fastcdc,min_size,avg_size,max_size" + )); + } + let min_size = parse_size::parse_size(parts[1])?; + let avg_size = parse_size::parse_size(parts[2])?; + let max_size = parse_size::parse_size(parts[3])?; + if min_size == 0 || avg_size == 0 || max_size == 0 { + return Err(anyhow::anyhow!( + "Invalid FastCDC segmenter config. Sizes must be greater than 0." + )); + } + if !(min_size <= avg_size && avg_size <= max_size) { + return Err(anyhow::anyhow!( + "Invalid FastCDC segmenter config. Expected min_size <= avg_size <= max_size." + )); + } + Ok(SegmenterConfig::FastCdc { + min_size: min_size as u32, + avg_size: avg_size as u32, + max_size: max_size as u32, + }) + } + "fixed" => { + if parts.len() != 2 { + return Err(anyhow::anyhow!( + "Invalid Fixed segmenter config. Expected format: fixed,size" + )); + } + let size = parse_size::parse_size(parts[1])?; + if size == 0 { + return Err(anyhow::anyhow!( + "Invalid Fixed segmenter config. Size must be greater than 0." + )); + } + Ok(SegmenterConfig::Fixed(size as usize)) + } + _ => Err(anyhow::anyhow!("Unknown segmenter type: {}", parts[0])), + } +} + #[derive(Debug)] /// Builder for Kirikiri XP3 Archive pub struct Xp3ArchiveBuilder {} @@ -76,6 +129,16 @@ impl ScriptBuilder for Xp3ArchiveBuilder { fn is_archive(&self) -> bool { true } + + fn create_archive( + &self, + filename: &str, + files: &[&str], + _encoding: Encoding, + config: &ExtraConfig, + ) -> Result> { + Ok(Box::new(Xp3ArchiveWriter::new(filename, files, config)?)) + } } #[derive(Debug)] diff --git a/src/scripts/kirikiri/archive/xp3pack/archive.rs b/src/scripts/kirikiri/archive/xp3pack/archive.rs new file mode 100644 index 0000000..754a410 --- /dev/null +++ b/src/scripts/kirikiri/archive/xp3pack/archive.rs @@ -0,0 +1,24 @@ +/// Represents a single data segment for a file. +/// A file can be split into multiple segments, which can be compressed independently. 
/// Represents a single data segment for a file.
/// A file can be split into multiple segments, which can be compressed independently.
#[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord)]
pub struct Segment {
    /// Whether the bytes stored in the archive for this segment are compressed.
    pub is_compressed: bool,
    /// The offset of the segment's data within the archive file.
    pub start: u64,
    /// The offset of this segment within the original, uncompressed file.
    pub offset_in_file: u64,
    /// The size of the segment after decompression.
    pub original_size: u64,
    /// The size of the segment in the archive (potentially compressed).
    pub archived_size: u64,
}

/// Represents a single file entry within the XP3 archive.
#[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord)]
pub struct ArchiveItem {
    /// Name of the file inside the archive.
    pub name: String,
    /// Checksum of the file's contents, stored in the entry's `adlr` chunk.
    pub file_hash: u32,
    /// Total size of the file before compression.
    pub original_size: u64,
    /// Sum of the archived sizes of all segments of the file.
    pub archived_size: u64,
    /// The segments that make up the file, in file order.
    pub segments: Vec<Segment>,
}

/// XP3 file header signature: `XP3\r\n \n\x1a\x8b\x67\x01`
pub const XP3_MAGIC: &[u8; 11] = b"XP3\r\n \n\x1a\x8b\x67\x01";

// Chunk names
pub const CHUNK_FILE: &[u8; 4] = b"File";
pub const CHUNK_INFO: &[u8; 4] = b"info";
pub const CHUNK_SEGM: &[u8; 4] = b"segm";
pub const CHUNK_ADLR: &[u8; 4] = b"adlr";

// Index entry flags
pub const TVP_XP3_INDEX_ENCODE_METHOD_MASK: u8 = 0x07;
pub const TVP_XP3_INDEX_ENCODE_RAW: u8 = 0;
pub const TVP_XP3_INDEX_ENCODE_ZLIB: u8 = 1;
pub const TVP_XP3_INDEX_CONTINUE: u8 = 0x80;

// File entry flags
pub const TVP_XP3_FILE_PROTECTED: u32 = 1 << 31;

// Segment entry flags
pub const TVP_XP3_SEGM_ENCODE_METHOD_MASK: u32 = 0x07;
pub const TVP_XP3_SEGM_ENCODE_RAW: u32 = 0;
pub const TVP_XP3_SEGM_ENCODE_ZLIB: u32 = 1;
+#[allow(dead_code)] +mod consts; +mod reader; +mod segmenter; +mod writer; + +pub use segmenter::SegmenterConfig; +pub use writer::Xp3ArchiveWriter; diff --git a/src/scripts/kirikiri/archive/xp3pack/reader.rs b/src/scripts/kirikiri/archive/xp3pack/reader.rs new file mode 100644 index 0000000..1ec7b34 --- /dev/null +++ b/src/scripts/kirikiri/archive/xp3pack/reader.rs @@ -0,0 +1,28 @@ +use adler::Adler32; +use std::io::{PipeReader, Read}; + +pub struct Reader { + inner: PipeReader, + adler: Adler32, +} + +impl Reader { + pub fn new(inner: PipeReader) -> Self { + Self { + inner, + adler: Adler32::new(), + } + } + + pub fn into_checksum(self) -> u32 { + self.adler.checksum() + } +} + +impl Read for Reader { + fn read(&mut self, buf: &mut [u8]) -> std::io::Result { + let n = self.inner.read(buf)?; + self.adler.write_slice(&buf[..n]); + Ok(n) + } +} diff --git a/src/scripts/kirikiri/archive/xp3pack/segmenter.rs b/src/scripts/kirikiri/archive/xp3pack/segmenter.rs new file mode 100644 index 0000000..b719a2f --- /dev/null +++ b/src/scripts/kirikiri/archive/xp3pack/segmenter.rs @@ -0,0 +1,99 @@ +use super::reader::Reader; +use anyhow::Result; +use fastcdc::v2020::StreamCDC; +use std::io::Read; + +#[derive(Copy, Clone, Debug)] +/// Configuration options for the segmenter. +pub enum SegmenterConfig { + /// Do not segment the data. + None, + /// Use the FastCDC algorithm with specified minimum, average, and maximum chunk sizes. + FastCdc { + min_size: u32, + avg_size: u32, + max_size: u32, + }, + /// Use fixed-size segments. + Fixed(usize), +} + +impl Default for SegmenterConfig { + fn default() -> Self { + SegmenterConfig::FastCdc { + min_size: 32 * 1024, + avg_size: 256 * 1024, + max_size: 8 * 1024 * 1024, + } + } +} + +/// A trait for strategies that split a byte slice into one or more segments. 
+pub trait Segmenter { + fn segment<'a>( + &'a self, + data: &'a mut Reader, + ) -> Box>> + 'a>; +} + +pub struct FastCdcSegmenter { + min_size: u32, + avg_size: u32, + max_size: u32, +} + +impl Segmenter for FastCdcSegmenter { + fn segment<'a>( + &'a self, + data: &'a mut Reader, + ) -> Box>> + 'a> { + let cdc = StreamCDC::new(data, self.min_size, self.avg_size, self.max_size); + Box::new(cdc.map(|chunk| Ok(chunk?.data))) + } +} + +pub struct FixedSizeSegmenter { + size: usize, +} + +impl Segmenter for FixedSizeSegmenter { + fn segment<'a>( + &'a self, + data: &'a mut Reader, + ) -> Box>> + 'a> { + let size = self.size; + let mut buf = vec![0; size]; + Box::new(std::iter::from_fn(move || { + let nbuf = &mut buf; + let mut total_read = 0; + while total_read < size { + match data.read(&mut nbuf[total_read..]) { + Ok(0) => break, // EOF + Ok(n) => total_read += n, + Err(e) => return Some(Err(e.into())), + } + } + if total_read == 0 { + None // No more data to read + } else { + Some(Ok(buf[..total_read].to_vec())) + } + })) + } +} + +pub fn create_segmenter(config: SegmenterConfig) -> Option> { + match config { + SegmenterConfig::None => None, + SegmenterConfig::FastCdc { + min_size, + avg_size, + max_size, + } => Some(Box::new(FastCdcSegmenter { + min_size, + avg_size, + max_size, + })), + SegmenterConfig::Fixed(size) => Some(Box::new(FixedSizeSegmenter { size })), + } +} diff --git a/src/scripts/kirikiri/archive/xp3pack/writer.rs b/src/scripts/kirikiri/archive/xp3pack/writer.rs new file mode 100644 index 0000000..099b009 --- /dev/null +++ b/src/scripts/kirikiri/archive/xp3pack/writer.rs @@ -0,0 +1,380 @@ +use super::archive::*; +use super::consts::*; +use super::reader::*; +use super::segmenter::*; +use crate::ext::io::*; +use crate::ext::mutex::*; +use crate::scripts::base::*; +use crate::types::*; +use crate::utils::encoding::*; +use crate::utils::threadpool::ThreadPool; +use anyhow::Result; +use sha2::{Digest, Sha256}; +use std::collections::{BTreeMap, HashMap}; 
+use std::io::{Seek, Write}; +use std::sync::atomic::{AtomicU64, AtomicUsize, Ordering}; +use std::sync::{Arc, Mutex}; + +#[derive(Clone)] +struct WrittenSegment { + is_compressed: bool, + start: u64, + original_size: u64, + archived_size: u64, +} + +#[derive(Default)] +struct Stats { + total_original_size: AtomicU64, + final_archive_size: AtomicU64, + total_segments: AtomicUsize, + unique_segments: AtomicUsize, + deduplication_savings: AtomicU64, +} + +impl std::fmt::Display for Stats { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + let total_original_size = self + .total_original_size + .load(std::sync::atomic::Ordering::Relaxed); + let final_archive_size = self + .final_archive_size + .load(std::sync::atomic::Ordering::Relaxed); + let total_segments = self + .total_segments + .load(std::sync::atomic::Ordering::Relaxed); + let unique_segments = self + .unique_segments + .load(std::sync::atomic::Ordering::Relaxed); + let deduplication_savings = self + .deduplication_savings + .load(std::sync::atomic::Ordering::Relaxed); + write!( + f, + "Total Original Size: {} bytes\nFinal Archive Size: {} bytes\nTotal Segments: {}\nUnique Segments: {}\nDeduplication Savings: {} bytes", + total_original_size, + final_archive_size, + total_segments, + unique_segments, + deduplication_savings + ) + } +} + +pub struct Xp3ArchiveWriter { + file: Arc>, + segments: Arc>>, + items: Arc>>, + runner: ThreadPool>, + compress_files: bool, + compress_index: bool, + zlib_compression_level: u32, + segmenter: Option>>, + stats: Arc, +} + +impl Xp3ArchiveWriter> { + pub fn new(filename: &str, files: &[&str], config: &ExtraConfig) -> Result { + let file = std::fs::File::create(filename)?; + let mut file = std::io::BufWriter::new(file); + let mut items = BTreeMap::new(); + for file in files { + let item = ArchiveItem { + name: file.to_string(), + file_hash: 0, + original_size: 0, + archived_size: 0, + segments: Vec::new(), + }; + items.insert(file.to_string(), item); + } 
+ let segmenter = create_segmenter(config.xp3_segmenter).map(|s| Arc::new(s)); + file.write_all(XP3_MAGIC)?; + file.write_u64(0)?; // Placeholder for index offset + Ok(Self { + file: Arc::new(Mutex::new(file)), + segments: Arc::new(Mutex::new(HashMap::new())), + items: Arc::new(Mutex::new(items)), + runner: ThreadPool::new(1, Some("xp3-writer"), false)?, + compress_files: config.xp3_compress_files, + compress_index: config.xp3_compress_index, + zlib_compression_level: config.zlib_compression_level, + segmenter, + stats: Arc::new(Stats::default()), + }) + } +} + +struct Writer<'a> { + inner: Box, + mem: MemWriter, +} + +impl std::fmt::Debug for Writer<'_> { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + f.debug_struct("Writer").field("mem", &self.mem).finish() + } +} + +impl<'a> Write for Writer<'a> { + fn write(&mut self, buf: &[u8]) -> std::io::Result { + self.mem.write(buf) + } + + fn flush(&mut self) -> std::io::Result<()> { + self.mem.flush() + } +} + +impl<'a> Seek for Writer<'a> { + fn seek(&mut self, pos: std::io::SeekFrom) -> std::io::Result { + self.mem.seek(pos) + } + + fn stream_position(&mut self) -> std::io::Result { + self.mem.stream_position() + } + + fn rewind(&mut self) -> std::io::Result<()> { + self.mem.rewind() + } +} + +impl<'a> Drop for Writer<'a> { + fn drop(&mut self) { + let _ = self.inner.write_all(&self.mem.data); + let _ = self.inner.flush(); + } +} + +impl Archive for Xp3ArchiveWriter { + fn new_file<'a>(&'a mut self, name: &str) -> Result> { + let inner = self.new_file_non_seek(name)?; + Ok(Box::new(Writer { + inner, + mem: MemWriter::new(), + })) + } + + fn new_file_non_seek<'a>(&'a mut self, name: &str) -> Result> { + self.runner.join(); + for err in self.runner.take_results() { + err?; + } + let mut item = { + let items = self.items.lock_blocking(); + items + .get(name) + .ok_or_else(|| anyhow::anyhow!("File not found in archive: {}", name))? 
+ .clone() + }; + let (reader, writer) = std::io::pipe()?; + let reader = Reader::new(reader); + { + let file = self.file.clone(); + let segments = self.segments.clone(); + let items = self.items.clone(); + let segmenter = self.segmenter.clone(); + let stats = self.stats.clone(); + let is_compressed = self.compress_files; + let zlib_compression_level = self.zlib_compression_level; + self.runner.execute( + move |_| { + let mut reader = reader; + let mut offset_in_file = 0u64; + if let Some(segmenter) = segmenter { + for seg in segmenter.segment(&mut reader) { + let seg = seg?; + let hash: [u8; 32] = Sha256::digest(&seg).into(); + let fseg = match { + let mut segments = segments.lock_blocking(); + if let Some(old_seg) = segments.get(&hash) { + Err(old_seg.clone()) + } else { + let seg_data = WrittenSegment { + is_compressed, + start: 0, + original_size: seg.len() as u64, + archived_size: seg.len() as u64, + }; + segments.insert(hash, seg_data.clone()); + Ok(seg_data) + } + } { + Ok(mut info) => { + let data = if is_compressed { + let mut e = flate2::write::ZlibEncoder::new( + Vec::new(), + flate2::Compression::new(zlib_compression_level), + ); + e.write_all(&seg)?; + e.finish()? 
+ } else { + seg + }; + let mut file = file.lock_blocking(); + let start = file.seek(std::io::SeekFrom::End(0))?; + file.write_all(&data)?; + info.start = start; + info.archived_size = data.len() as u64; + let stats = stats.clone(); + stats + .total_original_size + .fetch_add(info.original_size, Ordering::Relaxed); + stats + .final_archive_size + .fetch_add(info.archived_size, Ordering::Relaxed); + stats.total_segments.fetch_add(1, Ordering::Relaxed); + stats.unique_segments.fetch_add(1, Ordering::Relaxed); + let mut segments = segments.lock_blocking(); + segments.insert(hash, info.clone()); + let ninfo = Segment { + is_compressed: info.is_compressed, + start: info.start, + offset_in_file: offset_in_file, + original_size: info.original_size, + archived_size: info.archived_size, + }; + offset_in_file += info.original_size; + ninfo + } + Err(seg_info) => { + let stats = stats.clone(); + stats + .total_original_size + .fetch_add(seg_info.original_size, Ordering::Relaxed); + stats + .deduplication_savings + .fetch_add(seg_info.archived_size, Ordering::Relaxed); + stats.total_segments.fetch_add(1, Ordering::Relaxed); + let ninfo = Segment { + is_compressed: seg_info.is_compressed, + start: seg_info.start, + offset_in_file: offset_in_file, + original_size: seg_info.original_size, + archived_size: seg_info.archived_size, + }; + offset_in_file += seg_info.original_size; + ninfo + } + }; + item.original_size += fseg.original_size; + item.archived_size += fseg.archived_size; + item.segments.push(fseg); + } + } else { + let mut file = file.lock_blocking(); + let start = file.seek(std::io::SeekFrom::End(0))?; + let size = { + let mut writer = if is_compressed { + let e = flate2::write::ZlibEncoder::new( + &mut *file, + flate2::Compression::new(zlib_compression_level), + ); + Box::new(e) as Box + } else { + Box::new(&mut *file) as Box + }; + std::io::copy(&mut reader, &mut writer)? 
+ }; + let ninfo = Segment { + is_compressed, + start, + offset_in_file: 0, + original_size: size, + archived_size: if is_compressed { + file.stream_position()? - start + } else { + size + }, + }; + item.original_size += ninfo.original_size; + item.archived_size += ninfo.archived_size; + let stats = stats.clone(); + stats + .total_original_size + .fetch_add(ninfo.original_size, Ordering::Relaxed); + stats + .final_archive_size + .fetch_add(ninfo.archived_size, Ordering::Relaxed); + stats.total_segments.fetch_add(1, Ordering::Relaxed); + stats.unique_segments.fetch_add(1, Ordering::Relaxed); + item.segments.push(ninfo); + } + item.file_hash = reader.into_checksum(); + let mut items = items.lock_blocking(); + items.insert(item.name.clone(), item); + Ok(()) + }, + true, + )?; + } + Ok(Box::new(writer)) + } + + fn write_header(&mut self) -> Result<()> { + self.runner.join(); + for err in self.runner.take_results() { + err?; + } + let mut file = self.file.lock_blocking(); + let index_offset = file.seek(std::io::SeekFrom::End(0))?; + let mut index_data = MemWriter::new(); + let items = self.items.lock_blocking(); + for (_, item) in items.iter() { + let mut file_chunk = MemWriter::new(); + let name = encode_string(Encoding::Utf16LE, &item.name, false)?; + let info_data_size = name.len() as u64 + 22; + file_chunk.write_all(CHUNK_INFO)?; + file_chunk.write_u64(info_data_size)?; + file_chunk.write_u32(0)?; // flags + file_chunk.write_u64(item.original_size)?; + file_chunk.write_u64(item.archived_size)?; + file_chunk.write_u16(name.len() as u16 / 2)?; + file_chunk.write_all(&name)?; + let segm_data_size = item.segments.len() as u64 * 28; + file_chunk.write_all(CHUNK_SEGM)?; + file_chunk.write_u64(segm_data_size)?; + for seg in &item.segments { + let flag = if seg.is_compressed { + TVP_XP3_SEGM_ENCODE_ZLIB + } else { + TVP_XP3_SEGM_ENCODE_RAW + }; + file_chunk.write_u32(flag)?; + file_chunk.write_u64(seg.start)?; + file_chunk.write_u64(seg.original_size)?; + 
file_chunk.write_u64(seg.archived_size)?; + } + let adlr_data_size = 4; + file_chunk.write_all(CHUNK_ADLR)?; + file_chunk.write_u64(adlr_data_size)?; + file_chunk.write_u32(item.file_hash)?; + index_data.write_all(CHUNK_FILE)?; + let file_chunk = file_chunk.into_inner(); + index_data.write_u64(file_chunk.len() as u64)?; + index_data.write_all(&file_chunk)?; + } + let index_data = index_data.into_inner(); + if self.compress_index { + let mut e = flate2::write::ZlibEncoder::new( + Vec::new(), + flate2::Compression::new(self.zlib_compression_level), + ); + e.write_all(&index_data)?; + let compressed_index = e.finish()?; + file.write_u8(TVP_XP3_INDEX_ENCODE_ZLIB)?; + file.write_u64(compressed_index.len() as u64)?; + file.write_u64(index_data.len() as u64)?; + file.write_all(&compressed_index)?; + } else { + file.write_u8(TVP_XP3_INDEX_ENCODE_RAW)?; + file.write_u64(index_data.len() as u64)?; + file.write_all(&index_data)?; + } + file.write_u64_at(11, index_offset)?; // Write index offset to header + file.flush()?; + eprintln!("XP3 Archive Statistics:\n{}", self.stats); + Ok(()) + } +} diff --git a/src/types.rs b/src/types.rs index 90a6b46..abd4049 100644 --- a/src/types.rs +++ b/src/types.rs @@ -480,6 +480,17 @@ pub struct ExtraConfig { #[default(true)] /// Decompress mdf files in Kirikiri XP3 archive when extracting. Default is true. pub xp3_mdf_decompress: bool, + #[cfg(feature = "kirikiri-arc")] + /// Configuration for Kirikiri XP3 segmenter when creating XP3 archive. + pub xp3_segmenter: crate::scripts::kirikiri::archive::xp3::SegmenterConfig, + #[cfg(feature = "kirikiri-arc")] + #[default(true)] + /// Compress files in Kirikiri XP3 archive when creating. Default is true. + pub xp3_compress_files: bool, + #[cfg(feature = "kirikiri-arc")] + #[default(true)] + /// Compress index in Kirikiri XP3 archive when creating. Default is true. + pub xp3_compress_index: bool, } #[derive(Clone, Copy, Debug, ValueEnum, PartialEq, Eq, PartialOrd, Ord)]