Add support to pack xp3 files

This commit is contained in:
2025-10-10 20:52:45 +08:00
parent 021fe5b71a
commit 309bccd485
15 changed files with 692 additions and 4 deletions

View File

@@ -1 +1,2 @@
pub mod xp3;
mod xp3pack;

View File

@@ -1,3 +1,4 @@
use super::xp3pack::*;
use crate::ext::io::*;
use crate::scripts::base::*;
use crate::types::*;
@@ -9,6 +10,58 @@ use std::sync::{Arc, Mutex};
use xp3::XP3Reader;
use xp3::index::file::{IndexSegmentFlag, XP3FileIndex};
pub use super::xp3pack::SegmenterConfig;
/// Parses a textual segmenter specification into a [`SegmenterConfig`].
///
/// Fields are separated by `:` and the segmenter kind is matched case-insensitively:
///
/// * empty or whitespace-only string: [`SegmenterConfig::default`]
/// * `none`: no segmentation
/// * `cdc:min_size:avg_size:max_size`: FastCDC (`fastcdc` is accepted as an alias)
/// * `fixed:size`: fixed-size segments
///
/// Sizes are parsed with `parse_size::parse_size`.
///
/// # Errors
///
/// Returns an error when the kind is unknown, the number of fields does not match the
/// kind, a size cannot be parsed, a size is zero, the FastCDC sizes are not ordered
/// `min_size <= avg_size <= max_size`, or a size does not fit the integer type used by
/// the configuration.
///
/// NOTE(review): the FastCDC implementation may impose additional bounds on the three
/// sizes; those are not checked here. Confirm against the `fastcdc` documentation.
pub fn parse_segmenter_config(str: &str) -> Result<SegmenterConfig> {
    // `str::split` always yields at least one item, so an empty specification has to be
    // detected on the input itself rather than on the number of parts.
    if str.trim().is_empty() {
        return Ok(SegmenterConfig::default());
    }
    let parts: Vec<&str> = str.split(':').collect();
    match parts[0].to_lowercase().as_str() {
        "none" => Ok(SegmenterConfig::None),
        "cdc" | "fastcdc" => {
            if parts.len() != 4 {
                return Err(anyhow::anyhow!(
                    "Invalid FastCDC segmenter config. Expected format: cdc:min_size:avg_size:max_size"
                ));
            }
            let min_size = parse_size::parse_size(parts[1])?;
            let avg_size = parse_size::parse_size(parts[2])?;
            let max_size = parse_size::parse_size(parts[3])?;
            if min_size == 0 || avg_size == 0 || max_size == 0 {
                return Err(anyhow::anyhow!(
                    "Invalid FastCDC segmenter config. Sizes must be greater than 0."
                ));
            }
            if !(min_size <= avg_size && avg_size <= max_size) {
                return Err(anyhow::anyhow!(
                    "Invalid FastCDC segmenter config. Expected min_size <= avg_size <= max_size."
                ));
            }
            // The configuration stores 32-bit sizes; reject larger values instead of
            // silently truncating them.
            let to_u32 = |label: &str, value: u64| {
                u32::try_from(value).map_err(|_| {
                    anyhow::anyhow!(
                        "Invalid FastCDC segmenter config. {} ({} bytes) exceeds the maximum of {} bytes.",
                        label,
                        value,
                        u32::MAX
                    )
                })
            };
            Ok(SegmenterConfig::FastCdc {
                min_size: to_u32("min_size", min_size)?,
                avg_size: to_u32("avg_size", avg_size)?,
                max_size: to_u32("max_size", max_size)?,
            })
        }
        "fixed" => {
            if parts.len() != 2 {
                return Err(anyhow::anyhow!(
                    "Invalid Fixed segmenter config. Expected format: fixed:size"
                ));
            }
            let size = parse_size::parse_size(parts[1])?;
            if size == 0 {
                return Err(anyhow::anyhow!(
                    "Invalid Fixed segmenter config. Size must be greater than 0."
                ));
            }
            // `usize` may be narrower than `u64` on some targets.
            let size = usize::try_from(size).map_err(|_| {
                anyhow::anyhow!(
                    "Invalid Fixed segmenter config. Size ({} bytes) is too large for this platform.",
                    size
                )
            })?;
            Ok(SegmenterConfig::Fixed(size))
        }
        _ => Err(anyhow::anyhow!("Unknown segmenter type: {}", parts[0])),
    }
}
/// Builder for Kirikiri XP3 archives.
///
/// The type carries no state; its `ScriptBuilder` implementation reports the format as
/// an archive and creates new archives through `Xp3ArchiveWriter`.
#[derive(Debug)]
pub struct Xp3ArchiveBuilder {}
@@ -76,6 +129,16 @@ impl ScriptBuilder for Xp3ArchiveBuilder {
/// Always returns `true`: this builder handles an archive format.
fn is_archive(&self) -> bool {
true
}
/// Creates a new XP3 archive at `filename` that will contain the entries named in `files`.
///
/// The text encoding argument is ignored. Packing options are taken from `config`.
/// Any error from constructing the underlying `Xp3ArchiveWriter` is returned unchanged.
fn create_archive(
    &self,
    filename: &str,
    files: &[&str],
    _encoding: Encoding,
    config: &ExtraConfig,
) -> Result<Box<dyn Archive>> {
    let archive = Xp3ArchiveWriter::new(filename, files, config)?;
    Ok(Box::new(archive))
}
}
#[derive(Debug)]

View File

@@ -0,0 +1,24 @@
/// Represents a single data segment for a file.
/// A file can be split into multiple segments, which can be compressed independently.
#[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord)]
pub struct Segment {
/// Whether the segment's data is stored compressed in the archive.
/// The writer maps this to the zlib / raw segment flag in the index.
pub is_compressed: bool,
/// The offset of the segment's data within the archive file.
pub start: u64,
/// The offset of this segment within the original, uncompressed file.
pub offset_in_file: u64,
/// The size of the segment after decompression.
pub original_size: u64,
/// The size of the segment in the archive (potentially compressed).
pub archived_size: u64,
}
/// Represents a single file entry within the XP3 archive.
#[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord)]
pub struct ArchiveItem {
/// Name of the entry as stored in the archive index.
pub name: String,
/// Checksum of the entry's original data, written to the `adlr` chunk.
/// The writer fills this with the Adler-32 value computed while reading the data.
pub file_hash: u32,
/// Sum of the original (uncompressed) sizes of all segments.
pub original_size: u64,
/// Sum of the archived sizes of all segments. Segments shared with other entries
/// through deduplication are counted here as well.
pub archived_size: u64,
/// The segments that make up the entry, in file order.
pub segments: Vec<Segment>,
}

View File

@@ -0,0 +1,22 @@
/// XP3 file header signature: `XP3\r\n \n\x1a\x8b\x67\x01`
///
/// The writer emits these 11 bytes at the start of the archive, followed by the
/// 64-bit offset of the index.
pub const XP3_MAGIC: &[u8; 11] = b"XP3\r\n \n\x1a\x8b\x67\x01";
// Chunk names
/// Tag of the chunk that wraps one complete file entry in the index.
pub const CHUNK_FILE: &[u8; 4] = b"File";
/// Tag of the chunk holding an entry's flags, sizes and name.
pub const CHUNK_INFO: &[u8; 4] = b"info";
/// Tag of the chunk listing an entry's segments.
pub const CHUNK_SEGM: &[u8; 4] = b"segm";
/// Tag of the chunk holding an entry's Adler-32 checksum.
pub const CHUNK_ADLR: &[u8; 4] = b"adlr";
// Index entry flags
/// NOTE(review): not referenced by the writer; presumably selects the encode-method
/// bits of the index flag byte. Confirm against the XP3 format description.
pub const TVP_XP3_INDEX_ENCODE_METHOD_MASK: u8 = 0x07;
/// Index flag byte value written when the index is stored uncompressed.
pub const TVP_XP3_INDEX_ENCODE_RAW: u8 = 0;
/// Index flag byte value written when the index is stored zlib-compressed.
pub const TVP_XP3_INDEX_ENCODE_ZLIB: u8 = 1;
/// NOTE(review): not referenced by the writer; presumably marks that another index
/// block follows. Confirm against the XP3 format description.
pub const TVP_XP3_INDEX_CONTINUE: u8 = 0x80;
// File entry flags
/// NOTE(review): not referenced by the writer, which always writes 0 as the entry
/// flags; presumably marks a protected entry. Confirm.
pub const TVP_XP3_FILE_PROTECTED: u32 = 1 << 31;
// Segment entry flags
/// NOTE(review): not referenced by the writer; presumably selects the encode-method
/// bits of a segment's flag word. Confirm.
pub const TVP_XP3_SEGM_ENCODE_METHOD_MASK: u32 = 0x07;
/// Segment flag value written for a segment stored uncompressed.
pub const TVP_XP3_SEGM_ENCODE_RAW: u32 = 0;
/// Segment flag value written for a segment stored zlib-compressed.
pub const TVP_XP3_SEGM_ENCODE_ZLIB: u32 = 1;

View File

@@ -0,0 +1,9 @@
mod archive;
#[allow(dead_code)]
mod consts;
mod reader;
mod segmenter;
mod writer;
pub use segmenter::SegmenterConfig;
pub use writer::Xp3ArchiveWriter;

View File

@@ -0,0 +1,28 @@
use adler::Adler32;
use std::io::{PipeReader, Read};
/// Read end of a pipe that keeps a running Adler-32 checksum of every byte read through it.
pub struct Reader {
    inner: PipeReader,
    adler: Adler32,
}

impl Reader {
    /// Wraps `inner` with a freshly initialised checksum.
    pub fn new(inner: PipeReader) -> Self {
        let adler = Adler32::new();
        Reader { inner, adler }
    }

    /// Consumes the reader and returns the checksum of all bytes read so far.
    pub fn into_checksum(self) -> u32 {
        let Reader { adler, .. } = self;
        adler.checksum()
    }
}

impl Read for Reader {
    /// Reads from the pipe and feeds exactly the bytes that were read into the checksum.
    fn read(&mut self, buf: &mut [u8]) -> std::io::Result<usize> {
        let filled = self.inner.read(buf)?;
        let (fresh, _unused) = buf.split_at(filled);
        self.adler.write_slice(fresh);
        Ok(filled)
    }
}

View File

@@ -0,0 +1,99 @@
use super::reader::Reader;
use anyhow::Result;
use fastcdc::v2020::StreamCDC;
use std::io::Read;
/// Configuration options for the segmenter.
#[derive(Copy, Clone, Debug)]
pub enum SegmenterConfig {
    /// Do not segment the data.
    None,
    /// Use the FastCDC algorithm with specified minimum, average, and maximum chunk sizes.
    FastCdc {
        min_size: u32,
        avg_size: u32,
        max_size: u32,
    },
    /// Use fixed-size segments.
    Fixed(usize),
}

impl Default for SegmenterConfig {
    /// FastCDC with 32 KiB minimum, 256 KiB average and 8 MiB maximum chunk sizes.
    fn default() -> Self {
        const KIB: u32 = 1024;
        const MIB: u32 = 1024 * KIB;
        Self::FastCdc {
            min_size: 32 * KIB,
            avg_size: 256 * KIB,
            max_size: 8 * MIB,
        }
    }
}
/// A trait for strategies that split the data read from a [`Reader`] into one or more segments.
pub trait Segmenter {
/// Returns an iterator that reads from `data` and yields each segment's bytes in
/// stream order, or an error if producing a segment fails.
///
/// The iterator borrows both the segmenter and `data` for its whole lifetime.
fn segment<'a>(
&'a self,
data: &'a mut Reader,
) -> Box<dyn Iterator<Item = Result<Vec<u8>>> + 'a>;
}
pub struct FastCdcSegmenter {
min_size: u32,
avg_size: u32,
max_size: u32,
}
impl Segmenter for FastCdcSegmenter {
fn segment<'a>(
&'a self,
data: &'a mut Reader,
) -> Box<dyn Iterator<Item = Result<Vec<u8>>> + 'a> {
let cdc = StreamCDC::new(data, self.min_size, self.avg_size, self.max_size);
Box::new(cdc.map(|chunk| Ok(chunk?.data)))
}
}
pub struct FixedSizeSegmenter {
size: usize,
}
impl Segmenter for FixedSizeSegmenter {
fn segment<'a>(
&'a self,
data: &'a mut Reader,
) -> Box<dyn Iterator<Item = Result<Vec<u8>>> + 'a> {
let size = self.size;
let mut buf = vec![0; size];
Box::new(std::iter::from_fn(move || {
let nbuf = &mut buf;
let mut total_read = 0;
while total_read < size {
match data.read(&mut nbuf[total_read..]) {
Ok(0) => break, // EOF
Ok(n) => total_read += n,
Err(e) => return Some(Err(e.into())),
}
}
if total_read == 0 {
None // No more data to read
} else {
Some(Ok(buf[..total_read].to_vec()))
}
}))
}
}
/// Builds the segmenter described by `config`.
///
/// Returns `None` for [`SegmenterConfig::None`], meaning the data is not to be segmented.
pub fn create_segmenter(config: SegmenterConfig) -> Option<Box<dyn Segmenter + Send + Sync>> {
    let segmenter: Box<dyn Segmenter + Send + Sync> = match config {
        SegmenterConfig::None => return None,
        SegmenterConfig::Fixed(size) => Box::new(FixedSizeSegmenter { size }),
        SegmenterConfig::FastCdc {
            min_size,
            avg_size,
            max_size,
        } => Box::new(FastCdcSegmenter {
            min_size,
            avg_size,
            max_size,
        }),
    };
    Some(segmenter)
}

View File

@@ -0,0 +1,380 @@
use super::archive::*;
use super::consts::*;
use super::reader::*;
use super::segmenter::*;
use crate::ext::io::*;
use crate::ext::mutex::*;
use crate::scripts::base::*;
use crate::types::*;
use crate::utils::encoding::*;
use crate::utils::threadpool::ThreadPool;
use anyhow::Result;
use sha2::{Digest, Sha256};
use std::collections::{BTreeMap, HashMap};
use std::io::{Seek, Write};
use std::sync::atomic::{AtomicU64, AtomicUsize, Ordering};
use std::sync::{Arc, Mutex};
/// Location and sizes of a segment already stored in the archive.
///
/// Entries are kept in the writer's deduplication map, keyed by the SHA-256 digest of
/// the segment's uncompressed bytes.
#[derive(Clone)]
struct WrittenSegment {
// Whether the stored bytes are zlib-compressed.
is_compressed: bool,
// Offset of the stored bytes within the archive file.
start: u64,
// Size of the segment before compression.
original_size: u64,
// Size of the bytes actually stored in the archive.
archived_size: u64,
}
/// Counters collected while packing; printed once the index has been written.
#[derive(Default)]
struct Stats {
    /// Bytes of input data seen, before compression and deduplication.
    total_original_size: AtomicU64,
    /// Bytes of segment data actually written to the archive.
    final_archive_size: AtomicU64,
    /// Number of segments processed, duplicates included.
    total_segments: AtomicUsize,
    /// Number of segments that had to be written.
    unique_segments: AtomicUsize,
    /// Archived bytes that were not written again thanks to deduplication.
    deduplication_savings: AtomicU64,
}

impl std::fmt::Display for Stats {
    /// Renders the counters as a five-line report without a trailing newline.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        let bytes = |counter: &AtomicU64| counter.load(Ordering::Relaxed);
        let count = |counter: &AtomicUsize| counter.load(Ordering::Relaxed);
        write!(
            f,
            "Total Original Size: {} bytes\nFinal Archive Size: {} bytes\nTotal Segments: {}\nUnique Segments: {}\nDeduplication Savings: {} bytes",
            bytes(&self.total_original_size),
            bytes(&self.final_archive_size),
            count(&self.total_segments),
            count(&self.unique_segments),
            bytes(&self.deduplication_savings)
        )
    }
}
/// Writer that packs files into an XP3 archive stored in `T`.
///
/// File data is consumed on a worker task; segments with identical content are stored
/// once and shared between entries.
pub struct Xp3ArchiveWriter<T: Write + Seek> {
// Output archive, shared with the worker task.
file: Arc<Mutex<T>>,
// Segments already written, keyed by the SHA-256 digest of their uncompressed bytes.
segments: Arc<Mutex<HashMap<[u8; 32], WrittenSegment>>>,
// Index entries keyed by entry name.
items: Arc<Mutex<BTreeMap<String, ArchiveItem>>>,
// Pool that runs the per-file packing task; it is joined before the next file starts
// and before the index is written.
runner: ThreadPool<Result<()>>,
// Whether segment data is zlib-compressed.
compress_files: bool,
// Whether the index is zlib-compressed.
compress_index: bool,
// Compression level passed to zlib for both data and index.
zlib_compression_level: u32,
// Strategy for splitting files into segments; `None` stores each file as one segment.
segmenter: Option<Arc<Box<dyn Segmenter + Send + Sync>>>,
// Packing statistics, shared with the worker task.
stats: Arc<Stats>,
}
impl Xp3ArchiveWriter<std::io::BufWriter<std::fs::File>> {
pub fn new(filename: &str, files: &[&str], config: &ExtraConfig) -> Result<Self> {
let file = std::fs::File::create(filename)?;
let mut file = std::io::BufWriter::new(file);
let mut items = BTreeMap::new();
for file in files {
let item = ArchiveItem {
name: file.to_string(),
file_hash: 0,
original_size: 0,
archived_size: 0,
segments: Vec::new(),
};
items.insert(file.to_string(), item);
}
let segmenter = create_segmenter(config.xp3_segmenter).map(|s| Arc::new(s));
file.write_all(XP3_MAGIC)?;
file.write_u64(0)?; // Placeholder for index offset
Ok(Self {
file: Arc::new(Mutex::new(file)),
segments: Arc::new(Mutex::new(HashMap::new())),
items: Arc::new(Mutex::new(items)),
runner: ThreadPool::new(1, Some("xp3-writer"), false)?,
compress_files: config.xp3_compress_files,
compress_index: config.xp3_compress_index,
zlib_compression_level: config.zlib_compression_level,
segmenter,
stats: Arc::new(Stats::default()),
})
}
}
/// Seekable front for a non-seekable sink.
///
/// All writes and seeks go to an in-memory buffer; the buffered bytes are sent to
/// `inner` in one piece when the writer is dropped.
struct Writer<'a> {
    inner: Box<dyn Write + 'a>,
    mem: MemWriter,
}

impl std::fmt::Debug for Writer<'_> {
    /// Shows only the in-memory buffer; the boxed sink is omitted.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        let mut repr = f.debug_struct("Writer");
        repr.field("mem", &self.mem);
        repr.finish()
    }
}

impl Write for Writer<'_> {
    /// Appends to the in-memory buffer; nothing reaches the sink yet.
    fn write(&mut self, buf: &[u8]) -> std::io::Result<usize> {
        let buffer = &mut self.mem;
        buffer.write(buf)
    }

    /// Flushes the in-memory buffer only.
    fn flush(&mut self) -> std::io::Result<()> {
        let buffer = &mut self.mem;
        buffer.flush()
    }
}

impl Seek for Writer<'_> {
    /// Moves the position within the in-memory buffer.
    fn seek(&mut self, pos: std::io::SeekFrom) -> std::io::Result<u64> {
        let buffer = &mut self.mem;
        buffer.seek(pos)
    }

    /// Reports the position within the in-memory buffer.
    fn stream_position(&mut self) -> std::io::Result<u64> {
        let buffer = &mut self.mem;
        buffer.stream_position()
    }

    /// Returns to the start of the in-memory buffer.
    fn rewind(&mut self) -> std::io::Result<()> {
        let buffer = &mut self.mem;
        buffer.rewind()
    }
}

impl Drop for Writer<'_> {
    /// Sends the buffered bytes to the sink and flushes it.
    ///
    /// Both steps are best-effort: errors cannot be returned from `drop` and are discarded.
    fn drop(&mut self) {
        let Self { inner, mem } = self;
        let _ = inner.write_all(&mem.data);
        let _ = inner.flush();
    }
}
impl<T: Write + Seek + Sync + Send + 'static> Archive for Xp3ArchiveWriter<T> {
/// Starts the entry `name` and returns a seekable writer for its contents.
///
/// The contents are buffered in memory and handed to the packing pipeline when the
/// returned writer is dropped. Errors from `new_file_non_seek` are returned unchanged.
fn new_file<'a>(&'a mut self, name: &str) -> Result<Box<dyn WriteSeek + 'a>> {
    let sink = self.new_file_non_seek(name)?;
    let buffered = Writer {
        inner: sink,
        mem: MemWriter::new(),
    };
    Ok(Box::new(buffered))
}
/// Starts the entry `name` and returns the write end of a pipe for its contents.
///
/// A worker task reads the other end of the pipe and stores the data in the archive:
///
/// * With a segmenter, the stream is cut into segments. Each segment is identified by
///   the SHA-256 digest of its uncompressed bytes; a segment seen before is not written
///   again, the entry simply references the stored copy.
/// * Without a segmenter, the whole stream is stored as a single segment.
///
/// When the task finishes it records the entry's segments, sizes and Adler-32 checksum.
///
/// # Errors
///
/// Returns the error of the previously started entry (its task is joined first), an
/// error if `name` was not registered when the archive was created, or an error from
/// creating the pipe or scheduling the task. Errors raised by the task started here
/// surface on the next call that joins the pool.
fn new_file_non_seek<'a>(&'a mut self, name: &str) -> Result<Box<dyn Write + 'a>> {
    // Entries are packed one at a time: wait for the previous one and report its failure.
    self.runner.join();
    for err in self.runner.take_results() {
        err?;
    }
    let mut item = {
        let items = self.items.lock_blocking();
        items
            .get(name)
            .ok_or_else(|| anyhow::anyhow!("File not found in archive: {}", name))?
            .clone()
    };
    let (reader, writer) = std::io::pipe()?;
    let reader = Reader::new(reader);
    {
        let file = self.file.clone();
        let segments = self.segments.clone();
        let items = self.items.clone();
        let segmenter = self.segmenter.clone();
        let stats = self.stats.clone();
        let is_compressed = self.compress_files;
        let zlib_compression_level = self.zlib_compression_level;
        self.runner.execute(
            move |_| {
                let mut reader = reader;
                let mut offset_in_file = 0u64;
                if let Some(segmenter) = segmenter {
                    for seg in segmenter.segment(&mut reader) {
                        let seg = seg?;
                        let hash: [u8; 32] = Sha256::digest(&seg).into();
                        let known = {
                            let seen = segments.lock_blocking();
                            seen.get(&hash).cloned()
                        };
                        let info = match known {
                            Some(existing) => {
                                // Duplicate content: reference the stored copy.
                                stats
                                    .total_original_size
                                    .fetch_add(existing.original_size, Ordering::Relaxed);
                                stats
                                    .deduplication_savings
                                    .fetch_add(existing.archived_size, Ordering::Relaxed);
                                stats.total_segments.fetch_add(1, Ordering::Relaxed);
                                existing
                            }
                            None => {
                                let original_size = seg.len() as u64;
                                let data = if is_compressed {
                                    let mut e = flate2::write::ZlibEncoder::new(
                                        Vec::new(),
                                        flate2::Compression::new(zlib_compression_level),
                                    );
                                    e.write_all(&seg)?;
                                    e.finish()?
                                } else {
                                    seg
                                };
                                let start = {
                                    let mut file = file.lock_blocking();
                                    let start = file.seek(std::io::SeekFrom::End(0))?;
                                    file.write_all(&data)?;
                                    start
                                };
                                let written = WrittenSegment {
                                    is_compressed,
                                    start,
                                    original_size,
                                    archived_size: data.len() as u64,
                                };
                                stats
                                    .total_original_size
                                    .fetch_add(written.original_size, Ordering::Relaxed);
                                stats
                                    .final_archive_size
                                    .fetch_add(written.archived_size, Ordering::Relaxed);
                                stats.total_segments.fetch_add(1, Ordering::Relaxed);
                                stats.unique_segments.fetch_add(1, Ordering::Relaxed);
                                // Register the segment only after it is safely in the
                                // archive, so a failed write never leaves an entry
                                // pointing at a bogus offset.
                                let mut seen = segments.lock_blocking();
                                seen.insert(hash, written.clone());
                                written
                            }
                        };
                        let fseg = Segment {
                            is_compressed: info.is_compressed,
                            start: info.start,
                            offset_in_file,
                            original_size: info.original_size,
                            archived_size: info.archived_size,
                        };
                        offset_in_file += fseg.original_size;
                        item.original_size += fseg.original_size;
                        item.archived_size += fseg.archived_size;
                        item.segments.push(fseg);
                    }
                } else {
                    let mut file = file.lock_blocking();
                    let start = file.seek(std::io::SeekFrom::End(0))?;
                    let size = if is_compressed {
                        let mut encoder = flate2::write::ZlibEncoder::new(
                            &mut *file,
                            flate2::Compression::new(zlib_compression_level),
                        );
                        let copied = std::io::copy(&mut reader, &mut encoder)?;
                        // Finish explicitly: dropping the encoder would also try to
                        // write the end of the stream, but would discard any error.
                        encoder.finish()?;
                        copied
                    } else {
                        std::io::copy(&mut reader, &mut *file)?
                    };
                    let ninfo = Segment {
                        is_compressed,
                        start,
                        offset_in_file: 0,
                        original_size: size,
                        archived_size: if is_compressed {
                            file.stream_position()? - start
                        } else {
                            size
                        },
                    };
                    item.original_size += ninfo.original_size;
                    item.archived_size += ninfo.archived_size;
                    stats
                        .total_original_size
                        .fetch_add(ninfo.original_size, Ordering::Relaxed);
                    stats
                        .final_archive_size
                        .fetch_add(ninfo.archived_size, Ordering::Relaxed);
                    stats.total_segments.fetch_add(1, Ordering::Relaxed);
                    stats.unique_segments.fetch_add(1, Ordering::Relaxed);
                    item.segments.push(ninfo);
                }
                item.file_hash = reader.into_checksum();
                let mut items = items.lock_blocking();
                items.insert(item.name.clone(), item);
                Ok(())
            },
            true,
        )?;
    }
    Ok(Box::new(writer))
}
fn write_header(&mut self) -> Result<()> {
self.runner.join();
for err in self.runner.take_results() {
err?;
}
let mut file = self.file.lock_blocking();
let index_offset = file.seek(std::io::SeekFrom::End(0))?;
let mut index_data = MemWriter::new();
let items = self.items.lock_blocking();
for (_, item) in items.iter() {
let mut file_chunk = MemWriter::new();
let name = encode_string(Encoding::Utf16LE, &item.name, false)?;
let info_data_size = name.len() as u64 + 22;
file_chunk.write_all(CHUNK_INFO)?;
file_chunk.write_u64(info_data_size)?;
file_chunk.write_u32(0)?; // flags
file_chunk.write_u64(item.original_size)?;
file_chunk.write_u64(item.archived_size)?;
file_chunk.write_u16(name.len() as u16 / 2)?;
file_chunk.write_all(&name)?;
let segm_data_size = item.segments.len() as u64 * 28;
file_chunk.write_all(CHUNK_SEGM)?;
file_chunk.write_u64(segm_data_size)?;
for seg in &item.segments {
let flag = if seg.is_compressed {
TVP_XP3_SEGM_ENCODE_ZLIB
} else {
TVP_XP3_SEGM_ENCODE_RAW
};
file_chunk.write_u32(flag)?;
file_chunk.write_u64(seg.start)?;
file_chunk.write_u64(seg.original_size)?;
file_chunk.write_u64(seg.archived_size)?;
}
let adlr_data_size = 4;
file_chunk.write_all(CHUNK_ADLR)?;
file_chunk.write_u64(adlr_data_size)?;
file_chunk.write_u32(item.file_hash)?;
index_data.write_all(CHUNK_FILE)?;
let file_chunk = file_chunk.into_inner();
index_data.write_u64(file_chunk.len() as u64)?;
index_data.write_all(&file_chunk)?;
}
let index_data = index_data.into_inner();
if self.compress_index {
let mut e = flate2::write::ZlibEncoder::new(
Vec::new(),
flate2::Compression::new(self.zlib_compression_level),
);
e.write_all(&index_data)?;
let compressed_index = e.finish()?;
file.write_u8(TVP_XP3_INDEX_ENCODE_ZLIB)?;
file.write_u64(compressed_index.len() as u64)?;
file.write_u64(index_data.len() as u64)?;
file.write_all(&compressed_index)?;
} else {
file.write_u8(TVP_XP3_INDEX_ENCODE_RAW)?;
file.write_u64(index_data.len() as u64)?;
file.write_all(&index_data)?;
}
file.write_u64_at(11, index_offset)?; // Write index offset to header
file.flush()?;
eprintln!("XP3 Archive Statistics:\n{}", self.stats);
Ok(())
}
}