diff --git a/src/ext/atomic.rs b/src/ext/atomic.rs new file mode 100644 index 0000000..683c602 --- /dev/null +++ b/src/ext/atomic.rs @@ -0,0 +1,42 @@ +use std::sync::atomic::Ordering; + +/// A trait to help to load and store atomic value quickly. +pub trait AtomicQuick { + /// Loads a value from the atomic integer in [Ordering::SeqCst] mode. + fn qload(&self) -> T; + /// Stores a value into the atomic integer in [Ordering::SeqCst] mode. + fn qstore(&self, value: T); + #[inline] + /// Stores a value into the atomic integer in [Ordering::SeqCst] mode. + /// Alias for [Self::qstore] + fn qsave(&self, value: T) { + self.qstore(value) + } +} + +macro_rules! impl_atomic_quick_with_atomic { + ($type1:ty, $type2:ty) => { + impl AtomicQuick<$type2> for $type1 { + #[inline] + fn qload(&self) -> $type2 { + self.load(Ordering::SeqCst) + } + #[inline] + fn qstore(&self, value: $type2) { + self.store(value, Ordering::SeqCst) + } + } + }; +} + +impl_atomic_quick_with_atomic!(std::sync::atomic::AtomicBool, bool); +impl_atomic_quick_with_atomic!(std::sync::atomic::AtomicI8, i8); +impl_atomic_quick_with_atomic!(std::sync::atomic::AtomicU8, u8); +impl_atomic_quick_with_atomic!(std::sync::atomic::AtomicI16, i16); +impl_atomic_quick_with_atomic!(std::sync::atomic::AtomicU16, u16); +impl_atomic_quick_with_atomic!(std::sync::atomic::AtomicI32, i32); +impl_atomic_quick_with_atomic!(std::sync::atomic::AtomicU32, u32); +impl_atomic_quick_with_atomic!(std::sync::atomic::AtomicI64, i64); +impl_atomic_quick_with_atomic!(std::sync::atomic::AtomicU64, u64); +impl_atomic_quick_with_atomic!(std::sync::atomic::AtomicIsize, isize); +impl_atomic_quick_with_atomic!(std::sync::atomic::AtomicUsize, usize); diff --git a/src/ext/io.rs b/src/ext/io.rs index b2a08ca..3f51307 100644 --- a/src/ext/io.rs +++ b/src/ext/io.rs @@ -858,6 +858,7 @@ pub struct MemReader { pub pos: usize, } +#[derive(Clone)] pub struct MemReaderRef<'a> { pub data: &'a [u8], pub pos: usize, diff --git a/src/ext/mod.rs b/src/ext/mod.rs index 904f814..d4c1155 100644 --- a/src/ext/mod.rs +++ b/src/ext/mod.rs @@ -1,2 +1,3 @@ +pub mod atomic; pub mod io; pub mod vec; diff --git a/src/scripts/bgi/image/cbg.rs b/src/scripts/bgi/image/cbg.rs index 1687f00..b85c3c6 100644 --- a/src/scripts/bgi/image/cbg.rs +++ b/src/scripts/bgi/image/cbg.rs @@ -1,3 +1,4 @@ +use crate::ext::atomic::*; use crate::ext::io::*; use crate::scripts::base::*; use crate::types::*; @@ -7,7 +8,8 @@ use anyhow::Ok; use anyhow::Result; use msg_tool_macro::*; use std::io::{Read, Seek, Write}; -use std::u32; +use std::sync::atomic::AtomicBool; +use std::sync::{Arc, Mutex}; #[derive(Debug)] pub struct BgiCBGBuilder {} @@ -82,9 +84,14 @@ fn convert_bgr565_to_bgr24(input: Vec, width: u16, height: u16) -> ImageData for chunk in input.chunks_exact(2) { let pixel = u16::from_le_bytes([chunk[0], chunk[1]]); - let blue = ((pixel & 0x1F) << 3) as u8; - let green = (((pixel >> 5) & 0x3F) << 2) as u8; - let red = (((pixel >> 11) & 0x1F) << 3) as u8; + + let blue_5bit = (pixel & 0x1) as u8; + let green_6bit = ((pixel >> 5) & 0x3) as u8; + let red_5bit = ((pixel >> 11) & 0x1) as u8; + + let blue = ((blue_5bit as u16 * 255) / 31) as u8; + let green = ((green_6bit as u16 * 255) / 63) as u8; + let red = ((red_5bit as u16 * 255) / 31) as u8; output.push(blue); output.push(green); @@ -154,26 +161,7 @@ impl Script for BgiCBG { fn export_image(&self) -> Result { let decoder = CbgDecoder::new(self.data.to_ref(), &self.header, self.color_type)?; - let data = decoder.unpack()?; - let color_type = match self.color_type { - CbgColorType::Bgra32 => ImageColorType::Bgra, - CbgColorType::Bgr24 => ImageColorType::Bgr, - CbgColorType::Grayscale => ImageColorType::Grayscale, - CbgColorType::Bgr565 => { - return Ok(convert_bgr565_to_bgr24( - data, - self.header.width, - self.header.height, - )); - } - }; - Ok(ImageData { - width: self.header.width as u32, - height: self.header.height as u32, - color_type, - depth: 8, - data, - }) + Ok(decoder.unpack()?) } } @@ -209,18 +197,27 @@ impl<'a> CbgDecoder<'a> { }) } - fn unpack(mut self) -> Result> { + fn unpack(mut self) -> Result { self.stream.m_input.pos = 0x30; if self.info.version < 2 { return self.unpack_v1(); + } else if self.info.version == 2 { + if self.info.enc_length < 0x80 { + return Err(anyhow::anyhow!( + "Invalid encoded length: {}", + self.info.enc_length + )); + } + return self.unpack_v2(); } Err(anyhow::anyhow!("Unknown version: {}", self.info.version)) } - fn unpack_v1(&mut self) -> Result> { + fn unpack_v1(&mut self) -> Result { let leaf_nodes_weight = { let stream = MemReader::new(self.read_encoded()?); - Self::read_weight_table(stream.to_ref(), 0x100)? + let mut stream_ref = stream.to_ref(); + Self::read_weight_table(&mut stream_ref, 0x100)? }; let tree = HuffmanTree::new(&leaf_nodes_weight, false); let mut packed = Vec::with_capacity(self.info.intermediate_length as usize); @@ -231,7 +228,130 @@ impl<'a> CbgDecoder<'a> { output.resize(buf_size, 0); Self::unpack_zeros(&packed, &mut output); self.reverse_average_sampling(&mut output); - Ok(output) + let color_type = match self.color_type { + CbgColorType::Bgra32 => ImageColorType::Bgra, + CbgColorType::Bgr24 => ImageColorType::Bgr, + CbgColorType::Grayscale => ImageColorType::Grayscale, + CbgColorType::Bgr565 => { + return Ok(convert_bgr565_to_bgr24( + output, + self.info.width, + self.info.height, + )); + } + }; + Ok(ImageData { + width: self.info.width as u32, + height: self.info.height as u32, + color_type, + depth: 8, + data: output, + }) + } + + // #TODO: Fix + fn unpack_v2(&mut self) -> Result { + let dct_data = self.read_encoded()?; + let mut dct = [[0f32; 64]; 2]; + for i in 0..0x80usize { + dct[i >> 6][i & 0x3f] = DCT_TABLE[i & 0x3f] * dct_data[i] as f32; + } + let base_offset = self.stream.m_input.pos; + let tree1 = HuffmanTree::new( + &Self::read_weight_table(&mut self.stream.m_input, 0x10)?, + true, + ); + let tree2 = HuffmanTree::new( + &Self::read_weight_table(&mut self.stream.m_input, 0xB0)?, + true, + ); + let bpp = self.info.bpp; + let width = ((self.info.width as i32 + 7) & -8) as u16; + let height = ((self.info.height as i32 + 7) & -8) as u16; + let y_blocks = height / 8; + let mut offsets = Vec::with_capacity(y_blocks as usize + 1); + let input_base = self.stream.m_input.pos + (y_blocks as usize + 1) * 4 - base_offset; + for _ in 0..y_blocks + 1 { + let offset = self.stream.m_input.read_u32()?; + offsets.push(offset as usize - input_base); + } + let input = self.stream.m_input.data[self.stream.m_input.pos..].to_vec(); + let pad_skip = ((width as usize >> 3) + 7) >> 3; + let mut tasks = Vec::with_capacity(y_blocks as usize + 1); + let output_size = width as usize * height as usize * 4; + let mut output = Vec::with_capacity(output_size); + output.resize(output_size, 0); + let output = Mutex::new(output); + let decoder = Arc::new(ParallelCbgDecoder { + input, + output, + bpp: bpp, + width, + height, + tree1, + tree2, + dct, + has_alpha: AtomicBool::new(false), + }); + let mut dst = 0usize; + for i in 0..y_blocks { + let block_offset = offsets[i as usize] + pad_skip; + let next_offset = if i + 1 == y_blocks { + decoder.input.len() + } else { + offsets[(i + 1) as usize] + }; + let cdst = dst; + let decoder_ref = Arc::clone(&decoder); + let task = std::thread::spawn(move || { + decoder_ref.unpack_block(block_offset, next_offset - block_offset, cdst) + }); + tasks.push(task); + dst += decoder.width as usize * 32; + } + if self.info.bpp == 32 { + let decoder_ref = Arc::clone(&decoder); + let task = + std::thread::spawn(move || decoder_ref.unpack_alpha(offsets[y_blocks as usize])); + tasks.push(task); + } + for task in tasks { + task.join() + .map_err(|e| anyhow::anyhow!("Failed to join thread: {:?}", e))??; + } + let has_alpha = decoder.has_alpha.qload(); + let width = decoder.width as u32; + let height = decoder.height as u32; + let mut output = decoder + .output + .lock() + .map_err(|e| anyhow::anyhow!("Failed to lock output: {}", e))? + .clone(); + if !has_alpha { + let mut src_idx = 0; + let mut dst_idx = 0; + for _ in 0..self.info.height { + for _ in 0..self.info.width { + output[dst_idx] = output[src_idx]; + output[dst_idx + 1] = output[src_idx + 1]; + output[dst_idx + 2] = output[src_idx + 2]; + src_idx += 4; + dst_idx += 3; + } + } + output.truncate(dst_idx); + } + Ok(ImageData { + width, + height, + color_type: if has_alpha { + ImageColorType::Bgra + } else { + ImageColorType::Bgr + }, + depth: 8, + data: output, + }) } fn read_encoded(&mut self) -> Result> { @@ -276,10 +396,10 @@ impl<'a> CbgDecoder<'a> { Ok(v) } - fn read_weight_table(mut input: MemReaderRef<'_>, length: usize) -> Result> { + fn read_weight_table(input: &mut MemReaderRef<'_>, length: usize) -> Result> { let mut weights = Vec::with_capacity(length); for _ in 0..length { - let weight = Self::read_int(&mut input)? as u32; + let weight = Self::read_int(input)? as u32; weights.push(weight); } Ok(weights) @@ -372,6 +492,7 @@ impl<'a> CbgDecoder<'a> { } } +#[derive(Debug)] struct HuffmanNode { valid: bool, is_parent: bool, @@ -380,6 +501,7 @@ struct HuffmanNode { right_index: usize, } +#[derive(Debug)] struct HuffmanTree { nodes: Vec, } @@ -461,3 +583,338 @@ impl HuffmanTree { } } } + +const DCT_TABLE: [f32; 64] = [ + 1.00000000, 1.38703990, 1.30656302, 1.17587554, 1.00000000, 0.78569496, 0.54119611, 0.27589938, + 1.38703990, 1.92387950, 1.81225491, 1.63098633, 1.38703990, 1.08979023, 0.75066054, 0.38268343, + 1.30656302, 1.81225491, 1.70710683, 1.53635550, 1.30656302, 1.02655995, 0.70710677, 0.36047992, + 1.17587554, 1.63098633, 1.53635550, 1.38268340, 1.17587554, 0.92387950, 0.63637930, 0.32442334, + 1.00000000, 1.38703990, 1.30656302, 1.17587554, 1.00000000, 0.78569496, 0.54119611, 0.27589938, + 0.78569496, 1.08979023, 1.02655995, 0.92387950, 0.78569496, 0.61731654, 0.42521504, 0.21677275, + 0.54119611, 0.75066054, 0.70710677, 0.63637930, 0.54119611, 0.42521504, 0.29289323, 0.14931567, + 0.27589938, 0.38268343, 0.36047992, 0.32442334, 0.27589938, 0.21677275, 0.14931567, 0.07612047, +]; + +const BLOCK_FILL_ORDER: [u8; 64] = [ + 0, 1, 8, 16, 9, 2, 3, 10, 17, 24, 32, 25, 18, 11, 4, 5, 12, 19, 26, 33, 40, 48, 41, 34, 27, 20, + 13, 6, 7, 14, 21, 28, 35, 42, 49, 56, 57, 50, 43, 36, 29, 22, 15, 23, 30, 37, 44, 51, 58, 59, + 52, 45, 38, 31, 39, 46, 53, 60, 61, 54, 47, 55, 62, 63, +]; + +struct ParallelCbgDecoder { + input: Vec, + output: Mutex>, + bpp: u32, + width: u16, + height: u16, + tree1: HuffmanTree, + tree2: HuffmanTree, + dct: [[f32; 64]; 2], + has_alpha: AtomicBool, +} + +impl ParallelCbgDecoder { + fn unpack_block(&self, offset: usize, length: usize, dst: usize) -> Result<()> { + let input = MemReaderRef::new(&self.input[offset..offset + length]); + let mut reader = MsbBitStream::new(input); + let block_size = CbgDecoder::read_int(&mut reader.m_input)?; + let mut color_data = Vec::with_capacity(block_size as usize); + color_data.resize(block_size as usize, 0i16); + let mut acc = 0; + let mut i = 0; + while i < block_size && reader.m_input.pos < reader.m_input.data.len() { + let count = self.tree1.decode_token(&mut reader)?; + if count != 0 { + let mut v = reader.get_bits(count as u32)? as i32; + if (v >> (count - 1)) == 0 { + v = (-1 << count | v) + 1; + } + acc += v; + } + color_data[i as usize] = acc as i16; + i += 64; + } + if reader.m_cached_bits & 7 != 0 { + reader.get_bits(reader.m_cached_bits & 7)?; + } + i = 0; + while i < block_size && reader.m_input.pos < reader.m_input.data.len() { + let mut index = 1; + while reader.m_input.pos < reader.m_input.data.len() { + let mut code = self.tree2.decode_token(&mut reader)?; + if code == 0 { + break; + } + if code == 0xf { + index += 0x10; + continue; + } + index += code & 0xf; + if index >= 64 { + break; + } + code >>= 4; + let mut v = reader.get_bits(code as u32)? as i32; + if code != 0 && (v >> (code - 1)) == 0 { + v = (-1 << code | v) + 1; + } + color_data[i as usize + BLOCK_FILL_ORDER[index as usize] as usize] = v as i16; + index += 1; + } + i += 64; + } + if self.bpp == 8 { + self.decode_grayscale(&color_data, dst)?; + } else { + self.decode_rgb(&color_data, dst)?; + } + Ok(()) + } + + fn decode_rgb(&self, data: &[i16], mut dst: usize) -> Result<()> { + let block_count = self.width / 8; + for i in 0..block_count { + let mut src = i as usize * 64; + let mut yuv_blocks = [[0; 3]; 64]; + for channel in 0..3 { + self.decode_dct(channel, data, src, &mut yuv_blocks)?; + src += self.width as usize * 8; + } + let mut output = self + .output + .lock() + .map_err(|e| anyhow::anyhow!("Failed to lock output: {}", e))?; + for j in 0..64 { + let cy = yuv_blocks[j][0] as f32; + let cb = yuv_blocks[j][1] as f32; + let cr = yuv_blocks[j][2] as f32; + let r = cy + 1.402 * cr - 178.956; + let g = cy - 0.34414 * cb - 0.71414 * cr + 135.95984; + let b = cy + 1.772 * cb - 226.316; + let y = j >> 3; + let x = j & 7; + let p = (y * self.width as usize + x) * 4 + dst; + output[p] = Self::float_to_byte(b); + output[p + 1] = Self::float_to_byte(g); + output[p + 2] = Self::float_to_byte(r); + } + dst += 32; + } + Ok(()) + } + + fn decode_grayscale(&self, data: &[i16], mut dst: usize) -> Result<()> { + let mut src = 0; + let block_count = self.width / 8; + for _ in 0..block_count { + let mut yuv_blocks = [[0; 3]; 64]; + self.decode_dct(0, data, src, &mut yuv_blocks)?; + src += 64; + let mut output = self + .output + .lock() + .map_err(|e| anyhow::anyhow!("Failed to lock output: {}", e))?; + for j in 0usize..64 { + let y = j >> 3; + let x = j & 7; + let p = (y * self.width as usize + x) * 4 + dst; + output[p] = yuv_blocks[j][0] as u8; + output[p + 1] = yuv_blocks[j][0] as u8; + output[p + 2] = yuv_blocks[j][0] as u8; + } + dst += 32; + } + Ok(()) + } + + fn unpack_alpha(&self, offset: usize) -> Result<()> { + let mut input = MemReaderRef::new(&self.input[offset..]); + if input.read_i32()? != 1 { + return Ok(()); + } + let mut dst = 3; + let mut ctl = 1 << 1; + let mut output = self + .output + .lock() + .map_err(|e| anyhow::anyhow!("Failed to lock output: {}", e))?; + while dst < output.len() { + ctl >>= 1; + if ctl == 1 { + ctl = (input.read_u8()? as i32) | 0x100; + } + if ctl & 1 != 0 { + let v = input.read_u16()? as i32; + let mut x = v & 0x3f; + if x > 0x1f { + x |= -0x40; + } + let mut y = (v >> 6) & 7; + if y != 0 { + y |= -8; + } + let count = ((v >> 9) & 0x7f) + 3; + let mut src = + (dst as isize + (x as isize + y as isize * self.width as isize) * 4) as usize; + if src >= dst { + return Ok(()); + } + for _ in 0..count { + output[dst] = output[src]; + src += 4; + dst += 4; + } + } else { + output[dst] = input.read_u8()?; + dst += 4; + } + } + self.has_alpha.qsave(true); + Ok(()) + } + + fn decode_dct( + &self, + channel: usize, + data: &[i16], + src: usize, + output: &mut [[i16; 3]; 64], + ) -> Result<()> { + let mut v1; + let mut v2; + let mut v3; + let mut v4; + let mut v5; + let mut v6; + let mut v7; + let mut v8; + let mut v9; + let mut v10; + let mut v11; + let mut v12; + let mut v13; + let mut v14; + let mut v15; + let mut v16; + let mut v17; + let d = if channel > 0 { 1 } else { 0 }; + let mut tmp = [[0f32; 8]; 8]; + for i in 0..8usize { + if 0 == data[src + 8 + i] + && 0 == data[src + 16 + i] + && 0 == data[src + 24 + i] + && 0 == data[src + 32 + i] + && 0 == data[src + 40 + i] + && 0 == data[src + 48 + i] + && 0 == data[src + 56 + i] + { + let t = (data[src + i] as f32) * self.dct[d][i]; + tmp[0][i] = t; + tmp[1][i] = t; + tmp[2][i] = t; + tmp[3][i] = t; + tmp[4][i] = t; + tmp[5][i] = t; + tmp[6][i] = t; + tmp[7][i] = t; + continue; + } + v1 = (data[src + i] as f32) * self.dct[d][i]; + v2 = (data[src + 8 + i] as f32) * self.dct[d][i + 8]; + v3 = (data[src + 16 + i] as f32) * self.dct[d][i + 16]; + v4 = (data[src + 24 + i] as f32) * self.dct[d][i + 24]; + v5 = (data[src + 32 + i] as f32) * self.dct[d][i + 32]; + v6 = (data[src + 40 + i] as f32) * self.dct[d][i + 40]; + v7 = (data[src + 48 + i] as f32) * self.dct[d][i + 48]; + v8 = (data[src + 56 + i] as f32) * self.dct[d][i + 56]; + + v10 = v1 + v5; + v11 = v1 - v5; + v12 = v3 + v7; + v13 = (v3 - v7) * 1.414213562 - v12; + v1 = v10 + v12; + v7 = v10 - v12; + v3 = v11 + v13; + v5 = v11 - v13; + v14 = v2 + v8; + v15 = v2 - v8; + v16 = v6 + v4; + v17 = v6 - v4; + v8 = v14 + v16; + v11 = (v14 - v16) * 1.414213562; + v9 = (v17 + v15) * 1.847759065; + v10 = 1.082392200 * v15 - v9; + v13 = -2.613125930 * v17 + v9; + v6 = v13 - v8; + v4 = v11 - v6; + v2 = v10 + v4; + tmp[0][i] = v1 + v8; + tmp[1][i] = v3 + v6; + tmp[2][i] = v5 + v4; + tmp[3][i] = v7 - v2; + tmp[4][i] = v7 + v2; + tmp[5][i] = v5 - v4; + tmp[6][i] = v3 - v6; + tmp[7][i] = v1 - v8; + } + let mut dst = 0; + for i in 0..8usize { + v10 = tmp[i][0] + tmp[i][4]; + v11 = tmp[i][0] - tmp[i][4]; + v12 = tmp[i][2] + tmp[i][6]; + v13 = tmp[i][2] - tmp[i][6]; + v14 = tmp[i][1] + tmp[i][7]; + v15 = tmp[i][1] - tmp[i][7]; + v16 = tmp[i][5] + tmp[i][3]; + v17 = tmp[i][5] - tmp[i][3]; + + v13 = 1.414213562 * v13 - v12; + v1 = v10 + v12; + v7 = v10 - v12; + v3 = v11 + v13; + v5 = v11 - v13; + v8 = v14 + v16; + v11 = (v14 - v16) * 1.414213562; + v9 = (v15 + v17) * 1.847759065; + v10 = v9 - v15 * 1.082392200; + v13 = v9 - v17 * 2.613125930; + v6 = v13 - v8; + v4 = v11 - v6; + v2 = v10 - v4; + + output[dst][channel] = Self::float_to_short(v1 + v8); + output[dst + 1][channel] = Self::float_to_short(v3 + v6); + output[dst + 2][channel] = Self::float_to_short(v5 + v4); + output[dst + 3][channel] = Self::float_to_short(v7 + v2); + output[dst + 4][channel] = Self::float_to_short(v7 - v2); + output[dst + 5][channel] = Self::float_to_short(v5 - v4); + output[dst + 6][channel] = Self::float_to_short(v3 - v6); + output[dst + 7][channel] = Self::float_to_short(v1 - v8); + dst += 8; + } + Ok(()) + } + + fn float_to_short(f: f32) -> i16 { + let a = 0x80 + (f as i32 >> 3); + if a <= 0 { + 0 + } else if a <= 0xff { + a as i16 + } else if a < 0x180 { + 0xff + } else { + 0 + } + } + + fn float_to_byte(f: f32) -> u8 { + if f >= 255.0 { + 0xff + } else if f <= 0.0 { + 0 + } else { + f as u8 + } + } +} diff --git a/src/utils/bit_stream.rs b/src/utils/bit_stream.rs index d0136ce..9441068 100644 --- a/src/utils/bit_stream.rs +++ b/src/utils/bit_stream.rs @@ -5,7 +5,7 @@ use std::io::Write; pub struct MsbBitStream<'a> { pub m_input: MemReaderRef<'a>, m_bits: u32, - m_cached_bits: u32, + pub m_cached_bits: u32, } impl<'a> MsbBitStream<'a> {