diff --git a/Cargo.toml b/Cargo.toml index 36eb0af..eea4f5d 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -84,7 +84,7 @@ escude = ["int-enum"] escude-arc = ["escude", "rand", "utils-bit-stream"] ex-hibit = [] ex-hibit-arc = ["ex-hibit"] -favorite = [] +favorite = ["utils-str"] hexen-haus = ["memchr", "utils-str"] hexen-haus-arc = ["hexen-haus"] hexen-haus-img = ["hexen-haus", "image"] diff --git a/src/scripts/favorite/disasm.rs b/src/scripts/favorite/disasm.rs index 4b06925..b36ecb8 100644 --- a/src/scripts/favorite/disasm.rs +++ b/src/scripts/favorite/disasm.rs @@ -118,6 +118,7 @@ pub struct Data { func_pos_map: HashMap, #[serde(skip)] speaker_names: HashMap>, + pub sys_imports: Vec, } impl Data { @@ -129,6 +130,7 @@ impl Data { speak_func_indices: HashSet::new(), func_pos_map: HashMap::new(), speaker_names: HashMap::new(), + sys_imports: Vec::new(), }; let script_len = reader.read_u32()? as u64; let main_script_data = reader.peek_u32_at(script_len)? as u64; @@ -145,7 +147,17 @@ impl Data { } reader.seek(SeekFrom::Start(script_len + 4))?; reader.read_to_end(&mut data.extra_data)?; - + let mut off = script_len + 10; + let offset = reader.peek_u8_at(off)?; + off += 1 + offset as u64; + let sysimport_num = reader.peek_u16_at(off)?; + off += 2; + for _ in 0..sysimport_num { + let s = reader.peek_cstring_at(off + 2)?; + let s = decode_to_string(encoding, s.as_bytes(), true)?; + data.sys_imports.push(s); + off += 2 + reader.peek_u8_at(off + 1)? as u64; + } data.index_functions(); data.find_speak_functions(); data.collect_speaker_names(); diff --git a/src/scripts/favorite/hcb.rs b/src/scripts/favorite/hcb.rs index d904fc7..f4ce630 100644 --- a/src/scripts/favorite/hcb.rs +++ b/src/scripts/favorite/hcb.rs @@ -1,9 +1,12 @@ //! 
Favorite HCB script (.hcb) +use std::io::Write; + use super::disasm::*; use crate::ext::io::*; use crate::scripts::base::*; use crate::types::*; use crate::utils::encoding::*; +use crate::utils::str::*; use anyhow::Result; #[derive(Debug)] @@ -106,58 +109,299 @@ impl Script for HcbScript { let mut patcher = BinaryPatcher::new(self.reader.to_ref(), file, |pos| Ok(pos), |pos| Ok(pos))?; let mut need_pacth_addresses = Vec::new(); - for funcs in [&self.data.functions, &self.data.main_script] { - for func in funcs { - let mut cur_pos = func.pos + 1; - if matches!(func.opcode, 0x02 | 0x06 | 0x07) { - need_pacth_addresses.push(cur_pos); + let mut new_need_patch_addresses = Vec::new(); + let thread_start_callid = self + .data + .sys_imports + .iter() + .position(|s| s == "ThreadStart") + .map(|i| i as u16) + .unwrap_or(u16::MAX); + let mut func_index = 0; + let func_len = self.data.functions.len(); + while func_index < func_len { + let func = &self.data.functions[func_index]; + let mut cur_pos = func.pos + 1; + if matches!(func.opcode, 0x02 | 0x06 | 0x07) { + need_pacth_addresses.push(cur_pos); + } + if func.opcode == 0x03 { + let syscall_id = if let Some(Operand::W(id)) = func.operands.get(0) { + *id + } else { + anyhow::bail!("Invalid syscall operand at function index {}", func_index); + }; + if syscall_id == thread_start_callid { + if func_index == 0 { + anyhow::bail!("ThreadStart syscall cannot be at function index 0"); + } + let pre_func = &self.data.functions[func_index - 1]; + if pre_func.opcode == 0x0a { + need_pacth_addresses.push(pre_func.pos + 1); + } } - for operand in &func.operands { - if let Operand::S(s) = operand { - if self.filter_ascii && s.chars().all(|c| c.is_ascii()) { - continue; - } - let m = match mes { - Some(m) => m, - None => { - return Err(anyhow::anyhow!( - "Not enough messages to import. 
Missing message: {}", - s - )); - } - }; - let mut message = m.message.clone(); - if let Some(table) = replacement { - for (k, v) in &table.map { - message = message.replace(k, v); - } - } - patcher.copy_up_to(cur_pos)?; - let ori_len = operand.len(self.encoding)? as u64; - let mut s = encode_string(encoding, &message, true)?; - s.push(0); // null-terminated - let len = s.len(); - if len > 255 { + } + for operand in &func.operands { + if let Operand::S(s) = operand { + if self.filter_ascii && s.chars().all(|c| c.is_ascii()) { + continue; + } + let m = match mes { + Some(m) => m, + None => { return Err(anyhow::anyhow!( - "Message too long to import (max 255 bytes): {}", - message + "Not enough messages to import. Missing message: {}", + s )); } + }; + let mut message = m.message.clone(); + if let Some(table) = replacement { + for (k, v) in &table.map { + message = message.replace(k, v); + } + } + patcher.copy_up_to(cur_pos)?; + let ori_len = operand.len(self.encoding)? as u64; + let mut s = encode_string(encoding, &message, true)?; + s.push(0); // null-terminated + let len = s.len(); + if len > 255 { + return Err(anyhow::anyhow!( + "Message too long to import in functions section (max 255 bytes): {}", + message + )); + } + patcher.replace_bytes_with_write(ori_len, |writer| { + writer.write_u8(len as u8)?; + writer.write_all(&s)?; + Ok(()) + })?; + mes = mess.next(); + } + cur_pos += operand.len(self.encoding)? 
as u64; + } + func_index += 1; + } + func_index = 0; + let func_len = self.data.main_script.len(); + 'outer: while func_index < func_len { + let func = &self.data.main_script[func_index]; + let mut cur_pos = func.pos + 1; + if matches!(func.opcode, 0x02 | 0x06 | 0x07) { + need_pacth_addresses.push(cur_pos); + } + if func.opcode == 0x03 { + let syscall_id = if let Some(Operand::W(id)) = func.operands.get(0) { + *id + } else { + anyhow::bail!("Invalid syscall operand at function index {}", func_index); + }; + if syscall_id == thread_start_callid { + if func_index == 0 { + anyhow::bail!("ThreadStart syscall cannot be at function index 0"); + } + let pre_func = &self.data.main_script[func_index - 1]; + if pre_func.opcode == 0x0a { + need_pacth_addresses.push(pre_func.pos + 1); + } + } + } + for operand in &func.operands { + if let Operand::S(s) = operand { + if self.filter_ascii && s.chars().all(|c| c.is_ascii()) { + continue; + } + let m = match mes { + Some(m) => m, + None => { + return Err(anyhow::anyhow!( + "Not enough messages to import. Missing message: {}", + s + )); + } + }; + let mut message = m.message.clone(); + if let Some(table) = replacement { + for (k, v) in &table.map { + message = message.replace(k, v); + } + } + mes = mess.next(); + patcher.copy_up_to(cur_pos)?; + let ori_len = operand.len(self.encoding)? 
as u64; + let mut s = encode_string(encoding, &message, true)?; + s.push(0); // null-terminated + let len = s.len(); + if len > 255 { + if func.opcode != 0x0e { + anyhow::bail!( + "Message too long to import in main script functions section (max 255 bytes): {}", + message + ); + } + let cur = message.as_str(); + let (mut s, mut remaining) = + truncate_string_with_enter(cur, 254, encoding)?; + s.push(0); // null-terminated + let len = s.len(); patcher.replace_bytes_with_write(ori_len, |writer| { writer.write_u8(len as u8)?; writer.write_all(&s)?; Ok(()) })?; - mes = mess.next(); + let mut new_funcs = Vec::new(); + func_index += 1; + loop { + let toper = &self.data.main_script[func_index]; + new_funcs.push(toper.clone()); + func_index += 1; + if matches!(toper.opcode, 0x02 | 0x06 | 0x07) { + need_pacth_addresses.push(toper.pos + 1); + } + if toper.opcode == 0x03 { + let syscall_id = if let Some(Operand::W(id)) = toper.operands.get(0) + { + *id + } else { + anyhow::bail!( + "Invalid syscall operand at function index {}", + func_index + ); + }; + if syscall_id == thread_start_callid { + if func_index == 0 { + anyhow::bail!( + "ThreadStart syscall cannot be at function index 0" + ); + } + let pre_func = &self.data.main_script[func_index - 1]; + if pre_func.opcode == 0x0a { + need_pacth_addresses.push(pre_func.pos + 1); + } + } + } + // Copy until the next call opcode + if toper.opcode == 0x02 { + break; + } + } + cur_pos = self.data.main_script[func_index].pos; + patcher.copy_up_to(cur_pos)?; + let mut mem = MemWriter::new(); + while let Some(remain) = remaining { + let (mut s, rem) = truncate_string_with_enter(remain, 254, encoding)?; + s.push(0); // null-terminated + let len = s.len(); + remaining = rem; + mem.write_u8(0x0e)?; // pushstring + mem.write_u8(len as u8)?; + mem.write_all(&s)?; + let mut tindex = 0; + let tlen = new_funcs.len(); + while tindex < tlen { + let toper = &new_funcs[tindex]; + mem.write_u8(toper.opcode)?; + if matches!(toper.opcode, 0x02 | 0x06 
| 0x07) { + let addr_pos = mem.pos; + let base_pos = patcher.output.stream_position()?; + let addr = base_pos + addr_pos as u64; + let data = toper + .operands + .iter() + .find_map(|operand| { + if let Operand::D(v) = operand { + Some(*v) + } else { + None + } + }) + .ok_or(anyhow::anyhow!( + "Unexpected operand type in function re-write." + ))?; + new_need_patch_addresses.push((addr, data)); + } + if toper.opcode == 0x03 { + let syscall_id = + if let Some(Operand::W(id)) = toper.operands.get(0) { + *id + } else { + anyhow::bail!( + "Invalid syscall operand at function index {}", + func_index + ); + }; + if syscall_id == thread_start_callid { + if tindex == 0 { + anyhow::bail!( + "ThreadStart syscall cannot be at function index 0" + ); + } + let pre_func = &new_funcs[tindex - 1]; + if pre_func.opcode == 0x0a { + let addr_pos = mem.pos - 5; // 1 for opcode, 4 for operand + let base_pos = patcher.output.stream_position()?; + let addr = base_pos + addr_pos as u64; + let data = pre_func + .operands + .get(0) + .and_then(|operand| { + if let Operand::D(v) = operand { + Some(*v) + } else { + None + } + }) + .ok_or(anyhow::anyhow!( + "Unexpected operand type in function re-write." + ))?; + new_need_patch_addresses.push((addr, data)); + } + } + } + for operand in &toper.operands { + match operand { + Operand::B(v) => mem.write_u8(*v)?, + Operand::W(v) => mem.write_u16(*v)?, + Operand::D(v) => mem.write_u32(*v)?, + Operand::F(v) => mem.write_f32(*v)?, + _ => { + return Err(anyhow::anyhow!( + "Unexpected operand type in function re-write." + )); + } + } + } + tindex += 1; + } + } + let new_data = mem.into_inner(); + patcher.replace_bytes(0, &new_data)?; + continue 'outer; } - cur_pos += operand.len(self.encoding)? as u64; + patcher.replace_bytes_with_write(ori_len, |writer| { + writer.write_u8(len as u8)?; + writer.write_all(&s)?; + Ok(()) + })?; } + cur_pos += operand.len(self.encoding)? 
as u64; } + func_index += 1; } patcher.copy_up_to(self.reader.data.len() as u64)?; for addr in need_pacth_addresses { patcher.patch_u32_address(addr)?; } + for (addr, data) in new_need_patch_addresses { + let new_data = patcher.map_offset(data as u64)? as u32; + patcher.output.write_u32_at(addr, new_data)?; + } + let script_len = self.reader.cpeek_u32_at(0)? as u64; + let new_script_len = patcher.map_offset(script_len)?; + patcher.patch_u32(0, new_script_len as u32)?; + // fix main script data position + patcher.patch_u32_address(script_len)?; Ok(()) } diff --git a/src/utils/str.rs b/src/utils/str.rs index 2650639..ef8a22a 100644 --- a/src/utils/str.rs +++ b/src/utils/str.rs @@ -18,3 +18,50 @@ pub fn truncate_string(s: &str, length: usize, encoding: Encoding, check: bool) } return Ok(result); } + +/// Truncate a string to a specified length, encoding it with the given encoding. +/// Output size may less than or equal to the specified length. +/// Returns the encoded bytes and the remaining string. +pub fn truncate_string2( + s: &str, + length: usize, + encoding: Encoding, +) -> Result<(Vec, Option<&str>)> { + let vec: Vec<_> = UnicodeSegmentation::graphemes(s, true).collect(); + let mut result = Vec::new(); + let mut used = 0; + for graphemes in vec { + let data = encode_string(encoding, graphemes, false)?; + if result.len() + data.len() > length { + break; + } + result.extend(data); + used += graphemes.len(); + } + let remaining = if used < s.len() { + Some(&s[used..]) + } else { + None + }; + return Ok((result, remaining)); +} + +/// Truncate a string to a specified length, encoding it with the given encoding. +/// Output size may less than or equal to the specified length. +/// Returns the encoded bytes and the remaining string. +/// Will try splitting at line breaks first. 
+///
+/// The line break that ends the first chunk is consumed: it is neither encoded
+/// into the returned bytes nor left at the start of the remaining string.
+///
+/// # Errors
+/// Returns an error when `encode_string` fails, or when no progress can be made
+/// because not even the first grapheme of `s` fits into `length` bytes.
+pub fn truncate_string_with_enter(
+    s: &str,
+    length: usize,
+    encoding: Encoding,
+) -> Result<(Vec<u8>, Option<&str>)> {
+    // Prefer to cut at the first line break: everything before it becomes the
+    // chunk, provided its encoded form fits into `length` bytes.
+    if let Some(pos) = s.find('\n') {
+        let data = encode_string(encoding, &s[..pos], false)?;
+        if data.len() <= length {
+            // '\n' is a single byte in UTF-8, so `pos + 1` is a char boundary.
+            let rest = &s[pos + 1..];
+            return Ok((data, if rest.is_empty() { None } else { Some(rest) }));
+        }
+    }
+    // No usable line break: fall back to cutting at a grapheme boundary.
+    let (data, remaining) = truncate_string2(s, length, encoding)?;
+    // `truncate_string2` hands the whole input back as the remainder when not even
+    // its first grapheme fits. A caller that loops until the remainder is `None`
+    // would then never terminate, so report the condition instead.
+    if let Some(rest) = remaining {
+        if rest.len() == s.len() {
+            anyhow::bail!(
+                "Cannot truncate string to {} bytes: the first character does not fit",
+                length
+            );
+        }
+    }
+    Ok((data, remaining))
+}