Allow long messages to be split into multiple parts. (Fix #6)

Fix patch address and offsets
This commit is contained in:
2026-02-01 20:09:18 +08:00
parent 24bc35d1db
commit 36f55fcfab
4 changed files with 340 additions and 37 deletions

View File

@@ -118,6 +118,7 @@ pub struct Data {
func_pos_map: HashMap<u64, usize>,
#[serde(skip)]
speaker_names: HashMap<usize, Vec<String>>,
pub sys_imports: Vec<String>,
}
impl Data {
@@ -129,6 +130,7 @@ impl Data {
speak_func_indices: HashSet::new(),
func_pos_map: HashMap::new(),
speaker_names: HashMap::new(),
sys_imports: Vec::new(),
};
let script_len = reader.read_u32()? as u64;
let main_script_data = reader.peek_u32_at(script_len)? as u64;
@@ -145,7 +147,17 @@ impl Data {
}
reader.seek(SeekFrom::Start(script_len + 4))?;
reader.read_to_end(&mut data.extra_data)?;
let mut off = script_len + 10;
let offset = reader.peek_u8_at(off)?;
off += 1 + offset as u64;
let sysimport_num = reader.peek_u16_at(off)?;
off += 2;
for _ in 0..sysimport_num {
let s = reader.peek_cstring_at(off + 2)?;
let s = decode_to_string(encoding, s.as_bytes(), true)?;
data.sys_imports.push(s);
off += 2 + reader.peek_u8_at(off + 1)? as u64;
}
data.index_functions();
data.find_speak_functions();
data.collect_speaker_names();

View File

@@ -1,9 +1,12 @@
//! Favorite HCB script (.hcb)
use std::io::Write;
use super::disasm::*;
use crate::ext::io::*;
use crate::scripts::base::*;
use crate::types::*;
use crate::utils::encoding::*;
use crate::utils::str::*;
use anyhow::Result;
#[derive(Debug)]
@@ -106,58 +109,299 @@ impl Script for HcbScript {
let mut patcher =
BinaryPatcher::new(self.reader.to_ref(), file, |pos| Ok(pos), |pos| Ok(pos))?;
let mut need_pacth_addresses = Vec::new();
for funcs in [&self.data.functions, &self.data.main_script] {
for func in funcs {
let mut cur_pos = func.pos + 1;
if matches!(func.opcode, 0x02 | 0x06 | 0x07) {
need_pacth_addresses.push(cur_pos);
let mut new_need_patch_addresses = Vec::new();
let thread_start_callid = self
.data
.sys_imports
.iter()
.position(|s| s == "ThreadStart")
.map(|i| i as u16)
.unwrap_or(u16::MAX);
let mut func_index = 0;
let func_len = self.data.functions.len();
while func_index < func_len {
let func = &self.data.functions[func_index];
let mut cur_pos = func.pos + 1;
if matches!(func.opcode, 0x02 | 0x06 | 0x07) {
need_pacth_addresses.push(cur_pos);
}
if func.opcode == 0x03 {
let syscall_id = if let Some(Operand::W(id)) = func.operands.get(0) {
*id
} else {
anyhow::bail!("Invalid syscall operand at function index {}", func_index);
};
if syscall_id == thread_start_callid {
if func_index == 0 {
anyhow::bail!("ThreadStart syscall cannot be at function index 0");
}
let pre_func = &self.data.functions[func_index - 1];
if pre_func.opcode == 0x0a {
need_pacth_addresses.push(pre_func.pos + 1);
}
}
for operand in &func.operands {
if let Operand::S(s) = operand {
if self.filter_ascii && s.chars().all(|c| c.is_ascii()) {
continue;
}
let m = match mes {
Some(m) => m,
None => {
return Err(anyhow::anyhow!(
"Not enough messages to import. Missing message: {}",
s
));
}
};
let mut message = m.message.clone();
if let Some(table) = replacement {
for (k, v) in &table.map {
message = message.replace(k, v);
}
}
patcher.copy_up_to(cur_pos)?;
let ori_len = operand.len(self.encoding)? as u64;
let mut s = encode_string(encoding, &message, true)?;
s.push(0); // null-terminated
let len = s.len();
if len > 255 {
}
for operand in &func.operands {
if let Operand::S(s) = operand {
if self.filter_ascii && s.chars().all(|c| c.is_ascii()) {
continue;
}
let m = match mes {
Some(m) => m,
None => {
return Err(anyhow::anyhow!(
"Message too long to import (max 255 bytes): {}",
message
"Not enough messages to import. Missing message: {}",
s
));
}
};
let mut message = m.message.clone();
if let Some(table) = replacement {
for (k, v) in &table.map {
message = message.replace(k, v);
}
}
patcher.copy_up_to(cur_pos)?;
let ori_len = operand.len(self.encoding)? as u64;
let mut s = encode_string(encoding, &message, true)?;
s.push(0); // null-terminated
let len = s.len();
if len > 255 {
return Err(anyhow::anyhow!(
"Message too long to import in functions section (max 255 bytes): {}",
message
));
}
patcher.replace_bytes_with_write(ori_len, |writer| {
writer.write_u8(len as u8)?;
writer.write_all(&s)?;
Ok(())
})?;
mes = mess.next();
}
cur_pos += operand.len(self.encoding)? as u64;
}
func_index += 1;
}
func_index = 0;
let func_len = self.data.main_script.len();
'outer: while func_index < func_len {
let func = &self.data.main_script[func_index];
let mut cur_pos = func.pos + 1;
if matches!(func.opcode, 0x02 | 0x06 | 0x07) {
need_pacth_addresses.push(cur_pos);
}
if func.opcode == 0x03 {
let syscall_id = if let Some(Operand::W(id)) = func.operands.get(0) {
*id
} else {
anyhow::bail!("Invalid syscall operand at function index {}", func_index);
};
if syscall_id == thread_start_callid {
if func_index == 0 {
anyhow::bail!("ThreadStart syscall cannot be at function index 0");
}
let pre_func = &self.data.main_script[func_index - 1];
if pre_func.opcode == 0x0a {
need_pacth_addresses.push(pre_func.pos + 1);
}
}
}
for operand in &func.operands {
if let Operand::S(s) = operand {
if self.filter_ascii && s.chars().all(|c| c.is_ascii()) {
continue;
}
let m = match mes {
Some(m) => m,
None => {
return Err(anyhow::anyhow!(
"Not enough messages to import. Missing message: {}",
s
));
}
};
let mut message = m.message.clone();
if let Some(table) = replacement {
for (k, v) in &table.map {
message = message.replace(k, v);
}
}
mes = mess.next();
patcher.copy_up_to(cur_pos)?;
let ori_len = operand.len(self.encoding)? as u64;
let mut s = encode_string(encoding, &message, true)?;
s.push(0); // null-terminated
let len = s.len();
if len > 255 {
if func.opcode != 0x0e {
anyhow::bail!(
"Message too long to import in main script functions section (max 255 bytes): {}",
message
);
}
let cur = message.as_str();
let (mut s, mut remaining) =
truncate_string_with_enter(cur, 254, encoding)?;
s.push(0); // null-terminated
let len = s.len();
patcher.replace_bytes_with_write(ori_len, |writer| {
writer.write_u8(len as u8)?;
writer.write_all(&s)?;
Ok(())
})?;
mes = mess.next();
let mut new_funcs = Vec::new();
func_index += 1;
loop {
let toper = &self.data.main_script[func_index];
new_funcs.push(toper.clone());
func_index += 1;
if matches!(toper.opcode, 0x02 | 0x06 | 0x07) {
need_pacth_addresses.push(toper.pos + 1);
}
if toper.opcode == 0x03 {
let syscall_id = if let Some(Operand::W(id)) = toper.operands.get(0)
{
*id
} else {
anyhow::bail!(
"Invalid syscall operand at function index {}",
func_index
);
};
if syscall_id == thread_start_callid {
if func_index == 0 {
anyhow::bail!(
"ThreadStart syscall cannot be at function index 0"
);
}
let pre_func = &self.data.main_script[func_index - 1];
if pre_func.opcode == 0x0a {
need_pacth_addresses.push(pre_func.pos + 1);
}
}
}
// Copy until the next call opcode
if toper.opcode == 0x02 {
break;
}
}
cur_pos = self.data.main_script[func_index].pos;
patcher.copy_up_to(cur_pos)?;
let mut mem = MemWriter::new();
while let Some(remain) = remaining {
let (mut s, rem) = truncate_string_with_enter(remain, 254, encoding)?;
s.push(0); // null-terminated
let len = s.len();
remaining = rem;
mem.write_u8(0x0e)?; // pushstring
mem.write_u8(len as u8)?;
mem.write_all(&s)?;
let mut tindex = 0;
let tlen = new_funcs.len();
while tindex < tlen {
let toper = &new_funcs[tindex];
mem.write_u8(toper.opcode)?;
if matches!(toper.opcode, 0x02 | 0x06 | 0x07) {
let addr_pos = mem.pos;
let base_pos = patcher.output.stream_position()?;
let addr = base_pos + addr_pos as u64;
let data = toper
.operands
.iter()
.find_map(|operand| {
if let Operand::D(v) = operand {
Some(*v)
} else {
None
}
})
.ok_or(anyhow::anyhow!(
"Unexpected operand type in function re-write."
))?;
new_need_patch_addresses.push((addr, data));
}
if toper.opcode == 0x03 {
let syscall_id =
if let Some(Operand::W(id)) = toper.operands.get(0) {
*id
} else {
anyhow::bail!(
"Invalid syscall operand at function index {}",
func_index
);
};
if syscall_id == thread_start_callid {
if tindex == 0 {
anyhow::bail!(
"ThreadStart syscall cannot be at function index 0"
);
}
let pre_func = &new_funcs[tindex - 1];
if pre_func.opcode == 0x0a {
let addr_pos = mem.pos - 5; // 1 for opcode, 4 for operand
let base_pos = patcher.output.stream_position()?;
let addr = base_pos + addr_pos as u64;
let data = pre_func
.operands
.get(0)
.and_then(|operand| {
if let Operand::D(v) = operand {
Some(*v)
} else {
None
}
})
.ok_or(anyhow::anyhow!(
"Unexpected operand type in function re-write."
))?;
new_need_patch_addresses.push((addr, data));
}
}
}
for operand in &toper.operands {
match operand {
Operand::B(v) => mem.write_u8(*v)?,
Operand::W(v) => mem.write_u16(*v)?,
Operand::D(v) => mem.write_u32(*v)?,
Operand::F(v) => mem.write_f32(*v)?,
_ => {
return Err(anyhow::anyhow!(
"Unexpected operand type in function re-write."
));
}
}
}
tindex += 1;
}
}
let new_data = mem.into_inner();
patcher.replace_bytes(0, &new_data)?;
continue 'outer;
}
cur_pos += operand.len(self.encoding)? as u64;
patcher.replace_bytes_with_write(ori_len, |writer| {
writer.write_u8(len as u8)?;
writer.write_all(&s)?;
Ok(())
})?;
}
cur_pos += operand.len(self.encoding)? as u64;
}
func_index += 1;
}
patcher.copy_up_to(self.reader.data.len() as u64)?;
for addr in need_pacth_addresses {
patcher.patch_u32_address(addr)?;
}
for (addr, data) in new_need_patch_addresses {
let new_data = patcher.map_offset(data as u64)? as u32;
patcher.output.write_u32_at(addr, new_data)?;
}
let script_len = self.reader.cpeek_u32_at(0)? as u64;
let new_script_len = patcher.map_offset(script_len)?;
patcher.patch_u32(0, new_script_len as u32)?;
// fix main script data position
patcher.patch_u32_address(script_len)?;
Ok(())
}

View File

@@ -18,3 +18,50 @@ pub fn truncate_string(s: &str, length: usize, encoding: Encoding, check: bool)
}
return Ok(result);
}
/// Truncate a string to a specified length, encoding it with the given encoding.
/// Output size may less than or equal to the specified length.
/// Returns the encoded bytes and the remaining string.
pub fn truncate_string2(
s: &str,
length: usize,
encoding: Encoding,
) -> Result<(Vec<u8>, Option<&str>)> {
let vec: Vec<_> = UnicodeSegmentation::graphemes(s, true).collect();
let mut result = Vec::new();
let mut used = 0;
for graphemes in vec {
let data = encode_string(encoding, graphemes, false)?;
if result.len() + data.len() > length {
break;
}
result.extend(data);
used += graphemes.len();
}
let remaining = if used < s.len() {
Some(&s[used..])
} else {
None
};
return Ok((result, remaining));
}
/// Line-break-aware variant of [`truncate_string2`].
///
/// If `s` contains a `'\n'` and the text before the first one encodes to at most
/// `length` bytes, that text is returned as the chunk. The line break itself is
/// consumed but not encoded, and the remainder starts right after it (`None` when
/// nothing follows). In every other case the call is forwarded to
/// [`truncate_string2`] with the unmodified input.
pub fn truncate_string_with_enter(
    s: &str,
    length: usize,
    encoding: Encoding,
) -> Result<(Vec<u8>, Option<&str>)> {
    if let Some((head, tail)) = s.split_once('\n') {
        // Only the text in front of the line break is encoded.
        let encoded = encode_string(encoding, head, false)?;
        if encoded.len() <= length {
            let leftover = match tail {
                "" => None,
                _ => Some(tail),
            };
            return Ok((encoded, leftover));
        }
    }
    // No line break, or the first line alone is too long: cut by size instead.
    truncate_string2(s, length, encoding)
}