use crate::ext::io::*; use crate::types::*; use crate::utils::encoding::*; use anyhow::Result; use serde::{Deserialize, Serialize}; use std::collections::{HashMap, HashSet}; use std::io::{Read, Seek, SeekFrom}; #[derive(Debug, Clone, Copy, PartialEq, Eq)] enum Oper { // Byte B, // Word W, // Double Word D, // String S, // Float F, } use Oper::*; #[derive(Debug, Clone, PartialEq, Serialize, Deserialize)] #[serde(tag = "t", content = "c")] pub enum Operand { B(u8), W(u16), D(u32), S(String), F(f32), } impl Operand { pub fn len(&self, encoding: Encoding) -> Result { Ok(match self { Operand::B(_) => 1, Operand::W(_) => 2, Operand::D(_) => 4, Operand::S(s) => { let bytes = encode_string(encoding, s, true)?; // null terminator + length byte bytes.len() + 2 } Operand::F(_) => 4, }) } } const OPS: [(u8, &[Oper]); 49] = [ (0x00, &[]), //noop (0x01, &[B, B]), //initstack (0x02, &[D]), //call (0x03, &[W]), //syscall (0x04, &[]), //ret (0x05, &[]), //ret2 (0x06, &[D]), //jmp (0x07, &[D]), //jmpcond (0x08, &[]), //pushtrue (0x09, &[]), //pushfalse (0x0a, &[D]), //pushint (0x0b, &[W]), //pushint (0x0c, &[B]), //pushint (0x0d, &[F]), //pushfloat * unused (0x0e, &[S]), //pushstring (0x0f, &[W]), //pushglobal (0x10, &[B]), //pushstack (0x11, &[W]), //unknown (0x12, &[B]), //unknown (0x13, &[]), //pushtop (0x14, &[]), //pushtmp (0x15, &[W]), //popglobal (0x16, &[B]), //copystack (0x17, &[W]), //unknown (0x18, &[B]), //unknown (0x19, &[]), //neg (0x1a, &[]), //add (0x1b, &[]), //sub (0x1c, &[]), //mul (0x1d, &[]), //div (0x1e, &[]), //mod (0x1f, &[]), //test (0x20, &[]), //logand (0x21, &[]), //logor (0x22, &[]), //eq (0x23, &[]), //neq (0x24, &[]), //gt (0x25, &[]), //le (0x26, &[]), //lt (0x27, &[]), //ge (0x33, &[]), (0x3f, &[]), (0x40, &[]), (0xb3, &[]), (0xb8, &[]), (0xd8, &[]), (0xf0, &[]), (0x52, &[]), (0x9e, &[]), ]; #[derive(Debug, Clone, Serialize, Deserialize)] pub struct Func { pub pos: u64, pub opcode: u8, pub operands: Vec, } #[derive(Debug, Clone, Serialize, Deserialize)] pub struct Data { pub functions: Vec, pub main_script: Vec, pub extra_data: Vec, #[serde(skip)] speak_func_indices: HashSet, #[serde(skip)] func_pos_map: HashMap, #[serde(skip)] speaker_names: HashMap>, pub sys_imports: Vec, } impl Data { pub fn disasm(mut reader: R, encoding: Encoding) -> Result { let mut data = Data { functions: Vec::new(), main_script: Vec::new(), extra_data: Vec::new(), speak_func_indices: HashSet::new(), func_pos_map: HashMap::new(), speaker_names: HashMap::new(), sys_imports: Vec::new(), }; let script_len = reader.read_u32()? as u64; let main_script_data = reader.peek_u32_at(script_len)? as u64; { let mut target = &mut data.functions; let mut pos = reader.stream_position()?; while pos < script_len { if pos >= main_script_data { target = &mut data.main_script; } target.push(Self::read_func(&mut reader, encoding)?); pos = reader.stream_position()?; } } reader.seek(SeekFrom::Start(script_len + 4))?; reader.read_to_end(&mut data.extra_data)?; let mut off = script_len + 10; let offset = reader.peek_u8_at(off)?; off += 1 + offset as u64; let sysimport_num = reader.peek_u16_at(off)?; off += 2; for _ in 0..sysimport_num { let s = reader.peek_cstring_at(off + 2)?; let s = decode_to_string(encoding, s.as_bytes(), true)?; data.sys_imports.push(s); off += 2 + reader.peek_u8_at(off + 1)? as u64; } data.index_functions(); data.find_speak_functions(); data.collect_speaker_names(); Ok(data) } fn index_functions(&mut self) { for (idx, func) in self.functions.iter().enumerate() { if func.opcode == 0x01 { self.func_pos_map.insert(func.pos, idx); } } } fn find_speak_functions(&mut self) { for (idx, func) in self.functions.iter().enumerate() { if func.opcode == 0x01 { // SPEAK functions have initstack with (3, 0) or (5, 0) parameters if let (Some(Operand::B(arg_count)), Some(Operand::B(0))) = (func.operands.first(), func.operands.get(1)) { if *arg_count == 3 || *arg_count == 5 { self.speak_func_indices.insert(idx as u32); } } } } } fn collect_speaker_names(&mut self) { let func_starts: Vec = self .functions .iter() .enumerate() .filter(|(_, f)| f.opcode == 0x01) .map(|(i, _)| i) .collect(); for &speak_idx in &self.speak_func_indices { let speak_idx = speak_idx as usize; let start_pos = func_starts.iter().position(|&s| s == speak_idx); if let Some(pos) = start_pos { let end = func_starts .get(pos + 1) .copied() .unwrap_or(self.functions.len()); let names: Vec = (speak_idx..end) .filter(|&i| self.functions[i].opcode == 0x0e) .filter_map(|i| match self.functions[i].operands.first() { Some(Operand::S(s)) if !s.trim().is_empty() => Some(s.clone()), _ => None, }) .collect(); if !names.is_empty() { self.speaker_names.insert(speak_idx, names); } } } } fn get_speaker(&self, func_idx: usize) -> Option { let names = self.speaker_names.get(&func_idx)?; // Prefer names without '?' prefix, take the last one (usually the "known" name) if let Some(name) = names.iter().filter(|n| !n.contains('?')).last() { return Some(name.trim().to_string()); } // If all names have '?', strip it from the last one names.last().and_then(|name| { let cleaned = name.trim().trim_start_matches('?').trim(); if !cleaned.is_empty() { Some(cleaned.to_string()) } else { None } }) } pub fn extract_messages(&self, filter_ascii: bool) -> Vec<(Option, String)> { let mut messages = Vec::new(); // Extract strings from functions section (no speakers) for func in &self.functions { if func.opcode == 0x0e { if let Some(Operand::S(s)) = func.operands.first() { if !(filter_ascii && s.chars().all(|c| c.is_ascii())) { messages.push((None, s.clone())); } } } } // Process main_script, track SPEAK calls for speaker names let mut current_speaker: Option = None; for func in &self.main_script { if func.opcode == 0x02 { if let Some(Operand::D(call_target)) = func.operands.first() { if let Some(&func_idx) = self.func_pos_map.get(&(*call_target as u64)) { if self.speak_func_indices.contains(&(func_idx as u32)) { current_speaker = self.get_speaker(func_idx); } } } } else if func.opcode == 0x0e { if let Some(Operand::S(s)) = func.operands.first() { if !(filter_ascii && s.chars().all(|c| c.is_ascii())) { messages.push((current_speaker.clone(), s.clone())); } } } } messages } fn read_func(reader: &mut R, encoding: Encoding) -> Result { let pos = reader.stream_position()?; let opcode = reader.read_u8()?; let operands = if let Some((_, ops)) = OPS.iter().find(|(code, _)| *code == opcode) { let mut operands = Vec::with_capacity(ops.len()); for &op in *ops { let operand = match op { B => Operand::B(reader.read_u8()?), W => Operand::W(reader.read_u16()?), D => Operand::D(reader.read_u32()?), S => { let len = reader.read_u8()? as usize; let s = reader.read_cstring()?; if s.as_bytes_with_nul().len() != len { return Err(anyhow::anyhow!( "String length mismatch at {:#x}: expected {}, got {}", pos, len, s.as_bytes_with_nul().len() )); } let s = decode_to_string(encoding, s.as_bytes(), true)?; Operand::S(s) } F => Operand::F(reader.read_f32()?), }; operands.push(operand); } operands } else { return Err(anyhow::anyhow!( "Unknown opcode: {:#x} at {:#x}", opcode, pos )); }; Ok(Func { pos, opcode, operands, }) } }