diff --git a/src/scripts/bgi/mod.rs b/src/scripts/bgi/mod.rs new file mode 100644 index 0000000..5b504d4 --- /dev/null +++ b/src/scripts/bgi/mod.rs @@ -0,0 +1,2 @@ +mod parser; +pub mod script; diff --git a/src/scripts/bgi/parser.rs b/src/scripts/bgi/parser.rs new file mode 100644 index 0000000..a913373 --- /dev/null +++ b/src/scripts/bgi/parser.rs @@ -0,0 +1,651 @@ +use crate::types::*; +use crate::utils::encoding::decode_to_string; +use anyhow::Result; +use std::collections::HashMap; + +#[allow(unused)] +pub enum Inst { + /// short + H, + /// int + I, + /// code offset + C, + /// message offset + M, + /// name offset + N, + /// string offset + Z, +} + +use Inst::*; + +const V0_INSTS: [(u16, &'static [Inst]); 160] = [ + (0x0010, &[I, I, M]), + (0x0012, &[Z, Z]), + (0x0013, &[Z]), + (0x0014, &[Z]), + (0x0018, &[I, I, I, I, I]), + (0x0019, &[I, I, I, I]), // untested + (0x001A, &[I, I, I]), + (0x001B, &[Z, I, I, I]), // untested + (0x001F, &[I]), // untested + (0x0022, &[I]), // untested + (0x0024, &[I, I, I, I, I]), + (0x0025, &[I, I]), + (0x0028, &[Z, I]), + (0x0029, &[Z, Z, I]), + (0x002A, &[I]), + (0x002B, &[Z, I]), + (0x002C, &[Z, I, I, I, I, I, I, I, I]), + (0x002D, &[Z, I, I, I, I, I, I, I, I]), + (0x002E, &[I, I, I, I, I]), + (0x0030, &[Z, I]), // untested + (0x0031, &[Z, I, I]), // untested + (0x0032, &[I]), // untested + (0x0033, &[I]), // untested + (0x0034, &[I, I]), + (0x0035, &[I]), + (0x0036, &[I]), + (0x0038, &[I, Z, I, I, I, I, I]), + (0x0039, &[I, I]), + (0x003A, &[I, Z, I, I, I, I, I, I, I, I]), // untested + (0x003B, &[I, I, I, I, I, I]), + (0x003C, &[I, I, I, I, I, I, I, I, I, I]), + (0x003D, &[I, I, I, I, I, I, I, I, I, I, I]), + (0x003F, &[I]), + (0x0040, &[I, I, Z, I, I]), + (0x0041, &[I, I, Z, I, I]), + (0x0042, &[I, I, Z, I]), + (0x0043, &[I, I, Z, I]), + (0x0044, &[I, I, Z, I]), + (0x0045, &[I, I, Z, I]), + (0x0046, &[I, Z, I]), + (0x0047, &[I, Z, I]), + (0x0048, &[I, I]), + (0x0049, &[I, I]), + (0x004A, &[I, Z, I]), + (0x004C, &[Z, I]), // untested + (0x004D, &[Z, I]), // untested + (0x004E, &[I]), // untested + (0x004F, &[I]), // untested + (0x0050, &[Z, I]), + (0x0051, &[Z, Z, I]), + (0x0052, &[I]), + (0x0053, &[Z, I]), + (0x0054, &[Z, I, I]), + (0x0060, &[I, I, I, I, I]), + (0x0061, &[I, I]), // untested + (0x0062, &[I, I, I, I, I, I]), + (0x0065, &[I]), + (0x0066, &[I, I]), + (0x0067, &[I]), + (0x0068, &[I]), + (0x0069, &[I]), + (0x006A, &[I]), + (0x006B, &[I]), // untested + (0x006C, &[I]), // untested + (0x006E, &[I, I, I]), + (0x006F, &[I]), + (0x0070, &[I, Z, I]), + (0x0071, &[I]), + (0x0072, &[I, I, I]), + (0x0073, &[I, I, I]), + (0x0074, &[I, Z, I]), + (0x0075, &[I]), + (0x0076, &[I, I, I]), + (0x0078, &[I, Z, I]), // untested + (0x0079, &[I]), // untested + (0x007A, &[I, I, I]), // untested + (0x0080, &[I, Z, I, I]), + (0x0081, &[Z]), + (0x0082, &[I]), + (0x0083, &[I]), + (0x0084, &[I, Z, I]), // untested + (0x0085, &[Z]), + (0x0086, &[I]), + (0x0087, &[I]), + (0x0088, &[Z]), + (0x008C, &[I]), + (0x008D, &[I]), // untested + (0x008E, &[I]), // untested + (0x0090, &[I]), // untested + (0x0091, &[I]), // untested + (0x0092, &[I]), + (0x0093, &[I]), + (0x0094, &[I]), + (0x0098, &[I, I]), + (0x0099, &[I, I]), + (0x009A, &[I, I]), // untested + (0x009B, &[I, I]), // untested + (0x009C, &[I, I]), // untested + (0x009D, &[I, I]), // untested + (0x00A0, &[C]), + (0x00A1, &[I, C]), // untested + (0x00A2, &[I, C]), // untested + (0x00A3, &[I, I, C]), // untested + (0x00A4, &[I, I, C]), + (0x00A5, &[I, I, C]), + (0x00A6, &[I, I, C]), + (0x00A7, &[I, I, C]), // untested + (0x00A8, &[I, I, C]), + (0x00AC, &[C]), // untested + (0x00AE, &[I]), + (0x00C0, &[Z]), + (0x00C1, &[Z]), + (0x00C4, &[I]), + (0x00C8, &[Z]), + (0x00CA, &[I]), // untested + (0x00D4, &[I]), // untested + (0x00D8, &[I]), + (0x00D9, &[I]), + (0x00DA, &[I]), + (0x00DB, &[I]), + (0x00DC, &[I]), + (0x00F8, &[Z]), // untested + (0x00F9, &[Z, I]), // untested + (0x00FE, &[H]), + (0x0110, &[Z, Z]), + (0x0111, &[I]), + (0x0120, &[I]), + (0x0121, &[I]), + (0x0128, &[Z, I, I]), + (0x012A, &[I, I]), + (0x0134, &[I, I]), // untested + (0x0135, &[I]), // untested + (0x0136, &[I]), // untested + (0x0138, &[I, Z, I, I, I, I, Z, I, I, I]), + (0x013B, &[I, I, I, I, I, I, I, I]), + (0x0140, &[I, I, Z, I, I, I, I]), // untested + (0x0141, &[I, I, Z, I, I, I, I]), // untested + (0x0142, &[I, I, Z, I, I, I]), // untested + (0x0143, &[I, I, Z, I, I, I]), // untested + (0x0144, &[I, I, Z, I, I, I]), // untested + (0x0145, &[I, I, Z, I, I, I]), // untested + (0x0146, &[I, Z, I, I, I]), // untested + (0x0147, &[I, Z, I, I, I]), // untested + (0x0148, &[I, I]), + (0x0149, &[I, I]), + (0x014B, &[Z, I, I, Z]), + (0x0150, &[Z, I, I]), + (0x0151, &[Z, I, I, I]), // untested + (0x0152, &[I, I]), + (0x0153, &[I, I, I]), // untested + (0x016E, &[I, I, I, I, I, I]), + (0x016F, &[I, I, I, I, I, I, I]), // untested + (0x0170, &[I, Z, Z, I, I]), + (0x01C0, &[Z, Z]), + (0x01C1, &[Z, Z]), // untested + (0x0249, &[Z]), // untested + (0x024C, &[Z, Z, I, I, I]), // untested + (0x024D, &[Z]), // untested + (0x024E, &[Z, Z]), // untested + (0x024F, &[Z]), // untested +]; + +const V1_INSTS: [(u32, &'static [Inst]); 12] = [ + (0x0000, &[I]), + (0x0001, &[C]), + (0x0002, &[I]), + // (0x0003, &[M]), + (0x0008, &[I]), + (0x0009, &[I]), + (0x000A, &[I]), + (0x0017, &[I]), + (0x0019, &[I]), + (0x003F, &[I]), + (0x007B, &[I, I, I]), + (0x007E, &[I]), + (0x007F, &[I, I]), +]; + +lazy_static::lazy_static! { + pub static ref V0_INSTS_MAP: HashMap = HashMap::from(V0_INSTS); + pub static ref V1_INSTS_MAP: HashMap = HashMap::from(V1_INSTS); +} + +#[derive(Debug, Clone)] +pub enum BGIStringType { + Name, + Message, + Internal, +} + +#[derive(Debug, Clone)] +pub struct BGIString { + pub offset: usize, + pub address: usize, + pub typ: BGIStringType, +} + +pub struct V0Parser<'a> { + buf: &'a [u8], + pos: usize, + largest_code_address_pperand_encountered: usize, + pub strings: Vec, +} + +impl<'a> V0Parser<'a> { + pub fn new(buf: &'a [u8]) -> Self { + V0Parser { + buf, + pos: 0, + largest_code_address_pperand_encountered: 0, + strings: Vec::new(), + } + } + + fn read_u16(&mut self) -> Result { + if self.pos + 2 > self.buf.len() { + return Err(anyhow::anyhow!("Buffer overflow")); + } + let val = u16::from_le_bytes(self.buf[self.pos..self.pos + 2].try_into()?); + self.pos += 2; + Ok(val) + } + + fn read_u32(&mut self) -> Result { + if self.pos + 4 > self.buf.len() { + return Err(anyhow::anyhow!("Buffer overflow")); + } + let val = u32::from_le_bytes(self.buf[self.pos..self.pos + 4].try_into()?); + self.pos += 4; + Ok(val) + } + + fn read_i16(&mut self) -> Result { + if self.pos + 2 > self.buf.len() { + return Err(anyhow::anyhow!("Buffer overflow")); + } + let val = i16::from_le_bytes(self.buf[self.pos..self.pos + 2].try_into()?); + self.pos += 2; + Ok(val) + } + + fn read_i32(&mut self) -> Result { + if self.pos + 4 > self.buf.len() { + return Err(anyhow::anyhow!("Buffer overflow")); + } + let val = i32::from_le_bytes(self.buf[self.pos..self.pos + 4].try_into()?); + self.pos += 4; + Ok(val) + } + + fn read_code_address(&mut self) -> Result<()> { + let address = self.read_u32()?; + self.largest_code_address_pperand_encountered = std::cmp::max( + self.largest_code_address_pperand_encountered, + address as usize, + ); + Ok(()) + } + + fn read_string_address(&mut self, typ: BGIStringType) -> Result<()> { + let offset = self.pos; + let address = self.read_u32()? as usize; + self.strings.push(BGIString { + offset, + address, + typ, + }); + Ok(()) + } + + fn skip_inline_string(&mut self) -> Result<()> { + while self.buf[self.pos] != 0 { + self.pos += 1; + if self.pos > self.buf.len() { + return Err(anyhow::anyhow!("Buffer overflow")); + } + } + self.pos += 1; // skip null terminator + Ok(()) + } + + fn read_oper_00a9(&mut self) -> Result<()> { + let count = self.read_u32()?; + for _ in 0..count { + self.read_code_address()?; + } + Ok(()) + } + + fn read_oper_00b0(&mut self) -> Result<()> { + let count = self.read_u32()?; + for _ in 0..count { + self.skip_inline_string()?; + } + Ok(()) + } + + fn read_oper_00b4(&mut self) -> Result<()> { + // untested + let count = self.read_u32()?; + for _ in 0..count { + self.skip_inline_string()?; + } + Ok(()) + } + + fn read_oper_00fd(&mut self) -> Result<()> { + // untested + let count = self.read_u32()?; + for _ in 0..count { + self.skip_inline_string()?; + self.read_code_address()?; + } + Ok(()) + } + + fn read_opers(&mut self, templ: &'static [Inst]) -> Result<()> { + for t in templ.iter() { + match t { + H => { + self.read_i16()?; + } + I => { + self.read_i32()?; + } + C => { + self.read_code_address()?; + } + M => { + self.read_string_address(BGIStringType::Message)?; + } + Z => { + self.skip_inline_string()?; + } + N => { + self.read_string_address(BGIStringType::Name)?; + } + } + } + Ok(()) + } + + pub fn disassemble(&mut self) -> Result<()> { + loop { + let opcode = self.read_u16()?; + if opcode == 0x00a9 { + self.read_oper_00a9()?; + } else if opcode == 0x00b0 { + self.read_oper_00b0()?; + } else if opcode == 0x00b4 { + self.read_oper_00b4()?; + } else if opcode == 0x00fd { + self.read_oper_00fd()?; + } else if let Some(templ) = V0_INSTS_MAP.get(&opcode) { + self.read_opers(templ)?; + } + if opcode == 0x00c2 && self.largest_code_address_pperand_encountered < self.pos { + break; + } + } + Ok(()) + } +} + +struct StackItem { + pub offset: usize, + pub value: usize, +} + +pub struct V1Parser<'a> { + buf: &'a [u8], + pos: usize, + largest_code_address_pperand_encountered: usize, + stacks: Vec, + encoding: Encoding, + pub offset: usize, + pub strings: Vec, +} + +impl<'a> V1Parser<'a> { + pub fn new(buf: &'a [u8], encoding: Encoding) -> Result { + if !buf.starts_with(b"BurikoCompiledScriptVer1.00\0") { + return Err(anyhow::anyhow!("Invalid BGI script")); + } + if buf.len() < 32 { + return Err(anyhow::anyhow!("Buffer too small")); + } + let offset = 28 + u32::from_le_bytes(buf[28..32].try_into()?) as usize; + Ok(V1Parser { + buf, + pos: offset, + largest_code_address_pperand_encountered: 0, + stacks: Vec::new(), + encoding, + offset, + strings: Vec::new(), + }) + } + + fn read_u32(&mut self) -> Result { + if self.pos + 4 > self.buf.len() { + return Err(anyhow::anyhow!("Buffer overflow")); + } + let val = u32::from_le_bytes(self.buf[self.pos..self.pos + 4].try_into()?); + self.pos += 4; + Ok(val) + } + + fn read_i16(&mut self) -> Result { + if self.pos + 2 > self.buf.len() { + return Err(anyhow::anyhow!("Buffer overflow")); + } + let val = i16::from_le_bytes(self.buf[self.pos..self.pos + 2].try_into()?); + self.pos += 2; + Ok(val) + } + + fn read_i32(&mut self) -> Result { + if self.pos + 4 > self.buf.len() { + return Err(anyhow::anyhow!("Buffer overflow")); + } + let val = i32::from_le_bytes(self.buf[self.pos..self.pos + 4].try_into()?); + self.pos += 4; + Ok(val) + } + + fn read_code_address(&mut self) -> Result<()> { + let address = self.read_u32()?; + self.largest_code_address_pperand_encountered = std::cmp::max( + self.largest_code_address_pperand_encountered, + address as usize, + ); + Ok(()) + } + + fn read_string_address(&mut self, typ: BGIStringType) -> Result<()> { + let offset = self.pos; + let address = self.read_u32()? as usize; + self.strings.push(BGIString { + offset, + address, + typ, + }); + Ok(()) + } + + fn skip_inline_string(&mut self) -> Result<()> { + while self.buf[self.pos] != 0 { + self.pos += 1; + if self.pos > self.buf.len() { + return Err(anyhow::anyhow!("Buffer overflow")); + } + } + self.pos += 1; // skip null terminator + Ok(()) + } + + fn read_opers(&mut self, templ: &'static [Inst]) -> Result<()> { + for t in templ.iter() { + match t { + H => { + self.read_i16()?; + } + I => { + self.read_i32()?; + } + C => { + self.read_code_address()?; + } + M => { + self.read_string_address(BGIStringType::Message)?; + } + Z => { + self.skip_inline_string()?; + } + N => { + self.read_string_address(BGIStringType::Name)?; + } + } + } + Ok(()) + } + + fn read_push_string_address_operand(&mut self) -> Result<()> { + let offset = self.pos; + let address = self.read_u32()? as usize; + self.stacks.push(StackItem { + offset, + value: address, + }); + Ok(()) + } + + pub fn is_empty_string(&self, address: usize) -> Result { + let start = self.offset + address; + if start > self.buf.len() { + return Err(anyhow::anyhow!("Buffer overflow")); + } + Ok(self.buf[start] == 0) + } + + pub fn read_string_at_address(&mut self, address: usize) -> Result { + let start = self.offset + address; + let mut end = start; + if end > self.buf.len() { + return Err(anyhow::anyhow!("Buffer overflow")); + } + while self.buf[end] != 0 { + end += 1; + if end > self.buf.len() { + return Err(anyhow::anyhow!("Buffer overflow")); + } + } + let buf = &self.buf[start..end]; + Ok(decode_to_string(self.encoding, buf)?) + } + + pub fn handle_user_function_call(&mut self) -> Result<()> { + let item = match self.stacks.pop() { + Some(item) => item, + None => return Ok(()), + }; + self.strings.push(BGIString { + offset: item.offset, + address: item.value, + typ: BGIStringType::Internal, + }); + let funcname = self.read_string_at_address(item.value)?; + if funcname == "_SelectEx" || funcname == "_SelectExtend" { + self.handle_choice_screen()?; + } + Ok(()) + } + + pub fn handle_message(&mut self) -> Result<()> { + let item = self + .stacks + .pop() + .ok_or(anyhow::anyhow!("Stack underflow"))?; + match self.stacks.pop() { + Some(stack) => { + self.strings.push(BGIString { + offset: stack.offset, + address: stack.value, + typ: if self.is_empty_string(stack.value)? { + BGIStringType::Internal + } else { + BGIStringType::Name + }, + }); + } + None => {} + } + self.strings.push(BGIString { + offset: item.offset, + address: item.value, + typ: if self.is_empty_string(item.value)? { + BGIStringType::Internal + } else { + BGIStringType::Message + }, + }); + Ok(()) + } + + pub fn handle_choice_screen(&mut self) -> Result<()> { + let mut choices = Vec::new(); + loop { + match self.stacks.pop() { + Some(stack) => { + choices.insert(0, stack); + } + None => break, + } + } + for choice in choices { + self.strings.push(BGIString { + offset: choice.offset, + address: choice.value, + typ: BGIStringType::Message, + }); + } + Ok(()) + } + + pub fn disassemble(&mut self) -> Result<()> { + loop { + let opcode = self.read_u32()?; + if opcode == 0x0003 { + self.read_push_string_address_operand()?; + } else if opcode == 0x001c { + self.handle_user_function_call()?; + } else if opcode == 0x0140 || opcode == 0x0143 { + self.handle_message()?; + } else if opcode == 0x0160 { + self.handle_choice_screen()?; + } else if let Some(templ) = V1_INSTS_MAP.get(&opcode) { + self.read_opers(templ)?; + } + if (opcode == 0x001b || opcode == 0x00f4) + && self.largest_code_address_pperand_encountered < self.pos - self.offset + { + break; + } + if opcode == 0x007e || opcode == 0x007f || opcode == 0x00fe { + self.output_internal_strings(); + } + } + self.output_internal_strings(); + Ok(()) + } + + pub fn output_internal_strings(&mut self) { + loop { + match self.stacks.pop() { + Some(stack) => { + self.strings.push(BGIString { + offset: stack.offset, + address: stack.value, + typ: BGIStringType::Internal, + }); + } + None => break, + } + } + } +} diff --git a/src/scripts/bgi/script.rs b/src/scripts/bgi/script.rs new file mode 100644 index 0000000..42e66f4 --- /dev/null +++ b/src/scripts/bgi/script.rs @@ -0,0 +1,147 @@ +use super::parser::*; +use crate::scripts::base::*; +use crate::types::*; +use crate::utils::encoding::{decode_to_string, encode_string}; +use anyhow::Result; + +pub struct BGIScriptBuilder {} + +impl BGIScriptBuilder { + pub fn new() -> Self { + BGIScriptBuilder {} + } +} + +impl ScriptBuilder for BGIScriptBuilder { + fn default_encoding(&self) -> Encoding { + Encoding::Cp932 + } + + fn build_script( + &self, + filename: &str, + encoding: Encoding, + config: &ExtraConfig, + ) -> Result> { + Ok(Box::new(BGIScript::new( + filename.as_ref(), + encoding, + config, + )?)) + } + + fn extensions(&self) -> &'static [&'static str] { + &[] + } + + fn script_type(&self) -> &'static ScriptType { + &ScriptType::BGI + } +} + +pub struct BGIScript { + data: Vec, + encoding: Encoding, + strings: Vec, + is_v1: bool, + offset: usize, +} + +impl std::fmt::Debug for BGIScript { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + f.debug_struct("BGIScript") + .field("encoding", &self.encoding) + .finish_non_exhaustive() + } +} + +impl BGIScript { + pub fn new(filename: &str, encoding: Encoding, _config: &ExtraConfig) -> Result { + let data = crate::utils::files::read_file(filename)?; + if data.starts_with(b"BurikoCompiledScriptVer1.00\0") { + let mut parser = V1Parser::new(&data, encoding)?; + parser.disassemble()?; + let strings = parser.strings.clone(); + let offset = parser.offset; + Ok(Self { + data, + encoding, + strings, + is_v1: true, + offset, + }) + } else { + let mut parser = V0Parser::new(&data); + parser.disassemble()?; + let strings = parser.strings.clone(); + Ok(Self { + data, + encoding, + strings, + is_v1: false, + offset: 0, + }) + } + } + + fn read_string(&self, offset: usize) -> Result { + let start = self.offset + offset; + let mut end = start; + while self.data[end] != 0 { + end += 1; + if end >= self.data.len() { + return Err(anyhow::anyhow!("String not null-terminated")); + } + } + let string_data = &self.data[start..end]; + let string = decode_to_string(self.encoding, string_data)?; + Ok(string) + } +} + +impl Script for BGIScript { + fn default_output_script_type(&self) -> OutputScriptType { + OutputScriptType::Json + } + + fn default_format_type(&self) -> FormatOptions { + if self.is_v1 { + FormatOptions::None + } else { + FormatOptions::Fixed { + length: 32, + keep_original: false, + } + } + } + + fn extract_messages(&self) -> Result> { + let mut messages = Vec::new(); + let mut name = None; + for bgi_string in &self.strings { + match bgi_string.typ { + BGIStringType::Name => { + name = Some(self.read_string(bgi_string.address)?); + } + BGIStringType::Message => { + let message = self.read_string(bgi_string.address)?; + messages.push(Message { + name: name.take(), + message: message, + }); + } + _ => {} + } + } + Ok(messages) + } + + fn import_messages( + &self, + _messages: Vec, + _filename: &str, + _encoding: Encoding, + ) -> Result<()> { + Ok(()) + } +} diff --git a/src/scripts/mod.rs b/src/scripts/mod.rs index a6d5cbd..a08a780 100644 --- a/src/scripts/mod.rs +++ b/src/scripts/mod.rs @@ -1,4 +1,5 @@ pub mod base; +pub mod bgi; pub mod circus; pub use base::{Script, ScriptBuilder}; @@ -6,6 +7,7 @@ pub use base::{Script, ScriptBuilder}; lazy_static::lazy_static! { pub static ref BUILDER: Vec> = vec![ Box::new(circus::script::CircusMesScriptBuilder::new()), + Box::new(bgi::script::BGIScriptBuilder::new()), ]; pub static ref ALL_EXTS: Vec = BUILDER.iter().flat_map(|b| b.extensions()).map(|s| s.to_string()).collect(); diff --git a/src/types.rs b/src/types.rs index 2b42d44..8307751 100644 --- a/src/types.rs +++ b/src/types.rs @@ -181,6 +181,9 @@ pub struct ExtraConfig { pub enum ScriptType { /// Circus MES script Circus, + #[value(alias("ethornell"))] + /// Buriko General Interpreter/Ethornell Script + BGI, } #[derive(Debug, Serialize, Deserialize)]