From 1657b3e0e6ebfce14ccbcdfffa09e8c0c784fdf3 Mon Sep 17 00:00:00 2001 From: lifegpc Date: Sat, 9 Aug 2025 11:20:02 +0800 Subject: [PATCH] Fix BGI script parse (test game: Tayutama --- Cargo.toml | 2 +- src/scripts/bgi/parser.rs | 81 +++++++++++++++++-- src/scripts/bgi/script.rs | 166 +++++++++++++++++++++++++++++++++++--- 3 files changed, 234 insertions(+), 15 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index 88a2641..700a30a 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -42,7 +42,7 @@ all-arc = ["artemis-arc", "bgi-arc", "cat-system-arc", "circus-arc", "escude-arc all-audio = ["circus-audio"] artemis = ["utils-escape"] artemis-arc = ["artemis", "msg_tool_macro/artemis-arc", "sha1"] -bgi = [] +bgi = ["fancy-regex"] bgi-arc = ["bgi", "rand", "utils-bit-stream"] bgi-img = ["bgi", "image", "utils-bit-stream"] cat-system = ["fancy-regex", "flate2", "int-enum"] diff --git a/src/scripts/bgi/parser.rs b/src/scripts/bgi/parser.rs index 05acf2f..925b7a0 100644 --- a/src/scripts/bgi/parser.rs +++ b/src/scripts/bgi/parser.rs @@ -212,6 +212,9 @@ pub enum BGIStringType { Name, Message, Internal, + /// For v1 instructions. + /// Only old BGI scripts have this type. (Scripts that does not have a magic) + Ruby, } #[derive(Debug, Clone)] @@ -366,13 +369,14 @@ pub struct V1Parser<'a> { impl<'a> V1Parser<'a> { pub fn new(mut buf: MemReaderRef<'a>, encoding: Encoding) -> Result { - if !buf.data.starts_with(b"BurikoCompiledScriptVer1.00\0") { - return Err(anyhow::anyhow!("Invalid BGI script")); - } if buf.data.len() < 32 { return Err(anyhow::anyhow!("Buffer too small")); } - let offset = 28 + buf.peek_u32_at(28)? as u64; + let offset = if buf.data.starts_with(b"BurikoCompiledScriptVer1.00\0") { + 28 + buf.peek_u32_at(28)? as u64 + } else { + 0 + }; buf.seek(SeekFrom::Start(offset))?; Ok(V1Parser { buf, @@ -475,6 +479,69 @@ impl<'a> V1Parser<'a> { Ok(()) } + pub fn handle_ruby(&mut self) -> Result<()> { + let dest = self + .stacks + .pop() + .ok_or(anyhow::anyhow!("Stack underflow"))?; + let ori = self + .stacks + .pop() + .ok_or(anyhow::anyhow!("Stack underflow"))?; + self.strings.push(BGIString { + offset: ori.offset, + address: ori.value, + typ: BGIStringType::Ruby, + }); + self.strings.push(BGIString { + offset: dest.offset, + address: dest.value, + typ: BGIStringType::Ruby, + }); + Ok(()) + } + + pub fn handle_message_old(&mut self) -> Result<()> { + let item = self + .stacks + .pop() + .ok_or(anyhow::anyhow!("Stack underflow"))?; + match self.stacks.pop() { + Some(stack) => { + self.strings.push(BGIString { + offset: item.offset, + address: item.value, + typ: if self.is_empty_string(item.value)? { + BGIStringType::Internal + } else { + BGIStringType::Name + }, + }); + self.strings.push(BGIString { + offset: stack.offset, + address: stack.value, + typ: if self.is_empty_string(stack.value)? { + BGIStringType::Internal + } else { + BGIStringType::Message + }, + }); + return Ok(()); + } + None => {} + } + self.strings.push(BGIString { + offset: item.offset, + address: item.value, + typ: if self.is_empty_string(item.value)? { + BGIStringType::Internal + } else { + BGIStringType::Message + }, + }); + Ok(()) + } + pub fn handle_message(&mut self) -> Result<()> { let item = self .stacks @@ -539,13 +606,17 @@ impl<'a> V1Parser<'a> { self.handle_choice_screen()?; } else if let Some(templ) = V1_INSTS_MAP.get(&opcode) { self.read_opers(templ)?; + } else if opcode == 0x0145 { + self.handle_message_old()?; + } else if opcode == 0x014e { + self.handle_ruby()?; } if (opcode == 0x001b || opcode == 0x00f4) && self.largest_code_address_pperand_encountered < self.buf.pos - self.offset { break; } - if opcode == 0x007e || opcode == 0x007f || opcode == 0x00fe { + if opcode == 0x007e || opcode == 0x007f || opcode == 0x00fe || opcode == 0x01b5 { self.output_internal_strings(); } } diff --git a/src/scripts/bgi/script.rs b/src/scripts/bgi/script.rs index 910de52..0b844e4 100644 --- a/src/scripts/bgi/script.rs +++ b/src/scripts/bgi/script.rs @@ -4,7 +4,9 @@ use crate::scripts::base::*; use crate::types::*; use crate::utils::encoding::{decode_to_string, encode_string}; use anyhow::Result; -use std::collections::HashMap; +use fancy_regex::Regex; +use lazy_static::lazy_static; +use std::collections::{BTreeMap, HashMap}; #[derive(Debug)] pub struct BGIScriptBuilder {} @@ -57,6 +59,7 @@ pub struct BGIScript { encoding: Encoding, strings: Vec, is_v1: bool, + is_v1_instr: bool, offset: usize, import_duplicate: bool, append: bool, @@ -83,19 +86,31 @@ impl BGIScript { encoding, strings, is_v1: true, + is_v1_instr: true, offset, import_duplicate: config.bgi_import_duplicate, append: !config.bgi_disable_append, }) } else { - let mut parser = V0Parser::new(data.to_ref()); - parser.disassemble()?; - let strings = parser.strings.clone(); + let mut is_v1_instr = false; + let strings = { + let mut parser = V0Parser::new(data.to_ref()); + match parser.disassemble() { + Ok(_) => parser.strings, + Err(_) => { + let mut parser = V1Parser::new(data.to_ref(), encoding)?; + parser.disassemble()?; + is_v1_instr = true; + parser.strings + } + } + }; Ok(Self { data, encoding, strings, is_v1: false, + is_v1_instr, offset: 0, import_duplicate: config.bgi_import_duplicate, append: !config.bgi_disable_append, @@ -110,6 +125,25 @@ impl BGIScript { let string = decode_to_string(self.encoding, string_data.as_bytes(), false)?; Ok(string) } + + fn output_with_ruby(str: &mut String, ruby: &mut Vec) -> Result<()> { + if ruby.is_empty() { + return Ok(()); + } + if ruby.len() % 2 != 0 { + return Err(anyhow::anyhow!("Ruby strings count is not even.")); + } + for i in (0..ruby.len()).step_by(2) { + let ruby_str = &ruby[i]; + let ruby_text = &ruby[i + 1]; + if ruby_str.is_empty() || ruby_text.is_empty() { + continue; + } + *str = str.replace(ruby_str, &format!("{ruby_str}")); + } + ruby.clear(); + Ok(()) + } } impl Script for BGIScript { @@ -118,7 +152,7 @@ impl Script for BGIScript { } fn default_format_type(&self) -> FormatOptions { - if self.is_v1 { + if self.is_v1_instr { FormatOptions::None } else { FormatOptions::Fixed { @@ -131,18 +165,26 @@ impl Script for BGIScript { fn extract_messages(&self) -> Result> { let mut messages = Vec::new(); let mut name = None; + let mut ruby = Vec::new(); for bgi_string in &self.strings { match bgi_string.typ { BGIStringType::Name => { name = Some(self.read_string(bgi_string.address)?); } BGIStringType::Message => { - let message = self.read_string(bgi_string.address)?; + let mut message = self.read_string(bgi_string.address)?; + if !ruby.is_empty() { + Self::output_with_ruby(&mut message, &mut ruby)?; + } messages.push(Message { name: name.take(), message: message, }); } + BGIStringType::Ruby => { + let ruby_str = self.read_string(bgi_string.address)?; + ruby.push(ruby_str); + } _ => {} } } @@ -151,7 +193,7 @@ impl Script for BGIScript { fn import_messages<'a>( &'a self, - messages: Vec, + mut messages: Vec, mut file: Box, encoding: Encoding, replacement: Option<&'a ReplacementTable>, @@ -159,10 +201,12 @@ impl Script for BGIScript { if !self.import_duplicate { let mut used = HashMap::new(); let mut extra = HashMap::new(); - let mut mes = messages.iter(); + let mut mes = messages.iter_mut(); let mut cur_mes = mes.next(); let mut old_offset = 0; let mut new_offset = 0; + let mut rubys = Vec::new(); + let mut parsed_ruby = false; if self.append { file.write_all(&self.data.data)?; new_offset = self.data.data.len(); @@ -180,6 +224,31 @@ impl Script for BGIScript { } let nmes = match curs.typ { BGIStringType::Internal => self.read_string(curs.address)?, + BGIStringType::Ruby => { + if !self.is_v1 && self.is_v1_instr { + if rubys.is_empty() { + if parsed_ruby { + String::from("<") + } else { + rubys = match &mut cur_mes { + Some(m) => parse_ruby_from_text(&mut m.message)?, + None => return Err(anyhow::anyhow!("No enough messages.")), + }; + parsed_ruby = true; + if rubys.is_empty() { + String::from("<") + } else { + let ruby_str = rubys.remove(0); + ruby_str + } + } + } else { + rubys.remove(0) + } + } else { + self.read_string(curs.address)? + } + } BGIStringType::Name => match &cur_mes { Some(m) => { if let Some(name) = &m.name { @@ -197,6 +266,12 @@ impl Script for BGIScript { None => return Err(anyhow::anyhow!("No enough messages.")), }, BGIStringType::Message => { + if !rubys.is_empty() { + eprintln!("Warning: Some ruby strings are unused: {:?}", rubys); + crate::COUNTER.inc_warning(); + rubys.clear(); + } + parsed_ruby = false; let mes = match &cur_mes { Some(m) => { let mut message = m.message.clone(); @@ -271,13 +346,15 @@ impl Script for BGIScript { } return Ok(()); } - let mut mes = messages.iter(); + let mut mes = messages.iter_mut(); let mut cur_mes = None; let mut strs = self.strings.iter(); let mut nstrs = Vec::new(); let mut cur_str = strs.next(); let mut old_offset = 0; let mut new_offset = 0; + let mut rubys = Vec::new(); + let mut parsed_ruby = false; if self.append { file.write_all(&self.data.data)?; new_offset = self.data.data.len(); @@ -301,6 +378,31 @@ impl Script for BGIScript { .len(); let nmes = match curs.typ { BGIStringType::Internal => self.read_string(curs.address)?, + BGIStringType::Ruby => { + if !self.is_v1 && self.is_v1_instr { + if rubys.is_empty() { + if parsed_ruby { + String::from("<") + } else { + rubys = match &mut cur_mes { + Some(m) => parse_ruby_from_text(&mut m.message)?, + None => return Err(anyhow::anyhow!("No enough messages.")), + }; + parsed_ruby = true; + if rubys.is_empty() { + String::from("<") + } else { + let ruby_str = rubys.remove(0); + ruby_str + } + } + } else { + rubys.remove(0) + } + } else { + self.read_string(curs.address)? + } + } BGIStringType::Name => match &cur_mes { Some(m) => { if let Some(name) = &m.name { @@ -318,6 +420,12 @@ impl Script for BGIScript { None => return Err(anyhow::anyhow!("No enough messages.")), }, BGIStringType::Message => { + if !rubys.is_empty() { + eprintln!("Warning: Some ruby strings are unused: {:?}", rubys); + crate::COUNTER.inc_warning(); + rubys.clear(); + } + parsed_ruby = false; let mes = match &cur_mes { Some(m) => { let mut message = m.message.clone(); @@ -360,3 +468,43 @@ impl Script for BGIScript { Ok(()) } } + +lazy_static! { + static ref RUBY_REGEX: Regex = Regex::new(r"]+)>([^<]+)").unwrap(); +} + +fn parse_ruby_from_text(text: &mut String) -> Result> { + let mut map = BTreeMap::new(); + for i in RUBY_REGEX.captures_iter(&text) { + let i = i?; + let ruby_text = i.get(1).map_or("", |m| m.as_str()); + let ruby_str = i.get(2).map_or("", |m| m.as_str()); + if !ruby_text.is_empty() && !ruby_str.is_empty() { + map.insert(ruby_str.to_owned(), ruby_text.to_owned()); + } + } + let mut result = Vec::new(); + for (ruby_str, ruby_text) in map { + *text = text.replace(&format!("{ruby_str}"), &ruby_str); + result.push(ruby_str); + result.push(ruby_text); + } + Ok(result) +} + +#[test] +fn test_parse_ruby_from_text() { + let mut text = + String::from("This is a test RubyString and AnotherRuby."); + let ruby = parse_ruby_from_text(&mut text).unwrap(); + assert_eq!(text, "This is a test RubyString and AnotherRuby."); + assert_eq!( + ruby, + vec![ + "AnotherRuby".to_string(), + "AnotherText".to_string(), + "RubyString".to_string(), + "RubyText".to_string() + ] + ); +}