// Mirror of https://github.com/lifegpc/msg-tool.git
// Synced 2026-06-07 21:38:58 +08:00
// 511 lines, 19 KiB, Rust
use super::parser::*;
|
|
use crate::ext::io::*;
|
|
use crate::scripts::base::*;
|
|
use crate::types::*;
|
|
use crate::utils::encoding::{decode_to_string, encode_string};
|
|
use anyhow::Result;
|
|
use fancy_regex::Regex;
|
|
use lazy_static::lazy_static;
|
|
use std::collections::{BTreeMap, HashMap};
|
|
|
|
/// Stateless builder that creates [`BGIScript`] instances for the script framework.
///
/// `Default` is derived so the type can be created through `Default::default()` as well as
/// through `new()`.
#[derive(Debug, Default)]
pub struct BGIScriptBuilder {}
|
|
|
|
impl BGIScriptBuilder {
|
|
pub fn new() -> Self {
|
|
BGIScriptBuilder {}
|
|
}
|
|
}
|
|
|
|
impl ScriptBuilder for BGIScriptBuilder {
|
|
fn default_encoding(&self) -> Encoding {
|
|
#[cfg(not(windows))]
|
|
return Encoding::Cp932;
|
|
#[cfg(windows)]
|
|
// Use Windows API first, because encoding-rs does not support PRIVATE USE AREA characters
|
|
return Encoding::CodePage(932);
|
|
}
|
|
|
|
fn build_script(
|
|
&self,
|
|
buf: Vec<u8>,
|
|
_filename: &str,
|
|
encoding: Encoding,
|
|
_archive_encoding: Encoding,
|
|
config: &ExtraConfig,
|
|
_archive: Option<&Box<dyn Script>>,
|
|
) -> Result<Box<dyn Script>> {
|
|
Ok(Box::new(BGIScript::new(buf, encoding, config)?))
|
|
}
|
|
|
|
fn extensions(&self) -> &'static [&'static str] {
|
|
&[]
|
|
}
|
|
|
|
fn script_type(&self) -> &'static ScriptType {
|
|
&ScriptType::BGI
|
|
}
|
|
|
|
fn is_this_format(&self, _filename: &str, buf: &[u8], buf_len: usize) -> Option<u8> {
|
|
if buf_len > 28 && buf.starts_with(b"BurikoCompiledScriptVer1.00\0") {
|
|
return Some(255);
|
|
}
|
|
None
|
|
}
|
|
}
|
|
|
|
pub struct BGIScript {
|
|
data: MemReader,
|
|
encoding: Encoding,
|
|
strings: Vec<BGIString>,
|
|
is_v1: bool,
|
|
is_v1_instr: bool,
|
|
offset: usize,
|
|
import_duplicate: bool,
|
|
append: bool,
|
|
}
|
|
|
|
impl std::fmt::Debug for BGIScript {
|
|
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
|
|
f.debug_struct("BGIScript")
|
|
.field("encoding", &self.encoding)
|
|
.finish_non_exhaustive()
|
|
}
|
|
}
|
|
|
|
impl BGIScript {
|
|
pub fn new(data: Vec<u8>, encoding: Encoding, config: &ExtraConfig) -> Result<Self> {
|
|
let data = MemReader::new(data);
|
|
if data.data.starts_with(b"BurikoCompiledScriptVer1.00\0") {
|
|
let mut parser = V1Parser::new(data.to_ref(), encoding)?;
|
|
parser.disassemble()?;
|
|
let strings = parser.strings.clone();
|
|
let offset = parser.offset;
|
|
Ok(Self {
|
|
data,
|
|
encoding,
|
|
strings,
|
|
is_v1: true,
|
|
is_v1_instr: true,
|
|
offset,
|
|
import_duplicate: config.bgi_import_duplicate,
|
|
append: !config.bgi_disable_append,
|
|
})
|
|
} else {
|
|
let mut is_v1_instr = false;
|
|
let strings = {
|
|
let mut parser = V0Parser::new(data.to_ref());
|
|
match parser.disassemble() {
|
|
Ok(_) => parser.strings,
|
|
Err(_) => {
|
|
let mut parser = V1Parser::new(data.to_ref(), encoding)?;
|
|
parser.disassemble()?;
|
|
is_v1_instr = true;
|
|
parser.strings
|
|
}
|
|
}
|
|
};
|
|
Ok(Self {
|
|
data,
|
|
encoding,
|
|
strings,
|
|
is_v1: false,
|
|
is_v1_instr,
|
|
offset: 0,
|
|
import_duplicate: config.bgi_import_duplicate,
|
|
append: !config.bgi_disable_append,
|
|
})
|
|
}
|
|
}
|
|
|
|
fn read_string(&self, offset: usize) -> Result<String> {
|
|
let start = self.offset + offset;
|
|
let string_data = self.data.cpeek_cstring_at(start)?;
|
|
// sometimes string has private use area characters, so we disable strict checking
|
|
let string = decode_to_string(self.encoding, string_data.as_bytes(), false)?;
|
|
Ok(string)
|
|
}
|
|
|
|
fn output_with_ruby(str: &mut String, ruby: &mut Vec<String>) -> Result<()> {
|
|
if ruby.is_empty() {
|
|
return Ok(());
|
|
}
|
|
if ruby.len() % 2 != 0 {
|
|
return Err(anyhow::anyhow!("Ruby strings count is not even."));
|
|
}
|
|
for i in (0..ruby.len()).step_by(2) {
|
|
let ruby_str = &ruby[i];
|
|
let ruby_text = &ruby[i + 1];
|
|
if ruby_str.is_empty() || ruby_text.is_empty() {
|
|
continue;
|
|
}
|
|
*str = str.replace(ruby_str, &format!("<r{ruby_text}>{ruby_str}</r>"));
|
|
}
|
|
ruby.clear();
|
|
Ok(())
|
|
}
|
|
}
|
|
|
|
impl Script for BGIScript {
|
|
fn default_output_script_type(&self) -> OutputScriptType {
|
|
OutputScriptType::Json
|
|
}
|
|
|
|
fn default_format_type(&self) -> FormatOptions {
|
|
if self.is_v1_instr {
|
|
FormatOptions::None
|
|
} else {
|
|
FormatOptions::Fixed {
|
|
length: 32,
|
|
keep_original: false,
|
|
}
|
|
}
|
|
}
|
|
|
|
fn extract_messages(&self) -> Result<Vec<Message>> {
|
|
let mut messages = Vec::new();
|
|
let mut name = None;
|
|
let mut ruby = Vec::new();
|
|
for bgi_string in &self.strings {
|
|
match bgi_string.typ {
|
|
BGIStringType::Name => {
|
|
name = Some(self.read_string(bgi_string.address)?);
|
|
}
|
|
BGIStringType::Message => {
|
|
let mut message = self.read_string(bgi_string.address)?;
|
|
if !ruby.is_empty() {
|
|
Self::output_with_ruby(&mut message, &mut ruby)?;
|
|
}
|
|
messages.push(Message {
|
|
name: name.take(),
|
|
message: message,
|
|
});
|
|
}
|
|
BGIStringType::Ruby => {
|
|
let ruby_str = self.read_string(bgi_string.address)?;
|
|
ruby.push(ruby_str);
|
|
}
|
|
_ => {}
|
|
}
|
|
}
|
|
Ok(messages)
|
|
}
|
|
|
|
fn import_messages<'a>(
|
|
&'a self,
|
|
mut messages: Vec<Message>,
|
|
mut file: Box<dyn WriteSeek + 'a>,
|
|
encoding: Encoding,
|
|
replacement: Option<&'a ReplacementTable>,
|
|
) -> Result<()> {
|
|
if !self.import_duplicate {
|
|
let mut used = HashMap::new();
|
|
let mut extra = HashMap::new();
|
|
let mut mes = messages.iter_mut();
|
|
let mut cur_mes = mes.next();
|
|
let mut old_offset = 0;
|
|
let mut new_offset = 0;
|
|
let mut rubys = Vec::new();
|
|
let mut parsed_ruby = false;
|
|
if self.append {
|
|
file.write_all(&self.data.data)?;
|
|
new_offset = self.data.data.len();
|
|
}
|
|
for curs in &self.strings {
|
|
if !curs.is_internal() {
|
|
if cur_mes.is_none() {
|
|
cur_mes = mes.next();
|
|
}
|
|
}
|
|
if used.contains_key(&curs.address) && curs.is_internal() {
|
|
let (_, new_address) = used.get(&curs.address).unwrap();
|
|
file.write_u32_at(curs.offset, *new_address as u32)?;
|
|
continue;
|
|
}
|
|
let nmes = match curs.typ {
|
|
BGIStringType::Internal => self.read_string(curs.address)?,
|
|
BGIStringType::Ruby => {
|
|
if !self.is_v1 && self.is_v1_instr {
|
|
if rubys.is_empty() {
|
|
if parsed_ruby {
|
|
String::from("<")
|
|
} else {
|
|
rubys = match &mut cur_mes {
|
|
Some(m) => parse_ruby_from_text(&mut m.message)?,
|
|
None => return Err(anyhow::anyhow!("No enough messages.")),
|
|
};
|
|
parsed_ruby = true;
|
|
if rubys.is_empty() {
|
|
String::from("<")
|
|
} else {
|
|
let ruby_str = rubys.remove(0);
|
|
ruby_str
|
|
}
|
|
}
|
|
} else {
|
|
rubys.remove(0)
|
|
}
|
|
} else {
|
|
self.read_string(curs.address)?
|
|
}
|
|
}
|
|
BGIStringType::Name => match &cur_mes {
|
|
Some(m) => {
|
|
if let Some(name) = &m.name {
|
|
let mut name = name.clone();
|
|
if let Some(replacement) = replacement {
|
|
for (key, value) in replacement.map.iter() {
|
|
name = name.replace(key, value);
|
|
}
|
|
}
|
|
name
|
|
} else {
|
|
return Err(anyhow::anyhow!("Name is missing for message."));
|
|
}
|
|
}
|
|
None => return Err(anyhow::anyhow!("No enough messages.")),
|
|
},
|
|
BGIStringType::Message => {
|
|
if !rubys.is_empty() {
|
|
eprintln!("Warning: Some ruby strings are unused: {:?}", rubys);
|
|
crate::COUNTER.inc_warning();
|
|
rubys.clear();
|
|
}
|
|
parsed_ruby = false;
|
|
let mes = match &cur_mes {
|
|
Some(m) => {
|
|
let mut message = m.message.clone();
|
|
if let Some(replacement) = replacement {
|
|
for (key, value) in replacement.map.iter() {
|
|
message = message.replace(key, value);
|
|
}
|
|
}
|
|
message
|
|
}
|
|
None => return Err(anyhow::anyhow!("No enough messages.")),
|
|
};
|
|
cur_mes.take();
|
|
mes
|
|
}
|
|
};
|
|
let in_used = match used.get(&curs.address) {
|
|
Some((s, address)) => {
|
|
if s == &nmes {
|
|
file.write_u32_at(curs.offset, *address as u32)?;
|
|
continue;
|
|
}
|
|
if let Some(address) = extra.get(&nmes) {
|
|
file.write_u32_at(curs.offset, *address as u32)?;
|
|
continue;
|
|
}
|
|
true
|
|
}
|
|
None => false,
|
|
};
|
|
let bgi_str_old_offset = curs.address + self.offset;
|
|
if !self.append && old_offset < bgi_str_old_offset {
|
|
file.write_all(&self.data.data[old_offset..bgi_str_old_offset])?;
|
|
new_offset += bgi_str_old_offset - old_offset;
|
|
old_offset = bgi_str_old_offset;
|
|
}
|
|
let old_str_len = self
|
|
.data
|
|
.cpeek_cstring_at(bgi_str_old_offset)?
|
|
.as_bytes_with_nul()
|
|
.len();
|
|
let nmess = encode_string(encoding, &nmes, false)?;
|
|
let write_to_original = self.append && !in_used && nmess.len() + 1 <= old_str_len;
|
|
if write_to_original {
|
|
file.write_all_at(bgi_str_old_offset, &nmess)?;
|
|
file.write_u8_at(bgi_str_old_offset + nmess.len(), 0)?; // null terminator
|
|
} else {
|
|
file.write_all(&nmess)?;
|
|
file.write_u8(0)?; // null terminator
|
|
}
|
|
let new_address = if write_to_original {
|
|
bgi_str_old_offset - self.offset
|
|
} else {
|
|
new_offset - self.offset
|
|
};
|
|
file.write_u32_at(curs.offset, new_address as u32)?;
|
|
if in_used {
|
|
extra.insert(nmes, new_address);
|
|
} else {
|
|
used.insert(curs.address, (nmes, new_address));
|
|
}
|
|
old_offset += old_str_len;
|
|
if !write_to_original {
|
|
new_offset += nmess.len() + 1; // +1 for null terminator
|
|
}
|
|
}
|
|
if cur_mes.is_some() || mes.next().is_some() {
|
|
return Err(anyhow::anyhow!("Some messages were not processed."));
|
|
}
|
|
if !self.append && old_offset < self.data.data.len() {
|
|
file.write_all(&self.data.data[old_offset..])?;
|
|
}
|
|
return Ok(());
|
|
}
|
|
let mut mes = messages.iter_mut();
|
|
let mut cur_mes = None;
|
|
let mut strs = self.strings.iter();
|
|
let mut nstrs = Vec::new();
|
|
let mut cur_str = strs.next();
|
|
let mut old_offset = 0;
|
|
let mut new_offset = 0;
|
|
let mut rubys = Vec::new();
|
|
let mut parsed_ruby = false;
|
|
if self.append {
|
|
file.write_all(&self.data.data)?;
|
|
new_offset = self.data.data.len();
|
|
}
|
|
while let Some(curs) = cur_str {
|
|
if !curs.is_internal() {
|
|
if cur_mes.is_none() {
|
|
cur_mes = mes.next();
|
|
}
|
|
}
|
|
let bgi_str_old_offset = curs.address + self.offset;
|
|
if !self.append && old_offset < bgi_str_old_offset {
|
|
file.write_all(&self.data.data[old_offset..bgi_str_old_offset])?;
|
|
new_offset += bgi_str_old_offset - old_offset;
|
|
old_offset = bgi_str_old_offset;
|
|
}
|
|
let old_str_len = self
|
|
.data
|
|
.cpeek_cstring_at(curs.address + self.offset)?
|
|
.as_bytes_with_nul()
|
|
.len();
|
|
let nmes = match curs.typ {
|
|
BGIStringType::Internal => self.read_string(curs.address)?,
|
|
BGIStringType::Ruby => {
|
|
if !self.is_v1 && self.is_v1_instr {
|
|
if rubys.is_empty() {
|
|
if parsed_ruby {
|
|
String::from("<")
|
|
} else {
|
|
rubys = match &mut cur_mes {
|
|
Some(m) => parse_ruby_from_text(&mut m.message)?,
|
|
None => return Err(anyhow::anyhow!("No enough messages.")),
|
|
};
|
|
parsed_ruby = true;
|
|
if rubys.is_empty() {
|
|
String::from("<")
|
|
} else {
|
|
let ruby_str = rubys.remove(0);
|
|
ruby_str
|
|
}
|
|
}
|
|
} else {
|
|
rubys.remove(0)
|
|
}
|
|
} else {
|
|
self.read_string(curs.address)?
|
|
}
|
|
}
|
|
BGIStringType::Name => match &cur_mes {
|
|
Some(m) => {
|
|
if let Some(name) = &m.name {
|
|
let mut name = name.clone();
|
|
if let Some(replacement) = replacement {
|
|
for (key, value) in replacement.map.iter() {
|
|
name = name.replace(key, value);
|
|
}
|
|
}
|
|
name
|
|
} else {
|
|
return Err(anyhow::anyhow!("Name is missing for message."));
|
|
}
|
|
}
|
|
None => return Err(anyhow::anyhow!("No enough messages.")),
|
|
},
|
|
BGIStringType::Message => {
|
|
if !rubys.is_empty() {
|
|
eprintln!("Warning: Some ruby strings are unused: {:?}", rubys);
|
|
crate::COUNTER.inc_warning();
|
|
rubys.clear();
|
|
}
|
|
parsed_ruby = false;
|
|
let mes = match &cur_mes {
|
|
Some(m) => {
|
|
let mut message = m.message.clone();
|
|
if let Some(replacement) = replacement {
|
|
for (key, value) in replacement.map.iter() {
|
|
message = message.replace(key, value);
|
|
}
|
|
}
|
|
message
|
|
}
|
|
None => return Err(anyhow::anyhow!("No enough messages.")),
|
|
};
|
|
cur_mes.take();
|
|
mes
|
|
}
|
|
};
|
|
let nmes = encode_string(encoding, &nmes, false)?;
|
|
file.write_all(&nmes)?;
|
|
file.write_u8(0)?;
|
|
let new_str_len = nmes.len() + 1; // +1 for null terminator
|
|
let new_address = new_offset - self.offset;
|
|
nstrs.push(BGIString {
|
|
offset: curs.offset,
|
|
address: new_address,
|
|
typ: curs.typ.clone(),
|
|
});
|
|
old_offset += old_str_len;
|
|
new_offset += new_str_len;
|
|
cur_str = strs.next();
|
|
}
|
|
if cur_mes.is_some() || mes.next().is_some() {
|
|
return Err(anyhow::anyhow!("Some messages were not processed."));
|
|
}
|
|
for str in nstrs {
|
|
file.write_u32_at(str.offset, str.address as u32)?;
|
|
}
|
|
if !self.append && old_offset < self.data.data.len() {
|
|
file.write_all(&self.data.data[old_offset..])?;
|
|
}
|
|
Ok(())
|
|
}
|
|
}
|
|
|
|
lazy_static! {
    /// Matches inline ruby markup of the form `<rANNOTATION>TEXT</r>`.
    ///
    /// Group 1 captures the annotation that follows `<r` (one or more characters other
    /// than `>`), group 2 the annotated text (one or more characters other than `<`).
    static ref RUBY_REGEX: Regex = Regex::new(r"<r([^>]+)>([^<]+)</r>").unwrap();
}
|
|
|
|
fn parse_ruby_from_text(text: &mut String) -> Result<Vec<String>> {
|
|
let mut map = BTreeMap::new();
|
|
for i in RUBY_REGEX.captures_iter(&text) {
|
|
let i = i?;
|
|
let ruby_text = i.get(1).map_or("", |m| m.as_str());
|
|
let ruby_str = i.get(2).map_or("", |m| m.as_str());
|
|
if !ruby_text.is_empty() && !ruby_str.is_empty() {
|
|
map.insert(ruby_str.to_owned(), ruby_text.to_owned());
|
|
}
|
|
}
|
|
let mut result = Vec::new();
|
|
for (ruby_str, ruby_text) in map {
|
|
*text = text.replace(&format!("<r{ruby_text}>{ruby_str}</r>"), &ruby_str);
|
|
result.push(ruby_str);
|
|
result.push(ruby_text);
|
|
}
|
|
Ok(result)
|
|
}
|
|
|
|
/// Ruby markup is stripped from the text and returned as (text, annotation) pairs sorted
/// by the annotated text.
#[test]
fn test_parse_ruby_from_text() {
    let mut text =
        "This is a test <rRubyText>RubyString</r> and <rAnotherText>AnotherRuby</r>.".to_owned();
    let ruby = parse_ruby_from_text(&mut text).unwrap();
    assert_eq!(text, "This is a test RubyString and AnotherRuby.");
    let expected = ["AnotherRuby", "AnotherText", "RubyString", "RubyText"];
    assert_eq!(ruby, expected);
}
|