Add support for PUA (Private Use Area) characters in BGI strings

This commit is contained in:
2025-07-08 10:20:21 +08:00
parent e317c487fb
commit 601abba284
17 changed files with 104 additions and 60 deletions

View File

@@ -116,7 +116,7 @@ impl Script for BGIBpScript {
let text_address = i.offset_pos + i.text_offset as usize - 1;
// println!("offset: {}, text address: {}, text_offset: {}", i.offset_pos, text_address, i.text_offset);
let str = self.data.cpeek_cstring_at(text_address)?;
let str = decode_to_string(self.encoding, str.as_bytes())?;
let str = decode_to_string(self.encoding, str.as_bytes(), true)?;
messages.push(Message {
name: None,
message: str,

View File

@@ -66,14 +66,14 @@ impl BGIBsiScript {
let section_count = reader.read_u32()?;
for _ in 0..section_count {
let section_name = reader.read_cstring()?;
let section_name = decode_to_string(encoding, section_name.as_bytes())?;
let section_name = decode_to_string(encoding, section_name.as_bytes(), true)?;
let mut section_data = BTreeMap::new();
let entry_count = reader.read_u32()?;
for _ in 0..entry_count {
let key = reader.read_cstring()?;
let key = decode_to_string(encoding, key.as_bytes())?;
let key = decode_to_string(encoding, key.as_bytes(), true)?;
let value = reader.read_cstring()?;
let value = decode_to_string(encoding, value.as_bytes())?;
let value = decode_to_string(encoding, value.as_bytes(), true)?;
section_data.insert(key, value);
}
data.insert(section_name, section_data);
@@ -134,7 +134,7 @@ fn create_file<'a>(
output_encoding: Encoding,
) -> Result<()> {
let input = crate::utils::files::read_file(custom_filename)?;
let s = decode_to_string(output_encoding, &input)?;
let s = decode_to_string(output_encoding, &input, true)?;
let data: BTreeMap<String, BTreeMap<String, String>> = serde_json::from_str(&s)
.map_err(|e| anyhow::anyhow!("Failed to read BSI Map data from JSON: {}", e))?;
writer.write_u32(data.len() as u32)?;

View File

@@ -454,7 +454,8 @@ impl<'a> V1Parser<'a> {
pub fn read_string_at_address(&mut self, address: usize) -> Result<String> {
let start = self.offset + address;
let buf = self.buf.peek_cstring_at(start)?;
Ok(decode_to_string(self.encoding, buf.as_bytes())?)
// Strings sometimes contain Private Use Area characters, so we disable strict checking
Ok(decode_to_string(self.encoding, buf.as_bytes(), false)?)
}
pub fn handle_user_function_call(&mut self) -> Result<()> {

View File

@@ -17,7 +17,11 @@ impl BGIScriptBuilder {
impl ScriptBuilder for BGIScriptBuilder {
fn default_encoding(&self) -> Encoding {
Encoding::Cp932
#[cfg(not(windows))]
return Encoding::Cp932;
#[cfg(windows)]
// Use Windows API first, because encoding-rs does not support PRIVATE USE AREA characters
return Encoding::CodePage(932);
}
fn build_script(
@@ -101,7 +105,8 @@ impl BGIScript {
fn read_string(&self, offset: usize) -> Result<String> {
let start = self.offset + offset;
let string_data = self.data.cpeek_cstring_at(start)?;
let string = decode_to_string(self.encoding, string_data.as_bytes())?;
// Strings sometimes contain Private Use Area characters, so we disable strict checking
let string = decode_to_string(self.encoding, string_data.as_bytes(), false)?;
Ok(string)
}
}

View File

@@ -362,7 +362,7 @@ impl<T: Read + Seek + std::fmt::Debug> CSIntArc<T> {
k += 1;
i += 1;
}
decode_to_string(encoding, &name[..i])
decode_to_string(encoding, &name[..i], true)
}
fn get_key(password: &str) -> Result<u32> {

View File

@@ -198,12 +198,12 @@ impl Script for CircusMesScript {
for t in text.iter_mut() {
*t = (*t).overflowing_add(self.info.deckey).0;
}
t = Some(decode_to_string(self.encoding, &text)?);
t = Some(decode_to_string(self.encoding, &text, true)?);
// println!("Token(enc): {:?}, {}", token, t.as_ref().unwrap());
} else if token.value == self.info.optunenc {
let text = &self.data[self.asm_bin_offset + token.offset + 1
..self.asm_bin_offset + token.offset + token.length - 1];
t = Some(decode_to_string(self.encoding, text)?);
t = Some(decode_to_string(self.encoding, text, true)?);
// println!("Token: {:?}, {}", token, t.as_ref().unwrap());
}
match t {
@@ -236,7 +236,7 @@ impl Script for CircusMesScript {
encoding: Encoding,
) -> Result<()> {
let jis = encode_string(Encoding::Cp932, s, true)?;
let out = decode_to_string(encoding, &jis)?;
let out = decode_to_string(encoding, &jis, true)?;
repls.push((s.to_string(), out));
Ok(())
}

View File

@@ -251,7 +251,7 @@ impl<'a, T: Iterator<Item = &'a BinEntry>, R: Read + Seek> Iterator
Ok(name) => name,
Err(e) => return Some(Err(e.into())),
};
let name = match decode_to_string(self.archive_encoding, name.as_bytes()) {
let name = match decode_to_string(self.archive_encoding, name.as_bytes(), true) {
Ok(name) => name,
Err(e) => return Some(Err(e.into())),
};
@@ -283,7 +283,7 @@ impl<'a, T: Iterator<Item = &'a BinEntry>, R: Read + Seek> Iterator
Ok(name) => name,
Err(e) => return Some(Err(e.into())),
};
let name = match decode_to_string(self.archive_encoding, name.as_bytes()) {
let name = match decode_to_string(self.archive_encoding, name.as_bytes(), true) {
Ok(name) => name,
Err(e) => return Some(Err(e.into())),
};

View File

@@ -278,7 +278,7 @@ fn create_file<'a>(
output_encoding: Encoding,
) -> Result<()> {
let input = crate::utils::files::read_file(custom_filename)?;
let s = decode_to_string(output_encoding, &input)?;
let s = decode_to_string(output_encoding, &input, true)?;
let entries: Vec<ListEntry> = serde_json::from_str(&s)
.map_err(|e| anyhow::anyhow!("Failed to read Escude list from JSON: {}", e))?;
writer.write_all(b"LIST")?;

View File

@@ -109,12 +109,12 @@ impl EscudeBinScript {
for _ in 0..string_count {
let s = reader.read_cstring()?;
let s = replaces.replace(s.as_bytes())?;
strings.push(decode_to_string(encoding, &s)?);
strings.push(decode_to_string(encoding, &s, true)?);
}
} else {
for _ in 0..string_count {
let s = reader.read_cstring()?;
strings.push(decode_to_string(encoding, s.as_bytes())?);
strings.push(decode_to_string(encoding, s.as_bytes(), true)?);
}
}
let names = match &config.escude_enum_scr {

View File

@@ -164,7 +164,7 @@ impl Dref {
filename: &str,
_config: &ExtraConfig,
) -> Result<Self> {
let text = decode_with_bom_detect(encoding, &buf)?.0;
let text = decode_with_bom_detect(encoding, &buf, true)?.0;
let mut urls = Vec::new();
for text in text.lines() {
let text = text.trim();

View File

@@ -666,7 +666,7 @@ pub struct KsScript {
impl KsScript {
pub fn new(reader: Vec<u8>, encoding: Encoding, config: &ExtraConfig) -> Result<Self> {
let (text, bom) = decode_with_bom_detect(encoding, &reader)?;
let (text, bom) = decode_with_bom_detect(encoding, &reader, true)?;
let parser = Parser::new(&text);
let tree = parser.parse(!config.kirikiri_remove_empty_lines)?;
Ok(Self {

View File

@@ -618,7 +618,7 @@ impl Script for ScnScript {
output_encoding: Encoding,
) -> Result<()> {
let data = crate::utils::files::read_file(custom_filename)?;
let s = decode_to_string(output_encoding, &data)?;
let s = decode_to_string(output_encoding, &data, true)?;
let json = json::parse(&s)?;
let mut psb = self.psb.clone();
psb.from_json(&json)?;

View File

@@ -122,7 +122,7 @@ impl Script for ItufuruScript {
for i in self.strings.iter() {
let str_pos = i.len_pos + 2; // Skip the length bytes
let s = self.data.cpeek_cstring_at(str_pos)?;
let decoded = decode_to_string(self.encoding, s.as_bytes())?;
let decoded = decode_to_string(self.encoding, s.as_bytes(), true)?;
messages.push(Message {
name: None,
message: decoded,