mirror of
https://github.com/lifegpc/msg-tool.git
synced 2026-06-06 21:08:48 +08:00
Add more PUA support for SHIFTJIS
This commit is contained in:
@@ -97,8 +97,16 @@ pub fn decode_to_string(
|
||||
DecoderTrap::Strict
|
||||
} else {
|
||||
DecoderTrap::Call(|_, d, out| {
|
||||
if d.len() == 1 && d[0] == 0xFF {
|
||||
out.write_char('\u{f8f3}'); // PUA character for U+F8F3
|
||||
if d.len() == 1 {
|
||||
if d[0] == 0xFF {
|
||||
out.write_char('\u{f8f3}'); // PUA character for U+F8F3
|
||||
} else if d[0] == 0xFE {
|
||||
out.write_char('\u{f8f2}'); // PUA character for U+F8F2
|
||||
} else if d[0] == 0xFD {
|
||||
out.write_char('\u{f8f1}'); // PUA character for U+F8F1
|
||||
} else {
|
||||
out.write_char('\u{FFFD}'); // Replacement character
|
||||
}
|
||||
} else {
|
||||
out.write_char('\u{FFFD}'); // Replacement character
|
||||
}
|
||||
@@ -150,6 +158,10 @@ thread_local! {
|
||||
fn jis_encoder_trap(_: &mut dyn RawEncoder, data: &str, out: &mut dyn ByteWriter) -> bool {
|
||||
if data == "\u{f8f3}" {
|
||||
out.write_byte(0xFF); // PUA character for U+F8F3
|
||||
} else if data == "\u{f8f2}" {
|
||||
out.write_byte(0xFE); // PUA character for U+F8F2
|
||||
} else if data == "\u{f8f1}" {
|
||||
out.write_byte(0xFD); // PUA character for U+F8F1
|
||||
} else {
|
||||
out.write_byte(b'?'); // Replacement character
|
||||
ENCODE_REPLACED.with(|f| f.qsave(true));
|
||||
@@ -402,4 +414,49 @@ fn shift_jis_pua_test() {
|
||||
decode_to_string(Encoding::Cp932, &ff, false).unwrap(),
|
||||
"\u{f8f3}\x01".to_string()
|
||||
);
|
||||
#[cfg(windows)]
|
||||
assert!(decode_to_string(Encoding::CodePage(932), &ff, true).is_err());
|
||||
assert!(decode_to_string(Encoding::Cp932, &ff, true).is_err());
|
||||
let fe = [0xFE, 0x01];
|
||||
#[cfg(windows)]
|
||||
assert_eq!(
|
||||
decode_to_string(Encoding::CodePage(932), &fe, false).unwrap(),
|
||||
"\u{f8f2}\x01".to_string()
|
||||
);
|
||||
assert_eq!(
|
||||
decode_to_string(Encoding::Cp932, &fe, false).unwrap(),
|
||||
"\u{f8f2}\x01".to_string()
|
||||
);
|
||||
#[cfg(windows)]
|
||||
assert!(decode_to_string(Encoding::CodePage(932), &fe, true).is_err());
|
||||
assert!(decode_to_string(Encoding::Cp932, &fe, true).is_err());
|
||||
let fd = [0xFD, 0x01];
|
||||
#[cfg(windows)]
|
||||
assert_eq!(
|
||||
decode_to_string(Encoding::CodePage(932), &fd, false).unwrap(),
|
||||
"\u{f8f1}\x01".to_string()
|
||||
);
|
||||
assert_eq!(
|
||||
decode_to_string(Encoding::Cp932, &fd, false).unwrap(),
|
||||
"\u{f8f1}\x01".to_string()
|
||||
);
|
||||
#[cfg(windows)]
|
||||
assert!(decode_to_string(Encoding::CodePage(932), &fd, true).is_err());
|
||||
assert!(decode_to_string(Encoding::Cp932, &fd, true).is_err());
|
||||
let ff = "\u{f8f3}\x01";
|
||||
#[cfg(windows)]
|
||||
assert_eq!(
|
||||
encode_string(Encoding::CodePage(932), ff, false).unwrap(),
|
||||
vec![0xFF, 0x01]
|
||||
);
|
||||
assert_eq!(
|
||||
encode_string(Encoding::Cp932, ff, false).unwrap(),
|
||||
vec![0xFF, 0x01]
|
||||
);
|
||||
#[cfg(windows)]
|
||||
assert_eq!(
|
||||
encode_string(Encoding::CodePage(932), ff, true).unwrap(),
|
||||
vec![0xFF, 0x01]
|
||||
);
|
||||
assert!(encode_string(Encoding::Cp932, ff, true).is_err());
|
||||
}
|
||||
|
||||
@@ -1,6 +1,7 @@
|
||||
use windows_sys::Win32::Foundation::{ERROR_NO_UNICODE_TRANSLATION, GetLastError};
|
||||
use windows_sys::Win32::Globalization::{
|
||||
CP_UTF7, CP_UTF8, MB_ERR_INVALID_CHARS, MultiByteToWideChar, WideCharToMultiByte,
|
||||
CP_UTF7, CP_UTF8, MB_ERR_INVALID_CHARS, MultiByteToWideChar, WC_ERR_INVALID_CHARS,
|
||||
WideCharToMultiByte,
|
||||
};
|
||||
use windows_sys::Win32::System::Diagnostics::Debug::{
|
||||
FORMAT_MESSAGE_FROM_SYSTEM, FORMAT_MESSAGE_IGNORE_INSERTS, FormatMessageW,
|
||||
@@ -99,11 +100,16 @@ pub fn encode_string(cp: u32, data: &str, check: bool) -> Result<Vec<u8>, WinErr
|
||||
if data.is_empty() {
|
||||
return Ok(Vec::new());
|
||||
}
|
||||
let dwflags = if check && cp == 65001 {
|
||||
WC_ERR_INVALID_CHARS
|
||||
} else {
|
||||
0
|
||||
};
|
||||
let wstr = data.encode_utf16().collect::<Vec<u16>>();
|
||||
let needed_len = unsafe {
|
||||
WideCharToMultiByte(
|
||||
cp,
|
||||
0,
|
||||
dwflags,
|
||||
wstr.as_ptr(),
|
||||
wstr.len() as i32,
|
||||
std::ptr::null_mut(),
|
||||
@@ -121,7 +127,7 @@ pub fn encode_string(cp: u32, data: &str, check: bool) -> Result<Vec<u8>, WinErr
|
||||
let result = unsafe {
|
||||
WideCharToMultiByte(
|
||||
cp,
|
||||
0,
|
||||
dwflags,
|
||||
wstr.as_ptr(),
|
||||
wstr.len() as i32,
|
||||
mb.as_mut_ptr(),
|
||||
|
||||
Reference in New Issue
Block a user