diff --git a/src/utils/encoding.rs b/src/utils/encoding.rs index b9bf92f..494e483 100644 --- a/src/utils/encoding.rs +++ b/src/utils/encoding.rs @@ -97,8 +97,16 @@ pub fn decode_to_string( DecoderTrap::Strict } else { DecoderTrap::Call(|_, d, out| { - if d.len() == 1 && d[0] == 0xFF { - out.write_char('\u{f8f3}'); // PUA character for U+F8F3 + if d.len() == 1 { + if d[0] == 0xFF { + out.write_char('\u{f8f3}'); // PUA character for U+F8F3 + } else if d[0] == 0xFE { + out.write_char('\u{f8f2}'); // PUA character for U+F8F2 + } else if d[0] == 0xFD { + out.write_char('\u{f8f1}'); // PUA character for U+F8F1 + } else { + out.write_char('\u{FFFD}'); // Replacement character + } } else { out.write_char('\u{FFFD}'); // Replacement character } @@ -150,6 +158,10 @@ thread_local! { fn jis_encoder_trap(_: &mut dyn RawEncoder, data: &str, out: &mut dyn ByteWriter) -> bool { if data == "\u{f8f3}" { out.write_byte(0xFF); // PUA character for U+F8F3 + } else if data == "\u{f8f2}" { + out.write_byte(0xFE); // PUA character for U+F8F2 + } else if data == "\u{f8f1}" { + out.write_byte(0xFD); // PUA character for U+F8F1 } else { out.write_byte(b'?'); // Replacement character ENCODE_REPLACED.with(|f| f.qsave(true)); @@ -402,4 +414,49 @@ fn shift_jis_pua_test() { decode_to_string(Encoding::Cp932, &ff, false).unwrap(), "\u{f8f3}\x01".to_string() ); + #[cfg(windows)] + assert!(decode_to_string(Encoding::CodePage(932), &ff, true).is_err()); + assert!(decode_to_string(Encoding::Cp932, &ff, true).is_err()); + let fe = [0xFE, 0x01]; + #[cfg(windows)] + assert_eq!( + decode_to_string(Encoding::CodePage(932), &fe, false).unwrap(), + "\u{f8f2}\x01".to_string() + ); + assert_eq!( + decode_to_string(Encoding::Cp932, &fe, false).unwrap(), + "\u{f8f2}\x01".to_string() + ); + #[cfg(windows)] + assert!(decode_to_string(Encoding::CodePage(932), &fe, true).is_err()); + assert!(decode_to_string(Encoding::Cp932, &fe, true).is_err()); + let fd = [0xFD, 0x01]; + #[cfg(windows)] + assert_eq!( + 
decode_to_string(Encoding::CodePage(932), &fd, false).unwrap(), + "\u{f8f1}\x01".to_string() + ); + assert_eq!( + decode_to_string(Encoding::Cp932, &fd, false).unwrap(), + "\u{f8f1}\x01".to_string() + ); + #[cfg(windows)] + assert!(decode_to_string(Encoding::CodePage(932), &fd, true).is_err()); + assert!(decode_to_string(Encoding::Cp932, &fd, true).is_err()); + let ff = "\u{f8f3}\x01"; + #[cfg(windows)] + assert_eq!( + encode_string(Encoding::CodePage(932), ff, false).unwrap(), + vec![0xFF, 0x01] + ); + assert_eq!( + encode_string(Encoding::Cp932, ff, false).unwrap(), + vec![0xFF, 0x01] + ); + #[cfg(windows)] + assert_eq!( + encode_string(Encoding::CodePage(932), ff, true).unwrap(), + vec![0xFF, 0x01] + ); + assert!(encode_string(Encoding::Cp932, ff, true).is_err()); } diff --git a/src/utils/encoding_win.rs b/src/utils/encoding_win.rs index 104a5b8..283e26e 100644 --- a/src/utils/encoding_win.rs +++ b/src/utils/encoding_win.rs @@ -1,6 +1,7 @@ use windows_sys::Win32::Foundation::{ERROR_NO_UNICODE_TRANSLATION, GetLastError}; use windows_sys::Win32::Globalization::{ - CP_UTF7, CP_UTF8, MB_ERR_INVALID_CHARS, MultiByteToWideChar, WideCharToMultiByte, + CP_UTF7, CP_UTF8, MB_ERR_INVALID_CHARS, MultiByteToWideChar, WC_ERR_INVALID_CHARS, + WideCharToMultiByte, }; use windows_sys::Win32::System::Diagnostics::Debug::{ FORMAT_MESSAGE_FROM_SYSTEM, FORMAT_MESSAGE_IGNORE_INSERTS, FormatMessageW, @@ -99,11 +100,16 @@ pub fn encode_string(cp: u32, data: &str, check: bool) -> Result<Vec<u8>, WinErr if data.is_empty() { return Ok(Vec::new()); } + let dwflags = if check && cp == 65001 { + WC_ERR_INVALID_CHARS + } else { + 0 + }; let wstr = data.encode_utf16().collect::<Vec<u16>>(); let needed_len = unsafe { WideCharToMultiByte( cp, - 0, + dwflags, wstr.as_ptr(), wstr.len() as i32, std::ptr::null_mut(), @@ -121,7 +127,7 @@ pub fn encode_string(cp: u32, data: &str, check: bool) -> Result<Vec<u8>, WinErr let result = unsafe { WideCharToMultiByte( cp, - 0, + dwflags, wstr.as_ptr(), wstr.len() as i32, 
mb.as_mut_ptr(),