mirror of
https://github.com/lifegpc/msg-tool.git
synced 2026-06-21 03:14:41 +08:00
Change utf-8 encoding behavior
This commit is contained in:
@@ -88,7 +88,8 @@ pub fn decode_to_string(
|
|||||||
Encoding::Auto => decode_to_string(Encoding::Utf8, data, check)
|
Encoding::Auto => decode_to_string(Encoding::Utf8, data, check)
|
||||||
.or_else(|_| decode_to_string(Encoding::Cp932, data, check))
|
.or_else(|_| decode_to_string(Encoding::Cp932, data, check))
|
||||||
.or_else(|_| decode_to_string(Encoding::Gb2312, data, check)),
|
.or_else(|_| decode_to_string(Encoding::Gb2312, data, check)),
|
||||||
Encoding::Utf8 => Ok(String::from_utf8(data.to_vec())?),
|
// Keep same behavior as Windows API (Code Page 65001)
|
||||||
|
Encoding::Utf8 => Ok(String::from_utf8_lossy(data).into_owned()),
|
||||||
Encoding::Cp932 => {
|
Encoding::Cp932 => {
|
||||||
let result = encoding::codec::japanese::Windows31JEncoding
|
let result = encoding::codec::japanese::Windows31JEncoding
|
||||||
.decode(
|
.decode(
|
||||||
|
|||||||
@@ -1,6 +1,8 @@
|
|||||||
|
use anyhow::Result;
|
||||||
use windows_sys::Win32::Foundation::{ERROR_NO_UNICODE_TRANSLATION, GetLastError};
|
use windows_sys::Win32::Foundation::{ERROR_NO_UNICODE_TRANSLATION, GetLastError};
|
||||||
use windows_sys::Win32::Globalization::{
|
use windows_sys::Win32::Globalization::{
|
||||||
CP_UTF7, CP_UTF8, MB_ERR_INVALID_CHARS, MultiByteToWideChar, WideCharToMultiByte,
|
CP_UTF7, CP_UTF8, MB_ERR_INVALID_CHARS, MultiByteToWideChar, WC_ERR_INVALID_CHARS,
|
||||||
|
WideCharToMultiByte,
|
||||||
};
|
};
|
||||||
use windows_sys::Win32::System::Diagnostics::Debug::{
|
use windows_sys::Win32::System::Diagnostics::Debug::{
|
||||||
FORMAT_MESSAGE_FROM_SYSTEM, FORMAT_MESSAGE_IGNORE_INSERTS, FormatMessageW,
|
FORMAT_MESSAGE_FROM_SYSTEM, FORMAT_MESSAGE_IGNORE_INSERTS, FormatMessageW,
|
||||||
@@ -47,11 +49,22 @@ impl std::fmt::Display for WinError {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn decode_to_string(cp: u32, data: &[u8], check: bool) -> Result<String, WinError> {
|
fn is_special_code_page(cp: u32) -> bool {
|
||||||
|
matches!(
|
||||||
|
cp,
|
||||||
|
50220 | 50221 | 50222 | 50225 | 50227 | 50229 | 57002..=57011 | 65000 | 42
|
||||||
|
)
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn decode_to_string(cp: u32, data: &[u8], check: bool) -> Result<String> {
|
||||||
if data.is_empty() {
|
if data.is_empty() {
|
||||||
return Ok(String::new());
|
return Ok(String::new());
|
||||||
}
|
}
|
||||||
let dwflags = if check { MB_ERR_INVALID_CHARS } else { 0 };
|
let dwflags = if check && !is_special_code_page(cp) {
|
||||||
|
MB_ERR_INVALID_CHARS
|
||||||
|
} else {
|
||||||
|
0
|
||||||
|
};
|
||||||
let needed_len = unsafe {
|
let needed_len = unsafe {
|
||||||
MultiByteToWideChar(
|
MultiByteToWideChar(
|
||||||
cp,
|
cp,
|
||||||
@@ -63,12 +76,12 @@ pub fn decode_to_string(cp: u32, data: &[u8], check: bool) -> Result<String, Win
|
|||||||
)
|
)
|
||||||
};
|
};
|
||||||
if needed_len == 0 {
|
if needed_len == 0 {
|
||||||
return Err(WinError::from_last_error());
|
return Err(WinError::from_last_error().into());
|
||||||
}
|
}
|
||||||
let last_error = unsafe { GetLastError() };
|
let last_error = unsafe { GetLastError() };
|
||||||
if last_error == ERROR_NO_UNICODE_TRANSLATION {
|
if last_error == ERROR_NO_UNICODE_TRANSLATION {
|
||||||
if check {
|
if check {
|
||||||
return Err(WinError::new(last_error));
|
return Err(WinError::new(last_error).into());
|
||||||
} else {
|
} else {
|
||||||
eprintln!(
|
eprintln!(
|
||||||
"Warning: Some characters could not be decoded in code page {}: {:?}",
|
"Warning: Some characters could not be decoded in code page {}: {:?}",
|
||||||
@@ -90,20 +103,25 @@ pub fn decode_to_string(cp: u32, data: &[u8], check: bool) -> Result<String, Win
|
|||||||
)
|
)
|
||||||
};
|
};
|
||||||
if result == 0 {
|
if result == 0 {
|
||||||
return Err(WinError::from_last_error());
|
return Err(WinError::from_last_error().into());
|
||||||
}
|
}
|
||||||
Ok(String::from_utf16_lossy(&wc))
|
Ok(String::from_utf16_lossy(&wc))
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn encode_string(cp: u32, data: &str, check: bool) -> Result<Vec<u8>, WinError> {
|
pub fn encode_string(cp: u32, data: &str, check: bool) -> Result<Vec<u8>> {
|
||||||
if data.is_empty() {
|
if data.is_empty() {
|
||||||
return Ok(Vec::new());
|
return Ok(Vec::new());
|
||||||
}
|
}
|
||||||
|
let dwflags = if check && (cp == 65001 || cp == 54936) {
|
||||||
|
WC_ERR_INVALID_CHARS
|
||||||
|
} else {
|
||||||
|
0
|
||||||
|
};
|
||||||
let wstr = data.encode_utf16().collect::<Vec<u16>>();
|
let wstr = data.encode_utf16().collect::<Vec<u16>>();
|
||||||
let needed_len = unsafe {
|
let needed_len = unsafe {
|
||||||
WideCharToMultiByte(
|
WideCharToMultiByte(
|
||||||
cp,
|
cp,
|
||||||
0,
|
dwflags,
|
||||||
wstr.as_ptr(),
|
wstr.as_ptr(),
|
||||||
wstr.len() as i32,
|
wstr.len() as i32,
|
||||||
std::ptr::null_mut(),
|
std::ptr::null_mut(),
|
||||||
@@ -113,7 +131,7 @@ pub fn encode_string(cp: u32, data: &str, check: bool) -> Result<Vec<u8>, WinErr
|
|||||||
)
|
)
|
||||||
};
|
};
|
||||||
if needed_len == 0 {
|
if needed_len == 0 {
|
||||||
return Err(WinError::from_last_error());
|
return Err(WinError::from_last_error().into());
|
||||||
}
|
}
|
||||||
let mut mb = Vec::with_capacity(needed_len as usize);
|
let mut mb = Vec::with_capacity(needed_len as usize);
|
||||||
mb.resize(needed_len as usize, 0);
|
mb.resize(needed_len as usize, 0);
|
||||||
@@ -121,7 +139,7 @@ pub fn encode_string(cp: u32, data: &str, check: bool) -> Result<Vec<u8>, WinErr
|
|||||||
let result = unsafe {
|
let result = unsafe {
|
||||||
WideCharToMultiByte(
|
WideCharToMultiByte(
|
||||||
cp,
|
cp,
|
||||||
0,
|
dwflags,
|
||||||
wstr.as_ptr(),
|
wstr.as_ptr(),
|
||||||
wstr.len() as i32,
|
wstr.len() as i32,
|
||||||
mb.as_mut_ptr(),
|
mb.as_mut_ptr(),
|
||||||
@@ -136,7 +154,11 @@ pub fn encode_string(cp: u32, data: &str, check: bool) -> Result<Vec<u8>, WinErr
|
|||||||
};
|
};
|
||||||
if used_default_char != 0 {
|
if used_default_char != 0 {
|
||||||
if check {
|
if check {
|
||||||
return Err(WinError::new(0));
|
return Err(anyhow::anyhow!(
|
||||||
|
"Some characters could not be encoded in code page {}: {}",
|
||||||
|
cp,
|
||||||
|
data
|
||||||
|
));
|
||||||
} else {
|
} else {
|
||||||
eprintln!(
|
eprintln!(
|
||||||
"Warning: Some characters could not be encoded in code page {}: {}",
|
"Warning: Some characters could not be encoded in code page {}: {}",
|
||||||
@@ -146,7 +168,7 @@ pub fn encode_string(cp: u32, data: &str, check: bool) -> Result<Vec<u8>, WinErr
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
if result == 0 {
|
if result == 0 {
|
||||||
return Err(WinError::from_last_error());
|
return Err(WinError::from_last_error().into());
|
||||||
}
|
}
|
||||||
Ok(mb)
|
Ok(mb)
|
||||||
}
|
}
|
||||||
|
|||||||
Reference in New Issue
Block a user