Add format support

This commit is contained in:
2025-05-21 10:57:14 +08:00
parent a2747d29b9
commit 99210a19cf
11 changed files with 235 additions and 26 deletions

View File

@@ -29,28 +29,44 @@ pub fn decode_to_string(encoding: Encoding, data: &[u8]) -> Result<String, anyho
}
}
// NOTE(review): this span is a rendered commit diff without +/- markers, so the
// pre-change and post-change lines of `encode_string` appear interleaved. The
// "old"/"new" labels below are inferred from the added `check` parameter and
// should be confirmed against the actual commit.
//
// Old signature (no `check` parameter):
pub fn encode_string(encoding: Encoding, data: &str) -> Result<Vec<u8>, anyhow::Error> {
/// Encodes `data` into the byte representation selected by `encoding`.
///
/// * `Encoding::Auto` / `Encoding::Utf8` return the UTF-8 bytes of `data`.
/// * `Encoding::Cp932` uses `encoding_rs::SHIFT_JIS`.
/// * `Encoding::Gb2312` uses `encoding_rs::GBK`.
/// * `Encoding::CodePage` (Windows builds only) forwards to
///   `super::encoding_win::encode_string`, passing `check` through.
///
/// `check` selects what happens when the `encoding_rs` encoder sets the third
/// element of its result tuple (treated here as "some characters could not be
/// encoded"): `true` returns an error, `false` prints a warning containing the
/// whole input to stderr and still returns the bytes the encoder produced.
///
/// # Errors
///
/// Returns an `anyhow::Error` when `check` is `true` and the Shift-JIS or GBK
/// encoder sets that flag, or when the Windows code-page helper fails.
// New signature (adds `check`):
pub fn encode_string(
encoding: Encoding,
data: &str,
check: bool,
) -> Result<Vec<u8>, anyhow::Error> {
match encoding {
// No transcoding needed: a Rust `&str` is already UTF-8.
Encoding::Auto => Ok(data.as_bytes().to_vec()),
Encoding::Utf8 => Ok(data.as_bytes().to_vec()),
Encoding::Cp932 => {
let result = encoding_rs::SHIFT_JIS.encode(data);
// `result.2` is the encoder's flag; the messages below treat it as
// "some characters could not be encoded".
// NOTE(review): per encoding_rs docs, unmappable characters are presumably
// replaced by numeric character references in `result.0` — confirm.
if result.2 {
// Old behaviour (appears to be the removed side): always fail on the flag.
Err(anyhow::anyhow!("Failed to encode Shift-JIS"))
} else {
Ok(result.0.to_vec())
// New behaviour: fail only in strict (`check`) mode ...
if check {
return Err(anyhow::anyhow!("Failed to encode Shift-JIS"));
}
// ... otherwise warn on stderr (echoing the full input) and fall through.
eprintln!(
"Warning: Some characters could not be encoded in Shift-JIS: {}",
data
);
}
// New behaviour: return the encoder output whether or not a warning was printed.
Ok(result.0.to_vec())
}
Encoding::Gb2312 => {
// The GBK encoder is what this arm uses for the `Gb2312` variant.
let result = encoding_rs::GBK.encode(data);
// Same flag handling as the Cp932 arm above.
if result.2 {
// Old behaviour (appears to be the removed side): always fail on the flag.
Err(anyhow::anyhow!("Failed to encode GB2312"))
} else {
Ok(result.0.to_vec())
// New behaviour: fail only in strict (`check`) mode ...
if check {
return Err(anyhow::anyhow!("Failed to encode GB2312"));
}
// ... otherwise warn on stderr (echoing the full input) and fall through.
eprintln!(
"Warning: Some characters could not be encoded in GB2312: {}",
data
);
}
// New behaviour: return the encoder output whether or not a warning was printed.
Ok(result.0.to_vec())
}
// This arm is compiled only on Windows targets.
#[cfg(windows)]
// Old arm: helper called without `check`.
Encoding::CodePage(code_page) => Ok(super::encoding_win::encode_string(code_page, data)?),
// New arm: `check` is forwarded to the Windows helper.
Encoding::CodePage(code_page) => {
Ok(super::encoding_win::encode_string(code_page, data, check)?)
}
}
}
@@ -106,22 +122,22 @@ fn test_decode_to_string() {
// Checks `encode_string` against fixed expected byte vectors for the Utf8,
// Cp932, Gb2312 and (Windows only) CodePage(936) encodings.
//
// NOTE(review): this span is a rendered commit diff without +/- markers. In
// each assertion the first `encode_string(...)` line appears to be the old
// two-argument call and the second the new call with `check = true` — confirm
// against the actual commit.
#[test]
fn test_encode_string() {
// UTF-8: expected bytes are the UTF-8 encoding of the input string.
assert_eq!(
encode_string(Encoding::Utf8, "中文测试").unwrap(),
encode_string(Encoding::Utf8, "中文测试", true).unwrap(),
vec![228, 184, 173, 230, 150, 135, 230, 181, 139, 232, 175, 149]
);
// Cp932: hiragana input, two bytes per character in the expected output.
assert_eq!(
encode_string(Encoding::Cp932, "きゃべつそふと").unwrap(),
encode_string(Encoding::Cp932, "きゃべつそふと", true).unwrap(),
vec![
130, 171, 130, 225, 130, 215, 130, 194, 130, 187, 130, 211, 130, 198
]
);
// Gb2312: two bytes per character in the expected output.
assert_eq!(
encode_string(Encoding::Gb2312, "中文").unwrap(),
encode_string(Encoding::Gb2312, "中文", true).unwrap(),
vec![214, 208, 206, 196]
);
// Windows only: code page 936 is expected to give the same bytes as the Gb2312 case.
#[cfg(windows)]
assert_eq!(
encode_string(Encoding::CodePage(936), "中文").unwrap(),
encode_string(Encoding::CodePage(936), "中文", true).unwrap(),
vec![214, 208, 206, 196]
);
}