Files
GalScripts/extract_bsi.py

100 lines
3.3 KiB
Python

import argparse
import os
import json
def read_binary_strings(filename):
    """Parse a sectioned key/value string table from a binary file.

    Layout, as consumed by this parser:

    * a 4-byte header (treated as a little-endian section count; it is
      skipped and not enforced -- parsing simply runs until the data ends);
    * then, for each section:
        - a NUL-terminated UTF-8 section name,
        - a 4-byte little-endian entry count,
        - that many (key, value) pairs, each a NUL-terminated UTF-8 string.

    Strings that are not valid UTF-8 are skipped (best effort). Trailing
    bytes without a NUL terminator are ignored.

    Args:
        filename: Path of the binary file to read.

    Returns:
        A dict mapping each section name to a dict of its key/value strings.
        An empty dict is returned (after printing a message) when the file
        cannot be read.

    Raises:
        ValueError: If the data ends before the current section's entry
            count field or before all of its declared entries were read.
    """
    try:
        with open(filename, 'rb') as f:
            content = f.read()
    except IOError as e:
        print(f"Error opening file: {e}")
        # Same (empty) container type as the success path.
        return {}

    results = {}
    section_name = None
    section_len = 0
    section_current = 0
    section_data = None
    key = None

    # Skip the fixed 4-byte header that precedes the first section.
    i = 4
    size = len(content)
    while i < size:
        # Locate the NUL byte that terminates the next string.
        end_position = content.find(b'\x00', i)
        if end_position == -1:
            # No terminator left: nothing more to parse.
            break
        string_bytes = content[i:end_position]
        # Advance past the terminator before anything else can bail out.
        i = end_position + 1

        try:
            text = string_bytes.decode('utf-8')
        except UnicodeDecodeError:
            # Best effort: ignore strings that are not valid UTF-8.
            continue

        if section_name is None:
            # This string is a section name; its entry count follows it.
            if i + 4 > size:
                raise ValueError(
                    f"Error: Section '{text}' is truncated: "
                    "missing 4-byte entry count."
                )
            section_len = int.from_bytes(content[i:i + 4], 'little')
            i += 4
            if section_len == 0:
                # An empty section is complete right away; without this the
                # next section's name would be consumed as a key.
                results[text] = {}
                continue
            section_name = text
            section_current = 0
            section_data = {}
            continue

        if key is None:
            # First string of a pair: remember it until the value arrives.
            key = text
            continue

        section_data[key] = text
        key = None
        section_current += 1
        if section_current >= section_len:
            # All declared entries were read: publish the section and reset.
            results[section_name] = section_data
            section_name = None
            section_data = None

    if section_name is not None:
        raise ValueError(f"Error: Section '{section_name}' not fully read. Expected {section_len} strings, but only found {section_current}.")
    return results
def main():
    """Command-line entry point.

    Takes one positional argument, the path of the binary input file, parses
    it with ``read_binary_strings`` and writes the result as indented UTF-8
    JSON to ``<input_file>.json``. Problems (missing input, unwritable
    output) are reported on stdout; nothing is raised for them here.
    """
    # Command-line interface: a single positional input path.
    cli = argparse.ArgumentParser(description='Extract UTF-8 strings from a binary file')
    cli.add_argument('input_file', help='Input binary file')
    source_path = cli.parse_args().input_file

    if os.path.exists(source_path):
        # Parse the binary file, then dump the result next to the input.
        parsed = read_binary_strings(source_path)
        try:
            with open(source_path + '.json', 'w', encoding='utf-8') as out:
                json.dump(parsed, out, ensure_ascii=False, indent=4)
        except IOError as err:
            print(f"Error writing to output file: {err}")
    else:
        # Nothing to do without an input file.
        print(f"Error: Input file '{source_path}' does not exist.")
# Run the command-line interface only when executed as a script,
# not when this module is imported.
if __name__ == '__main__':
    main()