Add support to extract BGI ._bsi file
This commit is contained in:
100
extract_bsi.py
Normal file
100
extract_bsi.py
Normal file
@@ -0,0 +1,100 @@
|
||||
import argparse
|
||||
import os
|
||||
import json
|
||||
|
||||
def read_binary_strings(filename):
    """Parse a BGI ``._bsi`` file into a dictionary of sections.

    Layout as read by this parser:

    * a 4-byte little-endian section count (skipped, not enforced);
    * for each section: a NUL-terminated name, a 4-byte little-endian
      entry count, then that many key/value pairs, each a NUL-terminated
      string.

    Strings are decoded as UTF-8.

    Args:
        filename: Path of the binary file to read.

    Returns:
        A dict mapping each section name to a dict of its key/value
        strings. If the file cannot be opened or read, an error is
        printed and an empty list is returned.

    Raises:
        ValueError: If the data ends before a section's entry count or
            before all of its declared entries have been read.
    """
    try:
        with open(filename, 'rb') as f:
            content = f.read()
    except IOError as e:
        print(f"Error opening file: {e}")
        return []

    results = {}

    # The file starts with a 4-byte section count. It is read at a fixed
    # width (never scanned for a NUL terminator) and is not needed to walk
    # the sections, so parsing simply starts right after it.
    i = 4

    section_name = None   # name of the section currently being filled
    section_len = 0       # number of key/value pairs the section declares
    section_current = 0   # number of pairs stored so far
    section_data = None   # pairs collected for the current section
    key = None            # key waiting for its value, if any

    while i < len(content):
        # Find the next NUL byte, i.e. the end of the current string.
        end_position = content.find(b'\x00', i)
        if end_position == -1:
            # No terminator left: nothing more can be parsed.
            break

        string_bytes = content[i:end_position]
        # Move past the terminator to the start of the next field.
        i = end_position + 1

        try:
            text = string_bytes.decode('utf-8')
        except UnicodeDecodeError:
            # Best effort: a string that is not valid UTF-8 is skipped.
            continue

        if section_name is None:
            # This string is a section name; its entry count follows as a
            # fixed-width integer that may itself contain NUL bytes.
            length_bytes = content[i:i + 4]
            if len(length_bytes) < 4:
                raise ValueError(
                    f"Error: Section '{text}' is truncated: "
                    "entry count is missing."
                )
            i += 4
            section_len = int.from_bytes(length_bytes, 'little')
            section_current = 0
            if section_len == 0:
                # An empty section has no pairs, so it is complete now;
                # the next string is already the next section's name.
                results[text] = {}
                continue
            section_name = text
            section_data = {}
            continue

        if key is None:
            key = text
            continue

        section_data[key] = text
        key = None
        section_current += 1
        if section_current >= section_len:
            # Every declared pair has been read: store the section.
            results[section_name] = section_data
            section_name = None
            section_data = None

    if section_name is not None:
        raise ValueError(f"Error: Section '{section_name}' not fully read. Expected {section_len} strings, but only found {section_current}.")

    return results
|
||||
|
||||
def main():
    """Command-line entry point: convert one binary file to JSON.

    Takes a single positional argument, the input file path. The parsed
    result of ``read_binary_strings`` is written next to the input as
    ``<input_file>.json``, UTF-8 encoded and indented by four spaces.
    A missing input file or a failure to write the output is reported
    with a printed message.
    """
    cli = argparse.ArgumentParser(description='Extract UTF-8 strings from a binary file')
    cli.add_argument('input_file', help='Input binary file')
    source_path = cli.parse_args().input_file

    # Stop early when there is nothing to read.
    if not os.path.exists(source_path):
        print(f"Error: Input file '{source_path}' does not exist.")
        return

    # The output name is the input name with ".json" appended.
    target_path = source_path + '.json'

    parsed = read_binary_strings(source_path)

    try:
        with open(target_path, 'w', encoding='utf-8') as out:
            json.dump(parsed, out, ensure_ascii=False, indent=4)
    except IOError as e:
        print(f"Error writing to output file: {e}")
|
||||
|
||||
# Run the command-line interface only when executed as a script.
if __name__ == "__main__":
    main()
|
||||
Reference in New Issue
Block a user