import argparse
import os
import json


def read_binary_strings(filename):
    """Parse a sectioned binary string table into nested dictionaries.

    Layout, as consumed by this parser:

    * a 4-byte header (a little-endian section count; it is skipped and
      not enforced),
    * then, for each section: a NUL-terminated UTF-8 section name, a
      4-byte little-endian entry count, and that many key/value pairs,
      each stored as two NUL-terminated UTF-8 strings.

    Args:
        filename: Path of the binary file to read.

    Returns:
        A dict mapping each section name to a dict of its key/value
        strings. An empty dict is returned if the file cannot be opened
        (the error is printed).

    Raises:
        ValueError: If the data ends while a section is still open, or if
            a section's entry count field is truncated.
    """
    try:
        with open(filename, 'rb') as f:
            content = f.read()
    except IOError as e:
        print(f"Error opening file: {e}")
        return {}

    results = {}
    section_name = None
    section_len = None
    section_current = 0
    section_data = None
    key = None

    # The file starts with a 4-byte section count. It is not needed to
    # walk the data, so it is skipped by offset rather than parsed from
    # whatever bytes happen to precede the first NUL.
    i = 4
    while i < len(content):
        # Every string runs up to the next NUL terminator.
        end_position = content.find(b'\x00', i)
        if end_position == -1:
            # Unterminated trailing data: stop scanning.
            break

        string_bytes = content[i:end_position]
        # Step past the terminator.
        i = end_position + 1

        try:
            text = string_bytes.decode('utf-8')
        except UnicodeDecodeError:
            # Best effort: strings that are not valid UTF-8 are skipped.
            continue

        if section_name is None:
            # A new section: its name is followed by a 4-byte entry count.
            count_bytes = content[i:i + 4]
            if len(count_bytes) < 4:
                raise ValueError(
                    f"Error: Section '{text}' has a truncated entry count."
                )
            section_name = text
            section_len = int.from_bytes(count_bytes, 'little')
            section_current = 0
            section_data = {}
            i += 4
        elif key is None:
            # First string of a pair; wait for its value.
            key = text
            continue
        else:
            section_data[key] = text
            section_current += 1
            key = None

        # Close the section once all of its pairs are read. This also runs
        # right after the section header so that a section declaring zero
        # entries is closed immediately instead of consuming the strings
        # that belong to the following section.
        if section_current >= section_len:
            results[section_name] = section_data
            section_name = None
            section_len = None
            section_current = 0
            section_data = None

    if section_name is not None:
        raise ValueError(
            f"Error: Section '{section_name}' not fully read. \n"
            f"Expected {section_len} strings, but only found {section_current}."
        )

    return results


def main():
    """Parse the file given on the command line and write '<input>.json'."""
    # Set up command-line argument parsing.
    parser = argparse.ArgumentParser(description='Extract UTF-8 strings from a binary file')
    parser.add_argument('input_file', help='Input binary file')

    args = parser.parse_args()

    # Make sure the input file exists.
    if not os.path.exists(args.input_file):
        print(f"Error: Input file '{args.input_file}' does not exist.")
        return

    # The output is written next to the input with a '.json' suffix.
    output_file = args.input_file + '.json'

    # Read and parse the binary file.
    string_data = read_binary_strings(args.input_file)

    # Write the parsed data as JSON.
    try:
        with open(output_file, 'w', encoding='utf-8') as f:
            json.dump(string_data, f, ensure_ascii=False, indent=4)

    except IOError as e:
        print(f"Error writing to output file: {e}")


if __name__ == '__main__':
    main()