From 8a2eea3636a4a4af0020accdf4f12131299a5816 Mon Sep 17 00:00:00 2001
From: lifegpc <root@lifegpc.com>
Date: Sun, 20 Apr 2025 21:48:59 +0800
Subject: [PATCH] Add support to extract ._bp file

---
 extract_bp.py                  | 108 +++++++++++++++++++
 patch_bp.py => patch_bp_old.py | 182 ++++++++++++++++-----------------
 2 files changed, 199 insertions(+), 91 deletions(-)
 create mode 100644 extract_bp.py
 rename patch_bp.py => patch_bp_old.py (97%)

diff --git a/extract_bp.py b/extract_bp.py
new file mode 100644
index 0000000..9bcdff7
--- /dev/null
+++ b/extract_bp.py
@@ -0,0 +1,108 @@
+import struct
+import sys
+import json
+
+
+class BPScript:
+    """Reader for a ._bp script; strings are assumed to follow the last aligned 0x17 word."""
+
+    def __init__(self, path: str):
+        self.path = path
+        with open(path, 'rb') as f:
+            self.data = bytearray(f.read())
+        self.len = len(self.data)
+        self.read_header()
+        self.iPos = self.header_size  # read cursor; the scan in extract_values starts here
+
+    def read_header(self):
+        """Read and check the two size fields (ValueError on mismatch), then find last_pos."""
+        self.header_size, self.instr_size = struct.unpack_from('<LL', self.data, 0)
+        if self.len != self.header_size + self.instr_size:
+            raise ValueError("Invalid BPScript file size")
+        self.iPos = self.header_size
+        self.last_pos = 0  # offset of the last 4-byte word equal to 0x17 (stays 0 if none)
+        while self.iPos < self.len - 4:
+            if struct.unpack_from('<L', self.data, self.iPos)[0] == 0x17:
+                self.last_pos = self.iPos
+            self.iPos += 4
+
+    def _decode(self, raw):
+        """Decode raw bytes as cp932, falling back to UTF-8 marked with a "utf8:" prefix."""
+        try:
+            return raw.decode('cp932')
+        except UnicodeDecodeError:
+            return "utf8:" + raw.decode('utf-8')
+
+    def extract_string(self):
+        """Resolve the signed 16-bit offset at iPos; return (start_pos, text) or None."""
+        offset = struct.unpack_from('<h', self.data, self.iPos)[0]
+        start_pos = self.iPos + offset - 1
+        if start_pos < self.last_pos or start_pos >= self.len:
+            return None  # target outside the string area; iPos is left untouched
+        # Beyond the first slot, a string start must directly follow a NUL terminator.
+        if start_pos > self.last_pos + 1 and self.data[start_pos - 1] != 0:
+            return None
+        self.iPos += 2
+        # find() cannot run past the buffer when the final string has no terminator.
+        end = self.data.find(0, start_pos)
+        text = self._decode(self.data[start_pos:end if end >= 0 else self.len])
+        return (start_pos, text) if text else None
+
+    def extract_values(self):
+        """Return {offset: text} for strings referenced after each 0x5 byte before last_pos."""
+        strings = {}
+        self.iPos = self.header_size
+        while self.iPos < self.last_pos:
+            ins = self.data[self.iPos]
+            self.iPos += 1
+            if ins != 0x5:
+                continue
+            try:
+                found = self.extract_string()
+            except UnicodeDecodeError:
+                self.iPos -= 2  # undecodable target: rewind so the operand bytes are scanned too
+            else:
+                if found is not None:
+                    strings[found[0]] = found[1]
+        return strings
+
+    def extract_strings(self):
+        """Return {offset: text} for every non-empty NUL-separated string after last_pos."""
+        pos = self.last_pos + 1
+        while pos < self.len and self.data[pos] == 0:  # skip padding; stop at end of data
+            pos += 1
+        strings = {}
+        while pos < self.len:
+            end = self.data.find(0, pos)
+            end = self.len if end < 0 else end  # keep a final string lacking its terminator
+            text = self._decode(self.data[pos:end])
+            if text:
+                strings[pos] = text
+            pos = end + 1
+        self.iPos = self.len
+        return strings
+
+
+# Usage: extract_bp.py <script._bp> [output.json]  (output defaults to "<script._bp>.json")
+if len(sys.argv) < 2:
+    sys.exit(f"usage: {sys.argv[0]} <script._bp> [output.json]")
+base = sys.argv[1]
+json_f = sys.argv[2] if len(sys.argv) > 2 else f"{base}.json"
+scr = BPScript(base)
+print(scr.header_size)
+print(scr.last_pos)
+a = scr.extract_values()  # strings reached through 0x5 references in the instructions
+print(a)
+b = scr.extract_strings()  # strings found by scanning the area after last_pos
+print(b)
+print(len(a), len(b))
+# Diagnostic only: offsets that one of the two scans found and the other did
+# not (entries seen only by the linear scan first, then reference-only ones).
+missing = {key: text for key, text in b.items() if key not in a}
+missing.update((key, text) for key, text in a.items() if key not in b)
+print(missing)
+print(len(missing))
+# The JSON output holds the result of the linear scan (b); the integer offsets
+# are written as strings because JSON object keys cannot be integers.
+with open(json_f, "w", encoding="utf-8") as f:
+    json.dump(b, f, ensure_ascii=False, indent=2)
diff --git a/patch_bp.py b/patch_bp_old.py
similarity index 97%
rename from patch_bp.py
rename to patch_bp_old.py
index 5c683bd..015e5bb 100644
--- a/patch_bp.py
+++ b/patch_bp_old.py
@@ -1,91 +1,91 @@
-import argparse
-import os
-import struct
-
-def parse_arguments():
-    parser = argparse.ArgumentParser(description='Replace text in a binary file based on a mapping file.')
-    parser.add_argument('bp_file', type=str, help='The path to the binary file.')
-    parser.add_argument('txt_file', type=str, help='The path to the text file containing replacements.')
-    parser.add_argument('out_bp_file', type=str, nargs='?', help='The output binary file path. If not specified, will add "_out" to bp_file.')
-    return parser.parse_args()
-
-def read_replacements(txt_file):
-    replacements = {}
-    with open(txt_file, 'r', encoding='utf-8') as f:
-        while True:
-            original_line = f.readline()
-            if not original_line:
-                break
-            original_line = original_line.strip()
-            if not original_line:
-                continue
-            translation_line = f.readline().rstrip('\n')
-            if translation_line:  # Skip if translation is empty
-                # Extract the hex address and original text
-                hex_address, original_text = original_line.split(']', 1)
-                hex_address = hex_address.strip('[')
-                address = int(hex_address, 16)
-                translation_line = translation_line.split(']', 1)[1]
-                if not translation_line:
-                    continue
-                replacements[address] = (original_text.encode('cp932'), translation_line.encode('utf-8'))
-    return replacements
-
-def replace_text_in_bp(bp_file, replacements):
-    with open(bp_file, 'rb') as f:
-        data = bytearray(f.read())
-
-    for address, (original_text, translation_text) in replacements.items():
-        olen = len(original_text)
-        tlen = len(translation_text)
-        data[address:address + olen] = translation_text + b' ' * (olen - tlen)
-        print(address, olen, tlen, original_text.decode('cp932'), translation_text.decode(), data[address:address + olen])
-
-    return data
-
-# def replace_text_in_bp(bp_file, replacements):
-#     with open(bp_file, 'rb') as f:
-#         data = f.read()
-    
-#     offset = 0
-
-#     for address, (original_text, translation_text) in replacements.items():
-#         olen = len(original_text)
-#         tlen = len(translation_text)
-#         address += offset
-#         data = data[:address] + translation_text + data[address+olen:]
-#         print(address, offset, olen, tlen, original_text.decode('cp932'), translation_text.decode(), data[address:address + tlen])
-#         offset += tlen - olen
-
-#     data = bytearray(data)
-#     header = struct.unpack('<I', data[0:4])[0]
-#     size = struct.unpack('<I', data[4:8])[0]
-#     print(header, size)
-#     data[4:8] = struct.pack('<I', len(data) - header)
-#     size = struct.unpack('<I', data[4:8])[0]
-#     print(len(data) - header, size)
-#     return data
-
-def main():
-    args = parse_arguments()
-    
-    # Determine output file path
-    if args.out_bp_file:
-        out_bp_file = args.out_bp_file
-    else:
-        out_bp_file = f"{os.path.splitext(args.bp_file)[0]}_out{os.path.splitext(args.bp_file)[1]}"
-
-    # Read replacements from the txt file
-    replacements = read_replacements(args.txt_file)
-
-    # Replace text in the binary file
-    modified_data = replace_text_in_bp(args.bp_file, replacements)
-
-    # Write the modified data to the output file
-    with open(out_bp_file, 'wb') as f:
-        f.write(modified_data)
-
-    print(f'Modified binary file written to: {out_bp_file}')
-
-if __name__ == '__main__':
-    main()
+import argparse
+import os
+import struct
+
+def parse_arguments():  # Parse the command line: bp_file, txt_file and an optional out_bp_file.
+    parser = argparse.ArgumentParser(description='Replace text in a binary file based on a mapping file.')
+    parser.add_argument('bp_file', type=str, help='The path to the binary file.')
+    parser.add_argument('txt_file', type=str, help='The path to the text file containing replacements.')
+    parser.add_argument('out_bp_file', type=str, nargs='?', help='The output binary file path. If not specified, will add "_out" to bp_file.')  # nargs='?' makes this positional optional
+    return parser.parse_args()  # namespace carrying the three attributes declared above
+
+def read_replacements(txt_file):
+    """Parse "[hexaddr]original" / "[hexaddr]translation" line pairs into a dict.
+
+    Returns {address: (original as cp932 bytes, translation as UTF-8 bytes)};
+    pairs whose translation is empty are skipped.
+    """
+    table = {}
+    with open(txt_file, 'r', encoding='utf-8') as src:
+        for raw in iter(src.readline, ''):
+            source = raw.strip()
+            if not source:
+                continue  # blank separator line
+            translated = src.readline().rstrip('\n')
+            if not translated:
+                continue  # the line after the original is empty: nothing to replace
+            tag, source_text = source.split(']', 1)
+            offset = int(tag.strip('['), 16)
+            body = translated.split(']', 1)[1]
+            if body:
+                table[offset] = (source_text.encode('cp932'), body.encode('utf-8'))
+    return table
+
+def replace_text_in_bp(bp_file, replacements):
+    """Overwrite each original string in place, space-padded; return the patched bytearray."""
+    with open(bp_file, 'rb') as f:
+        data = bytearray(f.read())
+    for address, (original_text, translation_text) in replacements.items():
+        olen, tlen = len(original_text), len(translation_text)
+        if tlen > olen:  # a longer string would grow the file and shift every later address
+            raise ValueError(f'translation at {address:#x} needs {tlen} bytes, only {olen} available')
+        data[address:address + olen] = translation_text.ljust(olen, b' ')
+        print(address, olen, tlen, original_text.decode('cp932'), translation_text.decode(), data[address:address + olen])
+    return data
+
+# def replace_text_in_bp(bp_file, replacements):
+#     with open(bp_file, 'rb') as f:
+#         data = f.read()
+    
+#     offset = 0
+
+#     for address, (original_text, translation_text) in replacements.items():
+#         olen = len(original_text)
+#         tlen = len(translation_text)
+#         address += offset
+#         data = data[:address] + translation_text + data[address+olen:]
+#         print(address, offset, olen, tlen, original_text.decode('cp932'), translation_text.decode(), data[address:address + tlen])
+#         offset += tlen - olen
+
+#     data = bytearray(data)
+#     header = struct.unpack('<I', data[0:4])[0]
+#     size = struct.unpack('<I', data[4:8])[0]
+#     print(header, size)
+#     data[4:8] = struct.pack('<I', len(data) - header)
+#     size = struct.unpack('<I', data[4:8])[0]
+#     print(len(data) - header, size)
+#     return data
+
+def main():
+    """Patch the binary named on the command line and write the result to disk.
+
+    Without an explicit output path the result is written to
+    "<name>_out<ext>", derived from the input path.
+    """
+    args = parse_arguments()
+
+    target = args.out_bp_file
+    if not target:
+        stem, ext = os.path.splitext(args.bp_file)
+        target = f"{stem}_out{ext}"
+
+    # Load the address -> (original, translation) table, then apply it.
+    patched = replace_text_in_bp(args.bp_file, read_replacements(args.txt_file))
+
+    with open(target, 'wb') as out:
+        out.write(patched)
+
+    print(f'Modified binary file written to: {target}')
+
+if __name__ == '__main__':  # run the patcher only when executed as a script, not on import
+    main()