diff --git a/iinkai.py b/iinkai.py
index 27fd489..b0b6bfd 100644
--- a/iinkai.py
+++ b/iinkai.py
@@ -1,10 +1,11 @@
 from KAGParser import *
-from html import escape
+from html import escape, unescape
 import json
-from os.path import isdir, join, basename, splitext, dirname
+import re
+from os.path import isdir, join, basename, splitext, dirname, exists
 from os import listdir, makedirs
-from typing import List
-from csv import DictWriter
+from typing import Any, Dict, List
+from csv import DictReader, DictWriter
 
 
 def extract_script(script_path: str, output_path: str):
@@ -93,6 +94,105 @@ def extract_dict(script_path: str, output_path: str):
         for k, v in dict.items():
             writer.writerow({"term": k, "translation": "", "description": v})
 
+
+def parse_message(message: str) -> List[ParsedLine]:
+    """Parse a message string (HTML-escaped KAG tag format) back into ParsedLines.
+
+    Reverses the serialization in extract_script: <tag attr="value"> → TagNode,
+    HTML entities → TextNode text, splits on \\n.
+
+    Lines that produce no nodes (empty lines) are omitted from the result.
+    """
+
+    _TAG_RE = re.compile(r"(<[^>]+>)")
+    _ATTR_RE = re.compile(r"""([a-zA-Z0-9_]+)="([^"]*)"|([a-zA-Z0-9_]+)""", re.VERBOSE)
+
+    result: List[ParsedLine] = []
+    for line in message.split("\n"):
+        parsed_line: ParsedLine = []
+        parts = _TAG_RE.split(line)
+        for part in parts:
+            if not part:
+                continue
+            if part.startswith("<") and part.endswith(">"):
+                inner = part[1:-1].strip()
+                tag_parts = inner.split(maxsplit=1)
+                tag_name = unescape(tag_parts[0])
+                attributes: Dict[str, Any] = {}
+                if len(tag_parts) > 1:
+                    for m in _ATTR_RE.finditer(tag_parts[1]):
+                        # Test against None, not truthiness: key="" is a
+                        # valid attribute whose value is the empty string.
+                        if m.group(1) is not None:  # key="value"
+                            key = unescape(m.group(1))
+                            value = unescape(m.group(2))
+                            attributes[key] = value
+                        elif m.group(3):  # boolean key
+                            attributes[unescape(m.group(3))] = True
+                parsed_line.append(TagNode(name=tag_name, attributes=attributes))
+            else:
+                parsed_line.append(TextNode(unescape(part)))
+        if parsed_line:
+            result.append(parsed_line)
+
+    return result
+
+
+def patch_script(script_path: str, m3t_path: str, output_path: str, names):
+    """Write a copy of a KAG script with its text replaced from an m3t file.
+
+    Each "●" line of the m3t file is consumed, in order, by one "page"
+    command of the script. Commands named 【…】 have the bracketed name
+    replaced via ``names`` when it is a key there; list-typed script entries
+    are dropped.
+
+    Raises:
+        IndexError: if the m3t file has fewer "●" messages than the script
+            has "page" commands.
+    """
+    with open(script_path, "r", encoding="utf-8") as f:
+        script_text = f.read()
+    script = KAGScriptParser(script_text).parse(True)
+    new_script: ParsedScript = []
+    messages = []
+    name = None
+    with open(m3t_path, "r", encoding="utf-8") as f:
+        for line in f:
+            line = line.strip()
+            if line.startswith("○ NAME:"):
+                name = line[7:].strip()
+            elif line.startswith("●"):
+                message = line[1:].strip()
+                d = {}
+                if name is not None:
+                    d['name'] = name
+                d['message'] = message.replace('\\n', '\n')
+                messages.append(d)
+                name = None
+    i = 0
+    for line in script:
+        if isinstance(line, CommandNode):
+            if line.name == "page":
+                if i >= len(messages):
+                    raise IndexError(
+                        f"{m3t_path}: fewer messages than page commands in {script_path}"
+                    )
+                message = messages[i]['message']
+                i += 1
+                new_script.extend(parse_message(message))
+            elif line.name.startswith("【") and line.name.endswith("】"):
+                name = line.name[1:-1]
+                if name in names:
+                    line.name = f"【{names[name]}】"
+        elif isinstance(line, list):
+            continue
+        new_script.append(line)
+    if i != len(messages):
+        print(f"WARNING: processed message not matched. expected {len(messages)}, actual {i}")
+    script_data = KAGScriptParser.serialize(new_script)
+    with open(output_path, 'w', encoding='utf-8') as f:
+        f.write(script_data)
+
+
 def extract_script_auto(script_path: str, output_path: str):
     if isdir(script_path):
         for file in listdir(script_path):
@@ -123,6 +223,60 @@ def extract_script_auto(script_path: str, output_path: str):
             extract_script(script_path, output_path)
 
 
+def read_names(name_dict_path: str):
+    """Load the name mapping from a CSV file.
+
+    The CSV must have ``JP_Name`` and ``CN_Name`` columns; the file is read as
+    ``utf-8-sig`` so a leading BOM is ignored. Returns a dict mapping each
+    ``JP_Name`` to its ``CN_Name``.
+    """
+    names = {}
+    with open(name_dict_path, 'r', encoding='utf-8-sig', newline='') as f:
+        for row in DictReader(f):
+            names[row['JP_Name']] = row['CN_Name']
+    return names
+
+
+def patch_script_auto(script_path: str, m3t_path: str, output_path: str, name_dict_path: str):
+    """Patch a single script, or every ``.scn`` file in a directory.
+
+    In directory mode each script is paired with the same-named ``.m3t`` file
+    under ``m3t_path``; scripts without one are skipped. ``dict.scn`` is never
+    patched (not implemented).
+    """
+    names = read_names(name_dict_path)
+    if isdir(script_path):
+        # Create the output directory up front, as the single-file branch
+        # does for its parent; otherwise the first write fails.
+        makedirs(output_path, exist_ok=True)
+        for file in listdir(script_path):
+            if not file.lower().endswith(".scn"):
+                continue
+            full_path = join(script_path, file)
+            m3t_fpath = splitext(basename(file))[0]
+            if file == "dict.scn":
+                m3t_fpath += ".csv"
+            else:
+                m3t_fpath += ".m3t"
+            m3t_full_path = join(m3t_path, m3t_fpath)
+            if not exists(m3t_full_path):
+                continue
+            output_full_path = join(output_path, basename(file))
+            if file == "dict.scn":
+                pass
+            else:
+                patch_script(full_path, m3t_full_path, output_full_path, names)
+    else:
+        pdir = dirname(output_path)
+        if pdir and not isdir(pdir):
+            makedirs(pdir, exist_ok=True)
+        base_name = basename(script_path)
+        if base_name == "dict.scn":
+            pass
+        else:
+            patch_script(script_path, m3t_path, output_path, names)
+
+
 if __name__ == "__main__":
     from argparse import ArgumentParser
     parser = ArgumentParser(description="Process KAG script files")
@@ -130,6 +284,13 @@ if __name__ == "__main__":
     extract_parser = subparser.add_parser("extract", help="Extract script to JSON")
     extract_parser.add_argument("script_path", help="Path to KAG script file or directory")
     extract_parser.add_argument("output_path", help="Path to output JSON file or directory")
+    patch_parser = subparser.add_parser("patch", help="Patch script")
+    patch_parser.add_argument("script_path", help="Path to KAG script file or directory")
+    patch_parser.add_argument("m3t_path", help="Path to m3t file or directory")
+    patch_parser.add_argument("output_path", help="Path to output KAG script file or directory")
+    patch_parser.add_argument("name_dict_path", help="Path to name dict")
     args = parser.parse_args()
     if args.command == "extract":
         extract_script_auto(args.script_path, args.output_path)
+    elif args.command == "patch":
+        patch_script_auto(args.script_path, args.m3t_path, args.output_path, args.name_dict_path)