from KAGParser import *
from html import escape
import json
from os.path import isdir, join, basename, splitext, dirname
from os import listdir, makedirs
from typing import List
from csv import DictWriter

# NOTE: the import order above is kept deliberately: the star import comes
# first so the explicit standard-library names imported after it always win.


def _escape_text(text: str) -> str:
    """Escape ``&`` and ``<`` in plain text so it cannot be read as markup.

    Tags are written into the same message string as ``<name attr="...">``,
    so a literal ``<`` or ``&`` in the text has to be escaped to stay
    distinguishable from them.
    """
    # "&" must be handled first, otherwise the "&" of "&lt;" would be
    # escaped a second time.
    return text.replace("&", "&amp;").replace("<", "&lt;")


def _render_tag(node) -> str:
    """Serialise an inline tag node as ``<name key="value" ...>``."""
    attributes = "".join(
        f' {escape(key)}="{escape(value)}"'
        for key, value in node.attributes.items()
    )
    return f"<{escape(node.name)}{attributes}>"


def _make_entry(name, message: str) -> dict:
    """Build one output record; ``name`` is omitted when it is ``None``."""
    entry = {}
    if name is not None:
        entry['name'] = name
    entry['message'] = message
    return entry


def extract_script(script_path: str, output_path: str):
    """Extract the messages of a KAG script file into a JSON file.

    The script is parsed with ``KAGScriptParser``. Text and inline tags are
    accumulated into a message; a ``page`` command closes the current message
    and starts a new one. A command whose name is wrapped in ``【`` ``】``
    sets the ``name`` of the current message (presumably the speaker).

    Args:
        script_path: Path of the UTF-8 encoded script to read.
        output_path: Path of the JSON file to write. It receives a list of
            objects with an optional ``name`` key and a ``message`` key.
    """
    with open(script_path, "r", encoding="utf-8") as f:
        script_text = f.read()
    script = KAGScriptParser(script_text).parse(True)

    name = None
    parts: List[str] = []
    result = []
    for line in script:
        if isinstance(line, CommandNode):
            if line.name == 'page':
                # A page break always emits an entry, even an empty one.
                result.append(_make_entry(name, ''.join(parts)))
                parts = []
                name = None
            elif line.name.startswith("【") and line.name.endswith("】"):
                # Strip the surrounding brackets.
                name = line.name[1:-1]
        elif isinstance(line, list):
            for node in line:
                if isinstance(node, TextNode):
                    parts.append(_escape_text(node.text))
                elif isinstance(node, TagNode):
                    parts.append(_render_tag(node))

    # Flush whatever is left after the last page break.
    message = ''.join(parts)
    if name is not None or message:
        result.append(_make_entry(name, message))

    with open(output_path, "w", encoding="utf-8") as f:
        json.dump(result, f, ensure_ascii=False, indent=2)


def extract_dict(script_path: str, output_path: str):
    """Extract the glossary embedded in a ``dict.scn`` file into a CSV file.

    Only the lines between a line that is exactly ``var text = '`` and a line
    that is exactly ``';`` are parsed as KAG script. Each label starts an
    entry and each ``return`` command finishes it.

    Args:
        script_path: Path of the UTF-8 encoded script to read.
        output_path: Path of the CSV file to write (UTF-8 with BOM). Columns
            are ``term``, ``translation`` (always empty) and ``description``.

    Raises:
        ValueError: If a text line contains a node that is not a ``TextNode``.
    """
    with open(script_path, "r", encoding="utf-8") as f:
        script_text = f.read()

    in_dict = False
    dict_lines: List[str] = []
    for line in script_text.splitlines():
        if line == "var text = '":
            in_dict = True
        elif line == "';":
            in_dict = False
        elif in_dict:
            dict_lines.append(line)
    # The data handed to the parser starts with an empty line and every
    # collected line is newline-terminated.
    dict_data = '\n' + ''.join(text + "\n" for text in dict_lines)
    script = KAGScriptParser(dict_data).parse(True)

    entries = {}
    label = None
    term: List[str] = []
    for line in script:
        if isinstance(line, LabelNode):
            # Drop the first character of the label name
            # (presumably the label marker).
            label = line.name[1:]
        elif isinstance(line, list):
            # Separate consecutive text lines with a newline item.
            if term:
                term.append('\n')
            for node in line:
                if isinstance(node, TextNode):
                    term.append(node.text)
                else:
                    raise ValueError("Unexpected node type in dict.scn", node)
        elif isinstance(line, CommandNode):
            if line.name == "return":
                # Drop the name and the newline after it. Slice deletion
                # tolerates entries shorter than two items, where popping
                # would raise IndexError.
                del term[:2]
                # Drop the bracketed katakana reading, if present, together
                # with the newline after it.
                if term and term[0].startswith("【") and term[0].endswith("】"):
                    del term[:2]
                entries[label] = ''.join(term)
                term = []
                label = None

    with open(output_path, "w", encoding="utf-8-sig", newline="") as f:
        writer = DictWriter(
            f,
            fieldnames=["term", "translation", "description"],
            lineterminator="\n",
        )
        writer.writeheader()
        for key, description in entries.items():
            writer.writerow(
                {"term": key, "translation": "", "description": description}
            )


def _ensure_parent_dir(path: str) -> None:
    """Create the directory that will contain ``path`` if it is missing."""
    parent = dirname(path)
    if parent and not isdir(parent):
        makedirs(parent, exist_ok=True)


def _extract_file(script_path: str, output_path: str, file_name: str) -> None:
    """Extract one file, choosing the extractor from its file name."""
    _ensure_parent_dir(output_path)
    if file_name == "dict.scn":
        extract_dict(script_path, output_path)
    else:
        extract_script(script_path, output_path)


def extract_script_auto(script_path: str, output_path: str):
    """Extract a single script, or every ``.scn`` script in a directory.

    When ``script_path`` is a directory, each file in it whose name ends in
    ``.scn`` (case-insensitive) is extracted into ``output_path`` under the
    same base name: ``dict.scn`` becomes a ``.csv`` file, every other script
    a ``.json`` file. Sub-directories are not traversed. Otherwise
    ``script_path`` is treated as one file and ``output_path`` as the file
    to write.

    Args:
        script_path: Path of a script file or of a directory of scripts.
        output_path: Path of the output file, or of the output directory
            when ``script_path`` is a directory.
    """
    if not isdir(script_path):
        _extract_file(script_path, output_path, basename(script_path))
        return

    for file in listdir(script_path):
        if not file.lower().endswith(".scn"):
            continue
        extension = ".csv" if file == "dict.scn" else ".json"
        output_file = splitext(basename(file))[0] + extension
        _extract_file(join(script_path, file), join(output_path, output_file), file)


if __name__ == "__main__":
    from argparse import ArgumentParser

    parser = ArgumentParser(description="Process KAG script files")
    subparser = parser.add_subparsers(title="Commands", dest="command")
    extract_parser = subparser.add_parser("extract", help="Extract script to JSON")
    extract_parser.add_argument("script_path", help="Path to KAG script file or directory")
    extract_parser.add_argument("output_path", help="Path to output JSON file or directory")
    args = parser.parse_args()
    if args.command == "extract":
        extract_script_auto(args.script_path, args.output_path)
    else:
        # No sub-command given: show usage instead of exiting silently.
        parser.print_help()