"""Extract translatable text from KAG script files and patch translations back.

Two sub-commands are exposed on the command line:

* ``extract`` writes each ``.scn`` script as JSON (or ``dict.scn`` as CSV).
* ``patch`` merges translated ``.m3t`` / ``.csv`` files back into the scripts.
"""
# NOTE: the star import stays first so that the explicit stdlib imports below
# win if KAGParser happens to export a name with the same spelling.
from KAGParser import *
from html import escape, unescape
import json
import re
from os.path import isdir, join, basename, splitext, dirname, exists
from os import listdir, makedirs
from typing import Any, Dict, List
from csv import DictReader, DictWriter

# Lines that delimit the embedded dictionary text inside dict.scn.
_DICT_START = "var text = '"
_DICT_END = "';"

# Splits a serialized message into tag / text parts (tags are kept by the group).
_TAG_RE = re.compile(r"(<[^>]+>)")
# Matches either key="value" (groups 1, 2) or a bare key (group 3).
_ATTR_RE = re.compile(r"""([a-zA-Z0-9_]+)="([^"]*)"|([a-zA-Z0-9_]+)""", re.VERBOSE)

# Opening/closing bracket pairs restored on a translation when the original had them.
_BRACKET_PAIRS = (('「', '」'), ('(', ')'))


def _escape_text(text: str) -> str:
    """Escape ``&`` and ``<`` so plain text cannot be mistaken for a tag."""
    return text.replace("&", "&amp;").replace("<", "&lt;")


def _serialize_tag(node) -> str:
    """Render a TagNode as ``<name key="value" ...>`` with HTML-escaped parts."""
    pieces = [f"<{escape(node.name)}"]
    for k, v in node.attributes.items():
        if v is True:
            # parse_message reads a bare key back as True, so emit that form.
            pieces.append(f' {escape(k)}')
        else:
            pieces.append(f' {escape(k)}="{escape(str(v))}"')
    pieces.append(">")
    return ''.join(pieces)


def extract_script(script_path: str, output_path: str):
    """Extract the messages of one KAG script into a JSON file.

    Text and tags are accumulated until a ``page`` command, which closes one
    entry. A command whose name is wrapped in ``【…】`` sets the speaker name of
    the entry being accumulated. Any trailing name/text after the last ``page``
    is written as a final entry.

    Args:
        script_path: KAG script to read (UTF-8).
        output_path: JSON file to write; a list of ``{"name"?, "message"}``.
    """
    with open(script_path, "r", encoding="utf-8") as f:
        script_text = f.read()
    parser = KAGScriptParser(script_text)
    script = parser.parse(True)

    name = None
    message = ''
    result = []

    def flush():
        entry = {}
        if name is not None:
            entry['name'] = name
        entry['message'] = message
        result.append(entry)

    for line in script:
        if isinstance(line, CommandNode):
            cmd = line
            if cmd.name == 'page':
                flush()
                message = ''
                name = None
            elif cmd.name.startswith("【") and cmd.name.endswith("】"):
                name = cmd.name[1:-1]
        elif isinstance(line, list):
            for node in line:
                if isinstance(node, TextNode):
                    # Escaped here, unescaped again by parse_message.
                    message += _escape_text(node.text)
                elif isinstance(node, TagNode):
                    message += _serialize_tag(node)

    if name is not None or message:
        flush()

    with open(output_path, "w", encoding="utf-8") as f:
        json.dump(result, f, ensure_ascii=False, indent=2)


def extract_dict_terms(script_path: str):
    """Load the translated dictionary CSV.

    Args:
        script_path: CSV file with at least a ``term`` column (UTF-8, BOM ok).

    Returns:
        ``(terms, overrides)``: ``terms`` maps each term to its CSV row;
        ``overrides`` maps the part of a term before its first space to the
        full term, for terms that contain a space.
    """
    terms = {}
    overrides = {}
    with open(script_path, "r", encoding="utf-8-sig") as f:
        for row in DictReader(f):
            term = row['term']
            terms[term] = row
            if ' ' in term:
                overrides[term.split(' ')[0]] = term
    return terms, overrides


def _scan_dict_region(script_text: str):
    """Locate the dictionary text embedded between the start and end markers.

    Returns:
        ``(dict_data, start_line, end_line)`` where ``dict_data`` is the
        enclosed text (with a leading newline), ``start_line`` is the index of
        the first line after the start marker and ``end_line`` the index of the
        last line before the end marker. Either index is ``None`` when the
        corresponding marker was not found.
    """
    in_dict = False
    chunks = ['\n']
    start_line = None
    end_line = None
    for i, line in enumerate(script_text.splitlines()):
        if line == _DICT_START:
            in_dict = True
            start_line = i + 1
        elif line == _DICT_END:
            # Only a marker that actually closes the region counts; an
            # unrelated "';" line later in the file must not move the end.
            if in_dict:
                end_line = i - 1
            in_dict = False
        elif in_dict:
            chunks.append(line + "\n")
    return ''.join(chunks), start_line, end_line


def extract_dict(script_path: str, output_path: str):
    """Extract the dictionary entries of ``dict.scn`` into a CSV file.

    Each entry runs from a label to a ``return`` command. The first text line
    (the name) is dropped, as is a following line wrapped in ``【…】``; the rest
    becomes the ``description`` column. ``translation`` is left empty.

    Args:
        script_path: dictionary script to read (UTF-8).
        output_path: CSV file to write (UTF-8 with BOM).

    Raises:
        ValueError: if a text line contains a node that is not a TextNode.
    """
    with open(script_path, "r", encoding="utf-8") as f:
        script_text = f.read()
    dict_data, _, _ = _scan_dict_region(script_text)
    script = KAGScriptParser(dict_data).parse(True)

    entries = {}
    label = None
    term: List[str] = []
    for line in script:
        if isinstance(line, LabelNode):
            label = line.name[1:]
        elif isinstance(line, list):
            if term:
                term.append('\n')
            for node in line:
                if isinstance(node, TextNode):
                    term.append(node.text)
                else:
                    raise ValueError("Unexpected node type in dict.scn", node)
        elif isinstance(line, CommandNode):
            if line.name == "return":
                # Drop the name and the newline that follows it. Slice
                # deletion does not raise on a short or empty entry.
                del term[:2]
                # Drop the katakana reading line and its newline, if present.
                if term and term[0].startswith("【") and term[0].endswith("】"):
                    del term[:2]
                entries[label] = ''.join(term)
                term = []
                label = None

    with open(output_path, "w", encoding="utf-8-sig", newline="") as f:
        writer = DictWriter(f, fieldnames=["term", "translation", "description"], lineterminator="\n")
        writer.writeheader()
        for k, v in entries.items():
            writer.writerow({"term": k, "translation": "", "description": v})


def parse_message(message: str) -> List[ParsedLine]:
    """Parse a message string (HTML-escaped KAG tag format) back into ParsedLines.

    Reverses the serialization in extract_script: ``<name key="value">`` →
    TagNode, HTML entities → TextNode text, splits on ``\\n``. A bare attribute
    key becomes ``True``. Lines that produce no nodes are omitted.
    """
    result: List[ParsedLine] = []
    for line in message.split("\n"):
        parsed_line: ParsedLine = []
        for part in _TAG_RE.split(line):
            if not part:
                continue
            is_tag = part.startswith("<") and part.endswith(">")
            tag_parts = part[1:-1].strip().split(maxsplit=1) if is_tag else []
            if not tag_parts:
                # Plain text, or something like "< >" that has no tag name.
                parsed_line.append(TextNode(unescape(part)))
                continue
            tag_name = unescape(tag_parts[0])
            attributes: Dict[str, Any] = {}
            if len(tag_parts) > 1:
                for m in _ATTR_RE.finditer(tag_parts[1]):
                    if m.group(1) is not None:
                        # key="value"; the value may legitimately be empty.
                        attributes[unescape(m.group(1))] = unescape(m.group(2))
                    elif m.group(3):
                        # boolean key
                        attributes[unescape(m.group(3))] = True
            parsed_line.append(TagNode(name=tag_name, attributes=attributes))
        if parsed_line:
            result.append(parsed_line)
    return result


def patch_dict(script_path: str, dict_path: str, output_path: str):
    """Rewrite the dictionary embedded in ``dict.scn`` from a translated CSV.

    Labels are replaced by the translated term, the original text lines are
    discarded, and before each ``return`` the translation plus the description
    lines from the CSV are inserted. Everything outside the dictionary markers
    is copied through unchanged.

    Args:
        script_path: original dictionary script (UTF-8).
        dict_path: CSV with ``term``, ``translation`` and ``description``.
        output_path: patched script to write.

    Raises:
        ValueError: if the dictionary markers are missing, or the dictionary
            contains a command other than ``return``.
        KeyError: if a label has no row in the CSV.
    """
    with open(script_path, "r", encoding="utf-8") as f:
        script_text = f.read()
    dict_data, start_line, end_line = _scan_dict_region(script_text)
    if start_line is None or end_line is None:
        # Fail before touching the output file instead of half-writing it.
        raise ValueError("dictionary markers not found", script_path)
    script = KAGScriptParser(dict_data).parse(True)

    dicts = {}
    with open(dict_path, "r", encoding="utf-8-sig") as f:
        for row in DictReader(f):
            dicts[row['term']] = row

    new_script = []
    term = None
    for line in script:
        if isinstance(line, LabelNode):
            term = line.name[1:]
            new_script.append(LabelNode('.' + dicts[term]['translation']))
        elif isinstance(line, CommentNode):
            new_script.append([TextNode(f";{line.text}")])
        elif isinstance(line, (EmptyLineNode, list)):
            # Original blank and text lines are replaced by the CSV content.
            pass
        elif isinstance(line, CommandNode):
            if line.name != "return":
                raise ValueError("Unsupported command", line.name)
            data = dicts[term]
            new_script.append([TextNode(data['translation'])])
            desc: str = data['description']
            new_script.extend([TextNode(d)] for d in desc.splitlines())
            new_script.append(EmptyLineNode())
            new_script.append(line)

    dict_data = KAGScriptParser.serialize(new_script)
    origin_lines = script_text.splitlines(True)
    with open(output_path, 'w', encoding='UTF-8') as f:
        f.writelines(origin_lines[:start_line])
        f.write(dict_data)
        f.write("\n")
        f.writelines(origin_lines[end_line + 1:])


def wrap_lines(input: List[ParsedLine], max_width: int = 30) -> List[ParsedLine]:
    """Re-wrap parsed lines so no output line holds more than ``max_width`` characters of text.

    Tags take no width. Every input line still ends an output line, so
    explicit line breaks are preserved.

    Args:
        input: lines to wrap (the name shadows the builtin but is kept because
            it is part of the signature).
        max_width: maximum number of text characters per output line.

    Raises:
        ValueError: if ``max_width`` is less than 1 (wrapping could never
            make progress).
    """
    if max_width < 1:
        raise ValueError("max_width must be at least 1", max_width)

    result: List[ParsedLine] = []
    current_line: ParsedLine = []
    current_len = 0
    for parsed_line in input:
        for node in parsed_line:
            if isinstance(node, TagNode):
                current_line.append(node)
            elif isinstance(node, TextNode):
                text = node.text
                while text:
                    # current_len < max_width always holds here, so at least
                    # one character is taken on every pass.
                    take = min(len(text), max_width - current_len)
                    current_line.append(TextNode(text[:take]))
                    current_len += take
                    text = text[take:]
                    if current_len >= max_width:
                        result.append(current_line)
                        current_line = []
                        current_len = 0
        # End of original line — flush current line to preserve \n breaks
        if current_line:
            result.append(current_line)
            current_line = []
            current_len = 0
    if current_line:
        result.append(current_line)
    return result


def _restore_brackets(original: str, translated: str) -> str:
    """Re-add a leading/trailing bracket the original had but the translation lost."""
    for opening, closing in _BRACKET_PAIRS:
        if original.startswith(opening) and not translated.startswith(opening):
            translated = opening + translated
        if original.endswith(closing) and not translated.endswith(closing):
            translated += closing
    return translated


def _read_m3t_messages(m3t_path: str):
    """Read translated messages from an m3t file.

    Lines starting with ``○ NAME:`` give the speaker, other ``○`` lines the
    original text, and ``●`` lines the translation (with literal ``\\n``
    standing for a line break). One entry is produced per ``●`` line.
    """
    messages = []
    name = None
    ori_message = None
    with open(m3t_path, "r", encoding="utf-8") as f:
        for line in f:
            line = line.strip()
            if line.startswith("○ NAME:"):
                name = line[7:].strip()
            elif line.startswith("○"):
                ori_message = line[1:].strip()
            elif line.startswith("●"):
                d = {}
                if name is not None:
                    d['name'] = name
                d['message'] = line[1:].strip().replace('\\n', '\n')
                if ori_message:
                    d['message'] = _restore_brackets(ori_message, d['message'])
                messages.append(d)
                name = None
    return messages


def patch_script(script_path: str, m3t_path: str, output_path: str, names, term):
    """Merge translated messages into one KAG script.

    Original text lines are dropped; at each ``page`` command the next
    translated message is parsed, its ``wd`` tags are pointed at the translated
    dictionary term, the text is re-wrapped and inserted before the command.
    Speaker commands (``【…】``) are renamed via ``names`` when a mapping exists.

    Args:
        script_path: original script (UTF-8).
        m3t_path: translated m3t file for this script.
        output_path: patched script to write.
        names: mapping from original speaker name to translated name.
        term: ``(terms, overrides)`` as returned by ``extract_dict_terms``.

    Raises:
        IndexError: if the script has more ``page`` commands than the m3t
            file has messages.
        ValueError: if a ``wd`` tag refers to a term missing from both
            ``terms`` and ``overrides``.
    """
    with open(script_path, "r", encoding="utf-8") as f:
        script_text = f.read()
    script = KAGScriptParser(script_text).parse(True)
    new_script: ParsedScript = []
    terms, overrides = term
    messages = _read_m3t_messages(m3t_path)

    i = 0
    for line in script:
        if isinstance(line, CommandNode):
            if line.name == "page":
                if i >= len(messages):
                    raise IndexError(
                        f"{script_path}: more page commands than translated messages ({len(messages)})"
                    )
                message = messages[i]['message']
                i += 1
                nws = parse_message(message)
                for nw in nws:
                    for node in nw:
                        if isinstance(node, TagNode) and node.name == 'wd':
                            target = node.attributes['s']
                            if target not in terms:
                                if target not in overrides:
                                    print(message)
                                    raise ValueError('unknown wd target', target)
                                target = overrides[target]
                            node.attributes['s'] = terms[target]['translation']
                new_script.extend(wrap_lines(nws))
            elif line.name.startswith("【") and line.name.endswith("】"):
                name = line.name[1:-1]
                if name in names:
                    line.name = f"【{names[name]}】"
        elif isinstance(line, list):
            # Original text is replaced by the translation inserted at `page`.
            continue
        new_script.append(line)

    if i != len(messages):
        print(f"WARNING: processed message not matched. expected {len(messages)}, actual {i}, {script_path}")
    script_data = KAGScriptParser.serialize(new_script)
    with open(output_path, 'w', encoding='utf-8') as f:
        f.write(script_data)


def extract_script_auto(script_path: str, output_path: str):
    """Extract a single script, or every ``.scn`` file in a directory.

    In directory mode ``dict.scn`` is written as ``dict.csv`` and every other
    script as ``<stem>.json`` under ``output_path``. Missing output
    directories are created.
    """
    if isdir(script_path):
        for file in listdir(script_path):
            if not file.lower().endswith(".scn"):
                continue
            is_dict = file == "dict.scn"
            full_path = join(script_path, file)
            output_file = splitext(basename(file))[0] + (".csv" if is_dict else ".json")
            output_full_path = join(output_path, output_file)
            pdir = dirname(output_full_path)
            if pdir and not isdir(pdir):
                makedirs(pdir, exist_ok=True)
            if is_dict:
                extract_dict(full_path, output_full_path)
            else:
                extract_script(full_path, output_full_path)
    else:
        pdir = dirname(output_path)
        if pdir and not isdir(pdir):
            makedirs(pdir, exist_ok=True)
        if basename(script_path) == "dict.scn":
            extract_dict(script_path, output_path)
        else:
            extract_script(script_path, output_path)


def read_names(name_dict_path: str):
    """Load the speaker-name CSV as a ``JP_Name`` → ``CN_Name`` mapping."""
    names = {}
    with open(name_dict_path, 'r', encoding='utf-8-sig') as f:
        for row in DictReader(f):
            names[row['JP_Name']] = row['CN_Name']
    return names


def patch_script_auto(script_path: str, m3t_path: str, output_path: str, name_dict_path: str, dict_path: str):
    """Patch a single script, or every ``.scn`` file in a directory.

    In directory mode the translation for ``dict.scn`` is ``dict.csv`` and for
    any other script ``<stem>.m3t``, both looked up in ``m3t_path``; scripts
    without a translation file are skipped. Missing output directories are
    created.

    Args:
        script_path: script file or directory of scripts.
        m3t_path: translation file or directory of translation files.
        output_path: output file or directory.
        name_dict_path: speaker-name CSV (see ``read_names``).
        dict_path: translated dictionary CSV (see ``extract_dict_terms``).
    """
    names = read_names(name_dict_path)
    term = extract_dict_terms(dict_path)
    if isdir(script_path):
        # Match the single-file branch and extract_script_auto, which both
        # create the destination directory on demand.
        if output_path and not isdir(output_path):
            makedirs(output_path, exist_ok=True)
        for file in listdir(script_path):
            if not file.lower().endswith(".scn"):
                continue
            is_dict = file == "dict.scn"
            full_path = join(script_path, file)
            m3t_fpath = splitext(basename(file))[0] + (".csv" if is_dict else ".m3t")
            m3t_full_path = join(m3t_path, m3t_fpath)
            if not exists(m3t_full_path):
                continue
            output_full_path = join(output_path, basename(file))
            if is_dict:
                patch_dict(full_path, m3t_full_path, output_full_path)
            else:
                patch_script(full_path, m3t_full_path, output_full_path, names, term)
    else:
        pdir = dirname(output_path)
        if pdir and not isdir(pdir):
            makedirs(pdir, exist_ok=True)
        if basename(script_path) == "dict.scn":
            patch_dict(script_path, m3t_path, output_path)
        else:
            patch_script(script_path, m3t_path, output_path, names, term)


def main():
    """Parse the command line and run the requested sub-command."""
    from argparse import ArgumentParser

    parser = ArgumentParser(description="Process KAG script files")
    subparser = parser.add_subparsers(title="Commands", dest="command")

    extract_parser = subparser.add_parser("extract", help="Extract script to JSON")
    extract_parser.add_argument("script_path", help="Path to KAG script file or directory")
    extract_parser.add_argument("output_path", help="Path to output JSON file or directory")

    patch_parser = subparser.add_parser("patch", help="Patch script")
    patch_parser.add_argument("script_path", help="Path to KAG script file or directory")
    patch_parser.add_argument("m3t_path", help="Path to m3t file or directory")
    patch_parser.add_argument("output_path", help="Path to output KAG script file or directory")
    patch_parser.add_argument("name_dict_path", help="Path to name dict")
    patch_parser.add_argument("dict_path", help="path to dict.csv")

    args = parser.parse_args()
    if args.command == "extract":
        extract_script_auto(args.script_path, args.output_path)
    elif args.command == "patch":
        patch_script_auto(args.script_path, args.m3t_path, args.output_path, args.name_dict_path, args.dict_path)
    else:
        # No sub-command given: show usage instead of exiting silently.
        parser.print_help()


if __name__ == "__main__":
    main()