# GalScripts/iinkai.py

from KAGParser import *
from html import escape, unescape
import json
from os.path import isdir, join, basename, splitext, dirname, exists
from os import listdir, makedirs
from typing import List
from csv import DictReader, DictWriter


def _serialize_tag(node) -> str:
    """Render a tag node as ``<name key="value" flag>`` with HTML-escaped parts."""
    pieces = [f"<{escape(node.name)}"]
    for key, value in node.attributes.items():
        if value is True:
            # Valueless attribute: emit the bare key, which is the form
            # parse_message in this file reads back as ``True``.
            pieces.append(f" {escape(key)}")
        else:
            # str() keeps string values unchanged and avoids html.escape
            # failing on a non-string value.
            pieces.append(f' {escape(key)}="{escape(str(value))}"')
    pieces.append(">")
    return "".join(pieces)


def extract_script(script_path: str, output_path: str):
    """Extract the messages of a KAG script into a JSON file.

    The script at ``script_path`` is parsed with ``KAGScriptParser``. Every
    ``page`` command closes the current message; a command whose name is
    wrapped in ``【`` / ``】`` sets the speaker name of the current message.
    Text nodes are appended with ``&`` and ``<`` escaped, and tag nodes are
    serialized as ``<name key="value">``. Text or a speaker name left over
    after the last ``page`` command is written as a final entry.

    Args:
        script_path: Path of the UTF-8 encoded script to read.
        output_path: Path of the JSON file to write. Each entry is an object
            with a ``message`` key and, when a speaker was set, a ``name`` key.
    """
    with open(script_path, "r", encoding="utf-8") as f:
        script_text = f.read()
    script = KAGScriptParser(script_text).parse(True)

    result = []
    name = None
    parts: List[str] = []

    def make_entry(message: str) -> dict:
        # "name" is inserted first so the JSON key order stays name, message.
        entry = {}
        if name is not None:
            entry['name'] = name
        entry['message'] = message
        return entry

    for line in script:
        if isinstance(line, CommandNode):
            if line.name == 'page':
                result.append(make_entry(''.join(parts)))
                parts = []
                name = None
            elif line.name.startswith("【") and line.name.endswith("】"):
                name = line.name[1:-1]
        elif isinstance(line, list):
            for node in line:
                if isinstance(node, TextNode):
                    # Only "&" and "<" are escaped so that literal text can
                    # never be mistaken for the start of a serialized tag.
                    parts.append(node.text.replace("&", "&amp;").replace("<", "&lt;"))
                elif isinstance(node, TagNode):
                    parts.append(_serialize_tag(node))

    # Flush whatever follows the last page command.
    message = ''.join(parts)
    if name is not None or message:
        result.append(make_entry(message))

    with open(output_path, "w", encoding="utf-8") as f:
        json.dump(result, f, ensure_ascii=False, indent=2)


def extract_dict_terms(script_path: str):
    """Load the term CSV and build two lookup tables.

    Returns:
        A ``(terms, overrides)`` tuple. ``terms`` maps each value of the
        ``term`` column to its full CSV row. ``overrides`` maps the part of a
        term before its first ideographic space to the complete term, for
        terms that contain such a space.
    """
    separator = '　'
    rows_by_term = {}
    prefix_to_term = {}
    with open(script_path, "r", encoding="utf-8-sig") as csv_file:
        for record in DictReader(csv_file):
            key = record['term']
            rows_by_term[key] = record
            head, found, _rest = key.partition(separator)
            if found:
                prefix_to_term[head] = key
    return rows_by_term, prefix_to_term


def extract_dict(script_path: str, output_path: str):
    """Extract the dictionary entries embedded in a script into a CSV file.

    The lines between a ``var text = '`` line and the following ``';`` line
    are collected and parsed as a KAG script. Each label starts an entry and
    each ``return`` command ends it. The CSV written to ``output_path`` has
    the columns ``term`` (the label without its first character),
    ``translation`` (left empty) and ``description``.

    Raises:
        ValueError: If a text line of the dictionary contains a node that is
            not a ``TextNode``.
    """
    with open(script_path, "r", encoding="utf-8") as src:
        source_lines = src.read().splitlines()

    # The embedded text starts with a newline, then every line inside the
    # quoted block followed by a newline.
    collected = ['\n']
    inside = False
    for raw in source_lines:
        if raw == "var text = '":
            inside = True
        elif raw == "';":
            inside = False
        elif inside:
            collected.append(raw + "\n")
    parsed = KAGScriptParser(''.join(collected)).parse(True)

    descriptions = {}
    current_label = None
    pieces: List[str] = []
    for entry in parsed:
        if isinstance(entry, LabelNode):
            current_label = entry.name[1:]
            continue
        if isinstance(entry, list):
            # Consecutive text lines of one entry are separated by a newline.
            if pieces:
                pieces.append('\n')
            for node in entry:
                if not isinstance(node, TextNode):
                    raise ValueError("Unexpected node type in dict.scn", node)
                pieces.append(node.text)
            continue
        if not isinstance(entry, CommandNode) or entry.name != "return":
            continue

        # Drop the name and the piece that follows it.
        pieces.pop(0)
        pieces.pop(0)
        # Drop the bracketed katakana line, if there is one.
        if pieces and pieces[0].startswith("【") and pieces[0].endswith("】"):
            pieces.pop(0)
            pieces.pop(0)  # remove the line break
        descriptions[current_label] = ''.join(pieces)
        pieces = []
        current_label = None

    with open(output_path, "w", encoding="utf-8-sig", newline="") as out:
        writer = DictWriter(out, fieldnames=["term", "translation", "description"], lineterminator="\n")
        writer.writeheader()
        writer.writerows(
            {"term": label, "translation": "", "description": text}
            for label, text in descriptions.items()
        )


def parse_message(message: str) -> List[ParsedLine]:
    """Parse a message string (HTML-escaped KAG tag format) back into ParsedLines.

    Reverses the serialization in extract_script: ``<tagname key="val">``
    becomes a ``TagNode``, everything else becomes a ``TextNode`` with its
    HTML entities decoded. The message is split on ``\\n`` and each non-empty
    line yields one ParsedLine.

    Attribute handling:
        * ``key="value"`` is stored as a string, including the empty string
          for ``key=""``.
        * A bare ``key`` is stored as ``True``.

    Args:
        message: The serialized message text.

    Returns:
        One list of nodes per non-empty line of ``message``.
    """
    # This file never imports ``re`` itself; import it here instead of relying
    # on the name leaking in through ``from KAGParser import *``.
    import re

    tag_re = re.compile(r"(<[^>]+>)")
    attr_re = re.compile(r'([a-zA-Z0-9_]+)="([^"]*)"|([a-zA-Z0-9_]+)')

    result: List[ParsedLine] = []
    for line in message.split("\n"):
        parsed_line: ParsedLine = []
        for part in tag_re.split(line):
            if not part:
                continue
            if part.startswith("<") and part.endswith(">"):
                tag_parts = part[1:-1].strip().split(maxsplit=1)
                if not tag_parts:
                    # "<   >" has no tag name; keep it as literal text rather
                    # than failing on tag_parts[0].
                    parsed_line.append(TextNode(unescape(part)))
                    continue
                tag_name = unescape(tag_parts[0])
                attributes = {}
                if len(tag_parts) > 1:
                    for m in attr_re.finditer(tag_parts[1]):
                        if m.group(1) is not None:  # key="value"
                            # Test the key group against None, not truthiness:
                            # an empty value ("") is falsy but still a valid
                            # attribute that must not be dropped.
                            attributes[unescape(m.group(1))] = unescape(m.group(2))
                        elif m.group(3):  # boolean key
                            attributes[unescape(m.group(3))] = True
                parsed_line.append(TagNode(name=tag_name, attributes=attributes))
            else:
                parsed_line.append(TextNode(unescape(part)))
        if parsed_line:
            result.append(parsed_line)

    return result


def patch_dict(script_path: str, dict_path: str, output_path: str):
    """Replace the dictionary text embedded in a script with translated entries.

    The lines between a ``var text = '`` line and the ``';`` line that closes
    it are parsed as a KAG script. Each label is replaced by ``.`` plus the
    translation of its term, and at each ``return`` command the translation
    and the description lines from the CSV are emitted. The original text
    lines of the entries are discarded. Everything outside the embedded block
    is copied unchanged to ``output_path``.

    Args:
        script_path: Path of the UTF-8 script containing the embedded text.
        dict_path: Path of the CSV with ``term``, ``translation`` and
            ``description`` columns.
        output_path: Path of the patched script to write.

    Raises:
        ValueError: If the embedded block is missing or unterminated, or if it
            contains a command other than ``return``.
        KeyError: If a label of the script has no row in the CSV.
    """
    with open(script_path, "r", encoding="utf-8") as f:
        script_text = f.read()
    in_dict = False
    dict_lines = ['\n']
    start_line = None
    end_line = None
    for i, line in enumerate(script_text.splitlines()):
        if line == "var text = '":
            in_dict = True
            start_line = i + 1
        elif line == "';":
            # Only a terminator that closes an open block counts. A stray
            # "';" elsewhere in the file must not move end_line, or the lines
            # between the real block and that stray line would be dropped.
            if in_dict:
                in_dict = False
                end_line = i - 1
        elif in_dict:
            dict_lines.append(line + "\n")
    if start_line is None or end_line is None or in_dict:
        raise ValueError("dictionary block (var text = ' ... ';) not found or unterminated", script_path)
    script = KAGScriptParser(''.join(dict_lines)).parse(True)

    with open(dict_path, "r", encoding="utf-8-sig") as f:
        dicts = {row['term']: row for row in DictReader(f)}

    new_script = []
    term = None
    for line in script:
        if isinstance(line, LabelNode):
            term = line.name[1:]
            new_script.append(LabelNode('.' + dicts[term]['translation']))
        elif isinstance(line, CommentNode):
            # Comments are re-emitted as plain text lines starting with ";".
            new_script.append([TextNode(f";{line.text}")])
        elif isinstance(line, EmptyLineNode):
            pass
        elif isinstance(line, list):
            # Original text is dropped; it is rebuilt from the CSV below.
            pass
        elif isinstance(line, CommandNode):
            if line.name == "return":
                data = dicts[term]
                new_script.append([TextNode(data['translation'])])
                desc: str = data['description']
                for d in desc.splitlines():
                    new_script.append([TextNode(d)])
                new_script.append(EmptyLineNode())
                new_script.append(line)
            else:
                raise ValueError("Unsupported command", line.name)
    dict_data = KAGScriptParser.serialize(new_script)

    # start_line / end_line index the same lines as splitlines() above; the
    # slices keep the opening marker line and resume at the closing one.
    origin_lines = script_text.splitlines(True)
    with open(output_path, 'w', encoding='UTF-8') as f:
        f.writelines(origin_lines[:start_line])
        f.write(dict_data)
        f.write("\n")
        f.writelines(origin_lines[end_line + 1:])


def _read_m3t_messages(m3t_path: str) -> list:
    """Read the translated messages of an m3t file, in file order."""
    messages = []
    name = None
    with open(m3t_path, "r", encoding="utf-8") as f:
        for line in f:
            line = line.strip()
            if line.startswith("○ NAME:"):
                name = line[7:].strip()
            elif line.startswith("●"):
                entry = {}
                if name is not None:
                    entry['name'] = name
                # A literal backslash-n in the file stands for a line break.
                text = line[1:].strip().replace('\\n', '\n')
                if name:
                    # Spoken lines are wrapped in corner brackets if the
                    # translation does not already carry them.
                    if not text.startswith('「'):
                        text = '「' + text
                    if not text.endswith('」'):
                        text += '」'
                entry['message'] = text
                messages.append(entry)
                name = None
    return messages


def patch_script(script_path: str, m3t_path: str, output_path: str, names, term):
    """Write a copy of a KAG script with its text replaced by translations.

    The original text lines are dropped. Before each ``page`` command the next
    message of the m3t file is inserted, parsed with ``parse_message``. In
    ``wd`` tags the ``s`` attribute is replaced by the translation of the term
    it names. Speaker commands (names wrapped in ``【`` / ``】``) are renamed
    through ``names`` when the speaker is listed there. A warning is printed
    if the m3t file holds more messages than the script has ``page`` commands.

    Args:
        script_path: Path of the UTF-8 script to patch.
        m3t_path: Path of the m3t translation file.
        output_path: Path of the patched script to write.
        names: Mapping from original speaker name to translated name.
        term: ``(terms, overrides)`` tuple as returned by extract_dict_terms.

    Raises:
        IndexError: If the script has more ``page`` commands than the m3t file
            has messages.
        ValueError: If a ``wd`` tag names a term found in neither table.
    """
    with open(script_path, "r", encoding="utf-8") as f:
        script_text = f.read()
    script = KAGScriptParser(script_text).parse(True)
    terms, overrides = term
    messages = _read_m3t_messages(m3t_path)

    new_script: ParsedScript = []
    i = 0
    for line in script:
        if isinstance(line, CommandNode):
            if line.name == "page":
                if i >= len(messages):
                    # Same exception type as the plain list lookup would
                    # raise, but naming the files that are out of step.
                    raise IndexError(
                        f"{script_path}: more page commands than the "
                        f"{len(messages)} message(s) in {m3t_path}"
                    )
                message = messages[i]['message']
                i += 1
                nws = parse_message(message)
                for nw in nws:
                    for node in nw:
                        if isinstance(node, TagNode) and node.name == 'wd':
                            key = node.attributes['s']
                            if key not in terms:
                                if key not in overrides:
                                    print(message)
                                    raise ValueError('unknown wd target', key)
                                # Shortened form: resolve to the full term.
                                key = overrides[key]
                            node.attributes['s'] = terms[key]['translation']
                new_script.extend(nws)
            elif line.name.startswith("【") and line.name.endswith("】"):
                name = line.name[1:-1]
                if name in names:
                    line.name = f"【{names[name]}】"
        elif isinstance(line, list):
            # Original text is replaced by the translated message above.
            continue
        new_script.append(line)
    if i != len(messages):
        print(f"WARNING: processed message not matched. expected {len(messages)}, actual {i}, {script_path}")
    script_data = KAGScriptParser.serialize(new_script)
    with open(output_path, 'w', encoding='utf-8') as f:
        f.write(script_data)

def extract_script_auto(script_path: str, output_path: str):
    """Extract one script file, or every ``.scn`` file of a directory.

    A file named ``dict.scn`` is handled by ``extract_dict``; any other script
    is handled by ``extract_script``. In directory mode each output file is
    named after its script with a ``.csv`` (dictionary) or ``.json`` suffix
    and placed in ``output_path``. Missing parent directories of an output
    file are created.
    """
    def ensure_parent(target: str) -> None:
        parent = dirname(target)
        if parent and not isdir(parent):
            makedirs(parent, exist_ok=True)

    if not isdir(script_path):
        # Single-file mode: output_path is the file to write.
        ensure_parent(output_path)
        if basename(script_path) == "dict.scn":
            extract_dict(script_path, output_path)
        else:
            extract_script(script_path, output_path)
        return

    for entry in listdir(script_path):
        if not entry.lower().endswith(".scn"):
            continue
        is_dictionary = entry == "dict.scn"
        stem = splitext(basename(entry))[0]
        suffix = ".csv" if is_dictionary else ".json"
        source = join(script_path, entry)
        target = join(output_path, stem + suffix)
        ensure_parent(target)
        if is_dictionary:
            extract_dict(source, target)
        else:
            extract_script(source, target)


def read_names(name_dict_path: str):
    """Load the name CSV as a mapping from ``JP_Name`` to ``CN_Name``.

    When a ``JP_Name`` occurs more than once, the last row wins.
    """
    with open(name_dict_path, 'r', encoding='utf-8-sig') as name_file:
        return {
            record['JP_Name']: record['CN_Name']
            for record in DictReader(name_file)
        }


def patch_script_auto(script_path: str, m3t_path: str, output_path: str, name_dict_path: str, dict_path: str):
    """Patch one script file, or every ``.scn`` file of a directory.

    A file named ``dict.scn`` is patched with ``patch_dict`` from a ``.csv``
    translation file; any other script is patched with ``patch_script`` from
    a ``.m3t`` file. In directory mode the translation file is looked up in
    ``m3t_path`` under the script's base name, scripts without a translation
    file are skipped, and the patched script keeps its file name inside
    ``output_path``. Missing parent directories of an output file are created
    in both modes.

    Args:
        script_path: Script file or directory of scripts.
        m3t_path: Translation file, or directory of translation files.
        output_path: Output file, or output directory in directory mode.
        name_dict_path: CSV read by ``read_names``.
        dict_path: CSV read by ``extract_dict_terms``.
    """
    names = read_names(name_dict_path)
    term = extract_dict_terms(dict_path)
    if isdir(script_path):
        for file in listdir(script_path):
            if not file.lower().endswith(".scn"):
                continue
            is_dictionary = file == "dict.scn"
            stem = splitext(basename(file))[0]
            m3t_full_path = join(m3t_path, stem + (".csv" if is_dictionary else ".m3t"))
            if not exists(m3t_full_path):
                continue
            full_path = join(script_path, file)
            output_full_path = join(output_path, basename(file))
            # Create the output directory on demand, as the single-file branch
            # does; otherwise opening the output file fails when it is missing.
            pdir = dirname(output_full_path)
            if pdir and not isdir(pdir):
                makedirs(pdir, exist_ok=True)
            if is_dictionary:
                patch_dict(full_path, m3t_full_path, output_full_path)
            else:
                patch_script(full_path, m3t_full_path, output_full_path, names, term)
    else:
        pdir = dirname(output_path)
        if pdir and not isdir(pdir):
            makedirs(pdir, exist_ok=True)
        if basename(script_path) == "dict.scn":
            patch_dict(script_path, m3t_path, output_path)
        else:
            patch_script(script_path, m3t_path, output_path, names, term)


if __name__ == "__main__":
    # Command-line entry point with two sub-commands: "extract" and "patch".
    from argparse import ArgumentParser

    cli = ArgumentParser(description="Process KAG script files")
    commands = cli.add_subparsers(title="Commands", dest="command")

    extract_cmd = commands.add_parser("extract", help="Extract script to JSON")
    extract_cmd.add_argument("script_path", help="Path to KAG script file or directory")
    extract_cmd.add_argument("output_path", help="Path to output JSON file or directory")

    patch_cmd = commands.add_parser("patch", help="Patch script")
    patch_cmd.add_argument("script_path", help="Path to KAG script file or directory")
    patch_cmd.add_argument("m3t_path", help="Path to m3t file or directory")
    patch_cmd.add_argument("output_path", help="Path to output KAG script file or directory")
    patch_cmd.add_argument("name_dict_path", help="Path to name dict")
    patch_cmd.add_argument("dict_path", help="path to dict.csv")

    options = cli.parse_args()
    # Without a sub-command, options.command is None and nothing is run.
    if options.command == "extract":
        extract_script_auto(options.script_path, options.output_path)
    elif options.command == "patch":
        patch_script_auto(
            options.script_path,
            options.m3t_path,
            options.output_path,
            options.name_dict_path,
            options.dict_path,
        )