Files
GalScripts/iinkai.py
2026-04-28 17:26:17 +08:00

262 lines
10 KiB
Python

from KAGParser import *
from html import escape, unescape
import json
from os.path import isdir, join, basename, splitext, dirname, exists
from os import listdir, makedirs
from typing import List
from csv import DictReader, DictWriter
def extract_script(script_path: str, output_path: str):
    """Extract the messages of one KAG script into a JSON file.

    The script is parsed with ``KAGScriptParser``. Text and inline tags are
    accumulated into one message until a ``page`` command is reached; the
    message is then stored as ``{"name": ..., "message": ...}`` (``name`` is
    omitted when no name command was seen since the previous page). Text is
    written with ``&`` and ``<`` escaped, tags as ``<tag key="value">``.

    Args:
        script_path: Path of the UTF-8 KAG script to read.
        output_path: Path of the JSON file to write (UTF-8, indent 2).
    """
    with open(script_path, "r", encoding="utf-8") as f:
        script_text = f.read()
    script = KAGScriptParser(script_text).parse(True)

    result = []
    name = None
    parts = []  # fragments of the message currently being accumulated

    def flush():
        """Store the pending message (and name) and reset the accumulators."""
        nonlocal name
        entry = {}
        if name is not None:
            entry['name'] = name
        entry['message'] = ''.join(parts)
        result.append(entry)
        parts.clear()
        name = None

    for line in script:
        if isinstance(line, CommandNode):
            if line.name == 'page':
                flush()
            elif line.name.startswith("") and line.name.endswith(""):
                # NOTE(review): both literals above are empty, so this test is
                # true for every command; the [1:-1] slice suggests one-char
                # delimiters were lost from the literals -- confirm upstream.
                name = line.name[1:-1]
        elif isinstance(line, list):
            for node in line:
                if isinstance(node, TextNode):
                    parts.append(node.text.replace("&", "&amp;").replace("<", "&lt;"))
                elif isinstance(node, TagNode):
                    attrs = []
                    for k, v in node.attributes.items():
                        if v is True:
                            # Bare key: the form parse_message reads back as True.
                            attrs.append(f' {escape(k)}')
                        else:
                            # str() keeps html.escape from failing on non-str values.
                            attrs.append(f' {escape(k)}="{escape(str(v))}"')
                    parts.append(f"<{escape(node.name)}{''.join(attrs)}>")

    # Keep a trailing message that is not terminated by a page command.
    if name is not None or any(parts):
        flush()

    with open(output_path, "w", encoding="utf-8") as f:
        json.dump(result, f, ensure_ascii=False, indent=2)
def extract_dict(script_path: str, output_path: str):
    """Extract the term list embedded in a dictionary script into a CSV file.

    Only the lines between ``var text = '`` and ``';`` are parsed as KAG
    script. Each label starts a term; the text lines collected up to the
    following ``return`` command become its description. The CSV has the
    columns ``term``, ``translation`` (left empty) and ``description``.

    Args:
        script_path: Path of the UTF-8 script to read.
        output_path: Path of the CSV file to write (UTF-8 with BOM).

    Raises:
        ValueError: If a text line contains a node that is not a ``TextNode``.
        IndexError: If a term has fewer collected entries than are removed.
    """
    with open(script_path, "r", encoding="utf-8") as f:
        script_text = f.read()

    # Gather the embedded KAG source line by line.
    inside = False
    collected = []
    for raw in script_text.splitlines():
        if raw == "var text = '":
            inside = True
        elif raw == "';":
            inside = False
        elif inside:
            collected.append(raw + "\n")
    dict_data = '\n' + ''.join(collected)

    script = KAGScriptParser(dict_data).parse(True)
    entries = {}
    label = None
    term: List[str] = []
    for line in script:
        if isinstance(line, LabelNode):
            label = line.name[1:]
        elif isinstance(line, list):
            # Separate consecutive text lines with a line break.
            if term:
                term.append('\n')
            for node in line:
                if not isinstance(node, TextNode):
                    raise ValueError("Unexpected node type in dict.scn", node)
                term.append(node.text)
        elif isinstance(line, CommandNode) and line.name == "return":
            # Drop the name.
            term.pop(0)
            term.pop(0)
            # Drop the katakana reading.
            # NOTE(review): both literals below are empty, so the test holds
            # for any first entry; delimiter characters were presumably lost.
            if len(term) > 0 and term[0].startswith("") and term[0].endswith(""):
                term.pop(0)
                term.pop(0)  # remove the line break
            entries[label] = ''.join(term)
            term = []
            label = None

    with open(output_path, "w", encoding="utf-8-sig", newline="") as f:
        writer = DictWriter(f, fieldnames=["term", "translation", "description"], lineterminator="\n")
        writer.writeheader()
        writer.writerows(
            {"term": key, "translation": "", "description": text}
            for key, text in entries.items()
        )
def parse_message(message: str) -> List[ParsedLine]:
    """Parse a message string (HTML-escaped KAG tag format) back into ParsedLines.

    Reverses the serialization in extract_script: ``<tagname key="val">``
    becomes a TagNode, HTML entities are unescaped into TextNode text, and
    the message is split on newlines. Lines that produce no node are omitted.

    Args:
        message: Message text, possibly spanning several lines.

    Returns:
        One list of TagNode/TextNode objects per non-empty line.
    """
    # Imported here because the file's own import block does not list ``re``.
    import re

    _TAG_RE = re.compile(r"(<[^>]+>)")
    _ATTR_RE = re.compile(r"""([a-zA-Z0-9_]+)="([^"]*)"|([a-zA-Z0-9_]+)""", re.VERBOSE)
    result: List[ParsedLine] = []
    for line in message.split("\n"):
        parsed_line: ParsedLine = []
        # The capturing group keeps the tags themselves in the split result.
        for part in _TAG_RE.split(line):
            if not part:
                continue
            if not (part.startswith("<") and part.endswith(">")):
                parsed_line.append(TextNode(unescape(part)))
                continue
            inner = part[1:-1].strip()
            tag_parts = inner.split(maxsplit=1)
            tag_name = unescape(tag_parts[0])
            attributes = {}
            if len(tag_parts) > 1:
                for m in _ATTR_RE.finditer(tag_parts[1]):
                    if m.group(1) is not None:  # key="value"
                        # Compare against None, not truthiness, so that an
                        # empty value (key="") is kept instead of dropped.
                        attributes[unescape(m.group(1))] = unescape(m.group(2))
                    elif m.group(3):  # boolean key
                        attributes[unescape(m.group(3))] = True
            parsed_line.append(TagNode(name=tag_name, attributes=attributes))
        if parsed_line:
            result.append(parsed_line)
    return result
def patch_script(script_path: str, m3t_path: str, output_path: str, names):
    """Write a copy of a KAG script with its messages replaced from an m3t file.

    Every text line of the original script is dropped. At each ``page``
    command the next message from the m3t file is parsed with
    ``parse_message`` and inserted before the command. Name commands whose
    extracted name is a key of ``names`` are renamed.

    Args:
        script_path: Path of the UTF-8 KAG script to read.
        m3t_path: Path of the UTF-8 m3t file holding the replacement messages.
        output_path: Path of the patched script to write.
        names: Mapping from an original name to its replacement.

    Raises:
        IndexError: If the script has more ``page`` commands than the m3t
            file has messages.
    """
    with open(script_path, "r", encoding="utf-8") as f:
        script_text = f.read()
    script = KAGScriptParser(script_text).parse(True)
    new_script: ParsedScript = []

    messages = []
    name = None
    with open(m3t_path, "r", encoding="utf-8") as f:
        for line in f:
            line = line.strip()
            if line.startswith("○ NAME:"):
                name = line[7:].strip()
            elif line.startswith(""):
                # NOTE(review): the prefix literal is empty, so every other
                # line (blank ones included) is taken as a message and loses
                # its first character; a marker was presumably lost -- confirm.
                message = line[1:].strip()
                d = {}
                if name is not None:
                    d['name'] = name
                # The m3t file stores line breaks as a literal backslash-n.
                d['message'] = message.replace('\\n', '\n')
                messages.append(d)
                name = None

    i = 0
    for line in script:
        if isinstance(line, CommandNode):
            if line.name == "page":
                if i >= len(messages):
                    raise IndexError(
                        f"{m3t_path} has only {len(messages)} message(s), "
                        f"but {script_path} needs more"
                    )
                message = messages[i]['message']
                i += 1
                new_script.extend(parse_message(message))
            elif line.name.startswith("") and line.name.endswith(""):
                # NOTE(review): empty literals make this true for every
                # non-page command, and the replacement below does not put
                # back the two characters removed by [1:-1] -- confirm.
                name = line.name[1:-1]
                if name in names:
                    line.name = f"{names[name]}"
        elif isinstance(line, list):
            # Original text lines are replaced by the translated messages.
            continue
        new_script.append(line)

    if i != len(messages):
        # Report the number of available messages, not the last one's length.
        print(f"WARNING: processed message not matched. expected {len(messages)}, actual {i}")
    script_data = KAGScriptParser.serialize(new_script)
    with open(output_path, 'w', encoding='utf-8') as f:
        f.write(script_data)
def extract_script_auto(script_path: str, output_path: str):
    """Extract a single script, or every ``.scn`` file in a directory.

    A file named ``dict.scn`` is handled by ``extract_dict`` (output ``.csv``
    in directory mode); any other script goes through ``extract_script``
    (output ``.json`` in directory mode). Missing output directories are
    created.

    Args:
        script_path: Path of a script file or of a directory of scripts.
        output_path: Output file path (file mode) or directory (directory mode).
    """
    if not isdir(script_path):
        # Single-file mode: output_path is the target file itself.
        parent = dirname(output_path)
        if parent and not isdir(parent):
            makedirs(parent, exist_ok=True)
        if basename(script_path) == "dict.scn":
            extract_dict(script_path, output_path)
        else:
            extract_script(script_path, output_path)
        return

    for entry in listdir(script_path):
        if not entry.lower().endswith(".scn"):
            continue
        is_dict = entry == "dict.scn"
        source = join(script_path, entry)
        stem = splitext(basename(entry))[0]
        target = join(output_path, stem + (".csv" if is_dict else ".json"))
        parent = dirname(target)
        if parent and not isdir(parent):
            makedirs(parent, exist_ok=True)
        if is_dict:
            extract_dict(source, target)
        else:
            extract_script(source, target)
def read_names(name_dict_path: str) -> dict:
    """Load the name mapping from a CSV file.

    Args:
        name_dict_path: Path of a UTF-8 CSV file (a leading BOM is accepted)
            with the columns ``JP_Name`` and ``CN_Name``.

    Returns:
        A dict mapping each ``JP_Name`` value to its ``CN_Name`` value; for a
        repeated ``JP_Name`` the last row wins.

    Raises:
        KeyError: If a row has no ``JP_Name`` or ``CN_Name`` column.
    """
    # newline='' leaves line-ending handling to the csv module, so line
    # breaks inside quoted fields are read back unchanged.
    with open(name_dict_path, 'r', encoding='utf-8-sig', newline='') as f:
        return {row['JP_Name']: row['CN_Name'] for row in DictReader(f)}
def patch_script_auto(script_path: str, m3t_path: str, output_path: str, name_dict_path: str):
    """Patch a single script, or every ``.scn`` file in a directory.

    In directory mode each ``<stem>.scn`` is paired with ``<stem>.m3t`` in
    ``m3t_path``; scripts without a matching m3t file are skipped. A file
    named ``dict.scn`` is never patched. Missing output directories are
    created.

    Args:
        script_path: Path of a script file or of a directory of scripts.
        m3t_path: Path of an m3t file (file mode) or directory (directory mode).
        output_path: Output file path (file mode) or directory (directory mode).
        name_dict_path: Path of the CSV name mapping read by ``read_names``.
    """
    names = read_names(name_dict_path)
    if isdir(script_path):
        for file in listdir(script_path):
            if not file.lower().endswith(".scn"):
                continue
            if file == "dict.scn":
                # Patching the dictionary script is not implemented.
                continue
            m3t_full_path = join(m3t_path, splitext(basename(file))[0] + ".m3t")
            if not exists(m3t_full_path):
                continue
            output_full_path = join(output_path, basename(file))
            # Create the output directory, as extract_script_auto does;
            # otherwise writing into a new directory fails.
            pdir = dirname(output_full_path)
            if pdir and not isdir(pdir):
                makedirs(pdir, exist_ok=True)
            patch_script(join(script_path, file), m3t_full_path, output_full_path, names)
    else:
        pdir = dirname(output_path)
        if pdir and not isdir(pdir):
            makedirs(pdir, exist_ok=True)
        if basename(script_path) != "dict.scn":
            patch_script(script_path, m3t_path, output_path, names)
# Command-line entry point: "extract" and "patch" sub-commands.
if __name__ == "__main__":
    from argparse import ArgumentParser

    parser = ArgumentParser(description="Process KAG script files")
    subparser = parser.add_subparsers(title="Commands", dest="command")

    extract_parser = subparser.add_parser("extract", help="Extract script to JSON")
    extract_parser.add_argument("script_path", help="Path to KAG script file or directory")
    extract_parser.add_argument("output_path", help="Path to output JSON file or directory")

    patch_parser = subparser.add_parser("patch", help="Patch script")
    patch_parser.add_argument("script_path", help="Path to KAG script file or directory")
    patch_parser.add_argument("m3t_path", help="Path to m3t file or directory")
    patch_parser.add_argument("output_path", help="Path to output KAG script file or directory")
    patch_parser.add_argument("name_dict_path", help="Path to name dict")

    args = parser.parse_args()
    if args.command == "extract":
        extract_script_auto(args.script_path, args.output_path)
    elif args.command == "patch":
        patch_script_auto(args.script_path, args.m3t_path, args.output_path, args.name_dict_path)
    else:
        # Sub-commands are optional here, so args.command is None when none
        # is given; show the usage instead of exiting silently.
        parser.print_help()