Files
GalScripts/iinkai.py
2026-04-28 21:02:24 +08:00

347 lines
14 KiB
Python

from KAGParser import *
from html import escape, unescape
import json
from os.path import isdir, join, basename, splitext, dirname, exists
from os import listdir, makedirs
from typing import List
from csv import DictReader, DictWriter
def _format_tag(node) -> str:
    """Render a tag node as ``<name key="value" flag>`` with HTML-escaped parts."""
    rendered = [f"<{escape(node.name)}"]
    for key, value in node.attributes.items():
        if value is True:
            # Valueless attribute: emit the bare key, which is the form
            # parse_message turns back into ``True``. Passing True to
            # html.escape would raise instead.
            rendered.append(f" {escape(key)}")
        else:
            rendered.append(f' {escape(key)}="{escape(value)}"')
    rendered.append(">")
    return "".join(rendered)


def extract_script(script_path: str, output_path: str):
    """Extract the messages of a KAG script into a JSON file.

    The script is parsed with ``KAGScriptParser``. Text and tag nodes are
    accumulated into one message string (text has ``&`` and ``<`` escaped,
    tags are rendered as ``<name key="value">``), and every ``page`` command
    closes the current message. Anything still pending after the last line
    is emitted as a final entry.

    Args:
        script_path: Path of the KAG script to read (UTF-8).
        output_path: Path of the JSON file to write (UTF-8). The file holds a
            list of objects with a ``message`` key and, when a speaker name
            was seen, a ``name`` key.
    """
    with open(script_path, "r", encoding="utf-8") as f:
        script_text = f.read()
    script = KAGScriptParser(script_text).parse(True)

    def make_entry(speaker, text):
        # "name" is only present when a speaker was recorded; it comes first.
        entry = {}
        if speaker is not None:
            entry['name'] = speaker
        entry['message'] = text
        return entry

    name = None
    pieces = []
    result = []
    for line in script:
        if isinstance(line, CommandNode):
            if line.name == 'page':
                # A page break always emits an entry, even an empty one.
                result.append(make_entry(name, ''.join(pieces)))
                pieces = []
                name = None
            # NOTE(review): both affixes are empty strings in this copy of the
            # file, so this matches every non-page command. The [1:-1] slice
            # suggests one-character delimiters were lost - confirm upstream.
            elif line.name.startswith("") and line.name.endswith(""):
                name = line.name[1:-1]
        elif isinstance(line, list):
            for node in line:
                if isinstance(node, TextNode):
                    pieces.append(node.text.replace("&", "&amp;").replace("<", "&lt;"))
                elif isinstance(node, TagNode):
                    pieces.append(_format_tag(node))

    message = ''.join(pieces)
    if name is not None or message:
        result.append(make_entry(name, message))

    with open(output_path, "w", encoding="utf-8") as f:
        json.dump(result, f, ensure_ascii=False, indent=2)
def extract_dict_terms(script_path: str):
    """Load the glossary CSV and build its lookup tables.

    Args:
        script_path: Path of a CSV file (UTF-8, optional BOM) that has a
            ``term`` column.

    Returns:
        A ``(terms, overrides)`` tuple. ``terms`` maps each ``term`` cell to
        its whole CSV row. ``overrides`` maps the part of a term before its
        first space to the full term, for terms that contain a space.
    """
    terms = {}
    overrides = {}
    # newline="" lets the csv module handle line endings itself, so newlines
    # inside quoted fields are read back unchanged.
    with open(script_path, "r", encoding="utf-8-sig", newline="") as f:
        for row in DictReader(f):
            term = row['term']
            terms[term] = row
            head, sep, _ = term.partition(' ')
            if sep:
                overrides[head] = term
    return terms, overrides
def extract_dict(script_path: str, output_path: str):
    """Extract the dictionary entries embedded in a script into a CSV file.

    The lines between a ``var text = '`` line and a ``';`` line are parsed as
    a KAG script. Each label starts an entry, text lines are collected, and a
    ``return`` command closes the entry. The leading name line (and the line
    after it when it passes the affix check below) is dropped, and the rest
    becomes the description.

    Args:
        script_path: Path of the script that contains the dictionary (UTF-8).
        output_path: Path of the CSV to write (UTF-8 with BOM), with columns
            ``term``, ``translation`` (left empty) and ``description``.

    Raises:
        ValueError: If a text line contains a node that is not a TextNode.
        IndexError: If an entry has fewer collected parts than the pops below
            expect.
    """
    with open(script_path, "r", encoding="utf-8") as f:
        script_text = f.read()

    # Collect the lines between the opening and closing markers.
    in_dict = False
    dict_lines = ['\n']
    for line in script_text.splitlines():
        if line == "var text = '":
            in_dict = True
        elif line == "';":
            in_dict = False
        elif in_dict:
            dict_lines.append(line + "\n")
    script = KAGScriptParser(''.join(dict_lines)).parse(True)

    entries = {}
    label = None
    term: List[str] = []
    for line in script:
        if isinstance(line, LabelNode):
            label = line.name[1:]
        elif isinstance(line, list):
            # Separate consecutive text lines with a newline part.
            if term:
                term.append('\n')
            for node in line:
                if isinstance(node, TextNode):
                    term.append(node.text)
                else:
                    raise ValueError("Unexpected node type in dict.scn", node)
        elif isinstance(line, CommandNode):
            if line.name == "return":
                # Drop the name and the newline that follows it.
                term.pop(0)
                term.pop(0)
                # Drop the katakana reading line.
                # NOTE(review): both affixes are empty strings in this copy of
                # the file, so the check passes for any first part - the
                # delimiter characters look lost; confirm upstream.
                if term and term[0].startswith("") and term[0].endswith(""):
                    term.pop(0)
                    term.pop(0)  # remove the newline
                entries[label] = ''.join(term)
                term = []
                label = None

    with open(output_path, "w", encoding="utf-8-sig", newline="") as f:
        writer = DictWriter(f, fieldnames=["term", "translation", "description"], lineterminator="\n")
        writer.writeheader()
        for key, description in entries.items():
            writer.writerow({"term": key, "translation": "", "description": description})
def parse_message(message: str) -> List[ParsedLine]:
    """Parse a message string (HTML-escaped KAG tag format) back into ParsedLines.

    Reverses the serialization in extract_script: ``<tagname key="val">``
    becomes a TagNode, any other text becomes a TextNode with HTML entities
    decoded. The message is split on newlines, and lines that produce no
    nodes are omitted.

    Args:
        message: The serialized message text.

    Returns:
        One list of nodes per non-empty line of ``message``.
    """
    _TAG_RE = re.compile(r"(<[^>]+>)")
    _ATTR_RE = re.compile(r"""([a-zA-Z0-9_]+)="([^"]*)"|([a-zA-Z0-9_]+)""", re.VERBOSE)
    result: List[ParsedLine] = []
    for line in message.split("\n"):
        parsed_line: ParsedLine = []
        # The capturing group makes split() keep the tags between text runs.
        for part in _TAG_RE.split(line):
            if not part:
                continue
            if not (part.startswith("<") and part.endswith(">")):
                parsed_line.append(TextNode(unescape(part)))
                continue
            inner = part[1:-1].strip()
            tag_parts = inner.split(maxsplit=1)
            tag_name = unescape(tag_parts[0])
            attributes: Dict[str, Any] = {}
            if len(tag_parts) > 1:
                for m in _ATTR_RE.finditer(tag_parts[1]):
                    if m.group(1) is not None:
                        # key="value" form. Test against None rather than
                        # truthiness so an empty value (key="") is kept.
                        attributes[unescape(m.group(1))] = unescape(m.group(2))
                    elif m.group(3):
                        # Bare key without a value.
                        attributes[unescape(m.group(3))] = True
            parsed_line.append(TagNode(name=tag_name, attributes=attributes))
        if parsed_line:
            result.append(parsed_line)
    return result
def patch_dict(script_path: str, dict_path: str, output_path: str):
    """Rewrite the dictionary embedded in a script using translated CSV rows.

    The lines between ``var text = '`` and ``';`` are parsed as a KAG script
    and rebuilt. Labels are replaced by the translated term, comments are
    kept, the original text lines are discarded, and before each ``return``
    command the translation plus the description lines from the CSV are
    inserted. Everything outside the markers is copied through unchanged.

    Args:
        script_path: Path of the original script (UTF-8).
        dict_path: Path of the CSV (UTF-8, optional BOM) with ``term``,
            ``translation`` and ``description`` columns.
        output_path: Path of the patched script to write.

    Raises:
        ValueError: If the dictionary markers are not found, or the block
            contains a command other than ``return``.
        KeyError: If a label has no matching ``term`` row in the CSV.
    """
    with open(script_path, "r", encoding="utf-8") as f:
        script_text = f.read()

    in_dict = False
    dict_lines = ['\n']
    start_line = None
    end_line = None
    for i, line in enumerate(script_text.splitlines()):
        if line == "var text = '":
            in_dict = True
            start_line = i + 1
        elif line == "';":
            in_dict = False
            end_line = i - 1
        elif in_dict:
            dict_lines.append(line + "\n")
    # Fail before the output file is opened. Without both markers the slices
    # below cannot be computed, and the output would be left half-written.
    if start_line is None or end_line is None:
        raise ValueError("dictionary text block not found", script_path)
    script = KAGScriptParser(''.join(dict_lines)).parse(True)

    dicts = {}
    # newline="" lets the csv module handle line endings itself.
    with open(dict_path, "r", encoding="utf-8-sig", newline="") as f:
        for row in DictReader(f):
            dicts[row['term']] = row

    new_script = []
    term = None
    for line in script:
        if isinstance(line, LabelNode):
            term = line.name[1:]
            new_script.append(LabelNode('.' + dicts[term]['translation']))
        elif isinstance(line, CommentNode):
            new_script.append([TextNode(f";{line.text}")])
        elif isinstance(line, (EmptyLineNode, list)):
            # Original blank and text lines are dropped; the translated text
            # is emitted when the entry's "return" command is reached.
            pass
        elif isinstance(line, CommandNode):
            if line.name != "return":
                raise ValueError("Unsupported command", line.name)
            data = dicts[term]
            new_script.append([TextNode(data['translation'])])
            desc: str = data['description']
            for d in desc.splitlines():
                new_script.append([TextNode(d)])
            new_script.append(EmptyLineNode())
            new_script.append(line)

    dict_data = KAGScriptParser.serialize(new_script)
    origin_lines = script_text.splitlines(True)
    with open(output_path, 'w', encoding='UTF-8') as f:
        # Everything up to and including the opening marker line.
        f.writelines(origin_lines[:start_line])
        f.write(dict_data)
        f.write("\n")
        # The closing marker line and everything after it.
        f.writelines(origin_lines[end_line + 1:])
def _load_m3t_messages(m3t_path: str):
    """Read an m3t file into a list of ``{"name"?, "message"}`` dicts."""
    messages = []
    name = None
    with open(m3t_path, "r", encoding="utf-8") as f:
        for line in f:
            line = line.strip()
            if line.startswith("○ NAME:"):
                name = line[7:].strip()
            # NOTE(review): the prefix here and the affixes below are empty
            # strings in this copy of the file, so every non-NAME line is taken
            # as a message and the affix fix-ups are no-ops. The marker
            # characters look lost - confirm upstream.
            elif line.startswith(""):
                message = line[1:].strip()
                d = {}
                if name is not None:
                    d['name'] = name
                # A literal backslash-n in the file stands for a line break.
                d['message'] = message.replace('\\n', '\n')
                if name:
                    if not d['message'].startswith(''):
                        d['message'] = '' + d['message']
                    if not d['message'].endswith(''):
                        d['message'] += ''
                messages.append(d)
                name = None
    return messages


def patch_script(script_path: str, m3t_path: str, output_path: str, names, term):
    """Write a copy of a KAG script with its messages replaced from an m3t file.

    The original text lines are dropped. At every ``page`` command the next
    m3t message is parsed with ``parse_message`` and inserted before the
    command. The ``s`` attribute of ``wd`` tags is replaced by the glossary
    translation, and the name command is replaced when it has an entry in
    ``names``. A warning is printed if some m3t messages are left unused.

    Args:
        script_path: Path of the original script (UTF-8).
        m3t_path: Path of the m3t file with the replacement messages (UTF-8).
        output_path: Path of the patched script to write (UTF-8).
        names: Mapping from an original name to its replacement.
        term: The ``(terms, overrides)`` pair from ``extract_dict_terms``.

    Raises:
        IndexError: If the script has more ``page`` commands than the m3t
            file has messages.
        ValueError: If a ``wd`` tag targets a term found in neither ``terms``
            nor ``overrides``.
    """
    with open(script_path, "r", encoding="utf-8") as f:
        script_text = f.read()
    script = KAGScriptParser(script_text).parse(True)
    new_script: ParsedScript = []
    terms, overrides = term
    messages = _load_m3t_messages(m3t_path)

    i = 0
    for line in script:
        if isinstance(line, list):
            # Original text lines are replaced by the m3t messages.
            continue
        if isinstance(line, CommandNode):
            if line.name == "page":
                if i >= len(messages):
                    # Same exception type as plain indexing, with context.
                    raise IndexError(
                        f"{m3t_path} has only {len(messages)} messages, "
                        f"but {script_path} needs more"
                    )
                message = messages[i]['message']
                i += 1
                nws = parse_message(message)
                for nw in nws:
                    for node in nw:
                        if not (isinstance(node, TagNode) and node.name == 'wd'):
                            continue
                        target = node.attributes['s']
                        if target not in terms:
                            if target not in overrides:
                                print(message)
                                raise ValueError('unknown wd target', target)
                            # Map the short form to the full glossary term.
                            target = overrides[target]
                        node.attributes['s'] = terms[target]['translation']
                new_script.extend(nws)
            # NOTE(review): empty affixes here as well (see _load_m3t_messages),
            # and the f-string below has no delimiters around the name.
            elif line.name.startswith("") and line.name.endswith(""):
                name = line.name[1:-1]
                if name in names:
                    line.name = f"{names[name]}"
        new_script.append(line)

    if i != len(messages):
        print(f"WARNING: processed message not matched. expected {len(messages)}, actual {i}, {script_path}")
    script_data = KAGScriptParser.serialize(new_script)
    with open(output_path, 'w', encoding='utf-8') as f:
        f.write(script_data)
def extract_script_auto(script_path: str, output_path: str):
    """Extract one script, or every ``.scn`` file of a directory.

    A file named ``dict.scn`` goes through ``extract_dict``; any other file
    goes through ``extract_script``. In directory mode ``output_path`` is a
    directory and each output is named after its source, with a ``.csv``
    (dictionary) or ``.json`` (script) extension. Missing parent directories
    of an output file are created.

    Args:
        script_path: Path of a script file or of a directory of scripts.
        output_path: Path of the output file (file mode) or directory
            (directory mode).
    """
    def run_one(source: str, destination: str) -> None:
        # Make sure the destination's parent exists, then pick the extractor.
        parent = dirname(destination)
        if parent and not isdir(parent):
            makedirs(parent, exist_ok=True)
        extractor = extract_dict if basename(source) == "dict.scn" else extract_script
        extractor(source, destination)

    if not isdir(script_path):
        run_one(script_path, output_path)
        return

    for entry in listdir(script_path):
        if not entry.lower().endswith(".scn"):
            continue
        extension = ".csv" if entry == "dict.scn" else ".json"
        target_name = splitext(basename(entry))[0] + extension
        run_one(join(script_path, entry), join(output_path, target_name))
def read_names(name_dict_path: str):
    """Load the name table from a CSV file.

    Args:
        name_dict_path: Path of a CSV file (UTF-8, optional BOM) with
            ``JP_Name`` and ``CN_Name`` columns.

    Returns:
        A dict mapping each ``JP_Name`` cell to the ``CN_Name`` cell of the
        same row. Later rows win on duplicate keys.
    """
    # newline="" lets the csv module handle line endings itself, so newlines
    # inside quoted fields are read back unchanged.
    with open(name_dict_path, 'r', encoding='utf-8-sig', newline='') as f:
        return {row['JP_Name']: row['CN_Name'] for row in DictReader(f)}
def patch_script_auto(script_path: str, m3t_path: str, output_path: str, name_dict_path: str, dict_path: str):
    """Patch one script, or every ``.scn`` file of a directory.

    A file named ``dict.scn`` is patched with ``patch_dict`` from a ``.csv``
    file; any other script is patched with ``patch_script`` from a ``.m3t``
    file. In directory mode ``m3t_path`` and ``output_path`` are directories,
    the translation file is looked up by the script's base name, and scripts
    without a translation file are skipped. Missing parent directories of an
    output file are created in both modes.

    Args:
        script_path: Path of a script file or of a directory of scripts.
        m3t_path: Path of the translation file or directory.
        output_path: Path of the output file or directory.
        name_dict_path: Path of the name CSV read by ``read_names``.
        dict_path: Path of the glossary CSV read by ``extract_dict_terms``.
    """
    names = read_names(name_dict_path)
    term = extract_dict_terms(dict_path)

    def run_one(source: str, translation: str, destination: str) -> None:
        # Create the destination's parent first, so that directory mode works
        # with a fresh output directory just like single-file mode does.
        pdir = dirname(destination)
        if pdir and not isdir(pdir):
            makedirs(pdir, exist_ok=True)
        if basename(source) == "dict.scn":
            patch_dict(source, translation, destination)
        else:
            patch_script(source, translation, destination, names, term)

    if not isdir(script_path):
        run_one(script_path, m3t_path, output_path)
        return

    for file in listdir(script_path):
        if not file.lower().endswith(".scn"):
            continue
        extension = ".csv" if file == "dict.scn" else ".m3t"
        m3t_full_path = join(m3t_path, splitext(basename(file))[0] + extension)
        if not exists(m3t_full_path):
            # No translation for this script: leave it out.
            continue
        run_one(join(script_path, file), m3t_full_path, join(output_path, basename(file)))
if __name__ == "__main__":
    # Command-line entry point with two sub-commands:
    #   extract <script_path> <output_path>
    #   patch   <script_path> <m3t_path> <output_path> <name_dict_path> <dict_path>
    from argparse import ArgumentParser

    parser = ArgumentParser(description="Process KAG script files")
    subparser = parser.add_subparsers(title="Commands", dest="command")

    extract_parser = subparser.add_parser("extract", help="Extract script to JSON")
    extract_parser.add_argument("script_path", help="Path to KAG script file or directory")
    extract_parser.add_argument("output_path", help="Path to output JSON file or directory")

    patch_parser = subparser.add_parser("patch", help="Patch script")
    patch_parser.add_argument("script_path", help="Path to KAG script file or directory")
    patch_parser.add_argument("m3t_path", help="Path to m3t file or directory")
    patch_parser.add_argument("output_path", help="Path to output KAG script file or directory")
    patch_parser.add_argument("name_dict_path", help="Path to name dict")
    patch_parser.add_argument("dict_path", help="path to dict.csv")

    args = parser.parse_args()
    if args.command == "extract":
        extract_script_auto(args.script_path, args.output_path)
    elif args.command == "patch":
        patch_script_auto(args.script_path, args.m3t_path, args.output_path, args.name_dict_path, args.dict_path)
    else:
        # No sub-command was given: show the usage instead of exiting silently.
        parser.print_help()