347 lines
14 KiB
Python
347 lines
14 KiB
Python
from KAGParser import *
|
|
from html import escape, unescape
|
|
import json
|
|
from os.path import isdir, join, basename, splitext, dirname, exists
|
|
from os import listdir, makedirs
|
|
from typing import List
|
|
from csv import DictReader, DictWriter
|
|
|
|
|
|
def _serialize_tag(node) -> str:
    """Render a TagNode as ``<name key="value" ...>`` with HTML-escaped parts."""
    parts = [escape(node.name)]
    for key, value in node.attributes.items():
        if value is True:
            # Valueless attribute: emit the bare key, which is the form
            # parse_message() reads back as ``True``.
            parts.append(escape(key))
        else:
            parts.append(f'{escape(key)}="{escape(value)}"')
    return "<" + " ".join(parts) + ">"


def _make_entry(name, message: str) -> dict:
    """Build one output record; the ``name`` key is omitted when no speaker is set."""
    entry = {}
    if name is not None:
        entry['name'] = name
    entry['message'] = message
    return entry


def extract_script(script_path: str, output_path: str):
    """Extract the messages of one KAG script into a JSON file.

    The script is parsed with ``KAGScriptParser(...).parse(True)``. Text and
    inline tags are accumulated until a ``page`` command, which closes the
    current record. A command whose name is wrapped in ``【…】`` sets the
    speaker name of the record being built. Whatever is still pending after
    the last line is written as a final record.

    Message serialization (reversed by ``parse_message``):

    * text: ``&`` and ``<`` are replaced by ``&amp;`` and ``&lt;`` so literal
      characters cannot be mistaken for entities or tags;
    * tags: ``<name key="value" ...>`` with every part passed through
      ``html.escape``.

    Args:
        script_path: Path of the KAG script to read (UTF-8).
        output_path: Path of the JSON file to write (UTF-8); a list of
            ``{"name": ..., "message": ...}`` objects, ``name`` optional.
    """
    with open(script_path, "r", encoding="utf-8") as f:
        script_text = f.read()
    script = KAGScriptParser(script_text).parse(True)

    name = None
    fragments = []  # pieces of the message currently being assembled
    result = []
    for line in script:
        if isinstance(line, CommandNode):
            if line.name == 'page':
                # A page command always emits a record, even an empty one.
                result.append(_make_entry(name, ''.join(fragments)))
                fragments = []
                name = None
            elif line.name.startswith("【") and line.name.endswith("】"):
                name = line.name[1:-1]
        elif isinstance(line, list):
            for node in line:
                if isinstance(node, TextNode):
                    # Escape "&" first so the "&" of "&lt;" is not re-escaped.
                    fragments.append(
                        node.text.replace("&", "&amp;").replace("<", "&lt;")
                    )
                elif isinstance(node, TagNode):
                    fragments.append(_serialize_tag(node))

    # Trailing content that was never closed by a page command.
    message = ''.join(fragments)
    if name is not None or message:
        result.append(_make_entry(name, message))

    with open(output_path, "w", encoding="utf-8") as f:
        json.dump(result, f, ensure_ascii=False, indent=2)
|
|
|
|
|
|
def extract_dict_terms(script_path: str):
    """Load the term CSV and build the lookup tables used when patching.

    Args:
        script_path: Path of the CSV file (UTF-8, optional BOM) with a
            ``term`` column.

    Returns:
        A ``(terms, overrides)`` tuple. ``terms`` maps each ``term`` value to
        its full CSV row. ``overrides`` maps the part of a term before its
        first space to the full term, for terms that contain a space; when
        several terms share that prefix the last row wins.
    """
    with open(script_path, "r", encoding="utf-8-sig") as handle:
        rows = list(DictReader(handle))

    terms = {record['term']: record for record in rows}
    overrides = {
        record['term'].split(' ')[0]: record['term']
        for record in rows
        if ' ' in record['term']
    }
    return terms, overrides
|
|
|
|
|
|
def extract_dict(script_path: str, output_path: str):
    """Extract the dictionary entries embedded in ``dict.scn`` into a CSV file.

    The lines between a ``var text = '`` line and a ``';`` line are collected
    and parsed with ``KAGScriptParser``. Each label starts an entry and a
    ``return`` command ends it. The entry's text lines are joined with
    newlines; the first line (the entry name) and an optional following
    ``【…】`` line are dropped, and the remainder becomes the description.

    Args:
        script_path: Path of the script that contains the text block (UTF-8).
        output_path: Path of the CSV to write (UTF-8 with BOM) with columns
            ``term``, ``translation`` (left empty) and ``description``.

    Raises:
        ValueError: If a text line contains anything other than text nodes.
    """
    with open(script_path, "r", encoding="utf-8") as f:
        script_text = f.read()

    in_dict = False
    dict_lines = []
    for line in script_text.splitlines():
        if line == "var text = '":
            in_dict = True
        elif line == "';":
            in_dict = False
        elif in_dict:
            dict_lines.append(line)
    # Same layout as before: a leading newline, then each line newline-terminated.
    dict_data = "\n" + "".join(f"{line}\n" for line in dict_lines)

    script = KAGScriptParser(dict_data).parse(True)
    descriptions = {}
    label = None
    term: List[str] = []
    for line in script:
        if isinstance(line, LabelNode):
            label = line.name[1:]
        elif isinstance(line, list):
            if term:
                # Separate consecutive text lines of the same entry.
                term.append('\n')
            for node in line:
                if isinstance(node, TextNode):
                    term.append(node.text)
                else:
                    raise ValueError("Unexpected node type in dict.scn", node)
        elif isinstance(line, CommandNode):
            if line.name == "return":
                # Drop the entry name and the newline after it. Slicing
                # (rather than pop) tolerates entries with no description.
                body = term[2:]
                # Drop the optional 【…】 reading line and its newline.
                if body and body[0].startswith("【") and body[0].endswith("】"):
                    body = body[2:]
                descriptions[label] = ''.join(body)
                term = []
                label = None

    with open(output_path, "w", encoding="utf-8-sig", newline="") as f:
        writer = DictWriter(f, fieldnames=["term", "translation", "description"], lineterminator="\n")
        writer.writeheader()
        for k, v in descriptions.items():
            writer.writerow({"term": k, "translation": "", "description": v})
|
|
|
|
|
|
def parse_message(message: str) -> List[ParsedLine]:
    """Parse a message string (HTML-escaped KAG tag format) back into ParsedLines.

    Reverses the serialization in extract_script: <tagname key="val"> → TagNode,
    HTML entities → TextNode text, splits on \\n.

    Args:
        message: Serialized message; lines are separated by newline characters.

    Returns:
        One list of nodes per non-empty line. Lines that yield no nodes are
        omitted. ``key="value"`` attributes are stored as unescaped strings
        (including empty values); bare keys are stored as ``True``.
    """
    # Local import: the module's import block does not import re explicitly.
    import re

    tag_re = re.compile(r"(<[^>]+>)")
    attr_re = re.compile(r'([a-zA-Z0-9_]+)="([^"]*)"|([a-zA-Z0-9_]+)')

    result: List[ParsedLine] = []
    for line in message.split("\n"):
        parsed_line: ParsedLine = []
        # The capturing group keeps the tags themselves in the split output.
        for part in tag_re.split(line):
            if not part:
                continue
            if part.startswith("<") and part.endswith(">"):
                tag_parts = part[1:-1].strip().split(maxsplit=1)
                if tag_parts:
                    attributes = {}
                    if len(tag_parts) > 1:
                        for m in attr_re.finditer(tag_parts[1]):
                            if m.group(1) is not None:  # key="value"
                                # Test the key, not the value, so key=""
                                # is kept as an empty string.
                                attributes[unescape(m.group(1))] = unescape(m.group(2))
                            elif m.group(3):  # boolean key
                                attributes[unescape(m.group(3))] = True
                    parsed_line.append(
                        TagNode(name=unescape(tag_parts[0]), attributes=attributes)
                    )
                    continue
                # "<>" or "< >" has no tag name: fall through and keep it as text.
            parsed_line.append(TextNode(unescape(part)))
        if parsed_line:
            result.append(parsed_line)

    return result
|
|
|
|
|
|
def patch_dict(script_path: str, dict_path: str, output_path: str):
    """Rebuild the dictionary text block of ``dict.scn`` from a translated CSV.

    The lines between ``var text = '`` and ``';`` are parsed with
    ``KAGScriptParser``. For each entry the label is replaced by
    ``'.' + translation``, comments are kept, the original text and empty
    lines are discarded, and before each ``return`` command the translation
    and the description lines from the CSV are inserted, followed by an empty
    line. The rebuilt block replaces the original one; all lines outside the
    block are copied unchanged.

    Args:
        script_path: Path of the original script (UTF-8).
        dict_path: Path of the CSV (UTF-8, optional BOM) with ``term``,
            ``translation`` and ``description`` columns.
        output_path: Path of the patched script to write (UTF-8).

    Raises:
        ValueError: If no complete text block is found, or the block contains
            a command other than ``return``.
        KeyError: If an entry label has no row in the CSV.
    """
    with open(script_path, "r", encoding="utf-8") as f:
        script_text = f.read()

    in_dict = False
    block_lines = []
    start_line = None
    end_line = None
    for (i, line) in enumerate(script_text.splitlines()):
        if line == "var text = '":
            in_dict = True
            start_line = i + 1
        elif in_dict and line == "';":
            # Only a terminator that closes an open block counts; a stray
            # "';" elsewhere must not move end_line and truncate the output.
            in_dict = False
            end_line = i - 1
        elif in_dict:
            block_lines.append(line)
    if start_line is None or end_line is None:
        raise ValueError("dict text block not found", script_path)
    dict_data = "\n" + "".join(f"{line}\n" for line in block_lines)

    script = KAGScriptParser(dict_data).parse(True)

    dicts = {}
    with open(dict_path, "r", encoding="utf-8-sig") as f:
        for row in DictReader(f):
            dicts[row['term']] = row

    new_script = []
    term = None
    for line in script:
        if isinstance(line, LabelNode):
            term = line.name[1:]
            new_script.append(LabelNode('.' + dicts[term]['translation']))
        elif isinstance(line, CommentNode):
            new_script.append([TextNode(f";{line.text}")])
        elif isinstance(line, (EmptyLineNode, list)):
            # Original text and blank lines are replaced by the CSV content.
            pass
        elif isinstance(line, CommandNode):
            if line.name == "return":
                data = dicts[term]
                new_script.append([TextNode(data['translation'])])
                desc: str = data['description']
                for d in desc.splitlines():
                    new_script.append([TextNode(d)])
                new_script.append(EmptyLineNode())
                new_script.append(line)
            else:
                raise ValueError("Unsupported command", line.name)

    dict_data = KAGScriptParser.serialize(new_script)
    origin_lines = script_text.splitlines(True)
    with open(output_path, 'w', encoding='UTF-8') as f:
        # Everything up to and including the opening marker line.
        f.writelines(origin_lines[:start_line])
        f.write(dict_data)
        f.write("\n")
        # The closing marker line and everything after it.
        f.writelines(origin_lines[end_line + 1:])
|
|
|
|
|
|
def _read_m3t_messages(m3t_path: str) -> list:
    """Read the ``●`` message lines of an m3t file, in file order.

    A ``○ NAME:`` line sets the speaker for the next ``●`` line. Literal
    ``\\n`` sequences become newlines, and a message with a non-empty speaker
    is wrapped in ``「…」`` unless the brackets are already present.
    """
    messages = []
    name = None
    with open(m3t_path, "r", encoding="utf-8") as f:
        for line in f:
            line = line.strip()
            if line.startswith("○ NAME:"):
                name = line[7:].strip()
            elif line.startswith("●"):
                entry = {}
                if name is not None:
                    entry['name'] = name
                text = line[1:].strip().replace('\\n', '\n')
                if name:
                    if not text.startswith('「'):
                        text = '「' + text
                    if not text.endswith('」'):
                        text += '」'
                entry['message'] = text
                messages.append(entry)
                name = None
    return messages


def patch_script(script_path: str, m3t_path: str, output_path: str, names, term):
    """Write a translated copy of a KAG script using messages from an m3t file.

    The script is parsed with ``KAGScriptParser``. Original text lines are
    dropped; at each ``page`` command the next m3t message is parsed with
    ``parse_message`` and inserted before the command. In inserted ``wd`` tags
    the ``s`` attribute is replaced by the term's translation (looked up
    directly, or through ``overrides``). ``【…】`` name commands are renamed
    when the name appears in ``names``. A warning is printed when the number
    of consumed messages differs from the number available.

    Args:
        script_path: Path of the original script (UTF-8).
        m3t_path: Path of the m3t translation file (UTF-8).
        output_path: Path of the patched script to write (UTF-8).
        names: Mapping from original speaker name to translated name.
        term: ``(terms, overrides)`` tuple as returned by ``extract_dict_terms``.

    Raises:
        IndexError: If the script has more ``page`` commands than the m3t
            file has messages.
        ValueError: If a ``wd`` tag targets a term that is unknown.
    """
    with open(script_path, "r", encoding="utf-8") as f:
        script_text = f.read()
    script = KAGScriptParser(script_text).parse(True)
    terms, overrides = term
    messages = _read_m3t_messages(m3t_path)

    new_script: ParsedScript = []
    used = 0  # number of m3t messages consumed so far
    for line in script:
        if isinstance(line, CommandNode):
            if line.name == "page":
                if used >= len(messages):
                    # Same exception type as the plain index lookup, but
                    # with enough context to find the offending files.
                    raise IndexError(
                        f"{m3t_path} has only {len(messages)} messages, "
                        f"but {script_path} needs more"
                    )
                message = messages[used]['message']
                used += 1
                parsed_lines = parse_message(message)
                for parsed_line in parsed_lines:
                    for node in parsed_line:
                        if isinstance(node, TagNode) and node.name == 'wd':
                            target = node.attributes['s']
                            if target not in terms:
                                if target not in overrides:
                                    print(message)
                                    raise ValueError('unknown wd target', target)
                                target = overrides[target]
                            node.attributes['s'] = terms[target]['translation']
                new_script.extend(parsed_lines)
            elif line.name.startswith("【") and line.name.endswith("】"):
                name = line.name[1:-1]
                if name in names:
                    line.name = f"【{names[name]}】"
        elif isinstance(line, list):
            # Original text is replaced by the translated message above.
            continue
        new_script.append(line)

    if used != len(messages):
        print(f"WARNING: processed message not matched. expected {len(messages)}, actual {used}, {script_path}")

    script_data = KAGScriptParser.serialize(new_script)
    with open(output_path, 'w', encoding='utf-8') as f:
        f.write(script_data)
|
|
|
|
def extract_script_auto(script_path: str, output_path: str):
    """Extract a single script, or every ``.scn`` file in a directory.

    A file named ``dict.scn`` is handled by ``extract_dict`` (CSV output);
    every other script is handled by ``extract_script`` (JSON output).
    Missing parent directories of each output file are created.

    Args:
        script_path: A script file, or a directory that is scanned
            (non-recursively) for files ending in ``.scn`` (case-insensitive).
        output_path: The output file when ``script_path`` is a file, otherwise
            the directory that receives ``<stem>.csv`` / ``<stem>.json``.
    """
    if not isdir(script_path):
        # Single-file mode: output_path is the file to write.
        parent = dirname(output_path)
        if parent and not isdir(parent):
            makedirs(parent, exist_ok=True)
        if basename(script_path) == "dict.scn":
            extract_dict(script_path, output_path)
        else:
            extract_script(script_path, output_path)
        return

    for entry in listdir(script_path):
        if not entry.lower().endswith(".scn"):
            continue
        is_dictionary = entry == "dict.scn"
        stem = splitext(basename(entry))[0]
        target = join(output_path, stem + (".csv" if is_dictionary else ".json"))
        parent = dirname(target)
        if parent and not isdir(parent):
            makedirs(parent, exist_ok=True)
        source = join(script_path, entry)
        if is_dictionary:
            extract_dict(source, target)
        else:
            extract_script(source, target)
|
|
|
|
|
|
def read_names(name_dict_path: str):
    """Load the speaker-name translation table from a CSV file.

    Args:
        name_dict_path: Path of a CSV (UTF-8, optional BOM) with ``JP_Name``
            and ``CN_Name`` columns.

    Returns:
        A dict mapping each ``JP_Name`` to its ``CN_Name``; when a name occurs
        more than once the last row wins.
    """
    with open(name_dict_path, 'r', encoding='utf-8-sig') as handle:
        return {record['JP_Name']: record['CN_Name'] for record in DictReader(handle)}
|
|
|
|
|
|
def patch_script_auto(script_path: str, m3t_path: str, output_path: str, name_dict_path: str, dict_path: str):
    """Patch a single script, or every ``.scn`` file in a directory.

    A file named ``dict.scn`` is patched by ``patch_dict`` from a CSV; every
    other script is patched by ``patch_script`` from an m3t file. In directory
    mode a script whose translation file does not exist is skipped. Missing
    parent directories of each output file are created.

    Args:
        script_path: A script file, or a directory that is scanned
            (non-recursively) for files ending in ``.scn`` (case-insensitive).
        m3t_path: The translation file when ``script_path`` is a file,
            otherwise the directory holding ``<stem>.m3t`` / ``dict.csv``.
        output_path: The output file when ``script_path`` is a file, otherwise
            the directory that receives the patched scripts.
        name_dict_path: CSV read by ``read_names``.
        dict_path: Term CSV read by ``extract_dict_terms``.
    """
    names = read_names(name_dict_path)
    term = extract_dict_terms(dict_path)

    if isdir(script_path):
        for file in listdir(script_path):
            if not file.lower().endswith(".scn"):
                continue
            is_dictionary = file == "dict.scn"
            full_path = join(script_path, file)
            stem = splitext(basename(file))[0]
            m3t_full_path = join(m3t_path, stem + (".csv" if is_dictionary else ".m3t"))
            if not exists(m3t_full_path):
                # No translation available for this script: leave it out.
                continue
            output_full_path = join(output_path, basename(file))
            # Create the output directory on demand, as the single-file
            # branch and extract_script_auto do; without this, writing into
            # a not-yet-existing output directory fails.
            pdir = dirname(output_full_path)
            if pdir and not isdir(pdir):
                makedirs(pdir, exist_ok=True)
            if is_dictionary:
                patch_dict(full_path, m3t_full_path, output_full_path)
            else:
                patch_script(full_path, m3t_full_path, output_full_path, names, term)
    else:
        pdir = dirname(output_path)
        if pdir and not isdir(pdir):
            makedirs(pdir, exist_ok=True)
        if basename(script_path) == "dict.scn":
            patch_dict(script_path, m3t_path, output_path)
        else:
            patch_script(script_path, m3t_path, output_path, names, term)
|
|
|
|
|
|
if __name__ == "__main__":
    # Command-line entry point with two subcommands:
    #   extract <script_path> <output_path>
    #   patch   <script_path> <m3t_path> <output_path> <name_dict_path> <dict_path>
    from argparse import ArgumentParser

    parser = ArgumentParser(description="Process KAG script files")
    subparser = parser.add_subparsers(title="Commands", dest="command")

    extract_parser = subparser.add_parser("extract", help="Extract script to JSON")
    extract_parser.add_argument("script_path", help="Path to KAG script file or directory")
    extract_parser.add_argument("output_path", help="Path to output JSON file or directory")

    patch_parser = subparser.add_parser("patch", help="Patch script")
    patch_parser.add_argument("script_path", help="Path to KAG script file or directory")
    patch_parser.add_argument("m3t_path", help="Path to m3t file or directory")
    patch_parser.add_argument("output_path", help="Path to output KAG script file or directory")
    patch_parser.add_argument("name_dict_path", help="Path to name dict")
    patch_parser.add_argument("dict_path", help="path to dict.csv")

    args = parser.parse_args()
    if args.command == "extract":
        extract_script_auto(args.script_path, args.output_path)
    elif args.command == "patch":
        patch_script_auto(args.script_path, args.m3t_path, args.output_path, args.name_dict_path, args.dict_path)
    else:
        # The subcommand is optional for argparse, so a bare invocation
        # reaches this point; show usage instead of exiting silently.
        parser.print_help()
|