Add patch support for iinkai
This commit is contained in:
132
iinkai.py
132
iinkai.py
@@ -1,10 +1,10 @@
|
||||
from KAGParser import *
|
||||
from html import escape
|
||||
from html import escape, unescape
|
||||
import json
|
||||
from os.path import isdir, join, basename, splitext, dirname
|
||||
from os.path import isdir, join, basename, splitext, dirname, exists
|
||||
from os import listdir, makedirs
|
||||
from typing import List
|
||||
from csv import DictWriter
|
||||
from csv import DictReader, DictWriter
|
||||
|
||||
|
||||
def extract_script(script_path: str, output_path: str):
|
||||
@@ -93,6 +93,86 @@ def extract_dict(script_path: str, output_path: str):
|
||||
for k, v in dict.items():
|
||||
writer.writerow({"term": k, "translation": "", "description": v})
|
||||
|
||||
|
||||
def parse_message(message: str) -> List[ParsedLine]:
    """Parse a message string (HTML-escaped KAG tag format) back into ParsedLines.

    Reverses the serialization in extract_script: ``<tagname key="val">``
    becomes a TagNode, HTML entities are unescaped into TextNode text, and
    the message is split on newline characters.

    Args:
        message: Message text in which tags are written as
            ``<name key="value" flag>`` and literal text is HTML-escaped.

    Returns:
        One ParsedLine (a list of TagNode/TextNode objects) per line of
        ``message`` that produced at least one node; lines that yield no
        nodes are omitted.
    """
    # Function-scope import so the block does not rely on ``re`` leaking in
    # through a star import elsewhere in the module.
    import re

    tag_re = re.compile(r"(<[^>]+>)")
    # Either key="value" (groups 1 and 2) or a bare boolean key (group 3).
    attr_re = re.compile(r'([a-zA-Z0-9_]+)="([^"]*)"|([a-zA-Z0-9_]+)')

    result: List[ParsedLine] = []
    for line in message.split("\n"):
        parsed_line: ParsedLine = []
        # The capturing group makes split() keep the tags between text runs.
        for part in tag_re.split(line):
            if not part:
                continue

            looks_like_tag = part.startswith("<") and part.endswith(">")
            tag_parts = part[1:-1].strip().split(maxsplit=1) if looks_like_tag else []
            if not tag_parts:
                # Plain text, or a degenerate "<>" / "< >" fragment that has
                # no tag name: keep it as text instead of raising IndexError.
                parsed_line.append(TextNode(unescape(part)))
                continue

            tag_name = unescape(tag_parts[0])
            attributes = {}
            if len(tag_parts) > 1:
                for m in attr_re.finditer(tag_parts[1]):
                    if m.group(1) is not None:
                        # key="value"; compare against None so that an empty
                        # value (key="") is preserved rather than dropped.
                        attributes[unescape(m.group(1))] = unescape(m.group(2))
                    elif m.group(3):
                        # Bare key without a value is a boolean flag.
                        attributes[unescape(m.group(3))] = True
            parsed_line.append(TagNode(name=tag_name, attributes=attributes))

        if parsed_line:
            result.append(parsed_line)

    return result
|
||||
|
||||
|
||||
def patch_script(script_path: str, m3t_path: str, output_path: str, names):
    """Write a copy of a KAG script with its text replaced by translated messages.

    The script is parsed with KAGScriptParser. Every list entry in the parsed
    script is dropped, and before each ``page`` command the next message from
    the m3t file is inserted (parsed with parse_message). Commands named
    ``【X】`` are renamed to ``【names[X]】`` when ``X`` is a key of ``names``.

    Args:
        script_path: Path of the original script, read as UTF-8.
        m3t_path: Path of the m3t translation file, read as UTF-8. Lines
            starting with ``○ NAME:`` carry a speaker name and lines starting
            with ``●`` carry a message; a literal ``\\n`` inside a message
            stands for a line break.
        output_path: Path the patched script is written to (UTF-8).
        names: Mapping from original speaker name to replacement name.

    Raises:
        IndexError: If the script contains more ``page`` commands than the
            m3t file contains messages.
    """
    with open(script_path, "r", encoding="utf-8") as f:
        script_text = f.read()
    # NOTE(review): the meaning of the positional True is defined by
    # KAGScriptParser.parse, which is not visible here.
    script = KAGScriptParser(script_text).parse(True)

    # Collect the translated messages in file order.
    messages = []
    speaker = None
    with open(m3t_path, "r", encoding="utf-8") as f:
        for raw in f:
            entry = raw.strip()
            if entry.startswith("○ NAME:"):
                speaker = entry[7:].strip()
            elif entry.startswith("●"):
                record = {}
                if speaker is not None:
                    record['name'] = speaker
                record['message'] = entry[1:].strip().replace('\\n', '\n')
                messages.append(record)
                # A name applies only to the message that directly follows it.
                speaker = None

    new_script: ParsedScript = []
    i = 0
    for line in script:
        if isinstance(line, CommandNode):
            if line.name == "page":
                if i >= len(messages):
                    # Same exception type as the bare index failure, but with
                    # enough context to locate the mismatch.
                    raise IndexError(
                        f"{m3t_path}: only {len(messages)} messages available, "
                        f"but {script_path} needs message #{i + 1}"
                    )
                new_script.extend(parse_message(messages[i]['message']))
                i += 1
            elif line.name.startswith("【") and line.name.endswith("】"):
                original_name = line.name[1:-1]
                if original_name in names:
                    line.name = f"【{names[original_name]}】"
        elif isinstance(line, list):
            # List entries are the original text lines; they are replaced by
            # the translated message inserted before the next page command.
            continue
        new_script.append(line)

    if i != len(messages):
        # Report the number of messages, not the length of the last string.
        print(f"WARNING: processed message not matched. expected {len(messages)}, actual {i}")

    script_data = KAGScriptParser.serialize(new_script)
    with open(output_path, 'w', encoding='utf-8') as f:
        f.write(script_data)
|
||||
|
||||
def extract_script_auto(script_path: str, output_path: str):
|
||||
if isdir(script_path):
|
||||
for file in listdir(script_path):
|
||||
@@ -123,6 +203,45 @@ def extract_script_auto(script_path: str, output_path: str):
|
||||
extract_script(script_path, output_path)
|
||||
|
||||
|
||||
def read_names(name_dict_path: str) -> dict:
    """Load the name translation table from a CSV file.

    The file is read as UTF-8 (``utf-8-sig``, so a leading BOM is ignored)
    and its header row must provide ``JP_Name`` and ``CN_Name`` columns.

    Args:
        name_dict_path: Path of the CSV file.

    Returns:
        A dict mapping each ``JP_Name`` value to the ``CN_Name`` value of the
        same row; when a ``JP_Name`` repeats, the last row wins.

    Raises:
        KeyError: If a row lacks the ``JP_Name`` or ``CN_Name`` column.
    """
    # newline="" hands line-ending handling to the csv module, as its
    # documentation requires; otherwise line breaks inside quoted fields
    # are rewritten by universal-newline translation.
    with open(name_dict_path, 'r', encoding='utf-8-sig', newline='') as f:
        return {row['JP_Name']: row['CN_Name'] for row in DictReader(f)}
|
||||
|
||||
|
||||
def patch_script_auto(script_path: str, m3t_path: str, output_path: str, name_dict_path: str):
    """Patch a single script file or every ``.scn`` file in a directory.

    Args:
        script_path: A script file, or a directory containing ``.scn`` files.
        m3t_path: The m3t file for a single script, or the directory holding
            ``<script stem>.m3t`` files when ``script_path`` is a directory.
        output_path: Output file, or output directory in directory mode.
        name_dict_path: CSV file passed to read_names.

    In directory mode, scripts without a matching m3t file are skipped.
    ``dict.scn`` is never patched: no patcher for it exists in this function.
    """
    names = read_names(name_dict_path)

    if not isdir(script_path):
        # Single-file mode: make sure the parent directory of the output exists.
        pdir = dirname(output_path)
        if pdir and not isdir(pdir):
            makedirs(pdir, exist_ok=True)
        if basename(script_path) != "dict.scn":
            patch_script(script_path, m3t_path, output_path, names)
        return

    for file in listdir(script_path):
        if not file.lower().endswith(".scn"):
            continue
        if file == "dict.scn":
            # Dictionary patching is not implemented; skip explicitly.
            continue
        m3t_full_path = join(m3t_path, splitext(basename(file))[0] + ".m3t")
        if not exists(m3t_full_path):
            # No translation available for this script.
            continue
        # Create the output directory on demand so that patching into a
        # not-yet-existing directory works, as single-file mode already does.
        makedirs(output_path, exist_ok=True)
        patch_script(
            join(script_path, file),
            m3t_full_path,
            join(output_path, basename(file)),
            names,
        )
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
from argparse import ArgumentParser
|
||||
parser = ArgumentParser(description="Process KAG script files")
|
||||
@@ -130,6 +249,13 @@ if __name__ == "__main__":
|
||||
extract_parser = subparser.add_parser("extract", help="Extract script to JSON")
|
||||
extract_parser.add_argument("script_path", help="Path to KAG script file or directory")
|
||||
extract_parser.add_argument("output_path", help="Path to output JSON file or directory")
|
||||
patch_parser = subparser.add_parser("patch", help="Patch script")
|
||||
patch_parser.add_argument("script_path", help="Path to KAG script file or directory")
|
||||
patch_parser.add_argument("m3t_path", help="Path to m3t file or directory")
|
||||
patch_parser.add_argument("output_path", help="Path to output KAG script file or directory")
|
||||
patch_parser.add_argument("name_dict_path", help="Path to name dict")
|
||||
args = parser.parse_args()
|
||||
if args.command == "extract":
|
||||
extract_script_auto(args.script_path, args.output_path)
|
||||
elif args.command == "patch":
|
||||
patch_script_auto(args.script_path, args.m3t_path, args.output_path, args.name_dict_path)
|
||||
|
||||
Reference in New Issue
Block a user