Add patch support for iinkai
This commit is contained in:
132
iinkai.py
132
iinkai.py
@@ -1,10 +1,10 @@
|
|||||||
from KAGParser import *
|
from KAGParser import *
|
||||||
from html import escape
|
from html import escape, unescape
|
||||||
import json
|
import json
|
||||||
from os.path import isdir, join, basename, splitext, dirname
|
from os.path import isdir, join, basename, splitext, dirname, exists
|
||||||
from os import listdir, makedirs
|
from os import listdir, makedirs
|
||||||
from typing import List
|
from typing import List
|
||||||
from csv import DictWriter
|
from csv import DictReader, DictWriter
|
||||||
|
|
||||||
|
|
||||||
def extract_script(script_path: str, output_path: str):
|
def extract_script(script_path: str, output_path: str):
|
||||||
@@ -93,6 +93,86 @@ def extract_dict(script_path: str, output_path: str):
|
|||||||
for k, v in dict.items():
|
for k, v in dict.items():
|
||||||
writer.writerow({"term": k, "translation": "", "description": v})
|
writer.writerow({"term": k, "translation": "", "description": v})
|
||||||
|
|
||||||
|
|
||||||
|
def parse_message(message: str) -> List[ParsedLine]:
    """Parse an HTML-escaped, tag-annotated message back into ParsedLines.

    The message is split on newline characters; each non-empty line becomes
    one ParsedLine. Within a line, every ``<name key="val" flag>`` span is
    turned into a ``TagNode`` and every other span into a ``TextNode`` whose
    HTML entities are unescaped.

    Args:
        message: Translated message text, with real newline characters
            separating lines.

    Returns:
        One list of nodes per non-empty line, in input order.
    """
    # Imported locally so the function does not rely on ``re`` leaking in
    # through a star-import elsewhere in the file.
    import re

    tag_re = re.compile(r"(<[^>]+>)")
    attr_re = re.compile(r'([a-zA-Z0-9_]+)="([^"]*)"|([a-zA-Z0-9_]+)')

    result: List[ParsedLine] = []
    for line in message.split("\n"):
        parsed_line: ParsedLine = []
        # The capturing group keeps the tags themselves in the split output.
        for part in tag_re.split(line):
            if not part:
                continue
            if not (part.startswith("<") and part.endswith(">")):
                parsed_line.append(TextNode(unescape(part)))
                continue

            tag_parts = part[1:-1].strip().split(maxsplit=1)
            if not tag_parts:
                # Something like "< >" has no tag name; keep it as plain text
                # instead of failing on tag_parts[0].
                parsed_line.append(TextNode(unescape(part)))
                continue

            tag_name = unescape(tag_parts[0])
            attributes = {}
            if len(tag_parts) > 1:
                for m in attr_re.finditer(tag_parts[1]):
                    if m.group(1) is not None:  # key="value"
                        # Test against None rather than truthiness so that an
                        # empty value (key="") is kept instead of dropped.
                        attributes[unescape(m.group(1))] = unescape(m.group(2))
                    elif m.group(3):  # bare key -> boolean flag
                        attributes[unescape(m.group(3))] = True
            parsed_line.append(TagNode(name=tag_name, attributes=attributes))
        if parsed_line:
            result.append(parsed_line)

    return result
|
||||||
|
|
||||||
|
|
||||||
|
def patch_script(script_path: str, m3t_path: str, output_path: str, names):
    """Write a copy of a KAG script with its messages replaced from an m3t file.

    Every ``page`` command in the script consumes the next translated message
    from the m3t file; the parsed message lines are inserted immediately
    before that command, and the script's original text lines (list nodes)
    are dropped. Commands named ``【X】`` are renamed to ``【names[X]】`` when
    ``X`` is a key of ``names``.

    Args:
        script_path: Path of the source KAG script (UTF-8).
        m3t_path: Path of the m3t translation file (UTF-8).
        output_path: Path the patched script is written to (UTF-8).
        names: Mapping from original speaker name to its replacement.

    Raises:
        IndexError: If the script has more ``page`` commands than the m3t
            file has translated messages.
    """
    with open(script_path, "r", encoding="utf-8") as f:
        script_text = f.read()
    script = KAGScriptParser(script_text).parse(True)

    # Collect translated messages ("●" lines). A preceding "○ NAME:" line is
    # recorded with the message, although nothing below reads it yet.
    messages = []
    name = None
    with open(m3t_path, "r", encoding="utf-8") as f:
        for raw_line in f:
            entry = raw_line.strip()
            if entry.startswith("○ NAME:"):
                name = entry[7:].strip()
            elif entry.startswith("●"):
                d = {}
                if name is not None:
                    d['name'] = name
                # m3t stores line breaks as a literal backslash-n sequence.
                d['message'] = entry[1:].strip().replace('\\n', '\n')
                messages.append(d)
                name = None

    new_script: ParsedScript = []
    i = 0
    for line in script:
        if isinstance(line, CommandNode):
            if line.name == "page":
                if i >= len(messages):
                    raise IndexError(
                        f"{m3t_path}: only {len(messages)} translated message(s), "
                        f"but {script_path} needs more"
                    )
                new_script.extend(parse_message(messages[i]['message']))
                i += 1
            elif line.name.startswith("【") and line.name.endswith("】"):
                speaker = line.name[1:-1]
                if speaker in names:
                    line.name = f"【{names[speaker]}】"
        elif isinstance(line, list):
            # Original text line: replaced by the translation emitted at the
            # next "page" command.
            continue
        new_script.append(line)

    if i != len(messages):
        # Report the number of messages, not the length of the last string.
        print(f"WARNING: processed message not matched. expected {len(messages)}, actual {i}")

    script_data = KAGScriptParser.serialize(new_script)
    with open(output_path, 'w', encoding='utf-8') as f:
        f.write(script_data)
|
||||||
|
|
||||||
def extract_script_auto(script_path: str, output_path: str):
|
def extract_script_auto(script_path: str, output_path: str):
|
||||||
if isdir(script_path):
|
if isdir(script_path):
|
||||||
for file in listdir(script_path):
|
for file in listdir(script_path):
|
||||||
@@ -123,6 +203,45 @@ def extract_script_auto(script_path: str, output_path: str):
|
|||||||
extract_script(script_path, output_path)
|
extract_script(script_path, output_path)
|
||||||
|
|
||||||
|
|
||||||
|
def read_names(name_dict_path: str):
    """Load the speaker-name mapping from a CSV file.

    The file is read as UTF-8 with an optional BOM and must have ``JP_Name``
    and ``CN_Name`` columns. When a ``JP_Name`` occurs more than once, the
    last row wins.

    Args:
        name_dict_path: Path of the CSV file.

    Returns:
        A dict mapping each ``JP_Name`` value to its ``CN_Name`` value.

    Raises:
        KeyError: If a row lacks the ``JP_Name`` or ``CN_Name`` column.
    """
    # newline='' lets the csv module do its own newline handling, so line
    # breaks embedded in quoted fields are preserved unchanged.
    with open(name_dict_path, 'r', encoding='utf-8-sig', newline='') as f:
        return {row['JP_Name']: row['CN_Name'] for row in DictReader(f)}
|
||||||
|
|
||||||
|
|
||||||
|
def patch_script_auto(script_path: str, m3t_path: str, output_path: str, name_dict_path: str):
    """Patch a single KAG script, or every ``.scn`` script in a directory.

    Directory mode: each ``*.scn`` file (extension matched case-insensitively)
    in ``script_path`` is paired with ``<stem>.m3t`` in ``m3t_path`` and
    written to ``output_path`` under the same file name. Scripts without a
    matching m3t file are skipped.

    Single-file mode: ``script_path`` is patched with the m3t file at
    ``m3t_path`` and written to ``output_path``.

    ``dict.scn`` is never patched in either mode.

    Args:
        script_path: A KAG script file or a directory of them.
        m3t_path: An m3t file, or the directory holding the m3t files.
        output_path: Output file, or output directory in directory mode.
        name_dict_path: CSV file passed to ``read_names``.
    """
    names = read_names(name_dict_path)

    if isdir(script_path):
        # Create the output directory up front, as single-file mode does for
        # its parent directory; otherwise the first write fails.
        if output_path:
            makedirs(output_path, exist_ok=True)
        for file in listdir(script_path):
            if not file.lower().endswith(".scn"):
                continue
            if file == "dict.scn":
                # Patching the dictionary script is not implemented.
                continue
            m3t_full_path = join(m3t_path, splitext(basename(file))[0] + ".m3t")
            if not exists(m3t_full_path):
                continue
            patch_script(
                join(script_path, file),
                m3t_full_path,
                join(output_path, basename(file)),
                names,
            )
        return

    pdir = dirname(output_path)
    if pdir:
        makedirs(pdir, exist_ok=True)
    if basename(script_path) == "dict.scn":
        # Patching the dictionary script is not implemented.
        return
    patch_script(script_path, m3t_path, output_path, names)
|
||||||
|
|
||||||
|
|
||||||
if __name__ == "__main__":
|
if __name__ == "__main__":
|
||||||
from argparse import ArgumentParser
|
from argparse import ArgumentParser
|
||||||
parser = ArgumentParser(description="Process KAG script files")
|
parser = ArgumentParser(description="Process KAG script files")
|
||||||
@@ -130,6 +249,13 @@ if __name__ == "__main__":
|
|||||||
extract_parser = subparser.add_parser("extract", help="Extract script to JSON")
|
extract_parser = subparser.add_parser("extract", help="Extract script to JSON")
|
||||||
extract_parser.add_argument("script_path", help="Path to KAG script file or directory")
|
extract_parser.add_argument("script_path", help="Path to KAG script file or directory")
|
||||||
extract_parser.add_argument("output_path", help="Path to output JSON file or directory")
|
extract_parser.add_argument("output_path", help="Path to output JSON file or directory")
|
||||||
|
patch_parser = subparser.add_parser("patch", help="Patch script")
|
||||||
|
patch_parser.add_argument("script_path", help="Path to KAG script file or directory")
|
||||||
|
patch_parser.add_argument("m3t_path", help="Path to m3t file or directory")
|
||||||
|
patch_parser.add_argument("output_path", help="Path to output KAG script file or directory")
|
||||||
|
patch_parser.add_argument("name_dict_path", help="Path to name dict")
|
||||||
args = parser.parse_args()
|
args = parser.parse_args()
|
||||||
if args.command == "extract":
|
if args.command == "extract":
|
||||||
extract_script_auto(args.script_path, args.output_path)
|
extract_script_auto(args.script_path, args.output_path)
|
||||||
|
elif args.command == "patch":
|
||||||
|
patch_script_auto(args.script_path, args.m3t_path, args.output_path, args.name_dict_path)
|
||||||
|
|||||||
Reference in New Issue
Block a user