Add iinkai extract script

This commit is contained in:
2026-04-28 16:26:02 +08:00
parent 222dbd221a
commit 960d117794
2 changed files with 445 additions and 0 deletions

310
KAGParser.py Normal file
View File

@@ -0,0 +1,310 @@
# SPDX-License-Identifier: LicenseRef-Proprietary
from __future__ import annotations
import re
from abc import ABC, abstractmethod
from dataclasses import dataclass, field
from typing import Any, Dict, List, Union
# --- Node Definitions ---
# We use dataclasses to represent the different types of parsed elements.
class INode(ABC):
    """Abstract base class shared by all parsed KAG node types."""

    @abstractmethod
    def serialize(self) -> str:
        """Return this node rendered back as KAG script text."""
        ...
@dataclass
class CommentNode(INode):
    """A comment line; ``text`` is the comment body without the leading ';'."""

    text: str

    def __repr__(self):
        return "Comment('{}')".format(self.text)

    def serialize(self) -> str:
        """Render the comment as '; ' followed by the comment text."""
        return "; {}".format(self.text)
@dataclass
class LabelNode(INode):
    """A label line, written ``*name`` or ``*name|page``."""

    name: str
    page: str = ""

    def __repr__(self):
        return "Label(name='{}', page='{}')".format(self.name, self.page)

    def serialize(self) -> str:
        """Render the label, appending ``|page`` only when a page is set."""
        label = f"*{self.name}"
        return f"{label}|{self.page}" if self.page else label
@dataclass
class TextNode(INode):
    """A run of plain text taken from a regular script line."""

    text: str

    def __repr__(self):
        return "Text('{}')".format(self.text)

    def serialize(self) -> str:
        """Render the text, doubling every '[' so it is read as a literal."""
        # In KAG, a literal '[' is escaped as '[['.
        pieces = self.text.split("[")
        return "[[".join(pieces)
@dataclass
class EmptyLineNode(INode):
    """Marker for a blank source line; it carries no data."""

    def __repr__(self) -> str:
        return "EmptyLine"

    def serialize(self) -> str:
        """Render as an empty string, i.e. a blank line once lines are joined."""
        blank = ""
        return blank
@dataclass
class TagNode(INode):
    """An inline tag of the form ``[name key=value ...]``.

    ``attributes`` maps attribute names to their values. A value of ``True``
    marks a bare attribute that is written without ``=value``.
    """

    name: str
    attributes: Dict[str, Any] = field(default_factory=dict)

    def __repr__(self):
        return f"Tag(name='{self.name}', attributes={self.attributes})"

    def _serialize_attributes(self) -> str:
        """Convert the attribute dictionary to a ``key=value`` string.

        Values are quoted whenever writing them bare would not be read back
        as the same single value: empty strings (a bare ``key=`` is read as a
        boolean attribute), values containing any whitespace or ``=``, and
        values that start with a quote character (which would otherwise be
        stripped as delimiters on re-parse).
        """
        parts = []
        for key, value in self.attributes.items():
            if value is True:
                # Boolean attribute: only the key is written.
                parts.append(key)
                continue

            val_str = str(value)
            needs_quotes = (
                not val_str
                or val_str[0] in "\"'"
                or "=" in val_str
                or any(ch.isspace() for ch in val_str)
            )
            if not needs_quotes:
                parts.append(f"{key}={val_str}")
                continue

            # Prefer double quotes; switch to single quotes only when that
            # avoids a clash with a double quote inside the value.
            quote = "'" if '"' in val_str and "'" not in val_str else '"'
            parts.append(f"{key}={quote}{val_str}{quote}")
        return " ".join(parts)

    def serialize(self) -> str:
        """Render the tag as ``[name]`` or ``[name attrs]``."""
        attr_str = self._serialize_attributes()
        if attr_str:
            return f"[{self.name} {attr_str}]"
        return f"[{self.name}]"
@dataclass
class CommandNode(TagNode):  # A command is just a tag with a different syntax
    """A line-level command, written ``@name key=value ...``."""

    def __repr__(self):
        return "Command(name='{}', attributes={})".format(self.name, self.attributes)

    def serialize(self) -> str:
        """Render the command as ``@name`` or ``@name attrs``."""
        rendered_attrs = self._serialize_attributes()
        head = f"@{self.name}"
        return f"{head} {rendered_attrs}" if rendered_attrs else head
@dataclass
class ScriptBlockNode(INode):
    """The body of an ``[iscript]`` ... ``[endscript]`` block."""

    script: str

    def __repr__(self):
        # Only the first 30 characters are shown to keep the repr short.
        preview = self.script[:30]
        return f"ScriptBlock(script='{preview}...')"

    def serialize(self) -> str:
        """Render the script wrapped in its ``[iscript]``/``[endscript]`` lines."""
        return "[iscript]\n{}\n[endscript]".format(self.script)
# A line can contain a mix of text and inline tags.
ParsedLine = List[Union[TextNode, TagNode]]
# The final parsed script is a list of standalone nodes and parsed lines.
# EmptyLineNode is part of the union because the parser appends it when
# empty lines are preserved.
ParsedScript = List[
    Union[
        CommentNode,
        LabelNode,
        CommandNode,
        ScriptBlockNode,
        EmptyLineNode,
        ParsedLine,
    ]
]
class KAGScriptParser:
    """
    Parses a KAG (.ks) script file into a structured list of nodes.

    The input text is split into lines once at construction time. ``parse()``
    then classifies each stripped line as a script-block line, blank line,
    comment, label, command, or a regular line made of text and inline tags.
    """

    # Regex to split a line into text and tags. It keeps the delimiters (the tags).
    # NOTE(review): the match is non-greedy and ends at the first ']', so a ']'
    # inside a quoted attribute value terminates the tag early.
    _LINE_SPLIT_RE = re.compile(r"(\[.*?\])")
    # Regex to parse attributes within a tag/command string.
    # It handles: key=value, key="value", key='value', and boolean keys.
    _ATTR_RE = re.compile(
        r"""
        ([a-zA-Z0-9_]+) # Attribute key
        (?:
        = # Equals sign
        (
        "[^"]*" | # Double-quoted value
        '[^']*' | # Single-quoted value
        [^\s\]]+ # Unquoted value
        )
        )? # The entire value part is optional
        """,
        re.VERBOSE,
    )

    def __init__(self, script_text: str):
        """Store the script as a list of lines; nothing is parsed yet."""
        self.lines = script_text.splitlines()
        self.parsed_script: ParsedScript = []

    def _parse_attributes(self, attr_string: str) -> Dict[str, Any]:
        """Parse the attribute string of a tag or command.

        Returns a dict mapping each key to its string value, or to ``True``
        for a key that appears without ``=value``. Surrounding quotes are
        removed from quoted values, and every backtick is dropped from values.
        """
        attributes = {}
        for match in self._ATTR_RE.finditer(attr_string):
            key = match.group(1)
            value = match.group(2)
            if value is None:
                # Boolean attribute, like [p clickable]
                attributes[key] = True
                continue
            # Un-quote if necessary
            if value.startswith('"') and value.endswith('"'):
                value = value[1:-1]
            elif value.startswith("'") and value.endswith("'"):
                value = value[1:-1]
            # As per C++ code, unescape the ` character
            # NOTE(review): this removes every backtick rather than treating it
            # as an escape for the next character - confirm against the
            # reference implementation.
            attributes[key] = value.replace("`", "")
        return attributes

    @staticmethod
    def _split_tag_content(content: str) -> List[str]:
        """Split tag/command content into ``[name, attribute_string]``.

        Missing pieces become empty strings, so empty content (from ``[]`` or
        a bare ``@``) yields ``["", ""]`` instead of raising ``IndexError``.
        """
        parts = content.strip().split(maxsplit=1)
        parts += [""] * (2 - len(parts))
        return parts

    def _parse_tag_or_command(
        self, content: str, is_command: bool = False
    ) -> Union[TagNode, CommandNode]:
        """Parse a tag ``[name attr=val]`` or command ``@name attr=val``.

        ``content`` is the text without the surrounding brackets or the
        leading ``@``. Returns a ``CommandNode`` when ``is_command`` is true,
        otherwise a ``TagNode``.
        """
        tag_name, attr_string = self._split_tag_content(content)
        attributes = self._parse_attributes(attr_string)
        node_class = CommandNode if is_command else TagNode
        return node_class(name=tag_name, attributes=attributes)

    def parse(self, perserve_empty_lines=False) -> ParsedScript:
        """
        Executes the parsing process on the entire script.

        ``perserve_empty_lines`` (spelling kept for compatibility): when true,
        blank lines outside script blocks are emitted as ``EmptyLineNode`` and
        blank lines inside a script block are kept in the script text; when
        false, blank lines are dropped.

        Returns a list of parsed nodes, which is also stored in
        ``self.parsed_script``.
        """
        self.parsed_script = []
        in_script_block = False
        script_buffer: List[str] = []
        i = 0
        while i < len(self.lines):
            line = self.lines[i].strip()
            i += 1

            # Handle script blocks [iscript]...[endscript]. This is checked
            # before the blank-line handling so that a blank line inside a
            # script block never produces a top-level EmptyLineNode.
            if in_script_block:
                if line == "[endscript]":
                    in_script_block = False
                    self.parsed_script.append(
                        ScriptBlockNode("\n".join(script_buffer))
                    )
                    script_buffer = []
                elif line or perserve_empty_lines:
                    script_buffer.append(line)
                continue

            if not line:
                if perserve_empty_lines:
                    self.parsed_script.append(EmptyLineNode())
                continue

            if line == "[iscript]":
                in_script_block = True
                continue

            # Handle comments
            if line.startswith(";"):
                self.parsed_script.append(CommentNode(line[1:].lstrip()))
                continue

            # Handle labels: "*name" or "*name|page"
            if line.startswith("*"):
                name, _, page = line[1:].partition("|")
                self.parsed_script.append(LabelNode(name, page))
                continue

            # Handle commands
            if line.startswith("@"):
                self.parsed_script.append(
                    self._parse_tag_or_command(line[1:], is_command=True)
                )
                continue

            # Handle line continuation: a trailing backslash joins the next
            # line (stripped) onto this one, separated by a single space.
            full_line = line
            while full_line.endswith("\\"):
                full_line = full_line[:-1].rstrip()
                if i >= len(self.lines):
                    break
                full_line += " " + self.lines[i].strip()
                i += 1

            # Handle a regular line with text and/or tags
            parsed_line: ParsedLine = []
            for part in self._LINE_SPLIT_RE.split(full_line):
                if not part:
                    continue
                if not (part.startswith("[") and part.endswith("]")):
                    # It's plain text
                    parsed_line.append(TextNode(part))
                elif part.startswith("[["):
                    # Escaped literal text: drop one '[' of the doubled pair.
                    # The split regex stops at the first ']', so a part can
                    # never be "[[r]]" or "[[[["; those special cases were
                    # unreachable and have been removed.
                    parsed_line.append(TextNode(part[1:]))
                else:
                    # It's a tag
                    parsed_line.append(self._parse_tag_or_command(part[1:-1]))
            if parsed_line:
                self.parsed_script.append(parsed_line)

        # An [iscript] without a matching [endscript]: keep what was collected
        # rather than silently discarding it.
        if in_script_block:
            self.parsed_script.append(ScriptBlockNode("\n".join(script_buffer)))
        return self.parsed_script
def serialize(script: ParsedScript) -> str:
    """
    Serializes a complete parsed script (AST) back into a KAG script string.

    Each entry becomes one output line: a list entry (a parsed line of text
    and tags) is the concatenation of its children's ``serialize()`` results,
    and any other entry contributes its own ``serialize()`` result. Lines are
    joined with newlines, without a trailing newline.
    """

    def render(entry) -> str:
        if not isinstance(entry, list):
            # Standalone node (Comment, Label, Command, etc.)
            return entry.serialize()
        # A ParsedLine: a mix of text and tags written back to back.
        return "".join(child.serialize() for child in entry)

    return "\n".join(render(entry) for entry in script)
if __name__ == "__main__":
    from argparse import ArgumentParser
    from pathlib import Path

    # Demo entry point: parse the given script file and print every parsed
    # entry (standalone node or parsed line) on its own line.
    parser = ArgumentParser("KAGParser", description="KAGParser Demo")
    parser.add_argument("file", help="path to the KAG script to parse")
    # The encoding defaults to None, which keeps read_text() on the platform
    # default exactly as before; pass e.g. --encoding shift_jis to override.
    parser.add_argument(
        "--encoding",
        default=None,
        help="text encoding used to read the file (default: platform default)",
    )
    args = parser.parse_args()
    source_text = Path(args.file).read_text(encoding=args.encoding)
    script = KAGScriptParser(source_text).parse(perserve_empty_lines=True)
    for ln in script:
        print(ln)