# SPDX-License-Identifier: LicenseRef-Proprietary from __future__ import annotations import re from abc import ABC, abstractmethod from dataclasses import dataclass, field from typing import Any, Dict, List, Union # --- Node Definitions --- # We use dataclasses to represent the different types of parsed elements. class INode(ABC): @abstractmethod def serialize(self) -> str: pass @dataclass class CommentNode(INode): text: str def __repr__(self): return f"Comment('{self.text}')" def serialize(self) -> str: return f"; {self.text}" @dataclass class LabelNode(INode): name: str page: str = "" def __repr__(self): return f"Label(name='{self.name}', page='{self.page}')" def serialize(self) -> str: if self.page: return f"*{self.name}|{self.page}" return f"*{self.name}" @dataclass class TextNode(INode): text: str def __repr__(self): return f"Text('{self.text}')" def serialize(self) -> str: # In KAG, a literal '[' is escaped as '[['. return self.text.replace("[", "[[") @dataclass class EmptyLineNode(INode): def __repr__(self) -> str: return f"EmptyLine" def serialize(self) -> str: return "" @dataclass class TagNode(INode): name: str attributes: Dict[str, Any] = field(default_factory=dict) def __repr__(self): return f"Tag(name='{self.name}', attributes={self.attributes})" def _serialize_attributes(self) -> str: """Helper to convert the attribute dictionary to a string.""" parts = [] for key, value in self.attributes.items(): if value is True: parts.append(key) else: val_str = str(value) # Quote the value if it contains spaces to ensure it's parsed correctly. if " " in val_str or "=" in val_str: parts.append(f'{key}="{val_str}"') else: parts.append(f"{key}={val_str}") return " ".join(parts) def serialize(self) -> str: attr_str = self._serialize_attributes() if attr_str: return f"[{self.name} {attr_str}]" return f"[{self.name}]" @dataclass class CommandNode(TagNode): # A command is just a tag with a different syntax def __repr__(self): return f"Command(name='{self.name}', attributes={self.attributes})" def serialize(self) -> str: attr_str = self._serialize_attributes() if attr_str: return f"@{self.name} {attr_str}" return f"@{self.name}" @dataclass class ScriptBlockNode(INode): script: str def __repr__(self): return f"ScriptBlock(script='{self.script[:30]}...')" def serialize(self) -> str: return f"[iscript]\n{self.script}\n[endscript]" # A line can contain a mix of text and tags ParsedLine = List[Union[TextNode, TagNode]] # The final parsed script is a list of different node types ParsedScript = List[ Union[CommentNode, LabelNode, CommandNode, ScriptBlockNode, ParsedLine] ] class KAGScriptParser: """ Parses a KAG (.ks) script file into a structured list of nodes. """ # Regex to split a line into text and tags. It keeps the delimiters (the tags). _LINE_SPLIT_RE = re.compile(r"(\[.*?\])") # Regex to parse attributes within a tag/command string. # It handles: key=value, key="value", key='value', and boolean keys. _ATTR_RE = re.compile( r""" ([a-zA-Z0-9_]+) # Attribute key (?: = # Equals sign ( "[^"]*" | # Double-quoted value '[^']*' | # Single-quoted value [^\s\]]+ # Unquoted value ) )? # The entire value part is optional """, re.VERBOSE, ) def __init__(self, script_text: str): self.lines = script_text.splitlines() self.parsed_script: ParsedScript = [] def _parse_attributes(self, attr_string: str) -> Dict[str, Any]: """Parses the attribute string of a tag or command.""" attributes = {} for match in self._ATTR_RE.finditer(attr_string): key = match.group(1) value = match.group(2) if value is None: # Boolean attribute, like [p clickable] attributes[key] = True else: # Un-quote if necessary if value.startswith('"') and value.endswith('"'): value = value[1:-1] elif value.startswith("'") and value.endswith("'"): value = value[1:-1] # As per C++ code, unescape the ` character value = value.replace("`", "") attributes[key] = value return attributes def _parse_tag_or_command( self, content: str, is_command: bool = False ) -> Union[TagNode, CommandNode]: """Parses a tag [name attr=val] or command @name attr=val.""" parts = content.strip().split(maxsplit=1) tag_name = parts[0] attr_string = parts[1] if len(parts) > 1 else "" attributes = self._parse_attributes(attr_string) NodeClass = CommandNode if is_command else TagNode return NodeClass(name=tag_name, attributes=attributes) def parse(self, perserve_empty_lines=False) -> ParsedScript: """ Executes the parsing process on the entire script. Returns a list of parsed nodes. """ self.parsed_script = [] in_script_block = False script_buffer = [] i = 0 while i < len(self.lines): line = self.lines[i].strip() i += 1 if not line: if perserve_empty_lines: self.parsed_script.append(EmptyLineNode()) else: continue # Skip empty lines # Handle script blocks [iscript]...[endscript] if in_script_block: if line == "[endscript]": in_script_block = False self.parsed_script.append(ScriptBlockNode("\n".join(script_buffer))) script_buffer = [] else: script_buffer.append(line) continue if line == "[iscript]": in_script_block = True continue # Handle comments if line.startswith(";"): self.parsed_script.append(CommentNode(line[1:].lstrip())) continue # Handle labels if line.startswith("*"): label_part = line[1:] if "|" in label_part: name, page = label_part.split("|", 1) self.parsed_script.append(LabelNode(name, page)) else: self.parsed_script.append(LabelNode(label_part)) continue # Handle commands if line.startswith("@"): self.parsed_script.append( self._parse_tag_or_command(line[1:], is_command=True) ) continue # Handle line continuation full_line = line while full_line.endswith("\\"): full_line = full_line[:-1].rstrip() if i < len(self.lines): full_line += " " + self.lines[i].strip() i += 1 else: break # Handle a regular line with text and/or tags parsed_line: ParsedLine = [] parts = self._LINE_SPLIT_RE.split(full_line) for part in parts: if not part: continue if part.startswith("[") and part.endswith("]"): # It's a tag if part == "[[r]]": # Special case from source, though rare parsed_line.append(TextNode("[r]")) elif part == "[[[[": # Another edge case parsed_line.append(TextNode("[[")) elif part.startswith("[["): # Escaped literal text parsed_line.append(TextNode(part[1:])) else: parsed_line.append(self._parse_tag_or_command(part[1:-1])) else: # It's plain text parsed_line.append(TextNode(part)) if parsed_line: self.parsed_script.append(parsed_line) return self.parsed_script def serialize(script: ParsedScript) -> str: """ Serializes a complete parsed script (AST) back into a KAG script string. """ lines = [] for node in script: if isinstance(node, list): # This is a ParsedLine, a mix of text and tags line_parts = [sub_node.serialize() for sub_node in node] lines.append("".join(line_parts)) else: # This is a standalone node (Comment, Label, Command, etc.) lines.append(node.serialize()) return "\n".join(lines) if __name__ == "__main__": from argparse import ArgumentParser from pathlib import Path parser = ArgumentParser("KAGParser", description="KAGParser Demo") parser.add_argument("file") args = parser.parse_args() script = KAGScriptParser(Path(args.file).read_text()).parse( perserve_empty_lines=True ) for ln in script: print(ln)