Add more checks and fixes

This commit is contained in:
2026-01-05 15:51:09 +08:00
parent bc4d83cef6
commit 64b4fb9fb8
2 changed files with 52 additions and 4 deletions

View File

@@ -8,6 +8,7 @@ class ScriptType(str, Enum):
GENERIC = "generic"
SCN = "scn"
XML = "xml"
CIRCUS = "circus"
def __str__(self):
return self.value
@@ -25,6 +26,9 @@ class ScriptType(str, Enum):
def is_xml(self):
return self == ScriptType.XML
def is_circus(self):
return self == ScriptType.CIRCUS
# Matches a run of three or more consecutive ASCII periods.
ELLIPSIS = re.compile(r'\.{3,}')
# Matches a run of one or more horizontal-ellipsis characters.
ELLIPSIS_ZH = re.compile(r'…+')
@@ -66,6 +70,10 @@ CJK_LEFT_QUOTE = ["「", "『"]
# NOTE(review): every string literal in the three lists and in DIALOGUES below
# is empty as displayed here. Presumably the original quote characters were
# lost when this page was captured -- confirm against the repository.
CJK_RIGHT_QUOTE = ["", ""]
NEED_REPLACED_LEFT_QUOTE = ["", ""]
NEED_REPLACED_RIGHT_QUOTE = ["", ""]
# (left, right) pairs; check_dialogue iterates over them and tests whether a
# source line starts with `left` and ends with `right`.
DIALOGUES = (
("", ""), ("", ""), ("", ""),
)
# '@' followed by one or more ASCII letters or digits. The pattern is wrapped
# in a capture group, so CIRCUS_SPECIAL.split() keeps the matched tags in its
# result (at the odd indexes) instead of discarding them.
CIRCUS_SPECIAL = re.compile(r'(@[a-zA-Z0-9]+)')
def process_line(data: str) -> str:
@@ -119,6 +127,17 @@ def process_xml_line(data: str) -> str:
return ''.join(parts)
# Post-process one line of a CIRCUS script and return the rebuilt string.
#
# The line is split on CIRCUS_SPECIAL. Because that pattern has one capture
# group, the resulting list alternates: even indexes hold ordinary text, odd
# indexes hold the matched '@tag' tokens, which are left untouched and joined
# back in at the end.
def process_circus_line(data: str) -> str:
parts = CIRCUS_SPECIAL.split(data)
for i in range(len(parts)):
# Only the ordinary-text pieces (even indexes) are processed.
if i % 2 == 0:
# As displayed, this strips every remaining '@' from the text piece before
# handing it to process_line.
# NOTE(review): the replacement literal may have lost a character in this
# view -- confirm.
parts[i] = process_line(parts[i].replace('@', ''))
# Looks back at the previous text piece (index i - 2, skipping the tag in
# between) and trims characters off its end.
# NOTE(review): both string literals below are empty as displayed. An empty
# prefix/suffix always matches, so with these literals the while loop would
# never terminate. Presumably the original characters were lost when this
# page was captured -- confirm against the repository.
if i > 0 and parts[i].startswith(''):
while parts[i - 2].endswith(''):
parts[i - 2] = parts[i - 2][:-1]
return ''.join(parts)
def replace_quote_str(s) -> str:
depth = 0
re = ""
@@ -140,28 +159,49 @@ def replace_quote_str(s) -> str:
return re
def check_dialogue(source: str, target: str) -> str:
    """Give ``target`` the same enclosing dialogue marks as ``source``.

    Every ``(left, right)`` pair in ``DIALOGUES`` is examined. When
    ``source`` starts with ``left`` and ends with ``right``, whichever of
    the two marks ``target`` lacks is added to it.

    Args:
        source: The original line.
        target: The translated line to adjust.

    Returns:
        ``target``, possibly with a mark prepended and/or appended.
    """
    # NOTE(review): block nesting was inferred from the statements themselves,
    # since this view carries no indentation -- confirm against the repository.
    for opener, closer in DIALOGUES:
        # Pairs that do not wrap the source line leave the target untouched.
        if not source.startswith(opener) or not source.endswith(closer):
            continue
        if not target.startswith(opener):
            target = opener + target
        if not target.endswith(closer):
            target = target + closer
    return target
# Read an m3t file line by line and return the list of post-processed lines.
#
# Parameters:
#   m3t_path        path of the file; it is opened as UTF-8 text.
#   type            ScriptType choosing the line processor: process_scn_line,
#                   process_xml_line or process_circus_line, with process_line
#                   as the fallback.
#   llm_only        when true, the first alternative of the marker test below
#                   is disabled.
#   replace_quotes  when true, replace_quote_str is applied to the processed
#                   text.
# Returns: the list of stripped lines; processed lines are rebuilt as
#   "<first character> <processed text>".
#
# NOTE(review): this view carries no indentation and shows removed and added
# diff lines side by side, so the exact nesting cannot be read off here.
def process_m3t(m3t_path, type=ScriptType.GENERIC, llm_only=False, replace_quotes=True):
lines = []
line_num = 0
# Most recent source text seen; passed to check_dialogue for later lines.
src = None
# NOTE(review): the file object returned by open() is never explicitly closed;
# a `with` block would release it deterministically.
for line in open(m3t_path, "r", encoding="utf-8"):
line = line.strip()
line = line.replace('\u200b', '') # remove zero-width spaces
if line_num == 0:
line = line.lstrip("\ufeff") # remove a possible BOM
line_num += 1
# NOTE(review): the marker literals in the startswith("") calls below are
# empty as displayed; presumably the marker characters were lost when this
# page was captured -- confirm against the repository.
if line.startswith(""):
# Lines beginning with "○ NAME:" are not recorded as source text.
if not line.startswith("○ NAME:"):
src = line[1:].strip()
if (not llm_only and line.startswith("")) or line.startswith(""):
# Drop the one-character marker and surrounding whitespace.
data = line[1:].strip()
ndata = data
if src is not None:
ndata = check_dialogue(src, ndata)
# NOTE(review): in each branch below, the two consecutive assignments look
# like the removed and added versions of one statement from the diff view
# (argument `data` versus `ndata`) -- confirm which one is current.
if type.is_scn():
ndata = process_scn_line(data)
ndata = process_scn_line(ndata)
elif type.is_xml():
ndata = process_xml_line(data)
ndata = process_xml_line(ndata)
elif type.is_circus():
ndata = process_circus_line(ndata)
else:
ndata = process_line(data)
ndata = process_line(ndata)
if replace_quotes:
ndata = replace_quote_str(ndata)
# Report every changed line to stdout as path:line: before -> after.
if data != ndata:
print(f"{m3t_path}:{line_num}: {data} -> {ndata}")
# Rebuild the line from its marker character plus the processed text.
line = line[0] + " " + ndata
# An empty line clears the remembered source text.
elif not line and src is not None:
src = None
lines.append(line)
return lines

View File

@@ -1,4 +1,4 @@
from m3tpostprocess import process_line, process_scn_line, process_xml_line, replace_quote_str
from m3tpostprocess import process_line, process_scn_line, process_xml_line, replace_quote_str, process_circus_line
def test_process_line(src, expected):
@@ -24,6 +24,12 @@ def test_replace_quote_str(src, expected):
raise AssertionError(f"Expected: {expected}, but got: {processed}")
def test_process_circus_line(src, expected):
    """Check that ``process_circus_line(src)`` produces ``expected``.

    Raises:
        AssertionError: If the processed text differs from ``expected``.
    """
    actual = process_circus_line(src)
    if actual == expected:
        return
    raise AssertionError(f"Expected: {expected}, but got: {actual}")
if __name__ == "__main__":
test_process_line("Hello....", "Hello……")
test_process_line("……。", "……")
@@ -42,3 +48,5 @@ if __name__ == "__main__":
test_process_xml_line('"狗屎\'引号\'就是"一坨""<tag ok="this is ok">', '「狗屎『引号』就是「一坨」」<tag ok="this is ok">')
test_process_xml_line("「地形模型。<command>」", "「地形模型<command>」")
test_replace_quote_str("「狗屎『引号』就是「一坨」」", "「狗屎『引号』就是『一坨』」")
test_process_circus_line("这是一个@special标记的测试。", "这是一个@special标记的测试。")
test_process_circus_line("其他@测@123试", "其他@测@123试")