Add more checks and fixes
This commit is contained in:
@@ -8,6 +8,7 @@ class ScriptType(str, Enum):
|
||||
GENERIC = "generic"
|
||||
SCN = "scn"
|
||||
XML = "xml"
|
||||
CIRCUS = "circus"
|
||||
|
||||
def __str__(self):
    """Return the member's underlying value instead of the default Enum repr."""
    return self.value
|
||||
@@ -25,6 +26,9 @@ class ScriptType(str, Enum):
|
||||
def is_xml(self):
    """Return True when this member is ``ScriptType.XML``."""
    return self == ScriptType.XML
|
||||
|
||||
def is_circus(self):
    """Return True when this member is ``ScriptType.CIRCUS``."""
    return self == ScriptType.CIRCUS
|
||||
|
||||
|
||||
# A run of three or more ASCII full stops.
ELLIPSIS = re.compile(r"\.{3,}")
# A run of one or more U+2026 horizontal-ellipsis characters.
ELLIPSIS_ZH = re.compile(r"…+")
|
||||
@@ -66,6 +70,10 @@ CJK_LEFT_QUOTE = ["「", "『"]
|
||||
# Closing CJK corner brackets (outer, inner).
CJK_RIGHT_QUOTE = ["」", "』"]
# Curly opening / closing quotation marks listed as needing replacement.
NEED_REPLACED_LEFT_QUOTE = ["“", "‘"]
NEED_REPLACED_RIGHT_QUOTE = ["”", "’"]
# (opening, closing) pairs that can enclose a whole dialogue line.
# NOTE(review): the last pair is ASCII parentheses as written here — confirm
# that full-width parentheses were not intended.
DIALOGUES = (("「", "」"), ("『", "』"), ("(", ")"))
# An "@" followed by ASCII letters/digits; the capturing group makes
# re.split() keep the matched token in its result list.
CIRCUS_SPECIAL = re.compile(r"(@[a-zA-Z0-9]+)")
|
||||
|
||||
|
||||
def process_line(data: str) -> str:
|
||||
@@ -119,6 +127,17 @@ def process_xml_line(data: str) -> str:
|
||||
return ''.join(parts)
|
||||
|
||||
|
||||
def process_circus_line(data: str) -> str:
    """Post-process a line while leaving ``@token`` markers untouched.

    The line is split on ``CIRCUS_SPECIAL``; only the text between the
    matched tokens is passed through ``process_line``. When a processed text
    segment begins with '」', every trailing '。' is removed from the text
    segment that precedes the token in front of it.

    Args:
        data: The raw line text.

    Returns:
        The re-joined line.
    """
    segments = CIRCUS_SPECIAL.split(data)
    # Splitting on one capturing group puts plain text at the even indices
    # and the captured tokens at the odd ones, so step over text only.
    for idx in range(0, len(segments), 2):
        # NOTE(review): as written this replace() maps '@' to itself and is a
        # no-op; one side may have been a full-width '@' — confirm.
        cleaned = process_line(segments[idx].replace('@', '@'))
        segments[idx] = cleaned
        if idx > 0 and cleaned.startswith('」'):
            # Previous text segment sits two slots back (a token is between).
            segments[idx - 2] = segments[idx - 2].rstrip('。')
    return ''.join(segments)
|
||||
|
||||
|
||||
def replace_quote_str(s) -> str:
|
||||
depth = 0
|
||||
re = ""
|
||||
@@ -140,28 +159,49 @@ def replace_quote_str(s) -> str:
|
||||
return re
|
||||
|
||||
|
||||
def check_dialogue(source: str, target: str) -> str:
    """Give ``target`` the same enclosing dialogue brackets as ``source``.

    For each (opening, closing) pair in ``DIALOGUES`` that wraps ``source``,
    the opening mark is prepended to ``target`` and the closing mark appended
    whenever ``target`` does not already start / end with them.

    Args:
        source: The original text.
        target: The text to be checked.

    Returns:
        ``target``, with any missing bracket added.
    """
    for opener, closer in DIALOGUES:
        source_is_wrapped = source.startswith(opener) and source.endswith(closer)
        if not source_is_wrapped:
            continue
        if not target.startswith(opener):
            target = opener + target
        if not target.endswith(closer):
            target += closer
    return target
|
||||
|
||||
|
||||
def process_m3t(m3t_path, type=ScriptType.GENERIC, llm_only=False, replace_quotes=True):
    """Post-process the translated lines of an m3t file.

    Every line is stripped and has zero-width spaces removed (plus a leading
    BOM on the first line). A line starting with "○", other than "○ NAME:",
    is remembered as the current source text. A line starting with "△" — or
    "●" unless ``llm_only`` is set — is checked against that source with
    ``check_dialogue``, run through the processor selected by ``type`` and,
    optionally, through ``replace_quote_str``. Each change is printed, and
    the line is rewritten as "<marker> <text>".

    Args:
        m3t_path: Path of the UTF-8 encoded file to read.
        type: ``ScriptType`` member selecting the line processor.
        llm_only: When true, "●" lines are left unprocessed.
        replace_quotes: When true, also apply ``replace_quote_str``.

    Returns:
        The list of resulting lines, in file order.
    """
    lines = []
    src = None
    # Context manager so the file handle is always closed, including when a
    # processor raises part-way through the file.
    with open(m3t_path, "r", encoding="utf-8") as m3t_file:
        for line_num, line in enumerate(m3t_file, start=1):
            line = line.strip()
            line = line.replace('\u200b', '')  # remove zero-width spaces
            if line_num == 1:
                line = line.lstrip("\ufeff")  # remove a possible BOM
            if line.startswith("○") and not line.startswith("○ NAME:"):
                src = line[1:].strip()
            if (not llm_only and line.startswith("●")) or line.startswith("△"):
                data = line[1:].strip()
                ndata = data
                if src is not None:
                    ndata = check_dialogue(src, ndata)
                # Each processor must receive ``ndata`` exactly once: feeding
                # it ``data`` would throw away the check_dialogue() result.
                if type.is_scn():
                    ndata = process_scn_line(ndata)
                elif type.is_xml():
                    ndata = process_xml_line(ndata)
                elif type.is_circus():
                    ndata = process_circus_line(ndata)
                else:
                    ndata = process_line(ndata)
                if replace_quotes:
                    ndata = replace_quote_str(ndata)
                if data != ndata:
                    print(f"{m3t_path}:{line_num}: {data} -> {ndata}")
                    line = line[0] + " " + ndata
            elif not line:
                # A blank line drops the remembered source text.
                src = None
            lines.append(line)
    return lines
|
||||
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
from m3tpostprocess import process_line, process_scn_line, process_xml_line, replace_quote_str
|
||||
from m3tpostprocess import process_line, process_scn_line, process_xml_line, replace_quote_str, process_circus_line
|
||||
|
||||
|
||||
def test_process_line(src, expected):
|
||||
@@ -24,6 +24,12 @@ def test_replace_quote_str(src, expected):
|
||||
raise AssertionError(f"Expected: {expected}, but got: {processed}")
|
||||
|
||||
|
||||
def test_process_circus_line(src, expected):
    """Check that ``process_circus_line(src)`` equals ``expected``.

    Raises:
        AssertionError: If the processed text differs from ``expected``.
    """
    processed = process_circus_line(src)
    if processed == expected:
        return
    raise AssertionError(f"Expected: {expected}, but got: {processed}")
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
test_process_line("Hello....", "Hello……")
|
||||
test_process_line("……。", "……")
|
||||
@@ -42,3 +48,5 @@ if __name__ == "__main__":
|
||||
test_process_xml_line('"狗屎\'引号\'就是"一坨""<tag ok="this is ok">', '「狗屎『引号』就是「一坨」」<tag ok="this is ok">')
|
||||
test_process_xml_line("「地形模型。<command>」", "「地形模型<command>」")
|
||||
test_replace_quote_str("「狗屎『引号』就是「一坨」」", "「狗屎『引号』就是『一坨』」")
|
||||
test_process_circus_line("这是一个@special标记的测试。", "这是一个@special标记的测试。")
|
||||
test_process_circus_line("其他@测@123试", "其他@测@123试")
|
||||
|
||||
Reference in New Issue
Block a user