"""Build or refresh the name-replacement CSV from the JSON files in ``gt_input``.

Every ``*.json`` file in the input directory is expected to hold a list of
objects. The non-empty string values found under the ``"name"`` key are
tallied, and the tally is merged into the CSV table:

* rows already in the CSV keep their ``CN_Name`` and get a fresh ``Count``
  (0 when the name no longer appears in any JSON file);
* names not yet in the CSV are appended with an empty ``CN_Name``;
* rows are written in descending ``Count`` order (ties keep their order).
"""
import csv
import json
import os
from collections import Counter

# Input and output locations.
gt_input_dir = 'gt_input'  # directory containing the JSON files
csv_file = '人名替换表.csv'  # name of the CSV table

_FIELDNAMES = ['JP_Name', 'CN_Name', 'Count']


def _count_names(input_dir):
    """Return a Counter of the ``"name"`` values found in ``input_dir``'s JSON files.

    Files that cannot be decoded, or whose top-level value is not a list, are
    reported on stdout and skipped. List items that are not objects, and
    names that are missing, empty or not strings, are ignored.

    Raises:
        OSError: if the directory or one of its JSON files cannot be read.
    """
    counts = Counter()
    # Sorted so that the order of newly discovered names (and therefore the
    # order of tied rows in the output) does not depend on the file system.
    for filename in sorted(os.listdir(input_dir)):
        if not filename.endswith('.json'):
            continue
        file_path = os.path.join(input_dir, filename)
        try:
            # 'utf-8-sig' reads plain UTF-8 as well as UTF-8 with a BOM.
            with open(file_path, 'r', encoding='utf-8-sig') as f:
                data = json.load(f)
        except (json.JSONDecodeError, UnicodeDecodeError):
            print(f"Error decoding JSON from file: {file_path}")
            continue
        if not isinstance(data, list):
            print(f"Skipping file whose top-level JSON value is not a list: {file_path}")
            continue
        for entry in data:
            if not isinstance(entry, dict):
                continue
            jp_name = entry.get("name")
            if isinstance(jp_name, str) and jp_name:
                counts[jp_name] += 1
    return counts


def _load_translations(csv_path):
    """Return ``{JP_Name: CN_Name}`` read from ``csv_path`` (``{}`` if it is absent).

    The stored ``Count`` column is deliberately not parsed: every run
    recomputes it, so a blank or hand-edited value must not abort the run.

    Raises:
        KeyError: if the header lacks ``JP_Name`` or ``CN_Name``. Failing here
            keeps a table with an unexpected layout from being overwritten.
    """
    if not os.path.exists(csv_path):
        return {}
    translations = {}
    with open(csv_path, 'r', newline='', encoding='utf-8-sig') as csvfile:
        for row in csv.DictReader(csvfile):
            # A short row yields None for the missing cell; treat it as empty.
            translations[row['JP_Name']] = row['CN_Name'] or ''
    return translations


def _merge(translations, counts):
    """Combine saved translations with fresh counts into ``{JP_Name: details}``.

    Existing names come first, in their saved order, followed by new names in
    the order they were first counted.
    """
    table = {
        jp_name: {'CN_Name': cn_name, 'Count': counts.get(jp_name, 0)}
        for jp_name, cn_name in translations.items()
    }
    for jp_name, count in counts.items():
        if jp_name not in table:
            # New entry; CN_Name is left empty for the user to fill in.
            table[jp_name] = {'CN_Name': '', 'Count': count}
    return table


def _write_table(csv_path, table):
    """Write ``table`` to ``csv_path`` sorted by ``Count``, highest first."""
    ordered = sorted(table.items(), key=lambda item: item[1]['Count'], reverse=True)
    with open(csv_path, 'w', newline='', encoding='utf-8-sig') as csvfile:
        writer = csv.DictWriter(csvfile, fieldnames=_FIELDNAMES)
        writer.writeheader()
        writer.writerows(
            {'JP_Name': jp_name, 'CN_Name': details['CN_Name'], 'Count': details['Count']}
            for jp_name, details in ordered
        )


def main(input_dir=None, output_csv=None):
    """Regenerate the name table and return it as ``{JP_Name: details}``.

    Args:
        input_dir: directory holding the JSON files; defaults to ``gt_input_dir``.
        output_csv: path of the CSV table to create or update; defaults to
            ``csv_file``.
    """
    input_dir = gt_input_dir if input_dir is None else input_dir
    output_csv = csv_file if output_csv is None else output_csv

    # The JSON files are scanned before the CSV is touched, so a missing or
    # unreadable input directory leaves an existing table unchanged.
    counts = _count_names(input_dir)
    table = _merge(_load_translations(output_csv), counts)
    _write_table(output_csv, table)

    print(f"CSV 文件 '{output_csv}' 已成功生成/更新。")
    return table


if __name__ == "__main__":
    main()