Add GPT-based file renaming tool with config, file handling, and GPT integration

This commit is contained in:
2025-01-19 22:43:32 +08:00
parent db25b12669
commit 015f4d1922
7 changed files with 207 additions and 0 deletions

1
.gitignore vendored
View File

@@ -160,3 +160,4 @@ cython_debug/
# option (not recommended) you can uncomment the following to ignore the entire idea folder.
#.idea/
config.yml

View File

View File

@@ -0,0 +1,12 @@
from .config import load_config
from .file import gen_input_list, link_files
from .gpt import get_response
# Resolve settings from the command line / YAML file and collect candidate files.
cfg = load_config()
files = gen_input_list(cfg.input)

# Ask the model for the new location of every collected file.
res = get_response(
    cfg,
    cfg.input,
    files,
    series_name=cfg.series_name,
    year=cfg.year,
    tmdb_id=cfg.tmdb_id,
    tvdb_id=cfg.tvdb_id,
)

# Show the planned "old -> new" mapping before touching the file system.
for f in res.files:
    print(f'{files[f.index]} -> {f.name}')

# Pause until the user presses Enter (the typed answer itself is not inspected).
input('Continue?')
link_files(cfg.input, files, res, cfg.output, hardlink=cfg.hardlink)

View File

@@ -0,0 +1,88 @@
import argparse
import openai
import os.path
from typing import Optional
import yaml
class Config:
    """Read-only view over the parsed command line and the YAML config file.

    ``api_key``, ``base_url``, ``proxy`` and ``model`` may come from either
    source; the command-line value wins when it is given.  All other settings
    are read from the command line only.
    """

    def __init__(self, args: argparse.Namespace, yaml_config: Optional[dict]):
        """Store both configuration sources.

        Args:
            args: Parsed command-line arguments.
            yaml_config: Mapping loaded from the YAML file.  ``None`` (what an
                empty YAML document parses to) is treated as an empty mapping
                so the property lookups below never hit ``None.get``.
        """
        self._args = args
        self._yaml_config = yaml_config or {}

    def _cli_or_yaml(self, name: str, default=None):
        """Return the CLI value for *name*, else the YAML value, else *default*."""
        value = getattr(self._args, name)
        if value is not None:
            return value
        return self._yaml_config.get(name, default)

    @property
    def api_key(self) -> str:
        """API key for the GPT service.

        Raises:
            ValueError: If neither source provides a non-empty key.
        """
        api_key = self._cli_or_yaml('api_key')
        if not api_key:
            raise ValueError("API key is required")
        return api_key

    @property
    def base_url(self) -> Optional[str]:
        """Base URL of the GPT service; defaults to the public OpenAI endpoint."""
        return self._cli_or_yaml('base_url', 'https://api.openai.com/v1')

    @property
    def hardlink(self) -> bool:
        """Whether hard links should be created instead of symlinks."""
        return self._args.hardlink

    @property
    def input(self) -> str:
        """Input directory given on the command line."""
        return self._args.input

    @property
    def output(self) -> str:
        """Output directory given on the command line."""
        return self._args.output

    @property
    def proxy(self) -> Optional[str]:
        """Proxy URL, or ``None`` when no proxy is configured."""
        return self._cli_or_yaml('proxy')

    @property
    def series_name(self) -> Optional[str]:
        """Series name supplied by the user, if any."""
        return self._args.series_name

    @property
    def tmdb_id(self) -> Optional[int]:
        """TMDB ID supplied by the user, if any."""
        return self._args.tmdb_id

    @property
    def tvdb_id(self) -> Optional[int]:
        """TVDB ID supplied by the user, if any."""
        return self._args.tvdb_id

    @property
    def model(self) -> str:
        """Model name; an empty CLI value falls back to YAML, then ``gpt-4o-mini``."""
        # Truthiness (not ``is not None``) is deliberate here: ``--model ""``
        # is treated the same as not passing the option at all.
        if self._args.model:
            return self._args.model
        return self._yaml_config.get("model", "gpt-4o-mini")

    @property
    def year(self) -> Optional[int]:
        """Year of the series supplied by the user, if any."""
        return self._args.year
def get_arg_parser() -> argparse.ArgumentParser:
    """Build the command-line parser for the rename tool.

    Returns:
        A parser exposing the service options (API key, base URL, proxy,
        model, config path), the optional series hints (name, year, TMDB/TVDB
        IDs), the ``--hardlink`` switch and the two positional directories
        ``input`` and ``output``.
    """
    arg_parser = argparse.ArgumentParser(description='GPT Shows Rename Tool')

    # (flags, keyword arguments) pairs, registered in this exact order so the
    # generated help text lists the options the same way.
    argument_specs = (
        (('--api-key',), {'type': str, 'help': 'API key for the GPT service'}),
        (('--base-url',), {'type': str, 'help': 'Base URL for the GPT service'}),
        (('-p', '--proxy'), {'type': str, 'help': 'Proxy URL (optional)'}),
        (('-m', '--model'), {'type': str, 'help': 'Model to use (default: gpt-4o-mini)'}),
        (('-c', '--config'), {'type': str, 'default': './config.yml',
                              'help': 'Path to the configuration file'}),
        (('-s', '--series-name'), {'type': str, 'help': 'Series name (optional)'}),
        (('-Y', '--year'), {'type': int, 'help': 'Year of the series (optional)'}),
        (('-t', '--tmdb-id'), {'type': int, 'help': 'TMDB ID (optional)'}),
        (('-T', '--tvdb-id'), {'type': int, 'help': 'TVDB ID (optional)'}),
        (('-H', '--hardlink'), {'action': 'store_true',
                                'help': 'Use hardlink instead of symlink (optional)'}),
        (('input',), {'help': 'Input directory.'}),
        (('output',), {'help': 'Output directory.'}),
    )
    for flags, options in argument_specs:
        arg_parser.add_argument(*flags, **options)
    return arg_parser
def load_config():
    """Parse the command line, merge in the YAML config file and return a Config.

    The YAML file is optional: when the path given by ``--config`` does not
    exist, or the file is empty, the YAML side of the configuration is simply
    empty and only command-line values and built-in defaults apply.

    As a side effect the module-level ``openai.base_url`` and
    ``openai.api_key`` are set from the resulting configuration.

    Returns:
        The populated ``Config`` instance.

    Raises:
        ValueError: Propagated from ``Config.api_key`` when no API key is
            available from either source.
    """
    parser = get_arg_parser()
    args = parser.parse_intermixed_args()

    # Default to an empty mapping so a missing config file no longer leaves
    # ``config`` unbound when it is handed to Config below.
    config = {}
    if os.path.exists(args.config):
        with open(args.config, 'r', encoding='utf-8') as file:
            # safe_load yields None for an empty document; normalise to {}.
            config = yaml.safe_load(file) or {}

    cfg = Config(args, config)
    openai.base_url = cfg.base_url
    openai.api_key = cfg.api_key
    return cfg

39
gpt_shows_rename/file.py Normal file
View File

@@ -0,0 +1,39 @@
import os
import os.path
from typing import List
from .gpt import Files
# File extensions (lower-case, with leading dot) that are picked up for renaming.
EXTS = ['.mp4', '.mkv', '.ass', '.srt']


def gen_input_list(dir: str, prefix: str = None) -> List[str]:
    """Recursively collect media/subtitle files below *dir*.

    Entries whose name starts with ``.`` (files and directories alike) are
    skipped.  A file is kept when its extension matches one of ``EXTS``; the
    comparison is case-insensitive so that e.g. ``Episode.MKV`` is not
    silently ignored.

    Args:
        dir: Directory to scan.
        prefix: Directory the returned paths are made relative to.  Defaults
            to *dir*; the recursion passes the top-level directory down.

    Returns:
        Sorted list of paths relative to *prefix*.
    """
    if prefix is None:
        prefix = dir

    found = []
    for entry in os.listdir(dir):
        if entry.startswith('.'):
            continue
        path = os.path.join(dir, entry)
        if os.path.isdir(path):
            found += gen_input_list(path, prefix=prefix)
        elif os.path.splitext(entry)[1].lower() in EXTS:
            found.append(os.path.relpath(path, prefix))

    found.sort()
    return found
def link_files(input: str, files: List[str], data: "Files", output: str, hardlink: bool = False):
    """Create a link in *output* for every renamed file described by *data*.

    Args:
        input: Directory the entries of *files* are relative to.
        files: Source paths relative to *input*; indexed by ``File.index``.
        data: Rename plan; each entry provides the source ``index`` and the
            new relative ``name``.
        output: Directory under which the new names are created.  Missing
            parent directories are created as needed.
        hardlink: Create hard links when true, symbolic links otherwise.
    """
    for f in data.files:
        target = os.path.join(output, f.name)
        src = os.path.join(input, files[f.index])
        os.makedirs(os.path.dirname(target), exist_ok=True)
        if hardlink:
            os.link(src, target)
        else:
            # A symlink stores its source path verbatim and a relative path
            # is resolved against the link's own directory, not the current
            # working directory.  Store an absolute path so a relative
            # *input* does not produce dangling links.
            os.symlink(os.path.abspath(src), target)

64
gpt_shows_rename/gpt.py Normal file
View File

@@ -0,0 +1,64 @@
import json
import openai
import httpx
from pydantic import BaseModel
from typing import List
from .config import Config
# System message sent with every request in get_response(). It tells the model
# how to map the input directory and the JSONL file list to the target layout:
# `Series Name (Year)` / `Season XX` (or `extras`) / `SXXEXX Episode Name`.
# This text is sent to the model verbatim, so any wording change alters the
# request itself.
SYSTEM_PROMPT = '''You are an assistant, and your goal is to help users rename file names according to the following rules. The user will provide an input directory and a list of files in JSONL format. You will output the new location for each file after renaming based on the file list.
You will rename files based on the information extracted from the input directory and the file list. Prioritize using the information specified by the user. If no specific information is provided by the user, use the information extracted from the inputs mentioned above.
The format for the highest-level directory is `Series Name (Year)`, which may optionally include a TMDB ID or TVDB ID, for example, `Series Name (Year) [tmdbid-1234]`.
The second-level directory format is `Season XX`. If there is not enough information, use `Season 01` by default. Special episodes, such as OVA, can use `Season 00`. Other movies, such as Bonus, should use `extras`. Trailers for episodes should be same as the episode.
The format for files in the third level is `SXXEXX Episode Name`. If multiple episodes are merged, use the following format: `SXXEXX-EXX Episode Name1/Episode Name2`. The episode name is optional. `SXXEXX` must be empty if file is in `extras` folder. If video is a trailer, add `.trailer` to name. For trailers, it is not necessary to replace the episode number with a small one.
The output file names must be relative path.
The output file names must retain parts of subtitle files, such as `.sc`, that indicate the language.'''
# One rename instruction returned by the model.
class File(BaseModel):
    # Position of the source file in the list that was sent to the model
    # (the "index" field emitted by gen_files_list()).
    index: int
    # New relative path proposed for that file.
    name: str
# Structured response requested from the model: the full list of rename
# instructions. Passed as `response_format` in get_response().
class Files(BaseModel):
    # One File entry per renamed input file.
    files: List[File]
def gen_files_list(files: List[str]):
    """Render *files* as the fenced JSONL block appended to the user prompt.

    Each file becomes one compact JSON object ``{"index": i, "name": path}``
    on its own line, where ``i`` is the file's position in *files*.
    Non-ASCII characters are emitted as-is rather than escaped.

    Args:
        files: File paths in the order their indices should be assigned.

    Returns:
        The prompt fragment: a heading line, an opening jsonl fence, one line
        per file and a closing fence.
    """
    heading = 'Here are file list:\n```jsonl'
    rows = [
        json.dumps({"index": position, "name": file_name},
                   ensure_ascii=False, separators=(',', ':'))
        for position, file_name in enumerate(files)
    ]
    return '\n'.join([heading, *rows, '```'])
def get_response(cfg: Config, inp: str, files: List[str],
                 series_name: str = None, year: int = None, tmdb_id: int = None,
                 tvdb_id: int = None) -> Files:
    """Ask the model for the new location of every file in *files*.

    The user message states the input directory, any hints the user supplied
    (series name, year, TMDB/TVDB ID) and the JSONL file list; the system
    message is ``SYSTEM_PROMPT``.  The reply is requested in the ``Files``
    structured format.

    Args:
        cfg: Supplies the API key, base URL, proxy and model name.
        inp: Input directory, quoted in the prompt.
        files: File paths to rename; their list positions become the indices
            the model refers to.
        series_name: Optional series name hint.
        year: Optional series year hint.
        tmdb_id: Optional TMDB ID hint.
        tvdb_id: Optional TVDB ID hint.

    Returns:
        The parsed ``Files`` object from the first choice of the response.

    Raises:
        ValueError: If the model refused to answer.
    """
    # Falsy hints (None, empty string, 0) are left out of the prompt.
    prompt_lines = [f'The input directory is `{inp}`.']
    if series_name:
        prompt_lines.append(f'The series name is `{series_name}`.')
    if year:
        prompt_lines.append(f'The year of series is `{year}`.')
    if tmdb_id:
        prompt_lines.append(f'The TMDB ID is `{tmdb_id}`.')
    if tvdb_id:
        prompt_lines.append(f'The TVDB ID is `{tvdb_id}`.')
    prompt_lines.append(gen_files_list(files))
    prompt = '\n'.join(prompt_lines)

    # The HTTP client is created here, so it is also closed here: the context
    # manager releases its connections once the (non-streaming) response has
    # been received, instead of leaving the client open.
    with httpx.Client(proxy=cfg.proxy) as http_client:
        client = openai.Client(api_key=cfg.api_key, base_url=cfg.base_url, http_client=http_client)
        res = client.beta.chat.completions.parse(
            model=cfg.model,
            messages=[
                {"role": "system", "content": SYSTEM_PROMPT},
                {"role": "user", "content": prompt},
            ],
            response_format=Files,
        )

    mes = res.choices[0].message
    if mes.refusal:
        raise ValueError(f"Model refused to answer: {mes.refusal}")
    return mes.parsed

3
requirements.txt Normal file
View File

@@ -0,0 +1,3 @@
openai
pydantic
pyyaml