Add GPT-based file renaming tool with config, file handling, and GPT integration

2025-01-19 22:43:32 +08:00
parent db25b12669
commit 015f4d1922
7 changed files with 207 additions and 0 deletions
--- a/.gitignore
+++ b/.gitignore
@@ -160,3 +160,4 @@ cython_debug/
 #  option (not recommended) you can uncomment the following to ignore the entire idea folder.
 #.idea/

+config.yml
--- a/gpt_shows_rename/__init__.py
+++ b/gpt_shows_rename/__init__.py
--- a/gpt_shows_rename/main.py
+++ b/gpt_shows_rename/main.py
@@ -0,0 +1,12 @@
+from .config import load_config
+from .file import gen_input_list, link_files
+from .gpt import get_response
+
+
+# Resolve settings from the command line and the optional YAML config file.
+cfg = load_config()
+# Relative paths of the candidate files found under the input directory.
+files = gen_input_list(cfg.input)
+# Ask the model for a new relative path for each listed file.
+res = get_response(cfg, cfg.input, files, cfg.series_name, cfg.year, cfg.tmdb_id, cfg.tvdb_id)
+# Show the proposed mapping so it can be reviewed before anything is linked.
+for f in res.files:
+    print(files[f.index], '->', f.name)
+# Pressing Enter (or typing anything that is not a refusal) proceeds as before;
+# an explicit "n"/"no" aborts instead of being silently ignored.
+if input('Continue?').strip().lower() in ('n', 'no'):
+    raise SystemExit('Aborted.')
+link_files(cfg.input, files, res, cfg.output, cfg.hardlink)
--- a/gpt_shows_rename/config.py
+++ b/gpt_shows_rename/config.py
@@ -0,0 +1,88 @@
+import argparse
+import openai
+import os.path
+from typing import Optional
+import yaml
+
+class Config:
+    """Read-only view over the parsed command line and the YAML configuration.
+
+    ``api_key``, ``base_url``, ``proxy`` and ``model`` prefer the command-line
+    value and fall back to the YAML mapping; every other property is read
+    from the command line only.
+    """
+
+    def __init__(self, args: argparse.Namespace, yaml_config: dict):
+        """Store the parsed arguments and the YAML mapping.
+
+        Args:
+            args: Namespace produced by the argument parser.
+            yaml_config: Mapping loaded from the config file. ``None`` (what
+                an empty YAML document parses to) is treated as an empty
+                mapping so the ``.get`` fallbacks below keep working.
+        """
+        self._args = args
+        self._yaml_config = yaml_config or {}
+
+    @property
+    def api_key(self) -> str:
+        """API key from the command line, else from YAML.
+
+        Raises:
+            ValueError: If neither source provides a non-empty key.
+        """
+        api_key = self._args.api_key if self._args.api_key is not None else self._yaml_config.get('api_key')
+        if not api_key:
+            raise ValueError("API key is required")
+        return api_key
+
+    @property
+    def base_url(self) -> Optional[str]:
+        """Service base URL: command line, then YAML, then the OpenAI default."""
+        return self._args.base_url if self._args.base_url is not None else self._yaml_config.get('base_url', 'https://api.openai.com/v1')
+
+    @property
+    def hardlink(self) -> bool:
+        """Whether hard links were requested instead of symlinks."""
+        return self._args.hardlink
+
+    @property
+    def input(self) -> str:
+        """Input directory given on the command line."""
+        return self._args.input
+
+    @property
+    def output(self) -> str:
+        """Output directory given on the command line."""
+        return self._args.output
+
+    @property
+    def proxy(self) -> Optional[str]:
+        """Proxy URL from the command line, else from YAML, else ``None``."""
+        return self._args.proxy if self._args.proxy is not None else self._yaml_config.get('proxy')
+
+    @property
+    def series_name(self) -> Optional[str]:
+        """Series name given on the command line, if any."""
+        return self._args.series_name
+
+    @property
+    def tmdb_id(self) -> Optional[int]:
+        """TMDB ID given on the command line, if any."""
+        return self._args.tmdb_id
+
+    @property
+    def tvdb_id(self) -> Optional[int]:
+        """TVDB ID given on the command line, if any."""
+        return self._args.tvdb_id
+
+    @property
+    def model(self) -> str:
+        """Model name: command line, then YAML, then ``gpt-4o-mini``."""
+        if self._args.model:
+            return self._args.model
+        return self._yaml_config.get("model", "gpt-4o-mini")
+
+    @property
+    def year(self) -> Optional[int]:
+        """Series year given on the command line, if any."""
+        return self._args.year
+
+
+def get_arg_parser() -> argparse.ArgumentParser:
+    """Build the command-line parser for the rename tool.
+
+    Returns:
+        A parser with the optional service/series flags followed by the two
+        positional arguments ``input`` and ``output``.
+    """
+    # (flags, keyword arguments) pairs, registered in declaration order so the
+    # positional order and the generated help text stay the same.
+    argument_table = (
+        (('--api-key',), {'type': str, 'help': 'API key for the GPT service'}),
+        (('--base-url',), {'type': str, 'help': 'Base URL for the GPT service'}),
+        (('-p', '--proxy'), {'type': str, 'help': 'Proxy URL (optional)'}),
+        (('-m', '--model'), {'type': str, 'help': 'Model to use (default: gpt-4o-mini)'}),
+        (('-c', '--config'), {'type': str, 'default': './config.yml', 'help': 'Path to the configuration file'}),
+        (('-s', '--series-name'), {'type': str, 'help': 'Series name (optional)'}),
+        (('-Y', '--year'), {'type': int, 'help': 'Year of the series (optional)'}),
+        (('-t', '--tmdb-id'), {'type': int, 'help': 'TMDB ID (optional)'}),
+        (('-T', '--tvdb-id'), {'type': int, 'help': 'TVDB ID (optional)'}),
+        (('-H', '--hardlink'), {'action': 'store_true', 'help': 'Use hardlink instead of symlink (optional)'}),
+        (('input',), {'help': 'Input directory.'}),
+        (('output',), {'help': 'Output directory.'}),
+    )
+    arg_parser = argparse.ArgumentParser(description='GPT Shows Rename Tool')
+    for flags, options in argument_table:
+        arg_parser.add_argument(*flags, **options)
+    return arg_parser
+
+
+def load_config() -> Config:
+    """Parse the command line, merge in the YAML config file and return a Config.
+
+    The YAML file named by ``--config`` is optional: when it does not exist,
+    or is empty, the configuration falls back to command-line values and
+    built-in defaults.
+
+    Side effects:
+        Sets ``openai.base_url`` and ``openai.api_key`` on the ``openai`` module.
+
+    Raises:
+        ValueError: If no API key is available (raised by ``Config.api_key``).
+    """
+    parser = get_arg_parser()
+    args = parser.parse_intermixed_args()
+    # Start from an empty mapping so a missing config file no longer leaves
+    # `config` unbound when it is passed to Config below.
+    config = {}
+    if os.path.exists(args.config):
+        with open(args.config, 'r', encoding='utf-8') as file:
+            # safe_load returns None for an empty document.
+            config = yaml.safe_load(file) or {}
+    cfg = Config(args, config)
+    openai.base_url = cfg.base_url
+    openai.api_key = cfg.api_key
+    return cfg
--- a/gpt_shows_rename/file.py
+++ b/gpt_shows_rename/file.py
@@ -0,0 +1,39 @@
+import os
+import os.path
+from typing import List
+from .gpt import Files
+
+
+# File extensions (with leading dot) that gen_input_list keeps; files with any
+# other extension are skipped.
+EXTS = ['.mp4', '.mkv', '.ass', '.srt']
+
+
+def gen_input_list(dir: str, prefix: str = None) -> List[str]:
+    """Recursively collect the files under *dir* whose extension is in EXTS.
+
+    Entries whose name starts with ``.`` (files and directories alike) are
+    skipped. Extensions are compared case-insensitively, so ``Episode.MKV``
+    is picked up just like ``episode.mkv``.
+
+    Args:
+        dir: Directory to scan.
+        prefix: Directory the returned paths are made relative to. Defaults
+            to *dir*; recursive calls pass the top-level directory through.
+
+    Returns:
+        Sorted list of paths relative to *prefix*.
+    """
+    if prefix is None:
+        prefix = dir
+    found = []
+    for entry in os.listdir(dir):
+        if entry.startswith('.'):
+            continue
+        path = os.path.join(dir, entry)
+        if os.path.isdir(path):
+            found.extend(gen_input_list(path, prefix=prefix))
+            continue
+        # Lower-case the extension before the lookup: EXTS holds lower-case
+        # entries, and upper-case extensions on disk must not be dropped.
+        if os.path.splitext(entry)[1].lower() in EXTS:
+            found.append(os.path.relpath(path, prefix))
+    found.sort()
+    return found
+
+
+def link_files(input: str, files: List[str], data: Files, output: str, hardlink: bool = False):
+    """Create a link under *output* for every rename proposed in *data*.
+
+    Args:
+        input: Directory the entries of *files* are relative to.
+        files: Relative source paths; ``data.files[i].index`` indexes this list.
+        data: Proposed renames; each item carries ``index`` and ``name``.
+        output: Directory under which the new names are created.
+        hardlink: Create hard links when true, symbolic links otherwise.
+
+    Raises:
+        IndexError: If an item's index does not refer to an entry of *files*.
+        ValueError: If an item's name would resolve outside *output*.
+        OSError: If creating a directory or link fails.
+    """
+    output_root = os.path.abspath(output)
+    for f in data.files:
+        # The index comes from the model: reject negative values too, which
+        # would otherwise silently select a file counted from the end.
+        if not 0 <= f.index < len(files):
+            raise IndexError(f'file index {f.index} is out of range')
+        target = os.path.abspath(os.path.join(output_root, f.name))
+        # The name also comes from the model: an absolute path or `..`
+        # components must not place links outside the output directory.
+        if os.path.commonpath([output_root, target]) != output_root:
+            raise ValueError(f'target {f.name!r} is outside the output directory')
+        # A symlink target is resolved relative to the link's own directory,
+        # not the current working directory, so a relative source path would
+        # produce a dangling link. Always link to the absolute source path.
+        src = os.path.abspath(os.path.join(input, files[f.index]))
+        os.makedirs(os.path.dirname(target), exist_ok=True)
+        if hardlink:
+            os.link(src, target)
+        else:
+            os.symlink(src, target)
--- a/gpt_shows_rename/gpt.py
+++ b/gpt_shows_rename/gpt.py
@@ -0,0 +1,64 @@
+import json
+import openai
+import httpx
+from pydantic import BaseModel
+from typing import List
+from .config import Config
+
+
+# System message sent with every request in get_response. It describes the
+# target layout (series directory / season directory / episode file name) the
+# model must produce. This is runtime text: editing it changes model output.
+SYSTEM_PROMPT = '''You are an assistant, and your goal is to help users rename file names according to the following rules. The user will provide an input directory and a list of files in JSONL format. You will output the new location for each file after renaming based on the file list.
+You will rename files based on the information extracted from the input directory and the file list. Prioritize using the information specified by the user. If no specific information is provided by the user, use the information extracted from the inputs mentioned above. 
+The format for the highest-level directory is `Series Name (Year)`, which may optionally include a TMDB ID or TVDB ID, for example, `Series Name (Year) [tmdbid-1234]`.
+The second-level directory format is `Season XX`. If there is not enough information, use `Season 01` by default. Special episodes, such as OVA, can use `Season 00`. Other movies, such as Bonus, should use `extras`. Trailers for episodes should be same as the episode.
+The format for files in the third level is `SXXEXX Episode Name`. If multiple episodes are merged, use the following format: `SXXEXX-EXX Episode Name1/Episode Name2`. The episode name is optional. `SXXEXX` must be empty if file is in `extras` folder. If video is a trailer, add `.trailer` to name. For trailers, it is not necessary to replace the episode number with a small one.
+The output file names must be relative path.
+The output file names must retain parts of subtitle files, such as `.sc`, that indicate the language.'''
+
+
+# One rename proposed by the model.
+# NOTE(review): documented with comments rather than a docstring because a
+# class docstring would presumably end up in the JSON schema sent as
+# response_format - confirm before converting.
+class File(BaseModel):
+    # Position of the source file in the list that was sent to the model.
+    index: int
+    # New path proposed for that file.
+    name: str
+
+
+# Structured response requested from the model: the list of proposed renames.
+# Passed as response_format in get_response and returned to the caller.
+class Files(BaseModel):
+    # One entry per renamed file.
+    files: List[File]
+
+
+def gen_files_list(files: List[str]):
+    """Render *files* as the fenced JSONL listing embedded in the user prompt.
+
+    Each file becomes one compact JSON object with its position (``index``)
+    and its path (``name``); non-ASCII characters are kept as-is.
+
+    Args:
+        files: File paths to list, in the order their indices are assigned.
+
+    Returns:
+        The heading, the opening fence, one JSON line per file and the
+        closing fence, joined with newlines.
+    """
+    heading = '''Here are file list:
+```jsonl'''
+    records = [
+        json.dumps({"index": position, "name": path}, ensure_ascii=False, separators=(',', ':'))
+        for position, path in enumerate(files)
+    ]
+    return '\n'.join([heading, *records, '```'])
+
+
+def get_response(cfg: Config, inp: str, files: List[str],
+                 series_name: str = None, year: int = None, tmdb_id: int = None,
+                 tvdb_id: int = None) -> Files:
+    """Ask the model for a new path for every entry of *files*.
+
+    Args:
+        cfg: Supplies the API key, base URL, proxy and model name.
+        inp: Input directory, quoted in the prompt.
+        files: Relative file paths; their list positions become the indices
+            the model refers to.
+        series_name: Optional series name hint added to the prompt.
+        year: Optional series year hint added to the prompt.
+        tmdb_id: Optional TMDB ID hint added to the prompt.
+        tvdb_id: Optional TVDB ID hint added to the prompt.
+
+    Returns:
+        The parsed ``Files`` object from the first completion choice.
+
+    Raises:
+        ValueError: If the model refuses to answer, or returns no parsed
+            result.
+    """
+    prompt = f'The input directory is `{inp}`.'
+    if series_name:
+        prompt += f'\nThe series name is `{series_name}`.'
+    if year:
+        prompt += f'\nThe year of series is `{year}`.'
+    if tmdb_id:
+        prompt += f'\nThe TMDB ID is `{tmdb_id}`.'
+    if tvdb_id:
+        prompt += f'\nThe TVDB ID is `{tvdb_id}`.'
+    prompt += '\n' + gen_files_list(files)
+    # Use the HTTP client as a context manager so its connections are closed
+    # once the request has completed, instead of being left open.
+    with httpx.Client(proxy=cfg.proxy) as http_client:
+        client = openai.Client(api_key=cfg.api_key, base_url=cfg.base_url, http_client=http_client)
+        res = client.beta.chat.completions.parse(
+            model=cfg.model,
+            messages=[
+                {"role": "system", "content": SYSTEM_PROMPT},
+                {"role": "user", "content": prompt},
+            ],
+            response_format=Files,
+        )
+    mes = res.choices[0].message
+    if mes.refusal:
+        raise ValueError(f"Model refused to answer: {mes.refusal}")
+    # Fail here with a clear message rather than letting callers hit an
+    # AttributeError on None when they iterate the result.
+    if mes.parsed is None:
+        raise ValueError("Model returned no parsed result")
+    return mes.parsed
--- a/requirements.txt
+++ b/requirements.txt
@@ -0,0 +1,3 @@
+openai
+pydantic
+pyyaml