Files
gpt-shows-rename-tools/gpt_shows_rename/gpt.py

150 lines
6.1 KiB
Python

import json
import openai
import httpx
from pydantic import BaseModel
from typing import Dict, List
from .config import Config
# System message used for every completion request in this module. It spells
# out the target layout the model must produce: a top-level
# `Series Name (Year)` directory, a `Season XX` (or `extras`) second level,
# `SXXEXX Episode Name` file names, `.trailer` tagging, relative output paths,
# and preservation of subtitle language parts such as `.sc`.
SYSTEM_PROMPT = '''You are an assistant, and your goal is to help users rename file names according to the following rules. The user will provide an input directory and a list of files in JSONL format. You will output the new location for each file after renaming based on the file list.
You will rename files based on the information extracted from the input directory, TMDB data and the file list. If user provide TMDB data, use the information from TMDB data first. If no specific information is provided by the user, use the information extracted from the input directory and the file list.
The format for the highest-level directory is `Series Name (Year)`, which may optionally include a TMDB ID or TVDB ID, for example, `Series Name (Year) [tmdbid-1234]`. The top directory must be the same for all files.
The second-level directory format is `Season XX`. If there is not enough information, use `Season 01` by default. Special episodes, such as OVA, can use `Season 00`. Other movies, such as Bonus, should use `extras`. Trailers for episodes should be same as the episode.
The format for files in the third level is `SXXEXX Episode Name`. If multiple episodes are merged, use the following format: `SXXEXX-EXX Episode Name1/Episode Name2`. The episode name is optional. `SXXEXX` must be empty if file is in `extras` folder. If video is a trailer, add `.trailer` to name. For trailers, it is not necessary to replace the episode number with a small one.
The output file names must be relative path.
The output file names must retain parts of subtitle files, such as `.sc`, that indicate the language.'''
# A single `{index, name}` record: the same shape `gen_files_list` emits for
# each input file and that `parse_result` reads back from the model's reply.
class File(BaseModel):
    # Position of the file in the list that was sent to the model.
    index: int
    # File name/path associated with that position.
    name: str
# Wrapper around a list of `File` records. It is passed as `response_format`
# on the structured-output path and is the value returned by `parse_result`
# and `get_response`.
class Files(BaseModel):
    files: List[File]
# TMDB metadata that `gen_tmdb_data` serialises into the user prompt.
class TmdbData(BaseModel):
    # Series-level payload; dumped to JSON as-is.
    series_info: dict
    # Per-season payloads keyed by season number; each is dumped to JSON as-is.
    seasons_info: Dict[int, dict]
def gen_files_list(files: List[str]):
    """Render *files* as a fenced JSONL block for the user prompt.

    Each file becomes one compact JSON object ``{"index": i, "name": f}`` on
    its own line, where ``i`` is the file's zero-based position in *files*.
    Non-ASCII characters are written verbatim (``ensure_ascii=False``).

    Args:
        files: File names, in the order they should be indexed.

    Returns:
        The prompt fragment: a heading line, an opening ``jsonl`` fence, one
        line per file, and a closing fence, joined by newlines.
    """
    entries = (
        json.dumps({"index": index, "name": name}, ensure_ascii=False, separators=(',', ':'))
        for index, name in enumerate(files)
    )
    return '\n'.join(['Here are file list:', '```jsonl', *entries, '```'])
def gen_tmdb_data(tmdb_data: TmdbData):
    """Render TMDB series and season metadata as fenced JSON blocks.

    The series payload comes first, followed by one block per entry of
    ``tmdb_data.seasons_info`` in that mapping's iteration order. Payloads are
    dumped compactly with non-ASCII characters written verbatim.

    Args:
        tmdb_data: Object exposing ``series_info`` and ``seasons_info``.

    Returns:
        The prompt fragment as a single newline-joined string.
    """
    def dump(payload):
        # One place for the serialisation options shared by every block.
        return json.dumps(payload, ensure_ascii=False, separators=(',', ':'))

    parts = ['Here are series info from TMDB:', '```json', dump(tmdb_data.series_info), '```']
    for season_number, season_info in tmdb_data.seasons_info.items():
        parts += [
            f'Here are season {season_number} info from TMDB:',
            '```json',
            dump(season_info),
            '```',
        ]
    return '\n'.join(parts)
# Model names for which `get_response` takes the structured-output
# (`response_format`) code path.
SUPPORTED_MODELS = ["gpt-4o", "gpt-4o-mini", "gpt-4o-2024-08-06", "gpt-4o-2024-11-20", "gpt-4o-mini-2024-07-18"]


def is_support_structed_output(model: str) -> bool:
    """Return whether *model* is listed in ``SUPPORTED_MODELS``.

    Names starting with ``ft:`` are reduced to their second colon-separated
    field before the lookup, so ``ft:<base>:...`` is judged by ``<base>``.

    Args:
        model: Model name as configured.

    Returns:
        ``True`` if the (possibly reduced) name is in ``SUPPORTED_MODELS``.
    """
    if model.startswith('ft:'):
        # The prefix guarantees at least one colon, so index 1 always exists;
        # a bare 'ft:' yields '' which is simply not in the list.
        model = model.split(':', 2)[1]
    return model in SUPPORTED_MODELS
def parse_result(result: str):
    """Parse a JSONL reply from the model into a ``Files`` object.

    Blank lines and lines starting with a triple backtick (Markdown fences)
    are skipped. Every remaining line must be a JSON object that ``File``
    accepts.

    Args:
        result: Raw text returned by the model.

    Returns:
        A ``Files`` instance holding one ``File`` per parsed line, in order.

    Raises:
        ValueError: If a line cannot be decoded or turned into a ``File``.
            The underlying error is attached as ``__cause__``.
    """
    files = []
    for raw_line in result.splitlines():
        line = raw_line.strip()
        if not line or line.startswith("```"):
            continue
        try:
            files.append(File(**json.loads(line)))
        except Exception as err:
            # Chain explicitly so the JSON/validation error stays visible.
            raise ValueError(f"Failed to parse message: {line}") from err
    return Files(files=files)
def _build_user_prompt(inp, files, series_name, year, tmdb_id, tvdb_id, tmdb_data, season_number):
    """Assemble the user message from the input directory, file list and optional hints."""
    parts = [f'The input directory is `{inp}`.']
    if tmdb_data:
        parts.append(gen_tmdb_data(tmdb_data))
    parts.append(gen_files_list(files))
    if series_name:
        parts.append(f'The series name is `{series_name}`.')
    if year:
        parts.append(f'The year of series is `{year}`.')
    if tmdb_id:
        parts.append(f'The TMDB ID is `{tmdb_id}`.')
    if tvdb_id:
        parts.append(f'The TVDB ID is `{tvdb_id}`.')
    # Season 0 is meaningful (the system prompt maps specials to `Season 00`),
    # so test against None instead of truthiness, which would drop it.
    if season_number is not None:
        parts.append(f'The season number is `{season_number}`.')
    return '\n'.join(parts)


async def _request_structured(client, cfg, prompt):
    """Stream a completion with ``response_format=Files``, echoing deltas to stdout."""
    result = None
    mes = ''
    refusal = ''
    async with client.beta.chat.completions.stream(
        model=cfg.model,
        messages=[
            {"role": "system", "content": SYSTEM_PROMPT},
            {"role": "user", "content": prompt},
        ],
        response_format=Files,
    ) as stream:
        async for event in stream:
            if event.type == "content.delta":
                mes += event.delta
                print(event.delta, end='', flush=True)
                # Keep the most recent non-empty parsed snapshot.
                if event.parsed:
                    result = event.parsed
            elif event.type == "refusal.delta":
                refusal += event.delta
                print(event.delta, end='', flush=True)
    print('', flush=True)
    if refusal:
        raise ValueError(f"Model refused to answer: {refusal}")
    if not result:
        raise ValueError(f"Unhandle Error: {mes}")
    return Files(**result)


async def _request_plain(client, cfg, prompt):
    """Stream a plain-text completion, echo it to stdout, and parse it with ``parse_result``."""
    res = await client.chat.completions.create(
        model=cfg.model,
        messages=[
            {"role": "system", "content": SYSTEM_PROMPT + "\nThe format of the returned result is consistent with the input file list."},
            {"role": "user", "content": prompt},
        ],
        stream=True,
    )
    mes = ''
    async for chunk in res:
        if not chunk.choices:
            continue
        delta = chunk.choices[0].delta
        if delta and delta.content:
            mes += delta.content
            print(delta.content, end='', flush=True)
    print(flush=True)
    return parse_result(mes)


async def get_response(cfg: Config, inp: str, files: List[str],
                       series_name: str = None, year: int = None, tmdb_id: int = None,
                       tvdb_id: int = None, tmdb_data: TmdbData = None, season_number: int = None) -> Files:
    """Ask the model for the renamed location of every file in *files*.

    Builds the user prompt, sends it together with ``SYSTEM_PROMPT`` and
    streams the reply to stdout. Models accepted by
    ``is_support_structed_output`` use the structured-output path; all others
    get a plain-text reply that is parsed with ``parse_result``.

    Args:
        cfg: Configuration; ``proxy``, ``api_key``, ``base_url`` and ``model``
            are read from it.
        inp: Input directory, quoted verbatim in the prompt.
        files: File names to rename.
        series_name: Optional series name hint.
        year: Optional series year hint.
        tmdb_id: Optional TMDB ID hint.
        tvdb_id: Optional TVDB ID hint.
        tmdb_data: Optional TMDB metadata to embed in the prompt.
        season_number: Optional season number hint; ``0`` is passed through.

    Returns:
        The ``Files`` produced from the model's reply.

    Raises:
        ValueError: If the model refuses, no parsed result is received, or a
            reply line cannot be parsed.
    """
    prompt = _build_user_prompt(inp, files, series_name, year, tmdb_id, tvdb_id,
                                tmdb_data, season_number)
    # Hold the HTTP client in a context manager so its connections are
    # released on every exit path, including exceptions.
    async with httpx.AsyncClient(proxy=cfg.proxy) as http_client:
        client = openai.AsyncClient(api_key=cfg.api_key, base_url=cfg.base_url, http_client=http_client)
        if is_support_structed_output(cfg.model):
            return await _request_structured(client, cfg, prompt)
        return await _request_plain(client, cfg, prompt)