mirror of
https://github.com/lifegpc/pythonscript.git
synced 2026-06-06 11:28:58 +08:00
438 lines
15 KiB
Python
438 lines
15 KiB
Python
# comic_library_info.py
|
|
# (C) 2022 lifegpc
|
|
# The repo location: https://github.com/lifegpc/pythonscript
|
|
#
|
|
# This program is free software: you can redistribute it and/or modify
|
|
# it under the terms of the GNU Affero General Public License as published
|
|
# by the Free Software Foundation, either version 3 of the License, or
|
|
# (at your option) any later version.
|
|
#
|
|
# This program is distributed in the hope that it will be useful,
|
|
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
# GNU Affero General Public License for more details.
|
|
#
|
|
# You should have received a copy of the GNU Affero General Public License
|
|
# along with this program. If not, see <https://www.gnu.org/licenses/>.
|
|
from argparse import ArgumentParser, Namespace
|
|
from json import dump as dumpjson, load as loadjson
|
|
from os import listdir
|
|
from os.path import abspath, isdir, isfile, join, relpath, split as splitpath
|
|
from os.path import splitext
|
|
from typing import Any, Callable, Dict, List, Optional, Union
|
|
import xml.etree.ElementTree as ET
|
|
from zipfile import ZipFile
|
|
try:
|
|
from yaml import dump as dumpyaml, load as loadyaml
|
|
try:
|
|
from yaml import CSafeDumper as SafeDumper, CSafeLoader as SafeLoader
|
|
except ImportError:
|
|
from yaml import SafeDumper, SafeLoader
|
|
have_yaml = True
|
|
except ImportError:
|
|
have_yaml = False
|
|
|
|
# Command-line interface definition; the parser is consumed by run().
argp = ArgumentParser(description="A tool to scan/modify comic's info.")
argp.add_argument('-V', '--version', action='version', version="%(prog)s 1.0.0")  # noqa: E501
argp.add_argument('-v', '--verbose', action='count', help='Enable verbose output.', default=0)  # noqa: E501
argp.add_argument('ACTION', choices=['d', 'dump', 'm', 'modify'], help='d/dump Dump the file which contains info. m/modify Use information from file to modify comic\'s information.')  # noqa: E501
# action='append' together with nargs='*' stores the paths as a list nested
# inside another list; run() unwraps it with args.PATH[0].
argp.add_argument('PATH', action='append', nargs='*', help='The path to the library you want to scan.')  # noqa: E501
argp.add_argument('-f', '--file', help='The location of the file which contains comic info.')  # noqa: E501
argp.add_argument('-t', '--type', help='The type of the file which contains comic info.', choices=['json', 'yaml'])  # noqa: E501
argp.add_argument('-b', '--base', help='The base directory.')
|
|
|
|
|
|
def guess_type_from_file_name(fn: str) -> Optional[str]:
    """Guess the info-file format from the extension of *fn*.

    The extension is compared case-insensitively so that names such as
    ``INFO.JSON`` or ``info.YML`` are recognised as well.

    Returns ``'yaml'`` for ``.yaml``/``.yml``, ``'json'`` for
    ``.json``/``.jsonc`` and ``None`` for any other extension.
    """
    ext = splitext(fn)[1].lower()
    if ext in ('.yaml', '.yml'):
        return 'yaml'
    if ext in ('.json', '.jsonc'):
        return 'json'
    return None
|
|
|
|
|
|
def guess_type(path: str) -> Optional[str]:
    """Guess the comic container type from the extension of *path*.

    The extension is compared case-insensitively, so ``book.CBZ`` is
    treated the same as ``book.cbz``.

    Returns ``'cbz'`` for ``.cbz`` files and ``None`` for anything else.
    """
    ext = splitext(path)[1].lower()
    if ext == '.cbz':
        return 'cbz'
    return None
|
|
|
|
|
|
def split_path(path: str) -> List[str]:
    """Split *path* into its components.

    Both ``/`` and ``\\`` act as separators. Empty components (produced by
    leading, trailing or repeated separators) and ``.`` components are
    dropped; every other component, including ``..``, is kept in order.
    """
    # Normalise to a single separator so one str.split() call is enough.
    components = path.replace('\\', '/').split('/')
    return [part for part in components if part not in ('', '.')]
|
|
|
|
|
|
def extract_str(ele: ET.Element) -> str:
    """Return the text of *ele*, or an empty string when it has none."""
    text = ele.text
    if text is None:
        return ''
    return text
|
|
|
|
|
|
def extract_xml_content(ele: ET.Element, key: str, obj: Dict[str, Any],
                        callback: Callable[[ET.Element],
                                           Optional[Any]]) -> bool:
    """Extract the child element *key* of *ele* into ``obj[key]``.

    *callback* receives the first child element named *key* and returns the
    value to store, or ``None`` when the content is not usable.

    Returns ``True`` only when a value was stored in *obj*. Returns
    ``False`` when the child is missing, when *callback* returns ``None``
    or when *callback* raises.
    """
    try:
        child = ele.find(key)
        if child is None:
            return False
        data = callback(child)
    except Exception:
        # Best effort: one malformed field must not abort the whole parse.
        return False
    if data is None:
        return False
    obj[key] = data
    return True
|
|
|
|
|
|
def extract_xml_attrs(ele: ET.Element, key: str, obj: Dict[str, Any],
                      callback: Callable[[str], Optional[Any]]) -> bool:
    """Extract the attribute *key* of *ele* into ``obj[key]``.

    *callback* receives the raw attribute string and returns the value to
    store, or ``None`` when the string is not usable.

    Returns ``True`` only when a value was stored in *obj*. Returns
    ``False`` when the attribute is missing, when *callback* returns
    ``None`` or when *callback* raises.
    """
    if key not in ele.attrib:
        return False
    try:
        data = callback(ele.attrib[key])
    except Exception:
        # Best effort: one malformed attribute must not abort the parse.
        return False
    if data is None:
        return False
    obj[key] = data
    return True
|
|
|
|
|
|
def filter_int(s: str) -> Optional[int]:
    """Convert *s* to an ``int``.

    Returns ``None`` when *s* is not a valid integer literal (or is not a
    value ``int()`` accepts at all).
    """
    try:
        return int(s)
    except (TypeError, ValueError):
        return None
|
|
|
|
|
|
def extract_int(ele: ET.Element) -> Optional[int]:
    """Apply filter_int to the text of *ele* and return the result."""
    return filter_int(extract_str(ele))
|
|
|
|
|
|
def filter_manga(s: str) -> Optional[str]:
    """Normalise a ``Manga`` value to its canonical spelling.

    Matching ignores case. Returns one of ``'Unknown'``, ``'Yes'``,
    ``'No'`` or ``'YesAndRightToLeft'``, or ``None`` for any other input.
    """
    canonical = {
        'unknown': 'Unknown',
        'yes': 'Yes',
        'no': 'No',
        'yesandrighttoleft': 'YesAndRightToLeft',
    }
    return canonical.get(s.lower())
|
|
|
|
|
|
def extract_manga(ele: ET.Element) -> Optional[str]:
    """Apply filter_manga to the text of *ele* and return the result."""
    return filter_manga(extract_str(ele))
|
|
|
|
|
|
def filter_yesno(s: str) -> Optional[bool]:
    """Map ``yes``/``no`` (any letter case) to ``True``/``False``.

    Returns ``None`` for any other input.
    """
    return {'yes': True, 'no': False}.get(s.lower())
|
|
|
|
|
|
def extract_yesno(ele: ET.Element) -> Optional[bool]:
    """Apply filter_yesno to the text of *ele* and return the result."""
    return filter_yesno(extract_str(ele))
|
|
|
|
|
|
def filter_age_rating(s: str) -> Optional[str]:
    """Normalise an ``AgeRating`` value to its canonical spelling.

    Matching ignores case. Returns the canonical name when *s* matches one
    of the known ratings, otherwise ``None``.
    """
    ratings = (
        'Unknown',
        'Adults Only 18+',
        'Early Childhood',
        'Everyone',
        'Everyone 10+',
        'G',
        'Kids to Adults',
        'M',
        'MA15+',
        'Mature 17+',
        'PG',
        'R18+',
        'Rating Pending',
        'Teen',
        'X18+',
    )
    # Key each canonical name by its lowercase form for the lookup.
    by_lowercase = {name.lower(): name for name in ratings}
    return by_lowercase.get(s.lower())
|
|
|
|
|
|
def extract_age_rating(ele: ET.Element) -> Optional[str]:
    """Apply filter_age_rating to the text of *ele* and return the result."""
    return filter_age_rating(extract_str(ele))
|
|
|
|
|
|
def filter_comic_page_type(i: str) -> Optional[str]:
    """Normalise a single page-type token to its canonical spelling.

    Matching ignores case. Returns the canonical name when *i* matches one
    of the known page types, otherwise ``None``.
    """
    page_types = (
        'FrontCover',
        'InnerCover',
        'Roundup',
        'Story',
        'Advertisement',
        'Editorial',
        'Letters',
        'Preview',
        'BackCover',
        'Other',
        'Deleted',
    )
    # Both sides are lowercased, so no entry can be missed because of a
    # mixed-case literal (the old comparison against 'backCover' never
    # matched the lowercased input).
    by_lowercase = {name.lower(): name for name in page_types}
    return by_lowercase.get(i.lower())
|
|
|
|
|
|
def extract_comic_page_type(s: str) -> List[str]:
    """Parse a space-separated list of page types.

    Each token of *s* (split on single spaces) is passed through
    filter_comic_page_type; tokens it rejects are dropped. The recognised
    names are returned in their original order.
    """
    recognised = (filter_comic_page_type(token) for token in s.split(' '))
    return [name for name in recognised if name is not None]
|
|
|
|
|
|
def filter_bool(s: str) -> Optional[bool]:
    """Map ``true``/``false`` (any letter case) to ``True``/``False``.

    Returns ``None`` for any other input.
    """
    return {'true': True, 'false': False}.get(s.lower())
|
|
|
|
|
|
def extract_bool(ele: ET.Element) -> Optional[bool]:
    """Apply filter_bool to the text of *ele* and return the result."""
    return filter_bool(extract_str(ele))
|
|
|
|
|
|
def extract_comic_page_info(ele: ET.Element) -> Optional[Dict[str, Any]]:
    """Collect the attributes of one ``Page`` element into a dict.

    The ``Image`` attribute is mandatory: when it cannot be extracted the
    page is rejected and ``None`` is returned (as the annotation promises,
    rather than ``False``). All other attributes are optional and are only
    present in the result when they could be extracted.
    """
    obj: Dict[str, Any] = {}
    if not extract_xml_attrs(ele, "Image", obj, filter_int):
        return None
    # The ComicInfo schema names the page-type attribute "Type"; "Story" is
    # only its default value, not an attribute name.
    extract_xml_attrs(ele, "Type", obj, extract_comic_page_type)
    extract_xml_attrs(ele, "DoublePage", obj, filter_bool)
    extract_xml_attrs(ele, "ImageSize", obj, filter_int)
    extract_xml_attrs(ele, "Key", obj, lambda s: s)
    extract_xml_attrs(ele, "Bookmark", obj, lambda s: s)
    extract_xml_attrs(ele, "ImageWidth", obj, filter_int)
    extract_xml_attrs(ele, "ImageHeight", obj, filter_int)
    return obj
|
|
|
|
|
|
def extract_array_of_comic_page_info(ele: ET.Element) -> List[Dict[str, Any]]:
    """Return the page-info dicts for every ``Page`` child of *ele*.

    Children with another tag, and pages that extract_comic_page_info
    rejects, are skipped.
    """
    pages = []
    # Element.getchildren() was removed in Python 3.9; iterating the
    # element yields the same direct children on every version.
    for child in ele:
        if child.tag != 'Page':
            continue
        info = extract_comic_page_info(child)
        # Only keep real results; a rejected page must never end up in the
        # list, whatever falsy marker was used to signal the rejection.
        if isinstance(info, dict):
            pages.append(info)
    return pages
|
|
|
|
|
|
def filter_rating(s) -> Optional[float]:
    """Convert *s* to a rating rounded to one decimal place.

    Returns ``None`` when *s* cannot be converted to a float or when the
    rounded value lies outside the inclusive range 0 to 5.
    """
    try:
        rating = round(float(s), 1)
    except (TypeError, ValueError, OverflowError):
        return None
    # NaN fails both comparisons and is therefore rejected as well.
    return rating if 0 <= rating <= 5 else None
|
|
|
|
|
|
def extract_rating(ele: ET.Element) -> Optional[float]:
    """Apply filter_rating to the text of *ele* and return the result."""
    return filter_rating(extract_str(ele))
|
|
|
|
|
|
def parse_xml(content: Union[str, bytes]) -> Optional[Dict[str, Any]]:
    """Parse a ComicInfo XML document into a dict.

    Returns ``None`` when *content* cannot be parsed as XML. Otherwise
    returns a dict that holds, in the order listed below, every field that
    extract_xml_content managed to extract from the root element.
    """
    try:
        root = ET.fromstring(content)
    except Exception:
        return None
    # (element name, extractor) pairs; the order determines the key order
    # of the resulting dict.
    fields = (
        ("Title", extract_str),
        ("Series", extract_str),
        ("Number", extract_str),
        ("Count", extract_int),
        ("Volume", extract_int),
        ("AlternateSeries", extract_str),
        ("AlternateNumber", extract_str),
        ("AlternateCount", extract_int),
        ("Summary", extract_str),
        ("Notes", extract_str),
        ("Year", extract_int),
        ("Month", extract_int),
        ("Day", extract_int),
        ("Writer", extract_str),
        ("Penciller", extract_str),
        ("Inker", extract_str),
        ("Colorist", extract_str),
        ("Letterer", extract_str),
        ("CoverArtist", extract_str),
        ("Editor", extract_str),
        ("Translator", extract_str),
        ("Publisher", extract_str),
        ("Imprint", extract_str),
        ("Genre", extract_str),
        ("Tags", extract_str),
        ("Web", extract_str),
        ("PageCount", extract_int),
        ("LanguageISO", extract_str),
        ("Format", extract_str),
        ("BlackAndWhite", extract_yesno),
        ("Manga", extract_manga),
        ("Characters", extract_str),
        ("Teams", extract_str),
        ("Locations", extract_str),
        ("ScanInformation", extract_str),
        ("StoryArc", extract_str),
        ("StoryArcNumber", extract_str),
        ("SeriesGroup", extract_str),
        ("AgeRating", extract_age_rating),
        ("Pages", extract_array_of_comic_page_info),
        ("CommunityRating", extract_rating),
    )
    obj = {}
    for name, extractor in fields:
        extract_xml_content(root, name, obj, extractor)
    return obj
|
|
|
|
|
|
def _read_comic_info(args: Namespace, fpath: str,
                     rfpath: str) -> Optional[Dict[str, Any]]:
    """Read and parse ``ComicInfo.xml`` from the archive at *fpath*.

    *rfpath* is only used in messages. Returns the dict produced by
    parse_xml, or ``None`` when the archive cannot be opened, has no
    ``ComicInfo.xml`` entry, or the entry cannot be read or parsed.
    """
    # Imported locally because the module only imports ZipFile.
    from zipfile import BadZipFile
    try:
        archive = ZipFile(fpath, 'r', allowZip64=True)
    except (BadZipFile, OSError) as e:
        # One broken archive must not abort the scan of the whole library.
        print(f'Failed to open {rfpath}: {e}')
        return None
    with archive as z:
        if args.verbose > 1:
            print(f"Opened {rfpath}.")
        try:
            info = z.getinfo("ComicInfo.xml")
        except KeyError:
            # The archive simply carries no metadata.
            return None
        if args.verbose > 2:
            print(f"ComicInfo.xml information: {info}")
        try:
            content = z.read(info)
            if args.verbose > 1:
                print(f"Opened ComicInfo.xml in {rfpath}")
            if args.verbose > 3:
                print("ComicInfo.xml Content:")
                try:
                    content2 = content.decode('UTF-8')
                except UnicodeDecodeError:
                    # Fall back to the raw bytes when it is not UTF-8.
                    content2 = content
                print(content2)
            return parse_xml(content)
        except Exception as e:
            # Best effort: report the problem instead of hiding it, but
            # keep scanning the remaining files.
            print(f'Failed to read ComicInfo.xml in {rfpath}: {e}')
            return None


def iter_path(args: Namespace, path: str, data: Dict[str, Any]):
    """Scan the directory *path* recursively and record it in *data*.

    *data* is the root tree, keyed by path component relative to
    ``args.base``. In dump mode missing directory nodes are created and the
    ``ComicInfo.xml`` of every ``.cbz`` file is parsed into its entry.

    Raises:
        KeyError: in modify mode, when a directory component of *path* is
            not present in *data*.
    """
    is_dump = args.ACTION in ['d', 'dump']
    rpath = relpath(path, args.base)
    path_list = split_path(rpath)
    if args.verbose > 2:
        print(f'Split {rpath} to {path_list}')
    # Walk down from the root to the node that represents this directory.
    tdata = data
    for p in path_list:
        if p not in tdata:
            if not is_dump:
                raise KeyError(p)
            tdata[p] = {'type': 'directory', 'tree': {}}
        tdata = tdata[p]['tree']
    for f in listdir(path):
        fpath = join(path, f)
        rfpath = relpath(fpath, args.base)
        if args.verbose > 0:
            print(f'Scan {rfpath}')
        if isdir(fpath):
            # The recursive call locates its own node again, so it is given
            # the root tree rather than the current node.
            iter_path(args, fpath, data)
        elif isfile(fpath):
            fn = splitpath(fpath)[1]
            typ = guess_type(fn)
            if args.verbose > 2:
                print(f'Guess type: {typ}')
            if typ == 'cbz':
                # NOTE(review): this also runs in modify mode, where it
                # replaces the entry loaded from the info file - confirm
                # that this is intended once modify is implemented.
                tdata[fn] = {'type': 'cbz', 'comic_info': None}
                if is_dump:
                    tdata[fn]['comic_info'] = _read_comic_info(
                        args, fpath, rfpath)
            else:
                tdata[fn] = {'type': 'file'}
        else:
            print(f'{rfpath}({fpath}) has unknown file type.')
|
|
|
|
|
|
def run(args: Optional[List[str]] = None):
    """Entry point: parse the command line and dump or load comic info.

    *args* is the argument list handed to the parser; ``None`` makes
    argparse read ``sys.argv``.

    In dump mode the given paths (default: the current directory) are
    scanned and the result is written to the info file. In modify mode the
    info file is loaded and the paths are walked against it; when no path
    is given, the paths stored in the info file are used.

    Raises:
        ValueError: when the info-file type cannot be guessed, or when
            YAML is requested but pyyaml is not installed.
    """
    args = argp.parse_args(args)
    # PATH arrives as a list nested in a list (action='append' combined
    # with nargs='*'); unwrap it and tolerate an empty outer list.
    args.PATH = list(args.PATH[0]) if args.PATH else []
    is_dump = args.ACTION in ['d', 'dump']
    if args.file is None:
        args.file = 'comic_info.json'
    if args.type is None:
        args.type = guess_type_from_file_name(args.file)
    if args.type is None:
        raise ValueError('Failed to guess file type.')
    if is_dump and not args.PATH:
        args.PATH.append('.')
    if is_dump and args.base is None:
        args.base = abspath('.')
    if args.type == 'yaml' and not have_yaml:
        raise ValueError('pyyaml not installed but can be installed with pip install pyyaml.')  # noqa: E501
    if args.verbose > 1:
        print(args)
    if is_dump:
        args.base = abspath(args.base)
        data = {
            'path': [relpath(abspath(p), args.base) for p in args.PATH],
            'base': args.base,
            'tree': {},
        }
    elif args.ACTION in ['m', 'modify']:
        if args.type == 'json':
            with open(args.file, 'r', encoding='UTF-8') as f:
                data = loadjson(f)
        elif args.type == 'yaml':
            with open(args.file, 'r', encoding='UTF-8') as f:
                data = loadyaml(f, SafeLoader)
        # Resolve the base directory first: the stored paths are relative
        # to it and have to be joined with the final value.
        if args.base is None:
            args.base = data['base']
        else:
            args.base = abspath(args.base)
        # PATH is always a list here (never None), so test for emptiness
        # to fall back to the paths recorded in the info file.
        if not args.PATH:
            args.PATH.extend(join(args.base, p) for p in data['path'])
    if args.verbose > 2:
        print(data)
    for p in args.PATH:
        iter_path(args, abspath(p), data['tree'])
    if is_dump:
        if args.type == 'json':
            with open(args.file, 'w', encoding='UTF-8') as f:
                dumpjson(data, f, ensure_ascii=False, separators=(',', ':'))
        elif args.type == 'yaml':
            with open(args.file, 'w', encoding='UTF-8') as f:
                dumpyaml(data, f, SafeDumper, allow_unicode=True)
|
|
|
|
|
|
# Run the command-line interface only when executed as a script, not when
# the module is imported.
if __name__ == "__main__":
    run()
|