# Files
# pythonscript/comic_library_info.py
#
# 438 lines
# 15 KiB
# Python

# comic_library_info.py
# (C) 2022 lifegpc
# The repo location: https://github.com/lifegpc/pythonscript
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU Affero General Public License as published
# by the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU Affero General Public License for more details.
#
# You should have received a copy of the GNU Affero General Public License
# along with this program. If not, see <https://www.gnu.org/licenses/>.
from argparse import ArgumentParser, Namespace
from json import dump as dumpjson, load as loadjson
from os import listdir
from os.path import abspath, isdir, isfile, join, relpath, split as splitpath
from os.path import splitext
from typing import Any, Callable, Dict, List, Optional, Union
import xml.etree.ElementTree as ET
from zipfile import ZipFile
try:
from yaml import dump as dumpyaml, load as loadyaml
try:
from yaml import CSafeDumper as SafeDumper, CSafeLoader as SafeLoader
except ImportError:
from yaml import SafeDumper, SafeLoader
have_yaml = True
except ImportError:
have_yaml = False
# Command-line interface shared by the whole script; run() parses with it.
argp = ArgumentParser(description="A tool to scan/modify comic's info.")
argp.add_argument(
    '-V', '--version', action='version', version="%(prog)s 1.0.0")
# -v may be repeated; the resulting count selects how chatty the scan is.
argp.add_argument(
    '-v', '--verbose', action='count', default=0,
    help='Enable verbose output.')
argp.add_argument(
    'ACTION', choices=['d', 'dump', 'm', 'modify'],
    help="d/dump Dump the file which contains info. "
         "m/modify Use information from file to modify comic's information.")
# action='append' combined with nargs='*' stores the positional paths as a
# list nested in an outer list; run() unwraps it with args.PATH[0].
argp.add_argument(
    'PATH', action='append', nargs='*',
    help='The path to the library you want to scan.')
argp.add_argument(
    '-f', '--file',
    help='The location of the file which contains comic info.')
argp.add_argument(
    '-t', '--type', choices=['json', 'yaml'],
    help='The type of the file which contains comic info.')
argp.add_argument('-b', '--base', help='The base directory.')
def guess_type_from_file_name(fn: str) -> Optional[str]:
    """Map an info-file name to its format by extension.

    Returns 'yaml' for ``.yaml``/``.yml``, 'json' for ``.json``/``.jsonc``
    and None for anything else. The comparison is case-sensitive.
    """
    formats_by_suffix = {
        '.yaml': 'yaml',
        '.yml': 'yaml',
        '.json': 'json',
        '.jsonc': 'json',
    }
    _, suffix = splitext(fn)
    return formats_by_suffix.get(suffix)
def guess_type(path: str) -> Optional[str]:
    """Classify a library file by extension.

    Returns 'cbz' when the extension is exactly ``.cbz`` (case-sensitive),
    otherwise None.
    """
    _, suffix = splitext(path)
    return 'cbz' if suffix == '.cbz' else None
def split_path(path: str) -> List[str]:
    """Break a path into its components.

    Both ``/`` and ``\\`` are treated as separators. Empty components
    (from repeated or leading/trailing separators) and ``.`` components
    are dropped; every other component, including ``..``, is kept in order.
    """
    unified = path.replace('\\', '/')
    return [part for part in unified.split('/') if part not in ('', '.')]
def extract_str(ele: ET.Element) -> str:
    """Return the element's text, or an empty string when it has none."""
    text = ele.text
    if text is None:
        return ''
    return text
def extract_xml_content(ele: ET.Element, key: str, obj: Dict[str, Any],
                        callback: Callable[[ET.Element],
                                           Optional[Any]]) -> bool:
    """Copy one child element's converted value into *obj*.

    Looks up the first direct child of *ele* tagged *key*, converts it with
    *callback* and, when the result is not None, stores it as ``obj[key]``.

    Returns True only when a value was stored. A missing child, a callback
    that returns None, or any exception raised along the way yields False
    and leaves *obj* untouched.
    """
    try:
        child = ele.find(key)
        if child is None:
            return False
        data = callback(child)
        if data is None:
            return False
        obj[key] = data
        return True
    except Exception:
        # Best effort by design: a malformed field is skipped, not fatal.
        return False
def extract_xml_attrs(ele: ET.Element, key: str, obj: Dict[str, Any],
                      callback: Callable[[str], Optional[Any]]) -> bool:
    """Copy one XML attribute's converted value into *obj*.

    Reads attribute *key* from *ele*, converts the raw string with
    *callback* and, when the result is not None, stores it as ``obj[key]``.

    Returns True only when a value was stored. A missing attribute, a
    callback that returns None, or any exception raised along the way
    yields False and leaves *obj* untouched.
    """
    try:
        if key not in ele.attrib:
            return False
        data = callback(ele.attrib[key])
        if data is None:
            return False
        obj[key] = data
        return True
    except Exception:
        # Best effort by design: a malformed attribute is skipped, not fatal.
        return False
def filter_int(s: str) -> Optional[int]:
    """Convert *s* with ``int()``; return None when the conversion fails."""
    try:
        value = int(s)
    except Exception:
        return None
    return value
def extract_int(ele: ET.Element) -> Optional[int]:
    """Parse the element's text as an integer via filter_int."""
    return filter_int(extract_str(ele))
def filter_manga(s: str) -> Optional[str]:
    """Normalise a Manga value to its canonical spelling.

    Matching is case-insensitive; unrecognised values give None.
    """
    canonical = ('Unknown', 'Yes', 'No', 'YesAndRightToLeft')
    by_lowercase = {name.lower(): name for name in canonical}
    return by_lowercase.get(s.lower())
def extract_manga(ele: ET.Element) -> Optional[str]:
    """Normalise the element's text as a Manga value via filter_manga."""
    return filter_manga(extract_str(ele))
def filter_yesno(s: str) -> Optional[bool]:
    """Translate 'yes'/'no' (any case) to True/False; anything else is None."""
    lowered = s.lower()
    if lowered not in ('yes', 'no'):
        return None
    return lowered == 'yes'
def extract_yesno(ele: ET.Element) -> Optional[bool]:
    """Interpret the element's text as a yes/no flag via filter_yesno."""
    return filter_yesno(extract_str(ele))
def filter_age_rating(s: str) -> Optional[str]:
    """Normalise an AgeRating value to its canonical spelling.

    Matching is case-insensitive; unrecognised values give None.
    """
    canonical = (
        'Unknown',
        'Adults Only 18+',
        'Early Childhood',
        'Everyone',
        'Everyone 10+',
        'G',
        'Kids to Adults',
        'M',
        'MA15+',
        'Mature 17+',
        'PG',
        'R18+',
        'Rating Pending',
        'Teen',
        'X18+',
    )
    by_lowercase = {name.lower(): name for name in canonical}
    return by_lowercase.get(s.lower())
def extract_age_rating(ele: ET.Element) -> Optional[str]:
    """Normalise the element's text as an age rating via filter_age_rating."""
    return filter_age_rating(extract_str(ele))
def filter_comic_page_type(i: str) -> Optional[str]:
    """Normalise one page-type token to its canonical spelling.

    Matching is case-insensitive; unrecognised tokens give None.
    """
    canonical = (
        'FrontCover',
        'InnerCover',
        'Roundup',
        'Story',
        'Advertisement',
        'Editorial',
        'Letters',
        'Preview',
        'BackCover',
        'Other',
        'Deleted',
    )
    # Keys are derived by lower-casing the canonical names so that the
    # lookup key can never drift from the lower-cased input (the previous
    # hand-written comparison against 'backCover' could never match).
    by_lowercase = {name.lower(): name for name in canonical}
    return by_lowercase.get(i.lower())
def extract_comic_page_type(s: str) -> List[str]:
    """Parse a space-separated list of page types.

    Each token is normalised with filter_comic_page_type; tokens it does
    not recognise are dropped. Order of the remaining tokens is preserved.
    """
    normalised = (filter_comic_page_type(token) for token in s.split(' '))
    return [name for name in normalised if name is not None]
def filter_bool(s: str) -> Optional[bool]:
    """Translate 'true'/'false' (any case) to a bool; anything else is None."""
    lowered = s.lower()
    if lowered not in ('true', 'false'):
        return None
    return lowered == 'true'
def extract_bool(ele: ET.Element) -> Optional[bool]:
    """Interpret the element's text as a true/false flag via filter_bool."""
    return filter_bool(extract_str(ele))
def extract_comic_page_info(ele: ET.Element) -> Optional[Dict[str, Any]]:
    """Collect the attributes of one page element into a dict.

    The ``Image`` attribute is mandatory: when extract_xml_attrs reports
    that it could not be stored, the page is rejected and None is returned
    (not False, so callers testing ``is not None`` skip it as intended).
    All other attributes are optional and simply omitted when absent or
    unparsable.
    """
    obj: Dict[str, Any] = {}
    if not extract_xml_attrs(ele, "Image", obj, filter_int):
        return None
    # NOTE(review): the ComicInfo schema appears to name the page-type
    # attribute "Type" rather than "Story" - confirm before relying on it.
    extract_xml_attrs(ele, "Story", obj, extract_comic_page_type)
    extract_xml_attrs(ele, "DoublePage", obj, filter_bool)
    extract_xml_attrs(ele, "ImageSize", obj, filter_int)
    extract_xml_attrs(ele, "Key", obj, lambda s: s)
    extract_xml_attrs(ele, "Bookmark", obj, lambda s: s)
    extract_xml_attrs(ele, "ImageWidth", obj, filter_int)
    extract_xml_attrs(ele, "ImageHeight", obj, filter_int)
    return obj
def extract_array_of_comic_page_info(ele: ET.Element) -> List[Dict[str, Any]]:
    """Convert every direct ``Page`` child of *ele* into a page-info dict.

    Children with any other tag are ignored, as are pages that
    extract_comic_page_info rejects.
    """
    pages = []
    # Element.getchildren() was removed in Python 3.9; findall('Page')
    # yields the direct children with that tag on every supported version.
    for child in ele.findall('Page'):
        dat = extract_comic_page_info(child)
        # Accept only real dicts so a rejected page is skipped however the
        # rejection is signalled (None or False).
        if isinstance(dat, dict):
            pages.append(dat)
    return pages
def filter_rating(s) -> Optional[float]:
    """Parse a rating rounded to one decimal place.

    Returns the rounded value when it lies within 0..5 inclusive. Returns
    None when *s* cannot be converted to float or the rounded value falls
    outside that range.
    """
    try:
        rounded = round(float(s), 1)
    except Exception:
        return None
    if 0 <= rounded <= 5:
        return rounded
    return None
def extract_rating(ele: ET.Element) -> Optional[float]:
    """Parse the element's text as a rating via filter_rating."""
    return filter_rating(extract_str(ele))
def parse_xml(content: Union[str, bytes]) -> Optional[Dict[str, Any]]:
    """Parse a ComicInfo.xml document into a plain dict.

    Returns None when *content* is not parseable XML. Otherwise returns a
    dict holding every known field that is present on the root element and
    accepted by its converter; absent or invalid fields are left out.
    """
    try:
        root = ET.fromstring(content)
    except Exception:
        return None
    # (child tag, converter) pairs, in the order the keys are inserted.
    fields = (
        ("Title", extract_str),
        ("Series", extract_str),
        ("Number", extract_str),
        ("Count", extract_int),
        ("Volume", extract_int),
        ("AlternateSeries", extract_str),
        ("AlternateNumber", extract_str),
        ("AlternateCount", extract_int),
        ("Summary", extract_str),
        ("Notes", extract_str),
        ("Year", extract_int),
        ("Month", extract_int),
        ("Day", extract_int),
        ("Writer", extract_str),
        ("Penciller", extract_str),
        ("Inker", extract_str),
        ("Colorist", extract_str),
        ("Letterer", extract_str),
        ("CoverArtist", extract_str),
        ("Editor", extract_str),
        ("Translator", extract_str),
        ("Publisher", extract_str),
        ("Imprint", extract_str),
        ("Genre", extract_str),
        ("Tags", extract_str),
        ("Web", extract_str),
        ("PageCount", extract_int),
        ("LanguageISO", extract_str),
        ("Format", extract_str),
        ("BlackAndWhite", extract_yesno),
        ("Manga", extract_manga),
        ("Characters", extract_str),
        ("Teams", extract_str),
        ("Locations", extract_str),
        ("ScanInformation", extract_str),
        ("StoryArc", extract_str),
        ("StoryArcNumber", extract_str),
        ("SeriesGroup", extract_str),
        ("AgeRating", extract_age_rating),
        ("Pages", extract_array_of_comic_page_info),
        ("CommunityRating", extract_rating),
    )
    obj = {}
    for tag, converter in fields:
        extract_xml_content(root, tag, obj, converter)
    return obj
def iter_path(args: Namespace, path: str, data: object):
    """Recursively scan *path* and record what is found in *data*.

    *data* is the root of the tree (indexed like a dict keyed by path
    component). The position of *path* relative to ``args.base`` selects
    the nested ``'tree'`` dict that receives this directory's entries.
    Sub-directories are handled by recursing with the same root *data*.

    NOTE(review): the nesting below was reconstructed from a copy of the
    file whose indentation had been lost - confirm against the original,
    in particular whether the cbz placeholder entry is meant to be written
    in modify mode as well.
    """
    rpath = relpath(path, args.base)
    path_list = split_path(rpath)
    if args.verbose > 2:
        print(f'Split {rpath} to {path_list}')
    # Walk down from the root to the sub-tree that represents `path`.
    tdata = data
    for p in path_list:
        if p not in tdata:
            if args.ACTION in ['d', 'dump']:
                # Dumping: create missing directory nodes on the way down.
                tdata[p] = {'type': 'directory', 'tree': {}}
            else:
                # Not dumping: the bare lookup of a missing key raises
                # KeyError (for a dict), aborting the scan.
                tdata[p]
        tdata = tdata[p]['tree']
    for f in listdir(path):
        fpath = join(path, f)
        rfpath = relpath(fpath, args.base)
        if args.verbose > 0:
            print(f'Scan {rfpath}')
        if isdir(fpath):
            # Recurse from the root; the callee re-derives its own sub-tree.
            iter_path(args, fpath, data)
        elif isfile(fpath):
            fn = splitpath(fpath)[1]
            typ = guess_type(fn)
            if args.verbose > 2:
                print(f'Guess type: {typ}')
            if typ == 'cbz':
                # Placeholder entry; comic_info is filled in below if the
                # archive contains a readable ComicInfo.xml.
                tdata[fn] = {'type': 'cbz', 'comic_info': None}
                if args.ACTION in ['d', 'dump']:
                    with ZipFile(fpath, 'r', allowZip64=True) as z:
                        if args.verbose > 1:
                            print(f"Opened {rfpath}.")
                        try:
                            info = z.getinfo("ComicInfo.xml")
                            if args.verbose > 2:
                                print(f"ComicInfo.xml information: {info}")
                        except KeyError:
                            # getinfo raises KeyError when the archive has
                            # no member named ComicInfo.xml.
                            info = None
                        if info is not None:
                            try:
                                content = z.read(info)
                                if args.verbose > 1:
                                    print(f"Opend ComicInfo.xml in {rfpath}")
                                if args.verbose > 3:
                                    print("ComicInfo.xml Content:")
                                    # Show decoded text when possible, raw
                                    # bytes otherwise.
                                    try:
                                        content2 = content.decode('UTF-8')
                                    except Exception:
                                        content2 = content
                                    print(content2)
                                info = parse_xml(content)
                                tdata[fn]['comic_info'] = info
                            except Exception:
                                # Any failure reading or parsing leaves
                                # comic_info as None, without reporting it.
                                pass
            else:
                tdata[fn] = {'type': 'file'}
        else:
            print(f'{rfpath}({fpath}) has unknown file type.')
def run(args: Optional[List[str]] = None):
    """Entry point: parse arguments, scan the library, write the info file.

    *args* is the argument list handed to the parser (None means use the
    process command line).

    In dump mode the given paths (default ``.``) are scanned relative to
    the base directory (default: current directory) and the result is
    written to the info file as JSON or YAML.

    In modify mode the info file is loaded first. A base or paths missing
    from the command line are taken from the file, then the paths are
    scanned against the loaded tree.

    Raises ValueError when the info-file type cannot be guessed from its
    name, or when YAML is requested but pyyaml is not installed.
    """
    args = argp.parse_args(args)
    # PATH uses action='append' with nargs='*', so the parser wraps the
    # path list in an outer list; unwrap it.
    args.PATH = args.PATH[0]
    is_dump = args.ACTION in ['d', 'dump']
    if args.file is None:
        args.file = 'comic_info.json'
    if args.type is None:
        args.type = guess_type_from_file_name(args.file)
        if args.type is None:
            raise ValueError('Failed to guess file type.')
    if is_dump and len(args.PATH) == 0:
        args.PATH.append('.')
    if is_dump and args.base is None:
        args.base = abspath('.')
    if args.type == 'yaml' and not have_yaml:
        raise ValueError('pyyaml not installed but can be installed with pip install pyyaml.')  # noqa: E501
    if args.verbose > 1:
        print(args)
    if is_dump:
        args.base = abspath(args.base)
        data = {'path': [], 'base': args.base, 'tree': {}}
        for p in args.PATH:
            data['path'].append(relpath(abspath(p), args.base))
    elif args.ACTION in ['m', 'modify']:
        if args.type == 'json':
            with open(args.file, 'r', encoding='UTF-8') as f:
                data = loadjson(f)
        elif args.type == 'yaml':
            with open(args.file, 'r', encoding='UTF-8') as f:
                data = loadyaml(f, SafeLoader)
        # Resolve the base first: the stored paths are relative to it.
        if args.base is None:
            args.base = data['base']
        else:
            args.base = abspath(args.base)
        # PATH is always a list (never None) after parsing, so test for
        # emptiness to fall back to the paths recorded in the info file.
        if not args.PATH:
            args.PATH = [join(args.base, p) for p in data['path']]
    if args.verbose > 2:
        print(data)
    for p in args.PATH:
        iter_path(args, abspath(p), data['tree'])
    if is_dump:
        if args.type == 'json':
            with open(args.file, 'w', encoding='UTF-8') as f:
                dumpjson(data, f, ensure_ascii=False, separators=(',', ':'))
        elif args.type == 'yaml':
            with open(args.file, 'w', encoding='UTF-8') as f:
                dumpyaml(data, f, SafeDumper, allow_unicode=True)
# Run the command-line tool only when executed as a script, not on import.
if __name__ == "__main__":
    run()