From 95676507f85c7a8e98891037529734530f976689 Mon Sep 17 00:00:00 2001 From: lifegpc Date: Sat, 27 Aug 2022 11:50:25 +0800 Subject: [PATCH] Add comic_library_info.py --- comic_library_info.py | 431 ++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 431 insertions(+) create mode 100644 comic_library_info.py diff --git a/comic_library_info.py b/comic_library_info.py new file mode 100644 index 0000000..87148c0 --- /dev/null +++ b/comic_library_info.py @@ -0,0 +1,431 @@ +# comic_library_info.py +# (C) 2022 lifegpc +# The repo location: https://github.com/lifegpc/pythonscript +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU Affero General Public License as published +# by the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Affero General Public License for more details. +# +# You should have received a copy of the GNU Affero General Public License +# along with this program. If not, see <https://www.gnu.org/licenses/>. 
from argparse import ArgumentParser, Namespace
from json import dump as dumpjson, load as loadjson
from os import listdir
from os.path import abspath, isdir, isfile, join, relpath, split as splitpath
from os.path import splitext
from typing import Any, Callable, Dict, List, Optional, Union
import xml.etree.ElementTree as ET
from zipfile import ZipFile
try:
    from yaml import dump as dumpyaml, load as loadyaml
    try:
        # Prefer the C-accelerated implementations when libyaml is available.
        from yaml import CSafeDumper as SafeDumper, CSafeLoader as SafeLoader
    except ImportError:
        from yaml import SafeDumper, SafeLoader
    have_yaml = True
except ImportError:
    have_yaml = False

# Command line interface.  PATH combines action='append' with nargs='*', so
# the parsed value is a list that contains one list of paths.
argp = ArgumentParser(description="A tool to scan/modify comic's info.")
argp.add_argument('-V', '--version', action='version', version="%(prog)s 1.0.0")  # noqa: E501
argp.add_argument('-v', '--verbose', action='count', help='Enable verbose output.', default=0)  # noqa: E501
argp.add_argument(
    'ACTION', choices=['d', 'dump', 'm', 'modify'],
    help='d/dump Dump the file which contains info. '
         'm/modify Use informaiton from file to modify comic\'s information.')  # noqa: E501
argp.add_argument('PATH', action='append', nargs='*', help='The path to the library you want to scan.')  # noqa: E501
argp.add_argument('-f', '--file', help='The location of the file which contains comic info.')  # noqa: E501
argp.add_argument('-t', '--type', help='The type of the file which contains comic info.', choices=['json', 'yaml'])  # noqa: E501
argp.add_argument('-b', '--base', help='The base directory.')


def guess_type_from_file_name(fn: str) -> Optional[str]:
    """Guess the format of the info file from the extension of *fn*.

    The extension is compared case-insensitively, so ``INFO.JSON`` is
    recognised as well.

    Returns ``'yaml'`` for ``.yaml``/``.yml``, ``'json'`` for
    ``.json``/``.jsonc`` and ``None`` for anything else.
    """
    ext = splitext(fn)[1].lower()
    if ext in ('.yaml', '.yml'):
        return 'yaml'
    if ext in ('.json', '.jsonc'):
        return 'json'
    return None


def guess_type(path: str) -> Optional[str]:
    """Guess the type of a library file from the extension of *path*.

    The extension is compared case-insensitively (``.CBZ`` == ``.cbz``).

    Returns ``'cbz'`` for comic book zip archives and ``None`` otherwise.
    """
    if splitext(path)[1].lower() == '.cbz':
        return 'cbz'
    return None


def split_path(path: str) -> List[str]:
    """Split *path* into its components.

    Both ``/`` and ``\\`` are treated as separators.  Empty components
    (repeated, leading or trailing separators) and ``.`` components are
    dropped; ``..`` components are kept unchanged.
    """
    components = path.replace('\\', '/').split('/')
    return [c for c in components if c not in ('', '.')]


def extract_str(ele: ET.Element) -> str:
    """Return the text of *ele*, or an empty string when it has none."""
    return '' if ele.text is None else ele.text


def extract_xml_content(ele: ET.Element, key: str, obj: Dict[str, Any],
                        callback: Callable[[ET.Element],
                                           Optional[Any]]) -> bool:
    """Copy the converted child element *key* of *ele* into *obj*.

    *callback* receives the child element; its result is stored as
    ``obj[key]`` unless it is ``None``.

    Returns ``False`` when the child is missing or when anything raised,
    ``True`` otherwise (even if the callback produced ``None`` and nothing
    was stored).
    """
    try:
        child = ele.find(key)
        if child is None:
            return False
        data = callback(child)
        if data is not None:
            obj[key] = data
        return True
    except Exception:
        # Deliberately best-effort: one unreadable field must not prevent
        # the remaining fields from being extracted.
        return False


def extract_xml_attrs(ele: ET.Element, key: str, obj: Dict[str, Any],
                      callback: Callable[[str], Optional[Any]]) -> bool:
    """Copy the converted attribute *key* of *ele* into *obj*.

    *callback* receives the raw attribute string; its result is stored as
    ``obj[key]`` unless it is ``None``.

    Returns ``False`` when the attribute is missing or when anything raised,
    ``True`` otherwise (even if the callback produced ``None`` and nothing
    was stored).
    """
    try:
        if key not in ele.attrib:
            return False
        data = callback(ele.attrib[key])
        if data is not None:
            obj[key] = data
        return True
    except Exception:
        # Deliberately best-effort, see extract_xml_content().
        return False


def filter_int(s: str) -> Optional[int]:
    """Convert *s* to ``int``; return ``None`` when it is not an integer."""
    try:
        return int(s)
    except (TypeError, ValueError):
        return None


def extract_int(ele: ET.Element) -> Optional[int]:
    """Return the text of *ele* as ``int``, or ``None`` when it is invalid."""
    return filter_int(extract_str(ele))
# Canonical spellings of the enumerated ComicInfo values, keyed by their
# lower-cased form so that lookups are case-insensitive.
_MANGA_VALUES = {v.lower(): v for v in (
    'Unknown', 'Yes', 'No', 'YesAndRightToLeft',
)}
_AGE_RATINGS = {v.lower(): v for v in (
    'Unknown', 'Adults Only 18+', 'Early Childhood', 'Everyone',
    'Everyone 10+', 'G', 'Kids to Adults', 'M', 'MA15+', 'Mature 17+', 'PG',
    'R18+', 'Rating Pending', 'Teen', 'X18+',
)}
_PAGE_TYPES = {v.lower(): v for v in (
    'FrontCover', 'InnerCover', 'Roundup', 'Story', 'Advertisement',
    'Editorial', 'Letters', 'Preview', 'BackCover', 'Other', 'Deleted',
)}


def filter_manga(s: str) -> Optional[str]:
    """Return the canonical ``Manga`` value for *s*, or ``None`` if invalid."""
    return _MANGA_VALUES.get(s.lower())


def extract_manga(ele: ET.Element) -> Optional[str]:
    """Return the canonical ``Manga`` value stored in the text of *ele*."""
    return filter_manga(extract_str(ele))


def filter_yesno(s: str) -> Optional[bool]:
    """Map ``yes``/``no`` (any case) to a bool; ``None`` for other input."""
    return {'yes': True, 'no': False}.get(s.lower())


def extract_yesno(ele: ET.Element) -> Optional[bool]:
    """Return the yes/no text of *ele* as a bool, or ``None`` if invalid."""
    return filter_yesno(extract_str(ele))


def filter_age_rating(s: str) -> Optional[str]:
    """Return the canonical ``AgeRating`` value for *s*, or ``None``."""
    return _AGE_RATINGS.get(s.lower())


def extract_age_rating(ele: ET.Element) -> Optional[str]:
    """Return the canonical ``AgeRating`` stored in the text of *ele*."""
    return filter_age_rating(extract_str(ele))


def filter_comic_page_type(i: str) -> Optional[str]:
    """Return the canonical page type for *i*, or ``None`` if unknown.

    The comparison is case-insensitive for every value, including
    ``BackCover``.
    """
    return _PAGE_TYPES.get(i.lower())


def extract_comic_page_type(s: str) -> List[str]:
    """Parse a whitespace separated list of page types.

    Unknown entries are dropped; the order of the valid ones is preserved.
    """
    candidates = (filter_comic_page_type(token) for token in s.split())
    return [t for t in candidates if t is not None]


def filter_bool(s: str) -> Optional[bool]:
    """Map ``true``/``false`` (any case) to a bool; ``None`` otherwise."""
    return {'true': True, 'false': False}.get(s.lower())


def extract_bool(ele: ET.Element) -> Optional[bool]:
    """Return the true/false text of *ele* as a bool, or ``None``."""
    return filter_bool(extract_str(ele))


def extract_comic_page_info(ele: ET.Element) -> Optional[Dict[str, Any]]:
    """Convert one ``<Page>`` element into a dict of its attributes.

    Returns ``None`` when the ``Image`` attribute is missing, so that the
    caller can skip the page.
    """
    obj: Dict[str, Any] = {}
    if not extract_xml_attrs(ele, "Image", obj, filter_int):
        return None
    # The ComicInfo schema names the page type attribute "Type"; "Story" is
    # merely its default value.
    extract_xml_attrs(ele, "Type", obj, extract_comic_page_type)
    extract_xml_attrs(ele, "DoublePage", obj, filter_bool)
    extract_xml_attrs(ele, "ImageSize", obj, filter_int)
    extract_xml_attrs(ele, "Key", obj, lambda s: s)
    extract_xml_attrs(ele, "Bookmark", obj, lambda s: s)
    extract_xml_attrs(ele, "ImageWidth", obj, filter_int)
    extract_xml_attrs(ele, "ImageHeight", obj, filter_int)
    return obj


def extract_array_of_comic_page_info(ele: ET.Element) -> List[Dict[str, Any]]:
    """Convert the ``<Page>`` children of a ``<Pages>`` element.

    Pages without a usable ``Image`` attribute are skipped.
    """
    pages = []
    # Element.getchildren() was removed in Python 3.9; iterating the element
    # yields its direct children on every supported version.
    for child in ele:
        if child.tag != 'Page':
            continue
        page = extract_comic_page_info(child)
        if page is not None:
            pages.append(page)
    return pages


def filter_rating(s) -> Optional[float]:
    """Convert *s* to a rating rounded to one decimal place.

    Returns ``None`` when *s* is not a number or the rounded value lies
    outside the range 0 to 5.
    """
    try:
        rating = round(float(s), 1)
    except (TypeError, ValueError, OverflowError):
        return None
    return rating if 0 <= rating <= 5 else None


def extract_rating(ele: ET.Element) -> Optional[float]:
    """Return the rating stored in the text of *ele*, or ``None``."""
    return filter_rating(extract_str(ele))


def parse_xml(content: Union[str, bytes]) -> Optional[Dict[str, Any]]:
    """Parse the content of a ``ComicInfo.xml`` document.

    Returns a dict with every recognised field that is present and valid
    (in schema order), or ``None`` when *content* cannot be parsed as XML.
    """
    try:
        root = ET.fromstring(content)
    except Exception:
        # Best-effort: a broken ComicInfo.xml is reported as "no info".
        return None
    # (element name, converter) pairs; the order defines the key order of
    # the resulting dict and therefore of the dumped file.
    fields = (
        ("Title", extract_str),
        ("Series", extract_str),
        ("Number", extract_str),
        ("Count", extract_int),
        ("Volume", extract_int),
        ("AlternateSeries", extract_str),
        ("AlternateNumber", extract_str),
        ("AlternateCount", extract_int),
        ("Summary", extract_str),
        ("Notes", extract_str),
        ("Year", extract_int),
        ("Month", extract_int),
        ("Day", extract_int),
        ("Writer", extract_str),
        ("Penciller", extract_str),
        ("Inker", extract_str),
        ("Colorist", extract_str),
        ("Letterer", extract_str),
        ("CoverArtist", extract_str),
        ("Editor", extract_str),
        ("Translator", extract_str),
        ("Publisher", extract_str),
        ("Imprint", extract_str),
        ("Genre", extract_str),
        ("Tags", extract_str),
        ("Web", extract_str),
        ("PageCount", extract_int),
        ("LanguageISO", extract_str),
        ("Format", extract_str),
        ("BlackAndWhite", extract_yesno),
        ("Manga", extract_manga),
        ("Characters", extract_str),
        ("Teams", extract_str),
        ("Locations", extract_str),
        ("ScanInformation", extract_str),
        ("StoryArc", extract_str),
        ("StoryArcNumber", extract_str),
        ("SeriesGroup", extract_str),
        ("AgeRating", extract_age_rating),
        ("Pages", extract_array_of_comic_page_info),
        ("CommunityRating", extract_rating),
    )
    obj: Dict[str, Any] = {}
    for key, extractor in fields:
        extract_xml_content(root, key, obj, extractor)
    return obj


def _read_comic_info(args: Namespace, fpath: str,
                     rfpath: str) -> Optional[Dict[str, Any]]:
    """Return the parsed ``ComicInfo.xml`` of the archive *fpath*.

    *rfpath* is the path used in messages.  Returns ``None`` when the
    archive has no ``ComicInfo.xml`` or when it cannot be read or parsed.
    Errors raised while opening the archive itself propagate to the caller.
    """
    with ZipFile(fpath, 'r', allowZip64=True) as z:
        if args.verbose > 1:
            print(f"Opened {rfpath}.")
        try:
            info = z.getinfo("ComicInfo.xml")
        except KeyError:
            return None
        if args.verbose > 2:
            print(f"ComicInfo.xml information: {info}")
        try:
            content = z.read(info)
            if args.verbose > 1:
                print(f"Opend ComicInfo.xml in {rfpath}")
            if args.verbose > 3:
                print("ComicInfo.xml Content:")
                try:
                    text = content.decode('UTF-8')
                except UnicodeDecodeError:
                    text = content
                print(text)
            return parse_xml(content)
        except Exception as err:
            # Best-effort: keep scanning, but do not hide the failure from
            # a user who asked for diagnostics.
            if args.verbose > 0:
                print(f'Failed to read ComicInfo.xml in {rfpath}: {err}')
            return None


def iter_path(args: Namespace, path: str, data: object):
    """Scan the directory *path* recursively and record it in *data*.

    *data* is the root ``tree`` mapping.  The entry for *path* is located
    (and, when dumping, created) by walking the components of *path*
    relative to ``args.base``.  Every ``.cbz`` file gets an entry with its
    parsed ``ComicInfo.xml`` (when dumping), every other file a plain
    ``file`` entry.

    Raises ``KeyError`` when modifying and *path* is not part of *data*.
    """
    from zipfile import BadZipFile
    is_dump = args.ACTION in ('d', 'dump')
    rpath = relpath(path, args.base)
    path_list = split_path(rpath)
    if args.verbose > 2:
        print(f'Split {rpath} to {path_list}')
    tdata = data
    for p in path_list:
        if p not in tdata:
            if not is_dump:
                # Only the dump action may add new directories to the tree.
                raise KeyError(p)
            tdata[p] = {'type': 'directory', 'tree': {}}
        tdata = tdata[p]['tree']
    for f in listdir(path):
        fpath = join(path, f)
        rfpath = relpath(fpath, args.base)
        if args.verbose > 0:
            print(f'Scan {rfpath}')
        if isdir(fpath):
            iter_path(args, fpath, data)
        elif isfile(fpath):
            fn = splitpath(fpath)[1]
            typ = guess_type(fn)
            if args.verbose > 2:
                print(f'Guess type: {typ}')
            if typ != 'cbz':
                tdata[fn] = {'type': 'file'}
                continue
            tdata[fn] = {'type': 'cbz', 'comic_info': None}
            if is_dump:
                try:
                    info = _read_comic_info(args, fpath, rfpath)
                except (BadZipFile, OSError) as err:
                    # One corrupt archive must not abort the whole scan.
                    print(f'Failed to open {rfpath}: {err}')
                else:
                    tdata[fn]['comic_info'] = info
        else:
            print(f'{rfpath}({fpath}) has unknown file type.')


def run(args: Optional[List[str]] = None):
    """Entry point: parse *args* (default ``sys.argv``) and run the action.

    Raises ``ValueError`` when the info file type cannot be determined or
    when YAML is requested but PyYAML is not installed.
    """
    args = argp.parse_args(args)
    # PATH is declared with action='append' and nargs='*', so argparse
    # stores a list of lists; flatten it into a plain list of paths.
    args.PATH = [p for group in (args.PATH or []) for p in group]
    if args.file is None:
        args.file = 'comic_info.json'
    if args.type is None:
        args.type = guess_type_from_file_name(args.file)
        if args.type is None:
            raise ValueError('Failed to guess file type.')
    is_dump = args.ACTION in ('d', 'dump')
    if is_dump and not args.PATH:
        args.PATH.append('.')
    if is_dump and args.base is None:
        args.base = abspath('.')
    if args.type == 'yaml' and not have_yaml:
        raise ValueError('pyyaml not installed but can be installed with pip install pyyaml.')  # noqa: E501
    if args.verbose > 1:
        print(args)
    if is_dump:
        data = {'path': args.PATH, 'base': args.base, 'tree': {}}
    elif args.ACTION in ('m', 'modify'):
        if args.type == 'json':
            with open(args.file, 'r', encoding='UTF-8') as f:
                data = loadjson(f)
        elif args.type == 'yaml':
            with open(args.file, 'r', encoding='UTF-8') as f:
                data = loadyaml(f, SafeLoader)
        # Fall back to the values saved in the info file.  PATH is always a
        # list here, so emptiness (not None) means "not given".
        if not args.PATH:
            args.PATH = data['path']
        if args.base is None:
            args.base = data['base']
    if args.verbose > 2:
        print(data)
    for p in args.PATH:
        iter_path(args, abspath(p), data['tree'])
    if is_dump:
        if args.type == 'json':
            with open(args.file, 'w', encoding='UTF-8') as f:
                dumpjson(data, f, ensure_ascii=False, separators=(',', ':'))
        elif args.type == 'yaml':
            with open(args.file, 'w', encoding='UTF-8') as f:
                dumpyaml(data, f, SafeDumper, allow_unicode=True)


if __name__ == "__main__":
    run()