mirror of
https://github.com/lifegpc/pythonscript.git
synced 2026-06-05 11:08:49 +08:00
Add comic_library_info.py
This commit is contained in:
431
comic_library_info.py
Normal file
431
comic_library_info.py
Normal file
@@ -0,0 +1,431 @@
|
||||
# comic_library_info.py
|
||||
# (C) 2022 lifegpc
|
||||
# The repo location: https://github.com/lifegpc/pythonscript
|
||||
#
|
||||
# This program is free software: you can redistribute it and/or modify
|
||||
# it under the terms of the GNU Affero General Public License as published
|
||||
# by the Free Software Foundation, either version 3 of the License, or
|
||||
# (at your option) any later version.
|
||||
#
|
||||
# This program is distributed in the hope that it will be useful,
|
||||
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
# GNU Affero General Public License for more details.
|
||||
#
|
||||
# You should have received a copy of the GNU Affero General Public License
|
||||
# along with this program. If not, see <https://www.gnu.org/licenses/>.
|
||||
from argparse import ArgumentParser, Namespace
|
||||
from json import dump as dumpjson, load as loadjson
|
||||
from os import listdir
|
||||
from os.path import abspath, isdir, isfile, join, relpath, split as splitpath
|
||||
from os.path import splitext
|
||||
from typing import Any, Callable, Dict, List, Optional, Union
|
||||
import xml.etree.ElementTree as ET
|
||||
from zipfile import ZipFile
|
||||
try:
|
||||
from yaml import dump as dumpyaml, load as loadyaml
|
||||
try:
|
||||
from yaml import CSafeDumper as SafeDumper, CSafeLoader as SafeLoader
|
||||
except ImportError:
|
||||
from yaml import SafeDumper, SafeLoader
|
||||
have_yaml = True
|
||||
except ImportError:
|
||||
have_yaml = False
|
||||
|
||||
# Command-line interface of the tool; run() parses sys.argv (or an explicit
# argument list) with this parser.
argp = ArgumentParser(description="A tool to scan/modify comic's info.")
argp.add_argument('-V', '--version', action='version', version="%(prog)s 1.0.0")  # noqa: E501
# Each additional -v raises the verbosity level by one (counted, default 0).
argp.add_argument('-v', '--verbose', action='count', help='Enable verbose output.', default=0)  # noqa: E501
argp.add_argument('ACTION', choices=['d', 'dump', 'm', 'modify'], help='d/dump Dump the file which contains info. m/modify Use information from file to modify comic\'s information.')  # noqa: E501
# action='append' together with nargs='*' stores the positional values as a
# list nested inside a one-element list; run() unwraps it with PATH[0].
argp.add_argument('PATH', action='append', nargs='*', help='The path to the library you want to scan.')  # noqa: E501
argp.add_argument('-f', '--file', help='The location of the file which contains comic info.')  # noqa: E501
argp.add_argument('-t', '--type', help='The type of the file which contains comic info.', choices=['json', 'yaml'])  # noqa: E501
argp.add_argument('-b', '--base', help='The base directory.')
|
||||
|
||||
|
||||
def guess_type_from_file_name(fn: str) -> Optional[str]:
    """Guess the format of the info file from the extension of *fn*.

    The extension is compared case-insensitively, so ``INFO.JSON`` is
    recognised the same way as ``info.json``.

    Returns ``'yaml'`` for ``.yaml``/``.yml``, ``'json'`` for
    ``.json``/``.jsonc`` and ``None`` for every other extension.
    """
    ext = splitext(fn)[1].lower()
    if ext in ('.yaml', '.yml'):
        return 'yaml'
    if ext in ('.json', '.jsonc'):
        return 'json'
    return None
|
||||
|
||||
|
||||
def guess_type(path: str) -> Optional[str]:
    """Guess the comic container type from the extension of *path*.

    The extension is compared case-insensitively, so ``Book.CBZ`` is
    treated like ``book.cbz``.

    Returns ``'cbz'`` for a ``.cbz`` extension and ``None`` otherwise.
    """
    ext = splitext(path)[1].lower()
    if ext == '.cbz':
        return 'cbz'
    return None
|
||||
|
||||
|
||||
def split_path(path: str) -> List[str]:
    """Break *path* into its components.

    Both ``/`` and ``\\`` act as separators. Empty components (from leading,
    trailing or doubled separators) and ``.`` components are dropped; every
    other component, ``..`` included, is kept in order.
    """
    # Normalising to one separator lets a single split do the whole job.
    pieces = path.replace('\\', '/').split('/')
    return [piece for piece in pieces if piece not in ('', '.')]
|
||||
|
||||
|
||||
def extract_str(ele: ET.Element) -> str:
    """Return the text of *ele*, or an empty string when it has no text."""
    text = ele.text
    if text is None:
        return ''
    return text
|
||||
|
||||
|
||||
def extract_xml_content(ele: ET.Element, key: str, obj: Dict[str, Any],
                        callback: Callable[[ET.Element],
                                           Optional[Any]]) -> bool:
    """Copy the converted content of child element *key* into *obj*.

    The first child of *ele* whose tag is *key* is passed to *callback*;
    when the callback yields something other than ``None`` the value is
    stored as ``obj[key]``.

    Returns ``True`` only when a value was stored. A missing child, a
    callback result of ``None`` and any exception raised along the way all
    yield ``False`` and leave *obj* untouched.
    """
    try:
        child = ele.find(key)
        if child is None:
            return False
        data = callback(child)
        if data is None:
            # Nothing usable was extracted; report that explicitly instead
            # of falling off the end of the function.
            return False
        obj[key] = data
        return True
    except Exception:
        # Best effort: one malformed field must not abort the whole parse.
        return False
|
||||
|
||||
|
||||
def extract_xml_attrs(ele: ET.Element, key: str, obj: Dict[str, Any],
                      callback: Callable[[str], Optional[Any]]) -> bool:
    """Copy the converted value of attribute *key* of *ele* into *obj*.

    The raw attribute string is passed to *callback*; when the callback
    yields something other than ``None`` the value is stored as
    ``obj[key]``.

    Returns ``True`` only when a value was stored. A missing attribute, a
    callback result of ``None`` and any exception raised along the way all
    yield ``False`` and leave *obj* untouched.
    """
    try:
        if key not in ele.attrib:
            return False
        data = callback(ele.attrib[key])
        if data is None:
            # An attribute that is present but unusable counts as a failure,
            # so callers can rely on the result for required attributes.
            return False
        obj[key] = data
        return True
    except Exception:
        # Best effort: a malformed attribute must not abort the whole parse.
        return False
|
||||
|
||||
|
||||
def filter_int(s: str) -> Optional[int]:
    """Convert *s* with ``int()``; return ``None`` when that fails."""
    try:
        value = int(s)
    except Exception:
        return None
    return value
|
||||
|
||||
|
||||
def extract_int(ele: ET.Element) -> Optional[int]:
    """Return the text of *ele* as an int, or ``None`` if it is not one."""
    return filter_int(extract_str(ele))
|
||||
|
||||
|
||||
def filter_manga(s: str) -> Optional[str]:
    """Normalise a ``Manga`` value to its canonical spelling.

    Matching ignores case. Returns one of ``'Unknown'``, ``'Yes'``,
    ``'No'`` or ``'YesAndRightToLeft'``, or ``None`` for any other input.
    """
    canonical = {
        'unknown': 'Unknown',
        'yes': 'Yes',
        'no': 'No',
        'yesandrighttoleft': 'YesAndRightToLeft',
    }
    return canonical.get(s.lower())
|
||||
|
||||
|
||||
def extract_manga(ele: ET.Element) -> Optional[str]:
    """Return the canonical ``Manga`` value held in the text of *ele*."""
    return filter_manga(extract_str(ele))
|
||||
|
||||
|
||||
def filter_yesno(s: str) -> Optional[bool]:
    """Map ``yes``/``no`` (any case) to ``True``/``False``.

    Every other input gives ``None``.
    """
    return {'yes': True, 'no': False}.get(s.lower())
|
||||
|
||||
|
||||
def extract_yesno(ele: ET.Element) -> Optional[bool]:
    """Interpret the text of *ele* as a yes/no flag (``None`` if neither)."""
    return filter_yesno(extract_str(ele))
|
||||
|
||||
|
||||
def filter_age_rating(s: str) -> Optional[str]:
    """Normalise an ``AgeRating`` value to its canonical spelling.

    Matching ignores case. Returns the canonical rating string, or ``None``
    when *s* is not one of the known ratings.
    """
    ratings = (
        'Unknown',
        'Adults Only 18+',
        'Early Childhood',
        'Everyone',
        'Everyone 10+',
        'G',
        'Kids to Adults',
        'M',
        'MA15+',
        'Mature 17+',
        'PG',
        'R18+',
        'Rating Pending',
        'Teen',
        'X18+',
    )
    # Keyed by the lower-cased spelling so the lookup is case-insensitive.
    by_lowercase = {rating.lower(): rating for rating in ratings}
    return by_lowercase.get(s.lower())
|
||||
|
||||
|
||||
def extract_age_rating(ele: ET.Element) -> Optional[str]:
    """Return the canonical age rating held in the text of *ele*."""
    return filter_age_rating(extract_str(ele))
|
||||
|
||||
|
||||
def filter_comic_page_type(i: str) -> Optional[str]:
    """Normalise a single comic page type to its canonical spelling.

    Matching ignores case. Returns the canonical page-type name, or
    ``None`` when *i* is not a known page type.
    """
    page_types = (
        'FrontCover',
        'InnerCover',
        'Roundup',
        'Story',
        'Advertisement',
        'Editorial',
        'Letters',
        'Preview',
        'BackCover',
        'Other',
        'Deleted',
    )
    # Deriving the keys with lower() keeps them consistent with the
    # lower-cased input; a hand-written mixed-case key could never match.
    by_lowercase = {name.lower(): name for name in page_types}
    return by_lowercase.get(i.lower())
|
||||
|
||||
|
||||
def extract_comic_page_type(s: str) -> List[str]:
    """Parse a space-separated list of page types.

    Each token is normalised with ``filter_comic_page_type``; tokens it
    does not recognise are left out of the result.
    """
    normalised = (filter_comic_page_type(token) for token in s.split(' '))
    return [page_type for page_type in normalised if page_type is not None]
|
||||
|
||||
|
||||
def filter_bool(s: str) -> Optional[bool]:
    """Map ``true``/``false`` (any case) to ``True``/``False``.

    Every other input gives ``None``.
    """
    return {'true': True, 'false': False}.get(s.lower())
|
||||
|
||||
|
||||
def extract_bool(ele: ET.Element) -> Optional[bool]:
    """Interpret the text of *ele* as true/false (``None`` if neither)."""
    return filter_bool(extract_str(ele))
|
||||
|
||||
|
||||
def extract_comic_page_info(ele: ET.Element) -> Optional[Dict[str, Any]]:
    """Collect the known attributes of one ``Page`` element.

    ``Image`` is mandatory: when it cannot be extracted the page is
    rejected and ``None`` is returned, matching the declared return type.
    All other attributes are optional and simply left out when missing or
    invalid.

    Returns a dict keyed by attribute name, or ``None`` for a rejected page.
    """
    obj = {}
    if not extract_xml_attrs(ele, "Image", obj, filter_int):
        return None
    extract_xml_attrs(ele, "Story", obj, extract_comic_page_type)
    extract_xml_attrs(ele, "DoublePage", obj, filter_bool)
    extract_xml_attrs(ele, "ImageSize", obj, filter_int)
    # Key and Bookmark are kept as the raw attribute strings.
    extract_xml_attrs(ele, "Key", obj, lambda s: s)
    extract_xml_attrs(ele, "Bookmark", obj, lambda s: s)
    extract_xml_attrs(ele, "ImageWidth", obj, filter_int)
    extract_xml_attrs(ele, "ImageHeight", obj, filter_int)
    return obj
|
||||
|
||||
|
||||
def extract_array_of_comic_page_info(ele: ET.Element) -> List[Dict[str, Any]]:
    """Collect the page info of every direct ``Page`` child of *ele*.

    Children with any other tag are ignored, as are pages for which
    ``extract_comic_page_info`` reports no data.
    """
    pages = []
    # Iterating an Element yields its direct children. The former
    # Element.getchildren() no longer exists since Python 3.9.
    for child in ele:
        if child.tag != 'Page':
            continue
        info = extract_comic_page_info(child)
        # A rejected page is signalled with a falsy value; keep only real
        # page dicts.
        if info:
            pages.append(info)
    return pages
|
||||
|
||||
|
||||
def filter_rating(s) -> Optional[float]:
    """Convert *s* to a rating rounded to one decimal place.

    Returns the rounded value when it lies between 0 and 5 inclusive, and
    ``None`` when it is out of range or *s* cannot be converted to a float.
    """
    try:
        rounded = round(float(s), 1)
    except Exception:
        return None
    # The range check is applied after rounding.
    if 0 <= rounded <= 5:
        return rounded
    return None
|
||||
|
||||
|
||||
def extract_rating(ele: ET.Element) -> Optional[float]:
    """Return the rating held in the text of *ele*, or ``None`` if invalid."""
    return filter_rating(extract_str(ele))
|
||||
|
||||
|
||||
def parse_xml(content: Union[str, bytes]) -> Optional[Dict[str, Any]]:
    """Parse a ComicInfo XML document into a plain dict.

    Returns ``None`` when *content* cannot be parsed as XML. Otherwise the
    result holds one entry per known field that could be extracted from the
    root element, in the order of the table below.
    """
    try:
        root = ET.fromstring(content)
    except Exception:
        return None
    # (child tag, converter) pairs, applied in this order.
    fields = (
        ("Title", extract_str),
        ("Series", extract_str),
        ("Number", extract_str),
        ("Count", extract_int),
        ("Volume", extract_int),
        ("AlternateSeries", extract_str),
        ("AlternateNumber", extract_str),
        ("AlternateCount", extract_int),
        ("Summary", extract_str),
        ("Notes", extract_str),
        ("Year", extract_int),
        ("Month", extract_int),
        ("Day", extract_int),
        ("Writer", extract_str),
        ("Penciller", extract_str),
        ("Inker", extract_str),
        ("Colorist", extract_str),
        ("Letterer", extract_str),
        ("CoverArtist", extract_str),
        ("Editor", extract_str),
        ("Translator", extract_str),
        ("Publisher", extract_str),
        ("Imprint", extract_str),
        ("Genre", extract_str),
        ("Tags", extract_str),
        ("Web", extract_str),
        ("PageCount", extract_int),
        ("LanguageISO", extract_str),
        ("Format", extract_str),
        ("BlackAndWhite", extract_yesno),
        ("Manga", extract_manga),
        ("Characters", extract_str),
        ("Teams", extract_str),
        ("Locations", extract_str),
        ("ScanInformation", extract_str),
        ("StoryArc", extract_str),
        ("StoryArcNumber", extract_str),
        ("SeriesGroup", extract_str),
        ("AgeRating", extract_age_rating),
        ("Pages", extract_array_of_comic_page_info),
        ("CommunityRating", extract_rating),
    )
    obj = {}
    for tag, converter in fields:
        extract_xml_content(root, tag, obj, converter)
    return obj
|
||||
|
||||
|
||||
def iter_path(args: Namespace, path: str, data: object):
    """Scan directory *path* recursively and record its entries in *data*.

    *path* is made relative to ``args.base`` and split into components; the
    function then descends through *data* one component at a time via each
    entry's ``'tree'`` mapping. In dump mode missing directory entries are
    created on the way down.

    Every directory entry of *path* is then visited: sub-directories are
    scanned recursively, ``.cbz`` files get a ``'cbz'`` entry (in dump mode
    filled with the parsed ``ComicInfo.xml``, when the archive has one), and
    all other regular files get a plain ``'file'`` entry.

    NOTE(review): *data* is annotated as ``object`` but is used as a
    mapping of name -> entry dict -- presumably the ``'tree'`` dict built
    by the caller; confirm.
    """
    rpath = relpath(path, args.base)
    path_list = split_path(rpath)
    if args.verbose > 2:
        print(f'Split {rpath} to {path_list}')
    # Walk down from the root of the tree to the node for this directory.
    tdata = data
    for p in path_list:
        if p not in tdata:
            if args.ACTION in ['d', 'dump']:
                tdata[p] = {'type': 'directory', 'tree': {}}
            else:
                # NOTE(review): bare lookup of a key just found missing; for
                # a dict this raises KeyError. Looks like a placeholder for
                # the modify mode -- confirm the intended behaviour.
                tdata[p]
        tdata = tdata[p]['tree']
    for f in listdir(path):
        fpath = join(path, f)
        rfpath = relpath(fpath, args.base)
        if args.verbose > 0:
            print(f'Scan {rfpath}')
        if isdir(fpath):
            # The root mapping is passed again; the recursive call walks
            # down to its own node from the relative path.
            iter_path(args, fpath, data)
        elif isfile(fpath):
            fn = splitpath(fpath)[1]
            typ = guess_type(fn)
            if args.verbose > 2:
                print(f'Guess type: {typ}')
            if typ == 'cbz':
                # NOTE(review): assigned in every mode, so outside dump mode
                # this replaces whatever entry *data* already held for the
                # file -- confirm this is intended.
                tdata[fn] = {'type': 'cbz', 'comic_info': None}
                if args.ACTION in ['d', 'dump']:
                    with ZipFile(fpath, 'r', allowZip64=True) as z:
                        if args.verbose > 1:
                            print(f"Opened {rfpath}.")
                        try:
                            info = z.getinfo("ComicInfo.xml")
                            if args.verbose > 2:
                                print(f"ComicInfo.xml information: {info}")
                        except KeyError:
                            # The archive has no ComicInfo.xml member.
                            info = None
                        if info is not None:
                            try:
                                content = z.read(info)
                                if args.verbose > 1:
                                    print(f"Opend ComicInfo.xml in {rfpath}")
                                if args.verbose > 3:
                                    print("ComicInfo.xml Content:")
                                    # Show decoded text when possible, the
                                    # raw bytes otherwise.
                                    try:
                                        content2 = content.decode('UTF-8')
                                    except Exception:
                                        content2 = content
                                    print(content2)
                                info = parse_xml(content)
                                tdata[fn]['comic_info'] = info
                            except Exception:
                                # Any failure while reading or parsing is
                                # ignored; 'comic_info' then stays None.
                                pass
            else:
                tdata[fn] = {'type': 'file'}
        else:
            # Neither a directory nor a regular file.
            print(f'{rfpath}({fpath}) has unknown file type.')
|
||||
|
||||
|
||||
def run(args: Optional[List[str]] = None):
    """Entry point: parse the command line and dump or load comic info.

    Args:
        args: Argument list to parse instead of ``sys.argv[1:]``; ``None``
            lets argparse read the process arguments.

    Raises:
        ValueError: When the info-file type cannot be guessed from its
            name, or when YAML is requested but PyYAML is not installed.
    """
    args = argp.parse_args(args)
    # PATH is declared with action='append' and nargs='*', so the values
    # arrive as a list nested inside a one-element list.
    args.PATH = args.PATH[0]
    is_dump = args.ACTION in ['d', 'dump']
    if args.file is None:
        args.file = 'comic_info.json'
    if args.type is None:
        args.type = guess_type_from_file_name(args.file)
        if args.type is None:
            raise ValueError('Failed to guess file type.')
    # Dump mode defaults to scanning the current directory, relative to it.
    if is_dump and len(args.PATH) == 0:
        args.PATH.append('.')
    if is_dump and args.base is None:
        args.base = abspath('.')
    if args.type == 'yaml' and not have_yaml:
        raise ValueError('pyyaml not installed but can be installed with pip install pyyaml.')  # noqa: E501
    if args.verbose > 1:
        print(args)
    if is_dump:
        data = {'path': args.PATH, 'base': args.base, 'tree': {}}
    elif args.ACTION in ['m', 'modify']:
        if args.type == 'json':
            with open(args.file, 'r', encoding='UTF-8') as f:
                data = loadjson(f)
        elif args.type == 'yaml':
            with open(args.file, 'r', encoding='UTF-8') as f:
                data = loadyaml(f, SafeLoader)
        # After the unwrap above PATH is always a list, never None; an
        # empty list means no paths were given on the command line, so fall
        # back to the ones stored in the info file.
        if not args.PATH:
            args.PATH = data['path']
        if args.base is None:
            args.base = data['base']
    if args.verbose > 2:
        print(data)
    for p in args.PATH:
        iter_path(args, abspath(p), data['tree'])
    if is_dump:
        if args.type == 'json':
            with open(args.file, 'w', encoding='UTF-8') as f:
                dumpjson(data, f, ensure_ascii=False, separators=(',', ':'))
        elif args.type == 'yaml':
            with open(args.file, 'w', encoding='UTF-8') as f:
                dumpyaml(data, f, SafeDumper, allow_unicode=True)
|
||||
|
||||
|
||||
# Run the command-line tool only when this file is executed as a script.
if __name__ == "__main__":
    run()
|
||||
Reference in New Issue
Block a user