mirror of
https://github.com/lifegpc/pythonscript.git
synced 2026-06-05 11:08:49 +08:00
1244 lines
56 KiB
Python
1244 lines
56 KiB
Python
from requests import Session
|
|
from http.cookiejar import MozillaCookieJar
|
|
from html.parser import HTMLParser
|
|
from typing import Any, Callable, Dict, List, Tuple, Optional, Union
|
|
from js2py import eval_js
|
|
from os import makedirs, remove
|
|
from os.path import join, exists, relpath, abspath, basename, splitext
|
|
from traceback import print_exc
|
|
from json import load, dump, loads
|
|
from urllib.parse import urljoin, parse_qs, urlparse
|
|
from base64 import b64decode as _b64decode
|
|
from re import compile
|
|
from argparse import ArgumentParser, RawTextHelpFormatter
|
|
from textwrap import wrap as _wrap
|
|
from zipfile import ZIP_DEFLATED, ZIP_STORED, ZipFile
|
|
from subprocess import Popen, DEVNULL
|
|
from xml.etree import ElementTree as ET
|
|
from time import gmtime, strftime, strptime, time
|
|
from calendar import timegm
|
|
from posixpath import join as posixjoin
|
|
|
|
|
|
def get_iso8601_time(d: int) -> str:
    """Format the Unix timestamp *d* as a UTC string ``YYYY-MM-DDTHH:MM:SSZ``."""
    utc_fields = gmtime(d)
    return strftime('%Y-%m-%dT%H:%M:%SZ', utc_fields)
|
|
|
|
|
|
def parse_time(s: str) -> int:
    """Convert a ``YYYY.MM.DD`` date string into a Unix timestamp (midnight, UTC)."""
    parsed = strptime(s, "%Y.%m.%d")
    return timegm(parsed)
|
|
|
|
|
|
# XML namespace URIs referenced by the EPUB writer and the XHTML converter below.
DC_NS = 'http://purl.org/dc/elements/1.1/'
OPF_NS = 'http://www.idpf.org/2007/opf'
SVG_NS = 'http://www.w3.org/2000/svg'
XLINK_NS = 'http://www.w3.org/1999/xlink'
XHTML_NS = 'http://www.w3.org/1999/xhtml'
# Tag spellings (plain and namespace-qualified) that XHTMLConvert.convert looks for.
SVG_TAGS = ['svg', '{%s}svg' % (SVG_NS)]
SWITCH_TAGS = ['switch', '{%s}switch' % (OPF_NS)]
# Dublin Core element names that EPUBMetadata.add_data accepts; other keys are ignored there.
ALLOW_DC_LIST = ['coverage', 'description', 'format', 'publisher', 'relation', 'rights', 'source', 'type']
# Media types that EPUBManifest.add_cover accepts for a cover image.
ALL_IMAGE_MIMETYPES = ['image/gif', 'image/jpeg', 'image/png', 'image/svg+xml', 'image/webp']
# File extension (with leading dot) -> image media type.
EXT_MIMETYPES = {'.jpeg': 'image/jpeg', '.jpg': 'image/jpeg', '.png': 'image/png', '.gif': 'image/gif', '.svg': 'image/svg+xml', '.webp': 'image/webp'}
# Known property prefixes -> vocabulary URI. EPUBMeta rejects any prefix not listed
# here, and EPUBMetadata.prefix uses the URIs to build the package "prefix" attribute.
PREFIX_DICT = {'rendition': 'http://www.idpf.org/vocab/rendition/#',
               'ebpaj': 'http://www.ebpaj.jp/',
               'fixed-layout-jp': 'http://www.digital-comic.jp/',
               'kadokawa': 'http://www.access-company.com/2012/layout#',
               'ibooks': 'http://vocabulary.itunes.apple.com/rdf/ibooks/vocabulary-extensions-1.0/',
               'dcterms': 'https://www.dublincore.org/specifications/dublin-core/dcmi-terms/'}
# JavaScript function (compiled through js2py's eval_js) that returns JSON.stringify(a).
js_dumps: Callable[[Any], str] = eval_js('function(a){return JSON.stringify(a);}')
|
|
|
|
|
|
class InvalidEPUB(Exception):
    """Raised by the EPUB classes when required data is missing from the book."""

    def __init__(self, message: str, *args):
        # A message is mandatory; any extra positional arguments are kept in ``args``.
        Exception.__init__(self, message, *args)
|
|
|
|
|
|
class EPUBLink:
    """A ``link`` entry of the container file together with the local file it ships."""

    def __init__(self, href: str, rel: str, file: str, media_type: str = None, compress_type: int = None):
        # href: path inside the archive; file: path of the source file on disk.
        self.href, self.rel, self.file = href, rel, file
        self.media_type = media_type
        # Passed straight to ZipFile.write when the file is stored.
        self.compress_type = compress_type

    def dump(self) -> Dict[str, str]:
        """Return the attributes for the ``link`` element.

        Raises:
            InvalidEPUB: if ``href``, ``rel`` or ``file`` is empty.
        """
        if not (self.href and self.rel and self.file):
            raise InvalidEPUB('href or rel is needed for link element.')
        attrs = {'href': self.href, 'rel': self.rel}
        if self.media_type:
            attrs['media-type'] = self.media_type
        return attrs

    def save(self, f: ZipFile):
        """Copy the local file into archive *f* under ``href``."""
        f.write(self.file, arcname=self.href, compress_type=self.compress_type)
        print(f'Added file {self.file} to {self.href} in archive.')
|
|
|
|
|
|
class EPUBIdentifier:
    """Text of one ``dc:identifier`` element plus its optional XML ``id``."""

    def __init__(self, value: str, id: str = None):
        self.value, self.id = value, id
|
|
|
|
|
|
class EPUBCreator:
    """A ``dc:creator`` / ``dc:contributor`` style entry.

    ``type`` becomes the element name after the ``dc:`` prefix, ``value`` its
    text; ``role`` and ``file_as`` are optional OPF attributes.
    """

    def __init__(self, type: str, value: str, role: str = None, file_as: str = None):
        self.type = type
        self.value = value
        self.role = role
        self.file_as = file_as

    def dump(self) -> ET.Element:
        """Build the ``dc:<type>`` element.

        Raises:
            InvalidEPUB: if ``value`` or ``type`` is empty.
        """
        if not self.value or not self.type:
            raise InvalidEPUB('Creator need a value or type.')
        e = ET.Element(f"dc:{self.type}")
        if self.role:
            e.attrib['opf:role'] = self.role
        if self.file_as:
            # The OPF attribute is spelled with a hyphen ("file-as"); the
            # underscore form is not a defined attribute and readers ignore it.
            e.attrib['opf:file-as'] = self.file_as
        e.text = self.value
        return e
|
|
|
|
|
|
class EPUBSubject:
    """One ``dc:subject`` entry, optionally qualified by an authority and term."""

    def __init__(self, sub: str, authority: str = None, term: str = None):
        self.sub, self.authority, self.term = sub, authority, term

    def dump(self) -> ET.Element:
        """Build the ``dc:subject`` element.

        ``opf:authority`` and ``opf:term`` are written only when an authority
        is set; a term without an authority is silently dropped.

        Raises:
            InvalidEPUB: if the subject text is empty, or an authority is given
                without a term.
        """
        if not self.sub:
            raise InvalidEPUB('Subject is needed for subject.')
        attrs = {}
        if self.authority:
            if not self.term:
                raise InvalidEPUB('Term is needed if authority is defined for subject.')
            attrs = {'opf:authority': self.authority, 'opf:term': self.term}
        element = ET.Element('dc:subject', attrs)
        element.text = self.sub
        return element
|
|
|
|
|
|
class EPUBMeta:
    """A ``<meta property="...">`` entry of the package metadata."""

    def __init__(self, prop: str, value: str):
        """Store the property/value pair.

        When *prop* contains ``:``, the text before the first colon is looked
        up in ``PREFIX_DICT`` and ``prefix_url`` is set to ``"<prefix>: <url>"``.

        Raises:
            ValueError: if that prefix is not a key of ``PREFIX_DICT``.
        """
        self.prefix_url = None
        prefix, colon, _rest = prop.partition(':')
        if colon:
            if prefix not in PREFIX_DICT:
                raise ValueError('Unknown prefix')
            self.prefix_url = f"{prefix}: {PREFIX_DICT[prefix]}"
        self.property = prop
        self.value = value

    def dump(self) -> ET.Element:
        """Build the ``meta`` element with the value as its text."""
        meta = ET.Element('meta', property=self.property)
        meta.text = self.value
        return meta
|
|
|
|
|
|
class ADEPageMap:
    """Collects page entries and serialises them as a ``page-map`` XML file."""

    def __init__(self, p):
        # Owning package; its manifest is consulted to validate hrefs in dump().
        self._p: EPUBPackage = p
        self.id = 'page_map'
        self.location = 'page_map.xml'
        # Each entry is either a bare href or a (name, href) tuple.
        self.pages: List[Union[str, Tuple[str, str]]] = []

    def add_page(self, href: str, name: str = None):
        """Append a page; without *name* only the href is stored."""
        entry = href if name is None else (name, href)
        self.pages.append(entry)

    def dump(self):
        """Return the page map as UTF-8 encoded XML bytes.

        Raises:
            ValueError: if a page's href is not present in the package manifest.
        """
        root = ET.Element('page-map', {'xmlns': OPF_NS})
        for page in self.pages:
            if isinstance(page, str):
                # Unnamed pages get an empty name attribute.
                attrs = {'href': page, 'name': ''}
            else:
                attrs = {'href': page[1], 'name': page[0]}
            if not self._p.manifest.have_href(attrs['href']):
                raise ValueError(f"Can not find href {attrs['href']} in manifest.")
            root.append(ET.Element('page', attrs))
        return ET.tostring(root, 'UTF-8')

    def save(self, f: ZipFile):
        """Write the serialised page map into archive *f* at ``location``."""
        f.writestr(self.location, self.dump())
        print(f'Added {self.location} to archive.')
|
|
|
|
|
|
class EPUBMetadata:
    """Holds the values that become the package ``metadata`` element."""

    def __init__(self, p):
        # Owning package; supplies the fallback language.
        self._p: EPUBPackage = p
        self.title = ''
        self.unique_identifier = ''
        # Unix timestamp written as dc:date, or None to omit it.
        self.date: int = None
        self._language: List[str] = []
        self.identifiers: List[EPUBIdentifier] = []
        self.creators: List[EPUBCreator] = []
        self.contributors: List[EPUBCreator] = []
        self.subjects: List[EPUBSubject] = []
        self.metas: List[EPUBMeta] = []
        # Extra Dublin Core values keyed by element name (see add_data).
        self.maps: Dict[str, List[str]] = {}

    def add_contributor(self, name: str, role: str = None, file_as: str = None):
        """Register a ``dc:contributor``."""
        contributor = EPUBCreator('contributor', name, role, file_as)
        self.contributors.append(contributor)

    def add_creator(self, name: str, role: str = None, file_as: str = None):
        """Register a ``dc:creator``."""
        creator = EPUBCreator('creator', name, role, file_as)
        self.creators.append(creator)

    def add_data(self, key: str, value: str):
        """Store *value* under Dublin Core element *key*.

        Keys outside ``ALLOW_DC_LIST`` are ignored without an error.
        """
        if key not in ALLOW_DC_LIST:
            return
        self.maps.setdefault(key, []).append(value)

    def add_identifier(self, value: str, id: str = None):
        """Register an identifier.

        Raises:
            ValueError: if *id* is given and already used by another identifier.
        """
        if id is not None and any(
                known.id is not None and known.id == id for known in self.identifiers):
            raise ValueError(f'id {id} already have value.')
        self.identifiers.append(EPUBIdentifier(value, id))

    def add_meta(self, prop: str, value: str):
        """Register a ``meta`` property (see EPUBMeta for prefix validation)."""
        self.metas.append(EPUBMeta(prop, value))

    def add_subject(self, sub: str, authority: str = None, term: str = None):
        """Register a ``dc:subject``."""
        self.subjects.append(EPUBSubject(sub, authority, term))

    def dump(self) -> ET.Element:
        """Build the ``metadata`` element.

        Children are emitted in this order: identifiers, title, languages,
        contributors, creators, date, subjects, extra Dublin Core values,
        metas, and finally a ``dcterms:modified`` meta holding the current time.

        Raises:
            InvalidEPUB: if the title or the language is missing.
        """
        root = ET.Element('metadata', {'xmlns:dc': DC_NS, 'xmlns:opf': OPF_NS})

        def add_text(tag, text, attrib=None):
            # Append a child element that carries only text (and maybe attributes).
            child = ET.SubElement(root, tag, attrib or {})
            child.text = text

        for ident in self.identifiers:
            add_text('dc:identifier', ident.value, {"id": ident.id} if ident.id else None)

        if not self.title:
            raise InvalidEPUB('title is needed for metadata.')
        add_text('dc:title', self.title)

        languages = self.language
        if not languages:
            raise InvalidEPUB('language is needed for metadata.')
        for code in languages:
            add_text('dc:language', code)

        for contributor in self.contributors:
            root.append(contributor.dump())
        for creator in self.creators:
            root.append(creator.dump())

        if self.date is not None:
            add_text('dc:date', get_iso8601_time(self.date))

        for subject in self.subjects:
            root.append(subject.dump())

        for key, values in self.maps.items():
            for text in values:
                add_text(f"dc:{key}", text)

        for meta in self.metas:
            root.append(meta.dump())

        modified = EPUBMeta('dcterms:modified', get_iso8601_time(time()))
        root.append(modified.dump())
        return root

    def get_unique_identifier(self) -> Optional[str]:
        """Return the id to use as the package's unique identifier.

        If ``unique_identifier`` names an existing identifier id it is returned.
        Otherwise it is reset to ``''`` and the first identifier with a
        non-empty id is used; ``None`` when there is none.
        """
        wanted = self.unique_identifier
        if wanted:
            if any(ident.id == wanted for ident in self.identifiers):
                return wanted
            # The configured id does not exist: forget it and fall back.
            self.unique_identifier = ''
        for ident in self.identifiers:
            if ident.id:
                return ident.id
        return None

    @property
    def language(self):
        """Explicit language list, else the package language as a one-item list, else None."""
        if self._language:
            return self._language
        fallback = self._p.language
        return [fallback] if fallback else None

    @property
    def prefix(self):
        """Space-joined prefix declarations: ``dcterms`` first, then each meta's, deduplicated."""
        declared = [f"dcterms: {PREFIX_DICT['dcterms']}"]
        for meta in self.metas:
            url = meta.prefix_url
            if url and url not in declared:
                declared.append(url)
        return ' '.join(declared)
|
|
|
|
|
|
class EPUBItem:
    """One manifest ``item``: archive path, id, media type and optional source file."""

    def __init__(self, href: str, id: str, media_type: str, file: str = None, properties: List[str] = None, fallback: str = None, media_overlay: str = None):
        self.href, self.id, self.media_type = href, id, media_type
        # Local file copied into the archive by save(); None means nothing to copy.
        self.file = file
        self.properties = properties
        self.fallback = fallback
        self.media_overlay = media_overlay

    def dump(self) -> ET.Element:
        """Build the ``item`` element; optional attributes are added only when set."""
        attrs = {'href': self.href, 'id': self.id, 'media-type': self.media_type}
        if self.properties:
            attrs['properties'] = ' '.join(self.properties)
        if self.fallback:
            attrs['fallback'] = self.fallback
        if self.media_overlay:
            attrs['media-overlay'] = self.media_overlay
        return ET.Element('item', attrs)

    def save(self, f: ZipFile):
        """Copy the source file into archive *f* under ``href`` (no-op without a file)."""
        if not self.file:
            return
        f.write(self.file, self.href)
        print(f'Added {self.file} to {self.href} in archive.')
|
|
|
|
|
|
class EPUBManifest:
    """The list of manifest items of one package."""

    def __init__(self, p):
        # Owning package; provides the page map and navigation document.
        self._p: EPUBPackage = p
        self.items: List[EPUBItem] = []

    def add_item(self, href: str, id: str, media_type: str, file: str = None, properties: List[str] = None, fallback: str = None, media_overlay: str = None):
        """Register a manifest item.

        Raises:
            ValueError: if *id* is ``'nav'``, equals the page map's id, or is
                already used; or if *href* is already used.
        """
        # 'nav' and the page-map id are reserved for the entries dump() adds itself.
        if id == 'nav' or (self._p.page_map and self._p.page_map.id == id):
            raise ValueError(f'id {id} already used.')
        for i in self.items:
            if i.href == href:
                raise ValueError(f'href {href} already used.')
            elif i.id == id:
                raise ValueError(f'id {id} already used.')
        self.items.append(EPUBItem(href, id, media_type, file, properties, fallback, media_overlay))

    def add_cover(self, href: str, id: str, media_type: str, file: str):
        """Register the cover image (an item with the ``cover-image`` property).

        Raises:
            ValueError: if a cover already exists or *media_type* is not in
                ``ALL_IMAGE_MIMETYPES``.
        """
        if self.have_cover_image:
            raise ValueError('Only one cover image.')
        if media_type not in ALL_IMAGE_MIMETYPES:
            raise ValueError(f'Unsupported media type {media_type}')
        self.add_item(href, id, media_type, file, ['cover-image'])

    def dump(self):
        """Build the ``manifest`` element.

        The registered items are followed by an entry for the page map (when
        the package has one) and an entry for the navigation document.
        """
        # Copy first: the synthetic page-map/nav entries must not be appended
        # to self.items, otherwise every dump() call would grow the manifest.
        items = list(self.items)
        if self._p.page_map:
            items.append(EPUBItem(self._p.page_map.location, self._p.page_map.id, 'application/oebps-page-map+xml'))
        items.append(EPUBItem(self._p.nav.location, 'nav', 'application/xhtml+xml', properties=['nav']))
        e = ET.Element('manifest')
        for i in items:
            e.append(i.dump())
        return e

    @property
    def have_cover_image(self):
        """True when some item carries the ``cover-image`` property."""
        return any(i.properties and 'cover-image' in i.properties for i in self.items)

    def have_href(self, href: str):
        """True when an item has this href; any ``#fragment`` suffix is ignored."""
        if '#' in href:
            href = href[:href.rfind('#')]
        return any(i.href == href for i in self.items)

    def have_id(self, id: str):
        """True when an item has this id."""
        return any(i.id == id for i in self.items)

    def save(self, f: ZipFile):
        """Let every registered item copy its file into archive *f*."""
        for i in self.items:
            i.save(f)
|
|
|
|
|
|
class EPUBItemRef:
    """One spine ``itemref`` pointing at a manifest item id."""

    def __init__(self, idref: str, linear: str = None, properties: List[str] = None, id: str = None):
        self.idref, self.linear = idref, linear
        self.properties, self.id = properties, id

    def dump(self):
        """Build the ``itemref`` element; optional attributes are added only when set."""
        attrs = {'idref': self.idref}
        if self.linear:
            attrs['linear'] = self.linear
        if self.properties:
            attrs['properties'] = ' '.join(self.properties)
        if self.id:
            attrs['id'] = self.id
        return ET.Element('itemref', attrs)
|
|
|
|
|
|
class EPUBSpine:
    """Ordered list of item references forming the package ``spine``."""

    def __init__(self, p):
        # Owning package; its manifest validates idrefs in dump().
        self._p: EPUBPackage = p
        self.page_progression_direction = None
        self.refs: List[EPUBItemRef] = []

    def add_ref(self, idref: str, linear: str = None, properties: List[str] = None, id: str = None):
        """Append an item reference to the spine."""
        ref = EPUBItemRef(idref, linear, properties, id)
        self.refs.append(ref)

    def dump(self):
        """Build the ``spine`` element.

        Raises:
            ValueError: if a reference's idref is not an id in the manifest.
        """
        spine = ET.Element('spine')
        if self.page_progression_direction:
            spine.set('page-progression-direction', self.page_progression_direction)
        for ref in self.refs:
            if self._p.manifest.have_id(ref.idref):
                spine.append(ref.dump())
                continue
            raise ValueError(f"Can not find id {ref.idref} in manifest.")
        return spine
|
|
|
|
|
|
class EPUBNav:
    """A table-of-contents entry that may contain nested entries."""

    def __init__(self, text: str, href: str = None):
        self.text, self.href = text, href
        self.childrens: List[EPUBNav] = []

    def dump(self):
        """Build the ``li`` element for this entry.

        The label is an ``a`` when an href is set, otherwise a ``span``;
        nested entries are placed in a trailing ``ol``.
        """
        item = ET.Element('li')
        if self.href:
            label = ET.SubElement(item, 'a', {'href': self.href})
        else:
            label = ET.SubElement(item, 'span')
        label.text = self.text
        if self.childrens:
            nested = ET.SubElement(item, 'ol')
            for child in self.childrens:
                nested.append(child.dump())
        return item
|
|
|
|
|
|
class EPUBNavigation:
    """The navigation (table of contents) document of a package."""

    # Namespace the ``epub:`` prefix must be bound to for ``epub:type``.
    # This is the OPS namespace, not the OPF package namespace.
    _EPUB_OPS_NS = 'http://www.idpf.org/2007/ops'

    def __init__(self):
        self.location = 'nav.xhtml'
        # Optional heading text rendered as an ``h1`` before the list.
        self.head = None
        self.navs: List[EPUBNav] = []

    def dump(self):
        """Return the ``nav`` element serialised as UTF-8 XML bytes.

        NOTE(review): only the ``nav`` element is emitted, without a
        surrounding ``html`` document; confirm this is what readers expect.

        Raises:
            InvalidEPUB: if no navigation entry has been added.
        """
        if not self.navs:
            raise InvalidEPUB('At least one nav element is needed.')
        e = ET.Element('nav', {'xmlns:epub': self._EPUB_OPS_NS, 'epub:type': 'toc', 'id': 'toc'})
        if self.head:
            h = ET.Element('h1')
            h.text = self.head
            e.append(h)
        ol = ET.Element('ol')
        for i in self.navs:
            ol.append(i.dump())
        e.append(ol)
        return ET.tostring(e, 'UTF-8')

    def save(self, f: ZipFile):
        """Write the serialised navigation document into archive *f*."""
        f.writestr(self.location, self.dump())
        print(f'Added {self.location} to archive.')
|
|
|
|
|
|
class EPUBPackage:
    """One package document plus the parts it owns (metadata, manifest, spine, nav)."""

    def __init__(self, location: str = 'package.opf'):
        self.location = location
        self.metadata = EPUBMetadata(self)
        # Written as xml:lang and used as the fallback metadata language.
        self.language = ''
        self.manifest = EPUBManifest(self)
        self.spine = EPUBSpine(self)
        # Created lazily by add_page().
        self.page_map: Optional[ADEPageMap] = None
        self.nav = EPUBNavigation()

    def add_identifier(self, value: str, id: str = None):
        """Forward to the metadata's add_identifier."""
        self.metadata.add_identifier(value, id)

    def add_page(self, href: str, name: str = None):
        """Add a page-map entry, creating the page map on first use."""
        if self.page_map is None:
            self.page_map = ADEPageMap(self)
        self.page_map.add_page(href, name)

    def dump(self):
        """Return the package document as UTF-8 XML bytes.

        Raises:
            InvalidEPUB: if no unique identifier can be determined.
        """
        unique_id = self.metadata.get_unique_identifier()
        if unique_id is None:
            raise InvalidEPUB('A unique identifier is needed.')
        attrs = {'xmlns': OPF_NS, 'version': '3.0', 'unique-identifier': unique_id}
        if self.language:
            attrs['xml:lang'] = self.language
        prefix = self.metadata.prefix
        if prefix:
            attrs['prefix'] = prefix
        root = ET.Element('package', attrs)
        for section in (self.metadata, self.manifest, self.spine):
            root.append(section.dump())
        return ET.tostring(root, 'UTF-8')

    def save(self, f: ZipFile):
        """Write the package document, nav, page map (if any) and manifest files to *f*."""
        f.writestr(self.location, self.dump())
        print(f'Added {self.location} to archive.')
        self.nav.save(f)
        if self.page_map:
            self.page_map.save(f)
        self.manifest.save(f)

    @property
    def unique_identifier(self):
        """Id currently resolved as the unique identifier (see the metadata)."""
        return self.metadata.get_unique_identifier()

    @unique_identifier.setter
    def unique_identifier(self, v):
        # Only strings are accepted as identifier ids.
        if not isinstance(v, str):
            raise TypeError('Unsupported type.')
        self.metadata.unique_identifier = v
|
|
|
|
|
|
class EPUBContainer:
    """Top-level container: the packages and extra links listed in ``META-INF/container.xml``."""

    def __init__(self):
        self.links: List[EPUBLink] = []
        self.packages = [EPUBPackage()]

    def add_link(self, href: str, rel: str, file: str, media_type: str = None, compress_type: int = None):
        """Register a link.

        Raises:
            ValueError: if a link with the same href already exists.
        """
        if any(link.href == href for link in self.links):
            raise ValueError(f'{href} already in links.')
        self.links.append(EPUBLink(href, rel, file, media_type, compress_type))

    def dump(self):
        """Return ``container.xml`` as UTF-8 XML bytes.

        Raises:
            InvalidEPUB: if there is no package.
        """
        if not self.packages:
            raise InvalidEPUB('1 or more packages is needed.')
        root = ET.Element('container', {'xmlns': 'urn:oasis:names:tc:opendocument:xmlns:container', 'version': '1.0'})
        rootfiles = ET.SubElement(root, 'rootfiles')
        for package in self.packages:
            ET.SubElement(rootfiles, 'rootfile', {'full-path': package.location, 'media-type': 'application/oebps-package+xml'})
        if self.links:
            # The links element is emitted only when there is something to list.
            links = ET.SubElement(root, 'links')
            for entry in self.links:
                ET.SubElement(links, 'link', entry.dump())
        return ET.tostring(root, 'UTF-8')

    def save(self, f: ZipFile):
        """Write ``container.xml``, then every link and every package, into *f*."""
        f.writestr('META-INF/container.xml', self.dump())
        print('Added META-INF/container.xml to archive.')
        for entry in self.links:
            entry.save(f)
        for package in self.packages:
            package.save(f)
|
|
|
|
|
|
class EPUB:
    """Facade over a container with shortcuts to the parts of its first package."""

    def __init__(self) -> None:
        self.container = EPUBContainer()
        self.packages = self.container.packages
        first = self.packages[0]
        self.package = first
        self.metadata = first.metadata
        self.manifest = first.manifest
        self.spine = first.spine
        self.nav = first.nav

    def add_identifier(self, value: str, id: str = None):
        """Forward to the first package's add_identifier."""
        self.package.add_identifier(value, id)

    def add_link(self, href: str, rel: str, file: str, media_type: str = None, compress_type: int = None):
        """Forward to the container's add_link."""
        self.container.add_link(href, rel, file, media_type, compress_type)

    def save(self, filename: str):
        """Write the book to *filename* as a zip archive.

        The ``mimetype`` entry is written first and stored uncompressed; the
        container then writes everything else (deflated by default).
        """
        with ZipFile(filename, 'w', compression=ZIP_DEFLATED, allowZip64=True) as archive:
            archive.writestr('mimetype', b'application/epub+zip', ZIP_STORED)
            print('Added mimetype to archive.')
            self.container.save(archive)
|
|
|
|
|
|
class XHTMLConvert:
    """Prepares a parsed (X)HTML tree for inclusion in an EPUB.

    The constructor requires un-namespaced ``head`` and ``body`` children of
    *root* and appends a ``<meta charset="UTF-8">`` to the head. ``convert``
    rewrites image URLs and records which manifest properties the document
    needs; ``properties`` exposes them.
    """

    def __init__(self, root: ET.Element):
        """
        Raises:
            ValueError: if *root* has no direct ``head`` or ``body`` child.
        """
        self.root = root
        self.have_svg = False
        self.scripted = False
        self.have_remote_resources = False
        self.have_switch = False
        self.head = self.root.find('head')
        if self.head is None:
            raise ValueError('Can not find head element.')
        self.body = self.root.find('body')
        if self.body is None:
            raise ValueError('Can not find body element.')
        self.head.append(ET.Element('meta', {'charset': 'UTF-8'}))
        # Title element managed by set_title(); looked up or created on first use.
        self.title = None

    def add_css(self, href):
        """Append a stylesheet ``link`` for *href* to the head."""
        self.head.append(ET.Element('link', {'href': href, 'rel': 'stylesheet', 'type': 'text/css'}))

    def _contains_any(self, tags) -> bool:
        """Return True if an element with one of *tags* exists anywhere in the tree."""
        # Element.iter() walks the whole tree. Element truthiness must not be
        # used here: an element without children evaluates as false.
        return any(next(self.root.iter(tag), None) is not None for tag in tags)

    def _remap_url(self, element: ET.Element, attr: str, url_maps: Dict[str, str], allow_remote_resources: bool):
        """Rewrite ``element.attrib[attr]`` through *url_maps* if it is not a relative URL."""
        url = element.attrib[attr]
        parsed = urlparse(url)
        if not (parsed.scheme or parsed.hostname):
            # Relative reference: left untouched.
            return
        if url in url_maps:
            element.attrib[attr] = url_maps[url]
        elif allow_remote_resources:
            self.have_remote_resources = True
        else:
            raise ValueError(f'Unknown remote resource: {url}')

    def convert(self, url_maps: Dict[str, str], allow_remote_resources: bool = False):
        """Scan the document and rewrite image URLs.

        Sets ``scripted`` / ``have_svg`` / ``have_switch`` when a matching
        element occurs anywhere in the tree. For every SVG ``image`` (its
        ``href`` / xlink ``href``) and every ``img`` (its ``src``) in the body,
        a URL with a scheme or host is replaced by ``url_maps[url]``.

        Args:
            url_maps: original URL -> replacement (presumably a path inside
                the archive; confirm with the caller).
            allow_remote_resources: when True, unmapped URLs are kept and
                ``have_remote_resources`` is set instead of raising.

        Raises:
            ValueError: for an unmapped URL when remote resources are not allowed.
        """
        if self._contains_any(['script']):
            self.scripted = True
        if self._contains_any(SVG_TAGS):
            self.have_svg = True
        if self._contains_any(SWITCH_TAGS):
            self.have_switch = True
        svg_image_tag = '{%s}image' % (SVG_NS)
        svg_href_attrs = ('href', '{%s}href' % (XLINK_NS))
        for i in self.body.iter():
            if i.tag == svg_image_tag:
                for tag in svg_href_attrs:
                    if tag in i.attrib:
                        self._remap_url(i, tag, url_maps, allow_remote_resources)
            elif i.tag == 'img':
                if 'src' in i.attrib:
                    self._remap_url(i, 'src', url_maps, allow_remote_resources)

    @property
    def properties(self) -> Optional[List[str]]:
        """Manifest properties implied by the flags, or None when there are none."""
        r = []
        if self.have_svg:
            r.append('svg')
        if self.have_switch:
            r.append('switch')
        if self.have_remote_resources:
            r.append('remote-resources')
        if self.scripted:
            r.append('scripted')
        return r if r else None

    def save(self, fn: str):
        """Serialise the tree as UTF-8 XML into file *fn*."""
        with open(fn, 'wb') as f:
            f.write(ET.tostring(self.root, 'UTF-8'))
        print(f'Writed XHMTL to {fn}.')

    def set_title(self, title: str):
        """Set the document title, reusing an existing ``title`` element in the head."""
        if self.title is None:
            # Reuse a title that is already present so the head never ends up
            # with two title elements.
            self.title = self.head.find('title')
        if self.title is None:
            self.title = ET.Element('title')
            self.head.append(self.title)
        self.title.text = title
|
|
|
|
|
|
def detect_7z() -> bool:
    """Return True when running ``7z -h`` succeeds (exit code 0).

    Any exception while starting or waiting for the process is printed with
    its traceback and reported as False.
    """
    try:
        exit_code = Popen(['7z', '-h'], stdout=DEVNULL).wait()
    except Exception:
        print_exc()
        return False
    return exit_code == 0
|
|
|
|
|
|
def add_7z_archive(file: str, list_file: str, pwd: str, compress_level: int) -> bool:
    """Run ``7z a`` to add the files named in *list_file* to archive *file*.

    Args:
        file: archive path (made absolute before the call).
        list_file: list-file path, passed to 7z as ``@<absolute path>``.
        pwd: working directory for the 7z process.
        compress_level: value for ``-mx``; None omits the switch.

    Returns:
        True when 7z exits with code 0. False otherwise, including when any
        exception occurs (its traceback is printed).
    """
    try:
        command = ['7z', 'a', '-mmt1']
        if compress_level is not None:
            command.append(f'-mx{compress_level}')
        command.extend([abspath(file), f'@{abspath(list_file)}'])
        print(command)
        exit_code = Popen(command, cwd=pwd).wait()
    except Exception:
        print_exc()
        return False
    return exit_code == 0
|
|
|
|
|
|
def wrap(s: str, width: int = 56):
    """Wrap *s* with ``textwrap.wrap`` and return the lines joined by newlines."""
    lines = _wrap(s, width)
    return '\n'.join(lines)
|
|
|
|
|
|
def b64decode(s):
    """Base64-decode *s*, first padding it with ``=`` to a multiple of four.

    Padding is added only for ``bytes`` and ``str`` input; other types are
    passed to the decoder unchanged.
    """
    missing = -len(s) % 4
    if missing:
        if isinstance(s, bytes):
            s = s + b'=' * missing
        elif isinstance(s, str):
            s = s + '=' * missing
    return _b64decode(s)
|
|
|
|
|
|
# Matches one run of non-digit bytes followed by a single digit byte; decode_key
# uses findall() with it to split the decoded key blob into chunks.
KEY_REG = compile(rb'([^\d]+\d)')
# 256-entry byte substitution table applied while GoogleBooksDecrpyter expands its key.
# NOTE(review): the leading entries match the AES S-box - confirm the whole table.
MW = [99, 124, 119, 123, 242, 107, 111, 197, 48, 1, 103, 43, 254, 215, 171, 118, 202, 130, 201, 125, 250, 89, 71, 240, 173, 212, 162, 175, 156, 164, 114, 192, 183, 253, 147, 38, 54, 63, 247, 204, 52, 165, 229, 241, 113, 216, 49, 21, 4, 199, 35, 195, 24, 150, 5, 154, 7, 18, 128, 226, 235, 39, 178, 117, 9, 131, 44, 26, 27, 110, 90, 160, 82, 59, 214, 179, 41, 227, 47, 132, 83, 209, 0, 237, 32, 252, 177, 91, 106, 203, 190, 57, 74, 76, 88, 207, 208, 239, 170, 251, 67, 77, 51, 133, 69, 249, 2, 127, 80, 60, 159, 168, 81, 163, 64, 143, 146, 157, 56, 245, 188, 182, 218, 33, 16, 255, 243, 210, 205, 12, 19, 236, 95, 151, 68, 23, 196, 167, 126, 61, 100, 93, 25, 115, 96, 129, 79, 220, 34, 42, 144, 136, 70, 238, 184, 20, 222, 94, 11, 219, 224, 50, 58, 10, 73, 6, 36, 92, 194, 211, 172, 98, 145, 149, 228, 121, 231, 200, 55, 109, 141, 213, 78, 169, 108, 86, 244, 234, 101, 122, 174, 8, 186, 120, 37, 46, 28, 166, 180, 198, 232, 221, 116, 31, 75, 189, 139, 138, 112, 62, 181, 102, 72, 3, 246, 14, 97, 53, 87, 185, 134, 193, 29, 158, 225, 248, 152, 17, 105, 217, 142, 148, 155, 30, 135, 233, 206, 85, 40, 223, 140, 161, 137, 13, 191, 230, 66, 104, 65, 153, 45, 15, 176, 84, 187, 22]
# 256-entry byte substitution table passed to LW() during block decryption.
# NOTE(review): the leading entries match the AES inverse S-box - confirm the whole table.
SW = [82, 9, 106, 213, 48, 54, 165, 56, 191, 64, 163, 158, 129, 243, 215, 251, 124, 227, 57, 130, 155, 47, 255, 135, 52, 142, 67, 68, 196, 222, 233, 203, 84, 123, 148, 50, 166, 194, 35, 61, 238, 76, 149, 11, 66, 250, 195, 78, 8, 46, 161, 102, 40, 217, 36, 178, 118, 91, 162, 73, 109, 139, 209, 37, 114, 248, 246, 100, 134, 104, 152, 22, 212, 164, 92, 204, 93, 101, 182, 146, 108, 112, 72, 80, 253, 237, 185, 218, 94, 21, 70, 87, 167, 141, 157, 132, 144, 216, 171, 0, 140, 188, 211, 10, 247, 228, 88, 5, 184, 179, 69, 6, 208, 44, 30, 143, 202, 63, 15, 2, 193, 175, 189, 3, 1, 19, 138, 107, 58, 145, 17, 65, 79, 103, 220, 234, 151, 242, 207, 206, 240, 180, 230, 115, 150, 172, 116, 34, 231, 173, 53, 133, 226, 249, 55, 232, 28, 117, 223, 110, 71, 241, 26, 113, 29, 41, 197, 137, 111, 183, 98, 14, 170, 24, 190, 27, 252, 86, 62, 75, 198, 210, 121, 32, 154, 219, 192, 254, 120, 205, 90, 244, 31, 221, 168, 51, 136, 7, 199, 49, 177, 18, 16, 89, 39, 128, 236, 95, 96, 81, 127, 169, 25, 181, 74, 13, 45, 229, 122, 159, 147, 201, 156, 239, 160, 224, 59, 77, 174, 42, 245, 176, 200, 235, 187, 60, 131, 83, 153, 97, 23, 43, 4, 126, 186, 119, 214, 38, 225, 105, 20, 99, 85, 33, 12, 125]
# TW, UW, VW and WW are 256-entry tables XOR-combined per column in the decryption rounds.
# NOTE(review): the leading entries look like GF(2^8) multiplication by 14, 11, 13 and 9
# respectively (the AES inverse MixColumns coefficients) - confirm the whole tables.
TW = [0, 14, 28, 18, 56, 54, 36, 42, 112, 126, 108, 98, 72, 70, 84, 90, 224, 238, 252, 242, 216, 214, 196, 202, 144, 158, 140, 130, 168, 166, 180, 186, 219, 213, 199, 201, 227, 237, 255, 241, 171, 165, 183, 185, 147, 157, 143, 129, 59, 53, 39, 41, 3, 13, 31, 17, 75, 69, 87, 89, 115, 125, 111, 97, 173, 163, 177, 191, 149, 155, 137, 135, 221, 211, 193, 207, 229, 235, 249, 247, 77, 67, 81, 95, 117, 123, 105, 103, 61, 51, 33, 47, 5, 11, 25, 23, 118, 120, 106, 100, 78, 64, 82, 92, 6, 8, 26, 20, 62, 48, 34, 44, 150, 152, 138, 132, 174, 160, 178, 188, 230, 232, 250, 244, 222, 208, 194, 204, 65, 79, 93, 83, 121, 119, 101, 107, 49, 63, 45, 35, 9, 7, 21, 27, 161, 175, 189, 179, 153, 151, 133, 139, 209, 223, 205, 195, 233, 231, 245, 251, 154, 148, 134, 136, 162, 172, 190, 176, 234, 228, 246, 248, 210, 220, 206, 192, 122, 116, 102, 104, 66, 76, 94, 80, 10, 4, 22, 24, 50, 60, 46, 32, 236, 226, 240, 254, 212, 218, 200, 198, 156, 146, 128, 142, 164, 170, 184, 182, 12, 2, 16, 30, 52, 58, 40, 38, 124, 114, 96, 110, 68, 74, 88, 86, 55, 57, 43, 37, 15, 1, 19, 29, 71, 73, 91, 85, 127, 113, 99, 109, 215, 217, 203, 197, 239, 225, 243, 253, 167, 169, 187, 181, 159, 145, 131, 141]
UW = [0, 11, 22, 29, 44, 39, 58, 49, 88, 83, 78, 69, 116, 127, 98, 105, 176, 187, 166, 173, 156, 151, 138, 129, 232, 227, 254, 245, 196, 207, 210, 217, 123, 112, 109, 102, 87, 92, 65, 74, 35, 40, 53, 62, 15, 4, 25, 18, 203, 192, 221, 214, 231, 236, 241, 250, 147, 152, 133, 142, 191, 180, 169, 162, 246, 253, 224, 235, 218, 209, 204, 199, 174, 165, 184, 179, 130, 137, 148, 159, 70, 77, 80, 91, 106, 97, 124, 119, 30, 21, 8, 3, 50, 57, 36, 47, 141, 134, 155, 144, 161, 170, 183, 188, 213, 222, 195, 200, 249, 242, 239, 228, 61, 54, 43, 32, 17, 26, 7, 12, 101, 110, 115, 120, 73, 66, 95, 84, 247, 252, 225, 234, 219, 208, 205, 198, 175, 164, 185, 178, 131, 136, 149, 158, 71, 76, 81, 90, 107, 96, 125, 118, 31, 20, 9, 2, 51, 56, 37, 46, 140, 135, 154, 145, 160, 171, 182, 189, 212, 223, 194, 201, 248, 243, 238, 229, 60, 55, 42, 33, 16, 27, 6, 13, 100, 111, 114, 121, 72, 67, 94, 85, 1, 10, 23, 28, 45, 38, 59, 48, 89, 82, 79, 68, 117, 126, 99, 104, 177, 186, 167, 172, 157, 150, 139, 128, 233, 226, 255, 244, 197, 206, 211, 216, 122, 113, 108, 103, 86, 93, 64, 75, 34, 41, 52, 63, 14, 5, 24, 19, 202, 193, 220, 215, 230, 237, 240, 251, 146, 153, 132, 143, 190, 181, 168, 163]
VW = [0, 13, 26, 23, 52, 57, 46, 35, 104, 101, 114, 127, 92, 81, 70, 75, 208, 221, 202, 199, 228, 233, 254, 243, 184, 181, 162, 175, 140, 129, 150, 155, 187, 182, 161, 172, 143, 130, 149, 152, 211, 222, 201, 196, 231, 234, 253, 240, 107, 102, 113, 124, 95, 82, 69, 72, 3, 14, 25, 20, 55, 58, 45, 32, 109, 96, 119, 122, 89, 84, 67, 78, 5, 8, 31, 18, 49, 60, 43, 38, 189, 176, 167, 170, 137, 132, 147, 158, 213, 216, 207, 194, 225, 236, 251, 246, 214, 219, 204, 193, 226, 239, 248, 245, 190, 179, 164, 169, 138, 135, 144, 157, 6, 11, 28, 17, 50, 63, 40, 37, 110, 99, 116, 121, 90, 87, 64, 77, 218, 215, 192, 205, 238, 227, 244, 249, 178, 191, 168, 165, 134, 139, 156, 145, 10, 7, 16, 29, 62, 51, 36, 41, 98, 111, 120, 117, 86, 91, 76, 65, 97, 108, 123, 118, 85, 88, 79, 66, 9, 4, 19, 30, 61, 48, 39, 42, 177, 188, 171, 166, 133, 136, 159, 146, 217, 212, 195, 206, 237, 224, 247, 250, 183, 186, 173, 160, 131, 142, 153, 148, 223, 210, 197, 200, 235, 230, 241, 252, 103, 106, 125, 112, 83, 94, 73, 68, 15, 2, 21, 24, 59, 54, 33, 44, 12, 1, 22, 27, 56, 53, 34, 47, 100, 105, 126, 115, 80, 93, 74, 71, 220, 209, 198, 203, 232, 229, 242, 255, 180, 185, 174, 163, 128, 141, 154, 151]
WW = [0, 9, 18, 27, 36, 45, 54, 63, 72, 65, 90, 83, 108, 101, 126, 119, 144, 153, 130, 139, 180, 189, 166, 175, 216, 209, 202, 195, 252, 245, 238, 231, 59, 50, 41, 32, 31, 22, 13, 4, 115, 122, 97, 104, 87, 94, 69, 76, 171, 162, 185, 176, 143, 134, 157, 148, 227, 234, 241, 248, 199, 206, 213, 220, 118, 127, 100, 109, 82, 91, 64, 73, 62, 55, 44, 37, 26, 19, 8, 1, 230, 239, 244, 253, 194, 203, 208, 217, 174, 167, 188, 181, 138, 131, 152, 145, 77, 68, 95, 86, 105, 96, 123, 114, 5, 12, 23, 30, 33, 40, 51, 58, 221, 212, 207, 198, 249, 240, 235, 226, 149, 156, 135, 142, 177, 184, 163, 170, 236, 229, 254, 247, 200, 193, 218, 211, 164, 173, 182, 191, 128, 137, 146, 155, 124, 117, 110, 103, 88, 81, 74, 67, 52, 61, 38, 47, 16, 25, 2, 11, 215, 222, 197, 204, 243, 250, 225, 232, 159, 150, 141, 132, 187, 178, 169, 160, 71, 78, 85, 92, 99, 106, 113, 120, 15, 6, 29, 20, 43, 34, 57, 48, 154, 147, 136, 129, 190, 183, 172, 165, 210, 219, 192, 201, 246, 255, 228, 237, 10, 3, 24, 17, 46, 39, 60, 53, 66, 75, 80, 89, 102, 111, 116, 125, 161, 168, 179, 186, 133, 140, 151, 158, 233, 224, 251, 242, 205, 196, 223, 214, 49, 56, 35, 42, 21, 28, 7, 14, 121, 112, 107, 98, 93, 84, 79, 70]
|
|
|
|
|
|
class GoogleBooksDecrpyter:
    """Block decrypter driven by the lookup tables MW/SW/TW/UW/VW/WW.

    NOTE(review): the key schedule and round structure look like AES with
    CBC chaining; the short comments (``Jc``, ``JW``, ...) appear to be the
    names used in the JavaScript this was ported from - confirm.
    """

    def __init__(self, key: bytes) -> None:
        """Expand *key* into the round-key words stored in ``_data3``."""
        self._key = key
        # Number of 4-byte words in the key and the derived round count.
        self._key_len = round(len(key) / 4)
        self._key_len2 = self._key_len + 6
        self._data1 = []  # Jc
        self._data2 = []  # qC
        # Two 4x4 working matrices: _data1 is the state, _data2 is scratch space.
        for _ in range(4):
            self._data1.append([None, None, None, None])
            self._data2.append([None, None, None, None])
        self._data3: List[List[int]] = [None] * (4 * (self._key_len2 + 1))  # ji
        # The first words are the key bytes themselves.
        for i in range(self._key_len):
            self._data3[i] = [key[4 * i], key[4 * i + 1], key[4 * i + 2], key[4 * i + 3]]
        b = [0, 0, 0, 0]
        for i in range(self._key_len, 4 * (self._key_len2 + 1)):
            b = self._data3[i - 1].copy()
            if (i % self._key_len == 0):
                # Rotate the word, substitute every byte, then XOR a round
                # constant into the first byte only.
                b = b[1:4] + [b[0]]
                for _ in range(4):
                    b[_] = MW[b[_]]
                t = i / self._key_len
                for _ in range(4):
                    b[_] ^= round((2 ** (t - 1) if t <= 8 else 27 * (2 ** (t - 9))) if _ % 4 == 0 else 0)
            else:
                # Extra substitution step used only for keys longer than 6 words.
                if self._key_len > 6 and i % self._key_len == 4:
                    for _ in range(4):
                        b[_] = MW[b[_]]
            self._data3[i] = [None, None, None, None]
            for _ in range(4):
                self._data3[i][_] = self._data3[i - self._key_len][_] ^ b[_]

    def decrypt(self, data: bytes):
        """Decrypt *data* and return the plaintext bytes.

        Layout read from *data*: bytes 0-15 are the initial XOR block, bytes
        16-19 a little-endian plaintext length, the rest is processed in
        16-byte blocks. Each decrypted block is XORed with the previous input
        block, and the result is truncated to the stored length.

        Raises:
            IndexError: if the payload after the header is not a multiple of
                16 bytes long.
        """
        first_xor = data[:16]
        le = int.from_bytes(data[16:20], 'little')
        data = data[20:]
        # A bytearray keeps assembly linear; appending to an immutable bytes
        # object one byte at a time copies the whole result on every step.
        out = bytearray()
        xor = first_xor
        for i in range(0, len(data), 16):
            b = data[i:i + 16]
            r = self.__decrypt(b)
            out.extend(x ^ y for x, y in zip(xor, r))
            xor = b
        return bytes(out[:le])

    def __decrypt(self, a: bytes):
        """Decrypt one 16-byte block and return it as a list of 16 ints."""
        # Load the block column-major into the state matrix.
        for i in range(4):  # JW(this, a)
            for j in range(4):
                self._data1[i][j] = a[4 * j + i]
        self.KW(self._key_len2)  # KW(this, this.Sw);
        for i in range(1, self._key_len2):
            self.RW()  # RW(this);
            self.LW(SW)  # LW(this, SW);
            self.KW(self._key_len2 - i)  # KW(this, this.Sw - a)
            # Column mixing: each column is replaced by XOR combinations of
            # table lookups on its four bytes.
            for j in range(4):
                c = self._data2[0]
                for _ in range(4):
                    c[_] = self._data1[_][j]
                self._data1[0][j] = TW[c[0]] ^ UW[c[1]] ^ VW[c[2]] ^ WW[c[3]]
                self._data1[1][j] = WW[c[0]] ^ TW[c[1]] ^ UW[c[2]] ^ VW[c[3]]
                self._data1[2][j] = VW[c[0]] ^ WW[c[1]] ^ TW[c[2]] ^ UW[c[3]]
                self._data1[3][j] = UW[c[0]] ^ VW[c[1]] ^ WW[c[2]] ^ TW[c[3]]
        # The final round has no column mixing.
        self.RW()
        self.LW(SW)
        self.KW(0)
        return self.QW()

    def KW(self, l):
        """XOR round key number *l* (four words of ``_data3``) into the state."""
        for i in range(4):
            for j in range(4):
                self._data1[i][j] ^= self._data3[4 * l + j][i]

    def RW(self):
        """Rotate state rows 1-3 to the right by their row index."""
        for i in range(1, 4):
            for j in range(4):
                self._data2[i][(i + j) % 4] = self._data1[i][j]
        for i in range(1, 4):
            for j in range(4):
                self._data1[i][j] = self._data2[i][j]

    def LW(self, b: List[int]):
        """Replace every state byte by its entry in substitution table *b*."""
        for i in range(4):
            for j in range(4):
                self._data1[i][j] = b[self._data1[i][j]]

    def QW(self):
        """Return the state flattened column-major as a list of 16 ints."""
        r = [None] * 16
        for i in range(4):
            for j in range(4):
                r[j * 4 + i] = self._data1[i][j]
        return r
|
|
|
|
|
|
def decode_key(key: str) -> bytes:
    """Derive the 16-byte key from its base64-encoded, obfuscated form.

    The decoded blob is split by ``KEY_REG`` into exactly 128 chunks, each
    ending in a digit. A chunk contributes bit ``1`` when the byte at the
    index given by that digit equals the byte just before the digit. The two
    64-bit halves are swapped, the bit string is reversed, and the resulting
    integer is returned as 16 little-endian bytes. The key is printed at each
    decoding stage.

    Raises:
        ValueError: if the blob does not split into exactly 128 chunks.
    """
    print(key)
    key = b64decode(key)
    print(key)
    key = KEY_REG.findall(key)
    print(key)
    if key is None or len(key) != 128:
        raise ValueError('Invaild key')
    # chunk[-1] is the ASCII code of the trailing digit; minus 48 gives its value.
    bits = ''.join(
        '1' if chunk[chunk[-1] - 48] == chunk[-2] else '0'
        for chunk in key
    )
    swapped = bits[64:] + bits[:64]
    value = int(swapped[::-1], 2)
    return value.to_bytes(16, 'little')
|
|
|
|
|
|
class MetadataParser(HTMLParser):
    """HTML parser that collects a ``start(...)`` script call and an inline GIF key.

    While feeding, the text of the first script whose content begins with
    ``start`` is accumulated in ``_metadata``. The payload of an ``img``
    whose ``src`` is a ``data:image/gif;base64,`` URL is stored in ``_key``.
    """

    # Prefix of the data URL whose base64 payload is kept as the key.
    _KEY_PREFIX = 'data:image/gif;base64,'

    def __init__(self, *k, convert_charrefs: bool = ...) -> None:
        self._metadata = ''
        self._in_script = False
        self._is_meta = False
        self._key = ''
        super().__init__(*k, convert_charrefs=convert_charrefs)

    def handle_data(self, data: str) -> None:
        """Accumulate script text once a script starting with ``start`` is seen."""
        if self._in_script and (self._is_meta or (self._metadata == '' and data.startswith('start'))):
            self._is_meta = True
            self._metadata += data

    def handle_starttag(self, tag: str, attrs: List[Tuple[str, Optional[str]]]) -> None:
        """Track entry into ``script`` and pick up the key from ``img`` tags."""
        if tag == 'script':
            self._in_script = True
        elif tag == 'img':
            prefix = self._KEY_PREFIX
            for name, value in attrs:
                # A valueless attribute is reported as None; skip it.
                if name == 'src' and value is not None and value.startswith(prefix):
                    # Slice the prefix off. str.lstrip() would strip a *set*
                    # of characters and could eat the start of the payload.
                    self._key = value[len(prefix):]

    def handle_endtag(self, tag: str) -> None:
        """Stop accumulating when the ``script`` element closes."""
        if tag == 'script':
            self._in_script = False
            self._is_meta = False

    @property
    def metadata(self) -> Optional[str]:
        """Evaluate the arguments of the captured ``start(...)`` call.

        Returns None when nothing was captured. Otherwise one trailing ``;``
        is dropped, the leading ``start(`` and the final ``)`` are cut off,
        and the remainder is evaluated as the elements of a JavaScript array.
        """
        if self._metadata == '':
            return None
        if self._metadata.endswith(';'):
            self._metadata = self._metadata[:-1]
        v = "function(){return [" + self._metadata[6:-1] + "]}"
        # NOTE(review): this evaluates text taken from the parsed page as
        # JavaScript; only feed pages from a trusted source.
        f = eval_js(v)
        return f()
|
|
|
|
|
|
class MetadataParser2(HTMLParser):
    """Collect every ``AF_initDataCallback(...)`` script of a page.

    Each ``<script>`` whose text begins with ``AF_initDataCallback`` is
    stored as one string in ``_metadatal``; ``metadata`` evaluates them.
    """

    # Text in front of the object literal passed to the callback.
    _CALL_PREFIX = 'AF_initDataCallback('

    def __init__(self, *k, convert_charrefs: bool = True) -> None:
        self._metadata = ''
        self._metadatal = []
        self._in_script = False
        self._is_meta = False
        super().__init__(*k, convert_charrefs=convert_charrefs)

    def handle_data(self, data: str) -> None:
        """Accumulate the text of a script that starts with the callback."""
        if not self._in_script:
            return
        if self._is_meta or (self._metadata == '' and data.startswith('AF_initDataCallback')):
            self._is_meta = True
            self._metadata += data

    def handle_starttag(self, tag: str, attrs: List[Tuple[str, Optional[str]]]) -> None:
        """Remember that a script element has been entered."""
        if tag == 'script':
            self._in_script = True

    def handle_endtag(self, tag: str) -> None:
        """Store the script collected so far and reset the per-script state."""
        if tag == 'script':
            if self._in_script and self._metadata:
                self._metadatal.append(self._metadata)
                self._metadata = ''
            self._in_script = False
            self._is_meta = False

    @property
    def metadata(self):
        """Return a dict mapping each callback's ``key`` to its ``data``.

        Every stored script has the leading ``AF_initDataCallback(`` and
        the trailing two characters (expected to be ``);``) removed; the
        rest is evaluated as a JavaScript expression and converted to
        plain Python data through a JSON round trip.
        """
        start = len(self._CALL_PREFIX)
        r = {}
        for script in self._metadatal:
            # SECURITY: this evaluates JavaScript taken from the downloaded
            # page through js2py; only feed pages from a trusted origin.
            f = eval_js("function(){return " + script[start:-2] + ";}")
            d = loads(js_dumps(f()))
            r[d['key']] = d['data']
        return r
|
|
|
|
|
|
def get_isbn(d: dict) -> Optional[str]:
    """Look for a row labelled ``isbn`` in the store page metadata.

    Every value of ``d`` is probed at ``[1][7][16][0]`` for a list of
    rows; the first row whose label (``row[0]``, compared case
    insensitively) is ``isbn`` yields ``row[1][0][0][1]``.

    :param d: Mapping of callback keys to their data.
    :return: The ISBN, or None when no entry contains one.
    """
    for payload in d.values():
        try:
            rows = payload[1][7][16][0]
            for row in rows:
                label: str = row[0]
                if label.lower() != 'isbn':
                    continue
                return row[1][0][0][1]
        except Exception:
            # Best effort: most entries do not have this shape at all.
            continue
    return None
|
|
|
|
|
|
def get_genres(d: dict) -> Optional[str]:
    """Look for the genres row in the store page metadata.

    Every value of ``d`` is probed at ``[1][7][16][0]`` for a list of
    rows; the first row whose label matches one of the known spellings of
    "genres" yields ``row[1][0][0][1]``.

    :param d: Mapping of callback keys to their data.
    :return: The genres text, or None when no entry contains it.
    """
    # Labels are compared after lower(), so the English spelling has to be
    # listed in lower case to ever match.
    labels = ('类型', '類型', 'genres', 'ジャンル')
    for key in d:
        try:
            for v in d[key][1][7][16][0]:
                k: str = v[0]
                if k.lower() in labels:
                    return v[1][0][0][1]
        except Exception:
            # Best effort: most entries do not have this shape at all.
            pass
    return None
|
|
|
|
|
|
def find_page(d, pid):
    """Return the first entry of ``d`` whose ``'pid'`` equals ``pid``.

    :param d: Iterable of mappings that each have a ``'pid'`` key.
    :param pid: Page id to look for.
    :return: The matching entry, or None when there is none.
    """
    return next((page for page in d if page['pid'] == pid), None)
|
|
|
|
# HTTP session shared by every request of the script; it sends a desktop
# browser user-agent / accept header and carries the user's cookies.
ses = Session()
ses.headers['user-agent'] = 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/96.0.4664.45 Safari/537.36'
ses.headers['accept'] = 'text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.9'

# Command line interface.
arg = ArgumentParser(description='Download from Google Play Books', add_help=True, formatter_class=RawTextHelpFormatter)
arg.add_argument('id', help=wrap("Book's id or url. Id is recommended because url may not be detected."))
arg.add_argument('-o', '--output', help=wrap('Specify the location of output file. By default it will output to current directory and use "<author> - <title>" as file name.'), metavar='FILE', dest='output')
# The help text lists every value accepted by ``choices``.
arg.add_argument('type', help=f"Specify output type. (Default: null)\nSupported value:\nnull: Only download segments and resources.\nCBZ: Package all images file as a comic book ZIP archive.\n{wrap('CB7: Package all images file as a comic book 7-ZIP archive.')}\n{wrap('EPUB: Convert the book to an EPUB file.')}", nargs='?', default='null', choices=['null', 'CBZ', 'CB7', 'EPUB'], metavar='type')
arg.add_argument('-c', '--cookies', help=wrap('Specify the location of cookies file. File must be Netscape HTTP Cookie File. (Default: google.txt)'), default="google.txt", metavar='FILE', dest='cookies')
arg.add_argument('-a', '--authuser', help=wrap('Specify the index of current user. Will be useful when multiple Google Account is logged in a same cookie file. Index is start at 0. (Default: 0)'), default=0, type=int, metavar='INDEX', dest='authuser')
arg.add_argument('-d', '--cache-dir', help=wrap('Specify the cache directory. By default, it will be "<author> - <title>"'), metavar='DIR', dest='cache_dir')
arg.add_argument('--7z-compress-level', help=wrap('The compress level when using 7-zip to archive file. (1 fastest 9 ultra)'), type=int, metavar='LEVEL', dest='7z_compress_level')
arg.add_argument('--crtl', '--comic-right-to-left', action='store_true', help=wrap('When packaging images as a comic book archive, the sequence of pages will like this: p1, p3, p2, p5, p4 ... This will produce a good archive for Japanese comic. You may need Okular to open archive with facing pages (center first page) view mode.'), dest='crtl')
args = arg.parse_intermixed_args()
# '7z_compress_level' is not a valid attribute name, so it is read through
# this dict view of the parsed arguments.
argsd = vars(args)

# Cookies are read from (and later written back to) the Netscape format
# file given on the command line.
cookies = MozillaCookieJar(args.cookies)
cookies.load()
ses.cookies = cookies
|
|
def _fetch_with_retry(url, message, attempts=3):
    """GET ``url`` with the shared session, trying up to ``attempts`` times.

    ``message`` is printed before every attempt. A response with a status
    code of 400 or above counts as a failure.

    :return: The first successful response.
    :raises ValueError: If every attempt failed, so that a failed or stale
        response is never written to disk as if it were the resource.
    """
    for attempt in range(1, attempts + 1):
        try:
            print(message)
            resp = ses.get(url)
            if resp.status_code >= 400:
                raise ValueError(f'{resp.status_code} {resp.reason}')
            return resp
        except Exception:
            print_exc()
            print(f'Download failed. Retry the {attempt} times.')
    raise ValueError(f'Can not download {url}')


# Initialised before the try block so that the ``finally`` clause can tell
# whether there is a resource index to write, even after an early failure.
resources = {}
resource_file = None
try:
    # Request the Books home page for the selected account index first.
    resp = ses.get(f"https://play.google.com/books?authuser={args.authuser}")
    if resp.status_code >= 400:
        raise ValueError('Can not switch content')
    if args.id.find("://") > -1:
        arg_id_qs = parse_qs(urlparse(args.id).query)
        if 'id' not in arg_id_qs:
            raise ValueError(f'Can not detect the book id from url: {args.id}')
        book_id = arg_id_qs['id'][0]
    else:
        book_id = args.id
    resp = ses.get(f'https://play.google.com/books/reader?id={book_id}')
    print(resp.status_code)
    print(resp.reason)
    parser = MetadataParser()
    parser.feed(resp.text)
    meta = parser.metadata
    if meta is None:
        raise ValueError('Can not find metadata in reader page.')
    if 2 not in meta[0]['available_mode']:
        raise NotImplementedError('The old version is not supported now.')
    metadata = meta[0]['metadata']
    title = metadata['title']
    num_pages = metadata['num_pages']
    authors = metadata['authors']
    pub_date = metadata['pub_date']
    publisher = metadata['publisher']
    volume_id = metadata['volume_id']
    print(title)
    print(num_pages)
    # ``filename`` is the cache directory holding everything downloaded.
    if args.cache_dir is None:
        filename = f"{authors} - {title}"
    else:
        filename = args.cache_dir
    makedirs(filename, exist_ok=True)
    makedirs('temp', exist_ok=True)

    # The key is decoded once and cached next to the downloaded data.
    key_file = join(filename, "encrypt.key")
    if parser._key != '' and not exists(key_file):
        keys = decode_key(parser._key)
        decrypter = GoogleBooksDecrpyter(keys)
        with open(key_file, "wb") as f:
            f.write(keys)
    elif exists(key_file):
        with open(key_file, 'rb') as f:
            decrypter = GoogleBooksDecrpyter(f.read())
    else:
        decrypter = None

    with open(join(filename, 'metadata.json'), 'w', encoding='UTF8') as f:
        f.write(js_dumps(meta))

    # resources.json maps every resource url to its description plus the
    # local file it was saved to.
    resource_file = join(filename, "resources.json")
    if exists(resource_file):
        with open(resource_file, 'r', encoding='UTF-8') as f:
            resources = load(f)

    # ---- download every segment description and the files it refers to ----
    segment = None  # description of the segment handled last
    for seg_meta in meta[0]['segment']:
        segment_info_file = join(filename, f"{seg_meta['label']}.json")
        if not exists(segment_info_file):
            resp = _fetch_with_retry(
                urljoin('https://play.google.com', seg_meta['link']),
                f"Downloading segment information: {seg_meta['label']}")
            segment = resp.json()
            if segment['content_encrypted'] and 'content' in segment and decrypter is not None:
                segment['content'] = decrypter.decrypt(b64decode(segment['content'])).decode()
                segment['content_encrypted'] = False
            with open(segment_info_file, 'w', encoding='UTF-8') as f:
                dump(segment, f, ensure_ascii=False, separators=(',', ':'))
        else:
            with open(segment_info_file, 'r', encoding='UTF-8') as f:
                segment = load(f)
        for res in segment.get('resource', []):
            # The local name comes from the first of these query parameters
            # present in the resource url.
            res_par = parse_qs(urlparse(res['url']).query)
            res_name = ''
            for param in ('pg', 'start', 'aid'):
                if param in res_par:
                    res_name = res_par[param][0]
                    break
            if res_name == '':
                raise ValueError(f"Can not detect the resource's name:{res['url']}")
            mime_type = res.get('mime_type')
            res_ext = ''
            if mime_type == 'image':
                res_ext = '.png' if res_name.endswith('.png') else '.jpg'
            elif mime_type == 'text/css':
                res_ext = '.css'
            elif mime_type == 'video':
                res_ext = '.mp4'
            if res_ext == '':
                raise ValueError(f"Can not detect the resource's type:{mime_type}")
            if res_name.endswith(res_ext):
                res_file = join(filename, res_name)
            else:
                res_file = join(filename, f"{res_name}{res_ext}")
            if exists(res_file) and res['url'] in resources:
                print(f'Skip downloading resource file:{res_file}')
                continue
            # The plain name is taken by a file that is not registered for
            # this url: use the first free numbered name instead of
            # overwriting an existing file.
            suffix = 0
            while exists(res_file):
                suffix += 1
                res_file = join(filename, f"{res_name}_{suffix}{res_ext}")
            resp = _fetch_with_retry(
                urljoin('https://play.google.com', res['url']),
                f"Downloading resource file: {res_file}")
            if res_ext in ('.jpg', '.png'):
                with open(res_file, 'wb') as f:
                    f.write(resp.content)
            elif res_ext == '.css':
                # Parse first so that no empty file is left on bad JSON.
                res_css = resp.json()
                with open(res_file, 'w', encoding='UTF-8') as f:
                    f.write(res_css['style'])
            elif res_ext == '.mp4':
                # The response is a query string describing the available
                # formats; the user picks one interactively.
                video_info = parse_qs(resp.text)
                if video_info['status'][0] != 'ok':
                    raise ValueError('Can not parse video')
                fmt_list = video_info['fmt_list'][0].split(',')
                for ind, info in enumerate(fmt_list, 1):
                    info = info.split('/')
                    print(f'{ind}: ID: {info[0]} Video size: {info[1]}')
                choice = input('Please choose:')
                # isdecimal() only accepts what int() can convert.
                while not choice.isdecimal() or not 1 <= int(choice) <= len(fmt_list):
                    choice = input('Please choose:')
                fmt_id = fmt_list[int(choice) - 1].split('/')[0]
                video_link = None
                for fmt_stream in video_info['fmt_stream_map'][0].split(','):
                    fmt_stream = fmt_stream.split('|')
                    if fmt_stream[0] == fmt_id:
                        video_link = fmt_stream[1]
                if video_link is None:
                    raise ValueError('Can not parse video')
                for attempt in range(1, 4):
                    try:
                        print(f'Downloading video file:{res_file}')
                        resp = ses.get(video_link, stream=True)
                        if resp.status_code >= 400:
                            raise ValueError(f'{resp.status_code} {resp.reason}')
                        with open(res_file, 'wb') as f:
                            for chunk in resp.iter_content(1024):
                                if chunk:
                                    f.write(chunk)
                        break
                    except Exception:
                        print_exc()
                        print(f'Download failed. Retry the {attempt} times.')
                        # Do not keep a partially written video.
                        if exists(res_file):
                            remove(res_file)
                else:
                    raise ValueError(f'Can not download video file:{res_file}')
            nres = res.copy()
            del nres['url']
            nres['file'] = res_file
            resources[res['url']] = nres

    # ---- optional packaging step ----
    if args.type == 'CBZ':
        if segment is not None and 'resource' in segment:
            output = args.output if args.output else f'{authors} - {title}.cbz'
            # The context manager guarantees the archive is finalised.
            with ZipFile(output, 'w', ZIP_STORED, True) as z:
                tmp = None  # even page held back in --crtl mode
                picn = 1  # number of the current image
                picc = 1  # number used as prefix of the next archive entry
                for seg_meta in meta[0]['segment']:
                    segment_info_file = join(filename, f"{seg_meta['label']}.json")
                    with open(segment_info_file, 'r', encoding='UTF-8') as f:
                        seg_info = load(f)
                    for res in seg_info.get('resource', []):
                        nres = resources[res['url']]
                        if nres.get('mime_type') != 'image':
                            continue
                        arcname = relpath(nres['file'], filename)
                        if not args.crtl or picn % 2 == 1:
                            print(f"Add {nres['file']} to comic book archive.")
                            z.write(nres['file'], f"{picc:03}.{arcname}")
                            picc += 1
                            if tmp is not None:
                                print(f"Add {tmp[0]} to comic book archive.")
                                z.write(tmp[0], f"{picc:03}.{tmp[1]}")
                                picc += 1
                                tmp = None
                        else:
                            # --crtl: store the even page after the next odd one.
                            tmp = (nres['file'], arcname)
                        picn += 1
                if tmp is not None:
                    # An even number of images leaves the last page held
                    # back; it still has to be written.
                    print(f"Add {tmp[0]} to comic book archive.")
                    z.write(tmp[0], f"{picc:03}.{tmp[1]}")
    elif args.type == "CB7":
        if not detect_7z():
            raise ValueError('Can not find 7z executable. Make sure 7z is in PATH environment variable or in current directory.')
        if segment is not None and 'resource' in segment:
            output = args.output if args.output else f'{authors} - {title}.cb7'
            file_list = []
            for seg_meta in meta[0]['segment']:
                segment_info_file = join(filename, f"{seg_meta['label']}.json")
                with open(segment_info_file, 'r', encoding='UTF-8') as f:
                    seg_info = load(f)
                for res in seg_info.get('resource', []):
                    nres = resources[res['url']]
                    if nres.get('mime_type') == 'image':
                        print(f"Add {nres['file']} to comic book archive.")
                        file_list.append(relpath(nres['file'], filename))
            # The image paths are handed over through a list file.
            file_list_loc = join('temp', f'{volume_id}_cb7_filelist.txt')
            with open(file_list_loc, 'w', encoding='UTF-8') as f:
                f.write('\n'.join(file_list))
            add_7z_archive(output, file_list_loc, filename, argsd['7z_compress_level'])
    elif args.type == 'EPUB':
        output = args.output if args.output else f'{authors} - {title}.epub'
        # The store page supplies metadata the reader page lacks.
        webre = ses.get(f'https://play.google.com/store/books/details/?id={book_id}')
        if webre.status_code >= 400:
            raise ValueError('Failed to fetch metadata.')
        mf = MetadataParser2()
        mf.feed(webre.text)
        webmeta = mf.metadata
        with open(join(filename, 'metadata2.json'), 'w', encoding='UTF-8') as f:
            dump(webmeta, f, ensure_ascii=False, separators=(',', ':'))
        e = EPUB()
        isbn = get_isbn(webmeta)
        if isbn is not None:
            print(f'Get ISBN: {isbn}')
            e.add_identifier(isbn, 'ISBN')
        else:
            print('Failed to extract ISBN.')
        # NOTE(review): the Google id is registered in every case here;
        # confirm it was not meant to be a fallback for a missing ISBN only.
        e.add_identifier(book_id, 'GOOGLE')
        e.metadata.title = title
        e.package.language = meta[0]['language']
        e.metadata.add_creator(authors, 'aut')
        e.metadata.date = parse_time(pub_date)
        genres = get_genres(webmeta)
        if genres is not None:
            print(f'Get genres: {genres}')
            for genre in genres.split('/'):
                e.metadata.add_subject(genre.strip())
        else:
            print('Failed to extract genres.')
        desc = meta[1][1][4]
        print(f'Get description: {desc}')
        e.metadata.add_data('description', desc)
        e.metadata.add_data('publisher', publisher)
        for item in meta[0]['meta']:
            e.metadata.add_meta(item['property'], item['cdata'])
        if meta[0]['is_right_to_left']:
            e.spine.page_progression_direction = 'rtl'
        # Table of contents: only a flat list of entries is supported.
        for entry in meta[0]['toc_entry']:
            if entry['depth'] != 0:
                raise NotImplementedError('Non-zero depth toc.')
            seg_meta = meta[0]['segment'][entry['segment_index']]
            label = seg_meta['label']
            if not label.endswith('.xhtml'):
                label += '.xhtml'
            href = posixjoin("xhtml", label)
            e.nav.navs.append(EPUBNav(entry['label'], href))
        # The first image of the first segment is used as the cover.
        cover_meta = meta[0]['segment'][0]
        seg_file = join(filename, f"{cover_meta['label']}.json")
        with open(seg_file, 'r', encoding='UTF-8') as f:
            seg_info = load(f)
        for res in seg_info.get('resource', []):
            if res.get('mime_type') == 'image':
                res_info = resources[res['url']]
                href = posixjoin('image', basename(res_info['file']))
                item_id = splitext(basename(res_info['file']))[0]
                mimetype = EXT_MIMETYPES[splitext(res_info['file'])[1]]
                e.manifest.add_cover(href, item_id, mimetype, res_info['file'])
                break
        # NOTE(review): the 'epub' prefix is bound to OPF_NS here and in the
        # wrapper document below; the EPUB specification uses
        # http://www.idpf.org/2007/ops for that prefix -- confirm against
        # the converter code before changing it.
        ET.register_namespace('epub', OPF_NS)
        ET.register_namespace('svg', SVG_NS)
        ET.register_namespace('xlink', XLINK_NS)
        for seg_meta in meta[0]['segment']:
            seg_file = join(filename, f"{seg_meta['label']}.json")
            with open(seg_file, 'r', encoding='UTF-8') as f:
                seg_info = load(f)
            if 'content' not in seg_info:
                print(f"No content in segment {seg_meta['label']}")
                continue
            if seg_info['content_encrypted'] and decrypter is not None:
                # Decrypt the content of the segment that was just loaded.
                seg_info['content'] = decrypter.decrypt(b64decode(seg_info['content'])).decode()
                seg_info['content_encrypted'] = False
            tree: ET.Element = ET.fromstring(f"<html xmlns:epub=\"{OPF_NS}\"><head></head><body>{seg_info['content']}</body></html>")
            converter = XHTMLConvert(tree)
            url_maps = {}
            for item in seg_info.get('resource', []):
                res = resources[item['url']]
                item_mime = item.get('mime_type')
                if item_mime == 'text/css':
                    href = posixjoin('css', basename(res['file']))
                    item_id = splitext(basename(res['file']))[0]
                    if not e.manifest.have_href(href):
                        e.manifest.add_item(href, item_id, 'text/css', res['file'])
                    converter.add_css(posixjoin('..', href))
                elif item_mime == 'image':
                    href = posixjoin('image', basename(res['file']))
                    item_id = splitext(basename(res['file']))[0]
                    mimetype = EXT_MIMETYPES[splitext(res['file'])[1]]
                    if not e.manifest.have_href(href):
                        e.manifest.add_item(href, item_id, mimetype, res['file'])
                    url_maps[item['url']] = posixjoin('..', href)
            converter.set_title(title)
            print(url_maps)
            converter.convert(url_maps)
            for element in converter.root.iter():
                print(element, element.text, element.attrib)
            label = seg_meta['label']
            if not label.endswith('.xhtml'):
                label += '.xhtml'
            href = posixjoin('xhtml', label)
            path = join(filename, label)
            converter.save(path)
            e.manifest.add_item(href, seg_meta['label'], 'application/xhtml+xml', path, converter.properties)
            e.spine.add_ref(seg_meta['label'], 'yes')
            for page in seg_info.get('page', []):
                p = find_page(meta[0]['page'], page['pid'])
                if p is None:
                    raise ValueError(f"Can not find page {page['pid']}")
                e.package.add_page(f"{href}#{page['pid']}", p['title'])
        e.save(output)
finally:
    cookies.save()
    # Persist the resource index even after a failure so that the next run
    # can skip what has already been downloaded. Saving stays best effort,
    # but a problem is reported instead of being hidden.
    if resource_file is not None:
        try:
            with open(resource_file, 'w', encoding='UTF-8') as f:
                dump(resources, f, ensure_ascii=False, separators=(',', ':'))
        except Exception:
            print_exc()
|