427 lines
15 KiB
Python
427 lines
15 KiB
Python
from ebooklib import epub, ITEM_IMAGE
|
|
from lxml import etree
|
|
import subprocess
|
|
from typing import Optional
|
|
import os
|
|
from config import Config
|
|
from image_cache import get_cache
|
|
from traceback import print_exc
|
|
import xml.etree.ElementTree as ET
|
|
from html.parser import HTMLParser
|
|
try:
|
|
import magic
|
|
have_magic = True
|
|
have_filetype = False
|
|
except ImportError:
|
|
have_magic = False
|
|
try:
|
|
import filetype
|
|
have_filetype = True
|
|
except ImportError:
|
|
have_filetype = False
|
|
print('Warning: python-magic or filetype not found. The mimetype in EPUB file may wrong.') # noqa: E501
|
|
import platform
|
|
if platform.system() == "Windows":
|
|
print('python-magic-bin is also needed on Windows if you use magic.') # noqa: E501
|
|
|
|
|
|
# ebooklib's EpubItem has no attribute for the OPF manifest ``fallback``
# reference, so this subclass adds a ``fallback`` attribute to carry it.
|
|
class EpubItem(epub.EpubItem):
    """ebooklib item extended with a ``fallback`` attribute.

    ``fallback`` starts as ``None``; it is meant to hold the id of another
    manifest item that can be used in place of this one.
    """

    def __init__(self, uid=None, file_name='', media_type='',
                 content=epub.six.b(''), manifest=True):
        # Hand every argument through to ebooklib unchanged.
        super().__init__(
            uid=uid,
            file_name=file_name,
            media_type=media_type,
            content=content,
            manifest=manifest,
        )
        # No fallback item until a caller assigns one.
        self.fallback = None
|
|
|
|
|
|
class EpubImage(EpubItem):
    """Image item built on the fallback-aware ``EpubItem``."""

    def __init__(self):
        # Images are created empty; callers fill in the attributes afterwards.
        super().__init__()

    def get_type(self):
        """Return the ebooklib item-type constant for images."""
        return ITEM_IMAGE

    def __str__(self):
        """Return a short ``<EpubImage:id:file_name>`` description."""
        return f'<EpubImage:{self.id}:{self.file_name}>'
|
|
|
|
|
|
class EpubPathImage(EpubImage):
    """Image item whose bytes are read from a file on disk on demand."""

    def __init__(self):
        super().__init__()
        # Location of the image file; nothing is read until get_content().
        self.path = None

    def get_content(self, default=b''):
        """Return the bytes of the file at ``self.path``.

        :param default: value returned when no path has been set.
        :returns: the file content, or ``default`` if ``self.path`` is falsy.
        """
        if not self.path:
            return default
        with open(self.path, 'rb') as image_file:
            return image_file.read()
|
|
|
|
|
|
class EpubWriter(epub.EpubWriter):
    """ebooklib writer that also emits the ``fallback`` manifest attribute."""

    def _write_opf_manifest(self, root):
        """Append a ``<manifest>`` element to ``root`` and fill it.

        One ``<item>`` is written for every book item whose ``manifest``
        flag is truthy.

        :param root: parent element that receives the ``<manifest>`` child.
        :returns: the id of the NCX item, or ``None`` if the book has none.
        """
        manifest = epub.etree.SubElement(root, 'manifest')
        ncx_id = None

        for item in self.book.get_items():
            if not item.manifest:
                continue

            if isinstance(item, epub.EpubNav):
                attributes = {'href': item.get_name(),
                              'id': item.id,
                              'media-type': item.media_type,
                              'properties': 'nav'}
            elif isinstance(item, epub.EpubNcx):
                ncx_id = item.id
                attributes = {'href': item.file_name,
                              'id': item.id,
                              'media-type': item.media_type}
            elif isinstance(item, epub.EpubCover):
                attributes = {'href': item.file_name,
                              'id': item.id,
                              'media-type': item.media_type,
                              'properties': 'cover-image'}
            else:
                attributes = {'href': item.file_name,
                              'id': item.id,
                              'media-type': item.media_type}

                if hasattr(item, 'properties') and len(item.properties) > 0:
                    attributes['properties'] = ' '.join(item.properties)

                # Optional attributes: written only when the item defines the
                # field and it is not None.  The tuple order fixes the order
                # in which the attributes are added.
                optional_fields = (
                    ('media-overlay', 'media_overlay'),
                    ('duration', 'media_duration'),
                    ('fallback', 'fallback'),
                )
                for xml_name, field_name in optional_fields:
                    value = getattr(item, field_name, None)
                    if value is not None:
                        attributes[xml_name] = value

            etree.SubElement(manifest, 'item', attributes)

        return ncx_id
|
|
|
|
|
|
# Tri-state cache for the ffmpeg probe: None = not checked yet,
# True/False = result of the last check_ffmpeg() call.
have_ffmpeg = None


def check_ffmpeg():
    """Probe for a usable ``ffmpeg`` executable and cache the result.

    Runs ``ffmpeg -h`` with its output discarded and stores ``True`` in the
    module-level ``have_ffmpeg`` when the command exits with status 0,
    ``False`` otherwise.  A warning is printed when ffmpeg is unusable.

    A missing or non-executable binary makes the process launch itself fail
    with ``OSError`` (``FileNotFoundError`` / ``PermissionError``); that is
    treated as "ffmpeg not available" instead of propagating, so the warning
    below is actually reachable.
    """
    global have_ffmpeg
    try:
        completed = subprocess.run(['ffmpeg', '-h'],
                                   stdout=subprocess.DEVNULL,
                                   stderr=subprocess.DEVNULL)
    except OSError:
        have_ffmpeg = False
    else:
        have_ffmpeg = completed.returncode == 0
    if not have_ffmpeg:
        print('Warning: Can not find ffmpeg. Some epub readers may failed to open these pictures.')  # noqa: E501
|
|
|
|
|
|
def perform_convert(image_path: str) -> Optional[str]:
    """Convert an image to a JPEG fallback file with ffmpeg.

    The output is written next to the input as ``<stem>_fallback.jpg``.  An
    existing output file larger than 4096 bytes is reused without running
    ffmpeg again.

    :param image_path: path of the image to convert.
    :returns: path of the JPEG file, or ``None`` when ffmpeg is unavailable
        or exits with a non-zero status.
    """
    stem = os.path.splitext(image_path)[0]
    output_path = stem + '_fallback.jpg'

    # Reuse an earlier conversion result when one is already on disk.
    already_converted = (os.path.exists(output_path)
                         and os.path.getsize(output_path) > 4096)
    if already_converted:
        return output_path

    # Probe for ffmpeg only once; the result is cached in have_ffmpeg.
    if have_ffmpeg is None:
        check_ffmpeg()
    if not have_ffmpeg:
        return None

    code = subprocess.run(
        ['ffmpeg', '-y', '-i', image_path, output_path],
        stdout=subprocess.DEVNULL,
        stderr=subprocess.DEVNULL,
    ).returncode
    if code:
        print(f'Warning: Can not convert images by using ffmpeg. (Exit code: {code}) Some epub readers may failed to open these pictures.')  # noqa: E501
        return None
    return output_path
|
|
|
|
|
|
class HTMLImage:
    """An ``<img>`` tag found in chapter content.

    The instance keeps the tag's ``src`` and ``alt`` attributes, the local
    path of the fetched file (``path``), the file name to use inside the
    EPUB (``epub_path``) and, in footnote mode, the generated ``<aside>``
    markup (``footnote``).
    """

    def __init__(self, attrs, cfg: Config):
        """Collect ``src`` and ``alt`` from the tag's attribute pairs.

        :param attrs: iterable of ``(name, value)`` pairs, as passed to
            ``HTMLParser.handle_starttag``.
        :param cfg: configuration object; forwarded to ``get_cache`` and
            read for ``image_type``.
        """
        self.src = None
        self.alt = None
        self.path = None
        self.epub_path = None
        for key, value in attrs:
            if key == 'src':
                self.src = value
            elif key == 'alt':
                self.alt = value
        self.cfg = cfg
        self.footnote = ''

    def is_valid(self):
        """Return ``True`` when the tag carries a non-empty ``src``."""
        return self.src is not None and self.src != ''

    def download_image(self):
        """Resolve ``src`` to a local file through ``get_cache``.

        :returns: ``True`` when ``self.path`` was set, ``False`` when the
            image has no usable ``src`` or fetching raised.  The traceback
            of a failure is printed.
        """
        if not self.is_valid():
            return False
        try:
            self.path = get_cache(self.cfg, self.src)
            return True
        except Exception:
            # Best effort: report the problem and signal failure explicitly
            # rather than falling off the end with an implicit None.
            print_exc()
            return False

    def to_local(self, index: int):
        """Fetch the image and return the markup that references it.

        With ``cfg.image_type == 'inline'`` the result is an ``<img>`` tag.
        Otherwise the result is an ``<a epub:type="noteref">`` link to
        ``#img<index>`` and ``self.footnote`` is set to the matching
        ``<aside epub:type="footnote">`` that contains the image.

        :param index: number used to build the footnote id.
        :returns: serialized markup, or ``""`` when the tag has no ``src``.
        :raises ValueError: if the image could not be fetched.
        :raises NotImplementedError: from ``get_extension`` when the detected
            MIME type is not supported.
        """
        if not self.is_valid():
            return ""
        if not self.download_image():
            raise ValueError("Failed to download image.")
        self.epub_path = os.path.basename(self.path)
        if have_magic or have_filetype:
            # Name the file after its detected type.  The header is read
            # once and handed to whichever detector is installed (magic is
            # preferred, matching the if/elif used for the cover image).
            with open(self.path, 'rb') as f:
                header = f.read(4096)
            if have_magic:
                mime = magic.from_buffer(header, True)
            else:
                mime = filetype.guess_mime(header)
            self.epub_path = os.path.splitext(self.epub_path)[0] + get_extension(mime)  # noqa: E501
        img_attrs = {'src': self.epub_path}
        if self.alt:
            img_attrs['alt'] = self.alt
        img = ET.Element('img', img_attrs)
        if self.cfg.image_type == 'inline':
            return ET.tostring(img, 'unicode')
        # Footnote mode: the text keeps a link, the picture goes in an aside.
        link = ET.Element('a', {'href': f'#img{index}',
                                'epub:type': 'noteref'})
        if self.alt:
            link.text = self.alt
        aside = ET.Element('aside', {'epub:type': 'footnote',
                                     'id': f'img{index}'})
        p = ET.Element('p')
        p.append(img)
        aside.append(p)
        self.footnote = ET.tostring(aside, 'unicode') + '\n'
        return ET.tostring(link, 'unicode')
|
|
|
|
|
|
# Used to parse content
|
|
class ContentParser(HTMLParser):
    """Parse chapter markup made of ``<p>`` and ``<img>`` tags.

    After feeding, ``data`` is a list whose entries are either

    * a ``str`` - a text-only paragraph,
    * an ``HTMLImage`` - an image outside any paragraph, or
    * a ``list`` of ``str`` / ``HTMLImage`` - a paragraph that contains
      at least one image.

    ``to_local`` renders that structure back to markup, fetching the images
    on the way; successfully rendered images are collected in ``images``.
    """

    def __init__(self, cfg: Config):
        """Create an empty parser.

        :param cfg: configuration object handed to every ``HTMLImage``.
        """
        super().__init__()
        self._in_paragraph = False
        self.data = []
        # Local image file lists
        self.images = []
        self._paragraph_data = ''
        self.cfg = cfg
        self.footnote = ''
        # Next footnote number; shared by all paragraphs of one to_local()
        # run so that generated ids are unique within the document.
        self._img_index = 0

    def handle_data(self, data: str):
        """Collect text that appears inside a paragraph; ignore the rest."""
        if not self._in_paragraph:
            return
        if isinstance(self._paragraph_data, str):
            self._paragraph_data += data
        elif isinstance(self._paragraph_data, list):
            self._paragraph_data.append(data)

    def handle_starttag(self, tag, attrs):
        """Handle ``<img>``, ``<p>`` and ``<book>``.

        :raises NotImplementedError: for any other tag.
        """
        if tag == 'img':
            image = HTMLImage(attrs, self.cfg)
            if not self._in_paragraph:
                self.data.append(image)
                return
            if isinstance(self._paragraph_data, str):
                # First image of this paragraph: switch from plain text to a
                # list, keeping the text seen so far.  A paragraph that is
                # already a list is left as is; wrapping it again would nest
                # it and render a <p> inside a <p>.
                text = self._paragraph_data
                self._paragraph_data = [text] if text else []
            self._paragraph_data.append(image)
        elif tag == 'p':
            self._in_paragraph = True
        elif tag == 'book':
            pass
        else:
            raise NotImplementedError()

    def handle_endtag(self, tag: str):
        """Close a paragraph and store its content in ``data``.

        :raises NotImplementedError: for tags other than ``img`` and ``p``.
        """
        # NOTE(review): a closing </book> raises here although the opening
        # tag is accepted by handle_starttag - confirm this is intended.
        if tag == 'img':
            pass
        elif tag == 'p':
            self._in_paragraph = False
            if self._paragraph_data:
                self.data.append(self._paragraph_data)
            self._paragraph_data = ''
        else:
            raise NotImplementedError()

    def have_image(self, data_list=None) -> bool:
        """Return ``True`` if ``data_list`` holds a valid image at any depth.

        :param data_list: structure to search; defaults to ``self.data``.
        """
        if data_list is None:
            data_list = self.data
        for item in data_list:
            if isinstance(item, HTMLImage):
                if item.is_valid():
                    return True
            elif isinstance(item, list):
                if self.have_image(item):
                    return True
        return False

    def to_local(self, data_list=None, root=None) -> str:
        """Render parsed content to markup, fetching the images.

        Called without arguments it renders ``self.data``: footnotes and the
        footnote counter are reset, every text entry is wrapped in ``<p>``,
        any text left over from an unterminated paragraph is appended, and
        the collected footnotes come last.  The method calls itself for
        paragraphs that contain images.

        :param data_list: entries to render; ``None`` means ``self.data``.
        :param root: parser that accumulates footnotes, images and the
            footnote counter during recursion; defaults to ``self``.
        :returns: the rendered markup.
        :raises NotImplementedError: for an entry of an unexpected type.
        """
        top_level = data_list is None
        if top_level:
            data_list = self.data
            root = self
            self.footnote = ''
            self._img_index = 0
        elif root is None:
            root = self
        parts = []
        for item in data_list:
            if isinstance(item, str):
                # Top-level strings are whole paragraphs; strings inside a
                # paragraph list are fragments of it.
                parts.append(f'<p>{item}</p>\n' if top_level else item)
            elif isinstance(item, HTMLImage):
                if not item.is_valid():
                    continue
                try:
                    markup = item.to_local(root._img_index)
                except ValueError:
                    print("the image is not valid.", item.src)
                    continue
                parts.append(markup)
                root.images.append(item)
                if item.footnote:
                    root.footnote += item.footnote
                # The counter lives on root so numbering continues across
                # paragraphs instead of restarting at 0 in every one.
                root._img_index += 1
            elif isinstance(item, list):
                parts.append(f'<p>{self.to_local(item, root)}</p>\n')
            else:
                raise NotImplementedError()
        if top_level:
            # Text of a paragraph that was never closed is emitted once, at
            # the end of the document, not once per nested paragraph.
            pending = self._paragraph_data
            if pending:
                if isinstance(pending, list):
                    pending = self.to_local(pending, root)
                parts.append(f'<p>{pending}</p>\n')
            parts.append(self.footnote)
        return ''.join(parts)
|
|
|
|
|
|
def get_extension(mime: str) -> str:
    """Return the file extension (with leading dot) for an image MIME type.

    :param mime: MIME type string such as ``'image/png'``.
    :returns: one of ``.gif``, ``.jpeg``, ``.png``, ``.svg`` or ``.webp``.
    :raises NotImplementedError: for any other value; the value is printed
        before raising.
    """
    supported = (
        ('image/gif', '.gif'),
        ('image/jpeg', '.jpeg'),
        ('image/png', '.png'),
        ('image/svg+xml', '.svg'),
        ('image/webp', '.webp'),  # EPUB 3.3 Draft
    )
    for known_mime, extension in supported:
        if mime == known_mime:
            return extension
    print(mime)
    raise NotImplementedError()
|
|
|
|
|
|
class EpubFile:
    """Assemble one book into an EPUB file.

    Typical use, as far as this class shows: ``set_book`` once, then
    ``add_chapter`` / ``add_nodownload_chapter`` per chapter in reading
    order, then ``save_epub_file``.
    """

    def __init__(self, cfg: Config, out: str):
        """Create an empty zh-CN book.

        :param cfg: configuration object; passed on to ``get_cache`` and
            ``ContentParser``.
        :param out: output target handed to the EPUB writer.
        """
        self.epub = epub.EpubBook()
        self.EpubList = list()
        self.epub.set_language('zh-CN')
        self.cfg = cfg
        self.out = out
        # Set here as well as in set_book() so that adding a chapter before
        # set_book() does not fail on a missing attribute.
        self.last_division_name = ''
        # File names of images already added; a manifest must not list the
        # same href twice.
        self._image_names = set()

    def set_book(self, book):
        """Set identifier, title, author and cover; add the intro page.

        :param book: mapping with the keys ``book_id``, ``book_name``,
            ``author_name``, ``cover``, ``uptime`` and
            ``last_chapter_info`` (which holds ``chapter_title``).
        """
        self.epub.set_identifier(str(book['book_id']))
        self.epub.set_title(book['book_name'])
        self.epub.add_author(book['author_name'])
        cover_path = get_cache(self.cfg, book['cover'])
        with open(cover_path, 'rb') as f:
            cover = f.read()
        # Name the cover after its detected type; without a detector the
        # code falls back to assuming PNG.
        if have_magic:
            mime_type = magic.from_buffer(cover, mime=True)
            file_name = 'cover' + get_extension(mime_type)
        elif have_filetype:
            mime_type = filetype.guess_mime(cover)
            file_name = 'cover' + get_extension(mime_type)
        else:
            file_name = 'cover.png'
        self.epub.set_cover(file_name, cover)
        intro = epub.EpubHtml(title='book-detailed', file_name='intro.xhtml',
                              lang='zh-CN')
        intro.content = (
            "<html><head></head><body>\n"
            f"<h1>书名:{book['book_name']}</h1><p>ID:{book['book_id']}</p>\n"
            f"<p>作者:{book['author_name']}</p><p>更新时间:{book['uptime']}</p>\n"
            f"<p>最新章节:{book['last_chapter_info']['chapter_title']}</p></body></html>"  # noqa: E501
        )
        self.epub.add_item(intro)
        self.EpubList.append(intro)
        self.epub.spine.append(intro)
        self.last_division_name = ''

    def _new_chapter_page(self, chapter_id, title, division_name):
        """Create the XHTML page object for one chapter."""
        ch = epub.EpubHtml(
            title=title,
            file_name=f'ch{chapter_id}.xhtml',
            lang='zh-CN',
            uid=f'ch{chapter_id}',
        )
        if division_name == '作品相关':
            ch.is_linear = False
        return ch

    def _register_chapter(self, ch, division_name):
        """Put a chapter into the table of contents and the spine."""
        if self.last_division_name != division_name:
            # A new division starts: open a [link, children] TOC section.
            self.EpubList.append([epub.Link(ch.file_name, division_name), []])
            self.last_division_name = division_name
        if self.EpubList and isinstance(self.EpubList[-1], list):
            self.EpubList[-1][-1].append(ch)
        else:
            self.EpubList.append(ch)
        self.epub.spine.append(ch)

    def _add_chapter_images(self, chapter_id, images):
        """Add a chapter's images, plus JPEG fallbacks for WebP, to the book."""
        for count, oimg in enumerate(images):
            if oimg.epub_path in self._image_names:
                # Same file referenced again (in this or an earlier
                # chapter); it is already part of the book.
                continue
            self._image_names.add(oimg.epub_path)
            img = EpubPathImage()
            img.file_name = oimg.epub_path
            img.path = oimg.path
            img.id = f'i{chapter_id}_{count}'
            self.epub.add_item(img)
            if not oimg.epub_path.endswith('.webp'):
                continue
            img.media_type = 'image/webp'
            jpg_path = perform_convert(oimg.path)
            if jpg_path is None:
                continue
            jpg_img = EpubPathImage()
            jpg_img.file_name = os.path.basename(jpg_path)
            jpg_img.path = jpg_path
            jpg_img.id = img.id + 'f'
            # Link the WebP item to its JPEG fallback in the manifest.
            img.fallback = jpg_img.id
            self.epub.add_item(jpg_img)

    def add_chapter(self, chapter, content: str, division_name: str):
        """Add a downloaded chapter and the images it references.

        :param chapter: mapping with ``chapter_title`` and ``chapter_id``.
        :param content: chapter text; each line becomes one paragraph.
        :param division_name: name of the division (TOC section) the
            chapter belongs to.
        :raises Exception: whatever the content parser raises; the markup
            that failed to parse is printed first.
        """
        chapter_title = chapter['chapter_title']
        chapter_id = chapter['chapter_id']
        ch = self._new_chapter_page(chapter_id, chapter_title, division_name)
        parser = ContentParser(self.cfg)
        markup = '<p>' + '</p>\n<p>'.join(content.splitlines()) + '</p>'
        try:
            parser.feed(markup)
        except Exception:
            print(markup)
            raise
        parser.close()
        ch.content = f'<h1 style="text-align: center;">{chapter_title}</h1>\n{parser.to_local()}'  # noqa: E501
        self.epub.add_item(ch)
        self._add_chapter_images(chapter_id, parser.images)
        self._register_chapter(ch, division_name)

    def add_nodownload_chapter(self, chapter, division_name: str):
        """Add a placeholder page for a chapter that was not downloaded.

        :param chapter: mapping with ``chapter_title`` and ``chapter_id``.
        :param division_name: name of the division (TOC section) the
            chapter belongs to.
        """
        chapter_title = chapter['chapter_title']
        chapter_id = chapter['chapter_id']
        ch = self._new_chapter_page(chapter_id, f"{chapter_title} (未下载)",
                                    division_name)
        ch.content = f'<h1 style="text-align: center;">{chapter_title}</h1>\n<p>本章未下载</p>'  # noqa: E501
        self.epub.add_item(ch)
        self._register_chapter(ch, division_name)

    def save_epub_file(self):  # save epub file to local
        """Write the book to ``self.out``.

        An ``IOError`` raised while writing is reported with a traceback
        and not propagated.
        """
        self.epub.toc = self.EpubList
        self.epub.add_item(epub.EpubNcx())
        self.epub.add_item(epub.EpubNav())
        book = EpubWriter(self.out, self.epub, {})
        book.process()
        try:
            book.write()
        except IOError:
            print_exc()
|