down_ireader: add support for footnotes

This commit is contained in:
2022-03-13 10:36:01 +08:00
parent a7a8319e1d
commit f803d42ba3

View File

@@ -1,4 +1,5 @@
from argparse import ArgumentParser from argparse import ArgumentParser
from copy import copy
from http.cookiejar import MozillaCookieJar from http.cookiejar import MozillaCookieJar
from re import compile from re import compile
from urllib.parse import urlparse from urllib.parse import urlparse
@@ -167,6 +168,10 @@ def main():
print(f'正在下载第{p["id"]}') print(f'正在下载第{p["id"]}')
res = dr.get_page(bid, p["id"]) res = dr.get_page(bid, p["id"])
pa = BeautifulSoup(res.text, 'lxml') pa = BeautifulSoup(res.text, 'lxml')
pa.attrs['xmlns:epub'] = 'http://www.idpf.org/2007/ops'
footnotes = []
have_footnote = False
while True:
for i in pa.descendants: for i in pa.descendants:
if isinstance(i, Tag): if isinstance(i, Tag):
if i.name == 'img': if i.name == 'img':
@@ -194,7 +199,7 @@ def main():
resources.append(name2) resources.append(name2)
book.add_item(EpubItem(file_name=name2, content=dr.get(src).content)) # noqa: E501 book.add_item(EpubItem(file_name=name2, content=dr.get(src).content)) # noqa: E501
print(f'css内部url已转换:{src} -> {name2}') # noqa: E501 print(f'css内部url已转换:{src} -> {name2}') # noqa: E501
content = content.replace(src, name2) content = content.replace(src, name2) # noqa: E501
m = URL_RE.search(content) m = URL_RE.search(content)
resources.append(name) resources.append(name)
book.add_item(EpubItem(file_name=name, content=content.encode())) # noqa: E501 book.add_item(EpubItem(file_name=name, content=content.encode())) # noqa: E501
@@ -213,6 +218,46 @@ def main():
s = s.replace(src, name) s = s.replace(src, name)
m = URL_RE.search(s) m = URL_RE.search(s)
i.attrs['style'] = s i.attrs['style'] = s
if 'class' in i.attrs:
if 'zhangyue-footnote' in i.attrs['class']:
if 'zy-footnote' in i.attrs:
footnote = i.attrs['zy-footnote']
footnote_id = f'footnote{len(footnotes)}'
if footnote != '':
tmp = Tag(name='div')
tmp2 = Tag(name='p')
tmp.append(tmp2)
tmp3 = Tag(name='a')
tmp3.attrs['href'] = f'#{footnote_id}'
tmp3.attrs['id'] = f'{footnote_id}n'
# tmp.attrs['epub:type'] = 'footnote'
tmp3.append(f"[{len(footnotes) + 1}]")
tmp2.append(tmp3)
tmp2.append(footnote)
footnotes.append(tmp)
i2 = copy(i)
del i2.attrs['zy-footnote']
i2.attrs['class'].remove('zhangyue-footnote') # noqa: E501
if i2.name == 'img':
if 'style' in i2.attrs:
i2.attrs['style'] += 'height: 1em;'
else:
i2.attrs['style'] = 'height: 1em;'
alink = Tag(name='a')
alink.attrs['href'] = f'#{footnote_id}n'
alink.attrs['id'] = f'{footnote_id}'
# alink.attrs['epub:type'] = 'noteref'
alink.append(i2)
sup = Tag(name='sup')
sup.append(alink)
i.replace_with(sup)
have_footnote = True
if not have_footnote:
break
have_footnote = False
body = pa.find('body')
for i in footnotes:
body.append(i)
data = pa.encode(formatter="html5") data = pa.encode(formatter="html5")
c = RawEpubHtml(f'{p["id"]}.html', file_name=f'{p["id"]}.html', content=data, title=p["chapterName"]) # noqa: E501 c = RawEpubHtml(f'{p["id"]}.html', file_name=f'{p["id"]}.html', content=data, title=p["chapterName"]) # noqa: E501
book.add_item(c) book.add_item(c)