down_ireader: add support for footnotes

This commit is contained in:
2022-03-13 10:36:01 +08:00
parent a7a8319e1d
commit f803d42ba3

View File

@@ -1,4 +1,5 @@
from argparse import ArgumentParser from argparse import ArgumentParser
from copy import copy
from http.cookiejar import MozillaCookieJar from http.cookiejar import MozillaCookieJar
from re import compile from re import compile
from urllib.parse import urlparse from urllib.parse import urlparse
@@ -167,52 +168,96 @@ def main():
print(f'正在下载第{p["id"]}') print(f'正在下载第{p["id"]}')
res = dr.get_page(bid, p["id"]) res = dr.get_page(bid, p["id"])
pa = BeautifulSoup(res.text, 'lxml') pa = BeautifulSoup(res.text, 'lxml')
for i in pa.descendants: pa.attrs['xmlns:epub'] = 'http://www.idpf.org/2007/ops'
if isinstance(i, Tag): footnotes = []
if i.name == 'img': have_footnote = False
if 'src' in i.attrs: while True:
src = i.attrs['src'] for i in pa.descendants:
name = get_url_name(src) if isinstance(i, Tag):
if name not in resources: if i.name == 'img':
resources.append(name) if 'src' in i.attrs:
book.add_item(EpubItem(file_name=name, content=dr.get(src).content)) # noqa: E501 src = i.attrs['src']
print(f'img资源已转换:{src} -> {name}') name = get_url_name(src)
i.attrs['src'] = name if name not in resources:
elif i.name == 'link': resources.append(name)
if 'rel' in i.attrs: book.add_item(EpubItem(file_name=name, content=dr.get(src).content)) # noqa: E501
if 'stylesheet' in i.attrs['rel']: print(f'img资源已转换:{src} -> {name}')
if 'href' in i.attrs: i.attrs['src'] = name
href = i.attrs['href'] elif i.name == 'link':
name = get_url_name(href) if 'rel' in i.attrs:
if name not in resources: if 'stylesheet' in i.attrs['rel']:
content = dr.get(href).content.decode() if 'href' in i.attrs:
m = URL_RE.search(content) href = i.attrs['href']
while m is not None: name = get_url_name(href)
src = m.group(1) if name not in resources:
name2 = get_url_name(src) content = dr.get(href).content.decode()
if name2 not in resources:
resources.append(name2)
book.add_item(EpubItem(file_name=name2, content=dr.get(src).content)) # noqa: E501
print(f'css内部url已转换:{src} -> {name2}') # noqa: E501
content = content.replace(src, name2)
m = URL_RE.search(content) m = URL_RE.search(content)
resources.append(name) while m is not None:
book.add_item(EpubItem(file_name=name, content=content.encode())) # noqa: E501 src = m.group(1)
print(f'css资源已转换:{href} -> {name}') name2 = get_url_name(src)
i.attrs['href'] = name if name2 not in resources:
if 'style' in i.attrs: resources.append(name2)
s = i.attrs['style'] book.add_item(EpubItem(file_name=name2, content=dr.get(src).content)) # noqa: E501
m = URL_RE.search(s) print(f'css内部url已转换:{src} -> {name2}') # noqa: E501
while m is not None: content = content.replace(src, name2) # noqa: E501
src = m.group(1) m = URL_RE.search(content)
name = get_url_name(src) resources.append(name)
if name not in resources: book.add_item(EpubItem(file_name=name, content=content.encode())) # noqa: E501
resources.append(name) print(f'css资源已转换:{href} -> {name}')
book.add_item(EpubItem(file_name=name, content=dr.get(src).content)) # noqa: E501 i.attrs['href'] = name
print(f'style内部url已转换:{src} -> {name}') if 'style' in i.attrs:
s = s.replace(src, name) s = i.attrs['style']
m = URL_RE.search(s) m = URL_RE.search(s)
i.attrs['style'] = s while m is not None:
src = m.group(1)
name = get_url_name(src)
if name not in resources:
resources.append(name)
book.add_item(EpubItem(file_name=name, content=dr.get(src).content)) # noqa: E501
print(f'style内部url已转换:{src} -> {name}')
s = s.replace(src, name)
m = URL_RE.search(s)
i.attrs['style'] = s
if 'class' in i.attrs:
if 'zhangyue-footnote' in i.attrs['class']:
if 'zy-footnote' in i.attrs:
footnote = i.attrs['zy-footnote']
footnote_id = f'footnote{len(footnotes)}'
if footnote != '':
tmp = Tag(name='div')
tmp2 = Tag(name='p')
tmp.append(tmp2)
tmp3 = Tag(name='a')
tmp3.attrs['href'] = f'#{footnote_id}'
tmp3.attrs['id'] = f'{footnote_id}n'
# tmp.attrs['epub:type'] = 'footnote'
tmp3.append(f"[{len(footnotes) + 1}]")
tmp2.append(tmp3)
tmp2.append(footnote)
footnotes.append(tmp)
i2 = copy(i)
del i2.attrs['zy-footnote']
i2.attrs['class'].remove('zhangyue-footnote') # noqa: E501
if i2.name == 'img':
if 'style' in i2.attrs:
i2.attrs['style'] += 'height: 1em;'
else:
i2.attrs['style'] = 'height: 1em;'
alink = Tag(name='a')
alink.attrs['href'] = f'#{footnote_id}n'
alink.attrs['id'] = f'{footnote_id}'
# alink.attrs['epub:type'] = 'noteref'
alink.append(i2)
sup = Tag(name='sup')
sup.append(alink)
i.replace_with(sup)
have_footnote = True
if not have_footnote:
break
have_footnote = False
body = pa.find('body')
for i in footnotes:
body.append(i)
data = pa.encode(formatter="html5") data = pa.encode(formatter="html5")
c = RawEpubHtml(f'{p["id"]}.html', file_name=f'{p["id"]}.html', content=data, title=p["chapterName"]) # noqa: E501 c = RawEpubHtml(f'{p["id"]}.html', file_name=f'{p["id"]}.html', content=data, title=p["chapterName"]) # noqa: E501
book.add_item(c) book.add_item(c)