down_ireader: support footnotes

2026-06-05 11:08:49 +08:00 · 2022-03-13 10:36:01 +08:00
parent a7a8319e1d
commit f803d42ba3
1 changed files with 89 additions and 44 deletions
--- a/down_ireader.py
+++ b/down_ireader.py
@@ -1,4 +1,5 @@
 from argparse import ArgumentParser
 from copy import copy
 from http.cookiejar import MozillaCookieJar
 from re import compile
 from urllib.parse import urlparse
@@ -167,6 +168,10 @@ def main():
        print(f'正在下载第{p["id"]}章')
        res = dr.get_page(bid, p["id"])
        pa = BeautifulSoup(res.text, 'lxml')
        pa.attrs['xmlns:epub'] = 'http://www.idpf.org/2007/ops'
        footnotes = []
        have_footnote = False
        while True:
            for i in pa.descendants:
                if isinstance(i, Tag):
                    if i.name == 'img':
@@ -194,7 +199,7 @@ def main():
                                                resources.append(name2)
                                                book.add_item(EpubItem(file_name=name2, content=dr.get(src).content))  # noqa: E501
                                                print(f'css内部url已转换：{src} -> {name2}')  # noqa: E501
-                                        content = content.replace(src, name2)
+                                            content = content.replace(src, name2)  # noqa: E501
                                            m = URL_RE.search(content)
                                        resources.append(name)
                                        book.add_item(EpubItem(file_name=name, content=content.encode()))  # noqa: E501
@@ -213,6 +218,46 @@ def main():
                            s = s.replace(src, name)
                            m = URL_RE.search(s)
                        i.attrs['style'] = s
                    if 'class' in i.attrs:
                        if 'zhangyue-footnote' in i.attrs['class']:
                            if 'zy-footnote' in i.attrs:
                                footnote = i.attrs['zy-footnote']
                                footnote_id = f'footnote{len(footnotes)}'
                                if footnote != '':
                                    tmp = Tag(name='div')
                                    tmp2 = Tag(name='p')
                                    tmp.append(tmp2)
                                    tmp3 = Tag(name='a')
                                    tmp3.attrs['href'] = f'#{footnote_id}'
                                    tmp3.attrs['id'] = f'{footnote_id}n'
                                    # tmp.attrs['epub:type'] = 'footnote'
                                    tmp3.append(f"[{len(footnotes) + 1}]")
                                    tmp2.append(tmp3)
                                    tmp2.append(footnote)
                                    footnotes.append(tmp)
                                    i2 = copy(i)
                                    del i2.attrs['zy-footnote']
                                    i2.attrs['class'].remove('zhangyue-footnote')  # noqa: E501
                                    if i2.name == 'img':
                                        if 'style' in i2.attrs:
                                            i2.attrs['style'] += 'height: 1em;'
                                        else:
                                            i2.attrs['style'] = 'height: 1em;'
                                    alink = Tag(name='a')
                                    alink.attrs['href'] = f'#{footnote_id}n'
                                    alink.attrs['id'] = f'{footnote_id}'
                                    # alink.attrs['epub:type'] = 'noteref'
                                    alink.append(i2)
                                    sup = Tag(name='sup')
                                    sup.append(alink)
                                    i.replace_with(sup)
                                    have_footnote = True
            if not have_footnote:
                break
            have_footnote = False
        body = pa.find('body')
        for i in footnotes:
            body.append(i)
        data = pa.encode(formatter="html5")
        c = RawEpubHtml(f'{p["id"]}.html', file_name=f'{p["id"]}.html', content=data, title=p["chapterName"])  # noqa: E501
        book.add_item(c)