epub converter: update footnote

2021-07-12 08:36:36 +03:00
parent 21d2c09de4
commit 20dbae96e1
1 changed file with 18 additions and 12 deletions
--- a/src/epub_postprocessor.py
+++ b/src/epub_postprocessor.py
@@ -4,11 +4,11 @@ import logging
 import re
 from os.path import dirname, normpath, join
 from collections import defaultdict
-from typing import Dict, Union
+from typing import Dict, Union, List
 from itertools import chain
 import ebooklib
-from bs4 import BeautifulSoup
+from bs4 import BeautifulSoup, Tag
 from ebooklib import epub
 from ebooklib.epub import Link, Section
@@ -47,17 +47,22 @@ class EpubPostprocessor:
        self.add_css_styles2soup()
        self.logger.log('Footnotes processing.')
-        self.footnotes = []
+        self.footnotes_contents: List[str] = []
        self.noterefs = []
        self.footnotes: List[Tag] = []
        for href in self.href2soup_html:
-            footnotes, noterefs = preprocess_footnotes(self.href2soup_html[href], self.href2soup_html)
+            content, noterefs, footnotes_tags = preprocess_footnotes(self.href2soup_html[href],
-            self.footnotes.extend(footnotes)
+                                                                     self.href2soup_html)
            self.footnotes_contents.extend(content)
            self.noterefs.extend(noterefs)
-        for i, noteref in enumerate(self.noterefs):
+            self.footnotes.extend(footnotes_tags)
        for i, (noteref, footnote) in enumerate(zip(self.noterefs, self.footnotes)):
            noteref.attrs['data-id'] = i + 1
            noteref.attrs['id'] = f'footnote-{i + 1}'
            footnote.attrs['href'] = f'#footnote-{i + 1}'
-        self.logger.log(f'Added {len(self.footnotes)} footnotes.')
+        self.logger.log(f'Added {len(self.footnotes_contents)} footnotes.')
        self.logger.log('TOC processing.')
        self.href2subchapter_ids = defaultdict(list)
        self.added_to_toc_hrefs = set()
@@ -284,6 +289,8 @@ class EpubPostprocessor:
                anchor_soup = self.href2soup_html[href_in_link]
                anchor_tags = anchor_soup.find_all(attrs={'id': new_id})
                anchor_tags = anchor_tags or anchor_soup.find_all(attrs={'id': id_in_link})
                if anchor_tags:
                    if len(anchor_tags) > 1:
                        self.logger.log(f'Warning in {href}: multiple anchors: {len(anchor_tags)} found.'
@@ -308,10 +315,9 @@ class EpubPostprocessor:
                else:
                    internal_link_tag.attrs['converter-mark'] = 'bad-link'
-                    if 'page' not in id_in_link:
+                    self.logger.log(f'Error in {href}. While processing {internal_link_tag} no anchor found.'
-                        self.logger.log(f'Error in {href}. While processing {internal_link_tag} no anchor found.'
+                                    f' Should be anchor with new id={new_id} in {href_in_link} file.'
-                                        f' Should be anchor with new id={new_id} in {href_in_link} file.'
+                                    f' Old id={id_in_link}')
                                        f' Old id={id_in_link}')
    def build_one_anchored_section(self, node):
        """
@@ -391,7 +397,7 @@ class EpubPostprocessor:
        return {
            "content": top_level_dict_chapters,
-            "footnotes": self.footnotes
+            "footnotes": self.footnotes_contents
        }