epub converter: update footnote

2021-07-12 08:37:13 +03:00
parent 20dbae96e1
commit 7907f87594
1 changed files with 7 additions and 5 deletions
--- a/src/html_epub_preprocessor.py
+++ b/src/html_epub_preprocessor.py
@@ -203,7 +203,7 @@ def replace_with_livecarta_anchor_tag(anchor, i):


 def preprocess_footnotes(source_html_tag: Tag, href2soup_html: dict = None, noteref_attr_name='epub:type') -> Tuple[
-    list, list]:
+    list, list, list]:
    """
    This function should run earlier than adding fonts in the pipeline.

@@ -216,6 +216,7 @@ def preprocess_footnotes(source_html_tag: Tag, href2soup_html: dict = None, note
    bad_noterefs_tags = set([tag for tag in noterefs_tags if not tag.attrs.get('href')])
    noterefs_tags = [tag for tag in noterefs_tags if tag not in bad_noterefs_tags]
    new_noterefs_tags = []
+    new_footnotes_tags = []
    [tag.decompose() for tag in bad_noterefs_tags]

    def parse_a_tag_href(s: str):
@@ -256,11 +257,11 @@ def preprocess_footnotes(source_html_tag: Tag, href2soup_html: dict = None, note
            footnote_tag = footnote_tag.parent
        new_noterefs_tags.append(replace_with_livecarta_anchor_tag(noteref_tag, i))
        content = footnote_tag.text
-
-        footnote_tag.decompose()
+        # footnote_tag.decompose()
        footnotes.append(content)
+        new_footnotes_tags.append(footnote_tag.find(attrs={'role': 'doc-backlink'}))

-    return footnotes, new_noterefs_tags
+    return footnotes, new_noterefs_tags, new_footnotes_tags


 def unwrap_structural_tags(body_tag):
@@ -500,7 +501,8 @@ def prepare_title_and_content(title, chapter_tag: BeautifulSoup, remove_title_fr
    preprocess_block_tags(chapter_tag)
    # 2. class removal
    for tag in chapter_tag.find_all(recursive=True):
-        if hasattr(tag, 'attrs') and tag.attrs.get('class') and (tag.attrs.get('class') not in ['link-anchor','footnote-element']):
+        if hasattr(tag, 'attrs') and tag.attrs.get('class') and (tag.attrs.get('class') not in ['link-anchor',
+                                                                                                'footnote-element']):
            del tag.attrs['class']

    # content_str = re.sub(r'([\n\t\xa0])', ' ', str(content_tag))