From 90c55875706dff5ad5d211baf55a84a5d4d35728 Mon Sep 17 00:00:00 2001
From: shirshasa <katerinagorbac@gmail.com>
Date: Fri, 9 Jul 2021 12:24:36 +0300
Subject: [PATCH] epub converter: update footnotes

---
 src/epub_postprocessor.py     |  9 ++++++++-
 src/html_epub_preprocessor.py | 13 +++++++++----
 2 files changed, 17 insertions(+), 5 deletions(-)

diff --git a/src/epub_postprocessor.py b/src/epub_postprocessor.py
index 42e5f8d..b7813bc 100644
--- a/src/epub_postprocessor.py
+++ b/src/epub_postprocessor.py
@@ -48,8 +48,15 @@ class EpubPostprocessor:
 
         self.logger.log('Footnotes processing.')
         self.footnotes = []
+        self.noterefs = []
         for href in self.href2soup_html:
-            self.footnotes.extend(preprocess_footnotes(self.href2soup_html[href], self.href2soup_html))
+            footnotes, noterefs = preprocess_footnotes(self.href2soup_html[href], self.href2soup_html)
+            self.footnotes.extend(footnotes)
+            self.noterefs.extend(noterefs)
+        for i, noteref in enumerate(self.noterefs):
+            noteref.attrs['data-id'] = i + 1
+            noteref.attrs['id'] = f'footnote-{i + 1}'
+
         self.logger.log(f'Added {len(self.footnotes)} footnotes.')
         self.logger.log('TOC processing.')
         self.href2subchapter_ids = defaultdict(list)
diff --git a/src/html_epub_preprocessor.py b/src/html_epub_preprocessor.py
index 886d072..d783a47 100644
--- a/src/html_epub_preprocessor.py
+++ b/src/html_epub_preprocessor.py
@@ -194,10 +194,14 @@ def replace_with_livecarta_anchor_tag(anchor, i):
     new_tag['data-id'] = i + 1
     new_tag['id'] = f'footnote-{i + 1}'
     new_tag.string = '*'
+    if anchor.parent.name == 'sup':
+        anchor.parent.unwrap()
     anchor.replace_with(new_tag)
+    return new_tag
 
 
-def preprocess_footnotes(source_html_tag: Tag, href2soup_html: dict = None, noteref_attr_name='epub:type') -> List[str]:
+def preprocess_footnotes(source_html_tag: Tag, href2soup_html: dict = None, noteref_attr_name='epub:type') -> Tuple[
+    list, list]:
     """
     This function should be earlier that adding fonts in pipeline.
 
@@ -209,6 +213,7 @@ def preprocess_footnotes(source_html_tag: Tag, href2soup_html: dict = None, note
     noterefs_tags = source_html_tag.find_all(attrs={noteref_attr_name: 'noteref'})
     bad_noterefs_tags = set([tag for tag in noterefs_tags if not tag.attrs.get('href')])
     noterefs_tags = [tag for tag in noterefs_tags if tag not in bad_noterefs_tags]
+    new_noterefs_tags = []
     [tag.decompose() for tag in bad_noterefs_tags]
 
     def parse_a_tag_href(s: str):
@@ -257,13 +262,13 @@ def preprocess_footnotes(source_html_tag: Tag, href2soup_html: dict = None, note
         footnote_tag = expected_footnote_tags[0]
         if footnote_tag.parent.attrs.get('role') and footnote_tag.parent.attrs.get('role') == 'doc-endnote':
             footnote_tag = footnote_tag.parent
-        replace_with_livecarta_anchor_tag(noteref_tag, i)
+        new_noterefs_tags.append(replace_with_livecarta_anchor_tag(noteref_tag, i))
         content = footnote_tag.text
 
         footnote_tag.decompose()
         footnotes.append(content)
 
-    return footnotes
+    return footnotes, new_noterefs_tags
 
 
 def unwrap_structural_tags(body_tag):
@@ -503,7 +508,7 @@ def prepare_title_and_content(title, chapter_tag: BeautifulSoup, remove_title_fr
     preprocess_block_tags(chapter_tag)
     # 2. class removal
     for tag in chapter_tag.find_all(recursive=True):
-        if hasattr(tag, 'attrs') and tag.attrs.get('class') and (tag.attrs.get('class') not in ['link-anchor']):
+        if hasattr(tag, 'attrs') and tag.attrs.get('class') and (tag.attrs.get('class') not in ['link-anchor','footnote-element']):
             del tag.attrs['class']
 
     # content_str = re.sub(r'([\n\t\xa0])', ' ', str(content_tag))