diff --git a/src/html_epub_preprocessor.py b/src/html_epub_preprocessor.py index 4913477..b22b735 100644 --- a/src/html_epub_preprocessor.py +++ b/src/html_epub_preprocessor.py @@ -203,7 +203,7 @@ def replace_with_livecarta_anchor_tag(anchor, i): def preprocess_footnotes(source_html_tag: Tag, href2soup_html: dict = None, noteref_attr_name='epub:type') -> Tuple[ - list, list]: + list, list, list]: """ This function should be earlier that adding fonts in pipeline. @@ -216,6 +216,7 @@ def preprocess_footnotes(source_html_tag: Tag, href2soup_html: dict = None, note bad_noterefs_tags = set([tag for tag in noterefs_tags if not tag.attrs.get('href')]) noterefs_tags = [tag for tag in noterefs_tags if tag not in bad_noterefs_tags] new_noterefs_tags = [] + new_footnotes_tags = [] [tag.decompose() for tag in bad_noterefs_tags] def parse_a_tag_href(s: str): @@ -256,11 +257,11 @@ def preprocess_footnotes(source_html_tag: Tag, href2soup_html: dict = None, note footnote_tag = footnote_tag.parent new_noterefs_tags.append(replace_with_livecarta_anchor_tag(noteref_tag, i)) content = footnote_tag.text - - footnote_tag.decompose() + # footnote_tag.decompose() footnotes.append(content) + new_footnotes_tags.append(footnote_tag.find(attrs={'role': 'doc-backlink'})) - return footnotes, new_noterefs_tags + return footnotes, new_noterefs_tags, new_footnotes_tags def unwrap_structural_tags(body_tag): @@ -500,7 +501,8 @@ def prepare_title_and_content(title, chapter_tag: BeautifulSoup, remove_title_fr preprocess_block_tags(chapter_tag) # 2. class removal for tag in chapter_tag.find_all(recursive=True): - if hasattr(tag, 'attrs') and tag.attrs.get('class') and (tag.attrs.get('class') not in ['link-anchor','footnote-element']): + if hasattr(tag, 'attrs') and tag.attrs.get('class') and (tag.attrs.get('class') not in ['link-anchor', + 'footnote-element']): del tag.attrs['class'] # content_str = re.sub(r'([\n\t\xa0])', ' ', str(content_tag))