diff --git a/src/epub_postprocessor.py b/src/epub_postprocessor.py index fbbc82c..e9ad705 100644 --- a/src/epub_postprocessor.py +++ b/src/epub_postprocessor.py @@ -4,11 +4,11 @@ import logging import re from os.path import dirname, normpath, join from collections import defaultdict -from typing import Dict, Union +from typing import Dict, Union, List from itertools import chain import ebooklib -from bs4 import BeautifulSoup +from bs4 import BeautifulSoup, Tag from ebooklib import epub from ebooklib.epub import Link, Section @@ -47,17 +47,22 @@ class EpubPostprocessor: self.add_css_styles2soup() self.logger.log('Footnotes processing.') - self.footnotes = [] + self.footnotes_contents: List[str] = [] self.noterefs = [] + self.footnotes: List[Tag] = [] for href in self.href2soup_html: - footnotes, noterefs = preprocess_footnotes(self.href2soup_html[href], self.href2soup_html) - self.footnotes.extend(footnotes) + content, noterefs, footnotes_tags = preprocess_footnotes(self.href2soup_html[href], + self.href2soup_html) + self.footnotes_contents.extend(content) self.noterefs.extend(noterefs) - for i, noteref in enumerate(self.noterefs): + self.footnotes.extend(footnotes_tags) + + for i, (noteref, footnote) in enumerate(zip(self.noterefs, self.footnotes)): noteref.attrs['data-id'] = i + 1 noteref.attrs['id'] = f'footnote-{i + 1}' + footnote.attrs['href'] = f'#footnote-{i + 1}' - self.logger.log(f'Added {len(self.footnotes)} footnotes.') + self.logger.log(f'Added {len(self.footnotes_contents)} footnotes.') self.logger.log('TOC processing.') self.href2subchapter_ids = defaultdict(list) self.added_to_toc_hrefs = set() @@ -284,6 +289,8 @@ class EpubPostprocessor: anchor_soup = self.href2soup_html[href_in_link] anchor_tags = anchor_soup.find_all(attrs={'id': new_id}) + anchor_tags = anchor_tags or anchor_soup.find_all(attrs={'id': id_in_link}) + if anchor_tags: if len(anchor_tags) > 1: self.logger.log(f'Warning in {href}: multiple anchors: {len(anchor_tags)} found.' @@ -308,10 +315,9 @@ class EpubPostprocessor: else: internal_link_tag.attrs['converter-mark'] = 'bad-link' - if 'page' not in id_in_link: - self.logger.log(f'Error in {href}. While processing {internal_link_tag} no anchor found.' - f' Should be anchor with new id={new_id} in {href_in_link} file.' - f' Old id={id_in_link}') + self.logger.log(f'Error in {href}. While processing {internal_link_tag} no anchor found.' + f' Should be anchor with new id={new_id} in {href_in_link} file.' + f' Old id={id_in_link}') def build_one_anchored_section(self, node): """ @@ -391,7 +397,7 @@ class EpubPostprocessor: return { "content": top_level_dict_chapters, - "footnotes": self.footnotes + "footnotes": self.footnotes_contents }