epub converter: update footnote

This commit is contained in:
shirshasa
2021-07-12 08:36:36 +03:00
parent 21d2c09de4
commit 20dbae96e1

View File

@@ -4,11 +4,11 @@ import logging
import re
from os.path import dirname, normpath, join
from collections import defaultdict
from typing import Dict, Union
from typing import Dict, Union, List
from itertools import chain
import ebooklib
from bs4 import BeautifulSoup
from bs4 import BeautifulSoup, Tag
from ebooklib import epub
from ebooklib.epub import Link, Section
@@ -47,17 +47,22 @@ class EpubPostprocessor:
self.add_css_styles2soup()
self.logger.log('Footnotes processing.')
self.footnotes = []
self.footnotes_contents: List[str] = []
self.noterefs = []
self.footnotes: List[Tag] = []
for href in self.href2soup_html:
footnotes, noterefs = preprocess_footnotes(self.href2soup_html[href], self.href2soup_html)
self.footnotes.extend(footnotes)
content, noterefs, footnotes_tags = preprocess_footnotes(self.href2soup_html[href],
self.href2soup_html)
self.footnotes_contents.extend(content)
self.noterefs.extend(noterefs)
for i, noteref in enumerate(self.noterefs):
self.footnotes.extend(footnotes_tags)
for i, (noteref, footnote) in enumerate(zip(self.noterefs, self.footnotes)):
noteref.attrs['data-id'] = i + 1
noteref.attrs['id'] = f'footnote-{i + 1}'
footnote.attrs['href'] = f'#footnote-{i + 1}'
self.logger.log(f'Added {len(self.footnotes)} footnotes.')
self.logger.log(f'Added {len(self.footnotes_contents)} footnotes.')
self.logger.log('TOC processing.')
self.href2subchapter_ids = defaultdict(list)
self.added_to_toc_hrefs = set()
@@ -284,6 +289,8 @@ class EpubPostprocessor:
anchor_soup = self.href2soup_html[href_in_link]
anchor_tags = anchor_soup.find_all(attrs={'id': new_id})
anchor_tags = anchor_tags or anchor_soup.find_all(attrs={'id': id_in_link})
if anchor_tags:
if len(anchor_tags) > 1:
self.logger.log(f'Warning in {href}: multiple anchors: {len(anchor_tags)} found.'
@@ -308,10 +315,9 @@ class EpubPostprocessor:
else:
internal_link_tag.attrs['converter-mark'] = 'bad-link'
if 'page' not in id_in_link:
self.logger.log(f'Error in {href}. While processing {internal_link_tag} no anchor found.'
f' Should be anchor with new id={new_id} in {href_in_link} file.'
f' Old id={id_in_link}')
self.logger.log(f'Error in {href}. While processing {internal_link_tag} no anchor found.'
f' Should be anchor with new id={new_id} in {href_in_link} file.'
f' Old id={id_in_link}')
def build_one_anchored_section(self, node):
"""
@@ -391,7 +397,7 @@ class EpubPostprocessor:
return {
"content": top_level_dict_chapters,
"footnotes": self.footnotes
"footnotes": self.footnotes_contents
}