epub converter: update footnote

This commit is contained in:
shirshasa
2021-07-12 08:36:36 +03:00
parent 21d2c09de4
commit 20dbae96e1

View File

@@ -4,11 +4,11 @@ import logging
import re import re
from os.path import dirname, normpath, join from os.path import dirname, normpath, join
from collections import defaultdict from collections import defaultdict
from typing import Dict, Union from typing import Dict, Union, List
from itertools import chain from itertools import chain
import ebooklib import ebooklib
from bs4 import BeautifulSoup from bs4 import BeautifulSoup, Tag
from ebooklib import epub from ebooklib import epub
from ebooklib.epub import Link, Section from ebooklib.epub import Link, Section
@@ -47,17 +47,22 @@ class EpubPostprocessor:
self.add_css_styles2soup() self.add_css_styles2soup()
self.logger.log('Footnotes processing.') self.logger.log('Footnotes processing.')
self.footnotes = [] self.footnotes_contents: List[str] = []
self.noterefs = [] self.noterefs = []
self.footnotes: List[Tag] = []
for href in self.href2soup_html: for href in self.href2soup_html:
footnotes, noterefs = preprocess_footnotes(self.href2soup_html[href], self.href2soup_html) content, noterefs, footnotes_tags = preprocess_footnotes(self.href2soup_html[href],
self.footnotes.extend(footnotes) self.href2soup_html)
self.footnotes_contents.extend(content)
self.noterefs.extend(noterefs) self.noterefs.extend(noterefs)
for i, noteref in enumerate(self.noterefs): self.footnotes.extend(footnotes_tags)
for i, (noteref, footnote) in enumerate(zip(self.noterefs, self.footnotes)):
noteref.attrs['data-id'] = i + 1 noteref.attrs['data-id'] = i + 1
noteref.attrs['id'] = f'footnote-{i + 1}' noteref.attrs['id'] = f'footnote-{i + 1}'
footnote.attrs['href'] = f'#footnote-{i + 1}'
self.logger.log(f'Added {len(self.footnotes)} footnotes.') self.logger.log(f'Added {len(self.footnotes_contents)} footnotes.')
self.logger.log('TOC processing.') self.logger.log('TOC processing.')
self.href2subchapter_ids = defaultdict(list) self.href2subchapter_ids = defaultdict(list)
self.added_to_toc_hrefs = set() self.added_to_toc_hrefs = set()
@@ -284,6 +289,8 @@ class EpubPostprocessor:
anchor_soup = self.href2soup_html[href_in_link] anchor_soup = self.href2soup_html[href_in_link]
anchor_tags = anchor_soup.find_all(attrs={'id': new_id}) anchor_tags = anchor_soup.find_all(attrs={'id': new_id})
anchor_tags = anchor_tags or anchor_soup.find_all(attrs={'id': id_in_link})
if anchor_tags: if anchor_tags:
if len(anchor_tags) > 1: if len(anchor_tags) > 1:
self.logger.log(f'Warning in {href}: multiple anchors: {len(anchor_tags)} found.' self.logger.log(f'Warning in {href}: multiple anchors: {len(anchor_tags)} found.'
@@ -308,10 +315,9 @@ class EpubPostprocessor:
else: else:
internal_link_tag.attrs['converter-mark'] = 'bad-link' internal_link_tag.attrs['converter-mark'] = 'bad-link'
if 'page' not in id_in_link: self.logger.log(f'Error in {href}. While processing {internal_link_tag} no anchor found.'
self.logger.log(f'Error in {href}. While processing {internal_link_tag} no anchor found.' f' Should be anchor with new id={new_id} in {href_in_link} file.'
f' Should be anchor with new id={new_id} in {href_in_link} file.' f' Old id={id_in_link}')
f' Old id={id_in_link}')
def build_one_anchored_section(self, node): def build_one_anchored_section(self, node):
""" """
@@ -391,7 +397,7 @@ class EpubPostprocessor:
return { return {
"content": top_level_dict_chapters, "content": top_level_dict_chapters,
"footnotes": self.footnotes "footnotes": self.footnotes_contents
} }