forked from LiveCarta/BookConverter
epub converter: update footnote
This commit is contained in:
@@ -4,11 +4,11 @@ import logging
|
||||
import re
|
||||
from os.path import dirname, normpath, join
|
||||
from collections import defaultdict
|
||||
from typing import Dict, Union
|
||||
from typing import Dict, Union, List
|
||||
from itertools import chain
|
||||
|
||||
import ebooklib
|
||||
from bs4 import BeautifulSoup
|
||||
from bs4 import BeautifulSoup, Tag
|
||||
from ebooklib import epub
|
||||
from ebooklib.epub import Link, Section
|
||||
|
||||
@@ -47,17 +47,22 @@ class EpubPostprocessor:
|
||||
self.add_css_styles2soup()
|
||||
|
||||
self.logger.log('Footnotes processing.')
|
||||
self.footnotes = []
|
||||
self.footnotes_contents: List[str] = []
|
||||
self.noterefs = []
|
||||
self.footnotes: List[Tag] = []
|
||||
for href in self.href2soup_html:
|
||||
footnotes, noterefs = preprocess_footnotes(self.href2soup_html[href], self.href2soup_html)
|
||||
self.footnotes.extend(footnotes)
|
||||
content, noterefs, footnotes_tags = preprocess_footnotes(self.href2soup_html[href],
|
||||
self.href2soup_html)
|
||||
self.footnotes_contents.extend(content)
|
||||
self.noterefs.extend(noterefs)
|
||||
for i, noteref in enumerate(self.noterefs):
|
||||
self.footnotes.extend(footnotes_tags)
|
||||
|
||||
for i, (noteref, footnote) in enumerate(zip(self.noterefs, self.footnotes)):
|
||||
noteref.attrs['data-id'] = i + 1
|
||||
noteref.attrs['id'] = f'footnote-{i + 1}'
|
||||
footnote.attrs['href'] = f'#footnote-{i + 1}'
|
||||
|
||||
self.logger.log(f'Added {len(self.footnotes)} footnotes.')
|
||||
self.logger.log(f'Added {len(self.footnotes_contents)} footnotes.')
|
||||
self.logger.log('TOC processing.')
|
||||
self.href2subchapter_ids = defaultdict(list)
|
||||
self.added_to_toc_hrefs = set()
|
||||
@@ -284,6 +289,8 @@ class EpubPostprocessor:
|
||||
|
||||
anchor_soup = self.href2soup_html[href_in_link]
|
||||
anchor_tags = anchor_soup.find_all(attrs={'id': new_id})
|
||||
anchor_tags = anchor_tags or anchor_soup.find_all(attrs={'id': id_in_link})
|
||||
|
||||
if anchor_tags:
|
||||
if len(anchor_tags) > 1:
|
||||
self.logger.log(f'Warning in {href}: multiple anchors: {len(anchor_tags)} found.'
|
||||
@@ -308,10 +315,9 @@ class EpubPostprocessor:
|
||||
|
||||
else:
|
||||
internal_link_tag.attrs['converter-mark'] = 'bad-link'
|
||||
if 'page' not in id_in_link:
|
||||
self.logger.log(f'Error in {href}. While processing {internal_link_tag} no anchor found.'
|
||||
f' Should be anchor with new id={new_id} in {href_in_link} file.'
|
||||
f' Old id={id_in_link}')
|
||||
self.logger.log(f'Error in {href}. While processing {internal_link_tag} no anchor found.'
|
||||
f' Should be anchor with new id={new_id} in {href_in_link} file.'
|
||||
f' Old id={id_in_link}')
|
||||
|
||||
def build_one_anchored_section(self, node):
|
||||
"""
|
||||
@@ -391,7 +397,7 @@ class EpubPostprocessor:
|
||||
|
||||
return {
|
||||
"content": top_level_dict_chapters,
|
||||
"footnotes": self.footnotes
|
||||
"footnotes": self.footnotes_contents
|
||||
}
|
||||
|
||||
|
||||
|
||||
Reference in New Issue
Block a user