epub converter: prettify names

This commit is contained in:
shirshasa
2021-07-02 12:31:21 +03:00
parent 9ff9759793
commit 142f33ed90

View File

@@ -52,7 +52,7 @@ class EpubPostprocessor:
self.footnotes.extend(preprocess_footnotes(self.href2soup_html[href], self.href2soup_html))
self.logger.log(f'Added {len(self.footnotes)} footnotes.')
self.logger.log('TOC processing.')
self.href2ids = defaultdict(list)
self.href2subchapter_ids = defaultdict(list)
self.added_to_toc_hrefs = set()
self.adjacency_list: Dict[Union[NavPoint, -1], Union[list, None]] = {} # k = -1 if root, v = None if leaf
self.build_adjacency_list_from_toc(self.ebooklib_book.toc)
@@ -62,7 +62,7 @@ class EpubPostprocessor:
not_added = [x for x in self.href2soup_html if x not in self.added_to_toc_hrefs]
self.logger.log(f'Html documents not added to TOC: {not_added}.')
# read anchored blocks, split html into separate blocks
self.mark_and_line_href2soup_html() # used only after parsed toc, ids from toc needed
self.unwrap_all_html_soup() # used only after parsed toc, ids from toc needed
self.process_internal_links()
self.id_anchor2soup: Dict[tuple, BeautifulSoup] = {}
self.build_anchor2soup()
@@ -137,7 +137,7 @@ class EpubPostprocessor:
node = NavPoint(element)
if node.id:
self.id_anchor_exist_in_nav_points = True
self.href2ids[node.href].append(node.id)
self.href2subchapter_ids[node.href].append(node.id)
self.adjacency_list[node] = None
self.added_to_toc_hrefs.add(node.href)
return node
@@ -148,7 +148,7 @@ class EpubPostprocessor:
node = NavPoint(first)
if node.id:
self.id_anchor_exist_in_nav_points = True
self.href2ids[node.href].append(node.id)
self.href2subchapter_ids[node.href].append(node.id)
sub_nodes = []
for i in second:
@@ -183,10 +183,10 @@ class EpubPostprocessor:
self.adjacency_list[-1].append(node)
self.added_to_toc_hrefs.add(node.href)
def mark_and_line_href2soup_html(self):
def unwrap_all_html_soup(self):
# mark
for href in self.href2soup_html:
ids = self.href2ids[href]
ids = self.href2subchapter_ids[href]
for i in ids:
soup = self.href2soup_html[href]
tag = soup.find(id=i)