epub converter: prettify

This commit is contained in:
shirshasa
2021-07-30 14:01:31 +03:00
parent 899a3e89ac
commit 12baa07c0a

View File

@@ -238,21 +238,20 @@ class EpubPostprocessor:
tag.attrs['id'] = new_id tag.attrs['id'] = new_id
# --------------------------------------------------------------------------------- # ---------------------------------------------------------------------------------
internal_link_reg = re.compile(r'(^.+\.(html|xhtml)$)') internal_link_reg1 = re.compile(r'(^.+\.(html|xhtml)$)')
for href in self.added_to_toc_hrefs: for href in self.added_to_toc_hrefs:
soup = self.href2soup_html[href] soup = self.href2soup_html[href]
tags = soup.find_all('a', {'href': internal_link_reg}) for internal_link_tag in soup.find_all('a', {'href': internal_link_reg1}):
for t in tags: href_in_link = internal_link_tag.attrs['href']
href_in_link = t.attrs['href']
full_path = [path for path in self.added_to_toc_hrefs if href_in_link in path] full_path = [path for path in self.added_to_toc_hrefs if href_in_link in path]
if not full_path: if not full_path:
self.logger.log(f'Error in {href} file. No {href_in_link} file found in added to TOC documents. ' self.logger.log(f'Error in {href} file. No {href_in_link} file found in added to TOC documents. '
f'While processing href in {t}.') f'While processing href in {internal_link_tag}.')
continue continue
href_in_link = full_path[0] href_in_link = full_path[0]
new_id = self._create_unique_id(href_in_link, '') new_id = self._create_unique_id(href_in_link, '')
t.attrs['placeholder'] = '{{tempStyleToAnchor-' + new_id + '}}' internal_link_tag.attrs['placeholder'] = '{{tempStyleToAnchor-' + new_id + '}}'
if new_id not in self.internal_anchors: if new_id not in self.internal_anchors:
anchor_soup = self.href2soup_html[href_in_link] anchor_soup = self.href2soup_html[href_in_link]
new_anchor_span = soup.new_tag("span") new_anchor_span = soup.new_tag("span")
@@ -262,14 +261,14 @@ class EpubPostprocessor:
anchor_soup.insert(0, new_anchor_span) anchor_soup.insert(0, new_anchor_span)
self.internal_anchors.add(new_id) self.internal_anchors.add(new_id)
del t.attrs['href'] del internal_link_tag.attrs['href']
# ------------------------------------------------------------------------ # ------------------------------------------------------------------------
# write placeholder to all internal links # write placeholder to all internal links
internal_link_reg = re.compile(r'(^.+\.(html|xhtml)\#.+)|(^\#.+)') internal_link_reg2 = re.compile(r'(^.+\.(html|xhtml)\#.+)|(^\#.+)')
for href in self.added_to_toc_hrefs: for href in self.added_to_toc_hrefs:
soup = self.href2soup_html[href] soup = self.href2soup_html[href]
for internal_link_tag in soup.find_all('a', {'href': internal_link_reg}): for internal_link_tag in soup.find_all('a', {'href': internal_link_reg2}):
href_in_link, id_in_link = internal_link_tag.attrs['href'].split('#') href_in_link, id_in_link = internal_link_tag.attrs['href'].split('#')
if not href_in_link: if not href_in_link:
href_in_link = href href_in_link = href