From 12baa07c0aef14ce96523391687d0d4d8d8e0226 Mon Sep 17 00:00:00 2001 From: shirshasa Date: Fri, 30 Jul 2021 14:01:31 +0300 Subject: [PATCH] epub converter: prettify --- src/epub_postprocessor.py | 17 ++++++++--------- 1 file changed, 8 insertions(+), 9 deletions(-) diff --git a/src/epub_postprocessor.py b/src/epub_postprocessor.py index 0678032..409ceb9 100644 --- a/src/epub_postprocessor.py +++ b/src/epub_postprocessor.py @@ -238,21 +238,20 @@ class EpubPostprocessor: tag.attrs['id'] = new_id # --------------------------------------------------------------------------------- - internal_link_reg = re.compile(r'(^.+\.(html|xhtml)$)') + internal_link_reg1 = re.compile(r'(^.+\.(html|xhtml)$)') for href in self.added_to_toc_hrefs: soup = self.href2soup_html[href] - tags = soup.find_all('a', {'href': internal_link_reg}) - for t in tags: - href_in_link = t.attrs['href'] + for internal_link_tag in soup.find_all('a', {'href': internal_link_reg1}): + href_in_link = internal_link_tag.attrs['href'] full_path = [path for path in self.added_to_toc_hrefs if href_in_link in path] if not full_path: self.logger.log(f'Error in {href} file. No {href_in_link} file found in added to TOC documents. ' - f'While processing href in {t}.') + f'While processing href in {internal_link_tag}.') continue href_in_link = full_path[0] new_id = self._create_unique_id(href_in_link, '') - t.attrs['placeholder'] = '{{tempStyleToAnchor-' + new_id + '}}' + internal_link_tag.attrs['placeholder'] = '{{tempStyleToAnchor-' + new_id + '}}' if new_id not in self.internal_anchors: anchor_soup = self.href2soup_html[href_in_link] new_anchor_span = soup.new_tag("span") @@ -262,14 +261,14 @@ class EpubPostprocessor: anchor_soup.insert(0, new_anchor_span) self.internal_anchors.add(new_id) - del t.attrs['href'] + del internal_link_tag.attrs['href'] # ------------------------------------------------------------------------ # write placeholder to all internal links - internal_link_reg = re.compile(r'(^.+\.(html|xhtml)\#.+)|(^\#.+)') + internal_link_reg2 = re.compile(r'(^.+\.(html|xhtml)\#.+)|(^\#.+)') for href in self.added_to_toc_hrefs: soup = self.href2soup_html[href] - for internal_link_tag in soup.find_all('a', {'href': internal_link_reg}): + for internal_link_tag in soup.find_all('a', {'href': internal_link_reg2}): href_in_link, id_in_link = internal_link_tag.attrs['href'].split('#') if not href_in_link: href_in_link = href