From 4017ed62a251866fd4df7f536a81549de8ddbc3d Mon Sep 17 00:00:00 2001 From: shirshasa Date: Fri, 30 Jul 2021 17:35:19 +0300 Subject: [PATCH] epub converter: fix regexp in internal links --- src/epub_postprocessor.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/epub_postprocessor.py b/src/epub_postprocessor.py index 5a0c347..e559f6e 100644 --- a/src/epub_postprocessor.py +++ b/src/epub_postprocessor.py @@ -263,7 +263,7 @@ class EpubPostprocessor: tag.attrs['id'] = new_id # --------------------------------------------------------------------------------- - internal_link_reg1 = re.compile(r'(^.+\.(html|xhtml)$)') # anchor is a whole xhtml file + internal_link_reg1 = re.compile(r'(^(?!https?://).+\.(html|xhtml)$)') # anchor is a whole xhtml file for toc_href in self.added_to_toc_hrefs: soup = self.href2soup_html[toc_href] for internal_link_tag in soup.find_all('a', {'href': internal_link_reg1}): @@ -414,7 +414,7 @@ if __name__ == "__main__": logger_object = BookLogger(name=f'epub', main_logger=logger, book_id=0) - json_converter = EpubPostprocessor('/home/katerina/PycharmProjects/Jenia/converter/epub/9781284171242.epub', + json_converter = EpubPostprocessor('/home/katerina/PycharmProjects/Jenia/converter/epub/9781119682387_pre_code2.epub', logger=logger_object) tmp = json_converter.convert_to_dict()