diff --git a/src/epub_postprocessor.py b/src/epub_postprocessor.py index 5a0c347..e559f6e 100644 --- a/src/epub_postprocessor.py +++ b/src/epub_postprocessor.py @@ -263,7 +263,7 @@ class EpubPostprocessor: tag.attrs['id'] = new_id # --------------------------------------------------------------------------------- - internal_link_reg1 = re.compile(r'(^.+\.(html|xhtml)$)') # anchor is a whole xhtml file + internal_link_reg1 = re.compile(r'(^(?!https?://).+\.(html|xhtml)$)') # anchor is a whole xhtml file for toc_href in self.added_to_toc_hrefs: soup = self.href2soup_html[toc_href] for internal_link_tag in soup.find_all('a', {'href': internal_link_reg1}): @@ -414,7 +414,7 @@ if __name__ == "__main__": logger_object = BookLogger(name=f'epub', main_logger=logger, book_id=0) - json_converter = EpubPostprocessor('/home/katerina/PycharmProjects/Jenia/converter/epub/9781284171242.epub', + json_converter = EpubPostprocessor('/home/katerina/PycharmProjects/Jenia/converter/epub/9781119682387_pre_code2.epub', logger=logger_object) tmp = json_converter.convert_to_dict()