epub converter: fix internal-link regexp so it no longer matches external http(s) URLs ending in .html/.xhtml

This commit is contained in:
shirshasa
2021-07-30 17:35:19 +03:00
parent fae85d5280
commit 4017ed62a2

View File

@@ -263,7 +263,7 @@ class EpubPostprocessor:
tag.attrs['id'] = new_id
# ---------------------------------------------------------------------------------
internal_link_reg1 = re.compile(r'(^.+\.(html|xhtml)$)') # anchor is a whole xhtml file
internal_link_reg1 = re.compile(r'(^(?!https?://).+\.(html|xhtml)$)') # anchor is a whole xhtml file
for toc_href in self.added_to_toc_hrefs:
soup = self.href2soup_html[toc_href]
for internal_link_tag in soup.find_all('a', {'href': internal_link_reg1}):
@@ -414,7 +414,7 @@ if __name__ == "__main__":
logger_object = BookLogger(name=f'epub', main_logger=logger, book_id=0)
json_converter = EpubPostprocessor('/home/katerina/PycharmProjects/Jenia/converter/epub/9781284171242.epub',
json_converter = EpubPostprocessor('/home/katerina/PycharmProjects/Jenia/converter/epub/9781119682387_pre_code2.epub',
logger=logger_object)
tmp = json_converter.convert_to_dict()