forked from LiveCarta/BookConverter
epub converter: fix regexp for internal links so that external http(s) URLs ending in .html/.xhtml are no longer matched
This commit is contained in:
@@ -263,7 +263,7 @@ class EpubPostprocessor:
|
||||
tag.attrs['id'] = new_id
|
||||
|
||||
# ---------------------------------------------------------------------------------
|
||||
internal_link_reg1 = re.compile(r'(^.+\.(html|xhtml)$)') # anchor is a whole xhtml file
|
||||
internal_link_reg1 = re.compile(r'(^(?!https?://).+\.(html|xhtml)$)') # anchor is a whole xhtml file
|
||||
for toc_href in self.added_to_toc_hrefs:
|
||||
soup = self.href2soup_html[toc_href]
|
||||
for internal_link_tag in soup.find_all('a', {'href': internal_link_reg1}):
|
||||
@@ -414,7 +414,7 @@ if __name__ == "__main__":
|
||||
|
||||
logger_object = BookLogger(name=f'epub', main_logger=logger, book_id=0)
|
||||
|
||||
json_converter = EpubPostprocessor('/home/katerina/PycharmProjects/Jenia/converter/epub/9781284171242.epub',
|
||||
json_converter = EpubPostprocessor('/home/katerina/PycharmProjects/Jenia/converter/epub/9781119682387_pre_code2.epub',
|
||||
logger=logger_object)
|
||||
tmp = json_converter.convert_to_dict()
|
||||
|
||||
|
||||
Reference in New Issue
Block a user