forked from LiveCarta/BookConverter
Converter fix: strip zero-width Unicode characters (U+200C, U+200B) from link text and the percent-encoded U+200C (%E2%80%8C) from hrefs
This commit is contained in:
@@ -277,6 +277,12 @@ class HTMLPreprocessor:
|
|||||||
tag.string = tag.text.replace('\u200c', '')
|
tag.string = tag.text.replace('\u200c', '')
|
||||||
tag['href'] = tag.attrs.get('href').replace('%E2%80%8C', '')
|
tag['href'] = tag.attrs.get('href').replace('%E2%80%8C', '')
|
||||||
|
|
||||||
|
a_tags_with_href = self.body_tag.find_all('a', {'href': re.compile('^(?!#sdfootnote)')})
|
||||||
|
for tag in a_tags_with_href:
|
||||||
|
tag.string = tag.text.replace('\u200c', '')
|
||||||
|
tag.string = tag.text.replace('\u200b', '')
|
||||||
|
tag['href'] = tag.attrs.get('href').replace('%E2%80%8C', '')
|
||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
def _clean_footnote_content(content):
|
def _clean_footnote_content(content):
|
||||||
content = content.strip()
|
content = content.strip()
|
||||||
|
|||||||
Reference in New Issue
Block a user