forked from LiveCarta/BookConverter
converter fix: remove zero-width Unicode characters from link text and hrefs
This commit is contained in:
@@ -277,6 +277,12 @@ class HTMLPreprocessor:
|
||||
tag.string = tag.text.replace('\u200c', '')
|
||||
tag['href'] = tag.attrs.get('href').replace('%E2%80%8C', '')
|
||||
|
||||
a_tags_with_href = self.body_tag.find_all('a', {'href': re.compile('^(?!#sdfootnote)')})
|
||||
for tag in a_tags_with_href:
|
||||
tag.string = tag.text.replace('\u200c', '')
|
||||
tag.string = tag.text.replace('\u200b', '')
|
||||
tag['href'] = tag.attrs.get('href').replace('%E2%80%8C', '')
|
||||
|
||||
@staticmethod
|
||||
def _clean_footnote_content(content):
|
||||
content = content.strip()
|
||||
|
||||
Reference in New Issue
Block a user