diff --git a/src/html_preprocessor.py b/src/html_preprocessor.py index 5962d18..71496f7 100644 --- a/src/html_preprocessor.py +++ b/src/html_preprocessor.py @@ -277,6 +277,12 @@ class HTMLPreprocessor: tag.string = tag.text.replace('\u200c', '') tag['href'] = tag.attrs.get('href').replace('%E2%80%8C', '') + a_tags_with_href = self.body_tag.find_all('a', {'href': re.compile('^(?!#sdfootnote)')}) + for tag in a_tags_with_href: + tag.string = tag.text.replace('\u200c', '') + tag.string = tag.text.replace('\u200b', '') + tag['href'] = tag.attrs.get('href').replace('%E2%80%8C', '') + @staticmethod def _clean_footnote_content(content): content = content.strip()