diff --git a/src/html_preprocessor.py b/src/html_preprocessor.py index d8c19cb..5962d18 100644 --- a/src/html_preprocessor.py +++ b/src/html_preprocessor.py @@ -322,6 +322,15 @@ class HTMLPreprocessor: for tag_a in cont_tag.find_all('a', {'class': 'sdfootnotesym'}): tag_a.decompose() + # remove font-size + for span in cont_tag.find_all('span', {'style': re.compile('font-size')}): + style = span.get('style') + style = re.sub(r"font-size: \d+px", "", style) + if style == '': + del span.attrs['style'] + else: + span.attrs['style'] = style + unicode_string = '' for child in cont_tag.children: if type(child) is NavigableString: