[LAW-3626] Fix header span cleanup in HTMLPreprocessor

This commit is contained in:
shirshasa
2020-10-13 18:50:11 +03:00
parent 5917fe51e0
commit d22103239f

View File

@@ -519,10 +519,14 @@ class HTMLPreprocessor:
if is_first_span:
cleaned_text = self.clean_header_title(text)
else:
cleaned_text = re.sub(r'\s+', ' ', text).strip()
cleaned_text = text # re.sub(r'\s+', ' ', text).strip()
tag.string = cleaned_text
if cleaned_text == '':
tag.unwrap()
return
for i, child in enumerate(tag.find_all(recursive=False)):
if is_first_span and i == 0:
self._clean_header_by_children(child, True)
@@ -550,14 +554,13 @@ class HTMLPreprocessor:
self._clean_header_by_children(tag, is_first_span=True)
span_with_style_font = tag.find_all("span", {'style': re.compile(r'^font.+')})
if span_with_style_font:
for span in span_with_style_font:
span.unwrap()
b_tags = tag.find_all("b")
[tag.unwrap() for tag in b_tags]
span_with_face = tag.find_all("span", {'face': re.compile(r'^.+')})
if span_with_face:
for span in span_with_face:
spans = tag.find_all("span")
if spans:
for span in spans:
style = span.attrs.get("style")
span.unwrap()
tag.attrs = {}