forked from LiveCarta/BookConverter
[LAW-3626] fix
This commit is contained in:
@@ -519,10 +519,14 @@ class HTMLPreprocessor:
|
||||
if is_first_span:
|
||||
cleaned_text = self.clean_header_title(text)
|
||||
else:
|
||||
cleaned_text = re.sub(r'\s+', ' ', text).strip()
|
||||
cleaned_text = text # re.sub(r'\s+', ' ', text).strip()
|
||||
|
||||
tag.string = cleaned_text
|
||||
|
||||
if cleaned_text == '':
|
||||
tag.unwrap()
|
||||
return
|
||||
|
||||
for i, child in enumerate(tag.find_all(recursive=False)):
|
||||
if is_first_span and i == 0:
|
||||
self._clean_header_by_children(child, True)
|
||||
@@ -550,14 +554,13 @@ class HTMLPreprocessor:
|
||||
|
||||
self._clean_header_by_children(tag, is_first_span=True)
|
||||
|
||||
span_with_style_font = tag.find_all("span", {'style': re.compile(r'^font.+')})
|
||||
if span_with_style_font:
|
||||
for span in span_with_style_font:
|
||||
span.unwrap()
|
||||
b_tags = tag.find_all("b")
|
||||
[tag.unwrap() for tag in b_tags]
|
||||
|
||||
span_with_face = tag.find_all("span", {'face': re.compile(r'^.+')})
|
||||
if span_with_face:
|
||||
for span in span_with_face:
|
||||
spans = tag.find_all("span")
|
||||
if spans:
|
||||
for span in spans:
|
||||
style = span.attrs.get("style")
|
||||
span.unwrap()
|
||||
|
||||
tag.attrs = {}
|
||||
|
||||
Reference in New Issue
Block a user