forked from LiveCarta/BookConverter
[LAW-3626] fix
This commit is contained in:
@@ -519,10 +519,14 @@ class HTMLPreprocessor:
|
|||||||
if is_first_span:
|
if is_first_span:
|
||||||
cleaned_text = self.clean_header_title(text)
|
cleaned_text = self.clean_header_title(text)
|
||||||
else:
|
else:
|
||||||
cleaned_text = re.sub(r'\s+', ' ', text).strip()
|
cleaned_text = text # re.sub(r'\s+', ' ', text).strip()
|
||||||
|
|
||||||
tag.string = cleaned_text
|
tag.string = cleaned_text
|
||||||
|
|
||||||
|
if cleaned_text == '':
|
||||||
|
tag.unwrap()
|
||||||
|
return
|
||||||
|
|
||||||
for i, child in enumerate(tag.find_all(recursive=False)):
|
for i, child in enumerate(tag.find_all(recursive=False)):
|
||||||
if is_first_span and i == 0:
|
if is_first_span and i == 0:
|
||||||
self._clean_header_by_children(child, True)
|
self._clean_header_by_children(child, True)
|
||||||
@@ -550,14 +554,13 @@ class HTMLPreprocessor:
|
|||||||
|
|
||||||
self._clean_header_by_children(tag, is_first_span=True)
|
self._clean_header_by_children(tag, is_first_span=True)
|
||||||
|
|
||||||
span_with_style_font = tag.find_all("span", {'style': re.compile(r'^font.+')})
|
b_tags = tag.find_all("b")
|
||||||
if span_with_style_font:
|
[tag.unwrap() for tag in b_tags]
|
||||||
for span in span_with_style_font:
|
|
||||||
span.unwrap()
|
|
||||||
|
|
||||||
span_with_face = tag.find_all("span", {'face': re.compile(r'^.+')})
|
spans = tag.find_all("span")
|
||||||
if span_with_face:
|
if spans:
|
||||||
for span in span_with_face:
|
for span in spans:
|
||||||
|
style = span.attrs.get("style")
|
||||||
span.unwrap()
|
span.unwrap()
|
||||||
|
|
||||||
tag.attrs = {}
|
tag.attrs = {}
|
||||||
|
|||||||
Reference in New Issue
Block a user