Fix heading-cleaning problem

This commit is contained in:
Kiryl
2022-06-21 16:17:05 +03:00
parent dcec9bad2e
commit 5a237c3974

View File

@@ -186,12 +186,13 @@ def _remove_headings_content(content_tag, title_of_chapter: str):
for tag in content_tag.contents:
text = tag if isinstance(tag, NavigableString) else tag.text
if text:
text = re.sub(r"^[\s\xa0]+|[\s\xa0]+$", " ", text).lower()
text = re.sub(r"[\s\xa0]", " ", text).lower()
text = text.strip() # delete extra spaces
if title_of_chapter == text or \
(title_of_chapter in text and re.findall(r"^h[1-3]$", tag.name)):
_add_span_to_save_ids_for_links(tag, content_tag)
tag.extract()
break
break
# todo remove