Heading removal fix

This commit is contained in:
Kiryl
2022-07-20 15:44:28 +03:00
parent 20fa1bfa86
commit 4f7aa69ab3

View File

@@ -276,20 +276,25 @@ class HtmlEpubPreprocessor:
"""
title_of_chapter = title_of_chapter.lower()
if title_of_chapter == "chapter 1":
pass
for tag in chapter_tag.contents:
text = tag if isinstance(tag, NavigableString) else tag.text
if re.sub(r"[\s\xa0]", "", text):
text = re.sub(r"[\s\xa0]", " ", text).lower()
text = text.strip() # delete extra spaces
if title_of_chapter == text or \
(title_of_chapter in text and
re.findall(r"^h[1-3]$", tag.name or chapter_tag.name)):
self._add_span_to_save_ids_for_links(tag, chapter_tag)
if not isinstance(tag, NavigableString):
if title_of_chapter == text or \
(title_of_chapter in text and
re.findall(r"^h[1-3]$", tag.name or chapter_tag.name)):
self._add_span_to_save_ids_for_links(tag, chapter_tag)
tag.extract()
return
elif not self._remove_headings_content(tag, title_of_chapter):
break
else:
tag.extract()
return
elif not isinstance(tag, NavigableString):
if not self._remove_headings_content(tag, title_of_chapter):
break
@staticmethod
def _process_tables(chapter_tag: BeautifulSoup):