forked from LiveCarta/BookConverter
Heading removal fix
This commit is contained in:
@@ -276,20 +276,25 @@ class HtmlEpubPreprocessor:
|
||||
|
||||
"""
|
||||
title_of_chapter = title_of_chapter.lower()
|
||||
if title_of_chapter == "chapter 1":
|
||||
pass
|
||||
for tag in chapter_tag.contents:
|
||||
text = tag if isinstance(tag, NavigableString) else tag.text
|
||||
if re.sub(r"[\s\xa0]", "", text):
|
||||
text = re.sub(r"[\s\xa0]", " ", text).lower()
|
||||
text = text.strip() # delete extra spaces
|
||||
if title_of_chapter == text or \
|
||||
(title_of_chapter in text and
|
||||
re.findall(r"^h[1-3]$", tag.name or chapter_tag.name)):
|
||||
self._add_span_to_save_ids_for_links(tag, chapter_tag)
|
||||
if not isinstance(tag, NavigableString):
|
||||
if title_of_chapter == text or \
|
||||
(title_of_chapter in text and
|
||||
re.findall(r"^h[1-3]$", tag.name or chapter_tag.name)):
|
||||
self._add_span_to_save_ids_for_links(tag, chapter_tag)
|
||||
tag.extract()
|
||||
return
|
||||
elif not self._remove_headings_content(tag, title_of_chapter):
|
||||
break
|
||||
else:
|
||||
tag.extract()
|
||||
return
|
||||
elif not isinstance(tag, NavigableString):
|
||||
if not self._remove_headings_content(tag, title_of_chapter):
|
||||
break
|
||||
|
||||
@staticmethod
|
||||
def _process_tables(chapter_tag: BeautifulSoup):
|
||||
|
||||
Reference in New Issue
Block a user