forked from LiveCarta/BookConverter
Heading removal fix
This commit is contained in:
@@ -276,20 +276,25 @@ class HtmlEpubPreprocessor:
|
|||||||
|
|
||||||
"""
|
"""
|
||||||
title_of_chapter = title_of_chapter.lower()
|
title_of_chapter = title_of_chapter.lower()
|
||||||
|
if title_of_chapter == "chapter 1":
|
||||||
|
pass
|
||||||
for tag in chapter_tag.contents:
|
for tag in chapter_tag.contents:
|
||||||
text = tag if isinstance(tag, NavigableString) else tag.text
|
text = tag if isinstance(tag, NavigableString) else tag.text
|
||||||
if re.sub(r"[\s\xa0]", "", text):
|
if re.sub(r"[\s\xa0]", "", text):
|
||||||
text = re.sub(r"[\s\xa0]", " ", text).lower()
|
text = re.sub(r"[\s\xa0]", " ", text).lower()
|
||||||
text = text.strip() # delete extra spaces
|
text = text.strip() # delete extra spaces
|
||||||
|
if not isinstance(tag, NavigableString):
|
||||||
if title_of_chapter == text or \
|
if title_of_chapter == text or \
|
||||||
(title_of_chapter in text and
|
(title_of_chapter in text and
|
||||||
re.findall(r"^h[1-3]$", tag.name or chapter_tag.name)):
|
re.findall(r"^h[1-3]$", tag.name or chapter_tag.name)):
|
||||||
self._add_span_to_save_ids_for_links(tag, chapter_tag)
|
self._add_span_to_save_ids_for_links(tag, chapter_tag)
|
||||||
tag.extract()
|
tag.extract()
|
||||||
return
|
return
|
||||||
elif not isinstance(tag, NavigableString):
|
elif not self._remove_headings_content(tag, title_of_chapter):
|
||||||
if not self._remove_headings_content(tag, title_of_chapter):
|
|
||||||
break
|
break
|
||||||
|
else:
|
||||||
|
tag.extract()
|
||||||
|
return
|
||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
def _process_tables(chapter_tag: BeautifulSoup):
|
def _process_tables(chapter_tag: BeautifulSoup):
|
||||||
|
|||||||
Reference in New Issue
Block a user