From 5a237c3974624d740423ab9b731861029c496518 Mon Sep 17 00:00:00 2001 From: Kiryl Date: Tue, 21 Jun 2022 16:17:05 +0300 Subject: [PATCH] Fix headings cleaning problem --- src/epub_converter/html_epub_preprocessor.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/src/epub_converter/html_epub_preprocessor.py b/src/epub_converter/html_epub_preprocessor.py index efdba02..6944caf 100644 --- a/src/epub_converter/html_epub_preprocessor.py +++ b/src/epub_converter/html_epub_preprocessor.py @@ -186,12 +186,13 @@ def _remove_headings_content(content_tag, title_of_chapter: str): for tag in content_tag.contents: text = tag if isinstance(tag, NavigableString) else tag.text if text: - text = re.sub(r"^[\s\xa0]+|[\s\xa0]+$", " ", text).lower() + text = re.sub(r"[\s\xa0]", " ", text).lower() + text = text.strip() # delete extra spaces if title_of_chapter == text or \ (title_of_chapter in text and re.findall(r"^h[1-3]$", tag.name)): _add_span_to_save_ids_for_links(tag, content_tag) tag.extract() - break + break # todo remove