LAW-6695|Change process of removing heading content

2023-05-13 16:23:44 +03:00
parent 1b96b52e4c
commit d81fc18403
3 changed files with 13 additions and 17 deletions
--- a/src/epub_converter/html_epub_processor.py
+++ b/src/epub_converter/html_epub_processor.py
@@ -100,6 +100,11 @@ class HtmlEpubProcessor:
            text = text.strip()  # delete extra spaces
            return text

+        def remove_text(found_tag: Tag):  # drop the non-blank text inside found_tag; the tag itself is not removed
+            for text_node in found_tag.find_all(text=True):  # every text node anywhere under found_tag
+                if text_node.strip():  # whitespace-only nodes are left untouched
+                    text_node.extract()  # detach just this text node from the tree
+
        title_of_chapter: str = title_of_chapter.lower()
        title_in_text: List[Tag] = chapter_tag.find_all(lambda tag: (title_of_chapter in text_preparing(tag) and
                                                                     len(text_preparing(tag)) != 0 and
@@ -110,10 +115,10 @@ class HtmlEpubProcessor:
                                                        re.findall(r"^h[1-5]$", tag.name or chapter_tag.name))
        if title_in_text:
            self.html_presets_processor.add_span_to_save_ids_for_links(title_in_text[-1], chapter_tag)
-            title_in_text[-1].extract()
+            remove_text(title_in_text[-1])
        elif text_in_title:
            [self.html_presets_processor.add_span_to_save_ids_for_links(tag, chapter_tag) for tag in text_in_title]
-            [tag.extract() for tag in text_in_title]
+            [remove_text(tag) for tag in text_in_title]

    @staticmethod
    def _class_removing(chapter_tag: BeautifulSoup):