epub converter: fix comments removal

2021-09-03 10:51:01 +03:00
parent 2f090eeac1
commit 60b83ce650
1 changed file with 5 additions and 5 deletions
--- a/src/html_epub_preprocessor.py
+++ b/src/html_epub_preprocessor.py
@@ -89,7 +89,7 @@ def preprocess_table(body_tag: BeautifulSoup):
            table.attrs['border'] = '1'
-def _process_lists(body_tag):
+def process_lists(body_tag):
    """
    Function to process tags <li>.
    Unwrap <p> tags.
@@ -560,7 +560,7 @@ def prepare_title_and_content(title, chapter_tag: BeautifulSoup, remove_title_fr
    # 1. heading removal
    if remove_title_from_chapter:
        clean_headings_content(chapter_tag, title_str)
-    _process_lists(chapter_tag)
+    process_lists(chapter_tag)
    preprocess_table(chapter_tag)
    preprocess_code_tags(chapter_tag)
    preprocess_pre_tags(chapter_tag)
@@ -571,9 +571,9 @@ def prepare_title_and_content(title, chapter_tag: BeautifulSoup, remove_title_fr
                                                                                                'footnote-element']):
            del tag.attrs['class']
    # 3. comments removal
-    comments = chapter_tag.findAll(text=lambda text: isinstance(text, Comment))
+    for tag in chapter_tag.find_all():
-    for comment in comments:
+        for element in tag(text=lambda text: isinstance(text, Comment)):
-        comment.extract()
+            element.extract()
    # content_str = re.sub(r'([\n\t\xa0])', ' ', str(content_tag))
    title_str = clean_title_from_numbering(title_str)
    return title_str, str(chapter_tag)