From 60b83ce6502cd27eba28271bcb16adef05f46095 Mon Sep 17 00:00:00 2001 From: shirshasa Date: Fri, 3 Sep 2021 10:51:01 +0300 Subject: [PATCH] epub converter: fix comments removal --- src/html_epub_preprocessor.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/src/html_epub_preprocessor.py b/src/html_epub_preprocessor.py index 6108da3..10714cf 100644 --- a/src/html_epub_preprocessor.py +++ b/src/html_epub_preprocessor.py @@ -89,7 +89,7 @@ def preprocess_table(body_tag: BeautifulSoup): table.attrs['border'] = '1' -def _process_lists(body_tag): +def process_lists(body_tag): """ Function to process tags
<li>. Unwrap <p> tags. @@ -560,7 +560,7 @@ def prepare_title_and_content(title, chapter_tag: BeautifulSoup, remove_title_fr # 1. heading removal if remove_title_from_chapter: clean_headings_content(chapter_tag, title_str) - _process_lists(chapter_tag) + process_lists(chapter_tag) preprocess_table(chapter_tag) preprocess_code_tags(chapter_tag) preprocess_pre_tags(chapter_tag) @@ -571,9 +571,9 @@ def prepare_title_and_content(title, chapter_tag: BeautifulSoup, remove_title_fr 'footnote-element']): del tag.attrs['class'] # 3. comments removal - comments = chapter_tag.findAll(text=lambda text: isinstance(text, Comment)) - for comment in comments: - comment.extract() + for tag in chapter_tag.find_all(): + for element in tag(text=lambda text: isinstance(text, Comment)): + element.extract() # content_str = re.sub(r'([\n\t\xa0])', ' ', str(content_tag)) title_str = clean_title_from_numbering(title_str) return title_str, str(chapter_tag)