epub converter: fix comments removal

This commit is contained in:
shirshasa
2021-09-03 10:51:01 +03:00
parent 2f090eeac1
commit 60b83ce650

View File

@@ -89,7 +89,7 @@ def preprocess_table(body_tag: BeautifulSoup):
 table.attrs['border'] = '1'
-def _process_lists(body_tag):
+def process_lists(body_tag):
 """
 Function to process tags <li>.
 Unwrap <p> tags.
@@ -560,7 +560,7 @@ def prepare_title_and_content(title, chapter_tag: BeautifulSoup, remove_title_fr
 # 1. heading removal
 if remove_title_from_chapter:
 clean_headings_content(chapter_tag, title_str)
-_process_lists(chapter_tag)
+process_lists(chapter_tag)
 preprocess_table(chapter_tag)
 preprocess_code_tags(chapter_tag)
 preprocess_pre_tags(chapter_tag)
@@ -571,9 +571,9 @@ def prepare_title_and_content(title, chapter_tag: BeautifulSoup, remove_title_fr
 'footnote-element']):
 del tag.attrs['class']
 # 3. comments removal
-comments = chapter_tag.findAll(text=lambda text: isinstance(text, Comment))
-for comment in comments:
-comment.extract()
+for tag in chapter_tag.find_all():
+for element in tag(text=lambda text: isinstance(text, Comment)):
+element.extract()
 # content_str = re.sub(r'([\n\t\xa0])', ' ', str(content_tag))
 title_str = clean_title_from_numbering(title_str)
 return title_str, str(chapter_tag)