EPUB converter: remove HTML comments from chapter content

This commit is contained in:
shirshasa
2021-09-01 16:12:35 +03:00
parent 91b722f441
commit c4c776ea3e

View File

@@ -3,7 +3,7 @@ import pathlib
import re
from typing import List, Tuple
from bs4 import BeautifulSoup, NavigableString, Tag
from bs4 import BeautifulSoup, NavigableString, Tag, Comment
from access import Access
from livecarta_config import LawCartaConfig
@@ -538,7 +538,10 @@ def prepare_title_and_content(title, chapter_tag: BeautifulSoup, remove_title_fr
if hasattr(tag, 'attrs') and tag.attrs.get('class') and (tag.attrs.get('class') not in ['link-anchor',
'footnote-element']):
del tag.attrs['class']
# 3. remove HTML comments (bs4 Comment nodes) from the chapter markup
comments = chapter_tag.findAll(text=lambda text: isinstance(text, Comment))
for comment in comments:
comment.extract()
# content_str = re.sub(r'([\n\t\xa0])', ' ', str(content_tag))
title_str = clean_title_from_numbering(title_str)
return title_str, str(chapter_tag)