epub converter: remove HTML comments from chapter content

This commit is contained in:
shirshasa
2021-09-01 16:12:35 +03:00
parent 91b722f441
commit c4c776ea3e

View File

@@ -3,7 +3,7 @@ import pathlib
import re import re
from typing import List, Tuple from typing import List, Tuple
from bs4 import BeautifulSoup, NavigableString, Tag from bs4 import BeautifulSoup, NavigableString, Tag, Comment
from access import Access from access import Access
from livecarta_config import LawCartaConfig from livecarta_config import LawCartaConfig
@@ -538,7 +538,10 @@ def prepare_title_and_content(title, chapter_tag: BeautifulSoup, remove_title_fr
if hasattr(tag, 'attrs') and tag.attrs.get('class') and (tag.attrs.get('class') not in ['link-anchor', if hasattr(tag, 'attrs') and tag.attrs.get('class') and (tag.attrs.get('class') not in ['link-anchor',
'footnote-element']): 'footnote-element']):
del tag.attrs['class'] del tag.attrs['class']
# 3. remove HTML comment nodes (<!-- ... -->) from the chapter markup
comments = chapter_tag.findAll(text=lambda text: isinstance(text, Comment))
for comment in comments:
comment.extract()
# content_str = re.sub(r'([\n\t\xa0])', ' ', str(content_tag)) # content_str = re.sub(r'([\n\t\xa0])', ' ', str(content_tag))
title_str = clean_title_from_numbering(title_str) title_str = clean_title_from_numbering(title_str)
return title_str, str(chapter_tag) return title_str, str(chapter_tag)