forked from LiveCarta/BookConverter
epub converter: add comments removal
This commit is contained in:
@@ -3,7 +3,7 @@ import pathlib
|
|||||||
import re
|
import re
|
||||||
from typing import List, Tuple
|
from typing import List, Tuple
|
||||||
|
|
||||||
from bs4 import BeautifulSoup, NavigableString, Tag
|
from bs4 import BeautifulSoup, NavigableString, Tag, Comment
|
||||||
|
|
||||||
from access import Access
|
from access import Access
|
||||||
from livecarta_config import LawCartaConfig
|
from livecarta_config import LawCartaConfig
|
||||||
@@ -538,7 +538,10 @@ def prepare_title_and_content(title, chapter_tag: BeautifulSoup, remove_title_fr
|
|||||||
if hasattr(tag, 'attrs') and tag.attrs.get('class') and (tag.attrs.get('class') not in ['link-anchor',
|
if hasattr(tag, 'attrs') and tag.attrs.get('class') and (tag.attrs.get('class') not in ['link-anchor',
|
||||||
'footnote-element']):
|
'footnote-element']):
|
||||||
del tag.attrs['class']
|
del tag.attrs['class']
|
||||||
|
# 3. comments removal
|
||||||
|
comments = chapter_tag.findAll(text=lambda text: isinstance(text, Comment))
|
||||||
|
for comment in comments:
|
||||||
|
comment.extract()
|
||||||
# content_str = re.sub(r'([\n\t\xa0])', ' ', str(content_tag))
|
# content_str = re.sub(r'([\n\t\xa0])', ' ', str(content_tag))
|
||||||
title_str = clean_title_from_numbering(title_str)
|
title_str = clean_title_from_numbering(title_str)
|
||||||
return title_str, str(chapter_tag)
|
return title_str, str(chapter_tag)
|
||||||
|
|||||||
Reference in New Issue
Block a user