forked from LiveCarta/BookConverter
epub converter: add removal of HTML comments from chapter content
This commit is contained in:
@@ -3,7 +3,7 @@ import pathlib
|
||||
import re
|
||||
from typing import List, Tuple
|
||||
|
||||
from bs4 import BeautifulSoup, NavigableString, Tag
|
||||
from bs4 import BeautifulSoup, NavigableString, Tag, Comment
|
||||
|
||||
from access import Access
|
||||
from livecarta_config import LawCartaConfig
|
||||
@@ -538,7 +538,10 @@ def prepare_title_and_content(title, chapter_tag: BeautifulSoup, remove_title_fr
|
||||
if hasattr(tag, 'attrs') and tag.attrs.get('class') and (tag.attrs.get('class') not in ['link-anchor',
|
||||
'footnote-element']):
|
||||
del tag.attrs['class']
|
||||
|
||||
# 3. comments removal
|
||||
comments = chapter_tag.findAll(text=lambda text: isinstance(text, Comment))
|
||||
for comment in comments:
|
||||
comment.extract()
|
||||
# content_str = re.sub(r'([\n\t\xa0])', ' ', str(content_tag))
|
||||
title_str = clean_title_from_numbering(title_str)
|
||||
return title_str, str(chapter_tag)
|
||||
|
||||
Reference in New Issue
Block a user