forked from LiveCarta/BookConverter
epub converter: fix
This commit is contained in:
@@ -302,6 +302,11 @@ def unwrap_structural_tags(body_tag):
|
||||
'figure', 'footer', 'iframe', 'span', 'p'
|
||||
]
|
||||
|
||||
+# comments removal
|
||||
+for tag in body_tag.find_all():
|
||||
+for element in tag(text=lambda text: isinstance(text, Comment)):
|
||||
+element.extract()
|
||||
|
||||
for div in body_tag.find_all("div"):
|
||||
if div.attrs.get('class'):
|
||||
div_class = div.attrs['class'] if not isinstance(div.attrs['class'], list) else div.attrs['class'][0]
|
||||
@@ -500,7 +505,7 @@ def preprocess_pre_tags(chapter_tag):
|
||||
|
||||
for child in pre.children:
|
||||
if isinstance(child, NavigableString):
|
||||
-cleaned_text = _prepare_formatted(pre.text)
|
||||
+cleaned_text = _prepare_formatted(str(child))
|
||||
sub_strings = re.split('\r\n|\n|\r', cleaned_text)
|
||||
for string in sub_strings:
|
||||
new_tag.append(NavigableString(string))
|
||||
@@ -519,8 +524,7 @@ def preprocess_pre_tags(chapter_tag):
|
||||
|
||||
new_tag.attrs['style'] = "font-family: courier new,courier,monospace; " \
|
||||
"font-size: 14px; white-space: nowrap;"
|
||||
-pre.insert_before(new_tag)
|
||||
-pre.extract()
|
||||
+pre.replace_with(new_tag)
|
||||
table = wrap_preformatted_span_with_table(chapter_tag, new_tag)
|
||||
p_for_br = chapter_tag.new_tag("p")
|
||||
p_for_br.string = "\xa0"
|
||||
@@ -570,10 +574,6 @@ def prepare_title_and_content(title, chapter_tag: BeautifulSoup, remove_title_fr
|
||||
if hasattr(tag, 'attrs') and tag.attrs.get('class') and (tag.attrs.get('class') not in ['link-anchor',
|
||||
'footnote-element']):
|
||||
del tag.attrs['class']
|
||||
-# 3. comments removal
|
||||
-for tag in chapter_tag.find_all():
|
||||
-for element in tag(text=lambda text: isinstance(text, Comment)):
|
||||
-element.extract()
|
||||
# content_str = re.sub(r'([\n\t\xa0])', ' ', str(content_tag))
|
||||
title_str = clean_title_from_numbering(title_str)
|
||||
return title_str, str(chapter_tag)
|
||||
|
||||
Reference in New Issue
Block a user