forked from LiveCarta/BookConverter
epub converter: fix
This commit is contained in:
@@ -302,6 +302,11 @@ def unwrap_structural_tags(body_tag):
|
|||||||
'figure', 'footer', 'iframe', 'span', 'p'
|
'figure', 'footer', 'iframe', 'span', 'p'
|
||||||
]
|
]
|
||||||
|
|
||||||
|
# comments removal
|
||||||
|
for tag in body_tag.find_all():
|
||||||
|
for element in tag(text=lambda text: isinstance(text, Comment)):
|
||||||
|
element.extract()
|
||||||
|
|
||||||
for div in body_tag.find_all("div"):
|
for div in body_tag.find_all("div"):
|
||||||
if div.attrs.get('class'):
|
if div.attrs.get('class'):
|
||||||
div_class = div.attrs['class'] if not isinstance(div.attrs['class'], list) else div.attrs['class'][0]
|
div_class = div.attrs['class'] if not isinstance(div.attrs['class'], list) else div.attrs['class'][0]
|
||||||
@@ -500,7 +505,7 @@ def preprocess_pre_tags(chapter_tag):
|
|||||||
|
|
||||||
for child in pre.children:
|
for child in pre.children:
|
||||||
if isinstance(child, NavigableString):
|
if isinstance(child, NavigableString):
|
||||||
cleaned_text = _prepare_formatted(pre.text)
|
cleaned_text = _prepare_formatted(str(child))
|
||||||
sub_strings = re.split('\r\n|\n|\r', cleaned_text)
|
sub_strings = re.split('\r\n|\n|\r', cleaned_text)
|
||||||
for string in sub_strings:
|
for string in sub_strings:
|
||||||
new_tag.append(NavigableString(string))
|
new_tag.append(NavigableString(string))
|
||||||
@@ -519,8 +524,7 @@ def preprocess_pre_tags(chapter_tag):
|
|||||||
|
|
||||||
new_tag.attrs['style'] = "font-family: courier new,courier,monospace; " \
|
new_tag.attrs['style'] = "font-family: courier new,courier,monospace; " \
|
||||||
"font-size: 14px; white-space: nowrap;"
|
"font-size: 14px; white-space: nowrap;"
|
||||||
pre.insert_before(new_tag)
|
pre.replace_with(new_tag)
|
||||||
pre.extract()
|
|
||||||
table = wrap_preformatted_span_with_table(chapter_tag, new_tag)
|
table = wrap_preformatted_span_with_table(chapter_tag, new_tag)
|
||||||
p_for_br = chapter_tag.new_tag("p")
|
p_for_br = chapter_tag.new_tag("p")
|
||||||
p_for_br.string = "\xa0"
|
p_for_br.string = "\xa0"
|
||||||
@@ -570,10 +574,6 @@ def prepare_title_and_content(title, chapter_tag: BeautifulSoup, remove_title_fr
|
|||||||
if hasattr(tag, 'attrs') and tag.attrs.get('class') and (tag.attrs.get('class') not in ['link-anchor',
|
if hasattr(tag, 'attrs') and tag.attrs.get('class') and (tag.attrs.get('class') not in ['link-anchor',
|
||||||
'footnote-element']):
|
'footnote-element']):
|
||||||
del tag.attrs['class']
|
del tag.attrs['class']
|
||||||
# 3. comments removal
|
|
||||||
for tag in chapter_tag.find_all():
|
|
||||||
for element in tag(text=lambda text: isinstance(text, Comment)):
|
|
||||||
element.extract()
|
|
||||||
# content_str = re.sub(r'([\n\t\xa0])', ' ', str(content_tag))
|
# content_str = re.sub(r'([\n\t\xa0])', ' ', str(content_tag))
|
||||||
title_str = clean_title_from_numbering(title_str)
|
title_str = clean_title_from_numbering(title_str)
|
||||||
return title_str, str(chapter_tag)
|
return title_str, str(chapter_tag)
|
||||||
|
|||||||
Reference in New Issue
Block a user