epub converter: small fix

This commit is contained in:
shirshasa
2021-07-08 15:09:01 +03:00
parent 7963486d7c
commit 00b808eaad

View File

@@ -276,17 +276,6 @@ def unwrap_structural_tags(body_tag):
'div', 'section', 'article', 'main', 'body', 'html', 'aside', 'canvas', 'data',
'figure', 'footer', 'iframe', 'span', 'p'
]
# must run before the other tag processing so that the converter's empty tags carrying an id are not removed
# for s in body_tag.find_all("span"):
# if (s.attrs.get('epub:type') == 'pagebreak') or s.attrs.get('id'):
# continue
# if s.contents:
# is_not_struct_tag = [child.name not in structural_tags_names for child in s.contents]
# if all(is_not_struct_tag):
# continue
#
# _add_span_to_save_ids_for_links(s)
# s.unwrap()
for div in body_tag.find_all("div"):
if div.contents:
@@ -450,6 +439,8 @@ def preprocess_pre_tags(chapter_tag):
for child in pre.children:
if isinstance(child, NavigableString):
text = pre.text
text = text.replace("<", "\x3C;")
text = text.replace(">", "\x3E;")
text = text.replace('\t', "\xa0 \xa0 \xa0 ")
text = text.replace(' ', "\xa0 ")
elements = re.split('\r\n|\n|\r', text)