forked from LiveCarta/BookConverter
epub converter: small fix
This commit is contained in:
@@ -276,17 +276,6 @@ def unwrap_structural_tags(body_tag):
|
|||||||
'div', 'section', 'article', 'main', 'body', 'html', 'aside', 'canvas', 'data',
|
'div', 'section', 'article', 'main', 'body', 'html', 'aside', 'canvas', 'data',
|
||||||
'figure', 'footer', 'iframe', 'span', 'p'
|
'figure', 'footer', 'iframe', 'span', 'p'
|
||||||
]
|
]
|
||||||
# should be before other tags processing, not to remove converter empty tags with id
|
|
||||||
# for s in body_tag.find_all("span"):
|
|
||||||
# if (s.attrs.get('epub:type') == 'pagebreak') or s.attrs.get('id'):
|
|
||||||
# continue
|
|
||||||
# if s.contents:
|
|
||||||
# is_not_struct_tag = [child.name not in structural_tags_names for child in s.contents]
|
|
||||||
# if all(is_not_struct_tag):
|
|
||||||
# continue
|
|
||||||
#
|
|
||||||
# _add_span_to_save_ids_for_links(s)
|
|
||||||
# s.unwrap()
|
|
||||||
|
|
||||||
for div in body_tag.find_all("div"):
|
for div in body_tag.find_all("div"):
|
||||||
if div.contents:
|
if div.contents:
|
||||||
@@ -450,6 +439,8 @@ def preprocess_pre_tags(chapter_tag):
|
|||||||
for child in pre.children:
|
for child in pre.children:
|
||||||
if isinstance(child, NavigableString):
|
if isinstance(child, NavigableString):
|
||||||
text = pre.text
|
text = pre.text
|
||||||
|
text = text.replace("<", "\x3C;")
|
||||||
|
text = text.replace(">", "\x3E;")
|
||||||
text = text.replace('\t', "\xa0 \xa0 \xa0 ")
|
text = text.replace('\t', "\xa0 \xa0 \xa0 ")
|
||||||
text = text.replace(' ', "\xa0 ")
|
text = text.replace(' ', "\xa0 ")
|
||||||
elements = re.split('\r\n|\n|\r', text)
|
elements = re.split('\r\n|\n|\r', text)
|
||||||
|
|||||||
Reference in New Issue
Block a user