diff --git a/src/html_epub_preprocessor.py b/src/html_epub_preprocessor.py index 9e3497f..9cd431f 100644 --- a/src/html_epub_preprocessor.py +++ b/src/html_epub_preprocessor.py @@ -276,17 +276,6 @@ def unwrap_structural_tags(body_tag): 'div', 'section', 'article', 'main', 'body', 'html', 'aside', 'canvas', 'data', 'figure', 'footer', 'iframe', 'span', 'p' ] - # should be before other tags processing, not to remove converter empty tags with id - # for s in body_tag.find_all("span"): - # if (s.attrs.get('epub:type') == 'pagebreak') or s.attrs.get('id'): - # continue - # if s.contents: - # is_not_struct_tag = [child.name not in structural_tags_names for child in s.contents] - # if all(is_not_struct_tag): - # continue - # - # _add_span_to_save_ids_for_links(s) - # s.unwrap() for div in body_tag.find_all("div"): if div.contents: @@ -450,6 +439,8 @@ def preprocess_pre_tags(chapter_tag): for child in pre.children: if isinstance(child, NavigableString): text = pre.text + text = text.replace("<", "\x3C;") + text = text.replace(">", "\x3E;") text = text.replace('\t', "\xa0 \xa0 \xa0 ") text = text.replace(' ', "\xa0 ") elements = re.split('\r\n|\n|\r', text)