epub converter: remove commented-out span unwrapping; rewrite '<' and '>' in <pre> text

2021-07-08 15:09:01 +03:00
parent 7963486d7c
commit 00b808eaad
1 changed files with 2 additions and 11 deletions
--- a/src/html_epub_preprocessor.py
+++ b/src/html_epub_preprocessor.py
@@ -276,17 +276,6 @@ def unwrap_structural_tags(body_tag):
        'div', 'section', 'article', 'main', 'body', 'html', 'aside', 'canvas', 'data',
        'figure', 'footer', 'iframe', 'span', 'p'
    ]
-    # should be before other tags processing, not to remove converter empty tags with id
-    # for s in body_tag.find_all("span"):
-    #     if (s.attrs.get('epub:type') == 'pagebreak') or s.attrs.get('id'):
-    #         continue
-    #     if s.contents:
-    #         is_not_struct_tag = [child.name not in structural_tags_names for child in s.contents]
-    #         if all(is_not_struct_tag):
-    #             continue
-    #
-    #     _add_span_to_save_ids_for_links(s)
-    #     s.unwrap()

    for div in body_tag.find_all("div"):
        if div.contents:
@@ -450,6 +439,8 @@ def preprocess_pre_tags(chapter_tag):
        for child in pre.children:
            if isinstance(child, NavigableString):
                text = pre.text
+                text = text.replace("<", "\x3C;")
+                text = text.replace(">", "\x3E;")
                text = text.replace('\t', "\xa0 \xa0 \xa0 ")
                text = text.replace(' ', "\xa0 ")
                elements = re.split('\r\n|\n|\r', text)