epub converter: change to html.parser

2021-07-19 13:58:28 +03:00
parent 978b9b0b66
commit eff824f446
1 changed files with 2 additions and 1 deletions
--- a/src/epub_postprocessor.py
+++ b/src/epub_postprocessor.py
@@ -90,7 +90,8 @@ class EpubPostprocessor:
        nodes = dict()
        for item in self.ebooklib_book.get_items_of_type(ebooklib.ITEM_DOCUMENT):
            html_body_text = item.get_body_content()
-            soup = BeautifulSoup(html_body_text, features='lxml')
+            # html.parser closes tags if needed
+            soup = BeautifulSoup(html_body_text, features='html.parser')
            nodes[item.file_name] = soup

        return nodes