From eff824f44613bdbc1f8e06bce5e882489da3887b Mon Sep 17 00:00:00 2001 From: shirshasa Date: Mon, 19 Jul 2021 13:58:28 +0300 Subject: [PATCH] epub converter: change to html.parser --- src/epub_postprocessor.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/epub_postprocessor.py b/src/epub_postprocessor.py index e9ad705..0678032 100644 --- a/src/epub_postprocessor.py +++ b/src/epub_postprocessor.py @@ -90,7 +90,8 @@ class EpubPostprocessor: nodes = dict() for item in self.ebooklib_book.get_items_of_type(ebooklib.ITEM_DOCUMENT): html_body_text = item.get_body_content() - soup = BeautifulSoup(html_body_text, features='lxml') + # html.parser closes tags if needed + soup = BeautifulSoup(html_body_text, features='html.parser') nodes[item.file_name] = soup return nodes