From d7a9c52acdb1c1dcd7762d74ad1cfd4ff307b8e1 Mon Sep 17 00:00:00 2001
From: Kiryl <kiryl.miatselitsa@teqniksoft.com>
Date: Mon, 19 Sep 2022 19:25:17 +0300
Subject: [PATCH] Change working process of docx body

---
 src/docx_converter/html_docx_processor.py | 18 +++++++-----------
 1 file changed, 7 insertions(+), 11 deletions(-)
diff --git a/src/docx_converter/html_docx_processor.py b/src/docx_converter/html_docx_processor.py
index 05c413e..c92e997 100644
--- a/src/docx_converter/html_docx_processor.py
+++ b/src/docx_converter/html_docx_processor.py
@@ -13,16 +13,15 @@ from src.inline_style_processor import modify_html_soup_with_css_styles
 class HtmlDocxProcessor:
     def __init__(self, logger: BookLogger, html_soup: BeautifulSoup, html_preprocessor, style_preprocessor):
         self.logger = logger
-        self.html_soup = html_soup
+        self.body_tag: BeautifulSoup = BeautifulSoup(str(html_soup.body))
         self.html_preprocessor = html_preprocessor
         self.style_preprocessor = style_preprocessor
         self.content: List[Tag] = []
 
     def _font_to_span(self):
-        for font in self.html_soup.find_all("font"):
+        for font in self.body_tag.find_all("font"):
             font.name = "span"
 
-
     def _process_hrefs(self):
         a_tags_with_href = self.body_tag.find_all(
             "a", {"href": re.compile("^.*http.+")})
@@ -205,10 +204,9 @@ class HtmlDocxProcessor:
             else:
                 h_tag.unwrap()
 
-
     def delete_content_before_toc(self):
         # remove all tag upper the <TOC> only in content !!! body tag is not updated
-        toc_tag = self.html_soup.new_tag("TOC")
+        toc_tag = self.body_tag.new_tag("TOC")
         if toc_tag in self.content:
             ind = self.content.index(toc_tag) + 1
             self.content = self.content[ind:]
@@ -225,12 +223,10 @@ class HtmlDocxProcessor:
 
         self.logger.log("Inline style reading.")
         self.style_preprocessor.process_inline_styles_in_html_soup(
-            self.html_soup)
+            self.body_tag)
 
         self.logger.log("Inline style processing.")
-        self.html_soup = modify_html_soup_with_css_styles(self.html_soup)
-
-        self.body_tag = self.html_soup.body
+        self.body_tag = modify_html_soup_with_css_styles(self.body_tag)
 
         self.logger.log("Image processing.")
         images = process_images(access, path_to_html=html_path,
@@ -257,9 +253,9 @@ class HtmlDocxProcessor:
 
         self.logger.log(f".html using presets processing.")
         _process_presets(html_preprocessor=self.html_preprocessor,
-                         html_soup=self.html_soup)
+                         html_soup=self.body_tag)
 
-        self.content = self.body_tag.find_all(recursive=False)
+        self.content = self.body_tag.body.find_all(recursive=False)
         # delete text before table of content if exists
         self.delete_content_before_toc()