From 4853d2c49fa0af0e8f692e15132f07d5b61e97d3 Mon Sep 17 00:00:00 2001
From: shirshasa <katerinagorbac@gmail.com>
Date: Mon, 28 Sep 2020 11:36:09 +0300
Subject: [PATCH] fix toc removing

---
 src/html_preprocessor.py | 20 ++++++++++----------
 1 file changed, 10 insertions(+), 10 deletions(-)
diff --git a/src/html_preprocessor.py b/src/html_preprocessor.py
index 61e1bbd..bd213ec 100644
--- a/src/html_preprocessor.py
+++ b/src/html_preprocessor.py
@@ -110,13 +110,7 @@ class HTMLPreprocessor:
         assert len(self.body_tag.find_all("font")) == 0  # on this step there should be no more <font> tags
 
     def delete_content_before_toc(self):
-        # replace toc with empty <TOC> tag
-        tables = self.body_tag.find_all("div", id=re.compile(r'^Table of Contents\d+'))
-        for table in tables:
-            table.wrap(self.html_soup.new_tag("TOC"))
-            table.decompose()
-
-        # remove all tag upper the <TOC>
+        # remove all tags above the <TOC>; NOTE: only self.content is trimmed here, body_tag is not updated
         toc_tag = self.html_soup.new_tag('TOC')
         if toc_tag in self.content:
             ind = self.content.index(toc_tag) + 1
@@ -137,6 +131,12 @@ class HTMLPreprocessor:
 
         self._font_to_span()
 
+        # replace toc with empty <TOC> tag
+        tables = self.body_tag.find_all("div", id=re.compile(r'^Table of Contents\d+'))
+        for table in tables:
+            table.wrap(self.html_soup.new_tag("TOC"))
+            table.decompose()
+
     def _process_paragraph(self):
         """
         Function to process <p> tags (text-align and text-indent value).
@@ -578,6 +578,9 @@ class HTMLPreprocessor:
         Process html code to satisfy LawCarta formatting.
         """
         try:
+            self.logger_object.log(f'Processing TOC and headers.')
+            self._process_toc_links()
+
             self.clean_trash()
 
             # process main elements of the .html doc
@@ -609,9 +612,6 @@ class HTMLPreprocessor:
 
             self.content = self.body_tag.find_all(recursive=False)
 
-            self.logger_object.log(f'Processing TOC and headers.')
-            self._process_toc_links()
-
             self.top_level_headers = self._get_top_level_headers()
             self._mark_introduction_headers()