Fix TOC removal

2020-09-28 11:36:09 +03:00
parent 1daa851e59
commit 4853d2c49f
1 changed files with 10 additions and 10 deletions
--- a/src/html_preprocessor.py
+++ b/src/html_preprocessor.py
@@ -110,13 +110,7 @@ class HTMLPreprocessor:
        assert len(self.body_tag.find_all("font")) == 0  # on this step there should be no more <font> tags

    def delete_content_before_toc(self):
-        # replace toc with empty <TOC> tag
-        tables = self.body_tag.find_all("div", id=re.compile(r'^Table of Contents\d+'))
-        for table in tables:
-            table.wrap(self.html_soup.new_tag("TOC"))
-            table.decompose()
-
-        # remove all tag upper the <TOC>
+        # Remove every tag above the <TOC> marker, but only in self.content; NOTE: body_tag is not updated.
        toc_tag = self.html_soup.new_tag('TOC')
        if toc_tag in self.content:
            ind = self.content.index(toc_tag) + 1
@@ -137,6 +131,12 @@ class HTMLPreprocessor:

        self._font_to_span()

+        # Replace each "Table of Contents" <div> with an empty <TOC> tag.
+        tables = self.body_tag.find_all("div", id=re.compile(r'^Table of Contents\d+'))
+        for table in tables:
+            table.wrap(self.html_soup.new_tag("TOC"))
+            table.decompose()
+
    def _process_paragraph(self):
        """
        Function to process <p> tags (text-align and text-indent value).
@@ -578,6 +578,9 @@ class HTMLPreprocessor:
        Process html code to satisfy LawCarta formatting.
        """
        try:
+            self.logger_object.log(f'Processing TOC and headers.')
+            self._process_toc_links()
+
            self.clean_trash()

            # process main elements of the .html doc
@@ -609,9 +612,6 @@ class HTMLPreprocessor:

            self.content = self.body_tag.find_all(recursive=False)

-            self.logger_object.log(f'Processing TOC and headers.')
-            self._process_toc_links()
-
            self.top_level_headers = self._get_top_level_headers()
            self._mark_introduction_headers()