Fix TOC removal

This commit is contained in:
shirshasa
2020-09-28 11:36:09 +03:00
parent 1daa851e59
commit 4853d2c49f

View File

@@ -110,13 +110,7 @@ class HTMLPreprocessor:
assert len(self.body_tag.find_all("font")) == 0 # on this step there should be no more <font> tags
def delete_content_before_toc(self):
# replace toc with empty <TOC> tag
tables = self.body_tag.find_all("div", id=re.compile(r'^Table of Contents\d+'))
for table in tables:
table.wrap(self.html_soup.new_tag("TOC"))
table.decompose()
# remove all tag upper the <TOC>
# remove all tags above the <TOC>, but only in content! The body tag is not updated.
toc_tag = self.html_soup.new_tag('TOC')
if toc_tag in self.content:
ind = self.content.index(toc_tag) + 1
@@ -137,6 +131,12 @@ class HTMLPreprocessor:
self._font_to_span()
# replace toc with empty <TOC> tag
tables = self.body_tag.find_all("div", id=re.compile(r'^Table of Contents\d+'))
for table in tables:
table.wrap(self.html_soup.new_tag("TOC"))
table.decompose()
def _process_paragraph(self):
"""
Function to process <p> tags (text-align and text-indent value).
@@ -578,6 +578,9 @@ class HTMLPreprocessor:
Process html code to satisfy LawCarta formatting.
"""
try:
self.logger_object.log(f'Processing TOC and headers.')
self._process_toc_links()
self.clean_trash()
# process main elements of the .html doc
@@ -609,9 +612,6 @@ class HTMLPreprocessor:
self.content = self.body_tag.find_all(recursive=False)
self.logger_object.log(f'Processing TOC and headers.')
self._process_toc_links()
self.top_level_headers = self._get_top_level_headers()
self._mark_introduction_headers()