Fix TOC removal

This commit is contained in:
shirshasa
2020-09-28 11:36:09 +03:00
parent 1daa851e59
commit 4853d2c49f

View File

@@ -110,13 +110,7 @@ class HTMLPreprocessor:
assert len(self.body_tag.find_all("font")) == 0 # on this step there should be no more <font> tags assert len(self.body_tag.find_all("font")) == 0 # on this step there should be no more <font> tags
def delete_content_before_toc(self): def delete_content_before_toc(self):
# replace toc with empty <TOC> tag # remove all tag upper the <TOC> only in content !!! body tag is not updated
tables = self.body_tag.find_all("div", id=re.compile(r'^Table of Contents\d+'))
for table in tables:
table.wrap(self.html_soup.new_tag("TOC"))
table.decompose()
# remove all tag upper the <TOC>
toc_tag = self.html_soup.new_tag('TOC') toc_tag = self.html_soup.new_tag('TOC')
if toc_tag in self.content: if toc_tag in self.content:
ind = self.content.index(toc_tag) + 1 ind = self.content.index(toc_tag) + 1
@@ -137,6 +131,12 @@ class HTMLPreprocessor:
self._font_to_span() self._font_to_span()
# replace toc with empty <TOC> tag
tables = self.body_tag.find_all("div", id=re.compile(r'^Table of Contents\d+'))
for table in tables:
table.wrap(self.html_soup.new_tag("TOC"))
table.decompose()
def _process_paragraph(self): def _process_paragraph(self):
""" """
Function to process <p> tags (text-align and text-indent value). Function to process <p> tags (text-align and text-indent value).
@@ -578,6 +578,9 @@ class HTMLPreprocessor:
Process html code to satisfy LawCarta formatting. Process html code to satisfy LawCarta formatting.
""" """
try: try:
self.logger_object.log(f'Processing TOC and headers.')
self._process_toc_links()
self.clean_trash() self.clean_trash()
# process main elements of the .html doc # process main elements of the .html doc
@@ -609,9 +612,6 @@ class HTMLPreprocessor:
self.content = self.body_tag.find_all(recursive=False) self.content = self.body_tag.find_all(recursive=False)
self.logger_object.log(f'Processing TOC and headers.')
self._process_toc_links()
self.top_level_headers = self._get_top_level_headers() self.top_level_headers = self._get_top_level_headers()
self._mark_introduction_headers() self._mark_introduction_headers()