diff --git a/src/epub_converter/epub_converter.py b/src/epub_converter/epub_converter.py
index 57f2904..ae900ea 100644
--- a/src/epub_converter/epub_converter.py
+++ b/src/epub_converter/epub_converter.py
@@ -104,8 +104,8 @@ class EpubConverter:
         self.add_not_added_files_to_adjacency_list(not_added)
         self.logger.log(f"Html internal links and structure processing.")
         self.label_chapters_ids_with_lc_id()
+        self.chapter_marks_are_same_level()
         # used only after parsed toc, ids from toc needed
-        self.process_html_soup_structure_to_line()
         self.process_internal_links()
         self.logger.log(f"Define chapters content.")
         self.define_chapters_content()
@@ -316,11 +316,27 @@ class EpubConverter:
                 new_h.attrs["id"] = i
                 tag.insert_before(new_h)
 
-    def process_html_soup_structure_to_line(self):
-        # go to line structure
+    def chapter_marks_are_same_level(self):
+        """
+        Ensure chapter-start marks sit on one level of the html tree.
+
+        A mark is a tag with "class": "converter-chapter-mark", added while the
+        TOC was parsed. Every mark must have chapter_tag (the value stored in
+        self.html_href2html_body_soup) as its direct parent. Otherwise it is
+        wrapped with some tags, e.g.:
+            <div><p><span class="converter-chapter-mark"></span></p></div>
+        Such wrappers are unwrapped in place until the mark is a direct child.
+        """
         for html_href in self.html_href2html_body_soup:
-            soup = self.html_href2html_body_soup[html_href]
-            self.html_href2html_body_soup[html_href] = process_structural_tags(soup)
+            chapter_tag = self.html_href2html_body_soup[html_href]
+            marks = chapter_tag.find_all(attrs={"class": "converter-chapter-mark"})
+
+            # lift each mark to 1 level by removing the tags that wrap it
+            for mark in marks:
+                # compare by identity: "!=" on tags is a markup-equality test
+                while mark.parent is not chapter_tag:
+                    # todo warning! could reflect on formatting/internal links in some cases
+                    mark.parent.unwrap()
 
     @staticmethod
     def create_unique_id(href, id_):