From 0f53caaffaff2c02efbe2a948ff1ff8f3ab23b69 Mon Sep 17 00:00:00 2001
From: Kiryl <kiryl.miatselitsa@teqniksoft.com>
Date: Wed, 22 Jun 2022 18:20:21 +0300
Subject: [PATCH] Move structural tag functions into the single HTML processing step

---
 src/epub_converter/html_epub_preprocessor.py | 72 +++-----------------
 1 file changed, 9 insertions(+), 63 deletions(-)
diff --git a/src/epub_converter/html_epub_preprocessor.py b/src/epub_converter/html_epub_preprocessor.py
index c3ce356..e46e46d 100644
--- a/src/epub_converter/html_epub_preprocessor.py
+++ b/src/epub_converter/html_epub_preprocessor.py
@@ -34,69 +34,6 @@ def _add_span_to_save_ids_for_links(tag_to_be_removed, chapter_tag: BeautifulSou
                                            class_=tag_to_be_removed.attrs.get("class"))
 
 
-def process_structural_tags(chapter_tag: BeautifulSoup) -> BeautifulSoup:
-    """
-    Main function that works with structure of html. Make changes inplace.
-    Parameters
-    ----------
-    chapter_tag: Tag, soup object
-
-    Steps
-    ----------
-    1. Extracts tags that are not needed
-    2. Checks that marks for pointing a start of a chapter are placed on one level in html tree.
-    Mark is tag with "class": "converter-chapter-mark". Added while TOC was parsed.
-    This tag must have a chapter_tag as a parent.
-    Otherwise, it is wrapped with some tags. Like:
-        <p> <span id="123", class="converter-chapter-mark"> </span> </p>
-    3. Headings that are not supported by livecarta converts to <p>
-    4. Wrapping NavigableString
-
-    Returns
-    -------
-    chapter_tag: Tag, BeautifulSoup
-        adjusted chapter_tag
-
-    """
-    def _tags_to_correspond_livecarta_tag(chapter_tag):
-        """Function to replace all tags to correspond livecarta tags"""
-        for reg_key, to_replace_value in LiveCartaConfig.REPLACE_REGEX_WITH_LIVECARTA_CORRESPOND_TAGS.items():
-            for key in reg_key:
-                tags = chapter_tag.find_all(re.compile(key))
-                for tag in tags:
-                    tag.name = to_replace_value
-
-    def _unwrap_tags(chapter_tag):
-        """Function unwrap tags and move id to span"""
-        for tag in LiveCartaConfig. TAGS_TO_UNWRAP:
-            for s in chapter_tag.find_all(tag):
-                _add_span_to_save_ids_for_links(s, chapter_tag)
-                s.unwrap()
-
-    def _mark_parent_is_body(chapter_tag):
-        # check marks for chapter starting are on the same level - 1st
-        marks = chapter_tag.find_all(attrs={"class": "converter-chapter-mark"})
-
-        # fix marks to be on 1 level
-        for mark in marks:
-            while mark.parent != chapter_tag:
-                mark.parent.unwrap()  # todo warning! could reflect on formatting/internal links in some cases
-
-    # 1. remove comments
-    _remove_comments(chapter_tag)
-
-    # 2. wrap NavigableString with tag <p>
-    _wrap_strings_with_p(chapter_tag)
-
-    _tags_to_correspond_livecarta_tag(chapter_tag)
-
-    _unwrap_tags(chapter_tag)
-
-    _mark_parent_is_body(chapter_tag)
-
-    return chapter_tag
-
-
 def get_tags_between_chapter_marks(first_id: str, href: str, html_soup: BeautifulSoup) -> list:
     """
     After processing on a first_id that corresponds to current chapter,
@@ -156,6 +93,7 @@ def _remove_comments(chapter_tag):
 
 
 def _wrap_strings_with_p(chapter_tag):
+    # Headings that are not supported by livecarta are converted to <p>
     # wrap NavigableString with <p>
     for node in chapter_tag:
         if isinstance(node, NavigableString):
@@ -408,7 +346,15 @@ def prepare_content(title_str: str, content_tag: BeautifulSoup, remove_title_fro
         prepared content
 
     """
+    # 1. remove comments
+    _remove_comments(content_tag)
 
+    # 2. wrap NavigableString with tag <p>
+    _wrap_strings_with_p(content_tag)
+
+    _tags_to_correspond_livecarta_tag(content_tag)
+
+    _unwrap_tags(content_tag)
 
     # 3. heading removal
     if remove_title_from_chapter: