From 114ac78eb0d6f74163be8b01ea12c9cd138a3a2a Mon Sep 17 00:00:00 2001
From: Kiryl <kiryl.miatselitsa@teqniksoft.com>
Date: Tue, 28 Jun 2022 16:39:50 +0300
Subject: [PATCH] refactor with PEP8

---
 src/epub_converter/epub_converter.py         |   5 +-
 src/epub_converter/html_epub_preprocessor.py | 169 ++++++++++++++++---
 2 files changed, 144 insertions(+), 30 deletions(-)
diff --git a/src/epub_converter/epub_converter.py b/src/epub_converter/epub_converter.py
index ca7b69f..1ecc7a1 100644
--- a/src/epub_converter/epub_converter.py
+++ b/src/epub_converter/epub_converter.py
@@ -497,7 +497,7 @@ class EpubConverter:
             id wraps chapter"s content + subchapters" content
             id points to the start of title of a chapter
 
-        In all cases we know where chapter starts. Therefore, chapter is all tags between chapter"s id
+        In all cases we know where chapter starts. Therefore, chapter is all tags between chapter's id
         and id of the next chapter/subchapter
         Parameters
         ----------
@@ -539,6 +539,7 @@ class EpubConverter:
 
         lvl: int
             level of chapter
+
         Returns
         -------
         ChapterItem
@@ -597,7 +598,7 @@ class EpubConverter:
 
 
 if __name__ == "__main__":
-    epub_file_path = "../../epub/9781614382264.epub"
+    epub_file_path = "../../epub/9781641050234.epub"
     logger_object = BookLogger(
         name="epub", book_id=epub_file_path.split("/")[-1])
 
diff --git a/src/epub_converter/html_epub_preprocessor.py b/src/epub_converter/html_epub_preprocessor.py
index f9c2c06..3f762b4 100644
--- a/src/epub_converter/html_epub_preprocessor.py
+++ b/src/epub_converter/html_epub_preprocessor.py
@@ -21,7 +21,7 @@ def _add_span_to_save_ids_for_links(tag_to_be_removed, chapter_tag: BeautifulSou
 
     """
     def _insert_span_with_attrs_before_tag(chapter_tag: BeautifulSoup, tag_to_be_removed: Tag, id_: str, class_: list):
-        """Function inserts span before tag aren't supported by livecarta"""
+        """Function inserts span before tags that aren't supported by LiveCarta"""
         new_tag = chapter_tag.new_tag("span")
         new_tag.attrs["id"] = id_ or ""
         new_tag.attrs["class"] = class_ or ""
@@ -77,22 +77,57 @@ def get_tags_between_chapter_marks(first_id: str, href: str, html_soup: Beautifu
 
 
 def prepare_title(title_of_chapter: str) -> str:
-    """Function finalise processing/cleaning title"""
-    title_str = BeautifulSoup(title_of_chapter, features="lxml").string
+    """
+    Function finalises processing/cleaning of the title
+    Parameters
+    ----------
+    title_of_chapter: str
+
+    Returns
+    -------
+    title: str
+        cleaned title
+
+    """
+    title = BeautifulSoup(title_of_chapter, features="lxml").string
     # clean extra whitespace characters ([\r\n\t\f\v ])
-    title_str = re.sub(r"[\s\xa0]", " ", title_str).strip()
-    return title_str
+    title = re.sub(r"[\s\xa0]", " ", title).strip()
+    return title
 
 
 def _remove_comments(chapter_tag):
+    """
+    Function removes comments
+    Parameters
+    ----------
+    chapter_tag: BeautifulSoup
+        Tag & contents of the chapter tag
+
+    Returns
+    -------
+    None
+        Chapter Tag without comments
+
+    """
     for tag in chapter_tag.find_all():
         for element in tag(text=lambda text: isinstance(text, Comment)):
             element.extract()
 
 
 def _wrap_strings_with_p(chapter_tag):
-    # Headings that are not supported by livecarta converts to <p>
-    # wrap NavigableString with <p>
+    """
+    Function wraps NavigableStrings with <p>
+    Parameters
+    ----------
+    chapter_tag: BeautifulSoup
+        Tag & contents of the chapter tag
+
+    Returns
+    -------
+    None
+        Chapter Tag with wrapped NavigableStrings
+
+    """
     for node in chapter_tag:
         if isinstance(node, NavigableString):
             content = str(node)
@@ -104,7 +139,19 @@ def _wrap_strings_with_p(chapter_tag):
 
 
 def _wrap_tags_with_table(chapter_tag):
-    """Function wraps <tag> with <table>"""
+    """
+    Function wraps <tag> with <table>
+    Parameters
+    ----------
+    chapter_tag: BeautifulSoup
+        Tag & contents of the chapter tag
+
+    Returns
+    -------
+    None
+        Chapter Tag with wrapped certain tags with <table>
+
+    """
     def _wrap_tag_with_table(chapter_tag, tag_to_be_wrapped, width="100", border="", bg_color=None):
         table = chapter_tag.new_tag("table")
         table.attrs["border"], table.attrs["align"], table.attrs["style"] \
@@ -141,7 +188,19 @@ def _wrap_tags_with_table(chapter_tag):
 
 
 def _tags_to_correspond_livecarta_tag(chapter_tag):
-    """Function to replace all tags to correspond livecarta tags"""
+    """
+    Function replaces all tags with the corresponding LiveCarta tags
+    Parameters
+    ----------
+    chapter_tag: BeautifulSoup
+        Tag & contents of the chapter tag
+
+    Returns
+    -------
+    None
+        Chapter Tag with all tags replaced with LiveCarta tags
+
+    """
     for reg_keys, to_replace_value in LiveCartaConfig.REPLACE_TAG_WITH_LIVECARTA_CORRESPOND_TAGS.items():
         for key in reg_keys:
             if isinstance(key, tuple):
@@ -164,12 +223,23 @@ def _tags_to_correspond_livecarta_tag(chapter_tag):
 
 
 def _unwrap_tags(chapter_tag):
-    """Function unwrap tags and move id to span"""
+    """
+    Function unwraps tags and moves id to span
+    Parameters
+    ----------
+    chapter_tag: BeautifulSoup
+        Tag & contents of the chapter tag
+
+    Returns
+    -------
+    None
+        Chapter Tag with unwrapped certain tags
+
+    """
     for tag_name in LiveCartaConfig.TAGS_TO_UNWRAP:
         for tag in chapter_tag.select(tag_name):
             # if tag is a subtag
             if ">" in tag_name:
-                parent = tag.parent
                 tag.parent.attrs.update(tag.attrs)
             _add_span_to_save_ids_for_links(tag, chapter_tag)
             tag.unwrap()
@@ -178,8 +248,8 @@ def _unwrap_tags(chapter_tag):
 def _remove_headings_content(content_tag, title_of_chapter: str):
     """
     Function
-    clean/remove headings from chapter in order to avoid duplication of chapter titles in the content
-    add span with id in order to
+    - cleans/removes headings from chapter in order to avoid duplication of chapter titles in the content
+    - adds span with id in order to
     Parameters
     ----------
     content_tag: soup object
@@ -210,8 +280,20 @@ def _remove_headings_content(content_tag, title_of_chapter: str):
                     break
 
 
-def _preprocess_table(chapter_tag: BeautifulSoup):
-    """Function to preprocess tables and tags(td|th|tr): style"""
+def _process_table(chapter_tag: BeautifulSoup):
+    """
+    Function preprocesses tables and tags(td|th|tr)
+    Parameters
+    ----------
+    chapter_tag: BeautifulSoup
+        Tag & contents of the chapter tag
+
+    Returns
+    -------
+    None
+        Chapter Tag with processed tables
+
+    """
     tables = chapter_tag.find_all("table")
     for table in tables:
         for t_tag in table.find_all(re.compile("td|th|tr")):
@@ -236,6 +318,19 @@ def _preprocess_table(chapter_tag: BeautifulSoup):
 
 
 def _insert_tags_in_parents(chapter_tag):
+    """
+    Function inserts tags into corresponding parent tags
+    Parameters
+    ----------
+    chapter_tag: BeautifulSoup
+        Tag & contents of the chapter tag
+
+    Returns
+    -------
+    None
+        Chapter Tag with inserted tags
+
+    """
     parent_tag2condition = {parent[0]: parent[1] for parent in LiveCartaConfig.INSERT_TAG_IN_PARENT_TAG.keys()}
     for parent_tag_name, condition in parent_tag2condition.items():
         for parent_tag in chapter_tag.select(parent_tag_name):
@@ -252,6 +347,19 @@ def _insert_tags_in_parents(chapter_tag):
 
 
 def _class_removing(chapter_tag):
+    """
+    Function removes classes that aren't created by converter
+    Parameters
+    ----------
+    chapter_tag: BeautifulSoup
+        Tag & contents of the chapter tag
+
+    Returns
+    -------
+    None
+        Chapter Tag without original classes of the book
+
+    """
     for tag in chapter_tag.find_all(recursive=True):
         if tag.attrs.get("class") \
                 and (tag.attrs.get("class") not in ["link-anchor", "footnote-element"]):
@@ -271,9 +379,15 @@ def prepare_content(title_str: str, content_tag: BeautifulSoup, remove_title_fro
 
     Steps
     ----------
-    1. heading removal
-    2. processing tags
-    3. class removal
+    1. comments removal
+    2. wrap NavigableString with tag <p>
+    3. wrap tags with <table>
+    4. replace tags with corresponding LiveCarta tags
+    5. unwrap tags
+    6. heading removal
+    7. process_table
+    8. insert tags into corresponding tags
+    9. class removal
 
     Returns
     -------
@@ -284,23 +398,22 @@ def prepare_content(title_str: str, content_tag: BeautifulSoup, remove_title_fro
     # 1. remove comments
     _remove_comments(content_tag)
 
-    # 2. wrap NavigableString with tag <p>
+    # 2. wrap NavigableString with tag <p>
     _wrap_strings_with_p(content_tag)
-
+    # 3. wrap tags with <table>
     _wrap_tags_with_table(content_tag)
-
+    # 4. replace tags with corresponding LiveCarta tags
     _tags_to_correspond_livecarta_tag(content_tag)
-
+    # 5. unwrap tags
     _unwrap_tags(content_tag)
-
-    # 3. heading removal
+    # 6. heading removal
     if remove_title_from_chapter:
         _remove_headings_content(content_tag, title_str)
-
-    # 4. processing tags (<li>, <table>, <code>, <pre>, <div>, <block>)
-    _preprocess_table(content_tag)
+    # 7. process tables
+    _process_table(content_tag)
+    # 8. insert tags into corresponding tags
     _insert_tags_in_parents(content_tag)
 
-    # 5. remove classes that weren't created by converter
+    # 9. remove classes that weren't created by converter
     _class_removing(content_tag)
     return str(content_tag)