From acb2ce48c2c27c1b88445270cf6d90a9698561e6 Mon Sep 17 00:00:00 2001
From: Kiryl <kiryl.miatselitsa@teqniksoft.com>
Date: Mon, 6 Jun 2022 16:37:42 +0300
Subject: [PATCH] Formatting: documentation + optimization

---
 src/book_solver.py                           |  8 ++--
 src/docx_converter/html_docx_preprocessor.py | 26 +++++-------
 src/epub_converter/html_epub_preprocessor.py | 42 +++++++++-----------
 3 files changed, 33 insertions(+), 43 deletions(-)

diff --git a/src/book_solver.py b/src/book_solver.py
index 4176280..c45af0f 100644
--- a/src/book_solver.py
+++ b/src/book_solver.py
@@ -13,8 +13,8 @@ class BookSolver:
     """
     This is Main Abstract class for solving a task of a book conversion
     Having an id of coming book, gets book from server, runs conversion.
-    In parallel it updates status of a book conversion on admin panel.
-    Finally sends result to server.
+    In parallel, it updates the status of the book conversion on the admin panel.
+    Finally, it sends the result to the server.
     Result is a json, JSON schema in book_schema.json
     """
 
@@ -35,12 +35,12 @@ class BookSolver:
         assert LiveCartaConfig.SUPPORTED_LEVELS == len(LiveCartaConfig.SUPPORTED_HEADERS), \
             "Length of headers doesn't match allowed levels."
 
-    def save_book_file(self, content: str):
+    def save_book_file(self, content: bytes):
         """
         Function saves binary content of file to .docx/.epub
         Parameters
         ----------
-        content: str
+        content: bytes
             binary content of the file
 
         """
diff --git a/src/docx_converter/html_docx_preprocessor.py b/src/docx_converter/html_docx_preprocessor.py
index b2b89a1..db847b0 100644
--- a/src/docx_converter/html_docx_preprocessor.py
+++ b/src/docx_converter/html_docx_preprocessor.py
@@ -116,8 +116,8 @@ class HTMLDocxPreprocessor:
 
             if face is not None:
                 face = re.sub(r",[\w,\- ]*$", "", face)
-                if face != LiveCartaConfig.DEFAULT_FONT_NAME and LiveCartaConfig.font_correspondence_table.get(face):
-                    font.attrs["face"] = LiveCartaConfig.font_correspondence_table[face]
+                if face != LiveCartaConfig.DEFAULT_FONT_NAME and LiveCartaConfig.FONT_CORRESPONDANCE_TABLE.get(face):
+                    font.attrs["face"] = LiveCartaConfig.FONT_CORRESPONDANCE_TABLE[face]
                 else:
                     font.attrs["face"] = LiveCartaConfig.DEFAULT_FONT_NAME
 
@@ -137,11 +137,11 @@ class HTMLDocxPreprocessor:
     def clean_trash(self):
         """Function to remove all styles and tags we don't need."""
         self._clean_tag('span', 'style', re.compile(
-            r'^background: #[0-9a-fA-F]{6}$'))
+            r'^background: #[\da-fA-F]{6}$'))
         # todo: check for another languages
         self._clean_tag('span', 'lang', re.compile(r'^ru-RU$'))
         self._clean_tag('span', 'style', re.compile(
-            '^letter-spacing: -?[\d\.]+pt$'))
+            '^letter-spacing: -?[\d.]+pt$'))
 
         self._clean_tag('font', 'face', re.compile(
             r'^Times New Roman[\w, ]+$'))
@@ -179,13 +179,13 @@ class HTMLDocxPreprocessor:
             style = p.get('style')
 
             if style:
-                indent = re.search(r'text-indent: ([\d\.]{1,4})in', style)
-                margin_left = re.search(r'margin-left: ([\d\.]{1,4})in', style)
+                indent = re.search(r'text-indent: ([\d.]{1,4})in', style)
+                margin_left = re.search(r'margin-left: ([\d.]{1,4})in', style)
                 margin_right = re.search(
-                    r'margin-right: ([\d\.]{1,4})in', style)
-                margin_top = re.search(r'margin-top: ([\d\.]{1,4})in', style)
+                    r'margin-right: ([\d.]{1,4})in', style)
+                margin_top = re.search(r'margin-top: ([\d.]{1,4})in', style)
                 margin_bottom = re.search(
-                    r'margin-bottom: ([\d\.]{1,4})in', style)
+                    r'margin-bottom: ([\d.]{1,4})in', style)
             else:
                 indent = None
                 margin_left = None
@@ -517,7 +517,7 @@ class HTMLDocxPreprocessor:
         Function for gathering info about top-level chapters.
 
         Assume:
-            - Headers with smallest outline(or digit in <h>) are top level chapters.
+            - Headers with the smallest outline (or digit in <h>) are top-level chapters.
             [ It is consistent with a recursive algorithm
             for saving content to a resulted json structure,
             which happens in  header_to_json()]
@@ -560,7 +560,7 @@ class HTMLDocxPreprocessor:
 
         Assume  header(s) to be introduction if:
             1. one header not numbered, before 1 numbered header
-            2. it is first header from the top level list and it equals to 'introductio
+            2. it is the first header in the top-level list and it equals 'introduction'
         Returns
         -------
         None
@@ -665,10 +665,6 @@ class HTMLDocxPreprocessor:
         Function
         - process tags <li>.
         - unwrap <p> tags.
-        Parameters
-        ----------
-        body_tag: Tag, soup object
-
         Returns
         -------
         None
diff --git a/src/epub_converter/html_epub_preprocessor.py b/src/epub_converter/html_epub_preprocessor.py
index 065481f..73e357c 100644
--- a/src/epub_converter/html_epub_preprocessor.py
+++ b/src/epub_converter/html_epub_preprocessor.py
@@ -75,8 +75,8 @@ def _preprocess_table(body_tag: BeautifulSoup):
     """Function to preprocess tables and tags(td|th|tr): style"""
     tables = body_tag.find_all("table")
     for table in tables:
-        ts = table.find_all(re.compile("td|th|tr"))
-        for t_tag in ts:
+        t_tags = table.find_all(re.compile("td|th|tr"))
+        for t_tag in t_tags:
             style = t_tag.get('style')
             width = ''
             if style:
@@ -113,7 +113,6 @@ def _process_lists(body_tag: BeautifulSoup):
     None
 
     """
-
     li_tags = body_tag.find_all("li")
     for li_tag in li_tags:
         if li_tag.p:
@@ -268,7 +267,7 @@ def preprocess_footnotes(source_html_tag: Tag, href2soup_html: dict = None, note
     return footnotes, new_noterefs_tags, new_footnotes_tags
 
 
-def unwrap_structural_tags(body_tag: BeautifulSoup):
+def unwrap_structural_tags(body_tag: BeautifulSoup) -> BeautifulSoup:
     """
     Main function that works with structure of html. Make changes inplace.
     Parameters
@@ -288,10 +287,10 @@ def unwrap_structural_tags(body_tag: BeautifulSoup):
 
     Returns
     -------
-    None
+    body_tag: Tag, BeautifulSoup
+        adjusted body_tag
 
     """
-
     def _preserve_class_in_aside_tag(tag_):
         """to save css style inherited from class, copy class to aside tag (which is parent to tag_)"""
         # this is for Wiley books with boxes
@@ -311,7 +310,7 @@ def unwrap_structural_tags(body_tag: BeautifulSoup):
 
         Returns
         -------
-        None
+        bool
 
         """
         # this is for Wiley books with boxes
@@ -454,21 +453,19 @@ def unwrap_structural_tags(body_tag: BeautifulSoup):
                 tag = body_tag.new_tag('p')
                 tag.append(str(node))
                 node.replace_with(tag)
-
     return body_tag
 
 
 def get_tags_between_chapter_marks(first_id: str, href: str, html_soup: BeautifulSoup) -> list:
     """After processing on a first_id that corresponds to current chapter,
     from initial html_soup all tags from current chapter are extracted
-
     Parameters
     ----------
     first_id:
         Id that point where a chapter starts. A Tag with class: 'converter-chapter-mark'
     href:
         Name of current chapter's file
-    html_soup: Tag, soup object
+    html_soup: Tag
         Soup object of current  file
 
     Returns
@@ -530,19 +527,17 @@ def _clean_wiley_block(block):
         h.insert_before(BeautifulSoup(features='lxml').new_tag("br"))
 
 
-def _preprocess_block_tags(chapter_tag):
+def _preprocess_block_tags(chapter_tag: Tag):
     """Function preprocessing <block> tags"""
-    for block in chapter_tag.find_all("blockquote"):
-        if block.attrs.get('class') in ['feature1', 'feature2', 'feature3', 'feature4']:
-            _clean_wiley_block(block)
-
-            color = '#DDDDDD' if block.attrs.get(
-                'class') == 'feature1' else None
-            color = '#EEEEEE' if block.attrs.get(
-                'class') == 'feature2' else color
-            _wrap_block_tag_with_table(chapter_tag, block, bg_color=color)
-            block.insert_after(BeautifulSoup(features='lxml').new_tag("br"))
-            block.unwrap()
+    for block in chapter_tag.find_all("blockquote", attrs={"class": re.compile("feature[1234]")}):
+        _clean_wiley_block(block)
+        color = '#DDDDDD' if block.attrs.get(
+            'class') == 'feature1' else None
+        color = '#EEEEEE' if block.attrs.get(
+            'class') == 'feature2' else color
+        _wrap_block_tag_with_table(chapter_tag, block, bg_color=color)
+        block.insert_after(BeautifulSoup(features='lxml').new_tag("br"))
+        block.unwrap()
 
     for future_block in chapter_tag.find_all("p", attrs={"class": re.compile("feature[1234]")}):
         _clean_wiley_block(future_block)
@@ -647,8 +642,7 @@ def _preprocess_code_tags(chapter_tag: BeautifulSoup):
         code.name = "span"
         if code.parent.name == "pre":
             continue
-
-        # if tags aren't in pre
+        # if tags aren't in pre and don't have style
         if not code.attrs.get('style'):
             code.attrs['style'] = 'font-size: 14px; font-family: courier new,courier,monospace;'