Annotations in Epub converter

2022-08-03 14:39:13 +03:00
parent 7453029295
commit 78e3ad8911
16 changed files with 259 additions and 192 deletions
--- a/src/epub_converter/css_processor.py
+++ b/src/epub_converter/css_processor.py
@@ -1,5 +1,6 @@
 import re
 import cssutils
+from typing import Tuple, Dict
 from bs4 import BeautifulSoup
 from os.path import dirname, normpath, join

@@ -41,13 +42,13 @@ class CSSPreprocessor:
        }

    @staticmethod
-    def get_text_color(x):
+    def get_text_color(x: str) -> str:
        color = str2hex(x)
        color = color if color not in ["#000000", "#000", "black"] else ""
        return color

    @staticmethod
-    def get_bg_color(x):
+    def get_bg_color(x: str) -> str:
        color = str2hex(x)
        color = color if color not in ["#ffffff", "#fff", "white"] else ""
        return color
@@ -114,7 +115,7 @@ class CSSPreprocessor:
        return cleaned_value

    @staticmethod
-    def style_conditions(style_value: str, style_name: str) -> tuple[bool, bool]:
+    def style_conditions(style_value: str, style_name: str) -> Tuple[bool, bool]:
        constraints_on_value = LiveCartaConfig.LIVECARTA_STYLE_ATTRS.get(
            style_name)
        value_not_in_possible_values_list = style_value not in LiveCartaConfig.LIVECARTA_STYLE_ATTRS[
@@ -156,7 +157,7 @@ class CSSPreprocessor:
        style = "; ".join(split_style)
        return style

-    def process_inline_styles_in_html_soup(self, html_href2html_body_soup: dict):
+    def process_inline_styles_in_html_soup(self, html_href2html_body_soup: Dict[str, BeautifulSoup]):
        """This function is designed to convert inline html styles"""
        for html_href in html_href2html_body_soup:
            html_content: BeautifulSoup = html_href2html_body_soup[html_href]
@@ -169,7 +170,7 @@ class CSSPreprocessor:
                    self.build_inline_style_content(inline_style)

    @staticmethod
-    def get_css_content(css_href, html_href, ebooklib_book):
+    def get_css_content(css_href: str, html_href: str, ebooklib_book) -> str:
        path_to_css_from_html = css_href
        html_folder = dirname(html_href)
        path_to_css_from_root = normpath(
--- a/src/epub_converter/epub_converter.py
+++ b/src/epub_converter/epub_converter.py
@@ -9,8 +9,8 @@ from pathlib import Path
 from itertools import chain
 from premailer import transform
 from collections import defaultdict
-from typing import Dict, Union, List
-from bs4 import BeautifulSoup, NavigableString, Tag
+from typing import List, Tuple, Dict, Union
+from bs4 import BeautifulSoup, Tag, NavigableString

 from src.util.helpers import BookLogger
 from src.epub_converter.css_processor import CSSPreprocessor
@@ -39,7 +39,8 @@ class EpubConverter:

        # toc tree structure stored as adj.list (NavPoint to list of NavPoints)
        # key = -1 for top level NavPoints
-        self.adjacency_list: Dict[Union[NavPoint, -1], Union[list, None]] = {}
+        self.adjacency_list: Dict[Union[NavPoint, -1],
+                                  Union[List[NavPoint], None]] = {}

        # list to offset Chapter_i on 1st level
        self.offset_sub_nodes = []
@@ -70,7 +71,8 @@ class EpubConverter:
                                            BeautifulSoup] = self.build_href2soup_content()

        self.logger.log("CSS inline style processing.")
-        self.css_processor.process_inline_styles_in_html_soup(self.html_href2html_body_soup)
+        self.css_processor.process_inline_styles_in_html_soup(
+            self.html_href2html_body_soup)
        self.logger.log("CSS files processing.")
        self.html_href2css_href, self.css_href2css_content = self.build_html_and_css_relations()
        self.logger.log("CSS styles fusion(inline+file).")
@@ -107,7 +109,6 @@ class EpubConverter:
    def build_href2soup_content(self) -> Dict[str, BeautifulSoup]:
        # using EpubElements
        # for now just for HTML objects, as it is the simplest chapter
-
        nodes = dict()
        for item in self.ebooklib_book.get_items_of_type(ebooklib.ITEM_DOCUMENT):
            html_body_text = item.get_body_content()
@@ -116,7 +117,7 @@ class EpubConverter:
            nodes[item.file_name] = soup
        return nodes

-    def build_html_and_css_relations(self) -> tuple[dict, dict]:
+    def build_html_and_css_relations(self) -> Tuple[Dict[str, List[str]], Dict[str, str]]:
        """
        Function is designed to get 2 dictionaries:
        The first is html_href2css_href. It is created to connect href of html to css files(hrefs of them
@@ -130,8 +131,8 @@ class EpubConverter:

        """
        # dictionary: href of html to related css files
-        html_href2css_href: defaultdict = defaultdict(list)
-        css_href2css_content: dict = {}
+        html_href2css_href: Dict[str, List[str]] = defaultdict(list)
+        css_href2css_content: Dict[str, str] = {}

        for item in self.ebooklib_book.get_items_of_type(ebooklib.ITEM_DOCUMENT):
            html_content = item.content
@@ -213,7 +214,9 @@ class EpubConverter:
                    html_content, css)
                self.html_href2html_body_soup[html_href] = html_content

-    def build_adjacency_list_from_toc(self, element: [Link, tuple, list], lvl=0):
+    def build_adjacency_list_from_toc(self,
+                                      element: Union[Link, Tuple[Section, List], List[Union[Link, Tuple]]],
+                                      lvl: int = 0) -> NavPoint:
        """
        Function
        self.adjacency_list builds based on TOC nested structure, got from self.ebooklib.toc
@@ -304,7 +307,7 @@ class EpubConverter:
            self.adjacency_list[-1].append(nav_point)
            self.hrefs_added_to_toc.add(nav_point.href)

-    def add_not_added_files_to_adjacency_list(self, not_added: list):
+    def add_not_added_files_to_adjacency_list(self, not_added: List[str]):
        """Function add files that not added to adjacency list"""
        for i, file in enumerate(not_added):
            nav_point = NavPoint(
@@ -315,7 +318,7 @@ class EpubConverter:
    def label_subchapters_with_lc_tag(self):
        for html_href in self.html_href2html_body_soup:
            ids, soup = self.html_href2subchapters_ids[html_href], \
-                  self.html_href2html_body_soup[html_href]
+                self.html_href2html_body_soup[html_href]
            for i in ids:
                tag = soup.find(id=i)
                tmp_tag = soup.new_tag("lc_tmp")
@@ -345,10 +348,13 @@ class EpubConverter:
                    mark.parent.unwrap()

    @staticmethod
-    def create_unique_id(href, id_):
+    def create_unique_id(href: str, id_: str) -> str:
        return re.sub(r"([^\w\s])|_|-", "", href) + re.sub(r"[_-]", "0", id_)

-    def match_href_to_path_from_toc(self, cur_file_path: str, href_in_link: str, internal_link_tag: Tag) -> [None, str]:
+    def match_href_to_path_from_toc(self,
+                                    cur_file_path: str,
+                                    href_in_link: str,
+                                    internal_link_tag: Tag) -> Union[None, str]:
        """
        Function used to find full path to file that is parsed from tag link
        TOC: a/b/c.xhtml
@@ -387,7 +393,7 @@ class EpubConverter:
        return full_path[0]

    @staticmethod
-    def create_new_anchor_span(soup, id_):
+    def create_new_anchor_span(soup: BeautifulSoup, id_: str) -> Tag:
        new_anchor_span = soup.new_tag("span")
        new_anchor_span.attrs["id"] = id_
        new_anchor_span.attrs["class"] = "link-anchor"
@@ -415,7 +421,8 @@ class EpubConverter:
            for toc_href in self.hrefs_added_to_toc:
                for tag in self.html_href2html_body_soup[toc_href].find_all(attrs={"id": re.compile(r".+")}):
                    if tag.attrs.get("class") not in ["converter-chapter-mark", "footnote-element"]:
-                        new_id = self.create_unique_id(toc_href, tag.attrs["id"])
+                        new_id = self.create_unique_id(
+                            toc_href, tag.attrs["id"])
                        tag.attrs["id"] = new_id

        def process_file_anchor():
@@ -427,11 +434,13 @@ class EpubConverter:
                    a_tag_href_matched_to_toc = self.match_href_to_path_from_toc(
                        toc_href, a_tag_href, internal_link_tag)
                    if a_tag_href_matched_to_toc:
-                        new_id = self.create_unique_id(a_tag_href_matched_to_toc, "")
+                        new_id = self.create_unique_id(
+                            a_tag_href_matched_to_toc, "")
                        internal_link_tag.attrs["placeholder"] = "{{tempStyleToAnchor-" + new_id + "}}"
                        if new_id not in self.internal_anchors:
                            anchor_soup = self.html_href2html_body_soup[a_tag_href_matched_to_toc]
-                            new_anchor_span = self.create_new_anchor_span(soup, new_id)
+                            new_anchor_span = self.create_new_anchor_span(
+                                soup, new_id)
                            # insert a new span to the beginning of the file
                            anchor_soup.insert(0, new_anchor_span)
                            self.internal_anchors.add(new_id)
@@ -442,7 +451,8 @@ class EpubConverter:
                soup = self.html_href2html_body_soup[toc_href]
                # process_file_element_anchor
                for internal_link_tag in soup.find_all("a", {"href": re.compile(r"(^.+\.(htm|html|xhtml)#.+)|(^#.+)")}):
-                    a_tag_href, a_tag_id = internal_link_tag.attrs["href"].split("#")
+                    a_tag_href, a_tag_id = internal_link_tag.attrs["href"].split(
+                        "#")
                    a_tag_href_matched_to_toc = self.match_href_to_path_from_toc(
                        toc_href, a_tag_href, internal_link_tag) if a_tag_href \
                        else path.normpath(toc_href).replace("\\", "/")
@@ -452,7 +462,8 @@ class EpubConverter:

                        anchor_soup = self.html_href2html_body_soup[a_tag_href_matched_to_toc]
                        anchor_tags = anchor_soup.find_all(attrs={"id": new_id}) or \
-                                      anchor_soup.find_all(attrs={"id": a_tag_id})  # if link is a footnote
+                            anchor_soup.find_all(
+                                attrs={"id": a_tag_id})  # if link is a footnote
                        if anchor_tags:
                            if len(anchor_tags) > 1:
                                self.logger.log(f"Warning in {toc_href}: multiple anchors:"
@@ -487,7 +498,9 @@ class EpubConverter:
        process_file_element_anchor()

    @staticmethod
-    def get_tags_between_chapter_marks(first_id: str, href: str, html_soup: BeautifulSoup) -> list:
+    def get_tags_between_chapter_marks(first_id: str,
+                                       href: str,
+                                       html_soup: BeautifulSoup) -> List[Union[Tag, NavigableString]]:
        """
        Get tags between LiveCarta chapter marks
        Parameters
@@ -568,7 +581,7 @@ class EpubConverter:
            for tl_nav_point in top_level_nav_points:
                self.detect_one_chapter(tl_nav_point)

-    def html_node_to_livecarta_chapter_item(self, nav_point: NavPoint, lvl=1) -> ChapterItem:
+    def html_node_to_livecarta_chapter_item(self, nav_point: NavPoint, lvl: int = 1) -> ChapterItem:
        """
        Function prepare style, tags to json structure
        Parameters
@@ -584,18 +597,18 @@ class EpubConverter:
            built chapter

        """
-        title = nav_point.title
+        title: str = nav_point.title
        content: BeautifulSoup = self.href_chapter_id2soup_html[(nav_point.href, nav_point.id)] \
            if nav_point.id else self.html_href2html_body_soup[nav_point.href]

-        indent = " " * lvl
+        indent: str = " " * lvl
        self.logger.log(indent + f"Chapter: {title} is processing.")
-        is_chapter = lvl <= LiveCartaConfig.SUPPORTED_LEVELS
+        is_chapter: bool = lvl <= LiveCartaConfig.SUPPORTED_LEVELS
        self.logger.log(indent + "Process title.")
-        title_preprocessed = self.html_processor.prepare_title(title)
+        title_preprocessed: str = self.html_processor.prepare_title(title)
        self.logger.log(indent + "Process content.")
-        content_preprocessed = self.html_processor.prepare_content(title_preprocessed, content,
-                                                                   remove_title_from_chapter=is_chapter)
+        content_preprocessed: BeautifulSoup = self.html_processor.prepare_content(
+            title_preprocessed, content, remove_title_from_chapter=is_chapter)

        self.book_image_src_path2aws_path = update_images_src_links(content_preprocessed,
                                                                    self.img_href2img_bytes,
@@ -613,7 +626,7 @@ class EpubConverter:
                sub_nodes.append(sub_chapter_item)
        return ChapterItem(title_preprocessed, str(content_preprocessed), sub_nodes)

-    def convert_to_dict(self) -> dict:
+    def convert_to_dict(self) -> Dict[str, List[Dict[str, Union[List, str]]]]:
        """Function which convert list of html nodes to appropriate json structure"""
        top_level_nav_points = self.adjacency_list[-1]
        top_level_chapters = []
@@ -633,7 +646,7 @@ class EpubConverter:


 if __name__ == "__main__":
-    epub_file_path = "../../books/epub/9780763774134.epub"
+    epub_file_path = "../../books/epub/9781119646044.epub"
    logger_object = BookLogger(
        name="epub", book_id=epub_file_path.split("/")[-1])

--- a/src/epub_converter/epub_solver.py
+++ b/src/epub_converter/epub_solver.py
@@ -7,7 +7,7 @@ from src.epub_converter.epub_converter import EpubConverter
 class EpubBook(BookSolver):
    """Class of .epub type book - child of BookSolver"""

-    def __init__(self, book_id=0, access=None, main_logger=None):
+    def __init__(self, book_id: int = 0, access=None, main_logger=None):
        super().__init__(book_id, access, main_logger)
        self.book_type = "epub"

@@ -28,7 +28,8 @@ class EpubBook(BookSolver):

        """
        css_processor = CSSPreprocessor()
-        html_processor = HtmlEpubPreprocessor(self.preset_path, logger=self.logger_object)
+        html_processor = HtmlEpubPreprocessor(
+            self.preset_path, logger=self.logger_object)
        json_converter = EpubConverter(
            self.book_path, access=self.access, logger=self.logger_object,
            css_processor=css_processor, html_processor=html_processor)
--- a/src/epub_converter/footnotes_processing.py
+++ b/src/epub_converter/footnotes_processing.py
@@ -1,5 +1,5 @@
 import re
-from typing import Tuple
+from typing import List, Tuple
 from bs4 import BeautifulSoup, Tag


@@ -16,8 +16,8 @@ def _replace_with_livecarta_anchor_tag(anchor, i):
    return new_tag


-def preprocess_footnotes(source_html_tag: Tag, href2soup_html: dict = None, noteref_attr_name="epub:type") \
-        -> Tuple[list, list, list]:
+def preprocess_footnotes(source_html_tag: Tag, href2soup_html: dict = None, noteref_attr_name: str = "epub:type") \
+        -> Tuple[List, List, List]:
    """
    This function preprocessing footnotes
    This function should be earlier that adding fonts in pipeline.
@@ -87,5 +87,4 @@ def preprocess_footnotes(source_html_tag: Tag, href2soup_html: dict = None, note
        noteref.attrs["data-id"] = i + 1
        noteref.attrs["id"] = f"footnote-{i + 1}"
        footnote.attrs["href"] = f"#footnote-{i + 1}"
-
    return footnotes, new_noterefs_tags, new_footnotes_tags
--- a/src/epub_converter/html_epub_processor.py
+++ b/src/epub_converter/html_epub_processor.py
@@ -1,14 +1,16 @@
 import re
 import json
-from bs4 import BeautifulSoup, NavigableString, Comment, Tag
+from typing import List, Dict, Union
+from bs4 import BeautifulSoup, Tag, NavigableString, Comment
+from bs4.element import PageElement

 from src.util.helpers import BookLogger


 class HtmlEpubPreprocessor:
-    def __init__(self, preset_path="../../presets/presets.json", logger=None):
+    def __init__(self, preset_path: str = "../../presets/presets.json", logger: BookLogger = None):
        self.preset = json.load(open(preset_path))
-        self.logger: BookLogger = logger
+        self.logger = logger
        self.name2function = {
            "table_wrapper": self._wrap_tags_with_table,
            "replacer": self._tags_to_correspond_livecarta_tag,
@@ -18,33 +20,37 @@ class HtmlEpubPreprocessor:
        }

    @staticmethod
-    def _add_span_to_save_ids_for_links(tag_to_be_removed, chapter_tag: BeautifulSoup):
+    def _add_span_to_save_ids_for_links(tag_to_be_removed: Union[PageElement, BeautifulSoup],
+                                        chapter_tag: BeautifulSoup):
        """
        Function adds span with id from tag_to_be_removed
        because this tag will be removed(unwrapped/extract)
        Parameters
        ----------
-        tag_to_be_removed: Soup object
+        tag_to_be_removed: Union[PageElement, BeautifulSoup]
+
        chapter_tag: BeautifulSoup

        Returns
        -------
-        None
+        NoReturn
            updated body tag

        """
-
-        def _insert_span_with_attrs_before_tag(chapter_tag: BeautifulSoup, tag_to_be_removed: Tag, id_: str,
-                                               class_: list):
+        def _insert_span_with_attrs_before_tag(chapter_tag: BeautifulSoup,
+                                               tag_to_be_removed: Tag,
+                                               id_: str,
+                                               class_: Union[List[str], str]):
            """Function inserts span before tag aren't supported by LiveCarta"""
-            new_tag = chapter_tag.new_tag("span")
+            new_tag: Tag = chapter_tag.new_tag("span")
            new_tag.attrs["id"] = id_ or ""
            new_tag.attrs["class"] = class_ or ""
            new_tag.string = "\xa0"
            tag_to_be_removed.insert_before(new_tag)

        if tag_to_be_removed.attrs.get("id"):
-            _insert_span_with_attrs_before_tag(chapter_tag=chapter_tag, tag_to_be_removed=tag_to_be_removed,
+            _insert_span_with_attrs_before_tag(chapter_tag=chapter_tag,
+                                               tag_to_be_removed=tag_to_be_removed,
                                               id_=tag_to_be_removed.attrs["id"],
                                               class_=tag_to_be_removed.attrs.get("class"))

@@ -78,7 +84,7 @@ class HtmlEpubPreprocessor:

        Returns
        -------
-        None
+        NoReturn
            Chapter Tag without comments

        """
@@ -110,27 +116,32 @@ class HtmlEpubPreprocessor:
                    p_tag.append(str(node))
                    node.replace_with(p_tag)

-    def _wrap_tags_with_table(self, chapter_tag: BeautifulSoup, rules: list):
+    def _wrap_tags_with_table(self,
+                              chapter_tag: BeautifulSoup,
+                              rules: List[Dict[str, List[Union[str, Dict[str, str]]]]]):
        """
        Function wraps <tag> with <table>
        Parameters
        ----------
        chapter_tag: BeautifulSoup
            Tag & contents of the chapter tag
+        rules: List[Dict[str, List[Union[str, Dict[str, str]]]]]
+            list of conditions under which the function is applied

        Returns
        -------
-        None
+        NoReturn
            Chapter Tag with wrapped certain tags with <table>

        """

-        def _wrap_tag_with_table(width="100", border="", bg_color=None):
+        def _wrap_tag_with_table(width: str = "100", border: str = "", bg_color: str = None) -> Tag:
            table = chapter_tag.new_tag("table")
            table.attrs["border"], table.attrs["align"], table.attrs["style"] \
                = border, "center", f"width:{width}%;"
            tbody, tr, td = \
-                chapter_tag.new_tag("tbody"), chapter_tag.new_tag("tr"), chapter_tag.new_tag("td")
+                chapter_tag.new_tag("tbody"), chapter_tag.new_tag(
+                    "tr"), chapter_tag.new_tag("td")
            td.attrs["bgcolor"] = bg_color
            tag_to_wrap.wrap(td)
            td.wrap(tr)
@@ -141,8 +152,10 @@ class HtmlEpubPreprocessor:

        def process_tag_using_table():
            _wrap_tag_with_table(
-                width=tag_to_wrap.attrs["width"] if tag_to_wrap.attrs.get("width") else "100",
-                border=tag_to_wrap.attrs["border"] if tag_to_wrap.attrs.get("border") else None,
+                width=tag_to_wrap.attrs["width"] if tag_to_wrap.attrs.get(
+                    "width") else "100",
+                border=tag_to_wrap.attrs["border"] if tag_to_wrap.attrs.get(
+                    "border") else None,
                bg_color=tag_to_wrap.attrs["bgcolor"] if tag_to_wrap.attrs.get("bgcolor") else None)
            self._add_span_to_save_ids_for_links(tag_to_wrap, chapter_tag)
            tag_to_wrap.unwrap()
@@ -155,23 +168,26 @@ class HtmlEpubPreprocessor:
                    process_tag_using_table()

    @staticmethod
-    def _tags_to_correspond_livecarta_tag(chapter_tag: BeautifulSoup, rules: list):
+    def _tags_to_correspond_livecarta_tag(chapter_tag: BeautifulSoup,
+                                          rules: List[Dict[str, Union[List[str], str, int, Dict[str, Union[str, int]]]]]):
        """
        Function to replace all tags to correspond LiveCarta tags
        Parameters
        ----------
        chapter_tag: BeautifulSoup
            Tag & contents of the chapter tag
+        rules: List[Dict[str, Union[List[str], str, int, Dict[str, Union[str, int]]]]]
+            list of conditions under which the function is applied

        Returns
        -------
-        None
+        NoReturn
            Chapter Tag with all tags replaced with LiveCarta tags

        """
        for rule in rules:
-            tags = rule["tags"]
-            tag_to_replace = rule["tag_to_replace"]
+            tags: List[str] = rule["tags"]
+            tag_to_replace: str = rule["tag_to_replace"]
            if rule["condition"]:
                for condition_on_tag in ((k, v) for k, v in rule["condition"].items() if v):
                    if condition_on_tag[0] == 'parent_tags':
@@ -193,40 +209,44 @@ class HtmlEpubPreprocessor:
                    tag.name = tag_to_replace

    @staticmethod
-    def _replace_attrs_in_tags(chapter_tag: BeautifulSoup, rules: list):
+    def _replace_attrs_in_tags(chapter_tag: BeautifulSoup, rules: List[Dict[str, Union[str, Dict[str, List[str]]]]]):
        """
        Function to replace all tags to correspond LiveCarta tags
        Parameters
        ----------
        chapter_tag: BeautifulSoup
            Tag & contents of the chapter tag
+        rules: List[Dict[str, Union[str, Dict[str, List[str]]]]]
+            list of conditions under which the function is applied

        Returns
        -------
-        None
+        NoReturn
            Chapter Tag with all tags replaced with LiveCarta tags

        """
        for rule in rules:
            attr = rule["attr"]
-            tags = rule["condition"]["tags"]
+            tags: List[str] = rule["condition"]["tags"]
            attr_to_replace = rule["attr_to_replace"]
            for tag in chapter_tag.find_all([re.compile(tag) for tag in tags],
-                                                            {attr: re.compile(r".*")}):
+                                            {attr: re.compile(r".*")}):
                tag[attr_to_replace] = tag[attr]
                del tag[attr]

-    def _unwrap_tags(self, chapter_tag: BeautifulSoup, rules: dict):
+    def _unwrap_tags(self, chapter_tag: BeautifulSoup, rules: Dict[str, List[str]]):
        """
        Function unwrap tags and moves id to span
        Parameters
        ----------
        chapter_tag: BeautifulSoup
            Tag & contents of the chapter tag
+        rules: Dict[str, List[str]]
+            dict of tags to unwrap

        Returns
        -------
-        None
+        NoReturn
            Chapter Tag with unwrapped certain tags

        """
@@ -239,21 +259,23 @@ class HtmlEpubPreprocessor:
                tag.unwrap()

    @staticmethod
-    def _insert_tags_into_correspond_tags(chapter_tag: BeautifulSoup, rules: list):
+    def _insert_tags_into_correspond_tags(chapter_tag: BeautifulSoup, rules: List[Dict[str, Union[List[str], str, Dict[str, Union[str, int]]]]]):
        """
        Function inserts tags into correspond tags
        Parameters
        ----------
        chapter_tag: BeautifulSoup
            Tag & contents of the chapter tag
+        rules: List[Dict[str, Union[List[str], str, Dict[str, Union[str, int]]]]]
+            list of conditions under which the function is applied

        Returns
        -------
-        None
+        NoReturn
            Chapter Tag with inserted tags

        """
-        def insert(tag):
+        def insert(tag: Tag):
            tag_to_insert = \
                chapter_tag.new_tag(rule["tag_to_insert"])
            # insert all items that was in tag to subtag and remove from tag
@@ -263,7 +285,7 @@ class HtmlEpubPreprocessor:
            tag.append(tag_to_insert)

        for rule in rules:
-            tags = rule["tags"]
+            tags: List[str] = rule["tags"]
            if rule["condition"]:
                for condition_on_tag in ((k, v) for k, v in rule["condition"].items() if v):
                    if condition_on_tag[0] == 'parent_tags':
@@ -283,29 +305,28 @@ class HtmlEpubPreprocessor:
                for tag in chapter_tag.find_all([re.compile(tag) for tag in tags]):
                    insert(tag)

-    def _remove_headings_content(self, chapter_tag, title_of_chapter: str):
+    def _remove_headings_content(self, chapter_tag: Union[BeautifulSoup, PageElement], title_of_chapter: str):
        """
        Function
        - cleans/removes headings from chapter in order to avoid duplication of chapter titles in the content
        - adds span with id in order to
        Parameters
        ----------
-        chapter_tag: soup object
+        chapter_tag: Union[BeautifulSoup, PageElement]
            Tag of the page
        title_of_chapter: str
            Chapter title

        Returns
        -------
-        None
+        NoReturn
            clean/remove headings & add span with id

        """
        title_of_chapter = title_of_chapter.lower()
-        if title_of_chapter == "chapter 1":
-            pass
        for tag in chapter_tag.contents:
-            text = tag if isinstance(tag, NavigableString) else tag.text
+            tag: PageElement
+            text: str = tag if isinstance(tag, NavigableString) else tag.text
            if re.sub(r"[\s\xa0]", "", text):
                text = re.sub(r"[\s\xa0]", " ", text).lower()
                text = text.strip()  # delete extra spaces
@@ -333,7 +354,7 @@ class HtmlEpubPreprocessor:

        Returns
        -------
-        None
+        NoReturn
            Chapter Tag with processed tables

        """
@@ -370,7 +391,7 @@ class HtmlEpubPreprocessor:

        Returns
        -------
-        None
+        NoReturn
            Chapter Tag without original classes of the book

        """
@@ -413,9 +434,9 @@ class HtmlEpubPreprocessor:
        # 2.
        self._wrap_strings_with_p(content_tag)
        # 3-6.
-        for dict in self.preset:
-            func = self.name2function[dict["preset_name"]]
-            func(content_tag, dict['rules'])
+        for rule in self.preset:
+            func = self.name2function[rule["preset_name"]]
+            func(content_tag, rule['rules'])
        # 7.
        if remove_title_from_chapter:
            self._remove_headings_content(content_tag, title_str)
--- a/src/epub_converter/image_processing.py
+++ b/src/epub_converter/image_processing.py
@@ -1,13 +1,14 @@
 import os
 import pathlib
+from typing import Dict
 from bs4 import BeautifulSoup

 from src.access import Access


-def save_image_to_aws(access: Access, img_file_path: str, img_content: bytes, book_id: str):
+def save_image_to_aws(access: Access, img_file_path: str, img_content: bytes, book_id: str) -> str:
    """Function saves all images to Amazon web service"""
-    link_path = access.send_image(
+    link_path: str = access.send_image(
        img_file_path, doc_id=book_id, img_content=img_content)
    return link_path

@@ -27,11 +28,11 @@ def save_image_locally(img_file_path: str, img_content: bytes, book_id: str):


 def update_images_src_links(body_tag: BeautifulSoup,
-                            img_href2img_content: dict,
+                            img_href2img_content: Dict[str, bytes],
                            path_to_html: str,
-                            access=None,
-                            path2aws_path: dict = None,
-                            book_id: str = None) -> dict:
+                            access: Access = None,
+                            path2aws_path: Dict[str, str] = None,
+                            book_id: str = None) -> Dict[str, str]:
    """Function makes dictionary image_src_path -> Amazon web service_path"""
    img_tags = body_tag.find_all("img")
    for img in img_tags:
@@ -43,7 +44,7 @@ def update_images_src_links(body_tag: BeautifulSoup,
        assert path_to_img_from_root in img_href2img_content, \
            f"Image {path_to_img_from_html} in file {path_to_html} was not added to manifest."

-        img_content = img_href2img_content[path_to_img_from_root]
+        img_content: bytes = img_href2img_content[path_to_img_from_root]
        if access is not None:
            if path_to_img_from_root in path2aws_path:
                new_folder = path2aws_path[path_to_img_from_root]
--- a/src/epub_converter/tag_inline_style_processor.py
+++ b/src/epub_converter/tag_inline_style_processor.py
@@ -1,9 +1,8 @@
 import re
 import cssutils
 from typing import List
-
 from logging import CRITICAL
-from bs4 import BeautifulSoup
+from bs4 import BeautifulSoup, Tag

 from src.livecarta_config import LiveCartaConfig

@@ -11,13 +10,13 @@ cssutils.log.setLevel(CRITICAL)


 class TagInlineStyleProcessor:
-    def __init__(self, tag_inline_style):
+    def __init__(self, tag_inline_style: Tag):
        # tag with inline style + style parsed from css file
        self.tag_inline_style = tag_inline_style
-        self.tag_inline_style.attrs['style'] = self.process_inline_style()
+        self.tag_inline_style.attrs['style']: str = self.process_inline_style()

    @staticmethod
-    def remove_white_if_no_bgcolor(style_, tag):
+    def remove_white_if_no_bgcolor(style_: str, tag: Tag) -> str:
        """Function remove text white color if there is no bg color"""
        if "background" in style_:
            style_ = style_.replace(
@@ -62,13 +61,13 @@ class TagInlineStyleProcessor:
    #     return split_style

    @staticmethod
-    def indents_processing(split_style: list) -> str:
+    def indents_processing(split_style: List[str]) -> str:
        """
        Function process indents from left using
        formula_of_indent: indent = abs(margin - text_indent)
        Parameters
        ----------
-         split_style: list
+         split_style: List[str]
             list of styles split by ";"

         Returns
@@ -111,7 +110,7 @@ class TagInlineStyleProcessor:
            return processed_style
        return processed_style

-    def process_inline_style(self):
+    def process_inline_style(self) -> str:
        """
        Function processes final(css+initial inline) inline style
        Steps
@@ -180,7 +179,7 @@ class TagInlineStyleProcessor:
            self.tag_inline_style.append(correspond_tag)

    @staticmethod
-    def wrap_span_in_tag_to_save_style_attrs(initial_tag):
+    def wrap_span_in_tag_to_save_style_attrs(initial_tag: Tag):
        """Function designed to save style attrs that cannot be in tag.name -> span"""
        dictkeys_pattern = re.compile("|".join(LiveCartaConfig.LIVECARTA_STYLES_CAN_BE_IN_TAG))
        if re.findall(dictkeys_pattern, initial_tag.name) and initial_tag.attrs.get("style"):
@@ -212,7 +211,7 @@ class TagInlineStyleProcessor:
                initial_tag.attrs["style"] = span_style
                initial_tag.wrap(tag)

-    def convert_initial_tag(self):
+    def convert_initial_tag(self) -> Tag:
        self.change_attrs_with_corresponding_tags()
        self.wrap_span_in_tag_to_save_style_attrs(self.tag_inline_style)
        return self.tag_inline_style