Function annotations

2022-04-29 17:44:07 +03:00
parent 8de1d0d042
commit 37533e9b67
5 changed files with 187 additions and 130 deletions
--- a/src/data_objects.py
+++ b/src/data_objects.py
@@ -38,7 +38,7 @@ class NavPoint:
 def flatten(x):
-    """magic function from stackoverflow for list flattening"""
+    """Magic function from stackoverflow for list flattening"""
    atom = lambda i: not isinstance(i, list)
    nil = lambda i: not i
    car = lambda i: i[0]
--- a/src/epub_converter/css_reader.py
+++ b/src/epub_converter/css_reader.py
@@ -28,24 +28,27 @@ list_types = ['circle', 'disc', 'armenian', 'decimal',
              'lower-roman', 'upper-alpha', 'upper-latin', 'upper-roman', 'none']
-def convert_tag_values(value):
+def convert_tag_values(value: str) -> str:
-    """Function 1. converts values of tags from em/%/pt to px
+    """
-                2. find closest font-size px
+    Function
    - converts values of tags from em/%/pt to px
    - find closest font-size px
    Parameters
    ----------
    value: str
    Returns
    -------
-    converted value: str
+    value: str
    """
    """
    def find_closest_size(value):
        possible_sizes = list(takewhile(lambda x: value > x, sizes_pr))
        last_possible_size_index = sizes_pr.index(possible_sizes[-1])
        return sizes_px[last_possible_size_index]
-    font_size_regexp = re.compile(r'(^-*(\d*\.*\d+)%$)|(^-*(\d*\.*\d+)em$)|(^-*(\d*\.*\d+)pt$)')
+    font_size_regexp = re.compile(
        r'(^-*(\d*\.*\d+)%$)|(^-*(\d*\.*\d+)em$)|(^-*(\d*\.*\d+)pt$)')
    has_style_attrs = re.search(font_size_regexp, value)
    if has_style_attrs:
        if has_style_attrs.group(1):
@@ -61,7 +64,6 @@ def convert_tag_values(value):
    return value
 """
 Dictionary LIVECARTA_STYLE_ATTRS = { css property: value }
 Style properties that can be used to fit livecarta css style convention.
@@ -164,17 +166,20 @@ LIVECARTA_STYLE_ATTRS_SHOULD_BE_TAG = {
 def check_style_to_be_tag(style) -> List[tuple]:
-    """Function search style properties that can be converted to tags.
+    """
    Function searches style properties that can be converted to tags.
    It searches for them and prepares a list of properties to be removed from the style string
    Parameters
    ----------
    style: str
        <tag style="...">
    Returns
    -------
-    properties to remove: list
+    to_remove: list
-    """
+        properties to remove
    """
    to_remove = []
    for k in LIVECARTA_STYLE_ATTRS_SHOULD_BE_TAG:
        if f'{k[0]}:{k[1]}' in style:
@@ -227,7 +232,7 @@ class TagStyleConverter:
    @staticmethod
    def remove_white_if_no_bgcolor(style_, tag):
-        """ Function remove white color if there is no text bg color """
+        """Function removes white text color if there is no bg color"""
        if 'background' in style_:
            return style_
@@ -264,9 +269,11 @@ class TagStyleConverter:
            item = item.split(':')
            if item[0] in ['text-indent', 'margin-left', 'margin']:
                if len(item[1].split(' ')) == 3:
-                    item[1] = convert_tag_values(item[1].split(' ')[-2]) # split returns middle value
+                    item[1] = convert_tag_values(item[1].split(
                        ' ')[-2])  # split returns middle value
                else:
-                    item[1] = convert_tag_values(item[1].split(' ')[-1]) # split returns last value
+                    item[1] = convert_tag_values(item[1].split(
                        ' ')[-1])  # split returns last value
            clean_style += item[0] + ': ' + item[1] + '; '
        margin_left_regexp = re.compile(
@@ -360,7 +367,7 @@ class TagStyleConverter:
            s = f'{attr}:{value};'
            self.style = self.style.replace(s, '')
            self.style = self.style.strip()
-            if i == 0:
+            if not i:
                self.tag_with_inline_style.name = LIVECARTA_STYLE_ATTRS_SHOULD_BE_TAG[(
                    attr, value)]
                new_tags.append(self.tag_with_inline_style)
@@ -402,7 +409,6 @@ class TagStyleConverter:
                if has_p_style_attrs:
                    p_style += item + ';'
                    initial_style = initial_style.replace(item + ';', '')
            # here check that this style is exactly the same. Not 'align' when we have 'text-align', or 'border' when we have 'border-top'
            styles_to_be_saved_in_span = [((attr + ':') in initial_style) & (
                '-' + attr not in initial_style) for attr in styles_cant_be_in_p]
@@ -410,14 +416,15 @@ class TagStyleConverter:
                # if find styles that cannot be in <p> -> wrap them in span
                tag.name = 'span'
                p_tag = BeautifulSoup(features='lxml').new_tag('p')
-                li_attrs_regexp = re.compile(r'(list-style-type:(\w+);)')
+                p_attrs_regexp = re.compile(r'(list-style-type:(\w+);)')
-                has_li_style_attr = re.search(li_attrs_regexp, initial_style)
+                has_p_style_attr = re.search(p_attrs_regexp, initial_style)
-                span_style = initial_style if not has_li_style_attr else initial_style.replace(
+                span_style = initial_style if not has_p_style_attr else initial_style.replace(
-                    has_li_style_attr.group(1), '')
+                    has_p_style_attr.group(1), '')
                p_tag.attrs['style'] = p_style
                tag.attrs['style'] = span_style
                tag.wrap(p_tag)
-            else: tag.attrs['style'] = p_style
+            else:
                tag.attrs['style'] = p_style
    @staticmethod
    def wrap_span_in_li_to_save_style_attrs(tag):
@@ -426,14 +433,13 @@ class TagStyleConverter:
            styles_cant_be_in_li = [attr for attr in LIVECARTA_STYLE_ATTRS if
                                    attr not in ['text-align', 'list-style-type']]
-            styles_to_be_saved = [attr in tag.attrs.get(
+            styles_to_be_saved_in_span = [attr in tag.attrs.get(
                'style') for attr in styles_cant_be_in_li]
-            if any(styles_to_be_saved):
+            if any(styles_to_be_saved_in_span):
                tag.name = 'span'
                li_tag = BeautifulSoup(features='lxml').new_tag('li')
                span_style = tag.attrs['style']
                li_style = ''
                for possible_li_attrs_regexp in [re.compile(r'(text-align:(\w+);)'),
                                                 re.compile(r'(list-style-type:(\w+);)')]:
                    has_li_style_attrs = re.search(
@@ -442,7 +448,6 @@ class TagStyleConverter:
                        li_style += has_li_style_attrs.group(1)
                        span_style = span_style.replace(
                            has_li_style_attrs.group(1), '')
                li_tag.attrs['style'] = li_style
                tag.attrs['style'] = span_style
                tag.wrap(li_tag)
@@ -454,23 +459,23 @@ class TagStyleConverter:
            styles_cant_be_in_ul_ol = [
                attr for attr in LIVECARTA_STYLE_ATTRS if attr not in ['list-style-type']]
-            check = [attr in tag.attrs.get('style')
+            styles_to_be_saved_in_span = [attr in tag.attrs.get('style')
                                          for attr in styles_cant_be_in_ul_ol]
-            if any(check):
+            if any(styles_to_be_saved_in_span):
                tag.name = 'span'
-                li_tag = BeautifulSoup(features='lxml').new_tag('ul')
+                oul_tag = BeautifulSoup(features='lxml').new_tag(tag.name)
                span_style = tag.attrs['style']
-                possible_li_attrs_regexp = re.compile(
+                possible_uol_attrs_regexp = re.compile(
                    r'(list-style-type:(\w+);)')
-                has_li_style_attrs = re.search(
+                has_uol_style_attrs = re.search(
-                    possible_li_attrs_regexp, span_style)
+                    possible_uol_attrs_regexp, span_style)
-                if has_li_style_attrs and has_li_style_attrs.group(1):
+                if has_uol_style_attrs and has_uol_style_attrs.group(1):
-                    oul_style = has_li_style_attrs.group(1)
+                    oul_style = has_uol_style_attrs.group(1)
                    span_style = span_style.replace(oul_style, '')
-                    li_tag.attrs['style'] = oul_style
+                    oul_tag.attrs['style'] = oul_style
                tag.attrs['style'] = span_style
-                tag.wrap(li_tag)
+                tag.wrap(oul_tag)
    @staticmethod
    def wrap_span_in_h_to_save_style_attrs(tag):
@@ -482,10 +487,10 @@ class TagStyleConverter:
            tag.name = 'span'
            tag.wrap(h_tag)
            style = tag.attrs['style']
-            li_attrs_regexp = re.compile(r'(list-style-type:(\w+);)')
+            h_attrs_regexp = re.compile(r'(list-style-type:(\w+);)')
-            has_li_style_attr = re.search(li_attrs_regexp, style)
+            has_h_style_attr = re.search(h_attrs_regexp, style)
-            tag.attrs['style'] = style if not has_li_style_attr else style.replace(
+            tag.attrs['style'] = style if not has_h_style_attr else style.replace(
-                has_li_style_attr.group(1), '')
+                has_h_style_attr.group(1), '')
    def convert_initial_tag(self):
        self.tag_with_inline_style = self.change_attrs_with_corresponding_tags()
@@ -496,7 +501,7 @@ class TagStyleConverter:
        return self.tag_with_inline_style
-def convert_html_soup_with_css_style(html_soup: BeautifulSoup, css_text: str):
+def convert_html_soup_with_css_style(html_soup: BeautifulSoup, css_text: str) -> BeautifulSoup:
    """Function adds styles from .css to inline style"""
    css_text = css_text.replace(
        '@namespace epub "http://www.idpf.org/2007/ops";', '')
--- a/src/epub_converter/epub_converter.py
+++ b/src/epub_converter/epub_converter.py
@@ -1,7 +1,6 @@
 import re
 import json
 import codecs
 import logging
 import os
 from os.path import dirname, normpath, join
 from itertools import chain
@@ -51,7 +50,8 @@ class EpubConverter:
        # flag to be updated while ebooklib.toc is parsed
        self.id_anchor_exist_in_nav_points = False
        self.img_href2img_bytes = {}  # file path to bytes
-        self.book_image_src_path2aws_path = {}  # file path from <a> to generated aws path
+        # file path from <a> to generated aws path
        self.book_image_src_path2aws_path = {}
        self.footnotes_contents: List[str] = []  # to be sent on server as is
        self.noterefs: List[Tag] = []  # start of the footnote
        self.footnotes: List[Tag] = []  # end of the footnote
@@ -116,7 +116,6 @@ class EpubConverter:
        return nodes
    def get_css_content(self, css_href, html_href):
        path_to_css_from_html = css_href
        html_folder = dirname(html_href)
        path_to_css_from_root = normpath(
@@ -132,8 +131,8 @@ class EpubConverter:
        The first is css_href2css_content. It is created to connect href of css to content of css
        The second is html_href2css_href. It is created to connect href of html to css files(hrefs of them) which are used on this html
        ...2... = key2value
        """
        """
        # dictionary: href of html to related css files
        html_href2css_href: defaultdict = defaultdict(list)
        css_href2css_content: dict = {}
@@ -165,6 +164,7 @@ class EpubConverter:
        """
        This function is designed to update html_href2html_body_soup
        And add to html_inline_style css_style_content
        """
        for html_href in self.html_href2html_body_soup:
            if self.html_href2css_href.get(html_href):
@@ -191,8 +191,8 @@ class EpubConverter:
        :param element: [Link, tuple, list] - element that appears in TOC(usually parsed from nav.ncx)
        :param lvl: level of depth
        """
        """
        if isinstance(element, Link):
            nav_point = NavPoint(element)
            if nav_point.id:
@@ -215,7 +215,8 @@ class EpubConverter:
            sub_nodes = []
            for elem in second:
                if ('section' in first.title.lower() or 'part' in first.title.lower()) and lvl == 1:
-                    self.offset_sub_nodes.append(self.build_adjacency_list_from_toc(elem, lvl))
+                    self.offset_sub_nodes.append(
                        self.build_adjacency_list_from_toc(elem, lvl))
                else:
                    sub_nodes.append(
                        self.build_adjacency_list_from_toc(elem, lvl + 1))
@@ -239,7 +240,7 @@ class EpubConverter:
        else:
            assert 0, f'Error. Element is not tuple/Link/list instance: {type(element)}'
-    def is_toc_empty(self):
+    def is_toc_empty(self) -> bool:
        """Function checks is toc empty"""
        # there is no toc in ebook or no top chapters
        if (self.ebooklib_book.toc is None) or (self.adjacency_list.get(-1) is None):
@@ -295,19 +296,26 @@ class EpubConverter:
        new_anchor_span.string = "\xa0"
        return new_anchor_span
-    def match_href_to_path_from_toc(self, cur_file_path, href_in_link, internal_link_tag):
+    def match_href_to_path_from_toc(self, cur_file_path: str, href_in_link: str, internal_link_tag: Tag) -> str:
        """
        Function used to find full path to file that is parsed from tag link
        TOC: a/b/c.xhtml
        b/c.xhtml -> a/b/c.xhtml
        c.xhtml -> a/b/c.xhtml
        Parameters
        ----------
        cur_file_path: str
            path to current file with tag link
        href_in_link: str
            filename got from tag link, like file1.xhtml
        internal_link_tag: Tag
            tag object that is parsed now
-        Used to find full path to file that is parsed from tag link
+        Returns
        -------
        full_path[0]: str
            prepared content
        :param cur_file_path: path to current file with tag link
        :param href_in_link: filename got from tag link, like file1.xhtml
        :param internal_link_tag: tag object that is parsed now
        :return:
        """
        dir_name = os.path.dirname(cur_file_path)
        normed_path = os.path.normpath(os.path.join(
@@ -331,6 +339,12 @@ class EpubConverter:
        Function
        - processing internal links in a book
        - make ids unique
        Steps
        ----------
        1. rebuild ids to be unique in all documents
        2a. process anchor which is a whole xhtml file
        2b. process anchor which is an element in xhtml file
        """
        # 1. rebuild ids to be unique in all documents
        for toc_href in self.hrefs_added_to_toc:
@@ -344,7 +358,7 @@ class EpubConverter:
                new_id = self.create_unique_id(toc_href, tag.attrs['id'])
                tag.attrs['id'] = new_id
-        # 2.a) process anchor which is a whole xhtml file
+        # 2a. process anchor which is a whole xhtml file
        internal_link_reg1 = re.compile(
            r'(^(?!https?://).+\.(htm|html|xhtml)$)')
        for toc_href in self.hrefs_added_to_toc:
@@ -367,7 +381,7 @@ class EpubConverter:
                del internal_link_tag.attrs['href']
-        # 2.b) process anchor which is an element in xhtml file
+        # 2b. process anchor which is an element in xhtml file
        internal_link_reg2 = re.compile(r'(^.+\.(htm|html|xhtml)\#.+)|(^\#.+)')
        for toc_href in self.hrefs_added_to_toc:
            soup = self.html_href2html_body_soup[toc_href]
@@ -418,9 +432,9 @@ class EpubConverter:
                                    f' Should be anchor with new id={new_id} in {a_tag_href_matched_to_toc} file.'
                                    f' Old id={a_tag_id}')
-    def build_one_chapter(self, nav_point):
+    def build_one_chapter(self, nav_point: NavPoint):
        """
-        Updates self.href_chapter_id2soup_html (mapping from (href,id) to chapter content/html soup object)
+        Function updates self.href_chapter_id2soup_html (mapping from (href,id) to chapter content/html soup object)
        3 cases:
            id wraps all chapter content,
@@ -429,7 +443,13 @@ class EpubConverter:
        In all cases we know where chapter starts. Therefore chapter is all tags between chapter's id
        and id of the next chapter/subchapter
        Parameters
        ----------
        nav_point: NavPoint
        Returns
        -------
        None
        """
        if nav_point.id:
            soup = self.html_href2html_body_soup[nav_point.href]
@@ -446,7 +466,7 @@ class EpubConverter:
                self.build_one_chapter(sub_node)
    def define_chapters_content(self):
-        """ Function build chapters content starts from top level chapters """
+        """Function builds chapters content, starting from top level chapters"""
        top_level_nav_points = self.adjacency_list[-1]
        if self.id_anchor_exist_in_nav_points:
            for point in top_level_nav_points:
@@ -483,8 +503,8 @@ class EpubConverter:
            self.logger.log(f'{indent}Chapter: {title} is prepared.')
        return ChapterItem(title_preprocessed, content_preprocessed, sub_nodes)
-    def convert_to_dict(self):
+    def convert_to_dict(self) -> dict:
-        """ Function which convert list of html nodes to appropriate json structure. """
+        """Function converts a list of html nodes to the appropriate json structure"""
        top_level_nav_points = self.adjacency_list[-1]
        top_level_chapters = []
@@ -502,7 +522,7 @@ class EpubConverter:
 if __name__ == "__main__":
-    filename = '9781641051217'
+    filename = '9781614382264'
    logger_object = BookLogger(name='epub', book_id=filename)
    json_converter = EpubConverter(f'../../epub/{filename}.epub',
--- a/src/epub_converter/html_epub_preprocessor.py
+++ b/src/epub_converter/html_epub_preprocessor.py
@@ -9,7 +9,7 @@ from src.access import Access
 from src.livecarta_config import LiveCartaConfig
-def save_image_locally(img_file_path, img_content, book_id):
+def save_image_locally(img_file_path: str, img_content: bytes, book_id: str):
    """Function saves all images locally"""
    folder_path = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
    new_path = pathlib.Path(os.path.join(
@@ -24,19 +24,19 @@ def save_image_locally(img_file_path, img_content, book_id):
    return new_img_path
-def save_image_to_aws(access: Access, img_file_path, img_content: bytes, book_id):
+def save_image_to_aws(access: Access, img_file_path: str, img_content: bytes, book_id: str):
    """Function saves all images to Amazon web service"""
    link_path = access.send_image(
        img_file_path, doc_id=book_id, img_content=img_content)
    return link_path
-def update_images_src_links(body_tag: Tag,
+def update_images_src_links(body_tag: BeautifulSoup,
                            href2img_content: dict,
-                            path_to_html,
+                            path_to_html: str,
                            access=None,
-                            path2aws_path=None,
+                            path2aws_path: dict = None,
-                            book_id=None):
+                            book_id: str = None) -> dict:
    """Function makes dictionary image_src_path -> Amazon web service_path"""
    img_tags = body_tag.find_all('img')
@@ -99,13 +99,22 @@ def preprocess_table(body_tag: BeautifulSoup):
            table.attrs['border'] = '1'
-def process_lists(body_tag):
+def process_lists(body_tag: BeautifulSoup):
    """
-    Function to process tags <li>.
+    Function
-    Unwrap <p> tags.
+    - process tags <li>.
-    """
+    - unwrap <p> tags.
-    li_tags = body_tag.find_all("li")
+    Parameters
    ----------
    body_tag: Tag, soup object
    Returns
    -------
    None
    """
    li_tags = body_tag.find_all("li")
    for li_tag in li_tags:
        if li_tag.p:
            li_tag.attrs.update(li_tag.p.attrs)
@@ -113,7 +122,7 @@ def process_lists(body_tag):
 def insert_span_with_attrs_before_tag(main_tag, tag, id_, class_):
-    """Function inserts span before tag to be removed(aren't supported by livecarta)"""
+    """Function inserts span before a tag that isn't supported by livecarta"""
    new_tag = main_tag.new_tag("span")
    new_tag.attrs['id'] = id_ or ''
    new_tag.attrs['class'] = class_ or ''
@@ -121,8 +130,8 @@ def insert_span_with_attrs_before_tag(main_tag, tag, id_, class_):
    tag.insert_before(new_tag)
-def clean_headings_content(content: Tag, title: str):
+def clean_headings_content(content: BeautifulSoup, title: str):
-    def add_span_to_save_ids_for_links(tag_to_be_removed, body_tag):
+    def add_span_to_save_ids_for_links(tag_to_be_removed: Tag, body_tag: BeautifulSoup):
        if tag_to_be_removed.attrs.get('id'):
            insert_span_with_attrs_before_tag(body_tag,
                                              tag_to_be_removed,
@@ -194,6 +203,7 @@ def preprocess_footnotes(source_html_tag: Tag, href2soup_html: dict = None, note
    <p>Here is an example footnote<sup><a epub:type="noteref" href="#n1">1</a></sup></p>
    <aside epub:type="footnote" id="n1"><p>With a footnote here.</p></aside>
   """
    footnotes = []
    noterefs_tags = source_html_tag.find_all(
@@ -258,21 +268,28 @@ def preprocess_footnotes(source_html_tag: Tag, href2soup_html: dict = None, note
    return footnotes, new_noterefs_tags, new_footnotes_tags
-def unwrap_structural_tags(body_tag):
+def unwrap_structural_tags(body_tag: BeautifulSoup):
-    """Main function that works with structure of html. Make changes inplace.
+    """
    Main function that works with structure of html. Make changes inplace.
    Parameters
    ----------
    body_tag: Tag, soup object
    Steps
    ----------
    1. Extracts tags that are not needed
    2. Checks that marks for pointing a start of a chapter are placed on one level in html tree.
    Mark is tag with 'class': 'converter-chapter-mark'. Added while TOC was parsed.
    This tag must have a body_tag as a parent.
    Otherwise, it is wrapped with some tags. Like:
        <p> <span id='123', class='converter-chapter-mark'> </span> </p>
    3. Headings that are not supported by livecarta converts to <p>
    4. Wrapping NavigableString
-    :param body_tag: Tag, soup object
+
-    :return: None
+    Returns
    -------
    None
    """
    def preserve_class_in_aside_tag(tag_):
@@ -284,10 +301,18 @@ def unwrap_structural_tags(body_tag):
            if not tag_.parent.attrs.get('class'):
                tag_.parent.attrs['class'] = tag_class
-    def preserve_class_in_section_tag(tag_) -> bool:
+    def preserve_class_in_section_tag(tag_: BeautifulSoup) -> bool:
        """
-        to save css style inherited from class, copy class to child <p>
+        Function saves css style inherited from class, copies class to child <p>
        returns True, if <section> could be unwrapped
        Parameters
        ----------
        tag_: Tag, soup object
        Returns
        -------
        bool
            True, if <section> could be unwrapped
        """
        # this is for Wiley books with boxes
        tag_class = tag_.attrs['class'] if not isinstance(
@@ -314,9 +339,11 @@ def unwrap_structural_tags(body_tag):
                                              class_=tag_to_be_removed.attrs.get('class'))
    def replace_div_tag_with_table():
-        """Function replace <div> with <table>:
+        """
        Function replace <div> with <table>:
        1. Convert div with certain classes to tables
        2. Add background color to div with background-color
        """
        for div in body_tag.find_all("div"):
            if div.attrs.get('class'):
@@ -431,7 +458,7 @@ def unwrap_structural_tags(body_tag):
    return body_tag
-def get_tags_between_chapter_marks(first_id, href, html_soup):
+def get_tags_between_chapter_marks(first_id: str, href: str, html_soup: BeautifulSoup) -> list:
    """After processing on a first_id that corresponds to current chapter,
    from initial html_soup all tags from current chapter are extracted
@@ -441,7 +468,7 @@ def get_tags_between_chapter_marks(first_id, href, html_soup):
        Id that point where a chapter starts. A Tag with class: 'converter-chapter-mark'
    href:
        Name of current chapter's file
-    html_soup :
+    html_soup: Tag, soup object
        Soup object of current  file
    Returns
@@ -536,37 +563,33 @@ def prepare_formatted(text: str) -> str:
    return text
-def wrap_preformatted_span_with_table(main_tag, old_tag):
+def wrap_preformatted_span_with_table(chapter_tag: Tag, span_tag: Tag) -> Tag:
    """Function wraps <span> with <table>"""
-    table = main_tag.new_tag("table")
+    table, tbody, tr, td = chapter_tag.new_tag("table"), chapter_tag.new_tag(
-    table.attrs['border'] = '1px #ccc;'
+        "tbody"), chapter_tag.new_tag("tr"), chapter_tag.new_tag("td")
-    table.attrs['style'] = 'width:100%;'
+    table.attrs['border'], table.attrs['style'] = '1px #ccc;', 'width:100%;'
    tbody = main_tag.new_tag("tbody")
    tr = main_tag.new_tag("tr")
    td = main_tag.new_tag("td")
    td.attrs['bgcolor'] = '#f5f5f5'
    # td.attrs['border-radius'] = '4px'
-    old_tag.wrap(td)
+    span_tag.wrap(td)
    td.wrap(tr)
    tr.wrap(tbody)
    tbody.wrap(table)
    return table
-def preprocess_pre_tags(chapter_tag):
+def preprocess_pre_tags(chapter_tag: BeautifulSoup):
-    """Function preprocessing <pre> tags
+    """
    Function preprocessing <pre> tags
    Parameters
    ----------
-    chapter_tag: BeautifulSoup
+    chapter_tag: Tag, soup object
    Steps
    ----------
-    1. cleaning \n
+    1. Process NavigableString
-    2. heading removal
+    2. Process Tags and their children
    3. processing tags
    4. class removal
    """
    """
    for pre in chapter_tag.find_all("pre"):
        new_tag = BeautifulSoup(features='lxml').new_tag("span")
        new_tag.attrs = pre.attrs.copy()
@@ -599,17 +622,26 @@ def preprocess_pre_tags(chapter_tag):
                                 "font-size: 14px; white-space: nowrap;"
        pre.replace_with(new_tag)
        table = wrap_preformatted_span_with_table(chapter_tag, new_tag)
        # add <p> to save brs
        p_for_br = chapter_tag.new_tag("p")
        p_for_br.string = "\xa0"
        table.insert_after(p_for_br)
-def preprocess_code_tags(chapter_tag: Tag):
+def preprocess_code_tags(chapter_tag: BeautifulSoup):
-    """Function that
+    """
    Function
    - transform <code>, <kbd>, <var> tags into span
    - add code style to this tags
-    """
+    Parameters
    ----------
    chapter_tag: Tag, soup object
    Returns
    -------
    None
    """
    for code in chapter_tag.find_all(re.compile("code|kbd|var")):
        code.name = "span"
        if code.parent.name == "pre":
@@ -620,7 +652,6 @@ def preprocess_code_tags(chapter_tag: Tag):
            code.attrs['style'] = 'font-size: 14px; font-family: courier new,courier,monospace;'
 def prepare_title(title_of_chapter: str) -> str:
    """Function finalise processing/cleaning title"""
    title_str = BeautifulSoup(title_of_chapter, features='lxml').string
@@ -631,18 +662,19 @@ def prepare_title(title_of_chapter: str) -> str:
 def prepare_content(title_str: str, content_tag: BeautifulSoup, remove_title_from_chapter: bool) -> str:
-    """Function finalise processing/cleaning content
+    """
    Function finalise processing/cleaning content
    Parameters
    ----------
    title_str: str
-    content_tag: BeautifulSoup
+    content_tag: Tag, soup object
    remove_title_from_chapter: bool
    Steps
    ----------
-    1. cleaning \n
+    1. find \n
    2. heading removal
    3. processing tags
    4. class removal
@@ -651,9 +683,9 @@ def prepare_content(title_str: str, content_tag: BeautifulSoup, remove_title_fro
    -------
    content_tag: str
        prepared content
    """
-    # 0. cleaning \n
+    """
    # 1. find \n
    to_remove = []
    for child in content_tag.contents:
        if isinstance(child, NavigableString):
@@ -661,18 +693,18 @@ def prepare_content(title_str: str, content_tag: BeautifulSoup, remove_title_fro
            if s == '':
                to_remove.append(child)
-    # 1. heading removal
+    # 2. heading removal
    if remove_title_from_chapter:
        clean_headings_content(content_tag, title_str)
-    # 2. processing tags (<li>, <table>, <code>, <pre>, <block>)
+    # 3. processing tags (<li>, <table>, <code>, <pre>, <block>)
    process_lists(content_tag)
    preprocess_table(content_tag)
    preprocess_code_tags(content_tag)
    preprocess_pre_tags(content_tag)
    preprocess_block_tags(content_tag)
-    # 3. class removal
+    # 4. class removal
    for tag in content_tag.find_all(recursive=True):
        if hasattr(tag, 'attrs') and tag.attrs.get('class') and (tag.attrs.get('class') not in ['link-anchor',
                                                                                                'footnote-element']):