Wrote documentation for every func/class in .py

2021-12-10 10:53:40 +03:00
parent ef3502cd0a
commit 4b1109e6b4
13 changed files with 198 additions and 172 deletions
--- a/src/epub_converter/css_reader.py
+++ b/src/epub_converter/css_reader.py
@@ -11,9 +11,9 @@ from itertools import takewhile
 from src.util.color_reader import str2hex
 from src.livecarta_config import LiveCartaConfig

-
 cssutils.log.setLevel(CRITICAL)

+
 sizes_pr = [-1, 0.5, 0.56, 0.63, 0.69, 0.75, 0.81, 0.88, 0.94, 1.0, 1.06, 1.13, 1.19, 1.25, 1.31, 1.38, 1.44, 1.5, 1.56,
            1.63, 1.69, 1.75, 1.81, 1.88, 1.94, 2.0, 2.06, 2.13, 2.19, 2.25, 2.31, 2.38, 2.44, 2.5, 2.56, 2.63, 2.69,
            2.75, 2.81, 2.88, 2.94, 3.0, 4.0, 5.0]
@@ -29,6 +29,7 @@ list_types = ['circle', 'disc', 'armenian', 'decimal',


 def convert_font_size(value):
+    """ Function converts font-size in mapping """
    if 'pt' in value:
        if int(value.replace('pt', '')) == LiveCartaConfig.LIVECARTA_DEFAULT_FONT_SIZE:
            return ''
@@ -58,6 +59,7 @@ def convert_font_size(value):


 def convert_indents(value):
+    """ Function converts text-indent and margin-left values to px """
    # 30px = 3.2% = 1.25em = 23pt
    text_indent_regexp = re.compile(r'(-*\w+%)|((-*\w*).*em)|(-*\w+pt)')
    has_style_attrs = re.search(text_indent_regexp, value)
@@ -115,13 +117,6 @@ LIVECARTA_STYLE_ATTRS = {
    'margin-left': []
 }

-"""
-LIVECARTA_STYLE_ATTRS_MAPPING = { property: mapping function }
-
-Warning, if LIVECARTA_STYLE_ATTRS is changed, LIVECARTA_STYLE_ATTRS_MAPPING should be updated 
-to suit livecarta style convention.
-"""
-

 def get_bg_color(x):
    color = str2hex(x)
@@ -135,6 +130,12 @@ def get_text_color(x):
    return color


+"""
+LIVECARTA_STYLE_ATTRS_MAPPING = { property: mapping function }
+
+Warning, if LIVECARTA_STYLE_ATTRS is changed, LIVECARTA_STYLE_ATTRS_MAPPING should be updated 
+to suit livecarta style convention.
+"""
 LIVECARTA_STYLE_ATTRS_MAPPING = {
    'text-indent': convert_indents,
    'font-variant': lambda x: x,
@@ -178,8 +179,10 @@ LIVECARTA_STYLE_ATTRS_SHOULD_BE_TAG = {


 def check_style_to_be_tag(style) -> List[tuple]:
-    """ Some css style properties converts to tags.
-    Search for them and prepare list of properties to be removed from style string"""
+    """ 
+    Some css style properties converts to tags.
+    Search for them and prepare list of properties to be removed from style string 
+    """
    to_remove = []
    for k in LIVECARTA_STYLE_ATTRS_SHOULD_BE_TAG:
        if f'{k[0]}:{k[1]}' in style:
@@ -208,6 +211,7 @@ def update_css_style_types_to_livecarta_convention(css_rule, style_type):


 def build_css_content(css_content):
+    """ Build css content with livecarta convention """
    sheet = cssutils.parseString(css_content, validate=False)

    for css_rule in sheet:
@@ -231,6 +235,7 @@ class TagStyleConverter:

    @staticmethod
    def remove_white_if_no_bgcolor(style_, tag):
+        """ Function removes white color if there is no text bg color """
        if 'background' in style_:
            return style_

@@ -260,8 +265,7 @@ class TagStyleConverter:

    @staticmethod
    def process_indents_to_px(split_style: list) -> str:
-        # clean with convert_indents() style string and make new clean_style
-
+        """ Function cleans the style string using convert_indents() and returns new clean_style """
        clean_style = ''
        for item in split_style:
            item = item.split(':')
@@ -276,7 +280,7 @@ class TagStyleConverter:

        has_margin_left = re.search(margin_left_regexp, clean_style)
        has_text_indent = re.search(text_indent_regexp, clean_style)
-        #formula_of_indent: indent = abs(margin_left - text_indent)
+        # formula_of_indent: indent = abs(margin_left - text_indent)
        if has_margin_left:
            num_ml = abs(int("".join(
                filter(str.isdigit, str(has_margin_left.group(2))))))
@@ -302,6 +306,7 @@ class TagStyleConverter:

    def preprocess_style(self):
        def remove_extra_spaces(style: str) -> List:
+            """ Function to remove extra spaces in style to process clean_style """
            # replace all spaces between '; & letter' to ';'
            style = re.sub(r"; *", ";", style)
            split_style = style.split(';')
@@ -381,7 +386,7 @@ class TagStyleConverter:

    @staticmethod
    def wrap_span_in_p_to_save_style_attrs(tag):
-        '''Function designed to save style attrs that cannot be in p -> span'''
+        """ Function designed to save style attrs that cannot be in p -> span """
        if tag.name == 'p' and tag.attrs.get('style'):
            styles_cant_be_in_p = [attr for attr in LIVECARTA_STYLE_ATTRS
                                   if attr not in ['text-align', 'text-indent', 'border-bottom', 'border-top']]
@@ -414,6 +419,7 @@ class TagStyleConverter:

    @staticmethod
    def wrap_span_in_li_to_save_style_attrs(tag):
+        """ Function designed to save style attrs that cannot be in li -> span """
        if tag.name == 'li' and tag.attrs.get('style'):
            styles_cant_be_in_li = [attr for attr in LIVECARTA_STYLE_ATTRS if
                                    attr not in ['text-align', 'list-style-type']]
@@ -441,6 +447,7 @@ class TagStyleConverter:

    @staticmethod
    def wrap_span_in_ul_ol_to_save_style_attrs(tag):
+        """ Function designed to save style attrs that cannot be in ul/ol -> span """
        if tag.name in ['ul', 'ol'] and tag.attrs.get('style'):
            styles_cant_be_in_ul_ol = [
                attr for attr in LIVECARTA_STYLE_ATTRS if attr not in ['list-style-type']]
@@ -465,6 +472,7 @@ class TagStyleConverter:

    @staticmethod
    def wrap_span_in_h_to_save_style_attrs(tag):
+        """ Function designed to save style attrs that cannot be in h -> span """
        h_regexp = re.compile('(^h[1-9]$)')

        if re.search(h_regexp, tag.name) and tag.attrs.get('style'):
@@ -487,6 +495,7 @@ class TagStyleConverter:


 def convert_html_soup_with_css_style(html_soup: BeautifulSoup, css_text: str):
+    """ Function adds styles from .css to inline style """
    css_text = css_text.replace(
        '@namespace epub "http://www.idpf.org/2007/ops";', '')
    livecarta_tmp_ids = []
--- a/src/epub_converter/epub_converter.py
+++ b/src/epub_converter/epub_converter.py
@@ -20,7 +20,7 @@ from src.livecarta_config import LiveCartaConfig
 from src.data_objects import ChapterItem, NavPoint
 from src.epub_converter.css_reader import build_css_content, convert_html_soup_with_css_style
 from src.epub_converter.html_epub_preprocessor import unwrap_structural_tags, get_tags_between_chapter_marks, prepare_title, prepare_content, \
-    update_src_links_in_images, preprocess_footnotes
+    update_images_src_links, preprocess_footnotes


 class EpubConverter:
@@ -48,7 +48,7 @@ class EpubConverter:
        # flag to be updated while ebooklib.toc is parsed
        self.id_anchor_exist_in_nav_points = False
        self.img_href2img_bytes = {}  # file path to bytes
-        self.old_image_path2aws_path = {}  # file path from <a> to generated aws path
+        self.book_image_src_path2aws_path = {}  # file path from <a> to generated aws path
        self.footnotes_contents: List[str] = []  # to be sent on server as is
        self.noterefs: List[Tag] = []  # start of the footnote
        self.footnotes: List[Tag] = []  # end of the footnote
@@ -124,12 +124,12 @@ class EpubConverter:
        return css_content

    def build_html_and_css_relations(self):
-        '''
+        """
        This function is designed to get 2 dictionaries:
        The first is css_href2css_content. It is created to connect href of css to content of css
        The second is html_href2css_href. It is created to connect href of html to css files(hrefs of them) which are used on this html
        ...2... = key2value
-        '''
+        """

        # dictionary: href of html to related css files
        html_href2css_href: defaultdict = defaultdict(list)
@@ -159,10 +159,10 @@ class EpubConverter:
        return html_href2css_href, css_href2css_content,

    def add_css_styles_to_html_soup(self):
-        '''
+        """
        This function is designed to update html_href2html_body_soup
        And add to html_inline_style css_style_content
-        '''
+        """
        for html_href in self.html_href2html_body_soup:
            if self.html_href2css_href.get(html_href):
                css = ''
@@ -179,6 +179,7 @@ class EpubConverter:

        return links

+    # t_nodes = []
    def build_adjacency_list_from_toc(self, element, lvl=0):
        """
        self.adjacency_list builds based on TOC nested structure, got from self.ebooklib.toc
@@ -211,25 +212,31 @@ class EpubConverter:

            sub_nodes = []
            for i in second:
+                # if 'chapter' in (i.title.lower() if isinstance(i, Link) else i[0].title.lower()):
+                #     self.t_nodes.append(self.build_adjacency_list_from_toc(i, lvl))
+                # else:
                sub_nodes.append(
                    self.build_adjacency_list_from_toc(i, lvl + 1))
-
            self.adjacency_list[nav_point] = sub_nodes
            self.hrefs_added_to_toc.add(nav_point.href)
            return nav_point

        elif isinstance(element, list) and (lvl == 0):
-            sub_nodes = []
+            nodes = []
            for i in element:
-                sub_nodes.append(
+                nodes.append(
                    self.build_adjacency_list_from_toc(i, lvl + 1))
-
-            self.adjacency_list[-1] = sub_nodes
+            #     for j in self.t_nodes:
+            #         nodes.append(j)
+            #     self.t_nodes = []
+            #
+            # self.adjacency_list[-1] = nodes

        else:
            assert 0, f'Error. Element is not tuple/Link/list instance: {type(element)}'

    def is_toc_empty(self):
+        """ Function checks whether toc is empty """
        # there is no toc in ebook or no top chapters
        if (self.ebooklib_book.toc is None) or (self.adjacency_list.get(-1) is None):
            return True
@@ -247,6 +254,7 @@ class EpubConverter:
            self.hrefs_added_to_toc.add(nav_point.href)

    def add_not_added_files_to_adjacency_list(self, not_added):
+        """ Function adds files that were not added to adjacency list """
        for i, file in enumerate(not_added):
            nav_point = NavPoint(
                Section(f'To check #{i}, filename: {file}', file))
@@ -315,6 +323,11 @@ class EpubConverter:
        return full_path[0]

    def process_internal_links(self):
+        """
+        Function
+        - processes internal links in a book
+        - makes ids unique
+        """
        # 1. rebuild ids to be unique in all documents
        for toc_href in self.hrefs_added_to_toc:
            for tag in self.html_href2html_body_soup[toc_href].find_all(attrs={'id': re.compile(r'.+')}):
@@ -429,6 +442,7 @@ class EpubConverter:
                self.build_one_chapter(sub_node)

    def define_chapters_content(self):
+        """ Function builds chapters content starting from top level chapters """
        top_level_nav_points = self.adjacency_list[-1]
        if self.id_anchor_exist_in_nav_points:
            for point in top_level_nav_points:
@@ -441,12 +455,12 @@ class EpubConverter:
                nav_point.href, nav_point.id)]
        else:
            content: BeautifulSoup = self.html_href2html_body_soup[nav_point.href]
-        self.old_image_path2aws_path = update_src_links_in_images(content,
-                                                                  self.img_href2img_bytes,
-                                                                  path_to_html=nav_point.href,
-                                                                  access=self.access,
-                                                                  path2aws_path=self.old_image_path2aws_path,
-                                                                  book_id=lambda x: self.file.stem if hasattr(self.file, self.file.stem) else 'book_id')
+        self.book_image_src_path2aws_path = update_images_src_links(content,
+                                                                    self.img_href2img_bytes,
+                                                                    path_to_html=nav_point.href,
+                                                                    access=self.access,
+                                                                    path2aws_path=self.book_image_src_path2aws_path,
+                                                                    book_id=self.file.stem if hasattr(self.file, self.file.stem) else 'book_id')

        is_chapter = lvl <= LiveCartaConfig.SUPPORTED_LEVELS
        title_preprocessed = prepare_title(title)
@@ -466,6 +480,7 @@ class EpubConverter:
        return ChapterItem(title_preprocessed, content_preprocessed, sub_nodes)

    def convert_to_dict(self):
+        """ Function converts list of html nodes to appropriate json structure. """
        top_level_nav_points = self.adjacency_list[-1]
        top_level_chapters = []

@@ -491,7 +506,7 @@ if __name__ == "__main__":

    logger_object = BookLogger(name=f'epub', main_logger=logger, book_id=0)

-    json_converter = EpubConverter('../../epub/9781641051217.epub',
+    json_converter = EpubConverter('../../epub/9781614382263.epub',
                                   logger=logger_object)
    tmp = json_converter.convert_to_dict()

--- a/src/epub_converter/epub_solver.py
+++ b/src/epub_converter/epub_solver.py
@@ -2,12 +2,17 @@ from src.book_solver import BookSolver
 from src.epub_converter.epub_converter import EpubConverter

 class EpubBook(BookSolver):
+    """ Class of .epub type book - child of BookSolver """

    def __init__(self, book_id=0, access=None, main_logger=None):
        super().__init__(book_id, access, main_logger)
        self.book_type = 'epub'

    def get_converted_book(self):
+        """
+        1. Convert epub to html
+        2. Parse from line structure to nested structure
+        """
        json_converter = EpubConverter(self.file_path, access=self.access, logger=self.logger_object)
        content_dict = json_converter.convert_to_dict()
        self.status_wrapper.set_generating()
--- a/src/epub_converter/html_epub_preprocessor.py
+++ b/src/epub_converter/html_epub_preprocessor.py
@@ -10,6 +10,7 @@ from src.livecarta_config import LiveCartaConfig


 def save_image_locally(img_file_path, img_content, book_id):
+    """ Function saves all images locally """
    folder_path = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
    new_path = pathlib.Path(os.path.join(
        folder_path, f'../json/img_{book_id}/'))
@@ -24,17 +25,19 @@ def save_image_locally(img_file_path, img_content, book_id):


 def save_image_to_aws(access: Access, img_file_path, img_content: bytes, book_id):
-    link = access.send_image(
+    """ Function saves an image to Amazon web service and returns its link path """
+    link_path = access.send_image(
        img_file_path, doc_id=book_id, img_content=img_content)
-    return link
+    return link_path


-def update_src_links_in_images(body_tag: Tag,
-                               href2img_content: dict,
-                               path_to_html,
-                               access=None,
-                               path2aws_path=None,
-                               book_id=None):
+def update_images_src_links(body_tag: Tag,
+                            href2img_content: dict,
+                            path_to_html,
+                            access=None,
+                            path2aws_path=None,
+                            book_id=None):
+    """ Function makes dictionary image_src_path -> Amazon web service_path """
    img_tags = body_tag.find_all('img')

    for img in img_tags:
@@ -65,16 +68,16 @@ def update_src_links_in_images(body_tag: Tag,
            del img.attrs['height']
        if img.attrs.get('style'):
            del img.attrs['style']
-
    return path2aws_path


 def preprocess_table(body_tag: BeautifulSoup):
+    """ Function to preprocess tables and tags(td|th|tr): style """
    tables = body_tag.find_all("table")
    for table in tables:
-        tds = table.find_all(re.compile("td|th|tr"))
-        for td in tds:
-            style = td.get('style')
+        ts = table.find_all(re.compile("td|th|tr"))
+        for t_tag in ts:
+            style = t_tag.get('style')
            width = ''
            if style:
                width_match = re.search(
@@ -84,13 +87,13 @@ def preprocess_table(body_tag: BeautifulSoup):
                    units = width_match.group(2)
                    width = size+'px'

-            td.attrs['width'] = td.get('width') or width
+            t_tag.attrs['width'] = t_tag.get('width') or width

-            if td.attrs.get('style'):
-                td.attrs['style'] = td.attrs['style'].replace('border:0;', '')
+            if t_tag.attrs.get('style'):
+                t_tag.attrs['style'] = t_tag.attrs['style'].replace('border:0;', '')

-            if td.attrs.get('style') == '':
-                del td.attrs['style']
+            elif t_tag.attrs.get('style') == '':
+                del t_tag.attrs['style']

        if not table.attrs.get('border') or table.attrs.get('border') in ['0', '0px']:
            table.attrs['border'] = '1'
@@ -110,6 +113,7 @@ def process_lists(body_tag):


 def insert_span_with_attrs_before_tag(main_tag, tag, id_, class_):
+    """ Function inserts span before a tag that is to be removed (tag isn't supported by livecarta) """
    new_tag = main_tag.new_tag("span")
    new_tag.attrs['id'] = id_ or ''
    new_tag.attrs['class'] = class_ or ''
@@ -153,9 +157,7 @@ def clean_headings_content(content: Tag, title: str):


 def heading_tag_to_p_tag(body_tag):
-    """
-    Function to convert all lower level headings to p tags
-    """
+    """ Function to convert all lower level headings to p tags """
    pattern = f'^h[{LiveCartaConfig.SUPPORTED_LEVELS + 1}-9]$'
    header_tags = body_tag.find_all(re.compile(pattern))
    for tag in header_tags:
@@ -163,17 +165,16 @@ def heading_tag_to_p_tag(body_tag):


 def clean_title_from_numbering(title: str):
-    """
-    Function to remove digits  from headers.
-    """
+    """ Function removes numbering from titles """
    title = re.sub(r'^(\s+)+', '', title)
    title = re.sub(r'^(?:\.?\d+\.? ?)+', '', title)
    # title = re.sub(r'^(?:\.?[MDCLXVIclxvi]+\.? ?)+ ', '', title)  # delete chapter numbering from the title
-    title = re.sub(r'^(?:[A-Za-z]\. ?)+', '', title)
+    title = re.sub(r'^(?:[A-Za-z]\. ?)+', '', title) # delete chapter I, (ABC) from the title
    return title


 def replace_with_livecarta_anchor_tag(anchor, i):
+    """ Function replaces noteref_tag(anchor) with new livecarta tag """
    new_tag = BeautifulSoup(features='lxml').new_tag('sup')
    new_tag['class'] = 'footnote-element'
    new_tag['data-id'] = i + 1
@@ -188,11 +189,11 @@ def replace_with_livecarta_anchor_tag(anchor, i):
 def preprocess_footnotes(source_html_tag: Tag, href2soup_html: dict = None, noteref_attr_name='epub:type') \
        -> Tuple[list, list, list]:
    """
+    This function preprocesses footnotes.
    This function should be earlier that adding fonts in pipeline.

    <p>Here is an example footnote<sup><a epub:type="noteref" href="#n1">1</a></sup></p>
    <aside epub:type="footnote" id="n1"><p>With a footnote here.</p></aside>
-
    """
    footnotes = []
    noterefs_tags = source_html_tag.find_all(
@@ -205,12 +206,14 @@ def preprocess_footnotes(source_html_tag: Tag, href2soup_html: dict = None, note
    new_footnotes_tags = []
    [tag.decompose() for tag in bad_noterefs_tags]

-    def parse_a_tag_href(s: str):
+    def parse_a_tag_href(s: str) -> Tuple[str, str]:
+        """ Returns name of file & id of an anchor """
        assert '#' in s, f'Error. Unexpected href: {s} in a tag. Href must contain an id.'
        f, id_ = s.split('#')
        return f, id_

    def verify_footnote_tag(tags: list):
+        """ Function verifies whether tag is a footnote """
        assert len(tags) <= 1, f'Error, Multiple id: {href}.\n{tags}'
        if len(tags) == 0:
            anchored_tags = list(target_html_tag.find_all(id=element_id))
@@ -275,7 +278,7 @@ def unwrap_structural_tags(body_tag):
    """

    def _preserve_class_in_aside_tag(tag_):
-        # to save css style inherited from class, copy class to aside tag (which is parent to tag_)
+        """ to save css style inherited from class, copy class to aside tag (which is parent to tag_) """
        # this is for Wiley books with boxes
        tag_class = tag_.attrs['class'] if not isinstance(
            tag_.attrs['class'], list) else tag_.attrs['class'][0]
@@ -284,10 +287,11 @@ def unwrap_structural_tags(body_tag):
                tag_.parent.attrs['class'] = tag_class

    def preserve_class_in_section_tag(tag_) -> bool:
-        # to save css style inherited from class, copy class to child <p>
+        """
+        to save css style inherited from class, copy class to child <p>
+        returns True, if <section> could be unwrapped
+        """
        # this is for Wiley books with boxes
-        # returns True, if <section> could be unwrapped
-
        tag_class = tag_.attrs['class'] if not isinstance(
            tag_.attrs['class'], list) else tag_.attrs['class'][0]
        if 'feature' not in tag_class:
@@ -312,6 +316,10 @@ def unwrap_structural_tags(body_tag):
                                              class_=tag_to_be_removed.attrs.get('class'))

    def replace_div_tag_with_table():
+        """Function replaces <div> with <table>:
+        1. Convert div with certain classes to tables
+        2. Add background color to div with background-color
+        """
        for div in body_tag.find_all("div"):
            if div.attrs.get('class'):
                div_class = div.attrs['class'] if not isinstance(
@@ -348,12 +356,12 @@ def unwrap_structural_tags(body_tag):
                    continue
            add_span_to_save_ids_for_links(div)
            div.unwrap()
+
    # comments removal
    for tag in body_tag.find_all():
        for element in tag(text=lambda text: isinstance(text, Comment)):
            element.extract()

-
    replace_div_tag_with_table()

    for s in body_tag.find_all("section"):
@@ -458,23 +466,8 @@ def get_tags_between_chapter_marks(first_id, href, html_soup):
    return tags


-def wrap_preformatted_span_with_table(main_tag, old_tag):
-    table = main_tag.new_tag("table")
-    table.attrs['border'] = '1px #ccc;'
-    table.attrs['style'] = 'width:100%;'
-    tbody = main_tag.new_tag("tbody")
-    tr = main_tag.new_tag("tr")
-    td = main_tag.new_tag("td")
-    td.attrs['bgcolor'] = '#f5f5f5'
-    # td.attrs['border-radius'] = '4px'
-    old_tag.wrap(td)
-    td.wrap(tr)
-    tr.wrap(tbody)
-    tbody.wrap(table)
-    return table
-
-
 def wrap_block_tag_with_table(main_tag, old_tag, width='95', border='1px', bg_color=None):
+    """ Function wraps <block> with <table> """
    table = main_tag.new_tag("table")
    table.attrs['border'] = border
    table.attrs['align'] = 'center'
@@ -497,7 +490,6 @@ def clean_wiley_block(block):
    hrs = block.find_all("p", attrs={"class": re.compile(".+ hr")})
    for hr in hrs:
        hr.extract()
-        print(hr)
    h = block.find(re.compile("h[1-9]"))
    if h:
        h.name = "p"
@@ -505,6 +497,7 @@ def clean_wiley_block(block):


 def preprocess_block_tags(chapter_tag):
+    """ Function preprocesses <blockquote> tags """
    for block in chapter_tag.find_all("blockquote"):
        if block.attrs.get('class') in ['feature1', 'feature2', 'feature3', 'feature4']:
            clean_wiley_block(block)
@@ -527,7 +520,7 @@ def preprocess_block_tags(chapter_tag):


 def prepare_formatted(text):
-    # replace <,> to save them as is in html code
+    """ Function replaces special symbols with their Unicode representation """
    text = text.replace("<", "\x3C")
    text = text.replace(">", "\x3E")
    text = text.replace('\t', "\xa0 \xa0 ")  # &nbsp; &nbsp;
@@ -536,7 +529,25 @@ def prepare_formatted(text):
    return text


+def wrap_preformatted_span_with_table(main_tag, old_tag):
+    """ Function wraps <span> with <table> """
+    table = main_tag.new_tag("table")
+    table.attrs['border'] = '1px #ccc;'
+    table.attrs['style'] = 'width:100%;'
+    tbody = main_tag.new_tag("tbody")
+    tr = main_tag.new_tag("tr")
+    td = main_tag.new_tag("td")
+    td.attrs['bgcolor'] = '#f5f5f5'
+    # td.attrs['border-radius'] = '4px'
+    old_tag.wrap(td)
+    td.wrap(tr)
+    tr.wrap(tbody)
+    tbody.wrap(table)
+    return table
+
+
 def preprocess_pre_tags(chapter_tag):
+    """ Function preprocesses <pre> tags """
    for pre in chapter_tag.find_all("pre"):
        new_tag = BeautifulSoup(features='lxml').new_tag("span")
        new_tag.attrs = pre.attrs.copy()
@@ -575,7 +586,7 @@ def preprocess_pre_tags(chapter_tag):


 def preprocess_code_tags(chapter_tag):
-    # function that emulates style of <code>, <kdb>, <var>
+    """ Function that emulates style of <code>, <kdb>, <var> """
    for code in chapter_tag.find_all(re.compile("code|kdb|var")):
        code.name = 'span'
        if code.parent.name == "pre":
@@ -584,9 +595,7 @@ def preprocess_code_tags(chapter_tag):


 def prepare_title(title_of_chapter: str) -> str:
-    """
-    Final processing/cleaning function.
-    """
+    """ Function finalises processing/cleaning of the title """
    title_str = BeautifulSoup(title_of_chapter, features='lxml').string
    title_str = re.sub(r'([\n\t\xa0])', ' ', title_str)
    title_str = re.sub(r' +', ' ', title_str).rstrip()
@@ -596,7 +605,11 @@ def prepare_title(title_of_chapter: str) -> str:

 def prepare_content(title_str: str, content_tag: BeautifulSoup, remove_title_from_chapter: bool) -> str:
    """
-    Final processing/cleaning function.
+    Function finalises processing/cleaning of the content
+    1. cleaning \n
+    2. heading removal
+    3. processing tags
+    4. class removal
    """
    # 0. cleaning \n
    to_remove = []
@@ -609,13 +622,15 @@ def prepare_content(title_str: str, content_tag: BeautifulSoup, remove_title_fro
    # 1. heading removal
    if remove_title_from_chapter:
        clean_headings_content(content_tag, title_str)
+
+    # 2. processing tags (<li>, <table>, <code>, <pre>, <block>)
    process_lists(content_tag)
    preprocess_table(content_tag)
    preprocess_code_tags(content_tag)
    preprocess_pre_tags(content_tag)
    preprocess_block_tags(content_tag)

-    # 2. class removal
+    # 3. class removal
    for tag in content_tag.find_all(recursive=True):
        if hasattr(tag, 'attrs') and tag.attrs.get('class') and (tag.attrs.get('class') not in ['link-anchor',
                                                                                                'footnote-element']):