Wrote documentation for every func/class in .py

2021-12-10 10:53:40 +03:00
parent ef3502cd0a
commit 4b1109e6b4
13 changed files with 198 additions and 172 deletions
--- a/consumer.py
+++ b/consumer.py
@@ -54,7 +54,6 @@ def convert_book(book_type: [DocxBook, EpubBook], params: dict, logger, book_id)
        raise exc
    logger.info(f'Book-{book_id} has been proceeded.')
    print('Book has been proceeded.')
 def callback(ch, method, properties, body, logger, libra_locker):
--- a/src/book_solver.py
+++ b/src/book_solver.py
@@ -1,10 +1,3 @@
 """ This is Main Abstract class for solving a task of a book conversion
 Having an id of coming book, gets book from server, runs conversion.
 In parallel it updates status of a book conversion on admin panel.
 Finally sends result to server.
 Result is a json, JSON schema in book_schema.json
 """
 import os
 import json
 import codecs
@@ -17,6 +10,14 @@ from src.util.helpers import BookLogger, BookStatusWrapper
 class BookSolver:
    """
    This is Main Abstract class for solving a task of a book conversion
    Having an id of coming book, gets book from server, runs conversion.
    In parallel it updates status of a book conversion on admin panel.
    Finally sends result to server.
    Result is a json, JSON schema in book_schema.json
    """
    __metaclass__ = ABCMeta
    def __init__(self, book_id=0, access=None, main_logger=None):
@@ -55,9 +56,7 @@ class BookSolver:
        self.file_path = pathlib.Path(file_path)
    def get_book_file(self):
-        """
+        """ Method for getting and saving book from server. """
        Method for getting and saving book from server.
        """
        try:
            self.logger_object.log(f'Start receiving file from server. URL: {self.access.url}/doc-convert/{self.book_id}/file')
            content = self.access.get_book(self.book_id)
@@ -92,6 +91,7 @@ class BookSolver:
            self.logger_object.log('Error has occurred while writing json file.' + str(exc), logging.ERROR)
    def send_json_content_to_server(self, content: dict):
        """ Function sends json_content to site """
        try:
            self.access.send_book(self.book_id, content)
            self.logger_object.log(f'JSON data has been sent to server.')
@@ -108,8 +108,10 @@ class BookSolver:
        return {}
    def test_conversion(self):
-        '''Function
+        """
-        without sending to server'''
+        Function
        - without sending to server
        """
        self.logger_object.log('Beginning of the test.')
        folder_path = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
        folder_path = os.path.join(folder_path, f'{self.book_type}')
@@ -121,9 +123,11 @@ class BookSolver:
        self.logger_object.log('End of the test.')
    def conversion(self):
-        '''Function
+        """
-        with downloading book from server
+        Function
-        with sending to server'''
+        - with downloading book from server
        - with sending to server
        """
        try:
            self.logger_object.log(f'Beginning of conversion from .{self.book_type} to .json.')
            self.get_book_file()
@@ -140,9 +144,11 @@ class BookSolver:
            raise exc
    def conversion_local(self):
-        '''Function
+        """
-        without downloading book from server (local)
+        Function
-        with sending to server'''
+        - without downloading book from server (local)
        - with sending to server
        """
        try:
            self.logger_object.log(f'Data has been downloaded from tmp.json file: {self.file_path}')
            with codecs.open('json/tmp.json', 'r', encoding='utf-8') as f_json:
--- a/src/data_objects.py
+++ b/src/data_objects.py
@@ -2,21 +2,22 @@ import re
 from typing import Union
 from ebooklib.epub import Section, Link
 from src.livecarta_config import LiveCartaConfig
 """
 These are data structures which form mapping from NCX to python data structures.
 """
 class NavPoint:
    """
    Class - Navigation Point, - every html|xhtml from epub
    These are data structures which form mapping from NCX to python data structures.
    """
    def __init__(self, obj: Union[Link, Section] = None, ):
        self.href, self.id = self.parse_href_id(obj)
        self.title = obj.title
    @staticmethod
    def parse_href_id(item: Union[Link, Section]):
        """Function parses href & id from item.href"""
        reg = r'(.+\..+\#)(.+)'
        match = re.search(reg, item.href)
        href, div_id = None, None
@@ -36,13 +37,8 @@ class NavPoint:
        return '<NavPoint: %s, %s>' % (self.href, self.id)
 """
 These are data structures which form mapping to livecarta json structure.
 """
 def flatten(x):
-    """ magic function from stackoverflow for list flattening """
+    """magic function from stackoverflow for list flattening"""
    atom = lambda i: not isinstance(i, list)
    nil = lambda i: not i
    car = lambda i: i[0]
@@ -54,12 +50,18 @@ def flatten(x):
 class ChapterItem:
    """
    Class of Chapter that could have subchapters
    These are data structures which form mapping to livecarta json structure.
    """
    def __init__(self, title, content, sub_items):
        self.title = title
        self.content = content
        self.sub_items = sub_items
    def to_dict(self, lvl=1):
        """Function returns dictionary of chapter"""
        sub_dicts = []
        if self.sub_items:
            for i in self.sub_items:
--- a/src/docx_converter/docx_solver.py
+++ b/src/docx_converter/docx_solver.py
@@ -12,6 +12,7 @@ from src.book_solver import BookSolver
 class DocxBook(BookSolver):
    """Class of .docx type book - child of BookSolver"""
    def __init__(self, book_id=0, access=None, html_path=None,
                 main_logger=None, libra_locker=None):
@@ -30,9 +31,7 @@ class DocxBook(BookSolver):
        self.logger_object.log(f'Any error while libra conversion for book_{self.book_id}: {result.stderr}', logging.DEBUG)
    def convert_doc_to_html(self):
-        """
+        """Method for converting a .docx document to an .html file."""
        Method for convert .docx document to .html file.
        """
        self.logger_object.log(f'File - {self.file_path}.')
        print(f'{self.file_path}')
        self.logger_object.log('Beginning of conversion from .docx to .html.')
@@ -92,9 +91,7 @@ class DocxBook(BookSolver):
        self.logger_object.log(f'Input file path after conversion: {self.html_path}.')
    def read_html(self):
-        """
+        """Method for reading .html file into beautiful soup tag."""
        Method for reading .html file into beautiful soup tag.
        """
        try:
            html_text = open(self.html_path, 'r', encoding='utf8').read()
            self.logger_object.log('HTML for book has been loaded.')
@@ -130,7 +127,6 @@ class DocxBook(BookSolver):
        1. Convert docx to html with libra office
        2. Parse and clean html, get list of tags, get footnotes
        3. Parse from line structure to nested structure with JSONConverter
        """
        self.convert_doc_to_html()
        self.check_output_directory()
--- a/src/docx_converter/html_docx_preprocessor.py
+++ b/src/docx_converter/html_docx_preprocessor.py
@@ -35,9 +35,7 @@ class HTMLDocxPreprocessor:
                tag.unwrap()
    def _clean_underline_links(self):
-        """
+        """Function cleans meaningless <u> tags before links."""
        Function cleans meaningless <u> tags before links.
        """
        underlines = self.body_tag.find_all("u")
        for u in underlines:
            if u.find_all('a'):
@@ -79,9 +77,7 @@ class HTMLDocxPreprocessor:
        return re.sub(size + "pt", str(new_size) + "px", style)
    def _font_to_span(self):
-        """
+        """Function to convert <font> tag to <span>. If font style is default, then remove this tag."""
        Function to convert <font> tag to <span>. If font style is default, then remove this tag.
        """
        fonts = self.body_tag.find_all("font")
        for font in fonts:
            face = font.get("face")
@@ -119,9 +115,7 @@ class HTMLDocxPreprocessor:
            self.content = self.content[ind:]
    def clean_trash(self):
-        """
+        """Function to remove all styles and tags we don't need."""
        Function to remove all styles and tags we don't need.
        """
        self._clean_tag('span', 'style', re.compile(r'^background: #[0-9a-fA-F]{6}$'))
        self._clean_tag('span', 'lang', re.compile(r'^ru-RU$'))  # todo: check for another languages
        self._clean_tag('span', 'style', re.compile('^letter-spacing: -?[\d\.]+pt$'))
@@ -140,9 +134,7 @@ class HTMLDocxPreprocessor:
            table.decompose()
    def _process_paragraph(self):
-        """
+        """Function to process <p> tags (text-align and text-indent value)."""
        Function to process <p> tags (text-align and text-indent value).
        """
        paragraphs = self.body_tag.find_all('p')
        for p in paragraphs:
@@ -193,9 +185,7 @@ class HTMLDocxPreprocessor:
                p.attrs['style'] = style
    def _process_two_columns(self):
-        """
+        """Function to process paragraphs which have a two-column layout."""
        Function to process paragraphs which has two columns layout.
        """
        two_columns = self.body_tag.find_all("div", style="column-count: 2")
        for div in two_columns:
            for child in div.children:
@@ -204,9 +194,7 @@ class HTMLDocxPreprocessor:
            div.unwrap()
    def _process_tables(self):
-        """
+        """Function to process tables. Set "border" attribute."""
        Function to process tables. Set "border" attribute.
        """
        tables = self.body_tag.find_all("table")
        for table in tables:
            tds = table.find_all("td")
@@ -296,9 +284,7 @@ class HTMLDocxPreprocessor:
        return content.strip()
    def _process_footnotes(self):
-        """
+        """Function returns the list of footnotes and deletes them from html_soup."""
        Function returns list of footnotes and delete them from html_soup.
        """
        footnote_anchors = self.body_tag.find_all('a', class_='sdfootnoteanc')
        footnote_content = self.body_tag.find_all('div', id=re.compile(r'^sdfootnote\d+$'))
        footnote_amt = len(footnote_anchors)
@@ -404,9 +390,7 @@ class HTMLDocxPreprocessor:
            div.decompose()
    def _process_div(self):
-        """
+        """Function to process <div> tags. All the tags will be deleted from file, all content of the tags will stay."""
        Function to process <div> tags. All the tags will be deleted from file, all content of the tags will stay.
        """
        divs = self.body_tag.find_all("div")
        for div in divs:
@@ -423,9 +407,7 @@ class HTMLDocxPreprocessor:
        return len(toc_links) > 0
    def _process_toc_links(self):
-        """
+        """Function to extract nodes which contain TOC links, remove links from file and detect headers."""
        Function to extract nodes which contains TOC links, remove links from file and detect headers.
        """
        toc_links = self.body_tag.find_all("a", {'name': re.compile(r'^_Toc\d+')})
        headers = [link.parent for link in toc_links]
        outline_level = "1"  # All the unknown outlines will be predicted as <h1>
@@ -448,13 +430,11 @@ class HTMLDocxPreprocessor:
    @staticmethod
    def clean_title_from_numbering(title: str):
-        """
+        """Function to remove digits from headers."""
        Function to remove digits  from headers.
        """
        title = re.sub(r'^(\s+)+', '', title)
        title = re.sub(r'^(?:\.?\d+\.? ?)+', '', title)
        # title = re.sub(r'^(?:\.?[MDCLXVIclxvi]+\.? ?)+ ', '', title)  # delete chapter numbering from the title
-        title = re.sub(r'^(?:[A-Za-z]\. ?)+', '', title)
+        title = re.sub(r'^(?:[A-Za-z]\. ?)+', '', title) # delete chapter I, (ABC) from the title
        return title
    @staticmethod
@@ -485,9 +465,7 @@ class HTMLDocxPreprocessor:
                self.apply_func_to_last_child(children[0], func)
    def _preprocessing_headings(self):
-        """
+        """Function to convert all lower level headings to p tags"""
        Function to convert all lower level headings to p tags
        """
        pattern = f'^h[{LiveCartaConfig.SUPPORTED_LEVELS + 1}-9]$'
        header_tags = self.body_tag.find_all(re.compile(pattern))
        for tag in header_tags:
@@ -561,9 +539,7 @@ class HTMLDocxPreprocessor:
                self.top_level_headers[i]['should_be_numbered'] = True
    def _process_headings(self):
-        """
+        """Function to process tags <h>."""
        Function to process tags <h>.
        """
        header_tags = self.body_tag.find_all(re.compile("^h[1-9]$"))
        # 1. remove <b>, <span>
@@ -634,9 +610,7 @@ class HTMLDocxPreprocessor:
            il_tag.p.unwrap()
    def process_html(self, access, html_path, book_id):
-        """
+        """Process html code to satisfy LiveCarta formatting."""
        Process html code to satisfy LiveCarta formatting.
        """
        try:
            self.logger_object.log(f'Processing TOC and headers.')
            self._process_toc_links()
--- a/src/docx_converter/libra_html2json_converter.py
+++ b/src/docx_converter/libra_html2json_converter.py
@@ -90,9 +90,7 @@ class LibraHTML2JSONConverter:
        return True
    def convert_to_dict(self):
-        """
+        """Function which converts a list of html nodes to the appropriate json structure."""
        Function which convert list of html nodes to appropriate json structure.
        """
        json_strc = []
        ind = 0
        ch_num = 0
--- a/src/epub_converter/css_reader.py
+++ b/src/epub_converter/css_reader.py
@@ -11,9 +11,9 @@ from itertools import takewhile
 from src.util.color_reader import str2hex
 from src.livecarta_config import LiveCartaConfig
 cssutils.log.setLevel(CRITICAL)
 sizes_pr = [-1, 0.5, 0.56, 0.63, 0.69, 0.75, 0.81, 0.88, 0.94, 1.0, 1.06, 1.13, 1.19, 1.25, 1.31, 1.38, 1.44, 1.5, 1.56,
            1.63, 1.69, 1.75, 1.81, 1.88, 1.94, 2.0, 2.06, 2.13, 2.19, 2.25, 2.31, 2.38, 2.44, 2.5, 2.56, 2.63, 2.69,
            2.75, 2.81, 2.88, 2.94, 3.0, 4.0, 5.0]
@@ -29,6 +29,7 @@ list_types = ['circle', 'disc', 'armenian', 'decimal',
 def convert_font_size(value):
    """ Function converts font-size in mapping """
    if 'pt' in value:
        if int(value.replace('pt', '')) == LiveCartaConfig.LIVECARTA_DEFAULT_FONT_SIZE:
            return ''
@@ -58,6 +59,7 @@ def convert_font_size(value):
 def convert_indents(value):
    """ Function converts text-indent and margin-left values to px """
    # 30px = 3.2% = 1.25em = 23pt
    text_indent_regexp = re.compile(r'(-*\w+%)|((-*\w*).*em)|(-*\w+pt)')
    has_style_attrs = re.search(text_indent_regexp, value)
@@ -115,13 +117,6 @@ LIVECARTA_STYLE_ATTRS = {
    'margin-left': []
 }
 """
 LIVECARTA_STYLE_ATTRS_MAPPING = { property: mapping function }
 Warning, if LIVECARTA_STYLE_ATTRS is changed, LIVECARTA_STYLE_ATTRS_MAPPING should be updated 
 to suit livecarta style convention.
 """
 def get_bg_color(x):
    color = str2hex(x)
@@ -135,6 +130,12 @@ def get_text_color(x):
    return color
 """
 LIVECARTA_STYLE_ATTRS_MAPPING = { property: mapping function }
 Warning, if LIVECARTA_STYLE_ATTRS is changed, LIVECARTA_STYLE_ATTRS_MAPPING should be updated 
 to suit livecarta style convention.
 """
 LIVECARTA_STYLE_ATTRS_MAPPING = {
    'text-indent': convert_indents,
    'font-variant': lambda x: x,
@@ -178,8 +179,10 @@ LIVECARTA_STYLE_ATTRS_SHOULD_BE_TAG = {
 def check_style_to_be_tag(style) -> List[tuple]:
-    """ Some css style properties converts to tags.
+    """ 
-    Search for them and prepare list of properties to be removed from style string"""
+    Some css style properties are converted to tags.
    Search for them and prepare list of properties to be removed from style string 
    """
    to_remove = []
    for k in LIVECARTA_STYLE_ATTRS_SHOULD_BE_TAG:
        if f'{k[0]}:{k[1]}' in style:
@@ -208,6 +211,7 @@ def update_css_style_types_to_livecarta_convention(css_rule, style_type):
 def build_css_content(css_content):
    """ Build css content with livecarta convention """
    sheet = cssutils.parseString(css_content, validate=False)
    for css_rule in sheet:
@@ -231,6 +235,7 @@ class TagStyleConverter:
    @staticmethod
    def remove_white_if_no_bgcolor(style_, tag):
        """ Function removes white color if there is no text bg color """
        if 'background' in style_:
            return style_
@@ -260,8 +265,7 @@ class TagStyleConverter:
    @staticmethod
    def process_indents_to_px(split_style: list) -> str:
-        # clean with convert_indents() style string and make new clean_style
+        """ Function cleans the style string using convert_indents() and returns the new clean_style """
        clean_style = ''
        for item in split_style:
            item = item.split(':')
@@ -276,7 +280,7 @@ class TagStyleConverter:
        has_margin_left = re.search(margin_left_regexp, clean_style)
        has_text_indent = re.search(text_indent_regexp, clean_style)
-        #formula_of_indent: indent = abs(margin_left - text_indent)
+        # formula_of_indent: indent = abs(margin_left - text_indent)
        if has_margin_left:
            num_ml = abs(int("".join(
                filter(str.isdigit, str(has_margin_left.group(2))))))
@@ -302,6 +306,7 @@ class TagStyleConverter:
    def preprocess_style(self):
        def remove_extra_spaces(style: str) -> List:
            """ Function to remove extra spaces in style to process clean_style """
            # replace all spaces between '; & letter' to ';'
            style = re.sub(r"; *", ";", style)
            split_style = style.split(';')
@@ -381,7 +386,7 @@ class TagStyleConverter:
    @staticmethod
    def wrap_span_in_p_to_save_style_attrs(tag):
-        '''Function designed to save style attrs that cannot be in p -> span'''
+        """ Function designed to save style attrs that cannot be in p -> span """
        if tag.name == 'p' and tag.attrs.get('style'):
            styles_cant_be_in_p = [attr for attr in LIVECARTA_STYLE_ATTRS
                                   if attr not in ['text-align', 'text-indent', 'border-bottom', 'border-top']]
@@ -414,6 +419,7 @@ class TagStyleConverter:
    @staticmethod
    def wrap_span_in_li_to_save_style_attrs(tag):
        """ Function designed to save style attrs that cannot be in li -> span """
        if tag.name == 'li' and tag.attrs.get('style'):
            styles_cant_be_in_li = [attr for attr in LIVECARTA_STYLE_ATTRS if
                                    attr not in ['text-align', 'list-style-type']]
@@ -441,6 +447,7 @@ class TagStyleConverter:
    @staticmethod
    def wrap_span_in_ul_ol_to_save_style_attrs(tag):
        """ Function designed to save style attrs that cannot be in ul/ol -> span """
        if tag.name in ['ul', 'ol'] and tag.attrs.get('style'):
            styles_cant_be_in_ul_ol = [
                attr for attr in LIVECARTA_STYLE_ATTRS if attr not in ['list-style-type']]
@@ -465,6 +472,7 @@ class TagStyleConverter:
    @staticmethod
    def wrap_span_in_h_to_save_style_attrs(tag):
        """ Function designed to save style attrs that cannot be in h -> span """
        h_regexp = re.compile('(^h[1-9]$)')
        if re.search(h_regexp, tag.name) and tag.attrs.get('style'):
@@ -487,6 +495,7 @@ class TagStyleConverter:
 def convert_html_soup_with_css_style(html_soup: BeautifulSoup, css_text: str):
    """ Function adds styles from .css to inline style """
    css_text = css_text.replace(
        '@namespace epub "http://www.idpf.org/2007/ops";', '')
    livecarta_tmp_ids = []
--- a/src/epub_converter/epub_converter.py
+++ b/src/epub_converter/epub_converter.py
@@ -20,7 +20,7 @@ from src.livecarta_config import LiveCartaConfig
 from src.data_objects import ChapterItem, NavPoint
 from src.epub_converter.css_reader import build_css_content, convert_html_soup_with_css_style
 from src.epub_converter.html_epub_preprocessor import unwrap_structural_tags, get_tags_between_chapter_marks, prepare_title, prepare_content, \
-    update_src_links_in_images, preprocess_footnotes
+    update_images_src_links, preprocess_footnotes
 class EpubConverter:
@@ -48,7 +48,7 @@ class EpubConverter:
        # flag to be updated while ebooklib.toc is parsed
        self.id_anchor_exist_in_nav_points = False
        self.img_href2img_bytes = {}  # file path to bytes
-        self.old_image_path2aws_path = {}  # file path from <a> to generated aws path
+        self.book_image_src_path2aws_path = {}  # file path from <a> to generated aws path
        self.footnotes_contents: List[str] = []  # to be sent on server as is
        self.noterefs: List[Tag] = []  # start of the footnote
        self.footnotes: List[Tag] = []  # end of the footnote
@@ -124,12 +124,12 @@ class EpubConverter:
        return css_content
    def build_html_and_css_relations(self):
-        '''
+        """
        This function is designed to get 2 dictionaries:
        The first is css_href2css_content. It is created to connect href of css to content of css
        The second is html_href2css_href. It is created to connect href of html to css files(hrefs of them) which are used on this html
        ...2... = key2value
-        '''
+        """
        # dictionary: href of html to related css files
        html_href2css_href: defaultdict = defaultdict(list)
@@ -159,10 +159,10 @@ class EpubConverter:
        return html_href2css_href, css_href2css_content,
    def add_css_styles_to_html_soup(self):
-        '''
+        """
        This function is designed to update html_href2html_body_soup
        And add to html_inline_style css_style_content
-        '''
+        """
        for html_href in self.html_href2html_body_soup:
            if self.html_href2css_href.get(html_href):
                css = ''
@@ -179,6 +179,7 @@ class EpubConverter:
        return links
    # t_nodes = []
    def build_adjacency_list_from_toc(self, element, lvl=0):
        """
        self.adjacency_list builds based on TOC nested structure, got from self.ebooklib.toc
@@ -211,25 +212,31 @@ class EpubConverter:
            sub_nodes = []
            for i in second:
                # if 'chapter' in (i.title.lower() if isinstance(i, Link) else i[0].title.lower()):
                #     self.t_nodes.append(self.build_adjacency_list_from_toc(i, lvl))
                # else:
                sub_nodes.append(
                    self.build_adjacency_list_from_toc(i, lvl + 1))
            self.adjacency_list[nav_point] = sub_nodes
            self.hrefs_added_to_toc.add(nav_point.href)
            return nav_point
        elif isinstance(element, list) and (lvl == 0):
-            sub_nodes = []
+            nodes = []
            for i in element:
-                sub_nodes.append(
+                nodes.append(
                    self.build_adjacency_list_from_toc(i, lvl + 1))
-
+            #     for j in self.t_nodes:
-            self.adjacency_list[-1] = sub_nodes
+            #         nodes.append(j)
            #     self.t_nodes = []
            #
            # self.adjacency_list[-1] = nodes
        else:
            assert 0, f'Error. Element is not tuple/Link/list instance: {type(element)}'
    def is_toc_empty(self):
        """ Function checks whether the toc is empty """
        # there is no toc in ebook or no top chapters
        if (self.ebooklib_book.toc is None) or (self.adjacency_list.get(-1) is None):
            return True
@@ -247,6 +254,7 @@ class EpubConverter:
            self.hrefs_added_to_toc.add(nav_point.href)
    def add_not_added_files_to_adjacency_list(self, not_added):
        """ Function adds files that were not added to the adjacency list """
        for i, file in enumerate(not_added):
            nav_point = NavPoint(
                Section(f'To check #{i}, filename: {file}', file))
@@ -315,6 +323,11 @@ class EpubConverter:
        return full_path[0]
    def process_internal_links(self):
        """
        Function
        - processing internal links in a book
        - make ids unique
        """
        # 1. rebuild ids to be unique in all documents
        for toc_href in self.hrefs_added_to_toc:
            for tag in self.html_href2html_body_soup[toc_href].find_all(attrs={'id': re.compile(r'.+')}):
@@ -429,6 +442,7 @@ class EpubConverter:
                self.build_one_chapter(sub_node)
    def define_chapters_content(self):
        """ Function builds chapters content, starting from top level chapters """
        top_level_nav_points = self.adjacency_list[-1]
        if self.id_anchor_exist_in_nav_points:
            for point in top_level_nav_points:
@@ -441,12 +455,12 @@ class EpubConverter:
                nav_point.href, nav_point.id)]
        else:
            content: BeautifulSoup = self.html_href2html_body_soup[nav_point.href]
-        self.old_image_path2aws_path = update_src_links_in_images(content,
+        self.book_image_src_path2aws_path = update_images_src_links(content,
                                                                    self.img_href2img_bytes,
                                                                    path_to_html=nav_point.href,
                                                                    access=self.access,
-                                                                  path2aws_path=self.old_image_path2aws_path,
+                                                                    path2aws_path=self.book_image_src_path2aws_path,
-                                                                  book_id=lambda x: self.file.stem if hasattr(self.file, self.file.stem) else 'book_id')
+                                                                    book_id=self.file.stem if hasattr(self.file, self.file.stem) else 'book_id')
        is_chapter = lvl <= LiveCartaConfig.SUPPORTED_LEVELS
        title_preprocessed = prepare_title(title)
@@ -466,6 +480,7 @@ class EpubConverter:
        return ChapterItem(title_preprocessed, content_preprocessed, sub_nodes)
    def convert_to_dict(self):
        """ Function which converts a list of html nodes to the appropriate json structure. """
        top_level_nav_points = self.adjacency_list[-1]
        top_level_chapters = []
@@ -491,7 +506,7 @@ if __name__ == "__main__":
    logger_object = BookLogger(name=f'epub', main_logger=logger, book_id=0)
-    json_converter = EpubConverter('../../epub/9781641051217.epub',
+    json_converter = EpubConverter('../../epub/9781614382263.epub',
                                   logger=logger_object)
    tmp = json_converter.convert_to_dict()
--- a/src/epub_converter/epub_solver.py
+++ b/src/epub_converter/epub_solver.py
@@ -2,12 +2,17 @@ from src.book_solver import BookSolver
 from src.epub_converter.epub_converter import EpubConverter
 class EpubBook(BookSolver):
    """ Class of .epub type book - child of BookSolver """
    def __init__(self, book_id=0, access=None, main_logger=None):
        super().__init__(book_id, access, main_logger)
        self.book_type = 'epub'
    def get_converted_book(self):
        """
        1. Convert epub to html
        2. Parse from line structure to nested structure
        """
        json_converter = EpubConverter(self.file_path, access=self.access, logger=self.logger_object)
        content_dict = json_converter.convert_to_dict()
        self.status_wrapper.set_generating()
--- a/src/epub_converter/html_epub_preprocessor.py
+++ b/src/epub_converter/html_epub_preprocessor.py
@@ -10,6 +10,7 @@ from src.livecarta_config import LiveCartaConfig
 def save_image_locally(img_file_path, img_content, book_id):
    """ Function saves all images locally """
    folder_path = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
    new_path = pathlib.Path(os.path.join(
        folder_path, f'../json/img_{book_id}/'))
@@ -24,17 +25,19 @@ def save_image_locally(img_file_path, img_content, book_id):
 def save_image_to_aws(access: Access, img_file_path, img_content: bytes, book_id):
-    link = access.send_image(
+    """ Function saves all images to Amazon web service """
    link_path = access.send_image(
        img_file_path, doc_id=book_id, img_content=img_content)
-    return link
+    return link_path
-def update_src_links_in_images(body_tag: Tag,
+def update_images_src_links(body_tag: Tag,
                            href2img_content: dict,
                            path_to_html,
                            access=None,
                            path2aws_path=None,
                            book_id=None):
    """ Function makes dictionary image_src_path -> Amazon web service_path """
    img_tags = body_tag.find_all('img')
    for img in img_tags:
@@ -65,16 +68,16 @@ def update_src_links_in_images(body_tag: Tag,
            del img.attrs['height']
        if img.attrs.get('style'):
            del img.attrs['style']
    return path2aws_path
 def preprocess_table(body_tag: BeautifulSoup):
    """ Function to preprocess tables and tags(td|th|tr): style """
    tables = body_tag.find_all("table")
    for table in tables:
-        tds = table.find_all(re.compile("td|th|tr"))
+        ts = table.find_all(re.compile("td|th|tr"))
-        for td in tds:
+        for t_tag in ts:
-            style = td.get('style')
+            style = t_tag.get('style')
            width = ''
            if style:
                width_match = re.search(
@@ -84,13 +87,13 @@ def preprocess_table(body_tag: BeautifulSoup):
                    units = width_match.group(2)
                    width = size+'px'
-            td.attrs['width'] = td.get('width') or width
+            t_tag.attrs['width'] = t_tag.get('width') or width
-            if td.attrs.get('style'):
+            if t_tag.attrs.get('style'):
-                td.attrs['style'] = td.attrs['style'].replace('border:0;', '')
+                t_tag.attrs['style'] = t_tag.attrs['style'].replace('border:0;', '')
-            if td.attrs.get('style') == '':
+            elif t_tag.attrs.get('style') == '':
-                del td.attrs['style']
+                del t_tag.attrs['style']
        if not table.attrs.get('border') or table.attrs.get('border') in ['0', '0px']:
            table.attrs['border'] = '1'
@@ -110,6 +113,7 @@ def process_lists(body_tag):
 def insert_span_with_attrs_before_tag(main_tag, tag, id_, class_):
    """ Function inserts span before tag to be removed (tags that aren't supported by livecarta) """
    new_tag = main_tag.new_tag("span")
    new_tag.attrs['id'] = id_ or ''
    new_tag.attrs['class'] = class_ or ''
@@ -153,9 +157,7 @@ def clean_headings_content(content: Tag, title: str):
 def heading_tag_to_p_tag(body_tag):
-    """
+    """ Function to convert all lower level headings to p tags """
    Function to convert all lower level headings to p tags
    """
    pattern = f'^h[{LiveCartaConfig.SUPPORTED_LEVELS + 1}-9]$'
    header_tags = body_tag.find_all(re.compile(pattern))
    for tag in header_tags:
@@ -163,17 +165,16 @@ def heading_tag_to_p_tag(body_tag):
 def clean_title_from_numbering(title: str):
-    """
+    """ Function removes numbering from titles """
    Function to remove digits  from headers.
    """
    title = re.sub(r'^(\s+)+', '', title)
    title = re.sub(r'^(?:\.?\d+\.? ?)+', '', title)
    # title = re.sub(r'^(?:\.?[MDCLXVIclxvi]+\.? ?)+ ', '', title)  # delete chapter numbering from the title
-    title = re.sub(r'^(?:[A-Za-z]\. ?)+', '', title)
+    title = re.sub(r'^(?:[A-Za-z]\. ?)+', '', title) # delete chapter I, (ABC) from the title
    return title
 def replace_with_livecarta_anchor_tag(anchor, i):
    """ Function replaces noteref_tag (anchor) with a new livecarta tag """
    new_tag = BeautifulSoup(features='lxml').new_tag('sup')
    new_tag['class'] = 'footnote-element'
    new_tag['data-id'] = i + 1
@@ -188,11 +189,11 @@ def replace_with_livecarta_anchor_tag(anchor, i):
 def preprocess_footnotes(source_html_tag: Tag, href2soup_html: dict = None, noteref_attr_name='epub:type') \
        -> Tuple[list, list, list]:
    """
    This function preprocessing footnotes
    This function should run earlier than adding fonts in the pipeline.
    <p>Here is an example footnote<sup><a epub:type="noteref" href="#n1">1</a></sup></p>
    <aside epub:type="footnote" id="n1"><p>With a footnote here.</p></aside>
    """
    footnotes = []
    noterefs_tags = source_html_tag.find_all(
@@ -205,12 +206,14 @@ def preprocess_footnotes(source_html_tag: Tag, href2soup_html: dict = None, note
    new_footnotes_tags = []
    [tag.decompose() for tag in bad_noterefs_tags]
-    def parse_a_tag_href(s: str):
+    def parse_a_tag_href(s: str) -> Tuple[str, str]:
        """ Returns name of file & id of an anchor """
        assert '#' in s, f'Error. Unexpected href: {s} in a tag. Href must contain an id.'
        f, id_ = s.split('#')
        return f, id_
    def verify_footnote_tag(tags: list):
        """ Function verifies whether tag is a footnote """
        assert len(tags) <= 1, f'Error, Multiple id: {href}.\n{tags}'
        if len(tags) == 0:
            anchored_tags = list(target_html_tag.find_all(id=element_id))
@@ -275,7 +278,7 @@ def unwrap_structural_tags(body_tag):
    """
    def _preserve_class_in_aside_tag(tag_):
-        # to save css style inherited from class, copy class to aside tag (which is parent to tag_)
+        """ to save css style inherited from class, copy class to aside tag (which is parent to tag_) """
        # this is for Wiley books with boxes
        tag_class = tag_.attrs['class'] if not isinstance(
            tag_.attrs['class'], list) else tag_.attrs['class'][0]
@@ -284,10 +287,11 @@ def unwrap_structural_tags(body_tag):
                tag_.parent.attrs['class'] = tag_class
    def preserve_class_in_section_tag(tag_) -> bool:
-        # to save css style inherited from class, copy class to child <p>
+        """
        to save css style inherited from class, copy class to child <p>
        returns True, if <section> could be unwrapped
        """
        # this is for Wiley books with boxes
        # returns True, if <section> could be unwrapped
        tag_class = tag_.attrs['class'] if not isinstance(
            tag_.attrs['class'], list) else tag_.attrs['class'][0]
        if 'feature' not in tag_class:
@@ -312,6 +316,10 @@ def unwrap_structural_tags(body_tag):
                                              class_=tag_to_be_removed.attrs.get('class'))
    def replace_div_tag_with_table():
        """Function replace <div> with <table>:
        1. Convert div with certain classes to tables
        2. Add background color to div with background-color
        """
        for div in body_tag.find_all("div"):
            if div.attrs.get('class'):
                div_class = div.attrs['class'] if not isinstance(
@@ -348,12 +356,12 @@ def unwrap_structural_tags(body_tag):
                    continue
            add_span_to_save_ids_for_links(div)
            div.unwrap()
    # comments removal
    for tag in body_tag.find_all():
        for element in tag(text=lambda text: isinstance(text, Comment)):
            element.extract()
    replace_div_tag_with_table()
    for s in body_tag.find_all("section"):
@@ -458,23 +466,8 @@ def get_tags_between_chapter_marks(first_id, href, html_soup):
    return tags
 def wrap_preformatted_span_with_table(main_tag, old_tag):
     """ Function wraps <span> with <table>

     Creates <table>, <tbody>, <tr> and <td> through main_tag.new_tag and
     nests old_tag inside them; returns the outer <table> tag.
     """
     make = main_tag.new_tag
     outer_table = make("table")
     outer_table.attrs['border'] = '1px #ccc;'
     outer_table.attrs['style'] = 'width:100%;'
     body, row, cell = make("tbody"), make("tr"), make("td")
     cell.attrs['bgcolor'] = '#f5f5f5'
     # wrap from the inside out: old_tag -> td -> tr -> tbody -> table
     old_tag.wrap(cell)
     cell.wrap(row)
     row.wrap(body)
     body.wrap(outer_table)
     return outer_table
 def wrap_block_tag_with_table(main_tag, old_tag, width='95', border='1px', bg_color=None):
    """ Function wraps <block> with <table> """
    table = main_tag.new_tag("table")
    table.attrs['border'] = border
    table.attrs['align'] = 'center'
@@ -497,7 +490,6 @@ def clean_wiley_block(block):
    hrs = block.find_all("p", attrs={"class": re.compile(".+ hr")})
    for hr in hrs:
        hr.extract()
        print(hr)
    h = block.find(re.compile("h[1-9]"))
    if h:
        h.name = "p"
@@ -505,6 +497,7 @@ def clean_wiley_block(block):
 def preprocess_block_tags(chapter_tag):
    """ Function preprocessing <block> tags """
    for block in chapter_tag.find_all("blockquote"):
        if block.attrs.get('class') in ['feature1', 'feature2', 'feature3', 'feature4']:
            clean_wiley_block(block)
@@ -527,7 +520,7 @@ def preprocess_block_tags(chapter_tag):
 def prepare_formatted(text):
-    # replace <,> to save them as is in html code
+    """ Function replaces special symbols with their Unicode representation """
    text = text.replace("<", "\x3C")
    text = text.replace(">", "\x3E")
    text = text.replace('\t', "\xa0 \xa0 ")  # &nbsp; &nbsp;
@@ -536,7 +529,25 @@ def prepare_formatted(text):
    return text
 def wrap_preformatted_span_with_table(main_tag, old_tag):
     """ Function wraps <span> with <table>

     New tags are created with main_tag.new_tag; old_tag ends up nested as
     table > tbody > tr > td > old_tag. The outer <table> tag is returned.
     """
     table_tag, tbody_tag, row_tag, cell_tag = (
         main_tag.new_tag(name) for name in ("table", "tbody", "tr", "td"))
     table_tag.attrs.update({'border': '1px #ccc;', 'style': 'width:100%;'})
     cell_tag.attrs['bgcolor'] = '#f5f5f5'
     # each wrapper encloses the previous one, innermost first
     inner = old_tag
     for wrapper in (cell_tag, row_tag, tbody_tag, table_tag):
         inner.wrap(wrapper)
         inner = wrapper
     return table_tag
 def preprocess_pre_tags(chapter_tag):
    """ Function preprocessing <pre> tags """
    for pre in chapter_tag.find_all("pre"):
        new_tag = BeautifulSoup(features='lxml').new_tag("span")
        new_tag.attrs = pre.attrs.copy()
@@ -575,7 +586,7 @@ def preprocess_pre_tags(chapter_tag):
 def preprocess_code_tags(chapter_tag):
-    # function that emulates style of <code>, <kdb>, <var>
+    """ Function that emulates style of <code>, <kdb>, <var> """
    for code in chapter_tag.find_all(re.compile("code|kdb|var")):
        code.name = 'span'
        if code.parent.name == "pre":
@@ -584,9 +595,7 @@ def preprocess_code_tags(chapter_tag):
 def prepare_title(title_of_chapter: str) -> str:
-    """
+    """ Function finalise processing/cleaning title """
    Final processing/cleaning function.
    """
    title_str = BeautifulSoup(title_of_chapter, features='lxml').string
    title_str = re.sub(r'([\n\t\xa0])', ' ', title_str)
    title_str = re.sub(r' +', ' ', title_str).rstrip()
@@ -596,7 +605,11 @@ def prepare_title(title_of_chapter: str) -> str:
 def prepare_content(title_str: str, content_tag: BeautifulSoup, remove_title_from_chapter: bool) -> str:
    """
-    Final processing/cleaning function.
+    Function finalise processing/cleaning content
    1. cleaning \n
    2. heading removal
    3. processing tags
    4. class removal
    """
    # 0. cleaning \n
    to_remove = []
@@ -609,13 +622,15 @@ def prepare_content(title_str: str, content_tag: BeautifulSoup, remove_title_fro
    # 1. heading removal
    if remove_title_from_chapter:
        clean_headings_content(content_tag, title_str)
    # 2. processing tags (<li>, <table>, <code>, <pre>, <block>)
    process_lists(content_tag)
    preprocess_table(content_tag)
    preprocess_code_tags(content_tag)
    preprocess_pre_tags(content_tag)
    preprocess_block_tags(content_tag)
-    # 2. class removal
+    # 3. class removal
    for tag in content_tag.find_all(recursive=True):
        if hasattr(tag, 'attrs') and tag.attrs.get('class') and (tag.attrs.get('class') not in ['link-anchor',
                                                                                                'footnote-element']):
--- a/src/livecarta_config.py
+++ b/src/livecarta_config.py
@@ -1,5 +1,5 @@
 class LiveCartaConfig:
    """Class of values that LiveCarta platform using and supports"""
    SUPPORTED_LEVELS = 5
    SUPPORTED_HEADERS = {"h1", "h2", "h3", "h4", "h5"}
    HEADERS_LEVELS = {"h1", "h2", "h3", "h4", "h5", "h6", "h7", "h8", "h9"}
--- a/src/util/color_reader.py
+++ b/src/util/color_reader.py
@@ -6,6 +6,7 @@ from webcolors import html4_hex_to_names, hex_to_rgb, rgb_to_name, rgb_percent_t
 def closest_colour_rgb(requested_color):
    """ Function finds closest colour rgb """
    min_colours = {}
    for key, name in html4_hex_to_names.items():
        r_c, g_c, b_c = hex_to_rgb(key)
@@ -18,6 +19,7 @@ def closest_colour_rgb(requested_color):
 def rgb2color_name(color):
    """ Transform rgb -> color name """
    try:
        closest_name = actual_name = rgb_to_name(color, 'html4')
    except ValueError:
@@ -30,6 +32,7 @@ def rgb2color_name(color):
 def hex2color_name(color):
    """ Transform hex -> color name """
    try:
        color = hex_to_rgb(color)
    except ValueError:
@@ -47,6 +50,7 @@ def hex2color_name(color):
 def str2closest_html_color_name(s: str):
    """ Transform str -> closest color name """
    if 'rgb' in s:
        rgb_str = 'rgba' if ('rgba' in s) else 'rgb'
        s = s.replace(rgb_str, '').replace('(', '').replace(')', '')
@@ -80,6 +84,7 @@ def str2closest_html_color_name(s: str):
 def rgba2rgb(r, g, b, alpha):
    """ Transform rgba -> rgb """
    r_background, g_background, b_background = 255, 255, 255
    r_new = int((1 - alpha) * r_background + alpha * r)
    g_new = int((1 - alpha) * g_background + alpha * g)
@@ -88,6 +93,7 @@ def rgba2rgb(r, g, b, alpha):
 def str2hex(s: str):
    """ Transform str -> hex """
    if '#' in s and (len(s) <= 7):
        return s.lower()
--- a/src/util/helpers.py
+++ b/src/util/helpers.py
@@ -3,6 +3,7 @@ import logging
 class ColoredFormatter(logging.Formatter):
    """ Class to prettify logger and command line output """
    MAPPING = {
        'DEBUG': 37,  # white
        'INFO': 36,  # cyan
@@ -61,9 +62,7 @@ class BookLogger:
        self.logger.log(msg=message, level=logging_level, stacklevel=2)
    def log_error_to_main_log(self, message=''):
-        """
+        """ Method for logging error to main log file. """
        Method for logging error to main log file.
        """
        if self.main_logger:
            if not message:
                message = f'Error in book conversion. Check log file.'
@@ -71,6 +70,8 @@ class BookLogger:
 class BookStatusWrapper:
    """Class sets/updates statuses of Converter on Platform"""
    def __init__(self, access, logger_object, book_id=0):
        self.access = access
        self.logger_object = logger_object