add processing of JSON presets

This commit is contained in:
Kiryl
2022-07-07 19:32:24 +03:00
parent 687c09417a
commit c4752a19db
5 changed files with 497 additions and 417 deletions

View File

@@ -1,5 +1,4 @@
import os import os
import logging
import pathlib import pathlib
from shutil import copyfile from shutil import copyfile

View File

@@ -4,33 +4,34 @@ import codecs
import os import os
from os.path import dirname, normpath, join from os.path import dirname, normpath, join
from itertools import chain from itertools import chain
from premailer import transform
from collections import defaultdict from collections import defaultdict
from typing import Dict, Union, List from typing import Dict, Union, List
import ebooklib import ebooklib
from ebooklib import epub from ebooklib import epub
from ebooklib.epub import Link, Section from ebooklib.epub import Link, Section
from bs4 import BeautifulSoup, Tag from bs4 import BeautifulSoup, NavigableString, Tag
from src.util.helpers import BookLogger from src.util.helpers import BookLogger
from src.preset_processor import PresetProcessor
from src.epub_converter.css_preprocessor import CSSPreprocessor
from src.epub_converter.html_epub_preprocessor import HtmlEpubPreprocessor
from src.livecarta_config import LiveCartaConfig from src.livecarta_config import LiveCartaConfig
from src.data_objects import ChapterItem, NavPoint from src.data_objects import ChapterItem, NavPoint
from src.epub_converter.image_processing import update_images_src_links from src.epub_converter.image_processing import update_images_src_links
from src.epub_converter.footnotes_processing import preprocess_footnotes from src.epub_converter.footnotes_processing import preprocess_footnotes
from src.epub_converter.css_preprocessing import build_inline_style_content, build_css_file_content from src.epub_converter.tag_inline_style_processor import TagInlineStyleProcessor
from src.epub_converter.tag_css_style_converter import convert_html_soup_with_css_style
from src.epub_converter.html_epub_preprocessor import get_tags_between_chapter_marks,\
prepare_title, prepare_content
class EpubConverter: class EpubConverter:
def __init__(self, file_path, access=None, logger=None): def __init__(self, file_path, access=None, logger=None, css_preprocessor=None, html_processor=None):
self.file_path = file_path self.file_path = file_path
self.access = access self.access = access
self.logger: BookLogger = logger self.logger: BookLogger = logger
self.ebooklib_book = epub.read_epub(file_path) self.ebooklib_book = epub.read_epub(file_path)
self.css_processor = css_preprocessor
self.html_preprocessor = html_processor
# main container for all epub .xhtml files # main container for all epub .xhtml files
self.html_href2html_body_soup: Dict[str, BeautifulSoup] = {} self.html_href2html_body_soup: Dict[str, BeautifulSoup] = {}
@@ -77,22 +78,12 @@ class EpubConverter:
self.logger.log("CSS styles adding.") self.logger.log("CSS styles adding.")
self.add_css_styles_to_html_soup() self.add_css_styles_to_html_soup()
# todo presets
self.logger.log("Footnotes processing.") self.logger.log("Footnotes processing.")
for href in self.html_href2html_body_soup: for href in self.html_href2html_body_soup:
content, noterefs, footnotes_tags = preprocess_footnotes(self.html_href2html_body_soup[href], self.footnotes_contents, self.noterefs, self.footnotes =\
self.html_href2html_body_soup) preprocess_footnotes(self.html_href2html_body_soup[href], self.html_href2html_body_soup)
self.footnotes_contents.extend(content)
self.noterefs.extend(noterefs)
self.footnotes.extend(footnotes_tags)
for i, (noteref, footnote) in enumerate(zip(self.noterefs, self.footnotes)):
noteref.attrs["data-id"] = i + 1
noteref.attrs["id"] = f"footnote-{i + 1}"
footnote.attrs["href"] = f"#footnote-{i + 1}"
self.logger.log(f"Added {len(self.footnotes_contents)} footnotes.") self.logger.log(f"Added {len(self.footnotes_contents)} footnotes.")
self.logger.log("TOC processing.") self.logger.log("TOC processing.")
self.build_adjacency_list_from_toc(self.ebooklib_book.toc) self.build_adjacency_list_from_toc(self.ebooklib_book.toc)
# build simple toc from spine if needed # build simple toc from spine if needed
@@ -101,6 +92,7 @@ class EpubConverter:
not_added = [ not_added = [
x for x in self.html_href2html_body_soup if x not in self.hrefs_added_to_toc] x for x in self.html_href2html_body_soup if x not in self.hrefs_added_to_toc]
self.logger.log(f"Html documents not added to TOC: {not_added}.") self.logger.log(f"Html documents not added to TOC: {not_added}.")
self.logger.log(f"Add documents not added to TOC.")
self.add_not_added_files_to_adjacency_list(not_added) self.add_not_added_files_to_adjacency_list(not_added)
self.logger.log(f"Html internal links and structure processing.") self.logger.log(f"Html internal links and structure processing.")
self.label_chapters_ids_with_lc_id() self.label_chapters_ids_with_lc_id()
@@ -149,7 +141,7 @@ class EpubConverter:
for tag_initial_inline_style in tags_with_inline_style: for tag_initial_inline_style in tags_with_inline_style:
inline_style = tag_initial_inline_style.attrs["style"] inline_style = tag_initial_inline_style.attrs["style"]
tag_initial_inline_style.attrs["style"] = \ tag_initial_inline_style.attrs["style"] = \
build_inline_style_content(inline_style) self.css_processor.build_inline_style_content(inline_style)
def build_html_and_css_relations(self) -> tuple[dict, dict]: def build_html_and_css_relations(self) -> tuple[dict, dict]:
""" """
@@ -181,16 +173,53 @@ class EpubConverter:
html_href2css_href[html_href].append(css_href) html_href2css_href[html_href].append(css_href)
if css_href not in css_href2css_content: if css_href not in css_href2css_content:
# css_href not in css_href2css_content, add to this dict # css_href not in css_href2css_content, add to this dict
css_href2css_content[css_href] = build_css_file_content( css_href2css_content[css_href] = self.css_processor.build_css_file_content(
self.get_css_content(css_href, html_href)) self.get_css_content(css_href, html_href))
for i, tag in enumerate(soup_html_content.find_all("style")): for i, tag in enumerate(soup_html_content.find_all("style")):
css_content = tag.string css_content = tag.string
html_href2css_href[html_href].append(f"href{i}") html_href2css_href[html_href].append(f"href{i}")
css_href2css_content[f"href{i}"] = build_css_file_content( css_href2css_content[f"href{i}"] = self.css_processor.build_css_file_content(
css_content) css_content)
return html_href2css_href, css_href2css_content return html_href2css_href, css_href2css_content
def convert_html_soup_with_css_style(self, html_soup: BeautifulSoup, css_text: str) -> BeautifulSoup:
    """
    Function adds styles from .css to inline style.

    Parameters
    ----------
    html_soup: BeautifulSoup
        html page with inline style
    css_text: str
        css content from css file

    Returns
    -------
    inline_soup: BeautifulSoup
        soup with styles from css (a new soup; html_soup itself is not mutated,
        it is serialized with str() before transformation)
    """
    # remove this specification because it causes problems
    # (the epub @namespace rule is not valid CSS for premailer's parser)
    css_text = css_text.replace(
        '@namespace epub "http://www.idpf.org/2007/ops";', '')
    # here we add css styles to inline style via premailer;
    # network access and validation are disabled on purpose
    html_with_css_styles: str = transform(str(html_soup), css_text=css_text,
                                          remove_classes=False,
                                          external_styles=False,
                                          allow_network=False,
                                          disable_validation=True,
                                          )
    # soup with converted styles from css
    inline_soup = BeautifulSoup(html_with_css_styles, features="lxml")
    # every tag that may carry style on the LiveCarta side and currently
    # has any inline "style" attribute
    tags_with_inline_style = inline_soup.find_all(LiveCartaConfig.could_have_style_in_livecarta_regexp,
                                                  attrs={"style": re.compile(".*")})
    # go through the tags with inline style + style parsed from css file;
    # TagInlineStyleProcessor mutates each tag's style in place
    for tag_inline_style in tags_with_inline_style:
        style_converter = TagInlineStyleProcessor(tag_inline_style)
        style_converter.convert_initial_tag()
    return inline_soup
def add_css_styles_to_html_soup(self): def add_css_styles_to_html_soup(self):
""" """
This function is designed to update html_href2html_body_soup This function is designed to update html_href2html_body_soup
@@ -203,7 +232,7 @@ class EpubConverter:
for css_href in self.html_href2css_href[html_href]: for css_href in self.html_href2css_href[html_href]:
css += self.css_href2css_content[css_href] css += self.css_href2css_content[css_href]
html_content: BeautifulSoup = self.html_href2html_body_soup[html_href] html_content: BeautifulSoup = self.html_href2html_body_soup[html_href]
html_content = convert_html_soup_with_css_style(html_content, css) html_content = self.convert_html_soup_with_css_style(html_content, css)
self.html_href2html_body_soup[html_href] = html_content self.html_href2html_body_soup[html_href] = html_content
def build_adjacency_list_from_toc(self, element: [Link, tuple, list], lvl=0): def build_adjacency_list_from_toc(self, element: [Link, tuple, list], lvl=0):
@@ -488,6 +517,48 @@ class EpubConverter:
f" Should be anchor with new id={new_id} in {a_tag_href_matched_to_toc} file." f" Should be anchor with new id={new_id} in {a_tag_href_matched_to_toc} file."
f" Old id={a_tag_id}") f" Old id={a_tag_id}")
@staticmethod
def get_tags_between_chapter_marks(first_id: str, href: str, html_soup: BeautifulSoup) -> list:
    """
    After processing on a first_id that corresponds to current chapter,
    from initial html_soup all tags from current chapter are extracted.

    Parameters
    ----------
    first_id: str
        Id that points where a chapter starts. A Tag with class: "converter-chapter-mark"
    href: str
        Name of current chapter's file (used only in the error message)
    html_soup: BeautifulSoup
        Soup object of current file; matched tags are removed from it in place

    Returns
    -------
    tags: list [Tag, NavigableString]
        Chapter's tags, extracted from html_soup

    Raises
    ------
    AssertionError
        If no tag with the given id and the chapter-mark class exists.
    """
    marked_tag = html_soup.find(
        attrs={"id": first_id, "class": "converter-chapter-mark"})
    if not marked_tag:
        # Raise explicitly instead of `assert 0, ...` so the check is not
        # stripped when Python runs with -O (same exception type/message).
        raise AssertionError(f"Warning: no match for {first_id, href}")
    tags = []
    next_tag = marked_tag.next_sibling
    while next_tag:
        # Stop at the next chapter mark: everything between two marks
        # belongs to the current chapter.
        # NOTE(review): bs4 usually exposes `class` as a list of strings,
        # so comparing to a plain string may never match — confirm the
        # converter writes a single-string class attribute here.
        if not isinstance(next_tag, NavigableString) and \
                (next_tag.attrs.get("class") == "converter-chapter-mark"):
            break
        tags.append(next_tag)
        next_tag = next_tag.next_sibling
    # remove tags between first_id and next found id;
    # save them in list for next steps
    tags = [tag.extract() for tag in tags]
    # merge adjacent NavigableStrings left behind by the extraction
    html_soup.smooth()
    return tags
def detect_one_chapter(self, nav_point: NavPoint): def detect_one_chapter(self, nav_point: NavPoint):
""" """
Function updates self.href_chapter_id2soup_html (mapping from (href,id) to chapter content/html soup object) Function updates self.href_chapter_id2soup_html (mapping from (href,id) to chapter content/html soup object)
@@ -511,11 +582,11 @@ class EpubConverter:
""" """
if nav_point.id: if nav_point.id:
soup = self.html_href2html_body_soup[nav_point.href] soup = self.html_href2html_body_soup[nav_point.href]
chapter_tags = get_tags_between_chapter_marks( subchapter_tags = self.get_tags_between_chapter_marks(
first_id=nav_point.id, href=nav_point.href, html_soup=soup) first_id=nav_point.id, href=nav_point.href, html_soup=soup)
new_tree = BeautifulSoup("", "html.parser") new_tree = BeautifulSoup("", "html.parser")
for tag in chapter_tags: for subchapter_tag in subchapter_tags:
new_tree.append(tag) new_tree.append(subchapter_tag)
self.href_chapter_id2soup_html[( self.href_chapter_id2soup_html[(
nav_point.href, nav_point.id)] = new_tree nav_point.href, nav_point.id)] = new_tree
@@ -527,8 +598,8 @@ class EpubConverter:
"""Function build chapters content, starts from top level chapters""" """Function build chapters content, starts from top level chapters"""
top_level_nav_points = self.adjacency_list[-1] top_level_nav_points = self.adjacency_list[-1]
if self.id_anchor_exist_in_nav_points: if self.id_anchor_exist_in_nav_points:
for point in top_level_nav_points: for tl_nav_point in top_level_nav_points:
self.detect_one_chapter(point) self.detect_one_chapter(tl_nav_point)
def html_node_to_livecarta_chapter_item(self, nav_point: NavPoint, lvl=1) -> ChapterItem: def html_node_to_livecarta_chapter_item(self, nav_point: NavPoint, lvl=1) -> ChapterItem:
""" """
@@ -561,8 +632,8 @@ class EpubConverter:
if hasattr(self.file_path, "stem") else "book_id") if hasattr(self.file_path, "stem") else "book_id")
is_chapter = lvl <= LiveCartaConfig.SUPPORTED_LEVELS is_chapter = lvl <= LiveCartaConfig.SUPPORTED_LEVELS
title_preprocessed = prepare_title(title) title_preprocessed = self.html_preprocessor.prepare_title(title)
content_preprocessed = prepare_content(title_preprocessed, content, content_preprocessed = self.html_preprocessor.prepare_content(title_preprocessed, content,
remove_title_from_chapter=is_chapter) remove_title_from_chapter=is_chapter)
sub_nodes = [] sub_nodes = []
# warning! not EpubHtmlItems won't be added to chapter # warning! not EpubHtmlItems won't be added to chapter
@@ -598,11 +669,17 @@ class EpubConverter:
if __name__ == "__main__": if __name__ == "__main__":
epub_file_path = "../../epub/9781641050234.epub" epub_file_path = "../../epub/Modern_Java_in_Action.epub"
logger_object = BookLogger( logger_object = BookLogger(
name="epub", book_id=epub_file_path.split("/")[-1]) name="epub", book_id=epub_file_path.split("/")[-1])
json_converter = EpubConverter(epub_file_path, logger=logger_object) preset = PresetProcessor(preset_path="../../config/presets.json", logger=logger_object)\
.get_preset_json()
css_preprocessor = CSSPreprocessor(logger=logger_object)
html_preprocessor = HtmlEpubPreprocessor(preset=preset, logger=logger_object)
json_converter = EpubConverter(epub_file_path, logger=logger_object,
css_preprocessor=css_preprocessor, html_processor=html_preprocessor)
content_dict = json_converter.convert_to_dict() content_dict = json_converter.convert_to_dict()
with codecs.open(epub_file_path.replace("epub", "json"), "w", encoding="utf-8") as f_json: with codecs.open(epub_file_path.replace("epub", "json"), "w", encoding="utf-8") as f_json:

View File

@@ -1,4 +1,7 @@
from src.book_solver import BookSolver from src.book_solver import BookSolver
from src.preset_processor import PresetProcessor
from src.epub_converter.css_preprocessor import CSSPreprocessor
from src.epub_converter.html_epub_preprocessor import HtmlEpubPreprocessor
from src.epub_converter.epub_converter import EpubConverter from src.epub_converter.epub_converter import EpubConverter
@@ -14,8 +17,10 @@ class EpubBook(BookSolver):
Function Function
Steps Steps
---------- ----------
1. Converts .epub to .html 1. Gets data from preset structure
2. Parses from line structure to nested structure 2. Add preset to html preprocessor
3. Converts .epub to .html
4. Parses from line structure to nested structure
Returns Returns
---------- ----------
@@ -23,7 +28,12 @@ class EpubBook(BookSolver):
json for LiveCarta platform json for LiveCarta platform
""" """
preset = PresetProcessor(preset_path="config/presets.json", logger=self.logger_object)\
.get_preset_json()
css_preprocessor = CSSPreprocessor(logger=self.logger_object)
html_preprocessor = HtmlEpubPreprocessor(preset=preset, logger=self.logger_object)
json_converter = EpubConverter( json_converter = EpubConverter(
self.file_path, access=self.access, logger=self.logger_object) self.file_path, access=self.access, logger=self.logger_object,
css_preprocessor=css_preprocessor, html_processor=html_preprocessor)
content_dict = json_converter.convert_to_dict() content_dict = json_converter.convert_to_dict()
return content_dict return content_dict

View File

@@ -1,11 +1,22 @@
import re import re
from bs4 import BeautifulSoup, NavigableString, Comment, Tag
from bs4 import BeautifulSoup, NavigableString, Tag, Comment from src.util.helpers import BookLogger
from src.livecarta_config import LiveCartaConfig
def _add_span_to_save_ids_for_links(tag_to_be_removed, chapter_tag: BeautifulSoup): class HtmlEpubPreprocessor:
def __init__(self, preset, logger=None):
    """
    Parameters
    ----------
    preset: list
        parsed JSON presets: a sequence of dicts carrying "preset_name"
        and "rules" keys (consumed later in prepare_content)
    logger: BookLogger, optional
        project logger for progress/diagnostic messages
    """
    self.preset = preset
    self.logger: BookLogger = logger
    # Dispatch table: preset name from the JSON config -> preprocessing
    # method applied to the chapter content with that preset's rules.
    self.name2function = {
        "table_wrapper": self._wrap_tags_with_table,
        "replacer": self._tags_to_correspond_livecarta_tag,
        "unwrapper": self._unwrap_tags,
        "inserter": self._insert_tags_into_correspond_tags
    }
@staticmethod
def _add_span_to_save_ids_for_links(tag_to_be_removed, chapter_tag: BeautifulSoup):
""" """
Function adds span with id from tag_to_be_removed Function adds span with id from tag_to_be_removed
because this tag will be removed(unwrapped/extract) because this tag will be removed(unwrapped/extract)
@@ -20,7 +31,9 @@ def _add_span_to_save_ids_for_links(tag_to_be_removed, chapter_tag: BeautifulSou
updated body tag updated body tag
""" """
def _insert_span_with_attrs_before_tag(chapter_tag: BeautifulSoup, tag_to_be_removed: Tag, id_: str, class_: list):
def _insert_span_with_attrs_before_tag(chapter_tag: BeautifulSoup, tag_to_be_removed: Tag, id_: str,
class_: list):
"""Function inserts span before tag aren't supported by LiveCarta""" """Function inserts span before tag aren't supported by LiveCarta"""
new_tag = chapter_tag.new_tag("span") new_tag = chapter_tag.new_tag("span")
new_tag.attrs["id"] = id_ or "" new_tag.attrs["id"] = id_ or ""
@@ -33,50 +46,8 @@ def _add_span_to_save_ids_for_links(tag_to_be_removed, chapter_tag: BeautifulSou
id_=tag_to_be_removed.attrs["id"], id_=tag_to_be_removed.attrs["id"],
class_=tag_to_be_removed.attrs.get("class")) class_=tag_to_be_removed.attrs.get("class"))
@staticmethod
def get_tags_between_chapter_marks(first_id: str, href: str, html_soup: BeautifulSoup) -> list: def prepare_title(title_of_chapter: str) -> str:
"""
After processing on a first_id that corresponds to current chapter,
from initial html_soup all tags from current chapter are extracted
Parameters
----------
first_id: str
Id that point where a chapter starts. A Tag with class: "converter-chapter-mark"
href: str
Name of current chapters file
html_soup: Tag
Soup object of current file
Returns
-------
tags: list [Tag, NavigableString]
Chapter's tags
"""
marked_tags = html_soup.find(
attrs={"id": first_id, "class": "converter-chapter-mark"})
if marked_tags:
next_tag = marked_tags.next_sibling
tags = []
while next_tag:
if not isinstance(next_tag, NavigableString) and \
(next_tag.attrs.get("class") == "converter-chapter-mark"):
break
tags.append(next_tag)
next_tag = next_tag.next_sibling
# remove tags between first_id and next found id
# save them in list for next steps
tags = [tag.extract() for tag in tags]
html_soup.smooth()
else:
assert 0, f"Warning: no match for {first_id, href}"
return tags
def prepare_title(title_of_chapter: str) -> str:
""" """
Function finalise processing/cleaning title Function finalise processing/cleaning title
Parameters Parameters
@@ -94,8 +65,8 @@ def prepare_title(title_of_chapter: str) -> str:
title = re.sub(r"[\s\xa0]", " ", title).strip() title = re.sub(r"[\s\xa0]", " ", title).strip()
return title return title
@staticmethod
def _remove_comments(chapter_tag): def _remove_comments(chapter_tag):
""" """
Function remove comments Function remove comments
Parameters Parameters
@@ -113,8 +84,8 @@ def _remove_comments(chapter_tag):
for element in tag(text=lambda text: isinstance(text, Comment)): for element in tag(text=lambda text: isinstance(text, Comment)):
element.extract() element.extract()
@staticmethod
def _wrap_strings_with_p(chapter_tag): def _wrap_strings_with_p(chapter_tag):
""" """
Function converts headings that aren't supported by LiveCarta with <p> Function converts headings that aren't supported by LiveCarta with <p>
Parameters Parameters
@@ -137,8 +108,7 @@ def _wrap_strings_with_p(chapter_tag):
p_tag.append(str(node)) p_tag.append(str(node))
node.replace_with(p_tag) node.replace_with(p_tag)
def _wrap_tags_with_table(self, chapter_tag, rules: list):
def _wrap_tags_with_table(chapter_tag):
""" """
Function wraps <tag> with <table> Function wraps <tag> with <table>
Parameters Parameters
@@ -152,6 +122,7 @@ def _wrap_tags_with_table(chapter_tag):
Chapter Tag with wrapped certain tags with <table> Chapter Tag with wrapped certain tags with <table>
""" """
def _wrap_tag_with_table(chapter_tag, tag_to_be_wrapped, width="100", border="", bg_color=None): def _wrap_tag_with_table(chapter_tag, tag_to_be_wrapped, width="100", border="", bg_color=None):
table = chapter_tag.new_tag("table") table = chapter_tag.new_tag("table")
table.attrs["border"], table.attrs["align"], table.attrs["style"] \ table.attrs["border"], table.attrs["align"], table.attrs["style"] \
@@ -173,21 +144,18 @@ def _wrap_tags_with_table(chapter_tag):
width=tag_to_wrap.attrs["width"] if tag_to_wrap.attrs.get("width") else "100", width=tag_to_wrap.attrs["width"] if tag_to_wrap.attrs.get("width") else "100",
border=tag_to_wrap.attrs["border"] if tag_to_wrap.attrs.get("border") else None, border=tag_to_wrap.attrs["border"] if tag_to_wrap.attrs.get("border") else None,
bg_color=tag_to_wrap.attrs["bgcolor"] if tag_to_wrap.attrs.get("bgcolor") else None) bg_color=tag_to_wrap.attrs["bgcolor"] if tag_to_wrap.attrs.get("bgcolor") else None)
_add_span_to_save_ids_for_links(tag_to_wrap, chapter_tag) self._add_span_to_save_ids_for_links(tag_to_wrap, chapter_tag)
tag_to_wrap.unwrap() tag_to_wrap.unwrap()
for tags_to_wrap, attrs in LiveCartaConfig.WRAP_TAGS_WITH_TABLE.items(): for rule in rules:
if isinstance(attrs, tuple): tags = rule["tags"]
attr, val = attrs[0], attrs[1] for attr in rule["attrs"]:
for tag_to_wrap in chapter_tag.find_all(tags_to_wrap, {attr: re.compile(fr"{val}")}): for tag_to_wrap in chapter_tag.find_all([re.compile(tag) for tag in tags],
process_tag_using_table(tag_to_wrap) {attr["name"]: re.compile(fr"{attr['value']}")}):
else:
for tag_to_wrap in chapter_tag.find_all(tags_to_wrap):
if any(attr_name in attrs for attr_name in tag_to_wrap.attrs):
process_tag_using_table(tag_to_wrap) process_tag_using_table(tag_to_wrap)
@staticmethod
def _tags_to_correspond_livecarta_tag(chapter_tag): def _tags_to_correspond_livecarta_tag(chapter_tag, rules: list):
""" """
Function to replace all tags to correspond LiveCarta tags Function to replace all tags to correspond LiveCarta tags
Parameters Parameters
@@ -201,28 +169,30 @@ def _tags_to_correspond_livecarta_tag(chapter_tag):
Chapter Tag with all tags replaced with LiveCarta tags Chapter Tag with all tags replaced with LiveCarta tags
""" """
for reg_keys, to_replace_value in LiveCartaConfig.REPLACE_TAG_WITH_LIVECARTA_CORRESPOND_TAGS.items(): for rule in rules:
for key in reg_keys: tags = rule["tags"]
if isinstance(key, tuple): tag_to_replace = rule["tag_to_replace"]
replace = key[0] if rule["condition"]:
parent, child = key[1], key[2] for condition_on_tag in ((k, v) for k, v in rule["condition"].items() if v):
for parent_tag in chapter_tag.select(parent): if condition_on_tag[0] == 'parent_tags':
if replace == "parent": for tag in chapter_tag.find_all([re.compile(tag) for tag in tags]):
parent_tag.name = to_replace_value if tag.parent.select(condition_on_tag[1]):
elif replace == "child": tag.name = tag_to_replace
for child_tag in parent_tag.select(child): elif condition_on_tag[0] == 'child_tags':
child_tag.name = to_replace_value for tag in chapter_tag.find_all([re.compile(tag) for tag in tags]):
if not child_tag.attrs.get("style"): if not tag.select(re.sub('[():]|not', '', condition_on_tag[1])):
child_tag.attrs["style"] =\ tag.name = tag_to_replace
"font-size: 14px; font-family: courier new,courier,monospace;" elif condition_on_tag[0] == "attrs":
for attr in rule["condition"]["attrs"]:
for tag in chapter_tag.find_all([re.compile(tag) for tag in tags],
{attr["name"]: re.compile(fr"{attr['value']}")}):
tag.name = tag_to_replace
else: else:
tags = chapter_tag.find_all(re.compile(key)) for tag in chapter_tag.find_all([re.compile(tag) for tag in tags]):
for tag in tags:
# todo can cause appearance of \n <p><p>...</p></p> -> <p>\n</p> <p>...</p> <p>\n</p> (section) # todo can cause appearance of \n <p><p>...</p></p> -> <p>\n</p> <p>...</p> <p>\n</p> (section)
tag.name = to_replace_value tag.name = tag_to_replace
def _unwrap_tags(self, chapter_tag, rules: dict):
def _unwrap_tags(chapter_tag):
""" """
Function unwrap tags and moves id to span Function unwrap tags and moves id to span
Parameters Parameters
@@ -236,16 +206,61 @@ def _unwrap_tags(chapter_tag):
Chapter Tag with unwrapped certain tags Chapter Tag with unwrapped certain tags
""" """
for tag_name in LiveCartaConfig.TAGS_TO_UNWRAP: for tag_name in rules["tags"]:
for tag in chapter_tag.select(tag_name): for tag in chapter_tag.select(tag_name):
# if tag is a subtag # if tag is a subtag
if ">" in tag_name: if ">" in tag_name:
tag.parent.attrs.update(tag.attrs) tag.parent.attrs.update(tag.attrs)
_add_span_to_save_ids_for_links(tag, chapter_tag) self._add_span_to_save_ids_for_links(tag, chapter_tag)
tag.unwrap() tag.unwrap()
@staticmethod
def _insert_tags_into_correspond_tags(chapter_tag, rules: list):
    """
    Function inserts tags into correspond tags

    Parameters
    ----------
    chapter_tag: BeautifulSoup
        Tag & contents of the chapter tag
    rules: list
        preset rules; each rule is a dict with "tags", "tag_to_insert"
        and a "condition" mapping — presumably loaded from
        config/presets.json; confirm schema against that file

    Returns
    -------
    None
        Chapter Tag with inserted tags
    """
    def insert(tag, tag_to_insert):
        # insert all items that was in tag to subtag and remove from tag
        for content in reversed(tag.contents):
            tag_to_insert.insert(0, content.extract())
        # wrap subtag with items
        tag.append(tag_to_insert)

    for rule in rules:
        tags = rule["tags"]
        # NOTE(review): one tag_to_insert object is created per rule but
        # appended for every matched tag — a bs4 Tag has a single parent,
        # so a second match would move it; confirm rules match at most once
        # or that this relocation is intended.
        tag_to_insert = \
            chapter_tag.new_tag(rule["tag_to_insert"])
        if rule["condition"]:
            # only conditions with truthy values participate
            for condition_on_tag in ((k, v) for k, v in rule["condition"].items() if v):
                if condition_on_tag[0] == 'parent_tags':
                    # insert only when the tag's parent matches the selector
                    for tag in chapter_tag.find_all([re.compile(tag) for tag in tags]):
                        if tag.parent.select(condition_on_tag[1]):
                            insert(tag, tag_to_insert)
                elif condition_on_tag[0] == 'child_tags':
                    # selector looks like a :not(...) expression: strip the
                    # pseudo-class syntax and insert only when no such child exists
                    for tag in chapter_tag.find_all([re.compile(tag) for tag in tags]):
                        if not tag.select(re.sub('[():]|not', '', condition_on_tag[1])):
                            insert(tag, tag_to_insert)
                elif condition_on_tag[0] == "attrs":
                    # match tags by name regex plus attribute-value regex
                    for attr in rule["condition"]["attrs"]:
                        for tag in chapter_tag.find_all([re.compile(tag) for tag in tags],
                                                        {attr["name"]: re.compile(fr"{attr['value']}")}):
                            insert(tag, tag_to_insert)
        else:
            # unconditional rule: apply to every tag matching the name regexes
            for tag in chapter_tag.find_all([re.compile(tag) for tag in tags]):
                insert(tag, tag_to_insert)
def _remove_headings_content(self, content_tag, title_of_chapter: str):
""" """
Function Function
- cleans/removes headings from chapter in order to avoid duplication of chapter titles in the content - cleans/removes headings from chapter in order to avoid duplication of chapter titles in the content
@@ -272,15 +287,15 @@ def _remove_headings_content(content_tag, title_of_chapter: str):
if title_of_chapter == text or \ if title_of_chapter == text or \
(title_of_chapter in text and (title_of_chapter in text and
re.findall(r"^h[1-3]$", tag.name or content_tag.name)): re.findall(r"^h[1-3]$", tag.name or content_tag.name)):
_add_span_to_save_ids_for_links(tag, content_tag) self._add_span_to_save_ids_for_links(tag, content_tag)
tag.extract() tag.extract()
return return
elif not isinstance(tag, NavigableString): elif not isinstance(tag, NavigableString):
if not _remove_headings_content(tag, title_of_chapter): if not self._remove_headings_content(tag, title_of_chapter):
break break
@staticmethod
def _process_table(chapter_tag: BeautifulSoup): def _process_tables(chapter_tag: BeautifulSoup):
""" """
Function preprocesses tables and tags(td|th|tr) Function preprocesses tables and tags(td|th|tr)
Parameters Parameters
@@ -316,37 +331,8 @@ def _process_table(chapter_tag: BeautifulSoup):
if not table.attrs.get("border") or table.attrs.get("border") in ["0", "0px"]: if not table.attrs.get("border") or table.attrs.get("border") in ["0", "0px"]:
table.attrs["border"] = "1" table.attrs["border"] = "1"
@staticmethod
def _insert_tags_in_parents(chapter_tag): def _class_removing(chapter_tag):
"""
Function inserts tags into correspond tags
Parameters
----------
chapter_tag: BeautifulSoup
Tag & contents of the chapter tag
Returns
-------
None
Chapter Tag with inserted tags
"""
parent_tag2condition = {parent[0]: parent[1] for parent in LiveCartaConfig.INSERT_TAG_IN_PARENT_TAG.keys()}
for parent_tag_name, condition in parent_tag2condition.items():
for parent_tag in chapter_tag.select(parent_tag_name):
if parent_tag.select(condition):
continue
else:
tag_to_insert = chapter_tag.new_tag(
LiveCartaConfig.INSERT_TAG_IN_PARENT_TAG[(parent_tag_name, condition)])
# insert all items that was in pre to code and remove from pre
for content in reversed(parent_tag.contents):
tag_to_insert.insert(0, content.extract())
# wrap code with items
parent_tag.append(tag_to_insert)
def _class_removing(chapter_tag):
""" """
Function removes classes that aren't created by converter Function removes classes that aren't created by converter
Parameters Parameters
@@ -365,8 +351,7 @@ def _class_removing(chapter_tag):
and (tag.attrs.get("class") not in ["link-anchor", "footnote-element"]): and (tag.attrs.get("class") not in ["link-anchor", "footnote-element"]):
del tag.attrs["class"] del tag.attrs["class"]
def prepare_content(self, title_str: str, content_tag: BeautifulSoup, remove_title_from_chapter: bool) -> str:
def prepare_content(title_str: str, content_tag: BeautifulSoup, remove_title_from_chapter: bool) -> str:
""" """
Function finalise processing/cleaning content Function finalise processing/cleaning content
Parameters Parameters
@@ -381,12 +366,12 @@ def prepare_content(title_str: str, content_tag: BeautifulSoup, remove_title_fro
---------- ----------
1. comments removal 1. comments removal
2. wrap NavigableString with tag <p> 2. wrap NavigableString with tag <p>
3. wrap tags with <table> 3-6. wrap tags with <table>
4. replace tags with correspond LiveCarta tags replace tags with correspond LiveCarta tags
5. unwrap tags unwrap tags
6. heading removal insert tags into correspond tags
7. process_table 7. heading removal
8. insert tags into correspond tags 8. process_tables
9. class removal 9. class removal
Returns Returns
@@ -396,24 +381,18 @@ def prepare_content(title_str: str, content_tag: BeautifulSoup, remove_title_fro
""" """
# 1. remove comments # 1. remove comments
_remove_comments(content_tag) self._remove_comments(content_tag)
# 2. # 2.
_wrap_strings_with_p(content_tag) self._wrap_strings_with_p(content_tag)
# 3. # 3-6.
_wrap_tags_with_table(content_tag) for dict in self.preset:
# 4. func = self.name2function[dict["preset_name"]]
_tags_to_correspond_livecarta_tag(content_tag) func(content_tag, dict['rules'])
# 5.
_unwrap_tags(content_tag)
# 6.
if remove_title_from_chapter:
_remove_headings_content(content_tag, title_str)
# 7. # 7.
_process_table(content_tag) if remove_title_from_chapter:
self._remove_headings_content(content_tag, title_str)
# 8. # 8.
_insert_tags_in_parents(content_tag) self._process_tables(content_tag)
# 9. remove classes that weren't created by converter # 9. remove classes that weren't created by converter
_class_removing(content_tag) self._class_removing(content_tag)
return str(content_tag) return str(content_tag)

15
src/preset_processor.py Normal file
View File

@@ -0,0 +1,15 @@
import json
from src.util.helpers import BookLogger
class PresetProcessor:
    """Loads the JSON preset configuration that drives HTML preprocessing."""

    def __init__(self, preset_path="config/presets.json", logger=None):
        """
        Parameters
        ----------
        preset_path: str
            path to the JSON presets file
        logger: BookLogger, optional
            project logger for diagnostic messages
        """
        self.preset_path = preset_path
        self.logger: BookLogger = logger

    def get_preset_json(self):
        """
        Read and parse the preset file.

        Returns
        -------
        The deserialized JSON content (a list of preset rule dicts).

        Raises
        ------
        OSError / json.JSONDecodeError
            If the file is missing or is not valid JSON.
        """
        # Use a context manager so the file handle is always closed —
        # the original opened the file and never closed it, leaking the
        # handle (especially on a JSON parse error).
        with open(self.preset_path, encoding="utf-8") as preset_file:
            return json.load(preset_file)