Move style processors from src/epub_converter to the top-level src package

2022-09-01 18:12:04 +03:00
parent 39d5e27df2
commit 115a53e366
5 changed files with 35 additions and 26 deletions
--- a/src/epub_converter/epub_converter.py
+++ b/src/epub_converter/epub_converter.py
@@ -15,15 +15,15 @@ from bs4 import BeautifulSoup, Tag, NavigableString
 from src.util.helpers import BookLogger
 from src.livecarta_config import LiveCartaConfig
 from src.data_objects import ChapterItem, NavPoint
-from src.epub_converter.css_processor import CSSPreprocessor
+from src.style_preprocessor import CSSPreprocessor
 from src.epub_converter.html_epub_processor import HtmlEpubPreprocessor
 from src.epub_converter.image_processing import update_images_src_links
 from src.epub_converter.footnotes_processing import preprocess_footnotes
-from src.epub_converter.tag_inline_style_processor import TagInlineStyleProcessor
+from src.tag_inline_style_processor import TagInlineStyleProcessor


 class EpubConverter:
-    def __init__(self, book_path, access=None, logger=None, css_processor=None, html_processor=None):
+    def __init__(self, book_path, access=None, logger: BookLogger = None, css_processor: CSSPreprocessor = None, html_processor: HtmlEpubPreprocessor = None):
        self.book_path = book_path
        self.access = access
        self.logger: BookLogger = logger
@@ -257,7 +257,7 @@ class EpubConverter:

            sub_nodes = []
            for elem in second:
-                if (bool(re.search('^section$|^part$', first.title.lower()))) and lvl == 1:
+                if (bool(re.search("^section$|^part$", first.title.lower()))) and lvl == 1:
                    self.offset_sub_nodes.append(
                        self.build_adjacency_list_from_toc(elem, lvl))
                else:
@@ -291,7 +291,7 @@ class EpubConverter:
        return False

    def build_adjacency_list_from_spine(self):
-        def build_manifest_id2html_href() -> dict:
+        def build_manifest_id2html_href() -> Dict[str, str]:
            links = dict()
            for item in self.ebooklib_book.get_items_of_type(ebooklib.ITEM_DOCUMENT):
                links[item.id] = item.file_name
@@ -607,7 +607,7 @@ class EpubConverter:
        self.logger.log(indent + "Process title.")
        title_preprocessed: str = self.html_processor.prepare_title(title)
        self.logger.log(indent + "Process content.")
-        content_preprocessed: BeautifulSoup = self.html_processor.prepare_content(
+        content_preprocessed: Union[Tag, BeautifulSoup] = self.html_processor.prepare_content(
            title_preprocessed, content, remove_title_from_chapter=is_chapter)

        self.book_image_src_path2aws_path = update_images_src_links(content_preprocessed,
--- a/src/epub_converter/epub_solver.py
+++ b/src/epub_converter/epub_solver.py
@@ -1,5 +1,5 @@
 from src.book_solver import BookSolver
-from src.epub_converter.css_processor import CSSPreprocessor
+from src.style_preprocessor import CSSPreprocessor
 from src.epub_converter.html_epub_processor import HtmlEpubPreprocessor
 from src.epub_converter.epub_converter import EpubConverter

--- a/src/epub_converter/html_epub_processor.py
+++ b/src/epub_converter/html_epub_processor.py
@@ -192,14 +192,18 @@ class HtmlEpubPreprocessor:
            tag_to_replace: str = rule["tag_to_replace"]
            if rule["condition"]:
                for condition_on_tag in ((k, v) for k, v in rule["condition"].items() if v):
-                    if condition_on_tag[0] == 'parent_tags':
+                    if condition_on_tag[0] == "parent_tags":
                        for tag in chapter_tag.find_all([re.compile(tag) for tag in tags]):
                            if tag.parent.select(condition_on_tag[1]):
                                tag.name = tag_to_replace
-                    elif condition_on_tag[0] == 'child_tags':
+                    elif condition_on_tag[0] == "child_tags":
                        for tag in chapter_tag.find_all([re.compile(tag) for tag in tags]):
-                            if not tag.select(re.sub('[():]|not', '', condition_on_tag[1])):
-                                tag.name = tag_to_replace
+                            if "not" in condition_on_tag[1]:
+                                if not tag.select(re.sub("[():]|not", "", condition_on_tag[1])):
+                                    tag.name = tag_to_replace
+                            else:
+                                if tag.select(condition_on_tag[1]):
+                                    tag.name = tag_to_replace
                    elif condition_on_tag[0] == "attrs":
                        for attr in rule["condition"]["attrs"]:
                            for tag in chapter_tag.find_all([re.compile(tag) for tag in tags],
@@ -236,15 +240,15 @@ class HtmlEpubPreprocessor:
                tag[attr_to_replace] = tag[attr]
                del tag[attr]

-    def _unwrap_tags(self, chapter_tag: BeautifulSoup, rules: Dict[str, List[str]]):
+    def _unwrap_tags(self, chapter_tag: BeautifulSoup, rules: List[Dict[str, List[str]]]):
        """
        Function unwrap tags and moves id to span
        Parameters
        ----------
        chapter_tag: BeautifulSoup
            Tag & contents of the chapter tag
-        rules: Dict[str, List[str]]
-            dict of tags to unwrap
+        rules: List[Dict[str, List[str]]]
+            list of rules; each rule's "tags" entry lists the selectors of tags to unwrap

        Returns
        -------
@@ -252,13 +256,14 @@ class HtmlEpubPreprocessor:
            Chapter Tag with unwrapped certain tags

        """
-        for tag_name in rules["tags"]:
-            for tag in chapter_tag.select(tag_name):
-                # if tag is a subtag
-                if ">" in tag_name:
-                    tag.parent.attrs.update(tag.attrs)
-                self._add_span_to_save_ids_for_links(tag, chapter_tag)
-                tag.unwrap()
+        for rule in rules:
+            for tag_name in rule["tags"]:
+                for tag in chapter_tag.select(tag_name):
+                    # if tag is a subtag
+                    if ">" in tag_name:
+                        tag.parent.attrs.update(tag.attrs)
+                    self._add_span_to_save_ids_for_links(tag, chapter_tag)
+                    tag.unwrap()

    @staticmethod
    def _insert_tags_into_correspond_tags(chapter_tag: BeautifulSoup,
@@ -293,14 +298,18 @@ class HtmlEpubPreprocessor:
            tags: List[str] = rule["tags"]
            if rule["condition"]:
                for condition_on_tag in ((k, v) for k, v in rule["condition"].items() if v):
-                    if condition_on_tag[0] == 'parent_tags':
+                    if condition_on_tag[0] == "parent_tags":
                        for tag in chapter_tag.find_all([re.compile(tag) for tag in tags]):
                            if tag.parent.select(condition_on_tag[1]):
                                insert(tag)
-                    elif condition_on_tag[0] == 'child_tags':
+                    elif condition_on_tag[0] == "child_tags":
                        for tag in chapter_tag.find_all([re.compile(tag) for tag in tags]):
-                            if not tag.select(re.sub('[():]|not', '', condition_on_tag[1])):
-                                insert(tag)
+                            # same action as the parent_tags branch: insert(), not unwrap()
+                            if "not" in condition_on_tag[1]:
+                                if not tag.select(re.sub("[():]|not", "", condition_on_tag[1])):
+                                    insert(tag)
+                            elif tag.select(condition_on_tag[1]):
+                                insert(tag)
                    elif condition_on_tag[0] == "attrs":
                        for attr in rule["condition"]["attrs"]:
                            for tag in chapter_tag.find_all([re.compile(tag) for tag in tags],
@@ -441,7 +450,7 @@ class HtmlEpubPreprocessor:
        # 3-6.
        for rule in self.preset:
            func = self.name2function[rule["preset_name"]]
-            func(content_tag, rule['rules'])
+            func(content_tag, rule["rules"])
        # 7.
        if remove_title_from_chapter:
            self._remove_headings_content(content_tag, title_str)
--- a/src/epub_converter/css_processor.py
+++ b/src/epub_converter/css_processor.py
--- a/src/epub_converter/tag_inline_style_processor.py
+++ b/src/epub_converter/tag_inline_style_processor.py