Clear font-family value of regex characters

2022-07-15 14:18:53 +03:00
parent 16a8690738
commit 20fa1bfa86
3 changed files with 34 additions and 29 deletions
--- a/src/epub_converter/html_epub_processor.py
+++ b/src/epub_converter/html_epub_processor.py
@@ -66,7 +66,7 @@ class HtmlEpubPreprocessor:
        return title

    @staticmethod
-    def _remove_comments(chapter_tag):
+    def _remove_comments(chapter_tag: BeautifulSoup):
        """
        Function remove comments
        Parameters
@@ -85,7 +85,7 @@ class HtmlEpubPreprocessor:
                element.extract()

    @staticmethod
-    def _wrap_strings_with_p(chapter_tag):
+    def _wrap_strings_with_p(chapter_tag: BeautifulSoup):
        """
        Function converts headings that aren't supported by LiveCarta with <p>
        Parameters
@@ -108,7 +108,7 @@ class HtmlEpubPreprocessor:
                    p_tag.append(str(node))
                    node.replace_with(p_tag)

-    def _wrap_tags_with_table(self, chapter_tag, rules: list):
+    def _wrap_tags_with_table(self, chapter_tag: BeautifulSoup, rules: list):
        """
        Function wraps <tag> with <table>
        Parameters
@@ -153,7 +153,7 @@ class HtmlEpubPreprocessor:
                    process_tag_using_table()

    @staticmethod
-    def _tags_to_correspond_livecarta_tag(chapter_tag, rules: list):
+    def _tags_to_correspond_livecarta_tag(chapter_tag: BeautifulSoup, rules: list):
        """
        Function to replace all tags to correspond LiveCarta tags
        Parameters
@@ -190,7 +190,7 @@ class HtmlEpubPreprocessor:
                    # todo can cause appearance of \n <p><p>...</p></p> -> <p>\n</p> <p>...</p> <p>\n</p> (section)
                    tag.name = tag_to_replace

-    def _unwrap_tags(self, chapter_tag, rules: dict):
+    def _unwrap_tags(self, chapter_tag: BeautifulSoup, rules: dict):
        """
        Function unwrap tags and moves id to span
        Parameters
@@ -213,7 +213,7 @@ class HtmlEpubPreprocessor:
                tag.unwrap()

    @staticmethod
-    def _insert_tags_into_correspond_tags(chapter_tag, rules: list):
+    def _insert_tags_into_correspond_tags(chapter_tag: BeautifulSoup, rules: list):
        """
        Function inserts tags into correspond tags
        Parameters
@@ -257,14 +257,14 @@ class HtmlEpubPreprocessor:
                for tag in chapter_tag.find_all([re.compile(tag) for tag in tags]):
                    insert(tag)

-    def _remove_headings_content(self, content_tag, title_of_chapter: str):
+    def _remove_headings_content(self, chapter_tag, title_of_chapter: str):
        """
        Function
        - cleans/removes headings from chapter in order to avoid duplication of chapter titles in the content
        - adds span with id in order to
        Parameters
        ----------
-        content_tag: soup object
+        chapter_tag: soup object
            Tag of the page
        title_of_chapter: str
            Chapter title
@@ -276,15 +276,15 @@ class HtmlEpubPreprocessor:

        """
        title_of_chapter = title_of_chapter.lower()
-        for tag in content_tag.contents:
+        for tag in chapter_tag.contents:
            text = tag if isinstance(tag, NavigableString) else tag.text
            if re.sub(r"[\s\xa0]", "", text):
                text = re.sub(r"[\s\xa0]", " ", text).lower()
                text = text.strip()  # delete extra spaces
                if title_of_chapter == text or \
                        (title_of_chapter in text and
-                         re.findall(r"^h[1-3]$", tag.name or content_tag.name)):
-                    self._add_span_to_save_ids_for_links(tag, content_tag)
+                         re.findall(r"^h[1-3]$", tag.name or chapter_tag.name)):
+                    self._add_span_to_save_ids_for_links(tag, chapter_tag)
                    tag.extract()
                    return
                elif not isinstance(tag, NavigableString):
@@ -329,7 +329,7 @@ class HtmlEpubPreprocessor:
                table.attrs["border"] = "1"

    @staticmethod
-    def _class_removing(chapter_tag):
+    def _class_removing(chapter_tag: BeautifulSoup):
        """
        Function removes classes that aren't created by converter
        Parameters