diff --git a/src/epub_converter/css_processor.py b/src/epub_converter/css_processor.py index fbb2f99..2be0dab 100644 --- a/src/epub_converter/css_processor.py +++ b/src/epub_converter/css_processor.py @@ -37,6 +37,7 @@ class CSSPreprocessor: "margin-left": self.convert_indents_tag_values, "margin-top": self.convert_tag_style_values, "margin": self.convert_indents_tag_values, + "width": self.convert_tag_style_values, } @staticmethod @@ -104,16 +105,23 @@ class CSSPreprocessor: return size_value @staticmethod - def style_conditions(style_value, style_name): + def clean_value(style_value: str, style_name: str): cleaned_value = style_value.replace("\"", "") - # cleaned_value = style_value.replace("+", "%2B") + if style_name == 'font-family': + for symbol in ["+", "*", ".", "%", "?", "$", "^", "[", "]"]: + cleaned_value = re.sub( + re.escape(f"{symbol}"), rf"\\{symbol}", cleaned_value) + return cleaned_value + + @staticmethod + def style_conditions(style_value: str, style_name: str) -> tuple[bool, bool]: constraints_on_value = LiveCartaConfig.LIVECARTA_STYLE_ATTRS.get( style_name) - value_not_in_possible_values_list = cleaned_value not in LiveCartaConfig.LIVECARTA_STYLE_ATTRS[ + value_not_in_possible_values_list = style_value not in LiveCartaConfig.LIVECARTA_STYLE_ATTRS[ style_name] - return cleaned_value, constraints_on_value, value_not_in_possible_values_list + return constraints_on_value, value_not_in_possible_values_list - def update_inline_styles_to_livecarta_convention(self, split_style: list): + def update_inline_styles_to_livecarta_convention(self, split_style: list) -> list: for i, style in enumerate(split_style): style_name, style_value = style.split(":") if style_name not in LiveCartaConfig.LIVECARTA_STYLE_ATTRS: @@ -121,9 +129,8 @@ class CSSPreprocessor: split_style[i] = "" return split_style - cleaned_value, constraints_on_value, value_not_in_possible_values_list =\ - self.style_conditions(style_value, style_name) - if constraints_on_value and value_not_in_possible_values_list: + cleaned_value = self.clean_value(style_value, style_name) + if all(self.style_conditions(cleaned_value, style_name)): # there are constraints + value not in LIVECARTA_STYLE_ATTRS, remove from css file split_style[i] = "" else: @@ -138,18 +145,18 @@ class CSSPreprocessor: """Build inline style with LiveCarta convention""" # replace all spaces between "; & letter" to ";" style = re.sub(r"; *", ";", style) - # when we split style by ";", last element of the list is "" - None - # remove it + # when we split style by ";", last element of the list is "" - None (we remove it) split_style: list = list(filter(None, style.split(";"))) # replace all spaces between ": & letter" to ":" split_style = [el.replace( re.search(r"(:\s*)", el).group(1), ":") for el in split_style] - split_style = self.update_inline_styles_to_livecarta_convention(split_style) + split_style = self.update_inline_styles_to_livecarta_convention( + split_style) style = "; ".join(split_style) return style - def process_inline_styles_in_html_soup(self, html_href2html_body_soup): + def process_inline_styles_in_html_soup(self, html_href2html_body_soup: dict): """This function is designed to convert inline html styles""" for html_href in html_href2html_body_soup: html_content: BeautifulSoup = html_href2html_body_soup[html_href] @@ -185,16 +192,14 @@ class CSSPreprocessor: css_rule.style[style_type.name] = "" return - cleaned_value, constraints_on_value, value_not_in_possible_values_list =\ - self.style_conditions(style_type.value, style_type.name) - if constraints_on_value and value_not_in_possible_values_list: + cleaned_value = self.clean_value(style_type.value, style_type.name) + if all(self.style_conditions(cleaned_value, style_type.name)): # there are constraints + value not in LIVECARTA_STYLE_ATTRS, remove from css file css_rule.style[style_type.name] = "" else: if style_type.name in self.LIVECARTA_STYLE_ATTRS_MAPPING: # function that converts our data func = self.LIVECARTA_STYLE_ATTRS_MAPPING[style_type.name] - print(cleaned_value) css_rule.style[style_type.name] = func(cleaned_value) def build_css_file_content(self, css_content: str) -> str: diff --git a/src/epub_converter/epub_converter.py b/src/epub_converter/epub_converter.py index f779f98..7e8ab8a 100644 --- a/src/epub_converter/epub_converter.py +++ b/src/epub_converter/epub_converter.py @@ -305,7 +305,7 @@ class EpubConverter: self.adjacency_list[-1].append(nav_point) self.hrefs_added_to_toc.add(nav_point.href) - def add_not_added_files_to_adjacency_list(self, not_added): + def add_not_added_files_to_adjacency_list(self, not_added: list): """Function add files that not added to adjacency list""" for i, file in enumerate(not_added): nav_point = NavPoint( diff --git a/src/epub_converter/html_epub_processor.py b/src/epub_converter/html_epub_processor.py index 7a6e476..aba8811 100644 --- a/src/epub_converter/html_epub_processor.py +++ b/src/epub_converter/html_epub_processor.py @@ -66,7 +66,7 @@ class HtmlEpubPreprocessor: return title @staticmethod - def _remove_comments(chapter_tag): + def _remove_comments(chapter_tag: BeautifulSoup): """ Function remove comments Parameters @@ -85,7 +85,7 @@ class HtmlEpubPreprocessor: element.extract() @staticmethod - def _wrap_strings_with_p(chapter_tag): + def _wrap_strings_with_p(chapter_tag: BeautifulSoup): """ Function converts headings that aren't supported by LiveCarta with

Parameters @@ -108,7 +108,7 @@ class HtmlEpubPreprocessor: p_tag.append(str(node)) node.replace_with(p_tag) - def _wrap_tags_with_table(self, chapter_tag, rules: list): + def _wrap_tags_with_table(self, chapter_tag: BeautifulSoup, rules: list): """ Function wraps with Parameters @@ -153,7 +153,7 @@ class HtmlEpubPreprocessor: process_tag_using_table() @staticmethod - def _tags_to_correspond_livecarta_tag(chapter_tag, rules: list): + def _tags_to_correspond_livecarta_tag(chapter_tag: BeautifulSoup, rules: list): """ Function to replace all tags to correspond LiveCarta tags Parameters @@ -190,7 +190,7 @@ class HtmlEpubPreprocessor: # todo can cause appearance of \n

...

->

\n

...

\n

(section) tag.name = tag_to_replace - def _unwrap_tags(self, chapter_tag, rules: dict): + def _unwrap_tags(self, chapter_tag: BeautifulSoup, rules: dict): """ Function unwrap tags and moves id to span Parameters @@ -213,7 +213,7 @@ class HtmlEpubPreprocessor: tag.unwrap() @staticmethod - def _insert_tags_into_correspond_tags(chapter_tag, rules: list): + def _insert_tags_into_correspond_tags(chapter_tag: BeautifulSoup, rules: list): """ Function inserts tags into correspond tags Parameters @@ -257,14 +257,14 @@ class HtmlEpubPreprocessor: for tag in chapter_tag.find_all([re.compile(tag) for tag in tags]): insert(tag) - def _remove_headings_content(self, content_tag, title_of_chapter: str): + def _remove_headings_content(self, chapter_tag, title_of_chapter: str): """ Function - cleans/removes headings from chapter in order to avoid duplication of chapter titles in the content - adds span with id in order to Parameters ---------- - content_tag: soup object + chapter_tag: soup object Tag of the page title_of_chapter: str Chapter title @@ -276,15 +276,15 @@ class HtmlEpubPreprocessor: """ title_of_chapter = title_of_chapter.lower() - for tag in content_tag.contents: + for tag in chapter_tag.contents: text = tag if isinstance(tag, NavigableString) else tag.text if re.sub(r"[\s\xa0]", "", text): text = re.sub(r"[\s\xa0]", " ", text).lower() text = text.strip() # delete extra spaces if title_of_chapter == text or \ (title_of_chapter in text and - re.findall(r"^h[1-3]$", tag.name or content_tag.name)): - self._add_span_to_save_ids_for_links(tag, content_tag) + re.findall(r"^h[1-3]$", tag.name or chapter_tag.name)): + self._add_span_to_save_ids_for_links(tag, chapter_tag) tag.extract() return elif not isinstance(tag, NavigableString): @@ -329,7 +329,7 @@ class HtmlEpubPreprocessor: table.attrs["border"] = "1" @staticmethod - def _class_removing(chapter_tag): + def _class_removing(chapter_tag: BeautifulSoup): """ Function removes classes that aren't created by converter Parameters