import re
from os.path import dirname, join, normpath
from typing import List, Tuple

import cssutils

from src.livecarta_config import LiveCartaConfig
from src.util.color_reader import str2hex


class StyleReader:
    """Rewrite inline styles and CSS files to the LiveCarta style convention."""

    def __init__(self):
        """
        Dictionary LIVECARTA_STYLE_ATTRS_MAPPING = { property: mapping function }

        Warning, if LIVECARTA_STYLE_ATTRS is changed, LIVECARTA_STYLE_ATTRS_MAPPING
        should be updated to suit LiveCarta style convention.
        """
        to_px = self.convert_tag_style_values

        def to_px_indent(x):
            # Same conversion, but with the indent multipliers for "%" and "em".
            return to_px(x, is_indent=True)

        def keep(x):
            # Value is passed through untouched.
            return x

        self.LIVECARTA_STYLE_ATTRS_MAPPING = {
            "font": lambda x: "",
            "font-family": keep,
            "font-size": to_px,
            "font-variant": keep,
            "text-transform": keep,
            "text-align": keep,
            "text-indent": to_px_indent,
            "margin": to_px,
            "margin-top": to_px,
            "margin-right": to_px,
            "margin-left": to_px_indent,
            "margin-bottom": to_px,
            "padding": to_px,
            "padding-top": to_px,
            "padding-right": to_px,
            "padding-left": to_px,
            "padding-bottom": to_px,
            "color": self.get_text_color,
            "background-color": self.get_bg_color,
            "background": self.get_bg_color,
            # Percentage widths are kept as they are.
            "width": lambda x: to_px(x) if "%" not in x else x,
            "border": to_px,
            "border-top-width": to_px,
            "border-right-width": to_px,
            "border-left-width": to_px,
            "border-bottom-width": to_px,
            "border-top": to_px,
            "border-right": to_px,
            "border-left": to_px,
            "border-bottom": to_px,
            "list-style-type": lambda x: x if x in LiveCartaConfig.list_types else "disc",
            "list-style-image": lambda x: "disc",
        }

    @staticmethod
    def get_text_color(x: str) -> str:
        """Return ``str2hex(x)``, or "" when the result is black."""
        color = str2hex(x)
        if color in ("#000000", "#000", "black"):
            return ""
        return color

    @staticmethod
    def get_bg_color(x: str) -> str:
        """Return ``str2hex(x)``, or "" when the result is white."""
        color = str2hex(x)
        if color in ("#ffffff", "#fff", "white"):
            return ""
        return color

    @staticmethod
    def convert_tag_style_values(size_value: str, is_indent: bool = False) -> str:
        """
        Function
            - converts the first sized token of a value from em/%/pt/in/rem to px

        Parameters
        ----------
        size_value: str
            raw property value, tokens separated by single spaces
        is_indent: bool
            when True, "%" and "em" use the indent multipliers

        Returns
        -------
        size_value: str
            value with the first sized token converted; returned unchanged
            when there is no sized token, the unit is not handled, or the
            token cannot be parsed as a number

        """
        first_size = re.search(r"(\d+(?:\.\d+)?)([\w%]+)", size_value)
        if not first_size:
            return size_value

        multipliers = {
            "%": 5.76 if is_indent else 0.16,
            "em": 18 if is_indent else 16,
            "pt": 4 / 3,
            "in": 96,
            "rem": 80 / 7,
        }
        unit = first_size.group(2)
        multiplier = multipliers.get(unit)
        if multiplier is None:
            return size_value

        # NOTE(review): only the first sized token is converted, so a
        # shorthand such as "1em 2em" keeps its second token - confirm
        # whether every token should be converted.
        values: List[str] = size_value.split(" ")
        for idx, token in enumerate(values):
            if not re.search(r"\d+[\w%]+", token):
                continue
            try:
                number = float(token.replace(unit, ""))
            except ValueError:
                # Token carries extra text (e.g. "1em!important"); leave the
                # value untouched instead of failing the whole conversion.
                break
            values[idx] = str(number * multiplier) + "px"
            break
        return " ".join(values)

    @staticmethod
    def clean_value(style_value: str, style_name: str) -> str:
        """
        Strip double quotes from a value; for ``font-family`` additionally
        prefix the characters ``+ * . % ? $ ^ [ ]`` with a backslash.
        """
        cleaned_value = style_value.replace("\"", "")
        if style_name == 'font-family':
            escapes = str.maketrans(
                {symbol: "\\" + symbol for symbol in "+*.%?$^[]"})
            cleaned_value = cleaned_value.translate(escapes)
        return cleaned_value

    @staticmethod
    def style_conditions(style_value: str, style_name: str) -> Tuple[bool, bool]:
        """
        Describe whether a value violates the constraints of its property.

        Returns
        -------
        (constraints_on_value, value_not_in_possible_values_list)
            first item is the entry of LIVECARTA_STYLE_ATTRS for the property
            (truthy when constraints exist), second item is True when the
            value is not among them; ``all()`` of the pair means "remove".

        """
        constraints_on_value = LiveCartaConfig.LIVECARTA_STYLE_ATTRS.get(
            style_name)
        if not constraints_on_value:
            # No constraints: avoid a containment test against None.
            return constraints_on_value, True
        return constraints_on_value, style_value not in constraints_on_value

    def update_inline_styles_to_livecarta_convention(self, split_style: list) -> list:
        """
        Filter and convert "name:value" declarations in place.

        Declarations are removed when they have no ":", when the property is
        not in LIVECARTA_STYLE_ATTRS, when the value violates the property
        constraints, or when the mapping function returns an empty value
        (the CSS-file path removes such properties as well).

        Parameters
        ----------
        split_style: list
            declarations of the form "name:value"; modified in place

        Returns
        -------
        split_style: list
            the same list object

        """
        # Walk backwards by index so deletions never shift pending items.
        for i in range(len(split_style) - 1, -1, -1):
            # Split on the first ":" only: values may contain ":" (urls).
            style_name, separator, style_value = split_style[i].partition(":")
            if not separator or style_name not in LiveCartaConfig.LIVECARTA_STYLE_ATTRS:
                # malformed or property not in LIVECARTA_STYLE_ATTRS, remove
                del split_style[i]
                continue
            try:
                cleaned_value = self.clean_value(style_value, style_name)
                if all(self.style_conditions(cleaned_value, style_name)):
                    # there are constraints + value not in LIVECARTA_STYLE_ATTRS, remove
                    del split_style[i]
                    continue
                func = self.LIVECARTA_STYLE_ATTRS_MAPPING.get(style_name)
                if func is None:
                    continue
                # function that converts our data
                style_value = func(cleaned_value)
            except ValueError:
                # One bad declaration must not stop the remaining ones.
                print("Style value isn't correct.")
                continue
            if style_value:
                split_style[i] = style_name + ":" + style_value
            else:
                del split_style[i]
        return split_style

    def build_inline_style_content(self, style: str) -> str:
        """Build inline style with LiveCarta convention"""
        split_style: list = []
        for declaration in style.split(";"):
            name, separator, value = declaration.partition(":")
            if not separator:
                # Empty piece after a trailing ";" or text without ":".
                continue
            # Whitespace around the name and the value is not significant.
            split_style.append(name.strip() + ":" + value.strip())
        split_style = self.update_inline_styles_to_livecarta_convention(
            split_style)
        return "; ".join(split_style)

    def process_inline_styles_in_html_soup(self, html_content):
        """
        This function is designed to convert inline html styles.

        Every tag matched by ``could_have_style_in_livecarta_regexp`` that has
        a ``style`` attribute gets that attribute rebuilt; a truthy ``align``
        attribute is appended to the style as ``text-align`` first.
        """
        tags_with_inline_style = html_content.find_all(
            LiveCartaConfig.could_have_style_in_livecarta_regexp,
            attrs={"style": re.compile(".*")})
        for tag in tags_with_inline_style:
            inline_style = tag.attrs["style"]
            align = tag.attrs.get("align")
            if align:
                inline_style += f";text-align: {align};"
            tag.attrs["style"] = self.build_inline_style_content(inline_style)

    @staticmethod
    def get_css_content(css_href: str, html_href: str, ebooklib_book) -> str:
        """
        Return the decoded content of the CSS item referenced from an html file.

        Parameters
        ----------
        css_href: str
            href of the css file as written in the html file
        html_href: str
            href of the html file; css_href is resolved against its folder
        ebooklib_book
            object providing ``get_item_with_href``

        Returns
        -------
        css_content: str
            decoded css; when the css file contains an ``@import``, the
            content of the first imported file is returned instead

        Raises
        ------
        AssertionError
            when the css item (or the imported one) is not found

        """
        def to_href(path: str) -> str:
            return normpath(path).replace("\\", "/")

        not_found = f"Css style {css_href} was not in manifest."
        path_to_css_from_root = to_href(join(dirname(html_href), css_href))
        css_obj = ebooklib_book.get_item_with_href(path_to_css_from_root)
        # Check before touching the item, and keep the check under "-O".
        if not css_obj:
            raise AssertionError(not_found)

        raw_content = css_obj.content
        if isinstance(raw_content, bytes):
            css_text = raw_content.decode(errors="replace")
        else:
            css_text = str(raw_content)

        # if in css file we import another css
        imported = re.search(
            r"""@import\s+(?:url\(\s*)?["']?([^"'()\s;]+)""", css_text)
        if imported:
            imported_href = imported.group(1)
            candidates = (
                # Historical lookup location, tried first for compatibility.
                "css/" + imported_href,
                # Location relative to the importing css file.
                to_href(join(dirname(path_to_css_from_root), imported_href)),
            )
            css_obj = None
            for candidate in candidates:
                css_obj = ebooklib_book.get_item_with_href(candidate)
                if css_obj:
                    break
            if not css_obj:
                raise AssertionError(not_found)

        css_content: str = css_obj.get_content().decode()
        return css_content

    def update_css_styles_to_livecarta_convention(self, css_rule: cssutils.css.CSSStyleRule,
                                                  style_type: cssutils.css.property.Property):
        """
        Remove or convert one property of a css rule.

        The property is set to "" when its name is not in
        LIVECARTA_STYLE_ATTRS or its value violates the constraints;
        otherwise the mapping function, if any, supplies the new value.
        """
        name = style_type.name
        if name not in LiveCartaConfig.LIVECARTA_STYLE_ATTRS:
            # property not in LIVECARTA_STYLE_ATTRS, remove from css file
            css_rule.style[name] = ""
            return

        cleaned_value = self.clean_value(style_type.value, name)
        if all(self.style_conditions(cleaned_value, name)):
            # there are constraints + value not in LIVECARTA_STYLE_ATTRS, remove from css file
            css_rule.style[name] = ""
            return

        func = self.LIVECARTA_STYLE_ATTRS_MAPPING.get(name)
        if func is not None:
            # function that converts our data
            css_rule.style[name] = func(cleaned_value)

    def build_css_file_content(self, css_content: str) -> str:
        """Build css content with LiveCarta convention"""
        sheet = cssutils.parseString(css_content, validate=False)

        for css_rule in sheet:
            if css_rule.type != css_rule.STYLE_RULE:
                continue
            # Snapshot the properties: the loop body rewrites the declaration.
            for style_type in list(css_rule.style):
                self.update_css_styles_to_livecarta_convention(
                    css_rule, style_type)

        # Public accessor for the serialized sheet (bytes).
        css_text: str = sheet.cssText.decode()
        return css_text