This repository has been archived on 2026-04-06. You can view files and clone it. You cannot open issues or pull requests or push a commit.
Files
BookConverter/src/style_reader.py
2022-09-08 13:12:49 +03:00

220 lines
9.4 KiB
Python

import re
import cssutils
from typing import Tuple
from os.path import dirname, normpath, join
from src.util.color_reader import str2hex
from src.livecarta_config import LiveCartaConfig
class StyleReader:
    """Rewrite inline styles and stylesheets to the LiveCarta style convention.

    Properties that are not keys of ``LiveCartaConfig.LIVECARTA_STYLE_ATTRS``
    are dropped, values rejected by that config are dropped, and the remaining
    values are passed through the per-property converters registered in
    ``LIVECARTA_STYLE_ATTRS_MAPPING``.
    """

    # Optional single sign, a decimal number and one of the convertible units.
    _SIZE_PATTERN = re.compile(r"^(-?\d*\.?\d+)(%|em|pt|in)$")
    # Target of the first ``@import`` in raw stylesheet bytes; accepts the
    # quoted form as well as ``url(...)`` with or without quotes.
    _IMPORT_PATTERN = re.compile(
        rb"@import\s+(?:url\(\s*)?[\"']?([^\"')\s;]+)")

    def __init__(self):
        """
        Dictionary LIVECARTA_STYLE_ATTRS_MAPPING = { property: mapping function }

        Warning, if LIVECARTA_STYLE_ATTRS is changed, LIVECARTA_STYLE_ATTRS_MAPPING should be updated
        to suit LiveCarta style convention.
        """
        self.LIVECARTA_STYLE_ATTRS_MAPPING = {
            "text-indent": self.convert_indents_tag_values,
            "font-variant": lambda x: x,
            "text-align": lambda x: x,
            "font": lambda x: "",
            "font-family": lambda x: x,
            "font-size": self.convert_tag_style_values,
            "color": self.get_text_color,
            "background-color": self.get_bg_color,
            "background": self.get_bg_color,
            "border": self._drop_zero_value,
            "border-top-width": self._drop_zero_value,
            "border-right-width": self._drop_zero_value,
            "border-left-width": self._drop_zero_value,
            "border-bottom-width": self._drop_zero_value,
            "border-top": self._drop_zero_value,
            "border-bottom": self._drop_zero_value,
            "list-style-type": lambda x: x if x in LiveCartaConfig.list_types else "disc",
            "list-style-image": lambda x: "disc",
            "margin-left": self.convert_indents_tag_values,
            "margin-top": self.convert_tag_style_values,
            "margin": self.convert_indents_tag_values,
            "width": self.convert_tag_style_values,
        }

    @staticmethod
    def _drop_zero_value(x: str) -> str:
        """Return ``x`` unchanged, or "" when it is exactly "0"."""
        return x if x != "0" else ""

    @staticmethod
    def get_text_color(x: str) -> str:
        """Return ``str2hex(x)``, or "" when the result denotes black.

        NOTE(review): str2hex is imported from src.util.color_reader and is
        presumably a CSS-colour-to-hex converter - confirm there.
        """
        color = str2hex(x)
        return "" if color in ["#000000", "#000", "black"] else color

    @staticmethod
    def get_bg_color(x: str) -> str:
        """Return ``str2hex(x)``, or "" when the result denotes white."""
        color = str2hex(x)
        return "" if color in ["#ffffff", "#fff", "white"] else color

    @staticmethod
    def convert_tag_style_values(size_value: str, is_indent: bool = False) -> str:
        """
        Function converts a size given in %/em/pt/in to px

        Values that are not a plain number followed by one of these units
        (e.g. "10px", "auto", "--5%") are returned unchanged.

        Parameters
        ----------
        size_value: str
            raw CSS value
        is_indent: bool
            use the indent multipliers for % (5.76) and em (18)
            instead of the default ones (0.16 and 16)

        Returns
        -------
        size_value: str
            converted value size, formatted as str(float) + "px"

        """
        match = StyleReader._SIZE_PATTERN.match(size_value)
        if match is None:
            return size_value
        number = float(match.group(1))
        unit = match.group(2)
        if unit == "%":
            pixels = number * (5.76 if is_indent else 0.16)
        elif unit == "em":
            pixels = number * (18 if is_indent else 16)
        elif unit == "pt":
            pixels = number * 4 / 3
        else:
            # the only unit left in the pattern is "in"
            pixels = number * 96
        return str(pixels) + "px"

    def convert_indents_tag_values(self, size_value: str) -> str:
        """
        Function converts values of ["text-indent", "margin-left", "margin"]

        A three-part shorthand contributes its middle part, any other
        shorthand contributes its last part; the chosen part is converted
        with the indent multipliers.

        Parameters
        ----------
        size_value: str

        Returns
        -------
        size_value: str

        """
        # split() without an argument ignores repeated whitespace, so the
        # token count is not inflated by empty strings.
        tokens = size_value.split()
        if not tokens:
            return ""
        chosen = tokens[-2] if len(tokens) == 3 else tokens[-1]
        return self.convert_tag_style_values(chosen, True)

    @staticmethod
    def clean_value(style_value: str, style_name: str) -> str:
        """Strip double quotes; for font-family also backslash-escape regex symbols."""
        cleaned_value = style_value.replace("\"", "")
        if style_name == 'font-family':
            for symbol in ["+", "*", ".", "%", "?", "$", "^", "[", "]"]:
                cleaned_value = cleaned_value.replace(symbol, "\\" + symbol)
        return cleaned_value

    @staticmethod
    def style_conditions(style_value: str, style_name: str) -> Tuple[bool, bool]:
        """
        Evaluate the LIVECARTA_STYLE_ATTRS constraints for one declaration

        Returns
        -------
        (constraints_on_value, value_not_in_possible_values_list)
            the first item is the configured constraint object itself (truthy
            when constraints exist), the second tells whether the value is
            absent from it. The declaration is rejected when both are truthy.

        """
        constraints_on_value = LiveCartaConfig.LIVECARTA_STYLE_ATTRS.get(
            style_name)
        value_not_in_possible_values_list = \
            style_value not in (constraints_on_value or ())
        return constraints_on_value, value_not_in_possible_values_list

    def update_inline_styles_to_livecarta_convention(self, split_style: list) -> list:
        """
        Convert a list of "name:value" declarations in place

        Entries that are malformed, unsupported, rejected by the constraints
        or converted to an empty value are replaced with "".

        Parameters
        ----------
        split_style: list
            declarations of one inline style, one "name:value" string each

        Returns
        -------
        split_style: list
            the same list object, updated

        """
        for i, style in enumerate(split_style):
            # Split on the first colon only: values such as url(http://...)
            # contain further colons.
            style_name, separator, style_value = style.partition(":")
            style_name = style_name.strip()
            style_value = style_value.strip()
            if not separator or style_name not in LiveCartaConfig.LIVECARTA_STYLE_ATTRS:
                # malformed or not in LIVECARTA_STYLE_ATTRS: drop this entry
                # and keep processing the remaining declarations
                split_style[i] = ""
                continue
            cleaned_value = self.clean_value(style_value, style_name)
            if all(self.style_conditions(cleaned_value, style_name)):
                # there are constraints + value not in LIVECARTA_STYLE_ATTRS
                split_style[i] = ""
                continue
            func = self.LIVECARTA_STYLE_ATTRS_MAPPING.get(style_name)
            if func is not None:
                # function that converts our data
                style_value = func(cleaned_value)
            # An empty converted value means "remove the property", which is
            # how the stylesheet path treats it as well.
            split_style[i] = f"{style_name}:{style_value}" if style_value else ""
        return split_style

    def build_inline_style_content(self, style: str) -> str:
        """Build inline style with LiveCarta convention"""
        # Surrounding whitespace and empty fragments (e.g. after a trailing
        # ";") are discarded before conversion.
        fragments = (part.strip() for part in style.split(";"))
        split_style: list = [part for part in fragments if part]
        split_style = self.update_inline_styles_to_livecarta_convention(
            split_style)
        # removed declarations are "" and must not leave stray separators
        return "; ".join(filter(None, split_style))

    def process_inline_styles_in_html_soup(self, html_content):
        """
        This function is designed to convert inline html styles

        Every tag matched by LiveCartaConfig.could_have_style_in_livecarta_regexp
        that carries a style attribute gets that attribute rewritten in place;
        a non-empty ``align`` attribute is appended as ``text-align`` first.

        NOTE(review): html_content is presumably a BeautifulSoup object - only
        find_all() and tag.attrs are used here.
        """
        styled_tags = html_content.find_all(
            LiveCartaConfig.could_have_style_in_livecarta_regexp,
            attrs={"style": re.compile(".*")})
        for tag in styled_tags:
            inline_style = tag.attrs["style"]
            align = tag.attrs.get("align")
            if align:
                inline_style += f";text-align: {align};"
            tag.attrs["style"] = self.build_inline_style_content(inline_style)

    @staticmethod
    def get_css_content(css_href: str, html_href: str, ebooklib_book) -> str:
        """
        Load the text of the stylesheet linked from an html document

        When the stylesheet contains an ``@import``, the content of the first
        imported stylesheet is returned instead of the importing one.

        Parameters
        ----------
        css_href: str
            stylesheet href as written in the html document
        html_href: str
            href of the html document, used to resolve css_href
        ebooklib_book
            object providing get_item_with_href(href)

        Returns
        -------
        css_content: str

        Raises
        ------
        AssertionError
            if the stylesheet (or the one it imports) is not in the book

        """
        not_found = f"Css style {css_href} was not in manifest."
        path_to_css_from_root = normpath(
            join(dirname(html_href), css_href)).replace("\\", "/")
        css_obj = ebooklib_book.get_item_with_href(path_to_css_from_root)
        if not css_obj:
            # Raised explicitly (not via assert) so the check survives -O,
            # while keeping the exception type callers may already catch.
            raise AssertionError(not_found)
        # if in css file we import another css
        import_match = StyleReader._IMPORT_PATTERN.search(
            css_obj.get_content())
        if import_match:
            imported_href = import_match.group(1).decode()
            candidates = (
                # relative to the importing stylesheet
                normpath(join(dirname(path_to_css_from_root),
                              imported_href)).replace("\\", "/"),
                # legacy location, kept for backward compatibility
                "css/" + imported_href,
            )
            css_obj = None
            for candidate in candidates:
                css_obj = ebooklib_book.get_item_with_href(candidate)
                if css_obj:
                    break
            if not css_obj:
                raise AssertionError(not_found)
        css_content: str = css_obj.get_content().decode()
        return css_content

    def update_css_styles_to_livecarta_convention(self, css_rule: "cssutils.css.CSSStyleRule",
                                                  style_type: "cssutils.css.property.Property"):
        """
        Convert one property of a stylesheet rule in place

        The property is set to "" when it is unsupported or rejected by the
        constraints, otherwise to the converted value if a converter exists.
        """
        name = style_type.name
        if name not in LiveCartaConfig.LIVECARTA_STYLE_ATTRS:
            # property not in LIVECARTA_STYLE_ATTRS, remove from css file
            css_rule.style[name] = ""
            return
        cleaned_value = self.clean_value(style_type.value, name)
        if all(self.style_conditions(cleaned_value, name)):
            # there are constraints + value not in LIVECARTA_STYLE_ATTRS, remove from css file
            css_rule.style[name] = ""
            return
        func = self.LIVECARTA_STYLE_ATTRS_MAPPING.get(name)
        if func is not None:
            # function that converts our data
            css_rule.style[name] = func(cleaned_value)

    def build_css_file_content(self, css_content: str) -> str:
        """
        Build css content with LiveCarta convention

        Only top-level style rules are converted; other rule types are
        serialized as parsed.
        """
        sheet = cssutils.parseString(css_content, validate=False)
        for css_rule in sheet:
            if css_rule.type != css_rule.STYLE_RULE:
                continue
            # Snapshot the properties: the declaration is modified while
            # its properties are being visited.
            for style_type in list(css_rule.style):
                self.update_css_styles_to_livecarta_convention(
                    css_rule, style_type)
        css_text: str = sheet.cssText.decode()
        return css_text