# Forked from LiveCarta/BookConverter (original listing: 219 lines, 10 KiB, Python).
import logging
import re
from os.path import dirname, normpath, join
from typing import List, Tuple

import cssutils

from src.livecarta_config import LiveCartaConfig
from src.util.color_reader import str2hex


class StyleReader:
    """Rewrite inline styles and stylesheets to the LiveCarta style convention.

    Properties that are not listed in ``LiveCartaConfig.LIVECARTA_STYLE_ATTRS``
    are removed, values that violate the configured constraints are removed,
    and the remaining values are passed through the per-property functions of
    ``LIVECARTA_STYLE_ATTRS_MAPPING``.
    """

    _logger = logging.getLogger(__name__)

    # unit -> (multiplier for regular values, multiplier for indent values)
    _PX_MULTIPLIERS = {
        "%": (0.16, 5.76),
        "em": (16, 18),
        "pt": (4 / 3, 4 / 3),
        "in": (96, 96),
        "rem": (80 / 7, 80 / 7),
    }

    # A whole token made of a (signed, optionally fractional) number + unit.
    _SIZE_TOKEN = re.compile(r"([-+]?(?:\d+(?:\.\d+)?|\.\d+))(%|rem|em|pt|in)")

    def __init__(self):
        """
        Dictionary LIVECARTA_STYLE_ATTRS_MAPPING = { property: mapping function }

        Warning, if LIVECARTA_STYLE_ATTRS is changed, LIVECARTA_STYLE_ATTRS_MAPPING
        should be updated to suit LiveCarta style convention.
        """
        self.LIVECARTA_STYLE_ATTRS_MAPPING = {
            "font": lambda x: "",
            "font-family": lambda x: x,
            "font-size": self.convert_tag_style_values,
            "font-variant": lambda x: x,
            "text-transform": lambda x: x,
            "text-align": lambda x: x,
            "text-indent": lambda x: self.convert_tag_style_values(x, is_indent=True),
            "margin": self.convert_tag_style_values,
            "margin-top": self.convert_tag_style_values,
            "margin-right": self.convert_tag_style_values,
            "margin-left": lambda x: self.convert_tag_style_values(x, is_indent=True),
            "margin-bottom": self.convert_tag_style_values,
            "padding": self.convert_tag_style_values,
            "padding-top": self.convert_tag_style_values,
            "padding-right": self.convert_tag_style_values,
            "padding-left": self.convert_tag_style_values,
            "padding-bottom": self.convert_tag_style_values,
            "color": self.get_text_color,
            "background-color": self.get_bg_color,
            "background": self.get_bg_color,
            # percentage widths are kept as they are
            "width": lambda x: self.convert_tag_style_values(x) if "%" not in x else x,
            "border": self.convert_tag_style_values,
            "border-top-width": self.convert_tag_style_values,
            "border-right-width": self.convert_tag_style_values,
            "border-left-width": self.convert_tag_style_values,
            "border-bottom-width": self.convert_tag_style_values,
            "border-top": self.convert_tag_style_values,
            "border-right": self.convert_tag_style_values,
            "border-left": self.convert_tag_style_values,
            "border-bottom": self.convert_tag_style_values,
            "list-style-type": lambda x: x if x in LiveCartaConfig.list_types else "disc",
            "list-style-image": lambda x: "disc",
        }

    @staticmethod
    def get_text_color(x: str) -> str:
        """Return ``str2hex(x)``, or "" when the result is black."""
        color = str2hex(x)
        return "" if color in ("#000000", "#000", "black") else color

    @staticmethod
    def get_bg_color(x: str) -> str:
        """Return ``str2hex(x)``, or "" when the result is white."""
        color = str2hex(x)
        return "" if color in ("#ffffff", "#fff", "white") else color

    @staticmethod
    def convert_tag_style_values(size_value: str, is_indent: bool = False) -> str:
        """
        Convert sizes expressed in %/em/pt/in/rem to px.

        Every space-separated token of the value is examined on its own, so
        multi-value properties ("1em 2em") are converted completely. Tokens
        that are not exactly "<number><unit>" with a convertible unit (for
        example "12px", "auto", "solid", "calc(100%-2em)") are left untouched.

        Parameters
        ----------
        size_value: str
            raw property value
        is_indent: bool
            use the indent multipliers instead of the regular ones

        Returns
        -------
        size_value: str
            value with the convertible sizes replaced by px sizes

        """
        def to_px(token: str) -> str:
            match = StyleReader._SIZE_TOKEN.fullmatch(token)
            if match is None:
                return token
            number, unit = match.groups()
            regular, indent = StyleReader._PX_MULTIPLIERS[unit]
            return str(float(number) * (indent if is_indent else regular)) + "px"

        # split/join on a single space keeps the original spacing intact
        return " ".join(to_px(token) for token in size_value.split(" "))

    @staticmethod
    def clean_value(style_value: str, style_name: str) -> str:
        """Strip double quotes; for font-family also backslash-escape + * . % ? $ ^ [ ]."""
        cleaned_value = style_value.replace('"', "")
        if style_name == "font-family":
            for symbol in ("+", "*", ".", "%", "?", "$", "^", "[", "]"):
                cleaned_value = cleaned_value.replace(symbol, "\\" + symbol)
        return cleaned_value

    @staticmethod
    def style_conditions(style_value: str, style_name: str) -> Tuple[bool, bool]:
        """
        Return (constraints, value_not_allowed) for a property.

        The first element is the ``LIVECARTA_STYLE_ATTRS`` entry itself (only
        its truthiness is used by the callers in this class), the second one
        tells whether ``style_value`` is absent from that entry.
        ``style_name`` must be a key of ``LIVECARTA_STYLE_ATTRS``.
        """
        constraints_on_value = LiveCartaConfig.LIVECARTA_STYLE_ATTRS[style_name]
        value_not_in_possible_values_list = style_value not in constraints_on_value
        return constraints_on_value, value_not_in_possible_values_list

    def _convert_inline_declaration(self, style: str):
        """
        Convert one "name:value" declaration.

        Returns the declaration to keep, or None when it has to be removed.
        Raises ValueError when the declaration has no ":" separator.
        """
        # partition on the first colon only: values may contain colons (urls)
        style_name, separator, style_value = style.partition(":")
        if not separator:
            raise ValueError(f"missing ':' in {style!r}")

        if style_name not in LiveCartaConfig.LIVECARTA_STYLE_ATTRS:
            # property not in LIVECARTA_STYLE_ATTRS, remove
            return None

        cleaned_value = self.clean_value(style_value, style_name)
        if all(self.style_conditions(cleaned_value, style_name)):
            # there are constraints + value not in LIVECARTA_STYLE_ATTRS, remove
            return None

        convert = self.LIVECARTA_STYLE_ATTRS_MAPPING.get(style_name)
        if convert is None:
            # no mapping function: the declaration is kept as it came in
            return style
        return style_name + ":" + convert(cleaned_value)

    def update_inline_styles_to_livecarta_convention(self, split_style: list) -> list:
        """
        Filter and convert a list of "name:value" declarations.

        The list is updated in place and also returned. A declaration that
        raises ValueError while being processed is reported and left
        unchanged; the other declarations are still processed.
        """
        converted = []
        for style in split_style:
            try:
                kept = self._convert_inline_declaration(style)
            except ValueError:
                self._logger.warning("Style value isn't correct: %r", style)
                kept = style
            if kept is not None:
                converted.append(kept)
        # keep the in-place update callers may rely on
        split_style[:] = converted
        return split_style

    def build_inline_style_content(self, style: str) -> str:
        """Build inline style with LiveCarta convention"""
        split_style = []
        for declaration in style.split(";"):
            declaration = declaration.strip()
            if not declaration:
                # empty fragment, e.g. after the trailing ";"
                continue
            # drop the whitespace around the name and around the value
            name, separator, value = declaration.partition(":")
            split_style.append(name.strip() + separator + value.strip())

        split_style = self.update_inline_styles_to_livecarta_convention(split_style)
        return "; ".join(split_style)

    def process_inline_styles_in_html_soup(self, html_content):
        """This function is designed to convert inline html styles"""
        tags_with_inline_style = html_content.find_all(
            LiveCartaConfig.could_have_style_in_livecarta_regexp,
            attrs={"style": re.compile(".*")})

        for tag in tags_with_inline_style:
            inline_style = tag.attrs["style"]
            align = tag.attrs.get("align")
            if align:
                # fold the "align" attribute into the style as text-align
                inline_style += f";text-align: {align};"
            tag.attrs["style"] = self.build_inline_style_content(inline_style)

    @staticmethod
    def get_css_content(css_href: str, html_href: str, ebooklib_book) -> str:
        """
        Return the decoded content of the stylesheet linked from an html file.

        ``css_href`` is resolved relative to the folder of ``html_href`` and
        looked up with ``ebooklib_book.get_item_with_href``. When the found
        stylesheet contains "@import" with a double-quoted target, the item
        "css/<target>" is used instead.

        Raises AssertionError when no stylesheet item is found.
        """
        path_to_css_from_root = normpath(
            join(dirname(html_href), css_href)).replace("\\", "/")
        css_obj = ebooklib_book.get_item_with_href(path_to_css_from_root)

        # if in css file we import another css
        if css_obj is not None and "@import" in str(css_obj.content):
            imported = re.search('"(.*)"', str(css_obj.content))
            if imported is not None:
                css_obj = ebooklib_book.get_item_with_href(
                    "css/" + imported.group(1))

        if not css_obj:
            # AssertionError is kept for existing callers; it is raised
            # explicitly so the check is not stripped under "python -O"
            raise AssertionError(f"Css style {css_href} was not in manifest.")
        return css_obj.get_content().decode()

    def update_css_styles_to_livecarta_convention(self, css_rule: "cssutils.css.CSSStyleRule",
                                                  style_type: "cssutils.css.property.Property"):
        """Remove or convert one property of ``css_rule`` in place."""
        name = style_type.name
        if name not in LiveCartaConfig.LIVECARTA_STYLE_ATTRS:
            # property not in LIVECARTA_STYLE_ATTRS, remove from css file
            css_rule.style[name] = ""
            return

        cleaned_value = self.clean_value(style_type.value, name)
        if all(self.style_conditions(cleaned_value, name)):
            # there are constraints + value not in LIVECARTA_STYLE_ATTRS, remove from css file
            css_rule.style[name] = ""
        elif name in self.LIVECARTA_STYLE_ATTRS_MAPPING:
            # function that converts our data
            convert = self.LIVECARTA_STYLE_ATTRS_MAPPING[name]
            css_rule.style[name] = convert(cleaned_value)

    def build_css_file_content(self, css_content: str) -> str:
        """Build css content with LiveCarta convention"""
        sheet = cssutils.parseString(css_content, validate=False)

        for css_rule in sheet:
            if css_rule.type != css_rule.STYLE_RULE:
                continue
            # iterate over a snapshot: the update below edits css_rule.style
            for style_type in list(css_rule.style):
                self.update_css_styles_to_livecarta_convention(css_rule, style_type)

        # public equivalent of the private sheet._getCssText()
        return sheet.cssText.decode()