forked from LiveCarta/BookConverter
css processing formatting
This commit is contained in:
@@ -1,5 +1,5 @@
|
|||||||
import re
|
import re
|
||||||
from bs4 import BeautifulSoup, NavigableString, Tag
|
from bs4 import BeautifulSoup, NavigableString
|
||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
def _clean_footnote_content(content):
|
def _clean_footnote_content(content):
|
||||||
|
|||||||
@@ -1,237 +0,0 @@
|
|||||||
import re
|
|
||||||
import cssutils
|
|
||||||
|
|
||||||
from ebooklib import epub
|
|
||||||
from bs4 import BeautifulSoup
|
|
||||||
from itertools import takewhile
|
|
||||||
|
|
||||||
from src.util.color_reader import str2hex
|
|
||||||
from src.livecarta_config import LiveCartaConfig
|
|
||||||
|
|
||||||
|
|
||||||
def get_text_color(x):
    """
    Normalise a text colour and blank it when it is black.

    Parameters
    ----------
    x
        colour value handed to ``str2hex``

    Returns
    -------
    color
        result of ``str2hex(x)``, or "" when that result is "#000000", "#000" or "black"
        (presumably the default text colour, so nothing needs to be kept - TODO confirm)

    """
    hex_color = str2hex(x)
    if hex_color in ["#000000", "#000", "black"]:
        return ""
    return hex_color
|
|
||||||
|
|
||||||
|
|
||||||
def get_bg_color(x):
    """
    Normalise a background colour and blank it when it is white.

    Parameters
    ----------
    x
        colour value handed to ``str2hex``

    Returns
    -------
    color
        result of ``str2hex(x)``, or "" when that result is "#ffffff", "#fff" or "white"
        (presumably the default background, so nothing needs to be kept - TODO confirm)

    """
    hex_color = str2hex(x)
    if hex_color in ["#ffffff", "#fff", "white"]:
        return ""
    return hex_color
|
|
||||||
|
|
||||||
|
|
||||||
def convert_tag_style_values(size_value: str, is_indent: bool = False) -> str:
    """
    Function
    - converts values of tags from em/%/pt to px
    - leaves every other value (px, keywords, malformed numbers) untouched
    Parameters
    ----------
    size_value: str
        single css size token, e.g. "1.5em", "120%", "12pt"
    is_indent: bool
        use the indent multipliers (5.76 px per %, 18 px per em)
        instead of the font-size ones (0.16 px per %, 16 px per em)

    Returns
    -------
    size_value: str
        converted value size, formatted as "<float>px"

    """
    # At most one sign and one decimal point: anything that matches is
    # guaranteed to be parseable by float() (the previous pattern accepted
    # "--5em" and "1..5em" and then crashed on the conversion).
    match = re.match(r"^(-?\d*\.?\d+)(%|em|pt)$", size_value)
    if match is None:
        return size_value

    number = float(match.group(1))
    unit = match.group(2)
    if unit == "%":
        pixels = number * (5.76 if is_indent else 0.16)
    elif unit == "em":
        pixels = number * (18 if is_indent else 16)
    else:
        # pt -> px
        pixels = number * 4 / 3
    return str(pixels) + 'px'
|
|
||||||
|
|
||||||
|
|
||||||
def convert_indents_tag_values(size_value: str) -> str:
    """
    Function converts values of ["text-indent", "margin-left", "margin"]

    The value is split on single spaces; a three-part value contributes its
    middle part, any other value contributes its last part. The chosen part
    is converted to px with the indent multipliers.
    Parameters
    ----------
    size_value: str
        css value, possibly a space separated shorthand

    Returns
    -------
    size_value: str
        converted part of the value

    """
    parts = size_value.split(" ")
    # three parts -> middle value, otherwise -> last value
    chosen = parts[-2] if len(parts) == 3 else parts[-1]
    return convert_tag_style_values(chosen, True)
|
|
||||||
|
|
||||||
|
|
||||||
# Dictionary LIVECARTA_STYLE_ATTRS = { css property: allowed values }
# Style properties that can be used to fit LiveCarta css style convention.
# An empty list means that any value of the property can be converted.
# A non-empty list means that only the listed property-value combinations
# can be transformed.
LIVECARTA_STYLE_ATTRS = {
    "text-indent": [],
    "font-variant": ["small-caps"],
    # every alignment except the default one
    "text-align": [
        x for x in LiveCartaConfig.ALIGN_STYLES
        if x != LiveCartaConfig.DEFAULT_ALIGN_STYLE
    ],
    "align": [],
    "font": [],
    "font-family": [],
    "font-size": [],
    "font-weight": ["bold", "600", "700", "800", "900"],  # <strong>
    "font-style": ["italic"],  # <i>
    "text-decoration": ["underline", "line-through"],  # <u> , <s>
    "text-decoration-line": ["underline", "line-through"],  # <u> , <s>
    "vertical-align": ["super"],  # <sup>
    "color": [],
    "background-color": [],
    "background": [],
    "width": [],
    "border": [],
    "border-top-width": [],
    "border-right-width": [],
    "border-left-width": [],
    "border-bottom-width": [],
    "border-top": [],
    "border-bottom": [],
    "list-style-type": [],
    "list-style-image": [],
    "margin-left": [],
    "margin-top": [],
    "margin": [],
}
|
|
||||||
|
|
||||||
# Dictionary LIVECARTA_STYLE_ATTRS_MAPPING = { property: mapping function }
#
# Warning, if LIVECARTA_STYLE_ATTRS is changed, LIVECARTA_STYLE_ATTRS_MAPPING
# should be updated to suit LiveCarta style convention.
LIVECARTA_STYLE_ATTRS_MAPPING = {
    "text-indent": convert_indents_tag_values,
    # kept as is
    "font-variant": lambda x: x,
    "text-align": lambda x: x,
    # the "font" shorthand is always blanked
    "font": lambda x: "",
    "font-family": lambda x: x,
    "font-size": convert_tag_style_values,
    "color": get_text_color,
    "background-color": get_bg_color,
    "background": get_bg_color,
    # a border value of exactly "0" is blanked, anything else is kept
    "border": lambda x: "" if x == "0" else x,
    "border-top-width": lambda x: "" if x == "0" else x,
    "border-right-width": lambda x: "" if x == "0" else x,
    "border-left-width": lambda x: "" if x == "0" else x,
    "border-bottom-width": lambda x: "" if x == "0" else x,
    "border-top": lambda x: "" if x == "0" else x,
    "border-bottom": lambda x: "" if x == "0" else x,
    # list markers outside LiveCartaConfig.list_types fall back to "disc"
    "list-style-type": lambda x: x if x in LiveCartaConfig.list_types else "disc",
    "list-style-image": lambda x: "disc",
    "margin-left": convert_indents_tag_values,
    "margin-top": convert_tag_style_values,
    "margin": convert_indents_tag_values,
}
|
|
||||||
|
|
||||||
|
|
||||||
def style_conditions(style_value, style_name):
    """
    Collect the facts needed to decide whether a css declaration is kept.

    Parameters
    ----------
    style_value: str
        raw value of the property
    style_name: str
        css property name

    Returns
    -------
    cleaned_value: str
        value with double quotes removed and surrounding whitespace stripped
    constraints_on_value: list or None
        allowed values from LIVECARTA_STYLE_ATTRS (empty list = any value),
        None when the property is not listed at all
    value_not_in_possible_values_list: bool
        True when cleaned_value is not one of the allowed values

    """
    cleaned_value = style_value.replace("\"", "").strip()
    # single lookup: an unknown property yields None instead of a KeyError
    constraints_on_value = LIVECARTA_STYLE_ATTRS.get(style_name)
    value_not_in_possible_values_list = \
        cleaned_value not in (constraints_on_value or [])
    return cleaned_value, constraints_on_value, value_not_in_possible_values_list
|
|
||||||
|
|
||||||
|
|
||||||
def update_inline_styles_to_livecarta_convention(split_style: list):
    """
    Convert a list of "name:value" declarations to the LiveCarta convention.

    The list is modified in place: every slot ends up either as the converted
    "name:value" string or as "" when the declaration has to be removed.

    Parameters
    ----------
    split_style: list
        declarations of one inline style, already split by ";"

    Returns
    -------
    split_style: list
        the same list object with converted / blanked entries

    """
    for i, style in enumerate(split_style):
        # partition keeps colons that belong to the value (e.g. inside url(...))
        style_name, separator, style_value = style.partition(":")
        style_name = style_name.strip()
        if not separator or style_name not in LIVECARTA_STYLE_ATTRS:
            # malformed or property not in LIVECARTA_STYLE_ATTRS, remove it and
            # carry on with the next declaration (returning here would leave
            # the rest of the list unconverted)
            split_style[i] = ""
            continue

        cleaned_value, constraints_on_value, value_not_in_possible_values_list =\
            style_conditions(style_value, style_name)
        if constraints_on_value and value_not_in_possible_values_list:
            # there are constraints + value not in LIVECARTA_STYLE_ATTRS, remove it
            split_style[i] = ""
            continue

        if style_name in LIVECARTA_STYLE_ATTRS_MAPPING:
            # function that converts our data
            func = LIVECARTA_STYLE_ATTRS_MAPPING[style_name]
            style_value = func(cleaned_value)
        # a converter answering "" means "drop the property"
        split_style[i] = style_name + ":" + style_value if style_value else ""
    return split_style
|
|
||||||
|
|
||||||
|
|
||||||
def build_inline_style_content(style: str) -> str:
    """
    Build inline style with LiveCarta convention

    Parameters
    ----------
    style: str
        content of a ``style`` attribute

    Returns
    -------
    style: str
        kept declarations joined with "; "

    """
    split_style = []
    for declaration in style.split(";"):
        declaration = declaration.strip()
        if not declaration:
            # trailing ";" or whitespace-only fragment
            continue
        # normalise only the name/value separator; fragments without ":" are
        # passed on untouched instead of raising AttributeError
        split_style.append(re.sub(r"\s*:\s*", ":", declaration, count=1))

    split_style = update_inline_styles_to_livecarta_convention(split_style)
    # removed declarations are blank strings, do not emit separators for them
    return "; ".join(el for el in split_style if el)
|
|
||||||
|
|
||||||
|
|
||||||
def update_css_styles_to_livecarta_convention(css_rule: cssutils.css.CSSStyleRule,
                                              style_type: cssutils.css.property.Property):
    """
    Convert one property of a css rule to the LiveCarta convention, in place.

    Parameters
    ----------
    css_rule: cssutils.css.CSSStyleRule
        rule that owns the property, its ``style`` is updated
    style_type: cssutils.css.property.Property
        property to check and convert

    """
    name = style_type.name
    if name not in LIVECARTA_STYLE_ATTRS:
        # property not in LIVECARTA_STYLE_ATTRS, remove from css file
        css_rule.style[name] = ""
        return

    cleaned_value, constraints_on_value, value_not_in_possible_values_list =\
        style_conditions(style_type.value, name)
    if constraints_on_value and value_not_in_possible_values_list:
        # there are constraints + value not in LIVECARTA_STYLE_ATTRS, remove from css file
        css_rule.style[name] = ""
    elif name in LIVECARTA_STYLE_ATTRS_MAPPING:
        # function that converts our data
        func = LIVECARTA_STYLE_ATTRS_MAPPING[name]
        css_rule.style[name] = func(cleaned_value)
|
|
||||||
|
|
||||||
|
|
||||||
def build_css_file_content(css_content: str) -> str:
    """
    Build css content with LiveCarta convention

    Parameters
    ----------
    css_content: str
        text of a css file

    Returns
    -------
    css_text: str
        serialized stylesheet after every style rule has been converted

    """
    sheet = cssutils.parseString(css_content, validate=False)

    for css_rule in sheet:
        if css_rule.type != css_rule.STYLE_RULE:
            continue
        # snapshot the properties: the declaration is edited while we walk it
        for style_type in list(css_rule.style):
            update_css_styles_to_livecarta_convention(css_rule, style_type)

    # public accessor for what the private _getCssText() returns (bytes)
    css_text: str = sheet.cssText.decode()
    return css_text
|
|
||||||
|
|
||||||
|
|
||||||
if __name__ == "__main__":
    # Manual run against a local sample book: clean its stylesheet and
    # parse one chapter body.
    file = "../../epub/9781627222174.epub"
    ebooklib_book = epub.read_epub(file)

    css_ = ebooklib_book.get_item_with_href("css/epub.css")
    css_ = css_.get_content().decode()
    css_cleaned = build_css_file_content(css_)

    html_ = ebooklib_book.get_item_with_href("pr01s05.xhtml").get_body_content().decode()
    html_soup = BeautifulSoup(html_, features="lxml")
|
|
||||||
186
src/epub_converter/css_preprocessor.py
Normal file
186
src/epub_converter/css_preprocessor.py
Normal file
@@ -0,0 +1,186 @@
|
|||||||
|
import re
|
||||||
|
import cssutils
|
||||||
|
|
||||||
|
from src.util.helpers import BookLogger
|
||||||
|
from src.util.color_reader import str2hex
|
||||||
|
from src.livecarta_config import LiveCartaConfig
|
||||||
|
|
||||||
|
|
||||||
|
class CSSPreprocessor:
    """
    Rewrites css (whole stylesheets and inline ``style`` strings) to the LiveCarta convention.

    Properties missing from LiveCartaConfig.LIVECARTA_STYLE_ATTRS are removed, properties
    whose value is outside their allowed list are removed, and the remaining values go
    through the converter registered in LIVECARTA_STYLE_ATTRS_MAPPING.
    """

    # <number><unit> with unit in %, em, pt. At most one sign and one decimal point, so
    # every match can be parsed by float() ("--5em" / "1..5em" no longer match and crash).
    _SIZE_REGEXP = re.compile(r"^(-?\d*\.?\d+)(%|em|pt)$")

    def __init__(self, logger=None):
        self.logger: BookLogger = logger
        # Dictionary LIVECARTA_STYLE_ATTRS_MAPPING = { property: mapping function }
        #
        # Warning, if LIVECARTA_STYLE_ATTRS is changed, LIVECARTA_STYLE_ATTRS_MAPPING
        # should be updated to suit LiveCarta style convention.
        self.LIVECARTA_STYLE_ATTRS_MAPPING = {
            "text-indent": self.convert_indents_tag_values,
            "font-variant": lambda x: x,
            "text-align": lambda x: x,
            "font": lambda x: "",
            "font-family": lambda x: x,
            "font-size": self.convert_tag_style_values,
            "color": self.get_text_color,
            "background-color": self.get_bg_color,
            "background": self.get_bg_color,
            "border": self._blank_if_zero,
            "border-top-width": self._blank_if_zero,
            "border-right-width": self._blank_if_zero,
            "border-left-width": self._blank_if_zero,
            "border-bottom-width": self._blank_if_zero,
            "border-top": self._blank_if_zero,
            "border-bottom": self._blank_if_zero,
            "list-style-type": lambda x: x if x in LiveCartaConfig.list_types else "disc",
            "list-style-image": lambda x: "disc",
            "margin-left": self.convert_indents_tag_values,
            "margin-top": self.convert_tag_style_values,
            "margin": self.convert_indents_tag_values,
        }

    @staticmethod
    def _blank_if_zero(x):
        """Return "" for a value of exactly "0", otherwise the value unchanged."""
        return x if x != "0" else ""

    @staticmethod
    def get_text_color(x):
        """Return ``str2hex(x)``, or "" when the result is black ("#000000", "#000", "black")."""
        color = str2hex(x)
        color = color if color not in ["#000000", "#000", "black"] else ""
        return color

    @staticmethod
    def get_bg_color(x):
        """Return ``str2hex(x)``, or "" when the result is white ("#ffffff", "#fff", "white")."""
        color = str2hex(x)
        color = color if color not in ["#ffffff", "#fff", "white"] else ""
        return color

    @staticmethod
    def convert_tag_style_values(size_value: str, is_indent: bool = False) -> str:
        """
        Function
        - converts values of tags from em/%/pt to px
        - leaves every other value (px, keywords, malformed numbers) untouched
        Parameters
        ----------
        size_value: str
            single css size token, e.g. "1.5em", "120%", "12pt"
        is_indent: bool
            use the indent multipliers (5.76 px per %, 18 px per em)
            instead of the font-size ones (0.16 px per %, 16 px per em)

        Returns
        -------
        size_value: str
            converted value size, formatted as "<float>px"

        """
        match = CSSPreprocessor._SIZE_REGEXP.match(size_value)
        if match is None:
            return size_value

        number = float(match.group(1))
        unit = match.group(2)
        if unit == "%":
            pixels = number * (5.76 if is_indent else 0.16)
        elif unit == "em":
            pixels = number * (18 if is_indent else 16)
        else:
            # pt -> px
            pixels = number * 4 / 3
        return str(pixels) + 'px'

    def convert_indents_tag_values(self, size_value: str) -> str:
        """
        Function converts values of ["text-indent", "margin-left", "margin"]

        A three-part shorthand contributes its middle part, any other value its
        last part; the chosen part is converted to px with the indent multipliers.
        Parameters
        ----------
        size_value: str
            css value, possibly a whitespace separated shorthand

        Returns
        -------
        size_value: str
            converted part of the value

        """
        # split() without argument tolerates repeated spaces, tabs and newlines
        parts = size_value.split()
        if not parts:
            return size_value
        chosen = parts[-2] if len(parts) == 3 else parts[-1]
        return self.convert_tag_style_values(chosen, True)

    @staticmethod
    def style_conditions(style_value, style_name):
        """
        Collect the facts needed to decide whether a css declaration is kept.

        Returns
        -------
        cleaned_value: str
            value with double quotes removed and surrounding whitespace stripped
        constraints_on_value: list or None
            allowed values from LiveCartaConfig.LIVECARTA_STYLE_ATTRS
            (empty list = any value), None when the property is not listed
        value_not_in_possible_values_list: bool
            True when cleaned_value is not one of the allowed values

        """
        cleaned_value = style_value.replace("\"", "").strip()
        # single lookup: an unknown property yields None instead of a KeyError
        constraints_on_value = LiveCartaConfig.LIVECARTA_STYLE_ATTRS.get(style_name)
        value_not_in_possible_values_list = \
            cleaned_value not in (constraints_on_value or [])
        return cleaned_value, constraints_on_value, value_not_in_possible_values_list

    def update_inline_styles_to_livecarta_convention(self, split_style: list):
        """
        Convert a list of "name:value" declarations to the LiveCarta convention.

        The list is modified in place: every slot ends up either as the converted
        "name:value" string or as "" when the declaration has to be removed.

        Parameters
        ----------
        split_style: list
            declarations of one inline style, already split by ";"

        Returns
        -------
        split_style: list
            the same list object with converted / blanked entries

        """
        for i, style in enumerate(split_style):
            # partition keeps colons that belong to the value (e.g. inside url(...))
            style_name, separator, style_value = style.partition(":")
            style_name = style_name.strip()
            if not separator or style_name not in LiveCartaConfig.LIVECARTA_STYLE_ATTRS:
                # malformed or property not in LIVECARTA_STYLE_ATTRS, remove it and
                # carry on with the next declaration (returning here would leave
                # the rest of the list unconverted)
                split_style[i] = ""
                continue

            cleaned_value, constraints_on_value, value_not_in_possible_values_list =\
                self.style_conditions(style_value, style_name)
            if constraints_on_value and value_not_in_possible_values_list:
                # there are constraints + value not in LIVECARTA_STYLE_ATTRS, remove it
                split_style[i] = ""
                continue

            if style_name in self.LIVECARTA_STYLE_ATTRS_MAPPING:
                # function that converts our data
                func = self.LIVECARTA_STYLE_ATTRS_MAPPING[style_name]
                style_value = func(cleaned_value)
            # a converter answering "" means "drop the property"
            split_style[i] = style_name + ":" + style_value if style_value else ""
        return split_style

    def build_inline_style_content(self, style: str) -> str:
        """
        Build inline style with LiveCarta convention

        Parameters
        ----------
        style: str
            content of a ``style`` attribute

        Returns
        -------
        style: str
            kept declarations joined with "; "

        """
        split_style = []
        for declaration in style.split(";"):
            declaration = declaration.strip()
            if not declaration:
                # trailing ";" or whitespace-only fragment
                continue
            # normalise only the name/value separator; fragments without ":" are
            # passed on untouched instead of raising AttributeError
            split_style.append(re.sub(r"\s*:\s*", ":", declaration, count=1))

        split_style = self.update_inline_styles_to_livecarta_convention(split_style)
        # removed declarations are blank strings, do not emit separators for them
        return "; ".join(el for el in split_style if el)

    # cssutils types are given as strings so they are not evaluated when the class is defined
    def update_css_styles_to_livecarta_convention(self, css_rule: "cssutils.css.CSSStyleRule",
                                                  style_type: "cssutils.css.property.Property"):
        """
        Convert one property of a css rule to the LiveCarta convention, in place.

        Parameters
        ----------
        css_rule: cssutils.css.CSSStyleRule
            rule that owns the property, its ``style`` is updated
        style_type: cssutils.css.property.Property
            property to check and convert

        """
        name = style_type.name
        if name not in LiveCartaConfig.LIVECARTA_STYLE_ATTRS:
            # property not in LIVECARTA_STYLE_ATTRS, remove from css file
            css_rule.style[name] = ""
            return

        cleaned_value, constraints_on_value, value_not_in_possible_values_list =\
            self.style_conditions(style_type.value, name)
        if constraints_on_value and value_not_in_possible_values_list:
            # there are constraints + value not in LIVECARTA_STYLE_ATTRS, remove from css file
            css_rule.style[name] = ""
        elif name in self.LIVECARTA_STYLE_ATTRS_MAPPING:
            # function that converts our data
            func = self.LIVECARTA_STYLE_ATTRS_MAPPING[name]
            css_rule.style[name] = func(cleaned_value)

    def build_css_file_content(self, css_content: str) -> str:
        """
        Build css content with LiveCarta convention

        Parameters
        ----------
        css_content: str
            text of a css file

        Returns
        -------
        css_text: str
            serialized stylesheet after every style rule has been converted

        """
        sheet = cssutils.parseString(css_content, validate=False)

        for css_rule in sheet:
            if css_rule.type != css_rule.STYLE_RULE:
                continue
            # snapshot the properties: the declaration is edited while we walk it
            for style_type in list(css_rule.style):
                self.update_css_styles_to_livecarta_convention(css_rule, style_type)

        # public accessor for what the private _getCssText() returns (bytes)
        css_text: str = sheet.cssText.decode()
        return css_text
|
||||||
@@ -1,5 +1,5 @@
|
|||||||
|
import re
|
||||||
from typing import Tuple
|
from typing import Tuple
|
||||||
|
|
||||||
from bs4 import BeautifulSoup, Tag
|
from bs4 import BeautifulSoup, Tag
|
||||||
|
|
||||||
|
|
||||||
@@ -84,4 +84,10 @@ def preprocess_footnotes(source_html_tag: Tag, href2soup_html: dict = None, note
|
|||||||
footnote_tag = footnote_tag.find(
|
footnote_tag = footnote_tag.find(
|
||||||
attrs={"role": "doc-backlink"}) or footnote_tag
|
attrs={"role": "doc-backlink"}) or footnote_tag
|
||||||
new_footnotes_tags.append(footnote_tag)
|
new_footnotes_tags.append(footnote_tag)
|
||||||
|
|
||||||
|
for i, (noteref, footnote) in enumerate(zip(new_noterefs_tags, new_footnotes_tags)):
|
||||||
|
noteref.attrs["data-id"] = i + 1
|
||||||
|
noteref.attrs["id"] = f"footnote-{i + 1}"
|
||||||
|
footnote.attrs["href"] = f"#footnote-{i + 1}"
|
||||||
|
|
||||||
return footnotes, new_noterefs_tags, new_footnotes_tags
|
return footnotes, new_noterefs_tags, new_footnotes_tags
|
||||||
|
|||||||
@@ -4,15 +4,13 @@ from typing import List
|
|||||||
|
|
||||||
from logging import CRITICAL
|
from logging import CRITICAL
|
||||||
from bs4 import BeautifulSoup
|
from bs4 import BeautifulSoup
|
||||||
from premailer import transform
|
|
||||||
|
|
||||||
from src.livecarta_config import LiveCartaConfig
|
from src.livecarta_config import LiveCartaConfig
|
||||||
from src.epub_converter.css_preprocessing import LIVECARTA_STYLE_ATTRS
|
|
||||||
|
|
||||||
cssutils.log.setLevel(CRITICAL)
|
cssutils.log.setLevel(CRITICAL)
|
||||||
|
|
||||||
|
|
||||||
class TagStyleConverter:
|
class TagInlineStyleProcessor:
|
||||||
def __init__(self, tag_inline_style):
|
def __init__(self, tag_inline_style):
|
||||||
# tag with inline style + style parsed from css file
|
# tag with inline style + style parsed from css file
|
||||||
self.tag_inline_style = tag_inline_style
|
self.tag_inline_style = tag_inline_style
|
||||||
@@ -190,7 +188,7 @@ class TagStyleConverter:
|
|||||||
for tag, styles in LiveCartaConfig.LIVECARTA_STYLES_CAN_BE_IN_TAG.items()
|
for tag, styles in LiveCartaConfig.LIVECARTA_STYLES_CAN_BE_IN_TAG.items()
|
||||||
if re.match(tag, initial_tag.name)
|
if re.match(tag, initial_tag.name)
|
||||||
for style in styles]
|
for style in styles]
|
||||||
styles_cant_be_in_tag = [attr for attr in LIVECARTA_STYLE_ATTRS
|
styles_cant_be_in_tag = [attr for attr in LiveCartaConfig.LIVECARTA_STYLE_ATTRS
|
||||||
if attr not in styles_can_be_in_tag]
|
if attr not in styles_can_be_in_tag]
|
||||||
span_style = initial_tag.attrs["style"]
|
span_style = initial_tag.attrs["style"]
|
||||||
# here check that this style is exactly the same.
|
# here check that this style is exactly the same.
|
||||||
@@ -218,41 +216,3 @@ class TagStyleConverter:
|
|||||||
self.change_attrs_with_corresponding_tags()
|
self.change_attrs_with_corresponding_tags()
|
||||||
self.wrap_span_in_tag_to_save_style_attrs(self.tag_inline_style)
|
self.wrap_span_in_tag_to_save_style_attrs(self.tag_inline_style)
|
||||||
return self.tag_inline_style
|
return self.tag_inline_style
|
||||||
|
|
||||||
|
|
||||||
def convert_html_soup_with_css_style(html_soup: BeautifulSoup, css_text: str) -> BeautifulSoup:
    """
    Function moves styles from .css into inline styles and converts the styled tags.
    Parameters
    ----------
    html_soup: BeautifulSoup
        html page with inline style
    css_text: str
        css content from css file
    Returns
    -------
    inline_soup: BeautifulSoup
        new soup, parsed from the html with inlined css, whose styled tags
        went through TagStyleConverter.convert_initial_tag()

    """
    # remove this specification because it causes problems
    namespace_rule = '@namespace epub "http://www.idpf.org/2007/ops";'
    css_text = css_text.replace(namespace_rule, '')

    # here we add css styles to inline style
    inlined_html: str = transform(
        str(html_soup),
        css_text=css_text,
        remove_classes=False,
        external_styles=False,
        allow_network=False,
        disable_validation=True,
    )

    # soup with converted styles from css
    inline_soup = BeautifulSoup(inlined_html, features="lxml")

    # go through the tags with inline style + style parsed from css file
    styled_tags = inline_soup.find_all(
        LiveCartaConfig.could_have_style_in_livecarta_regexp,
        attrs={"style": re.compile(".*")})
    for styled_tag in styled_tags:
        TagStyleConverter(styled_tag).convert_initial_tag()
    return inline_soup
|
|
||||||
@@ -101,25 +101,39 @@ class LiveCartaConfig:
|
|||||||
r"(^h[1-9]$)": ["list-style-type"]
|
r"(^h[1-9]$)": ["list-style-type"]
|
||||||
}
|
}
|
||||||
|
|
||||||
WRAP_TAGS_WITH_TABLE = {
|
"""
|
||||||
("div",): ["width", "border", "bgcolor"],
|
Dictionary LIVECARTA_STYLE_ATTRS = { css property: value }
|
||||||
("section", "blockquote",): ("class", r"feature[1234]"),
|
Style properties that can be used to fit LiveCarta css style convention.
|
||||||
}
|
If property has empty list, it means that any value can be converted.
|
||||||
|
If property has not empty list, it means that only certain property-value combinations can be transformed.
|
||||||
"""('what to replace', 'parent tag', 'child tag')"""
|
"""
|
||||||
REPLACE_TAG_WITH_LIVECARTA_CORRESPOND_TAGS = {
|
LIVECARTA_STYLE_ATTRS = {
|
||||||
(r"^h[6-9]$", "^figure$", "^section$", "^div$"): "p",
|
"text-indent": [],
|
||||||
("^aside$",): "blockquote",
|
"font-variant": ["small-caps"],
|
||||||
("^header$", "^footer$", ("child", ":not(pre)", "code, kbd, var")): "span",
|
"text-align": [x for x in ["justify", "right", "center", "left"] if x != "left"],
|
||||||
("^b$",): "strong",
|
"align": [],
|
||||||
# (("parent", ":not(pre)", "code")): "p",
|
"font": [],
|
||||||
}
|
"font-family": [],
|
||||||
|
"font-size": [],
|
||||||
""" > == in (p in li)"""
|
"font-weight": ["bold", "600", "700", "800", "900"], # <strong>
|
||||||
TAGS_TO_UNWRAP = [
|
"font-style": ["italic"], # <i>
|
||||||
"section", "article", "figcaption", "main", "body", "html", "li > p",
|
"text-decoration": ["underline", "line-through"], # <u> , <s>
|
||||||
]
|
"text-decoration-line": ["underline", "line-through"], # <u> , <s>
|
||||||
|
"vertical-align": ["super"], # <sup>
|
||||||
INSERT_TAG_IN_PARENT_TAG = {
|
"color": [],
|
||||||
("pre", "code, kbd, var"): "code",
|
"background-color": [],
|
||||||
|
"background": [],
|
||||||
|
"width": [],
|
||||||
|
"border": [],
|
||||||
|
"border-top-width": [],
|
||||||
|
"border-right-width": [],
|
||||||
|
"border-left-width": [],
|
||||||
|
"border-bottom-width": [],
|
||||||
|
"border-top": [],
|
||||||
|
"border-bottom": [],
|
||||||
|
"list-style-type": [],
|
||||||
|
"list-style-image": [],
|
||||||
|
"margin-left": [],
|
||||||
|
"margin-top": [],
|
||||||
|
"margin": [],
|
||||||
}
|
}
|
||||||
|
|||||||
Reference in New Issue
Block a user