forked from LiveCarta/BookConverter
LAW-6561 | Adding attributes to style attr
This commit is contained in:
@@ -11,10 +11,20 @@ cssutils.log.setLevel(CRITICAL)
|
||||
|
||||
|
||||
class InlineStyleProcessor:
|
||||
def __init__(self, tag_inline_style: Tag):
|
||||
def __init__(self, inline_styled_tag: Tag):
|
||||
# tag with inline style + style parsed from css file
|
||||
self.tag_inline_style = tag_inline_style
|
||||
self.tag_inline_style.attrs["style"]: str = self.process_inline_style()
|
||||
self.inline_styled_tag = inline_styled_tag
|
||||
self.inline_styled_tag.attrs["style"]: str = self.add_attrs_to_inline_style()
|
||||
self.inline_styled_tag.attrs["style"]: str = self.process_inline_style()
|
||||
|
||||
def add_attrs_to_inline_style(self) -> str:
    """Fold selected tag attributes into the tag's inline style string.

    Starts from the tag's current ``style`` value (empty string when the
    attribute is absent) and, for every other attribute whose name is
    listed in ``LiveCartaConfig.ATTR_SHOULD_BE_IN_STYLE_ATTR``, appends a
    ``"; <name>: <value>"`` declaration. The source attributes themselves
    are left on the tag.

    Returns:
        The extended style string, which is also written back to the
        tag's ``style`` attribute.
    """
    tag = self.inline_styled_tag
    merged_style = tag.attrs.get("style", "")
    for name, value in tag.attrs.items():
        # The style attribute is the destination, never a source.
        if name == "style" or name not in LiveCartaConfig.ATTR_SHOULD_BE_IN_STYLE_ATTR:
            continue
        # Note: if the initial style is empty, the result starts with "; ".
        merged_style += f"; {name}: {value}"
    tag["style"] = merged_style
    return merged_style
|
||||
|
||||
@staticmethod
|
||||
def remove_white_if_no_bgcolor(style_: str, tag: Tag) -> str:
|
||||
@@ -63,11 +73,11 @@ class InlineStyleProcessor:
|
||||
processed inline style
|
||||
|
||||
"""
|
||||
if self.tag_inline_style.attrs.get("style"):
|
||||
inline_style = self.tag_inline_style.attrs.get("style") + ";"
|
||||
if self.inline_styled_tag.attrs.get("style"):
|
||||
inline_style = self.inline_styled_tag.attrs.get("style") + ";"
|
||||
# 1. Remove white color if tag doesn't have background color in style
|
||||
inline_style = self.remove_white_if_no_bgcolor(
|
||||
inline_style, self.tag_inline_style)
|
||||
inline_style, self.inline_styled_tag)
|
||||
inline_style = inline_style.replace(
|
||||
"list-style-image", "list-style-type")
|
||||
# 2. Create list of styles from inline style
|
||||
@@ -75,6 +85,7 @@ class InlineStyleProcessor:
|
||||
style = re.sub(r"; *", ";", inline_style)
|
||||
# when we split style by ";", last element of the list is "" - None (remove it)
|
||||
split_inline_style: list = list(filter(None, style.split(";")))
|
||||
inline_style: str = ";".join(split_inline_style)+';'
|
||||
return inline_style
|
||||
else:
|
||||
return ""
|
||||
@@ -103,13 +114,12 @@ class InlineStyleProcessor:
|
||||
|
||||
def change_attrs_with_corresponding_tags(self):
|
||||
# adds <strong>, <u>, <sup> instead of styles
|
||||
styles_to_remove = self.check_style_to_be_tag(self.tag_inline_style.attrs['style'])
|
||||
style_attr = self.tag_inline_style.attrs.get('style', '')
|
||||
styles_to_remove = self.check_style_to_be_tag(self.inline_styled_tag.attrs['style'])
|
||||
# Replace each style with its corresponding tag
|
||||
for attr, value in styles_to_remove:
|
||||
# Remove the attribute and value from the style attribute
|
||||
self.tag_inline_style.attrs["style"] = '; '.join(
|
||||
[s for s in self.tag_inline_style.attrs.get('style', '').split(';')
|
||||
self.inline_styled_tag.attrs["style"] = '; '.join(
|
||||
[s for s in self.inline_styled_tag.attrs.get('style', '').split(';')
|
||||
if f'{attr}:{value}' not in s]).strip()
|
||||
|
||||
# Create a new tag for the corresponding style
|
||||
@@ -117,11 +127,11 @@ class InlineStyleProcessor:
|
||||
correspond_tag = BeautifulSoup(features="lxml").new_tag(corr_tag_name)
|
||||
|
||||
# Move the contents of the original tag into the new tag
|
||||
for content in reversed(self.tag_inline_style.contents):
|
||||
for content in reversed(self.inline_styled_tag.contents):
|
||||
correspond_tag.insert(0, content.extract())
|
||||
|
||||
# Add the new tag to the original tag
|
||||
self.tag_inline_style.append(correspond_tag)
|
||||
self.inline_styled_tag.append(correspond_tag)
|
||||
|
||||
@staticmethod
|
||||
def wrap_span_in_tag_to_save_style_attrs(initial_tag: Tag) -> Tag:
|
||||
@@ -173,8 +183,8 @@ class InlineStyleProcessor:
|
||||
|
||||
def convert_initial_tag(self) -> Tag:
|
||||
self.change_attrs_with_corresponding_tags()
|
||||
self.tag_inline_style = self.wrap_span_in_tag_to_save_style_attrs(self.tag_inline_style)
|
||||
return self.tag_inline_style
|
||||
self.inline_styled_tag = self.wrap_span_in_tag_to_save_style_attrs(self.inline_styled_tag)
|
||||
return self.inline_styled_tag
|
||||
|
||||
|
||||
def modify_html_soup_with_css_styles(html_soup: BeautifulSoup, css_text: str = "") -> BeautifulSoup:
|
||||
@@ -208,11 +218,11 @@ def modify_html_soup_with_css_styles(html_soup: BeautifulSoup, css_text: str = "
|
||||
# soup with converted styles from css
|
||||
inline_soup = BeautifulSoup(html_with_css_styles, features="lxml")
|
||||
|
||||
tags_with_inline_style = inline_soup.find_all(LiveCartaConfig.REGEX_TAGS_WITH_STYLE_ATTR,
|
||||
inline_styled_tags = inline_soup.find_all(LiveCartaConfig.REGEX_TAGS_WITH_STYLE_ATTR,
|
||||
attrs={"style": re.compile(".*")})
|
||||
|
||||
# go through the tags with inline style + style parsed from css file
|
||||
for tag_inline_style in tags_with_inline_style:
|
||||
style_converter = InlineStyleProcessor(tag_inline_style)
|
||||
for inline_styled_tag in inline_styled_tags:
|
||||
style_converter = InlineStyleProcessor(inline_styled_tag)
|
||||
style_converter.convert_initial_tag()
|
||||
return inline_soup
|
||||
|
||||
@@ -38,6 +38,9 @@ class LiveCartaConfig:
|
||||
"^ol$": ["list-style-type", "reversed", "start"]
|
||||
}
|
||||
|
||||
# Names of attributes that should be added to the style attr
|
||||
ATTR_SHOULD_BE_IN_STYLE_ATTR = ["bgcolor"]
|
||||
|
||||
# Dictionary mapping CSS style attribute names to names that should replace them
|
||||
STYLE_ATTR_TO_REPLACEMENT = {
|
||||
"list-style": "list-style-type",
|
||||
|
||||
Reference in New Issue
Block a user