Fix style processor error

This commit is contained in:
Kiryl
2022-09-19 17:31:27 +03:00
parent d76f847dee
commit e0e3e3199d
3 changed files with 38 additions and 33 deletions

View File

@@ -14,13 +14,12 @@ class HtmlDocxProcessor:
def __init__(self, logger: BookLogger, html_soup: BeautifulSoup, html_preprocessor, style_preprocessor): def __init__(self, logger: BookLogger, html_soup: BeautifulSoup, html_preprocessor, style_preprocessor):
self.logger = logger self.logger = logger
self.html_soup = html_soup self.html_soup = html_soup
self.body_tag = self.html_soup.body
self.html_preprocessor = html_preprocessor self.html_preprocessor = html_preprocessor
self.style_preprocessor = style_preprocessor self.style_preprocessor = style_preprocessor
self.content: List[Tag] = [] self.content: List[Tag] = []
def _font_to_span(self): def _font_to_span(self):
for font in self.body_tag.find_all("font"): for font in self.html_soup.find_all("font"):
font.name = "span" font.name = "span"
@@ -226,10 +225,12 @@ class HtmlDocxProcessor:
self.logger.log("Inline style reading.") self.logger.log("Inline style reading.")
self.style_preprocessor.process_inline_styles_in_html_soup( self.style_preprocessor.process_inline_styles_in_html_soup(
self.body_tag) self.html_soup)
self.logger.log("Inline style processing.") self.logger.log("Inline style processing.")
modify_html_soup_with_css_styles(self.body_tag) self.html_soup = modify_html_soup_with_css_styles(self.html_soup)
self.body_tag = self.html_soup.body
self.logger.log("Image processing.") self.logger.log("Image processing.")
images = process_images(access, path_to_html=html_path, images = process_images(access, path_to_html=html_path,

View File

@@ -14,7 +14,7 @@ class InlineStyleProcessor:
def __init__(self, tag_inline_style: Tag): def __init__(self, tag_inline_style: Tag):
# tag with inline style + style parsed from css file # tag with inline style + style parsed from css file
self.tag_inline_style = tag_inline_style self.tag_inline_style = tag_inline_style
self.tag_inline_style.attrs['style']: str = self.process_inline_style() self.tag_inline_style.attrs["style"]: str = self.process_inline_style()
@staticmethod @staticmethod
def remove_white_if_no_bgcolor(style_: str, tag: Tag) -> str: def remove_white_if_no_bgcolor(style_: str, tag: Tag) -> str:
@@ -80,19 +80,19 @@ class InlineStyleProcessor:
processed_style = ";".join(split_style)+';' processed_style = ";".join(split_style)+';'
margin_left_regexp = re.compile( margin_left_regexp = re.compile(
r"((margin-left|margin): *(-*\w+);*)") r"((margin-left|margin): *-*((\d*)\.*\d+)\w+;*)")
text_indent_regexp = re.compile( text_indent_regexp = re.compile(
r"(text-indent: *(-*\w+);*)") r"(text-indent: *-*((\d*)\.*\d+)\w+;*)")
has_margin = re.search(margin_left_regexp, processed_style) has_margin = re.search(margin_left_regexp, processed_style)
has_text_indent = re.search(text_indent_regexp, processed_style) has_text_indent = re.search(text_indent_regexp, processed_style)
if has_margin: if has_margin:
num_m = abs(int("0" + "".join( num_m = abs(int("0" + "".join(
filter(str.isdigit, str(has_margin.group(3)))))) filter(str.isdigit, str(has_margin.group(4))))))
if has_text_indent: if has_text_indent:
num_ti = abs(int("0" + "".join( num_ti = abs(int("0" + "".join(
filter(str.isdigit, str(has_text_indent.group(2)))))) filter(str.isdigit, str(has_text_indent.group(3))))))
processed_style = processed_style.replace(has_text_indent.group(1), "text-indent: " + processed_style = processed_style.replace(has_text_indent.group(1), "text-indent: " +
str(abs(num_m - num_ti)) + "px; ") str(abs(num_m - num_ti)) + "px; ")
processed_style = processed_style.replace( processed_style = processed_style.replace(
@@ -106,7 +106,7 @@ class InlineStyleProcessor:
elif has_text_indent: elif has_text_indent:
processed_style = processed_style.replace(has_text_indent.group(1), "text-indent: " + processed_style = processed_style.replace(has_text_indent.group(1), "text-indent: " +
str(abs(int("0" + "".join( str(abs(int("0" + "".join(
filter(str.isdigit, str(has_text_indent.group(2))))))) filter(str.isdigit, str(has_text_indent.group(3)))))))
+ "px; ") + "px; ")
return processed_style return processed_style
return processed_style return processed_style
@@ -127,8 +127,9 @@ class InlineStyleProcessor:
processed inline style processed inline style
""" """
if self.tag_inline_style.attrs.get("style"):
inline_style = self.tag_inline_style.attrs.get("style") + ";" inline_style = self.tag_inline_style.attrs.get("style") + ";"
# 1. Remove white color if tag doesn"t have background color in style # 1. Remove white color if tag doesn't have background color in style
inline_style = self.remove_white_if_no_bgcolor( inline_style = self.remove_white_if_no_bgcolor(
inline_style, self.tag_inline_style) inline_style, self.tag_inline_style)
inline_style = inline_style.replace( inline_style = inline_style.replace(
@@ -143,6 +144,8 @@ class InlineStyleProcessor:
# 4. Processing indents # 4. Processing indents
inline_style: str = self.indents_processing(split_inline_style) inline_style: str = self.indents_processing(split_inline_style)
return inline_style return inline_style
else:
return ""
@staticmethod @staticmethod
def check_style_to_be_tag(style: str) -> List[tuple]: def check_style_to_be_tag(style: str) -> List[tuple]:

View File

@@ -126,17 +126,18 @@ class StyleReader:
return constraints_on_value, value_not_in_possible_values_list return constraints_on_value, value_not_in_possible_values_list
def update_inline_styles_to_livecarta_convention(self, split_style: list) -> list: def update_inline_styles_to_livecarta_convention(self, split_style: list) -> list:
for i, style in enumerate(split_style): for i, style in reversed(list(enumerate(split_style))):
style_name, style_value = style.split(":") style_name, style_value = style.split(":")
if style_name not in LiveCartaConfig.LIVECARTA_STYLE_ATTRS: if style_name not in LiveCartaConfig.LIVECARTA_STYLE_ATTRS:
# property not in LIVECARTA_STYLE_ATTRS, remove from css file # property not in LIVECARTA_STYLE_ATTRS, remove
split_style[i] = "" split_style.remove(style)
return split_style continue
cleaned_value = self.clean_value(style_value, style_name) cleaned_value = self.clean_value(style_value, style_name)
if all(self.style_conditions(cleaned_value, style_name)): if all(self.style_conditions(cleaned_value, style_name)):
# there are constraints + value not in LIVECARTA_STYLE_ATTRS, remove from css file # there are constraints + value not in LIVECARTA_STYLE_ATTRS, remove
split_style[i] = "" split_style.remove(style)
continue
else: else:
if style_name in self.LIVECARTA_STYLE_ATTRS_MAPPING: if style_name in self.LIVECARTA_STYLE_ATTRS_MAPPING:
# function that converts our data # function that converts our data
@@ -157,7 +158,7 @@ class StyleReader:
split_style = self.update_inline_styles_to_livecarta_convention( split_style = self.update_inline_styles_to_livecarta_convention(
split_style) split_style)
style = "; ".join(split_style) style = "; ".join(split_style) if split_style else ""
return style return style
def process_inline_styles_in_html_soup(self, html_content): def process_inline_styles_in_html_soup(self, html_content):