forked from LiveCarta/BookConverter
Fix style processor error
This commit is contained in:
@@ -14,13 +14,12 @@ class HtmlDocxProcessor:
|
|||||||
def __init__(self, logger: BookLogger, html_soup: BeautifulSoup, html_preprocessor, style_preprocessor):
|
def __init__(self, logger: BookLogger, html_soup: BeautifulSoup, html_preprocessor, style_preprocessor):
|
||||||
self.logger = logger
|
self.logger = logger
|
||||||
self.html_soup = html_soup
|
self.html_soup = html_soup
|
||||||
self.body_tag = self.html_soup.body
|
|
||||||
self.html_preprocessor = html_preprocessor
|
self.html_preprocessor = html_preprocessor
|
||||||
self.style_preprocessor = style_preprocessor
|
self.style_preprocessor = style_preprocessor
|
||||||
self.content: List[Tag] = []
|
self.content: List[Tag] = []
|
||||||
|
|
||||||
def _font_to_span(self):
|
def _font_to_span(self):
|
||||||
for font in self.body_tag.find_all("font"):
|
for font in self.html_soup.find_all("font"):
|
||||||
font.name = "span"
|
font.name = "span"
|
||||||
|
|
||||||
|
|
||||||
@@ -226,10 +225,12 @@ class HtmlDocxProcessor:
|
|||||||
|
|
||||||
self.logger.log("Inline style reading.")
|
self.logger.log("Inline style reading.")
|
||||||
self.style_preprocessor.process_inline_styles_in_html_soup(
|
self.style_preprocessor.process_inline_styles_in_html_soup(
|
||||||
self.body_tag)
|
self.html_soup)
|
||||||
|
|
||||||
self.logger.log("Inline style processing.")
|
self.logger.log("Inline style processing.")
|
||||||
modify_html_soup_with_css_styles(self.body_tag)
|
self.html_soup = modify_html_soup_with_css_styles(self.html_soup)
|
||||||
|
|
||||||
|
self.body_tag = self.html_soup.body
|
||||||
|
|
||||||
self.logger.log("Image processing.")
|
self.logger.log("Image processing.")
|
||||||
images = process_images(access, path_to_html=html_path,
|
images = process_images(access, path_to_html=html_path,
|
||||||
|
|||||||
@@ -14,7 +14,7 @@ class InlineStyleProcessor:
|
|||||||
def __init__(self, tag_inline_style: Tag):
|
def __init__(self, tag_inline_style: Tag):
|
||||||
# tag with inline style + style parsed from css file
|
# tag with inline style + style parsed from css file
|
||||||
self.tag_inline_style = tag_inline_style
|
self.tag_inline_style = tag_inline_style
|
||||||
self.tag_inline_style.attrs['style']: str = self.process_inline_style()
|
self.tag_inline_style.attrs["style"]: str = self.process_inline_style()
|
||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
def remove_white_if_no_bgcolor(style_: str, tag: Tag) -> str:
|
def remove_white_if_no_bgcolor(style_: str, tag: Tag) -> str:
|
||||||
@@ -80,19 +80,19 @@ class InlineStyleProcessor:
|
|||||||
processed_style = ";".join(split_style)+';'
|
processed_style = ";".join(split_style)+';'
|
||||||
|
|
||||||
margin_left_regexp = re.compile(
|
margin_left_regexp = re.compile(
|
||||||
r"((margin-left|margin): *(-*\w+);*)")
|
r"((margin-left|margin): *-*((\d*)\.*\d+)\w+;*)")
|
||||||
text_indent_regexp = re.compile(
|
text_indent_regexp = re.compile(
|
||||||
r"(text-indent: *(-*\w+);*)")
|
r"(text-indent: *-*((\d*)\.*\d+)\w+;*)")
|
||||||
|
|
||||||
has_margin = re.search(margin_left_regexp, processed_style)
|
has_margin = re.search(margin_left_regexp, processed_style)
|
||||||
has_text_indent = re.search(text_indent_regexp, processed_style)
|
has_text_indent = re.search(text_indent_regexp, processed_style)
|
||||||
if has_margin:
|
if has_margin:
|
||||||
num_m = abs(int("0" + "".join(
|
num_m = abs(int("0" + "".join(
|
||||||
filter(str.isdigit, str(has_margin.group(3))))))
|
filter(str.isdigit, str(has_margin.group(4))))))
|
||||||
|
|
||||||
if has_text_indent:
|
if has_text_indent:
|
||||||
num_ti = abs(int("0" + "".join(
|
num_ti = abs(int("0" + "".join(
|
||||||
filter(str.isdigit, str(has_text_indent.group(2))))))
|
filter(str.isdigit, str(has_text_indent.group(3))))))
|
||||||
processed_style = processed_style.replace(has_text_indent.group(1), "text-indent: " +
|
processed_style = processed_style.replace(has_text_indent.group(1), "text-indent: " +
|
||||||
str(abs(num_m - num_ti)) + "px; ")
|
str(abs(num_m - num_ti)) + "px; ")
|
||||||
processed_style = processed_style.replace(
|
processed_style = processed_style.replace(
|
||||||
@@ -106,7 +106,7 @@ class InlineStyleProcessor:
|
|||||||
elif has_text_indent:
|
elif has_text_indent:
|
||||||
processed_style = processed_style.replace(has_text_indent.group(1), "text-indent: " +
|
processed_style = processed_style.replace(has_text_indent.group(1), "text-indent: " +
|
||||||
str(abs(int("0" + "".join(
|
str(abs(int("0" + "".join(
|
||||||
filter(str.isdigit, str(has_text_indent.group(2)))))))
|
filter(str.isdigit, str(has_text_indent.group(3)))))))
|
||||||
+ "px; ")
|
+ "px; ")
|
||||||
return processed_style
|
return processed_style
|
||||||
return processed_style
|
return processed_style
|
||||||
@@ -127,22 +127,25 @@ class InlineStyleProcessor:
|
|||||||
processed inline style
|
processed inline style
|
||||||
|
|
||||||
"""
|
"""
|
||||||
inline_style = self.tag_inline_style.attrs.get("style") + ";"
|
if self.tag_inline_style.attrs.get("style"):
|
||||||
# 1. Remove white color if tag doesn"t have background color in style
|
inline_style = self.tag_inline_style.attrs.get("style") + ";"
|
||||||
inline_style = self.remove_white_if_no_bgcolor(
|
# 1. Remove white color if tag doesn't have background color in style
|
||||||
inline_style, self.tag_inline_style)
|
inline_style = self.remove_white_if_no_bgcolor(
|
||||||
inline_style = inline_style.replace(
|
inline_style, self.tag_inline_style)
|
||||||
"list-style-image", "list-style-type")
|
inline_style = inline_style.replace(
|
||||||
# 2. Create list of styles from inline style
|
"list-style-image", "list-style-type")
|
||||||
# replace all spaces between "; & letter" to ";"
|
# 2. Create list of styles from inline style
|
||||||
style = re.sub(r"; *", ";", inline_style)
|
# replace all spaces between "; & letter" to ";"
|
||||||
# when we split style by ";", last element of the list is "" - None (remove it)
|
style = re.sub(r"; *", ";", inline_style)
|
||||||
split_inline_style: list = list(filter(None, style.split(";")))
|
# when we split style by ";", last element of the list is "" - None (remove it)
|
||||||
# 3. Duplicate styles check - if the tag had duplicate styles
|
split_inline_style: list = list(filter(None, style.split(";")))
|
||||||
# split_inline_style = self.duplicate_styles_check(split_inline_style)
|
# 3. Duplicate styles check - if the tag had duplicate styles
|
||||||
# 4. Processing indents
|
# split_inline_style = self.duplicate_styles_check(split_inline_style)
|
||||||
inline_style: str = self.indents_processing(split_inline_style)
|
# 4. Processing indents
|
||||||
return inline_style
|
inline_style: str = self.indents_processing(split_inline_style)
|
||||||
|
return inline_style
|
||||||
|
else:
|
||||||
|
return ""
|
||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
def check_style_to_be_tag(style: str) -> List[tuple]:
|
def check_style_to_be_tag(style: str) -> List[tuple]:
|
||||||
|
|||||||
@@ -126,17 +126,18 @@ class StyleReader:
|
|||||||
return constraints_on_value, value_not_in_possible_values_list
|
return constraints_on_value, value_not_in_possible_values_list
|
||||||
|
|
||||||
def update_inline_styles_to_livecarta_convention(self, split_style: list) -> list:
|
def update_inline_styles_to_livecarta_convention(self, split_style: list) -> list:
|
||||||
for i, style in enumerate(split_style):
|
for i, style in reversed(list(enumerate(split_style))):
|
||||||
style_name, style_value = style.split(":")
|
style_name, style_value = style.split(":")
|
||||||
if style_name not in LiveCartaConfig.LIVECARTA_STYLE_ATTRS:
|
if style_name not in LiveCartaConfig.LIVECARTA_STYLE_ATTRS:
|
||||||
# property not in LIVECARTA_STYLE_ATTRS, remove from css file
|
# property not in LIVECARTA_STYLE_ATTRS, remove
|
||||||
split_style[i] = ""
|
split_style.remove(style)
|
||||||
return split_style
|
continue
|
||||||
|
|
||||||
cleaned_value = self.clean_value(style_value, style_name)
|
cleaned_value = self.clean_value(style_value, style_name)
|
||||||
if all(self.style_conditions(cleaned_value, style_name)):
|
if all(self.style_conditions(cleaned_value, style_name)):
|
||||||
# there are constraints + value not in LIVECARTA_STYLE_ATTRS, remove from css file
|
# there are constraints + value not in LIVECARTA_STYLE_ATTRS, remove
|
||||||
split_style[i] = ""
|
split_style.remove(style)
|
||||||
|
continue
|
||||||
else:
|
else:
|
||||||
if style_name in self.LIVECARTA_STYLE_ATTRS_MAPPING:
|
if style_name in self.LIVECARTA_STYLE_ATTRS_MAPPING:
|
||||||
# function that converts our data
|
# function that converts our data
|
||||||
@@ -157,7 +158,7 @@ class StyleReader:
|
|||||||
|
|
||||||
split_style = self.update_inline_styles_to_livecarta_convention(
|
split_style = self.update_inline_styles_to_livecarta_convention(
|
||||||
split_style)
|
split_style)
|
||||||
style = "; ".join(split_style)
|
style = "; ".join(split_style) if split_style else ""
|
||||||
return style
|
return style
|
||||||
|
|
||||||
def process_inline_styles_in_html_soup(self, html_content):
|
def process_inline_styles_in_html_soup(self, html_content):
|
||||||
|
|||||||
Reference in New Issue
Block a user