epub converter: fix bg-color

This commit is contained in:
shirshasa
2021-06-28 13:56:23 +03:00
parent f3fd1a5651
commit 45c1931ab3
3 changed files with 40 additions and 18 deletions

View File

@@ -95,13 +95,13 @@ to suit livecarta style convention.
def get_bg_color(x):
    """Return the background colour for CSS value *x*, or '' if it is white.

    The value is first normalised with ``str2hex``.  A white result, spelled
    as ``#ffffff``, ``#fff`` or ``white``, is replaced by an empty string;
    any other colour is returned as ``str2hex`` produced it.
    """
    color = str2hex(x)
    # A single membership test covers every spelling of white that is dropped.
    return '' if color in ('#ffffff', '#fff', 'white') else color
def get_text_color(x):
    """Return the text colour for CSS value *x*, or '' if it is black.

    The value is first normalised with ``str2hex``.  A black result, spelled
    as ``#000000``, ``#000`` or ``black``, is replaced by an empty string;
    any other colour is returned as ``str2hex`` produced it.
    """
    color = str2hex(x)
    # A single membership test covers every spelling of black that is dropped.
    return '' if color in ('#000000', '#000', 'black') else color
@@ -180,7 +180,7 @@ def clean_css(css):
def add_inline_style_to_html_soup(soup1, css_text):
livecarta_tmp_ids = []
h_regex = f'(^h[{LawCartaConfig.SUPPORTED_LEVELS + 1}-9]$)'
h_regex = f'(^h[1-9]$)'
could_have_style_in_livecarta_regexp = re.compile('(^p$)|(^span$)|(^li$)|(^ul$)|(^ol$)|(^td$)|(^th$)|' + h_regex)
elements_with_possible_style_attr = soup1.find_all(could_have_style_in_livecarta_regexp)
for i, x in enumerate(elements_with_possible_style_attr):
@@ -193,8 +193,28 @@ def add_inline_style_to_html_soup(soup1, css_text):
disable_validation=True)
soup2 = BeautifulSoup(html_with_inline_style, features='lxml')
def remove_white_if_no_bgcolor(style_):
if ('color:white' in style_) and ('background' not in style_):
def remove_white_if_no_bgcolor(style_, tag):
    """Strip a white text colour from *style_* unless a background backs it.

    Args:
        style_: inline style string of *tag*.
        tag: the element that carries *style_*; only its ``parents``,
            ``find_all()`` and the ``attrs`` of related elements are used.

    Returns:
        *style_* unchanged when it mentions a background itself, when it has
        no white text colour, or when any ancestor's inline style mentions a
        background.  Otherwise *style_* with the ``color:#fff;``,
        ``color:#ffffff;`` and ``color:white;`` declarations removed.

    Side effects:
        In the stripping case, every descendant whose inline style mentions a
        background gets ``'; color:#fff; '`` appended to that style, so the
        white text colour moves onto the elements that own a background.
    """
    if 'background' in style_:
        return style_

    # Only a white text colour needs a background to be checked for.
    white_markers = ('color:#ffffff', 'color:#fff', 'color:white')
    if not any(marker in style_ for marker in white_markers):
        return style_

    # A background inherited from any ancestor is enough: keep the style.
    # A white background needs no special check, as it is never written out.
    for parent_tag in tag.parents:
        parent_style = parent_tag.attrs.get('style')
        if parent_style and 'background' in parent_style:
            return style_

    # Give the white text colour to each descendant that has a background.
    for child in tag.find_all():
        child_style = child.attrs.get('style')
        if child_style and 'background' in child_style:
            child.attrs['style'] = child_style + '; color:#fff; '

    # Descendants with a background now carry the white colour themselves,
    # so this tag no longer needs it.
    for declaration in ('color:#fff;', 'color:#ffffff;', 'color:white;'):
        style_ = style_.replace(declaration, '')
    return style_
@@ -209,7 +229,7 @@ def add_inline_style_to_html_soup(soup1, css_text):
tag_with_style = soup2.find(attrs={'livecarta_id': i})
if tag_with_style.attrs.get('style'):
style = tag_with_style.attrs.get('style') + ';'
style = remove_white_if_no_bgcolor(style)
style = remove_white_if_no_bgcolor(style, tag_with_style)
style = style.replace('background:', 'background-color:')
to_remove = check_style_to_be_tag(style)
new_tags = []

View File

@@ -97,7 +97,9 @@ def preprocess_table(body_tag: BeautifulSoup):
if width:
td.attrs['width'] = width
td.attrs['style'] = td.attrs.get('style').replace('border:0;', '')
if td.attrs.get('style'):
td.attrs['style'] = td.attrs['style'].replace('border:0;', '')
if border_sizes:
border_size = sum(border_sizes) / len(border_sizes)
@@ -270,16 +272,16 @@ def unwrap_structural_tags(body_tag):
'figure', 'footer', 'iframe', 'span', 'p'
]
# should be before other tags processing, not to remove converter empty tags with id
for s in body_tag.find_all("span"):
if (s.attrs.get('epub:type') == 'pagebreak') or s.attrs.get('id'):
continue
if s.contents:
is_not_struct_tag = [child.name not in structural_tags_names for child in s.contents]
if all(is_not_struct_tag):
continue
_add_span_to_save_ids_for_links(s)
s.unwrap()
# for s in body_tag.find_all("span"):
# if (s.attrs.get('epub:type') == 'pagebreak') or s.attrs.get('id'):
# continue
# if s.contents:
# is_not_struct_tag = [child.name not in structural_tags_names for child in s.contents]
# if all(is_not_struct_tag):
# continue
#
# _add_span_to_save_ids_for_links(s)
# s.unwrap()
for div in body_tag.find_all("div"):
if div.contents:

View File

@@ -80,7 +80,7 @@ def str2closest_html_color_name(s: str):
def str2hex(s: str):
if '#' in s:
return s
return s.lower()
if ('rgb' in s) and ('%' in s):
match = re.search(r'rgba*\(((\d+)%, *(\d+)%, *(\d+)%(, \d\.\d+)*)\)', s)