forked from LiveCarta/BookConverter
epub converter: fix bg-color
This commit is contained in:
@@ -95,13 +95,13 @@ to suit livecarta style convention.
|
||||
|
||||
def get_bg_color(x):
    """Return the background colour for *x*, or '' when it is white.

    The value is first passed through ``str2hex``; a result equal to
    '#ffffff', '#fff' or 'white' is replaced by an empty string.
    """
    color = str2hex(x)
    if color in ('#ffffff', '#fff', 'white'):
        return ''
    return color
|
||||
|
||||
|
||||
def get_text_color(x):
    """Return the text colour for *x*, or '' when it is black.

    The value is first passed through ``str2hex``; a result equal to
    '#000000', '#000' or 'black' is replaced by an empty string.
    """
    color = str2hex(x)
    if color in ('#000000', '#000', 'black'):
        return ''
    return color
|
||||
|
||||
|
||||
@@ -180,7 +180,7 @@ def clean_css(css):
|
||||
|
||||
def add_inline_style_to_html_soup(soup1, css_text):
|
||||
livecarta_tmp_ids = []
|
||||
h_regex = f'(^h[{LawCartaConfig.SUPPORTED_LEVELS + 1}-9]$)'
|
||||
h_regex = f'(^h[1-9]$)'
|
||||
could_have_style_in_livecarta_regexp = re.compile('(^p$)|(^span$)|(^li$)|(^ul$)|(^ol$)|(^td$)|(^th$)|' + h_regex)
|
||||
elements_with_possible_style_attr = soup1.find_all(could_have_style_in_livecarta_regexp)
|
||||
for i, x in enumerate(elements_with_possible_style_attr):
|
||||
@@ -193,8 +193,28 @@ def add_inline_style_to_html_soup(soup1, css_text):
|
||||
disable_validation=True)
|
||||
soup2 = BeautifulSoup(html_with_inline_style, features='lxml')
|
||||
|
||||
def remove_white_if_no_bgcolor(style_, tag):
    """Drop a white text colour from *style_* when nothing gives it a background.

    :param style_: inline style string of ``tag``; declarations are expected
        to be written without spaces and terminated by ';' (e.g.
        ``'color:#fff;'``), which is the form the original substring checks
        relied on.
    :param tag: the element owning ``style_``; its ``parents`` and
        ``find_all()`` are consulted, and descendants may be mutated.
    :return: ``style_`` unchanged when it declares a background itself, has
        no white text colour, or an ancestor declares a background;
        otherwise ``style_`` with the white ``color`` declaration removed.

    Side effect: every descendant whose own style mentions 'background' gets
    ``'; color:#fff; '`` appended, so white text survives exactly where a
    background is present.
    """
    if 'background' in style_:
        return style_

    # Match only the standalone `color` property. The look-behind keeps
    # `border-color:#fff;` (and other `*-color` properties) from being
    # mistaken for the text colour, and the trailing guard rejects longer
    # values such as `#fff000` or `whitesmoke`.
    white_text = re.search(r'(?<![\w-])color:(?:#ffffff|#fff|white)(?!\w)', style_)
    if white_text is None:
        return style_

    # If a background is inherited from an ancestor, keep the style as is.
    # Per the original note, white backgrounds are not written out by the
    # converter, so any 'background' found here is treated as non-white.
    for parent_tag in tag.parents:
        parent_style = parent_tag.attrs.get('style')
        if parent_style and 'background' in parent_style:
            return style_

    # Push the white text colour down to descendants that do have a
    # background, so this tag no longer needs to carry it.
    for child in tag.find_all():
        child_style = child.attrs.get('style')
        if child_style and 'background' in child_style:
            child.attrs['style'] = child_style + '; color:#fff; '

    # Remove only ';'-terminated white declarations of the `color` property.
    return re.sub(r'(?<![\w-])color:(?:#ffffff|#fff|white);', '', style_)
|
||||
|
||||
@@ -209,7 +229,7 @@ def add_inline_style_to_html_soup(soup1, css_text):
|
||||
tag_with_style = soup2.find(attrs={'livecarta_id': i})
|
||||
if tag_with_style.attrs.get('style'):
|
||||
style = tag_with_style.attrs.get('style') + ';'
|
||||
style = remove_white_if_no_bgcolor(style)
|
||||
style = remove_white_if_no_bgcolor(style, tag_with_style)
|
||||
style = style.replace('background:', 'background-color:')
|
||||
to_remove = check_style_to_be_tag(style)
|
||||
new_tags = []
|
||||
|
||||
@@ -97,7 +97,9 @@ def preprocess_table(body_tag: BeautifulSoup):
|
||||
|
||||
if width:
|
||||
td.attrs['width'] = width
|
||||
td.attrs['style'] = td.attrs.get('style').replace('border:0;', '')
|
||||
|
||||
if td.attrs.get('style'):
|
||||
td.attrs['style'] = td.attrs['style'].replace('border:0;', '')
|
||||
|
||||
if border_sizes:
|
||||
border_size = sum(border_sizes) / len(border_sizes)
|
||||
@@ -270,16 +272,16 @@ def unwrap_structural_tags(body_tag):
|
||||
'figure', 'footer', 'iframe', 'span', 'p'
|
||||
]
|
||||
# should be before other tags processing, not to remove converter empty tags with id
|
||||
for s in body_tag.find_all("span"):
|
||||
if (s.attrs.get('epub:type') == 'pagebreak') or s.attrs.get('id'):
|
||||
continue
|
||||
if s.contents:
|
||||
is_not_struct_tag = [child.name not in structural_tags_names for child in s.contents]
|
||||
if all(is_not_struct_tag):
|
||||
continue
|
||||
|
||||
_add_span_to_save_ids_for_links(s)
|
||||
s.unwrap()
|
||||
# for s in body_tag.find_all("span"):
|
||||
# if (s.attrs.get('epub:type') == 'pagebreak') or s.attrs.get('id'):
|
||||
# continue
|
||||
# if s.contents:
|
||||
# is_not_struct_tag = [child.name not in structural_tags_names for child in s.contents]
|
||||
# if all(is_not_struct_tag):
|
||||
# continue
|
||||
#
|
||||
# _add_span_to_save_ids_for_links(s)
|
||||
# s.unwrap()
|
||||
|
||||
for div in body_tag.find_all("div"):
|
||||
if div.contents:
|
||||
|
||||
@@ -80,7 +80,7 @@ def str2closest_html_color_name(s: str):
|
||||
|
||||
def str2hex(s: str):
|
||||
if '#' in s:
|
||||
return s
|
||||
return s.lower()
|
||||
|
||||
if ('rgb' in s) and ('%' in s):
|
||||
match = re.search(r'rgba*\(((\d+)%, *(\d+)%, *(\d+)%(, \d\.\d+)*)\)', s)
|
||||
|
||||
Reference in New Issue
Block a user