epub converter: fix bg-color

2021-06-28 13:56:23 +03:00
parent f3fd1a5651
commit 45c1931ab3
3 changed files with 40 additions and 18 deletions
--- a/src/css_reader.py
+++ b/src/css_reader.py
@@ -95,13 +95,13 @@ to suit livecarta style convention.

 def get_bg_color(x):
    color = str2hex(x)
-    color = color if color not in ['#ffffff', '#fff'] else ''
+    color = color if color not in ['#ffffff', '#fff', 'white'] else ''
    return color


 def get_text_color(x):
    color = str2hex(x)
-    color = color if color not in ['#000000', '#000'] else ''
+    color = color if color not in ['#000000', '#000', 'black'] else ''
    return color


@@ -180,7 +180,7 @@ def clean_css(css):

 def add_inline_style_to_html_soup(soup1, css_text):
    livecarta_tmp_ids = []
-    h_regex = f'(^h[{LawCartaConfig.SUPPORTED_LEVELS + 1}-9]$)'
+    h_regex = f'(^h[1-9]$)'
    could_have_style_in_livecarta_regexp = re.compile('(^p$)|(^span$)|(^li$)|(^ul$)|(^ol$)|(^td$)|(^th$)|' + h_regex)
    elements_with_possible_style_attr = soup1.find_all(could_have_style_in_livecarta_regexp)
    for i, x in enumerate(elements_with_possible_style_attr):
@@ -193,8 +193,28 @@ def add_inline_style_to_html_soup(soup1, css_text):
                                       disable_validation=True)
    soup2 = BeautifulSoup(html_with_inline_style, features='lxml')

-    def remove_white_if_no_bgcolor(style_):
-        if ('color:white' in style_) and ('background' not in style_):
+    def remove_white_if_no_bgcolor(style_, tag):
+        if 'background' in style_:
+            return style_
+
+        # if text color is white, check that we have bg-color
+        if ('color:#ffffff' in style_) or ('color:#fff' in style_) or ('color:white' in style_):
+            # if bg color is inherited, just return style as is
+            for parent_tag in tag.parents:
+                # a white bg color does not need to be checked, as we do not write 'white bg color'
+                if parent_tag.attrs.get('style') and ('background' in parent_tag.attrs.get('style')):
+                    print(tag, parent_tag.attrs.get('style'))
+                    return style_
+
+            children = tag.find_all()
+            for child in children:
+                if child.attrs.get('style') and ('background' in child.attrs.get('style')):
+                    tmp_style = child.attrs['style'] + '; color:#fff; '
+                    child.attrs['style'] = tmp_style
+
+            # children with a bg color got white text color added above, so this tag doesn't need white color
+            style_ = style_.replace('color:#fff;', '')
+            style_ = style_.replace('color:#ffffff;', '')
            style_ = style_.replace('color:white;', '')
        return style_

@@ -209,7 +229,7 @@ def add_inline_style_to_html_soup(soup1, css_text):
        tag_with_style = soup2.find(attrs={'livecarta_id': i})
        if tag_with_style.attrs.get('style'):
            style = tag_with_style.attrs.get('style') + ';'
-            style = remove_white_if_no_bgcolor(style)
+            style = remove_white_if_no_bgcolor(style, tag_with_style)
            style = style.replace('background:', 'background-color:')
            to_remove = check_style_to_be_tag(style)
            new_tags = []
--- a/src/html_epub_preprocessor.py
+++ b/src/html_epub_preprocessor.py
@@ -97,7 +97,9 @@ def preprocess_table(body_tag: BeautifulSoup):

            if width:
                td.attrs['width'] = width
-            td.attrs['style'] = td.attrs.get('style').replace('border:0;', '')
+
+            if td.attrs.get('style'):
+                td.attrs['style'] = td.attrs['style'].replace('border:0;', '')

        if border_sizes:
            border_size = sum(border_sizes) / len(border_sizes)
@@ -270,16 +272,16 @@ def unwrap_structural_tags(body_tag):
        'figure', 'footer', 'iframe', 'span', 'p'
    ]
    # should run before other tag processing, so as not to remove the converter's empty tags with id
-    for s in body_tag.find_all("span"):
-        if (s.attrs.get('epub:type') == 'pagebreak') or s.attrs.get('id'):
-            continue
-        if s.contents:
-            is_not_struct_tag = [child.name not in structural_tags_names for child in s.contents]
-            if all(is_not_struct_tag):
-                continue
-
-        _add_span_to_save_ids_for_links(s)
-        s.unwrap()
+    # for s in body_tag.find_all("span"):
+    #     if (s.attrs.get('epub:type') == 'pagebreak') or s.attrs.get('id'):
+    #         continue
+    #     if s.contents:
+    #         is_not_struct_tag = [child.name not in structural_tags_names for child in s.contents]
+    #         if all(is_not_struct_tag):
+    #             continue
+    #
+    #     _add_span_to_save_ids_for_links(s)
+    #     s.unwrap()

    for div in body_tag.find_all("div"):
        if div.contents:
--- a/src/util/color_reader.py
+++ b/src/util/color_reader.py
@@ -80,7 +80,7 @@ def str2closest_html_color_name(s: str):

 def str2hex(s: str):
    if '#' in s:
-        return s
+        return s.lower()

    if ('rgb' in s) and ('%' in s):
        match = re.search(r'rgba*\(((\d+)%, *(\d+)%, *(\d+)%(, \d\.\d+)*)\)', s)