From 45c1931ab3f0fb8e367e0f6a7ebf8c3434cbd98a Mon Sep 17 00:00:00 2001
From: shirshasa <katerinagorbac@gmail.com>
Date: Mon, 28 Jun 2021 13:56:23 +0300
Subject: [PATCH] epub converter:  fix bg-color

---
 src/css_reader.py             | 32 ++++++++++++++++++++++++++------
 src/html_epub_preprocessor.py | 24 +++++++++++++-----------
 src/util/color_reader.py      |  2 +-
 3 files changed, 40 insertions(+), 18 deletions(-)

diff --git a/src/css_reader.py b/src/css_reader.py
index 12f1e49..4a4e724 100644
--- a/src/css_reader.py
+++ b/src/css_reader.py
@@ -95,13 +95,13 @@ to suit livecarta style convention.
 
 def get_bg_color(x):
     color = str2hex(x)
-    color = color if color not in ['#ffffff', '#fff'] else ''
+    color = color if color not in ['#ffffff', '#fff', 'white'] else ''
     return color
 
 
 def get_text_color(x):
     color = str2hex(x)
-    color = color if color not in ['#000000', '#000'] else ''
+    color = color if color not in ['#000000', '#000', 'black'] else ''
     return color
 
 
@@ -180,7 +180,7 @@ def clean_css(css):
 
 def add_inline_style_to_html_soup(soup1, css_text):
     livecarta_tmp_ids = []
-    h_regex = f'(^h[{LawCartaConfig.SUPPORTED_LEVELS + 1}-9]$)'
+    h_regex = f'(^h[1-9]$)'
     could_have_style_in_livecarta_regexp = re.compile('(^p$)|(^span$)|(^li$)|(^ul$)|(^ol$)|(^td$)|(^th$)|' + h_regex)
     elements_with_possible_style_attr = soup1.find_all(could_have_style_in_livecarta_regexp)
     for i, x in enumerate(elements_with_possible_style_attr):
@@ -193,8 +193,28 @@ def add_inline_style_to_html_soup(soup1, css_text):
                                        disable_validation=True)
     soup2 = BeautifulSoup(html_with_inline_style, features='lxml')
 
-    def remove_white_if_no_bgcolor(style_):
-        if ('color:white' in style_) and ('background' not in style_):
+    def remove_white_if_no_bgcolor(style_, tag):
+        if 'background' in style_:
+            return style_
+
+        # if text color is white, check that we have bg-color
+        if ('color:#ffffff' in style_) or ('color:#fff' in style_) or ('color:white' in style_):
+            # if bg color is inherited, just return style as is
+            for parent_tag in tag.parents:
+                # a white bg color need not be checked, since we never write a white bg color
+                if parent_tag.attrs.get('style') and ('background' in parent_tag.attrs.get('style')):
+                    print(tag, parent_tag.attrs.get('style'))
+                    return style_
+
+            children = tag.find_all()
+            for child in children:
+                if child.attrs.get('style') and ('background' in child.attrs.get('style')):
+                    tmp_style = child.attrs['style'] + '; color:#fff; '
+                    child.attrs['style'] = tmp_style
+
+            # children with a bg color were given white text color above, so this tag doesn't need white color
+            style_ = style_.replace('color:#fff;', '')
+            style_ = style_.replace('color:#ffffff;', '')
             style_ = style_.replace('color:white;', '')
         return style_
 
@@ -209,7 +229,7 @@ def add_inline_style_to_html_soup(soup1, css_text):
         tag_with_style = soup2.find(attrs={'livecarta_id': i})
         if tag_with_style.attrs.get('style'):
             style = tag_with_style.attrs.get('style') + ';'
-            style = remove_white_if_no_bgcolor(style)
+            style = remove_white_if_no_bgcolor(style, tag_with_style)
             style = style.replace('background:', 'background-color:')
             to_remove = check_style_to_be_tag(style)
             new_tags = []
diff --git a/src/html_epub_preprocessor.py b/src/html_epub_preprocessor.py
index ab34ba1..4ce6ce9 100644
--- a/src/html_epub_preprocessor.py
+++ b/src/html_epub_preprocessor.py
@@ -97,7 +97,9 @@ def preprocess_table(body_tag: BeautifulSoup):
 
             if width:
                 td.attrs['width'] = width
-            td.attrs['style'] = td.attrs.get('style').replace('border:0;', '')
+
+            if td.attrs.get('style'):
+                td.attrs['style'] = td.attrs['style'].replace('border:0;', '')
 
         if border_sizes:
             border_size = sum(border_sizes) / len(border_sizes)
@@ -270,16 +272,16 @@ def unwrap_structural_tags(body_tag):
         'figure', 'footer', 'iframe', 'span', 'p'
     ]
     # should be before other tags processing, not to remove converter empty tags with id
-    for s in body_tag.find_all("span"):
-        if (s.attrs.get('epub:type') == 'pagebreak') or s.attrs.get('id'):
-            continue
-        if s.contents:
-            is_not_struct_tag = [child.name not in structural_tags_names for child in s.contents]
-            if all(is_not_struct_tag):
-                continue
-
-        _add_span_to_save_ids_for_links(s)
-        s.unwrap()
+    # for s in body_tag.find_all("span"):
+    #     if (s.attrs.get('epub:type') == 'pagebreak') or s.attrs.get('id'):
+    #         continue
+    #     if s.contents:
+    #         is_not_struct_tag = [child.name not in structural_tags_names for child in s.contents]
+    #         if all(is_not_struct_tag):
+    #             continue
+    #
+    #     _add_span_to_save_ids_for_links(s)
+    #     s.unwrap()
 
     for div in body_tag.find_all("div"):
         if div.contents:
diff --git a/src/util/color_reader.py b/src/util/color_reader.py
index e5a9263..d7a3d61 100644
--- a/src/util/color_reader.py
+++ b/src/util/color_reader.py
@@ -80,7 +80,7 @@ def str2closest_html_color_name(s: str):
 
 def str2hex(s: str):
     if '#' in s:
-        return s
+        return s.lower()
 
     if ('rgb' in s) and ('%' in s):
         match = re.search(r'rgba*\(((\d+)%, *(\d+)%, *(\d+)%(, \d\.\d+)*)\)', s)