From 9fb7a7eda21891eb63d3f7ca2d61d0a6d3b1e7d5 Mon Sep 17 00:00:00 2001
From: shirshasa <katerinagorbac@gmail.com>
Date: Fri, 20 Aug 2021 16:18:52 +0300
Subject: [PATCH] epub converter: add new type of blocks

---
 src/html_epub_preprocessor.py | 45 ++++++++++++++++++++++++++---------
 1 file changed, 34 insertions(+), 11 deletions(-)
diff --git a/src/html_epub_preprocessor.py b/src/html_epub_preprocessor.py
index baf1b59..76f07b5 100644
--- a/src/html_epub_preprocessor.py
+++ b/src/html_epub_preprocessor.py
@@ -250,13 +250,24 @@ def preprocess_footnotes(source_html_tag: Tag, href2soup_html: dict = None, note
 def unwrap_structural_tags(body_tag):
 
     def _preserve_class_in_aside_tag(tag_):
-        # to save css style inherited from class, copy class to aside tag
+        # to save css style inherited from class, copy class to aside tag (which is parent to tag_)
         # this is for Wiley books with boxes
         tag_class = tag_.attrs['class'] if not isinstance(tag_.attrs['class'], list) else tag_.attrs['class'][0]
         if tag_.parent.name == 'aside':
             if not tag_.parent.attrs.get('class'):
                 tag_.parent.attrs['class'] = tag_class
 
+    def _preserve_class_in_section_tag(tag_):
+        # to keep the css style inherited from the class, copy the class to the single <p> inside the section (only if that <p> has no class)
+        # this is for Wiley books with boxes
+        tag_class = tag_.attrs['class'] if not isinstance(tag_.attrs['class'], list) else tag_.attrs['class'][0]
+        child_p_tag = tag_.find_all("p")
+        if len(child_p_tag) != 1:
+            return
+        child_p_tag = child_p_tag[0]
+        if not child_p_tag.attrs.get('class'):
+            child_p_tag.attrs['class'] = tag_class
+
     def _add_table_to_abc_books(tag_, border, bg_color):
         wrap_block_tag_with_table(body_tag, old_tag=tag_, width='100', border=border, bg_color=bg_color)
 
@@ -292,6 +303,7 @@ def unwrap_structural_tags(body_tag):
     for s in body_tag.find_all("section"):
         if s.attrs.get('class'):
             _preserve_class_in_aside_tag(s)
+            _preserve_class_in_section_tag(s)
         _add_span_to_save_ids_for_links(s)
         s.unwrap()
 
@@ -411,26 +423,36 @@ def wrap_block_tag_with_table(main_tag, old_tag, width='95', border='1px', bg_co
     return table
 
 
+def _clean_wiley_block(block):
+    hrs = block.find_all("p", attrs={"class": re.compile(".+ hr")})
+    for hr in hrs:
+        hr.extract()
+    h = block.find(re.compile("h[1-9]"))
+    if h:
+        h.name = "p"
+        h.insert_before(BeautifulSoup(features='lxml').new_tag("br"))
+
+
 def preprocess_block_tags(chapter_tag):
     for block in chapter_tag.find_all("blockquote"):
         if block.attrs.get('class') in ['feature1', 'feature2', 'feature3', 'feature4']:
-            hrs = block.find_all("p", attrs={"class": re.compile(".+ hr")})
-            for hr in hrs:
-                hr.extract()
-
-            h = block.find(re.compile("h[1-9]"))
-            if h:
-                h.name = "p"
-                h.insert_before(BeautifulSoup(features='lxml').new_tag("br"))
+            _clean_wiley_block(block)
 
             color = '#DDDDDD' if block.attrs.get('class') == 'feature1' else None
             color = '#EEEEEE' if block.attrs.get('class') == 'feature2' else color
-            wrap_block_tag_with_table(chapter_tag, block, color)
+            wrap_block_tag_with_table(chapter_tag, block, bg_color=color)
             block.insert_after(BeautifulSoup(features='lxml').new_tag("br"))
             block.unwrap()
 
+    for future_block in chapter_tag.find_all("p", attrs={"class": re.compile("feature[1234]")}):
+        _clean_wiley_block(future_block)
+        color = '#DDDDDD' if future_block.attrs.get('class') == 'feature1' else None
+        color = '#EEEEEE' if future_block.attrs.get('class') == 'feature2' else color
+        wrap_block_tag_with_table(chapter_tag, future_block, bg_color=color)
+
 
 def _prepare_formatted(text):
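+    # keep '<' and '>' as they are in the html code (NOTE(review): "\x3C"/"\x3E" are the same characters as '<'/'>', so these two replaces look like no-ops - confirm intent)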
     text = text.replace("<", "\x3C")
     text = text.replace(">", "\x3E")
     text = text.replace('\t', "\xa0 \xa0 ")  # &nbsp; &nbsp;
@@ -443,7 +465,7 @@ def preprocess_pre_tags(chapter_tag):
         new_tag = BeautifulSoup(features='lxml').new_tag("span")
         new_tag.attrs = pre.attrs.copy()
         spans = pre.find_all("span")
-        to_add_br = len(spans) > 1
+        to_add_br = len(spans) > 1  # if <pre> contains multiple <span> tags, a <br> has to be added after the content of each one
 
         for child in pre.children:
             if isinstance(child, NavigableString):
@@ -470,6 +492,7 @@ def preprocess_pre_tags(chapter_tag):
 
 
 def preprocess_code_tags(chapter_tag):
+    # emulates the style of <code>, <kdb>, <var> tags by turning them into <span> (NOTE(review): "kdb" is presumably a typo for the HTML <kbd> tag - confirm)
     for code in chapter_tag.find_all(re.compile("code|kdb|var")):
         code.name = 'span'
         if code.parent.name == "pre":