epub converter: add aside as table - block processing

2021-07-08 15:08:34 +03:00
parent 8e0fb42c18
commit 7963486d7c
1 changed files with 45 additions and 0 deletions
--- a/src/html_epub_preprocessor.py
+++ b/src/html_epub_preprocessor.py
@@ -129,6 +129,7 @@ def insert_span_with_attrs_before_tag(main_tag, tag, id_, class_):
    new_tag = main_tag.new_tag("span")
    new_tag.attrs['id'] = id_ or ''
    new_tag.attrs['class'] = class_ or ''
+    new_tag.string = "\xa0"
    tag.insert_before(new_tag)


@@ -298,6 +299,11 @@ def unwrap_structural_tags(body_tag):
        div.unwrap()

    for s in body_tag.find_all("section"):
+        if s.attrs.get('class'):
+            class_ = s.attrs['class'] if not isinstance(s.attrs['class'], list) else s.attrs['class'][0]
+            if s.parent.name == 'aside':
+                if not s.parent.attrs.get('class'):
+                    s.parent.attrs['class'] = class_
        _add_span_to_save_ids_for_links(s)
        s.unwrap()

@@ -398,6 +404,44 @@ def wrap_span_with_table(main_tag, old_tag):
    return table


+def wrap_block_with_table(main_tag, old_tag, color=None):
+    """Wrap ``old_tag`` in a bordered, centred, single-cell table.
+
+    The nesting built around ``old_tag`` is
+    ``table > tbody > tr > td > old_tag``; a ``<br>`` is inserted right after
+    the table.
+
+    :param main_tag: object whose ``new_tag`` factory is used to create every
+        new element (the soup the chapter belongs to)
+    :param old_tag: element that ends up inside the table cell
+    :param color: optional value for the cell's ``bgcolor`` attribute; the
+        attribute is not set when this is falsy
+    :return: the newly created ``table`` tag
+    """
+    table = main_tag.new_tag("table")
+    # NOTE(review): the HTML ``border`` attribute normally takes a bare pixel
+    # count; the CSS-like value is kept unchanged to preserve emitted markup.
+    table.attrs['border'] = '1px solid'
+    table.attrs['align'] = 'center'
+    table.attrs['style'] = 'width:95%;'
+    tbody = main_tag.new_tag("tbody")
+    tr = main_tag.new_tag("tr")
+    td = main_tag.new_tag("td")
+    # NOTE(review): ``border-radius`` is a CSS property, not an HTML attribute;
+    # kept as an attribute to preserve the emitted markup - confirm whether a
+    # downstream consumer reads it or whether it belongs in ``style``.
+    td.attrs['border-radius'] = '8px'
+    if color:
+        td.attrs['bgcolor'] = color
+    # Wrap from the inside out so each new element takes its child's place.
+    old_tag.wrap(td)
+    td.wrap(tr)
+    tr.wrap(tbody)
+    tbody.wrap(table)
+    # Create the <br> with the same factory as the other elements instead of
+    # building a throwaway parser document on every call.
+    table.insert_after(main_tag.new_tag("br"))
+    return table
+
+
+def preprocess_block_tags(chapter_tag):
+    """Turn "feature" blockquotes into bordered single-cell tables.
+
+    For every ``blockquote`` whose class is ``feature1`` .. ``feature4``:
+
+    * ``<p>`` children whose class matches ``".+ hr"`` are removed;
+    * the first tag whose name matches ``h[1-9]`` is renamed to ``p`` and gets
+      a ``<br>`` inserted before it;
+    * the blockquote is wrapped with ``wrap_block_with_table`` (grey
+      background for ``feature1``/``feature2``, none otherwise), a ``<br>`` is
+      inserted after it, and the blockquote tag itself is unwrapped.
+
+    :param chapter_tag: soup of the chapter; modified in place
+    """
+    # Background colour per recognised feature class (None = no bgcolor).
+    feature_colors = {
+        'feature1': '#DDDDDD',
+        'feature2': '#EEEEEE',
+        'feature3': None,
+        'feature4': None,
+    }
+    # Loop-invariant patterns, compiled once.
+    hr_class = re.compile(".+ hr")
+    heading_name = re.compile("h[1-9]")
+
+    for block in chapter_tag.find_all("blockquote"):
+        # BeautifulSoup's HTML parsers expose ``class`` as a list of strings,
+        # while code that assigns the attribute by hand may store a plain
+        # string. Comparing the raw value against strings never matches the
+        # list form, so normalise both shapes to a list first.
+        classes = block.attrs.get('class') or []
+        if isinstance(classes, str):
+            classes = [classes]
+        feature = next((c for c in classes if c in feature_colors), None)
+        if feature is None:
+            continue
+
+        for hr in block.find_all("p", attrs={"class": hr_class}):
+            hr.extract()
+
+        h = block.find(heading_name)
+        if h is not None:
+            h.name = "p"
+            h.insert_before(chapter_tag.new_tag("br"))
+
+        wrap_block_with_table(chapter_tag, block, feature_colors[feature])
+        # The <br> lands inside the table cell, after the blockquote's
+        # content, because the blockquote is already wrapped at this point.
+        block.insert_after(chapter_tag.new_tag("br"))
+        block.unwrap()
+
+
 def preprocess_pre_tags(chapter_tag):
    for pre in chapter_tag.find_all("pre"):
        new_tag = BeautifulSoup(features='lxml').new_tag("span")
@@ -451,6 +495,7 @@ def prepare_title_and_content(title, chapter_tag: BeautifulSoup, remove_title_fr
    preprocess_table(chapter_tag)
    preprocess_code_tags(chapter_tag)
    preprocess_pre_tags(chapter_tag)
+    preprocess_block_tags(chapter_tag)
    # 2. class removal
    for tag in chapter_tag.find_all(recursive=True):
        if hasattr(tag, 'attrs') and tag.attrs.get('class') and (tag.attrs.get('class') not in ['link-anchor']):