From 367bb16a239c8ed85e61f056bb281404ac1894a3 Mon Sep 17 00:00:00 2001 From: shirshasa Date: Mon, 23 Aug 2021 16:24:38 +0300 Subject: [PATCH] epub converter: add new type of blocks 2 --- src/html_epub_preprocessor.py | 30 +++++++++++++++++++++--------- 1 file changed, 21 insertions(+), 9 deletions(-) diff --git a/src/html_epub_preprocessor.py b/src/html_epub_preprocessor.py index 76f07b5..cb3972b 100644 --- a/src/html_epub_preprocessor.py +++ b/src/html_epub_preprocessor.py @@ -257,16 +257,26 @@ def unwrap_structural_tags(body_tag): if not tag_.parent.attrs.get('class'): tag_.parent.attrs['class'] = tag_class - def _preserve_class_in_section_tag(tag_): + def _preserve_class_in_section_tag(tag_) -> bool: # to save css style inherited from class, copy class to child <p>

# this is for Wiley books with boxes + # returns True, if <section>

could be unwrapped + tag_class = tag_.attrs['class'] if not isinstance(tag_.attrs['class'], list) else tag_.attrs['class'][0] - child_p_tag = tag_.find_all("p") - if len(child_p_tag) != 1: - return - child_p_tag = child_p_tag[0] - if not child_p_tag.attrs.get('class'): - child_p_tag.attrs['class'] = tag_class + if 'feature' not in tag_class: + return True + child_p_tags = tag_.find_all("p") + if len(child_p_tags) == 1: + child_p_tag = child_p_tags[0] + if not child_p_tag.attrs.get('class'): + child_p_tag.attrs['class'] = tag_class + return True + + elif len(child_p_tags) > 1: + tag_.name = 'p' + return False + else: + return True def _add_table_to_abc_books(tag_, border, bg_color): wrap_block_tag_with_table(body_tag, old_tag=tag_, width='100', border=border, bg_color=bg_color) @@ -301,11 +311,13 @@ def unwrap_structural_tags(body_tag): div.unwrap() for s in body_tag.find_all("section"): + could_be_unwrapped = True if s.attrs.get('class'): _preserve_class_in_aside_tag(s) - _preserve_class_in_section_tag(s) + could_be_unwrapped = _preserve_class_in_section_tag(s) _add_span_to_save_ids_for_links(s) - s.unwrap() + if could_be_unwrapped: + s.unwrap() for s in body_tag.find_all("article"): _add_span_to_save_ids_for_links(s)