From b62062d6d252e37881d624435b2050bb45839dba Mon Sep 17 00:00:00 2001 From: shirshasa Date: Mon, 2 Aug 2021 11:29:59 +0300 Subject: [PATCH] epub converter: add wrapping tags in abc books --- src/html_epub_preprocessor.py | 30 +++++++++++++++++++++++------- 1 file changed, 23 insertions(+), 7 deletions(-) diff --git a/src/html_epub_preprocessor.py b/src/html_epub_preprocessor.py index 36cd35b..7e8ae9b 100644 --- a/src/html_epub_preprocessor.py +++ b/src/html_epub_preprocessor.py @@ -249,6 +249,17 @@ def preprocess_footnotes(source_html_tag: Tag, href2soup_html: dict = None, note def unwrap_structural_tags(body_tag): + def _preserve_class_in_aside_tag(tag_): + # to save css style inherited from class, copy class to aside tag + # this is for Wiley books with boxes + tag_class = tag_.attrs['class'] if not isinstance(tag_.attrs['class'], list) else tag_.attrs['class'][0] + if tag_.parent.name == 'aside': + if not tag_.parent.attrs.get('class'): + tag_.parent.attrs['class'] = tag_class + + def _add_table_to_abc_books(tag_, border, bg_color): + wrap_block_tag_with_table(body_tag, old_tag=tag_, width='100', border=border, bg_color=bg_color) + def _add_span_to_save_ids_for_links(tag_to_be_removed): if tag_to_be_removed.attrs.get('id'): insert_span_with_attrs_before_tag(main_tag=body_tag, tag=tag_to_be_removed, @@ -261,6 +272,14 @@ def unwrap_structural_tags(body_tag): ] for div in body_tag.find_all("div"): + if div.attrs.get('class'): + div_class = div.attrs['class'] if not isinstance(div.attrs['class'], list) else div.attrs['class'][0] + if div_class in ['C409', 'C409a']: + _add_table_to_abc_books(div, border='solid 3px', bg_color='#e7e7e9') + + elif div_class in ['C441', 'C816']: + _add_table_to_abc_books(div, border='solid #6e6e70 1px', bg_color='#e7e7e8') + if div.contents: is_not_struct_tag = [child.name not in structural_tags_names for child in div.contents] if all(is_not_struct_tag): @@ -272,10 +291,7 @@ def unwrap_structural_tags(body_tag): for s in body_tag.find_all("section"): if s.attrs.get('class'): - class_ = s.attrs['class'] if not isinstance(s.attrs['class'], list) else s.attrs['class'][0] - if s.parent.name == 'aside': - if not s.parent.attrs.get('class'): - s.parent.attrs['class'] = class_ + _preserve_class_in_aside_tag(s) _add_span_to_save_ids_for_links(s) s.unwrap() @@ -376,11 +392,11 @@ def wrap_preformatted_span_with_table(main_tag, old_tag): return table -def wrap_block_tag_with_table(main_tag, old_tag, bg_color=None): +def wrap_block_tag_with_table(main_tag, old_tag, width='95', border='1px solid', bg_color=None): table = main_tag.new_tag("table") - table.attrs['border'] = '1px solid' + table.attrs['border'] = border table.attrs['align'] = 'center' - table.attrs['style'] = 'width:95%;' + table.attrs['style'] = f'width:{width}%;' tbody = main_tag.new_tag("tbody") tr = main_tag.new_tag("tr") td = main_tag.new_tag("td")