forked from LiveCarta/BookConverter
Add lines with style 'border-bottom'
This commit is contained in:
@@ -144,7 +144,7 @@ def clean_headings_content(content: Tag, title: str):
|
||||
break
|
||||
|
||||
|
||||
def _heading_tag2p_tag(body_tag):
|
||||
def heading_tag_to_p_tag(body_tag):
|
||||
"""
|
||||
Function to convert all lower level headings to p tags
|
||||
"""
|
||||
@@ -267,7 +267,7 @@ def unwrap_structural_tags(body_tag):
|
||||
if not tag_.parent.attrs.get('class'):
|
||||
tag_.parent.attrs['class'] = tag_class
|
||||
|
||||
def _preserve_class_in_section_tag(tag_) -> bool:
|
||||
def preserve_class_in_section_tag(tag_) -> bool:
|
||||
# to save css style inherited from class, copy class to child <p>
|
||||
# this is for Wiley books with boxes
|
||||
# returns True, if <section> could be unwrapped
|
||||
@@ -288,10 +288,10 @@ def unwrap_structural_tags(body_tag):
|
||||
else:
|
||||
return True
|
||||
|
||||
def _add_table_to_abc_books(tag_, border, bg_color):
|
||||
def add_table_to_abc_books(tag_, border, bg_color):
|
||||
wrap_block_tag_with_table(body_tag, old_tag=tag_, width='100', border=border, bg_color=bg_color)
|
||||
|
||||
def _add_span_to_save_ids_for_links(tag_to_be_removed):
|
||||
def add_span_to_save_ids_for_links(tag_to_be_removed):
|
||||
if tag_to_be_removed.attrs.get('id'):
|
||||
insert_span_with_attrs_before_tag(main_tag=body_tag, tag=tag_to_be_removed,
|
||||
id_=tag_to_be_removed.attrs['id'],
|
||||
@@ -311,17 +311,17 @@ def unwrap_structural_tags(body_tag):
|
||||
if div.attrs.get('class'):
|
||||
div_class = div.attrs['class'] if not isinstance(div.attrs['class'], list) else div.attrs['class'][0]
|
||||
if div_class in ['C409', 'C409a']:
|
||||
_add_table_to_abc_books(div, border='solid 3px', bg_color='#e7e7e9')
|
||||
add_table_to_abc_books(div, border='solid 3px', bg_color='#e7e7e9')
|
||||
|
||||
elif div_class in ['C441', 'C816']:
|
||||
_add_table_to_abc_books(div, border='solid #6e6e70 1px', bg_color='#e7e7e8')
|
||||
add_table_to_abc_books(div, border='solid #6e6e70 1px', bg_color='#e7e7e8')
|
||||
|
||||
if div.attrs.get('style'):
|
||||
if 'background-color' in div.attrs['style']:
|
||||
end_index = div.attrs['style'].find('background-color') + len('background-color')
|
||||
start_index_of_color = end_index + 2
|
||||
bg_color = div.attrs['style'][start_index_of_color:start_index_of_color+7]
|
||||
_add_table_to_abc_books(div, border='', bg_color=bg_color)
|
||||
add_table_to_abc_books(div, border='', bg_color=bg_color)
|
||||
|
||||
if div.attrs.get('style') == '':
|
||||
del div.attrs['style']
|
||||
@@ -331,19 +331,19 @@ def unwrap_structural_tags(body_tag):
|
||||
div.name = 'p'
|
||||
continue
|
||||
|
||||
_add_span_to_save_ids_for_links(div)
|
||||
add_span_to_save_ids_for_links(div)
|
||||
div.unwrap()
|
||||
|
||||
for s in body_tag.find_all("section"):
|
||||
could_be_unwrapped = True
|
||||
if s.attrs.get('class'):
|
||||
could_be_unwrapped = _preserve_class_in_section_tag(s)
|
||||
_add_span_to_save_ids_for_links(s)
|
||||
could_be_unwrapped = preserve_class_in_section_tag(s)
|
||||
add_span_to_save_ids_for_links(s)
|
||||
if could_be_unwrapped:
|
||||
s.unwrap()
|
||||
|
||||
for s in body_tag.find_all("article"):
|
||||
_add_span_to_save_ids_for_links(s)
|
||||
add_span_to_save_ids_for_links(s)
|
||||
s.unwrap()
|
||||
|
||||
for s in body_tag.find_all("figure"):
|
||||
@@ -351,22 +351,22 @@ def unwrap_structural_tags(body_tag):
|
||||
s.attrs['style'] = "text-align: center;" # to center image inside this tag
|
||||
|
||||
for s in body_tag.find_all("figcaption"):
|
||||
_add_span_to_save_ids_for_links(s)
|
||||
add_span_to_save_ids_for_links(s)
|
||||
s.unwrap()
|
||||
|
||||
for s in body_tag.find_all("aside"):
|
||||
s.name = 'blockquote'
|
||||
|
||||
for s in body_tag.find_all("main"):
|
||||
_add_span_to_save_ids_for_links(s)
|
||||
add_span_to_save_ids_for_links(s)
|
||||
s.unwrap()
|
||||
|
||||
for s in body_tag.find_all("body"):
|
||||
_add_span_to_save_ids_for_links(s)
|
||||
add_span_to_save_ids_for_links(s)
|
||||
s.unwrap()
|
||||
|
||||
for s in body_tag.find_all("html"):
|
||||
_add_span_to_save_ids_for_links(s)
|
||||
add_span_to_save_ids_for_links(s)
|
||||
s.unwrap()
|
||||
|
||||
for s in body_tag.find_all("header"):
|
||||
@@ -385,7 +385,7 @@ def unwrap_structural_tags(body_tag):
|
||||
parents_marks_are_body = [x.parent == body_tag for x in marks]
|
||||
assert all(parents_marks_are_body), 'Anchor for chapter is deeper than 2 level. Chapters can not be parsed.'
|
||||
|
||||
_heading_tag2p_tag(body_tag)
|
||||
heading_tag_to_p_tag(body_tag)
|
||||
|
||||
# wrap NavigableString with <p>
|
||||
for node in body_tag:
|
||||
|
||||
Reference in New Issue
Block a user