forked from LiveCarta/BookConverter
epub converter: add new type of blocks
This commit is contained in:
@@ -250,13 +250,24 @@ def preprocess_footnotes(source_html_tag: Tag, href2soup_html: dict = None, note
|
||||
def unwrap_structural_tags(body_tag):
|
||||
|
||||
def _preserve_class_in_aside_tag(tag_):
|
||||
# to save css style inherited from class, copy class to aside tag
|
||||
# to save css style inherited from class, copy class to aside tag (which is parent to tag_)
|
||||
# this is for Wiley books with boxes
|
||||
tag_class = tag_.attrs['class'] if not isinstance(tag_.attrs['class'], list) else tag_.attrs['class'][0]
|
||||
if tag_.parent.name == 'aside':
|
||||
if not tag_.parent.attrs.get('class'):
|
||||
tag_.parent.attrs['class'] = tag_class
|
||||
|
||||
def _preserve_class_in_section_tag(tag_):
|
||||
# to save css style inherited from class, copy class to child <p>
|
||||
# this is for Wiley books with boxes
|
||||
tag_class = tag_.attrs['class'] if not isinstance(tag_.attrs['class'], list) else tag_.attrs['class'][0]
|
||||
child_p_tag = tag_.find_all("p")
|
||||
if len(child_p_tag) != 1:
|
||||
return
|
||||
child_p_tag = child_p_tag[0]
|
||||
if not child_p_tag.attrs.get('class'):
|
||||
child_p_tag.attrs['class'] = tag_class
|
||||
|
||||
def _add_table_to_abc_books(tag_, border, bg_color):
    """Wrap ``tag_`` in a table of width '100' using the given border and background color."""
    # NOTE(review): body_tag is not a parameter here; presumably it is the
    # argument of the enclosing unwrap_structural_tags -- confirm.
    table_options = {'width': '100', 'border': border, 'bg_color': bg_color}
    wrap_block_tag_with_table(body_tag, old_tag=tag_, **table_options)
|
||||
|
||||
@@ -292,6 +303,7 @@ def unwrap_structural_tags(body_tag):
|
||||
for s in body_tag.find_all("section"):
|
||||
if s.attrs.get('class'):
|
||||
_preserve_class_in_aside_tag(s)
|
||||
_preserve_class_in_section_tag(s)
|
||||
_add_span_to_save_ids_for_links(s)
|
||||
s.unwrap()
|
||||
|
||||
@@ -411,26 +423,36 @@ def wrap_block_tag_with_table(main_tag, old_tag, width='95', border='1px', bg_co
|
||||
return table
|
||||
|
||||
|
||||
def _clean_wiley_block(block):
    """Strip separator paragraphs from ``block`` and demote its first heading.

    Every ``<p>`` whose class matches ``".+ hr"`` is extracted from the tree.
    The first tag whose name matches ``h[1-9]`` (if any) is renamed to
    ``<p>`` and gets a ``<br>`` inserted right before it.

    :param block: tag to clean; it is modified in place.
    """
    for separator in block.find_all("p", attrs={"class": re.compile(".+ hr")}):
        separator.extract()

    heading = block.find(re.compile("h[1-9]"))
    if not heading:
        return

    heading.name = "p"
    line_break = BeautifulSoup(features='lxml').new_tag("br")
    heading.insert_before(line_break)
|
||||
|
||||
|
||||
def preprocess_block_tags(chapter_tag):
    """Clean "feature" blocks of a chapter and wrap them in tables.

    Two kinds of tags are handled:

    * ``<blockquote>`` tags carrying one of the classes ``feature1`` ..
      ``feature4``: cleaned with ``_clean_wiley_block``, wrapped with
      ``wrap_block_tag_with_table``, followed by a ``<br>`` and unwrapped;
    * ``<p>`` tags whose class matches ``feature[1234]``: cleaned and
      wrapped in the same way (no ``<br>``, no unwrap).

    The background color passed to the table is ``#DDDDDD`` for
    ``feature1``, ``#EEEEEE`` for ``feature2`` and ``None`` otherwise.

    :param chapter_tag: root tag of the chapter; it is modified in place.
    """
    feature_bg_colors = {
        'feature1': '#DDDDDD',
        'feature2': '#EEEEEE',
        'feature3': None,
        'feature4': None,
    }

    for block in chapter_tag.find_all("blockquote"):
        feature = _feature_class(block, feature_bg_colors)
        if feature is None:
            continue
        _clean_wiley_block(block)
        wrap_block_tag_with_table(chapter_tag, block, bg_color=feature_bg_colors[feature])
        block.insert_after(BeautifulSoup(features='lxml').new_tag("br"))
        block.unwrap()

    for future_block in chapter_tag.find_all("p", attrs={"class": re.compile("feature[1234]")}):
        _clean_wiley_block(future_block)
        # the regex may match a class that is not exactly featureN; such
        # blocks get no background color
        feature = _feature_class(future_block, feature_bg_colors)
        wrap_block_tag_with_table(chapter_tag, future_block, bg_color=feature_bg_colors.get(feature))


def _feature_class(tag, known_classes):
    """Return the first class name of ``tag`` found in ``known_classes``, else None."""
    tag_classes = tag.attrs.get('class') or []
    if isinstance(tag_classes, str):
        # class is a plain string when the parser does not split
        # multi-valued attributes; a list otherwise
        tag_classes = tag_classes.split()
    for class_name in tag_classes:
        if class_name in known_classes:
            return class_name
    return None
|
||||
|
||||
|
||||
def _prepare_formatted(text):
|
||||
# replace <,> to save them as is in html code
|
||||
text = text.replace("<", "\x3C")
|
||||
text = text.replace(">", "\x3E")
|
||||
text = text.replace('\t', "\xa0 \xa0 ") #
|
||||
@@ -443,7 +465,7 @@ def preprocess_pre_tags(chapter_tag):
|
||||
new_tag = BeautifulSoup(features='lxml').new_tag("span")
|
||||
new_tag.attrs = pre.attrs.copy()
|
||||
spans = pre.find_all("span")
|
||||
to_add_br = len(spans) > 1
|
||||
to_add_br = len(spans) > 1 # if in <pre> there are multiple <span>, we need to add <br> after each content
|
||||
|
||||
for child in pre.children:
|
||||
if isinstance(child, NavigableString):
|
||||
@@ -470,6 +492,7 @@ def preprocess_pre_tags(chapter_tag):
|
||||
|
||||
|
||||
def preprocess_code_tags(chapter_tag):
|
||||
# function that emulates style of <code>, <kdb>, <var>
|
||||
for code in chapter_tag.find_all(re.compile("code|kdb|var")):
|
||||
code.name = 'span'
|
||||
if code.parent.name == "pre":
|
||||
|
||||
Reference in New Issue
Block a user