epub converter: add aside as table - block processing

This commit is contained in:
shirshasa
2021-07-08 15:08:34 +03:00
parent 8e0fb42c18
commit 7963486d7c

View File

@@ -129,6 +129,7 @@ def insert_span_with_attrs_before_tag(main_tag, tag, id_, class_):
new_tag = main_tag.new_tag("span")
new_tag.attrs['id'] = id_ or ''
new_tag.attrs['class'] = class_ or ''
new_tag.string = "\xa0"
tag.insert_before(new_tag)
@@ -298,6 +299,11 @@ def unwrap_structural_tags(body_tag):
div.unwrap()
for s in body_tag.find_all("section"):
if s.attrs.get('class'):
class_ = s.attrs['class'] if not isinstance(s.attrs['class'], list) else s.attrs['class'][0]
if s.parent.name == 'aside':
if not s.parent.attrs.get('class'):
s.parent.attrs['class'] = class_
_add_span_to_save_ids_for_links(s)
s.unwrap()
@@ -398,6 +404,44 @@ def wrap_span_with_table(main_tag, old_tag):
return table
def wrap_block_with_table(main_tag, old_tag, color=None):
    """Wrap ``old_tag`` in a one-cell table and return that table.

    The resulting nesting is ``table > tbody > tr > td > old_tag``. A ``<br>``
    tag is inserted right after the new table.

    Args:
        main_tag: Object whose ``new_tag`` factory creates the table tags
            (presumably the owning BeautifulSoup document -- confirm with
            callers).
        old_tag: Tag to be enclosed by the table cell; it is wrapped in place.
        color: Optional value for the cell's ``bgcolor`` attribute. Falsy
            values leave the attribute unset.

    Returns:
        The newly created ``table`` tag.
    """
    table, tbody, row, cell = (
        main_tag.new_tag(name) for name in ("table", "tbody", "tr", "td")
    )

    table.attrs.update({
        'border': '1px solid',
        'align': 'center',
        'style': 'width:95%;',
    })

    cell.attrs['border-radius'] = '8px'
    if color:
        cell.attrs['bgcolor'] = color

    # Build the nesting from the inside out: each wrapper encloses the
    # previous one, starting with the original tag.
    inner = old_tag
    for outer in (cell, row, tbody, table):
        inner.wrap(outer)
        inner = outer

    line_break = BeautifulSoup(features='lxml').new_tag("br")
    table.insert_after(line_break)
    return table
def preprocess_block_tags(chapter_tag):
    """Turn "feature" blockquotes into bordered one-cell tables.

    For every ``<blockquote>`` whose class is ``feature1`` .. ``feature4``:

    * ``<p>`` children whose class matches ``".+ hr"`` are removed;
    * the first heading tag found (``h1`` .. ``h9``) is renamed to ``<p>``
      and gets a ``<br>`` inserted before it;
    * the blockquote is wrapped with ``wrap_block_with_table`` (grey
      background for ``feature1`` / ``feature2``, none otherwise);
    * a ``<br>`` is inserted after the blockquote and the blockquote tag
      itself is unwrapped, leaving its children inside the table cell.

    Args:
        chapter_tag: Parsed chapter whose blockquotes are rewritten in place.
            It is also passed to ``wrap_block_with_table`` as the tag factory.
    """
    # Background colour per supported class; None means "no bgcolor".
    feature_colors = {
        'feature1': '#DDDDDD',
        'feature2': '#EEEEEE',
        'feature3': None,
        'feature4': None,
    }
    # Loop-invariant patterns, compiled once.
    hr_class = re.compile(".+ hr")
    heading_name = re.compile("h[1-9]")

    for block in chapter_tag.find_all("blockquote"):
        # BeautifulSoup's HTML parsers expose ``class`` as a list of values,
        # while XML parsing yields a plain string. Normalise to the first
        # class so list-valued attributes are not silently skipped (same
        # convention as the section handling in unwrap_structural_tags).
        class_ = block.attrs.get('class')
        if isinstance(class_, list):
            class_ = class_[0] if class_ else None
        if class_ not in feature_colors:
            continue

        for hr in block.find_all("p", attrs={"class": hr_class}):
            hr.extract()

        # Only the first heading inside the block is demoted to a paragraph.
        heading = block.find(heading_name)
        if heading:
            heading.name = "p"
            heading.insert_before(BeautifulSoup(features='lxml').new_tag("br"))

        wrap_block_with_table(chapter_tag, block, feature_colors[class_])
        block.insert_after(BeautifulSoup(features='lxml').new_tag("br"))
        block.unwrap()
def preprocess_pre_tags(chapter_tag):
for pre in chapter_tag.find_all("pre"):
new_tag = BeautifulSoup(features='lxml').new_tag("span")
@@ -451,6 +495,7 @@ def prepare_title_and_content(title, chapter_tag: BeautifulSoup, remove_title_fr
preprocess_table(chapter_tag)
preprocess_code_tags(chapter_tag)
preprocess_pre_tags(chapter_tag)
preprocess_block_tags(chapter_tag)
# 2. class removal
for tag in chapter_tag.find_all(recursive=True):
if hasattr(tag, 'attrs') and tag.attrs.get('class') and (tag.attrs.get('class') not in ['link-anchor']):