diff --git a/src/html_epub_preprocessor.py b/src/html_epub_preprocessor.py
index ff50c4c..9e3497f 100644
--- a/src/html_epub_preprocessor.py
+++ b/src/html_epub_preprocessor.py
@@ -129,6 +129,7 @@ def insert_span_with_attrs_before_tag(main_tag, tag, id_, class_):
new_tag = main_tag.new_tag("span")
new_tag.attrs['id'] = id_ or ''
new_tag.attrs['class'] = class_ or ''
+ new_tag.string = "\xa0"
tag.insert_before(new_tag)
@@ -298,6 +299,11 @@ def unwrap_structural_tags(body_tag):
div.unwrap()
for s in body_tag.find_all("section"):
+ if s.attrs.get('class'):
+ class_ = s.attrs['class'] if not isinstance(s.attrs['class'], list) else s.attrs['class'][0]
+ if s.parent.name == 'aside':
+ if not s.parent.attrs.get('class'):
+ s.parent.attrs['class'] = class_
_add_span_to_save_ids_for_links(s)
s.unwrap()
@@ -398,6 +404,44 @@ def wrap_span_with_table(main_tag, old_tag):
return table
+def wrap_block_with_table(main_tag, old_tag, color=None):
+    """Wrap ``old_tag`` in a single-cell bordered table and return that table.
+
+    The resulting structure is
+    ``<table><tbody><tr><td>old_tag</td></tr></tbody></table>`` followed by a
+    ``<br>`` sibling inserted right after the table.
+
+    :param main_tag: object whose ``new_tag`` factory creates the new elements
+    :param old_tag: tag that ends up inside the table cell
+    :param color: optional value for the cell's ``bgcolor`` attribute;
+        the attribute is omitted when this is falsy
+    :return: the newly created ``table`` tag
+    """
+    table = main_tag.new_tag("table")
+    table.attrs['border'] = '1px solid'
+    table.attrs['align'] = 'center'
+    table.attrs['style'] = 'width:95%;'
+    tbody = main_tag.new_tag("tbody")
+    tr = main_tag.new_tag("tr")
+    td = main_tag.new_tag("td")
+    # NOTE(review): border-radius is a CSS property, not an HTML attribute;
+    # it is kept as an attribute because a later processing step may rely on
+    # it -- confirm, otherwise it belongs in the cell's ``style``.
+    td.attrs['border-radius'] = '8px'
+    if color:
+        td.attrs['bgcolor'] = color
+    # Build the nesting from the inside out: each wrap() puts the new parent
+    # in the place the wrapped tag occupied.
+    old_tag.wrap(td)
+    td.wrap(tr)
+    tr.wrap(tbody)
+    tbody.wrap(table)
+    # Reuse main_tag's tag factory (already used for every element above)
+    # instead of constructing a throwaway BeautifulSoup object for one <br>.
+    table.insert_after(main_tag.new_tag("br"))
+    return table
+
+
+def preprocess_block_tags(chapter_tag):
+    """Replace "feature" blockquotes with single-cell tables.
+
+    For every ``<blockquote>`` whose class is one of ``feature1`` ..
+    ``feature4``:
+
+    * ``<p>`` children whose class matches ``".+ hr"`` are removed;
+    * the first heading inside the block is renamed to ``<p>`` and gets a
+      ``<br>`` inserted before it;
+    * the block is wrapped with ``wrap_block_with_table`` (grey background
+      for ``feature1``/``feature2``, none otherwise), a ``<br>`` is inserted
+      after it and the ``<blockquote>`` element itself is unwrapped so only
+      its children stay inside the table cell.
+
+    :param chapter_tag: parsed chapter; modified in place. It must provide
+        ``new_tag`` because it is also used as the tag factory.
+    """
+    feature_classes = ('feature1', 'feature2', 'feature3', 'feature4')
+    # Only the first two feature kinds get a background colour.
+    feature_colors = {'feature1': '#DDDDDD', 'feature2': '#EEEEEE'}
+    # Anchored: BeautifulSoup applies search() to tag names, so an unanchored
+    # pattern would also accept any name that merely contains "h<digit>".
+    heading_name = re.compile("^h[1-9]$")
+    hr_class = re.compile(".+ hr")
+
+    for block in chapter_tag.find_all("blockquote"):
+        # BeautifulSoup's HTML parsers expose ``class`` as a list of names,
+        # while other code in this module may have stored a plain string;
+        # normalise both forms before comparing.
+        classes = block.attrs.get('class') or []
+        if isinstance(classes, str):
+            classes = classes.split()
+        feature = next((c for c in classes if c in feature_classes), None)
+        if feature is None:
+            continue
+
+        for hr in block.find_all("p", attrs={"class": hr_class}):
+            hr.extract()
+
+        heading = block.find(heading_name)
+        if heading is not None:
+            heading.name = "p"
+            heading.insert_before(chapter_tag.new_tag("br"))
+
+        wrap_block_with_table(chapter_tag, block, feature_colors.get(feature))
+        block.insert_after(chapter_tag.new_tag("br"))
+        block.unwrap()
+
+
def preprocess_pre_tags(chapter_tag):
for pre in chapter_tag.find_all("pre"):
new_tag = BeautifulSoup(features='lxml').new_tag("span")
@@ -451,6 +495,7 @@ def prepare_title_and_content(title, chapter_tag: BeautifulSoup, remove_title_fr
preprocess_table(chapter_tag)
preprocess_code_tags(chapter_tag)
preprocess_pre_tags(chapter_tag)
+ preprocess_block_tags(chapter_tag)
# 2. class removal
for tag in chapter_tag.find_all(recursive=True):
if hasattr(tag, 'attrs') and tag.attrs.get('class') and (tag.attrs.get('class') not in ['link-anchor']):