forked from LiveCarta/BookConverter
Create preset for wrapping tags with tables
This commit is contained in:
@@ -103,6 +103,61 @@ def _wrap_strings_with_p(chapter_tag):
|
||||
node.replace_with(p_tag)
|
||||
|
||||
|
||||
def _wrap_tags_with_table(chapter_tag):
    """Wrap configured tags in a single-cell <table>, then unwrap the tag itself.

    Iterates over ``LiveCartaConfig.WRAP_TAGS_WITH_TABLE``. Each key is handed
    to ``find_all`` as the tag-name filter; each value is either

    * a tuple ``(attr_name, pattern)`` - a tag is selected when that attribute
      matches the regular expression ``pattern``; or
    * any other container of attribute names - a tag is selected when it
      carries at least one of the listed attributes.

    Every selected tag is wrapped as
    ``<table><tbody><tr><td>tag</td></tr></tbody></table>``, a ``<br>`` is
    inserted after the table, ``_add_span_to_save_ids_for_links`` is called
    for the tag, and finally the tag is unwrapped.

    Args:
        chapter_tag: object providing ``new_tag`` and ``find_all``
            (presumably the chapter's BeautifulSoup document - confirm against
            the caller); it is modified in place.
    """

    def _wrap_tag_with_table(chapter_tag, tag_to_be_wrapped, width="100", border="", bg_color=None):
        """Build the table skeleton around ``tag_to_be_wrapped`` and return the <table>."""
        # Drop a trailing "%" so a source value such as "50%" does not
        # produce the invalid declaration "width:50%%;".
        width = str(width).strip().rstrip("%") or "100"

        table = chapter_tag.new_tag("table")
        # A None attribute value is serialised as a bare, valueless attribute,
        # so fall back to the signature default instead of storing None.
        table.attrs["border"] = border or ""
        table.attrs["align"] = "center"
        table.attrs["style"] = f"width:{width}%;"

        tbody = chapter_tag.new_tag("tbody")
        tr = chapter_tag.new_tag("tr")
        td = chapter_tag.new_tag("td")
        # Only emit bgcolor when a colour was actually supplied.
        if bg_color:
            td.attrs["bgcolor"] = bg_color

        # Wrap from the inside out: tag -> td -> tr -> tbody -> table.
        tag_to_be_wrapped.wrap(td)
        td.wrap(tr)
        tr.wrap(tbody)
        tbody.wrap(table)
        table.insert_after(BeautifulSoup(features="lxml").new_tag("br"))
        return table

    def process_tag_using_table(tag_to_wrap):
        """Wrap one tag using its own width/border/bgcolor attributes, then unwrap it."""
        _wrap_tag_with_table(
            chapter_tag,
            tag_to_be_wrapped=tag_to_wrap,
            width=tag_to_wrap.attrs.get("width") or "100",
            border=tag_to_wrap.attrs.get("border") or None,
            bg_color=tag_to_wrap.attrs.get("bgcolor") or None,
        )
        _add_span_to_save_ids_for_links(tag_to_wrap, chapter_tag)
        tag_to_wrap.unwrap()

    for tags_to_wrap, attrs in LiveCartaConfig.WRAP_TAGS_WITH_TABLE.items():
        if isinstance(attrs, tuple):
            # (attribute name, regex the attribute value has to match)
            attr, val = attrs[0], attrs[1]
            for tag_to_wrap in chapter_tag.find_all(tags_to_wrap, {attr: re.compile(val)}):
                process_tag_using_table(tag_to_wrap)
        else:
            # Collection of attribute names: any one of them being present is enough.
            for tag_to_wrap in chapter_tag.find_all(tags_to_wrap):
                if any(attr_name in attrs for attr_name in tag_to_wrap.attrs):
                    process_tag_using_table(tag_to_wrap)
|
||||
|
||||
|
||||
def _tags_to_correspond_livecarta_tag(chapter_tag):
    """Rename tags according to REPLACE_REGEX_WITH_LIVECARTA_CORRESPOND_TAGS.

    Each key of that mapping is an iterable of regular expressions for tag
    names; every tag whose name matches one of them is renamed, in place, to
    the mapped value.
    """
    replacements = LiveCartaConfig.REPLACE_REGEX_WITH_LIVECARTA_CORRESPOND_TAGS
    for name_patterns, livecarta_name in replacements.items():
        for name_pattern in name_patterns:
            name_regex = re.compile(name_pattern)
            for matched_tag in chapter_tag.find_all(name_regex):
                # todo can cause appearance of \n <p><p>...</p></p> -> <p>\n</p> <p>...</p> <p>\n</p> (section)
                matched_tag.name = livecarta_name
|
||||
|
||||
|
||||
def _unwrap_tags(chapter_tag):
    """Unwrap every tag named in LiveCartaConfig.TAGS_TO_UNWRAP.

    Before a tag is unwrapped, ``_add_span_to_save_ids_for_links`` is called
    for it (per the original docstring this moves the tag's id to a span).
    """
    for tag_name in LiveCartaConfig.TAGS_TO_UNWRAP:
        found_tags = chapter_tag.find_all(tag_name)
        for found in found_tags:
            _add_span_to_save_ids_for_links(found, chapter_tag)
            found.unwrap()
|
||||
|
||||
|
||||
def _remove_headings_content(content_tag, title_of_chapter: str):
|
||||
"""
|
||||
Function
|
||||
@@ -138,23 +193,6 @@ def _remove_headings_content(content_tag, title_of_chapter: str):
|
||||
break
|
||||
|
||||
|
||||
def _tags_to_correspond_livecarta_tag(chapter_tag):
    """Replace tag names with their LiveCarta counterparts.

    Walks ``LiveCartaConfig.REPLACE_REGEX_WITH_LIVECARTA_CORRESPOND_TAGS``:
    for every regular expression in a key, all tags whose name matches are
    renamed in place to the value stored under that key.
    """
    mapping = LiveCartaConfig.REPLACE_REGEX_WITH_LIVECARTA_CORRESPOND_TAGS
    for regex_group, new_name in mapping.items():
        for regex_source in regex_group:
            matches = chapter_tag.find_all(re.compile(regex_source))
            # todo can cause appearance of \n <p><p>...</p></p> -> <p>\n</p> <p>...</p> <p>\n</p> (section)
            for node in matches:
                node.name = new_name
|
||||
|
||||
def _unwrap_tags(chapter_tag):
    """Remove the configured wrapper tags while keeping their children.

    For each name in ``LiveCartaConfig.TAGS_TO_UNWRAP`` every matching tag is
    first passed to ``_add_span_to_save_ids_for_links`` and then unwrapped.
    """
    names_to_unwrap = LiveCartaConfig.TAGS_TO_UNWRAP
    for name in names_to_unwrap:
        for wrapper in chapter_tag.find_all(name):
            _add_span_to_save_ids_for_links(wrapper, chapter_tag)
            wrapper.unwrap()
|
||||
|
||||
|
||||
# todo remove
|
||||
def _process_lists(chapter_tag: BeautifulSoup):
|
||||
"""
|
||||
@@ -181,13 +219,11 @@ def _preprocess_table(chapter_tag: BeautifulSoup):
|
||||
"""Function to preprocess tables and tags(td|th|tr): style"""
|
||||
tables = chapter_tag.find_all("table")
|
||||
for table in tables:
|
||||
t_tags = table.find_all(re.compile("td|th|tr"))
|
||||
for t_tag in t_tags:
|
||||
style = t_tag.get("style")
|
||||
for t_tag in table.find_all(re.compile("td|th|tr")):
|
||||
width = ""
|
||||
if style:
|
||||
if t_tag.get("style"):
|
||||
width_match = re.search(
|
||||
r"[^-]width: ?(\d+\.?\d*)(p[tx])", style)
|
||||
r"[^-]width: ?(\d+\.?\d*)(p[tx])", t_tag["style"])
|
||||
if width_match:
|
||||
size = width_match.group(1)
|
||||
width = size + "px"
|
||||
@@ -197,9 +233,8 @@ def _preprocess_table(chapter_tag: BeautifulSoup):
|
||||
if t_tag.attrs.get("style"):
|
||||
t_tag.attrs["style"] = t_tag.attrs["style"].replace(
|
||||
"border:0;", "")
|
||||
|
||||
elif t_tag.attrs.get("style") == "":
|
||||
del t_tag.attrs["style"]
|
||||
if re.sub(r"[\s\xa0]", "", t_tag.attrs.get("style")) == "":
|
||||
del t_tag.attrs["style"]
|
||||
|
||||
if not table.attrs.get("border") or table.attrs.get("border") in ["0", "0px"]:
|
||||
table.attrs["border"] = "1"
|
||||
@@ -254,72 +289,6 @@ def _preprocess_pre_tags(chapter_tag: BeautifulSoup):
|
||||
pre.append(code)
|
||||
|
||||
|
||||
def _wrap_tag_with_table(chapter_tag, tag_to_be_wrapped, width="100", border="", bg_color=None):
    """Wrap ``tag_to_be_wrapped`` in a centred single-cell <table>.

    The resulting structure is
    ``<table><tbody><tr><td>tag</td></tr></tbody></table>`` followed by a
    ``<br>`` inserted right after the table.

    Args:
        chapter_tag: object whose ``new_tag`` creates the table elements.
        tag_to_be_wrapped: tag that ends up inside the <td>.
        width: table width in percent; a trailing "%" is tolerated.
        border: value for the table's ``border`` attribute; falsy values
            (including None) are stored as an empty string.
        bg_color: value for the cell's ``bgcolor`` attribute; the attribute
            is omitted when no colour is given.

    Returns:
        The newly created <table> tag.
    """
    # Drop a trailing "%" so a source value such as "50%" does not
    # produce the invalid declaration "width:50%%;".
    width = str(width).strip().rstrip("%") or "100"

    table = chapter_tag.new_tag("table")
    # A None attribute value is serialised as a bare, valueless attribute,
    # so fall back to the signature default instead of storing None.
    table.attrs["border"] = border or ""
    table.attrs["align"] = "center"
    table.attrs["style"] = f"width:{width}%;"

    tbody = chapter_tag.new_tag("tbody")
    tr = chapter_tag.new_tag("tr")
    td = chapter_tag.new_tag("td")
    # Only emit bgcolor when a colour was actually supplied.
    if bg_color:
        td.attrs["bgcolor"] = bg_color

    # Wrap from the inside out: tag -> td -> tr -> tbody -> table.
    tag_to_be_wrapped.wrap(td)
    td.wrap(tr)
    tr.wrap(tbody)
    tbody.wrap(table)
    table.insert_after(BeautifulSoup(features="lxml").new_tag("br"))
    return table
|
||||
|
||||
|
||||
def _preprocess_div_tags(chapter_tag):
    """
    Replace each <div> with either a <table> wrapper or a <p>.

    A <div> carrying any of the ``width``, ``border`` or ``bgcolor``
    attributes is wrapped via ``_wrap_tag_with_table`` (using those attribute
    values), passed to ``_add_span_to_save_ids_for_links`` and unwrapped.
    Every other <div> is simply renamed to <p>.
    """
    table_attrs = ["width", "border", "bgcolor"]
    for div in chapter_tag.find_all("div"):
        has_table_attr = any(name in table_attrs for name in div.attrs)
        if not has_table_attr:
            # Plain container: downgrade to a paragraph and move on.
            div.name = "p"
            continue

        _wrap_tag_with_table(
            chapter_tag,
            tag_to_be_wrapped=div,
            width=div.attrs.get("width") or "100",
            border=div.attrs.get("border") or None,
            bg_color=div.attrs.get("bgcolor") or None,
        )
        _add_span_to_save_ids_for_links(div, chapter_tag)
        div.unwrap()
|
||||
|
||||
|
||||
def _clean_wiley_block(block):
    """Strip "hr" paragraphs from ``block`` and demote its first heading.

    All <p> tags whose class matches ``.+ hr`` are extracted from the tree.
    The first tag whose name matches ``h[1-9]`` (if any) is renamed to <p>
    and a <br> is inserted in front of it.
    """
    for rule_paragraph in block.find_all("p", attrs={"class": re.compile(".+ hr")}):
        rule_paragraph.extract()

    heading = block.find(re.compile("h[1-9]"))
    if not heading:
        return
    heading.name = "p"
    line_break = BeautifulSoup(features="lxml").new_tag("br")
    heading.insert_before(line_break)
|
||||
|
||||
|
||||
def _preprocess_block_tags(chapter_tag: Tag):
    """Wrap "feature" blocks in a table, coloured according to their class.

    Handles <blockquote> and <p> tags whose class matches ``feature[1234]``:

    * each one is cleaned with ``_clean_wiley_block``;
    * each one is wrapped with ``_wrap_tag_with_table``, using background
      ``#DDDDDD`` for ``feature1``, ``#EEEEEE`` for ``feature2`` and no
      colour otherwise;
    * a <blockquote> additionally gets a <br> inserted after it and is then
      unwrapped, while a <p> stays inside the table cell.

    Args:
        chapter_tag: chapter tree to process; modified in place.
    """

    def _feature_color(tag):
        """Return the background colour for ``tag`` based on its class, or None."""
        # HTML parsers expose the multi-valued ``class`` attribute as a list,
        # so comparing it with a plain string never matches; normalise to a
        # list of class names and test membership instead.
        classes = tag.attrs.get("class") or []
        if isinstance(classes, str):
            classes = classes.split()
        # feature2 is checked first because it overrode feature1 in the
        # original assignment order.
        if "feature2" in classes:
            return "#EEEEEE"
        if "feature1" in classes:
            return "#DDDDDD"
        return None

    for block in chapter_tag.find_all("blockquote", attrs={"class": re.compile("feature[1234]")}):
        _clean_wiley_block(block)
        _wrap_tag_with_table(chapter_tag, block, bg_color=_feature_color(block))
        block.insert_after(BeautifulSoup(features="lxml").new_tag("br"))
        block.unwrap()

    for future_block in chapter_tag.find_all("p", attrs={"class": re.compile("feature[1234]")}):
        _clean_wiley_block(future_block)
        _wrap_tag_with_table(chapter_tag, future_block, bg_color=_feature_color(future_block))
|
||||
|
||||
|
||||
def _class_removing(chapter_tag):
|
||||
for tag in chapter_tag.find_all(recursive=True):
|
||||
if tag.attrs.get("class") \
|
||||
@@ -356,6 +325,8 @@ def prepare_content(title_str: str, content_tag: BeautifulSoup, remove_title_fro
|
||||
# 2. wrap NavigableString with tag <p>
|
||||
_wrap_strings_with_p(content_tag)
|
||||
|
||||
_wrap_tags_with_table(content_tag)
|
||||
|
||||
_tags_to_correspond_livecarta_tag(content_tag)
|
||||
|
||||
_unwrap_tags(content_tag)
|
||||
@@ -365,12 +336,10 @@ def prepare_content(title_str: str, content_tag: BeautifulSoup, remove_title_fro
|
||||
_remove_headings_content(content_tag, title_str)
|
||||
|
||||
# 4. processing tags (<li>, <table>, <code>, <pre>, <div>, <block>)
|
||||
_process_lists(content_tag)
|
||||
_process_lists(content_tag) # todo regex
|
||||
_preprocess_table(content_tag)
|
||||
_preprocess_code_tags(content_tag)
|
||||
_preprocess_pre_tags(content_tag)
|
||||
_preprocess_div_tags(content_tag)
|
||||
_preprocess_block_tags(content_tag)
|
||||
_preprocess_code_tags(content_tag) # todo regex
|
||||
_preprocess_pre_tags(content_tag) # todo regex
|
||||
|
||||
# 5. remove classes that weren't created by converter
|
||||
_class_removing(content_tag)
|
||||
|
||||
@@ -115,8 +115,13 @@ class LiveCartaConfig:
|
||||
r"(^h[1-9]$)": ["list-style-type"]
|
||||
}
|
||||
|
||||
WRAP_TAGS_WITH_TABLE = {
|
||||
("div",) :["width", "border", "bgcolor"],
|
||||
("section", "blockquote",) : ("class", r"feature[1234]"),
|
||||
}
|
||||
|
||||
REPLACE_REGEX_WITH_LIVECARTA_CORRESPOND_TAGS = {
|
||||
(r"^h[6-9]$", "figure$", "section$"): "p",
|
||||
(r"^h[6-9]$", "^figure$", "^section$", "^div$"): "p",
|
||||
("^aside$",): "blockquote",
|
||||
("^header$", "^footer$"): "span",
|
||||
("^b$",): "strong",
|
||||
|
||||
Reference in New Issue
Block a user