forked from LiveCarta/BookConverter
Take class removing to a function
This commit is contained in:
@@ -96,13 +96,11 @@ def _wrap_strings_with_p(chapter_tag):
|
|||||||
for node in chapter_tag:
|
for node in chapter_tag:
|
||||||
if isinstance(node, NavigableString):
|
if isinstance(node, NavigableString):
|
||||||
content = str(node)
|
content = str(node)
|
||||||
content = re.sub(r"([\n\t\xa0])", " ", content)
|
content = re.sub(r"([\s\xa0])", " ", content).strip()
|
||||||
# remove spaces at the beginning and at the end of the string:
|
|
||||||
content = content.strip()
|
|
||||||
if content:
|
if content:
|
||||||
tag = chapter_tag.new_tag("p")
|
p_tag = chapter_tag.new_tag("p")
|
||||||
tag.append(str(node))
|
p_tag.append(str(node))
|
||||||
node.replace_with(tag)
|
node.replace_with(p_tag)
|
||||||
|
|
||||||
|
|
||||||
def _remove_headings_content(content_tag, title_of_chapter: str):
|
def _remove_headings_content(content_tag, title_of_chapter: str):
|
||||||
@@ -146,6 +144,7 @@ def _tags_to_correspond_livecarta_tag(chapter_tag):
|
|||||||
for key in reg_key:
|
for key in reg_key:
|
||||||
tags = chapter_tag.find_all(re.compile(key))
|
tags = chapter_tag.find_all(re.compile(key))
|
||||||
for tag in tags:
|
for tag in tags:
|
||||||
|
# todo can cause appearance of \n <p><p>...</p></p> -> <p>\n</p> <p>...</p> <p>\n</p> (section)
|
||||||
tag.name = to_replace_value
|
tag.name = to_replace_value
|
||||||
|
|
||||||
def _unwrap_tags(chapter_tag):
|
def _unwrap_tags(chapter_tag):
|
||||||
@@ -300,8 +299,6 @@ def _clean_wiley_block(block):
|
|||||||
h.insert_before(BeautifulSoup(features="lxml").new_tag("br"))
|
h.insert_before(BeautifulSoup(features="lxml").new_tag("br"))
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
def _preprocess_block_tags(chapter_tag: Tag):
|
def _preprocess_block_tags(chapter_tag: Tag):
|
||||||
"""Function preprocessing <block> tags"""
|
"""Function preprocessing <block> tags"""
|
||||||
for block in chapter_tag.find_all("blockquote", attrs={"class": re.compile("feature[1234]")}):
|
for block in chapter_tag.find_all("blockquote", attrs={"class": re.compile("feature[1234]")}):
|
||||||
@@ -323,6 +320,13 @@ def _preprocess_block_tags(chapter_tag: Tag):
|
|||||||
_wrap_tag_with_table(chapter_tag, future_block, bg_color=color)
|
_wrap_tag_with_table(chapter_tag, future_block, bg_color=color)
|
||||||
|
|
||||||
|
|
||||||
|
def _class_removing(chapter_tag):
|
||||||
|
for tag in chapter_tag.find_all(recursive=True):
|
||||||
|
if tag.attrs.get("class") \
|
||||||
|
and (tag.attrs.get("class") not in ["link-anchor", "footnote-element"]):
|
||||||
|
del tag.attrs["class"]
|
||||||
|
|
||||||
|
|
||||||
def prepare_content(title_str: str, content_tag: BeautifulSoup, remove_title_from_chapter: bool) -> str:
|
def prepare_content(title_str: str, content_tag: BeautifulSoup, remove_title_from_chapter: bool) -> str:
|
||||||
"""
|
"""
|
||||||
Function finalise processing/cleaning content
|
Function finalise processing/cleaning content
|
||||||
@@ -368,9 +372,6 @@ def prepare_content(title_str: str, content_tag: BeautifulSoup, remove_title_fro
|
|||||||
_preprocess_div_tags(content_tag)
|
_preprocess_div_tags(content_tag)
|
||||||
_preprocess_block_tags(content_tag)
|
_preprocess_block_tags(content_tag)
|
||||||
|
|
||||||
# 5. remove classes that were created by converter
|
# 5. remove classes that weren't created by converter
|
||||||
for tag in content_tag.find_all(recursive=True):
|
_class_removing(content_tag)
|
||||||
if hasattr(tag, "attrs") and tag.attrs.get("class") \
|
|
||||||
and (tag.attrs.get("class") not in ["link-anchor", "footnote-element"]):
|
|
||||||
del tag.attrs["class"]
|
|
||||||
return str(content_tag)
|
return str(content_tag)
|
||||||
|
|||||||
Reference in New Issue
Block a user