This commit is contained in:
Kiryl
2022-04-28 16:26:49 +03:00
parent 46064bf247
commit c10190662b
3 changed files with 82 additions and 86 deletions

View File

@@ -275,7 +275,7 @@ def unwrap_structural_tags(body_tag):
:return: None
"""
def _preserve_class_in_aside_tag(tag_):
def preserve_class_in_aside_tag(tag_):
"""To preserve the CSS style inherited from the class, copy the class to the aside tag (which is the parent of tag_)"""
# this is for Wiley books with boxes
tag_class = tag_.attrs['class'] if not isinstance(
@@ -561,8 +561,8 @@ def preprocess_pre_tags(chapter_tag):
spans = pre.find_all("span")
# if <pre> contains multiple <span> tags, we need to add a <br> after the content of each one
to_add_br = len(spans) > 1
for child in pre.children:
copy_contents = pre.contents[:]
for child in copy_contents:
if isinstance(child, NavigableString):
cleaned_text = prepare_formatted(str(child))
sub_strings = re.split('\r\n|\n|\r', cleaned_text)
@@ -573,8 +573,8 @@ def preprocess_pre_tags(chapter_tag):
else:
for sub_child in child.children:
if isinstance(sub_child, NavigableString):
cleaned_text2 = prepare_formatted(str(sub_child))
sub_child.replace_with(NavigableString(cleaned_text2))
cleaned_text = prepare_formatted(str(sub_child))
sub_child.replace_with(NavigableString(cleaned_text))
else:
sub_child.string = prepare_formatted(sub_child.text)
cleaned_tag = child.extract()
@@ -594,11 +594,15 @@ def preprocess_pre_tags(chapter_tag):
def preprocess_code_tags(chapter_tag):
"""Function that emulates style of <code>, <kbd>, <var>"""
for code in chapter_tag.find_all(re.compile("code|kdb|var")):
code.name = 'span'
if code.parent.name == "pre":
continue
code.attrs['style'] = 'color:#c7254e; font-size: 14px; font-family: courier new,courier,monospace;'
for parent_tag in chapter_tag.find_all(re.compile("pre|p")):
for code in parent_tag.find_all(re.compile("code|kbd|var")):
# if code.name == "code":
# parent_tag.name = "pre"
code.name = "span"
if parent_tag.name == "pre":
continue
# if tags aren't in pre
code.attrs['style'] = 'font-size: 14px; font-family: courier new,courier,monospace;'
def prepare_title(title_of_chapter: str) -> str:
@@ -614,11 +618,11 @@ def prepare_content(title_str: str, content_tag: BeautifulSoup, remove_title_fro
"""Function that finalises processing/cleaning of the content
Parameters
----------
title_str : str
title_str: str
content_tag : BeautifulSoup
content_tag: BeautifulSoup
remove_title_from_chapter : bool
remove_title_from_chapter: bool
Steps
----------
@@ -629,10 +633,9 @@ def prepare_content(title_str: str, content_tag: BeautifulSoup, remove_title_fro
Returns
-------
str
Prepared content
prepared content: str
"""
# 0. cleaning \n
to_remove = []
for child in content_tag.contents: