forked from LiveCarta/BookConverter
LAW-5444
This commit is contained in:
@@ -275,7 +275,7 @@ def unwrap_structural_tags(body_tag):
|
||||
:return: None
|
||||
"""
|
||||
|
||||
def _preserve_class_in_aside_tag(tag_):
|
||||
def preserve_class_in_aside_tag(tag_):
|
||||
"""to save css style inherited from class, copy class to aside tag (which is parent to tag_)"""
|
||||
# this is for Wiley books with boxes
|
||||
tag_class = tag_.attrs['class'] if not isinstance(
|
||||
@@ -561,8 +561,8 @@ def preprocess_pre_tags(chapter_tag):
|
||||
spans = pre.find_all("span")
|
||||
# if <pre> contains multiple <span> tags, a <br> must be added after the content of each one
|
||||
to_add_br = len(spans) > 1
|
||||
|
||||
for child in pre.children:
|
||||
copy_contents = pre.contents[:]
|
||||
for child in copy_contents:
|
||||
if isinstance(child, NavigableString):
|
||||
cleaned_text = prepare_formatted(str(child))
|
||||
sub_strings = re.split('\r\n|\n|\r', cleaned_text)
|
||||
@@ -573,8 +573,8 @@ def preprocess_pre_tags(chapter_tag):
|
||||
else:
|
||||
for sub_child in child.children:
|
||||
if isinstance(sub_child, NavigableString):
|
||||
cleaned_text2 = prepare_formatted(str(sub_child))
|
||||
sub_child.replace_with(NavigableString(cleaned_text2))
|
||||
cleaned_text = prepare_formatted(str(sub_child))
|
||||
sub_child.replace_with(NavigableString(cleaned_text))
|
||||
else:
|
||||
sub_child.string = prepare_formatted(sub_child.text)
|
||||
cleaned_tag = child.extract()
|
||||
@@ -594,11 +594,15 @@ def preprocess_pre_tags(chapter_tag):
|
||||
|
||||
def preprocess_code_tags(chapter_tag):
|
||||
"""Function that emulates style of <code>, <kbd>, <var>"""
|
||||
for code in chapter_tag.find_all(re.compile("code|kdb|var")):
|
||||
code.name = 'span'
|
||||
if code.parent.name == "pre":
|
||||
continue
|
||||
code.attrs['style'] = 'color:#c7254e; font-size: 14px; font-family: courier new,courier,monospace;'
|
||||
for parent_tag in chapter_tag.find_all(re.compile("pre|p")):
|
||||
for code in parent_tag.find_all(re.compile("code|kbd|var")):
|
||||
# if code.name == "code":
|
||||
# parent_tag.name = "pre"
|
||||
code.name = "span"
|
||||
if parent_tag.name == "pre":
|
||||
continue
|
||||
# if tags aren't in pre
|
||||
code.attrs['style'] = 'font-size: 14px; font-family: courier new,courier,monospace;'
|
||||
|
||||
|
||||
def prepare_title(title_of_chapter: str) -> str:
|
||||
@@ -614,11 +618,11 @@ def prepare_content(title_str: str, content_tag: BeautifulSoup, remove_title_fro
|
||||
"""Function that finalises processing/cleaning of the content
|
||||
Parameters
|
||||
----------
|
||||
title_str : str
|
||||
title_str: str
|
||||
|
||||
content_tag : BeautifulSoup
|
||||
content_tag: BeautifulSoup
|
||||
|
||||
remove_title_from_chapter : bool
|
||||
remove_title_from_chapter: bool
|
||||
|
||||
Steps
|
||||
----------
|
||||
@@ -629,10 +633,9 @@ def prepare_content(title_str: str, content_tag: BeautifulSoup, remove_title_fro
|
||||
|
||||
Returns
|
||||
-------
|
||||
str
|
||||
Prepared content
|
||||
|
||||
prepared content: str
|
||||
"""
|
||||
|
||||
# 0. cleaning \n
|
||||
to_remove = []
|
||||
for child in content_tag.contents:
|
||||
|
||||
Reference in New Issue
Block a user