Formatting: documentation + optimization

This commit is contained in:
Kiryl
2022-06-06 16:37:42 +03:00
parent 002316f086
commit acb2ce48c2
3 changed files with 33 additions and 43 deletions

View File

@@ -75,8 +75,8 @@ def _preprocess_table(body_tag: BeautifulSoup):
"""Function to preprocess tables and tags(td|th|tr): style"""
tables = body_tag.find_all("table")
for table in tables:
ts = table.find_all(re.compile("td|th|tr"))
for t_tag in ts:
t_tags = table.find_all(re.compile("td|th|tr"))
for t_tag in t_tags:
style = t_tag.get('style')
width = ''
if style:
@@ -113,7 +113,6 @@ def _process_lists(body_tag: BeautifulSoup):
None
"""
li_tags = body_tag.find_all("li")
for li_tag in li_tags:
if li_tag.p:
@@ -268,7 +267,7 @@ def preprocess_footnotes(source_html_tag: Tag, href2soup_html: dict = None, note
return footnotes, new_noterefs_tags, new_footnotes_tags
def unwrap_structural_tags(body_tag: BeautifulSoup):
def unwrap_structural_tags(body_tag: BeautifulSoup) -> BeautifulSoup:
"""
Main function that works with the structure of the html. Makes changes in place.
Parameters
@@ -288,10 +287,10 @@ def unwrap_structural_tags(body_tag: BeautifulSoup):
Returns
-------
None
body_tag: Tag, BeautifulSoup
adjusted body_tag
"""
def _preserve_class_in_aside_tag(tag_):
"""to save css style inherited from class, copy class to aside tag (which is parent to tag_)"""
# this is for Wiley books with boxes
@@ -311,7 +310,7 @@ def unwrap_structural_tags(body_tag: BeautifulSoup):
Returns
-------
None
bool
"""
# this is for Wiley books with boxes
@@ -454,21 +453,19 @@ def unwrap_structural_tags(body_tag: BeautifulSoup):
tag = body_tag.new_tag('p')
tag.append(str(node))
node.replace_with(tag)
return body_tag
def get_tags_between_chapter_marks(first_id: str, href: str, html_soup: BeautifulSoup) -> list:
"""After processing on a first_id that corresponds to current chapter,
from initial html_soup all tags from current chapter are extracted
Parameters
----------
first_id:
Id that points to where a chapter starts: a Tag with class 'converter-chapter-mark'
href:
Name of current chapter's file
html_soup: Tag, soup object
html_soup: Tag
Soup object of current file
Returns
@@ -530,19 +527,17 @@ def _clean_wiley_block(block):
h.insert_before(BeautifulSoup(features='lxml').new_tag("br"))
def _preprocess_block_tags(chapter_tag):
def _preprocess_block_tags(chapter_tag: Tag):
"""Function preprocessing <block> tags"""
for block in chapter_tag.find_all("blockquote"):
if block.attrs.get('class') in ['feature1', 'feature2', 'feature3', 'feature4']:
_clean_wiley_block(block)
color = '#DDDDDD' if block.attrs.get(
'class') == 'feature1' else None
color = '#EEEEEE' if block.attrs.get(
'class') == 'feature2' else color
_wrap_block_tag_with_table(chapter_tag, block, bg_color=color)
block.insert_after(BeautifulSoup(features='lxml').new_tag("br"))
block.unwrap()
for block in chapter_tag.find_all("blockquote", attrs={"class": re.compile("feature[1234]")}):
_clean_wiley_block(block)
color = '#DDDDDD' if block.attrs.get(
'class') == 'feature1' else None
color = '#EEEEEE' if block.attrs.get(
'class') == 'feature2' else color
_wrap_block_tag_with_table(chapter_tag, block, bg_color=color)
block.insert_after(BeautifulSoup(features='lxml').new_tag("br"))
block.unwrap()
for future_block in chapter_tag.find_all("p", attrs={"class": re.compile("feature[1234]")}):
_clean_wiley_block(future_block)
@@ -647,8 +642,7 @@ def _preprocess_code_tags(chapter_tag: BeautifulSoup):
code.name = "span"
if code.parent.name == "pre":
continue
# if tags aren't in pre
# if tags aren't in pre and don't have style
if not code.attrs.get('style'):
code.attrs['style'] = 'font-size: 14px; font-family: courier new,courier,monospace;'