Function annotations

This commit is contained in:
Kiryl
2022-04-29 17:44:07 +03:00
parent 8de1d0d042
commit 37533e9b67
5 changed files with 187 additions and 130 deletions

View File

@@ -28,24 +28,27 @@ list_types = ['circle', 'disc', 'armenian', 'decimal',
'lower-roman', 'upper-alpha', 'upper-latin', 'upper-roman', 'none']
def convert_tag_values(value):
"""Function 1. converts values of tags from em/%/pt to px
2. find closest font-size px
def convert_tag_values(value: str) -> str:
"""
Function
- converts values of tags from em/%/pt to px
- finds the closest font-size in px
Parameters
----------
value: str
Returns
-------
converted value: str
"""
value: str
"""
def find_closest_size(value):
possible_sizes = list(takewhile(lambda x: value > x, sizes_pr))
last_possible_size_index = sizes_pr.index(possible_sizes[-1])
return sizes_px[last_possible_size_index]
font_size_regexp = re.compile(r'(^-*(\d*\.*\d+)%$)|(^-*(\d*\.*\d+)em$)|(^-*(\d*\.*\d+)pt$)')
font_size_regexp = re.compile(
r'(^-*(\d*\.*\d+)%$)|(^-*(\d*\.*\d+)em$)|(^-*(\d*\.*\d+)pt$)')
has_style_attrs = re.search(font_size_regexp, value)
if has_style_attrs:
if has_style_attrs.group(1):
@@ -61,8 +64,7 @@ def convert_tag_values(value):
return value
"""
"""
Dictionary LIVECARTA_STYLE_ATTRS = { css property: value }
Style properties that can be used to fit livecarta css style convention.
If property has empty list, it means that any value can be converted.
@@ -164,17 +166,20 @@ LIVECARTA_STYLE_ATTRS_SHOULD_BE_TAG = {
def check_style_to_be_tag(style) -> List[tuple]:
"""Function search style properties that can be converted to tags.
"""
Function searches style properties that can be converted to tags.
It searches for them and prepares a list of properties to be removed from the style string
Parameters
----------
style: str
<tag style="...">
Returns
-------
properties to remove: list
"""
to_remove: list
properties to remove
"""
to_remove = []
for k in LIVECARTA_STYLE_ATTRS_SHOULD_BE_TAG:
if f'{k[0]}:{k[1]}' in style:
@@ -203,7 +208,7 @@ def update_css_style_types_to_livecarta_convention(css_rule, style_type):
def build_css_content(css_content):
""" Build css content with livecarta convention """
"""Build css content with livecarta convention"""
sheet = cssutils.parseString(css_content, validate=False)
for css_rule in sheet:
@@ -227,7 +232,7 @@ class TagStyleConverter:
@staticmethod
def remove_white_if_no_bgcolor(style_, tag):
""" Function remove white color if there is no text bg color """
"""Function remove text white color if there is no bg color"""
if 'background' in style_:
return style_
@@ -264,9 +269,11 @@ class TagStyleConverter:
item = item.split(':')
if item[0] in ['text-indent', 'margin-left', 'margin']:
if len(item[1].split(' ')) == 3:
item[1] = convert_tag_values(item[1].split(' ')[-2]) # split returns middle value
item[1] = convert_tag_values(item[1].split(
' ')[-2]) # split returns middle value
else:
item[1] = convert_tag_values(item[1].split(' ')[-1]) # split returns last value
item[1] = convert_tag_values(item[1].split(
' ')[-1]) # split returns last value
clean_style += item[0] + ': ' + item[1] + '; '
margin_left_regexp = re.compile(
@@ -360,7 +367,7 @@ class TagStyleConverter:
s = f'{attr}:{value};'
self.style = self.style.replace(s, '')
self.style = self.style.strip()
if i == 0:
if not i:
self.tag_with_inline_style.name = LIVECARTA_STYLE_ATTRS_SHOULD_BE_TAG[(
attr, value)]
new_tags.append(self.tag_with_inline_style)
@@ -388,7 +395,7 @@ class TagStyleConverter:
@staticmethod
def wrap_span_in_p_to_save_style_attrs(tag):
""" Function designed to save style attrs that cannot be in p -> span """
"""Function designed to save style attrs that cannot be in p -> span"""
if tag.name == 'p' and tag.attrs.get('style'):
styles_cant_be_in_p = [attr for attr in LIVECARTA_STYLE_ATTRS
if attr not in ['text-align', 'text-indent', 'border-bottom', 'border-top']]
@@ -402,7 +409,6 @@ class TagStyleConverter:
if has_p_style_attrs:
p_style += item + ';'
initial_style = initial_style.replace(item + ';', '')
# here check that the style name matches exactly: not 'align' when we have 'text-align', nor 'border' when we have 'border-top'
styles_to_be_saved_in_span = [((attr + ':') in initial_style) & (
'-' + attr not in initial_style) for attr in styles_cant_be_in_p]
@@ -410,30 +416,30 @@ class TagStyleConverter:
# if styles are found that cannot be in <p> -> wrap them in span
tag.name = 'span'
p_tag = BeautifulSoup(features='lxml').new_tag('p')
li_attrs_regexp = re.compile(r'(list-style-type:(\w+);)')
has_li_style_attr = re.search(li_attrs_regexp, initial_style)
span_style = initial_style if not has_li_style_attr else initial_style.replace(
has_li_style_attr.group(1), '')
p_attrs_regexp = re.compile(r'(list-style-type:(\w+);)')
has_p_style_attr = re.search(p_attrs_regexp, initial_style)
span_style = initial_style if not has_p_style_attr else initial_style.replace(
has_p_style_attr.group(1), '')
p_tag.attrs['style'] = p_style
tag.attrs['style'] = span_style
tag.wrap(p_tag)
else: tag.attrs['style'] = p_style
else:
tag.attrs['style'] = p_style
@staticmethod
def wrap_span_in_li_to_save_style_attrs(tag):
""" Function designed to save style attrs that cannot be in li -> span """
"""Function designed to save style attrs that cannot be in li -> span"""
if tag.name == 'li' and tag.attrs.get('style'):
styles_cant_be_in_li = [attr for attr in LIVECARTA_STYLE_ATTRS if
attr not in ['text-align', 'list-style-type']]
styles_to_be_saved = [attr in tag.attrs.get(
styles_to_be_saved_in_span = [attr in tag.attrs.get(
'style') for attr in styles_cant_be_in_li]
if any(styles_to_be_saved):
if any(styles_to_be_saved_in_span):
tag.name = 'span'
li_tag = BeautifulSoup(features='lxml').new_tag('li')
span_style = tag.attrs['style']
li_style = ''
for possible_li_attrs_regexp in [re.compile(r'(text-align:(\w+);)'),
re.compile(r'(list-style-type:(\w+);)')]:
has_li_style_attrs = re.search(
@@ -442,39 +448,38 @@ class TagStyleConverter:
li_style += has_li_style_attrs.group(1)
span_style = span_style.replace(
has_li_style_attrs.group(1), '')
li_tag.attrs['style'] = li_style
tag.attrs['style'] = span_style
tag.wrap(li_tag)
@staticmethod
def wrap_span_in_ul_ol_to_save_style_attrs(tag):
""" Function designed to save style attrs that cannot be in ul/ol -> span """
"""Function designed to save style attrs that cannot be in ul/ol -> span"""
if tag.name in ['ul', 'ol'] and tag.attrs.get('style'):
styles_cant_be_in_ul_ol = [
attr for attr in LIVECARTA_STYLE_ATTRS if attr not in ['list-style-type']]
check = [attr in tag.attrs.get('style')
for attr in styles_cant_be_in_ul_ol]
if any(check):
styles_to_be_saved_in_span = [attr in tag.attrs.get('style')
for attr in styles_cant_be_in_ul_ol]
if any(styles_to_be_saved_in_span):
tag.name = 'span'
li_tag = BeautifulSoup(features='lxml').new_tag('ul')
oul_tag = BeautifulSoup(features='lxml').new_tag(tag.name)
span_style = tag.attrs['style']
possible_li_attrs_regexp = re.compile(
possible_uol_attrs_regexp = re.compile(
r'(list-style-type:(\w+);)')
has_li_style_attrs = re.search(
possible_li_attrs_regexp, span_style)
if has_li_style_attrs and has_li_style_attrs.group(1):
oul_style = has_li_style_attrs.group(1)
has_uol_style_attrs = re.search(
possible_uol_attrs_regexp, span_style)
if has_uol_style_attrs and has_uol_style_attrs.group(1):
oul_style = has_uol_style_attrs.group(1)
span_style = span_style.replace(oul_style, '')
li_tag.attrs['style'] = oul_style
oul_tag.attrs['style'] = oul_style
tag.attrs['style'] = span_style
tag.wrap(li_tag)
tag.wrap(oul_tag)
@staticmethod
def wrap_span_in_h_to_save_style_attrs(tag):
""" Function designed to save style attrs that cannot be in h -> span """
"""Function designed to save style attrs that cannot be in h -> span"""
h_regexp = re.compile('(^h[1-9]$)')
if re.search(h_regexp, tag.name) and tag.attrs.get('style'):
@@ -482,10 +487,10 @@ class TagStyleConverter:
tag.name = 'span'
tag.wrap(h_tag)
style = tag.attrs['style']
li_attrs_regexp = re.compile(r'(list-style-type:(\w+);)')
has_li_style_attr = re.search(li_attrs_regexp, style)
tag.attrs['style'] = style if not has_li_style_attr else style.replace(
has_li_style_attr.group(1), '')
h_attrs_regexp = re.compile(r'(list-style-type:(\w+);)')
has_h_style_attr = re.search(h_attrs_regexp, style)
tag.attrs['style'] = style if not has_h_style_attr else style.replace(
has_h_style_attr.group(1), '')
def convert_initial_tag(self):
self.tag_with_inline_style = self.change_attrs_with_corresponding_tags()
@@ -496,8 +501,8 @@ class TagStyleConverter:
return self.tag_with_inline_style
def convert_html_soup_with_css_style(html_soup: BeautifulSoup, css_text: str):
""" Function adds styles from .css to inline style """
def convert_html_soup_with_css_style(html_soup: BeautifulSoup, css_text: str) -> BeautifulSoup:
"""Function adds styles from .css to inline style"""
css_text = css_text.replace(
'@namespace epub "http://www.idpf.org/2007/ops";', '')
livecarta_tmp_ids = []