forked from LiveCarta/BookConverter
Function annotations
This commit is contained in:
@@ -28,24 +28,27 @@ list_types = ['circle', 'disc', 'armenian', 'decimal',
|
||||
'lower-roman', 'upper-alpha', 'upper-latin', 'upper-roman', 'none']
|
||||
|
||||
|
||||
def convert_tag_values(value):
|
||||
"""Function 1. converts values of tags from em/%/pt to px
|
||||
2. find closest font-size px
|
||||
def convert_tag_values(value: str) -> str:
|
||||
"""
|
||||
Function
|
||||
- converts values of tags from em/%/pt to px
|
||||
- finds the closest font-size value in px
|
||||
Parameters
|
||||
----------
|
||||
value: str
|
||||
|
||||
Returns
|
||||
-------
|
||||
converted value: str
|
||||
"""
|
||||
value: str
|
||||
|
||||
"""
|
||||
def find_closest_size(value):
|
||||
possible_sizes = list(takewhile(lambda x: value > x, sizes_pr))
|
||||
last_possible_size_index = sizes_pr.index(possible_sizes[-1])
|
||||
return sizes_px[last_possible_size_index]
|
||||
|
||||
font_size_regexp = re.compile(r'(^-*(\d*\.*\d+)%$)|(^-*(\d*\.*\d+)em$)|(^-*(\d*\.*\d+)pt$)')
|
||||
font_size_regexp = re.compile(
|
||||
r'(^-*(\d*\.*\d+)%$)|(^-*(\d*\.*\d+)em$)|(^-*(\d*\.*\d+)pt$)')
|
||||
has_style_attrs = re.search(font_size_regexp, value)
|
||||
if has_style_attrs:
|
||||
if has_style_attrs.group(1):
|
||||
@@ -61,8 +64,7 @@ def convert_tag_values(value):
|
||||
return value
|
||||
|
||||
|
||||
|
||||
"""
|
||||
"""
|
||||
Dictionary LIVECARTA_STYLE_ATTRS = { css property: value }
|
||||
Style properties that can be used to fit livecarta css style convention.
|
||||
If property has empty list, it means that any value can be converted.
|
||||
@@ -164,17 +166,20 @@ LIVECARTA_STYLE_ATTRS_SHOULD_BE_TAG = {
|
||||
|
||||
|
||||
def check_style_to_be_tag(style) -> List[tuple]:
|
||||
"""Function search style properties that can be converted to tags.
|
||||
"""
|
||||
Function searches style properties that can be converted to tags.
|
||||
It searches for them and prepares a list of properties to be removed from the style string
|
||||
Parameters
|
||||
----------
|
||||
style: str
|
||||
<tag style="...">
|
||||
|
||||
Returns
|
||||
-------
|
||||
properties to remove: list
|
||||
"""
|
||||
to_remove: list
|
||||
properties to remove
|
||||
|
||||
"""
|
||||
to_remove = []
|
||||
for k in LIVECARTA_STYLE_ATTRS_SHOULD_BE_TAG:
|
||||
if f'{k[0]}:{k[1]}' in style:
|
||||
@@ -203,7 +208,7 @@ def update_css_style_types_to_livecarta_convention(css_rule, style_type):
|
||||
|
||||
|
||||
def build_css_content(css_content):
|
||||
""" Build css content with livecarta convention """
|
||||
"""Build css content with livecarta convention"""
|
||||
sheet = cssutils.parseString(css_content, validate=False)
|
||||
|
||||
for css_rule in sheet:
|
||||
@@ -227,7 +232,7 @@ class TagStyleConverter:
|
||||
|
||||
@staticmethod
|
||||
def remove_white_if_no_bgcolor(style_, tag):
|
||||
""" Function remove white color if there is no text bg color """
|
||||
"""Function remove text white color if there is no bg color"""
|
||||
if 'background' in style_:
|
||||
return style_
|
||||
|
||||
@@ -264,9 +269,11 @@ class TagStyleConverter:
|
||||
item = item.split(':')
|
||||
if item[0] in ['text-indent', 'margin-left', 'margin']:
|
||||
if len(item[1].split(' ')) == 3:
|
||||
item[1] = convert_tag_values(item[1].split(' ')[-2]) # split returns middle value
|
||||
item[1] = convert_tag_values(item[1].split(
|
||||
' ')[-2]) # split returns middle value
|
||||
else:
|
||||
item[1] = convert_tag_values(item[1].split(' ')[-1]) # split returns last value
|
||||
item[1] = convert_tag_values(item[1].split(
|
||||
' ')[-1]) # split returns last value
|
||||
clean_style += item[0] + ': ' + item[1] + '; '
|
||||
|
||||
margin_left_regexp = re.compile(
|
||||
@@ -360,7 +367,7 @@ class TagStyleConverter:
|
||||
s = f'{attr}:{value};'
|
||||
self.style = self.style.replace(s, '')
|
||||
self.style = self.style.strip()
|
||||
if i == 0:
|
||||
if not i:
|
||||
self.tag_with_inline_style.name = LIVECARTA_STYLE_ATTRS_SHOULD_BE_TAG[(
|
||||
attr, value)]
|
||||
new_tags.append(self.tag_with_inline_style)
|
||||
@@ -388,7 +395,7 @@ class TagStyleConverter:
|
||||
|
||||
@staticmethod
|
||||
def wrap_span_in_p_to_save_style_attrs(tag):
|
||||
""" Function designed to save style attrs that cannot be in p -> span """
|
||||
"""Function designed to save style attrs that cannot be in p -> span"""
|
||||
if tag.name == 'p' and tag.attrs.get('style'):
|
||||
styles_cant_be_in_p = [attr for attr in LIVECARTA_STYLE_ATTRS
|
||||
if attr not in ['text-align', 'text-indent', 'border-bottom', 'border-top']]
|
||||
@@ -402,7 +409,6 @@ class TagStyleConverter:
|
||||
if has_p_style_attrs:
|
||||
p_style += item + ';'
|
||||
initial_style = initial_style.replace(item + ';', '')
|
||||
|
||||
# here check that this style is exactly the same. Not 'align' when we have 'text-align', or 'border' when we have 'border-top'
|
||||
styles_to_be_saved_in_span = [((attr + ':') in initial_style) & (
|
||||
'-' + attr not in initial_style) for attr in styles_cant_be_in_p]
|
||||
@@ -410,30 +416,30 @@ class TagStyleConverter:
|
||||
# if find styles that cannot be in <p> -> wrap them in span
|
||||
tag.name = 'span'
|
||||
p_tag = BeautifulSoup(features='lxml').new_tag('p')
|
||||
li_attrs_regexp = re.compile(r'(list-style-type:(\w+);)')
|
||||
has_li_style_attr = re.search(li_attrs_regexp, initial_style)
|
||||
span_style = initial_style if not has_li_style_attr else initial_style.replace(
|
||||
has_li_style_attr.group(1), '')
|
||||
p_attrs_regexp = re.compile(r'(list-style-type:(\w+);)')
|
||||
has_p_style_attr = re.search(p_attrs_regexp, initial_style)
|
||||
span_style = initial_style if not has_p_style_attr else initial_style.replace(
|
||||
has_p_style_attr.group(1), '')
|
||||
p_tag.attrs['style'] = p_style
|
||||
tag.attrs['style'] = span_style
|
||||
tag.wrap(p_tag)
|
||||
else: tag.attrs['style'] = p_style
|
||||
else:
|
||||
tag.attrs['style'] = p_style
|
||||
|
||||
@staticmethod
|
||||
def wrap_span_in_li_to_save_style_attrs(tag):
|
||||
""" Function designed to save style attrs that cannot be in li -> span """
|
||||
"""Function designed to save style attrs that cannot be in li -> span"""
|
||||
if tag.name == 'li' and tag.attrs.get('style'):
|
||||
styles_cant_be_in_li = [attr for attr in LIVECARTA_STYLE_ATTRS if
|
||||
attr not in ['text-align', 'list-style-type']]
|
||||
|
||||
styles_to_be_saved = [attr in tag.attrs.get(
|
||||
styles_to_be_saved_in_span = [attr in tag.attrs.get(
|
||||
'style') for attr in styles_cant_be_in_li]
|
||||
if any(styles_to_be_saved):
|
||||
if any(styles_to_be_saved_in_span):
|
||||
tag.name = 'span'
|
||||
li_tag = BeautifulSoup(features='lxml').new_tag('li')
|
||||
span_style = tag.attrs['style']
|
||||
li_style = ''
|
||||
|
||||
for possible_li_attrs_regexp in [re.compile(r'(text-align:(\w+);)'),
|
||||
re.compile(r'(list-style-type:(\w+);)')]:
|
||||
has_li_style_attrs = re.search(
|
||||
@@ -442,39 +448,38 @@ class TagStyleConverter:
|
||||
li_style += has_li_style_attrs.group(1)
|
||||
span_style = span_style.replace(
|
||||
has_li_style_attrs.group(1), '')
|
||||
|
||||
li_tag.attrs['style'] = li_style
|
||||
tag.attrs['style'] = span_style
|
||||
tag.wrap(li_tag)
|
||||
|
||||
@staticmethod
|
||||
def wrap_span_in_ul_ol_to_save_style_attrs(tag):
|
||||
""" Function designed to save style attrs that cannot be in ul/ol -> span """
|
||||
"""Function designed to save style attrs that cannot be in ul/ol -> span"""
|
||||
if tag.name in ['ul', 'ol'] and tag.attrs.get('style'):
|
||||
styles_cant_be_in_ul_ol = [
|
||||
attr for attr in LIVECARTA_STYLE_ATTRS if attr not in ['list-style-type']]
|
||||
|
||||
check = [attr in tag.attrs.get('style')
|
||||
for attr in styles_cant_be_in_ul_ol]
|
||||
if any(check):
|
||||
styles_to_be_saved_in_span = [attr in tag.attrs.get('style')
|
||||
for attr in styles_cant_be_in_ul_ol]
|
||||
if any(styles_to_be_saved_in_span):
|
||||
tag.name = 'span'
|
||||
li_tag = BeautifulSoup(features='lxml').new_tag('ul')
|
||||
oul_tag = BeautifulSoup(features='lxml').new_tag(tag.name)
|
||||
span_style = tag.attrs['style']
|
||||
|
||||
possible_li_attrs_regexp = re.compile(
|
||||
possible_uol_attrs_regexp = re.compile(
|
||||
r'(list-style-type:(\w+);)')
|
||||
has_li_style_attrs = re.search(
|
||||
possible_li_attrs_regexp, span_style)
|
||||
if has_li_style_attrs and has_li_style_attrs.group(1):
|
||||
oul_style = has_li_style_attrs.group(1)
|
||||
has_uol_style_attrs = re.search(
|
||||
possible_uol_attrs_regexp, span_style)
|
||||
if has_uol_style_attrs and has_uol_style_attrs.group(1):
|
||||
oul_style = has_uol_style_attrs.group(1)
|
||||
span_style = span_style.replace(oul_style, '')
|
||||
li_tag.attrs['style'] = oul_style
|
||||
oul_tag.attrs['style'] = oul_style
|
||||
tag.attrs['style'] = span_style
|
||||
tag.wrap(li_tag)
|
||||
tag.wrap(oul_tag)
|
||||
|
||||
@staticmethod
|
||||
def wrap_span_in_h_to_save_style_attrs(tag):
|
||||
""" Function designed to save style attrs that cannot be in h -> span """
|
||||
"""Function designed to save style attrs that cannot be in h -> span"""
|
||||
h_regexp = re.compile('(^h[1-9]$)')
|
||||
|
||||
if re.search(h_regexp, tag.name) and tag.attrs.get('style'):
|
||||
@@ -482,10 +487,10 @@ class TagStyleConverter:
|
||||
tag.name = 'span'
|
||||
tag.wrap(h_tag)
|
||||
style = tag.attrs['style']
|
||||
li_attrs_regexp = re.compile(r'(list-style-type:(\w+);)')
|
||||
has_li_style_attr = re.search(li_attrs_regexp, style)
|
||||
tag.attrs['style'] = style if not has_li_style_attr else style.replace(
|
||||
has_li_style_attr.group(1), '')
|
||||
h_attrs_regexp = re.compile(r'(list-style-type:(\w+);)')
|
||||
has_h_style_attr = re.search(h_attrs_regexp, style)
|
||||
tag.attrs['style'] = style if not has_h_style_attr else style.replace(
|
||||
has_h_style_attr.group(1), '')
|
||||
|
||||
def convert_initial_tag(self):
|
||||
self.tag_with_inline_style = self.change_attrs_with_corresponding_tags()
|
||||
@@ -496,8 +501,8 @@ class TagStyleConverter:
|
||||
return self.tag_with_inline_style
|
||||
|
||||
|
||||
def convert_html_soup_with_css_style(html_soup: BeautifulSoup, css_text: str):
|
||||
""" Function adds styles from .css to inline style """
|
||||
def convert_html_soup_with_css_style(html_soup: BeautifulSoup, css_text: str) -> BeautifulSoup:
|
||||
"""Function adds styles from .css to inline style"""
|
||||
css_text = css_text.replace(
|
||||
'@namespace epub "http://www.idpf.org/2007/ops";', '')
|
||||
livecarta_tmp_ids = []
|
||||
|
||||
Reference in New Issue
Block a user