This commit is contained in:
Kiryl
2022-04-28 16:26:49 +03:00
parent 46064bf247
commit c10190662b
3 changed files with 82 additions and 86 deletions

View File

@@ -14,11 +14,11 @@ from src.livecarta_config import LiveCartaConfig
cssutils.log.setLevel(CRITICAL)
sizes_pr = [-1, 0.5, 0.56, 0.63, 0.69, 0.75, 0.81, 0.88, 0.94, 1.0, 1.06, 1.13, 1.19, 1.25, 1.31, 1.38, 1.44, 1.5, 1.56,
sizes_pr = [-100, -1, 0.5, 0.56, 0.63, 0.69, 0.75, 0.81, 0.88, 0.94, 1.0, 1.06, 1.13, 1.19, 1.25, 1.31, 1.38, 1.44, 1.5, 1.56,
1.63, 1.69, 1.75, 1.81, 1.88, 1.94, 2.0, 2.06, 2.13, 2.19, 2.25, 2.31, 2.38, 2.44, 2.5, 2.56, 2.63, 2.69,
2.75, 2.81, 2.88, 2.94, 3.0, 4.0, 5.0]
sizes_px = ['10px', '10px', '11px', '12px', '13px', '14px', '15px', '16px', '17px', '18px', '19px', '20px', '21px',
sizes_px = ['0px', '10px', '10px', '11px', '12px', '13px', '14px', '15px', '16px', '17px', '18px', '19px', '20px', '21px',
'22px', '23px', '24px', '25px', '26px', '27px', '28px', '29px', '30px', '31px', '32px', '33px', '34px',
'35px', '36px', '37px', '38px', '39px', '40px', '41px', '42px', '43px', '44px', '45px', '46px', '47px',
'48px', '49px', '50px', '64px', '72px']
@@ -28,61 +28,42 @@ list_types = ['circle', 'disc', 'armenian', 'decimal',
'lower-roman', 'upper-alpha', 'upper-latin', 'upper-roman', 'none']
def convert_font_size(value):
""" Function converts font-size in mapping """
if 'pt' in value:
if int(value.replace('pt', '')) == LiveCartaConfig.LIVECARTA_DEFAULT_FONT_SIZE:
return ''
else:
return value.replace('pt', 'px')
def convert_tag_values(value):
"""Function 1. converts values of tags from em/%/pt to px
2. finds the closest font-size px
Parameters
----------
value: str
if value == '100%':
return ''
try:
if '%' in value:
value = float(value.replace('%', ''))
value = value / 100.0
elif 'em' in value:
value = float(value.replace('em', ''))
else:
return ''
if value > 5:
return ''
Returns
-------
converted value: str
"""
def find_closest_size(value):
possible_sizes = list(takewhile(lambda x: value > x, sizes_pr))
last_possible_size_index = sizes_pr.index(possible_sizes[-1])
return sizes_px[last_possible_size_index]
except ValueError:
return ''
def convert_indents(value):
""" Function converts text-indent and margin-left values to px """
# 30px = 3.2% = 1.25em = 23pt
text_indent_regexp = re.compile(r'(-*\w+%)|((-*\w*).*em)|(-*\w+pt)')
has_style_attrs = re.search(text_indent_regexp, value)
font_size_regexp = re.compile(r'(^-*(\d*\.*\d+)%$)|(^-*(\d*\.*\d+)em$)|(^-*(\d*\.*\d+)pt$)')
has_style_attrs = re.search(font_size_regexp, value)
if has_style_attrs:
if has_style_attrs.group(1):
value = value.replace(has_style_attrs.group(1),
str(abs(int("0" + "".join(filter(str.isdigit, str(has_style_attrs.group(1))))) * 6)) +
'px')
elif has_style_attrs.group(2):
value = value.replace(has_style_attrs.group(2),
str(abs(int("0" + "".join(filter(str.isdigit, str(has_style_attrs.group(3))))) * 30)) +
'px')
elif has_style_attrs.group(4):
value = value.replace(has_style_attrs.group(4),
str(abs(int("0" + "".join(filter(str.isdigit, str(has_style_attrs.group(4))))))) + 'px')
value = float(value.replace('%', '')) / 100.0
return find_closest_size(value)
elif has_style_attrs.group(3):
value = float(value.replace('em', ''))
return find_closest_size(value)
elif has_style_attrs.group(5):
return value.replace('pt', 'px')
else:
return ''
return value
"""
LIVECARTA_STYLE_ATTRS = { css property: value }
"""
Dictionary LIVECARTA_STYLE_ATTRS = { css property: value }
Style properties that can be used to fit the livecarta css style convention.
If a property has an empty list, it means that any value can be converted.
If a property has a non-empty list, it means that only certain property-value combinations can be transformed.
@@ -115,7 +96,8 @@ LIVECARTA_STYLE_ATTRS = {
'list-style-type': [],
'list-style-image': [],
'margin-left': [],
'margin-top': []
'margin-top': [],
'margin': [],
}
@@ -132,18 +114,18 @@ def get_text_color(x):
"""
LIVECARTA_STYLE_ATTRS_MAPPING = { property: mapping function }
Dictionary LIVECARTA_STYLE_ATTRS_MAPPING = { property: mapping function }
Warning, if LIVECARTA_STYLE_ATTRS is changed, LIVECARTA_STYLE_ATTRS_MAPPING should be updated
to suit livecarta style convention.
"""
LIVECARTA_STYLE_ATTRS_MAPPING = {
'text-indent': convert_indents,
'text-indent': convert_tag_values,
'font-variant': lambda x: x,
'text-align': lambda x: x,
'font': lambda x: '',
'font-family': lambda x: LiveCartaConfig.font_correspondence_table.get(x) or LiveCartaConfig.font_correspondence_table.get(x.capitalize()),
'font-size': convert_font_size,
'font-size': convert_tag_values,
'color': get_text_color,
'background-color': get_bg_color,
'background': get_bg_color,
@@ -156,8 +138,9 @@ LIVECARTA_STYLE_ATTRS_MAPPING = {
'border-bottom': lambda x: x if x != '0' else '',
'list-style-type': lambda x: x if x in list_types else 'disc',
'list-style-image': lambda x: 'disc',
'margin-left': convert_indents,
'margin-top': convert_indents
'margin-left': convert_tag_values,
'margin-top': convert_tag_values,
'margin': convert_tag_values,
}
"""
@@ -181,10 +164,17 @@ LIVECARTA_STYLE_ATTRS_SHOULD_BE_TAG = {
def check_style_to_be_tag(style) -> List[tuple]:
"""
Some css style properties converts to tags.
Search for them and prepare list of properties to be removed from style string
"""Function searches for style properties that can be converted to tags.
It searches for them and prepares a list of properties to be removed from the style string
Parameters
----------
style: str
<tag style="...">
Returns
-------
properties to remove: list
"""
to_remove = []
for k in LIVECARTA_STYLE_ATTRS_SHOULD_BE_TAG:
if f'{k[0]}:{k[1]}' in style:
@@ -267,37 +257,40 @@ class TagStyleConverter:
@staticmethod
def process_indents_to_px(split_style: dict) -> str:
""" Function cleans using convert_indents() style string and returns new clean_style """
"""Function cleans style string using convert_tag_values() and returns new clean_style"""
split_style = [k + ":" + v for k, v in split_style.items()]
clean_style = ''
for item in split_style:
item = item.split(':')
if item[0] in ['text-indent', 'margin-left']:
item[1] = convert_indents(item[1])
if item[0] in ['text-indent', 'margin-left', 'margin']:
if len(item[1].split(' ')) == 3:
item[1] = convert_tag_values(item[1].split(' ')[-2]) # split returns middle value
else:
item[1] = convert_tag_values(item[1].split(' ')[-1]) # split returns last value
clean_style += item[0] + ': ' + item[1] + '; '
margin_left_regexp = re.compile(
r'(margin-left: *(-*\w+);*)')
r'((margin-left|margin): *(-*\w+);*)')
text_indent_regexp = re.compile(
r'(text-indent: *(-*\w+);*)')
has_margin_left = re.search(margin_left_regexp, clean_style)
has_margin = re.search(margin_left_regexp, clean_style)
has_text_indent = re.search(text_indent_regexp, clean_style)
# formula_of_indent: indent = abs(margin_left - text_indent)
if has_margin_left:
num_ml = abs(int("0" + "".join(
filter(str.isdigit, str(has_margin_left.group(2))))))
# formula_of_indent: indent = abs(margin - text_indent)
if has_margin:
num_m = abs(int("0" + "".join(
filter(str.isdigit, str(has_margin.group(3))))))
if has_text_indent:
num_ti = abs(int("0" + "".join(
filter(str.isdigit, str(has_text_indent.group(2))))))
clean_style = clean_style.replace(has_text_indent.group(1), 'text-indent: ' +
str(abs(num_ml - num_ti)) + 'px; ')
clean_style = clean_style.replace(has_margin_left.group(1), '')
str(abs(num_m - num_ti)) + 'px; ')
clean_style = clean_style.replace(has_margin.group(1), '')
return clean_style
clean_style = clean_style.replace(has_margin_left.group(1), 'text-indent: ' +
str(abs(num_ml)) + 'px; ')
clean_style = clean_style.replace(has_margin.group(1), 'text-indent: ' +
str(abs(num_m)) + 'px; ')
return clean_style
elif has_text_indent:
@@ -309,7 +302,7 @@ class TagStyleConverter:
def preprocess_style(self):
def remove_extra_spaces(style: str) -> dict:
""" Function to remove extra spaces in style to process clean_style """
"""Function to remove extra spaces in style to process clean_style"""
# replace all spaces between '; & letter' to ';'
style = re.sub(r"; *", ";", style)
split_style: List = style.split(';')
@@ -509,7 +502,7 @@ def convert_html_soup_with_css_style(html_soup: BeautifulSoup, css_text: str):
'@namespace epub "http://www.idpf.org/2007/ops";', '')
livecarta_tmp_ids = []
could_have_style_in_livecarta_regexp = re.compile(
'(^div$)|(^p$)|(^span$)|(^li$)|(^ul$)|(^ol$)|(^td$)|(^th$)|(^h[1-9]$)')
'(^div$)|(^p$)|(^span$)|(^code$)|(^kbd$)|(^var$)|(^li$)|(^ul$)|(^ol$)|(^td$)|(^th$)|(^h[1-9]$)')
tags_with_possible_style_attr = html_soup.find_all(
could_have_style_in_livecarta_regexp)
for i, x in enumerate(tags_with_possible_style_attr):