forked from LiveCarta/BookConverter
Rewrite wrap_span_in_p function
This commit is contained in:
@@ -12,7 +12,6 @@ from src.util.color_reader import str2hex
|
|||||||
from src.livecarta_config import LiveCartaConfig
|
from src.livecarta_config import LiveCartaConfig
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
cssutils.log.setLevel(CRITICAL)
|
cssutils.log.setLevel(CRITICAL)
|
||||||
|
|
||||||
sizes_pr = [-1, 0.5, 0.56, 0.63, 0.69, 0.75, 0.81, 0.88, 0.94, 1.0, 1.06, 1.13, 1.19, 1.25, 1.31, 1.38, 1.44, 1.5, 1.56,
|
sizes_pr = [-1, 0.5, 0.56, 0.63, 0.69, 0.75, 0.81, 0.88, 0.94, 1.0, 1.06, 1.13, 1.19, 1.25, 1.31, 1.38, 1.44, 1.5, 1.56,
|
||||||
@@ -57,26 +56,28 @@ def convert_font_size(value):
|
|||||||
except ValueError:
|
except ValueError:
|
||||||
return ''
|
return ''
|
||||||
|
|
||||||
|
|
||||||
def convert_indents(value):
    """Convert a ``%``, ``em`` or ``pt`` length inside *value* to pixels.

    The first length token found (optional leading minus signs, a number,
    then one of the three units) is rewritten as ``<n>px`` everywhere it
    occurs in *value*.  The number is scaled by 6 for ``%``, 30 for ``em``
    and 1 for ``pt``.  The sign is dropped and only the whole part of the
    number is used, so ``1.25em`` becomes ``30px`` and ``-5%`` becomes
    ``30px``.

    Args:
        value: CSS value text, e.g. ``'2em'`` or ``'-5%'``.

    Returns:
        *value* with the token replaced, or *value* unchanged when it holds
        no numeric ``%``/``em``/``pt`` length (e.g. ``'30px'``, ``'inherit'``).
    """
    # NOTE(review): the original carried the rule of thumb
    # "30px = 3.2% = 1.25em = 23pt"; the factors below do not reproduce it
    # for % and pt -- confirm the intended scale before changing them.
    px_per_unit = {'%': 6, 'em': 30, 'pt': 1}

    # Require an actual number in front of the unit so that a bare unit
    # never reaches int() and a decimal value is consumed as one token
    # instead of being split at the dot.
    length_regexp = re.compile(r'-*(\d+(?:\.\d+)?|\.\d+)(%|em|pt)')
    has_length = length_regexp.search(value)
    if not has_length:
        return value

    number, unit = has_length.groups()
    # Only the whole part takes part in the conversion; '.5' counts as 0.
    whole_part = number.split('.')[0]
    pixels = int(whole_part or 0) * px_per_unit[unit]
    return value.replace(has_length.group(0), str(pixels) + 'px')
|
||||||
|
|
||||||
|
|
||||||
"""
|
"""
|
||||||
LIVECARTA_STYLE_ATTRS = { css property: value }
|
LIVECARTA_STYLE_ATTRS = { css property: value }
|
||||||
|
|
||||||
@@ -107,6 +108,7 @@ LIVECARTA_STYLE_ATTRS = {
|
|||||||
'border-right-width': [],
|
'border-right-width': [],
|
||||||
'border-left-width': [],
|
'border-left-width': [],
|
||||||
'border-bottom-width': [],
|
'border-bottom-width': [],
|
||||||
|
'border-top': [],
|
||||||
'border-bottom': [],
|
'border-bottom': [],
|
||||||
'list-style-type': [],
|
'list-style-type': [],
|
||||||
'list-style-image': [],
|
'list-style-image': [],
|
||||||
@@ -132,6 +134,7 @@ def get_text_color(x):
|
|||||||
color = color if color not in ['#000000', '#000', 'black'] else ''
|
color = color if color not in ['#000000', '#000', 'black'] else ''
|
||||||
return color
|
return color
|
||||||
|
|
||||||
|
|
||||||
LIVECARTA_STYLE_ATTRS_MAPPING = {
|
LIVECARTA_STYLE_ATTRS_MAPPING = {
|
||||||
'text-indent': convert_indents,
|
'text-indent': convert_indents,
|
||||||
'font-variant': lambda x: x,
|
'font-variant': lambda x: x,
|
||||||
@@ -147,6 +150,7 @@ LIVECARTA_STYLE_ATTRS_MAPPING = {
|
|||||||
'border-right-width': lambda x: x if x != '0' else '',
|
'border-right-width': lambda x: x if x != '0' else '',
|
||||||
'border-left-width': lambda x: x if x != '0' else '',
|
'border-left-width': lambda x: x if x != '0' else '',
|
||||||
'border-bottom-width': lambda x: x if x != '0' else '',
|
'border-bottom-width': lambda x: x if x != '0' else '',
|
||||||
|
'border-top': lambda x: x if x != '0' else '',
|
||||||
'border-bottom': lambda x: x if x != '0' else '',
|
'border-bottom': lambda x: x if x != '0' else '',
|
||||||
'list-style-type': lambda x: x if x in list_types else 'disc',
|
'list-style-type': lambda x: x if x in list_types else 'disc',
|
||||||
'list-style-image': lambda x: 'disc',
|
'list-style-image': lambda x: 'disc',
|
||||||
@@ -182,30 +186,35 @@ def check_style_to_be_tag(style) -> List[tuple]:
|
|||||||
to_remove.append(k)
|
to_remove.append(k)
|
||||||
return to_remove
|
return to_remove
|
||||||
|
|
||||||
|
|
||||||
def update_css_style_types_to_livecarta_convention(css_rule, style_type):
    """Bring one declaration of *css_rule* in line with LIVECARTA_STYLE_ATTRS.

    The declaration is blanked (set to ``''``) when its property name is not
    a key of ``LIVECARTA_STYLE_ATTRS``, or when that key lists allowed values
    and the declaration's value (with double quotes stripped) is not among
    them.  Otherwise, if ``LIVECARTA_STYLE_ATTRS_MAPPING`` has a converter
    for the property, the converted value is written back.

    Args:
        css_rule: rule whose ``style`` mapping is updated in place.
        style_type: declaration exposing ``name`` and ``value``.
    """
    prop_name = style_type.name

    if prop_name not in LIVECARTA_STYLE_ATTRS:
        # unknown property: blank it so it drops out of the css
        css_rule.style[prop_name] = ''
        return

    allowed_values = LIVECARTA_STYLE_ATTRS[prop_name]
    value = style_type.value.replace('"', '')

    # an empty list of allowed values means "no constraint on the value"
    if allowed_values and value not in allowed_values:
        css_rule.style[prop_name] = ''
        return

    if prop_name in LIVECARTA_STYLE_ATTRS_MAPPING:
        converter = LIVECARTA_STYLE_ATTRS_MAPPING[prop_name]
        css_rule.style[prop_name] = converter(value)
|
||||||
|
|
||||||
|
|
||||||
def build_css_content(css_content):
    """Parse *css_content*, normalise its style rules and return CSS text.

    Every declaration of every style rule is passed through
    ``update_css_style_types_to_livecarta_convention``; rules of any other
    type are left untouched.

    Args:
        css_content: CSS source handed to ``cssutils.parseString``.

    Returns:
        The decoded text of the processed stylesheet.
    """
    stylesheet = cssutils.parseString(css_content, validate=False)

    for rule in stylesheet:
        if rule.type != rule.STYLE_RULE:
            continue
        for declaration in rule.style:
            update_css_style_types_to_livecarta_convention(rule, declaration)

    # NOTE(review): _getCssText is an underscore-prefixed accessor; the call
    # is kept as-is to preserve behaviour.
    return stylesheet._getCssText().decode()
|
||||||
@@ -213,9 +222,11 @@ def build_css_content(css_content):
|
|||||||
|
|
||||||
class TagStyleConverter:
|
class TagStyleConverter:
|
||||||
def __init__(self, tag_with_inline_style, tag_with_ultimate_style):
    """Store both views of a tag and precompute its merged style.

    Args:
        tag_with_inline_style: tag carrying the inline style; this is the
            tag whose style attribute gets updated.
        tag_with_ultimate_style: the same tag with inline style plus the
            style parsed from the css file.
    """
    self.tag_with_ultimate_style = tag_with_ultimate_style
    self.tag_with_inline_style = tag_with_inline_style
    # remember the tag name as it was when the converter was created
    self.tag_initial_name = tag_with_inline_style.name
    # must run last: preprocess_style is called once both tags are stored
    self.style = self.preprocess_style()
|
||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
@@ -230,7 +241,8 @@ class TagStyleConverter:
|
|||||||
# white bg color not need to be checked as we do not write 'white bg color'
|
# white bg color not need to be checked as we do not write 'white bg color'
|
||||||
tag_with_bg = ['span', 'td', 'tr', 'p']
|
tag_with_bg = ['span', 'td', 'tr', 'p']
|
||||||
tag_will_be_saved = parent_tag.name in tag_with_bg
|
tag_will_be_saved = parent_tag.name in tag_with_bg
|
||||||
has_bg = parent_tag.attrs.get('style') and ('background' in parent_tag.attrs.get('style'))
|
has_bg = parent_tag.attrs.get('style') and (
|
||||||
|
'background' in parent_tag.attrs.get('style'))
|
||||||
if has_bg and tag_will_be_saved:
|
if has_bg and tag_will_be_saved:
|
||||||
return style_
|
return style_
|
||||||
|
|
||||||
@@ -256,7 +268,7 @@ class TagStyleConverter:
|
|||||||
if item[0] in ['text-indent', 'margin-left']:
|
if item[0] in ['text-indent', 'margin-left']:
|
||||||
item[1] = convert_indents(item[1])
|
item[1] = convert_indents(item[1])
|
||||||
clean_style += item[0] + ': ' + item[1] + '; '
|
clean_style += item[0] + ': ' + item[1] + '; '
|
||||||
|
|
||||||
margin_left_regexp = re.compile(
|
margin_left_regexp = re.compile(
|
||||||
r'(margin-left:( *-*\w+);*)')
|
r'(margin-left:( *-*\w+);*)')
|
||||||
text_indent_regexp = re.compile(
|
text_indent_regexp = re.compile(
|
||||||
@@ -267,63 +279,70 @@ class TagStyleConverter:
|
|||||||
#formula_of_indent: indent = abs(margin_left - text_indent)
|
#formula_of_indent: indent = abs(margin_left - text_indent)
|
||||||
if has_margin_left:
|
if has_margin_left:
|
||||||
num_ml = abs(int("".join(
|
num_ml = abs(int("".join(
|
||||||
filter(str.isdigit, str(has_margin_left.group(2))))))
|
filter(str.isdigit, str(has_margin_left.group(2))))))
|
||||||
|
|
||||||
if has_text_indent:
|
if has_text_indent:
|
||||||
num_ti = abs(int("".join(
|
num_ti = abs(int("".join(
|
||||||
filter(str.isdigit, str(has_text_indent.group(2))))))
|
filter(str.isdigit, str(has_text_indent.group(2))))))
|
||||||
clean_style = clean_style.replace(has_text_indent.group(1), 'text-indent: ' +
|
clean_style = clean_style.replace(has_text_indent.group(1), 'text-indent: ' +
|
||||||
str(abs(num_ml - num_ti)) + 'px; ')
|
str(abs(num_ml - num_ti)) + 'px; ')
|
||||||
clean_style = clean_style.replace(has_margin_left.group(1), '')
|
clean_style = clean_style.replace(has_margin_left.group(1), '')
|
||||||
return clean_style
|
return clean_style
|
||||||
|
|
||||||
clean_style = clean_style.replace(has_margin_left.group(1), 'text-indent: ' +
|
clean_style = clean_style.replace(has_margin_left.group(1), 'text-indent: ' +
|
||||||
str(abs(num_ml)) + 'px; ')
|
str(abs(num_ml)) + 'px; ')
|
||||||
return clean_style
|
return clean_style
|
||||||
|
|
||||||
elif has_text_indent:
|
elif has_text_indent:
|
||||||
clean_style = clean_style.replace(has_text_indent.group(1), 'text-indent: ' +
|
clean_style = clean_style.replace(has_text_indent.group(1), 'text-indent: ' +
|
||||||
str(abs(int("".join(
|
str(abs(int("".join(
|
||||||
filter(str.isdigit, str(has_text_indent.group(2))))))) + 'px; ')
|
filter(str.isdigit, str(has_text_indent.group(2))))))) + 'px; ')
|
||||||
return clean_style
|
return clean_style
|
||||||
return clean_style
|
return clean_style
|
||||||
|
|
||||||
def preprocess_style(self):
|
def preprocess_style(self):
|
||||||
ultimate_style = self.tag_with_ultimate_style.attrs.get('style') + ';'
|
ultimate_style = self.tag_with_ultimate_style.attrs.get('style') + ';'
|
||||||
ultimate_style = self.remove_white_if_no_bgcolor(ultimate_style, self.tag_with_ultimate_style)
|
ultimate_style = self.remove_white_if_no_bgcolor(
|
||||||
ultimate_style = ultimate_style.replace('background:', 'background-color:')
|
ultimate_style, self.tag_with_ultimate_style)
|
||||||
ultimate_style = ultimate_style.replace('list-style-image', 'list-style-type')
|
ultimate_style = ultimate_style.replace(
|
||||||
|
'background:', 'background-color:')
|
||||||
|
ultimate_style = ultimate_style.replace(
|
||||||
|
'list-style-image', 'list-style-type')
|
||||||
|
|
||||||
split_ultimate_style = ultimate_style.replace('; ',';').split(';')
|
split_ultimate_style = ultimate_style.replace('; ', ';').split(';')
|
||||||
|
|
||||||
# when we split style by ; and we have at the end ; that's why we have '' in list
|
# when we split style by ; and we have at the end ; that's why we have '' in list
|
||||||
while '' in split_ultimate_style:
|
while '' in split_ultimate_style:
|
||||||
split_ultimate_style.remove('')
|
split_ultimate_style.remove('')
|
||||||
|
|
||||||
# replace all spaces between ': & letter' to ':'
|
# replace all spaces between ': & letter' to ':'
|
||||||
split_ultimate_style = [el.replace(re.search(r'(:\s*)', el).group(1), ':') for el in split_ultimate_style]
|
split_ultimate_style = [el.replace(
|
||||||
|
re.search(r'(:\s*)', el).group(1), ':') for el in split_ultimate_style]
|
||||||
|
|
||||||
if self.tag_with_inline_style.attrs.get('style'):
|
if self.tag_with_inline_style.attrs.get('style'):
|
||||||
inline_style = self.tag_with_inline_style.attrs['style']
|
inline_style = self.tag_with_inline_style.attrs['style']
|
||||||
|
|
||||||
split_inline_style = inline_style.replace('; ',';').split(';')
|
split_inline_style = inline_style.replace('; ', ';').split(';')
|
||||||
|
|
||||||
# when we split style by ; and we have at the end ; that's why we have '' in list
|
# when we split style by ; and we have at the end ; that's why we have '' in list
|
||||||
while '' in split_inline_style:
|
while '' in split_inline_style:
|
||||||
split_inline_style.remove('')
|
split_inline_style.remove('')
|
||||||
|
|
||||||
# replace all spaces between ': & letter' to ':'
|
# replace all spaces between ': & letter' to ':'
|
||||||
split_inline_style = [el.replace(re.search(r'(:\s*)', el).group(1), ':') for el in split_inline_style]
|
split_inline_style = [el.replace(
|
||||||
|
re.search(r'(:\s*)', el).group(1), ':') for el in split_inline_style]
|
||||||
|
|
||||||
# repetition check - if the tag had already had inline style that isn't in the css styles, add this to style parsed from css
|
# repetition check - if the tag had already had inline style that isn't in the css styles, add this to style parsed from css
|
||||||
repeat_styles = list(set(split_ultimate_style) & set(split_inline_style))
|
repeat_styles = list(set(split_ultimate_style)
|
||||||
|
& set(split_inline_style))
|
||||||
for item in repeat_styles:
|
for item in repeat_styles:
|
||||||
split_inline_style.remove(item)
|
split_inline_style.remove(item)
|
||||||
|
|
||||||
if split_inline_style:
|
if split_inline_style:
|
||||||
# if inline style is not empty - start convert and add to ultimate style
|
# if inline style is not empty - start convert and add to ultimate style
|
||||||
print('we enter repetition check', '\n')
|
print('we enter repetition check', '\n')
|
||||||
inline_style: str = self.process_indents_to_px(split_inline_style)
|
inline_style: str = self.process_indents_to_px(
|
||||||
|
split_inline_style)
|
||||||
ultimate_style += inline_style
|
ultimate_style += inline_style
|
||||||
|
|
||||||
ultimate_style: str = self.process_indents_to_px(split_ultimate_style)
|
ultimate_style: str = self.process_indents_to_px(split_ultimate_style)
|
||||||
@@ -338,7 +357,8 @@ class TagStyleConverter:
|
|||||||
self.style = self.style.replace(s, '')
|
self.style = self.style.replace(s, '')
|
||||||
self.style = self.style.strip()
|
self.style = self.style.strip()
|
||||||
if i == 0:
|
if i == 0:
|
||||||
self.tag_with_inline_style.name = LIVECARTA_STYLE_ATTRS_SHOULD_BE_TAG[(attr, value)]
|
self.tag_with_inline_style.name = LIVECARTA_STYLE_ATTRS_SHOULD_BE_TAG[(
|
||||||
|
attr, value)]
|
||||||
new_tags.append(self.tag_with_inline_style)
|
new_tags.append(self.tag_with_inline_style)
|
||||||
else:
|
else:
|
||||||
name = LIVECARTA_STYLE_ATTRS_SHOULD_BE_TAG[(attr, value)]
|
name = LIVECARTA_STYLE_ATTRS_SHOULD_BE_TAG[(attr, value)]
|
||||||
@@ -351,7 +371,8 @@ class TagStyleConverter:
|
|||||||
if new_tags:
|
if new_tags:
|
||||||
tmp_attrs = top_tag.attrs.copy()
|
tmp_attrs = top_tag.attrs.copy()
|
||||||
top_tag.attrs = {}
|
top_tag.attrs = {}
|
||||||
top_tag2 = BeautifulSoup(features='lxml').new_tag(self.tag_initial_name)
|
top_tag2 = BeautifulSoup(features='lxml').new_tag(
|
||||||
|
self.tag_initial_name)
|
||||||
top_tag2.attrs = tmp_attrs
|
top_tag2.attrs = tmp_attrs
|
||||||
if self.style:
|
if self.style:
|
||||||
top_tag2.attrs['style'] = self.style
|
top_tag2.attrs['style'] = self.style
|
||||||
@@ -363,39 +384,36 @@ class TagStyleConverter:
|
|||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
def wrap_span_in_p_to_save_style_attrs(tag):
|
def wrap_span_in_p_to_save_style_attrs(tag):
|
||||||
'''Function designed to save style attrs that cannot be in p -> span
|
'''Function designed to save style attrs that cannot be in p -> span'''
|
||||||
that cannot be in span -> p'''
|
|
||||||
if tag.name == 'p' and tag.attrs.get('style'):
|
if tag.name == 'p' and tag.attrs.get('style'):
|
||||||
styles_cant_be_in_p = [attr for attr in LIVECARTA_STYLE_ATTRS
|
styles_cant_be_in_p = [attr for attr in LIVECARTA_STYLE_ATTRS
|
||||||
if attr not in ['text-align', 'text-indent', 'border-bottom']]
|
if attr not in ['text-align', 'text-indent', 'border-bottom', 'border-top']]
|
||||||
|
p_style = ''
|
||||||
|
initial_style = tag.attrs['style']
|
||||||
|
split_style = initial_style.replace('; ', ';').split(';')
|
||||||
|
possible_p_attrs_regexp = re.compile(
|
||||||
|
r'(text-align:)|(text-indent:)|(border-bottom:)|(border-top:)')
|
||||||
|
for item in split_style:
|
||||||
|
has_p_style_attrs = re.search(possible_p_attrs_regexp, item)
|
||||||
|
if has_p_style_attrs:
|
||||||
|
p_style += item + ';'
|
||||||
|
initial_style = initial_style.replace(item + ';', '')
|
||||||
|
|
||||||
styles_to_be_saved = [attr in tag.attrs.get('style') for attr in styles_cant_be_in_p]
|
# check that the property name matches exactly: not 'align' when we have 'text-align', or 'border' when we have 'border-top'
|
||||||
if any(styles_to_be_saved):
|
styles_to_be_saved_in_span = [((attr + ':') in initial_style) & (
|
||||||
|
'-' + attr not in initial_style) for attr in styles_cant_be_in_p]
|
||||||
|
if any(styles_to_be_saved_in_span):
|
||||||
|
# if find styles that cannot be in <p> -> wrap them in span
|
||||||
tag.name = 'span'
|
tag.name = 'span'
|
||||||
p_tag = BeautifulSoup(features='lxml').new_tag('p')
|
p_tag = BeautifulSoup(features='lxml').new_tag('p')
|
||||||
span_style = tag.attrs['style']
|
li_attrs_regexp = re.compile(r'(list-style-type:(\w+);)')
|
||||||
p_style = ''
|
has_li_style_attr = re.search(li_attrs_regexp, initial_style)
|
||||||
possible_p_attrs_regexp = re.compile(r'(text-align:( *\w+);*)|(text-indent:( *\w+);*)|(border-bottom:( *\w+);*)')
|
span_style = initial_style if not has_li_style_attr else initial_style.replace(
|
||||||
for i in range(span_style.count(';') + 1):
|
has_li_style_attr.group(1), '')
|
||||||
has_p_style_attrs = re.search(possible_p_attrs_regexp, span_style)
|
p_tag.attrs['style'] = p_style
|
||||||
if has_p_style_attrs:
|
|
||||||
if has_p_style_attrs.group(1):
|
|
||||||
p_style += has_p_style_attrs.group(1)
|
|
||||||
span_style = span_style.replace(has_p_style_attrs.group(1), '')
|
|
||||||
if has_p_style_attrs.group(3):
|
|
||||||
p_style += has_p_style_attrs.group(3)
|
|
||||||
span_style = span_style.replace(has_p_style_attrs.group(3), '')
|
|
||||||
if has_p_style_attrs.group(5):
|
|
||||||
p_style += span_style
|
|
||||||
span_style = span_style.replace(span_style, '')
|
|
||||||
|
|
||||||
p_tag.attrs['style'] = p_style
|
|
||||||
|
|
||||||
li_attrs_regexp = re.compile(r'(list-style-type:(\w+);)')
|
|
||||||
has_li_style_attr = re.search(li_attrs_regexp, span_style)
|
|
||||||
span_style = span_style if not has_li_style_attr else span_style.replace(has_li_style_attr.group(1), '')
|
|
||||||
tag.attrs['style'] = span_style
|
tag.attrs['style'] = span_style
|
||||||
tag.wrap(p_tag)
|
tag.wrap(p_tag)
|
||||||
|
else: tag.attrs['style'] = p_style
|
||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
def wrap_span_in_li_to_save_style_attrs(tag):
|
def wrap_span_in_li_to_save_style_attrs(tag):
|
||||||
@@ -403,7 +421,8 @@ class TagStyleConverter:
|
|||||||
styles_cant_be_in_li = [attr for attr in LIVECARTA_STYLE_ATTRS if
|
styles_cant_be_in_li = [attr for attr in LIVECARTA_STYLE_ATTRS if
|
||||||
attr not in ['text-align', 'list-style-type']]
|
attr not in ['text-align', 'list-style-type']]
|
||||||
|
|
||||||
styles_to_be_saved = [attr in tag.attrs.get('style') for attr in styles_cant_be_in_li]
|
styles_to_be_saved = [attr in tag.attrs.get(
|
||||||
|
'style') for attr in styles_cant_be_in_li]
|
||||||
if any(styles_to_be_saved):
|
if any(styles_to_be_saved):
|
||||||
tag.name = 'span'
|
tag.name = 'span'
|
||||||
li_tag = BeautifulSoup(features='lxml').new_tag('li')
|
li_tag = BeautifulSoup(features='lxml').new_tag('li')
|
||||||
@@ -412,10 +431,12 @@ class TagStyleConverter:
|
|||||||
|
|
||||||
for possible_li_attrs_regexp in [re.compile(r'(text-align:(\w+);)'),
|
for possible_li_attrs_regexp in [re.compile(r'(text-align:(\w+);)'),
|
||||||
re.compile(r'(list-style-type:(\w+);)')]:
|
re.compile(r'(list-style-type:(\w+);)')]:
|
||||||
has_li_style_attrs = re.search(possible_li_attrs_regexp, span_style)
|
has_li_style_attrs = re.search(
|
||||||
|
possible_li_attrs_regexp, span_style)
|
||||||
if has_li_style_attrs and has_li_style_attrs.group(1):
|
if has_li_style_attrs and has_li_style_attrs.group(1):
|
||||||
li_style += has_li_style_attrs.group(1)
|
li_style += has_li_style_attrs.group(1)
|
||||||
span_style = span_style.replace(has_li_style_attrs.group(1), '')
|
span_style = span_style.replace(
|
||||||
|
has_li_style_attrs.group(1), '')
|
||||||
|
|
||||||
li_tag.attrs['style'] = li_style
|
li_tag.attrs['style'] = li_style
|
||||||
tag.attrs['style'] = span_style
|
tag.attrs['style'] = span_style
|
||||||
@@ -424,16 +445,20 @@ class TagStyleConverter:
|
|||||||
@staticmethod
|
@staticmethod
|
||||||
def wrap_span_in_ul_ol_to_save_style_attrs(tag):
|
def wrap_span_in_ul_ol_to_save_style_attrs(tag):
|
||||||
if tag.name in ['ul', 'ol'] and tag.attrs.get('style'):
|
if tag.name in ['ul', 'ol'] and tag.attrs.get('style'):
|
||||||
styles_cant_be_in_ul_ol = [attr for attr in LIVECARTA_STYLE_ATTRS if attr not in ['list-style-type']]
|
styles_cant_be_in_ul_ol = [
|
||||||
|
attr for attr in LIVECARTA_STYLE_ATTRS if attr not in ['list-style-type']]
|
||||||
|
|
||||||
check = [attr in tag.attrs.get('style') for attr in styles_cant_be_in_ul_ol]
|
check = [attr in tag.attrs.get('style')
|
||||||
|
for attr in styles_cant_be_in_ul_ol]
|
||||||
if any(check):
|
if any(check):
|
||||||
tag.name = 'span'
|
tag.name = 'span'
|
||||||
li_tag = BeautifulSoup(features='lxml').new_tag('ul')
|
li_tag = BeautifulSoup(features='lxml').new_tag('ul')
|
||||||
span_style = tag.attrs['style']
|
span_style = tag.attrs['style']
|
||||||
|
|
||||||
possible_li_attrs_regexp = re.compile(r'(list-style-type:(\w+);)')
|
possible_li_attrs_regexp = re.compile(
|
||||||
has_li_style_attrs = re.search(possible_li_attrs_regexp, span_style)
|
r'(list-style-type:(\w+);)')
|
||||||
|
has_li_style_attrs = re.search(
|
||||||
|
possible_li_attrs_regexp, span_style)
|
||||||
if has_li_style_attrs and has_li_style_attrs.group(1):
|
if has_li_style_attrs and has_li_style_attrs.group(1):
|
||||||
oul_style = has_li_style_attrs.group(1)
|
oul_style = has_li_style_attrs.group(1)
|
||||||
span_style = span_style.replace(oul_style, '')
|
span_style = span_style.replace(oul_style, '')
|
||||||
@@ -452,7 +477,8 @@ class TagStyleConverter:
|
|||||||
style = tag.attrs['style']
|
style = tag.attrs['style']
|
||||||
li_attrs_regexp = re.compile(r'(list-style-type:(\w+);)')
|
li_attrs_regexp = re.compile(r'(list-style-type:(\w+);)')
|
||||||
has_li_style_attr = re.search(li_attrs_regexp, style)
|
has_li_style_attr = re.search(li_attrs_regexp, style)
|
||||||
tag.attrs['style'] = style if not has_li_style_attr else style.replace(has_li_style_attr.group(1), '')
|
tag.attrs['style'] = style if not has_li_style_attr else style.replace(
|
||||||
|
has_li_style_attr.group(1), '')
|
||||||
|
|
||||||
def convert_initial_tag(self):
|
def convert_initial_tag(self):
|
||||||
self.tag_with_inline_style = self.change_attrs_with_corresponding_tags()
|
self.tag_with_inline_style = self.change_attrs_with_corresponding_tags()
|
||||||
@@ -464,10 +490,13 @@ class TagStyleConverter:
|
|||||||
|
|
||||||
|
|
||||||
def convert_html_soup_with_css_style(html_soup: BeautifulSoup, css_text: str):
|
def convert_html_soup_with_css_style(html_soup: BeautifulSoup, css_text: str):
|
||||||
css_text = css_text.replace('@namespace epub "http://www.idpf.org/2007/ops";', '')
|
css_text = css_text.replace(
|
||||||
|
'@namespace epub "http://www.idpf.org/2007/ops";', '')
|
||||||
livecarta_tmp_ids = []
|
livecarta_tmp_ids = []
|
||||||
could_have_style_in_livecarta_regexp = re.compile('(^div$)|(^p$)|(^span$)|(^li$)|(^ul$)|(^ol$)|(^td$)|(^th$)|(^h[1-9]$)')
|
could_have_style_in_livecarta_regexp = re.compile(
|
||||||
tags_with_possible_style_attr = html_soup.find_all(could_have_style_in_livecarta_regexp)
|
'(^div$)|(^p$)|(^span$)|(^li$)|(^ul$)|(^ol$)|(^td$)|(^th$)|(^h[1-9]$)')
|
||||||
|
tags_with_possible_style_attr = html_soup.find_all(
|
||||||
|
could_have_style_in_livecarta_regexp)
|
||||||
for i, x in enumerate(tags_with_possible_style_attr):
|
for i, x in enumerate(tags_with_possible_style_attr):
|
||||||
x.attrs['livecarta_id'] = i
|
x.attrs['livecarta_id'] = i
|
||||||
livecarta_tmp_ids.append(i)
|
livecarta_tmp_ids.append(i)
|
||||||
@@ -488,7 +517,8 @@ def convert_html_soup_with_css_style(html_soup: BeautifulSoup, css_text: str):
|
|||||||
tag_with_ultimate_style = inline_soup.find(attrs={'livecarta_id': i})
|
tag_with_ultimate_style = inline_soup.find(attrs={'livecarta_id': i})
|
||||||
del tag_with_initial_style.attrs['livecarta_id']
|
del tag_with_initial_style.attrs['livecarta_id']
|
||||||
if tag_with_ultimate_style.attrs.get('style'):
|
if tag_with_ultimate_style.attrs.get('style'):
|
||||||
style_converter = TagStyleConverter(tag_with_initial_style, tag_with_ultimate_style)
|
style_converter = TagStyleConverter(
|
||||||
|
tag_with_initial_style, tag_with_ultimate_style)
|
||||||
style_converter.convert_initial_tag()
|
style_converter.convert_initial_tag()
|
||||||
|
|
||||||
return html_soup
|
return html_soup
|
||||||
@@ -500,7 +530,8 @@ if __name__ == '__main__':
|
|||||||
css_ = ebooklib_book.get_item_with_href('css/epub.css')
|
css_ = ebooklib_book.get_item_with_href('css/epub.css')
|
||||||
css_ = css_.get_content().decode()
|
css_ = css_.get_content().decode()
|
||||||
css_cleaned = build_css_content(css_)
|
css_cleaned = build_css_content(css_)
|
||||||
html_ = ebooklib_book.get_item_with_href('pr01s05.xhtml').get_body_content().decode()
|
html_ = ebooklib_book.get_item_with_href(
|
||||||
|
'pr01s05.xhtml').get_body_content().decode()
|
||||||
html_soup = BeautifulSoup(html_, features='lxml')
|
html_soup = BeautifulSoup(html_, features='lxml')
|
||||||
|
|
||||||
print(convert_html_soup_with_css_style(html_soup, css_cleaned))
|
print(convert_html_soup_with_css_style(html_soup, css_cleaned))
|
||||||
Reference in New Issue
Block a user