Make todos & refactor code

This commit is contained in:
Kiryl
2021-11-02 12:06:34 +03:00
parent 8c37482616
commit 479695e185
5 changed files with 314 additions and 242 deletions

View File

@@ -8,8 +8,9 @@ from bs4 import BeautifulSoup
from premailer import transform
from itertools import takewhile
from src.livecarta_config import LiveCartaConfig
from src.util.color_reader import str2hex
from src.livecarta_config import LiveCartaConfig
cssutils.log.setLevel(CRITICAL)
@@ -211,9 +212,9 @@ def build_css_content(css_content):
class TagStyleConverter:
def __init__(self, tag_with_initial_style, tag_with_ultimate_style):
self.tag_with_initial_style = tag_with_initial_style # tag with inline style to be updated with style attribute
self.tag_initial_name = tag_with_initial_style.name
def __init__(self, tag_with_inline_style, tag_with_ultimate_style):
self.tag_with_inline_style = tag_with_inline_style # tag with inline style to be updated with style attribute
self.tag_initial_name = tag_with_inline_style.name
self.tag_with_ultimate_style = tag_with_ultimate_style # tag with inline style + style parsed from css file
self.style = self.preprocess_style()
@@ -293,32 +294,39 @@ class TagStyleConverter:
ultimate_style = ultimate_style.replace('background:', 'background-color:')
ultimate_style = ultimate_style.replace('list-style-image', 'list-style-type')
split_ultimate_style = ultimate_style.split(';') # make for repetition check and convert to px
split_ultimate_style = ultimate_style.replace('; ',';').split(';')
# check for another ; in style string in preprocess_style()
# splitting on ';' leaves an empty string in the list when the style ends with ';', so drop those entries
while '' in split_ultimate_style:
split_ultimate_style.remove('')
ultimate_style: str = self.process_indents_to_px(split_ultimate_style)
if self.tag_with_initial_style.attrs.get('style'):
# normalise each declaration: replace ':' plus any whitespace after it with a bare ':'
split_ultimate_style = [el.replace(re.search(r'(:\s*)', el).group(1), ':') for el in split_ultimate_style]
initial_style = self.tag_with_initial_style.attrs['style']
split_initial_style = initial_style.split(';')
if self.tag_with_inline_style.attrs.get('style'):
inline_style = self.tag_with_inline_style.attrs['style']
# check for another ; in style string in preprocess_style()
while '' in split_initial_style:
split_initial_style.remove('')
split_inline_style = inline_style.replace('; ',';').split(';')
# repetition check - if tag had already had inline style, add this to style parsed from css
repeat_styles = list(set(split_ultimate_style) & set(split_initial_style))
# splitting on ';' leaves an empty string in the list when the style ends with ';', so drop those entries
while '' in split_inline_style:
split_inline_style.remove('')
# normalise each declaration: replace ':' plus any whitespace after it with a bare ':'
split_inline_style = [el.replace(re.search(r'(:\s*)', el).group(1), ':') for el in split_inline_style]
# repetition check - if the tag already had inline styles that are not among the css styles, add them to the style parsed from css
repeat_styles = list(set(split_ultimate_style) & set(split_inline_style))
for item in repeat_styles:
split_initial_style.remove(item)
split_inline_style.remove(item)
if split_initial_style:
# if initial style is not empty - start convert and add to ultimate style
if split_inline_style:
# if the inline style is not empty - convert it and add it to the ultimate style
print('we enter repetition check', '\n')
initial_style: str = self.process_indents_to_px(split_initial_style)
ultimate_style += initial_style
inline_style: str = self.process_indents_to_px(split_inline_style)
ultimate_style += inline_style
ultimate_style: str = self.process_indents_to_px(split_ultimate_style)
return ultimate_style
def change_attrs_with_corresponding_tags(self):
@@ -330,15 +338,15 @@ class TagStyleConverter:
self.style = self.style.replace(s, '')
self.style = self.style.strip()
if i == 0:
self.tag_with_initial_style.name = LIVECARTA_STYLE_ATTRS_SHOULD_BE_TAG[(attr, value)]
new_tags.append(self.tag_with_initial_style)
self.tag_with_inline_style.name = LIVECARTA_STYLE_ATTRS_SHOULD_BE_TAG[(attr, value)]
new_tags.append(self.tag_with_inline_style)
else:
name = LIVECARTA_STYLE_ATTRS_SHOULD_BE_TAG[(attr, value)]
new_tag = BeautifulSoup(features='lxml').new_tag(name)
new_tags[-1].wrap(new_tag)
new_tags.append(new_tag)
top_tag = self.tag_with_initial_style
top_tag = self.tag_with_inline_style
if new_tags:
tmp_attrs = top_tag.attrs.copy()
@@ -355,10 +363,12 @@ class TagStyleConverter:
@staticmethod
def wrap_span_in_p_to_save_style_attrs(tag):
styles_cant_be_in_p = [attr for attr in LIVECARTA_STYLE_ATTRS
if attr not in ['text-align', 'text-indent', 'border-bottom']]
'''Function designed to save style attrs: attrs that cannot be in p -> span,
attrs that cannot be in span -> p'''
if tag.name == 'p' and tag.attrs.get('style'):
styles_cant_be_in_p = [attr for attr in LIVECARTA_STYLE_ATTRS
if attr not in ['text-align', 'text-indent', 'border-bottom']]
styles_to_be_saved = [attr in tag.attrs.get('style') for attr in styles_cant_be_in_p]
if any(styles_to_be_saved):
tag.name = 'span'
@@ -388,83 +398,81 @@ class TagStyleConverter:
tag.wrap(p_tag)
@staticmethod
def add_span_to_save_style_attrs_in_li(t):
if t.name == 'li' and t.attrs.get('style'):
def wrap_span_in_li_to_save_style_attrs(tag):
if tag.name == 'li' and tag.attrs.get('style'):
styles_cant_be_in_li = [attr for attr in LIVECARTA_STYLE_ATTRS if
attr not in ['text-align', 'list-style-type', 'border-bottom']]
attr not in ['text-align', 'list-style-type']]
check = [attr in t.attrs.get('style') for attr in styles_cant_be_in_li]
if any(check):
t.name = 'span'
styles_to_be_saved = [attr in tag.attrs.get('style') for attr in styles_cant_be_in_li]
if any(styles_to_be_saved):
tag.name = 'span'
li_tag = BeautifulSoup(features='lxml').new_tag('li')
old_style = t.attrs['style']
new_style = ''
span_style = tag.attrs['style']
li_style = ''
for possible_li_attrs_regexp in [re.compile(r'(text-align:(\w+);)'),
re.compile(r'(list-style-type:(\w+);)')]:
has_li_style_attrs = re.search(possible_li_attrs_regexp, old_style)
has_li_style_attrs = re.search(possible_li_attrs_regexp, span_style)
if has_li_style_attrs and has_li_style_attrs.group(1):
new_style += has_li_style_attrs.group(1)
old_style = old_style.replace(has_li_style_attrs.group(1), '')
li_style += has_li_style_attrs.group(1)
span_style = span_style.replace(has_li_style_attrs.group(1), '')
li_tag.attrs['style'] = new_style
t.attrs['style'] = old_style
t.wrap(li_tag)
li_tag.attrs['style'] = li_style
tag.attrs['style'] = span_style
tag.wrap(li_tag)
@staticmethod
def add_span_to_save_style_attrs_in_ul_ol(t):
if t.name in ['ul', 'ol'] and t.attrs.get('style'):
def wrap_span_in_ul_ol_to_save_style_attrs(tag):
if tag.name in ['ul', 'ol'] and tag.attrs.get('style'):
styles_cant_be_in_ul_ol = [attr for attr in LIVECARTA_STYLE_ATTRS if attr not in ['list-style-type']]
check = [attr in t.attrs.get('style') for attr in styles_cant_be_in_ul_ol]
check = [attr in tag.attrs.get('style') for attr in styles_cant_be_in_ul_ol]
if any(check):
t.name = 'span'
tag.name = 'span'
li_tag = BeautifulSoup(features='lxml').new_tag('ul')
old_style = t.attrs['style']
span_style = tag.attrs['style']
possible_li_attrs_regexp = re.compile(r'(list-style-type:(\w+);)')
has_li_style_attrs = re.search(possible_li_attrs_regexp, old_style)
has_li_style_attrs = re.search(possible_li_attrs_regexp, span_style)
if has_li_style_attrs and has_li_style_attrs.group(1):
new_style = has_li_style_attrs.group(1)
old_style = old_style.replace(new_style, '')
li_tag.attrs['style'] = new_style
t.attrs['style'] = old_style
t.wrap(li_tag)
oul_style = has_li_style_attrs.group(1)
span_style = span_style.replace(oul_style, '')
li_tag.attrs['style'] = oul_style
tag.attrs['style'] = span_style
tag.wrap(li_tag)
@staticmethod
def add_span_to_save_style_attrs(t):
no_style_in_livecarta_regexp = re.compile('(^h[1-9]$)')
def wrap_span_in_h_to_save_style_attrs(tag):
h_regexp = re.compile('(^h[1-9]$)')
if re.search(no_style_in_livecarta_regexp, t.name) and t.attrs.get('style'):
new_tag = BeautifulSoup(features='lxml').new_tag(t.name)
t.name = 'span'
t.wrap(new_tag)
style = t.attrs['style']
if re.search(h_regexp, tag.name) and tag.attrs.get('style'):
h_tag = BeautifulSoup(features='lxml').new_tag(tag.name)
tag.name = 'span'
tag.wrap(h_tag)
style = tag.attrs['style']
li_attrs_regexp = re.compile(r'(list-style-type:(\w+);)')
has_li_style_attr = re.search(li_attrs_regexp, style)
t.attrs['style'] = style if not has_li_style_attr else style.replace(has_li_style_attr.group(1), '')
tag.attrs['style'] = style if not has_li_style_attr else style.replace(has_li_style_attr.group(1), '')
def convert_initial_tag(self):
self.tag_with_initial_style = self.change_attrs_with_corresponding_tags()
self.wrap_span_in_p_to_save_style_attrs(self.tag_with_initial_style)
self.add_span_to_save_style_attrs_in_li(self.tag_with_initial_style)
self.add_span_to_save_style_attrs_in_ul_ol(self.tag_with_initial_style)
self.add_span_to_save_style_attrs(self.tag_with_initial_style)
return self.tag_with_initial_style
self.tag_with_inline_style = self.change_attrs_with_corresponding_tags()
self.wrap_span_in_p_to_save_style_attrs(self.tag_with_inline_style)
self.wrap_span_in_li_to_save_style_attrs(self.tag_with_inline_style)
self.wrap_span_in_ul_ol_to_save_style_attrs(self.tag_with_inline_style)
self.wrap_span_in_h_to_save_style_attrs(self.tag_with_inline_style)
return self.tag_with_inline_style
def convert_html_soup_with_css_style(html_soup: BeautifulSoup, css_text: str):
css_text = css_text.replace('@namespace epub "http://www.idpf.org/2007/ops";', '')
livecarta_tmp_ids = []
h_regex = f'(^h[1-9]$)'
could_have_style_in_livecarta_regexp = re.compile('(^div$)|(^p$)|(^span$)|(^li$)|(^ul$)|(^ol$)|(^td$)|(^th$)|' + h_regex)
could_have_style_in_livecarta_regexp = re.compile('(^div$)|(^p$)|(^span$)|(^li$)|(^ul$)|(^ol$)|(^td$)|(^th$)|(^h[1-9]$)')
tags_with_possible_style_attr = html_soup.find_all(could_have_style_in_livecarta_regexp)
for i, x in enumerate(tags_with_possible_style_attr):
x.attrs['livecarta_id'] = i
livecarta_tmp_ids.append(i)
# here we add css styles to inline style
# sometimes in html_with_css_styles
html_with_css_styles: str = transform(str(html_soup), css_text=css_text,
remove_classes=False,
external_styles=False,
@@ -474,6 +482,7 @@ def convert_html_soup_with_css_style(html_soup: BeautifulSoup, css_text: str):
inline_soup = BeautifulSoup(html_with_css_styles, features='lxml')
# go through tags with possible style attrs
for i in livecarta_tmp_ids:
tag_with_initial_style = html_soup.find(attrs={'livecarta_id': i})
tag_with_ultimate_style = inline_soup.find(attrs={'livecarta_id': i})