diff --git a/src/css_reader.py b/src/css_reader.py
index bbb4710..e31d562 100644
--- a/src/css_reader.py
+++ b/src/css_reader.py
@@ -179,21 +179,22 @@ def clean_css(css):
     return css_text
 
 
-def add_inline_style_to_html_soup(soup1, css_text):
-    css_text = css_text.replace('@namespace epub "http://www.idpf.org/2007/ops";', '')
-    livecarta_tmp_ids = []
-    h_regex = f'(^h[1-9]$)'
-    could_have_style_in_livecarta_regexp = re.compile('(^p$)|(^span$)|(^li$)|(^ul$)|(^ol$)|(^td$)|(^th$)|' + h_regex)
-    elements_with_possible_style_attr = soup1.find_all(could_have_style_in_livecarta_regexp)
-    for i, x in enumerate(elements_with_possible_style_attr):
-        x.attrs['livecarta_id'] = i
-        livecarta_tmp_ids.append(i)
-    html_with_inline_style = transform(str(soup1), css_text=css_text,
-                                       remove_classes=False,
-                                       external_styles=False,
-                                       disable_validation=True)
-    soup2 = BeautifulSoup(html_with_inline_style, features='lxml')
+class TagStyleConverter:
+    """Apply the inlined style of one tag to its counterpart in the original soup.
+
+    ``tag`` is the element that gets modified in place; ``tag_with_style``
+    is the matching element of the html produced by the css inlining and is
+    only read for its ``style`` attribute.
+    """
+
+    def __init__(self, tag, tag_with_style):
+        self.tag = tag
+        # Kept because change_attrs_with_corresponding_tags() may rename the tag.
+        self.tag_initial_name = tag.name
+        self.tag_with_style = tag_with_style
+        self.style = self.preprocess_style()
 
+    @staticmethod
     def remove_white_if_no_bgcolor(style_, tag):
         if 'background' in style_:
             return style_
@@ -221,57 +222,132 @@ def add_inline_style_to_html_soup(soup1, css_text):
             style_ = style_.replace('color:white;', '')
         return style_
 
+    def preprocess_style(self):
+        """Return the inlined style of ``tag_with_style``, normalised for conversion.
+
+        A ``;`` is appended, the result is passed through
+        remove_white_if_no_bgcolor() and ``background:`` is renamed to
+        ``background-color:``.
+        """
+        style = self.tag_with_style.attrs.get('style') + ';'
+        style = self.remove_white_if_no_bgcolor(style, self.tag_with_style)
+        style = style.replace('background:', 'background-color:')
+        return style
+
+    def change_attrs_with_corresponding_tags(self):
+        """Replace style declarations that have a tag equivalent with real tags.
+
+        Each (property, value) pair returned by check_style_to_be_tag() is
+        removed from ``self.style``. The first pair renames ``self.tag`` to the
+        tag name found in LIVECARTA_STYLE_ATTRS_SHOULD_BE_TAG; every further
+        pair wraps the previous tag in one more such tag. If anything was
+        converted, a new tag with the initial name takes over the attributes
+        and is wrapped around the whole chain.
+
+        Returns the outermost tag, which carries the remaining style.
+        """
+        to_remove = check_style_to_be_tag(self.style)
+        new_tags = []
+        for i, (p, v) in enumerate(to_remove):
+            s = f'{p}:{v};'
+            self.style = self.style.replace(s, '')
+            self.style = self.style.strip()
+            if i == 0:
+                self.tag.name = LIVECARTA_STYLE_ATTRS_SHOULD_BE_TAG[(p, v)]
+                new_tags.append(self.tag)
+            else:
+                name = LIVECARTA_STYLE_ATTRS_SHOULD_BE_TAG[(p, v)]
+                new_tag = BeautifulSoup(features='lxml').new_tag(name)
+                new_tags[-1].wrap(new_tag)
+                new_tags.append(new_tag)
+
+        top_tag = self.tag
+
+        if new_tags:
+            # Move the attributes to the wrapper instead of copying them, as
+            # the pre-refactoring code did; a copy would leave id/class
+            # duplicated on the renamed inner tag.
+            tmp_attrs = self.tag.attrs
+            self.tag.attrs = {}
+            top_tag = BeautifulSoup(features='lxml').new_tag(self.tag_initial_name)
+            top_tag.attrs = tmp_attrs
+            if self.style:
+                top_tag.attrs['style'] = self.style
+            else:
+                # Nothing is left after the conversion: drop a stale style
+                # attribute taken over from the original tag.
+                top_tag.attrs.pop('style', None)
+            new_tags[-1].wrap(top_tag)
+        else:
+            top_tag.attrs['style'] = self.style
+
+        return top_tag
+
+    @staticmethod
     def wrap_p_to_save_style_attrs(t):
-        styles_cant_be_in_p = [attr for attr in LIVECARTA_STYLE_ATTRS if attr not in ['text-align', 'text-indent']]
+        """Turn a styled <p> into a <span> wrapped in a new <p>.
+
+        Applies only when the style of ``t`` contains an attribute from
+        LIVECARTA_STYLE_ATTRS other than text-align / text-indent. Those two
+        declarations are moved to the new <p>; the rest stays on the <span>.
+        """
+        styles_cant_be_in_p = [attr for attr in LIVECARTA_STYLE_ATTRS
+                               if attr not in ['text-align', 'text-indent']]
         if t.name == 'p' and t.attrs.get('style'):
             check = [attr in t.attrs.get('style') for attr in styles_cant_be_in_p]
             if any(check):
                 t.name = 'span'
-                t.wrap( BeautifulSoup(features='lxml').new_tag('p'))
+                p_tag = BeautifulSoup(features='lxml').new_tag('p')
+                old_style = t.attrs['style']
+                # Collect every match: a single re.search() on an alternation
+                # yields only one of the two declarations. [^;]* also accepts
+                # values such as 1.5em, -2em or 10% that \w+ does not.
+                possible_p_attrs_regexp = re.compile(r'(?<![\w-])(?:text-align|text-indent):[^;]*;')
+                new_style = ''.join(possible_p_attrs_regexp.findall(old_style))
+                if new_style:
+                    p_tag.attrs['style'] = new_style
+
+                t.attrs['style'] = possible_p_attrs_regexp.sub('', old_style).strip()
+                t.wrap(p_tag)
+
+    def convert_initial_tag(self):
+        """Convert ``self.tag`` according to its style and return the outermost resulting tag."""
+        del self.tag.attrs['livecarta_id']
+        self.tag = self.change_attrs_with_corresponding_tags()
+        self.wrap_p_to_save_style_attrs(self.tag)
+        return self.tag
+
+
+def add_inline_style_to_html_soup(soup1, css_text):
+    """Apply ``css_text`` to the tags of ``soup1`` as inline styles / tags and return ``soup1``.
+
+    p, span, li, ul, ol, td, th and h1-h9 tags get a temporary
+    ``livecarta_id`` attribute, the html is passed through transform()
+    together with the css, and the style found on each marked tag of the
+    transformed html is applied to the original tag by TagStyleConverter.
+    The temporary attribute is removed from every processed tag.
+    """
+    css_text = css_text.replace('@namespace epub "http://www.idpf.org/2007/ops";', '')
+    livecarta_tmp_ids = []
+    h_regex = '(^h[1-9]$)'
+    could_have_style_in_livecarta_regexp = re.compile('(^p$)|(^span$)|(^li$)|(^ul$)|(^ol$)|(^td$)|(^th$)|' + h_regex)
+    elements_with_possible_style_attr = soup1.find_all(could_have_style_in_livecarta_regexp)
+    for i, x in enumerate(elements_with_possible_style_attr):
+        x.attrs['livecarta_id'] = i
+        livecarta_tmp_ids.append(i)
+    html_with_inline_style = transform(str(soup1), css_text=css_text,
+                                       remove_classes=False,
+                                       external_styles=False,
+                                       disable_validation=True)
+    soup2 = BeautifulSoup(html_with_inline_style, features='lxml')
 
     for i in livecarta_tmp_ids:
         tag = soup1.find(attrs={'livecarta_id': i})
-        tag_initial_name = tag.name
         tag_with_style = soup2.find(attrs={'livecarta_id': i})
         if tag_with_style.attrs.get('style'):
-            style = tag_with_style.attrs.get('style') + ';'
-            style = remove_white_if_no_bgcolor(style, tag_with_style)
-            style = style.replace('background:', 'background-color:')
-            to_remove = check_style_to_be_tag(style)
-            new_tags = []
-
-            for i, (p, v) in enumerate(to_remove):
-                s = f'{p}:{v};'
-                style = style.replace(s, '')
-                if i == 0:
-                    tag.name = LIVECARTA_STYLE_ATTRS_SHOULD_BE_TAG[(p, v)]
-                    new_tags.append(tag)
-                else:
-                    name = LIVECARTA_STYLE_ATTRS_SHOULD_BE_TAG[(p, v)]
-                    new_tag = BeautifulSoup(features='lxml').new_tag(name)
-                    new_tags[-1].wrap(new_tag)
-                    new_tags.append(new_tag)
-
-            top_tag = tag
-            if to_remove:
-                style = style.strip()
-                tmp_attrs = tag.attrs.copy()
-                tag.attrs = {}
-
-                top_tag = BeautifulSoup(features='lxml').new_tag(tag_initial_name)
-                top_tag.attrs = tmp_attrs
-                if style:
-                    top_tag.attrs['style'] = style
-                del top_tag.attrs['livecarta_id']
-
-                new_tags[-1].wrap(top_tag)
-            else:
-                tag.attrs['style'] = style
-                del tag.attrs['livecarta_id']
-
-            wrap_p_to_save_style_attrs(top_tag)
-
+            style_converter = TagStyleConverter(tag, tag_with_style)
+            style_converter.convert_initial_tag()
         else:
             del tag.attrs['livecarta_id']
 
     return soup1