diff --git a/src/css_reader.py b/src/css_reader.py index e19935a..4e24b53 100644 --- a/src/css_reader.py +++ b/src/css_reader.py @@ -23,9 +23,9 @@ sizes_px = ['10px', '10px', '11px', '12px', '13px', '14px', '15px', '16px', '17p '35px', '36px', '37px', '38px', '39px', '40px', '41px', '42px', '43px', '44px', '45px', '46px', '47px', '48px', '49px', '50px', '64px', '72px'] -list_types = ['circle', 'disc', 'armenian','decimal', - 'decimal-leading-zero', 'georgian', 'lower-alpha','lower-latin', - 'lower-roman', 'upper-alpha', 'upper-latin', 'upper-roman', 'none' ] +list_types = ['circle', 'disc', 'armenian', 'decimal', + 'decimal-leading-zero', 'georgian', 'lower-alpha', 'lower-latin', + 'lower-roman', 'upper-alpha', 'upper-latin', 'upper-roman', 'none'] def convert_font_size(value): @@ -132,6 +132,8 @@ LIVECARTA_STYLE_ATTRS_MAPPING = { """ LIVECARTA_STYLE_ATTRS_SHOULD_BE_TAG = { (property, value): tag } + +
, , , etc to_remove = check_style_to_be_tag(self.style) new_tags = [] - for i, (p, v) in enumerate(to_remove): - s = f'{p}:{v};' + for i, (attr, value) in enumerate(to_remove): + s = f'{attr}:{value};' self.style = self.style.replace(s, '') self.style = self.style.strip() if i == 0: - self.tag.name = LIVECARTA_STYLE_ATTRS_SHOULD_BE_TAG[(p, v)] + self.tag.name = LIVECARTA_STYLE_ATTRS_SHOULD_BE_TAG[(attr, value)] new_tags.append(self.tag) else: - name = LIVECARTA_STYLE_ATTRS_SHOULD_BE_TAG[(p, v)] + name = LIVECARTA_STYLE_ATTRS_SHOULD_BE_TAG[(attr, value)] new_tag = BeautifulSoup(features='lxml').new_tag(name) new_tags[-1].wrap(new_tag) new_tags.append(new_tag) @@ -267,34 +269,34 @@ class TagStyleConverter: return top_tag @staticmethod - def wrap_p_to_save_style_attrs(t): + def wrap_span_in_p_to_save_style_attrs(tag): styles_cant_be_in_p = [attr for attr in LIVECARTA_STYLE_ATTRS if attr not in ['text-align', 'text-indent']] - if t.name == 'p' and t.attrs.get('style'): - check = [attr in t.attrs.get('style') for attr in styles_cant_be_in_p] - if any(check): - t.name = 'span' + if tag.name == 'p' and tag.attrs.get('style'): + styles_to_be_saved = [attr in tag.attrs.get('style') for attr in styles_cant_be_in_p] + if any(styles_to_be_saved): + tag.name = 'span' p_tag = BeautifulSoup(features='lxml').new_tag('p') - old_style = t.attrs['style'] - new_style = '' + span_style = tag.attrs['style'] + p_style = '' possible_p_attrs_regexp = re.compile(r'(text-align:(\w+);)|(text-indent:(\w+);)') - has_p_style_attrs = re.search(possible_p_attrs_regexp, old_style) + has_p_style_attrs = re.search(possible_p_attrs_regexp, span_style) if has_p_style_attrs: if has_p_style_attrs.group(1): - new_style += has_p_style_attrs.group(1) - old_style = old_style.replace(has_p_style_attrs.group(1), '') + p_style += has_p_style_attrs.group(1) + span_style = span_style.replace(has_p_style_attrs.group(1), '') if has_p_style_attrs.group(3): - new_style += has_p_style_attrs.group(3) - old_style = old_style.replace(has_p_style_attrs.group(3), '') + p_style += has_p_style_attrs.group(3) + span_style = span_style.replace(has_p_style_attrs.group(3), '') - p_tag.attrs['style'] = new_style + p_tag.attrs['style'] = p_style li_attrs_regexp = re.compile(r'(list-style-type:(\w+);)') - has_li_style_attr = re.search(li_attrs_regexp, old_style) - old_style = old_style if not has_li_style_attr else old_style.replace(has_li_style_attr.group(1), '') - t.attrs['style'] = old_style - t.wrap(p_tag) + has_li_style_attr = re.search(li_attrs_regexp, span_style) + span_style = span_style if not has_li_style_attr else span_style.replace(has_li_style_attr.group(1), '') + tag.attrs['style'] = span_style + tag.wrap(p_tag) @staticmethod def add_span_to_save_style_attrs_in_li(t): @@ -354,29 +356,28 @@ class TagStyleConverter: t.attrs['style'] = style if not has_li_style_attr else style.replace(has_li_style_attr.group(1), '') def convert_initial_tag(self): - del self.tag.attrs['livecarta_id'] self.tag = self.change_attrs_with_corresponding_tags() - self.wrap_p_to_save_style_attrs(self.tag) + self.wrap_span_in_p_to_save_style_attrs(self.tag) self.add_span_to_save_style_attrs_in_li(self.tag) self.add_span_to_save_style_attrs_in_ul_ol(self.tag) self.add_span_to_save_style_attrs(self.tag) return self.tag -def add_inline_style_to_html_soup(soup1, css_text): +def add_inline_style_to_html_soup(soup1: BeautifulSoup, css_text: str): css_text = css_text.replace('@namespace epub "http://www.idpf.org/2007/ops";', '') livecarta_tmp_ids = [] h_regex = f'(^h[1-9]$)' could_have_style_in_livecarta_regexp = re.compile('(^p$)|(^span$)|(^li$)|(^ul$)|(^ol$)|(^td$)|(^th$)|' + h_regex) - elements_with_possible_style_attr = soup1.find_all(could_have_style_in_livecarta_regexp) - for i, x in enumerate(elements_with_possible_style_attr): + tags_with_possible_style_attr = soup1.find_all(could_have_style_in_livecarta_regexp) + for i, x in enumerate(tags_with_possible_style_attr): x.attrs['livecarta_id'] = i livecarta_tmp_ids.append(i) - html_with_inline_style = transform(str(soup1), css_text=css_text, - remove_classes=False, - external_styles=False, - allow_network=False, - disable_validation=True) + html_with_inline_style: str = transform(str(soup1), css_text=css_text, + remove_classes=False, + external_styles=False, + allow_network=False, + disable_validation=True) soup2 = BeautifulSoup(html_with_inline_style, features='lxml') for i in livecarta_tmp_ids: @@ -385,8 +386,7 @@ def add_inline_style_to_html_soup(soup1, css_text): if tag_with_style.attrs.get('style'): style_converter = TagStyleConverter(tag, tag_with_style) style_converter.convert_initial_tag() - else: - del tag.attrs['livecarta_id'] + del tag.attrs['livecarta_id'] return soup1