diff --git a/src/epub_converter/tag_css_style_converter.py b/src/epub_converter/tag_css_style_converter.py
index bb2a7fc..7d1ff1b 100644
--- a/src/epub_converter/tag_css_style_converter.py
+++ b/src/epub_converter/tag_css_style_converter.py
@@ -202,112 +202,42 @@ class TagStyleConverter:
return top_tag
@staticmethod
- def wrap_span_in_p_to_save_style_attrs(tag):
- """Function designed to save style attrs that cannot be in p -> span"""
- if tag.name == 'p' and tag.attrs.get('style'):
- styles_cant_be_in_p = [attr for attr in LIVECARTA_STYLE_ATTRS
- if attr not in ['text-align', 'text-indent', 'border-bottom', 'border-top']]
- p_style = ''
- initial_style = tag.attrs['style']
- split_style = initial_style.replace('; ', ';').split(';')
- possible_p_attrs_regexp = re.compile(
- r'(text-align:)|(text-indent:)|(border-bottom:)|(border-top:)')
- for item in split_style:
- has_p_style_attrs = re.search(possible_p_attrs_regexp, item)
- if has_p_style_attrs:
- p_style += item + ';'
- initial_style = initial_style.replace(item + ';', '')
- # here check that this style i exactly the same.
+ def wrap_span_in_tag_to_save_style_attrs(initial_tag):
+ """Turn initial_tag into a span wrapped by a new tag of the same name, so styles not allowed on that tag live on the span"""
+ dictkeys_pattern = re.compile('|'.join(LiveCartaConfig.LIVECARTA_STYLES_CANT_BE_IN_TAG))
+ if dictkeys_pattern.fullmatch(initial_tag.name) and initial_tag.attrs.get('style'):  # fullmatch: key 'p' must not hit 'span'
+ styles_can_be_in_tag = [style
+ for tag, styles in LiveCartaConfig.LIVECARTA_STYLES_CANT_BE_IN_TAG.items()
+ if re.fullmatch(tag, initial_tag.name)  # exact tag name only ('p' is not 'pre', 'li' is not 'link')
+ for style in styles]
+ styles_cant_be_in_tag = [attr for attr in LIVECARTA_STYLE_ATTRS
+ if attr not in styles_can_be_in_tag]
+ span_style = initial_tag.attrs['style']
+ # here check that this style is exactly the same.
# Not 'align' when we have 'text-align', or 'border' when we have 'border-top'
- styles_to_be_saved_in_span = [((attr + ':') in initial_style) & (
- '-' + attr not in initial_style) for attr in styles_cant_be_in_p]
+ styles_to_be_saved_in_span = [((attr + ':') in span_style) & (
+ '-' + attr not in span_style) for attr in styles_cant_be_in_tag]
if any(styles_to_be_saved_in_span):
- # if we find styles that cannot be in <p> -> wrap them in span
- tag.name = 'span'
- p_tag = BeautifulSoup(features='lxml').new_tag('p')
- p_attrs_regexp = re.compile(r'(list-style-type:(\w+);)')
- has_p_style_attr = re.search(p_attrs_regexp, initial_style)
- span_style = initial_style if not has_p_style_attr else initial_style.replace(
- has_p_style_attr.group(1), '')
- p_tag.attrs['style'] = p_style
- tag.attrs['style'] = span_style
- tag.wrap(p_tag)
- else:
- tag.attrs['style'] = p_style
-
- @staticmethod
- def wrap_span_in_li_to_save_style_attrs(tag):
- """Function designed to save style attrs that cannot be in li -> span"""
- if tag.name == 'li' and tag.attrs.get('style'):
- styles_cant_be_in_li = [attr for attr in LIVECARTA_STYLE_ATTRS if
- attr not in ['text-align', 'list-style-type']]
-
- styles_to_be_saved_in_span = [attr in tag.attrs.get(
- 'style') for attr in styles_cant_be_in_li]
- if any(styles_to_be_saved_in_span):
- tag.name = 'span'
- li_tag = BeautifulSoup(features='lxml').new_tag('li')
- span_style = tag.attrs['style']
- li_style = ''
- for possible_li_attrs_regexp in [re.compile(r'(text-align:(\w+);)'),
- re.compile(r'(list-style-type:(\w+);)')]:
- has_li_style_attrs = re.search(
- possible_li_attrs_regexp, span_style)
- if has_li_style_attrs and has_li_style_attrs.group(1):
- li_style += has_li_style_attrs.group(1)
+ # if we find styles that cannot be in the tag itself -> wrap them in span
+ tag = BeautifulSoup(features='lxml').new_tag(f'{initial_tag.name}')
+ style = ''
+ # a value is everything up to ';' (e.g. '1.5em', '1px solid black'), not just a single \w+ word
+ possible_attrs_regexp = [re.compile(fr'({attr}: *([^;]+);)') for attr in styles_can_be_in_tag]
+ for possible_attr_regexp in possible_attrs_regexp:
+ has_style_attrs = re.search(possible_attr_regexp, span_style)
+ if has_style_attrs and has_style_attrs.group(1):
+ style += has_style_attrs.group(1)
span_style = span_style.replace(
- has_li_style_attrs.group(1), '')
- li_tag.attrs['style'] = li_style
- tag.attrs['style'] = span_style
- tag.wrap(li_tag)
-
- @staticmethod
- def wrap_span_in_ul_ol_to_save_style_attrs(tag):
- """Function designed to save style attrs that cannot be in ul/ol -> span"""
- if tag.name in ['ul', 'ol'] and tag.attrs.get('style'):
- styles_cant_be_in_ul_ol = [
- attr for attr in LIVECARTA_STYLE_ATTRS if attr not in ['list-style-type']]
-
- styles_to_be_saved_in_span = [attr in tag.attrs.get('style')
- for attr in styles_cant_be_in_ul_ol]
- if any(styles_to_be_saved_in_span):
- tag.name = 'span'
- oul_tag = BeautifulSoup(features='lxml').new_tag(tag.name)
- span_style = tag.attrs['style']
-
- possible_uol_attrs_regexp = re.compile(
- r'(list-style-type:(\w+);)')
- has_uol_style_attrs = re.search(
- possible_uol_attrs_regexp, span_style)
- if has_uol_style_attrs and has_uol_style_attrs.group(1):
- oul_style = has_uol_style_attrs.group(1)
- span_style = span_style.replace(oul_style, '')
- oul_tag.attrs['style'] = oul_style
- tag.attrs['style'] = span_style
- tag.wrap(oul_tag)
-
- @staticmethod
- def wrap_span_in_h_to_save_style_attrs(tag):
- """Function designed to save style attrs that cannot be in h -> span"""
- h_regexp = re.compile('(^h[1-9]$)')
-
- if re.search(h_regexp, tag.name) and tag.attrs.get('style'):
- h_tag = BeautifulSoup(features='lxml').new_tag(tag.name)
- tag.name = 'span'
- tag.wrap(h_tag)
- style = tag.attrs['style']
- h_attrs_regexp = re.compile(r'(list-style-type:(\w+);)')
- has_h_style_attr = re.search(h_attrs_regexp, style)
- tag.attrs['style'] = style if not has_h_style_attr else style.replace(
- has_h_style_attr.group(1), '')
+ has_style_attrs.group(1), '')
+ tag.attrs['style'] = style
+ initial_tag.name = 'span'
+ initial_tag.attrs['style'] = span_style
+ initial_tag.wrap(tag)
def convert_initial_tag(self):
self.tag_inline_style = self.change_attrs_with_corresponding_tags(
self.tag_inline_style.name)
- self.wrap_span_in_p_to_save_style_attrs(self.tag_inline_style)
- self.wrap_span_in_li_to_save_style_attrs(self.tag_inline_style)
- self.wrap_span_in_ul_ol_to_save_style_attrs(self.tag_inline_style)
- self.wrap_span_in_h_to_save_style_attrs(self.tag_inline_style)
+ self.wrap_span_in_tag_to_save_style_attrs(self.tag_inline_style)
return self.tag_inline_style
@@ -339,9 +269,7 @@ def convert_html_soup_with_css_style(html_soup: BeautifulSoup, css_text: str) ->
# soup with converted styles from css
inline_soup = BeautifulSoup(html_with_css_styles, features='lxml')
- could_have_style_in_livecarta_regexp = re.compile(
- '(^div$)|(^p$)|(^span$)|(^code$)|(^kbd$)|(^var$)|(^li$)|(^ul$)|(^ol$)|(^td$)|(^th$)|(^h[1-9]$)')
- tags_with_inline_style = inline_soup.find_all(could_have_style_in_livecarta_regexp,
+ tags_with_inline_style = inline_soup.find_all(LiveCartaConfig.could_have_style_in_livecarta_regexp,
attrs={'style': re.compile('.*')})
# go through the tags with inline style + style parsed from css file
diff --git a/src/livecarta_config.py b/src/livecarta_config.py
index 3d5f667..e3e63d4 100644
--- a/src/livecarta_config.py
+++ b/src/livecarta_config.py
@@ -1,3 +1,6 @@
+import re
+
+
class LiveCartaConfig:
"""Class of values that LiveCarta platform using and supports"""
# tag with inline style to be updated with style attribute
@@ -87,6 +90,14 @@ class LiveCartaConfig:
'decimal-leading-zero', 'georgian', 'lower-alpha', 'lower-latin',
'lower-roman', 'upper-alpha', 'upper-latin', 'upper-roman', 'none']
+ structural_tags_names = [
+ 'div', 'section', 'article', 'main', 'body', 'html', 'aside',
+ 'canvas', 'data', 'figure', 'footer', 'iframe', 'span', 'p'
+ ]
+
+ could_have_style_in_livecarta_regexp = re.compile(
+ '(^div$)|(^p$)|(^span$)|(^code$)|(^kbd$)|(^var$)|(^li$)|(^ul$)|(^ol$)|(^td$)|(^th$)|(^h[1-9]$)')
+
"""
LIVECARTA_STYLE_ATTRS_SHOULD_BE_TAG = { (property, value): tag }
@@ -105,3 +116,11 @@ class LiveCartaConfig:
('text-decoration-line', 'line-through'): 's',
('vertical-align', 'super'): 'sup'
}
+
+ LIVECARTA_STYLES_CANT_BE_IN_TAG = {  # tag-name regex -> styles that stay on that tag; every other style is moved to a span
+ 'p': ['text-align', 'text-indent', 'border-bottom', 'border-top'],
+ 'li': ['text-align', 'list-style-type'],
+ 'ul': ['list-style-type'],
+ 'ol': ['list-style-type'],
+ '(^h[1-9]$)': ['list-style-type']
+ }