forked from LiveCarta/BookConverter
epub converter: update css_reader.py
This commit is contained in:
@@ -23,9 +23,9 @@ sizes_px = ['10px', '10px', '11px', '12px', '13px', '14px', '15px', '16px', '17p
|
|||||||
'35px', '36px', '37px', '38px', '39px', '40px', '41px', '42px', '43px', '44px', '45px', '46px', '47px',
|
'35px', '36px', '37px', '38px', '39px', '40px', '41px', '42px', '43px', '44px', '45px', '46px', '47px',
|
||||||
'48px', '49px', '50px', '64px', '72px']
|
'48px', '49px', '50px', '64px', '72px']
|
||||||
|
|
||||||
list_types = ['circle', 'disc', 'armenian','decimal',
|
list_types = ['circle', 'disc', 'armenian', 'decimal',
|
||||||
'decimal-leading-zero', 'georgian', 'lower-alpha','lower-latin',
|
'decimal-leading-zero', 'georgian', 'lower-alpha', 'lower-latin',
|
||||||
'lower-roman', 'upper-alpha', 'upper-latin', 'upper-roman', 'none' ]
|
'lower-roman', 'upper-alpha', 'upper-latin', 'upper-roman', 'none']
|
||||||
|
|
||||||
|
|
||||||
def convert_font_size(value):
|
def convert_font_size(value):
|
||||||
@@ -132,6 +132,8 @@ LIVECARTA_STYLE_ATTRS_MAPPING = {
|
|||||||
|
|
||||||
"""
|
"""
|
||||||
LIVECARTA_STYLE_ATTRS_SHOULD_BE_TAG = { (property, value): tag }
|
LIVECARTA_STYLE_ATTRS_SHOULD_BE_TAG = { (property, value): tag }
|
||||||
|
|
||||||
|
<p style="font-weight:600"> foo </p> -> <p><strong>foo</strong></p>
|
||||||
"""
|
"""
|
||||||
LIVECARTA_STYLE_ATTRS_SHOULD_BE_TAG = {
|
LIVECARTA_STYLE_ATTRS_SHOULD_BE_TAG = {
|
||||||
('font-weight', 'bold'): 'strong',
|
('font-weight', 'bold'): 'strong',
|
||||||
@@ -231,22 +233,22 @@ class TagStyleConverter:
|
|||||||
# if the tag already has an inline style, append it to the style parsed from css
|
# if the tag already has an inline style, append it to the style parsed from css
|
||||||
if self.tag.attrs.get('style') and self.tag.attrs['style'] not in style:
|
if self.tag.attrs.get('style') and self.tag.attrs['style'] not in style:
|
||||||
style += self.tag.attrs['style']
|
style += self.tag.attrs['style']
|
||||||
print(style)
|
|
||||||
return style
|
return style
|
||||||
|
|
||||||
def change_attrs_with_corresponding_tags(self):
|
def change_attrs_with_corresponding_tags(self):
|
||||||
# adds <b>, <u>, <sup>, etc
|
# adds <b>, <u>, <sup>, etc
|
||||||
to_remove = check_style_to_be_tag(self.style)
|
to_remove = check_style_to_be_tag(self.style)
|
||||||
new_tags = []
|
new_tags = []
|
||||||
for i, (p, v) in enumerate(to_remove):
|
for i, (attr, value) in enumerate(to_remove):
|
||||||
s = f'{p}:{v};'
|
s = f'{attr}:{value};'
|
||||||
self.style = self.style.replace(s, '')
|
self.style = self.style.replace(s, '')
|
||||||
self.style = self.style.strip()
|
self.style = self.style.strip()
|
||||||
if i == 0:
|
if i == 0:
|
||||||
self.tag.name = LIVECARTA_STYLE_ATTRS_SHOULD_BE_TAG[(p, v)]
|
self.tag.name = LIVECARTA_STYLE_ATTRS_SHOULD_BE_TAG[(attr, value)]
|
||||||
new_tags.append(self.tag)
|
new_tags.append(self.tag)
|
||||||
else:
|
else:
|
||||||
name = LIVECARTA_STYLE_ATTRS_SHOULD_BE_TAG[(p, v)]
|
name = LIVECARTA_STYLE_ATTRS_SHOULD_BE_TAG[(attr, value)]
|
||||||
new_tag = BeautifulSoup(features='lxml').new_tag(name)
|
new_tag = BeautifulSoup(features='lxml').new_tag(name)
|
||||||
new_tags[-1].wrap(new_tag)
|
new_tags[-1].wrap(new_tag)
|
||||||
new_tags.append(new_tag)
|
new_tags.append(new_tag)
|
||||||
@@ -267,34 +269,34 @@ class TagStyleConverter:
|
|||||||
return top_tag
|
return top_tag
|
||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
def wrap_p_to_save_style_attrs(t):
|
def wrap_span_in_p_to_save_style_attrs(tag):
|
||||||
styles_cant_be_in_p = [attr for attr in LIVECARTA_STYLE_ATTRS
|
styles_cant_be_in_p = [attr for attr in LIVECARTA_STYLE_ATTRS
|
||||||
if attr not in ['text-align', 'text-indent']]
|
if attr not in ['text-align', 'text-indent']]
|
||||||
|
|
||||||
if t.name == 'p' and t.attrs.get('style'):
|
if tag.name == 'p' and tag.attrs.get('style'):
|
||||||
check = [attr in t.attrs.get('style') for attr in styles_cant_be_in_p]
|
styles_to_be_saved = [attr in tag.attrs.get('style') for attr in styles_cant_be_in_p]
|
||||||
if any(check):
|
if any(styles_to_be_saved):
|
||||||
t.name = 'span'
|
tag.name = 'span'
|
||||||
p_tag = BeautifulSoup(features='lxml').new_tag('p')
|
p_tag = BeautifulSoup(features='lxml').new_tag('p')
|
||||||
old_style = t.attrs['style']
|
span_style = tag.attrs['style']
|
||||||
new_style = ''
|
p_style = ''
|
||||||
possible_p_attrs_regexp = re.compile(r'(text-align:(\w+);)|(text-indent:(\w+);)')
|
possible_p_attrs_regexp = re.compile(r'(text-align:(\w+);)|(text-indent:(\w+);)')
|
||||||
has_p_style_attrs = re.search(possible_p_attrs_regexp, old_style)
|
has_p_style_attrs = re.search(possible_p_attrs_regexp, span_style)
|
||||||
if has_p_style_attrs:
|
if has_p_style_attrs:
|
||||||
if has_p_style_attrs.group(1):
|
if has_p_style_attrs.group(1):
|
||||||
new_style += has_p_style_attrs.group(1)
|
p_style += has_p_style_attrs.group(1)
|
||||||
old_style = old_style.replace(has_p_style_attrs.group(1), '')
|
span_style = span_style.replace(has_p_style_attrs.group(1), '')
|
||||||
if has_p_style_attrs.group(3):
|
if has_p_style_attrs.group(3):
|
||||||
new_style += has_p_style_attrs.group(3)
|
p_style += has_p_style_attrs.group(3)
|
||||||
old_style = old_style.replace(has_p_style_attrs.group(3), '')
|
span_style = span_style.replace(has_p_style_attrs.group(3), '')
|
||||||
|
|
||||||
p_tag.attrs['style'] = new_style
|
p_tag.attrs['style'] = p_style
|
||||||
|
|
||||||
li_attrs_regexp = re.compile(r'(list-style-type:(\w+);)')
|
li_attrs_regexp = re.compile(r'(list-style-type:(\w+);)')
|
||||||
has_li_style_attr = re.search(li_attrs_regexp, old_style)
|
has_li_style_attr = re.search(li_attrs_regexp, span_style)
|
||||||
old_style = old_style if not has_li_style_attr else old_style.replace(has_li_style_attr.group(1), '')
|
span_style = span_style if not has_li_style_attr else span_style.replace(has_li_style_attr.group(1), '')
|
||||||
t.attrs['style'] = old_style
|
tag.attrs['style'] = span_style
|
||||||
t.wrap(p_tag)
|
tag.wrap(p_tag)
|
||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
def add_span_to_save_style_attrs_in_li(t):
|
def add_span_to_save_style_attrs_in_li(t):
|
||||||
@@ -354,29 +356,28 @@ class TagStyleConverter:
|
|||||||
t.attrs['style'] = style if not has_li_style_attr else style.replace(has_li_style_attr.group(1), '')
|
t.attrs['style'] = style if not has_li_style_attr else style.replace(has_li_style_attr.group(1), '')
|
||||||
|
|
||||||
def convert_initial_tag(self):
|
def convert_initial_tag(self):
|
||||||
del self.tag.attrs['livecarta_id']
|
|
||||||
self.tag = self.change_attrs_with_corresponding_tags()
|
self.tag = self.change_attrs_with_corresponding_tags()
|
||||||
self.wrap_p_to_save_style_attrs(self.tag)
|
self.wrap_span_in_p_to_save_style_attrs(self.tag)
|
||||||
self.add_span_to_save_style_attrs_in_li(self.tag)
|
self.add_span_to_save_style_attrs_in_li(self.tag)
|
||||||
self.add_span_to_save_style_attrs_in_ul_ol(self.tag)
|
self.add_span_to_save_style_attrs_in_ul_ol(self.tag)
|
||||||
self.add_span_to_save_style_attrs(self.tag)
|
self.add_span_to_save_style_attrs(self.tag)
|
||||||
return self.tag
|
return self.tag
|
||||||
|
|
||||||
|
|
||||||
def add_inline_style_to_html_soup(soup1, css_text):
|
def add_inline_style_to_html_soup(soup1: BeautifulSoup, css_text: str):
|
||||||
css_text = css_text.replace('@namespace epub "http://www.idpf.org/2007/ops";', '')
|
css_text = css_text.replace('@namespace epub "http://www.idpf.org/2007/ops";', '')
|
||||||
livecarta_tmp_ids = []
|
livecarta_tmp_ids = []
|
||||||
h_regex = f'(^h[1-9]$)'
|
h_regex = f'(^h[1-9]$)'
|
||||||
could_have_style_in_livecarta_regexp = re.compile('(^p$)|(^span$)|(^li$)|(^ul$)|(^ol$)|(^td$)|(^th$)|' + h_regex)
|
could_have_style_in_livecarta_regexp = re.compile('(^p$)|(^span$)|(^li$)|(^ul$)|(^ol$)|(^td$)|(^th$)|' + h_regex)
|
||||||
elements_with_possible_style_attr = soup1.find_all(could_have_style_in_livecarta_regexp)
|
tags_with_possible_style_attr = soup1.find_all(could_have_style_in_livecarta_regexp)
|
||||||
for i, x in enumerate(elements_with_possible_style_attr):
|
for i, x in enumerate(tags_with_possible_style_attr):
|
||||||
x.attrs['livecarta_id'] = i
|
x.attrs['livecarta_id'] = i
|
||||||
livecarta_tmp_ids.append(i)
|
livecarta_tmp_ids.append(i)
|
||||||
html_with_inline_style = transform(str(soup1), css_text=css_text,
|
html_with_inline_style: str = transform(str(soup1), css_text=css_text,
|
||||||
remove_classes=False,
|
remove_classes=False,
|
||||||
external_styles=False,
|
external_styles=False,
|
||||||
allow_network=False,
|
allow_network=False,
|
||||||
disable_validation=True)
|
disable_validation=True)
|
||||||
soup2 = BeautifulSoup(html_with_inline_style, features='lxml')
|
soup2 = BeautifulSoup(html_with_inline_style, features='lxml')
|
||||||
|
|
||||||
for i in livecarta_tmp_ids:
|
for i in livecarta_tmp_ids:
|
||||||
@@ -385,8 +386,7 @@ def add_inline_style_to_html_soup(soup1, css_text):
|
|||||||
if tag_with_style.attrs.get('style'):
|
if tag_with_style.attrs.get('style'):
|
||||||
style_converter = TagStyleConverter(tag, tag_with_style)
|
style_converter = TagStyleConverter(tag, tag_with_style)
|
||||||
style_converter.convert_initial_tag()
|
style_converter.convert_initial_tag()
|
||||||
else:
|
del tag.attrs['livecarta_id']
|
||||||
del tag.attrs['livecarta_id']
|
|
||||||
return soup1
|
return soup1
|
||||||
|
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user