epub converter: update css processing

This commit is contained in:
shirshasa
2021-04-23 09:35:35 +03:00
parent e0e64a0c38
commit bf7989e716

View File

@@ -1,26 +1,23 @@
import re
from itertools import takewhile
import cssutils
from bs4 import BeautifulSoup
from ebooklib import epub
from premailer import transform
from itertools import takewhile
from logging import CRITICAL
from src.config import LawCartaConfig
def convert_font_property(property):
    """Handler for the CSS ``font`` shorthand value.

    The incoming value is ignored; the result is always an empty string.
    """
    replacement = ''
    return replacement
cssutils.log.setLevel(CRITICAL)
sizes_pr = [-1, 0.5, 0.56, 0.63, 0.69, 0.75, 0.81, 0.88, 0.94, 1.0, 1.06, 1.13, 1.19, 1.25, 1.31, 1.38, 1.44, 1.5, 1.56,
1.63, 1.69, 1.75, 1.81, 1.88, 1.94, 2.0, 2.06, 2.13, 2.19, 2.25, 2.31, 2.38, 2.44, 2.5, 2.56, 2.63, 2.69,
2.75, 2.81, 2.88, 2.94, 3.0, 4.0, 5.0]
sizes_px = ['10px', '10px', '11px', '12px', '13px', '14px', '15px', '16px', '17px', '18px', '19px', '20px', '21px',
'22px',
'23px', '24px', '25px', '26px', '27px', '28px', '29px', '30px', '31px', '32px', '33px', '34px', '35px',
'22px', '23px', '24px', '25px', '26px', '27px', '28px', '29px', '30px', '31px', '32px', '33px', '34px',
'35px',
'36px', '37px', '38px', '39px', '40px', '41px', '42px', '43px', '44px', '45px', '46px', '47px', '48px',
'49px', '50px', '64px', '72px']
@@ -75,28 +72,38 @@ LIVECARTA_STYLE_ATTRS_MAPPING = {
'text-indent': lambda x: LawCartaConfig.INDENT,
'font-variant': lambda x: x,
'text-align': lambda x: x,
'font': convert_font_property,
'font': lambda x: '',
'font-family': lambda x: LawCartaConfig.font_correspondence_table.get(x.capitalize()),
'font-size': convert_font_size,
}
LIVECARTA_STYLE_ATTRS_SHOULD_BE_TAG = {
'font-weight': ['bold', '600', '700', '800', '900'], # <strong>
'font-style': ['italic'], # <i>
'text-decoration': ['underline', 'line-through'], # <u> , <s>
'text-decoration-line': ['underline', 'line-through'], # <u> , <s>
'vertical-align': ['super'], # <sup>
('font-weight', 'bold'): 'strong',
('font-weight', '600'): 'strong',
('font-weight', '700'): 'strong',
('font-weight', '800'): 'strong',
('font-weight', '900'): 'strong',
('font-style', 'italic'): 'i',
('text-decoration', 'underline'): 'u',
('text-decoration', 'line-through'): 's',
('text-decoration-line', 'underline'): 'u',
('text-decoration-line', 'line-through'): 's',
('vertical-align', 'super'): 'sup',
}
'''
FONT -> <span>
font-size:14pt; pt->px
def check_style_to_be_tag(style, tag_mapping=None):
    """Return the ``(property, value)`` keys of the mapping found in *style*.

    *style* is an inline CSS string such as ``'font-weight:bold; color:red;'``.
    It is split on ``;`` and every declaration is stripped of surrounding
    whitespace, then compared as a whole against ``'property:value'`` built
    from each key. Comparing whole declarations (instead of searching for a
    substring) keeps ``font-weight:bolder`` from being reported as
    ``font-weight:bold`` and keeps vendor-prefixed properties such as
    ``-webkit-text-decoration:underline`` from being reported as
    ``text-decoration:underline``.

    :param style: inline style string; declarations must be written without
        whitespace around the colon to be recognised.
    :param tag_mapping: optional mapping keyed by ``(property, value)`` tuples;
        defaults to ``LIVECARTA_STYLE_ATTRS_SHOULD_BE_TAG``.
    :return: list of matching keys, in the mapping's iteration order.
    """
    if tag_mapping is None:
        tag_mapping = LIVECARTA_STYLE_ATTRS_SHOULD_BE_TAG

    # Build the set once so each key costs a single O(1) membership test.
    declarations = {chunk.strip() for chunk in style.split(';')}
    return [key for key in tag_mapping if f'{key[0]}:{key[1]}' in declarations]
'''
LATER:
vertical-align: sub; <span style="font-size:10px">o</span>
text-transform: uppercase;
text-decoration-color: red;
em, in, pt -> px
'''
@@ -116,7 +123,6 @@ def clean_css(css):
func = LIVECARTA_STYLE_ATTRS_MAPPING[property_.name]
tmp = property_.value.replace('\"', '')
rule.style[property_.name] = func(tmp)
print(property_.name, rule.style[property_.name], )
else:
rule.style[property_.name] = ''
else:
@@ -124,24 +130,21 @@ def clean_css(css):
func = LIVECARTA_STYLE_ATTRS_MAPPING[property_.name]
tmp = property_.value.replace('\"', '')
rule.style[property_.name] = func(tmp)
print(property_.name, rule.style[property_.name], )
css_text = sheet._getCssText().decode()
return css_text
def style_property2livecarta_convention(style_str):
    """Return the given inline style string as-is (pass-through)."""
    converted = style_str
    return converted
def add_inline_style_to_html_soup(soup1, css_text):
livecarta_p_ids = []
h_regex = f'^h[{LawCartaConfig.SUPPORTED_LEVELS + 1}-9]$'
for i, x in enumerate(soup1.find_all(re.compile('(^p$)|(^span$)|(^li$)|(^ul$)'))):
h_regex = f'(^h[{LawCartaConfig.SUPPORTED_LEVELS + 1}-9]$)'
for i, x in enumerate(soup1.find_all(re.compile('(^p$)|(^span$)|(^li$)|(^ul$)' + h_regex))):
x.attrs['livecarta_id'] = i
livecarta_p_ids.append(i)
html_with_inline_style = transform(str(soup1), css_text=css_text, remove_classes=False, external_styles=False,
html_with_inline_style = transform(str(soup1), css_text=css_text,
remove_classes=False,
external_styles=False,
disable_validation=True)
soup2 = BeautifulSoup(html_with_inline_style, features='lxml')
@@ -150,8 +153,36 @@ def add_inline_style_to_html_soup(soup1, css_text):
tag_with_style = soup2.find(attrs={'livecarta_id': i})
if tag_with_style.attrs.get('style'):
style = tag_with_style.attrs.get('style') + ';'
tag.attrs['style'] = style_property2livecarta_convention(style)
del tag.attrs['livecarta_id']
to_remove = check_style_to_be_tag(style)
for i, (p, v) in enumerate(to_remove):
s = f'{p}:{v};'
style = style.replace(s, '')
if i == 0:
tag.name = LIVECARTA_STYLE_ATTRS_SHOULD_BE_TAG[(p, v)]
tmp_attrs = tag.attrs.copy()
tag.attrs = {}
new_tag = BeautifulSoup(features='lxml').new_tag('span')
new_tag.attrs = tmp_attrs
tag.wrap(new_tag)
print(new_tag)
else:
name = LIVECARTA_STYLE_ATTRS_SHOULD_BE_TAG[(p, v)]
new_tag = BeautifulSoup(features='lxml').new_tag(name)
tag.wrap(new_tag)
if to_remove:
new_tag = BeautifulSoup(features='lxml').new_tag('span')
new_tag.attrs['style'] = style
tag.wrap(new_tag)
print(tag)
print(list(tag.parent))
print()
print('---')
else:
tag.attrs['style'] = style
del tag.attrs['livecarta_id']
return soup1