forked from LiveCarta/BookConverter
epub converter: update css processing
This commit is contained in:
@@ -1,26 +1,23 @@
|
|||||||
import re
|
import re
|
||||||
|
|
||||||
from itertools import takewhile
|
|
||||||
|
|
||||||
import cssutils
|
import cssutils
|
||||||
|
|
||||||
from bs4 import BeautifulSoup
|
from bs4 import BeautifulSoup
|
||||||
from ebooklib import epub
|
from ebooklib import epub
|
||||||
from premailer import transform
|
from premailer import transform
|
||||||
|
from itertools import takewhile
|
||||||
|
from logging import CRITICAL
|
||||||
|
|
||||||
from src.config import LawCartaConfig
|
from src.config import LawCartaConfig
|
||||||
|
|
||||||
|
cssutils.log.setLevel(CRITICAL)
|
||||||
def convert_font_property(property):
    """Converter for the CSS ``font`` shorthand property.

    The incoming value is ignored: the result is always an empty string.

    :param property: raw value of the ``font`` declaration (unused).
    :return: ``''`` for any input.
    """
    # NOTE: the parameter name shadows the ``property`` builtin; it is kept
    # unchanged because it is part of the function's call signature.
    return ''
|
|
||||||
|
|
||||||
|
|
||||||
sizes_pr = [-1, 0.5, 0.56, 0.63, 0.69, 0.75, 0.81, 0.88, 0.94, 1.0, 1.06, 1.13, 1.19, 1.25, 1.31, 1.38, 1.44, 1.5, 1.56,
|
sizes_pr = [-1, 0.5, 0.56, 0.63, 0.69, 0.75, 0.81, 0.88, 0.94, 1.0, 1.06, 1.13, 1.19, 1.25, 1.31, 1.38, 1.44, 1.5, 1.56,
|
||||||
1.63, 1.69, 1.75, 1.81, 1.88, 1.94, 2.0, 2.06, 2.13, 2.19, 2.25, 2.31, 2.38, 2.44, 2.5, 2.56, 2.63, 2.69,
|
1.63, 1.69, 1.75, 1.81, 1.88, 1.94, 2.0, 2.06, 2.13, 2.19, 2.25, 2.31, 2.38, 2.44, 2.5, 2.56, 2.63, 2.69,
|
||||||
2.75, 2.81, 2.88, 2.94, 3.0, 4.0, 5.0]
|
2.75, 2.81, 2.88, 2.94, 3.0, 4.0, 5.0]
|
||||||
|
|
||||||
sizes_px = ['10px', '10px', '11px', '12px', '13px', '14px', '15px', '16px', '17px', '18px', '19px', '20px', '21px',
|
sizes_px = ['10px', '10px', '11px', '12px', '13px', '14px', '15px', '16px', '17px', '18px', '19px', '20px', '21px',
|
||||||
'22px',
|
'22px', '23px', '24px', '25px', '26px', '27px', '28px', '29px', '30px', '31px', '32px', '33px', '34px',
|
||||||
'23px', '24px', '25px', '26px', '27px', '28px', '29px', '30px', '31px', '32px', '33px', '34px', '35px',
|
'35px',
|
||||||
'36px', '37px', '38px', '39px', '40px', '41px', '42px', '43px', '44px', '45px', '46px', '47px', '48px',
|
'36px', '37px', '38px', '39px', '40px', '41px', '42px', '43px', '44px', '45px', '46px', '47px', '48px',
|
||||||
'49px', '50px', '64px', '72px']
|
'49px', '50px', '64px', '72px']
|
||||||
|
|
||||||
@@ -75,28 +72,38 @@ LIVECARTA_STYLE_ATTRS_MAPPING = {
|
|||||||
'text-indent': lambda x: LawCartaConfig.INDENT,
|
'text-indent': lambda x: LawCartaConfig.INDENT,
|
||||||
'font-variant': lambda x: x,
|
'font-variant': lambda x: x,
|
||||||
'text-align': lambda x: x,
|
'text-align': lambda x: x,
|
||||||
'font': convert_font_property,
|
'font': lambda x: '',
|
||||||
'font-family': lambda x: LawCartaConfig.font_correspondence_table.get(x.capitalize()),
|
'font-family': lambda x: LawCartaConfig.font_correspondence_table.get(x.capitalize()),
|
||||||
'font-size': convert_font_size,
|
'font-size': convert_font_size,
|
||||||
}
|
}
|
||||||
|
|
||||||
LIVECARTA_STYLE_ATTRS_SHOULD_BE_TAG = {
|
LIVECARTA_STYLE_ATTRS_SHOULD_BE_TAG = {
|
||||||
'font-weight': ['bold', '600', '700', '800', '900'], # <strong>
|
('font-weight', 'bold'): 'strong',
|
||||||
'font-style': ['italic'], # <i>
|
('font-weight', '600'): 'strong',
|
||||||
'text-decoration': ['underline', 'line-through'], # <u> , <s>
|
('font-weight', '700'): 'strong',
|
||||||
'text-decoration-line': ['underline', 'line-through'], # <u> , <s>
|
('font-weight', '800'): 'strong',
|
||||||
'vertical-align': ['super'], # <sup>
|
('font-weight', '900'): 'strong',
|
||||||
|
('font-style', 'italic'): 'i',
|
||||||
|
('text-decoration', 'underline'): 'u',
|
||||||
|
('text-decoration', 'line-through'): 's',
|
||||||
|
('text-decoration-line', 'underline'): 'u',
|
||||||
|
('text-decoration-line', 'line-through'): 's',
|
||||||
|
('vertical-align', 'super'): 'sup',
|
||||||
}
|
}
|
||||||
'''
|
|
||||||
FONT -> <span>
|
|
||||||
font-size:14pt; pt->px
|
|
||||||
|
|
||||||
|
|
||||||
|
def check_style_to_be_tag(style):
    """Return the style declarations that have to be rendered as HTML tags.

    The inline ``style`` string is split into declarations and every
    ``(property, value)`` key of ``LIVECARTA_STYLE_ATTRS_SHOULD_BE_TAG`` that
    is really declared is reported.

    Matching is done per declaration instead of by substring search, so
    ``font-weight:bolder`` is not mistaken for ``font-weight:bold`` and
    ``-webkit-text-decoration:underline`` is not mistaken for
    ``text-decoration:underline``.  Whitespace around the property name and
    the value is ignored, and the value is compared token by token, so
    ``text-decoration: underline line-through`` yields both keys and a
    trailing ``!important`` does not prevent a match.

    :param style: inline CSS text, e.g. ``'font-weight:bold; color:red;'``.
        An empty string or ``None`` yields an empty list.
    :return: list of matching ``(property, value)`` tuples, in the iteration
        order of ``LIVECARTA_STYLE_ATTRS_SHOULD_BE_TAG``.
    """
    declared = {}
    for declaration in (style or '').split(';'):
        name, separator, value = declaration.partition(':')
        if not separator:
            # Empty chunk (e.g. after the trailing ';') or text without ':'.
            continue
        declared.setdefault(name.strip(), set()).update(value.split())

    return [
        key for key in LIVECARTA_STYLE_ATTRS_SHOULD_BE_TAG
        if key[1] in declared.get(key[0], ())
    ]
|
||||||
|
|
||||||
|
|
||||||
|
'''
|
||||||
LATER:
|
LATER:
|
||||||
vertical-align: sub; <span style="font-size:10px">o</span>
|
|
||||||
text-transform: uppercase;
|
text-transform: uppercase;
|
||||||
text-decoration-color: red;
|
text-decoration-color: red;
|
||||||
|
|
||||||
em, in, pt -> px
|
|
||||||
'''
|
'''
|
||||||
|
|
||||||
|
|
||||||
@@ -116,7 +123,6 @@ def clean_css(css):
|
|||||||
func = LIVECARTA_STYLE_ATTRS_MAPPING[property_.name]
|
func = LIVECARTA_STYLE_ATTRS_MAPPING[property_.name]
|
||||||
tmp = property_.value.replace('\"', '')
|
tmp = property_.value.replace('\"', '')
|
||||||
rule.style[property_.name] = func(tmp)
|
rule.style[property_.name] = func(tmp)
|
||||||
print(property_.name, rule.style[property_.name], )
|
|
||||||
else:
|
else:
|
||||||
rule.style[property_.name] = ''
|
rule.style[property_.name] = ''
|
||||||
else:
|
else:
|
||||||
@@ -124,24 +130,21 @@ def clean_css(css):
|
|||||||
func = LIVECARTA_STYLE_ATTRS_MAPPING[property_.name]
|
func = LIVECARTA_STYLE_ATTRS_MAPPING[property_.name]
|
||||||
tmp = property_.value.replace('\"', '')
|
tmp = property_.value.replace('\"', '')
|
||||||
rule.style[property_.name] = func(tmp)
|
rule.style[property_.name] = func(tmp)
|
||||||
print(property_.name, rule.style[property_.name], )
|
|
||||||
|
|
||||||
css_text = sheet._getCssText().decode()
|
css_text = sheet._getCssText().decode()
|
||||||
return css_text
|
return css_text
|
||||||
|
|
||||||
|
|
||||||
def style_property2livecarta_convention(style_str):
    """Pass an inline style string through unchanged.

    :param style_str: inline CSS text taken from a tag's ``style`` attribute.
    :return: the very same object that was passed in.
    """
    return style_str
|
|
||||||
|
|
||||||
|
|
||||||
def add_inline_style_to_html_soup(soup1, css_text):
|
def add_inline_style_to_html_soup(soup1, css_text):
|
||||||
livecarta_p_ids = []
|
livecarta_p_ids = []
|
||||||
h_regex = f'^h[{LawCartaConfig.SUPPORTED_LEVELS + 1}-9]$'
|
h_regex = f'(^h[{LawCartaConfig.SUPPORTED_LEVELS + 1}-9]$)'
|
||||||
for i, x in enumerate(soup1.find_all(re.compile('(^p$)|(^span$)|(^li$)|(^ul$)'))):
|
for i, x in enumerate(soup1.find_all(re.compile('(^p$)|(^span$)|(^li$)|(^ul$)' + h_regex))):
|
||||||
x.attrs['livecarta_id'] = i
|
x.attrs['livecarta_id'] = i
|
||||||
livecarta_p_ids.append(i)
|
livecarta_p_ids.append(i)
|
||||||
|
|
||||||
html_with_inline_style = transform(str(soup1), css_text=css_text, remove_classes=False, external_styles=False,
|
html_with_inline_style = transform(str(soup1), css_text=css_text,
|
||||||
|
remove_classes=False,
|
||||||
|
external_styles=False,
|
||||||
disable_validation=True)
|
disable_validation=True)
|
||||||
soup2 = BeautifulSoup(html_with_inline_style, features='lxml')
|
soup2 = BeautifulSoup(html_with_inline_style, features='lxml')
|
||||||
|
|
||||||
@@ -150,8 +153,36 @@ def add_inline_style_to_html_soup(soup1, css_text):
|
|||||||
tag_with_style = soup2.find(attrs={'livecarta_id': i})
|
tag_with_style = soup2.find(attrs={'livecarta_id': i})
|
||||||
if tag_with_style.attrs.get('style'):
|
if tag_with_style.attrs.get('style'):
|
||||||
style = tag_with_style.attrs.get('style') + ';'
|
style = tag_with_style.attrs.get('style') + ';'
|
||||||
tag.attrs['style'] = style_property2livecarta_convention(style)
|
to_remove = check_style_to_be_tag(style)
|
||||||
del tag.attrs['livecarta_id']
|
|
||||||
|
for i, (p, v) in enumerate(to_remove):
|
||||||
|
s = f'{p}:{v};'
|
||||||
|
style = style.replace(s, '')
|
||||||
|
if i == 0:
|
||||||
|
tag.name = LIVECARTA_STYLE_ATTRS_SHOULD_BE_TAG[(p, v)]
|
||||||
|
tmp_attrs = tag.attrs.copy()
|
||||||
|
tag.attrs = {}
|
||||||
|
|
||||||
|
new_tag = BeautifulSoup(features='lxml').new_tag('span')
|
||||||
|
new_tag.attrs = tmp_attrs
|
||||||
|
tag.wrap(new_tag)
|
||||||
|
print(new_tag)
|
||||||
|
else:
|
||||||
|
name = LIVECARTA_STYLE_ATTRS_SHOULD_BE_TAG[(p, v)]
|
||||||
|
new_tag = BeautifulSoup(features='lxml').new_tag(name)
|
||||||
|
tag.wrap(new_tag)
|
||||||
|
|
||||||
|
if to_remove:
|
||||||
|
new_tag = BeautifulSoup(features='lxml').new_tag('span')
|
||||||
|
new_tag.attrs['style'] = style
|
||||||
|
tag.wrap(new_tag)
|
||||||
|
print(tag)
|
||||||
|
print(list(tag.parent))
|
||||||
|
print()
|
||||||
|
print('---')
|
||||||
|
else:
|
||||||
|
tag.attrs['style'] = style
|
||||||
|
del tag.attrs['livecarta_id']
|
||||||
|
|
||||||
return soup1
|
return soup1
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user