diff --git a/src/util/css_reader.py b/src/util/css_reader.py index 80a60b9..d0b897d 100644 --- a/src/util/css_reader.py +++ b/src/util/css_reader.py @@ -1,26 +1,23 @@ import re - -from itertools import takewhile - import cssutils + from bs4 import BeautifulSoup from ebooklib import epub from premailer import transform +from itertools import takewhile +from logging import CRITICAL from src.config import LawCartaConfig - -def convert_font_property(property): - return '' - +cssutils.log.setLevel(CRITICAL) sizes_pr = [-1, 0.5, 0.56, 0.63, 0.69, 0.75, 0.81, 0.88, 0.94, 1.0, 1.06, 1.13, 1.19, 1.25, 1.31, 1.38, 1.44, 1.5, 1.56, 1.63, 1.69, 1.75, 1.81, 1.88, 1.94, 2.0, 2.06, 2.13, 2.19, 2.25, 2.31, 2.38, 2.44, 2.5, 2.56, 2.63, 2.69, 2.75, 2.81, 2.88, 2.94, 3.0, 4.0, 5.0] sizes_px = ['10px', '10px', '11px', '12px', '13px', '14px', '15px', '16px', '17px', '18px', '19px', '20px', '21px', - '22px', - '23px', '24px', '25px', '26px', '27px', '28px', '29px', '30px', '31px', '32px', '33px', '34px', '35px', + '22px', '23px', '24px', '25px', '26px', '27px', '28px', '29px', '30px', '31px', '32px', '33px', '34px', + '35px', '36px', '37px', '38px', '39px', '40px', '41px', '42px', '43px', '44px', '45px', '46px', '47px', '48px', '49px', '50px', '64px', '72px'] @@ -75,28 +72,38 @@ LIVECARTA_STYLE_ATTRS_MAPPING = { 'text-indent': lambda x: LawCartaConfig.INDENT, 'font-variant': lambda x: x, 'text-align': lambda x: x, - 'font': convert_font_property, + 'font': lambda x: '', 'font-family': lambda x: LawCartaConfig.font_correspondence_table.get(x.capitalize()), 'font-size': convert_font_size, } LIVECARTA_STYLE_ATTRS_SHOULD_BE_TAG = { - 'font-weight': ['bold', '600', '700', '800', '900'], # - 'font-style': ['italic'], # - 'text-decoration': ['underline', 'line-through'], # , - 'text-decoration-line': ['underline', 'line-through'], # , - 'vertical-align': ['super'], # + ('font-weight', 'bold'): 'strong', + ('font-weight', '600'): 'strong', + ('font-weight', '700'): 'strong', + ('font-weight', '800'): 'strong', + ('font-weight', '900'): 'strong', + ('font-style', 'italic'): 'i', + ('text-decoration', 'underline'): 'u', + ('text-decoration', 'line-through'): 's', + ('text-decoration-line', 'underline'): 'u', + ('text-decoration-line', 'line-through'): 's', + ('vertical-align', 'super'): 'sup', } -''' -FONT -> -font-size:14pt; pt->px + +def check_style_to_be_tag(style): + to_remove = [] + for k in LIVECARTA_STYLE_ATTRS_SHOULD_BE_TAG: + if f'{k[0]}:{k[1]}' in style: + to_remove.append(k) + return to_remove + + +''' LATER: -vertical-align: sub; o text-transform: uppercase; text-decoration-color: red; - -em, in, pt -> px ''' @@ -116,7 +123,6 @@ def clean_css(css): func = LIVECARTA_STYLE_ATTRS_MAPPING[property_.name] tmp = property_.value.replace('\"', '') rule.style[property_.name] = func(tmp) - print(property_.name, rule.style[property_.name], ) else: rule.style[property_.name] = '' else: @@ -124,24 +130,21 @@ def clean_css(css): func = LIVECARTA_STYLE_ATTRS_MAPPING[property_.name] tmp = property_.value.replace('\"', '') rule.style[property_.name] = func(tmp) - print(property_.name, rule.style[property_.name], ) css_text = sheet._getCssText().decode() return css_text -def style_property2livecarta_convention(style_str): - return style_str - - def add_inline_style_to_html_soup(soup1, css_text): livecarta_p_ids = [] - h_regex = f'^h[{LawCartaConfig.SUPPORTED_LEVELS + 1}-9]$' - for i, x in enumerate(soup1.find_all(re.compile('(^p$)|(^span$)|(^li$)|(^ul$)'))): + h_regex = f'(^h[{LawCartaConfig.SUPPORTED_LEVELS + 1}-9]$)' + for i, x in enumerate(soup1.find_all(re.compile('(^p$)|(^span$)|(^li$)|(^ul$)' + h_regex))): x.attrs['livecarta_id'] = i livecarta_p_ids.append(i) - html_with_inline_style = transform(str(soup1), css_text=css_text, remove_classes=False, external_styles=False, + html_with_inline_style = transform(str(soup1), css_text=css_text, + remove_classes=False, + external_styles=False, disable_validation=True) soup2 = BeautifulSoup(html_with_inline_style, features='lxml') @@ -150,8 +153,36 @@ def add_inline_style_to_html_soup(soup1, css_text): tag_with_style = soup2.find(attrs={'livecarta_id': i}) if tag_with_style.attrs.get('style'): style = tag_with_style.attrs.get('style') + ';' - tag.attrs['style'] = style_property2livecarta_convention(style) - del tag.attrs['livecarta_id'] + to_remove = check_style_to_be_tag(style) + + for i, (p, v) in enumerate(to_remove): + s = f'{p}:{v};' + style = style.replace(s, '') + if i == 0: + tag.name = LIVECARTA_STYLE_ATTRS_SHOULD_BE_TAG[(p, v)] + tmp_attrs = tag.attrs.copy() + tag.attrs = {} + + new_tag = BeautifulSoup(features='lxml').new_tag('span') + new_tag.attrs = tmp_attrs + tag.wrap(new_tag) + print(new_tag) + else: + name = LIVECARTA_STYLE_ATTRS_SHOULD_BE_TAG[(p, v)] + new_tag = BeautifulSoup(features='lxml').new_tag(name) + tag.wrap(new_tag) + + if to_remove: + new_tag = BeautifulSoup(features='lxml').new_tag('span') + new_tag.attrs['style'] = style + tag.wrap(new_tag) + print(tag) + print(list(tag.parent)) + print() + print('---') + else: + tag.attrs['style'] = style + del tag.attrs['livecarta_id'] return soup1