forked from LiveCarta/BookConverter
change structure of project
This commit is contained in:
492
src/epub_converter/css_reader.py
Normal file
492
src/epub_converter/css_reader.py
Normal file
@@ -0,0 +1,492 @@
|
||||
import re
|
||||
from typing import List
|
||||
|
||||
import cssutils
|
||||
|
||||
from bs4 import BeautifulSoup
|
||||
from ebooklib import epub
|
||||
from premailer import transform
|
||||
from itertools import takewhile
|
||||
from logging import CRITICAL
|
||||
|
||||
from src.livecarta_config import LiveCartaConfig
|
||||
from src.util.color_reader import str2hex
|
||||
|
||||
cssutils.log.setLevel(CRITICAL)
|
||||
|
||||
# Relative font-size steps (em, or percent / 100). Kept sorted ascending so
# that position i here corresponds to the pixel size at position i in sizes_px.
sizes_pr = [-1, 0.5, 0.56, 0.63, 0.69, 0.75, 0.81, 0.88, 0.94, 1.0, 1.06, 1.13, 1.19, 1.25, 1.31, 1.38, 1.44, 1.5, 1.56,
            1.63, 1.69, 1.75, 1.81, 1.88, 1.94, 2.0, 2.06, 2.13, 2.19, 2.25, 2.31, 2.38, 2.44, 2.5, 2.56, 2.63, 2.69,
            2.75, 2.81, 2.88, 2.94, 3.0, 4.0, 5.0]

# Pixel size emitted for the step at the same index in sizes_pr.
sizes_px = ['10px', '10px', '11px', '12px', '13px', '14px', '15px', '16px', '17px', '18px', '19px', '20px', '21px',
            '22px', '23px', '24px', '25px', '26px', '27px', '28px', '29px', '30px', '31px', '32px', '33px', '34px',
            '35px', '36px', '37px', '38px', '39px', '40px', '41px', '42px', '43px', '44px', '45px', '46px', '47px',
            '48px', '49px', '50px', '64px', '72px']

# list-style-type values that are passed through unchanged.
list_types = ['circle', 'disc', 'armenian', 'decimal',
              'decimal-leading-zero', 'georgian', 'lower-alpha', 'lower-latin',
              'lower-roman', 'upper-alpha', 'upper-latin', 'upper-roman', 'none']


def convert_font_size(value):
    """Convert a CSS font-size value to a pixel size string.

    * ``pt`` values: ``''`` when the number equals
      ``LiveCartaConfig.LIVECARTA_DEFAULT_FONT_SIZE``, otherwise the same text
      with ``pt`` replaced by ``px``.
    * ``%`` / ``em`` values: mapped through ``sizes_pr`` -> ``sizes_px`` using
      the largest step that is strictly smaller than the value.
    * Anything else, unparsable numbers, ratios above 5 and ratios that are
      not above the smallest step: ``''`` (the property is dropped).

    :param value: font-size value as written in the stylesheet, e.g. ``'150%'``
    :return: pixel size such as ``'25px'`` or ``''``
    """
    if 'pt' in value:
        # Parse as float inside a guard: fractional sizes such as '10.5pt'
        # must not raise, they are simply not the default size.
        try:
            points = float(value.replace('pt', ''))
        except ValueError:
            points = None
        if points is not None and points == LiveCartaConfig.LIVECARTA_DEFAULT_FONT_SIZE:
            return ''
        return value.replace('pt', 'px')

    if value == '100%':
        return ''

    try:
        if '%' in value:
            ratio = float(value.replace('%', '')) / 100.0
        elif 'em' in value:
            ratio = float(value.replace('em', ''))
        else:
            return ''
    except ValueError:
        return ''

    if ratio > 5:
        return ''

    # Steps strictly below the requested ratio; sizes_pr is sorted, so the
    # last of them sits at index len(...) - 1.
    smaller_steps = list(takewhile(lambda step: ratio > step, sizes_pr))
    if not smaller_steps:
        # Ratio is not above the smallest step (e.g. '-2em'): no sensible size.
        return ''
    return sizes_px[len(smaller_steps) - 1]
|
||||
|
||||
# group 1: percent value, group 2: em value (group 3 is its leading integer
# part), group 4: pt value.
_INDENT_VALUE_RE = re.compile(r'(-*\w+%)|((-*\w*).*em)|(-*\w+pt)')


def _scaled_px(fragment, factor):
    """Return '<digits * factor>px' for the digits in *fragment*, or None when it has none."""
    digits = ''.join(filter(str.isdigit, str(fragment)))
    if not digits:
        return None
    return f'{int(digits) * factor}px'


def convert_indents(value):
    """Convert the first %, em or pt length found in *value* to pixels.

    Rough equivalence used by the converter: 30px = 3.2% = 1.25em = 23pt.
    Only the digits of the matched fragment are used, so the sign is dropped
    and, for em values, only the integer part before the decimal point counts.
    Scale factors: percent * 6, em * 30, pt * 1.

    Values without a %, em or pt length (``'10px'``, ``'auto'``) and fragments
    that contain no digits at all (``'.5em'``) are returned unchanged.

    :param value: property value such as ``'2em'`` or ``'-10pt'``
    :return: the value with the matched length replaced by ``'<n>px'``
    """
    # 30px = 3.2% = 1.25em = 23pt
    found = _INDENT_VALUE_RE.search(value)
    if not found:
        return value

    if found.group(1):
        matched, pixels = found.group(1), _scaled_px(found.group(1), 6)
    elif found.group(2):
        matched, pixels = found.group(2), _scaled_px(found.group(3), 30)
    else:
        # The pattern has exactly four groups; the pt alternative is group 4.
        matched, pixels = found.group(4), _scaled_px(found.group(4), 1)

    if pixels is None:
        return value
    return value.replace(matched, pixels)
|
||||
# LIVECARTA_STYLE_ATTRS = { css property: allowed values }
#
# Style properties that can be used to fit the livecarta css style convention.
# An empty list means that any value of the property can be converted.
# A non-empty list means that only the listed property-value combinations
# can be transformed.
_non_default_alignments = list(
    filter(lambda style: style != LiveCartaConfig.DEFAULT_ALIGN_STYLE, LiveCartaConfig.ALIGN_STYLES))
_non_default_fonts = list(
    filter(lambda font: font != LiveCartaConfig.DEFAULT_FONT_NAME, LiveCartaConfig.font_correspondence_table.keys()))

LIVECARTA_STYLE_ATTRS = {
    'text-indent': [],
    'font-variant': ['small-caps'],
    'text-align': _non_default_alignments,
    'align': [],
    'font': [],
    'font-family': _non_default_fonts,
    'font-size': [],
    'font-weight': ['bold', '600', '700', '800', '900'],  # <strong>
    'font-style': ['italic'],  # <i>
    'text-decoration': ['underline', 'line-through'],  # <u> , <s>
    'text-decoration-line': ['underline', 'line-through'],  # <u> , <s>
    'vertical-align': ['super'],  # <sup>
    'color': [],
    'background-color': [],
    'background': [],
    'width': [],
    'border-top-width': [],
    'border-right-width': [],
    'border-left-width': [],
    'border-bottom-width': [],
    'border': [],
    'list-style-type': [],
    'list-style-image': [],
    'margin-left': [],
}
|
||||
|
||||
"""
|
||||
LIVECARTA_STYLE_ATTRS_MAPPING = { property: mapping function }
|
||||
|
||||
Warning, if LIVECARTA_STYLE_ATTRS is changed, LIVECARTA_STYLE_ATTRS_MAPPING should be updated
|
||||
to suit livecarta style convention.
|
||||
"""
|
||||
|
||||
|
||||
def get_bg_color(x):
    """Return the str2hex() form of background color *x*, or '' when that form is white."""
    hex_color = str2hex(x)
    if hex_color in ['#ffffff', '#fff', 'white']:
        # White backgrounds are not written out.
        return ''
    return hex_color
|
||||
|
||||
|
||||
def get_text_color(x):
    """Return the str2hex() form of text color *x*, or '' when that form is black."""
    hex_color = str2hex(x)
    if hex_color in ['#000000', '#000', 'black']:
        # Black text is not written out.
        return ''
    return hex_color
|
||||
|
||||
|
||||
def _keep_as_is(x):
    """Return the value unchanged."""
    return x


def _drop_literal_zero(x):
    """Return the value unchanged, or '' when it is exactly '0'."""
    return '' if x == '0' else x


def _map_font_family(x):
    """Look the font up in the correspondence table, retrying with the capitalized name."""
    table = LiveCartaConfig.font_correspondence_table
    return table.get(x) or table.get(x.capitalize())


def _known_list_type_or_disc(x):
    """Return the list marker type when it is in list_types, otherwise 'disc'."""
    return x if x in list_types else 'disc'


# { css property: function converting its value to the livecarta convention }
LIVECARTA_STYLE_ATTRS_MAPPING = {
    'text-indent': convert_indents,
    'font-variant': _keep_as_is,
    'text-align': _keep_as_is,
    'font': lambda x: '',
    'font-family': _map_font_family,
    'font-size': convert_font_size,
    'color': get_text_color,
    'background-color': get_bg_color,
    'background': get_bg_color,
    'border': _drop_literal_zero,
    'border-top-width': _drop_literal_zero,
    'border-right-width': _drop_literal_zero,
    'border-left-width': _drop_literal_zero,
    'border-bottom-width': _drop_literal_zero,
    'list-style-type': _known_list_type_or_disc,
    'list-style-image': lambda x: 'disc',
    'margin-left': convert_indents,
}
|
||||
|
||||
# LIVECARTA_STYLE_ATTRS_SHOULD_BE_TAG = { (property, value): tag }
#
# <p style="font-weight:600"> foo </p> -> <p><strong>foo</strong></p>
#
# Insertion order matters: check_style_to_be_tag() reports matches in this order.
LIVECARTA_STYLE_ATTRS_SHOULD_BE_TAG = {
    ('font-weight', 'bold'): 'strong',
    ('font-weight', '600'): 'strong',
    ('font-weight', '700'): 'strong',
    ('font-weight', '800'): 'strong',
    ('font-weight', '900'): 'strong',
    ('font-style', 'italic'): 'i',
    ('text-decoration', 'underline'): 'u',
    ('text-decoration', 'line-through'): 's',
    ('text-decoration-line', 'underline'): 'u',
    ('text-decoration-line', 'line-through'): 's',
    ('vertical-align', 'super'): 'sup',
}


def check_style_to_be_tag(style) -> List[tuple]:
    """Find the style declarations that are expressed as tags instead of css.

    The style string is split into ``property: value`` declarations and each
    one is compared as a whole, with surrounding whitespace ignored. Both
    ``'font-weight:bold;'`` and ``'font-weight: bold; '`` therefore match,
    while ``'font-weight:bolder'`` does not.

    :param style: inline style string
    :return: the matching (property, value) keys, in the order they appear in
        LIVECARTA_STYLE_ATTRS_SHOULD_BE_TAG
    """
    declared = set()
    for declaration in style.split(';'):
        prop, separator, value = declaration.partition(':')
        if separator:
            declared.add((prop.strip(), value.strip()))
    return [key for key in LIVECARTA_STYLE_ATTRS_SHOULD_BE_TAG if key in declared]
|
||||
|
||||
def update_css_style_types_to_livecarta_convention(css_rule, style_type):
    """Rewrite or remove one property of *css_rule* to fit the livecarta convention.

    The property is set to '' when it is not listed in LIVECARTA_STYLE_ATTRS,
    or when it is listed with a non-empty value list that does not contain its
    value. Otherwise, if a converter exists in LIVECARTA_STYLE_ATTRS_MAPPING,
    the property is set to the converted value.

    :param css_rule: rule whose ``style`` mapping is updated in place
    :param style_type: property object providing ``name`` and ``value``
    """
    prop_name = style_type.name
    if prop_name not in LIVECARTA_STYLE_ATTRS:
        # property not in LIVECARTA_STYLE_ATTRS, remove from css file
        css_rule.style[prop_name] = ''
        return

    plain_value = style_type.value.replace('\"', '')  # value without double quotes
    allowed_values = LIVECARTA_STYLE_ATTRS[prop_name]
    if allowed_values and plain_value not in allowed_values:
        # property + value combination not in LIVECARTA_STYLE_ATTRS, remove from css file
        css_rule.style[prop_name] = ''
        return

    if prop_name in LIVECARTA_STYLE_ATTRS_MAPPING:
        converter = LIVECARTA_STYLE_ATTRS_MAPPING[prop_name]
        css_rule.style[prop_name] = converter(plain_value)
|
||||
|
||||
def build_css_content(css_content):
    """Parse a stylesheet and rewrite every style rule to the livecarta convention.

    Each property of each style rule is passed to
    update_css_style_types_to_livecarta_convention(); other rule types are
    left untouched.

    :param css_content: stylesheet text
    :return: serialized text of the rewritten stylesheet
    """
    sheet = cssutils.parseString(css_content, validate=False)

    for css_rule in sheet:
        if css_rule.type == css_rule.STYLE_RULE:
            # Snapshot the properties first: the callee sets properties of
            # this very declaration to '' while we walk over it.
            for style_type in list(css_rule.style):
                update_css_style_types_to_livecarta_convention(css_rule, style_type)

    # cssText is the public accessor for the serialized sheet. It is a byte
    # string encoded with the sheet's encoding (utf-8 unless an @charset rule
    # says otherwise), so decode with that same encoding.
    return sheet.cssText.decode(sheet.encoding)
|
||||
|
||||
|
||||
class TagStyleConverter:
    """Apply the css-derived style of a tag to the tag of the original soup.

    ``tag_with_initial_style`` is the tag that gets modified in place;
    ``tag_with_ultimate_style`` is its counterpart whose ``style`` attribute
    already contains the inline style merged with the stylesheet rules.
    Styles that have a tag equivalent become tags (<strong>, <i>, ...), and
    styles that the surrounding tag type cannot carry are moved to a <span>
    wrapped by a fresh tag of the original type.
    """

    # Properties that are folded into a single text-indent declaration.
    _INDENT_PROPERTIES = ('text-indent', 'margin-left')

    def __init__(self, tag_with_initial_style, tag_with_ultimate_style):
        self.tag_with_initial_style = tag_with_initial_style  # tag with inline style to be updated with style attribute
        self.tag_initial_name = tag_with_initial_style.name
        self.tag_with_ultimate_style = tag_with_ultimate_style  # tag with inline style + style parsed from css file
        self.style = self.preprocess_style()

    @staticmethod
    def _parse_declarations(style):
        """Split a style string into (property, value) pairs.

        Whitespace around names and values is trimmed, whitespace inside a
        value is kept. Chunks without a ':' or without a name are skipped.
        """
        declarations = []
        for chunk in style.split(';'):
            name, separator, value = chunk.partition(':')
            name = name.strip()
            if separator and name:
                declarations.append((name, value.strip()))
        return declarations

    @staticmethod
    def _format_declarations(declarations):
        """Serialize (property, value) pairs as 'property: value; ' chunks."""
        return ''.join(f'{name}: {value}; ' for name, value in declarations)

    @staticmethod
    def _px_amount(value) -> int:
        """Return the first run of digits in *value* as an int, 0 when there is none (e.g. 'auto')."""
        found = re.search(r'\d+', str(value))
        return int(found.group()) if found else 0

    @staticmethod
    def _has_property_outside(declarations, allowed):
        """Tell whether any declared livecarta property is not one of *allowed*.

        Property names are compared exactly, so 'text-align' is not mistaken
        for 'align' and 'font-size' is not mistaken for 'font'.
        """
        return any(name in LIVECARTA_STYLE_ATTRS and name not in allowed for name, _ in declarations)

    @staticmethod
    def remove_white_if_no_bgcolor(style_, tag):
        """Drop white text color from *style_* when nothing gives the tag a background.

        The style is returned untouched when it mentions a background itself,
        when it has no white color, or when a span/td/tr/p ancestor has a
        background in its style. Otherwise every descendant that has its own
        background gets ``color:#fff`` appended, and the white color
        declarations are removed from *style_*.
        """
        if 'background' in style_:
            return style_

        # if text color is white, check that we have bg-color
        has_white_text = ('color:#ffffff' in style_) or ('color:#fff' in style_) or ('color:white' in style_)
        if not has_white_text:
            return style_

        # white bg color not need to be checked as we do not write 'white bg color'
        tag_with_bg = ['span', 'td', 'tr', 'p']
        for parent_tag in tag.parents:
            parent_style = parent_tag.attrs.get('style')
            has_bg = parent_style and ('background' in parent_style)
            if has_bg and parent_tag.name in tag_with_bg:
                # bg color is inherited, just return style as is
                return style_

        for child in tag.find_all():
            child_style = child.attrs.get('style')
            if child_style and ('background' in child_style):
                child.attrs['style'] = child.attrs['style'] + '; color:#fff; '

        # for child with bg color we added white text color, so this tag don't need white color
        for white in ('color:#fff;', 'color:#ffffff;', 'color:white;'):
            style_ = style_.replace(white, '')
        return style_

    @staticmethod
    def process_indents_in_px(split_style: list) -> str:
        """Build a clean style string with the indentation merged into text-indent.

        Every item is a single ``property:value`` declaration. ``text-indent``
        and ``margin-left`` values are converted with convert_indents(), then:

        * both present: indent = abs(margin_left - text_indent), written in
          place of text-indent, margin-left is dropped;
        * only margin-left: written as text-indent with the same amount;
        * only text-indent: rewritten as its absolute pixel amount.

        Only the first text-indent / margin-left value is used; further ones
        are dropped. Values are split on the first ':' only, so values that
        contain ':' (urls) stay whole, and items without ':' are skipped.

        :param split_style: list of ``property:value`` strings
        :return: declarations serialized as ``'property: value; '`` chunks
        """
        indent_props = TagStyleConverter._INDENT_PROPERTIES
        declarations = []
        for item in split_style:
            name, separator, value = item.partition(':')
            name = name.strip()
            if not separator or not name:
                continue
            if name in indent_props:
                value = convert_indents(value)
            declarations.append((name, value))

        margins = [value for name, value in declarations if name == 'margin-left']
        indents = [value for name, value in declarations if name == 'text-indent']
        if not margins and not indents:
            return TagStyleConverter._format_declarations(declarations)

        # formula_of_indent: indent = abs(margin_left - text_indent)
        if margins:
            indent_px = TagStyleConverter._px_amount(margins[0])
            if indents:
                indent_px = abs(indent_px - TagStyleConverter._px_amount(indents[0]))
        else:
            indent_px = TagStyleConverter._px_amount(indents[0])

        # The merged value takes the place of text-indent when there is one,
        # otherwise the place of margin-left.
        slot = 'text-indent' if indents else 'margin-left'
        merged = []
        slot_filled = False
        for name, value in declarations:
            if name not in indent_props:
                merged.append((name, value))
            elif name == slot and not slot_filled:
                merged.append(('text-indent', f'{indent_px}px'))
                slot_filled = True
        return TagStyleConverter._format_declarations(merged)

    def preprocess_style(self):
        """Return the normalized style string to be written to the initial tag.

        Takes the css-merged style, removes unusable white text color, renames
        ``background`` to ``background-color`` and ``list-style-image`` to
        ``list-style-type``, merges indents, then appends those declarations of
        the tag's own inline style that are not already present.
        """
        ultimate_style = (self.tag_with_ultimate_style.attrs.get('style') or '') + ';'
        ultimate_style = self.remove_white_if_no_bgcolor(ultimate_style, self.tag_with_ultimate_style)
        ultimate_style = ultimate_style.replace('background:', 'background-color:')
        ultimate_style = ultimate_style.replace('list-style-image', 'list-style-type')

        # Normalize to 'property:value' items for the repetition check.
        # Whitespace is trimmed around names and values only, so multi-word
        # values ('1px solid red') survive; empty chunks from stray ';' vanish.
        split_ultimate_style = [f'{name}:{value}' for name, value in self._parse_declarations(ultimate_style)]
        ultimate_style = self.process_indents_in_px(split_ultimate_style)

        initial_style = self.tag_with_initial_style.attrs.get('style')
        if initial_style:
            split_initial_style = [f'{name}:{value}' for name, value in self._parse_declarations(initial_style)]

            # repetition check - if tag had already had inline style, add this to style parsed from css
            for item in set(split_ultimate_style) & set(split_initial_style):
                split_initial_style.remove(item)

            if split_initial_style:
                # initial style has declarations of its own - convert and add to ultimate style
                ultimate_style += self.process_indents_in_px(split_initial_style)

        return ultimate_style

    def change_attrs_with_corresponding_tags(self):
        """Turn style declarations that have a tag equivalent into tags.

        The first matching declaration renames the initial tag itself, each
        further one wraps the result in a new tag, and finally everything is
        wrapped in a new tag of the original name that receives the original
        attributes and the remaining style. Without any match the remaining
        style is simply stored on the initial tag.

        :return: the initial tag (renamed when at least one declaration matched)
        """
        # adds <b>, <u>, <sup>, etc
        to_remove = check_style_to_be_tag(self.style)
        if to_remove:
            # Compare whole declarations so the removal works for both the
            # 'prop:value;' and the 'prop: value; ' spelling.
            removable = set(to_remove)
            kept = [declaration for declaration in self._parse_declarations(self.style)
                    if declaration not in removable]
            self.style = self._format_declarations(kept).strip()

        new_tags = []
        for i, (attr, value) in enumerate(to_remove):
            name = LIVECARTA_STYLE_ATTRS_SHOULD_BE_TAG[(attr, value)]
            if i == 0:
                self.tag_with_initial_style.name = name
                new_tags.append(self.tag_with_initial_style)
            else:
                new_tag = BeautifulSoup(features='lxml').new_tag(name)
                new_tags[-1].wrap(new_tag)
                new_tags.append(new_tag)

        top_tag = self.tag_with_initial_style

        if new_tags:
            tmp_attrs = top_tag.attrs.copy()
            top_tag.attrs = {}
            top_tag2 = BeautifulSoup(features='lxml').new_tag(self.tag_initial_name)
            top_tag2.attrs = tmp_attrs
            if self.style:
                top_tag2.attrs['style'] = self.style
            new_tags[-1].wrap(top_tag2)
        else:
            top_tag.attrs['style'] = self.style

        return top_tag

    @staticmethod
    def wrap_span_in_p_to_save_style_attrs(tag):
        """Move styles a <p> cannot carry to a <span> wrapped by a new <p>.

        Applies to a styled <p> that declares a livecarta property other than
        text-align / text-indent. The tag becomes a <span>; text-align and
        text-indent go to the new <p>, list-style-type is dropped, the rest
        stays on the <span>.
        """
        if tag.name != 'p' or not tag.attrs.get('style'):
            return
        allowed_in_p = ('text-align', 'text-indent')
        declarations = TagStyleConverter._parse_declarations(tag.attrs['style'])
        if not TagStyleConverter._has_property_outside(declarations, allowed_in_p):
            return

        tag.name = 'span'
        p_tag = BeautifulSoup(features='lxml').new_tag('p')
        p_tag.attrs['style'] = TagStyleConverter._format_declarations(
            [d for d in declarations if d[0] in allowed_in_p])
        tag.attrs['style'] = TagStyleConverter._format_declarations(
            [d for d in declarations if d[0] not in allowed_in_p and d[0] != 'list-style-type'])
        tag.wrap(p_tag)

    @staticmethod
    def add_span_to_save_style_attrs_in_li(t):
        """Move styles an <li> cannot carry to a <span> wrapped by a new <li>.

        Applies to a styled <li> that declares a livecarta property other than
        text-align / list-style-type; those two go to the new <li>, the rest
        stays on the <span>.
        """
        if t.name != 'li' or not t.attrs.get('style'):
            return
        allowed_in_li = ('text-align', 'list-style-type')
        declarations = TagStyleConverter._parse_declarations(t.attrs['style'])
        if not TagStyleConverter._has_property_outside(declarations, allowed_in_li):
            return

        t.name = 'span'
        li_tag = BeautifulSoup(features='lxml').new_tag('li')
        li_tag.attrs['style'] = TagStyleConverter._format_declarations(
            [d for d in declarations if d[0] in allowed_in_li])
        t.attrs['style'] = TagStyleConverter._format_declarations(
            [d for d in declarations if d[0] not in allowed_in_li])
        t.wrap(li_tag)

    @staticmethod
    def add_span_to_save_style_attrs_in_ul_ol(t):
        """Move styles a list cannot carry to a <span> wrapped by a new list tag.

        Applies to a styled <ul>/<ol> that declares a livecarta property other
        than list-style-type. The wrapper keeps the list kind of the original
        tag and receives list-style-type when it was declared.
        """
        if t.name not in ['ul', 'ol'] or not t.attrs.get('style'):
            return
        allowed_in_list = ('list-style-type',)
        declarations = TagStyleConverter._parse_declarations(t.attrs['style'])
        if not TagStyleConverter._has_property_outside(declarations, allowed_in_list):
            return

        list_name = t.name  # remember the kind before renaming, an <ol> must stay ordered
        t.name = 'span'
        list_tag = BeautifulSoup(features='lxml').new_tag(list_name)
        list_declarations = [d for d in declarations if d[0] in allowed_in_list]
        if list_declarations:
            list_tag.attrs['style'] = TagStyleConverter._format_declarations(list_declarations)
        t.attrs['style'] = TagStyleConverter._format_declarations(
            [d for d in declarations if d[0] not in allowed_in_list])
        t.wrap(list_tag)

    @staticmethod
    def add_span_to_save_style_attrs(t):
        """Move the style of a heading to a <span> wrapped by a new heading tag.

        Applies to styled tags named h1..h9; list-style-type is dropped from
        the style kept on the <span>.
        """
        no_style_in_livecarta_regexp = re.compile('(^h[1-9]$)')

        if re.search(no_style_in_livecarta_regexp, t.name) and t.attrs.get('style'):
            new_tag = BeautifulSoup(features='lxml').new_tag(t.name)
            t.name = 'span'
            t.wrap(new_tag)
            t.attrs['style'] = TagStyleConverter._format_declarations(
                [d for d in TagStyleConverter._parse_declarations(t.attrs['style'])
                 if d[0] != 'list-style-type'])

    def convert_initial_tag(self):
        """Run all conversions on the initial tag and return it."""
        self.tag_with_initial_style = self.change_attrs_with_corresponding_tags()
        self.wrap_span_in_p_to_save_style_attrs(self.tag_with_initial_style)
        self.add_span_to_save_style_attrs_in_li(self.tag_with_initial_style)
        self.add_span_to_save_style_attrs_in_ul_ol(self.tag_with_initial_style)
        self.add_span_to_save_style_attrs(self.tag_with_initial_style)
        return self.tag_with_initial_style
|
||||
|
||||
|
||||
def convert_html_soup_with_css_style(html_soup: BeautifulSoup, css_text: str):
    """Inline the stylesheet into the soup and convert styles to the livecarta convention.

    Every p/span/li/ul/ol/td/th/h1..h9 tag gets a temporary ``livecarta_id``
    attribute, the css is inlined into a copy of the markup with premailer,
    and each tag whose inlined counterpart has a style is converted in place
    with TagStyleConverter. The temporary attribute is removed again.

    :param html_soup: parsed chapter, modified in place
    :param css_text: stylesheet text to inline
    :return: the same ``html_soup`` object
    """
    css_text = css_text.replace('@namespace epub "http://www.idpf.org/2007/ops";', '')
    h_regex = '(^h[1-9]$)'
    could_have_style_in_livecarta_regexp = re.compile('(^p$)|(^span$)|(^li$)|(^ul$)|(^ol$)|(^td$)|(^th$)|' + h_regex)
    tags_with_possible_style_attr = html_soup.find_all(could_have_style_in_livecarta_regexp)
    for i, x in enumerate(tags_with_possible_style_attr):
        x.attrs['livecarta_id'] = i

    # here we add css styles to inline style
    html_with_css_styles: str = transform(str(html_soup), css_text=css_text,
                                          remove_classes=False,
                                          external_styles=False,
                                          allow_network=False,
                                          disable_validation=True,
                                          )

    inline_soup = BeautifulSoup(html_with_css_styles, features='lxml')

    # Index the inlined tags once instead of searching the whole tree per id.
    # Ids come back from the serialized markup as strings.
    inlined_by_id = {}
    for inlined_tag in inline_soup.find_all(attrs={'livecarta_id': True}):
        inlined_by_id.setdefault(str(inlined_tag.attrs['livecarta_id']), inlined_tag)

    # The marked tags are the very objects collected above, so they are used
    # directly; they stay valid while earlier tags get renamed or wrapped.
    for i, tag_with_initial_style in enumerate(tags_with_possible_style_attr):
        tag_with_ultimate_style = inlined_by_id.get(str(i))
        del tag_with_initial_style.attrs['livecarta_id']
        if tag_with_ultimate_style is not None and tag_with_ultimate_style.attrs.get('style'):
            style_converter = TagStyleConverter(tag_with_initial_style, tag_with_ultimate_style)
            style_converter.convert_initial_tag()

    return html_soup
|
||||
|
||||
|
||||
if __name__ == '__main__':
    # Manual run: convert one chapter of a local sample book and print the result.
    epub_path = '../../epub/9781627222174.epub'
    book = epub.read_epub(epub_path)

    raw_css = book.get_item_with_href('css/epub.css').get_content().decode()
    cleaned_css = build_css_content(raw_css)

    chapter_html = book.get_item_with_href('pr01s05.xhtml').get_body_content().decode()
    chapter_soup = BeautifulSoup(chapter_html, features='lxml')

    print(convert_html_soup_with_css_style(chapter_soup, cleaned_css))
|
||||
Reference in New Issue
Block a user