forked from LiveCarta/BookConverter
put limits on styles
This commit is contained in:
@@ -58,7 +58,7 @@ def convert_font_size(value):
|
|||||||
|
|
||||||
def convert_indents(value):
|
def convert_indents(value):
|
||||||
# 30px = 3.2% = 1.25em = 23pt
|
# 30px = 3.2% = 1.25em = 23pt
|
||||||
text_indent_regexp = re.compile(r'(-*\w+%)|((-*\w*).*em)|(\w+px)|(-*\w+pt)')
|
text_indent_regexp = re.compile(r'(-*\w+%)|((-*\w*).*em)|(-*\w+pt)')
|
||||||
has_style_attrs = re.search(text_indent_regexp, value)
|
has_style_attrs = re.search(text_indent_regexp, value)
|
||||||
if has_style_attrs:
|
if has_style_attrs:
|
||||||
if has_style_attrs.group(1):
|
if has_style_attrs.group(1):
|
||||||
@@ -72,10 +72,7 @@ def convert_indents(value):
|
|||||||
'px')
|
'px')
|
||||||
|
|
||||||
elif has_style_attrs.group(4):
|
elif has_style_attrs.group(4):
|
||||||
value = value.replace(has_style_attrs.group(4), '30px')
|
value = value.replace(has_style_attrs.group(4),
|
||||||
|
|
||||||
elif has_style_attrs.group(5):
|
|
||||||
value = value.replace(has_style_attrs.group(5),
|
|
||||||
str(abs(int("".join(filter(str.isdigit, str(has_style_attrs.group(5))))))) + 'px')
|
str(abs(int("".join(filter(str.isdigit, str(has_style_attrs.group(5))))))) + 'px')
|
||||||
return value
|
return value
|
||||||
"""
|
"""
|
||||||
@@ -252,7 +249,8 @@ class TagStyleConverter:
|
|||||||
clean_style = ''
|
clean_style = ''
|
||||||
for item in split_style:
|
for item in split_style:
|
||||||
item = item.split(':')
|
item = item.split(':')
|
||||||
item[1] = convert_indents(item[1])
|
if item[0] in ['text-indent', 'margin-left']:
|
||||||
|
item[1] = convert_indents(item[1])
|
||||||
clean_style += item[0] + ': ' + item[1] + '; '
|
clean_style += item[0] + ': ' + item[1] + '; '
|
||||||
|
|
||||||
margin_left_regexp = re.compile(
|
margin_left_regexp = re.compile(
|
||||||
@@ -264,9 +262,7 @@ class TagStyleConverter:
|
|||||||
has_text_indent = re.search(text_indent_regexp, clean_style)
|
has_text_indent = re.search(text_indent_regexp, clean_style)
|
||||||
#formula_of_indent: indent = abs(margin_left - text_indent)
|
#formula_of_indent: indent = abs(margin_left - text_indent)
|
||||||
if has_margin_left:
|
if has_margin_left:
|
||||||
num_ml = 0
|
num_ml = abs(int("".join(
|
||||||
if has_margin_left.group(1):
|
|
||||||
num_ml = abs(int("".join(
|
|
||||||
filter(str.isdigit, str(has_margin_left.group(2))))))
|
filter(str.isdigit, str(has_margin_left.group(2))))))
|
||||||
|
|
||||||
if has_text_indent:
|
if has_text_indent:
|
||||||
@@ -369,8 +365,8 @@ class TagStyleConverter:
|
|||||||
p_tag = BeautifulSoup(features='lxml').new_tag('p')
|
p_tag = BeautifulSoup(features='lxml').new_tag('p')
|
||||||
span_style = tag.attrs['style']
|
span_style = tag.attrs['style']
|
||||||
p_style = ''
|
p_style = ''
|
||||||
for i in range(span_style.count(';')):
|
possible_p_attrs_regexp = re.compile(r'(text-align:( *\w+);*)|(text-indent:( *\w+);*)')
|
||||||
possible_p_attrs_regexp = re.compile(r'(text-align:( *\w+);*)|(text-indent:( *\w+);*)')
|
for i in range(span_style.count(';') + 1):
|
||||||
has_p_style_attrs = re.search(possible_p_attrs_regexp, span_style)
|
has_p_style_attrs = re.search(possible_p_attrs_regexp, span_style)
|
||||||
if has_p_style_attrs:
|
if has_p_style_attrs:
|
||||||
if has_p_style_attrs.group(1):
|
if has_p_style_attrs.group(1):
|
||||||
|
|||||||
@@ -455,7 +455,7 @@ if __name__ == "__main__":
|
|||||||
|
|
||||||
logger_object = BookLogger(name=f'epub', main_logger=logger, book_id=0)
|
logger_object = BookLogger(name=f'epub', main_logger=logger, book_id=0)
|
||||||
|
|
||||||
json_converter = EpubConverter('../epub/9781641050692.epub',
|
json_converter = EpubConverter('../epub/Cook.epub',
|
||||||
logger=logger_object)
|
logger=logger_object)
|
||||||
tmp = json_converter.convert_to_dict()
|
tmp = json_converter.convert_to_dict()
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user