put limits on styles

This commit is contained in:
Kiryl
2021-09-29 19:35:07 +03:00
parent 552741fbb5
commit 7cf90b25f7
2 changed files with 8 additions and 12 deletions

View File

@@ -58,7 +58,7 @@ def convert_font_size(value):
def convert_indents(value): def convert_indents(value):
# 30px = 3.2% = 1.25em = 23pt # 30px = 3.2% = 1.25em = 23pt
text_indent_regexp = re.compile(r'(-*\w+%)|((-*\w*).*em)|(\w+px)|(-*\w+pt)') text_indent_regexp = re.compile(r'(-*\w+%)|((-*\w*).*em)|(-*\w+pt)')
has_style_attrs = re.search(text_indent_regexp, value) has_style_attrs = re.search(text_indent_regexp, value)
if has_style_attrs: if has_style_attrs:
if has_style_attrs.group(1): if has_style_attrs.group(1):
@@ -72,10 +72,7 @@ def convert_indents(value):
'px') 'px')
elif has_style_attrs.group(4): elif has_style_attrs.group(4):
value = value.replace(has_style_attrs.group(4), '30px') value = value.replace(has_style_attrs.group(4),
elif has_style_attrs.group(5):
value = value.replace(has_style_attrs.group(5),
str(abs(int("".join(filter(str.isdigit, str(has_style_attrs.group(5))))))) + 'px') str(abs(int("".join(filter(str.isdigit, str(has_style_attrs.group(5))))))) + 'px')
return value return value
""" """
@@ -252,6 +249,7 @@ class TagStyleConverter:
clean_style = '' clean_style = ''
for item in split_style: for item in split_style:
item = item.split(':') item = item.split(':')
if item[0] in ['text-indent', 'margin-left']:
item[1] = convert_indents(item[1]) item[1] = convert_indents(item[1])
clean_style += item[0] + ': ' + item[1] + '; ' clean_style += item[0] + ': ' + item[1] + '; '
@@ -264,8 +262,6 @@ class TagStyleConverter:
has_text_indent = re.search(text_indent_regexp, clean_style) has_text_indent = re.search(text_indent_regexp, clean_style)
#formula_of_indent: indent = abs(margin_left - text_indent) #formula_of_indent: indent = abs(margin_left - text_indent)
if has_margin_left: if has_margin_left:
num_ml = 0
if has_margin_left.group(1):
num_ml = abs(int("".join( num_ml = abs(int("".join(
filter(str.isdigit, str(has_margin_left.group(2)))))) filter(str.isdigit, str(has_margin_left.group(2))))))
@@ -369,8 +365,8 @@ class TagStyleConverter:
p_tag = BeautifulSoup(features='lxml').new_tag('p') p_tag = BeautifulSoup(features='lxml').new_tag('p')
span_style = tag.attrs['style'] span_style = tag.attrs['style']
p_style = '' p_style = ''
for i in range(span_style.count(';')):
possible_p_attrs_regexp = re.compile(r'(text-align:( *\w+);*)|(text-indent:( *\w+);*)') possible_p_attrs_regexp = re.compile(r'(text-align:( *\w+);*)|(text-indent:( *\w+);*)')
for i in range(span_style.count(';') + 1):
has_p_style_attrs = re.search(possible_p_attrs_regexp, span_style) has_p_style_attrs = re.search(possible_p_attrs_regexp, span_style)
if has_p_style_attrs: if has_p_style_attrs:
if has_p_style_attrs.group(1): if has_p_style_attrs.group(1):

View File

@@ -455,7 +455,7 @@ if __name__ == "__main__":
logger_object = BookLogger(name=f'epub', main_logger=logger, book_id=0) logger_object = BookLogger(name=f'epub', main_logger=logger, book_id=0)
json_converter = EpubConverter('../epub/9781641050692.epub', json_converter = EpubConverter('../epub/Cook.epub',
logger=logger_object) logger=logger_object)
tmp = json_converter.convert_to_dict() tmp = json_converter.convert_to_dict()