From 7cf90b25f7e612494c672381b9055aa8c0136e47 Mon Sep 17 00:00:00 2001 From: Kiryl Date: Wed, 29 Sep 2021 19:35:07 +0300 Subject: [PATCH] put limits on styles --- src/css_reader.py | 18 +++++++----------- src/epub_converter.py | 2 +- 2 files changed, 8 insertions(+), 12 deletions(-) diff --git a/src/css_reader.py b/src/css_reader.py index 6cad7eb..4ea7514 100644 --- a/src/css_reader.py +++ b/src/css_reader.py @@ -58,7 +58,7 @@ def convert_font_size(value): def convert_indents(value): # 30px = 3.2% = 1.25em = 23pt - text_indent_regexp = re.compile(r'(-*\w+%)|((-*\w*).*em)|(\w+px)|(-*\w+pt)') + text_indent_regexp = re.compile(r'(-*\w+%)|((-*\w*).*em)|(-*\w+pt)') has_style_attrs = re.search(text_indent_regexp, value) if has_style_attrs: if has_style_attrs.group(1): @@ -72,10 +72,7 @@ def convert_indents(value): 'px') elif has_style_attrs.group(4): - value = value.replace(has_style_attrs.group(4), '30px') - - elif has_style_attrs.group(5): - value = value.replace(has_style_attrs.group(5), + value = value.replace(has_style_attrs.group(4), str(abs(int("".join(filter(str.isdigit, str(has_style_attrs.group(5))))))) + 'px') return value """ @@ -252,7 +249,8 @@ class TagStyleConverter: clean_style = '' for item in split_style: item = item.split(':') - item[1] = convert_indents(item[1]) + if item[0] in ['text-indent', 'margin-left']: + item[1] = convert_indents(item[1]) clean_style += item[0] + ': ' + item[1] + '; ' margin_left_regexp = re.compile( @@ -264,9 +262,7 @@ class TagStyleConverter: has_text_indent = re.search(text_indent_regexp, clean_style) #formula_of_indent: indent = abs(margin_left - text_indent) if has_margin_left: - num_ml = 0 - if has_margin_left.group(1): - num_ml = abs(int("".join( + num_ml = abs(int("".join( filter(str.isdigit, str(has_margin_left.group(2)))))) if has_text_indent: @@ -369,8 +365,8 @@ class TagStyleConverter: p_tag = BeautifulSoup(features='lxml').new_tag('p') span_style = tag.attrs['style'] p_style = '' - for i in range(span_style.count(';')): - possible_p_attrs_regexp = re.compile(r'(text-align:( *\w+);*)|(text-indent:( *\w+);*)') + possible_p_attrs_regexp = re.compile(r'(text-align:( *\w+);*)|(text-indent:( *\w+);*)') + for i in range(span_style.count(';') + 1): has_p_style_attrs = re.search(possible_p_attrs_regexp, span_style) if has_p_style_attrs: if has_p_style_attrs.group(1): diff --git a/src/epub_converter.py b/src/epub_converter.py index 4ac4ae1..baafcf0 100644 --- a/src/epub_converter.py +++ b/src/epub_converter.py @@ -455,7 +455,7 @@ if __name__ == "__main__": logger_object = BookLogger(name=f'epub', main_logger=logger, book_id=0) - json_converter = EpubConverter('../epub/9781641050692.epub', + json_converter = EpubConverter('../epub/Cook.epub', logger=logger_object) tmp = json_converter.convert_to_dict()