Put limits on styles: convert indents only for text-indent and margin-left

This commit is contained in:
Kiryl
2021-09-29 19:35:07 +03:00
parent 552741fbb5
commit 7cf90b25f7
2 changed files with 8 additions and 12 deletions

View File

@@ -58,7 +58,7 @@ def convert_font_size(value):
def convert_indents(value):
# 30px = 3.2% = 1.25em = 23pt
text_indent_regexp = re.compile(r'(-*\w+%)|((-*\w*).*em)|(\w+px)|(-*\w+pt)')
text_indent_regexp = re.compile(r'(-*\w+%)|((-*\w*).*em)|(-*\w+pt)')
has_style_attrs = re.search(text_indent_regexp, value)
if has_style_attrs:
if has_style_attrs.group(1):
@@ -72,10 +72,7 @@ def convert_indents(value):
'px')
elif has_style_attrs.group(4):
value = value.replace(has_style_attrs.group(4), '30px')
elif has_style_attrs.group(5):
value = value.replace(has_style_attrs.group(5),
value = value.replace(has_style_attrs.group(4),
str(abs(int("".join(filter(str.isdigit, str(has_style_attrs.group(5))))))) + 'px')
return value
"""
@@ -252,7 +249,8 @@ class TagStyleConverter:
clean_style = ''
for item in split_style:
item = item.split(':')
item[1] = convert_indents(item[1])
if item[0] in ['text-indent', 'margin-left']:
item[1] = convert_indents(item[1])
clean_style += item[0] + ': ' + item[1] + '; '
margin_left_regexp = re.compile(
@@ -264,9 +262,7 @@ class TagStyleConverter:
has_text_indent = re.search(text_indent_regexp, clean_style)
# Formula of indent: indent = abs(margin_left - text_indent)
if has_margin_left:
num_ml = 0
if has_margin_left.group(1):
num_ml = abs(int("".join(
num_ml = abs(int("".join(
filter(str.isdigit, str(has_margin_left.group(2))))))
if has_text_indent:
@@ -369,8 +365,8 @@ class TagStyleConverter:
p_tag = BeautifulSoup(features='lxml').new_tag('p')
span_style = tag.attrs['style']
p_style = ''
for i in range(span_style.count(';')):
possible_p_attrs_regexp = re.compile(r'(text-align:( *\w+);*)|(text-indent:( *\w+);*)')
possible_p_attrs_regexp = re.compile(r'(text-align:( *\w+);*)|(text-indent:( *\w+);*)')
for i in range(span_style.count(';') + 1):
has_p_style_attrs = re.search(possible_p_attrs_regexp, span_style)
if has_p_style_attrs:
if has_p_style_attrs.group(1):

View File

@@ -455,7 +455,7 @@ if __name__ == "__main__":
logger_object = BookLogger(name=f'epub', main_logger=logger, book_id=0)
json_converter = EpubConverter('../epub/9781641050692.epub',
json_converter = EpubConverter('../epub/Cook.epub',
logger=logger_object)
tmp = json_converter.convert_to_dict()