From 3c579210ff303ea707b57858420b67c8dfc1a76e Mon Sep 17 00:00:00 2001 From: Kiryl Date: Mon, 27 Sep 2021 15:26:56 +0300 Subject: [PATCH] Fix text-indent conversion and inline style merging in css_reader --- src/css_reader.py | 47 ++++++++++++++++++++++++++++--------------- src/epub_converter.py | 2 +- 2 files changed, 32 insertions(+), 17 deletions(-) diff --git a/src/css_reader.py b/src/css_reader.py index 4d2f944..68871b3 100644 --- a/src/css_reader.py +++ b/src/css_reader.py @@ -56,7 +56,18 @@ def convert_font_size(value): except ValueError: return '' - +def convert_text_indent(value): + if value[0] != '-': + positive_text_indent_regexp = re.compile(r'(\w+%)') + has_css_style_attrs = re.search(positive_text_indent_regexp, value) + if has_css_style_attrs: + if has_css_style_attrs.group(1): + value = value.replace(has_css_style_attrs.group(1), + str(int("".join(filter(str.isdigit, str(has_css_style_attrs.group(1))))) * 6) + + 'px') + return value + else: + return '' """ LIVECARTA_STYLE_ATTRS = { css property: value } @@ -112,7 +123,7 @@ def get_text_color(x): LIVECARTA_STYLE_ATTRS_MAPPING = { - #'text-indent': lambda x: LawCartaConfig.INDENT if x != '0' else '', # add - numbers + 'text-indent': convert_text_indent, 'font-variant': lambda x: x, 'text-align': lambda x: x, 'font': lambda x: '', @@ -230,18 +241,17 @@ class TagStyleConverter: style = style.replace('background:', 'background-color:') style = style.replace('list-style-image', 'list-style-type') - # hot_fix positive_text_indent_regexp = re.compile( - r'(text-indent:( *\w+%*);)') + r'(text-indent:( *\w+%*);*)') has_css_style_attrs = re.search(positive_text_indent_regexp, style) if has_css_style_attrs: if has_css_style_attrs.group(1): style = style.replace(has_css_style_attrs.group(1), 'text-indent: ' + - str(int("".join(filter(str.isdigit, str(has_css_style_attrs.group(2))))) * 1) + - 'px') + str(int("".join(filter(str.isdigit, str(has_css_style_attrs.group(2))))) * 6) + + 'px; ') negative_text_indent_regexp = re.compile( - r'((text-indent:( *-\w+%*);) 
*(margin-left:( *\w+%*);))|(text-indent:( *-\w+%*);)') + r'((text-indent:( *-\w+%*);) *(margin-left:( *\w+%*);))|(text-indent:( *-\w+%*);*)') has_css_style_attrs = re.search(negative_text_indent_regexp, style) if has_css_style_attrs: if has_css_style_attrs.group(1): @@ -250,24 +260,28 @@ class TagStyleConverter: style = style.replace(has_css_style_attrs.group(6), '') # if tag had already had inline style, add this to style parsed from css - if self.tag.attrs.get('style') and self.tag.attrs['style'] not in style: + if self.tag.attrs.get('style'): inline_style = self.tag.attrs['style'] has_inline_style_attrs = re.search(positive_text_indent_regexp, inline_style) if has_inline_style_attrs: if has_inline_style_attrs.group(1): inline_style = inline_style.replace(has_inline_style_attrs.group(1), 'text-indent: ' + - str(int("".join(filter(str.isdigit, str(has_inline_style_attrs.group(2)))))*6) + - 'px') + str(int("".join(filter(str.isdigit, + str(has_inline_style_attrs.group( + 2))))) * 6) + + 'px; ') has_inline_style_attrs = re.search(negative_text_indent_regexp, inline_style) if has_inline_style_attrs: if has_inline_style_attrs.group(1): inline_style = inline_style.replace(has_inline_style_attrs.group(1), 'text-indent: ' + - str(int("".join(filter(str.isdigit, str(has_inline_style_attrs.group(5)))))*6) + - 'px') - if has_inline_style_attrs.group(6) or has_css_style_attrs.group(6): + str(int("".join(filter(str.isdigit, + str(has_inline_style_attrs.group( + 5))))) * 6) + + 'px; ') + if has_inline_style_attrs.group(6): inline_style = inline_style.replace(has_inline_style_attrs.group(6), '') - - style += inline_style + if self.tag.attrs['style'] not in style: + style += inline_style return style @@ -412,7 +426,8 @@ def add_inline_style_to_html_soup(soup1: BeautifulSoup, css_text: str): remove_classes=False, external_styles=False, allow_network=False, - disable_validation=True) + disable_validation=True, + ) soup2 = BeautifulSoup(html_with_inline_style, features='lxml') for 
i in livecarta_tmp_ids: diff --git a/src/epub_converter.py b/src/epub_converter.py index 5ee513c..798d81a 100644 --- a/src/epub_converter.py +++ b/src/epub_converter.py @@ -442,7 +442,7 @@ if __name__ == "__main__": logger_object = BookLogger(name=f'epub', main_logger=logger, book_id=0) - json_converter = EpubConverter('../epub/9781614389729.epub', + json_converter = EpubConverter('../epub/9781614389170.epub', logger=logger_object) tmp = json_converter.convert_to_dict()