From f7d921dd2796ff1fc77c11806a328aff64461393 Mon Sep 17 00:00:00 2001 From: Kiryl Date: Fri, 24 Sep 2021 19:05:41 +0300 Subject: [PATCH] Fix css & html Indents --- src/css_reader.py | 44 ++++++++++++++++++++++++++++++++----------- src/epub_converter.py | 2 +- 2 files changed, 34 insertions(+), 12 deletions(-) diff --git a/src/css_reader.py b/src/css_reader.py index 156ebb6..4d2f944 100644 --- a/src/css_reader.py +++ b/src/css_reader.py @@ -112,7 +112,7 @@ def get_text_color(x): LIVECARTA_STYLE_ATTRS_MAPPING = { - 'text-indent': lambda x: LawCartaConfig.INDENT if x != '0' else '', + #'text-indent': lambda x: LawCartaConfig.INDENT if x != '0' else '', # add - numbers 'font-variant': lambda x: x, 'text-align': lambda x: x, 'font': lambda x: '', @@ -127,7 +127,7 @@ LIVECARTA_STYLE_ATTRS_MAPPING = { 'border-left-width': lambda x: x if x != '0' else '', 'border-bottom-width': lambda x: x if x != '0' else '', 'list-style-type': lambda x: x if x in list_types else 'disc', - 'list-style-image': lambda x: 'disc' + 'list-style-image': lambda x: 'disc', } """ @@ -230,21 +230,43 @@ class TagStyleConverter: style = style.replace('background:', 'background-color:') style = style.replace('list-style-image', 'list-style-type') - # if tag had already had inline style, add this to style parsed from css + # hot_fix + positive_text_indent_regexp = re.compile( + r'(text-indent:( *\w+%*);)') + has_css_style_attrs = re.search(positive_text_indent_regexp, style) + if has_css_style_attrs: + if has_css_style_attrs.group(1): + style = style.replace(has_css_style_attrs.group(1), 'text-indent: ' + + str(int("".join(filter(str.isdigit, str(has_css_style_attrs.group(2))))) * 1) + + 'px') + negative_text_indent_regexp = re.compile( + r'((text-indent:( *-\w+%*);) *(margin-left:( *\w+%*);))|(text-indent:( *-\w+%*);)') + has_css_style_attrs = re.search(negative_text_indent_regexp, style) + if has_css_style_attrs: + if has_css_style_attrs.group(1): + style = style.replace(has_css_style_attrs.group(1), '') + if has_css_style_attrs.group(6): + style = style.replace(has_css_style_attrs.group(6), '') + + # if tag had already had inline style, add this to style parsed from css if self.tag.attrs.get('style') and self.tag.attrs['style'] not in style: - # hot_fix - negative_text_indent_regexp = re.compile(r'((text-indent:( *-\w+%*);) *(margin-left:( *\w+%*);))|(text-indent:( *-\w+%*);)') inline_style = self.tag.attrs['style'] - has_inline_style_attrs = re.search(negative_text_indent_regexp, inline_style) - has_css_style_attrs = re.search(negative_text_indent_regexp, style) + has_inline_style_attrs = re.search(positive_text_indent_regexp, inline_style) if has_inline_style_attrs: if has_inline_style_attrs.group(1): - inline_style = inline_style.replace(has_inline_style_attrs.group(1), 'text-indent: ' + has_inline_style_attrs.group(5)) - style = style.replace(has_css_style_attrs.group(1), '') - if has_inline_style_attrs.group(6): + inline_style = inline_style.replace(has_inline_style_attrs.group(1), 'text-indent: ' + + str(int("".join(filter(str.isdigit, str(has_inline_style_attrs.group(2)))))*6) + + 'px') + has_inline_style_attrs = re.search(negative_text_indent_regexp, inline_style) + if has_inline_style_attrs: + if has_inline_style_attrs.group(1): + inline_style = inline_style.replace(has_inline_style_attrs.group(1), 'text-indent: ' + + str(int("".join(filter(str.isdigit, str(has_inline_style_attrs.group(5)))))*6) + + 'px') + if has_inline_style_attrs.group(6) or has_css_style_attrs.group(6): inline_style = inline_style.replace(has_inline_style_attrs.group(6), '') - style = style.replace(has_css_style_attrs.group(6), '') + style += inline_style return style diff --git a/src/epub_converter.py b/src/epub_converter.py index 82510af..5ee513c 100644 --- a/src/epub_converter.py +++ b/src/epub_converter.py @@ -442,7 +442,7 @@ if __name__ == "__main__": logger_object = BookLogger(name=f'epub', main_logger=logger, book_id=0) - json_converter = EpubConverter('../epub/', + json_converter = EpubConverter('../epub/9781614389729.epub', logger=logger_object) tmp = json_converter.convert_to_dict()