Fix CSS & HTML indents

This commit is contained in:
Kiryl
2021-09-24 19:05:41 +03:00
parent 63b6a3bda3
commit f7d921dd27
2 changed files with 34 additions and 12 deletions

View File

@@ -112,7 +112,7 @@ def get_text_color(x):
LIVECARTA_STYLE_ATTRS_MAPPING = {
'text-indent': lambda x: LawCartaConfig.INDENT if x != '0' else '',
#'text-indent': lambda x: LawCartaConfig.INDENT if x != '0' else '', # add - numbers
'font-variant': lambda x: x,
'text-align': lambda x: x,
'font': lambda x: '',
@@ -127,7 +127,7 @@ LIVECARTA_STYLE_ATTRS_MAPPING = {
'border-left-width': lambda x: x if x != '0' else '',
'border-bottom-width': lambda x: x if x != '0' else '',
'list-style-type': lambda x: x if x in list_types else 'disc',
'list-style-image': lambda x: 'disc'
'list-style-image': lambda x: 'disc',
}
"""
@@ -230,21 +230,43 @@ class TagStyleConverter:
style = style.replace('background:', 'background-color:')
style = style.replace('list-style-image', 'list-style-type')
# if the tag already has an inline style, append it to the style parsed from CSS
# hot_fix
positive_text_indent_regexp = re.compile(
r'(text-indent:( *\w+%*);)')
has_css_style_attrs = re.search(positive_text_indent_regexp, style)
if has_css_style_attrs:
if has_css_style_attrs.group(1):
style = style.replace(has_css_style_attrs.group(1), 'text-indent: ' +
str(int("".join(filter(str.isdigit, str(has_css_style_attrs.group(2))))) * 1) +
'px')
negative_text_indent_regexp = re.compile(
r'((text-indent:( *-\w+%*);) *(margin-left:( *\w+%*);))|(text-indent:( *-\w+%*);)')
has_css_style_attrs = re.search(negative_text_indent_regexp, style)
if has_css_style_attrs:
if has_css_style_attrs.group(1):
style = style.replace(has_css_style_attrs.group(1), '')
if has_css_style_attrs.group(6):
style = style.replace(has_css_style_attrs.group(6), '')
# if the tag already has an inline style, append it to the style parsed from CSS
if self.tag.attrs.get('style') and self.tag.attrs['style'] not in style:
# hot_fix
negative_text_indent_regexp = re.compile(r'((text-indent:( *-\w+%*);) *(margin-left:( *\w+%*);))|(text-indent:( *-\w+%*);)')
inline_style = self.tag.attrs['style']
has_inline_style_attrs = re.search(negative_text_indent_regexp, inline_style)
has_css_style_attrs = re.search(negative_text_indent_regexp, style)
has_inline_style_attrs = re.search(positive_text_indent_regexp, inline_style)
if has_inline_style_attrs:
if has_inline_style_attrs.group(1):
inline_style = inline_style.replace(has_inline_style_attrs.group(1), 'text-indent: ' + has_inline_style_attrs.group(5))
style = style.replace(has_css_style_attrs.group(1), '')
if has_inline_style_attrs.group(6):
inline_style = inline_style.replace(has_inline_style_attrs.group(1), 'text-indent: ' +
str(int("".join(filter(str.isdigit, str(has_inline_style_attrs.group(2)))))*6) +
'px')
has_inline_style_attrs = re.search(negative_text_indent_regexp, inline_style)
if has_inline_style_attrs:
if has_inline_style_attrs.group(1):
inline_style = inline_style.replace(has_inline_style_attrs.group(1), 'text-indent: ' +
str(int("".join(filter(str.isdigit, str(has_inline_style_attrs.group(5)))))*6) +
'px')
if has_inline_style_attrs.group(6) or has_css_style_attrs.group(6):
inline_style = inline_style.replace(has_inline_style_attrs.group(6), '')
style = style.replace(has_css_style_attrs.group(6), '')
style += inline_style
return style

View File

@@ -442,7 +442,7 @@ if __name__ == "__main__":
logger_object = BookLogger(name=f'epub', main_logger=logger, book_id=0)
json_converter = EpubConverter('../epub/',
json_converter = EpubConverter('../epub/9781614389729.epub',
logger=logger_object)
tmp = json_converter.convert_to_dict()