forked from LiveCarta/BookConverter
Refix Index Bug
This commit is contained in:
@@ -56,15 +56,20 @@ def convert_font_size(value):
|
|||||||
except ValueError:
|
except ValueError:
|
||||||
return ''
|
return ''
|
||||||
|
|
||||||
def convert_text_indent(value):
|
def convert_indents(value):
|
||||||
if value[0] != '-':
|
if '-' not in value[0]:
|
||||||
positive_text_indent_regexp = re.compile(r'(\w+%)')
|
# 30px = 3.2% = 1.25em = 23pt
|
||||||
has_css_style_attrs = re.search(positive_text_indent_regexp, value)
|
positive_text_indent_regexp = re.compile(r'(\w+%)|(\w*.*\w+em)')
|
||||||
if has_css_style_attrs:
|
has_style_attrs = re.search(positive_text_indent_regexp, value)
|
||||||
if has_css_style_attrs.group(1):
|
if has_style_attrs:
|
||||||
value = value.replace(has_css_style_attrs.group(1),
|
if has_style_attrs.group(1):
|
||||||
str(int("".join(filter(str.isdigit, str(has_css_style_attrs.group(1))))) * 6) +
|
value = value.replace(has_style_attrs.group(1),
|
||||||
'px')
|
str(int("".join(filter(str.isdigit, str(has_style_attrs.group(1)))))) +
|
||||||
|
'%')
|
||||||
|
# elif has_style_attrs.group(2):
|
||||||
|
# value = value.replace(has_style_attrs.group(2),
|
||||||
|
# str(int("".join(filter(str.isdigit, str(has_style_attrs.group(2))))) * 5) +
|
||||||
|
# '%')
|
||||||
return value
|
return value
|
||||||
else:
|
else:
|
||||||
return ''
|
return ''
|
||||||
@@ -99,7 +104,8 @@ LIVECARTA_STYLE_ATTRS = {
|
|||||||
'border-bottom-width': [],
|
'border-bottom-width': [],
|
||||||
'border': [],
|
'border': [],
|
||||||
'list-style-type': [],
|
'list-style-type': [],
|
||||||
'list-style-image': []
|
'list-style-image': [],
|
||||||
|
'margin-left': []
|
||||||
}
|
}
|
||||||
|
|
||||||
"""
|
"""
|
||||||
@@ -123,7 +129,7 @@ def get_text_color(x):
|
|||||||
|
|
||||||
|
|
||||||
LIVECARTA_STYLE_ATTRS_MAPPING = {
|
LIVECARTA_STYLE_ATTRS_MAPPING = {
|
||||||
'text-indent': convert_text_indent,
|
#'text-indent': convert_indents,
|
||||||
'font-variant': lambda x: x,
|
'font-variant': lambda x: x,
|
||||||
'text-align': lambda x: x,
|
'text-align': lambda x: x,
|
||||||
'font': lambda x: '',
|
'font': lambda x: '',
|
||||||
@@ -139,6 +145,7 @@ LIVECARTA_STYLE_ATTRS_MAPPING = {
|
|||||||
'border-bottom-width': lambda x: x if x != '0' else '',
|
'border-bottom-width': lambda x: x if x != '0' else '',
|
||||||
'list-style-type': lambda x: x if x in list_types else 'disc',
|
'list-style-type': lambda x: x if x in list_types else 'disc',
|
||||||
'list-style-image': lambda x: 'disc',
|
'list-style-image': lambda x: 'disc',
|
||||||
|
'margin-left': lambda x: x
|
||||||
}
|
}
|
||||||
|
|
||||||
"""
|
"""
|
||||||
@@ -235,53 +242,66 @@ class TagStyleConverter:
|
|||||||
style_ = style_.replace('color:white;', '')
|
style_ = style_.replace('color:white;', '')
|
||||||
return style_
|
return style_
|
||||||
|
|
||||||
|
@staticmethod
|
||||||
|
def convert_indentions_to_px(style):
    """Fold ``margin-left`` / ``text-indent`` declarations into one pixel ``text-indent``.

    The first ``margin-left`` and the first ``text-indent`` declaration found
    in *style* drive the conversion. Numbers are read from the digits of the
    matched value only, so sign and unit are ignored, and they are scaled with
    the rule of thumb ``5% == 30px`` (multiplied by 6):

    * both present: ``text-indent`` becomes ``|margin-left - text-indent|``
      pixels (both scaled) and the ``margin-left`` declaration is removed;
    * only ``text-indent``: a percentage is scaled to pixels, any other value
      keeps its digits and gets a ``px`` unit;
    * only ``margin-left``: the declaration is replaced by a scaled
      ``text-indent``.

    A value that contains no digits (``auto``, ``inherit``, ...) cannot be
    converted. In that case *style* is returned unchanged instead of raising
    ``ValueError`` from ``int('')`` or emitting an invalid ``text-indent: px``.

    :param style: CSS declarations as a single string.
    :return: the style string with the indentation declarations rewritten.
    """
    # consider that 5% = 30px
    px_per_percent = 6

    def digits_of(raw):
        """Return only the digit characters of *raw* ('' when there are none)."""
        return "".join(filter(str.isdigit, str(raw)))

    margin_left = re.search(r'(margin-left:( *-*\w+%*);*)', style)
    text_indent = re.search(
        r'(text-indent:( *-*\w+%);*)|(text-indent:( *-*\w+);*)', style)

    if text_indent:
        # Exactly one alternative matched: group 1/2 for percentages,
        # group 3/4 for every other value.
        is_percent = bool(text_indent.group(1))
        if is_percent:
            indent_decl, indent_value = text_indent.group(1), text_indent.group(2)
        else:
            indent_decl, indent_value = text_indent.group(3), text_indent.group(4)

        indent_digits = digits_of(indent_value)
        if not indent_digits:
            # Keyword value such as `inherit`: nothing numeric to convert.
            return style

        if margin_left:
            margin_digits = digits_of(margin_left.group(2))
            if not margin_digits:
                # Keyword value such as `auto`: nothing numeric to convert.
                return style
            # NOTE(review): in this branch the text-indent is scaled by 6 even
            # when it is not a percentage, unlike the text-indent-only branch
            # below. Kept as in the original; confirm that this is intended.
            num_ml = int(margin_digits) * px_per_percent
            num_ti = int(indent_digits) * px_per_percent
            style = style.replace(
                indent_decl, 'text-indent: ' + str(abs(num_ml - num_ti)) + 'px; ')
            return style.replace(margin_left.group(1), '')

        if is_percent:
            pixels = str(int(indent_digits) * px_per_percent)
        else:
            # Non-percent values keep their digits verbatim.
            pixels = indent_digits
        return style.replace(indent_decl, 'text-indent: ' + pixels + 'px; ')

    if margin_left:
        margin_digits = digits_of(margin_left.group(2))
        if not margin_digits:
            return style
        num_ml = int(margin_digits) * px_per_percent
        return style.replace(
            margin_left.group(1), 'text-indent: ' + str(num_ml) + 'px; ')

    return style
|
||||||
|
|
||||||
def preprocess_style(self):
|
def preprocess_style(self):
|
||||||
style = self.tag_with_style.attrs.get('style') + ';'
|
style = self.tag_with_style.attrs.get('style') + ';'
|
||||||
style = self.remove_white_if_no_bgcolor(style, self.tag_with_style)
|
style = self.remove_white_if_no_bgcolor(style, self.tag_with_style)
|
||||||
style = style.replace('background:', 'background-color:')
|
style = style.replace('background:', 'background-color:')
|
||||||
style = style.replace('list-style-image', 'list-style-type')
|
style = style.replace('list-style-image', 'list-style-type')
|
||||||
|
|
||||||
positive_text_indent_regexp = re.compile(
|
# todo: make html_reader + do a repetition check with inline_style
|
||||||
r'(text-indent:( *\w+%*);*)')
|
style = self.convert_indentions_to_px(style)
|
||||||
has_css_style_attrs = re.search(positive_text_indent_regexp, style)
|
|
||||||
if has_css_style_attrs:
|
|
||||||
if has_css_style_attrs.group(1):
|
|
||||||
style = style.replace(has_css_style_attrs.group(1), 'text-indent: ' +
|
|
||||||
str(int("".join(filter(str.isdigit, str(has_css_style_attrs.group(2))))) * 6) +
|
|
||||||
'px; ')
|
|
||||||
|
|
||||||
negative_text_indent_regexp = re.compile(
|
|
||||||
r'((text-indent:( *-\w+%*);) *(margin-left:( *\w+%*);))|(text-indent:( *-\w+%*);*)')
|
|
||||||
has_css_style_attrs = re.search(negative_text_indent_regexp, style)
|
|
||||||
if has_css_style_attrs:
|
|
||||||
if has_css_style_attrs.group(1):
|
|
||||||
style = style.replace(has_css_style_attrs.group(1), '')
|
|
||||||
if has_css_style_attrs.group(6):
|
|
||||||
style = style.replace(has_css_style_attrs.group(6), '')
|
|
||||||
|
|
||||||
# if tag had already had inline style, add this to style parsed from css
|
# if tag had already had inline style, add this to style parsed from css
|
||||||
if self.tag.attrs.get('style'):
|
if self.tag.attrs.get('style'):
|
||||||
inline_style = self.tag.attrs['style']
|
inline_style = self.convert_indentions_to_px(self.tag.attrs['style'])
|
||||||
has_inline_style_attrs = re.search(positive_text_indent_regexp, inline_style)
|
style += inline_style
|
||||||
if has_inline_style_attrs:
|
|
||||||
if has_inline_style_attrs.group(1):
|
|
||||||
inline_style = inline_style.replace(has_inline_style_attrs.group(1), 'text-indent: ' +
|
|
||||||
str(int("".join(filter(str.isdigit,
|
|
||||||
str(has_inline_style_attrs.group(
|
|
||||||
2))))) * 6) +
|
|
||||||
'px; ')
|
|
||||||
has_inline_style_attrs = re.search(negative_text_indent_regexp, inline_style)
|
|
||||||
if has_inline_style_attrs:
|
|
||||||
if has_inline_style_attrs.group(1):
|
|
||||||
inline_style = inline_style.replace(has_inline_style_attrs.group(1), 'text-indent: ' +
|
|
||||||
str(int("".join(filter(str.isdigit,
|
|
||||||
str(has_inline_style_attrs.group(
|
|
||||||
5))))) * 6) +
|
|
||||||
'px; ')
|
|
||||||
if has_inline_style_attrs.group(6):
|
|
||||||
inline_style = inline_style.replace(has_inline_style_attrs.group(6), '')
|
|
||||||
if self.tag.attrs['style'] not in style:
|
|
||||||
style += inline_style
|
|
||||||
|
|
||||||
return style
|
return style
|
||||||
|
|
||||||
|
|||||||
@@ -442,7 +442,7 @@ if __name__ == "__main__":
|
|||||||
|
|
||||||
logger_object = BookLogger(name=f'epub', main_logger=logger, book_id=0)
|
logger_object = BookLogger(name=f'epub', main_logger=logger, book_id=0)
|
||||||
|
|
||||||
json_converter = EpubConverter('../epub/9781614389170.epub',
|
json_converter = EpubConverter('../epub/9781634256063.epub',
|
||||||
logger=logger_object)
|
logger=logger_object)
|
||||||
tmp = json_converter.convert_to_dict()
|
tmp = json_converter.convert_to_dict()
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user