This commit is contained in:
Kiryl
2021-09-27 15:26:56 +03:00
parent f7d921dd27
commit 3c579210ff
2 changed files with 32 additions and 17 deletions

View File

@@ -56,7 +56,18 @@ def convert_font_size(value):
except ValueError: except ValueError:
return '' return ''
# Matches a plain decimal percentage such as "5%", "1.5%" or ".5%".
_TEXT_INDENT_PERCENT_RE = re.compile(r'(?<![\w.])(\d+(?:\.\d+)?|\.\d+)%')

# Pixels substituted for each 1% of text-indent (same factor the original used).
_TEXT_INDENT_PX_PER_PERCENT = 6


def convert_text_indent(value):
    """Convert a CSS ``text-indent`` value for the style-attribute mapping.

    Non-negative percentage values are rewritten to pixels at a fixed rate
    of 6px per 1%; any other non-negative value is returned unchanged.
    Negative values (and empty input) yield an empty string.

    Args:
        value: The raw ``text-indent`` value, e.g. ``"5%"`` or ``"10px"``.

    Returns:
        The converted value, or ``''`` when the value is empty or negative.
    """
    if not value:
        # Previously value[0] raised IndexError on an empty string.
        return ''

    # Ignore leading whitespace when looking for the sign, so " -5%" is
    # treated as negative instead of being turned into " -30px".
    if value.lstrip().startswith('-'):
        return ''

    def _percent_to_px(match):
        pixels = float(match.group(1)) * _TEXT_INDENT_PX_PER_PERCENT
        if pixels.is_integer():
            return f'{int(pixels)}px'
        # Trim float noise (0.1 * 6 -> 0.6000000000000001) and trailing zeros.
        return f'{pixels:.2f}'.rstrip('0').rstrip('.') + 'px'

    # Only real numbers followed by '%' are converted; tokens such as "abc%"
    # are left alone rather than crashing on int('').
    return _TEXT_INDENT_PERCENT_RE.sub(_percent_to_px, value)
""" """
LIVECARTA_STYLE_ATTRS = { css property: value } LIVECARTA_STYLE_ATTRS = { css property: value }
@@ -112,7 +123,7 @@ def get_text_color(x):
LIVECARTA_STYLE_ATTRS_MAPPING = { LIVECARTA_STYLE_ATTRS_MAPPING = {
#'text-indent': lambda x: LawCartaConfig.INDENT if x != '0' else '', # add - numbers 'text-indent': convert_text_indent,
'font-variant': lambda x: x, 'font-variant': lambda x: x,
'text-align': lambda x: x, 'text-align': lambda x: x,
'font': lambda x: '', 'font': lambda x: '',
@@ -230,18 +241,17 @@ class TagStyleConverter:
style = style.replace('background:', 'background-color:') style = style.replace('background:', 'background-color:')
style = style.replace('list-style-image', 'list-style-type') style = style.replace('list-style-image', 'list-style-type')
# hot_fix
positive_text_indent_regexp = re.compile( positive_text_indent_regexp = re.compile(
r'(text-indent:( *\w+%*);)') r'(text-indent:( *\w+%*);*)')
has_css_style_attrs = re.search(positive_text_indent_regexp, style) has_css_style_attrs = re.search(positive_text_indent_regexp, style)
if has_css_style_attrs: if has_css_style_attrs:
if has_css_style_attrs.group(1): if has_css_style_attrs.group(1):
style = style.replace(has_css_style_attrs.group(1), 'text-indent: ' + style = style.replace(has_css_style_attrs.group(1), 'text-indent: ' +
str(int("".join(filter(str.isdigit, str(has_css_style_attrs.group(2))))) * 1) + str(int("".join(filter(str.isdigit, str(has_css_style_attrs.group(2))))) * 6) +
'px') 'px; ')
negative_text_indent_regexp = re.compile( negative_text_indent_regexp = re.compile(
r'((text-indent:( *-\w+%*);) *(margin-left:( *\w+%*);))|(text-indent:( *-\w+%*);)') r'((text-indent:( *-\w+%*);) *(margin-left:( *\w+%*);))|(text-indent:( *-\w+%*);*)')
has_css_style_attrs = re.search(negative_text_indent_regexp, style) has_css_style_attrs = re.search(negative_text_indent_regexp, style)
if has_css_style_attrs: if has_css_style_attrs:
if has_css_style_attrs.group(1): if has_css_style_attrs.group(1):
@@ -250,23 +260,27 @@ class TagStyleConverter:
style = style.replace(has_css_style_attrs.group(6), '') style = style.replace(has_css_style_attrs.group(6), '')
# if tag had already had inline style, add this to style parsed from css # if tag had already had inline style, add this to style parsed from css
if self.tag.attrs.get('style') and self.tag.attrs['style'] not in style: if self.tag.attrs.get('style'):
inline_style = self.tag.attrs['style'] inline_style = self.tag.attrs['style']
has_inline_style_attrs = re.search(positive_text_indent_regexp, inline_style) has_inline_style_attrs = re.search(positive_text_indent_regexp, inline_style)
if has_inline_style_attrs: if has_inline_style_attrs:
if has_inline_style_attrs.group(1): if has_inline_style_attrs.group(1):
inline_style = inline_style.replace(has_inline_style_attrs.group(1), 'text-indent: ' + inline_style = inline_style.replace(has_inline_style_attrs.group(1), 'text-indent: ' +
str(int("".join(filter(str.isdigit, str(has_inline_style_attrs.group(2)))))*6) + str(int("".join(filter(str.isdigit,
'px') str(has_inline_style_attrs.group(
2))))) * 6) +
'px; ')
has_inline_style_attrs = re.search(negative_text_indent_regexp, inline_style) has_inline_style_attrs = re.search(negative_text_indent_regexp, inline_style)
if has_inline_style_attrs: if has_inline_style_attrs:
if has_inline_style_attrs.group(1): if has_inline_style_attrs.group(1):
inline_style = inline_style.replace(has_inline_style_attrs.group(1), 'text-indent: ' + inline_style = inline_style.replace(has_inline_style_attrs.group(1), 'text-indent: ' +
str(int("".join(filter(str.isdigit, str(has_inline_style_attrs.group(5)))))*6) + str(int("".join(filter(str.isdigit,
'px') str(has_inline_style_attrs.group(
if has_inline_style_attrs.group(6) or has_css_style_attrs.group(6): 5))))) * 6) +
'px; ')
if has_inline_style_attrs.group(6):
inline_style = inline_style.replace(has_inline_style_attrs.group(6), '') inline_style = inline_style.replace(has_inline_style_attrs.group(6), '')
if self.tag.attrs['style'] not in style:
style += inline_style style += inline_style
return style return style
@@ -412,7 +426,8 @@ def add_inline_style_to_html_soup(soup1: BeautifulSoup, css_text: str):
remove_classes=False, remove_classes=False,
external_styles=False, external_styles=False,
allow_network=False, allow_network=False,
disable_validation=True) disable_validation=True,
)
soup2 = BeautifulSoup(html_with_inline_style, features='lxml') soup2 = BeautifulSoup(html_with_inline_style, features='lxml')
for i in livecarta_tmp_ids: for i in livecarta_tmp_ids:

View File

@@ -442,7 +442,7 @@ if __name__ == "__main__":
logger_object = BookLogger(name=f'epub', main_logger=logger, book_id=0) logger_object = BookLogger(name=f'epub', main_logger=logger, book_id=0)
json_converter = EpubConverter('../epub/9781614389729.epub', json_converter = EpubConverter('../epub/9781614389170.epub',
logger=logger_object) logger=logger_object)
tmp = json_converter.convert_to_dict() tmp = json_converter.convert_to_dict()