epub converter: bug fix

- remove the `border:0` that was being added to td styles
- preserve link ids when headings are removed
This commit is contained in:
shirshasa
2021-06-02 08:41:46 +03:00
parent 044439e617
commit 1b94b870a7
3 changed files with 15 additions and 4 deletions

View File

@@ -97,6 +97,7 @@ def preprocess_table(body_tag: BeautifulSoup):
if width:
td.attrs['width'] = width
td.attrs['style'] = td.attrs.get('style').replace('border:0;', '')
if border_sizes:
border_size = sum(border_sizes) / len(border_sizes)
@@ -118,6 +119,15 @@ def _process_lists(body_tag):
il_tag.p.unwrap()
def _add_span_to_save_ids_for_links(tag_to_be_removed, body_tag):
for sub_tag in tag_to_be_removed.find_all():
if sub_tag.attrs.get('id'):
new_tag = body_tag.new_tag("span")
new_tag.attrs['id'] = sub_tag.attrs['id']
new_tag.attrs['class'] = sub_tag.attrs.get('class')
tag_to_be_removed.insert_before(new_tag)
def clean_headings_content(content: Tag, title: str):
title = title.lower()
for child in content.contents:
@@ -130,8 +140,10 @@ def clean_headings_content(content: Tag, title: str):
text = re.sub(r' +', ' ', text).strip()
text = text.lower()
if title == text:
_add_span_to_save_ids_for_links(child, content)
child.extract()
elif (title in text) and (child.name in ['h1', 'h2', 'h3']):
_add_span_to_save_ids_for_links(child, content)
child.extract()
break

View File

@@ -104,7 +104,8 @@ class LawCartaConfig:
'silver': 'lightGray',
'white': 'white',
'maroon': '#800000',
'gray': '#808080'
'gray': '#808080',
'grey': '#808080'
}
INDENT = '30px'

View File

@@ -59,9 +59,7 @@ def str2color_name(s: str):
elif '#' in s:
name = get_hex_color_name(s)
if (name == 'white') and (s.lower() not in ['#ffffff', '#fff']):
return 'gray'
if name == 'grey':
return 'gray'
name = 'gray'
return name