epub converter: prettifying

This commit is contained in:
shirshasa
2021-06-07 17:40:09 +03:00
parent 2506da20be
commit eba46574fc

View File

@@ -323,7 +323,7 @@ def unwrap_structural_tags(body_tag):
x.parent.unwrap() # warning! could reflect on formatting/internal links in some cases
parents_marks_are_body = [x.parent == body_tag for x in marks]
assert all(parents_marks_are_body), 'Anchor for chapter is deeper than 2 level.'
assert all(parents_marks_are_body), 'Anchor for chapter is deeper than 2 level. Chapters can not be parsed.'
_preprocessing_headings(body_tag)
@@ -340,16 +340,16 @@ def unwrap_structural_tags(body_tag):
return body_tag
def get_tags_between_ids(first_id, href, html_soup):
h_marked = html_soup.find(attrs={'id': first_id, 'class': 'converter-chapter-mark'})
if h_marked:
p = h_marked.next_sibling
def get_tags_between_chapter_marks(first_id, href, html_soup):
marked_tags = html_soup.find(attrs={'id': first_id, 'class': 'converter-chapter-mark'})
if marked_tags:
next_tag = marked_tags.next_sibling
tags = []
while p:
if p.name == 'tmp' and p.attrs.get('class') == 'converter-chapter-mark':
while next_tag:
if next_tag.attrs.get('class') == 'converter-chapter-mark':
break
tags.append(p)
p = p.next_sibling
tags.append(next_tag)
next_tag = next_tag.next_sibling
tags = [tag.extract() for tag in tags]
html_soup.smooth()
@@ -368,7 +368,7 @@ def prepare_title_and_content(title, chapter_tag: BeautifulSoup, remove_title_fr
to_remove = []
for child in chapter_tag.contents:
if isinstance(child, NavigableString):
s = re.sub(r'([\n\t\xa0])', '', child.string)
s = re.sub(r'([\n\t])', '', child.string)
if s == '':
to_remove.append(child)