forked from LiveCarta/BookConverter
epub converter: prettifying
This commit is contained in:
@@ -323,7 +323,7 @@ def unwrap_structural_tags(body_tag):
|
||||
x.parent.unwrap() # warning! could reflect on formatting/internal links in some cases
|
||||
|
||||
parents_marks_are_body = [x.parent == body_tag for x in marks]
|
||||
assert all(parents_marks_are_body), 'Anchor for chapter is deeper than 2 level.'
|
||||
assert all(parents_marks_are_body), 'Anchor for chapter is deeper than 2 level. Chapters can not be parsed.'
|
||||
|
||||
_preprocessing_headings(body_tag)
|
||||
|
||||
@@ -340,16 +340,16 @@ def unwrap_structural_tags(body_tag):
|
||||
return body_tag
|
||||
|
||||
|
||||
def get_tags_between_ids(first_id, href, html_soup):
|
||||
h_marked = html_soup.find(attrs={'id': first_id, 'class': 'converter-chapter-mark'})
|
||||
if h_marked:
|
||||
p = h_marked.next_sibling
|
||||
def get_tags_between_chapter_marks(first_id, href, html_soup):
|
||||
marked_tags = html_soup.find(attrs={'id': first_id, 'class': 'converter-chapter-mark'})
|
||||
if marked_tags:
|
||||
next_tag = marked_tags.next_sibling
|
||||
tags = []
|
||||
while p:
|
||||
if p.name == 'tmp' and p.attrs.get('class') == 'converter-chapter-mark':
|
||||
while next_tag:
|
||||
if next_tag.attrs.get('class') == 'converter-chapter-mark':
|
||||
break
|
||||
tags.append(p)
|
||||
p = p.next_sibling
|
||||
tags.append(next_tag)
|
||||
next_tag = next_tag.next_sibling
|
||||
|
||||
tags = [tag.extract() for tag in tags]
|
||||
html_soup.smooth()
|
||||
@@ -368,7 +368,7 @@ def prepare_title_and_content(title, chapter_tag: BeautifulSoup, remove_title_fr
|
||||
to_remove = []
|
||||
for child in chapter_tag.contents:
|
||||
if isinstance(child, NavigableString):
|
||||
s = re.sub(r'([\n\t\xa0])', '', child.string)
|
||||
s = re.sub(r'([\n\t])', '', child.string)
|
||||
if s == '':
|
||||
to_remove.append(child)
|
||||
|
||||
|
||||
Reference in New Issue
Block a user