forked from LiveCarta/BookConverter
epub converter: prettifying
This commit is contained in:
@@ -323,7 +323,7 @@ def unwrap_structural_tags(body_tag):
|
|||||||
x.parent.unwrap() # warning! could reflect on formatting/internal links in some cases
|
x.parent.unwrap() # warning! could reflect on formatting/internal links in some cases
|
||||||
|
|
||||||
parents_marks_are_body = [x.parent == body_tag for x in marks]
|
parents_marks_are_body = [x.parent == body_tag for x in marks]
|
||||||
assert all(parents_marks_are_body), 'Anchor for chapter is deeper than 2 level.'
|
assert all(parents_marks_are_body), 'Anchor for chapter is deeper than 2 level. Chapters can not be parsed.'
|
||||||
|
|
||||||
_preprocessing_headings(body_tag)
|
_preprocessing_headings(body_tag)
|
||||||
|
|
||||||
@@ -340,16 +340,16 @@ def unwrap_structural_tags(body_tag):
|
|||||||
return body_tag
|
return body_tag
|
||||||
|
|
||||||
|
|
||||||
def get_tags_between_ids(first_id, href, html_soup):
|
def get_tags_between_chapter_marks(first_id, href, html_soup):
|
||||||
h_marked = html_soup.find(attrs={'id': first_id, 'class': 'converter-chapter-mark'})
|
marked_tags = html_soup.find(attrs={'id': first_id, 'class': 'converter-chapter-mark'})
|
||||||
if h_marked:
|
if marked_tags:
|
||||||
p = h_marked.next_sibling
|
next_tag = marked_tags.next_sibling
|
||||||
tags = []
|
tags = []
|
||||||
while p:
|
while next_tag:
|
||||||
if p.name == 'tmp' and p.attrs.get('class') == 'converter-chapter-mark':
|
if next_tag.attrs.get('class') == 'converter-chapter-mark':
|
||||||
break
|
break
|
||||||
tags.append(p)
|
tags.append(next_tag)
|
||||||
p = p.next_sibling
|
next_tag = next_tag.next_sibling
|
||||||
|
|
||||||
tags = [tag.extract() for tag in tags]
|
tags = [tag.extract() for tag in tags]
|
||||||
html_soup.smooth()
|
html_soup.smooth()
|
||||||
@@ -368,7 +368,7 @@ def prepare_title_and_content(title, chapter_tag: BeautifulSoup, remove_title_fr
|
|||||||
to_remove = []
|
to_remove = []
|
||||||
for child in chapter_tag.contents:
|
for child in chapter_tag.contents:
|
||||||
if isinstance(child, NavigableString):
|
if isinstance(child, NavigableString):
|
||||||
s = re.sub(r'([\n\t\xa0])', '', child.string)
|
s = re.sub(r'([\n\t])', '', child.string)
|
||||||
if s == '':
|
if s == '':
|
||||||
to_remove.append(child)
|
to_remove.append(child)
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user