forked from LiveCarta/BookConverter
epub converter: update footnotes
This commit is contained in:
@@ -48,8 +48,15 @@ class EpubPostprocessor:
|
|||||||
|
|
||||||
self.logger.log('Footnotes processing.')
|
self.logger.log('Footnotes processing.')
|
||||||
self.footnotes = []
|
self.footnotes = []
|
||||||
|
self.noterefs = []
|
||||||
for href in self.href2soup_html:
|
for href in self.href2soup_html:
|
||||||
self.footnotes.extend(preprocess_footnotes(self.href2soup_html[href], self.href2soup_html))
|
footnotes, noterefs = preprocess_footnotes(self.href2soup_html[href], self.href2soup_html)
|
||||||
|
self.footnotes.extend(footnotes)
|
||||||
|
self.noterefs.extend(noterefs)
|
||||||
|
for i, noteref in enumerate(self.noterefs):
|
||||||
|
noteref.attrs['data-id'] = i + 1
|
||||||
|
noteref.attrs['id'] = f'footnote-{i + 1}'
|
||||||
|
|
||||||
self.logger.log(f'Added {len(self.footnotes)} footnotes.')
|
self.logger.log(f'Added {len(self.footnotes)} footnotes.')
|
||||||
self.logger.log('TOC processing.')
|
self.logger.log('TOC processing.')
|
||||||
self.href2subchapter_ids = defaultdict(list)
|
self.href2subchapter_ids = defaultdict(list)
|
||||||
|
|||||||
@@ -194,10 +194,14 @@ def replace_with_livecarta_anchor_tag(anchor, i):
|
|||||||
new_tag['data-id'] = i + 1
|
new_tag['data-id'] = i + 1
|
||||||
new_tag['id'] = f'footnote-{i + 1}'
|
new_tag['id'] = f'footnote-{i + 1}'
|
||||||
new_tag.string = '*'
|
new_tag.string = '*'
|
||||||
|
if anchor.parent.name == 'sup':
|
||||||
|
anchor.parent.unwrap()
|
||||||
anchor.replace_with(new_tag)
|
anchor.replace_with(new_tag)
|
||||||
|
return new_tag
|
||||||
|
|
||||||
|
|
||||||
def preprocess_footnotes(source_html_tag: Tag, href2soup_html: dict = None, noteref_attr_name='epub:type') -> List[str]:
|
def preprocess_footnotes(source_html_tag: Tag, href2soup_html: dict = None, noteref_attr_name='epub:type') -> Tuple[
|
||||||
|
list, list]:
|
||||||
"""
|
"""
|
||||||
This function should be earlier that adding fonts in pipeline.
|
This function should be earlier that adding fonts in pipeline.
|
||||||
|
|
||||||
@@ -209,6 +213,7 @@ def preprocess_footnotes(source_html_tag: Tag, href2soup_html: dict = None, note
|
|||||||
noterefs_tags = source_html_tag.find_all(attrs={noteref_attr_name: 'noteref'})
|
noterefs_tags = source_html_tag.find_all(attrs={noteref_attr_name: 'noteref'})
|
||||||
bad_noterefs_tags = set([tag for tag in noterefs_tags if not tag.attrs.get('href')])
|
bad_noterefs_tags = set([tag for tag in noterefs_tags if not tag.attrs.get('href')])
|
||||||
noterefs_tags = [tag for tag in noterefs_tags if tag not in bad_noterefs_tags]
|
noterefs_tags = [tag for tag in noterefs_tags if tag not in bad_noterefs_tags]
|
||||||
|
new_noterefs_tags = []
|
||||||
[tag.decompose() for tag in bad_noterefs_tags]
|
[tag.decompose() for tag in bad_noterefs_tags]
|
||||||
|
|
||||||
def parse_a_tag_href(s: str):
|
def parse_a_tag_href(s: str):
|
||||||
@@ -257,13 +262,13 @@ def preprocess_footnotes(source_html_tag: Tag, href2soup_html: dict = None, note
|
|||||||
footnote_tag = expected_footnote_tags[0]
|
footnote_tag = expected_footnote_tags[0]
|
||||||
if footnote_tag.parent.attrs.get('role') and footnote_tag.parent.attrs.get('role') == 'doc-endnote':
|
if footnote_tag.parent.attrs.get('role') and footnote_tag.parent.attrs.get('role') == 'doc-endnote':
|
||||||
footnote_tag = footnote_tag.parent
|
footnote_tag = footnote_tag.parent
|
||||||
replace_with_livecarta_anchor_tag(noteref_tag, i)
|
new_noterefs_tags.append(replace_with_livecarta_anchor_tag(noteref_tag, i))
|
||||||
content = footnote_tag.text
|
content = footnote_tag.text
|
||||||
|
|
||||||
footnote_tag.decompose()
|
footnote_tag.decompose()
|
||||||
footnotes.append(content)
|
footnotes.append(content)
|
||||||
|
|
||||||
return footnotes
|
return footnotes, new_noterefs_tags
|
||||||
|
|
||||||
|
|
||||||
def unwrap_structural_tags(body_tag):
|
def unwrap_structural_tags(body_tag):
|
||||||
@@ -503,7 +508,7 @@ def prepare_title_and_content(title, chapter_tag: BeautifulSoup, remove_title_fr
|
|||||||
preprocess_block_tags(chapter_tag)
|
preprocess_block_tags(chapter_tag)
|
||||||
# 2. class removal
|
# 2. class removal
|
||||||
for tag in chapter_tag.find_all(recursive=True):
|
for tag in chapter_tag.find_all(recursive=True):
|
||||||
if hasattr(tag, 'attrs') and tag.attrs.get('class') and (tag.attrs.get('class') not in ['link-anchor']):
|
if hasattr(tag, 'attrs') and tag.attrs.get('class') and (tag.attrs.get('class') not in ['link-anchor','footnote-element']):
|
||||||
del tag.attrs['class']
|
del tag.attrs['class']
|
||||||
|
|
||||||
# content_str = re.sub(r'([\n\t\xa0])', ' ', str(content_tag))
|
# content_str = re.sub(r'([\n\t\xa0])', ' ', str(content_tag))
|
||||||
|
|||||||
Reference in New Issue
Block a user