forked from LiveCarta/BookConverter
epub converter: add footnotes, list processing
This commit is contained in:
@@ -13,9 +13,17 @@ from ebooklib.utils import debug
|
||||
|
||||
from src.data_objects import ChapterItem, NavPoint
|
||||
from src.html_epub_preprocessor import unwrap_structural_tags, get_tags_between_ids, prepare_title_and_content, \
|
||||
preprocess_image
|
||||
preprocess_image, preprocess_footnotes
|
||||
|
||||
|
||||
# epub3 examples:
|
||||
# https://github.com/IDPF/epub3-samples
|
||||
# specification:
|
||||
# https://idpf.github.io/epub-vocabs/structure/
|
||||
# footnotes:
|
||||
# http://www.theheratik.net/books/tech-epub/chapter-8/
|
||||
# http://kb.daisy.org/publishing/docs/html/epub-type.html
|
||||
# todo: http://kb.daisy.org/publishing/docs/html/notes.html
|
||||
# todo: https://docs.python.org/3/howto/unicode.html
|
||||
|
||||
|
||||
@@ -34,6 +42,10 @@ class EpubBookAdapter:
|
||||
|
||||
self.id_anchor_exist_in_nav_points = False
|
||||
self.href2soup_html: Dict[str, BeautifulSoup] = self.build_href2soup_content()
|
||||
self.footnotes = []
|
||||
for href in self.href2soup_html:
|
||||
self.footnotes.extend(preprocess_footnotes(self.href2soup_html[href], self.href2soup_html,
|
||||
noteref_attr_name='data-type'))
|
||||
# if the spine in content.opf has a toc attribute -> the ncx file can be located -> navMap can be extracted from it
|
||||
# if the spine has no toc attribute, the nav tag is looked up in the manifest -> EpubNav. this is the epub3 case (todo: not tested)
|
||||
self.href2ids = defaultdict(list)
|
||||
@@ -71,8 +83,6 @@ class EpubBookAdapter:
|
||||
|
||||
def build_adjacency_list_from_toc(self, element, lvl=0):
|
||||
# use book.toc as a root
|
||||
# todo: read _create_section in get_nav
|
||||
# todo: try list on hrefs, extra info in another db
|
||||
|
||||
if isinstance(element, Link):
|
||||
# todo: check if link exists
|
||||
@@ -210,7 +220,8 @@ if __name__ == "__main__":
|
||||
l = [x.to_dict() for x in top_level_chapters]
|
||||
|
||||
tmp = {
|
||||
"content": l
|
||||
"content": l,
|
||||
"footnotes": adapter.footnotes
|
||||
}
|
||||
|
||||
with codecs.open('tmp.json', 'w', encoding='utf-8') as f:
|
||||
|
||||
Reference in New Issue
Block a user