forked from LiveCarta/BookConverter
epub converter: rename from epub_converter.py
This commit is contained in:
@@ -27,7 +27,7 @@ from src.html_epub_preprocessor import unwrap_structural_tags, get_tags_between_
|
||||
# todo: https://docs.python.org/3/howto/unicode.html
|
||||
|
||||
|
||||
class EpubBookAdapter:
|
||||
class EpubPostprocessor:
|
||||
def __init__(self, file):
|
||||
self.file = file
|
||||
self.ebooklib_book = epub.read_epub(file) # todo: log error from ebooklib
|
||||
@@ -178,7 +178,7 @@ class EpubBookAdapter:
|
||||
for sub_node in self.adjacency_list[node]:
|
||||
self.build_one_anchored_section(sub_node)
|
||||
|
||||
print(f'Chapter: {node.href, node.id} is split.')
|
||||
# print(f'Chapter: {node.href, node.id} is split.')
|
||||
|
||||
def build_anchor2soup(self):
|
||||
nav_points = self.adjacency_list[-1]
|
||||
@@ -204,27 +204,28 @@ class EpubBookAdapter:
|
||||
sub_nodes.append(sub_chapter_item)
|
||||
|
||||
# print(f'Chapter: {title} is prepared.')
|
||||
return ChapterItem(title, content_preprocessed, sub_nodes)
|
||||
return ChapterItem(title_preprocessed, content_preprocessed, sub_nodes)
|
||||
|
||||
def convert_to_dict(self):
    """Return the converted book as a plain dictionary.

    Every entry of ``self.adjacency_list[-1]`` (treated here as the
    top-level navigation points) is turned into a chapter item with
    ``self.node2livecarta_chapter_item`` and then serialized through
    that item's ``to_dict()`` method.

    Returns:
        dict: ``{"content": [<chapter dict>, ...],
        "footnotes": self.footnotes}``.
    """
    top_level_nav_points = self.adjacency_list[-1]

    # Build all chapter items first and serialize them afterwards, so the
    # order of calls stays: every conversion, then every to_dict().
    top_level_chapters = [
        self.node2livecarta_chapter_item(nav_point)
        for nav_point in top_level_nav_points
    ]
    top_level_dict_chapters = [
        chapter.to_dict() for chapter in top_level_chapters
    ]

    return {
        "content": top_level_dict_chapters,
        "footnotes": self.footnotes,
    }
|
||||
|
||||
|
||||
if __name__ == "__main__":
    # Manual smoke run: convert one local EPUB and dump the result to
    # tmp.json in the current working directory.
    # The former driver code that used EpubBookAdapter is dropped: that
    # class was renamed to EpubPostprocessor, and the dict it built was
    # overwritten by the convert_to_dict() result before being written.
    json_converter = EpubPostprocessor(
        '/home/katerina/PycharmProjects/Jenia/converter/epub/Chaos_Engineering.epub'
    )
    tmp = json_converter.convert_to_dict()

    # ensure_ascii=False writes non-ASCII characters as-is, so the file is
    # opened with an explicit UTF-8 encoding.
    with codecs.open('tmp.json', 'w', encoding='utf-8') as f:
        json.dump(tmp, f, ensure_ascii=False)
|
||||
|
||||
|
||||
Reference in New Issue
Block a user