epub converter: rename from epub_converter.py

This commit is contained in:
shirshasa
2021-04-19 11:35:38 +03:00
parent 1df37b6122
commit dce0f871a8

View File

@@ -27,7 +27,7 @@ from src.html_epub_preprocessor import unwrap_structural_tags, get_tags_between_
# todo: https://docs.python.org/3/howto/unicode.html
class EpubBookAdapter:
class EpubPostprocessor:
def __init__(self, file):
self.file = file
self.ebooklib_book = epub.read_epub(file) # todo: log error from ebooklib
@@ -178,7 +178,7 @@ class EpubBookAdapter:
for sub_node in self.adjacency_list[node]:
self.build_one_anchored_section(sub_node)
print(f'Chapter: {node.href, node.id} is split.')
# print(f'Chapter: {node.href, node.id} is split.')
def build_anchor2soup(self):
nav_points = self.adjacency_list[-1]
@@ -204,27 +204,28 @@ class EpubBookAdapter:
sub_nodes.append(sub_chapter_item)
# print(f'Chapter: {title} is prepared.')
return ChapterItem(title, content_preprocessed, sub_nodes)
return ChapterItem(title_preprocessed, content_preprocessed, sub_nodes)
def convert_to_dict(self):
    """Serialize the top-level chapters and the footnotes into a plain dict.

    Each nav point in ``self.adjacency_list[-1]`` is converted with
    ``self.node2livecarta_chapter_item``; every resulting chapter item is
    then serialized through its ``to_dict()`` method.

    Returns:
        dict: ``{"content": [<chapter dict>, ...], "footnotes": self.footnotes}``.
    """
    # Build every chapter item first and serialize afterwards, so all nav
    # points are converted before the first to_dict() call happens.
    top_level_chapters = [
        self.node2livecarta_chapter_item(nav_point)
        for nav_point in self.adjacency_list[-1]
    ]
    return {
        "content": [chapter.to_dict() for chapter in top_level_chapters],
        "footnotes": self.footnotes,
    }
if __name__ == "__main__":
    # Manual smoke run: convert one local EPUB and dump the result to tmp.json.
    json_converter = EpubPostprocessor('/home/katerina/PycharmProjects/Jenia/converter/epub/Chaos_Engineering.epub')
    tmp = json_converter.convert_to_dict()

    # Built-in open() applies the encoding itself; no codecs wrapper is needed.
    with open('tmp.json', 'w', encoding='utf-8') as f:
        # ensure_ascii=False keeps non-ASCII characters as-is instead of \uXXXX escapes.
        json.dump(tmp, f, ensure_ascii=False)