Rename src/epub_converter.py to src/epub_postprocessor.py and the class
EpubBookAdapter to EpubPostprocessor.

Also: comment out the per-chapter "is split" debug print, pass
title_preprocessed to ChapterItem, and move the __main__ export logic into a
new EpubPostprocessor.convert_to_dict() method.

NOTE(review): line breaks, indentation and blank context lines were
reconstructed from the hunk line counts; verify against the original commit
before applying.

diff --git a/src/epub_converter.py b/src/epub_postprocessor.py
similarity index 91%
rename from src/epub_converter.py
rename to src/epub_postprocessor.py
index b54eec4..6d32940 100644
--- a/src/epub_converter.py
+++ b/src/epub_postprocessor.py
@@ -27,7 +27,7 @@ from src.html_epub_preprocessor import unwrap_structural_tags, get_tags_between_
 # todo: https://docs.python.org/3/howto/unicode.html
 
 
-class EpubBookAdapter:
+class EpubPostprocessor:
     def __init__(self, file):
         self.file = file
         self.ebooklib_book = epub.read_epub(file) # todo: log error from ebooklib
@@ -178,7 +178,7 @@ class EpubBookAdapter:
         for sub_node in self.adjacency_list[node]:
             self.build_one_anchored_section(sub_node)
 
-        print(f'Chapter: {node.href, node.id} is split.')
+        # print(f'Chapter: {node.href, node.id} is split.')
 
     def build_anchor2soup(self):
         nav_points = self.adjacency_list[-1]
@@ -204,27 +204,28 @@ class EpubBookAdapter:
             sub_nodes.append(sub_chapter_item)
 
         # print(f'Chapter: {title} is prepared.')
-        return ChapterItem(title, content_preprocessed, sub_nodes)
+        return ChapterItem(title_preprocessed, content_preprocessed, sub_nodes)
+
+    def convert_to_dict(self):
+        top_level_nav_points = self.adjacency_list[-1]
+        top_level_chapters = []
+
+        for nav_point in top_level_nav_points:
+            chapter = self.node2livecarta_chapter_item(nav_point)
+            top_level_chapters.append(chapter)
+
+        top_level_dict_chapters = [x.to_dict() for x in top_level_chapters]
+
+        return {
+            "content": top_level_dict_chapters,
+            "footnotes": self.footnotes
+        }
 
 
 if __name__ == "__main__":
-    adapter = EpubBookAdapter('/home/katerina/PycharmProjects/Jenia/converter/epub/calibri.epub')
-
-    top_level_nav_points = adapter.adjacency_list[-1]
-    top_level_chapters = []
-
-    for nav_point in top_level_nav_points:
-        chapter = adapter.node2livecarta_chapter_item(nav_point)
-        top_level_chapters.append(chapter)
-
-    l = [x.to_dict() for x in top_level_chapters]
-
-    tmp = {
-        "content": l,
-        "footnotes": adapter.footnotes
-    }
+    json_converter = EpubPostprocessor('/home/katerina/PycharmProjects/Jenia/converter/epub/Chaos_Engineering.epub')
+    tmp = json_converter.convert_to_dict()
 
     with codecs.open('tmp.json', 'w', encoding='utf-8') as f:
         json.dump(tmp, f, ensure_ascii=False)
 
-