epub converter: rename from epub_converter.py

This commit is contained in:
shirshasa
2021-04-19 11:35:38 +03:00
parent 1df37b6122
commit dce0f871a8

View File

@@ -27,7 +27,7 @@ from src.html_epub_preprocessor import unwrap_structural_tags, get_tags_between_
# todo: https://docs.python.org/3/howto/unicode.html # todo: https://docs.python.org/3/howto/unicode.html
class EpubBookAdapter: class EpubPostprocessor:
def __init__(self, file): def __init__(self, file):
self.file = file self.file = file
self.ebooklib_book = epub.read_epub(file) # todo: log error from ebooklib self.ebooklib_book = epub.read_epub(file) # todo: log error from ebooklib
@@ -178,7 +178,7 @@ class EpubBookAdapter:
for sub_node in self.adjacency_list[node]: for sub_node in self.adjacency_list[node]:
self.build_one_anchored_section(sub_node) self.build_one_anchored_section(sub_node)
print(f'Chapter: {node.href, node.id} is split.') # print(f'Chapter: {node.href, node.id} is split.')
def build_anchor2soup(self): def build_anchor2soup(self):
nav_points = self.adjacency_list[-1] nav_points = self.adjacency_list[-1]
@@ -204,27 +204,28 @@ class EpubBookAdapter:
sub_nodes.append(sub_chapter_item) sub_nodes.append(sub_chapter_item)
# print(f'Chapter: {title} is prepared.') # print(f'Chapter: {title} is prepared.')
return ChapterItem(title, content_preprocessed, sub_nodes) return ChapterItem(title_preprocessed, content_preprocessed, sub_nodes)
def convert_to_dict(self):
if __name__ == "__main__": top_level_nav_points = self.adjacency_list[-1]
adapter = EpubBookAdapter('/home/katerina/PycharmProjects/Jenia/converter/epub/calibri.epub')
top_level_nav_points = adapter.adjacency_list[-1]
top_level_chapters = [] top_level_chapters = []
for nav_point in top_level_nav_points: for nav_point in top_level_nav_points:
chapter = adapter.node2livecarta_chapter_item(nav_point) chapter = self.node2livecarta_chapter_item(nav_point)
top_level_chapters.append(chapter) top_level_chapters.append(chapter)
l = [x.to_dict() for x in top_level_chapters] top_level_dict_chapters = [x.to_dict() for x in top_level_chapters]
tmp = { return {
"content": l, "content": top_level_dict_chapters,
"footnotes": adapter.footnotes "footnotes": self.footnotes
} }
if __name__ == "__main__":
json_converter = EpubPostprocessor('/home/katerina/PycharmProjects/Jenia/converter/epub/Chaos_Engineering.epub')
tmp = json_converter.convert_to_dict()
with codecs.open('tmp.json', 'w', encoding='utf-8') as f: with codecs.open('tmp.json', 'w', encoding='utf-8') as f:
json.dump(tmp, f, ensure_ascii=False) json.dump(tmp, f, ensure_ascii=False)