forked from LiveCarta/BookConverter
epub converter: rename from epub_converter.py
This commit is contained in:
@@ -27,7 +27,7 @@ from src.html_epub_preprocessor import unwrap_structural_tags, get_tags_between_
|
|||||||
# todo: https://docs.python.org/3/howto/unicode.html
|
# todo: https://docs.python.org/3/howto/unicode.html
|
||||||
|
|
||||||
|
|
||||||
class EpubBookAdapter:
|
class EpubPostprocessor:
|
||||||
def __init__(self, file):
|
def __init__(self, file):
|
||||||
self.file = file
|
self.file = file
|
||||||
self.ebooklib_book = epub.read_epub(file) # todo: log error from ebooklib
|
self.ebooklib_book = epub.read_epub(file) # todo: log error from ebooklib
|
||||||
@@ -178,7 +178,7 @@ class EpubBookAdapter:
|
|||||||
for sub_node in self.adjacency_list[node]:
|
for sub_node in self.adjacency_list[node]:
|
||||||
self.build_one_anchored_section(sub_node)
|
self.build_one_anchored_section(sub_node)
|
||||||
|
|
||||||
print(f'Chapter: {node.href, node.id} is split.')
|
# print(f'Chapter: {node.href, node.id} is split.')
|
||||||
|
|
||||||
def build_anchor2soup(self):
|
def build_anchor2soup(self):
|
||||||
nav_points = self.adjacency_list[-1]
|
nav_points = self.adjacency_list[-1]
|
||||||
@@ -204,27 +204,28 @@ class EpubBookAdapter:
|
|||||||
sub_nodes.append(sub_chapter_item)
|
sub_nodes.append(sub_chapter_item)
|
||||||
|
|
||||||
# print(f'Chapter: {title} is prepared.')
|
# print(f'Chapter: {title} is prepared.')
|
||||||
return ChapterItem(title, content_preprocessed, sub_nodes)
|
return ChapterItem(title_preprocessed, content_preprocessed, sub_nodes)
|
||||||
|
|
||||||
|
def convert_to_dict(self):
|
||||||
|
top_level_nav_points = self.adjacency_list[-1]
|
||||||
|
top_level_chapters = []
|
||||||
|
|
||||||
|
for nav_point in top_level_nav_points:
|
||||||
|
chapter = self.node2livecarta_chapter_item(nav_point)
|
||||||
|
top_level_chapters.append(chapter)
|
||||||
|
|
||||||
|
top_level_dict_chapters = [x.to_dict() for x in top_level_chapters]
|
||||||
|
|
||||||
|
return {
|
||||||
|
"content": top_level_dict_chapters,
|
||||||
|
"footnotes": self.footnotes
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
if __name__ == "__main__":
|
if __name__ == "__main__":
|
||||||
adapter = EpubBookAdapter('/home/katerina/PycharmProjects/Jenia/converter/epub/calibri.epub')
|
json_converter = EpubPostprocessor('/home/katerina/PycharmProjects/Jenia/converter/epub/Chaos_Engineering.epub')
|
||||||
|
tmp = json_converter.convert_to_dict()
|
||||||
top_level_nav_points = adapter.adjacency_list[-1]
|
|
||||||
top_level_chapters = []
|
|
||||||
|
|
||||||
for nav_point in top_level_nav_points:
|
|
||||||
chapter = adapter.node2livecarta_chapter_item(nav_point)
|
|
||||||
top_level_chapters.append(chapter)
|
|
||||||
|
|
||||||
l = [x.to_dict() for x in top_level_chapters]
|
|
||||||
|
|
||||||
tmp = {
|
|
||||||
"content": l,
|
|
||||||
"footnotes": adapter.footnotes
|
|
||||||
}
|
|
||||||
|
|
||||||
with codecs.open('tmp.json', 'w', encoding='utf-8') as f:
|
with codecs.open('tmp.json', 'w', encoding='utf-8') as f:
|
||||||
json.dump(tmp, f, ensure_ascii=False)
|
json.dump(tmp, f, ensure_ascii=False)
|
||||||
|
|
||||||
|
|
||||||
Reference in New Issue
Block a user