import json
import codecs

from src.book_solver import BookSolver
from src.util.helpers import BookLogger
from src.html_preprocessor import HtmlPreprocessor
from src.style_preprocessor import StylePreprocessor
from src.epub_converter.html_epub_processor import HTMLEpubProcessor
from src.epub_converter.epub_converter import EpubConverter


class EpubBook(BookSolver):
    """Solver for books of the .epub type - child of BookSolver"""

    def __init__(self, book_id: int = 0, access=None, main_logger=None):
        """
        Forward the arguments to BookSolver and mark the book type as "epub".

        Parameters
        ----------
        book_id
            identifier handed to BookSolver unchanged
        access
            handed to BookSolver unchanged; later passed on to EpubConverter
        main_logger
            handed to BookSolver unchanged

        """
        super().__init__(book_id, access, main_logger)
        self.book_type = "epub"

    def get_converted_book(self):
        """
        Build the epub conversion pipeline and run it on self.book_path

        Steps
        ----------
        1. Create the html preprocessor (with the epub presets), the style
           preprocessor and the epub html processor
        2. Hand them to EpubConverter together with the book path, access
           and logger
        3. Return whatever EpubConverter.convert_to_dict() produces

        Returns
        ----------
        content_dict
            json for LiveCarta platform

        """
        # Construction order is kept as-is: html preprocessor, style
        # preprocessor, html processor, converter.
        preprocessor = HtmlPreprocessor(
            logger=self.logger_object,
            preset_path="presets/epub_presets.json",
        )
        styles = StylePreprocessor()
        epub_html_processor = HTMLEpubProcessor(
            logger=self.logger_object,
            html_preprocessor=preprocessor,
        )
        converter = EpubConverter(
            self.book_path,
            access=self.access,
            logger=self.logger_object,
            style_processor=styles,
            html_processor=epub_html_processor,
        )
        return converter.convert_to_dict()


if __name__ == "__main__":
    # Manual run: convert one local .epub and dump the result as json.
    epub_file_path = "../../books/epub/9780763774134.epub"

    # The logger is identified by the file name (last path component).
    logger_object = BookLogger(
        name="epub",
        book_id=epub_file_path.split("/")[-1],
    )

    html_preprocessor = HtmlPreprocessor(
        logger=logger_object,
        preset_path="../../presets/epub_presets.json",
    )
    style_preprocessor = StylePreprocessor()
    html_processor = HTMLEpubProcessor(
        logger=logger_object,
        html_preprocessor=html_preprocessor,
    )

    # No `access` is passed here, unlike EpubBook.get_converted_book.
    json_converter = EpubConverter(
        epub_file_path,
        logger=logger_object,
        style_processor=style_preprocessor,
        html_processor=html_processor,
    )
    content_dict = json_converter.convert_to_dict()

    # str.replace swaps every "epub" in the path, i.e. both the directory
    # segment and the extension: ../../books/json/9780763774134.json
    json_file_path = epub_file_path.replace("epub", "json")
    with codecs.open(json_file_path, "w", encoding="utf-8") as f_json:
        json.dump(content_dict, f_json, ensure_ascii=False)