# BookConverter/src/epub_converter/epub_solver.py

import json
import codecs

from src.book_solver import BookSolver
from src.util.helpers import BookLogger
from src.html_preprocessor import HtmlPreprocessor
from src.style_preprocessor import StylePreprocessor
from src.epub_converter.html_epub_processor import HTMLEpubProcessor
from src.epub_converter.epub_converter import EpubConverter


class EpubBook(BookSolver):
    """Book solver for the .epub book type (subclass of BookSolver)."""

    def __init__(self, book_id: int = 0, access=None, main_logger=None):
        # The base class receives the arguments unchanged; this subclass
        # only tags the instance with its book type.
        super().__init__(book_id, access, main_logger)
        self.book_type = "epub"

    def get_converted_book(self):
        """
        Build the epub processing pipeline and run the conversion.

        Steps
        ----------
        1. Converts .epub to .html
        2. Parses from line structure to nested structure

        Returns
        ----------
        content_dict
            json for LiveCarta platform

        """
        # NOTE(review): logger_object, book_path and access are not assigned
        # in this class - presumably BookSolver sets them; confirm there.
        preprocessor = HtmlPreprocessor(
            logger=self.logger_object,
            preset_path="presets/epub_presets.json",
        )
        styles = StylePreprocessor()
        epub_html_processor = HTMLEpubProcessor(
            logger=self.logger_object,
            html_preprocessor=preprocessor,
        )
        converter = EpubConverter(
            self.book_path,
            access=self.access,
            logger=self.logger_object,
            style_processor=styles,
            html_processor=epub_html_processor,
        )
        return converter.convert_to_dict()


if __name__ == "__main__":
    # Manual run: convert one hard-coded .epub file and dump the resulting
    # dictionary as JSON. Paths are relative to the current working directory.
    epub_file_path = "../../books/epub/9780763774134.epub"
    # The file name (last path component) is used as the logger's book id.
    logger_object = BookLogger(
        name="epub", book_id=epub_file_path.split("/")[-1])

    html_preprocessor = HtmlPreprocessor(
        logger=logger_object, preset_path="../../presets/epub_presets.json")
    style_preprocessor = StylePreprocessor()
    html_processor = HTMLEpubProcessor(logger=logger_object,
                                       html_preprocessor=html_preprocessor)

    json_converter = EpubConverter(epub_file_path, logger=logger_object,
                                   style_processor=style_preprocessor, html_processor=html_processor)
    content_dict = json_converter.convert_to_dict()

    # str.replace swaps every "epub" occurrence, so both the directory
    # segment and the extension change:
    # ../../books/epub/<name>.epub -> ../../books/json/<name>.json
    json_file_path = epub_file_path.replace("epub", "json")
    # The built-in open() with an explicit encoding supersedes the legacy
    # codecs.open(); json.dump without indent writes no raw newlines, so the
    # bytes written are the same.
    with open(json_file_path, "w", encoding="utf-8") as f_json:
        json.dump(content_dict, f_json, ensure_ascii=False)