This repository was archived on 2026-04-06. You can view files and clone it, but you cannot open issues or pull requests, or push commits.
Files
BookConverter/src/epub_converter/epub_solver.py
2022-09-06 16:26:08 +03:00

63 lines
2.3 KiB
Python

import json
import codecs
from src.book_solver import BookSolver
from src.util.helpers import BookLogger
from src.html_preprocessor import HtmlPreprocessor
from src.style_preprocessor import StylePreprocessor
from src.epub_converter.html_epub_processor import HTMLEpubProcessor
from src.epub_converter.epub_converter import EpubConverter
class EpubBook(BookSolver):
    """Solver for books of the .epub type; subclass of BookSolver."""

    def __init__(self, book_id: int = 0, access=None, main_logger=None):
        """Pass all arguments through to BookSolver and mark the book type as "epub"."""
        super().__init__(book_id, access, main_logger)
        self.book_type = "epub"

    def get_converted_book(self):
        """
        Assemble the epub conversion pipeline and run it on ``self.book_path``.

        Steps
        ----------
        1. Converts .epub to .html
        2. Parses from line structure to nested structure

        Returns
        ----------
        content_dict
            json for LiveCarta platform (whatever ``EpubConverter.convert_to_dict``
            returns)

        """
        # NOTE(review): book_path, access and logger_object are not set in this
        # class; presumably BookSolver provides them - confirm.
        # The preset path is relative, so it is resolved against the current
        # working directory of the process.
        preset_html = HtmlPreprocessor(
            logger=self.logger_object,
            preset_path="presets/epub_presets.json",
        )
        styles = StylePreprocessor()
        epub_html = HTMLEpubProcessor(
            logger=self.logger_object,
            html_preprocessor=preset_html,
        )
        converter = EpubConverter(
            self.book_path,
            access=self.access,
            logger=self.logger_object,
            style_processor=styles,
            html_processor=epub_html,
        )
        return converter.convert_to_dict()
if __name__ == "__main__":
    # Manual entry point: convert one sample .epub and dump the result as JSON.
    # All paths are relative to the current working directory.
    epub_file_path = "../../books/epub/9780763774134.epub"
    # The logger is identified by the file name (last path component).
    logger_object = BookLogger(
        name="epub", book_id=epub_file_path.split("/")[-1])

    html_preprocessor = HtmlPreprocessor(
        logger=logger_object, preset_path="../../presets/epub_presets.json")
    style_preprocessor = StylePreprocessor()
    html_processor = HTMLEpubProcessor(logger=logger_object,
                                       html_preprocessor=html_preprocessor)

    # No `access` object is passed here, unlike EpubBook.get_converted_book.
    json_converter = EpubConverter(epub_file_path, logger=logger_object,
                                   style_processor=style_preprocessor,
                                   html_processor=html_processor)
    content_dict = json_converter.convert_to_dict()

    # str.replace swaps EVERY "epub" in the path, so both the directory and the
    # extension change: ../../books/json/9780763774134.json
    json_file_path = epub_file_path.replace("epub", "json")
    # Built-in open() replaces codecs.open(), which is deprecated since
    # Python 3.14. json.dump escapes newlines and no indent is requested, so
    # the bytes written are the same.
    with open(json_file_path, "w", encoding="utf-8") as f_json:
        json.dump(content_dict, f_json, ensure_ascii=False)