forked from LiveCarta/BookConverter
63 lines
2.3 KiB
Python
63 lines
2.3 KiB
Python
import json
|
|
import codecs
|
|
|
|
from src.book_solver import BookSolver
|
|
from src.util.helpers import BookLogger
|
|
from src.html_preprocessor import HtmlPreprocessor
|
|
from src.style_preprocessor import StylePreprocessor
|
|
from src.epub_converter.html_epub_processor import HTMLEpubProcessor
|
|
from src.epub_converter.epub_converter import EpubConverter
|
|
|
|
|
|
class EpubBook(BookSolver):
    """Book solver for the .epub format; a child of BookSolver."""

    def __init__(self, book_id: int = 0, access=None, main_logger=None):
        """
        Initialise the base solver and mark this book as an epub.

        Parameters
        ----------
        book_id: int
            forwarded unchanged to BookSolver.__init__
        access
            forwarded unchanged to BookSolver.__init__
        main_logger
            forwarded unchanged to BookSolver.__init__

        """
        super().__init__(book_id, access, main_logger)
        self.book_type = "epub"

    def get_converted_book(self):
        """
        Build the epub conversion pipeline and run it on self.book_path.

        Steps
        ----------
        1. Creates an HtmlPreprocessor (presets read from
           "presets/epub_presets.json") and a StylePreprocessor
        2. Wraps the html preprocessor in an HTMLEpubProcessor
        3. Hands both processors to an EpubConverter and asks it for a dict

        self.book_path, self.access and self.logger_object are not set in
        this class - presumably they are provided by BookSolver (verify).

        Returns
        ----------
        content_dict
            result of EpubConverter.convert_to_dict()
            (json for LiveCarta platform)

        """
        # Construction order is kept as-is: html preprocessor, style
        # preprocessor, html processor, and only then the converter.
        preset_driven_preprocessor = HtmlPreprocessor(
            logger=self.logger_object,
            preset_path="presets/epub_presets.json",
        )
        css_preprocessor = StylePreprocessor()
        epub_html_processor = HTMLEpubProcessor(
            logger=self.logger_object,
            html_preprocessor=preset_driven_preprocessor,
        )
        converter = EpubConverter(
            self.book_path,
            access=self.access,
            logger=self.logger_object,
            style_processor=css_preprocessor,
            html_processor=epub_html_processor,
        )
        return converter.convert_to_dict()
|
|
|
|
|
|
if __name__ == "__main__":
    # Manual run: converts one hard-coded .epub and dumps the result as JSON.
    # All paths below are relative, so they resolve against the current
    # working directory of the process.
    epub_file_path = "../../books/epub/9780763774134.epub"
    # The file name (last path component) is used as the logger's book id.
    logger_object = BookLogger(
        name="epub", book_id=epub_file_path.split("/")[-1])

    html_preprocessor = HtmlPreprocessor(
        logger=logger_object, preset_path="../../presets/epub_presets.json")
    style_preprocessor = StylePreprocessor()
    html_processor = HTMLEpubProcessor(logger=logger_object,
                                       html_preprocessor=html_preprocessor)

    json_converter = EpubConverter(epub_file_path, logger=logger_object,
                                   style_processor=style_preprocessor,
                                   html_processor=html_processor)
    content_dict = json_converter.convert_to_dict()

    # NOTE: str.replace substitutes every "epub" in the path, so both the
    # directory and the extension change; the output path is
    # "../../books/json/9780763774134.json".
    # The builtin open() replaces codecs.open(), which is deprecated since
    # Python 3.14; the bytes written are the same UTF-8 JSON.
    with open(epub_file_path.replace("epub", "json"), "w", encoding="utf-8") as f_json:
        json.dump(content_dict, f_json, ensure_ascii=False)
|