import json
import codecs
import logging
from threading import Event

from src.book_solver import BookSolver
from src.util.helpers import BookLogger
from src.html_presets_processor import HtmlPresetsProcessor
from src.style_reader import StyleReader
from src.docx_converter.docx2libre_html import Docx2LibreHtml
from src.docx_converter.html_docx_processor import HtmlDocxProcessor
from src.docx_converter.libre_html2json_converter import LibreHtml2JsonConverter


def main() -> None:
    """Run the docx conversion pipeline on one hard-coded book and dump it as JSON.

    Steps, in order:
      1. Build a ``BookLogger`` whose book id is the docx file name.
      2. Instantiate ``Docx2LibreHtml`` for the docx file and read its
         ``html_soup``, ``html_path`` and ``book_id`` attributes.
      3. Feed the soup through ``HtmlDocxProcessor.process_html`` to obtain
         ``content``, ``footnotes`` and ``top_level_headers``.
      4. Turn those into a dict with ``LibreHtml2JsonConverter.convert_to_dict``.
      5. Write the dict as UTF-8 JSON next to the ``json`` sibling directory.
    """
    docx_file_path = "/app/books/docx/Ch_1_ready.docx"

    # The book id handed to the logger is the last path component (file name).
    book_logger = BookLogger(name="epub")
    book_logger.configure_book_logger(book_id=docx_file_path.split("/")[-1])

    # The event is set up front, so any wait() on it returns immediately.
    # NOTE(review): presumably Docx2LibreHtml waits on this before invoking
    # LibreOffice -- confirm against its implementation.
    locker = Event()
    locker.set()

    html_converter = Docx2LibreHtml(
        file_path=docx_file_path,
        logger=book_logger,
        libre_locker=locker,
    )

    html_preset_processor = HtmlPresetsProcessor(
        logger=book_logger,
        preset_path="/app/preset/default_preset.json",
    )
    style_preprocessor = StyleReader()
    html_processor = HtmlDocxProcessor(
        html_soup=html_converter.html_soup,
        logger=book_logger,
        html_preprocessor=html_preset_processor,
        style_preprocessor=style_preprocessor,
    )
    content, footnotes, top_level_headers = html_processor.process_html(
        html_path=html_converter.html_path,
        book_id=html_converter.book_id,
    )

    json_converter = LibreHtml2JsonConverter(
        content, footnotes, top_level_headers, book_logger
    )
    content_dict = json_converter.convert_to_dict()

    # str.replace swaps EVERY "docx" occurrence, so both the directory segment
    # and the extension change: /app/books/docx/X.docx -> /app/books/json/X.json
    json_file_path = docx_file_path.replace("docx", "json")

    # Built-in open() with an explicit encoding supersedes the legacy
    # codecs.open(); ensure_ascii=False keeps non-ASCII text unescaped.
    with open(json_file_path, "w", encoding="utf-8") as f:
        json.dump(content_dict, f, ensure_ascii=False)


if __name__ == "__main__":
    main()