add converter local launch

This commit is contained in:
Svitin Egor
2025-04-25 14:39:55 +03:00
parent 9be0a46162
commit 43f65f9712
4 changed files with 98 additions and 0 deletions

39
test_docx.py Normal file
View File

@@ -0,0 +1,39 @@
import json
import codecs
import logging
from threading import Event
from src.book_solver import BookSolver
from src.util.helpers import BookLogger
from src.html_presets_processor import HtmlPresetsProcessor
from src.style_reader import StyleReader
from src.docx_converter.docx2libre_html import Docx2LibreHtml
from src.docx_converter.html_docx_processor import HtmlDocxProcessor
from src.docx_converter.libre_html2json_converter import LibreHtml2JsonConverter
if __name__ == "__main__":
    # Local launch of the DOCX -> JSON conversion pipeline for a single,
    # hard-coded book. Nothing here runs when the module is merely imported.
    docx_file_path = "/app/books/docx/Ch_1_ready.docx"

    # The file name (last path segment) is used as the book id for logging.
    # NOTE(review): the logger is named "epub" although a .docx file is
    # processed here -- looks like a copy-paste leftover; confirm before renaming.
    book_logger = BookLogger(name="epub")
    book_logger.configure_book_logger(book_id=docx_file_path.split("/")[-1])

    # The event is handed to the converter already set.
    # Presumably this tells Docx2LibreHtml that LibreOffice is free to use in
    # a single-process run -- TODO confirm against Docx2LibreHtml.
    locker = Event()
    locker.set()
    html_converter = Docx2LibreHtml(
        file_path=docx_file_path,
        logger=book_logger,
        libre_locker=locker,
    )

    html_preset_processor = HtmlPresetsProcessor(
        logger=book_logger,
        preset_path="/app/preset/default_preset.json",
    )
    style_preprocessor = StyleReader()

    # The converter's html_soup, html_path and book_id feed the HTML processor.
    html_processor = HtmlDocxProcessor(
        html_soup=html_converter.html_soup,
        logger=book_logger,
        html_preprocessor=html_preset_processor,
        style_preprocessor=style_preprocessor,
    )
    content, footnotes, top_level_headers = html_processor.process_html(
        html_path=html_converter.html_path,
        book_id=html_converter.book_id,
    )

    json_converter = LibreHtml2JsonConverter(
        content, footnotes, top_level_headers, book_logger)
    content_dict = json_converter.convert_to_dict()

    # str.replace swaps EVERY "docx" occurrence, so both the directory segment
    # and the extension change: /app/books/docx/Ch_1_ready.docx becomes
    # /app/books/json/Ch_1_ready.json.
    output_path = docx_file_path.replace("docx", "json")

    # Built-in open() replaces codecs.open() (deprecated since Python 3.14);
    # ensure_ascii=False keeps non-ASCII text readable in the UTF-8 output.
    with open(output_path, "w", encoding="utf-8") as f:
        json.dump(content_dict, f, ensure_ascii=False)