add converter local launch

2025-04-25 14:39:55 +03:00
parent 9be0a46162
commit 43f65f9712
4 changed files with 98 additions and 0 deletions
--- a/18
+++ b/18
@@ -0,0 +1,18 @@
+# Local development image for the converter: Python 3.11, LibreOffice and debugpy.
+FROM python:3.11.0
+
+RUN apt-get update && \
+    apt-get install -y software-properties-common
+# NOTE(review): -r removes the PPA instead of adding it; confirm this is intended.
+RUN add-apt-repository -r ppa:libreoffice/ppa
+# Update and install in one layer so a stale cached package index is never reused.
+RUN apt-get update && apt-get -y install libreoffice
+
+COPY requirements.txt /app/
+RUN pip install -r /app/requirements.txt
+RUN pip install debugpy
+
+WORKDIR /app/
+# Idle forever so the container stays up; run the converter manually inside it, e.g.:
+#   python3 -m debugpy --listen 0.0.0.0:5678 --wait-for-client test.py
+CMD ["tail", "-f", "/dev/null"]
--- a/docker-compose.yml
+++ b/docker-compose.yml
@@ -0,0 +1,10 @@
+version: "3.8"
+services:
+  converter:  # local debug container built from DockerfileLocal
+    build:
+      context: .  # explicit build context; not every Compose release defaults it
+      dockerfile: DockerfileLocal
+    ports:
+      - '5678:5678'  # debugpy listen port (see the debugpy command in the Dockerfile)
+    volumes:
+      - ./:/app  # bind-mount the project directory at /app
--- a/test.py
+++ b/test.py
@@ -0,0 +1,31 @@
+import json
+import codecs
+import logging
+
+from src.book_solver import BookSolver
+from src.util.helpers import BookLogger
+from src.html_presets_processor import HtmlPresetsProcessor
+from src.style_reader import StyleReader
+from src.epub_converter.html_epub_processor import HtmlEpubProcessor
+from src.epub_converter.epub_converter import EpubConverter
+
+
+if __name__ == "__main__":
+    #epub_file_path = f"/app/books/epub/9781284289473.epub"
+    epub_file_path = f"/app/books/epub/9781284296693.epub"
+    print("Start")
+    logger_object = BookLogger(name="epub")
+    logger_object.configure_book_logger(book_id=epub_file_path.split("/")[-1])
+
+    html_preset_processor = HtmlPresetsProcessor(
+        logger=logger_object, preset_path="/app/preset/default_preset.json")
+    style_preprocessor = StyleReader()
+    html_processor = HtmlEpubProcessor(logger=logger_object,
+                                       html_preprocessor=html_preset_processor)
+
+    json_converter = EpubConverter(epub_file_path, logger=logger_object,
+                                   style_processor=style_preprocessor, html_processor=html_processor)
+    content_dict = json_converter.convert_to_dict()
+    print(epub_file_path.replace("epub", "json"))
+    with codecs.open(epub_file_path.replace("epub", "json"), "w", encoding="utf-8") as f_json:
+        json.dump(content_dict, f_json, ensure_ascii=False)
--- a/test_docx.py
+++ b/test_docx.py
@@ -0,0 +1,39 @@
+import json
+import codecs
+import logging
+from threading import Event
+
+from src.book_solver import BookSolver
+from src.util.helpers import BookLogger
+from src.html_presets_processor import HtmlPresetsProcessor
+from src.style_reader import StyleReader
+from src.docx_converter.docx2libre_html import Docx2LibreHtml
+from src.docx_converter.html_docx_processor import HtmlDocxProcessor
+from src.docx_converter.libre_html2json_converter import LibreHtml2JsonConverter
+
+
+if __name__ == "__main__":
+    docx_file_path = f"/app/books/docx/Ch_1_ready.docx"
+
+    book_logger = BookLogger(name="epub")
+    book_logger.configure_book_logger(book_id=docx_file_path.split("/")[-1])
+
+    locker = Event()
+    locker.set()
+
+    html_converter = Docx2LibreHtml(file_path=docx_file_path,
+                                    logger=book_logger, libre_locker=locker)
+    html_preset_processor = HtmlPresetsProcessor(
+    logger=book_logger, preset_path="/app/preset/default_preset.json")
+    style_preprocessor = StyleReader()
+    html_processor = HtmlDocxProcessor(html_soup=html_converter.html_soup, logger=book_logger,
+                                        html_preprocessor=html_preset_processor, style_preprocessor=style_preprocessor)
+    content, footnotes, top_level_headers = html_processor.process_html(
+            html_path=html_converter.html_path, book_id=html_converter.book_id)
+
+    json_converter = LibreHtml2JsonConverter(
+            content, footnotes, top_level_headers, book_logger)
+    content_dict = json_converter.convert_to_dict()
+
+    with codecs.open(docx_file_path.replace("docx", "json"), "w", encoding="utf-8") as f:
+        json.dump(content_dict, f, ensure_ascii=False)