Local debugging setup for the book converters.

Adds DockerfileLocal and docker-compose.yml (Python 3.11 image with LibreOffice
and debugpy installed, port 5678 published, working tree mounted at /app) plus
two driver scripts, test.py (EPUB) and test_docx.py (DOCX). Each script runs a
converter on one hard-coded sample file and dumps the resulting dict as JSON.

diff --git a/DockerfileLocal b/DockerfileLocal
new file mode 100644
index 0000000..40d1e39
--- /dev/null
+++ b/DockerfileLocal
@@ -0,0 +1,22 @@
+FROM python:3.11.0
+
+RUN apt-get update && \
+    apt-get install -y software-properties-common
+
+# NOTE(review): -r/--remove deletes the PPA entry instead of adding it; confirm intent.
+RUN add-apt-repository -r ppa:libreoffice/ppa
+RUN apt-get update
+RUN apt-get -y install libreoffice
+
+COPY requirements.txt /app/
+RUN pip install -r /app/requirements.txt
+RUN pip install debugpy
+
+WORKDIR /app/
+
+# Keep the container alive so commands can be started inside it later.
+# Exec form runs tail directly (no wrapping shell), and following /dev/null
+# explicitly does not depend on how stdin is attached.
+CMD ["tail", "-f", "/dev/null"]
+
+# Debug example: python3 -m debugpy --listen 0.0.0.0:5678 --wait-for-client test.py
diff --git a/docker-compose.yml b/docker-compose.yml
new file mode 100644
index 0000000..cb9f9d1
--- /dev/null
+++ b/docker-compose.yml
@@ -0,0 +1,13 @@
+version: "3.8"
+
+services:
+  converter:
+    build:
+      context: .
+      dockerfile: DockerfileLocal
+    ports:
+      # debugpy listen port used by the debug example in DockerfileLocal
+      - '5678:5678'
+    volumes:
+      # mount the working tree at the image's WORKDIR
+      - ./:/app
diff --git a/test.py b/test.py
new file mode 100644
index 0000000..55e341e
--- /dev/null
+++ b/test.py
@@ -0,0 +1,36 @@
+"""Convert one hard-coded sample EPUB to JSON (manual run/debug driver)."""
+
+import json
+import codecs
+import logging
+
+from src.book_solver import BookSolver
+from src.util.helpers import BookLogger
+from src.html_presets_processor import HtmlPresetsProcessor
+from src.style_reader import StyleReader
+from src.epub_converter.html_epub_processor import HtmlEpubProcessor
+from src.epub_converter.epub_converter import EpubConverter
+
+
+if __name__ == "__main__":
+    # Alternative sample: "/app/books/epub/9781284289473.epub"
+    epub_file_path = "/app/books/epub/9781284296693.epub"
+    # str.replace swaps every "epub" in the path, so the directory segment
+    # changes too: /app/books/json/9781284296693.json
+    json_file_path = epub_file_path.replace("epub", "json")
+    print("Start")
+    logger_object = BookLogger(name="epub")
+    logger_object.configure_book_logger(book_id=epub_file_path.split("/")[-1])
+
+    html_preset_processor = HtmlPresetsProcessor(
+        logger=logger_object, preset_path="/app/preset/default_preset.json")
+    style_preprocessor = StyleReader()
+    html_processor = HtmlEpubProcessor(logger=logger_object,
+                                       html_preprocessor=html_preset_processor)
+
+    json_converter = EpubConverter(epub_file_path, logger=logger_object,
+                                   style_processor=style_preprocessor, html_processor=html_processor)
+    content_dict = json_converter.convert_to_dict()
+    print(json_file_path)
+    with codecs.open(json_file_path, "w", encoding="utf-8") as f_json:
+        json.dump(content_dict, f_json, ensure_ascii=False)
diff --git a/test_docx.py b/test_docx.py
new file mode 100644
index 0000000..43438ed
--- /dev/null
+++ b/test_docx.py
@@ -0,0 +1,44 @@
+"""Convert one hard-coded sample DOCX to JSON (manual run/debug driver)."""
+
+import json
+import codecs
+import logging
+from threading import Event
+
+from src.book_solver import BookSolver
+from src.util.helpers import BookLogger
+from src.html_presets_processor import HtmlPresetsProcessor
+from src.style_reader import StyleReader
+from src.docx_converter.docx2libre_html import Docx2LibreHtml
+from src.docx_converter.html_docx_processor import HtmlDocxProcessor
+from src.docx_converter.libre_html2json_converter import LibreHtml2JsonConverter
+
+
+if __name__ == "__main__":
+    docx_file_path = "/app/books/docx/Ch_1_ready.docx"
+
+    # NOTE(review): logger name is "epub" although this script handles DOCX; confirm.
+    book_logger = BookLogger(name="epub")
+    book_logger.configure_book_logger(book_id=docx_file_path.split("/")[-1])
+
+    # Docx2LibreHtml is given an Event as libre_locker; it is set before use here.
+    locker = Event()
+    locker.set()
+
+    html_converter = Docx2LibreHtml(file_path=docx_file_path,
+                                    logger=book_logger, libre_locker=locker)
+    html_preset_processor = HtmlPresetsProcessor(
+        logger=book_logger, preset_path="/app/preset/default_preset.json")
+    style_preprocessor = StyleReader()
+    html_processor = HtmlDocxProcessor(html_soup=html_converter.html_soup, logger=book_logger,
+                                       html_preprocessor=html_preset_processor, style_preprocessor=style_preprocessor)
+    content, footnotes, top_level_headers = html_processor.process_html(
+        html_path=html_converter.html_path, book_id=html_converter.book_id)
+
+    json_converter = LibreHtml2JsonConverter(
+        content, footnotes, top_level_headers, book_logger)
+    content_dict = json_converter.convert_to_dict()
+
+    # Every "docx" in the path is replaced: /app/books/json/Ch_1_ready.json
+    with codecs.open(docx_file_path.replace("docx", "json"), "w", encoding="utf-8") as f:
+        json.dump(content_dict, f, ensure_ascii=False)