forked from LiveCarta/BookConverter
add converter local launch
This commit is contained in:
18
DockerfileLocal
Normal file
18
DockerfileLocal
Normal file
@@ -0,0 +1,18 @@
|
||||
# Local development image for the converter: Python 3.11 with LibreOffice
# and debugpy installed on top of the project's Python requirements.
FROM python:3.11.0

# Install LibreOffice. Every `apt-get update` lives in the same layer as the
# `install` that relies on it, so a cached (stale) package index is never
# reused, and the index is deleted in the layer that created it.
# NOTE(review): `add-apt-repository -r` removes the LibreOffice PPA rather
# than adding it, so libreoffice presumably comes from the base image's
# default repositories -- confirm that this is what is wanted.
RUN apt-get update && \
    apt-get install -y software-properties-common && \
    add-apt-repository -r ppa:libreoffice/ppa && \
    apt-get update && \
    apt-get -y install libreoffice && \
    rm -rf /var/lib/apt/lists/*

# Copy only the requirements manifest so the dependency layers stay cached
# until requirements.txt itself changes.
COPY requirements.txt /app/
RUN pip install --no-cache-dir -r /app/requirements.txt
RUN pip install --no-cache-dir debugpy

WORKDIR /app/

# Port used by the debugpy command shown at the bottom of this file.
EXPOSE 5678

# Keep the container idle so scripts can be started by hand inside it.
# Exec form with an explicit file operand: the original `tail -f > /dev/null`
# had no file to follow and only stayed alive by following stdin.
CMD ["tail", "-f", "/dev/null"]

# To debug a script from inside the running container:
#   python3 -m debugpy --listen 0.0.0.0:5678 --wait-for-client test.py
|
||||
10
docker-compose.yml
Normal file
10
docker-compose.yml
Normal file
@@ -0,0 +1,10 @@
|
||||
version: "3.8"

services:
  # Local converter container built from DockerfileLocal.
  converter:
    build:
      # Explicit build context: the legacy Compose file v3 format requires a
      # build path whenever `dockerfile` is given; newer Compose releases
      # default to the project directory, so behaviour there is unchanged.
      context: .
      dockerfile: DockerfileLocal
    ports:
      # Publishes container port 5678 on the same host port.
      - '5678:5678'
    volumes:
      # Bind-mount the project directory over /app inside the container.
      - ./:/app
|
||||
31
test.py
Normal file
31
test.py
Normal file
@@ -0,0 +1,31 @@
|
||||
import json
|
||||
import codecs
|
||||
import logging
|
||||
|
||||
from src.book_solver import BookSolver
|
||||
from src.util.helpers import BookLogger
|
||||
from src.html_presets_processor import HtmlPresetsProcessor
|
||||
from src.style_reader import StyleReader
|
||||
from src.epub_converter.html_epub_processor import HtmlEpubProcessor
|
||||
from src.epub_converter.epub_converter import EpubConverter
|
||||
|
||||
|
||||
if __name__ == "__main__":
    # Manual driver: converts one hard-coded EPUB file to a dict with
    # EpubConverter and dumps that dict as JSON.
    import os

    # epub_file_path = "/app/books/epub/9781284289473.epub"
    epub_file_path = "/app/books/epub/9781284296693.epub"
    # str.replace rewrites every "epub" occurrence, so both the directory
    # name and the extension change: /app/books/json/<name>.json
    json_file_path = epub_file_path.replace("epub", "json")
    print("Start")

    # The book id handed to the logger is the file name (last path component).
    logger_object = BookLogger(name="epub")
    logger_object.configure_book_logger(book_id=epub_file_path.split("/")[-1])

    html_preset_processor = HtmlPresetsProcessor(
        logger=logger_object, preset_path="/app/preset/default_preset.json")
    style_preprocessor = StyleReader()
    html_processor = HtmlEpubProcessor(logger=logger_object,
                                       html_preprocessor=html_preset_processor)

    json_converter = EpubConverter(epub_file_path, logger=logger_object,
                                   style_processor=style_preprocessor,
                                   html_processor=html_processor)
    content_dict = json_converter.convert_to_dict()

    print(json_file_path)
    # Create the output directory up front so a finished conversion is not
    # lost to a FileNotFoundError when the directory does not exist yet.
    os.makedirs(os.path.dirname(json_file_path), exist_ok=True)
    with open(json_file_path, "w", encoding="utf-8") as f_json:
        json.dump(content_dict, f_json, ensure_ascii=False)
|
||||
39
test_docx.py
Normal file
39
test_docx.py
Normal file
@@ -0,0 +1,39 @@
|
||||
import json
|
||||
import codecs
|
||||
import logging
|
||||
from threading import Event
|
||||
|
||||
from src.book_solver import BookSolver
|
||||
from src.util.helpers import BookLogger
|
||||
from src.html_presets_processor import HtmlPresetsProcessor
|
||||
from src.style_reader import StyleReader
|
||||
from src.docx_converter.docx2libre_html import Docx2LibreHtml
|
||||
from src.docx_converter.html_docx_processor import HtmlDocxProcessor
|
||||
from src.docx_converter.libre_html2json_converter import LibreHtml2JsonConverter
|
||||
|
||||
|
||||
if __name__ == "__main__":
    # Manual driver: converts one hard-coded DOCX file to HTML with
    # Docx2LibreHtml, processes that HTML, and dumps the resulting dict
    # as JSON.
    import os

    docx_file_path = "/app/books/docx/Ch_1_ready.docx"
    # str.replace rewrites every "docx" occurrence, so both the directory
    # name and the extension change: /app/books/json/<name>.json
    json_file_path = docx_file_path.replace("docx", "json")

    # NOTE(review): the logger is named "epub" although this script handles
    # a DOCX file -- looks like a copy-paste leftover; confirm before renaming.
    book_logger = BookLogger(name="epub")
    book_logger.configure_book_logger(book_id=docx_file_path.split("/")[-1])

    # The event is set before being handed over as `libre_locker`;
    # presumably a set event means LibreOffice is free to use -- TODO confirm.
    locker = Event()
    locker.set()

    html_converter = Docx2LibreHtml(file_path=docx_file_path,
                                    logger=book_logger, libre_locker=locker)
    html_preset_processor = HtmlPresetsProcessor(
        logger=book_logger, preset_path="/app/preset/default_preset.json")
    style_preprocessor = StyleReader()
    html_processor = HtmlDocxProcessor(html_soup=html_converter.html_soup,
                                       logger=book_logger,
                                       html_preprocessor=html_preset_processor,
                                       style_preprocessor=style_preprocessor)
    content, footnotes, top_level_headers = html_processor.process_html(
        html_path=html_converter.html_path, book_id=html_converter.book_id)

    json_converter = LibreHtml2JsonConverter(
        content, footnotes, top_level_headers, book_logger)
    content_dict = json_converter.convert_to_dict()

    # Create the output directory up front so a finished conversion is not
    # lost to a FileNotFoundError when the directory does not exist yet.
    os.makedirs(os.path.dirname(json_file_path), exist_ok=True)
    with open(json_file_path, "w", encoding="utf-8") as f:
        json.dump(content_dict, f, ensure_ascii=False)
|
||||
Reference in New Issue
Block a user