forked from LiveCarta/BookConverter
Add convert_local
This commit is contained in:
@@ -13,8 +13,10 @@ import os
|
||||
import pathlib
|
||||
from abc import abstractmethod, ABCMeta
|
||||
|
||||
from livecarta_config import LawCartaConfig
|
||||
from src.util.helpers import BookLogger, BookStatusWrapper
|
||||
from livecarta_config import LawCartaConfig
|
||||
from util.helpers import BookLogger, BookStatusWrapper
|
||||
|
||||
|
||||
|
||||
|
||||
class BookSolver:
|
||||
|
||||
@@ -8,7 +8,7 @@ from threading import Event
|
||||
from bs4 import BeautifulSoup
|
||||
from html_docx_preprocessor import HTMLDocxPreprocessor
|
||||
from libra_html2json_converter import LibraHTML2JSONConverter
|
||||
from solver import BookSolver
|
||||
from book_solver import BookSolver
|
||||
|
||||
|
||||
class DocxBook(BookSolver):
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
from epub_converter import EpubConverter
|
||||
from solver import BookSolver
|
||||
from book_solver import BookSolver
|
||||
|
||||
|
||||
class EpubBook(BookSolver):
|
||||
|
||||
139
src/solver.py
139
src/solver.py
@@ -1,139 +0,0 @@
|
||||
""" This is the main abstract class for solving the task of book conversion.
|
||||
|
||||
Given the id of an incoming book, it gets the book from the server and runs the conversion.
|
||||
In parallel, it updates the status of the book conversion on the admin panel.
|
||||
Finally, it sends the result to the server.
|
||||
The result is JSON; its schema is in book_schema.json.
|
||||
"""
|
||||
|
||||
import codecs
|
||||
import json
|
||||
import logging
|
||||
import os
|
||||
import pathlib
|
||||
from abc import abstractmethod, ABCMeta
|
||||
|
||||
from livecarta_config import LawCartaConfig
|
||||
from util.helpers import BookLogger, BookStatusWrapper
|
||||
|
||||
|
||||
class BookSolver(metaclass=ABCMeta):
    """Abstract base class that drives the conversion of one book to JSON.

    The workflow implemented by :meth:`conversion` is: download the book
    file through ``access``, convert it with :meth:`get_converted_book`
    (supplied by a subclass), write the result to a local ``.json`` file
    and send the same content back through ``access``.

    ``metaclass=ABCMeta`` is used (instead of the Python 2 style
    ``__metaclass__`` attribute, which Python 3 ignores) so that
    ``@abstractmethod`` is actually enforced: the class cannot be
    instantiated unless ``get_converted_book`` is overridden.
    """

    def __init__(self, book_id=0, access=None, main_logger=None, logging_format='%(asctime)s - %(levelname)s - %(message)s'):
        """
        :param book_id: identifier of the book; used in file names and log names.
        :param access: object used to talk to the server (this class calls
            ``access.url``, ``access.get_doc`` and ``access.send_book``).
        :param main_logger: forwarded to ``BookLogger``.
        :param logging_format: ``logging`` format string forwarded to ``BookLogger``.
        """
        # Used below as folder name and file extension; presumably set by
        # subclasses (e.g. 'docx' / 'epub') -- confirm against subclasses.
        self.book_type = None
        self.book_id = book_id
        self.access = access
        self.file_path = None  # path to book file, appears after downloading from server
        self.output_path = None  # path to json file
        self.logger_object = BookLogger(name=f'{__name__}_{self.book_id}',
                                        logging_format=logging_format,
                                        book_id=book_id,
                                        main_logger=main_logger)
        self.status_wrapper = BookStatusWrapper(access, self.logger_object, book_id)

        assert LawCartaConfig.SUPPORTED_LEVELS == len(LawCartaConfig.SUPPORTED_HEADERS), \
            "Length of headers doesn't match allowed levels."

    @staticmethod
    def _root_folder():
        """Return the parent of the directory that contains this module."""
        return os.path.dirname(os.path.dirname(os.path.abspath(__file__)))

    def save_book_file(self, content):
        """
        Save binary content of file to .docx/.epub.

        The file is written to
        ``<root>/<book_type>/<book_id>/<book_id>.<book_type>`` and its path
        is stored in ``self.file_path``.

        :param content: binary content of the file.
        :raises Exception: any error raised while writing is logged and re-raised.
        """
        folder_path = os.path.join(self._root_folder(), f'{self.book_type}/{self.book_id}')
        pathlib.Path(folder_path).mkdir(parents=True, exist_ok=True)

        file_path = os.path.join(folder_path, f'{self.book_id}.{self.book_type}')
        try:
            with open(file_path, 'wb+') as file:
                file.write(content)
            self.logger_object.log(f'File was saved to folder: {folder_path}.')
        except Exception:
            self.logger_object.log(f"Error in writing {self.book_type} file.", logging.ERROR)
            self.logger_object.log_error_to_main_log()
            raise

        self.file_path = pathlib.Path(file_path)

    def get_book_file(self):
        """
        Method for getting and saving book from server.

        :raises FileNotFoundError: logged and re-raised when it is raised
            while fetching or saving the file.
        """
        try:
            self.logger_object.log(f'Start receiving file from server. URL: {self.access.url}/doc-convert/{self.book_id}/file')
            content = self.access.get_doc(self.book_id)
            self.logger_object.log('File was received from server.')
            self.save_book_file(content)
        except FileNotFoundError:
            # Report the actual book type rather than a hard-coded 'docx'.
            self.logger_object.log(f"Can't get {self.book_type} from server.", logging.ERROR)
            self.logger_object.log_error_to_main_log()
            raise
        # Any other exception propagates unchanged to the caller.

    def check_output_directory(self):
        """
        Make sure ``self.output_path`` is a ``pathlib.Path`` to an existing file.

        When no output path was set, it defaults to
        ``<root>/json/<book_id>.json``. Parent folders and the file itself
        are created if missing.
        """
        if self.output_path is None:
            self.output_path = os.path.join(self._root_folder(), f'json/{self.book_id}.json')

        # The path may also have been assigned from outside as a plain string.
        self.output_path = pathlib.Path(self.output_path)
        self.logger_object.log(f'Output file path: {self.output_path}')

        self.output_path.parent.mkdir(parents=True, exist_ok=True)
        self.output_path.touch(exist_ok=True)

    def write_to_json(self, content: dict):
        """
        Dump ``content`` as UTF-8 JSON to ``self.output_path``.

        Errors raised while writing are logged and NOT re-raised, so a
        failed local dump does not stop the caller.

        :param content: JSON-serializable dictionary.
        """
        self.check_output_directory()
        try:
            with open(self.output_path, 'w', encoding='utf-8') as f:
                json.dump(content, f, ensure_ascii=False)
            self.logger_object.log(f'Data has been saved to .json file: {self.output_path}')
        except Exception as exc:
            self.logger_object.log('Error has occurred while writing json file.' + str(exc), logging.ERROR)

    def send_json_content_to_server(self, content: dict):
        """
        Send converted content through ``access.send_book``.

        :param content: dictionary passed to ``access.send_book``.
        :raises Exception: any error is logged, the error status is set
            via ``status_wrapper`` and the exception is re-raised.
        """
        try:
            self.access.send_book(self.book_id, content)
            self.logger_object.log('JSON data has been sent to server.')
        except Exception:
            self.logger_object.log('Error has occurred while sending json content.', logging.ERROR)
            self.logger_object.log_error_to_main_log()
            self.status_wrapper.set_error()
            raise

    @abstractmethod
    def get_converted_book(self):
        """
        Convert the book at ``self.file_path`` and return the result.

        Must be overridden by subclasses. This base implementation only
        logs the start, calls ``status_wrapper.set_generating()`` and
        returns an empty dict.

        :return: dictionary with the converted book content.
        """
        self.logger_object.log('Beginning of processing json output.')
        self.status_wrapper.set_generating()
        return {}

    def test_conversion(self):
        """
        Convert a local book file without contacting the server.

        The input is expected at ``<root>/<book_type>/<book_id>.<book_type>``
        (note: no per-book subfolder, unlike :meth:`save_book_file`); the
        result is written with :meth:`write_to_json`.
        """
        self.logger_object.log('Beginning of the test.')

        folder_path = os.path.join(self._root_folder(), f'{self.book_type}')
        file_path = os.path.join(folder_path, f'{self.book_id}.{self.book_type}')
        self.file_path = pathlib.Path(file_path)
        self.logger_object.log(f'Test on {self.book_type}: {self.file_path}')
        content_dict = self.get_converted_book()
        self.write_to_json(content_dict)
        self.logger_object.log('End of the test.')

    def conversion(self):
        """
        Run the full pipeline: download, convert, save locally, send to server.

        :raises Exception: on any failure the error status is set, the
            error is logged (also to the main log) and the exception is
            re-raised.
        """
        try:
            self.logger_object.log(f'Beginning of conversion from .{self.book_type} to .json.')
            self.get_book_file()
            self.status_wrapper.set_processing()
            content_dict = self.get_converted_book()
            self.write_to_json(content_dict)
            self.send_json_content_to_server(content_dict)
            self.logger_object.log(f'End of the conversion to LawCarta format. Check {self.output_path}.')

        except Exception as exc:
            self.status_wrapper.set_error()
            self.logger_object.log('Error has occurred while conversion.', logging.ERROR)
            self.logger_object.log_error_to_main_log(str(exc))
            raise
|
||||
|
||||
Reference in New Issue
Block a user