epub converter: rename BookApiWrapper to BookStatusWrapper and shorten its status setters (set_error, set_processing, set_generating)

This commit is contained in:
shirshasa
2021-08-17 19:10:55 +03:00
parent 4fb3aee2b0
commit 8714ff829c
4 changed files with 25 additions and 25 deletions

View File

@@ -9,7 +9,7 @@ from threading import Event
from bs4 import BeautifulSoup
from livecarta_config import BookLogger, BookApiWrapper, LawCartaConfig
from livecarta_config import BookLogger, BookStatusWrapper, LawCartaConfig
from html_preprocessor import HTMLPreprocessor
from json_postprocessor import JSONConverter
@@ -30,7 +30,7 @@ class Book:
logging_format=logging_format,
book_id=book_id,
main_logger=main_logger)
self.book_api_wrapper = BookApiWrapper(access, self.logger_object, book_id)
self.status_wrapper = BookStatusWrapper(access, self.logger_object, book_id)
assert LawCartaConfig.SUPPORTED_LEVELS == len(LawCartaConfig.SUPPORTED_HEADERS), \
"Length of headers doesn't match allowed levels."
@@ -93,7 +93,7 @@ class Book:
f.close()
except FileNotFoundError as error:
self.logger_object.log('Invalid path to input data.', logging.ERROR)
self.book_api_wrapper.set_error_status()
self.status_wrapper.set_error()
raise error
folder_path = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
@@ -124,7 +124,7 @@ class Book:
except Exception as exc:
self.logger_object.log("Conversion has gone wrong. Libreoffice is not installed.", logging.ERROR)
self.logger_object.log_error_to_main_log()
self.book_api_wrapper.set_error_status()
self.status_wrapper.set_error()
raise exc
out_dir_path = os.path.join(out_dir_path, f'{self.book_id}.html')
@@ -136,7 +136,7 @@ class Book:
except FileNotFoundError as exc:
self.logger_object.log("Conversion has gone wrong. HTML file doesn't exist.", logging.ERROR)
self.logger_object.log_error_to_main_log()
self.book_api_wrapper.set_error_status()
self.status_wrapper.set_error()
raise exc
self.logger_object.log('End of conversion from .docx to .html.')
@@ -165,7 +165,7 @@ class Book:
self.logger_object.log('There is no html to process.'
'Conversion went wrong or you specified wrong paths.', logging.ERROR)
self.logger_object.log_error_to_main_log()
self.book_api_wrapper.set_error_status()
self.status_wrapper.set_error()
raise exc
html_soup = BeautifulSoup(html_text, features='lxml')
@@ -194,14 +194,14 @@ class Book:
except Exception as exc:
self.logger_object.log('Error has occurred while sending json content.', logging.ERROR)
self.logger_object.log_error_to_main_log()
self.book_api_wrapper.set_error_status()
self.status_wrapper.set_error()
raise exc
def convert_from_html(self):
html_soup = self.read_html()
parser = HTMLPreprocessor(html_soup, self.logger_object)
content, footnotes, top_level_headers = parser.process_html(self.access, self.html_path, self.book_id)
json_converter = JSONConverter(content, footnotes, top_level_headers, self.logger_object, self.book_api_wrapper)
json_converter = JSONConverter(content, footnotes, top_level_headers, self.logger_object, self.status_wrapper)
content_dict = json_converter.convert_to_dict()
self.write_to_json(content_dict)
self.write_html_from_list(parser.body_tag)
@@ -222,7 +222,7 @@ class Book:
parser = HTMLPreprocessor(html_soup, self.logger_object)
content, footnotes, top_level_headers = parser.process_html(self.access, self.html_path, self.book_id)
json_converter = JSONConverter(content, footnotes, top_level_headers, self.logger_object, self.book_api_wrapper)
json_converter = JSONConverter(content, footnotes, top_level_headers, self.logger_object, self.status_wrapper)
content_dict = json_converter.convert_to_dict()
self.write_to_json(content_dict)
@@ -233,7 +233,7 @@ class Book:
try:
self.logger_object.log('Beginning of conversion from .docx to .json.')
self.get_docx()
self.book_api_wrapper.set_process_status()
self.status_wrapper.set_processing()
self.convert_doc_to_html()
self.check_output_directory()
@@ -244,9 +244,9 @@ class Book:
content, footnotes, top_level_headers = parser.process_html(self.access, self.html_path, self.book_id)
self.logger_object.log('Beginning of processing json output.')
self.book_api_wrapper.set_generate_status()
self.status_wrapper.set_generating()
json_converter = JSONConverter(content, footnotes, top_level_headers, self.logger_object, self.book_api_wrapper)
json_converter = JSONConverter(content, footnotes, top_level_headers, self.logger_object, self.status_wrapper)
content_dict = json_converter.convert_to_dict()
self.write_to_json(content_dict)
self.send_json_content(content_dict)
@@ -254,7 +254,7 @@ class Book:
except Exception as exc:
self.logger_object.log('Error has occurred while conversion.', logging.ERROR)
self.logger_object.log_error_to_main_log(str(exc))
self.book_api_wrapper.set_error_status()
self.status_wrapper.set_error()
raise exc

View File

@@ -4,7 +4,7 @@ import logging
import os
import pathlib
from livecarta_config import BookLogger, BookApiWrapper, LawCartaConfig
from livecarta_config import BookLogger, BookStatusWrapper, LawCartaConfig
from epub_postprocessor import EpubPostprocessor
@@ -21,7 +21,7 @@ class EpubBook:
logging_format=logging_format,
book_id=book_id,
main_logger=main_logger)
self.book_api_wrapper = BookApiWrapper(access, self.logger_object, book_id)
self.status_wrapper = BookStatusWrapper(access, self.logger_object, book_id)
assert LawCartaConfig.SUPPORTED_LEVELS == len(LawCartaConfig.SUPPORTED_HEADERS), \
"Length of headers doesn't match allowed levels."
@@ -91,7 +91,7 @@ class EpubBook:
except Exception as exc:
self.logger_object.log('Error has occurred while sending json content.', logging.ERROR)
self.logger_object.log_error_to_main_log()
self.book_api_wrapper.set_error_status()
self.status_wrapper.set_error()
raise exc
def test_conversion(self):
@@ -110,13 +110,13 @@ class EpubBook:
def conversion(self):
self.logger_object.log('Beginning of conversion from .docx to .json.')
self.get_epub()
self.book_api_wrapper.set_process_status()
self.status_wrapper.set_processing()
self.logger_object.log('Beginning of processing json output.')
try:
json_converter = EpubPostprocessor(self.epub_path, access=self.access, logger=self.logger_object)
content_dict = json_converter.convert_to_dict()
self.book_api_wrapper.set_generate_status()
self.status_wrapper.set_generating()
self.write_to_json(content_dict)
self.send_json_content(content_dict)
self.logger_object.log(f'End of the conversion to LawCarta format. Check {self.output_path}.')
@@ -124,5 +124,5 @@ class EpubBook:
except Exception as exc:
self.logger_object.log('Error has occurred while conversion.', logging.ERROR)
self.logger_object.log_error_to_main_log(str(exc))
self.book_api_wrapper.set_error_status()
self.status_wrapper.set_error()
raise exc

View File

@@ -6,16 +6,16 @@ from shutil import copyfile
from bs4 import BeautifulSoup, NavigableString
from livecarta_config import LawCartaConfig, BookLogger, BookApiWrapper
from livecarta_config import LawCartaConfig, BookLogger, BookStatusWrapper
class HTMLPreprocessor:
def __init__(self, html_soup, logger_object, book_api_wrapper=None):
def __init__(self, html_soup, logger_object, status_wrapper=None):
self.body_tag = html_soup.body
self.html_soup = html_soup
self.logger_object: BookLogger = logger_object
self.book_api_wrapper: BookApiWrapper = book_api_wrapper
self.status_wrapper: BookStatusWrapper = status_wrapper
self.top_level_headers = None
self.content = list()
@@ -684,8 +684,8 @@ class HTMLPreprocessor:
except Exception as exc:
self.logger_object.log('Error has occurred while processing html.', logging.ERROR)
self.logger_object.log_error_to_main_log()
if self.book_api_wrapper:
self.book_api_wrapper.set_error_status()
if self.status_wrapper:
self.status_wrapper.set_error()
raise exc
self.logger_object.log('End of processing .html file.')

View File

@@ -128,7 +128,7 @@ class JSONConverter:
self.logger_object.log('Error has occurred while making json structure.', logging.ERROR)
self.logger_object.log_error_to_main_log()
if self.book_api_status:
self.book_api_status.set_error_status()
self.book_api_status.set_error()
raise exc
# Add is_introduction field to json structure