epub converter: refactor BookStatusWrapper

This commit is contained in:
shirshasa
2021-08-17 19:10:55 +03:00
parent 4fb3aee2b0
commit 8714ff829c
4 changed files with 25 additions and 25 deletions

View File

@@ -9,7 +9,7 @@ from threading import Event
from bs4 import BeautifulSoup from bs4 import BeautifulSoup
from livecarta_config import BookLogger, BookApiWrapper, LawCartaConfig from livecarta_config import BookLogger, BookStatusWrapper, LawCartaConfig
from html_preprocessor import HTMLPreprocessor from html_preprocessor import HTMLPreprocessor
from json_postprocessor import JSONConverter from json_postprocessor import JSONConverter
@@ -30,7 +30,7 @@ class Book:
logging_format=logging_format, logging_format=logging_format,
book_id=book_id, book_id=book_id,
main_logger=main_logger) main_logger=main_logger)
self.book_api_wrapper = BookApiWrapper(access, self.logger_object, book_id) self.status_wrapper = BookStatusWrapper(access, self.logger_object, book_id)
assert LawCartaConfig.SUPPORTED_LEVELS == len(LawCartaConfig.SUPPORTED_HEADERS), \ assert LawCartaConfig.SUPPORTED_LEVELS == len(LawCartaConfig.SUPPORTED_HEADERS), \
"Length of headers doesn't match allowed levels." "Length of headers doesn't match allowed levels."
@@ -93,7 +93,7 @@ class Book:
f.close() f.close()
except FileNotFoundError as error: except FileNotFoundError as error:
self.logger_object.log('Invalid path to input data.', logging.ERROR) self.logger_object.log('Invalid path to input data.', logging.ERROR)
self.book_api_wrapper.set_error_status() self.status_wrapper.set_error()
raise error raise error
folder_path = os.path.dirname(os.path.dirname(os.path.abspath(__file__))) folder_path = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
@@ -124,7 +124,7 @@ class Book:
except Exception as exc: except Exception as exc:
self.logger_object.log("Conversion has gone wrong. Libreoffice is not installed.", logging.ERROR) self.logger_object.log("Conversion has gone wrong. Libreoffice is not installed.", logging.ERROR)
self.logger_object.log_error_to_main_log() self.logger_object.log_error_to_main_log()
self.book_api_wrapper.set_error_status() self.status_wrapper.set_error()
raise exc raise exc
out_dir_path = os.path.join(out_dir_path, f'{self.book_id}.html') out_dir_path = os.path.join(out_dir_path, f'{self.book_id}.html')
@@ -136,7 +136,7 @@ class Book:
except FileNotFoundError as exc: except FileNotFoundError as exc:
self.logger_object.log("Conversion has gone wrong. HTML file doesn't exist.", logging.ERROR) self.logger_object.log("Conversion has gone wrong. HTML file doesn't exist.", logging.ERROR)
self.logger_object.log_error_to_main_log() self.logger_object.log_error_to_main_log()
self.book_api_wrapper.set_error_status() self.status_wrapper.set_error()
raise exc raise exc
self.logger_object.log('End of conversion from .docx to .html.') self.logger_object.log('End of conversion from .docx to .html.')
@@ -165,7 +165,7 @@ class Book:
self.logger_object.log('There is no html to process.' self.logger_object.log('There is no html to process.'
'Conversion went wrong or you specified wrong paths.', logging.ERROR) 'Conversion went wrong or you specified wrong paths.', logging.ERROR)
self.logger_object.log_error_to_main_log() self.logger_object.log_error_to_main_log()
self.book_api_wrapper.set_error_status() self.status_wrapper.set_error()
raise exc raise exc
html_soup = BeautifulSoup(html_text, features='lxml') html_soup = BeautifulSoup(html_text, features='lxml')
@@ -194,14 +194,14 @@ class Book:
except Exception as exc: except Exception as exc:
self.logger_object.log('Error has occurred while sending json content.', logging.ERROR) self.logger_object.log('Error has occurred while sending json content.', logging.ERROR)
self.logger_object.log_error_to_main_log() self.logger_object.log_error_to_main_log()
self.book_api_wrapper.set_error_status() self.status_wrapper.set_error()
raise exc raise exc
def convert_from_html(self): def convert_from_html(self):
html_soup = self.read_html() html_soup = self.read_html()
parser = HTMLPreprocessor(html_soup, self.logger_object) parser = HTMLPreprocessor(html_soup, self.logger_object)
content, footnotes, top_level_headers = parser.process_html(self.access, self.html_path, self.book_id) content, footnotes, top_level_headers = parser.process_html(self.access, self.html_path, self.book_id)
json_converter = JSONConverter(content, footnotes, top_level_headers, self.logger_object, self.book_api_wrapper) json_converter = JSONConverter(content, footnotes, top_level_headers, self.logger_object, self.status_wrapper)
content_dict = json_converter.convert_to_dict() content_dict = json_converter.convert_to_dict()
self.write_to_json(content_dict) self.write_to_json(content_dict)
self.write_html_from_list(parser.body_tag) self.write_html_from_list(parser.body_tag)
@@ -222,7 +222,7 @@ class Book:
parser = HTMLPreprocessor(html_soup, self.logger_object) parser = HTMLPreprocessor(html_soup, self.logger_object)
content, footnotes, top_level_headers = parser.process_html(self.access, self.html_path, self.book_id) content, footnotes, top_level_headers = parser.process_html(self.access, self.html_path, self.book_id)
json_converter = JSONConverter(content, footnotes, top_level_headers, self.logger_object, self.book_api_wrapper) json_converter = JSONConverter(content, footnotes, top_level_headers, self.logger_object, self.status_wrapper)
content_dict = json_converter.convert_to_dict() content_dict = json_converter.convert_to_dict()
self.write_to_json(content_dict) self.write_to_json(content_dict)
@@ -233,7 +233,7 @@ class Book:
try: try:
self.logger_object.log('Beginning of conversion from .docx to .json.') self.logger_object.log('Beginning of conversion from .docx to .json.')
self.get_docx() self.get_docx()
self.book_api_wrapper.set_process_status() self.status_wrapper.set_processing()
self.convert_doc_to_html() self.convert_doc_to_html()
self.check_output_directory() self.check_output_directory()
@@ -244,9 +244,9 @@ class Book:
content, footnotes, top_level_headers = parser.process_html(self.access, self.html_path, self.book_id) content, footnotes, top_level_headers = parser.process_html(self.access, self.html_path, self.book_id)
self.logger_object.log('Beginning of processing json output.') self.logger_object.log('Beginning of processing json output.')
self.book_api_wrapper.set_generate_status() self.status_wrapper.set_generating()
json_converter = JSONConverter(content, footnotes, top_level_headers, self.logger_object, self.book_api_wrapper) json_converter = JSONConverter(content, footnotes, top_level_headers, self.logger_object, self.status_wrapper)
content_dict = json_converter.convert_to_dict() content_dict = json_converter.convert_to_dict()
self.write_to_json(content_dict) self.write_to_json(content_dict)
self.send_json_content(content_dict) self.send_json_content(content_dict)
@@ -254,7 +254,7 @@ class Book:
except Exception as exc: except Exception as exc:
self.logger_object.log('Error has occurred while conversion.', logging.ERROR) self.logger_object.log('Error has occurred while conversion.', logging.ERROR)
self.logger_object.log_error_to_main_log(str(exc)) self.logger_object.log_error_to_main_log(str(exc))
self.book_api_wrapper.set_error_status() self.status_wrapper.set_error()
raise exc raise exc

View File

@@ -4,7 +4,7 @@ import logging
import os import os
import pathlib import pathlib
from livecarta_config import BookLogger, BookApiWrapper, LawCartaConfig from livecarta_config import BookLogger, BookStatusWrapper, LawCartaConfig
from epub_postprocessor import EpubPostprocessor from epub_postprocessor import EpubPostprocessor
@@ -21,7 +21,7 @@ class EpubBook:
logging_format=logging_format, logging_format=logging_format,
book_id=book_id, book_id=book_id,
main_logger=main_logger) main_logger=main_logger)
self.book_api_wrapper = BookApiWrapper(access, self.logger_object, book_id) self.status_wrapper = BookStatusWrapper(access, self.logger_object, book_id)
assert LawCartaConfig.SUPPORTED_LEVELS == len(LawCartaConfig.SUPPORTED_HEADERS), \ assert LawCartaConfig.SUPPORTED_LEVELS == len(LawCartaConfig.SUPPORTED_HEADERS), \
"Length of headers doesn't match allowed levels." "Length of headers doesn't match allowed levels."
@@ -91,7 +91,7 @@ class EpubBook:
except Exception as exc: except Exception as exc:
self.logger_object.log('Error has occurred while sending json content.', logging.ERROR) self.logger_object.log('Error has occurred while sending json content.', logging.ERROR)
self.logger_object.log_error_to_main_log() self.logger_object.log_error_to_main_log()
self.book_api_wrapper.set_error_status() self.status_wrapper.set_error()
raise exc raise exc
def test_conversion(self): def test_conversion(self):
@@ -110,13 +110,13 @@ class EpubBook:
def conversion(self): def conversion(self):
self.logger_object.log('Beginning of conversion from .docx to .json.') self.logger_object.log('Beginning of conversion from .docx to .json.')
self.get_epub() self.get_epub()
self.book_api_wrapper.set_process_status() self.status_wrapper.set_processing()
self.logger_object.log('Beginning of processing json output.') self.logger_object.log('Beginning of processing json output.')
try: try:
json_converter = EpubPostprocessor(self.epub_path, access=self.access, logger=self.logger_object) json_converter = EpubPostprocessor(self.epub_path, access=self.access, logger=self.logger_object)
content_dict = json_converter.convert_to_dict() content_dict = json_converter.convert_to_dict()
self.book_api_wrapper.set_generate_status() self.status_wrapper.set_generating()
self.write_to_json(content_dict) self.write_to_json(content_dict)
self.send_json_content(content_dict) self.send_json_content(content_dict)
self.logger_object.log(f'End of the conversion to LawCarta format. Check {self.output_path}.') self.logger_object.log(f'End of the conversion to LawCarta format. Check {self.output_path}.')
@@ -124,5 +124,5 @@ class EpubBook:
except Exception as exc: except Exception as exc:
self.logger_object.log('Error has occurred while conversion.', logging.ERROR) self.logger_object.log('Error has occurred while conversion.', logging.ERROR)
self.logger_object.log_error_to_main_log(str(exc)) self.logger_object.log_error_to_main_log(str(exc))
self.book_api_wrapper.set_error_status() self.status_wrapper.set_error()
raise exc raise exc

View File

@@ -6,16 +6,16 @@ from shutil import copyfile
from bs4 import BeautifulSoup, NavigableString from bs4 import BeautifulSoup, NavigableString
from livecarta_config import LawCartaConfig, BookLogger, BookApiWrapper from livecarta_config import LawCartaConfig, BookLogger, BookStatusWrapper
class HTMLPreprocessor: class HTMLPreprocessor:
def __init__(self, html_soup, logger_object, book_api_wrapper=None): def __init__(self, html_soup, logger_object, status_wrapper=None):
self.body_tag = html_soup.body self.body_tag = html_soup.body
self.html_soup = html_soup self.html_soup = html_soup
self.logger_object: BookLogger = logger_object self.logger_object: BookLogger = logger_object
self.book_api_wrapper: BookApiWrapper = book_api_wrapper self.status_wrapper: BookStatusWrapper = status_wrapper
self.top_level_headers = None self.top_level_headers = None
self.content = list() self.content = list()
@@ -684,8 +684,8 @@ class HTMLPreprocessor:
except Exception as exc: except Exception as exc:
self.logger_object.log('Error has occurred while processing html.', logging.ERROR) self.logger_object.log('Error has occurred while processing html.', logging.ERROR)
self.logger_object.log_error_to_main_log() self.logger_object.log_error_to_main_log()
if self.book_api_wrapper: if self.status_wrapper:
self.book_api_wrapper.set_error_status() self.status_wrapper.set_error()
raise exc raise exc
self.logger_object.log('End of processing .html file.') self.logger_object.log('End of processing .html file.')

View File

@@ -128,7 +128,7 @@ class JSONConverter:
self.logger_object.log('Error has occurred while making json structure.', logging.ERROR) self.logger_object.log('Error has occurred while making json structure.', logging.ERROR)
self.logger_object.log_error_to_main_log() self.logger_object.log_error_to_main_log()
if self.book_api_status: if self.book_api_status:
self.book_api_status.set_error_status() self.book_api_status.set_error()
raise exc raise exc
# Add is_introduction field to json structure # Add is_introduction field to json structure