Add a try-except block around each conversion step | Docx

This commit is contained in:
Kiryl
2022-08-05 12:37:20 +03:00
parent 18642ec5fd
commit 64d30b0ab8

View File

@@ -1,5 +1,6 @@
import json
import codecs
import logging
from threading import Event
from src.book_solver import BookSolver
@@ -34,21 +35,40 @@ class DocxBook(BookSolver):
"""
# 1. Converts docx to html with LibreOffice
html_converter = Docx2LibreHTML(self.book_id, self.book_path, self.access,
self.logger_object, self.libre_locker)
# todo presets
try:
html_converter = Docx2LibreHTML(self.book_id, self.book_path, self.access,
self.logger_object, self.libre_locker)
except Exception as exc:
self.logger_object.log(
"Error has occurred while converting .docx to .html.", logging.ERROR)
self.logger_object.log_error_to_main_log()
self.status_wrapper.set_error()
raise exc
# 2. Parses and cleans html, gets list of tags, gets footnotes
parser = HTMLDocxPreprocessor(
html_converter.html_soup, self.logger_object)
bs_tags, footnotes, top_level_headers = parser.process_html(
self.access, html_converter.html_path, self.book_id)
try:
parser = HTMLDocxPreprocessor(
html_converter.html_soup, self.logger_object)
bs_tags, footnotes, top_level_headers = parser.process_html(
self.access, html_converter.html_path, self.book_id)
except Exception as exc:
self.logger_object.log(
"Error has occurred while processing .html", logging.ERROR)
self.logger_object.log_error_to_main_log()
self.status_wrapper.set_error()
raise exc
# 3. Parses from line structure to nested structure with JSONConverter
json_converter = LibreHTML2JSONConverter(bs_tags, footnotes, top_level_headers,
self.logger_object)
content_dict = json_converter.convert_to_dict()
try:
json_converter = LibreHTML2JSONConverter(bs_tags, footnotes, top_level_headers,
self.logger_object)
content_dict = json_converter.convert_to_dict()
except Exception as exc:
self.logger_object.log(
"Error has occurred while converting .html to .json", logging.ERROR)
self.logger_object.log_error_to_main_log()
self.status_wrapper.set_error()
raise exc
return content_dict