forked from LiveCarta/BookConverter
epub converter: refactor BookStatusWrapper
This commit is contained in:
@@ -9,7 +9,7 @@ from threading import Event
|
||||
|
||||
from bs4 import BeautifulSoup
|
||||
|
||||
from livecarta_config import BookLogger, BookApiWrapper, LawCartaConfig
|
||||
from livecarta_config import BookLogger, BookStatusWrapper, LawCartaConfig
|
||||
from html_preprocessor import HTMLPreprocessor
|
||||
from json_postprocessor import JSONConverter
|
||||
|
||||
@@ -30,7 +30,7 @@ class Book:
|
||||
logging_format=logging_format,
|
||||
book_id=book_id,
|
||||
main_logger=main_logger)
|
||||
self.book_api_wrapper = BookApiWrapper(access, self.logger_object, book_id)
|
||||
self.status_wrapper = BookStatusWrapper(access, self.logger_object, book_id)
|
||||
|
||||
assert LawCartaConfig.SUPPORTED_LEVELS == len(LawCartaConfig.SUPPORTED_HEADERS), \
|
||||
"Length of headers doesn't match allowed levels."
|
||||
@@ -93,7 +93,7 @@ class Book:
|
||||
f.close()
|
||||
except FileNotFoundError as error:
|
||||
self.logger_object.log('Invalid path to input data.', logging.ERROR)
|
||||
self.book_api_wrapper.set_error_status()
|
||||
self.status_wrapper.set_error()
|
||||
raise error
|
||||
|
||||
folder_path = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
|
||||
@@ -124,7 +124,7 @@ class Book:
|
||||
except Exception as exc:
|
||||
self.logger_object.log("Conversion has gone wrong. Libreoffice is not installed.", logging.ERROR)
|
||||
self.logger_object.log_error_to_main_log()
|
||||
self.book_api_wrapper.set_error_status()
|
||||
self.status_wrapper.set_error()
|
||||
raise exc
|
||||
|
||||
out_dir_path = os.path.join(out_dir_path, f'{self.book_id}.html')
|
||||
@@ -136,7 +136,7 @@ class Book:
|
||||
except FileNotFoundError as exc:
|
||||
self.logger_object.log("Conversion has gone wrong. HTML file doesn't exist.", logging.ERROR)
|
||||
self.logger_object.log_error_to_main_log()
|
||||
self.book_api_wrapper.set_error_status()
|
||||
self.status_wrapper.set_error()
|
||||
raise exc
|
||||
|
||||
self.logger_object.log('End of conversion from .docx to .html.')
|
||||
@@ -165,7 +165,7 @@ class Book:
|
||||
self.logger_object.log('There is no html to process.'
|
||||
'Conversion went wrong or you specified wrong paths.', logging.ERROR)
|
||||
self.logger_object.log_error_to_main_log()
|
||||
self.book_api_wrapper.set_error_status()
|
||||
self.status_wrapper.set_error()
|
||||
raise exc
|
||||
|
||||
html_soup = BeautifulSoup(html_text, features='lxml')
|
||||
@@ -194,14 +194,14 @@ class Book:
|
||||
except Exception as exc:
|
||||
self.logger_object.log('Error has occurred while sending json content.', logging.ERROR)
|
||||
self.logger_object.log_error_to_main_log()
|
||||
self.book_api_wrapper.set_error_status()
|
||||
self.status_wrapper.set_error()
|
||||
raise exc
|
||||
|
||||
def convert_from_html(self):
|
||||
html_soup = self.read_html()
|
||||
parser = HTMLPreprocessor(html_soup, self.logger_object)
|
||||
content, footnotes, top_level_headers = parser.process_html(self.access, self.html_path, self.book_id)
|
||||
json_converter = JSONConverter(content, footnotes, top_level_headers, self.logger_object, self.book_api_wrapper)
|
||||
json_converter = JSONConverter(content, footnotes, top_level_headers, self.logger_object, self.status_wrapper)
|
||||
content_dict = json_converter.convert_to_dict()
|
||||
self.write_to_json(content_dict)
|
||||
self.write_html_from_list(parser.body_tag)
|
||||
@@ -222,7 +222,7 @@ class Book:
|
||||
parser = HTMLPreprocessor(html_soup, self.logger_object)
|
||||
content, footnotes, top_level_headers = parser.process_html(self.access, self.html_path, self.book_id)
|
||||
|
||||
json_converter = JSONConverter(content, footnotes, top_level_headers, self.logger_object, self.book_api_wrapper)
|
||||
json_converter = JSONConverter(content, footnotes, top_level_headers, self.logger_object, self.status_wrapper)
|
||||
content_dict = json_converter.convert_to_dict()
|
||||
|
||||
self.write_to_json(content_dict)
|
||||
@@ -233,7 +233,7 @@ class Book:
|
||||
try:
|
||||
self.logger_object.log('Beginning of conversion from .docx to .json.')
|
||||
self.get_docx()
|
||||
self.book_api_wrapper.set_process_status()
|
||||
self.status_wrapper.set_processing()
|
||||
self.convert_doc_to_html()
|
||||
self.check_output_directory()
|
||||
|
||||
@@ -244,9 +244,9 @@ class Book:
|
||||
content, footnotes, top_level_headers = parser.process_html(self.access, self.html_path, self.book_id)
|
||||
|
||||
self.logger_object.log('Beginning of processing json output.')
|
||||
self.book_api_wrapper.set_generate_status()
|
||||
self.status_wrapper.set_generating()
|
||||
|
||||
json_converter = JSONConverter(content, footnotes, top_level_headers, self.logger_object, self.book_api_wrapper)
|
||||
json_converter = JSONConverter(content, footnotes, top_level_headers, self.logger_object, self.status_wrapper)
|
||||
content_dict = json_converter.convert_to_dict()
|
||||
self.write_to_json(content_dict)
|
||||
self.send_json_content(content_dict)
|
||||
@@ -254,7 +254,7 @@ class Book:
|
||||
except Exception as exc:
|
||||
self.logger_object.log('Error has occurred while conversion.', logging.ERROR)
|
||||
self.logger_object.log_error_to_main_log(str(exc))
|
||||
self.book_api_wrapper.set_error_status()
|
||||
self.status_wrapper.set_error()
|
||||
raise exc
|
||||
|
||||
|
||||
|
||||
@@ -4,7 +4,7 @@ import logging
|
||||
import os
|
||||
import pathlib
|
||||
|
||||
from livecarta_config import BookLogger, BookApiWrapper, LawCartaConfig
|
||||
from livecarta_config import BookLogger, BookStatusWrapper, LawCartaConfig
|
||||
from epub_postprocessor import EpubPostprocessor
|
||||
|
||||
|
||||
@@ -21,7 +21,7 @@ class EpubBook:
|
||||
logging_format=logging_format,
|
||||
book_id=book_id,
|
||||
main_logger=main_logger)
|
||||
self.book_api_wrapper = BookApiWrapper(access, self.logger_object, book_id)
|
||||
self.status_wrapper = BookStatusWrapper(access, self.logger_object, book_id)
|
||||
|
||||
assert LawCartaConfig.SUPPORTED_LEVELS == len(LawCartaConfig.SUPPORTED_HEADERS), \
|
||||
"Length of headers doesn't match allowed levels."
|
||||
@@ -91,7 +91,7 @@ class EpubBook:
|
||||
except Exception as exc:
|
||||
self.logger_object.log('Error has occurred while sending json content.', logging.ERROR)
|
||||
self.logger_object.log_error_to_main_log()
|
||||
self.book_api_wrapper.set_error_status()
|
||||
self.status_wrapper.set_error()
|
||||
raise exc
|
||||
|
||||
def test_conversion(self):
|
||||
@@ -110,13 +110,13 @@ class EpubBook:
|
||||
def conversion(self):
|
||||
self.logger_object.log('Beginning of conversion from .docx to .json.')
|
||||
self.get_epub()
|
||||
self.book_api_wrapper.set_process_status()
|
||||
self.status_wrapper.set_processing()
|
||||
self.logger_object.log('Beginning of processing json output.')
|
||||
|
||||
try:
|
||||
json_converter = EpubPostprocessor(self.epub_path, access=self.access, logger=self.logger_object)
|
||||
content_dict = json_converter.convert_to_dict()
|
||||
self.book_api_wrapper.set_generate_status()
|
||||
self.status_wrapper.set_generating()
|
||||
self.write_to_json(content_dict)
|
||||
self.send_json_content(content_dict)
|
||||
self.logger_object.log(f'End of the conversion to LawCarta format. Check {self.output_path}.')
|
||||
@@ -124,5 +124,5 @@ class EpubBook:
|
||||
except Exception as exc:
|
||||
self.logger_object.log('Error has occurred while conversion.', logging.ERROR)
|
||||
self.logger_object.log_error_to_main_log(str(exc))
|
||||
self.book_api_wrapper.set_error_status()
|
||||
self.status_wrapper.set_error()
|
||||
raise exc
|
||||
|
||||
@@ -6,16 +6,16 @@ from shutil import copyfile
|
||||
|
||||
from bs4 import BeautifulSoup, NavigableString
|
||||
|
||||
from livecarta_config import LawCartaConfig, BookLogger, BookApiWrapper
|
||||
from livecarta_config import LawCartaConfig, BookLogger, BookStatusWrapper
|
||||
|
||||
|
||||
class HTMLPreprocessor:
|
||||
|
||||
def __init__(self, html_soup, logger_object, book_api_wrapper=None):
|
||||
def __init__(self, html_soup, logger_object, status_wrapper=None):
|
||||
self.body_tag = html_soup.body
|
||||
self.html_soup = html_soup
|
||||
self.logger_object: BookLogger = logger_object
|
||||
self.book_api_wrapper: BookApiWrapper = book_api_wrapper
|
||||
self.status_wrapper: BookStatusWrapper = status_wrapper
|
||||
self.top_level_headers = None
|
||||
self.content = list()
|
||||
|
||||
@@ -684,8 +684,8 @@ class HTMLPreprocessor:
|
||||
except Exception as exc:
|
||||
self.logger_object.log('Error has occurred while processing html.', logging.ERROR)
|
||||
self.logger_object.log_error_to_main_log()
|
||||
if self.book_api_wrapper:
|
||||
self.book_api_wrapper.set_error_status()
|
||||
if self.status_wrapper:
|
||||
self.status_wrapper.set_error()
|
||||
raise exc
|
||||
|
||||
self.logger_object.log('End of processing .html file.')
|
||||
|
||||
@@ -128,7 +128,7 @@ class JSONConverter:
|
||||
self.logger_object.log('Error has occurred while making json structure.', logging.ERROR)
|
||||
self.logger_object.log_error_to_main_log()
|
||||
if self.book_api_status:
|
||||
self.book_api_status.set_error_status()
|
||||
self.book_api_status.set_error()
|
||||
raise exc
|
||||
|
||||
# Add is_introduction field to json structure
|
||||
|
||||
Reference in New Issue
Block a user