forked from LiveCarta/BookConverter
epub converter: refactor BookStatusWrapper
This commit is contained in:
@@ -9,7 +9,7 @@ from threading import Event
|
|||||||
|
|
||||||
from bs4 import BeautifulSoup
|
from bs4 import BeautifulSoup
|
||||||
|
|
||||||
from livecarta_config import BookLogger, BookApiWrapper, LawCartaConfig
|
from livecarta_config import BookLogger, BookStatusWrapper, LawCartaConfig
|
||||||
from html_preprocessor import HTMLPreprocessor
|
from html_preprocessor import HTMLPreprocessor
|
||||||
from json_postprocessor import JSONConverter
|
from json_postprocessor import JSONConverter
|
||||||
|
|
||||||
@@ -30,7 +30,7 @@ class Book:
|
|||||||
logging_format=logging_format,
|
logging_format=logging_format,
|
||||||
book_id=book_id,
|
book_id=book_id,
|
||||||
main_logger=main_logger)
|
main_logger=main_logger)
|
||||||
self.book_api_wrapper = BookApiWrapper(access, self.logger_object, book_id)
|
self.status_wrapper = BookStatusWrapper(access, self.logger_object, book_id)
|
||||||
|
|
||||||
assert LawCartaConfig.SUPPORTED_LEVELS == len(LawCartaConfig.SUPPORTED_HEADERS), \
|
assert LawCartaConfig.SUPPORTED_LEVELS == len(LawCartaConfig.SUPPORTED_HEADERS), \
|
||||||
"Length of headers doesn't match allowed levels."
|
"Length of headers doesn't match allowed levels."
|
||||||
@@ -93,7 +93,7 @@ class Book:
|
|||||||
f.close()
|
f.close()
|
||||||
except FileNotFoundError as error:
|
except FileNotFoundError as error:
|
||||||
self.logger_object.log('Invalid path to input data.', logging.ERROR)
|
self.logger_object.log('Invalid path to input data.', logging.ERROR)
|
||||||
self.book_api_wrapper.set_error_status()
|
self.status_wrapper.set_error()
|
||||||
raise error
|
raise error
|
||||||
|
|
||||||
folder_path = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
|
folder_path = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
|
||||||
@@ -124,7 +124,7 @@ class Book:
|
|||||||
except Exception as exc:
|
except Exception as exc:
|
||||||
self.logger_object.log("Conversion has gone wrong. Libreoffice is not installed.", logging.ERROR)
|
self.logger_object.log("Conversion has gone wrong. Libreoffice is not installed.", logging.ERROR)
|
||||||
self.logger_object.log_error_to_main_log()
|
self.logger_object.log_error_to_main_log()
|
||||||
self.book_api_wrapper.set_error_status()
|
self.status_wrapper.set_error()
|
||||||
raise exc
|
raise exc
|
||||||
|
|
||||||
out_dir_path = os.path.join(out_dir_path, f'{self.book_id}.html')
|
out_dir_path = os.path.join(out_dir_path, f'{self.book_id}.html')
|
||||||
@@ -136,7 +136,7 @@ class Book:
|
|||||||
except FileNotFoundError as exc:
|
except FileNotFoundError as exc:
|
||||||
self.logger_object.log("Conversion has gone wrong. HTML file doesn't exist.", logging.ERROR)
|
self.logger_object.log("Conversion has gone wrong. HTML file doesn't exist.", logging.ERROR)
|
||||||
self.logger_object.log_error_to_main_log()
|
self.logger_object.log_error_to_main_log()
|
||||||
self.book_api_wrapper.set_error_status()
|
self.status_wrapper.set_error()
|
||||||
raise exc
|
raise exc
|
||||||
|
|
||||||
self.logger_object.log('End of conversion from .docx to .html.')
|
self.logger_object.log('End of conversion from .docx to .html.')
|
||||||
@@ -165,7 +165,7 @@ class Book:
|
|||||||
self.logger_object.log('There is no html to process.'
|
self.logger_object.log('There is no html to process.'
|
||||||
'Conversion went wrong or you specified wrong paths.', logging.ERROR)
|
'Conversion went wrong or you specified wrong paths.', logging.ERROR)
|
||||||
self.logger_object.log_error_to_main_log()
|
self.logger_object.log_error_to_main_log()
|
||||||
self.book_api_wrapper.set_error_status()
|
self.status_wrapper.set_error()
|
||||||
raise exc
|
raise exc
|
||||||
|
|
||||||
html_soup = BeautifulSoup(html_text, features='lxml')
|
html_soup = BeautifulSoup(html_text, features='lxml')
|
||||||
@@ -194,14 +194,14 @@ class Book:
|
|||||||
except Exception as exc:
|
except Exception as exc:
|
||||||
self.logger_object.log('Error has occurred while sending json content.', logging.ERROR)
|
self.logger_object.log('Error has occurred while sending json content.', logging.ERROR)
|
||||||
self.logger_object.log_error_to_main_log()
|
self.logger_object.log_error_to_main_log()
|
||||||
self.book_api_wrapper.set_error_status()
|
self.status_wrapper.set_error()
|
||||||
raise exc
|
raise exc
|
||||||
|
|
||||||
def convert_from_html(self):
|
def convert_from_html(self):
|
||||||
html_soup = self.read_html()
|
html_soup = self.read_html()
|
||||||
parser = HTMLPreprocessor(html_soup, self.logger_object)
|
parser = HTMLPreprocessor(html_soup, self.logger_object)
|
||||||
content, footnotes, top_level_headers = parser.process_html(self.access, self.html_path, self.book_id)
|
content, footnotes, top_level_headers = parser.process_html(self.access, self.html_path, self.book_id)
|
||||||
json_converter = JSONConverter(content, footnotes, top_level_headers, self.logger_object, self.book_api_wrapper)
|
json_converter = JSONConverter(content, footnotes, top_level_headers, self.logger_object, self.status_wrapper)
|
||||||
content_dict = json_converter.convert_to_dict()
|
content_dict = json_converter.convert_to_dict()
|
||||||
self.write_to_json(content_dict)
|
self.write_to_json(content_dict)
|
||||||
self.write_html_from_list(parser.body_tag)
|
self.write_html_from_list(parser.body_tag)
|
||||||
@@ -222,7 +222,7 @@ class Book:
|
|||||||
parser = HTMLPreprocessor(html_soup, self.logger_object)
|
parser = HTMLPreprocessor(html_soup, self.logger_object)
|
||||||
content, footnotes, top_level_headers = parser.process_html(self.access, self.html_path, self.book_id)
|
content, footnotes, top_level_headers = parser.process_html(self.access, self.html_path, self.book_id)
|
||||||
|
|
||||||
json_converter = JSONConverter(content, footnotes, top_level_headers, self.logger_object, self.book_api_wrapper)
|
json_converter = JSONConverter(content, footnotes, top_level_headers, self.logger_object, self.status_wrapper)
|
||||||
content_dict = json_converter.convert_to_dict()
|
content_dict = json_converter.convert_to_dict()
|
||||||
|
|
||||||
self.write_to_json(content_dict)
|
self.write_to_json(content_dict)
|
||||||
@@ -233,7 +233,7 @@ class Book:
|
|||||||
try:
|
try:
|
||||||
self.logger_object.log('Beginning of conversion from .docx to .json.')
|
self.logger_object.log('Beginning of conversion from .docx to .json.')
|
||||||
self.get_docx()
|
self.get_docx()
|
||||||
self.book_api_wrapper.set_process_status()
|
self.status_wrapper.set_processing()
|
||||||
self.convert_doc_to_html()
|
self.convert_doc_to_html()
|
||||||
self.check_output_directory()
|
self.check_output_directory()
|
||||||
|
|
||||||
@@ -244,9 +244,9 @@ class Book:
|
|||||||
content, footnotes, top_level_headers = parser.process_html(self.access, self.html_path, self.book_id)
|
content, footnotes, top_level_headers = parser.process_html(self.access, self.html_path, self.book_id)
|
||||||
|
|
||||||
self.logger_object.log('Beginning of processing json output.')
|
self.logger_object.log('Beginning of processing json output.')
|
||||||
self.book_api_wrapper.set_generate_status()
|
self.status_wrapper.set_generating()
|
||||||
|
|
||||||
json_converter = JSONConverter(content, footnotes, top_level_headers, self.logger_object, self.book_api_wrapper)
|
json_converter = JSONConverter(content, footnotes, top_level_headers, self.logger_object, self.status_wrapper)
|
||||||
content_dict = json_converter.convert_to_dict()
|
content_dict = json_converter.convert_to_dict()
|
||||||
self.write_to_json(content_dict)
|
self.write_to_json(content_dict)
|
||||||
self.send_json_content(content_dict)
|
self.send_json_content(content_dict)
|
||||||
@@ -254,7 +254,7 @@ class Book:
|
|||||||
except Exception as exc:
|
except Exception as exc:
|
||||||
self.logger_object.log('Error has occurred while conversion.', logging.ERROR)
|
self.logger_object.log('Error has occurred while conversion.', logging.ERROR)
|
||||||
self.logger_object.log_error_to_main_log(str(exc))
|
self.logger_object.log_error_to_main_log(str(exc))
|
||||||
self.book_api_wrapper.set_error_status()
|
self.status_wrapper.set_error()
|
||||||
raise exc
|
raise exc
|
||||||
|
|
||||||
|
|
||||||
|
|||||||
@@ -4,7 +4,7 @@ import logging
|
|||||||
import os
|
import os
|
||||||
import pathlib
|
import pathlib
|
||||||
|
|
||||||
from livecarta_config import BookLogger, BookApiWrapper, LawCartaConfig
|
from livecarta_config import BookLogger, BookStatusWrapper, LawCartaConfig
|
||||||
from epub_postprocessor import EpubPostprocessor
|
from epub_postprocessor import EpubPostprocessor
|
||||||
|
|
||||||
|
|
||||||
@@ -21,7 +21,7 @@ class EpubBook:
|
|||||||
logging_format=logging_format,
|
logging_format=logging_format,
|
||||||
book_id=book_id,
|
book_id=book_id,
|
||||||
main_logger=main_logger)
|
main_logger=main_logger)
|
||||||
self.book_api_wrapper = BookApiWrapper(access, self.logger_object, book_id)
|
self.status_wrapper = BookStatusWrapper(access, self.logger_object, book_id)
|
||||||
|
|
||||||
assert LawCartaConfig.SUPPORTED_LEVELS == len(LawCartaConfig.SUPPORTED_HEADERS), \
|
assert LawCartaConfig.SUPPORTED_LEVELS == len(LawCartaConfig.SUPPORTED_HEADERS), \
|
||||||
"Length of headers doesn't match allowed levels."
|
"Length of headers doesn't match allowed levels."
|
||||||
@@ -91,7 +91,7 @@ class EpubBook:
|
|||||||
except Exception as exc:
|
except Exception as exc:
|
||||||
self.logger_object.log('Error has occurred while sending json content.', logging.ERROR)
|
self.logger_object.log('Error has occurred while sending json content.', logging.ERROR)
|
||||||
self.logger_object.log_error_to_main_log()
|
self.logger_object.log_error_to_main_log()
|
||||||
self.book_api_wrapper.set_error_status()
|
self.status_wrapper.set_error()
|
||||||
raise exc
|
raise exc
|
||||||
|
|
||||||
def test_conversion(self):
|
def test_conversion(self):
|
||||||
@@ -110,13 +110,13 @@ class EpubBook:
|
|||||||
def conversion(self):
|
def conversion(self):
|
||||||
self.logger_object.log('Beginning of conversion from .docx to .json.')
|
self.logger_object.log('Beginning of conversion from .docx to .json.')
|
||||||
self.get_epub()
|
self.get_epub()
|
||||||
self.book_api_wrapper.set_process_status()
|
self.status_wrapper.set_processing()
|
||||||
self.logger_object.log('Beginning of processing json output.')
|
self.logger_object.log('Beginning of processing json output.')
|
||||||
|
|
||||||
try:
|
try:
|
||||||
json_converter = EpubPostprocessor(self.epub_path, access=self.access, logger=self.logger_object)
|
json_converter = EpubPostprocessor(self.epub_path, access=self.access, logger=self.logger_object)
|
||||||
content_dict = json_converter.convert_to_dict()
|
content_dict = json_converter.convert_to_dict()
|
||||||
self.book_api_wrapper.set_generate_status()
|
self.status_wrapper.set_generating()
|
||||||
self.write_to_json(content_dict)
|
self.write_to_json(content_dict)
|
||||||
self.send_json_content(content_dict)
|
self.send_json_content(content_dict)
|
||||||
self.logger_object.log(f'End of the conversion to LawCarta format. Check {self.output_path}.')
|
self.logger_object.log(f'End of the conversion to LawCarta format. Check {self.output_path}.')
|
||||||
@@ -124,5 +124,5 @@ class EpubBook:
|
|||||||
except Exception as exc:
|
except Exception as exc:
|
||||||
self.logger_object.log('Error has occurred while conversion.', logging.ERROR)
|
self.logger_object.log('Error has occurred while conversion.', logging.ERROR)
|
||||||
self.logger_object.log_error_to_main_log(str(exc))
|
self.logger_object.log_error_to_main_log(str(exc))
|
||||||
self.book_api_wrapper.set_error_status()
|
self.status_wrapper.set_error()
|
||||||
raise exc
|
raise exc
|
||||||
|
|||||||
@@ -6,16 +6,16 @@ from shutil import copyfile
|
|||||||
|
|
||||||
from bs4 import BeautifulSoup, NavigableString
|
from bs4 import BeautifulSoup, NavigableString
|
||||||
|
|
||||||
from livecarta_config import LawCartaConfig, BookLogger, BookApiWrapper
|
from livecarta_config import LawCartaConfig, BookLogger, BookStatusWrapper
|
||||||
|
|
||||||
|
|
||||||
class HTMLPreprocessor:
|
class HTMLPreprocessor:
|
||||||
|
|
||||||
def __init__(self, html_soup, logger_object, book_api_wrapper=None):
|
def __init__(self, html_soup, logger_object, status_wrapper=None):
|
||||||
self.body_tag = html_soup.body
|
self.body_tag = html_soup.body
|
||||||
self.html_soup = html_soup
|
self.html_soup = html_soup
|
||||||
self.logger_object: BookLogger = logger_object
|
self.logger_object: BookLogger = logger_object
|
||||||
self.book_api_wrapper: BookApiWrapper = book_api_wrapper
|
self.status_wrapper: BookStatusWrapper = status_wrapper
|
||||||
self.top_level_headers = None
|
self.top_level_headers = None
|
||||||
self.content = list()
|
self.content = list()
|
||||||
|
|
||||||
@@ -684,8 +684,8 @@ class HTMLPreprocessor:
|
|||||||
except Exception as exc:
|
except Exception as exc:
|
||||||
self.logger_object.log('Error has occurred while processing html.', logging.ERROR)
|
self.logger_object.log('Error has occurred while processing html.', logging.ERROR)
|
||||||
self.logger_object.log_error_to_main_log()
|
self.logger_object.log_error_to_main_log()
|
||||||
if self.book_api_wrapper:
|
if self.status_wrapper:
|
||||||
self.book_api_wrapper.set_error_status()
|
self.status_wrapper.set_error()
|
||||||
raise exc
|
raise exc
|
||||||
|
|
||||||
self.logger_object.log('End of processing .html file.')
|
self.logger_object.log('End of processing .html file.')
|
||||||
|
|||||||
@@ -128,7 +128,7 @@ class JSONConverter:
|
|||||||
self.logger_object.log('Error has occurred while making json structure.', logging.ERROR)
|
self.logger_object.log('Error has occurred while making json structure.', logging.ERROR)
|
||||||
self.logger_object.log_error_to_main_log()
|
self.logger_object.log_error_to_main_log()
|
||||||
if self.book_api_status:
|
if self.book_api_status:
|
||||||
self.book_api_status.set_error_status()
|
self.book_api_status.set_error()
|
||||||
raise exc
|
raise exc
|
||||||
|
|
||||||
# Add is_introduction field to json structure
|
# Add is_introduction field to json structure
|
||||||
|
|||||||
Reference in New Issue
Block a user