From aca017b55f5972fae36aef76815274966c887fd8 Mon Sep 17 00:00:00 2001
From: Jeniamakarchik
Date: Fri, 14 Feb 2020 15:58:47 +0300
Subject: [PATCH] add main logger for the project

- add logger to consumer.py
- handle exceptions and log them
---
 src/book.py     | 162 ++++++++++++++++++++++++++++++++----------------
 src/consumer.py |  76 +++++++++++++++++++---
 2 files changed, 173 insertions(+), 65 deletions(-)

diff --git a/src/book.py b/src/book.py
index ceba5e6..b67e6c8 100644
--- a/src/book.py
+++ b/src/book.py
@@ -30,11 +30,12 @@ class Book:
     }
     SUPPORTED_HEADERS = ["h1", "h2", "h3"]
 
-    def __init__(self, book_id=0, access=None, file_path=None, output_path=None):
+    def __init__(self, book_id=0, access=None, file_path=None, output_path=None, main_logger=None):
         self.book_id = book_id
         self.access = access
         self.file_path = file_path
         self.output_path = output_path
+        self.main_logger = main_logger
 
         self.logger = None
         self.html_soup = None
@@ -44,7 +45,7 @@ class Book:
         self.images = list()
         self.content_dict = dict()
 
-    def configure_file_logger(self, name, attr_name='logger', filename='logs/converter_log.log', filemode='w+',
+    def configure_file_logger(self, name, attr_name='logger', filename='logs/book_log.log', filemode='w+',
                               logging_level=logging.INFO, logging_format='%(asctime)s - %(message)s'):
         """
         Method for Logger configuration. Logger will write in file.
@@ -83,15 +84,31 @@ class Book:
         """
         self.logger.log(msg=message, level=logging_level)
 
+    def log_error_to_main_log(self, message=''):
+        """
+        Method for logging error to main log file.
+        """
+        if self.main_logger:
+            if not message:
+                message = f'Error in book conversion. Check {self.book_id}_log.log file.'
+            self.main_logger.error(message)
+
     def save_docx(self, content):
         """
         Save binary content of file to .docx.
         :param content: binary content of the file.
         """
         folder_path = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
-        file_path = os.path.join(folder_path, f'docx/{self.book_id}.docx')
-        with open(file_path, 'wb+') as file:
-            file.write(content)
+        folder_path = os.path.join(folder_path, 'docx')
+
+        file_path = os.path.join(folder_path, f'{self.book_id}.docx')
+        try:
+            with open(file_path, 'wb+') as file:
+                file.write(content)
+        except Exception as exc:
+            self.log("Error in writing docx file.", logging.ERROR)
+            self.log_error_to_main_log()
+            raise exc
 
         self.file_path = pathlib.Path(file_path)
 
@@ -103,27 +120,37 @@ class Book:
             content = self.access.get_doc(self.book_id)
             self.save_docx(content)
         except FileNotFoundError as ferr:
-            self.log('File have not found')
+            self.log("Can't get docx from server.", logging.ERROR)
+            self.log_error_to_main_log()
             raise ferr
         except Exception as exc:
             raise exc
 
     def set_process_status(self):
         try:
-            self.access.update_status(self.book_id, self.access.PROCESS)
+            if self.access:
+                self.access.update_status(self.book_id, self.access.PROCESS)
         except Exception as exc:
+            self.log("Can't update status of the book [PROCESS].", logging.ERROR)
+            self.log_error_to_main_log()
             raise exc
 
     def set_generate_status(self):
         try:
-            self.access.update_status(self.book_id, self.access.GENERATE)
+            if self.access:
+                self.access.update_status(self.book_id, self.access.GENERATE)
         except Exception as exc:
+            self.log("Can't update status of the book [GENERATE].", logging.ERROR)
+            self.log_error_to_main_log()
             raise exc
 
     def set_error_status(self):
         try:
-            self.access.update_status(self.book_id, self.access.ERROR)
+            if self.access:
+                self.access.update_status(self.book_id, self.access.ERROR)
         except Exception as exc:
+            self.log("Can't update status of the book [ERROR].", logging.ERROR)
+            self.log_error_to_main_log()
             raise exc
 
     def convert_doc_to_html(self):
@@ -138,15 +165,19 @@ class Book:
             f = open(self.file_path)
             f.close()
         except FileNotFoundError as error:
-            self.logger.error('Invalid path to input data.')
+            self.log('Invalid path to input data.', logging.ERROR)
             self.set_error_status()
             raise error
 
         folder_path = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
         out_dir_path = os.path.join(folder_path, f'html/{self.book_id}')
 
-        command = f'libreoffice --headless --convert-to html "{str(self.file_path)}" --outdir {out_dir_path}'
-        os.system(command)
+        # os.system() does not raise when the command fails, it only returns the exit status.
+        command = f'libreoffice --headless --convert-to html "{str(self.file_path)}" --outdir "{out_dir_path}"'
+        exit_status = os.system(command)
+        if exit_status != 0:
+            # The check of the html file below decides whether conversion has failed.
+            self.log(f'Libreoffice has exited with status {exit_status}.', logging.ERROR)
 
         out_dir_path = os.path.join(out_dir_path, f'{self.file_path.stem}.html')
         self.file_path = pathlib.Path(out_dir_path)
@@ -155,7 +186,8 @@ class Book:
             f = open(self.file_path)
             f.close()
         except FileNotFoundError as exc:
-            self.logger.error('Conversion has gone wrong.')
+            self.log("Conversion has gone wrong. HTML file doesn't exist.", logging.ERROR)
+            self.log_error_to_main_log()
             self.set_error_status()
             raise exc
 
@@ -181,7 +213,8 @@ class Book:
         try:
             html_text = open(self.file_path, 'r', encoding='utf8').read()
         except FileNotFoundError as exc:
-            self.logger.error('There is no html to process. Conversion went wrong or you specified wrong paths.')
+            self.log('There is no html to process. Conversion went wrong or you specified wrong paths.', logging.ERROR)
+            self.log_error_to_main_log()
             self.set_error_status()
             raise exc
 
@@ -503,36 +536,42 @@ class Book:
         """
         Process html code to satisfy LawCarta formatting.
         """
-        self.logger.info('Beginning of processing .html file.')
+        self.log('Beginning of processing .html file.')
 
-        self.clean_trash()
+        try:
+            self.clean_trash()
 
-        # process main elements of the .html doc
-        self._process_paragraph()
-        self._process_two_columns()
-        self._process_quotes()
+            # process main elements of the .html doc
+            self._process_paragraph()
+            self._process_two_columns()
+            self._process_quotes()
 
-        self.logger.info('Footnotes processing.')
-        self._process_footnotes()
-        self.logger.info(f'{len(self.footnotes)} footnotes have been processed.')
+            self.log('Footnotes processing.')
+            self._process_footnotes()
+            self.log(f'{len(self.footnotes)} footnotes have been processed.')
 
-        self.logger.info('Image processing.')
-        self._process_images()
-        self.logger.info(f'{len(self.images)} images have been processed.')
+            self.log('Image processing.')
+            self._process_images()
+            self.log(f'{len(self.images)} images have been processed.')
 
-        self._process_div()
+            self._process_div()
 
-        self.content = self.body_tag.find_all(recursive=False)
+            self.content = self.body_tag.find_all(recursive=False)
 
-        self._process_toc_links()
-        self._process_headings()
+            self._process_toc_links()
+            self._process_headings()
 
-        self.content = self.body_tag.find_all(recursive=False)
+            self.content = self.body_tag.find_all(recursive=False)
 
-        # delete text before table of content if exists
-        self.delete_content_before_toc()
+            # delete text before table of content if exists
+            self.delete_content_before_toc()
+        except Exception as exc:
+            self.log('Error has occurred while processing html.', logging.ERROR)
+            self.log_error_to_main_log()
+            self.set_error_status()
+            raise exc
 
-        self.logger.info('End of processing .html file.')
+        self.log('End of processing .html file.')
 
     @staticmethod
     def format_html(html_text):
@@ -606,23 +645,29 @@ class Book:
         ind = 0
         ch_num = 0
 
-        while ind < len(self.content):
-            res = {}
+        try:
+            while ind < len(self.content):
+                res = {}
 
-            if self.content[ind].name in self.SUPPORTED_HEADERS:
-                res, ind = self.header_to_json(ind)
-            else:
-                chapter_title = f'Untitled chapter {ch_num}'
-                chapter = []
-                while ind < len(self.content) and self.content[ind].name not in self.SUPPORTED_HEADERS:
-                    if not self._is_empty_p_tag(self.content[ind]):
-                        chapter.append(self.format_html(str(self.content[ind])))
-                    ind += 1
-                if chapter:
-                    res = {chapter_title: ["".join(chapter)]}
-                    ch_num += 1
-            if res:
-                json_strc.append(res)
+                if self.content[ind].name in self.SUPPORTED_HEADERS:
+                    res, ind = self.header_to_json(ind)
+                else:
+                    chapter_title = f'Untitled chapter {ch_num}'
+                    chapter = []
+                    while ind < len(self.content) and self.content[ind].name not in self.SUPPORTED_HEADERS:
+                        if not self._is_empty_p_tag(self.content[ind]):
+                            chapter.append(self.format_html(str(self.content[ind])))
+                        ind += 1
+                    if chapter:
+                        res = {chapter_title: ["".join(chapter)]}
+                        ch_num += 1
+                if res:
+                    json_strc.append(res)
+        except Exception as exc:
+            self.log('Error has occurred while making json structure.', logging.ERROR)
+            self.log_error_to_main_log()
+            self.set_error_status()
+            raise exc
 
         self.content_dict = {
             "content": json_strc,
@@ -630,24 +675,33 @@ class Book:
         }
 
     def write_json(self):
-        with codecs.open(self.output_path, 'w', encoding='utf-8') as f:
-            json.dump(self.content_dict, f, ensure_ascii=False)
+        try:
+            with codecs.open(self.output_path, 'w', encoding='utf-8') as f:
+                json.dump(self.content_dict, f, ensure_ascii=False)
+        except Exception as exc:
+            self.log('Error has occurred while writing json file.', logging.ERROR)
+            # self.log_error_to_main_log()
+            # self.set_error_status()
+            # raise exc
 
     def send_json_content(self):
         try:
             self.access.send_book(self.book_id, self.content_dict)
         except Exception as exc:
+            self.log('Error has occurred while sending json content.', logging.ERROR)
+            self.log_error_to_main_log()
+            self.set_error_status()
             raise exc
 
     def convert_from_html(self, logging_format):
-        self.configure_file_logger(__name__, logging_format=logging_format, filemode='w+')
+        self.configure_file_logger(f'{__name__}_{self.book_id}', logging_format=logging_format, filemode='w+')
         self.read_html()
         self.process_html()
         self.convert_to_json()
         self.write_json()
 
     def conversion(self, logging_format, filemode='w+'):
-        self.configure_file_logger(__name__, logging_format=logging_format, filemode=filemode)
+        self.configure_file_logger(f'{__name__}_{self.book_id}', logging_format=logging_format, filemode=filemode)
         self.log('Beginning of conversion from .docx to .json.')
         self.get_docx()
         self.set_process_status()
diff --git a/src/consumer.py b/src/consumer.py
index efd2f10..4c5000c 100644
--- a/src/consumer.py
+++ b/src/consumer.py
@@ -1,5 +1,6 @@
 import json
+import logging
 import os
 from functools import partial
 from pathlib import Path
 from threading import Thread, active_count
@@ -10,34 +11,82 @@ from access import Access
 from book import Book
 
 
-def convert_book(book_id, access):
+def configure_file_logger(name, filename='logs/converter_log.log', filemode='w+', logging_level=logging.INFO,
+                          logging_format='%(asctime)s - %(message)s'):
+    """
+    Configure logger that writes to a file in the project folder.
+
+    :param name: name of the logger.
+    :param filename: path to the log file relative to the project folder.
+    :param filemode: mode in which the log file is opened.
+    :param logging_level: minimal level of the messages to log.
+    :param logging_format: format of the log records.
+    :return: configured logger.
+    """
+    logger = logging.getLogger(name)
+
+    folder_path = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
+    file_path = os.path.join(folder_path, filename)
+    # FileHandler does not create missing folders.
+    os.makedirs(os.path.dirname(file_path), exist_ok=True)
+
+    # logging.getLogger returns the same object for the same name,
+    # so repeated configuration must not add one more handler.
+    if not logger.handlers:
+        file_handler = logging.FileHandler(file_path, mode=filemode)
+        file_handler.setFormatter(logging.Formatter(fmt=logging_format))
+        logger.addHandler(file_handler)
+    logger.setLevel(logging_level)
+
+    return logger
+
+
+def convert_book(book_id, access, logger):
+    """
+    Convert one book. It is the target of the thread started in callback.
+
+    :param book_id: id of the book to convert.
+    :param access: object that is passed to Book together with book_id.
+    :param logger: main logger, it is passed to Book as main_logger.
+    """
+    logger.info(f'Start processing book-{book_id}.')
     logging_format = '%(asctime)s - %(levelname)s - %(message)s'
 
-    book = Book(book_id, access)
-    book.conversion(logging_format=logging_format)
+    try:
+        book = Book(book_id, access, main_logger=logger)
+        book.conversion(logging_format=logging_format)
+    except Exception:
+        # callback does not wait for the thread, so the traceback is written to the main log here.
+        logger.exception(f'Conversion of book-{book_id} has failed.')
+        raise
 
-    print('Book has been proceeded.')
+    logger.info(f'Book-{book_id} has been proceeded.')
 
 
-def callback(ch, method, properties, body, access):
+def callback(ch, method, properties, body, access, logger):
+    """
+    Start conversion of the book from the received message in a new thread.
+    """
     print(f'Message: {body}.')
+    logger.info(f'Message: {body}.')
 
     try:
         data = json.loads(body)
 
         params = {
             'book_id': data['id'],
-            'access': access
+            'access': access,
+            'logger': logger
         }
 
         thread = Thread(target=convert_book, kwargs=params)
         thread.start()
-        print(f'Active threads: {active_count()}.')
+        logger.info(f'Active threads: {active_count()}.')
     except Exception as exc:
-        print(exc)
+        logger.exception(f'Message can not be processed: {exc}')
     finally:
+        pass
         # thread.join()
-        print('Waiting for the message...')
 
 
 if __name__ == '__main__':
@@ -56,15 +105,20 @@ if __name__ == '__main__':
     connection = pika.BlockingConnection(parameters)
     channel = connection.channel()
 
+    logger = configure_file_logger('consumer', logging_format='%(asctime)s - %(levelname)s - %(message)s')
+
     try:
         channel.queue_declare(queue=conf_param['queue'], passive=True)
     except ValueError as exc:
+        logger.log(logging.ERROR, 'Queue is not declared.')
         raise exc
 
     acs = Access()
     channel.basic_consume(queue=conf_param['queue'],
                           auto_ack=True,
-                          on_message_callback=partial(callback, access=acs))
-
+                          on_message_callback=partial(callback, access=acs, logger=logger))
+    logger.info('Connection has been established.')
     print('Waiting for messages...')
+    logger.info('Waiting for messages...')
+
     channel.start_consuming()