add main logger for the project

- add logger to consumer.py
- handle exceptions and log them
This commit is contained in:
Jeniamakarchik
2020-02-14 15:58:47 +03:00
parent 617e21e1cb
commit aca017b55f
2 changed files with 156 additions and 65 deletions

View File

@@ -30,11 +30,12 @@ class Book:
} }
SUPPORTED_HEADERS = ["h1", "h2", "h3"] SUPPORTED_HEADERS = ["h1", "h2", "h3"]
def __init__(self, book_id=0, access=None, file_path=None, output_path=None): def __init__(self, book_id=0, access=None, file_path=None, output_path=None, main_logger=None):
self.book_id = book_id self.book_id = book_id
self.access = access self.access = access
self.file_path = file_path self.file_path = file_path
self.output_path = output_path self.output_path = output_path
self.main_logger = main_logger
self.logger = None self.logger = None
self.html_soup = None self.html_soup = None
@@ -44,7 +45,7 @@ class Book:
self.images = list() self.images = list()
self.content_dict = dict() self.content_dict = dict()
def configure_file_logger(self, name, attr_name='logger', filename='logs/converter_log.log', filemode='w+', def configure_file_logger(self, name, attr_name='logger', filename='logs/book_log.log', filemode='w+',
logging_level=logging.INFO, logging_format='%(asctime)s - %(message)s'): logging_level=logging.INFO, logging_format='%(asctime)s - %(message)s'):
""" """
Method for Logger configuration. Logger will write in file. Method for Logger configuration. Logger will write in file.
@@ -83,15 +84,31 @@ class Book:
""" """
self.logger.log(msg=message, level=logging_level) self.logger.log(msg=message, level=logging_level)
def log_error_to_main_log(self, message=''):
"""
Method for logging error to main log file.
"""
if self.main_logger:
if not message:
message = f'Error in book conversion. Check {self.book_id}_log.log file.'
self.main_logger.error(message)
def save_docx(self, content): def save_docx(self, content):
""" """
Save binary content of file to .docx. Save binary content of file to .docx.
:param content: binary content of the file. :param content: binary content of the file.
""" """
folder_path = os.path.dirname(os.path.dirname(os.path.abspath(__file__))) folder_path = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
file_path = os.path.join(folder_path, f'docx/{self.book_id}.docx') folder_path = os.path.join(folder_path, 'docx')
with open(file_path, 'wb+') as file:
file.write(content) file_path = os.path.join(folder_path, f'{self.book_id}.docx')
try:
with open(file_path, 'wb+') as file:
file.write(content)
except Exception as exc:
self.log("Error in writing docx file.", logging.ERROR)
self.log_error_to_main_log()
raise exc
self.file_path = pathlib.Path(file_path) self.file_path = pathlib.Path(file_path)
@@ -103,27 +120,37 @@ class Book:
content = self.access.get_doc(self.book_id) content = self.access.get_doc(self.book_id)
self.save_docx(content) self.save_docx(content)
except FileNotFoundError as ferr: except FileNotFoundError as ferr:
self.log('File have not found') self.log("Can't get docx from server.", logging.ERROR)
self.log_error_to_main_log()
raise ferr raise ferr
except Exception as exc: except Exception as exc:
raise exc raise exc
def set_process_status(self): def set_process_status(self):
try: try:
self.access.update_status(self.book_id, self.access.PROCESS) if self.access:
self.access.update_status(self.book_id, self.access.PROCESS)
except Exception as exc: except Exception as exc:
self.log("Can't update status of the book [PROCESS].", logging.ERROR)
self.log_error_to_main_log()
raise exc raise exc
def set_generate_status(self): def set_generate_status(self):
try: try:
self.access.update_status(self.book_id, self.access.GENERATE) if self.access:
self.access.update_status(self.book_id, self.access.GENERATE)
except Exception as exc: except Exception as exc:
self.log("Can't update status of the book [GENERATE].", logging.ERROR)
self.log_error_to_main_log()
raise exc raise exc
def set_error_status(self): def set_error_status(self):
try: try:
self.access.update_status(self.book_id, self.access.ERROR) if self.access:
self.access.update_status(self.book_id, self.access.ERROR)
except Exception as exc: except Exception as exc:
self.log("Can't update status of the book [ERROR].", logging.ERROR)
self.log_error_to_main_log()
raise exc raise exc
def convert_doc_to_html(self): def convert_doc_to_html(self):
@@ -138,15 +165,21 @@ class Book:
f = open(self.file_path) f = open(self.file_path)
f.close() f.close()
except FileNotFoundError as error: except FileNotFoundError as error:
self.logger.error('Invalid path to input data.') self.log('Invalid path to input data.', logging.ERROR)
self.set_error_status() self.set_error_status()
raise error raise error
folder_path = os.path.dirname(os.path.dirname(os.path.abspath(__file__))) folder_path = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
out_dir_path = os.path.join(folder_path, f'html/{self.book_id}') out_dir_path = os.path.join(folder_path, f'html/{self.book_id}')
command = f'libreoffice --headless --convert-to html "{str(self.file_path)}" --outdir {out_dir_path}' try:
os.system(command) command = f'libreoffice --headless --convert-to html "{str(self.file_path)}" --outdir {out_dir_path}'
os.system(command)
except Exception as exc:
self.log("Conversion has gone wrong. Libreoffice is not installed.", logging.ERROR)
self.log_error_to_main_log()
self.set_error_status()
raise exc
out_dir_path = os.path.join(out_dir_path, f'{self.file_path.stem}.html') out_dir_path = os.path.join(out_dir_path, f'{self.file_path.stem}.html')
self.file_path = pathlib.Path(out_dir_path) self.file_path = pathlib.Path(out_dir_path)
@@ -155,7 +188,8 @@ class Book:
f = open(self.file_path) f = open(self.file_path)
f.close() f.close()
except FileNotFoundError as exc: except FileNotFoundError as exc:
self.logger.error('Conversion has gone wrong.') self.log("Conversion has gone wrong. HTML file doesn't exist.", logging.ERROR)
self.log_error_to_main_log()
self.set_error_status() self.set_error_status()
raise exc raise exc
@@ -181,7 +215,8 @@ class Book:
try: try:
html_text = open(self.file_path, 'r', encoding='utf8').read() html_text = open(self.file_path, 'r', encoding='utf8').read()
except FileNotFoundError as exc: except FileNotFoundError as exc:
self.logger.error('There is no html to process. Conversion went wrong or you specified wrong paths.') self.log('There is no html to process. Conversion went wrong or you specified wrong paths.', logging.ERROR)
self.log_error_to_main_log()
self.set_error_status() self.set_error_status()
raise exc raise exc
@@ -503,36 +538,42 @@ class Book:
""" """
Process html code to satisfy LawCarta formatting. Process html code to satisfy LawCarta formatting.
""" """
self.logger.info('Beginning of processing .html file.') self.log('Beginning of processing .html file.')
self.clean_trash() try:
self.clean_trash()
# process main elements of the .html doc # process main elements of the .html doc
self._process_paragraph() self._process_paragraph()
self._process_two_columns() self._process_two_columns()
self._process_quotes() self._process_quotes()
self.logger.info('Footnotes processing.') self.log('Footnotes processing.')
self._process_footnotes() self._process_footnotes()
self.logger.info(f'{len(self.footnotes)} footnotes have been processed.') self.log(f'{len(self.footnotes)} footnotes have been processed.')
self.logger.info('Image processing.') self.log('Image processing.')
self._process_images() self._process_images()
self.logger.info(f'{len(self.images)} images have been processed.') self.log(f'{len(self.images)} images have been processed.')
self._process_div() self._process_div()
self.content = self.body_tag.find_all(recursive=False) self.content = self.body_tag.find_all(recursive=False)
self._process_toc_links() self._process_toc_links()
self._process_headings() self._process_headings()
self.content = self.body_tag.find_all(recursive=False) self.content = self.body_tag.find_all(recursive=False)
# delete text before table of content if exists # delete text before table of content if exists
self.delete_content_before_toc() self.delete_content_before_toc()
except Exception as exc:
self.log('Error has occurred while processing html.', logging.ERROR)
self.log_error_to_main_log()
self.set_error_status()
raise exc
self.logger.info('End of processing .html file.') self.log('End of processing .html file.')
@staticmethod @staticmethod
def format_html(html_text): def format_html(html_text):
@@ -606,23 +647,29 @@ class Book:
ind = 0 ind = 0
ch_num = 0 ch_num = 0
while ind < len(self.content): try:
res = {} while ind < len(self.content):
res = {}
if self.content[ind].name in self.SUPPORTED_HEADERS: if self.content[ind].name in self.SUPPORTED_HEADERS:
res, ind = self.header_to_json(ind) res, ind = self.header_to_json(ind)
else: else:
chapter_title = f'Untitled chapter {ch_num}' chapter_title = f'Untitled chapter {ch_num}'
chapter = [] chapter = []
while ind < len(self.content) and self.content[ind].name not in self.SUPPORTED_HEADERS: while ind < len(self.content) and self.content[ind].name not in self.SUPPORTED_HEADERS:
if not self._is_empty_p_tag(self.content[ind]): if not self._is_empty_p_tag(self.content[ind]):
chapter.append(self.format_html(str(self.content[ind]))) chapter.append(self.format_html(str(self.content[ind])))
ind += 1 ind += 1
if chapter: if chapter:
res = {chapter_title: ["".join(chapter)]} res = {chapter_title: ["".join(chapter)]}
ch_num += 1 ch_num += 1
if res: if res:
json_strc.append(res) json_strc.append(res)
except Exception as exc:
self.log('Error has occurred while making json structure.', logging.ERROR)
self.log_error_to_main_log()
self.set_error_status()
raise exc
self.content_dict = { self.content_dict = {
"content": json_strc, "content": json_strc,
@@ -630,24 +677,33 @@ class Book:
} }
def write_json(self): def write_json(self):
with codecs.open(self.output_path, 'w', encoding='utf-8') as f: try:
json.dump(self.content_dict, f, ensure_ascii=False) with codecs.open(self.output_path, 'w', encoding='utf-8') as f:
json.dump(self.content_dict, f, ensure_ascii=False)
except Exception as exc:
self.log('Error has occurred while writing json file.', logging.ERROR)
# self.log_error_to_main_log()
# self.set_error_status()
# raise exc
def send_json_content(self): def send_json_content(self):
try: try:
self.access.send_book(self.book_id, self.content_dict) self.access.send_book(self.book_id, self.content_dict)
except Exception as exc: except Exception as exc:
self.log('Error has occurred while sending json content.', logging.ERROR)
self.log_error_to_main_log()
self.set_error_status()
raise exc raise exc
def convert_from_html(self, logging_format): def convert_from_html(self, logging_format):
self.configure_file_logger(__name__, logging_format=logging_format, filemode='w+') self.configure_file_logger(f'{__name__}_{self.book_id}', logging_format=logging_format, filemode='w+')
self.read_html() self.read_html()
self.process_html() self.process_html()
self.convert_to_json() self.convert_to_json()
self.write_json() self.write_json()
def conversion(self, logging_format, filemode='w+'): def conversion(self, logging_format, filemode='w+'):
self.configure_file_logger(__name__, logging_format=logging_format, filemode=filemode) self.configure_file_logger(f'{__name__}_{self.book_id}', logging_format=logging_format, filemode=filemode)
self.log('Beginning of conversion from .docx to .json.') self.log('Beginning of conversion from .docx to .json.')
self.get_docx() self.get_docx()
self.set_process_status() self.set_process_status()

View File

@@ -1,5 +1,7 @@
import json import json
import logging
import os import os
import sys
from functools import partial from functools import partial
from pathlib import Path from pathlib import Path
from threading import Thread, active_count from threading import Thread, active_count
@@ -10,34 +12,62 @@ from access import Access
from book import Book from book import Book
def convert_book(book_id, access): def configure_file_logger(name, filename='logs/converter_log.log', filemode='w+', logging_level=logging.INFO,
logging_format='%(asctime)s - %(message)s'):
logger = logging.getLogger(name)
folder_path = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
file_path = os.path.join(folder_path, filename)
file_handler = logging.FileHandler(file_path, mode=filemode)
logger.addHandler(file_handler)
file_format = logging.Formatter(fmt=logging_format)
file_handler.setFormatter(file_format)
logger.setLevel(logging_level)
return logger
def convert_book(book_id, access, logger):
logger.info(f'Start processing book-{book_id}.')
logging_format = '%(asctime)s - %(levelname)s - %(message)s' logging_format = '%(asctime)s - %(levelname)s - %(message)s'
book = Book(book_id, access) try:
book.conversion(logging_format=logging_format) book = Book(book_id, access, main_logger=logger)
book.conversion(logging_format=logging_format)
except Exception as exc:
raise exc
print('Book has been proceeded.') logger.info(f'Book-{book_id} has been proceeded.')
# print('Book has been proceeded.')
def callback(ch, method, properties, body, access): def callback(ch, method, properties, body, access, logger):
print(f'Message: {body}.') print(f'Message: {body}.')
logger.info(f'Message: {body}.')
try: try:
data = json.loads(body) data = json.loads(body)
params = { params = {
'book_id': data['id'], 'book_id': data['id'],
'access': access 'access': access,
'logger': logger
} }
thread = Thread(target=convert_book, kwargs=params) thread = Thread(target=convert_book, kwargs=params)
thread.start() thread.start()
print(f'Active threads: {active_count()}.') logging.log(logging.INFO, f'Active threads: {active_count()}.')
# print(f'Active threads: {active_count()}.')
except Exception as exc: except Exception as exc:
print(exc) if hasattr(exc, 'message'):
logger.error(f'{sys.exc_info()[0]}: {exc.message}')
else:
logger.error(f'{sys.exc_info()[0]}')
finally: finally:
pass
# thread.join() # thread.join()
print('Waiting for the message...') # print('Waiting for the message...')
if __name__ == '__main__': if __name__ == '__main__':
@@ -56,15 +86,20 @@ if __name__ == '__main__':
connection = pika.BlockingConnection(parameters) connection = pika.BlockingConnection(parameters)
channel = connection.channel() channel = connection.channel()
logger = configure_file_logger('consumer', logging_format='%(asctime)s - %(levelname)s - %(message)s')
try: try:
channel.queue_declare(queue=conf_param['queue'], passive=True) channel.queue_declare(queue=conf_param['queue'], passive=True)
except ValueError as exc: except ValueError as exc:
logger.log(logging.ERROR, 'Queue is not declared.')
raise exc raise exc
acs = Access() acs = Access()
channel.basic_consume(queue=conf_param['queue'], channel.basic_consume(queue=conf_param['queue'],
auto_ack=True, auto_ack=True,
on_message_callback=partial(callback, access=acs)) on_message_callback=partial(callback, access=acs, logger=logger))
logger.info('Connection has been established.')
print('Waiting for messages...') print('Waiting for messages...')
logger.info('Waiting for messages...')
channel.start_consuming() channel.start_consuming()