forked from LiveCarta/BookConverter
add main logger for the project
- add logger to consumer.py - handle exceptions and log them
This commit is contained in:
164
src/book.py
164
src/book.py
@@ -30,11 +30,12 @@ class Book:
|
|||||||
}
|
}
|
||||||
SUPPORTED_HEADERS = ["h1", "h2", "h3"]
|
SUPPORTED_HEADERS = ["h1", "h2", "h3"]
|
||||||
|
|
||||||
def __init__(self, book_id=0, access=None, file_path=None, output_path=None):
|
def __init__(self, book_id=0, access=None, file_path=None, output_path=None, main_logger=None):
|
||||||
self.book_id = book_id
|
self.book_id = book_id
|
||||||
self.access = access
|
self.access = access
|
||||||
self.file_path = file_path
|
self.file_path = file_path
|
||||||
self.output_path = output_path
|
self.output_path = output_path
|
||||||
|
self.main_logger = main_logger
|
||||||
|
|
||||||
self.logger = None
|
self.logger = None
|
||||||
self.html_soup = None
|
self.html_soup = None
|
||||||
@@ -44,7 +45,7 @@ class Book:
|
|||||||
self.images = list()
|
self.images = list()
|
||||||
self.content_dict = dict()
|
self.content_dict = dict()
|
||||||
|
|
||||||
def configure_file_logger(self, name, attr_name='logger', filename='logs/converter_log.log', filemode='w+',
|
def configure_file_logger(self, name, attr_name='logger', filename='logs/book_log.log', filemode='w+',
|
||||||
logging_level=logging.INFO, logging_format='%(asctime)s - %(message)s'):
|
logging_level=logging.INFO, logging_format='%(asctime)s - %(message)s'):
|
||||||
"""
|
"""
|
||||||
Method for Logger configuration. Logger will write in file.
|
Method for Logger configuration. Logger will write in file.
|
||||||
@@ -83,15 +84,31 @@ class Book:
|
|||||||
"""
|
"""
|
||||||
self.logger.log(msg=message, level=logging_level)
|
self.logger.log(msg=message, level=logging_level)
|
||||||
|
|
||||||
|
def log_error_to_main_log(self, message=''):
    """
    Report an error to the main (project-wide) logger, if one is attached.

    Nothing happens when ``self.main_logger`` is falsy.

    :param message: text to log; when empty, a default message that names
        the ``<book_id>_log.log`` file is logged instead.
    """
    if not self.main_logger:
        return

    # An empty message falls back to the generic pointer to the book log.
    text = message or f'Error in book conversion. Check {self.book_id}_log.log file.'
    self.main_logger.error(text)
|
||||||
|
|
||||||
def save_docx(self, content):
    """
    Save binary content of file to .docx.

    The file is written to ``docx/<book_id>.docx`` inside the directory two
    levels above this module. On success ``self.file_path`` is set to the
    saved file as a ``pathlib.Path``.

    :param content: binary content of the file.
    :raises Exception: any error raised while preparing the folder or
        writing the file; it is logged to the book log and to the main log
        before being re-raised unchanged.
    """
    folder_path = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
    folder_path = os.path.join(folder_path, 'docx')
    file_path = os.path.join(folder_path, f'{self.book_id}.docx')

    try:
        # The target folder is not guaranteed to exist (e.g. fresh checkout),
        # and open() does not create missing parent directories.
        os.makedirs(folder_path, exist_ok=True)
        with open(file_path, 'wb+') as file:
            file.write(content)
    except Exception:
        self.log("Error in writing docx file.", logging.ERROR)
        self.log_error_to_main_log()
        raise

    self.file_path = pathlib.Path(file_path)
|
||||||
|
|
||||||
@@ -103,27 +120,37 @@ class Book:
|
|||||||
content = self.access.get_doc(self.book_id)
|
content = self.access.get_doc(self.book_id)
|
||||||
self.save_docx(content)
|
self.save_docx(content)
|
||||||
except FileNotFoundError as ferr:
|
except FileNotFoundError as ferr:
|
||||||
self.log('File have not found')
|
self.log("Can't get docx from server.", logging.ERROR)
|
||||||
|
self.log_error_to_main_log()
|
||||||
raise ferr
|
raise ferr
|
||||||
except Exception as exc:
|
except Exception as exc:
|
||||||
raise exc
|
raise exc
|
||||||
|
|
||||||
def _push_status(self, status_name):
    """
    Send one status of this book through ``self.access``.

    Does nothing when ``self.access`` is falsy. On any failure the error is
    written to the book log and to the main log, then re-raised unchanged.

    :param status_name: name of the status attribute on ``self.access``
        (``'PROCESS'``, ``'GENERATE'`` or ``'ERROR'``).
    """
    try:
        if self.access:
            self.access.update_status(self.book_id, getattr(self.access, status_name))
    except Exception:
        self.log(f"Can't update status of the book [{status_name}].", logging.ERROR)
        self.log_error_to_main_log()
        raise


def set_process_status(self):
    """Report the ``PROCESS`` status of the book via ``self.access``."""
    self._push_status('PROCESS')


def set_generate_status(self):
    """Report the ``GENERATE`` status of the book via ``self.access``."""
    self._push_status('GENERATE')


def set_error_status(self):
    """Report the ``ERROR`` status of the book via ``self.access``."""
    self._push_status('ERROR')
|
||||||
|
|
||||||
def convert_doc_to_html(self):
|
def convert_doc_to_html(self):
|
||||||
@@ -138,15 +165,21 @@ class Book:
|
|||||||
f = open(self.file_path)
|
f = open(self.file_path)
|
||||||
f.close()
|
f.close()
|
||||||
except FileNotFoundError as error:
|
except FileNotFoundError as error:
|
||||||
self.logger.error('Invalid path to input data.')
|
self.log('Invalid path to input data.', logging.ERROR)
|
||||||
self.set_error_status()
|
self.set_error_status()
|
||||||
raise error
|
raise error
|
||||||
|
|
||||||
folder_path = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
|
folder_path = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
|
||||||
out_dir_path = os.path.join(folder_path, f'html/{self.book_id}')
|
out_dir_path = os.path.join(folder_path, f'html/{self.book_id}')
|
||||||
|
|
||||||
command = f'libreoffice --headless --convert-to html "{str(self.file_path)}" --outdir {out_dir_path}'
|
try:
|
||||||
os.system(command)
|
command = f'libreoffice --headless --convert-to html "{str(self.file_path)}" --outdir {out_dir_path}'
|
||||||
|
os.system(command)
|
||||||
|
except Exception as exc:
|
||||||
|
self.log("Conversion has gone wrong. Libreoffice is not installed.", logging.ERROR)
|
||||||
|
self.log_error_to_main_log()
|
||||||
|
self.set_error_status()
|
||||||
|
raise exc
|
||||||
|
|
||||||
out_dir_path = os.path.join(out_dir_path, f'{self.file_path.stem}.html')
|
out_dir_path = os.path.join(out_dir_path, f'{self.file_path.stem}.html')
|
||||||
self.file_path = pathlib.Path(out_dir_path)
|
self.file_path = pathlib.Path(out_dir_path)
|
||||||
@@ -155,7 +188,8 @@ class Book:
|
|||||||
f = open(self.file_path)
|
f = open(self.file_path)
|
||||||
f.close()
|
f.close()
|
||||||
except FileNotFoundError as exc:
|
except FileNotFoundError as exc:
|
||||||
self.logger.error('Conversion has gone wrong.')
|
self.log("Conversion has gone wrong. HTML file doesn't exist.", logging.ERROR)
|
||||||
|
self.log_error_to_main_log()
|
||||||
self.set_error_status()
|
self.set_error_status()
|
||||||
raise exc
|
raise exc
|
||||||
|
|
||||||
@@ -181,7 +215,8 @@ class Book:
|
|||||||
try:
|
try:
|
||||||
html_text = open(self.file_path, 'r', encoding='utf8').read()
|
html_text = open(self.file_path, 'r', encoding='utf8').read()
|
||||||
except FileNotFoundError as exc:
|
except FileNotFoundError as exc:
|
||||||
self.logger.error('There is no html to process. Conversion went wrong or you specified wrong paths.')
|
self.log('There is no html to process. Conversion went wrong or you specified wrong paths.', logging.ERROR)
|
||||||
|
self.log_error_to_main_log()
|
||||||
self.set_error_status()
|
self.set_error_status()
|
||||||
raise exc
|
raise exc
|
||||||
|
|
||||||
@@ -503,36 +538,42 @@ class Book:
|
|||||||
"""
|
"""
|
||||||
Process html code to satisfy LawCarta formatting.
|
Process html code to satisfy LawCarta formatting.
|
||||||
"""
|
"""
|
||||||
self.logger.info('Beginning of processing .html file.')
|
self.log('Beginning of processing .html file.')
|
||||||
|
|
||||||
self.clean_trash()
|
try:
|
||||||
|
self.clean_trash()
|
||||||
|
|
||||||
# process main elements of the .html doc
|
# process main elements of the .html doc
|
||||||
self._process_paragraph()
|
self._process_paragraph()
|
||||||
self._process_two_columns()
|
self._process_two_columns()
|
||||||
self._process_quotes()
|
self._process_quotes()
|
||||||
|
|
||||||
self.logger.info('Footnotes processing.')
|
self.log('Footnotes processing.')
|
||||||
self._process_footnotes()
|
self._process_footnotes()
|
||||||
self.logger.info(f'{len(self.footnotes)} footnotes have been processed.')
|
self.log(f'{len(self.footnotes)} footnotes have been processed.')
|
||||||
|
|
||||||
self.logger.info('Image processing.')
|
self.log('Image processing.')
|
||||||
self._process_images()
|
self._process_images()
|
||||||
self.logger.info(f'{len(self.images)} images have been processed.')
|
self.log(f'{len(self.images)} images have been processed.')
|
||||||
|
|
||||||
self._process_div()
|
self._process_div()
|
||||||
|
|
||||||
self.content = self.body_tag.find_all(recursive=False)
|
self.content = self.body_tag.find_all(recursive=False)
|
||||||
|
|
||||||
self._process_toc_links()
|
self._process_toc_links()
|
||||||
self._process_headings()
|
self._process_headings()
|
||||||
|
|
||||||
self.content = self.body_tag.find_all(recursive=False)
|
self.content = self.body_tag.find_all(recursive=False)
|
||||||
|
|
||||||
# delete text before table of content if exists
|
# delete text before table of content if exists
|
||||||
self.delete_content_before_toc()
|
self.delete_content_before_toc()
|
||||||
|
except Exception as exc:
|
||||||
|
self.log('Error has occurred while processing html.', logging.ERROR)
|
||||||
|
self.log_error_to_main_log()
|
||||||
|
self.set_error_status()
|
||||||
|
raise exc
|
||||||
|
|
||||||
self.logger.info('End of processing .html file.')
|
self.log('End of processing .html file.')
|
||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
def format_html(html_text):
|
def format_html(html_text):
|
||||||
@@ -606,23 +647,29 @@ class Book:
|
|||||||
ind = 0
|
ind = 0
|
||||||
ch_num = 0
|
ch_num = 0
|
||||||
|
|
||||||
while ind < len(self.content):
|
try:
|
||||||
res = {}
|
while ind < len(self.content):
|
||||||
|
res = {}
|
||||||
|
|
||||||
if self.content[ind].name in self.SUPPORTED_HEADERS:
|
if self.content[ind].name in self.SUPPORTED_HEADERS:
|
||||||
res, ind = self.header_to_json(ind)
|
res, ind = self.header_to_json(ind)
|
||||||
else:
|
else:
|
||||||
chapter_title = f'Untitled chapter {ch_num}'
|
chapter_title = f'Untitled chapter {ch_num}'
|
||||||
chapter = []
|
chapter = []
|
||||||
while ind < len(self.content) and self.content[ind].name not in self.SUPPORTED_HEADERS:
|
while ind < len(self.content) and self.content[ind].name not in self.SUPPORTED_HEADERS:
|
||||||
if not self._is_empty_p_tag(self.content[ind]):
|
if not self._is_empty_p_tag(self.content[ind]):
|
||||||
chapter.append(self.format_html(str(self.content[ind])))
|
chapter.append(self.format_html(str(self.content[ind])))
|
||||||
ind += 1
|
ind += 1
|
||||||
if chapter:
|
if chapter:
|
||||||
res = {chapter_title: ["".join(chapter)]}
|
res = {chapter_title: ["".join(chapter)]}
|
||||||
ch_num += 1
|
ch_num += 1
|
||||||
if res:
|
if res:
|
||||||
json_strc.append(res)
|
json_strc.append(res)
|
||||||
|
except Exception as exc:
|
||||||
|
self.log('Error has occurred while making json structure.', logging.ERROR)
|
||||||
|
self.log_error_to_main_log()
|
||||||
|
self.set_error_status()
|
||||||
|
raise exc
|
||||||
|
|
||||||
self.content_dict = {
|
self.content_dict = {
|
||||||
"content": json_strc,
|
"content": json_strc,
|
||||||
@@ -630,24 +677,33 @@ class Book:
|
|||||||
}
|
}
|
||||||
|
|
||||||
def write_json(self):
    """
    Dump ``self.content_dict`` as UTF-8 JSON into ``self.output_path``.

    Writing is best-effort: a failure is recorded in the book log together
    with the exception that caused it, and the method returns normally.
    """
    try:
        with open(self.output_path, 'w', encoding='utf-8') as f:
            json.dump(self.content_dict, f, ensure_ascii=False)
    except Exception as exc:
        # Deliberately neither re-raised nor reported as a book error status:
        # a failed write of the json file must not abort the caller.
        self.log(f'Error has occurred while writing json file. {exc!r}', logging.ERROR)
|
||||||
|
|
||||||
def send_json_content(self):
    """
    Hand ``self.content_dict`` for ``self.book_id`` over to ``self.access.send_book``.

    :raises Exception: whatever the send raised; before it propagates, the
        failure is written to the book log and the main log and the error
        status of the book is set.
    """
    try:
        self.access.send_book(self.book_id, self.content_dict)
    except Exception as send_error:
        # Report first, then flag the book, then let the caller see the error.
        self.log('Error has occurred while sending json content.', logging.ERROR)
        self.log_error_to_main_log()
        self.set_error_status()
        raise send_error
|
||||||
|
|
||||||
def convert_from_html(self, logging_format):
    """
    Run the conversion pipeline on an already existing html file.

    Configures a file logger named ``<module name>_<book_id>`` and then runs,
    in order: ``read_html``, ``process_html``, ``convert_to_json``,
    ``write_json``.

    :param logging_format: format string passed to the file logger.
    """
    logger_name = f'{__name__}_{self.book_id}'
    self.configure_file_logger(logger_name, logging_format=logging_format, filemode='w+')

    # Each step is looked up right before it runs, one after another.
    for step_name in ('read_html', 'process_html', 'convert_to_json', 'write_json'):
        getattr(self, step_name)()
|
||||||
|
|
||||||
def conversion(self, logging_format, filemode='w+'):
|
def conversion(self, logging_format, filemode='w+'):
|
||||||
self.configure_file_logger(__name__, logging_format=logging_format, filemode=filemode)
|
self.configure_file_logger(f'{__name__}_{self.book_id}', logging_format=logging_format, filemode=filemode)
|
||||||
self.log('Beginning of conversion from .docx to .json.')
|
self.log('Beginning of conversion from .docx to .json.')
|
||||||
self.get_docx()
|
self.get_docx()
|
||||||
self.set_process_status()
|
self.set_process_status()
|
||||||
|
|||||||
@@ -1,5 +1,7 @@
|
|||||||
import json
|
import json
|
||||||
|
import logging
|
||||||
import os
|
import os
|
||||||
|
import sys
|
||||||
from functools import partial
|
from functools import partial
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
from threading import Thread, active_count
|
from threading import Thread, active_count
|
||||||
@@ -10,34 +12,62 @@ from access import Access
|
|||||||
from book import Book
|
from book import Book
|
||||||
|
|
||||||
|
|
||||||
def convert_book(book_id, access):
|
def configure_file_logger(name, filename='logs/converter_log.log', filemode='w+', logging_level=logging.INFO,
                          logging_format='%(asctime)s - %(message)s'):
    """
    Return the logger ``name`` configured to write into a file.

    Calling the function again for the same logger and file reuses the
    handler that is already attached, so records are not written twice.

    :param name: name of the logger to configure.
    :param filename: log file path, joined onto the directory two levels
        above this module (an absolute path is used as is).
    :param filemode: mode used to open the log file when the handler is created.
    :param logging_level: level set on the logger.
    :param logging_format: format string applied to the file handler.
    :return: the configured ``logging.Logger``.
    """
    logger = logging.getLogger(name)

    folder_path = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
    # FileHandler stores an absolute path, so compare against the same form.
    file_path = os.path.abspath(os.path.join(folder_path, filename))

    file_handler = next(
        (handler for handler in logger.handlers
         if isinstance(handler, logging.FileHandler) and handler.baseFilename == file_path),
        None,
    )
    if file_handler is None:
        # FileHandler does not create missing parent directories.
        os.makedirs(os.path.dirname(file_path), exist_ok=True)
        file_handler = logging.FileHandler(file_path, mode=filemode)
        logger.addHandler(file_handler)

    file_handler.setFormatter(logging.Formatter(fmt=logging_format))
    logger.setLevel(logging_level)

    return logger
|
||||||
|
|
||||||
|
|
||||||
|
def convert_book(book_id, access, logger):
    """
    Convert a single book and report start, success and failure to ``logger``.

    :param book_id: id of the book to convert.
    :param access: access object handed over to ``Book``.
    :param logger: main logger; also passed to ``Book`` as ``main_logger``.
    :raises Exception: whatever ``Book`` creation or ``Book.conversion``
        raised, after it has been written to ``logger`` with its traceback.
    """
    logger.info(f'Start processing book-{book_id}.')
    logging_format = '%(asctime)s - %(levelname)s - %(message)s'

    try:
        book = Book(book_id, access, main_logger=logger)
        book.conversion(logging_format=logging_format)
    except Exception:
        # This function is used as a thread target, so the code that started
        # the thread cannot catch the error; record it here before it propagates.
        logger.exception(f'Conversion of book-{book_id} has failed.')
        raise

    logger.info(f'Book-{book_id} has been proceeded.')
|
||||||
|
|
||||||
|
|
||||||
def callback(ch, method, properties, body, access, logger):
    """
    Handle one queue message: parse it and start the book conversion in a new thread.

    Errors raised while parsing the message or starting the thread are
    written to ``logger`` and not propagated.

    :param ch: unused here.
    :param method: unused here.
    :param properties: unused here.
    :param body: raw message; parsed as JSON and expected to contain an ``id`` key.
    :param access: access object forwarded to ``convert_book``.
    :param logger: main logger; also forwarded to ``convert_book``.
    """
    print(f'Message: {body}.')
    logger.info(f'Message: {body}.')
    try:
        data = json.loads(body)
        params = {
            'book_id': data['id'],
            'access': access,
            'logger': logger,
        }

        thread = Thread(target=convert_book, kwargs=params)
        thread.start()
        # Use the supplied logger: the module-level logging.log() writes to
        # the root logger, not to the main log file.
        logger.info(f'Active threads: {active_count()}.')
    except Exception as exc:
        # Python 3 exceptions have no ``message`` attribute; str(exc) carries the text.
        logger.error(f'{type(exc)}: {exc}')
|
||||||
|
|
||||||
|
|
||||||
if __name__ == '__main__':
|
if __name__ == '__main__':
|
||||||
@@ -56,15 +86,20 @@ if __name__ == '__main__':
|
|||||||
connection = pika.BlockingConnection(parameters)
|
connection = pika.BlockingConnection(parameters)
|
||||||
channel = connection.channel()
|
channel = connection.channel()
|
||||||
|
|
||||||
|
logger = configure_file_logger('consumer', logging_format='%(asctime)s - %(levelname)s - %(message)s')
|
||||||
|
|
||||||
try:
|
try:
|
||||||
channel.queue_declare(queue=conf_param['queue'], passive=True)
|
channel.queue_declare(queue=conf_param['queue'], passive=True)
|
||||||
except ValueError as exc:
|
except ValueError as exc:
|
||||||
|
logger.log(logging.ERROR, 'Queue is not declared.')
|
||||||
raise exc
|
raise exc
|
||||||
|
|
||||||
acs = Access()
|
acs = Access()
|
||||||
channel.basic_consume(queue=conf_param['queue'],
|
channel.basic_consume(queue=conf_param['queue'],
|
||||||
auto_ack=True,
|
auto_ack=True,
|
||||||
on_message_callback=partial(callback, access=acs))
|
on_message_callback=partial(callback, access=acs, logger=logger))
|
||||||
|
logger.info('Connection has been established.')
|
||||||
print('Waiting for messages...')
|
print('Waiting for messages...')
|
||||||
|
logger.info('Waiting for messages...')
|
||||||
|
|
||||||
channel.start_consuming()
|
channel.start_consuming()
|
||||||
|
|||||||
Reference in New Issue
Block a user