add main logger for the project

- add logger to consumer.py
- handle exceptions and log them
2020-02-14 15:58:47 +03:00
parent 617e21e1cb
commit aca017b55f
2 changed files with 156 additions and 65 deletions
--- a/src/book.py
+++ b/src/book.py
@@ -30,11 +30,12 @@ class Book:
    }
    SUPPORTED_HEADERS = ["h1", "h2", "h3"]

-    def __init__(self, book_id=0, access=None, file_path=None, output_path=None):
+    def __init__(self, book_id=0, access=None, file_path=None, output_path=None, main_logger=None):
        self.book_id = book_id
        self.access = access
        self.file_path = file_path
        self.output_path = output_path
+        self.main_logger = main_logger

        self.logger = None
        self.html_soup = None
@@ -44,7 +45,7 @@ class Book:
        self.images = list()
        self.content_dict = dict()

-    def configure_file_logger(self, name, attr_name='logger', filename='logs/converter_log.log', filemode='w+',
+    def configure_file_logger(self, name, attr_name='logger', filename='logs/book_log.log', filemode='w+',
                              logging_level=logging.INFO, logging_format='%(asctime)s - %(message)s'):
        """
        Method for Logger configuration. Logger will write in file.
@@ -83,15 +84,31 @@ class Book:
        """
        self.logger.log(msg=message, level=logging_level)

+    def log_error_to_main_log(self, message=''):
+        """
+        Method for logging error to main log file.
+        """
+        if self.main_logger:
+            if not message:
+                message = f'Error in book conversion. Check {self.book_id}_log.log file.'
+            self.main_logger.error(message)
+
    def save_docx(self, content):
        """
        Save binary content of file to .docx.
        :param content: binary content of the file.
        """
        folder_path = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
-        file_path = os.path.join(folder_path, f'docx/{self.book_id}.docx')
-        with open(file_path, 'wb+') as file:
-            file.write(content)
+        folder_path = os.path.join(folder_path, 'docx')
+
+        file_path = os.path.join(folder_path, f'{self.book_id}.docx')
+        try:
+            with open(file_path, 'wb+') as file:
+                file.write(content)
+        except Exception as exc:
+            self.log("Error in writing docx file.", logging.ERROR)
+            self.log_error_to_main_log()
+            raise exc

        self.file_path = pathlib.Path(file_path)

@@ -103,27 +120,37 @@ class Book:
            content = self.access.get_doc(self.book_id)
            self.save_docx(content)
        except FileNotFoundError as ferr:
-            self.log('File have not found')
+            self.log("Can't get docx from server.", logging.ERROR)
+            self.log_error_to_main_log()
            raise ferr
        except Exception as exc:
            raise exc

    def set_process_status(self):
        try:
-            self.access.update_status(self.book_id, self.access.PROCESS)
+            if self.access:
+                self.access.update_status(self.book_id, self.access.PROCESS)
        except Exception as exc:
+            self.log("Can't update status of the book [PROCESS].", logging.ERROR)
+            self.log_error_to_main_log()
            raise exc

    def set_generate_status(self):
        try:
-            self.access.update_status(self.book_id, self.access.GENERATE)
+            if self.access:
+                self.access.update_status(self.book_id, self.access.GENERATE)
        except Exception as exc:
+            self.log("Can't update status of the book [GENERATE].", logging.ERROR)
+            self.log_error_to_main_log()
            raise exc

    def set_error_status(self):
        try:
-            self.access.update_status(self.book_id, self.access.ERROR)
+            if self.access:
+                self.access.update_status(self.book_id, self.access.ERROR)
        except Exception as exc:
+            self.log("Can't update status of the book [ERROR].", logging.ERROR)
+            self.log_error_to_main_log()
            raise exc

    def convert_doc_to_html(self):
@@ -138,15 +165,21 @@ class Book:
            f = open(self.file_path)
            f.close()
        except FileNotFoundError as error:
-            self.logger.error('Invalid path to input data.')
+            self.log('Invalid path to input data.', logging.ERROR)
            self.set_error_status()
            raise error

        folder_path = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
        out_dir_path = os.path.join(folder_path, f'html/{self.book_id}')

-        command = f'libreoffice --headless --convert-to html "{str(self.file_path)}" --outdir {out_dir_path}'
-        os.system(command)
+        try:
+            command = f'libreoffice --headless --convert-to html "{str(self.file_path)}" --outdir {out_dir_path}'
+            os.system(command)
+        except Exception as exc:
+            self.log("Conversion has gone wrong. Libreoffice is not installed.", logging.ERROR)
+            self.log_error_to_main_log()
+            self.set_error_status()
+            raise exc

        out_dir_path = os.path.join(out_dir_path, f'{self.file_path.stem}.html')
        self.file_path = pathlib.Path(out_dir_path)
@@ -155,7 +188,8 @@ class Book:
            f = open(self.file_path)
            f.close()
        except FileNotFoundError as exc:
-            self.logger.error('Conversion has gone wrong.')
+            self.log("Conversion has gone wrong. HTML file doesn't exist.", logging.ERROR)
+            self.log_error_to_main_log()
            self.set_error_status()
            raise exc

@@ -181,7 +215,8 @@ class Book:
        try:
            html_text = open(self.file_path, 'r', encoding='utf8').read()
        except FileNotFoundError as exc:
-            self.logger.error('There is no html to process. Conversion went wrong or you specified wrong paths.')
+            self.log('There is no html to process. Conversion went wrong or you specified wrong paths.', logging.ERROR)
+            self.log_error_to_main_log()
            self.set_error_status()
            raise exc

@@ -503,36 +538,42 @@ class Book:
        """
        Process html code to satisfy LawCarta formatting.
        """
-        self.logger.info('Beginning of processing .html file.')
+        self.log('Beginning of processing .html file.')

-        self.clean_trash()
+        try:
+            self.clean_trash()

-        # process main elements of the .html doc
-        self._process_paragraph()
-        self._process_two_columns()
-        self._process_quotes()
+            # process main elements of the .html doc
+            self._process_paragraph()
+            self._process_two_columns()
+            self._process_quotes()

-        self.logger.info('Footnotes processing.')
-        self._process_footnotes()
-        self.logger.info(f'{len(self.footnotes)} footnotes have been processed.')
+            self.log('Footnotes processing.')
+            self._process_footnotes()
+            self.log(f'{len(self.footnotes)} footnotes have been processed.')

-        self.logger.info('Image processing.')
-        self._process_images()
-        self.logger.info(f'{len(self.images)} images have been processed.')
+            self.log('Image processing.')
+            self._process_images()
+            self.log(f'{len(self.images)} images have been processed.')

-        self._process_div()
+            self._process_div()

-        self.content = self.body_tag.find_all(recursive=False)
+            self.content = self.body_tag.find_all(recursive=False)

-        self._process_toc_links()
-        self._process_headings()
+            self._process_toc_links()
+            self._process_headings()

-        self.content = self.body_tag.find_all(recursive=False)
+            self.content = self.body_tag.find_all(recursive=False)

-        # delete text before table of content if exists
-        self.delete_content_before_toc()
+            # delete text before table of content if exists
+            self.delete_content_before_toc()
+        except Exception as exc:
+            self.log('Error has occurred while processing html.', logging.ERROR)
+            self.log_error_to_main_log()
+            self.set_error_status()
+            raise exc

-        self.logger.info('End of processing .html file.')
+        self.log('End of processing .html file.')

    @staticmethod
    def format_html(html_text):
@@ -606,23 +647,29 @@ class Book:
        ind = 0
        ch_num = 0

-        while ind < len(self.content):
-            res = {}
+        try:
+            while ind < len(self.content):
+                res = {}

-            if self.content[ind].name in self.SUPPORTED_HEADERS:
-                res, ind = self.header_to_json(ind)
-            else:
-                chapter_title = f'Untitled chapter {ch_num}'
-                chapter = []
-                while ind < len(self.content) and self.content[ind].name not in self.SUPPORTED_HEADERS:
-                    if not self._is_empty_p_tag(self.content[ind]):
-                        chapter.append(self.format_html(str(self.content[ind])))
-                    ind += 1
-                if chapter:
-                    res = {chapter_title: ["".join(chapter)]}
-                    ch_num += 1
-            if res:
-                json_strc.append(res)
+                if self.content[ind].name in self.SUPPORTED_HEADERS:
+                    res, ind = self.header_to_json(ind)
+                else:
+                    chapter_title = f'Untitled chapter {ch_num}'
+                    chapter = []
+                    while ind < len(self.content) and self.content[ind].name not in self.SUPPORTED_HEADERS:
+                        if not self._is_empty_p_tag(self.content[ind]):
+                            chapter.append(self.format_html(str(self.content[ind])))
+                        ind += 1
+                    if chapter:
+                        res = {chapter_title: ["".join(chapter)]}
+                        ch_num += 1
+                if res:
+                    json_strc.append(res)
+        except Exception as exc:
+            self.log('Error has occurred while making json structure.', logging.ERROR)
+            self.log_error_to_main_log()
+            self.set_error_status()
+            raise exc

        self.content_dict = {
            "content": json_strc,
@@ -630,24 +677,33 @@ class Book:
        }

    def write_json(self):
-        with codecs.open(self.output_path, 'w', encoding='utf-8') as f:
-            json.dump(self.content_dict, f, ensure_ascii=False)
+        try:
+            with codecs.open(self.output_path, 'w', encoding='utf-8') as f:
+                json.dump(self.content_dict, f, ensure_ascii=False)
+        except Exception as exc:
+            self.log('Error has occurred while writing json file.', logging.ERROR)
+            # self.log_error_to_main_log()
+            # self.set_error_status()
+            # raise exc

    def send_json_content(self):
        try:
            self.access.send_book(self.book_id, self.content_dict)
        except Exception as exc:
+            self.log('Error has occurred while sending json content.', logging.ERROR)
+            self.log_error_to_main_log()
+            self.set_error_status()
            raise exc

    def convert_from_html(self, logging_format):
-        self.configure_file_logger(__name__, logging_format=logging_format, filemode='w+')
+        self.configure_file_logger(f'{__name__}_{self.book_id}', logging_format=logging_format, filemode='w+')
        self.read_html()
        self.process_html()
        self.convert_to_json()
        self.write_json()

    def conversion(self, logging_format, filemode='w+'):
-        self.configure_file_logger(__name__, logging_format=logging_format, filemode=filemode)
+        self.configure_file_logger(f'{__name__}_{self.book_id}', logging_format=logging_format, filemode=filemode)
        self.log('Beginning of conversion from .docx to .json.')
        self.get_docx()
        self.set_process_status()