From a6a54abb0a595b0bd698e296a44d1fc94a3b7fe0 Mon Sep 17 00:00:00 2001
From: Kiryl <kiryl.miatselitsa@teqniksoft.com>
Date: Thu, 14 Jul 2022 19:13:59 +0300
Subject: [PATCH] Refactor docx2libre_html.py: extract helpers, drop status_wrapper

---
 src/docx_converter/docx2libre_html.py | 102 ++++++++++++--------------
 1 file changed, 47 insertions(+), 55 deletions(-)

diff --git a/src/docx_converter/docx2libre_html.py b/src/docx_converter/docx2libre_html.py
index 889aa25..fbb24fe 100644
--- a/src/docx_converter/docx2libre_html.py
+++ b/src/docx_converter/docx2libre_html.py
@@ -10,12 +10,12 @@ from src.util.helpers import BookLogger
 
 
 class Docx2LibreHTML:
-    def __init__(self, book_id=0, file_path=None, access=None, logger=None, status_wrapper=None, libre_locker=None):
-        self.book_id = book_id
+    def __init__(self, book_id=0, file_path=None, access=None, logger=None, libre_locker=None):
+        self.book_id = book_id if book_id != 0 else pathlib.Path(
+            file_path).stem
         self.file_path = file_path
         self.access = access
         self.logger_object: BookLogger = logger
-        self.status_wrapper: status_wrapper = status_wrapper
         # critical section for occupying libreoffice by one thread
         self.libre_locker: Event() = libre_locker
 
@@ -24,15 +24,15 @@ class Docx2LibreHTML:
         self.html_soup = self.read_html(self.html_path)
 
     def _libre_run(self, out_dir_path):
-        command = ['libreoffice', '--headless',
-                   '--convert-to', 'html', f'{str(self.file_path)}',
-                   '--outdir', f'{out_dir_path}']
+        command = ["libreoffice", "--headless",
+                   "--convert-to", "html", f"{str(self.file_path)}",
+                   "--outdir", f"{out_dir_path}"]
         print(command)
         result = subprocess.run(command, stdout=PIPE, stderr=PIPE)
-        self.logger_object.log(f'Result of libre conversion for book_{self.book_id}:'
-                               f' {result.returncode}, {result.stdout}', logging.DEBUG)
-        self.logger_object.log(f'Any error while libre conversion for book_'
-                               f'{self.book_id}: {result.stderr}', logging.DEBUG)
+        self.logger_object.log(f"Result of libre conversion for book_{self.book_id}:"
+                               f" {result.returncode}, {result.stdout}", logging.DEBUG)
+        self.logger_object.log(f"Any error while libre conversion for book_"
+                               f"{self.book_id}: {result.stderr}", logging.DEBUG)
 
     def convert_docx_to_html(self):
         """
@@ -48,82 +48,74 @@ class Docx2LibreHTML:
             path to html file, file appears after libre-conversion
 
         """
-        self.logger_object.log(f'File - {self.file_path}.')
-        print(f'{self.file_path}')
-        self.logger_object.log('Beginning of conversion from .docx to .html.')
+        def get_and_clear_flag(out_dir_path: str):
+            self.libre_locker.clear()  # occupy libreoffice for this thread
+            self.logger_object.log("Got flag!", logging.DEBUG)
+            try: self._libre_run(out_dir_path)  # release even if this raises
+            finally: self.libre_locker.set()  # else waiters block forever
+            self.logger_object.log("Cleared flag...", logging.DEBUG)
 
-        try:
-            f = open(self.file_path)
-            f.close()
-        except FileNotFoundError as error:
-            self.logger_object.log(
-                'Invalid path to input data.', logging.ERROR)
-            self.status_wrapper.set_error()
-            raise error
+        def check_file_exists(path, error_string: str):
+            """Log error_string and re-raise if path does not exist."""
+            try:
+                with open(path):
+                    pass
+            except FileNotFoundError:
+                self.logger_object.log(error_string, logging.ERROR)
+                self.logger_object.log_error_to_main_log()
+                raise
+
+        self.logger_object.log(f"File - {self.file_path}.")
+        print(f"{self.file_path}")
+        self.logger_object.log("Beginning of conversion from .docx to .html.")
+
+        check_file_exists(
+            self.file_path, error_string="Invalid path to input data.")
 
         folder_path = os.path.dirname(
             os.path.dirname(os.path.abspath(__file__)))
-        out_dir_path = os.path.join(folder_path, f'../html/{self.book_id}')
+        out_dir_path = os.path.join(folder_path, f"../html/{self.book_id}")
         pathlib.Path(out_dir_path).mkdir(parents=True, exist_ok=True)
 
-        is_book_converted = False
         try:
             if self.libre_locker.isSet():
-                self.libre_locker.clear()
-                self.logger_object.log('Got flag...', logging.DEBUG)
-                self._libre_run(out_dir_path)
-                self.libre_locker.set()
-                self.logger_object.log('Cleared flag...', logging.DEBUG)
-
+                get_and_clear_flag(out_dir_path)
             else:
-                while not self.libre_locker.isSet() and not is_book_converted:
+                while not self.libre_locker.isSet():
                     self.logger_object.log(
-                        'Waiting for libre...', logging.DEBUG)
+                        "Waiting for libre...", logging.DEBUG)
                     flag = self.libre_locker.wait(50)
                     if flag:
                         if self.libre_locker.isSet():
-                            self.libre_locker.clear()
-                            self.logger_object.log(f'Got flag!', logging.DEBUG)
-                            self._libre_run(out_dir_path)
-                            self.libre_locker.set()
+                            get_and_clear_flag(out_dir_path)
                             break
-
         except Exception as exc:
             self.logger_object.log(
                 "Conversion has gone wrong. Libreoffice is not installed.", logging.ERROR)
             self.logger_object.log_error_to_main_log()
-            self.status_wrapper.set_error()
             raise exc
 
-        out_dir_path = os.path.join(out_dir_path, f'{self.book_id}.html')
+        out_dir_path = os.path.join(out_dir_path, f"{self.book_id}.html")
         html_path = pathlib.Path(out_dir_path)
 
-        try:
-            f = open(html_path)
-            f.close()
-        except FileNotFoundError as exc:
-            self.logger_object.log(
-                "Conversion has gone wrong. HTML file doesn't exist.", logging.ERROR)
-            self.logger_object.log_error_to_main_log()
-            self.status_wrapper.set_error()
-            raise exc
+        check_file_exists(
+            html_path, error_string="Conversion has gone wrong. HTML file doesn't exist.")
 
-        self.logger_object.log('End of conversion from .docx to .html.')
+        self.logger_object.log("End of conversion from .docx to .html.")
         self.logger_object.log(
-            f'Input file path after conversion: {html_path}.')
+            f"Input file path after conversion: {html_path}.")
         return html_path
 
     def read_html(self, html_path):
         """Method for reading .html file into beautiful soup tag."""
         try:
-            html_text = open(html_path, 'r', encoding='utf8').read()
-            self.logger_object.log('HTML for book has been loaded.')
+            html_text = open(html_path, "r", encoding="utf8").read()
+            self.logger_object.log("HTML for book has been loaded.")
         except FileNotFoundError as exc:
-            self.logger_object.log('There is no html to process.'
-                                   'Conversion went wrong or you specified wrong paths.', logging.ERROR)
+            self.logger_object.log("There is no html to process."
+                                   "Conversion went wrong or you specified wrong paths.", logging.ERROR)
             self.logger_object.log_error_to_main_log()
-            self.status_wrapper.set_error()
             raise exc
 
-        html_soup = BeautifulSoup(html_text, features='lxml')
+        html_soup = BeautifulSoup(html_text, features="lxml")
         return html_soup