diff --git a/src/docx_converter/docx2libre_html.py b/src/docx_converter/docx2libre_html.py
index 889aa25..fbb24fe 100644
--- a/src/docx_converter/docx2libre_html.py
+++ b/src/docx_converter/docx2libre_html.py
@@ -10,12 +10,12 @@ from src.util.helpers import BookLogger
class Docx2LibreHTML:
- def __init__(self, book_id=0, file_path=None, access=None, logger=None, status_wrapper=None, libre_locker=None):
- self.book_id = book_id
+ def __init__(self, book_id=0, file_path=None, access=None, logger=None, libre_locker=None):
+ self.book_id = book_id if book_id != 0 else pathlib.Path(
+ file_path).stem
self.file_path = file_path
self.access = access
self.logger_object: BookLogger = logger
- self.status_wrapper: status_wrapper = status_wrapper
# critical section for occupying libreoffice by one thread
self.libre_locker: Event() = libre_locker
@@ -24,15 +24,15 @@ class Docx2LibreHTML:
self.html_soup = self.read_html(self.html_path)
def _libre_run(self, out_dir_path):
- command = ['libreoffice', '--headless',
- '--convert-to', 'html', f'{str(self.file_path)}',
- '--outdir', f'{out_dir_path}']
+ command = ["libreoffice", "--headless",
+ "--convert-to", "html", f"{str(self.file_path)}",
+ "--outdir", f"{out_dir_path}"]
print(command)
result = subprocess.run(command, stdout=PIPE, stderr=PIPE)
- self.logger_object.log(f'Result of libre conversion for book_{self.book_id}:'
- f' {result.returncode}, {result.stdout}', logging.DEBUG)
- self.logger_object.log(f'Any error while libre conversion for book_'
- f'{self.book_id}: {result.stderr}', logging.DEBUG)
+ self.logger_object.log(f"Result of libre conversion for book_{self.book_id}:"
+ f" {result.returncode}, {result.stdout}", logging.DEBUG)
+ self.logger_object.log(f"Any error while libre conversion for book_"
+ f"{self.book_id}: {result.stderr}", logging.DEBUG)
def convert_docx_to_html(self):
"""
@@ -48,82 +48,74 @@ class Docx2LibreHTML:
path to html file, file appears after libre-conversion
"""
- self.logger_object.log(f'File - {self.file_path}.')
- print(f'{self.file_path}')
- self.logger_object.log('Beginning of conversion from .docx to .html.')
+ def get_and_clear_flag(out_dir_path: str):
+ self.libre_locker.clear()
+ self.logger_object.log(f"Got flag!", logging.DEBUG)
+ self._libre_run(out_dir_path)
+ self.libre_locker.set()
+ self.logger_object.log("Cleared flag...", logging.DEBUG)
- try:
- f = open(self.file_path)
- f.close()
- except FileNotFoundError as error:
- self.logger_object.log(
- 'Invalid path to input data.', logging.ERROR)
- self.status_wrapper.set_error()
- raise error
+ def check_file_exists(path, error_string: str):
+ try:
+ f = open(path)
+ f.close()
+ except FileNotFoundError as error:
+ self.logger_object.log(
+ error_string, logging.ERROR)
+ self.logger_object.log_error_to_main_log()
+ raise error
+
+ self.logger_object.log(f"File - {self.file_path}.")
+ print(f"{self.file_path}")
+ self.logger_object.log("Beginning of conversion from .docx to .html.")
+
+ check_file_exists(
+ self.file_path, error_string="Invalid path to input data.")
folder_path = os.path.dirname(
os.path.dirname(os.path.abspath(__file__)))
- out_dir_path = os.path.join(folder_path, f'../html/{self.book_id}')
+ out_dir_path = os.path.join(folder_path, f"../html/{self.book_id}")
pathlib.Path(out_dir_path).mkdir(parents=True, exist_ok=True)
- is_book_converted = False
try:
if self.libre_locker.isSet():
- self.libre_locker.clear()
- self.logger_object.log('Got flag...', logging.DEBUG)
- self._libre_run(out_dir_path)
- self.libre_locker.set()
- self.logger_object.log('Cleared flag...', logging.DEBUG)
-
+ get_and_clear_flag(out_dir_path)
else:
- while not self.libre_locker.isSet() and not is_book_converted:
+ while not self.libre_locker.isSet():
self.logger_object.log(
- 'Waiting for libre...', logging.DEBUG)
+ "Waiting for libre...", logging.DEBUG)
flag = self.libre_locker.wait(50)
if flag:
if self.libre_locker.isSet():
- self.libre_locker.clear()
- self.logger_object.log(f'Got flag!', logging.DEBUG)
- self._libre_run(out_dir_path)
- self.libre_locker.set()
+ get_and_clear_flag(out_dir_path)
break
-
except Exception as exc:
self.logger_object.log(
"Conversion has gone wrong. Libreoffice is not installed.", logging.ERROR)
self.logger_object.log_error_to_main_log()
- self.status_wrapper.set_error()
raise exc
- out_dir_path = os.path.join(out_dir_path, f'{self.book_id}.html')
+ out_dir_path = os.path.join(out_dir_path, f"{self.book_id}.html")
html_path = pathlib.Path(out_dir_path)
- try:
- f = open(html_path)
- f.close()
- except FileNotFoundError as exc:
- self.logger_object.log(
- "Conversion has gone wrong. HTML file doesn't exist.", logging.ERROR)
- self.logger_object.log_error_to_main_log()
- self.status_wrapper.set_error()
- raise exc
+ check_file_exists(
+ html_path, error_string="Conversion has gone wrong. HTML file doesn't exist.")
- self.logger_object.log('End of conversion from .docx to .html.')
+ self.logger_object.log("End of conversion from .docx to .html.")
self.logger_object.log(
- f'Input file path after conversion: {html_path}.')
+ f"Input file path after conversion: {html_path}.")
return html_path
def read_html(self, html_path):
"""Method for reading .html file into beautiful soup tag."""
try:
- html_text = open(html_path, 'r', encoding='utf8').read()
- self.logger_object.log('HTML for book has been loaded.')
+ html_text = open(html_path, "r", encoding="utf8").read()
+ self.logger_object.log("HTML for book has been loaded.")
except FileNotFoundError as exc:
- self.logger_object.log('There is no html to process.'
- 'Conversion went wrong or you specified wrong paths.', logging.ERROR)
+ self.logger_object.log("There is no html to process. "
+ "Conversion went wrong or you specified wrong paths.", logging.ERROR)
self.logger_object.log_error_to_main_log()
- self.status_wrapper.set_error()
raise exc
- html_soup = BeautifulSoup(html_text, features='lxml')
+ html_soup = BeautifulSoup(html_text, features="lxml")
return html_soup