Set up local docx_converter

This commit is contained in:
Kiryl
2022-07-14 19:13:34 +03:00
parent 7b35d8a7c2
commit b1ccd796c9
2 changed files with 8 additions and 7 deletions

View File

@@ -35,7 +35,7 @@ class DocxBook(BookSolver):
"""
# 1. Converts docx to html with LibreOffice
html_converter = Docx2LibreHTML(self.book_id, self.file_path, self.access,
-self.logger_object, self.status_wrapper, self.libre_locker)
+self.logger_object, self.libre_locker)
# TODO presets
# 2. Parses and cleans html, gets list of tags, gets footnotes
@@ -46,7 +46,7 @@ class DocxBook(BookSolver):
# 3. Parses from line structure to nested structure with JSONConverter
json_converter = LibreHTML2JSONConverter(bs_tags, footnotes, top_level_headers,
-self.logger_object, self.status_wrapper)
+self.logger_object)
content_dict = json_converter.convert_to_dict()
return content_dict
@@ -56,12 +56,15 @@ if __name__ == "__main__":
docx_file_path = '../../docx/music_inquiry.docx'
logger_object = BookLogger(
name='docx', book_id=docx_file_path.split('/')[-1])
+locker = Event()
+locker.set()
-html_converter = Docx2LibreHTML(file_path=docx_file_path)
+html_converter = Docx2LibreHTML(file_path=docx_file_path,
+logger=logger_object, libre_locker=locker)
parser = HTMLDocxPreprocessor(html_converter.html_soup, logger_object)
content, footnotes, top_level_headers = parser.process_html(
-html_converter.html_path)
+html_path=html_converter.html_path, book_id=html_converter.book_id)
json_converter = LibreHTML2JSONConverter(
content, footnotes, top_level_headers, logger_object)