Annot.[HTML->Html, _]

This commit is contained in:
Kiryl
2022-09-06 16:36:35 +03:00
parent ddc45e2d04
commit 83939e43cb
9 changed files with 22 additions and 23 deletions

View File

@@ -7,9 +7,9 @@ from src.book_solver import BookSolver
from src.util.helpers import BookLogger
from src.html_preprocessor import HtmlPreprocessor
from src.style_preprocessor import StylePreprocessor
from src.docx_converter.docx2libre_html import Docx2LibreHTML
from src.docx_converter.html_docx_processor import HTMLDocxProcessor
from src.docx_converter.libre_html2json_converter import LibreHTML2JSONConverter
from src.docx_converter.docx2libre_html import Docx2LibreHtml
from src.docx_converter.html_docx_processor import HtmlDocxProcessor
from src.docx_converter.libre_html2json_converter import LibreHtml2JsonConverter
class DocxBook(BookSolver):
@@ -38,7 +38,7 @@ class DocxBook(BookSolver):
"""
# 1. Converts docx to html with LibreOffice
try:
html_converter = Docx2LibreHTML(self.book_id, self.book_path, self.access,
html_converter = Docx2LibreHtml(self.book_id, self.book_path, self.access,
self.logger_object, self.libre_locker)
except Exception as exc:
self.logger_object.log(
@@ -52,7 +52,7 @@ class DocxBook(BookSolver):
html_preprocessor = HtmlPreprocessor(
logger=self.logger_object, preset_path="presets/docx_presets.json")
style_preprocessor = StylePreprocessor()
html_processor = HTMLDocxProcessor(html_soup=html_converter.html_soup,
html_processor = HtmlDocxProcessor(html_soup=html_converter.html_soup,
logger=self.logger_object,
html_preprocessor=html_preprocessor,
style_preprocessor=style_preprocessor)
@@ -67,7 +67,7 @@ class DocxBook(BookSolver):
# 3. Parses from line structure to nested structure with JSONConverter
try:
json_converter = LibreHTML2JSONConverter(bs_tags, footnotes, top_level_headers,
json_converter = LibreHtml2JsonConverter(bs_tags, footnotes, top_level_headers,
self.logger_object)
content_dict = json_converter.convert_to_dict()
except Exception as exc:
@@ -86,18 +86,18 @@ if __name__ == "__main__":
locker = Event()
locker.set()
html_converter = Docx2LibreHTML(file_path=docx_file_path,
html_converter = Docx2LibreHtml(file_path=docx_file_path,
logger=logger_object, libre_locker=locker)
html_preprocessor = HtmlPreprocessor(
logger=logger_object, preset_path="../../presets/docx_presets.json")
style_preprocessor = StylePreprocessor()
html_processor = HTMLDocxProcessor(html_soup=html_converter.html_soup, logger=logger_object,
html_processor = HtmlDocxProcessor(html_soup=html_converter.html_soup, logger=logger_object,
html_preprocessor=html_preprocessor, style_preprocessor=style_preprocessor)
content, footnotes, top_level_headers = html_processor.process_html(
html_path=html_converter.html_path, book_id=html_converter.book_id)
json_converter = LibreHTML2JSONConverter(
json_converter = LibreHtml2JsonConverter(
content, footnotes, top_level_headers, logger_object)
content_dict = json_converter.convert_to_dict()