Merge processing tags [Docx, Epub]

This commit is contained in:
Kiryl
2022-09-06 16:26:08 +03:00
parent ea37b19c36
commit ddc45e2d04
6 changed files with 226 additions and 277 deletions

View File

@@ -5,6 +5,7 @@ from threading import Event
from src.book_solver import BookSolver
from src.util.helpers import BookLogger
from src.html_preprocessor import HtmlPreprocessor
from src.style_preprocessor import StylePreprocessor
from src.docx_converter.docx2libre_html import Docx2LibreHTML
from src.docx_converter.html_docx_processor import HTMLDocxProcessor
@@ -48,10 +49,14 @@ class DocxBook(BookSolver):
# 2. Parses and cleans html, gets list of tags, footnotes and top-level headers
try:
style_processor = StylePreprocessor()
parser = HTMLDocxProcessor(html_soup=html_converter.html_soup,
logger=self.logger_object, style_processor=style_processor)
bs_tags, footnotes, top_level_headers = parser.process_html(
html_preprocessor = HtmlPreprocessor(
logger=self.logger_object, preset_path="presets/docx_presets.json")
style_preprocessor = StylePreprocessor()
html_processor = HTMLDocxProcessor(html_soup=html_converter.html_soup,
logger=self.logger_object,
html_preprocessor=html_preprocessor,
style_preprocessor=style_preprocessor)
bs_tags, footnotes, top_level_headers = html_processor.process_html(
self.access, html_converter.html_path, self.book_id)
except Exception as exc:
self.logger_object.log(
@@ -84,10 +89,12 @@ if __name__ == "__main__":
html_converter = Docx2LibreHTML(file_path=docx_file_path,
logger=logger_object, libre_locker=locker)
css_processor = StylePreprocessor()
parser = HTMLDocxProcessor(html_soup=html_converter.html_soup, logger=logger_object,
style_processor=css_processor, preset_path="../../presets/docx_presets.json")
content, footnotes, top_level_headers = parser.process_html(
html_preprocessor = HtmlPreprocessor(
logger=logger_object, preset_path="../../presets/docx_presets.json")
style_preprocessor = StylePreprocessor()
html_processor = HTMLDocxProcessor(html_soup=html_converter.html_soup, logger=logger_object,
html_preprocessor=html_preprocessor, style_preprocessor=style_preprocessor)
content, footnotes, top_level_headers = html_processor.process_html(
html_path=html_converter.html_path, book_id=html_converter.book_id)
json_converter = LibreHTML2JSONConverter(