From 84b692d39b8741dcc100bfd3a56bd75890746db2 Mon Sep 17 00:00:00 2001 From: Kiryl Date: Wed, 27 Jul 2022 20:19:48 +0300 Subject: [PATCH] Add preset processing from backend --- {config => presets}/presets.json | 0 src/book_solver.py | 94 ++++++++++++++--------- src/docx_converter/image_processing.py | 2 +- src/epub_converter/epub_converter.py | 14 ++-- src/epub_converter/epub_solver.py | 7 +- src/epub_converter/html_epub_processor.py | 5 +- src/preset_processor.py | 15 ---- 7 files changed, 69 insertions(+), 68 deletions(-) rename {config => presets}/presets.json (100%) delete mode 100644 src/preset_processor.py diff --git a/config/presets.json b/presets/presets.json similarity index 100% rename from config/presets.json rename to presets/presets.json diff --git a/src/book_solver.py b/src/book_solver.py index c45af0f..10af671 100644 --- a/src/book_solver.py +++ b/src/book_solver.py @@ -24,9 +24,10 @@ class BookSolver: self.book_type = None self.book_id = book_id self.access = access - self.file_path = None # path to book file, appears after downloading from server - self.output_path = None # path to json file - self.logger_object = BookLogger(name=f'{__name__}_{self.book_id}', + self.preset_path = None + self.book_path = None # path to book file, appears after downloading from server + self.book_output_path = None # path to json file + self.logger_object = BookLogger(name=f"{__name__}_{self.book_id}", book_id=book_id, main_logger=main_logger) self.status_wrapper = BookStatusWrapper( @@ -35,9 +36,9 @@ class BookSolver: assert LiveCartaConfig.SUPPORTED_LEVELS == len(LiveCartaConfig.SUPPORTED_HEADERS), \ "Length of headers doesn't match allowed levels." - def save_book_file(self, content: bytes): + def save_file(self, content: bytes, path_to_save, file_type): """ - Function saves binary content of file to .docx/.epub + Function saves binary content of file to folder(path_to_save) Parameters ---------- content: bytes str @@ -47,80 +48,100 @@ class BookSolver: folder_path = os.path.dirname( os.path.dirname(os.path.abspath(__file__))) folder_path = os.path.join( - folder_path, f'{self.book_type}/{self.book_id}') + folder_path, path_to_save) pathlib.Path(folder_path).mkdir(parents=True, exist_ok=True) file_path = os.path.join( - folder_path, f'{self.book_id}.{self.book_type}') + folder_path, f"{self.book_id}.{file_type}") try: - with open(file_path, 'wb+') as file: + with open(file_path, "wb+") as file: file.write(content) - self.logger_object.log(f'File was saved to folder: {folder_path}.') + self.logger_object.log( + f"Preset file was saved to folder: {folder_path}.") except Exception as exc: self.logger_object.log( f"Error in writing {self.book_type} file.", logging.ERROR) self.logger_object.log_error_to_main_log() raise exc + return file_path - self.file_path = pathlib.Path(file_path) + def get_preset_file(self): + """Method for getting and saving preset from server""" + try: + self.logger_object.log(f"Start receiving preset file from server. URL:" + f" {self.access.url}/doc-convert/{self.book_id}/presets") + content = self.access.get_file( + file_path=f"{self.access.url}/doc-convert/{self.book_id}/presets") + self.logger_object.log("Preset file was received from server.") + self.preset_path = pathlib.Path( + str(self.save_file(content, path_to_save="presets", file_type="json"))) + except FileNotFoundError as f_err: + self.logger_object.log( + "Can't get preset file from server.", logging.ERROR) + self.logger_object.log_error_to_main_log() + raise f_err + except Exception as exc: + raise exc def get_book_file(self): """Method for getting and saving book from server""" try: - self.logger_object.log(f'Start receiving file from server. URL:' - f' {self.access.url}/doc-convert/{self.book_id}/file') - content = self.access.get_book(self.book_id) - self.logger_object.log('File was received from server.') - self.save_book_file(content) + self.logger_object.log(f"Start receiving book file from server. URL:" + f" {self.access.url}/doc-convert/{self.book_id}/file") + content = self.access.get_file( + file_path=f"{self.access.url}/doc-convert/{self.book_id}/file") + self.logger_object.log("Book file was received from server.") + self.book_path = pathlib.Path(self.save_file( + content, path_to_save=f"books/{self.book_type}", file_type=self.book_type)) except FileNotFoundError as f_err: self.logger_object.log( - "Can't get file from server.", logging.ERROR) + "Can't get book file from server.", logging.ERROR) self.logger_object.log_error_to_main_log() raise f_err except Exception as exc: raise exc def check_output_directory(self): - if self.output_path is None: + if self.book_output_path is None: folder_path = os.path.dirname( os.path.dirname(os.path.abspath(__file__))) output_path = os.path.join( - folder_path, f'json/{self.book_id}.json') - self.output_path = output_path + folder_path, f"books/json/{self.book_id}.json") + self.book_output_path = output_path - self.output_path = pathlib.Path(self.output_path) - self.logger_object.log(f'Output file path: {self.output_path}') + self.book_output_path = pathlib.Path(self.book_output_path) + self.logger_object.log(f"Output file path: {self.book_output_path}") - pathlib.Path(self.output_path).parent.mkdir( + pathlib.Path(self.book_output_path).parent.mkdir( parents=True, exist_ok=True) - self.output_path.touch(exist_ok=True) + self.book_output_path.touch(exist_ok=True) def write_to_json(self, content: dict): self.check_output_directory() try: - with codecs.open(self.output_path, 'w', encoding='utf-8') as f: + with codecs.open(self.book_output_path, "w", encoding="utf-8") as f: json.dump(content, f, ensure_ascii=False) self.logger_object.log( - f'Data has been saved to .json file: {self.output_path}') + f"Data has been saved to .json file: {self.book_output_path}") except Exception as exc: self.logger_object.log( - 'Error has occurred while writing .json file.' + str(exc), logging.ERROR) + "Error has occurred while writing .json file." + str(exc), logging.ERROR) def send_json_content_to_server(self, content: dict): """Function sends json_content to site""" try: self.access.send_book(self.book_id, content) - self.logger_object.log(f'JSON data has been sent to server.') + self.logger_object.log(f"JSON data has been sent to server.") except Exception as exc: self.logger_object.log( - 'Error has occurred while sending json content.', logging.ERROR) + "Error has occurred while sending json content.", logging.ERROR) self.logger_object.log_error_to_main_log() self.status_wrapper.set_error() raise exc @abstractmethod def get_converted_book(self): - self.logger_object.log('Beginning of processing .json output.') + self.logger_object.log("Beginning of processing .json output.") self.status_wrapper.set_generating() return {} @@ -133,7 +154,8 @@ class BookSolver: """ try: self.logger_object.log( - f'Beginning of conversion from .{self.book_type} to .json.') + f"Beginning of conversion from .{self.book_type} to .json.") + self.get_preset_file() self.get_book_file() self.status_wrapper.set_processing() content_dict = self.get_converted_book() @@ -141,11 +163,11 @@ class BookSolver: self.write_to_json(content_dict) self.send_json_content_to_server(content_dict) self.logger_object.log( - f'End of the conversion to LiveCarta format. Check {self.output_path}.') + f"End of the conversion to LiveCarta format. Check {self.book_output_path}.") except Exception as exc: self.status_wrapper.set_error() self.logger_object.log( - 'Error has occurred while conversion.', logging.ERROR) + "Error has occurred while conversion.", logging.ERROR) self.logger_object.log_error_to_main_log(str(exc)) raise exc @@ -158,15 +180,15 @@ class BookSolver: """ try: self.logger_object.log( - f'Data has been downloaded from {file_path} file') + f"Data has been downloaded from {file_path} file") self.status_wrapper.set_processing() - with codecs.open(file_path, 'r', encoding='utf-8') as f_json: + with codecs.open(file_path, "r", encoding="utf-8") as f_json: content_dict = json.load(f_json) self.status_wrapper.set_generating() self.send_json_content_to_server(content_dict) - self.logger_object.log(f'Sent a file to server. Check LiveCarta.') + self.logger_object.log(f"Sent a file to server. Check LiveCarta.") except Exception as exc: self.status_wrapper.set_error() self.logger_object.log( - 'Error has occurred while reading json file.' + str(exc), logging.ERROR) + "Error has occurred while reading json file." + str(exc), logging.ERROR) self.logger_object.log_error_to_main_log(str(exc)) diff --git a/src/docx_converter/image_processing.py b/src/docx_converter/image_processing.py index dfd413b..0eab671 100644 --- a/src/docx_converter/image_processing.py +++ b/src/docx_converter/image_processing.py @@ -26,7 +26,7 @@ def process_images(access, html_path, book_id, body_tag): folder_path = os.path.dirname( os.path.dirname(os.path.abspath(__file__))) new_path = pathlib.Path(os.path.join( - folder_path, f'../json/img_{book_id}/')) + folder_path, f'../books/json/img_{book_id}/')) new_path.mkdir(exist_ok=True) new_img_path = new_path / img_name copyfile(img_path, new_img_path) diff --git a/src/epub_converter/epub_converter.py b/src/epub_converter/epub_converter.py index f2c3232..4a09481 100644 --- a/src/epub_converter/epub_converter.py +++ b/src/epub_converter/epub_converter.py @@ -13,7 +13,6 @@ from typing import Dict, Union, List from bs4 import BeautifulSoup, NavigableString, Tag from src.util.helpers import BookLogger -from src.preset_processor import PresetProcessor from src.epub_converter.css_processor import CSSPreprocessor from src.epub_converter.html_epub_processor import HtmlEpubPreprocessor from src.livecarta_config import LiveCartaConfig @@ -24,11 +23,11 @@ from src.epub_converter.tag_inline_style_processor import TagInlineStyleProcesso class EpubConverter: - def __init__(self, file_path, access=None, logger=None, css_processor=None, html_processor=None): - self.file_path = file_path + def __init__(self, book_path, access=None, logger=None, css_processor=None, html_processor=None): + self.book_path = book_path self.access = access self.logger: BookLogger = logger - self.ebooklib_book = epub.read_epub(file_path) + self.ebooklib_book = epub.read_epub(book_path) self.css_processor = css_processor self.html_processor = html_processor @@ -603,7 +602,7 @@ class EpubConverter: path_to_html=nav_point.href, access=self.access, path2aws_path=self.book_image_src_path2aws_path, - book_id=Path(self.file_path).stem) + book_id=Path(self.book_path).stem) sub_nodes = [] # warning! not EpubHtmlItems won't be added to chapter # if it doesn't have subchapters @@ -638,11 +637,8 @@ if __name__ == "__main__": logger_object = BookLogger( name="epub", book_id=epub_file_path.split("/")[-1]) - preset = PresetProcessor(preset_path="../../config/presets.json", logger=logger_object)\ - .get_preset_json() css_processor = CSSPreprocessor() - html_processor = HtmlEpubPreprocessor( - preset=preset, logger=logger_object) + html_processor = HtmlEpubPreprocessor("../../presets/presets.json", logger=logger_object) json_converter = EpubConverter(epub_file_path, logger=logger_object, css_processor=css_processor, html_processor=html_processor) diff --git a/src/epub_converter/epub_solver.py b/src/epub_converter/epub_solver.py index e0cfef6..9131eda 100644 --- a/src/epub_converter/epub_solver.py +++ b/src/epub_converter/epub_solver.py @@ -1,5 +1,4 @@ from src.book_solver import BookSolver -from src.preset_processor import PresetProcessor from src.epub_converter.css_processor import CSSPreprocessor from src.epub_converter.html_epub_processor import HtmlEpubPreprocessor from src.epub_converter.epub_converter import EpubConverter @@ -28,12 +27,10 @@ class EpubBook(BookSolver): json for LiveCarta platform """ - preset = PresetProcessor(preset_path="config/presets.json", logger=self.logger_object)\ - .get_preset_json() css_processor = CSSPreprocessor() - html_processor = HtmlEpubPreprocessor(preset=preset, logger=self.logger_object) + html_processor = HtmlEpubPreprocessor(self.preset_path, logger=self.logger_object) json_converter = EpubConverter( - self.file_path, access=self.access, logger=self.logger_object, + self.book_path, access=self.access, logger=self.logger_object, css_processor=css_processor, html_processor=html_processor) content_dict = json_converter.convert_to_dict() return content_dict diff --git a/src/epub_converter/html_epub_processor.py b/src/epub_converter/html_epub_processor.py index 0df4908..d8403d1 100644 --- a/src/epub_converter/html_epub_processor.py +++ b/src/epub_converter/html_epub_processor.py @@ -1,12 +1,13 @@ import re +import json from bs4 import BeautifulSoup, NavigableString, Comment, Tag from src.util.helpers import BookLogger class HtmlEpubPreprocessor: - def __init__(self, preset, logger=None): - self.preset = preset + def __init__(self, preset_path, logger=None): + self.preset = json.load(open(preset_path)) self.logger: BookLogger = logger self.name2function = { "table_wrapper": self._wrap_tags_with_table, diff --git a/src/preset_processor.py b/src/preset_processor.py deleted file mode 100644 index a1cbb93..0000000 --- a/src/preset_processor.py +++ /dev/null @@ -1,15 +0,0 @@ -import json - - -from src.util.helpers import BookLogger - - -class PresetProcessor: - def __init__(self, preset_path="config/presets.json", logger=None): - self.preset_path = preset_path - self.logger: BookLogger = logger - - def get_preset_json(self): - f = open(self.preset_path) - data = json.load(f) - return data