Add preset processing from backend

This commit is contained in:
Kiryl
2022-07-27 20:19:48 +03:00
parent 32a54f0e4e
commit 84b692d39b
7 changed files with 69 additions and 68 deletions

View File

@@ -24,9 +24,10 @@ class BookSolver:
self.book_type = None self.book_type = None
self.book_id = book_id self.book_id = book_id
self.access = access self.access = access
self.file_path = None # path to book file, appears after downloading from server self.preset_path = None
self.output_path = None # path to json file self.book_path = None # path to book file, appears after downloading from server
self.logger_object = BookLogger(name=f'{__name__}_{self.book_id}', self.book_output_path = None # path to json file
self.logger_object = BookLogger(name=f"{__name__}_{self.book_id}",
book_id=book_id, book_id=book_id,
main_logger=main_logger) main_logger=main_logger)
self.status_wrapper = BookStatusWrapper( self.status_wrapper = BookStatusWrapper(
@@ -35,9 +36,9 @@ class BookSolver:
assert LiveCartaConfig.SUPPORTED_LEVELS == len(LiveCartaConfig.SUPPORTED_HEADERS), \ assert LiveCartaConfig.SUPPORTED_LEVELS == len(LiveCartaConfig.SUPPORTED_HEADERS), \
"Length of headers doesn't match allowed levels." "Length of headers doesn't match allowed levels."
def save_book_file(self, content: bytes): def save_file(self, content: bytes, path_to_save, file_type):
""" """
Function saves binary content of file to .docx/.epub Function saves binary content of file to folder(path_to_save)
Parameters Parameters
---------- ----------
content: bytes str content: bytes str
@@ -47,80 +48,100 @@ class BookSolver:
folder_path = os.path.dirname( folder_path = os.path.dirname(
os.path.dirname(os.path.abspath(__file__))) os.path.dirname(os.path.abspath(__file__)))
folder_path = os.path.join( folder_path = os.path.join(
folder_path, f'{self.book_type}/{self.book_id}') folder_path, path_to_save)
pathlib.Path(folder_path).mkdir(parents=True, exist_ok=True) pathlib.Path(folder_path).mkdir(parents=True, exist_ok=True)
file_path = os.path.join( file_path = os.path.join(
folder_path, f'{self.book_id}.{self.book_type}') folder_path, f"{self.book_id}.{file_type}")
try: try:
with open(file_path, 'wb+') as file: with open(file_path, "wb+") as file:
file.write(content) file.write(content)
self.logger_object.log(f'File was saved to folder: {folder_path}.') self.logger_object.log(
f"Preset file was saved to folder: {folder_path}.")
except Exception as exc: except Exception as exc:
self.logger_object.log( self.logger_object.log(
f"Error in writing {self.book_type} file.", logging.ERROR) f"Error in writing {self.book_type} file.", logging.ERROR)
self.logger_object.log_error_to_main_log() self.logger_object.log_error_to_main_log()
raise exc raise exc
return file_path
self.file_path = pathlib.Path(file_path) def get_preset_file(self):
"""Method for getting and saving preset from server"""
try:
self.logger_object.log(f"Start receiving preset file from server. URL:"
f" {self.access.url}/doc-convert/{self.book_id}/presets")
content = self.access.get_file(
file_path=f"{self.access.url}/doc-convert/{self.book_id}/presets")
self.logger_object.log("Preset file was received from server.")
self.preset_path = pathlib.Path(
str(self.save_file(content, path_to_save="presets", file_type="json")))
except FileNotFoundError as f_err:
self.logger_object.log(
"Can't get preset file from server.", logging.ERROR)
self.logger_object.log_error_to_main_log()
raise f_err
except Exception as exc:
raise exc
def get_book_file(self): def get_book_file(self):
"""Method for getting and saving book from server""" """Method for getting and saving book from server"""
try: try:
self.logger_object.log(f'Start receiving file from server. URL:' self.logger_object.log(f"Start receiving book file from server. URL:"
f' {self.access.url}/doc-convert/{self.book_id}/file') f" {self.access.url}/doc-convert/{self.book_id}/file")
content = self.access.get_book(self.book_id) content = self.access.get_file(
self.logger_object.log('File was received from server.') file_path=f"{self.access.url}/doc-convert/{self.book_id}/file")
self.save_book_file(content) self.logger_object.log("Book file was received from server.")
self.book_path = pathlib.Path(self.save_file(
content, path_to_save=f"books/{self.book_type}", file_type=self.book_type))
except FileNotFoundError as f_err: except FileNotFoundError as f_err:
self.logger_object.log( self.logger_object.log(
"Can't get file from server.", logging.ERROR) "Can't get book file from server.", logging.ERROR)
self.logger_object.log_error_to_main_log() self.logger_object.log_error_to_main_log()
raise f_err raise f_err
except Exception as exc: except Exception as exc:
raise exc raise exc
def check_output_directory(self): def check_output_directory(self):
if self.output_path is None: if self.book_output_path is None:
folder_path = os.path.dirname( folder_path = os.path.dirname(
os.path.dirname(os.path.abspath(__file__))) os.path.dirname(os.path.abspath(__file__)))
output_path = os.path.join( output_path = os.path.join(
folder_path, f'json/{self.book_id}.json') folder_path, f"books/json/{self.book_id}.json")
self.output_path = output_path self.book_output_path = output_path
self.output_path = pathlib.Path(self.output_path) self.book_output_path = pathlib.Path(self.book_output_path)
self.logger_object.log(f'Output file path: {self.output_path}') self.logger_object.log(f"Output file path: {self.book_output_path}")
pathlib.Path(self.output_path).parent.mkdir( pathlib.Path(self.book_output_path).parent.mkdir(
parents=True, exist_ok=True) parents=True, exist_ok=True)
self.output_path.touch(exist_ok=True) self.book_output_path.touch(exist_ok=True)
def write_to_json(self, content: dict): def write_to_json(self, content: dict):
self.check_output_directory() self.check_output_directory()
try: try:
with codecs.open(self.output_path, 'w', encoding='utf-8') as f: with codecs.open(self.book_output_path, "w", encoding="utf-8") as f:
json.dump(content, f, ensure_ascii=False) json.dump(content, f, ensure_ascii=False)
self.logger_object.log( self.logger_object.log(
f'Data has been saved to .json file: {self.output_path}') f"Data has been saved to .json file: {self.book_output_path}")
except Exception as exc: except Exception as exc:
self.logger_object.log( self.logger_object.log(
'Error has occurred while writing .json file.' + str(exc), logging.ERROR) "Error has occurred while writing .json file." + str(exc), logging.ERROR)
def send_json_content_to_server(self, content: dict): def send_json_content_to_server(self, content: dict):
"""Function sends json_content to site""" """Function sends json_content to site"""
try: try:
self.access.send_book(self.book_id, content) self.access.send_book(self.book_id, content)
self.logger_object.log(f'JSON data has been sent to server.') self.logger_object.log(f"JSON data has been sent to server.")
except Exception as exc: except Exception as exc:
self.logger_object.log( self.logger_object.log(
'Error has occurred while sending json content.', logging.ERROR) "Error has occurred while sending json content.", logging.ERROR)
self.logger_object.log_error_to_main_log() self.logger_object.log_error_to_main_log()
self.status_wrapper.set_error() self.status_wrapper.set_error()
raise exc raise exc
@abstractmethod @abstractmethod
def get_converted_book(self): def get_converted_book(self):
self.logger_object.log('Beginning of processing .json output.') self.logger_object.log("Beginning of processing .json output.")
self.status_wrapper.set_generating() self.status_wrapper.set_generating()
return {} return {}
@@ -133,7 +154,8 @@ class BookSolver:
""" """
try: try:
self.logger_object.log( self.logger_object.log(
f'Beginning of conversion from .{self.book_type} to .json.') f"Beginning of conversion from .{self.book_type} to .json.")
self.get_preset_file()
self.get_book_file() self.get_book_file()
self.status_wrapper.set_processing() self.status_wrapper.set_processing()
content_dict = self.get_converted_book() content_dict = self.get_converted_book()
@@ -141,11 +163,11 @@ class BookSolver:
self.write_to_json(content_dict) self.write_to_json(content_dict)
self.send_json_content_to_server(content_dict) self.send_json_content_to_server(content_dict)
self.logger_object.log( self.logger_object.log(
f'End of the conversion to LiveCarta format. Check {self.output_path}.') f"End of the conversion to LiveCarta format. Check {self.book_output_path}.")
except Exception as exc: except Exception as exc:
self.status_wrapper.set_error() self.status_wrapper.set_error()
self.logger_object.log( self.logger_object.log(
'Error has occurred while conversion.', logging.ERROR) "Error has occurred while conversion.", logging.ERROR)
self.logger_object.log_error_to_main_log(str(exc)) self.logger_object.log_error_to_main_log(str(exc))
raise exc raise exc
@@ -158,15 +180,15 @@ class BookSolver:
""" """
try: try:
self.logger_object.log( self.logger_object.log(
f'Data has been downloaded from {file_path} file') f"Data has been downloaded from {file_path} file")
self.status_wrapper.set_processing() self.status_wrapper.set_processing()
with codecs.open(file_path, 'r', encoding='utf-8') as f_json: with codecs.open(file_path, "r", encoding="utf-8") as f_json:
content_dict = json.load(f_json) content_dict = json.load(f_json)
self.status_wrapper.set_generating() self.status_wrapper.set_generating()
self.send_json_content_to_server(content_dict) self.send_json_content_to_server(content_dict)
self.logger_object.log(f'Sent a file to server. Check LiveCarta.') self.logger_object.log(f"Sent a file to server. Check LiveCarta.")
except Exception as exc: except Exception as exc:
self.status_wrapper.set_error() self.status_wrapper.set_error()
self.logger_object.log( self.logger_object.log(
'Error has occurred while reading json file.' + str(exc), logging.ERROR) "Error has occurred while reading json file." + str(exc), logging.ERROR)
self.logger_object.log_error_to_main_log(str(exc)) self.logger_object.log_error_to_main_log(str(exc))

View File

@@ -26,7 +26,7 @@ def process_images(access, html_path, book_id, body_tag):
folder_path = os.path.dirname( folder_path = os.path.dirname(
os.path.dirname(os.path.abspath(__file__))) os.path.dirname(os.path.abspath(__file__)))
new_path = pathlib.Path(os.path.join( new_path = pathlib.Path(os.path.join(
folder_path, f'../json/img_{book_id}/')) folder_path, f'../books/json/img_{book_id}/'))
new_path.mkdir(exist_ok=True) new_path.mkdir(exist_ok=True)
new_img_path = new_path / img_name new_img_path = new_path / img_name
copyfile(img_path, new_img_path) copyfile(img_path, new_img_path)

View File

@@ -13,7 +13,6 @@ from typing import Dict, Union, List
from bs4 import BeautifulSoup, NavigableString, Tag from bs4 import BeautifulSoup, NavigableString, Tag
from src.util.helpers import BookLogger from src.util.helpers import BookLogger
from src.preset_processor import PresetProcessor
from src.epub_converter.css_processor import CSSPreprocessor from src.epub_converter.css_processor import CSSPreprocessor
from src.epub_converter.html_epub_processor import HtmlEpubPreprocessor from src.epub_converter.html_epub_processor import HtmlEpubPreprocessor
from src.livecarta_config import LiveCartaConfig from src.livecarta_config import LiveCartaConfig
@@ -24,11 +23,11 @@ from src.epub_converter.tag_inline_style_processor import TagInlineStyleProcesso
class EpubConverter: class EpubConverter:
def __init__(self, file_path, access=None, logger=None, css_processor=None, html_processor=None): def __init__(self, book_path, access=None, logger=None, css_processor=None, html_processor=None):
self.file_path = file_path self.book_path = book_path
self.access = access self.access = access
self.logger: BookLogger = logger self.logger: BookLogger = logger
self.ebooklib_book = epub.read_epub(file_path) self.ebooklib_book = epub.read_epub(book_path)
self.css_processor = css_processor self.css_processor = css_processor
self.html_processor = html_processor self.html_processor = html_processor
@@ -603,7 +602,7 @@ class EpubConverter:
path_to_html=nav_point.href, path_to_html=nav_point.href,
access=self.access, access=self.access,
path2aws_path=self.book_image_src_path2aws_path, path2aws_path=self.book_image_src_path2aws_path,
book_id=Path(self.file_path).stem) book_id=Path(self.book_path).stem)
sub_nodes = [] sub_nodes = []
# warning! not EpubHtmlItems won't be added to chapter # warning! not EpubHtmlItems won't be added to chapter
# if it doesn't have subchapters # if it doesn't have subchapters
@@ -638,11 +637,8 @@ if __name__ == "__main__":
logger_object = BookLogger( logger_object = BookLogger(
name="epub", book_id=epub_file_path.split("/")[-1]) name="epub", book_id=epub_file_path.split("/")[-1])
preset = PresetProcessor(preset_path="../../config/presets.json", logger=logger_object)\
.get_preset_json()
css_processor = CSSPreprocessor() css_processor = CSSPreprocessor()
html_processor = HtmlEpubPreprocessor( html_processor = HtmlEpubPreprocessor("../../presets/presets.json", logger=logger_object)
preset=preset, logger=logger_object)
json_converter = EpubConverter(epub_file_path, logger=logger_object, json_converter = EpubConverter(epub_file_path, logger=logger_object,
css_processor=css_processor, html_processor=html_processor) css_processor=css_processor, html_processor=html_processor)

View File

@@ -1,5 +1,4 @@
from src.book_solver import BookSolver from src.book_solver import BookSolver
from src.preset_processor import PresetProcessor
from src.epub_converter.css_processor import CSSPreprocessor from src.epub_converter.css_processor import CSSPreprocessor
from src.epub_converter.html_epub_processor import HtmlEpubPreprocessor from src.epub_converter.html_epub_processor import HtmlEpubPreprocessor
from src.epub_converter.epub_converter import EpubConverter from src.epub_converter.epub_converter import EpubConverter
@@ -28,12 +27,10 @@ class EpubBook(BookSolver):
json for LiveCarta platform json for LiveCarta platform
""" """
preset = PresetProcessor(preset_path="config/presets.json", logger=self.logger_object)\
.get_preset_json()
css_processor = CSSPreprocessor() css_processor = CSSPreprocessor()
html_processor = HtmlEpubPreprocessor(preset=preset, logger=self.logger_object) html_processor = HtmlEpubPreprocessor(self.preset_path, logger=self.logger_object)
json_converter = EpubConverter( json_converter = EpubConverter(
self.file_path, access=self.access, logger=self.logger_object, self.book_path, access=self.access, logger=self.logger_object,
css_processor=css_processor, html_processor=html_processor) css_processor=css_processor, html_processor=html_processor)
content_dict = json_converter.convert_to_dict() content_dict = json_converter.convert_to_dict()
return content_dict return content_dict

View File

@@ -1,12 +1,13 @@
import re import re
import json
from bs4 import BeautifulSoup, NavigableString, Comment, Tag from bs4 import BeautifulSoup, NavigableString, Comment, Tag
from src.util.helpers import BookLogger from src.util.helpers import BookLogger
class HtmlEpubPreprocessor: class HtmlEpubPreprocessor:
def __init__(self, preset, logger=None): def __init__(self, preset_path, logger=None):
self.preset = preset self.preset = json.load(open(preset_path))
self.logger: BookLogger = logger self.logger: BookLogger = logger
self.name2function = { self.name2function = {
"table_wrapper": self._wrap_tags_with_table, "table_wrapper": self._wrap_tags_with_table,

View File

@@ -1,15 +0,0 @@
import json
from src.util.helpers import BookLogger
class PresetProcessor:
    """Loads a preset description from a .json file on disk."""

    def __init__(self, preset_path="config/presets.json", logger=None):
        """
        Parameters
        ----------
        preset_path: str
            path to the preset .json file
        logger: BookLogger
            logger kept on the instance; this class does not write to it itself

        """
        self.preset_path = preset_path
        self.logger: BookLogger = logger

    def get_preset_json(self):
        """
        Function reads the preset file and returns its parsed content

        Returns
        -------
        data
            whatever json.load produces for the file (dict, list, ...)

        Raises
        ------
        OSError
            if the file at preset_path can't be opened
        json.JSONDecodeError
            if the file content is not valid JSON

        """
        # The context manager closes the handle even when parsing fails;
        # the previous bare open() left the file open until garbage collection.
        with open(self.preset_path, encoding="utf-8") as preset_file:
            return json.load(preset_file)