forked from LiveCarta/BookConverter
Add preset processing from backend
This commit is contained in:
@@ -24,9 +24,10 @@ class BookSolver:
|
||||
self.book_type = None
|
||||
self.book_id = book_id
|
||||
self.access = access
|
||||
self.file_path = None # path to book file, appears after downloading from server
|
||||
self.output_path = None # path to json file
|
||||
self.logger_object = BookLogger(name=f'{__name__}_{self.book_id}',
|
||||
self.preset_path = None
|
||||
self.book_path = None # path to book file, appears after downloading from server
|
||||
self.book_output_path = None # path to json file
|
||||
self.logger_object = BookLogger(name=f"{__name__}_{self.book_id}",
|
||||
book_id=book_id,
|
||||
main_logger=main_logger)
|
||||
self.status_wrapper = BookStatusWrapper(
|
||||
@@ -35,9 +36,9 @@ class BookSolver:
|
||||
assert LiveCartaConfig.SUPPORTED_LEVELS == len(LiveCartaConfig.SUPPORTED_HEADERS), \
|
||||
"Length of headers doesn't match allowed levels."
|
||||
|
||||
def save_book_file(self, content: bytes):
|
||||
def save_file(self, content: bytes, path_to_save, file_type):
|
||||
"""
|
||||
Function saves binary content of file to .docx/.epub
|
||||
Function saves binary content of file to folder(path_to_save)
|
||||
Parameters
|
||||
----------
|
||||
content: bytes str
|
||||
@@ -47,80 +48,100 @@ class BookSolver:
|
||||
folder_path = os.path.dirname(
|
||||
os.path.dirname(os.path.abspath(__file__)))
|
||||
folder_path = os.path.join(
|
||||
folder_path, f'{self.book_type}/{self.book_id}')
|
||||
folder_path, path_to_save)
|
||||
pathlib.Path(folder_path).mkdir(parents=True, exist_ok=True)
|
||||
|
||||
file_path = os.path.join(
|
||||
folder_path, f'{self.book_id}.{self.book_type}')
|
||||
folder_path, f"{self.book_id}.{file_type}")
|
||||
try:
|
||||
with open(file_path, 'wb+') as file:
|
||||
with open(file_path, "wb+") as file:
|
||||
file.write(content)
|
||||
self.logger_object.log(f'File was saved to folder: {folder_path}.')
|
||||
self.logger_object.log(
|
||||
f"Preset file was saved to folder: {folder_path}.")
|
||||
except Exception as exc:
|
||||
self.logger_object.log(
|
||||
f"Error in writing {self.book_type} file.", logging.ERROR)
|
||||
self.logger_object.log_error_to_main_log()
|
||||
raise exc
|
||||
return file_path
|
||||
|
||||
self.file_path = pathlib.Path(file_path)
|
||||
def get_preset_file(self):
|
||||
"""Method for getting and saving preset from server"""
|
||||
try:
|
||||
self.logger_object.log(f"Start receiving preset file from server. URL:"
|
||||
f" {self.access.url}/doc-convert/{self.book_id}/presets")
|
||||
content = self.access.get_file(
|
||||
file_path=f"{self.access.url}/doc-convert/{self.book_id}/presets")
|
||||
self.logger_object.log("Preset file was received from server.")
|
||||
self.preset_path = pathlib.Path(
|
||||
str(self.save_file(content, path_to_save="presets", file_type="json")))
|
||||
except FileNotFoundError as f_err:
|
||||
self.logger_object.log(
|
||||
"Can't get preset file from server.", logging.ERROR)
|
||||
self.logger_object.log_error_to_main_log()
|
||||
raise f_err
|
||||
except Exception as exc:
|
||||
raise exc
|
||||
|
||||
def get_book_file(self):
|
||||
"""Method for getting and saving book from server"""
|
||||
try:
|
||||
self.logger_object.log(f'Start receiving file from server. URL:'
|
||||
f' {self.access.url}/doc-convert/{self.book_id}/file')
|
||||
content = self.access.get_book(self.book_id)
|
||||
self.logger_object.log('File was received from server.')
|
||||
self.save_book_file(content)
|
||||
self.logger_object.log(f"Start receiving book file from server. URL:"
|
||||
f" {self.access.url}/doc-convert/{self.book_id}/file")
|
||||
content = self.access.get_file(
|
||||
file_path=f"{self.access.url}/doc-convert/{self.book_id}/file")
|
||||
self.logger_object.log("Book file was received from server.")
|
||||
self.book_path = pathlib.Path(self.save_file(
|
||||
content, path_to_save=f"books/{self.book_type}", file_type=self.book_type))
|
||||
except FileNotFoundError as f_err:
|
||||
self.logger_object.log(
|
||||
"Can't get file from server.", logging.ERROR)
|
||||
"Can't get book file from server.", logging.ERROR)
|
||||
self.logger_object.log_error_to_main_log()
|
||||
raise f_err
|
||||
except Exception as exc:
|
||||
raise exc
|
||||
|
||||
def check_output_directory(self):
|
||||
if self.output_path is None:
|
||||
if self.book_output_path is None:
|
||||
folder_path = os.path.dirname(
|
||||
os.path.dirname(os.path.abspath(__file__)))
|
||||
output_path = os.path.join(
|
||||
folder_path, f'json/{self.book_id}.json')
|
||||
self.output_path = output_path
|
||||
folder_path, f"books/json/{self.book_id}.json")
|
||||
self.book_output_path = output_path
|
||||
|
||||
self.output_path = pathlib.Path(self.output_path)
|
||||
self.logger_object.log(f'Output file path: {self.output_path}')
|
||||
self.book_output_path = pathlib.Path(self.book_output_path)
|
||||
self.logger_object.log(f"Output file path: {self.book_output_path}")
|
||||
|
||||
pathlib.Path(self.output_path).parent.mkdir(
|
||||
pathlib.Path(self.book_output_path).parent.mkdir(
|
||||
parents=True, exist_ok=True)
|
||||
self.output_path.touch(exist_ok=True)
|
||||
self.book_output_path.touch(exist_ok=True)
|
||||
|
||||
def write_to_json(self, content: dict):
|
||||
self.check_output_directory()
|
||||
try:
|
||||
with codecs.open(self.output_path, 'w', encoding='utf-8') as f:
|
||||
with codecs.open(self.book_output_path, "w", encoding="utf-8") as f:
|
||||
json.dump(content, f, ensure_ascii=False)
|
||||
self.logger_object.log(
|
||||
f'Data has been saved to .json file: {self.output_path}')
|
||||
f"Data has been saved to .json file: {self.book_output_path}")
|
||||
except Exception as exc:
|
||||
self.logger_object.log(
|
||||
'Error has occurred while writing .json file.' + str(exc), logging.ERROR)
|
||||
"Error has occurred while writing .json file." + str(exc), logging.ERROR)
|
||||
|
||||
def send_json_content_to_server(self, content: dict):
|
||||
"""Function sends json_content to site"""
|
||||
try:
|
||||
self.access.send_book(self.book_id, content)
|
||||
self.logger_object.log(f'JSON data has been sent to server.')
|
||||
self.logger_object.log(f"JSON data has been sent to server.")
|
||||
except Exception as exc:
|
||||
self.logger_object.log(
|
||||
'Error has occurred while sending json content.', logging.ERROR)
|
||||
"Error has occurred while sending json content.", logging.ERROR)
|
||||
self.logger_object.log_error_to_main_log()
|
||||
self.status_wrapper.set_error()
|
||||
raise exc
|
||||
|
||||
@abstractmethod
|
||||
def get_converted_book(self):
|
||||
self.logger_object.log('Beginning of processing .json output.')
|
||||
self.logger_object.log("Beginning of processing .json output.")
|
||||
self.status_wrapper.set_generating()
|
||||
return {}
|
||||
|
||||
@@ -133,7 +154,8 @@ class BookSolver:
|
||||
"""
|
||||
try:
|
||||
self.logger_object.log(
|
||||
f'Beginning of conversion from .{self.book_type} to .json.')
|
||||
f"Beginning of conversion from .{self.book_type} to .json.")
|
||||
self.get_preset_file()
|
||||
self.get_book_file()
|
||||
self.status_wrapper.set_processing()
|
||||
content_dict = self.get_converted_book()
|
||||
@@ -141,11 +163,11 @@ class BookSolver:
|
||||
self.write_to_json(content_dict)
|
||||
self.send_json_content_to_server(content_dict)
|
||||
self.logger_object.log(
|
||||
f'End of the conversion to LiveCarta format. Check {self.output_path}.')
|
||||
f"End of the conversion to LiveCarta format. Check {self.book_output_path}.")
|
||||
except Exception as exc:
|
||||
self.status_wrapper.set_error()
|
||||
self.logger_object.log(
|
||||
'Error has occurred while conversion.', logging.ERROR)
|
||||
"Error has occurred while conversion.", logging.ERROR)
|
||||
self.logger_object.log_error_to_main_log(str(exc))
|
||||
raise exc
|
||||
|
||||
@@ -158,15 +180,15 @@ class BookSolver:
|
||||
"""
|
||||
try:
|
||||
self.logger_object.log(
|
||||
f'Data has been downloaded from {file_path} file')
|
||||
f"Data has been downloaded from {file_path} file")
|
||||
self.status_wrapper.set_processing()
|
||||
with codecs.open(file_path, 'r', encoding='utf-8') as f_json:
|
||||
with codecs.open(file_path, "r", encoding="utf-8") as f_json:
|
||||
content_dict = json.load(f_json)
|
||||
self.status_wrapper.set_generating()
|
||||
self.send_json_content_to_server(content_dict)
|
||||
self.logger_object.log(f'Sent a file to server. Check LiveCarta.')
|
||||
self.logger_object.log(f"Sent a file to server. Check LiveCarta.")
|
||||
except Exception as exc:
|
||||
self.status_wrapper.set_error()
|
||||
self.logger_object.log(
|
||||
'Error has occurred while reading json file.' + str(exc), logging.ERROR)
|
||||
"Error has occurred while reading json file." + str(exc), logging.ERROR)
|
||||
self.logger_object.log_error_to_main_log(str(exc))
|
||||
|
||||
@@ -26,7 +26,7 @@ def process_images(access, html_path, book_id, body_tag):
|
||||
folder_path = os.path.dirname(
|
||||
os.path.dirname(os.path.abspath(__file__)))
|
||||
new_path = pathlib.Path(os.path.join(
|
||||
folder_path, f'../json/img_{book_id}/'))
|
||||
folder_path, f'../books/json/img_{book_id}/'))
|
||||
new_path.mkdir(exist_ok=True)
|
||||
new_img_path = new_path / img_name
|
||||
copyfile(img_path, new_img_path)
|
||||
|
||||
@@ -13,7 +13,6 @@ from typing import Dict, Union, List
|
||||
from bs4 import BeautifulSoup, NavigableString, Tag
|
||||
|
||||
from src.util.helpers import BookLogger
|
||||
from src.preset_processor import PresetProcessor
|
||||
from src.epub_converter.css_processor import CSSPreprocessor
|
||||
from src.epub_converter.html_epub_processor import HtmlEpubPreprocessor
|
||||
from src.livecarta_config import LiveCartaConfig
|
||||
@@ -24,11 +23,11 @@ from src.epub_converter.tag_inline_style_processor import TagInlineStyleProcesso
|
||||
|
||||
|
||||
class EpubConverter:
|
||||
def __init__(self, file_path, access=None, logger=None, css_processor=None, html_processor=None):
|
||||
self.file_path = file_path
|
||||
def __init__(self, book_path, access=None, logger=None, css_processor=None, html_processor=None):
|
||||
self.book_path = book_path
|
||||
self.access = access
|
||||
self.logger: BookLogger = logger
|
||||
self.ebooklib_book = epub.read_epub(file_path)
|
||||
self.ebooklib_book = epub.read_epub(book_path)
|
||||
self.css_processor = css_processor
|
||||
self.html_processor = html_processor
|
||||
|
||||
@@ -603,7 +602,7 @@ class EpubConverter:
|
||||
path_to_html=nav_point.href,
|
||||
access=self.access,
|
||||
path2aws_path=self.book_image_src_path2aws_path,
|
||||
book_id=Path(self.file_path).stem)
|
||||
book_id=Path(self.book_path).stem)
|
||||
sub_nodes = []
|
||||
# warning! not EpubHtmlItems won't be added to chapter
|
||||
# if it doesn't have subchapters
|
||||
@@ -638,11 +637,8 @@ if __name__ == "__main__":
|
||||
logger_object = BookLogger(
|
||||
name="epub", book_id=epub_file_path.split("/")[-1])
|
||||
|
||||
preset = PresetProcessor(preset_path="../../config/presets.json", logger=logger_object)\
|
||||
.get_preset_json()
|
||||
css_processor = CSSPreprocessor()
|
||||
html_processor = HtmlEpubPreprocessor(
|
||||
preset=preset, logger=logger_object)
|
||||
html_processor = HtmlEpubPreprocessor("../../presets/presets.json", logger=logger_object)
|
||||
|
||||
json_converter = EpubConverter(epub_file_path, logger=logger_object,
|
||||
css_processor=css_processor, html_processor=html_processor)
|
||||
|
||||
@@ -1,5 +1,4 @@
|
||||
from src.book_solver import BookSolver
|
||||
from src.preset_processor import PresetProcessor
|
||||
from src.epub_converter.css_processor import CSSPreprocessor
|
||||
from src.epub_converter.html_epub_processor import HtmlEpubPreprocessor
|
||||
from src.epub_converter.epub_converter import EpubConverter
|
||||
@@ -28,12 +27,10 @@ class EpubBook(BookSolver):
|
||||
json for LiveCarta platform
|
||||
|
||||
"""
|
||||
preset = PresetProcessor(preset_path="config/presets.json", logger=self.logger_object)\
|
||||
.get_preset_json()
|
||||
css_processor = CSSPreprocessor()
|
||||
html_processor = HtmlEpubPreprocessor(preset=preset, logger=self.logger_object)
|
||||
html_processor = HtmlEpubPreprocessor(self.preset_path, logger=self.logger_object)
|
||||
json_converter = EpubConverter(
|
||||
self.file_path, access=self.access, logger=self.logger_object,
|
||||
self.book_path, access=self.access, logger=self.logger_object,
|
||||
css_processor=css_processor, html_processor=html_processor)
|
||||
content_dict = json_converter.convert_to_dict()
|
||||
return content_dict
|
||||
|
||||
@@ -1,12 +1,13 @@
|
||||
import re
|
||||
import json
|
||||
from bs4 import BeautifulSoup, NavigableString, Comment, Tag
|
||||
|
||||
from src.util.helpers import BookLogger
|
||||
|
||||
|
||||
class HtmlEpubPreprocessor:
|
||||
def __init__(self, preset, logger=None):
|
||||
self.preset = preset
|
||||
def __init__(self, preset_path, logger=None):
|
||||
self.preset = json.load(open(preset_path))
|
||||
self.logger: BookLogger = logger
|
||||
self.name2function = {
|
||||
"table_wrapper": self._wrap_tags_with_table,
|
||||
|
||||
@@ -1,15 +0,0 @@
|
||||
import json
|
||||
|
||||
|
||||
from src.util.helpers import BookLogger
|
||||
|
||||
|
||||
class PresetProcessor:
|
||||
def __init__(self, preset_path="config/presets.json", logger=None):
|
||||
self.preset_path = preset_path
|
||||
self.logger: BookLogger = logger
|
||||
|
||||
def get_preset_json(self):
|
||||
f = open(self.preset_path)
|
||||
data = json.load(f)
|
||||
return data
|
||||
Reference in New Issue
Block a user