forked from LiveCarta/BookConverter
Merge pull request #323 from Teqniksoft/kiryl/converter_fix
Fix logs output
This commit is contained in:
28
consumer.py
28
consumer.py
@@ -15,30 +15,30 @@ from src.docx_converter.docx_solver import DocxBook
|
|||||||
from src.epub_converter.epub_solver import EpubBook
|
from src.epub_converter.epub_solver import EpubBook
|
||||||
|
|
||||||
|
|
||||||
def local_convert_book(book_type: [DocxBook, EpubBook], book_id: int, logger: logging.Logger, params: dict):
|
def local_convert_book(book_type: [DocxBook, EpubBook], book_id: int, main_logger: logging.Logger, params: dict):
|
||||||
logger.info(f"Start processing book-{book_id}.")
|
main_logger.info(f"Start processing book-{book_id}.")
|
||||||
try:
|
try:
|
||||||
json_file_path = "books/json/9781614382264.json"
|
json_file_path = "books/json/9781614382264.json"
|
||||||
book = book_type(book_id=book_id, main_logger=logger, **params)
|
book = book_type(book_id=book_id, main_logger=main_logger, **params)
|
||||||
book.conversion(json_file_path)
|
book.conversion(json_file_path)
|
||||||
except Exception as exc:
|
except Exception as exc:
|
||||||
raise exc
|
raise exc
|
||||||
logger.info(f"Book-{book_id} has been proceeded.")
|
main_logger.info(f"Book-{book_id} has been proceeded.")
|
||||||
|
|
||||||
|
|
||||||
def convert_book(book_type: [DocxBook, EpubBook], book_id: int, logger: logging.Logger, params: Dict[str, Access]):
|
def convert_book(book_type: [DocxBook, EpubBook], book_id: int, main_logger: logging.Logger, params: Dict[str, Access]):
|
||||||
logger.info(f"Start processing book-{book_id}.")
|
main_logger.info(f"Start processing book-{book_id}.")
|
||||||
try:
|
try:
|
||||||
book = book_type(book_id=book_id, main_logger=logger, **params)
|
book = book_type(book_id=book_id, main_logger=main_logger, **params)
|
||||||
book.conversion()
|
book.conversion()
|
||||||
except Exception as exc:
|
except Exception as exc:
|
||||||
raise exc
|
raise exc
|
||||||
logger.info(f"Book-{book_id} has been proceeded.")
|
main_logger.info(f"Book-{book_id} has been proceeded.")
|
||||||
|
|
||||||
|
|
||||||
def callback(ch, method, properties, body: bytes, logger: logging.Logger, libre_locker: Event):
|
def callback(ch, method, properties, body: bytes, main_logger: logging.Logger, libre_locker: Event):
|
||||||
print(f"Message: {body}.")
|
print(f"Message: {body}.")
|
||||||
logger.info(f"Message: {body}.")
|
main_logger.info(f"Message: {body}.")
|
||||||
try:
|
try:
|
||||||
data = json.loads(body)
|
data = json.loads(body)
|
||||||
assert "apiURL" in data, "No apiURL field in received message."
|
assert "apiURL" in data, "No apiURL field in received message."
|
||||||
@@ -54,7 +54,7 @@ def callback(ch, method, properties, body: bytes, logger: logging.Logger, libre_
|
|||||||
params = {
|
params = {
|
||||||
"book_type": EpubBook if data.get("fileExtension") == "epub" else DocxBook,
|
"book_type": EpubBook if data.get("fileExtension") == "epub" else DocxBook,
|
||||||
"book_id": data["id"],
|
"book_id": data["id"],
|
||||||
"logger": logger,
|
"main_logger": main_logger,
|
||||||
"params": book_params
|
"params": book_params
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -64,9 +64,9 @@ def callback(ch, method, properties, body: bytes, logger: logging.Logger, libre_
|
|||||||
# print(f"Active threads: {active_count()}.")
|
# print(f"Active threads: {active_count()}.")
|
||||||
except Exception as exc:
|
except Exception as exc:
|
||||||
if hasattr(exc, "message"):
|
if hasattr(exc, "message"):
|
||||||
logger.error(f"{sys.exc_info()[0]}: {exc.message}")
|
main_logger.error(f"{sys.exc_info()[0]}: {exc.message}")
|
||||||
else:
|
else:
|
||||||
logger.error(f"{sys.exc_info()[0]}: {str(exc)}")
|
main_logger.error(f"{sys.exc_info()[0]}: {str(exc)}")
|
||||||
finally:
|
finally:
|
||||||
pass
|
pass
|
||||||
|
|
||||||
@@ -104,7 +104,7 @@ def server_run():
|
|||||||
locker.set()
|
locker.set()
|
||||||
channel.basic_consume(queue=conf_param["queue"],
|
channel.basic_consume(queue=conf_param["queue"],
|
||||||
auto_ack=True,
|
auto_ack=True,
|
||||||
on_message_callback=partial(callback, logger=logger_object, libre_locker=locker))
|
on_message_callback=partial(callback, main_logger=logger_object, libre_locker=locker))
|
||||||
logger_object.info("Connection has been established.")
|
logger_object.info("Connection has been established.")
|
||||||
print("Waiting for messages...")
|
print("Waiting for messages...")
|
||||||
logger_object.info("Waiting for messages...")
|
logger_object.info("Waiting for messages...")
|
||||||
|
|||||||
@@ -28,10 +28,10 @@ class BookSolver:
|
|||||||
self.preset_path = None
|
self.preset_path = None
|
||||||
self.book_path = None # path to book file, appears after downloading from server
|
self.book_path = None # path to book file, appears after downloading from server
|
||||||
self.book_output_path = None # path to json file
|
self.book_output_path = None # path to json file
|
||||||
self.logger_object = BookLogger(name=f"{__name__}_{self.book_id}")
|
self.book_logger = BookLogger(name=f"{__name__}_{self.book_id}")
|
||||||
self.logger_object.configure_book_logger(book_id=book_id)
|
self.book_logger.configure_book_logger(book_id=book_id)
|
||||||
self.status_wrapper = BookStatusWrapper(
|
self.status_wrapper = BookStatusWrapper(
|
||||||
access, self.logger_object, book_id)
|
access, self.book_logger, book_id)
|
||||||
|
|
||||||
assert LiveCartaConfig.SUPPORTED_LEVELS == len(LiveCartaConfig.SUPPORTED_HEADERS), \
|
assert LiveCartaConfig.SUPPORTED_LEVELS == len(LiveCartaConfig.SUPPORTED_HEADERS), \
|
||||||
"Length of headers doesn't match allowed levels."
|
"Length of headers doesn't match allowed levels."
|
||||||
@@ -64,12 +64,12 @@ class BookSolver:
|
|||||||
try:
|
try:
|
||||||
with open(file_path, "wb+") as file:
|
with open(file_path, "wb+") as file:
|
||||||
file.write(content)
|
file.write(content)
|
||||||
self.logger_object.log(
|
self.book_logger.log(
|
||||||
f"File was saved to folder: {folder_path}.")
|
f"File was saved to folder: {folder_path}.")
|
||||||
except Exception as exc:
|
except Exception as exc:
|
||||||
self.logger_object.log(
|
self.book_logger.log(
|
||||||
f"Error in writing {self.book_type} file.", logging.ERROR)
|
f"Error in writing {self.book_type} file.", logging.ERROR)
|
||||||
self.logger_object.log_error_to_main_log()
|
self.book_logger.log_error_to_main_log()
|
||||||
raise exc
|
raise exc
|
||||||
return file_path
|
return file_path
|
||||||
|
|
||||||
@@ -86,9 +86,9 @@ class BookSolver:
|
|||||||
# self.preset_path = pathlib.Path(
|
# self.preset_path = pathlib.Path(
|
||||||
# str(self.save_file(content, path_to_save="preset", file_type="json")))
|
# str(self.save_file(content, path_to_save="preset", file_type="json")))
|
||||||
except FileNotFoundError as f_err:
|
except FileNotFoundError as f_err:
|
||||||
self.logger_object.log(
|
self.book_logger.log(
|
||||||
"Can't get preset file from server.", logging.ERROR)
|
"Can't get preset file from server.", logging.ERROR)
|
||||||
self.logger_object.log_error_to_main_log()
|
self.book_logger.log_error_to_main_log()
|
||||||
raise f_err
|
raise f_err
|
||||||
except Exception as exc:
|
except Exception as exc:
|
||||||
raise exc
|
raise exc
|
||||||
@@ -96,17 +96,17 @@ class BookSolver:
|
|||||||
def get_book_file(self):
|
def get_book_file(self):
|
||||||
"""Method for getting and saving book from server"""
|
"""Method for getting and saving book from server"""
|
||||||
try:
|
try:
|
||||||
self.logger_object.log(f"Start receiving book file from server. URL:"
|
self.book_logger.log(f"Start receiving book file from server. URL:"
|
||||||
f" {self.access.url}/doc-convert/{self.book_id}/file")
|
f" {self.access.url}/doc-convert/{self.book_id}/file")
|
||||||
content = self.access.get_file(
|
content = self.access.get_file(
|
||||||
file_path=f"{self.access.url}/doc-convert/{self.book_id}/file")
|
file_path=f"{self.access.url}/doc-convert/{self.book_id}/file")
|
||||||
self.logger_object.log("Book file was received from server.")
|
self.book_logger.log("Book file was received from server.")
|
||||||
self.book_path = pathlib.Path(self.save_file(
|
self.book_path = pathlib.Path(self.save_file(
|
||||||
content, path_to_save=f"books/{self.book_type}", file_type=self.book_type))
|
content, path_to_save=f"books/{self.book_type}", file_type=self.book_type))
|
||||||
except FileNotFoundError as f_err:
|
except FileNotFoundError as f_err:
|
||||||
self.logger_object.log(
|
self.book_logger.log(
|
||||||
"Can't get book file from server.", logging.ERROR)
|
"Can't get book file from server.", logging.ERROR)
|
||||||
self.logger_object.log_error_to_main_log()
|
self.book_logger.log_error_to_main_log()
|
||||||
raise f_err
|
raise f_err
|
||||||
except Exception as exc:
|
except Exception as exc:
|
||||||
raise exc
|
raise exc
|
||||||
@@ -120,7 +120,7 @@ class BookSolver:
|
|||||||
self.book_output_path = output_path
|
self.book_output_path = output_path
|
||||||
|
|
||||||
self.book_output_path = pathlib.Path(self.book_output_path)
|
self.book_output_path = pathlib.Path(self.book_output_path)
|
||||||
self.logger_object.log(f"Output file path: {self.book_output_path}")
|
self.book_logger.log(f"Output file path: {self.book_output_path}")
|
||||||
|
|
||||||
pathlib.Path(self.book_output_path).parent.mkdir(
|
pathlib.Path(self.book_output_path).parent.mkdir(
|
||||||
parents=True, exist_ok=True)
|
parents=True, exist_ok=True)
|
||||||
@@ -131,27 +131,27 @@ class BookSolver:
|
|||||||
try:
|
try:
|
||||||
with codecs.open(self.book_output_path, "w", encoding="utf-8") as f:
|
with codecs.open(self.book_output_path, "w", encoding="utf-8") as f:
|
||||||
json.dump(content, f, ensure_ascii=False)
|
json.dump(content, f, ensure_ascii=False)
|
||||||
self.logger_object.log(
|
self.book_logger.log(
|
||||||
f"Data has been saved to .json file: {self.book_output_path}")
|
f"Data has been saved to .json file: {self.book_output_path}")
|
||||||
except Exception as exc:
|
except Exception as exc:
|
||||||
self.logger_object.log(
|
self.book_logger.log(
|
||||||
"Error has occurred while writing .json file." + str(exc), logging.ERROR)
|
"Error has occurred while writing .json file." + str(exc), logging.ERROR)
|
||||||
|
|
||||||
def send_json_content_to_server(self, content: Dict[str, List[Dict[str, Union[List, str]]]]):
|
def send_json_content_to_server(self, content: Dict[str, List[Dict[str, Union[List, str]]]]):
|
||||||
"""Function sends json_content to site"""
|
"""Function sends json_content to site"""
|
||||||
try:
|
try:
|
||||||
self.access.send_book(self.book_id, content)
|
self.access.send_book(self.book_id, content)
|
||||||
self.logger_object.log(f"JSON data has been sent to server.")
|
self.book_logger.log(f"JSON data has been sent to server.")
|
||||||
except Exception as exc:
|
except Exception as exc:
|
||||||
self.logger_object.log(
|
self.book_logger.log(
|
||||||
"Error has occurred while sending json content.", logging.ERROR)
|
"Error has occurred while sending json content.", logging.ERROR)
|
||||||
self.logger_object.log_error_to_main_log()
|
self.book_logger.log_error_to_main_log()
|
||||||
self.status_wrapper.set_error()
|
self.status_wrapper.set_error()
|
||||||
raise exc
|
raise exc
|
||||||
|
|
||||||
@abstractmethod
|
@abstractmethod
|
||||||
def get_converted_book(self) -> Dict[str, List[Dict[str, Union[List, str]]]]:
|
def get_converted_book(self) -> Dict[str, List[Dict[str, Union[List, str]]]]:
|
||||||
self.logger_object.log("Beginning of processing .json output.")
|
self.book_logger.log("Beginning of processing .json output.")
|
||||||
self.status_wrapper.set_generating()
|
self.status_wrapper.set_generating()
|
||||||
return {}
|
return {}
|
||||||
|
|
||||||
@@ -165,23 +165,23 @@ class BookSolver:
|
|||||||
try:
|
try:
|
||||||
self.get_preset_file()
|
self.get_preset_file()
|
||||||
self.get_book_file()
|
self.get_book_file()
|
||||||
self.logger_object.log(
|
self.book_logger.log(
|
||||||
f"Beginning of conversion from .{self.book_type} to .json.")
|
f"Beginning of conversion from .{self.book_type} to .json.")
|
||||||
self.status_wrapper.set_processing()
|
self.status_wrapper.set_processing()
|
||||||
content_dict: Dict[str, List[Dict[Union[str, List]]]] = self.get_converted_book()
|
content_dict: Dict[str, List[Dict[Union[str, List]]]] = self.get_converted_book()
|
||||||
# todo add delete of preset path
|
# todo add delete of preset path
|
||||||
[os.remove(path) for path in [self.book_path]]
|
[os.remove(path) for path in [self.book_path]]
|
||||||
self.logger_object.log("Beginning of processing .json output.")
|
self.book_logger.log("Beginning of processing .json output.")
|
||||||
self.status_wrapper.set_generating()
|
self.status_wrapper.set_generating()
|
||||||
self.write_to_json(content_dict)
|
self.write_to_json(content_dict)
|
||||||
self.send_json_content_to_server(content_dict)
|
self.send_json_content_to_server(content_dict)
|
||||||
self.logger_object.log(
|
self.book_logger.log(
|
||||||
f"End of the conversion to LiveCarta format. Check {self.book_output_path}.")
|
f"End of the conversion to LiveCarta format. Check {self.book_output_path}.")
|
||||||
except Exception as exc:
|
except Exception as exc:
|
||||||
self.status_wrapper.set_error()
|
self.status_wrapper.set_error()
|
||||||
self.logger_object.log(
|
self.book_logger.log(
|
||||||
"Error has occurred while conversion.", logging.ERROR)
|
"Error has occurred while conversion.", logging.ERROR)
|
||||||
self.logger_object.log_error_to_main_log(str(exc))
|
self.book_logger.log_error_to_main_log(str(exc))
|
||||||
raise exc
|
raise exc
|
||||||
|
|
||||||
def conversion_local(self, file_path: str):
|
def conversion_local(self, file_path: str):
|
||||||
@@ -192,17 +192,17 @@ class BookSolver:
|
|||||||
|
|
||||||
"""
|
"""
|
||||||
try:
|
try:
|
||||||
self.logger_object.log(
|
self.book_logger.log(
|
||||||
f"Data has been downloaded from {file_path} file")
|
f"Data has been downloaded from {file_path} file")
|
||||||
self.status_wrapper.set_processing()
|
self.status_wrapper.set_processing()
|
||||||
with codecs.open(file_path, "r", encoding="utf-8") as f_json:
|
with codecs.open(file_path, "r", encoding="utf-8") as f_json:
|
||||||
content_dict = json.load(f_json)
|
content_dict = json.load(f_json)
|
||||||
self.logger_object.log("Beginning of processing .json output.")
|
self.book_logger.log("Beginning of processing .json output.")
|
||||||
self.status_wrapper.set_generating()
|
self.status_wrapper.set_generating()
|
||||||
self.send_json_content_to_server(content_dict)
|
self.send_json_content_to_server(content_dict)
|
||||||
self.logger_object.log(f"Sent a file to server. Check LiveCarta.")
|
self.book_logger.log(f"Sent a file to server. Check LiveCarta.")
|
||||||
except Exception as exc:
|
except Exception as exc:
|
||||||
self.status_wrapper.set_error()
|
self.status_wrapper.set_error()
|
||||||
self.logger_object.log(
|
self.book_logger.log(
|
||||||
"Error has occurred while reading json file." + str(exc), logging.ERROR)
|
"Error has occurred while reading json file." + str(exc), logging.ERROR)
|
||||||
self.logger_object.log_error_to_main_log(str(exc))
|
self.book_logger.log_error_to_main_log(str(exc))
|
||||||
|
|||||||
@@ -39,41 +39,41 @@ class DocxBook(BookSolver):
|
|||||||
# 1. Converts docx to html with LibreOffice
|
# 1. Converts docx to html with LibreOffice
|
||||||
try:
|
try:
|
||||||
html_converter = Docx2LibreHtml(self.book_id, self.book_path, self.access,
|
html_converter = Docx2LibreHtml(self.book_id, self.book_path, self.access,
|
||||||
self.logger_object, self.libre_locker)
|
self.book_logger, self.libre_locker)
|
||||||
except Exception as exc:
|
except Exception as exc:
|
||||||
self.logger_object.log(
|
self.book_logger.log(
|
||||||
"Error has occurred while converting .docx to .html.", logging.ERROR)
|
"Error has occurred while converting .docx to .html.", logging.ERROR)
|
||||||
self.logger_object.log_error_to_main_log()
|
self.book_logger.log_error_to_main_log()
|
||||||
self.status_wrapper.set_error()
|
self.status_wrapper.set_error()
|
||||||
raise exc
|
raise exc
|
||||||
|
|
||||||
# 2. Parses and cleans html, gets list of tags, gets footnotes
|
# 2. Parses and cleans html, gets list of tags, gets footnotes
|
||||||
try:
|
try:
|
||||||
html_preprocessor = HtmlPresetsProcessor(
|
html_preprocessor = HtmlPresetsProcessor(
|
||||||
logger=self.logger_object, preset_path="preset/docx_presets.json")
|
logger=self.book_logger, preset_path="preset/docx_presets.json")
|
||||||
style_preprocessor = StyleReader()
|
style_preprocessor = StyleReader()
|
||||||
html_processor = HtmlDocxProcessor(html_soup=html_converter.html_soup,
|
html_processor = HtmlDocxProcessor(html_soup=html_converter.html_soup,
|
||||||
logger=self.logger_object,
|
logger=self.book_logger,
|
||||||
html_preprocessor=html_preprocessor,
|
html_preprocessor=html_preprocessor,
|
||||||
style_preprocessor=style_preprocessor)
|
style_preprocessor=style_preprocessor)
|
||||||
bs_tags, footnotes, top_level_headers = html_processor.process_html(
|
bs_tags, footnotes, top_level_headers = html_processor.process_html(
|
||||||
self.access, html_converter.html_path, self.book_id)
|
self.access, html_converter.html_path, self.book_id)
|
||||||
except Exception as exc:
|
except Exception as exc:
|
||||||
self.logger_object.log(
|
self.book_logger.log(
|
||||||
"Error has occurred while processing .html", logging.ERROR)
|
"Error has occurred while processing .html", logging.ERROR)
|
||||||
self.logger_object.log_error_to_main_log()
|
self.book_logger.log_error_to_main_log()
|
||||||
self.status_wrapper.set_error()
|
self.status_wrapper.set_error()
|
||||||
raise exc
|
raise exc
|
||||||
|
|
||||||
# 3. Parses from line structure to nested structure with JSONConverter
|
# 3. Parses from line structure to nested structure with JSONConverter
|
||||||
try:
|
try:
|
||||||
json_converter = LibreHtml2JsonConverter(bs_tags, footnotes, top_level_headers,
|
json_converter = LibreHtml2JsonConverter(bs_tags, footnotes, top_level_headers,
|
||||||
self.logger_object)
|
self.book_logger)
|
||||||
content_dict = json_converter.convert_to_dict()
|
content_dict = json_converter.convert_to_dict()
|
||||||
except Exception as exc:
|
except Exception as exc:
|
||||||
self.logger_object.log(
|
self.book_logger.log(
|
||||||
"Error has occurred while converting .html to .json", logging.ERROR)
|
"Error has occurred while converting .html to .json", logging.ERROR)
|
||||||
self.logger_object.log_error_to_main_log()
|
self.book_logger.log_error_to_main_log()
|
||||||
self.status_wrapper.set_error()
|
self.status_wrapper.set_error()
|
||||||
raise exc
|
raise exc
|
||||||
return content_dict
|
return content_dict
|
||||||
@@ -82,24 +82,24 @@ class DocxBook(BookSolver):
|
|||||||
if __name__ == "__main__":
|
if __name__ == "__main__":
|
||||||
docx_file_path = f"../../books/docx/3cd6f561b8d7ee6a510c783784c9d018.docx"
|
docx_file_path = f"../../books/docx/3cd6f561b8d7ee6a510c783784c9d018.docx"
|
||||||
|
|
||||||
logger_object = BookLogger(name="epub")
|
book_logger = BookLogger(name="epub")
|
||||||
logger_object.configure_book_logger(book_id=docx_file_path.split("/")[-1])
|
book_logger.configure_book_logger(book_id=docx_file_path.split("/")[-1])
|
||||||
|
|
||||||
locker = Event()
|
locker = Event()
|
||||||
locker.set()
|
locker.set()
|
||||||
|
|
||||||
html_converter = Docx2LibreHtml(file_path=docx_file_path,
|
html_converter = Docx2LibreHtml(file_path=docx_file_path,
|
||||||
logger=logger_object, libre_locker=locker)
|
logger=book_logger, libre_locker=locker)
|
||||||
html_preprocessor = HtmlPresetsProcessor(
|
html_preprocessor = HtmlPresetsProcessor(
|
||||||
logger=logger_object, preset_path="../../preset/docx_presets.json")
|
logger=book_logger, preset_path="../../preset/docx_presets.json")
|
||||||
style_preprocessor = StyleReader()
|
style_preprocessor = StyleReader()
|
||||||
html_processor = HtmlDocxProcessor(html_soup=html_converter.html_soup, logger=logger_object,
|
html_processor = HtmlDocxProcessor(html_soup=html_converter.html_soup, logger=book_logger,
|
||||||
html_preprocessor=html_preprocessor, style_preprocessor=style_preprocessor)
|
html_preprocessor=html_preprocessor, style_preprocessor=style_preprocessor)
|
||||||
content, footnotes, top_level_headers = html_processor.process_html(
|
content, footnotes, top_level_headers = html_processor.process_html(
|
||||||
html_path=html_converter.html_path, book_id=html_converter.book_id)
|
html_path=html_converter.html_path, book_id=html_converter.book_id)
|
||||||
|
|
||||||
json_converter = LibreHtml2JsonConverter(
|
json_converter = LibreHtml2JsonConverter(
|
||||||
content, footnotes, top_level_headers, logger_object)
|
content, footnotes, top_level_headers, book_logger)
|
||||||
content_dict = json_converter.convert_to_dict()
|
content_dict = json_converter.convert_to_dict()
|
||||||
|
|
||||||
with codecs.open(docx_file_path.replace("docx", "json"), "w", encoding="utf-8") as f:
|
with codecs.open(docx_file_path.replace("docx", "json"), "w", encoding="utf-8") as f:
|
||||||
|
|||||||
@@ -24,7 +24,7 @@ class EpubConverter:
|
|||||||
style_processor: StyleReader = None, html_processor: HtmlEpubProcessor = None):
|
style_processor: StyleReader = None, html_processor: HtmlEpubProcessor = None):
|
||||||
self.book_path = book_path
|
self.book_path = book_path
|
||||||
self.access = access
|
self.access = access
|
||||||
self.logger: BookLogger = logger
|
self.book_logger: BookLogger = logger
|
||||||
self.ebooklib_book = epub.read_epub(book_path)
|
self.ebooklib_book = epub.read_epub(book_path)
|
||||||
self.style_processor = style_processor
|
self.style_processor = style_processor
|
||||||
self.html_processor = html_processor
|
self.html_processor = html_processor
|
||||||
@@ -57,52 +57,52 @@ class EpubConverter:
|
|||||||
self.noterefs: List[Tag] = [] # start of the footnote
|
self.noterefs: List[Tag] = [] # start of the footnote
|
||||||
self.footnotes: List[Tag] = [] # end of the footnote
|
self.footnotes: List[Tag] = [] # end of the footnote
|
||||||
|
|
||||||
self.logger.log("HTML files reading.")
|
self.book_logger.log("HTML files reading.")
|
||||||
self.html_href2html_body_soup: Dict[str,
|
self.html_href2html_body_soup: Dict[str,
|
||||||
BeautifulSoup] = self.build_href2soup_content()
|
BeautifulSoup] = self.build_href2soup_content()
|
||||||
|
|
||||||
self.logger.log("CSS inline style processing.")
|
self.book_logger.log("CSS inline style processing.")
|
||||||
[self.style_processor.process_inline_styles_in_html_soup(
|
[self.style_processor.process_inline_styles_in_html_soup(
|
||||||
self.html_href2html_body_soup[html_href]) for html_href in self.html_href2html_body_soup]
|
self.html_href2html_body_soup[html_href]) for html_href in self.html_href2html_body_soup]
|
||||||
self.logger.log("CSS files processing.")
|
self.book_logger.log("CSS files processing.")
|
||||||
self.html_href2css_href, self.css_href2css_content = self.build_html_and_css_relations()
|
self.html_href2css_href, self.css_href2css_content = self.build_html_and_css_relations()
|
||||||
self.logger.log("CSS styles fusion(inline+file).")
|
self.book_logger.log("CSS styles fusion(inline+file).")
|
||||||
self.add_css_styles_to_html_soup()
|
self.add_css_styles_to_html_soup()
|
||||||
|
|
||||||
self.logger.log("Image processing.")
|
self.book_logger.log("Image processing.")
|
||||||
for x in chain(self.ebooklib_book.get_items_of_type(ebooklib.ITEM_IMAGE),
|
for x in chain(self.ebooklib_book.get_items_of_type(ebooklib.ITEM_IMAGE),
|
||||||
self.ebooklib_book.get_items_of_type(ebooklib.ITEM_COVER)):
|
self.ebooklib_book.get_items_of_type(ebooklib.ITEM_COVER)):
|
||||||
file_name = x.file_name
|
file_name = x.file_name
|
||||||
content = x.content
|
content = x.content
|
||||||
self.img_href2img_bytes[file_name] = content
|
self.img_href2img_bytes[file_name] = content
|
||||||
|
|
||||||
self.logger.log("Footnotes processing.")
|
self.book_logger.log("Footnotes processing.")
|
||||||
for href in self.html_href2html_body_soup:
|
for href in self.html_href2html_body_soup:
|
||||||
self.footnotes_contents, self.noterefs, self.footnotes =\
|
self.footnotes_contents, self.noterefs, self.footnotes =\
|
||||||
preprocess_footnotes(
|
preprocess_footnotes(
|
||||||
self.html_href2html_body_soup[href], self.html_href2html_body_soup)
|
self.html_href2html_body_soup[href], self.html_href2html_body_soup)
|
||||||
self.logger.log(f"Added {len(self.footnotes_contents)} footnotes.")
|
self.book_logger.log(f"Added {len(self.footnotes_contents)} footnotes.")
|
||||||
|
|
||||||
self.logger.log("TOC processing.")
|
self.book_logger.log("TOC processing.")
|
||||||
self.build_adjacency_list_from_toc(self.ebooklib_book.toc)
|
self.build_adjacency_list_from_toc(self.ebooklib_book.toc)
|
||||||
# build simple toc from spine if needed
|
# build simple toc from spine if needed
|
||||||
if self.is_toc_empty():
|
if self.is_toc_empty():
|
||||||
self.build_adjacency_list_from_spine()
|
self.build_adjacency_list_from_spine()
|
||||||
not_added = [
|
not_added = [
|
||||||
x for x in self.html_href2html_body_soup if x not in self.hrefs_added_to_toc]
|
x for x in self.html_href2html_body_soup if x not in self.hrefs_added_to_toc]
|
||||||
self.logger.log(f"Html documents not added to TOC: {not_added}.")
|
self.book_logger.log(f"Html documents not added to TOC: {not_added}.")
|
||||||
self.logger.log(f"Add documents not added to TOC.")
|
self.book_logger.log(f"Add documents not added to TOC.")
|
||||||
self.add_not_added_files_to_adjacency_list(not_added)
|
self.add_not_added_files_to_adjacency_list(not_added)
|
||||||
self.logger.log(f"Label subchapters with converter tag.")
|
self.book_logger.log(f"Label subchapters with converter tag.")
|
||||||
self.label_subchapters_with_lc_tag()
|
self.label_subchapters_with_lc_tag()
|
||||||
self.logger.log(f"Process html internal links.")
|
self.book_logger.log(f"Process html internal links.")
|
||||||
self.process_internal_links()
|
self.process_internal_links()
|
||||||
self.logger.log(
|
self.book_logger.log(
|
||||||
f"Check if converter-chapter-marks are on the same level.")
|
f"Check if converter-chapter-marks are on the same level.")
|
||||||
self.chapter_marks_are_same_level()
|
self.chapter_marks_are_same_level()
|
||||||
self.logger.log(f"Define chapters content.")
|
self.book_logger.log(f"Define chapters content.")
|
||||||
self.define_chapters_with_content()
|
self.define_chapters_with_content()
|
||||||
self.logger.log(f"Converting html_nodes to LiveCarta chapter items.")
|
self.book_logger.log(f"Converting html_nodes to LiveCarta chapter items.")
|
||||||
|
|
||||||
def build_href2soup_content(self) -> Dict[str, BeautifulSoup]:
|
def build_href2soup_content(self) -> Dict[str, BeautifulSoup]:
|
||||||
# using EpubElements
|
# using EpubElements
|
||||||
@@ -341,13 +341,13 @@ class EpubConverter:
|
|||||||
full_path = [
|
full_path = [
|
||||||
href_from_toc for href_from_toc in self.hrefs_added_to_toc if normed_path in href_from_toc]
|
href_from_toc for href_from_toc in self.hrefs_added_to_toc if normed_path in href_from_toc]
|
||||||
if not full_path:
|
if not full_path:
|
||||||
self.logger.log(f"Error in {cur_file_path} file. No {normed_path} file found in added to TOC documents. "
|
self.book_logger.log(f"Error in {cur_file_path} file. No {normed_path} file found in added to TOC documents. "
|
||||||
f"While processing href in {internal_link_tag}.")
|
f"While processing href in {internal_link_tag}.")
|
||||||
internal_link_tag.attrs["converter-mark"] = "bad-link"
|
internal_link_tag.attrs["converter-mark"] = "bad-link"
|
||||||
return None
|
return None
|
||||||
|
|
||||||
if len(full_path) > 1:
|
if len(full_path) > 1:
|
||||||
self.logger.log(f"Warning in {cur_file_path}. Multiple paths found {full_path} for file {href_in_link}"
|
self.book_logger.log(f"Warning in {cur_file_path}. Multiple paths found {full_path} for file {href_in_link}"
|
||||||
f" while {internal_link_tag} processing. The first one will be chosen.")
|
f" while {internal_link_tag} processing. The first one will be chosen.")
|
||||||
|
|
||||||
return full_path[0]
|
return full_path[0]
|
||||||
@@ -433,7 +433,7 @@ class EpubConverter:
|
|||||||
anchor_html_content.find_all(attrs={"id": id_}) # if link is a footnote
|
anchor_html_content.find_all(attrs={"id": id_}) # if link is a footnote
|
||||||
if anchor_tags:
|
if anchor_tags:
|
||||||
if len(anchor_tags) > 1:
|
if len(anchor_tags) > 1:
|
||||||
self.logger.log(f"Warning in {html_href_from_toc}: multiple anchors:"
|
self.book_logger.log(f"Warning in {html_href_from_toc}: multiple anchors:"
|
||||||
f"{len(anchor_tags)} found.\n"
|
f"{len(anchor_tags)} found.\n"
|
||||||
f"{anchor_tags}\n"
|
f"{anchor_tags}\n"
|
||||||
f"While processing {internal_link_tag}")
|
f"While processing {internal_link_tag}")
|
||||||
@@ -446,7 +446,7 @@ class EpubConverter:
|
|||||||
del internal_link_tag.attrs["href"]
|
del internal_link_tag.attrs["href"]
|
||||||
else:
|
else:
|
||||||
internal_link_tag.attrs["converter-mark"] = "bad-link"
|
internal_link_tag.attrs["converter-mark"] = "bad-link"
|
||||||
self.logger.log(f"Error in {html_href_from_toc}."
|
self.book_logger.log(f"Error in {html_href_from_toc}."
|
||||||
f" While processing {internal_link_tag} no anchor found."
|
f" While processing {internal_link_tag} no anchor found."
|
||||||
f" Should be anchor with new id={new_unique_id} in"
|
f" Should be anchor with new id={new_unique_id} in"
|
||||||
f" {html_href_of_anchor} file."
|
f" {html_href_of_anchor} file."
|
||||||
@@ -563,11 +563,11 @@ class EpubConverter:
|
|||||||
if nav_point.id else self.html_href2html_body_soup[nav_point.href]
|
if nav_point.id else self.html_href2html_body_soup[nav_point.href]
|
||||||
|
|
||||||
indent: str = " " * lvl
|
indent: str = " " * lvl
|
||||||
self.logger.log(indent + f"Chapter: {title} is processing.")
|
self.book_logger.log(indent + f"Chapter: {title} is processing.")
|
||||||
is_chapter: bool = lvl <= LiveCartaConfig.SUPPORTED_LEVELS
|
is_chapter: bool = lvl <= LiveCartaConfig.SUPPORTED_LEVELS
|
||||||
self.logger.log(indent + "Process title.")
|
self.book_logger.log(indent + "Process title.")
|
||||||
title_preprocessed: str = self.html_processor.prepare_title(title)
|
title_preprocessed: str = self.html_processor.prepare_title(title)
|
||||||
self.logger.log(indent + "Process content.")
|
self.book_logger.log(indent + "Process content.")
|
||||||
content_preprocessed: Union[Tag, BeautifulSoup] = self.html_processor.prepare_content(
|
content_preprocessed: Union[Tag, BeautifulSoup] = self.html_processor.prepare_content(
|
||||||
title_preprocessed, content, remove_title_from_chapter=is_chapter)
|
title_preprocessed, content, remove_title_from_chapter=is_chapter)
|
||||||
|
|
||||||
@@ -597,8 +597,8 @@ class EpubConverter:
|
|||||||
chapter = self.html_node_to_livecarta_chapter_item(tl_nav_point)
|
chapter = self.html_node_to_livecarta_chapter_item(tl_nav_point)
|
||||||
top_level_chapters.append(chapter)
|
top_level_chapters.append(chapter)
|
||||||
top_level_dict_chapters = [x.to_dict() for x in top_level_chapters]
|
top_level_dict_chapters = [x.to_dict() for x in top_level_chapters]
|
||||||
self.logger.log(f"Anchors found: {len(self.internal_anchors)}.")
|
self.book_logger.log(f"Anchors found: {len(self.internal_anchors)}.")
|
||||||
self.logger.log("End conversion.")
|
self.book_logger.log("End conversion.")
|
||||||
|
|
||||||
return {
|
return {
|
||||||
"content": top_level_dict_chapters,
|
"content": top_level_dict_chapters,
|
||||||
|
|||||||
@@ -35,17 +35,17 @@ class EpubBook(BookSolver):
|
|||||||
# Parses and cleans html, gets list of tags, gets footnotes
|
# Parses and cleans html, gets list of tags, gets footnotes
|
||||||
try:
|
try:
|
||||||
html_preprocessor = HtmlPresetsProcessor(
|
html_preprocessor = HtmlPresetsProcessor(
|
||||||
logger=self.logger_object, preset_path="preset/epub_presets.json")
|
logger=self.book_logger, preset_path="preset/epub_presets.json")
|
||||||
html_processor = HtmlEpubProcessor(logger=self.logger_object,
|
html_processor = HtmlEpubProcessor(logger=self.book_logger,
|
||||||
html_preprocessor=html_preprocessor)
|
html_preprocessor=html_preprocessor)
|
||||||
except Exception as exc:
|
except Exception as exc:
|
||||||
self.logger_object.log(
|
self.book_logger.log(
|
||||||
"Error has occurred while processing .html", logging.ERROR)
|
"Error has occurred while processing .html", logging.ERROR)
|
||||||
self.logger_object.log_error_to_main_log()
|
self.book_logger.log_error_to_main_log()
|
||||||
self.status_wrapper.set_error()
|
self.status_wrapper.set_error()
|
||||||
raise exc
|
raise exc
|
||||||
json_converter = EpubConverter(
|
json_converter = EpubConverter(
|
||||||
self.book_path, access=self.access, logger=self.logger_object,
|
self.book_path, access=self.access, logger=self.book_logger,
|
||||||
style_processor=style_preprocessor, html_processor=html_processor)
|
style_processor=style_preprocessor, html_processor=html_processor)
|
||||||
content_dict = json_converter.convert_to_dict()
|
content_dict = json_converter.convert_to_dict()
|
||||||
return content_dict
|
return content_dict
|
||||||
|
|||||||
@@ -27,20 +27,21 @@ class ColoredFormatter(logging.Formatter):
|
|||||||
return logging.Formatter.format(self, record)
|
return logging.Formatter.format(self, record)
|
||||||
|
|
||||||
|
|
||||||
|
def generate_file_path(filename: str):
|
||||||
|
folder_path = os.path.dirname(os.path.abspath(os.path.join(__file__ ,"../..")))
|
||||||
|
folder_path = os.path.join(folder_path, f"logs/{time.strftime('%d-%m-%Y_%H-00')}/")
|
||||||
|
if not os.path.exists(folder_path):
|
||||||
|
os.makedirs(folder_path)
|
||||||
|
file_path = os.path.join(folder_path, filename)
|
||||||
|
return file_path
|
||||||
|
|
||||||
|
|
||||||
class MainLogger:
|
class MainLogger:
|
||||||
def __init__(self, name: str):
|
def __init__(self, name: str):
|
||||||
self.main_logger = logging.getLogger(name)
|
self.main_logger = logging.getLogger(name)
|
||||||
|
|
||||||
def generate_file_path(self, filename: str):
|
|
||||||
folder_path = os.path.dirname(os.path.abspath(os.path.join(__file__ ,"../..")))
|
|
||||||
folder_path = os.path.join(folder_path, f"logs/{time.strftime('%d-%m-%Y_%H-00')}/")
|
|
||||||
if not os.path.exists(folder_path):
|
|
||||||
os.makedirs(folder_path)
|
|
||||||
file_path = os.path.join(folder_path, filename)
|
|
||||||
return file_path
|
|
||||||
|
|
||||||
def configure_main_logger(self, filemode: str = "w+", logging_level: int = logging.INFO) -> logging.Logger:
|
def configure_main_logger(self, filemode: str = "w+", logging_level: int = logging.INFO) -> logging.Logger:
|
||||||
file_path = self.generate_file_path("converter.log")
|
file_path = generate_file_path("converter.log")
|
||||||
|
|
||||||
file_handler = logging.FileHandler(file_path, mode=filemode)
|
file_handler = logging.FileHandler(file_path, mode=filemode)
|
||||||
self.main_logger.addHandler(file_handler)
|
self.main_logger.addHandler(file_handler)
|
||||||
@@ -52,10 +53,10 @@ class MainLogger:
|
|||||||
return self.main_logger
|
return self.main_logger
|
||||||
|
|
||||||
|
|
||||||
class BookLogger(MainLogger):
|
class BookLogger:
|
||||||
def __init__(self, name: str):
|
def __init__(self, name: str, main_logger=None):
|
||||||
"""
|
"""
|
||||||
Method for Logger configuration. Logger will write to file.
|
Method for Logger configuration. Logger will write to file that descript book.
|
||||||
Parameters
|
Parameters
|
||||||
----------
|
----------
|
||||||
name: str
|
name: str
|
||||||
@@ -72,13 +73,13 @@ class BookLogger(MainLogger):
|
|||||||
format of record in log file
|
format of record in log file
|
||||||
|
|
||||||
"""
|
"""
|
||||||
super().__init__(name)
|
|
||||||
self.book_logger = logging.getLogger(name)
|
self.book_logger = logging.getLogger(name)
|
||||||
self.book_logger.propagate = False
|
self.book_logger.propagate = False
|
||||||
|
self.main_logger = main_logger
|
||||||
|
|
||||||
def configure_book_logger(self, book_id: Union[int, str], filemode: str = "w+",
|
def configure_book_logger(self, book_id: Union[int, str], filemode: str = "w+",
|
||||||
logging_level: int = logging.INFO):
|
logging_level: int = logging.INFO):
|
||||||
file_path = self.generate_file_path(f"{book_id}.log")
|
file_path = generate_file_path(f"{book_id}.log")
|
||||||
book_logger_format: str = "%(asctime)s - %(levelname)s - %(message)s" \
|
book_logger_format: str = "%(asctime)s - %(levelname)s - %(message)s" \
|
||||||
" [%(filename)s:%(lineno)d in %(funcName)s]"
|
" [%(filename)s:%(lineno)d in %(funcName)s]"
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user