Fix logs output

This commit is contained in:
Kiryl
2022-11-16 14:40:28 +03:00
parent b2491d0195
commit 58e2c74014
6 changed files with 104 additions and 103 deletions

View File

@@ -15,30 +15,30 @@ from src.docx_converter.docx_solver import DocxBook
from src.epub_converter.epub_solver import EpubBook
def local_convert_book(book_type: [DocxBook, EpubBook], book_id: int, main_logger: logging.Logger, params: dict):
    """
    Run a conversion for one book against a fixed local .json file.

    Parameters
    ----------
    book_type: [DocxBook, EpubBook]
        class (or any callable) that builds the book solver
    book_id: int
        id of the book, used in log messages and passed to the solver
    main_logger: logging.Logger
        logger that receives the start/finish messages and is passed to the solver
    params: dict
        extra keyword arguments forwarded to ``book_type``

    Raises
    ------
    Exception
        anything raised while building the solver or converting is propagated
        unchanged; the finishing message is not logged in that case
    """
    main_logger.info(f"Start processing book-{book_id}.")
    json_file_path = "books/json/9781614382264.json"
    # NOTE(review): convert_book calls book.conversion() without arguments and a
    # conversion_local(file_path) method exists on the solver - confirm that
    # conversion() really accepts a path here.
    book = book_type(book_id=book_id, main_logger=main_logger, **params)
    book.conversion(json_file_path)
    main_logger.info(f"Book-{book_id} has been proceeded.")
def convert_book(book_type: [DocxBook, EpubBook], book_id: int, main_logger: logging.Logger, params: Dict[str, Access]):
    """
    Build a book solver and run its conversion.

    Parameters
    ----------
    book_type: [DocxBook, EpubBook]
        class (or any callable) that builds the book solver
    book_id: int
        id of the book, used in log messages and passed to the solver
    main_logger: logging.Logger
        logger that receives the start/finish messages and is passed to the solver
    params: Dict[str, Access]
        extra keyword arguments forwarded to ``book_type``

    Raises
    ------
    Exception
        anything raised while building the solver or converting is propagated
        unchanged; the finishing message is not logged in that case
    """
    main_logger.info(f"Start processing book-{book_id}.")
    book = book_type(book_id=book_id, main_logger=main_logger, **params)
    book.conversion()
    main_logger.info(f"Book-{book_id} has been proceeded.")
def callback(ch, method, properties, body: bytes, logger: logging.Logger, libre_locker: Event):
def callback(ch, method, properties, body: bytes, main_logger: logging.Logger, libre_locker: Event):
print(f"Message: {body}.")
logger.info(f"Message: {body}.")
main_logger.info(f"Message: {body}.")
try:
data = json.loads(body)
assert "apiURL" in data, "No apiURL field in received message."
@@ -54,7 +54,7 @@ def callback(ch, method, properties, body: bytes, logger: logging.Logger, libre_
params = {
"book_type": EpubBook if data.get("fileExtension") == "epub" else DocxBook,
"book_id": data["id"],
"logger": logger,
"main_logger": main_logger,
"params": book_params
}
@@ -64,9 +64,9 @@ def callback(ch, method, properties, body: bytes, logger: logging.Logger, libre_
# print(f"Active threads: {active_count()}.")
except Exception as exc:
if hasattr(exc, "message"):
logger.error(f"{sys.exc_info()[0]}: {exc.message}")
main_logger.error(f"{sys.exc_info()[0]}: {exc.message}")
else:
logger.error(f"{sys.exc_info()[0]}: {str(exc)}")
main_logger.error(f"{sys.exc_info()[0]}: {str(exc)}")
finally:
pass
@@ -104,7 +104,7 @@ def server_run():
locker.set()
channel.basic_consume(queue=conf_param["queue"],
auto_ack=True,
on_message_callback=partial(callback, logger=logger_object, libre_locker=locker))
on_message_callback=partial(callback, main_logger=logger_object, libre_locker=locker))
logger_object.info("Connection has been established.")
print("Waiting for messages...")
logger_object.info("Waiting for messages...")

View File

@@ -28,10 +28,10 @@ class BookSolver:
self.preset_path = None
self.book_path = None # path to book file, appears after downloading from server
self.book_output_path = None # path to json file
self.logger_object = BookLogger(name=f"{__name__}_{self.book_id}")
self.logger_object.configure_book_logger(book_id=book_id)
self.book_logger = BookLogger(name=f"{__name__}_{self.book_id}")
self.book_logger.configure_book_logger(book_id=book_id)
self.status_wrapper = BookStatusWrapper(
access, self.logger_object, book_id)
access, self.book_logger, book_id)
assert LiveCartaConfig.SUPPORTED_LEVELS == len(LiveCartaConfig.SUPPORTED_HEADERS), \
"Length of headers doesn't match allowed levels."
@@ -64,12 +64,12 @@ class BookSolver:
try:
with open(file_path, "wb+") as file:
file.write(content)
self.logger_object.log(
self.book_logger.log(
f"File was saved to folder: {folder_path}.")
except Exception as exc:
self.logger_object.log(
self.book_logger.log(
f"Error in writing {self.book_type} file.", logging.ERROR)
self.logger_object.log_error_to_main_log()
self.book_logger.log_error_to_main_log()
raise exc
return file_path
@@ -86,9 +86,9 @@ class BookSolver:
# self.preset_path = pathlib.Path(
# str(self.save_file(content, path_to_save="preset", file_type="json")))
except FileNotFoundError as f_err:
self.logger_object.log(
self.book_logger.log(
"Can't get preset file from server.", logging.ERROR)
self.logger_object.log_error_to_main_log()
self.book_logger.log_error_to_main_log()
raise f_err
except Exception as exc:
raise exc
@@ -96,17 +96,17 @@ class BookSolver:
def get_book_file(self):
    """
    Method for getting and saving book from server.

    Requests ``{access.url}/doc-convert/{book_id}/file`` through ``self.access``,
    hands the received content to ``self.save_file`` and stores the resulting
    path in ``self.book_path`` as a ``pathlib.Path``.

    Raises
    ------
    FileNotFoundError
        re-raised after the failure is written to the book log and the main log
    Exception
        any other error is propagated unchanged
    """
    try:
        # Build the URL once so the logged address is exactly the requested one.
        file_url = f"{self.access.url}/doc-convert/{self.book_id}/file"
        self.book_logger.log(
            f"Start receiving book file from server. URL: {file_url}")
        content = self.access.get_file(file_path=file_url)
        self.book_logger.log("Book file was received from server.")
        self.book_path = pathlib.Path(self.save_file(
            content, path_to_save=f"books/{self.book_type}", file_type=self.book_type))
    except FileNotFoundError:
        self.book_logger.log(
            "Can't get book file from server.", logging.ERROR)
        self.book_logger.log_error_to_main_log()
        raise
@@ -120,7 +120,7 @@ class BookSolver:
self.book_output_path = output_path
self.book_output_path = pathlib.Path(self.book_output_path)
self.logger_object.log(f"Output file path: {self.book_output_path}")
self.book_logger.log(f"Output file path: {self.book_output_path}")
pathlib.Path(self.book_output_path).parent.mkdir(
parents=True, exist_ok=True)
@@ -131,27 +131,27 @@ class BookSolver:
try:
with codecs.open(self.book_output_path, "w", encoding="utf-8") as f:
json.dump(content, f, ensure_ascii=False)
self.logger_object.log(
self.book_logger.log(
f"Data has been saved to .json file: {self.book_output_path}")
except Exception as exc:
self.logger_object.log(
self.book_logger.log(
"Error has occurred while writing .json file." + str(exc), logging.ERROR)
def send_json_content_to_server(self, content: Dict[str, List[Dict[str, Union[List, str]]]]):
    """
    Function sends json_content to site.

    Parameters
    ----------
    content: Dict[str, List[Dict[str, Union[List, str]]]]
        converted book content, passed to ``self.access.send_book``

    Raises
    ------
    Exception
        any failure is written to the book log and the main log, the book
        status is set to error, and the original exception is re-raised
    """
    try:
        self.access.send_book(self.book_id, content)
        self.book_logger.log("JSON data has been sent to server.")
    except Exception:
        self.book_logger.log(
            "Error has occurred while sending json content.", logging.ERROR)
        self.book_logger.log_error_to_main_log()
        self.status_wrapper.set_error()
        # Bare raise keeps the original exception and traceback intact.
        raise
@abstractmethod
def get_converted_book(self) -> Dict[str, List[Dict[str, Union[List, str]]]]:
    """
    Base step of the conversion hook.

    Writes the "Beginning of processing .json output." message to the book
    log and switches the book status to generating.

    Returns
    -------
    Dict[str, List[Dict[str, Union[List, str]]]]
        always an empty dict in this base implementation
    """
    self.book_logger.log("Beginning of processing .json output.")
    self.status_wrapper.set_generating()
    return {}
@@ -165,23 +165,23 @@ class BookSolver:
try:
self.get_preset_file()
self.get_book_file()
self.logger_object.log(
self.book_logger.log(
f"Beginning of conversion from .{self.book_type} to .json.")
self.status_wrapper.set_processing()
content_dict: Dict[str, List[Dict[Union[str, List]]]] = self.get_converted_book()
# todo add delete of preset path
[os.remove(path) for path in [self.book_path]]
self.logger_object.log("Beginning of processing .json output.")
self.book_logger.log("Beginning of processing .json output.")
self.status_wrapper.set_generating()
self.write_to_json(content_dict)
self.send_json_content_to_server(content_dict)
self.logger_object.log(
self.book_logger.log(
f"End of the conversion to LiveCarta format. Check {self.book_output_path}.")
except Exception as exc:
self.status_wrapper.set_error()
self.logger_object.log(
self.book_logger.log(
"Error has occurred while conversion.", logging.ERROR)
self.logger_object.log_error_to_main_log(str(exc))
self.book_logger.log_error_to_main_log(str(exc))
raise exc
def conversion_local(self, file_path: str):
@@ -192,17 +192,17 @@ class BookSolver:
"""
try:
self.logger_object.log(
self.book_logger.log(
f"Data has been downloaded from {file_path} file")
self.status_wrapper.set_processing()
with codecs.open(file_path, "r", encoding="utf-8") as f_json:
content_dict = json.load(f_json)
self.logger_object.log("Beginning of processing .json output.")
self.book_logger.log("Beginning of processing .json output.")
self.status_wrapper.set_generating()
self.send_json_content_to_server(content_dict)
self.logger_object.log(f"Sent a file to server. Check LiveCarta.")
self.book_logger.log(f"Sent a file to server. Check LiveCarta.")
except Exception as exc:
self.status_wrapper.set_error()
self.logger_object.log(
self.book_logger.log(
"Error has occurred while reading json file." + str(exc), logging.ERROR)
self.logger_object.log_error_to_main_log(str(exc))
self.book_logger.log_error_to_main_log(str(exc))

View File

@@ -39,41 +39,41 @@ class DocxBook(BookSolver):
# 1. Converts docx to html with LibreOffice
try:
html_converter = Docx2LibreHtml(self.book_id, self.book_path, self.access,
self.logger_object, self.libre_locker)
self.book_logger, self.libre_locker)
except Exception as exc:
self.logger_object.log(
self.book_logger.log(
"Error has occurred while converting .docx to .html.", logging.ERROR)
self.logger_object.log_error_to_main_log()
self.book_logger.log_error_to_main_log()
self.status_wrapper.set_error()
raise exc
# 2. Parses and cleans html, gets list of tags, gets footnotes
try:
html_preprocessor = HtmlPresetsProcessor(
logger=self.logger_object, preset_path="preset/docx_presets.json")
logger=self.book_logger, preset_path="preset/docx_presets.json")
style_preprocessor = StyleReader()
html_processor = HtmlDocxProcessor(html_soup=html_converter.html_soup,
logger=self.logger_object,
logger=self.book_logger,
html_preprocessor=html_preprocessor,
style_preprocessor=style_preprocessor)
bs_tags, footnotes, top_level_headers = html_processor.process_html(
self.access, html_converter.html_path, self.book_id)
except Exception as exc:
self.logger_object.log(
self.book_logger.log(
"Error has occurred while processing .html", logging.ERROR)
self.logger_object.log_error_to_main_log()
self.book_logger.log_error_to_main_log()
self.status_wrapper.set_error()
raise exc
# 3. Parses from line structure to nested structure with JSONConverter
try:
json_converter = LibreHtml2JsonConverter(bs_tags, footnotes, top_level_headers,
self.logger_object)
self.book_logger)
content_dict = json_converter.convert_to_dict()
except Exception as exc:
self.logger_object.log(
self.book_logger.log(
"Error has occurred while converting .html to .json", logging.ERROR)
self.logger_object.log_error_to_main_log()
self.book_logger.log_error_to_main_log()
self.status_wrapper.set_error()
raise exc
return content_dict
@@ -82,24 +82,24 @@ class DocxBook(BookSolver):
if __name__ == "__main__":
docx_file_path = f"../../books/docx/3cd6f561b8d7ee6a510c783784c9d018.docx"
logger_object = BookLogger(name="epub")
logger_object.configure_book_logger(book_id=docx_file_path.split("/")[-1])
book_logger = BookLogger(name="epub")
book_logger.configure_book_logger(book_id=docx_file_path.split("/")[-1])
locker = Event()
locker.set()
html_converter = Docx2LibreHtml(file_path=docx_file_path,
logger=logger_object, libre_locker=locker)
logger=book_logger, libre_locker=locker)
html_preprocessor = HtmlPresetsProcessor(
logger=logger_object, preset_path="../../preset/docx_presets.json")
logger=book_logger, preset_path="../../preset/docx_presets.json")
style_preprocessor = StyleReader()
html_processor = HtmlDocxProcessor(html_soup=html_converter.html_soup, logger=logger_object,
html_processor = HtmlDocxProcessor(html_soup=html_converter.html_soup, logger=book_logger,
html_preprocessor=html_preprocessor, style_preprocessor=style_preprocessor)
content, footnotes, top_level_headers = html_processor.process_html(
html_path=html_converter.html_path, book_id=html_converter.book_id)
json_converter = LibreHtml2JsonConverter(
content, footnotes, top_level_headers, logger_object)
content, footnotes, top_level_headers, book_logger)
content_dict = json_converter.convert_to_dict()
with codecs.open(docx_file_path.replace("docx", "json"), "w", encoding="utf-8") as f:

View File

@@ -24,7 +24,7 @@ class EpubConverter:
style_processor: StyleReader = None, html_processor: HtmlEpubProcessor = None):
self.book_path = book_path
self.access = access
self.logger: BookLogger = logger
self.book_logger: BookLogger = logger
self.ebooklib_book = epub.read_epub(book_path)
self.style_processor = style_processor
self.html_processor = html_processor
@@ -57,52 +57,52 @@ class EpubConverter:
self.noterefs: List[Tag] = [] # start of the footnote
self.footnotes: List[Tag] = [] # end of the footnote
self.logger.log("HTML files reading.")
self.book_logger.log("HTML files reading.")
self.html_href2html_body_soup: Dict[str,
BeautifulSoup] = self.build_href2soup_content()
self.logger.log("CSS inline style processing.")
self.book_logger.log("CSS inline style processing.")
[self.style_processor.process_inline_styles_in_html_soup(
self.html_href2html_body_soup[html_href]) for html_href in self.html_href2html_body_soup]
self.logger.log("CSS files processing.")
self.book_logger.log("CSS files processing.")
self.html_href2css_href, self.css_href2css_content = self.build_html_and_css_relations()
self.logger.log("CSS styles fusion(inline+file).")
self.book_logger.log("CSS styles fusion(inline+file).")
self.add_css_styles_to_html_soup()
self.logger.log("Image processing.")
self.book_logger.log("Image processing.")
for x in chain(self.ebooklib_book.get_items_of_type(ebooklib.ITEM_IMAGE),
self.ebooklib_book.get_items_of_type(ebooklib.ITEM_COVER)):
file_name = x.file_name
content = x.content
self.img_href2img_bytes[file_name] = content
self.logger.log("Footnotes processing.")
self.book_logger.log("Footnotes processing.")
for href in self.html_href2html_body_soup:
self.footnotes_contents, self.noterefs, self.footnotes =\
preprocess_footnotes(
self.html_href2html_body_soup[href], self.html_href2html_body_soup)
self.logger.log(f"Added {len(self.footnotes_contents)} footnotes.")
self.book_logger.log(f"Added {len(self.footnotes_contents)} footnotes.")
self.logger.log("TOC processing.")
self.book_logger.log("TOC processing.")
self.build_adjacency_list_from_toc(self.ebooklib_book.toc)
# build simple toc from spine if needed
if self.is_toc_empty():
self.build_adjacency_list_from_spine()
not_added = [
x for x in self.html_href2html_body_soup if x not in self.hrefs_added_to_toc]
self.logger.log(f"Html documents not added to TOC: {not_added}.")
self.logger.log(f"Add documents not added to TOC.")
self.book_logger.log(f"Html documents not added to TOC: {not_added}.")
self.book_logger.log(f"Add documents not added to TOC.")
self.add_not_added_files_to_adjacency_list(not_added)
self.logger.log(f"Label subchapters with converter tag.")
self.book_logger.log(f"Label subchapters with converter tag.")
self.label_subchapters_with_lc_tag()
self.logger.log(f"Process html internal links.")
self.book_logger.log(f"Process html internal links.")
self.process_internal_links()
self.logger.log(
self.book_logger.log(
f"Check if converter-chapter-marks are on the same level.")
self.chapter_marks_are_same_level()
self.logger.log(f"Define chapters content.")
self.book_logger.log(f"Define chapters content.")
self.define_chapters_with_content()
self.logger.log(f"Converting html_nodes to LiveCarta chapter items.")
self.book_logger.log(f"Converting html_nodes to LiveCarta chapter items.")
def build_href2soup_content(self) -> Dict[str, BeautifulSoup]:
# using EpubElements
@@ -341,13 +341,13 @@ class EpubConverter:
full_path = [
href_from_toc for href_from_toc in self.hrefs_added_to_toc if normed_path in href_from_toc]
if not full_path:
self.logger.log(f"Error in {cur_file_path} file. No {normed_path} file found in added to TOC documents. "
self.book_logger.log(f"Error in {cur_file_path} file. No {normed_path} file found in added to TOC documents. "
f"While processing href in {internal_link_tag}.")
internal_link_tag.attrs["converter-mark"] = "bad-link"
return None
if len(full_path) > 1:
self.logger.log(f"Warning in {cur_file_path}. Multiple paths found {full_path} for file {href_in_link}"
self.book_logger.log(f"Warning in {cur_file_path}. Multiple paths found {full_path} for file {href_in_link}"
f" while {internal_link_tag} processing. The first one will be chosen.")
return full_path[0]
@@ -433,7 +433,7 @@ class EpubConverter:
anchor_html_content.find_all(attrs={"id": id_}) # if link is a footnote
if anchor_tags:
if len(anchor_tags) > 1:
self.logger.log(f"Warning in {html_href_from_toc}: multiple anchors:"
self.book_logger.log(f"Warning in {html_href_from_toc}: multiple anchors:"
f"{len(anchor_tags)} found.\n"
f"{anchor_tags}\n"
f"While processing {internal_link_tag}")
@@ -446,7 +446,7 @@ class EpubConverter:
del internal_link_tag.attrs["href"]
else:
internal_link_tag.attrs["converter-mark"] = "bad-link"
self.logger.log(f"Error in {html_href_from_toc}."
self.book_logger.log(f"Error in {html_href_from_toc}."
f" While processing {internal_link_tag} no anchor found."
f" Should be anchor with new id={new_unique_id} in"
f" {html_href_of_anchor} file."
@@ -563,11 +563,11 @@ class EpubConverter:
if nav_point.id else self.html_href2html_body_soup[nav_point.href]
indent: str = " " * lvl
self.logger.log(indent + f"Chapter: {title} is processing.")
self.book_logger.log(indent + f"Chapter: {title} is processing.")
is_chapter: bool = lvl <= LiveCartaConfig.SUPPORTED_LEVELS
self.logger.log(indent + "Process title.")
self.book_logger.log(indent + "Process title.")
title_preprocessed: str = self.html_processor.prepare_title(title)
self.logger.log(indent + "Process content.")
self.book_logger.log(indent + "Process content.")
content_preprocessed: Union[Tag, BeautifulSoup] = self.html_processor.prepare_content(
title_preprocessed, content, remove_title_from_chapter=is_chapter)
@@ -597,8 +597,8 @@ class EpubConverter:
chapter = self.html_node_to_livecarta_chapter_item(tl_nav_point)
top_level_chapters.append(chapter)
top_level_dict_chapters = [x.to_dict() for x in top_level_chapters]
self.logger.log(f"Anchors found: {len(self.internal_anchors)}.")
self.logger.log("End conversion.")
self.book_logger.log(f"Anchors found: {len(self.internal_anchors)}.")
self.book_logger.log("End conversion.")
return {
"content": top_level_dict_chapters,

View File

@@ -35,17 +35,17 @@ class EpubBook(BookSolver):
# Parses and cleans html, gets list of tags, gets footnotes
try:
html_preprocessor = HtmlPresetsProcessor(
logger=self.logger_object, preset_path="preset/epub_presets.json")
html_processor = HtmlEpubProcessor(logger=self.logger_object,
logger=self.book_logger, preset_path="preset/epub_presets.json")
html_processor = HtmlEpubProcessor(logger=self.book_logger,
html_preprocessor=html_preprocessor)
except Exception as exc:
self.logger_object.log(
self.book_logger.log(
"Error has occurred while processing .html", logging.ERROR)
self.logger_object.log_error_to_main_log()
self.book_logger.log_error_to_main_log()
self.status_wrapper.set_error()
raise exc
json_converter = EpubConverter(
self.book_path, access=self.access, logger=self.logger_object,
self.book_path, access=self.access, logger=self.book_logger,
style_processor=style_preprocessor, html_processor=html_processor)
content_dict = json_converter.convert_to_dict()
return content_dict

View File

@@ -27,20 +27,21 @@ class ColoredFormatter(logging.Formatter):
return logging.Formatter.format(self, record)
def generate_file_path(filename: str) -> str:
    """
    Build the path of a log file inside the current hourly log folder.

    The folder is ``logs/<dd-mm-YYYY_HH-00>/`` located two directory levels
    above the directory that contains this module; it is created when missing.

    Parameters
    ----------
    filename: str
        name of the log file, e.g. "converter.log"

    Returns
    -------
    str
        path of ``filename`` inside the hourly log folder
    """
    # abspath() collapses "<this file>/../.." to the parent of this module's
    # directory; dirname() then steps up one more level.
    root_path = os.path.dirname(os.path.abspath(os.path.join(__file__, "../..")))
    folder_path = os.path.join(root_path, f"logs/{time.strftime('%d-%m-%Y_%H-00')}/")
    # exist_ok avoids the check-then-create race when several loggers are
    # configured at the same time and target the same hourly folder.
    os.makedirs(folder_path, exist_ok=True)
    return os.path.join(folder_path, filename)
class MainLogger:
def __init__(self, name: str):
self.main_logger = logging.getLogger(name)
def generate_file_path(self, filename: str):
folder_path = os.path.dirname(os.path.abspath(os.path.join(__file__ ,"../..")))
folder_path = os.path.join(folder_path, f"logs/{time.strftime('%d-%m-%Y_%H-00')}/")
if not os.path.exists(folder_path):
os.makedirs(folder_path)
file_path = os.path.join(folder_path, filename)
return file_path
def configure_main_logger(self, filemode: str = "w+", logging_level: int = logging.INFO) -> logging.Logger:
file_path = self.generate_file_path("converter.log")
file_path = generate_file_path("converter.log")
file_handler = logging.FileHandler(file_path, mode=filemode)
self.main_logger.addHandler(file_handler)
@@ -52,10 +53,10 @@ class MainLogger:
return self.main_logger
class BookLogger(MainLogger):
def __init__(self, name: str):
class BookLogger:
def __init__(self, name: str, main_logger=None):
"""
Method for Logger configuration. Logger will write to file.
Method for Logger configuration. Logger will write to file that descript book.
Parameters
----------
name: str
@@ -72,13 +73,13 @@ class BookLogger(MainLogger):
format of record in log file
"""
super().__init__(name)
self.book_logger = logging.getLogger(name)
self.book_logger.propagate = False
self.main_logger = main_logger
def configure_book_logger(self, book_id: Union[int, str], filemode: str = "w+",
logging_level: int = logging.INFO):
file_path = self.generate_file_path(f"{book_id}.log")
file_path = generate_file_path(f"{book_id}.log")
book_logger_format: str = "%(asctime)s - %(levelname)s - %(message)s" \
" [%(filename)s:%(lineno)d in %(funcName)s]"