Fix logs output

This commit is contained in:
Kiryl
2022-11-16 14:40:28 +03:00
parent b2491d0195
commit 58e2c74014
6 changed files with 104 additions and 103 deletions

View File

@@ -24,7 +24,7 @@ class EpubConverter:
style_processor: StyleReader = None, html_processor: HtmlEpubProcessor = None):
self.book_path = book_path
self.access = access
self.logger: BookLogger = logger
self.book_logger: BookLogger = logger
self.ebooklib_book = epub.read_epub(book_path)
self.style_processor = style_processor
self.html_processor = html_processor
@@ -57,52 +57,52 @@ class EpubConverter:
self.noterefs: List[Tag] = [] # start of the footnote
self.footnotes: List[Tag] = [] # end of the footnote
self.logger.log("HTML files reading.")
self.book_logger.log("HTML files reading.")
self.html_href2html_body_soup: Dict[str,
BeautifulSoup] = self.build_href2soup_content()
self.logger.log("CSS inline style processing.")
self.book_logger.log("CSS inline style processing.")
[self.style_processor.process_inline_styles_in_html_soup(
self.html_href2html_body_soup[html_href]) for html_href in self.html_href2html_body_soup]
self.logger.log("CSS files processing.")
self.book_logger.log("CSS files processing.")
self.html_href2css_href, self.css_href2css_content = self.build_html_and_css_relations()
self.logger.log("CSS styles fusion(inline+file).")
self.book_logger.log("CSS styles fusion(inline+file).")
self.add_css_styles_to_html_soup()
self.logger.log("Image processing.")
self.book_logger.log("Image processing.")
for x in chain(self.ebooklib_book.get_items_of_type(ebooklib.ITEM_IMAGE),
self.ebooklib_book.get_items_of_type(ebooklib.ITEM_COVER)):
file_name = x.file_name
content = x.content
self.img_href2img_bytes[file_name] = content
self.logger.log("Footnotes processing.")
self.book_logger.log("Footnotes processing.")
for href in self.html_href2html_body_soup:
self.footnotes_contents, self.noterefs, self.footnotes =\
preprocess_footnotes(
self.html_href2html_body_soup[href], self.html_href2html_body_soup)
self.logger.log(f"Added {len(self.footnotes_contents)} footnotes.")
self.book_logger.log(f"Added {len(self.footnotes_contents)} footnotes.")
self.logger.log("TOC processing.")
self.book_logger.log("TOC processing.")
self.build_adjacency_list_from_toc(self.ebooklib_book.toc)
# build simple toc from spine if needed
if self.is_toc_empty():
self.build_adjacency_list_from_spine()
not_added = [
x for x in self.html_href2html_body_soup if x not in self.hrefs_added_to_toc]
self.logger.log(f"Html documents not added to TOC: {not_added}.")
self.logger.log(f"Add documents not added to TOC.")
self.book_logger.log(f"Html documents not added to TOC: {not_added}.")
self.book_logger.log(f"Add documents not added to TOC.")
self.add_not_added_files_to_adjacency_list(not_added)
self.logger.log(f"Label subchapters with converter tag.")
self.book_logger.log(f"Label subchapters with converter tag.")
self.label_subchapters_with_lc_tag()
self.logger.log(f"Process html internal links.")
self.book_logger.log(f"Process html internal links.")
self.process_internal_links()
self.logger.log(
self.book_logger.log(
f"Check if converter-chapter-marks are on the same level.")
self.chapter_marks_are_same_level()
self.logger.log(f"Define chapters content.")
self.book_logger.log(f"Define chapters content.")
self.define_chapters_with_content()
self.logger.log(f"Converting html_nodes to LiveCarta chapter items.")
self.book_logger.log(f"Converting html_nodes to LiveCarta chapter items.")
def build_href2soup_content(self) -> Dict[str, BeautifulSoup]:
# using EpubElements
@@ -341,13 +341,13 @@ class EpubConverter:
full_path = [
href_from_toc for href_from_toc in self.hrefs_added_to_toc if normed_path in href_from_toc]
if not full_path:
self.logger.log(f"Error in {cur_file_path} file. No {normed_path} file found in added to TOC documents. "
self.book_logger.log(f"Error in {cur_file_path} file. No {normed_path} file found in added to TOC documents. "
f"While processing href in {internal_link_tag}.")
internal_link_tag.attrs["converter-mark"] = "bad-link"
return None
if len(full_path) > 1:
self.logger.log(f"Warning in {cur_file_path}. Multiple paths found {full_path} for file {href_in_link}"
self.book_logger.log(f"Warning in {cur_file_path}. Multiple paths found {full_path} for file {href_in_link}"
f" while {internal_link_tag} processing. The first one will be chosen.")
return full_path[0]
@@ -433,7 +433,7 @@ class EpubConverter:
anchor_html_content.find_all(attrs={"id": id_}) # if link is a footnote
if anchor_tags:
if len(anchor_tags) > 1:
self.logger.log(f"Warning in {html_href_from_toc}: multiple anchors:"
self.book_logger.log(f"Warning in {html_href_from_toc}: multiple anchors:"
f"{len(anchor_tags)} found.\n"
f"{anchor_tags}\n"
f"While processing {internal_link_tag}")
@@ -446,7 +446,7 @@ class EpubConverter:
del internal_link_tag.attrs["href"]
else:
internal_link_tag.attrs["converter-mark"] = "bad-link"
self.logger.log(f"Error in {html_href_from_toc}."
self.book_logger.log(f"Error in {html_href_from_toc}."
f" While processing {internal_link_tag} no anchor found."
f" Should be anchor with new id={new_unique_id} in"
f" {html_href_of_anchor} file."
@@ -563,11 +563,11 @@ class EpubConverter:
if nav_point.id else self.html_href2html_body_soup[nav_point.href]
indent: str = " " * lvl
self.logger.log(indent + f"Chapter: {title} is processing.")
self.book_logger.log(indent + f"Chapter: {title} is processing.")
is_chapter: bool = lvl <= LiveCartaConfig.SUPPORTED_LEVELS
self.logger.log(indent + "Process title.")
self.book_logger.log(indent + "Process title.")
title_preprocessed: str = self.html_processor.prepare_title(title)
self.logger.log(indent + "Process content.")
self.book_logger.log(indent + "Process content.")
content_preprocessed: Union[Tag, BeautifulSoup] = self.html_processor.prepare_content(
title_preprocessed, content, remove_title_from_chapter=is_chapter)
@@ -597,8 +597,8 @@ class EpubConverter:
chapter = self.html_node_to_livecarta_chapter_item(tl_nav_point)
top_level_chapters.append(chapter)
top_level_dict_chapters = [x.to_dict() for x in top_level_chapters]
self.logger.log(f"Anchors found: {len(self.internal_anchors)}.")
self.logger.log("End conversion.")
self.book_logger.log(f"Anchors found: {len(self.internal_anchors)}.")
self.book_logger.log("End conversion.")
return {
"content": top_level_dict_chapters,

View File

@@ -35,17 +35,17 @@ class EpubBook(BookSolver):
# Parses and cleans html, gets list of tags, gets footnotes
try:
html_preprocessor = HtmlPresetsProcessor(
logger=self.logger_object, preset_path="preset/epub_presets.json")
html_processor = HtmlEpubProcessor(logger=self.logger_object,
logger=self.book_logger, preset_path="preset/epub_presets.json")
html_processor = HtmlEpubProcessor(logger=self.book_logger,
html_preprocessor=html_preprocessor)
except Exception as exc:
self.logger_object.log(
self.book_logger.log(
"Error has occurred while processing .html", logging.ERROR)
self.logger_object.log_error_to_main_log()
self.book_logger.log_error_to_main_log()
self.status_wrapper.set_error()
raise exc
json_converter = EpubConverter(
self.book_path, access=self.access, logger=self.logger_object,
self.book_path, access=self.access, logger=self.book_logger,
style_processor=style_preprocessor, html_processor=html_processor)
content_dict = json_converter.convert_to_dict()
return content_dict