forked from LiveCarta/BookConverter
Fix logs output
This commit is contained in:
@@ -24,7 +24,7 @@ class EpubConverter:
|
||||
style_processor: StyleReader = None, html_processor: HtmlEpubProcessor = None):
|
||||
self.book_path = book_path
|
||||
self.access = access
|
||||
self.logger: BookLogger = logger
|
||||
self.book_logger: BookLogger = logger
|
||||
self.ebooklib_book = epub.read_epub(book_path)
|
||||
self.style_processor = style_processor
|
||||
self.html_processor = html_processor
|
||||
@@ -57,52 +57,52 @@ class EpubConverter:
|
||||
self.noterefs: List[Tag] = [] # start of the footnote
|
||||
self.footnotes: List[Tag] = [] # end of the footnote
|
||||
|
||||
self.logger.log("HTML files reading.")
|
||||
self.book_logger.log("HTML files reading.")
|
||||
self.html_href2html_body_soup: Dict[str,
|
||||
BeautifulSoup] = self.build_href2soup_content()
|
||||
|
||||
self.logger.log("CSS inline style processing.")
|
||||
self.book_logger.log("CSS inline style processing.")
|
||||
[self.style_processor.process_inline_styles_in_html_soup(
|
||||
self.html_href2html_body_soup[html_href]) for html_href in self.html_href2html_body_soup]
|
||||
self.logger.log("CSS files processing.")
|
||||
self.book_logger.log("CSS files processing.")
|
||||
self.html_href2css_href, self.css_href2css_content = self.build_html_and_css_relations()
|
||||
self.logger.log("CSS styles fusion(inline+file).")
|
||||
self.book_logger.log("CSS styles fusion(inline+file).")
|
||||
self.add_css_styles_to_html_soup()
|
||||
|
||||
self.logger.log("Image processing.")
|
||||
self.book_logger.log("Image processing.")
|
||||
for x in chain(self.ebooklib_book.get_items_of_type(ebooklib.ITEM_IMAGE),
|
||||
self.ebooklib_book.get_items_of_type(ebooklib.ITEM_COVER)):
|
||||
file_name = x.file_name
|
||||
content = x.content
|
||||
self.img_href2img_bytes[file_name] = content
|
||||
|
||||
self.logger.log("Footnotes processing.")
|
||||
self.book_logger.log("Footnotes processing.")
|
||||
for href in self.html_href2html_body_soup:
|
||||
self.footnotes_contents, self.noterefs, self.footnotes =\
|
||||
preprocess_footnotes(
|
||||
self.html_href2html_body_soup[href], self.html_href2html_body_soup)
|
||||
self.logger.log(f"Added {len(self.footnotes_contents)} footnotes.")
|
||||
self.book_logger.log(f"Added {len(self.footnotes_contents)} footnotes.")
|
||||
|
||||
self.logger.log("TOC processing.")
|
||||
self.book_logger.log("TOC processing.")
|
||||
self.build_adjacency_list_from_toc(self.ebooklib_book.toc)
|
||||
# build simple toc from spine if needed
|
||||
if self.is_toc_empty():
|
||||
self.build_adjacency_list_from_spine()
|
||||
not_added = [
|
||||
x for x in self.html_href2html_body_soup if x not in self.hrefs_added_to_toc]
|
||||
self.logger.log(f"Html documents not added to TOC: {not_added}.")
|
||||
self.logger.log(f"Add documents not added to TOC.")
|
||||
self.book_logger.log(f"Html documents not added to TOC: {not_added}.")
|
||||
self.book_logger.log(f"Add documents not added to TOC.")
|
||||
self.add_not_added_files_to_adjacency_list(not_added)
|
||||
self.logger.log(f"Label subchapters with converter tag.")
|
||||
self.book_logger.log(f"Label subchapters with converter tag.")
|
||||
self.label_subchapters_with_lc_tag()
|
||||
self.logger.log(f"Process html internal links.")
|
||||
self.book_logger.log(f"Process html internal links.")
|
||||
self.process_internal_links()
|
||||
self.logger.log(
|
||||
self.book_logger.log(
|
||||
f"Check if converter-chapter-marks are on the same level.")
|
||||
self.chapter_marks_are_same_level()
|
||||
self.logger.log(f"Define chapters content.")
|
||||
self.book_logger.log(f"Define chapters content.")
|
||||
self.define_chapters_with_content()
|
||||
self.logger.log(f"Converting html_nodes to LiveCarta chapter items.")
|
||||
self.book_logger.log(f"Converting html_nodes to LiveCarta chapter items.")
|
||||
|
||||
def build_href2soup_content(self) -> Dict[str, BeautifulSoup]:
|
||||
# using EpubElements
|
||||
@@ -341,13 +341,13 @@ class EpubConverter:
|
||||
full_path = [
|
||||
href_from_toc for href_from_toc in self.hrefs_added_to_toc if normed_path in href_from_toc]
|
||||
if not full_path:
|
||||
self.logger.log(f"Error in {cur_file_path} file. No {normed_path} file found in added to TOC documents. "
|
||||
self.book_logger.log(f"Error in {cur_file_path} file. No {normed_path} file found in added to TOC documents. "
|
||||
f"While processing href in {internal_link_tag}.")
|
||||
internal_link_tag.attrs["converter-mark"] = "bad-link"
|
||||
return None
|
||||
|
||||
if len(full_path) > 1:
|
||||
self.logger.log(f"Warning in {cur_file_path}. Multiple paths found {full_path} for file {href_in_link}"
|
||||
self.book_logger.log(f"Warning in {cur_file_path}. Multiple paths found {full_path} for file {href_in_link}"
|
||||
f" while {internal_link_tag} processing. The first one will be chosen.")
|
||||
|
||||
return full_path[0]
|
||||
@@ -433,7 +433,7 @@ class EpubConverter:
|
||||
anchor_html_content.find_all(attrs={"id": id_}) # if link is a footnote
|
||||
if anchor_tags:
|
||||
if len(anchor_tags) > 1:
|
||||
self.logger.log(f"Warning in {html_href_from_toc}: multiple anchors:"
|
||||
self.book_logger.log(f"Warning in {html_href_from_toc}: multiple anchors:"
|
||||
f"{len(anchor_tags)} found.\n"
|
||||
f"{anchor_tags}\n"
|
||||
f"While processing {internal_link_tag}")
|
||||
@@ -446,7 +446,7 @@ class EpubConverter:
|
||||
del internal_link_tag.attrs["href"]
|
||||
else:
|
||||
internal_link_tag.attrs["converter-mark"] = "bad-link"
|
||||
self.logger.log(f"Error in {html_href_from_toc}."
|
||||
self.book_logger.log(f"Error in {html_href_from_toc}."
|
||||
f" While processing {internal_link_tag} no anchor found."
|
||||
f" Should be anchor with new id={new_unique_id} in"
|
||||
f" {html_href_of_anchor} file."
|
||||
@@ -563,11 +563,11 @@ class EpubConverter:
|
||||
if nav_point.id else self.html_href2html_body_soup[nav_point.href]
|
||||
|
||||
indent: str = " " * lvl
|
||||
self.logger.log(indent + f"Chapter: {title} is processing.")
|
||||
self.book_logger.log(indent + f"Chapter: {title} is processing.")
|
||||
is_chapter: bool = lvl <= LiveCartaConfig.SUPPORTED_LEVELS
|
||||
self.logger.log(indent + "Process title.")
|
||||
self.book_logger.log(indent + "Process title.")
|
||||
title_preprocessed: str = self.html_processor.prepare_title(title)
|
||||
self.logger.log(indent + "Process content.")
|
||||
self.book_logger.log(indent + "Process content.")
|
||||
content_preprocessed: Union[Tag, BeautifulSoup] = self.html_processor.prepare_content(
|
||||
title_preprocessed, content, remove_title_from_chapter=is_chapter)
|
||||
|
||||
@@ -597,8 +597,8 @@ class EpubConverter:
|
||||
chapter = self.html_node_to_livecarta_chapter_item(tl_nav_point)
|
||||
top_level_chapters.append(chapter)
|
||||
top_level_dict_chapters = [x.to_dict() for x in top_level_chapters]
|
||||
self.logger.log(f"Anchors found: {len(self.internal_anchors)}.")
|
||||
self.logger.log("End conversion.")
|
||||
self.book_logger.log(f"Anchors found: {len(self.internal_anchors)}.")
|
||||
self.book_logger.log("End conversion.")
|
||||
|
||||
return {
|
||||
"content": top_level_dict_chapters,
|
||||
|
||||
@@ -35,17 +35,17 @@ class EpubBook(BookSolver):
|
||||
# Parses and cleans html, gets list of tags, gets footnotes
|
||||
try:
|
||||
html_preprocessor = HtmlPresetsProcessor(
|
||||
logger=self.logger_object, preset_path="preset/epub_presets.json")
|
||||
html_processor = HtmlEpubProcessor(logger=self.logger_object,
|
||||
logger=self.book_logger, preset_path="preset/epub_presets.json")
|
||||
html_processor = HtmlEpubProcessor(logger=self.book_logger,
|
||||
html_preprocessor=html_preprocessor)
|
||||
except Exception as exc:
|
||||
self.logger_object.log(
|
||||
self.book_logger.log(
|
||||
"Error has occurred while processing .html", logging.ERROR)
|
||||
self.logger_object.log_error_to_main_log()
|
||||
self.book_logger.log_error_to_main_log()
|
||||
self.status_wrapper.set_error()
|
||||
raise exc
|
||||
json_converter = EpubConverter(
|
||||
self.book_path, access=self.access, logger=self.logger_object,
|
||||
self.book_path, access=self.access, logger=self.book_logger,
|
||||
style_processor=style_preprocessor, html_processor=html_processor)
|
||||
content_dict = json_converter.convert_to_dict()
|
||||
return content_dict
|
||||
|
||||
Reference in New Issue
Block a user