diff --git a/src/epub_converter/epub_converter.py b/src/epub_converter/epub_converter.py index d3a623a..e050791 100644 --- a/src/epub_converter/epub_converter.py +++ b/src/epub_converter/epub_converter.py @@ -1,15 +1,15 @@ import re import json import codecs -import os +import ebooklib +from ebooklib import epub +from ebooklib.epub import Link, Section +from os import path +from pathlib import Path from itertools import chain from premailer import transform from collections import defaultdict from typing import Dict, Union, List - -import ebooklib -from ebooklib import epub -from ebooklib.epub import Link, Section from bs4 import BeautifulSoup, NavigableString, Tag from src.util.helpers import BookLogger @@ -370,8 +370,8 @@ class EpubConverter: prepared content """ - dir_name = os.path.dirname(cur_file_path) - normed_path = os.path.normpath(os.path.join( + dir_name = path.dirname(cur_file_path) + normed_path = path.normpath(path.join( dir_name, href_in_link)).replace("\\", "/") full_path = [ path for path in self.hrefs_added_to_toc if normed_path in path] @@ -446,7 +446,7 @@ class EpubConverter: a_tag_href, a_tag_id = internal_link_tag.attrs["href"].split("#") a_tag_href_matched_to_toc = self.match_href_to_path_from_toc( toc_href, a_tag_href, internal_link_tag) if a_tag_href \ - else os.path.normpath(toc_href).replace("\\", "/") + else path.normpath(toc_href).replace("\\", "/") if a_tag_href_matched_to_toc: new_id = self.create_unique_id( a_tag_href_matched_to_toc, a_tag_id) @@ -594,8 +594,7 @@ class EpubConverter: path_to_html=nav_point.href, access=self.access, path2aws_path=self.book_image_src_path2aws_path, - book_id=self.file_path.stem - if hasattr(self.file_path, "stem") else "book_id") + book_id=Path(self.file_path).stem) indent = " " * lvl self.logger.log(indent + f"Chapter: {title} is processing.") @@ -635,7 +634,7 @@ class EpubConverter: if __name__ == "__main__": - epub_file_path = "../../epub/9781641050234.epub" + epub_file_path = "../../epub/9781614382264.epub" logger_object = BookLogger( name="epub", book_id=epub_file_path.split("/")[-1]) diff --git a/src/epub_converter/image_processing.py b/src/epub_converter/image_processing.py index aefa24d..be0246e 100644 --- a/src/epub_converter/image_processing.py +++ b/src/epub_converter/image_processing.py @@ -1,6 +1,5 @@ import os import pathlib - from bs4 import BeautifulSoup from src.access import Access @@ -35,7 +34,6 @@ def update_images_src_links(body_tag: BeautifulSoup, book_id: str = None) -> dict: """Function makes dictionary image_src_path -> Amazon web service_path""" img_tags = body_tag.find_all("img") - for img in img_tags: path_to_img_from_html = img.attrs.get("src") html_folder = os.path.dirname(path_to_html) @@ -55,7 +53,7 @@ def update_images_src_links(body_tag: BeautifulSoup, path2aws_path[path_to_img_from_root] = new_folder else: new_folder = save_image_locally( - path_to_img_from_root, img_content, "book_id") + path_to_img_from_root, img_content, book_id) img.attrs["src"] = str(new_folder) if img.attrs.get("width"):