Pass the actual book_id (instead of a placeholder) when saving images locally

2022-07-14 12:39:48 +03:00
parent a5f7a9b36c
commit e1f06ba884
2 changed files with 11 additions and 14 deletions
--- a/src/epub_converter/epub_converter.py
+++ b/src/epub_converter/epub_converter.py
@@ -1,15 +1,15 @@
 import re
 import json
 import codecs
-import os
+import ebooklib
+from ebooklib import epub
+from ebooklib.epub import Link, Section
+from os import path
+from pathlib import Path
 from itertools import chain
 from premailer import transform
 from collections import defaultdict
 from typing import Dict, Union, List
-
-import ebooklib
-from ebooklib import epub
-from ebooklib.epub import Link, Section
 from bs4 import BeautifulSoup, NavigableString, Tag

 from src.util.helpers import BookLogger
@@ -370,8 +370,8 @@ class EpubConverter:
            prepared content

        """
-        dir_name = os.path.dirname(cur_file_path)
-        normed_path = os.path.normpath(os.path.join(
+        dir_name = path.dirname(cur_file_path)
+        normed_path = path.normpath(path.join(
            dir_name, href_in_link)).replace("\\", "/")
        full_path = [
            path for path in self.hrefs_added_to_toc if normed_path in path]
@@ -446,7 +446,7 @@ class EpubConverter:
                    a_tag_href, a_tag_id = internal_link_tag.attrs["href"].split("#")
                    a_tag_href_matched_to_toc = self.match_href_to_path_from_toc(
                        toc_href, a_tag_href, internal_link_tag) if a_tag_href \
-                        else os.path.normpath(toc_href).replace("\\", "/")
+                        else path.normpath(toc_href).replace("\\", "/")
                    if a_tag_href_matched_to_toc:
                        new_id = self.create_unique_id(
                            a_tag_href_matched_to_toc, a_tag_id)
@@ -594,8 +594,7 @@ class EpubConverter:
                                                                    path_to_html=nav_point.href,
                                                                    access=self.access,
                                                                    path2aws_path=self.book_image_src_path2aws_path,
-                                                                    book_id=self.file_path.stem
-                                                                    if hasattr(self.file_path, "stem") else "book_id")
+                                                                    book_id=Path(self.file_path).stem)

        indent = " " * lvl
        self.logger.log(indent + f"Chapter: {title} is processing.")
@@ -635,7 +634,7 @@ class EpubConverter:


 if __name__ == "__main__":
-    epub_file_path = "../../epub/9781641050234.epub"
+    epub_file_path = "../../epub/9781614382264.epub"
    logger_object = BookLogger(
        name="epub", book_id=epub_file_path.split("/")[-1])

--- a/src/epub_converter/image_processing.py
+++ b/src/epub_converter/image_processing.py
@@ -1,6 +1,5 @@
 import os
 import pathlib
-
 from bs4 import BeautifulSoup

 from src.access import Access
@@ -35,7 +34,6 @@ def update_images_src_links(body_tag: BeautifulSoup,
                            book_id: str = None) -> dict:
    """Function makes dictionary image_src_path -> Amazon web service_path"""
    img_tags = body_tag.find_all("img")
-
    for img in img_tags:
        path_to_img_from_html = img.attrs.get("src")
        html_folder = os.path.dirname(path_to_html)
@@ -55,7 +53,7 @@ def update_images_src_links(body_tag: BeautifulSoup,
                path2aws_path[path_to_img_from_root] = new_folder
        else:
            new_folder = save_image_locally(
-                path_to_img_from_root, img_content, "book_id")
+                path_to_img_from_root, img_content, book_id)

        img.attrs["src"] = str(new_folder)
        if img.attrs.get("width"):