Pass the concrete book_id when saving images locally

2022-07-14 12:39:48 +03:00
parent a5f7a9b36c
commit e1f06ba884
2 changed files with 11 additions and 14 deletions
--- a/src/epub_converter/epub_converter.py
+++ b/src/epub_converter/epub_converter.py
@@ -1,15 +1,15 @@
 import re
 import json
 import codecs
-import os
+import ebooklib
 from ebooklib import epub
 from ebooklib.epub import Link, Section
 from os import path
 from pathlib import Path
 from itertools import chain
 from premailer import transform
 from collections import defaultdict
 from typing import Dict, Union, List
 import ebooklib
 from ebooklib import epub
 from ebooklib.epub import Link, Section
 from bs4 import BeautifulSoup, NavigableString, Tag
 from src.util.helpers import BookLogger
@@ -370,8 +370,8 @@ class EpubConverter:
            prepared content
        """
-        dir_name = os.path.dirname(cur_file_path)
+        dir_name = path.dirname(cur_file_path)
-        normed_path = os.path.normpath(os.path.join(
+        normed_path = path.normpath(path.join(
            dir_name, href_in_link)).replace("\\", "/")
        full_path = [
            path for path in self.hrefs_added_to_toc if normed_path in path]
@@ -446,7 +446,7 @@ class EpubConverter:
                    a_tag_href, a_tag_id = internal_link_tag.attrs["href"].split("#")
                    a_tag_href_matched_to_toc = self.match_href_to_path_from_toc(
                        toc_href, a_tag_href, internal_link_tag) if a_tag_href \
-                        else os.path.normpath(toc_href).replace("\\", "/")
+                        else path.normpath(toc_href).replace("\\", "/")
                    if a_tag_href_matched_to_toc:
                        new_id = self.create_unique_id(
                            a_tag_href_matched_to_toc, a_tag_id)
@@ -594,8 +594,7 @@ class EpubConverter:
                                                                    path_to_html=nav_point.href,
                                                                    access=self.access,
                                                                    path2aws_path=self.book_image_src_path2aws_path,
-                                                                    book_id=self.file_path.stem
+                                                                    book_id=Path(self.file_path).stem)
                                                                    if hasattr(self.file_path, "stem") else "book_id")
        indent = " " * lvl
        self.logger.log(indent + f"Chapter: {title} is processing.")
@@ -635,7 +634,7 @@ class EpubConverter:
 if __name__ == "__main__":
-    epub_file_path = "../../epub/9781641050234.epub"
+    epub_file_path = "../../epub/9781614382264.epub"
    logger_object = BookLogger(
        name="epub", book_id=epub_file_path.split("/")[-1])
--- a/src/epub_converter/image_processing.py
+++ b/src/epub_converter/image_processing.py
@@ -1,6 +1,5 @@
 import os
 import pathlib
 from bs4 import BeautifulSoup
 from src.access import Access
@@ -35,7 +34,6 @@ def update_images_src_links(body_tag: BeautifulSoup,
                            book_id: str = None) -> dict:
    """Function makes dictionary image_src_path -> Amazon web service_path"""
    img_tags = body_tag.find_all("img")
    for img in img_tags:
        path_to_img_from_html = img.attrs.get("src")
        html_folder = os.path.dirname(path_to_html)
@@ -55,7 +53,7 @@ def update_images_src_links(body_tag: BeautifulSoup,
                path2aws_path[path_to_img_from_root] = new_folder
        else:
            new_folder = save_image_locally(
-                path_to_img_from_root, img_content, "book_id")
+                path_to_img_from_root, img_content, book_id)
        img.attrs["src"] = str(new_folder)
        if img.attrs.get("width"):