Pass the actual book_id (instead of the "book_id" placeholder) when saving images locally

This commit is contained in:
Kiryl
2022-07-14 12:39:48 +03:00
parent a5f7a9b36c
commit e1f06ba884
2 changed files with 11 additions and 14 deletions

View File

@@ -1,15 +1,15 @@
import re import re
import json import json
import codecs import codecs
import os import ebooklib
from ebooklib import epub
from ebooklib.epub import Link, Section
from os import path
from pathlib import Path
from itertools import chain from itertools import chain
from premailer import transform from premailer import transform
from collections import defaultdict from collections import defaultdict
from typing import Dict, Union, List from typing import Dict, Union, List
import ebooklib
from ebooklib import epub
from ebooklib.epub import Link, Section
from bs4 import BeautifulSoup, NavigableString, Tag from bs4 import BeautifulSoup, NavigableString, Tag
from src.util.helpers import BookLogger from src.util.helpers import BookLogger
@@ -370,8 +370,8 @@ class EpubConverter:
prepared content prepared content
""" """
dir_name = os.path.dirname(cur_file_path) dir_name = path.dirname(cur_file_path)
normed_path = os.path.normpath(os.path.join( normed_path = path.normpath(path.join(
dir_name, href_in_link)).replace("\\", "/") dir_name, href_in_link)).replace("\\", "/")
full_path = [ full_path = [
path for path in self.hrefs_added_to_toc if normed_path in path] path for path in self.hrefs_added_to_toc if normed_path in path]
@@ -446,7 +446,7 @@ class EpubConverter:
a_tag_href, a_tag_id = internal_link_tag.attrs["href"].split("#") a_tag_href, a_tag_id = internal_link_tag.attrs["href"].split("#")
a_tag_href_matched_to_toc = self.match_href_to_path_from_toc( a_tag_href_matched_to_toc = self.match_href_to_path_from_toc(
toc_href, a_tag_href, internal_link_tag) if a_tag_href \ toc_href, a_tag_href, internal_link_tag) if a_tag_href \
else os.path.normpath(toc_href).replace("\\", "/") else path.normpath(toc_href).replace("\\", "/")
if a_tag_href_matched_to_toc: if a_tag_href_matched_to_toc:
new_id = self.create_unique_id( new_id = self.create_unique_id(
a_tag_href_matched_to_toc, a_tag_id) a_tag_href_matched_to_toc, a_tag_id)
@@ -594,8 +594,7 @@ class EpubConverter:
path_to_html=nav_point.href, path_to_html=nav_point.href,
access=self.access, access=self.access,
path2aws_path=self.book_image_src_path2aws_path, path2aws_path=self.book_image_src_path2aws_path,
book_id=self.file_path.stem book_id=Path(self.file_path).stem)
if hasattr(self.file_path, "stem") else "book_id")
indent = " " * lvl indent = " " * lvl
self.logger.log(indent + f"Chapter: {title} is processing.") self.logger.log(indent + f"Chapter: {title} is processing.")
@@ -635,7 +634,7 @@ class EpubConverter:
if __name__ == "__main__": if __name__ == "__main__":
epub_file_path = "../../epub/9781641050234.epub" epub_file_path = "../../epub/9781614382264.epub"
logger_object = BookLogger( logger_object = BookLogger(
name="epub", book_id=epub_file_path.split("/")[-1]) name="epub", book_id=epub_file_path.split("/")[-1])

View File

@@ -1,6 +1,5 @@
import os import os
import pathlib import pathlib
from bs4 import BeautifulSoup from bs4 import BeautifulSoup
from src.access import Access from src.access import Access
@@ -35,7 +34,6 @@ def update_images_src_links(body_tag: BeautifulSoup,
book_id: str = None) -> dict: book_id: str = None) -> dict:
"""Function makes dictionary image_src_path -> Amazon web service_path""" """Function makes dictionary image_src_path -> Amazon web service_path"""
img_tags = body_tag.find_all("img") img_tags = body_tag.find_all("img")
for img in img_tags: for img in img_tags:
path_to_img_from_html = img.attrs.get("src") path_to_img_from_html = img.attrs.get("src")
html_folder = os.path.dirname(path_to_html) html_folder = os.path.dirname(path_to_html)
@@ -55,7 +53,7 @@ def update_images_src_links(body_tag: BeautifulSoup,
path2aws_path[path_to_img_from_root] = new_folder path2aws_path[path_to_img_from_root] = new_folder
else: else:
new_folder = save_image_locally( new_folder = save_image_locally(
path_to_img_from_root, img_content, "book_id") path_to_img_from_root, img_content, book_id)
img.attrs["src"] = str(new_folder) img.attrs["src"] = str(new_folder)
if img.attrs.get("width"): if img.attrs.get("width"):