import os
import pathlib

from bs4 import BeautifulSoup

from src.access import Access


def save_image_to_aws(access: Access, img_file_path: str, img_content: bytes, book_id: str):
    """Upload a single image through ``access`` and return the resulting link.

    Args:
        access: Object exposing ``send_image``; it performs the actual upload.
        img_file_path: Path of the image, forwarded as the first positional
            argument of ``send_image``.
        img_content: Raw bytes of the image.
        book_id: Identifier forwarded to ``send_image`` as ``doc_id``.

    Returns:
        Whatever ``access.send_image`` returns.
    """
    return access.send_image(
        img_file_path, doc_id=book_id, img_content=img_content)


def save_image_locally(img_file_path: str, img_content: bytes, book_id: str) -> pathlib.Path:
    """Write a single image into the local ``books/json/img_<book_id>`` folder.

    The target folder is resolved relative to this file: starting from the
    parent of the directory that contains this module, then
    ``../books/json/img_<book_id>/``. Only the base name of ``img_file_path``
    is kept, so images that share a file name overwrite each other.

    Args:
        img_file_path: Path of the image; only its base name is used.
        img_content: Raw bytes to write.
        book_id: Identifier used to build the ``img_<book_id>`` folder name.

    Returns:
        Path of the written file.
    """
    folder_path = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
    new_path = pathlib.Path(os.path.join(
        folder_path, f"../books/json/img_{book_id}/"))
    # parents=True: do not fail when "books/json" has not been created yet.
    new_path.mkdir(parents=True, exist_ok=True)

    new_img_path = new_path / os.path.basename(img_file_path)
    # Context manager guarantees the handle is closed even if write() raises.
    with open(new_img_path, "wb") as img_file:
        img_file.write(img_content)
    return new_img_path


def update_images_src_links(body_tag: BeautifulSoup,
                            img_href2img_content: dict,
                            path_to_html: str,
                            access=None,
                            path2aws_path: dict = None,
                            book_id: str = None) -> dict:
    """Store every ``<img>`` found in ``body_tag`` and rewrite its ``src``.

    For each image the ``src`` is resolved against the folder of
    ``path_to_html`` and looked up in ``img_href2img_content``. When ``access``
    is given the image is uploaded with ``save_image_to_aws`` (once per
    resolved path; the result is remembered in ``path2aws_path``), otherwise it
    is written to disk with ``save_image_locally``. The tag's ``src`` is then
    replaced by the stored location, and non-empty ``width``, ``height`` and
    ``style`` attributes are removed.

    Args:
        body_tag: Parsed markup whose ``<img>`` tags are modified in place.
        img_href2img_content: Mapping of resolved image path -> image bytes.
        path_to_html: Path of the HTML file the markup came from; image
            ``src`` values are resolved relative to its folder.
        access: Uploader passed to ``save_image_to_aws``; ``None`` selects
            local storage.
        path2aws_path: Mapping of resolved image path -> uploaded location.
            It is updated in place; when omitted a new dict is created.
        book_id: Identifier forwarded to the save helpers.

    Returns:
        The mapping of resolved image path -> uploaded location (the same
        object as ``path2aws_path`` when one was passed).

    Raises:
        AssertionError: If a resolved image path is missing from
            ``img_href2img_content``.
    """
    if path2aws_path is None:
        # The default used to be dereferenced as-is, which raised TypeError on
        # the first membership test whenever ``access`` was provided.
        path2aws_path = {}

    # Same for every image of this HTML file, so compute it once.
    html_folder = os.path.dirname(path_to_html)

    for img in body_tag.find_all("img"):
        path_to_img_from_html = img.attrs.get("src")
        # Normalise to forward slashes so the key matches on Windows as well.
        path_to_img_from_root = os.path.normpath(os.path.join(
            html_folder, path_to_img_from_html)).replace("\\", "/")

        # Explicit raise instead of ``assert`` so the check survives ``-O``;
        # the exception type and message are unchanged.
        if path_to_img_from_root not in img_href2img_content:
            raise AssertionError(
                f"Image {path_to_img_from_html} in file {path_to_html} "
                f"was not added to manifest.")
        img_content = img_href2img_content[path_to_img_from_root]

        if access is not None:
            if path_to_img_from_root in path2aws_path:
                # Already uploaded for this book: reuse the stored location.
                new_folder = path2aws_path[path_to_img_from_root]
            else:
                new_folder = save_image_to_aws(
                    access, path_to_img_from_root, img_content, book_id)
                path2aws_path[path_to_img_from_root] = new_folder
        else:
            new_folder = save_image_locally(
                path_to_img_from_root, img_content, book_id)

        img.attrs["src"] = str(new_folder)
        # Drop presentation attributes that carry a non-empty value.
        for attr_name in ("width", "height", "style"):
            if img.attrs.get(attr_name):
                del img.attrs[attr_name]

    return path2aws_path