forked from LiveCarta/BookConverter
67 lines
2.6 KiB
Python
67 lines
2.6 KiB
Python
import os
|
|
import pathlib
|
|
from typing import Dict
|
|
from bs4 import BeautifulSoup
|
|
|
|
from src.access import Access
|
|
|
|
|
|
def save_image_to_aws(access: Access, img_file_path: str, img_content: bytes, book_id: str) -> str:
    """Send a single image through ``access`` and return the resulting link.

    This is a thin wrapper around ``access.send_image``: the image path is
    passed positionally, the book id is forwarded as ``doc_id`` and the raw
    bytes as ``img_content``. Whatever ``send_image`` returns is handed back
    unchanged (presumably the remote storage URL of the image - confirm
    against ``Access.send_image``).
    """
    return access.send_image(
        img_file_path,
        doc_id=book_id,
        img_content=img_content,
    )
|
|
|
|
|
|
def save_image_locally(img_file_path: str, img_content: bytes, book_id: str) -> pathlib.Path:
    """Write one image into the local ``books/json/img_<book_id>/`` folder.

    The destination is resolved relative to this module: the directory two
    levels above this file is taken as the base, and the image is stored in
    ``<base>/../books/json/img_<book_id>/``. Only the base name of
    ``img_file_path`` is kept, so two images of the same book that share a
    file name overwrite each other.

    Args:
        img_file_path: Path of the image inside the book; only its base name
            is used for the local file name.
        img_content: Raw bytes of the image.
        book_id: Identifier used to name the per-book image folder.

    Returns:
        Path of the file that was written (not normalised, it still contains
        the ``..`` segment).
    """
    folder_path = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
    new_path = pathlib.Path(os.path.join(
        folder_path, f"../books/json/img_{book_id}/"))
    # parents=True so the first run on a clean checkout, where books/json
    # does not exist yet, creates the whole chain instead of raising
    # FileNotFoundError.
    new_path.mkdir(parents=True, exist_ok=True)

    new_img_path = new_path / os.path.basename(img_file_path)
    # write_bytes opens, writes and closes the file in one step, so the
    # handle is released even if the write itself fails.
    new_img_path.write_bytes(img_content)
    return new_img_path
|
|
|
|
|
|
def update_images_src_links(body_tag: BeautifulSoup,
                            img_href2img_content: Dict[str, bytes],
                            path_to_html: str,
                            access: Access = None,
                            path2aws_path: Dict[str, str] = None,
                            book_id: str = None) -> Dict[str, str]:
    """Store every image referenced in ``body_tag`` and rewrite its ``src``.

    For each ``<img>`` tag the ``src`` is resolved against the folder of
    ``path_to_html`` to get a root-relative key into ``img_href2img_content``.
    The image bytes are then saved either through ``access`` (reusing the
    link cached in ``path2aws_path`` when the same image was already sent)
    or, when ``access`` is ``None``, to the local image folder. The tag's
    ``src`` is replaced with the stored location, and non-empty ``width``,
    ``height`` and ``style`` attributes are removed. ``body_tag`` is
    modified in place.

    Args:
        body_tag: Parsed HTML whose ``<img>`` tags are rewritten.
        img_href2img_content: Root-relative image path -> raw image bytes.
        path_to_html: Path of the HTML file ``body_tag`` comes from; used to
            resolve relative ``src`` values.
        access: Object used to send images remotely; ``None`` means images
            are saved locally instead.
        path2aws_path: Cache of root-relative image path -> link returned by
            the remote save. Updated in place; a fresh dict is created when
            ``None`` is passed.
        book_id: Identifier forwarded to the save helpers.

    Returns:
        The (possibly updated) ``path2aws_path`` mapping.

    Raises:
        AssertionError: If a referenced image is missing from
            ``img_href2img_content``.
    """
    # The default is None; without this the remote branch would fail on
    # "in None" / item assignment the first time an image is sent.
    if path2aws_path is None:
        path2aws_path = {}

    # Same for every image of this document, so compute it once.
    html_folder = os.path.dirname(path_to_html)

    for img in body_tag.find_all("img"):
        path_to_img_from_html = img.attrs.get("src")
        if path_to_img_from_html is None:
            # Nothing to resolve or rewrite for an <img> without a src.
            continue

        # Normalise to forward slashes so the key matches on Windows too.
        path_to_img_from_root = os.path.normpath(os.path.join(
            html_folder, path_to_img_from_html)).replace("\\", "/")

        # Raised explicitly (instead of a bare assert) so the check survives
        # running Python with -O while keeping the same exception type.
        if path_to_img_from_root not in img_href2img_content:
            raise AssertionError(
                f"Image {path_to_img_from_html} in file {path_to_html} was not added to manifest.")

        img_content: bytes = img_href2img_content[path_to_img_from_root]
        if access is not None:
            if path_to_img_from_root in path2aws_path:
                # Already sent earlier: reuse the stored link.
                new_folder = path2aws_path[path_to_img_from_root]
            else:
                new_folder = save_image_to_aws(
                    access, path_to_img_from_root, img_content, book_id)
                path2aws_path[path_to_img_from_root] = new_folder
        else:
            new_folder = save_image_locally(
                path_to_img_from_root, img_content, book_id)

        img.attrs["src"] = str(new_folder)
        # Drop presentation attributes that carry a non-empty value.
        for attr_name in ("width", "height", "style"):
            if img.attrs.get(attr_name):
                del img.attrs[attr_name]
    return path2aws_path
|