forked from LiveCarta/BookConverter
Rewrite the processing of images
This commit is contained in:
@@ -1,9 +1,28 @@
|
|||||||
import os
|
import os
|
||||||
import pathlib
|
import pathlib
|
||||||
|
from bs4 import Tag
|
||||||
from shutil import copyfile
|
from shutil import copyfile
|
||||||
|
|
||||||
|
|
||||||
def process_images(access, html_path, book_id, body_tag):
|
def save_image_to_aws(access, img_file_path: str, book_id: int) -> str:
|
||||||
|
"""Function saves all images to Amazon web service"""
|
||||||
|
link_path: str = access.send_image(img_file_path, doc_id=book_id)
|
||||||
|
return link_path
|
||||||
|
|
||||||
|
|
||||||
|
def save_image_locally(img_file_path: str, book_id: int) -> pathlib.Path:
|
||||||
|
"""Function saves all images locally"""
|
||||||
|
folder_path = os.path.dirname(
|
||||||
|
os.path.dirname(os.path.abspath(__file__)))
|
||||||
|
new_path = pathlib.Path(os.path.join(
|
||||||
|
folder_path, f"../books/json/img_{book_id}/"))
|
||||||
|
new_path.mkdir(exist_ok=True)
|
||||||
|
img_folder_path = new_path / os.path.basename(img_file_path)
|
||||||
|
copyfile(img_file_path, img_folder_path)
|
||||||
|
return img_folder_path
|
||||||
|
|
||||||
|
|
||||||
|
def process_images(access, path_to_html: str, book_id: int, body_tag: Tag):
|
||||||
"""
|
"""
|
||||||
Function to process <img> tag.
|
Function to process <img> tag.
|
||||||
Img should be sent Amazon S3 and then return new tag with valid link.
|
Img should be sent Amazon S3 and then return new tag with valid link.
|
||||||
@@ -12,23 +31,18 @@ def process_images(access, html_path, book_id, body_tag):
|
|||||||
"""
|
"""
|
||||||
img_tags = body_tag.find_all("img")
|
img_tags = body_tag.find_all("img")
|
||||||
for img in img_tags:
|
for img in img_tags:
|
||||||
img_name = img.attrs.get("src")
|
path_to_img_from_html = img.attrs.get("src")
|
||||||
# quick fix for bad links
|
# quick fix for bad links
|
||||||
if (len(img_name) >= 3) and img_name[:3] == "../":
|
if (len(path_to_img_from_html) >= 3) and path_to_img_from_html [:3] == "../":
|
||||||
img_name = img_name[3:]
|
path_to_img_from_html = path_to_img_from_html [3:]
|
||||||
img_path = pathlib.Path(f"{html_path.parent}", f"{img_name}")
|
html_folder = os.path.dirname(path_to_html)
|
||||||
|
path_to_img_from_root = os.path.normpath(os.path.join(
|
||||||
|
html_folder, path_to_img_from_html)).replace("\\", "/")
|
||||||
if access is not None:
|
if access is not None:
|
||||||
link = access.send_image(img_path, doc_id=book_id)
|
img_folder_path = save_image_to_aws(
|
||||||
img.attrs["src"] = link
|
access, path_to_img_from_root, book_id)
|
||||||
else:
|
else:
|
||||||
if img_tags.index(img) == 0:
|
img_folder_path = save_image_locally(
|
||||||
folder_path = os.path.dirname(
|
path_to_img_from_root, book_id)
|
||||||
os.path.dirname(os.path.abspath(__file__)))
|
img.attrs["src"] = str(img_folder_path)
|
||||||
new_path = pathlib.Path(os.path.join(
|
|
||||||
folder_path, f"../books/json/img_{book_id}/"))
|
|
||||||
new_path.mkdir(exist_ok=True)
|
|
||||||
new_img_path = new_path / img_name
|
|
||||||
copyfile(img_path, new_img_path)
|
|
||||||
img.attrs["src"] = str(new_img_path)
|
|
||||||
return img_tags
|
return img_tags
|
||||||
|
|||||||
@@ -13,18 +13,18 @@ def save_image_to_aws(access: Access, img_file_path: str, img_content: bytes, bo
|
|||||||
return link_path
|
return link_path
|
||||||
|
|
||||||
|
|
||||||
def save_image_locally(img_file_path: str, img_content: bytes, book_id: str):
|
def save_image_locally(img_file_path: str, img_content: bytes, book_id: str) -> pathlib.Path:
|
||||||
"""Function saves all images locally"""
|
"""Function saves all images locally"""
|
||||||
folder_path = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
|
folder_path = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
|
||||||
new_path = pathlib.Path(os.path.join(
|
new_path = pathlib.Path(os.path.join(
|
||||||
folder_path, f"../books/json/img_{book_id}/"))
|
folder_path, f"../books/json/img_{book_id}/"))
|
||||||
new_path.mkdir(exist_ok=True)
|
new_path.mkdir(exist_ok=True)
|
||||||
|
|
||||||
new_img_path = new_path / os.path.basename(img_file_path)
|
img_folder_path = new_path / os.path.basename(img_file_path)
|
||||||
f = open(new_img_path, "wb+")
|
f = open(img_folder_path, "wb+")
|
||||||
f.write(img_content)
|
f.write(img_content)
|
||||||
f.close()
|
f.close()
|
||||||
return new_img_path
|
return img_folder_path
|
||||||
|
|
||||||
|
|
||||||
def update_images_src_links(body_tag: BeautifulSoup,
|
def update_images_src_links(body_tag: BeautifulSoup,
|
||||||
@@ -47,20 +47,17 @@ def update_images_src_links(body_tag: BeautifulSoup,
|
|||||||
img_content: bytes = img_href2img_content[path_to_img_from_root]
|
img_content: bytes = img_href2img_content[path_to_img_from_root]
|
||||||
if access is not None:
|
if access is not None:
|
||||||
if path_to_img_from_root in path2aws_path:
|
if path_to_img_from_root in path2aws_path:
|
||||||
new_folder = path2aws_path[path_to_img_from_root]
|
img_folder_path = path2aws_path[path_to_img_from_root]
|
||||||
else:
|
else:
|
||||||
new_folder = save_image_to_aws(
|
img_folder_path = save_image_to_aws(
|
||||||
access, path_to_img_from_root, img_content, book_id)
|
access, path_to_img_from_root, img_content, book_id)
|
||||||
path2aws_path[path_to_img_from_root] = new_folder
|
path2aws_path[path_to_img_from_root] = img_folder_path
|
||||||
else:
|
else:
|
||||||
new_folder = save_image_locally(
|
img_folder_path = save_image_locally(
|
||||||
path_to_img_from_root, img_content, book_id)
|
path_to_img_from_root, img_content, book_id)
|
||||||
|
|
||||||
img.attrs["src"] = str(new_folder)
|
img.attrs["src"] = str(img_folder_path)
|
||||||
if img.attrs.get("width"):
|
for attr in ["width", "height", "style"]:
|
||||||
del img.attrs["width"]
|
if img.attrs.get(attr):
|
||||||
if img.attrs.get("height"):
|
del img.attrs[attr]
|
||||||
del img.attrs["height"]
|
|
||||||
if img.attrs.get("style"):
|
|
||||||
del img.attrs["style"]
|
|
||||||
return path2aws_path
|
return path2aws_path
|
||||||
|
|||||||
Reference in New Issue
Block a user