From 19c2308c58c814386282e447b1269045aa897fd4 Mon Sep 17 00:00:00 2001 From: Kiryl Date: Wed, 3 Aug 2022 16:45:18 +0300 Subject: [PATCH] Rewrite the processing of images --- src/docx_converter/image_processing.py | 48 +++++++++++++++++--------- src/epub_converter/image_processing.py | 27 +++++++-------- 2 files changed, 43 insertions(+), 32 deletions(-) diff --git a/src/docx_converter/image_processing.py b/src/docx_converter/image_processing.py index 9c5fdab..7e87824 100644 --- a/src/docx_converter/image_processing.py +++ b/src/docx_converter/image_processing.py @@ -1,9 +1,28 @@ import os import pathlib +from bs4 import Tag from shutil import copyfile -def process_images(access, html_path, book_id, body_tag): +def save_image_to_aws(access, img_file_path: str, book_id: int) -> str: + """Function saves all images to Amazon web service""" + link_path: str = access.send_image(img_file_path, doc_id=book_id) + return link_path + + +def save_image_locally(img_file_path: str, book_id: int) -> pathlib.Path: + """Function saves all images locally""" + folder_path = os.path.dirname( + os.path.dirname(os.path.abspath(__file__))) + new_path = pathlib.Path(os.path.join( + folder_path, f"../books/json/img_{book_id}/")) + new_path.mkdir(exist_ok=True) + img_folder_path = new_path / os.path.basename(img_file_path) + copyfile(img_file_path, img_folder_path) + return img_folder_path + + +def process_images(access, path_to_html: str, book_id: int, body_tag: Tag): """ Function to process tag. Img should be sent Amazon S3 and then return new tag with valid link. @@ -12,23 +31,18 @@ def process_images(access, html_path, book_id, body_tag): """ img_tags = body_tag.find_all("img") for img in img_tags: - img_name = img.attrs.get("src") + path_to_img_from_html = img.attrs.get("src") # quick fix for bad links - if (len(img_name) >= 3) and img_name[:3] == "../": - img_name = img_name[3:] - img_path = pathlib.Path(f"{html_path.parent}", f"{img_name}") - + if (len(path_to_img_from_html) >= 3) and path_to_img_from_html [:3] == "../": + path_to_img_from_html = path_to_img_from_html [3:] + html_folder = os.path.dirname(path_to_html) + path_to_img_from_root = os.path.normpath(os.path.join( + html_folder, path_to_img_from_html)).replace("\\", "/") if access is not None: - link = access.send_image(img_path, doc_id=book_id) - img.attrs["src"] = link + img_folder_path = save_image_to_aws( + access, path_to_img_from_root, book_id) else: - if img_tags.index(img) == 0: - folder_path = os.path.dirname( - os.path.dirname(os.path.abspath(__file__))) - new_path = pathlib.Path(os.path.join( - folder_path, f"../books/json/img_{book_id}/")) - new_path.mkdir(exist_ok=True) - new_img_path = new_path / img_name - copyfile(img_path, new_img_path) - img.attrs["src"] = str(new_img_path) + img_folder_path = save_image_locally( + path_to_img_from_root, book_id) + img.attrs["src"] = str(img_folder_path) return img_tags diff --git a/src/epub_converter/image_processing.py b/src/epub_converter/image_processing.py index b0238ac..da4e8a7 100644 --- a/src/epub_converter/image_processing.py +++ b/src/epub_converter/image_processing.py @@ -13,18 +13,18 @@ def save_image_to_aws(access: Access, img_file_path: str, img_content: bytes, bo return link_path -def save_image_locally(img_file_path: str, img_content: bytes, book_id: str): +def save_image_locally(img_file_path: str, img_content: bytes, book_id: str) -> pathlib.Path: """Function saves all images locally""" folder_path = os.path.dirname(os.path.dirname(os.path.abspath(__file__))) new_path = pathlib.Path(os.path.join( folder_path, f"../books/json/img_{book_id}/")) new_path.mkdir(exist_ok=True) - new_img_path = new_path / os.path.basename(img_file_path) - f = open(new_img_path, "wb+") + img_folder_path = new_path / os.path.basename(img_file_path) + f = open(img_folder_path, "wb+") f.write(img_content) f.close() - return new_img_path + return img_folder_path def update_images_src_links(body_tag: BeautifulSoup, @@ -47,20 +47,17 @@ def update_images_src_links(body_tag: BeautifulSoup, img_content: bytes = img_href2img_content[path_to_img_from_root] if access is not None: if path_to_img_from_root in path2aws_path: - new_folder = path2aws_path[path_to_img_from_root] + img_folder_path = path2aws_path[path_to_img_from_root] else: - new_folder = save_image_to_aws( + img_folder_path = save_image_to_aws( access, path_to_img_from_root, img_content, book_id) - path2aws_path[path_to_img_from_root] = new_folder + path2aws_path[path_to_img_from_root] = img_folder_path else: - new_folder = save_image_locally( + img_folder_path = save_image_locally( path_to_img_from_root, img_content, book_id) - img.attrs["src"] = str(new_folder) - if img.attrs.get("width"): - del img.attrs["width"] - if img.attrs.get("height"): - del img.attrs["height"] - if img.attrs.get("style"): - del img.attrs["style"] + img.attrs["src"] = str(img_folder_path) + for attr in ["width", "height", "style"]: + if img.attrs.get(attr): + del img.attrs[attr] return path2aws_path