From 7b35d8a7c2f63e07f3873d05e351f2b51b67020e Mon Sep 17 00:00:00 2001
From: Kiryl
Date: Thu, 14 Jul 2022 19:12:22 +0300
Subject: [PATCH] Optimize speed of image_processing.py

---
 src/docx_converter/image_processing.py | 52 ++++++++++++--------------
 1 file changed, 24 insertions(+), 28 deletions(-)

diff --git a/src/docx_converter/image_processing.py b/src/docx_converter/image_processing.py
index e593312..dfd413b 100644
--- a/src/docx_converter/image_processing.py
+++ b/src/docx_converter/image_processing.py
@@ -3,36 +3,32 @@ import pathlib
 from shutil import copyfile
 
 
-def process_images(body_tag, access, html_path, book_id):
+def process_images(access, html_path, book_id, body_tag):
     """
-    Function to process tag. Img should be sent Amazon S3 and then return new tag with valid link.
+    Function to process tag.
+    Img should be sent Amazon S3 and then return new tag with valid link.
     For now images are moved to one folder.
+
     """
     img_tags = body_tag.find_all('img')
+    for img in img_tags:
+        img_name = img.attrs.get('src')
+        # quick fix for bad links
+        if (len(img_name) >= 3) and img_name[:3] == '../':
+            img_name = img_name[3:]
+        img_path = pathlib.Path(f'{html_path.parent}', f'{img_name}')
 
-    if len(img_tags):
-        if access is None:
-            folder_path = os.path.dirname(
-                os.path.dirname(os.path.abspath(__file__)))
-            new_path = pathlib.Path(os.path.join(
-                folder_path, f'json/img_{book_id}/'))
-            new_path.mkdir(exist_ok=True)
-
-        for img in img_tags:
-            img_name = img.attrs.get('src')
-            # quick fix for bad links
-            if (len(img_name) >= 3) and img_name[:3] == '../':
-                img_name = img_name[3:]
-
-            img_path = pathlib.Path(f'{html_path.parent}', f'{img_name}')
-
-            if access is not None:
-                link = access.send_image(img_path, doc_id=book_id)
-                img.attrs['src'] = link
-            else:
-                img_size = os.path.getsize(img_path)
-                new_img_path = new_path / img_name
-                copyfile(img_path, new_img_path)
-                img.attrs["src"] = str(new_img_path)
-
-    return img_tags
\ No newline at end of file
+        if access is not None:
+            link = access.send_image(img_path, doc_id=book_id)
+            img.attrs['src'] = link
+        else:
+            if img_tags.index(img) == 0:
+                folder_path = os.path.dirname(
+                    os.path.dirname(os.path.abspath(__file__)))
+                new_path = pathlib.Path(os.path.join(
+                    folder_path, f'../json/img_{book_id}/'))
+                new_path.mkdir(exist_ok=True)
+            new_img_path = new_path / img_name
+            copyfile(img_path, new_img_path)
+            img.attrs["src"] = str(new_img_path)
+    return img_tags