Optimize speed of image_processing.py

This commit is contained in:
Kiryl
2022-07-14 19:12:22 +03:00
parent ca229dc6b7
commit 7b35d8a7c2

View File

@@ -3,36 +3,32 @@ import pathlib
from shutil import copyfile from shutil import copyfile
def process_images(body_tag, access, html_path, book_id): def process_images(access, html_path, book_id, body_tag):
""" """
Function to process <img> tag. Img should be sent Amazon S3 and then return new tag with valid link. Function to process <img> tag.
Img should be sent Amazon S3 and then return new tag with valid link.
For now images are moved to one folder. For now images are moved to one folder.
""" """
img_tags = body_tag.find_all('img') img_tags = body_tag.find_all('img')
for img in img_tags:
img_name = img.attrs.get('src')
# quick fix for bad links
if (len(img_name) >= 3) and img_name[:3] == '../':
img_name = img_name[3:]
img_path = pathlib.Path(f'{html_path.parent}', f'{img_name}')
if len(img_tags): if access is not None:
if access is None: link = access.send_image(img_path, doc_id=book_id)
folder_path = os.path.dirname( img.attrs['src'] = link
os.path.dirname(os.path.abspath(__file__))) else:
new_path = pathlib.Path(os.path.join( if img_tags.index(img) == 0:
folder_path, f'json/img_{book_id}/')) folder_path = os.path.dirname(
new_path.mkdir(exist_ok=True) os.path.dirname(os.path.abspath(__file__)))
new_path = pathlib.Path(os.path.join(
for img in img_tags: folder_path, f'../json/img_{book_id}/'))
img_name = img.attrs.get('src') new_path.mkdir(exist_ok=True)
# quick fix for bad links new_img_path = new_path / img_name
if (len(img_name) >= 3) and img_name[:3] == '../': copyfile(img_path, new_img_path)
img_name = img_name[3:] img.attrs["src"] = str(new_img_path)
img_path = pathlib.Path(f'{html_path.parent}', f'{img_name}')
if access is not None:
link = access.send_image(img_path, doc_id=book_id)
img.attrs['src'] = link
else:
img_size = os.path.getsize(img_path)
new_img_path = new_path / img_name
copyfile(img_path, new_img_path)
img.attrs["src"] = str(new_img_path)
return img_tags return img_tags