Optimize speed of image_processing.py

This commit is contained in:
Kiryl
2022-07-14 19:12:22 +03:00
parent ca229dc6b7
commit 7b35d8a7c2

View File

@@ -3,36 +3,32 @@ import pathlib
from shutil import copyfile from shutil import copyfile
def process_images(body_tag, access, html_path, book_id): def process_images(access, html_path, book_id, body_tag):
""" """
Function to process <img> tag. Img should be sent Amazon S3 and then return new tag with valid link. Function to process <img> tag.
Img should be sent Amazon S3 and then return new tag with valid link.
For now images are moved to one folder. For now images are moved to one folder.
""" """
img_tags = body_tag.find_all('img') img_tags = body_tag.find_all('img')
if len(img_tags):
if access is None:
folder_path = os.path.dirname(
os.path.dirname(os.path.abspath(__file__)))
new_path = pathlib.Path(os.path.join(
folder_path, f'json/img_{book_id}/'))
new_path.mkdir(exist_ok=True)
for img in img_tags: for img in img_tags:
img_name = img.attrs.get('src') img_name = img.attrs.get('src')
# quick fix for bad links # quick fix for bad links
if (len(img_name) >= 3) and img_name[:3] == '../': if (len(img_name) >= 3) and img_name[:3] == '../':
img_name = img_name[3:] img_name = img_name[3:]
img_path = pathlib.Path(f'{html_path.parent}', f'{img_name}') img_path = pathlib.Path(f'{html_path.parent}', f'{img_name}')
if access is not None: if access is not None:
link = access.send_image(img_path, doc_id=book_id) link = access.send_image(img_path, doc_id=book_id)
img.attrs['src'] = link img.attrs['src'] = link
else: else:
img_size = os.path.getsize(img_path) if img_tags.index(img) == 0:
folder_path = os.path.dirname(
os.path.dirname(os.path.abspath(__file__)))
new_path = pathlib.Path(os.path.join(
folder_path, f'../json/img_{book_id}/'))
new_path.mkdir(exist_ok=True)
new_img_path = new_path / img_name new_img_path = new_path / img_name
copyfile(img_path, new_img_path) copyfile(img_path, new_img_path)
img.attrs["src"] = str(new_img_path) img.attrs["src"] = str(new_img_path)
return img_tags return img_tags