forked from LiveCarta/BookConverter
Optimize speed of image_processing.py
This commit is contained in:
@@ -3,36 +3,32 @@ import pathlib
|
|||||||
from shutil import copyfile
|
from shutil import copyfile
|
||||||
|
|
||||||
|
|
||||||
def process_images(body_tag, access, html_path, book_id):
|
def process_images(access, html_path, book_id, body_tag):
|
||||||
"""
|
"""
|
||||||
Function to process <img> tag. Img should be sent to Amazon S3, and the tag is then returned with a valid link.
|
Function to process <img> tag.
|
||||||
|
Img should be sent to Amazon S3, and the tag is then returned with a valid link.
|
||||||
For now images are moved to one folder.
|
For now images are moved to one folder.
|
||||||
|
|
||||||
"""
|
"""
|
||||||
img_tags = body_tag.find_all('img')
|
img_tags = body_tag.find_all('img')
|
||||||
|
|
||||||
if len(img_tags):
|
|
||||||
if access is None:
|
|
||||||
folder_path = os.path.dirname(
|
|
||||||
os.path.dirname(os.path.abspath(__file__)))
|
|
||||||
new_path = pathlib.Path(os.path.join(
|
|
||||||
folder_path, f'json/img_{book_id}/'))
|
|
||||||
new_path.mkdir(exist_ok=True)
|
|
||||||
|
|
||||||
for img in img_tags:
|
for img in img_tags:
|
||||||
img_name = img.attrs.get('src')
|
img_name = img.attrs.get('src')
|
||||||
# quick fix for bad links
|
# quick fix for bad links
|
||||||
if (len(img_name) >= 3) and img_name[:3] == '../':
|
if (len(img_name) >= 3) and img_name[:3] == '../':
|
||||||
img_name = img_name[3:]
|
img_name = img_name[3:]
|
||||||
|
|
||||||
img_path = pathlib.Path(f'{html_path.parent}', f'{img_name}')
|
img_path = pathlib.Path(f'{html_path.parent}', f'{img_name}')
|
||||||
|
|
||||||
if access is not None:
|
if access is not None:
|
||||||
link = access.send_image(img_path, doc_id=book_id)
|
link = access.send_image(img_path, doc_id=book_id)
|
||||||
img.attrs['src'] = link
|
img.attrs['src'] = link
|
||||||
else:
|
else:
|
||||||
img_size = os.path.getsize(img_path)
|
if img_tags.index(img) == 0:
|
||||||
|
folder_path = os.path.dirname(
|
||||||
|
os.path.dirname(os.path.abspath(__file__)))
|
||||||
|
new_path = pathlib.Path(os.path.join(
|
||||||
|
folder_path, f'../json/img_{book_id}/'))
|
||||||
|
new_path.mkdir(exist_ok=True)
|
||||||
new_img_path = new_path / img_name
|
new_img_path = new_path / img_name
|
||||||
copyfile(img_path, new_img_path)
|
copyfile(img_path, new_img_path)
|
||||||
img.attrs["src"] = str(new_img_path)
|
img.attrs["src"] = str(new_img_path)
|
||||||
|
|
||||||
return img_tags
|
return img_tags
|
||||||
Reference in New Issue
Block a user