# BookConverter/src/docx_converter/image_processing.py

import os
import pathlib
from shutil import copyfile


def process_images(body_tag, access, html_path, book_id):
    """
    Re-point every <img> tag found under ``body_tag`` at a stored copy of its image.

    Images are meant to be sent to Amazon S3 through ``access``; when no
    ``access`` object is given they are copied into a local folder instead.

    :param body_tag: parsed HTML node exposing ``find_all('img')``; each
        returned tag must expose a mutable ``attrs`` mapping.
    :param access: object providing ``send_image(path, doc_id=...)`` that
        returns the new link for the image, or ``None`` to copy the images
        into ``<package parent>/json/img_<book_id>/``.
    :param html_path: path of the HTML file; image sources are resolved
        relative to ``html_path.parent``.
    :param book_id: identifier passed to ``send_image`` as ``doc_id`` and used
        in the name of the local image folder.
    :return: the collection of <img> tags returned by ``find_all`` (their
        ``src`` attributes are updated in place).
    :raises OSError: in local mode, if an image file cannot be copied
        (e.g. the source file does not exist).
    """
    img_tags = body_tag.find_all('img')
    if not img_tags:
        return img_tags

    new_path = None
    if access is None:
        folder_path = os.path.dirname(
            os.path.dirname(os.path.abspath(__file__)))
        new_path = pathlib.Path(os.path.join(
            folder_path, f'json/img_{book_id}/'))
        # parents=True: the enclosing 'json' folder may not exist yet.
        new_path.mkdir(parents=True, exist_ok=True)

    for img in img_tags:
        img_name = img.attrs.get('src')
        if not img_name:
            # An <img> without a usable src has nothing to upload or copy.
            continue

        # quick fix for bad links: drop a single leading '../'
        if img_name.startswith('../'):
            img_name = img_name[3:]

        img_path = pathlib.Path(str(html_path.parent), img_name)

        if access is not None:
            img.attrs['src'] = access.send_image(img_path, doc_id=book_id)
        else:
            new_img_path = new_path / img_name
            # src may contain sub-folders (e.g. 'images/a.png'); make sure
            # the destination folder exists before copying.
            new_img_path.parent.mkdir(parents=True, exist_ok=True)
            copyfile(img_path, new_img_path)
            img.attrs['src'] = str(new_img_path)

    return img_tags