How to read an image file from an S3 bucket directly into memory?
I would suggest using the io module to read the file directly into memory, without having to use a temporary file at all.
For example:
import matplotlib.pyplot as plt
import matplotlib.image as mpimg
import numpy as np
import boto3
import io
s3 = boto3.resource('s3', region_name='us-east-2')
bucket = s3.Bucket('sentinel-s2-l1c')
s3_object = bucket.Object('tiles/10/S/DG/2015/12/7/0/B01.jp2')

# download_fileobj() writes bytes, so the in-memory buffer must be binary;
# io.StringIO only accepts str and would raise TypeError here.
file_stream = io.BytesIO()
s3_object.download_fileobj(file_stream)
# The download leaves the position at the end of the buffer; rewind so the
# reader starts from the first byte.
file_stream.seek(0)
# An in-memory buffer has no file name to infer the image type from, so the
# format is named explicitly.
img = mpimg.imread(file_stream, format='jp2')
# whatever you need to do
Use io.BytesIO (not io.StringIO) whenever the data is binary, as image files are: download_fileobj writes bytes, which io.StringIO cannot accept.
This builds on Greg Merritt's answer and resolves the errors reported in its comment section by using `BytesIO` instead of `StringIO` and PIL's `Image` instead of `matplotlib.image`.
The following function works with `python3` and `boto3`. The `write_image_to_s3` function is included as a bonus.
from io import BytesIO

import boto3
import numpy as np
from PIL import Image
def read_image_from_s3(bucket, key, region_name='ap-southeast-1'):
    """Load an image file from S3 into a NumPy array.

    Parameters
    ----------
    bucket : string
        Bucket name
    key : string
        Path in s3
    region_name : string
        AWS region used when creating the S3 resource

    Returns
    -------
    np array
        Image array
    """
    # Honour the caller's region rather than a hard-coded one.
    s3 = boto3.resource('s3', region_name=region_name)
    # Distinct local name: avoids rebinding the ``bucket`` argument and
    # shadowing the ``object`` builtin.
    s3_object = s3.Bucket(bucket).Object(key)
    response = s3_object.get()
    # Read the whole body into a seekable in-memory buffer before decoding,
    # so the image decoder does not depend on the streaming response.
    file_stream = BytesIO(response['Body'].read())
    im = Image.open(file_stream)
    return np.array(im)
def write_image_to_s3(img_array, bucket, key, region_name='ap-southeast-1',
                      image_format='jpeg'):
    """Write an image array into an S3 bucket.

    Parameters
    ----------
    img_array : np array
        Image array, converted with ``Image.fromarray``
    bucket : string
        Bucket name
    key : string
        Path in s3
    region_name : string
        AWS region used when creating the S3 resource
    image_format : string
        Format name passed to ``Image.save`` (defaults to ``'jpeg'``, the
        format this function always used before)

    Returns
    -------
    None
    """
    s3 = boto3.resource('s3', region_name=region_name)
    # Distinct local name: avoids rebinding the ``bucket`` argument and
    # shadowing the ``object`` builtin.
    s3_object = s3.Bucket(bucket).Object(key)
    # Encode into memory so no temporary file is needed.
    file_stream = BytesIO()
    im = Image.fromarray(img_array)
    im.save(file_stream, format=image_format)
    s3_object.put(Body=file_stream.getvalue())