Delete all versions of an object in S3 using python?
You can use object_versions.
def delete_all_versions(bucket_name: str, prefix: str):
s3 = boto3.resource('s3')
bucket = s3.Bucket(bucket_name)
if prefix is None:
bucket.object_versions.delete()
else:
bucket.object_versions.filter(Prefix=prefix).delete()
delete_all_versions("my_bucket", None) # empties the entire bucket
delete_all_versions("my_bucket", "my_prefix/") # deletes all objects matching the prefix (can be only one if only one matches)
The other answers delete objects individually. It is more efficient to use the delete_objects boto3 call and batch process your delete. See the code below for a function which collects all objects and deletes in batches of 1000:
bucket = 'bucket-name'
s3_client = boto3.client('s3')
object_response_paginator = s3_client.get_paginator('list_object_versions')
delete_marker_list = []
version_list = []
for object_response_itr in object_response_paginator.paginate(Bucket=bucket):
if 'DeleteMarkers' in object_response_itr:
for delete_marker in object_response_itr['DeleteMarkers']:
delete_marker_list.append({'Key': delete_marker['Key'], 'VersionId': delete_marker['VersionId']})
if 'Versions' in object_response_itr:
for version in object_response_itr['Versions']:
version_list.append({'Key': version['Key'], 'VersionId': version['VersionId']})
for i in range(0, len(delete_marker_list), 1000):
response = s3_client.delete_objects(
Bucket=bucket,
Delete={
'Objects': delete_marker_list[i:i+1000],
'Quiet': True
}
)
print(response)
for i in range(0, len(version_list), 1000):
response = s3_client.delete_objects(
Bucket=bucket,
Delete={
'Objects': version_list[i:i+1000],
'Quiet': True
}
)
print(response)
The documentation is helpful here:
- When versioning is enabled in an S3 bucket, a simple DeleteObject request cannot permanently delete an object from that bucket. Instead, Amazon S3 inserts a delete marker (which is effectively a new version of the object with its own version ID).
- When you try to GET an object whose current version is a delete marker, S3 behaves as if the object has been deleted (even though it has not) and returns a 404 error.
- To permanently delete an object from a versioned bucket, use DeleteObject, with the relevant version ID, for each and every version of the object (and that includes the delete markers).
I had trouble using the other solutions to this question so here's mine.
import boto3
bucket = "bucket name goes here"
filename = "filename goes here"
client = boto3.client('s3')
paginator = client.get_paginator('list_object_versions')
response_iterator = paginator.paginate(Bucket=bucket)
for response in response_iterator:
versions = response.get('Versions', [])
versions.extend(response.get('DeleteMarkers', []))
for version_id in [x['VersionId'] for x in versions
if x['Key'] == filename and x['VersionId'] != 'null']:
print('Deleting {} version {}'.format(filename, version_id))
client.delete_object(Bucket=bucket, Key=filename, VersionId=version_id)
This code deals with the cases where
- object versioning isn't actually turned on
- there are DeleteMarkers
- there are no DeleteMarkers
- there are more versions of a given file than fit in a single API response
Mahesh Mogal's answer doesn't delete DeleteMarker
s. Mangohero1's answer fails if the object is missing a DeleteMarker
. Hari's answer repeats 10 times (to workaround missing pagination logic).