Skip to content

Commit

Permalink
do not delete objects that are already deleted when restoring bucket
Browse files Browse the repository at this point in the history
When a bucket is restored to an earlier state of itself, do not delete objects
that are already deleted in their current state.

Added a test that checks that no new delete marker is created when the object
was not changed.
  • Loading branch information
okummer authored and angeloc committed Apr 20, 2023
1 parent 358bf47 commit a7ef7f0
Showing 1 changed file with 39 additions and 1 deletion.
40 changes: 39 additions & 1 deletion s3-pit-restore
Original file line number Diff line number Diff line change
Expand Up @@ -212,6 +212,33 @@ class TestS3PitRestoreSameBucket(unittest.TestCase):
self.assertEqual(1, len(result['Versions']))
self.assertEqual(0, len(result.get("DeleteMarkers", [])))

def test_no_op_delete(self):
    # Scenario: an object is created and then deleted, so its latest
    # state is a delete marker. Running do_restore() on that key must
    # leave exactly one version and exactly one delete marker, i.e. the
    # restore must not stack a second delete marker on top.
    print('Running test_no_op_delete ...')
    payload = str(uuid.uuid4())
    test_key = f'test_no_op_delete/{str(uuid.uuid4())}'

    s3 = boto3.resource('s3', endpoint_url=args.endpoint_url)
    self.check_versioning(s3)

    print("Preparing ...")
    target = s3.Object(args.bucket, test_key)
    target.put(Body=payload)
    # NOTE(review): the one-second pauses presumably keep the put, the
    # delete and the restore on distinct timestamps -- confirm.
    time.sleep(1)
    target.delete()
    time.sleep(1)

    # Limit the restore to this test's key and clear both timestamps.
    args.prefix = test_key
    args.timestamp = None
    args.from_timestamp = None

    print("Restoring ...")
    do_restore()

    print("Checking ...")
    client = s3.meta.client
    listing = client.list_object_versions(Bucket=args.bucket, Prefix=test_key)
    versions = listing['Versions']
    self.assertEqual(1, len(versions))
    delete_markers = listing.get("DeleteMarkers", [])
    self.assertEqual(1, len(delete_markers))


def signal_handler(signal, frame):
executor.shutdown(wait=False)
Expand Down Expand Up @@ -360,7 +387,7 @@ def do_restore():
deletemarkers = previous_deletemarkers + page.get("DeleteMarkers", [])
# And since they have been added, we remove them from the overflow list
previous_deletemarkers = []
dmarker = {"Key":""}
dmarker = {"Key": "", "IsLatest": False}
for obj in versions:
if last_obj["Key"] == obj["Key"]:
# We've had a newer version or a delete of this key
Expand All @@ -381,6 +408,9 @@ def do_restore():
# (both versions and deletemarkers list are sorted in alphabetical order of the key, and then in reverse time order for each key)
while deletemarkers and (dmarker["Key"] < obj["Key"] or (dmarker["Key"] == obj["Key"] and dmarker["LastModified"] > pit_end_date)):
dmarker = deletemarkers.pop(0)
if dmarker['IsLatest']:
# The given object is already deleted and does not have to be deleted again.
obj_needs_be_deleted.pop(dmarker["Key"], None)

#skip dmarker if it's later than pit_end_date
if dmarker["Key"] == obj["Key"] and dmarker["LastModified"] > obj["LastModified"] and dmarker["LastModified"] <= pit_end_date:
Expand Down Expand Up @@ -425,6 +455,14 @@ def do_restore():
except Exception as ex:
print('"%s" %s %s %s %s "ERROR: %s"' % (obj["LastModified"], obj["VersionId"], obj["Size"], obj["StorageClass"], obj["Key"], ex), file=sys.stderr)
del(futures[future])

# Process leftover delete markers.
while previous_deletemarkers:
dmarker = previous_deletemarkers.pop(0)
if dmarker['IsLatest']:
# The given object is already deleted and does not have to be deleted again.
obj_needs_be_deleted.pop(dmarker["Key"], None)

# delete objects which came into existence after pit_end_date, but only if the destination bucket is the same as the source bucket and we are restoring to the same object key
if args.dest_bucket == args.bucket and not args.dest_prefix:
for key in obj_needs_be_deleted:
Expand Down

0 comments on commit a7ef7f0

Please sign in to comment.