From 5a890aec0f420ba10e1777b53bf56c6d6bf949ea Mon Sep 17 00:00:00 2001 From: Romil Date: Tue, 16 Aug 2022 22:55:31 -0700 Subject: [PATCH 1/9] fix public bucket detection --- sky/data/storage.py | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/sky/data/storage.py b/sky/data/storage.py index b3e7c64aaac..1dff096fe53 100644 --- a/sky/data/storage.py +++ b/sky/data/storage.py @@ -854,21 +854,18 @@ def _get_bucket(self) -> Tuple[StorageHandle, bool]: # This line does not error out if the bucket is an external public # bucket or if it is a user's bucket that is publicly # accessible. - self.client.get_public_access_block(Bucket=self.name) + self.client.head_bucket(Bucket=self.name) return bucket, False except aws.client_exception() as e: error_code = e.response['Error']['Code'] # AccessDenied error for buckets that are private and not owned by # user. - if error_code == 'AccessDenied': + if error_code == '403': command = f'aws s3 ls {self.name}' with ux_utils.print_exception_no_traceback(): raise exceptions.StorageBucketGetError( _BUCKET_FAIL_TO_CONNECT_MESSAGE.format( name=self.name, command=command)) from e - # Try private bucket case. - if data_utils.verify_s3_bucket(self.name): - return bucket, False if self.source is not None and self.source.startswith('s3://'): with ux_utils.print_exception_no_traceback(): From fd7b4b1109dff94fb3e5decb34e8b5350f67deb4 Mon Sep 17 00:00:00 2001 From: Romil Date: Tue, 16 Aug 2022 23:33:23 -0700 Subject: [PATCH 2/9] add nonexist bucket test --- tests/test_smoke.py | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/tests/test_smoke.py b/tests/test_smoke.py index d868cd567ff..1f27b7dd55d 100644 --- a/tests/test_smoke.py +++ b/tests/test_smoke.py @@ -820,7 +820,7 @@ def test_new_bucket_creation_and_deletion(self, tmp_local_storage_obj, @pytest.mark.parametrize( 'tmp_public_storage_obj, store_type', - [('s3://tcga-2-open', storage_lib.StoreType.S3), + [('s3://digitalcorpora', storage_lib.StoreType.S3), ('gs://gcp-public-data-sentinel-2', storage_lib.StoreType.GCS)], indirect=['tmp_public_storage_obj']) def test_public_bucket(self, tmp_public_storage_obj, store_type): @@ -832,6 +832,13 @@ def test_public_bucket(self, tmp_public_storage_obj, store_type): out = subprocess.check_output(['sky', 'storage', 'ls']) assert tmp_public_storage_obj.name not in out.decode('utf-8') + @pytest.mark.parametrize( + 'nonexist_bucket', [f's3://{str(uuid.uuid4())}', f'gs://{str(uuid.uuid4())}']) + def test_nonexistent_bucket(self, nonexist_bucket): + # Attempts to create fetch a stroage with a non-existent source. + with pytest.raises(sky.exceptions.StorageBucketGetError, match='Attempted to connect to a non-existent bucket'): + storage_obj = storage_lib.Storage(source=nonexist_bucket) + @staticmethod def cli_ls_cmd(store_type, bucket_name): if store_type == storage_lib.StoreType.S3: From 1ccf64193e83fc09b6a454e52682ace618707e6c Mon Sep 17 00:00:00 2001 From: Romil Date: Tue, 16 Aug 2022 23:43:13 -0700 Subject: [PATCH 3/9] add private bucket test --- tests/test_smoke.py | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/tests/test_smoke.py b/tests/test_smoke.py index 1f27b7dd55d..9170a3d49f0 100644 --- a/tests/test_smoke.py +++ b/tests/test_smoke.py @@ -839,6 +839,15 @@ def test_nonexistent_bucket(self, nonexist_bucket): with pytest.raises(sky.exceptions.StorageBucketGetError, match='Attempted to connect to a non-existent bucket'): storage_obj = storage_lib.Storage(source=nonexist_bucket) + @pytest.mark.parametrize( + 'nonexist_bucket', [f's3://imagenet', f'gs://imagenet']) + def test_private_bucket(self, nonexist_bucket): + # Attempts to access private buckets not belonging to the user. + # These buckets are known to be private, but may need to be updated if + # they are removed by their owners. + with pytest.raises(sky.exceptions.StorageBucketGetError, match='the bucket name is taken'): + storage_obj = storage_lib.Storage(source=nonexist_bucket) + @staticmethod def cli_ls_cmd(store_type, bucket_name): if store_type == storage_lib.StoreType.S3: From 7182059a8afe0b620ed1425c7c972b093898da6e Mon Sep 17 00:00:00 2001 From: Romil Date: Tue, 16 Aug 2022 23:46:01 -0700 Subject: [PATCH 4/9] lint --- tests/test_smoke.py | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) diff --git a/tests/test_smoke.py b/tests/test_smoke.py index 9170a3d49f0..e6b260fb423 100644 --- a/tests/test_smoke.py +++ b/tests/test_smoke.py @@ -833,19 +833,23 @@ def test_public_bucket(self, tmp_public_storage_obj, store_type): assert tmp_public_storage_obj.name not in out.decode('utf-8') @pytest.mark.parametrize( - 'nonexist_bucket', [f's3://{str(uuid.uuid4())}', f'gs://{str(uuid.uuid4())}']) + 'nonexist_bucket', + [f's3://{str(uuid.uuid4())}', f'gs://{str(uuid.uuid4())}']) def test_nonexistent_bucket(self, nonexist_bucket): # Attempts to create fetch a stroage with a non-existent source. - with pytest.raises(sky.exceptions.StorageBucketGetError, match='Attempted to connect to a non-existent bucket'): + with pytest.raises( + sky.exceptions.StorageBucketGetError, + match='Attempted to connect to a non-existent bucket'): storage_obj = storage_lib.Storage(source=nonexist_bucket) - @pytest.mark.parametrize( - 'nonexist_bucket', [f's3://imagenet', f'gs://imagenet']) + @pytest.mark.parametrize('nonexist_bucket', + [f's3://imagenet', f'gs://imagenet']) def test_private_bucket(self, nonexist_bucket): # Attempts to access private buckets not belonging to the user. # These buckets are known to be private, but may need to be updated if # they are removed by their owners. - with pytest.raises(sky.exceptions.StorageBucketGetError, match='the bucket name is taken'): + with pytest.raises(sky.exceptions.StorageBucketGetError, + match='the bucket name is taken'): storage_obj = storage_lib.Storage(source=nonexist_bucket) @staticmethod From 9127719a31760b2e68c624c6b6a4fb4864f9bf7f Mon Sep 17 00:00:00 2001 From: Romil Date: Wed, 17 Aug 2022 08:08:20 -0700 Subject: [PATCH 5/9] arg naming --- tests/test_smoke.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/tests/test_smoke.py b/tests/test_smoke.py index e6b260fb423..dde9b9a38aa 100644 --- a/tests/test_smoke.py +++ b/tests/test_smoke.py @@ -842,15 +842,15 @@ def test_nonexistent_bucket(self, nonexist_bucket): match='Attempted to connect to a non-existent bucket'): storage_obj = storage_lib.Storage(source=nonexist_bucket) - @pytest.mark.parametrize('nonexist_bucket', + @pytest.mark.parametrize('private_bucket', [f's3://imagenet', f'gs://imagenet']) - def test_private_bucket(self, nonexist_bucket): + def test_private_bucket(self, private_bucket): # Attempts to access private buckets not belonging to the user. # These buckets are known to be private, but may need to be updated if # they are removed by their owners. with pytest.raises(sky.exceptions.StorageBucketGetError, match='the bucket name is taken'): - storage_obj = storage_lib.Storage(source=nonexist_bucket) + storage_obj = storage_lib.Storage(source=private_bucket) @staticmethod def cli_ls_cmd(store_type, bucket_name): From bd35b71853ee269439ee6fbc9b2b085b5a77dda9 Mon Sep 17 00:00:00 2001 From: Romil Date: Thu, 18 Aug 2022 12:09:41 -0700 Subject: [PATCH 6/9] Add check for non existent bucket in tests --- tests/test_smoke.py | 40 ++++++++++++++++++++++++++++++++++++---- 1 file changed, 36 insertions(+), 4 deletions(-) diff --git a/tests/test_smoke.py b/tests/test_smoke.py index dde9b9a38aa..bd300f4cd08 100644 --- a/tests/test_smoke.py +++ b/tests/test_smoke.py @@ -833,14 +833,46 @@ def test_public_bucket(self, tmp_public_storage_obj, store_type): assert tmp_public_storage_obj.name not in out.decode('utf-8') @pytest.mark.parametrize( - 'nonexist_bucket', - [f's3://{str(uuid.uuid4())}', f'gs://{str(uuid.uuid4())}']) - def test_nonexistent_bucket(self, nonexist_bucket): + 'nonexist_bucket_url', + ['s3://{random_name}', 'gs://{random_name}']) + def test_nonexistent_bucket(self, nonexist_bucket_url): # Attempts to create fetch a stroage with a non-existent source. + # Generate a random bucket name and verify it doesn't exist: + retry_count = 0 + while True: + nonexist_bucket_name = str(uuid.uuid4()) + if nonexist_bucket_url.startswith('s3'): + command = ['aws', 's3api', 'head-bucket', '--bucket', + nonexist_bucket_name] + expected_output = '404' + elif nonexist_bucket_url.startswith('gs'): + command = ['gsutil', 'ls', nonexist_bucket_url.format( + random_name=nonexist_bucket_name)] + expected_output = 'BucketNotFoundException' + else: + raise ValueError('Unsupported bucket type ' + f'{nonexist_bucket_url}') + + # Check if bucket exists using the cli: + try: + out = subprocess.check_output(command, stderr=subprocess.STDOUT) + except subprocess.CalledProcessError as e: + out = e.output + out = out.decode('utf-8') + if expected_output in out: + break + else: + retry_count += 1 + if retry_count > 3: + raise RuntimeError('Could find a nonexistent bucket to use.' + ' This is higly unlikely to happen - ' + 'check if the tests are correct.') + with pytest.raises( sky.exceptions.StorageBucketGetError, match='Attempted to connect to a non-existent bucket'): - storage_obj = storage_lib.Storage(source=nonexist_bucket) + storage_obj = storage_lib.Storage(source=nonexist_bucket_url.format( + random_name=nonexist_bucket_name)) @pytest.mark.parametrize('private_bucket', [f's3://imagenet', f'gs://imagenet']) From 72c22ba899533472162a48bd89e95c3b0ae86726 Mon Sep 17 00:00:00 2001 From: Romil Date: Thu, 18 Aug 2022 12:22:25 -0700 Subject: [PATCH 7/9] lint --- tests/test_smoke.py | 17 ++++++++++------- 1 file changed, 10 insertions(+), 7 deletions(-) diff --git a/tests/test_smoke.py b/tests/test_smoke.py index bd300f4cd08..3932c46ee7f 100644 --- a/tests/test_smoke.py +++ b/tests/test_smoke.py @@ -832,9 +832,8 @@ def test_public_bucket(self, tmp_public_storage_obj, store_type): out = subprocess.check_output(['sky', 'storage', 'ls']) assert tmp_public_storage_obj.name not in out.decode('utf-8') - @pytest.mark.parametrize( - 'nonexist_bucket_url', - ['s3://{random_name}', 'gs://{random_name}']) + @pytest.mark.parametrize('nonexist_bucket_url', + ['s3://{random_name}', 'gs://{random_name}']) def test_nonexistent_bucket(self, nonexist_bucket_url): # Attempts to create fetch a stroage with a non-existent source. # Generate a random bucket name and verify it doesn't exist: @@ -842,12 +841,16 @@ def test_nonexistent_bucket(self, nonexist_bucket_url): while True: nonexist_bucket_name = str(uuid.uuid4()) if nonexist_bucket_url.startswith('s3'): - command = ['aws', 's3api', 'head-bucket', '--bucket', - nonexist_bucket_name] + command = [ + 'aws', 's3api', 'head-bucket', '--bucket', + nonexist_bucket_name + ] expected_output = '404' elif nonexist_bucket_url.startswith('gs'): - command = ['gsutil', 'ls', nonexist_bucket_url.format( - random_name=nonexist_bucket_name)] + command = [ + 'gsutil', 'ls', + nonexist_bucket_url.format(random_name=nonexist_bucket_name) + ] expected_output = 'BucketNotFoundException' else: raise ValueError('Unsupported bucket type ' From 0022fccba4e623d7c7d4b51986f4556ab179eb35 Mon Sep 17 00:00:00 2001 From: Romil Bhardwaj Date: Fri, 19 Aug 2022 15:40:18 -0700 Subject: [PATCH 8/9] Add tcga-2-open to tests --- tests/test_smoke.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/tests/test_smoke.py b/tests/test_smoke.py index 3932c46ee7f..e72ad92f13b 100644 --- a/tests/test_smoke.py +++ b/tests/test_smoke.py @@ -820,7 +820,8 @@ def test_new_bucket_creation_and_deletion(self, tmp_local_storage_obj, @pytest.mark.parametrize( 'tmp_public_storage_obj, store_type', - [('s3://digitalcorpora', storage_lib.StoreType.S3), + [('s3://tcga-2-open', storage_lib.StoreType.S3), + ('s3://digitalcorpora', storage_lib.StoreType.S3), ('gs://gcp-public-data-sentinel-2', storage_lib.StoreType.GCS)], indirect=['tmp_public_storage_obj']) def test_public_bucket(self, tmp_public_storage_obj, store_type): From 541a0c249506fc3d8ad4dba441dea992f96ed19d Mon Sep 17 00:00:00 2001 From: Romil Bhardwaj Date: Fri, 19 Aug 2022 15:46:57 -0700 Subject: [PATCH 9/9] Fix typo --- tests/test_smoke.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/test_smoke.py b/tests/test_smoke.py index e72ad92f13b..8bae301ad81 100644 --- a/tests/test_smoke.py +++ b/tests/test_smoke.py @@ -868,8 +868,8 @@ def test_nonexistent_bucket(self, nonexist_bucket_url): else: retry_count += 1 if retry_count > 3: - raise RuntimeError('Could find a nonexistent bucket to use.' - ' This is higly unlikely to happen - ' + raise RuntimeError('Unable to find a nonexistent bucket ' + 'to use. This is higly unlikely - ' 'check if the tests are correct.') with pytest.raises(