
[Storage] Fix public bucket source check in SkyPilot Storage #1087

Merged: merged 9 commits on Aug 24, 2022
Changes from 7 commits
7 changes: 2 additions & 5 deletions sky/data/storage.py
@@ -854,21 +854,18 @@ def _get_bucket(self) -> Tuple[StorageHandle, bool]:
# This line does not error out if the bucket is an external public
# bucket or if it is a user's bucket that is publicly
# accessible.
self.client.get_public_access_block(Bucket=self.name)
self.client.head_bucket(Bucket=self.name)
return bucket, False
except aws.client_exception() as e:
error_code = e.response['Error']['Code']
# AccessDenied error for buckets that are private and not owned by
# user.
if error_code == 'AccessDenied':
if error_code == '403':
command = f'aws s3 ls {self.name}'
with ux_utils.print_exception_no_traceback():
raise exceptions.StorageBucketGetError(
_BUCKET_FAIL_TO_CONNECT_MESSAGE.format(
name=self.name, command=command)) from e
# Try private bucket case.
if data_utils.verify_s3_bucket(self.name):
return bucket, False

if self.source is not None and self.source.startswith('s3://'):
with ux_utils.print_exception_no_traceback():
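Context for the hunk above: HeadBucket succeeds for buckets the caller owns and for external public buckets, and raises a ClientError whose error code is the raw HTTP status: '403' for an existing but inaccessible bucket, '404' for a nonexistent one. A minimal standalone sketch of that classification (the helper name is illustrative, not SkyPilot code):

import boto3
from botocore.exceptions import ClientError


def classify_bucket(bucket_name: str) -> str:
    """Classify an S3 bucket as 'accessible', 'private', or 'nonexistent'."""
    client = boto3.client('s3')
    try:
        # Succeeds for buckets we own and for external public buckets.
        client.head_bucket(Bucket=bucket_name)
        return 'accessible'
    except ClientError as e:
        error_code = e.response['Error']['Code']
        if error_code == '403':
            return 'private'  # Exists, but access is denied.
        if error_code == '404':
            return 'nonexistent'  # No bucket with this name.
        raise  # Unexpected error; re-raise.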
57 changes: 56 additions & 1 deletion tests/test_smoke.py
@@ -820,7 +820,7 @@ def test_new_bucket_creation_and_deletion(self, tmp_local_storage_obj,

@pytest.mark.parametrize(
'tmp_public_storage_obj, store_type',
[('s3://tcga-2-open', storage_lib.StoreType.S3),
Member: Can we keep this bucket in the test? For coverage.

Collaborator (Author): Great point - added!

[('s3://digitalcorpora', storage_lib.StoreType.S3),
('gs://gcp-public-data-sentinel-2', storage_lib.StoreType.GCS)],
indirect=['tmp_public_storage_obj'])
def test_public_bucket(self, tmp_public_storage_obj, store_type):
@@ -832,6 +832,61 @@ def test_public_bucket(self, tmp_public_storage_obj, store_type):
out = subprocess.check_output(['sky', 'storage', 'ls'])
assert tmp_public_storage_obj.name not in out.decode('utf-8')
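As an aside, the public buckets this test relies on must stay anonymously readable. A quick way to sanity-check that property for S3 is to list the bucket with unsigned boto3 requests; the helper below is an assumption for illustration, not part of the PR:

import boto3
from botocore import UNSIGNED
from botocore.config import Config
from botocore.exceptions import ClientError


def is_publicly_listable(bucket_name: str) -> bool:
    """Return True if the bucket can be listed without any credentials."""
    anon_client = boto3.client('s3', config=Config(signature_version=UNSIGNED))
    try:
        anon_client.list_objects_v2(Bucket=bucket_name, MaxKeys=1)
        return True
    except ClientError:
        return False

For example, is_publicly_listable('digitalcorpora') should return True for as long as that bucket remains public.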

@pytest.mark.parametrize('nonexist_bucket_url',
['s3://{random_name}', 'gs://{random_name}'])
def test_nonexistent_bucket(self, nonexist_bucket_url):
# Attempts to create/fetch a storage object with a non-existent source.
# Generate a random bucket name and verify it doesn't exist:
retry_count = 0
while True:
nonexist_bucket_name = str(uuid.uuid4())
if nonexist_bucket_url.startswith('s3'):
command = [
'aws', 's3api', 'head-bucket', '--bucket',
nonexist_bucket_name
]
expected_output = '404'
elif nonexist_bucket_url.startswith('gs'):
command = [
'gsutil', 'ls',
nonexist_bucket_url.format(random_name=nonexist_bucket_name)
]
expected_output = 'BucketNotFoundException'
else:
raise ValueError('Unsupported bucket type '
f'{nonexist_bucket_url}')

# Check if bucket exists using the cli:
try:
out = subprocess.check_output(command, stderr=subprocess.STDOUT)
except subprocess.CalledProcessError as e:
out = e.output
out = out.decode('utf-8')
if expected_output in out:
break
else:
retry_count += 1
if retry_count > 3:
raise RuntimeError('Could find a nonexistent bucket to use.'
Member: Could not*

Collaborator (Author): good catch! fixed

' This is highly unlikely to happen - '
'check if the tests are correct.')

with pytest.raises(
sky.exceptions.StorageBucketGetError,
match='Attempted to connect to a non-existent bucket'):
storage_obj = storage_lib.Storage(source=nonexist_bucket_url.format(
random_name=nonexist_bucket_name))
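Aside: the CLI probe above could also be done in-process for the S3 case, leaning on the same '404' HeadBucket behavior the production check relies on. A hedged sketch (the helper name is hypothetical):

import uuid

import boto3
from botocore.exceptions import ClientError


def pick_nonexistent_s3_bucket_name(max_tries: int = 3) -> str:
    """Return a random bucket name that S3 reports as nonexistent (404)."""
    client = boto3.client('s3')
    for _ in range(max_tries):
        candidate = str(uuid.uuid4())  # Lowercase hex + hyphens: a valid name.
        try:
            client.head_bucket(Bucket=candidate)
        except ClientError as e:
            if e.response['Error']['Code'] == '404':
                return candidate  # Confirmed: no such bucket.
    raise RuntimeError('Could not find a nonexistent bucket name; '
                       'check if the tests are correct.')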

@pytest.mark.parametrize('private_bucket',
['s3://imagenet', 'gs://imagenet'])
def test_private_bucket(self, private_bucket):
# Attempts to access private buckets not belonging to the user.
# These buckets are known to be private, but may need to be updated if
# they are removed by their owners.
with pytest.raises(sky.exceptions.StorageBucketGetError,
match='the bucket name is taken'):
Collaborator: match=backend_utils._BUCKET_FAIL_TO_CONNECT_MESSAGE.format(...)

Collaborator (Author): Ahh private buckets trigger this message on gcp, so the common message is the bucket name is taken:

skypilot/sky/data/storage.py, lines 1129 to 1133 at commit 6196552:

ex = exceptions.StorageBucketGetError(
f'Attempted to access a private external bucket {self.name}'
'\nCheck if the 1) the bucket name is taken and/or '
'2) the bucket permissions are not setup correctly. '
f'Consider using `gsutil ls gs://{self.name}` to debug.')

Collaborator: In this case, can we move this message into a variable such as PRIVATE_BUCKET_ERROR?

storage_obj = storage_lib.Storage(source=private_bucket)

@staticmethod
def cli_ls_cmd(store_type, bucket_name):
if store_type == storage_lib.StoreType.S3:
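For reference, the refactor suggested in the last thread could look roughly like the sketch below; the constant name _PRIVATE_BUCKET_ERROR and the wiring are assumptions based on the reviewer's suggestion, not the merged code:

# sky/data/storage.py (hypothetical): hoist the message into a module-level
# constant so tests can reference it instead of duplicating the wording.
_PRIVATE_BUCKET_ERROR = (
    'Attempted to access a private external bucket {name}'
    '\nCheck if 1) the bucket name is taken and/or '
    '2) the bucket permissions are not setup correctly. '
    'Consider using `gsutil ls gs://{name}` to debug.')

# At the raise site:
ex = exceptions.StorageBucketGetError(
    _PRIVATE_BUCKET_ERROR.format(name=self.name))

# tests/test_smoke.py can then match on a stable substring of the constant:
with pytest.raises(sky.exceptions.StorageBucketGetError,
                   match='the bucket name is taken'):
    storage_obj = storage_lib.Storage(source=private_bucket)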