Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Update _s3_file_query to check if an object exists explicitly. #168

Merged
merged 6 commits into from
Sep 17, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion HISTORY.md
Original file line number Diff line number Diff line change
@@ -1,8 +1,9 @@
# cloudpathlib Changelog

## v0.6.1 (unreleased)
## v0.6.1 (2021-09-17)

- Fixed absolute documentation URLs to point to the new versioned documentation pages.
- Fixed bug where `no_sign_request` couldn't be used to download files since our code required list permissions to the bucket to do so, which is tracked in issue [#169](https://github.com/drivendataorg/cloudpathlib/issues/169). PR [#168](https://github.com/drivendataorg/cloudpathlib/pull/168).

## v0.6.0 (2021-09-07)

Expand Down
37 changes: 24 additions & 13 deletions cloudpathlib/s3/s3client.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,8 @@
from pathlib import Path, PurePosixPath
from typing import Any, Dict, Iterable, Optional, Union

from botocore.exceptions import ClientError

from ..client import Client, register_client_class
from ..cloudpath import implementation_registry
from .s3path import S3Path
Expand Down Expand Up @@ -130,17 +132,26 @@ def _exists(self, cloud_path: S3Path) -> bool:

def _s3_file_query(self, cloud_path: S3Path):
"""Boto3 query used for quick checks of existence and if path is file/dir"""
return next(
(
obj
for obj in (
self.s3.Bucket(cloud_path.bucket)
.objects.filter(Prefix=cloud_path.key)
.limit(1)
)
),
None,
)
# first check if this is an object that we can access directly

try:
obj = self.s3.Object(cloud_path.bucket, cloud_path.key)
obj.load()
return obj

# else, confirm it is a dir by filtering to the first item under the prefix
except ClientError:
return next(
(
obj
for obj in (
self.s3.Bucket(cloud_path.bucket)
.objects.filter(Prefix=cloud_path.key)
.limit(1)
)
),
None,
)

def _list_dir(self, cloud_path: S3Path, recursive=False) -> Iterable[S3Path]:
bucket = self.s3.Bucket(cloud_path.bucket)
Expand Down Expand Up @@ -200,12 +211,12 @@ def _remove(self, cloud_path: S3Path) -> None:
obj = self.s3.Object(cloud_path.bucket, cloud_path.key)

# will throw if not a file
obj.get()
obj.load()

resp = obj.delete()
assert resp.get("ResponseMetadata").get("HTTPStatusCode") == 204

except self.client.exceptions.NoSuchKey:
except ClientError:
# try to delete as a direcotry instead
bucket = self.s3.Bucket(cloud_path.bucket)

Expand Down
2 changes: 1 addition & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -59,5 +59,5 @@ def load_requirements(path: Path):
"Source Code": "https://github.com/drivendataorg/cloudpathlib",
},
url="https://github.com/drivendataorg/cloudpathlib",
version="0.6.0",
version="0.6.1",
)
13 changes: 12 additions & 1 deletion tests/mock_clients/mock_s3.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
from tempfile import TemporaryDirectory

from boto3.session import Session
from botocore.exceptions import ClientError

from .utils import delete_empty_parents_up_to_root

Expand Down Expand Up @@ -64,7 +65,17 @@ def get(self):
if not self.path.exists() or self.path.is_dir():
raise NoSuchKey({}, {})
else:
return {"key": self.path}
return {"key": str(PurePosixPath(self.path))}

def load(self):
if not self.path.exists() or self.path.is_dir():
raise ClientError({}, {})
else:
return {"key": str(PurePosixPath(self.path))}

@property
def key(self):
return str(PurePosixPath(self.path).relative_to(PurePosixPath(self.root)))

def copy_from(self, CopySource=None, Metadata=None, MetadataDirective=None):
if CopySource["Key"] == str(self.path.relative_to(self.root)):
Expand Down
51 changes: 25 additions & 26 deletions tests/test_s3_specific.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
from concurrent.futures import ProcessPoolExecutor
from time import sleep

import pytest

from boto3.s3.transfer import TransferConfig
Expand Down Expand Up @@ -30,6 +31,7 @@ def test_transfer_config(s3_rig, tmp_path):
# download
client.set_as_default_client()
p = s3_rig.create_cloud_path("dir_0/file0_0.txt")

dl_dir = tmp_path
assert not (dl_dir / p.name).exists()
p.download_to(dl_dir)
Expand All @@ -54,37 +56,31 @@ def _download_with_threads(s3_rig, tmp_path, use_threads):
"""Job used by tests to ensure Transfer config changes are
actually passed through to boto3 and respected.
"""
try:
sleep(1) # give test monitoring process time to start watching

transfer_config = TransferConfig(
max_concurrency=100,
use_threads=use_threads,
multipart_chunksize=1 * 1024,
multipart_threshold=10 * 1024,
)
client = s3_rig.client_class(boto3_transfer_config=transfer_config)
p = client.CloudPath(f"s3://{s3_rig.drive}/dir_0/file0_to_download.txt")

assert not p.exists()
sleep(1) # give test monitoring process time to start watching

transfer_config = TransferConfig(
max_concurrency=100,
use_threads=use_threads,
multipart_chunksize=1 * 1024,
multipart_threshold=10 * 1024,
)
client = s3_rig.client_class(boto3_transfer_config=transfer_config)
p = client.CloudPath(f"s3://{s3_rig.drive}/{s3_rig.test_dir}/dir_0/file0_to_download.txt")

# file should be about 60KB
text = "lalala" * 10_000
p.write_text(text)
assert not p.exists()

assert p.exists()
# file should be about 60KB
text = "lalala" * 10_000
p.write_text(text)

# assert not (dl_dir / p.name).exists()
p.download_to(tmp_path)
assert p.exists()

p.unlink()
# assert not (dl_dir / p.name).exists()
p.download_to(tmp_path)

assert not p.exists()
p.unlink()

finally:
p = s3_rig.create_cloud_path("/dir_0/file0_0.txt")
if p.exists():
p.unlink()
assert not p.exists()


def test_transfer_config_live(s3_rig, tmp_path):
Expand Down Expand Up @@ -133,7 +129,7 @@ def _execute_on_subprocess_and_observe(use_threads):
assert _execute_on_subprocess_and_observe(use_threads=True) > 10


def test_no_sign_request(s3_rig):
def test_no_sign_request(s3_rig, tmp_path):
"""Tests that we can pass no_sign_request to the S3Client and we will
be able to access public resources but not private ones.
"""
Expand All @@ -146,6 +142,9 @@ def test_no_sign_request(s3_rig):
)
assert p.exists()

p.download_to(tmp_path)
assert (tmp_path / p.name).read_bytes() == p.read_bytes()

# unsigned raises for private S3 file that exists
p = client.CloudPath(f"s3://{s3_rig.drive}/dir_0/file0_to_download.txt")
with pytest.raises(botocore.exceptions.ClientError):
Expand Down