Update _s3_file_query to check if an object exists explicitly. (#168)
* Check file existence first so public AWS access is feasible

* Use PurePosixPath in mock to support Windows

* Revert unnecessary test changes

* Remove import and write that are not needed

* Changelog and release version

* Remove cleanup now handled by the rig
pjbull authored Sep 17, 2021
1 parent ea3053f commit 5f97f08
Showing 5 changed files with 64 additions and 42 deletions.
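
The gist of the change, before the per-file diffs: `_s3_file_query` now issues a HEAD request on the object itself via `Object.load()` and only falls back to listing the key as a prefix, so an existence check no longer requires `s3:ListBucket` on the bucket. A minimal standalone sketch of the same pattern in plain boto3 (the bucket and key arguments are placeholders, not taken from the diff):

```python
import boto3
from botocore.exceptions import ClientError

s3 = boto3.resource("s3")

def file_or_dir_marker(bucket: str, key: str):
    """Return the object if `key` is a real file, otherwise the first object under
    the prefix (a "directory"), otherwise None. Mirrors the HEAD-then-list approach."""
    obj = s3.Object(bucket, key)
    try:
        obj.load()  # HEAD request: succeeds only if the key is an actual object
        return obj
    except ClientError:
        # Not a plain object: treat the key as a prefix and look for any child object.
        return next(iter(s3.Bucket(bucket).objects.filter(Prefix=key).limit(1)), None)
```
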
3 changes: 2 additions & 1 deletion HISTORY.md
@@ -1,8 +1,9 @@
# cloudpathlib Changelog

## v0.6.1 (unreleased)
## v0.6.1 (2021-09-17)

- Fixed absolute documentation URLs to point to the new versioned documentation pages.
- Fixed bug where `no_sign_request` couldn't be used to download files since our code required list permissions to the bucket to do so, which is tracked in issue [#169](https://github.com/drivendataorg/cloudpathlib/issues/169). PR [#168](https://github.com/drivendataorg/cloudpathlib/pull/168).

## v0.6.0 (2021-09-07)

37 changes: 24 additions & 13 deletions cloudpathlib/s3/s3client.py
@@ -2,6 +2,8 @@
from pathlib import Path, PurePosixPath
from typing import Any, Dict, Iterable, Optional, Union

from botocore.exceptions import ClientError

from ..client import Client, register_client_class
from ..cloudpath import implementation_registry
from .s3path import S3Path
@@ -130,17 +132,26 @@ def _exists(self, cloud_path: S3Path) -> bool:

def _s3_file_query(self, cloud_path: S3Path):
"""Boto3 query used for quick checks of existence and if path is file/dir"""
return next(
(
obj
for obj in (
self.s3.Bucket(cloud_path.bucket)
.objects.filter(Prefix=cloud_path.key)
.limit(1)
)
),
None,
)
# first check if this is an object that we can access directly

try:
obj = self.s3.Object(cloud_path.bucket, cloud_path.key)
obj.load()
return obj

# else, confirm it is a dir by filtering to the first item under the prefix
except ClientError:
return next(
(
obj
for obj in (
self.s3.Bucket(cloud_path.bucket)
.objects.filter(Prefix=cloud_path.key)
.limit(1)
)
),
None,
)

def _list_dir(self, cloud_path: S3Path, recursive=False) -> Iterable[S3Path]:
bucket = self.s3.Bucket(cloud_path.bucket)
@@ -200,12 +211,12 @@ def _remove(self, cloud_path: S3Path) -> None:
obj = self.s3.Object(cloud_path.bucket, cloud_path.key)

# will throw if not a file
obj.get()
obj.load()

resp = obj.delete()
assert resp.get("ResponseMetadata").get("HTTPStatusCode") == 204

except self.client.exceptions.NoSuchKey:
except ClientError:
# try to delete as a directory instead
bucket = self.s3.Bucket(cloud_path.bucket)

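
A note on the `_remove` hunk above: `Object.load()` performs a HEAD request, and S3 returns no error body for HEAD, so boto3 reports a missing key as a generic `ClientError` with code "404" rather than `NoSuchKey`, which is why the `except` clause is widened. If only the not-found case should be swallowed, a hedged sketch (not part of this diff) could look like this:

```python
from botocore.exceptions import ClientError

def is_existing_object(obj) -> bool:
    """True if this boto3 Object resource points at a real key."""
    try:
        obj.load()  # HEAD; a missing key raises ClientError with code "404"
        return True
    except ClientError as e:
        if e.response.get("Error", {}).get("Code") in ("404", "NoSuchKey"):
            return False
        raise  # permission or credential errors should not be silenced
```
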
2 changes: 1 addition & 1 deletion setup.py
@@ -59,5 +59,5 @@ def load_requirements(path: Path):
"Source Code": "https://github.com/drivendataorg/cloudpathlib",
},
url="https://github.com/drivendataorg/cloudpathlib",
version="0.6.0",
version="0.6.1",
)
13 changes: 12 additions & 1 deletion tests/mock_clients/mock_s3.py
@@ -5,6 +5,7 @@
from tempfile import TemporaryDirectory

from boto3.session import Session
from botocore.exceptions import ClientError

from .utils import delete_empty_parents_up_to_root

@@ -64,7 +65,17 @@ def get(self):
if not self.path.exists() or self.path.is_dir():
raise NoSuchKey({}, {})
else:
return {"key": self.path}
return {"key": str(PurePosixPath(self.path))}

def load(self):
if not self.path.exists() or self.path.is_dir():
raise ClientError({}, {})
else:
return {"key": str(PurePosixPath(self.path))}

@property
def key(self):
return str(PurePosixPath(self.path).relative_to(PurePosixPath(self.root)))

def copy_from(self, CopySource=None, Metadata=None, MetadataDirective=None):
if CopySource["Key"] == str(self.path.relative_to(self.root)):
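
Why `PurePosixPath` in the mock: S3 keys always use forward slashes, but on a Windows test runner `pathlib.Path` renders the mock's local paths with backslashes. Re-wrapping the path as a `PurePosixPath` reuses its parts and joins them with `/`. A small illustration (the file names are placeholders):

```python
from pathlib import PurePosixPath, PureWindowsPath

# PureWindowsPath stands in for what pathlib.Path produces on a Windows test runner.
local = PureWindowsPath("bucket_root") / "dir_0" / "file0_0.txt"

print(str(local))                 # bucket_root\dir_0\file0_0.txt
print(str(PurePosixPath(local)))  # bucket_root/dir_0/file0_0.txt, the S3 key form
```
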
51 changes: 25 additions & 26 deletions tests/test_s3_specific.py
@@ -1,5 +1,6 @@
from concurrent.futures import ProcessPoolExecutor
from time import sleep

import pytest

from boto3.s3.transfer import TransferConfig
@@ -30,6 +31,7 @@ def test_transfer_config(s3_rig, tmp_path):
# download
client.set_as_default_client()
p = s3_rig.create_cloud_path("dir_0/file0_0.txt")

dl_dir = tmp_path
assert not (dl_dir / p.name).exists()
p.download_to(dl_dir)
@@ -54,37 +56,31 @@ def _download_with_threads(s3_rig, tmp_path, use_threads):
"""Job used by tests to ensure Transfer config changes are
actually passed through to boto3 and respected.
"""
try:
sleep(1) # give test monitoring process time to start watching

transfer_config = TransferConfig(
max_concurrency=100,
use_threads=use_threads,
multipart_chunksize=1 * 1024,
multipart_threshold=10 * 1024,
)
client = s3_rig.client_class(boto3_transfer_config=transfer_config)
p = client.CloudPath(f"s3://{s3_rig.drive}/dir_0/file0_to_download.txt")

assert not p.exists()
sleep(1) # give test monitoring process time to start watching

transfer_config = TransferConfig(
max_concurrency=100,
use_threads=use_threads,
multipart_chunksize=1 * 1024,
multipart_threshold=10 * 1024,
)
client = s3_rig.client_class(boto3_transfer_config=transfer_config)
p = client.CloudPath(f"s3://{s3_rig.drive}/{s3_rig.test_dir}/dir_0/file0_to_download.txt")

# file should be about 60KB
text = "lalala" * 10_000
p.write_text(text)
assert not p.exists()

assert p.exists()
# file should be about 60KB
text = "lalala" * 10_000
p.write_text(text)

# assert not (dl_dir / p.name).exists()
p.download_to(tmp_path)
assert p.exists()

p.unlink()
# assert not (dl_dir / p.name).exists()
p.download_to(tmp_path)

assert not p.exists()
p.unlink()

finally:
p = s3_rig.create_cloud_path("/dir_0/file0_0.txt")
if p.exists():
p.unlink()
assert not p.exists()


def test_transfer_config_live(s3_rig, tmp_path):
@@ -133,7 +129,7 @@ def _execute_on_subprocess_and_observe(use_threads):
assert _execute_on_subprocess_and_observe(use_threads=True) > 10


def test_no_sign_request(s3_rig):
def test_no_sign_request(s3_rig, tmp_path):
"""Tests that we can pass no_sign_request to the S3Client and we will
be able to access public resources but not private ones.
"""
@@ -146,6 +142,9 @@ def test_no_sign_request(s3_rig):
)
assert p.exists()

p.download_to(tmp_path)
assert (tmp_path / p.name).read_bytes() == p.read_bytes()

# unsigned raises for private S3 file that exists
p = client.CloudPath(f"s3://{s3_rig.drive}/dir_0/file0_to_download.txt")
with pytest.raises(botocore.exceptions.ClientError):
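
For context, a sketch of the behavior the expanded `test_no_sign_request` exercises, written against cloudpathlib's public API; the bucket and key below are placeholders for any object in a bucket that allows anonymous reads:

```python
from pathlib import Path
from cloudpathlib import S3Client

client = S3Client(no_sign_request=True)  # make anonymous, unsigned requests

# Placeholder public object; substitute a real key in a publicly readable bucket.
p = client.CloudPath("s3://example-public-bucket/data/file.txt")

# With this fix, exists() resolves with a HEAD on the object itself,
# so anonymous access works without ListBucket permission on the bucket.
assert p.exists()
p.download_to(Path("."))  # fetch the file into the current directory
```
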
