Skip to content

Commit

Permalink
addressed comments on PR
Browse files Browse the repository at this point in the history
  • Loading branch information
adrpar committed Feb 2, 2020
1 parent 04d75e1 commit 741b047
Show file tree
Hide file tree
Showing 3 changed files with 44 additions and 25 deletions.
10 changes: 10 additions & 0 deletions help.txt
Original file line number Diff line number Diff line change
Expand Up @@ -101,6 +101,16 @@ FUNCTIONS
multipart_upload_kwargs: dict, optional
Additional parameters to pass to boto3's initiate_multipart_upload function.
For writing only.
singlepart_upload_kwargs: dict, optional
Additional parameters to pass to boto3's S3.Object.put function when using single
part upload.
For writing only.
multipart_upload: bool, optional
Default: `True`
If set to `True`, will use multipart upload for writing to S3. If set
to `False`, S3 upload will use the S3 Single-Part Upload API, which
is better suited to small files.
For writing only.
version_id: str, optional
Version of the object, used when reading object. If None, will fetch the most recent version.

Expand Down
31 changes: 20 additions & 11 deletions smart_open/s3.py
Original file line number Diff line number Diff line change
Expand Up @@ -79,6 +79,7 @@ def open(
resource_kwargs=None,
multipart_upload_kwargs=None,
multipart_upload=True,
singlepart_upload_kwargs=None,
object_kwargs=None,
):
"""Open an S3 object for reading or writing.
Expand All @@ -102,10 +103,14 @@ def open(
multipart_upload_kwargs: dict, optional
Additional parameters to pass to boto3's initiate_multipart_upload function.
For writing only.
singlepart_upload_kwargs: dict, optional
Additional parameters to pass to boto3's S3.Object.put function when using single
part upload.
For writing only.
multipart_upload: bool, optional
Default: `True`
If set to `True`, will use multipart upload for writing to S3. If set
to `false`, S3 upload will use the S3 Single-Part Upload API, which
to `False`, S3 upload will use the S3 Single-Part Upload API, which
is better suited to small files.
For writing only.
version_id: str, optional
Expand Down Expand Up @@ -155,7 +160,7 @@ def open(
bucket_id,
key_id,
session=session,
multipart_upload_kwargs=multipart_upload_kwargs,
singlepart_upload_kwargs=singlepart_upload_kwargs,
resource_kwargs=resource_kwargs,
)
else:
Expand Down Expand Up @@ -663,15 +668,18 @@ def __repr__(self):
class SinglepartWriter(io.BufferedIOBase):
"""Writes bytes to S3 using the single part API.
Implements the io.BufferedIOBase interface of the standard library."""
Implements the io.BufferedIOBase interface of the standard library.
This class buffers all of its input in memory until its `close` method is called. Only then will
the data be written to S3 and the buffer released."""

def __init__(
self,
bucket,
key,
session=None,
resource_kwargs=None,
multipart_upload_kwargs=None,
singlepart_upload_kwargs=None,
):

self._session = session
Expand All @@ -681,10 +689,10 @@ def __init__(
session = boto3.Session()
if resource_kwargs is None:
resource_kwargs = {}
if multipart_upload_kwargs is None:
multipart_upload_kwargs = {}
if singlepart_upload_kwargs is None:
singlepart_upload_kwargs = {}

self._multipart_upload_kwargs = multipart_upload_kwargs
self._singlepart_upload_kwargs = singlepart_upload_kwargs

s3 = session.resource('s3', **resource_kwargs)
try:
Expand Down Expand Up @@ -712,9 +720,10 @@ def close(self):
self._buf.seek(0)

try:
self._object.put(Body=self._buf, **self._multipart_upload_kwargs)
self._object.put(Body=self._buf, **self._singlepart_upload_kwargs)
except botocore.client.ClientError:
raise ValueError('the bucket %r does not exist, or is forbidden for access' % self._object.bucket_name)
raise ValueError(
'the bucket %r does not exist, or is forbidden for access' % self._object.bucket_name)

logger.debug("direct upload finished")
self._closed = True
Expand Down Expand Up @@ -774,13 +783,13 @@ def __repr__(self):
"key=%r, "
"session=%r, "
"resource_kwargs=%r, "
"multipart_upload_kwargs=%r)"
"singlepart_upload_kwargs=%r)"
) % (
self._object.bucket_name,
self._object.key,
self._session,
self._resource_kwargs,
self._multipart_upload_kwargs,
self._singlepart_upload_kwargs,
)


Expand Down
28 changes: 14 additions & 14 deletions smart_open/tests/test_s3.py
Original file line number Diff line number Diff line change
Expand Up @@ -302,7 +302,7 @@ def test_to_boto3(self):


@maybe_mock_s3
class BufferedMultiPartOutputBaseTest(unittest.TestCase):
class MultipartWriterTest(unittest.TestCase):
"""
Test writing into s3 files.
Expand All @@ -318,7 +318,7 @@ def test_write_01(self):
test_string = u"žluťoučký koníček".encode('utf8')

# write into key
with smart_open.s3.BufferedMultiPartOutputBase(BUCKET_NAME, WRITE_KEY_NAME) as fout:
with smart_open.s3.MultipartWriter(BUCKET_NAME, WRITE_KEY_NAME) as fout:
fout.write(test_string)

# read key and test content
Expand All @@ -329,7 +329,7 @@ def test_write_01(self):
def test_write_01a(self):
"""Does s3 write fail on incorrect input?"""
try:
with smart_open.s3.BufferedMultiPartOutputBase(BUCKET_NAME, WRITE_KEY_NAME) as fin:
with smart_open.s3.MultipartWriter(BUCKET_NAME, WRITE_KEY_NAME) as fin:
fin.write(None)
except TypeError:
pass
Expand All @@ -338,7 +338,7 @@ def test_write_01a(self):

def test_write_02(self):
"""Does s3 write unicode-utf8 conversion work?"""
smart_open_write = smart_open.s3.BufferedMultiPartOutputBase(BUCKET_NAME, WRITE_KEY_NAME)
smart_open_write = smart_open.s3.MultipartWriter(BUCKET_NAME, WRITE_KEY_NAME)
smart_open_write.tell()
logger.info("smart_open_write: %r", smart_open_write)
with smart_open_write as fout:
Expand All @@ -348,7 +348,7 @@ def test_write_02(self):
def test_write_03(self):
"""Does s3 multipart chunking work correctly?"""
# write
smart_open_write = smart_open.s3.BufferedMultiPartOutputBase(
smart_open_write = smart_open.s3.MultipartWriter(
BUCKET_NAME, WRITE_KEY_NAME, min_part_size=10
)
with smart_open_write as fout:
Expand All @@ -369,7 +369,7 @@ def test_write_03(self):

def test_write_04(self):
"""Does writing no data cause key with an empty value to be created?"""
smart_open_write = smart_open.s3.BufferedMultiPartOutputBase(BUCKET_NAME, WRITE_KEY_NAME)
smart_open_write = smart_open.s3.MultipartWriter(BUCKET_NAME, WRITE_KEY_NAME)
with smart_open_write as fout: # noqa
pass

Expand All @@ -380,7 +380,7 @@ def test_write_04(self):

def test_gzip(self):
expected = u'а не спеть ли мне песню... о любви'.encode('utf-8')
with smart_open.s3.BufferedMultiPartOutputBase(BUCKET_NAME, WRITE_KEY_NAME) as fout:
with smart_open.s3.MultipartWriter(BUCKET_NAME, WRITE_KEY_NAME) as fout:
with gzip.GzipFile(fileobj=fout, mode='w') as zipfile:
zipfile.write(expected)

Expand All @@ -397,7 +397,7 @@ def test_buffered_writer_wrapper_works(self):
"""
expected = u'не думай о секундах свысока'

with smart_open.s3.BufferedMultiPartOutputBase(BUCKET_NAME, WRITE_KEY_NAME) as fout:
with smart_open.s3.MultipartWriter(BUCKET_NAME, WRITE_KEY_NAME) as fout:
with io.BufferedWriter(fout) as sub_out:
sub_out.write(expected.encode('utf-8'))

Expand Down Expand Up @@ -451,7 +451,7 @@ def test_to_boto3(self):


@maybe_mock_s3
class BufferedSinglePartOutputBaseTest(unittest.TestCase):
class SinglepartWriterTest(unittest.TestCase):
"""
Test writing into s3 files using single part upload.
Expand All @@ -467,7 +467,7 @@ def test_write_01(self):
test_string = u"žluťoučký koníček".encode('utf8')

# write into key
with smart_open.s3.BufferedSinglePartOutputBase(BUCKET_NAME, WRITE_KEY_NAME) as fout:
with smart_open.s3.SinglepartWriter(BUCKET_NAME, WRITE_KEY_NAME) as fout:
fout.write(test_string)

# read key and test content
Expand All @@ -478,7 +478,7 @@ def test_write_01(self):
def test_write_01a(self):
"""Does s3 write fail on incorrect input?"""
try:
with smart_open.s3.BufferedSinglePartOutputBase(BUCKET_NAME, WRITE_KEY_NAME) as fin:
with smart_open.s3.SinglepartWriter(BUCKET_NAME, WRITE_KEY_NAME) as fin:
fin.write(None)
except TypeError:
pass
Expand All @@ -489,7 +489,7 @@ def test_write_02(self):
"""Does s3 write unicode-utf8 conversion work?"""
test_string = u"testžížáč".encode("utf-8")

smart_open_write = smart_open.s3.BufferedSinglePartOutputBase(BUCKET_NAME, WRITE_KEY_NAME)
smart_open_write = smart_open.s3.SinglepartWriter(BUCKET_NAME, WRITE_KEY_NAME)
smart_open_write.tell()
logger.info("smart_open_write: %r", smart_open_write)
with smart_open_write as fout:
Expand All @@ -498,7 +498,7 @@ def test_write_02(self):

def test_write_04(self):
"""Does writing no data cause key with an empty value to be created?"""
smart_open_write = smart_open.s3.BufferedSinglePartOutputBase(BUCKET_NAME, WRITE_KEY_NAME)
smart_open_write = smart_open.s3.SinglepartWriter(BUCKET_NAME, WRITE_KEY_NAME)
with smart_open_write as fout: # noqa
pass

Expand All @@ -514,7 +514,7 @@ def test_buffered_writer_wrapper_works(self):
"""
expected = u'не думай о секундах свысока'

with smart_open.s3.BufferedSinglePartOutputBase(BUCKET_NAME, WRITE_KEY_NAME) as fout:
with smart_open.s3.SinglepartWriter(BUCKET_NAME, WRITE_KEY_NAME) as fout:
with io.BufferedWriter(fout) as sub_out:
sub_out.write(expected.encode('utf-8'))

Expand Down

0 comments on commit 741b047

Please sign in to comment.