Skip to content

Commit

Permalink
Avoid MD5
Browse files Browse the repository at this point in the history
  • Loading branch information
JordonPhillips committed Feb 26, 2016
1 parent 69183e0 commit 23b71f0
Show file tree
Hide file tree
Showing 2 changed files with 57 additions and 22 deletions.
35 changes: 18 additions & 17 deletions awscli/customizations/s3/fileinfo.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,13 +6,10 @@
import errno
import hashlib

from dateutil.parser import parse
from dateutil.tz import tzlocal

from botocore.compat import quote
from awscli.customizations.s3.utils import find_bucket_key, \
uni_print, guess_content_type, MD5Error, bytes_print, set_file_utime, \
RequestParamsMapper
from botocore.compat import MD5_AVAILABLE
from awscli.customizations.s3.utils import (
find_bucket_key, guess_content_type, MD5Error, bytes_print, set_file_utime,
RequestParamsMapper)


LOGGER = logging.getLogger(__name__)
Expand Down Expand Up @@ -48,22 +45,26 @@ def save_file(filename, response_data, last_update, is_stream=False):
if not e.errno == errno.EEXIST:
raise CreateDirectoryError(
"Could not create directory %s: %s" % (d, e))
md5 = hashlib.md5()

if MD5_AVAILABLE and _can_validate_md5_with_etag(etag, response_data):
md5 = hashlib.md5()
else:
md5 = None

file_chunks = iter(partial(body.read, 1024 * 1024), b'')
if is_stream:
# Need to save the data to be able to check the etag for a stream
# because once the data is written to the stream there is no
# undoing it.
payload = write_to_file(None, etag, md5, file_chunks, True)
payload = write_to_file(None, etag, file_chunks, md5, True)
else:
with open(filename, 'wb') as out_file:
write_to_file(out_file, etag, md5, file_chunks)
write_to_file(out_file, etag, file_chunks, md5)

if _can_validate_md5_with_etag(etag, response_data):
if etag != md5.hexdigest():
if not is_stream:
os.remove(filename)
raise MD5Error(filename)
if md5 and etag != md5.hexdigest():
if not is_stream:
os.remove(filename)
raise MD5Error(filename)

if not is_stream:
last_update_tuple = last_update.timetuple()
Expand All @@ -84,15 +85,15 @@ def _can_validate_md5_with_etag(etag, response_data):
return False


def write_to_file(out_file, etag, md5, file_chunks, is_stream=False):
def write_to_file(out_file, etag, file_chunks, md5=None, is_stream=False):
"""
Updates the etag for each file chunk. It will write to the file if it is a
file, but if it is a stream it will return a byte string to be later
written to a stream.
"""
body = b''
for chunk in file_chunks:
if not _is_multipart_etag(etag):
if md5 and not _is_multipart_etag(etag):
md5.update(chunk)
if is_stream:
body += chunk
Expand Down
44 changes: 39 additions & 5 deletions tests/unit/customizations/s3/test_fileinfo.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,6 @@
import tempfile
import shutil
from datetime import datetime
from hashlib import md5

from awscli.compat import six
import mock
Expand All @@ -26,20 +25,46 @@
from awscli.customizations.s3.fileinfo import TaskInfo


class TestSaveFile(unittest.TestCase):
class BaseMD5Test(unittest.TestCase):
    """Base test case that swaps ``hashlib.md5`` for a mock.

    ``setUp`` installs two patches:

    * ``hashlib.md5`` is replaced by a builder returning ``self.md5_object``,
      whose ``hexdigest`` is itself a mock that subclasses may override.
    * ``awscli.customizations.s3.fileinfo.MD5_AVAILABLE`` is patched through
      ``set_md5_available`` (``True`` by default).

    Both patches are undone through ``addCleanup`` rather than ``tearDown``.
    unittest skips ``tearDown`` when ``setUp`` raises, which would otherwise
    leave the process-wide ``hashlib.md5`` patch in place for later tests.
    """

    def setUp(self):
        super(BaseMD5Test, self).setUp()
        self.md5_object = mock.Mock()
        self.md5_object.hexdigest = mock.Mock(return_value=b'foo')
        md5_builder = mock.Mock(return_value=self.md5_object)
        self.md5_patch = mock.patch('hashlib.md5', md5_builder)
        self.md5_patch.start()
        # Register the stop right after a successful start so the patch is
        # reverted even if the rest of setUp fails.
        self.addCleanup(self.md5_patch.stop)
        self._md5_available_patch = None
        self.addCleanup(self._stop_md5_available_patch)
        self.set_md5_available()

    def tearDown(self):
        # Patches are reverted by the cleanups registered in setUp; this
        # method is kept so subclasses can keep chaining to it via super().
        super(BaseMD5Test, self).tearDown()

    def set_md5_available(self, is_available=True):
        """Patch ``fileinfo.MD5_AVAILABLE`` to ``is_available``.

        Any patch installed by a previous call is stopped first, so the
        method can be called repeatedly within one test.
        """
        self._stop_md5_available_patch()

        patcher = mock.patch(
            'awscli.customizations.s3.fileinfo.MD5_AVAILABLE', is_available)
        patcher.start()
        # Only remember the patcher once it has started; stopping a patcher
        # that never started is an error on some mock versions.
        self._md5_available_patch = patcher

    def _stop_md5_available_patch(self):
        """Stop the active ``MD5_AVAILABLE`` patch, if any (idempotent)."""
        patcher = self._md5_available_patch
        if patcher is not None:
            self._md5_available_patch = None
            patcher.stop()


class TestSaveFile(BaseMD5Test):
def setUp(self):
super(TestSaveFile, self).setUp()
self.tempdir = tempfile.mkdtemp()
self.filename = os.path.join(self.tempdir, 'dir1', 'dir2', 'foo.txt')
etag = md5()
etag.update(b'foobar')
etag = etag.hexdigest()
etag = '3858f62230ac3c915f300c664312c63f'
self.md5_object.hexdigest = mock.Mock(return_value=etag)
self.response_data = {
'Body': six.BytesIO(b'foobar'),
'ETag': '"%s"' % etag,
}
self.last_update = datetime.now()

def tearDown(self):
super(TestSaveFile, self).tearDown()
shutil.rmtree(self.tempdir)

def test_save_file(self):
Expand Down Expand Up @@ -95,6 +120,15 @@ def test_no_raise_md5_with_kms(self):
# The file should have been saved.
self.assertTrue(os.path.isfile(self.filename))

def test_no_raise_md5_when_md5_unavailable(self):
    # Flag MD5 as unavailable before the download is attempted.
    self.set_md5_available(False)
    self.response_data.update({
        'ETag': '"0"',
        'ServerSideEncryption': 'AES256',
    })
    # Saving must complete without an md5 error being raised.
    fileinfo.save_file(self.filename, self.response_data, self.last_update)
    # The download is expected to be present on disk afterwards.
    self.assertTrue(os.path.isfile(self.filename))


class TestSetSizeFromS3(unittest.TestCase):
def test_set_size_from_s3(self):
Expand Down

0 comments on commit 23b71f0

Please sign in to comment.