Skip to content

Commit

Permalink
Reduced memory usage when computing checksums of files.
Browse files Browse the repository at this point in the history
Fixes #1098
  • Loading branch information
trevorrowe committed Jun 15, 2016
1 parent 7cbfbef commit d848206
Show file tree
Hide file tree
Showing 8 changed files with 93 additions and 38 deletions.
6 changes: 6 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,6 +1,12 @@
Unreleased Changes
------------------

* Issue - Memory Usage - Added a pair of utility methods that perform more efficient
SHA256 and MD5 checksums of file objects. Before this change, data was read in
1MB chunks. Now using the `OpenSSL::Digest.file` interface to reduce memory usage.

See related [GitHub issue #1098](https://github.com/aws/aws-sdk-ruby/issues/1098).

* Issue - Aws::RDS - Resolved an issue with `Aws::RDS#db_engine_version`.

See related [GitHub issue #1138](https://github.com/aws/aws-sdk-ruby/issues/1138).
Expand Down
1 change: 1 addition & 0 deletions aws-sdk-core/lib/aws-sdk-core.rb
Original file line number Diff line number Diff line change
Expand Up @@ -100,6 +100,7 @@ module Aws
end

autoload :AssumeRoleCredentials, 'aws-sdk-core/assume_role_credentials'
autoload :Checksums, 'aws-sdk-core/checksums'
autoload :Client, 'aws-sdk-core/client'
autoload :ClientStubs, 'aws-sdk-core/client_stubs'
autoload :ClientWaiters, 'aws-sdk-core/client_waiters'
Expand Down
43 changes: 43 additions & 0 deletions aws-sdk-core/lib/aws-sdk-core/checksums.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,43 @@
require 'openssl'
require 'tempfile'

module Aws
  # Utility methods for computing SHA256 and MD5 checksums of strings,
  # files and other readable IO-like objects without holding the whole
  # payload in memory.
  #
  # @api private
  module Checksums

    # Number of bytes requested per `read` call when streaming an IO object
    # that is not backed by a file on disk.
    CHUNK_SIZE = 1024 * 1024

    class << self

      # Computes the hex-encoded SHA256 digest of the given value.
      #
      # @param [File, Tempfile, IO#read, String] value
      # @return [String<SHA256 Hexdigest>]
      def sha256_hexdigest(value)
        digest_of(OpenSSL::Digest::SHA256, value).hexdigest
      end

      # Computes the base64-encoded MD5 digest of the given value.
      #
      # @param [File, Tempfile, IO#read, String] value
      # @return [String<MD5>]
      def md5(value)
        # 'm0' is base64 without line feeds. A 16 byte MD5 digest never spans
        # an encoded line, so this matches `Base64.encode64(...).strip`
        # without depending on the base64 library being loaded.
        [digest_of(OpenSSL::Digest::MD5, value).digest].pack('m0')
      end

      private

      # Builds a digest of `value`, picking the cheapest strategy for its type.
      #
      # @param [Class] digest_class an OpenSSL::Digest subclass
      # @param [File, Tempfile, IO#read, String] value
      # @return [OpenSSL::Digest] the populated digest object
      def digest_of(digest_class, value)
        if file_on_disk?(value)
          # The digest opens the path itself, so the caller's file handle is
          # neither read nor rewound.
          digest_class.file(value.path)
        elsif value.respond_to?(:read)
          update_in_chunks(digest_class.new, value)
        else
          digest_class.new.update(value)
        end
      end

      # @return [Boolean] true when `value` is a File or Tempfile whose path
      #   still exists. An unlinked Tempfile reports a nil path and a deleted
      #   file has no entry on disk; both have to be read through the handle.
      def file_on_disk?(value)
        return false unless File === value || Tempfile === value
        path = value.path
        !path.nil? && File.exist?(path)
      end

      # Feeds `io` into `digest` one chunk at a time so that at most
      # CHUNK_SIZE bytes are requested per read, then rewinds `io` so the
      # body can be read again when the request is sent.
      #
      # @param [OpenSSL::Digest] digest
      # @param [IO#read] io
      # @return [OpenSSL::Digest] the same digest object
      def update_in_chunks(digest, io)
        # read(length) returns nil once the end of the stream is reached
        while (chunk = io.read(CHUNK_SIZE))
          digest.update(chunk)
        end
        io.rewind
        digest
      end

    end
  end
end
13 changes: 1 addition & 12 deletions aws-sdk-core/lib/aws-sdk-core/plugins/s3_md5s.rb
Original file line number Diff line number Diff line change
Expand Up @@ -23,25 +23,14 @@ class S3Md5s < Seahorse::Client::Plugin
# @api private
class Handler < Seahorse::Client::Handler

OneMB = 1024 * 1024

def call(context)
body = context.http_request.body
if body.size > 0
context.http_request.headers['Content-Md5'] ||= md5(body)
context.http_request.headers['Content-Md5'] ||= Checksums.md5(body)
end
@handler.call(context)
end

def md5(body)
md5 = OpenSSL::Digest::MD5.new
while chunk = body.read(OneMB)
md5.update(chunk)
end
body.rewind
Base64.encode64(md5.digest).strip
end

end

option(:compute_checksums, true)
Expand Down
2 changes: 1 addition & 1 deletion aws-sdk-core/lib/aws-sdk-core/plugins/s3_sse_cpk.rb
Original file line number Diff line number Diff line change
Expand Up @@ -47,7 +47,7 @@ def require_https(context)
end

def md5(str)
OpenSSL::Digest::MD5.digest(str)
Checksums.md5(str)
end

def base64(str)
Expand Down
11 changes: 1 addition & 10 deletions aws-sdk-core/lib/aws-sdk-core/signers/v4.rb
Original file line number Diff line number Diff line change
Expand Up @@ -217,16 +217,7 @@ def standard_port?(uri)
end

def hexdigest(value)
digest = OpenSSL::Digest::SHA256.new
if value.respond_to?(:read)
chunk = nil
chunk_size = 1024 * 1024 # 1 megabyte
digest.update(chunk) while chunk = value.read(chunk_size)
value.rewind
else
digest.update(value)
end
digest.hexdigest
Aws::Checksums.sha256_hexdigest(value)
end

def hmac(key, value)
Expand Down
28 changes: 22 additions & 6 deletions aws-sdk-core/spec/aws/plugins/s3_md5s_spec.rb
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
require 'spec_helper'
require 'tempfile'

module Aws
module Plugins
Expand Down Expand Up @@ -48,13 +49,28 @@ module Plugins
)
end

it 'computes the md5 in 1MB chunks for IO objects' do
chunk = '.' * 1024 * 1024
body = double('io-object', size: 5 * 1024 * 1024)
it 'computes the md5 of files without loading them into memory' do
body = Tempfile.new('tempfile')
body.write('.' * 5 * 1024 * 1024)
body.flush

expect(body).not_to receive(:read)
expect(body).not_to receive(:rewind)

context.http_request.body = body
handlers.add(NoSendHandler, step: :send)
handlers.to_stack.call(context)
expect(context.http_request.headers['Content-Md5']).to(
eq("+kDD2/74SZx+Rz+/Dw7I1Q==")
)
end

it 'computes the md5 in in memory for non-file IO objects' do
size = 5 * 1024 * 1024
body = double('io-object', size: size)
expect(body).to receive(:read).
with(1024 * 1024).
exactly(6).times.
and_return(chunk, chunk, chunk, chunk, chunk, nil)
with(no_args). # read the entire object
and_return('.' * size)
expect(body).to receive(:rewind)

context.http_request.body = body
Expand Down
27 changes: 18 additions & 9 deletions aws-sdk-core/spec/aws/signers/v4_spec.rb
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
require 'spec_helper'
require 'tempfile'

module Aws
module Signers
Expand All @@ -17,8 +18,8 @@ module Signers
req
end

let(:now) { double('now') }
let(:utc) { double('utc-time') }
let(:now) { Time.now }
let(:utc) { now.utc }

before(:each) {
allow(Time).to receive(:now).and_return(now)
Expand Down Expand Up @@ -67,13 +68,21 @@ module Signers
Digest::SHA256.hexdigest('abc'))
end

it 'reads the http request payload in 1mb chunks' do
body = double('http-payload')
allow(body).to receive(:rewind)
expect(body).to receive(:read).with(1024 * 1024) { 'a' }
expect(body).to receive(:read).with(1024 * 1024) { 'b' }
expect(body).to receive(:read).with(1024 * 1024) { 'c' }
expect(body).to receive(:read).with(1024 * 1024) { nil }
it 'computes the checksum of files without loading them into memory' do
body = Tempfile.new('tempfile')
body.write('abc')
body.flush
expect(body).not_to receive(:read)
expect(body).not_to receive(:rewind)
http_request.body = body
expect(sign.headers['X-Amz-Content-Sha256']).to eq(
Digest::SHA256.hexdigest('abc'))
end

it 'reads non-file IO objects into memory to compute checksusm' do
body = StringIO.new('abc')
expect(body).to receive(:read).with(no_args).and_call_original
expect(body).to receive(:rewind).with(no_args).and_call_original
http_request.body = body
expect(sign.headers['X-Amz-Content-Sha256']).to eq(
Digest::SHA256.hexdigest('abc'))
Expand Down

0 comments on commit d848206

Please sign in to comment.