Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Implement exponential backoff for service fetch operations (CC API v2) #2680

Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
13 changes: 11 additions & 2 deletions app/jobs/v2/services/asynchronous_operations.rb
Original file line number Diff line number Diff line change
Expand Up @@ -15,18 +15,27 @@ def new_end_timestamp

def retry_job(retry_after_header: '')
update_polling_interval(retry_after_header: retry_after_header)
if Time.now + poll_interval > end_timestamp
if Time.now + next_execution_in > end_timestamp
end_timestamp_reached
else
enqueue_again
end
end

def enqueue_again
opts = { queue: Jobs::Queues.generic, run_at: Delayed::Job.db_time_now + poll_interval }
opts = { queue: Jobs::Queues.generic, run_at: Delayed::Job.db_time_now + next_execution_in }
self.retry_number += 1
Jobs::Enqueuer.new(self, opts).enqueue
end

def default_polling_exponential_backoff
Config.config.get(:broker_client_async_poll_exponential_backoff_rate)
end

def next_execution_in
poll_interval * default_polling_exponential_backoff**retry_number
end

def update_polling_interval(retry_after_header: '')
default_poll_interval = Config.config.get(:broker_client_default_async_poll_interval_seconds)
poll_interval = [default_poll_interval, retry_after_header.to_i].max
Expand Down
3 changes: 2 additions & 1 deletion app/jobs/v2/services/service_binding_state_fetch.rb
Original file line number Diff line number Diff line change
Expand Up @@ -6,13 +6,14 @@ module Services
class ServiceBindingStateFetch < VCAP::CloudController::Jobs::CCJob
include AsynchronousOperations

attr_accessor :service_binding_guid, :end_timestamp, :user_audit_info, :request_attrs, :poll_interval
attr_accessor :service_binding_guid, :end_timestamp, :user_audit_info, :request_attrs, :poll_interval, :retry_number

def initialize(service_binding_guid, user_info, request_attrs)
@service_binding_guid = service_binding_guid
@end_timestamp = new_end_timestamp
@user_audit_info = user_info
@request_attrs = request_attrs
@retry_number = 0
update_polling_interval
end

Expand Down
3 changes: 2 additions & 1 deletion app/jobs/v2/services/service_instance_state_fetch.rb
Original file line number Diff line number Diff line change
Expand Up @@ -6,14 +6,15 @@ module Services
class ServiceInstanceStateFetch < VCAP::CloudController::Jobs::CCJob
include AsynchronousOperations

attr_accessor :name, :service_instance_guid, :request_attrs, :poll_interval, :end_timestamp, :user_audit_info
attr_accessor :name, :service_instance_guid, :request_attrs, :poll_interval, :end_timestamp, :user_audit_info, :retry_number

def initialize(name, service_instance_guid, user_audit_info, request_attrs, end_timestamp=nil)
@name = name
@service_instance_guid = service_instance_guid
@request_attrs = request_attrs
@end_timestamp = end_timestamp || new_end_timestamp
@user_audit_info = user_audit_info
@retry_number = 0
update_polling_interval
end

Expand Down
60 changes: 60 additions & 0 deletions spec/unit/jobs/services/service_binding_state_fetch_spec.rb
Original file line number Diff line number Diff line change
Expand Up @@ -419,6 +419,66 @@ def run_job(job)
end
end

context 'when the last_operation state is in progress and exponential backoff is not set to default' do
let(:polling_interval) { 60 }

it 'calculates the polling intervals based on the default interval and the exponential backoff rate' do
TestConfig.config[:broker_client_async_poll_exponential_backoff_rate] = 2.0
enqueued_time = 0

Timecop.freeze do
run_job(job)
enqueued_time = Time.now
end

[60, 180, 420, 900, 1860].each do |seconds|
Timecop.freeze((seconds - 1).seconds.after(enqueued_time)) do
execute_all_jobs(expected_successes: 0, expected_failures: 0)
end

Timecop.freeze((seconds + 1).seconds.after(enqueued_time)) do
execute_all_jobs(expected_successes: 1, expected_failures: 0)
end
end
end

it 'calculates the polling intervals based on the configured interval and the exponential backoff rate' do
TestConfig.config[:broker_client_async_poll_exponential_backoff_rate] = 2.0
TestConfig.config[:broker_client_default_async_poll_interval_seconds] = 10

enqueued_time = 0

Timecop.freeze do
run_job(job)
enqueued_time = Time.now
end

[10, 30, 70, 150, 310].each do |seconds|
Timecop.freeze((seconds - 1).seconds.after(enqueued_time)) do
execute_all_jobs(expected_successes: 0, expected_failures: 0)
end

Timecop.freeze((seconds + 1).seconds.after(enqueued_time)) do
execute_all_jobs(expected_successes: 1, expected_failures: 0)
end
end
end

it 'takes the exponential backoff into account when checking whether the next run would exceed the maximum duration' do
TestConfig.config[:broker_client_async_poll_exponential_backoff_rate] = 1.3
TestConfig.config[:broker_client_max_async_poll_duration_minutes] = 60

job.retry_number = 10
Timecop.freeze(Time.now + 3384.321.ceil.seconds) do
run_job(job)
service_binding.reload

expect(service_binding.last_operation.state).to eq('failed')
expect(service_binding.last_operation.description).to eq('Service Broker failed to create binding within the required time.')
end
end
end

context 'when the last_operation state is failed' do
let(:state) { 'failed' }
let(:description) { 'something went wrong' }
Expand Down
58 changes: 58 additions & 0 deletions spec/unit/jobs/services/service_instance_state_fetch_spec.rb
Original file line number Diff line number Diff line change
Expand Up @@ -387,6 +387,64 @@ def run_job(job)
end
end
end

context 'when exponential backoff is not set to default' do
it 'calculates the polling intervals based on the default interval and the exponential backoff rate' do
TestConfig.config[:broker_client_async_poll_exponential_backoff_rate] = 2.0
enqueued_time = 0

Timecop.freeze do
run_job(job)
enqueued_time = Time.now
end

[60, 180, 420, 900, 1860].each do |seconds|
Timecop.freeze((seconds - 1).seconds.after(enqueued_time)) do
execute_all_jobs(expected_successes: 0, expected_failures: 0)
end

Timecop.freeze((seconds + 1).seconds.after(enqueued_time)) do
execute_all_jobs(expected_successes: 1, expected_failures: 0)
end
end
end

it 'calculates the polling intervals based on the configured interval and the exponential backoff rate' do
TestConfig.config[:broker_client_async_poll_exponential_backoff_rate] = 2.0
TestConfig.config[:broker_client_default_async_poll_interval_seconds] = 10

enqueued_time = 0

Timecop.freeze do
run_job(job)
enqueued_time = Time.now
end

[10, 30, 70, 150, 310].each do |seconds|
Timecop.freeze((seconds - 1).seconds.after(enqueued_time)) do
execute_all_jobs(expected_successes: 0, expected_failures: 0)
end

Timecop.freeze((seconds + 1).seconds.after(enqueued_time)) do
execute_all_jobs(expected_successes: 1, expected_failures: 0)
end
end
end

it 'takes the exponential backoff into account when checking whether the next run would exceed the maximum duration' do
TestConfig.config[:broker_client_async_poll_exponential_backoff_rate] = 1.3
TestConfig.config[:broker_client_max_async_poll_duration_minutes] = 60

job.retry_number = 10
Timecop.freeze(Time.now + 3384.321.ceil.seconds) do
run_job(job)

service_instance.reload
expect(service_instance.last_operation.state).to eq('failed')
expect(service_instance.last_operation.description).to eq('Service Broker failed to provision within the required time.')
end
end
end
end

context 'when saving to the database fails' do
Expand Down