From 4f544fd2ce9df9d370111626939a25d0d8945cf8 Mon Sep 17 00:00:00 2001 From: Sven Krieger <37476281+svkrieger@users.noreply.github.com> Date: Mon, 14 Feb 2022 15:56:18 +0100 Subject: [PATCH 1/2] Implement exponential backoff for service fetch jobs (v2) --- app/jobs/v2/services/asynchronous_operations.rb | 13 +++++++++++-- app/jobs/v2/services/service_binding_state_fetch.rb | 3 ++- .../v2/services/service_instance_state_fetch.rb | 3 ++- 3 files changed, 15 insertions(+), 4 deletions(-) diff --git a/app/jobs/v2/services/asynchronous_operations.rb b/app/jobs/v2/services/asynchronous_operations.rb index 999be295154..648c5f99bed 100644 --- a/app/jobs/v2/services/asynchronous_operations.rb +++ b/app/jobs/v2/services/asynchronous_operations.rb @@ -15,7 +15,7 @@ def new_end_timestamp def retry_job(retry_after_header: '') update_polling_interval(retry_after_header: retry_after_header) - if Time.now + poll_interval > end_timestamp + if Time.now + next_execution_in > end_timestamp end_timestamp_reached else enqueue_again @@ -23,10 +23,19 @@ def retry_job(retry_after_header: '') end def enqueue_again - opts = { queue: Jobs::Queues.generic, run_at: Delayed::Job.db_time_now + poll_interval } + opts = { queue: Jobs::Queues.generic, run_at: Delayed::Job.db_time_now + next_execution_in } + self.retry_number += 1 Jobs::Enqueuer.new(self, opts).enqueue end + def default_polling_exponential_backoff + Config.config.get(:broker_client_async_poll_exponential_backoff_rate) + end + + def next_execution_in + poll_interval * default_polling_exponential_backoff**retry_number + end + def update_polling_interval(retry_after_header: '') default_poll_interval = Config.config.get(:broker_client_default_async_poll_interval_seconds) poll_interval = [default_poll_interval, retry_after_header.to_i].max diff --git a/app/jobs/v2/services/service_binding_state_fetch.rb b/app/jobs/v2/services/service_binding_state_fetch.rb index 075b56b8a83..69f8ae8fbbf 100644 --- a/app/jobs/v2/services/service_binding_state_fetch.rb +++ b/app/jobs/v2/services/service_binding_state_fetch.rb @@ -6,13 +6,14 @@ module Services class ServiceBindingStateFetch < VCAP::CloudController::Jobs::CCJob include AsynchronousOperations - attr_accessor :service_binding_guid, :end_timestamp, :user_audit_info, :request_attrs, :poll_interval + attr_accessor :service_binding_guid, :end_timestamp, :user_audit_info, :request_attrs, :poll_interval, :retry_number def initialize(service_binding_guid, user_info, request_attrs) @service_binding_guid = service_binding_guid @end_timestamp = new_end_timestamp @user_audit_info = user_info @request_attrs = request_attrs + @retry_number = 0 update_polling_interval end diff --git a/app/jobs/v2/services/service_instance_state_fetch.rb b/app/jobs/v2/services/service_instance_state_fetch.rb index 8d1955865b1..a1c2fd07241 100644 --- a/app/jobs/v2/services/service_instance_state_fetch.rb +++ b/app/jobs/v2/services/service_instance_state_fetch.rb @@ -6,7 +6,7 @@ module Services class ServiceInstanceStateFetch < VCAP::CloudController::Jobs::CCJob include AsynchronousOperations - attr_accessor :name, :service_instance_guid, :request_attrs, :poll_interval, :end_timestamp, :user_audit_info + attr_accessor :name, :service_instance_guid, :request_attrs, :poll_interval, :end_timestamp, :user_audit_info, :retry_number def initialize(name, service_instance_guid, user_audit_info, request_attrs, end_timestamp=nil) @name = name @@ -14,6 +14,7 @@ def initialize(name, service_instance_guid, user_audit_info, request_attrs, end_ @request_attrs = request_attrs @end_timestamp = end_timestamp || new_end_timestamp @user_audit_info = user_audit_info + @retry_number = 0 update_polling_interval end From 66f8016537cac5833d75725ffcb46ed6d36dba6b Mon Sep 17 00:00:00 2001 From: Sven Krieger <37476281+svkrieger@users.noreply.github.com> Date: Mon, 14 Feb 2022 17:00:20 +0100 Subject: [PATCH 2/2] Add unit tests for exponential backoff(v2 api) Co-authored-by: Aftab Alam <81828613+iaftab-alam@users.noreply.github.com> --- .../service_binding_state_fetch_spec.rb | 60 +++++++++++++++++++ .../service_instance_state_fetch_spec.rb | 58 ++++++++++++++++++ 2 files changed, 118 insertions(+) diff --git a/spec/unit/jobs/services/service_binding_state_fetch_spec.rb b/spec/unit/jobs/services/service_binding_state_fetch_spec.rb index b4016794824..3fd94367b48 100644 --- a/spec/unit/jobs/services/service_binding_state_fetch_spec.rb +++ b/spec/unit/jobs/services/service_binding_state_fetch_spec.rb @@ -419,6 +419,66 @@ def run_job(job) end end + context 'when the last_operation state is in progress and exponential backoff is not set to default' do + let(:polling_interval) { 60 } + + it 'calculates the polling intervals based on the default interval and the exponential backoff rate' do + TestConfig.config[:broker_client_async_poll_exponential_backoff_rate] = 2.0 + enqueued_time = 0 + + Timecop.freeze do + run_job(job) + enqueued_time = Time.now + end + + [60, 180, 420, 900, 1860].each do |seconds| + Timecop.freeze((seconds - 1).seconds.after(enqueued_time)) do + execute_all_jobs(expected_successes: 0, expected_failures: 0) + end + + Timecop.freeze((seconds + 1).seconds.after(enqueued_time)) do + execute_all_jobs(expected_successes: 1, expected_failures: 0) + end + end + end + + it 'calculates the polling intervals based on the configured interval and the exponential backoff rate' do + TestConfig.config[:broker_client_async_poll_exponential_backoff_rate] = 2.0 + TestConfig.config[:broker_client_default_async_poll_interval_seconds] = 10 + + enqueued_time = 0 + + Timecop.freeze do + run_job(job) + enqueued_time = Time.now + end + + [10, 30, 70, 150, 310].each do |seconds| + Timecop.freeze((seconds - 1).seconds.after(enqueued_time)) do + execute_all_jobs(expected_successes: 0, expected_failures: 0) + end + + Timecop.freeze((seconds + 1).seconds.after(enqueued_time)) do + execute_all_jobs(expected_successes: 1, expected_failures: 0) + end + end + end + + it 'takes the exponential backoff into account when checking whether the next run would exceed the maximum duration' do + TestConfig.config[:broker_client_async_poll_exponential_backoff_rate] = 1.3 + TestConfig.config[:broker_client_max_async_poll_duration_minutes] = 60 + + job.retry_number = 10 + Timecop.freeze(Time.now + 3384.321.ceil.seconds) do + run_job(job) + service_binding.reload + + expect(service_binding.last_operation.state).to eq('failed') + expect(service_binding.last_operation.description).to eq('Service Broker failed to create binding within the required time.') + end + end + end + context 'when the last_operation state is failed' do let(:state) { 'failed' } let(:description) { 'something went wrong' } diff --git a/spec/unit/jobs/services/service_instance_state_fetch_spec.rb b/spec/unit/jobs/services/service_instance_state_fetch_spec.rb index dea185f8a2e..b7fc6e94f41 100644 --- a/spec/unit/jobs/services/service_instance_state_fetch_spec.rb +++ b/spec/unit/jobs/services/service_instance_state_fetch_spec.rb @@ -387,6 +387,64 @@ def run_job(job) end end end + + context 'when exponential backoff is not set to default' do + it 'calculates the polling intervals based on the default interval and the exponential backoff rate' do + TestConfig.config[:broker_client_async_poll_exponential_backoff_rate] = 2.0 + enqueued_time = 0 + + Timecop.freeze do + run_job(job) + enqueued_time = Time.now + end + + [60, 180, 420, 900, 1860].each do |seconds| + Timecop.freeze((seconds - 1).seconds.after(enqueued_time)) do + execute_all_jobs(expected_successes: 0, expected_failures: 0) + end + + Timecop.freeze((seconds + 1).seconds.after(enqueued_time)) do + execute_all_jobs(expected_successes: 1, expected_failures: 0) + end + end + end + + it 'calculates the polling intervals based on the configured interval and the exponential backoff rate' do + TestConfig.config[:broker_client_async_poll_exponential_backoff_rate] = 2.0 + TestConfig.config[:broker_client_default_async_poll_interval_seconds] = 10 + + enqueued_time = 0 + + Timecop.freeze do + run_job(job) + enqueued_time = Time.now + end + + [10, 30, 70, 150, 310].each do |seconds| + Timecop.freeze((seconds - 1).seconds.after(enqueued_time)) do + execute_all_jobs(expected_successes: 0, expected_failures: 0) + end + + Timecop.freeze((seconds + 1).seconds.after(enqueued_time)) do + execute_all_jobs(expected_successes: 1, expected_failures: 0) + end + end + end + + it 'takes the exponential backoff into account when checking whether the next run would exceed the maximum duration' do + TestConfig.config[:broker_client_async_poll_exponential_backoff_rate] = 1.3 + TestConfig.config[:broker_client_max_async_poll_duration_minutes] = 60 + + job.retry_number = 10 + Timecop.freeze(Time.now + 3384.321.ceil.seconds) do + run_job(job) + + service_instance.reload + expect(service_instance.last_operation.state).to eq('failed') + expect(service_instance.last_operation.description).to eq('Service Broker failed to provision within the required time.') + end + end + end end context 'when saving to the database fails' do