diff --git a/.changelog/24165.txt b/.changelog/24165.txt new file mode 100644 index 00000000000..38f33c9e8f2 --- /dev/null +++ b/.changelog/24165.txt @@ -0,0 +1,3 @@ +```release-note:bug +scheduler: fixes reconnecting allocations not getting picked correctly when replacements failed +``` diff --git a/scheduler/reconcile.go b/scheduler/reconcile.go index 7f94c183072..2d1451d8bc0 100644 --- a/scheduler/reconcile.go +++ b/scheduler/reconcile.go @@ -10,6 +10,7 @@ package scheduler import ( "fmt" + "slices" "sort" "time" @@ -1192,19 +1193,33 @@ func (a *allocReconciler) reconcileReconnecting(reconnecting allocSet, all alloc continue } + // A replacement allocation could fail and be replaced with another + // so follow the replacements in a linked list style + replacements := []string{} + nextAlloc := reconnectingAlloc.NextAllocation + for { + val, ok := all[nextAlloc] + if !ok { + break + } + replacements = append(replacements, val.ID) + nextAlloc = val.NextAllocation + } + // Find replacement allocations and decide which one to stop. A // reconnecting allocation may have multiple replacements. for _, replacementAlloc := range all { - // Skip allocations that are not a replacement of the one - // reconnecting. - isReplacement := replacementAlloc.ID == reconnectingAlloc.NextAllocation + // Skip the allocation if it is the reconnecting alloc + if replacementAlloc == reconnectingAlloc { + continue + } - // Skip allocations that are server terminal. + // Skip allocations that are server terminal or not replacements. // We don't want to replace a reconnecting allocation with one that // is or will terminate and we don't need to stop them since they // are already marked as terminal by the servers. - if !isReplacement || replacementAlloc.ServerTerminalStatus() { + if !slices.Contains(replacements, replacementAlloc.ID) || replacementAlloc.ServerTerminalStatus() { continue } @@ -1221,9 +1236,9 @@ func (a *allocReconciler) reconcileReconnecting(reconnecting allocSet, all alloc }) } } else { - // The reconnecting allocation is preferred, so stop this - // replacement, but avoid re-stopping stopped allocs - if replacementAlloc.ClientStatus != structs.AllocClientStatusFailed { + // The reconnecting allocation is preferred, so stop any replacements + // that are not in server terminal status or stopped already. + if _, ok := stop[replacementAlloc.ID]; !ok { stop[replacementAlloc.ID] = replacementAlloc a.result.stop = append(a.result.stop, allocStopResult{ alloc: replacementAlloc, diff --git a/scheduler/reconcile_test.go b/scheduler/reconcile_test.go index a60fc059df1..958f55ce4d5 100644 --- a/scheduler/reconcile_test.go +++ b/scheduler/reconcile_test.go @@ -5339,6 +5339,7 @@ func TestReconciler_Disconnected_Client(t *testing.T) { disconnectReplacement bool replaceFailedReplacement bool shouldStopOnDisconnectedNode bool + shouldStopOnReconnect bool maxDisconnect *time.Duration expected *resultExpectation pickResult string @@ -5455,8 +5456,7 @@ func TestReconciler_Disconnected_Client(t *testing.T) { disconnectedAllocStates: disconnectAllocState, shouldStopOnDisconnectedNode: false, expected: &resultExpectation{ - stop: 2, - reconnectUpdates: 2, + stop: 2, desiredTGUpdates: map[string]*structs.DesiredUpdates{ "web": { Stop: 2, @@ -5464,6 +5464,29 @@ func TestReconciler_Disconnected_Client(t *testing.T) { }, }, }, + reconcileStrategy: structs.ReconcileOptionBestScore, + callPicker: true, + }, + { + name: "stop-original-alloc-desired-status-stop", + allocCount: 1, + replace: true, + failReplacement: true, + replaceFailedReplacement: true, + disconnectedAllocCount: 1, + disconnectedAllocStatus: structs.AllocClientStatusRunning, + disconnectedAllocStates: disconnectAllocState, + shouldStopOnDisconnectedNode: false, + shouldStopOnReconnect: true, + expected: &resultExpectation{ + stop: 1, + desiredTGUpdates: map[string]*structs.DesiredUpdates{ + "web": { + Stop: 1, + Ignore: 2, + }, + }, + }, }, { name: "stop-original-pending-alloc-for-disconnected-node", @@ -5569,7 +5592,11 @@ func TestReconciler_Disconnected_Client(t *testing.T) { // Set alloc state disconnectedAllocCount := tc.disconnectedAllocCount for _, alloc := range allocs { - alloc.DesiredStatus = structs.AllocDesiredStatusRun + if tc.shouldStopOnReconnect { + alloc.DesiredStatus = structs.AllocDesiredStatusStop + } else { + alloc.DesiredStatus = structs.AllocDesiredStatusRun + } if tc.maxDisconnect != nil { alloc.Job.TaskGroups[0].MaxClientDisconnect = tc.maxDisconnect @@ -5664,8 +5691,6 @@ func TestReconciler_Disconnected_Client(t *testing.T) { if tc.shouldStopOnDisconnectedNode { must.Eq(t, testNode.ID, stopResult.alloc.NodeID) - } else { - must.NotEq(t, testNode.ID, stopResult.alloc.NodeID) } must.Eq(t, job.Version, stopResult.alloc.Job.Version)