From fee6a1f0ae8867ed1051d0cb6cec477af41ef2b2 Mon Sep 17 00:00:00 2001 From: Paul Santa Clara Date: Thu, 19 Dec 2024 12:15:31 -0500 Subject: [PATCH 1/2] do not consider previously preempted allocations as potential victims --- pkg/scheduler/objects/allocation.go | 6 ++++++ pkg/scheduler/objects/queue.go | 5 +++++ pkg/scheduler/objects/queue_test.go | 10 +++++++++- 3 files changed, 20 insertions(+), 1 deletion(-) diff --git a/pkg/scheduler/objects/allocation.go b/pkg/scheduler/objects/allocation.go index 0ef052221..2bae20048 100644 --- a/pkg/scheduler/objects/allocation.go +++ b/pkg/scheduler/objects/allocation.go @@ -354,6 +354,12 @@ func (a *Allocation) MarkPreempted() { a.preempted = true } +func (a *Allocation) UnmarkPreempted() { + a.Lock() + defer a.Unlock() + a.preempted = false +} + // IsPreempted returns whether the allocation has been marked for preemption or not. func (a *Allocation) IsPreempted() bool { a.RLock() diff --git a/pkg/scheduler/objects/queue.go b/pkg/scheduler/objects/queue.go index 507ee3d95..6394bc04e 100644 --- a/pkg/scheduler/objects/queue.go +++ b/pkg/scheduler/objects/queue.go @@ -1885,6 +1885,11 @@ func (sq *Queue) findEligiblePreemptionVictims(results map[string]*QueuePreempti continue } + // skip allocs which have already been preempted + if alloc.IsPreempted() { + continue + } + // if we have encountered a fence then all tasks are eligible for preemption // otherwise the task is a candidate if its priority is less than or equal to the ask priority if fenced || int64(alloc.GetPriority()) <= askPriority { diff --git a/pkg/scheduler/objects/queue_test.go b/pkg/scheduler/objects/queue_test.go index 3ad286b9f..82cf8e795 100644 --- a/pkg/scheduler/objects/queue_test.go +++ b/pkg/scheduler/objects/queue_test.go @@ -1886,7 +1886,8 @@ func TestFindEligiblePreemptionVictims(t *testing.T) { // verify no victims when no allocations exist snapshot := leaf1.FindEligiblePreemptionVictims(leaf1.QueuePath, ask) - assert.Equal(t, 5, len(snapshot), "wrong snapshot count") // leaf1, parent1, root + + assert.Equal(t, 5, len(snapshot), "wrong snapshot count") // root, root.parent1, root.parent1.leaf1, root.parent2, root.parent2.leaf2 assert.Equal(t, 0, len(victims(snapshot)), "found victims") // add two lower-priority allocs in leaf2 @@ -1974,6 +1975,13 @@ func TestFindEligiblePreemptionVictims(t *testing.T) { assert.Equal(t, alloc3.allocationKey, victims(snapshot)[0].allocationKey, "wrong alloc") alloc2.released = false + //alloc2 has already been preempted and should not be considered a valid victim + alloc2.MarkPreempted() + snapshot = leaf1.FindEligiblePreemptionVictims(leaf1.QueuePath, ask) + assert.Equal(t, 1, len(victims(snapshot)), "wrong victim count") + assert.Equal(t, alloc3.allocationKey, victims(snapshot)[0].allocationKey, "wrong alloc") + alloc2.UnmarkPreempted() + // setting priority offset on parent2 queue should remove leaf2 victims parent2.priorityOffset = 1001 snapshot = leaf1.FindEligiblePreemptionVictims(leaf1.QueuePath, ask) From d959c389e596378049e6f64a3638231dec4af394 Mon Sep 17 00:00:00 2001 From: Paul Santa Clara Date: Thu, 19 Dec 2024 14:50:08 -0500 Subject: [PATCH 2/2] removing UnmarkPreempted test utility function --- pkg/scheduler/objects/allocation.go | 6 ------ pkg/scheduler/objects/queue_test.go | 7 +++++-- 2 files changed, 5 insertions(+), 8 deletions(-) diff --git a/pkg/scheduler/objects/allocation.go b/pkg/scheduler/objects/allocation.go index 2bae20048..0ef052221 100644 --- a/pkg/scheduler/objects/allocation.go +++ b/pkg/scheduler/objects/allocation.go @@ -354,12 +354,6 @@ func (a *Allocation) MarkPreempted() { a.preempted = true } -func (a *Allocation) UnmarkPreempted() { - a.Lock() - defer a.Unlock() - a.preempted = false -} - // IsPreempted returns whether the allocation has been marked for preemption or not. func (a *Allocation) IsPreempted() bool { a.RLock() diff --git a/pkg/scheduler/objects/queue_test.go b/pkg/scheduler/objects/queue_test.go index 82cf8e795..f2f242962 100644 --- a/pkg/scheduler/objects/queue_test.go +++ b/pkg/scheduler/objects/queue_test.go @@ -1975,12 +1975,15 @@ func TestFindEligiblePreemptionVictims(t *testing.T) { assert.Equal(t, alloc3.allocationKey, victims(snapshot)[0].allocationKey, "wrong alloc") alloc2.released = false - //alloc2 has already been preempted and should not be considered a valid victim + // alloc2 has already been preempted and should not be considered a valid victim alloc2.MarkPreempted() snapshot = leaf1.FindEligiblePreemptionVictims(leaf1.QueuePath, ask) assert.Equal(t, 1, len(victims(snapshot)), "wrong victim count") assert.Equal(t, alloc3.allocationKey, victims(snapshot)[0].allocationKey, "wrong alloc") - alloc2.UnmarkPreempted() + // recreate alloc2 to restore non-prempted state + app2.RemoveAllocation(alloc2.GetAllocationKey(), si.TerminationType_STOPPED_BY_RM) + alloc2 = createAllocation("ask2", appID2, nodeID1, true, true, -1000, res) + app2.AddAllocation(alloc2) // setting priority offset on parent2 queue should remove leaf2 victims parent2.priorityOffset = 1001