diff --git a/nomad/blocked_evals.go b/nomad/blocked_evals.go index 8f5a6f5c4c7..06e7410ac6f 100644 --- a/nomad/blocked_evals.go +++ b/nomad/blocked_evals.go @@ -147,7 +147,9 @@ func (b *BlockedEvals) Block(eval *structs.Evaluation) { } // Check if the eval missed an unblock while it was in the scheduler at an - // older index. + // older index. The scheduler could have been invoked with a snapshot of + // state that was prior to additional capacity being added or allocations + // becoming terminal. if b.missedUnblock(eval) { // Just re-enqueue the eval immediately b.evalBroker.Enqueue(eval) @@ -258,11 +260,6 @@ func (b *BlockedEvals) unblock(computedClass string, index uint64) { return } - // Store the index in which the unblock happened. We use this on subsequent - // block calls in case the evaluation was in the scheduler when a trigger - // occured. - b.unblockIndexes[computedClass] = index - // Every eval that has escaped computed node class has to be unblocked // because any node could potentially be feasible. var unblocked []*structs.Evaluation diff --git a/nomad/leader.go b/nomad/leader.go index 58e94c588c8..f1e10b71d40 100644 --- a/nomad/leader.go +++ b/nomad/leader.go @@ -11,6 +11,13 @@ import ( "github.com/hashicorp/serf/serf" ) +const ( + // failedEvalUnblockInterval is the interval at which failed evaluations are + // unblocked to re-enter the scheduler. A failed evaluation occurs under + // high contention when the scheduler's plan does not make progress. + failedEvalUnblockInterval = 1 * time.Minute +) + // monitorLeadership is used to monitor if we acquire or lose our role // as the leader in the Raft cluster. There is some work the leader is // expected to do, so we must react to changes @@ -346,11 +353,11 @@ func (s *Server) reapDupBlockedEvaluations(stopCh chan struct{}) { // periodicUnblockFailedEvals periodically unblocks failed, blocked evaluations. 
func (s *Server) periodicUnblockFailedEvals(stopCh chan struct{}) { - ticker := time.NewTimer(1 * time.Minute) + ticker := time.NewTimer(failedEvalUnblockInterval) + defer ticker.Stop() for { select { case <-stopCh: - ticker.Stop() return case <-ticker.C: // Unblock the failed allocations diff --git a/scheduler/generic_sched_test.go b/scheduler/generic_sched_test.go index d44d76d3865..4b9f9f4024c 100644 --- a/scheduler/generic_sched_test.go +++ b/scheduler/generic_sched_test.go @@ -543,7 +543,7 @@ func TestServiceSched_EvaluateBlockedEval_Finished(t *testing.T) { // Ensure the eval has no spawned blocked eval if len(h.Evals) != 1 { t.Fatalf("bad: %#v", h.Evals) - if h.Evals[0].SpawnedBlockedEval != "" { + if h.Evals[0].BlockedEval != "" { t.Fatalf("bad: %#v", h.Evals[0]) } }