fix!: Prevent eviction hanging due to do-not-evict #220

Merged
12 changes: 6 additions & 6 deletions pkg/controllers/deprovisioning/drift.go
@@ -19,6 +19,7 @@ import (
 	"errors"
 	"fmt"
 
+	"github.com/samber/lo"
 	v1 "k8s.io/api/core/v1"
 	"knative.dev/pkg/logging"
 	"sigs.k8s.io/controller-runtime/pkg/client"
@@ -61,13 +62,12 @@ func (d *Drift) ComputeCommand(ctx context.Context, candidates ...CandidateNode)
 	if err != nil {
 		return Command{}, fmt.Errorf("tracking PodDisruptionBudgets, %w", err)
 	}
-	for _, candidate := range candidates {
-		// is this a node that we can terminate? This check is meant to be fast so we can save the expense of simulated
-		// scheduling unless its really needed
-		if _, canTerminate := canBeTerminated(candidate, pdbs); !canTerminate {
-			continue
-		}
+	candidates = lo.Filter(candidates, func(n CandidateNode, _ int) bool {
+		_, canTerminate := canBeTerminated(n, pdbs)
+		return canTerminate
+	})
 
+	for _, candidate := range candidates {
 		// Check if we need to create any nodes.
 		newNodes, allPodsScheduled, err := simulateScheduling(ctx, d.kubeClient, d.cluster, d.provisioner, candidate)
 		if err != nil {
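Note on the pattern above: lo.Filter from the samber/lo package (the github.com path in the diff appears to correspond to github.com/samber/lo) keeps only the elements for which the predicate returns true. Below is a standalone sketch of the same pre-filtering step, using a hypothetical simplified candidate type in place of CandidateNode; it is an illustration, not code from this PR.

package main

import (
	"fmt"

	"github.com/samber/lo"
)

// candidate is a hypothetical stand-in for the PR's CandidateNode type.
type candidate struct {
	name         string
	canTerminate bool
}

func main() {
	candidates := []candidate{
		{name: "node-a", canTerminate: true},
		{name: "node-b", canTerminate: false}, // e.g. blocked by a PDB or a do-not-evict pod
		{name: "node-c", canTerminate: true},
	}

	// Same shape as the filter in ComputeCommand: drop candidates that cannot be
	// terminated before paying for the scheduling simulation.
	candidates = lo.Filter(candidates, func(c candidate, _ int) bool {
		return c.canTerminate
	})

	fmt.Println(candidates) // [{node-a true} {node-c true}]
}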
9 changes: 6 additions & 3 deletions pkg/controllers/deprovisioning/emptiness.go
@@ -18,9 +18,8 @@ import (
 	"context"
 	"time"
 
-	"k8s.io/utils/clock"
-
 	v1 "k8s.io/api/core/v1"
+	"k8s.io/utils/clock"
 	"knative.dev/pkg/logging"
 	"knative.dev/pkg/ptr"
 
@@ -66,7 +65,11 @@ func (e *Emptiness) ShouldDeprovision(ctx context.Context, n *state.Node, provis
 
 // ComputeCommand generates a deprovisioning command given deprovisionable nodes
 func (e *Emptiness) ComputeCommand(_ context.Context, nodes ...CandidateNode) (Command, error) {
-	emptyNodes := lo.Filter(nodes, func(n CandidateNode, _ int) bool { return len(n.pods) == 0 })
+	emptyNodes := lo.Filter(nodes, func(n CandidateNode, _ int) bool {
+		_, canTerminate := canBeTerminated(n, nil)
+		return len(n.pods) == 0 && canTerminate
+	})
+
 	if len(emptyNodes) == 0 {
 		return Command{action: actionDoNothing}, nil
 	}
14 changes: 7 additions & 7 deletions pkg/controllers/deprovisioning/expiration.go
@@ -21,6 +21,7 @@ import (
 	"sort"
 	"time"
 
+	"github.com/samber/lo"
 	"k8s.io/utils/clock"
 
 	v1 "k8s.io/api/core/v1"
@@ -72,13 +73,12 @@ func (e *Expiration) ComputeCommand(ctx context.Context, candidates ...Candidate
 	if err != nil {
 		return Command{}, fmt.Errorf("tracking PodDisruptionBudgets, %w", err)
 	}
-	for _, candidate := range candidates {
-		// is this a node that we can terminate? This check is meant to be fast so we can save the expense of simulated
-		// scheduling unless its really needed
-		if _, canBeTerminated := canBeTerminated(candidate, pdbs); !canBeTerminated {
-			continue
-		}
+	candidates = lo.Filter(candidates, func(n CandidateNode, _ int) bool {
+		_, canTerminate := canBeTerminated(n, pdbs)
+		return canTerminate
+	})
 
+	for _, candidate := range candidates {
 		// Check if we need to create any nodes.
 		newNodes, allPodsScheduled, err := simulateScheduling(ctx, e.kubeClient, e.cluster, e.provisioner, candidate)
 		if err != nil {
@@ -93,7 +93,7 @@ func (e *Expiration) ComputeCommand(ctx context.Context, candidates ...Candidate
 			logging.FromContext(ctx).With("node", candidate.Name).Debugf("continuing to expire node after scheduling simulation failed to schedule all pods")
 		}
 
-		logging.FromContext(ctx).With("expirationTTL", time.Duration(ptr.Int64Value(candidates[0].provisioner.Spec.TTLSecondsUntilExpired))*time.Second).
+		logging.FromContext(ctx).With("ttl", time.Duration(ptr.Int64Value(candidates[0].provisioner.Spec.TTLSecondsUntilExpired))*time.Second).
 			With("delay", time.Since(getExpirationTime(candidates[0].Node, candidates[0].provisioner))).Infof("triggering termination for expired node after TTL")
 
 		// were we able to schedule all the pods on the inflight nodes?
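For the TTL log field above, the provisioner's TTLSecondsUntilExpired is a *int64, so it is dereferenced with knative.dev/pkg/ptr and converted to a time.Duration. A minimal standalone sketch of that conversion (not part of this PR):

package main

import (
	"fmt"
	"time"

	"knative.dev/pkg/ptr"
)

func main() {
	// TTLSecondsUntilExpired on the Provisioner spec is a *int64; ptr.Int64 builds
	// one and ptr.Int64Value dereferences it.
	ttlSeconds := ptr.Int64(2592000) // 30 days

	ttl := time.Duration(ptr.Int64Value(ttlSeconds)) * time.Second
	fmt.Println(ttl) // 720h0m0s
}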
29 changes: 10 additions & 19 deletions pkg/controllers/deprovisioning/helpers.go
@@ -329,27 +329,18 @@ func canBeTerminated(node CandidateNode, pdbs *PDBLimits) (string, bool) {
 	if !node.DeletionTimestamp.IsZero() {
 		return "in the process of deletion", false
 	}
-	if pdb, ok := pdbs.CanEvictPods(node.pods); !ok {
-		return fmt.Sprintf("pdb %s prevents pod evictions", pdb), false
-	}
-
-	if reason, ok := PodsPreventEviction(node.pods); ok {
-		return reason, false
+	if pdbs != nil {
+		if pdb, ok := pdbs.CanEvictPods(node.pods); !ok {
+			return fmt.Sprintf("pdb %s prevents pod evictions", pdb), false
+		}
 	}
-	return "", true
-}
-
-// PodsPreventEviction returns true if there are pods that would prevent eviction
-func PodsPreventEviction(pods []*v1.Pod) (string, bool) {
-	for _, p := range pods {
-		// don't care about pods that are finishing, finished or owned by the node
+	if p, ok := lo.Find(node.pods, func(p *v1.Pod) bool {
 		if pod.IsTerminating(p) || pod.IsTerminal(p) || pod.IsOwnedByNode(p) {
-			continue
-		}
-
-		if pod.HasDoNotEvict(p) {
-			return fmt.Sprintf("pod %s/%s has do not evict annotation", p.Namespace, p.Name), true
+			return false
 		}
+		return pod.HasDoNotEvict(p)
+	}); ok {
+		return fmt.Sprintf("pod %s/%s has do not evict annotation", p.Namespace, p.Name), false
 	}
-	return "", false
+	return "", true
 }
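Two things stand out in this hunk: canBeTerminated now tolerates a nil *PDBLimits (which is why emptiness.go above can call canBeTerminated(n, nil) without tracking PodDisruptionBudgets), and the do-not-evict scan is inlined with lo.Find, which returns the first matching element plus a boolean saying whether one was found. Below is a standalone sketch of that lo.Find pattern, using a hypothetical trimmed-down pod struct instead of the real *v1.Pod and pod helpers; an illustration only, not code from this PR.

package main

import (
	"fmt"

	"github.com/samber/lo"
)

// fakePod is a hypothetical, simplified stand-in for *v1.Pod.
type fakePod struct {
	name        string
	terminating bool
	doNotEvict  bool
}

func main() {
	pods := []fakePod{
		{name: "old-job", terminating: true, doNotEvict: true}, // ignored: already going away
		{name: "critical-app", doNotEvict: true},               // first blocker found
		{name: "web", doNotEvict: false},
	}

	// Same shape as the rewritten canBeTerminated: skip pods that are finishing
	// anyway, then report the first pod that carries the do-not-evict annotation.
	if p, ok := lo.Find(pods, func(p fakePod) bool {
		if p.terminating {
			return false
		}
		return p.doNotEvict
	}); ok {
		fmt.Printf("pod %s has do not evict annotation\n", p.name)
	}
}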
3 changes: 2 additions & 1 deletion pkg/controllers/deprovisioning/suite_test.go
@@ -932,7 +932,7 @@ var _ = Describe("Delete Node", func() {
 		ExpectNotFound(ctx, env.Client, node1)
 	})
 	It("can delete nodes, considers do-not-evict", func() {
-		// create our RS so we can link a pod to it
+		// create our RS, so we can link a pod to it
 		rs := test.ReplicaSet()
 		ExpectApplied(ctx, env.Client, rs)
 		Expect(env.Client.Get(ctx, client.ObjectKeyFromObject(rs), rs)).To(Succeed())
@@ -1000,6 +1000,7 @@ var _ = Describe("Delete Node", func() {
 		ExpectReconcileSucceeded(ctx, nodeStateController, client.ObjectKeyFromObject(node2))
 
 		fakeClock.Step(10 * time.Minute)
+
 		var wg sync.WaitGroup
 		ExpectTriggerVerifyAction(&wg)
 		_, err := deprovisioningController.Reconcile(ctx, reconcile.Request{})
9 changes: 0 additions & 9 deletions pkg/controllers/inflightchecks/termination.go
@@ -42,7 +42,6 @@ func (t *Termination) Check(ctx context.Context, node *v1.Node, provisioner *v1a
 	if node.DeletionTimestamp.IsZero() {
 		return nil, nil
 	}
-
 	pods, err := nodeutils.GetNodePods(ctx, t.kubeClient, node)
 	if err != nil {
 		return nil, err
@@ -54,13 +53,5 @@ func (t *Termination) Check(ctx context.Context, node *v1.Node, provisioner *v1a
 			message: fmt.Sprintf("Can't drain node, PDB %s is blocking evictions", pdb),
 		})
 	}
-
-	if reason, ok := deprovisioning.PodsPreventEviction(pods); ok {
-		issues = append(issues, Issue{
-			node: node,
-			message: fmt.Sprintf("Can't drain node, %s", reason),
-		})
-	}
-
 	return issues, nil
 }
3 changes: 0 additions & 3 deletions pkg/controllers/machine/terminator/terminator.go
@@ -76,9 +76,6 @@ func (t *Terminator) Drain(ctx context.Context, node *v1.Node) error {
 	var podsToEvict []*v1.Pod
 	// Skip node due to pods that are not able to be evicted
 	for _, p := range pods {
-		if podutil.HasDoNotEvict(p) {
-			return NewNodeDrainError(fmt.Errorf("pod %s/%s has do-not-evict annotation", p.Namespace, p.Name))
-		}
 		// Ignore if unschedulable is tolerated, since they will reschedule
 		if podutil.ToleratesUnschedulableTaint(p) {
 			continue