Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

fix: Fix an issue where the instanceTypes aren't sorted before truncating it for spot-to-spot consolidation. #1012

Merged
merged 5 commits into from
Feb 15, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion pkg/controllers/disruption/consolidation.go
Original file line number Diff line number Diff line change
Expand Up @@ -256,7 +256,7 @@ func (c *consolidation) computeSpotToSpotConsolidation(ctx context.Context, cand
// 3) Assuming CreateInstanceFromTypes(A,B,C,D) returned D, we check if D is part of (A,B,C) and it isn't, so will have another consolidation send a CreateInstanceFromTypes(A,B,C), since they’re cheaper than D resulting in continual consolidation.
// If we had restricted instance types to min flexibility at launch at step (1) i.e CreateInstanceFromTypes(A,B,C), we would have received the instance type part of the list preventing immediate consolidation.
// Taking this to 15 types, we need to only send the 15 cheapest types in the CreateInstanceFromTypes call so that the resulting instance is always in that set of 15 and we won’t immediately consolidate.
results.NewNodeClaims[0].NodeClaimTemplate.InstanceTypeOptions = lo.Slice(results.NewNodeClaims[0].NodeClaimTemplate.InstanceTypeOptions, 0, MinInstanceTypesForSpotToSpotConsolidation)
results.NewNodeClaims[0].NodeClaimTemplate.InstanceTypeOptions = lo.Slice(results.NewNodeClaims[0].NodeClaimTemplate.InstanceTypeOptions.OrderByPrice(results.NewNodeClaims[0].Requirements), 0, MinInstanceTypesForSpotToSpotConsolidation)

return Command{
candidates: candidates,
Expand Down
116 changes: 116 additions & 0 deletions pkg/controllers/disruption/consolidation_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -1367,6 +1367,122 @@ var _ = Describe("Consolidation", func() {
ExpectExists(ctx, env.Client, spotNodeClaim)
ExpectExists(ctx, env.Client, spotNode)
})
It("spot to spot consolidation should order the instance types by price before enforcing minimum flexibility.", func() {
// Fetch 18 spot instances
spotInstances = lo.Slice(lo.Filter(cloudProvider.InstanceTypes, func(i *cloudprovider.InstanceType, _ int) bool {
for _, o := range i.Offerings {
if o.CapacityType == v1beta1.CapacityTypeSpot {
return true
}
}
return false
}), 0, 18)
// Assign the prices for 18 spot instance in ascending order incrementally
for i, inst := range spotInstances {
inst.Offerings[0].Price = 1.00 + float64(i)*0.1
}
// Force an instancetype that is outside the bound of 15 instances to have the cheapest price among the lot.
spotInstances[16].Offerings[0].Price = 0.001

// We now have these spot instance in the list as lowest priced and highest priced instanceTypes
cheapestSpotInstanceType := spotInstances[16]
mostExpensiveInstanceType := spotInstances[17]

// Add these spot instance with this special condition to cloud provider instancetypes
cloudProvider.InstanceTypes = spotInstances

expectedInstanceTypesForConsolidation := make([]*cloudprovider.InstanceType, len(spotInstances))
copy(expectedInstanceTypesForConsolidation, spotInstances)
// Sort the spot instances by pricing from low to high
sort.Slice(expectedInstanceTypesForConsolidation, func(i, j int) bool {
return expectedInstanceTypesForConsolidation[i].Offerings[0].Price < expectedInstanceTypesForConsolidation[j].Offerings[0].Price
})
// These 15 cheapest instance types should eventually be considered for consolidation.
var expectedInstanceTypesNames []string
for i := 0; i < 15; i++ {
expectedInstanceTypesNames = append(expectedInstanceTypesNames, expectedInstanceTypesForConsolidation[i].Name)
}

// Assign the most expensive spot instancetype so that it will definitely be replaced through consolidation
spotNodeClaim.Labels = lo.Assign(spotNodeClaim.Labels, map[string]string{
v1beta1.NodePoolLabelKey: nodePool.Name,
v1.LabelInstanceTypeStable: mostExpensiveInstanceType.Name,
v1beta1.CapacityTypeLabelKey: mostExpensiveInstanceType.Offerings[0].CapacityType,
v1.LabelTopologyZone: mostExpensiveInstanceType.Offerings[0].Zone,
})

spotNode.Labels = lo.Assign(spotNode.Labels, map[string]string{
v1beta1.NodePoolLabelKey: nodePool.Name,
v1.LabelInstanceTypeStable: mostExpensiveInstanceType.Name,
v1beta1.CapacityTypeLabelKey: mostExpensiveInstanceType.Offerings[0].CapacityType,
v1.LabelTopologyZone: mostExpensiveInstanceType.Offerings[0].Zone,
})

rs := test.ReplicaSet()
ExpectApplied(ctx, env.Client, rs)
Expect(env.Client.Get(ctx, client.ObjectKeyFromObject(rs), rs)).To(Succeed())

pod := test.Pod(test.PodOptions{
ObjectMeta: metav1.ObjectMeta{Labels: labels,
OwnerReferences: []metav1.OwnerReference{
{
APIVersion: "apps/v1",
Kind: "ReplicaSet",
Name: rs.Name,
UID: rs.UID,
Controller: ptr.Bool(true),
BlockOwnerDeletion: ptr.Bool(true),
},
}}})
ExpectApplied(ctx, env.Client, rs, pod, spotNode, spotNodeClaim, nodePool)

// bind pods to node
ExpectManualBinding(ctx, env.Client, pod, spotNode)

// inform cluster state about nodes and nodeclaims
ExpectMakeNodesAndNodeClaimsInitializedAndStateUpdated(ctx, env.Client, nodeStateController, nodeClaimStateController, []*v1.Node{spotNode}, []*v1beta1.NodeClaim{spotNodeClaim})

fakeClock.Step(10 * time.Minute)

// consolidation won't delete the old nodeclaim until the new nodeclaim is ready
var wg sync.WaitGroup
ExpectTriggerVerifyAction(&wg)
ExpectMakeNewNodeClaimsReady(ctx, env.Client, &wg, cluster, cloudProvider, 1)
ExpectReconcileSucceeded(ctx, disruptionController, client.ObjectKey{})
wg.Wait()

// Process the item so that the nodes can be deleted.
ExpectReconcileSucceeded(ctx, queue, types.NamespacedName{})

// Cascade any deletion of the nodeclaim to the node
ExpectNodeClaimsCascadeDeletion(ctx, env.Client, spotNodeClaim)

// should create a new nodeclaim as there is a cheaper one that can hold the pod
nodeClaims := ExpectNodeClaims(ctx, env.Client)
nodes := ExpectNodes(ctx, env.Client)
Expect(nodeClaims).To(HaveLen(1))
Expect(nodes).To(HaveLen(1))

// Expect that the new nodeclaim does not request the most expensive instance type
Expect(nodeClaims[0].Name).ToNot(Equal(spotNodeClaim.Name))
Expect(scheduling.NewNodeSelectorRequirements(nodeClaims[0].Spec.Requirements...).Has(v1.LabelInstanceTypeStable)).To(BeTrue())
Expect(scheduling.NewNodeSelectorRequirements(nodeClaims[0].Spec.Requirements...).Get(v1.LabelInstanceTypeStable).Has(mostExpensiveInstanceType.Name)).To(BeFalse())

// Make sure that the cheapest instance that was outside the bound of 15 instance types is considered for consolidation.
Expect(scheduling.NewNodeSelectorRequirements(nodeClaims[0].Spec.Requirements...).Get(v1.LabelInstanceTypeStable).Has(cheapestSpotInstanceType.Name)).To(BeTrue())
jonathan-innis marked this conversation as resolved.
Show resolved Hide resolved
spotInstancesConsideredForConsolidation := scheduling.NewNodeSelectorRequirements(nodeClaims[0].Spec.Requirements...).Get(v1.LabelInstanceTypeStable).Values()

// Make sure that we send only 15 instance types.
Expect(len(spotInstancesConsideredForConsolidation)).To(Equal(15))

// Make sure we considered the first 15 cheapest instance types.
for i := 0; i < 15; i++ {
Expect(spotInstancesConsideredForConsolidation).To(ContainElement(expectedInstanceTypesNames[i]))
}

// and delete the old one
ExpectNotFound(ctx, env.Client, spotNodeClaim, spotNode)
})
DescribeTable("can replace nodes if another nodePool returns no instance types",
func(spotToSpot bool) {
nodeClaim = lo.Ternary(spotToSpot, spotNodeClaim, nodeClaim)
Expand Down
Loading