Restructure PodsReadyTimeout integration tests with short timeout. #2329

Merged
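This PR replaces hardcoded PodsReady timeout durations (10 * time.Millisecond, 1 * time.Second, time.Minute) in the integration tests with shared constants from the test util package: util.TinyTimeout, util.ShortTimeout and util.LongTimeout. The constant names come from the diff below; the sketch of their definition here is only an assumption of how such shared constants could look, not the PR's actual values.

// Hypothetical sketch (assumed values, not taken from the PR): shared timeout
// constants so each suite references one named duration instead of repeating
// a literal.
package util

import "time"

const (
	// TinyTimeout: the PodsReady timeout should fire almost immediately.
	TinyTimeout = 10 * time.Millisecond
	// ShortTimeout: long enough to admit a workload, short enough to observe eviction.
	ShortTimeout = 1 * time.Second
	// LongTimeout: the timeout should effectively never fire during the test.
	LongTimeout = time.Minute
)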
3 changes: 1 addition & 2 deletions test/integration/controller/jobs/job/job_controller_test.go
@@ -19,7 +19,6 @@ package job
import (
"fmt"
"maps"
"time"

"github.com/google/go-cmp/cmp/cmpopts"
"github.com/onsi/ginkgo/v2"
@@ -1907,7 +1906,7 @@ var _ = ginkgo.Describe("Job controller interacting with Workload controller whe
waitForPodsReady := &configapi.WaitForPodsReady{
Enable: true,
BlockAdmission: ptr.To(true),
Timeout: &metav1.Duration{Duration: 10 * time.Millisecond},
Timeout: &metav1.Duration{Duration: util.TinyTimeout},
RequeuingStrategy: &configapi.RequeuingStrategy{
Timestamp: ptr.To(configapi.EvictionTimestamp),
BackoffBaseSeconds: ptr.To[int32](backoffBaseSeconds),
@@ -1648,7 +1648,7 @@ var _ = ginkgo.Describe("Pod controller interacting with Workload controller whe
cfg = fwk.Init()
waitForPodsReady := &configapi.WaitForPodsReady{
Enable: true,
Timeout: &metav1.Duration{Duration: 10 * time.Millisecond},
Timeout: &metav1.Duration{Duration: util.TinyTimeout},
RequeuingStrategy: &configapi.RequeuingStrategy{
Timestamp: ptr.To(configapi.EvictionTimestamp),
BackoffLimitCount: ptr.To[int32](1),
173 changes: 103 additions & 70 deletions test/integration/scheduler/podsready/scheduler_test.go
@@ -17,7 +17,6 @@ limitations under the License.
package podsready

import (
"context"
"path/filepath"
"time"

@@ -30,7 +29,6 @@ import (
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/utils/ptr"
"sigs.k8s.io/controller-runtime/pkg/client"
"sigs.k8s.io/controller-runtime/pkg/manager"

config "sigs.k8s.io/kueue/apis/config/v1beta1"
kueue "sigs.k8s.io/kueue/apis/kueue/v1beta1"
@@ -47,7 +45,7 @@ var (
)

const (
defaultPodsReadyTimeout = 3 * time.Second
defaultPodsReadyTimeout = util.TinyTimeout
defaultRequeuingTimestamp = config.EvictionTimestamp
)

@@ -56,9 +54,9 @@ const (
var _ = ginkgo.Describe("SchedulerWithWaitForPodsReady", func() {
var (
// Values changed by tests (and reset after each):
podsReadyTimeout = defaultPodsReadyTimeout
requeuingTimestamp = defaultRequeuingTimestamp
requeuingBackoffLimitCount = defaultRequeuingBackoffLimitCount
podsReadyTimeout = defaultPodsReadyTimeout
requeuingTimestamp = defaultRequeuingTimestamp
requeueingBackoffLimitCount = defaultRequeuingBackoffLimitCount
)

var (
@@ -77,9 +75,19 @@ var _ = ginkgo.Describe("SchedulerWithWaitForPodsReady", func() {
WebhookPath: filepath.Join("..", "..", "..", "..", "config", "components", "webhook"),
}
cfg = fwk.Init()
ctx, k8sClient = fwk.RunManager(cfg, func(mgr manager.Manager, ctx context.Context) {
managerAndSchedulerSetupWithTimeoutAdmission(mgr, ctx, podsReadyTimeout, true, requeuingTimestamp, requeuingBackoffLimitCount)
})
configuration := &config.Configuration{
WaitForPodsReady: &config.WaitForPodsReady{
Enable: true,
BlockAdmission: ptr.To(true),
Timeout: &metav1.Duration{Duration: podsReadyTimeout},
RequeuingStrategy: &config.RequeuingStrategy{
Timestamp: ptr.To(requeuingTimestamp),
BackoffLimitCount: requeueingBackoffLimitCount,
BackoffBaseSeconds: ptr.To[int32](1),
},
},
}
ctx, k8sClient = fwk.RunManager(cfg, managerAndSchedulerSetup(configuration))

defaultFlavor = testing.MakeResourceFlavor("default").Obj()
gomega.Expect(k8sClient.Create(ctx, defaultFlavor)).To(gomega.Succeed())
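The setup above now builds a full config.Configuration and hands it to managerAndSchedulerSetup, replacing the old managerAndSchedulerSetupWithTimeoutAdmission helper that took the timeout, blocking flag and requeuing parameters individually. For orientation, a minimal sketch of such a configuration-driven helper follows; the helper name and the callback signature are taken from the diff, but the body is an assumption, not the PR's actual implementation.

// Hypothetical sketch (assumed wiring): adapt a Configuration into the
// callback shape that fwk.RunManager expects, as seen in the removed code.
package podsready

import (
	"context"

	"sigs.k8s.io/controller-runtime/pkg/manager"

	config "sigs.k8s.io/kueue/apis/config/v1beta1"
)

func managerAndSchedulerSetup(configuration *config.Configuration) func(mgr manager.Manager, ctx context.Context) {
	return func(mgr manager.Manager, ctx context.Context) {
		// Assumed: start the controllers and the scheduler with
		// configuration.WaitForPodsReady (timeout, block admission,
		// requeuing strategy) applied; the real helper lives in the
		// Kueue integration test suite.
		_ = configuration
	}
}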
@@ -119,12 +127,12 @@ var _ = ginkgo.Describe("SchedulerWithWaitForPodsReady", func() {
// Reset values that are changed by tests.
podsReadyTimeout = defaultPodsReadyTimeout
requeuingTimestamp = defaultRequeuingTimestamp
requeuingBackoffLimitCount = defaultRequeuingBackoffLimitCount
requeueingBackoffLimitCount = defaultRequeuingBackoffLimitCount
})

ginkgo.Context("Long PodsReady timeout", func() {
ginkgo.BeforeEach(func() {
podsReadyTimeout = time.Minute
podsReadyTimeout = util.LongTimeout
})

ginkgo.It("Should unblock admission of new workloads in other ClusterQueues once the admitted workload exceeds timeout", func() {
@@ -193,8 +201,8 @@ var _ = ginkgo.Describe("SchedulerWithWaitForPodsReady", func() {

var _ = ginkgo.Context("Short PodsReady timeout", func() {
ginkgo.BeforeEach(func() {
podsReadyTimeout = 1 * time.Second
requeuingBackoffLimitCount = ptr.To[int32](2)
podsReadyTimeout = util.ShortTimeout
requeueingBackoffLimitCount = ptr.To[int32](2)
})

ginkgo.It("Should requeue a workload which exceeded the timeout to reach PodsReady=True", func() {
@@ -309,6 +317,13 @@ var _ = ginkgo.Describe("SchedulerWithWaitForPodsReady", func() {
Count: ptr.To[int32](1),
}, false)
})
})

var _ = ginkgo.Context("Tiny PodsReady timeout", func() {
ginkgo.BeforeEach(func() {
podsReadyTimeout = util.TinyTimeout
requeueingBackoffLimitCount = ptr.To[int32](2)
})

ginkgo.It("Should unblock admission of new workloads in other ClusterQueues once the admitted workload exceeds timeout", func() {
ginkgo.By("create the 'prod' workload")
@@ -397,66 +412,70 @@ var _ = ginkgo.Describe("SchedulerWithWaitForPodsReady", func() {
ginkgo.By("delete the waiting 'prod' workload so that it does not get admitted during teardown")
gomega.Expect(k8sClient.Delete(ctx, prodWl)).Should(gomega.Succeed())
})
})

ginkgo.It("Should move the evicted workload at the end of the queue", func() {
localQueueName := "eviction-lq"
ginkgo.It("Should move the evicted workload at the end of the queue", func() {
// We wait 1 second between each workload creation call. Therefore, we need to add this time to the timeout.
podsReadyTimeout = util.TinyTimeout + 2*time.Second
requeueingBackoffLimitCount = ptr.To[int32](2)

// the workloads are created with a 5 cpu resource requirement to ensure only one can fit at a given time,
// letting them all time out, we should see a circular buffer admission pattern
wl1 := testing.MakeWorkload("prod1", ns.Name).Queue(localQueueName).Request(corev1.ResourceCPU, "5").Obj()
wl2 := testing.MakeWorkload("prod2", ns.Name).Queue(localQueueName).Request(corev1.ResourceCPU, "5").Obj()
wl3 := testing.MakeWorkload("prod3", ns.Name).Queue(localQueueName).Request(corev1.ResourceCPU, "5").Obj()
localQueueName := "eviction-lq"

ginkgo.By("create the workloads", func() {
// since metav1.Time has only second resolution, wait one second between
// create calls to avoid any potential creation timestamp collision
gomega.Expect(k8sClient.Create(ctx, wl1)).Should(gomega.Succeed())
time.Sleep(time.Second)
gomega.Expect(k8sClient.Create(ctx, wl2)).Should(gomega.Succeed())
time.Sleep(time.Second)
gomega.Expect(k8sClient.Create(ctx, wl3)).Should(gomega.Succeed())
})
// the workloads are created with a 5 cpu resource requirement to ensure only one can fit at a given time,
// letting them all time out, we should see a circular buffer admission pattern
wl1 := testing.MakeWorkload("prod1", ns.Name).Queue(localQueueName).Request(corev1.ResourceCPU, "5").Obj()
wl2 := testing.MakeWorkload("prod2", ns.Name).Queue(localQueueName).Request(corev1.ResourceCPU, "5").Obj()
wl3 := testing.MakeWorkload("prod3", ns.Name).Queue(localQueueName).Request(corev1.ResourceCPU, "5").Obj()

ginkgo.By("create the local queue to start admission", func() {
lq := testing.MakeLocalQueue(localQueueName, ns.Name).ClusterQueue(prodClusterQ.Name).Obj()
gomega.Expect(k8sClient.Create(ctx, lq)).Should(gomega.Succeed())
})
ginkgo.By("create the workloads", func() {
// since metav1.Time has only second resolution, wait one second between
// create calls to avoid any potential creation timestamp collision
gomega.Expect(k8sClient.Create(ctx, wl1)).Should(gomega.Succeed())
time.Sleep(time.Second)
gomega.Expect(k8sClient.Create(ctx, wl2)).Should(gomega.Succeed())
time.Sleep(time.Second)
gomega.Expect(k8sClient.Create(ctx, wl3)).Should(gomega.Succeed())
})

ginkgo.By("waiting for the first workload to be admitted", func() {
util.ExpectWorkloadsToHaveQuotaReservation(ctx, k8sClient, prodClusterQ.Name, wl1)
})
ginkgo.By("create the local queue to start admission", func() {
lq := testing.MakeLocalQueue(localQueueName, ns.Name).ClusterQueue(prodClusterQ.Name).Obj()
gomega.Expect(k8sClient.Create(ctx, lq)).Should(gomega.Succeed())
})

ginkgo.By("waiting the timeout, the first workload should be evicted and the second one should be admitted", func() {
time.Sleep(podsReadyTimeout)
util.FinishEvictionForWorkloads(ctx, k8sClient, wl1)
util.ExpectWorkloadsToHaveQuotaReservation(ctx, k8sClient, prodClusterQ.Name, wl2)
})
ginkgo.By("waiting for the first workload to be admitted", func() {
util.ExpectWorkloadsToHaveQuotaReservation(ctx, k8sClient, prodClusterQ.Name, wl1)
})

ginkgo.By("finishing the second workload, the third one should be admitted", func() {
time.Sleep(podsReadyTimeout)
util.FinishWorkloads(ctx, k8sClient, wl2)
util.ExpectWorkloadsToHaveQuotaReservation(ctx, k8sClient, prodClusterQ.Name, wl3)
})
ginkgo.By("waiting the timeout, the first workload should be evicted and the second one should be admitted", func() {
time.Sleep(podsReadyTimeout)
util.FinishEvictionForWorkloads(ctx, k8sClient, wl1)
util.ExpectWorkloadsToHaveQuotaReservation(ctx, k8sClient, prodClusterQ.Name, wl2)
})

ginkgo.By("finishing the third workload, the first one should be admitted", func() {
time.Sleep(podsReadyTimeout)
util.FinishWorkloads(ctx, k8sClient, wl3)
util.ExpectWorkloadsToHaveQuotaReservation(ctx, k8sClient, prodClusterQ.Name, wl1)
})
ginkgo.By("finishing the second workload, the third one should be admitted", func() {
time.Sleep(podsReadyTimeout)
util.FinishWorkloads(ctx, k8sClient, wl2)
util.ExpectWorkloadsToHaveQuotaReservation(ctx, k8sClient, prodClusterQ.Name, wl3)
})

ginkgo.By("verifying if all workloads have a proper re-queue count", func() {
// Here, we focus on verifying if the requeuingTimestamp works well.
// So, we don't check if the .status.requeueState.requeueAt is reset.
util.ExpectWorkloadToHaveRequeueState(ctx, k8sClient, client.ObjectKeyFromObject(wl1), &kueue.RequeueState{
Count: ptr.To[int32](2),
}, true)
util.ExpectWorkloadToHaveRequeueState(ctx, k8sClient, client.ObjectKeyFromObject(wl2), &kueue.RequeueState{
Count: ptr.To[int32](1),
}, true)
util.ExpectWorkloadToHaveRequeueState(ctx, k8sClient, client.ObjectKeyFromObject(wl3), &kueue.RequeueState{
Count: ptr.To[int32](1),
}, true)
})
ginkgo.By("finishing the third workload, the first one should be admitted", func() {
time.Sleep(podsReadyTimeout)
util.FinishWorkloads(ctx, k8sClient, wl3)
util.ExpectWorkloadsToHaveQuotaReservation(ctx, k8sClient, prodClusterQ.Name, wl1)
})

ginkgo.By("verifying if all workloads have a proper re-queue count", func() {
// Here, we focus on verifying if the requeuingTimestamp works well.
// So, we don't check if the .status.requeueState.requeueAt is reset.
util.ExpectWorkloadToHaveRequeueState(ctx, k8sClient, client.ObjectKeyFromObject(wl1), &kueue.RequeueState{
Count: ptr.To[int32](2),
}, true)
util.ExpectWorkloadToHaveRequeueState(ctx, k8sClient, client.ObjectKeyFromObject(wl2), &kueue.RequeueState{
Count: ptr.To[int32](1),
}, true)
util.ExpectWorkloadToHaveRequeueState(ctx, k8sClient, client.ObjectKeyFromObject(wl3), &kueue.RequeueState{
Count: ptr.To[int32](1),
}, true)
})
})
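The assertions above use a test util helper to check each workload's requeue count. A rough sketch of what such a helper could look like follows; the function name, the kueue.RequeueState type, and the call shape are taken from the diff, while the meaning of the trailing bool (ignore .requeueAt) and the polling intervals are assumptions.

// Hypothetical sketch of a requeue-state assertion; the real helper is part
// of Kueue's test util package and may differ.
package util

import (
	"context"
	"time"

	"github.com/onsi/gomega"
	"sigs.k8s.io/controller-runtime/pkg/client"

	kueue "sigs.k8s.io/kueue/apis/kueue/v1beta1"
)

func ExpectWorkloadToHaveRequeueState(ctx context.Context, k8sClient client.Client, key client.ObjectKey, want *kueue.RequeueState, ignoreRequeueAt bool) {
	gomega.EventuallyWithOffset(1, func(g gomega.Gomega) {
		var wl kueue.Workload
		g.Expect(k8sClient.Get(ctx, key, &wl)).To(gomega.Succeed())
		got := wl.Status.RequeueState
		g.Expect(got).NotTo(gomega.BeNil())
		g.Expect(got.Count).To(gomega.Equal(want.Count))
		if !ignoreRequeueAt {
			// Assumed: only compare the requeueAt timestamp when not ignored.
			g.Expect(got.RequeueAt).To(gomega.Equal(want.RequeueAt))
		}
	}, 10*time.Second, 100*time.Millisecond).Should(gomega.Succeed())
}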

@@ -467,6 +486,8 @@
)

ginkgo.BeforeEach(func() {
// We wait 1 second between each workload creation call. Therefore, we need to add this time to the timeout.
podsReadyTimeout = util.ShortTimeout + 2*time.Second
requeuingTimestamp = config.CreationTimestamp
})

@@ -552,9 +573,19 @@ var _ = ginkgo.Describe("SchedulerWithWaitForPodsReadyNonblockingMode", func() {
WebhookPath: filepath.Join("..", "..", "..", "..", "config", "components", "webhook"),
}
cfg = fwk.Init()
ctx, k8sClient = fwk.RunManager(cfg, func(mgr manager.Manager, ctx context.Context) {
managerAndSchedulerSetupWithTimeoutAdmission(mgr, ctx, podsReadyTimeout, false, requeuingTimestamp, requeueingBackoffLimitCount)
})
configuration := &config.Configuration{
WaitForPodsReady: &config.WaitForPodsReady{
Enable: true,
BlockAdmission: ptr.To(false),
Timeout: &metav1.Duration{Duration: podsReadyTimeout},
RequeuingStrategy: &config.RequeuingStrategy{
Timestamp: ptr.To(requeuingTimestamp),
BackoffLimitCount: requeueingBackoffLimitCount,
BackoffBaseSeconds: ptr.To[int32](1),
},
},
}
ctx, k8sClient = fwk.RunManager(cfg, managerAndSchedulerSetup(configuration))

defaultFlavor = testing.MakeResourceFlavor("default").Obj()
gomega.Expect(k8sClient.Create(ctx, defaultFlavor)).To(gomega.Succeed())
@@ -599,7 +630,7 @@ var _ = ginkgo.Describe("SchedulerWithWaitForPodsReadyNonblockingMode", func() {

ginkgo.Context("Long PodsReady timeout", func() {
ginkgo.BeforeEach(func() {
podsReadyTimeout = time.Minute
podsReadyTimeout = util.LongTimeout
})

ginkgo.It("Should not block admission of one new workload if two are considered in the same scheduling cycle", func() {
Expand Down Expand Up @@ -628,9 +659,9 @@ var _ = ginkgo.Describe("SchedulerWithWaitForPodsReadyNonblockingMode", func() {
})
})

var _ = ginkgo.Context("Short PodsReady timeout", func() {
var _ = ginkgo.Context("Tiny PodsReady timeout", func() {
Contributor: Don't we have another context for this already?

Author: For NonblockingMode, no.

ginkgo.BeforeEach(func() {
podsReadyTimeout = 1 * time.Second
podsReadyTimeout = util.TinyTimeout
})

ginkgo.It("Should re-admit a timed out workload", func() {
@@ -666,6 +697,8 @@ var _ = ginkgo.Describe("SchedulerWithWaitForPodsReadyNonblockingMode", func() {
)

ginkgo.BeforeEach(func() {
// We wait 1 second between each workload creation call. Therefore, we need to add this time to the timeout.
podsReadyTimeout = util.ShortTimeout + 2*time.Second
requeuingTimestamp = config.CreationTimestamp
})
