Skip to content

Commit

Permalink
E2E: fix flaky event stream test (#12548)
Browse files Browse the repository at this point in the history
This changeset fixes two sources of flakiness in the event stream test.

First, the stream request gets the event *closest* to the index, not
the exact match. Although events are written before raft entries,
they're written asynchronously, so it's possible to race and get a
raft index from the job query that is higher than the current head of
the event buffer. Ensure the job is running before we try to get the
index, so that we've given the event enough time to land in the buffer.

Second, the assertion that the found index is greater than the start
index is only true if the `PlanResult` event manages to land before we
do the second registration. Although it should now land in time with
the first fix above, it's not a correct assertion for what we're testing.
  • Loading branch information
tgross authored Apr 12, 2022
1 parent 8bde164 commit 86ca8f7
Show file tree
Hide file tree
Showing 2 changed files with 25 additions and 8 deletions.
31 changes: 24 additions & 7 deletions e2e/events/events.go
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@ package events
import (
"context"
"fmt"
"time"

"github.com/hashicorp/nomad/api"
"github.com/hashicorp/nomad/e2e/e2eutil"
Expand Down Expand Up @@ -188,18 +189,34 @@ func (tc *EventsTest) TestStartIndex(f *framework.F) {
nomadClient := tc.Nomad()
events := nomadClient.EventStream()

uuid := uuid.Generate()
jobID := fmt.Sprintf("deployment-%s", uuid[0:8])
jobID2 := fmt.Sprintf("deployment2-%s", uuid[0:8])
tc.jobIDs = append(tc.jobIDs, jobID, jobID2)
uuid := uuid.Short()
noopID := fmt.Sprintf("noop-%s", uuid)
jobID := fmt.Sprintf("deployment-%s", uuid)
jobID2 := fmt.Sprintf("deployment2-%s", uuid)
tc.jobIDs = append(tc.jobIDs, noopID, jobID, jobID2)
ctx, cancel := context.WithCancel(context.Background())
defer cancel()

// register job
err := e2eutil.Register(jobID, "events/input/initial.nomad")
require.NoError(t, err)
job, _, err := nomadClient.Jobs().Info(jobID, nil)
require.NoError(t, err)

// The stream request gets the event *closest* to the index, not
// the exact match. Although events are written before raft
// entries, they're written asynchronously, so it's possible to
// race and get a raft index from this query higher than the
// current head of the event buffer. Ensure the job is running
// before we try to get the index, so that we've given the event
// enough time to land in the buffer.
var job *api.Job
f.Eventually(func() bool {
job, _, err = nomadClient.Jobs().Info(jobID, nil)
if err != nil {
return false
}
return *job.Status == "running"
}, 20*time.Second, 200*time.Millisecond, "job should be running")

startIndex := *job.JobModifyIndex + 1

topics := map[api.Topic][]string{
Expand Down Expand Up @@ -239,7 +256,7 @@ func (tc *EventsTest) TestStartIndex(f *framework.F) {
testutil.WaitForResult(func() (bool, error) {
for _, e := range jobEvents {
if e.Type == "JobRegistered" {
if e.Index <= startIndex {
if e.Index < startIndex {
foundUnexpected = true
}
if e.Index >= startIndex {
Expand Down
2 changes: 1 addition & 1 deletion e2e/events/input/initial.nomad
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
job "deployment_auto.nomad" {
datacenters = ["dc1"]
datacenters = ["dc1", "dc2"]

constraint {
attribute = "${attr.kernel.name}"
Expand Down

0 comments on commit 86ca8f7

Please sign in to comment.