-
Notifications
You must be signed in to change notification settings - Fork 4.9k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
[Heartbeat] Adjust State loader to only retry for failed requests and not for 4xx #37981
Changes from all commits
bf4ee72
568be03
aa2c02b
7c074d2
dd757c0
b3e419d
ff6f4cd
c407305
639cb20
d305b86
0b8a902
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -62,7 +62,7 @@ func (t *Tracker) RecordStatus(sf stdfields.StdMonitorFields, newStatus StateSta | |
t.mtx.Lock() | ||
defer t.mtx.Unlock() | ||
|
||
state := t.GetCurrentState(sf) | ||
state := t.GetCurrentState(sf, RetryConfig{}) | ||
if state == nil { | ||
state = newMonitorState(sf, newStatus, 0, t.flappingEnabled) | ||
logp.L().Infof("initializing new state for monitor %s: %s", sf.ID, state.String()) | ||
|
@@ -75,36 +75,56 @@ func (t *Tracker) RecordStatus(sf stdfields.StdMonitorFields, newStatus StateSta | |
} | ||
|
||
func (t *Tracker) GetCurrentStatus(sf stdfields.StdMonitorFields) StateStatus { | ||
s := t.GetCurrentState(sf) | ||
s := t.GetCurrentState(sf, RetryConfig{}) | ||
if s == nil { | ||
return StatusEmpty | ||
} | ||
return s.Status | ||
} | ||
|
||
func (t *Tracker) GetCurrentState(sf stdfields.StdMonitorFields) (state *State) { | ||
type RetryConfig struct { | ||
attempts int | ||
waitFn func() time.Duration | ||
} | ||
|
||
func (t *Tracker) GetCurrentState(sf stdfields.StdMonitorFields, rc RetryConfig) (state *State) { | ||
if state, ok := t.states[sf.ID]; ok { | ||
return state | ||
} | ||
|
||
tries := 3 | ||
// Default number of attempts | ||
attempts := 3 | ||
if rc.attempts != 0 { | ||
attempts = rc.attempts | ||
} | ||
|
||
var loadedState *State | ||
var err error | ||
for i := 0; i < tries; i++ { | ||
var i int | ||
for i = 0; i < attempts; i++ { | ||
loadedState, err = t.stateLoader(sf) | ||
if err == nil { | ||
if loadedState != nil { | ||
logp.L().Infof("loaded previous state for monitor %s: %s", sf.ID, loadedState.String()) | ||
} | ||
break | ||
} | ||
var loaderError LoaderError | ||
if errors.As(err, &loaderError) && !loaderError.Retry { | ||
logp.L().Warnf("could not load last externally recorded state: %v", loaderError) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I guess if we want to use %w, we would need to unwrap |
||
break | ||
} | ||
|
||
// Default sleep time | ||
sleepFor := (time.Duration(i*i) * time.Second) + (time.Duration(rand.Intn(500)) * time.Millisecond) | ||
logp.L().Warnf("could not load last externally recorded state, will retry again in %d milliseconds: %w", sleepFor.Milliseconds(), err) | ||
if rc.waitFn != nil { | ||
sleepFor = rc.waitFn() | ||
} | ||
logp.L().Warnf("could not load last externally recorded state, will retry again in %d milliseconds: %v", sleepFor.Milliseconds(), err) | ||
time.Sleep(sleepFor) | ||
} | ||
if err != nil { | ||
logp.L().Warnf("could not load prior state from elasticsearch after %d attempts, will create new state for monitor: %s", tries, sf.ID) | ||
logp.L().Warnf("could not load prior state from elasticsearch after %d attempts, will create new state for monitor: %s", i+1, sf.ID) | ||
} | ||
|
||
if loadedState != nil { | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -18,6 +18,7 @@ | |
package monitorstate | ||
|
||
import ( | ||
"errors" | ||
"testing" | ||
"time" | ||
|
||
|
@@ -131,3 +132,51 @@ func TestDeferredStateLoader(t *testing.T) { | |
resState, _ = dsl(stdfields.StdMonitorFields{}) | ||
require.Equal(t, stateA, resState) | ||
} | ||
|
||
func TestStateLoaderRetry(t *testing.T) { | ||
// While testing the sleep time between retries should be negligible | ||
waitFn := func() time.Duration { | ||
return time.Microsecond | ||
} | ||
|
||
tests := []struct { | ||
name string | ||
retryable bool | ||
rc RetryConfig | ||
expectedCalls int | ||
}{ | ||
{ | ||
"should retry 3 times when fails with retryable error", | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. this test lasts 5 seconds given that There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Maybe we should make it configurable. |
||
true, | ||
RetryConfig{waitFn: waitFn}, | ||
3, | ||
}, | ||
{ | ||
"should not retry when fails with non-retryable error", | ||
false, | ||
RetryConfig{waitFn: waitFn}, | ||
1, | ||
}, | ||
{ | ||
"should honour the configured number of attempts when fails with retryable error", | ||
true, | ||
RetryConfig{attempts: 5, waitFn: waitFn}, | ||
5, | ||
}, | ||
} | ||
|
||
for _, tt := range tests { | ||
t.Run(tt.name, func(t *testing.T) { | ||
calls := 0 | ||
errorStateLoader := func(_ stdfields.StdMonitorFields) (*State, error) { | ||
calls += 1 | ||
return nil, LoaderError{err: errors.New("test error"), Retry: tt.retryable} | ||
} | ||
|
||
mst := NewTracker(errorStateLoader, true) | ||
mst.GetCurrentState(stdfields.StdMonitorFields{}, tt.rc) | ||
|
||
require.Equal(t, calls, tt.expectedCalls) | ||
}) | ||
} | ||
} |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
change made to make the HTTP API obj "fakeable" as you can see in esloader_test.go