Commit

show the last error to the user if retry fails
rusq committed Apr 10, 2024

Verified: created on GitHub.com and signed with GitHub's verified signature (the key has expired).
1 parent e66df2e commit 218547d
Showing 2 changed files with 86 additions and 54 deletions.
30 changes: 24 additions & 6 deletions internal/network/network.go
@@ -38,7 +38,22 @@ var (

// ErrRetryFailed is returned if number of retry attempts exceeded the retry attempts limit and
// function wasn't able to complete without errors.
var ErrRetryFailed = errors.New("callback was unable to complete without errors within the allowed number of retries")
type ErrRetryFailed struct {
	Err error
}

func (e *ErrRetryFailed) Error() string {
	return fmt.Sprintf("callback was unable to complete without errors within the allowed number of retries: %s", e.Err)
}

func (e *ErrRetryFailed) Unwrap() error {
	return e.Err
}

func (e *ErrRetryFailed) Is(target error) bool {
	_, ok := target.(*ErrRetryFailed)
	return ok
}

// WithRetry will run the callback function fn. If the function returns
// slack.RateLimitedError, it will delay, and then call it again up to
@@ -49,6 +64,8 @@ func WithRetry(ctx context.Context, lim *rate.Limiter, maxAttempts int, fn func(
	if maxAttempts == 0 {
		maxAttempts = defNumAttempts
	}

	var lastErr error
	for attempt := 0; attempt < maxAttempts; attempt++ {
		// calling wait to ensure that we don't exceed the rate limit
		var err error
@@ -65,6 +82,7 @@ func WithRetry(ctx context.Context, lim *rate.Limiter, maxAttempts int, fn func(
			ok = true
			break
		}
		lastErr = cbErr

		tracelogf(ctx, "error", "WithRetry: %[1]s (%[1]T) after %[2]d attempts", cbErr, attempt+1)
		var (
@@ -74,22 +92,22 @@ func WithRetry(ctx context.Context, lim *rate.Limiter, maxAttempts int, fn func(
		)
		switch {
		case errors.As(cbErr, &rle):
			tracelogf(ctx, "info", "got rate limited, sleeping %s", rle.RetryAfter)
			tracelogf(ctx, "info", "got rate limited, sleeping %s (%s)", rle.RetryAfter, cbErr)
			time.Sleep(rle.RetryAfter)
			continue
		case errors.As(cbErr, &sce):
			if isRecoverable(sce.Code) {
				// possibly transient error
				delay := waitFn(attempt)
				tracelogf(ctx, "info", "got server error %d, sleeping %s", sce.Code, delay)
				tracelogf(ctx, "info", "got server error %d, sleeping %s (%s)", sce.Code, delay, cbErr)
				time.Sleep(delay)
				continue
			}
		case errors.As(cbErr, &ne):
			if ne.Op == "read" || ne.Op == "write" {
				// possibly transient error
				delay := netWaitFn(attempt)
				tracelogf(ctx, "info", "got network error %s, sleeping %s", ne.Op, delay)
				tracelogf(ctx, "info", "got network error %s on %q, sleeping %s", cbErr, ne.Op, delay)
				time.Sleep(delay)
				continue
			}
@@ -98,7 +116,7 @@ func WithRetry(ctx context.Context, lim *rate.Limiter, maxAttempts int, fn func(
return fmt.Errorf("callback error: %w", cbErr)
}
if !ok {
return ErrRetryFailed
return &ErrRetryFailed{Err: lastErr}
}
return nil
}
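With the sentinel error replaced by the wrapping *ErrRetryFailed type, callers can keep matching with errors.Is (via the custom Is method) and can now also report the last underlying error to the user. A minimal caller-side sketch, written as if it lived in the same package; the helper name and its body are assumptions for illustration, not part of this commit:

package network

import (
	"context"
	"errors"
	"fmt"

	"github.com/rusq/slack"
	"golang.org/x/time/rate"
)

// runWithReporting is a hypothetical caller-side helper (not part of this
// commit) showing how the wrapped error can now be surfaced to the user.
func runWithReporting(ctx context.Context, lim *rate.Limiter, fn func() error) error {
	err := WithRetry(ctx, lim, 3, fn)
	if err == nil {
		return nil
	}
	// errors.Is matches through the custom Is method, regardless of which
	// error ended up wrapped inside.
	if errors.Is(err, &ErrRetryFailed{}) {
		var sce slack.StatusCodeError
		// errors.As unwraps down to the preserved last callback error, so the
		// user sees the actual cause (here, the HTTP status) rather than a
		// generic "retry failed" message.
		if errors.As(err, &sce) {
			return fmt.Errorf("gave up after retries: server returned %d %s", sce.Code, sce.Status)
		}
	}
	return err
}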
@@ -112,7 +130,7 @@ func isRecoverable(statusCode int) bool {
// where x is the current attempt number. The maximum wait time is capped at 5
// minutes.
func cubicWait(attempt int) time.Duration {
	x := attempt + 2 // this is to ensure that we sleep at least 8 seconds.
	x := attempt + 1 // this is to ensure that we sleep at least a second.
	delay := time.Duration(x*x*x) * time.Second
	if delay > maxAllowedWaitTime {
		return maxAllowedWaitTime
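For reference, the smaller offset shifts the whole backoff schedule down: the first retry now waits 1s instead of 8s, followed by 8s, 27s, 1m4s, 2m5s, 3m36s, and then the 5-minute cap. A small standalone sketch that duplicates cubicWait only to print the new schedule:

package main

import (
	"fmt"
	"time"
)

// maxAllowedWaitTime mirrors the 5-minute cap described in the comment above;
// the real constant lives in internal/network.
const maxAllowedWaitTime = 5 * time.Minute

// cubicWait is copied here only to print the new schedule.
func cubicWait(attempt int) time.Duration {
	x := attempt + 1 // sleep at least a second on the first retry
	delay := time.Duration(x*x*x) * time.Second
	if delay > maxAllowedWaitTime {
		return maxAllowedWaitTime
	}
	return delay
}

func main() {
	for attempt := 0; attempt < 7; attempt++ {
		fmt.Printf("attempt %d: %v\n", attempt, cubicWait(attempt))
	}
	// Output: 1s, 8s, 27s, 1m4s, 2m5s, 3m36s, 5m0s (capped)
}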
110 changes: 62 additions & 48 deletions internal/network/network_test.go
@@ -12,6 +12,7 @@ import (
"time"

"github.com/rusq/slack"
"github.com/stretchr/testify/assert"
"golang.org/x/time/rate"
)

@@ -177,36 +178,70 @@ func Test_withRetry(t *testing.T) {
}
})
}
}
t.Run("500 error handling", func(t *testing.T) {
waitFn = func(attempt int) time.Duration { return 50 * time.Millisecond }
defer func() {
waitFn = cubicWait
}()

var codes = []int{500, 502, 503, 504, 598}
for _, code := range codes {
var thisCode = code
// This test is to ensure that we handle 500 errors correctly.
t.Run(fmt.Sprintf("%d error", code), func(t *testing.T) {

const (
testRetryCount = 1
waitThreshold = 100 * time.Millisecond
)

// Create a test server that returns a 500 error.
ts := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
w.WriteHeader(thisCode)
}))
defer ts.Close()

// Create a new client with the test server as the endpoint.
client := slack.New("token", slack.OptionAPIURL(ts.URL+"/"))

start := time.Now()
// Call the client with a retry.
err := WithRetry(context.Background(), rate.NewLimiter(1, 1), testRetryCount, func() error {
_, err := client.GetConversationHistory(&slack.GetConversationHistoryParameters{})
if err == nil {
return errors.New("expected error, got nil")
}
return err
})
if err == nil {
t.Fatal("expected error, got nil")
}

func Test500ErrorHandling(t *testing.T) {
waitFn = func(attempt int) time.Duration { return 50 * time.Millisecond }
defer func() {
waitFn = cubicWait
}()
dur := time.Since(start)
if dur < waitFn(testRetryCount-1)-waitThreshold || waitFn(testRetryCount-1)+waitThreshold < dur {
t.Errorf("expected duration to be around %s, got %s", waitFn(testRetryCount), dur)
}

var codes = []int{500, 502, 503, 504, 598}
for _, code := range codes {
var thisCode = code
// This test is to ensure that we handle 500 errors correctly.
t.Run(fmt.Sprintf("%d error", code), func(t *testing.T) {
})
}
t.Run("404 error", func(t *testing.T) {
t.Parallel()

const (
testRetryCount = 1
waitThreshold = 100 * time.Millisecond
)

// Create a test server that returns a 500 error.
// Create a test server that returns a 404 error.
ts := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
w.WriteHeader(thisCode)
w.WriteHeader(404)
}))
defer ts.Close()

// Create a new client with the test server as the endpoint.
client := slack.New("token", slack.OptionAPIURL(ts.URL+"/"))

start := time.Now()
// Call the client with a retry.
start := time.Now()
err := WithRetry(context.Background(), rate.NewLimiter(1, 1), testRetryCount, func() error {
_, err := client.GetConversationHistory(&slack.GetConversationHistoryParameters{})
if err == nil {
@@ -217,46 +252,25 @@ func Test500ErrorHandling(t *testing.T) {
if err == nil {
t.Fatal("expected error, got nil")
}

dur := time.Since(start)
if dur < waitFn(testRetryCount-1)-waitThreshold || waitFn(testRetryCount-1)+waitThreshold < dur {
t.Errorf("expected duration to be around %s, got %s", waitFn(testRetryCount), dur)
if dur > 500*time.Millisecond { // 404 error should not be retried
t.Errorf("expected no sleep, but slept for %s", dur)
}

})
}
t.Run("404 error", func(t *testing.T) {
})
t.Run("meaningful error message", func(t *testing.T) {
t.Parallel()

const (
testRetryCount = 1
)

// Create a test server that returns a 404 error.
ts := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
w.WriteHeader(404)
}))
defer ts.Close()

// Create a new client with the test server as the endpoint.
client := slack.New("token", slack.OptionAPIURL(ts.URL+"/"))

// Call the client with a retry.
start := time.Now()
err := WithRetry(context.Background(), rate.NewLimiter(1, 1), testRetryCount, func() error {
_, err := client.GetConversationHistory(&slack.GetConversationHistoryParameters{})
if err == nil {
return errors.New("expected error, got nil")
}
return err
})
var errFunc = func() error {
return slack.StatusCodeError{Code: 500, Status: "Internal Server Error"}
}
err := WithRetry(context.Background(), rate.NewLimiter(1, 1), 1, errFunc)
if err == nil {
t.Fatal("expected error, got nil")
}
dur := time.Since(start)
if dur > 500*time.Millisecond { // 404 error should not be retried
t.Errorf("expected no sleep, but slept for %s", dur)
}
assert.ErrorContains(t, err, "Internal Server Error")
assert.ErrorIs(t, err, &ErrRetryFailed{})
var sce slack.StatusCodeError
assert.ErrorAs(t, errors.Unwrap(err), &sce)
})
}
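A note on the final assertions: assert.ErrorIs passes with a freshly constructed &ErrRetryFailed{} because matching goes through the type's custom Is method rather than pointer identity, and the preserved cause stays reachable through Unwrap. A standalone illustration of that behaviour; the type is duplicated here purely for the demo:

package main

import (
	"errors"
	"fmt"
)

// ErrRetryFailed duplicates the type from internal/network purely for this demo.
type ErrRetryFailed struct{ Err error }

func (e *ErrRetryFailed) Error() string { return "retry failed: " + e.Err.Error() }
func (e *ErrRetryFailed) Unwrap() error { return e.Err }

// Is makes errors.Is match any *ErrRetryFailed target, not just the same pointer.
func (e *ErrRetryFailed) Is(target error) bool {
	_, ok := target.(*ErrRetryFailed)
	return ok
}

func main() {
	cause := errors.New("slack server error: 500 Internal Server Error")
	err := fmt.Errorf("wrapping once more: %w", &ErrRetryFailed{Err: cause})

	fmt.Println(errors.Is(err, &ErrRetryFailed{})) // true: matched via Is, not by pointer
	fmt.Println(errors.Is(err, cause))             // true: the last callback error is reachable via Unwrap
}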
