From 218547dfd1868f6e5b0d5ad0282f5679882070a1 Mon Sep 17 00:00:00 2001 From: Rustam Gilyazov <16064414+rusq@users.noreply.github.com> Date: Sun, 7 Apr 2024 18:31:51 +1000 Subject: [PATCH] show the last error to the user if retry fails --- internal/network/network.go | 30 +++++++-- internal/network/network_test.go | 110 +++++++++++++++++-------------- 2 files changed, 86 insertions(+), 54 deletions(-) diff --git a/internal/network/network.go b/internal/network/network.go index 249a420d..b0819991 100644 --- a/internal/network/network.go +++ b/internal/network/network.go @@ -38,7 +38,22 @@ var ( // ErrRetryFailed is returned if number of retry attempts exceeded the retry attempts limit and // function wasn't able to complete without errors. -var ErrRetryFailed = errors.New("callback was unable to complete without errors within the allowed number of retries") +type ErrRetryFailed struct { + Err error +} + +func (e *ErrRetryFailed) Error() string { + return fmt.Sprintf("callback was unable to complete without errors within the allowed number of retries: %s", e.Err) +} + +func (e *ErrRetryFailed) Unwrap() error { + return e.Err +} + +func (e *ErrRetryFailed) Is(target error) bool { + _, ok := target.(*ErrRetryFailed) + return ok +} // WithRetry will run the callback function fn. If the function returns // slack.RateLimitedError, it will delay, and then call it again up to @@ -49,6 +64,8 @@ func WithRetry(ctx context.Context, lim *rate.Limiter, maxAttempts int, fn func( if maxAttempts == 0 { maxAttempts = defNumAttempts } + + var lastErr error for attempt := 0; attempt < maxAttempts; attempt++ { // calling wait to ensure that we don't exceed the rate limit var err error @@ -65,6 +82,7 @@ func WithRetry(ctx context.Context, lim *rate.Limiter, maxAttempts int, fn func( ok = true break } + lastErr = cbErr tracelogf(ctx, "error", "WithRetry: %[1]s (%[1]T) after %[2]d attempts", cbErr, attempt+1) var ( @@ -74,14 +92,14 @@ func WithRetry(ctx context.Context, lim *rate.Limiter, maxAttempts int, fn func( ) switch { case errors.As(cbErr, &rle): - tracelogf(ctx, "info", "got rate limited, sleeping %s", rle.RetryAfter) + tracelogf(ctx, "info", "got rate limited, sleeping %s (%s)", rle.RetryAfter, cbErr) time.Sleep(rle.RetryAfter) continue case errors.As(cbErr, &sce): if isRecoverable(sce.Code) { // possibly transient error delay := waitFn(attempt) - tracelogf(ctx, "info", "got server error %d, sleeping %s", sce.Code, delay) + tracelogf(ctx, "info", "got server error %d, sleeping %s (%s)", sce.Code, delay, cbErr) time.Sleep(delay) continue } @@ -89,7 +107,7 @@ func WithRetry(ctx context.Context, lim *rate.Limiter, maxAttempts int, fn func( if ne.Op == "read" || ne.Op == "write" { // possibly transient error delay := netWaitFn(attempt) - tracelogf(ctx, "info", "got network error %s, sleeping %s", ne.Op, delay) + tracelogf(ctx, "info", "got network error %s on %q, sleeping %s", cbErr, ne.Op, delay) time.Sleep(delay) continue } @@ -98,7 +116,7 @@ func WithRetry(ctx context.Context, lim *rate.Limiter, maxAttempts int, fn func( return fmt.Errorf("callback error: %w", cbErr) } if !ok { - return ErrRetryFailed + return &ErrRetryFailed{Err: lastErr} } return nil } @@ -112,7 +130,7 @@ func isRecoverable(statusCode int) bool { // where x is the current attempt number. The maximum wait time is capped at 5 // minutes. func cubicWait(attempt int) time.Duration { - x := attempt + 2 // this is to ensure that we sleep at least 8 seconds. + x := attempt + 1 // this is to ensure that we sleep at least a second. delay := time.Duration(x*x*x) * time.Second if delay > maxAllowedWaitTime { return maxAllowedWaitTime diff --git a/internal/network/network_test.go b/internal/network/network_test.go index 8773bb9a..7480a541 100644 --- a/internal/network/network_test.go +++ b/internal/network/network_test.go @@ -12,6 +12,7 @@ import ( "time" "github.com/rusq/slack" + "github.com/stretchr/testify/assert" "golang.org/x/time/rate" ) @@ -177,36 +178,70 @@ func Test_withRetry(t *testing.T) { } }) } -} + t.Run("500 error handling", func(t *testing.T) { + waitFn = func(attempt int) time.Duration { return 50 * time.Millisecond } + defer func() { + waitFn = cubicWait + }() + + var codes = []int{500, 502, 503, 504, 598} + for _, code := range codes { + var thisCode = code + // This test is to ensure that we handle 500 errors correctly. + t.Run(fmt.Sprintf("%d error", code), func(t *testing.T) { + + const ( + testRetryCount = 1 + waitThreshold = 100 * time.Millisecond + ) + + // Create a test server that returns a 500 error. + ts := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + w.WriteHeader(thisCode) + })) + defer ts.Close() + + // Create a new client with the test server as the endpoint. + client := slack.New("token", slack.OptionAPIURL(ts.URL+"/")) + + start := time.Now() + // Call the client with a retry. + err := WithRetry(context.Background(), rate.NewLimiter(1, 1), testRetryCount, func() error { + _, err := client.GetConversationHistory(&slack.GetConversationHistoryParameters{}) + if err == nil { + return errors.New("expected error, got nil") + } + return err + }) + if err == nil { + t.Fatal("expected error, got nil") + } -func Test500ErrorHandling(t *testing.T) { - waitFn = func(attempt int) time.Duration { return 50 * time.Millisecond } - defer func() { - waitFn = cubicWait - }() + dur := time.Since(start) + if dur < waitFn(testRetryCount-1)-waitThreshold || waitFn(testRetryCount-1)+waitThreshold < dur { + t.Errorf("expected duration to be around %s, got %s", waitFn(testRetryCount), dur) + } - var codes = []int{500, 502, 503, 504, 598} - for _, code := range codes { - var thisCode = code - // This test is to ensure that we handle 500 errors correctly. - t.Run(fmt.Sprintf("%d error", code), func(t *testing.T) { + }) + } + t.Run("404 error", func(t *testing.T) { + t.Parallel() const ( testRetryCount = 1 - waitThreshold = 100 * time.Millisecond ) - // Create a test server that returns a 500 error. + // Create a test server that returns a 404 error. ts := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { - w.WriteHeader(thisCode) + w.WriteHeader(404) })) defer ts.Close() // Create a new client with the test server as the endpoint. client := slack.New("token", slack.OptionAPIURL(ts.URL+"/")) - start := time.Now() // Call the client with a retry. + start := time.Now() err := WithRetry(context.Background(), rate.NewLimiter(1, 1), testRetryCount, func() error { _, err := client.GetConversationHistory(&slack.GetConversationHistoryParameters{}) if err == nil { @@ -217,46 +252,25 @@ func Test500ErrorHandling(t *testing.T) { if err == nil { t.Fatal("expected error, got nil") } - dur := time.Since(start) - if dur < waitFn(testRetryCount-1)-waitThreshold || waitFn(testRetryCount-1)+waitThreshold < dur { - t.Errorf("expected duration to be around %s, got %s", waitFn(testRetryCount), dur) + if dur > 500*time.Millisecond { // 404 error should not be retried + t.Errorf("expected no sleep, but slept for %s", dur) } - }) - } - t.Run("404 error", func(t *testing.T) { + }) + t.Run("meaningful error message", func(t *testing.T) { t.Parallel() - - const ( - testRetryCount = 1 - ) - - // Create a test server that returns a 404 error. - ts := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { - w.WriteHeader(404) - })) - defer ts.Close() - - // Create a new client with the test server as the endpoint. - client := slack.New("token", slack.OptionAPIURL(ts.URL+"/")) - - // Call the client with a retry. - start := time.Now() - err := WithRetry(context.Background(), rate.NewLimiter(1, 1), testRetryCount, func() error { - _, err := client.GetConversationHistory(&slack.GetConversationHistoryParameters{}) - if err == nil { - return errors.New("expected error, got nil") - } - return err - }) + var errFunc = func() error { + return slack.StatusCodeError{Code: 500, Status: "Internal Server Error"} + } + err := WithRetry(context.Background(), rate.NewLimiter(1, 1), 1, errFunc) if err == nil { t.Fatal("expected error, got nil") } - dur := time.Since(start) - if dur > 500*time.Millisecond { // 404 error should not be retried - t.Errorf("expected no sleep, but slept for %s", dur) - } + assert.ErrorContains(t, err, "Internal Server Error") + assert.ErrorIs(t, err, &ErrRetryFailed{}) + var sce slack.StatusCodeError + assert.ErrorAs(t, errors.Unwrap(err), &sce) }) }