From 3095929bc3be58865745bf36c1c7f7ceb8c02948 Mon Sep 17 00:00:00 2001 From: Jordan Nimlos Date: Mon, 28 Oct 2024 13:12:41 -0500 Subject: [PATCH 1/6] add smtp retries --- remotemonitor/config.go | 6 ++++++ remotemonitor/monitor.go | 15 ++++++++++++--- 2 files changed, 18 insertions(+), 3 deletions(-) diff --git a/remotemonitor/config.go b/remotemonitor/config.go index d96573f1a3..7b6e572e95 100644 --- a/remotemonitor/config.go +++ b/remotemonitor/config.go @@ -35,6 +35,9 @@ type Config struct { // ServerAddr is the address of the SMTP server, including port. ServerAddr string User, Pass string + + // Retries is the number of times to retry sending with an email integration key. + Retries int } // Instances determine what remote GoAlert instances will be monitored and send potential errors. @@ -94,6 +97,9 @@ func (cfg Config) Validate() error { if cfg.SMTP.From == "" { return errors.New("SMTP from address is required") } + if cfg.SMTP.Retries > 5 || cfg.SMTP.Retries < 0 { + return errors.New("SMTP retries must be between 0 and 5") + } return nil } diff --git a/remotemonitor/monitor.go b/remotemonitor/monitor.go index f02806afaa..a6df2892ab 100644 --- a/remotemonitor/monitor.go +++ b/remotemonitor/monitor.go @@ -187,9 +187,18 @@ func (m *Monitor) createEmailAlert(i Instance, dedup, summary, details string) e auth = smtp.PlainAuth("", m.cfg.SMTP.User, m.cfg.SMTP.Pass, host) } - err = smtp.SendMail(m.cfg.SMTP.ServerAddr, auth, m.cfg.SMTP.From, []string{addr.Address}, []byte(msg)) - if err != nil { - return fmt.Errorf("send email: %w", err) + var retryInterval = 10 * time.Second + for attempt := 0; attempt <= m.cfg.SMTP.Retries; attempt++ { + err = smtp.SendMail(m.cfg.SMTP.ServerAddr, auth, m.cfg.SMTP.From, []string{addr.Address}, []byte(msg)) + if err == nil { + return nil + } + if attempt < m.cfg.SMTP.Retries { + log.Printf("WARN: send email failed: %s - retrying in %fs", err.Error(), retryInterval.Seconds()) + time.Sleep(retryInterval) + } else { + return fmt.Errorf("send email: %w", err) + } } return nil From a79836e981aa57f40dc319b1f9e27fe47e254a8e Mon Sep 17 00:00:00 2001 From: nimjor Date: Tue, 29 Oct 2024 10:45:17 -0500 Subject: [PATCH 2/6] use existing retry package instead --- remotemonitor/monitor.go | 26 ++++++++++++-------------- 1 file changed, 12 insertions(+), 14 deletions(-) diff --git a/remotemonitor/monitor.go b/remotemonitor/monitor.go index a6df2892ab..a59d8da2ca 100644 --- a/remotemonitor/monitor.go +++ b/remotemonitor/monitor.go @@ -2,7 +2,7 @@ package remotemonitor import ( "context" - "errors" + // "errors" "fmt" "io" "log" @@ -14,8 +14,10 @@ import ( "strings" "time" + "github.com/pkg/errors" "github.com/target/goalert/config" "github.com/target/goalert/notification/twilio" + "github.com/target/goalert/retry" ) // Monitor will check for functionality and communication between itself and one or more instances. @@ -187,21 +189,17 @@ func (m *Monitor) createEmailAlert(i Instance, dedup, summary, details string) e auth = smtp.PlainAuth("", m.cfg.SMTP.User, m.cfg.SMTP.Pass, host) } - var retryInterval = 10 * time.Second - for attempt := 0; attempt <= m.cfg.SMTP.Retries; attempt++ { + err = retry.DoTemporaryError(func(_ int) error { err = smtp.SendMail(m.cfg.SMTP.ServerAddr, auth, m.cfg.SMTP.From, []string{addr.Address}, []byte(msg)) - if err == nil { - return nil - } - if attempt < m.cfg.SMTP.Retries { - log.Printf("WARN: send email failed: %s - retrying in %fs", err.Error(), retryInterval.Seconds()) - time.Sleep(retryInterval) - } else { - return fmt.Errorf("send email: %w", err) - } - } + err = errors.Wrap(err, "send email") + return err + }, + retry.Log(m.context()), + retry.Limit(m.cfg.SMTP.Retries), + retry.FibBackoff(time.Second), + ) - return nil + return err } func (m *Monitor) loop() { From eb89f133f5552729cf012db971d87793cd29ae88 Mon Sep 17 00:00:00 2001 From: nimjor Date: Tue, 29 Oct 2024 11:20:19 -0500 Subject: [PATCH 3/6] clean up commented code --- remotemonitor/monitor.go | 1 - 1 file changed, 1 deletion(-) diff --git a/remotemonitor/monitor.go b/remotemonitor/monitor.go index a59d8da2ca..f1be129505 100644 --- a/remotemonitor/monitor.go +++ b/remotemonitor/monitor.go @@ -2,7 +2,6 @@ package remotemonitor import ( "context" - // "errors" "fmt" "io" "log" From bb8f92b3284385be4ff3358de9ff8ced38709b6e Mon Sep 17 00:00:00 2001 From: nimjor Date: Wed, 30 Oct 2024 09:37:10 -0500 Subject: [PATCH 4/6] treat any error from smtp SendMail as temporary --- remotemonitor/monitor.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/remotemonitor/monitor.go b/remotemonitor/monitor.go index f1be129505..3aa53f45ee 100644 --- a/remotemonitor/monitor.go +++ b/remotemonitor/monitor.go @@ -191,7 +191,7 @@ func (m *Monitor) createEmailAlert(i Instance, dedup, summary, details string) e err = retry.DoTemporaryError(func(_ int) error { err = smtp.SendMail(m.cfg.SMTP.ServerAddr, auth, m.cfg.SMTP.From, []string{addr.Address}, []byte(msg)) err = errors.Wrap(err, "send email") - return err + return retry.TemporaryError(err) }, retry.Log(m.context()), retry.Limit(m.cfg.SMTP.Retries), From a7dd121afdb5ce59414d8681cb87d1ca8852b95e Mon Sep 17 00:00:00 2001 From: nimjor Date: Wed, 30 Oct 2024 15:53:03 -0500 Subject: [PATCH 5/6] only treat 4xx series SMTP errors as temporary --- remotemonitor/monitor.go | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/remotemonitor/monitor.go b/remotemonitor/monitor.go index 3aa53f45ee..144b65fa13 100644 --- a/remotemonitor/monitor.go +++ b/remotemonitor/monitor.go @@ -190,8 +190,11 @@ func (m *Monitor) createEmailAlert(i Instance, dedup, summary, details string) e err = retry.DoTemporaryError(func(_ int) error { err = smtp.SendMail(m.cfg.SMTP.ServerAddr, auth, m.cfg.SMTP.From, []string{addr.Address}, []byte(msg)) - err = errors.Wrap(err, "send email") - return retry.TemporaryError(err) + if strings.HasPrefix(err.Error(), "4") { // SMTP server return codes beginning with 4 are considered transient + return retry.TemporaryError(errors.Wrap(err, "send email")) + } else { + return errors.Wrap(err, "send email") + } }, retry.Log(m.context()), retry.Limit(m.cfg.SMTP.Retries), From 6890f1c2720b0581e2692f5d3532921b8042f831 Mon Sep 17 00:00:00 2001 From: nimjor Date: Thu, 31 Oct 2024 13:41:13 -0500 Subject: [PATCH 6/6] make sure err is nil checked --- remotemonitor/monitor.go | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/remotemonitor/monitor.go b/remotemonitor/monitor.go index 144b65fa13..ad6df76edd 100644 --- a/remotemonitor/monitor.go +++ b/remotemonitor/monitor.go @@ -190,11 +190,13 @@ func (m *Monitor) createEmailAlert(i Instance, dedup, summary, details string) e err = retry.DoTemporaryError(func(_ int) error { err = smtp.SendMail(m.cfg.SMTP.ServerAddr, auth, m.cfg.SMTP.From, []string{addr.Address}, []byte(msg)) + if err == nil { + return nil + } if strings.HasPrefix(err.Error(), "4") { // SMTP server return codes beginning with 4 are considered transient - return retry.TemporaryError(errors.Wrap(err, "send email")) - } else { - return errors.Wrap(err, "send email") + err = retry.TemporaryError(err) } + return errors.Wrap(err, "send email") }, retry.Log(m.context()), retry.Limit(m.cfg.SMTP.Retries),