diff --git a/changelog/unreleased/graceful_shutdown_timeout.md b/changelog/unreleased/graceful_shutdown_timeout.md
new file mode 100644
index 00000000000..f7d0e8f7adc
--- /dev/null
+++ b/changelog/unreleased/graceful_shutdown_timeout.md
@@ -0,0 +1,10 @@
+Enhancement: Allow specifying a shutdown timeout
+
+When setting `graceful_shutdown_timeout`, revad will try to shut down
+gracefully when receiving an INT or TERM signal (similar to how it already
+behaves on SIGQUIT). This allows ongoing operations to complete before exiting.
+
+If the shutdown doesn't finish within `graceful_shutdown_timeout` seconds, the
+process will exit with an error code (1).
+
+https://github.com/cs3org/reva/pull/xxxx
diff --git a/cmd/revad/internal/grace/grace.go b/cmd/revad/internal/grace/grace.go
index 339a8c5c1e6..8aaf18aaf55 100644
--- a/cmd/revad/internal/grace/grace.go
+++ b/cmd/revad/internal/grace/grace.go
@@ -36,13 +36,14 @@ import (
 // Watcher watches a process for a graceful restart
 // preserving open network sockets to avoid packet loss.
 type Watcher struct {
-	log       zerolog.Logger
-	graceful  bool
-	ppid      int
-	lns       map[string]net.Listener
-	ss        map[string]Server
-	pidFile   string
-	childPIDs []int
+	log                     zerolog.Logger
+	graceful                bool
+	ppid                    int
+	lns                     map[string]net.Listener
+	ss                      map[string]Server
+	pidFile                 string
+	childPIDs               []int
+	gracefulShutdownTimeout int
 }
 
 // Option represent an option.
@@ -62,6 +63,14 @@ func WithPIDFile(fn string) Option {
 	}
 }
 
+// WithGracefulShutdownTimeout sets the deadline (in seconds) for draining
+// active connections before the process is hard-stopped.
+func WithGracefulShutdownTimeout(seconds int) Option {
+	return func(w *Watcher) {
+		w.gracefulShutdownTimeout = seconds
+	}
+}
+
 // NewWatcher creates a Watcher.
 func NewWatcher(opts ...Option) *Watcher {
 	w := &Watcher{
@@ -279,50 +288,68 @@ func (w *Watcher) TrapSignals() {
 			}
 
 		case syscall.SIGQUIT:
-			w.log.Info().Msg("preparing for a graceful shutdown with deadline of 10 seconds")
-			go func() {
-				count := 10
-				ticker := time.NewTicker(time.Second)
-				for ; true; <-ticker.C {
-					w.log.Info().Msgf("shutting down in %d seconds", count-1)
-					count--
-					if count <= 0 {
-						w.log.Info().Msg("deadline reached before draining active conns, hard stopping ...")
-						for _, s := range w.ss {
-							err := s.Stop()
-							if err != nil {
-								w.log.Error().Err(err).Msg("error stopping server")
-							}
-							w.log.Info().Msgf("fd to %s:%s abruptly closed", s.Network(), s.Address())
-						}
-						w.Exit(1)
-					}
-				}
-			}()
-			for _, s := range w.ss {
-				w.log.Info().Msgf("fd to %s:%s gracefully closed ", s.Network(), s.Address())
-				err := s.GracefulStop()
-				if err != nil {
-					w.log.Error().Err(err).Msg("error stopping server")
-					w.log.Info().Msg("exit with error code 1")
-					w.Exit(1)
-				}
-			}
-			w.log.Info().Msg("exit with error code 0")
-			w.Exit(0)
+			gracefulShutdown(w)
 
 		case syscall.SIGINT, syscall.SIGTERM:
-			w.log.Info().Msg("preparing for hard shutdown, aborting all conns")
-			for _, s := range w.ss {
-				w.log.Info().Msgf("fd to %s:%s abruptly closed", s.Network(), s.Address())
-				err := s.Stop()
-				if err != nil {
-					w.log.Error().Err(err).Msg("error stopping server")
-				}
-			}
-			w.Exit(0)
+			if w.gracefulShutdownTimeout == 0 {
+				hardShutdown(w)
+			} else {
+				gracefulShutdown(w)
+			}
 		}
 	}
 }
+
+// TODO: Ideally this would not call Exit() but return an error instead.
+// Exit() is problematic (i.e. racy), especially when orchestrating multiple
+// reva services from some external runtime (like in the "ocis server" case).
+func gracefulShutdown(w *Watcher) {
+	w.log.Info().Int("Timeout", w.gracefulShutdownTimeout).Msg("preparing for a graceful shutdown with deadline")
+	go func() {
+		count := w.gracefulShutdownTimeout
+		ticker := time.NewTicker(time.Second)
+		for ; true; <-ticker.C {
+			w.log.Info().Msgf("shutting down in %d seconds", count-1)
+			count--
+			if count <= 0 {
+				w.log.Info().Msg("deadline reached before draining active conns, hard stopping ...")
+				for _, s := range w.ss {
+					err := s.Stop()
+					if err != nil {
+						w.log.Error().Err(err).Msg("error stopping server")
+					}
+					w.log.Info().Msgf("fd to %s:%s abruptly closed", s.Network(), s.Address())
+				}
+				w.Exit(1)
+			}
+		}
+	}()
+	for _, s := range w.ss {
+		w.log.Info().Msgf("fd to %s:%s gracefully closed", s.Network(), s.Address())
+		err := s.GracefulStop()
+		if err != nil {
+			w.log.Error().Err(err).Msg("error stopping server")
+			w.log.Info().Msg("exit with error code 1")
+			w.Exit(1)
+		}
+	}
+	w.log.Info().Msg("exit with error code 0")
+	w.Exit(0)
+}
+
+// TODO: Ideally this would not call Exit() but return an error instead.
+// Exit() is problematic (i.e. racy), especially when orchestrating multiple
+// reva services from some external runtime (like in the "ocis server" case).
+func hardShutdown(w *Watcher) {
+	w.log.Info().Msg("preparing for hard shutdown, aborting all conns")
+	for _, s := range w.ss {
+		w.log.Info().Msgf("fd to %s:%s abruptly closed", s.Network(), s.Address())
+		err := s.Stop()
+		if err != nil {
+			w.log.Error().Err(err).Msg("error stopping server")
+		}
+	}
+	w.Exit(0)
+}
 
 func getListenerFile(ln net.Listener) (*os.File, error) {
diff --git a/cmd/revad/runtime/runtime.go b/cmd/revad/runtime/runtime.go
index 948397b7fe4..cc908d3e222 100644
--- a/cmd/revad/runtime/runtime.go
+++ b/cmd/revad/runtime/runtime.go
@@ -72,6 +72,8 @@ type coreConf struct {
 
 	// TracingService specifies the service. i.e OpenCensus, OpenTelemetry, OpenTracing...
 	TracingService string `mapstructure:"tracing_service"`
+
+	GracefulShutdownTimeout int `mapstructure:"graceful_shutdown_timeout"`
 }
 
 func run(
@@ -92,7 +94,7 @@ func run(
 	initCPUCount(coreConf, logger)
 	servers := initServers(mainConf, logger, tp)
 
-	watcher, err := initWatcher(logger, filename)
+	watcher, err := initWatcher(logger, filename, coreConf.GracefulShutdownTimeout)
 	if err != nil {
 		log.Panic(err)
 	}
@@ -110,8 +112,8 @@ func initListeners(watcher *grace.Watcher, servers map[string]grace.Server, log
 	return listeners
 }
 
-func initWatcher(log *zerolog.Logger, filename string) (*grace.Watcher, error) {
-	watcher, err := handlePIDFlag(log, filename)
+func initWatcher(log *zerolog.Logger, filename string, gracefulShutdownTimeout int) (*grace.Watcher, error) {
+	watcher, err := handlePIDFlag(log, filename, gracefulShutdownTimeout)
 	// TODO(labkode): maybe pidfile can be created later on? like once a server is going to be created?
 	if err != nil {
 		log.Error().Err(err).Msg("error creating grace watcher")
@@ -187,11 +189,11 @@ func initLogger(conf *logConf) *zerolog.Logger {
 	return log
 }
 
-func handlePIDFlag(l *zerolog.Logger, pidFile string) (*grace.Watcher, error) {
-	var opts []grace.Option
-	opts = append(opts, grace.WithPIDFile(pidFile))
-	opts = append(opts, grace.WithLogger(l.With().Str("pkg", "grace").Logger()))
-	w := grace.NewWatcher(opts...)
+func handlePIDFlag(l *zerolog.Logger, pidFile string, gracefulShutdownTimeout int) (*grace.Watcher, error) {
+	w := grace.NewWatcher(grace.WithPIDFile(pidFile),
+		grace.WithLogger(l.With().Str("pkg", "grace").Logger()),
+		grace.WithGracefulShutdownTimeout(gracefulShutdownTimeout),
+	)
 	err := w.WritePID()
 	if err != nil {
 		return nil, err
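For anyone trying this out, a config example may help. The following is a hypothetical snippet, assuming the usual revad TOML layout in which `coreConf` fields live under the `[core]` section; the 30-second value is purely illustrative:

```toml
# revad.toml (hypothetical snippet)
[core]
# On SIGINT/SIGTERM, drain connections for up to 30 seconds,
# then hard-stop and exit with code 1 if the deadline is reached.
graceful_shutdown_timeout = 30
```

Leaving the key unset (i.e. 0) keeps the previous behavior of a hard shutdown on INT/TERM.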
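The countdown goroutine plus `GracefulStop` loop above amounts to a "drain with deadline" pattern. As a point of comparison for reviewers, here is a minimal, self-contained Go sketch of the same idea expressed as a `select` between a done channel and `time.After`; it is not reva code, and the channel names and two-second fake drain are invented:

```go
package main

import (
	"fmt"
	"os"
	"os/signal"
	"syscall"
	"time"
)

func main() {
	timeout := 10 * time.Second // stand-in for gracefulShutdownTimeout

	// Block until an INT or TERM signal arrives.
	sigs := make(chan os.Signal, 1)
	signal.Notify(sigs, syscall.SIGINT, syscall.SIGTERM)
	<-sigs

	// Drain in the background and signal completion on a channel.
	done := make(chan struct{})
	go func() {
		time.Sleep(2 * time.Second) // stand-in for Server.GracefulStop
		close(done)
	}()

	// Race the drain against the deadline, mirroring the exit codes above.
	select {
	case <-done:
		fmt.Println("graceful shutdown complete")
		os.Exit(0)
	case <-time.After(timeout):
		fmt.Println("deadline reached before draining active conns, hard stopping")
		os.Exit(1)
	}
}
```

The ticker variant in grace.go trades this compactness for a once-per-second countdown log, which is handy when watching a service drain.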