From 8d86bfb4d8933f2e2d84e4478e750b04cd59eeb0 Mon Sep 17 00:00:00 2001 From: Yu Juncen Date: Tue, 27 Feb 2024 15:43:50 +0800 Subject: [PATCH 1/5] print callstack when exiting Signed-off-by: Yu Juncen --- br/cmd/br/main.go | 26 ++--------------- br/pkg/task/operator/cmd.go | 2 ++ br/pkg/utils/misc.go | 57 +++++++++++++++++++++++++++++++++++++ 3 files changed, 61 insertions(+), 24 deletions(-) diff --git a/br/cmd/br/main.go b/br/cmd/br/main.go index 5eca340f1e622..f745920f5bfba 100644 --- a/br/cmd/br/main.go +++ b/br/cmd/br/main.go @@ -2,39 +2,17 @@ package main import ( "context" - "fmt" "os" - "os/signal" - "syscall" "github.com/pingcap/log" + "github.com/pingcap/tidb/br/pkg/utils" "github.com/spf13/cobra" "go.uber.org/zap" ) func main() { gCtx := context.Background() - ctx, cancel := context.WithCancel(gCtx) - defer cancel() - - sc := make(chan os.Signal, 1) - signal.Notify(sc, - syscall.SIGHUP, - syscall.SIGINT, - syscall.SIGTERM, - syscall.SIGQUIT) - - go func() { - sig := <-sc - fmt.Printf("\nGot signal [%v] to exit.\n", sig) - log.Warn("received signal to exit", zap.Stringer("signal", sig)) - cancel() - fmt.Fprintln(os.Stderr, "gracefully shuting down, press ^C again to force exit") - <-sc - // Even user use SIGTERM to exit, there isn't any checkpoint for resuming, - // hence returning fail exit code. - os.Exit(1) - }() + ctx, cancel := utils.StartExitSingleListener(gCtx) rootCmd := &cobra.Command{ Use: "br", diff --git a/br/pkg/task/operator/cmd.go b/br/pkg/task/operator/cmd.go index 40048721f4868..2b0157a699036 100644 --- a/br/pkg/task/operator/cmd.go +++ b/br/pkg/task/operator/cmd.go @@ -106,6 +106,7 @@ func hintAllReady() { // AdaptEnvForSnapshotBackup blocks the current goroutine and pause the GC safepoint and remove the scheduler by the config. // This function will block until the context being canceled. func AdaptEnvForSnapshotBackup(ctx context.Context, cfg *PauseGcConfig) error { + utils.DumpGoroutineWhenExit.Store(true) mgr, err := dialPD(ctx, &cfg.Config) if err != nil { return errors.Annotate(err, "failed to dial PD") @@ -141,6 +142,7 @@ func AdaptEnvForSnapshotBackup(ctx context.Context, cfg *PauseGcConfig) error { if cfg.OnAllReady != nil { cfg.OnAllReady() } + utils.DumpGoroutineWhenExit.Store(false) hintAllReady() }() defer func() { diff --git a/br/pkg/utils/misc.go b/br/pkg/utils/misc.go index 84774af0906a1..aa9a7413b96ce 100644 --- a/br/pkg/utils/misc.go +++ b/br/pkg/utils/misc.go @@ -16,8 +16,16 @@ package utils import ( "context" "crypto/tls" + "fmt" + "os" + "os/signal" + "runtime" + "strings" + "sync/atomic" + "syscall" "time" + "github.com/docker/go-units" "github.com/pingcap/errors" "github.com/pingcap/kvproto/pkg/metapb" "github.com/pingcap/log" @@ -157,3 +165,52 @@ func WithCleanUp(errOut *error, timeout time.Duration, fn func(context.Context) log.Warn("Encountered but ignored error while cleaning up.", zap.Error(err)) } } + +func AllStackInfo() []byte { + res := make([]byte, 256*units.KiB) + for { + n := runtime.Stack(res, true) + if n < len(res) { + return res[:n] + } + res = make([]byte, len(res)*2) + } +} + +var ( + DumpGoroutineWhenExit atomic.Bool +) + +func StartExitSingleListener(ctx context.Context) (context.Context, context.CancelFunc) { + cx, cancel := context.WithCancel(ctx) + sc := make(chan os.Signal, 1) + signal.Notify(sc, + syscall.SIGHUP, + syscall.SIGINT, + syscall.SIGTERM, + syscall.SIGQUIT) + go func() { + sig := <-sc + dumpGoroutine := DumpGoroutineWhenExit.Load() + padding := strings.Repeat("=", 8) + printDelimate := func(s string) { + fmt.Printf("%s[ %s ]%s\n", padding, s, padding) + } + fmt.Println() + printDelimate(fmt.Sprintf("Got signal %v to exit.", sig)) + printDelimate(fmt.Sprintf("Required Goroutine Dump = %v", dumpGoroutine)) + if DumpGoroutineWhenExit.Load() { + printDelimate("Start Dumping Goroutine") + os.Stdout.Write(AllStackInfo()) + printDelimate("End of Dumping Goroutine") + } + log.Warn("received signal to exit", zap.Stringer("signal", sig)) + cancel() + fmt.Fprintln(os.Stderr, "gracefully shutting down, press ^C again to force exit") + <-sc + // Even user use SIGTERM to exit, there isn't any checkpoint for resuming, + // hence returning fail exit code. + os.Exit(1) + }() + return cx, cancel +} From 6b593c884d296a4944e9562b107086751c42e3ef Mon Sep 17 00:00:00 2001 From: Yu Juncen Date: Tue, 27 Feb 2024 19:22:53 +0800 Subject: [PATCH 2/5] added more logs Signed-off-by: Yu Juncen --- br/pkg/backup/prepare_snap/prepare.go | 1 + 1 file changed, 1 insertion(+) diff --git a/br/pkg/backup/prepare_snap/prepare.go b/br/pkg/backup/prepare_snap/prepare.go index 46f1916873831..dd9cb0e7953be 100644 --- a/br/pkg/backup/prepare_snap/prepare.go +++ b/br/pkg/backup/prepare_snap/prepare.go @@ -370,6 +370,7 @@ func (p *Preparer) workOnPendingRanges(ctx context.Context) error { } func (p *Preparer) sendWaitApply(ctx context.Context, reqs pendingRequests) error { + logutil.CL(ctx).Info("about to send wait apply to stores", zap.Int("to-stores", len(reqs))) for store, req := range reqs { stream, err := p.streamOf(ctx, store) if err != nil { From 961680d7fec7b4bd30b053fc2efb41451ccdeca8 Mon Sep 17 00:00:00 2001 From: Yu Juncen Date: Tue, 27 Feb 2024 19:31:13 +0800 Subject: [PATCH 3/5] better logging Signed-off-by: Yu Juncen --- br/pkg/backup/prepare_snap/prepare.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/br/pkg/backup/prepare_snap/prepare.go b/br/pkg/backup/prepare_snap/prepare.go index dd9cb0e7953be..f3ccdff2b1163 100644 --- a/br/pkg/backup/prepare_snap/prepare.go +++ b/br/pkg/backup/prepare_snap/prepare.go @@ -372,6 +372,7 @@ func (p *Preparer) workOnPendingRanges(ctx context.Context) error { func (p *Preparer) sendWaitApply(ctx context.Context, reqs pendingRequests) error { logutil.CL(ctx).Info("about to send wait apply to stores", zap.Int("to-stores", len(reqs))) for store, req := range reqs { + logutil.CL(ctx).Info("sending wait apply requests to store", zap.Uint64("store", store), zap.Int("regions", len(req.Regions))) stream, err := p.streamOf(ctx, store) if err != nil { return errors.Annotatef(err, "failed to dial the store %d", store) @@ -380,7 +381,6 @@ func (p *Preparer) sendWaitApply(ctx context.Context, reqs pendingRequests) erro if err != nil { return errors.Annotatef(err, "failed to send message to the store %d", store) } - logutil.CL(ctx).Info("sent wait apply requests to store", zap.Uint64("store", store), zap.Int("regions", len(req.Regions))) } return nil } From ce9c7db22ece4ddba373685e63b43e283d64297c Mon Sep 17 00:00:00 2001 From: Yu Juncen Date: Thu, 29 Feb 2024 17:59:54 +0800 Subject: [PATCH 4/5] address comments Signed-off-by: Yu Juncen --- br/pkg/utils/misc.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/br/pkg/utils/misc.go b/br/pkg/utils/misc.go index aa9a7413b96ce..7c1e48c1ba8b0 100644 --- a/br/pkg/utils/misc.go +++ b/br/pkg/utils/misc.go @@ -199,7 +199,7 @@ func StartExitSingleListener(ctx context.Context) (context.Context, context.Canc fmt.Println() printDelimate(fmt.Sprintf("Got signal %v to exit.", sig)) printDelimate(fmt.Sprintf("Required Goroutine Dump = %v", dumpGoroutine)) - if DumpGoroutineWhenExit.Load() { + if dumpGoroutine { printDelimate("Start Dumping Goroutine") os.Stdout.Write(AllStackInfo()) printDelimate("End of Dumping Goroutine") From 84f6513334d89726bb9a9a68bfc0e45c247b5643 Mon Sep 17 00:00:00 2001 From: Yu Juncen Date: Wed, 6 Mar 2024 11:12:22 +0800 Subject: [PATCH 5/5] make clippy happy Signed-off-by: Yu Juncen --- br/pkg/utils/misc.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/br/pkg/utils/misc.go b/br/pkg/utils/misc.go index 7c1e48c1ba8b0..c351f62011a76 100644 --- a/br/pkg/utils/misc.go +++ b/br/pkg/utils/misc.go @@ -201,7 +201,7 @@ func StartExitSingleListener(ctx context.Context) (context.Context, context.Canc printDelimate(fmt.Sprintf("Required Goroutine Dump = %v", dumpGoroutine)) if dumpGoroutine { printDelimate("Start Dumping Goroutine") - os.Stdout.Write(AllStackInfo()) + _, _ = os.Stdout.Write(AllStackInfo()) printDelimate("End of Dumping Goroutine") } log.Warn("received signal to exit", zap.Stringer("signal", sig))