From c1259141dab7f0c1ebc528f6ecc77192ddbc6b7e Mon Sep 17 00:00:00 2001 From: you06 Date: Mon, 27 May 2024 21:12:53 +0900 Subject: [PATCH 1/8] reproduce #53592 Signed-off-by: you06 --- pkg/statistics/handle/bootstrap.go | 3 +++ 1 file changed, 3 insertions(+) diff --git a/pkg/statistics/handle/bootstrap.go b/pkg/statistics/handle/bootstrap.go index a9983219b7958..d4c944f0de066 100644 --- a/pkg/statistics/handle/bootstrap.go +++ b/pkg/statistics/handle/bootstrap.go @@ -18,6 +18,7 @@ import ( "context" "sync" "sync/atomic" + "time" "github.com/pingcap/errors" "github.com/pingcap/tidb/pkg/config" @@ -691,6 +692,7 @@ func (h *Handle) InitStatsLite(is infoschema.InfoSchema) (err error) { if err != nil { return err } + time.Sleep(30 * time.Minute) cache, err := h.initStatsMeta(is) if err != nil { return errors.Trace(err) @@ -718,6 +720,7 @@ func (h *Handle) InitStats(is infoschema.InfoSchema) (err error) { if err != nil { return err } + time.Sleep(30 * time.Minute) cache, err := h.initStatsMeta(is) if err != nil { return errors.Trace(err) From 63d0a3bf40e400cd1a4298ed4a7ab33395f491f7 Mon Sep 17 00:00:00 2001 From: you06 Date: Tue, 28 May 2024 11:44:23 +0900 Subject: [PATCH 2/8] track init stats ctx's min start ts Signed-off-by: you06 --- pkg/domain/domain.go | 16 ++++++++++++++++ pkg/domain/infosync/info.go | 8 +++++--- 2 files changed, 21 insertions(+), 3 deletions(-) diff --git a/pkg/domain/domain.go b/pkg/domain/domain.go index 6c8eab47eb356..017cc95274eb4 100644 --- a/pkg/domain/domain.go +++ b/pkg/domain/domain.go @@ -2249,6 +2249,22 @@ func (do *Domain) UpdateTableStatsLoop(ctx, initStatsCtx sessionctx.Context) err }, "analyzeJobsCleanupWorker", ) + go func() { + // The initStatsCtx is used to store the internal session for initializing stats, + // so we need the gc min start ts calculation to track it as an internal session. + // Since the session manager may not be ready at this moment, `infosync.StoreInternalSession` can fail. + // we need to retry until the session manager is ready or the init stats completes. + for !infosync.StoreInternalSession(initStatsCtx) { + waitRetry := time.After(time.Second) + select { + case <-do.StatsHandle().InitStatsDone: + return + case <-waitRetry: + } + } + <-do.StatsHandle().InitStatsDone + infosync.DeleteInternalSession(initStatsCtx) + }() return nil } diff --git a/pkg/domain/infosync/info.go b/pkg/domain/infosync/info.go index 51dfbcac6e16b..908d84df8aced 100644 --- a/pkg/domain/infosync/info.go +++ b/pkg/domain/infosync/info.go @@ -1243,16 +1243,18 @@ func ConfigureTiFlashPDForPartitions(accel bool, definitions *[]model.PartitionD } // StoreInternalSession is the entry function for store an internal session to SessionManager. -func StoreInternalSession(se any) { +// return whether the session is stored successfully. +func StoreInternalSession(se any) bool { is, err := getGlobalInfoSyncer() if err != nil { - return + return false } sm := is.GetSessionManager() if sm == nil { - return + return false } sm.StoreInternalSession(se) + return true } // DeleteInternalSession is the entry function for delete an internal session from SessionManager. From 3f998acc2592fb0f495bffd26dc987287d3b1ef9 Mon Sep 17 00:00:00 2001 From: you06 Date: Tue, 28 May 2024 13:09:36 +0900 Subject: [PATCH 3/8] remove test code Signed-off-by: you06 --- pkg/statistics/handle/bootstrap.go | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/pkg/statistics/handle/bootstrap.go b/pkg/statistics/handle/bootstrap.go index d4c944f0de066..ad6b199dd3979 100644 --- a/pkg/statistics/handle/bootstrap.go +++ b/pkg/statistics/handle/bootstrap.go @@ -16,10 +16,6 @@ package handle import ( "context" - "sync" - "sync/atomic" - "time" - "github.com/pingcap/errors" "github.com/pingcap/tidb/pkg/config" "github.com/pingcap/tidb/pkg/infoschema" @@ -37,6 +33,8 @@ import ( "github.com/pingcap/tidb/pkg/util/chunk" "github.com/pingcap/tidb/pkg/util/logutil" "go.uber.org/zap" + "sync" + "sync/atomic" ) // initStatsStep is the step to load stats by paging. @@ -692,7 +690,6 @@ func (h *Handle) InitStatsLite(is infoschema.InfoSchema) (err error) { if err != nil { return err } - time.Sleep(30 * time.Minute) cache, err := h.initStatsMeta(is) if err != nil { return errors.Trace(err) @@ -720,7 +717,6 @@ func (h *Handle) InitStats(is infoschema.InfoSchema) (err error) { if err != nil { return err } - time.Sleep(30 * time.Minute) cache, err := h.initStatsMeta(is) if err != nil { return errors.Trace(err) From 3894930e779ac9ec1307bb1abf64821b9e8b0325 Mon Sep 17 00:00:00 2001 From: you06 Date: Tue, 28 May 2024 13:15:13 +0900 Subject: [PATCH 4/8] sort imports Signed-off-by: you06 --- pkg/statistics/handle/bootstrap.go | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/pkg/statistics/handle/bootstrap.go b/pkg/statistics/handle/bootstrap.go index ad6b199dd3979..a9983219b7958 100644 --- a/pkg/statistics/handle/bootstrap.go +++ b/pkg/statistics/handle/bootstrap.go @@ -16,6 +16,9 @@ package handle import ( "context" + "sync" + "sync/atomic" + "github.com/pingcap/errors" "github.com/pingcap/tidb/pkg/config" "github.com/pingcap/tidb/pkg/infoschema" @@ -33,8 +36,6 @@ import ( "github.com/pingcap/tidb/pkg/util/chunk" "github.com/pingcap/tidb/pkg/util/logutil" "go.uber.org/zap" - "sync" - "sync/atomic" ) // initStatsStep is the step to load stats by paging. From 39638d58c3725dbe8ea4103c1ae2e7177a030363 Mon Sep 17 00:00:00 2001 From: you06 Date: Tue, 28 May 2024 16:02:58 +0900 Subject: [PATCH 5/8] add test Signed-off-by: you06 --- pkg/domain/domain.go | 33 +++++++++++---------- pkg/server/BUILD.bazel | 1 + pkg/server/stat_test.go | 47 ++++++++++++++++++++++++++++++ pkg/statistics/handle/BUILD.bazel | 1 + pkg/statistics/handle/bootstrap.go | 3 ++ 5 files changed, 70 insertions(+), 15 deletions(-) diff --git a/pkg/domain/domain.go b/pkg/domain/domain.go index 017cc95274eb4..baec2d5360a33 100644 --- a/pkg/domain/domain.go +++ b/pkg/domain/domain.go @@ -2249,22 +2249,25 @@ func (do *Domain) UpdateTableStatsLoop(ctx, initStatsCtx sessionctx.Context) err }, "analyzeJobsCleanupWorker", ) - go func() { - // The initStatsCtx is used to store the internal session for initializing stats, - // so we need the gc min start ts calculation to track it as an internal session. - // Since the session manager may not be ready at this moment, `infosync.StoreInternalSession` can fail. - // we need to retry until the session manager is ready or the init stats completes. - for !infosync.StoreInternalSession(initStatsCtx) { - waitRetry := time.After(time.Second) - select { - case <-do.StatsHandle().InitStatsDone: - return - case <-waitRetry: + do.wg.Run( + func() { + // The initStatsCtx is used to store the internal session for initializing stats, + // so we need the gc min start ts calculation to track it as an internal session. + // Since the session manager may not be ready at this moment, `infosync.StoreInternalSession` can fail. + // we need to retry until the session manager is ready or the init stats completes. + for !infosync.StoreInternalSession(initStatsCtx) { + waitRetry := time.After(time.Second) + select { + case <-do.StatsHandle().InitStatsDone: + return + case <-waitRetry: + } } - } - <-do.StatsHandle().InitStatsDone - infosync.DeleteInternalSession(initStatsCtx) - }() + <-do.StatsHandle().InitStatsDone + infosync.DeleteInternalSession(initStatsCtx) + }, + "RemoveInitStatsFromInternalSessions", + ) return nil } diff --git a/pkg/server/BUILD.bazel b/pkg/server/BUILD.bazel index d69699f9eb013..a2163a312aabc 100644 --- a/pkg/server/BUILD.bazel +++ b/pkg/server/BUILD.bazel @@ -202,6 +202,7 @@ go_test( "@com_github_prometheus_client_golang//prometheus/testutil", "@com_github_stretchr_testify//require", "@com_github_tikv_client_go_v2//error", + "@com_github_tikv_client_go_v2//oracle", "@com_github_tikv_client_go_v2//testutils", "@com_github_tikv_client_go_v2//tikv", "@org_uber_go_goleak//:goleak", diff --git a/pkg/server/stat_test.go b/pkg/server/stat_test.go index 5b0fca478780d..2db66efbe8fe2 100644 --- a/pkg/server/stat_test.go +++ b/pkg/server/stat_test.go @@ -20,12 +20,14 @@ import ( "time" "github.com/pingcap/failpoint" + "github.com/pingcap/tidb/pkg/config" "github.com/pingcap/tidb/pkg/domain/infosync" "github.com/pingcap/tidb/pkg/keyspace" "github.com/pingcap/tidb/pkg/server/internal/util" "github.com/pingcap/tidb/pkg/session" "github.com/pingcap/tidb/pkg/store/mockstore" "github.com/stretchr/testify/require" + "github.com/tikv/client-go/v2/oracle" ) func TestUptime(t *testing.T) { @@ -63,3 +65,48 @@ func TestUptime(t *testing.T) { require.NoError(t, err) require.GreaterOrEqual(t, stats[upTime].(int64), int64(time.Since(time.Unix(1282967700, 0)).Seconds())) } + +func TestInitStatsSessionBlockGC(t *testing.T) { + origConfig := config.GetGlobalConfig() + defer func() { + config.StoreGlobalConfig(origConfig) + }() + newConfig := *origConfig + for _, lite := range []bool{false, true} { + require.NoError(t, failpoint.Enable("github.com/pingcap/tidb/pkg/statistics/handle/beforeInitStats", "pause")) + require.NoError(t, failpoint.Enable("github.com/pingcap/tidb/pkg/statistics/handle/beforeInitStatsLite", "pause")) + newConfig.Performance.LiteInitStats = lite + config.StoreGlobalConfig(&newConfig) + + store, err := mockstore.NewMockStore() + require.NoError(t, err) + dom, err := session.BootstrapSession(store) + require.NoError(t, err) + defer func() { + dom.Close() + err := store.Close() + require.NoError(t, err) + }() + infoSyncer := dom.InfoSyncer() + sv := CreateMockServer(t, store) + sv.SetDomain(dom) + infoSyncer.SetSessionManager(sv) + time.Sleep(time.Second) + require.Eventually(t, func() bool { + now := time.Now() + startTSList := sv.GetInternalSessionStartTSList() + for _, startTs := range startTSList { + if startTs != 0 { + startTime := oracle.GetTimeFromTS(startTs) + // test pass if the min_start_ts is blocked over 1s. + if now.Sub(startTime) > time.Second { + return true + } + } + } + return false + }, 1*time.Second, 10*time.Millisecond, "min_start_ts is not blocked over 1s") + require.NoError(t, failpoint.Disable("github.com/pingcap/tidb/pkg/statistics/handle/beforeInitStats")) + require.NoError(t, failpoint.Disable("github.com/pingcap/tidb/pkg/statistics/handle/beforeInitStatsLite")) + } +} diff --git a/pkg/statistics/handle/BUILD.bazel b/pkg/statistics/handle/BUILD.bazel index f5787bc595d13..9b8d1bda85aa0 100644 --- a/pkg/statistics/handle/BUILD.bazel +++ b/pkg/statistics/handle/BUILD.bazel @@ -35,6 +35,7 @@ go_library( "//pkg/util/chunk", "//pkg/util/logutil", "@com_github_pingcap_errors//:errors", + "@com_github_pingcap_failpoint//:failpoint", "@org_uber_go_zap//:zap", ], ) diff --git a/pkg/statistics/handle/bootstrap.go b/pkg/statistics/handle/bootstrap.go index a9983219b7958..272078ca764ff 100644 --- a/pkg/statistics/handle/bootstrap.go +++ b/pkg/statistics/handle/bootstrap.go @@ -20,6 +20,7 @@ import ( "sync/atomic" "github.com/pingcap/errors" + "github.com/pingcap/failpoint" "github.com/pingcap/tidb/pkg/config" "github.com/pingcap/tidb/pkg/infoschema" "github.com/pingcap/tidb/pkg/kv" @@ -691,6 +692,7 @@ func (h *Handle) InitStatsLite(is infoschema.InfoSchema) (err error) { if err != nil { return err } + failpoint.Inject("beforeInitStatsLite", func() {}) cache, err := h.initStatsMeta(is) if err != nil { return errors.Trace(err) @@ -718,6 +720,7 @@ func (h *Handle) InitStats(is infoschema.InfoSchema) (err error) { if err != nil { return err } + failpoint.Inject("beforeInitStats", func() {}) cache, err := h.initStatsMeta(is) if err != nil { return errors.Trace(err) From a04ebc4748f540353681b879583ab810fea2309d Mon Sep 17 00:00:00 2001 From: you06 Date: Tue, 28 May 2024 16:07:20 +0900 Subject: [PATCH 6/8] stablize test Signed-off-by: you06 --- pkg/server/stat_test.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pkg/server/stat_test.go b/pkg/server/stat_test.go index 2db66efbe8fe2..0d0f6818db367 100644 --- a/pkg/server/stat_test.go +++ b/pkg/server/stat_test.go @@ -105,7 +105,7 @@ func TestInitStatsSessionBlockGC(t *testing.T) { } } return false - }, 1*time.Second, 10*time.Millisecond, "min_start_ts is not blocked over 1s") + }, 10*time.Second, 10*time.Millisecond, "min_start_ts is not blocked over 1s") require.NoError(t, failpoint.Disable("github.com/pingcap/tidb/pkg/statistics/handle/beforeInitStats")) require.NoError(t, failpoint.Disable("github.com/pingcap/tidb/pkg/statistics/handle/beforeInitStatsLite")) } From 1bf8665bf57e5443636ed43644cc921b96f7f11a Mon Sep 17 00:00:00 2001 From: you06 Date: Tue, 28 May 2024 16:30:12 +0900 Subject: [PATCH 7/8] fix lint Signed-off-by: you06 --- pkg/server/stat_test.go | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/pkg/server/stat_test.go b/pkg/server/stat_test.go index 0d0f6818db367..4a91d4dd6b260 100644 --- a/pkg/server/stat_test.go +++ b/pkg/server/stat_test.go @@ -82,11 +82,7 @@ func TestInitStatsSessionBlockGC(t *testing.T) { require.NoError(t, err) dom, err := session.BootstrapSession(store) require.NoError(t, err) - defer func() { - dom.Close() - err := store.Close() - require.NoError(t, err) - }() + infoSyncer := dom.InfoSyncer() sv := CreateMockServer(t, store) sv.SetDomain(dom) @@ -106,6 +102,8 @@ func TestInitStatsSessionBlockGC(t *testing.T) { } return false }, 10*time.Second, 10*time.Millisecond, "min_start_ts is not blocked over 1s") + dom.Close() + require.NoError(t, store.Close()) require.NoError(t, failpoint.Disable("github.com/pingcap/tidb/pkg/statistics/handle/beforeInitStats")) require.NoError(t, failpoint.Disable("github.com/pingcap/tidb/pkg/statistics/handle/beforeInitStatsLite")) } From 1ad315ef5df75cd7eb56586f12d48c21d8394e67 Mon Sep 17 00:00:00 2001 From: you06 Date: Tue, 28 May 2024 17:14:36 +0900 Subject: [PATCH 8/8] fix test timeout Signed-off-by: you06 --- pkg/server/stat_test.go | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pkg/server/stat_test.go b/pkg/server/stat_test.go index 4a91d4dd6b260..edc26eed42277 100644 --- a/pkg/server/stat_test.go +++ b/pkg/server/stat_test.go @@ -102,9 +102,9 @@ func TestInitStatsSessionBlockGC(t *testing.T) { } return false }, 10*time.Second, 10*time.Millisecond, "min_start_ts is not blocked over 1s") - dom.Close() - require.NoError(t, store.Close()) require.NoError(t, failpoint.Disable("github.com/pingcap/tidb/pkg/statistics/handle/beforeInitStats")) require.NoError(t, failpoint.Disable("github.com/pingcap/tidb/pkg/statistics/handle/beforeInitStatsLite")) + dom.Close() + require.NoError(t, store.Close()) } }