From 1cbf6ffc80f97b25a8f7a8c06fa7790eed49a905 Mon Sep 17 00:00:00 2001 From: wjhuang2016 Date: Fri, 23 Feb 2024 17:58:31 +0800 Subject: [PATCH 01/17] done Signed-off-by: wjhuang2016 --- pkg/session/bootstrap.go | 52 ++++++++++++++++++++++++++++++---------- 1 file changed, 39 insertions(+), 13 deletions(-) diff --git a/pkg/session/bootstrap.go b/pkg/session/bootstrap.go index c3ad0f4f0e174..a91acbd05af17 100644 --- a/pkg/session/bootstrap.go +++ b/pkg/session/bootstrap.go @@ -57,6 +57,7 @@ import ( "github.com/pingcap/tidb/pkg/util/sqlescape" "github.com/pingcap/tidb/pkg/util/sqlexec" "github.com/pingcap/tidb/pkg/util/timeutil" + clientv3 "go.etcd.io/etcd/client/v3" "go.etcd.io/etcd/client/v3/concurrency" "go.uber.org/zap" ) @@ -1300,9 +1301,47 @@ var ( SupportUpgradeHTTPOpVer int64 = version174 ) +func deleteLeader(ctx context.Context, cli *clientv3.Client, prefixKey string) error { + session, err := concurrency.NewSession(cli) + if err != nil { + return errors.Trace(err) + } + defer func() { + _ = session.Close() + }() + election := concurrency.NewElection(session, prefixKey) + resp, err := election.Leader(ctx) + if err != nil { + return errors.Trace(err) + } + _, err = cli.Delete(ctx, string(resp.Kvs[0].Key)) + return err +} + +func forceToLeader(ctx context.Context, s sessiontypes.Session) error { + dom := domain.GetDomain(s) + for !dom.DDL().OwnerManager().IsOwner() { + ownerId, err := dom.DDL().OwnerManager().GetOwnerID(ctx) + if err != nil { + return err + } + err = deleteLeader(ctx, dom.EtcdClient(), ownerId) + if err != nil { + return err + } + time.Sleep(50 * time.Millisecond) + } + return nil +} + // upgrade function will do some upgrade works, when the system is bootstrapped by low version TiDB server // For example, add new system variables into mysql.global_variables table. func upgrade(s sessiontypes.Session) { + err := forceToLeader(context.Background(), s) + if err != nil { + logutil.BgLogger().Fatal("[upgrade] force to owner failed", zap.Error(err)) + } + ver, err := getBootstrapVersion(s) terror.MustNil(err) if ver >= currentBootstrapVersion { @@ -1311,19 +1350,6 @@ func upgrade(s sessiontypes.Session) { } printClusterState(s, ver) - // Only upgrade from under version92 and this TiDB is not owner set. - // The owner in older tidb does not support concurrent DDL, we should add the internal DDL to job queue. - if ver < version92 { - useConcurrentDDL, err := checkOwnerVersion(context.Background(), domain.GetDomain(s)) - if err != nil { - logutil.BgLogger().Fatal("[upgrade] upgrade failed", zap.Error(err)) - } - if !useConcurrentDDL { - // Use another variable DDLForce2Queue but not EnableConcurrentDDL since in upgrade it may set global variable, the initial step will - // overwrite variable EnableConcurrentDDL. - variable.DDLForce2Queue.Store(true) - } - } // Do upgrade works then update bootstrap version. isNull, err := InitMDLVariableForUpgrade(s.GetStore()) if err != nil { From 2115d3ae3a1dd2b8227de56845b9a0079926c924 Mon Sep 17 00:00:00 2001 From: wjhuang2016 Date: Fri, 23 Feb 2024 18:08:24 +0800 Subject: [PATCH 02/17] fix build Signed-off-by: wjhuang2016 --- pkg/owner/manager.go | 18 ++++++++++++++++++ pkg/session/bootstrap.go | 20 +------------------- 2 files changed, 19 insertions(+), 19 deletions(-) diff --git a/pkg/owner/manager.go b/pkg/owner/manager.go index d5f7ddb46779f..a8b178d257de0 100644 --- a/pkg/owner/manager.go +++ b/pkg/owner/manager.go @@ -485,3 +485,21 @@ func init() { logutil.BgLogger().Warn("set manager session TTL failed", zap.Error(err)) } } + +// DeleteLeader deletes the leader key. +func DeleteLeader(ctx context.Context, cli *clientv3.Client, prefixKey string) error { + session, err := concurrency.NewSession(cli) + if err != nil { + return errors.Trace(err) + } + defer func() { + _ = session.Close() + }() + election := concurrency.NewElection(session, prefixKey) + resp, err := election.Leader(ctx) + if err != nil { + return errors.Trace(err) + } + _, err = cli.Delete(ctx, string(resp.Kvs[0].Key)) + return err +} diff --git a/pkg/session/bootstrap.go b/pkg/session/bootstrap.go index a91acbd05af17..c9adf877f74eb 100644 --- a/pkg/session/bootstrap.go +++ b/pkg/session/bootstrap.go @@ -57,7 +57,6 @@ import ( "github.com/pingcap/tidb/pkg/util/sqlescape" "github.com/pingcap/tidb/pkg/util/sqlexec" "github.com/pingcap/tidb/pkg/util/timeutil" - clientv3 "go.etcd.io/etcd/client/v3" "go.etcd.io/etcd/client/v3/concurrency" "go.uber.org/zap" ) @@ -1301,23 +1300,6 @@ var ( SupportUpgradeHTTPOpVer int64 = version174 ) -func deleteLeader(ctx context.Context, cli *clientv3.Client, prefixKey string) error { - session, err := concurrency.NewSession(cli) - if err != nil { - return errors.Trace(err) - } - defer func() { - _ = session.Close() - }() - election := concurrency.NewElection(session, prefixKey) - resp, err := election.Leader(ctx) - if err != nil { - return errors.Trace(err) - } - _, err = cli.Delete(ctx, string(resp.Kvs[0].Key)) - return err -} - func forceToLeader(ctx context.Context, s sessiontypes.Session) error { dom := domain.GetDomain(s) for !dom.DDL().OwnerManager().IsOwner() { @@ -1325,7 +1307,7 @@ func forceToLeader(ctx context.Context, s sessiontypes.Session) error { if err != nil { return err } - err = deleteLeader(ctx, dom.EtcdClient(), ownerId) + err = owner.DeleteLeader(ctx, dom.EtcdClient(), ownerId) if err != nil { return err } From 965652c7d8168c2c5614b9cb75877ad70daa93c6 Mon Sep 17 00:00:00 2001 From: wjhuang2016 Date: Mon, 26 Feb 2024 14:30:08 +0800 Subject: [PATCH 03/17] fix Signed-off-by: wjhuang2016 --- pkg/session/bootstrap.go | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/pkg/session/bootstrap.go b/pkg/session/bootstrap.go index c9adf877f74eb..0daaceb2f1d09 100644 --- a/pkg/session/bootstrap.go +++ b/pkg/session/bootstrap.go @@ -21,6 +21,7 @@ package session import ( "context" "encoding/hex" + "errors" "fmt" "os" osuser "os/user" @@ -1304,7 +1305,10 @@ func forceToLeader(ctx context.Context, s sessiontypes.Session) error { dom := domain.GetDomain(s) for !dom.DDL().OwnerManager().IsOwner() { ownerId, err := dom.DDL().OwnerManager().GetOwnerID(ctx) - if err != nil { + if errors.Is(err, concurrency.ErrElectionNoLeader) { + time.Sleep(50 * time.Millisecond) + continue + } else if err != nil { return err } err = owner.DeleteLeader(ctx, dom.EtcdClient(), ownerId) From 98781bde44d2593f1a776170808d3f589f654d76 Mon Sep 17 00:00:00 2001 From: wjhuang2016 Date: Mon, 26 Feb 2024 16:15:29 +0800 Subject: [PATCH 04/17] fix build Signed-off-by: wjhuang2016 --- pkg/session/bootstrap.go | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/pkg/session/bootstrap.go b/pkg/session/bootstrap.go index 0daaceb2f1d09..515cfd9e6b88e 100644 --- a/pkg/session/bootstrap.go +++ b/pkg/session/bootstrap.go @@ -21,7 +21,6 @@ package session import ( "context" "encoding/hex" - "errors" "fmt" "os" osuser "os/user" @@ -1305,7 +1304,7 @@ func forceToLeader(ctx context.Context, s sessiontypes.Session) error { dom := domain.GetDomain(s) for !dom.DDL().OwnerManager().IsOwner() { ownerId, err := dom.DDL().OwnerManager().GetOwnerID(ctx) - if errors.Is(err, concurrency.ErrElectionNoLeader) { + if errors.ErrorEqual(err, concurrency.ErrElectionNoLeader) { time.Sleep(50 * time.Millisecond) continue } else if err != nil { From 9cde6fed3f60d07f02063739e0ebe65732f1ed73 Mon Sep 17 00:00:00 2001 From: wjhuang2016 Date: Mon, 26 Feb 2024 16:48:41 +0800 Subject: [PATCH 05/17] fix build Signed-off-by: wjhuang2016 --- pkg/session/bootstrap.go | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pkg/session/bootstrap.go b/pkg/session/bootstrap.go index 515cfd9e6b88e..770ed598d2d02 100644 --- a/pkg/session/bootstrap.go +++ b/pkg/session/bootstrap.go @@ -1303,14 +1303,14 @@ var ( func forceToLeader(ctx context.Context, s sessiontypes.Session) error { dom := domain.GetDomain(s) for !dom.DDL().OwnerManager().IsOwner() { - ownerId, err := dom.DDL().OwnerManager().GetOwnerID(ctx) + ownerID, err := dom.DDL().OwnerManager().GetOwnerID(ctx) if errors.ErrorEqual(err, concurrency.ErrElectionNoLeader) { time.Sleep(50 * time.Millisecond) continue } else if err != nil { return err } - err = owner.DeleteLeader(ctx, dom.EtcdClient(), ownerId) + err = owner.DeleteLeader(ctx, dom.EtcdClient(), ownerID) if err != nil { return err } From 0755eaa9f2850725bfa89e507eba509e22c3a430 Mon Sep 17 00:00:00 2001 From: wjhuang2016 Date: Tue, 27 Feb 2024 17:06:23 +0800 Subject: [PATCH 06/17] fix Signed-off-by: wjhuang2016 --- pkg/owner/manager.go | 14 +++----------- pkg/session/bootstrap.go | 5 ++++- 2 files changed, 7 insertions(+), 12 deletions(-) diff --git a/pkg/owner/manager.go b/pkg/owner/manager.go index a8b178d257de0..660e62b416c69 100644 --- a/pkg/owner/manager.go +++ b/pkg/owner/manager.go @@ -487,19 +487,11 @@ func init() { } // DeleteLeader deletes the leader key. -func DeleteLeader(ctx context.Context, cli *clientv3.Client, prefixKey string) error { - session, err := concurrency.NewSession(cli) +func DeleteLeader(ctx context.Context, cli *clientv3.Client, key string) error { + ownerKey, _, _, _, err := getOwnerInfo(ctx, ctx, cli, key) if err != nil { return errors.Trace(err) } - defer func() { - _ = session.Close() - }() - election := concurrency.NewElection(session, prefixKey) - resp, err := election.Leader(ctx) - if err != nil { - return errors.Trace(err) - } - _, err = cli.Delete(ctx, string(resp.Kvs[0].Key)) + _, err = cli.Delete(ctx, ownerKey) return err } diff --git a/pkg/session/bootstrap.go b/pkg/session/bootstrap.go index 770ed598d2d02..b7d28c934fcb9 100644 --- a/pkg/session/bootstrap.go +++ b/pkg/session/bootstrap.go @@ -31,6 +31,7 @@ import ( "github.com/pingcap/errors" "github.com/pingcap/tidb/pkg/bindinfo" "github.com/pingcap/tidb/pkg/config" + "github.com/pingcap/tidb/pkg/ddl" "github.com/pingcap/tidb/pkg/domain" "github.com/pingcap/tidb/pkg/domain/infosync" "github.com/pingcap/tidb/pkg/expression" @@ -1308,10 +1309,12 @@ func forceToLeader(ctx context.Context, s sessiontypes.Session) error { time.Sleep(50 * time.Millisecond) continue } else if err != nil { + logutil.BgLogger().Error("unexpected error", zap.Error(err)) return err } - err = owner.DeleteLeader(ctx, dom.EtcdClient(), ownerID) + err = owner.DeleteLeader(ctx, dom.EtcdClient(), ddl.DDLOwnerKey) if err != nil { + logutil.BgLogger().Error("unexpected error", zap.Error(err), zap.String("ownerID", ownerID)) return err } time.Sleep(50 * time.Millisecond) From 5471b05532cfa3b160f0361b9182ee31d5cf07d3 Mon Sep 17 00:00:00 2001 From: wjhuang2016 Date: Tue, 27 Feb 2024 18:14:17 +0800 Subject: [PATCH 07/17] fix test Signed-off-by: wjhuang2016 --- pkg/session/bootstrap.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pkg/session/bootstrap.go b/pkg/session/bootstrap.go index b7d28c934fcb9..71afd6a0387ae 100644 --- a/pkg/session/bootstrap.go +++ b/pkg/session/bootstrap.go @@ -1305,7 +1305,7 @@ func forceToLeader(ctx context.Context, s sessiontypes.Session) error { dom := domain.GetDomain(s) for !dom.DDL().OwnerManager().IsOwner() { ownerID, err := dom.DDL().OwnerManager().GetOwnerID(ctx) - if errors.ErrorEqual(err, concurrency.ErrElectionNoLeader) { + if errors.ErrorEqual(err, concurrency.ErrElectionNoLeader) || strings.Contains(err.Error(), "no owner") { time.Sleep(50 * time.Millisecond) continue } else if err != nil { From 2af47324705a162ef513eeea6a61c5feffeb7962 Mon Sep 17 00:00:00 2001 From: wjhuang2016 Date: Fri, 2 Aug 2024 16:06:44 +0800 Subject: [PATCH 08/17] done Signed-off-by: wjhuang2016 --- pkg/session/bootstrap.go | 48 +++++++++++++++++++++++++++++++++++----- 1 file changed, 43 insertions(+), 5 deletions(-) diff --git a/pkg/session/bootstrap.go b/pkg/session/bootstrap.go index 82512a3a72c0e..bdaf06dcd9418 100644 --- a/pkg/session/bootstrap.go +++ b/pkg/session/bootstrap.go @@ -63,6 +63,8 @@ import ( "go.uber.org/zap" ) +var bootstrapOwnerKey = "/tidb/distributeLock/" + const ( // CreateUserTable is the SQL statement creates User table in system db. // WARNING: There are some limitations on altering the schema of mysql.user table. @@ -1345,6 +1347,35 @@ var ( SupportUpgradeHTTPOpVer int64 = version174 ) +func acquireLock(s sessiontypes.Session) bool { + cli := domain.GetDomain(s).GetEtcdClient() + // The lock is used to make sure only one TiDB server is bootstrapping the system. + etcdSession, err := concurrency.NewSession(cli) + if err != nil { + return false + } + mu := concurrency.NewMutex(etcdSession, bootstrapOwnerKey) + err = mu.Lock(context.Background()) + if err != nil { + return false + } + return true +} + +func releaseLock(s sessiontypes.Session) { + cli := domain.GetDomain(s).GetEtcdClient() + etcdSession, err := concurrency.NewSession(cli) + if err != nil { + return + } + mu := concurrency.NewMutex(etcdSession, bootstrapOwnerKey) + err = mu.Unlock(context.Background()) + if err != nil { + logutil.BgLogger().Error("release lock failed", zap.Error(err)) + return + } +} + func forceToLeader(ctx context.Context, s sessiontypes.Session) error { dom := domain.GetDomain(s) for !dom.DDL().OwnerManager().IsOwner() { @@ -1420,12 +1451,19 @@ func upgrade(s sessiontypes.Session) { logutil.BgLogger().Fatal("[upgrade] force to owner failed", zap.Error(err)) } - ver, err := getBootstrapVersion(s) - terror.MustNil(err) - if ver >= currentBootstrapVersion { - // It is already bootstrapped/upgraded by a higher version TiDB server. - return + var ver int64 + for { + acquireLock(s) + ver, err = getBootstrapVersion(s) + terror.MustNil(err) + if ver >= currentBootstrapVersion { + // It is already bootstrapped/upgraded by a higher version TiDB server. + releaseLock(s) + return + } + break } + defer releaseLock(s) checkDistTask(s, ver) printClusterState(s, ver) From dde17df013e86beb7398d69447cb08c5ce64b0eb Mon Sep 17 00:00:00 2001 From: wjhuang2016 Date: Fri, 2 Aug 2024 16:43:51 +0800 Subject: [PATCH 09/17] fix Signed-off-by: wjhuang2016 --- pkg/session/bootstrap.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pkg/session/bootstrap.go b/pkg/session/bootstrap.go index bdaf06dcd9418..eb6270d01ca15 100644 --- a/pkg/session/bootstrap.go +++ b/pkg/session/bootstrap.go @@ -1380,7 +1380,7 @@ func forceToLeader(ctx context.Context, s sessiontypes.Session) error { dom := domain.GetDomain(s) for !dom.DDL().OwnerManager().IsOwner() { ownerID, err := dom.DDL().OwnerManager().GetOwnerID(ctx) - if errors.ErrorEqual(err, concurrency.ErrElectionNoLeader) || strings.Contains(err.Error(), "no owner") { + if err != nil && (errors.ErrorEqual(err, concurrency.ErrElectionNoLeader) || strings.Contains(err.Error(), "no owner")) { time.Sleep(50 * time.Millisecond) continue } else if err != nil { From d281c3352e0be9cfc439f5e48ef0e7b52f47f7c5 Mon Sep 17 00:00:00 2001 From: wjhuang2016 Date: Mon, 5 Aug 2024 16:36:27 +0800 Subject: [PATCH 10/17] fix Signed-off-by: wjhuang2016 --- pkg/session/session.go | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/pkg/session/session.go b/pkg/session/session.go index 730b5f6cf52ad..e80f14a9e208b 100644 --- a/pkg/session/session.go +++ b/pkg/session/session.go @@ -3438,6 +3438,10 @@ func bootstrapSessionImpl(store kv.Storage, createSessionsImpl func(store kv.Sto if ver == notBootstrapped { runInBootstrapSession(store, bootstrap) } else if ver < currentBootstrapVersion { + err = InitMDLVariable(store) + if err != nil { + return nil, err + } runInBootstrapSession(store, upgrade) } else { err = InitMDLVariable(store) From 10e133d014a414de39c4659b0ca77f3dbe420436 Mon Sep 17 00:00:00 2001 From: wjhuang2016 Date: Mon, 5 Aug 2024 16:45:38 +0800 Subject: [PATCH 11/17] fix Signed-off-by: wjhuang2016 --- pkg/session/bootstrap.go | 14 +++++++------- pkg/session/session.go | 4 ---- 2 files changed, 7 insertions(+), 11 deletions(-) diff --git a/pkg/session/bootstrap.go b/pkg/session/bootstrap.go index eb6270d01ca15..592c467a0aeae 100644 --- a/pkg/session/bootstrap.go +++ b/pkg/session/bootstrap.go @@ -1446,7 +1446,13 @@ func checkDistTask(s sessiontypes.Session, ver int64) { // upgrade function will do some upgrade works, when the system is bootstrapped by low version TiDB server // For example, add new system variables into mysql.global_variables table. func upgrade(s sessiontypes.Session) { - err := forceToLeader(context.Background(), s) + // Do upgrade works then update bootstrap version. + isNull, err := InitMDLVariableForUpgrade(s.GetStore()) + if err != nil { + logutil.BgLogger().Fatal("[upgrade] init metadata lock failed", zap.Error(err)) + } + + err = forceToLeader(context.Background(), s) if err != nil { logutil.BgLogger().Fatal("[upgrade] force to owner failed", zap.Error(err)) } @@ -1468,12 +1474,6 @@ func upgrade(s sessiontypes.Session) { checkDistTask(s, ver) printClusterState(s, ver) - // Do upgrade works then update bootstrap version. - isNull, err := InitMDLVariableForUpgrade(s.GetStore()) - if err != nil { - logutil.BgLogger().Fatal("[upgrade] init metadata lock failed", zap.Error(err)) - } - if isNull { upgradeToVer99Before(s) } diff --git a/pkg/session/session.go b/pkg/session/session.go index e80f14a9e208b..730b5f6cf52ad 100644 --- a/pkg/session/session.go +++ b/pkg/session/session.go @@ -3438,10 +3438,6 @@ func bootstrapSessionImpl(store kv.Storage, createSessionsImpl func(store kv.Sto if ver == notBootstrapped { runInBootstrapSession(store, bootstrap) } else if ver < currentBootstrapVersion { - err = InitMDLVariable(store) - if err != nil { - return nil, err - } runInBootstrapSession(store, upgrade) } else { err = InitMDLVariable(store) From ae883946c732aee01474224e41fa8dffe801f1f0 Mon Sep 17 00:00:00 2001 From: wjhuang2016 Date: Mon, 5 Aug 2024 16:57:35 +0800 Subject: [PATCH 12/17] fix Signed-off-by: wjhuang2016 --- pkg/session/bootstrap.go | 22 ++++++++-------------- 1 file changed, 8 insertions(+), 14 deletions(-) diff --git a/pkg/session/bootstrap.go b/pkg/session/bootstrap.go index 592c467a0aeae..91e4bebf8b5da 100644 --- a/pkg/session/bootstrap.go +++ b/pkg/session/bootstrap.go @@ -1356,10 +1356,7 @@ func acquireLock(s sessiontypes.Session) bool { } mu := concurrency.NewMutex(etcdSession, bootstrapOwnerKey) err = mu.Lock(context.Background()) - if err != nil { - return false - } - return true + return err == nil } func releaseLock(s sessiontypes.Session) { @@ -1458,16 +1455,13 @@ func upgrade(s sessiontypes.Session) { } var ver int64 - for { - acquireLock(s) - ver, err = getBootstrapVersion(s) - terror.MustNil(err) - if ver >= currentBootstrapVersion { - // It is already bootstrapped/upgraded by a higher version TiDB server. - releaseLock(s) - return - } - break + acquireLock(s) + ver, err = getBootstrapVersion(s) + terror.MustNil(err) + if ver >= currentBootstrapVersion { + // It is already bootstrapped/upgraded by a higher version TiDB server. + releaseLock(s) + return } defer releaseLock(s) From d0641a9445a42d8cbe22dfc7d405be960d884669 Mon Sep 17 00:00:00 2001 From: wjhuang2016 Date: Thu, 19 Sep 2024 21:01:31 +0800 Subject: [PATCH 13/17] fix Signed-off-by: wjhuang2016 --- pkg/session/bootstrap.go | 26 +++++++++++++++++++++++--- 1 file changed, 23 insertions(+), 3 deletions(-) diff --git a/pkg/session/bootstrap.go b/pkg/session/bootstrap.go index e8558bd053746..c6f90a3cd22b7 100644 --- a/pkg/session/bootstrap.go +++ b/pkg/session/bootstrap.go @@ -1373,9 +1373,20 @@ var ( ) func acquireLock(s sessiontypes.Session) bool { - cli := domain.GetDomain(s).GetEtcdClient() + dom := domain.GetDomain(s) + if dom == nil { + logutil.BgLogger().Warn("domain is nil") + return false + } + cli := dom.GetEtcdClient() + if cli == nil { + logutil.BgLogger().Warn("etcd client is nil") + return false + } // The lock is used to make sure only one TiDB server is bootstrapping the system. - lease, err := cli.Grant(context.Background(), 30) + ctx, cancel := context.WithTimeout(context.Background(), 30*time.Second) + lease, err := cli.Grant(ctx, 30) + cancel() if err != nil { return false } @@ -1389,7 +1400,16 @@ func acquireLock(s sessiontypes.Session) bool { } func releaseLock(s sessiontypes.Session) { - cli := domain.GetDomain(s).GetEtcdClient() + dom := domain.GetDomain(s) + if dom == nil { + logutil.BgLogger().Warn("domain is nil") + return + } + cli := dom.GetEtcdClient() + if cli == nil { + logutil.BgLogger().Warn("etcd client is nil") + return + } etcdSession, err := concurrency.NewSession(cli) if err != nil { return From 4e780bed3c1fd37ee87aec51363bdfa147879fbd Mon Sep 17 00:00:00 2001 From: wjhuang2016 Date: Fri, 20 Sep 2024 17:42:45 +0800 Subject: [PATCH 14/17] fix Signed-off-by: wjhuang2016 --- pkg/session/bootstrap.go | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/pkg/session/bootstrap.go b/pkg/session/bootstrap.go index c6f90a3cd22b7..23c12ddff8a05 100644 --- a/pkg/session/bootstrap.go +++ b/pkg/session/bootstrap.go @@ -1498,11 +1498,6 @@ func upgrade(s sessiontypes.Session) { logutil.BgLogger().Fatal("[upgrade] init metadata lock failed", zap.Error(err)) } - err = forceToLeader(context.Background(), s) - if err != nil { - logutil.BgLogger().Fatal("[upgrade] force to owner failed", zap.Error(err)) - } - var ver int64 acquireLock(s) ver, err = getBootstrapVersion(s) @@ -1514,6 +1509,11 @@ func upgrade(s sessiontypes.Session) { } defer releaseLock(s) + err = forceToLeader(context.Background(), s) + if err != nil { + logutil.BgLogger().Fatal("[upgrade] force to owner failed", zap.Error(err)) + } + checkDistTask(s, ver) printClusterState(s, ver) From 4b713f1bfc4e17a17ff23f37422944ae1cfa45e4 Mon Sep 17 00:00:00 2001 From: wjhuang2016 Date: Tue, 24 Sep 2024 16:21:28 +0800 Subject: [PATCH 15/17] fix Signed-off-by: wjhuang2016 --- pkg/session/bootstrap.go | 15 +++++++++++++-- 1 file changed, 13 insertions(+), 2 deletions(-) diff --git a/pkg/session/bootstrap.go b/pkg/session/bootstrap.go index 23c12ddff8a05..78f6b0f2552e7 100644 --- a/pkg/session/bootstrap.go +++ b/pkg/session/bootstrap.go @@ -63,7 +63,8 @@ import ( "go.uber.org/zap" ) -var bootstrapOwnerKey = "/tidb/distributeLock/" +// bootstrapOwnerKey is the key used by ddl owner mutex during boostrap. +var bootstrapOwnerKey = "/tidb/distributeDDLOwnerLock/" const ( // CreateUserTable is the SQL statement creates User table in system db. @@ -1499,7 +1500,17 @@ func upgrade(s sessiontypes.Session) { } var ver int64 - acquireLock(s) + i := 0 + var maxRetryCnt = 10 + for ; i < maxRetryCnt; i++ { + ok := acquireLock(s) + if ok { + break + } + } + if i == maxRetryCnt { + logutil.BgLogger().Fatal("[upgrade] get ddl owner distributed lock failed", zap.Error(err)) + } ver, err = getBootstrapVersion(s) terror.MustNil(err) if ver >= currentBootstrapVersion { From 132903e2932cc770b54704a4c174dce968955e99 Mon Sep 17 00:00:00 2001 From: wjhuang2016 Date: Wed, 25 Sep 2024 17:01:56 +0800 Subject: [PATCH 16/17] fix test Signed-off-by: wjhuang2016 --- pkg/session/bootstrap.go | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/pkg/session/bootstrap.go b/pkg/session/bootstrap.go index e4e89cc69e032..b191ade4e5bf5 100644 --- a/pkg/session/bootstrap.go +++ b/pkg/session/bootstrap.go @@ -1424,8 +1424,11 @@ func acquireLock(s sessiontypes.Session) (func(), bool) { } cli := dom.GetEtcdClient() if cli == nil { - logutil.BgLogger().Warn("etcd client is nil") - return nil, false + logutil.BgLogger().Warn("etcd client is nil, force to acquire ddl owner lock") + // Special handling for test. + return func() { + // do nothing + }, true } releaseFn, err := owner.AcquireDistributedLock(context.Background(), cli, ddl.DDLOwnerKey, 10) if err != nil { From f32b9502392bf766ad02ed901268b2f92a83fb8b Mon Sep 17 00:00:00 2001 From: wjhuang2016 Date: Wed, 25 Sep 2024 19:41:46 +0800 Subject: [PATCH 17/17] refine Signed-off-by: wjhuang2016 --- pkg/owner/manager.go | 9 ++++++++- pkg/session/bootstrap.go | 13 ++----------- 2 files changed, 10 insertions(+), 12 deletions(-) diff --git a/pkg/owner/manager.go b/pkg/owner/manager.go index 34189642b0b95..6e3c185b22861 100644 --- a/pkg/owner/manager.go +++ b/pkg/owner/manager.go @@ -522,7 +522,14 @@ func AcquireDistributedLock( return nil, err } mu := concurrency.NewMutex(se, key) - err = mu.Lock(ctx) + maxRetryCnt := 10 + err = util2.RunWithRetry(maxRetryCnt, util2.RetryInterval, func() (bool, error) { + err = mu.Lock(ctx) + if err != nil { + return true, err + } + return false, nil + }) if err != nil { err1 := se.Close() if err1 != nil { diff --git a/pkg/session/bootstrap.go b/pkg/session/bootstrap.go index b191ade4e5bf5..86494eddfbe20 100644 --- a/pkg/session/bootstrap.go +++ b/pkg/session/bootstrap.go @@ -1514,17 +1514,8 @@ func upgrade(s sessiontypes.Session) { } var ver int64 - i := 0 - var maxRetryCnt = 10 - var releaseFn func() - var ok bool - for ; i < maxRetryCnt; i++ { - releaseFn, ok = acquireLock(s) - if ok { - break - } - } - if i == maxRetryCnt { + releaseFn, ok := acquireLock(s) + if !ok { logutil.BgLogger().Fatal("[upgrade] get ddl owner distributed lock failed", zap.Error(err)) } ver, err = getBootstrapVersion(s)