From bff70bd052a077e3f8f87632256c52fedd97d7b0 Mon Sep 17 00:00:00 2001 From: 3pointer Date: Fri, 23 Dec 2022 11:39:44 +0800 Subject: [PATCH 01/11] support reset_tiflash after ebs restoration --- br/pkg/restore/client.go | 36 ++++++++++++++++++++++++++++++++++++ br/pkg/task/restore_data.go | 4 ++++ 2 files changed, 40 insertions(+) diff --git a/br/pkg/restore/client.go b/br/pkg/restore/client.go index d7811574915f2..ba061d571938e 100644 --- a/br/pkg/restore/client.go +++ b/br/pkg/restore/client.go @@ -24,6 +24,7 @@ import ( "github.com/pingcap/log" "github.com/pingcap/tidb/br/pkg/backup" "github.com/pingcap/tidb/br/pkg/checksum" + "github.com/pingcap/tidb/br/pkg/conn" "github.com/pingcap/tidb/br/pkg/conn/util" berrors "github.com/pingcap/tidb/br/pkg/errors" "github.com/pingcap/tidb/br/pkg/glue" @@ -2721,3 +2722,38 @@ func CheckNewCollationEnable( log.Info("set new_collation_enabled", zap.Bool("new_collation_enabled", enabled)) return nil } + +func (rc *Client) ResetTiFlashReplicas(ctx context.Context, g glue.Glue, mgr *conn.Mgr) error { + info := mgr.GetDomain().InfoSchema() + allSchema := info.AllSchemas() + recorder := tiflashrec.New() + + tiFlashStoreCount, err := rc.getTiFlashNodeCount(ctx) + for _, s := range allSchema { + for _, t := range s.Tables { + if t.TiFlashReplica != nil && t.TiFlashReplica.Count > tiFlashStoreCount { + if recorder != nil && t.TiFlashReplica != nil { + recorder.AddTable(t.ID, *t.TiFlashReplica) + } + } + } + } + if err != nil { + return errors.Trace(err) + } + sqls := recorder.GenerateAlterTableDDLs(info) + log.Info("Generating SQLs for resetting TiFlash Replica", + zap.Strings("sqls", sqls)) + return g.UseOneShotSession(mgr.GetStorage(), false, func(se glue.Session) error { + for _, sql := range sqls { + if errExec := se.ExecuteInternal(ctx, sql); errExec != nil { + logutil.WarnTerm("Failed to restore tiflash replica config, you may execute the sql restore it manually.", + logutil.ShortError(errExec), + zap.String("sql", sql), + ) + } + } + return nil + }) + +} diff --git a/br/pkg/task/restore_data.go b/br/pkg/task/restore_data.go index f8e286dd0e72b..c75e5691d813b 100644 --- a/br/pkg/task/restore_data.go +++ b/br/pkg/task/restore_data.go @@ -154,6 +154,10 @@ func RunResolveKvData(c context.Context, g glue.Glue, cmdName string, cfg *Resto //TODO: restore volume type into origin type //ModifyVolume(*ec2.ModifyVolumeInput) (*ec2.ModifyVolumeOutput, error) by backupmeta + // since we cannot reset tiflash automaticlly. so we should start it manually + if err = client.ResetTiFlashReplicas(ctx, g, mgr); err != nil { + return errors.Trace(err) + } progress.Close() summary.CollectDuration("restore duration", time.Since(startAll)) summary.SetSuccessStatus(true) From 91650388bd1ed28730a119585c29328fe39d9c32 Mon Sep 17 00:00:00 2001 From: 3pointer Date: Fri, 23 Dec 2022 15:58:05 +0800 Subject: [PATCH 02/11] fix --- br/pkg/task/restore_data.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/br/pkg/task/restore_data.go b/br/pkg/task/restore_data.go index c75e5691d813b..b851fcef3317e 100644 --- a/br/pkg/task/restore_data.go +++ b/br/pkg/task/restore_data.go @@ -64,7 +64,7 @@ func RunResolveKvData(c context.Context, g glue.Glue, cmdName string, cfg *Resto summary.CollectUint("resolve-ts", resolveTS) keepaliveCfg := GetKeepalive(&cfg.Config) - mgr, err := NewMgr(ctx, g, cfg.PD, cfg.TLS, keepaliveCfg, cfg.CheckRequirements, false, conn.NormalVersionChecker) + mgr, err := NewMgr(ctx, g, cfg.PD, cfg.TLS, keepaliveCfg, cfg.CheckRequirements, true, conn.NormalVersionChecker) if err != nil { return errors.Trace(err) } From e75917a4898da38c6647efd98b02f830c1cf33ac Mon Sep 17 00:00:00 2001 From: 3pointer Date: Fri, 23 Dec 2022 16:59:10 +0800 Subject: [PATCH 03/11] fix nil --- br/pkg/restore/client.go | 11 +++++++---- br/pkg/task/restore_data.go | 4 ++-- 2 files changed, 9 insertions(+), 6 deletions(-) diff --git a/br/pkg/restore/client.go b/br/pkg/restore/client.go index ba061d571938e..b9c9a091348ae 100644 --- a/br/pkg/restore/client.go +++ b/br/pkg/restore/client.go @@ -24,7 +24,6 @@ import ( "github.com/pingcap/log" "github.com/pingcap/tidb/br/pkg/backup" "github.com/pingcap/tidb/br/pkg/checksum" - "github.com/pingcap/tidb/br/pkg/conn" "github.com/pingcap/tidb/br/pkg/conn/util" berrors "github.com/pingcap/tidb/br/pkg/errors" "github.com/pingcap/tidb/br/pkg/glue" @@ -2723,8 +2722,12 @@ func CheckNewCollationEnable( return nil } -func (rc *Client) ResetTiFlashReplicas(ctx context.Context, g glue.Glue, mgr *conn.Mgr) error { - info := mgr.GetDomain().InfoSchema() +func (rc *Client) ResetTiFlashReplicas(ctx context.Context, g glue.Glue, storage kv.Storage) error { + dom, err := g.GetDomain(storage) + if err != nil { + return errors.Trace(err) + } + info := dom.InfoSchema() allSchema := info.AllSchemas() recorder := tiflashrec.New() @@ -2744,7 +2747,7 @@ func (rc *Client) ResetTiFlashReplicas(ctx context.Context, g glue.Glue, mgr *co sqls := recorder.GenerateAlterTableDDLs(info) log.Info("Generating SQLs for resetting TiFlash Replica", zap.Strings("sqls", sqls)) - return g.UseOneShotSession(mgr.GetStorage(), false, func(se glue.Session) error { + return g.UseOneShotSession(storage, false, func(se glue.Session) error { for _, sql := range sqls { if errExec := se.ExecuteInternal(ctx, sql); errExec != nil { logutil.WarnTerm("Failed to restore tiflash replica config, you may execute the sql restore it manually.", diff --git a/br/pkg/task/restore_data.go b/br/pkg/task/restore_data.go index b851fcef3317e..fc82d011abb0d 100644 --- a/br/pkg/task/restore_data.go +++ b/br/pkg/task/restore_data.go @@ -64,7 +64,7 @@ func RunResolveKvData(c context.Context, g glue.Glue, cmdName string, cfg *Resto summary.CollectUint("resolve-ts", resolveTS) keepaliveCfg := GetKeepalive(&cfg.Config) - mgr, err := NewMgr(ctx, g, cfg.PD, cfg.TLS, keepaliveCfg, cfg.CheckRequirements, true, conn.NormalVersionChecker) + mgr, err := NewMgr(ctx, g, cfg.PD, cfg.TLS, keepaliveCfg, cfg.CheckRequirements, false, conn.NormalVersionChecker) if err != nil { return errors.Trace(err) } @@ -155,7 +155,7 @@ func RunResolveKvData(c context.Context, g glue.Glue, cmdName string, cfg *Resto //ModifyVolume(*ec2.ModifyVolumeInput) (*ec2.ModifyVolumeOutput, error) by backupmeta // since we cannot reset tiflash automaticlly. so we should start it manually - if err = client.ResetTiFlashReplicas(ctx, g, mgr); err != nil { + if err = client.ResetTiFlashReplicas(ctx, g, mgr.GetStorage()); err != nil { return errors.Trace(err) } progress.Close() From d2be1e4ff3e6f9b9ce8297131f11d750f9eb6cde Mon Sep 17 00:00:00 2001 From: 3pointer Date: Fri, 23 Dec 2022 18:16:44 +0800 Subject: [PATCH 04/11] fix count --- br/pkg/restore/client.go | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/br/pkg/restore/client.go b/br/pkg/restore/client.go index b9c9a091348ae..245ed1d624cfb 100644 --- a/br/pkg/restore/client.go +++ b/br/pkg/restore/client.go @@ -2732,12 +2732,12 @@ func (rc *Client) ResetTiFlashReplicas(ctx context.Context, g glue.Glue, storage recorder := tiflashrec.New() tiFlashStoreCount, err := rc.getTiFlashNodeCount(ctx) + log.Info("get tiflash store count for resetting TiFlash Replica", + zap.Uint64("count", tiFlashStoreCount)) for _, s := range allSchema { for _, t := range s.Tables { - if t.TiFlashReplica != nil && t.TiFlashReplica.Count > tiFlashStoreCount { - if recorder != nil && t.TiFlashReplica != nil { - recorder.AddTable(t.ID, *t.TiFlashReplica) - } + if t.TiFlashReplica != nil && t.TiFlashReplica.Count <= tiFlashStoreCount { + recorder.AddTable(t.ID, *t.TiFlashReplica) } } } From be2f707664bed1e572f3518150670d09a715965f Mon Sep 17 00:00:00 2001 From: 3pointer Date: Fri, 23 Dec 2022 20:32:27 +0800 Subject: [PATCH 05/11] reset to 0 --- br/pkg/restore/tiflashrec/tiflash_recorder.go | 49 ++++++++++++++++++- 1 file changed, 47 insertions(+), 2 deletions(-) diff --git a/br/pkg/restore/tiflashrec/tiflash_recorder.go b/br/pkg/restore/tiflashrec/tiflash_recorder.go index 31dde982a7b69..dea2d1ea13b35 100644 --- a/br/pkg/restore/tiflashrec/tiflash_recorder.go +++ b/br/pkg/restore/tiflashrec/tiflash_recorder.go @@ -79,6 +79,43 @@ func (r *TiFlashRecorder) Rewrite(oldID int64, newID int64) { } } +func (r *TiFlashRecorder) GenerateResetAlterTableDDLs(info infoschema.InfoSchema) []string { + items := make([]string, 0, len(r.items)) + r.Iterate(func(id int64, replica model.TiFlashReplicaInfo) { + table, ok := info.TableByID(id) + if !ok { + log.Warn("Table do not exist, skipping", zap.Int64("id", id)) + return + } + schema, ok := info.SchemaByTable(table.Meta()) + if !ok { + log.Warn("Schema do not exist, skipping", zap.Int64("id", id), zap.Stringer("table", table.Meta().Name)) + return + } + altTableSpec, err := alterTableSpecOf(replica, true) + if err != nil { + log.Warn("Failed to generate the alter table spec", logutil.ShortError(err), zap.Any("replica", replica)) + return + } + items = append(items, fmt.Sprintf( + "ALTER TABLE %s %s", + utils.EncloseDBAndTable(schema.Name.O, table.Meta().Name.O), + altTableSpec), + ) + altTableSpec, err = alterTableSpecOf(replica, false) + if err != nil { + log.Warn("Failed to generate the alter table spec", logutil.ShortError(err), zap.Any("replica", replica)) + return + } + items = append(items, fmt.Sprintf( + "ALTER TABLE %s %s", + utils.EncloseDBAndTable(schema.Name.O, table.Meta().Name.O), + altTableSpec), + ) + }) + return items +} + func (r *TiFlashRecorder) GenerateAlterTableDDLs(info infoschema.InfoSchema) []string { items := make([]string, 0, len(r.items)) r.Iterate(func(id int64, replica model.TiFlashReplicaInfo) { @@ -92,7 +129,7 @@ func (r *TiFlashRecorder) GenerateAlterTableDDLs(info infoschema.InfoSchema) []s log.Warn("Schema do not exist, skipping", zap.Int64("id", id), zap.Stringer("table", table.Meta().Name)) return } - altTableSpec, err := alterTableSpecOf(replica) + altTableSpec, err := alterTableSpecOf(replica, false) if err != nil { log.Warn("Failed to generate the alter table spec", logutil.ShortError(err), zap.Any("replica", replica)) return @@ -106,7 +143,7 @@ func (r *TiFlashRecorder) GenerateAlterTableDDLs(info infoschema.InfoSchema) []s return items } -func alterTableSpecOf(replica model.TiFlashReplicaInfo) (string, error) { +func alterTableSpecOf(replica model.TiFlashReplicaInfo, reset bool) (string, error) { spec := &ast.AlterTableSpec{ Tp: ast.AlterTableSetTiFlashReplica, TiFlashReplica: &ast.TiFlashReplicaSpec{ @@ -114,6 +151,14 @@ func alterTableSpecOf(replica model.TiFlashReplicaInfo) (string, error) { Labels: replica.LocationLabels, }, } + if reset { + spec = &ast.AlterTableSpec{ + Tp: ast.AlterTableSetTiFlashReplica, + TiFlashReplica: &ast.TiFlashReplicaSpec{ + Count: 0, + }, + } + } buf := bytes.NewBuffer(make([]byte, 0, 32)) restoreCx := format.NewRestoreCtx( From 48ab0d3ad25e073e9e81063944cdd924fe9ddd56 Mon Sep 17 00:00:00 2001 From: 3pointer Date: Fri, 23 Dec 2022 20:33:45 +0800 Subject: [PATCH 06/11] reset to 0 --- br/pkg/restore/client.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/br/pkg/restore/client.go b/br/pkg/restore/client.go index 245ed1d624cfb..f8dce3a252205 100644 --- a/br/pkg/restore/client.go +++ b/br/pkg/restore/client.go @@ -2744,7 +2744,7 @@ func (rc *Client) ResetTiFlashReplicas(ctx context.Context, g glue.Glue, storage if err != nil { return errors.Trace(err) } - sqls := recorder.GenerateAlterTableDDLs(info) + sqls := recorder.GenerateResetAlterTableDDLs(info) log.Info("Generating SQLs for resetting TiFlash Replica", zap.Strings("sqls", sqls)) return g.UseOneShotSession(storage, false, func(se glue.Session) error { From 2b43931898320ec724f74f63dab8afbe65d4e0bf Mon Sep 17 00:00:00 2001 From: 3pointer Date: Tue, 27 Dec 2022 12:02:59 +0800 Subject: [PATCH 07/11] wait for tiflash starts --- br/pkg/restore/client.go | 66 +++++++++++++++++-- br/pkg/restore/tiflashrec/tiflash_recorder.go | 2 +- 2 files changed, 61 insertions(+), 7 deletions(-) diff --git a/br/pkg/restore/client.go b/br/pkg/restore/client.go index f8dce3a252205..a08abb66cf846 100644 --- a/br/pkg/restore/client.go +++ b/br/pkg/restore/client.go @@ -2731,22 +2731,52 @@ func (rc *Client) ResetTiFlashReplicas(ctx context.Context, g glue.Glue, storage allSchema := info.AllSchemas() recorder := tiflashrec.New() - tiFlashStoreCount, err := rc.getTiFlashNodeCount(ctx) - log.Info("get tiflash store count for resetting TiFlash Replica", - zap.Uint64("count", tiFlashStoreCount)) + expectTiFlashStoreCount := uint64(0) + needTiFlash := false for _, s := range allSchema { for _, t := range s.Tables { - if t.TiFlashReplica != nil && t.TiFlashReplica.Count <= tiFlashStoreCount { + if t.TiFlashReplica != nil { + expectTiFlashStoreCount = mathutil.Max(expectTiFlashStoreCount, t.TiFlashReplica.Count) recorder.AddTable(t.ID, *t.TiFlashReplica) + needTiFlash = true } } } + if !needTiFlash { + log.Info("no need to set tiflash replica, since there is no tables enable tiflash replica") + return nil + } + // we wait for ten minutes to wait tiflash starts. + // since tiflash only starts when set unmark recovery mode finished. + timeoutCtx, cancel := context.WithTimeout(ctx, 10*time.Minute) + defer cancel() + err = utils.WithRetry(timeoutCtx, func() error { + tiFlashStoreCount, err := rc.getTiFlashNodeCount(ctx) + log.Info("get tiflash store count for resetting TiFlash Replica", + zap.Uint64("count", tiFlashStoreCount)) + if err != nil { + return errors.Trace(err) + } + if tiFlashStoreCount < expectTiFlashStoreCount { + log.Info("still waiting for all tiflash starts", + zap.Uint64("expect", expectTiFlashStoreCount), + zap.Uint64("actual", tiFlashStoreCount), + ) + return errors.New("tiflash store count is less than expected") + } + return nil + }, &waitTiFlashBackoffer{ + Attempts: 30, + BaseBackoff: 4 * time.Second, + }) if err != nil { - return errors.Trace(err) + return err } + sqls := recorder.GenerateResetAlterTableDDLs(info) - log.Info("Generating SQLs for resetting TiFlash Replica", + log.Info("Generating SQLs for resetting tiflash replica", zap.Strings("sqls", sqls)) + return g.UseOneShotSession(storage, false, func(se glue.Session) error { for _, sql := range sqls { if errExec := se.ExecuteInternal(ctx, sql); errExec != nil { @@ -2760,3 +2790,27 @@ func (rc *Client) ResetTiFlashReplicas(ctx context.Context, g glue.Glue, storage }) } + +type waitTiFlashBackoffer struct { + Attempts int + BaseBackoff time.Duration +} + +// NextBackoff returns a duration to wait before retrying again +func (b *waitTiFlashBackoffer) NextBackoff(error) time.Duration { + bo := b.BaseBackoff + b.Attempts-- + if b.Attempts == 0 { + return 0 + } + b.BaseBackoff *= 2 + if b.BaseBackoff > 32*time.Second { + b.BaseBackoff = 32 * time.Second + } + return bo +} + +// Attempt returns the remain attempt times +func (b *waitTiFlashBackoffer) Attempt() int { + return b.Attempts +} diff --git a/br/pkg/restore/tiflashrec/tiflash_recorder.go b/br/pkg/restore/tiflashrec/tiflash_recorder.go index dea2d1ea13b35..76afbc8c4754f 100644 --- a/br/pkg/restore/tiflashrec/tiflash_recorder.go +++ b/br/pkg/restore/tiflashrec/tiflash_recorder.go @@ -155,7 +155,7 @@ func alterTableSpecOf(replica model.TiFlashReplicaInfo, reset bool) (string, err spec = &ast.AlterTableSpec{ Tp: ast.AlterTableSetTiFlashReplica, TiFlashReplica: &ast.TiFlashReplicaSpec{ - Count: 0, + Count: 0, }, } } From 8d311d889f5f1cd1f8870a8264cc9dacf302e455 Mon Sep 17 00:00:00 2001 From: 3pointer Date: Wed, 28 Dec 2022 14:36:08 +0800 Subject: [PATCH 08/11] add test --- br/pkg/restore/client.go | 2 +- .../tiflashrec/tiflash_recorder_test.go | 30 +++++++++++++++++++ 2 files changed, 31 insertions(+), 1 deletion(-) diff --git a/br/pkg/restore/client.go b/br/pkg/restore/client.go index a08abb66cf846..d943fffa35be9 100644 --- a/br/pkg/restore/client.go +++ b/br/pkg/restore/client.go @@ -2758,7 +2758,7 @@ func (rc *Client) ResetTiFlashReplicas(ctx context.Context, g glue.Glue, storage return errors.Trace(err) } if tiFlashStoreCount < expectTiFlashStoreCount { - log.Info("still waiting for all tiflash starts", + log.Info("still waiting for enough tiflash store start", zap.Uint64("expect", expectTiFlashStoreCount), zap.Uint64("actual", tiFlashStoreCount), ) diff --git a/br/pkg/restore/tiflashrec/tiflash_recorder_test.go b/br/pkg/restore/tiflashrec/tiflash_recorder_test.go index b01272caeddc5..b8ed117ffa017 100644 --- a/br/pkg/restore/tiflashrec/tiflash_recorder_test.go +++ b/br/pkg/restore/tiflashrec/tiflash_recorder_test.go @@ -170,3 +170,33 @@ func TestGenSql(t *testing.T) { "ALTER TABLE `test`.`evils` SET TIFLASH REPLICA 1 LOCATION LABELS 'kIll''; OR DROP DATABASE test --', 'dEaTh with " + `\\"quoting\\"` + "'", }) } + +func TestGenResetSql(t *testing.T) { + tInfo := func(id int, name string) *model.TableInfo { + return &model.TableInfo{ + ID: int64(id), + Name: model.NewCIStr(name), + } + } + fakeInfo := infoschema.MockInfoSchema([]*model.TableInfo{ + tInfo(1, "fruits"), + tInfo(2, "whisper"), + }) + rec := tiflashrec.New() + rec.AddTable(1, model.TiFlashReplicaInfo{ + Count: 1, + }) + rec.AddTable(2, model.TiFlashReplicaInfo{ + Count: 2, + LocationLabels: []string{"climate"}, + }) + + sqls := rec.GenerateResetAlterTableDDLs(fakeInfo) + require.ElementsMatch(t, sqls, []string{ + "ALTER TABLE `test`.`whisper` SET TIFLASH REPLICA 0", + "ALTER TABLE `test`.`whisper` SET TIFLASH REPLICA 2 LOCATION LABELS 'climate'", + "ALTER TABLE `test`.`fruits` SET TIFLASH REPLICA 0", + "ALTER TABLE `test`.`fruits` SET TIFLASH REPLICA 1", + }) +} + From 0ce8db51b3d103ef4f4e2f75d93480c855f1e1d8 Mon Sep 17 00:00:00 2001 From: 3pointer Date: Tue, 3 Jan 2023 13:43:47 +0800 Subject: [PATCH 09/11] fix lint --- br/pkg/restore/client.go | 137 +++++++++++++++++++-------------------- 1 file changed, 68 insertions(+), 69 deletions(-) diff --git a/br/pkg/restore/client.go b/br/pkg/restore/client.go index d943fffa35be9..6f91a3b4deffc 100644 --- a/br/pkg/restore/client.go +++ b/br/pkg/restore/client.go @@ -2646,6 +2646,74 @@ func (rc *Client) SetWithSysTable(withSysTable bool) { rc.withSysTable = withSysTable } +func (rc *Client) ResetTiFlashReplicas(ctx context.Context, g glue.Glue, storage kv.Storage) error { + dom, err := g.GetDomain(storage) + if err != nil { + return errors.Trace(err) + } + info := dom.InfoSchema() + allSchema := info.AllSchemas() + recorder := tiflashrec.New() + + expectTiFlashStoreCount := uint64(0) + needTiFlash := false + for _, s := range allSchema { + for _, t := range s.Tables { + if t.TiFlashReplica != nil { + expectTiFlashStoreCount = mathutil.Max(expectTiFlashStoreCount, t.TiFlashReplica.Count) + recorder.AddTable(t.ID, *t.TiFlashReplica) + needTiFlash = true + } + } + } + if !needTiFlash { + log.Info("no need to set tiflash replica, since there is no tables enable tiflash replica") + return nil + } + // we wait for ten minutes to wait tiflash starts. + // since tiflash only starts when set unmark recovery mode finished. + timeoutCtx, cancel := context.WithTimeout(ctx, 10*time.Minute) + defer cancel() + err = utils.WithRetry(timeoutCtx, func() error { + tiFlashStoreCount, err := rc.getTiFlashNodeCount(ctx) + log.Info("get tiflash store count for resetting TiFlash Replica", + zap.Uint64("count", tiFlashStoreCount)) + if err != nil { + return errors.Trace(err) + } + if tiFlashStoreCount < expectTiFlashStoreCount { + log.Info("still waiting for enough tiflash store start", + zap.Uint64("expect", expectTiFlashStoreCount), + zap.Uint64("actual", tiFlashStoreCount), + ) + return errors.New("tiflash store count is less than expected") + } + return nil + }, &waitTiFlashBackoffer{ + Attempts: 30, + BaseBackoff: 4 * time.Second, + }) + if err != nil { + return err + } + + sqls := recorder.GenerateResetAlterTableDDLs(info) + log.Info("Generating SQLs for resetting tiflash replica", + zap.Strings("sqls", sqls)) + + return g.UseOneShotSession(storage, false, func(se glue.Session) error { + for _, sql := range sqls { + if errExec := se.ExecuteInternal(ctx, sql); errExec != nil { + logutil.WarnTerm("Failed to restore tiflash replica config, you may execute the sql restore it manually.", + logutil.ShortError(errExec), + zap.String("sql", sql), + ) + } + } + return nil + }) +} + // MockClient create a fake client used to test. func MockClient(dbs map[string]*utils.Database) *Client { return &Client{databases: dbs} @@ -2722,75 +2790,6 @@ func CheckNewCollationEnable( return nil } -func (rc *Client) ResetTiFlashReplicas(ctx context.Context, g glue.Glue, storage kv.Storage) error { - dom, err := g.GetDomain(storage) - if err != nil { - return errors.Trace(err) - } - info := dom.InfoSchema() - allSchema := info.AllSchemas() - recorder := tiflashrec.New() - - expectTiFlashStoreCount := uint64(0) - needTiFlash := false - for _, s := range allSchema { - for _, t := range s.Tables { - if t.TiFlashReplica != nil { - expectTiFlashStoreCount = mathutil.Max(expectTiFlashStoreCount, t.TiFlashReplica.Count) - recorder.AddTable(t.ID, *t.TiFlashReplica) - needTiFlash = true - } - } - } - if !needTiFlash { - log.Info("no need to set tiflash replica, since there is no tables enable tiflash replica") - return nil - } - // we wait for ten minutes to wait tiflash starts. - // since tiflash only starts when set unmark recovery mode finished. - timeoutCtx, cancel := context.WithTimeout(ctx, 10*time.Minute) - defer cancel() - err = utils.WithRetry(timeoutCtx, func() error { - tiFlashStoreCount, err := rc.getTiFlashNodeCount(ctx) - log.Info("get tiflash store count for resetting TiFlash Replica", - zap.Uint64("count", tiFlashStoreCount)) - if err != nil { - return errors.Trace(err) - } - if tiFlashStoreCount < expectTiFlashStoreCount { - log.Info("still waiting for enough tiflash store start", - zap.Uint64("expect", expectTiFlashStoreCount), - zap.Uint64("actual", tiFlashStoreCount), - ) - return errors.New("tiflash store count is less than expected") - } - return nil - }, &waitTiFlashBackoffer{ - Attempts: 30, - BaseBackoff: 4 * time.Second, - }) - if err != nil { - return err - } - - sqls := recorder.GenerateResetAlterTableDDLs(info) - log.Info("Generating SQLs for resetting tiflash replica", - zap.Strings("sqls", sqls)) - - return g.UseOneShotSession(storage, false, func(se glue.Session) error { - for _, sql := range sqls { - if errExec := se.ExecuteInternal(ctx, sql); errExec != nil { - logutil.WarnTerm("Failed to restore tiflash replica config, you may execute the sql restore it manually.", - logutil.ShortError(errExec), - zap.String("sql", sql), - ) - } - } - return nil - }) - -} - type waitTiFlashBackoffer struct { Attempts int BaseBackoff time.Duration From 10fa3330c904d93f8d2970604b38e2401a721236 Mon Sep 17 00:00:00 2001 From: 3pointer Date: Fri, 6 Jan 2023 13:39:46 +0800 Subject: [PATCH 10/11] fix lint --- br/pkg/restore/tiflashrec/tiflash_recorder_test.go | 1 - 1 file changed, 1 deletion(-) diff --git a/br/pkg/restore/tiflashrec/tiflash_recorder_test.go b/br/pkg/restore/tiflashrec/tiflash_recorder_test.go index b8ed117ffa017..f7316a1ed3133 100644 --- a/br/pkg/restore/tiflashrec/tiflash_recorder_test.go +++ b/br/pkg/restore/tiflashrec/tiflash_recorder_test.go @@ -199,4 +199,3 @@ func TestGenResetSql(t *testing.T) { "ALTER TABLE `test`.`fruits` SET TIFLASH REPLICA 1", }) } - From b91d8e87e9efd52c0c8aed8f203e2d66b7f999fc Mon Sep 17 00:00:00 2001 From: 3pointer Date: Fri, 6 Jan 2023 18:09:20 +0800 Subject: [PATCH 11/11] address comment --- br/pkg/restore/tiflashrec/tiflash_recorder.go | 3 +++ 1 file changed, 3 insertions(+) diff --git a/br/pkg/restore/tiflashrec/tiflash_recorder.go b/br/pkg/restore/tiflashrec/tiflash_recorder.go index 76afbc8c4754f..84707f05e1f1b 100644 --- a/br/pkg/restore/tiflashrec/tiflash_recorder.go +++ b/br/pkg/restore/tiflashrec/tiflash_recorder.go @@ -92,6 +92,9 @@ func (r *TiFlashRecorder) GenerateResetAlterTableDDLs(info infoschema.InfoSchema log.Warn("Schema do not exist, skipping", zap.Int64("id", id), zap.Stringer("table", table.Meta().Name)) return } + // Currently, we didn't backup tiflash cluster volume during volume snapshot backup, + // But the table has replica info after volume restoration. + // We should reset it to 0, then set it back. otherwise, it will return error when alter tiflash replica. altTableSpec, err := alterTableSpecOf(replica, true) if err != nil { log.Warn("Failed to generate the alter table spec", logutil.ShortError(err), zap.Any("replica", replica))