From ee52655a51ee917abb10029205ff8105d5037062 Mon Sep 17 00:00:00 2001 From: fengou1 <85682690+fengou1@users.noreply.github.com> Date: Tue, 10 Jan 2023 16:42:23 +0800 Subject: [PATCH] br: reset cloud_admin and root after ebs restoration (#39986) (#40448) close pingcap/tidb#40418 --- br/pkg/restore/BUILD.bazel | 1 + br/pkg/restore/systable_restore.go | 103 +++++++++++++++++++++--- br/pkg/restore/systable_restore_test.go | 72 +++++++++++++++++ br/pkg/task/restore.go | 14 +++- br/pkg/task/restore_data.go | 18 +++++ 5 files changed, 193 insertions(+), 15 deletions(-) create mode 100644 br/pkg/restore/systable_restore_test.go diff --git a/br/pkg/restore/BUILD.bazel b/br/pkg/restore/BUILD.bazel index 772ec438976d7..7d0071047d8e8 100644 --- a/br/pkg/restore/BUILD.bazel +++ b/br/pkg/restore/BUILD.bazel @@ -115,6 +115,7 @@ go_test( "search_test.go", "split_test.go", "stream_metas_test.go", + "systable_restore_test.go", "util_test.go", ], embed = [":restore"], diff --git a/br/pkg/restore/systable_restore.go b/br/pkg/restore/systable_restore.go index 02ea0860d5425..ac21b0dba7e42 100644 --- a/br/pkg/restore/systable_restore.go +++ b/br/pkg/restore/systable_restore.go @@ -19,7 +19,11 @@ import ( "go.uber.org/zap" ) -const sysUserTableName = "user" +const ( + rootUser = "root" + sysUserTableName = "user" + cloudAdminUser = "cloud_admin" +) var statsTables = map[string]struct{}{ "stats_buckets": {}, @@ -51,14 +55,14 @@ var unRecoverableTable = map[string]struct{}{ // skip clearing or restoring 'cloud_admin'@'%' which is a special // user on TiDB Cloud var sysPrivilegeTableMap = map[string]string{ - "user": "not (user = 'cloud_admin' and host = '%')", // since v1.0.0 - "db": "not (user = 'cloud_admin' and host = '%')", // since v1.0.0 - "tables_priv": "not (user = 'cloud_admin' and host = '%')", // since v1.0.0 - "columns_priv": "not (user = 'cloud_admin' and host = '%')", // since v1.0.0 - "default_roles": "not (user = 'cloud_admin' and host = '%')", // since v3.0.0 - "role_edges": "not (to_user = 'cloud_admin' and to_host = '%')", // since v3.0.0 - "global_priv": "not (user = 'cloud_admin' and host = '%')", // since v3.0.8 - "global_grants": "not (user = 'cloud_admin' and host = '%')", // since v5.0.3 + "user": "(user = '%s' and host = '%%')", // since v1.0.0 + "db": "(user = '%s' and host = '%%')", // since v1.0.0 + "tables_priv": "(user = '%s' and host = '%%')", // since v1.0.0 + "columns_priv": "(user = '%s' and host = '%%')", // since v1.0.0 + "default_roles": "(user = '%s' and host = '%%')", // since v3.0.0 + "role_edges": "(to_user = '%s' and to_host = '%%')", // since v3.0.0 + "global_priv": "(user = '%s' and host = '%%')", // since v3.0.8 + "global_grants": "(user = '%s' and host = '%%')", // since v5.0.3 } func isUnrecoverableTable(tableName string) bool { @@ -71,6 +75,78 @@ func isStatsTable(tableName string) bool { return ok } +func generateResetSQLs(db *database, resetUsers []string) []string { + if db.Name.L != mysql.SystemDB { + return nil + } + sqls := make([]string, 0, 10) + // we only need reset root password once + rootReset := false + for tableName := range db.ExistingTables { + if sysPrivilegeTableMap[tableName] != "" { + for _, name := range resetUsers { + if strings.ToLower(name) == rootUser { + if !rootReset { + updateSQL := fmt.Sprintf("UPDATE %s.%s SET authentication_string='',"+ + " Shutdown_priv='Y',"+ + " Config_priv='Y'"+ + " WHERE USER='root' AND Host='%%';", + db.Name.L, sysUserTableName) + sqls = append(sqls, updateSQL) + rootReset = true + } else { + continue + } + } else { + /* #nosec G202: SQL string concatenation */ + whereClause := fmt.Sprintf("WHERE "+sysPrivilegeTableMap[tableName], name) + deleteSQL := fmt.Sprintf("DELETE FROM %s %s;", + utils.EncloseDBAndTable(db.Name.L, tableName), whereClause) + sqls = append(sqls, deleteSQL) + } + } + } + } + return sqls +} + +// ClearSystemUsers is used for volume-snapshot restoration. +// because we can not support restore user in some scenarios, for example in cloud. +// we'd better use this function to drop cloud_admin user after volume-snapshot restore. +func (rc *Client) ClearSystemUsers(ctx context.Context, resetUsers []string) error { + sysDB := mysql.SystemDB + db, ok := rc.getDatabaseByName(sysDB) + if !ok { + log.Warn("target database not exist, aborting", zap.String("database", sysDB)) + return nil + } + execSQL := func(sql string) error { + // SQLs here only contain table name and database name, seems it is no need to redact them. + if err := rc.db.se.Execute(ctx, sql); err != nil { + log.Warn("failed to clear system users", + zap.Stringer("database", db.Name), + zap.String("sql", sql), + zap.Error(err), + ) + return berrors.ErrUnknown.Wrap(err).GenWithStack("failed to execute %s", sql) + } + log.Info("successfully clear system users after restoration", + zap.Stringer("database", db.Name), + zap.String("sql", sql), + ) + return nil + } + + sqls := generateResetSQLs(db, resetUsers) + for _, sql := range sqls { + log.Info("reset system user for cloud", zap.String("sql", sql)) + if err := execSQL(sql); err != nil { + return err + } + } + return nil +} + // RestoreSystemSchemas restores the system schema(i.e. the `mysql` schema). // Detail see https://github.com/pingcap/br/issues/679#issuecomment-762592254. func (rc *Client) RestoreSystemSchemas(ctx context.Context, f filter.Filter) { @@ -203,14 +279,15 @@ func (rc *Client) replaceTemporaryTableToSystable(ctx context.Context, ti *model } if db.ExistingTables[tableName] != nil { - whereClause := "" + whereNotClause := "" if rc.fullClusterRestore && sysPrivilegeTableMap[tableName] != "" { // cloud_admin is a special user on tidb cloud, need to skip it. - whereClause = fmt.Sprintf("WHERE %s", sysPrivilegeTableMap[tableName]) + /* #nosec G202: SQL string concatenation */ + whereNotClause = fmt.Sprintf("WHERE NOT "+sysPrivilegeTableMap[tableName], cloudAdminUser) log.Info("full cluster restore, delete existing data", zap.String("table", tableName), zap.Stringer("schema", db.Name)) deleteSQL := fmt.Sprintf("DELETE FROM %s %s;", - utils.EncloseDBAndTable(db.Name.L, tableName), whereClause) + utils.EncloseDBAndTable(db.Name.L, tableName), whereNotClause) if err := execSQL(deleteSQL); err != nil { return err } @@ -228,7 +305,7 @@ func (rc *Client) replaceTemporaryTableToSystable(ctx context.Context, ti *model utils.EncloseDBAndTable(db.Name.L, tableName), colListStr, colListStr, utils.EncloseDBAndTable(db.TemporaryName.L, tableName), - whereClause) + whereNotClause) return execSQL(replaceIntoSQL) } diff --git a/br/pkg/restore/systable_restore_test.go b/br/pkg/restore/systable_restore_test.go new file mode 100644 index 0000000000000..2371f066a43a1 --- /dev/null +++ b/br/pkg/restore/systable_restore_test.go @@ -0,0 +1,72 @@ +// Copyright 2020 PingCAP, Inc. Licensed under Apache-2.0. + +package restore + +import ( + "regexp" + "testing" + + "github.com/pingcap/tidb/br/pkg/utils" + "github.com/pingcap/tidb/parser/model" + "github.com/stretchr/testify/require" +) + +func testTableInfo(name string) *model.TableInfo { + return &model.TableInfo{ + Name: model.NewCIStr(name), + } +} + +func TestGenerateResetSQL(t *testing.T) { + // case #1: ignore non-mysql databases + mockDB := &database{ + ExistingTables: map[string]*model.TableInfo{}, + Name: model.NewCIStr("non-mysql"), + TemporaryName: utils.TemporaryDBName("non-mysql"), + } + for name := range sysPrivilegeTableMap { + mockDB.ExistingTables[name] = testTableInfo(name) + } + resetUsers := []string{"cloud_admin", "root"} + require.Equal(t, 0, len(generateResetSQLs(mockDB, resetUsers))) + + // case #2: ignore non expected table + mockDB = &database{ + ExistingTables: map[string]*model.TableInfo{}, + Name: model.NewCIStr("mysql"), + TemporaryName: utils.TemporaryDBName("mysql"), + } + for name := range sysPrivilegeTableMap { + name += "non_available" + mockDB.ExistingTables[name] = testTableInfo(name) + } + resetUsers = []string{"cloud_admin", "root"} + require.Equal(t, 0, len(generateResetSQLs(mockDB, resetUsers))) + + // case #3: only reset cloud admin account + for name := range sysPrivilegeTableMap { + mockDB.ExistingTables[name] = testTableInfo(name) + } + resetUsers = []string{"cloud_admin"} + sqls := generateResetSQLs(mockDB, resetUsers) + require.Equal(t, 8, len(sqls)) + for _, sql := range sqls { + // for cloud_admin we only generate DELETE sql + require.Regexp(t, regexp.MustCompile("DELETE*"), sql) + } + + // case #4: reset cloud admin/other account + resetUsers = []string{"cloud_admin", "cloud_other"} + sqls = generateResetSQLs(mockDB, resetUsers) + require.Equal(t, 16, len(sqls)) + for _, sql := range sqls { + // for cloud_admin/cloud_other we only generate DELETE sql + require.Regexp(t, regexp.MustCompile("DELETE*"), sql) + } + + // case #5: reset cloud admin && root account + resetUsers = []string{"cloud_admin", "root"} + sqls = generateResetSQLs(mockDB, resetUsers) + // 8 DELETE sqls for cloud admin and 1 UPDATE sql for root + require.Equal(t, 9, len(sqls)) +} diff --git a/br/pkg/task/restore.go b/br/pkg/task/restore.go index 83c22a29e61db..9ca210c0af2c7 100644 --- a/br/pkg/task/restore.go +++ b/br/pkg/task/restore.go @@ -62,6 +62,8 @@ const ( FlagPiTRBatchSize = "pitr-batch-size" FlagPiTRConcurrency = "pitr-concurrency" + FlagResetSysUsers = "reset-sys-users" + defaultPiTRBatchCount = 8 defaultPiTRBatchSize = 16 * 1024 * 1024 defaultRestoreConcurrency = 128 @@ -93,6 +95,8 @@ type RestoreCommonConfig struct { // determines whether enable restore sys table on default, see fullClusterRestore in restore/client.go WithSysTable bool `json:"with-sys-table" toml:"with-sys-table"` + + ResetSysUsers []string `json:"reset-sys-users" toml:"reset-sys-users"` } // adjust adjusts the abnormal config value in the current config. @@ -118,10 +122,12 @@ func DefineRestoreCommonFlags(flags *pflag.FlagSet) { flags.Uint(FlagPDConcurrency, defaultPDConcurrency, "concurrency pd-relative operations like split & scatter.") flags.Duration(FlagBatchFlushInterval, defaultBatchFlushInterval, - "after how long a restore batch would be auto sended.") + "after how long a restore batch would be auto sent.") flags.Uint(FlagDdlBatchSize, defaultFlagDdlBatchSize, - "batch size for ddl to create a batch of tabes once.") + "batch size for ddl to create a batch of tables once.") flags.Bool(flagWithSysTable, false, "whether restore system privilege tables on default setting") + flags.StringArrayP(FlagResetSysUsers, "", []string{"cloud_admin", "root"}, "whether reset these users after restoration") + _ = flags.MarkHidden(FlagResetSysUsers) _ = flags.MarkHidden(FlagMergeRegionSizeBytes) _ = flags.MarkHidden(FlagMergeRegionKeyCount) _ = flags.MarkHidden(FlagPDConcurrency) @@ -150,6 +156,10 @@ func (cfg *RestoreCommonConfig) ParseFromFlags(flags *pflag.FlagSet) error { return errors.Trace(err) } } + cfg.ResetSysUsers, err = flags.GetStringArray(FlagResetSysUsers) + if err != nil { + return errors.Trace(err) + } return errors.Trace(err) } diff --git a/br/pkg/task/restore_data.go b/br/pkg/task/restore_data.go index f8e286dd0e72b..6079da9fb6c71 100644 --- a/br/pkg/task/restore_data.go +++ b/br/pkg/task/restore_data.go @@ -19,6 +19,7 @@ import ( "github.com/pingcap/tidb/br/pkg/storage" "github.com/pingcap/tidb/br/pkg/summary" "github.com/pingcap/tidb/br/pkg/utils" + tidbconfig "github.com/pingcap/tidb/config" "go.uber.org/zap" ) @@ -71,6 +72,11 @@ func RunResolveKvData(c context.Context, g glue.Glue, cmdName string, cfg *Resto defer mgr.Close() keepaliveCfg.PermitWithoutStream = true + tc := tidbconfig.GetGlobalConfig() + tc.SkipRegisterToDashboard = true + tc.EnableGlobalKill = false + tidbconfig.StoreGlobalConfig(tc) + client := restore.NewRestoreClient(mgr.GetPDClient(), mgr.GetTLSConfig(), keepaliveCfg, false) restoreTS, err := client.GetTS(ctx) @@ -153,6 +159,18 @@ func RunResolveKvData(c context.Context, g glue.Glue, cmdName string, cfg *Resto //TODO: restore volume type into origin type //ModifyVolume(*ec2.ModifyVolumeInput) (*ec2.ModifyVolumeOutput, error) by backupmeta + // this is used for cloud restoration + err = client.Init(g, mgr.GetStorage()) + if err != nil { + return errors.Trace(err) + } + defer client.Close() + log.Info("start to clear system user for cloud") + err = client.ClearSystemUsers(ctx, cfg.ResetSysUsers) + + if err != nil { + return errors.Trace(err) + } progress.Close() summary.CollectDuration("restore duration", time.Since(startAll))