Skip to content

Commit

Permalink
Merge branch 'master' into fix_historical_stats_Log
Browse files Browse the repository at this point in the history
  • Loading branch information
hawkingrei authored Feb 7, 2023
2 parents 359cdd3 + b2cfcca commit aaeaff4
Show file tree
Hide file tree
Showing 89 changed files with 1,403 additions and 644 deletions.
2 changes: 1 addition & 1 deletion .bazelrc
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@ build:race --@io_bazel_rules_go//go/config:race --test_env=GORACE=halt_on_error=

test --test_env=TZ=Asia/Shanghai
test --test_output=errors --test_summary=testcase
test:ci --color=yes
test:ci --color=yes --spawn_strategy=local
test:ci --verbose_failures --test_verbose_timeout_warnings
test:ci --test_env=GO_TEST_WRAP_TESTV=1
test:ci --experimental_ui_max_stdouterr_bytes=104857600
Expand Down
12 changes: 6 additions & 6 deletions DEPS.bzl
Original file line number Diff line number Diff line change
Expand Up @@ -530,8 +530,8 @@ def go_deps():
name = "com_github_cloudfoundry_gosigar",
build_file_proto_mode = "disable",
importpath = "github.com/cloudfoundry/gosigar",
sum = "h1:T3MoGdugg1vdHn8Az7wDn7cZ4+QCjZph+eXf2CjSjo4=",
version = "v1.3.4",
sum = "h1:gIc08FbB3QPb+nAQhINIK/qhf5REKkY0FTGgRGXkcVc=",
version = "v1.3.6",
)

go_repository(
Expand Down Expand Up @@ -1120,8 +1120,8 @@ def go_deps():
name = "com_github_fsnotify_fsnotify",
build_file_proto_mode = "disable_global",
importpath = "github.com/fsnotify/fsnotify",
sum = "h1:jRbGcIw6P2Meqdwuo0H1p6JVLbL5DHKAKlYndzMwVZI=",
version = "v1.5.4",
sum = "h1:n+5WquG0fcWoWp6xPWfHdbskMCQaFnG6PfBrh1Ky4HY=",
version = "v1.6.0",
)
go_repository(
name = "com_github_fsouza_fake_gcs_server",
Expand Down Expand Up @@ -3118,8 +3118,8 @@ def go_deps():
name = "com_github_nxadm_tail",
build_file_proto_mode = "disable_global",
importpath = "github.com/nxadm/tail",
sum = "h1:DQuhQpB1tVlglWS2hLQ5OV6B5r8aGxSrPc5Qo6uTN78=",
version = "v1.4.4",
sum = "h1:nPr65rt6Y5JFSKQO7qToXr7pePgD6Gwiw05lkbyAQTE=",
version = "v1.4.8",
)
go_repository(
name = "com_github_oklog_run",
Expand Down
69 changes: 51 additions & 18 deletions br/pkg/lightning/restore/precheck_impl.go
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@ package restore
import (
"bytes"
"context"
"encoding/json"
"fmt"
"path/filepath"
"reflect"
Expand Down Expand Up @@ -780,53 +781,63 @@ func (ci *CDCPITRCheckItem) Check(ctx context.Context) (*CheckResult, error) {

// check etcd KV of CDC >= v6.2
cdcPrefix := "/tidb/cdc/"
capturePath := []byte("/__cdc_meta__/capture/")
changefeedPath := []byte("/changefeed/info/")

nameSet := make(map[string][]string, 1)
resp, err := ci.etcdCli.Get(ctx, cdcPrefix, clientv3.WithPrefix(), clientv3.WithKeysOnly())
resp, err := ci.etcdCli.Get(ctx, cdcPrefix, clientv3.WithPrefix())
if err != nil {
return nil, errors.Trace(err)
}
for _, kv := range resp.Kvs {
// example: /tidb/cdc/<clusterID>/__cdc_meta__/capture/<captureID>
// example: /tidb/cdc/<clusterID>/<namespace>/changefeed/info/<changefeedID>
k := kv.Key[len(cdcPrefix):]
clusterID, captureID, found := bytes.Cut(k, capturePath)
if found {
nameSet[string(clusterID)] = append(nameSet[string(clusterID)], string(captureID))
clusterAndNamespace, changefeedID, found := bytes.Cut(k, changefeedPath)
if !found {
continue
}
if !isActiveCDCChangefeed(kv.Value) {
continue
}

nameSet[string(clusterAndNamespace)] = append(nameSet[string(clusterAndNamespace)], string(changefeedID))
}
if len(nameSet) == 0 {
// check etcd KV of CDC <= v6.1
cdcPrefixV61 := "/tidb/cdc/capture/"
resp, err = ci.etcdCli.Get(ctx, cdcPrefixV61, clientv3.WithPrefix(), clientv3.WithKeysOnly())
cdcPrefixV61 := "/tidb/cdc/changefeed/info/"
resp, err = ci.etcdCli.Get(ctx, cdcPrefixV61, clientv3.WithPrefix())
if err != nil {
return nil, errors.Trace(err)
}
for _, kv := range resp.Kvs {
// example: /tidb/cdc/capture/<captureID>
// example: /tidb/cdc/changefeed/info/<changefeedID>
k := kv.Key[len(cdcPrefixV61):]
if len(k) == 0 {
continue
}
if !isActiveCDCChangefeed(kv.Value) {
continue
}

nameSet["<nil>"] = append(nameSet["<nil>"], string(k))
}
}

if len(nameSet) > 0 {
var captureMsgBuf strings.Builder
captureMsgBuf.WriteString("found CDC capture(s): ")
var changefeedMsgBuf strings.Builder
changefeedMsgBuf.WriteString("found CDC changefeed(s): ")
isFirst := true
for clusterID, captureIDs := range nameSet {
if !isFirst {
captureMsgBuf.WriteString(", ")
changefeedMsgBuf.WriteString(", ")
}
isFirst = false
captureMsgBuf.WriteString("clusterID: ")
captureMsgBuf.WriteString(clusterID)
captureMsgBuf.WriteString(" captureID(s): ")
captureMsgBuf.WriteString(fmt.Sprintf("%v", captureIDs))
changefeedMsgBuf.WriteString("cluster/namespace: ")
changefeedMsgBuf.WriteString(clusterID)
changefeedMsgBuf.WriteString(" changefeed(s): ")
changefeedMsgBuf.WriteString(fmt.Sprintf("%v", captureIDs))
}
captureMsgBuf.WriteString(",")
errorMsg = append(errorMsg, captureMsgBuf.String())
changefeedMsgBuf.WriteString(",")
errorMsg = append(errorMsg, changefeedMsgBuf.String())
}

if len(errorMsg) > 0 {
Expand All @@ -841,6 +852,28 @@ func (ci *CDCPITRCheckItem) Check(ctx context.Context) (*CheckResult, error) {
return theResult, nil
}

// onlyState decodes just the "state" field of a JSON document; all other
// fields of the document are ignored. isActiveCDCChangefeed uses it to read
// the state out of a changefeed info value fetched from etcd.
type onlyState struct {
// State holds the value of the top-level "state" key.
State string `json:"state"`
}

// isActiveCDCChangefeed reports whether the JSON-encoded value in jsonBytes
// has a "state" of "normal", "stopped" or "error". Any other state, or a
// missing one, yields false.
//
// A value that cannot be decoded as JSON is logged at error level and also
// yields false, so the caller skips that key instead of failing.
func isActiveCDCChangefeed(jsonBytes []byte) bool {
	var info onlyState
	if err := json.Unmarshal(jsonBytes, &info); err != nil {
		// Possibly a compatibility issue with the stored format; skip this key.
		log.L().Error("unmarshal etcd value failed when check CDC changefeed, will skip this key",
			zap.ByteString("value", jsonBytes),
			zap.Error(err))
		return false
	}
	return info.State == "normal" || info.State == "stopped" || info.State == "error"
}

type schemaCheckItem struct {
cfg *config.Config
preInfoGetter PreRestoreInfoGetter
Expand Down
27 changes: 20 additions & 7 deletions br/pkg/lightning/restore/precheck_impl_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -620,17 +620,27 @@ func (s *precheckImplSuite) TestCDCPITRCheckItem() {
s.Require().NoError(err)
err = brCli.PutTask(ctx, *taskInfo)
s.Require().NoError(err)
checkEtcdPut := func(key string) {
_, err := cli.Put(ctx, key, "")
checkEtcdPut := func(key string, vals ...string) {
val := ""
if len(vals) == 1 {
val = vals[0]
}
_, err := cli.Put(ctx, key, val)
s.Require().NoError(err)
}
// TiCDC >= v6.2
checkEtcdPut("/tidb/cdc/default/__cdc_meta__/capture/3ecd5c98-0148-4086-adfd-17641995e71f")
checkEtcdPut("/tidb/cdc/default/__cdc_meta__/meta/meta-version")
checkEtcdPut("/tidb/cdc/default/__cdc_meta__/meta/ticdc-delete-etcd-key-count")
checkEtcdPut("/tidb/cdc/default/__cdc_meta__/owner/22318498f4dd6639")
checkEtcdPut("/tidb/cdc/default/default/changefeed/info/test")
checkEtcdPut("/tidb/cdc/default/default/changefeed/info/test-1")
checkEtcdPut(
"/tidb/cdc/default/default/changefeed/info/test",
`{"upstream-id":7195826648407968958,"namespace":"default","changefeed-id":"test-1","sink-uri":"mysql://[email protected]:3306?time-zone=","create-time":"2023-02-03T15:23:34.773768+08:00","start-ts":439198420741652483,"target-ts":0,"admin-job-type":0,"sort-engine":"unified","sort-dir":"","config":{"memory-quota":1073741824,"case-sensitive":true,"enable-old-value":true,"force-replicate":false,"check-gc-safe-point":true,"enable-sync-point":false,"bdr-mode":false,"sync-point-interval":600000000000,"sync-point-retention":86400000000000,"filter":{"rules":["*.*"],"ignore-txn-start-ts":null,"event-filters":null},"mounter":{"worker-num":16},"sink":{"transaction-atomicity":"","protocol":"","dispatchers":null,"csv":{"delimiter":",","quote":"\"","null":"\\N","include-commit-ts":false},"column-selectors":null,"schema-registry":"","encoder-concurrency":16,"terminator":"\r\n","date-separator":"none","enable-partition-separator":false},"consistent":{"level":"none","max-log-size":64,"flush-interval":2000,"storage":""},"scheduler":{"region-per-span":0}},"state":"normal","error":null,"creator-version":"v6.5.0-master-dirty"}`,
)
checkEtcdPut(
"/tidb/cdc/default/default/changefeed/info/test-1",
`{"upstream-id":7195826648407968958,"namespace":"default","changefeed-id":"test-1","sink-uri":"mysql://[email protected]:3306?time-zone=","create-time":"2023-02-03T15:23:34.773768+08:00","start-ts":439198420741652483,"target-ts":0,"admin-job-type":0,"sort-engine":"unified","sort-dir":"","config":{"memory-quota":1073741824,"case-sensitive":true,"enable-old-value":true,"force-replicate":false,"check-gc-safe-point":true,"enable-sync-point":false,"bdr-mode":false,"sync-point-interval":600000000000,"sync-point-retention":86400000000000,"filter":{"rules":["*.*"],"ignore-txn-start-ts":null,"event-filters":null},"mounter":{"worker-num":16},"sink":{"transaction-atomicity":"","protocol":"","dispatchers":null,"csv":{"delimiter":",","quote":"\"","null":"\\N","include-commit-ts":false},"column-selectors":null,"schema-registry":"","encoder-concurrency":16,"terminator":"\r\n","date-separator":"none","enable-partition-separator":false},"consistent":{"level":"none","max-log-size":64,"flush-interval":2000,"storage":""},"scheduler":{"region-per-span":0}},"state":"failed","error":null,"creator-version":"v6.5.0-master-dirty"}`,
)
checkEtcdPut("/tidb/cdc/default/default/changefeed/status/test")
checkEtcdPut("/tidb/cdc/default/default/changefeed/status/test-1")
checkEtcdPut("/tidb/cdc/default/default/task/position/3ecd5c98-0148-4086-adfd-17641995e71f/test-1")
Expand All @@ -640,7 +650,7 @@ func (s *precheckImplSuite) TestCDCPITRCheckItem() {
s.Require().NoError(err)
s.Require().False(result.Passed)
s.Require().Equal("found PiTR log streaming task(s): [br_name],\n"+
"found CDC capture(s): clusterID: default captureID(s): [3ecd5c98-0148-4086-adfd-17641995e71f],\n"+
"found CDC changefeed(s): cluster/namespace: default/default changefeed(s): [test],\n"+
"local backend is not compatible with them. Please switch to tidb backend then try again.",
result.Message)

Expand All @@ -649,7 +659,10 @@ func (s *precheckImplSuite) TestCDCPITRCheckItem() {

// TiCDC <= v6.1
checkEtcdPut("/tidb/cdc/capture/f14cb04d-5ba1-410e-a59b-ccd796920e9d")
checkEtcdPut("/tidb/cdc/changefeed/info/test")
checkEtcdPut(
"/tidb/cdc/changefeed/info/test",
`{"upstream-id":7195826648407968958,"namespace":"default","changefeed-id":"test-1","sink-uri":"mysql://[email protected]:3306?time-zone=","create-time":"2023-02-03T15:23:34.773768+08:00","start-ts":439198420741652483,"target-ts":0,"admin-job-type":0,"sort-engine":"unified","sort-dir":"","config":{"memory-quota":1073741824,"case-sensitive":true,"enable-old-value":true,"force-replicate":false,"check-gc-safe-point":true,"enable-sync-point":false,"bdr-mode":false,"sync-point-interval":600000000000,"sync-point-retention":86400000000000,"filter":{"rules":["*.*"],"ignore-txn-start-ts":null,"event-filters":null},"mounter":{"worker-num":16},"sink":{"transaction-atomicity":"","protocol":"","dispatchers":null,"csv":{"delimiter":",","quote":"\"","null":"\\N","include-commit-ts":false},"column-selectors":null,"schema-registry":"","encoder-concurrency":16,"terminator":"\r\n","date-separator":"none","enable-partition-separator":false},"consistent":{"level":"none","max-log-size":64,"flush-interval":2000,"storage":""},"scheduler":{"region-per-span":0}},"state":"stopped","error":null,"creator-version":"v6.5.0-master-dirty"}`,
)
checkEtcdPut("/tidb/cdc/job/test")
checkEtcdPut("/tidb/cdc/owner/223184ad80a88b0b")
checkEtcdPut("/tidb/cdc/task/position/f14cb04d-5ba1-410e-a59b-ccd796920e9d/test")
Expand All @@ -658,7 +671,7 @@ func (s *precheckImplSuite) TestCDCPITRCheckItem() {
s.Require().NoError(err)
s.Require().False(result.Passed)
s.Require().Equal("found PiTR log streaming task(s): [br_name],\n"+
"found CDC capture(s): clusterID: <nil> captureID(s): [f14cb04d-5ba1-410e-a59b-ccd796920e9d],\n"+
"found CDC changefeed(s): cluster/namespace: <nil> changefeed(s): [test],\n"+
"local backend is not compatible with them. Please switch to tidb backend then try again.",
result.Message)

Expand Down
35 changes: 21 additions & 14 deletions br/pkg/restore/data.go
Original file line number Diff line number Diff line change
Expand Up @@ -302,22 +302,29 @@ func (recovery *Recovery) WaitApply(ctx context.Context) (err error) {

// PrepareFlashbackToVersion prepares the regions for flashing back the data: it stops region service and puts the regions into the flashback state.
func (recovery *Recovery) PrepareFlashbackToVersion(ctx context.Context, resolveTS uint64, startTS uint64) (err error) {
handler := func(ctx context.Context, r tikvstore.KeyRange) (rangetask.TaskStat, error) {
stats, err := ddl.SendPrepareFlashbackToVersionRPC(ctx, recovery.mgr.GetStorage().(tikv.Storage), resolveTS, startTS, r)
return stats, err
}
retryErr := utils.WithRetry(
ctx,
func() error {
handler := func(ctx context.Context, r tikvstore.KeyRange) (rangetask.TaskStat, error) {
stats, err := ddl.SendPrepareFlashbackToVersionRPC(ctx, recovery.mgr.GetStorage().(tikv.Storage), resolveTS, startTS, r)
return stats, err
}

runner := rangetask.NewRangeTaskRunner("br-flashback-prepare-runner", recovery.mgr.GetStorage().(tikv.Storage), int(recovery.concurrency), handler)
// Run prepare flashback on the entire TiKV cluster. Empty keys means the range is unbounded.
err = runner.RunOnRange(ctx, []byte(""), []byte(""))
if err != nil {
log.Error("region flashback prepare get error")
return errors.Trace(err)
}
recovery.progress.Inc()
log.Info("region flashback prepare complete", zap.Int("regions", runner.CompletedRegions()))
runner := rangetask.NewRangeTaskRunner("br-flashback-prepare-runner", recovery.mgr.GetStorage().(tikv.Storage), int(recovery.concurrency), handler)
// Run prepare flashback on the entire TiKV cluster. Empty keys means the range is unbounded.
err = runner.RunOnRange(ctx, []byte(""), []byte(""))
if err != nil {
log.Warn("region flashback prepare get error")
return errors.Trace(err)
}
log.Info("region flashback prepare complete", zap.Int("regions", runner.CompletedRegions()))
return nil
},
utils.NewFlashBackBackoffer(),
)

return nil
recovery.progress.Inc()
return retryErr
}

// flashback the region data to version resolveTS
Expand Down
36 changes: 36 additions & 0 deletions br/pkg/utils/backoff.go
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,11 @@ const (
resetTSRetryTimeExt = 600
resetTSWaitIntervalExt = 500 * time.Millisecond
resetTSMaxWaitIntervalExt = 300 * time.Second

// The region heartbeat interval is 10 seconds by default; if a region has missed 2 heartbeats (15 seconds), it appears to be a network issue between PD and TiKV.
flashbackRetryTime = 3
flashbackWaitInterval = 3000 * time.Millisecond
flashbackMaxWaitInterval = 15 * time.Second
)

// RetryState is the mutable state needed for retrying.
Expand Down Expand Up @@ -204,3 +209,34 @@ func (bo *pdReqBackoffer) NextBackoff(err error) time.Duration {
func (bo *pdReqBackoffer) Attempt() int {
return bo.attempt
}

// flashbackBackoffer holds the retry state for the flashback prepare step:
// how many attempts remain and how long to wait before the next one.
type flashbackBackoffer struct {
// attempt is the number of attempts left; NextBackoff decrements it.
attempt int
// delayTime is the current wait interval; NextBackoff doubles it on every call.
delayTime time.Duration
// maxDelayTime is the upper bound on the interval NextBackoff returns.
maxDelayTime time.Duration
}

// NewFlashBackBackoffer creates a Backoffer for retrying the flashback
// prepare step. It is initialized with flashbackRetryTime attempts, a
// delayTime of flashbackWaitInterval, and a maxDelayTime of
// flashbackMaxWaitInterval.
func NewFlashBackBackoffer() Backoffer {
	bo := new(flashbackBackoffer)
	bo.attempt = flashbackRetryTime
	bo.delayTime = flashbackWaitInterval
	bo.maxDelayTime = flashbackMaxWaitInterval
	return bo
}

// NextBackoff records one failed attempt and returns how long to wait before
// the next one. Each call uses up one attempt, doubles the stored delay, and
// logs err as a warning. The returned duration is the doubled delay, capped
// at maxDelayTime; the stored delay itself is not capped.
func (bo *flashbackBackoffer) NextBackoff(err error) time.Duration {
	bo.attempt--
	bo.delayTime *= 2
	log.Warn("region may not ready to serve, retry it...", zap.Error(err))

	wait := bo.delayTime
	if wait > bo.maxDelayTime {
		wait = bo.maxDelayTime
	}
	return wait
}

// Attempt returns the number of attempts left.
func (bo *flashbackBackoffer) Attempt() int {
return bo.attempt
}
1 change: 1 addition & 0 deletions ddl/BUILD.bazel
Original file line number Diff line number Diff line change
Expand Up @@ -110,6 +110,7 @@ go_library(
"//util/filter",
"//util/gcutil",
"//util/generic",
"//util/gpool",
"//util/gpool/spmc",
"//util/hack",
"//util/intest",
Expand Down
39 changes: 35 additions & 4 deletions ddl/cluster.go
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,6 @@ import (
"github.com/pingcap/kvproto/pkg/kvrpcpb"
"github.com/pingcap/tidb/ddl/util"
"github.com/pingcap/tidb/domain/infosync"
"github.com/pingcap/tidb/expression"
"github.com/pingcap/tidb/infoschema"
"github.com/pingcap/tidb/kv"
"github.com/pingcap/tidb/meta"
Expand Down Expand Up @@ -99,6 +98,16 @@ func recoverPDSchedule(pdScheduleParam map[string]interface{}) error {
return infosync.SetPDScheduleConfig(context.Background(), pdScheduleParam)
}

// getStoreGlobalMinSafeTS reads the min safe TS of the global txn scope from
// the store s and returns it converted to a time.Time via oracle.GetTimeFromTS.
func getStoreGlobalMinSafeTS(s kv.Storage) time.Time {
minSafeTS := s.GetMinSafeTS(kv.GlobalTxnScope)
// Inject mocked SafeTS for test.
failpoint.Inject("injectSafeTS", func(val failpoint.Value) {
// The injected int value replaces the TS read from the store.
injectTS := val.(int)
minSafeTS = uint64(injectTS)
})
return oracle.GetTimeFromTS(minSafeTS)
}

// ValidateFlashbackTS validates that flashBackTS in range [gcSafePoint, currentTS).
func ValidateFlashbackTS(ctx context.Context, sctx sessionctx.Context, flashBackTS uint64) error {
currentTS, err := sctx.GetStore().GetOracle().GetStaleTimestamp(ctx, oracle.GlobalTxnScope, 0)
Expand All @@ -111,12 +120,34 @@ func ValidateFlashbackTS(ctx context.Context, sctx sessionctx.Context, flashBack
}
currentTS = currentVer.Ver
}
if oracle.GetTimeFromTS(flashBackTS).After(oracle.GetTimeFromTS(currentTS)) {
oracleFlashbackTS := oracle.GetTimeFromTS(flashBackTS)
if oracleFlashbackTS.After(oracle.GetTimeFromTS(currentTS)) {
return errors.Errorf("cannot set flashback timestamp to future time")
}
if oracle.GetTimeFromTS(flashBackTS).After(expression.GetMinSafeTime(sctx)) {
return errors.Errorf("cannot set flashback timestamp to too close to present time")

flashbackGetMinSafeTimeTimeout := time.Minute
failpoint.Inject("changeFlashbackGetMinSafeTimeTimeout", func(val failpoint.Value) {
t := val.(int)
flashbackGetMinSafeTimeTimeout = time.Duration(t)
})

start := time.Now()
minSafeTime := getStoreGlobalMinSafeTS(sctx.GetStore())
ticker := time.NewTicker(time.Second)
defer ticker.Stop()
for oracleFlashbackTS.After(minSafeTime) {
if time.Since(start) >= flashbackGetMinSafeTimeTimeout {
return errors.Errorf("cannot set flashback timestamp after min-resolved-ts(%s)", minSafeTime)
}
select {
case <-ticker.C:
minSafeTime = getStoreGlobalMinSafeTS(sctx.GetStore())
break
case <-ctx.Done():
return ctx.Err()
}
}

gcSafePoint, err := gcutil.GetGCSafePoint(sctx)
if err != nil {
return err
Expand Down
Loading

0 comments on commit aaeaff4

Please sign in to comment.