Skip to content

Commit

Permalink
replication_mode: switch from sync to async when state is not replicated to all PDs (#2491)
Browse files Browse the repository at this point in the history

Signed-off-by: disksing <[email protected]>

Co-authored-by: pingcap-github-bot <[email protected]>
Co-authored-by: ShuNing <[email protected]>
Co-authored-by: lhy1024 <[email protected]>
  • Loading branch information
4 people authored Jun 8, 2020
1 parent 2b56a4c commit 4e8b16d
Show file tree
Hide file tree
Showing 2 changed files with 24 additions and 2 deletions.
8 changes: 7 additions & 1 deletion server/replication/replication_mode.go
Original file line number Diff line number Diff line change
Expand Up @@ -287,7 +287,13 @@ func (m *ModeManager) drPersistStatus(status drAutoSyncStatus) error {
data, _ := json.Marshal(status)
if err := m.fileReplicater.ReplicateFileToAllMembers(ctx, drStatusFile, data); err != nil {
log.Warn("failed to switch state", zap.String("replicate-mode", modeDRAutoSync), zap.String("new-state", status.State), zap.Error(err))
return err
// Deliberately discard the error so that the switch to async can still
// proceed when the primary and DR DCs are disconnected. The trade-off is
// that, when the DR DC is later used for disaster recovery, it is no
// longer possible to determine accurately whether its data is fully
// synchronized.
// TODO: introduce PD's leader-follower connection timeout to solve
// this issue. More details: https://github.com/pingcap/pd/issues/2490
return nil
}
}
return nil
Expand Down
18 changes: 17 additions & 1 deletion server/replication/replication_mode_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,8 @@
package replication

import (
"context"
"errors"
"testing"
"time"

Expand Down Expand Up @@ -127,6 +129,14 @@ func (s *testReplicationMode) TestStatus(c *C) {
})
}

// mockFileReplicator is a test double whose ReplicateFileToAllMembers result
// is whatever err currently holds, so a test can flip replication between
// success and failure by assigning to the field.
type mockFileReplicator struct {
	// err is returned verbatim by ReplicateFileToAllMembers; nil means success.
	err error
}

// ReplicateFileToAllMembers ignores all of its arguments and reports the
// configured error.
func (r *mockFileReplicator) ReplicateFileToAllMembers(_ context.Context, _ string, _ []byte) error {
	return r.err
}

func (s *testReplicationMode) TestStateSwitch(c *C) {
store := core.NewStorage(kv.NewMemoryKV())
conf := config.ReplicationModeConfig{ReplicationMode: modeDRAutoSync, DRAutoSync: config.DRAutoSyncReplicationConfig{
Expand All @@ -139,7 +149,8 @@ func (s *testReplicationMode) TestStateSwitch(c *C) {
WaitSyncTimeout: typeutil.Duration{Duration: time.Minute},
}}
cluster := mockcluster.NewCluster(mockoption.NewScheduleOptions())
rep, err := NewReplicationModeManager(conf, store, cluster, nil)
var replicator mockFileReplicator
rep, err := NewReplicationModeManager(conf, store, cluster, &replicator)
c.Assert(err, IsNil)

cluster.AddLabelsStore(1, 1, map[string]string{"zone": "zone1"})
Expand Down Expand Up @@ -172,6 +183,11 @@ func (s *testReplicationMode) TestStateSwitch(c *C) {
rep.tickDR()
c.Assert(rep.drGetState(), Equals, drStateAsync)
assertStateIDUpdate()
rep.drSwitchToSync()
replicator.err = errors.New("fail to replicate")
rep.tickDR()
c.Assert(rep.drGetState(), Equals, drStateAsync)
assertStateIDUpdate()

// async -> sync_recover
s.setStoreState(cluster, 5, "up")
Expand Down

0 comments on commit 4e8b16d

Please sign in to comment.