Skip to content

Commit

Permalink
Browse files Browse the repository at this point in the history
137322: kvserver: disable StoreLiveness for ranges that use expiration leases r=nvanbenschoten a=arulajmani

This patch first extracts a few stateless functions, which previously lived on the Replica struct, to compute whether a range should use an expiration based lease or not. We then use these functions to disable StoreLiveness for ranges that use expiration based leases.

Note that this approach works even if the cluster setting to enable expiration based leases unconditionally is flipped within a leadership term, as this is no different than the cluster setting to enable raft fortification being toggled within a leadership term.

Closes cockroachdb#137121

Release note: None

Co-authored-by: Arul Ajmani <[email protected]>
  • Loading branch information
craig[bot] and arulajmani committed Dec 18, 2024
2 parents 05c3e25 + 02fa61b commit 3d95da9
Show file tree
Hide file tree
Showing 7 changed files with 150 additions and 36 deletions.
12 changes: 12 additions & 0 deletions pkg/kv/kvserver/helpers_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -696,3 +696,15 @@ func NewRangefeedTxnPusher(
span: span,
}
}

// SupportFromEnabled exports (replicaRLockedStoreLiveness).SupportFromEnabled
// for testing purposes.
func (r *Replica) SupportFromEnabled() bool {
return (*replicaRLockedStoreLiveness)(r).SupportFromEnabled()
}

// RaftFortificationEnabledForRangeID exports raftFortificationEnabledForRangeID
// for use in tests.
func RaftFortificationEnabledForRangeID(fracEnabled float64, rangeID roachpb.RangeID) bool {
return raftFortificationEnabledForRangeID(fracEnabled, rangeID)
}
26 changes: 14 additions & 12 deletions pkg/kv/kvserver/lease_queue_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -404,8 +404,8 @@ func TestLeaseQueueSwitchesLeaseType(t *testing.T) {
}

// TestUpdateLastUpdateTimesUsingStoreLiveness tests that `lastUpdateTimes` is
// updated when the leader is supported by a follower in the store liveness even
// if it's not updating the map upon receiving followers' messages.
// updated when the leader is supported by a follower in store liveness even if
// it's not updating the map upon receiving followers' messages.
func TestUpdateLastUpdateTimesUsingStoreLiveness(t *testing.T) {
defer leaktest.AfterTest(t)()
defer log.Scope(t).Close(t)
Expand All @@ -416,7 +416,8 @@ func TestUpdateLastUpdateTimesUsingStoreLiveness(t *testing.T) {

ctx := context.Background()
st := cluster.MakeTestingClusterSettings()
kvserver.ExpirationLeasesOnly.Override(ctx, &st.SV, false) // override metamorphism
// This test is only relevant for leader leases.
kvserver.OverrideDefaultLeaseType(ctx, &st.SV, roachpb.LeaseLeader)

manualClock := hlc.NewHybridManualClock()
knobs := base.TestingKnobs{
Expand All @@ -426,8 +427,16 @@ func TestUpdateLastUpdateTimesUsingStoreLiveness(t *testing.T) {
Store: &kvserver.StoreTestingKnobs{
// Disable updating the `lastUpdateTimes` map when the leader receives
// messages from followers. This is to simulate the leader not
// sending/receiving any messages because it doesn't have any updates.
DisableUpdateLastUpdateTimesMapOnRaftGroupStep: true,
// sending/receiving any messages because it doesn't have any updates. We
// only want to do this for ranges that use leader leases (read: we want
// to omit the meta ranges/NodeLiveness range which always use expiration
// based leases and don't partake in fortification).
//
// Also see updateLastUpdateTimesUsingStoreLivenessRLocked which is the
// method being tested here.
DisableUpdateLastUpdateTimesMapOnRaftGroupStep: func(r *kvserver.Replica) bool {
return r.SupportFromEnabled()
},
},
}

Expand All @@ -444,20 +453,13 @@ func TestUpdateLastUpdateTimesUsingStoreLiveness(t *testing.T) {
require.NoError(t, tc.WaitForFullReplication())

db := tc.Server(0).DB()
sqlDB := tc.ServerConn(0)

// Split off a few ranges so we have something to work with.
scratchKey := tc.ScratchRange(t)
for i := 0; i <= 16; i++ {
splitKey := append(scratchKey.Clone(), byte(i))
require.NoError(t, db.AdminSplit(ctx, splitKey, hlc.MaxTimestamp))
}

// Switch 100% of ranges to use leader fortification.
_, err := sqlDB.ExecContext(ctx,
`SET CLUSTER SETTING kv.raft.leader_fortification.fraction_enabled = 1.00`)
require.NoError(t, err)

// Increment the manual clock to ensure that all followers are initially
// considered inactive.
manualClock.Increment(time.Second.Nanoseconds() * 3)
Expand Down
21 changes: 17 additions & 4 deletions pkg/kv/kvserver/replica_raft.go
Original file line number Diff line number Diff line change
Expand Up @@ -647,10 +647,21 @@ func (r *Replica) stepRaftGroupRaftMuLocked(req *kvserverpb.RaftMessageRequest)
wakeLeader := hasLeader && !fromLeader
r.maybeUnquiesceLocked(wakeLeader, false /* mayCampaign */)
}
if r.store.TestingKnobs() == nil ||
!r.store.TestingKnobs().DisableUpdateLastUpdateTimesMapOnRaftGroupStep {
r.mu.lastUpdateTimes.update(req.FromReplica.ReplicaID, r.Clock().PhysicalTime())

{
// Update the lastUpdateTimes map, unless configured not to by a testing
// knob.
disableUpdateLastUpdateTimesMapOnRaftGroupStep := false
if r.store.TestingKnobs() == nil &&
r.store.TestingKnobs().DisableUpdateLastUpdateTimesMapOnRaftGroupStep != nil {
disableUpdateLastUpdateTimesMapOnRaftGroupStep = r.store.TestingKnobs().DisableUpdateLastUpdateTimesMapOnRaftGroupStep(r)
}

if !disableUpdateLastUpdateTimesMapOnRaftGroupStep {
r.mu.lastUpdateTimes.update(req.FromReplica.ReplicaID, r.Clock().PhysicalTime())
}
}

switch req.Message.Type {
case raftpb.MsgPreVote, raftpb.MsgVote:
// If we receive a (pre)vote request, and we find our leader to be dead or
Expand Down Expand Up @@ -3046,7 +3057,9 @@ func (r *Replica) printRaftTail(
func (r *Replica) updateLastUpdateTimesUsingStoreLivenessRLocked(
storeClockTimestamp hlc.ClockTimestamp,
) {
// If store liveness is not enabled, there is nothing to do.
// If store liveness is not enabled, there is nothing to do. The
// lastUpdateTimes map will be updated as a result of responses to heartbeats
// sent by the leader.
if !(*replicaRLockedStoreLiveness)(r).SupportFromEnabled() {
return
}
Expand Down
21 changes: 12 additions & 9 deletions pkg/kv/kvserver/replica_range_lease.go
Original file line number Diff line number Diff line change
Expand Up @@ -166,7 +166,6 @@ func OverrideDefaultLeaseType(ctx context.Context, sv *settings.Values, typ roac
switch typ {
case roachpb.LeaseExpiration:
ExpirationLeasesOnly.Override(ctx, sv, true)
RaftLeaderFortificationFractionEnabled.Override(ctx, sv, 0.0)
case roachpb.LeaseEpoch:
ExpirationLeasesOnly.Override(ctx, sv, false)
RaftLeaderFortificationFractionEnabled.Override(ctx, sv, 0.0)
Expand Down Expand Up @@ -827,15 +826,11 @@ func (r *Replica) requiresExpirationLeaseRLocked() bool {

// shouldUseExpirationLeaseRLocked returns true if this range should be using an
// expiration-based lease.
//
// We use an expiration-based lease if the range requires one or if the
// kv.expiration_leases_only.enabled setting is enabled and the number of ranges
// (replicas) per node is fewer than kv.expiration_leases.max_replicas_per_node.
func (r *Replica) shouldUseExpirationLeaseRLocked() bool {
return r.desiredLeaseTypeRLocked() == roachpb.LeaseExpiration
}

// desiredLeaseTypeRLocked returns the desired lease type for this replica.
func (r *Replica) desiredLeaseTypeRLocked() roachpb.LeaseType {
// Use an expiration-based lease if the range requires one or if the
// kv.expiration_leases_only.enabled setting is enabled and the number of ranges
// (replicas) per node is fewer than kv.expiration_leases.max_replicas_per_node.
expirationLeaseRequired := r.requiresExpirationLeaseRLocked()
expirationLeaseOnly := func() bool {
settingEnabled := ExpirationLeasesOnly.Get(&r.ClusterSettings().SV) && !DisableExpirationLeasesOnly
Expand All @@ -847,6 +842,14 @@ func (r *Replica) desiredLeaseTypeRLocked() roachpb.LeaseType {
return settingEnabled
}()
if expirationLeaseRequired || expirationLeaseOnly {
return true
}
return false
}

// desiredLeaseTypeRLocked returns the desired lease type for this replica.
func (r *Replica) desiredLeaseTypeRLocked() roachpb.LeaseType {
if r.shouldUseExpirationLeaseRLocked() {
return roachpb.LeaseExpiration
}

Expand Down
10 changes: 10 additions & 0 deletions pkg/kv/kvserver/replica_store_liveness.go
Original file line number Diff line number Diff line change
Expand Up @@ -89,6 +89,16 @@ func (r *replicaRLockedStoreLiveness) SupportFromEnabled() bool {
if !r.store.storeLiveness.SupportFromEnabled(context.TODO()) {
return false
}
if (*Replica)(r).shouldUseExpirationLeaseRLocked() {
// If this range wants to use an expiration based lease, either because it's
// one of the system ranges (NodeLiveness, Meta) or because the cluster
// setting to always use expiration based leases is turned on, then do not
// fortify the leader. There's no benefit to doing so because we aren't
// going to acquire a leader lease on top of it. On the other hand, by not
// fortifying, we ensure there's no StoreLiveness dependency for these
// ranges.
return false
}
fracEnabled := RaftLeaderFortificationFractionEnabled.Get(&r.store.ClusterSettings().SV)
fortifyEnabled := raftFortificationEnabledForRangeID(fracEnabled, r.RangeID)
return fortifyEnabled
Expand Down
85 changes: 77 additions & 8 deletions pkg/kv/kvserver/replica_store_liveness_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -3,13 +3,20 @@
// Use of this software is governed by the CockroachDB Software License
// included in the /LICENSE file.

package kvserver
package kvserver_test

import (
"context"
"fmt"
"testing"

"github.com/cockroachdb/cockroach/pkg/base"
"github.com/cockroachdb/cockroach/pkg/keys"
"github.com/cockroachdb/cockroach/pkg/kv/kvserver"
"github.com/cockroachdb/cockroach/pkg/roachpb"
"github.com/cockroachdb/cockroach/pkg/settings/cluster"
"github.com/cockroachdb/cockroach/pkg/testutils"
"github.com/cockroachdb/cockroach/pkg/testutils/testcluster"
"github.com/cockroachdb/cockroach/pkg/util/leaktest"
"github.com/cockroachdb/cockroach/pkg/util/log"
"github.com/stretchr/testify/require"
Expand All @@ -22,14 +29,14 @@ func TestRaftFortificationEnabledForRangeID(t *testing.T) {
// Ensure fracEnabled=0.0 never returns true.
t.Run("fracEnabled=0.0", func(t *testing.T) {
for i := range 10_000 {
require.False(t, raftFortificationEnabledForRangeID(0.0, roachpb.RangeID(i)))
require.False(t, kvserver.RaftFortificationEnabledForRangeID(0.0, roachpb.RangeID(i)))
}
})

// Ensure fracEnabled=1.0 always returns true.
t.Run("fracEnabled=1.0", func(t *testing.T) {
for i := range 10_000 {
require.True(t, raftFortificationEnabledForRangeID(1.0, roachpb.RangeID(i)))
require.True(t, kvserver.RaftFortificationEnabledForRangeID(1.0, roachpb.RangeID(i)))
}
})

Expand Down Expand Up @@ -70,7 +77,7 @@ func TestRaftFortificationEnabledForRangeID(t *testing.T) {
for _, tc := range testCases {
name := fmt.Sprintf("fracEnabled=%f/rangeID=%d", tc.fracEnabled, tc.rangeID)
t.Run(name, func(t *testing.T) {
require.Equal(t, tc.exp, raftFortificationEnabledForRangeID(tc.fracEnabled, tc.rangeID))
require.Equal(t, tc.exp, kvserver.RaftFortificationEnabledForRangeID(tc.fracEnabled, tc.rangeID))
})
}

Expand All @@ -80,7 +87,7 @@ func TestRaftFortificationEnabledForRangeID(t *testing.T) {
enabled := 0
const total = 100_000
for i := range total {
if raftFortificationEnabledForRangeID(fracEnabled, roachpb.RangeID(i)) {
if kvserver.RaftFortificationEnabledForRangeID(fracEnabled, roachpb.RangeID(i)) {
enabled++
}
}
Expand All @@ -92,16 +99,78 @@ func TestRaftFortificationEnabledForRangeID(t *testing.T) {
// Test panic on invalid fracEnabled.
t.Run("fracEnabled=invalid", func(t *testing.T) {
require.Panics(t, func() {
raftFortificationEnabledForRangeID(-0.1, 1)
kvserver.RaftFortificationEnabledForRangeID(-0.1, 1)
})
require.Panics(t, func() {
raftFortificationEnabledForRangeID(1.1, 1)
kvserver.RaftFortificationEnabledForRangeID(1.1, 1)
})
})
}

// TestFortificationDisabledForExpirationBasedLeases ensures that raft
// fortification is disabled for ranges that want to use expiration based
// leases. This is always the case for meta ranges and the node liveness range,
// and can be the case for other ranges if the cluster setting to always use
// expiration based leases is turned on.
func TestRaftFortificationDisabledForExpirationBasedLeases(t *testing.T) {
defer leaktest.AfterTest(t)()
defer log.Scope(t).Close(t)

testutils.RunTrueAndFalse(t, "useExpirationBasedLeases", func(t *testing.T, useExpirationBasedLeases bool) {
ctx := context.Background()
st := cluster.MakeTestingClusterSettings()
tc := testcluster.StartTestCluster(t, 1, base.TestClusterArgs{
ServerArgs: base.TestServerArgs{
Settings: st,
},
})
defer tc.Stopper().Stop(ctx)

if useExpirationBasedLeases {
kvserver.OverrideDefaultLeaseType(ctx, &st.SV, roachpb.LeaseExpiration)
} else {
kvserver.OverrideDefaultLeaseType(ctx, &st.SV, roachpb.LeaseLeader)
}

testCases := []struct {
key roachpb.Key
exp bool
}{
{
key: keys.NodeLivenessPrefix,
exp: false,
},
{
key: keys.Meta1Prefix,
exp: false,
},
{
key: keys.Meta2Prefix,
exp: false,
},
{
key: keys.NamespaceTableMin,
exp: true,
},
{
key: keys.TableDataMin,
exp: true,
},
}

for _, test := range testCases {
repl := tc.GetFirstStoreFromServer(t, 0).LookupReplica(roachpb.RKey(test.key))
if useExpirationBasedLeases {
require.False(t, repl.SupportFromEnabled())
} else {
require.Equal(t, test.exp, repl.SupportFromEnabled())
}
}
})
}

func BenchmarkRaftFortificationEnabledForRangeID(b *testing.B) {
for i := 0; i < b.N; i++ {
_ = raftFortificationEnabledForRangeID(0.5, roachpb.RangeID(i))
_ = kvserver.RaftFortificationEnabledForRangeID(0.5, roachpb.RangeID(i))
}
}
11 changes: 8 additions & 3 deletions pkg/kv/kvserver/testing_knobs.go
Original file line number Diff line number Diff line change
Expand Up @@ -547,9 +547,14 @@ type StoreTestingKnobs struct {
// raft group for that replica.
RaftReportUnreachableBypass func(roachpb.ReplicaID) bool

// DisableUpdateLastUpdateTimesMapOnRaftGroupStep disables updating the
// lastUpdateTimes map when a raft group is stepped.
DisableUpdateLastUpdateTimesMapOnRaftGroupStep bool
// DisableUpdateLastUpdateTimesMapOnRaftGroupStep, if set, is invoked with the
// replica whose raft group is being stepped. It returns whether the
// lastUpdateTimes map should be not be updated upon stepping the raft group.
//
// This testing knob is used to simulate the leader not sending or receiving
// messages because it has no updates and heartbeats are turned off. This
// simulation is only meaningful for ranges that use leader leases.
DisableUpdateLastUpdateTimesMapOnRaftGroupStep func(r *Replica) bool
}

// ModuleTestingKnobs is part of the base.ModuleTestingKnobs interface.
Expand Down

0 comments on commit 3d95da9

Please sign in to comment.