Skip to content

Commit

Permalink
Mark its own cluster as healthy when rebalancing. (#8406)
Browse files Browse the repository at this point in the history
This code started as an optimization to avoid doing an RPC Ping to
itself. But in a single server cluster the rebalancing was led to
believe that there were no healthy servers because foundHealthyServer
was not set. Now this is being set properly.

Fixes #8401 and #8403.
  • Loading branch information
hanshasselberg authored and hashicorp-ci committed Aug 6, 2020
1 parent 9d5f9c6 commit 1a351d5
Show file tree
Hide file tree
Showing 2 changed files with 72 additions and 14 deletions.
35 changes: 21 additions & 14 deletions agent/router/manager.go
Original file line number Diff line number Diff line change
Expand Up @@ -321,6 +321,25 @@ func (m *Manager) NumServers() int {
return len(l.servers)
}

func (m *Manager) healthyServer(server *metadata.Server) bool {
// Check to see if the manager is trying to ping itself. This
// is a small optimization to avoid performing an unnecessary
// RPC call.
// If this is true, we know there are healthy servers for this
// manager and we don't need to continue.
if m.serverName != "" && server.Name == m.serverName {
return true
}
if ok, err := m.connPoolPinger.Ping(server.Datacenter, server.ShortName, server.Addr); !ok {
m.logger.Debug("pinging server failed",
"server", server.String(),
"error", err,
)
return false
}
return true
}

// RebalanceServers shuffles the list of servers on this metadata. The server
// at the front of the list is selected for the next RPC. RPC calls that
// fail for a particular server are rotated to the end of the list. This
Expand All @@ -345,24 +364,12 @@ func (m *Manager) RebalanceServers() {
// this loop mutates the server list in-place.
var foundHealthyServer bool
for i := 0; i < len(l.servers); i++ {
// Always test the first server. Failed servers are cycled
// Always test the first server. Failed servers are cycled
// while Serf detects the node has failed.
srv := l.servers[0]

// check to see if the manager is trying to ping itself,
// continue if that is the case.
if m.serverName != "" && srv.Name == m.serverName {
continue
}
ok, err := m.connPoolPinger.Ping(srv.Datacenter, srv.ShortName, srv.Addr)
if ok {
if m.healthyServer(l.servers[0]) {
foundHealthyServer = true
break
}
m.logger.Debug("pinging server failed",
"server", srv.String(),
"error", err,
)
l.servers = l.cycleServer()
}

Expand Down
51 changes: 51 additions & 0 deletions agent/router/manager_internal_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@ import (

"github.com/hashicorp/consul/agent/metadata"
"github.com/hashicorp/go-hclog"
"github.com/stretchr/testify/require"
)

var (
Expand Down Expand Up @@ -350,3 +351,53 @@ func TestManagerInternal_saveServerList(t *testing.T) {
}
}()
}

func TestManager_healthyServer(t *testing.T) {
t.Run("checking itself", func(t *testing.T) {
m := testManager()
m.serverName = "s1"
server := metadata.Server{Name: m.serverName}
require.True(t, m.healthyServer(&server))
})
t.Run("checking another server with successful ping", func(t *testing.T) {
m := testManager()
server := metadata.Server{Name: "s1"}
require.True(t, m.healthyServer(&server))
})
t.Run("checking another server with failed ping", func(t *testing.T) {
m := testManagerFailProb(1)
server := metadata.Server{Name: "s1"}
require.False(t, m.healthyServer(&server))
})
}

func TestManager_Rebalance(t *testing.T) {
t.Run("single server cluster checking itself", func(t *testing.T) {
m := testManager()
m.serverName = "s1"
m.AddServer(&metadata.Server{Name: m.serverName})
m.RebalanceServers()
require.False(t, m.IsOffline())
})
t.Run("multi server cluster is unhealthy when pings always fail", func(t *testing.T) {
m := testManagerFailProb(1)
m.AddServer(&metadata.Server{Name: "s1"})
m.AddServer(&metadata.Server{Name: "s2"})
m.AddServer(&metadata.Server{Name: "s3"})
for i := 0; i < 100; i++ {
m.RebalanceServers()
require.True(t, m.IsOffline())
}
})
t.Run("multi server cluster checking itself remains healthy despite pings always fail", func(t *testing.T) {
m := testManagerFailProb(1)
m.serverName = "s1"
m.AddServer(&metadata.Server{Name: m.serverName})
m.AddServer(&metadata.Server{Name: "s2"})
m.AddServer(&metadata.Server{Name: "s3"})
for i := 0; i < 100; i++ {
m.RebalanceServers()
require.False(t, m.IsOffline())
}
})
}

0 comments on commit 1a351d5

Please sign in to comment.