Use native grpc round robin balancer #12940
@@ -20,6 +20,7 @@ import (
     "github.com/hashicorp/consul/agent/structs"
     "github.com/hashicorp/consul/proto/pbservice"
     "github.com/hashicorp/consul/proto/pbsubscribe"
+    "github.com/hashicorp/consul/sdk/testutil/retry"
     "github.com/hashicorp/consul/testrpc"
 )
@@ -204,11 +205,10 @@ func TestSubscribeBackend_IntegrationWithServer_TLSReload(t *testing.T) {
     server.tlsConfigurator.Update(newConf)

-    // Try the subscribe call again
-    retryFailedConn(t, conn)
-
     streamClient = pbsubscribe.NewStateChangeSubscriptionClient(conn)
-    _, err = streamClient.Subscribe(ctx, req)
-    require.NoError(t, err)
+    retry.Run(t, func(r *retry.R) {
+        _, err = streamClient.Subscribe(ctx, req)
+        require.NoError(r, err)
+    })
 }

 func clientConfigVerifyOutgoing(config *Config) {

Review comment on lines +208 to +211:

There's a slight timing issue (<1s) that flakes the second try. I'm unsure why the previous code did not run into the same issue, but I didn't consider this to be a significant regression.
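For readers unfamiliar with the helper imported above: Consul's sdk/testutil/retry package re-runs a function until its assertions stop failing or a timeout elapses, which is what absorbs the sub-second flake described in the review comment. A minimal, self-contained sketch of the pattern — checkServerReady is a hypothetical stand-in for the Subscribe call in the diff:

package example

import (
    "errors"
    "testing"
    "time"

    "github.com/hashicorp/consul/sdk/testutil/retry"
)

// checkServerReady is a hypothetical stand-in for the real call under test;
// it fails for the first ~100ms to simulate the sub-second timing race.
func checkServerReady(start time.Time) error {
    if time.Since(start) < 100*time.Millisecond {
        return errors.New("not ready yet")
    }
    return nil
}

func TestRetryPattern(t *testing.T) {
    start := time.Now()
    // retry.Run re-invokes the function until it returns without calling
    // r.Fatal/r.Error, or until the default retryer gives up.
    retry.Run(t, func(r *retry.R) {
        if err := checkServerReady(start); err != nil {
            r.Fatalf("server not ready: %v", err)
        }
    })
}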
@@ -8,6 +8,7 @@ import (
     "time"

     "google.golang.org/grpc"
+    "google.golang.org/grpc/balancer/roundrobin"
     "google.golang.org/grpc/keepalive"

     "github.com/hashicorp/consul/agent/metadata"

@@ -130,8 +131,8 @@ func (c *ClientConnPool) dial(datacenter string, serverType string) (*grpc.Clien
     grpc.WithContextDialer(c.dialer),
     grpc.WithDisableRetry(),
     grpc.WithStatsHandler(newStatsHandler(defaultMetrics())),
-    // nolint:staticcheck // there is no other supported alternative to WithBalancerName
-    grpc.WithBalancerName("pick_first"),
+    // nolint:staticcheck
+    grpc.WithBalancerName(roundrobin.Name),
     // Keep alive parameters are based on the same default ones we used for
     // Yamux. These are somewhat arbitrary but we did observe in scale testing
     // that the gRPC defaults (servers send keepalives only every 2 hours,

Review comment on the removed nolint justification:

This is no longer true: https://pkg.go.dev/google.golang.org/grpc/balancer#section-directories
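For context on that review comment: grpc.WithBalancerName is deprecated, and the supported way to pick a balancer is now to name it in the default service config. A hedged sketch of that alternative — the target string and credential choice are placeholders, not what this PR does:

package main

import (
    "google.golang.org/grpc"
    "google.golang.org/grpc/credentials/insecure"
)

// dialRoundRobin requests the round_robin balancer through the service
// config instead of the deprecated grpc.WithBalancerName option.
func dialRoundRobin(target string) (*grpc.ClientConn, error) {
    return grpc.Dial(target,
        grpc.WithTransportCredentials(insecure.NewCredentials()),
        grpc.WithDefaultServiceConfig(`{"loadBalancingConfig": [{"round_robin":{}}]}`),
    )
}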
@@ -373,21 +373,11 @@ func TestClientConnPool_IntegrationWithGRPCResolver_Rebalance(t *testing.T) {
     first, err := client.Something(ctx, &testservice.Req{})
     require.NoError(t, err)

-    t.Run("rebalance a different DC, does nothing", func(t *testing.T) {
-        res.NewRebalancer("dc-other")()
-
-        resp, err := client.Something(ctx, &testservice.Req{})
-        require.NoError(t, err)
-        require.Equal(t, resp.ServerName, first.ServerName)
-    })
-
-    t.Run("rebalance the dc", func(t *testing.T) {
-        // Rebalance is random, but if we repeat it a few times it should give us a
-        // new server.
-        attempts := 100
-        for i := 0; i < attempts; i++ {
-            res.NewRebalancer("dc1")()
-
-            resp, err := client.Something(ctx, &testservice.Req{})
-            require.NoError(t, err)
-            if resp.ServerName != first.ServerName {

Review comment on the removed "rebalance a different DC" subtest:

No longer a relevant test since calls are round-robin.
@@ -2,10 +2,8 @@ package resolver

 import (
     "fmt"
-    "math/rand"
     "strings"
     "sync"
-    "time"

     "google.golang.org/grpc/resolver"
@@ -43,31 +41,6 @@ func NewServerResolverBuilder(cfg Config) *ServerResolverBuilder {
 }

-// NewRebalancer returns a function which shuffles the server list for resolvers
-// in all datacenters.
-func (s *ServerResolverBuilder) NewRebalancer(dc string) func() {
-    shuffler := rand.New(rand.NewSource(time.Now().UnixNano()))
-    return func() {
-        s.lock.RLock()
-        defer s.lock.RUnlock()
-
-        for _, resolver := range s.resolvers {
-            if resolver.datacenter != dc {
-                continue
-            }
-            // Shuffle the list of addresses using the last list given to the resolver.
-            resolver.addrLock.Lock()
-            addrs := resolver.addrs
-            shuffler.Shuffle(len(addrs), func(i, j int) {
-                addrs[i], addrs[j] = addrs[j], addrs[i]
-            })
-            // Pass the shuffled list to the resolver.
-            resolver.updateAddrsLocked(addrs)
-            resolver.addrLock.Unlock()
-        }
-    }
-}
-
 // ServerForGlobalAddr returns server metadata for a server with the specified globally unique address.
 func (s *ServerResolverBuilder) ServerForGlobalAddr(globalAddr string) (*metadata.Server, error) {
     s.lock.RLock()
@@ -270,22 +243,7 @@ func (r *serverResolver) updateAddrs(addrs []resolver.Address) {
 // updateAddrsLocked updates this serverResolver's ClientConn to use the given
 // set of addrs. addrLock must be held by caller.
 func (r *serverResolver) updateAddrsLocked(addrs []resolver.Address) {
-    // Only pass the first address initially, which will cause the
-    // balancer to spin down the connection for its previous first address
-    // if it is different. If we don't do this, it will keep using the old
-    // first address as long as it is still in the list, making it impossible to
-    // rebalance until that address is removed.
-    var firstAddr []resolver.Address
-    if len(addrs) > 0 {
-        firstAddr = []resolver.Address{addrs[0]}
-    }
-    r.clientConn.UpdateState(resolver.State{Addresses: firstAddr})
-
-    // Call UpdateState again with the entire list of addrs in case we need them
-    // for failover.
     r.clientConn.UpdateState(resolver.State{Addresses: addrs})

     r.addrs = addrs
 }

 func (r *serverResolver) Close() {

Review comment on lines -273 to -285:

If we do want to preserve the old behavior of killing streams (almost) every time server addresses are shuffled, we could add some logic here.

Review comment:

Didn't see a need for this code to hard-reset the connection; it did not impact the test (nor did it cause me to add the retry below).
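A rough illustration of the "some logic here" idea from the review comment above, i.e. restoring the stream-killing side effect when addresses change. Everything below is a hypothetical sketch, not code from the PR; serverResolver is pared down to the two fields the sketch needs:

package resolversketch

import "google.golang.org/grpc/resolver"

// serverResolver is a minimal stand-in for the type in the diff above.
type serverResolver struct {
    clientConn resolver.ClientConn
    addrs      []resolver.Address
}

// updateAddrsLocked sketches one way to force a hard reset: publish a
// single-address state first, which makes the balancer tear down subconns
// (and any streams on them) for every other server, then publish the full
// list so round_robin can reconnect to all of them.
func (r *serverResolver) updateAddrsLocked(addrs []resolver.Address) {
    if len(addrs) > 0 {
        r.clientConn.UpdateState(resolver.State{Addresses: addrs[:1]})
    }
    r.clientConn.UpdateState(resolver.State{Addresses: addrs})
    r.addrs = addrs
}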