Skip to content

Commit

Permalink
xds: prevent LDS flaps in mesh gateways due to unstable datacenter li…
Browse files Browse the repository at this point in the history
…sts (#9651)

1.7.x backport of #9651
  • Loading branch information
rboyer committed Feb 8, 2021
1 parent 111b627 commit 733d41a
Show file tree
Hide file tree
Showing 6 changed files with 37 additions and 6 deletions.
3 changes: 3 additions & 0 deletions .changelog/9651.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
```release-note:bug
xds: prevent LDS flaps in mesh gateways due to unstable datacenter lists; also prevent some flaps in terminating gateways
```
15 changes: 15 additions & 0 deletions agent/proxycfg/snapshot.go
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@ package proxycfg

import (
"context"
"sort"

"github.com/hashicorp/consul/agent/structs"
"github.com/mitchellh/copystructure"
Expand Down Expand Up @@ -55,6 +56,20 @@ type configSnapshotMeshGateway struct {
GatewayGroups map[string]structs.CheckServiceNodes
}

// Datacenters returns the names of every datacenter that has an entry in
// GatewayGroups, sorted in ascending lexical order.
//
// A nil receiver yields a nil slice rather than panicking, which matches the
// nil tolerance of IsEmpty on the same type.
func (c *configSnapshotMeshGateway) Datacenters() []string {
	if c == nil {
		return nil
	}

	dcs := make([]string, 0, len(c.GatewayGroups))
	for dc := range c.GatewayGroups {
		dcs = append(dcs, dc)
	}

	// Map iteration order is randomized, so always sort the results to
	// ensure we generate deterministic things over xDS, such as mesh-gateway
	// listener filter chains.
	sort.Strings(dcs)
	return dcs
}

func (c *configSnapshotMeshGateway) IsEmpty() bool {
if c == nil {
return true
Expand Down
6 changes: 4 additions & 2 deletions agent/xds/clusters.go
Original file line number Diff line number Diff line change
Expand Up @@ -113,11 +113,13 @@ func makeExposeClusterName(destinationPort int) string {
// for a mesh gateway. This will include 1 cluster per remote datacenter as well as
// 1 cluster for each service subset.
func (s *Server) clustersFromSnapshotMeshGateway(cfgSnap *proxycfg.ConfigSnapshot) ([]proto.Message, error) {
datacenters := cfgSnap.MeshGateway.Datacenters()

// 1 cluster per remote dc + 1 cluster per local service (this is a lower bound - all subset specific clusters will be appended)
clusters := make([]proto.Message, 0, len(cfgSnap.MeshGateway.GatewayGroups)+len(cfgSnap.MeshGateway.ServiceGroups))
clusters := make([]proto.Message, 0, len(datacenters)+len(cfgSnap.MeshGateway.ServiceGroups))

// generate the remote dc clusters
for dc, _ := range cfgSnap.MeshGateway.GatewayGroups {
for _, dc := range datacenters {
clusterName := connect.DatacenterSNI(dc, cfgSnap.Roots.TrustDomain)

cluster, err := s.makeMeshGatewayCluster(clusterName, cfgSnap)
Expand Down
11 changes: 9 additions & 2 deletions agent/xds/endpoints.go
Original file line number Diff line number Diff line change
Expand Up @@ -169,10 +169,17 @@ func (s *Server) filterSubsetEndpoints(subset *structs.ServiceResolverSubset, en
}

func (s *Server) endpointsFromSnapshotMeshGateway(cfgSnap *proxycfg.ConfigSnapshot) ([]proto.Message, error) {
resources := make([]proto.Message, 0, len(cfgSnap.MeshGateway.GatewayGroups)+len(cfgSnap.MeshGateway.ServiceGroups))
datacenters := cfgSnap.MeshGateway.Datacenters()
resources := make([]proto.Message, 0, len(datacenters)+len(cfgSnap.MeshGateway.ServiceGroups))

// generate the endpoints for the gateways in the remote datacenters
for dc, endpoints := range cfgSnap.MeshGateway.GatewayGroups {
for _, dc := range datacenters {
endpoints, ok := cfgSnap.MeshGateway.GatewayGroups[dc]
if !ok { // not possible
s.Logger.Error("skipping mesh gateway endpoints because no definition found", "datacenter", dc)
continue
}

clusterName := connect.DatacenterSNI(dc, cfgSnap.Roots.TrustDomain)
la := makeLoadAssignment(
clusterName,
Expand Down
3 changes: 2 additions & 1 deletion agent/xds/listeners.go
Original file line number Diff line number Diff line change
Expand Up @@ -549,7 +549,8 @@ func (s *Server) makeGatewayListener(name, addr string, port int, cfgSnap *proxy

// TODO (mesh-gateway) - Do we need to create clusters for all the old trust domains as well?
// We need 1 Filter Chain per datacenter
for dc := range cfgSnap.MeshGateway.GatewayGroups {
datacenters := cfgSnap.MeshGateway.Datacenters()
for _, dc := range datacenters {
clusterName := connect.DatacenterSNI(dc, cfgSnap.Roots.TrustDomain)
filterName := fmt.Sprintf("%s_%s", name, dc)
dcTCPProxy, err := makeTCPProxyFilter(filterName, clusterName, "mesh_gateway_remote_")
Expand Down
5 changes: 4 additions & 1 deletion agent/xds/listeners_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -298,11 +298,14 @@ func TestListenersFromSnapshot(t *testing.T) {
ProxyFeatures: sf,
}
listeners, err := s.listenersFromSnapshot(cInfo, snap)
require.NoError(err)

// The order of listeners returned via LDS isn't relevant, so it's safe
// to sort these for the purposes of test comparisons.
sort.Slice(listeners, func(i, j int) bool {
return listeners[i].(*envoy.Listener).Name < listeners[j].(*envoy.Listener).Name
})

require.NoError(err)
r, err := createResponse(ListenerType, "00000001", "00000001", listeners)
require.NoError(err)

Expand Down

0 comments on commit 733d41a

Please sign in to comment.