Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add rotate option for recursors #8882

Closed
wants to merge 2 commits into from
Closed
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions agent/config/builder.go
Original file line number Diff line number Diff line change
Expand Up @@ -903,6 +903,7 @@ func (b *Builder) Build() (rt RuntimeConfig, err error) {
DNSOnlyPassing: b.boolVal(c.DNS.OnlyPassing),
DNSPort: dnsPort,
DNSRecursorTimeout: b.durationVal("recursor_timeout", c.DNS.RecursorTimeout),
DNSRecursorRotate: b.boolValWithDefault(c.DNS.RecursorRotate, false),
DNSRecursors: dnsRecursors,
DNSServiceTTL: dnsServiceTTL,
DNSSOA: soa,
Expand Down
1 change: 1 addition & 0 deletions agent/config/config.go
Original file line number Diff line number Diff line change
Expand Up @@ -590,6 +590,7 @@ type DNS struct {
NodeTTL *string `json:"node_ttl,omitempty" hcl:"node_ttl" mapstructure:"node_ttl"`
OnlyPassing *bool `json:"only_passing,omitempty" hcl:"only_passing" mapstructure:"only_passing"`
RecursorTimeout *string `json:"recursor_timeout,omitempty" hcl:"recursor_timeout" mapstructure:"recursor_timeout"`
RecursorRotate *bool `json:"recursor_rotate,omitempty" hcl:"recursor_rotate" mapstructure:"recursor_rotate"`
ServiceTTL map[string]string `json:"service_ttl,omitempty" hcl:"service_ttl" mapstructure:"service_ttl"`
UDPAnswerLimit *int `json:"udp_answer_limit,omitempty" hcl:"udp_answer_limit" mapstructure:"udp_answer_limit"`
NodeMetaTXT *bool `json:"enable_additional_node_meta_txt,omitempty" hcl:"enable_additional_node_meta_txt" mapstructure:"enable_additional_node_meta_txt"`
Expand Down
7 changes: 7 additions & 0 deletions agent/config/runtime.go
Original file line number Diff line number Diff line change
Expand Up @@ -275,6 +275,13 @@ type RuntimeConfig struct {
// hcl: dns_config { recursor_timeout = "duration" }
DNSRecursorTimeout time.Duration

// DNSRecursorRotate causes round-robin selection of recursors.
// This has the effect of spreading the query load among all listed servers,
// rather than having all clients try the first listed server first every time.
//
// hcl: dns_config { recursor_rotate = (true|false) }
DNSRecursorRotate bool

// DNSServiceTTL provides the TTL value for a service
// query for given service. The "*" wildcard can be used
// to set a default for all services.
Expand Down
4 changes: 4 additions & 0 deletions agent/config/runtime_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -5029,6 +5029,7 @@ func TestFullConfig(t *testing.T) {
"node_ttl": "7084s",
"only_passing": true,
"recursor_timeout": "4427s",
"recursor_rotate": true,
"service_ttl": {
"*": "32030s"
},
Expand Down Expand Up @@ -5711,6 +5712,7 @@ func TestFullConfig(t *testing.T) {
node_ttl = "7084s"
only_passing = true
recursor_timeout = "4427s"
recursor_rotate = true
service_ttl = {
"*" = "32030s"
}
Expand Down Expand Up @@ -6472,6 +6474,7 @@ func TestFullConfig(t *testing.T) {
DNSOnlyPassing: true,
DNSPort: 7001,
DNSRecursorTimeout: 4427 * time.Second,
DNSRecursorRotate: true,
DNSRecursors: []string{"63.38.39.58", "92.49.18.18"},
DNSSOA: RuntimeSOAConfig{Refresh: 3600, Retry: 600, Expire: 86400, Minttl: 0},
DNSServiceTTL: map[string]time.Duration{"*": 32030 * time.Second},
Expand Down Expand Up @@ -7378,6 +7381,7 @@ func TestSanitize(t *testing.T) {
"DNSOnlyPassing": false,
"DNSPort": 0,
"DNSRecursorTimeout": "0s",
"DNSRecursorRotate": false,
"DNSRecursors": [],
"DNSServiceTTL": {},
"DNSSOA": {
Expand Down
31 changes: 23 additions & 8 deletions agent/dns.go
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,11 @@ type dnsSOAConfig struct {
Minttl uint32 // 0
}

type dnsRecursors struct {
Addrs []string
RoundRobinIndex uint32
}

type dnsConfig struct {
AllowStale bool
Datacenter string
Expand All @@ -59,7 +64,8 @@ type dnsConfig struct {
NodeTTL time.Duration
OnlyPassing bool
RecursorTimeout time.Duration
Recursors []string
RecursorRotate bool
Recursors dnsRecursors
SegmentName string
UDPAnswerLimit int
ARecordLimit int
Expand Down Expand Up @@ -135,6 +141,7 @@ func GetDNSConfig(conf *config.RuntimeConfig) (*dnsConfig, error) {
NodeTTL: conf.DNSNodeTTL,
OnlyPassing: conf.DNSOnlyPassing,
RecursorTimeout: conf.DNSRecursorTimeout,
RecursorRotate: conf.DNSRecursorRotate,
SegmentName: conf.SegmentName,
UDPAnswerLimit: conf.DNSUDPAnswerLimit,
NodeMetaTXT: conf.DNSNodeMetaTXT,
Expand Down Expand Up @@ -168,7 +175,7 @@ func GetDNSConfig(conf *config.RuntimeConfig) (*dnsConfig, error) {
if err != nil {
return nil, fmt.Errorf("Invalid recursor address: %v", err)
}
cfg.Recursors = append(cfg.Recursors, ra)
cfg.Recursors.Addrs = append(cfg.Recursors.Addrs, ra)
}

return cfg, nil
Expand Down Expand Up @@ -223,7 +230,7 @@ func (d *DNSServer) ListenAndServe(network, addr string, notif func()) error {

// toggleRecursorHandlerFromConfig enables or disables the recursor handler based on config idempotently
func (d *DNSServer) toggleRecursorHandlerFromConfig(cfg *dnsConfig) {
shouldEnable := len(cfg.Recursors) > 0
shouldEnable := len(cfg.Recursors.Addrs) > 0

if shouldEnable && atomic.CompareAndSwapUint32(&d.recursorEnabled, 0, 1) {
d.mux.HandleFunc(".", d.handleRecurse)
Expand Down Expand Up @@ -342,7 +349,7 @@ func (d *DNSServer) handlePtr(resp dns.ResponseWriter, req *dns.Msg) {
m.SetReply(req)
m.Compress = !cfg.DisableCompression
m.Authoritative = true
m.RecursionAvailable = (len(cfg.Recursors) > 0)
m.RecursionAvailable = (len(cfg.Recursors.Addrs) > 0)

// Only add the SOA if requested
if req.Question[0].Qtype == dns.TypeSOA {
Expand Down Expand Up @@ -452,7 +459,7 @@ func (d *DNSServer) handleQuery(resp dns.ResponseWriter, req *dns.Msg) {
m.SetReply(req)
m.Compress = !cfg.DisableCompression
m.Authoritative = true
m.RecursionAvailable = (len(cfg.Recursors) > 0)
m.RecursionAvailable = (len(cfg.Recursors.Addrs) > 0)

ecsGlobal := true

Expand Down Expand Up @@ -1812,7 +1819,11 @@ func (d *DNSServer) handleRecurse(resp dns.ResponseWriter, req *dns.Msg) {
var r *dns.Msg
var rtt time.Duration
var err error
for _, recursor := range cfg.Recursors {
var start = int(cfg.Recursors.RoundRobinIndex)
if cfg.RecursorRotate {
cfg.Recursors.RoundRobinIndex += 1
}
for _, recursor := range append(cfg.Recursors.Addrs[start:], cfg.Recursors.Addrs[:start]...) {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think there may be a problems here. cfg.Recursors.RoundRobinIndex does not appear to be reset to 0 when it reaches the end of the list. I think this will panic after a few requests.

I believe cfg.Recursors.RoundRobinIndex += 1 is not safe for concurrent use either, so this could return incorrect results when there are multiple handlers running at the same time in different goroutines.

Instead of round robin, would random selection be ok? Something like this would remove the need to track the Index, and I think should also be safe for concurrent use because we only read concurrently, and the recursors list does not change.

for _, idx := range rand.Perm(len(cfg.Recursors)) {
    recursor := cfg.Recursors[idx]
}

Copy link
Contributor Author

@munali munali Oct 10, 2020

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@dnephin Doh! I forgot to modulo.

I am sure this is not safe for concurrent use either, but I believe something like this could work as well? What do you think.

cfg.Recursors.RoundRobinIndex = (atomic.AddInt32(cfg.Recursors.RoundRobinIndex, 1)%len(cfg.Recursors))

Either way, I like your suggestion because it does a good job of distributing the load. Whereas my attempt adds complexity to increment the index and store it as an extra variable.

Would something like this be good/safe golang style?

for idx, random_idx := range rand.Perm(len(cfg.Recursors)) {
    recursor := cfg.Recursors[idx]
    if cfg. RecursorRotate {
       recursor = cfg.Recursors[random_idx]
   }
}

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@dnephin any thoughts on the changes?

r, rtt, err = c.Exchange(req, recursor)
// Check if the response is valid and has the desired Response code
if r != nil && (r.Rcode != dns.RcodeSuccess && r.Rcode != dns.RcodeNameError) {
Expand Down Expand Up @@ -1883,7 +1894,7 @@ func (d *DNSServer) resolveCNAME(cfg *dnsConfig, name string, maxRecursionLevel
}

// Do nothing if we don't have a recursor
if len(cfg.Recursors) == 0 {
if len(cfg.Recursors.Addrs) == 0 {
return nil
}

Expand All @@ -1896,7 +1907,11 @@ func (d *DNSServer) resolveCNAME(cfg *dnsConfig, name string, maxRecursionLevel
var r *dns.Msg
var rtt time.Duration
var err error
for _, recursor := range cfg.Recursors {
var start = int(cfg.Recursors.RoundRobinIndex)
if cfg.RecursorRotate {
cfg.Recursors.RoundRobinIndex += 1
}
for _, recursor := range append(cfg.Recursors.Addrs[start:], cfg.Recursors.Addrs[:start]...) {
r, rtt, err = c.Exchange(m, recursor)
if err == nil {
d.logger.Debug("cname recurse RTT for name",
Expand Down
6 changes: 4 additions & 2 deletions agent/dns_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -7017,7 +7017,8 @@ func TestDNS_ConfigReload(t *testing.T) {

for _, s := range a.dnsServers {
cfg := s.config.Load().(*dnsConfig)
require.Equal(t, []string{"8.8.8.8:53"}, cfg.Recursors)
require.Equal(t, []string{"8.8.8.8:53"}, cfg.Recursors.Addrs)
require.False(t, cfg.RecursorRotate)
require.False(t, cfg.AllowStale)
require.Equal(t, 20*time.Second, cfg.MaxStale)
require.Equal(t, 10*time.Second, cfg.NodeTTL)
Expand Down Expand Up @@ -7062,7 +7063,8 @@ func TestDNS_ConfigReload(t *testing.T) {

for _, s := range a.dnsServers {
cfg := s.config.Load().(*dnsConfig)
require.Equal(t, []string{"1.1.1.1:53"}, cfg.Recursors)
require.Equal(t, []string{"1.1.1.1:53"}, cfg.Recursors.Addrs)
require.False(t, cfg.RecursorRotate)
require.True(t, cfg.AllowStale)
require.Equal(t, 21*time.Second, cfg.MaxStale)
require.Equal(t, 11*time.Second, cfg.NodeTTL)
Expand Down
3 changes: 3 additions & 0 deletions website/pages/docs/agent/options.mdx
Original file line number Diff line number Diff line change
Expand Up @@ -1381,6 +1381,9 @@ Valid time units are 'ns', 'us' (or 'µs'), 'ms', 's', 'm', 'h'."
- `recursor_timeout` - Timeout used by Consul when
recursively querying an upstream DNS server. See [`recursors`](#recursors) for more details. Default is 2s. This is available in Consul 0.7 and later.

- `recursor_rotate` - If set to true, Consul will pick an upstream DNS server in a round robin
See [`recursors`](#recursors) for more details. Default is false.

- `disable_compression` - If set to true, DNS
responses will not be compressed. Compression was added and enabled by default
in Consul 0.7.
Expand Down