Skip to content

Commit

Permalink
Add configcheck command (#1139)
Browse files Browse the repository at this point in the history
* Fix config storage for file provider

* Change default yolo instance name

* Fix logic for discovered config

* Add configcheck to the flare

* Add unresolved templates dump

* Add configcheck reno notes
  • Loading branch information
mfpierre authored Feb 5, 2018
1 parent d85eae5 commit 3caf1ff
Show file tree
Hide file tree
Showing 23 changed files with 409 additions and 80 deletions.
18 changes: 18 additions & 0 deletions cmd/agent/api/agent/agent.go
Original file line number Diff line number Diff line change
Expand Up @@ -15,10 +15,12 @@ import (

log "github.com/cihub/seelog"

"github.com/DataDog/datadog-agent/cmd/agent/api/response"
"github.com/DataDog/datadog-agent/cmd/agent/common"
"github.com/DataDog/datadog-agent/cmd/agent/common/signals"
"github.com/DataDog/datadog-agent/cmd/agent/gui"
apiutil "github.com/DataDog/datadog-agent/pkg/api/util"
"github.com/DataDog/datadog-agent/pkg/collector/autodiscovery"
"github.com/DataDog/datadog-agent/pkg/collector/py"
"github.com/DataDog/datadog-agent/pkg/config"
"github.com/DataDog/datadog-agent/pkg/flare"
Expand All @@ -40,6 +42,7 @@ func SetupHandlers(r *mux.Router) {
r.HandleFunc("/{component}/status", componentStatusHandler).Methods("POST")
r.HandleFunc("/{component}/configs", componentConfigHandler).Methods("GET")
r.HandleFunc("/gui/csrf-token", getCSRFToken).Methods("GET")
r.HandleFunc("/config-check", getConfigCheck).Methods("GET")
}

func stopAgent(w http.ResponseWriter, r *http.Request) {
Expand Down Expand Up @@ -200,3 +203,18 @@ func getCSRFToken(w http.ResponseWriter, r *http.Request) {
}
w.Write([]byte(gui.CsrfToken))
}

// getConfigCheck serves the /config-check route. It gathers the configs
// loaded per provider, the template resolve warnings and the templates that
// are still unresolved, and writes them to the client as a JSON-encoded
// response.ConfigCheckResponse.
//
// If the payload cannot be marshalled, the error is logged and reported to
// the client as an HTTP 500 instead of an empty successful response.
func getConfigCheck(w http.ResponseWriter, r *http.Request) {
	// Local names deliberately differ from the imported `response` and `json`
	// packages so that neither package is shadowed inside the handler.
	configCheck := response.ConfigCheckResponse{
		Configs:    common.AC.GetProviderLoadedConfigs(),
		Warnings:   autodiscovery.GetResolveWarnings(),
		Unresolved: common.AC.GetUnresolvedTemplates(),
	}

	body, err := json.Marshal(configCheck)
	if err != nil {
		log.Errorf("Unable to marshal config check response: %s", err)
		// Do not fall through: writing a nil body would reply 200 with no
		// content, which the client cannot tell apart from a broken answer.
		http.Error(w, err.Error(), http.StatusInternalServerError)
		return
	}

	w.Header().Set("Content-Type", "application/json")
	w.Write(body)
}
15 changes: 15 additions & 0 deletions cmd/agent/api/response/types.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
// Unless explicitly stated otherwise all files in this repository are licensed
// under the Apache License Version 2.0.
// This product includes software developed at Datadog (https://www.datadoghq.com/).
// Copyright 2018 Datadog, Inc.

package response

import "github.com/DataDog/datadog-agent/pkg/collector/check"

// ConfigCheckResponse holds the config check response: the payload that the
// agent API's /config-check handler marshals to JSON.
type ConfigCheckResponse struct {
// Configs holds the loaded check configs, grouped per config provider.
Configs map[string][]check.Config `json:"configs"`
// Warnings holds the template resolve warnings; presumably keyed by config
// name — TODO confirm against the errorStats implementation.
Warnings map[string][]string `json:"warnings"`
// Unresolved holds the templates that are still waiting to be resolved.
// NOTE(review): the meaning of the map key is not visible from here —
// confirm against TemplateCache.GetUnresolvedTemplates.
Unresolved map[string]check.Config `json:"unresolved"`
}
44 changes: 44 additions & 0 deletions cmd/agent/app/config_check.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,44 @@
// Unless explicitly stated otherwise all files in this repository are licensed
// under the Apache License Version 2.0.
// This product includes software developed at Datadog (https://www.datadoghq.com/).
// Copyright 2018 Datadog, Inc.

package app

import (
"fmt"

"github.com/DataDog/datadog-agent/cmd/agent/common"
"github.com/DataDog/datadog-agent/pkg/flare"

"github.com/fatih/color"
"github.com/spf13/cobra"
)

var withDebug bool

func init() {
AgentCmd.AddCommand(configCheckCommand)

configCheckCommand.Flags().BoolVarP(&withDebug, "verbose", "v", false, "print additional debug info")
}

// configCheckCommand is the `configcheck` sub-command. It sets up the global
// agent configuration, honours the no-color flag, and then delegates to
// flare.GetConfigCheck, which writes its output to color.Output.
var configCheckCommand = &cobra.Command{
	Use:   "configcheck",
	Short: "Print all configurations loaded & resolved of a running agent",
	Long:  ``,
	RunE: func(cmd *cobra.Command, args []string) error {
		if err := common.SetupConfig(confFilePath); err != nil {
			return fmt.Errorf("unable to set up global agent configuration: %v", err)
		}

		// Colors must be switched off before anything is printed.
		if flagNoColor {
			color.NoColor = true
		}

		return flare.GetConfigCheck(color.Output, withDebug)
	},
}
3 changes: 1 addition & 2 deletions cmd/agent/common/autoconfig.go
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,6 @@
package common

import (
"fmt"
"path/filepath"

"github.com/DataDog/datadog-agent/pkg/collector"
Expand All @@ -26,7 +25,7 @@ func SetupAutoConfig(confdPath string) {
// start tagging system
err := tagger.Init()
if err != nil {
fmt.Printf("Unable to start tagging system: %s", err)
log.Errorf("Unable to start tagging system: %s", err)
}

// create the Collector instance and start all the components
Expand Down
2 changes: 1 addition & 1 deletion cmd/agent/dist/conf.d/cpu.d/conf.yaml.default
Original file line number Diff line number Diff line change
@@ -1,2 +1,2 @@
instances:
- foo: bar
- {}
2 changes: 1 addition & 1 deletion cmd/agent/dist/conf.d/memory.d/conf.yaml.default
Original file line number Diff line number Diff line change
@@ -1,2 +1,2 @@
instances:
- foo: bar
- {}
104 changes: 61 additions & 43 deletions pkg/collector/autodiscovery/autoconfig.go
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,9 @@ func init() {
acErrors.Set("RunErrors", expvar.Func(func() interface{} {
return errorStats.getRunErrors()
}))
acErrors.Set("ResolveWarnings", expvar.Func(func() interface{} {
return errorStats.getResolveWarnings()
}))
}

// providerDescriptor keeps track of the configurations loaded by a certain
Expand All @@ -59,30 +62,32 @@ type providerDescriptor struct {
// Notice the `AutoConfig` public API speaks in terms of `check.Config`,
// meaning that you cannot use it to schedule check instances directly.
type AutoConfig struct {
collector *collector.Collector
providers []*providerDescriptor
loaders []check.Loader
templateCache *TemplateCache
listeners []listeners.ServiceListener
configResolver *ConfigResolver
configsPollTicker *time.Ticker
config2checks map[string][]check.ID // cache the ID of checks we load for each config
name2jmxmetrics map[string]check.ConfigData // holds the metrics to collect for JMX checks
stop chan bool
pollerActive bool
m sync.RWMutex
collector *collector.Collector
providers []*providerDescriptor
loaders []check.Loader
templateCache *TemplateCache
listeners []listeners.ServiceListener
configResolver *ConfigResolver
configsPollTicker *time.Ticker
config2checks map[string][]check.ID // cache the ID of checks we load for each config
name2jmxmetrics map[string]check.ConfigData // holds the metrics to collect for JMX checks
providerLoadedConfigs map[string][]check.Config // holds the resolved config per provider
stop chan bool
pollerActive bool
m sync.RWMutex
}

// NewAutoConfig creates an AutoConfig instance.
func NewAutoConfig(collector *collector.Collector) *AutoConfig {
ac := &AutoConfig{
collector: collector,
providers: make([]*providerDescriptor, 0, 5),
loaders: make([]check.Loader, 0, 5),
templateCache: NewTemplateCache(),
config2checks: make(map[string][]check.ID),
name2jmxmetrics: make(map[string]check.ConfigData),
stop: make(chan bool),
collector: collector,
providers: make([]*providerDescriptor, 0, 5),
loaders: make([]check.Loader, 0, 5),
templateCache: NewTemplateCache(),
config2checks: make(map[string][]check.ID),
name2jmxmetrics: make(map[string]check.ConfigData),
providerLoadedConfigs: make(map[string][]check.Config),
stop: make(chan bool),
}
ac.configResolver = newConfigResolver(collector, ac, ac.templateCache)
return ac
Expand Down Expand Up @@ -183,12 +188,12 @@ func (ac *AutoConfig) GetChecksByName(checkName string) []check.Check {
// getAllConfigs queries all the providers and returns all the check
// configurations found, resolving the ones it can
func (ac *AutoConfig) getAllConfigs() []check.Config {
rawConfigs := []check.Config{}
resolvedConfigs := []check.Config{}

for _, pd := range ac.providers {
cfgs, _ := pd.provider.Collect()

if fileConfPd, ok := pd.provider.(*providers.FileConfigProvider); ok {

var goodConfs []check.Config
for _, cfg := range cfgs {
// JMX checks can have 2 YAML files: one containing the metrics to collect, one containing the
Expand All @@ -213,15 +218,14 @@ func (ac *AutoConfig) getAllConfigs() []check.Config {

cfgs = goodConfs
}
rawConfigs = append(rawConfigs, cfgs...)
}
resolvedConfigs := []check.Config{}
for _, config := range rawConfigs {
rc, err := ac.resolve(config)
if err != nil {
log.Error(err)
// Store all raw configs in the provider
pd.configs = cfgs

// resolve configs if needed
for _, config := range cfgs {
rc := ac.resolve(config, pd.provider.String())
resolvedConfigs = append(resolvedConfigs, rc...)
}
resolvedConfigs = append(resolvedConfigs, rc...)
}

return resolvedConfigs
Expand Down Expand Up @@ -262,9 +266,8 @@ func (ac *AutoConfig) schedule(checks []check.Check) {
}
}

// resolve loads and resolves a given config and can optionally schedule the
// corresponding Check instances. Returns a slice of resolved configs
func (ac *AutoConfig) resolve(config check.Config) ([]check.Config, error) {
// resolve loads and resolves a given config into a slice of resolved configs
func (ac *AutoConfig) resolve(config check.Config, provider string) []check.Config {
configs := []check.Config{}

// add default metrics to collect to JMX checks
Expand All @@ -279,28 +282,31 @@ func (ac *AutoConfig) resolve(config check.Config) ([]check.Config, error) {

if config.IsTemplate() {
// store the template in the cache in any case
if err := ac.templateCache.Set(config); err != nil {
if err := ac.templateCache.Set(config, provider); err != nil {
log.Errorf("Unable to store Check configuration in the cache: %s", err)
}

// try to resolve the template
resolvedConfigs := ac.configResolver.ResolveTemplate(config)
if len(resolvedConfigs) == 0 {
log.Infof("Can't resolve the template for %s at this moment.", config.Name)
return configs, nil
e := fmt.Sprintf("Can't resolve the template for %s at this moment.", config.Name)
errorStats.setResolveWarning(config.Name, e)
log.Infof(e)
return configs
}
errorStats.removeResolveWarnings(config.Name)

// If success, get the checks for each config resolved
// and schedule for running, each template can resolve
// to multiple configs
// each template can resolve to multiple configs
for _, config := range resolvedConfigs {
configs = append(configs, config)
}
} else {
configs = append(configs, config)
// store non template configs in the AC
ac.providerLoadedConfigs[provider] = append(ac.providerLoadedConfigs[provider], config)
}

return configs, nil
return configs
}

// AddListener adds a service listener to AutoConfig.
Expand Down Expand Up @@ -366,10 +372,7 @@ func (ac *AutoConfig) pollConfigs() {
newConfigs, removedConfigs := ac.collect(pd)
for _, config := range newConfigs {
// store the checks we schedule for this config locally
resolvedConfigs, err := ac.resolve(config)
if err != nil {
log.Error(err)
}
resolvedConfigs := ac.resolve(config, pd.provider.String())
checks := ac.getChecksFromConfigs(resolvedConfigs, true)
ac.schedule(checks)
}
Expand Down Expand Up @@ -473,6 +476,16 @@ func (ac *AutoConfig) GetChecks(config check.Config) ([]check.Check, error) {
return []check.Check{}, fmt.Errorf("unable to load any check from config '%s'", config.Name)
}

// GetProviderLoadedConfigs returns the configs loaded so far, grouped per
// provider.
// NOTE(review): the AutoConfig's own map is handed out as-is (no copy, no
// locking here) — confirm that callers only read from it.
func (ac *AutoConfig) GetProviderLoadedConfigs() map[string][]check.Config {
	loaded := ac.providerLoadedConfigs
	return loaded
}

// GetUnresolvedTemplates exposes the template cache's view of the templates
// that have not been resolved yet.
func (ac *AutoConfig) GetUnresolvedTemplates() map[string]check.Config {
	cache := ac.templateCache
	return cache.GetUnresolvedTemplates()
}

// check if the descriptor contains the Config passed
func (pd *providerDescriptor) contains(c *check.Config) bool {
for _, config := range pd.configs {
Expand All @@ -493,3 +506,8 @@ func GetLoaderErrors() map[string]LoaderErrors {
func GetConfigErrors() map[string]string {
	// Thin public accessor over the package-level error stats.
	configErrors := errorStats.getConfigErrors()
	return configErrors
}

// GetResolveWarnings gets the resolve warnings/errors recorded in the
// package-level error stats.
func GetResolveWarnings() map[string][]string {
	warnings := errorStats.getResolveWarnings()
	return warnings
}
1 change: 1 addition & 0 deletions pkg/collector/autodiscovery/autoconfig_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@ func (p *MockProvider) Collect() ([]check.Config, error) {
// String returns the fixed name identifying the mocked provider.
func (p *MockProvider) String() string {
	const name = "mocked"
	return name
}

// IsUpToDate always reports the mocked provider as up to date, with no error.
func (p *MockProvider) IsUpToDate() (bool, error) {
	const upToDate = true
	return upToDate, nil
}
Expand Down
19 changes: 16 additions & 3 deletions pkg/collector/autodiscovery/configresolver.go
Original file line number Diff line number Diff line change
Expand Up @@ -108,7 +108,9 @@ func (cr *ConfigResolver) ResolveTemplate(tpl check.Config) []check.Config {
// check out whether any service we know has this identifier
serviceIds, found := cr.adIDToServices[id]
if !found {
log.Debugf("No service found with this AD identifier: %s", id)
s := fmt.Sprintf("No service found with this AD identifier: %s", id)
errorStats.setResolveWarning(tpl.Name, s)
log.Debugf(s)
continue
}

Expand All @@ -117,8 +119,10 @@ func (cr *ConfigResolver) ResolveTemplate(tpl check.Config) []check.Config {
if err == nil {
resolvedSet[config.Digest()] = config
} else {
log.Warnf("Error resolving template %s for service %s: %v",
err := fmt.Errorf("Error resolving template %s for service %s: %v",
config.Name, serviceID, err)
errorStats.setResolveWarning(tpl.Name, err.Error())
log.Warn(err)
}
}
}
Expand Down Expand Up @@ -146,6 +150,9 @@ func (cr *ConfigResolver) resolve(tpl check.Config, svc listeners.Service) (chec
copy(resolvedConfig.InitConfig, tpl.InitConfig)
copy(resolvedConfig.Instances, tpl.Instances)

// Get provider to map configs with it
provider := cr.templates.GetProviderFromDigest(tpl.Digest())

tags, err := svc.GetTags()
if err != nil {
return resolvedConfig, err
Expand All @@ -171,6 +178,9 @@ func (cr *ConfigResolver) resolve(tpl check.Config, svc listeners.Service) (chec
}
}

// store resolved configs in the AC
cr.ac.providerLoadedConfigs[provider] = append(cr.ac.providerLoadedConfigs[provider], resolvedConfig)

return resolvedConfig, nil
}

Expand Down Expand Up @@ -205,9 +215,12 @@ func (cr *ConfigResolver) processNewService(svc listeners.Service) {
// resolve the template
config, err := cr.resolve(template, svc)
if err != nil {
log.Errorf("Unable to resolve configuration template: %v", err)
s := fmt.Sprintf("Unable to resolve configuration template: %v", err)
errorStats.setResolveWarning(template.Name, s)
log.Errorf(s)
continue
}
errorStats.removeResolveWarnings(config.Name)

// load the checks for this config using Autoconfig
checks, err := cr.ac.GetChecks(config)
Expand Down
Loading

0 comments on commit 3caf1ff

Please sign in to comment.