Skip to content

Commit

Permalink
Support for maximum size for Output of checks
Browse files Browse the repository at this point in the history
This PR will allow administrator to limit the size of Output produced by healthchecks,
when set at agent level, it will be the maximum output for all healthchecks produced
by agents.

It will also allow specific healthchecks to go below the limit set by administrator,
OutputMaxSize if set in check specification will now allow to go beyond agent level
setting, but will allow to reduce it further.

We had the following configuration  at agent level
 * check_output_max_size (default: 4k) : max number for all checks of the agent
 * check.OutputMaxSize: (default: 4k)
  • Loading branch information
pierresouchay committed May 10, 2019
1 parent 5508fd1 commit 6c21464
Show file tree
Hide file tree
Showing 13 changed files with 155 additions and 63 deletions.
28 changes: 20 additions & 8 deletions agent/agent.go
Original file line number Diff line number Diff line change
Expand Up @@ -915,6 +915,7 @@ func (a *Agent) consulConfig() (*consul.Config, error) {
base.CoordinateUpdateBatchSize = a.config.ConsulCoordinateUpdateBatchSize
base.CoordinateUpdateMaxBatches = a.config.ConsulCoordinateUpdateMaxBatches
base.CoordinateUpdatePeriod = a.config.ConsulCoordinateUpdatePeriod
base.CheckOutputMaxSize = a.config.CheckOutputMaxSize

base.RaftConfig.HeartbeatTimeout = a.config.ConsulRaftHeartbeatTimeout
base.RaftConfig.LeaderLeaseTimeout = a.config.ConsulRaftLeaderLeaseTimeout
Expand Down Expand Up @@ -971,6 +972,9 @@ func (a *Agent) consulConfig() (*consul.Config, error) {
if a.config.Bootstrap {
base.Bootstrap = true
}
if a.config.CheckOutputMaxSize > 0 {
base.CheckOutputMaxSize = a.config.CheckOutputMaxSize
}
if a.config.RejoinAfterLeave {
base.RejoinAfterLeave = true
}
Expand Down Expand Up @@ -2239,6 +2243,13 @@ func (a *Agent) addCheck(check *structs.HealthCheck, chkType *structs.CheckType,

// Check if already registered
if chkType != nil {
maxOutputSize := a.config.CheckOutputMaxSize
if maxOutputSize == 0 {
maxOutputSize = 4096
}
if chkType.OutputMaxSize > 0 && maxOutputSize > chkType.OutputMaxSize {
maxOutputSize = chkType.OutputMaxSize
}
switch {

case chkType.IsTTL():
Expand Down Expand Up @@ -2285,6 +2296,7 @@ func (a *Agent) addCheck(check *structs.HealthCheck, chkType *structs.CheckType,
Interval: chkType.Interval,
Timeout: chkType.Timeout,
Logger: a.logger,
OutputMaxSize: maxOutputSize,
TLSClientConfig: tlsClientConfig,
}
http.Start()
Expand Down Expand Up @@ -2352,7 +2364,7 @@ func (a *Agent) addCheck(check *structs.HealthCheck, chkType *structs.CheckType,
}

if a.dockerClient == nil {
dc, err := checks.NewDockerClient(os.Getenv("DOCKER_HOST"), checks.BufSize)
dc, err := checks.NewDockerClient(os.Getenv("DOCKER_HOST"), int64(maxOutputSize))
if err != nil {
a.logger.Printf("[ERR] agent: error creating docker client: %s", err)
return err
Expand Down Expand Up @@ -2387,14 +2399,14 @@ func (a *Agent) addCheck(check *structs.HealthCheck, chkType *structs.CheckType,
check.CheckID, checks.MinInterval)
chkType.Interval = checks.MinInterval
}

monitor := &checks.CheckMonitor{
Notify: a.State,
CheckID: check.CheckID,
ScriptArgs: chkType.ScriptArgs,
Interval: chkType.Interval,
Timeout: chkType.Timeout,
Logger: a.logger,
Notify: a.State,
CheckID: check.CheckID,
ScriptArgs: chkType.ScriptArgs,
Interval: chkType.Interval,
Timeout: chkType.Timeout,
Logger: a.logger,
OutputMaxSize: maxOutputSize,
}
monitor.Start()
a.checkMonitors[check.CheckID] = monitor
Expand Down
23 changes: 14 additions & 9 deletions agent/checks/check.go
Original file line number Diff line number Diff line change
Expand Up @@ -56,13 +56,14 @@ type CheckNotifier interface {
// determine the health of a given check. It is compatible with
// nagios plugins and expects the output in the same format.
type CheckMonitor struct {
Notify CheckNotifier
CheckID types.CheckID
Script string
ScriptArgs []string
Interval time.Duration
Timeout time.Duration
Logger *log.Logger
Notify CheckNotifier
CheckID types.CheckID
Script string
ScriptArgs []string
Interval time.Duration
Timeout time.Duration
Logger *log.Logger
OutputMaxSize int

stop bool
stopCh chan struct{}
Expand Down Expand Up @@ -122,7 +123,7 @@ func (c *CheckMonitor) check() {
}

// Collect the output
output, _ := circbuf.NewBuffer(BufSize)
output, _ := circbuf.NewBuffer(int64(c.OutputMaxSize))
cmd.Stdout = output
cmd.Stderr = output
exec.SetSysProcAttr(cmd)
Expand Down Expand Up @@ -303,6 +304,7 @@ type CheckHTTP struct {
Timeout time.Duration
Logger *log.Logger
TLSClientConfig *tls.Config
OutputMaxSize int

httpClient *http.Client
stop bool
Expand Down Expand Up @@ -339,6 +341,9 @@ func (c *CheckHTTP) Start() {
} else if c.Interval < 10*time.Second {
c.httpClient.Timeout = c.Interval
}
if c.OutputMaxSize < 1 {
c.OutputMaxSize = 4096
}
}

c.stop = false
Expand Down Expand Up @@ -413,7 +418,7 @@ func (c *CheckHTTP) check() {
defer resp.Body.Close()

// Read the response into a circular buffer to limit the size
output, _ := circbuf.NewBuffer(BufSize)
output, _ := circbuf.NewBuffer(int64(c.OutputMaxSize))
if _, err := io.Copy(output, resp.Body); err != nil {
c.Logger.Printf("[WARN] agent: Check %q error while reading body: %s", c.CheckID, err)
}
Expand Down
72 changes: 39 additions & 33 deletions agent/checks/check_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -44,11 +44,12 @@ func TestCheckMonitor_Script(t *testing.T) {
t.Run(tt.status, func(t *testing.T) {
notif := mock.NewNotify()
check := &CheckMonitor{
Notify: notif,
CheckID: types.CheckID("foo"),
Script: tt.script,
Interval: 25 * time.Millisecond,
Logger: log.New(ioutil.Discard, uniqueID(), log.LstdFlags),
Notify: notif,
CheckID: types.CheckID("foo"),
Script: tt.script,
Interval: 25 * time.Millisecond,
Logger: log.New(ioutil.Discard, uniqueID(), log.LstdFlags),
OutputMaxSize: 4096,
}
check.Start()
defer check.Stop()
Expand Down Expand Up @@ -79,11 +80,12 @@ func TestCheckMonitor_Args(t *testing.T) {
t.Run(tt.status, func(t *testing.T) {
notif := mock.NewNotify()
check := &CheckMonitor{
Notify: notif,
CheckID: types.CheckID("foo"),
ScriptArgs: tt.args,
Interval: 25 * time.Millisecond,
Logger: log.New(ioutil.Discard, uniqueID(), log.LstdFlags),
Notify: notif,
CheckID: types.CheckID("foo"),
ScriptArgs: tt.args,
Interval: 25 * time.Millisecond,
Logger: log.New(ioutil.Discard, uniqueID(), log.LstdFlags),
OutputMaxSize: 4096,
}
check.Start()
defer check.Stop()
Expand All @@ -103,12 +105,13 @@ func TestCheckMonitor_Timeout(t *testing.T) {
// t.Parallel() // timing test. no parallel
notif := mock.NewNotify()
check := &CheckMonitor{
Notify: notif,
CheckID: types.CheckID("foo"),
ScriptArgs: []string{"sh", "-c", "sleep 1 && exit 0"},
Interval: 50 * time.Millisecond,
Timeout: 25 * time.Millisecond,
Logger: log.New(ioutil.Discard, uniqueID(), log.LstdFlags),
Notify: notif,
CheckID: types.CheckID("foo"),
ScriptArgs: []string{"sh", "-c", "sleep 1 && exit 0"},
Interval: 50 * time.Millisecond,
Timeout: 25 * time.Millisecond,
Logger: log.New(ioutil.Discard, uniqueID(), log.LstdFlags),
OutputMaxSize: 4096,
}
check.Start()
defer check.Stop()
Expand All @@ -128,11 +131,12 @@ func TestCheckMonitor_RandomStagger(t *testing.T) {
// t.Parallel() // timing test. no parallel
notif := mock.NewNotify()
check := &CheckMonitor{
Notify: notif,
CheckID: types.CheckID("foo"),
ScriptArgs: []string{"sh", "-c", "exit 0"},
Interval: 25 * time.Millisecond,
Logger: log.New(ioutil.Discard, uniqueID(), log.LstdFlags),
Notify: notif,
CheckID: types.CheckID("foo"),
ScriptArgs: []string{"sh", "-c", "exit 0"},
Interval: 25 * time.Millisecond,
Logger: log.New(ioutil.Discard, uniqueID(), log.LstdFlags),
OutputMaxSize: 4096,
}
check.Start()
defer check.Stop()
Expand All @@ -153,11 +157,12 @@ func TestCheckMonitor_LimitOutput(t *testing.T) {
t.Parallel()
notif := mock.NewNotify()
check := &CheckMonitor{
Notify: notif,
CheckID: types.CheckID("foo"),
ScriptArgs: []string{"od", "-N", "81920", "/dev/urandom"},
Interval: 25 * time.Millisecond,
Logger: log.New(ioutil.Discard, uniqueID(), log.LstdFlags),
Notify: notif,
CheckID: types.CheckID("foo"),
ScriptArgs: []string{"od", "-N", "81920", "/dev/urandom"},
Interval: 25 * time.Millisecond,
Logger: log.New(ioutil.Discard, uniqueID(), log.LstdFlags),
OutputMaxSize: 4096,
}
check.Start()
defer check.Stop()
Expand Down Expand Up @@ -295,13 +300,14 @@ func TestCheckHTTP(t *testing.T) {

notif := mock.NewNotify()
check := &CheckHTTP{
Notify: notif,
CheckID: types.CheckID("foo"),
HTTP: server.URL,
Method: tt.method,
Header: tt.header,
Interval: 10 * time.Millisecond,
Logger: log.New(ioutil.Discard, uniqueID(), log.LstdFlags),
Notify: notif,
CheckID: types.CheckID("foo"),
HTTP: server.URL,
Method: tt.method,
OutputMaxSize: 4096,
Header: tt.header,
Interval: 10 * time.Millisecond,
Logger: log.New(ioutil.Discard, uniqueID(), log.LstdFlags),
}
check.Start()
defer check.Stop()
Expand Down
13 changes: 11 additions & 2 deletions agent/config/builder.go
Original file line number Diff line number Diff line change
Expand Up @@ -786,6 +786,7 @@ func (b *Builder) Build() (rt RuntimeConfig, err error) {
CAPath: b.stringVal(c.CAPath),
CertFile: b.stringVal(c.CertFile),
CheckUpdateInterval: b.durationVal("check_update_interval", c.CheckUpdateInterval),
CheckOutputMaxSize: b.intValWithDefault(c.CheckOutputMaxSize, 4096),
Checks: checks,
ClientAddrs: clientAddrs,
ConfigEntryBootstrap: configEntries,
Expand Down Expand Up @@ -964,6 +965,9 @@ func (b *Builder) Validate(rt RuntimeConfig) error {
if rt.BootstrapExpect > 0 && rt.Bootstrap {
return fmt.Errorf("'bootstrap_expect > 0' and 'bootstrap = true' are mutually exclusive")
}
if rt.CheckOutputMaxSize < 1 {
return fmt.Errorf("'check_output_max_size must be positive")
}
if rt.AEInterval <= 0 {
return fmt.Errorf("ae_interval cannot be %s. Must be positive", rt.AEInterval)
}
Expand Down Expand Up @@ -1174,6 +1178,7 @@ func (b *Builder) checkVal(v *CheckDefinition) *structs.CheckDefinition {
Timeout: b.durationVal(fmt.Sprintf("check[%s].timeout", id), v.Timeout),
TTL: b.durationVal(fmt.Sprintf("check[%s].ttl", id), v.TTL),
DeregisterCriticalServiceAfter: b.durationVal(fmt.Sprintf("check[%s].deregister_critical_service_after", id), v.DeregisterCriticalServiceAfter),
OutputMaxSize: b.intValWithDefault(v.OutputMaxSize, 4096),
}
}

Expand Down Expand Up @@ -1347,13 +1352,17 @@ func (b *Builder) durationVal(name string, v *string) (d time.Duration) {
return b.durationValWithDefault(name, v, 0)
}

func (b *Builder) intVal(v *int) int {
func (b *Builder) intValWithDefault(v *int, defaultVal int) int {
if v == nil {
return 0
return defaultVal
}
return *v
}

func (b *Builder) intVal(v *int) int {
return b.intValWithDefault(v, 0)
}

func (b *Builder) portVal(name string, v *int) int {
if v == nil || *v <= 0 {
return -1
Expand Down
2 changes: 2 additions & 0 deletions agent/config/config.go
Original file line number Diff line number Diff line change
Expand Up @@ -184,6 +184,7 @@ type Config struct {
CAPath *string `json:"ca_path,omitempty" hcl:"ca_path" mapstructure:"ca_path"`
CertFile *string `json:"cert_file,omitempty" hcl:"cert_file" mapstructure:"cert_file"`
Check *CheckDefinition `json:"check,omitempty" hcl:"check" mapstructure:"check"` // needs to be a pointer to avoid partial merges
CheckOutputMaxSize *int `json:"check_output_max_size,omitempty" hcl:"check_output_max_size" mapstructure:"check_output_max_size"`
CheckUpdateInterval *string `json:"check_update_interval,omitempty" hcl:"check_update_interval" mapstructure:"check_update_interval"`
Checks []CheckDefinition `json:"checks,omitempty" hcl:"checks" mapstructure:"checks"`
ClientAddr *string `json:"client_addr,omitempty" hcl:"client_addr" mapstructure:"client_addr"`
Expand Down Expand Up @@ -390,6 +391,7 @@ type CheckDefinition struct {
HTTP *string `json:"http,omitempty" hcl:"http" mapstructure:"http"`
Header map[string][]string `json:"header,omitempty" hcl:"header" mapstructure:"header"`
Method *string `json:"method,omitempty" hcl:"method" mapstructure:"method"`
OutputMaxSize *int `json:"output_max_size,omitempty" hcl:"output_max_size" mapstructure:"output_max_size"`
TCP *string `json:"tcp,omitempty" hcl:"tcp" mapstructure:"tcp"`
Interval *string `json:"interval,omitempty" hcl:"interval" mapstructure:"interval"`
DockerContainerID *string `json:"docker_container_id,omitempty" hcl:"docker_container_id" mapstructure:"docker_container_id"`
Expand Down
1 change: 1 addition & 0 deletions agent/config/default.go
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,7 @@ func DefaultSource() Source {
bind_addr = "0.0.0.0"
bootstrap = false
bootstrap_expect = 0
check_output_max_size = 4096
check_update_interval = "5m"
client_addr = "127.0.0.1"
datacenter = "` + consul.DefaultDC + `"
Expand Down
1 change: 1 addition & 0 deletions agent/config/flags.go
Original file line number Diff line number Diff line change
Expand Up @@ -60,6 +60,7 @@ func AddFlags(fs *flag.FlagSet, f *Flags) {
add(&f.Config.Bootstrap, "bootstrap", "Sets server to bootstrap mode.")
add(&f.Config.BootstrapExpect, "bootstrap-expect", "Sets server to expect bootstrap mode.")
add(&f.Config.ClientAddr, "client", "Sets the address to bind for client access. This includes RPC, DNS, HTTP, HTTPS and gRPC (if configured).")
add(&f.Config.CheckOutputMaxSize, "check_output_max_size", "Set the maximum default size of output for checks on this agent")
add(&f.ConfigFiles, "config-dir", "Path to a directory to read configuration files from. This will read every file ending in '.json' as configuration in this directory in alphabetical order. Can be specified multiple times.")
add(&f.ConfigFiles, "config-file", "Path to a file in JSON or HCL format with a matching file extension. Can be specified multiple times.")
add(&f.ConfigFormat, "config-format", "Config files are in this format irrespective of their extension. Must be 'hcl' or 'json'")
Expand Down
5 changes: 5 additions & 0 deletions agent/config/runtime.go
Original file line number Diff line number Diff line change
Expand Up @@ -459,6 +459,11 @@ type RuntimeConfig struct {
// hcl: check_update_interval = "duration"
CheckUpdateInterval time.Duration

// Maximum size for the output of a healtcheck
// hcl check_output_max_size int
// flag: -check_output_max_size int
CheckOutputMaxSize int

// Checks contains the provided check definitions.
//
// hcl: checks = [
Expand Down
Loading

0 comments on commit 6c21464

Please sign in to comment.