Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Prometheus support on v1/sys/metrics endpoint #5308

Merged
merged 12 commits into from
Feb 14, 2019
61 changes: 46 additions & 15 deletions command/server.go
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ import (
"encoding/base64"
"encoding/hex"
"fmt"
"github.com/hashicorp/vault/helper/metricsutil"
"io"
"io/ioutil"
"net"
Expand All @@ -23,6 +24,7 @@ import (
metrics "github.com/armon/go-metrics"
"github.com/armon/go-metrics/circonus"
"github.com/armon/go-metrics/datadog"
"github.com/armon/go-metrics/prometheus"
"github.com/hashicorp/errwrap"
log "github.com/hashicorp/go-hclog"
multierror "github.com/hashicorp/go-multierror"
Expand Down Expand Up @@ -469,7 +471,8 @@ func (c *ServerCommand) Run(args []string) int {
"in a Docker container, provide the IPC_LOCK cap to the container."))
}

if err := c.setupTelemetry(config); err != nil {
metricsHelper, err := c.setupTelemetry(config)
if err != nil {
c.UI.Error(fmt.Sprintf("Error initializing telemetry: %s", err))
return 1
}
Expand Down Expand Up @@ -563,6 +566,7 @@ func (c *ServerCommand) Run(args []string) int {
AllLoggers: allLoggers,
BuiltinRegistry: builtinplugins.Registry,
DisableKeyEncodingChecks: config.DisablePrintableCheck,
MetricsHelper: metricsHelper,
}
if c.flagDev {
coreConfig.DevToken = c.flagDevRootTokenID
Expand Down Expand Up @@ -1686,8 +1690,8 @@ func (c *ServerCommand) detectRedirect(detect physical.RedirectDetect,
return url.String(), nil
}

// setupTelemetry is used to setup the telemetry sub-systems
func (c *ServerCommand) setupTelemetry(config *server.Config) error {
// setupTelemetry is used to setup the telemetry sub-systems and returns the in-memory sink to be used in http configuration
func (c *ServerCommand) setupTelemetry(config *server.Config) (*metricsutil.MetricsHelper, error) {
/* Setup telemetry
Aggregate on 10 second intervals for 1 minute. Expose the
metrics over stderr when there is a SIGUSR1 received.
Expand All @@ -1696,21 +1700,40 @@ func (c *ServerCommand) setupTelemetry(config *server.Config) error {
metrics.DefaultInmemSignal(inm)

var telConfig *server.Telemetry
if config.Telemetry == nil {
telConfig = &server.Telemetry{}
} else {
if config.Telemetry != nil {
telConfig = config.Telemetry
} else {
telConfig = &server.Telemetry{}
}

metricsConf := metrics.DefaultConfig("vault")
metricsConf.EnableHostname = !telConfig.DisableHostname

// Configure the statsite sink
var fanout metrics.FanoutSink
var prometheusEnabled bool

// Configure the Prometheus sink
if telConfig.PrometheusRetentionTime != 0 {
prometheusEnabled = true
prometheusOpts := prometheus.PrometheusOpts{
Expiration: telConfig.PrometheusRetentionTime,
}

sink, err := prometheus.NewPrometheusSinkFrom(prometheusOpts)
if err != nil {
return nil, err
}
fanout = append(fanout, sink)
}

metricHelper := metricsutil.NewMetricsHelper(inm, prometheusEnabled)


if telConfig.StatsiteAddr != "" {
sink, err := metrics.NewStatsiteSink(telConfig.StatsiteAddr)
if err != nil {
return err
return nil, err
}
fanout = append(fanout, sink)
}
Expand All @@ -1719,7 +1742,7 @@ func (c *ServerCommand) setupTelemetry(config *server.Config) error {
if telConfig.StatsdAddr != "" {
sink, err := metrics.NewStatsdSink(telConfig.StatsdAddr)
if err != nil {
return err
return nil, err
}
fanout = append(fanout, sink)
}
Expand Down Expand Up @@ -1755,7 +1778,7 @@ func (c *ServerCommand) setupTelemetry(config *server.Config) error {

sink, err := circonus.NewCirconusSink(cfg)
if err != nil {
return err
return nil, err
}
sink.Start()
fanout = append(fanout, sink)
Expand All @@ -1770,21 +1793,29 @@ func (c *ServerCommand) setupTelemetry(config *server.Config) error {

sink, err := datadog.NewDogStatsdSink(telConfig.DogStatsDAddr, metricsConf.HostName)
if err != nil {
return errwrap.Wrapf("failed to start DogStatsD sink: {{err}}", err)
return nil, errwrap.Wrapf("failed to start DogStatsD sink: {{err}}", err)
}
sink.SetTags(tags)
fanout = append(fanout, sink)
}

// Initialize the global sink
if len(fanout) > 0 {
fanout = append(fanout, inm)
metrics.NewGlobal(metricsConf, fanout)
if len(fanout) > 1 {
// Hostname enabled will create poor quality metrics name for prometheus
if !telConfig.DisableHostname {
c.UI.Warn("telemetry.disable_hostname has been set to false. Recommended setting is true for Prometheus to avoid poorly named metrics.")
}
} else {
metricsConf.EnableHostname = false
metrics.NewGlobal(metricsConf, inm)
}
return nil
fanout = append(fanout, inm)
_, err := metrics.NewGlobal(metricsConf, fanout)

if err != nil {
return nil, err
}

return metricHelper, nil
}

func (c *ServerCommand) Reload(lock *sync.RWMutex, reloadFuncs *map[string][]reload.ReloadFunc, configPath []string) error {
Expand Down
25 changes: 24 additions & 1 deletion command/server/config.go
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,10 @@ import (
"github.com/hashicorp/vault/helper/parseutil"
)

const (
prometheusDefaultRetentionTime = 24 * time.Hour
)

// Config is the configuration for the vault server.
type Config struct {
Listeners []*Listener `hcl:"-"`
Expand Down Expand Up @@ -98,7 +102,10 @@ func DevConfig(ha, transactional bool) *Config {

EnableUI: true,

Telemetry: &Telemetry{},
Telemetry: &Telemetry{
PrometheusRetentionTime: prometheusDefaultRetentionTime,
DisableHostname: true,
},
}

switch {
Expand Down Expand Up @@ -233,6 +240,12 @@ type Telemetry struct {
// DogStatsdTags are the global tags that should be sent with each packet to dogstatsd
// It is a list of strings, where each string looks like "my_tag_name:my_tag_value"
DogStatsDTags []string `hcl:"dogstatsd_tags"`

// Prometheus:
// PrometheusRetentionTime is the retention time for prometheus metrics if greater than 0.
// Default: 24h
PrometheusRetentionTime time.Duration `hcl:-`
PrometheusRetentionTimeRaw interface{} `hcl:"prometheus_retention_time"`
}

func (s *Telemetry) GoString() string {
Expand Down Expand Up @@ -864,5 +877,15 @@ func parseTelemetry(result *Config, list *ast.ObjectList) error {
if err := hcl.DecodeObject(&result.Telemetry, item.Val); err != nil {
return multierror.Prefix(err, "telemetry:")
}

if result.Telemetry.PrometheusRetentionTimeRaw != nil {
var err error
if result.Telemetry.PrometheusRetentionTime, err = parseutil.ParseDurationSecond(result.Telemetry.PrometheusRetentionTimeRaw); err != nil {
return err
}
} else {
result.Telemetry.PrometheusRetentionTime = prometheusDefaultRetentionTime
}

return nil
}
104 changes: 104 additions & 0 deletions helper/metricsutil/metricsutil.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,104 @@
package metricsutil

import (
"bytes"
"encoding/json"
"fmt"
"github.com/armon/go-metrics"
"github.com/hashicorp/vault/logical"
"github.com/prometheus/client_golang/prometheus"
"github.com/prometheus/common/expfmt"
"strings"
)

const (
OpenMetricsMIMEType = "application/openmetrics-text"
)

const (
PrometheusMetricFormat = "prometheus"
)

type MetricsHelper struct {
inMemSink *metrics.InmemSink
PrometheusEnabled bool
}

func NewMetricsHelper(inMem *metrics.InmemSink, enablePrometheus bool) *MetricsHelper{
return &MetricsHelper{inMem, enablePrometheus}
}

func FormatFromRequest(req *logical.Request) (string) {
acceptHeaders := req.Headers["Accept"]
if len(acceptHeaders) > 0 {
acceptHeader := acceptHeaders[0]
if strings.HasPrefix(acceptHeader, OpenMetricsMIMEType) {
return "prometheus"
}
}
return ""
}

func (m *MetricsHelper) ResponseForFormat(format string) (*logical.Response, error) {
switch format {
case PrometheusMetricFormat:
return m.PrometheusResponse()
case "":
return m.GenericResponse()
default:
return nil, fmt.Errorf("metric response format \"%s\" unknown", format)
}
}

func (m *MetricsHelper) PrometheusResponse() (*logical.Response, error) {
if !m.PrometheusEnabled {
return &logical.Response{
Data: map[string]interface{}{
logical.HTTPContentType: "text/plain",
logical.HTTPRawBody: "prometheus is not enabled",
logical.HTTPStatusCode: 400,
},
}, nil
}
metricsFamilies, err := prometheus.DefaultGatherer.Gather()
if err != nil && len(metricsFamilies) == 0 {
return nil, fmt.Errorf("no prometheus metrics could be decoded: %s", err)
}

// Initialize a byte buffer.
buf := &bytes.Buffer{}
defer buf.Reset()

e := expfmt.NewEncoder(buf, expfmt.FmtText)
for _, mf := range metricsFamilies {
err := e.Encode(mf)
if err != nil {
return nil, fmt.Errorf("error during the encoding of metrics: %s", err)
}
}
return &logical.Response{
Data: map[string]interface{}{
logical.HTTPContentType: string(expfmt.FmtText),
logical.HTTPRawBody: buf.Bytes(),
logical.HTTPStatusCode: 200,
},
}, nil
}

func (m *MetricsHelper) GenericResponse() (*logical.Response, error) {
summary, err := m.inMemSink.DisplayMetrics(nil,nil)
if err != nil {
return nil, fmt.Errorf("error while fetching the in-memory metrics: %s", err)
}
content, err := json.Marshal(summary)
if err != nil {
return nil, fmt.Errorf("error while marshalling the in-memory metrics: %s", err)
}
return &logical.Response{
Data: map[string]interface{}{
logical.HTTPContentType: "application/json",
logical.HTTPRawBody: content,
logical.HTTPStatusCode: 200,
},
}, nil
}
10 changes: 9 additions & 1 deletion vault/core.go
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@ import (
"crypto/x509"
"errors"
"fmt"
"github.com/hashicorp/vault/helper/metricsutil"
"net"
"net/http"
"net/url"
Expand Down Expand Up @@ -419,7 +420,10 @@ type Core struct {

// loadCaseSensitiveIdentityStore enforces the loading of identity store
// artifacts in a case sensitive manner. To be used only in testing.
loadCaseSensitiveIdentityStore bool
loadCaseSensitiveIdentityStore bool

// Telemetry objects
metricsHelper *metricsutil.MetricsHelper
}

// CoreConfig is used to parameterize a core
Expand Down Expand Up @@ -488,6 +492,9 @@ type CoreConfig struct {
DisableKeyEncodingChecks bool

AllLoggers []log.Logger

// Telemetry objects
MetricsHelper *metricsutil.MetricsHelper
}

func (c *CoreConfig) Clone() *CoreConfig {
Expand Down Expand Up @@ -596,6 +603,7 @@ func NewCore(conf *CoreConfig) (*Core, error) {
builtinRegistry: conf.BuiltinRegistry,
neverBecomeActive: new(uint32),
clusterLeaderParams: new(atomic.Value),
metricsHelper: conf.MetricsHelper,
}

atomic.StoreUint32(c.sealed, 1)
Expand Down
14 changes: 14 additions & 0 deletions vault/logical_system.go
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@ import (
"github.com/hashicorp/vault/helper/consts"
"github.com/hashicorp/vault/helper/identity"
"github.com/hashicorp/vault/helper/jsonutil"
"github.com/hashicorp/vault/helper/metricsutil"
"github.com/hashicorp/vault/helper/namespace"
"github.com/hashicorp/vault/helper/parseutil"
"github.com/hashicorp/vault/helper/strutil"
Expand Down Expand Up @@ -145,6 +146,7 @@ func NewSystemBackend(core *Core, logger log.Logger) *SystemBackend {
b.Backend.Paths = append(b.Backend.Paths, b.capabilitiesPaths()...)
b.Backend.Paths = append(b.Backend.Paths, b.internalPaths()...)
b.Backend.Paths = append(b.Backend.Paths, b.remountPath())
b.Backend.Paths = append(b.Backend.Paths, b.metricsPath())

if core.rawEnabled {
b.Backend.Paths = append(b.Backend.Paths, &framework.Path{
Expand Down Expand Up @@ -2512,6 +2514,14 @@ func (b *SystemBackend) responseWrappingUnwrap(ctx context.Context, te *logical.
return response, nil
}

func (b *SystemBackend) handleMetrics(ctx context.Context, req *logical.Request, data *framework.FieldData) (*logical.Response, error) {
format := data.Get("format").(string)
if format == "" {
format = metricsutil.FormatFromRequest(req)
}
return b.Core.metricsHelper.ResponseForFormat(format)
}

func (b *SystemBackend) handleWrappingLookup(ctx context.Context, req *logical.Request, data *framework.FieldData) (*logical.Response, error) {
// This ordering of lookups has been validated already in the wrapping
// validation func, we're just doing this for a safety check
Expand Down Expand Up @@ -3884,4 +3894,8 @@ This path responds to the following HTTP methods.
"Information about a token's resultant ACL. Internal API; its location, inputs, and outputs may change.",
"",
},
"metrics": {
"Export the metrics aggregated for telemetry purpose.",
"",
},
}
Loading