Skip to content

Commit

Permalink
Add insecure-health-endpoint flag to enable Kubernetes HTTP probes
Browse files Browse the repository at this point in the history
Before this patch, Kubernetes users deploying etcd who configure mTLS
authentication for client and metrics endpoints are unable to leverage etcd's
`/health` endpoint for liveness or readiness probes because Kubernetes HTTP
probes don't support mTLS. Users must work around this limitation with
strategies like weak health checks using custom `exec` probes. Bypassing the
`/health` endpoint for this purpose can lead to false positive probe results and
quorum loss if a member is falsely determined to be ready during a rollout.

This patch introduces an `--insecure-health-endpoint` flag which, if specified,
enables an insecure HTTP `/health` endpoint which is functionally equivalent to
other `/health` endpoints. This enables etcd pods to specify reliable HTTP GET
probes for readiness and liveness checking.

This approach stands as an alternative to introducing a parallel TLS
configuration and listener just for the `/health` endpoint. The typical use case
is probably a non-exposed port used only by a container orchestration component
(e.g. the Kubelet).

Fixes #11993.
  • Loading branch information
ironcladlou committed Oct 9, 2020
1 parent 0aab02e commit a4ded77
Show file tree
Hide file tree
Showing 9 changed files with 102 additions and 21 deletions.
6 changes: 6 additions & 0 deletions embed/config.go
Original file line number Diff line number Diff line change
Expand Up @@ -330,6 +330,12 @@ type Config struct {
// UnsafeNoFsync disables all uses of fsync.
// Setting this is unsafe and will cause data loss.
UnsafeNoFsync bool `json:"unsafe-no-fsync"`

// InsecureHealthEndpoint enables an additional /health listener on the specified
// address. The listener serves the usual /health endpoint over HTTP. This is
// useful for health probes (e.g. Kubernetes) where TLS is an undesirable
// complication.
InsecureHealthEndpoint string `json:"insecure-health-endpoint"`
}

// configYAML holds the config suitable for yaml parsing
Expand Down
16 changes: 16 additions & 0 deletions embed/etcd.go
Original file line number Diff line number Diff line change
Expand Up @@ -201,6 +201,7 @@ func StartEtcd(inCfg *Config) (e *Etcd, err error) {
EnableLeaseCheckpoint: cfg.ExperimentalEnableLeaseCheckpoint,
CompactionBatchLimit: cfg.ExperimentalCompactionBatchLimit,
WatchProgressNotifyInterval: cfg.ExperimentalWatchProgressNotifyInterval,
InsecureHealthEndpoint: cfg.InsecureHealthEndpoint,
}
print(e.cfg.logger, *cfg, srvcfg, memberInitialized)
if e.Server, err = etcdserver.NewServer(srvcfg); err != nil {
Expand Down Expand Up @@ -634,6 +635,21 @@ func (e *Etcd) serveClients() (err error) {
mux := http.NewServeMux()
etcdhttp.HandleBasic(e.cfg.logger, mux, e.Server)
etcdhttp.HandleMetricsHealthForV3(e.cfg.logger, mux, e.Server)
if len(e.cfg.InsecureHealthEndpoint) > 0 {
healthMux := http.NewServeMux()
etcdhttp.HandleHealthForV3(e.cfg.logger, healthMux, e.Server)
s := &http.Server{
Addr: e.cfg.InsecureHealthEndpoint,
Handler: healthMux,
}
e.cfg.logger.Info("listening for /health on insecure endpoint", zap.String("insecure-health-endpoint", s.Addr))
go func() {
if err := s.ListenAndServe(); err != nil {
e.cfg.logger.Info("insecure /health listener stopped with error", zap.Error(err))
}
}()
}

h = mux
}

Expand Down
1 change: 1 addition & 0 deletions etcdmain/config.go
Original file line number Diff line number Diff line change
Expand Up @@ -236,6 +236,7 @@ func newConfig() *config {

// additional metrics
fs.StringVar(&cfg.ec.Metrics, "metrics", cfg.ec.Metrics, "Set level of detail for exported metrics, specify 'extensive' to include server side grpc histogram metrics")
fs.StringVar(&cfg.ec.InsecureHealthEndpoint, "insecure-health-endpoint", cfg.ec.InsecureHealthEndpoint, "Create an additional /health HTTP listener on this address")

// auth
fs.StringVar(&cfg.ec.AuthToken, "auth-token", cfg.ec.AuthToken, "Specify auth token specific options.")
Expand Down
14 changes: 12 additions & 2 deletions etcdserver/api/etcdhttp/metrics.go
Original file line number Diff line number Diff line change
Expand Up @@ -42,10 +42,20 @@ func HandleMetricsHealth(lg *zap.Logger, mux *http.ServeMux, srv etcdserver.Serv
mux.Handle(PathHealth, NewHealthHandler(lg, func() Health { return checkV2Health(lg, srv) }))
}

// HandleMetricsHealthForV3 registers metrics and health handlers. it checks health by using v3 range request
// and its corresponding timeout.
// HandleMetricsHealthForV3 registers both the metrics handler and the v3
// health handler on mux. It delegates to HandleMetricsForV3 and
// HandleHealthForV3, passing lg, mux and srv through unchanged; callers that
// need only one of the two endpoints can call those functions directly.
func HandleMetricsHealthForV3(lg *zap.Logger, mux *http.ServeMux, srv *etcdserver.EtcdServer) {
HandleMetricsForV3(lg, mux, srv)
HandleHealthForV3(lg, mux, srv)
}

// HandleMetricsForV3 installs the Prometheus metrics handler on mux under
// PathMetrics. The lg and srv arguments are not used by this function; they
// are accepted so the signature mirrors HandleHealthForV3.
func HandleMetricsForV3(lg *zap.Logger, mux *http.ServeMux, srv *etcdserver.EtcdServer) {
	metricsHandler := promhttp.Handler()
	mux.Handle(PathMetrics, metricsHandler)
}

// HandleHealthForV3 installs a health handler on mux under PathHealth. Each
// request is answered with the result of checkV3Health for srv, which checks
// health by using a v3 range request and its corresponding timeout.
func HandleHealthForV3(lg *zap.Logger, mux *http.ServeMux, srv *etcdserver.EtcdServer) {
	// Bind the logger and server once; the handler invokes this per request.
	v3Check := func() Health {
		return checkV3Health(lg, srv)
	}
	mux.Handle(PathHealth, NewHealthHandler(lg, v3Check))
}

Expand Down
6 changes: 6 additions & 0 deletions etcdserver/config.go
Original file line number Diff line number Diff line change
Expand Up @@ -162,6 +162,12 @@ type ServerConfig struct {
// UnsafeNoFsync disables all uses of fsync.
// Setting this is unsafe and will cause data loss.
UnsafeNoFsync bool `json:"unsafe-no-fsync"`

// InsecureHealthEndpoint enables an additional /health listener on the specified
// address. The listener serves the usual /health endpoint over HTTP. This is
// useful for health probes (e.g. Kubernetes) where TLS is an undesirable
// complication.
InsecureHealthEndpoint string `json:"insecure-health-endpoint"`
}

// VerifyBootstrap sanity-checks the initial config for bootstrap case
Expand Down
3 changes: 3 additions & 0 deletions integration/cluster.go
Original file line number Diff line number Diff line change
Expand Up @@ -594,6 +594,7 @@ type memberConfig struct {
enableLeaseCheckpoint bool
leaseCheckpointInterval time.Duration
WatchProgressNotifyInterval time.Duration
InsecureHealthEndpoint string
}

// mustNewMember return an inited member with the given name. If peerTLS is
Expand Down Expand Up @@ -691,6 +692,8 @@ func mustNewMember(t testing.TB, mcfg memberConfig) *member {

m.InitialCorruptCheck = true

m.InsecureHealthEndpoint = mcfg.InsecureHealthEndpoint

lcfg := logutil.DefaultZapLoggerConfig
m.LoggerConfig = &lcfg
m.LoggerConfig.OutputPaths = []string{"/dev/null"}
Expand Down
42 changes: 25 additions & 17 deletions tests/e2e/cluster_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -129,13 +129,14 @@ type etcdProcessClusterConfig struct {

cipherSuites []string

forceNewCluster bool
initialToken string
quotaBackendBytes int64
noStrictReconfig bool
enableV2 bool
initialCorruptCheck bool
authTokenOpts string
forceNewCluster bool
initialToken string
quotaBackendBytes int64
noStrictReconfig bool
enableV2 bool
initialCorruptCheck bool
authTokenOpts string
insecureHealthEndpointEnabled bool

rollingStart bool
}
Expand Down Expand Up @@ -269,23 +270,30 @@ func (cfg *etcdProcessClusterConfig) etcdServerProcessConfigs() []*etcdServerPro
args = append(args, "--listen-metrics-urls", murl)
}

var insecureHealthEndpoint string
if cfg.insecureHealthEndpointEnabled {
insecureHealthEndpoint = fmt.Sprintf("localhost:%d", port+3)
args = append(args, "--insecure-health-endpoint", insecureHealthEndpoint)
}

args = append(args, cfg.tlsArgs()...)

if cfg.authTokenOpts != "" {
args = append(args, "--auth-token", cfg.authTokenOpts)
}

etcdCfgs[i] = &etcdServerProcessConfig{
execPath: cfg.execPath,
args: args,
tlsArgs: cfg.tlsArgs(),
dataDirPath: dataDirPath,
keepDataDir: cfg.keepDataDir,
name: name,
purl: purl,
acurl: curl,
murl: murl,
initialToken: cfg.initialToken,
execPath: cfg.execPath,
args: args,
tlsArgs: cfg.tlsArgs(),
dataDirPath: dataDirPath,
keepDataDir: cfg.keepDataDir,
name: name,
purl: purl,
acurl: curl,
murl: murl,
insecureMetricsEndpoint: insecureHealthEndpoint,
initialToken: cfg.initialToken,
}
}

Expand Down
5 changes: 3 additions & 2 deletions tests/e2e/etcd_process.go
Original file line number Diff line number Diff line change
Expand Up @@ -61,8 +61,9 @@ type etcdServerProcessConfig struct {

purl url.URL

acurl string
murl string
acurl string
murl string
insecureMetricsEndpoint string

initialToken string
initialCluster string
Expand Down
30 changes: 30 additions & 0 deletions tests/e2e/metrics_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,8 @@ package e2e

import (
"fmt"
"io/ioutil"
"net/http"
"testing"

"go.etcd.io/etcd/api/v3/version"
Expand Down Expand Up @@ -64,3 +66,31 @@ func metricsTest(cx ctlCtx) {
}
}
}

// TestInsecureHealthEndpointEnabled starts a single-member cluster from
// configNoTLS with the insecure health endpoint enabled, writes one key, and
// then verifies that a plain HTTP GET of /health on the configured endpoint
// answers 200 OK with a healthy body.
func TestInsecureHealthEndpointEnabled(t *testing.T) {
	cfg := configNoTLS
	cfg.clusterSize = 1
	cfg.insecureHealthEndpointEnabled = true
	testCtl(t, func(cx ctlCtx) {
		// Perform a write first so the member has served at least one request
		// before it is probed.
		if err := ctlV3Put(cx, "k", "v", ""); err != nil {
			cx.t.Fatal(err)
		}

		// The process config stores the insecure health address in the
		// insecureMetricsEndpoint field.
		addr := cx.epc.procs[0].Config().insecureMetricsEndpoint
		if len(addr) == 0 {
			cx.t.Fatal("expected endpoint")
		}

		endpoint := fmt.Sprintf("http://%s/health", addr)
		resp, err := http.Get(endpoint)
		if err != nil {
			cx.t.Fatal(err)
		}
		defer resp.Body.Close()

		body, err := ioutil.ReadAll(resp.Body)
		if err != nil {
			cx.t.Fatal(err)
		}

		// Check the status explicitly so a non-200 reply is reported as such,
		// with the body attached, instead of only as a body mismatch.
		if resp.StatusCode != http.StatusOK {
			cx.t.Fatalf("from health endpoint %q, expected status %d, got %d with body:\n%s",
				endpoint, http.StatusOK, resp.StatusCode, string(body))
		}
		if e, a := `{"health":"true"}`, string(body); e != a {
			cx.t.Fatalf("from health endpoint %q, expected:\n%s\ngot:\n%s", endpoint, e, a)
		}
	}, withCfg(cfg))
}

0 comments on commit a4ded77

Please sign in to comment.