Skip to content

Commit

Permalink
Moving leader election inside vault-k8s (#271)
Browse files Browse the repository at this point in the history
Using operator-sdk's Become() for leader election. Retries Become() 10
times (with exp backoff) and then signals the caller to exit if it
failed. command.Run() now watches an exitOnError channel for that
case.

Co-authored-by: Ben Ash <[email protected]>
  • Loading branch information
tvoran and benashz authored Aug 31, 2021
1 parent 5c809ee commit 0c69acb
Show file tree
Hide file tree
Showing 9 changed files with 414 additions and 181 deletions.
40 changes: 6 additions & 34 deletions deploy/injector-deployment.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -20,38 +20,6 @@ spec:
spec:
serviceAccountName: "vault-injector"
containers:
- name: leader-elector
image: k8s.gcr.io/leader-elector:0.4
args:
- --election=vault-agent-injector-leader
- --election-namespace=$(NAMESPACE)
- --http=0.0.0.0:4040
- --ttl=60s
env:
- name: NAMESPACE
valueFrom:
fieldRef:
fieldPath: metadata.namespace
livenessProbe:
httpGet:
path: /
port: 4040
scheme: HTTP
failureThreshold: 2
initialDelaySeconds: 1
periodSeconds: 2
successThreshold: 1
timeoutSeconds: 5
readinessProbe:
httpGet:
path: /
port: 4040
scheme: HTTP
failureThreshold: 2
initialDelaySeconds: 2
periodSeconds: 2
successThreshold: 1
timeoutSeconds: 5
- name: sidecar-injector
image: "hashicorp/vault-k8s:0.12.0"
imagePullPolicy: IfNotPresent
Expand All @@ -60,6 +28,10 @@ spec:
valueFrom:
fieldRef:
fieldPath: metadata.namespace
- name: POD_NAME
valueFrom:
fieldRef:
fieldPath: metadata.name
- name: AGENT_INJECT_LISTEN
value: ":8080"
- name: AGENT_INJECT_LOG_LEVEL
Expand Down Expand Up @@ -95,7 +67,7 @@ spec:
port: 8080
scheme: HTTPS
failureThreshold: 2
initialDelaySeconds: 1
initialDelaySeconds: 5
periodSeconds: 2
successThreshold: 1
timeoutSeconds: 5
Expand All @@ -105,7 +77,7 @@ spec:
port: 8080
scheme: HTTPS
failureThreshold: 2
initialDelaySeconds: 2
initialDelaySeconds: 5
periodSeconds: 2
successThreshold: 1
timeoutSeconds: 5
9 changes: 1 addition & 8 deletions deploy/injector-leader-extras.yaml
Original file line number Diff line number Diff line change
@@ -1,11 +1,4 @@
# These are created here so they can be cleaned up easily. The endpoints
# especially, since if they're left around the leader won't expire for about a
# minute.
---
apiVersion: v1
kind: Endpoints
metadata:
name: vault-agent-injector-leader
# This is created here so it can be cleaned up easily.
---
apiVersion: v1
kind: Secret
Expand Down
8 changes: 7 additions & 1 deletion deploy/injector-rbac.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -48,13 +48,19 @@ metadata:
app.kubernetes.io/instance: vault
rules:
- apiGroups: [""]
resources: ["endpoints", "secrets"]
resources: ["secrets", "configmaps"]
verbs:
- "create"
- "get"
- "watch"
- "list"
- "update"
- apiGroups: [""]
resources: ["pods"]
verbs:
- "get"
- "patch"
- "delete"
---
apiVersion: rbac.authorization.k8s.io/v1
kind: RoleBinding
Expand Down
8 changes: 3 additions & 5 deletions go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -6,19 +6,17 @@ require (
github.com/armon/go-radix v1.0.0 // indirect
github.com/cenkalti/backoff/v4 v4.1.1
github.com/hashicorp/go-hclog v0.9.2
github.com/hashicorp/golang-lru v0.5.3 // indirect
github.com/hashicorp/vault/sdk v0.1.14-0.20191205220236-47cffd09f972
github.com/kelseyhightower/envconfig v1.4.0
github.com/kr/text v0.2.0
github.com/mattbaird/jsonpatch v0.0.0-20171005235357-81af80346b1a
github.com/mattn/go-colorable v0.1.2 // indirect
github.com/mitchellh/cli v1.0.0
github.com/operator-framework/operator-lib v0.6.0
github.com/pkg/errors v0.9.1
github.com/posener/complete v1.2.1 // indirect
github.com/prometheus/client_golang v1.7.1
github.com/prometheus/client_golang v1.11.0
github.com/radovskyb/watcher v1.0.7
github.com/stretchr/testify v1.6.1
golang.org/x/net v0.0.0-20210226172049-e18ecbb05110 // indirect
github.com/stretchr/testify v1.7.0
k8s.io/api v0.21.3
k8s.io/apimachinery v0.21.3
k8s.io/client-go v0.21.3
Expand Down
328 changes: 300 additions & 28 deletions go.sum

Large diffs are not rendered by default.

2 changes: 1 addition & 1 deletion helper/cert/source_gen.go
Original file line number Diff line number Diff line change
Expand Up @@ -63,7 +63,7 @@ type GenSource struct {
K8sClient kubernetes.Interface
Namespace string
SecretsCache informerv1.SecretInformer
LeaderElector *leader.LeaderElector
LeaderElector leader.Elector
WebhookName string
AdminAPIVersion string

Expand Down
46 changes: 11 additions & 35 deletions helper/cert/source_gen_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -2,18 +2,14 @@ package cert

import (
"context"
"encoding/json"
"io/ioutil"
"net/http"
"net/http/httptest"
"os"
"os/exec"
"path/filepath"
"testing"
"time"

"github.com/hashicorp/go-hclog"
"github.com/hashicorp/vault-k8s/leader"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
adminv1 "k8s.io/api/admissionregistration/v1"
Expand Down Expand Up @@ -141,10 +137,8 @@ func TestGenSource_leader(t *testing.T) {
// Generate the bundle
source := testGenSource()

// Setup test leader service returning this host as the leader
ts := testLeaderServer(t, testGetHostname(t))
defer ts.Close()
source.LeaderElector = leader.NewWithURL(ts.URL)
// Pretend this host is the leader
source.LeaderElector = newFakeLeader(true)

source.Namespace = "default"
source.K8sClient = fake.NewSimpleClientset()
Expand Down Expand Up @@ -173,11 +167,8 @@ func TestGenSource_follower(t *testing.T) {
// Generate the bundle
source := testGenSource()

// Setup a leader elector service that returns a different hostname, so it
// thinks we're the follower
ts := testLeaderServer(t, testGetHostname(t)+"not it")
defer ts.Close()
source.LeaderElector = leader.NewWithURL(ts.URL)
// Pretend this host is the follower
source.LeaderElector = newFakeLeader(false)

// Setup the k8s client with a Secret for a follower to pick up
source.Namespace = "default"
Expand Down Expand Up @@ -214,31 +205,16 @@ func TestGenSource_follower(t *testing.T) {
)
}

func testLeaderServer(t *testing.T, hostname string) *httptest.Server {
t.Helper()

ts := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
lResp := leader.LeaderResponse{
Name: hostname,
}
body, err := json.Marshal(lResp)
if err != nil {
t.Fatalf("failed to marshal leader response: %s", err)
}
w.WriteHeader(200)
w.Write(body)
}))
return ts
// FakeLeader is a test double whose IsLeader method reports a fixed,
// preconfigured leadership state.
type FakeLeader struct {
// leader is the value IsLeader returns.
leader bool
}

func testGetHostname(t *testing.T) string {
t.Helper()
// newFakeLeader builds a FakeLeader whose IsLeader result is pinned to
// isLeader.
func newFakeLeader(isLeader bool) *FakeLeader {
	fake := new(FakeLeader)
	fake.leader = isLeader
	return fake
}

host, err := os.Hostname()
if err != nil {
t.Fatalf("failed to get hostname for test leader service: %s", err)
}
return host
// IsLeader reports the canned leadership value the fake was built with. The
// returned error is always nil.
func (fl *FakeLeader) IsLeader() (bool, error) {
	elected := fl.leader
	return elected, nil
}

func TestGensource_prependLastCA(t *testing.T) {
Expand Down
96 changes: 51 additions & 45 deletions leader/leader.go
Original file line number Diff line number Diff line change
@@ -1,59 +1,65 @@
package leader

import (
"encoding/json"
"io/ioutil"
"net/http"
"os"
)
"context"
"sync/atomic"
"time"

const defaultURL = "http://localhost:4040/"
"github.com/cenkalti/backoff/v4"
"github.com/hashicorp/go-hclog"
operator_leader "github.com/operator-framework/operator-lib/leader"
"k8s.io/client-go/kubernetes"
)

type LeaderElector struct {
URL string
// Elector is implemented by types that can report whether the current host
// holds leadership.
type Elector interface {
// IsLeader returns whether this host is the leader. A non-nil error means
// the leadership state could not be determined.
IsLeader() (bool, error)
}

type LeaderResponse struct {
Name string `json:"name"`
// LeaderForLife is an Elector whose leadership flag is kept in an
// atomic.Value. Instances are created by New.
type LeaderForLife struct {
// isLeader holds a bool: New stores false up front and stores true once
// the background call to Become succeeds.
isLeader atomic.Value
}

// New returns a LeaderElector with the default service endpoint
func New() *LeaderElector {
return &LeaderElector{
URL: defaultURL,
}
}
// New returns a Elector that uses the operator-sdk's leader for life elector
func New(ctx context.Context, logger hclog.Logger, clientset kubernetes.Interface, exitOnError chan error) *LeaderForLife {
le := &LeaderForLife{}
le.isLeader.Store(false)

go func() {
// The Become() function blocks until this replica becomes the "leader",
// by creating a ConfigMap with an OwnerReference. Another replica can
// become the leader when the current leader replica stops running, and
// the Kubernetes garbage collector deletes the vault-k8s-leader
// ConfigMap.

// New exponential backoff with 10 retries
expBo := backoff.NewExponentialBackOff()
expBo.MaxInterval = time.Second * 30
bo := backoff.WithMaxRetries(expBo, 10)

err := backoff.Retry(func() error {
if err := operator_leader.Become(ctx, "vault-k8s-leader"); err != nil {
logger.Error("Trouble becoming leader", "error", err)
return err
}
return nil
}, bo)

if err != nil {
// Signal the caller to shutdown the injector server, since Become()
// failed all the retries
exitOnError <- err
return
}

le.isLeader.Store(true)
}()

// NewWithURL returns a LeaderElector with a custom service endpoint URL
func NewWithURL(URL string) *LeaderElector {
return &LeaderElector{
URL: URL,
}
return le
}

// IsLeader returns whether this host is the leader
func (le *LeaderElector) IsLeader() (bool, error) {
resp, err := http.Get(le.URL)
if err != nil {
return false, err
}
defer resp.Body.Close()
body, err := ioutil.ReadAll(resp.Body)
if err != nil {
return false, err
}
leaderResp := &LeaderResponse{}
err = json.Unmarshal(body, leaderResp)
if err != nil {
return false, err
}
hostname, err := os.Hostname()
if err != nil {
return false, err
}
if leaderResp.Name == hostname {
return true, nil
}

return false, nil
// IsLeader returns whether this host is the leader. The returned error is
// always nil; it exists to satisfy the Elector interface.
func (le *LeaderForLife) IsLeader() (bool, error) {
	// atomic.Value.Load returns nil until the first Store, so a LeaderForLife
	// that was not built by New would panic on an unchecked type assertion.
	// The comma-ok form reports "not leader" in that case instead.
	leaderStatus, ok := le.isLeader.Load().(bool)
	return ok && leaderStatus, nil
}
Loading

0 comments on commit 0c69acb

Please sign in to comment.