Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Soft-fail on node password verification if the secret cannot be created #7655

Merged
merged 2 commits into from
Jun 5, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions pkg/daemons/config/types.go
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@ import (
"github.com/rancher/wrangler/pkg/leader"
utilnet "k8s.io/apimachinery/pkg/util/net"
"k8s.io/apiserver/pkg/authentication/authenticator"
"k8s.io/client-go/tools/record"
utilsnet "k8s.io/utils/net"
)

Expand Down Expand Up @@ -340,6 +341,7 @@ type ControlRuntime struct {
ClientETCDKey string

Core *core.Factory
Event record.EventRecorder
EtcdConfig endpoint.ETCDConfig
}

Expand Down
68 changes: 44 additions & 24 deletions pkg/nodepassword/nodepassword.go
Original file line number Diff line number Diff line change
Expand Up @@ -15,13 +15,38 @@ import (
v1 "k8s.io/api/core/v1"
apierrors "k8s.io/apimachinery/pkg/api/errors"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/utils/pointer"
)

var (
// Hasher provides the algorithm for generating and verifying hashes
Hasher = hash.NewSCrypt()
Hasher = hash.NewSCrypt()
ErrVerifyFailed = errVerifyFailed()
)

type passwordError struct {
node string
err error
}

func (e *passwordError) Error() string {
return fmt.Sprintf("unable to verify password for node %s: %v", e.node, e.err)
}

func (e *passwordError) Is(target error) bool {
switch target {
case ErrVerifyFailed:
return true
}
return false
}

func (e *passwordError) Unwrap() error {
return e.err
}

func errVerifyFailed() error { return &passwordError{} }

func getSecretName(nodeName string) string {
return strings.ToLower(nodeName + ".node-password." + version.Program)
}
Expand All @@ -30,39 +55,34 @@ func verifyHash(secretClient coreclient.SecretClient, nodeName, pass string) err
name := getSecretName(nodeName)
secret, err := secretClient.Get(metav1.NamespaceSystem, name, metav1.GetOptions{})
if err != nil {
return err
return &passwordError{node: nodeName, err: err}
}
if hash, ok := secret.Data["hash"]; ok {
if err := Hasher.VerifyHash(string(hash), pass); err != nil {
return errors.Wrapf(err, "unable to verify hash for node '%s'", nodeName)
return &passwordError{node: nodeName, err: err}
}
return nil
}
return fmt.Errorf("unable to locate hash data for node secret '%s'", name)
return &passwordError{node: nodeName, err: errors.New("password hash not found in node secret")}
}

// Ensure will verify a node-password secret if it exists, otherwise it will create one
func Ensure(secretClient coreclient.SecretClient, nodeName, pass string) error {
if err := verifyHash(secretClient, nodeName, pass); !apierrors.IsNotFound(err) {
return err
}

hash, err := Hasher.CreateHash(pass)
if err != nil {
return errors.Wrapf(err, "unable to create hash for node '%s'", nodeName)
}

immutable := true
_, err = secretClient.Create(&v1.Secret{
ObjectMeta: metav1.ObjectMeta{
Name: getSecretName(nodeName),
Namespace: metav1.NamespaceSystem,
},
Immutable: &immutable,
Data: map[string][]byte{"hash": []byte(hash)},
})
if apierrors.IsAlreadyExists(err) {
return verifyHash(secretClient, nodeName, pass)
err := verifyHash(secretClient, nodeName, pass)
if apierrors.IsNotFound(err) {
var hash string
hash, err = Hasher.CreateHash(pass)
if err != nil {
return &passwordError{node: nodeName, err: err}
}
_, err = secretClient.Create(&v1.Secret{
ObjectMeta: metav1.ObjectMeta{
Name: getSecretName(nodeName),
Namespace: metav1.NamespaceSystem,
},
Immutable: pointer.Bool(true),
Data: map[string][]byte{"hash": []byte(hash)},
})
}
return err
}
Expand Down
8 changes: 8 additions & 0 deletions pkg/nodepassword/nodepassword_test.go
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
package nodepassword

import (
"errors"
"fmt"
"log"
"os"
Expand Down Expand Up @@ -89,6 +90,13 @@ func Test_UnitMigrateFileNodes(t *testing.T) {
assertNotEqual(t, Ensure(secretClient, newNode, "wrong-password"), nil)
}

func Test_PasswordError(t *testing.T) {
err := &passwordError{node: "test", err: fmt.Errorf("inner error")}
assertEqual(t, errors.Is(err, ErrVerifyFailed), true)
assertEqual(t, errors.Is(err, fmt.Errorf("different error")), false)
assertNotEqual(t, errors.Unwrap(err), nil)
}

// --------------------------

// mock secret client interface
Expand Down
4 changes: 4 additions & 0 deletions pkg/server/context.go
Original file line number Diff line number Diff line change
Expand Up @@ -16,9 +16,11 @@ import (
"github.com/rancher/wrangler/pkg/generated/controllers/core"
"github.com/rancher/wrangler/pkg/generated/controllers/rbac"
"github.com/rancher/wrangler/pkg/start"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/client-go/kubernetes"
"k8s.io/client-go/rest"
"k8s.io/client-go/tools/clientcmd"
"k8s.io/client-go/tools/record"
)

type Context struct {
Expand All @@ -29,6 +31,7 @@ type Context struct {
Auth *rbac.Factory
Core *core.Factory
K8s kubernetes.Interface
Event record.EventRecorder
}

func (c *Context) Start(ctx context.Context) error {
Expand Down Expand Up @@ -58,6 +61,7 @@ func NewContext(ctx context.Context, cfg string) (*Context, error) {
Apps: apps.NewFactoryFromConfigOrDie(restConfig),
Batch: batch.NewFactoryFromConfigOrDie(restConfig),
Core: core.NewFactoryFromConfigOrDie(restConfig),
Event: util.BuildControllerEventRecorder(k8s, version.Program+"-supervisor", metav1.NamespaceAll),
}, nil
}

Expand Down
80 changes: 51 additions & 29 deletions pkg/server/router.go
Original file line number Diff line number Diff line change
Expand Up @@ -26,10 +26,13 @@ import (
certutil "github.com/rancher/dynamiclistener/cert"
coreclient "github.com/rancher/wrangler/pkg/generated/controllers/core/v1"
"github.com/sirupsen/logrus"
corev1 "k8s.io/api/core/v1"
apierrors "k8s.io/apimachinery/pkg/api/errors"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/runtime/schema"
"k8s.io/apimachinery/pkg/types"
"k8s.io/apimachinery/pkg/util/json"
"k8s.io/apimachinery/pkg/util/wait"
"k8s.io/apiserver/pkg/authentication/user"
"k8s.io/apiserver/pkg/endpoints/handlers/responsewriters"
"k8s.io/apiserver/pkg/endpoints/request"
Expand Down Expand Up @@ -140,7 +143,7 @@ func cacerts(serverCA string) http.Handler {
var err error
ca, err = os.ReadFile(serverCA)
if err != nil {
sendError(err, resp)
sendError(err, resp, req)
return
}
}
Expand Down Expand Up @@ -215,13 +218,13 @@ func servingKubeletCert(server *config.Control, keyFile string, auth nodePassBoo

nodeName, errCode, err := auth(req)
if err != nil {
sendError(err, resp, errCode)
sendError(err, resp, req, errCode)
return
}

caCerts, caKey, key, err := getCACertAndKeys(server.Runtime.ServerCA, server.Runtime.ServerCAKey, server.Runtime.ServingKubeletKey)
if err != nil {
sendError(err, resp)
sendError(err, resp, req)
return
}

Expand All @@ -231,7 +234,7 @@ func servingKubeletCert(server *config.Control, keyFile string, auth nodePassBoo
for _, v := range strings.Split(nodeIP, ",") {
ip := net.ParseIP(v)
if ip == nil {
sendError(fmt.Errorf("invalid IP address %s", ip), resp)
sendError(fmt.Errorf("invalid node IP address %s", ip), resp, req)
return
}
ips = append(ips, ip)
Expand All @@ -247,7 +250,7 @@ func servingKubeletCert(server *config.Control, keyFile string, auth nodePassBoo
},
}, key, caCerts[0], caKey)
if err != nil {
sendError(err, resp)
sendError(err, resp, req)
return
}

Expand All @@ -271,13 +274,13 @@ func clientKubeletCert(server *config.Control, keyFile string, auth nodePassBoot

nodeName, errCode, err := auth(req)
if err != nil {
sendError(err, resp, errCode)
sendError(err, resp, req, errCode)
return
}

caCerts, caKey, key, err := getCACertAndKeys(server.Runtime.ClientCA, server.Runtime.ClientCAKey, server.Runtime.ClientKubeletKey)
if err != nil {
sendError(err, resp)
sendError(err, resp, req)
return
}

Expand All @@ -287,7 +290,7 @@ func clientKubeletCert(server *config.Control, keyFile string, auth nodePassBoot
Usages: []x509.ExtKeyUsage{x509.ExtKeyUsageClientAuth},
}, key, caCerts[0], caKey)
if err != nil {
sendError(err, resp)
sendError(err, resp, req)
return
}

Expand Down Expand Up @@ -397,17 +400,19 @@ func serveStatic(urlPrefix, staticDir string) http.Handler {
return http.StripPrefix(urlPrefix, http.FileServer(http.Dir(staticDir)))
}

func sendError(err error, resp http.ResponseWriter, status ...int) {
func sendError(err error, resp http.ResponseWriter, req *http.Request, status ...int) {
var code int
if len(status) == 1 {
code = status[0]
}
if code == 0 || code == http.StatusOK {
code = http.StatusInternalServerError
}
logrus.Error(err)
resp.WriteHeader(code)
resp.Write([]byte(err.Error()))
logrus.Errorf("Sending HTTP %d response to %s: %v", code, req.RemoteAddr, err)
responsewriters.ErrorNegotiated(
apierrors.NewGenericServerResponse(code, req.Method, schema.GroupResource{}, req.URL.Path, err.Error(), 0, true),
scheme.Codecs.WithoutConversion(), schema.GroupVersion{}, resp, req,
)
}

// nodePassBootstrapper returns a node name, or http error code and error
Expand Down Expand Up @@ -457,12 +462,23 @@ func passwordBootstrap(ctx context.Context, config *Config) nodePassBootstrapper
}
}

// verify that the node exists, if using Node Identity auth
if err := verifyNode(ctx, nodeClient, node); err != nil {
return "", http.StatusUnauthorized, err
}

// verify that the node password secret matches, or create it if it does not
if err := nodepassword.Ensure(secretClient, node.Name, node.Password); err != nil {
return "", http.StatusForbidden, err
// if the verification failed, reject the request
if errors.Is(err, nodepassword.ErrVerifyFailed) {
return "", http.StatusForbidden, err
}
// If verification failed due to an error creating the node password secret, allow
// the request, but retry verification until the outage is resolved. This behavior
// allows nodes to join the cluster during outages caused by validating webhooks
// blocking secret creation - if the outage requires new nodes to join in order to
// run the webhook pods, we must fail open here to resolve the outage.
return verifyRemotePassword(ctx, config, &mu, deferredNodes, node)
}

return node.Name, http.StatusOK, nil
Expand All @@ -489,7 +505,7 @@ func verifyLocalPassword(ctx context.Context, config *Config, mu *sync.Mutex, de
}

if err := nodepassword.Hasher.VerifyHash(passHash, node.Password); err != nil {
return "", http.StatusForbidden, errors.Wrapf(err, "unable to verify local password for node '%s'", node.Name)
return "", http.StatusForbidden, errors.Wrap(err, "unable to verify local node password")
}

mu.Lock()
Expand All @@ -498,7 +514,7 @@ func verifyLocalPassword(ctx context.Context, config *Config, mu *sync.Mutex, de
if _, ok := deferredNodes[node.Name]; !ok {
deferredNodes[node.Name] = true
go ensureSecret(ctx, config, node)
logrus.Debugf("Password verified locally for node '%s'", node.Name)
logrus.Infof("Password verified locally for node %s", node.Name)
}

return node.Name, http.StatusOK, nil
Expand All @@ -511,7 +527,7 @@ func verifyRemotePassword(ctx context.Context, config *Config, mu *sync.Mutex, d
if _, ok := deferredNodes[node.Name]; !ok {
deferredNodes[node.Name] = true
go ensureSecret(ctx, config, node)
logrus.Debugf("Password verification deferred for node '%s'", node.Name)
logrus.Infof("Password verification deferred for node %s", node.Name)
}

return node.Name, http.StatusOK, nil
Expand All @@ -528,19 +544,25 @@ func verifyNode(ctx context.Context, nodeClient coreclient.NodeClient, node *nod

func ensureSecret(ctx context.Context, config *Config, node *nodeInfo) {
runtime := config.ControlConfig.Runtime
for {
select {
case <-ctx.Done():
return
case <-time.After(1 * time.Second):
if runtime.Core != nil {
logrus.Debugf("Runtime core has become available, ensuring password secret for node '%s'", node.Name)
secretClient := runtime.Core.Core().V1().Secret()
if err := nodepassword.Ensure(secretClient, node.Name, node.Password); err != nil {
logrus.Warnf("Error ensuring node password secret for pre-validated node '%s': %v", node.Name, err)
}
return
wait.PollImmediateUntilWithContext(ctx, time.Second*5, func(ctx context.Context) (bool, error) {
if runtime.Core != nil {
secretClient := runtime.Core.Core().V1().Secret()
// This is consistent with events attached to the node generated by the kubelet
// https://github.com/kubernetes/kubernetes/blob/612130dd2f4188db839ea5c2dea07a96b0ad8d1c/pkg/kubelet/kubelet.go#L479-L485
nodeRef := &corev1.ObjectReference{
Kind: "Node",
Name: node.Name,
UID: types.UID(node.Name),
Namespace: "",
}
if err := nodepassword.Ensure(secretClient, node.Name, node.Password); err != nil {
runtime.Event.Eventf(nodeRef, corev1.EventTypeWarning, "NodePasswordValidationFailed", "Deferred node password secret validation failed: %v", err)
// Return true to stop polling if the password verification failed; only retry on secret creation errors.
return errors.Is(err, nodepassword.ErrVerifyFailed), nil
}
runtime.Event.Event(nodeRef, corev1.EventTypeNormal, "NodePasswordValidationComplete", "Deferred node password secret validation complete")
return true, nil
}
}
return false, nil
})
}
1 change: 1 addition & 0 deletions pkg/server/server.go
Original file line number Diff line number Diff line change
Expand Up @@ -121,6 +121,7 @@ func runControllers(ctx context.Context, config *Config) error {
controlConfig.Runtime.NodePasswdFile); err != nil {
logrus.Warn(errors.Wrap(err, "error migrating node-password file"))
}
controlConfig.Runtime.Event = sc.Event
controlConfig.Runtime.Core = sc.Core

for name, cb := range controlConfig.Runtime.ClusterControllerStarts {
Expand Down