Skip to content

Commit

Permalink
feat: Thanos Querier to Thanos sidecar mTLS
Browse files Browse the repository at this point in the history
  • Loading branch information
vyzigold committed Jul 17, 2024
1 parent 150b708 commit 2732d31
Show file tree
Hide file tree
Showing 6 changed files with 237 additions and 0 deletions.
2 changes: 2 additions & 0 deletions go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@ require (
github.com/go-logr/logr v1.4.2
github.com/google/go-cmp v0.6.0
github.com/openshift/api v0.0.0-20240301093301-ce10821dc999
github.com/openshift/library-go v0.0.0-20240216151214-738f3fa4ccf8
github.com/pkg/errors v0.9.1
github.com/prometheus/common v0.55.0
github.com/rhobs/obo-prometheus-operator v0.74.0-rhobs1
Expand All @@ -20,6 +21,7 @@ require (
k8s.io/api v0.30.2
k8s.io/apiextensions-apiserver v0.30.2
k8s.io/apimachinery v0.30.2
k8s.io/apiserver v0.30.2
k8s.io/client-go v0.30.2
k8s.io/component-base v0.30.2
k8s.io/utils v0.0.0-20240310230437-4693a0247e57
Expand Down
4 changes: 4 additions & 0 deletions go.sum
Original file line number Diff line number Diff line change
Expand Up @@ -373,6 +373,8 @@ github.com/opencontainers/image-spec v1.0.2 h1:9yCKha/T5XdGtO0q9Q9a6T5NUCsTn/DrB
github.com/opencontainers/image-spec v1.0.2/go.mod h1:BtxoFyWECRxE4U/7sNtV5W15zMzWCbyJoFRP3s7yZA0=
github.com/openshift/api v0.0.0-20240301093301-ce10821dc999 h1:+S998xHiJApsJZjRAO8wyedU9GfqFd8mtwWly6LqHDo=
github.com/openshift/api v0.0.0-20240301093301-ce10821dc999/go.mod h1:CxgbWAlvu2iQB0UmKTtRu1YfepRg1/vJ64n2DlIEVz4=
github.com/openshift/library-go v0.0.0-20240216151214-738f3fa4ccf8 h1:dKtHGYiOwl0DKZEWBW4MFWFS6IYW02AVD1WSuUAVwEo=
github.com/openshift/library-go v0.0.0-20240216151214-738f3fa4ccf8/go.mod h1:ePlaOqUiPplRc++6aYdMe+2FmXb2xTNS9Nz5laG2YmI=
github.com/opentracing/opentracing-go v1.2.0 h1:uEJPy/1a5RIPAJ0Ov+OIO8OxWu77jEv+1B0VhjKrZUs=
github.com/opentracing/opentracing-go v1.2.0/go.mod h1:GxEUsuufX4nBwe+T+Wl9TAgYrxe9dPLANfrWvHYVTgc=
github.com/ovh/go-ovh v1.4.3 h1:Gs3V823zwTFpzgGLZNI6ILS4rmxZgJwJCz54Er9LwD0=
Expand Down Expand Up @@ -802,6 +804,8 @@ k8s.io/apiextensions-apiserver v0.30.2 h1:l7Eue2t6QiLHErfn2vwK4KgF4NeDgjQkCXtEbO
k8s.io/apiextensions-apiserver v0.30.2/go.mod h1:lsJFLYyK40iguuinsb3nt+Sj6CmodSI4ACDLep1rgjw=
k8s.io/apimachinery v0.30.2 h1:fEMcnBj6qkzzPGSVsAZtQThU62SmQ4ZymlXRC5yFSCg=
k8s.io/apimachinery v0.30.2/go.mod h1:iexa2somDaxdnj7bha06bhb43Zpa6eWH8N8dbqVjTUc=
k8s.io/apiserver v0.30.2 h1:ACouHiYl1yFI2VFI3YGM+lvxgy6ir4yK2oLOsLI1/tw=
k8s.io/apiserver v0.30.2/go.mod h1:BOTdFBIch9Sv0ypSEcUR6ew/NUFGocRFNl72Ra7wTm8=
k8s.io/client-go v0.30.2 h1:sBIVJdojUNPDU/jObC+18tXWcTJVcwyqS9diGdWHk50=
k8s.io/client-go v0.30.2/go.mod h1:JglKSWULm9xlJLx4KCkfLLQ7XwtlbflV6uFFSHTMgVs=
k8s.io/component-base v0.30.2 h1:pqGBczYoW1sno8q9ObExUqrYSKhtE5rW3y6gX88GZII=
Expand Down
148 changes: 148 additions & 0 deletions pkg/assets/certificate_generator.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,148 @@
package assets

import (
"crypto/rand"
"crypto/x509"
"fmt"
"math/big"
"time"

"github.com/go-logr/logr"

"github.com/openshift/library-go/pkg/crypto"
v1 "k8s.io/api/core/v1"
"k8s.io/apimachinery/pkg/util/sets"
"k8s.io/apiserver/pkg/authentication/user"
)

const (
	// certificateLifetime is the library-go default certificate lifetime
	// (expressed in days there) converted to a time.Duration.
	certificateLifetime = time.Duration(crypto.DefaultCertificateLifetimeInDays) * 24 * time.Hour

	// GRPCSecretName is the name of the Secret that carries the CA and the
	// key pairs written by RotateGRPCSecret.
	GRPCSecretName = "thanos-grpc-secret"
)

// needsNewCert reports whether a certificate valid from notBefore until
// notAfter is due for renewal at the instant returned by now.
//
// Adapted from
// https://github.com/openshift/library-go/blob/08c2fd1b452520da35ad210930ea9d100545589a/pkg/operator/certrotation/signer.go#L68-L86
// without the refresh time handling: renewal is due once less than one fifth
// of the total validity period remains before expiration.
func needsNewCert(notBefore, notAfter time.Time, now func() time.Time) bool {
	validity := notAfter.Sub(notBefore)
	// Last instant at which the certificate is still considered fresh.
	renewalDeadline := notAfter.Add(-(validity / 5))
	return renewalDeadline.Before(now())
}

// RotateGRPCSecret makes sure that s carries a current CA plus a client key
// pair for Thanos Querier and a server key pair for "prometheus-grpc", both
// signed by that CA.
//
// Adapted from
// https://github.com/openshift/cluster-monitoring-operator/blob/765d0b0369b176a5997d787b6710783437172879/pkg/manifests/tls.go#L113
//
// A rotation is performed when "ca.crt" or "ca.key" is missing from s.Data,
// when the stored CA cannot be parsed, or when needsNewCert reports the stored
// CA as due for renewal. If the stored CA is readable, its key is kept and
// only the CA certificate is re-issued; otherwise a new self-signed CA is
// generated. In both cases the client and server key pairs are re-issued.
//
// On rotation the following keys of s.Data are written:
//
//	ca.crt, ca.key
//	thanos-querier-client.crt, thanos-querier-client.key
//	prometheus-server.crt, prometheus-server.key
//
// The returned bool is true whenever a rotation was required, including when
// an error is returned; in the error case s.Data may have been partially
// updated. The function only modifies s in memory; persisting the Secret is
// left to the caller.
func RotateGRPCSecret(s *v1.Secret, logger logr.Logger) (bool, error) {
	var (
		curCA, newCA              *crypto.CA
		curCABytes, crtPresent    = s.Data["ca.crt"]
		curCAKeyBytes, keyPresent = s.Data["ca.key"]
		rotate                    = !crtPresent || !keyPresent
	)

	if crtPresent && keyPresent {
		var err error
		curCA, err = crypto.GetCAFromBytes(curCABytes, curCAKeyBytes)
		if err != nil {
			// An unreadable CA is not fatal: it is replaced by a fresh one below.
			logger.Info("generating a new CA due to error reading CA", "err", err)
			rotate = true
		} else if needsNewCert(curCA.Config.Certs[0].NotBefore, curCA.Config.Certs[0].NotAfter, time.Now) {
			logger.Info("generating new CA, because less than 1/5 of the validity period of the current one remains")
			rotate = true
		}
	}

	if !rotate {
		return rotate, nil
	}

	if curCA == nil {
		// No usable CA is stored: start over with a new self-signed one.
		newCAConfig, err := crypto.MakeSelfSignedCAConfig(
			fmt.Sprintf("%s@%d", "openshift-cluster-monitoring", time.Now().Unix()),
			crypto.DefaultCertificateLifetimeInDays,
		)
		if err != nil {
			return rotate, fmt.Errorf("error generating self signed CA: %w", err)
		}

		newCA = &crypto.CA{
			SerialGenerator: &crypto.RandomSerialGenerator{},
			Config:          newCAConfig,
		}
	} else {
		// Re-issue the CA certificate with the existing key. The current
		// certificate is reused (and mutated in place) as the template: only
		// the validity window and the serial number change.
		template := curCA.Config.Certs[0]
		now := time.Now()
		template.NotBefore = now.Add(-1 * time.Second)
		template.NotAfter = now.Add(certificateLifetime)
		template.SerialNumber = template.SerialNumber.Add(template.SerialNumber, big.NewInt(1))

		// Self-signed: the template acts as its own parent.
		newCACert, err := createCertificate(template, template, template.PublicKey, curCA.Config.Key)
		if err != nil {
			return rotate, fmt.Errorf("error rotating CA: %w", err)
		}

		newCA = &crypto.CA{
			SerialGenerator: &crypto.RandomSerialGenerator{},
			Config: &crypto.TLSCertificateConfig{
				Certs: []*x509.Certificate{newCACert},
				Key:   curCA.Config.Key,
			},
		}
	}

	newCABytes, newCAKeyBytes, err := newCA.Config.GetPEMBytes()
	if err != nil {
		return rotate, fmt.Errorf("error getting PEM bytes from CA: %w", err)
	}

	// Assigning into a nil map panics, and s.Data is nil for a Secret that
	// carries no data.
	if s.Data == nil {
		s.Data = map[string][]byte{}
	}

	s.Data["ca.crt"] = newCABytes
	s.Data["ca.key"] = newCAKeyBytes

	// Client key pair presented by Thanos Querier.
	{
		cfg, err := newCA.MakeClientCertificateForDuration(
			&user.DefaultInfo{
				Name: "thanos-querier",
			},
			certificateLifetime,
		)
		if err != nil {
			return rotate, fmt.Errorf("error making client certificate: %w", err)
		}

		crt, key, err := cfg.GetPEMBytes()
		if err != nil {
			return rotate, fmt.Errorf("error getting PEM bytes for thanos querier client certificate: %w", err)
		}
		s.Data["thanos-querier-client.crt"] = crt
		s.Data["thanos-querier-client.key"] = key
	}

	// Server key pair issued for the host name "prometheus-grpc".
	{
		cfg, err := newCA.MakeServerCert(
			sets.NewString("prometheus-grpc"),
			crypto.DefaultCertificateLifetimeInDays,
		)
		if err != nil {
			return rotate, fmt.Errorf("error making server certificate: %w", err)
		}

		crt, key, err := cfg.GetPEMBytes()
		if err != nil {
			return rotate, fmt.Errorf("error getting PEM bytes for prometheus-k8s server certificate: %w", err)
		}
		s.Data["prometheus-server.crt"] = crt
		s.Data["prometheus-server.key"] = key
	}

	return rotate, nil
}

// createCertificate signs template with priv on behalf of parent, embedding
// the public key pub, and returns the result as a parsed *x509.Certificate.
//
// The arguments are handed to x509.CreateCertificate unchanged; see its
// documentation for the accepted key types. Passing the same certificate as
// template and parent yields a self-signed certificate.
func createCertificate(template, parent *x509.Certificate, pub, priv interface{}) (*x509.Certificate, error) {
	rawCert, err := x509.CreateCertificate(rand.Reader, template, parent, pub, priv)
	if err != nil {
		return nil, fmt.Errorf("error creating certificate: %w", err)
	}

	// x509.CreateCertificate emits a single DER certificate, so parse exactly
	// one instead of indexing into a slice of unknown length.
	cert, err := x509.ParseCertificate(rawCert)
	if err != nil {
		return nil, fmt.Errorf("error parsing certificate: %w", err)
	}
	return cert, nil
}
22 changes: 22 additions & 0 deletions pkg/controllers/monitoring/monitoring-stack/components.go
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@ import (

stack "github.com/rhobs/observability-operator/pkg/apis/monitoring/v1alpha1"
"github.com/rhobs/observability-operator/pkg/reconciler"
"github.com/rhobs/observability-operator/pkg/assets"
)

const AdditionalScrapeConfigsSelfScrapeKey = "self-scrape-config"
Expand Down Expand Up @@ -189,12 +190,33 @@ func newPrometheus(
}
return []monv1.EnableFeature{}
}(),
Volumes: []corev1.Volume{
{
Name: "thanos-tls-assets",
VolumeSource: corev1.VolumeSource{
Secret: &corev1.SecretVolumeSource{
SecretName: assets.GRPCSecretName,
},
},
},
},
},
Retention: ms.Spec.Retention,
RuleSelector: prometheusSelector,
RuleNamespaceSelector: ms.Spec.NamespaceSelector,
Thanos: &monv1.ThanosSpec{
Image: ptr.To(thanosCfg.Image),
GRPCServerTLSConfig: &monv1.TLSConfig{
CAFile: "/etc/thanos/tls-assets/ca.crt",
CertFile: "/etc/thanos/tls-assets/prometheus-server.crt",
KeyFile: "/etc/thanos/tls-assets/prometheus-server.key",
},
VolumeMounts: []corev1.VolumeMount{
{
Name: "thanos-tls-assets",
MountPath: "/etc/thanos/tls-assets",
},
},
},
},
}
Expand Down
39 changes: 39 additions & 0 deletions pkg/controllers/monitoring/monitoring-stack/controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -28,14 +28,17 @@ import (
policyv1 "k8s.io/api/policy/v1"
rbacv1 "k8s.io/api/rbac/v1"
"k8s.io/apimachinery/pkg/api/errors"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/runtime"
"k8s.io/apimachinery/pkg/types"
ctrl "sigs.k8s.io/controller-runtime"
"sigs.k8s.io/controller-runtime/pkg/builder"
"sigs.k8s.io/controller-runtime/pkg/client"
"sigs.k8s.io/controller-runtime/pkg/controller"
"sigs.k8s.io/controller-runtime/pkg/predicate"

stack "github.com/rhobs/observability-operator/pkg/apis/monitoring/v1alpha1"
"github.com/rhobs/observability-operator/pkg/assets"
)

type resourceManager struct {
Expand Down Expand Up @@ -133,6 +136,42 @@ func RegisterWithManager(mgr ctrl.Manager, opts Options) error {
func (rm resourceManager) Reconcile(ctx context.Context, req ctrl.Request) (ctrl.Result, error) {
logger := rm.logger.WithValues("stack", req.NamespacedName)
logger.Info("Reconciling monitoring stack")

gRPCSecret := v1.Secret{
TypeMeta: metav1.TypeMeta{
APIVersion: v1.SchemeGroupVersion.String(),
Kind: "Secret",
},
ObjectMeta: metav1.ObjectMeta{
Name: assets.GRPCSecretName,
Namespace: req.Namespace,
},
Data: map[string][]byte{},
}
err := rm.k8sClient.Get(ctx,
types.NamespacedName{
Name: assets.GRPCSecretName,
Namespace: req.Namespace,
},
&gRPCSecret)
if client.IgnoreNotFound(err) != nil {
return ctrl.Result{}, err
}

rotate, err := assets.RotateGRPCSecret(&gRPCSecret, logger)
if err != nil {
return ctrl.Result{}, err
}
if rotate {
err = rm.k8sClient.Update(ctx, &gRPCSecret)
if errors.IsNotFound(err) {
err = rm.k8sClient.Create(ctx, &gRPCSecret)
}
if err != nil {
return ctrl.Result{}, err
}
}

ms, err := rm.getStack(ctx, req)
if err != nil {
// retry since some error has occured
Expand Down
22 changes: 22 additions & 0 deletions pkg/controllers/monitoring/thanos-querier/components.go
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@ import (
"k8s.io/utils/ptr"

msoapi "github.com/rhobs/observability-operator/pkg/apis/monitoring/v1alpha1"
"github.com/rhobs/observability-operator/pkg/assets"
"github.com/rhobs/observability-operator/pkg/reconciler"
)

Expand All @@ -29,6 +30,11 @@ func newThanosQuerierDeployment(name string, spec *msoapi.ThanosQuerier, sidecar
"--log.format=logfmt",
"--query.replica-label=prometheus_replica",
"--query.auto-downsampling",
"--grpc-client-tls-secure",
"--grpc-client-server-name=prometheus-grpc",
"--grpc-client-tls-ca=/etc/thanos/tls-sidecar-assets/ca.crt",
"--grpc-client-tls-key=/etc/thanos/tls-sidecar-assets/thanos-querier-client.key",
"--grpc-client-tls-cert=/etc/thanos/tls-sidecar-assets/thanos-querier-client.crt",
}
for _, endpoint := range sidecarUrls {
args = append(args, fmt.Sprintf("--endpoint=%s", endpoint))
Expand Down Expand Up @@ -86,6 +92,12 @@ func newThanosQuerierDeployment(name string, spec *msoapi.ThanosQuerier, sidecar
Type: corev1.SeccompProfileTypeRuntimeDefault,
},
},
VolumeMounts: []corev1.VolumeMount{
{
Name: "thanos-sidecar-tls-assets",
MountPath: "/etc/thanos/tls-sidecar-assets",
},
},
},
},
NodeSelector: map[string]string{
Expand All @@ -97,6 +109,16 @@ func newThanosQuerierDeployment(name string, spec *msoapi.ThanosQuerier, sidecar
Type: corev1.SeccompProfileTypeRuntimeDefault,
},
},
Volumes: []corev1.Volume{
{
Name: "thanos-sidecar-tls-assets",
VolumeSource: corev1.VolumeSource{
Secret: &corev1.SecretVolumeSource{
SecretName: assets.GRPCSecretName,
},
},
},
},
},
},
ProgressDeadlineSeconds: ptr.To(int32(300)),
Expand Down

0 comments on commit 2732d31

Please sign in to comment.