Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Support prometheus metrics #73

Open
wants to merge 2 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@ require (
github.com/container-storage-interface/spec v1.10.0
github.com/go-logr/logr v1.4.2
github.com/kubernetes-csi/csi-lib-utils v0.19.0
github.com/prometheus/client_golang v1.20.4
github.com/stretchr/testify v1.9.0
google.golang.org/grpc v1.66.2
k8s.io/apimachinery v0.31.1
Expand Down Expand Up @@ -42,6 +43,7 @@ require (
github.com/josharian/intern v1.0.0 // indirect
github.com/json-iterator/go v1.1.12 // indirect
github.com/klauspost/compress v1.17.9 // indirect
github.com/kylelemons/godebug v1.1.0 // indirect
github.com/mailru/easyjson v0.7.7 // indirect
github.com/moby/sys/mountinfo v0.7.2 // indirect
github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd // indirect
Expand All @@ -51,7 +53,6 @@ require (
github.com/opencontainers/runtime-spec v1.2.0 // indirect
github.com/pkg/errors v0.9.1 // indirect
github.com/pmezard/go-difflib v1.0.1-0.20181226105442-5d4384ee4fb2 // indirect
github.com/prometheus/client_golang v1.20.4 // indirect
github.com/prometheus/client_model v0.6.1 // indirect
github.com/prometheus/common v0.59.1 // indirect
github.com/prometheus/procfs v0.15.1 // indirect
Expand Down
54 changes: 47 additions & 7 deletions manager/manager.go
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,7 @@ import (
cminformers "github.com/cert-manager/cert-manager/pkg/client/informers/externalversions"
cmlisters "github.com/cert-manager/cert-manager/pkg/client/listers/certmanager/v1"
"github.com/go-logr/logr"
"github.com/prometheus/client_golang/prometheus"
apierrors "k8s.io/apimachinery/pkg/api/errors"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/labels"
Expand All @@ -47,6 +48,7 @@ import (
internalapi "github.com/cert-manager/csi-lib/internal/api"
internalapiutil "github.com/cert-manager/csi-lib/internal/api/util"
"github.com/cert-manager/csi-lib/metadata"
"github.com/cert-manager/csi-lib/metrics"
"github.com/cert-manager/csi-lib/storage"
)

Expand Down Expand Up @@ -89,6 +91,9 @@ type Options struct {

// RenewalBackoffConfig configures the exponential backoff applied to certificate renewal failures.
RenewalBackoffConfig *wait.Backoff

// Metrics is used for exposing Prometheus metrics
Metrics *metrics.Metrics
}

// NewManager constructs a new manager used to manage volumes containing
Expand Down Expand Up @@ -126,6 +131,9 @@ func NewManager(opts Options) (*Manager, error) {
if opts.Log == nil {
return nil, errors.New("log must be set")
}
if opts.Metrics == nil {
opts.Metrics = metrics.New(opts.Log, prometheus.NewRegistry())
}
if opts.MetadataReader == nil {
return nil, errors.New("MetadataReader must be set")
}
Expand Down Expand Up @@ -241,6 +249,7 @@ func NewManager(opts Options) (*Manager, error) {
metadataReader: opts.MetadataReader,
clock: opts.Clock,
log: *opts.Log,
metrics: opts.Metrics,

generatePrivateKey: opts.GeneratePrivateKey,
generateRequest: opts.GenerateRequest,
Expand Down Expand Up @@ -375,6 +384,9 @@ type Manager struct {
// No thread safety is added around this field, and it MUST NOT be used for any implementation logic.
// It should not be used full-stop :).
doNotUse_CallOnEachIssue func()

// metrics is used to expose Prometheus metrics.
metrics *metrics.Metrics
}

// issue will step through the entire issuance flow for a volume.
Expand All @@ -387,6 +399,9 @@ func (m *Manager) issue(ctx context.Context, volumeID string) error {
log := m.log.WithValues("volume_id", volumeID)
log.Info("Processing issuance")

// Increase issue count
m.metrics.IncrementIssueCallCount(m.nodeNameHash, volumeID)

if err := m.cleanupStaleRequests(ctx, log, volumeID); err != nil {
return fmt.Errorf("cleaning up stale requests: %w", err)
}
Expand Down Expand Up @@ -594,7 +609,7 @@ func (m *Manager) handleRequest(ctx context.Context, volumeID string, meta metad
// Calculate the default next issuance time.
// The implementation's writeKeypair function may override this value before
// writing to the storage layer.
renewalPoint, err := calculateNextIssuanceTime(req.Status.Certificate)
expiryPoint, renewalPoint, err := getExpiryAndDefaultNextIssuanceTime(req.Status.Certificate)
if err != nil {
return fmt.Errorf("calculating next issuance time: %w", err)
}
Expand All @@ -606,6 +621,10 @@ func (m *Manager) handleRequest(ctx context.Context, volumeID string, meta metad
}
log.V(2).Info("Wrote new keypair to storage")

// Update the request metrics.
// Using meta.NextIssuanceTime instead of renewalPoint here, in case writeKeypair overrides the value.
m.metrics.UpdateCertificateRequest(req, expiryPoint, *meta.NextIssuanceTime)

// We must explicitly delete the private key from the pending requests map so that the existing Completed
// request will not be re-used upon renewal.
// Without this, the renewal would pick up the existing issued certificate and re-issue, rather than requesting
Expand Down Expand Up @@ -657,6 +676,9 @@ func (m *Manager) cleanupStaleRequests(ctx context.Context, log logr.Logger, vol
}
}

// Remove the CertificateRequest from the metrics.
m.metrics.RemoveCertificateRequest(toDelete.Name, toDelete.Namespace)

log.Info("Deleted CertificateRequest resource", "name", toDelete.Name, "namespace", toDelete.Namespace)
}

Expand Down Expand Up @@ -756,6 +778,8 @@ func (m *Manager) ManageVolumeImmediate(ctx context.Context, volumeID string) (m
// If issuance fails, immediately return without retrying so the caller can decide
// how to proceed depending on the context this method was called within.
if err := m.issue(ctx, volumeID); err != nil {
// Increase issue error count
m.metrics.IncrementIssueErrorCount(m.nodeNameHash, volumeID)
return true, err
}
}
Expand Down Expand Up @@ -783,6 +807,8 @@ func (m *Manager) manageVolumeIfNotManaged(volumeID string) (managed bool) {
// construct a new channel used to stop management of the volume
stopCh := make(chan struct{})
m.managedVolumes[volumeID] = stopCh
// Increase managed volume count for this driver
m.metrics.IncrementManagedVolumeCount(m.nodeNameHash)

return true
}
Expand All @@ -800,6 +826,10 @@ func (m *Manager) startRenewalRoutine(volumeID string) (started bool) {
return false
}

// Increase managed certificate count for this driver.
// We assume each volume will have one certificate to be managed.
m.metrics.IncrementManagedCertificateCount(m.nodeNameHash)

// Create a context that will be cancelled when the stopCh is closed
ctx, cancel := context.WithCancel(context.Background())
go func() {
Expand Down Expand Up @@ -835,6 +865,8 @@ func (m *Manager) startRenewalRoutine(volumeID string) (started bool) {
defer issueCancel()
if err := m.issue(issueCtx, volumeID); err != nil {
log.Error(err, "Failed to issue certificate, retrying after applying exponential backoff")
// Increase issue error count
m.metrics.IncrementIssueErrorCount(m.nodeNameHash, volumeID)
return false, nil
}
return true, nil
Expand Down Expand Up @@ -874,6 +906,14 @@ func (m *Manager) UnmanageVolume(volumeID string) {
if stopCh, ok := m.managedVolumes[volumeID]; ok {
close(stopCh)
delete(m.managedVolumes, volumeID)
if reqs, err := m.listAllRequestsForVolume(volumeID); err == nil {
// Best-effort: remove this volume's CertificateRequests from the metrics; a listing error is ignored.
for _, req := range reqs {
if req != nil {
m.metrics.RemoveCertificateRequest(req.Name, req.Namespace)
}
}
}
}
}

Expand Down Expand Up @@ -919,19 +959,19 @@ func (m *Manager) Stop() {
}
}

// getExpiryAndDefaultNextIssuanceTime returns the certificate's expiry time
// (NotAfter) together with the default time at which the driver should renew
// it: two thirds of the way through its lifetime (NotAfter - NotBefore).
//
// chain must contain PEM data; only the first PEM block is inspected and it is
// parsed as an X.509 certificate. An error (and zero times) is returned when
// no PEM block can be decoded or when the block is not a valid certificate.
func getExpiryAndDefaultNextIssuanceTime(chain []byte) (time.Time, time.Time, error) {
	block, _ := pem.Decode(chain)
	if block == nil {
		// pem.Decode yields a nil block when the input holds no PEM data;
		// dereferencing it would panic, so surface this as an error instead.
		return time.Time{}, time.Time{}, fmt.Errorf("parsing issued certificate: no PEM data found")
	}

	crt, err := x509.ParseCertificate(block.Bytes)
	if err != nil {
		return time.Time{}, time.Time{}, fmt.Errorf("parsing issued certificate: %w", err)
	}

	// Renew once the final third of the certificate's lifetime begins.
	actualDuration := crt.NotAfter.Sub(crt.NotBefore)
	renewBeforeNotAfter := actualDuration / 3

	return crt.NotAfter, crt.NotAfter.Add(-renewBeforeNotAfter), nil
}
17 changes: 10 additions & 7 deletions manager/manager_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -454,7 +454,7 @@ func TestManager_cleanupStaleRequests(t *testing.T) {
}
}

func Test_calculateNextIssuanceTime(t *testing.T) {
func Test_getExpiryAndDefaultNextIssuanceTime(t *testing.T) {
notBefore := time.Date(1970, time.January, 1, 0, 0, 0, 0, time.UTC)
notAfter := time.Date(1970, time.January, 4, 0, 0, 0, 0, time.UTC)
pk, err := rsa.GenerateKey(rand.Reader, 2048)
Expand All @@ -474,20 +474,23 @@ func Test_calculateNextIssuanceTime(t *testing.T) {
certPEM := pem.EncodeToMemory(&pem.Block{Type: "CERTIFICATE", Bytes: derBytes})

tests := map[string]struct {
expTime time.Time
expErr bool
expTime time.Time
renewTime time.Time
expErr bool
}{
"if no attributes given, return 2/3rd certificate lifetime": {
expTime: notBefore.AddDate(0, 0, 2),
expErr: false,
expTime: notAfter,
renewTime: notBefore.AddDate(0, 0, 2),
expErr: false,
},
}

for name, test := range tests {
t.Run(name, func(t *testing.T) {
renewTime, err := calculateNextIssuanceTime(certPEM)
expTime, renewTime, err := getExpiryAndDefaultNextIssuanceTime(certPEM)
assert.Equal(t, test.expErr, err != nil)
assert.Equal(t, test.expTime, renewTime)
assert.Equal(t, test.expTime, expTime)
assert.Equal(t, test.renewTime, renewTime)
})
}
}
Expand Down
102 changes: 102 additions & 0 deletions metrics/certificaterequest.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,102 @@
/*
Copyright 2024 The cert-manager Authors.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

package metrics

import (
"time"

"github.com/prometheus/client_golang/prometheus"

cmapi "github.com/cert-manager/cert-manager/pkg/apis/certmanager/v1"
cmmeta "github.com/cert-manager/cert-manager/pkg/apis/meta/v1"
)

// readyConditionStatuses lists every condition status for which a ready-status
// series is written per CertificateRequest: the entry matching the request's
// current Ready status is set to 1 and the remaining entries are set to 0.
var readyConditionStatuses = [...]cmmeta.ConditionStatus{
cmmeta.ConditionTrue,
cmmeta.ConditionFalse,
cmmeta.ConditionUnknown,
}

// UpdateCertificateRequest refreshes all metrics recorded for the given
// CertificateRequest: its expiry timestamp (exp), its renewal timestamp
// (renewal), and its Ready condition status as read from cr.Status.Conditions.
// Series are labelled with the request's name, namespace and issuer reference.
func (m *Metrics) UpdateCertificateRequest(cr *cmapi.CertificateRequest, exp, renewal time.Time) {
m.updateCertificateRequestExpiryAndRenewalTime(cr, exp, renewal)
m.updateCertificateRequestStatus(cr)
}

// updateCertificateRequestExpiryAndRenewalTime records the expiry and renewal
// timestamps of a CertificateRequest as Unix seconds. A zero time.Time is
// exported as 0 rather than as its (negative) Unix value.
func (m *Metrics) updateCertificateRequestExpiryAndRenewalTime(cr *cmapi.CertificateRequest, exp, renewal time.Time) {
	// unixOrZero converts a timestamp to Unix seconds, mapping the zero time to 0.
	unixOrZero := func(ts time.Time) float64 {
		if ts.IsZero() {
			return 0.0
		}
		return float64(ts.Unix())
	}

	// Both gauges are keyed by the same label set identifying the request.
	issuer := cr.Spec.IssuerRef
	requestLabels := prometheus.Labels{
		"name":         cr.Name,
		"namespace":    cr.Namespace,
		"issuer_name":  issuer.Name,
		"issuer_kind":  issuer.Kind,
		"issuer_group": issuer.Group,
	}

	m.certificateRequestExpiryTimeSeconds.With(requestLabels).Set(unixOrZero(exp))
	m.certificateRequestRenewalTimeSeconds.With(requestLabels).Set(unixOrZero(renewal))
}

// updateCertificateRequestStatus publishes the Ready condition status of the
// given CertificateRequest. The first Ready condition found is used; when the
// request carries no Ready condition the status is reported as Unknown.
func (m *Metrics) updateCertificateRequestStatus(cr *cmapi.CertificateRequest) {
	// Default used when no Ready condition has been set yet.
	readyStatus := cmmeta.ConditionUnknown

	for i := range cr.Status.Conditions {
		cond := cr.Status.Conditions[i]
		if cond.Type != cmapi.CertificateRequestConditionReady {
			continue
		}
		readyStatus = cond.Status
		break
	}

	m.updateCertificateRequestReadyStatus(cr, readyStatus)
}

// updateCertificateRequestReadyStatus writes one ready-status series per entry
// of readyConditionStatuses for the given CertificateRequest: the series whose
// "condition" label equals current is set to 1, every other one to 0.
func (m *Metrics) updateCertificateRequestReadyStatus(cr *cmapi.CertificateRequest, current cmmeta.ConditionStatus) {
	issuer := cr.Spec.IssuerRef

	for _, candidate := range readyConditionStatuses {
		series := m.certificateRequestReadyStatus.With(prometheus.Labels{
			"name":         cr.Name,
			"namespace":    cr.Namespace,
			"condition":    string(candidate),
			"issuer_name":  issuer.Name,
			"issuer_kind":  issuer.Kind,
			"issuer_group": issuer.Group,
		})

		if candidate == current {
			series.Set(1.0)
		} else {
			series.Set(0.0)
		}
	}
}

// RemoveCertificateRequest stops exposing metrics for a CertificateRequest by
// deleting every expiry, renewal and ready-status series whose "name" and
// "namespace" labels match the given values.
func (m *Metrics) RemoveCertificateRequest(name, namespace string) {
	// Partial match: the remaining labels (issuer, condition) are not constrained.
	selector := prometheus.Labels{"name": name, "namespace": namespace}

	m.certificateRequestExpiryTimeSeconds.DeletePartialMatch(selector)
	m.certificateRequestRenewalTimeSeconds.DeletePartialMatch(selector)
	m.certificateRequestReadyStatus.DeletePartialMatch(selector)
}
Loading