diff --git a/.github/workflows/CI-standalone.yaml b/.github/workflows/CI-standalone.yaml index adcc997e..0906690d 100644 --- a/.github/workflows/CI-standalone.yaml +++ b/.github/workflows/CI-standalone.yaml @@ -59,4 +59,4 @@ jobs: make deploy -e GIT_BRANCH=${{ env.GIT_BRANCH }} TAG=${{ env.GIT_BRANCH }}-${{ env.TAG }} ENV=standalone - name: Run E2E tests - run: LABEL_FILTER="Standalone,Webhook" ./hack/run-tests-on-cluster.sh + run: LABEL_FILTER="Metrics,Standalone,Webhook" ./hack/run-tests-on-cluster.sh diff --git a/cmd/main.go b/cmd/main.go index fd650295..df087173 100644 --- a/cmd/main.go +++ b/cmd/main.go @@ -42,6 +42,7 @@ import ( ctrl "sigs.k8s.io/controller-runtime" "sigs.k8s.io/controller-runtime/pkg/client" "sigs.k8s.io/controller-runtime/pkg/log/zap" + "sigs.k8s.io/controller-runtime/pkg/metrics/filters" metricsserver "sigs.k8s.io/controller-runtime/pkg/metrics/server" "sigs.k8s.io/controller-runtime/pkg/webhook" "sigs.k8s.io/yaml" @@ -49,6 +50,7 @@ import ( kueue "sigs.k8s.io/kueue/apis/kueue/v1beta1" workloadv1beta2 "github.com/project-codeflare/appwrapper/api/v1beta2" + "github.com/project-codeflare/appwrapper/internal/metrics" "github.com/project-codeflare/appwrapper/pkg/config" "github.com/project-codeflare/appwrapper/pkg/controller" "github.com/project-codeflare/appwrapper/pkg/logger" @@ -123,15 +125,19 @@ func main() { tlsOpts = append(tlsOpts, disableHTTP2) } + metrics.Register() + mgr, err := ctrl.NewManager(k8sConfig, ctrl.Options{ Scheme: scheme, Metrics: metricsserver.Options{ - BindAddress: cfg.ControllerManager.Metrics.BindAddress, - SecureServing: cfg.ControllerManager.Metrics.SecureServing, - TLSOpts: tlsOpts, + BindAddress: cfg.ControllerManager.Metrics.BindAddress, + FilterProvider: filters.WithAuthenticationAndAuthorization, + SecureServing: true, + TLSOpts: tlsOpts, }, WebhookServer: webhook.NewServer(webhook.Options{ TLSOpts: tlsOpts, + Port: 9443, }), HealthProbeBindAddress: cfg.ControllerManager.Health.BindAddress, LeaderElection: cfg.ControllerManager.LeaderElection, diff --git a/config/default/config.yaml b/config/default/config.yaml index 7811e480..ba203c74 100644 --- a/config/default/config.yaml +++ b/config/default/config.yaml @@ -10,5 +10,5 @@ data: health: bindAddress: ":8081" metrics: - bindAddress: "127.0.0.1:8080" + bindAddress: ":8443" leaderElection: true diff --git a/config/default/kustomization.yaml b/config/default/kustomization.yaml index 2fee1589..54c70fd5 100644 --- a/config/default/kustomization.yaml +++ b/config/default/kustomization.yaml @@ -8,30 +8,30 @@ namespace: appwrapper-system # field above. namePrefix: appwrapper- -# Labels to add to all resources and selectors. -#labels: -#- includeSelectors: true -# pairs: -# someName: someValue +labels: +- pairs: + app.kubernetes.io/name: appwrapper + app.kubernetes.io/component: controller + includeTemplates: true +- pairs: + control-plane: controller-manager + includeSelectors: true resources: - config.yaml - ../crd - ../rbac - ../manager +- ../internalcert # [WEBHOOK] To enable webhook, uncomment all the sections with [WEBHOOK] prefix including the one in # crd/kustomization.yaml - ../webhook -- ../internalcert # [PROMETHEUS] To enable prometheus monitor, uncomment all sections with 'PROMETHEUS'. #- ../prometheus +# [METRICS] Expose the controller manager metrics service. +- metrics_service.yaml patches: -# Protect the /metrics endpoint by putting it behind auth. -# If you want your controller-manager to expose the /metrics -# endpoint w/o any authn/z, please comment the following line. -- path: manager_auth_proxy_patch.yaml - # [WEBHOOK] To enable webhook, uncomment all the sections with [WEBHOOK] prefix including the one in # crd/kustomization.yaml - path: manager_webhook_patch.yaml diff --git a/config/default/manager_auth_proxy_patch.yaml b/config/default/manager_auth_proxy_patch.yaml deleted file mode 100644 index 6f391305..00000000 --- a/config/default/manager_auth_proxy_patch.yaml +++ /dev/null @@ -1,34 +0,0 @@ -# This patch inject a sidecar container which is a HTTP proxy for the -# controller manager, it performs RBAC authorization against the Kubernetes API using SubjectAccessReviews. -apiVersion: apps/v1 -kind: Deployment -metadata: - name: controller-manager - namespace: system -spec: - template: - spec: - containers: - - name: kube-rbac-proxy - securityContext: - allowPrivilegeEscalation: false - capabilities: - drop: - - "ALL" - image: gcr.io/kubebuilder/kube-rbac-proxy:v0.15.0 - args: - - "--secure-listen-address=0.0.0.0:8443" - - "--upstream=http://127.0.0.1:8080/" - - "--logtostderr=true" - - "--v=0" - ports: - - containerPort: 8443 - protocol: TCP - name: https - resources: - limits: - cpu: 500m - memory: 128Mi - requests: - cpu: 5m - memory: 64Mi diff --git a/config/default/metrics_service.yaml b/config/default/metrics_service.yaml new file mode 100644 index 00000000..dc7548c8 --- /dev/null +++ b/config/default/metrics_service.yaml @@ -0,0 +1,16 @@ +apiVersion: v1 +kind: Service +metadata: + labels: + control-plane: controller-manager + app.kubernetes.io/name: appwrapper + name: controller-manager-metrics-service + namespace: system +spec: + ports: + - name: https + port: 8443 + protocol: TCP + targetPort: 8443 + selector: + control-plane: controller-manager diff --git a/config/manager/manager.yaml b/config/manager/manager.yaml index 549f5e4a..58dd79a8 100644 --- a/config/manager/manager.yaml +++ b/config/manager/manager.yaml @@ -1,8 +1,6 @@ apiVersion: v1 kind: Namespace metadata: - labels: - control-plane: controller-manager name: system --- apiVersion: apps/v1 @@ -10,19 +8,12 @@ kind: Deployment metadata: name: controller-manager namespace: system - labels: - control-plane: controller-manager spec: - selector: - matchLabels: - control-plane: controller-manager replicas: 1 template: metadata: annotations: kubectl.kubernetes.io/default-container: manager - labels: - control-plane: controller-manager spec: affinity: nodeAffinity: @@ -47,6 +38,8 @@ spec: containers: - command: - /manager + args: + - "--zap-log-level=2" image: controller:latest name: manager securityContext: @@ -68,10 +61,10 @@ spec: periodSeconds: 10 resources: limits: - cpu: 500m + cpu: "2" memory: 128Mi requests: - cpu: 10m + cpu: 100m memory: 64Mi serviceAccountName: controller-manager terminationGracePeriodSeconds: 10 diff --git a/config/rbac/kustomization.yaml b/config/rbac/kustomization.yaml index 12187025..c14c8e6f 100644 --- a/config/rbac/kustomization.yaml +++ b/config/rbac/kustomization.yaml @@ -12,10 +12,13 @@ resources: - user_role.yaml - editor_role.yaml - viewer_role.yaml -# Comment the following 4 lines if you want to disable -# the auth proxy (https://github.com/brancz/kube-rbac-proxy) -# which protects your /metrics endpoint. -- auth_proxy_service.yaml -- auth_proxy_role.yaml -- auth_proxy_role_binding.yaml -- auth_proxy_client_clusterrole.yaml + +# The following RBAC configurations are used to protect +# the metrics endpoint with authn/authz. These configurations +# ensure that only authorized users and service accounts +# can access the metrics endpoint. Comment the following +# permissions if you want to disable this protection. +# More info: https://book.kubebuilder.io/reference/metrics.html +- metrics_auth_role.yaml +- metrics_auth_role_binding.yaml +- metrics_reader_role.yaml diff --git a/config/rbac/auth_proxy_role.yaml b/config/rbac/metrics_auth_role.yaml similarity index 90% rename from config/rbac/auth_proxy_role.yaml rename to config/rbac/metrics_auth_role.yaml index 80e1857c..32d2e4ec 100644 --- a/config/rbac/auth_proxy_role.yaml +++ b/config/rbac/metrics_auth_role.yaml @@ -1,7 +1,7 @@ apiVersion: rbac.authorization.k8s.io/v1 kind: ClusterRole metadata: - name: proxy-role + name: metrics-auth-role rules: - apiGroups: - authentication.k8s.io diff --git a/config/rbac/auth_proxy_role_binding.yaml b/config/rbac/metrics_auth_role_binding.yaml similarity index 79% rename from config/rbac/auth_proxy_role_binding.yaml rename to config/rbac/metrics_auth_role_binding.yaml index ec7acc0a..e775d67f 100644 --- a/config/rbac/auth_proxy_role_binding.yaml +++ b/config/rbac/metrics_auth_role_binding.yaml @@ -1,11 +1,11 @@ apiVersion: rbac.authorization.k8s.io/v1 kind: ClusterRoleBinding metadata: - name: proxy-rolebinding + name: metrics-auth-rolebinding roleRef: apiGroup: rbac.authorization.k8s.io kind: ClusterRole - name: proxy-role + name: metrics-auth-role subjects: - kind: ServiceAccount name: controller-manager diff --git a/config/rbac/auth_proxy_client_clusterrole.yaml b/config/rbac/metrics_reader_role.yaml similarity index 100% rename from config/rbac/auth_proxy_client_clusterrole.yaml rename to config/rbac/metrics_reader_role.yaml diff --git a/config/standalone/config.yaml b/config/standalone/config.yaml index e8916c96..ec00bae7 100644 --- a/config/standalone/config.yaml +++ b/config/standalone/config.yaml @@ -10,5 +10,5 @@ data: health: bindAddress: ":8081" metrics: - bindAddress: "127.0.0.1:8080" + bindAddress: ":8443" leaderElection: true diff --git a/config/standalone/kustomization.yaml b/config/standalone/kustomization.yaml index 4326f887..a10ffbb2 100644 --- a/config/standalone/kustomization.yaml +++ b/config/standalone/kustomization.yaml @@ -8,30 +8,30 @@ namespace: appwrapper-system # field above. namePrefix: appwrapper- -# Labels to add to all resources and selectors. -#labels: -#- includeSelectors: true -# pairs: -# someName: someValue +labels: +- pairs: + app.kubernetes.io/name: appwrapper + app.kubernetes.io/component: controller + includeTemplates: true +- pairs: + control-plane: controller-manager + includeSelectors: true resources: - config.yaml - ../crd - ../rbac - ../manager +- ../internalcert # [WEBHOOK] To enable webhook, uncomment all the sections with [WEBHOOK] prefix including the one in # crd/kustomization.yaml - ../webhook -- ../internalcert # [PROMETHEUS] To enable prometheus monitor, uncomment all sections with 'PROMETHEUS'. #- ../prometheus +# [METRICS] Expose the controller manager metrics service. +- metrics_service.yaml patches: -# Protect the /metrics endpoint by putting it behind auth. -# If you want your controller-manager to expose the /metrics -# endpoint w/o any authn/z, please comment the following line. -- path: manager_auth_proxy_patch.yaml - # [WEBHOOK] To enable webhook, uncomment all the sections with [WEBHOOK] prefix including the one in # crd/kustomization.yaml - path: manager_webhook_patch.yaml diff --git a/config/standalone/manager_auth_proxy_patch.yaml b/config/standalone/manager_auth_proxy_patch.yaml deleted file mode 100644 index 6f391305..00000000 --- a/config/standalone/manager_auth_proxy_patch.yaml +++ /dev/null @@ -1,34 +0,0 @@ -# This patch inject a sidecar container which is a HTTP proxy for the -# controller manager, it performs RBAC authorization against the Kubernetes API using SubjectAccessReviews. -apiVersion: apps/v1 -kind: Deployment -metadata: - name: controller-manager - namespace: system -spec: - template: - spec: - containers: - - name: kube-rbac-proxy - securityContext: - allowPrivilegeEscalation: false - capabilities: - drop: - - "ALL" - image: gcr.io/kubebuilder/kube-rbac-proxy:v0.15.0 - args: - - "--secure-listen-address=0.0.0.0:8443" - - "--upstream=http://127.0.0.1:8080/" - - "--logtostderr=true" - - "--v=0" - ports: - - containerPort: 8443 - protocol: TCP - name: https - resources: - limits: - cpu: 500m - memory: 128Mi - requests: - cpu: 5m - memory: 64Mi diff --git a/config/rbac/auth_proxy_service.yaml b/config/standalone/metrics_service.yaml similarity index 90% rename from config/rbac/auth_proxy_service.yaml rename to config/standalone/metrics_service.yaml index a4f0a0a8..728bb353 100644 --- a/config/rbac/auth_proxy_service.yaml +++ b/config/standalone/metrics_service.yaml @@ -8,6 +8,6 @@ spec: - name: https port: 8443 protocol: TCP - targetPort: https + targetPort: 8443 selector: control-plane: controller-manager diff --git a/go.mod b/go.mod index 11e148aa..e1768a23 100644 --- a/go.mod +++ b/go.mod @@ -9,6 +9,7 @@ require ( github.com/onsi/ginkgo/v2 v2.22.0 github.com/onsi/gomega v1.36.1 github.com/open-policy-agent/cert-controller v0.12.0 + github.com/prometheus/client_golang v1.20.5 go.uber.org/zap v1.27.0 k8s.io/api v0.31.4 k8s.io/apimachinery v0.31.4 @@ -22,15 +23,21 @@ require ( ) require ( + github.com/antlr4-go/antlr/v4 v4.13.0 // indirect + github.com/asaskevich/govalidator v0.0.0-20230301143203-a9d515a09cc2 // indirect github.com/beorn7/perks v1.0.1 // indirect github.com/blang/semver/v4 v4.0.0 // indirect + github.com/cenkalti/backoff/v4 v4.3.0 // indirect github.com/cespare/xxhash/v2 v2.3.0 // indirect github.com/davecgh/go-spew v1.1.2-0.20180830191138-d8f796af33cc // indirect github.com/emicklei/go-restful/v3 v3.12.1 // indirect github.com/evanphx/json-patch/v5 v5.9.0 // indirect github.com/fatih/color v1.18.0 // indirect + github.com/felixge/httpsnoop v1.0.4 // indirect + github.com/fsnotify/fsnotify v1.8.0 // indirect github.com/fxamacker/cbor/v2 v2.7.0 // indirect github.com/go-errors/errors v1.4.2 // indirect + github.com/go-logr/stdr v1.2.2 // indirect github.com/go-logr/zapr v1.3.0 // indirect github.com/go-openapi/jsonpointer v0.21.0 // indirect github.com/go-openapi/jsonreference v0.21.0 // indirect @@ -40,12 +47,14 @@ require ( github.com/gogo/protobuf v1.3.2 // indirect github.com/golang/groupcache v0.0.0-20210331224755-41bb18bfe9da // indirect github.com/golang/protobuf v1.5.4 // indirect + github.com/google/cel-go v0.20.1 // indirect github.com/google/gnostic-models v0.6.8 // indirect github.com/google/go-cmp v0.6.0 // indirect github.com/google/gofuzz v1.2.0 // indirect github.com/google/pprof v0.0.0-20241029153458-d1b30febd7db // indirect github.com/google/shlex v0.0.0-20191202100458-e7afc7fbc510 // indirect github.com/google/uuid v1.6.0 // indirect + github.com/grpc-ecosystem/grpc-gateway/v2 v2.20.0 // indirect github.com/imdario/mergo v0.3.16 // indirect github.com/inconshreveable/mousetrap v1.1.0 // indirect github.com/josharian/intern v1.0.0 // indirect @@ -62,15 +71,23 @@ require ( github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822 // indirect github.com/opencontainers/go-digest v1.0.0 // indirect github.com/pkg/errors v0.9.1 // indirect - github.com/prometheus/client_golang v1.20.5 // indirect github.com/prometheus/client_model v0.6.1 // indirect github.com/prometheus/common v0.57.0 // indirect github.com/prometheus/procfs v0.15.1 // indirect github.com/sirupsen/logrus v1.9.3 // indirect github.com/spf13/cobra v1.8.1 // indirect github.com/spf13/pflag v1.0.5 // indirect + github.com/stoewer/go-strcase v1.3.0 // indirect github.com/x448/float16 v0.8.4 // indirect github.com/xlab/treeprint v1.2.0 // indirect + go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp v0.53.0 // indirect + go.opentelemetry.io/otel v1.28.0 // indirect + go.opentelemetry.io/otel/exporters/otlp/otlptrace v1.28.0 // indirect + go.opentelemetry.io/otel/exporters/otlp/otlptrace/otlptracegrpc v1.27.0 // indirect + go.opentelemetry.io/otel/metric v1.28.0 // indirect + go.opentelemetry.io/otel/sdk v1.28.0 // indirect + go.opentelemetry.io/otel/trace v1.28.0 // indirect + go.opentelemetry.io/proto/otlp v1.3.1 // indirect go.uber.org/atomic v1.11.0 // indirect go.uber.org/multierr v1.11.0 // indirect golang.org/x/exp v0.0.0-20240719175910-8a7402abbf56 // indirect @@ -84,6 +101,9 @@ require ( golang.org/x/time v0.6.0 // indirect golang.org/x/tools v0.26.0 // indirect gomodules.xyz/jsonpatch/v2 v2.4.0 // indirect + google.golang.org/genproto/googleapis/api v0.0.0-20240528184218-531527333157 // indirect + google.golang.org/genproto/googleapis/rpc v0.0.0-20240701130421-f6361c86f094 // indirect + google.golang.org/grpc v1.65.0 // indirect google.golang.org/protobuf v1.35.1 // indirect gopkg.in/evanphx/json-patch.v4 v4.12.0 // indirect gopkg.in/inf.v0 v0.9.1 // indirect @@ -95,6 +115,7 @@ require ( k8s.io/component-helpers v0.31.4 // indirect k8s.io/klog/v2 v2.130.1 // indirect k8s.io/kube-openapi v0.0.0-20240812233141-91dab695df6f // indirect + sigs.k8s.io/apiserver-network-proxy/konnectivity-client v0.30.3 // indirect sigs.k8s.io/jobset v0.7.1 // indirect sigs.k8s.io/json v0.0.0-20221116044647-bc3834ca7abd // indirect sigs.k8s.io/kustomize/api v0.18.0 // indirect diff --git a/go.sum b/go.sum index 279302d1..69c16c6f 100644 --- a/go.sum +++ b/go.sum @@ -1,7 +1,13 @@ +github.com/antlr4-go/antlr/v4 v4.13.0 h1:lxCg3LAv+EUK6t1i0y1V6/SLeUi0eKEKdhQAlS8TVTI= +github.com/antlr4-go/antlr/v4 v4.13.0/go.mod h1:pfChB/xh/Unjila75QW7+VU4TSnWnnk9UTnmpPaOR2g= +github.com/asaskevich/govalidator v0.0.0-20230301143203-a9d515a09cc2 h1:DklsrG3dyBCFEj5IhUbnKptjxatkF07cF2ak3yi77so= +github.com/asaskevich/govalidator v0.0.0-20230301143203-a9d515a09cc2/go.mod h1:WaHUgvxTVq04UNunO+XhnAqY/wQc+bxr74GqbsZ/Jqw= github.com/beorn7/perks v1.0.1 h1:VlbKKnNfV8bJzeqoa4cOKqO6bYr3WgKZxO8Z16+hsOM= github.com/beorn7/perks v1.0.1/go.mod h1:G2ZrVWU2WbWT9wwq4/hrbKbnv/1ERSJQ0ibhJ6rlkpw= github.com/blang/semver/v4 v4.0.0 h1:1PFHFE6yCCTv8C1TeyNNarDzntLi7wMI5i/pzqYIsAM= github.com/blang/semver/v4 v4.0.0/go.mod h1:IbckMUScFkM3pff0VJDNKRiT6TG/YpiHIM2yvyW5YoQ= +github.com/cenkalti/backoff/v4 v4.3.0 h1:MyRJ/UdXutAwSAT+s3wNd7MfTIcy71VQueUuFK343L8= +github.com/cenkalti/backoff/v4 v4.3.0/go.mod h1:Y3VNntkOUPxTVeUxJ/G5vcM//AlwfmyYozVcomhLiZE= github.com/cespare/xxhash/v2 v2.3.0 h1:UL815xU9SqsFlibzuggzjXhog7bL6oX9BbNZnL2UFvs= github.com/cespare/xxhash/v2 v2.3.0/go.mod h1:VGX0DQ3Q6kWi7AoAeZDth3/j3BFtOZR5XLFGgcrjCOs= github.com/cpuguy83/go-md2man/v2 v2.0.4/go.mod h1:tgQtvFlXSQOSOSIRvRPT7W67SCa46tRHOmNcaadrF8o= @@ -19,14 +25,19 @@ github.com/evanphx/json-patch/v5 v5.9.0 h1:kcBlZQbplgElYIlo/n1hJbls2z/1awpXxpRi0 github.com/evanphx/json-patch/v5 v5.9.0/go.mod h1:VNkHZ/282BpEyt/tObQO8s5CMPmYYq14uClGH4abBuQ= github.com/fatih/color v1.18.0 h1:S8gINlzdQ840/4pfAwic/ZE0djQEH3wM94VfqLTZcOM= github.com/fatih/color v1.18.0/go.mod h1:4FelSpRwEGDpQ12mAdzqdOukCy4u8WUtOY6lkT/6HfU= +github.com/felixge/httpsnoop v1.0.4 h1:NFTV2Zj1bL4mc9sqWACXbQFVBBg2W3GPvqp8/ESS2Wg= +github.com/felixge/httpsnoop v1.0.4/go.mod h1:m8KPJKqk1gH5J9DgRY2ASl2lWCfGKXixSwevea8zH2U= github.com/fsnotify/fsnotify v1.8.0 h1:dAwr6QBTBZIkG8roQaJjGof0pp0EeF+tNV7YBP3F/8M= github.com/fsnotify/fsnotify v1.8.0/go.mod h1:8jBTzvmWwFyi3Pb8djgCCO5IBqzKJ/Jwo8TRcHyHii0= github.com/fxamacker/cbor/v2 v2.7.0 h1:iM5WgngdRBanHcxugY4JySA0nk1wZorNOpTgCMedv5E= github.com/fxamacker/cbor/v2 v2.7.0/go.mod h1:pxXPTn3joSm21Gbwsv0w9OSA2y1HFR9qXEeXQVeNoDQ= github.com/go-errors/errors v1.4.2 h1:J6MZopCL4uSllY1OfXM374weqZFFItUbrImctkmUxIA= github.com/go-errors/errors v1.4.2/go.mod h1:sIVyrIiJhuEF+Pj9Ebtd6P/rEYROXFi3BopGUQ5a5Og= +github.com/go-logr/logr v1.2.2/go.mod h1:jdQByPbusPIv2/zmleS9BjJVeZ6kBagPoEUsqbVz/1A= github.com/go-logr/logr v1.4.2 h1:6pFjapn8bFcIbiKo3XT4j/BhANplGihG6tvd+8rYgrY= github.com/go-logr/logr v1.4.2/go.mod h1:9T104GzyrTigFIr8wt5mBrctHMim0Nb2HLGrmQ40KvY= +github.com/go-logr/stdr v1.2.2 h1:hSWxHoqTgW2S2qGc0LTAI563KZ5YKYRhT3MFKZMbjag= +github.com/go-logr/stdr v1.2.2/go.mod h1:mMo/vtBO5dYbehREoey6XUKy/eSumjCCveDpRre4VKE= github.com/go-logr/zapr v1.3.0 h1:XGdV8XW8zdwFiwOA2Dryh1gj2KRQyOOoNmBy4EplIcQ= github.com/go-logr/zapr v1.3.0/go.mod h1:YKepepNBd1u/oyhd/yQmtjVXmm9uML4IXUgMOwR8/Gg= github.com/go-openapi/jsonpointer v0.21.0 h1:YgdVicSA9vH5RiHs9TZW5oyafXZFc6+2Vc1rr/O9oNQ= @@ -47,6 +58,8 @@ github.com/golang/protobuf v1.5.4 h1:i7eJL8qZTpSEXOPTxNKhASYpMn+8e5Q6AdndVa1dWek github.com/golang/protobuf v1.5.4/go.mod h1:lnTiLA8Wa4RWRcIUkrtSVa5nRhsEGBg48fD6rSs7xps= github.com/golangci/golangci-lint v1.60.1 h1:DRKNqNTQRLBJZ1il5u4fvgLQCjQc7QFs0DbhksJtVJE= github.com/golangci/golangci-lint v1.60.1/go.mod h1:jDIPN1rYaIA+ijp9OZcUmUCoQOtZ76pOlFbi15FlLJY= +github.com/google/cel-go v0.20.1 h1:nDx9r8S3L4pE61eDdt8igGj8rf5kjYR3ILxWIpWNi84= +github.com/google/cel-go v0.20.1/go.mod h1:kWcIzTsPX0zmQ+H3TirHstLLf9ep5QTsZBN9u4dOYLg= github.com/google/gnostic-models v0.6.8 h1:yo/ABAfM5IMRsS1VnXjTBvUb61tFIHozhlYvRgGre9I= github.com/google/gnostic-models v0.6.8/go.mod h1:5n7qKqH0f5wFt+aWF8CW6pZLLNOfYuF5OpfBSENuI8U= github.com/google/go-cmp v0.5.9/go.mod h1:17dUlkBOakJ0+DkrSSNjCkIjxS6bF9zb3elmeNGIjoY= @@ -61,6 +74,8 @@ github.com/google/shlex v0.0.0-20191202100458-e7afc7fbc510 h1:El6M4kTTCOh6aBiKaU github.com/google/shlex v0.0.0-20191202100458-e7afc7fbc510/go.mod h1:pupxD2MaaD3pAXIBCelhxNneeOaAeabZDe5s4K6zSpQ= github.com/google/uuid v1.6.0 h1:NIvaJDMOsjHA8n1jAhLSgzrAzy1Hgr+hNrb57e+94F0= github.com/google/uuid v1.6.0/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo= +github.com/grpc-ecosystem/grpc-gateway/v2 v2.20.0 h1:bkypFPDjIYGfCYD5mRBvpqxfYX1YCS1PXdKYWi8FsN0= +github.com/grpc-ecosystem/grpc-gateway/v2 v2.20.0/go.mod h1:P+Lt/0by1T8bfcF3z737NnSbmxQAppXMRziHUxPOC8k= github.com/imdario/mergo v0.3.16 h1:wwQJbIsHYGMUyLSPrEq1CT16AhnhNJQ51+4fdHUnCl4= github.com/imdario/mergo v0.3.16/go.mod h1:WBLT9ZmE3lPoWsEzCh9LPo3TiwVN+ZKEjmz+hD27ysY= github.com/inconshreveable/mousetrap v1.1.0 h1:wN+x4NVGpMsO7ErUn/mUI3vEoE6Jt13X2s0bqwp9tc8= @@ -139,6 +154,8 @@ github.com/spf13/cobra v1.8.1 h1:e5/vxKd/rZsfSJMUX1agtjeTDf+qv1/JdBF8gg5k9ZM= github.com/spf13/cobra v1.8.1/go.mod h1:wHxEcudfqmLYa8iTfL+OuZPbBZkmvliBWKIezN3kD9Y= github.com/spf13/pflag v1.0.5 h1:iy+VFUOCP1a+8yFto/drg2CJ5u0yRoB7fZw3DKv/JXA= github.com/spf13/pflag v1.0.5/go.mod h1:McXfInJRrz4CZXVZOBLb0bTZqETkiAhM9Iw0y3An2Bg= +github.com/stoewer/go-strcase v1.3.0 h1:g0eASXYtp+yvN9fK8sH94oCIk0fau9uV1/ZdJ0AVEzs= +github.com/stoewer/go-strcase v1.3.0/go.mod h1:fAH5hQ5pehh+j3nZfvwdk2RgEgQjAoM8wodgtPmh1xo= github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME= github.com/stretchr/objx v0.4.0/go.mod h1:YvHI0jy2hoMjB+UWwv71VJQ9isScKT/TqJzVSSt89Yw= github.com/stretchr/objx v0.5.0/go.mod h1:Yh+to48EsGEfYuaHDzXPcE3xhTkx73EhmCGUpEOglKo= @@ -157,6 +174,22 @@ github.com/xlab/treeprint v1.2.0 h1:HzHnuAF1plUN2zGlAFHbSQP2qJ0ZAD3XF5XD7OesXRQ= github.com/xlab/treeprint v1.2.0/go.mod h1:gj5Gd3gPdKtR1ikdDK6fnFLdmIS0X30kTTuNd/WEJu0= github.com/yuin/goldmark v1.1.27/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9decYSb74= github.com/yuin/goldmark v1.2.1/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9decYSb74= +go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp v0.53.0 h1:4K4tsIXefpVJtvA/8srF4V4y0akAoPHkIslgAkjixJA= +go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp v0.53.0/go.mod h1:jjdQuTGVsXV4vSs+CJ2qYDeDPf9yIJV23qlIzBm73Vg= +go.opentelemetry.io/otel v1.28.0 h1:/SqNcYk+idO0CxKEUOtKQClMK/MimZihKYMruSMViUo= +go.opentelemetry.io/otel v1.28.0/go.mod h1:q68ijF8Fc8CnMHKyzqL6akLO46ePnjkgfIMIjUIX9z4= +go.opentelemetry.io/otel/exporters/otlp/otlptrace v1.28.0 h1:3Q/xZUyC1BBkualc9ROb4G8qkH90LXEIICcs5zv1OYY= +go.opentelemetry.io/otel/exporters/otlp/otlptrace v1.28.0/go.mod h1:s75jGIWA9OfCMzF0xr+ZgfrB5FEbbV7UuYo32ahUiFI= +go.opentelemetry.io/otel/exporters/otlp/otlptrace/otlptracegrpc v1.27.0 h1:qFffATk0X+HD+f1Z8lswGiOQYKHRlzfmdJm0wEaVrFA= +go.opentelemetry.io/otel/exporters/otlp/otlptrace/otlptracegrpc v1.27.0/go.mod h1:MOiCmryaYtc+V0Ei+Tx9o5S1ZjA7kzLucuVuyzBZloQ= +go.opentelemetry.io/otel/metric v1.28.0 h1:f0HGvSl1KRAU1DLgLGFjrwVyismPlnuU6JD6bOeuA5Q= +go.opentelemetry.io/otel/metric v1.28.0/go.mod h1:Fb1eVBFZmLVTMb6PPohq3TO9IIhUisDsbJoL/+uQW4s= +go.opentelemetry.io/otel/sdk v1.28.0 h1:b9d7hIry8yZsgtbmM0DKyPWMMUMlK9NEKuIG4aBqWyE= +go.opentelemetry.io/otel/sdk v1.28.0/go.mod h1:oYj7ClPUA7Iw3m+r7GeEjz0qckQRJK2B8zjcZEfu7Pg= +go.opentelemetry.io/otel/trace v1.28.0 h1:GhQ9cUuQGmNDd5BTCP2dAvv75RdMxEfTmYejp+lkx9g= +go.opentelemetry.io/otel/trace v1.28.0/go.mod h1:jPyXzNPg6da9+38HEwElrQiHlVMTnVfM3/yv2OlIHaI= +go.opentelemetry.io/proto/otlp v1.3.1 h1:TrMUixzpM0yuc/znrFTP9MMRh8trP93mkCiDVeXrui0= +go.opentelemetry.io/proto/otlp v1.3.1/go.mod h1:0X1WI4de4ZsLrrJNLAQbFeLCm3T7yBkR0XqQ7niQU+8= go.uber.org/atomic v1.11.0 h1:ZvwS0R+56ePWxUNi+Atn9dWONBPp/AUETXlHW0DxSjE= go.uber.org/atomic v1.11.0/go.mod h1:LUxbIzbOniOlMKjJjyPfpl4v+PKK2cNJn91OQbhoJI0= go.uber.org/goleak v1.3.0 h1:2K3zAYmnTNqV73imy9J1T3WC+gmCePx2hEGkimedGto= @@ -215,6 +248,12 @@ golang.org/x/xerrors v0.0.0-20191204190536-9bdfabe68543/go.mod h1:I/5z698sn9Ka8T golang.org/x/xerrors v0.0.0-20200804184101-5ec99f83aff1/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= gomodules.xyz/jsonpatch/v2 v2.4.0 h1:Ci3iUJyx9UeRx7CeFN8ARgGbkESwJK+KB9lLcWxY/Zw= gomodules.xyz/jsonpatch/v2 v2.4.0/go.mod h1:AH3dM2RI6uoBZxn3LVrfvJ3E0/9dG4cSrbuBJT4moAY= +google.golang.org/genproto/googleapis/api v0.0.0-20240528184218-531527333157 h1:7whR9kGa5LUwFtpLm2ArCEejtnxlGeLbAyjFY8sGNFw= +google.golang.org/genproto/googleapis/api v0.0.0-20240528184218-531527333157/go.mod h1:99sLkeliLXfdj2J75X3Ho+rrVCaJze0uwN7zDDkjPVU= +google.golang.org/genproto/googleapis/rpc v0.0.0-20240701130421-f6361c86f094 h1:BwIjyKYGsK9dMCBOorzRri8MQwmi7mT9rGHsCEinZkA= +google.golang.org/genproto/googleapis/rpc v0.0.0-20240701130421-f6361c86f094/go.mod h1:Ue6ibwXGpU+dqIcODieyLOcgj7z8+IcskoNIgZxtrFY= +google.golang.org/grpc v1.65.0 h1:bs/cUb4lp1G5iImFFd3u5ixQzweKizoZJAwBNLR42lc= +google.golang.org/grpc v1.65.0/go.mod h1:WgYC2ypjlB0EiQi6wdKixMqukr6lBc0Vo+oOgjrM5ZQ= google.golang.org/protobuf v1.35.1 h1:m3LfL6/Ca+fqnjnlqQXNpFPABW1UD7mjh8KO2mKFytA= google.golang.org/protobuf v1.35.1/go.mod h1:9fA7Ob0pmnwhb644+1+CVWFRbNajQ6iRojtC/QF5bRE= gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= @@ -253,6 +292,8 @@ k8s.io/kube-openapi v0.0.0-20240812233141-91dab695df6f h1:bnWtxXWdAl5bVOCEPoNdvM k8s.io/kube-openapi v0.0.0-20240812233141-91dab695df6f/go.mod h1:G0W3eI9gG219NHRq3h5uQaRBl4pj4ZpwzRP5ti8y770= k8s.io/utils v0.0.0-20240711033017-18e509b52bc8 h1:pUdcCO1Lk/tbT5ztQWOBi5HBgbBP1J8+AsQnQCKsi8A= k8s.io/utils v0.0.0-20240711033017-18e509b52bc8/go.mod h1:OLgZIPagt7ERELqWJFomSt595RzquPNLL48iOWgYOg0= +sigs.k8s.io/apiserver-network-proxy/konnectivity-client v0.30.3 h1:2770sDpzrjjsAtVhSeUFseziht227YAWYHLGNM8QPwY= +sigs.k8s.io/apiserver-network-proxy/konnectivity-client v0.30.3/go.mod h1:Ve9uj1L+deCXFrPOk1LpFXqTg7LCFzFso6PA48q/XZw= sigs.k8s.io/controller-runtime v0.19.3 h1:XO2GvC9OPftRst6xWCpTgBZO04S2cbp0Qqkj8bX1sPw= sigs.k8s.io/controller-runtime v0.19.3/go.mod h1:j4j87DqtsThvwTv5/Tc5NFRyyF/RF0ip4+62tbTSIUM= sigs.k8s.io/controller-tools v0.16.5 h1:5k9FNRqziBPwqr17AMEPPV/En39ZBplLAdOwwQHruP4= diff --git a/hack/e2e-util.sh b/hack/e2e-util.sh index aa2f8149..691a6765 100755 --- a/hack/e2e-util.sh +++ b/hack/e2e-util.sh @@ -33,6 +33,7 @@ export IMAGE_KUBERAY_OPERATOR="quay.io/kuberay/operator:v1.1.1" # Pull and kind load to avoid long delays during testing export IMAGE_ECHOSERVER="quay.io/project-codeflare/echo-server:1.0" export IMAGE_BUSY_BOX_LATEST="quay.io/project-codeflare/busybox:latest" +export IMAGE_CURL="quay.io/curl/curl:8.11.1" function update_test_host { @@ -141,7 +142,7 @@ function check_prerequisites { } function pull_images { - for image in ${IMAGE_ECHOSERVER} ${IMAGE_BUSY_BOX_LATEST} ${IMAGE_KUBEFLOW_OPERATOR} ${IMAGE_KUBERAY_OPERATOR} + for image in ${IMAGE_ECHOSERVER} ${IMAGE_BUSY_BOX_LATEST} ${IMAGE_CURL} ${IMAGE_KUBEFLOW_OPERATOR} ${IMAGE_KUBERAY_OPERATOR} do docker pull $image if [ $? -ne 0 ] @@ -237,7 +238,7 @@ function kind_up_cluster { } function kind_load_images { - for image in ${IMAGE_ECHOSERVER} ${IMAGE_BUSY_BOX_LATEST} ${IMAGE_KUBEFLOW_OPERATOR} ${IMAGE_KUBERAY_OPERATOR} + for image in ${IMAGE_ECHOSERVER} ${IMAGE_BUSY_BOX_LATEST} ${IMAGE_CURL} ${IMAGE_KUBEFLOW_OPERATOR} ${IMAGE_KUBERAY_OPERATOR} do kind load docker-image ${image} ${CLUSTER_CONTEXT} if [ $? -ne 0 ] diff --git a/hack/run-tests-on-cluster.sh b/hack/run-tests-on-cluster.sh index 7f0e9b14..912ebca3 100755 --- a/hack/run-tests-on-cluster.sh +++ b/hack/run-tests-on-cluster.sh @@ -19,7 +19,7 @@ export GORACE=1 export CLEANUP_CLUSTER=${CLEANUP_CLUSTER:-"false"} export CLUSTER_STARTED="true" export KUTTL_TEST_SUITES=("") -export LABEL_FILTER=${LABEL_FILTER:-"Kueue,Webhook"} +export LABEL_FILTER=${LABEL_FILTER:-"Kueue,Webhook,Metrics"} source ${ROOT_DIR}/hack/e2e-util.sh diff --git a/internal/controller/appwrapper/appwrapper_controller.go b/internal/controller/appwrapper/appwrapper_controller.go index edbb6038..7e3d0052 100644 --- a/internal/controller/appwrapper/appwrapper_controller.go +++ b/internal/controller/appwrapper/appwrapper_controller.go @@ -45,6 +45,7 @@ import ( workloadv1beta2 "github.com/project-codeflare/appwrapper/api/v1beta2" wlc "github.com/project-codeflare/appwrapper/internal/controller/workload" + "github.com/project-codeflare/appwrapper/internal/metrics" "github.com/project-codeflare/appwrapper/pkg/config" "github.com/project-codeflare/appwrapper/pkg/utils" ) @@ -528,6 +529,7 @@ func (r *AppWrapperReconciler) transitionToPhase(ctx context.Context, orig *work return err } log.FromContext(ctx).Info(string(phase), "phase", phase) + metrics.AppWrapperPhaseCounter.WithLabelValues(orig.Namespace, string(phase)).Inc() return nil } diff --git a/internal/metrics/metrics.go b/internal/metrics/metrics.go new file mode 100644 index 00000000..caacf1b8 --- /dev/null +++ b/internal/metrics/metrics.go @@ -0,0 +1,35 @@ +/* +Copyright 2024 IBM Corporation. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package metrics + +import ( + "github.com/prometheus/client_golang/prometheus" + "sigs.k8s.io/controller-runtime/pkg/metrics" +) + +var ( + AppWrapperPhaseCounter = prometheus.NewCounterVec( + prometheus.CounterOpts{ + Name: "appwrapper_phase_total", + Help: `The total number of times an appwrapper transitioned to a given phase per namespace.`, + }, []string{"namespace", "phase"}, + ) +) + +func Register() { + metrics.Registry.MustRegister(AppWrapperPhaseCounter) +} diff --git a/pkg/config/config.go b/pkg/config/config.go index c2dbfcc1..b8f3be54 100644 --- a/pkg/config/config.go +++ b/pkg/config/config.go @@ -86,8 +86,7 @@ type ControllerManagerConfig struct { } type MetricsConfiguration struct { - BindAddress string `json:"bindAddress,omitempty"` - SecureServing bool `json:"secureServing,omitempty"` + BindAddress string `json:"bindAddress,omitempty"` } type HealthConfiguration struct { @@ -186,8 +185,7 @@ func NewCertManagementConfig(namespace string) *CertManagementConfig { func NewControllerManagerConfig() *ControllerManagerConfig { return &ControllerManagerConfig{ Metrics: MetricsConfiguration{ - BindAddress: ":8080", - SecureServing: false, + BindAddress: ":8443", }, Health: HealthConfiguration{ BindAddress: ":8081", diff --git a/test/e2e/metrics_test.go b/test/e2e/metrics_test.go new file mode 100644 index 00000000..9ba249ea --- /dev/null +++ b/test/e2e/metrics_test.go @@ -0,0 +1,112 @@ +/* +Copyright 2024 IBM Corporation. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package e2e + +import ( + "fmt" + "os/exec" + "time" + + . "github.com/onsi/ginkgo/v2" + . "github.com/onsi/gomega" + + corev1 "k8s.io/api/core/v1" + rbacv1 "k8s.io/api/rbac/v1" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "sigs.k8s.io/controller-runtime/pkg/client" +) + +const ( + managerNamespace = "appwrapper-system" + serviceAccountName = "appwrapper-controller-manager" + metricsReaderClusterRoleName = "appwrapper-metrics-reader" + metricsServiceName = "appwrapper-controller-manager-metrics-service" +) + +var _ = Describe("Metrics", Label("Metrics"), func() { + It("should ensure the metrics endpoint is serving metrics", Label("Metrics"), func() { + By("Creating a ClusterRoleBinding for the service account to allow access to metrics") + metricsReaderClusterRoleBinding := &rbacv1.ClusterRoleBinding{ + ObjectMeta: metav1.ObjectMeta{Name: "e2e-test-aw-metrics-reader-crb"}, + Subjects: []rbacv1.Subject{ + { + Kind: "ServiceAccount", + Name: serviceAccountName, + Namespace: managerNamespace, + }, + }, + RoleRef: rbacv1.RoleRef{ + APIGroup: rbacv1.GroupName, + Kind: "ClusterRole", + Name: metricsReaderClusterRoleName, + }, + } + Expect(getClient(ctx).Create(ctx, metricsReaderClusterRoleBinding)).Should(Succeed()) + DeferCleanup(func() { + By("Deleting the ClusterRoleBinding", func() { + Expect(getClient(ctx).Delete(ctx, metricsReaderClusterRoleBinding)).To(Succeed()) + }) + }) + + By("Creating the curl-metrics pod using a service account that can access the metrics endpoint") + pod := &corev1.Pod{ + ObjectMeta: metav1.ObjectMeta{Name: "curl-metrics", Namespace: managerNamespace}, + Spec: corev1.PodSpec{ + ServiceAccountName: serviceAccountName, + Containers: []corev1.Container{{ + Name: "curl", + Image: "quay.io/curl/curl:8.11.1", + Command: []string{"sleep", "3600"}, + }}, + }, + } + Expect(getClient(ctx).Create(ctx, pod)).Should(Succeed()) + DeferCleanup(func() { + By("Deleting the pod", func() { + Expect(getClient(ctx).Delete(ctx, pod)).Should(Succeed()) + }) + }) + + By("Waiting for the curl-metrics pod to be running.", func() { + Eventually(func(g Gomega) { + createdPod := &corev1.Pod{} + g.Expect(getClient(ctx).Get(ctx, client.ObjectKeyFromObject(pod), createdPod)).To(Succeed()) + g.Expect(createdPod.Status.Phase).To(Equal(corev1.PodRunning)) + }, 60*time.Second).Should(Succeed()) + }) + + metrics := []string{ + "controller_runtime_reconcile_total", + } + + By("Getting the metrics via curl", func() { + Eventually(func(g Gomega) { + cmd := exec.Command("kubectl", "exec", "-n", managerNamespace, "curl-metrics", "--", "/bin/sh", "-c", + fmt.Sprintf( + "curl -v -k -H \"Authorization: Bearer $(cat /var/run/secrets/kubernetes.io/serviceaccount/token)\" https://%s.%s.svc.cluster.local:8443/metrics ", + metricsServiceName, managerNamespace, + ), + ) + metricsOutput, err := cmd.CombinedOutput() + g.Expect(err).NotTo(HaveOccurred()) + for _, metric := range metrics { + g.Expect(string(metricsOutput)).To(ContainSubstring(metric)) + } + }, 3600*time.Second).Should(Succeed()) + }) + }) +})