Skip to content

Commit

Permalink
enable Prometheus, gather CSI call counts + PMEM stats
Browse files Browse the repository at this point in the history
The Prometheus integration uses the approach from
helm/charts#22899:
- HTTP for metrics endpoints
- container ports tell Prometheus which containers to scrape
  and how

CSI call counts are the same as in the sidecars. This enables
correlating statistics and ensures that node-local operations are
also captured; kubelet doesn't seem to be instrumented.

Internal communication is instrumented the same way.

PMEM usage statistics are recorded by querying the active device
manager each time the metrics data gets scraped.

The metrics support is enabled unconditionally in the operator and all
pre-generated deployment files and uses plain HTTP for the sake of
simplicity. This is based on the rationale that the data itself is
not sensitive and should always be readily available if desired.
  • Loading branch information
pohly committed Sep 10, 2020
1 parent 2d7facd commit 8c34c9f
Show file tree
Hide file tree
Showing 64 changed files with 1,193 additions and 213 deletions.
32 changes: 16 additions & 16 deletions deploy/bindata_generated.go

Large diffs are not rendered by default.

17 changes: 16 additions & 1 deletion deploy/kubernetes-1.17/direct/pmem-csi.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -224,6 +224,8 @@ spec:
serviceName: pmem-csi-controller
template:
metadata:
annotations:
pmem-csi.intel.com/scrape: containers
labels:
app: pmem-csi-controller
pmem-csi.intel.com/deployment: direct-production
Expand All @@ -246,12 +248,12 @@ spec:
- -mode=controller
- -endpoint=unix:///csi/csi-controller.sock
- -registryEndpoint=tcp://0.0.0.0:10000
- -metricsListen=:10010
- -nodeid=$(KUBE_NODE_NAME)
- -caFile=/certs/ca.crt
- -certFile=/certs/tls.crt
- -keyFile=/certs/tls.key
- -drivername=$(PMEM_CSI_DRIVER_NAME)
- -metricsListen=:10010
env:
- name: KUBE_NODE_NAME
valueFrom:
Expand All @@ -265,6 +267,9 @@ spec:
image: intel/pmem-csi-driver:canary
imagePullPolicy: IfNotPresent
name: pmem-driver
ports:
- containerPort: 10010
name: metrics
securityContext:
readOnlyRootFilesystem: true
terminationMessagePath: /tmp/termination-log
Expand All @@ -281,9 +286,13 @@ spec:
- --feature-gates=Topology=true
- --strict-topology=true
- --timeout=5m
- --metrics-address=:10011
image: quay.io/k8scsi/csi-provisioner:v2.0.0
imagePullPolicy: IfNotPresent
name: external-provisioner
ports:
- containerPort: 10011
name: metrics
securityContext:
readOnlyRootFilesystem: true
volumeMounts:
Expand Down Expand Up @@ -316,6 +325,8 @@ spec:
pmem-csi.intel.com/deployment: direct-production
template:
metadata:
annotations:
pmem-csi.intel.com/scrape: containers
labels:
app: pmem-csi-node
pmem-csi.intel.com/deployment: direct-production
Expand All @@ -337,6 +348,7 @@ spec:
- -statePath=/var/lib/$(PMEM_CSI_DRIVER_NAME)
- -drivername=$(PMEM_CSI_DRIVER_NAME)
- -pmemPercentage=100
- -metricsListen=:10010
env:
- name: KUBE_NODE_NAME
valueFrom:
Expand All @@ -355,6 +367,9 @@ spec:
image: intel/pmem-csi-driver:canary
imagePullPolicy: IfNotPresent
name: pmem-driver
ports:
- containerPort: 10010
name: metrics
securityContext:
privileged: true
runAsUser: 0
Expand Down
17 changes: 16 additions & 1 deletion deploy/kubernetes-1.17/direct/testing/pmem-csi.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -239,6 +239,8 @@ spec:
serviceName: pmem-csi-controller
template:
metadata:
annotations:
pmem-csi.intel.com/scrape: containers
labels:
app: pmem-csi-controller
pmem-csi.intel.com/deployment: direct-testing
Expand All @@ -261,12 +263,12 @@ spec:
- -mode=controller
- -endpoint=unix:///csi/csi-controller.sock
- -registryEndpoint=tcp://0.0.0.0:10000
- -metricsListen=:10010
- -nodeid=$(KUBE_NODE_NAME)
- -caFile=/certs/ca.crt
- -certFile=/certs/tls.crt
- -keyFile=/certs/tls.key
- -drivername=$(PMEM_CSI_DRIVER_NAME)
- -metricsListen=:10010
- -v=5
- -testEndpoint
- -coverprofile=/var/lib/pmem-csi-coverage/pmem-csi-driver-controller-*.out
Expand All @@ -283,6 +285,9 @@ spec:
image: intel/pmem-csi-driver-test:canary
imagePullPolicy: IfNotPresent
name: pmem-driver
ports:
- containerPort: 10010
name: metrics
securityContext:
readOnlyRootFilesystem: true
terminationMessagePath: /tmp/termination-log
Expand All @@ -301,10 +306,14 @@ spec:
- --feature-gates=Topology=true
- --strict-topology=true
- --timeout=5m
- --metrics-address=:10011
- -v=5
image: quay.io/k8scsi/csi-provisioner:v2.0.0
imagePullPolicy: IfNotPresent
name: external-provisioner
ports:
- containerPort: 10011
name: metrics
securityContext:
readOnlyRootFilesystem: true
volumeMounts:
Expand Down Expand Up @@ -364,6 +373,8 @@ spec:
pmem-csi.intel.com/deployment: direct-testing
template:
metadata:
annotations:
pmem-csi.intel.com/scrape: containers
labels:
app: pmem-csi-node
pmem-csi.intel.com/deployment: direct-testing
Expand All @@ -385,6 +396,7 @@ spec:
- -statePath=/var/lib/$(PMEM_CSI_DRIVER_NAME)
- -drivername=$(PMEM_CSI_DRIVER_NAME)
- -pmemPercentage=100
- -metricsListen=:10010
- -v=5
- -testEndpoint
- -coverprofile=/var/lib/pmem-csi-coverage/pmem-csi-driver-node-*.out
Expand All @@ -406,6 +418,9 @@ spec:
image: intel/pmem-csi-driver-test:canary
imagePullPolicy: IfNotPresent
name: pmem-driver
ports:
- containerPort: 10010
name: metrics
securityContext:
privileged: true
runAsUser: 0
Expand Down
17 changes: 16 additions & 1 deletion deploy/kubernetes-1.17/lvm/pmem-csi.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -224,6 +224,8 @@ spec:
serviceName: pmem-csi-controller
template:
metadata:
annotations:
pmem-csi.intel.com/scrape: containers
labels:
app: pmem-csi-controller
pmem-csi.intel.com/deployment: lvm-production
Expand All @@ -246,12 +248,12 @@ spec:
- -mode=controller
- -endpoint=unix:///csi/csi-controller.sock
- -registryEndpoint=tcp://0.0.0.0:10000
- -metricsListen=:10010
- -nodeid=$(KUBE_NODE_NAME)
- -caFile=/certs/ca.crt
- -certFile=/certs/tls.crt
- -keyFile=/certs/tls.key
- -drivername=$(PMEM_CSI_DRIVER_NAME)
- -metricsListen=:10010
env:
- name: KUBE_NODE_NAME
valueFrom:
Expand All @@ -265,6 +267,9 @@ spec:
image: intel/pmem-csi-driver:canary
imagePullPolicy: IfNotPresent
name: pmem-driver
ports:
- containerPort: 10010
name: metrics
securityContext:
readOnlyRootFilesystem: true
terminationMessagePath: /tmp/termination-log
Expand All @@ -281,9 +286,13 @@ spec:
- --feature-gates=Topology=true
- --strict-topology=true
- --timeout=5m
- --metrics-address=:10011
image: quay.io/k8scsi/csi-provisioner:v2.0.0
imagePullPolicy: IfNotPresent
name: external-provisioner
ports:
- containerPort: 10011
name: metrics
securityContext:
readOnlyRootFilesystem: true
volumeMounts:
Expand Down Expand Up @@ -316,6 +325,8 @@ spec:
pmem-csi.intel.com/deployment: lvm-production
template:
metadata:
annotations:
pmem-csi.intel.com/scrape: containers
labels:
app: pmem-csi-node
pmem-csi.intel.com/deployment: lvm-production
Expand All @@ -337,6 +348,7 @@ spec:
- -statePath=/var/lib/$(PMEM_CSI_DRIVER_NAME)
- -drivername=$(PMEM_CSI_DRIVER_NAME)
- -pmemPercentage=100
- -metricsListen=:10010
env:
- name: KUBE_NODE_NAME
valueFrom:
Expand All @@ -355,6 +367,9 @@ spec:
image: intel/pmem-csi-driver:canary
imagePullPolicy: IfNotPresent
name: pmem-driver
ports:
- containerPort: 10010
name: metrics
securityContext:
privileged: true
runAsUser: 0
Expand Down
17 changes: 16 additions & 1 deletion deploy/kubernetes-1.17/lvm/testing/pmem-csi.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -239,6 +239,8 @@ spec:
serviceName: pmem-csi-controller
template:
metadata:
annotations:
pmem-csi.intel.com/scrape: containers
labels:
app: pmem-csi-controller
pmem-csi.intel.com/deployment: lvm-testing
Expand All @@ -261,12 +263,12 @@ spec:
- -mode=controller
- -endpoint=unix:///csi/csi-controller.sock
- -registryEndpoint=tcp://0.0.0.0:10000
- -metricsListen=:10010
- -nodeid=$(KUBE_NODE_NAME)
- -caFile=/certs/ca.crt
- -certFile=/certs/tls.crt
- -keyFile=/certs/tls.key
- -drivername=$(PMEM_CSI_DRIVER_NAME)
- -metricsListen=:10010
- -v=5
- -testEndpoint
- -coverprofile=/var/lib/pmem-csi-coverage/pmem-csi-driver-controller-*.out
Expand All @@ -283,6 +285,9 @@ spec:
image: intel/pmem-csi-driver-test:canary
imagePullPolicy: IfNotPresent
name: pmem-driver
ports:
- containerPort: 10010
name: metrics
securityContext:
readOnlyRootFilesystem: true
terminationMessagePath: /tmp/termination-log
Expand All @@ -301,10 +306,14 @@ spec:
- --feature-gates=Topology=true
- --strict-topology=true
- --timeout=5m
- --metrics-address=:10011
- -v=5
image: quay.io/k8scsi/csi-provisioner:v2.0.0
imagePullPolicy: IfNotPresent
name: external-provisioner
ports:
- containerPort: 10011
name: metrics
securityContext:
readOnlyRootFilesystem: true
volumeMounts:
Expand Down Expand Up @@ -364,6 +373,8 @@ spec:
pmem-csi.intel.com/deployment: lvm-testing
template:
metadata:
annotations:
pmem-csi.intel.com/scrape: containers
labels:
app: pmem-csi-node
pmem-csi.intel.com/deployment: lvm-testing
Expand All @@ -385,6 +396,7 @@ spec:
- -statePath=/var/lib/$(PMEM_CSI_DRIVER_NAME)
- -drivername=$(PMEM_CSI_DRIVER_NAME)
- -pmemPercentage=100
- -metricsListen=:10010
- -v=5
- -testEndpoint
- -coverprofile=/var/lib/pmem-csi-coverage/pmem-csi-driver-node-*.out
Expand All @@ -406,6 +418,9 @@ spec:
image: intel/pmem-csi-driver-test:canary
imagePullPolicy: IfNotPresent
name: pmem-driver
ports:
- containerPort: 10010
name: metrics
securityContext:
privileged: true
runAsUser: 0
Expand Down
17 changes: 16 additions & 1 deletion deploy/kubernetes-1.17/pmem-csi-direct-testing.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -239,6 +239,8 @@ spec:
serviceName: pmem-csi-controller
template:
metadata:
annotations:
pmem-csi.intel.com/scrape: containers
labels:
app: pmem-csi-controller
pmem-csi.intel.com/deployment: direct-testing
Expand All @@ -261,12 +263,12 @@ spec:
- -mode=controller
- -endpoint=unix:///csi/csi-controller.sock
- -registryEndpoint=tcp://0.0.0.0:10000
- -metricsListen=:10010
- -nodeid=$(KUBE_NODE_NAME)
- -caFile=/certs/ca.crt
- -certFile=/certs/tls.crt
- -keyFile=/certs/tls.key
- -drivername=$(PMEM_CSI_DRIVER_NAME)
- -metricsListen=:10010
- -v=5
- -testEndpoint
- -coverprofile=/var/lib/pmem-csi-coverage/pmem-csi-driver-controller-*.out
Expand All @@ -283,6 +285,9 @@ spec:
image: intel/pmem-csi-driver-test:canary
imagePullPolicy: IfNotPresent
name: pmem-driver
ports:
- containerPort: 10010
name: metrics
securityContext:
readOnlyRootFilesystem: true
terminationMessagePath: /tmp/termination-log
Expand All @@ -301,10 +306,14 @@ spec:
- --feature-gates=Topology=true
- --strict-topology=true
- --timeout=5m
- --metrics-address=:10011
- -v=5
image: quay.io/k8scsi/csi-provisioner:v2.0.0
imagePullPolicy: IfNotPresent
name: external-provisioner
ports:
- containerPort: 10011
name: metrics
securityContext:
readOnlyRootFilesystem: true
volumeMounts:
Expand Down Expand Up @@ -364,6 +373,8 @@ spec:
pmem-csi.intel.com/deployment: direct-testing
template:
metadata:
annotations:
pmem-csi.intel.com/scrape: containers
labels:
app: pmem-csi-node
pmem-csi.intel.com/deployment: direct-testing
Expand All @@ -385,6 +396,7 @@ spec:
- -statePath=/var/lib/$(PMEM_CSI_DRIVER_NAME)
- -drivername=$(PMEM_CSI_DRIVER_NAME)
- -pmemPercentage=100
- -metricsListen=:10010
- -v=5
- -testEndpoint
- -coverprofile=/var/lib/pmem-csi-coverage/pmem-csi-driver-node-*.out
Expand All @@ -406,6 +418,9 @@ spec:
image: intel/pmem-csi-driver-test:canary
imagePullPolicy: IfNotPresent
name: pmem-driver
ports:
- containerPort: 10010
name: metrics
securityContext:
privileged: true
runAsUser: 0
Expand Down
Loading

0 comments on commit 8c34c9f

Please sign in to comment.