Skip to content

Commit

Permalink
Introduce container status metrics (open-telemetry#19)
Browse files Browse the repository at this point in the history
  • Loading branch information
SaxyPandaBear authored May 30, 2023
1 parent 3659083 commit b482642
Show file tree
Hide file tree
Showing 4 changed files with 80 additions and 24 deletions.
10 changes: 10 additions & 0 deletions internal/aws/containerinsight/const.go
Original file line number Diff line number Diff line change
Expand Up @@ -93,6 +93,10 @@ const (
StatusConditionNetworkUnavailable = "status_condition_network_unavailable"
StatusCapacityPods = "status_capacity_pods"
StatusAllocatablePods = "status_allocatable_pods"
StatusRunning = "status_running"
StatusTerminated = "status_terminated"
StatusWaiting = "status_waiting"
StatusWaitingReasonCrashed = "status_waiting_reason_crashed"

RunningPodCount = "number_of_running_pods"
RunningContainerCount = "number_of_running_containers"
Expand Down Expand Up @@ -218,6 +222,12 @@ func init() {
StatusCapacityPods: UnitCount,
StatusAllocatablePods: UnitCount,

// kube-state-metrics equivalents
StatusRunning: UnitCount,
StatusTerminated: UnitCount,
StatusWaiting: UnitCount,
StatusWaitingReasonCrashed: UnitCount,

// cluster metrics
NodeCount: UnitCount,
FailedNodeCount: UnitCount,
Expand Down
52 changes: 28 additions & 24 deletions receiver/awscontainerinsightreceiver/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -646,30 +646,34 @@ kubectl apply -f config.yaml


### Container
| Metric | Unit |
|-----------------------------------------|---------------|
| container_cpu_limit | Millicore |
| container_cpu_request | Millicore |
| container_cpu_usage_system | Millicore |
| container_cpu_usage_total | Millicore |
| container_cpu_usage_user | Millicore |
| container_cpu_utilization | Percent |
| container_memory_cache | Bytes |
| container_memory_failcnt | Count |
| container_memory_hierarchical_pgfault | Count/Second |
| container_memory_hierarchical_pgmajfault| Count/Second |
| container_memory_limit | Bytes |
| container_memory_mapped_file | Bytes |
| container_memory_max_usage | Bytes |
| container_memory_pgfault | Count/Second |
| container_memory_pgmajfault | Count/Second |
| container_memory_request | Bytes |
| container_memory_rss | Bytes |
| container_memory_swap | Bytes |
| container_memory_usage | Bytes |
| container_memory_utilization | Percent |
| container_memory_working_set | Bytes |
| number_of_container_restarts | Count |
| Metric | Unit |
|------------------------------------------|--------------|
| container_cpu_limit | Millicore |
| container_cpu_request | Millicore |
| container_cpu_usage_system | Millicore |
| container_cpu_usage_total | Millicore |
| container_cpu_usage_user | Millicore |
| container_cpu_utilization | Percent |
| container_memory_cache | Bytes |
| container_memory_failcnt | Count |
| container_memory_hierarchical_pgfault | Count/Second |
| container_memory_hierarchical_pgmajfault | Count/Second |
| container_memory_limit | Bytes |
| container_memory_mapped_file | Bytes |
| container_memory_max_usage | Bytes |
| container_memory_pgfault | Count/Second |
| container_memory_pgmajfault | Count/Second |
| container_memory_request | Bytes |
| container_memory_rss | Bytes |
| container_memory_swap | Bytes |
| container_memory_usage | Bytes |
| container_memory_utilization | Percent |
| container_memory_working_set | Bytes |
| number_of_container_restarts | Count |
| container_status_running | Count |
| container_status_terminated | Count |
| container_status_waiting | Count |
| container_status_waiting_reason_crashed | Count |

<br/><br/>

Expand Down
17 changes: 17 additions & 0 deletions receiver/awscontainerinsightreceiver/internal/stores/podstore.go
Original file line number Diff line number Diff line change
Expand Up @@ -469,16 +469,28 @@ func (p *PodStore) addStatus(metric CIMetric, pod *corev1.Pod) {
if containerName := metric.GetTag(ci.ContainerNamekey); containerName != "" {
for _, containerStatus := range pod.Status.ContainerStatuses {
if containerStatus.Name == containerName {
possibleStatuses := map[string]int{
ci.StatusRunning: 0,
ci.StatusWaiting: 0,
ci.StatusWaitingReasonCrashed: 0,
ci.StatusTerminated: 0,
}
switch {
case containerStatus.State.Running != nil:
metric.AddTag(ci.ContainerStatus, "Running")
possibleStatuses[ci.StatusRunning] = 1
case containerStatus.State.Waiting != nil:
metric.AddTag(ci.ContainerStatus, "Waiting")
possibleStatuses[ci.StatusWaiting] = 1
if containerStatus.State.Waiting.Reason != "" {
metric.AddTag(ci.ContainerStatusReason, containerStatus.State.Waiting.Reason)
if strings.Contains(containerStatus.State.Waiting.Reason, "Crash") {
possibleStatuses[ci.StatusWaitingReasonCrashed] = 1
}
}
case containerStatus.State.Terminated != nil:
metric.AddTag(ci.ContainerStatus, "Terminated")
possibleStatuses[ci.StatusTerminated] = 1
if containerStatus.State.Terminated.Reason != "" {
metric.AddTag(ci.ContainerStatusReason, containerStatus.State.Terminated.Reason)
}
Expand All @@ -500,6 +512,11 @@ func (p *PodStore) addStatus(metric CIMetric, pod *corev1.Pod) {
}
p.setPrevMeasurement(ci.TypeContainer, containerKey, prevContainerMeasurement{restarts: int(containerStatus.RestartCount)})
}

// add container status metrics (one 0/1 field per possible state)
for name, val := range possibleStatuses {
metric.AddField(ci.MetricName(ci.TypeContainer, name), val)
}
}
}
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -310,6 +310,9 @@ func TestPodStore_addStatus(t *testing.T) {
assert.Equal(t, "Running", metric.GetTag(ci.ContainerStatus))
val = metric.GetField(ci.ContainerRestartCount)
assert.Nil(t, val)
val = metric.GetField(ci.MetricName(ci.TypeContainer, ci.StatusRunning))
assert.NotNil(t, val)
assert.Equal(t, 1, val)

pod.Status.ContainerStatuses[0].State.Running = nil
pod.Status.ContainerStatuses[0].State.Terminated = &corev1.ContainerStateTerminated{}
Expand All @@ -331,6 +334,28 @@ func TestPodStore_addStatus(t *testing.T) {
assert.Equal(t, "Terminated", metric.GetTag(ci.ContainerStatus))
assert.Equal(t, "OOMKilled", metric.GetTag(ci.ContainerLastTerminationReason))
assert.Equal(t, int(1), metric.GetField(ci.ContainerRestartCount).(int))
assert.Equal(t, 1, metric.GetField(ci.MetricName(ci.TypeContainer, ci.StatusTerminated)))

pod.Status.ContainerStatuses[0].State.Terminated = nil
pod.Status.ContainerStatuses[0].State.Waiting = &corev1.ContainerStateWaiting{Reason: "CrashLoopBackOff"}

tags = map[string]string{ci.MetricType: ci.TypeContainer, ci.K8sNamespace: "default", ci.K8sPodNameKey: "cpu-limit", ci.ContainerNamekey: "ubuntu"}
metric = generateMetric(fields, tags)

podStore.addStatus(metric, pod)
assert.Equal(t, "Waiting", metric.GetTag(ci.ContainerStatus))
assert.Equal(t, 1, metric.GetField(ci.MetricName(ci.TypeContainer, ci.StatusWaiting)))
assert.Equal(t, 1, metric.GetField(ci.MetricName(ci.TypeContainer, ci.StatusWaitingReasonCrashed)))

pod.Status.ContainerStatuses[0].State.Waiting = &corev1.ContainerStateWaiting{Reason: "SomeOtherReason"}

tags = map[string]string{ci.MetricType: ci.TypeContainer, ci.K8sNamespace: "default", ci.K8sPodNameKey: "cpu-limit", ci.ContainerNamekey: "ubuntu"}
metric = generateMetric(fields, tags)

podStore.addStatus(metric, pod)
assert.Equal(t, "Waiting", metric.GetTag(ci.ContainerStatus))
assert.Equal(t, 1, metric.GetField(ci.MetricName(ci.TypeContainer, ci.StatusWaiting)))
assert.Equal(t, 0, metric.GetField(ci.MetricName(ci.TypeContainer, ci.StatusWaitingReasonCrashed)))

// test delta of restartCount
pod.Status.ContainerStatuses[0].RestartCount = 3
Expand Down

0 comments on commit b482642

Please sign in to comment.