Skip to content

Commit

Permalink
fix(collector): convert cpu time in collection time instead of reporting time to avoid inconsistent use of cpu time in models
Browse files Browse the repository at this point in the history

Signed-off-by: Huamin Chen <[email protected]>
  • Loading branch information
rootfs authored Jul 4, 2024
1 parent 2c579aa commit c7b3ddb
Show file tree
Hide file tree
Showing 2 changed files with 6 additions and 14 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,7 @@ func updateSWCounters(key uint64, ct *ProcessBPFMetrics, processStats map[uint64
for counterKey := range bpfSupportedMetrics.SoftwareCounters {
switch counterKey {
case config.CPUTime:
processStats[key].ResourceUsage[config.CPUTime].AddDeltaStat(utils.GenericSocketID, ct.ProcessRunTime)
processStats[key].ResourceUsage[config.CPUTime].AddDeltaStat(utils.GenericSocketID, ct.ProcessRunTime/1000 /* convert microseconds to miliseconds */)
case config.PageCacheHit:
processStats[key].ResourceUsage[config.PageCacheHit].AddDeltaStat(utils.GenericSocketID, ct.PageCacheHit/(1000*1000))
case config.IRQNetTXLabel:
Expand Down
18 changes: 5 additions & 13 deletions pkg/metrics/utils/utils.go
Original file line number Diff line number Diff line change
Expand Up @@ -103,14 +103,6 @@ func collectEnergy(ch chan<- prometheus.Metric, instance interface{}, metricName
}
}

// convertUnit returns val as a float64, rescaled according to the metric it
// belongs to. When metricName equals config.CPUTime the value is divided by
// 1000 (microseconds to milliseconds); any other metric is returned unscaled.
func convertUnit(metricName string, val uint64) float64 {
	converted := float64(val)
	switch metricName {
	case config.CPUTime:
		// CPU time is the only metric that gets rescaled.
		converted /= 1000.0
	}
	return converted
}

func CollectResUtil(ch chan<- prometheus.Metric, instance interface{}, metricName string, collector metricfactory.PromMetric) {
var value float64
var labelValues []string
Expand All @@ -127,7 +119,7 @@ func CollectResUtil(ch chan<- prometheus.Metric, instance interface{}, metricNam
}
if isGPUMetric {
for deviceID, utilization := range container.ResourceUsage[metricName].Stat {
value = convertUnit(metricName, utilization.Aggr)
value = float64(utilization.Aggr)
labelValues = []string{container.ContainerID, container.PodName, container.ContainerName, container.Namespace, deviceID}
collect(ch, collector, value, labelValues)
}
Expand All @@ -136,20 +128,20 @@ func CollectResUtil(ch chan<- prometheus.Metric, instance interface{}, metricNam
klog.Errorf("ContainerStats %s does not have metric %s\n", container.ContainerID, metricName)
return
}
value = convertUnit(metricName, container.ResourceUsage[metricName].SumAllAggrValues())
value = float64(container.ResourceUsage[metricName].SumAllAggrValues())
labelValues = []string{container.ContainerID, container.PodName, container.ContainerName, container.Namespace}
collect(ch, collector, value, labelValues)
}

case *stats.ProcessStats:
process := instance.(*stats.ProcessStats)
value = convertUnit(metricName, process.ResourceUsage[metricName].SumAllAggrValues())
value = float64(process.ResourceUsage[metricName].SumAllAggrValues())
labelValues = []string{strconv.FormatUint(process.PID, 10), process.ContainerID, process.VMID, process.Command}
collect(ch, collector, value, labelValues)

case *stats.VMStats:
vm := instance.(*stats.VMStats)
value = convertUnit(metricName, vm.ResourceUsage[metricName].SumAllAggrValues())
value = float64(vm.ResourceUsage[metricName].SumAllAggrValues())
labelValues = []string{vm.VMID}
collect(ch, collector, value, labelValues)

Expand All @@ -158,7 +150,7 @@ func CollectResUtil(ch chan<- prometheus.Metric, instance interface{}, metricNam
node := instance.(*stats.NodeStats)
if _, exist := node.ResourceUsage[metricName]; exist {
for deviceID, utilization := range node.ResourceUsage[metricName].Stat {
value = convertUnit(metricName, utilization.Aggr)
value = float64(utilization.Aggr)
labelValues = []string{deviceID, stats.NodeName}
collect(ch, collector, value, labelValues)
}
Expand Down

0 comments on commit c7b3ddb

Please sign in to comment.