Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add more memory cgroup metrics #3639

Open
wants to merge 1 commit into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
13 changes: 13 additions & 0 deletions container/libcontainer/handler.go
Original file line number Diff line number Diff line change
Expand Up @@ -805,16 +805,29 @@ func setMemoryStats(s *cgroups.Stats, ret *info.ContainerStats) {
ret.Memory.RSS = s.MemoryStats.Stats["anon"]
ret.Memory.Swap = s.MemoryStats.SwapUsage.Usage - s.MemoryStats.Usage.Usage
ret.Memory.MappedFile = s.MemoryStats.Stats["file_mapped"]
ret.Memory.Shmem = s.MemoryStats.Stats["shmem"]
ret.Memory.Dirty = s.MemoryStats.Stats["file_dirty"]
ret.Memory.Writeback = s.MemoryStats.Stats["file_writeback"]
ret.Memory.Unevictable = s.MemoryStats.Stats["unevictable"]
ret.Memory.Sock = s.MemoryStats.Stats["sock"]
} else if s.MemoryStats.UseHierarchy {
ret.Memory.Cache = s.MemoryStats.Stats["total_cache"]
ret.Memory.RSS = s.MemoryStats.Stats["total_rss"]
ret.Memory.Swap = s.MemoryStats.Stats["total_swap"]
ret.Memory.MappedFile = s.MemoryStats.Stats["total_mapped_file"]
ret.Memory.Shmem = s.MemoryStats.Stats["total_shmem"]
ret.Memory.Dirty = s.MemoryStats.Stats["total_dirty"]
ret.Memory.Writeback = s.MemoryStats.Stats["total_writeback"]
ret.Memory.Unevictable = s.MemoryStats.Stats["total_unevictable"]
} else {
ret.Memory.Cache = s.MemoryStats.Stats["cache"]
ret.Memory.RSS = s.MemoryStats.Stats["rss"]
ret.Memory.Swap = s.MemoryStats.Stats["swap"]
ret.Memory.MappedFile = s.MemoryStats.Stats["mapped_file"]
ret.Memory.Shmem = s.MemoryStats.Stats["shmem"]
ret.Memory.Dirty = s.MemoryStats.Stats["dirty"]
ret.Memory.Writeback = s.MemoryStats.Stats["writeback"]
ret.Memory.Unevictable = s.MemoryStats.Stats["unevictable"]
}
if v, ok := s.MemoryStats.Stats["pgfault"]; ok {
ret.Memory.ContainerData.Pgfault = v
Expand Down
5 changes: 5 additions & 0 deletions docs/storage/prometheus.md
Original file line number Diff line number Diff line change
Expand Up @@ -55,16 +55,21 @@ Metric name | Type | Description | Unit (where applicable) | option parameter |
`container_memory_bandwidth_bytes` | Gauge | Total memory bandwidth usage statistics for container counted with RDT Memory Bandwidth Monitoring (MBM). | bytes | resctrl |
`container_memory_bandwidth_local_bytes` | Gauge | Local memory bandwidth usage statistics for container counted with RDT Memory Bandwidth Monitoring (MBM). | bytes | resctrl |
`container_memory_cache` | Gauge | Total page cache memory | bytes | memory |
`container_memory_dirty` | Gauge | Size of memory that is waiting to be written back to disk | bytes | memory |
`container_memory_failcnt` | Counter | Number of times memory usage hit the limit | | memory |
`container_memory_failures_total` | Counter | Cumulative count of memory allocation failures | | memory |
`container_memory_mapped_file` | Gauge | Size of memory mapped files | bytes | memory |
`container_memory_max_usage_bytes` | Gauge | Maximum memory usage recorded | bytes | memory |
`container_memory_migrate` | Gauge | Memory migrate status | | cpuset |
`container_memory_numa_pages` | Gauge | Number of used pages per NUMA node | | memory_numa |
`container_memory_rss` | Gauge | Size of RSS | bytes | memory |
`container_memory_shmem` | Gauge | Size of shmem | bytes | memory |
`container_memory_sock` | Gauge | Size of memory used in network transmission buffers (cgroupv2-only) | bytes | memory |
`container_memory_swap` | Gauge | Container swap usage | bytes | memory |
`container_memory_unevictable` | Gauge | Size of unevictable memory | bytes | memory |
`container_memory_usage_bytes` | Gauge | Current memory usage, including all memory regardless of when it was accessed | bytes | memory |
`container_memory_working_set_bytes` | Gauge | Current working set | bytes | memory |
`container_memory_writeback` | Gauge | Size of file/anon cache that is queued for syncing to disk | bytes | memory |
`container_network_advance_tcp_stats_total` | Gauge | Advanced TCP connection statistics for container | | advtcp |
`container_network_receive_bytes_total` | Counter | Cumulative count of bytes received | bytes | network |
`container_network_receive_errors_total` | Counter | Cumulative count of errors encountered while receiving | | network |
Expand Down
21 changes: 21 additions & 0 deletions info/v1/container.go
Original file line number Diff line number Diff line change
Expand Up @@ -390,6 +390,27 @@ type MemoryStats struct {
// The amount of memory used for mapped files (includes tmpfs/shmem)
MappedFile uint64 `json:"mapped_file"`

// The amount of cached filesystem data that is swap-backed, such as tmpfs,
// shm segments, shared anonymous mmap()s
// Units: Bytes.
Shmem uint64 `json:"shmem"`

// The number of bytes that are waiting to be written back to disk
// Units: Bytes.
Dirty uint64 `json:"dirty"`

// The number of bytes of file/anon cache that are queued for syncing to disk
// Units: Bytes.
Writeback uint64 `json:"writeback"`

// The number of bytes of memory that cannot be reclaimed (mlocked, etc.)
// Units: Bytes.
Unevictable uint64 `json:"unevictable"`

// The amount of memory used in network transmission buffers
// Units: Bytes.
Sock uint64 `json:"sock"`

// The amount of working set memory, this includes recently accessed memory,
// dirty memory, and kernel memory. Working set is <= "usage".
// Units: Bytes.
Expand Down
5 changes: 5 additions & 0 deletions info/v1/test/datagen.go
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,11 @@ func GenerateRandomStats(numStats, numCores int, duration time.Duration) []*info
stats.Memory.Cache = uint64(rand.Int63n(4096))
stats.Memory.RSS = uint64(rand.Int63n(4096))
stats.Memory.MappedFile = uint64(rand.Int63n(4096))
stats.Memory.Shmem = uint64(rand.Int63n(4096))
stats.Memory.Dirty = uint64(rand.Int63n(4096))
stats.Memory.Writeback = uint64(rand.Int63n(4096))
stats.Memory.Unevictable = uint64(rand.Int63n(4096))
stats.Memory.Sock = uint64(rand.Int63n(4096))
stats.Memory.KernelUsage = uint64(rand.Int63n(4096))
stats.ReferencedMemory = uint64(rand.Int63n(1000))
ret[i] = stats
Expand Down
13 changes: 9 additions & 4 deletions info/v2/conversion_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -140,10 +140,15 @@ func TestContainerStatsFromV1(t *testing.T) {
Usage: 1,
Cache: 2,
RSS: 3,
WorkingSet: 4,
Failcnt: 5,
TotalActiveFile: 6,
TotalInactiveFile: 7,
Shmem: 4,
Dirty: 5,
Writeback: 6,
Unevictable: 7,
Sock: 8,
WorkingSet: 9,
Failcnt: 10,
TotalActiveFile: 11,
TotalInactiveFile: 12,
ContainerData: v1.MemoryStatsMemoryData{
Pgfault: 1,
Pgmajfault: 2,
Expand Down
37 changes: 37 additions & 0 deletions metrics/prometheus.go
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@ import (
"github.com/google/cadvisor/container"
info "github.com/google/cadvisor/info/v1"
v2 "github.com/google/cadvisor/info/v2"
"github.com/opencontainers/runc/libcontainer/cgroups"

"github.com/prometheus/client_golang/prometheus"

Expand Down Expand Up @@ -398,6 +399,42 @@ func NewPrometheusCollector(i infoProvider, f ContainerLabelsFunc, includedMetri
getValues: func(s *info.ContainerStats) metricValues {
return metricValues{{value: float64(s.Memory.MappedFile), timestamp: s.Timestamp}}
},
}, {
name: "container_memory_shmem",
help: "Size of shmem in bytes.",
valueType: prometheus.GaugeValue,
getValues: func(s *info.ContainerStats) metricValues {
return metricValues{{value: float64(s.Memory.Shmem), timestamp: s.Timestamp}}
},
}, {
name: "container_memory_dirty",
help: "Size of memory that are waiting to get written back to the disk in bytes.",
valueType: prometheus.GaugeValue,
getValues: func(s *info.ContainerStats) metricValues {
return metricValues{{value: float64(s.Memory.Dirty), timestamp: s.Timestamp}}
},
}, {
name: "container_memory_writeback",
help: "Size of file/anon cache that are queued for syncing to disk in bytes.",
valueType: prometheus.GaugeValue,
getValues: func(s *info.ContainerStats) metricValues {
return metricValues{{value: float64(s.Memory.Writeback), timestamp: s.Timestamp}}
},
}, {
name: "container_memory_unevictable",
help: "Size of unevictable memory in bytes.",
valueType: prometheus.GaugeValue,
getValues: func(s *info.ContainerStats) metricValues {
return metricValues{{value: float64(s.Memory.Unevictable), timestamp: s.Timestamp}}
},
}, {
name: "container_memory_sock",
help: "Size of memory used in network transmission buffers in bytes.",
valueType: prometheus.GaugeValue,
condition: func(s info.ContainerSpec) bool { return cgroups.IsCgroup2UnifiedMode() },
getValues: func(s *info.ContainerStats) metricValues {
return metricValues{{value: float64(s.Memory.Sock), timestamp: s.Timestamp}}
},
}, {
name: "container_memory_swap",
help: "Container swap usage in bytes.",
Expand Down
5 changes: 5 additions & 0 deletions metrics/prometheus_fake.go
Original file line number Diff line number Diff line change
Expand Up @@ -357,6 +357,11 @@ func (p testSubcontainersInfoProvider) GetRequestedContainersInfo(string, v2.Req
RSS: 15,
MappedFile: 16,
KernelUsage: 17,
Shmem: 18,
Dirty: 19,
Writeback: 20,
Unevictable: 21,
Sock: 22,
Swap: 8192,
},
Hugetlb: map[string]info.HugetlbStats{
Expand Down
15 changes: 15 additions & 0 deletions metrics/testdata/prometheus_metrics
Original file line number Diff line number Diff line change
Expand Up @@ -142,6 +142,9 @@ container_last_seen{container_env_foo_env="prod",container_label_foo_label="bar"
# HELP container_memory_cache Number of bytes of page cache memory.
# TYPE container_memory_cache gauge
container_memory_cache{container_env_foo_env="prod",container_label_foo_label="bar",id="testcontainer",image="test",name="testcontaineralias",zone_name="hello"} 14 1395066363000
# HELP container_memory_dirty Size of memory that are waiting to get written back to the disk in bytes.
# TYPE container_memory_dirty gauge
container_memory_dirty{container_env_foo_env="prod",container_label_foo_label="bar",id="testcontainer",image="test",name="testcontaineralias",zone_name="hello"} 19 1395066363000
# HELP container_memory_failcnt Number of memory usage hits limits
# TYPE container_memory_failcnt counter
container_memory_failcnt{container_env_foo_env="prod",container_label_foo_label="bar",id="testcontainer",image="test",name="testcontaineralias",zone_name="hello"} 0 1395066363000
Expand Down Expand Up @@ -180,6 +183,12 @@ container_memory_numa_pages{container_env_foo_env="prod",container_label_foo_lab
# HELP container_memory_rss Size of RSS in bytes.
# TYPE container_memory_rss gauge
container_memory_rss{container_env_foo_env="prod",container_label_foo_label="bar",id="testcontainer",image="test",name="testcontaineralias",zone_name="hello"} 15 1395066363000
# HELP container_memory_shmem Size of shmem in bytes.
# TYPE container_memory_shmem gauge
container_memory_shmem{container_env_foo_env="prod",container_label_foo_label="bar",id="testcontainer",image="test",name="testcontaineralias",zone_name="hello"} 18 1395066363000
# HELP container_memory_sock Size of memory used in network transmission buffers in bytes.
# TYPE container_memory_sock gauge
container_memory_sock{container_env_foo_env="prod",container_label_foo_label="bar",id="testcontainer",image="test",name="testcontaineralias",zone_name="hello"} 22 1395066363000
# HELP container_memory_swap Container swap usage in bytes.
# TYPE container_memory_swap gauge
container_memory_swap{container_env_foo_env="prod",container_label_foo_label="bar",id="testcontainer",image="test",name="testcontaineralias",zone_name="hello"} 8192 1395066363000
Expand All @@ -189,12 +198,18 @@ container_memory_total_active_file_bytes{container_env_foo_env="prod",container_
# HELP container_memory_total_inactive_file_bytes Current total inactive file in bytes.
# TYPE container_memory_total_inactive_file_bytes gauge
container_memory_total_inactive_file_bytes{container_env_foo_env="prod",container_label_foo_label="bar",id="testcontainer",image="test",name="testcontaineralias",zone_name="hello"} 6 1395066363000
# HELP container_memory_unevictable Size of unevictable memory in bytes.
# TYPE container_memory_unevictable gauge
container_memory_unevictable{container_env_foo_env="prod",container_label_foo_label="bar",id="testcontainer",image="test",name="testcontaineralias",zone_name="hello"} 21 1395066363000
# HELP container_memory_usage_bytes Current memory usage in bytes, including all memory regardless of when it was accessed
# TYPE container_memory_usage_bytes gauge
container_memory_usage_bytes{container_env_foo_env="prod",container_label_foo_label="bar",id="testcontainer",image="test",name="testcontaineralias",zone_name="hello"} 8 1395066363000
# HELP container_memory_working_set_bytes Current working set in bytes.
# TYPE container_memory_working_set_bytes gauge
container_memory_working_set_bytes{container_env_foo_env="prod",container_label_foo_label="bar",id="testcontainer",image="test",name="testcontaineralias",zone_name="hello"} 9 1395066363000
# HELP container_memory_writeback Size of file/anon cache that are queued for syncing to disk in bytes.
# TYPE container_memory_writeback gauge
container_memory_writeback{container_env_foo_env="prod",container_label_foo_label="bar",id="testcontainer",image="test",name="testcontaineralias",zone_name="hello"} 20 1395066363000
# HELP container_network_advance_tcp_stats_total advance tcp connections statistic for container
# TYPE container_network_advance_tcp_stats_total gauge
container_network_advance_tcp_stats_total{container_env_foo_env="prod",container_label_foo_label="bar",id="testcontainer",image="test",name="testcontaineralias",tcp_state="activeopens",zone_name="hello"} 1.1038621e+07 1395066363000
Expand Down
15 changes: 15 additions & 0 deletions metrics/testdata/prometheus_metrics_whitelist_filtered
Original file line number Diff line number Diff line change
Expand Up @@ -142,6 +142,9 @@ container_last_seen{container_env_foo_env="prod",id="testcontainer",image="test"
# HELP container_memory_cache Number of bytes of page cache memory.
# TYPE container_memory_cache gauge
container_memory_cache{container_env_foo_env="prod",id="testcontainer",image="test",name="testcontaineralias",zone_name="hello"} 14 1395066363000
# HELP container_memory_dirty Size of memory that are waiting to get written back to the disk in bytes.
# TYPE container_memory_dirty gauge
container_memory_dirty{container_env_foo_env="prod",id="testcontainer",image="test",name="testcontaineralias",zone_name="hello"} 19 1395066363000
# HELP container_memory_failcnt Number of memory usage hits limits
# TYPE container_memory_failcnt counter
container_memory_failcnt{container_env_foo_env="prod",id="testcontainer",image="test",name="testcontaineralias",zone_name="hello"} 0 1395066363000
Expand Down Expand Up @@ -180,6 +183,12 @@ container_memory_numa_pages{container_env_foo_env="prod",id="testcontainer",imag
# HELP container_memory_rss Size of RSS in bytes.
# TYPE container_memory_rss gauge
container_memory_rss{container_env_foo_env="prod",id="testcontainer",image="test",name="testcontaineralias",zone_name="hello"} 15 1395066363000
# HELP container_memory_shmem Size of shmem in bytes.
# TYPE container_memory_shmem gauge
container_memory_shmem{container_env_foo_env="prod",id="testcontainer",image="test",name="testcontaineralias",zone_name="hello"} 18 1395066363000
# HELP container_memory_sock Size of memory used in network transmission buffers in bytes.
# TYPE container_memory_sock gauge
container_memory_sock{container_env_foo_env="prod",id="testcontainer",image="test",name="testcontaineralias",zone_name="hello"} 22 1395066363000
# HELP container_memory_swap Container swap usage in bytes.
# TYPE container_memory_swap gauge
container_memory_swap{container_env_foo_env="prod",id="testcontainer",image="test",name="testcontaineralias",zone_name="hello"} 8192 1395066363000
Expand All @@ -189,12 +198,18 @@ container_memory_total_active_file_bytes{container_env_foo_env="prod",id="testco
# HELP container_memory_total_inactive_file_bytes Current total inactive file in bytes.
# TYPE container_memory_total_inactive_file_bytes gauge
container_memory_total_inactive_file_bytes{container_env_foo_env="prod",id="testcontainer",image="test",name="testcontaineralias",zone_name="hello"} 6 1395066363000
# HELP container_memory_unevictable Size of unevictable memory in bytes.
# TYPE container_memory_unevictable gauge
container_memory_unevictable{container_env_foo_env="prod",id="testcontainer",image="test",name="testcontaineralias",zone_name="hello"} 21 1395066363000
# HELP container_memory_usage_bytes Current memory usage in bytes, including all memory regardless of when it was accessed
# TYPE container_memory_usage_bytes gauge
container_memory_usage_bytes{container_env_foo_env="prod",id="testcontainer",image="test",name="testcontaineralias",zone_name="hello"} 8 1395066363000
# HELP container_memory_working_set_bytes Current working set in bytes.
# TYPE container_memory_working_set_bytes gauge
container_memory_working_set_bytes{container_env_foo_env="prod",id="testcontainer",image="test",name="testcontaineralias",zone_name="hello"} 9 1395066363000
# HELP container_memory_writeback Size of file/anon cache that are queued for syncing to disk in bytes.
# TYPE container_memory_writeback gauge
container_memory_writeback{container_env_foo_env="prod",id="testcontainer",image="test",name="testcontaineralias",zone_name="hello"} 20 1395066363000
# HELP container_network_advance_tcp_stats_total advance tcp connections statistic for container
# TYPE container_network_advance_tcp_stats_total gauge
container_network_advance_tcp_stats_total{container_env_foo_env="prod",id="testcontainer",image="test",name="testcontaineralias",tcp_state="activeopens",zone_name="hello"} 1.1038621e+07 1395066363000
Expand Down