Skip to content

Commit

Permalink
Add metrics from /proc/[pid]/io to system/process metrics on linux (#114
Browse files Browse the repository at this point in the history
)

## What does this PR do?

Part of elastic/beats#7461

This ended up being fairly simple; we just fetch per-process I/O counters
from procfs, and these values appear to be largely identical to what's
reported by the netlink taskstats interface.

This just reads metrics from `/proc/[pid]/io` and reports them as part
of other process metrics, same as how things like memory usage are
reported.

## Why is it important?

We want per-process I/O metrics.

## Checklist


- [x] My code follows the style guidelines of this project
- [x] I have commented my code, particularly in hard-to-understand areas
- [x] I have added tests that prove my fix is effective or that my
feature works
- [ ] I have added an entry in `CHANGELOG.md`
  • Loading branch information
fearful-symmetry authored Nov 22, 2023
1 parent f61b864 commit a9ca78c
Show file tree
Hide file tree
Showing 5 changed files with 90 additions and 0 deletions.
2 changes: 2 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,8 @@ This project adheres to [Semantic Versioning](http://semver.org/).

### Added

- Add metrics from `/proc/[pid]/io`

### Changed

### Deprecated
Expand Down
44 changes: 44 additions & 0 deletions metric/system/process/process_linux_common.go
Original file line number Diff line number Diff line change
Expand Up @@ -123,6 +123,12 @@ func FillPidMetrics(hostfs resolve.Resolver, pid int, state ProcState, filter fu
if err != nil {
return state, fmt.Errorf("error creating username for pid %d: %w", pid, err)
}

state.IO, err = getIOData(hostfs, pid)
if err != nil {
return state, fmt.Errorf("error fetching IO metrics for pid %d: %w", pid, err)
}

return state, nil
}

Expand Down Expand Up @@ -301,6 +307,44 @@ func getMemData(hostfs resolve.Resolver, pid int) (ProcMemInfo, error) {
return state, nil
}

// getIOData reads the per-process I/O counters from proc/<pid>/io, with the
// path resolved through hostfs, and returns them as a ProcIOInfo.
//
// Each line is expected to look like "key: value". Lines without a ':'
// separator and keys that are not reported are skipped, so additional or
// oddly formatted entries do not abort the whole read. Whitespace around the
// key and value is ignored.
//
// An error is returned if the file cannot be read, or if the value of a
// reported counter is not a valid unsigned 64-bit integer; in the latter case
// the returned ProcIOInfo is the zero value rather than a half-filled struct.
func getIOData(hostfs resolve.Resolver, pid int) (ProcIOInfo, error) {
	state := ProcIOInfo{}
	path := hostfs.Join("proc", strconv.Itoa(pid), "io")
	data, err := os.ReadFile(path)
	if err != nil {
		return state, fmt.Errorf("error fetching IO metrics: %w", err)
	}

	for _, line := range strings.Split(string(data), "\n") {
		// Split on the first ':' only, so the value is everything after the key.
		fields := strings.SplitN(line, ":", 2)
		if len(fields) < 2 {
			continue
		}

		// Pick the destination first; values of keys we do not report are
		// never parsed and therefore can never cause a failure.
		var counter *opt.Uint
		switch strings.TrimSpace(fields[0]) {
		case "rchar":
			counter = &state.ReadChar
		case "wchar":
			counter = &state.WriteChar
		case "syscr":
			counter = &state.ReadSyscalls
		case "syscw":
			counter = &state.WriteSyscalls
		case "read_bytes":
			counter = &state.ReadBytes
		case "write_bytes":
			counter = &state.WriteBytes
		case "cancelled_write_bytes":
			counter = &state.CancelledWriteBytes
		default:
			continue
		}

		value, err := strconv.ParseUint(strings.TrimSpace(fields[1]), 10, 64)
		if err != nil {
			return ProcIOInfo{}, fmt.Errorf("error converting counters '%s' in io stat file: %w", line, err)
		}
		*counter = opt.UintWith(value)
	}
	return state, nil
}

func getCPUTime(hostfs resolve.Resolver, pid int) (ProcCPUInfo, error) {
state := ProcCPUInfo{}

Expand Down
18 changes: 18 additions & 0 deletions metric/system/process/process_linux_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -133,3 +133,21 @@ func TestParseProcStat(t *testing.T) {

assert.Equal(t, want, got, "")
}

func TestParseIO(t *testing.T) {
path := resolve.NewTestResolver("testdata/")
data, err := getIOData(path, 42)
require.NoError(t, err)

good := ProcIOInfo{
ReadChar: opt.UintWith(10418),
WriteChar: opt.UintWith(8),
ReadSyscalls: opt.UintWith(14),
WriteSyscalls: opt.UintWith(1),
ReadBytes: opt.UintWith(5243),
WriteBytes: opt.UintWith(128),
CancelledWriteBytes: opt.UintWith(4),
}

require.Equal(t, good, data)
}
19 changes: 19 additions & 0 deletions metric/system/process/process_types.go
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,7 @@ type ProcState struct {
CPU ProcCPUInfo `struct:"cpu,omitempty"`
FD ProcFDInfo `struct:"fd,omitempty"`
Network *sysinfotypes.NetworkCountersInfo `struct:"-,omitempty"`
IO ProcIOInfo `struct:"io,omitempty"`

// cgroups
Cgroup cgroup.CGStats `struct:"cgroup,omitempty"`
Expand Down Expand Up @@ -79,6 +80,24 @@ type CPUTotal struct {
Norm opt.PctOpt `struct:"norm,omitempty"`
}

// ProcIOInfo holds the per-process I/O counters read from /proc/[pid]/io.
// Every field is optional; a field is only set when the matching key was
// found in the file.
type ProcIOInfo struct {
// ReadChar is the number of bytes read from the system, as passed from read() and similar syscalls (the "rchar" entry).
ReadChar opt.Uint `struct:"read_char,omitempty"`
// WriteChar is the number of bytes written to the system, as passed to various syscalls (the "wchar" entry).
WriteChar opt.Uint `struct:"write_char,omitempty"`
// ReadSyscalls counts the number of read operations (the "syscr" entry).
ReadSyscalls opt.Uint `struct:"read_ops,omitempty"`
// WriteSyscalls counts the number of write operations (the "syscw" entry).
WriteSyscalls opt.Uint `struct:"write_ops,omitempty"`
// ReadBytes is the count of bytes that were actually fetched from the storage layer (the "read_bytes" entry).
ReadBytes opt.Uint `struct:"read_bytes,omitempty"`
// WriteBytes is the count of bytes that were actually written to the storage layer (the "write_bytes" entry).
WriteBytes opt.Uint `struct:"write_bytes,omitempty"`
// CancelledWriteBytes is the number of bytes whose write-out this process
// caused to not happen, by truncating pagecache (the "cancelled_write_bytes" entry).
CancelledWriteBytes opt.Uint `struct:"cancelled_write_bytes,omitempty"`
}

// ProcMemInfo is the struct for cpu.memory metrics
type ProcMemInfo struct {
Size opt.Uint `struct:"size,omitempty"`
Expand Down
7 changes: 7 additions & 0 deletions metric/system/process/testdata/proc/42/io
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
rchar: 10418
wchar: 8
syscr: 14
syscw: 1
read_bytes: 5243
write_bytes: 128
cancelled_write_bytes: 4

0 comments on commit a9ca78c

Please sign in to comment.