Skip to content

Commit

Permalink
feat: In Lustre input plugin, support collecting per-client stats. (#…
Browse files Browse the repository at this point in the history
  • Loading branch information
omgold authored and MyaLongmire committed Jul 6, 2022
1 parent b221c6c commit 286dfd8
Show file tree
Hide file tree
Showing 3 changed files with 127 additions and 8 deletions.
38 changes: 38 additions & 0 deletions plugins/inputs/lustre2/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -17,10 +17,12 @@ This plugin monitors the Lustre file system using its entries in the proc filesy
# "/proc/fs/lustre/obdfilter/*/stats",
# "/proc/fs/lustre/osd-ldiskfs/*/stats",
# "/proc/fs/lustre/obdfilter/*/job_stats",
# "/proc/fs/lustre/obdfilter/*/exports/*/stats",
# ]
# mds_procfiles = [
# "/proc/fs/lustre/mdt/*/md_stats",
# "/proc/fs/lustre/mdt/*/job_stats",
# "/proc/fs/lustre/mdt/*/exports/*/stats",
# ]
```

Expand All @@ -40,6 +42,18 @@ From `/proc/fs/lustre/obdfilter/*/stats` and `/proc/fs/lustre/osd-ldiskfs/*/stat
- cache_miss
- cache_access

From `/proc/fs/lustre/obdfilter/*/exports/*/stats`:

- lustre2
- tags:
- name
- client
- fields:
- write_bytes
- write_calls
- read_bytes
- read_calls

From `/proc/fs/lustre/obdfilter/*/job_stats`:

- lustre2
Expand Down Expand Up @@ -89,6 +103,30 @@ From `/proc/fs/lustre/mdt/*/md_stats`:
- samedir_rename
- crossdir_rename

From `/proc/fs/lustre/mdt/*/exports/*/stats`:

- lustre2
- tags:
- name
- client
- fields:
- open
- close
- mknod
- link
- unlink
- mkdir
- rmdir
- rename
- getattr
- setattr
- getxattr
- setxattr
- statfs
- sync
- samedir_rename
- crossdir_rename

From `/proc/fs/lustre/mdt/*/job_stats`:

- lustre2
Expand Down
34 changes: 26 additions & 8 deletions plugins/inputs/lustre2/lustre2.go
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@ import (
)

// tags is the key under which collected fields are grouped: the Lustre target
// name, plus a job ID and a client NID that are only emitted as metric tags
// when they are non-empty.
// NOTE(review): the two field lines below look like the before/after sides of
// a diff hunk; presumably only the second one exists in the real file — confirm.
type tags struct {
name, job string
name, job, client string
}

// Lustre proc files can change between versions, so we want to future-proof
Expand All @@ -40,10 +40,12 @@ var sampleConfig = `
# "/proc/fs/lustre/obdfilter/*/stats",
# "/proc/fs/lustre/osd-ldiskfs/*/stats",
# "/proc/fs/lustre/obdfilter/*/job_stats",
# "/proc/fs/lustre/obdfilter/*/exports/*/stats",
# ]
# mds_procfiles = [
# "/proc/fs/lustre/mdt/*/md_stats",
# "/proc/fs/lustre/mdt/*/job_stats",
# "/proc/fs/lustre/mdt/*/exports/*/stats",
# ]
`

Expand Down Expand Up @@ -365,13 +367,26 @@ func (l *Lustre2) GetLustreProcStats(fileglob string, wantedFields []*mapping) e
fieldSplitter := regexp.MustCompile(`[ :]+`)

for _, file := range files {
/* Turn /proc/fs/lustre/obdfilter/<ost_name>/stats and similar
* into just the object store target name
* Assumption: the target name is always second to last,
* which is true in Lustre 2.1->2.8

/* From /proc/fs/lustre/obdfilter/<ost_name>/stats and similar
* extract the object store target name,
* and for per-client files under
* /proc/fs/lustre/obdfilter/<ost_name>/exports/<client_nid>/stats
* and similar the client NID
* Assumption: the target name is fourth to last
* for per-client files and second to last otherwise
* and the client NID is always second to last,
* which is true in Lustre 2.1->2.14
*/
path := strings.Split(file, "/")
name := path[len(path)-2]
var name, client string
if strings.Contains(file, "/exports/") {
name = path[len(path)-4]
client = path[len(path)-2]
} else {
name = path[len(path)-2]
client = ""
}

//lines, err := internal.ReadLines(file)
wholeFile, err := os.ReadFile(file)
Expand Down Expand Up @@ -401,10 +416,10 @@ func (l *Lustre2) GetLustreProcStats(fileglob string, wantedFields []*mapping) e
}

var fields map[string]interface{}
fields, ok := l.allFields[tags{name, jobid}]
fields, ok := l.allFields[tags{name, jobid, client}]
if !ok {
fields = make(map[string]interface{})
l.allFields[tags{name, jobid}] = fields
l.allFields[tags{name, jobid, client}] = fields
}

for _, wanted := range wantedFields {
Expand Down Expand Up @@ -508,6 +523,9 @@ func (l *Lustre2) Gather(acc telegraf.Accumulator) error {
if len(tgs.job) > 0 {
tags["jobid"] = tgs.job
}
if len(tgs.client) > 0 {
tags["client"] = tgs.client
}
acc.AddFields("lustre2", fields, tags)
}

Expand Down
63 changes: 63 additions & 0 deletions plugins/inputs/lustre2/lustre2_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -204,6 +204,69 @@ func TestLustre2GeneratesMetrics(t *testing.T) {
require.NoError(t, err)
}

// TestLustre2GeneratesClientMetrics verifies that per-client export stats
// files (<target>/exports/<client_nid>/stats) under both the mdt and obdfilter
// trees are gathered into a single "lustre2" metric tagged with the target
// name and the client NID.
func TestLustre2GeneratesClientMetrics(t *testing.T) {
	// t.TempDir gives this test a private directory that the testing package
	// removes automatically, including when an assertion below aborts the
	// test. A fixed path under os.TempDir() could collide with other tests and
	// would be left behind on failure.
	procRoot := t.TempDir() + "/proc/fs/lustre"
	ostName := "OST0001"
	clientName := "10.2.4.27@o2ib1"

	mdtdir := procRoot + "/mdt"
	obddir := procRoot + "/obdfilter"
	mdtClientDir := mdtdir + "/" + ostName + "/exports/" + clientName
	obdClientDir := obddir + "/" + ostName + "/exports/" + clientName

	err := os.MkdirAll(mdtClientDir, 0755)
	require.NoError(t, err)

	err = os.MkdirAll(obdClientDir, 0755)
	require.NoError(t, err)

	err = os.WriteFile(mdtClientDir+"/stats", []byte(mdtProcContents), 0644)
	require.NoError(t, err)

	err = os.WriteFile(obdClientDir+"/stats", []byte(obdfilterProcContents), 0644)
	require.NoError(t, err)

	// Point the plugin only at the per-client export stats files.
	m := &Lustre2{
		OstProcfiles: []string{obddir + "/*/exports/*/stats"},
		MdsProcfiles: []string{mdtdir + "/*/exports/*/stats"},
	}

	var acc testutil.Accumulator

	err = m.Gather(&acc)
	require.NoError(t, err)

	tags := map[string]string{
		"name":   ostName,
		"client": clientName,
	}

	// Both fixture files share the same target name and client NID, so their
	// fields are expected together on one metric.
	fields := map[string]interface{}{
		"close":           uint64(873243496),
		"crossdir_rename": uint64(369571),
		"getattr":         uint64(1503663097),
		"getxattr":        uint64(6145349681),
		"link":            uint64(445),
		"mkdir":           uint64(705499),
		"mknod":           uint64(349042),
		"open":            uint64(1024577037),
		"read_bytes":      uint64(78026117632000),
		"read_calls":      uint64(203238095),
		"rename":          uint64(629196),
		"rmdir":           uint64(227434),
		"samedir_rename":  uint64(259625),
		"setattr":         uint64(1898364),
		"setxattr":        uint64(83969),
		"statfs":          uint64(2916320),
		"sync":            uint64(434081),
		"unlink":          uint64(3549417),
		"write_bytes":     uint64(15201500833981),
		"write_calls":     uint64(71893382),
	}

	acc.AssertContainsTaggedFields(t, "lustre2", fields, tags)
}

func TestLustre2GeneratesJobstatsMetrics(t *testing.T) {
tempdir := os.TempDir() + "/telegraf/proc/fs/lustre/"
ostName := "OST0001"
Expand Down

0 comments on commit 286dfd8

Please sign in to comment.