Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add a new input plugin for InfiniBand card/port statistics #6631

Merged
merged 16 commits into from
Jan 16, 2020
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
14 changes: 13 additions & 1 deletion Gopkg.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

4 changes: 4 additions & 0 deletions Gopkg.toml
Original file line number Diff line number Diff line change
Expand Up @@ -297,6 +297,10 @@
branch = "master"
name = "github.com/cisco-ie/nx-telemetry-proto"

[[constraint]]
branch = "master"
name = "github.com/Mellanox/rdmamap"

[[constraint]]
name = "gopkg.in/ldap.v3"
version = "3.1.0"
Expand Down
1 change: 1 addition & 0 deletions plugins/inputs/all/all.go
Original file line number Diff line number Diff line change
Expand Up @@ -56,6 +56,7 @@ import (
_ "github.com/influxdata/telegraf/plugins/inputs/http_response"
_ "github.com/influxdata/telegraf/plugins/inputs/httpjson"
_ "github.com/influxdata/telegraf/plugins/inputs/icinga2"
_ "github.com/influxdata/telegraf/plugins/inputs/infiniband"
_ "github.com/influxdata/telegraf/plugins/inputs/influxdb"
_ "github.com/influxdata/telegraf/plugins/inputs/influxdb_listener"
_ "github.com/influxdata/telegraf/plugins/inputs/internal"
Expand Down
29 changes: 29 additions & 0 deletions plugins/inputs/infiniband/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
# InfiniBand Input Plugin

This plugin gathers statistics for all InfiniBand devices and ports on the system. These are the counters that can be found in `/sys/class/infiniband/<dev>/ports/<port>/counters/`.

### Configuration

This section contains the default TOML to configure the plugin. You can
generate it using `telegraf --usage infiniband`.

```toml
[[inputs.infiniband]]
```

There are no configuration options for this plugin.

### Metrics

You can find more information about the counters that are gathered here:
https://community.mellanox.com/s/article/understanding-mlx5-linux-counters-and-status-parameters

There is a simple mapping from counter -> counter value. All counter values are 64 bit integers. A separate measurement is made for each port.
Each measurement is tagged with the device and port that it relates to. These are strings.


### Example Output

```
infiniband,device=mlx5_0,port=1 VL15_dropped=0i,excessive_buffer_overrun_errors=0i,link_downed=0i,link_error_recovery=0i,local_link_integrity_errors=0i,multicast_rcv_packets=0i,multicast_xmit_packets=0i,port_rcv_constraint_errors=0i,port_rcv_data=237159415345822i,port_rcv_errors=0i,port_rcv_packets=801977655075i,port_rcv_remote_physical_errors=0i,port_rcv_switch_relay_errors=0i,port_xmit_constraint_errors=0i,port_xmit_data=238334949937759i,port_xmit_discards=0i,port_xmit_packets=803162651391i,port_xmit_wait=4294967295i,symbol_error=0i,unicast_rcv_packets=801977655075i,unicast_xmit_packets=803162651391i 1573125558000000000
```
22 changes: 22 additions & 0 deletions plugins/inputs/infiniband/infiniband.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
package infiniband

import (
"github.com/influxdata/telegraf"
)

// Infiniband is the plugin type for the infiniband input. The plugin has no
// configuration values, so the struct carries no settings; its only field is
// the logger.
type Infiniband struct {
	// Log is the plugin logger; the `toml:"-"` tag marks it as not being a
	// configuration option.
	Log telegraf.Logger `toml:"-"`
}

// InfinibandConfig is the sample configuration returned by SampleConfig. It
// is an empty string.
var InfinibandConfig = ``

// SampleConfig returns InfinibandConfig, the sample configuration of the
// plugin.
func (i *Infiniband) SampleConfig() string {
	return InfinibandConfig
}

// Description returns a one-line summary of what the plugin does.
func (i *Infiniband) Description() string {
	return "Gets counters from all InfiniBand cards and ports installed"
}
59 changes: 59 additions & 0 deletions plugins/inputs/infiniband/infiniband_linux.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,59 @@
// +build linux

package infiniband

import (
"fmt"
"github.com/Mellanox/rdmamap"
"github.com/influxdata/telegraf"
"github.com/influxdata/telegraf/plugins/inputs"
"strconv"
)

// Gather collects the counters of every port of every InfiniBand device
// reported by rdmamap and adds one measurement per port to acc.
//
// It returns an error when no devices are found, when a port name cannot be
// parsed as an integer, or when the counters of a port cannot be read. The
// latter two errors are wrapped with the device and port they relate to so a
// failure can be traced to a specific port; gathering stops at the first
// error.
func (i *Infiniband) Gather(acc telegraf.Accumulator) error {
	rdmaDevices := rdmamap.GetRdmaDeviceList()

	if len(rdmaDevices) == 0 {
		return fmt.Errorf("no InfiniBand devices found in /sys/class/infiniband/")
	}

	for _, dev := range rdmaDevices {
		devicePorts := rdmamap.GetPorts(dev)
		for _, port := range devicePorts {
			// rdmamap reports port names as strings but reads stats by
			// integer port number.
			portInt, err := strconv.Atoi(port)
			if err != nil {
				return fmt.Errorf("parsing port %q of device %q: %w", port, dev, err)
			}

			stats, err := rdmamap.GetRdmaSysfsStats(dev, portInt)
			if err != nil {
				return fmt.Errorf("reading counters of device %q port %d: %w", dev, portInt, err)
			}

			addStats(dev, port, stats, acc)
		}
	}

	return nil
}

// addStats turns the counters of a single port into one "infiniband"
// measurement, keyed by counter name, and hands it to the accumulator.
func addStats(dev string, port string, stats []rdmamap.RdmaStatEntry, acc telegraf.Accumulator) {
	counters := make(map[string]interface{}, len(stats))
	for idx := range stats {
		counters[stats[idx].Name] = stats[idx].Value
	}

	// Tagging with the card and port lets users filter on either.
	acc.AddFields("infiniband", counters, map[string]string{
		"device": dev,
		"port":   port,
	})
}

// init registers the plugin with the inputs registry under the name
// "infiniband".
func init() {
	inputs.Add("infiniband", func() telegraf.Input {
		return new(Infiniband)
	})
}
23 changes: 23 additions & 0 deletions plugins/inputs/infiniband/infiniband_notlinux.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
// +build !linux

package infiniband

import (
"github.com/influxdata/telegraf"
"github.com/influxdata/telegraf/plugins/inputs"
)

// Init logs a warning that the current platform is not supported. It always
// returns nil.
func (i *Infiniband) Init() error {
	i.Log.Warn("Current platform is not supported")
	return nil
}

// Gather is a no-op in this build (selected by the !linux build tag): it adds
// nothing to acc and always returns nil.
func (i *Infiniband) Gather(acc telegraf.Accumulator) error {
	return nil
}

// init registers the plugin with the inputs registry under the name
// "infiniband".
func init() {
	inputs.Add("infiniband", func() telegraf.Input { return new(Infiniband) })
}
134 changes: 134 additions & 0 deletions plugins/inputs/infiniband/infiniband_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,134 @@
// +build linux

package infiniband

import (
"github.com/Mellanox/rdmamap"
"github.com/influxdata/telegraf/testutil"
"testing"
)

// TestInfiniband checks that addStats emits a single "infiniband" measurement
// whose fields mirror the supplied counters and whose tags carry the device
// and port.
func TestInfiniband(t *testing.T) {
	// Counters handed to addStats.
	sampleEntries := []rdmamap.RdmaStatEntry{
		{Name: "excessive_buffer_overrun_errors", Value: uint64(0)},
		{Name: "link_downed", Value: uint64(0)},
		{Name: "link_error_recovery", Value: uint64(0)},
		{Name: "local_link_integrity_errors", Value: uint64(0)},
		{Name: "multicast_rcv_packets", Value: uint64(0)},
		{Name: "multicast_xmit_packets", Value: uint64(0)},
		{Name: "port_rcv_constraint_errors", Value: uint64(0)},
		{Name: "port_rcv_data", Value: uint64(237159415345822)},
		{Name: "port_rcv_errors", Value: uint64(0)},
		{Name: "port_rcv_packets", Value: uint64(801977655075)},
		{Name: "port_rcv_remote_physical_errors", Value: uint64(0)},
		{Name: "port_rcv_switch_relay_errors", Value: uint64(0)},
		{Name: "port_xmit_constraint_errors", Value: uint64(0)},
		{Name: "port_xmit_data", Value: uint64(238334949937759)},
		{Name: "port_xmit_discards", Value: uint64(0)},
		{Name: "port_xmit_packets", Value: uint64(803162651391)},
		{Name: "port_xmit_wait", Value: uint64(4294967295)},
		{Name: "symbol_error", Value: uint64(0)},
		{Name: "unicast_rcv_packets", Value: uint64(801977655075)},
		{Name: "unicast_xmit_packets", Value: uint64(803162651391)},
		{Name: "VL15_dropped", Value: uint64(0)},
	}

	// Fields the accumulator is expected to hold afterwards. They are spelled
	// out rather than derived from sampleEntries so the expectation stays
	// independent of the input.
	wantFields := map[string]interface{}{
		"excessive_buffer_overrun_errors": uint64(0),
		"link_downed":                     uint64(0),
		"link_error_recovery":             uint64(0),
		"local_link_integrity_errors":     uint64(0),
		"multicast_rcv_packets":           uint64(0),
		"multicast_xmit_packets":          uint64(0),
		"port_rcv_constraint_errors":      uint64(0),
		"port_rcv_data":                   uint64(237159415345822),
		"port_rcv_errors":                 uint64(0),
		"port_rcv_packets":                uint64(801977655075),
		"port_rcv_remote_physical_errors": uint64(0),
		"port_rcv_switch_relay_errors":    uint64(0),
		"port_xmit_constraint_errors":     uint64(0),
		"port_xmit_data":                  uint64(238334949937759),
		"port_xmit_discards":              uint64(0),
		"port_xmit_packets":               uint64(803162651391),
		"port_xmit_wait":                  uint64(4294967295),
		"symbol_error":                    uint64(0),
		"unicast_rcv_packets":             uint64(801977655075),
		"unicast_xmit_packets":            uint64(803162651391),
		"VL15_dropped":                    uint64(0),
	}

	// NOTE(review): the device name is spelled with a digit one ("m1x5_0");
	// presumably "mlx5_0" was intended. It is used consistently below, so
	// the test is unaffected.
	wantTags := map[string]string{
		"device": "m1x5_0",
		"port":   "1",
	}

	var acc testutil.Accumulator
	addStats("m1x5_0", "1", sampleEntries, &acc)

	acc.AssertContainsTaggedFields(t, "infiniband", wantFields, wantTags)
}