diff --git a/Gopkg.lock b/Gopkg.lock index 749f37d4c061f..477aff14aa4a0 100644 --- a/Gopkg.lock +++ b/Gopkg.lock @@ -80,6 +80,14 @@ revision = "3492b2aff5036c67228ab3c7dba3577c871db200" version = "v13.3.0" +[[projects]] + branch = "master" + digest = "1:005d83d9daaea4e3fc7b2eedf28f68ebf87df7d331a874e5d7d14f643467e7d9" + name = "github.com/Mellanox/rdmamap" + packages = ["."] + pruneopts = "" + revision = "7c3c4763a6ee6a4d624fe133135dc3a7c483111c" + [[projects]] digest = "1:298712a3ee36b59c3ca91f4183bd75d174d5eaa8b4aed5072831f126e2e752f6" name = "github.com/Microsoft/ApplicationInsights-Go" @@ -1197,7 +1205,10 @@ [[projects]] digest = "1:026b6ceaabbacaa147e94a63579efc3d3c73e00c73b67fa5c43ab46191ed04eb" name = "github.com/vishvananda/netlink" - packages = ["nl"] + packages = [ + ".", + "nl", + ] pruneopts = "" revision = "b2de5d10e38ecce8607e6b438b6d174f389a004e" @@ -1712,6 +1723,7 @@ "github.com/Azure/azure-storage-queue-go/azqueue", "github.com/Azure/go-autorest/autorest", "github.com/Azure/go-autorest/autorest/azure/auth", + "github.com/Mellanox/rdmamap", "github.com/Microsoft/ApplicationInsights-Go/appinsights", "github.com/Shopify/sarama", "github.com/StackExchange/wmi", diff --git a/Gopkg.toml b/Gopkg.toml index 5604fd36228e3..b4304c61c2d9b 100644 --- a/Gopkg.toml +++ b/Gopkg.toml @@ -297,6 +297,10 @@ branch = "master" name = "github.com/cisco-ie/nx-telemetry-proto" +[[constraint]] + branch = "master" + name = "github.com/Mellanox/rdmamap" + [[constraint]] name = "gopkg.in/ldap.v3" version = "3.1.0" diff --git a/plugins/inputs/all/all.go b/plugins/inputs/all/all.go index 3ce9823f63f51..5860ac6c604b5 100644 --- a/plugins/inputs/all/all.go +++ b/plugins/inputs/all/all.go @@ -56,6 +56,7 @@ import ( _ "github.com/influxdata/telegraf/plugins/inputs/http_response" _ "github.com/influxdata/telegraf/plugins/inputs/httpjson" _ "github.com/influxdata/telegraf/plugins/inputs/icinga2" + _ "github.com/influxdata/telegraf/plugins/inputs/infiniband" _ 
"github.com/influxdata/telegraf/plugins/inputs/influxdb" _ "github.com/influxdata/telegraf/plugins/inputs/influxdb_listener" _ "github.com/influxdata/telegraf/plugins/inputs/internal" diff --git a/plugins/inputs/infiniband/README.md b/plugins/inputs/infiniband/README.md new file mode 100644 index 0000000000000..6f2e85a96b21b --- /dev/null +++ b/plugins/inputs/infiniband/README.md @@ -0,0 +1,29 @@ +# InfiniBand Input Plugin + +This plugin gathers statistics for all InfiniBand devices and ports on the system. These are the counters that can be found in `/sys/class/infiniband/<dev>/ports/<port>/counters/` + +### Configuration + +This section contains the default TOML to configure the plugin. You can +generate it using `telegraf --usage infiniband`. + +```toml +[[inputs.infiniband]] +``` + +There are no configuration options for this plugin. + +### Metrics + +You can find more information about the counters that are gathered here: +https://community.mellanox.com/s/article/understanding-mlx5-linux-counters-and-status-parameters + +There is a simple mapping from counter -> counter value. All counter values are 64-bit integers. A separate measurement is made for each port. +Each measurement is tagged with the device and port that it relates to. These are strings. 
+ + +### Example Output + +``` +infiniband,device=mlx5_0,port=1 VL15_dropped=0i,excessive_buffer_overrun_errors=0i,link_downed=0i,link_error_recovery=0i,local_link_integrity_errors=0i,multicast_rcv_packets=0i,multicast_xmit_packets=0i,port_rcv_constraint_errors=0i,port_rcv_data=237159415345822i,port_rcv_errors=0i,port_rcv_packets=801977655075i,port_rcv_remote_physical_errors=0i,port_rcv_switch_relay_errors=0i,port_xmit_constraint_errors=0i,port_xmit_data=238334949937759i,port_xmit_discards=0i,port_xmit_packets=803162651391i,port_xmit_wait=4294967295i,symbol_error=0i,unicast_rcv_packets=801977655075i,unicast_xmit_packets=803162651391i 1573125558000000000 +``` diff --git a/plugins/inputs/infiniband/infiniband.go b/plugins/inputs/infiniband/infiniband.go new file mode 100644 index 0000000000000..65e1d6c712998 --- /dev/null +++ b/plugins/inputs/infiniband/infiniband.go @@ -0,0 +1,22 @@ +package infiniband + +import ( + "github.com/influxdata/telegraf" +) + +// Stores the configuration values for the infiniband plugin - as there are no +// config values, this is intentionally empty +type Infiniband struct { + Log telegraf.Logger `toml:"-"` +} + +// Sample configuration for plugin +var InfinibandConfig = `` + +func (_ *Infiniband) SampleConfig() string { + return InfinibandConfig +} + +func (_ *Infiniband) Description() string { + return "Gets counters from all InfiniBand cards and ports installed" +} diff --git a/plugins/inputs/infiniband/infiniband_linux.go b/plugins/inputs/infiniband/infiniband_linux.go new file mode 100644 index 0000000000000..48cd8a428900d --- /dev/null +++ b/plugins/inputs/infiniband/infiniband_linux.go @@ -0,0 +1,59 @@ +// +build linux + +package infiniband + +import ( + "fmt" + "github.com/Mellanox/rdmamap" + "github.com/influxdata/telegraf" + "github.com/influxdata/telegraf/plugins/inputs" + "strconv" +) + +// Gather statistics from our infiniband cards +func (_ *Infiniband) Gather(acc telegraf.Accumulator) error { + + rdmaDevices := 
rdmamap.GetRdmaDeviceList() + + if len(rdmaDevices) == 0 { + return fmt.Errorf("no InfiniBand devices found in /sys/class/infiniband/") + } + + for _, dev := range rdmaDevices { + devicePorts := rdmamap.GetPorts(dev) + for _, port := range devicePorts { + portInt, err := strconv.Atoi(port) + if err != nil { + return err + } + + stats, err := rdmamap.GetRdmaSysfsStats(dev, portInt) + if err != nil { + return err + } + + addStats(dev, port, stats, acc) + } + } + + return nil +} + +// Add the statistics to the accumulator +func addStats(dev string, port string, stats []rdmamap.RdmaStatEntry, acc telegraf.Accumulator) { + + // Allow users to filter by card and port + tags := map[string]string{"device": dev, "port": port} + fields := make(map[string]interface{}) + + for _, entry := range stats { + fields[entry.Name] = entry.Value + } + + acc.AddFields("infiniband", fields, tags) +} + +// Initialise plugin +func init() { + inputs.Add("infiniband", func() telegraf.Input { return &Infiniband{} }) +} diff --git a/plugins/inputs/infiniband/infiniband_notlinux.go b/plugins/inputs/infiniband/infiniband_notlinux.go new file mode 100644 index 0000000000000..5b19672d975d8 --- /dev/null +++ b/plugins/inputs/infiniband/infiniband_notlinux.go @@ -0,0 +1,23 @@ +// +build !linux + +package infiniband + +import ( + "github.com/influxdata/telegraf" + "github.com/influxdata/telegraf/plugins/inputs" +) + +func (i *Infiniband) Init() error { + i.Log.Warn("Current platform is not supported") + return nil +} + +func (_ *Infiniband) Gather(acc telegraf.Accumulator) error { + return nil +} + +func init() { + inputs.Add("infiniband", func() telegraf.Input { + return &Infiniband{} + }) +} diff --git a/plugins/inputs/infiniband/infiniband_test.go b/plugins/inputs/infiniband/infiniband_test.go new file mode 100644 index 0000000000000..6c4bb24587f4a --- /dev/null +++ b/plugins/inputs/infiniband/infiniband_test.go @@ -0,0 +1,134 @@ +// +build linux + +package infiniband + +import ( + 
"github.com/Mellanox/rdmamap" + "github.com/influxdata/telegraf/testutil" + "testing" +) + +func TestInfiniband(t *testing.T) { + fields := map[string]interface{}{ + "excessive_buffer_overrun_errors": uint64(0), + "link_downed": uint64(0), + "link_error_recovery": uint64(0), + "local_link_integrity_errors": uint64(0), + "multicast_rcv_packets": uint64(0), + "multicast_xmit_packets": uint64(0), + "port_rcv_constraint_errors": uint64(0), + "port_rcv_data": uint64(237159415345822), + "port_rcv_errors": uint64(0), + "port_rcv_packets": uint64(801977655075), + "port_rcv_remote_physical_errors": uint64(0), + "port_rcv_switch_relay_errors": uint64(0), + "port_xmit_constraint_errors": uint64(0), + "port_xmit_data": uint64(238334949937759), + "port_xmit_discards": uint64(0), + "port_xmit_packets": uint64(803162651391), + "port_xmit_wait": uint64(4294967295), + "symbol_error": uint64(0), + "unicast_rcv_packets": uint64(801977655075), + "unicast_xmit_packets": uint64(803162651391), + "VL15_dropped": uint64(0), + } + + tags := map[string]string{ + "device": "m1x5_0", + "port": "1", + } + + sample_rdmastats_entries := []rdmamap.RdmaStatEntry{ + { + Name: "excessive_buffer_overrun_errors", + Value: uint64(0), + }, + { + Name: "link_downed", + Value: uint64(0), + }, + { + Name: "link_error_recovery", + Value: uint64(0), + }, + { + Name: "local_link_integrity_errors", + Value: uint64(0), + }, + { + Name: "multicast_rcv_packets", + Value: uint64(0), + }, + { + Name: "multicast_xmit_packets", + Value: uint64(0), + }, + { + Name: "port_rcv_constraint_errors", + Value: uint64(0), + }, + { + Name: "port_rcv_data", + Value: uint64(237159415345822), + }, + { + Name: "port_rcv_errors", + Value: uint64(0), + }, + { + Name: "port_rcv_packets", + Value: uint64(801977655075), + }, + { + Name: "port_rcv_remote_physical_errors", + Value: uint64(0), + }, + { + Name: "port_rcv_switch_relay_errors", + Value: uint64(0), + }, + { + Name: "port_xmit_constraint_errors", + Value: uint64(0), + }, + { + 
Name: "port_xmit_data", + Value: uint64(238334949937759), + }, + { + Name: "port_xmit_discards", + Value: uint64(0), + }, + { + Name: "port_xmit_packets", + Value: uint64(803162651391), + }, + { + Name: "port_xmit_wait", + Value: uint64(4294967295), + }, + { + Name: "symbol_error", + Value: uint64(0), + }, + { + Name: "unicast_rcv_packets", + Value: uint64(801977655075), + }, + { + Name: "unicast_xmit_packets", + Value: uint64(803162651391), + }, + { + Name: "VL15_dropped", + Value: uint64(0), + }, + } + + var acc testutil.Accumulator + + addStats("m1x5_0", "1", sample_rdmastats_entries, &acc) + + acc.AssertContainsTaggedFields(t, "infiniband", fields, tags) + +}