Skip to content

Commit

Permalink
fix: Grafana dashboard shows negative packet loss
Browse files Browse the repository at this point in the history
  • Loading branch information
clambin committed Aug 14, 2021
1 parent 3be69e7 commit 8d96f19
Show file tree
Hide file tree
Showing 8 changed files with 260 additions and 177 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/build.yml
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ on:
branches:
- master
- refactor
- metrics
- fix

jobs:
test:
Expand Down
2 changes: 1 addition & 1 deletion .github/workflows/test.yml
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ on:
branches-ignore:
- master
- refactor
- metrics
- fix
pull_request_target:

jobs:
Expand Down
146 changes: 70 additions & 76 deletions assets/grafana/dashboards/pinger.json
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,12 @@
"hide": true,
"iconColor": "rgba(0, 211, 255, 1)",
"name": "Annotations & Alerts",
"target": {
"limit": 100,
"matchAny": false,
"tags": [],
"type": "dashboard"
},
"type": "dashboard"
}
]
Expand All @@ -19,106 +25,95 @@
"links": [],
"panels": [
{
"aliasColors": {},
"bars": false,
"dashLength": 10,
"dashes": false,
"datasource": "Prometheus",
"description": "",
"fieldConfig": {
"defaults": {
"color": {
"mode": "palette-classic"
},
"custom": {
"axisLabel": "",
"axisPlacement": "auto",
"barAlignment": 0,
"drawStyle": "line",
"fillOpacity": 10,
"gradientMode": "none",
"hideFrom": {
"legend": false,
"tooltip": false,
"viz": false
},
"lineInterpolation": "linear",
"lineWidth": 1,
"pointSize": 5,
"scaleDistribution": {
"type": "linear"
},
"showPoints": "never",
"spanNulls": false,
"stacking": {
"group": "A",
"mode": "none"
},
"thresholdsStyle": {
"mode": "off"
}
},
"links": [],
"mappings": [],
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "green",
"value": null
},
{
"color": "red",
"value": 80
}
]
},
"unit": "s"
},
"overrides": []
},
"fill": 1,
"fillGradient": 0,
"gridPos": {
"h": 11,
"w": 12,
"x": 0,
"y": 0
},
"hiddenSeries": false,
"id": 4,
"legend": {
"alignAsTable": true,
"avg": true,
"current": true,
"max": true,
"min": true,
"rightSide": false,
"show": true,
"total": false,
"values": true
},
"lines": true,
"linewidth": 1,
"nullPointMode": "null",
"options": {
"alertThreshold": true
"legend": {
"calcs": [
"mean",
"lastNotNull",
"max",
"min"
],
"displayMode": "table",
"placement": "bottom"
},
"tooltip": {
"mode": "single"
}
},
"percentage": false,
"pluginVersion": "8.0.6",
"pointradius": 2,
"points": false,
"renderer": "flot",
"seriesOverrides": [],
"spaceLength": 10,
"stack": false,
"steppedLine": false,
"targets": [
{
"exemplar": true,
"expr": "sum by (host) (pinger_latency_seconds / pinger_packet_count)",
"interval": "",
"legendFormat": "{{host}}",
"refId": "B"
}
],
"thresholds": [],
"timeFrom": null,
"timeRegions": [],
"timeShift": null,
"title": "Latency",
"tooltip": {
"shared": true,
"sort": 0,
"value_type": "individual"
},
"type": "graph",
"xaxis": {
"buckets": null,
"mode": "time",
"name": null,
"show": true,
"values": []
},
"yaxes": [
{
"$$hashKey": "object:83",
"format": "s",
"label": null,
"logBase": 1,
"max": null,
"min": null,
"show": true
},
{
"$$hashKey": "object:84",
"format": "short",
"label": null,
"logBase": 1,
"max": null,
"min": null,
"show": true
}
],
"yaxis": {
"align": false,
"alignLevel": null
}
"type": "timeseries"
},
{
"datasource": "Prometheus",
Expand All @@ -140,7 +135,7 @@
"tooltip": false,
"viz": false
},
"lineInterpolation": "stepAfter",
"lineInterpolation": "stepBefore",
"lineWidth": 1,
"pointSize": 5,
"scaleDistribution": {
Expand All @@ -158,7 +153,6 @@
},
"links": [],
"mappings": [],
"min": 0,
"thresholds": {
"mode": "absolute",
"steps": [
Expand Down Expand Up @@ -202,7 +196,7 @@
"targets": [
{
"exemplar": true,
"expr": "sum by (host) (increase(pinger_packet_loss_count[2m]))",
"expr": "sum by (host) (pinger_packet_loss_count)",
"interval": "",
"legendFormat": "{{host}}",
"refId": "B"
Expand All @@ -214,15 +208,15 @@
"type": "timeseries"
}
],
"refresh": "10s",
"refresh": "5m",
"schemaVersion": 30,
"style": "dark",
"tags": [],
"templating": {
"list": []
},
"time": {
"from": "now-15m",
"from": "now-1h",
"to": "now"
},
"timepicker": {
Expand All @@ -242,5 +236,5 @@
"timezone": "",
"title": "Pinger",
"uid": "WHj4zUggz",
"version": 5
"version": 23
}
18 changes: 9 additions & 9 deletions pinger/collect.go
Original file line number Diff line number Diff line change
Expand Up @@ -6,21 +6,21 @@ import (
)

// Describe interface for Prometheus collector
func (pinger *Pinger) Describe(ch chan<- *prometheus.Desc) {
ch <- pinger.packetsMetric
ch <- pinger.lossMetric
ch <- pinger.latencyMetric
func (monitor *Monitor) Describe(ch chan<- *prometheus.Desc) {
	// Emit the descriptors in a fixed order: packet count, packet loss, latency.
	for _, desc := range []*prometheus.Desc{
		monitor.packetsMetric,
		monitor.lossMetric,
		monitor.latencyMetric,
	} {
		ch <- desc
	}
}

// Collect interface for Prometheus collector
func (pinger *Pinger) Collect(ch chan<- prometheus.Metric) {
for host, tracker := range pinger.Trackers {
func (monitor *Monitor) Collect(ch chan<- prometheus.Metric) {
for host, tracker := range monitor.Trackers {
count, loss, latency := tracker.Calculate()

log.WithFields(log.Fields{"host": host, "count": count, "loss": loss, "latency": latency}).Debug()

ch <- prometheus.MustNewConstMetric(pinger.packetsMetric, prometheus.GaugeValue, float64(count), host)
ch <- prometheus.MustNewConstMetric(pinger.lossMetric, prometheus.GaugeValue, float64(loss), host)
ch <- prometheus.MustNewConstMetric(pinger.latencyMetric, prometheus.GaugeValue, latency.Seconds(), host)
ch <- prometheus.MustNewConstMetric(monitor.packetsMetric, prometheus.GaugeValue, float64(count), host)
ch <- prometheus.MustNewConstMetric(monitor.lossMetric, prometheus.GaugeValue, float64(loss), host)
ch <- prometheus.MustNewConstMetric(monitor.latencyMetric, prometheus.GaugeValue, latency.Seconds(), host)
}
}
68 changes: 46 additions & 22 deletions pinger/pinger.go
Original file line number Diff line number Diff line change
Expand Up @@ -14,62 +14,84 @@ import (
"time"
)

type Pinger struct {
// Monitor pings a number of hosts and measures latency & packet loss
type Monitor struct {
// Pinger is the function started for each host; it receives the host and
// the channel on which to report responses. New sets it to spawnedPinger.
Pinger func(host string, ch chan PingResponse) (err error)
// Trackers holds one PingTracker per monitored host, keyed by host.
Trackers map[string]*pingtracker.PingTracker
// packets carries the responses from the pingers to Run.
packets chan PingResponse
// Descriptors for the packet count, packet loss and latency metrics.
packetsMetric *prometheus.Desc
lossMetric *prometheus.Desc
latencyMetric *prometheus.Desc
}

// New creates a Pinger for the specified hosts
func New(hosts []string) (pinger *Pinger) {
pinger = &Pinger{
// PingResponse is a single ping reply reported by a pinger on its channel.
type PingResponse struct {
// Host is the host the pinger was started for.
Host string
// SequenceNr is the sequence number of the reply.
SequenceNr int
// Latency is the reply's round-trip time as parsed from the ping output.
Latency time.Duration
}

// New creates a Monitor for the specified hosts
func New(hosts []string) (monitor *Monitor) {
monitor = &Monitor{
Pinger: spawnedPinger,
Trackers: make(map[string]*pingtracker.PingTracker),
packets: make(chan PingResponse),
packetsMetric: prometheus.NewDesc(
prometheus.BuildFQName("pinger", "", "packet_count"),
"Pinger total packet count",
"Monitor total packet count",
[]string{"host"},
nil,
),
lossMetric: prometheus.NewDesc(
prometheus.BuildFQName("pinger", "", "packet_loss_count"),
"Pinger total measured packet loss",
"Monitor total measured packet loss",
[]string{"host"},
nil,
),
latencyMetric: prometheus.NewDesc(
prometheus.BuildFQName("pinger", "", "latency_seconds"),
"Pinger latency in seconds",
"Monitor latency in seconds",
[]string{"host"},
nil,
),
}

for _, host := range hosts {
pinger.Trackers[host] = pingtracker.New()
monitor.Trackers[host] = pingtracker.New()
}

return
}

// Run starts the pingers
func (pinger *Pinger) Run(ctx context.Context) {
for host, tracker := range pinger.Trackers {
log.WithField("host", host).Debug("starting tracker")
go func(host string, tracker *pingtracker.PingTracker) {
err := spawnedPinger(host, tracker)
// Run launches the pingers and then feeds every response received on the
// packets channel to the tracker of the matching host. It blocks until ctx
// is cancelled.
func (monitor *Monitor) Run(ctx context.Context) {
	monitor.startPingers()

	for {
		select {
		case response := <-monitor.packets:
			// The tracker is looked up without a presence check, so every
			// response must carry a host that has an entry in Trackers.
			monitor.Trackers[response.Host].Track(response.SequenceNr, response.Latency)
		case <-ctx.Done():
			return
		}
	}
}

func (monitor *Monitor) startPingers() {
for host := range monitor.Trackers {
log.WithField("host", host).Debug("starting pinger")
go func(host string) {
err := monitor.Pinger(host, monitor.packets)

if err != nil {
log.WithError(err).Error("failed to run tracker")
log.WithError(err).Fatal("failed to run pinger")
}
}(host, tracker)
}(host)
}

<-ctx.Done()
}

// spawnedPinger spawns a ping process and reports each parsed response on the specified channel
func spawnedPinger(host string, tracker *pingtracker.PingTracker) (err error) {
func spawnedPinger(host string, ch chan PingResponse) (err error) {
var (
cmd string
pingOut io.ReadCloser
Expand Down Expand Up @@ -104,9 +126,11 @@ func spawnedPinger(host string, tracker *pingtracker.PingTracker) (err error) {
rtt, _ = strconv.ParseFloat(match[3], 64)
latency = time.Duration(rtt*1000) * time.Microsecond

tracker.Track(seqNr, latency)

// log.Debugf("%s: seqno=%d, latency=%v", host, seqNr, latency)
ch <- PingResponse{
Host: host,
SequenceNr: seqNr,
Latency: latency,
}
}
}
}
Expand Down
Loading

0 comments on commit 8d96f19

Please sign in to comment.