Skip to content

Commit

Permalink
CLI: support per-backend cumulative ("total") latencies
Browse files Browse the repository at this point in the history
* revise 'ais performance latency'
* use `.total.` latencies and their respective counters
* related commit: 5fd1015

Signed-off-by: Alex Aizman <[email protected]>
  • Loading branch information
alex-aizman committed Jul 23, 2024
1 parent abffbe3 commit 988051c
Show file tree
Hide file tree
Showing 7 changed files with 74 additions and 52 deletions.
3 changes: 2 additions & 1 deletion ais/backend/common.go
Original file line number Diff line number Diff line change
Expand Up @@ -22,12 +22,13 @@ type base struct {
metrics map[string]string // this backend's metric names (below)
}

// NOTE: `stats.LatencyToCounter()` - a public helper that relies on the naming convention below
func (b *base) init(snode *meta.Snode, tstats stats.Tracker) {
prefix := b.provider
if prefix == apc.AIS {
prefix = apc.RemAIS
}
b.metrics = make(map[string]string, 8)
b.metrics = make(map[string]string, 12)
b.metrics[stats.GetCount] = prefix + "." + stats.GetCount
b.metrics[stats.GetLatencyTotal] = prefix + "." + stats.GetLatencyTotal
b.metrics[stats.GetE2ELatencyTotal] = prefix + "." + stats.GetE2ELatencyTotal
Expand Down
59 changes: 21 additions & 38 deletions cmd/cli/cli/performance.go
Original file line number Diff line number Diff line change
Expand Up @@ -235,28 +235,26 @@ func showLatencyHandler(c *cli.Context) error {

_warnThruLatIters(c)

// statically filter metrics (names)
// statically filter metrics (names):
// take sizes and latencies that _map_ to their respective counters

selected := make(cos.StrKVs, len(metrics))
for name, kind := range metrics {
switch name {
case stats.GetSize, stats.GetCount, stats.PutSize, stats.PutCount, stats.AppendCount:
if name == stats.GetSize || name == stats.PutSize {
selected[name] = kind
continue
}

switch {
case strings.HasSuffix(name, "."+stats.GetCount) || strings.HasSuffix(name, "."+stats.GetSize):
selected[name] = kind
case strings.HasSuffix(name, "."+stats.PutCount) || strings.HasSuffix(name, "."+stats.PutSize):
selected[name] = kind
case strings.HasSuffix(name, ".ns") && name != stats.Uptime: // NOTE: not including and not handling "*.ns.total"
selected[name] = kind
case stats.IsErrMetric(name):
if strings.Contains(name, "get") || strings.Contains(name, "put") || strings.Contains(name, "append") {
selected[name] = kind
}
if kind != stats.KindLatency && kind != stats.KindTotal {
continue
}
ncounter := stats.LatencyToCounter(name)
if ncounter == "" {
continue
}
selected[name] = kind
selected[ncounter] = stats.KindCounter
}

// `true` to show (and put request latency numbers in perspective)
return showPerfTab(c, selected, _latency, cmdShowLatency, nil, true)
}
Expand All @@ -272,32 +270,17 @@ func _latency(c *cli.Context, metrics cos.StrKVs, mapBegin, mapEnd teb.StstMap,
continue
}
for name, v := range begin.Tracker {
if kind, ok := metrics[name]; !ok || kind != stats.KindLatency {
kind, ok := metrics[name]
if !ok {
continue
}
if kind != stats.KindLatency && kind != stats.KindTotal {
continue
}
vend := end.Tracker[name]
ncounter := name[:len(name)-1] // ".ns" => ".n"
switch name {
case stats.GetLatency, stats.GetRedirLatency:
ncounter = stats.GetCount

// TODO -- FIXME: transition to using totals (ais/backend/common.go)
case stats.GetColdRwLatency:
if _, ok := metrics["aws."+stats.GetCount]; ok {
ncounter = "aws." + stats.GetCount
} else if _, ok := metrics["gcp."+stats.GetCount]; ok {
ncounter = "gcp." + stats.GetCount
} else if _, ok := metrics["azure."+stats.GetCount]; ok {
ncounter = "azure." + stats.GetCount
} else {
v.Value = 0
begin.Tracker[name] = v
continue
}
case stats.PutLatency, stats.PutRedirLatency:
ncounter = stats.PutCount
case stats.AppendLatency:
ncounter = stats.AppendCount
ncounter := stats.LatencyToCounter(name)
if ncounter == "" {
continue
}
if cntBegin, ok1 := begin.Tracker[ncounter]; ok1 {
if cntEnd, ok2 := end.Tracker[ncounter]; ok2 && cntEnd.Value > cntBegin.Value {
Expand Down
2 changes: 1 addition & 1 deletion cmd/cli/go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@ module github.com/NVIDIA/aistore/cmd/cli
go 1.22.3

require (
github.com/NVIDIA/aistore v1.3.24-0.20240720153031-67251ecbcd25
github.com/NVIDIA/aistore v1.3.24-0.20240722230742-1c3f24573b25
github.com/fatih/color v1.17.0
github.com/json-iterator/go v1.1.12
github.com/onsi/ginkgo/v2 v2.19.0
Expand Down
4 changes: 2 additions & 2 deletions cmd/cli/go.sum
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
code.cloudfoundry.org/bytefmt v0.0.0-20190710193110-1eb035ffe2b6/go.mod h1:wN/zk7mhREp/oviagqUXY3EwuHhWyOvAdsn5Y4CzOrc=
github.com/BurntSushi/toml v1.3.2/go.mod h1:CxXYINrC8qIiEnFrOxCa7Jy5BFHlXnUU2pbicEuybxQ=
github.com/NVIDIA/aistore v1.3.24-0.20240720153031-67251ecbcd25 h1:Po2p67Dvq2q4kmbH/0Gv+5avQCilKXCpwo9NarVcSQ8=
github.com/NVIDIA/aistore v1.3.24-0.20240720153031-67251ecbcd25/go.mod h1:A4wCIW7GooZSzDxTxh4pS092Ve9gCiXh1EvtjlVB8ew=
github.com/NVIDIA/aistore v1.3.24-0.20240722230742-1c3f24573b25 h1:RYiPcXgY4/TzqMi+JfMqVwdfpqMVpjq92E6rodTWpvU=
github.com/NVIDIA/aistore v1.3.24-0.20240722230742-1c3f24573b25/go.mod h1:A4wCIW7GooZSzDxTxh4pS092Ve9gCiXh1EvtjlVB8ew=
github.com/OneOfOne/xxhash v1.2.8 h1:31czK/TI9sNkxIKfaUfGlU47BAxQ0ztGgd9vPyqimf8=
github.com/OneOfOne/xxhash v1.2.8/go.mod h1:eZbhyaAYD41SGSSsnmcpxVoRiQ/MPUTjUdIIOT9Um7Q=
github.com/VividCortex/ewma v1.1.1/go.mod h1:2Tkkvm3sRDVXaiyucHiACn4cqf7DpdyLvmxzcbUokwA=
Expand Down
20 changes: 12 additions & 8 deletions cmd/cli/teb/performance.go
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
// Package teb contains templates and (templated) tables to format CLI output.
/*
* Copyright (c) 2018-2023, NVIDIA CORPORATION. All rights reserved.
* Copyright (c) 2018-2024, NVIDIA CORPORATION. All rights reserved.
*/
package teb

Expand Down Expand Up @@ -312,24 +312,28 @@ func _metricToPrintedColName(mname string, cols []*header, metrics, n2n cos.StrK
printedName = strings.ToUpper(parts[0])
}

// middle name (is every name in-between)
for j := 1; j < len(parts)-1; j++ {
// middle name
l := len(parts) - 1
if parts[l] == "total" { // latency; see related: `stats.LatencyToCounter`
l--
}
for j := 1; j < l; j++ {
printedName += "-" + strings.ToUpper(parts[j])
}

// suffix
switch {
case kind == stats.KindThroughput || kind == stats.KindComputedThroughput:
switch kind {
case stats.KindThroughput, stats.KindComputedThroughput:
printedName += "(bw)"
case kind == stats.KindLatency:
case stats.KindLatency, stats.KindTotal:
printedName += "(t)"
case kind == stats.KindSize:
case stats.KindSize:
if n2n != nil && _present(cols, metrics, mname, n2n) {
printedName += "(total/avg size)"
} else {
printedName += "(size)"
}
case kind == stats.KindCounter:
case stats.KindCounter:
printedName += "(n)"
}
return
Expand Down
2 changes: 1 addition & 1 deletion cmd/cli/teb/units.go
Original file line number Diff line number Diff line change
Expand Up @@ -108,7 +108,7 @@ func FmtStatValue(name, kind string, value int64, units string) string {
return "0"
}
// uptime
if strings.HasSuffix(name, ".time") || kind == stats.KindLatency {
if strings.HasSuffix(name, ".time") || kind == stats.KindLatency || kind == stats.KindTotal {
return FmtDuration(value, units)
}
// units (enum)
Expand Down
36 changes: 35 additions & 1 deletion stats/api.go
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
// Package stats provides methods and functionality to register, track, log,
// and StatsD-notify statistics that, for the most part, include "counter" and "latency" kinds.
/*
* Copyright (c) 2018-2023, NVIDIA CORPORATION. All rights reserved.
* Copyright (c) 2018-2024, NVIDIA CORPORATION. All rights reserved.
*/
package stats

Expand Down Expand Up @@ -111,3 +111,37 @@ type (
// IsErrMetric reports whether the metric name begins with errPrefix
// (e.g., name = ErrHTTPWriteCount).
func IsErrMetric(name string) bool {
	_, isErr := strings.CutPrefix(name, errPrefix)
	return isErr
}

// LatencyToCounter maps a latency metric name to the name of its respective
// counter; it returns "" when the latency has no counter to map to.
//
// Two groups of names are recognized:
//   - the basic GET, PUT, list, and append latencies, which map to
//     GetCount, PutCount, ListCount, and AppendCount, respectively;
//   - per-backend cumulative latencies of the form
//     "<provider>.<...>.ns.total" that contain ".get." or ".put.";
//     these map to "<provider>." + GetCount or "<provider>." + PutCount.
//
// Compare with base.init() at ais/backend/common, where backend metric names
// are built as prefix + "." + name, with apc.RemAIS used in place of apc.AIS.
func LatencyToCounter(latency string) string {
	// 1. basics first
	switch latency {
	case GetLatency, GetRedirLatency:
		return GetCount
	case PutLatency, PutRedirLatency:
		return PutCount
	case ListLatency:
		return ListCount
	case AppendLatency:
		return AppendCount
	}

	// 2. filter out: only backend (prefixed) cumulative latencies remain
	if !strings.HasSuffix(latency, ".ns.total") {
		return ""
	}

	// 3. the operation does not depend on the provider: resolve it once
	var counter string
	switch {
	case strings.Contains(latency, ".get."):
		counter = GetCount
	case strings.Contains(latency, ".put."):
		counter = PutCount
	default:
		// neither GET nor PUT: do not guess
		return ""
	}

	// 4. backend: match on "<provider>." (with the delimiter) so that a name
	// that merely starts with the provider's letters is never taken for it
	for provider := range apc.Providers {
		if provider == apc.AIS {
			provider = apc.RemAIS
		}
		if strings.HasPrefix(latency, provider+".") {
			return provider + "." + counter
		}
	}
	return ""
}

0 comments on commit 988051c

Please sign in to comment.