Skip to content

Commit

Permalink
Added end to end message latency calculations
Browse files Browse the repository at this point in the history
This commit adds new statistics about how long individual messages take
from being published to being finished.  We maintain several percentile
values, the exactly quantiles which can be specified as runtime
parameters to nsqd.  In addition, this information is displayed quite
shnazily in nsqadmin.
  • Loading branch information
mynameisfiber committed Oct 29, 2013
1 parent c314fde commit 0be672e
Show file tree
Hide file tree
Showing 19 changed files with 607 additions and 46 deletions.
1 change: 1 addition & 0 deletions .travis.yml
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@ install:
- go get github.com/bitly/go-hostpool
- go get github.com/mreiferson/go-snappystream
- go get github.com/bmizerany/assert
- go get github.com/bmizerany/perks/quantile
script:
- export PATH="$HOME/gopath/bin:$PATH"
- ./test.sh
Expand Down
5 changes: 5 additions & 0 deletions Godeps
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,11 @@
"ImportPath": "github.com/kr/text",
"Comment": "",
"Rev": "6807e777504f54ad073ecef66747de158294b639"
},
{
"ImportPath": "github.com/bmizerany/perks",
"Comment": "",
"Rev": "da72989a59aaaecda7110926d3a6198ee4421c1f"
}
]
}
51 changes: 33 additions & 18 deletions nsqadmin/graphs.go
Original file line number Diff line number Diff line change
Expand Up @@ -16,18 +16,18 @@ import (
)

type GraphTarget interface {
Target(key string) (string, string)
Target(key string) ([]string, string)
Host() string
}

type Node string

func (n Node) Target(key string) (string, string) {
func (n Node) Target(key string) ([]string, string) {
target := fmt.Sprintf("%%smem.%s", key)
if key == "gc_runs" {
target = fmt.Sprintf("movingAverage(%s,45)", target)
}
return target, "red,green,blue,purple"
return []string{target}, "red,green,blue,purple"
}

func (n Node) Host() string {
Expand All @@ -49,13 +49,13 @@ func TopicsFromStrings(s []string) Topics {
return t
}

func (t *Topic) Target(key string) (string, string) {
func (t *Topic) Target(key string) ([]string, string) {
color := "blue"
if key == "depth" || key == "deferred_count" {
color = "red"
}
target := fmt.Sprintf("sumSeries(%%stopic.%s.%s)", t.TopicName, key)
return target, color
return []string{target}, color
}

func (t *Topic) Host() string {
Expand Down Expand Up @@ -188,8 +188,6 @@ func (g *GraphOptions) Prefix(host string, metricType string) string {
}

func (g *GraphOptions) Sparkline(gr GraphTarget, key string) template.URL {
target, color := gr.Target(key)
target = fmt.Sprintf(target, g.Prefix(gr.Host(), metricType(key)))
params := url.Values{}
params.Set("height", "20")
params.Set("width", "120")
Expand All @@ -199,40 +197,55 @@ func (g *GraphOptions) Sparkline(gr GraphTarget, key string) template.URL {
params.Set("bgcolor", "ff000000") // transparent
params.Set("fgcolor", "black")
params.Set("margin", "0")
params.Set("colorList", color)
params.Set("yMin", "0")
params.Set("lineMode", "connected")
params.Set("drawNullAsZero", "false")

interval := fmt.Sprintf("%dsec", *statsdInterval/time.Second)
params.Set("target", fmt.Sprintf(`summarize(%s,"%s","avg")`, target, interval))
targets, color := gr.Target(key)
for _, target := range targets {
target = fmt.Sprintf(target, g.Prefix(gr.Host(), metricType(key)))
params.Add("target", fmt.Sprintf(`summarize(%s,"%s","avg")`, target, interval))
}
params.Add("colorList", color)

params.Set("from", g.GraphInterval.GraphFrom)
params.Set("until", g.GraphInterval.GraphUntil)
return template.URL(fmt.Sprintf("%s/render?%s", g.GraphiteUrl, params.Encode()))
}

func (g *GraphOptions) LargeGraph(gr GraphTarget, key string) template.URL {
target, color := gr.Target(key)
target = fmt.Sprintf(target, g.Prefix(gr.Host(), metricType(key)))
params := url.Values{}
params.Set("height", "450")
params.Set("width", "800")
params.Set("bgcolor", "ff000000") // transparent
params.Set("fgcolor", "999999")
params.Set("colorList", color)
params.Set("yMin", "0")
params.Set("lineMode", "connected")
params.Set("drawNullAsZero", "false")

interval := fmt.Sprintf("%dsec", *statsdInterval/time.Second)
target = fmt.Sprintf(`summarize(%s,"%s","avg")`, target, interval)
if metricType(key) == "counter" {
scale := fmt.Sprintf("%.04f", 1/float64(*statsdInterval/time.Second))
target = fmt.Sprintf(`scale(%s,%s)`, target, scale)
targets, color := gr.Target(key)
for _, target := range targets {
target = fmt.Sprintf(target, g.Prefix(gr.Host(), metricType(key)))
target = fmt.Sprintf(`summarize(%s,"%s","avg")`, target, interval)
if metricType(key) == "counter" {
scale := fmt.Sprintf("%.04f", 1/float64(*statsdInterval/time.Second))
target = fmt.Sprintf(`scale(%s,%s)`, target, scale)
}
log.Println("Adding target: ", target)
params.Add("target", target)
}
params.Set("target", target)
params.Add("colorList", color)

params.Set("from", g.GraphInterval.GraphFrom)
params.Set("until", g.GraphInterval.GraphUntil)
return template.URL(fmt.Sprintf("%s/render?%s", g.GraphiteUrl, params.Encode()))
}

func (g *GraphOptions) Rate(gr GraphTarget) string {
target, _ := gr.Target("message_count")
return fmt.Sprintf(target, g.Prefix(gr.Host(), metricType("message_count")))
return fmt.Sprintf(target[0], g.Prefix(gr.Host(), metricType("message_count")))
}

func metricType(key string) string {
Expand All @@ -248,6 +261,8 @@ func metricType(key string) string {
"gc_pause_*": "gauge",
"gc_runs": "counter",
"heap_objects": "gauge",

"e2e_processing_latency": "gauge",
}[key]
}

Expand Down
15 changes: 13 additions & 2 deletions nsqadmin/http.go
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,9 @@ func loadTemplates() {
var err error
t := template.New("nsqadmin").Funcs(template.FuncMap{
"commafy": util.Commafy,
"nanotohuman": util.NanoSecondToHuman,
"floatToPercent": util.FloatToPercent,
"percSuffix": util.PercSuffix,
"getNodeConsistencyClass": getNodeConsistencyClass,
})
templates, err = t.ParseGlob(fmt.Sprintf("%s/*.html", *templateDir))
Expand Down Expand Up @@ -169,6 +172,8 @@ func topicHandler(w http.ResponseWriter, req *http.Request) {
globalTopicStats.Add(t)
}

hasE2eLatency := len(globalTopicStats.E2eProcessingLatency.Percentiles) > 0

p := struct {
Title string
GraphOptions *GraphOptions
Expand All @@ -178,6 +183,7 @@ func topicHandler(w http.ResponseWriter, req *http.Request) {
TopicStats []*lookupd.TopicStats
GlobalTopicStats *lookupd.TopicStats
ChannelStats map[string]*lookupd.ChannelStats
HasE2eLatency bool
}{
Title: fmt.Sprintf("NSQ %s", topicName),
GraphOptions: NewGraphOptions(w, req, reqParams),
Expand All @@ -187,6 +193,7 @@ func topicHandler(w http.ResponseWriter, req *http.Request) {
TopicStats: topicStats,
GlobalTopicStats: globalTopicStats,
ChannelStats: channelStats,
HasE2eLatency: hasE2eLatency,
}
err = templates.ExecuteTemplate(w, "topic.html", p)
if err != nil {
Expand All @@ -207,6 +214,8 @@ func channelHandler(w http.ResponseWriter, req *http.Request, topicName string,
_, allChannelStats, _ := lookupd.GetNSQDStats(producers, topicName)
channelStats := allChannelStats[channelName]

hasE2eLatency := len(channelStats.E2eProcessingLatency.Percentiles) > 0

p := struct {
Title string
GraphOptions *GraphOptions
Expand All @@ -215,6 +224,7 @@ func channelHandler(w http.ResponseWriter, req *http.Request, topicName string,
Channel string
TopicProducers []string
ChannelStats *lookupd.ChannelStats
HasE2eLatency bool
}{
Title: fmt.Sprintf("NSQ %s / %s", topicName, channelName),
GraphOptions: NewGraphOptions(w, req, reqParams),
Expand All @@ -223,6 +233,7 @@ func channelHandler(w http.ResponseWriter, req *http.Request, topicName string,
Channel: channelName,
TopicProducers: producers,
ChannelStats: channelStats,
HasE2eLatency: hasE2eLatency,
}

err = templates.ExecuteTemplate(w, "channel.html", p)
Expand Down Expand Up @@ -686,8 +697,8 @@ func nodesHandler(w http.ResponseWriter, req *http.Request) {

type counterTarget struct{}

func (c counterTarget) Target(key string) (string, string) {
return fmt.Sprintf("sumSeries(%%stopic.*.channel.*.%s)", key), "green"
func (c counterTarget) Target(key string) ([]string, string) {
return []string{fmt.Sprintf("sumSeries(%%stopic.*.channel.*.%s)", key)}, "green"
}

func (c counterTarget) Host() string {
Expand Down
39 changes: 33 additions & 6 deletions nsqadmin/templates/channel.html
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
{{template "header.html" .}}
{{$g := .GraphOptions}}
{{$firstHost := index .ChannelStats.HostStats 0}}

<ul class="breadcrumb">
<li><a href="/">Streams</a> <span class="divider">/</span></li>
Expand Down Expand Up @@ -65,10 +66,9 @@ <h4>Channel</h4>
<tr>
<th>&nbsp;</th>
<th colspan="4" class='text-center'>Message Queues</th>
{{if $g.Enabled}}
<th colspan="4" class='text-center'>Statistics</th>
{{else}}
<th colspan="5" class='text-center'>Statistics</th>
<th colspan="{{if $g.Enabled}}5{{else}}4{{end}}" class='text-center'>Statistics</th>
{{if $firstHost.E2eProcessingLatency.Percentiles}}
<th colspan="{{len $firstHost.E2eProcessingLatency.Percentiles}}">E2E Processing Latency</th>
{{end}}
</tr>
<tr>
Expand All @@ -82,6 +82,9 @@ <h4>Channel</h4>
<th>Messages</th>
{{if $g.Enabled}}<th>Rate</th>{{end}}
<th>Connections</th>
{{range $e2e := $firstHost.E2eProcessingLatency.Percentiles}}
<th>{{$e2e.quantile | floatToPercent}}<sup>{{$e2e.quantile | percSuffix}}</sup></th>
{{end}}
</tr>
</thead>
<tbody>
Expand All @@ -98,9 +101,16 @@ <h4>Channel</h4>
<td>{{$c.MessageCount | commafy}}</td>
{{if $g.Enabled}}<td class="bold rate" target="{{$g.Rate $c}}"></td> {{end}}
<td>{{$c.ClientCount}}</td>
{{if $c.E2eProcessingLatency.Percentiles}}
{{range $e2e := $c.E2eProcessingLatency.Percentiles}}
<td>
<span title="{{$e2e.quantile | floatToPercent}}: min = {{$e2e.min | nanotohuman}}, max = {{$e2e.max | nanotohuman}}">{{$e2e.average | nanotohuman}}</span>
</td>
{{end}}
{{end}}
</tr>
{{if $g.Enabled}}
<tr>
<tr class="graph-row">
<td></td>
<td><a href="{{$g.LargeGraph $c "depth"}}"><img width="120" height="20" src="{{$g.Sparkline $c "depth"}}"></a></td>
<td></td>
Expand All @@ -111,6 +121,11 @@ <h4>Channel</h4>
<td><a href="{{$g.LargeGraph $c "message_count"}}"><img width="120" height="20" src="{{$g.Sparkline $c "message_count"}}"></a></td>
<td></td>
<td><a href="{{$g.LargeGraph $c "clients"}}"><img width="120" height="20" src="{{$g.Sparkline $c "clients"}}"></a></td>
{{if $c.E2eProcessingLatency.Percentiles}}
<td colspan="{{len $c.E2eProcessingLatency.Percentiles}}">
<a href="{{$g.LargeGraph $c.E2eProcessingLatency "e2e_processing_latency"}}"><img width="120" height="20" src="{{$g.Sparkline $c.E2eProcessingLatency "e2e_processing_latency"}}"></a>
</td>
{{end}}
</tr>
{{end}}

Expand All @@ -127,9 +142,16 @@ <h4>Channel</h4>
<td>{{$c.MessageCount | commafy}}</td>
{{if $g.Enabled}}<td class="bold rate" target="{{$g.Rate $c}}"></td> {{end}}
<td>{{$c.ClientCount}}</td>
{{if $c.E2eProcessingLatency.Percentiles}}
{{range $e2e := $c.E2eProcessingLatency.Percentiles}}
<td>
<span title="{{$e2e.quantile | floatToPercent}}: min = {{$e2e.min | nanotohuman}}, max = {{$e2e.max | nanotohuman}}">{{$e2e.average | nanotohuman}}</span>
</td>
{{end}}
{{end}}
</tr>
{{if $g.Enabled}}
<tr class="info">
<tr class="info graph-row">
<td></td>
<td><a href="{{$g.LargeGraph $c "depth"}}"><img width="120" height="20" src="{{$g.Sparkline $c "depth"}}"></a></td>
<td></td>
Expand All @@ -140,6 +162,11 @@ <h4>Channel</h4>
<td><a href="{{$g.LargeGraph $c "message_count"}}"><img width="120" height="20" src="{{$g.Sparkline $c "message_count"}}"></a></td>
<td></td>
<td><a href="{{$g.LargeGraph $c "clients"}}"><img width="120" height="20" src="{{$g.Sparkline $c "clients"}}"></a></td>
{{if $c.E2eProcessingLatency.Percentiles}}
<td colspan="{{len $c.E2eProcessingLatency.Percentiles}}">
<a href="{{$g.LargeGraph $c.E2eProcessingLatency "e2e_processing_latency"}}"><img width="120" height="20" src="{{$g.Sparkline $c.E2eProcessingLatency "e2e_processing_latency"}}"></a>
</td>
{{end}}
</tr>
{{end}}
</tbody>
Expand Down
44 changes: 44 additions & 0 deletions nsqadmin/templates/header.html
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
<meta http-equiv="Content-type" content="text/html; charset=utf-8">
<title>{{.Title}}</title>
<!-- from http://www.bootstrapcdn.com/ -->
<script src="http://code.jquery.com/jquery-1.10.1.min.js"></script>
<link rel="stylesheet" href="//netdna.bootstrapcdn.com/twitter-bootstrap/2.2.1/css/bootstrap-combined.min.css" type="text/css" charset="utf-8">
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<style type="text/css">
Expand All @@ -16,9 +17,52 @@
.bold {
font-weight: bold;
}
.graph-row td {
text-align: center;
}
.image-preview {
display: none;
position: absolute;
z-index: 100;
height: 240px;
width: 480px;
}
</style>
</head>
<body>

<script>
$(document).ready(function () {
// Get all the thumbnail
$('td a[href^="/render"] img').mouseenter(function(e) {

// Calculate the position of the image tooltip
x = e.pageX - 25;
y = e.pageY + 25;
x = Math.min(x, $(window).width() - 510)
if (y + 240 > $(window).height()) {
y = e.pageY - 265;
}

// Set the z-index of the current item,
// make sure it's greater than the rest of thumbnail items
// Set the position and display the image tooltip
var tooltip = $('.image-preview');

tooltip.attr("src", $(this).parent().attr("href"));
tooltip.stop().css({'top': y,'left': x,'display':'block','opacity':1});

}).mouseleave(function() {

// Reset the z-index and hide the image tooltip
var tooltip = $('.image-preview');
tooltip.animate({"opacity": "hide"}, "fast");
});

});
</script>

<img class="image-preview img-polaroid">

<div class="navbar navbar-inverse">
<div class="navbar-inner">
Expand Down
Loading

0 comments on commit 0be672e

Please sign in to comment.