Skip to content

Commit

Permalink
pkg/receive: forward metrics
Browse files Browse the repository at this point in the history
This commit enables metrics forwarding from one receive node to another.
The receive nodes construct hashrings from the given sd-files and
use these hashrings to select a node to which to forward a given time
series. Time series are batched together to ensure that for any incoming
write-request to a node, at most one outgoing write-request will be made
to every other node in the hashring.
  • Loading branch information
squat committed Jun 11, 2019
1 parent a51ebdb commit 1215c5f
Show file tree
Hide file tree
Showing 4 changed files with 415 additions and 16 deletions.
69 changes: 69 additions & 0 deletions cmd/thanos/receive.go
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,8 @@ import (
"context"
"fmt"
"net"
"os"
"strings"
"sync"
"time"

Expand All @@ -22,6 +24,8 @@ import (
"github.com/pkg/errors"
"github.com/prometheus/client_golang/prometheus"
"github.com/prometheus/common/model"
"github.com/prometheus/prometheus/discovery/file"
"github.com/prometheus/prometheus/discovery/targetgroup"
"github.com/prometheus/prometheus/storage/tsdb"
"github.com/prometheus/tsdb/labels"
"google.golang.org/grpc"
Expand All @@ -46,12 +50,41 @@ func registerReceive(m map[string]setupFunc, app *kingpin.Application, name stri

retention := modelDuration(cmd.Flag("tsdb.retention", "How long to retain raw samples on local storage. 0d - disables this retention").Default("15d"))

fileSDFiles := cmd.Flag("receive.sd-files", "Path to file that contain addresses of receive peers. The path can be a glob pattern (repeatable).").
PlaceHolder("<path>").Strings()

fileSDInterval := modelDuration(cmd.Flag("receive.sd-interval", "Refresh interval to re-read file SD files. (used as a fallback)").
Default("5m"))

local := cmd.Flag("receive.local-endpoint", "Endpoint of local receive node. Used to identify the local node in the hashring configuration.").String()

m[name] = func(g *run.Group, logger log.Logger, reg *prometheus.Registry, tracer opentracing.Tracer, _ bool) error {
lset, err := parseFlagLabels(*labelStrs)
if err != nil {
return errors.Wrap(err, "parse labels")
}

var fileSD *file.Discovery
if len(*fileSDFiles) > 0 {
conf := &file.SDConfig{
Files: *fileSDFiles,
RefreshInterval: *fileSDInterval,
}
fileSD = file.NewDiscovery(conf, logger)
}

// Local is empty, so try to generate a local endpoint
// based on the hostname and the listening port.
if *local == "" {
hostname, err := os.Hostname()
if hostname == "" || err != nil {
return errors.New("--receive.local-endpoint is empty and host could not be determined.")
}
parts := strings.Split(*remoteWriteAddress, ":")
port := parts[len(parts)-1]
*local = fmt.Sprintf("http://%s:%s/api/v1/receive", hostname, port)
}

return runReceive(
g,
logger,
Expand All @@ -67,6 +100,8 @@ func registerReceive(m map[string]setupFunc, app *kingpin.Application, name stri
objStoreConfig,
lset,
*retention,
fileSD,
*local,
)
}
}
Expand All @@ -86,6 +121,8 @@ func runReceive(
objStoreConfig *pathOrContent,
lset labels.Labels,
retention model.Duration,
fileSD *file.Discovery,
endpoint string,
) error {
logger = log.With(logger, "component", "receive")
level.Warn(logger).Log("msg", "setting up receive; the Thanos receive component is EXPERIMENTAL, it may break significantly without notice")
Expand All @@ -97,13 +134,45 @@ func runReceive(
MaxBlockDuration: model.Duration(time.Hour * 2),
}

var hashring receive.Hashring
// Run File Service Discovery and check for updates.
if fileSD != nil {
ctxRun, cancelRun := context.WithCancel(context.Background())
fileSDUpdates := make(chan []*targetgroup.Group)
go fileSD.Run(ctxRun, fileSDUpdates)

// We need to wait for initial discovery to create the hashring.
groups := <-fileSDUpdates
hashring = receive.NewHashring(receive.ExactMatcher, groups)
g.Add(func() error {
for {
select {
// If the SD changes, we need to shutdown.
case update := <-fileSDUpdates:
if !receive.HashringsAreEquivalent(groups, update) {
level.Info(logger).Log("msg", "file service discovery changed")
return nil
}
case <-ctxRun.Done():
return nil
}
}
}, func(error) {
cancelRun()
})
} else {
hashring = receive.SingleNodeHashring(endpoint)
}

localStorage := &tsdb.ReadyStorage{}
receiver := receive.NewWriter(log.With(logger, "component", "receive-writer"), localStorage)
webHandler := receive.NewHandler(log.With(logger, "component", "receive-handler"), &receive.Options{
Receiver: receiver,
ListenAddress: remoteWriteAddress,
Registry: reg,
ReadyStorage: localStorage,
Hashring: hashring,
Endpoint: endpoint,
})

// Start all components while we wait for TSDB to open but only load
Expand Down
91 changes: 88 additions & 3 deletions pkg/receive/handler.go
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
package receive

import (
"bytes"
"context"
"fmt"
"io/ioutil"
stdlog "log"
Expand All @@ -17,12 +19,17 @@ import (
conntrack "github.com/mwitkow/go-conntrack"
"github.com/opentracing-contrib/go-stdlib/nethttp"
opentracing "github.com/opentracing/opentracing-go"
"github.com/pkg/errors"
"github.com/prometheus/client_golang/prometheus"
"github.com/prometheus/client_golang/prometheus/promhttp"
"github.com/prometheus/common/route"
promtsdb "github.com/prometheus/prometheus/storage/tsdb"
)

const (
// tenantHeader is the name of the HTTP header that carries the tenant of a
// write request. The receive handler reads it from incoming requests and
// sets it again on the requests it forwards to other endpoints.
tenantHeader = "THANOS_TENANT"
)

var (
requestDuration = prometheus.NewHistogramVec(
prometheus.HistogramOpts{
Expand All @@ -48,6 +55,8 @@ type Options struct {
ListenAddress string
Registry prometheus.Registerer
ReadyStorage *promtsdb.ReadyStorage
Hashring Hashring
Endpoint string
}

// Handler serves a Prometheus remote write receiving HTTP endpoint.
Expand All @@ -56,6 +65,7 @@ type Handler struct {
logger log.Logger
receiver *Writer
router *route.Router
hashring Hashring
options *Options
listener net.Listener

Expand Down Expand Up @@ -83,6 +93,7 @@ func NewHandler(logger log.Logger, o *Options) *Handler {
router: router,
readyStorage: o.ReadyStorage,
receiver: o.Receiver,
hashring: o.Hashring,
options: o,
}

Expand Down Expand Up @@ -160,8 +171,8 @@ func (h *Handler) Run() error {
return httpSrv.Serve(h.listener)
}

func (h *Handler) receive(w http.ResponseWriter, req *http.Request) {
compressed, err := ioutil.ReadAll(req.Body)
func (h *Handler) receive(w http.ResponseWriter, r *http.Request) {
compressed, err := ioutil.ReadAll(r.Body)
if err != nil {
http.Error(w, err.Error(), http.StatusInternalServerError)
return
Expand All @@ -180,8 +191,82 @@ func (h *Handler) receive(w http.ResponseWriter, req *http.Request) {
return
}

if err := h.receiver.Receive(&wreq); err != nil {
tenant := r.Header.Get(tenantHeader)
local, err := h.forward(r.Context(), tenant, &wreq)
if err != nil {
http.Error(w, err.Error(), http.StatusBadRequest)
return
}
// There may be no WriteRequest destined for the local node.
if local != nil {
if err := h.receiver.Receive(local); err != nil {
http.Error(w, err.Error(), http.StatusBadRequest)
return
}
}
}

// forward accepts a write request, batches its time series by
// corresponding endpoint, and forwards them in parallel. It returns a write
// request containing only the time series that correspond to the
// local handler, or nil if no time series belongs to the local node.
//
// The endpoint for every time series is obtained from the handler's hashring
// using the given tenant. If any lookup fails, nothing is forwarded and the
// lookup error is returned. Each remote endpoint receives exactly one
// snappy-compressed, protobuf-encoded POST carrying the tenant in the
// tenantHeader header. A forwarded request counts as failed when it cannot be
// built or sent, or when the remote endpoint answers with a non-2xx status.
// All failures are collected and returned as a single error.
func (h *Handler) forward(ctx context.Context, tenant string, wreq *prompb.WriteRequest) (*prompb.WriteRequest, error) {
	// Batch the time series per destination endpoint.
	wreqs := make(map[string]*prompb.WriteRequest)
	for i := range wreq.Timeseries {
		endpoint, err := h.hashring.Get(tenant, &wreq.Timeseries[i])
		if err != nil {
			return nil, err
		}
		wr, ok := wreqs[endpoint]
		if !ok {
			wr = &prompb.WriteRequest{}
			wreqs[endpoint] = wr
		}
		wr.Timeseries = append(wr.Timeseries, wreq.Timeseries[i])
	}

	// Every spawned goroutine sends exactly one value on ec and the loop at
	// the bottom receives exactly n values, so closing ec on return is safe.
	ec := make(chan error)
	defer close(ec)
	var n int
	var local *prompb.WriteRequest
	for endpoint, wr := range wreqs {
		// The batch for the local node is handed back to the caller
		// instead of being sent over the network.
		if endpoint == h.options.Endpoint {
			local = wr
			continue
		}
		n++
		go func(endpoint string, wr *prompb.WriteRequest) {
			buf, err := proto.Marshal(wr)
			if err != nil {
				level.Error(h.logger).Log("msg", "proto marshal error", "err", err, "endpoint", endpoint)
				ec <- err
				return
			}
			req, err := http.NewRequest("POST", endpoint, bytes.NewBuffer(snappy.Encode(nil, buf)))
			if err != nil {
				level.Error(h.logger).Log("msg", "create request error", "err", err, "endpoint", endpoint)
				ec <- err
				return
			}
			req.Header.Add(tenantHeader, tenant)
			resp, err := http.DefaultClient.Do(req.WithContext(ctx))
			if err != nil {
				level.Error(h.logger).Log("msg", "forward request error", "err", err, "endpoint", endpoint)
				ec <- err
				return
			}
			// Always drain and close the body so that the underlying
			// connection is released and can be reused. A read error is
			// deliberately ignored: the body is only used to enrich the
			// error message below, the status code decides the outcome.
			body, _ := ioutil.ReadAll(resp.Body)
			resp.Body.Close()
			// A transport-level success does not mean the peer accepted
			// the samples; anything but 2xx is a failed forward.
			if resp.StatusCode/100 != 2 {
				err := fmt.Errorf("forwarding request to endpoint %v returned HTTP status %v: %s", endpoint, resp.Status, bytes.TrimSpace(body))
				level.Error(h.logger).Log("msg", "forward request error", "err", err, "endpoint", endpoint)
				ec <- err
				return
			}
			ec <- nil
		}(endpoint, wr)
	}

	// Wait for all forwarded requests and fold their errors into one.
	var errs error
	for ; n > 0; n-- {
		if err := <-ec; err != nil {
			if errs == nil {
				errs = err
				continue
			}
			errs = errors.Wrap(errs, err.Error())
		}
	}

	return local, errs
}
62 changes: 60 additions & 2 deletions pkg/receive/hashring.go
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,10 @@ import (

const sep = '\xff'

// tenantLabel is used to group discovered targets by tenant. It is the name
// of the label looked up on every discovered target group; groups that do
// not carry this label are filed under the empty tenant "".
const tenantLabel = "tenant"

// Hashring finds the correct node to handle a given time series
// for a specified tenant.
// It returns the node and any error encountered.
type Hashring interface {
Expand Down Expand Up @@ -65,7 +69,15 @@ func hash(tenant string, ts *prompb.TimeSeries) uint64 {
return xxhash.Sum64(b)
}

// simpleHashring represents a group of hosts handling write requests.
// SingleNodeHashring is a Hashring made of exactly one node: the string value
// itself is the node that every lookup resolves to.
type SingleNodeHashring string

// Get implements the Hashring interface. The tenant and the time series are
// both ignored; the result is always the hashring's own value and a nil error.
func (node SingleNodeHashring) Get(string, *prompb.TimeSeries) (string, error) {
	endpoint := string(node)
	return endpoint, nil
}

// simpleHashring represents a group of nodes handling write requests.
type simpleHashring struct {
targetgroup.Group
}
Expand Down Expand Up @@ -107,7 +119,53 @@ func NewHashring(matcher Matcher, groups []*targetgroup.Group) Hashring {
matcher: matcher,
}
for _, g := range groups {
m.hashrings[g.Source] = &simpleHashring{*g}
l, ok := g.Labels[tenantLabel]
if !ok {
l = ""
}
t := string(l)
if _, ok := m.hashrings[t]; !ok {
m.hashrings[t] = &simpleHashring{}
}
h := m.hashrings[t].(*simpleHashring)
h.Targets = append(h.Targets, g.Targets...)
}
return m
}

// HashringsAreEquivalent reports whether the two sets of target groups would
// produce equivalent hashrings, i.e. whether both describe the same tenants
// and, for every tenant, the same set of target addresses. The order of
// groups and targets as well as duplicate addresses are irrelevant.
func HashringsAreEquivalent(a, b []*targetgroup.Group) bool {
	type addressSet map[string]struct{}

	// index collects, per tenant, the distinct addresses of all targets.
	// A group without the tenant label lands under the empty tenant, and a
	// group without targets still registers its tenant.
	index := func(groups []*targetgroup.Group) map[string]addressSet {
		byTenant := make(map[string]addressSet)
		for _, group := range groups {
			// A missing label yields the zero value, the empty string.
			tenant := string(group.Labels[tenantLabel])
			addresses, seen := byTenant[tenant]
			if !seen {
				addresses = addressSet{}
				byTenant[tenant] = addresses
			}
			for _, target := range group.Targets {
				addresses[string(target[model.AddressLabel])] = struct{}{}
			}
		}
		return byTenant
	}

	// covers reports whether every tenant and address of x also exists in y.
	covers := func(x, y map[string]addressSet) bool {
		for tenant, wanted := range x {
			present, found := y[tenant]
			if !found {
				return false
			}
			for address := range wanted {
				if _, found := present[address]; !found {
					return false
				}
			}
		}
		return true
	}

	left, right := index(a), index(b)
	// Equivalence is mutual coverage.
	return covers(left, right) && covers(right, left)
}
Loading

0 comments on commit 1215c5f

Please sign in to comment.