From 7e355ee39b449c10fbf9b15831a5b1edd8b642d6 Mon Sep 17 00:00:00 2001 From: Benjamin Fernandes Date: Tue, 12 Dec 2017 13:57:40 +0100 Subject: [PATCH 1/9] Redirect traces to new API endpoint --- agent/agent.go | 94 ++-- agent/sampler.go | 105 ++-- info/endpoint.go | 3 - info/info.go | 11 +- info/stats.go | 20 + model/payload.go | 6 + model/span.pb.go | 2 +- model/trace.go | 21 + model/trace.pb.go | 404 +++++++++++++++ model/trace.proto | 12 + model/trace_payload.pb.go | 458 ++++++++++++++++++ model/trace_payload.proto | 13 + writer/client.go | 33 ++ writer/datadog_endpoint.go | 68 +++ writer/endpoint.go | 250 +--------- writer/legacy_endpoint.go | 249 ++++++++++ ...dpoint_test.go => legacy_endpoint_test.go} | 0 writer/{writer.go => legacy_writer.go} | 2 +- writer/trace_writer.go | 144 ++++++ 19 files changed, 1551 insertions(+), 344 deletions(-) create mode 100644 model/trace.pb.go create mode 100644 model/trace.proto create mode 100644 model/trace_payload.pb.go create mode 100644 model/trace_payload.proto create mode 100644 writer/client.go create mode 100644 writer/datadog_endpoint.go create mode 100644 writer/legacy_endpoint.go rename writer/{endpoint_test.go => legacy_endpoint_test.go} (100%) rename writer/{writer.go => legacy_writer.go} (99%) create mode 100644 writer/trace_writer.go diff --git a/agent/agent.go b/agent/agent.go index e89042ef5..6b960730e 100644 --- a/agent/agent.go +++ b/agent/agent.go @@ -1,7 +1,6 @@ package main import ( - "sync" "sync/atomic" "time" @@ -40,12 +39,13 @@ func (pt *processedTrace) weight() float64 { // Agent struct holds all the sub-routines structs and make the data flow between them type Agent struct { - Receiver *HTTPReceiver - Concentrator *Concentrator - Filters []filters.Filter - ScoreEngine *Sampler - PriorityEngine *Sampler - Writer *writer.Writer + Receiver *HTTPReceiver + Concentrator *Concentrator + Filters []filters.Filter + ScoreSampler *Sampler + PrioritySampler *Sampler + Writer *writer.Writer + TraceWriter 
*writer.TraceWriter // config conf *config.AgentConfig @@ -66,27 +66,37 @@ func NewAgent(conf *config.AgentConfig, exit chan struct{}) *Agent { conf.BucketInterval.Nanoseconds(), ) f := filters.Setup(conf) - ss := NewScoreEngine(conf) + + sampledTraceChan := make(chan *model.Trace) + ss := NewScoreSampler(conf) + ss.sampled = sampledTraceChan var ps *Sampler if conf.PrioritySampling { // Use priority sampling for distributed tracing only if conf says so - ps = NewPriorityEngine(conf, dynConf) + // TODO: remove the option once confortable ; as it is true by default. + ps = NewPrioritySampler(conf, dynConf) + ps.sampled = sampledTraceChan } - + // legacy writer, will progressively get replaced by per-endpoint writers w := writer.NewWriter(conf) w.InServices = r.services + // new set of writers + tw := writer.NewTraceWriter(conf) + tw.InTraces = sampledTraceChan + return &Agent{ - Receiver: r, - Concentrator: c, - Filters: f, - ScoreEngine: ss, - PriorityEngine: ps, - Writer: w, - conf: conf, - dynConf: dynConf, - exit: exit, - die: die, + Receiver: r, + Concentrator: c, + Filters: f, + ScoreSampler: ss, + PrioritySampler: ps, + Writer: w, + TraceWriter: tw, + conf: conf, + dynConf: dynConf, + exit: exit, + die: die, } } @@ -104,11 +114,14 @@ func (a *Agent) Run() { // update the data served by expvar so that we don't expose a 0 sample rate info.UpdatePreSampler(*a.Receiver.preSampler.Stats()) + // TODO: unify components APIs. Use Start/Stop as non-blocking ways of controlling the blocking Run loop. + // Like we do with TraceWriter. 
a.Receiver.Run() a.Writer.Run() - a.ScoreEngine.Run() - if a.PriorityEngine != nil { - a.PriorityEngine.Run() + a.TraceWriter.Start() + a.ScoreSampler.Run() + if a.PrioritySampler != nil { + a.PrioritySampler.Run() } for { @@ -120,26 +133,8 @@ func (a *Agent) Run() { HostName: a.conf.HostName, Env: a.conf.DefaultEnv, } - var wg sync.WaitGroup - wg.Add(2) - go func() { - defer watchdog.LogOnPanic() - p.Stats = a.Concentrator.Flush() - wg.Done() - }() - go func() { - defer watchdog.LogOnPanic() - // Serializing both flushes, classic agent sampler and distributed sampler, - // in most cases only one will be used, so in mainstream case there should - // be no performance issue, only in transitionnal mode can both contain data. - p.Traces = a.ScoreEngine.Flush() - if a.PriorityEngine != nil { - p.Traces = append(p.Traces, a.PriorityEngine.Flush()...) - } - wg.Done() - }() - - wg.Wait() + p.Stats = a.Concentrator.Flush() + p.SetExtra(languageHeaderKey, a.Receiver.Languages()) a.Writer.InPayloads <- p @@ -149,9 +144,10 @@ func (a *Agent) Run() { log.Info("exiting") close(a.Receiver.exit) a.Writer.Stop() - a.ScoreEngine.Stop() - if a.PriorityEngine != nil { - a.PriorityEngine.Stop() + a.TraceWriter.Stop() + a.ScoreSampler.Stop() + if a.PrioritySampler != nil { + a.PrioritySampler.Stop() } return } @@ -177,11 +173,11 @@ func (a *Agent) Process(t model.Trace) { // We choose the sampler dynamically, depending on trace content, // it has a sampling priority info (wether 0 or 1 or more) we respect // this by using priority sampler. Else, use default score sampler. 
- s := a.ScoreEngine + s := a.ScoreSampler priorityPtr := &ts.TracesPriorityNone - if a.PriorityEngine != nil { + if a.PrioritySampler != nil { if priority, ok := root.Metrics[samplingPriorityKey]; ok { - s = a.PriorityEngine + s = a.PrioritySampler if priority == 0 { priorityPtr = &ts.TracesPriority0 diff --git a/agent/sampler.go b/agent/sampler.go index e3b841ef2..47ee472b4 100644 --- a/agent/sampler.go +++ b/agent/sampler.go @@ -3,7 +3,6 @@ package main import ( "fmt" "reflect" - "sync" "time" log "github.com/cihub/seelog" @@ -17,29 +16,28 @@ import ( // Sampler chooses wich spans to write to the API type Sampler struct { - mu sync.Mutex - sampledTraces []model.Trace - traceCount int - lastFlush time.Time + sampled chan *model.Trace + // For stats + keptTraceCount int + totalTraceCount int + lastFlush time.Time + + // actual implementation of the sampling logic engine sampler.Engine } -// NewScoreEngine creates a new empty sampler ready to be started -func NewScoreEngine(conf *config.AgentConfig) *Sampler { +// NewScoreSampler creates a new empty sampler ready to be started +func NewScoreSampler(conf *config.AgentConfig) *Sampler { return &Sampler{ - sampledTraces: []model.Trace{}, - traceCount: 0, - engine: sampler.NewScoreEngine(conf.ExtraSampleRate, conf.MaxTPS), + engine: sampler.NewScoreEngine(conf.ExtraSampleRate, conf.MaxTPS), } } -// NewPriorityEngine creates a new empty distributed sampler ready to be started -func NewPriorityEngine(conf *config.AgentConfig, dynConf *config.DynamicConfig) *Sampler { +// NewPrioritySampler creates a new empty distributed sampler ready to be started +func NewPrioritySampler(conf *config.AgentConfig, dynConf *config.DynamicConfig) *Sampler { return &Sampler{ - sampledTraces: []model.Trace{}, - traceCount: 0, - engine: sampler.NewPriorityEngine(conf.ExtraSampleRate, conf.MaxTPS, &dynConf.RateByService), + engine: sampler.NewPriorityEngine(conf.ExtraSampleRate, conf.MaxTPS, &dynConf.RateByService), } } @@ -49,62 +47,65 @@ 
func (s *Sampler) Run() { defer watchdog.LogOnPanic() s.engine.Run() }() + + go func() { + defer watchdog.LogOnPanic() + s.logStats() + }() } // Add samples a trace then keep it until the next flush func (s *Sampler) Add(t processedTrace) { - s.mu.Lock() - s.traceCount++ + s.totalTraceCount++ if s.engine.Sample(t.Trace, t.Root, t.Env) { - s.sampledTraces = append(s.sampledTraces, t.Trace) + s.keptTraceCount++ + s.sampled <- &t.Trace } - s.mu.Unlock() } // Stop stops the sampler func (s *Sampler) Stop() { s.engine.Stop() -} -// Flush returns representative spans based on GetSamples and reset its internal memory -func (s *Sampler) Flush() []model.Trace { - s.mu.Lock() - - traces := s.sampledTraces - s.sampledTraces = []model.Trace{} - traceCount := s.traceCount - s.traceCount = 0 +} - now := time.Now() - duration := now.Sub(s.lastFlush) - s.lastFlush = now +// logStats reports statistics and update the info exposed. +func (s *Sampler) logStats() { - s.mu.Unlock() + for now := range time.Tick(10 * time.Second) { + keptTraceCount := s.keptTraceCount + totalTraceCount := s.totalTraceCount + s.keptTraceCount = 0 + s.totalTraceCount = 0 - state := s.engine.GetState() + duration := now.Sub(s.lastFlush) + s.lastFlush = now - switch state := state.(type) { - case sampler.InternalState: + // TODO: do we still want that? figure out how it conflicts with what the `state` exposes / what is public metrics. 
var stats info.SamplerStats if duration > 0 { - stats.KeptTPS = float64(len(traces)) / duration.Seconds() - stats.TotalTPS = float64(traceCount) / duration.Seconds() + stats.KeptTPS = float64(keptTraceCount) / duration.Seconds() + stats.TotalTPS = float64(totalTraceCount) / duration.Seconds() } - - log.Debugf("flushed %d sampled traces out of %d", len(traces), traceCount) - log.Debugf("inTPS: %f, outTPS: %f, maxTPS: %f, offset: %f, slope: %f, cardinality: %d", - state.InTPS, state.OutTPS, state.MaxTPS, state.Offset, state.Slope, state.Cardinality) - - // publish through expvar - switch s.engine.(type) { - case *sampler.ScoreEngine: - info.UpdateSamplerInfo(info.SamplerInfo{EngineType: fmt.Sprint(reflect.TypeOf(s.engine)), Stats: stats, State: state}) - case *sampler.PriorityEngine: - info.UpdatePrioritySamplerInfo(info.SamplerInfo{EngineType: fmt.Sprint(reflect.TypeOf(s.engine)), Stats: stats, State: state}) + engineType := fmt.Sprint(reflect.TypeOf(s.engine)) + log.Debugf("%s: flushed %d sampled traces out of %d", engineType, keptTraceCount, totalTraceCount) + + state := s.engine.GetState() + + switch state := state.(type) { + case sampler.InternalState: + log.Debugf("%s: inTPS: %f, outTPS: %f, maxTPS: %f, offset: %f, slope: %f, cardinality: %d", + engineType, state.InTPS, state.OutTPS, state.MaxTPS, state.Offset, state.Slope, state.Cardinality) + + // publish through expvar + switch s.engine.(type) { + case *sampler.ScoreEngine: + info.UpdateSamplerInfo(info.SamplerInfo{EngineType: engineType, Stats: stats, State: state}) + case *sampler.PriorityEngine: + info.UpdatePrioritySamplerInfo(info.SamplerInfo{EngineType: engineType, Stats: stats, State: state}) + } + default: + log.Debugf("unhandled sampler engine, can't log state") } - default: - log.Debugf("unhandled sampler engine, can't log state") } - - return traces } diff --git a/info/endpoint.go b/info/endpoint.go index 2cf9e97c3..1bb4f7317 100644 --- a/info/endpoint.go +++ b/info/endpoint.go @@ -12,9 +12,6 @@ 
type EndpointStats struct { // If several URLs are given, it does not change the size (shared for all). // This is the raw data, encoded, compressed. TracesBytes int64 - // TracesCount is the number of traces in the traces payload data sent, including errors. - // If several URLs are given, it does not change the size (shared for all). - TracesCount int64 // TracesStats is the number of stats in the traces payload data sent, including errors. // If several URLs are given, it does not change the size (shared for all). TracesStats int64 diff --git a/info/info.go b/info/info.go index b5bc9e693..4b56e582f 100644 --- a/info/info.go +++ b/info/info.go @@ -20,7 +20,8 @@ import ( var ( infoMu sync.RWMutex - receiverStats []TagStats // only for the last minute + receiverStats []TagStats // only for the last minute + languages []string endpointStats EndpointStats // only for the last minute watchdogInfo watchdog.Info samplerInfo SamplerInfo @@ -106,6 +107,14 @@ func UpdateReceiverStats(rs *ReceiverStats) { } receiverStats = s + languages = rs.Languages() +} + +func Languages() []string { + infoMu.Lock() + defer infoMu.Unlock() + + return languages } func publishReceiverStats() interface{} { diff --git a/info/stats.go b/info/stats.go index 54ed9c083..28fe03028 100644 --- a/info/stats.go +++ b/info/stats.go @@ -2,6 +2,7 @@ package info import ( "fmt" + "sort" "sync" "sync/atomic" @@ -51,6 +52,25 @@ func (rs *ReceiverStats) Publish() { rs.RUnlock() } +// Languages returns the set of languages reporting traces to the Agent. 
+func (rs *ReceiverStats) Languages() []string {
+	langSet := make(map[string]bool)
+	langs := []string{}
+
+	rs.RLock()
+	for tags := range rs.Stats {
+		if _, ok := langSet[tags.Lang]; !ok {
+			langs = append(langs, tags.Lang)
+			langSet[tags.Lang] = true
+		}
+	}
+	rs.RUnlock()
+
+	sort.Strings(langs)
+
+	return langs
+}
+
 // Reset resets the ReceiverStats internal data
 func (rs *ReceiverStats) Reset() {
 	rs.Lock()
diff --git a/model/payload.go b/model/payload.go
index f026998a8..f86a79fbe 100644
--- a/model/payload.go
+++ b/model/payload.go
@@ -105,3 +105,9 @@ func SetAgentPayloadHeaders(h http.Header, extras map[string]string) {
 	default:
 	}
 }
+
+func SetExtraHeaders(h http.Header, extras map[string]string) {
+	for key, value := range extras {
+		h.Set(key, value)
+	}
+}
diff --git a/model/span.pb.go b/model/span.pb.go
index b8dc45a29..f7ac95de4 100644
--- a/model/span.pb.go
+++ b/model/span.pb.go
@@ -12,7 +12,7 @@
 
 	It has these top-level messages:
 		Span
-		Trace
+		APITrace
 		TracePayload
 */
 package model
diff --git a/model/trace.go b/model/trace.go
index 986253360..44071c03d 100644
--- a/model/trace.go
+++ b/model/trace.go
@@ -99,3 +99,24 @@ func (t Trace) ChildrenMap() map[uint64][]*Span {
 func NewTraceFlushMarker() Trace {
 	return []*Span{NewFlushMarker()}
 }
+
+// APITrace returns an APITrace from the trace, as required by the Datadog API.
+func (t Trace) APITrace() *APITrace {
+	start := t[0].Start
+	end := t[0].End()
+	for i := range t {
+		if t[i].Start < start {
+			start = t[i].Start
+		}
+		if t[i].End() > end {
+			end = t[i].End()
+		}
+	}
+
+	return &APITrace{
+		TraceID:   t[0].TraceID,
+		Spans:     t,
+		StartTime: start,
+		EndTime:   end,
+	}
+}
diff --git a/model/trace.pb.go b/model/trace.pb.go
new file mode 100644
index 000000000..d603beeee
--- /dev/null
+++ b/model/trace.pb.go
@@ -0,0 +1,404 @@
+// Code generated by protoc-gen-gogo.
+// source: trace.proto
+// DO NOT EDIT!
+ +package model + +import proto "github.com/gogo/protobuf/proto" +import fmt "fmt" +import math "math" + +import io "io" + +// Reference imports to suppress errors if they are not otherwise used. +var _ = proto.Marshal +var _ = fmt.Errorf +var _ = math.Inf + +type APITrace struct { + TraceID uint64 `protobuf:"varint,1,opt,name=traceID,proto3" json:"traceID,omitempty"` + Spans []*Span `protobuf:"bytes,2,rep,name=spans" json:"spans,omitempty"` + StartTime int64 `protobuf:"varint,6,opt,name=startTime,proto3" json:"startTime,omitempty"` + EndTime int64 `protobuf:"varint,7,opt,name=endTime,proto3" json:"endTime,omitempty"` +} + +func (m *APITrace) Reset() { *m = APITrace{} } +func (m *APITrace) String() string { return proto.CompactTextString(m) } +func (*APITrace) ProtoMessage() {} +func (*APITrace) Descriptor() ([]byte, []int) { return fileDescriptorTrace, []int{0} } + +func (m *APITrace) GetSpans() []*Span { + if m != nil { + return m.Spans + } + return nil +} + +func init() { + proto.RegisterType((*APITrace)(nil), "model.APITrace") +} +func (m *APITrace) Marshal() (data []byte, err error) { + size := m.Size() + data = make([]byte, size) + n, err := m.MarshalTo(data) + if err != nil { + return nil, err + } + return data[:n], nil +} + +func (m *APITrace) MarshalTo(data []byte) (int, error) { + var i int + _ = i + var l int + _ = l + if m.TraceID != 0 { + data[i] = 0x8 + i++ + i = encodeVarintTrace(data, i, uint64(m.TraceID)) + } + if len(m.Spans) > 0 { + for _, msg := range m.Spans { + data[i] = 0x12 + i++ + i = encodeVarintTrace(data, i, uint64(msg.Size())) + n, err := msg.MarshalTo(data[i:]) + if err != nil { + return 0, err + } + i += n + } + } + if m.StartTime != 0 { + data[i] = 0x30 + i++ + i = encodeVarintTrace(data, i, uint64(m.StartTime)) + } + if m.EndTime != 0 { + data[i] = 0x38 + i++ + i = encodeVarintTrace(data, i, uint64(m.EndTime)) + } + return i, nil +} + +func encodeFixed64Trace(data []byte, offset int, v uint64) int { + data[offset] = uint8(v) + 
data[offset+1] = uint8(v >> 8) + data[offset+2] = uint8(v >> 16) + data[offset+3] = uint8(v >> 24) + data[offset+4] = uint8(v >> 32) + data[offset+5] = uint8(v >> 40) + data[offset+6] = uint8(v >> 48) + data[offset+7] = uint8(v >> 56) + return offset + 8 +} +func encodeFixed32Trace(data []byte, offset int, v uint32) int { + data[offset] = uint8(v) + data[offset+1] = uint8(v >> 8) + data[offset+2] = uint8(v >> 16) + data[offset+3] = uint8(v >> 24) + return offset + 4 +} +func encodeVarintTrace(data []byte, offset int, v uint64) int { + for v >= 1<<7 { + data[offset] = uint8(v&0x7f | 0x80) + v >>= 7 + offset++ + } + data[offset] = uint8(v) + return offset + 1 +} +func (m *APITrace) Size() (n int) { + var l int + _ = l + if m.TraceID != 0 { + n += 1 + sovTrace(uint64(m.TraceID)) + } + if len(m.Spans) > 0 { + for _, e := range m.Spans { + l = e.Size() + n += 1 + l + sovTrace(uint64(l)) + } + } + if m.StartTime != 0 { + n += 1 + sovTrace(uint64(m.StartTime)) + } + if m.EndTime != 0 { + n += 1 + sovTrace(uint64(m.EndTime)) + } + return n +} + +func sovTrace(x uint64) (n int) { + for { + n++ + x >>= 7 + if x == 0 { + break + } + } + return n +} +func sozTrace(x uint64) (n int) { + return sovTrace(uint64((x << 1) ^ uint64((int64(x) >> 63)))) +} +func (m *APITrace) Unmarshal(data []byte) error { + l := len(data) + iNdEx := 0 + for iNdEx < l { + preIndex := iNdEx + var wire uint64 + for shift := uint(0); ; shift += 7 { + if shift >= 64 { + return ErrIntOverflowTrace + } + if iNdEx >= l { + return io.ErrUnexpectedEOF + } + b := data[iNdEx] + iNdEx++ + wire |= (uint64(b) & 0x7F) << shift + if b < 0x80 { + break + } + } + fieldNum := int32(wire >> 3) + wireType := int(wire & 0x7) + if wireType == 4 { + return fmt.Errorf("proto: APITrace: wiretype end group for non-group") + } + if fieldNum <= 0 { + return fmt.Errorf("proto: APITrace: illegal tag %d (wire type %d)", fieldNum, wire) + } + switch fieldNum { + case 1: + if wireType != 0 { + return fmt.Errorf("proto: wrong wireType 
= %d for field TraceID", wireType) + } + m.TraceID = 0 + for shift := uint(0); ; shift += 7 { + if shift >= 64 { + return ErrIntOverflowTrace + } + if iNdEx >= l { + return io.ErrUnexpectedEOF + } + b := data[iNdEx] + iNdEx++ + m.TraceID |= (uint64(b) & 0x7F) << shift + if b < 0x80 { + break + } + } + case 2: + if wireType != 2 { + return fmt.Errorf("proto: wrong wireType = %d for field Spans", wireType) + } + var msglen int + for shift := uint(0); ; shift += 7 { + if shift >= 64 { + return ErrIntOverflowTrace + } + if iNdEx >= l { + return io.ErrUnexpectedEOF + } + b := data[iNdEx] + iNdEx++ + msglen |= (int(b) & 0x7F) << shift + if b < 0x80 { + break + } + } + if msglen < 0 { + return ErrInvalidLengthTrace + } + postIndex := iNdEx + msglen + if postIndex > l { + return io.ErrUnexpectedEOF + } + m.Spans = append(m.Spans, &Span{}) + if err := m.Spans[len(m.Spans)-1].Unmarshal(data[iNdEx:postIndex]); err != nil { + return err + } + iNdEx = postIndex + case 6: + if wireType != 0 { + return fmt.Errorf("proto: wrong wireType = %d for field StartTime", wireType) + } + m.StartTime = 0 + for shift := uint(0); ; shift += 7 { + if shift >= 64 { + return ErrIntOverflowTrace + } + if iNdEx >= l { + return io.ErrUnexpectedEOF + } + b := data[iNdEx] + iNdEx++ + m.StartTime |= (int64(b) & 0x7F) << shift + if b < 0x80 { + break + } + } + case 7: + if wireType != 0 { + return fmt.Errorf("proto: wrong wireType = %d for field EndTime", wireType) + } + m.EndTime = 0 + for shift := uint(0); ; shift += 7 { + if shift >= 64 { + return ErrIntOverflowTrace + } + if iNdEx >= l { + return io.ErrUnexpectedEOF + } + b := data[iNdEx] + iNdEx++ + m.EndTime |= (int64(b) & 0x7F) << shift + if b < 0x80 { + break + } + } + default: + iNdEx = preIndex + skippy, err := skipTrace(data[iNdEx:]) + if err != nil { + return err + } + if skippy < 0 { + return ErrInvalidLengthTrace + } + if (iNdEx + skippy) > l { + return io.ErrUnexpectedEOF + } + iNdEx += skippy + } + } + + if iNdEx > l { + return 
io.ErrUnexpectedEOF + } + return nil +} +func skipTrace(data []byte) (n int, err error) { + l := len(data) + iNdEx := 0 + for iNdEx < l { + var wire uint64 + for shift := uint(0); ; shift += 7 { + if shift >= 64 { + return 0, ErrIntOverflowTrace + } + if iNdEx >= l { + return 0, io.ErrUnexpectedEOF + } + b := data[iNdEx] + iNdEx++ + wire |= (uint64(b) & 0x7F) << shift + if b < 0x80 { + break + } + } + wireType := int(wire & 0x7) + switch wireType { + case 0: + for shift := uint(0); ; shift += 7 { + if shift >= 64 { + return 0, ErrIntOverflowTrace + } + if iNdEx >= l { + return 0, io.ErrUnexpectedEOF + } + iNdEx++ + if data[iNdEx-1] < 0x80 { + break + } + } + return iNdEx, nil + case 1: + iNdEx += 8 + return iNdEx, nil + case 2: + var length int + for shift := uint(0); ; shift += 7 { + if shift >= 64 { + return 0, ErrIntOverflowTrace + } + if iNdEx >= l { + return 0, io.ErrUnexpectedEOF + } + b := data[iNdEx] + iNdEx++ + length |= (int(b) & 0x7F) << shift + if b < 0x80 { + break + } + } + iNdEx += length + if length < 0 { + return 0, ErrInvalidLengthTrace + } + return iNdEx, nil + case 3: + for { + var innerWire uint64 + var start int = iNdEx + for shift := uint(0); ; shift += 7 { + if shift >= 64 { + return 0, ErrIntOverflowTrace + } + if iNdEx >= l { + return 0, io.ErrUnexpectedEOF + } + b := data[iNdEx] + iNdEx++ + innerWire |= (uint64(b) & 0x7F) << shift + if b < 0x80 { + break + } + } + innerWireType := int(innerWire & 0x7) + if innerWireType == 4 { + break + } + next, err := skipTrace(data[start:]) + if err != nil { + return 0, err + } + iNdEx = start + next + } + return iNdEx, nil + case 4: + return iNdEx, nil + case 5: + iNdEx += 4 + return iNdEx, nil + default: + return 0, fmt.Errorf("proto: illegal wireType %d", wireType) + } + } + panic("unreachable") +} + +var ( + ErrInvalidLengthTrace = fmt.Errorf("proto: negative length found during unmarshaling") + ErrIntOverflowTrace = fmt.Errorf("proto: integer overflow") +) + +func init() { 
proto.RegisterFile("trace.proto", fileDescriptorTrace) } + +var fileDescriptorTrace = []byte{ + // 162 bytes of a gzipped FileDescriptorProto + 0x1f, 0x8b, 0x08, 0x00, 0x00, 0x00, 0x00, 0x00, 0x02, 0xff, 0xe2, 0xe2, 0x2e, 0x29, 0x4a, 0x4c, + 0x4e, 0xd5, 0x2b, 0x28, 0xca, 0x2f, 0xc9, 0x17, 0x62, 0xcd, 0xcd, 0x4f, 0x49, 0xcd, 0x91, 0xe2, + 0x2a, 0x2e, 0x48, 0xcc, 0x83, 0x08, 0x29, 0xd5, 0x73, 0x71, 0x38, 0x06, 0x78, 0x86, 0x80, 0x14, + 0x09, 0x49, 0x70, 0xb1, 0x83, 0x55, 0x7b, 0xba, 0x48, 0x30, 0x2a, 0x30, 0x6a, 0xb0, 0x04, 0xc1, + 0xb8, 0x42, 0x8a, 0x5c, 0xac, 0x20, 0x3d, 0xc5, 0x12, 0x4c, 0x0a, 0xcc, 0x1a, 0xdc, 0x46, 0xdc, + 0x7a, 0x60, 0x83, 0xf4, 0x82, 0x0b, 0x12, 0xf3, 0x82, 0x20, 0x32, 0x42, 0x32, 0x5c, 0x9c, 0xc5, + 0x25, 0x89, 0x45, 0x25, 0x21, 0x99, 0xb9, 0xa9, 0x12, 0x6c, 0x0a, 0x8c, 0x1a, 0xcc, 0x41, 0x08, + 0x01, 0x90, 0xd1, 0xa9, 0x79, 0x29, 0x60, 0x39, 0x76, 0xb0, 0x1c, 0x8c, 0xeb, 0x24, 0x70, 0xe2, + 0x91, 0x1c, 0xe3, 0x85, 0x47, 0x72, 0x8c, 0x0f, 0x1e, 0xc9, 0x31, 0x4e, 0x78, 0x2c, 0xc7, 0x90, + 0xc4, 0x06, 0x76, 0x99, 0x31, 0x20, 0x00, 0x00, 0xff, 0xff, 0xd4, 0xf7, 0x13, 0xf9, 0xbb, 0x00, + 0x00, 0x00, +} diff --git a/model/trace.proto b/model/trace.proto new file mode 100644 index 000000000..5d4b5bdba --- /dev/null +++ b/model/trace.proto @@ -0,0 +1,12 @@ +syntax = "proto3"; + +package model; + +import "span.proto"; + +message APITrace { + uint64 traceID = 1; + repeated Span spans = 2; + int64 startTime = 6; + int64 endTime = 7; +} diff --git a/model/trace_payload.pb.go b/model/trace_payload.pb.go new file mode 100644 index 000000000..c6f6d498e --- /dev/null +++ b/model/trace_payload.pb.go @@ -0,0 +1,458 @@ +// Code generated by protoc-gen-gogo. +// source: trace_payload.proto +// DO NOT EDIT! + +package model + +import proto "github.com/gogo/protobuf/proto" +import fmt "fmt" +import math "math" + +import io "io" + +// Reference imports to suppress errors if they are not otherwise used. 
+var _ = proto.Marshal +var _ = fmt.Errorf +var _ = math.Inf + +type TracePayload struct { + HostName string `protobuf:"bytes,1,opt,name=hostName,proto3" json:"hostName,omitempty"` + Env string `protobuf:"bytes,2,opt,name=env,proto3" json:"env,omitempty"` + Traces []*APITrace `protobuf:"bytes,3,rep,name=traces" json:"traces,omitempty"` + Transactions []*Span `protobuf:"bytes,4,rep,name=transactions" json:"transactions,omitempty"` +} + +func (m *TracePayload) Reset() { *m = TracePayload{} } +func (m *TracePayload) String() string { return proto.CompactTextString(m) } +func (*TracePayload) ProtoMessage() {} +func (*TracePayload) Descriptor() ([]byte, []int) { return fileDescriptorTracePayload, []int{0} } + +func (m *TracePayload) GetTraces() []*APITrace { + if m != nil { + return m.Traces + } + return nil +} + +func (m *TracePayload) GetTransactions() []*Span { + if m != nil { + return m.Transactions + } + return nil +} + +func init() { + proto.RegisterType((*TracePayload)(nil), "model.TracePayload") +} +func (m *TracePayload) Marshal() (data []byte, err error) { + size := m.Size() + data = make([]byte, size) + n, err := m.MarshalTo(data) + if err != nil { + return nil, err + } + return data[:n], nil +} + +func (m *TracePayload) MarshalTo(data []byte) (int, error) { + var i int + _ = i + var l int + _ = l + if len(m.HostName) > 0 { + data[i] = 0xa + i++ + i = encodeVarintTracePayload(data, i, uint64(len(m.HostName))) + i += copy(data[i:], m.HostName) + } + if len(m.Env) > 0 { + data[i] = 0x12 + i++ + i = encodeVarintTracePayload(data, i, uint64(len(m.Env))) + i += copy(data[i:], m.Env) + } + if len(m.Traces) > 0 { + for _, msg := range m.Traces { + data[i] = 0x1a + i++ + i = encodeVarintTracePayload(data, i, uint64(msg.Size())) + n, err := msg.MarshalTo(data[i:]) + if err != nil { + return 0, err + } + i += n + } + } + if len(m.Transactions) > 0 { + for _, msg := range m.Transactions { + data[i] = 0x22 + i++ + i = encodeVarintTracePayload(data, i, uint64(msg.Size())) 
+ n, err := msg.MarshalTo(data[i:]) + if err != nil { + return 0, err + } + i += n + } + } + return i, nil +} + +func encodeFixed64TracePayload(data []byte, offset int, v uint64) int { + data[offset] = uint8(v) + data[offset+1] = uint8(v >> 8) + data[offset+2] = uint8(v >> 16) + data[offset+3] = uint8(v >> 24) + data[offset+4] = uint8(v >> 32) + data[offset+5] = uint8(v >> 40) + data[offset+6] = uint8(v >> 48) + data[offset+7] = uint8(v >> 56) + return offset + 8 +} +func encodeFixed32TracePayload(data []byte, offset int, v uint32) int { + data[offset] = uint8(v) + data[offset+1] = uint8(v >> 8) + data[offset+2] = uint8(v >> 16) + data[offset+3] = uint8(v >> 24) + return offset + 4 +} +func encodeVarintTracePayload(data []byte, offset int, v uint64) int { + for v >= 1<<7 { + data[offset] = uint8(v&0x7f | 0x80) + v >>= 7 + offset++ + } + data[offset] = uint8(v) + return offset + 1 +} +func (m *TracePayload) Size() (n int) { + var l int + _ = l + l = len(m.HostName) + if l > 0 { + n += 1 + l + sovTracePayload(uint64(l)) + } + l = len(m.Env) + if l > 0 { + n += 1 + l + sovTracePayload(uint64(l)) + } + if len(m.Traces) > 0 { + for _, e := range m.Traces { + l = e.Size() + n += 1 + l + sovTracePayload(uint64(l)) + } + } + if len(m.Transactions) > 0 { + for _, e := range m.Transactions { + l = e.Size() + n += 1 + l + sovTracePayload(uint64(l)) + } + } + return n +} + +func sovTracePayload(x uint64) (n int) { + for { + n++ + x >>= 7 + if x == 0 { + break + } + } + return n +} +func sozTracePayload(x uint64) (n int) { + return sovTracePayload(uint64((x << 1) ^ uint64((int64(x) >> 63)))) +} +func (m *TracePayload) Unmarshal(data []byte) error { + l := len(data) + iNdEx := 0 + for iNdEx < l { + preIndex := iNdEx + var wire uint64 + for shift := uint(0); ; shift += 7 { + if shift >= 64 { + return ErrIntOverflowTracePayload + } + if iNdEx >= l { + return io.ErrUnexpectedEOF + } + b := data[iNdEx] + iNdEx++ + wire |= (uint64(b) & 0x7F) << shift + if b < 0x80 { + break + } + } + 
fieldNum := int32(wire >> 3) + wireType := int(wire & 0x7) + if wireType == 4 { + return fmt.Errorf("proto: TracePayload: wiretype end group for non-group") + } + if fieldNum <= 0 { + return fmt.Errorf("proto: TracePayload: illegal tag %d (wire type %d)", fieldNum, wire) + } + switch fieldNum { + case 1: + if wireType != 2 { + return fmt.Errorf("proto: wrong wireType = %d for field HostName", wireType) + } + var stringLen uint64 + for shift := uint(0); ; shift += 7 { + if shift >= 64 { + return ErrIntOverflowTracePayload + } + if iNdEx >= l { + return io.ErrUnexpectedEOF + } + b := data[iNdEx] + iNdEx++ + stringLen |= (uint64(b) & 0x7F) << shift + if b < 0x80 { + break + } + } + intStringLen := int(stringLen) + if intStringLen < 0 { + return ErrInvalidLengthTracePayload + } + postIndex := iNdEx + intStringLen + if postIndex > l { + return io.ErrUnexpectedEOF + } + m.HostName = string(data[iNdEx:postIndex]) + iNdEx = postIndex + case 2: + if wireType != 2 { + return fmt.Errorf("proto: wrong wireType = %d for field Env", wireType) + } + var stringLen uint64 + for shift := uint(0); ; shift += 7 { + if shift >= 64 { + return ErrIntOverflowTracePayload + } + if iNdEx >= l { + return io.ErrUnexpectedEOF + } + b := data[iNdEx] + iNdEx++ + stringLen |= (uint64(b) & 0x7F) << shift + if b < 0x80 { + break + } + } + intStringLen := int(stringLen) + if intStringLen < 0 { + return ErrInvalidLengthTracePayload + } + postIndex := iNdEx + intStringLen + if postIndex > l { + return io.ErrUnexpectedEOF + } + m.Env = string(data[iNdEx:postIndex]) + iNdEx = postIndex + case 3: + if wireType != 2 { + return fmt.Errorf("proto: wrong wireType = %d for field Traces", wireType) + } + var msglen int + for shift := uint(0); ; shift += 7 { + if shift >= 64 { + return ErrIntOverflowTracePayload + } + if iNdEx >= l { + return io.ErrUnexpectedEOF + } + b := data[iNdEx] + iNdEx++ + msglen |= (int(b) & 0x7F) << shift + if b < 0x80 { + break + } + } + if msglen < 0 { + return 
ErrInvalidLengthTracePayload + } + postIndex := iNdEx + msglen + if postIndex > l { + return io.ErrUnexpectedEOF + } + m.Traces = append(m.Traces, &APITrace{}) + if err := m.Traces[len(m.Traces)-1].Unmarshal(data[iNdEx:postIndex]); err != nil { + return err + } + iNdEx = postIndex + case 4: + if wireType != 2 { + return fmt.Errorf("proto: wrong wireType = %d for field Transactions", wireType) + } + var msglen int + for shift := uint(0); ; shift += 7 { + if shift >= 64 { + return ErrIntOverflowTracePayload + } + if iNdEx >= l { + return io.ErrUnexpectedEOF + } + b := data[iNdEx] + iNdEx++ + msglen |= (int(b) & 0x7F) << shift + if b < 0x80 { + break + } + } + if msglen < 0 { + return ErrInvalidLengthTracePayload + } + postIndex := iNdEx + msglen + if postIndex > l { + return io.ErrUnexpectedEOF + } + m.Transactions = append(m.Transactions, &Span{}) + if err := m.Transactions[len(m.Transactions)-1].Unmarshal(data[iNdEx:postIndex]); err != nil { + return err + } + iNdEx = postIndex + default: + iNdEx = preIndex + skippy, err := skipTracePayload(data[iNdEx:]) + if err != nil { + return err + } + if skippy < 0 { + return ErrInvalidLengthTracePayload + } + if (iNdEx + skippy) > l { + return io.ErrUnexpectedEOF + } + iNdEx += skippy + } + } + + if iNdEx > l { + return io.ErrUnexpectedEOF + } + return nil +} +func skipTracePayload(data []byte) (n int, err error) { + l := len(data) + iNdEx := 0 + for iNdEx < l { + var wire uint64 + for shift := uint(0); ; shift += 7 { + if shift >= 64 { + return 0, ErrIntOverflowTracePayload + } + if iNdEx >= l { + return 0, io.ErrUnexpectedEOF + } + b := data[iNdEx] + iNdEx++ + wire |= (uint64(b) & 0x7F) << shift + if b < 0x80 { + break + } + } + wireType := int(wire & 0x7) + switch wireType { + case 0: + for shift := uint(0); ; shift += 7 { + if shift >= 64 { + return 0, ErrIntOverflowTracePayload + } + if iNdEx >= l { + return 0, io.ErrUnexpectedEOF + } + iNdEx++ + if data[iNdEx-1] < 0x80 { + break + } + } + return iNdEx, nil + case 1: + 
iNdEx += 8 + return iNdEx, nil + case 2: + var length int + for shift := uint(0); ; shift += 7 { + if shift >= 64 { + return 0, ErrIntOverflowTracePayload + } + if iNdEx >= l { + return 0, io.ErrUnexpectedEOF + } + b := data[iNdEx] + iNdEx++ + length |= (int(b) & 0x7F) << shift + if b < 0x80 { + break + } + } + iNdEx += length + if length < 0 { + return 0, ErrInvalidLengthTracePayload + } + return iNdEx, nil + case 3: + for { + var innerWire uint64 + var start int = iNdEx + for shift := uint(0); ; shift += 7 { + if shift >= 64 { + return 0, ErrIntOverflowTracePayload + } + if iNdEx >= l { + return 0, io.ErrUnexpectedEOF + } + b := data[iNdEx] + iNdEx++ + innerWire |= (uint64(b) & 0x7F) << shift + if b < 0x80 { + break + } + } + innerWireType := int(innerWire & 0x7) + if innerWireType == 4 { + break + } + next, err := skipTracePayload(data[start:]) + if err != nil { + return 0, err + } + iNdEx = start + next + } + return iNdEx, nil + case 4: + return iNdEx, nil + case 5: + iNdEx += 4 + return iNdEx, nil + default: + return 0, fmt.Errorf("proto: illegal wireType %d", wireType) + } + } + panic("unreachable") +} + +var ( + ErrInvalidLengthTracePayload = fmt.Errorf("proto: negative length found during unmarshaling") + ErrIntOverflowTracePayload = fmt.Errorf("proto: integer overflow") +) + +func init() { proto.RegisterFile("trace_payload.proto", fileDescriptorTracePayload) } + +var fileDescriptorTracePayload = []byte{ + // 192 bytes of a gzipped FileDescriptorProto + 0x1f, 0x8b, 0x08, 0x00, 0x00, 0x00, 0x00, 0x00, 0x02, 0xff, 0xe2, 0x12, 0x2e, 0x29, 0x4a, 0x4c, + 0x4e, 0x8d, 0x2f, 0x48, 0xac, 0xcc, 0xc9, 0x4f, 0x4c, 0xd1, 0x2b, 0x28, 0xca, 0x2f, 0xc9, 0x17, + 0x62, 0xcd, 0xcd, 0x4f, 0x49, 0xcd, 0x91, 0xe2, 0x06, 0xcb, 0x41, 0xc4, 0xa4, 0xb8, 0x8a, 0x0b, + 0x12, 0xf3, 0x20, 0x6c, 0xa5, 0x69, 0x8c, 0x5c, 0x3c, 0x21, 0x20, 0xb9, 0x00, 0x88, 0x36, 0x21, + 0x29, 0x2e, 0x8e, 0x8c, 0xfc, 0xe2, 0x12, 0xbf, 0xc4, 0xdc, 0x54, 0x09, 0x46, 0x05, 0x46, 0x0d, + 0xce, 0x20, 0x38, 0x5f, 
0x48, 0x80, 0x8b, 0x39, 0x35, 0xaf, 0x4c, 0x82, 0x09, 0x2c, 0x0c, 0x62, + 0x0a, 0xa9, 0x73, 0xb1, 0x81, 0x4d, 0x2e, 0x96, 0x60, 0x56, 0x60, 0xd6, 0xe0, 0x36, 0xe2, 0xd7, + 0x03, 0xdb, 0xa7, 0xe7, 0x18, 0xe0, 0x09, 0x36, 0x35, 0x08, 0x2a, 0x2d, 0xa4, 0xcf, 0xc5, 0x53, + 0x52, 0x94, 0x98, 0x57, 0x9c, 0x98, 0x5c, 0x92, 0x99, 0x9f, 0x57, 0x2c, 0xc1, 0x02, 0x56, 0xce, + 0x0d, 0x55, 0x1e, 0x5c, 0x90, 0x98, 0x17, 0x84, 0xa2, 0xc0, 0x49, 0xe0, 0xc4, 0x23, 0x39, 0xc6, + 0x0b, 0x8f, 0xe4, 0x18, 0x1f, 0x3c, 0x92, 0x63, 0x9c, 0xf0, 0x58, 0x8e, 0x21, 0x89, 0x0d, 0xec, + 0x62, 0x63, 0x40, 0x00, 0x00, 0x00, 0xff, 0xff, 0x5f, 0xc9, 0x04, 0x9e, 0xe8, 0x00, 0x00, 0x00, +} diff --git a/model/trace_payload.proto b/model/trace_payload.proto new file mode 100644 index 000000000..e94bca0fa --- /dev/null +++ b/model/trace_payload.proto @@ -0,0 +1,13 @@ +syntax = "proto3"; + +package model; + +import "trace.proto"; +import "span.proto"; + +message TracePayload { + string hostName = 1; + string env = 2; + repeated APITrace traces = 3; + repeated Span transactions = 4; +} diff --git a/writer/client.go b/writer/client.go new file mode 100644 index 000000000..d5f9adc10 --- /dev/null +++ b/writer/client.go @@ -0,0 +1,33 @@ +package writer + +import ( + "net/http" + + "github.com/DataDog/datadog-trace-agent/config" + log "github.com/cihub/seelog" +) + +// NewClient returns a http.Client configured with the Agent options. 
+func NewClient(conf *config.AgentConfig) (client *http.Client) { + if conf.Proxy != nil { + proxyPath, err := conf.Proxy.URL() + if err != nil { + log.Errorf("failed to configure proxy: %v", err) + return + } + + log.Infof("configuring proxy through host %s", conf.Proxy.Host) + client = &http.Client{ + Timeout: timeout, + Transport: &http.Transport{ + Proxy: http.ProxyURL(proxyPath), + }, + } + } else { + client = &http.Client{ + Timeout: timeout, + } + } + + return +} diff --git a/writer/datadog_endpoint.go b/writer/datadog_endpoint.go new file mode 100644 index 000000000..55387a941 --- /dev/null +++ b/writer/datadog_endpoint.go @@ -0,0 +1,68 @@ +package writer + +import ( + "bytes" + "fmt" + "net/http" +) + +// DatadogEndpoint sends payloads to Datadog API. +type DatadogEndpoint struct { + apiKey string + url string + client *http.Client + + path string +} + +// NewDatadogEndpoint returns an initialized DatadogEndpoint, from a provided http client and remote endpoint path. +func NewDatadogEndpoint(client *http.Client, url, path, apiKey string) *DatadogEndpoint { + if apiKey == "" { + panic(fmt.Errorf("No API key")) + } + + return &DatadogEndpoint{ + apiKey: apiKey, + url: url, + path: path, + client: client, + } +} + +// Write will send the serialized traces payload to the Datadog traces endpoint. +func (e *DatadogEndpoint) Write(payload []byte, headers map[string]string) error { + // Create the request to be sent to the API + url := e.url + e.path + req, err := http.NewRequest("POST", url, bytes.NewBuffer(payload)) + + // If the request cannot be created, there is no point in trying again later, + // it will always yield the same result. 
+ if err != nil { + // atomic.AddInt64(&ae.stats.TracesPayloadError, 1) + return err + } + + // Set API key in the header and issue the request + queryParams := req.URL.Query() + queryParams.Add("api_key", e.apiKey) + req.URL.RawQuery = queryParams.Encode() + + SetExtraHeaders(req.Header, headers) + req.Header.Set("Content-Type", "application/x-protobuf") + req.Header.Set("Content-Encoding", "identity") + + resp, err := e.client.Do(req) + + if err != nil { + return err + } + defer resp.Body.Close() + + // We check the status code to see if the request has succeeded. + if resp.StatusCode/100 != 2 { + return fmt.Errorf("request to %s responded with %s", url, resp.Status) + } + + // Everything went fine + return nil +} diff --git a/writer/endpoint.go b/writer/endpoint.go index 2c6aa2b4e..9087b4f6c 100644 --- a/writer/endpoint.go +++ b/writer/endpoint.go @@ -1,252 +1,28 @@ package writer import ( - "bytes" - "fmt" "net/http" - "sync/atomic" - "time" log "github.com/cihub/seelog" - - "github.com/DataDog/datadog-trace-agent/config" - "github.com/DataDog/datadog-trace-agent/info" - "github.com/DataDog/datadog-trace-agent/model" - "github.com/DataDog/datadog-trace-agent/statsd" - "github.com/DataDog/datadog-trace-agent/watchdog" ) -// timeout is the HTTP timeout for POST requests to the Datadog backend -var timeout = 10 * time.Second - -// apiError stores the error triggered we can't send data to the endpoint. -// It implements the error interface. 
-type apiError struct { - err error - endpoint *APIEndpoint -} - -func newAPIError(err error, endpoint *APIEndpoint) *apiError { - return &apiError{err: err, endpoint: endpoint} -} - -// Returns the error message -func (ae *apiError) Error() string { - return fmt.Sprintf("%s: %v", ae.endpoint.url, ae.err) -} - -// AgentEndpoint is an interface where we write the data -// that comes out of the agent -type AgentEndpoint interface { - // Write sends an agent payload which carries all the - // pre-processed stats/traces - Write(b model.AgentPayload) (int, error) - - // WriteServices sends updates about the services metadata - WriteServices(s model.ServicesMetadata) -} - -// APIEndpoint implements AgentEndpoint to send data to a -// an endpoint and API key. -type APIEndpoint struct { - apiKey string - url string - stats info.EndpointStats - client *http.Client -} - -// NewAPIEndpoint returns a new APIEndpoint from a given config -// of URL (such as https://trace.agent.datadoghq.com) and API -// keys -func NewAPIEndpoint(url, apiKey string) *APIEndpoint { - if apiKey == "" { - panic(fmt.Errorf("No API key")) - } - - ae := APIEndpoint{ - apiKey: apiKey, - url: url, - client: &http.Client{ - Timeout: timeout, - }, - } - go func() { - defer watchdog.LogOnPanic() - ae.logStats() - }() - return &ae -} - -// SetProxy updates the http client used by APIEndpoint to report via the given proxy -func (ae *APIEndpoint) SetProxy(settings *config.ProxySettings) { - proxyPath, err := settings.URL() - if err != nil { - log.Errorf("failed to configure proxy: %v", err) - return - } - ae.client = &http.Client{ - Timeout: timeout, - Transport: &http.Transport{ - Proxy: http.ProxyURL(proxyPath), - }, - } -} - -// Write will send the serialized payload to the API endpoint. 
-func (ae *APIEndpoint) Write(p model.AgentPayload) (int, error) { - startFlush := time.Now() - - // Serialize the payload to send it to the API - data, err := model.EncodeAgentPayload(&p) - if err != nil { - log.Errorf("encoding issue: %v", err) - return 0, err - } - - payloadSize := len(data) - statsd.Client.Count("datadog.trace_agent.writer.payload_bytes", int64(payloadSize), nil, 1) - atomic.AddInt64(&ae.stats.TracesBytes, int64(payloadSize)) - atomic.AddInt64(&ae.stats.TracesCount, int64(len(p.Traces))) - atomic.AddInt64(&ae.stats.TracesStats, int64(len(p.Stats))) - atomic.AddInt64(&ae.stats.TracesPayload, 1) - - // Create the request to be sent to the API - url := ae.url + model.AgentPayloadAPIPath() - req, err := http.NewRequest("POST", url, bytes.NewBuffer(data)) - - // If the request cannot be created, there is no point in trying again later, - // it will always yield the same result. - if err != nil { - log.Errorf("could not create request for endpoint %s: %v", url, err) - atomic.AddInt64(&ae.stats.TracesPayloadError, 1) - return payloadSize, err - } - - // Set API key in the header and issue the request - queryParams := req.URL.Query() - queryParams.Add("api_key", ae.apiKey) - req.URL.RawQuery = queryParams.Encode() - - model.SetAgentPayloadHeaders(req.Header, p.Extras()) - resp, err := ae.client.Do(req) - - // If the request fails, we'll try again later. - if err != nil { - log.Errorf("error when requesting to endpoint %s: %v", url, err) - atomic.AddInt64(&ae.stats.TracesPayloadError, 1) - return payloadSize, newAPIError(err, ae) - } - defer resp.Body.Close() - - // We check the status code to see if the request has succeeded. 
- if resp.StatusCode/100 != 2 { - err := fmt.Errorf("request to %s responded with %s", url, resp.Status) - log.Error(err) - atomic.AddInt64(&ae.stats.TracesPayloadError, 1) - - // Only retry for 5xx (server) errors - if resp.StatusCode/100 == 5 { - return payloadSize, newAPIError(err, ae) - } - - // Does not retry for other errors - return payloadSize, err - } - - flushTime := time.Since(startFlush) - log.Infof("flushed payload to the API, time:%s, size:%d", flushTime, len(data)) - statsd.Client.Gauge("datadog.trace_agent.writer.flush_duration", flushTime.Seconds(), nil, 1) - - // Everything went fine - return payloadSize, nil -} - -// WriteServices writes services to the services endpoint -// This function very loosely logs and returns if any error happens. -// See comment above. -func (ae *APIEndpoint) WriteServices(s model.ServicesMetadata) { - // Serialize the data to be sent to the API endpoint - data, err := model.EncodeServicesPayload(s) - if err != nil { - log.Errorf("encoding issue: %v", err) - return - } - - payloadSize := len(data) - atomic.AddInt64(&ae.stats.ServicesBytes, int64(payloadSize)) - atomic.AddInt64(&ae.stats.ServicesPayload, 1) - - // Create the request - url := ae.url + model.ServicesPayloadAPIPath() - req, err := http.NewRequest("POST", url, bytes.NewBuffer(data)) - if err != nil { - log.Errorf("could not create request for endpoint %s: %v", url, err) - atomic.AddInt64(&ae.stats.ServicesPayloadError, 1) - return - } - - // Set the header with the API key and issue the request - queryParams := req.URL.Query() - queryParams.Add("api_key", ae.apiKey) - req.URL.RawQuery = queryParams.Encode() - model.SetServicesPayloadHeaders(req.Header) - resp, err := ae.client.Do(req) - if err != nil { - log.Errorf("error when requesting to endpoint %s: %v", url, err) - atomic.AddInt64(&ae.stats.ServicesPayloadError, 1) - return - } - defer resp.Body.Close() - - if resp.StatusCode/100 != 2 { - log.Errorf("request to %s responded with %s", url, resp.Status) - 
atomic.AddInt64(&ae.stats.ServicesPayloadError, 1) - return - } - - // Everything went fine. - log.Infof("flushed %d services to the API", len(s)) +// Endpoint is an interface where we send the data from the Agent. +type Endpoint interface { + Write(payload []byte, headers map[string]string) error } -// logStats periodically submits stats about the endpoint to statsd -func (ae *APIEndpoint) logStats() { - var accStats info.EndpointStats - - for range time.Tick(time.Minute) { - // Load counters and reset them for the next flush - accStats.TracesPayload = atomic.SwapInt64(&ae.stats.TracesPayload, 0) - accStats.TracesPayloadError = atomic.SwapInt64(&ae.stats.TracesPayloadError, 0) - accStats.TracesBytes = atomic.SwapInt64(&ae.stats.TracesBytes, 0) - accStats.TracesCount = atomic.SwapInt64(&ae.stats.TracesCount, 0) - accStats.TracesStats = atomic.SwapInt64(&ae.stats.TracesStats, 0) - accStats.ServicesPayload = atomic.SwapInt64(&ae.stats.ServicesPayload, 0) - accStats.ServicesPayloadError = atomic.SwapInt64(&ae.stats.ServicesPayloadError, 0) - accStats.ServicesBytes = atomic.SwapInt64(&ae.stats.ServicesBytes, 0) - - statsd.Client.Count("datadog.trace_agent.endpoint.traces_payload", int64(accStats.TracesPayload), nil, 1) - statsd.Client.Count("datadog.trace_agent.endpoint.traces_payload_error", int64(accStats.TracesPayloadError), nil, 1) - statsd.Client.Count("datadog.trace_agent.endpoint.traces_bytes", int64(accStats.TracesBytes), nil, 1) - statsd.Client.Count("datadog.trace_agent.endpoint.traces_count", int64(accStats.TracesCount), nil, 1) - statsd.Client.Count("datadog.trace_agent.endpoint.traces_stats", int64(accStats.TracesStats), nil, 1) - statsd.Client.Count("datadog.trace_agent.endpoint.services_payload", int64(accStats.ServicesPayload), nil, 1) - statsd.Client.Count("datadog.trace_agent.endpoint.services_payload_error", int64(accStats.ServicesPayloadError), nil, 1) - statsd.Client.Count("datadog.trace_agent.endpoint.services_bytes", 
int64(accStats.ServicesBytes), nil, 1) - - info.UpdateEndpointStats(accStats) - } -} - -// NullEndpoint implements AgentEndpoint, it just logs data -// and drops everything into /dev/null +// NullEndpoint is a void endpoint dropping data. type NullEndpoint struct{} -// Write just logs and bails -func (ne NullEndpoint) Write(p model.AgentPayload) (int, error) { - log.Debug("null endpoint: dropping payload, %d traces, %d stats buckets", p.Traces, p.Stats) - return 0, nil +// Write of NullEndpoint just drops the payload and log its size. +func (ne *NullEndpoint) Write(payload []byte, headers map[string]string) error { + log.Debug("null endpoint: dropping payload, size: %d", len(payload)) + return nil } -// WriteServices just logs and stops -func (ne NullEndpoint) WriteServices(s model.ServicesMetadata) { - log.Debugf("null endpoint: dropping services update %v", s) +// SetExtraHeaders appends a header map to HTTP headers. +func SetExtraHeaders(h http.Header, extras map[string]string) { + for key, value := range extras { + h.Set(key, value) + } } diff --git a/writer/legacy_endpoint.go b/writer/legacy_endpoint.go new file mode 100644 index 000000000..1393f8cb3 --- /dev/null +++ b/writer/legacy_endpoint.go @@ -0,0 +1,249 @@ +package writer + +import ( + "bytes" + "fmt" + "net/http" + "sync/atomic" + "time" + + log "github.com/cihub/seelog" + + "github.com/DataDog/datadog-trace-agent/config" + "github.com/DataDog/datadog-trace-agent/info" + "github.com/DataDog/datadog-trace-agent/model" + "github.com/DataDog/datadog-trace-agent/statsd" + "github.com/DataDog/datadog-trace-agent/watchdog" +) + +// timeout is the HTTP timeout for POST requests to the Datadog backend +var timeout = 10 * time.Second + +// apiError stores the error triggered we can't send data to the endpoint. +// It implements the error interface. 
+type apiError struct { + err error + endpoint *APIEndpoint +} + +func newAPIError(err error, endpoint *APIEndpoint) *apiError { + return &apiError{err: err, endpoint: endpoint} +} + +// Returns the error message +func (ae *apiError) Error() string { + return fmt.Sprintf("%s: %v", ae.endpoint.url, ae.err) +} + +// AgentEndpoint is an interface where we write the data +// that comes out of the agent +type AgentEndpoint interface { + // Write sends an agent payload which carries all the + // pre-processed stats/traces + Write(b model.AgentPayload) (int, error) + + // WriteServices sends updates about the services metadata + WriteServices(s model.ServicesMetadata) +} + +// APIEndpoint implements AgentEndpoint to send data to a +// an endpoint and API key. +type APIEndpoint struct { + apiKey string + url string + stats info.EndpointStats + client *http.Client +} + +// NewAPIEndpoint returns a new APIEndpoint from a given config +// of URL (such as https://trace.agent.datadoghq.com) and API +// keys +func NewAPIEndpoint(url, apiKey string) *APIEndpoint { + if apiKey == "" { + panic(fmt.Errorf("No API key")) + } + + ae := APIEndpoint{ + apiKey: apiKey, + url: url, + client: &http.Client{ + Timeout: timeout, + }, + } + go func() { + defer watchdog.LogOnPanic() + ae.logStats() + }() + return &ae +} + +// SetProxy updates the http client used by APIEndpoint to report via the given proxy +func (ae *APIEndpoint) SetProxy(settings *config.ProxySettings) { + proxyPath, err := settings.URL() + if err != nil { + log.Errorf("failed to configure proxy: %v", err) + return + } + ae.client = &http.Client{ + Timeout: timeout, + Transport: &http.Transport{ + Proxy: http.ProxyURL(proxyPath), + }, + } +} + +// Write will send the serialized payload to the API endpoint. 
+func (ae *APIEndpoint) Write(p model.AgentPayload) (int, error) { + startFlush := time.Now() + + // Serialize the payload to send it to the API + data, err := model.EncodeAgentPayload(&p) + if err != nil { + log.Errorf("encoding issue: %v", err) + return 0, err + } + + payloadSize := len(data) + statsd.Client.Count("datadog.trace_agent.writer.payload_bytes", int64(payloadSize), nil, 1) + atomic.AddInt64(&ae.stats.TracesBytes, int64(payloadSize)) + atomic.AddInt64(&ae.stats.TracesStats, int64(len(p.Stats))) + atomic.AddInt64(&ae.stats.TracesPayload, 1) + + // Create the request to be sent to the API + url := ae.url + model.AgentPayloadAPIPath() + req, err := http.NewRequest("POST", url, bytes.NewBuffer(data)) + + // If the request cannot be created, there is no point in trying again later, + // it will always yield the same result. + if err != nil { + log.Errorf("could not create request for endpoint %s: %v", url, err) + atomic.AddInt64(&ae.stats.TracesPayloadError, 1) + return payloadSize, err + } + + // Set API key in the header and issue the request + queryParams := req.URL.Query() + queryParams.Add("api_key", ae.apiKey) + req.URL.RawQuery = queryParams.Encode() + + model.SetAgentPayloadHeaders(req.Header, p.Extras()) + resp, err := ae.client.Do(req) + + // If the request fails, we'll try again later. + if err != nil { + log.Errorf("error when requesting to endpoint %s: %v", url, err) + atomic.AddInt64(&ae.stats.TracesPayloadError, 1) + return payloadSize, newAPIError(err, ae) + } + defer resp.Body.Close() + + // We check the status code to see if the request has succeeded. 
+ if resp.StatusCode/100 != 2 { + err := fmt.Errorf("request to %s responded with %s", url, resp.Status) + log.Error(err) + atomic.AddInt64(&ae.stats.TracesPayloadError, 1) + + // Only retry for 5xx (server) errors + if resp.StatusCode/100 == 5 { + return payloadSize, newAPIError(err, ae) + } + + // Does not retry for other errors + return payloadSize, err + } + + flushTime := time.Since(startFlush) + log.Infof("flushed payload to the API, time:%s, size:%d", flushTime, len(data)) + statsd.Client.Gauge("datadog.trace_agent.writer.flush_duration", flushTime.Seconds(), nil, 1) + + // Everything went fine + return payloadSize, nil +} + +// WriteServices writes services to the services endpoint +// This function very loosely logs and returns if any error happens. +// See comment above. +func (ae *APIEndpoint) WriteServices(s model.ServicesMetadata) { + // Serialize the data to be sent to the API endpoint + data, err := model.EncodeServicesPayload(s) + if err != nil { + log.Errorf("encoding issue: %v", err) + return + } + + payloadSize := len(data) + atomic.AddInt64(&ae.stats.ServicesBytes, int64(payloadSize)) + atomic.AddInt64(&ae.stats.ServicesPayload, 1) + + // Create the request + url := ae.url + model.ServicesPayloadAPIPath() + req, err := http.NewRequest("POST", url, bytes.NewBuffer(data)) + if err != nil { + log.Errorf("could not create request for endpoint %s: %v", url, err) + atomic.AddInt64(&ae.stats.ServicesPayloadError, 1) + return + } + + // Set the header with the API key and issue the request + queryParams := req.URL.Query() + queryParams.Add("api_key", ae.apiKey) + req.URL.RawQuery = queryParams.Encode() + model.SetServicesPayloadHeaders(req.Header) + resp, err := ae.client.Do(req) + if err != nil { + log.Errorf("error when requesting to endpoint %s: %v", url, err) + atomic.AddInt64(&ae.stats.ServicesPayloadError, 1) + return + } + defer resp.Body.Close() + + if resp.StatusCode/100 != 2 { + log.Errorf("request to %s responded with %s", url, resp.Status) + 
atomic.AddInt64(&ae.stats.ServicesPayloadError, 1) + return + } + + // Everything went fine. + log.Infof("flushed %d services to the API", len(s)) +} + +// logStats periodically submits stats about the endpoint to statsd +func (ae *APIEndpoint) logStats() { + var accStats info.EndpointStats + + for range time.Tick(time.Minute) { + // Load counters and reset them for the next flush + accStats.TracesPayload = atomic.SwapInt64(&ae.stats.TracesPayload, 0) + accStats.TracesPayloadError = atomic.SwapInt64(&ae.stats.TracesPayloadError, 0) + accStats.TracesBytes = atomic.SwapInt64(&ae.stats.TracesBytes, 0) + accStats.TracesStats = atomic.SwapInt64(&ae.stats.TracesStats, 0) + accStats.ServicesPayload = atomic.SwapInt64(&ae.stats.ServicesPayload, 0) + accStats.ServicesPayloadError = atomic.SwapInt64(&ae.stats.ServicesPayloadError, 0) + accStats.ServicesBytes = atomic.SwapInt64(&ae.stats.ServicesBytes, 0) + + statsd.Client.Count("datadog.trace_agent.endpoint.traces_payload", int64(accStats.TracesPayload), nil, 1) + statsd.Client.Count("datadog.trace_agent.endpoint.traces_payload_error", int64(accStats.TracesPayloadError), nil, 1) + statsd.Client.Count("datadog.trace_agent.endpoint.traces_bytes", int64(accStats.TracesBytes), nil, 1) + statsd.Client.Count("datadog.trace_agent.endpoint.traces_stats", int64(accStats.TracesStats), nil, 1) + statsd.Client.Count("datadog.trace_agent.endpoint.services_payload", int64(accStats.ServicesPayload), nil, 1) + statsd.Client.Count("datadog.trace_agent.endpoint.services_payload_error", int64(accStats.ServicesPayloadError), nil, 1) + statsd.Client.Count("datadog.trace_agent.endpoint.services_bytes", int64(accStats.ServicesBytes), nil, 1) + + info.UpdateEndpointStats(accStats) + } +} + +// NullAgentEndpoint implements AgentEndpoint, it just logs data +// and drops everything into /dev/null +type NullAgentEndpoint struct{} + +// Write just logs and bails +func (ne NullAgentEndpoint) Write(p model.AgentPayload) (int, error) { + log.Debug("null 
endpoint: dropping payload, %d traces, %d stats buckets", p.Traces, p.Stats) + return 0, nil +} + +// WriteServices just logs and stops +func (ne NullAgentEndpoint) WriteServices(s model.ServicesMetadata) { + log.Debugf("null endpoint: dropping services update %v", s) +} diff --git a/writer/endpoint_test.go b/writer/legacy_endpoint_test.go similarity index 100% rename from writer/endpoint_test.go rename to writer/legacy_endpoint_test.go diff --git a/writer/writer.go b/writer/legacy_writer.go similarity index 99% rename from writer/writer.go rename to writer/legacy_writer.go index f3b099920..39ff0efda 100644 --- a/writer/writer.go +++ b/writer/legacy_writer.go @@ -75,7 +75,7 @@ func NewWriter(conf *config.AgentConfig) *Writer { } } else { log.Info("API interface is disabled, flushing to /dev/null instead") - endpoint = NullEndpoint{} + endpoint = NullAgentEndpoint{} } return &Writer{ diff --git a/writer/trace_writer.go b/writer/trace_writer.go new file mode 100644 index 000000000..a19c8448d --- /dev/null +++ b/writer/trace_writer.go @@ -0,0 +1,144 @@ +package writer + +import ( + "strings" + "sync" + "time" + + log "github.com/cihub/seelog" + "github.com/golang/protobuf/proto" + + "github.com/DataDog/datadog-trace-agent/config" + "github.com/DataDog/datadog-trace-agent/info" + "github.com/DataDog/datadog-trace-agent/model" + "github.com/DataDog/datadog-trace-agent/statsd" + "github.com/DataDog/datadog-trace-agent/watchdog" +) + +const ( + languageHeaderKey = "X-Datadog-Reported-Languages" +) + +// TraceWriter ingests sampled traces and flush them to the API. +type TraceWriter struct { + endpoint Endpoint + + InTraces <-chan *model.Trace + + traceBuffer []*model.APITrace + + exit chan struct{} + exitWG *sync.WaitGroup + + conf *config.AgentConfig +} + +// NewTraceWriter returns a new writer for traces. 
+func NewTraceWriter(conf *config.AgentConfig) *TraceWriter { + var endpoint Endpoint + + if conf.APIEnabled { + client := NewClient(conf) + endpoint = NewDatadogEndpoint(client, conf.APIEndpoint, "/api/v0.2/traces", conf.APIKey) + } else { + log.Info("API interface is disabled, flushing to /dev/null instead") + endpoint = &NullEndpoint{} + } + + return &TraceWriter{ + endpoint: endpoint, + + traceBuffer: []*model.APITrace{}, + + exit: make(chan struct{}), + exitWG: &sync.WaitGroup{}, + + conf: conf, + } +} + +// Start starts the writer. +func (w *TraceWriter) Start() { + go func() { + defer watchdog.LogOnPanic() + w.Run() + }() +} + +// Run runs the main loop of the writer goroutine. If buffers payloads and +// services read from input chans and flushes them when necessary. +func (w *TraceWriter) Run() { + w.exitWG.Add(1) + defer w.exitWG.Done() + + // for now, simply flush every x seconds + flushTicker := time.NewTicker(5 * time.Second) + defer flushTicker.Stop() + + for { + select { + case trace := <-w.InTraces: + // no need for lock for now as flush is sequential + // TODO: async flush/retry + apiTrace := trace.APITrace() + w.traceBuffer = append(w.traceBuffer, apiTrace) + case <-flushTicker.C: + w.Flush() + case <-w.exit: + log.Info("exiting, flushing all remaining traces") + w.Flush() + return + } + } +} + +// Stop stops the main Run loop. +func (w *TraceWriter) Stop() { + close(w.exit) + w.exitWG.Wait() +} + +// Flush flushes traces the data in the API +func (w *TraceWriter) Flush() { + traces := w.traceBuffer + log.Debugf("going to flush %d traces", len(traces)) + + // Make the new buffer of the size of the previous one. + // that's a fair estimation and it should reduce allocations without using too much memory. 
+ w.traceBuffer = make([]*model.APITrace, 0, len(traces)) + + tracePayload := model.TracePayload{ + HostName: w.conf.HostName, + Env: w.conf.DefaultEnv, + Traces: traces, + } + + serialized, err := proto.Marshal(&tracePayload) + if err != nil { + log.Errorf("failed to serialize trace payload, data got dropped, err: %s", err) + return + } + + headers := map[string]string{ + languageHeaderKey: strings.Join(info.Languages(), "|"), + } + + startFlush := time.Now() + + // Send the payload to the endpoint + // TODO: track metrics/stats about payload + err = w.endpoint.Write(serialized, headers) + + flushTime := time.Since(startFlush) + + // TODO: if error, depending on why, replay later. + if err != nil { + statsd.Client.Count("datadog.trace_agent.writer.flush", 1, []string{"status:error"}, 1) + log.Errorf("failed to flush trace payload: %s", err) + } + + log.Infof("flushed trace payload to the API, time:%s, size:%d bytes", flushTime, len(serialized)) + statsd.Client.Count("datadog.trace_agent.writer.flush", 1, []string{"status:success"}, 1) + statsd.Client.Gauge("datadog.trace_agent.writer.traces.flush_duration", flushTime.Seconds(), nil, 1) + statsd.Client.Count("datadog.trace_agent.writer.payload_bytes", int64(len(serialized)), nil, 1) +} From 1cad4acf99fe418f30e7763da1328de0203dbd1e Mon Sep 17 00:00:00 2001 From: Benjamin Fernandes Date: Mon, 18 Dec 2017 00:38:10 +0100 Subject: [PATCH 2/9] Improve info code and support new trace writer --- agent/main.go | 2 +- info/info.go | 102 +++++++++++++++++++++++--------------- info/info_test.go | 34 ++++--------- info/writer.go | 64 ++++++++++++++++++++++++ writer/legacy_endpoint.go | 47 ------------------ writer/trace_writer.go | 36 ++++++++++++-- 6 files changed, 169 insertions(+), 116 deletions(-) create mode 100644 info/writer.go diff --git a/agent/main.go b/agent/main.go index b23c1a374..33ee78239 100644 --- a/agent/main.go +++ b/agent/main.go @@ -148,7 +148,7 @@ func runAgent(exit chan struct{}) { if opts.info { if err 
:= info.Info(os.Stdout, agentConf); err != nil { - // need not display the error, Info should do it already + os.Stdout.WriteString(fmt.Sprintf("failed to print info: %s\n", err)) os.Exit(1) } return diff --git a/info/info.go b/info/info.go index 4b56e582f..5a1948be2 100644 --- a/info/info.go +++ b/info/info.go @@ -1,12 +1,14 @@ package info import ( + "bytes" "encoding/json" "expvar" // automatically publish `/debug/vars` on HTTP port "fmt" "io" "net/http" "os" + "regexp" "strconv" "strings" "sync" @@ -19,10 +21,17 @@ import ( ) var ( - infoMu sync.RWMutex - receiverStats []TagStats // only for the last minute - languages []string - endpointStats EndpointStats // only for the last minute + infoMu sync.RWMutex + receiverStats []TagStats // only for the last minute + languages []string + + // TODO: move from package globals to a clean single struct + + traceWriterInfo TraceWriterInfo + // anticipate future writers + // statsWriterInfo StatsWriterInfo + // serviceWriterInfo ServiceWriterInfo + watchdogInfo watchdog.Info samplerInfo SamplerInfo prioritySamplerInfo SamplerInfo @@ -46,35 +55,40 @@ const ( Hostname: {{.Status.Config.HostName}} Receiver: {{.Status.Config.ReceiverHost}}:{{.Status.Config.ReceiverPort}} - API Endpoint: {{.Status.Config.APIEndpoint}}{{ range $i, $ts := .Status.Receiver }} + API Endpoint: {{.Status.Config.APIEndpoint}} --- Receiver stats (1 min) --- - -> tags: {{if $ts.Tags.Lang}}{{ $ts.Tags.Lang }}, {{ $ts.Tags.LangVersion }}, {{ $ts.Tags.Interpreter }}, {{ $ts.Tags.TracerVersion }}{{else}}None{{end}} - + {{ range $i, $ts := .Status.Receiver }} + From {{if $ts.Tags.Lang}}{{ $ts.Tags.Lang }} {{ $ts.Tags.LangVersion }} ({{ $ts.Tags.Interpreter }}), client {{ $ts.Tags.TracerVersion }}{{else}}unknown clients{{end}} Traces received: {{ $ts.Stats.TracesReceived }} ({{ $ts.Stats.TracesBytes }} bytes) Spans received: {{ $ts.Stats.SpansReceived }} Services received: {{ $ts.Stats.ServicesReceived }} ({{ $ts.Stats.ServicesBytes }} bytes) - Total data 
received: {{ add $ts.Stats.TracesBytes $ts.Stats.ServicesBytes }} bytes{{if gt $ts.Stats.TracesDropped 0}} - + {{if gt $ts.Stats.TracesDropped 0}} WARNING: Traces dropped: {{ $ts.Stats.TracesDropped }} - {{end}}{{if gt $ts.Stats.SpansDropped 0}}WARNING: Spans dropped: {{ $ts.Stats.SpansDropped }}{{end}} + {{end}} + {{if gt $ts.Stats.SpansDropped 0}} + WARNING: Spans dropped: {{ $ts.Stats.SpansDropped }} + {{end}} + + {{end}} + {{ range $key, $value := .Status.RateByService }} + Priority sampling rate for '{{ $key }}': {{percent $value}} % + {{ end }} + {{if lt .Status.PreSampler.Rate 1.0}} + WARNING: Pre-sampling traces: {{percent .Status.PreSampler.Rate}} % + {{end}} + {{if .Status.PreSampler.Error}} + WARNING: Pre-sampler: {{.Status.PreSampler.Error}} + {{end}} - ------------------------------{{end}} -{{ range $key, $value := .Status.RateByService }} - Sample rate for '{{ $key }}': {{percent $value}} %{{ end }}{{if lt .Status.PreSampler.Rate 1.0}} + --- Writer stats (1 min) --- + + Traces: {{.Status.TraceWriter.Payloads}} payloads, {{.Status.TraceWriter.Traces}} traces, {{.Status.TraceWriter.Bytes}} bytes + {{if gt .Status.TraceWriter.Errors 0}}WARNING: Traces API errors (1 min): {{.Status.TraceWriter.Errors}}{{end}} - WARNING: Pre-sampling traces: {{percent .Status.PreSampler.Rate}} % -{{end}}{{if .Status.PreSampler.Error}} WARNING: Pre-sampler: {{.Status.PreSampler.Error}} -{{end}} - - Bytes sent (1 min): {{add .Status.Endpoint.TracesBytes .Status.Endpoint.ServicesBytes}} - Traces sent (1 min): {{.Status.Endpoint.TracesCount}} - Stats sent (1 min): {{.Status.Endpoint.TracesStats}} -{{if gt .Status.Endpoint.TracesPayloadError 0}} WARNING: Traces API errors (1 min): {{.Status.Endpoint.TracesPayloadError}}/{{.Status.Endpoint.TracesPayload}} -{{end}}{{if gt .Status.Endpoint.ServicesPayloadError 0}} WARNING: Services API errors (1 min): {{.Status.Endpoint.ServicesPayloadError}}/{{.Status.Endpoint.ServicesPayload}} -{{end}} ` + notRunningTmplSrc = `{{.Banner}} 
{{.Program}} {{.Banner}} @@ -82,6 +96,7 @@ const ( Not running (port {{.ReceiverPort}}) ` + errorTmplSrc = `{{.Banner}} {{.Program}} {{.Banner}} @@ -110,6 +125,7 @@ func UpdateReceiverStats(rs *ReceiverStats) { languages = rs.Languages() } +// Languages exposes languages reporting traces to the Agent func Languages() []string { infoMu.Lock() defer infoMu.Unlock() @@ -123,19 +139,6 @@ func publishReceiverStats() interface{} { return receiverStats } -// UpdateEndpointStats updates internal stats about API endpoints -func UpdateEndpointStats(es EndpointStats) { - infoMu.Lock() - defer infoMu.Unlock() - endpointStats = es -} - -func publishEndpointStats() interface{} { - infoMu.RLock() - defer infoMu.RUnlock() - return endpointStats -} - // UpdateSamplerInfo updates internal stats about signature sampling func UpdateSamplerInfo(ss SamplerInfo) { infoMu.Lock() @@ -229,8 +232,10 @@ func InitInfo(conf *config.AgentConfig) error { expvar.Publish("uptime", expvar.Func(publishUptime)) expvar.Publish("version", expvar.Func(publishVersion)) expvar.Publish("receiver", expvar.Func(publishReceiverStats)) - expvar.Publish("endpoint", expvar.Func(publishEndpointStats)) expvar.Publish("sampler", expvar.Func(publishSamplerInfo)) + expvar.Publish("trace_writer", expvar.Func(publishTraceWriterInfo)) + // expvar.Publish("writer.stats", expvar.Func(publishStatsWriterInfo)) + // expvar.Publish("writer.services", expvar.Func(publishServiceWriterInfo)) expvar.Publish("prioritysampler", expvar.Func(publishPrioritySamplerInfo)) expvar.Publish("ratebyservice", expvar.Func(publishRateByService)) expvar.Publish("watchdog", expvar.Func(publishWatchdogInfo)) @@ -283,7 +288,7 @@ type StatusInfo struct { Version infoVersion `json:"version"` Receiver []TagStats `json:"receiver"` RateByService map[string]float64 `json:"ratebyservice"` - Endpoint EndpointStats `json:"endpoint"` + TraceWriter TraceWriterInfo `json:"trace_writer"` Watchdog watchdog.Info `json:"watchdog"` PreSampler 
sampler.PreSamplerStats `json:"presampler"` Config config.AgentConfig `json:"config"` @@ -412,12 +417,14 @@ func Info(w io.Writer, conf *config.AgentConfig) error { // remove the default service and env, it can be inferred from other // values so has little added-value and could be confusing for users. // Besides, if one still really wants it: - // curl http://localhost:8126/degug/vars would show it. + // curl http://localhost:8126/debug/vars would show it. if info.RateByService != nil { delete(info.RateByService, "service:,env:") } - err = infoTmpl.Execute(w, struct { + var buffer bytes.Buffer + + err = infoTmpl.Execute(&buffer, struct { Banner string Program string Status *StatusInfo @@ -429,5 +436,20 @@ func Info(w io.Writer, conf *config.AgentConfig) error { if err != nil { return err } + + cleanInfo := CleanInfoExtraLines(buffer.String()) + + w.Write([]byte(cleanInfo)) + // w.Write(buffer.Bytes()) + return nil } + +// CleanInfoExtraLines removes empty lines from template code indentation. +// The idea is that an indented empty line (only indentation spaces) is because of code indentation, +// so we remove it. +// Real legit empty lines contain no space. 
+func CleanInfoExtraLines(info string) string { + var indentedEmptyLines = regexp.MustCompile("\n( +\n)+") + return indentedEmptyLines.ReplaceAllString(info, "\n") +} diff --git a/info/info_test.go b/info/info_test.go index 585568500..57e2da581 100644 --- a/info/info_test.go +++ b/info/info_test.go @@ -36,20 +36,16 @@ Trace Agent (v 0.99.0) --- Receiver stats (1 min) --- - -> tags: None - + From unknown clients Traces received: 0 (0 bytes) Spans received: 0 Services received: 0 (0 bytes) - Total data received: 0 bytes - ------------------------------ + Priority sampling rate for 'service:myapp,env:dev': 12.3 % - Sample rate for 'service:myapp,env:dev': 12.3 % + --- Writer stats (1 min) --- - Bytes sent (1 min): 3591 - Traces sent (1 min): 6 - Stats sent (1 min): 60 + Traces: 4 payloads, 26 traces, 3245 bytes ` @@ -67,28 +63,20 @@ Trace Agent (v 0.99.0) --- Receiver stats (1 min) --- - -> tags: python, 2.7.6, CPython, 0.9.0 - + From python 2.7.6 (CPython), client 0.9.0 Traces received: 70 (10679 bytes) Spans received: 984 Services received: 0 (0 bytes) - Total data received: 10679 bytes - WARNING: Traces dropped: 23 WARNING: Spans dropped: 184 - ------------------------------ - - WARNING: Pre-sampling traces: 42.1 % WARNING: Pre-sampler: raising pre-sampling rate from 3.1 % to 5.0 % + --- Writer stats (1 min) --- - Bytes sent (1 min): 3591 - Traces sent (1 min): 6 - Stats sent (1 min): 60 - WARNING: Traces API errors (1 min): 3/4 - WARNING: Services API errors (1 min): 1/2 + Traces: 4 payloads, 26 traces, 3245 bytes + WARNING: Traces API errors (1 min): 3 ` ) @@ -101,7 +89,7 @@ func (h *testServerHandler) ServeHTTP(w http.ResponseWriter, r *http.Request) { _, err := w.Write([]byte(`{ "cmdline": ["./trace-agent"], "config": 
{"Enabled":true,"HostName":"localhost.localdomain","DefaultEnv":"none","APIEndpoint":"https://trace.agent.datadoghq.com","APIEnabled":true,"APIPayloadBufferMaxSize":16777216,"BucketInterval":10000000000,"ExtraAggregators":[],"ExtraSampleRate":1,"MaxTPS":10,"ReceiverHost":"localhost","ReceiverPort":8126,"ConnectionLimit":2000,"ReceiverTimeout":0,"StatsdHost":"127.0.0.1","StatsdPort":8125,"LogLevel":"INFO","LogFilePath":"/var/log/datadog/trace-agent.log"}, -"endpoint": {"TracesPayload":4,"TracesPayloadError":0,"TracesBytes":3245,"TracesCount":6,"TracesStats":60,"ServicesPayload":2,"ServicesPayloadError":0,"ServicesBytes":346}, +"trace_writer": {"Payloads":4,"Bytes":3245,"Traces":26,"Errors":0}, "memstats": {"Alloc":773552,"TotalAlloc":773552,"Sys":3346432,"Lookups":6,"Mallocs":7231,"Frees":561,"HeapAlloc":773552,"HeapSys":1572864,"HeapIdle":49152,"HeapInuse":1523712,"HeapReleased":0,"HeapObjects":6670,"StackInuse":524288,"StackSys":524288,"MSpanInuse":24480,"MSpanSys":32768,"MCacheInuse":4800,"MCacheSys":16384,"BuckHashSys":2675,"GCSys":131072,"OtherSys":1066381,"NextGC":4194304,"LastGC":0,"PauseTotalNs":0,"PauseNs":[0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0],"PauseEnd":[0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0],"NumGC":0,"GCCPUFraction":0,"EnableGC":true,"DebugGC":false,"BySize":[{"Size":0,"Mallocs":0,"Frees":0},{"Size":8,"Mallocs":126,"Frees":0},{"Size":16,"Mallocs":825,"Frees":0},{"Size":32,"Mallocs":4208,"Frees":0},{"Size":48,"Mallocs":345,"Frees":0},{"Size":64,"Mallocs":262,"Frees":0},{"Size":80,"Mallocs":93,"Frees":0},{"Size":96,"Mallocs":70,"Frees":0},{"Size":112,"Mallocs":97,"Frees":0},{"Size":128,"Mallocs":24,"Frees":0},{"Size":144,"Mallocs":25,"Frees":0},{"Size":160,"Mallocs":57,"Frees":0},{"Size":176,"Mallocs":128,"Frees":0},{"Size":192,"Mallocs":13,"Frees":0},{"Size":208,"Mallocs":77,"Frees":0},{"Size":224,"Mallocs":3,"Frees":0},{"Size":240,"Mallocs":2,"Frees":0},{"Size":256,"Mallocs":17,"Frees":0},{"Size":288,"Mallocs":64,"Frees":0},{"Size":320,"Mallocs":12,"Frees":0},{"Size":352,"Mallocs":20,"Frees":0},{"Size":384,"Mallocs":1,"Frees":0},{"Size":416,"Mallocs":59,"Frees":0},{"Size":448,"Mallocs":0,"Frees":0},{"Size":480,"Mallocs":3,"Frees":0},{"Size":512,"Mallocs":2,"Frees":0},{"Size":576,"Mallocs":17,"Frees":0},{"Size":640,"Mallocs":6,"Frees":0},{"Size":704,"Mallocs":10,"Frees":0},{"Size":768,"Mallocs":0,"Frees":0},{"Size":896,"Mallocs":11,"Frees":0},{"Size":1024,"Mallocs":11,"Frees":0},{"Size":1152,"Mallocs":12,"Frees":0},{"Size":1280,"Mallocs":2,"Frees":0},{"Size":1408,"Mallocs":2,"Frees":0},{"Size":1536,"Mallocs":0,"Frees":0},{"Size":1664,"Mallocs":10,"Frees":0},{"Size":2048,"Mallocs":17,"Frees":0},{"Size":2304,"Mallocs":7,"Frees":0},{"Size":2560,"Mallocs":1,"Frees":0},{"Size":2816,"Mallocs":1,"Frees":0},{"Size":3072,"Mallocs":1,"Frees":0},{"Size":3328,"Mallocs":7,"Frees":0},{"Size":4096,"Mallocs":4,"Frees":0},{"Size":4608,"Mallocs":1,"Frees":0},{"Size":5376,"Mallocs":6,"Frees":0},{"Size":6144,"Mallocs":4,"Frees":0},{"Size":6400,"Mallocs":0,"Frees":0},{"Size":6656,"Mallocs":1,"Frees":0},{"S
ize":6912,"Mallocs":0,"Frees":0},{"Size":8192,"Mallocs":0,"Frees":0},{"Size":8448,"Mallocs":0,"Frees":0},{"Size":8704,"Mallocs":1,"Frees":0},{"Size":9472,"Mallocs":0,"Frees":0},{"Size":10496,"Mallocs":0,"Frees":0},{"Size":12288,"Mallocs":1,"Frees":0},{"Size":13568,"Mallocs":0,"Frees":0},{"Size":14080,"Mallocs":0,"Frees":0},{"Size":16384,"Mallocs":0,"Frees":0},{"Size":16640,"Mallocs":0,"Frees":0},{"Size":17664,"Mallocs":1,"Frees":0}]}, "pid": 38149, "ratebyservice": {"service:,env:":1,"service:myapp,env:dev":0.123}, @@ -137,7 +125,7 @@ func (h *testServerWarningHandler) ServeHTTP(w http.ResponseWriter, r *http.Requ _, err := w.Write([]byte(`{ "cmdline": ["./trace-agent"], "config": {"Enabled":true,"HostName":"localhost.localdomain","DefaultEnv":"none","APIEndpoint":"https://trace.agent.datadoghq.com","APIEnabled":true,"APIPayloadBufferMaxSize":16777216,"BucketInterval":10000000000,"ExtraAggregators":[],"ExtraSampleRate":1,"MaxTPS":10,"ReceiverHost":"localhost","ReceiverPort":8126,"ConnectionLimit":2000,"ReceiverTimeout":0,"StatsdHost":"127.0.0.1","StatsdPort":8125,"LogLevel":"INFO","LogFilePath":"/var/log/datadog/trace-agent.log"}, -"endpoint": {"TracesPayload":4,"TracesPayloadError":3,"TracesBytes":3245,"TracesCount":6,"TracesStats":60,"ServicesPayload":2,"ServicesPayloadError":1,"ServicesBytes":346}, +"trace_writer": {"Payloads":4,"Bytes":3245,"Traces":26,"Errors":3}, "memstats": 
{"Alloc":773552,"TotalAlloc":773552,"Sys":3346432,"Lookups":6,"Mallocs":7231,"Frees":561,"HeapAlloc":773552,"HeapSys":1572864,"HeapIdle":49152,"HeapInuse":1523712,"HeapReleased":0,"HeapObjects":6670,"StackInuse":524288,"StackSys":524288,"MSpanInuse":24480,"MSpanSys":32768,"MCacheInuse":4800,"MCacheSys":16384,"BuckHashSys":2675,"GCSys":131072,"OtherSys":1066381,"NextGC":4194304,"LastGC":0,"PauseTotalNs":0,"PauseNs":[0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0],"PauseEnd":[0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0],"NumGC":0,"GCCPUFraction":0,"EnableGC":true,"DebugGC":false,"BySize":[{"Size":0,"Mallocs":0,"Frees":0},{"Size":8,"Mallocs":126,"Frees":0},{"Size":16,"Mallocs":825,"Frees":0},{"Size":32,"Mallocs":4208,"Frees":0},{"Size":48,"Mallocs":345,"Frees":0},{"Size":64,"Mallocs":262,"Frees":0},{"Size":80,"Mallocs":93,"Frees":0},{"Size":96,"Mallocs":70,"Frees":0},{"Size":112,"Mallocs":97,"Frees":0},{"Size":128,"Mallocs":24,"Frees":0},{"Size":144,"Mallocs":25,"Frees":0},{"Size":160,"Mallocs":57,"Frees":0},{"Size":176,"Mallocs":128,"Frees":0},{"Size":1
92,"Mallocs":13,"Frees":0},{"Size":208,"Mallocs":77,"Frees":0},{"Size":224,"Mallocs":3,"Frees":0},{"Size":240,"Mallocs":2,"Frees":0},{"Size":256,"Mallocs":17,"Frees":0},{"Size":288,"Mallocs":64,"Frees":0},{"Size":320,"Mallocs":12,"Frees":0},{"Size":352,"Mallocs":20,"Frees":0},{"Size":384,"Mallocs":1,"Frees":0},{"Size":416,"Mallocs":59,"Frees":0},{"Size":448,"Mallocs":0,"Frees":0},{"Size":480,"Mallocs":3,"Frees":0},{"Size":512,"Mallocs":2,"Frees":0},{"Size":576,"Mallocs":17,"Frees":0},{"Size":640,"Mallocs":6,"Frees":0},{"Size":704,"Mallocs":10,"Frees":0},{"Size":768,"Mallocs":0,"Frees":0},{"Size":896,"Mallocs":11,"Frees":0},{"Size":1024,"Mallocs":11,"Frees":0},{"Size":1152,"Mallocs":12,"Frees":0},{"Size":1280,"Mallocs":2,"Frees":0},{"Size":1408,"Mallocs":2,"Frees":0},{"Size":1536,"Mallocs":0,"Frees":0},{"Size":1664,"Mallocs":10,"Frees":0},{"Size":2048,"Mallocs":17,"Frees":0},{"Size":2304,"Mallocs":7,"Frees":0},{"Size":2560,"Mallocs":1,"Frees":0},{"Size":2816,"Mallocs":1,"Frees":0},{"Size":3072,"Mallocs":1,"Frees":0},{"Size":3328,"Mallocs":7,"Frees":0},{"Size":4096,"Mallocs":4,"Frees":0},{"Size":4608,"Mallocs":1,"Frees":0},{"Size":5376,"Mallocs":6,"Frees":0},{"Size":6144,"Mallocs":4,"Frees":0},{"Size":6400,"Mallocs":0,"Frees":0},{"Size":6656,"Mallocs":1,"Frees":0},{"Size":6912,"Mallocs":0,"Frees":0},{"Size":8192,"Mallocs":0,"Frees":0},{"Size":8448,"Mallocs":0,"Frees":0},{"Size":8704,"Mallocs":1,"Frees":0},{"Size":9472,"Mallocs":0,"Frees":0},{"Size":10496,"Mallocs":0,"Frees":0},{"Size":12288,"Mallocs":1,"Frees":0},{"Size":13568,"Mallocs":0,"Frees":0},{"Size":14080,"Mallocs":0,"Frees":0},{"Size":16384,"Mallocs":0,"Frees":0},{"Size":16640,"Mallocs":0,"Frees":0},{"Size":17664,"Mallocs":1,"Frees":0}]}, "pid": 38149, "receiver": [{"Lang":"python","LangVersion":"2.7.6","Interpreter":"CPython","TracerVersion":"0.9.0","TracesReceived":70,"TracesDropped":23,"TracesBytes":10679,"SpansReceived":984,"SpansDropped":184,"ServicesReceived":0,"ServicesBytes":0}], @@ -219,7 +207,7 @@ 
func TestInfo(t *testing.T) { var buf bytes.Buffer err = Info(&buf, conf) - assert.Nil(err) + assert.NoError(err) info := buf.String() t.Logf("Info:\n%s\n", info) assert.Equal(expectedInfo, info) diff --git a/info/writer.go b/info/writer.go new file mode 100644 index 000000000..70d7d3da2 --- /dev/null +++ b/info/writer.go @@ -0,0 +1,64 @@ +package info + +// TraceWriterInfo represents statistics from the trace writer. +type TraceWriterInfo struct { + Payloads int64 + Traces int64 + Errors int64 + Bytes int64 +} + +// // ServiceWriterInfo represents statistics from the service writer. +// type ServiceWriterInfo struct { +// Payload int64 +// Services int64 +// Errors int64 +// Bytes int64 +// } + +// // StatsWriterInfo represents statistics from the stats writer. +// type StatsWriterInfo struct { +// Payload int64 +// StatsBuckets int64 +// Errors int64 +// Bytes int64 +// } + +// UpdateTraceWriterInfo updates internal trace writer stats +func UpdateTraceWriterInfo(tws TraceWriterInfo) { + infoMu.Lock() + defer infoMu.Unlock() + traceWriterInfo = tws +} + +func publishTraceWriterInfo() interface{} { + infoMu.RLock() + defer infoMu.RUnlock() + return traceWriterInfo +} + +// // UpdateStatsWriterInfo updates internal stats writer stats +// func UpdateStatsWriterInfo(sws StatsWriterInfo) { +// infoMu.Lock() +// defer infoMu.Unlock() +// statsWriterInfo = sws +// } + +// func publishStatsWriterInfo() interface{} { +// infoMu.RLock() +// defer infoMu.RUnlock() +// return statsWriterInfo +// } + +// // UpdateServiceWriterInfo updates internal service writer stats +// func UpdateServiceWriterInfo(sws ServiceWriterInfo) { +// infoMu.Lock() +// defer infoMu.Unlock() +// serviceWriterInfo = sws +// } + +// func publishServiceWriterInfo() interface{} { +// infoMu.RLock() +// defer infoMu.RUnlock() +// return serviceWriterInfo +// } diff --git a/writer/legacy_endpoint.go b/writer/legacy_endpoint.go index 1393f8cb3..f1e9e212a 100644 --- a/writer/legacy_endpoint.go +++ 
b/writer/legacy_endpoint.go @@ -4,16 +4,13 @@ import ( "bytes" "fmt" "net/http" - "sync/atomic" "time" log "github.com/cihub/seelog" "github.com/DataDog/datadog-trace-agent/config" - "github.com/DataDog/datadog-trace-agent/info" "github.com/DataDog/datadog-trace-agent/model" "github.com/DataDog/datadog-trace-agent/statsd" - "github.com/DataDog/datadog-trace-agent/watchdog" ) // timeout is the HTTP timeout for POST requests to the Datadog backend @@ -51,7 +48,6 @@ type AgentEndpoint interface { type APIEndpoint struct { apiKey string url string - stats info.EndpointStats client *http.Client } @@ -70,10 +66,6 @@ func NewAPIEndpoint(url, apiKey string) *APIEndpoint { Timeout: timeout, }, } - go func() { - defer watchdog.LogOnPanic() - ae.logStats() - }() return &ae } @@ -105,9 +97,6 @@ func (ae *APIEndpoint) Write(p model.AgentPayload) (int, error) { payloadSize := len(data) statsd.Client.Count("datadog.trace_agent.writer.payload_bytes", int64(payloadSize), nil, 1) - atomic.AddInt64(&ae.stats.TracesBytes, int64(payloadSize)) - atomic.AddInt64(&ae.stats.TracesStats, int64(len(p.Stats))) - atomic.AddInt64(&ae.stats.TracesPayload, 1) // Create the request to be sent to the API url := ae.url + model.AgentPayloadAPIPath() @@ -117,7 +106,6 @@ func (ae *APIEndpoint) Write(p model.AgentPayload) (int, error) { // it will always yield the same result. if err != nil { log.Errorf("could not create request for endpoint %s: %v", url, err) - atomic.AddInt64(&ae.stats.TracesPayloadError, 1) return payloadSize, err } @@ -132,7 +120,6 @@ func (ae *APIEndpoint) Write(p model.AgentPayload) (int, error) { // If the request fails, we'll try again later. 
if err != nil { log.Errorf("error when requesting to endpoint %s: %v", url, err) - atomic.AddInt64(&ae.stats.TracesPayloadError, 1) return payloadSize, newAPIError(err, ae) } defer resp.Body.Close() @@ -141,7 +128,6 @@ func (ae *APIEndpoint) Write(p model.AgentPayload) (int, error) { if resp.StatusCode/100 != 2 { err := fmt.Errorf("request to %s responded with %s", url, resp.Status) log.Error(err) - atomic.AddInt64(&ae.stats.TracesPayloadError, 1) // Only retry for 5xx (server) errors if resp.StatusCode/100 == 5 { @@ -171,16 +157,11 @@ func (ae *APIEndpoint) WriteServices(s model.ServicesMetadata) { return } - payloadSize := len(data) - atomic.AddInt64(&ae.stats.ServicesBytes, int64(payloadSize)) - atomic.AddInt64(&ae.stats.ServicesPayload, 1) - // Create the request url := ae.url + model.ServicesPayloadAPIPath() req, err := http.NewRequest("POST", url, bytes.NewBuffer(data)) if err != nil { log.Errorf("could not create request for endpoint %s: %v", url, err) - atomic.AddInt64(&ae.stats.ServicesPayloadError, 1) return } @@ -192,14 +173,12 @@ func (ae *APIEndpoint) WriteServices(s model.ServicesMetadata) { resp, err := ae.client.Do(req) if err != nil { log.Errorf("error when requesting to endpoint %s: %v", url, err) - atomic.AddInt64(&ae.stats.ServicesPayloadError, 1) return } defer resp.Body.Close() if resp.StatusCode/100 != 2 { log.Errorf("request to %s responded with %s", url, resp.Status) - atomic.AddInt64(&ae.stats.ServicesPayloadError, 1) return } @@ -207,32 +186,6 @@ func (ae *APIEndpoint) WriteServices(s model.ServicesMetadata) { log.Infof("flushed %d services to the API", len(s)) } -// logStats periodically submits stats about the endpoint to statsd -func (ae *APIEndpoint) logStats() { - var accStats info.EndpointStats - - for range time.Tick(time.Minute) { - // Load counters and reset them for the next flush - accStats.TracesPayload = atomic.SwapInt64(&ae.stats.TracesPayload, 0) - accStats.TracesPayloadError = atomic.SwapInt64(&ae.stats.TracesPayloadError, 
0) - accStats.TracesBytes = atomic.SwapInt64(&ae.stats.TracesBytes, 0) - accStats.TracesStats = atomic.SwapInt64(&ae.stats.TracesStats, 0) - accStats.ServicesPayload = atomic.SwapInt64(&ae.stats.ServicesPayload, 0) - accStats.ServicesPayloadError = atomic.SwapInt64(&ae.stats.ServicesPayloadError, 0) - accStats.ServicesBytes = atomic.SwapInt64(&ae.stats.ServicesBytes, 0) - - statsd.Client.Count("datadog.trace_agent.endpoint.traces_payload", int64(accStats.TracesPayload), nil, 1) - statsd.Client.Count("datadog.trace_agent.endpoint.traces_payload_error", int64(accStats.TracesPayloadError), nil, 1) - statsd.Client.Count("datadog.trace_agent.endpoint.traces_bytes", int64(accStats.TracesBytes), nil, 1) - statsd.Client.Count("datadog.trace_agent.endpoint.traces_stats", int64(accStats.TracesStats), nil, 1) - statsd.Client.Count("datadog.trace_agent.endpoint.services_payload", int64(accStats.ServicesPayload), nil, 1) - statsd.Client.Count("datadog.trace_agent.endpoint.services_payload_error", int64(accStats.ServicesPayloadError), nil, 1) - statsd.Client.Count("datadog.trace_agent.endpoint.services_bytes", int64(accStats.ServicesBytes), nil, 1) - - info.UpdateEndpointStats(accStats) - } -} - // NullAgentEndpoint implements AgentEndpoint, it just logs data // and drops everything into /dev/null type NullAgentEndpoint struct{} diff --git a/writer/trace_writer.go b/writer/trace_writer.go index a19c8448d..de7c2a041 100644 --- a/writer/trace_writer.go +++ b/writer/trace_writer.go @@ -3,6 +3,7 @@ package writer import ( "strings" "sync" + "sync/atomic" "time" log "github.com/cihub/seelog" @@ -27,6 +28,8 @@ type TraceWriter struct { traceBuffer []*model.APITrace + stats info.TraceWriterInfo + exit chan struct{} exitWG *sync.WaitGroup @@ -75,6 +78,9 @@ func (w *TraceWriter) Run() { flushTicker := time.NewTicker(5 * time.Second) defer flushTicker.Stop() + updateInfoTicker := time.NewTicker(1 * time.Minute) + defer updateInfoTicker.Stop() + for { select { case trace := 
<-w.InTraces: @@ -84,6 +90,8 @@ func (w *TraceWriter) Run() { w.traceBuffer = append(w.traceBuffer, apiTrace) case <-flushTicker.C: w.Flush() + case <-updateInfoTicker.C: + go w.updateInfo() case <-w.exit: log.Info("exiting, flushing all remaining traces") w.Flush() @@ -102,6 +110,7 @@ func (w *TraceWriter) Stop() { func (w *TraceWriter) Flush() { traces := w.traceBuffer log.Debugf("going to flush %d traces", len(traces)) + atomic.AddInt64(&w.stats.Traces, int64(len(traces))) // Make the new buffer of the size of the previous one. // that's a fair estimation and it should reduce allocations without using too much memory. @@ -118,6 +127,7 @@ func (w *TraceWriter) Flush() { log.Errorf("failed to serialize trace payload, data got dropped, err: %s", err) return } + atomic.AddInt64(&w.stats.Bytes, int64(len(serialized))) headers := map[string]string{ languageHeaderKey: strings.Join(info.Languages(), "|"), @@ -126,19 +136,35 @@ func (w *TraceWriter) Flush() { startFlush := time.Now() // Send the payload to the endpoint - // TODO: track metrics/stats about payload err = w.endpoint.Write(serialized, headers) flushTime := time.Since(startFlush) // TODO: if error, depending on why, replay later. 
if err != nil { - statsd.Client.Count("datadog.trace_agent.writer.flush", 1, []string{"status:error"}, 1) + atomic.AddInt64(&w.stats.Errors, 1) log.Errorf("failed to flush trace payload: %s", err) } log.Infof("flushed trace payload to the API, time:%s, size:%d bytes", flushTime, len(serialized)) - statsd.Client.Count("datadog.trace_agent.writer.flush", 1, []string{"status:success"}, 1) - statsd.Client.Gauge("datadog.trace_agent.writer.traces.flush_duration", flushTime.Seconds(), nil, 1) - statsd.Client.Count("datadog.trace_agent.writer.payload_bytes", int64(len(serialized)), nil, 1) + statsd.Client.Gauge("datadog.trace_agent.trace_writer.flush_duration", flushTime.Seconds(), nil, 1) + atomic.AddInt64(&w.stats.Payloads, 1) + +} + +func (w *TraceWriter) updateInfo() { + var twInfo info.TraceWriterInfo + + // Load counters and reset them for the next flush + twInfo.Traces = atomic.SwapInt64(&w.stats.Traces, 0) + twInfo.Payloads = atomic.SwapInt64(&w.stats.Payloads, 0) + twInfo.Bytes = atomic.SwapInt64(&w.stats.Bytes, 0) + twInfo.Errors = atomic.SwapInt64(&w.stats.Errors, 0) + + statsd.Client.Count("datadog.trace_agent.trace_writer.traces", int64(twInfo.Traces), nil, 1) + statsd.Client.Count("datadog.trace_agent.trace_writer.payloads", int64(twInfo.Payloads), nil, 1) + statsd.Client.Count("datadog.trace_agent.trace_writer.bytes", int64(twInfo.Bytes), nil, 1) + statsd.Client.Count("datadog.trace_agent.trace_writer.errors", int64(twInfo.Errors), nil, 1) + + info.UpdateTraceWriterInfo(twInfo) } From 5a1a93b3feb316b9493d572cbdbd9aa5e4c8a1e6 Mon Sep 17 00:00:00 2001 From: Benjamin Fernandes Date: Mon, 18 Dec 2017 12:57:17 +0100 Subject: [PATCH 3/9] Add stats and service writer --- agent/agent.go | 59 +++++---- agent/concentrator.go | 52 +++++++- agent/receiver.go | 2 - agent/receiver_test.go | 120 +++++++++--------- info/info.go | 17 ++- info/info_test.go | 11 ++ info/writer.go | 72 +++++------ model/payload.go | 3 +- model/stats_payload.go | 29 +++++
writer/datadog_endpoint.go | 2 - writer/endpoint.go | 2 + writer/service_writer.go | 168 +++++++++++++++++++++++++ writer/stats_writer.go | 156 +++++++++++++++++++++++ writer/trace_writer.go | 24 ++-- writer/writer_test.go | 247 ------------------------------------- 15 files changed, 576 insertions(+), 388 deletions(-) create mode 100644 model/stats_payload.go create mode 100644 writer/service_writer.go create mode 100644 writer/stats_writer.go delete mode 100644 writer/writer_test.go diff --git a/agent/agent.go b/agent/agent.go index 6b960730e..4e29c89ea 100644 --- a/agent/agent.go +++ b/agent/agent.go @@ -18,7 +18,6 @@ import ( const ( processStatsInterval = time.Minute - languageHeaderKey = "X-Datadog-Reported-Languages" samplingPriorityKey = "_sampling_priority_v1" ) @@ -44,8 +43,9 @@ type Agent struct { Filters []filters.Filter ScoreSampler *Sampler PrioritySampler *Sampler - Writer *writer.Writer TraceWriter *writer.TraceWriter + ServiceWriter *writer.ServiceWriter + StatsWriter *writer.StatsWriter // config conf *config.AgentConfig @@ -60,6 +60,14 @@ type Agent struct { // NewAgent returns a new Agent object, ready to be started func NewAgent(conf *config.AgentConfig, exit chan struct{}) *Agent { dynConf := config.NewDynamicConfig() + + // inter-component channels + rawTraceChan := make(chan model.Trace, 5000) // about 1000 traces/sec for 5 sec + sampledTraceChan := make(chan *model.Trace) + statsChan := make(chan []model.StatsBucket) + serviceChan := make(chan model.ServicesMetadata, 50) + + // create components r := NewHTTPReceiver(conf, dynConf) c := NewConcentrator( conf.ExtraAggregators, @@ -67,23 +75,28 @@ func NewAgent(conf *config.AgentConfig, exit chan struct{}) *Agent { ) f := filters.Setup(conf) - sampledTraceChan := make(chan *model.Trace) ss := NewScoreSampler(conf) - ss.sampled = sampledTraceChan var ps *Sampler if conf.PrioritySampling { // Use priority sampling for distributed tracing only if conf says so - // TODO: remove the option once 
confortable ; as it is true by default. + // TODO: remove the option once comfortable ; as it is true by default. ps = NewPrioritySampler(conf, dynConf) - ps.sampled = sampledTraceChan } - // legacy writer, will progressively get replaced by per-endpoint writers - w := writer.NewWriter(conf) - w.InServices = r.services - - // new set of writers tw := writer.NewTraceWriter(conf) + sw := writer.NewStatsWriter(conf) + svcW := writer.NewServiceWriter(conf) + + // wire components together + r.traces = rawTraceChan + r.services = serviceChan tw.InTraces = sampledTraceChan + ss.sampled = sampledTraceChan + if conf.PrioritySampling { + ps.sampled = sampledTraceChan + } + c.OutStats = statsChan + sw.InStats = statsChan + svcW.InServices = serviceChan return &Agent{ Receiver: r, @@ -91,8 +104,9 @@ func NewAgent(conf *config.AgentConfig, exit chan struct{}) *Agent { Filters: f, ScoreSampler: ss, PrioritySampler: ps, - Writer: w, TraceWriter: tw, + StatsWriter: sw, + ServiceWriter: svcW, conf: conf, dynConf: dynConf, exit: exit, @@ -102,9 +116,6 @@ func NewAgent(conf *config.AgentConfig, exit chan struct{}) *Agent { // Run starts routers routines and individual pieces then stop them when the exit order is received func (a *Agent) Run() { - flushTicker := time.NewTicker(a.conf.BucketInterval) - defer flushTicker.Stop() - // it's really important to use a ticker for this, and with a not too short // interval, for this is our garantee that the process won't start and kill // itself too fast (nightmare loop) @@ -117,8 +128,10 @@ func (a *Agent) Run() { // TODO: unify components APIs. Use Start/Stop as non-blocking ways of controlling the blocking Run loop. // Like we do with TraceWriter. 
a.Receiver.Run() - a.Writer.Run() a.TraceWriter.Start() + a.StatsWriter.Start() + a.ServiceWriter.Start() + a.Concentrator.Start() a.ScoreSampler.Run() if a.PrioritySampler != nil { a.PrioritySampler.Run() @@ -128,23 +141,15 @@ func (a *Agent) Run() { select { case t := <-a.Receiver.traces: a.Process(t) - case <-flushTicker.C: - p := model.AgentPayload{ - HostName: a.conf.HostName, - Env: a.conf.DefaultEnv, - } - p.Stats = a.Concentrator.Flush() - - p.SetExtra(languageHeaderKey, a.Receiver.Languages()) - - a.Writer.InPayloads <- p case <-watchdogTicker.C: a.watchdog() case <-a.exit: log.Info("exiting") close(a.Receiver.exit) - a.Writer.Stop() + a.Concentrator.Stop() a.TraceWriter.Stop() + a.StatsWriter.Stop() + a.ServiceWriter.Stop() a.ScoreSampler.Stop() if a.PrioritySampler != nil { a.PrioritySampler.Stop() diff --git a/agent/concentrator.go b/agent/concentrator.go index fbc7693e1..49b9ae5a6 100644 --- a/agent/concentrator.go +++ b/agent/concentrator.go @@ -3,11 +3,13 @@ package main import ( "sort" "sync" + "time" log "github.com/cihub/seelog" "github.com/DataDog/datadog-trace-agent/model" "github.com/DataDog/datadog-trace-agent/statsd" + "github.com/DataDog/datadog-trace-agent/watchdog" ) // Concentrator produces time bucketed statistics from a stream of raw traces. @@ -15,8 +17,15 @@ import ( // Gets an imperial shitton of traces, and outputs pre-computed data structures // allowing to find the gold (stats) amongst the traces. 
type Concentrator struct { + // list of attributes to use for extra aggregation aggregators []string - bsize int64 + // bucket duration in nanoseconds + bsize int64 + + OutStats chan []model.StatsBucket + + exit chan struct{} + exitWG *sync.WaitGroup buckets map[int64]*model.StatsRawBucket // buckets used to aggregate stats per timestamp mu sync.Mutex @@ -28,11 +37,52 @@ func NewConcentrator(aggregators []string, bsize int64) *Concentrator { aggregators: aggregators, bsize: bsize, buckets: make(map[int64]*model.StatsRawBucket), + + exit: make(chan struct{}), + exitWG: &sync.WaitGroup{}, } sort.Strings(c.aggregators) return &c } +// Start starts the writer. +func (c *Concentrator) Start() { + go func() { + defer watchdog.LogOnPanic() + c.Run() + }() +} + +// Run runs the main loop of the concentrator goroutine. Traces are received +// through `Add`, this loop only deals with flushing. +func (c *Concentrator) Run() { + c.exitWG.Add(1) + defer c.exitWG.Done() + + // flush with the same period as stats buckets + flushTicker := time.NewTicker(time.Duration(c.bsize) * time.Nanosecond) + defer flushTicker.Stop() + + log.Debug("starting concentrator") + + for { + select { + case <-flushTicker.C: + c.OutStats <- c.Flush() + case <-c.exit: + log.Info("exiting concentrator, computing remaining stats") + c.OutStats <- c.Flush() + return + } + } +} + +// Stop stops the main Run loop. 
+func (c *Concentrator) Stop() { + close(c.exit) + c.exitWG.Wait() +} + // Add appends to the proper stats bucket this trace's statistics func (c *Concentrator) Add(t processedTrace) { c.mu.Lock() diff --git a/agent/receiver.go b/agent/receiver.go index cac596baf..a07d1a17d 100644 --- a/agent/receiver.go +++ b/agent/receiver.go @@ -71,8 +71,6 @@ type HTTPReceiver struct { func NewHTTPReceiver(conf *config.AgentConfig, dynConf *config.DynamicConfig) *HTTPReceiver { // use buffered channels so that handlers are not waiting on downstream processing return &HTTPReceiver{ - traces: make(chan model.Trace, 5000), // about 1000 traces/sec for 5 sec - services: make(chan model.ServicesMetadata, 50), conf: conf, dynConf: dynConf, stats: info.NewReceiverStats(), diff --git a/agent/receiver_test.go b/agent/receiver_test.go index 91d922209..e41cf61d7 100644 --- a/agent/receiver_test.go +++ b/agent/receiver_test.go @@ -29,18 +29,35 @@ var headerFields = map[string]string{ "tracer_version": "Datadog-Meta-Tracer-Version", } -func TestReceiverRequestBodyLength(t *testing.T) { - assert := assert.New(t) +func NewTestReceiverFromConfig(conf *config.AgentConfig) *HTTPReceiver { + dynConf := config.NewDynamicConfig() + + receiver := NewHTTPReceiver(conf, dynConf) + rawTraceChan := make(chan model.Trace, 5000) + serviceChan := make(chan model.ServicesMetadata, 50) + receiver.traces = rawTraceChan + receiver.services = serviceChan + + return receiver +} + +func NewTestReceiverConfig() *config.AgentConfig { conf := config.NewDefaultAgentConfig() conf.APIKey = "test" - dynConf := config.NewDynamicConfig() + + return conf +} + +func TestReceiverRequestBodyLength(t *testing.T) { + assert := assert.New(t) // save the global mux aside, we don't want to break other tests defaultMux := http.DefaultServeMux http.DefaultServeMux = http.NewServeMux() - receiver := NewHTTPReceiver(conf, dynConf) + conf := NewTestReceiverConfig() + receiver := NewTestReceiverFromConfig(conf) 
receiver.maxRequestBodyLength = 2 go receiver.Run() @@ -90,8 +107,7 @@ func TestReceiverRequestBodyLength(t *testing.T) { func TestLegacyReceiver(t *testing.T) { // testing traces without content-type in agent endpoints, it should use JSON decoding assert := assert.New(t) - conf := config.NewDefaultAgentConfig() - dynConf := config.NewDynamicConfig() + conf := NewTestReceiverConfig() testCases := []struct { name string r *HTTPReceiver @@ -99,8 +115,8 @@ func TestLegacyReceiver(t *testing.T) { contentType string traces model.Trace }{ - {"v01 with empty content-type", NewHTTPReceiver(conf, dynConf), v01, "", model.Trace{fixtures.GetTestSpan()}}, - {"v01 with application/json", NewHTTPReceiver(conf, dynConf), v01, "application/json", model.Trace{fixtures.GetTestSpan()}}, + {"v01 with empty content-type", NewTestReceiverFromConfig(conf), v01, "", model.Trace{fixtures.GetTestSpan()}}, + {"v01 with application/json", NewTestReceiverFromConfig(conf), v01, "application/json", model.Trace{fixtures.GetTestSpan()}}, } for _, tc := range testCases { @@ -147,8 +163,7 @@ func TestLegacyReceiver(t *testing.T) { func TestReceiverJSONDecoder(t *testing.T) { // testing traces without content-type in agent endpoints, it should use JSON decoding assert := assert.New(t) - conf := config.NewDefaultAgentConfig() - dynConf := config.NewDynamicConfig() + conf := NewTestReceiverConfig() testCases := []struct { name string r *HTTPReceiver @@ -156,15 +171,15 @@ func TestReceiverJSONDecoder(t *testing.T) { contentType string traces []model.Trace }{ - {"v02 with empty content-type", NewHTTPReceiver(conf, dynConf), v02, "", fixtures.GetTestTrace(1, 1)}, - {"v03 with empty content-type", NewHTTPReceiver(conf, dynConf), v03, "", fixtures.GetTestTrace(1, 1)}, - {"v04 with empty content-type", NewHTTPReceiver(conf, dynConf), v04, "", fixtures.GetTestTrace(1, 1)}, - {"v02 with application/json", NewHTTPReceiver(conf, dynConf), v02, "application/json", fixtures.GetTestTrace(1, 1)}, - {"v03 with 
application/json", NewHTTPReceiver(conf, dynConf), v03, "application/json", fixtures.GetTestTrace(1, 1)}, - {"v04 with application/json", NewHTTPReceiver(conf, dynConf), v04, "application/json", fixtures.GetTestTrace(1, 1)}, - {"v02 with text/json", NewHTTPReceiver(conf, dynConf), v02, "text/json", fixtures.GetTestTrace(1, 1)}, - {"v03 with text/json", NewHTTPReceiver(conf, dynConf), v03, "text/json", fixtures.GetTestTrace(1, 1)}, - {"v04 with text/json", NewHTTPReceiver(conf, dynConf), v04, "text/json", fixtures.GetTestTrace(1, 1)}, + {"v02 with empty content-type", NewTestReceiverFromConfig(conf), v02, "", fixtures.GetTestTrace(1, 1)}, + {"v03 with empty content-type", NewTestReceiverFromConfig(conf), v03, "", fixtures.GetTestTrace(1, 1)}, + {"v04 with empty content-type", NewTestReceiverFromConfig(conf), v04, "", fixtures.GetTestTrace(1, 1)}, + {"v02 with application/json", NewTestReceiverFromConfig(conf), v02, "application/json", fixtures.GetTestTrace(1, 1)}, + {"v03 with application/json", NewTestReceiverFromConfig(conf), v03, "application/json", fixtures.GetTestTrace(1, 1)}, + {"v04 with application/json", NewTestReceiverFromConfig(conf), v04, "application/json", fixtures.GetTestTrace(1, 1)}, + {"v02 with text/json", NewTestReceiverFromConfig(conf), v02, "text/json", fixtures.GetTestTrace(1, 1)}, + {"v03 with text/json", NewTestReceiverFromConfig(conf), v03, "text/json", fixtures.GetTestTrace(1, 1)}, + {"v04 with text/json", NewTestReceiverFromConfig(conf), v04, "text/json", fixtures.GetTestTrace(1, 1)}, } for _, tc := range testCases { @@ -212,8 +227,7 @@ func TestReceiverMsgpackDecoder(t *testing.T) { // testing traces without content-type in agent endpoints, it should use Msgpack decoding // or it should raise a 415 Unsupported media type assert := assert.New(t) - conf := config.NewDefaultAgentConfig() - dynConf := config.NewDynamicConfig() + conf := NewTestReceiverConfig() testCases := []struct { name string r *HTTPReceiver @@ -221,10 +235,10 @@ func 
TestReceiverMsgpackDecoder(t *testing.T) { contentType string traces model.Traces }{ - {"v01 with application/msgpack", NewHTTPReceiver(conf, dynConf), v01, "application/msgpack", fixtures.GetTestTrace(1, 1)}, - {"v02 with application/msgpack", NewHTTPReceiver(conf, dynConf), v02, "application/msgpack", fixtures.GetTestTrace(1, 1)}, - {"v03 with application/msgpack", NewHTTPReceiver(conf, dynConf), v03, "application/msgpack", fixtures.GetTestTrace(1, 1)}, - {"v04 with application/msgpack", NewHTTPReceiver(conf, dynConf), v04, "application/msgpack", fixtures.GetTestTrace(1, 1)}, + {"v01 with application/msgpack", NewTestReceiverFromConfig(conf), v01, "application/msgpack", fixtures.GetTestTrace(1, 1)}, + {"v02 with application/msgpack", NewTestReceiverFromConfig(conf), v02, "application/msgpack", fixtures.GetTestTrace(1, 1)}, + {"v03 with application/msgpack", NewTestReceiverFromConfig(conf), v03, "application/msgpack", fixtures.GetTestTrace(1, 1)}, + {"v04 with application/msgpack", NewTestReceiverFromConfig(conf), v04, "application/msgpack", fixtures.GetTestTrace(1, 1)}, } for _, tc := range testCases { @@ -308,26 +322,25 @@ func TestReceiverMsgpackDecoder(t *testing.T) { func TestReceiverServiceJSONDecoder(t *testing.T) { // testing traces without content-type in agent endpoints, it should use JSON decoding assert := assert.New(t) - conf := config.NewDefaultAgentConfig() - dynConf := config.NewDynamicConfig() + conf := NewTestReceiverConfig() testCases := []struct { name string r *HTTPReceiver apiVersion APIVersion contentType string }{ - {"v01 with empty content-type", NewHTTPReceiver(conf, dynConf), v01, ""}, - {"v02 with empty content-type", NewHTTPReceiver(conf, dynConf), v02, ""}, - {"v03 with empty content-type", NewHTTPReceiver(conf, dynConf), v03, ""}, - {"v04 with empty content-type", NewHTTPReceiver(conf, dynConf), v04, ""}, - {"v01 with application/json", NewHTTPReceiver(conf, dynConf), v01, "application/json"}, - {"v02 with application/json", 
NewHTTPReceiver(conf, dynConf), v02, "application/json"}, - {"v03 with application/json", NewHTTPReceiver(conf, dynConf), v03, "application/json"}, - {"v04 with application/json", NewHTTPReceiver(conf, dynConf), v04, "application/json"}, - {"v01 with text/json", NewHTTPReceiver(conf, dynConf), v01, "text/json"}, - {"v02 with text/json", NewHTTPReceiver(conf, dynConf), v02, "text/json"}, - {"v03 with text/json", NewHTTPReceiver(conf, dynConf), v03, "text/json"}, - {"v04 with text/json", NewHTTPReceiver(conf, dynConf), v04, "text/json"}, + {"v01 with empty content-type", NewTestReceiverFromConfig(conf), v01, ""}, + {"v02 with empty content-type", NewTestReceiverFromConfig(conf), v02, ""}, + {"v03 with empty content-type", NewTestReceiverFromConfig(conf), v03, ""}, + {"v04 with empty content-type", NewTestReceiverFromConfig(conf), v04, ""}, + {"v01 with application/json", NewTestReceiverFromConfig(conf), v01, "application/json"}, + {"v02 with application/json", NewTestReceiverFromConfig(conf), v02, "application/json"}, + {"v03 with application/json", NewTestReceiverFromConfig(conf), v03, "application/json"}, + {"v04 with application/json", NewTestReceiverFromConfig(conf), v04, "application/json"}, + {"v01 with text/json", NewTestReceiverFromConfig(conf), v01, "text/json"}, + {"v02 with text/json", NewTestReceiverFromConfig(conf), v02, "text/json"}, + {"v03 with text/json", NewTestReceiverFromConfig(conf), v03, "text/json"}, + {"v04 with text/json", NewTestReceiverFromConfig(conf), v04, "text/json"}, } for _, tc := range testCases { @@ -383,18 +396,17 @@ func TestReceiverServiceMsgpackDecoder(t *testing.T) { // testing traces without content-type in agent endpoints, it should use Msgpack decoding // or it should raise a 415 Unsupported media type assert := assert.New(t) - conf := config.NewDefaultAgentConfig() - dynConf := config.NewDynamicConfig() + conf := NewTestReceiverConfig() testCases := []struct { name string r *HTTPReceiver apiVersion APIVersion contentType 
string }{ - {"v01 with application/msgpack", NewHTTPReceiver(conf, dynConf), v01, "application/msgpack"}, - {"v02 with application/msgpack", NewHTTPReceiver(conf, dynConf), v02, "application/msgpack"}, - {"v03 with application/msgpack", NewHTTPReceiver(conf, dynConf), v03, "application/msgpack"}, - {"v04 with application/msgpack", NewHTTPReceiver(conf, dynConf), v04, "application/msgpack"}, + {"v01 with application/msgpack", NewTestReceiverFromConfig(conf), v01, "application/msgpack"}, + {"v02 with application/msgpack", NewTestReceiverFromConfig(conf), v02, "application/msgpack"}, + {"v03 with application/msgpack", NewTestReceiverFromConfig(conf), v03, "application/msgpack"}, + {"v04 with application/msgpack", NewTestReceiverFromConfig(conf), v04, "application/msgpack"}, } for _, tc := range testCases { @@ -485,10 +497,8 @@ func TestHandleTraces(t *testing.T) { msgp.Encode(&buf, fixtures.GetTestTrace(10, 10)) // prepare the receiver - conf := config.NewDefaultAgentConfig() - conf.APIKey = "test" - dynConf := config.NewDynamicConfig() - receiver := NewHTTPReceiver(conf, dynConf) + conf := NewTestReceiverConfig() + receiver := NewTestReceiverFromConfig(conf) // response recorder handler := http.HandlerFunc(receiver.httpHandleWithVersion(v04, receiver.handleTraces)) @@ -532,10 +542,8 @@ func BenchmarkHandleTracesFromOneApp(b *testing.B) { msgp.Encode(&buf, fixtures.GetTestTrace(1, 1)) // prepare the receiver - conf := config.NewDefaultAgentConfig() - dynConf := config.NewDynamicConfig() - conf.APIKey = "test" - receiver := NewHTTPReceiver(conf, dynConf) + conf := NewTestReceiverConfig() + receiver := NewTestReceiverFromConfig(conf) // response recorder handler := http.HandlerFunc(receiver.httpHandleWithVersion(v04, receiver.handleTraces)) @@ -574,10 +582,8 @@ func BenchmarkHandleTracesFromMultipleApps(b *testing.B) { msgp.Encode(&buf, fixtures.GetTestTrace(1, 1)) // prepare the receiver - conf := config.NewDefaultAgentConfig() - conf.APIKey = "test" - dynConf := 
config.NewDynamicConfig() - receiver := NewHTTPReceiver(conf, dynConf) + conf := NewTestReceiverConfig() + receiver := NewTestReceiverFromConfig(conf) // response recorder handler := http.HandlerFunc(receiver.httpHandleWithVersion(v04, receiver.handleTraces)) diff --git a/info/info.go b/info/info.go index 5a1948be2..c4d400216 100644 --- a/info/info.go +++ b/info/info.go @@ -27,10 +27,9 @@ var ( // TODO: move from package globals to a clean single struct - traceWriterInfo TraceWriterInfo - // anticipate future writers - // statsWriterInfo StatsWriterInfo - // serviceWriterInfo ServiceWriterInfo + traceWriterInfo TraceWriterInfo + statsWriterInfo StatsWriterInfo + serviceWriterInfo ServiceWriterInfo watchdogInfo watchdog.Info samplerInfo SamplerInfo @@ -86,6 +85,10 @@ const ( Traces: {{.Status.TraceWriter.Payloads}} payloads, {{.Status.TraceWriter.Traces}} traces, {{.Status.TraceWriter.Bytes}} bytes {{if gt .Status.TraceWriter.Errors 0}}WARNING: Traces API errors (1 min): {{.Status.TraceWriter.Errors}}{{end}} + Stats: {{.Status.StatsWriter.Payloads}} payloads, {{.Status.StatsWriter.StatsBuckets}} stats buckets, {{.Status.StatsWriter.Bytes}} bytes + {{if gt .Status.StatsWriter.Errors 0}}WARNING: Stats API errors (1 min): {{.Status.StatsWriter.Errors}}{{end}} + Services: {{.Status.ServiceWriter.Payloads}} payloads, {{.Status.ServiceWriter.Services}} services, {{.Status.ServiceWriter.Bytes}} bytes + {{if gt .Status.ServiceWriter.Errors 0}}WARNING: Services API errors (1 min): {{.Status.ServiceWriter.Errors}}{{end}} ` @@ -234,8 +237,8 @@ func InitInfo(conf *config.AgentConfig) error { expvar.Publish("receiver", expvar.Func(publishReceiverStats)) expvar.Publish("sampler", expvar.Func(publishSamplerInfo)) expvar.Publish("trace_writer", expvar.Func(publishTraceWriterInfo)) - // expvar.Publish("writer.stats", expvar.Func(publishStatsWriterInfo)) - // expvar.Publish("writer.services", expvar.Func(publishServiceWriterInfo)) + expvar.Publish("stats_writer", 
expvar.Func(publishStatsWriterInfo)) + expvar.Publish("service_writer", expvar.Func(publishServiceWriterInfo)) expvar.Publish("prioritysampler", expvar.Func(publishPrioritySamplerInfo)) expvar.Publish("ratebyservice", expvar.Func(publishRateByService)) expvar.Publish("watchdog", expvar.Func(publishWatchdogInfo)) @@ -289,6 +292,8 @@ type StatusInfo struct { Receiver []TagStats `json:"receiver"` RateByService map[string]float64 `json:"ratebyservice"` TraceWriter TraceWriterInfo `json:"trace_writer"` + StatsWriter StatsWriterInfo `json:"stats_writer"` + ServiceWriter ServiceWriterInfo `json:"service_writer"` Watchdog watchdog.Info `json:"watchdog"` PreSampler sampler.PreSamplerStats `json:"presampler"` Config config.AgentConfig `json:"config"` diff --git a/info/info_test.go b/info/info_test.go index 57e2da581..210e92a10 100644 --- a/info/info_test.go +++ b/info/info_test.go @@ -21,6 +21,7 @@ type testServerHandler struct { t *testing.T } +// TODO: move these expected output to flat file, simpler to read/update const ( expectedInfo = `====================== Trace Agent (v 0.99.0) @@ -46,6 +47,8 @@ Trace Agent (v 0.99.0) --- Writer stats (1 min) --- Traces: 4 payloads, 26 traces, 3245 bytes + Stats: 6 payloads, 12 stats buckets, 8329 bytes + Services: 1 payloads, 2 services, 1234 bytes ` @@ -77,6 +80,10 @@ Trace Agent (v 0.99.0) Traces: 4 payloads, 26 traces, 3245 bytes WARNING: Traces API errors (1 min): 3 + Stats: 6 payloads, 12 stats buckets, 8329 bytes + WARNING: Stats API errors (1 min): 1 + Services: 1 payloads, 2 services, 1234 bytes + WARNING: Services API errors (1 min): 1 ` ) @@ -90,6 +97,8 @@ func (h *testServerHandler) ServeHTTP(w http.ResponseWriter, r *http.Request) { "cmdline": ["./trace-agent"], "config": 
{"Enabled":true,"HostName":"localhost.localdomain","DefaultEnv":"none","APIEndpoint":"https://trace.agent.datadoghq.com","APIEnabled":true,"APIPayloadBufferMaxSize":16777216,"BucketInterval":10000000000,"ExtraAggregators":[],"ExtraSampleRate":1,"MaxTPS":10,"ReceiverHost":"localhost","ReceiverPort":8126,"ConnectionLimit":2000,"ReceiverTimeout":0,"StatsdHost":"127.0.0.1","StatsdPort":8125,"LogLevel":"INFO","LogFilePath":"/var/log/datadog/trace-agent.log"}, "trace_writer": {"Payloads":4,"Bytes":3245,"Traces":26,"Errors":0}, +"stats_writer": {"Payloads":6,"Bytes":8329,"StatsBuckets":12,"Errors":0}, +"service_writer": {"Payloads":1,"Bytes":1234,"Services":2,"Errors":0}, "memstats": {"Alloc":773552,"TotalAlloc":773552,"Sys":3346432,"Lookups":6,"Mallocs":7231,"Frees":561,"HeapAlloc":773552,"HeapSys":1572864,"HeapIdle":49152,"HeapInuse":1523712,"HeapReleased":0,"HeapObjects":6670,"StackInuse":524288,"StackSys":524288,"MSpanInuse":24480,"MSpanSys":32768,"MCacheInuse":4800,"MCacheSys":16384,"BuckHashSys":2675,"GCSys":131072,"OtherSys":1066381,"NextGC":4194304,"LastGC":0,"PauseTotalNs":0,"PauseNs":[0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0],"PauseEnd":[0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0],"NumGC":0,"GCCPUFraction":0,"EnableGC":true,"DebugGC":false,"BySize":[{"Size":0,"Mallocs":0,"Frees":0},{"Size":8,"Mallocs":126,"Frees":0},{"Size":16,"Mallocs":825,"Frees":0},{"Size":32,"Mallocs":4208,"Frees":0},{"Size":48,"Mallocs":345,"Frees":0},{"Size":64,"Mallocs":262,"Frees":0},{"Size":80,"Mallocs":93,"Frees":0},{"Size":96,"Mallocs":70,"Frees":0},{"Size":112,"Mallocs":97,"Frees":0},{"Size":128,"Mallocs":24,"Frees":0},{"Size":144,"Mallocs":25,"Frees":0},{"Size":160,"Mallocs":57,"Frees":0},{"Size":176,"Mallocs":128,"Frees":0},{"Size":192,"Mallocs":13,"Frees":0},{"Size":208,"Mallocs":77,"Frees":0},{"Size":224,"Mallocs":3,"Frees":0},{"Size":240,"Mallocs":2,"Frees":0},{"Size":256,"Mallocs":17,"Frees":0},{"Size":288,"Mallocs":64,"Frees":0},{"Size":320,"Mallocs":12,"Frees":0},{"Size":352,"Mallocs":20,"Frees":0},{"Size":384,"Mallocs":1,"Frees":0},{"Size":416,"Mallocs":59,"Frees":0},{"Size":448,"Mallocs":0,"Frees":0},{"Size":480,"Mallocs":3,"Frees":0},{"Size":512,"Mallocs":2,"Frees":0},{"Size":576,"Mallocs":17,"Frees":0},{"Size":640,"Mallocs":6,"Frees":0},{"Size":704,"Mallocs":10,"Frees":0},{"Size":768,"Mallocs":0,"Frees":0},{"Size":896,"Mallocs":11,"Frees":0},{"Size":1024,"Mallocs":11,"Frees":0},{"Size":1152,"Mallocs":12,"Frees":0},{"Size":1280,"Mallocs":2,"Frees":0},{"Size":1408,"Mallocs":2,"Frees":0},{"Size":1536,"Mallocs":0,"Frees":0},{"Size":1664,"Mallocs":10,"Frees":0},{"Size":2048,"Mallocs":17,"Frees":0},{"Size":2304,"Mallocs":7,"Frees":0},{"Size":2560,"Mallocs":1,"Frees":0},{"Size":2816,"Mallocs":1,"Frees":0},{"Size":3072,"Mallocs":1,"Frees":0},{"Size":3328,"Mallocs":7,"Frees":0},{"Size":4096,"Mallocs":4,"Frees":0},{"Size":4608,"Mallocs":1,"Frees":0},{"Size":5376,"Mallocs":6,"Frees":0},{"Size":6144,"Mallocs":4,"Frees":0},{"Size":6400,"Mallocs":0,"Frees":0},{"Size":6656,"Mallocs":1,"Frees":0},{"Size":6912,"Mallocs":0,"Frees
":0},{"Size":8192,"Mallocs":0,"Frees":0},{"Size":8448,"Mallocs":0,"Frees":0},{"Size":8704,"Mallocs":1,"Frees":0},{"Size":9472,"Mallocs":0,"Frees":0},{"Size":10496,"Mallocs":0,"Frees":0},{"Size":12288,"Mallocs":1,"Frees":0},{"Size":13568,"Mallocs":0,"Frees":0},{"Size":14080,"Mallocs":0,"Frees":0},{"Size":16384,"Mallocs":0,"Frees":0},{"Size":16640,"Mallocs":0,"Frees":0},{"Size":17664,"Mallocs":1,"Frees":0}]}, "pid": 38149, "ratebyservice": {"service:,env:":1,"service:myapp,env:dev":0.123}, @@ -126,6 +135,8 @@ func (h *testServerWarningHandler) ServeHTTP(w http.ResponseWriter, r *http.Requ "cmdline": ["./trace-agent"], "config": {"Enabled":true,"HostName":"localhost.localdomain","DefaultEnv":"none","APIEndpoint":"https://trace.agent.datadoghq.com","APIEnabled":true,"APIPayloadBufferMaxSize":16777216,"BucketInterval":10000000000,"ExtraAggregators":[],"ExtraSampleRate":1,"MaxTPS":10,"ReceiverHost":"localhost","ReceiverPort":8126,"ConnectionLimit":2000,"ReceiverTimeout":0,"StatsdHost":"127.0.0.1","StatsdPort":8125,"LogLevel":"INFO","LogFilePath":"/var/log/datadog/trace-agent.log"}, "trace_writer": {"Payloads":4,"Bytes":3245,"Traces":26,"Errors":3}, +"stats_writer": {"Payloads":6,"Bytes":8329,"StatsBuckets":12,"Errors":1}, +"service_writer": {"Payloads":1,"Bytes":1234,"Services":2,"Errors":1}, "memstats": 
{"Alloc":773552,"TotalAlloc":773552,"Sys":3346432,"Lookups":6,"Mallocs":7231,"Frees":561,"HeapAlloc":773552,"HeapSys":1572864,"HeapIdle":49152,"HeapInuse":1523712,"HeapReleased":0,"HeapObjects":6670,"StackInuse":524288,"StackSys":524288,"MSpanInuse":24480,"MSpanSys":32768,"MCacheInuse":4800,"MCacheSys":16384,"BuckHashSys":2675,"GCSys":131072,"OtherSys":1066381,"NextGC":4194304,"LastGC":0,"PauseTotalNs":0,"PauseNs":[0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0],"PauseEnd":[0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0],"NumGC":0,"GCCPUFraction":0,"EnableGC":true,"DebugGC":false,"BySize":[{"Size":0,"Mallocs":0,"Frees":0},{"Size":8,"Mallocs":126,"Frees":0},{"Size":16,"Mallocs":825,"Frees":0},{"Size":32,"Mallocs":4208,"Frees":0},{"Size":48,"Mallocs":345,"Frees":0},{"Size":64,"Mallocs":262,"Frees":0},{"Size":80,"Mallocs":93,"Frees":0},{"Size":96,"Mallocs":70,"Frees":0},{"Size":112,"Mallocs":97,"Frees":0},{"Size":128,"Mallocs":24,"Frees":0},{"Size":144,"Mallocs":25,"Frees":0},{"Size":160,"Mallocs":57,"Frees":0},{"Size":176,"Mallocs":128,"Frees":0},{"Size":1
92,"Mallocs":13,"Frees":0},{"Size":208,"Mallocs":77,"Frees":0},{"Size":224,"Mallocs":3,"Frees":0},{"Size":240,"Mallocs":2,"Frees":0},{"Size":256,"Mallocs":17,"Frees":0},{"Size":288,"Mallocs":64,"Frees":0},{"Size":320,"Mallocs":12,"Frees":0},{"Size":352,"Mallocs":20,"Frees":0},{"Size":384,"Mallocs":1,"Frees":0},{"Size":416,"Mallocs":59,"Frees":0},{"Size":448,"Mallocs":0,"Frees":0},{"Size":480,"Mallocs":3,"Frees":0},{"Size":512,"Mallocs":2,"Frees":0},{"Size":576,"Mallocs":17,"Frees":0},{"Size":640,"Mallocs":6,"Frees":0},{"Size":704,"Mallocs":10,"Frees":0},{"Size":768,"Mallocs":0,"Frees":0},{"Size":896,"Mallocs":11,"Frees":0},{"Size":1024,"Mallocs":11,"Frees":0},{"Size":1152,"Mallocs":12,"Frees":0},{"Size":1280,"Mallocs":2,"Frees":0},{"Size":1408,"Mallocs":2,"Frees":0},{"Size":1536,"Mallocs":0,"Frees":0},{"Size":1664,"Mallocs":10,"Frees":0},{"Size":2048,"Mallocs":17,"Frees":0},{"Size":2304,"Mallocs":7,"Frees":0},{"Size":2560,"Mallocs":1,"Frees":0},{"Size":2816,"Mallocs":1,"Frees":0},{"Size":3072,"Mallocs":1,"Frees":0},{"Size":3328,"Mallocs":7,"Frees":0},{"Size":4096,"Mallocs":4,"Frees":0},{"Size":4608,"Mallocs":1,"Frees":0},{"Size":5376,"Mallocs":6,"Frees":0},{"Size":6144,"Mallocs":4,"Frees":0},{"Size":6400,"Mallocs":0,"Frees":0},{"Size":6656,"Mallocs":1,"Frees":0},{"Size":6912,"Mallocs":0,"Frees":0},{"Size":8192,"Mallocs":0,"Frees":0},{"Size":8448,"Mallocs":0,"Frees":0},{"Size":8704,"Mallocs":1,"Frees":0},{"Size":9472,"Mallocs":0,"Frees":0},{"Size":10496,"Mallocs":0,"Frees":0},{"Size":12288,"Mallocs":1,"Frees":0},{"Size":13568,"Mallocs":0,"Frees":0},{"Size":14080,"Mallocs":0,"Frees":0},{"Size":16384,"Mallocs":0,"Frees":0},{"Size":16640,"Mallocs":0,"Frees":0},{"Size":17664,"Mallocs":1,"Frees":0}]}, "pid": 38149, "receiver": [{"Lang":"python","LangVersion":"2.7.6","Interpreter":"CPython","TracerVersion":"0.9.0","TracesReceived":70,"TracesDropped":23,"TracesBytes":10679,"SpansReceived":984,"SpansDropped":184,"ServicesReceived":0,"ServicesBytes":0}], diff --git 
a/info/writer.go b/info/writer.go index 70d7d3da2..4394fa5b1 100644 --- a/info/writer.go +++ b/info/writer.go @@ -8,21 +8,21 @@ type TraceWriterInfo struct { Bytes int64 } -// // ServiceWriterInfo represents statistics from the service writer. -// type ServiceWriterInfo struct { -// Payload int64 -// Services int64 -// Errors int64 -// Bytes int64 -// } +// ServiceWriterInfo represents statistics from the service writer. +type ServiceWriterInfo struct { + Payloads int64 + Services int64 + Errors int64 + Bytes int64 +} -// // StatsWriterInfo represents statistics from the stats writer. -// type StatsWriterInfo struct { -// Payload int64 -// StatsBuckets int64 -// Errors int64 -// Bytes int64 -// } +// StatsWriterInfo represents statistics from the stats writer. +type StatsWriterInfo struct { + Payloads int64 + StatsBuckets int64 + Errors int64 + Bytes int64 +} // UpdateTraceWriterInfo updates internal trace writer stats func UpdateTraceWriterInfo(tws TraceWriterInfo) { @@ -37,28 +37,28 @@ func publishTraceWriterInfo() interface{} { return traceWriterInfo } -// // UpdateStatsWriterInfo updates internal stats writer stats -// func UpdateStatsWriterInfo(sws StatsWriterInfo) { -// infoMu.Lock() -// defer infoMu.Unlock() -// statsWriterInfo = sws -// } +// UpdateStatsWriterInfo updates internal stats writer stats +func UpdateStatsWriterInfo(sws StatsWriterInfo) { + infoMu.Lock() + defer infoMu.Unlock() + statsWriterInfo = sws +} -// func publishStatsWriterInfo() interface{} { -// infoMu.RLock() -// defer infoMu.RUnlock() -// return statsWriterInfo -// } +func publishStatsWriterInfo() interface{} { + infoMu.RLock() + defer infoMu.RUnlock() + return statsWriterInfo +} -// // UpdateServiceWriterInfo updates internal service writer stats -// func UpdateServiceWriterInfo(sws ServiceWriterInfo) { -// infoMu.Lock() -// defer infoMu.Unlock() -// serviceWriterInfo = sws -// } +// UpdateServiceWriterInfo updates internal service writer stats +func UpdateServiceWriterInfo(sws 
ServiceWriterInfo) { + infoMu.Lock() + defer infoMu.Unlock() + serviceWriterInfo = sws +} -// func publishServiceWriterInfo() interface{} { -// infoMu.RLock() -// defer infoMu.RUnlock() -// return serviceWriterInfo -// } +func publishServiceWriterInfo() interface{} { + infoMu.RLock() + defer infoMu.RUnlock() + return serviceWriterInfo +} diff --git a/model/payload.go b/model/payload.go index f86a79fbe..1d9e0e3f3 100644 --- a/model/payload.go +++ b/model/payload.go @@ -11,7 +11,8 @@ import ( ) // AgentPayload is the main payload to carry data that has been -// pre-processed to the Datadog mothership +// pre-processed to the Datadog mothership. +// This is a legacy payload format, used in API v0.1. type AgentPayload struct { HostName string `json:"hostname"` // the host name that will be resolved by the API Env string `json:"env"` // the default environment this agent uses diff --git a/model/stats_payload.go b/model/stats_payload.go new file mode 100644 index 000000000..4a67adc15 --- /dev/null +++ b/model/stats_payload.go @@ -0,0 +1,29 @@ +package model + +import ( + "bytes" + "compress/gzip" + "encoding/json" +) + +// StatsPayload represents the payload to be flushed to the stats endpoint +type StatsPayload struct { + HostName string `json:"hostname"` + Env string `json:"env"` + Stats []StatsBucket `json:"stats"` +} + +// EncodeStatsPayload encodes the stats payload as json/gzip. 
+func EncodeStatsPayload(payload *StatsPayload) ([]byte, error) { + var b bytes.Buffer + var err error + + gz, err := gzip.NewWriterLevel(&b, gzip.BestSpeed) + if err != nil { + return nil, err + } + err = json.NewEncoder(gz).Encode(payload) + gz.Close() + + return b.Bytes(), err +} diff --git a/writer/datadog_endpoint.go b/writer/datadog_endpoint.go index 55387a941..75df4eaf2 100644 --- a/writer/datadog_endpoint.go +++ b/writer/datadog_endpoint.go @@ -48,8 +48,6 @@ func (e *DatadogEndpoint) Write(payload []byte, headers map[string]string) error req.URL.RawQuery = queryParams.Encode() SetExtraHeaders(req.Header, headers) - req.Header.Set("Content-Type", "application/x-protobuf") - req.Header.Set("Content-Encoding", "identity") resp, err := e.client.Do(req) diff --git a/writer/endpoint.go b/writer/endpoint.go index 9087b4f6c..423828a01 100644 --- a/writer/endpoint.go +++ b/writer/endpoint.go @@ -6,6 +6,8 @@ import ( log "github.com/cihub/seelog" ) +const languageHeaderKey = "X-Datadog-Reported-Languages" + // Endpoint is an interface where we send the data from the Agent. type Endpoint interface { Write(payload []byte, headers map[string]string) error diff --git a/writer/service_writer.go b/writer/service_writer.go new file mode 100644 index 000000000..41cd09337 --- /dev/null +++ b/writer/service_writer.go @@ -0,0 +1,168 @@ +package writer + +import ( + "strings" + "sync" + "sync/atomic" + "time" + + log "github.com/cihub/seelog" + + "github.com/DataDog/datadog-trace-agent/config" + "github.com/DataDog/datadog-trace-agent/info" + "github.com/DataDog/datadog-trace-agent/model" + "github.com/DataDog/datadog-trace-agent/statsd" + "github.com/DataDog/datadog-trace-agent/watchdog" +) + +// ServiceWriter ingests service metadata and flushes it to the API. 
+type ServiceWriter struct { + endpoint Endpoint + + InServices <-chan model.ServicesMetadata + + serviceBuffer model.ServicesMetadata + updated bool + + stats info.ServiceWriterInfo + + exit chan struct{} + exitWG *sync.WaitGroup + + conf *config.AgentConfig +} + +// NewServiceWriter returns a new writer for services. +func NewServiceWriter(conf *config.AgentConfig) *ServiceWriter { + var endpoint Endpoint + + if conf.APIEnabled { + client := NewClient(conf) + endpoint = NewDatadogEndpoint(client, conf.APIEndpoint, "/api/v0.2/services", conf.APIKey) + } else { + log.Info("API interface is disabled, flushing to /dev/null instead") + endpoint = &NullEndpoint{} + } + + return &ServiceWriter{ + endpoint: endpoint, + + serviceBuffer: make(model.ServicesMetadata), + + exit: make(chan struct{}), + exitWG: &sync.WaitGroup{}, + + conf: conf, + } +} + +// Start starts the writer. +func (w *ServiceWriter) Start() { + go func() { + defer watchdog.LogOnPanic() + w.Run() + }() +} + +// Run runs the main loop of the writer goroutine. It buffers +// services read from input chan and flushes them when necessary. +func (w *ServiceWriter) Run() { + w.exitWG.Add(1) + defer w.exitWG.Done() + + // for now, simply flush every x seconds + flushTicker := time.NewTicker(5 * time.Second) + defer flushTicker.Stop() + + updateInfoTicker := time.NewTicker(1 * time.Minute) + defer updateInfoTicker.Stop() + + log.Debug("starting service writer") + + for { + select { + case sm := <-w.InServices: + updated := w.serviceBuffer.Update(sm) + if updated { + w.updated = updated + statsd.Client.Count("datadog.trace_agent.writer.services.updated", 1, nil, 1) + } + case <-flushTicker.C: + w.Flush() + case <-updateInfoTicker.C: + go w.updateInfo() + case <-w.exit: + log.Info("exiting service writer, flushing all modified services") + w.Flush() + return + } + } +} + +// Stop stops the main Run loop. 
+func (w *ServiceWriter) Stop() { + close(w.exit) + w.exitWG.Wait() +} + +// Flush flushes service metadata, if they changed, to the API +func (w *ServiceWriter) Flush() { + if !w.updated { + return + } + w.updated = false + + serviceBuffer := w.serviceBuffer + + log.Debugf("going to flush updated service metadata, %d services", len(serviceBuffer)) + atomic.StoreInt64(&w.stats.Services, int64(len(serviceBuffer))) + + data, err := model.EncodeServicesPayload(serviceBuffer) + if err != nil { + log.Errorf("encoding issue: %v", err) + return + } + + headers := map[string]string{ + languageHeaderKey: strings.Join(info.Languages(), "|"), + "Content-Type": "application/json", + } + + atomic.AddInt64(&w.stats.Bytes, int64(len(data))) + + startFlush := time.Now() + + // Send the payload to the endpoint + // TODO: track metrics/stats about payload + err = w.endpoint.Write(data, headers) + + flushTime := time.Since(startFlush) + + // TODO: if error, depending on why, replay later. + if err != nil { + atomic.AddInt64(&w.stats.Errors, 1) + log.Errorf("failed to flush service payload, time:%s, size:%d bytes, error: %s", flushTime, len(data), err) + return + } + + log.Infof("flushed service payload to the API, time:%s, size:%d bytes", flushTime, len(data)) + statsd.Client.Gauge("datadog.trace_agent.service_writer.flush_duration", flushTime.Seconds(), nil, 1) + atomic.AddInt64(&w.stats.Payloads, 1) +} + +func (w *ServiceWriter) updateInfo() { + var swInfo info.ServiceWriterInfo + + // Load counters and reset them for the next flush + swInfo.Payloads = atomic.SwapInt64(&w.stats.Payloads, 0) + swInfo.Services = atomic.SwapInt64(&w.stats.Services, 0) + swInfo.Bytes = atomic.SwapInt64(&w.stats.Bytes, 0) + swInfo.Errors = atomic.SwapInt64(&w.stats.Errors, 0) + + statsd.Client.Count("datadog.trace_agent.service_writer.payloads", int64(swInfo.Payloads), nil, 1) + statsd.Client.Gauge("datadog.trace_agent.service_writer.services", float64(swInfo.Services), nil, 1) + 
statsd.Client.Count("datadog.trace_agent.service_writer.bytes", int64(swInfo.Bytes), nil, 1) + statsd.Client.Count("datadog.trace_agent.service_writer.errors", int64(swInfo.Errors), nil, 1) + + info.UpdateServiceWriterInfo(swInfo) +} diff --git a/writer/stats_writer.go b/writer/stats_writer.go new file mode 100644 index 000000000..7e238893f --- /dev/null +++ b/writer/stats_writer.go @@ -0,0 +1,156 @@ +package writer + +import ( + "strings" + "sync" + "sync/atomic" + "time" + + log "github.com/cihub/seelog" + + "github.com/DataDog/datadog-trace-agent/config" + "github.com/DataDog/datadog-trace-agent/info" + "github.com/DataDog/datadog-trace-agent/model" + "github.com/DataDog/datadog-trace-agent/statsd" + "github.com/DataDog/datadog-trace-agent/watchdog" +) + +// StatsWriter ingests service metadata and flush them to the API. +type StatsWriter struct { + endpoint Endpoint + + InStats <-chan []model.StatsBucket + + stats info.StatsWriterInfo + + exit chan struct{} + exitWG *sync.WaitGroup + + conf *config.AgentConfig +} + +// NewStatsWriter returns a new writer for services. +func NewStatsWriter(conf *config.AgentConfig) *StatsWriter { + var endpoint Endpoint + + if conf.APIEnabled { + client := NewClient(conf) + endpoint = NewDatadogEndpoint(client, conf.APIEndpoint, "/api/v0.2/stats", conf.APIKey) + } else { + log.Info("API interface is disabled, flushing to /dev/null instead") + endpoint = &NullEndpoint{} + } + + return &StatsWriter{ + endpoint: endpoint, + + exit: make(chan struct{}), + exitWG: &sync.WaitGroup{}, + + conf: conf, + } +} + +// Start starts the writer. +func (w *StatsWriter) Start() { + go func() { + defer watchdog.LogOnPanic() + w.Run() + }() +} + +// Run runs the main loop of the writer goroutine. If flushes +// stats buckets once received from the concentrator. 
+func (w *StatsWriter) Run() { + w.exitWG.Add(1) + defer w.exitWG.Done() + + log.Debug("starting stats writer") + + updateInfoTicker := time.NewTicker(1 * time.Minute) + defer updateInfoTicker.Stop() + + for { + select { + case stats := <-w.InStats: + // TODO: have a buffer with replay abilities + w.Flush(stats) + case <-updateInfoTicker.C: + go w.updateInfo() + case <-w.exit: + log.Info("exiting stats writer") + return + } + } +} + +// Stop stops the main Run loop. +func (w *StatsWriter) Stop() { + close(w.exit) + w.exitWG.Wait() +} + +// Flush flushes received stats +func (w *StatsWriter) Flush(stats []model.StatsBucket) { + if len(stats) == 0 { + log.Debugf("no stats to flush") + return + } + log.Debugf("going to flush stats buckets, %d buckets", len(stats)) + atomic.AddInt64(&w.stats.StatsBuckets, int64(len(stats))) + + statsPayload := &model.StatsPayload{ + HostName: w.conf.HostName, + Env: w.conf.DefaultEnv, + Stats: stats, + } + + data, err := model.EncodeStatsPayload(statsPayload) + if err != nil { + log.Errorf("encoding issue: %v", err) + return + } + + headers := map[string]string{ + languageHeaderKey: strings.Join(info.Languages(), "|"), + "Content-Type": "application/json", + "Content-Encoding": "gzip", + } + + atomic.AddInt64(&w.stats.Bytes, int64(len(data))) + + startFlush := time.Now() + + // Send the payload to the endpoint + err = w.endpoint.Write(data, headers) + + flushTime := time.Since(startFlush) + + // TODO: if error, depending on why, replay later. 
+ if err != nil { + atomic.AddInt64(&w.stats.Errors, 1) + log.Errorf("failed to flush service payload, time:%s, size:%d bytes, error: %s", flushTime, len(data), err) + return + } + + log.Infof("flushed service payload to the API, time:%s, size:%d bytes", flushTime, len(data)) + statsd.Client.Gauge("datadog.trace_agent.stats_writer.flush_duration", flushTime.Seconds(), nil, 1) + atomic.AddInt64(&w.stats.Payloads, 1) +} + +func (w *StatsWriter) updateInfo() { + var swInfo info.StatsWriterInfo + + // Load counters and reset them for the next flush + swInfo.Payloads = atomic.SwapInt64(&w.stats.Payloads, 0) + swInfo.StatsBuckets = atomic.SwapInt64(&w.stats.StatsBuckets, 0) + swInfo.Bytes = atomic.SwapInt64(&w.stats.Bytes, 0) + swInfo.Errors = atomic.SwapInt64(&w.stats.Errors, 0) + + statsd.Client.Count("datadog.trace_agent.stats_writer.payloads", int64(swInfo.Payloads), nil, 1) + statsd.Client.Count("datadog.trace_agent.stats_writer.stats_buckets", int64(swInfo.StatsBuckets), nil, 1) + statsd.Client.Count("datadog.trace_agent.stats_writer.bytes", int64(swInfo.Bytes), nil, 1) + statsd.Client.Count("datadog.trace_agent.stats_writer.errors", int64(swInfo.Errors), nil, 1) + + info.UpdateStatsWriterInfo(swInfo) +} diff --git a/writer/trace_writer.go b/writer/trace_writer.go index de7c2a041..d67fb1066 100644 --- a/writer/trace_writer.go +++ b/writer/trace_writer.go @@ -16,10 +16,6 @@ import ( "github.com/DataDog/datadog-trace-agent/watchdog" ) -const ( - languageHeaderKey = "X-Datadog-Reported-Languages" -) - // TraceWriter ingests sampled traces and flush them to the API. 
type TraceWriter struct { endpoint Endpoint @@ -81,6 +77,8 @@ func (w *TraceWriter) Run() { updateInfoTicker := time.NewTicker(1 * time.Minute) defer updateInfoTicker.Stop() + log.Debug("starting trace writer") + for { select { case trace := <-w.InTraces: @@ -93,7 +91,7 @@ func (w *TraceWriter) Run() { case <-updateInfoTicker.C: go w.updateInfo() case <-w.exit: - log.Info("exiting, flushing all remaining traces") + log.Info("exiting trace writer, flushing all remaining traces") w.Flush() return } @@ -109,6 +107,11 @@ func (w *TraceWriter) Stop() { // Flush flushes traces the data in the API func (w *TraceWriter) Flush() { traces := w.traceBuffer + + if len(traces) == 0 { + log.Debugf("no trace to flush") + return + } log.Debugf("going to flush %d traces", len(traces)) atomic.AddInt64(&w.stats.Traces, int64(len(traces))) @@ -130,7 +133,9 @@ func (w *TraceWriter) Flush() { atomic.AddInt64(&w.stats.Bytes, int64(len(serialized))) headers := map[string]string{ - languageHeaderKey: strings.Join(info.Languages(), "|"), + languageHeaderKey: strings.Join(info.Languages(), "|"), + "Content-Type": "application/x-protobuf", + "Content-Encoding": "identity", } startFlush := time.Now() @@ -143,7 +148,8 @@ func (w *TraceWriter) Flush() { // TODO: if error, depending on why, replay later. 
if err != nil { atomic.AddInt64(&w.stats.Errors, 1) - log.Errorf("failed to flush trace payload: %s", err) + log.Errorf("failed to flush trace payload, time:%s, size:%d bytes, error: %s", flushTime, len(serialized), err) + return } log.Infof("flushed trace payload to the API, time:%s, size:%d bytes", flushTime, len(serialized)) @@ -156,13 +162,13 @@ func (w *TraceWriter) updateInfo() { var twInfo info.TraceWriterInfo // Load counters and reset them for the next flush - twInfo.Traces = atomic.SwapInt64(&w.stats.Traces, 0) twInfo.Payloads = atomic.SwapInt64(&w.stats.Payloads, 0) + twInfo.Traces = atomic.SwapInt64(&w.stats.Traces, 0) twInfo.Bytes = atomic.SwapInt64(&w.stats.Bytes, 0) twInfo.Errors = atomic.SwapInt64(&w.stats.Traces, 0) - statsd.Client.Count("datadog.trace_agent.trace_writer.traces", int64(twInfo.Traces), nil, 1) statsd.Client.Count("datadog.trace_agent.trace_writer.payloads", int64(twInfo.Payloads), nil, 1) + statsd.Client.Count("datadog.trace_agent.trace_writer.traces", int64(twInfo.Traces), nil, 1) statsd.Client.Count("datadog.trace_agent.trace_writer.bytes", int64(twInfo.Bytes), nil, 1) statsd.Client.Count("datadog.trace_agent.trace_writer.errors", int64(twInfo.Errors), nil, 1) diff --git a/writer/writer_test.go b/writer/writer_test.go deleted file mode 100644 index 6021e60bc..000000000 --- a/writer/writer_test.go +++ /dev/null @@ -1,247 +0,0 @@ -package writer - -import ( - "fmt" - "io/ioutil" - "net/http" - "net/http/httptest" - "testing" - "time" - - "github.com/DataDog/datadog-trace-agent/config" - "github.com/DataDog/datadog-trace-agent/fixtures" - "github.com/DataDog/datadog-trace-agent/model" - "github.com/stretchr/testify/assert" -) - -type dataFromAPI struct { - urlPath string - urlParams map[string][]string - header http.Header - body string -} - -func newTestServer(t *testing.T, data chan dataFromAPI) *httptest.Server { - return httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { - body, err := 
ioutil.ReadAll(r.Body) - if err != nil { - t.Errorf("test server: cannot read request body: %v", err) - return - } - defer r.Body.Close() - - data <- dataFromAPI{ - urlPath: r.URL.Path, - urlParams: r.URL.Query(), - header: r.Header, - body: string(body), - } - w.WriteHeader(http.StatusOK) - })) -} - -func newFailingTestServer(t *testing.T, status int) *httptest.Server { - return httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { - w.WriteHeader(status) - })) -} - -func newTestPayload(env string) model.AgentPayload { - return model.AgentPayload{ - HostName: "test.host", - Env: env, - Traces: []model.Trace{model.Trace{fixtures.TestSpan()}}, - Stats: []model.StatsBucket{fixtures.TestStatsBucket()}, - } -} - -func TestWriterServices(t *testing.T) { - assert := assert.New(t) - // where we'll receive data - data := make(chan dataFromAPI, 1) - - // make a real HTTP endpoint so we can test that too - testAPI := newTestServer(t, data) - defer testAPI.Close() - - conf := config.NewDefaultAgentConfig() - conf.APIEndpoint = testAPI.URL - conf.APIKey = "xxxxxxx" - - w := NewWriter(conf) - w.InServices = make(chan model.ServicesMetadata) - - go w.Run() - - // send services - services := model.ServicesMetadata{ - "mcnulty": map[string]string{ - "app_type": "web", - }, - } - - w.InServices <- services - -receivingLoop: - for { - select { - case received := <-data: - assert.Equal("/api/v0.1/services", received.urlPath) - assert.Equal(map[string][]string{ - "api_key": []string{"xxxxxxx"}, - }, received.urlParams) - assert.Equal("application/json", received.header.Get("Content-Type")) - assert.Equal("", received.header.Get("Content-Encoding")) - assert.Equal(`{"mcnulty":{"app_type":"web"}}`, received.body) - break receivingLoop - case <-time.After(time.Second): - t.Fatal("did not receive service data in time") - } - } -} - -func TestWriterPayload(t *testing.T) { - assert := assert.New(t) - - data := make(chan dataFromAPI, 1) - - server := 
newTestServer(t, data) - defer server.Close() - - conf := config.NewDefaultAgentConfig() - conf.APIEndpoint = server.URL - conf.APIKey = "key" - - w := NewWriter(conf) - go w.Run() - - w.InPayloads <- newTestPayload("test") - -receivingLoop: - for { - select { - case received := <-data: - assert.Equal("/api/v0.1/collector", received.urlPath) - assert.Equal(map[string][]string{"api_key": []string{"key"}}, received.urlParams) - assert.Equal("application/json", received.header.Get("Content-Type")) - assert.Equal("gzip", received.header.Get("Content-Encoding")) - // do not assert the body yet - break receivingLoop - case <-time.After(time.Second): - t.Fatal("did not receive service data in time") - } - } - - w.Stop() - - assert.Equal(0, len(w.payloadBuffer)) -} - -func TestWriterPayloadErrors(t *testing.T) { - assert := assert.New(t) - - data := make(chan dataFromAPI, 1) - - server := newTestServer(t, data) - defer server.Close() - - conf := config.NewDefaultAgentConfig() - conf.APIEndpoint = server.URL - conf.APIKey = "key" - - w := NewWriter(conf) - go w.Run() - - w.InPayloads <- newTestPayload("test") - -receivingLoop: - for { - select { - case received := <-data: - assert.Equal("/api/v0.1/collector", received.urlPath) - assert.Equal(map[string][]string{"api_key": []string{"key"}}, received.urlParams) - assert.Equal("application/json", received.header.Get("Content-Type")) - assert.Equal("gzip", received.header.Get("Content-Encoding")) - // do not assert the body yet - break receivingLoop - case <-time.After(time.Second): - t.Fatal("did not receive service data in time") - } - } - - w.Stop() - - // The payloadBuffer must be empty since the request to the bucket must have succeeded. 
- assert.Equal(0, len(w.payloadBuffer)) -} - -func TestWriterBuffering(t *testing.T) { - assert := assert.New(t) - - nbPayloads := 3 - payloads := make([]model.AgentPayload, nbPayloads) - payloadSizes := make([]int, nbPayloads) - for i := range payloads { - payload := newTestPayload(fmt.Sprintf("p%d", i)) - payloads[i] = payload - - data, err := model.EncodeAgentPayload(&payload) - if err != nil { - t.Fatalf("cannot encode test payload: %v", err) - } - payloadSizes[i] = len(data) - } - - // Use a server that will reject all requests to make sure our - // payloads are kept in the buffer. - server := newFailingTestServer(t, http.StatusInternalServerError) - defer server.Close() - - conf := config.NewDefaultAgentConfig() - conf.APIEndpoint = server.URL - conf.APIKey = "key" - conf.APIPayloadBufferMaxSize = payloadSizes[0] + payloadSizes[1] - - w := NewWriter(conf) - // Make the chan unbuffered to block on write - w.InPayloads = make(chan model.AgentPayload) - go w.Run() - - for _, payload := range payloads { - w.InPayloads <- payload - } - - w.Stop() - - // Since the writer was created with a buffer just large enough for - // the first two payloads, the third payload overflowed the buffer, - // and the first and oldest payload (p0) was discarded. 
- assert.Equal(2, len(w.payloadBuffer)) - assert.Equal("p1", w.payloadBuffer[0].payload.Env) - assert.Equal("p2", w.payloadBuffer[1].payload.Env) -} - -func TestWriterDisabledBuffering(t *testing.T) { - assert := assert.New(t) - - server := newFailingTestServer(t, http.StatusInternalServerError) - defer server.Close() - - conf := config.NewDefaultAgentConfig() - conf.APIEndpoint = server.URL - conf.APIKey = "key" - conf.APIPayloadBufferMaxSize = 0 - - w := NewWriter(conf) - // Make the chan unbuffered to block on write - w.InPayloads = make(chan model.AgentPayload) - go w.Run() - - w.InPayloads <- newTestPayload("test") - - w.Stop() - - // Since buffering is disabled, the payload should have been - // dropped and the buffer should be empty. - assert.Equal(0, len(w.payloadBuffer)) -} From 5414a48fa0ec1836c9416bdacf916e8c525d4cff Mon Sep 17 00:00:00 2001 From: Benjamin Fernandes Date: Mon, 18 Dec 2017 16:16:48 +0100 Subject: [PATCH 4/9] Update lint configuration --- Rakefile | 3 +++ model/payload.go | 6 ------ 2 files changed, 3 insertions(+), 6 deletions(-) diff --git a/Rakefile b/Rakefile index 62e88050d..a799b0e90 100644 --- a/Rakefile +++ b/Rakefile @@ -17,6 +17,7 @@ desc 'Bootstrap CI environment' task :bootstrap do tools = { 'github.com/golang/lint' => { + # TODO: upgrade golint version: 'b8599f7d71e7fead76b25aeb919c0e2558672f4a', main_pkg: './golint', check_cmd: 'golint', @@ -68,6 +69,8 @@ PACKAGES = %w( EXCLUDE_LINT = [ 'model/services_gen.go', 'model/trace_gen.go', + 'model/trace.pb.go', + 'model/trace_payload.pb.go', 'model/span_gen.go', 'model/span.pb.go', ] diff --git a/model/payload.go b/model/payload.go index 1d9e0e3f3..35f583ec7 100644 --- a/model/payload.go +++ b/model/payload.go @@ -106,9 +106,3 @@ func SetAgentPayloadHeaders(h http.Header, extras map[string]string) { default: } } - -func SetExtraHeaders(h http.Header, extras map[string]string) { - for key, value := range extras { - h.Set(key, value) - } -} From 
bc417e0d09f0a7a6e8a83ea0cbb12abe68aa6ac4 Mon Sep 17 00:00:00 2001 From: Benjamin Fernandes Date: Tue, 19 Dec 2017 10:13:22 +0100 Subject: [PATCH 5/9] Remove legacy writer code --- agent/concentrator.go | 2 +- agent/sampler.go | 1 + writer/client.go | 4 + writer/legacy_endpoint.go | 202 --------------------------- writer/legacy_endpoint_test.go | 43 ------ writer/legacy_writer.go | 243 --------------------------------- 6 files changed, 6 insertions(+), 489 deletions(-) delete mode 100644 writer/legacy_endpoint.go delete mode 100644 writer/legacy_endpoint_test.go delete mode 100644 writer/legacy_writer.go diff --git a/agent/concentrator.go b/agent/concentrator.go index 49b9ae5a6..da200a4d6 100644 --- a/agent/concentrator.go +++ b/agent/concentrator.go @@ -45,7 +45,7 @@ func NewConcentrator(aggregators []string, bsize int64) *Concentrator { return &c } -// Start starts the writer. +// Start starts the concentrator. func (c *Concentrator) Start() { go func() { defer watchdog.LogOnPanic() diff --git a/agent/sampler.go b/agent/sampler.go index 47ee472b4..3b8952221 100644 --- a/agent/sampler.go +++ b/agent/sampler.go @@ -98,6 +98,7 @@ func (s *Sampler) logStats() { engineType, state.InTPS, state.OutTPS, state.MaxTPS, state.Offset, state.Slope, state.Cardinality) // publish through expvar + // TODO: avoid type switch, prefer engine method switch s.engine.(type) { case *sampler.ScoreEngine: info.UpdateSamplerInfo(info.SamplerInfo{EngineType: engineType, Stats: stats, State: state}) diff --git a/writer/client.go b/writer/client.go index d5f9adc10..09c3525dc 100644 --- a/writer/client.go +++ b/writer/client.go @@ -2,11 +2,15 @@ package writer import ( "net/http" + "time" "github.com/DataDog/datadog-trace-agent/config" log "github.com/cihub/seelog" ) +// timeout is the HTTP timeout for POST requests to the Datadog backend +const timeout = 10 * time.Second + // NewClient returns a http.Client configured with the Agent options. 
func NewClient(conf *config.AgentConfig) (client *http.Client) { if conf.Proxy != nil { diff --git a/writer/legacy_endpoint.go b/writer/legacy_endpoint.go deleted file mode 100644 index f1e9e212a..000000000 --- a/writer/legacy_endpoint.go +++ /dev/null @@ -1,202 +0,0 @@ -package writer - -import ( - "bytes" - "fmt" - "net/http" - "time" - - log "github.com/cihub/seelog" - - "github.com/DataDog/datadog-trace-agent/config" - "github.com/DataDog/datadog-trace-agent/model" - "github.com/DataDog/datadog-trace-agent/statsd" -) - -// timeout is the HTTP timeout for POST requests to the Datadog backend -var timeout = 10 * time.Second - -// apiError stores the error triggered we can't send data to the endpoint. -// It implements the error interface. -type apiError struct { - err error - endpoint *APIEndpoint -} - -func newAPIError(err error, endpoint *APIEndpoint) *apiError { - return &apiError{err: err, endpoint: endpoint} -} - -// Returns the error message -func (ae *apiError) Error() string { - return fmt.Sprintf("%s: %v", ae.endpoint.url, ae.err) -} - -// AgentEndpoint is an interface where we write the data -// that comes out of the agent -type AgentEndpoint interface { - // Write sends an agent payload which carries all the - // pre-processed stats/traces - Write(b model.AgentPayload) (int, error) - - // WriteServices sends updates about the services metadata - WriteServices(s model.ServicesMetadata) -} - -// APIEndpoint implements AgentEndpoint to send data to a -// an endpoint and API key. 
-type APIEndpoint struct { - apiKey string - url string - client *http.Client -} - -// NewAPIEndpoint returns a new APIEndpoint from a given config -// of URL (such as https://trace.agent.datadoghq.com) and API -// keys -func NewAPIEndpoint(url, apiKey string) *APIEndpoint { - if apiKey == "" { - panic(fmt.Errorf("No API key")) - } - - ae := APIEndpoint{ - apiKey: apiKey, - url: url, - client: &http.Client{ - Timeout: timeout, - }, - } - return &ae -} - -// SetProxy updates the http client used by APIEndpoint to report via the given proxy -func (ae *APIEndpoint) SetProxy(settings *config.ProxySettings) { - proxyPath, err := settings.URL() - if err != nil { - log.Errorf("failed to configure proxy: %v", err) - return - } - ae.client = &http.Client{ - Timeout: timeout, - Transport: &http.Transport{ - Proxy: http.ProxyURL(proxyPath), - }, - } -} - -// Write will send the serialized payload to the API endpoint. -func (ae *APIEndpoint) Write(p model.AgentPayload) (int, error) { - startFlush := time.Now() - - // Serialize the payload to send it to the API - data, err := model.EncodeAgentPayload(&p) - if err != nil { - log.Errorf("encoding issue: %v", err) - return 0, err - } - - payloadSize := len(data) - statsd.Client.Count("datadog.trace_agent.writer.payload_bytes", int64(payloadSize), nil, 1) - - // Create the request to be sent to the API - url := ae.url + model.AgentPayloadAPIPath() - req, err := http.NewRequest("POST", url, bytes.NewBuffer(data)) - - // If the request cannot be created, there is no point in trying again later, - // it will always yield the same result. 
- if err != nil { - log.Errorf("could not create request for endpoint %s: %v", url, err) - return payloadSize, err - } - - // Set API key in the header and issue the request - queryParams := req.URL.Query() - queryParams.Add("api_key", ae.apiKey) - req.URL.RawQuery = queryParams.Encode() - - model.SetAgentPayloadHeaders(req.Header, p.Extras()) - resp, err := ae.client.Do(req) - - // If the request fails, we'll try again later. - if err != nil { - log.Errorf("error when requesting to endpoint %s: %v", url, err) - return payloadSize, newAPIError(err, ae) - } - defer resp.Body.Close() - - // We check the status code to see if the request has succeeded. - if resp.StatusCode/100 != 2 { - err := fmt.Errorf("request to %s responded with %s", url, resp.Status) - log.Error(err) - - // Only retry for 5xx (server) errors - if resp.StatusCode/100 == 5 { - return payloadSize, newAPIError(err, ae) - } - - // Does not retry for other errors - return payloadSize, err - } - - flushTime := time.Since(startFlush) - log.Infof("flushed payload to the API, time:%s, size:%d", flushTime, len(data)) - statsd.Client.Gauge("datadog.trace_agent.writer.flush_duration", flushTime.Seconds(), nil, 1) - - // Everything went fine - return payloadSize, nil -} - -// WriteServices writes services to the services endpoint -// This function very loosely logs and returns if any error happens. -// See comment above. 
-func (ae *APIEndpoint) WriteServices(s model.ServicesMetadata) { - // Serialize the data to be sent to the API endpoint - data, err := model.EncodeServicesPayload(s) - if err != nil { - log.Errorf("encoding issue: %v", err) - return - } - - // Create the request - url := ae.url + model.ServicesPayloadAPIPath() - req, err := http.NewRequest("POST", url, bytes.NewBuffer(data)) - if err != nil { - log.Errorf("could not create request for endpoint %s: %v", url, err) - return - } - - // Set the header with the API key and issue the request - queryParams := req.URL.Query() - queryParams.Add("api_key", ae.apiKey) - req.URL.RawQuery = queryParams.Encode() - model.SetServicesPayloadHeaders(req.Header) - resp, err := ae.client.Do(req) - if err != nil { - log.Errorf("error when requesting to endpoint %s: %v", url, err) - return - } - defer resp.Body.Close() - - if resp.StatusCode/100 != 2 { - log.Errorf("request to %s responded with %s", url, resp.Status) - return - } - - // Everything went fine. 
- log.Infof("flushed %d services to the API", len(s)) -} - -// NullAgentEndpoint implements AgentEndpoint, it just logs data -// and drops everything into /dev/null -type NullAgentEndpoint struct{} - -// Write just logs and bails -func (ne NullAgentEndpoint) Write(p model.AgentPayload) (int, error) { - log.Debug("null endpoint: dropping payload, %d traces, %d stats buckets", p.Traces, p.Stats) - return 0, nil -} - -// WriteServices just logs and stops -func (ne NullAgentEndpoint) WriteServices(s model.ServicesMetadata) { - log.Debugf("null endpoint: dropping services update %v", s) -} diff --git a/writer/legacy_endpoint_test.go b/writer/legacy_endpoint_test.go deleted file mode 100644 index d1148f980..000000000 --- a/writer/legacy_endpoint_test.go +++ /dev/null @@ -1,43 +0,0 @@ -package writer - -import ( - "testing" - - "github.com/DataDog/datadog-trace-agent/fixtures" - "github.com/DataDog/datadog-trace-agent/model" -) - -func newBenchPayload(traces, spans, stats int) model.AgentPayload { - payload := model.AgentPayload{ - HostName: "test.host", - Env: "test", - } - for i := 0; i < traces; i++ { - var trace model.Trace - for j := 0; j < spans/traces; j++ { - span := fixtures.TestSpan() - span.TraceID += uint64(i * spans) - span.ParentID += uint64(i * spans) - span.SpanID += uint64(i*spans + j) - span.Start += int64(i*spans + j) - trace = append(trace, span) - } - payload.Traces = append(payload.Traces, trace) - } - for i := 0; i < stats; i++ { - payload.Stats = append(payload.Stats, fixtures.TestStatsBucket()) - } - return payload -} - -func BenchmarkEncodeAgentPayload(b *testing.B) { - payload := newBenchPayload(10, 1000, 100) - - b.ResetTimer() - b.ReportAllocs() - for i := 0; i < b.N; i++ { - if _, err := model.EncodeAgentPayload(&payload); err != nil { - b.Fatalf("error encoding payload: %v", b) - } - } -} diff --git a/writer/legacy_writer.go b/writer/legacy_writer.go deleted file mode 100644 index 39ff0efda..000000000 --- a/writer/legacy_writer.go +++ 
/dev/null @@ -1,243 +0,0 @@ -package writer - -import ( - "sync" - "time" - - log "github.com/cihub/seelog" - - "github.com/DataDog/datadog-trace-agent/config" - "github.com/DataDog/datadog-trace-agent/model" - "github.com/DataDog/datadog-trace-agent/statsd" - "github.com/DataDog/datadog-trace-agent/watchdog" -) - -// the amount of time in seconds to wait before resending a payload -const payloadResendDelay = 5 * time.Second - -// the amount of time in seconds a payload can stay buffered before being dropped -const payloadMaxAge = 10 * time.Minute - -// writerPayload wraps a model.AgentPayload and keeps track of a list of -// endpoints the payload must be sent to. -type writerPayload struct { - payload model.AgentPayload // the payload itself - size int // the size of the serialized payload or 0 if it has not been serialized yet - endpoint AgentEndpoint // the endpoints the payload must be sent to - creationDate time.Time // the creation date of the payload - nextFlush time.Time // The earliest moment we can flush -} - -func newWriterPayload(p model.AgentPayload, endpoint AgentEndpoint) *writerPayload { - return &writerPayload{ - payload: p, - endpoint: endpoint, - creationDate: time.Now(), - } -} - -func (p *writerPayload) write() error { - size, err := p.endpoint.Write(p.payload) - p.size = size - return err -} - -// Writer is the last chain of trace-agent which takes the -// pre-processed data from channels and tentatively output them -// to a given endpoint. 
-type Writer struct { - endpoint AgentEndpoint // where the data will end - - // input data - InPayloads chan model.AgentPayload // main payloads for processed traces/stats - InServices chan model.ServicesMetadata // secondary services metadata - - payloadBuffer []*writerPayload // buffer of payloads ready to send - serviceBuffer model.ServicesMetadata // services are merged into this map continuously - - exit chan struct{} - exitWG *sync.WaitGroup - - conf *config.AgentConfig -} - -// NewWriter returns a new Writer -func NewWriter(conf *config.AgentConfig) *Writer { - var endpoint AgentEndpoint - - if conf.APIEnabled { - endpoint = NewAPIEndpoint(conf.APIEndpoint, conf.APIKey) - if conf.Proxy != nil { - // we have some kind of proxy configured. - // make sure our http client uses it - log.Infof("configuring proxy through host %s", conf.Proxy.Host) - endpoint.(*APIEndpoint).SetProxy(conf.Proxy) - } - } else { - log.Info("API interface is disabled, flushing to /dev/null instead") - endpoint = NullAgentEndpoint{} - } - - return &Writer{ - endpoint: endpoint, - - // small buffer to not block in case we're flushing - InPayloads: make(chan model.AgentPayload, 1), - - payloadBuffer: make([]*writerPayload, 0, 5), - serviceBuffer: make(model.ServicesMetadata), - - exit: make(chan struct{}), - exitWG: &sync.WaitGroup{}, - - conf: conf, - } -} - -// isPayloadBufferingEnabled returns true if payload buffering is enabled or -// false if it is not. -func (w *Writer) isPayloadBufferingEnabled() bool { - return w.conf.APIPayloadBufferMaxSize > 0 -} - -// Run starts the writer. -func (w *Writer) Run() { - w.exitWG.Add(1) - go func() { - defer watchdog.LogOnPanic() - w.main() - }() -} - -// main is the main loop of the writer goroutine. If buffers payloads and -// services read from input chans and flushes them when necessary. -// NOTE: this currently happens sequentially, but it would not be too -// hard to mutex and parallelize. Not sure it's needed though. 
-func (w *Writer) main() { - defer w.exitWG.Done() - - flushTicker := time.NewTicker(time.Second) - defer flushTicker.Stop() - - for { - select { - case p := <-w.InPayloads: - if p.IsEmpty() { - continue - } - w.payloadBuffer = append(w.payloadBuffer, - newWriterPayload(p, w.endpoint)) - w.Flush() - case <-flushTicker.C: - w.Flush() - case sm := <-w.InServices: - updated := w.serviceBuffer.Update(sm) - if updated { - w.FlushServices() - statsd.Client.Count("datadog.trace_agent.services.updated", 1, nil, 1) - } - case <-w.exit: - log.Info("exiting, trying to flush all remaining data") - w.Flush() - return - } - } -} - -// Stop stops the main Run loop -func (w *Writer) Stop() { - close(w.exit) - w.exitWG.Wait() -} - -// FlushServices initiate a flush of the services to the services endpoint -func (w *Writer) FlushServices() { - w.endpoint.WriteServices(w.serviceBuffer) -} - -// Flush actually writes the data in the API -func (w *Writer) Flush() { - // TODO[leo]: batch payloads in same API key - - var payloads []*writerPayload - now := time.Now() - bufSize := 0 - - bufferPayload := func(p *writerPayload) { - payloads = append(payloads, p) - bufSize += p.size - } - - nbSuccesses := 0 - nbErrors := 0 - - for _, p := range w.payloadBuffer { - if w.isPayloadBufferingEnabled() && p.nextFlush.After(now) { - // We already tried to flush recently, so there's no - // point in trying again right now. - bufferPayload(p) - continue - } - - err := p.write() - - if err == nil { - nbSuccesses++ - } else { - nbErrors++ - } - - if err == nil || !w.isPayloadBufferingEnabled() { - continue - } - - if terr, ok := err.(*apiError); ok { - // We could not send the payload and this is an API - // endpoint error, so we can try again later. 
- - if now.Sub(p.creationDate) > payloadMaxAge { - // The payload is too old, let's drop it - statsd.Client.Count("datadog.trace_agent.writer.dropped_payload", - int64(1), []string{"reason:too_old"}, 1) - continue - } - - p.nextFlush = now.Add(payloadResendDelay) - - // Keep this payload in the buffer to try again later, - // but only with the endpoints that failed. - p.endpoint = terr.endpoint - bufferPayload(p) - } - } - - if nbSuccesses > 0 { - statsd.Client.Count("datadog.trace_agent.writer.flush", - int64(nbSuccesses), []string{"status:success"}, 1) - } - - if nbErrors > 0 { - statsd.Client.Count("datadog.trace_agent.writer.flush", - int64(nbErrors), []string{"status:error"}, 1) - } - - // Drop payloads to respect the buffer size limit if necessary. - nbDrops := 0 - for n := 0; n < len(payloads) && bufSize > w.conf.APIPayloadBufferMaxSize; n++ { - bufSize -= payloads[n].size - nbDrops++ - } - - if nbDrops > 0 { - log.Infof("dropping %d payloads (payload buffer full)", nbDrops) - statsd.Client.Count("datadog.trace_agent.writer.dropped_payload", - int64(nbDrops), []string{"reason:buffer_full"}, 1) - - payloads = payloads[nbDrops:] - } - - statsd.Client.Gauge("datadog.trace_agent.writer.payload_buffer_size", - float64(bufSize), nil, 1) - - w.payloadBuffer = payloads -} From a830c92ea02535396f249c8dfa1733101ba7177e Mon Sep 17 00:00:00 2001 From: Benjamin Fernandes Date: Tue, 19 Dec 2017 10:43:39 +0100 Subject: [PATCH 6/9] Make channels struct constructor parameters --- agent/agent.go | 22 ++++++++-------------- agent/concentrator.go | 4 +++- agent/concentrator_test.go | 6 ++++-- agent/receiver.go | 7 ++++++- agent/receiver_test.go | 2 +- agent/sampler.go | 10 ++++++---- writer/service_writer.go | 4 +++- writer/stats_writer.go | 4 +++- writer/trace_writer.go | 4 +++- 9 files changed, 37 insertions(+), 26 deletions(-) diff --git a/agent/agent.go b/agent/agent.go index 4e29c89ea..decaf81a2 100644 --- a/agent/agent.go +++ b/agent/agent.go @@ -62,39 +62,33 @@ func 
NewAgent(conf *config.AgentConfig, exit chan struct{}) *Agent { dynConf := config.NewDynamicConfig() // inter-component channels - rawTraceChan := make(chan model.Trace, 5000) // about 1000 traces/sec for 5 sec + rawTraceChan := make(chan model.Trace, 5000) // about 1000 traces/sec for 5 sec, TODO: move to *model.Trace sampledTraceChan := make(chan *model.Trace) statsChan := make(chan []model.StatsBucket) serviceChan := make(chan model.ServicesMetadata, 50) // create components - r := NewHTTPReceiver(conf, dynConf) + r := NewHTTPReceiver(conf, dynConf, rawTraceChan, serviceChan) c := NewConcentrator( conf.ExtraAggregators, conf.BucketInterval.Nanoseconds(), + statsChan, ) f := filters.Setup(conf) - ss := NewScoreSampler(conf) + ss := NewScoreSampler(conf, sampledTraceChan) var ps *Sampler if conf.PrioritySampling { // Use priority sampling for distributed tracing only if conf says so // TODO: remove the option once comfortable ; as it is true by default. - ps = NewPrioritySampler(conf, dynConf) + ps = NewPrioritySampler(conf, dynConf, sampledTraceChan) } - tw := writer.NewTraceWriter(conf) - sw := writer.NewStatsWriter(conf) - svcW := writer.NewServiceWriter(conf) + tw := writer.NewTraceWriter(conf, sampledTraceChan) + sw := writer.NewStatsWriter(conf, statsChan) + svcW := writer.NewServiceWriter(conf, serviceChan) // wire components together - r.traces = rawTraceChan - r.services = serviceChan tw.InTraces = sampledTraceChan - ss.sampled = sampledTraceChan - if conf.PrioritySampling { - ps.sampled = sampledTraceChan - } - c.OutStats = statsChan sw.InStats = statsChan svcW.InServices = serviceChan diff --git a/agent/concentrator.go b/agent/concentrator.go index da200a4d6..e989e6247 100644 --- a/agent/concentrator.go +++ b/agent/concentrator.go @@ -32,12 +32,14 @@ type Concentrator struct { } // NewConcentrator initializes a new concentrator ready to be started -func NewConcentrator(aggregators []string, bsize int64) *Concentrator { +func NewConcentrator(aggregators 
[]string, bsize int64, out chan []model.StatsBucket) *Concentrator { c := Concentrator{ aggregators: aggregators, bsize: bsize, buckets: make(map[int64]*model.StatsRawBucket), + OutStats: out, + exit: make(chan struct{}), exitWG: &sync.WaitGroup{}, } diff --git a/agent/concentrator_test.go b/agent/concentrator_test.go index 943561f91..7722c7fb0 100644 --- a/agent/concentrator_test.go +++ b/agent/concentrator_test.go @@ -12,7 +12,8 @@ import ( var testBucketInterval = time.Duration(2 * time.Second).Nanoseconds() func NewTestConcentrator() *Concentrator { - return NewConcentrator([]string{}, time.Second.Nanoseconds()) + statsChan := make(chan []model.StatsBucket) + return NewConcentrator([]string{}, time.Second.Nanoseconds(), statsChan) } // getTsInBucket gives a timestamp in ns which is `offset` buckets late @@ -39,7 +40,8 @@ func testSpan(c *Concentrator, spanID uint64, duration, offset int64, service, r func TestConcentratorStatsCounts(t *testing.T) { assert := assert.New(t) - c := NewConcentrator([]string{}, testBucketInterval) + statsChan := make(chan []model.StatsBucket) + c := NewConcentrator([]string{}, testBucketInterval, statsChan) now := model.Now() alignedNow := now - now%c.bsize diff --git a/agent/receiver.go b/agent/receiver.go index a07d1a17d..60e0e3e66 100644 --- a/agent/receiver.go +++ b/agent/receiver.go @@ -68,7 +68,9 @@ type HTTPReceiver struct { } // NewHTTPReceiver returns a pointer to a new HTTPReceiver -func NewHTTPReceiver(conf *config.AgentConfig, dynConf *config.DynamicConfig) *HTTPReceiver { +func NewHTTPReceiver( + conf *config.AgentConfig, dynConf *config.DynamicConfig, traces chan model.Trace, services chan model.ServicesMetadata, +) *HTTPReceiver { // use buffered channels so that handlers are not waiting on downstream processing return &HTTPReceiver{ conf: conf, @@ -77,6 +79,9 @@ func NewHTTPReceiver(conf *config.AgentConfig, dynConf *config.DynamicConfig) *H preSampler: sampler.NewPreSampler(conf.PreSampleRate), exit: make(chan 
struct{}), + traces: traces, + services: services, + maxRequestBodyLength: maxRequestBodyLength, debug: strings.ToLower(conf.LogLevel) == "debug", } diff --git a/agent/receiver_test.go b/agent/receiver_test.go index e41cf61d7..e9fa1aaf5 100644 --- a/agent/receiver_test.go +++ b/agent/receiver_test.go @@ -32,9 +32,9 @@ var headerFields = map[string]string{ func NewTestReceiverFromConfig(conf *config.AgentConfig) *HTTPReceiver { dynConf := config.NewDynamicConfig() - receiver := NewHTTPReceiver(conf, dynConf) rawTraceChan := make(chan model.Trace, 5000) serviceChan := make(chan model.ServicesMetadata, 50) + receiver := NewHTTPReceiver(conf, dynConf, rawTraceChan, serviceChan) receiver.traces = rawTraceChan receiver.services = serviceChan diff --git a/agent/sampler.go b/agent/sampler.go index 3b8952221..9f6e3ebf0 100644 --- a/agent/sampler.go +++ b/agent/sampler.go @@ -28,16 +28,18 @@ type Sampler struct { } // NewScoreSampler creates a new empty sampler ready to be started -func NewScoreSampler(conf *config.AgentConfig) *Sampler { +func NewScoreSampler(conf *config.AgentConfig, sampled chan *model.Trace) *Sampler { return &Sampler{ - engine: sampler.NewScoreEngine(conf.ExtraSampleRate, conf.MaxTPS), + engine: sampler.NewScoreEngine(conf.ExtraSampleRate, conf.MaxTPS), + sampled: sampled, } } // NewPrioritySampler creates a new empty distributed sampler ready to be started -func NewPrioritySampler(conf *config.AgentConfig, dynConf *config.DynamicConfig) *Sampler { +func NewPrioritySampler(conf *config.AgentConfig, dynConf *config.DynamicConfig, sampled chan *model.Trace) *Sampler { return &Sampler{ - engine: sampler.NewPriorityEngine(conf.ExtraSampleRate, conf.MaxTPS, &dynConf.RateByService), + engine: sampler.NewPriorityEngine(conf.ExtraSampleRate, conf.MaxTPS, &dynConf.RateByService), + sampled: sampled, } } diff --git a/writer/service_writer.go b/writer/service_writer.go index 41cd09337..d1583c6d8 100644 --- a/writer/service_writer.go +++ b/writer/service_writer.go 
@@ -33,7 +33,7 @@ type ServiceWriter struct { } // NewServiceWriter returns a new writer for services. -func NewServiceWriter(conf *config.AgentConfig) *ServiceWriter { +func NewServiceWriter(conf *config.AgentConfig, InServices <-chan model.ServicesMetadata) *ServiceWriter { var endpoint Endpoint if conf.APIEnabled { @@ -47,6 +47,8 @@ func NewServiceWriter(conf *config.AgentConfig) *ServiceWriter { return &ServiceWriter{ endpoint: endpoint, + InServices: InServices, + serviceBuffer: make(model.ServicesMetadata), exit: make(chan struct{}), diff --git a/writer/stats_writer.go b/writer/stats_writer.go index 7e238893f..f6991ef0d 100644 --- a/writer/stats_writer.go +++ b/writer/stats_writer.go @@ -30,7 +30,7 @@ type StatsWriter struct { } // NewStatsWriter returns a new writer for services. -func NewStatsWriter(conf *config.AgentConfig) *StatsWriter { +func NewStatsWriter(conf *config.AgentConfig, InStats <-chan []model.StatsBucket) *StatsWriter { var endpoint Endpoint if conf.APIEnabled { @@ -44,6 +44,8 @@ func NewStatsWriter(conf *config.AgentConfig) *StatsWriter { return &StatsWriter{ endpoint: endpoint, + InStats: InStats, + exit: make(chan struct{}), exitWG: &sync.WaitGroup{}, diff --git a/writer/trace_writer.go b/writer/trace_writer.go index d67fb1066..02715c631 100644 --- a/writer/trace_writer.go +++ b/writer/trace_writer.go @@ -33,7 +33,7 @@ type TraceWriter struct { } // NewTraceWriter returns a new writer for traces. 
-func NewTraceWriter(conf *config.AgentConfig) *TraceWriter { +func NewTraceWriter(conf *config.AgentConfig, InTraces <-chan *model.Trace) *TraceWriter { var endpoint Endpoint if conf.APIEnabled { @@ -52,6 +52,8 @@ func NewTraceWriter(conf *config.AgentConfig) *TraceWriter { exit: make(chan struct{}), exitWG: &sync.WaitGroup{}, + InTraces: InTraces, + conf: conf, } } From 8a62fbf4870854ddc8385f13cd3eb25c55bdc587 Mon Sep 17 00:00:00 2001 From: Benjamin Fernandes Date: Tue, 19 Dec 2017 11:09:14 +0100 Subject: [PATCH 7/9] Extract text from info tests Since the code is now more readable, we can also remove examples from doc strings and only rely on tests or the more readable template. --- info/info.go | 60 ------------------ info/info_test.go | 118 +++++++---------------------------- info/test_cases/okay.info | 26 ++++++++ info/test_cases/okay.json | 14 +++++ info/test_cases/warning.info | 32 ++++++++++ info/test_cases/warning.json | 13 ++++ 6 files changed, 107 insertions(+), 156 deletions(-) create mode 100644 info/test_cases/okay.info create mode 100644 info/test_cases/okay.json create mode 100644 info/test_cases/warning.info create mode 100644 info/test_cases/warning.json diff --git a/info/info.go b/info/info.go index c4d400216..0c01557a1 100644 --- a/info/info.go +++ b/info/info.go @@ -89,7 +89,6 @@ const ( {{if gt .Status.StatsWriter.Errors 0}}WARNING: Stats API errors (1 min): {{.Status.StatsWriter.Errors}}{{end}} Services: {{.Status.ServiceWriter.Payloads}} payloads, {{.Status.ServiceWriter.Services}} services, {{.Status.ServiceWriter.Bytes}} bytes {{if gt .Status.ServiceWriter.Errors 0}}WARNING: Services API errors (1 min): {{.Status.ServiceWriter.Errors}}{{end}} - ` notRunningTmplSrc = `{{.Banner}} @@ -312,65 +311,6 @@ func getProgramBanner(version string) (string, string) { // // If error is nil, means the program is running. 
// If not, it displays a pretty-printed message anyway (for support) -// -// Typical output of 'trace-agent -info' when agent is running: -// -// -----8<------------------------------------------------------- -// ====================== -// Trace Agent (v 0.99.0) -// ====================== -// -// Pid: 38149 -// Uptime: 15 seconds -// Mem alloc: 773552 bytes -// -// Hostname: localhost.localdomain -// Receiver: localhost:8126 -// API Endpoint: https://trace.agent.datadoghq.com -// -// Bytes received (1 min): 10000 -// Traces received (1 min): 240 -// Spans received (1 min): 360 -// WARNING: Traces dropped (1 min): 5 -// WARNING: Spans dropped (1 min): 10 -// WARNING: Pre-sampling traces: 26.0 % -// WARNING: Pre-sampler: raising pre-sampling rate from 2.9 % to 5.0 % -// -// Bytes sent (1 min): 3245 -// Traces sent (1 min): 6 -// Stats sent (1 min): 60 -// WARNING: Traces API errors (1 min): 1/3 -// WARNING: Services API errors (1 min): 1/1 -// -// -----8<------------------------------------------------------- -// -// The "WARNING:" lines are hidden if there's nothing dropped or no errors. -// -// Typical output of 'trace-agent -info' when agent is not running: -// -// -----8<------------------------------------------------------- -// ====================== -// Trace Agent (v 0.99.0) -// ====================== -// -// Not running (port 8126) -// -// -----8<------------------------------------------------------- -// -// Typical output of 'trace-agent -info' when something unexpected happened, -// for instance we're connecting to an HTTP server that serves an inadequate -// response, or there's a bug, or... 
: -// -// -----8<------------------------------------------------------- -// ====================== -// Trace Agent (v 0.99.0) -// ====================== -// -// Error: json: cannot unmarshal number into Go value of type main.StatusInfo -// URL: http://localhost:8126/debug/vars -// -// -----8<------------------------------------------------------- -// func Info(w io.Writer, conf *config.AgentConfig) error { host := conf.ReceiverHost if host == "0.0.0.0" { diff --git a/info/info_test.go b/info/info_test.go index 210e92a10..1a0faa527 100644 --- a/info/info_test.go +++ b/info/info_test.go @@ -5,6 +5,7 @@ import ( "encoding/json" "expvar" "fmt" + "io/ioutil" "net/http" "net/http/httptest" "net/url" @@ -21,92 +22,18 @@ type testServerHandler struct { t *testing.T } -// TODO: move these expected output to flat file, simpler to read/update -const ( - expectedInfo = `====================== -Trace Agent (v 0.99.0) -====================== - - Pid: 38149 - Uptime: 15 seconds - Mem alloc: 773552 bytes - - Hostname: localhost.localdomain - Receiver: localhost:8126 - API Endpoint: https://trace.agent.datadoghq.com - - --- Receiver stats (1 min) --- - - From unknown clients - Traces received: 0 (0 bytes) - Spans received: 0 - Services received: 0 (0 bytes) - - Priority sampling rate for 'service:myapp,env:dev': 12.3 % - - --- Writer stats (1 min) --- - - Traces: 4 payloads, 26 traces, 3245 bytes - Stats: 6 payloads, 12 stats buckets, 8329 bytes - Services: 1 payloads, 2 services, 1234 bytes - -` - - expectedWarning = `====================== -Trace Agent (v 0.99.0) -====================== - - Pid: 38149 - Uptime: 15 seconds - Mem alloc: 773552 bytes - - Hostname: localhost.localdomain - Receiver: localhost:8126 - API Endpoint: https://trace.agent.datadoghq.com - - --- Receiver stats (1 min) --- - - From python 2.7.6 (CPython), client 0.9.0 - Traces received: 70 (10679 bytes) - Spans received: 984 - Services received: 0 (0 bytes) - WARNING: Traces dropped: 23 - WARNING: Spans 
dropped: 184 - - WARNING: Pre-sampling traces: 42.1 % - WARNING: Pre-sampler: raising pre-sampling rate from 3.1 % to 5.0 % - - --- Writer stats (1 min) --- - - Traces: 4 payloads, 26 traces, 3245 bytes - WARNING: Traces API errors (1 min): 3 - Stats: 6 payloads, 12 stats buckets, 8329 bytes - WARNING: Stats API errors (1 min): 1 - Services: 1 payloads, 2 services, 1234 bytes - WARNING: Services API errors (1 min): 1 - -` -) - func (h *testServerHandler) ServeHTTP(w http.ResponseWriter, r *http.Request) { w.Header().Set("Content-Type", "application/json") + + json, err := ioutil.ReadFile("./test_cases/okay.json") + if err != nil { + h.t.Errorf("error loading json file: %v", err) + } + switch r.URL.Path { case "/debug/vars": h.t.Logf("serving fake (static) info data for %s", r.URL.Path) - _, err := w.Write([]byte(`{ -"cmdline": ["./trace-agent"], -"config": {"Enabled":true,"HostName":"localhost.localdomain","DefaultEnv":"none","APIEndpoint":"https://trace.agent.datadoghq.com","APIEnabled":true,"APIPayloadBufferMaxSize":16777216,"BucketInterval":10000000000,"ExtraAggregators":[],"ExtraSampleRate":1,"MaxTPS":10,"ReceiverHost":"localhost","ReceiverPort":8126,"ConnectionLimit":2000,"ReceiverTimeout":0,"StatsdHost":"127.0.0.1","StatsdPort":8125,"LogLevel":"INFO","LogFilePath":"/var/log/datadog/trace-agent.log"}, -"trace_writer": {"Payloads":4,"Bytes":3245,"Traces":26,"Errors":0}, -"stats_writer": {"Payloads":6,"Bytes":8329,"StatsBuckets":12,"Errors":0}, -"service_writer": {"Payloads":1,"Bytes":1234,"Services":2,"Errors":0}, -"memstats": 
{"Alloc":773552,"TotalAlloc":773552,"Sys":3346432,"Lookups":6,"Mallocs":7231,"Frees":561,"HeapAlloc":773552,"HeapSys":1572864,"HeapIdle":49152,"HeapInuse":1523712,"HeapReleased":0,"HeapObjects":6670,"StackInuse":524288,"StackSys":524288,"MSpanInuse":24480,"MSpanSys":32768,"MCacheInuse":4800,"MCacheSys":16384,"BuckHashSys":2675,"GCSys":131072,"OtherSys":1066381,"NextGC":4194304,"LastGC":0,"PauseTotalNs":0,"PauseNs":[0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0],"PauseEnd":[0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0],"NumGC":0,"GCCPUFraction":0,"EnableGC":true,"DebugGC":false,"BySize":[{"Size":0,"Mallocs":0,"Frees":0},{"Size":8,"Mallocs":126,"Frees":0},{"Size":16,"Mallocs":825,"Frees":0},{"Size":32,"Mallocs":4208,"Frees":0},{"Size":48,"Mallocs":345,"Frees":0},{"Size":64,"Mallocs":262,"Frees":0},{"Size":80,"Mallocs":93,"Frees":0},{"Size":96,"Mallocs":70,"Frees":0},{"Size":112,"Mallocs":97,"Frees":0},{"Size":128,"Mallocs":24,"Frees":0},{"Size":144,"Mallocs":25,"Frees":0},{"Size":160,"Mallocs":57,"Frees":0},{"Size":176,"Mallocs":128,"Frees":0},{"Size":1
92,"Mallocs":13,"Frees":0},{"Size":208,"Mallocs":77,"Frees":0},{"Size":224,"Mallocs":3,"Frees":0},{"Size":240,"Mallocs":2,"Frees":0},{"Size":256,"Mallocs":17,"Frees":0},{"Size":288,"Mallocs":64,"Frees":0},{"Size":320,"Mallocs":12,"Frees":0},{"Size":352,"Mallocs":20,"Frees":0},{"Size":384,"Mallocs":1,"Frees":0},{"Size":416,"Mallocs":59,"Frees":0},{"Size":448,"Mallocs":0,"Frees":0},{"Size":480,"Mallocs":3,"Frees":0},{"Size":512,"Mallocs":2,"Frees":0},{"Size":576,"Mallocs":17,"Frees":0},{"Size":640,"Mallocs":6,"Frees":0},{"Size":704,"Mallocs":10,"Frees":0},{"Size":768,"Mallocs":0,"Frees":0},{"Size":896,"Mallocs":11,"Frees":0},{"Size":1024,"Mallocs":11,"Frees":0},{"Size":1152,"Mallocs":12,"Frees":0},{"Size":1280,"Mallocs":2,"Frees":0},{"Size":1408,"Mallocs":2,"Frees":0},{"Size":1536,"Mallocs":0,"Frees":0},{"Size":1664,"Mallocs":10,"Frees":0},{"Size":2048,"Mallocs":17,"Frees":0},{"Size":2304,"Mallocs":7,"Frees":0},{"Size":2560,"Mallocs":1,"Frees":0},{"Size":2816,"Mallocs":1,"Frees":0},{"Size":3072,"Mallocs":1,"Frees":0},{"Size":3328,"Mallocs":7,"Frees":0},{"Size":4096,"Mallocs":4,"Frees":0},{"Size":4608,"Mallocs":1,"Frees":0},{"Size":5376,"Mallocs":6,"Frees":0},{"Size":6144,"Mallocs":4,"Frees":0},{"Size":6400,"Mallocs":0,"Frees":0},{"Size":6656,"Mallocs":1,"Frees":0},{"Size":6912,"Mallocs":0,"Frees":0},{"Size":8192,"Mallocs":0,"Frees":0},{"Size":8448,"Mallocs":0,"Frees":0},{"Size":8704,"Mallocs":1,"Frees":0},{"Size":9472,"Mallocs":0,"Frees":0},{"Size":10496,"Mallocs":0,"Frees":0},{"Size":12288,"Mallocs":1,"Frees":0},{"Size":13568,"Mallocs":0,"Frees":0},{"Size":14080,"Mallocs":0,"Frees":0},{"Size":16384,"Mallocs":0,"Frees":0},{"Size":16640,"Mallocs":0,"Frees":0},{"Size":17664,"Mallocs":1,"Frees":0}]}, -"pid": 38149, -"ratebyservice": {"service:,env:":1,"service:myapp,env:dev":0.123}, -"receiver": [{}], -"presampler": {"Rate":1.0}, -"uptime": 15, -"version": {"BuildDate": "2017-02-01T14:28:10+0100", "GitBranch": "ufoot/statusinfo", "GitCommit": "396a217", "GoVersion": "go 
version go1.7 darwin/amd64", "Version": "0.99.0"} -}`)) + _, err := w.Write(json) if err != nil { h.t.Errorf("error serving %s: %v", r.URL.Path, err) } @@ -128,22 +55,16 @@ type testServerWarningHandler struct { func (h *testServerWarningHandler) ServeHTTP(w http.ResponseWriter, r *http.Request) { w.Header().Set("Content-Type", "application/json") + + json, err := ioutil.ReadFile("./test_cases/warning.json") + if err != nil { + h.t.Errorf("error loading json file: %v", err) + } + switch r.URL.Path { case "/debug/vars": h.t.Logf("serving fake (static) info data for %s", r.URL.Path) - _, err := w.Write([]byte(`{ -"cmdline": ["./trace-agent"], -"config": {"Enabled":true,"HostName":"localhost.localdomain","DefaultEnv":"none","APIEndpoint":"https://trace.agent.datadoghq.com","APIEnabled":true,"APIPayloadBufferMaxSize":16777216,"BucketInterval":10000000000,"ExtraAggregators":[],"ExtraSampleRate":1,"MaxTPS":10,"ReceiverHost":"localhost","ReceiverPort":8126,"ConnectionLimit":2000,"ReceiverTimeout":0,"StatsdHost":"127.0.0.1","StatsdPort":8125,"LogLevel":"INFO","LogFilePath":"/var/log/datadog/trace-agent.log"}, -"trace_writer": {"Payloads":4,"Bytes":3245,"Traces":26,"Errors":3}, -"stats_writer": {"Payloads":6,"Bytes":8329,"StatsBuckets":12,"Errors":1}, -"service_writer": {"Payloads":1,"Bytes":1234,"Services":2,"Errors":1}, -"memstats": 
{"Alloc":773552,"TotalAlloc":773552,"Sys":3346432,"Lookups":6,"Mallocs":7231,"Frees":561,"HeapAlloc":773552,"HeapSys":1572864,"HeapIdle":49152,"HeapInuse":1523712,"HeapReleased":0,"HeapObjects":6670,"StackInuse":524288,"StackSys":524288,"MSpanInuse":24480,"MSpanSys":32768,"MCacheInuse":4800,"MCacheSys":16384,"BuckHashSys":2675,"GCSys":131072,"OtherSys":1066381,"NextGC":4194304,"LastGC":0,"PauseTotalNs":0,"PauseNs":[0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0],"PauseEnd":[0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0],"NumGC":0,"GCCPUFraction":0,"EnableGC":true,"DebugGC":false,"BySize":[{"Size":0,"Mallocs":0,"Frees":0},{"Size":8,"Mallocs":126,"Frees":0},{"Size":16,"Mallocs":825,"Frees":0},{"Size":32,"Mallocs":4208,"Frees":0},{"Size":48,"Mallocs":345,"Frees":0},{"Size":64,"Mallocs":262,"Frees":0},{"Size":80,"Mallocs":93,"Frees":0},{"Size":96,"Mallocs":70,"Frees":0},{"Size":112,"Mallocs":97,"Frees":0},{"Size":128,"Mallocs":24,"Frees":0},{"Size":144,"Mallocs":25,"Frees":0},{"Size":160,"Mallocs":57,"Frees":0},{"Size":176,"Mallocs":128,"Frees":0},{"Size":1
92,"Mallocs":13,"Frees":0},{"Size":208,"Mallocs":77,"Frees":0},{"Size":224,"Mallocs":3,"Frees":0},{"Size":240,"Mallocs":2,"Frees":0},{"Size":256,"Mallocs":17,"Frees":0},{"Size":288,"Mallocs":64,"Frees":0},{"Size":320,"Mallocs":12,"Frees":0},{"Size":352,"Mallocs":20,"Frees":0},{"Size":384,"Mallocs":1,"Frees":0},{"Size":416,"Mallocs":59,"Frees":0},{"Size":448,"Mallocs":0,"Frees":0},{"Size":480,"Mallocs":3,"Frees":0},{"Size":512,"Mallocs":2,"Frees":0},{"Size":576,"Mallocs":17,"Frees":0},{"Size":640,"Mallocs":6,"Frees":0},{"Size":704,"Mallocs":10,"Frees":0},{"Size":768,"Mallocs":0,"Frees":0},{"Size":896,"Mallocs":11,"Frees":0},{"Size":1024,"Mallocs":11,"Frees":0},{"Size":1152,"Mallocs":12,"Frees":0},{"Size":1280,"Mallocs":2,"Frees":0},{"Size":1408,"Mallocs":2,"Frees":0},{"Size":1536,"Mallocs":0,"Frees":0},{"Size":1664,"Mallocs":10,"Frees":0},{"Size":2048,"Mallocs":17,"Frees":0},{"Size":2304,"Mallocs":7,"Frees":0},{"Size":2560,"Mallocs":1,"Frees":0},{"Size":2816,"Mallocs":1,"Frees":0},{"Size":3072,"Mallocs":1,"Frees":0},{"Size":3328,"Mallocs":7,"Frees":0},{"Size":4096,"Mallocs":4,"Frees":0},{"Size":4608,"Mallocs":1,"Frees":0},{"Size":5376,"Mallocs":6,"Frees":0},{"Size":6144,"Mallocs":4,"Frees":0},{"Size":6400,"Mallocs":0,"Frees":0},{"Size":6656,"Mallocs":1,"Frees":0},{"Size":6912,"Mallocs":0,"Frees":0},{"Size":8192,"Mallocs":0,"Frees":0},{"Size":8448,"Mallocs":0,"Frees":0},{"Size":8704,"Mallocs":1,"Frees":0},{"Size":9472,"Mallocs":0,"Frees":0},{"Size":10496,"Mallocs":0,"Frees":0},{"Size":12288,"Mallocs":1,"Frees":0},{"Size":13568,"Mallocs":0,"Frees":0},{"Size":14080,"Mallocs":0,"Frees":0},{"Size":16384,"Mallocs":0,"Frees":0},{"Size":16640,"Mallocs":0,"Frees":0},{"Size":17664,"Mallocs":1,"Frees":0}]}, -"pid": 38149, -"receiver": [{"Lang":"python","LangVersion":"2.7.6","Interpreter":"CPython","TracerVersion":"0.9.0","TracesReceived":70,"TracesDropped":23,"TracesBytes":10679,"SpansReceived":984,"SpansDropped":184,"ServicesReceived":0,"ServicesBytes":0}], -"presampler": 
{"Rate":0.421,"Error":"raising pre-sampling rate from 3.1 % to 5.0 %"}, -"uptime": 15, -"version": {"BuildDate": "2017-02-01T14:28:10+0100", "GitBranch": "ufoot/statusinfo", "GitCommit": "396a217", "GoVersion": "go version go1.7 darwin/amd64", "Version": "0.99.0"} -}`)) + _, err := w.Write(json) if err != nil { h.t.Errorf("error serving %s: %v", r.URL.Path, err) } @@ -221,7 +142,9 @@ func TestInfo(t *testing.T) { assert.NoError(err) info := buf.String() t.Logf("Info:\n%s\n", info) - assert.Equal(expectedInfo, info) + expectedInfo, err := ioutil.ReadFile("./test_cases/okay.info") + assert.NoError(err) + assert.Equal(string(expectedInfo), info) } func TestWarning(t *testing.T) { @@ -248,7 +171,10 @@ func TestWarning(t *testing.T) { assert.Nil(err) info := buf.String() - assert.Equal(expectedWarning, info) + expectedWarning, err := ioutil.ReadFile("./test_cases/warning.info") + assert.NoError(err) + assert.Equal(string(expectedWarning), info) + t.Logf("Info:\n%s\n", info) } diff --git a/info/test_cases/okay.info b/info/test_cases/okay.info new file mode 100644 index 000000000..5304a545a --- /dev/null +++ b/info/test_cases/okay.info @@ -0,0 +1,26 @@ +====================== +Trace Agent (v 0.99.0) +====================== + + Pid: 38149 + Uptime: 15 seconds + Mem alloc: 773552 bytes + + Hostname: localhost.localdomain + Receiver: localhost:8126 + API Endpoint: https://trace.agent.datadoghq.com + + --- Receiver stats (1 min) --- + + From unknown clients + Traces received: 0 (0 bytes) + Spans received: 0 + Services received: 0 (0 bytes) + + Priority sampling rate for 'service:myapp,env:dev': 12.3 % + + --- Writer stats (1 min) --- + + Traces: 4 payloads, 26 traces, 3245 bytes + Stats: 6 payloads, 12 stats buckets, 8329 bytes + Services: 1 payloads, 2 services, 1234 bytes diff --git a/info/test_cases/okay.json b/info/test_cases/okay.json new file mode 100644 index 000000000..95036b774 --- /dev/null +++ b/info/test_cases/okay.json @@ -0,0 +1,14 @@ +{ + "cmdline": 
["./trace-agent"], + "config": {"Enabled":true,"HostName":"localhost.localdomain","DefaultEnv":"none","APIEndpoint":"https://trace.agent.datadoghq.com","APIEnabled":true,"APIPayloadBufferMaxSize":16777216,"BucketInterval":10000000000,"ExtraAggregators":[],"ExtraSampleRate":1,"MaxTPS":10,"ReceiverHost":"localhost","ReceiverPort":8126,"ConnectionLimit":2000,"ReceiverTimeout":0,"StatsdHost":"127.0.0.1","StatsdPort":8125,"LogLevel":"INFO","LogFilePath":"/var/log/datadog/trace-agent.log"}, + "trace_writer": {"Payloads":4,"Bytes":3245,"Traces":26,"Errors":0}, + "stats_writer": {"Payloads":6,"Bytes":8329,"StatsBuckets":12,"Errors":0}, + "service_writer": {"Payloads":1,"Bytes":1234,"Services":2,"Errors":0}, + "memstats": {"Alloc":773552,"TotalAlloc":773552,"Sys":3346432,"Lookups":6,"Mallocs":7231,"Frees":561,"HeapAlloc":773552,"HeapSys":1572864,"HeapIdle":49152,"HeapInuse":1523712,"HeapReleased":0,"HeapObjects":6670,"StackInuse":524288,"StackSys":524288,"MSpanInuse":24480,"MSpanSys":32768,"MCacheInuse":4800,"MCacheSys":16384,"BuckHashSys":2675,"GCSys":131072,"OtherSys":1066381,"NextGC":4194304,"LastGC":0,"PauseTotalNs":0,"PauseNs":[0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0],"PauseEnd":[0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0],"NumGC":0,"GCCPUFraction":0,"EnableGC":true,"DebugGC":false,"BySize":[{"Size":0,"Mallocs":0,"Frees":0},{"Size":8,"Mallocs":126,"Frees":0},{"Size":16,"Mallocs":825,"Frees":0},{"Size":32,"Mallocs":4208,"Frees":0},{"Size":48,"Mallocs":345,"Frees":0},{"Size":64,"Mallocs":262,"Frees":0},{"Size":80,"Mallocs":93,"Frees":0},{"Size":96,"Mallocs":70,"Frees":0},{"Size":112,"Mallocs":97,"Frees":0},{"Size":128,"Mallocs":24,"Frees":0},{"Size":144,"Mallocs":25,"Frees":0},{"Size":160,"Mallocs":57,"Frees":0},{"Size":176,"Mallocs":128,"Frees":0},{"Size":192,"Mallocs":13,"Frees":0},{"Size":208,"Mallocs":77,"Frees":0},{"Size":224,"Mallocs":3,"Frees":0},{"Size":240,"Mallocs":2,"Frees":0},{"Size":256,"Mallocs":17,"Frees":0},{"Size":288,"Mallocs":64,"Frees":0},{"Size":320,"Mallocs":12,"Frees":0},{"Size":352,"Mallocs":20,"Frees":0},{"Size":384,"Mallocs":1,"Frees":0},{"Size":416,"Mallocs":59,"Frees":0},{"Size":448,"Mallocs":0,"Frees":0},{"Size":480,"Mallocs":3,"Frees":0},{"Size":512,"Mallocs":2,"Frees":0},{"Size":576,"Mallocs":17,"Frees":0},{"Size":640,"Mallocs":6,"Frees":0},{"Size":704,"Mallocs":10,"Frees":0},{"Size":768,"Mallocs":0,"Frees":0},{"Size":896,"Mallocs":11,"Frees":0},{"Size":1024,"Mallocs":11,"Frees":0},{"Size":1152,"Mallocs":12,"Frees":0},{"Size":1280,"Mallocs":2,"Frees":0},{"Size":1408,"Mallocs":2,"Frees":0},{"Size":1536,"Mallocs":0,"Frees":0},{"Size":1664,"Mallocs":10,"Frees":0},{"Size":2048,"Mallocs":17,"Frees":0},{"Size":2304,"Mallocs":7,"Frees":0},{"Size":2560,"Mallocs":1,"Frees":0},{"Size":2816,"Mallocs":1,"Frees":0},{"Size":3072,"Mallocs":1,"Frees":0},{"Size":3328,"Mallocs":7,"Frees":0},{"Size":4096,"Mallocs":4,"Frees":0},{"Size":4608,"Mallocs":1,"Frees":0},{"Size":5376,"Mallocs":6,"Frees":0},{"Size":6144,"Mallocs":4,"Frees":0},{"Size":6400,"Mallocs":0,"Frees":0},{"Size":6656,"Mallocs":1,"Free
s":0},{"Size":6912,"Mallocs":0,"Frees":0},{"Size":8192,"Mallocs":0,"Frees":0},{"Size":8448,"Mallocs":0,"Frees":0},{"Size":8704,"Mallocs":1,"Frees":0},{"Size":9472,"Mallocs":0,"Frees":0},{"Size":10496,"Mallocs":0,"Frees":0},{"Size":12288,"Mallocs":1,"Frees":0},{"Size":13568,"Mallocs":0,"Frees":0},{"Size":14080,"Mallocs":0,"Frees":0},{"Size":16384,"Mallocs":0,"Frees":0},{"Size":16640,"Mallocs":0,"Frees":0},{"Size":17664,"Mallocs":1,"Frees":0}]}, + "pid": 38149, + "ratebyservice": {"service:,env:":1,"service:myapp,env:dev":0.123}, + "receiver": [{}], + "presampler": {"Rate":1.0}, + "uptime": 15, + "version": {"BuildDate": "2017-02-01T14:28:10+0100", "GitBranch": "ufoot/statusinfo", "GitCommit": "396a217", "GoVersion": "go version go1.7 darwin/amd64", "Version": "0.99.0"} +} diff --git a/info/test_cases/warning.info b/info/test_cases/warning.info new file mode 100644 index 000000000..afad32d66 --- /dev/null +++ b/info/test_cases/warning.info @@ -0,0 +1,32 @@ +====================== +Trace Agent (v 0.99.0) +====================== + + Pid: 38149 + Uptime: 15 seconds + Mem alloc: 773552 bytes + + Hostname: localhost.localdomain + Receiver: localhost:8126 + API Endpoint: https://trace.agent.datadoghq.com + + --- Receiver stats (1 min) --- + + From python 2.7.6 (CPython), client 0.9.0 + Traces received: 70 (10679 bytes) + Spans received: 984 + Services received: 0 (0 bytes) + WARNING: Traces dropped: 23 + WARNING: Spans dropped: 184 + + WARNING: Pre-sampling traces: 42.1 % + WARNING: Pre-sampler: raising pre-sampling rate from 3.1 % to 5.0 % + + --- Writer stats (1 min) --- + + Traces: 4 payloads, 26 traces, 3245 bytes + WARNING: Traces API errors (1 min): 3 + Stats: 6 payloads, 12 stats buckets, 8329 bytes + WARNING: Stats API errors (1 min): 1 + Services: 1 payloads, 2 services, 1234 bytes + WARNING: Services API errors (1 min): 1 diff --git a/info/test_cases/warning.json b/info/test_cases/warning.json new file mode 100644 index 000000000..2ef02ac79 --- /dev/null +++ 
b/info/test_cases/warning.json @@ -0,0 +1,13 @@ +{ + "cmdline": ["./trace-agent"], + "config": {"Enabled":true,"HostName":"localhost.localdomain","DefaultEnv":"none","APIEndpoint":"https://trace.agent.datadoghq.com","APIEnabled":true,"APIPayloadBufferMaxSize":16777216,"BucketInterval":10000000000,"ExtraAggregators":[],"ExtraSampleRate":1,"MaxTPS":10,"ReceiverHost":"localhost","ReceiverPort":8126,"ConnectionLimit":2000,"ReceiverTimeout":0,"StatsdHost":"127.0.0.1","StatsdPort":8125,"LogLevel":"INFO","LogFilePath":"/var/log/datadog/trace-agent.log"}, + "trace_writer": {"Payloads":4,"Bytes":3245,"Traces":26,"Errors":3}, + "stats_writer": {"Payloads":6,"Bytes":8329,"StatsBuckets":12,"Errors":1}, + "service_writer": {"Payloads":1,"Bytes":1234,"Services":2,"Errors":1}, + "memstats": {"Alloc":773552,"TotalAlloc":773552,"Sys":3346432,"Lookups":6,"Mallocs":7231,"Frees":561,"HeapAlloc":773552,"HeapSys":1572864,"HeapIdle":49152,"HeapInuse":1523712,"HeapReleased":0,"HeapObjects":6670,"StackInuse":524288,"StackSys":524288,"MSpanInuse":24480,"MSpanSys":32768,"MCacheInuse":4800,"MCacheSys":16384,"BuckHashSys":2675,"GCSys":131072,"OtherSys":1066381,"NextGC":4194304,"LastGC":0,"PauseTotalNs":0,"PauseNs":[0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0],"PauseEnd":[0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0],"NumGC":0,"GCCPUFraction":0,"EnableGC":true,"DebugGC":false,"BySize":[{"Size":0,"Mallocs":0,"Frees":0},{"Size":8,"Mallocs":126,"Frees":0},{"Size":16,"Mallocs":825,"Frees":0},{"Size":32,"Mallocs":4208,"Frees":0},{"Size":48,"Mallocs":345,"Frees":0},{"Size":64,"Mallocs":262,"Frees":0},{"Size":80,"Mallocs":93,"Frees":0},{"Size":96,"Mallocs":70,"Frees":0},{"Size":112,"Mallocs":97,"Frees":0},{"Size":128,"Mallocs":24,"Frees":0},{"Size":144,"Mallocs":25,"Frees":0},{"Size":160,"Mallocs":57,"Frees":0},{"Size":176,"Mallocs":128,"Frees":0},{"Size":192,"Mallocs":13,"Frees":0},{"Size":208,"Mallocs":77,"Frees":0},{"Size":224,"Mallocs":3,"Frees":0},{"Size":240,"Mallocs":2,"Frees":0},{"Size":256,"Mallocs":17,"Frees":0},{"Size":288,"Mallocs":64,"Frees":0},{"Size":320,"Mallocs":12,"Frees":0},{"Size":352,"Mallocs":20,"Frees":0},{"Size":384,"Mallocs":1,"Frees":0},{"Size":416,"Mallocs":59,"Frees":0},{"Size":448,"Mallocs":0,"Frees":0},{"Size":480,"Mallocs":3,"Frees":0},{"Size":512,"Mallocs":2,"Frees":0},{"Size":576,"Mallocs":17,"Frees":0},{"Size":640,"Mallocs":6,"Frees":0},{"Size":704,"Mallocs":10,"Frees":0},{"Size":768,"Mallocs":0,"Frees":0},{"Size":896,"Mallocs":11,"Frees":0},{"Size":1024,"Mallocs":11,"Frees":0},{"Size":1152,"Mallocs":12,"Frees":0},{"Size":1280,"Mallocs":2,"Frees":0},{"Size":1408,"Mallocs":2,"Frees":0},{"Size":1536,"Mallocs":0,"Frees":0},{"Size":1664,"Mallocs":10,"Frees":0},{"Size":2048,"Mallocs":17,"Frees":0},{"Size":2304,"Mallocs":7,"Frees":0},{"Size":2560,"Mallocs":1,"Frees":0},{"Size":2816,"Mallocs":1,"Frees":0},{"Size":3072,"Mallocs":1,"Frees":0},{"Size":3328,"Mallocs":7,"Frees":0},{"Size":4096,"Mallocs":4,"Frees":0},{"Size":4608,"Mallocs":1,"Frees":0},{"Size":5376,"Mallocs":6,"Frees":0},{"Size":6144,"Mallocs":4,"Frees":0},{"
Size":6400,"Mallocs":0,"Frees":0},{"Size":6656,"Mallocs":1,"Frees":0},{"Size":6912,"Mallocs":0,"Frees":0},{"Size":8192,"Mallocs":0,"Frees":0},{"Size":8448,"Mallocs":0,"Frees":0},{"Size":8704,"Mallocs":1,"Frees":0},{"Size":9472,"Mallocs":0,"Frees":0},{"Size":10496,"Mallocs":0,"Frees":0},{"Size":12288,"Mallocs":1,"Frees":0},{"Size":13568,"Mallocs":0,"Frees":0},{"Size":14080,"Mallocs":0,"Frees":0},{"Size":16384,"Mallocs":0,"Frees":0},{"Size":16640,"Mallocs":0,"Frees":0},{"Size":17664,"Mallocs":1,"Frees":0}]}, + "pid": 38149, + "receiver": [{"Lang":"python","LangVersion":"2.7.6","Interpreter":"CPython","TracerVersion":"0.9.0","TracesReceived":70,"TracesDropped":23,"TracesBytes":10679,"SpansReceived":984,"SpansDropped":184,"ServicesReceived":0,"ServicesBytes":0}], + "presampler": {"Rate":0.421,"Error":"raising pre-sampling rate from 3.1 % to 5.0 %"}, + "uptime": 15, + "version": {"BuildDate": "2017-02-01T14:28:10+0100", "GitBranch": "ufoot/statusinfo", "GitCommit": "396a217", "GoVersion": "go version go1.7 darwin/amd64", "Version": "0.99.0"} +} From 1e367ac5efd252b2487757da56ba8f54e30af64d Mon Sep 17 00:00:00 2001 From: Benjamin Fernandes Date: Tue, 19 Dec 2017 11:30:05 +0100 Subject: [PATCH 8/9] Set apiKey as a header --- writer/datadog_endpoint.go | 10 ++++------ writer/service_writer.go | 1 - 2 files changed, 4 insertions(+), 7 deletions(-) diff --git a/writer/datadog_endpoint.go b/writer/datadog_endpoint.go index 75df4eaf2..0acaa781b 100644 --- a/writer/datadog_endpoint.go +++ b/writer/datadog_endpoint.go @@ -6,6 +6,8 @@ import ( "net/http" ) +const apiHTTPHeaderKey = "DD-Api-Key" + // DatadogEndpoint sends payloads to Datadog API. 
type DatadogEndpoint struct { apiKey string @@ -35,17 +37,12 @@ func (e *DatadogEndpoint) Write(payload []byte, headers map[string]string) error url := e.url + e.path req, err := http.NewRequest("POST", url, bytes.NewBuffer(payload)) - // If the request cannot be created, there is no point in trying again later, - // it will always yield the same result. if err != nil { - // atomic.AddInt64(&ae.stats.TracesPayloadError, 1) return err } // Set API key in the header and issue the request - queryParams := req.URL.Query() - queryParams.Add("api_key", e.apiKey) - req.URL.RawQuery = queryParams.Encode() + req.Header.Set(apiHTTPHeaderKey, e.apiKey) SetExtraHeaders(req.Header, headers) @@ -57,6 +54,7 @@ func (e *DatadogEndpoint) Write(payload []byte, headers map[string]string) error defer resp.Body.Close() // We check the status code to see if the request has succeeded. + // TODO: define all legitimate status codes and behave accordingly. if resp.StatusCode/100 != 2 { return fmt.Errorf("request to %s responded with %s", url, resp.Status) } diff --git a/writer/service_writer.go b/writer/service_writer.go index d1583c6d8..0455db13a 100644 --- a/writer/service_writer.go +++ b/writer/service_writer.go @@ -135,7 +135,6 @@ func (w *ServiceWriter) Flush() { startFlush := time.Now() // Send the payload to the endpoint - // TODO: track metrics/stats about payload err = w.endpoint.Write(data, headers) flushTime := time.Since(startFlush) From 33b1bab37fcc6606a64daa31a345d2c7a0f763fe Mon Sep 17 00:00:00 2001 From: Benjamin Fernandes Date: Wed, 20 Dec 2017 13:37:03 +0100 Subject: [PATCH 9/9] Clean minor nitpicks --- agent/receiver_test.go | 3 --- model/trace.proto | 2 +- writer/trace_writer.go | 2 ++ 3 files changed, 3 insertions(+), 4 deletions(-) diff --git a/agent/receiver_test.go b/agent/receiver_test.go index e9fa1aaf5..5aa8feaeb 100644 --- a/agent/receiver_test.go +++ b/agent/receiver_test.go @@ -36,9 +36,6 @@ func NewTestReceiverFromConfig(conf *config.AgentConfig) *HTTPReceiver { 
serviceChan := make(chan model.ServicesMetadata, 50) receiver := NewHTTPReceiver(conf, dynConf, rawTraceChan, serviceChan) - receiver.traces = rawTraceChan - receiver.services = serviceChan - return receiver } diff --git a/model/trace.proto b/model/trace.proto index 5d4b5bdba..268ca3d61 100644 --- a/model/trace.proto +++ b/model/trace.proto @@ -8,5 +8,5 @@ message APITrace { uint64 traceID = 1; repeated Span spans = 2; int64 startTime = 6; - int64 endTime = 7; + int64 endTime = 7; } diff --git a/writer/trace_writer.go b/writer/trace_writer.go index 02715c631..01a957218 100644 --- a/writer/trace_writer.go +++ b/writer/trace_writer.go @@ -134,6 +134,8 @@ func (w *TraceWriter) Flush() { } atomic.AddInt64(&w.stats.Bytes, int64(len(serialized))) + // TODO: benchmark and pick the right encoding + headers := map[string]string{ languageHeaderKey: strings.Join(info.Languages(), "|"), "Content-Type": "application/x-protobuf",