From 88c3ae0538dcdfac27c872da86fd56b1044f12c3 Mon Sep 17 00:00:00 2001 From: Randy Coburn Date: Fri, 15 Mar 2019 12:29:18 +0000 Subject: [PATCH 01/12] compression and aggregation of metrics added as a toggled feature --- plugins/outputs/kinesis/compression.go | 37 +++ plugins/outputs/kinesis/compression_test.go | 38 +++ plugins/outputs/kinesis/kinesis.go | 104 +++++++-- plugins/outputs/kinesis/kinesisHandler.go | 216 ++++++++++++++++++ .../outputs/kinesis/kinesishandler_test.go | 163 +++++++++++++ 5 files changed, 533 insertions(+), 25 deletions(-) create mode 100644 plugins/outputs/kinesis/compression.go create mode 100644 plugins/outputs/kinesis/compression_test.go create mode 100644 plugins/outputs/kinesis/kinesisHandler.go create mode 100644 plugins/outputs/kinesis/kinesishandler_test.go diff --git a/plugins/outputs/kinesis/compression.go b/plugins/outputs/kinesis/compression.go new file mode 100644 index 0000000000000..08ea86c89939e --- /dev/null +++ b/plugins/outputs/kinesis/compression.go @@ -0,0 +1,37 @@ +package kinesis + +import ( + "bytes" + "compress/gzip" + "fmt" + + "github.com/golang/snappy" +) + +var ( + // gzipCompressionLevel sets the compression level. Tests indicate that 7 gives the best trade off + // between speed and compression. + gzipCompressionLevel = 7 +) + +func gzipMetrics(metrics []byte) ([]byte, error) { + var buffer bytes.Buffer + + gzw, err := gzip.NewWriterLevel(&buffer, gzipCompressionLevel) + if err != nil { + return []byte{}, fmt.Errorf("Compression level is incorrect for gzip") + } + _, err = gzw.Write(metrics) + if err != nil { + return []byte{}, fmt.Errorf("There was an error in writing to the gzip writer") + } + if err := gzw.Close(); err != nil { + return []byte{}, fmt.Errorf("There was an error in closing the gzip writer") + } + + return buffer.Bytes(), nil +} + +func snappyMetrics(metrics []byte) ([]byte, error) { + return snappy.Encode(nil, metrics), nil +} diff --git a/plugins/outputs/kinesis/compression_test.go b/plugins/outputs/kinesis/compression_test.go new file mode 100644 index 0000000000000..6d0d1f9054c2e --- /dev/null +++ b/plugins/outputs/kinesis/compression_test.go @@ -0,0 +1,38 @@ +package kinesis + +import ( + "testing" + "time" +) + +func TestGoodCompression(t *testing.T) { + tests := []string{ + "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa", + time.Now().String(), + `abcdefghijklmnopqrstuvwzyz1234567890@~|\/?><@~#+=!"£$%^&_*(){}[]`, + } + + for _, test := range tests { + _, err := gzipMetrics([]byte(test)) + if err != nil { + t.Logf("Failed to gzip test data") + t.Fail() + } + + // Snappy doesn't error, so we can only look for panics + snappyMetrics([]byte(test)) + } +} + +func TestBadGzipCompressionLevel(t *testing.T) { + oldlevel := gzipCompressionLevel + gzipCompressionLevel = 11 + defer func() { gzipCompressionLevel = oldlevel }() + + _, err := gzipMetrics([]byte(time.Now().String())) + if err == nil { + t.Logf("Expect gzip to fail because of a bad compression level") + t.Fail() + } + +} diff --git a/plugins/outputs/kinesis/kinesis.go b/plugins/outputs/kinesis/kinesis.go index 497676486293c..15a70a59ad916 100644 --- a/plugins/outputs/kinesis/kinesis.go +++ b/plugins/outputs/kinesis/kinesis.go @@ -14,6 +14,12 @@ import ( "github.com/influxdata/telegraf/plugins/serializers" ) +func init() { + outputs.Add("kinesis", func() telegraf.Output { + return &KinesisOutput{} + }) +} + type ( KinesisOutput struct { Region string `toml:"region"` @@ -30,7 +36,12 @@ type ( 
	RandomPartitionKey bool              `toml:"use_random_partitionkey"`
 	Partition          *Partition        `toml:"partition"`
 	Debug              bool              `toml:"debug"`
-	svc                *kinesis.Kinesis
+	AggregateMetrics   bool              `toml:"aggregate_metrics"`
+	GZipRecords        bool              `toml:"gzip_records"`
+	SnappyRecords      bool              `toml:"snappy_records"`
+
+	svc     *kinesis.Kinesis
+	nShards int64
 
 	serializer serializers.Serializer
 	}
@@ -48,7 +59,7 @@ var sampleConfig = `
   ## Amazon Credentials
   ## Credentials are loaded in the following order
-  ## 1) Assumed credentials via STS if role_arn is specified  
+  ## 1) Assumed credentials via STS if role_arn is specified
   ## 2) explicit credentials from 'access_key' and 'secret_key'
   ## 3) shared profile from 'profile'
   ## 4) environment variables
@@ -140,10 +151,11 @@ func (k *KinesisOutput) Connect() error {
 	configProvider := credentialConfig.Credentials()
 	svc := kinesis.New(configProvider)
 
-	_, err := svc.DescribeStreamSummary(&kinesis.DescribeStreamSummaryInput{
+	describeOutput, err := svc.DescribeStreamSummary(&kinesis.DescribeStreamSummaryInput{
 		StreamName: aws.String(k.StreamName),
 	})
+	// Return before dereferencing describeOutput; it can be nil when the describe call fails.
+	if err != nil {
+		return err
+	}
 	k.svc = svc
+	k.nShards = *describeOutput.StreamDescriptionSummary.OpenShardCount
 	return err
 }
@@ -162,30 +174,32 @@ func writekinesis(k *KinesisOutput, r []*kinesis.PutRecordsRequestEntry) time.Du
 		StreamName: aws.String(k.StreamName),
 	}
 
+	resp, err := k.svc.PutRecords(payload)
+	if err != nil {
+		log.Printf("E! kinesis: Unable to write to Kinesis : %s", err.Error())
+	}
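+	// NOTE: PutRecords can partially fail: err may be nil while individual
+	// records are rejected. The aws-sdk-go response reports this through
+	// resp.FailedRecordCount and the per-record ErrorCode in resp.Records.
+	// A possible follow-up, sketched only and not part of this change:
+	//
+	//	if resp.FailedRecordCount != nil && *resp.FailedRecordCount > 0 {
+	//		log.Printf("E! kinesis: %d records failed to write", *resp.FailedRecordCount)
+	//	}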
 	if k.Debug {
-		resp, err := k.svc.PutRecords(payload)
-		if err != nil {
-			log.Printf("E! kinesis: Unable to write to Kinesis : %s", err.Error())
-		}
 		log.Printf("I! Wrote: '%+v'", resp)
-
-	} else {
-		_, err := k.svc.PutRecords(payload)
-		if err != nil {
-			log.Printf("E! kinesis: Unable to write to Kinesis : %s", err.Error())
-		}
 	}
 
 	return time.Since(start)
 }
 
 func (k *KinesisOutput) getPartitionKey(metric telegraf.Metric) string {
+	randomKey := func() string {
+		if k.AggregateMetrics {
+			return randomPartitionKey
+		}
+
+		u := uuid.NewV4()
+		return u.String()
+	}
+
 	if k.Partition != nil {
 		switch k.Partition.Method {
 		case "static":
 			return k.Partition.Key
 		case "random":
-			u := uuid.NewV4()
-			return u.String()
+			return randomKey()
 		case "measurement":
 			return metric.Name()
 		case "tag":
@@ -201,19 +215,27 @@ func (k *KinesisOutput) getPartitionKey(metric telegraf.Metric) string {
 		}
 	}
 	if k.RandomPartitionKey {
-		u := uuid.NewV4()
-		return u.String()
+		return randomKey()
 	}
 	return k.PartitionKey
 }
 
 func (k *KinesisOutput) Write(metrics []telegraf.Metric) error {
-	var sz uint32
-
 	if len(metrics) == 0 {
 		return nil
 	}
 
+	switch {
+	case k.AggregateMetrics:
+		return k.aggregatedWrite(metrics)
+	default:
+		return k.writeDefault(metrics)
+	}
+}
+
+func (k *KinesisOutput) writeDefault(metrics []telegraf.Metric) error {
+	var sz uint32
+
 	r := []*kinesis.PutRecordsRequestEntry{}
 
 	for _, metric := range metrics {
@@ -236,7 +258,7 @@ func (k *KinesisOutput) Write(metrics []telegraf.Metric) error {
 		if sz == 500 {
 			// Max Messages Per PutRecordRequest is 500
 			elapsed := writekinesis(k, r)
-			log.Printf("D! Wrote a %d point batch to Kinesis in %+v.", sz, elapsed)
+			log.Printf("D! Wrote a %d point batch to Kinesis in %+v.\n", sz, elapsed)
 			sz = 0
 			r = nil
 		}
 
 	}
@@ -244,14 +266,46 @@
 	if sz > 0 {
 		elapsed := writekinesis(k, r)
-		log.Printf("D! Wrote a %d point batch to Kinesis in %+v.", sz, elapsed)
+		log.Printf("D! Wrote a %d point batch to Kinesis in %+v.\n", sz, elapsed)
 	}
 
 	return nil
 }
 
-func init() {
-	outputs.Add("kinesis", func() telegraf.Output {
-		return &KinesisOutput{}
-	})
+func (k *KinesisOutput) aggregatedWrite(metrics []telegraf.Metric) error {
+	log.Printf("D! Starting aggregated writer with %d metrics.", len(metrics))
+
+	handler := newPutRecordsHandler()
+	handler.setSerializer(k.serializer)
+
+	for _, metric := range metrics {
+		err := handler.addMetric(k.getPartitionKey(metric), metric)
+		if err != nil {
+			return err
+		}
+	}
+	// packageMetrics can fail (e.g. on serialization errors), so check its error.
+	if err := handler.packageMetrics(k.nShards); err != nil {
+		return err
+	}
+
+	switch {
+	case k.GZipRecords:
+		if err := handler.gzipCompressSlugs(); err != nil {
+			log.Printf("E! Failed to compress with gzip")
+			return err
+		}
+	case k.SnappyRecords:
+		if err := handler.snappyCompressSlugs(); err != nil {
+			log.Printf("E! Failed to compress with snappy")
+			return err
+		}
+	}
+
+	var elapsed time.Duration
+	for _, writeRequests := range handler.convertToKinesisPutRequests() {
+		t := writekinesis(k, writeRequests)
+		elapsed = elapsed + t
+	}
+
+	log.Printf("D! Wrote aggregated metrics in %+v.\n", elapsed)
+
+	return nil
 }
diff --git a/plugins/outputs/kinesis/kinesisHandler.go b/plugins/outputs/kinesis/kinesisHandler.go
new file mode 100644
index 0000000000000..82b9884da75a3
--- /dev/null
+++ b/plugins/outputs/kinesis/kinesisHandler.go
@@ -0,0 +1,216 @@
+package kinesis
+
+import (
+	"fmt"
+
+	"github.com/aws/aws-sdk-go/aws"
+	"github.com/aws/aws-sdk-go/service/kinesis"
+	"github.com/influxdata/telegraf"
+	"github.com/influxdata/telegraf/plugins/serializers"
+	uuid "github.com/satori/go.uuid"
+)
+
+const (
+	// maxOutputRecords is the maximum number of records that we can send in a single send to Kinesis.
+	maxOutputRecords = 5
+	// maxRecordSizeBytes is the maximum size for a record when sending to Kinesis.
+	// 1020KB for the payload and 4KB for the partition key.
+	maxRecordSizeBytes = 1020 * 1024
+	randomPartitionKey = "-random-"
+)
+
+type slug struct {
+	metricsBytes []byte
+	size         int
+}
+
+type putRecordsHandler struct {
+	rawMetrics         map[string][]telegraf.Metric
+	slugs              map[string][][]byte
+	maxOutputRecords   int
+	randomPartitionKey string
+	serializer         serializers.Serializer
+	readyToSendLock    bool
+}
+
+func newPutRecordsHandler() *putRecordsHandler {
+	handler := &putRecordsHandler{
+		maxOutputRecords:   maxOutputRecords,
+		randomPartitionKey: randomPartitionKey,
+	}
+	handler.init()
+
+	return handler
+}
+
+func (handler *putRecordsHandler) init() {
+	handler.rawMetrics = make(map[string][]telegraf.Metric)
+	handler.slugs = make(map[string][][]byte)
+}
+
+func (handler *putRecordsHandler) setSerializer(serializer serializers.Serializer) {
+	handler.serializer = serializer
+}
+
+func (handler *putRecordsHandler) addMetric(partition string, metric telegraf.Metric) error {
+	if handler.readyToSendLock {
+		return fmt.Errorf("Already packaged current metrics. Send first then add more")
+	}
+	if _, ok := handler.rawMetrics[partition]; !ok {
+		handler.rawMetrics[partition] = make([]telegraf.Metric, 0)
+	}
+
+	handler.rawMetrics[partition] = append(handler.rawMetrics[partition], metric)
+	return nil
+}
+
+func (handler *putRecordsHandler) addSlugs(partitionKey string, slugs ...[]byte) {
+	if _, ok := handler.slugs[partitionKey]; !ok {
+		handler.slugs[partitionKey] = make([][]byte, 0)
+	}
+	// Add each new slug into the current slice of []bytes
+	for _, slug := range slugs {
+		handler.slugs[partitionKey] = append(handler.slugs[partitionKey], slug)
+	}
+}
+
+func (handler *putRecordsHandler) packageMetrics(shards int64) error {
+	if handler.readyToSendLock {
+		return fmt.Errorf("Already setup to send data")
+	}
+	splitIntoBlocks := func(howManyBlocks int64, partitionKey string, metrics []telegraf.Metric) error {
+		blocks := make([][]telegraf.Metric, howManyBlocks)
+		for index := range blocks {
+			blocks[index] = make([]telegraf.Metric, 0)
+		}
+
+		currentBlock := 0
+		for _, metric := range metrics {
+			blocks[currentBlock] = append(blocks[currentBlock], metric)
+			currentBlock++
+			if currentBlock == len(blocks) {
+				currentBlock = 0
+			}
+		}
+
+		for _, metrics := range blocks {
+			metricsBytes, err := handler.serializer.SerializeBatch(metrics)
+			if err != nil {
+				return err
+			}
+			handler.addSlugs(partitionKey, metricsBytes)
+		}
+
+		return nil
+	}
+
+	// At this point we need to know if the metrics will fit in a single push to Kinesis;
+	// if not we need to start splitting them.
+	// We start with a go for gold dash and bulk serialize.
+	// If that doesn't work we will then know how many blocks we would need.
+	// Split again into x blocks, serialize and return.
+	for partitionKey, metrics := range handler.rawMetrics {
+
+		if partitionKey == randomPartitionKey {
+			blocks := int64(shards)
+			if int64(len(metrics)) < shards {
+				blocks = int64(len(metrics))
+			}
+			if err := splitIntoBlocks(blocks, partitionKey, metrics); err != nil {
+				return err
+			}
+
+			// Now we need to move the data into its own partition keys
+			for _, metricBytes := range handler.slugs[randomPartitionKey] {
+				key := uuid.NewV4().String()
+				handler.addSlugs(key, metricBytes)
+			}
+			// We are done now so we need to clear out the random key map value
+			delete(handler.slugs, randomPartitionKey)
+			continue
+		}
+
+		tryOne, err := handler.serializer.SerializeBatch(metrics)
+		if err != nil {
+			return err
+		}
+
+		requiredBlocks := (len(tryOne) / maxRecordSizeBytes) + 1
+		fmt.Println("Required Blocks", requiredBlocks)
+
+		if requiredBlocks == 1 {
+			// we are ok and we can carry on
+			handler.addSlugs(partitionKey, tryOne)
+			continue
+		}
+
+		// We need more than one block, so split the data between them.
+		if err := splitIntoBlocks(int64(requiredBlocks), partitionKey, metrics); err != nil {
+			return err
+		}
+		continue
+	}
+
+	return nil
+}
+
+func (handler *putRecordsHandler) snappyCompressSlugs() error {
+	for partitionKey, slugs := range handler.slugs {
+		for index, slug := range slugs {
+			// snappy doesn't return errors
+			compressedBytes, _ := snappyMetrics(slug)
+			handler.slugs[partitionKey][index] = compressedBytes
+		}
+	}
+	return nil
+}
+
+func (handler *putRecordsHandler) gzipCompressSlugs() error {
+	for partitionKey, slugs := range handler.slugs {
+		for index, slug := range slugs {
+			compressedBytes, err := gzipMetrics(slug)
+			if err != nil {
+				return err
+			}
+			handler.slugs[partitionKey][index] = compressedBytes
+		}
+	}
+	return nil
+}
+
+//convertToKinesisPutRequests will return a slice that contains a []*kinesis.PutRecordsRequestEntry
+// sized to fit into a PutRecords call. The number of outer slices is how many times you would
+// need to call kinesis.PutRecords.
+// The inner slices adhere to the current rules. No more than 500 records at once and no more than
+// 5MB of data including the partition keys.
+func (handler *putRecordsHandler) convertToKinesisPutRequests() [][]*kinesis.PutRecordsRequestEntry {
+	putRequests := make([][]*kinesis.PutRecordsRequestEntry, 0)
+	// We need to seed it with the first one.
+	putRequests = append(putRequests, make([]*kinesis.PutRecordsRequestEntry, 0))
+
+	currentIndex := 0
+	currentSize := 0
+	for partitionKey, metricBytesSlice := range handler.slugs {
+		for _, metricBytes := range metricBytesSlice {
+			// We need to see if the current data will fit in this put request.
+			// Start a new request when this one is full: 500 records is the
+			// PutRecords cap, and the size check keeps us under the data limit.
+			payloadSize := len(partitionKey) + len(metricBytes)
+			if len(putRequests[currentIndex]) == 500 || currentSize+payloadSize > maxRecordSizeBytes {
+				currentIndex++
+				putRequests = append(putRequests, make([]*kinesis.PutRecordsRequestEntry, 0))
+				currentSize = 0
+			}
+
+			currentSize = currentSize + payloadSize
+
+			putRequests[currentIndex] = append(
+				putRequests[currentIndex],
+				&kinesis.PutRecordsRequestEntry{
+					Data:         metricBytes,
+					PartitionKey: aws.String(partitionKey),
+				},
+			)
+		}
+	}
+
+	return putRequests
+}
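+
+// Putting it together, the intended call sequence (as driven by
+// aggregatedWrite in kinesis.go) looks like the sketch below. This is
+// illustrative, not compiled code; metrics, nShards, the partition key and
+// the serializer all come from the caller.
+//
+//	handler := newPutRecordsHandler()
+//	handler.setSerializer(serializer)
+//	for _, m := range metrics {
+//		if err := handler.addMetric(partitionKey, m); err != nil {
+//			return err
+//		}
+//	}
+//	// One slug per open shard when the random key is used.
+//	if err := handler.packageMetrics(nShards); err != nil {
+//		return err
+//	}
+//	if err := handler.gzipCompressSlugs(); err != nil {
+//		return err
+//	}
+//	// Each element is one kinesis.PutRecords call worth of entries.
+//	for _, entries := range handler.convertToKinesisPutRequests() {
+//		writekinesis(k, entries)
+//	}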
diff --git a/plugins/outputs/kinesis/kinesishandler_test.go b/plugins/outputs/kinesis/kinesishandler_test.go
new file mode 100644
index 0000000000000..2822107c20e3f
--- /dev/null
+++ b/plugins/outputs/kinesis/kinesishandler_test.go
@@ -0,0 +1,163 @@
+package kinesis
+
+import (
+	"fmt"
+	"testing"
+	"time"
+
+	"github.com/influxdata/telegraf/plugins/serializers/influx"
+
+	"github.com/influxdata/telegraf"
+	"github.com/influxdata/telegraf/metric"
+)
+
+func fakeMetrics(t *testing.T, howMany int) []telegraf.Metric {
+	metricList := make([]telegraf.Metric, howMany)
+
+	for i := 0; i < howMany; i++ {
+		m, err := metric.New(
+			"fake_metric",
+			map[string]string{
+				"index":  fmt.Sprintf("%d", i),
+				"static": "static_tag",
+			},
+			map[string]interface{}{
+				"index_measurement": i,
+				"nano_seconds":      time.Now().UnixNano(),
+			},
+			time.Now(),
+		)
+		if err != nil {
+			t.Logf("Failed to make test metrics")
+			t.FailNow()
+		}
+		metricList[i] = m
+	}
+
+	return metricList
+}
+
+func TestAddMetric(t *testing.T) {
+	testMetrics := fakeMetrics(t, 3)
+
+	h := newPutRecordsHandler()
+
+	for _, m := range testMetrics {
+		h.addMetric("test", m)
+	}
+
+	if len(h.rawMetrics["test"]) != 3 {
+		t.Logf("Adding metrics did not end up in the correct bucket.")
+		t.Fail()
+	}
+}
+
+func TestAddSlugs(t *testing.T) {
+	tests := [][]byte{
+		[]byte("test1"),
+		[]byte("test2"),
+		[]byte("test3"),
+	}
+
+	h := newPutRecordsHandler()
+	partkey := "testPartition"
+	h.addSlugs(partkey, tests...)
+	if len(h.slugs[partkey]) != 3 {
+		t.Logf("Added 3 slugs but did not see them on the other side. Got: %v", h.slugs[partkey])
+		t.Fail()
+	}
+}
+
+func TestKinesisPackagedMetrics(t *testing.T) {
+	tests := []struct {
+		name          string
+		shards        int64
+		nMetrics      int
+		staticKey     string
+		expectedSlugs int
+		snappy        bool
+		gzip          bool
+	}{
+		{
+			name:          "micro random expect 2 slugs",
+			shards:        4,
+			nMetrics:      2,
+			expectedSlugs: 2,
+		},
+		{
+			name:          "large random expect 4 slugs",
+			shards:        4,
+			nMetrics:      4041,
+			expectedSlugs: 4,
+		},
+		{
+			name:          "very large random expect 4 slugs",
+			shards:        4,
+			nMetrics:      8081,
+			expectedSlugs: 4,
+		},
+		{
+			name:          "very large static expect 1 slug",
+			shards:        4,
+			nMetrics:      8081,
+			expectedSlugs: 1,
+			staticKey:     "static_key",
+		},
+		{
+			name:          "very large static with snappy expect 1 slug",
+			shards:        2,
+			nMetrics:      51200,
+			expectedSlugs: 1,
+			snappy:        true,
+			staticKey:     "static_key",
+		},
+		{
+			name:          "very large static with gzip expect 1 slug",
+			shards:        2,
+			nMetrics:      51200,
+			expectedSlugs: 1,
+			gzip:          true,
+			staticKey:     "static_key",
+		},
+	}
+
+	for _, test := range tests {
+		h := newPutRecordsHandler()
+		h.setSerializer(influx.NewSerializer())
+
+		pk := randomPartitionKey
+		if test.staticKey != "" {
+			pk = test.staticKey
+		}
+
+		for _, m := range fakeMetrics(t, test.nMetrics) {
+			h.addMetric(pk, m)
+		}
+
+		if err := h.packageMetrics(test.shards); err != nil {
+			t.Logf("%s: Failed to package metrics. Error: %s", test.name, err)
+			t.Fail()
+		}
+
+		if len(h.slugs) != test.expectedSlugs {
+			t.Logf("%s: Expected slug count is wrong.\nWant: %d\nGot: %d", test.name, test.expectedSlugs, len(h.slugs))
+			t.Fail()
+		}
+
+		if test.snappy {
+			// Snappy doesn't error, just testing for panic :(
+			h.snappyCompressSlugs()
+		}
+
+		if test.gzip {
+			if err := h.gzipCompressSlugs(); err != nil {
+				t.Logf("%s: Error when gzip compressing slug. Error: %s", test.name, err)
+				t.FailNow()
+			}
+		}
+
+		// We need to make sure that we don't get panics here.
+		h.convertToKinesisPutRequests()
+
+	}
+}
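+
+// Worked example for the table above (arithmetic sketch, not executed code):
+// with shards=4 and nMetrics=4041 under the random key, packageMetrics builds
+// min(4041, 4) = 4 blocks of roughly 1010 metrics each, serializes each block
+// into one slug and re-keys it with a fresh UUID, so len(h.slugs) == 4.
+// Note that expectedSlugs counts partition keys in h.slugs: with a static key
+// there is always exactly one entry, even when a 51200-metric payload is
+// split into several byte slices stored under that single key.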
From f96b09011b006a5ecf7a9c1ab8b867f1159de785 Mon Sep 17 00:00:00 2001
From: Randy Coburn
Date: Fri, 15 Mar 2019 14:07:00 +0000
Subject: [PATCH 02/12] Updating the readme

---
 plugins/outputs/kinesis/README.md | 49 +++++++++++++++++++++++++++++++
 1 file changed, 49 insertions(+)

diff --git a/plugins/outputs/kinesis/README.md b/plugins/outputs/kinesis/README.md
index 12b6178fd9197..c2277595527e0 100644
--- a/plugins/outputs/kinesis/README.md
+++ b/plugins/outputs/kinesis/README.md
@@ -20,9 +20,34 @@ will attempt to authenticate.
 5. [Shared Credentials](https://github.com/aws/aws-sdk-go/wiki/configuring-sdk#shared-credentials-file)
 6. [EC2 Instance Profile](http://docs.aws.amazon.com/AWSEC2/latest/UserGuide/iam-roles-for-amazon-ec2.html)
 
+## Example Configuration
+
+```toml
+  access_key = "AWSKEYVALUE"
+  secret_key = "AWSSecretKeyValue"
+  region = "eu-west-1"
+  streamname = "KinesisStreamName"
+  aggregate_metrics = true
+  gzip_records = true
+  snappy_records = true
+  partition = { method = "random" }
+  debug = true
+```
+
 ## Config
 
+## AWS Configuration
+
+The following AWS configuration variables are available and map directly to the normal AWS settings. If you don't know what they are then you most likely don't need to touch them.
+
+* access_key
+* secret_key
+* role_arn
+* profile
+* shared_credential_file
+* token
+* endpoint_url
+
 For this output plugin to function correctly the following variables must be configured.
 
 * region
@@ -31,6 +56,7 @@ For this output plugin to function correctly the following variables must be con
 ### region
 
 The region is the Amazon region that you wish to connect to. Examples include but are not limited to
+
 * us-west-1
 * us-west-2
 * us-east-1
@@ -89,3 +115,26 @@ String is defined using the default Point.String() value and translated to []byt
 #### custom
 
 Custom is a string defined by a number of values in the FormatMetric() function.
+
+### aggregate_metrics
+
+This will make the plugin gather the metrics and send them as blocks of metrics in Kinesis records. The number of put requests depends on a few factors.
+
+1. If a random key is in use then a block for each shard in the stream will be created, unless there aren't enough metrics, in which case one block per metric is created.
+1. Each record will be 1020kb in size + partition key
+
+### gzip_records
+
+This will make the plugin compress the data using GZip before the data is shipped to Kinesis.
+GZip is slower than snappy but generally fast enough and gives much better compression. Use GZip in most cases.
+
+If both gzip and snappy are true, GZip wins.
+
+### snappy_records
+
+This will make the plugin compress the data using Google's Snappy compression before the data is shipped to Kinesis.
+Snappy is much quicker and would be used if you are taking too long to compress and write before the next flush interval.
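+
+Either way, Kinesis itself does not decompress payloads, so the consumer must
+reverse whichever compression was configured. A minimal consumer-side sketch in
+Go, using the same libraries this plugin uses (`decodeRecord` is an
+illustrative helper, not part of this plugin, and how you know which option was
+configured is up to your deployment):
+
+```go
+import (
+	"bytes"
+	"compress/gzip"
+	"io/ioutil"
+
+	"github.com/golang/snappy"
+)
+
+// decodeRecord reverses the compression applied by this plugin.
+func decodeRecord(data []byte, useSnappy bool) ([]byte, error) {
+	if useSnappy {
+		return snappy.Decode(nil, data)
+	}
+	gzr, err := gzip.NewReader(bytes.NewReader(data))
+	if err != nil {
+		return nil, err
+	}
+	defer gzr.Close()
+	return ioutil.ReadAll(gzr)
+}
+```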
+
+### debug
+
+Prints debugging data into the logs.
\ No newline at end of file
From 3045839d83c5ce5c24052ced87bbc38e107c2503 Mon Sep 17 00:00:00 2001
From: Randy Coburn
Date: Fri, 15 Mar 2019 14:42:12 +0000
Subject: [PATCH 03/12] adding the dep files for golang snappy

---
 Gopkg.lock | 6 +++---
 Gopkg.toml | 4 ++++
 2 files changed, 7 insertions(+), 3 deletions(-)

diff --git a/Gopkg.lock b/Gopkg.lock
index 47feeb386f7e1..2f33f6201f58e 100644
--- a/Gopkg.lock
+++ b/Gopkg.lock
@@ -499,12 +499,11 @@
     version = "v1.1.0"
 
 [[projects]]
-  branch = "master"
-  digest = "1:2a5888946cdbc8aa360fd43301f9fc7869d663f60d5eedae7d4e6e5e4f06f2bf"
+  digest = "1:6a6322a15aa8e99bd156fbba0aae4e5d67b4bb05251d860b348a45dfdcba9cce"
   name = "github.com/golang/snappy"
   packages = ["."]
   pruneopts = ""
-  revision = "2e65f85255dbc3072edf28d6b5b8efc472979f5a"
+  revision = "2a8bb927dd31d8daada140a5d09578521ce5c36a"
 
 [[projects]]
   digest = "1:f9f45f75f332e03fc7e9fe9188ea4e1ce4d14779ef34fa1b023da67518e36327"
diff --git a/Gopkg.toml b/Gopkg.toml
index cd7825ccbc951..8f20b481f04c2 100644
--- a/Gopkg.toml
+++ b/Gopkg.toml
@@ -265,3 +265,7 @@
 [[constraint]]
   name = "github.com/go-logfmt/logfmt"
   version = "0.4.0"
+
+[[constraint]]
+  name = "github.com/golang/snappy"
+  revision = "2a8bb927dd31d8daada140a5d09578521ce5c36a"
\ No newline at end of file
From 5baa3b0b146c789799f1b3ced7b945ac71046b18 Mon Sep 17 00:00:00 2001
From: Randy Coburn
Date: Fri, 15 Mar 2019 16:42:38 +0000
Subject: [PATCH 04/12] removed a rogue print line debugger. Added a comment to
 a function to explain what it should do.
---
 plugins/outputs/kinesis/kinesisHandler.go | 8 ++++++--
 1 file changed, 6 insertions(+), 2 deletions(-)

diff --git a/plugins/outputs/kinesis/kinesisHandler.go b/plugins/outputs/kinesis/kinesisHandler.go
index 82b9884da75a3..3551d49d7e405 100644
--- a/plugins/outputs/kinesis/kinesisHandler.go
+++ b/plugins/outputs/kinesis/kinesisHandler.go
@@ -74,6 +74,11 @@ func (handler *putRecordsHandler) addSlugs(partitionKey string, slugs ...[]byte)
 	}
 }
 
+// packageMetrics is responsible for splitting the metrics into payloads that are no larger than 1020KB.
+// Each partition key will have metrics that need to then be split into payloads.
+// If the partition key is random then it will create payloads ready to be split between as many shards
+// as you have available.
+// packageMetrics can't be called again until init is called. Really it is designed to be used once.
 func (handler *putRecordsHandler) packageMetrics(shards int64) error {
 	if handler.readyToSendLock {
 		return fmt.Errorf("Already setup to send data")
@@ -136,7 +141,6 @@ func (handler *putRecordsHandler) packageMetrics(shards int64) error {
 	}
 
 	requiredBlocks := (len(tryOne) / maxRecordSizeBytes) + 1
-	fmt.Println("Required Blocks", requiredBlocks)
 
 	if requiredBlocks == 1 {
 		// we are ok and we can carry on
@@ -178,7 +182,7 @@ func (handler *putRecordsHandler) packageMetrics(shards int64) error {
 	}
 }
 
-//convertToKinesisPutRequests will return a slice that contains a []*kinesis.PutRecordsRequestEntry
+// convertToKinesisPutRequests will return a slice that contains a []*kinesis.PutRecordsRequestEntry
 // sized to fit into a PutRecords call. The number of outer slices is how many times you would
 // need to call kinesis.PutRecords.
 // The inner slices adhere to the current rules. No more than 500 records at once and no more than
 // 5MB of data including the partition keys.
 func (handler *putRecordsHandler) convertToKinesisPutRequests() [][]*kinesis.PutRecordsRequestEntry {
From bfbc97079539c370c36f56b90c4adde2811d09c7 Mon Sep 17 00:00:00 2001
From: Randy Coburn
Date: Sat, 16 Mar 2019 12:52:15 +0000
Subject: [PATCH 05/12] Changed the compression configuration to a string.
 Updated readme file to represent this. Updated the Sample config to include
 the configuration changes.

---
 plugins/outputs/kinesis/README.md  | 20 +++++++++++++++-----
 plugins/outputs/kinesis/kinesis.go | 23 +++++++++++++++++------
 2 files changed, 32 insertions(+), 11 deletions(-)

diff --git a/plugins/outputs/kinesis/README.md b/plugins/outputs/kinesis/README.md
index c2277595527e0..990dc115af57f 100644
--- a/plugins/outputs/kinesis/README.md
+++ b/plugins/outputs/kinesis/README.md
@@ -1,4 +1,4 @@
-## Amazon Kinesis Output for Telegraf
+# Amazon Kinesis Output for Telegraf
 
 This is an experimental plugin that is still in the early stages of development. It will batch up all of the Points
 in one Put request to Kinesis. This should save the number of API requests by a considerable level.
@@ -13,6 +13,7 @@ maybe useful for users to review Amazons official documentation which is availab
 
 This plugin uses a credential chain for Authentication with the Kinesis API endpoint. In the following order the plugin
 will attempt to authenticate.
+
 1. Assumed credentials via STS if `role_arn` attribute is specified (source credentials are evaluated from subsequent rules)
 2. Explicit credentials from `access_key`, `secret_key`, and `token` attributes
 3. Shared profile from `profile` attribute
@@ -28,8 +29,8 @@ will attempt to authenticate.
region = "eu-west-1" streamname = "KinesisStreamName" aggregate_metrics = true - gzip_records = true - snappy_records = true + # Either "gzip", "snappy" + compress_metrics_with = gzip partition = { method = "random" } debug = true ``` @@ -123,14 +124,23 @@ This will make the plugin gather the metrics and send them as blocks of metrics 1. If a random key is in use then a block for each shard in the stream will be created unless there isn't enough metrics then as many blocks as metrics. 1. Each record will be 1020kb in size + partition key -### gzip_records +### compress_metrics_with + +`compress_with` has the following values. If no value is set then compression is skipped. + +* gzip +* snappy + +They are explained below. + +#### gzip This will make the plugin compress the data using GZip before the data is shipped to Kinesis. GZip is slower than snappy but generally fast enough and gives much better compression. Use GZip in most cases. If both gzip and snappy are true. GZip wins. -### snappy_records +#### snappy This will make the plugin compress the data using Google's Snappy compression before the data is shipped to Kinesis. Snappy is much quicker and would be used if you are taking too long to compress and write before the next flush interval. diff --git a/plugins/outputs/kinesis/kinesis.go b/plugins/outputs/kinesis/kinesis.go index 15a70a59ad916..258766cfdb916 100644 --- a/plugins/outputs/kinesis/kinesis.go +++ b/plugins/outputs/kinesis/kinesis.go @@ -37,8 +37,7 @@ type ( Partition *Partition `toml:"partition"` Debug bool `toml:"debug"` AggregateMetrics bool `toml:"aggregate_metrics"` - GZipRecords bool `toml:"gzip_records"` - SnappyRecords bool `toml:"snappy_records"` + CompressWith string `toml:"compress_metrics_with"` svc *kinesis.Kinesis nShards int64 @@ -113,7 +112,19 @@ var sampleConfig = ` ## Each data format has its own unique set of configuration options, read ## more about them here: ## https://github.com/influxdata/telegraf/blob/master/docs/DATA_FORMATS_OUTPUT.md - data_format = "influx" + data_format = "influx" + + # Aggregate metrics into payloads that will fit into the Kinesis records. This + # is designed to save money by making more efficient use of Kinesis. + aggregate_metrics = true + + # Kinesis cares little for what you send into it via the records. + # We can therefore save more money by compressing the aggregated metrics. + # Note, this only works with the aggregated metrics set to true. + # valid options: "gzip", "snappy" + # See https://github.com/influxdata/telegraf/tree/master/plugins/outputs/kinesis + # for more details on each compression method. + compress_metrics_with = "gzip" ## debug will show upstream aws messages. debug = false @@ -286,13 +297,13 @@ func (k *KinesisOutput) aggregatedWrite(metrics []telegraf.Metric) error { } handler.packageMetrics(k.nShards) - switch { - case k.GZipRecords: + switch k.CompressWith { + case "gzip": if err := handler.gzipCompressSlugs(); err != nil { log.Printf("E! Failed to compress with gzip") return err } - case k.SnappyRecords: + case "snappy": if err := handler.snappyCompressSlugs(); err != nil { log.Printf("E! Failed to compress with snappy") return err From a354ef2fde21056aa041317203995c0755011adb Mon Sep 17 00:00:00 2001 From: Randy Coburn Date: Sat, 16 Mar 2019 12:52:34 +0000 Subject: [PATCH 06/12] missed from previous commit. 
--- plugins/outputs/kinesis/README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/plugins/outputs/kinesis/README.md b/plugins/outputs/kinesis/README.md index 990dc115af57f..0137c33d98dfa 100644 --- a/plugins/outputs/kinesis/README.md +++ b/plugins/outputs/kinesis/README.md @@ -126,7 +126,7 @@ This will make the plugin gather the metrics and send them as blocks of metrics ### compress_metrics_with -`compress_with` has the following values. If no value is set then compression is skipped. +`compress_metrics_with` has the following values. If no value is set then compression is skipped. * gzip * snappy From 5f3e43f04deed5d24208ea98e54e8ee48805e211 Mon Sep 17 00:00:00 2001 From: Randy Coburn Date: Thu, 18 Apr 2019 14:48:43 +0200 Subject: [PATCH 07/12] Fixing a documentation error --- plugins/outputs/kinesis/README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/plugins/outputs/kinesis/README.md b/plugins/outputs/kinesis/README.md index 0137c33d98dfa..8470f2f859cbd 100644 --- a/plugins/outputs/kinesis/README.md +++ b/plugins/outputs/kinesis/README.md @@ -30,7 +30,7 @@ will attempt to authenticate. streamname = "KinesisStreamName" aggregate_metrics = true # Either "gzip", "snappy" - compress_metrics_with = gzip + compress_metrics_with = "gzip" partition = { method = "random" } debug = true ``` From ef06f540478c6ba57b5658c2551d80177d9e9840 Mon Sep 17 00:00:00 2001 From: Randy Coburn Date: Tue, 23 Apr 2019 08:47:56 +0200 Subject: [PATCH 08/12] syncing master from influx --- .github/ISSUE_TEMPLATE/Bug_report.md | 52 +- .github/ISSUE_TEMPLATE/Feature_request.md | 34 +- CHANGELOG.md | 64 +- CONTRIBUTING.md | 2 +- README.md | 3 + accumulator.go | 9 +- agent/accumulator.go | 17 +- agent/accumulator_test.go | 7 +- agent/agent.go | 135 ++- agent/agent_test.go | 65 +- appveyor.yml | 1 + cmd/telegraf/telegraf.go | 15 +- docker-compose.yml | 6 +- docs/CONFIGURATION.md | 21 +- docs/DATA_FORMATS_OUTPUT.md | 1 + docs/LICENSE_OF_DEPENDENCIES.md | 2 + etc/telegraf.conf | 61 +- etc/telegraf_windows.conf | 5 +- internal/config/config.go | 58 +- internal/config/config_test.go | 3 +- .../testdata/single_plugin_env_vars.toml | 2 +- .../invalid-config.conf | 4 - internal/internal.go | 2 + internal/internal_test.go | 34 + internal/models/running_aggregator.go | 35 +- internal/models/running_aggregator_test.go | 10 +- internal/models/running_output.go | 7 + plugins/aggregators/histogram/README.md | 9 +- plugins/aggregators/histogram/histogram.go | 19 +- .../aggregators/histogram/histogram_test.go | 37 +- plugins/inputs/all/all.go | 2 + plugins/inputs/bind/README.md | 118 +++ plugins/inputs/bind/bind.go | 87 ++ plugins/inputs/bind/bind_test.go | 581 +++++++++++ plugins/inputs/bind/json_stats.go | 166 ++++ plugins/inputs/bind/testdata/json/v1/mem | 133 +++ plugins/inputs/bind/testdata/json/v1/net | 241 +++++ plugins/inputs/bind/testdata/json/v1/server | 141 +++ plugins/inputs/bind/testdata/xml/v2 | 926 ++++++++++++++++++ plugins/inputs/bind/testdata/xml/v3/mem | 142 +++ plugins/inputs/bind/testdata/xml/v3/net | 156 +++ plugins/inputs/bind/testdata/xml/v3/server | 328 +++++++ plugins/inputs/bind/xml_stats_v2.go | 168 ++++ plugins/inputs/bind/xml_stats_v3.go | 161 +++ plugins/inputs/consul/consul.go | 7 +- plugins/inputs/diskio/diskio_linux.go | 4 +- plugins/inputs/exec/exec.go | 112 +-- plugins/inputs/exec/exec_test.go | 87 +- plugins/inputs/filecount/filecount.go | 13 +- plugins/inputs/github/README.md | 55 ++ plugins/inputs/github/github.go | 183 ++++ 
plugins/inputs/github/github_test.go | 119 +++ .../inputs/influxdb_listener/http_listener.go | 11 +- plugins/inputs/phpfpm/README.md | 10 + plugins/inputs/phpfpm/phpfpm.go | 31 +- plugins/inputs/ping/README.md | 5 +- plugins/inputs/procstat/README.md | 4 + plugins/inputs/procstat/process.go | 1 + plugins/inputs/procstat/procstat.go | 14 + plugins/inputs/procstat/procstat_test.go | 4 + plugins/inputs/prometheus/README.md | 5 + plugins/inputs/prometheus/kubernetes.go | 4 +- plugins/inputs/prometheus/prometheus.go | 6 +- plugins/inputs/statsd/statsd.go | 77 +- plugins/inputs/system/README.md | 2 +- plugins/inputs/system/system.go | 20 +- plugins/inputs/vsphere/vsphere_test.go | 64 -- plugins/outputs/file/file.go | 22 +- plugins/outputs/influxdb_v2/influxdb.go | 2 +- .../prometheus_client/prometheus_client.go | 33 +- .../prometheus_client_test.go | 6 +- plugins/outputs/wavefront/README.md | 4 + plugins/outputs/wavefront/wavefront.go | 17 + plugins/outputs/wavefront/wavefront_test.go | 47 + plugins/parsers/csv/parser.go | 2 +- plugins/parsers/grok/README.md | 14 +- plugins/parsers/grok/influx_patterns.go | 41 +- plugins/parsers/grok/parser.go | 2 +- plugins/parsers/grok/parser_test.go | 21 + plugins/parsers/influx/handler.go | 4 + plugins/parsers/influx/parser.go | 2 + plugins/parsers/logfmt/README.md | 4 - plugins/parsers/nagios/parser.go | 144 ++- plugins/parsers/nagios/parser_test.go | 544 ++++++++-- plugins/serializers/registry.go | 14 + plugins/serializers/wavefront/README.md | 47 + plugins/serializers/wavefront/wavefront.go | 202 ++++ .../serializers/wavefront/wavefront_test.go | 295 ++++++ scripts/build.py | 6 +- scripts/init.sh | 20 +- scripts/post-install.sh | 12 +- testutil/accumulator.go | 3 +- 92 files changed, 5818 insertions(+), 598 deletions(-) delete mode 100644 internal/config/testdata/subconfig/..4984_10_04_08_28_06.119/invalid-config.conf create mode 100644 plugins/inputs/bind/README.md create mode 100644 plugins/inputs/bind/bind.go create mode 100644 plugins/inputs/bind/bind_test.go create mode 100644 plugins/inputs/bind/json_stats.go create mode 100644 plugins/inputs/bind/testdata/json/v1/mem create mode 100644 plugins/inputs/bind/testdata/json/v1/net create mode 100644 plugins/inputs/bind/testdata/json/v1/server create mode 100644 plugins/inputs/bind/testdata/xml/v2 create mode 100644 plugins/inputs/bind/testdata/xml/v3/mem create mode 100644 plugins/inputs/bind/testdata/xml/v3/net create mode 100644 plugins/inputs/bind/testdata/xml/v3/server create mode 100644 plugins/inputs/bind/xml_stats_v2.go create mode 100644 plugins/inputs/bind/xml_stats_v3.go create mode 100644 plugins/inputs/github/README.md create mode 100644 plugins/inputs/github/github.go create mode 100644 plugins/inputs/github/github_test.go create mode 100644 plugins/serializers/wavefront/README.md create mode 100755 plugins/serializers/wavefront/wavefront.go create mode 100755 plugins/serializers/wavefront/wavefront_test.go diff --git a/.github/ISSUE_TEMPLATE/Bug_report.md b/.github/ISSUE_TEMPLATE/Bug_report.md index b84aad767b729..49cfdefe3d0ec 100644 --- a/.github/ISSUE_TEMPLATE/Bug_report.md +++ b/.github/ISSUE_TEMPLATE/Bug_report.md @@ -1,24 +1,28 @@ ---- -name: Bug report -about: Create a report to help us improve - ---- - -### Relevant telegraf.conf: - -### System info: - -[Include Telegraf version, operating system name, and other relevant details] - -### Steps to reproduce: - -1. ... -2. ... 
- -### Expected behavior: - -### Actual behavior: - -### Additional info: - -[Include gist of relevant config, logs, etc.] +--- +name: Bug report +about: Create a report to help us improve + +--- + +### Relevant telegraf.conf: + +```toml + +``` + +### System info: + + + +### Steps to reproduce: + +1. ... +2. ... + +### Expected behavior: + +### Actual behavior: + +### Additional info: + + diff --git a/.github/ISSUE_TEMPLATE/Feature_request.md b/.github/ISSUE_TEMPLATE/Feature_request.md index 84d45fcd6deab..20aba04bec860 100644 --- a/.github/ISSUE_TEMPLATE/Feature_request.md +++ b/.github/ISSUE_TEMPLATE/Feature_request.md @@ -1,17 +1,17 @@ ---- -name: Feature request -about: Suggest an idea for this project - ---- - -## Feature Request - -Opening a feature request kicks off a discussion. - -### Proposal: - -### Current behavior: - -### Desired behavior: - -### Use case: [Why is this important (helps with prioritizing requests)] +--- +name: Feature request +about: Suggest an idea for this project + +--- + +## Feature Request + +Opening a feature request kicks off a discussion. + +### Proposal: + +### Current behavior: + +### Desired behavior: + +### Use case: diff --git a/CHANGELOG.md b/CHANGELOG.md index b99ab556a1d38..40faa609c546b 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,12 +1,71 @@ ## v1.11 [unreleased] +#### Release Notes + +- The `uptime_format` field in the system input has been deprecated, use the + `uptime` field instead. + +#### New Inputs + +- [bind](/plugins/inputs/bind/README.md) - Contributed by @dswarbrick & @danielllek +- [github](/plugins/inputs/github/README.md) - Contributed by @influxdata + +#### New Serializers + +- [wavefront](/plugins/serializers/wavefront/README.md) - Contributed by @puckpuck + #### Features - [#5556](https://github.com/influxdata/telegraf/pull/5556): Add TTL field to ping input. - [#5569](https://github.com/influxdata/telegraf/pull/5569): Add hexadecimal string to integer conversion to converter processor. +- [#5601](https://github.com/influxdata/telegraf/pull/5601): Add support for multiple line text and perfdata to nagios parser. +- [#5648](https://github.com/influxdata/telegraf/pull/5648): Allow env vars ${} expansion syntax in configuration file. +- [#5641](https://github.com/influxdata/telegraf/pull/5641): Add option to reset buckets on flush to histogram aggregator. +- [#5664](https://github.com/influxdata/telegraf/pull/5664): Add option to use strict sanitization rules to wavefront output. +- [#5697](https://github.com/influxdata/telegraf/pull/5697): Add namespace restriction to prometheus input plugin. +- [#5681](https://github.com/influxdata/telegraf/pull/5681): Add cmdline tag to procstat input. +- [#5704](https://github.com/influxdata/telegraf/pull/5704): Support verbose query param in ping endpoint of influxdb_listener. +- [#5713](https://github.com/influxdata/telegraf/pull/5713): Enhance HTTP connection options for phpfpm input plugin. + +#### Bugfixes +- [#5631](https://github.com/influxdata/telegraf/pull/5631): Create Windows service only when specified or in service manager. +- [#5730](https://github.com/influxdata/telegraf/pull/5730): Don't start telegraf when stale pidfile found. + +## v1.10.3 [2019-04-16] + +#### Bugfixes + +- [#5680](https://github.com/influxdata/telegraf/pull/5680): Allow colons in metric names in prometheus_client output. +- [#5716](https://github.com/influxdata/telegraf/pull/5716): Set log directory attributes in rpm spec. 
+ +## v1.10.2 [2019-04-02] + +#### Release Notes + +- String fields no longer have leading and trailing quotation marks removed in + the grok parser. If you are capturing quoted strings you may need to update + the patterns. + +#### Bugfixes -## v1.10.1 [unreleased] +- [#5612](https://github.com/influxdata/telegraf/pull/5612): Fix deadlock when Telegraf is aligning aggregators. +- [#5523](https://github.com/influxdata/telegraf/issues/5523): Fix missing cluster stats in ceph input. +- [#5566](https://github.com/influxdata/telegraf/pull/5566): Fix reading major and minor block devices identifiers in diskio input. +- [#5607](https://github.com/influxdata/telegraf/pull/5607): Add owned directories to rpm package spec. +- [#4998](https://github.com/influxdata/telegraf/issues/4998): Fix last character removed from string field in grok parser. +- [#5632](https://github.com/influxdata/telegraf/pull/5632): Fix drop tracking of metrics removed with aggregator drop_original. +- [#5540](https://github.com/influxdata/telegraf/pull/5540): Fix open file error handling in file output. +- [#5626](https://github.com/influxdata/telegraf/issues/5626): Fix plugin name in influxdb_v2 output logging. +- [#5621](https://github.com/influxdata/telegraf/issues/5621): Fix basedir check and parent dir extraction in filecount input. +- [#5618](https://github.com/influxdata/telegraf/issues/5618): Listen before leaving start in statsd. +- [#5595](https://github.com/influxdata/telegraf/issues/5595): Fix aggregator window alignment. +- [#5637](https://github.com/influxdata/telegraf/issues/5637): Fix panic during shutdown of multiple aggregators. +- [#5642](https://github.com/influxdata/telegraf/issues/5642): Fix parsing of kube config certificate-authority-data in prometheus input. +- [#5636](https://github.com/influxdata/telegraf/issues/5636): Fix tags applied to wrong metric on parse error. +- [#5522](https://github.com/influxdata/telegraf/issues/5522): Remove tags that would create invalid label names in prometheus output. + +## v1.10.1 [2019-03-19] #### Bugfixes @@ -14,6 +73,9 @@ - [#5543](https://github.com/influxdata/telegraf/pull/5543): Add Base64-encoding/decoding for Google Cloud PubSub plugins. - [#5565](https://github.com/influxdata/telegraf/issues/5565): Fix type compatibility in vsphere plugin with use_int_samples option. - [#5492](https://github.com/influxdata/telegraf/issues/5492): Fix vsphere input shows failed task in vCenter. +- [#5530](https://github.com/influxdata/telegraf/issues/5530): Fix invalid measurement name and skip column in csv parser. +- [#5589](https://github.com/influxdata/telegraf/issues/5589): Fix system input causing high cpu usage on Raspbian. +- [#5575](https://github.com/influxdata/telegraf/issues/5575): Don't add empty healthcheck tags to consul input. ## v1.10 [2019-03-05] diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index 0015cd5eb529a..badf71c120eb3 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -51,7 +51,7 @@ make test Running the integration tests requires several docker containers to be running. 
You can start the containers with: ``` -make docker-run +docker-compose up ``` And run the full test suite with: diff --git a/README.md b/README.md index 8d08c6dfd28a6..758d7acb0dddf 100644 --- a/README.md +++ b/README.md @@ -139,6 +139,7 @@ For documentation on the latest development code see the [documentation index][d * [aws cloudwatch](./plugins/inputs/cloudwatch) * [bcache](./plugins/inputs/bcache) * [beanstalkd](./plugins/inputs/beanstalkd) +* [bind](./plugins/inputs/bind) * [bond](./plugins/inputs/bond) * [burrow](./plugins/inputs/burrow) * [cassandra](./plugins/inputs/cassandra) (deprecated, use [jolokia2](./plugins/inputs/jolokia2)) @@ -168,6 +169,7 @@ For documentation on the latest development code see the [documentation index][d * [filestat](./plugins/inputs/filestat) * [filecount](./plugins/inputs/filecount) * [fluentd](./plugins/inputs/fluentd) +* [github](./plugins/inputs/github) * [graylog](./plugins/inputs/graylog) * [haproxy](./plugins/inputs/haproxy) * [hddtemp](./plugins/inputs/hddtemp) @@ -309,6 +311,7 @@ For documentation on the latest development code see the [documentation index][d - [ServiceNow](/plugins/serializers/nowmetric) - [SplunkMetric](/plugins/serializers/splunkmetric) - [Carbon2](/plugins/serializers/carbon2) +- [Wavefront](/plugins/serializers/wavefront) ## Processor Plugins diff --git a/accumulator.go b/accumulator.go index 825455c4c7063..1ea5737a84a99 100644 --- a/accumulator.go +++ b/accumulator.go @@ -41,11 +41,10 @@ type Accumulator interface { // AddMetric adds an metric to the accumulator. AddMetric(Metric) - // SetPrecision takes two time.Duration objects. If the first is non-zero, - // it sets that as the precision. Otherwise, it takes the second argument - // as the order of time that the metrics should be rounded to, with the - // maximum being 1s. - SetPrecision(precision, interval time.Duration) + // SetPrecision sets the timestamp rounding precision. All metrics addeds + // added to the accumulator will have their timestamp rounded to the + // nearest multiple of precision. + SetPrecision(precision time.Duration) // Report an error. AddError(err error) diff --git a/agent/accumulator.go b/agent/accumulator.go index 0533a06e2deff..9e0bb11ca0cb8 100644 --- a/agent/accumulator.go +++ b/agent/accumulator.go @@ -114,21 +114,8 @@ func (ac *accumulator) AddError(err error) { log.Printf("E! 
[%s]: Error in plugin: %v", ac.maker.Name(), err) } -func (ac *accumulator) SetPrecision(precision, interval time.Duration) { - if precision > 0 { - ac.precision = precision - return - } - switch { - case interval >= time.Second: - ac.precision = time.Second - case interval >= time.Millisecond: - ac.precision = time.Millisecond - case interval >= time.Microsecond: - ac.precision = time.Microsecond - default: - ac.precision = time.Nanosecond - } +func (ac *accumulator) SetPrecision(precision time.Duration) { + ac.precision = precision } func (ac *accumulator) getTime(t []time.Time) time.Time { diff --git a/agent/accumulator_test.go b/agent/accumulator_test.go index 316ad124b8147..933821701c5e5 100644 --- a/agent/accumulator_test.go +++ b/agent/accumulator_test.go @@ -74,7 +74,6 @@ func TestSetPrecision(t *testing.T) { name string unset bool precision time.Duration - interval time.Duration timestamp time.Time expected time.Time }{ @@ -86,13 +85,13 @@ func TestSetPrecision(t *testing.T) { }, { name: "second interval", - interval: time.Second, + precision: time.Second, timestamp: time.Date(2006, time.February, 10, 12, 0, 0, 82912748, time.UTC), expected: time.Date(2006, time.February, 10, 12, 0, 0, 0, time.UTC), }, { name: "microsecond interval", - interval: time.Microsecond, + precision: time.Microsecond, timestamp: time.Date(2006, time.February, 10, 12, 0, 0, 82912748, time.UTC), expected: time.Date(2006, time.February, 10, 12, 0, 0, 82913000, time.UTC), }, @@ -109,7 +108,7 @@ func TestSetPrecision(t *testing.T) { a := NewAccumulator(&TestMetricMaker{}, metrics) if !tt.unset { - a.SetPrecision(tt.precision, tt.interval) + a.SetPrecision(tt.precision) } a.AddFields("acctest", diff --git a/agent/agent.go b/agent/agent.go index ec9aa7f32acc8..2687bbc0f2eb0 100644 --- a/agent/agent.go +++ b/agent/agent.go @@ -129,10 +129,7 @@ func (a *Agent) Run(ctx context.Context) error { wg.Wait() log.Printf("D! [agent] Closing outputs") - err = a.closeOutputs() - if err != nil { - return err - } + a.closeOutputs() log.Printf("D! [agent] Stopped Successfully") return nil @@ -183,8 +180,7 @@ func (a *Agent) Test(ctx context.Context) error { } acc := NewAccumulator(input, metricC) - acc.SetPrecision(a.Config.Agent.Precision.Duration, - a.Config.Agent.Interval.Duration) + acc.SetPrecision(a.Precision()) input.SetDefaultTags(a.Config.Tags) // Special instructions for some inputs. cpu, for example, needs to be @@ -192,8 +188,7 @@ func (a *Agent) Test(ctx context.Context) error { switch input.Name() { case "inputs.cpu", "inputs.mongodb", "inputs.procstat": nulAcc := NewAccumulator(input, nulC) - nulAcc.SetPrecision(a.Config.Agent.Precision.Duration, - a.Config.Agent.Interval.Duration) + nulAcc.SetPrecision(a.Precision()) if err := input.Input.Gather(nulAcc); err != nil { return err } @@ -225,7 +220,6 @@ func (a *Agent) runInputs( var wg sync.WaitGroup for _, input := range a.Config.Inputs { interval := a.Config.Agent.Interval.Duration - precision := a.Config.Agent.Precision.Duration jitter := a.Config.Agent.CollectionJitter.Duration // Overwrite agent interval if this plugin has its own. @@ -234,7 +228,7 @@ func (a *Agent) runInputs( } acc := NewAccumulator(input, dst) - acc.SetPrecision(precision, interval) + acc.SetPrecision(a.Precision()) wg.Add(1) go func(input *models.RunningInput) { @@ -342,10 +336,27 @@ func (a *Agent) applyProcessors(m telegraf.Metric) []telegraf.Metric { return metrics } -// runAggregators triggers the periodic push for Aggregators. 
+func updateWindow(start time.Time, roundInterval bool, period time.Duration) (time.Time, time.Time) { + var until time.Time + if roundInterval { + until = internal.AlignTime(start, period) + if until == start { + until = internal.AlignTime(start.Add(time.Nanosecond), period) + } + } else { + until = start.Add(period) + } + + since := until.Add(-period) + + return since, until +} + +// runAggregators adds metrics to the aggregators and triggers their periodic +// push call. // -// When the context is done a final push will occur and then this function -// will return. +// Runs until src is closed and all metrics have been processed. Will call +// push one final time before returning. func (a *Agent) runAggregators( startTime time.Time, src <-chan telegraf.Metric, @@ -353,6 +364,13 @@ func (a *Agent) runAggregators( ) error { ctx, cancel := context.WithCancel(context.Background()) + // Before calling Add, initialize the aggregation window. This ensures + // that any metric created after start time will be aggregated. + for _, agg := range a.Config.Aggregators { + since, until := updateWindow(startTime, a.Config.Agent.RoundInterval, agg.Period()) + agg.UpdateWindow(since, until) + } + var wg sync.WaitGroup wg.Add(1) go func() { @@ -367,36 +385,33 @@ func (a *Agent) runAggregators( if !dropOriginal { dst <- metric + } else { + metric.Drop() } } cancel() }() - precision := a.Config.Agent.Precision.Duration - interval := a.Config.Agent.Interval.Duration aggregations := make(chan telegraf.Metric, 100) - for _, agg := range a.Config.Aggregators { - wg.Add(1) - go func(agg *models.RunningAggregator) { - defer wg.Done() + wg.Add(1) + go func() { + defer wg.Done() - if a.Config.Agent.RoundInterval { - // Aggregators are aligned to the agent interval regardless of - // their period. - err := internal.SleepContext(ctx, internal.AlignDuration(startTime, interval)) - if err != nil { - return - } - } + var aggWg sync.WaitGroup + for _, agg := range a.Config.Aggregators { + aggWg.Add(1) + go func(agg *models.RunningAggregator) { + defer aggWg.Done() - agg.SetPeriodStart(startTime) + acc := NewAccumulator(agg, aggregations) + acc.SetPrecision(a.Precision()) + a.push(ctx, agg, acc) + }(agg) + } - acc := NewAccumulator(agg, aggregations) - acc.SetPrecision(precision, interval) - a.push(ctx, agg, acc) - close(aggregations) - }(agg) - } + aggWg.Wait() + close(aggregations) + }() for metric := range aggregations { metrics := a.applyProcessors(metric) @@ -409,34 +424,35 @@ func (a *Agent) runAggregators( return nil } -// push runs the push for a single aggregator every period. More simple than -// the output/input version as timeout should be less likely.... not really -// because the output channel can block for now. +// push runs the push for a single aggregator every period. func (a *Agent) push( ctx context.Context, aggregator *models.RunningAggregator, acc telegraf.Accumulator, ) { - ticker := time.NewTicker(aggregator.Period()) - defer ticker.Stop() - for { + // Ensures that Push will be called for each period, even if it has + // already elapsed before this function is called. This is guaranteed + // because so long as only Push updates the EndPeriod. This method + // also avoids drift by not using a ticker. + until := time.Until(aggregator.EndPeriod()) + select { - case <-ticker.C: + case <-time.After(until): + aggregator.Push(acc) break case <-ctx.Done(): aggregator.Push(acc) return } - - aggregator.Push(acc) } } // runOutputs triggers the periodic write for Outputs. 
// -// When the context is done, outputs continue to run until their buffer is -// closed, afterwich they run flush once more. + +// Runs until src is closed and all metrics have been processed. Will call +// Write one final time before returning. func (a *Agent) runOutputs( startTime time.Time, src <-chan telegraf.Metric, @@ -587,12 +603,10 @@ func (a *Agent) connectOutputs(ctx context.Context) error { } // closeOutputs closes all outputs. -func (a *Agent) closeOutputs() error { - var err error +func (a *Agent) closeOutputs() { for _, output := range a.Config.Outputs { - err = output.Output.Close() + output.Close() } - return err } // startServiceInputs starts all service inputs. @@ -609,7 +623,7 @@ func (a *Agent) startServiceInputs( // Gather() accumulator does apply rounding according to the // precision agent setting. acc := NewAccumulator(input, dst) - acc.SetPrecision(time.Nanosecond, 0) + acc.SetPrecision(time.Nanosecond) err := si.Start(acc) if err != nil { @@ -639,6 +653,27 @@ func (a *Agent) stopServiceInputs() { } } +// Returns the rounding precision for metrics. +func (a *Agent) Precision() time.Duration { + precision := a.Config.Agent.Precision.Duration + interval := a.Config.Agent.Interval.Duration + + if precision > 0 { + return precision + } + + switch { + case interval >= time.Second: + return time.Second + case interval >= time.Millisecond: + return time.Millisecond + case interval >= time.Microsecond: + return time.Microsecond + default: + return time.Nanosecond + } +} + // panicRecover displays an error if an input panics. func panicRecover(input *models.RunningInput) { if err := recover(); err != nil { diff --git a/agent/agent_test.go b/agent/agent_test.go index a5920ce1c8a50..c822a236b3084 100644 --- a/agent/agent_test.go +++ b/agent/agent_test.go @@ -2,15 +2,13 @@ package agent import ( "testing" + "time" "github.com/influxdata/telegraf/internal/config" - - // needing to load the plugins _ "github.com/influxdata/telegraf/plugins/inputs/all" - // needing to load the outputs _ "github.com/influxdata/telegraf/plugins/outputs/all" - "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" ) func TestAgent_OmitHostname(t *testing.T) { @@ -109,3 +107,62 @@ func TestAgent_LoadOutput(t *testing.T) { a, _ = NewAgent(c) assert.Equal(t, 3, len(a.Config.Outputs)) } + +func TestWindow(t *testing.T) { + parse := func(s string) time.Time { + tm, err := time.Parse(time.RFC3339, s) + if err != nil { + panic(err) + } + return tm + } + + tests := []struct { + name string + start time.Time + roundInterval bool + period time.Duration + since time.Time + until time.Time + }{ + { + name: "round with exact alignment", + start: parse("2018-03-27T00:00:00Z"), + roundInterval: true, + period: 30 * time.Second, + since: parse("2018-03-27T00:00:00Z"), + until: parse("2018-03-27T00:00:30Z"), + }, + { + name: "round with alignment needed", + start: parse("2018-03-27T00:00:05Z"), + roundInterval: true, + period: 30 * time.Second, + since: parse("2018-03-27T00:00:00Z"), + until: parse("2018-03-27T00:00:30Z"), + }, + { + name: "no round with exact alignment", + start: parse("2018-03-27T00:00:00Z"), + roundInterval: false, + period: 30 * time.Second, + since: parse("2018-03-27T00:00:00Z"), + until: parse("2018-03-27T00:00:30Z"), + }, + { + name: "no found with alignment needed", + start: parse("2018-03-27T00:00:05Z"), + roundInterval: false, + period: 30 * time.Second, + since: parse("2018-03-27T00:00:05Z"), + until: parse("2018-03-27T00:00:35Z"), + }, + } + for _, 
tt := range tests { + t.Run(tt.name, func(t *testing.T) { + since, until := updateWindow(tt.start, tt.roundInterval, tt.period) + require.Equal(t, tt.since, since, "since") + require.Equal(t, tt.until, until, "until") + }) + } +} diff --git a/appveyor.yml b/appveyor.yml index 15cdd5664ed02..39ec04425f0be 100644 --- a/appveyor.yml +++ b/appveyor.yml @@ -2,6 +2,7 @@ version: "{build}" cache: - C:\Cache + - C:\gopath\pkg\dep\sources -> Gopkg.lock clone_folder: C:\gopath\src\github.com\influxdata\telegraf diff --git a/cmd/telegraf/telegraf.go b/cmd/telegraf/telegraf.go index a3fae740c8c3c..5dd29cef74bf6 100644 --- a/cmd/telegraf/telegraf.go +++ b/cmd/telegraf/telegraf.go @@ -342,7 +342,7 @@ func main() { log.Println("Telegraf version already configured to: " + internal.Version()) } - if runtime.GOOS == "windows" && !(*fRunAsConsole) { + if runtime.GOOS == "windows" && windowsRunAsService() { svcConfig := &service.Config{ Name: *fServiceName, DisplayName: "Telegraf Data Collector Service", @@ -392,3 +392,16 @@ func main() { ) } } + +// Return true if Telegraf should create a Windows service. +func windowsRunAsService() bool { + if *fService != "" { + return true + } + + if *fRunAsConsole { + return false + } + + return !service.Interactive() +} diff --git a/docker-compose.yml b/docker-compose.yml index 5ac47089db975..a5991434bc16e 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -17,7 +17,7 @@ services: - KAFKA_ADVERTISED_HOST_NAME=localhost - KAFKA_ADVERTISED_PORT=9092 - KAFKA_ZOOKEEPER_CONNECT=zookeeper:2181 - - KAFKA_CREATE_TOPICS="test:1:1" + - KAFKA_OFFSETS_TOPIC_REPLICATION_FACTOR=1 - JAVA_OPTS="-Xms256m -Xmx256m" ports: - "9092:9092" @@ -38,7 +38,7 @@ services: - "3306:3306" memcached: image: memcached - ports: + ports: - "11211:11211" pgbouncer: image: mbed/pgbouncer @@ -90,7 +90,7 @@ services: ports: - "4200:4200" - "4230:4230" - - "5432:5432" + - "6543:5432" command: - crate - -Cnetwork.host=0.0.0.0 diff --git a/docs/CONFIGURATION.md b/docs/CONFIGURATION.md index 157ad023c098c..edb3341456fe2 100644 --- a/docs/CONFIGURATION.md +++ b/docs/CONFIGURATION.md @@ -34,10 +34,10 @@ configuration files. ### Environment Variables -Environment variables can be used anywhere in the config file, simply prepend -them with `$`. Replacement occurs before file parsing. For strings -the variable must be within quotes, e.g., `"$STR_VAR"`, for numbers and booleans -they should be unquoted, e.g., `$INT_VAR`, `$BOOL_VAR`. +Environment variables can be used anywhere in the config file, simply surround +them with `${}`. Replacement occurs before file parsing. For strings +the variable must be within quotes, e.g., `"${STR_VAR}"`, for numbers and booleans +they should be unquoted, e.g., `${INT_VAR}`, `${BOOL_VAR}`. When using the `.deb` or `.rpm` packages, you can define environment variables in the `/etc/default/telegraf` file. @@ -55,14 +55,14 @@ INFLUX_PASSWORD="monkey123" `/etc/telegraf.conf`: ```toml [global_tags] - user = "$USER" + user = "${USER}" [[inputs.mem]] [[outputs.influxdb]] - urls = ["$INFLUX_URL"] - skip_database_creation = $INFLUX_SKIP_DATABASE_CREATION - password = "$INFLUX_PASSWORD" + urls = ["${INFLUX_URL}"] + skip_database_creation = ${INFLUX_SKIP_DATABASE_CREATION} + password = "${INFLUX_PASSWORD}" ``` The above files will produce the following effective configuration file to be @@ -112,10 +112,7 @@ The agent table configures Telegraf and the defaults used across all plugins. This controls the size of writes that Telegraf sends to output plugins. 
- **metric_buffer_limit**: - For failed writes, telegraf will cache metric_buffer_limit metrics for each - output, and will flush this buffer on a successful write. Oldest metrics - are dropped first when this buffer fills. - This buffer only fills when writes fail to output plugin(s). + Maximum number of unwritten metrics per output. - **collection_jitter**: Collection jitter is used to jitter the collection by a random [interval][]. diff --git a/docs/DATA_FORMATS_OUTPUT.md b/docs/DATA_FORMATS_OUTPUT.md index 3ee16524d2905..f3ac028b980d6 100644 --- a/docs/DATA_FORMATS_OUTPUT.md +++ b/docs/DATA_FORMATS_OUTPUT.md @@ -9,6 +9,7 @@ plugins. 1. [Graphite](/plugins/serializers/graphite) 1. [SplunkMetric](/plugins/serializers/splunkmetric) 1. [Carbon2](/plugins/serializers/carbon2) +1. [Wavefront](/plugins/serializers/wavefront) You will be able to identify the plugins with support by the presence of a `data_format` config option, for example, in the `file` output plugin: diff --git a/docs/LICENSE_OF_DEPENDENCIES.md b/docs/LICENSE_OF_DEPENDENCIES.md index 485b758a40af7..5b6faf4c9eb6f 100644 --- a/docs/LICENSE_OF_DEPENDENCIES.md +++ b/docs/LICENSE_OF_DEPENDENCIES.md @@ -43,6 +43,8 @@ following works: - github.com/golang/protobuf [BSD 3-Clause "New" or "Revised" License](https://github.com/golang/protobuf/blob/master/LICENSE) - github.com/golang/snappy [BSD 3-Clause "New" or "Revised" License](https://github.com/golang/snappy/blob/master/LICENSE) - github.com/google/go-cmp [BSD 3-Clause "New" or "Revised" License](https://github.com/google/go-cmp/blob/master/LICENSE) +- github.com/google/go-github [BSD 3-Clause "New" or "Revised" License](https://github.com/google/go-github/blob/master/LICENSE) +- github.com/google/go-querystring [BSD 3-Clause "New" or "Revised" License](https://github.com/google/go-querystring/blob/master/LICENSE) - github.com/google/uuid [BSD 3-Clause "New" or "Revised" License](https://github.com/google/uuid/blob/master/LICENSE) - github.com/googleapis/gax-go [BSD 3-Clause "New" or "Revised" License](https://github.com/googleapis/gax-go/blob/master/LICENSE) - github.com/gorilla/context [BSD 3-Clause "New" or "Revised" License](https://github.com/gorilla/context/blob/master/LICENSE) diff --git a/etc/telegraf.conf b/etc/telegraf.conf index 4c3de469c1cda..8e3264a84a58e 100644 --- a/etc/telegraf.conf +++ b/etc/telegraf.conf @@ -9,9 +9,9 @@ # Use 'telegraf -config telegraf.conf -test' to see what metrics a config # file would generate. # -# Environment variables can be used anywhere in this config file, simply prepend -# them with $. For strings the variable must be within quotes (ie, "$STR_VAR"), -# for numbers and booleans they should be plain (ie, $INT_VAR, $BOOL_VAR) +# Environment variables can be used anywhere in this config file, simply surround +# them with ${}. For strings the variable must be within quotes (ie, "${STR_VAR}"), +# for numbers and booleans they should be plain (ie, ${INT_VAR}, ${BOOL_VAR}) # Global tags can be specified here in key="value" format. @@ -35,10 +35,7 @@ ## This controls the size of writes that Telegraf sends to output plugins. metric_batch_size = 1000 - ## For failed writes, telegraf will cache metric_buffer_limit metrics for each - ## output, and will flush this buffer on a successful write. Oldest metrics - ## are dropped first when this buffer fills. - ## This buffer only fills when writes fail to output plugin(s). + ## Maximum number of unwritten metrics per output. 
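+  ## Oldest metrics are dropped first when this buffer fills.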
metric_buffer_limit = 10000 ## Collection jitter is used to jitter the collection by a random amount. @@ -1092,6 +1089,10 @@ # ## When true will convert all _ (underscore) characters in final metric name. default is true # #convert_paths = true # +# ## Use Strict rules to sanitize metric and tag names from invalid characters +# ## When enabled forward slash (/) and comma (,) will be accepted +# #use_strict = false +# # ## Use Regex to sanitize metric and tag names from invalid characters # ## Regex is more thorough, but significantly slower. default is false # #use_regex = false @@ -1352,6 +1353,10 @@ # ## aggregator and will not get sent to the output plugins. # drop_original = false # +# ## If true, the histogram will be reset on flush instead +# ## of accumulating the results. +# reset = false +# # ## Example config that aggregates all fields of the metric. # # [[aggregators.histogram.config]] # # ## The set of buckets. @@ -1469,7 +1474,8 @@ # Read metrics about system load & uptime [[inputs.system]] - # no configuration + ## Uncomment to remove deprecated metrics. + # fielddrop = ["uptime_format"] # # Gather ActiveMQ metrics @@ -1586,6 +1592,15 @@ # tubes = ["notifications"] +# # Read BIND nameserver XML statistics +# [[inputs.bind]] +# ## An array of BIND XML statistics URI to gather stats. +# ## Default is "http://localhost:8053/xml/v3". +# # urls = ["http://localhost:8053/xml/v3"] +# # gather_memory_contexts = false +# # gather_views = false + + # # Collect bond interface status, slaves statuses and failures count # [[inputs.bond]] # ## Sets 'proc' directory path @@ -2151,6 +2166,18 @@ # ] + +# # Gather repository information from GitHub hosted repositories. +# [[inputs.github]] +# ## List of repositories to monitor. +# repositories = ["influxdata/telegraf"] +# +# ## GitHub API access token. Unauthenticated requests are limited to 60 per hour. +# # access_token = "" +# +# ## Timeout for HTTP requests. +# # http_timeout = "5s" + + # # Read flattened metrics from one or more GrayLog HTTP endpoints # [[inputs.graylog]] # ## API endpoint, currently supported API: @@ -3260,9 +3287,19 @@ # ## "fcgi://10.0.0.12:9000/status" # ## "cgi://10.0.10.12:9001/status" # ## -# ## Example of multiple gathering from local socket and remove host +# ## Example of multiple gathering from local socket and remote host # ## urls = ["http://192.168.1.20/status", "/tmp/fpm.sock"] # urls = ["http://localhost/status"] +# +# ## Duration allowed to complete HTTP requests. +# # timeout = "5s" +# +# ## Optional TLS Config +# # tls_ca = "/etc/telegraf/ca.pem" +# # tls_cert = "/etc/telegraf/cert.pem" +# # tls_key = "/etc/telegraf/key.pem" +# ## Use TLS but skip chain & host verification +# # insecure_skip_verify = false # # Ping given url(s) and return statistics @@ -3334,6 +3371,9 @@ # ## Field name prefix # # prefix = "" # +# ## When true add the full cmdline as a tag. +# # cmdline_tag = false +# # ## Add PID as a tag instead of a field; useful to differentiate between # ## processes whose tags are otherwise the same. Can create a large number # ## of series, use judiciously. @@ -4853,6 +4893,9 @@ # ## - prometheus.io/path: If the metrics path is not /metrics, define it with this annotation. # ## - prometheus.io/port: If port is not 9102 use this annotation # # monitor_kubernetes_pods = true +# ## Restricts Kubernetes monitoring to a single namespace +# ## ex: monitor_kubernetes_pods_namespace = "default" +# # monitor_kubernetes_pods_namespace = "" # # ## Use bearer token for authorization. 
('bearer_token' takes priority) # # bearer_token = "/path/to/bearer/token" diff --git a/etc/telegraf_windows.conf b/etc/telegraf_windows.conf index f0bfbdba0bff3..3263eea11fc31 100644 --- a/etc/telegraf_windows.conf +++ b/etc/telegraf_windows.conf @@ -35,10 +35,7 @@ ## This controls the size of writes that Telegraf sends to output plugins. metric_batch_size = 1000 - ## For failed writes, telegraf will cache metric_buffer_limit metrics for each - ## output, and will flush this buffer on a successful write. Oldest metrics - ## are dropped first when this buffer fills. - ## This buffer only fills when writes fail to output plugin(s). + ## Maximum number of unwritten metrics per output. metric_buffer_limit = 10000 ## Collection jitter is used to jitter the collection by a random amount. diff --git a/internal/config/config.go b/internal/config/config.go index 1c47b1535b9dc..4f747113f6fa8 100644 --- a/internal/config/config.go +++ b/internal/config/config.go @@ -40,7 +40,7 @@ var ( outputDefaults = []string{"influxdb"} // envVarRe is a regex to find environment variables in the config file - envVarRe = regexp.MustCompile(`\$\w+`) + envVarRe = regexp.MustCompile(`\$\{(\w+)\}|\$(\w+)`) envVarEscaper = strings.NewReplacer( `"`, `\"`, @@ -208,9 +208,9 @@ var header = `# Telegraf Configuration # Use 'telegraf -config telegraf.conf -test' to see what metrics a config # file would generate. # -# Environment variables can be used anywhere in this config file, simply prepend -# them with $. For strings the variable must be within quotes (ie, "$STR_VAR"), -# for numbers and booleans they should be plain (ie, $INT_VAR, $BOOL_VAR) +# Environment variables can be used anywhere in this config file, simply surround +# them with ${}. For strings the variable must be within quotes (ie, "${STR_VAR}"), +# for numbers and booleans they should be plain (ie, ${INT_VAR}, ${BOOL_VAR}) # Global tags can be specified here in key="value" format. @@ -234,10 +234,7 @@ var header = `# Telegraf Configuration ## This controls the size of writes that Telegraf sends to output plugins. metric_batch_size = 1000 - ## For failed writes, telegraf will cache metric_buffer_limit metrics for each - ## output, and will flush this buffer on a successful write. Oldest metrics - ## are dropped first when this buffer fills. - ## This buffer only fills when writes fail to output plugin(s). + ## Maximum number of unwritten metrics per output. metric_buffer_limit = 10000 ## Collection jitter is used to jitter the collection by a random amount. 
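The two branches of the new `envVarRe` keep backwards compatibility: capture group 1 holds the name from the `${VAR}` form and group 2 the name from the legacy `$VAR` form, with at most one group non-nil per match. A minimal, self-contained sketch of how `FindAllSubmatch` drives the substitution (the helper name is hypothetical; the actual logic is the `parseConfig` hunk below):

```go
package main

import (
	"bytes"
	"fmt"
	"os"
	"regexp"
)

// Same two-branch pattern as config.go: ${VAR} in group 1, bare $VAR in group 2.
var envVarRe = regexp.MustCompile(`\$\{(\w+)\}|\$(\w+)`)

// replaceEnvVars is a hypothetical stand-in for the substitution loop.
func replaceEnvVars(contents []byte) []byte {
	for _, match := range envVarRe.FindAllSubmatch(contents, -1) {
		name := match[1] // ${VAR} form
		if name == nil {
			name = match[2] // legacy $VAR form
		}
		if val, ok := os.LookupEnv(string(name)); ok {
			// match[0] is the full token, including the $ or ${} wrapper.
			contents = bytes.Replace(contents, match[0], []byte(val), 1)
		}
	}
	return contents
}

func main() {
	os.Setenv("INFLUX_URL", "http://localhost:8086")
	fmt.Println(string(replaceEnvVars([]byte(`urls = ["${INFLUX_URL}"]`))))
	// Prints: urls = ["http://localhost:8086"]
}
```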
@@ -787,12 +784,25 @@ func fetchConfig(u *url.URL) ([]byte, error) { func parseConfig(contents []byte) (*ast.Table, error) { contents = trimBOM(contents) - env_vars := envVarRe.FindAll(contents, -1) - for _, env_var := range env_vars { + parameters := envVarRe.FindAllSubmatch(contents, -1) + for _, parameter := range parameters { + if len(parameter) != 3 { + continue + } + + var env_var []byte + if parameter[1] != nil { + env_var = parameter[1] + } else if parameter[2] != nil { + env_var = parameter[2] + } else { + continue + } + env_val, ok := os.LookupEnv(strings.TrimPrefix(string(env_var), "$")) if ok { env_val = escapeEnv(env_val) - contents = bytes.Replace(contents, env_var, []byte(env_val), 1) + contents = bytes.Replace(contents, parameter[0], []byte(env_val), 1) } } @@ -1797,6 +1807,30 @@ func buildSerializer(name string, tbl *ast.Table) (serializers.Serializer, error } } + if node, ok := tbl.Fields["wavefront_source_override"]; ok { + if kv, ok := node.(*ast.KeyValue); ok { + if ary, ok := kv.Value.(*ast.Array); ok { + for _, elem := range ary.Value { + if str, ok := elem.(*ast.String); ok { + c.WavefrontSourceOverride = append(c.WavefrontSourceOverride, str.Value) + } + } + } + } + } + + if node, ok := tbl.Fields["wavefront_use_strict"]; ok { + if kv, ok := node.(*ast.KeyValue); ok { + if b, ok := kv.Value.(*ast.Boolean); ok { + var err error + c.WavefrontUseStrict, err = b.Boolean() + if err != nil { + return nil, err + } + } + } + } + delete(tbl.Fields, "influx_max_line_bytes") delete(tbl.Fields, "influx_sort_fields") delete(tbl.Fields, "influx_uint_support") @@ -1806,6 +1840,8 @@ func buildSerializer(name string, tbl *ast.Table) (serializers.Serializer, error delete(tbl.Fields, "template") delete(tbl.Fields, "json_timestamp_units") delete(tbl.Fields, "splunkmetric_hec_routing") + delete(tbl.Fields, "wavefront_source_override") + delete(tbl.Fields, "wavefront_use_strict") return serializers.NewSerializer(c) } diff --git a/internal/config/config_test.go b/internal/config/config_test.go index cd7d2301cb55d..77b0dffd40992 100644 --- a/internal/config/config_test.go +++ b/internal/config/config_test.go @@ -11,7 +11,6 @@ import ( "github.com/influxdata/telegraf/plugins/inputs/memcached" "github.com/influxdata/telegraf/plugins/inputs/procstat" "github.com/influxdata/telegraf/plugins/parsers" - "github.com/stretchr/testify/assert" ) @@ -28,7 +27,7 @@ func TestConfig_LoadSingleInputWithEnvVars(t *testing.T) { filter := models.Filter{ NameDrop: []string{"metricname2"}, - NamePass: []string{"metricname1"}, + NamePass: []string{"metricname1", "ip_192.168.1.1_name"}, FieldDrop: []string{"other", "stuff"}, FieldPass: []string{"some", "strings"}, TagDrop: []models.TagFilter{ diff --git a/internal/config/testdata/single_plugin_env_vars.toml b/internal/config/testdata/single_plugin_env_vars.toml index 6600a77b3bae8..b1f71ea8adb78 100644 --- a/internal/config/testdata/single_plugin_env_vars.toml +++ b/internal/config/testdata/single_plugin_env_vars.toml @@ -1,6 +1,6 @@ [[inputs.memcached]] servers = ["$MY_TEST_SERVER"] - namepass = ["metricname1"] + namepass = ["metricname1", "ip_${MY_TEST_SERVER}_name"] namedrop = ["metricname2"] fieldpass = ["some", "strings"] fielddrop = ["other", "stuff"] diff --git a/internal/config/testdata/subconfig/..4984_10_04_08_28_06.119/invalid-config.conf b/internal/config/testdata/subconfig/..4984_10_04_08_28_06.119/invalid-config.conf deleted file mode 100644 index aee9abdfe93bb..0000000000000 --- 
a/internal/config/testdata/subconfig/..4984_10_04_08_28_06.119/invalid-config.conf +++ /dev/null @@ -1,4 +0,0 @@ -# This invalid config file should be skipped during testing -# as it is an ..data folder - -[[outputs.influxdb diff --git a/internal/internal.go b/internal/internal.go index b373c9c352d2e..133b19e9bd20b 100644 --- a/internal/internal.go +++ b/internal/internal.go @@ -288,11 +288,13 @@ func SleepContext(ctx context.Context, duration time.Duration) error { } // AlignDuration returns the duration until next aligned interval. +// If the current time is aligned a 0 duration is returned. func AlignDuration(tm time.Time, interval time.Duration) time.Duration { return AlignTime(tm, interval).Sub(tm) } // AlignTime returns the time of the next aligned interval. +// If the current time is aligned the current time is returned. func AlignTime(tm time.Time, interval time.Duration) time.Time { truncated := tm.Truncate(interval) if truncated == tm { diff --git a/internal/internal_test.go b/internal/internal_test.go index 681e1f8080ffa..da2fe01c51ee3 100644 --- a/internal/internal_test.go +++ b/internal/internal_test.go @@ -271,6 +271,40 @@ func TestAlignDuration(t *testing.T) { } } +func TestAlignTime(t *testing.T) { + rfc3339 := func(value string) time.Time { + t, _ := time.Parse(time.RFC3339, value) + return t + } + + tests := []struct { + name string + now time.Time + interval time.Duration + expected time.Time + }{ + { + name: "aligned", + now: rfc3339("2018-01-01T01:01:00Z"), + interval: 10 * time.Second, + expected: rfc3339("2018-01-01T01:01:00Z"), + }, + { + name: "unaligned", + now: rfc3339("2018-01-01T01:01:01Z"), + interval: 10 * time.Second, + expected: rfc3339("2018-01-01T01:01:10Z"), + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + actual := AlignTime(tt.now, tt.interval) + require.Equal(t, tt.expected, actual) + }) + } +} + func TestParseTimestamp(t *testing.T) { time, err := ParseTimestamp("2019-02-20 21:50:34.029665", "2006-01-02 15:04:05.000000") assert.Nil(t, err) diff --git a/internal/models/running_aggregator.go b/internal/models/running_aggregator.go index f54b5266e0369..8a2cd576ab0f4 100644 --- a/internal/models/running_aggregator.go +++ b/internal/models/running_aggregator.go @@ -1,6 +1,7 @@ package models import ( + "log" "sync" "time" @@ -74,9 +75,14 @@ func (r *RunningAggregator) Period() time.Duration { return r.Config.Period } -func (r *RunningAggregator) SetPeriodStart(start time.Time) { +func (r *RunningAggregator) EndPeriod() time.Time { + return r.periodEnd +} + +func (r *RunningAggregator) UpdateWindow(start, until time.Time) { r.periodStart = start - r.periodEnd = r.periodStart.Add(r.Config.Period).Add(r.Config.Delay) + r.periodEnd = until + log.Printf("D! [%s] Updated aggregation range [%s, %s]", r.Name(), start, until) } func (r *RunningAggregator) MakeMetric(metric telegraf.Metric) telegraf.Metric { @@ -97,10 +103,6 @@ func (r *RunningAggregator) MakeMetric(metric telegraf.Metric) telegraf.Metric { return m } -func (r *RunningAggregator) metricDropped(metric telegraf.Metric) { - r.MetricsDropped.Incr(1) -} - // Add a metric to the aggregator and return true if the original metric // should be dropped. 
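// Metrics timestamped before the window start, or after the window end plus
// the configured delay, are not aggregated and are counted as dropped.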
func (r *RunningAggregator) Add(m telegraf.Metric) bool { @@ -108,22 +110,25 @@ func (r *RunningAggregator) Add(m telegraf.Metric) bool { return false } - // Make a copy of the metric but don't retain tracking; it doesn't make - // sense to fail a metric's delivery due to the aggregation not being - // sent because we can't create aggregations of historical data. + // Make a copy of the metric but don't retain tracking. We do not fail a + // delivery due to the aggregation not being sent because we can't create + // aggregations of historical data. Additionally, waiting for the + // aggregation to be pushed would introduce a hefty latency to delivery. m = metric.FromMetric(m) r.Config.Filter.Modify(m) if len(m.FieldList()) == 0 { - r.metricDropped(m) + r.MetricsFiltered.Incr(1) return r.Config.DropOriginal } r.Lock() defer r.Unlock() - if r.periodStart.IsZero() || m.Time().After(r.periodEnd) { - r.metricDropped(m) + if m.Time().Before(r.periodStart) || m.Time().After(r.periodEnd.Add(r.Config.Delay)) { + log.Printf("D! [%s] metric is outside aggregation window; discarding. metric time: %s, window start: %s, window end: %s", + r.Name(), m.Time(), r.periodStart, r.periodEnd) + r.MetricsDropped.Incr(1) return r.Config.DropOriginal } @@ -135,8 +140,10 @@ func (r *RunningAggregator) Push(acc telegraf.Accumulator) { r.Lock() defer r.Unlock() - r.periodStart = r.periodEnd - r.periodEnd = r.periodStart.Add(r.Config.Period).Add(r.Config.Delay) + since := r.periodEnd + until := r.periodEnd.Add(r.Config.Period) + r.UpdateWindow(since, until) + r.push(acc) r.Aggregator.Reset() } diff --git a/internal/models/running_aggregator_test.go b/internal/models/running_aggregator_test.go index 76c7e4e5d75fd..19476eecfbc5a 100644 --- a/internal/models/running_aggregator_test.go +++ b/internal/models/running_aggregator_test.go @@ -23,7 +23,7 @@ func TestAdd(t *testing.T) { acc := testutil.Accumulator{} now := time.Now() - ra.SetPeriodStart(now) + ra.UpdateWindow(now, now.Add(ra.Config.Period)) m := testutil.MustMetric("RITest", map[string]string{}, @@ -51,7 +51,7 @@ func TestAddMetricsOutsideCurrentPeriod(t *testing.T) { require.NoError(t, ra.Config.Filter.Compile()) acc := testutil.Accumulator{} now := time.Now() - ra.SetPeriodStart(now) + ra.UpdateWindow(now, now.Add(ra.Config.Period)) m := testutil.MustMetric("RITest", map[string]string{}, @@ -86,7 +86,7 @@ func TestAddMetricsOutsideCurrentPeriod(t *testing.T) { ra.Push(&acc) require.Equal(t, 1, len(acc.Metrics)) - require.Equal(t, int64(202), acc.Metrics[0].Fields["sum"]) + require.Equal(t, int64(101), acc.Metrics[0].Fields["sum"]) } func TestAddAndPushOnePeriod(t *testing.T) { @@ -102,7 +102,7 @@ func TestAddAndPushOnePeriod(t *testing.T) { acc := testutil.Accumulator{} now := time.Now() - ra.SetPeriodStart(now) + ra.UpdateWindow(now, now.Add(ra.Config.Period)) m := testutil.MustMetric("RITest", map[string]string{}, @@ -129,7 +129,7 @@ func TestAddDropOriginal(t *testing.T) { require.NoError(t, ra.Config.Filter.Compile()) now := time.Now() - ra.SetPeriodStart(now) + ra.UpdateWindow(now, now.Add(ra.Config.Period)) m := testutil.MustMetric("RITest", map[string]string{}, diff --git a/internal/models/running_output.go b/internal/models/running_output.go index 531a3065bb36d..4cec18cc8284f 100644 --- a/internal/models/running_output.go +++ b/internal/models/running_output.go @@ -180,6 +180,13 @@ func (ro *RunningOutput) WriteBatch() error { return nil } +func (ro *RunningOutput) Close() { + err := ro.Output.Close() + if err != nil { + log.Printf("E! 
[outputs.%s] Error closing output: %v", ro.Name, err) + } +} + func (ro *RunningOutput) write(metrics []telegraf.Metric) error { start := time.Now() err := ro.Output.Write(metrics) diff --git a/plugins/aggregators/histogram/README.md b/plugins/aggregators/histogram/README.md index b4525681e41b9..f9dafd7890e0f 100644 --- a/plugins/aggregators/histogram/README.md +++ b/plugins/aggregators/histogram/README.md @@ -7,8 +7,9 @@ Values added to a bucket are also added to the larger buckets in the distribution. This creates a [cumulative histogram](https://en.wikipedia.org/wiki/Histogram#/media/File:Cumulative_vs_normal_histogram.svg). Like other Telegraf aggregators, the metric is emitted every `period` seconds. -Bucket counts however are not reset between periods and will be non-strictly -increasing while Telegraf is running. +By default bucket counts are not reset between periods and will be non-strictly +increasing while Telegraf is running. This behavior can be changed by setting the +`reset` parameter to true. #### Design @@ -34,6 +35,10 @@ of the algorithm which is implemented in the Prometheus ## aggregator and will not get sent to the output plugins. drop_original = false + ## If true, the histogram will be reset on flush instead + ## of accumulating the results. + reset = false + ## Example config that aggregates all fields of the metric. # [[aggregators.histogram.config]] # ## The set of buckets. diff --git a/plugins/aggregators/histogram/histogram.go b/plugins/aggregators/histogram/histogram.go index a60cede3d7e15..a565d89023ba5 100644 --- a/plugins/aggregators/histogram/histogram.go +++ b/plugins/aggregators/histogram/histogram.go @@ -16,7 +16,8 @@ const bucketInf = "+Inf" // HistogramAggregator is aggregator with histogram configs and particular histograms for defined metrics type HistogramAggregator struct { - Configs []config `toml:"config"` + Configs []config `toml:"config"` + ResetBuckets bool `toml:"reset"` buckets bucketsByMetrics cache map[uint64]metricHistogramCollection @@ -72,6 +73,10 @@ var sampleConfig = ` ## aggregator and will not get sent to the output plugins. drop_original = false + ## If true, the histogram will be reset on flush instead + ## of accumulating the results. + reset = false + ## Example config that aggregates all fields of the metric. # [[aggregators.histogram.config]] # ## The set of buckets. @@ -201,9 +206,15 @@ func (h *HistogramAggregator) groupField( ) } -// Reset does nothing, because we need to collect counts for a long time, otherwise if config parameter 'reset' has -// small value, we will get a histogram with a small amount of the distribution. -func (h *HistogramAggregator) Reset() {} +// Reset clears the cache and buckets when the 'reset' config parameter is true; +// otherwise it does nothing, since counts usually must accumulate across periods +// to build up a meaningful distribution. 
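+// Pairing 'reset' with a short period trades lifetime accumulation for a
+// per-flush snapshot of the distribution.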
+func (h *HistogramAggregator) Reset() { + if h.ResetBuckets { + h.resetCache() + h.buckets = make(bucketsByMetrics) + } +} // resetCache resets cached counts(hits) in the buckets func (h *HistogramAggregator) resetCache() { diff --git a/plugins/aggregators/histogram/histogram_test.go b/plugins/aggregators/histogram/histogram_test.go index 8c4a2b9d34620..69423583160d1 100644 --- a/plugins/aggregators/histogram/histogram_test.go +++ b/plugins/aggregators/histogram/histogram_test.go @@ -12,8 +12,8 @@ import ( ) // NewTestHistogram creates new test histogram aggregation with specified config -func NewTestHistogram(cfg []config) telegraf.Aggregator { - htm := &HistogramAggregator{Configs: cfg} +func NewTestHistogram(cfg []config, reset bool) telegraf.Aggregator { + htm := &HistogramAggregator{Configs: cfg, ResetBuckets: reset} htm.buckets = make(bucketsByMetrics) htm.resetCache() @@ -69,11 +69,12 @@ func BenchmarkApply(b *testing.B) { func TestHistogramWithPeriodAndOneField(t *testing.T) { var cfg []config cfg = append(cfg, config{Metric: "first_metric_name", Fields: []string{"a"}, Buckets: []float64{0.0, 10.0, 20.0, 30.0, 40.0}}) - histogram := NewTestHistogram(cfg) + histogram := NewTestHistogram(cfg, false) acc := &testutil.Accumulator{} histogram.Add(firstMetric1) + histogram.Reset() histogram.Add(firstMetric2) histogram.Push(acc) @@ -88,12 +89,36 @@ func TestHistogramWithPeriodAndOneField(t *testing.T) { assertContainsTaggedField(t, acc, "first_metric_name", map[string]interface{}{"a_bucket": int64(2)}, bucketInf) } +// TestHistogramWithReset tests that enabling 'reset' clears previously accumulated bucket counts on flush +func TestHistogramWithReset(t *testing.T) { + var cfg []config + cfg = append(cfg, config{Metric: "first_metric_name", Fields: []string{"a"}, Buckets: []float64{0.0, 10.0, 20.0, 30.0, 40.0}}) + histogram := NewTestHistogram(cfg, true) + + acc := &testutil.Accumulator{} + + histogram.Add(firstMetric1) + histogram.Reset() + histogram.Add(firstMetric2) + histogram.Push(acc) + + if len(acc.Metrics) != 6 { + assert.Fail(t, "Incorrect number of metrics") + } + assertContainsTaggedField(t, acc, "first_metric_name", map[string]interface{}{"a_bucket": int64(0)}, "0") + assertContainsTaggedField(t, acc, "first_metric_name", map[string]interface{}{"a_bucket": int64(0)}, "10") + assertContainsTaggedField(t, acc, "first_metric_name", map[string]interface{}{"a_bucket": int64(1)}, "20") + assertContainsTaggedField(t, acc, "first_metric_name", map[string]interface{}{"a_bucket": int64(1)}, "30") + assertContainsTaggedField(t, acc, "first_metric_name", map[string]interface{}{"a_bucket": int64(1)}, "40") + assertContainsTaggedField(t, acc, "first_metric_name", map[string]interface{}{"a_bucket": int64(1)}, bucketInf) +} + // TestHistogramWithPeriodAndAllFields tests two metrics for one period and for all fields func TestHistogramWithPeriodAndAllFields(t *testing.T) { var cfg []config cfg = append(cfg, config{Metric: "first_metric_name", Buckets: []float64{0.0, 15.5, 20.0, 30.0, 40.0}}) cfg = append(cfg, config{Metric: "second_metric_name", Buckets: []float64{0.0, 4.0, 10.0, 23.0, 30.0}}) - histogram := NewTestHistogram(cfg) + histogram := NewTestHistogram(cfg, false) acc := &testutil.Accumulator{} 
histogram.Add(firstMetric1) @@ -166,7 +191,7 @@ func TestWrongBucketsOrder(t *testing.T) { var cfg []config cfg = append(cfg, config{Metric: "first_metric_name", Buckets: []float64{0.0, 90.0, 20.0, 30.0, 40.0}}) - histogram := NewTestHistogram(cfg) + histogram := NewTestHistogram(cfg, false) histogram.Add(firstMetric2) } diff --git a/plugins/inputs/all/all.go b/plugins/inputs/all/all.go index 765505c3eab50..7c592e925b0c5 100644 --- a/plugins/inputs/all/all.go +++ b/plugins/inputs/all/all.go @@ -8,6 +8,7 @@ import ( _ "github.com/influxdata/telegraf/plugins/inputs/aurora" _ "github.com/influxdata/telegraf/plugins/inputs/bcache" _ "github.com/influxdata/telegraf/plugins/inputs/beanstalkd" + _ "github.com/influxdata/telegraf/plugins/inputs/bind" _ "github.com/influxdata/telegraf/plugins/inputs/bond" _ "github.com/influxdata/telegraf/plugins/inputs/burrow" _ "github.com/influxdata/telegraf/plugins/inputs/cassandra" @@ -38,6 +39,7 @@ import ( _ "github.com/influxdata/telegraf/plugins/inputs/filecount" _ "github.com/influxdata/telegraf/plugins/inputs/filestat" _ "github.com/influxdata/telegraf/plugins/inputs/fluentd" + _ "github.com/influxdata/telegraf/plugins/inputs/github" _ "github.com/influxdata/telegraf/plugins/inputs/graylog" _ "github.com/influxdata/telegraf/plugins/inputs/haproxy" _ "github.com/influxdata/telegraf/plugins/inputs/hddtemp" diff --git a/plugins/inputs/bind/README.md b/plugins/inputs/bind/README.md new file mode 100644 index 0000000000000..34d419d3a1809 --- /dev/null +++ b/plugins/inputs/bind/README.md @@ -0,0 +1,118 @@ +# BIND 9 Nameserver Statistics Input Plugin + +This plugin decodes the JSON or XML statistics provided by BIND 9 nameservers. + +### XML Statistics Channel + +Version 2 statistics (BIND 9.6 - 9.9) and version 3 statistics (BIND 9.9+) are supported. Note that +for BIND 9.9 to support version 3 statistics, it must be built with the `--enable-newstats` compile +flag, and it must be specifically requested via the correct URL. Version 3 statistics are the +default (and only) XML format in BIND 9.10+. + +### JSON Statistics Channel + +JSON statistics schema version 1 (BIND 9.10+) is supported. As of writing, some distros still do +not enable support for JSON statistics in their BIND packages. + +### Configuration: + +- **urls** []string: List of BIND statistics channel URLs to collect from. Do not include a + trailing slash in the URL. Default is "http://localhost:8053/xml/v3". +- **gather_memory_contexts** bool: Report per-context memory statistics. +- **gather_views** bool: Report per-view query statistics. + +The following table summarizes the URL formats which should be used, depending on your BIND +version and configured statistics channel. + +| BIND Version | Statistics Format | Example URL | +| ------------ | ----------------- | ----------------------------- | +| 9.6 - 9.8 | XML v2 | http://localhost:8053 | +| 9.9 | XML v2 | http://localhost:8053/xml/v2 | +| 9.9+ | XML v3 | http://localhost:8053/xml/v3 | +| 9.10+ | JSON v1 | http://localhost:8053/json/v1 | + +#### Configuration of BIND Daemon + +Add the following to your named.conf if running Telegraf on the same host as the BIND daemon: +``` +statistics-channels { + inet 127.0.0.1 port 8053; +}; +``` + +Alternatively, specify a wildcard address (e.g., 0.0.0.0) or specific IP address of an interface to +configure the BIND daemon to listen on that address. Note that you should secure the statistics +channel with an ACL if it is publicly reachable. 
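+For example, to listen on all interfaces while only allowing specific hosts
+(the addresses below are illustrative):
+```
+acl stats-hosts {
+    127.0.0.1;
+    192.0.2.0/24;
+};
+
+statistics-channels {
+    inet 0.0.0.0 port 8053 allow { stats-hosts; };
+};
+```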
Consult the BIND Administrator Reference Manual +for more information. + +### Measurements & Fields: + +- bind_counter + - name=value (multiple) +- bind_memory + - total_use + - in_use + - block_size + - context_size + - lost +- bind_memory_context + - total + - in_use + +### Tags: + +- All measurements + - url + - source + - port +- bind_counter + - type + - view (optional) +- bind_memory_context + - id + - name + +### Sample Queries: + +These are some useful queries (to generate dashboards or other) to run against data from this +plugin: + +``` +SELECT non_negative_derivative(mean(/^A$|^PTR$/), 5m) FROM bind_counter \ +WHERE "url" = 'localhost:8053' AND "type" = 'qtype' AND time > now() - 1h \ +GROUP BY time(5m), "type" +``` + +``` +name: bind_counter +tags: type=qtype +time non_negative_derivative_A non_negative_derivative_PTR +---- ------------------------- --------------------------- +1553862000000000000 254.99444444430992 1388.311111111194 +1553862300000000000 354 2135.716666666791 +1553862600000000000 316.8666666666977 2130.133333333768 +1553862900000000000 309.05000000004657 2126.75 +1553863200000000000 315.64999999990687 2128.483333332464 +1553863500000000000 308.9166666667443 2132.350000000559 +1553863800000000000 302.64999999990687 2131.1833333335817 +1553864100000000000 310.85000000009313 2132.449999999255 +1553864400000000000 314.3666666666977 2136.216666666791 +1553864700000000000 303.2333333331626 2133.8166666673496 +1553865000000000000 304.93333333334886 2127.333333333023 +1553865300000000000 317.93333333334886 2130.3166666664183 +1553865600000000000 280.6666666667443 1807.9071428570896 +``` + +### Example Output + +Here is example output of this plugin: + +``` +bind_memory,host=LAP,port=8053,source=localhost,url=localhost:8053 block_size=12058624i,context_size=4575056i,in_use=4113717i,lost=0i,total_use=16663252i 1554276619000000000 +bind_counter,host=LAP,port=8053,source=localhost,type=opcode,url=localhost:8053 IQUERY=0i,NOTIFY=0i,QUERY=9i,STATUS=0i,UPDATE=0i 1554276619000000000 +bind_counter,host=LAP,port=8053,source=localhost,type=rcode,url=localhost:8053 17=0i,18=0i,19=0i,20=0i,21=0i,22=0i,BADCOOKIE=0i,BADVERS=0i,FORMERR=0i,NOERROR=7i,NOTAUTH=0i,NOTIMP=0i,NOTZONE=0i,NXDOMAIN=0i,NXRRSET=0i,REFUSED=0i,RESERVED11=0i,RESERVED12=0i,RESERVED13=0i,RESERVED14=0i,RESERVED15=0i,SERVFAIL=2i,YXDOMAIN=0i,YXRRSET=0i 1554276619000000000 +bind_counter,host=LAP,port=8053,source=localhost,type=qtype,url=localhost:8053 A=1i,ANY=1i,NS=1i,PTR=5i,SOA=1i 1554276619000000000 +bind_counter,host=LAP,port=8053,source=localhost,type=nsstat,url=localhost:8053 AuthQryRej=0i,CookieBadSize=0i,CookieBadTime=0i,CookieIn=9i,CookieMatch=0i,CookieNew=9i,CookieNoMatch=0i,DNS64=0i,ECSOpt=0i,ExpireOpt=0i,KeyTagOpt=0i,NSIDOpt=0i,OtherOpt=0i,QryAuthAns=7i,QryBADCOOKIE=0i,QryDropped=0i,QryDuplicate=0i,QryFORMERR=0i,QryFailure=0i,QryNXDOMAIN=0i,QryNXRedir=0i,QryNXRedirRLookup=0i,QryNoauthAns=0i,QryNxrrset=1i,QryRecursion=2i,QryReferral=0i,QrySERVFAIL=2i,QrySuccess=6i,QryTCP=1i,QryUDP=8i,RPZRewrites=0i,RateDropped=0i,RateSlipped=0i,RecQryRej=0i,RecursClients=0i,ReqBadEDNSVer=0i,ReqBadSIG=0i,ReqEdns0=9i,ReqSIG0=0i,ReqTCP=1i,ReqTSIG=0i,Requestv4=9i,Requestv6=0i,RespEDNS0=9i,RespSIG0=0i,RespTSIG=0i,Response=9i,TruncatedResp=0i,UpdateBadPrereq=0i,UpdateDone=0i,UpdateFail=0i,UpdateFwdFail=0i,UpdateRej=0i,UpdateReqFwd=0i,UpdateRespFwd=0i,XfrRej=0i,XfrReqDone=0i 1554276619000000000 +bind_counter,host=LAP,port=8053,source=localhost,type=zonestat,url=localhost:8053 
AXFRReqv4=0i,AXFRReqv6=0i,IXFRReqv4=0i,IXFRReqv6=0i,NotifyInv4=0i,NotifyInv6=0i,NotifyOutv4=0i,NotifyOutv6=0i,NotifyRej=0i,SOAOutv4=0i,SOAOutv6=0i,XfrFail=0i,XfrSuccess=0i 1554276619000000000 +bind_counter,host=LAP,port=8053,source=localhost,type=sockstat,url=localhost:8053 FDWatchClose=0i,FDwatchConn=0i,FDwatchConnFail=0i,FDwatchRecvErr=0i,FDwatchSendErr=0i,FdwatchBindFail=0i,RawActive=1i,RawClose=0i,RawOpen=1i,RawOpenFail=0i,RawRecvErr=0i,TCP4Accept=6i,TCP4AcceptFail=0i,TCP4Active=9i,TCP4BindFail=0i,TCP4Close=5i,TCP4Conn=0i,TCP4ConnFail=0i,TCP4Open=8i,TCP4OpenFail=0i,TCP4RecvErr=0i,TCP4SendErr=0i,TCP6Accept=0i,TCP6AcceptFail=0i,TCP6Active=2i,TCP6BindFail=0i,TCP6Close=0i,TCP6Conn=0i,TCP6ConnFail=0i,TCP6Open=2i,TCP6OpenFail=0i,TCP6RecvErr=0i,TCP6SendErr=0i,UDP4Active=18i,UDP4BindFail=14i,UDP4Close=14i,UDP4Conn=0i,UDP4ConnFail=0i,UDP4Open=32i,UDP4OpenFail=0i,UDP4RecvErr=0i,UDP4SendErr=0i,UDP6Active=3i,UDP6BindFail=0i,UDP6Close=6i,UDP6Conn=0i,UDP6ConnFail=6i,UDP6Open=9i,UDP6OpenFail=0i,UDP6RecvErr=0i,UDP6SendErr=0i,UnixAccept=0i,UnixAcceptFail=0i,UnixActive=0i,UnixBindFail=0i,UnixClose=0i,UnixConn=0i,UnixConnFail=0i,UnixOpen=0i,UnixOpenFail=0i,UnixRecvErr=0i,UnixSendErr=0i 1554276619000000000 +``` diff --git a/plugins/inputs/bind/bind.go b/plugins/inputs/bind/bind.go new file mode 100644 index 0000000000000..967c9031a2634 --- /dev/null +++ b/plugins/inputs/bind/bind.go @@ -0,0 +1,87 @@ +package bind + +import ( + "fmt" + "net/http" + "net/url" + "sync" + "time" + + "github.com/influxdata/telegraf" + "github.com/influxdata/telegraf/plugins/inputs" +) + +type Bind struct { + Urls []string + GatherMemoryContexts bool + GatherViews bool +} + +var sampleConfig = ` + ## An array of BIND XML statistics URI to gather stats. + ## Default is "http://localhost:8053/xml/v3". + # urls = ["http://localhost:8053/xml/v3"] + # gather_memory_contexts = false + # gather_views = false +` + +var client = &http.Client{ + Timeout: time.Duration(4 * time.Second), +} + +func (b *Bind) Description() string { + return "Read BIND nameserver XML statistics" +} + +func (b *Bind) SampleConfig() string { + return sampleConfig +} + +func (b *Bind) Gather(acc telegraf.Accumulator) error { + var wg sync.WaitGroup + + if len(b.Urls) == 0 { + b.Urls = []string{"http://localhost:8053/xml/v3"} + } + + for _, u := range b.Urls { + addr, err := url.Parse(u) + if err != nil { + acc.AddError(fmt.Errorf("Unable to parse address '%s': %s", u, err)) + continue + } + + wg.Add(1) + go func(addr *url.URL) { + defer wg.Done() + acc.AddError(b.gatherUrl(addr, acc)) + }(addr) + } + + wg.Wait() + return nil +} + +func (b *Bind) gatherUrl(addr *url.URL, acc telegraf.Accumulator) error { + switch addr.Path { + case "": + // BIND 9.6 - 9.8 + return b.readStatsXMLv2(addr, acc) + case "/json/v1": + // BIND 9.10+ + return b.readStatsJSON(addr, acc) + case "/xml/v2": + // BIND 9.9 + return b.readStatsXMLv2(addr, acc) + case "/xml/v3": + // BIND 9.9+ + return b.readStatsXMLv3(addr, acc) + default: + return fmt.Errorf("URL %s is ambiguous. 
Please check plugin documentation for supported URL formats.", + addr) + } +} + +func init() { + inputs.Add("bind", func() telegraf.Input { return &Bind{} }) +} diff --git a/plugins/inputs/bind/bind_test.go b/plugins/inputs/bind/bind_test.go new file mode 100644 index 0000000000000..b961d549db051 --- /dev/null +++ b/plugins/inputs/bind/bind_test.go @@ -0,0 +1,581 @@ +package bind + +import ( + "net" + "net/http" + "net/http/httptest" + "testing" + + "github.com/influxdata/telegraf/testutil" + + "github.com/stretchr/testify/assert" +) + +func TestBindJsonStats(t *testing.T) { + ts := httptest.NewServer(http.FileServer(http.Dir("testdata"))) + url := ts.Listener.Addr().String() + host, port, _ := net.SplitHostPort(url) + defer ts.Close() + + b := Bind{ + Urls: []string{ts.URL + "/json/v1"}, + GatherMemoryContexts: true, + GatherViews: true, + } + + var acc testutil.Accumulator + err := acc.GatherError(b.Gather) + + assert.Nil(t, err) + + // Use subtests for counters, since they are similar structure + type fieldSet struct { + fieldKey string + fieldValue int64 + } + + testCases := []struct { + counterType string + values []fieldSet + }{ + { + "opcode", + []fieldSet{ + {"NOTIFY", 0}, + {"UPDATE", 0}, + {"IQUERY", 0}, + {"QUERY", 13}, + {"STATUS", 0}, + }, + }, + { + "qtype", + []fieldSet{ + {"A", 2}, + {"AAAA", 2}, + {"PTR", 7}, + {"SRV", 2}, + }, + }, + { + "nsstat", + []fieldSet{ + {"QrySuccess", 6}, + {"QryRecursion", 12}, + {"Requestv4", 13}, + {"QryNXDOMAIN", 4}, + {"QryAuthAns", 1}, + {"QryNxrrset", 1}, + {"QryNoauthAns", 10}, + {"QryUDP", 13}, + {"QryDuplicate", 1}, + {"QrySERVFAIL", 1}, + {"Response", 12}, + }, + }, + { + "sockstat", + []fieldSet{ + {"TCP4Open", 118}, + {"UDP6Close", 112}, + {"UDP4Close", 333}, + {"TCP4Close", 119}, + {"TCP6Active", 2}, + {"UDP4Active", 2}, + {"UDP4RecvErr", 1}, + {"UDP4Open", 335}, + {"TCP4Active", 10}, + {"RawActive", 1}, + {"UDP6ConnFail", 112}, + {"TCP4Conn", 114}, + {"UDP6Active", 1}, + {"UDP6Open", 113}, + {"UDP4Conn", 333}, + {"UDP6SendErr", 112}, + {"RawOpen", 1}, + {"TCP4Accept", 6}, + {"TCP6Open", 2}, + }, + }, + } + + for _, tc := range testCases { + t.Run(tc.counterType, func(t *testing.T) { + tags := map[string]string{ + "url": url, + "type": tc.counterType, + "source": host, + "port": port, + } + + fields := map[string]interface{}{} + + for _, val := range tc.values { + fields[val.fieldKey] = val.fieldValue + } + + acc.AssertContainsTaggedFields(t, "bind_counter", fields, tags) + }) + } + + // Subtest for memory stats + t.Run("memory", func(t *testing.T) { + tags := map[string]string{ + "url": url, + "source": host, + "port": port, + } + + fields := map[string]interface{}{ + "block_size": 13893632, + "context_size": 3685480, + "in_use": 3064368, + "lost": 0, + "total_use": 18206566, + } + + acc.AssertContainsTaggedFields(t, "bind_memory", fields, tags) + }) + + // Subtest for per-context memory stats + t.Run("memory_context", func(t *testing.T) { + assert.True(t, acc.HasIntField("bind_memory_context", "total")) + assert.True(t, acc.HasIntField("bind_memory_context", "in_use")) + }) +} + +func TestBindXmlStatsV2(t *testing.T) { + ts := httptest.NewServer(http.FileServer(http.Dir("testdata"))) + url := ts.Listener.Addr().String() + host, port, _ := net.SplitHostPort(url) + defer ts.Close() + + b := Bind{ + Urls: []string{ts.URL + "/xml/v2"}, + GatherMemoryContexts: true, + GatherViews: true, + } + + var acc testutil.Accumulator + err := acc.GatherError(b.Gather) + + assert.Nil(t, err) + + // Use subtests for counters, since they are 
similar structure + type fieldSet struct { + fieldKey string + fieldValue int64 + } + + testCases := []struct { + counterType string + values []fieldSet + }{ + { + "opcode", + []fieldSet{ + {"UPDATE", 238}, + {"QUERY", 102312374}, + }, + }, + { + "qtype", + []fieldSet{ + {"ANY", 7}, + {"DNSKEY", 452}, + {"SSHFP", 2987}, + {"SOA", 100415}, + {"AAAA", 37786321}, + {"MX", 441155}, + {"IXFR", 157}, + {"CNAME", 531}, + {"NS", 1999}, + {"TXT", 34628}, + {"A", 58951432}, + {"SRV", 741082}, + {"PTR", 4211487}, + {"NAPTR", 39137}, + {"DS", 584}, + }, + }, + { + "nsstat", + []fieldSet{ + {"XfrReqDone", 157}, + {"ReqEdns0", 441758}, + {"ReqTSIG", 0}, + {"UpdateRespFwd", 0}, + {"RespEDNS0", 441748}, + {"QryDropped", 16}, + {"RPZRewrites", 0}, + {"XfrRej", 0}, + {"RecQryRej", 0}, + {"QryNxrrset", 24423133}, + {"QryFORMERR", 0}, + {"ReqTCP", 1548156}, + {"UpdateDone", 0}, + {"QrySERVFAIL", 14422}, + {"QryRecursion", 2104239}, + {"Requestv4", 102312611}, + {"UpdateFwdFail", 0}, + {"QryReferral", 3}, + {"Response", 102301560}, + {"RespTSIG", 0}, + {"QrySuccess", 63811668}, + {"QryFailure", 0}, + {"RespSIG0", 0}, + {"ReqSIG0", 0}, + {"UpdateRej", 238}, + {"QryAuthAns", 72180718}, + {"UpdateFail", 0}, + {"QryDuplicate", 10879}, + {"RateDropped", 0}, + {"QryNoauthAns", 30106182}, + {"QryNXDOMAIN", 14052096}, + {"ReqBadSIG", 0}, + {"UpdateReqFwd", 0}, + {"RateSlipped", 0}, + {"TruncatedResp", 3787}, + {"Requestv6", 1}, + {"UpdateBadPrereq", 0}, + {"AuthQryRej", 0}, + {"ReqBadEDNSVer", 0}, + }, + }, + { + "sockstat", + []fieldSet{ + {"FdwatchBindFail", 0}, + {"UDP6Open", 238269}, + {"UDP6SendErr", 238250}, + {"TCP4ConnFail", 0}, + {"TCP4Conn", 590}, + {"TCP6AcceptFail", 0}, + {"UDP4SendErr", 0}, + {"FDwatchConn", 0}, + {"TCP4RecvErr", 1}, + {"TCP4OpenFail", 0}, + {"UDP4OpenFail", 0}, + {"UDP6OpenFail", 0}, + {"TCP4Close", 1548268}, + {"TCP6BindFail", 0}, + {"TCP4AcceptFail", 0}, + {"UnixConn", 0}, + {"UDP4Open", 3765532}, + {"TCP6Close", 0}, + {"FDwatchRecvErr", 0}, + {"UDP4Conn", 3764828}, + {"UnixConnFail", 0}, + {"TCP6Conn", 0}, + {"TCP6OpenFail", 0}, + {"TCP6SendErr", 0}, + {"TCP6RecvErr", 0}, + {"FDwatchSendErr", 0}, + {"UDP4RecvErr", 1650}, + {"UDP4ConnFail", 0}, + {"UDP6Close", 238267}, + {"FDWatchClose", 0}, + {"TCP4Accept", 1547672}, + {"UnixAccept", 0}, + {"TCP4Open", 602}, + {"UDP4BindFail", 219}, + {"UDP6ConnFail", 238250}, + {"UnixClose", 0}, + {"TCP4BindFail", 0}, + {"UnixOpenFail", 0}, + {"UDP6BindFail", 16}, + {"UnixOpen", 0}, + {"UnixAcceptFail", 0}, + {"UnixRecvErr", 0}, + {"UDP6RecvErr", 0}, + {"TCP6ConnFail", 0}, + {"FDwatchConnFail", 0}, + {"TCP4SendErr", 0}, + {"UDP4Close", 3765528}, + {"UnixSendErr", 0}, + {"TCP6Open", 2}, + {"UDP6Conn", 1}, + {"TCP6Accept", 0}, + {"UnixBindFail", 0}, + }, + }, + } + + for _, tc := range testCases { + t.Run(tc.counterType, func(t *testing.T) { + tags := map[string]string{ + "url": url, + "type": tc.counterType, + "source": host, + "port": port, + } + + fields := map[string]interface{}{} + + for _, val := range tc.values { + fields[val.fieldKey] = val.fieldValue + } + + acc.AssertContainsTaggedFields(t, "bind_counter", fields, tags) + }) + } + + // Subtest for memory stats + t.Run("memory", func(t *testing.T) { + tags := map[string]string{ + "url": url, + "source": host, + "port": port, + } + + fields := map[string]interface{}{ + "block_size": 77070336, + "context_size": 6663840, + "in_use": 20772579, + "lost": 0, + "total_use": 81804609, + } + + acc.AssertContainsTaggedFields(t, "bind_memory", fields, tags) + }) + + // Subtest for per-context memory 
stats + t.Run("memory_context", func(t *testing.T) { + assert.True(t, acc.HasIntField("bind_memory_context", "total")) + assert.True(t, acc.HasIntField("bind_memory_context", "in_use")) + }) +} + +func TestBindXmlStatsV3(t *testing.T) { + ts := httptest.NewServer(http.FileServer(http.Dir("testdata"))) + url := ts.Listener.Addr().String() + host, port, _ := net.SplitHostPort(url) + defer ts.Close() + + b := Bind{ + Urls: []string{ts.URL + "/xml/v3"}, + GatherMemoryContexts: true, + GatherViews: true, + } + + var acc testutil.Accumulator + err := acc.GatherError(b.Gather) + + assert.Nil(t, err) + + // Use subtests for counters, since they are similar structure + type fieldSet struct { + fieldKey string + fieldValue int64 + } + + testCases := []struct { + counterType string + values []fieldSet + }{ + { + "opcode", + []fieldSet{ + {"NOTIFY", 0}, + {"UPDATE", 0}, + {"IQUERY", 0}, + {"QUERY", 74941}, + {"STATUS", 0}, + }, + }, + { + "qtype", + []fieldSet{ + {"ANY", 22}, + {"SOA", 18}, + {"AAAA", 5735}, + {"MX", 618}, + {"NS", 373}, + {"TXT", 970}, + {"A", 63672}, + {"SRV", 139}, + {"PTR", 3393}, + {"RRSIG", 1}, + }, + }, + { + "nsstat", + []fieldSet{ + {"DNS64", 0}, + {"ExpireOpt", 0}, + {"NSIDOpt", 0}, + {"OtherOpt", 59}, + {"XfrReqDone", 0}, + {"ReqEdns0", 9250}, + {"ReqTSIG", 0}, + {"UpdateRespFwd", 0}, + {"RespEDNS0", 9250}, + {"QryDropped", 11}, + {"RPZRewrites", 0}, + {"XfrRej", 0}, + {"RecQryRej", 35}, + {"QryNxrrset", 2452}, + {"QryFORMERR", 0}, + {"ReqTCP", 260}, + {"QryTCP", 258}, + {"QryUDP", 74648}, + {"UpdateDone", 0}, + {"QrySERVFAIL", 122}, + {"QryRecursion", 53750}, + {"RecursClients", 0}, + {"Requestv4", 74942}, + {"UpdateFwdFail", 0}, + {"QryReferral", 0}, + {"Response", 63264}, + {"RespTSIG", 0}, + {"QrySuccess", 49044}, + {"QryFailure", 35}, + {"RespSIG0", 0}, + {"ReqSIG0", 0}, + {"UpdateRej", 0}, + {"QryAuthAns", 2752}, + {"UpdateFail", 0}, + {"QryDuplicate", 11667}, + {"RateDropped", 0}, + {"QryNoauthAns", 60354}, + {"QryNXDOMAIN", 11610}, + {"ReqBadSIG", 0}, + {"UpdateReqFwd", 0}, + {"RateSlipped", 0}, + {"TruncatedResp", 365}, + {"Requestv6", 0}, + {"UpdateBadPrereq", 0}, + {"AuthQryRej", 0}, + {"ReqBadEDNSVer", 0}, + {"SitBadSize", 0}, + {"SitBadTime", 0}, + {"SitMatch", 0}, + {"SitNew", 0}, + {"SitNoMatch", 0}, + {"SitOpt", 0}, + {"TruncatedResp", 365}, + }, + }, + { + "sockstat", + []fieldSet{ + {"FDwatchConnFail", 0}, + {"UnixClose", 0}, + {"TCP6OpenFail", 0}, + {"TCP6Active", 0}, + {"UDP4RecvErr", 14}, + {"TCP6Conn", 0}, + {"FDWatchClose", 0}, + {"TCP4ConnFail", 0}, + {"UnixConn", 0}, + {"UnixSendErr", 0}, + {"UDP6Close", 0}, + {"UnixOpen", 0}, + {"UDP4Conn", 92535}, + {"TCP4Close", 336}, + {"UnixAcceptFail", 0}, + {"UnixAccept", 0}, + {"TCP6AcceptFail", 0}, + {"UDP6Open", 0}, + {"UDP6BindFail", 0}, + {"UDP6RecvErr", 0}, + {"RawOpenFail", 0}, + {"TCP4Accept", 293}, + {"UDP6SendErr", 0}, + {"UDP6Conn", 0}, + {"TCP4SendErr", 0}, + {"UDP4BindFail", 1}, + {"UDP4Active", 4}, + {"TCP4Active", 297}, + {"UnixConnFail", 0}, + {"UnixOpenFail", 0}, + {"UDP6ConnFail", 0}, + {"TCP6Accept", 0}, + {"UnixRecvErr", 0}, + {"RawActive", 1}, + {"UDP6OpenFail", 0}, + {"RawClose", 0}, + {"UnixBindFail", 0}, + {"UnixActive", 0}, + {"FdwatchBindFail", 0}, + {"UDP4SendErr", 0}, + {"RawRecvErr", 0}, + {"TCP6Close", 0}, + {"FDwatchRecvErr", 0}, + {"TCP4BindFail", 0}, + {"TCP4AcceptFail", 0}, + {"TCP4OpenFail", 0}, + {"UDP4Open", 92542}, + {"UDP4ConnFail", 0}, + {"TCP4Conn", 44}, + {"TCP6ConnFail", 0}, + {"FDwatchConn", 0}, + {"UDP6Active", 0}, + {"RawOpen", 1}, + {"TCP6BindFail", 0}, + 
{"UDP4Close", 92538}, + {"TCP6Open", 0}, + {"TCP6SendErr", 0}, + {"TCP4Open", 48}, + {"FDwatchSendErr", 0}, + {"TCP6RecvErr", 0}, + {"UDP4OpenFail", 0}, + {"TCP4RecvErr", 0}, + }, + }, + } + + for _, tc := range testCases { + t.Run(tc.counterType, func(t *testing.T) { + tags := map[string]string{ + "url": url, + "type": tc.counterType, + "source": host, + "port": port, + } + + fields := map[string]interface{}{} + + for _, val := range tc.values { + fields[val.fieldKey] = val.fieldValue + } + + acc.AssertContainsTaggedFields(t, "bind_counter", fields, tags) + }) + } + + // Subtest for memory stats + t.Run("memory", func(t *testing.T) { + tags := map[string]string{ + "url": url, + "source": host, + "port": port, + } + + fields := map[string]interface{}{ + "block_size": 45875200, + "context_size": 10037400, + "in_use": 6000232, + "lost": 0, + "total_use": 777821909, + } + + acc.AssertContainsTaggedFields(t, "bind_memory", fields, tags) + }) + + // Subtest for per-context memory stats + t.Run("memory_context", func(t *testing.T) { + assert.True(t, acc.HasIntField("bind_memory_context", "total")) + assert.True(t, acc.HasIntField("bind_memory_context", "in_use")) + }) +} + +func TestBindUnparseableURL(t *testing.T) { + b := Bind{ + Urls: []string{"://example.com"}, + } + + var acc testutil.Accumulator + err := acc.GatherError(b.Gather) + assert.Contains(t, err.Error(), "Unable to parse address") +} diff --git a/plugins/inputs/bind/json_stats.go b/plugins/inputs/bind/json_stats.go new file mode 100644 index 0000000000000..95c7e6fe893bf --- /dev/null +++ b/plugins/inputs/bind/json_stats.go @@ -0,0 +1,166 @@ +package bind + +import ( + "encoding/json" + "fmt" + "net" + "net/http" + "net/url" + "strings" + "time" + + "github.com/influxdata/telegraf" + "github.com/influxdata/telegraf/metric" +) + +type jsonStats struct { + OpCodes map[string]int + QTypes map[string]int + NSStats map[string]int + SockStats map[string]int + Views map[string]jsonView + Memory jsonMemory +} + +type jsonMemory struct { + TotalUse int + InUse int + BlockSize int + ContextSize int + Lost int + Contexts []struct { + Id string + Name string + Total int + InUse int + } +} + +type jsonView struct { + Resolver map[string]map[string]int +} + +// addJSONCounter adds a counter array to a Telegraf Accumulator, with the specified tags. +func addJSONCounter(acc telegraf.Accumulator, commonTags map[string]string, stats map[string]int) { + grouper := metric.NewSeriesGrouper() + ts := time.Now() + for name, value := range stats { + if commonTags["type"] == "opcode" && strings.HasPrefix(name, "RESERVED") { + continue + } + + tags := make(map[string]string) + + // Create local copy of tags since maps are reference types + for k, v := range commonTags { + tags[k] = v + } + + grouper.Add("bind_counter", tags, ts, name, value) + } + + //Add grouped metrics + for _, metric := range grouper.Metrics() { + acc.AddMetric(metric) + } +} + +// addStatsJson walks a jsonStats struct and adds the values to the telegraf.Accumulator. 
+func (b *Bind) addStatsJSON(stats jsonStats, acc telegraf.Accumulator, urlTag string) { + grouper := metric.NewSeriesGrouper() + ts := time.Now() + tags := map[string]string{"url": urlTag} + host, port, _ := net.SplitHostPort(urlTag) + tags["source"] = host + tags["port"] = port + + // Opcodes + tags["type"] = "opcode" + addJSONCounter(acc, tags, stats.OpCodes) + + // Query RDATA types + tags["type"] = "qtype" + addJSONCounter(acc, tags, stats.QTypes) + + // Nameserver stats + tags["type"] = "nsstat" + addJSONCounter(acc, tags, stats.NSStats) + + // Socket statistics + tags["type"] = "sockstat" + addJSONCounter(acc, tags, stats.SockStats) + + // Memory stats + fields := map[string]interface{}{ + "total_use": stats.Memory.TotalUse, + "in_use": stats.Memory.InUse, + "block_size": stats.Memory.BlockSize, + "context_size": stats.Memory.ContextSize, + "lost": stats.Memory.Lost, + } + acc.AddGauge("bind_memory", fields, map[string]string{"url": urlTag, "source": host, "port": port}) + + // Detailed, per-context memory stats + if b.GatherMemoryContexts { + for _, c := range stats.Memory.Contexts { + tags := map[string]string{"url": urlTag, "id": c.Id, "name": c.Name, "source": host, "port": port} + fields := map[string]interface{}{"total": c.Total, "in_use": c.InUse} + + acc.AddGauge("bind_memory_context", fields, tags) + } + } + + // Detailed, per-view stats + if b.GatherViews { + for vName, view := range stats.Views { + for cntrType, counters := range view.Resolver { + for cntrName, value := range counters { + tags := map[string]string{ + "url": urlTag, + "source": host, + "port": port, + "view": vName, + "type": cntrType, + } + + grouper.Add("bind_counter", tags, ts, cntrName, value) + } + } + } + } + + //Add grouped metrics + for _, metric := range grouper.Metrics() { + acc.AddMetric(metric) + } +} + +// readStatsJSON takes a base URL to probe, and requests the individual statistics blobs that we +// are interested in. These individual blobs have a combined size which is significantly smaller +// than if we requested everything at once (e.g. taskmgr and socketmgr can be omitted). 
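+// The suffixes fetched are /server (counters), /net (socket statistics), and
+// /mem (memory); each response is decoded into the same jsonStats value.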
+func (b *Bind) readStatsJSON(addr *url.URL, acc telegraf.Accumulator) error { + var stats jsonStats + + // Progressively build up full jsonStats struct by parsing the individual HTTP responses + for _, suffix := range [...]string{"/server", "/net", "/mem"} { + scrapeUrl := addr.String() + suffix + + resp, err := client.Get(scrapeUrl) + if err != nil { + return err + } + + defer resp.Body.Close() + + if resp.StatusCode != http.StatusOK { + return fmt.Errorf("%s returned HTTP status: %s", scrapeUrl, resp.Status) + } + + if err := json.NewDecoder(resp.Body).Decode(&stats); err != nil { + return fmt.Errorf("Unable to decode JSON blob: %s", err) + } + } + + b.addStatsJSON(stats, acc, addr.Host) + return nil +} diff --git a/plugins/inputs/bind/testdata/json/v1/mem b/plugins/inputs/bind/testdata/json/v1/mem new file mode 100644 index 0000000000000..8872344e14149 --- /dev/null +++ b/plugins/inputs/bind/testdata/json/v1/mem @@ -0,0 +1,133 @@ +{ + "json-stats-version":"1.2", + "boot-time":"2017-07-28T13:24:53Z", + "config-time":"2017-07-28T13:24:53Z", + "current-time":"2017-07-28T15:33:07Z", + "memory":{ + "TotalUse":18206566, + "InUse":3064368, + "BlockSize":13893632, + "ContextSize":3685480, + "Lost":0, + "contexts":[ + { + "id":"0x55fb2e042de0", + "name":"main", + "references":202, + "total":2693003, + "inuse":1454904, + "maxinuse":1508072, + "blocksize":786432, + "pools":40, + "hiwater":0, + "lowater":0 + }, + { + "id":"0x55fb2e0507e0", + "name":"dst", + "references":1, + "total":387478, + "inuse":91776, + "maxinuse":97208, + "pools":0, + "hiwater":0, + "lowater":0 + }, + { + "id":"0x55fb2e0938e0", + "name":"zonemgr-pool", + "references":113, + "total":742986, + "inuse":143776, + "maxinuse":313961, + "blocksize":262144, + "pools":0, + "hiwater":0, + "lowater":0 + }, + { + "id":"0x7f19d00017d0", + "name":"threadkey", + "references":1, + "total":0, + "inuse":0, + "maxinuse":0, + "pools":0, + "hiwater":0, + "lowater":0 + }, + { + "id":"0x7f19d00475f0", + "name":"client", + "references":3, + "total":267800, + "inuse":8760, + "maxinuse":8760, + "blocksize":262144, + "pools":2, + "hiwater":0, + "lowater":0 + }, + { + "id":"0x7f19d00dfca0", + "name":"cache", + "references":8, + "total":288938, + "inuse":83650, + "maxinuse":83842, + "blocksize":262144, + "pools":0, + "hiwater":0, + "lowater":0 + }, + { + "id":"0x7f19d00eaa30", + "name":"cache_heap", + "references":18, + "total":393216, + "inuse":132096, + "maxinuse":132096, + "blocksize":262144, + "pools":0, + "hiwater":0, + "lowater":0 + }, + { + "id":"0x7f19d01094e0", + "name":"res0", + "references":1, + "total":262144, + "inuse":0, + "maxinuse":22048, + "blocksize":262144, + "pools":0, + "hiwater":0, + "lowater":0 + }, + { + "id":"0x7f19d0114270", + "name":"res1", + "references":1, + "total":0, + "inuse":0, + "maxinuse":0, + "blocksize":0, + "pools":0, + "hiwater":0, + "lowater":0 + }, + { + "id":"0x7f19d011f000", + "name":"res2", + "references":1, + "total":0, + "inuse":0, + "maxinuse":0, + "blocksize":0, + "pools":0, + "hiwater":0, + "lowater":0 + } + ] + } +} \ No newline at end of file diff --git a/plugins/inputs/bind/testdata/json/v1/net b/plugins/inputs/bind/testdata/json/v1/net new file mode 100644 index 0000000000000..0bbd41429cee1 --- /dev/null +++ b/plugins/inputs/bind/testdata/json/v1/net @@ -0,0 +1,241 @@ +{ + "json-stats-version":"1.2", + "boot-time":"2017-07-28T13:24:53Z", + "config-time":"2017-07-28T13:24:53Z", + "current-time":"2017-07-28T15:33:07Z", + "sockstats":{ + "UDP4Open":335, + "UDP6Open":113, + "TCP4Open":118, + 
"TCP6Open":2, + "RawOpen":1, + "UDP4Close":333, + "UDP6Close":112, + "TCP4Close":119, + "UDP6ConnFail":112, + "UDP4Conn":333, + "TCP4Conn":114, + "TCP4Accept":6, + "UDP6SendErr":112, + "UDP4RecvErr":1, + "UDP4Active":2, + "UDP6Active":1, + "TCP4Active":10, + "TCP6Active":2, + "RawActive":1 + }, + "socketmgr":{ + "sockets":[ + { + "id":"0x7f19dd849010", + "references":1, + "type":"not-initialized", + "local-address":"", + "states":[ + "bound" + ] + }, + { + "id":"0x7f19dd849268", + "references":1, + "type":"tcp", + "local-address":"0.0.0.0#8053", + "states":[ + "listener", + "bound" + ] + }, + { + "id":"0x7f19dd849718", + "references":2, + "type":"udp", + "local-address":"::#53", + "states":[ + "bound" + ] + }, + { + "id":"0x7f19dd849970", + "references":2, + "type":"tcp", + "local-address":"::#53", + "states":[ + "listener", + "bound" + ] + }, + { + "id":"0x7f19dd849bc8", + "references":2, + "type":"udp", + "local-address":"127.0.0.1#53", + "states":[ + "bound" + ] + }, + { + "id":"0x7f19dd6f4010", + "references":2, + "type":"tcp", + "local-address":"127.0.0.1#53", + "states":[ + "listener", + "bound" + ] + }, + { + "id":"0x7f19dd6f4718", + "references":1, + "type":"tcp", + "local-address":"127.0.0.1#953", + "states":[ + "listener", + "bound" + ] + }, + { + "id":"0x7f19dd6f4bc8", + "references":1, + "type":"tcp", + "local-address":"::1#953", + "states":[ + "listener", + "bound" + ] + }, + { + "id":"0x7f19d4fb7970", + "references":1, + "type":"udp", + "states":[ + ] + }, + { + "id":"0x7f19d4fb7bc8", + "references":1, + "type":"udp", + "states":[ + ] + }, + { + "id":"0x7f19d4fc7010", + "references":1, + "type":"udp", + "states":[ + ] + }, + { + "id":"0x7f19d4fc74c0", + "references":1, + "type":"udp", + "states":[ + ] + }, + { + "id":"0x7f19d4fc7718", + "references":1, + "type":"udp", + "states":[ + ] + }, + { + "id":"0x7f19d4fc7bc8", + "references":1, + "type":"udp", + "states":[ + ] + }, + { + "id":"0x7f19d4fd1010", + "references":1, + "type":"udp", + "states":[ + ] + }, + { + "id":"0x7f19d4fd1268", + "references":1, + "type":"udp", + "states":[ + ] + }, + { + "id":"0x7f19d4fd14c0", + "references":1, + "type":"udp", + "states":[ + ] + }, + { + "id":"0x7f19d4fd1718", + "references":1, + "type":"udp", + "states":[ + ] + }, + { + "id":"0x7f19d4fd1970", + "references":1, + "type":"udp", + "states":[ + ] + }, + { + "id":"0x7f19d4fd1bc8", + "references":1, + "type":"udp", + "states":[ + ] + }, + { + "id":"0x7f19d4fd9010", + "references":1, + "type":"udp", + "states":[ + ] + }, + { + "id":"0x7f19d4fda4c0", + "references":1, + "type":"udp", + "states":[ + ] + }, + { + "id":"0x7f19d4fd9bc8", + "references":1, + "type":"udp", + "states":[ + ] + }, + { + "id":"0x7f19d4fda268", + "references":1, + "type":"udp", + "states":[ + ] + }, + { + "id":"0x7f19d4fd9970", + "references":1, + "type":"udp", + "states":[ + ] + }, + { + "id":"0x7f19d4fda010", + "references":1, + "type":"udp", + "states":[ + ] + }, + { + "id":"0x7f19d4fd9718", + "references":1, + "type":"udp", + "states":[ + ] + } + ] + } +} \ No newline at end of file diff --git a/plugins/inputs/bind/testdata/json/v1/server b/plugins/inputs/bind/testdata/json/v1/server new file mode 100644 index 0000000000000..53acd90672558 --- /dev/null +++ b/plugins/inputs/bind/testdata/json/v1/server @@ -0,0 +1,141 @@ +{ + "json-stats-version":"1.2", + "boot-time":"2017-07-28T13:24:53Z", + "config-time":"2017-07-28T13:24:53Z", + "current-time":"2017-07-28T15:33:07Z", + "opcodes":{ + "QUERY":13, + "IQUERY":0, + "STATUS":0, + "RESERVED3":0, + "NOTIFY":0, + 
"UPDATE":0, + "RESERVED6":0, + "RESERVED7":0, + "RESERVED8":0, + "RESERVED9":0, + "RESERVED10":0, + "RESERVED11":0, + "RESERVED12":0, + "RESERVED13":0, + "RESERVED14":0, + "RESERVED15":0 + }, + "qtypes":{ + "A":2, + "PTR":7, + "AAAA":2, + "SRV":2 + }, + "nsstats":{ + "Requestv4":13, + "Response":12, + "QrySuccess":6, + "QryAuthAns":1, + "QryNoauthAns":10, + "QryNxrrset":1, + "QrySERVFAIL":1, + "QryNXDOMAIN":4, + "QryRecursion":12, + "QryDuplicate":1, + "QryUDP":13 + }, + "views":{ + "_default":{ + "resolver":{ + "stats":{ + "Queryv4":447, + "Queryv6":112, + "Responsev4":444, + "NXDOMAIN":3, + "Truncated":114, + "Retry":242, + "QueryTimeout":3, + "GlueFetchv4":61, + "GlueFetchv6":68, + "GlueFetchv6Fail":24, + "ValAttempt":36, + "ValOk":27, + "ValNegOk":9, + "QryRTT100":287, + "QryRTT500":152, + "QryRTT800":4, + "BucketSize":31 + }, + "qtypes":{ + "A":220, + "NS":19, + "PTR":22, + "AAAA":233, + "SRV":14, + "DS":27, + "DNSKEY":24 + }, + "cache":{ + "A":150, + "NS":44, + "PTR":3, + "AAAA":104, + "DS":23, + "RRSIG":94, + "NSEC":8, + "DNSKEY":7, + "!AAAA":23, + "!DS":5, + "NXDOMAIN":1 + }, + "cachestats":{ + "CacheHits":1675, + "CacheMisses":44, + "QueryHits":17, + "QueryMisses":12, + "DeleteLRU":0, + "DeleteTTL":16, + "CacheNodes":219, + "CacheBuckets":129, + "TreeMemTotal":551082, + "TreeMemInUse":150704, + "HeapMemMax":132096, + "HeapMemTotal":393216, + "HeapMemInUse":132096 + }, + "adb":{ + "nentries":1021, + "entriescnt":254, + "nnames":1021, + "namescnt":195 + } + } + }, + "_bind":{ + "resolver":{ + "stats":{ + "BucketSize":31 + }, + "qtypes":{ + }, + "cache":{ + }, + "cachestats":{ + "CacheHits":0, + "CacheMisses":0, + "QueryHits":0, + "QueryMisses":0, + "DeleteLRU":0, + "DeleteTTL":0, + "CacheNodes":0, + "CacheBuckets":64, + "TreeMemTotal":287392, + "TreeMemInUse":29608, + "HeapMemMax":1024, + "HeapMemTotal":262144, + "HeapMemInUse":1024 + }, + "adb":{ + "nentries":1021, + "nnames":1021 + } + } + } + } +} \ No newline at end of file diff --git a/plugins/inputs/bind/testdata/xml/v2 b/plugins/inputs/bind/testdata/xml/v2 new file mode 100644 index 0000000000000..e16c53dbc1000 --- /dev/null +++ b/plugins/inputs/bind/testdata/xml/v2 @@ -0,0 +1,926 @@ + + + + + + + + _default + + A + 2936881 + + + NS + 28994 + + + CNAME + 26 + + + SOA + 15131 + + + PTR + 47924 + + + MX + 1884 + + + TXT + 6486 + + + AAAA + 949781 + + + SRV + 14740 + + + NAPTR + 1606 + + + DS + 25 + + + SSHFP + 185 + + + DNSKEY + 13 + + + ANY + 1 + + + Queryv4 + 3765426 + + + Queryv6 + 238251 + + + Responsev4 + 3716142 + + + Responsev6 + 1 + + + NXDOMAIN + 100052 + + + SERVFAIL + 5894 + + + FORMERR + 2041 + + + OtherError + 14801 + + + EDNS0Fail + 2615 + + + Mismatch + 0 + + + Truncated + 598 + + + Lame + 117 + + + Retry + 383343 + + + QueryAbort + 0 + + + QuerySockFail + 0 + + + QueryTimeout + 50874 + + + GlueFetchv4 + 260749 + + + GlueFetchv6 + 225310 + + + GlueFetchv4Fail + 5756 + + + GlueFetchv6Fail + 141500 + + + ValAttempt + 0 + + + ValOk + 0 + + + ValNegOk + 0 + + + ValFail + 0 + + + QryRTT10 + 458176 + + + QryRTT100 + 3010133 + + + QryRTT500 + 244312 + + + QryRTT800 + 1275 + + + QryRTT1600 + 361 + + + QryRTT1600+ + 236 + + + + A + 2700 + + + NS + 759 + + + CNAME + 486 + + + SOA + 2 + + + PTR + 6 + + + TXT + 2 + + + AAAA + 629 + + + SRV + 1 + + + DS + 48 + + + RRSIG + 203 + + + NSEC + 22 + + + DNSKEY + 1 + + + !A + 6 + + + !SOA + 26 + + + !AAAA + 84 + + + !NAPTR + 3 + + + NXDOMAIN + 143 + + + + + _bind + + Queryv4 + 0 + + + Queryv6 + 0 + + + Responsev4 + 0 + + + Responsev6 + 0 + + + NXDOMAIN + 0 + + + SERVFAIL + 0 + + + 
FORMERR + 0 + + + OtherError + 0 + + + EDNS0Fail + 0 + + + Mismatch + 0 + + + Truncated + 0 + + + Lame + 0 + + + Retry + 0 + + + QueryAbort + 0 + + + QuerySockFail + 0 + + + QueryTimeout + 0 + + + GlueFetchv4 + 0 + + + GlueFetchv6 + 0 + + + GlueFetchv4Fail + 0 + + + GlueFetchv6Fail + 0 + + + ValAttempt + 0 + + + ValOk + 0 + + + ValNegOk + 0 + + + ValFail + 0 + + + QryRTT10 + 0 + + + QryRTT100 + 0 + + + QryRTT500 + 0 + + + QryRTT800 + 0 + + + QryRTT1600 + 0 + + + QryRTT1600+ + 0 + + + + + + 2016-10-02T18:45:00Z + 2016-10-23T19:27:48Z + + + QUERY + 102312374 + + + UPDATE + 238 + + + + + A + 58951432 + + + NS + 1999 + + + CNAME + 531 + + + SOA + 100415 + + + PTR + 4211487 + + + MX + 441155 + + + TXT + 34628 + + + AAAA + 37786321 + + + SRV + 741082 + + + NAPTR + 39137 + + + DS + 584 + + + SSHFP + 2987 + + + DNSKEY + 452 + + + IXFR + 157 + + + ANY + 7 + + + + Requestv4 + 102312611 + + + Requestv6 + 1 + + + ReqEdns0 + 441758 + + + ReqBadEDNSVer + 0 + + + ReqTSIG + 0 + + + ReqSIG0 + 0 + + + ReqBadSIG + 0 + + + ReqTCP + 1548156 + + + AuthQryRej + 0 + + + RecQryRej + 0 + + + XfrRej + 0 + + + UpdateRej + 238 + + + Response + 102301560 + + + TruncatedResp + 3787 + + + RespEDNS0 + 441748 + + + RespTSIG + 0 + + + RespSIG0 + 0 + + + QrySuccess + 63811668 + + + QryAuthAns + 72180718 + + + QryNoauthAns + 30106182 + + + QryReferral + 3 + + + QryNxrrset + 24423133 + + + QrySERVFAIL + 14422 + + + QryFORMERR + 0 + + + QryNXDOMAIN + 14052096 + + + QryRecursion + 2104239 + + + QryDuplicate + 10879 + + + QryDropped + 16 + + + QryFailure + 0 + + + XfrReqDone + 157 + + + UpdateReqFwd + 0 + + + UpdateRespFwd + 0 + + + UpdateFwdFail + 0 + + + UpdateDone + 0 + + + UpdateFail + 0 + + + UpdateBadPrereq + 0 + + + RPZRewrites + 0 + + + RateDropped + 0 + + + RateSlipped + 0 + + + NotifyOutv4 + 663 + + + NotifyOutv6 + 0 + + + NotifyInv4 + 0 + + + NotifyInv6 + 0 + + + NotifyRej + 0 + + + SOAOutv4 + 386 + + + SOAOutv6 + 0 + + + AXFRReqv4 + 0 + + + AXFRReqv6 + 0 + + + IXFRReqv4 + 0 + + + IXFRReqv6 + 0 + + + XfrSuccess + 0 + + + XfrFail + 0 + + + Mismatch + 2 + + + UDP4Open + 3765532 + + + UDP6Open + 238269 + + + TCP4Open + 602 + + + TCP6Open + 2 + + + UnixOpen + 0 + + + UDP4OpenFail + 0 + + + UDP6OpenFail + 0 + + + TCP4OpenFail + 0 + + + TCP6OpenFail + 0 + + + UnixOpenFail + 0 + + + UDP4Close + 3765528 + + + UDP6Close + 238267 + + + TCP4Close + 1548268 + + + TCP6Close + 0 + + + UnixClose + 0 + + + FDWatchClose + 0 + + + UDP4BindFail + 219 + + + UDP6BindFail + 16 + + + TCP4BindFail + 0 + + + TCP6BindFail + 0 + + + UnixBindFail + 0 + + + FdwatchBindFail + 0 + + + UDP4ConnFail + 0 + + + UDP6ConnFail + 238250 + + + TCP4ConnFail + 0 + + + TCP6ConnFail + 0 + + + UnixConnFail + 0 + + + FDwatchConnFail + 0 + + + UDP4Conn + 3764828 + + + UDP6Conn + 1 + + + TCP4Conn + 590 + + + TCP6Conn + 0 + + + UnixConn + 0 + + + FDwatchConn + 0 + + + TCP4AcceptFail + 0 + + + TCP6AcceptFail + 0 + + + UnixAcceptFail + 0 + + + TCP4Accept + 1547672 + + + TCP6Accept + 0 + + + UnixAccept + 0 + + + UDP4SendErr + 0 + + + UDP6SendErr + 238250 + + + TCP4SendErr + 0 + + + TCP6SendErr + 0 + + + UnixSendErr + 0 + + + FDwatchSendErr + 0 + + + UDP4RecvErr + 1650 + + + UDP6RecvErr + 0 + + + TCP4RecvErr + 1 + + + TCP6RecvErr + 0 + + + UnixRecvErr + 0 + + + FDwatchRecvErr + 0 + + + + + + 0x7f8a94e061d0 + main + 229 + 5002528 + 3662792 + 4848264 + 2359296 + 75 + 0 + 0 + + + 0x7f8a94e13830 + dst + 1 + 133486 + 96456 + 102346 + - + 0 + 0 + 0 + + + 0x7f8a94e401c0 + zonemgr-pool + 501 + 6339848 + 4384240 + 5734049 + 6029312 + 0 + 0 + 0 + + + + 81804609 + 20772579 + 
77070336 + 6663840 + 0 + + + + + diff --git a/plugins/inputs/bind/testdata/xml/v3/mem b/plugins/inputs/bind/testdata/xml/v3/mem new file mode 100644 index 0000000000000..493708d7dcd7f --- /dev/null +++ b/plugins/inputs/bind/testdata/xml/v3/mem @@ -0,0 +1,142 @@ + + + + + 2017-07-21T11:53:28Z + 2017-07-21T11:53:28Z + 2017-07-25T23:47:08Z + + + + + + + 0x55fb2e042de0 + main + 202 + 2706043 + 1454904 + 1508072 + 786432 + 40 + 0 + 0 + + + 0x55fb2e0507e0 + dst + 1 + 387478 + 91776 + 97208 + - + 0 + 0 + 0 + + + 0x55fb2e0938e0 + zonemgr-pool + 113 + 742986 + 143776 + 313961 + 262144 + 0 + 0 + 0 + + + 0x7f19d00017d0 + threadkey + 1 + 0 + 0 + 0 + - + 0 + 0 + 0 + + + 0x7f19d00475f0 + client + 3 + 267800 + 8760 + 8760 + 262144 + 2 + 0 + 0 + + + 0x7f19d00dfca0 + cache + 8 + 288938 + 83650 + 83842 + 262144 + 0 + 0 + 0 + + + 0x7f19d00eaa30 + cache_heap + 18 + 393216 + 132096 + 132096 + 262144 + 0 + 0 + 0 + + + 0x7f19d01094e0 + res0 + 1 + 262144 + 0 + 22048 + 262144 + 0 + 0 + 0 + + + 0x7f19d0114270 + res1 + 1 + 0 + 0 + 0 + 0 + 0 + 0 + 0 + + + 0x7f19d011f000 + res2 + 1 + 0 + 0 + 0 + 0 + 0 + 0 + 0 + + + + 777821909 + 6000232 + 45875200 + 10037400 + 0 + + + diff --git a/plugins/inputs/bind/testdata/xml/v3/net b/plugins/inputs/bind/testdata/xml/v3/net new file mode 100644 index 0000000000000..50f7134477f51 --- /dev/null +++ b/plugins/inputs/bind/testdata/xml/v3/net @@ -0,0 +1,156 @@ + + + + + 2017-07-21T11:53:28Z + 2017-07-21T11:53:28Z + 2017-07-25T23:47:08Z + + 92542 + 0 + 48 + 0 + 0 + 1 + 0 + 0 + 0 + 0 + 0 + 0 + 92538 + 0 + 336 + 0 + 0 + 0 + 0 + 1 + 0 + 0 + 0 + 0 + 0 + 0 + 0 + 0 + 0 + 0 + 0 + 92535 + 0 + 44 + 0 + 0 + 0 + 0 + 0 + 0 + 293 + 0 + 0 + 0 + 0 + 0 + 0 + 0 + 0 + 14 + 0 + 0 + 0 + 0 + 0 + 0 + 4 + 0 + 297 + 0 + 0 + 1 + + + + + + + + 0x7f19dd849010 + 1 + not-initialized + <unknown address, family 16> + + bound + + + + 0x7f19dd849268 + 1 + tcp + 0.0.0.0#8053 + + listener + bound + + + + 0x7f19dd849718 + 2 + udp + ::#53 + + bound + + + + 0x7f19dd849970 + 2 + tcp + ::#53 + + listener + bound + + + + 0x7f19dd849bc8 + 2 + udp + 127.0.0.1#53 + + bound + + + + 0x7f19dd6f4010 + 2 + tcp + 127.0.0.1#53 + + listener + bound + + + + 0x7f19dd6f4718 + 1 + tcp + 127.0.0.1#953 + + listener + bound + + + + 0x7f19dd6f4bc8 + 1 + tcp + ::1#953 + + listener + bound + + + + + diff --git a/plugins/inputs/bind/testdata/xml/v3/server b/plugins/inputs/bind/testdata/xml/v3/server new file mode 100644 index 0000000000000..0d9206c692429 --- /dev/null +++ b/plugins/inputs/bind/testdata/xml/v3/server @@ -0,0 +1,328 @@ + + + + + 2017-07-21T11:53:28Z + 2017-07-21T11:53:28Z + 2017-07-25T23:47:08Z + + 74941 + 0 + 0 + 0 + 0 + 0 + 0 + 0 + 0 + 0 + 0 + 0 + 0 + 0 + 0 + 0 + + + 63672 + 373 + 18 + 3393 + 618 + 970 + 5735 + 139 + 1 + 22 + + + 74942 + 0 + 9250 + 0 + 0 + 0 + 0 + 260 + 0 + 35 + 0 + 0 + 63264 + 365 + 9250 + 0 + 0 + 49044 + 2752 + 60354 + 0 + 2452 + 122 + 0 + 11610 + 53750 + 11667 + 11 + 35 + 0 + 0 + 0 + 0 + 0 + 0 + 0 + 0 + 0 + 0 + 0 + 0 + 74648 + 258 + 0 + 0 + 59 + 0 + 0 + 0 + 0 + 0 + 0 + + + 2 + 0 + 0 + 0 + 0 + 0 + 0 + 0 + 0 + 0 + 0 + 0 + 0 + + + + + + + 61568 + 9126 + 1249 + 286 + 942 + 3933 + 21 + 13749 + 1699 + + + 92573 + 0 + 92135 + 0 + 8182 + 318 + 0 + 0 + 0 + 0 + 42 + 12 + 800 + 0 + 0 + 0 + 0 + 490 + 1398 + 0 + 3 + 0 + 90256 + 67322 + 22850 + 6 + 0 + 45760 + 45543 + 743 + 75 + 0 + 0 + 31 + 34 + 0 + 0 + 0 + 0 + 0 + 0 + 0 + + + + A + 195 + + + NS + 42 + + + CNAME + 7 + + + PTR + 48 + + + MX + 7 + + + TXT + 6 + + + AAAA + 4 + + + DS + 97 + + + RRSIG + 258 + + + NSEC + 89 + + + DNSKEY + 60 + + + !DS + 29 + + + NXDOMAIN + 25 + + 
+ + 1021 + 314 + 1021 + 316 + + + 1904593 + 96 + 336094 + 369336 + 0 + 47518 + 769 + 519 + 1464363 + 392128 + 828966 + 393216 + 132096 + 132096 + + + + + + 0 + + + 0 + + + 0 + + + 0 + + + + + 0 + 0 + 0 + 0 + 0 + 0 + 0 + 0 + 0 + 0 + 0 + 0 + 0 + 0 + 0 + 0 + 0 + 0 + 0 + 0 + 0 + 0 + 0 + 0 + 0 + 0 + 0 + 0 + 0 + 0 + 0 + 0 + 0 + 31 + 0 + 0 + 0 + 0 + 0 + 0 + 0 + 0 + + + + 1021 + 0 + 1021 + 0 + + + 0 + 0 + 0 + 0 + 0 + 0 + 0 + 64 + 287392 + 29608 + 29608 + 262144 + 1024 + 1024 + + + + diff --git a/plugins/inputs/bind/xml_stats_v2.go b/plugins/inputs/bind/xml_stats_v2.go new file mode 100644 index 0000000000000..45071bdc005f0 --- /dev/null +++ b/plugins/inputs/bind/xml_stats_v2.go @@ -0,0 +1,168 @@ +package bind + +import ( + "encoding/xml" + "fmt" + "net" + "net/http" + "net/url" + "time" + + "github.com/influxdata/telegraf" + "github.com/influxdata/telegraf/metric" +) + +type v2Root struct { + XMLName xml.Name + Version string `xml:"version,attr"` + Statistics v2Statistics `xml:"bind>statistics"` +} + +// Omitted branches: socketmgr, taskmgr +type v2Statistics struct { + Version string `xml:"version,attr"` + Views []struct { + // Omitted branches: zones + Name string `xml:"name"` + RdTypes []v2Counter `xml:"rdtype"` + ResStats []v2Counter `xml:"resstat"` + Caches []struct { + Name string `xml:"name,attr"` + RRSets []v2Counter `xml:"rrset"` + } `xml:"cache"` + } `xml:"views>view"` + Server struct { + OpCodes []v2Counter `xml:"requests>opcode"` + RdTypes []v2Counter `xml:"queries-in>rdtype"` + NSStats []v2Counter `xml:"nsstat"` + ZoneStats []v2Counter `xml:"zonestat"` + ResStats []v2Counter `xml:"resstat"` + SockStats []v2Counter `xml:"sockstat"` + } `xml:"server"` + Memory struct { + Contexts []struct { + // Omitted nodes: references, maxinuse, blocksize, pools, hiwater, lowater + Id string `xml:"id"` + Name string `xml:"name"` + Total int `xml:"total"` + InUse int `xml:"inuse"` + } `xml:"contexts>context"` + Summary struct { + TotalUse int + InUse int + BlockSize int + ContextSize int + Lost int + } `xml:"summary"` + } `xml:"memory"` +} + +// BIND statistics v2 counter struct used throughout +type v2Counter struct { + Name string `xml:"name"` + Value int `xml:"counter"` +} + +// addXMLv2Counter adds a v2Counter array to a Telegraf Accumulator, with the specified tags +func addXMLv2Counter(acc telegraf.Accumulator, commonTags map[string]string, stats []v2Counter) { + grouper := metric.NewSeriesGrouper() + ts := time.Now() + for _, c := range stats { + tags := make(map[string]string) + + // Create local copy of tags since maps are reference types + for k, v := range commonTags { + tags[k] = v + } + + grouper.Add("bind_counter", tags, ts, c.Name, c.Value) + } + + //Add grouped metrics + for _, metric := range grouper.Metrics() { + acc.AddMetric(metric) + } +} + +// readStatsXMLv2 decodes a BIND9 XML statistics version 2 document. Unlike the XML v3 statistics +// format, the v2 format does not support broken-out subsets. 
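+// Because of this, each call fetches and decodes the complete statistics document in
+// a single request. A minimal usage sketch (the address is hypothetical; the test
+// fixtures in this change serve the statistics channel on port 8053):
+//
+//	addr, _ := url.Parse("http://localhost:8053/")
+//	err := b.readStatsXMLv2(addr, acc)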
+func (b *Bind) readStatsXMLv2(addr *url.URL, acc telegraf.Accumulator) error { + var stats v2Root + + resp, err := client.Get(addr.String()) + if err != nil { + return err + } + + defer resp.Body.Close() + + if resp.StatusCode != http.StatusOK { + return fmt.Errorf("%s returned HTTP status: %s", addr, resp.Status) + } + + if err := xml.NewDecoder(resp.Body).Decode(&stats); err != nil { + return fmt.Errorf("Unable to decode XML document: %s", err) + } + + tags := map[string]string{"url": addr.Host} + host, port, _ := net.SplitHostPort(addr.Host) + tags["source"] = host + tags["port"] = port + + // Opcodes + tags["type"] = "opcode" + addXMLv2Counter(acc, tags, stats.Statistics.Server.OpCodes) + + // Query RDATA types + tags["type"] = "qtype" + addXMLv2Counter(acc, tags, stats.Statistics.Server.RdTypes) + + // Nameserver stats + tags["type"] = "nsstat" + addXMLv2Counter(acc, tags, stats.Statistics.Server.NSStats) + + // Zone stats + tags["type"] = "zonestat" + addXMLv2Counter(acc, tags, stats.Statistics.Server.ZoneStats) + + // Socket statistics + tags["type"] = "sockstat" + addXMLv2Counter(acc, tags, stats.Statistics.Server.SockStats) + + // Memory stats + fields := map[string]interface{}{ + "total_use": stats.Statistics.Memory.Summary.TotalUse, + "in_use": stats.Statistics.Memory.Summary.InUse, + "block_size": stats.Statistics.Memory.Summary.BlockSize, + "context_size": stats.Statistics.Memory.Summary.ContextSize, + "lost": stats.Statistics.Memory.Summary.Lost, + } + acc.AddGauge("bind_memory", fields, map[string]string{"url": addr.Host, "source": host, "port": port}) + + // Detailed, per-context memory stats + if b.GatherMemoryContexts { + for _, c := range stats.Statistics.Memory.Contexts { + tags := map[string]string{"url": addr.Host, "id": c.Id, "name": c.Name, "source": host, "port": port} + fields := map[string]interface{}{"total": c.Total, "in_use": c.InUse} + + acc.AddGauge("bind_memory_context", fields, tags) + } + } + + // Detailed, per-view stats + if b.GatherViews { + for _, v := range stats.Statistics.Views { + tags := map[string]string{"url": addr.Host, "view": v.Name} + + // Query RDATA types + tags["type"] = "qtype" + addXMLv2Counter(acc, tags, v.RdTypes) + + // Resolver stats + tags["type"] = "resstats" + addXMLv2Counter(acc, tags, v.ResStats) + } + } + + return nil +} diff --git a/plugins/inputs/bind/xml_stats_v3.go b/plugins/inputs/bind/xml_stats_v3.go new file mode 100644 index 0000000000000..ed2cc1b7faf9c --- /dev/null +++ b/plugins/inputs/bind/xml_stats_v3.go @@ -0,0 +1,161 @@ +package bind + +import ( + "encoding/xml" + "fmt" + "net" + "net/http" + "net/url" + "strings" + "time" + + "github.com/influxdata/telegraf" + "github.com/influxdata/telegraf/metric" +) + +// XML path: //statistics +// Omitted branches: socketmgr, taskmgr +type v3Stats struct { + Server v3Server `xml:"server"` + Views []v3View `xml:"views>view"` + Memory v3Memory `xml:"memory"` +} + +// XML path: //statistics/memory +type v3Memory struct { + Contexts []struct { + // Omitted nodes: references, maxinuse, blocksize, pools, hiwater, lowater + Id string `xml:"id"` + Name string `xml:"name"` + Total int `xml:"total"` + InUse int `xml:"inuse"` + } `xml:"contexts>context"` + Summary struct { + TotalUse int + InUse int + BlockSize int + ContextSize int + Lost int + } `xml:"summary"` +} + +// XML path: //statistics/server +type v3Server struct { + CounterGroups []v3CounterGroup `xml:"counters"` +} + +// XML path: //statistics/views/view +type v3View struct { + // Omitted branches: zones + Name 
string `xml:"name,attr"` + CounterGroups []v3CounterGroup `xml:"counters"` + Caches []struct { + Name string `xml:"name,attr"` + RRSets []struct { + Name string `xml:"name"` + Value int `xml:"counter"` + } `xml:"rrset"` + } `xml:"cache"` +} + +// Generic XML v3 doc fragment used in multiple places +type v3CounterGroup struct { + Type string `xml:"type,attr"` + Counters []struct { + Name string `xml:"name,attr"` + Value int `xml:",chardata"` + } `xml:"counter"` +} + +// addStatsXMLv3 walks a v3Stats struct and adds the values to the telegraf.Accumulator. +func (b *Bind) addStatsXMLv3(stats v3Stats, acc telegraf.Accumulator, hostPort string) { + grouper := metric.NewSeriesGrouper() + ts := time.Now() + host, port, _ := net.SplitHostPort(hostPort) + // Counter groups + for _, cg := range stats.Server.CounterGroups { + for _, c := range cg.Counters { + if cg.Type == "opcode" && strings.HasPrefix(c.Name, "RESERVED") { + continue + } + + tags := map[string]string{"url": hostPort, "source": host, "port": port, "type": cg.Type} + + grouper.Add("bind_counter", tags, ts, c.Name, c.Value) + } + } + + // Memory stats + fields := map[string]interface{}{ + "total_use": stats.Memory.Summary.TotalUse, + "in_use": stats.Memory.Summary.InUse, + "block_size": stats.Memory.Summary.BlockSize, + "context_size": stats.Memory.Summary.ContextSize, + "lost": stats.Memory.Summary.Lost, + } + acc.AddGauge("bind_memory", fields, map[string]string{"url": hostPort, "source": host, "port": port}) + + // Detailed, per-context memory stats + if b.GatherMemoryContexts { + for _, c := range stats.Memory.Contexts { + tags := map[string]string{"url": hostPort, "source": host, "port": port, "id": c.Id, "name": c.Name} + fields := map[string]interface{}{"total": c.Total, "in_use": c.InUse} + + acc.AddGauge("bind_memory_context", fields, tags) + } + } + + // Detailed, per-view stats + if b.GatherViews { + for _, v := range stats.Views { + for _, cg := range v.CounterGroups { + for _, c := range cg.Counters { + tags := map[string]string{ + "url": hostPort, + "source": host, + "port": port, + "view": v.Name, + "type": cg.Type, + } + + grouper.Add("bind_counter", tags, ts, c.Name, c.Value) + } + } + } + } + + //Add grouped metrics + for _, metric := range grouper.Metrics() { + acc.AddMetric(metric) + } +} + +// readStatsXMLv3 takes a base URL to probe, and requests the individual statistics documents that +// we are interested in. These individual documents have a combined size which is significantly +// smaller than if we requested everything at once (e.g. taskmgr and socketmgr can be omitted). 
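+// The suffixes below are resolved relative to the configured base URL, so a base of,
+// say, "http://localhost:8053/xml/v3" (hypothetical) yields requests to .../server,
+// .../net and .../mem. Each decode populates a different branch of the same v3Stats
+// value, which is how the struct accumulates the full statistics across iterations.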
+func (b *Bind) readStatsXMLv3(addr *url.URL, acc telegraf.Accumulator) error { + var stats v3Stats + + // Progressively build up full v3Stats struct by parsing the individual HTTP responses + for _, suffix := range [...]string{"/server", "/net", "/mem"} { + scrapeUrl := addr.String() + suffix + + resp, err := client.Get(scrapeUrl) + if err != nil { + return err + } + + defer resp.Body.Close() + + if resp.StatusCode != http.StatusOK { + return fmt.Errorf("%s returned HTTP status: %s", scrapeUrl, resp.Status) + } + + if err := xml.NewDecoder(resp.Body).Decode(&stats); err != nil { + return fmt.Errorf("Unable to decode XML document: %s", err) + } + } + + b.addStatsXMLv3(stats, acc, addr.Host) + return nil +} diff --git a/plugins/inputs/consul/consul.go b/plugins/inputs/consul/consul.go index 4662b54b0ecc7..4b5ee4b1cae11 100644 --- a/plugins/inputs/consul/consul.go +++ b/plugins/inputs/consul/consul.go @@ -5,6 +5,7 @@ import ( "strings" "github.com/hashicorp/consul/api" + "github.com/influxdata/telegraf" "github.com/influxdata/telegraf/internal/tls" "github.com/influxdata/telegraf/plugins/inputs" @@ -126,12 +127,12 @@ func (c *Consul) GatherHealthCheck(acc telegraf.Accumulator, checks []*api.Healt for _, checkTag := range check.ServiceTags { if c.TagDelimiter != "" { splittedTag := strings.SplitN(checkTag, c.TagDelimiter, 2) - if len(splittedTag) == 1 { + if len(splittedTag) == 1 && checkTag != "" { tags[checkTag] = checkTag - } else if len(splittedTag) == 2 { + } else if len(splittedTag) == 2 && splittedTag[1] != "" { tags[splittedTag[0]] = splittedTag[1] } - } else { + } else if checkTag != "" { tags[checkTag] = checkTag } } diff --git a/plugins/inputs/diskio/diskio_linux.go b/plugins/inputs/diskio/diskio_linux.go index d27fd3b46b898..c727f485b1410 100644 --- a/plugins/inputs/diskio/diskio_linux.go +++ b/plugins/inputs/diskio/diskio_linux.go @@ -35,8 +35,8 @@ func (s *DiskIO) diskInfo(devName string) (map[string]string, error) { return ic.values, nil } - major := stat.Rdev >> 8 & 0xff - minor := stat.Rdev & 0xff + major := unix.Major(uint64(stat.Rdev)) + minor := unix.Minor(uint64(stat.Rdev)) udevDataPath := fmt.Sprintf("%s/b%d:%d", udevPath, major, minor) di := map[string]string{} diff --git a/plugins/inputs/exec/exec.go b/plugins/inputs/exec/exec.go index 9cb86c3cd0ed6..615736b3c1c84 100644 --- a/plugins/inputs/exec/exec.go +++ b/plugins/inputs/exec/exec.go @@ -3,12 +3,12 @@ package exec import ( "bytes" "fmt" + "log" "os/exec" "path/filepath" "runtime" "strings" "sync" - "syscall" "time" "github.com/kballard/go-shellquote" @@ -61,39 +61,18 @@ func NewExec() *Exec { } type Runner interface { - Run(*Exec, string, telegraf.Accumulator) ([]byte, error) + Run(string, time.Duration) ([]byte, []byte, error) } type CommandRunner struct{} -func AddNagiosState(exitCode error, acc telegraf.Accumulator) error { - nagiosState := 0 - if exitCode != nil { - exiterr, ok := exitCode.(*exec.ExitError) - if ok { - status, ok := exiterr.Sys().(syscall.WaitStatus) - if ok { - nagiosState = status.ExitStatus() - } else { - return fmt.Errorf("exec: unable to get nagios plugin exit code") - } - } else { - return fmt.Errorf("exec: unable to get nagios plugin exit code") - } - } - fields := map[string]interface{}{"state": nagiosState} - acc.AddFields("nagios_state", fields, nil) - return nil -} - func (c CommandRunner) Run( - e *Exec, command string, - acc telegraf.Accumulator, -) ([]byte, error) { + timeout time.Duration, +) ([]byte, []byte, error) { split_cmd, err := shellquote.Split(command) if 
err != nil || len(split_cmd) == 0 { - return nil, fmt.Errorf("exec: unable to parse command, %s", err) + return nil, nil, fmt.Errorf("exec: unable to parse command, %s", err) } cmd := exec.Command(split_cmd[0], split_cmd[1:]...) @@ -105,44 +84,35 @@ func (c CommandRunner) Run( cmd.Stdout = &out cmd.Stderr = &stderr - if err := internal.RunTimeout(cmd, e.Timeout.Duration); err != nil { - switch e.parser.(type) { - case *nagios.NagiosParser: - AddNagiosState(err, acc) - default: - var errMessage = "" - if stderr.Len() > 0 { - stderr = removeCarriageReturns(stderr) - // Limit the number of bytes. - didTruncate := false - if stderr.Len() > MaxStderrBytes { - stderr.Truncate(MaxStderrBytes) - didTruncate = true - } - if i := bytes.IndexByte(stderr.Bytes(), '\n'); i > 0 { - // Only show truncation if the newline wasn't the last character. - if i < stderr.Len()-1 { - didTruncate = true - } - stderr.Truncate(i) - } - if didTruncate { - stderr.WriteString("...") - } + runErr := internal.RunTimeout(cmd, timeout) - errMessage = fmt.Sprintf(": %s", stderr.String()) - } - return nil, fmt.Errorf("exec: %s for command '%s'%s", err, command, errMessage) - } - } else { - switch e.parser.(type) { - case *nagios.NagiosParser: - AddNagiosState(nil, acc) - } + out = removeCarriageReturns(out) + if stderr.Len() > 0 { + stderr = removeCarriageReturns(stderr) + stderr = truncate(stderr) } - out = removeCarriageReturns(out) - return out.Bytes(), nil + return out.Bytes(), stderr.Bytes(), runErr +} + +func truncate(buf bytes.Buffer) bytes.Buffer { + // Limit the number of bytes. + didTruncate := false + if buf.Len() > MaxStderrBytes { + buf.Truncate(MaxStderrBytes) + didTruncate = true + } + if i := bytes.IndexByte(buf.Bytes(), '\n'); i > 0 { + // Only show truncation if the newline wasn't the last character. + if i < buf.Len()-1 { + didTruncate = true + } + buf.Truncate(i) + } + if didTruncate { + buf.WriteString("...") + } + return buf } // removeCarriageReturns removes all carriage returns from the input if the @@ -173,9 +143,11 @@ func removeCarriageReturns(b bytes.Buffer) bytes.Buffer { func (e *Exec) ProcessCommand(command string, acc telegraf.Accumulator, wg *sync.WaitGroup) { defer wg.Done() + _, isNagios := e.parser.(*nagios.NagiosParser) - out, err := e.runner.Run(e, command, acc) - if err != nil { + out, errbuf, runErr := e.runner.Run(command, e.Timeout.Duration) + if !isNagios && runErr != nil { + err := fmt.Errorf("exec: %s for command '%s': %s", runErr, command, string(errbuf)) acc.AddError(err) return } @@ -183,11 +155,19 @@ func (e *Exec) ProcessCommand(command string, acc telegraf.Accumulator, wg *sync metrics, err := e.parser.Parse(out) if err != nil { acc.AddError(err) - } else { - for _, metric := range metrics { - acc.AddFields(metric.Name(), metric.Fields(), metric.Tags(), metric.Time()) + return + } + + if isNagios { + metrics, err = nagios.TryAddState(runErr, metrics) + if err != nil { + log.Printf("E! 
[inputs.exec] failed to add nagios state: %s", err) } } + + for _, m := range metrics { + acc.AddMetric(m) + } } func (e *Exec) SampleConfig() string { diff --git a/plugins/inputs/exec/exec_test.go b/plugins/inputs/exec/exec_test.go index 0bfeece54feb7..5aaef8961ee69 100644 --- a/plugins/inputs/exec/exec_test.go +++ b/plugins/inputs/exec/exec_test.go @@ -5,8 +5,8 @@ import ( "fmt" "runtime" "testing" + "time" - "github.com/influxdata/telegraf" "github.com/influxdata/telegraf/plugins/parsers" "github.com/influxdata/telegraf/testutil" @@ -74,22 +74,21 @@ var crTests = []CarriageReturnTest{ } type runnerMock struct { - out []byte - err error + out []byte + errout []byte + err error } -func newRunnerMock(out []byte, err error) Runner { +func newRunnerMock(out []byte, errout []byte, err error) Runner { return &runnerMock{ - out: out, - err: err, + out: out, + errout: errout, + err: err, } } -func (r runnerMock) Run(e *Exec, command string, acc telegraf.Accumulator) ([]byte, error) { - if r.err != nil { - return nil, r.err - } - return r.out, nil +func (r runnerMock) Run(command string, _ time.Duration) ([]byte, []byte, error) { + return r.out, r.errout, r.err } func TestExec(t *testing.T) { @@ -98,7 +97,7 @@ func TestExec(t *testing.T) { MetricName: "exec", }) e := &Exec{ - runner: newRunnerMock([]byte(validJson), nil), + runner: newRunnerMock([]byte(validJson), nil, nil), Commands: []string{"testcommand arg1"}, parser: parser, } @@ -127,7 +126,7 @@ func TestExecMalformed(t *testing.T) { MetricName: "exec", }) e := &Exec{ - runner: newRunnerMock([]byte(malformedJson), nil), + runner: newRunnerMock([]byte(malformedJson), nil, nil), Commands: []string{"badcommand arg1"}, parser: parser, } @@ -143,7 +142,7 @@ func TestCommandError(t *testing.T) { MetricName: "exec", }) e := &Exec{ - runner: newRunnerMock(nil, fmt.Errorf("exit status code 1")), + runner: newRunnerMock(nil, nil, fmt.Errorf("exit status code 1")), Commands: []string{"badcommand"}, parser: parser, } @@ -201,6 +200,66 @@ func TestExecCommandWithoutGlobAndPath(t *testing.T) { acc.AssertContainsFields(t, "metric", fields) } +func TestTruncate(t *testing.T) { + tests := []struct { + name string + bufF func() *bytes.Buffer + expF func() *bytes.Buffer + }{ + { + name: "should not truncate", + bufF: func() *bytes.Buffer { + var b bytes.Buffer + b.WriteString("hello world") + return &b + }, + expF: func() *bytes.Buffer { + var b bytes.Buffer + b.WriteString("hello world") + return &b + }, + }, + { + name: "should truncate up to the new line", + bufF: func() *bytes.Buffer { + var b bytes.Buffer + b.WriteString("hello world\nand all the people") + return &b + }, + expF: func() *bytes.Buffer { + var b bytes.Buffer + b.WriteString("hello world...") + return &b + }, + }, + { + name: "should truncate to the MaxStderrBytes", + bufF: func() *bytes.Buffer { + var b bytes.Buffer + for i := 0; i < 2*MaxStderrBytes; i++ { + b.WriteByte('b') + } + return &b + }, + expF: func() *bytes.Buffer { + var b bytes.Buffer + for i := 0; i < MaxStderrBytes; i++ { + b.WriteByte('b') + } + b.WriteString("...") + return &b + }, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + res := truncate(*tt.bufF()) + require.Equal(t, tt.expF().Bytes(), res.Bytes()) + }) + } +} + func TestRemoveCarriageReturns(t *testing.T) { if runtime.GOOS == "windows" { // Test that all carriage returns are removed diff --git a/plugins/inputs/filecount/filecount.go b/plugins/inputs/filecount/filecount.go index 1fd7041ff0688..c0072e0d8765f 100644 --- 
a/plugins/inputs/filecount/filecount.go +++ b/plugins/inputs/filecount/filecount.go @@ -4,16 +4,14 @@ import ( "log" "os" "path/filepath" - "strings" "time" - "github.com/karrick/godirwalk" - "github.com/pkg/errors" - "github.com/influxdata/telegraf" "github.com/influxdata/telegraf/internal" "github.com/influxdata/telegraf/internal/globpath" "github.com/influxdata/telegraf/plugins/inputs" + "github.com/karrick/godirwalk" + "github.com/pkg/errors" ) const sampleConfig = ` @@ -157,7 +155,8 @@ func (fc *FileCount) count(acc telegraf.Accumulator, basedir string, glob globpa childSize := make(map[string]int64) walkFn := func(path string, de *godirwalk.Dirent) error { - if path == basedir { + rel, err := filepath.Rel(basedir, path) + if err == nil && rel == "." { return nil } file, err := os.Stat(path) @@ -173,7 +172,7 @@ func (fc *FileCount) count(acc telegraf.Accumulator, basedir string, glob globpa return nil } if match { - parent := path[:strings.LastIndex(path, "/")] + parent := filepath.Dir(path) childCount[parent]++ childSize[parent] += file.Size() } @@ -194,7 +193,7 @@ func (fc *FileCount) count(acc telegraf.Accumulator, basedir string, glob globpa "directory": path, }) } - parent := path[:strings.LastIndex(path, "/")] + parent := filepath.Dir(path) if fc.Recursive { childCount[parent] += childCount[path] childSize[parent] += childSize[path] diff --git a/plugins/inputs/github/README.md b/plugins/inputs/github/README.md new file mode 100644 index 0000000000000..524d1d0e70fd0 --- /dev/null +++ b/plugins/inputs/github/README.md @@ -0,0 +1,55 @@ +# GitHub Input Plugin + +Gather repository information from [GitHub][] hosted repositories. + +**Note:** Telegraf also contains the [webhook][] input which can be used as an +alternative method for collecting repository information. + +### Configuration + +```toml +[[inputs.github]] + ## List of repositories to monitor + repositories = ["influxdata/telegraf"] + + ## Github API access token. Unauthenticated requests are limited to 60 per hour. + # access_token = "" + + ## Timeout for HTTP requests. 
+ # http_timeout = "5s" +``` + +### Metrics + +- github_repository + - tags: + - name - The repository name + - owner - The owner of the repository + - language - The primary language of the repository + - license - The license set for the repository + - fields: + - stars (int) + - forks (int) + - open_issues (int) + - size (int) + +When the [internal][] input is enabled: + ++ internal_github + - tags: + - access_token - An obfusticated reference to the configured access token or "Unauthenticated" + - fields: + - limit - How many requests you are limited to (per hour) + - remaining - How many requests you have remaining (per hour) + - blocks - How many requests have been blocked due to rate limit + +### Example Output + +``` +github,full_name=influxdata/telegraf,name=telegraf,owner=influxdata,language=Go,license=MIT\ License stars=6401i,forks=2421i,open_issues=722i,size=22611i 1552651811000000000 +internal_github,access_token=Unauthenticated rate_limit_remaining=59i,rate_limit_limit=60i,rate_limit_blocks=0i 1552653551000000000 +``` + +[GitHub]: https://www.github.com +[internal]: /plugins/inputs/internal +[webhook]: /plugins/inputs/webhooks/github diff --git a/plugins/inputs/github/github.go b/plugins/inputs/github/github.go new file mode 100644 index 0000000000000..ff497e55bd0a7 --- /dev/null +++ b/plugins/inputs/github/github.go @@ -0,0 +1,183 @@ +package github + +import ( + "context" + "fmt" + "net/http" + "strings" + "sync" + "time" + + "github.com/google/go-github/github" + "github.com/influxdata/telegraf" + "github.com/influxdata/telegraf/internal" + "github.com/influxdata/telegraf/plugins/inputs" + "github.com/influxdata/telegraf/selfstat" + "golang.org/x/oauth2" +) + +// GitHub - plugin main structure +type GitHub struct { + Repositories []string `toml:"repositories"` + AccessToken string `toml:"access_token"` + HTTPTimeout internal.Duration `toml:"http_timeout"` + githubClient *github.Client + + obfusticatedToken string + + RateLimit selfstat.Stat + RateLimitErrors selfstat.Stat + RateRemaining selfstat.Stat +} + +const sampleConfig = ` + ## List of repositories to monitor. + repositories = ["influxdata/telegraf"] + + ## Github API access token. Unauthenticated requests are limited to 60 per hour. + # access_token = "" + + ## Timeout for HTTP requests. + # http_timeout = "5s" +` + +// SampleConfig returns sample configuration for this plugin. +func (g *GitHub) SampleConfig() string { + return sampleConfig +} + +// Description returns the plugin description. +func (g *GitHub) Description() string { + return "Gather repository information from GitHub hosted repositories." +} + +// Create GitHub Client +func (g *GitHub) createGitHubClient(ctx context.Context) (*github.Client, error) { + httpClient := &http.Client{ + Transport: &http.Transport{ + Proxy: http.ProxyFromEnvironment, + }, + Timeout: g.HTTPTimeout.Duration, + } + + g.obfusticatedToken = "Unauthenticated" + + if g.AccessToken != "" { + tokenSource := oauth2.StaticTokenSource( + &oauth2.Token{AccessToken: g.AccessToken}, + ) + oauthClient := oauth2.NewClient(ctx, tokenSource) + ctx = context.WithValue(ctx, oauth2.HTTPClient, oauthClient) + + g.obfusticatedToken = g.AccessToken[0:4] + "..." 
+ g.AccessToken[len(g.AccessToken)-3:] + + return github.NewClient(oauthClient), nil + } + + return github.NewClient(httpClient), nil +} + +// Gather GitHub Metrics +func (g *GitHub) Gather(acc telegraf.Accumulator) error { + ctx := context.Background() + + if g.githubClient == nil { + githubClient, err := g.createGitHubClient(ctx) + + if err != nil { + return err + } + + g.githubClient = githubClient + + tokenTags := map[string]string{ + "access_token": g.obfusticatedToken, + } + + g.RateLimitErrors = selfstat.Register("github", "rate_limit_blocks", tokenTags) + g.RateLimit = selfstat.Register("github", "rate_limit_limit", tokenTags) + g.RateRemaining = selfstat.Register("github", "rate_limit_remaining", tokenTags) + } + + var wg sync.WaitGroup + wg.Add(len(g.Repositories)) + + for _, repository := range g.Repositories { + go func(repositoryName string, acc telegraf.Accumulator) { + defer wg.Done() + + owner, repository, err := splitRepositoryName(repositoryName) + if err != nil { + acc.AddError(err) + return + } + + repositoryInfo, response, err := g.githubClient.Repositories.Get(ctx, owner, repository) + + if _, ok := err.(*github.RateLimitError); ok { + g.RateLimitErrors.Incr(1) + } + + if err != nil { + acc.AddError(err) + return + } + + g.RateLimit.Set(int64(response.Rate.Limit)) + g.RateRemaining.Set(int64(response.Rate.Remaining)) + + now := time.Now() + tags := getTags(repositoryInfo) + fields := getFields(repositoryInfo) + + acc.AddFields("github_repository", fields, tags, now) + }(repository, acc) + } + + wg.Wait() + return nil +} + +func splitRepositoryName(repositoryName string) (string, string, error) { + splits := strings.SplitN(repositoryName, "/", 2) + + if len(splits) != 2 { + return "", "", fmt.Errorf("%v is not of format 'owner/repository'", repositoryName) + } + + return splits[0], splits[1], nil +} + +func getLicense(repositoryInfo *github.Repository) string { + if repositoryInfo.GetLicense() != nil { + return *repositoryInfo.License.Name + } + + return "None" +} + +func getTags(repositoryInfo *github.Repository) map[string]string { + return map[string]string{ + "owner": *repositoryInfo.Owner.Login, + "name": *repositoryInfo.Name, + "language": *repositoryInfo.Language, + "license": getLicense(repositoryInfo), + } +} + +func getFields(repositoryInfo *github.Repository) map[string]interface{} { + return map[string]interface{}{ + "stars": *repositoryInfo.StargazersCount, + "forks": *repositoryInfo.ForksCount, + "open_issues": *repositoryInfo.OpenIssuesCount, + "size": *repositoryInfo.Size, + } +} + +func init() { + inputs.Add("github", func() telegraf.Input { + return &GitHub{ + HTTPTimeout: internal.Duration{Duration: time.Second * 5}, + } + }) +} diff --git a/plugins/inputs/github/github_test.go b/plugins/inputs/github/github_test.go new file mode 100644 index 0000000000000..0ebae3a671667 --- /dev/null +++ b/plugins/inputs/github/github_test.go @@ -0,0 +1,119 @@ +package github + +import ( + "reflect" + "testing" + + gh "github.com/google/go-github/github" + "github.com/stretchr/testify/require" +) + +func TestSplitRepositoryNameWithWorkingExample(t *testing.T) { + var validRepositoryNames = []struct { + fullName string + owner string + repository string + }{ + {"influxdata/telegraf", "influxdata", "telegraf"}, + {"influxdata/influxdb", "influxdata", "influxdb"}, + {"rawkode/saltstack-dotfiles", "rawkode", "saltstack-dotfiles"}, + } + + for _, tt := range validRepositoryNames { + t.Run(tt.fullName, func(t *testing.T) { + owner, repository, _ := 
splitRepositoryName(tt.fullName) + + require.Equal(t, tt.owner, owner) + require.Equal(t, tt.repository, repository) + }) + } +} + +func TestSplitRepositoryNameWithNoSlash(t *testing.T) { + var invalidRepositoryNames = []string{ + "influxdata-influxdb", + } + + for _, tt := range invalidRepositoryNames { + t.Run(tt, func(t *testing.T) { + _, _, err := splitRepositoryName(tt) + + require.NotNil(t, err) + }) + } +} + +func TestGetLicenseWhenExists(t *testing.T) { + licenseName := "MIT" + license := gh.License{Name: &licenseName} + repository := gh.Repository{License: &license} + + getLicenseReturn := getLicense(&repository) + + require.Equal(t, "MIT", getLicenseReturn) +} + +func TestGetLicenseWhenMissing(t *testing.T) { + repository := gh.Repository{} + + getLicenseReturn := getLicense(&repository) + + require.Equal(t, "None", getLicenseReturn) +} + +func TestGetTags(t *testing.T) { + licenseName := "MIT" + license := gh.License{Name: &licenseName} + + ownerName := "influxdata" + owner := gh.User{Login: &ownerName} + + fullName := "influxdata/influxdb" + repositoryName := "influxdb" + + language := "Go" + + repository := gh.Repository{ + FullName: &fullName, + Name: &repositoryName, + License: &license, + Owner: &owner, + Language: &language, + } + + getTagsReturn := getTags(&repository) + + correctTagsReturn := map[string]string{ + "owner": ownerName, + "name": repositoryName, + "language": language, + "license": licenseName, + } + + require.Equal(t, true, reflect.DeepEqual(getTagsReturn, correctTagsReturn)) +} + +func TestGetFields(t *testing.T) { + stars := 1 + forks := 2 + openIssues := 3 + size := 4 + + repository := gh.Repository{ + StargazersCount: &stars, + ForksCount: &forks, + OpenIssuesCount: &openIssues, + Size: &size, + } + + getFieldsReturn := getFields(&repository) + + correctFieldReturn := make(map[string]interface{}) + + correctFieldReturn["stars"] = 1 + correctFieldReturn["forks"] = 2 + correctFieldReturn["open_issues"] = 3 + correctFieldReturn["size"] = 4 + + require.Equal(t, true, reflect.DeepEqual(getFieldsReturn, correctFieldReturn)) +} diff --git a/plugins/inputs/influxdb_listener/http_listener.go b/plugins/inputs/influxdb_listener/http_listener.go index 2857ae9c9afa8..7e55447869018 100644 --- a/plugins/inputs/influxdb_listener/http_listener.go +++ b/plugins/inputs/influxdb_listener/http_listener.go @@ -5,6 +5,7 @@ import ( "compress/gzip" "crypto/subtle" "crypto/tls" + "encoding/json" "fmt" "io" "log" @@ -231,8 +232,16 @@ func (h *HTTPListener) ServeHTTP(res http.ResponseWriter, req *http.Request) { case "/ping": h.PingsRecv.Incr(1) defer h.PingsServed.Incr(1) + verbose := req.URL.Query().Get("verbose") + // respond to ping requests - res.WriteHeader(http.StatusNoContent) + if verbose != "" && verbose != "0" && verbose != "false" { + res.WriteHeader(http.StatusOK) + b, _ := json.Marshal(map[string]string{"version": "1.0"}) // based on header set above + res.Write(b) + } else { + res.WriteHeader(http.StatusNoContent) + } default: defer h.NotFoundsServed.Incr(1) // Don't know how to respond to calls to other endpoints diff --git a/plugins/inputs/phpfpm/README.md b/plugins/inputs/phpfpm/README.md index 531edae241f32..e2f4e0c2ff574 100644 --- a/plugins/inputs/phpfpm/README.md +++ b/plugins/inputs/phpfpm/README.md @@ -27,6 +27,16 @@ Get phpfpm stats using either HTTP status page or fpm socket. 
## Example of multiple gathering from local socket and remote host ## urls = ["http://192.168.1.20/status", "/tmp/fpm.sock"] urls = ["http://localhost/status"] + + ## Duration allowed to complete HTTP requests. + # timeout = "5s" + + ## Optional TLS Config + # tls_ca = "/etc/telegraf/ca.pem" + # tls_cert = "/etc/telegraf/cert.pem" + # tls_key = "/etc/telegraf/key.pem" + ## Use TLS but skip chain & host verification + # insecure_skip_verify = false ``` When using `unixsocket`, you have to ensure that telegraf runs on same diff --git a/plugins/inputs/phpfpm/phpfpm.go b/plugins/inputs/phpfpm/phpfpm.go index e40dae174fa83..ed205e6e70775 100644 --- a/plugins/inputs/phpfpm/phpfpm.go +++ b/plugins/inputs/phpfpm/phpfpm.go @@ -13,6 +13,8 @@ import ( "sync" "github.com/influxdata/telegraf" + "github.com/influxdata/telegraf/internal" + "github.com/influxdata/telegraf/internal/tls" "github.com/influxdata/telegraf/plugins/inputs" ) @@ -35,7 +37,9 @@ type metric map[string]int64 type poolStat map[string]metric type phpfpm struct { - Urls []string + Urls []string + Timeout internal.Duration + tls.ClientConfig client *http.Client } @@ -58,9 +62,19 @@ var sampleConfig = ` ## "fcgi://10.0.0.12:9000/status" ## "cgi://10.0.10.12:9001/status" ## - ## Example of multiple gathering from local socket and remove host + ## Example of multiple gathering from local socket and remote host ## urls = ["http://192.168.1.20/status", "/tmp/fpm.sock"] urls = ["http://localhost/status"] + + ## Duration allowed to complete HTTP requests. + # timeout = "5s" + + ## Optional TLS Config + # tls_ca = "/etc/telegraf/ca.pem" + # tls_cert = "/etc/telegraf/cert.pem" + # tls_key = "/etc/telegraf/key.pem" + ## Use TLS but skip chain & host verification + # insecure_skip_verify = false ` func (r *phpfpm) SampleConfig() string { @@ -96,8 +110,17 @@ func (g *phpfpm) Gather(acc telegraf.Accumulator) error { // Request status page to get stat raw data and import it func (g *phpfpm) gatherServer(addr string, acc telegraf.Accumulator) error { if g.client == nil { - client := &http.Client{} - g.client = client + tlsCfg, err := g.ClientConfig.TLSConfig() + if err != nil { + return err + } + tr := &http.Transport{ + TLSClientConfig: tlsCfg, + } + g.client = &http.Client{ + Transport: tr, + Timeout: g.Timeout.Duration, + } } if strings.HasPrefix(addr, "http://") || strings.HasPrefix(addr, "https://") { diff --git a/plugins/inputs/ping/README.md b/plugins/inputs/ping/README.md index f59a6c9470d04..5d3904e929c38 100644 --- a/plugins/inputs/ping/README.md +++ b/plugins/inputs/ping/README.md @@ -2,8 +2,9 @@ Sends a ping message by executing the system ping command and reports the results. -Currently there is no support for GNU Inetutils, use with iputils-ping -instead: +Most ping command implementations are supported, one notable exception being +that there is currently no support for GNU Inetutils ping. You may instead +use the iputils-ping implementation: ``` apt-get install iputils-ping ``` diff --git a/plugins/inputs/procstat/README.md b/plugins/inputs/procstat/README.md index 0dd631b051271..dfe95291abd21 100644 --- a/plugins/inputs/procstat/README.md +++ b/plugins/inputs/procstat/README.md @@ -41,6 +41,9 @@ Processes can be selected for monitoring using one of several methods: ## Field name prefix # prefix = "" + ## When true add the full cmdline as a tag. + # cmdline_tag = false + ## Add PID as a tag instead of a field; useful to differentiate between ## processes whose tags are otherwise the same. 
Can create a large number ## of series, use judiciously. @@ -72,6 +75,7 @@ implemented as a WMI query. The pattern allows fuzzy matching using only - procstat - tags: - pid (when `pid_tag` is true) + - cmdline (when 'cmdline_tag' is true) - process_name - pidfile (when defined) - exe (when defined) diff --git a/plugins/inputs/procstat/process.go b/plugins/inputs/procstat/process.go index 30e8f182f7515..94a57c1924765 100644 --- a/plugins/inputs/procstat/process.go +++ b/plugins/inputs/procstat/process.go @@ -15,6 +15,7 @@ type Process interface { IOCounters() (*process.IOCountersStat, error) MemoryInfo() (*process.MemoryInfoStat, error) Name() (string, error) + Cmdline() (string, error) NumCtxSwitches() (*process.NumCtxSwitchesStat, error) NumFDs() (int32, error) NumThreads() (int32, error) diff --git a/plugins/inputs/procstat/procstat.go b/plugins/inputs/procstat/procstat.go index 8424cd6741406..55552bb4af58a 100644 --- a/plugins/inputs/procstat/procstat.go +++ b/plugins/inputs/procstat/procstat.go @@ -27,6 +27,7 @@ type Procstat struct { Exe string Pattern string Prefix string + CmdLineTag bool `toml:"cmdline_tag"` ProcessName string User string SystemdUnit string @@ -65,6 +66,9 @@ var sampleConfig = ` ## Field name prefix # prefix = "" + ## When true add the full cmdline as a tag. + # cmdline_tag = false + ## Add PID as a tag instead of a field; useful to differentiate between ## processes whose tags are otherwise the same. Can create a large number ## of series, use judiciously. @@ -170,6 +174,16 @@ func (p *Procstat) addMetric(proc Process, acc telegraf.Accumulator) { fields["pid"] = int32(proc.PID()) } + //If cmd_line tag is true and it is not already set add cmdline as a tag + if p.CmdLineTag { + if _, ok := proc.Tags()["cmdline"]; !ok { + Cmdline, err := proc.Cmdline() + if err == nil { + proc.Tags()["cmdline"] = Cmdline + } + } + } + numThreads, err := proc.NumThreads() if err == nil { fields[prefix+"num_threads"] = numThreads diff --git a/plugins/inputs/procstat/procstat_test.go b/plugins/inputs/procstat/procstat_test.go index 7a2eaf9eee3a9..191c056ea9078 100644 --- a/plugins/inputs/procstat/procstat_test.go +++ b/plugins/inputs/procstat/procstat_test.go @@ -76,6 +76,10 @@ func (pg *testPgrep) PidFile(path string) ([]PID, error) { return pg.pids, pg.err } +func (p *testProc) Cmdline() (string, error) { + return "test_proc", nil +} + func (pg *testPgrep) Pattern(pattern string) ([]PID, error) { return pg.pids, pg.err } diff --git a/plugins/inputs/prometheus/README.md b/plugins/inputs/prometheus/README.md index 9208f54bedff3..c1f50bb966d30 100644 --- a/plugins/inputs/prometheus/README.md +++ b/plugins/inputs/prometheus/README.md @@ -24,6 +24,9 @@ in Prometheus format. ## - prometheus.io/path: If the metrics path is not /metrics, define it with this annotation. ## - prometheus.io/port: If port is not 9102 use this annotation # monitor_kubernetes_pods = true + ## Restricts Kubernetes monitoring to a single namespace + ## ex: monitor_kubernetes_pods_namespace = "default" + # monitor_kubernetes_pods_namespace = "" ## Use bearer token for authorization. ('bearer_token' takes priority) # bearer_token = "/path/to/bearer/token" @@ -64,6 +67,8 @@ Currently the following annotation are supported: * `prometheus.io/path` Override the path for the metrics endpoint on the service. (default '/metrics') * `prometheus.io/port` Used to override the port. (default 9102) +Using the `monitor_kubernetes_pods_namespace` option allows you to limit which pods you are scraping. 
+ #### Bearer Token If set, the file specified by the `bearer_token` parameter will be read on diff --git a/plugins/inputs/prometheus/kubernetes.go b/plugins/inputs/prometheus/kubernetes.go index 87db15ffe0539..d92d90ead72fc 100644 --- a/plugins/inputs/prometheus/kubernetes.go +++ b/plugins/inputs/prometheus/kubernetes.go @@ -14,7 +14,7 @@ import ( "github.com/ericchiang/k8s" corev1 "github.com/ericchiang/k8s/apis/core/v1" - "gopkg.in/yaml.v2" + "github.com/ghodss/yaml" ) type payload struct { @@ -83,7 +83,7 @@ func (p *Prometheus) start(ctx context.Context) error { // directed to do so by K8s. func (p *Prometheus) watch(ctx context.Context, client *k8s.Client) error { pod := &corev1.Pod{} - watcher, err := client.Watch(ctx, "", &corev1.Pod{}) + watcher, err := client.Watch(ctx, p.PodNamespace, &corev1.Pod{}) if err != nil { return err } diff --git a/plugins/inputs/prometheus/prometheus.go b/plugins/inputs/prometheus/prometheus.go index 879af456751af..a4409c5b037d0 100644 --- a/plugins/inputs/prometheus/prometheus.go +++ b/plugins/inputs/prometheus/prometheus.go @@ -41,7 +41,8 @@ type Prometheus struct { client *http.Client // Should we scrape Kubernetes services for prometheus annotations - MonitorPods bool `toml:"monitor_kubernetes_pods"` + MonitorPods bool `toml:"monitor_kubernetes_pods"` + PodNamespace string `toml:"monitor_kubernetes_pods_namespace"` lock sync.Mutex kubernetesPods map[string]URLAndAddress cancel context.CancelFunc @@ -65,6 +66,9 @@ var sampleConfig = ` ## - prometheus.io/path: If the metrics path is not /metrics, define it with this annotation. ## - prometheus.io/port: If port is not 9102 use this annotation # monitor_kubernetes_pods = true + ## Restricts Kubernetes monitoring to a single namespace + ## ex: monitor_kubernetes_pods_namespace = "default" + # monitor_kubernetes_pods_namespace = "" ## Use bearer token for authorization. ('bearer_token' takes priority) # bearer_token = "/path/to/bearer/token" diff --git a/plugins/inputs/statsd/statsd.go b/plugins/inputs/statsd/statsd.go index 6b0dd0b7883ba..8b5e15502d20f 100644 --- a/plugins/inputs/statsd/statsd.go +++ b/plugins/inputs/statsd/statsd.go @@ -13,11 +13,10 @@ import ( "sync" "time" - "github.com/influxdata/telegraf/plugins/parsers/graphite" - "github.com/influxdata/telegraf" "github.com/influxdata/telegraf/internal" "github.com/influxdata/telegraf/plugins/inputs" + "github.com/influxdata/telegraf/plugins/parsers/graphite" "github.com/influxdata/telegraf/selfstat" ) @@ -338,38 +337,64 @@ func (s *Statsd) Start(_ telegraf.Accumulator) error { s.MetricSeparator = defaultSeparator } - s.wg.Add(2) - // Start the UDP listener if s.isUDP() { - go s.udpListen() + address, err := net.ResolveUDPAddr(s.Protocol, s.ServiceAddress) + if err != nil { + return err + } + + conn, err := net.ListenUDP(s.Protocol, address) + if err != nil { + return err + } + + log.Println("I! Statsd UDP listener listening on: ", conn.LocalAddr().String()) + s.UDPlistener = conn + + s.wg.Add(1) + go func() { + defer s.wg.Done() + s.udpListen(conn) + }() } else { - go s.tcpListen() + address, err := net.ResolveTCPAddr("tcp", s.ServiceAddress) + if err != nil { + return err + } + listener, err := net.ListenTCP("tcp", address) + if err != nil { + return err + } + + log.Println("I! 
TCP Statsd listening on: ", listener.Addr().String()) + s.TCPlistener = listener + + s.wg.Add(1) + go func() { + defer s.wg.Done() + s.tcpListen(listener) + }() } + // Start the line parser - go s.parser() + s.wg.Add(1) + go func() { + defer s.wg.Done() + s.parser() + }() log.Printf("I! Started the statsd service on %s\n", s.ServiceAddress) return nil } // tcpListen() starts listening for udp packets on the configured port. -func (s *Statsd) tcpListen() error { - defer s.wg.Done() - // Start listener - var err error - address, _ := net.ResolveTCPAddr("tcp", s.ServiceAddress) - s.TCPlistener, err = net.ListenTCP("tcp", address) - if err != nil { - log.Fatalf("ERROR: ListenTCP - %s", err) - return err - } - log.Println("I! TCP Statsd listening on: ", s.TCPlistener.Addr().String()) +func (s *Statsd) tcpListen(listener *net.TCPListener) error { for { select { case <-s.done: return nil default: // Accept connection: - conn, err := s.TCPlistener.AcceptTCP() + conn, err := listener.AcceptTCP() if err != nil { return err } @@ -403,16 +428,7 @@ func (s *Statsd) tcpListen() error { } // udpListen starts listening for udp packets on the configured port. -func (s *Statsd) udpListen() error { - defer s.wg.Done() - var err error - address, _ := net.ResolveUDPAddr(s.Protocol, s.ServiceAddress) - s.UDPlistener, err = net.ListenUDP(s.Protocol, address) - if err != nil { - log.Fatalf("ERROR: ListenUDP - %s", err) - } - log.Println("I! Statsd UDP listener listening on: ", s.UDPlistener.LocalAddr().String()) - +func (s *Statsd) udpListen(conn *net.UDPConn) error { if s.ReadBufferSize > 0 { s.UDPlistener.SetReadBuffer(s.ReadBufferSize) } @@ -423,7 +439,7 @@ func (s *Statsd) udpListen() error { case <-s.done: return nil default: - n, _, err := s.UDPlistener.ReadFromUDP(buf) + n, _, err := conn.ReadFromUDP(buf) if err != nil && !strings.Contains(err.Error(), "closed network") { log.Printf("E! Error READ: %s\n", err.Error()) continue @@ -448,7 +464,6 @@ func (s *Statsd) udpListen() error { // packet into statsd strings and then calls parseStatsdLine, which parses a // single statsd metric into a struct. func (s *Statsd) parser() error { - defer s.wg.Done() for { select { case <-s.done: diff --git a/plugins/inputs/system/README.md b/plugins/inputs/system/README.md index bea9bd2d95be0..efaa8a17fa5f6 100644 --- a/plugins/inputs/system/README.md +++ b/plugins/inputs/system/README.md @@ -25,7 +25,7 @@ the `telegraf` user to be added to the `utmp` group on some systems. - n_users (integer) - n_cpus (integer) - uptime (integer, seconds) - - uptime_format (string) + - uptime_format (string, deprecated in 1.10, use `uptime` field) ### Example Output: diff --git a/plugins/inputs/system/system.go b/plugins/inputs/system/system.go index ad17c56ed06ab..5c68870bbc1a2 100644 --- a/plugins/inputs/system/system.go +++ b/plugins/inputs/system/system.go @@ -9,11 +9,10 @@ import ( "strings" "time" - "github.com/shirou/gopsutil/host" - "github.com/shirou/gopsutil/load" - "github.com/influxdata/telegraf" "github.com/influxdata/telegraf/plugins/inputs" + "github.com/shirou/gopsutil/host" + "github.com/shirou/gopsutil/load" ) type SystemStats struct{} @@ -22,7 +21,12 @@ func (_ *SystemStats) Description() string { return "Read metrics about system load & uptime" } -func (_ *SystemStats) SampleConfig() string { return "" } +func (_ *SystemStats) SampleConfig() string { + return ` + ## Uncomment to remove deprecated metrics. 
+ # fielddrop = ["uptime_format"] +` +} func (_ *SystemStats) Gather(acc telegraf.Accumulator) error { loadavg, err := load.Avg() @@ -47,22 +51,22 @@ func (_ *SystemStats) Gather(acc telegraf.Accumulator) error { now := time.Now() acc.AddGauge("system", fields, nil, now) - hostinfo, err := host.Info() + uptime, err := host.Uptime() if err != nil { return err } acc.AddCounter("system", map[string]interface{}{ - "uptime": hostinfo.Uptime, + "uptime": uptime, }, nil, now) acc.AddFields("system", map[string]interface{}{ - "uptime_format": format_uptime(hostinfo.Uptime), + "uptime_format": formatUptime(uptime), }, nil, now) return nil } -func format_uptime(uptime uint64) string { +func formatUptime(uptime uint64) string { buf := new(bytes.Buffer) w := bufio.NewWriter(buf) diff --git a/plugins/inputs/vsphere/vsphere_test.go b/plugins/inputs/vsphere/vsphere_test.go index eff56a89d2bc1..73956b5426cfe 100644 --- a/plugins/inputs/vsphere/vsphere_test.go +++ b/plugins/inputs/vsphere/vsphere_test.go @@ -25,83 +25,19 @@ import ( ) var configHeader = ` -# Telegraf Configuration -# -# Telegraf is entirely plugin driven. All metrics are gathered from the -# declared inputs, and sent to the declared outputs. -# -# Plugins must be declared in here to be active. -# To deactivate a plugin, comment out the name and any variables. -# -# Use 'telegraf -config telegraf.conf -test' to see what metrics a config -# file would generate. -# -# Environment variables can be used anywhere in this config file, simply prepend -# them with $. For strings the variable must be within quotes (ie, "$STR_VAR"), -# for numbers and booleans they should be plain (ie, $INT_VAR, $BOOL_VAR) - - -# Global tags can be specified here in key="value" format. -[global_tags] - # dc = "us-east-1" # will tag all metrics with dc=us-east-1 - # rack = "1a" - ## Environment variables can be used as tags, and throughout the config file - # user = "$USER" - - -# Configuration for telegraf agent [agent] - ## Default data collection interval for all inputs interval = "10s" - ## Rounds collection interval to 'interval' - ## ie, if interval="10s" then always collect on :00, :10, :20, etc. round_interval = true - - ## Telegraf will send metrics to outputs in batches of at most - ## metric_batch_size metrics. - ## This controls the size of writes that Telegraf sends to output plugins. metric_batch_size = 1000 - - ## For failed writes, telegraf will cache metric_buffer_limit metrics for each - ## output, and will flush this buffer on a successful write. Oldest metrics - ## are dropped first when this buffer fills. - ## This buffer only fills when writes fail to output plugin(s). metric_buffer_limit = 10000 - - ## Collection jitter is used to jitter the collection by a random amount. - ## Each plugin will sleep for a random time within jitter before collecting. - ## This can be used to avoid many plugins querying things like sysfs at the - ## same time, which can have a measurable effect on the system. collection_jitter = "0s" - - ## Default flushing interval for all outputs. You shouldn't set this below - ## interval. Maximum flush_interval will be flush_interval + flush_jitter flush_interval = "10s" - ## Jitter the flush interval by a random amount. This is primarily to avoid - ## large write spikes for users running a large number of telegraf instances. 
- ## ie, a jitter of 5s and interval 10s means flushes will happen every 10-15s flush_jitter = "0s" - - ## By default or when set to "0s", precision will be set to the same - ## timestamp order as the collection interval, with the maximum being 1s. - ## ie, when interval = "10s", precision will be "1s" - ## when interval = "250ms", precision will be "1ms" - ## Precision will NOT be used for service inputs. It is up to each individual - ## service input to set the timestamp at the appropriate precision. - ## Valid time units are "ns", "us" (or "µs"), "ms", "s". precision = "" - - ## Logging configuration: - ## Run telegraf with debug log messages. debug = false - ## Run telegraf in quiet mode (error log messages only). quiet = false - ## Specify the log file name. The empty string means to log to stderr. logfile = "" - - ## Override default hostname, if empty use os.Hostname() hostname = "" - ## If set to true, do no set the "host" tag in the telegraf agent. omit_hostname = false ` diff --git a/plugins/outputs/file/file.go b/plugins/outputs/file/file.go index 0bbff2f6400d0..0ef61df519202 100644 --- a/plugins/outputs/file/file.go +++ b/plugins/outputs/file/file.go @@ -43,17 +43,11 @@ func (f *File) Connect() error { if file == "stdout" { f.writers = append(f.writers, os.Stdout) } else { - var of *os.File - var err error - if _, err := os.Stat(file); os.IsNotExist(err) { - of, err = os.Create(file) - } else { - of, err = os.OpenFile(file, os.O_APPEND|os.O_WRONLY, os.ModeAppend) - } - + of, err := os.OpenFile(file, os.O_CREATE|os.O_APPEND|os.O_WRONLY, os.ModeAppend|0644) if err != nil { return err } + f.writers = append(f.writers, of) f.closers = append(f.closers, of) } @@ -62,16 +56,14 @@ func (f *File) Connect() error { } func (f *File) Close() error { - var errS string + var err error for _, c := range f.closers { - if err := c.Close(); err != nil { - errS += err.Error() + "\n" + errClose := c.Close() + if errClose != nil { + err = errClose } } - if errS != "" { - return fmt.Errorf(errS) - } - return nil + return err } func (f *File) SampleConfig() string { diff --git a/plugins/outputs/influxdb_v2/influxdb.go b/plugins/outputs/influxdb_v2/influxdb.go index d0d6800a6fa54..dca02b0cb33d9 100644 --- a/plugins/outputs/influxdb_v2/influxdb.go +++ b/plugins/outputs/influxdb_v2/influxdb.go @@ -162,7 +162,7 @@ func (i *InfluxDB) Write(metrics []telegraf.Metric) error { return nil } - log.Printf("E! [outputs.influxdb] when writing to [%s]: %v", client.URL(), err) + log.Printf("E! 
[outputs.influxdb_v2] when writing to [%s]: %v", client.URL(), err) } return errors.New("could not write any address") diff --git a/plugins/outputs/prometheus_client/prometheus_client.go b/plugins/outputs/prometheus_client/prometheus_client.go index db7b0c2077529..32dcdbb891f14 100644 --- a/plugins/outputs/prometheus_client/prometheus_client.go +++ b/plugins/outputs/prometheus_client/prometheus_client.go @@ -24,7 +24,10 @@ import ( "github.com/prometheus/client_golang/prometheus/promhttp" ) -var invalidNameCharRE = regexp.MustCompile(`[^a-zA-Z0-9_]`) +var ( + invalidNameCharRE = regexp.MustCompile(`[^a-zA-Z0-9_:]`) + validNameCharRE = regexp.MustCompile(`^[a-zA-Z_][a-zA-Z0-9_]*`) +) // SampleID uniquely identifies a Sample type SampleID string @@ -343,6 +346,10 @@ func sanitize(value string) string { return invalidNameCharRE.ReplaceAllString(value, "_") } +func isValidTagName(tag string) bool { + return validNameCharRE.MatchString(tag) +} + func getPromValueType(tt telegraf.ValueType) prometheus.ValueType { switch tt { case telegraf.Counter: @@ -414,7 +421,11 @@ func (p *PrometheusClient) Write(metrics []telegraf.Metric) error { labels := make(map[string]string) for k, v := range tags { - labels[sanitize(k)] = v + tName := sanitize(k) + if !isValidTagName(tName) { + continue + } + labels[tName] = v } // Prometheus doesn't have a string value type, so convert string @@ -423,7 +434,11 @@ func (p *PrometheusClient) Write(metrics []telegraf.Metric) error { for fn, fv := range point.Fields() { switch fv := fv.(type) { case string: - labels[sanitize(fn)] = fv + tName := sanitize(fn) + if !isValidTagName(tName) { + continue + } + labels[tName] = fv } } } @@ -469,6 +484,10 @@ func (p *PrometheusClient) Write(metrics []telegraf.Metric) error { } mname = sanitize(point.Name()) + if !isValidTagName(mname) { + continue + } + p.addMetricFamily(point, sample, mname, sampleID) case telegraf.Histogram: @@ -511,6 +530,10 @@ func (p *PrometheusClient) Write(metrics []telegraf.Metric) error { } mname = sanitize(point.Name()) + if !isValidTagName(mname) { + continue + } + p.addMetricFamily(point, sample, mname, sampleID) default: @@ -555,7 +578,9 @@ func (p *PrometheusClient) Write(metrics []telegraf.Metric) error { mname = sanitize(fmt.Sprintf("%s_%s", point.Name(), fn)) } } - + if !isValidTagName(mname) { + continue + } p.addMetricFamily(point, sample, mname, sampleID) } diff --git a/plugins/outputs/prometheus_client/prometheus_client_test.go b/plugins/outputs/prometheus_client/prometheus_client_test.go index b6bbe35fde34f..211e24030dc56 100644 --- a/plugins/outputs/prometheus_client/prometheus_client_test.go +++ b/plugins/outputs/prometheus_client/prometheus_client_test.go @@ -186,15 +186,15 @@ func TestWrite_Sanitize(t *testing.T) { client := NewClient() p1, err := metric.New( - "foo.bar", + "foo.bar:colon", map[string]string{"tag-with-dash": "localhost.local"}, - map[string]interface{}{"field-with-dash": 42}, + map[string]interface{}{"field-with-dash-and:colon": 42}, time.Now(), telegraf.Counter) err = client.Write([]telegraf.Metric{p1}) require.NoError(t, err) - fam, ok := client.fam["foo_bar_field_with_dash"] + fam, ok := client.fam["foo_bar:colon_field_with_dash_and:colon"] require.True(t, ok) require.Equal(t, map[string]int{"tag_with_dash": 1}, fam.LabelSet) diff --git a/plugins/outputs/wavefront/README.md b/plugins/outputs/wavefront/README.md index bc2156b13b59a..71a760900e58a 100644 --- a/plugins/outputs/wavefront/README.md +++ b/plugins/outputs/wavefront/README.md @@ -32,6 +32,10 @@ This plugin 
writes to a [Wavefront](https://www.wavefront.com) proxy, in Wavefront data format over TCP. ## When true will convert all _ (underscore) characters in final metric name. default is true #convert_paths = true + ## Use Strict rules to sanitize metric and tag names from invalid characters + ## When enabled forward slash (/) and comma (,) will be accepted + #use_strict = false + ## Use Regex to sanitize metric and tag names from invalid characters ## Regex is more thorough, but significantly slower. default is false #use_regex = false diff --git a/plugins/outputs/wavefront/wavefront.go b/plugins/outputs/wavefront/wavefront.go index 257c5512e8c1b..65666d627cacc 100644 --- a/plugins/outputs/wavefront/wavefront.go +++ b/plugins/outputs/wavefront/wavefront.go @@ -22,6 +22,7 @@ type Wavefront struct { ConvertPaths bool ConvertBool bool UseRegex bool + UseStrict bool SourceOverride []string StringToNumber map[string][]map[string]float64 @@ -37,6 +38,14 @@ var sanitizedChars = strings.NewReplacer( "=", "-", ) +// catch many of the invalid chars that could appear in a metric or tag name +var strictSanitizedChars = strings.NewReplacer( + "!", "-", "@", "-", "#", "-", "$", "-", "%", "-", "^", "-", "&", "-", + "*", "-", "(", "-", ")", "-", "+", "-", "`", "-", "'", "-", "\"", "-", + "[", "-", "]", "-", "{", "-", "}", "-", ":", "-", ";", "-", "<", "-", + ">", "-", "?", "-", "\\", "-", "|", "-", " ", "-", "=", "-", +) + // instead of Replacer which may miss some special characters we can use a regex pattern, but this is significantly slower than Replacer var sanitizedRegex = regexp.MustCompile("[^a-zA-Z\\d_.-]") @@ -71,6 +80,10 @@ var sampleConfig = ` ## When true will convert all _ (underscore) characters in final metric name. default is true #convert_paths = true + ## Use Strict rules to sanitize metric and tag names from invalid characters + ## When enabled forward slash (/) and comma (,) will be accepted + #use_strict = false + ## Use Regex to sanitize metric and tag names from invalid characters ## Regex is more thorough, but significantly slower.
default is false #use_regex = false @@ -163,6 +176,8 @@ func buildMetrics(m telegraf.Metric, w *Wavefront) []*MetricPoint { if w.UseRegex { name = sanitizedRegex.ReplaceAllLiteralString(name, "-") + } else if w.UseStrict { + name = strictSanitizedChars.Replace(name) } else { name = sanitizedChars.Replace(name) } @@ -238,6 +253,8 @@ func buildTags(mTags map[string]string, w *Wavefront) (string, map[string]string var key string if w.UseRegex { key = sanitizedRegex.ReplaceAllLiteralString(k, "-") + } else if w.UseStrict { + key = strictSanitizedChars.Replace(k) } else { key = sanitizedChars.Replace(k) } diff --git a/plugins/outputs/wavefront/wavefront_test.go b/plugins/outputs/wavefront/wavefront_test.go index 1fda6c7ae0b73..776c3698f8cdd 100644 --- a/plugins/outputs/wavefront/wavefront_test.go +++ b/plugins/outputs/wavefront/wavefront_test.go @@ -50,6 +50,13 @@ func TestBuildMetrics(t *testing.T) { {Metric: w.Prefix + "testing.metric2", Value: 1, Timestamp: timestamp, Tags: map[string]string{"tag1": "value1"}}, }, }, + { + testutil.TestMetric(float64(1), "testing_just/another,metric:float", "metric2"), + []MetricPoint{ + {Metric: w.Prefix + "testing.just-another-metric-float", Value: 1, Timestamp: timestamp, Tags: map[string]string{"tag1": "value1"}}, + {Metric: w.Prefix + "testing.metric2", Value: 1, Timestamp: timestamp, Tags: map[string]string{"tag1": "value1"}}, + }, + }, { testMetric1, []MetricPoint{{Metric: w.Prefix + "test.simple.metric", Value: 123, Timestamp: timestamp, Source: "testHost", Tags: map[string]string{"tag1": "value1"}}}, @@ -67,6 +74,46 @@ func TestBuildMetrics(t *testing.T) { } +func TestBuildMetricsStrict(t *testing.T) { + w := defaultWavefront() + w.Prefix = "testthis." + w.UseStrict = true + + pathReplacer = strings.NewReplacer("_", w.MetricSeparator) + + var timestamp int64 = 1257894000 + + var metricTests = []struct { + metric telegraf.Metric + metricPoints []MetricPoint + }{ + { + testutil.TestMetric(float64(1), "testing_just*a%metric:float", "metric2"), + []MetricPoint{ + {Metric: w.Prefix + "testing.just-a-metric-float", Value: 1, Timestamp: timestamp, Tags: map[string]string{"tag1": "value1"}}, + {Metric: w.Prefix + "testing.metric2", Value: 1, Timestamp: timestamp, Tags: map[string]string{"tag1": "value1"}}, + }, + }, + { + testutil.TestMetric(float64(1), "testing_just/another,metric:float", "metric2"), + []MetricPoint{ + {Metric: w.Prefix + "testing.just/another,metric-float", Value: 1, Timestamp: timestamp, Tags: map[string]string{"tag/1": "value1", "tag,2": "value2"}}, + {Metric: w.Prefix + "testing.metric2", Value: 1, Timestamp: timestamp, Tags: map[string]string{"tag/1": "value1", "tag,2": "value2"}}, + }, + }, + } + + for _, mt := range metricTests { + ml := buildMetrics(mt.metric, w) + for i, line := range ml { + if mt.metricPoints[i].Metric != line.Metric || mt.metricPoints[i].Value != line.Value { + t.Errorf("\nexpected\t%+v %+v\nreceived\t%+v %+v\n", mt.metricPoints[i].Metric, mt.metricPoints[i].Value, line.Metric, line.Value) + } + } + } + +} + func TestBuildMetricsWithSimpleFields(t *testing.T) { w := defaultWavefront() w.Prefix = "testthis." 
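The difference between the three sanitizers is easiest to see side by side. A condensed, runnable comparison; the replacer tables below are deliberately abbreviated to a few characters, unlike the full tables in the hunk above:

```go
package main

import (
	"fmt"
	"regexp"
	"strings"
)

// Abbreviated stand-ins for the plugin's sanitizers: the default replacer
// rewrites "/" and "," along with other specials, the strict one leaves
// them alone, and the regex rewrites anything outside its whitelist.
var (
	defaultSan = strings.NewReplacer("/", "-", ",", "-", ":", "-", "*", "-")
	strictSan  = strings.NewReplacer(":", "-", "*", "-")
	regexSan   = regexp.MustCompile(`[^a-zA-Z\d_.-]`)
)

func main() {
	name := "testing_just/another,metric:float"
	fmt.Println(defaultSan.Replace(name))                    // testing_just-another-metric-float
	fmt.Println(strictSan.Replace(name))                     // testing_just/another,metric-float
	fmt.Println(regexSan.ReplaceAllLiteralString(name, "-")) // testing_just-another-metric-float
}
```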
diff --git a/plugins/parsers/csv/parser.go b/plugins/parsers/csv/parser.go index 5f4fcc640a6f1..f8bf93e702536 100644 --- a/plugins/parsers/csv/parser.go +++ b/plugins/parsers/csv/parser.go @@ -204,7 +204,7 @@ outer: // will default to plugin name measurementName := p.MetricName - if recordFields[p.MeasurementColumn] != nil { + if recordFields[p.MeasurementColumn] != nil && recordFields[p.MeasurementColumn] != "" { measurementName = fmt.Sprintf("%v", recordFields[p.MeasurementColumn]) } diff --git a/plugins/parsers/grok/README.md b/plugins/parsers/grok/README.md index a694735de36fc..6263eecc91050 100644 --- a/plugins/parsers/grok/README.md +++ b/plugins/parsers/grok/README.md @@ -59,11 +59,15 @@ To match a comma decimal point you can use a period. For example `%{TIMESTAMP:t To match a comma decimal point you can use a period in the pattern string. See https://golang.org/pkg/time/#Parse for more details. -Telegraf has many of its own [built-in patterns](/plugins/parsers/grok/influx_patterns.go), -as well as support for most of -[logstash's builtin patterns](https://github.com/logstash-plugins/logstash-patterns-core/blob/master/patterns/grok-patterns). -_Golang regular expressions do not support lookahead or lookbehind. -logstash patterns that depend on these are not supported._ +Telegraf has many of its own [built-in patterns][] as well as support for most +of the Logstash builtin patterns using [these Go compatible patterns][grok-patterns]. + +**Note:** Golang regular expressions do not support lookahead or lookbehind. +Logstash patterns that use these features may not be supported, or may use a Go +friendly pattern that is not fully compatible with the Logstash pattern. + +[built-in patterns]: /plugins/parsers/grok/influx_patterns.go +[grok-patterns]: https://github.com/vjeantet/grok/blob/master/patterns/grok-patterns If you need help building patterns to match your logs, you will find the https://grokdebug.herokuapp.com application quite useful! diff --git a/plugins/parsers/grok/influx_patterns.go b/plugins/parsers/grok/influx_patterns.go index b7853c742a5e8..282c28111b14c 100644 --- a/plugins/parsers/grok/influx_patterns.go +++ b/plugins/parsers/grok/influx_patterns.go @@ -1,45 +1,6 @@ package grok const DEFAULT_PATTERNS = ` -# Captures are a slightly modified version of logstash "grok" patterns, with -# the format %{[:][:]} -# By default all named captures are converted into string fields. -# If a pattern does not have a semantic name it will not be captured. -# Modifiers can be used to convert captures to other types or tags. -# Timestamp modifiers can be used to convert captures to the timestamp of the -# parsed metric. 
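The lookahead caveat in the grok README above comes from Go's RE2 engine rather than from Telegraf itself; a small standalone demonstration (the quoted-string pattern below is an illustrative stand-in, not grok's exact QS definition):

```go
package main

import (
	"fmt"
	"regexp"
)

func main() {
	// RE2 rejects lookaround outright, which is why some Logstash patterns
	// cannot be ported to Telegraf verbatim.
	_, err := regexp.Compile(`foo(?=bar)`)
	fmt.Println(err) // invalid or unsupported Perl syntax: `(?=`

	// A Go-friendly quoted-string pattern matches without lookahead.
	qs := regexp.MustCompile(`"(?:[^"\\]|\\.)*"`)
	fmt.Println(qs.FindString(`referrer "http://example.com/" agent "curl"`))
}
```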
- -# View logstash grok pattern docs here: -# https://www.elastic.co/guide/en/logstash/current/plugins-filters-grok.html -# All default logstash patterns are supported, these can be viewed here: -# https://github.com/logstash-plugins/logstash-patterns-core/blob/master/patterns/grok-patterns - -# Available modifiers: -# string (default if nothing is specified) -# int -# float -# duration (ie, 5.23ms gets converted to int nanoseconds) -# tag (converts the field into a tag) -# drop (drops the field completely) -# Timestamp modifiers: -# ts-ansic ("Mon Jan _2 15:04:05 2006") -# ts-unix ("Mon Jan _2 15:04:05 MST 2006") -# ts-ruby ("Mon Jan 02 15:04:05 -0700 2006") -# ts-rfc822 ("02 Jan 06 15:04 MST") -# ts-rfc822z ("02 Jan 06 15:04 -0700") -# ts-rfc850 ("Monday, 02-Jan-06 15:04:05 MST") -# ts-rfc1123 ("Mon, 02 Jan 2006 15:04:05 MST") -# ts-rfc1123z ("Mon, 02 Jan 2006 15:04:05 -0700") -# ts-rfc3339 ("2006-01-02T15:04:05Z07:00") -# ts-rfc3339nano ("2006-01-02T15:04:05.999999999Z07:00") -# ts-httpd ("02/Jan/2006:15:04:05 -0700") -# ts-epoch (seconds since unix epoch) -# ts-epochnano (nanoseconds since unix epoch) -# ts-"CUSTOM" -# CUSTOM time layouts must be within quotes and be the representation of the -# "reference time", which is Mon Jan 2 15:04:05 -0700 MST 2006 -# See https://golang.org/pkg/time/#Parse for more details. - # Example log file pattern, example log looks like this: # [04/Jun/2016:12:41:45 +0100] 1.25 200 192.168.1.1 5.432µs # Breakdown of the DURATION pattern below: @@ -69,7 +30,7 @@ COMMON_LOG_FORMAT %{CLIENT:client_ip} %{NOTSPACE:ident} %{NOTSPACE:auth} \[%{HTT # Combined log format is the same as the common log format but with the addition # of two quoted strings at the end for "referrer" and "agent" # See Examples at http://httpd.apache.org/docs/current/mod/mod_log_config.html -COMBINED_LOG_FORMAT %{COMMON_LOG_FORMAT} %{QS:referrer} %{QS:agent} +COMBINED_LOG_FORMAT %{COMMON_LOG_FORMAT} "%{DATA:referrer}" "%{DATA:agent}" # HTTPD log formats HTTPD20_ERRORLOG \[%{HTTPDERROR_DATE:timestamp}\] \[%{LOGLEVEL:loglevel:tag}\] (?:\[client %{IPORHOST:clientip}\] ){0,1}%{GREEDYDATA:errormsg} diff --git a/plugins/parsers/grok/parser.go b/plugins/parsers/grok/parser.go index 5984e288e9363..cecb69f94cc43 100644 --- a/plugins/parsers/grok/parser.go +++ b/plugins/parsers/grok/parser.go @@ -271,7 +271,7 @@ func (p *Parser) ParseLine(line string) (telegraf.Metric, error) { case TAG: tags[k] = v case STRING: - fields[k] = strings.Trim(v, `"`) + fields[k] = v case EPOCH: parts := strings.SplitN(v, ".", 2) if len(parts) == 0 { diff --git a/plugins/parsers/grok/parser_test.go b/plugins/parsers/grok/parser_test.go index 23af0af44032f..2b8815264d854 100644 --- a/plugins/parsers/grok/parser_test.go +++ b/plugins/parsers/grok/parser_test.go @@ -1047,3 +1047,24 @@ func TestEmptyYearInTimestamp(t *testing.T) { require.NotNil(t, m) require.Equal(t, time.Now().Year(), m.Time().Year()) } + +func TestTrimRegression(t *testing.T) { + // https://github.com/influxdata/telegraf/issues/4998 + p := &Parser{ + Patterns: []string{`%{GREEDYDATA:message:string}`}, + } + require.NoError(t, p.Compile()) + + actual, err := p.ParseLine(`level=info msg="ok"`) + require.NoError(t, err) + + expected := testutil.MustMetric( + "", + map[string]string{}, + map[string]interface{}{ + "message": `level=info msg="ok"`, + }, + actual.Time(), + ) + require.Equal(t, expected, actual) +} diff --git a/plugins/parsers/influx/handler.go b/plugins/parsers/influx/handler.go index c488a9c98b945..928671cc92791 100644 --- 
a/plugins/parsers/influx/handler.go +++ b/plugins/parsers/influx/handler.go @@ -118,3 +118,7 @@ func (h *MetricHandler) SetTimestamp(tm []byte) error { h.builder.SetTime(time.Unix(0, ns)) return nil } + +func (h *MetricHandler) Reset() { + h.builder.Reset() +} diff --git a/plugins/parsers/influx/parser.go b/plugins/parsers/influx/parser.go index 8b9272b695aa0..f1cd9a0325ecd 100644 --- a/plugins/parsers/influx/parser.go +++ b/plugins/parsers/influx/parser.go @@ -75,6 +75,7 @@ func (p *Parser) Parse(input []byte) ([]telegraf.Metric, error) { } if err != nil { + p.handler.Reset() return nil, &ParseError{ Offset: p.machine.Position(), LineOffset: p.machine.LineOffset(), @@ -87,6 +88,7 @@ func (p *Parser) Parse(input []byte) ([]telegraf.Metric, error) { metric, err := p.handler.Metric() if err != nil { + p.handler.Reset() return nil, err } diff --git a/plugins/parsers/logfmt/README.md b/plugins/parsers/logfmt/README.md index fb3a565b329eb..d3e8ab66f534f 100644 --- a/plugins/parsers/logfmt/README.md +++ b/plugins/parsers/logfmt/README.md @@ -15,10 +15,6 @@ The `logfmt` data format parses data in [logfmt] format. ## more about them here: ## https://github.com/influxdata/telegraf/blob/master/docs/DATA_FORMATS_INPUT.md data_format = "logfmt" - - ## Set the name of the created metric, if unset the name of the plugin will - ## be used. - metric_name = "logfmt" ``` ### Metrics diff --git a/plugins/parsers/nagios/parser.go b/plugins/parsers/nagios/parser.go index 858f5082c8c99..e4058852bf2e2 100644 --- a/plugins/parsers/nagios/parser.go +++ b/plugins/parsers/nagios/parser.go @@ -1,17 +1,78 @@ package nagios import ( + "bufio" + "bytes" "errors" + "fmt" "log" + "os/exec" "regexp" "strconv" "strings" + "syscall" "time" "github.com/influxdata/telegraf" "github.com/influxdata/telegraf/metric" ) +// getExitCode get the exit code from an error value which is the result +// of running a command through exec package api. +func getExitCode(err error) (int, error) { + if err == nil { + return 0, nil + } + + ee, ok := err.(*exec.ExitError) + if !ok { + // If it is not an *exec.ExitError, then it must be + // an io error, but docs do not say anything about the + // exit code in this case. + return 0, errors.New("expected *exec.ExitError") + } + + ws, ok := ee.Sys().(syscall.WaitStatus) + if !ok { + return 0, errors.New("expected syscall.WaitStatus") + } + + return ws.ExitStatus(), nil +} + +// TryAddState attempts to add a state derived from the runErr. +// If any error occurs, it is guaranteed to be returned along with +// the initial metric slice. 
+func TryAddState(runErr error, metrics []telegraf.Metric) ([]telegraf.Metric, error) { + state, err := getExitCode(runErr) + if err != nil { + return metrics, fmt.Errorf("exec: get exit code: %s", err) + } + + for _, m := range metrics { + if m.Name() == "nagios_state" { + m.AddField("state", state) + return metrics, nil + } + } + + var ts time.Time + if len(metrics) != 0 { + ts = metrics[0].Time() + } else { + ts = time.Now().UTC() + } + f := map[string]interface{}{ + "state": state, + } + m, err := metric.New("nagios_state", nil, f, ts) + if err != nil { + return metrics, err + } + metrics = append(metrics, m) + return metrics, nil +} + type NagiosParser struct { MetricName string DefaultTags map[string]string @@ -34,27 +95,88 @@ func (p *NagiosParser) SetDefaultTags(tags map[string]string) { } func (p *NagiosParser) Parse(buf []byte) ([]telegraf.Metric, error) { + ts := time.Now().UTC() + + s := bufio.NewScanner(bytes.NewReader(buf)) + + var msg bytes.Buffer + var longmsg bytes.Buffer + metrics := make([]telegraf.Metric, 0) - lines := strings.Split(strings.TrimSpace(string(buf)), "\n") - for _, line := range lines { - data_splitted := strings.Split(line, "|") + // Scan the first line. + if !s.Scan() && s.Err() != nil { + return nil, s.Err() + } + parts := bytes.Split(s.Bytes(), []byte{'|'}) + switch len(parts) { + case 2: + ms, err := parsePerfData(string(parts[1]), ts) + if err != nil { + log.Printf("E! [parser.nagios] failed to parse performance data: %s\n", err.Error()) + } + metrics = append(metrics, ms...) + fallthrough + case 1: + msg.Write(bytes.TrimSpace(parts[0])) + default: + return nil, errors.New("illegal output format") + } - if len(data_splitted) != 2 { - // got human readable output only or bad line - continue + // Read long output. + for s.Scan() { + if bytes.Contains(s.Bytes(), []byte{'|'}) { + parts := bytes.Split(s.Bytes(), []byte{'|'}) + if longmsg.Len() != 0 { + longmsg.WriteByte('\n') + } + longmsg.Write(bytes.TrimSpace(parts[0])) + + ms, err := parsePerfData(string(parts[1]), ts) + if err != nil { + log.Printf("E! [parser.nagios] failed to parse performance data: %s\n", err.Error()) + } + metrics = append(metrics, ms...) + break + } + if longmsg.Len() != 0 { + longmsg.WriteByte('\n') } - m, err := parsePerfData(data_splitted[1]) + longmsg.Write(bytes.TrimSpace((s.Bytes()))) + } + + // Parse extra performance data. + for s.Scan() { + ms, err := parsePerfData(s.Text(), ts) if err != nil { log.Printf("E! [parser.nagios] failed to parse performance data: %s\n", err.Error()) - continue } - metrics = append(metrics, m...) + metrics = append(metrics, ms...) } + + if s.Err() != nil { + log.Printf("D! [parser.nagios] unexpected io error: %s\n", s.Err()) + } + + // Create nagios state. + fields := map[string]interface{}{ + "service_output": msg.String(), + } + if longmsg.Len() != 0 { + fields["long_service_output"] = longmsg.String() + } + + m, err := metric.New("nagios_state", nil, fields, ts) + if err == nil { + metrics = append(metrics, m) + } else { + log.Printf("E! 
[parser.nagios] failed to add nagios_state: %s\n", err) + } + return metrics, nil } -func parsePerfData(perfdatas string) ([]telegraf.Metric, error) { +func parsePerfData(perfdatas string, timestamp time.Time) ([]telegraf.Metric, error) { metrics := make([]telegraf.Metric, 0) for _, unParsedPerf := range perfSplitRegExp.FindAllString(perfdatas, -1) { @@ -125,7 +247,7 @@ func parsePerfData(perfdatas string) ([]telegraf.Metric, error) { } // Create metric - metric, err := metric.New("nagios", tags, fields, time.Now().UTC()) + metric, err := metric.New("nagios", tags, fields, timestamp) if err != nil { return nil, err } diff --git a/plugins/parsers/nagios/parser_test.go b/plugins/parsers/nagios/parser_test.go index a4da3003038e2..7f5b5937ec0c9 100644 --- a/plugins/parsers/nagios/parser_test.go +++ b/plugins/parsers/nagios/parser_test.go @@ -1,112 +1,476 @@ package nagios import ( + "errors" "testing" + "time" "github.com/stretchr/testify/assert" "github.com/stretchr/testify/require" + + "github.com/influxdata/telegraf" + "github.com/influxdata/telegraf/metric" + "github.com/influxdata/telegraf/testutil" ) -const validOutput1 = `PING OK - Packet loss = 0%, RTA = 0.30 ms|rta=0.298000ms;4000.000000;6000.000000;0.000000 pl=0%;80;90;0;100 -This is a long output -with three lines -` -const validOutput2 = "TCP OK - 0.008 second response time on port 80|time=0.008457s;;;0.000000;10.000000" -const validOutput3 = "TCP OK - 0.008 second response time on port 80|time=0.008457" -const validOutput4 = "OK: Load average: 0.00, 0.01, 0.05 | 'load1'=0.00;~:4;@0:6;0; 'load5'=0.01;3;0:5;0; 'load15'=0.05;0:2;0:4;0;" -const invalidOutput3 = "PING OK - Packet loss = 0%, RTA = 0.30 ms" -const invalidOutput4 = "PING OK - Packet loss = 0%, RTA = 0.30 ms| =3;;;; dgasdg =;;;; sff=;;;;" - -func TestParseValidOutput(t *testing.T) { - parser := NagiosParser{ - MetricName: "nagios_test", +func TestGetExitCode(t *testing.T) { + tests := []struct { + name string + errF func() error + expCode int + expErr error + }{ + { + name: "nil error passed is ok", + errF: func() error { + return nil + }, + expCode: 0, + expErr: nil, + }, + { + name: "unexpected error type", + errF: func() error { + return errors.New("I am not *exec.ExitError") + }, + expCode: 0, + expErr: errors.New("expected *exec.ExitError"), + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + e := tt.errF() + code, err := getExitCode(e) + + require.Equal(t, tt.expCode, code) + require.Equal(t, tt.expErr, err) + }) + } +} + +type metricBuilder struct { + name string + tags map[string]string + fields map[string]interface{} + timestamp time.Time +} + +func mb() *metricBuilder { + return &metricBuilder{} +} + +func (b *metricBuilder) n(v string) *metricBuilder { + b.name = v + return b +} + +func (b *metricBuilder) t(k, v string) *metricBuilder { + if b.tags == nil { + b.tags = make(map[string]string) + } + b.tags[k] = v + return b +} + +func (b *metricBuilder) f(k string, v interface{}) *metricBuilder { + if b.fields == nil { + b.fields = make(map[string]interface{}) } + b.fields[k] = v + return b +} - // Output1 - metrics, err := parser.Parse([]byte(validOutput1)) - require.NoError(t, err) - require.Len(t, metrics, 2) - // rta - assert.Equal(t, "rta", metrics[0].Tags()["perfdata"]) - assert.Equal(t, map[string]interface{}{ - "value": float64(0.298), - "warning_lt": float64(0), - "warning_gt": float64(4000), - "critical_lt": float64(0), - "critical_gt": float64(6000), - "min": float64(0), - }, metrics[0].Fields()) - 
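The `getExitCode` helper above relies on `os/exec` surfacing a non-zero exit as `*exec.ExitError`; a self-contained sketch of the same extraction on a Unix-like system (`exitCode` here is a hypothetical mirror of the helper, not the parser's exported API):

```go
package main

import (
	"errors"
	"fmt"
	"os/exec"
	"syscall"
)

// exitCode unwraps the *exec.ExitError produced by a non-zero exit and
// reads the numeric status out of the wait status.
func exitCode(err error) (int, error) {
	if err == nil {
		return 0, nil
	}
	ee, ok := err.(*exec.ExitError)
	if !ok {
		return 0, errors.New("expected *exec.ExitError")
	}
	ws, ok := ee.Sys().(syscall.WaitStatus)
	if !ok {
		return 0, errors.New("expected syscall.WaitStatus")
	}
	return ws.ExitStatus(), nil
}

func main() {
	err := exec.Command("sh", "-c", "exit 2").Run()
	code, _ := exitCode(err)
	fmt.Println(code) // 2
}
```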
assert.Equal(t, map[string]string{"unit": "ms", "perfdata": "rta"}, metrics[0].Tags()) - // pl - assert.Equal(t, "pl", metrics[1].Tags()["perfdata"]) - assert.Equal(t, map[string]interface{}{ - "value": float64(0), - "warning_lt": float64(0), - "warning_gt": float64(80), - "critical_lt": float64(0), - "critical_gt": float64(90), - "min": float64(0), - "max": float64(100), - }, metrics[1].Fields()) - assert.Equal(t, map[string]string{"unit": "%", "perfdata": "pl"}, metrics[1].Tags()) - - // Output2 - metrics, err = parser.Parse([]byte(validOutput2)) - require.NoError(t, err) - require.Len(t, metrics, 1) - // time - assert.Equal(t, "time", metrics[0].Tags()["perfdata"]) - assert.Equal(t, map[string]interface{}{ - "value": float64(0.008457), - "min": float64(0), - "max": float64(10), - }, metrics[0].Fields()) - assert.Equal(t, map[string]string{"unit": "s", "perfdata": "time"}, metrics[0].Tags()) - - // Output3 - metrics, err = parser.Parse([]byte(validOutput3)) - require.NoError(t, err) - require.Len(t, metrics, 1) - // time - assert.Equal(t, "time", metrics[0].Tags()["perfdata"]) - assert.Equal(t, map[string]interface{}{ - "value": float64(0.008457), - }, metrics[0].Fields()) - assert.Equal(t, map[string]string{"perfdata": "time"}, metrics[0].Tags()) - - // Output4 - metrics, err = parser.Parse([]byte(validOutput4)) - require.NoError(t, err) - require.Len(t, metrics, 3) - // load - // const validOutput4 = "OK: Load average: 0.00, 0.01, 0.05 | 'load1'=0.00;0:4;0:6;0; 'load5'=0.01;0:3;0:5;0; 'load15'=0.05;0:2;0:4;0;" - assert.Equal(t, map[string]interface{}{ - "value": float64(0.00), - "warning_lt": MinFloat64, - "warning_gt": float64(4), - "critical_le": float64(0), - "critical_ge": float64(6), - "min": float64(0), - }, metrics[0].Fields()) - - assert.Equal(t, map[string]string{"perfdata": "load1"}, metrics[0].Tags()) +func (b *metricBuilder) ts(v time.Time) *metricBuilder { + b.timestamp = v + return b } -func TestParseInvalidOutput(t *testing.T) { +func (b *metricBuilder) b() telegraf.Metric { + m, err := metric.New(b.name, b.tags, b.fields, b.timestamp) + if err != nil { + panic(err) + } + return m +} + +// assertEqual asserts two slices to be equal. Note, that the order +// of the entries matters. +func assertEqual(t *testing.T, exp, actual []telegraf.Metric) { + require.Equal(t, len(exp), len(actual)) + for i := 0; i < len(exp); i++ { + ok := testutil.MetricEqual(exp[i], actual[i]) + require.True(t, ok) + } +} + +func TestTryAddState(t *testing.T) { + tests := []struct { + name string + runErrF func() error + metrics []telegraf.Metric + assertF func(*testing.T, []telegraf.Metric, error) + }{ + { + name: "should append state=0 field to existing metric", + runErrF: func() error { + return nil + }, + metrics: []telegraf.Metric{ + mb(). + n("nagios"). + f("perfdata", 0).b(), + mb(). + n("nagios_state"). + f("service_output", "OK: system working").b(), + }, + assertF: func(t *testing.T, metrics []telegraf.Metric, err error) { + exp := []telegraf.Metric{ + mb(). + n("nagios"). + f("perfdata", 0).b(), + mb(). + n("nagios_state"). + f("service_output", "OK: system working"). + f("state", 0).b(), + } + assertEqual(t, exp, metrics) + require.NoError(t, err) + }, + }, + { + name: "should create 'nagios_state state=0' and same timestamp as others", + runErrF: func() error { + return nil + }, + metrics: []telegraf.Metric{ + mb(). + n("nagios"). + f("perfdata", 0).b(), + }, + assertF: func(t *testing.T, metrics []telegraf.Metric, err error) { + exp := []telegraf.Metric{ + mb(). + n("nagios"). 
+ f("perfdata", 0).b(), + mb(). + n("nagios_state"). + f("state", 0).b(), + } + assertEqual(t, exp, metrics) + require.NoError(t, err) + }, + }, + { + name: "should create 'nagios_state state=0' and recent timestamp", + runErrF: func() error { + return nil + }, + metrics: []telegraf.Metric{}, + assertF: func(t *testing.T, metrics []telegraf.Metric, err error) { + require.Len(t, metrics, 1) + m := metrics[0] + require.Equal(t, "nagios_state", m.Name()) + s, ok := m.GetField("state") + require.True(t, ok) + require.Equal(t, int64(0), s) + require.WithinDuration(t, time.Now().UTC(), m.Time(), 10*time.Second) + require.NoError(t, err) + }, + }, + { + name: "should return original metrics and an error", + runErrF: func() error { + return errors.New("non parsable error") + }, + metrics: []telegraf.Metric{ + mb(). + n("nagios"). + f("perfdata", 0).b(), + }, + assertF: func(t *testing.T, metrics []telegraf.Metric, err error) { + exp := []telegraf.Metric{ + mb(). + n("nagios"). + f("perfdata", 0).b(), + } + expErr := "exec: get exit code: expected *exec.ExitError" + + assertEqual(t, exp, metrics) + require.Equal(t, expErr, err.Error()) + }, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + metrics, err := TryAddState(tt.runErrF(), tt.metrics) + tt.assertF(t, metrics, err) + }) + } +} + +func assertNagiosState(t *testing.T, m telegraf.Metric, f map[string]interface{}) { + assert.Equal(t, map[string]string{}, m.Tags()) + assert.Equal(t, f, m.Fields()) +} + +func TestParse(t *testing.T) { parser := NagiosParser{ MetricName: "nagios_test", } - // invalidOutput3 - metrics, err := parser.Parse([]byte(invalidOutput3)) - require.NoError(t, err) - require.Len(t, metrics, 0) + tests := []struct { + name string + input string + assertF func(*testing.T, []telegraf.Metric, error) + }{ + { + name: "valid output 1", + input: `PING OK - Packet loss = 0%, RTA = 0.30 ms|rta=0.298000ms;4000.000000;6000.000000;0.000000 pl=0%;80;90;0;100 +This is a long output +with three lines +`, + assertF: func(t *testing.T, metrics []telegraf.Metric, err error) { + require.NoError(t, err) + require.Len(t, metrics, 3) + // rta + assert.Equal(t, map[string]string{ + "unit": "ms", + "perfdata": "rta", + }, metrics[0].Tags()) + assert.Equal(t, map[string]interface{}{ + "value": float64(0.298), + "warning_lt": float64(0), + "warning_gt": float64(4000), + "critical_lt": float64(0), + "critical_gt": float64(6000), + "min": float64(0), + }, metrics[0].Fields()) + + // pl + assert.Equal(t, map[string]string{ + "unit": "%", + "perfdata": "pl", + }, metrics[1].Tags()) + assert.Equal(t, map[string]interface{}{ + "value": float64(0), + "warning_lt": float64(0), + "warning_gt": float64(80), + "critical_lt": float64(0), + "critical_gt": float64(90), + "min": float64(0), + "max": float64(100), + }, metrics[1].Fields()) + + assertNagiosState(t, metrics[2], map[string]interface{}{ + "service_output": "PING OK - Packet loss = 0%, RTA = 0.30 ms", + "long_service_output": "This is a long output\nwith three lines", + }) + }, + }, + { + name: "valid output 2", + input: "TCP OK - 0.008 second response time on port 80|time=0.008457s;;;0.000000;10.000000", + assertF: func(t *testing.T, metrics []telegraf.Metric, err error) { + require.NoError(t, err) + require.Len(t, metrics, 2) + // time + assert.Equal(t, map[string]string{ + "unit": "s", + "perfdata": "time", + }, metrics[0].Tags()) + assert.Equal(t, map[string]interface{}{ + "value": float64(0.008457), + "min": float64(0), + "max": float64(10), + }, metrics[0].Fields()) - // 
invalidOutput4 - metrics, err = parser.Parse([]byte(invalidOutput4)) - require.NoError(t, err) - require.Len(t, metrics, 0) + assertNagiosState(t, metrics[1], map[string]interface{}{ + "service_output": "TCP OK - 0.008 second response time on port 80", + }) + }, + }, + { + name: "valid output 3", + input: "TCP OK - 0.008 second response time on port 80|time=0.008457", + assertF: func(t *testing.T, metrics []telegraf.Metric, err error) { + require.NoError(t, err) + require.Len(t, metrics, 2) + // time + assert.Equal(t, map[string]string{ + "perfdata": "time", + }, metrics[0].Tags()) + assert.Equal(t, map[string]interface{}{ + "value": float64(0.008457), + }, metrics[0].Fields()) + assertNagiosState(t, metrics[1], map[string]interface{}{ + "service_output": "TCP OK - 0.008 second response time on port 80", + }) + }, + }, + { + name: "valid output 4", + input: "OK: Load average: 0.00, 0.01, 0.05 | 'load1'=0.00;~:4;@0:6;0; 'load5'=0.01;3;0:5;0; 'load15'=0.05;0:2;0:4;0;", + assertF: func(t *testing.T, metrics []telegraf.Metric, err error) { + require.NoError(t, err) + require.Len(t, metrics, 4) + // load1 + assert.Equal(t, map[string]string{ + "perfdata": "load1", + }, metrics[0].Tags()) + assert.Equal(t, map[string]interface{}{ + "value": float64(0.00), + "warning_lt": MinFloat64, + "warning_gt": float64(4), + "critical_le": float64(0), + "critical_ge": float64(6), + "min": float64(0), + }, metrics[0].Fields()) + + // load5 + assert.Equal(t, map[string]string{ + "perfdata": "load5", + }, metrics[1].Tags()) + assert.Equal(t, map[string]interface{}{ + "value": float64(0.01), + "warning_gt": float64(3), + "warning_lt": float64(0), + "critical_lt": float64(0), + "critical_gt": float64(5), + "min": float64(0), + }, metrics[1].Fields()) + + // load15 + assert.Equal(t, map[string]string{ + "perfdata": "load15", + }, metrics[2].Tags()) + assert.Equal(t, map[string]interface{}{ + "value": float64(0.05), + "warning_lt": float64(0), + "warning_gt": float64(2), + "critical_lt": float64(0), + "critical_gt": float64(4), + "min": float64(0), + }, metrics[2].Fields()) + + assertNagiosState(t, metrics[3], map[string]interface{}{ + "service_output": "OK: Load average: 0.00, 0.01, 0.05", + }) + }, + }, + { + name: "no perf data", + input: "PING OK - Packet loss = 0%, RTA = 0.30 ms", + assertF: func(t *testing.T, metrics []telegraf.Metric, err error) { + require.NoError(t, err) + require.Len(t, metrics, 1) + + assertNagiosState(t, metrics[0], map[string]interface{}{ + "service_output": "PING OK - Packet loss = 0%, RTA = 0.30 ms", + }) + }, + }, + { + name: "malformed perf data", + input: "PING OK - Packet loss = 0%, RTA = 0.30 ms| =3;;;; dgasdg =;;;; sff=;;;;", + assertF: func(t *testing.T, metrics []telegraf.Metric, err error) { + require.NoError(t, err) + require.Len(t, metrics, 1) + + assertNagiosState(t, metrics[0], map[string]interface{}{ + "service_output": "PING OK - Packet loss = 0%, RTA = 0.30 ms", + }) + }, + }, + { + name: "from https://assets.nagios.com/downloads/nagioscore/docs/nagioscore/3/en/pluginapi.html", + input: `DISK OK - free space: / 3326 MB (56%); | /=2643MB;5948;5958;0;5968 +/ 15272 MB (77%); +/boot 68 MB (69%); +/home 69357 MB (27%); +/var/log 819 MB (84%); | /boot=68MB;88;93;0;98 +/home=69357MB;253404;253409;0;253414 +/var/log=818MB;970;975;0;980 +`, + assertF: func(t *testing.T, metrics []telegraf.Metric, err error) { + require.NoError(t, err) + require.Len(t, metrics, 5) + // /=2643MB;5948;5958;0;5968 + assert.Equal(t, map[string]string{ + "unit": "MB", + "perfdata": "/", + }, 
metrics[0].Tags()) + assert.Equal(t, map[string]interface{}{ + "value": float64(2643), + "warning_lt": float64(0), + "warning_gt": float64(5948), + "critical_lt": float64(0), + "critical_gt": float64(5958), + "min": float64(0), + "max": float64(5968), + }, metrics[0].Fields()) + + // /boot=68MB;88;93;0;98 + assert.Equal(t, map[string]string{ + "unit": "MB", + "perfdata": "/boot", + }, metrics[1].Tags()) + assert.Equal(t, map[string]interface{}{ + "value": float64(68), + "warning_lt": float64(0), + "warning_gt": float64(88), + "critical_lt": float64(0), + "critical_gt": float64(93), + "min": float64(0), + "max": float64(98), + }, metrics[1].Fields()) + + // /home=69357MB;253404;253409;0;253414 + assert.Equal(t, map[string]string{ + "unit": "MB", + "perfdata": "/home", + }, metrics[2].Tags()) + assert.Equal(t, map[string]interface{}{ + "value": float64(69357), + "warning_lt": float64(0), + "warning_gt": float64(253404), + "critical_lt": float64(0), + "critical_gt": float64(253409), + "min": float64(0), + "max": float64(253414), + }, metrics[2].Fields()) + + // /var/log=818MB;970;975;0;980 + assert.Equal(t, map[string]string{ + "unit": "MB", + "perfdata": "/var/log", + }, metrics[3].Tags()) + assert.Equal(t, map[string]interface{}{ + "value": float64(818), + "warning_lt": float64(0), + "warning_gt": float64(970), + "critical_lt": float64(0), + "critical_gt": float64(975), + "min": float64(0), + "max": float64(980), + }, metrics[3].Fields()) + + assertNagiosState(t, metrics[4], map[string]interface{}{ + "service_output": "DISK OK - free space: / 3326 MB (56%);", + "long_service_output": "/ 15272 MB (77%);\n/boot 68 MB (69%);\n/home 69357 MB (27%);\n/var/log 819 MB (84%);", + }) + }, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + metrics, err := parser.Parse([]byte(tt.input)) + tt.assertF(t, metrics, err) + }) + } } func TestParseThreshold(t *testing.T) { diff --git a/plugins/serializers/registry.go b/plugins/serializers/registry.go index cbc5981a689d8..ecac6332393b8 100644 --- a/plugins/serializers/registry.go +++ b/plugins/serializers/registry.go @@ -12,6 +12,7 @@ import ( "github.com/influxdata/telegraf/plugins/serializers/json" "github.com/influxdata/telegraf/plugins/serializers/nowmetric" "github.com/influxdata/telegraf/plugins/serializers/splunkmetric" + "github.com/influxdata/telegraf/plugins/serializers/wavefront" ) // SerializerOutput is an interface for output plugins that are able to @@ -66,6 +67,13 @@ type Config struct { // Include HEC routing fields for splunkmetric output HecRouting bool + + // Point tags to use as the source name for Wavefront (if none found, host will be used). + WavefrontSourceOverride []string + + // Use Strict rules to sanitize metric and tag names from invalid characters for Wavefront + // When enabled forward slash (/) and comma (,) will be accepted + WavefrontUseStrict bool } // NewSerializer a Serializer interface based on the given config. 
@@ -85,12 +93,18 @@ func NewSerializer(config *Config) (Serializer, error) { serializer, err = NewNowSerializer() case "carbon2": serializer, err = NewCarbon2Serializer() + case "wavefront": + serializer, err = NewWavefrontSerializer(config.Prefix, config.WavefrontUseStrict, config.WavefrontSourceOverride) default: err = fmt.Errorf("Invalid data format: %s", config.DataFormat) } return serializer, err } +func NewWavefrontSerializer(prefix string, useStrict bool, sourceOverride []string) (Serializer, error) { + return wavefront.NewSerializer(prefix, useStrict, sourceOverride) +} + func NewJsonSerializer(timestampUnits time.Duration) (Serializer, error) { return json.NewSerializer(timestampUnits) } diff --git a/plugins/serializers/wavefront/README.md b/plugins/serializers/wavefront/README.md new file mode 100644 index 0000000000000..7a6594da3831e --- /dev/null +++ b/plugins/serializers/wavefront/README.md @@ -0,0 +1,47 @@ +# Wavefront + +The `wavefront` serializer translates the Telegraf metric format to the [Wavefront Data Format](https://docs.wavefront.com/wavefront_data_format.html). + +### Configuration + +```toml +[[outputs.file]] + files = ["stdout"] + + ## Use Strict rules to sanitize metric and tag names from invalid characters + ## When enabled forward slash (/) and comma (,) will be accepted + # wavefront_use_strict = false + + ## point tags to use as the source name for Wavefront (if none found, host will be used) + # wavefront_source_override = ["hostname", "address", "agent_host", "node_host"] + + ## Data format to output. + ## Each data format has its own unique set of configuration options, read + ## more about them here: + ## https://github.com/influxdata/telegraf/blob/master/docs/DATA_FORMATS_OUTPUT.md + data_format = "wavefront" +``` + +### Metrics + +A Wavefront metric is equivalent to a single field value of a Telegraf measurement. +The Wavefront metric name will be: `<measurement_name>.<field_name>` +If a prefix is specified it will be honored. +Only boolean and numeric metrics will be serialized, all other types will generate +an error.
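A sketch of how an output would reach this serializer through the registry entry added above; the metric values and the expected output line are illustrative, and the `Config` field names match the additions in this patch:

```go
package main

import (
	"fmt"
	"log"
	"time"

	"github.com/influxdata/telegraf/metric"
	"github.com/influxdata/telegraf/plugins/serializers"
)

func main() {
	// Wire up the serializer the same way an output plugin would.
	s, err := serializers.NewSerializer(&serializers.Config{
		DataFormat:              "wavefront",
		Prefix:                  "telegraf.",
		WavefrontUseStrict:      true,
		WavefrontSourceOverride: []string{"hostname"},
	})
	if err != nil {
		log.Fatal(err)
	}

	m, err := metric.New("cpu",
		map[string]string{"host": "demoHost", "cpu": "cpu0"},
		map[string]interface{}{"usage_idle": 91.5},
		time.Unix(1257894000, 0),
	)
	if err != nil {
		log.Fatal(err)
	}

	out, err := s.Serialize(m)
	if err != nil {
		log.Fatal(err)
	}
	// Expected shape, per the tests later in this patch:
	// "telegraf.cpu.usage.idle" 91.500000 1257894000 source="demoHost" "cpu"="cpu0"
	fmt.Print(string(out))
}
```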
+ +### Example + +The following Telegraf metric + +``` +cpu,cpu=cpu0,host=testHost user=12,idle=88,system=0 1234567890 +``` + +will serialize into the following Wavefront metrics + +``` +"cpu.user" 12.000000 1234567890 source="testHost" "cpu"="cpu0" +"cpu.idle" 88.000000 1234567890 source="testHost" "cpu"="cpu0" +"cpu.system" 0.000000 1234567890 source="testHost" "cpu"="cpu0" +``` diff --git a/plugins/serializers/wavefront/wavefront.go b/plugins/serializers/wavefront/wavefront.go new file mode 100755 index 0000000000000..70b87512fad61 --- /dev/null +++ b/plugins/serializers/wavefront/wavefront.go @@ -0,0 +1,202 @@ +package wavefront + +import ( + "bytes" + "fmt" + "log" + "strconv" + "strings" + + "github.com/influxdata/telegraf" + "github.com/influxdata/telegraf/plugins/outputs/wavefront" +) + +// WavefrontSerializer : WavefrontSerializer struct +type WavefrontSerializer struct { + Prefix string + UseStrict bool + SourceOverride []string +} + +// catch many of the invalid chars that could appear in a metric or tag name +var sanitizedChars = strings.NewReplacer( + "!", "-", "@", "-", "#", "-", "$", "-", "%", "-", "^", "-", "&", "-", + "*", "-", "(", "-", ")", "-", "+", "-", "`", "-", "'", "-", "\"", "-", + "[", "-", "]", "-", "{", "-", "}", "-", ":", "-", ";", "-", "<", "-", + ">", "-", ",", "-", "?", "-", "/", "-", "\\", "-", "|", "-", " ", "-", + "=", "-", +) + +// catch many of the invalid chars that could appear in a metric or tag name +var strictSanitizedChars = strings.NewReplacer( + "!", "-", "@", "-", "#", "-", "$", "-", "%", "-", "^", "-", "&", "-", + "*", "-", "(", "-", ")", "-", "+", "-", "`", "-", "'", "-", "\"", "-", + "[", "-", "]", "-", "{", "-", "}", "-", ":", "-", ";", "-", "<", "-", + ">", "-", "?", "-", "\\", "-", "|", "-", " ", "-", "=", "-", +) + +var tagValueReplacer = strings.NewReplacer("\"", "\\\"", "*", "-") + +var pathReplacer = strings.NewReplacer("_", ".") + +func NewSerializer(prefix string, useStrict bool, sourceOverride []string) (*WavefrontSerializer, error) { + s := &WavefrontSerializer{ + Prefix: prefix, + UseStrict: useStrict, + SourceOverride: sourceOverride, + } + return s, nil +} + +// Serialize : Serialize based on Wavefront format +func (s *WavefrontSerializer) Serialize(m telegraf.Metric) ([]byte, error) { + out := []byte{} + metricSeparator := "." + + for fieldName, value := range m.Fields() { + var name string + + if fieldName == "value" { + name = fmt.Sprintf("%s%s", s.Prefix, m.Name()) + } else { + name = fmt.Sprintf("%s%s%s%s", s.Prefix, m.Name(), metricSeparator, fieldName) + } + + if s.UseStrict { + name = strictSanitizedChars.Replace(name) + } else { + name = sanitizedChars.Replace(name) + } + + name = pathReplacer.Replace(name) + + metric := &wavefront.MetricPoint{ + Metric: name, + Timestamp: m.Time().Unix(), + } + + metricValue, buildError := buildValue(value, metric.Metric) + if buildError != nil { + // bad value continue to next metric + continue + } + metric.Value = metricValue + + source, tags := buildTags(m.Tags(), s) + metric.Source = source + metric.Tags = tags + + out = append(out, formatMetricPoint(metric, s)...) 
+ } + return out, nil +} + +func (s *WavefrontSerializer) SerializeBatch(metrics []telegraf.Metric) ([]byte, error) { + var batch bytes.Buffer + for _, m := range metrics { + buf, err := s.Serialize(m) + if err != nil { + return nil, err + } + _, err = batch.Write(buf) + if err != nil { + return nil, err + } + } + return batch.Bytes(), nil +} + +func buildTags(mTags map[string]string, s *WavefrontSerializer) (string, map[string]string) { + + // Remove all empty tags. + for k, v := range mTags { + if v == "" { + delete(mTags, k) + } + } + + var source string + + if src, ok := mTags["source"]; ok { + source = src + delete(mTags, "source") + } else { + sourceTagFound := false + for _, src := range s.SourceOverride { + for k, v := range mTags { + if k == src { + source = v + mTags["telegraf_host"] = mTags["host"] + sourceTagFound = true + delete(mTags, k) + break + } + } + if sourceTagFound { + break + } + } + + if !sourceTagFound { + source = mTags["host"] + } + } + + delete(mTags, "host") + + return tagValueReplacer.Replace(source), mTags +} + +func buildValue(v interface{}, name string) (float64, error) { + switch p := v.(type) { + case bool: + if p { + return 1, nil + } else { + return 0, nil + } + case int64: + return float64(v.(int64)), nil + case uint64: + return float64(v.(uint64)), nil + case float64: + return v.(float64), nil + case string: + // return an error but don't log + return 0, fmt.Errorf("string type not supported") + default: + // return an error and log a debug message + err := fmt.Errorf("unexpected type: %T, with value: %v, for :%s", v, v, name) + log.Printf("D! Serializer [wavefront] %s\n", err.Error()) + return 0, err + } +} + +func formatMetricPoint(metricPoint *wavefront.MetricPoint, s *WavefrontSerializer) []byte { + var buffer bytes.Buffer + buffer.WriteString("\"") + buffer.WriteString(metricPoint.Metric) + buffer.WriteString("\" ") + buffer.WriteString(strconv.FormatFloat(metricPoint.Value, 'f', 6, 64)) + buffer.WriteString(" ") + buffer.WriteString(strconv.FormatInt(metricPoint.Timestamp, 10)) + buffer.WriteString(" source=\"") + buffer.WriteString(metricPoint.Source) + buffer.WriteString("\"") + + for k, v := range metricPoint.Tags { + buffer.WriteString(" \"") + if s.UseStrict { + buffer.WriteString(strictSanitizedChars.Replace(k)) + } else { + buffer.WriteString(sanitizedChars.Replace(k)) + } + buffer.WriteString("\"=\"") + buffer.WriteString(tagValueReplacer.Replace(v)) + buffer.WriteString("\"") + } + + buffer.WriteString("\n") + + return buffer.Bytes() +} diff --git a/plugins/serializers/wavefront/wavefront_test.go b/plugins/serializers/wavefront/wavefront_test.go new file mode 100755 index 0000000000000..3230ce51534c0 --- /dev/null +++ b/plugins/serializers/wavefront/wavefront_test.go @@ -0,0 +1,295 @@ +package wavefront + +import ( + "fmt" + "reflect" + "strings" + "testing" + "time" + + "github.com/influxdata/telegraf/metric" + "github.com/influxdata/telegraf/plugins/outputs/wavefront" + "github.com/stretchr/testify/assert" +) + +func TestBuildTags(t *testing.T) { + var tagTests = []struct { + ptIn map[string]string + outTags map[string]string + outSource string + }{ + { + map[string]string{"one": "two", "three": "four", "host": "testHost"}, + map[string]string{"one": "two", "three": "four"}, + "testHost", + }, + { + map[string]string{"aaa": "bbb", "host": "testHost"}, + map[string]string{"aaa": "bbb"}, + "testHost", + }, + { + map[string]string{"bbb": "789", "aaa": "123", "host": "testHost"}, + map[string]string{"aaa": "123", "bbb": "789"}, + 
"testHost", + }, + { + map[string]string{"host": "aaa", "dc": "bbb"}, + map[string]string{"dc": "bbb"}, + "aaa", + }, + { + map[string]string{"instanceid": "i-0123456789", "host": "aaa", "dc": "bbb"}, + map[string]string{"dc": "bbb", "telegraf_host": "aaa"}, + "i-0123456789", + }, + { + map[string]string{"instance-id": "i-0123456789", "host": "aaa", "dc": "bbb"}, + map[string]string{"dc": "bbb", "telegraf_host": "aaa"}, + "i-0123456789", + }, + { + map[string]string{"instanceid": "i-0123456789", "host": "aaa", "hostname": "ccc", "dc": "bbb"}, + map[string]string{"dc": "bbb", "hostname": "ccc", "telegraf_host": "aaa"}, + "i-0123456789", + }, + { + map[string]string{"instanceid": "i-0123456789", "host": "aaa", "snmp_host": "ccc", "dc": "bbb"}, + map[string]string{"dc": "bbb", "snmp_host": "ccc", "telegraf_host": "aaa"}, + "i-0123456789", + }, + { + map[string]string{"host": "aaa", "snmp_host": "ccc", "dc": "bbb"}, + map[string]string{"dc": "bbb", "telegraf_host": "aaa"}, + "ccc", + }, + } + s := WavefrontSerializer{SourceOverride: []string{"instanceid", "instance-id", "hostname", "snmp_host", "node_host"}} + + for _, tt := range tagTests { + source, tags := buildTags(tt.ptIn, &s) + if !reflect.DeepEqual(tags, tt.outTags) { + t.Errorf("\nexpected\t%+v\nreceived\t%+v\n", tt.outTags, tags) + } + if source != tt.outSource { + t.Errorf("\nexpected\t%s\nreceived\t%s\n", tt.outSource, source) + } + } +} + +func TestBuildTagsHostTag(t *testing.T) { + var tagTests = []struct { + ptIn map[string]string + outTags map[string]string + outSource string + }{ + { + map[string]string{"one": "two", "host": "testHost", "snmp_host": "snmpHost"}, + map[string]string{"telegraf_host": "testHost", "one": "two"}, + "snmpHost", + }, + } + s := WavefrontSerializer{SourceOverride: []string{"snmp_host"}} + + for _, tt := range tagTests { + source, tags := buildTags(tt.ptIn, &s) + if !reflect.DeepEqual(tags, tt.outTags) { + t.Errorf("\nexpected\t%+v\nreceived\t%+v\n", tt.outTags, tags) + } + if source != tt.outSource { + t.Errorf("\nexpected\t%s\nreceived\t%s\n", tt.outSource, source) + } + } +} + +func TestFormatMetricPoint(t *testing.T) { + var pointTests = []struct { + ptIn *wavefront.MetricPoint + out string + }{ + { + &wavefront.MetricPoint{ + Metric: "cpu.idle", + Value: 1, + Timestamp: 1554172967, + Source: "testHost", + Tags: map[string]string{"aaa": "bbb"}, + }, + "\"cpu.idle\" 1.000000 1554172967 source=\"testHost\" \"aaa\"=\"bbb\"\n", + }, + { + &wavefront.MetricPoint{ + Metric: "cpu.idle", + Value: 1, + Timestamp: 1554172967, + Source: "testHost", + Tags: map[string]string{"sp&c!al/chars,": "get*replaced"}, + }, + "\"cpu.idle\" 1.000000 1554172967 source=\"testHost\" \"sp-c-al-chars-\"=\"get-replaced\"\n", + }, + } + + s := WavefrontSerializer{} + + for _, pt := range pointTests { + bout := formatMetricPoint(pt.ptIn, &s) + sout := string(bout[:]) + if sout != pt.out { + t.Errorf("\nexpected\t%s\nreceived\t%s\n", pt.out, sout) + } + } +} + +func TestUseStrict(t *testing.T) { + var pointTests = []struct { + ptIn *wavefront.MetricPoint + out string + }{ + { + &wavefront.MetricPoint{ + Metric: "cpu.idle", + Value: 1, + Timestamp: 1554172967, + Source: "testHost", + Tags: map[string]string{"sp&c!al/chars,": "get*replaced"}, + }, + "\"cpu.idle\" 1.000000 1554172967 source=\"testHost\" \"sp-c-al/chars,\"=\"get-replaced\"\n", + }, + } + + s := WavefrontSerializer{UseStrict: true} + + for _, pt := range pointTests { + bout := formatMetricPoint(pt.ptIn, &s) + sout := string(bout[:]) + if sout != pt.out { + 
t.Errorf("\nexpected\t%s\nreceived\t%s\n", pt.out, sout) + } + } +} + +func TestSerializeMetricFloat(t *testing.T) { + now := time.Now() + tags := map[string]string{ + "cpu": "cpu0", + "host": "realHost", + } + fields := map[string]interface{}{ + "usage_idle": float64(91.5), + } + m, err := metric.New("cpu", tags, fields, now) + assert.NoError(t, err) + + s := WavefrontSerializer{} + buf, _ := s.Serialize(m) + mS := strings.Split(strings.TrimSpace(string(buf)), "\n") + assert.NoError(t, err) + + expS := []string{fmt.Sprintf("\"cpu.usage.idle\" 91.500000 %d source=\"realHost\" \"cpu\"=\"cpu0\"", now.UnixNano()/1000000000)} + assert.Equal(t, expS, mS) +} + +func TestSerializeMetricInt(t *testing.T) { + now := time.Now() + tags := map[string]string{ + "cpu": "cpu0", + "host": "realHost", + } + fields := map[string]interface{}{ + "usage_idle": int64(91), + } + m, err := metric.New("cpu", tags, fields, now) + assert.NoError(t, err) + + s := WavefrontSerializer{} + buf, _ := s.Serialize(m) + mS := strings.Split(strings.TrimSpace(string(buf)), "\n") + assert.NoError(t, err) + + expS := []string{fmt.Sprintf("\"cpu.usage.idle\" 91.000000 %d source=\"realHost\" \"cpu\"=\"cpu0\"", now.UnixNano()/1000000000)} + assert.Equal(t, expS, mS) +} + +func TestSerializeMetricBoolTrue(t *testing.T) { + now := time.Now() + tags := map[string]string{ + "cpu": "cpu0", + "host": "realHost", + } + fields := map[string]interface{}{ + "usage_idle": true, + } + m, err := metric.New("cpu", tags, fields, now) + assert.NoError(t, err) + + s := WavefrontSerializer{} + buf, _ := s.Serialize(m) + mS := strings.Split(strings.TrimSpace(string(buf)), "\n") + assert.NoError(t, err) + + expS := []string{fmt.Sprintf("\"cpu.usage.idle\" 1.000000 %d source=\"realHost\" \"cpu\"=\"cpu0\"", now.UnixNano()/1000000000)} + assert.Equal(t, expS, mS) +} + +func TestSerializeMetricBoolFalse(t *testing.T) { + now := time.Now() + tags := map[string]string{ + "cpu": "cpu0", + "host": "realHost", + } + fields := map[string]interface{}{ + "usage_idle": false, + } + m, err := metric.New("cpu", tags, fields, now) + assert.NoError(t, err) + + s := WavefrontSerializer{} + buf, _ := s.Serialize(m) + mS := strings.Split(strings.TrimSpace(string(buf)), "\n") + assert.NoError(t, err) + + expS := []string{fmt.Sprintf("\"cpu.usage.idle\" 0.000000 %d source=\"realHost\" \"cpu\"=\"cpu0\"", now.UnixNano()/1000000000)} + assert.Equal(t, expS, mS) +} + +func TestSerializeMetricFieldValue(t *testing.T) { + now := time.Now() + tags := map[string]string{ + "cpu": "cpu0", + "host": "realHost", + } + fields := map[string]interface{}{ + "value": int64(91), + } + m, err := metric.New("cpu", tags, fields, now) + assert.NoError(t, err) + + s := WavefrontSerializer{} + buf, _ := s.Serialize(m) + mS := strings.Split(strings.TrimSpace(string(buf)), "\n") + assert.NoError(t, err) + + expS := []string{fmt.Sprintf("\"cpu\" 91.000000 %d source=\"realHost\" \"cpu\"=\"cpu0\"", now.UnixNano()/1000000000)} + assert.Equal(t, expS, mS) +} + +func TestSerializeMetricPrefix(t *testing.T) { + now := time.Now() + tags := map[string]string{ + "cpu": "cpu0", + "host": "realHost", + } + fields := map[string]interface{}{ + "usage_idle": int64(91), + } + m, err := metric.New("cpu", tags, fields, now) + assert.NoError(t, err) + + s := WavefrontSerializer{Prefix: "telegraf."} + buf, _ := s.Serialize(m) + mS := strings.Split(strings.TrimSpace(string(buf)), "\n") + assert.NoError(t, err) + + expS := []string{fmt.Sprintf("\"telegraf.cpu.usage.idle\" 91.000000 %d source=\"realHost\" 
\"cpu\"=\"cpu0\"", now.UnixNano()/1000000000)} + assert.Equal(t, expS, mS) +} diff --git a/scripts/build.py b/scripts/build.py index e7a402be530b1..85e1724a5da76 100755 --- a/scripts/build.py +++ b/scripts/build.py @@ -18,6 +18,8 @@ # Packaging variables PACKAGE_NAME = "telegraf" +USER = "telegraf" +GROUP = "telegraf" INSTALL_ROOT_DIR = "/usr/bin" LOG_DIR = "/var/log/telegraf" SCRIPT_DIR = "/usr/lib/telegraf/scripts" @@ -66,6 +68,7 @@ --before-install {} \ --after-remove {} \ --before-remove {} \ + --rpm-attr 755,{},{}:{} \ --description \"{}\"".format( VENDOR, PACKAGE_URL, @@ -77,6 +80,7 @@ PREINST_SCRIPT, POSTREMOVE_SCRIPT, PREREMOVE_SCRIPT, + USER, GROUP, LOG_DIR, DESCRIPTION) targets = { @@ -647,7 +651,7 @@ def package(build_output, pkg_name, version, nightly=False, iteration=1, static= package_build_root, current_location) if package_type == "rpm": - fpm_command += "--depends coreutils --depends shadow-utils --rpm-posttrans {}".format(POSTINST_SCRIPT) + fpm_command += "--directories /var/log/telegraf --directories /etc/telegraf --depends coreutils --depends shadow-utils --rpm-posttrans {}".format(POSTINST_SCRIPT) out = run(fpm_command, shell=True) matches = re.search(':path=>"(.*)"', out) outfile = None diff --git a/scripts/init.sh b/scripts/init.sh index 67236d8c7a9b9..fc71536f93985 100755 --- a/scripts/init.sh +++ b/scripts/init.sh @@ -120,13 +120,13 @@ confdir=/etc/telegraf/telegraf.d case $1 in start) # Checked the PID file exists and check the actual status of process - if [ -e $pidfile ]; then - pidofproc -p $pidfile $daemon > /dev/null 2>&1 && status="0" || status="$?" - # If the status is SUCCESS then don't need to start again. - if [ "x$status" = "x0" ]; then - log_failure_msg "$name process is running" - exit 0 # Exit - fi + if [ -e "$pidfile" ]; then + if pidofproc -p $pidfile $daemon > /dev/null; then + log_failure_msg "$name process is running" + else + log_failure_msg "$name pidfile has no corresponding process; ensure $name is stopped and remove $pidfile" + fi + exit 0 fi # Bump the file limits, before launching the daemon. These will carry over to @@ -150,8 +150,7 @@ case $1 in stop) # Stop the daemon. if [ -e $pidfile ]; then - pidofproc -p $pidfile $daemon > /dev/null 2>&1 && status="0" || status="$?" - if [ "$status" = 0 ]; then + if pidofproc -p $pidfile $daemon > /dev/null; then # periodically signal until process exists while true; do if ! pidofproc -p $pidfile $daemon > /dev/null; then @@ -172,8 +171,7 @@ case $1 in reload) # Reload the daemon. if [ -e $pidfile ]; then - pidofproc -p $pidfile $daemon > /dev/null 2>&1 && status="0" || status="$?" - if [ "$status" = 0 ]; then + if pidofproc -p $pidfile $daemon > /dev/null; then if killproc -p $pidfile SIGHUP; then log_success_msg "$name process was reloaded" else diff --git a/scripts/post-install.sh b/scripts/post-install.sh index 822a4e4de5028..9972364bc6e7d 100644 --- a/scripts/post-install.sh +++ b/scripts/post-install.sh @@ -32,10 +32,6 @@ if ! id telegraf &>/dev/null; then useradd -r -M telegraf -s /bin/false -d /etc/telegraf -g telegraf fi -test -d $LOG_DIR || mkdir -p $LOG_DIR -chown -R -L telegraf:telegraf $LOG_DIR -chmod 755 $LOG_DIR - # Remove legacy symlink, if it exists if [[ -L /etc/init.d/telegraf ]]; then rm -f /etc/init.d/telegraf @@ -72,6 +68,14 @@ if [[ -f /etc/redhat-release ]] || [[ -f /etc/SuSE-release ]]; then fi elif [[ -f /etc/debian_version ]]; then # Debian/Ubuntu logic + + # Ownership for RH-based platforms is set in build.py via the `rmp-attr` option. 
+    # We perform the ownership change only for Debian-based systems.
+    # Moving these lines out of this if statement would make `rpm -V` fail after installation.
+    test -d $LOG_DIR || mkdir -p $LOG_DIR
+    chown -R -L telegraf:telegraf $LOG_DIR
+    chmod 755 $LOG_DIR
+
     if [[ "$(readlink /proc/1/exe)" == */systemd ]]; then
         install_systemd /lib/systemd/system/telegraf.service
         deb-systemd-invoke restart telegraf.service || echo "WARNING: systemd not running."
diff --git a/testutil/accumulator.go b/testutil/accumulator.go
index 3fe29169999ef..a7b9fe8f63039 100644
--- a/testutil/accumulator.go
+++ b/testutil/accumulator.go
@@ -10,7 +10,6 @@ import (
 	"time"

 	"github.com/influxdata/telegraf"
-
 	"github.com/stretchr/testify/assert"
 )

@@ -204,7 +203,7 @@ func (a *Accumulator) AddError(err error) {
 	a.Unlock()
 }

-func (a *Accumulator) SetPrecision(precision, interval time.Duration) {
+func (a *Accumulator) SetPrecision(precision time.Duration) {
 	return
 }

From 3d754109f963e09b74cd569bed292581731f4332 Mon Sep 17 00:00:00 2001
From: Randy Coburn
Date: Mon, 1 Jul 2019 11:06:19 +0100
Subject: [PATCH 09/12] Fixing a crash bug where Kinesis being unavailable
 crashes Telegraf

---
 plugins/outputs/kinesis/README.md              | 0
 plugins/outputs/kinesis/compression.go         | 0
 plugins/outputs/kinesis/compression_test.go    | 0
 plugins/outputs/kinesis/kinesis.go             | 5 ++++-
 plugins/outputs/kinesis/kinesisHandler.go      | 0
 plugins/outputs/kinesis/kinesis_test.go        | 0
 plugins/outputs/kinesis/kinesishandler_test.go | 0
 7 files changed, 4 insertions(+), 1 deletion(-)
 mode change 100644 => 100755 plugins/outputs/kinesis/README.md
 mode change 100644 => 100755 plugins/outputs/kinesis/compression.go
 mode change 100644 => 100755 plugins/outputs/kinesis/compression_test.go
 mode change 100644 => 100755 plugins/outputs/kinesis/kinesis.go
 mode change 100644 => 100755 plugins/outputs/kinesis/kinesisHandler.go
 mode change 100644 => 100755 plugins/outputs/kinesis/kinesis_test.go
 mode change 100644 => 100755 plugins/outputs/kinesis/kinesishandler_test.go

diff --git a/plugins/outputs/kinesis/README.md b/plugins/outputs/kinesis/README.md
old mode 100644
new mode 100755
diff --git a/plugins/outputs/kinesis/compression.go b/plugins/outputs/kinesis/compression.go
old mode 100644
new mode 100755
diff --git a/plugins/outputs/kinesis/compression_test.go b/plugins/outputs/kinesis/compression_test.go
old mode 100644
new mode 100755
diff --git a/plugins/outputs/kinesis/kinesis.go b/plugins/outputs/kinesis/kinesis.go
old mode 100644
new mode 100755
index 258766cfdb916..c8e2d3b8ab105
--- a/plugins/outputs/kinesis/kinesis.go
+++ b/plugins/outputs/kinesis/kinesis.go
@@ -165,9 +165,12 @@ func (k *KinesisOutput) Connect() error {
 	describeOutput, err := svc.DescribeStreamSummary(&kinesis.DescribeStreamSummaryInput{
 		StreamName: aws.String(k.StreamName),
 	})
+	if err != nil {
+		return err
+	}
 	k.svc = svc
 	k.nShards = *describeOutput.StreamDescriptionSummary.OpenShardCount
-	return err
+	return nil
 }

 func (k *KinesisOutput) Close() error {
diff --git a/plugins/outputs/kinesis/kinesisHandler.go b/plugins/outputs/kinesis/kinesisHandler.go
old mode 100644
new mode 100755
diff --git a/plugins/outputs/kinesis/kinesis_test.go b/plugins/outputs/kinesis/kinesis_test.go
old mode 100644
new mode 100755
diff --git a/plugins/outputs/kinesis/kinesishandler_test.go b/plugins/outputs/kinesis/kinesishandler_test.go
old mode 100644
new mode 100755

From 29de3b772c550c32558687588aa0923fc8f9e496 Mon Sep 17 00:00:00 2001
From: Randy Coburn
Date: Mon, 1 Jul 2019 17:02:30 +0100
Subject: [PATCH 10/12]
 Adding a new internal content encoder for snappy. Refactored the code to use
 the internal.ContentEncoders. Refactored the block splitting for determining
 the optimal message size for Kinesis. Updated the README to use
 content_encoding.

---
 internal/content_coding.go                  |  51 ++++++-
 internal/content_coding_test.go             |  14 ++
 plugins/outputs/kinesis/README.md           |  15 +--
 plugins/outputs/kinesis/compression.go      |  37 ------
 plugins/outputs/kinesis/compression_test.go |  38 ------
 plugins/outputs/kinesis/kinesis.go          |  39 ++++--
 plugins/outputs/kinesis/kinesisHandler.go   | 125 ++++++++++++------
 .../outputs/kinesis/kinesishandler_test.go  |  55 +++++---
 8 files changed, 217 insertions(+), 157 deletions(-)
 delete mode 100755 plugins/outputs/kinesis/compression.go
 delete mode 100755 plugins/outputs/kinesis/compression_test.go

diff --git a/internal/content_coding.go b/internal/content_coding.go
index 936dd95620a58..0c9186c482a36 100644
--- a/internal/content_coding.go
+++ b/internal/content_coding.go
@@ -5,6 +5,8 @@ import (
 	"compress/gzip"
 	"errors"
 	"io"
+
+	"github.com/golang/snappy"
 )

 // NewContentEncoder returns a ContentEncoder for the encoding type.
@@ -12,7 +14,8 @@ func NewContentEncoder(encoding string) (ContentEncoder, error) {
 	switch encoding {
 	case "gzip":
 		return NewGzipEncoder()
-
+	case "snappy":
+		return NewSnappyEncoder()
 	case "identity", "":
 		return NewIdentityEncoder(), nil
 	default:
@@ -25,6 +28,8 @@ func NewContentDecoder(encoding string) (ContentDecoder, error) {
 	switch encoding {
 	case "gzip":
 		return NewGzipDecoder()
+	case "snappy":
+		return NewSnappyDecoder()
 	case "identity", "":
 		return NewIdentityDecoder(), nil
 	default:
@@ -51,6 +56,22 @@ func NewGzipEncoder() (*GzipEncoder, error) {
 	}, nil
 }

+// SetLevel changes the gzip encoder's compression level.
+// It accepts the constants from https://golang.org/pkg/compress/gzip/#pkg-constants
+// or a number between 0 and 9, where 0 is no compression and 9 is the best
+// but slowest compression. -1 resets back to the default level.
+func (e *GzipEncoder) SetLevel(lvl int) error {
+	gzw, err := gzip.NewWriterLevel(e.buf, lvl)
+	if err != nil {
+		return err
+	}
+	e.writer = gzw
+	return nil
+}
+
+// Encode will take the data passed in and encode it with gzip.
 func (e *GzipEncoder) Encode(data []byte) ([]byte, error) {
 	e.buf.Reset()
 	e.writer.Reset(e.buf)
@@ -66,6 +87,34 @@ func (e *GzipEncoder) Encode(data []byte) ([]byte, error) {
 	return e.buf.Bytes(), nil
 }

+// SnappyEncoder compresses and decompresses the buffer using Google's snappy compression.
+type SnappyEncoder struct{}
+
+// NewSnappyEncoder returns a new snappy encoder that can encode []bytes to
+// snappy-compressed []bytes.
+func NewSnappyEncoder() (*SnappyEncoder, error) {
+	return &SnappyEncoder{}, nil
+}
+
+// NewSnappyDecoder returns a new snappy decoder that can decode []bytes
+// previously encoded with snappy.
+func NewSnappyDecoder() (*SnappyEncoder, error) {
+	return &SnappyEncoder{}, nil
+}
+
+// Encode takes all data given to it and encodes it.
+// Snappy itself never fails; the error return value exists to
+// satisfy the ContentEncoder interface.
+func (e *SnappyEncoder) Encode(data []byte) ([]byte, error) {
+	return snappy.Encode(nil, data), nil
+}
+
+// Decode takes the passed in data and decodes it to a []byte.
+// It can return an error if the data was encoded incorrectly.
+func (e *SnappyEncoder) Decode(data []byte) ([]byte, error) {
+	return snappy.Decode(nil, data)
+}
+
 // IdentityEncoder is a null encoder that applies no transformation.
 type IdentityEncoder struct{}
diff --git a/internal/content_coding_test.go b/internal/content_coding_test.go
index 0316331127f3b..0d5b40bdbbcc5 100644
--- a/internal/content_coding_test.go
+++ b/internal/content_coding_test.go
@@ -56,3 +56,17 @@ func TestIdentityEncodeDecode(t *testing.T) {

 	require.Equal(t, "howdy", string(actual))
 }
+
+func TestSnappyEncodeDecode(t *testing.T) {
+	enc, err := NewSnappyEncoder()
+	require.NoError(t, err)
+	dec, err := NewSnappyDecoder()
+	require.NoError(t, err)
+
+	payload := []byte("Snappy Test!!££$$")
+	encoded, err := enc.Encode(payload)
+	require.NoError(t, err)
+	decoded, err := dec.Decode(encoded)
+	require.NoError(t, err)
+	require.Equal(t, string(payload), string(decoded))
+}
diff --git a/plugins/outputs/kinesis/README.md b/plugins/outputs/kinesis/README.md
index 8470f2f859cbd..ddff57502cf38 100755
--- a/plugins/outputs/kinesis/README.md
+++ b/plugins/outputs/kinesis/README.md
@@ -29,8 +29,7 @@ will attempt to authenticate.
   region = "eu-west-1"
   streamname = "KinesisStreamName"
   aggregate_metrics = true
-  # Either "gzip", "snappy"
-  compress_metrics_with = "gzip"
+  content_encoding = "gzip"
   partition = { method = "random" }
   debug = true
 ```
@@ -124,22 +123,20 @@ This will make the plugin gather the metrics and send them as blocks of metrics
 1. If a random key is in use then a block is created for each shard in the stream, unless there are fewer metrics than shards, in which case one block is created per metric.
 1. Each record will be 1020kb in size + partition key

-### compress_metrics_with
+### content_encoding

-`compress_metrics_with` has the following values. If no value is set then compression is skipped.
+`content_encoding` can be set to any content encoding that telegraf supports, for example:

 * gzip
 * snappy

-They are explained below.
-
 #### gzip

 This will make the plugin compress the data using GZip before the data is shipped to Kinesis.
 GZip is slower than snappy but generally fast enough, and it gives much better compression. Use GZip in most cases.

-If both gzip and snappy are true. GZip wins.
-
 #### snappy

 This will make the plugin compress the data using Google's Snappy compression before the data is shipped to Kinesis.
 Snappy is much quicker, and is useful if gzip takes too long to compress your metrics.

 ### debug

-Prints debugging data into the logs.
\ No newline at end of file
+Prints debugging data into the logs.
diff --git a/plugins/outputs/kinesis/compression.go b/plugins/outputs/kinesis/compression.go
deleted file mode 100755
index 08ea86c89939e..0000000000000
--- a/plugins/outputs/kinesis/compression.go
+++ /dev/null
@@ -1,37 +0,0 @@
-package kinesis
-
-import (
-	"bytes"
-	"compress/gzip"
-	"fmt"
-
-	"github.com/golang/snappy"
-)
-
-var (
-	// gzipCompressionLevel sets the compression level. Tests indicate that 7 gives the best trade off
-	// between speed and compression.
- gzipCompressionLevel = 7 -) - -func gzipMetrics(metrics []byte) ([]byte, error) { - var buffer bytes.Buffer - - gzw, err := gzip.NewWriterLevel(&buffer, gzipCompressionLevel) - if err != nil { - return []byte{}, fmt.Errorf("Compression level is incorrect for gzip") - } - _, err = gzw.Write(metrics) - if err != nil { - return []byte{}, fmt.Errorf("There was an error in writing to the gzip writer") - } - if err := gzw.Close(); err != nil { - return []byte{}, fmt.Errorf("There was an error in closing the gzip writer") - } - - return buffer.Bytes(), nil -} - -func snappyMetrics(metrics []byte) ([]byte, error) { - return snappy.Encode(nil, metrics), nil -} diff --git a/plugins/outputs/kinesis/compression_test.go b/plugins/outputs/kinesis/compression_test.go deleted file mode 100755 index 6d0d1f9054c2e..0000000000000 --- a/plugins/outputs/kinesis/compression_test.go +++ /dev/null @@ -1,38 +0,0 @@ -package kinesis - -import ( - "testing" - "time" -) - -func TestGoodCompression(t *testing.T) { - tests := []string{ - "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa", - time.Now().String(), - `abcdefghijklmnopqrstuvwzyz1234567890@~|\/?><@~#+=!"£$%^&_*(){}[]`, - } - - for _, test := range tests { - _, err := gzipMetrics([]byte(test)) - if err != nil { - t.Logf("Failed to gzip test data") - t.Fail() - } - - // Snappy doesn't error, so we can only look for panics - snappyMetrics([]byte(test)) - } -} - -func TestBadGzipCompressionLevel(t *testing.T) { - oldlevel := gzipCompressionLevel - gzipCompressionLevel = 11 - defer func() { gzipCompressionLevel = oldlevel }() - - _, err := gzipMetrics([]byte(time.Now().String())) - if err == nil { - t.Logf("Expect gzip to fail because of a bad compression level") - t.Fail() - } - -} diff --git a/plugins/outputs/kinesis/kinesis.go b/plugins/outputs/kinesis/kinesis.go index 9cd1b9e3539ff..1991598248969 100755 --- a/plugins/outputs/kinesis/kinesis.go +++ b/plugins/outputs/kinesis/kinesis.go @@ -7,6 +7,7 @@ import ( "github.com/aws/aws-sdk-go/aws" "github.com/aws/aws-sdk-go/service/kinesis" "github.com/influxdata/telegraf" + "github.com/influxdata/telegraf/internal" internalaws "github.com/influxdata/telegraf/internal/config/aws" "github.com/influxdata/telegraf/plugins/outputs" "github.com/influxdata/telegraf/plugins/serializers" @@ -36,12 +37,15 @@ type ( Partition *Partition `toml:"partition"` Debug bool `toml:"debug"` AggregateMetrics bool `toml:"aggregate_metrics"` - CompressWith string `toml:"compress_metrics_with"` + UseBatchFormat bool `toml:"use_batch_format"` + ContentEncoding string `toml:"content_encoding"` svc *kinesis.Kinesis nShards int64 serializer serializers.Serializer + + encoder internal.ContentEncoder } Partition struct { @@ -123,12 +127,22 @@ var sampleConfig = ` # valid options: "gzip", "snappy" # See https://github.com/influxdata/telegraf/tree/master/plugins/outputs/kinesis # for more details on each compression method. - compress_metrics_with = "gzip" + content_encoding = "gzip" ## debug will show upstream aws messages. debug = false ` +func makeEncoder(encoderType string) (internal.ContentEncoder, error) { + switch encoderType { + case "gzip": + // Special handling for gzip because we need to change the level of compression. + return newGzipEncoder() + default: + return internal.NewContentEncoder(encoderType) + } +} + func (k *KinesisOutput) SampleConfig() string { return sampleConfig } @@ -148,6 +162,12 @@ func (k *KinesisOutput) Connect() error { log.Printf("I! 
kinesis: Establishing a connection to Kinesis in %s", k.Region) } + encoder, err := makeEncoder(k.ContentEncoding) + if err != nil { + return err + } + k.encoder = encoder + credentialConfig := &internalaws.CredentialConfig{ Region: k.Region, AccessKey: k.AccessKey, @@ -300,17 +320,10 @@ func (k *KinesisOutput) aggregatedWrite(metrics []telegraf.Metric) error { } handler.packageMetrics(k.nShards) - switch k.CompressWith { - case "gzip": - if err := handler.gzipCompressSlugs(); err != nil { - log.Printf("E! Failed to compress with gzip") - return err - } - case "snappy": - if err := handler.snappyCompressSlugs(); err != nil { - log.Printf("E! Failed to compress with snappy") - return err - } + // encode the messages if required. + err := handler.encodeSlugs(k.encoder) + if err != nil { + return err } var elapsed time.Duration diff --git a/plugins/outputs/kinesis/kinesisHandler.go b/plugins/outputs/kinesis/kinesisHandler.go index 3551d49d7e405..202a26d6548de 100755 --- a/plugins/outputs/kinesis/kinesisHandler.go +++ b/plugins/outputs/kinesis/kinesisHandler.go @@ -6,6 +6,7 @@ import ( "github.com/aws/aws-sdk-go/aws" "github.com/aws/aws-sdk-go/service/kinesis" "github.com/influxdata/telegraf" + "github.com/influxdata/telegraf/internal" "github.com/influxdata/telegraf/plugins/serializers" uuid "github.com/satori/go.uuid" ) @@ -14,11 +15,17 @@ const ( // MaxOutputRecords is the maximum number of records that we can send in a single send to Kinesis. maxOutputRecords = 5 // maxRecordSizeBytes is the maximum size for a record when sending to Kinesis. - // 1020KB for they payload and 4KB for the partition key. + // 1020KB for the payload and 4KB for the partition key. maxRecordSizeBytes = 1020 * 1024 randomPartitionKey = "-random-" ) +var ( + // gzipCompressionLevel sets the compression level. Tests indicate that 7 gives the best trade off + // between speed and compression. + gzipCompressionLevel = 7 +) + type slug struct { metricsBytes []byte size int @@ -54,7 +61,7 @@ func (handler *putRecordsHandler) setSerializer(serializer serializers.Serialize func (handler *putRecordsHandler) addMetric(partition string, metric telegraf.Metric) error { if handler.readyToSendLock { - return fmt.Errorf("Already pacakged current metrics. Send first then add more") + return fmt.Errorf("Already packaged current metrics. Send first then add more") } if _, ok := handler.rawMetrics[partition]; !ok { handler.rawMetrics[partition] = make([]telegraf.Metric, 0) @@ -74,16 +81,18 @@ func (handler *putRecordsHandler) addSlugs(partitionKey string, slugs ...[]byte) } } -// packageMetrics is responsible to get the metrics split into payloads that we no larger than 1020kb. -// Each partition key will have metrics that need to then be split into payloads. +// packageMetrics is responsible to get the metrics split into payloads that are no larger than 1020kb. +// Each partition key will have metrics that need to be split into payloads. // If the partition key is random then it will create payloads ready to be split between as many shards // that you have available. -// packageMetrics can't be called again until init is called. Really it is designed to be used once. +// packageMetrics can't be called again until init is called. +// Really it is designed to be used once and then thrown away. 
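+//
+// Example (illustrative, not from the original patch): with 4 shards, a random
+// partition key and a batch that serializes to roughly 3 MiB, the metrics are
+// first split into 4 blocks; any block that still serializes above
+// maxRecordSizeBytes (1020 KiB) is re-split into (size/maxRecordSizeBytes)+1
+// smaller blocks before being stored as slugs.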
 func (handler *putRecordsHandler) packageMetrics(shards int64) error {
 	if handler.readyToSendLock {
-		return fmt.Errorf("Already setup to send data")
+		return fmt.Errorf("Waiting to send data, can't accept more metrics currently")
 	}
-	splitIntoBlocks := func(howManyBlocks int64, partitionKey string, metrics []telegraf.Metric) error {
+
+	splitIntoBlocks := func(howManyBlocks int, partitionKey string, metrics []telegraf.Metric) [][]telegraf.Metric {
 		blocks := make([][]telegraf.Metric, howManyBlocks)
 		for index := range blocks {
 			blocks[index] = make([]telegraf.Metric, 0)
 		}
@@ -98,15 +107,11 @@
 			}
 		}

-		for _, metrics := range blocks {
-			metricsBytes, err := handler.serializer.SerializeBatch(metrics)
-			if err != nil {
-				return err
-			}
-			handler.addSlugs(partitionKey, metricsBytes)
-		}
+		return blocks
+	}

-		return nil
+	requiredBlocks := func(currentSize int) int {
+		return (currentSize / maxRecordSizeBytes) + 1
 	}

 	// At this point we need to know if the metrics will fit in a single push to kinesis
@@ -115,15 +120,44 @@
 	// If that doesn't work we will then know how many blocks we would need.
 	// Split again into x blocks, serialize and return.
 	for partitionKey, metrics := range handler.rawMetrics {
-		if partitionKey == randomPartitionKey {
-			blocks := int64(shards)
+		// For random partition keys we need to split the data first, then check that it
+		// will fit into the payloads. If not, we need to split it again, but by then we
+		// know how many blocks to make.
+
+		// Make as many blocks as there are shards.
+		blocks := int(shards)
+		// If we have fewer metrics than shards, we reduce the block count to 1;
+		// it will be faster to send one block in this case.
 		if int64(len(metrics)) < shards {
-			blocks = int64(len(metrics))
+			blocks = 1
 		}
-		if err := splitIntoBlocks(blocks, partitionKey, metrics); err != nil {
-			return err
+		safeBlocks := make([][]byte, 0)
+		splitBlocks := splitIntoBlocks(blocks, partitionKey, metrics)
+
+		for _, block := range splitBlocks {
+			metricsEncoded, err := handler.serializer.SerializeBatch(block)
+			if err != nil {
+				return err
+			}
+
+			blocksNeeded := requiredBlocks(len(metricsEncoded))
+			if blocksNeeded == 1 {
+				safeBlocks = append(safeBlocks, metricsEncoded)
+			} else {
+				newBlocks := splitIntoBlocks(blocksNeeded, partitionKey, block)
+				for _, newBlock := range newBlocks {
+					metricsEncoded, err := handler.serializer.SerializeBatch(newBlock)
+					if err != nil {
+						return err
+					}
+					safeBlocks = append(safeBlocks, metricsEncoded)
+				}
+			}
 		}
+		handler.slugs[randomPartitionKey] = safeBlocks
+		// We no longer need splitBlocks, so release it.
+		splitBlocks = nil

 		// Now we need to move the data into its own partition keys
 		for _, metricBytes := range handler.slugs[randomPartitionKey] {
@@ -135,48 +169,61 @@
 			continue
 		}

+		// Try one block first for static keys.
 		tryOne, err := handler.serializer.SerializeBatch(metrics)
 		if err != nil {
 			return err
 		}

-		requiredBlocks := (len(tryOne) / maxRecordSizeBytes) + 1
+		// We always need at least a single block.
+		// (len(tryOne) / maxRecordSizeBytes) yields an int because maxRecordSizeBytes is a const:
+		// if tryOne is smaller than maxRecordSizeBytes we get zero,
+		// otherwise we get however many extra blocks we need, plus the starting one.
+		blocksNeeded := requiredBlocks(len(tryOne))

-		if requiredBlocks == 1 {
-			// we are ok and we can carry on
+		if blocksNeeded == 1 {
+			// A single block is enough to carry all of the metrics.
 			handler.addSlugs(partitionKey, tryOne)
 			continue
 		}

-		// sad times we need to make more blocks and split the data between them
-		if err := splitIntoBlocks(int64(requiredBlocks), partitionKey, metrics); err != nil {
-			return err
+		// We now know how many blocks we need, so redistribute the metrics into the blocks.
+		blocks := splitIntoBlocks(blocksNeeded, partitionKey, metrics)
+		for _, metrics := range blocks {
+			metricsBytes, err := handler.serializer.SerializeBatch(metrics)
+			if err != nil {
+				return err
+			}
+			handler.addSlugs(partitionKey, metricsBytes)
 		}
-		continue
 	}

 	return nil
 }

-func (handler *putRecordsHandler) snappyCompressSlugs() error {
-	for partitionKey, slugs := range handler.slugs {
-		for index, slug := range slugs {
-			// snappy doesn't return errors
-			compressedBytes, _ := snappyMetrics(slug)
-			handler.slugs[partitionKey][index] = compressedBytes
-		}
+func newGzipEncoder() (*internal.GzipEncoder, error) {
+	// Grab the gzip encoder directly because we need to set the compression level.
+	gz, err := internal.NewGzipEncoder()
+	if err != nil {
+		return nil, err
 	}
-	return nil
+	err = gz.SetLevel(gzipCompressionLevel)
+	if err != nil {
+		return nil, err
+	}
+
+	return gz, nil
 }

-func (handler *putRecordsHandler) gzipCompressSlugs() error {
+func (handler *putRecordsHandler) encodeSlugs(encoder internal.ContentEncoder) error {
 	for partitionKey, slugs := range handler.slugs {
 		for index, slug := range slugs {
-			compressedBytes, err := gzipMetrics(slug)
+			// Encode the slug with the configured encoder; identity and snappy
+			// never fail, but gzip can return an error.
+			encodedBytes, err := encoder.Encode(slug)
 			if err != nil {
 				return err
 			}
-			handler.slugs[partitionKey][index] = compressedBytes
+			handler.slugs[partitionKey][index] = encodedBytes
 		}
 	}
 	return nil
@@ -185,7 +232,7 @@
 // convertToKinesisPutRequests will return a slice that contains a []*kinesis.PutRecordsRequestEntry
 // sized to fit into a PutRecords call. The number of outer slices is how many times you would
 // need to call kinesis.PutRecords.
-// The Inner slices ad hear to the current rules. No more than 500 records at once and no more than
+// The inner slices adhere to the following rules: no more than 500 records at once and no more than
 // 5MB of data including the partition keys.
 func (handler *putRecordsHandler) convertToKinesisPutRequests() [][]*kinesis.PutRecordsRequestEntry {
 	putRequests := make([][]*kinesis.PutRecordsRequestEntry, 0)
diff --git a/plugins/outputs/kinesis/kinesishandler_test.go b/plugins/outputs/kinesis/kinesishandler_test.go
index 2822107c20e3f..d20bdf79effce 100755
--- a/plugins/outputs/kinesis/kinesishandler_test.go
+++ b/plugins/outputs/kinesis/kinesishandler_test.go
@@ -69,20 +69,24 @@
 func TestKinesisPackagedMetrics(t *testing.T) {
+	// A slug is a record data set whose maximum size is set by maxRecordSizeBytes.
+	// If we use random keys then we expect there to be many keys and record sets;
+	// this allows the load to be spread across shards.
+	// The test checks the number of records, and that each generated record is at
+	// or below the maximum record size.
 	tests := []struct {
 		name          string
 		shards        int64
 		nMetrics      int
 		staticKey     string
 		expectedSlugs int
-		snappy        bool
-		gzip          bool
+		encoding      string
 	}{
 		{
-			name:          "micro Random expect 2 slugs",
+			name:          "micro Random expect 1 slug",
 			shards:        4,
 			nMetrics:      2,
-			expectedSlugs: 2,
+			expectedSlugs: 1,
 		},
 		{
 			name:          "large Random expect 4 slugs",
 			shards:        4,
 			nMetrics:      1000,
 			expectedSlugs: 4,
 		},
 		{
-			name:          "vary large random expect 2 slugs",
-			shards:        2,
-			nMetrics:      51200,
+			name:          "very large static expect 1 slug",
+			shards:        4,
+			nMetrics:      8081 * 2,
 			expectedSlugs: 1,
-			snappy:        true,
 			staticKey:     "static_key",
 		},
 		{
-			name:          "vary large random expect 2 slugs",
+			name:          "very large random expect 6 slugs with snappy",
 			shards:        2,
 			nMetrics:      51200,
-			expectedSlugs: 1,
-			gzip:          true,
-			staticKey:     "static_key",
+			expectedSlugs: 6,
+			encoding:      "snappy",
+		},
+		{
+			name:          "very large random expect 6 slugs with gzip",
+			shards:        2,
+			nMetrics:      51200,
+			expectedSlugs: 6,
+			encoding:      "gzip",
 		},
 	}

 	for _, test := range tests {
@@ -144,16 +153,22 @@
 			t.Fail()
 		}

-		if test.snappy {
-			// Snappy doesn't error, just testing for panic :(
-			h.snappyCompressSlugs()
+		for key, slug := range h.slugs {
+			for index, recordSet := range slug {
+				if len(recordSet) > maxRecordSizeBytes {
+					t.Logf("%s: recordSet %d of slug %s is too large. Is: %d, max size is: %d", test.name, index, key, len(recordSet), maxRecordSizeBytes)
+				}
+			}
 		}

-		if test.gzip {
-			if err := h.gzipCompressSlugs(); err != nil {
-				t.Logf("%s: Error when gzip compressing slug. Error: %s", test.name, err)
-				t.FailNow()
-			}
+		encoder, err := makeEncoder(test.encoding)
+		if err != nil {
+			t.Logf("Failed to make an encoder; an invalid encoding was passed into the test")
+			t.Fail()
+		}
+		if err := h.encodeSlugs(encoder); err != nil {
+			t.Logf("Failed to encode the data")
+			t.Fail()
 		}

 		// We need to make sure that we don't get panics here.

From b29d272efd445c691c314887b354d6f00b3ac8ad Mon Sep 17 00:00:00 2001
From: Randy Coburn
Date: Wed, 14 Aug 2019 15:17:02 +0200
Subject: [PATCH 11/12] testing changes

---
 .../inputs/influxdb_listener/http_listener.go | 17 ++++++++++++++---
 .../influxdb_listener/http_listener_test.go   |  1 +
 2 files changed, 15 insertions(+), 3 deletions(-)

diff --git a/plugins/inputs/influxdb_listener/http_listener.go b/plugins/inputs/influxdb_listener/http_listener.go
index 7e55447869018..c388a04749a62 100644
--- a/plugins/inputs/influxdb_listener/http_listener.go
+++ b/plugins/inputs/influxdb_listener/http_listener.go
@@ -49,6 +49,8 @@ type HTTPListener struct {
 	BasicUsername string
 	BasicPassword string

+	KeepDatabase bool
+
 	TimeFunc

 	mu sync.Mutex
@@ -93,6 +95,11 @@ const sampleConfig = `
   ## Maximum line size allowed to be sent in bytes.
   ## 0 means to use the default of 65536 bytes (64 kibibytes)
   max_line_size = "64KiB"
+
+  ## If the write request has a database in its query string then it should be
+  ## kept for the metrics further on.
+  ## The database will be added as a tag under "database".
+  keep_database = true

   ## Set one or more allowed client CA certificate file names to
   ## enable mutually authenticated TLS connections
@@ -258,6 +265,7 @@ func (h *HTTPListener) serveWrite(res http.ResponseWriter, req *http.Request) {
 	now := h.TimeFunc()

 	precision := req.URL.Query().Get("precision")
+	db := req.URL.Query().Get("db")

 	// Handle gzip request bodies
 	body := req.Body
@@ -315,7 +323,7 @@ func (h *HTTPListener) serveWrite(res http.ResponseWriter, req *http.Request) {

 		if err == io.ErrUnexpectedEOF {
 			// finished reading the request body
-			err = h.parse(buf[:n+bufStart], now, precision)
+			err = h.parse(buf[:n+bufStart], now, precision, db)
 			if err != nil {
 				log.Println("D! "+err.Error(), bufStart+n)
 				return400 = true
@@ -346,7 +354,7 @@ func (h *HTTPListener) serveWrite(res http.ResponseWriter, req *http.Request) {
 			bufStart = 0
 			continue
 		}
-		if err := h.parse(buf[:i+1], now, precision); err != nil {
+		if err := h.parse(buf[:i+1], now, precision, db); err != nil {
 			log.Println("D! " + err.Error())
 			return400 = true
 		}
@@ -359,7 +367,7 @@ func (h *HTTPListener) serveWrite(res http.ResponseWriter, req *http.Request) {
 	}
 }

-func (h *HTTPListener) parse(b []byte, t time.Time, precision string) error {
+func (h *HTTPListener) parse(b []byte, t time.Time, precision, db string) error {
 	h.mu.Lock()
 	defer h.mu.Unlock()

@@ -371,6 +379,9 @@
 	}

 	for _, m := range metrics {
+		if db != "" {
+			m.AddTag("database", db)
+		}
 		h.acc.AddFields(m.Name(), m.Fields(), m.Tags(), m.Time())
 	}

diff --git a/plugins/inputs/influxdb_listener/http_listener_test.go b/plugins/inputs/influxdb_listener/http_listener_test.go
index 9642950613840..ff8241a037d90 100644
--- a/plugins/inputs/influxdb_listener/http_listener_test.go
+++ b/plugins/inputs/influxdb_listener/http_listener_test.go
@@ -164,6 +164,7 @@
 		map[string]interface{}{"value": float64(12)},
 		map[string]string{"host": "server01"},
 	)
+	t.Logf("Randy %v", acc)

 	// post multiple message to listener
 	resp, err = http.Post(createURL(listener, "http", "/write", "db=mydb"), "", bytes.NewBuffer([]byte(testMsgs)))

From ca30edd8f84ab2d2d36e5887445126c4881662cc Mon Sep 17 00:00:00 2001
From: Randy Coburn
Date: Wed, 14 Aug 2019 16:03:09 +0200
Subject: [PATCH 12/12] Updating the README with new parameters. Setting the
 database tag if needed. Adding a test that looks for the database tag when
 asked to keep it.

---
 plugins/inputs/influxdb_listener/README.md    | 10 ++++
 .../inputs/influxdb_listener/http_listener.go | 48 +++++++++++++------
 .../influxdb_listener/http_listener_test.go   | 21 ++------
 3 files changed, 48 insertions(+), 31 deletions(-)

diff --git a/plugins/inputs/influxdb_listener/README.md b/plugins/inputs/influxdb_listener/README.md
index 8b6d2ad51c538..4c7db247c921e 100644
--- a/plugins/inputs/influxdb_listener/README.md
+++ b/plugins/inputs/influxdb_listener/README.md
@@ -46,6 +46,16 @@ submits data to InfluxDB determines the destination database.
   tls_cert = "/etc/telegraf/cert.pem"
   tls_key = "/etc/telegraf/key.pem"

+  ## If the write request has a database in its query string then it should be
+  ## kept for the metrics further on. The database will be added as a tag.
+  ## This tag can be used in downstream outputs.
+  keep_database = true
+
+  ## Optional tag name used to store the database, if you want to change it
+  ## to something custom. If not set it will be "database".
+  ## Only used if keep_database is set to true.
+  # database_tag = "database"

   ## Optional username and password to accept for HTTP basic authentication.
   ## You probably want to make sure you have TLS configured above for this.
   # basic_username = "foobar"
diff --git a/plugins/inputs/influxdb_listener/http_listener.go b/plugins/inputs/influxdb_listener/http_listener.go
index c388a04749a62..d72df5f43a61c 100644
--- a/plugins/inputs/influxdb_listener/http_listener.go
+++ b/plugins/inputs/influxdb_listener/http_listener.go
@@ -32,24 +32,28 @@ const (
 	// a single InfluxDB point.
 	// 64 KB
 	DEFAULT_MAX_LINE_SIZE = 64 * 1024
+
+	// DefaultDatabaseTag is the name of the tag that will be used to carry
+	// the database collected from the query string.
+	DefaultDatabaseTag = "database"
 )

 type TimeFunc func() time.Time

 type HTTPListener struct {
-	ServiceAddress string
-	ReadTimeout    internal.Duration
-	WriteTimeout   internal.Duration
-	MaxBodySize    internal.Size
-	MaxLineSize    internal.Size
-	Port           int
-
+	ServiceAddress string `toml:"service_address"`
+	// Port gets pulled out of ServiceAddress
+	Port int
 	tlsint.ServerConfig

-	BasicUsername string
-	BasicPassword string
-
-	KeepDatabase bool
+	ReadTimeout   internal.Duration `toml:"read_timeout"`
+	WriteTimeout  internal.Duration `toml:"write_timeout"`
+	MaxBodySize   internal.Size     `toml:"max_body_size"`
+	MaxLineSize   internal.Size     `toml:"max_line_size"`
+	BasicUsername string            `toml:"basic_username"`
+	BasicPassword string            `toml:"basic_password"`
+	KeepDatabase  bool              `toml:"keep_database"`
+	DatabaseTag   string            `toml:"database_tag"`

 	TimeFunc

 	mu sync.Mutex
@@ -97,10 +101,15 @@ const sampleConfig = `
   max_line_size = "64KiB"

   ## If the write request has a database in its query string then it should be
   ## kept for the metrics further on.
-  ## The database will be added as a tag under "database".
+  ## The database will be added as a tag. This tag can be used in
+  ## downstream outputs.
   keep_database = true

+  ## Optional tag name used to store the database, if you want to change it
+  ## to something custom. If not set it will be "database".
+  ## Only used if keep_database is set to true.
+  # database_tag = "database"
+
   ## Set one or more allowed client CA certificate file names to
   ## enable mutually authenticated TLS connections
   tls_allowed_cacerts = ["/etc/telegraf/clientca.pem"]
@@ -379,8 +388,15 @@
 	}

 	for _, m := range metrics {
-		if db != "" {
-			m.AddTag("database", db)
+		// Do we need to keep the database name from the query string?
+		if h.KeepDatabase {
+			// Did we get a database argument? If we didn't, there is nothing to set.
+			if db != "" {
+				// Is there already a database tag? If not, use the database from the query string.
+ if _, ok := m.Tags()[h.DatabaseTag]; !ok { + m.AddTag(h.DatabaseTag, db) + } + } } h.acc.AddFields(m.Name(), m.Fields(), m.Tags(), m.Time()) } @@ -447,12 +463,14 @@ func init() { return &HTTPListener{ ServiceAddress: ":8186", TimeFunc: time.Now, + DatabaseTag: DefaultDatabaseTag, } }) inputs.Add("influxdb_listener", func() telegraf.Input { return &HTTPListener{ ServiceAddress: ":8186", TimeFunc: time.Now, + DatabaseTag: DefaultDatabaseTag, } }) } diff --git a/plugins/inputs/influxdb_listener/http_listener_test.go b/plugins/inputs/influxdb_listener/http_listener_test.go index ff8241a037d90..3c4584b1dca2a 100644 --- a/plugins/inputs/influxdb_listener/http_listener_test.go +++ b/plugins/inputs/influxdb_listener/http_listener_test.go @@ -46,6 +46,7 @@ func newTestHTTPListener() *HTTPListener { listener := &HTTPListener{ ServiceAddress: "localhost:0", TimeFunc: time.Now, + DatabaseTag: DefaultDatabaseTag, } return listener } @@ -146,8 +147,9 @@ func TestWriteHTTPBasicAuth(t *testing.T) { require.EqualValues(t, http.StatusNoContent, resp.StatusCode) } -func TestWriteHTTP(t *testing.T) { +func TestWriteHTTPKeepDatabase(t *testing.T) { listener := newTestHTTPListener() + listener.KeepDatabase = true acc := &testutil.Accumulator{} require.NoError(t, listener.Start(acc)) @@ -162,9 +164,8 @@ func TestWriteHTTP(t *testing.T) { acc.Wait(1) acc.AssertContainsTaggedFields(t, "cpu_load_short", map[string]interface{}{"value": float64(12)}, - map[string]string{"host": "server01"}, + map[string]string{"host": "server01", "database": "mydb"}, ) - t.Logf("Randy %v", acc) // post multiple message to listener resp, err = http.Post(createURL(listener, "http", "/write", "db=mydb"), "", bytes.NewBuffer([]byte(testMsgs))) @@ -178,21 +179,9 @@ func TestWriteHTTP(t *testing.T) { for _, hostTag := range hostTags { acc.AssertContainsTaggedFields(t, "cpu_load_short", map[string]interface{}{"value": float64(12)}, - map[string]string{"host": hostTag}, + map[string]string{"host": hostTag, "database": "mydb"}, ) } - - // Post a gigantic metric to the listener and verify that an error is returned: - resp, err = http.Post(createURL(listener, "http", "/write", "db=mydb"), "", bytes.NewBuffer([]byte(hugeMetric))) - require.NoError(t, err) - resp.Body.Close() - require.EqualValues(t, 400, resp.StatusCode) - - acc.Wait(3) - acc.AssertContainsTaggedFields(t, "cpu_load_short", - map[string]interface{}{"value": float64(12)}, - map[string]string{"host": "server01"}, - ) } // http listener should add a newline at the end of the buffer if it's not there
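
For reviewers who want to see the rule in isolation: the sketch below is not part of the patch series, and `applyDatabaseTag` is an invented name for illustration, but it distills the database-tagging logic that `parse` applies when `keep_database` is enabled.

```go
package main

import "fmt"

// applyDatabaseTag mirrors the check added to (*HTTPListener).parse: when
// keep_database is on and the write URL carried ?db=..., record the database
// under the configured tag name unless the metric already carries that tag.
func applyDatabaseTag(tags map[string]string, db string, keepDatabase bool, databaseTag string) {
	if !keepDatabase || db == "" {
		// Feature disabled, or no db argument arrived with the write.
		return
	}
	if _, ok := tags[databaseTag]; !ok {
		// An existing tag always wins over the query string.
		tags[databaseTag] = db
	}
}

func main() {
	tags := map[string]string{"host": "server01"}
	applyDatabaseTag(tags, "mydb", true, "database")
	fmt.Println(tags) // map[database:mydb host:server01]
}
```

Note the precedence: a database tag already present on the incoming metric is preserved, which is exactly what the `if _, ok := m.Tags()[h.DatabaseTag]; !ok` guard in the patch ensures.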