From 076985cb383aa426f353011224c437d6c9635640 Mon Sep 17 00:00:00 2001 From: Matthias Loibl Date: Wed, 9 Dec 2020 16:03:32 +0100 Subject: [PATCH] Merge release-0.17 branch back (#3553) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * Fix query frontend regression on release v0.17.0 (#3480) * query-frontend: make POST-request to downstream url for labels and series api endpoints (#3444) Signed-off-by: Alexander Tunik <2braven@gmail.com> * remove default response cache config Signed-off-by: Ben Ye * ensure order when merging multiple responses Signed-off-by: Ben Ye Co-authored-by: Alexander Tunik <2braven@gmail.com> * *: Set debug.SetPanicOnFault(true) so we can recover seg faults. (#3498) Signed-off-by: Bartlomiej Plotka * Prepare v0.17.1 release (#3505) Signed-off-by: Matthias Loibl * fix index out of bound bug when comparing ZLabelSets (#3520) * fix index out of bound bug when comparing ZLabelSets Signed-off-by: Ben Ye * fix param parsing error message Signed-off-by: Ben Ye * address comment feedbacks Signed-off-by: Ben Ye * compact: do not cleanup blocks on boot (#3532) Do not cleanup blocks on boot because in some very bad cases there could be thousands of blocks ready-to-be deleted and doing that makes Thanos Compact exceed `initialDelaySeconds` on k8s. 
Signed-off-by: Giedrius Statkevičius * Prepare v0.17.2 (#3543) Signed-off-by: Matthias Loibl * Properly rebase CHANGELOG.md after merging release-0.17 Signed-off-by: Matthias Loibl Co-authored-by: Ben Ye Co-authored-by: Alexander Tunik <2braven@gmail.com> Co-authored-by: Bartlomiej Plotka Co-authored-by: Giedrius Statkevičius --- CHANGELOG.md | 19 +++ cmd/thanos/compact.go | 30 ++-- cmd/thanos/main.go | 5 + cmd/thanos/query_frontend.go | 6 +- .../cross-cluster-tls-communication.md | 2 +- pkg/queryfrontend/labels_codec.go | 24 ++- pkg/queryfrontend/labels_codec_test.go | 157 +++++++++++++++++- pkg/queryfrontend/queryrange_codec.go | 10 +- pkg/store/labelpb/label.go | 25 +++ pkg/store/labelpb/label_test.go | 146 ++++++++++++++++ test/e2e/compact_test.go | 12 +- test/e2e/query_frontend_test.go | 27 ++- test/e2e/query_test.go | 14 +- .../thanos/1-globalview/courseBase.sh | 2 +- .../katacoda/thanos/1-globalview/step2.md | 8 +- .../katacoda/thanos/1-globalview/step3.md | 2 +- .../thanos/7-multi-tenancy/courseBase.sh | 2 +- .../katacoda/thanos/7-multi-tenancy/step1.md | 10 +- .../katacoda/thanos/7-multi-tenancy/step2.md | 2 +- 19 files changed, 428 insertions(+), 75 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index fe5777067f..97ddd47502 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -25,6 +25,25 @@ We use _breaking :warning:_ to mark changes that are not backward compatible (re - [#3496](https://github.com/thanos-io/thanos/pull/3496) s3: Respect SignatureV2 flag for all credential providers. + + +## [v0.17.2](https://github.com/thanos-io/thanos/releases/tag/v0.17.2) - 2020.12.07 + +### Fixed + +- [#3532](https://github.com/thanos-io/thanos/pull/3532) compact: do not cleanup blocks on boot. Reverts the behavior change introduced in [#3115](https://github.com/thanos-io/thanos/pull/3115) as in some very bad cases the boot of Thanos Compact took a very long time since there were a lot of blocks-to-be-cleaned. 
+- [#3520](https://github.com/thanos-io/thanos/pull/3520) Fix index out of bound bug when comparing ZLabelSets. + +## [v0.17.1](https://github.com/thanos-io/thanos/releases/tag/v0.17.1) - 2020.11.24 + +### Fixed + +- [#3480](https://github.com/thanos-io/thanos/pull/3480) Query-frontend: Fixed regression. + +### Changed + +- [#3498](https://github.com/thanos-io/thanos/pull/3498) Enabled debug.SetPanicOnFault(true) which allows us to recover from queries causing SEG FAULTs (e.g. unmapped memory access). + ## [v0.17.0](https://github.com/thanos-io/thanos/releases/tag/v0.17.0) - 2020.11.18 ### Added diff --git a/cmd/thanos/compact.go b/cmd/thanos/compact.go index 64c00ec6d8..aa4a4da7dc 100644 --- a/cmd/thanos/compact.go +++ b/cmd/thanos/compact.go @@ -113,6 +113,10 @@ func runCompact( Name: "thanos_compact_iterations_total", Help: "Total number of iterations that were executed successfully.", }) + cleanups := promauto.With(reg).NewCounter(prometheus.CounterOpts{ + Name: "thanos_compact_block_cleanup_loops_total", + Help: "Total number of concurrent cleanup loops of partially uploaded blocks and marked blocks that were executed successfully.", + }) partialUploadDeleteAttempts := promauto.With(reg).NewCounter(prometheus.CounterOpts{ Name: "thanos_compact_aborted_partial_uploads_deletion_attempts_total", Help: "Total number of started deletions of blocks that are assumed aborted and only partially uploaded.", @@ -339,29 +343,20 @@ func runCompact( cleanMtx.Lock() defer cleanMtx.Unlock() - // No need to resync before partial uploads and delete marked blocks. Last sync should be valid. 
+ if err := sy.SyncMetas(ctx); err != nil { + cancel() + return errors.Wrap(err, "syncing metas") + } + compact.BestEffortCleanAbortedPartialUploads(ctx, logger, sy.Partial(), bkt, partialUploadDeleteAttempts, blocksCleaned, blockCleanupFailures) if err := blocksCleaner.DeleteMarkedBlocks(ctx); err != nil { return errors.Wrap(err, "cleaning marked blocks") } + cleanups.Inc() - if err := sy.SyncMetas(ctx); err != nil { - level.Error(logger).Log("msg", "failed to sync metas", "err", err) - } return nil } - // Do it once at the beginning to ensure that it runs at least once before - // the main loop. - if err := sy.SyncMetas(ctx); err != nil { - cancel() - return errors.Wrap(err, "syncing metas") - } - if err := cleanPartialMarked(); err != nil { - cancel() - return errors.Wrap(err, "cleaning partial and marked blocks") - } - compactMainFn := func() error { if err := compactor.Compact(ctx); err != nil { return errors.Wrap(err, "compaction") @@ -481,11 +476,6 @@ func runCompact( // since one iteration potentially could take a long time. if conf.cleanupBlocksInterval > 0 { g.Add(func() error { - // Wait the whole period at the beginning because we've executed this on boot. - select { - case <-time.After(conf.cleanupBlocksInterval): - case <-ctx.Done(): - } return runutil.Repeat(conf.cleanupBlocksInterval, ctx.Done(), cleanPartialMarked) }, func(error) { cancel() diff --git a/cmd/thanos/main.go b/cmd/thanos/main.go index bcaf2b4a8e..cab0fc6ee2 100644 --- a/cmd/thanos/main.go +++ b/cmd/thanos/main.go @@ -11,6 +11,7 @@ import ( "os/signal" "path/filepath" "runtime" + "runtime/debug" "syscall" "github.com/go-kit/kit/log" @@ -28,6 +29,10 @@ import ( ) func main() { + // We use mmaped resources in most of the components so hardcode PanicOnFault to true. This allows us to recover (if we can e.g if queries + // are temporarily accessing unmapped memory). 
+ debug.SetPanicOnFault(true) + if os.Getenv("DEBUG") != "" { runtime.SetMutexProfileFraction(10) runtime.SetBlockProfileRate(10) diff --git a/cmd/thanos/query_frontend.go b/cmd/thanos/query_frontend.go index afd0a9d294..fd97747331 100644 --- a/cmd/thanos/query_frontend.go +++ b/cmd/thanos/query_frontend.go @@ -51,12 +51,10 @@ func registerQueryFrontend(app *extkingpin.App) { MaxBodySize: 10 * 1024 * 1024, }, QueryRangeConfig: queryfrontend.QueryRangeConfig{ - Limits: &cortexvalidation.Limits{}, - ResultsCacheConfig: &queryrange.ResultsCacheConfig{}, + Limits: &cortexvalidation.Limits{}, }, LabelsConfig: queryfrontend.LabelsConfig{ - Limits: &cortexvalidation.Limits{}, - ResultsCacheConfig: &queryrange.ResultsCacheConfig{}, + Limits: &cortexvalidation.Limits{}, }, }, } diff --git a/docs/operating/cross-cluster-tls-communication.md b/docs/operating/cross-cluster-tls-communication.md index e2e6ed1fcc..e80553b22d 100644 --- a/docs/operating/cross-cluster-tls-communication.md +++ b/docs/operating/cross-cluster-tls-communication.md @@ -71,7 +71,7 @@ metadata: optional: false containers: - name: querier - image: 'thanosio/thanos:v0.17.0' + image: 'thanosio/thanos:v0.17.2' args: - query - '--log.level=info' diff --git a/pkg/queryfrontend/labels_codec.go b/pkg/queryfrontend/labels_codec.go index 4d922751fa..d096fe48e2 100644 --- a/pkg/queryfrontend/labels_codec.go +++ b/pkg/queryfrontend/labels_codec.go @@ -49,20 +49,21 @@ func NewThanosLabelsCodec(partialResponse bool, defaultMetadataTimeRange time.Du } } +// MergeResponse merges multiple responses into a single Response. It needs to dedup the responses and ensure the order. func (c labelsCodec) MergeResponse(responses ...queryrange.Response) (queryrange.Response, error) { if len(responses) == 0 { + // Empty response for label_names, label_values and series API. 
return &ThanosLabelsResponse{ Status: queryrange.StatusSuccess, Data: []string{}, }, nil } - if len(responses) == 1 { - return responses[0], nil - } - switch responses[0].(type) { case *ThanosLabelsResponse: + if len(responses) == 1 { + return responses[0], nil + } set := make(map[string]struct{}) for _, res := range responses { @@ -83,25 +84,20 @@ func (c labelsCodec) MergeResponse(responses ...queryrange.Response) (queryrange Data: lbls, }, nil case *ThanosSeriesResponse: - seriesData := make([]labelpb.ZLabelSet, 0) + seriesData := make(labelpb.ZLabelSets, 0) - // seriesString is used in soring so we don't have to calculate the string of label sets again. - seriesString := make([]string, 0) uniqueSeries := make(map[string]struct{}) for _, res := range responses { for _, series := range res.(*ThanosSeriesResponse).Data { s := series.PromLabels().String() if _, ok := uniqueSeries[s]; !ok { seriesData = append(seriesData, series) - seriesString = append(seriesString, s) uniqueSeries[s] = struct{}{} } } } - sort.Slice(seriesData, func(i, j int) bool { - return seriesString[i] < seriesString[j] - }) + sort.Sort(seriesData) return &ThanosSeriesResponse{ Status: queryrange.StatusSuccess, Data: seriesData, @@ -287,7 +283,7 @@ func (c labelsCodec) parseLabelsRequest(r *http.Request, op string) (queryrange. 
return nil, err } - result.StoreMatchers, err = parseMatchersParam(r.Form[queryv1.StoreMatcherParam]) + result.StoreMatchers, err = parseMatchersParam(r.Form, queryv1.StoreMatcherParam) if err != nil { return nil, err } @@ -321,7 +317,7 @@ func (c labelsCodec) parseSeriesRequest(r *http.Request) (queryrange.Request, er return nil, err } - result.Matchers, err = parseMatchersParam(r.Form[queryv1.MatcherParam]) + result.Matchers, err = parseMatchersParam(r.Form, queryv1.MatcherParam) if err != nil { return nil, err } @@ -340,7 +336,7 @@ func (c labelsCodec) parseSeriesRequest(r *http.Request) (queryrange.Request, er result.ReplicaLabels = r.Form[queryv1.ReplicaLabelsParam] } - result.StoreMatchers, err = parseMatchersParam(r.Form[queryv1.StoreMatcherParam]) + result.StoreMatchers, err = parseMatchersParam(r.Form, queryv1.StoreMatcherParam) if err != nil { return nil, err } diff --git a/pkg/queryfrontend/labels_codec_test.go b/pkg/queryfrontend/labels_codec_test.go index 5897a86536..a0e6980716 100644 --- a/pkg/queryfrontend/labels_codec_test.go +++ b/pkg/queryfrontend/labels_codec_test.go @@ -234,7 +234,7 @@ func TestLabelsCodec_EncodeRequest(t *testing.T) { }, { name: "thanos labels values request", - req: &ThanosLabelsRequest{Start: 123000, End: 456000, Path: "/api/v1/label/__name__/values"}, + req: &ThanosLabelsRequest{Start: 123000, End: 456000, Path: "/api/v1/label/__name__/values", Label: "__name__"}, checkFunc: func(r *http.Request) bool { return r.URL.Query().Get(start) == startTime && r.URL.Query().Get(end) == endTime && @@ -243,7 +243,7 @@ func TestLabelsCodec_EncodeRequest(t *testing.T) { }, { name: "thanos labels values request, partial response set to true", - req: &ThanosLabelsRequest{Start: 123000, End: 456000, Path: "/api/v1/label/__name__/values", PartialResponse: true}, + req: &ThanosLabelsRequest{Start: 123000, End: 456000, Path: "/api/v1/label/__name__/values", Label: "__name__", PartialResponse: true}, checkFunc: func(r *http.Request) bool { 
return r.URL.Query().Get(start) == startTime && r.URL.Query().Get(end) == endTime && @@ -313,12 +313,29 @@ func TestLabelsCodec_DecodeResponse(t *testing.T) { labelsData, err := json.Marshal(labelResponse) testutil.Ok(t, err) + labelResponseWithHeaders := &ThanosLabelsResponse{ + Status: "success", + Data: []string{"__name__"}, + Headers: []*ResponseHeader{{Name: cacheControlHeader, Values: []string{noStoreValue}}}, + } + labelsDataWithHeaders, err := json.Marshal(labelResponseWithHeaders) + testutil.Ok(t, err) + seriesResponse := &ThanosSeriesResponse{ Status: "success", Data: []labelpb.ZLabelSet{{Labels: []labelpb.ZLabel{{Name: "foo", Value: "bar"}}}}, } seriesData, err := json.Marshal(seriesResponse) testutil.Ok(t, err) + + seriesResponseWithHeaders := &ThanosSeriesResponse{ + Status: "success", + Data: []labelpb.ZLabelSet{{Labels: []labelpb.ZLabel{{Name: "foo", Value: "bar"}}}}, + Headers: []*ResponseHeader{{Name: cacheControlHeader, Values: []string{noStoreValue}}}, + } + seriesDataWithHeaders, err := json.Marshal(seriesResponseWithHeaders) + testutil.Ok(t, err) + for _, tc := range []struct { name string expectedError error @@ -344,12 +361,34 @@ func TestLabelsCodec_DecodeResponse(t *testing.T) { res: http.Response{StatusCode: 200, Body: ioutil.NopCloser(bytes.NewBuffer(labelsData))}, expectedResponse: labelResponse, }, + { + name: "thanos labels request with HTTP headers", + req: &ThanosLabelsRequest{}, + res: http.Response{ + StatusCode: 200, Body: ioutil.NopCloser(bytes.NewBuffer(labelsDataWithHeaders)), + Header: map[string][]string{ + cacheControlHeader: {noStoreValue}, + }, + }, + expectedResponse: labelResponseWithHeaders, + }, { name: "thanos series request", req: &ThanosSeriesRequest{}, res: http.Response{StatusCode: 200, Body: ioutil.NopCloser(bytes.NewBuffer(seriesData))}, expectedResponse: seriesResponse, }, + { + name: "thanos series request with HTTP headers", + req: &ThanosSeriesRequest{}, + res: http.Response{ + StatusCode: 200, Body: 
ioutil.NopCloser(bytes.NewBuffer(seriesDataWithHeaders)), + Header: map[string][]string{ + cacheControlHeader: {noStoreValue}, + }, + }, + expectedResponse: seriesResponseWithHeaders, + }, } { t.Run(tc.name, func(t *testing.T) { // Default partial response value doesn't matter when encoding requests. @@ -364,3 +403,117 @@ func TestLabelsCodec_DecodeResponse(t *testing.T) { }) } } + +func TestLabelsCodec_MergeResponse(t *testing.T) { + for _, tc := range []struct { + name string + expectedError error + responses []queryrange.Response + expectedResponse queryrange.Response + }{ + { + name: "Prometheus range query response format, not valid", + responses: []queryrange.Response{ + &queryrange.PrometheusResponse{Status: "success"}, + }, + expectedError: httpgrpc.Errorf(http.StatusInternalServerError, "invalid response format"), + }, + { + name: "Empty response", + responses: nil, + expectedResponse: &ThanosLabelsResponse{Status: queryrange.StatusSuccess, Data: []string{}}, + }, + { + name: "One label response", + responses: []queryrange.Response{ + &ThanosLabelsResponse{Status: "success", Data: []string{"localhost:9090", "localhost:9091"}}, + }, + expectedResponse: &ThanosLabelsResponse{Status: "success", Data: []string{"localhost:9090", "localhost:9091"}}, + }, + { + name: "One label response and two empty responses", + responses: []queryrange.Response{ + &ThanosLabelsResponse{Status: queryrange.StatusSuccess, Data: []string{}}, + &ThanosLabelsResponse{Status: "success", Data: []string{"localhost:9090", "localhost:9091"}}, + &ThanosLabelsResponse{Status: queryrange.StatusSuccess, Data: []string{}}, + }, + expectedResponse: &ThanosLabelsResponse{Status: "success", Data: []string{"localhost:9090", "localhost:9091"}}, + }, + { + name: "Multiple duplicate label responses", + responses: []queryrange.Response{ + &ThanosLabelsResponse{Status: "success", Data: []string{"localhost:9090", "localhost:9091"}}, + &ThanosLabelsResponse{Status: "success", Data: 
[]string{"localhost:9091", "localhost:9092"}}, + &ThanosLabelsResponse{Status: "success", Data: []string{"localhost:9092", "localhost:9093"}}, + }, + expectedResponse: &ThanosLabelsResponse{Status: "success", + Data: []string{"localhost:9090", "localhost:9091", "localhost:9092", "localhost:9093"}}, + }, + // This case shouldn't happen because the responses from Querier are sorted. + { + name: "Multiple unordered label responses", + responses: []queryrange.Response{ + &ThanosLabelsResponse{Status: "success", Data: []string{"localhost:9093", "localhost:9092"}}, + &ThanosLabelsResponse{Status: "success", Data: []string{"localhost:9091", "localhost:9090"}}, + }, + expectedResponse: &ThanosLabelsResponse{Status: "success", + Data: []string{"localhost:9090", "localhost:9091", "localhost:9092", "localhost:9093"}}, + }, + { + name: "One series response", + responses: []queryrange.Response{ + &ThanosSeriesResponse{Status: "success", Data: []labelpb.ZLabelSet{{Labels: []labelpb.ZLabel{{Name: "foo", Value: "bar"}}}}}, + }, + expectedResponse: &ThanosSeriesResponse{Status: "success", Data: []labelpb.ZLabelSet{{Labels: []labelpb.ZLabel{{Name: "foo", Value: "bar"}}}}}, + }, + { + name: "One series response and two empty responses", + responses: []queryrange.Response{ + &ThanosSeriesResponse{Status: queryrange.StatusSuccess}, + &ThanosSeriesResponse{Status: "success", Data: []labelpb.ZLabelSet{{Labels: []labelpb.ZLabel{{Name: "foo", Value: "bar"}}}}}, + &ThanosSeriesResponse{Status: queryrange.StatusSuccess}, + }, + expectedResponse: &ThanosSeriesResponse{Status: "success", Data: []labelpb.ZLabelSet{{Labels: []labelpb.ZLabel{{Name: "foo", Value: "bar"}}}}}, + }, + { + name: "Multiple duplicate series responses", + responses: []queryrange.Response{ + &ThanosSeriesResponse{Status: "success", Data: []labelpb.ZLabelSet{{Labels: []labelpb.ZLabel{{Name: "foo", Value: "bar"}}}}}, + &ThanosSeriesResponse{Status: "success", Data: []labelpb.ZLabelSet{{Labels: []labelpb.ZLabel{{Name: "foo", 
Value: "bar"}}}}}, + &ThanosSeriesResponse{Status: "success", Data: []labelpb.ZLabelSet{{Labels: []labelpb.ZLabel{{Name: "foo", Value: "bar"}}}}}, + }, + expectedResponse: &ThanosSeriesResponse{Status: "success", Data: []labelpb.ZLabelSet{{Labels: []labelpb.ZLabel{{Name: "foo", Value: "bar"}}}}}, + }, + { + name: "Multiple unordered series responses", + responses: []queryrange.Response{ + &ThanosSeriesResponse{Status: "success", Data: []labelpb.ZLabelSet{ + {Labels: []labelpb.ZLabel{{Name: "foo", Value: "bar"}}}, + {Labels: []labelpb.ZLabel{{Name: "test", Value: "aaa"}, {Name: "instance", Value: "localhost:9090"}}}, + }}, + &ThanosSeriesResponse{Status: "success", Data: []labelpb.ZLabelSet{ + {Labels: []labelpb.ZLabel{{Name: "foo", Value: "aaa"}}}, + {Labels: []labelpb.ZLabel{{Name: "test", Value: "bbb"}, {Name: "instance", Value: "localhost:9091"}}}, + }}, + }, + expectedResponse: &ThanosSeriesResponse{Status: "success", Data: []labelpb.ZLabelSet{ + {Labels: []labelpb.ZLabel{{Name: "foo", Value: "aaa"}}}, + {Labels: []labelpb.ZLabel{{Name: "foo", Value: "bar"}}}, + {Labels: []labelpb.ZLabel{{Name: "test", Value: "aaa"}, {Name: "instance", Value: "localhost:9090"}}}, + {Labels: []labelpb.ZLabel{{Name: "test", Value: "bbb"}, {Name: "instance", Value: "localhost:9091"}}}, + }}, + }, + } { + t.Run(tc.name, func(t *testing.T) { + // Default partial response value doesn't matter when encoding requests. + codec := NewThanosLabelsCodec(false, time.Hour*2) + r, err := codec.MergeResponse(tc.responses...) 
+ if tc.expectedError != nil { + testutil.Equals(t, err, tc.expectedError) + } else { + testutil.Ok(t, err) + testutil.Equals(t, tc.expectedResponse, r) + } + }) + } +} diff --git a/pkg/queryfrontend/queryrange_codec.go b/pkg/queryfrontend/queryrange_codec.go index 004fb522d5..3206c28be8 100644 --- a/pkg/queryfrontend/queryrange_codec.go +++ b/pkg/queryfrontend/queryrange_codec.go @@ -111,7 +111,7 @@ func (c queryRangeCodec) DecodeRequest(_ context.Context, r *http.Request) (quer result.ReplicaLabels = r.Form[queryv1.ReplicaLabelsParam] } - result.StoreMatchers, err = parseMatchersParam(r.Form[queryv1.StoreMatcherParam]) + result.StoreMatchers, err = parseMatchersParam(r.Form, queryv1.StoreMatcherParam) if err != nil { return nil, err } @@ -221,12 +221,12 @@ func parsePartialResponseParam(s string, defaultEnablePartialResponse bool) (boo return defaultEnablePartialResponse, nil } -func parseMatchersParam(ss []string) ([][]*labels.Matcher, error) { - matchers := make([][]*labels.Matcher, 0, len(ss)) - for _, s := range ss { +func parseMatchersParam(ss url.Values, matcherParam string) ([][]*labels.Matcher, error) { + matchers := make([][]*labels.Matcher, 0, len(ss[matcherParam])) + for _, s := range ss[matcherParam] { ms, err := parser.ParseMetricSelector(s) if err != nil { - return nil, httpgrpc.Errorf(http.StatusBadRequest, errCannotParse, queryv1.StoreMatcherParam) + return nil, httpgrpc.Errorf(http.StatusBadRequest, errCannotParse, matcherParam) } matchers = append(matchers, ms) } diff --git a/pkg/store/labelpb/label.go b/pkg/store/labelpb/label.go index 5638f69e5f..ea0a7fa49b 100644 --- a/pkg/store/labelpb/label.go +++ b/pkg/store/labelpb/label.go @@ -295,3 +295,28 @@ func DeepCopy(lbls []ZLabel) []ZLabel { } return ret } + +// ZLabelSets is a sortable list of ZLabelSet. It assumes the label pairs in each ZLabelSet element are already sorted. 
+type ZLabelSets []ZLabelSet + +func (z ZLabelSets) Len() int { return len(z) } + +func (z ZLabelSets) Swap(i, j int) { z[i], z[j] = z[j], z[i] } + +func (z ZLabelSets) Less(i, j int) bool { + l := 0 + r := 0 + var result int + lenI, lenJ := len(z[i].Labels), len(z[j].Labels) + for l < lenI && r < lenJ { + result = z[i].Labels[l].Compare(z[j].Labels[r]) + if result == 0 { + l++ + r++ + continue + } + return result < 0 + } + + return l == lenI +} diff --git a/pkg/store/labelpb/label_test.go b/pkg/store/labelpb/label_test.go index 6656eea445..d8d258e8e1 100644 --- a/pkg/store/labelpb/label_test.go +++ b/pkg/store/labelpb/label_test.go @@ -5,6 +5,8 @@ package labelpb import ( "fmt" + "reflect" + "sort" "testing" "github.com/prometheus/prometheus/pkg/labels" @@ -104,3 +106,147 @@ func BenchmarkZLabelsMarshalUnmarshal(b *testing.B) { } }) } + +func TestSortZLabelSets(t *testing.T) { + expectedResult := ZLabelSets{ + { + Labels: ZLabelsFromPromLabels( + labels.FromMap(map[string]string{ + "__name__": "grpc_client_handled_total", + "cluster": "test", + "grpc_code": "OK", + "grpc_method": "Info", + }), + ), + }, + { + Labels: ZLabelsFromPromLabels( + labels.FromMap(map[string]string{ + "__name__": "grpc_client_handled_total", + "cluster": "test", + "grpc_code": "OK", + "grpc_method": "LabelNames", + }), + ), + }, + { + Labels: ZLabelsFromPromLabels( + labels.FromMap(map[string]string{ + "__name__": "grpc_client_handled_total", + "cluster": "test", + "grpc_code": "OK", + "aa": "1", + "bb": "2", + "cc": "3", + "dd": "4", + "ee": "5", + }), + ), + }, + { + Labels: ZLabelsFromPromLabels( + labels.FromMap(map[string]string{ + "__name__": "grpc_client_handled_total", + "cluster": "test", + "grpc_code": "OK", + "aa": "1", + "bb": "2", + "cc": "3", + "dd": "4", + "ee": "5", + }), + ), + }, + { + Labels: ZLabelsFromPromLabels( + labels.FromMap(map[string]string{ + "__name__": "grpc_server_handled_total", + "cluster": "test", + "grpc_code": "OK", + "grpc_method": "Info", + }), + ), 
+ }, + { + Labels: ZLabelsFromPromLabels( + labels.FromMap(map[string]string{ + "__name__": "up", + "instance": "localhost:10908", + }), + ), + }, + } + + list := ZLabelSets{ + { + Labels: ZLabelsFromPromLabels( + labels.FromMap(map[string]string{ + "__name__": "up", + "instance": "localhost:10908", + }), + ), + }, + { + Labels: ZLabelsFromPromLabels( + labels.FromMap(map[string]string{ + "__name__": "grpc_server_handled_total", + "cluster": "test", + "grpc_code": "OK", + "grpc_method": "Info", + }), + ), + }, + { + Labels: ZLabelsFromPromLabels( + labels.FromMap(map[string]string{ + "__name__": "grpc_client_handled_total", + "cluster": "test", + "grpc_code": "OK", + "grpc_method": "LabelNames", + }), + ), + }, + { + Labels: ZLabelsFromPromLabels( + labels.FromMap(map[string]string{ + "__name__": "grpc_client_handled_total", + "cluster": "test", + "grpc_code": "OK", + "grpc_method": "Info", + }), + ), + }, + { + Labels: ZLabelsFromPromLabels( + labels.FromMap(map[string]string{ + "__name__": "grpc_client_handled_total", + "cluster": "test", + "grpc_code": "OK", + "aa": "1", + "bb": "2", + "cc": "3", + "dd": "4", + "ee": "5", + }), + ), + }, + // This label set is the same as the previous one, which should correctly return 0 in Less() function. 
+ { + Labels: ZLabelsFromPromLabels( + labels.FromMap(map[string]string{ + "cluster": "test", + "__name__": "grpc_client_handled_total", + "grpc_code": "OK", + "aa": "1", + "bb": "2", + "cc": "3", + "dd": "4", + "ee": "5", + }), + ), + }, + } + + sort.Sort(list) + reflect.DeepEqual(expectedResult, list) +} diff --git a/test/e2e/compact_test.go b/test/e2e/compact_test.go index b3deb8f19a..bfba33da5c 100644 --- a/test/e2e/compact_test.go +++ b/test/e2e/compact_test.go @@ -535,12 +535,14 @@ func TestCompactWithStoreGateway(t *testing.T) { c, err := e2ethanos.NewCompactor(s.SharedDir(), "expect-to-halt", svcConfig, nil) testutil.Ok(t, err) testutil.Ok(t, s.StartAndWaitReady(c)) - testutil.Ok(t, str.WaitSumMetrics(e2e.Equals(float64(len(rawBlockIDs)+7)), "thanos_blocks_meta_synced")) - testutil.Ok(t, c.WaitSumMetrics(e2e.Equals(0), "thanos_blocks_meta_sync_failures_total")) - testutil.Ok(t, c.WaitSumMetrics(e2e.Equals(0), "thanos_blocks_meta_modified")) - // Expect compactor halted. + // Expect compactor halted and for one cleanup iteration to happen. testutil.Ok(t, c.WaitSumMetrics(e2e.Equals(1), "thanos_compact_halted")) + testutil.Ok(t, c.WaitSumMetrics(e2e.Equals(1), "thanos_compact_block_cleanup_loops_total")) + + testutil.Ok(t, str.WaitSumMetrics(e2e.Equals(float64(len(rawBlockIDs)+5)), "thanos_blocks_meta_synced")) + testutil.Ok(t, c.WaitSumMetrics(e2e.Equals(0), "thanos_blocks_meta_sync_failures_total")) + testutil.Ok(t, c.WaitSumMetrics(e2e.Equals(0), "thanos_blocks_meta_modified")) // The compact directory is still there. dataDir := filepath.Join(s.SharedDir(), "data", "compact", "expect-to-halt") @@ -559,8 +561,8 @@ func TestCompactWithStoreGateway(t *testing.T) { testutil.Ok(t, c.WaitSumMetrics(e2e.Equals(2), "thanos_compact_group_compaction_runs_completed_total")) // However, the blocks have been cleaned because that happens concurrently. 
- testutil.Ok(t, c.WaitSumMetrics(e2e.Equals(2), "thanos_compact_blocks_cleaned_total")) testutil.Ok(t, c.WaitSumMetrics(e2e.Equals(2), "thanos_compact_aborted_partial_uploads_deletion_attempts_total")) + testutil.Ok(t, c.WaitSumMetrics(e2e.Equals(2), "thanos_compact_blocks_cleaned_total")) // Ensure bucket UI. ensureGETStatusCode(t, http.StatusOK, "http://"+path.Join(c.HTTPEndpoint(), "global")) diff --git a/test/e2e/query_frontend_test.go b/test/e2e/query_frontend_test.go index 196365a36d..7ee97b4acf 100644 --- a/test/e2e/query_frontend_test.go +++ b/test/e2e/query_frontend_test.go @@ -5,6 +5,7 @@ package e2e_test import ( "context" + "reflect" "testing" "time" @@ -262,7 +263,7 @@ func TestQueryFrontend(t *testing.T) { t.Run("query frontend splitting works for labels values API", func(t *testing.T) { labelValues(t, ctx, queryFrontend.HTTPEndpoint(), "instance", timestamp.FromTime(now.Add(-time.Hour)), timestamp.FromTime(now.Add(time.Hour)), func(res []string) bool { - return len(res) > 0 + return len(res) == 1 && res[0] == "localhost:9090" }) testutil.Ok(t, q.WaitSumMetricsWithOptions( e2e.Equals(1), @@ -281,7 +282,7 @@ func TestQueryFrontend(t *testing.T) { ) labelValues(t, ctx, queryFrontend.HTTPEndpoint(), "instance", timestamp.FromTime(now.Add(-24*time.Hour)), timestamp.FromTime(now.Add(time.Hour)), func(res []string) bool { - return len(res) > 0 + return len(res) == 1 && res[0] == "localhost:9090" }) testutil.Ok(t, q.WaitSumMetricsWithOptions( e2e.Equals(3), @@ -309,7 +310,16 @@ func TestQueryFrontend(t *testing.T) { timestamp.FromTime(now.Add(-time.Hour)), timestamp.FromTime(now.Add(time.Hour)), func(res []map[string]string) bool { - return len(res) > 0 + if len(res) != 1 { + return false + } + + return reflect.DeepEqual(res[0], map[string]string{ + "__name__": "up", + "instance": "localhost:9090", + "job": "myself", + "prometheus": "test", + }) }, ) testutil.Ok(t, q.WaitSumMetricsWithOptions( @@ -336,7 +346,16 @@ func TestQueryFrontend(t *testing.T) { 
timestamp.FromTime(now.Add(-24*time.Hour)), timestamp.FromTime(now.Add(time.Hour)), func(res []map[string]string) bool { - return len(res) > 0 + if len(res) != 1 { + return false + } + + return reflect.DeepEqual(res[0], map[string]string{ + "__name__": "up", + "instance": "localhost:9090", + "job": "myself", + "prometheus": "test", + }) }, ) testutil.Ok(t, q.WaitSumMetricsWithOptions( diff --git a/test/e2e/query_test.go b/test/e2e/query_test.go index 90db116f03..1658ebcfb6 100644 --- a/test/e2e/query_test.go +++ b/test/e2e/query_test.go @@ -328,7 +328,7 @@ func TestQueryLabelValues(t *testing.T) { now := time.Now() labelValues(t, ctx, q.HTTPEndpoint(), "instance", timestamp.FromTime(now.Add(-time.Hour)), timestamp.FromTime(now.Add(time.Hour)), func(res []string) bool { - return len(res) > 0 + return len(res) == 1 && res[0] == "localhost:9090" }) // Outside time range. @@ -428,7 +428,7 @@ func labelNames(t *testing.T, ctx context.Context, addr string, start, end int64 logger := log.NewLogfmtLogger(os.Stdout) logger = log.With(logger, "ts", log.DefaultTimestampUTC) - testutil.Ok(t, runutil.RetryWithLog(logger, time.Second, ctx.Done(), func() error { + testutil.Ok(t, runutil.RetryWithLog(logger, 2*time.Second, ctx.Done(), func() error { res, err := promclient.NewDefaultClient().LabelNamesInGRPC(ctx, mustURLParse(t, "http://"+addr), start, end) if err != nil { return err @@ -437,7 +437,7 @@ func labelNames(t *testing.T, ctx context.Context, addr string, start, end int64 return nil } - return errors.Errorf("unexpected results size %d", len(res)) + return errors.Errorf("unexpected results %v", res) })) } @@ -447,7 +447,7 @@ func labelValues(t *testing.T, ctx context.Context, addr, label string, start, e logger := log.NewLogfmtLogger(os.Stdout) logger = log.With(logger, "ts", log.DefaultTimestampUTC) - testutil.Ok(t, runutil.RetryWithLog(logger, time.Second, ctx.Done(), func() error { + testutil.Ok(t, runutil.RetryWithLog(logger, 2*time.Second, ctx.Done(), func() error { 
res, err := promclient.NewDefaultClient().LabelValuesInGRPC(ctx, mustURLParse(t, "http://"+addr), label, start, end) if err != nil { return err @@ -456,7 +456,7 @@ func labelValues(t *testing.T, ctx context.Context, addr, label string, start, e return nil } - return errors.Errorf("unexpected results size %d", len(res)) + return errors.Errorf("unexpected results %v", res) })) } @@ -465,7 +465,7 @@ func series(t *testing.T, ctx context.Context, addr string, matchers []storepb.L logger := log.NewLogfmtLogger(os.Stdout) logger = log.With(logger, "ts", log.DefaultTimestampUTC) - testutil.Ok(t, runutil.RetryWithLog(logger, time.Second, ctx.Done(), func() error { + testutil.Ok(t, runutil.RetryWithLog(logger, 2*time.Second, ctx.Done(), func() error { res, err := promclient.NewDefaultClient().SeriesInGRPC(ctx, mustURLParse(t, "http://"+addr), matchers, start, end) if err != nil { return err @@ -474,7 +474,7 @@ func series(t *testing.T, ctx context.Context, addr string, matchers []storepb.L return nil } - return errors.Errorf("unexpected results size %d", len(res)) + return errors.Errorf("unexpected results %v", res) })) } diff --git a/tutorials/katacoda/thanos/1-globalview/courseBase.sh b/tutorials/katacoda/thanos/1-globalview/courseBase.sh index 99dd7e87e1..f6740c288f 100644 --- a/tutorials/katacoda/thanos/1-globalview/courseBase.sh +++ b/tutorials/katacoda/thanos/1-globalview/courseBase.sh @@ -1,4 +1,4 @@ #!/usr/bin/env bash docker pull quay.io/prometheus/prometheus:v2.16.0 -docker pull quay.io/thanos/thanos:v0.17.0 +docker pull quay.io/thanos/thanos:v0.17.2 diff --git a/tutorials/katacoda/thanos/1-globalview/step2.md b/tutorials/katacoda/thanos/1-globalview/step2.md index 4f8e424b29..b14e0cc5bd 100644 --- a/tutorials/katacoda/thanos/1-globalview/step2.md +++ b/tutorials/katacoda/thanos/1-globalview/step2.md @@ -10,7 +10,7 @@ component and can be invoked in a single command. 
Let's take a look at all the Thanos commands: ``` -docker run --rm quay.io/thanos/thanos:v0.17.0 --help +docker run --rm quay.io/thanos/thanos:v0.17.2 --help ```{{execute}} You should see multiple commands that solves different purposes. @@ -53,7 +53,7 @@ docker run -d --net=host --rm \ -v $(pwd)/prometheus0_eu1.yml:/etc/prometheus/prometheus.yml \ --name prometheus-0-sidecar-eu1 \ -u root \ - quay.io/thanos/thanos:v0.17.0 \ + quay.io/thanos/thanos:v0.17.2 \ sidecar \ --http-address 0.0.0.0:19090 \ --grpc-address 0.0.0.0:19190 \ @@ -68,7 +68,7 @@ docker run -d --net=host --rm \ -v $(pwd)/prometheus0_us1.yml:/etc/prometheus/prometheus.yml \ --name prometheus-0-sidecar-us1 \ -u root \ - quay.io/thanos/thanos:v0.17.0 \ + quay.io/thanos/thanos:v0.17.2 \ sidecar \ --http-address 0.0.0.0:19091 \ --grpc-address 0.0.0.0:19191 \ @@ -81,7 +81,7 @@ docker run -d --net=host --rm \ -v $(pwd)/prometheus1_us1.yml:/etc/prometheus/prometheus.yml \ --name prometheus-1-sidecar-us1 \ -u root \ - quay.io/thanos/thanos:v0.17.0 \ + quay.io/thanos/thanos:v0.17.2 \ sidecar \ --http-address 0.0.0.0:19092 \ --grpc-address 0.0.0.0:19192 \ diff --git a/tutorials/katacoda/thanos/1-globalview/step3.md b/tutorials/katacoda/thanos/1-globalview/step3.md index 4b666db620..0238be5e8a 100644 --- a/tutorials/katacoda/thanos/1-globalview/step3.md +++ b/tutorials/katacoda/thanos/1-globalview/step3.md @@ -28,7 +28,7 @@ Click below snippet to start the Querier. 
``` docker run -d --net=host --rm \ --name querier \ - quay.io/thanos/thanos:v0.17.0 \ + quay.io/thanos/thanos:v0.17.2 \ query \ --http-address 0.0.0.0:29090 \ --query.replica-label replica \ diff --git a/tutorials/katacoda/thanos/7-multi-tenancy/courseBase.sh b/tutorials/katacoda/thanos/7-multi-tenancy/courseBase.sh index fc9452241e..e186a04053 100644 --- a/tutorials/katacoda/thanos/7-multi-tenancy/courseBase.sh +++ b/tutorials/katacoda/thanos/7-multi-tenancy/courseBase.sh @@ -1,7 +1,7 @@ #!/usr/bin/env bash docker pull quay.io/prometheus/prometheus:v2.20.0 -docker pull quay.io/thanos/thanos:v0.17.0 +docker pull quay.io/thanos/thanos:v0.17.2 docker pull quay.io/thanos/prom-label-proxy:v0.3.0-rc.0-ext1 docker pull caddy:2.2.1 diff --git a/tutorials/katacoda/thanos/7-multi-tenancy/step1.md b/tutorials/katacoda/thanos/7-multi-tenancy/step1.md index 002de580b0..686b48482c 100644 --- a/tutorials/katacoda/thanos/7-multi-tenancy/step1.md +++ b/tutorials/katacoda/thanos/7-multi-tenancy/step1.md @@ -88,7 +88,7 @@ docker run -d --net=host --rm \ -v $(pwd)/editor/prometheus0_fruit.yml:/etc/prometheus/prometheus.yml \ --name prometheus-0-sidecar-fruit \ -u root \ - quay.io/thanos/thanos:v0.17.0 \ + quay.io/thanos/thanos:v0.17.2 \ sidecar \ --http-address 0.0.0.0:19090 \ --grpc-address 0.0.0.0:19190 \ @@ -120,7 +120,7 @@ docker run -d --net=host --rm \ -v $(pwd)/editor/prometheus0_veggie.yml:/etc/prometheus/prometheus.yml \ --name prometheus-0-sidecar-veggie \ -u root \ - quay.io/thanos/thanos:v0.17.0 \ + quay.io/thanos/thanos:v0.17.2 \ sidecar \ --http-address 0.0.0.0:19091 \ --grpc-address 0.0.0.0:19191 \ @@ -152,7 +152,7 @@ docker run -d --net=host --rm \ -v $(pwd)/editor/prometheus1_veggie.yml:/etc/prometheus/prometheus.yml \ --name prometheus-01-sidecar-veggie \ -u root \ - quay.io/thanos/thanos:v0.17.0 \ + quay.io/thanos/thanos:v0.17.2 \ sidecar \ --http-address 0.0.0.0:19092 \ --grpc-address 0.0.0.0:19192 \ @@ -170,7 +170,7 @@ Fruit: ``` docker run -d --net=host --rm \ 
--name querier-fruit \ - quay.io/thanos/thanos:v0.17.0 \ + quay.io/thanos/thanos:v0.17.2 \ query \ --http-address 0.0.0.0:29091 \ --grpc-address 0.0.0.0:29191 \ @@ -183,7 +183,7 @@ Veggie: ``` docker run -d --net=host --rm \ --name querier-veggie \ - quay.io/thanos/thanos:v0.17.0 \ + quay.io/thanos/thanos:v0.17.2 \ query \ --http-address 0.0.0.0:29092 \ --grpc-address 0.0.0.0:29192 \ diff --git a/tutorials/katacoda/thanos/7-multi-tenancy/step2.md b/tutorials/katacoda/thanos/7-multi-tenancy/step2.md index f14cdc645f..85796de8a2 100644 --- a/tutorials/katacoda/thanos/7-multi-tenancy/step2.md +++ b/tutorials/katacoda/thanos/7-multi-tenancy/step2.md @@ -11,7 +11,7 @@ docker stop querier-fruit && docker stop querier-veggie ``` docker run -d --net=host --rm \ --name querier-multi \ - quay.io/thanos/thanos:v0.17.0 \ + quay.io/thanos/thanos:v0.17.2 \ query \ --http-address 0.0.0.0:29090 \ --grpc-address 0.0.0.0:29190 \