Skip to content

Commit

Permalink
Merge release-0.17 branch back (#3553)
Browse files Browse the repository at this point in the history
* Fix query frontend regression on release v0.17.0 (#3480)

* query-frontend: make POST-request to downstream url for labels and series api endpoints (#3444)

Signed-off-by: Alexander Tunik <[email protected]>

* remove default response cache config

Signed-off-by: Ben Ye <[email protected]>

* ensure order when merging multiple responses

Signed-off-by: Ben Ye <[email protected]>

Co-authored-by: Alexander Tunik <[email protected]>

* *: Set debug.SetPanicOnFault(true) so we can recover seg faults. (#3498)

Signed-off-by: Bartlomiej Plotka <[email protected]>

* Prepare v0.17.1 release (#3505)

Signed-off-by: Matthias Loibl <[email protected]>

* fix index out of bound bug when comparing ZLabelSets (#3520)

* fix index out of bound bug when comparing ZLabelSets

Signed-off-by: Ben Ye <[email protected]>

* fix param parsing error message

Signed-off-by: Ben Ye <[email protected]>

* address comment feedbacks

Signed-off-by: Ben Ye <[email protected]>

* compact: do not cleanup blocks on boot (#3532)

Do not clean up blocks on boot because in some very bad cases there could
be thousands of blocks ready to be deleted, and deleting them all at startup
makes Thanos Compact exceed `initialDelaySeconds` on k8s.

Signed-off-by: Giedrius Statkevičius <[email protected]>

* Prepare v0.17.2 (#3543)

Signed-off-by: Matthias Loibl <[email protected]>

* Properly rebase CHANGELOG.md after merging release-0.17

Signed-off-by: Matthias Loibl <[email protected]>

Co-authored-by: Ben Ye <[email protected]>
Co-authored-by: Alexander Tunik <[email protected]>
Co-authored-by: Bartlomiej Plotka <[email protected]>
Co-authored-by: Giedrius Statkevičius <[email protected]>
  • Loading branch information
5 people authored Dec 9, 2020
1 parent d616214 commit 076985c
Show file tree
Hide file tree
Showing 19 changed files with 428 additions and 75 deletions.
19 changes: 19 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,25 @@ We use _breaking :warning:_ to mark changes that are not backward compatible (re

- [#3496](https://github.com/thanos-io/thanos/pull/3496) s3: Respect SignatureV2 flag for all credential providers.



## [v0.17.2](https://github.com/thanos-io/thanos/releases/tag/v0.17.2) - 2020.12.07

### Fixed

- [#3532](https://github.com/thanos-io/thanos/pull/3532) compact: do not cleanup blocks on boot. Reverts the behavior change introduced in [#3115](https://github.com/thanos-io/thanos/pull/3115) as in some very bad cases the boot of Thanos Compact took a very long time since there were a lot of blocks-to-be-cleaned.
- [#3520](https://github.com/thanos-io/thanos/pull/3520) Fix index out of bound bug when comparing ZLabelSets.

## [v0.17.1](https://github.com/thanos-io/thanos/releases/tag/v0.17.1) - 2020.11.24

### Fixed

- [#3480](https://github.com/thanos-io/thanos/pull/3480) Query-frontend: Fixed regression.

### Changed

- [#3498](https://github.com/thanos-io/thanos/pull/3498) Enabled debug.SetPanicOnFault(true), which allows us to recover from queries causing segmentation faults (e.g. unmapped memory access).

## [v0.17.0](https://github.com/thanos-io/thanos/releases/tag/v0.17.0) - 2020.11.18

### Added
Expand Down
30 changes: 10 additions & 20 deletions cmd/thanos/compact.go
Original file line number Diff line number Diff line change
Expand Up @@ -113,6 +113,10 @@ func runCompact(
Name: "thanos_compact_iterations_total",
Help: "Total number of iterations that were executed successfully.",
})
cleanups := promauto.With(reg).NewCounter(prometheus.CounterOpts{
Name: "thanos_compact_block_cleanup_loops_total",
Help: "Total number of concurrent cleanup loops of partially uploaded blocks and marked blocks that were executed successfully.",
})
partialUploadDeleteAttempts := promauto.With(reg).NewCounter(prometheus.CounterOpts{
Name: "thanos_compact_aborted_partial_uploads_deletion_attempts_total",
Help: "Total number of started deletions of blocks that are assumed aborted and only partially uploaded.",
Expand Down Expand Up @@ -339,29 +343,20 @@ func runCompact(
cleanMtx.Lock()
defer cleanMtx.Unlock()

// No need to resync before partial uploads and delete marked blocks. Last sync should be valid.
if err := sy.SyncMetas(ctx); err != nil {
cancel()
return errors.Wrap(err, "syncing metas")
}

compact.BestEffortCleanAbortedPartialUploads(ctx, logger, sy.Partial(), bkt, partialUploadDeleteAttempts, blocksCleaned, blockCleanupFailures)
if err := blocksCleaner.DeleteMarkedBlocks(ctx); err != nil {
return errors.Wrap(err, "cleaning marked blocks")
}
cleanups.Inc()

if err := sy.SyncMetas(ctx); err != nil {
level.Error(logger).Log("msg", "failed to sync metas", "err", err)
}
return nil
}

// Do it once at the beginning to ensure that it runs at least once before
// the main loop.
if err := sy.SyncMetas(ctx); err != nil {
cancel()
return errors.Wrap(err, "syncing metas")
}
if err := cleanPartialMarked(); err != nil {
cancel()
return errors.Wrap(err, "cleaning partial and marked blocks")
}

compactMainFn := func() error {
if err := compactor.Compact(ctx); err != nil {
return errors.Wrap(err, "compaction")
Expand Down Expand Up @@ -481,11 +476,6 @@ func runCompact(
// since one iteration potentially could take a long time.
if conf.cleanupBlocksInterval > 0 {
g.Add(func() error {
// Wait the whole period at the beginning because we've executed this on boot.
select {
case <-time.After(conf.cleanupBlocksInterval):
case <-ctx.Done():
}
return runutil.Repeat(conf.cleanupBlocksInterval, ctx.Done(), cleanPartialMarked)
}, func(error) {
cancel()
Expand Down
5 changes: 5 additions & 0 deletions cmd/thanos/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@ import (
"os/signal"
"path/filepath"
"runtime"
"runtime/debug"
"syscall"

"github.com/go-kit/kit/log"
Expand All @@ -28,6 +29,10 @@ import (
)

func main() {
// We use mmapped resources in most of the components, so hardcode PanicOnFault to true. This allows us to recover where
// possible (e.g. if queries are temporarily accessing unmapped memory).
debug.SetPanicOnFault(true)

if os.Getenv("DEBUG") != "" {
runtime.SetMutexProfileFraction(10)
runtime.SetBlockProfileRate(10)
Expand Down
6 changes: 2 additions & 4 deletions cmd/thanos/query_frontend.go
Original file line number Diff line number Diff line change
Expand Up @@ -51,12 +51,10 @@ func registerQueryFrontend(app *extkingpin.App) {
MaxBodySize: 10 * 1024 * 1024,
},
QueryRangeConfig: queryfrontend.QueryRangeConfig{
Limits: &cortexvalidation.Limits{},
ResultsCacheConfig: &queryrange.ResultsCacheConfig{},
Limits: &cortexvalidation.Limits{},
},
LabelsConfig: queryfrontend.LabelsConfig{
Limits: &cortexvalidation.Limits{},
ResultsCacheConfig: &queryrange.ResultsCacheConfig{},
Limits: &cortexvalidation.Limits{},
},
},
}
Expand Down
2 changes: 1 addition & 1 deletion docs/operating/cross-cluster-tls-communication.md
Original file line number Diff line number Diff line change
Expand Up @@ -71,7 +71,7 @@ metadata:
optional: false
containers:
- name: querier
image: 'thanosio/thanos:v0.17.0'
image: 'thanosio/thanos:v0.17.2'
args:
- query
- '--log.level=info'
Expand Down
24 changes: 10 additions & 14 deletions pkg/queryfrontend/labels_codec.go
Original file line number Diff line number Diff line change
Expand Up @@ -49,20 +49,21 @@ func NewThanosLabelsCodec(partialResponse bool, defaultMetadataTimeRange time.Du
}
}

// MergeResponse merges multiple responses into a single Response. It needs to dedup the responses and ensure the order.
func (c labelsCodec) MergeResponse(responses ...queryrange.Response) (queryrange.Response, error) {
if len(responses) == 0 {
// Empty response for label_names, label_values and series API.
return &ThanosLabelsResponse{
Status: queryrange.StatusSuccess,
Data: []string{},
}, nil
}

if len(responses) == 1 {
return responses[0], nil
}

switch responses[0].(type) {
case *ThanosLabelsResponse:
if len(responses) == 1 {
return responses[0], nil
}
set := make(map[string]struct{})

for _, res := range responses {
Expand All @@ -83,25 +84,20 @@ func (c labelsCodec) MergeResponse(responses ...queryrange.Response) (queryrange
Data: lbls,
}, nil
case *ThanosSeriesResponse:
seriesData := make([]labelpb.ZLabelSet, 0)
seriesData := make(labelpb.ZLabelSets, 0)

// seriesString is used in sorting so we don't have to calculate the string of label sets again.
seriesString := make([]string, 0)
uniqueSeries := make(map[string]struct{})
for _, res := range responses {
for _, series := range res.(*ThanosSeriesResponse).Data {
s := series.PromLabels().String()
if _, ok := uniqueSeries[s]; !ok {
seriesData = append(seriesData, series)
seriesString = append(seriesString, s)
uniqueSeries[s] = struct{}{}
}
}
}

sort.Slice(seriesData, func(i, j int) bool {
return seriesString[i] < seriesString[j]
})
sort.Sort(seriesData)
return &ThanosSeriesResponse{
Status: queryrange.StatusSuccess,
Data: seriesData,
Expand Down Expand Up @@ -287,7 +283,7 @@ func (c labelsCodec) parseLabelsRequest(r *http.Request, op string) (queryrange.
return nil, err
}

result.StoreMatchers, err = parseMatchersParam(r.Form[queryv1.StoreMatcherParam])
result.StoreMatchers, err = parseMatchersParam(r.Form, queryv1.StoreMatcherParam)
if err != nil {
return nil, err
}
Expand Down Expand Up @@ -321,7 +317,7 @@ func (c labelsCodec) parseSeriesRequest(r *http.Request) (queryrange.Request, er
return nil, err
}

result.Matchers, err = parseMatchersParam(r.Form[queryv1.MatcherParam])
result.Matchers, err = parseMatchersParam(r.Form, queryv1.MatcherParam)
if err != nil {
return nil, err
}
Expand All @@ -340,7 +336,7 @@ func (c labelsCodec) parseSeriesRequest(r *http.Request) (queryrange.Request, er
result.ReplicaLabels = r.Form[queryv1.ReplicaLabelsParam]
}

result.StoreMatchers, err = parseMatchersParam(r.Form[queryv1.StoreMatcherParam])
result.StoreMatchers, err = parseMatchersParam(r.Form, queryv1.StoreMatcherParam)
if err != nil {
return nil, err
}
Expand Down
Loading

0 comments on commit 076985c

Please sign in to comment.