From 8967c7793b09ca9bd2a8d5a1edfaa222468f98a7 Mon Sep 17 00:00:00 2001 From: Owen Diehl Date: Fri, 25 Feb 2022 11:25:25 -0500 Subject: [PATCH 1/3] rounds chunk bytes to kb in tsdb and includes benchmarking script --- pkg/storage/tsdb/index/chunk.go | 8 ++------ pkg/storage/tsdb/index/index.go | 12 ++++++------ tools/tsdb/tsdb-map/diff.sh | 18 ++++++++++++++++++ tools/tsdb/tsdb-map/main.go | 4 ++-- 4 files changed, 28 insertions(+), 14 deletions(-) create mode 100755 tools/tsdb/tsdb-map/diff.sh diff --git a/pkg/storage/tsdb/index/chunk.go b/pkg/storage/tsdb/index/chunk.go index 52c848c897b0a..caf5b99655f86 100644 --- a/pkg/storage/tsdb/index/chunk.go +++ b/pkg/storage/tsdb/index/chunk.go @@ -10,12 +10,8 @@ type ChunkMeta struct { MinTime, MaxTime int64 - // Bytes use an uint64 as an uint32 can only hold [0,4GB) - // While this is well within current chunk guidelines (1.5MB being "standard"), - // I (owen-d) prefer to overallocate here - // Since TSDB accesses are seeked rather than scanned, this choice - // should have little effect as long as there is enough memory available - Bytes uint64 + // Bytes stored, rounded to nearest KB + KB uint32 Entries uint32 } diff --git a/pkg/storage/tsdb/index/index.go b/pkg/storage/tsdb/index/index.go index 8e5a1d6e52aa4..1898832e285e4 100644 --- a/pkg/storage/tsdb/index/index.go +++ b/pkg/storage/tsdb/index/index.go @@ -458,7 +458,7 @@ func (w *Writer) AddSeries(ref storage.SeriesRef, lset labels.Labels, chunks ... c := chunks[0] w.buf2.PutVarint64(c.MinTime) w.buf2.PutUvarint64(uint64(c.MaxTime - c.MinTime)) - w.buf2.PutUvarint64(c.Bytes) + w.buf2.PutUvarint32(c.KB) w.buf2.PutUvarint32(c.Entries) w.buf2.PutBE32(c.Checksum) t0 := c.MaxTime @@ -468,7 +468,7 @@ func (w *Writer) AddSeries(ref storage.SeriesRef, lset labels.Labels, chunks ... // instead of uvarint because chunks may overlap w.buf2.PutVarint64(c.MinTime - t0) w.buf2.PutUvarint64(uint64(c.MaxTime - c.MinTime)) - w.buf2.PutUvarint64(c.Bytes) + w.buf2.PutUvarint32(c.KB) w.buf2.PutUvarint32(c.Entries) t0 = c.MaxTime @@ -1869,7 +1869,7 @@ func (dec *Decoder) Series(b []byte, lbls *labels.Labels, chks *[]ChunkMeta) err t0 := d.Varint64() maxt := int64(d.Uvarint64()) + t0 - nBytes := d.Uvarint64() + kb := uint32(d.Uvarint()) entries := uint32(d.Uvarint64()) checksum := d.Be32() @@ -1877,7 +1877,7 @@ func (dec *Decoder) Series(b []byte, lbls *labels.Labels, chks *[]ChunkMeta) err Checksum: checksum, MinTime: t0, MaxTime: maxt, - Bytes: nBytes, + KB: kb, Entries: entries, }) t0 = maxt @@ -1887,7 +1887,7 @@ func (dec *Decoder) Series(b []byte, lbls *labels.Labels, chks *[]ChunkMeta) err // instead of uvarint because chunks may overlap mint := d.Varint64() + t0 maxt := int64(d.Uvarint64()) + mint - nBytes := d.Uvarint64() + kb := uint32(d.Uvarint()) entries := uint32(d.Uvarint64()) checksum := d.Be32() t0 = maxt @@ -1900,7 +1900,7 @@ func (dec *Decoder) Series(b []byte, lbls *labels.Labels, chks *[]ChunkMeta) err Checksum: checksum, MinTime: mint, MaxTime: maxt, - Bytes: nBytes, + KB: kb, Entries: entries, }) } diff --git a/tools/tsdb/tsdb-map/diff.sh b/tools/tsdb/tsdb-map/diff.sh new file mode 100755 index 0000000000000..a7e31971ef0e8 --- /dev/null +++ b/tools/tsdb/tsdb-map/diff.sh @@ -0,0 +1,18 @@ +#!/usr/bin/env bash + +old=$1 +new=$2 + +echo benchmarks: +echo + +benchstat \ +<(LOKI_TSDB_PATH="${old}" go test github.com/grafana/loki/tools/tsdb/tsdb-map -bench=BenchmarkQuery -run '^$' -benchmem) \ +<(LOKI_TSDB_PATH="${new}" go test github.com/grafana/loki/tools/tsdb/tsdb-map -bench=BenchmarkQuery -run '^$' -benchmem) + +echo +echo sizing: +echo + +ls -lh $old +ls -lh $new diff --git a/tools/tsdb/tsdb-map/main.go b/tools/tsdb/tsdb-map/main.go index e84f58ac31903..cb3a6a293e6fb 100644 --- a/tools/tsdb/tsdb-map/main.go +++ b/tools/tsdb/tsdb-map/main.go @@ -84,8 +84,8 @@ func main() { Checksum: extractChecksumFromChunkID(entry.ChunkID), MinTime: int64(entry.From), MaxTime: int64(entry.Through), - Bytes: (3 << 20) / 4, // guess: 0.75mb, 1/2 of the max size - Entries: 10000, // guess: 10k entries + KB: ((3 << 20) / 4) / 1024, // guess: 0.75mb, 1/2 of the max size, rounded to KB + Entries: 10000, // guess: 10k entries }}) } From 5a3ca306070c842356ed7e9e2d467f45b105c66d Mon Sep 17 00:00:00 2001 From: Owen Diehl Date: Fri, 25 Feb 2022 11:58:52 -0500 Subject: [PATCH 2/3] shellcheck --- tools/tsdb/tsdb-map/diff.sh | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tools/tsdb/tsdb-map/diff.sh b/tools/tsdb/tsdb-map/diff.sh index a7e31971ef0e8..f676f33ee15f0 100755 --- a/tools/tsdb/tsdb-map/diff.sh +++ b/tools/tsdb/tsdb-map/diff.sh @@ -14,5 +14,5 @@ echo echo sizing: echo -ls -lh $old -ls -lh $new +ls -lh "${old}" +ls -lh "${new}" From a9d92aa5dc61d316e1b72cf7ba874ac1e0fe7986 Mon Sep 17 00:00:00 2001 From: Owen Diehl Date: Fri, 25 Feb 2022 12:06:17 -0500 Subject: [PATCH 3/3] fix querier test --- pkg/storage/tsdb/querier_test.go | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/pkg/storage/tsdb/querier_test.go b/pkg/storage/tsdb/querier_test.go index 27a63f74b7fbd..625f01a03aeac 100644 --- a/pkg/storage/tsdb/querier_test.go +++ b/pkg/storage/tsdb/querier_test.go @@ -33,14 +33,14 @@ func TestQueryIndex(t *testing.T) { Checksum: 1, MinTime: 1, MaxTime: 10, - Bytes: 10, + KB: 10, Entries: 10, }, { Checksum: 2, MinTime: 5, MaxTime: 15, - Bytes: 10, + KB: 10, Entries: 10, }, }, @@ -52,14 +52,14 @@ func TestQueryIndex(t *testing.T) { Checksum: 3, MinTime: 20, MaxTime: 30, - Bytes: 10, + KB: 10, Entries: 10, }, { Checksum: 4, MinTime: 40, MaxTime: 50, - Bytes: 10, + KB: 10, Entries: 10, }, }, @@ -71,14 +71,14 @@ func TestQueryIndex(t *testing.T) { Checksum: 1, MinTime: 1, MaxTime: 10, - Bytes: 10, + KB: 10, Entries: 10, }, { Checksum: 2, MinTime: 5, MaxTime: 15, - Bytes: 10, + KB: 10, Entries: 10, }, },