Skip to content

Commit

Permalink
store/bucket: use roaring bitmaps for postings list
Browse files Browse the repository at this point in the history
TODO: description/tests/convert set operations into bitwise ops

Signed-off-by: Giedrius Statkevičius <[email protected]>
  • Loading branch information
GiedriusS committed May 11, 2023
1 parent cdb395a commit 6128daf
Show file tree
Hide file tree
Showing 5 changed files with 242 additions and 82 deletions.
9 changes: 7 additions & 2 deletions go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -119,13 +119,18 @@ require (
)

require (
github.com/RoaringBitmap/roaring v1.2.3
github.com/onsi/gomega v1.27.6
go.opentelemetry.io/contrib/propagators/autoprop v0.38.0
go4.org/intern v0.0.0-20220617035311-6925f38cc365
golang.org/x/exp v0.0.0-20230321023759-10a507213a29
)

require go4.org/unsafe/assume-no-moving-gc v0.0.0-20230209150437-ee73d164e760 // indirect
require (
github.com/bits-and-blooms/bitset v1.2.0 // indirect
github.com/mschoch/smat v0.2.0 // indirect
go4.org/unsafe/assume-no-moving-gc v0.0.0-20230209150437-ee73d164e760 // indirect
)

require (
cloud.google.com/go/compute/metadata v0.2.3 // indirect
Expand Down Expand Up @@ -191,7 +196,7 @@ require (
github.com/joeshaw/multierror v0.0.0-20140124173710-69b34d4ec901 // indirect
github.com/josharian/intern v1.0.0 // indirect
github.com/julienschmidt/httprouter v1.3.0 // indirect
github.com/klauspost/cpuid/v2 v2.1.0 // indirect
github.com/klauspost/cpuid/v2 v2.2.4 // indirect
github.com/kylelemons/godebug v1.1.0 // indirect
github.com/lightstep/lightstep-tracer-common/golang/gogo v0.0.0-20210210170715-a8dfcb80d3a7 // indirect
github.com/lufia/plan9stats v0.0.0-20211012122336-39d0f177ccd0 // indirect
Expand Down
10 changes: 8 additions & 2 deletions go.sum
Original file line number Diff line number Diff line change
Expand Up @@ -101,6 +101,8 @@ github.com/OneOfOne/xxhash v1.2.6/go.mod h1:eZbhyaAYD41SGSSsnmcpxVoRiQ/MPUTjUdII
github.com/PuerkitoBio/purell v1.1.1/go.mod h1:c11w/QuzBsJSee3cPx9rAFu61PvFxuPbtSwDGJws/X0=
github.com/PuerkitoBio/urlesc v0.0.0-20170810143723-de5bf2ad4578/go.mod h1:uGdkoq3SwY9Y+13GIhn11/XLaGBb4BfwItxLd5jeuXE=
github.com/QcloudApi/qcloud_sign_golang v0.0.0-20141224014652-e4130a326409/go.mod h1:1pk82RBxDY/JZnPQrtqHlUFfCctgdorsd9M06fMynOM=
github.com/RoaringBitmap/roaring v1.2.3 h1:yqreLINqIrX22ErkKI0vY47/ivtJr6n+kMhVOVmhWBY=
github.com/RoaringBitmap/roaring v1.2.3/go.mod h1:plvDsJQpxOC5bw8LRteu/MLWHsHez/3y6cubLI4/1yE=
github.com/Shopify/sarama v1.19.0/go.mod h1:FVkBWblsNy7DGZRfXLU0O9RCGt5g3g3yEuWXgklEdEo=
github.com/Shopify/toxiproxy v2.1.4+incompatible/go.mod h1:OXgGpZ6Cli1/URJOF1DMxUHB2q5Ap20/P/eIdh4G0pI=
github.com/StackExchange/wmi v0.0.0-20190523213315-cbe66965904d/go.mod h1:3eOhrUMpNV+6aFIbp5/iudMxNCF27Vw2OZgy4xEx0Fg=
Expand Down Expand Up @@ -173,6 +175,8 @@ github.com/beorn7/perks v1.0.0/go.mod h1:KWe93zE9D1o94FZ5RNwFwVgaQK1VOXiVxmqh+Ce
github.com/beorn7/perks v1.0.1 h1:VlbKKnNfV8bJzeqoa4cOKqO6bYr3WgKZxO8Z16+hsOM=
github.com/beorn7/perks v1.0.1/go.mod h1:G2ZrVWU2WbWT9wwq4/hrbKbnv/1ERSJQ0ibhJ6rlkpw=
github.com/bgentry/speakeasy v0.1.0/go.mod h1:+zsyZBPWlz7T6j88CTgSN5bM796AkVf0kBD4zp0CCIs=
github.com/bits-and-blooms/bitset v1.2.0 h1:Kn4yilvwNtMACtf1eYDlG8H77R07mZSPbMjLyS07ChA=
github.com/bits-and-blooms/bitset v1.2.0/go.mod h1:gIdJ4wp64HaoK2YrL1Q5/N7Y16edYb8uY+O0FJTyyDA=
github.com/blang/semver/v4 v4.0.0 h1:1PFHFE6yCCTv8C1TeyNNarDzntLi7wMI5i/pzqYIsAM=
github.com/blang/semver/v4 v4.0.0/go.mod h1:IbckMUScFkM3pff0VJDNKRiT6TG/YpiHIM2yvyW5YoQ=
github.com/casbin/casbin/v2 v2.1.2/go.mod h1:YcPU1XXisHhLzuxH9coDNf2FbKpjGlbCg3n9yuLkIJQ=
Expand Down Expand Up @@ -613,8 +617,8 @@ github.com/klauspost/compress v1.15.9 h1:wKRjX6JRtDdrE9qwa4b/Cip7ACOshUI4smpCQan
github.com/klauspost/compress v1.15.9/go.mod h1:PhcZ0MbTNciWF3rruxRgKxI5NkcHHrHUDtV4Yw2GlzU=
github.com/klauspost/cpuid/v2 v2.0.1/go.mod h1:FInQzS24/EEf25PyTYn52gqo7WaD8xa0213Md/qVLRg=
github.com/klauspost/cpuid/v2 v2.0.4/go.mod h1:FInQzS24/EEf25PyTYn52gqo7WaD8xa0213Md/qVLRg=
github.com/klauspost/cpuid/v2 v2.1.0 h1:eyi1Ad2aNJMW95zcSbmGg7Cg6cq3ADwLpMAP96d8rF0=
github.com/klauspost/cpuid/v2 v2.1.0/go.mod h1:RVVoqg1df56z8g3pUjL/3lE5UfnlrJX8tyFgg4nqhuY=
github.com/klauspost/cpuid/v2 v2.2.4 h1:acbojRNwl3o09bUq+yDCtZFc1aiwaAAxtcn8YkZXnvk=
github.com/klauspost/cpuid/v2 v2.2.4/go.mod h1:RVVoqg1df56z8g3pUjL/3lE5UfnlrJX8tyFgg4nqhuY=
github.com/kolo/xmlrpc v0.0.0-20220921171641-a4b6fa1dd06b h1:udzkj9S/zlT5X367kqJis0QP7YMxobob6zhzq6Yre00=
github.com/konsorten/go-windows-terminal-sequences v1.0.1/go.mod h1:T0+1ngSBFLxvqU3pZ+m/2kptfBszLMUkC4ZK/EgS/cQ=
github.com/konsorten/go-windows-terminal-sequences v1.0.2/go.mod h1:T0+1ngSBFLxvqU3pZ+m/2kptfBszLMUkC4ZK/EgS/cQ=
Expand Down Expand Up @@ -695,6 +699,8 @@ github.com/modocache/gover v0.0.0-20171022184752-b58185e213c5/go.mod h1:caMODM3P
github.com/montanaflynn/stats v0.0.0-20171201202039-1bf9dbcd8cbe/go.mod h1:wL8QJuTMNUDYhXwkmfOly8iTdp5TEcJFWZD2D7SIkUc=
github.com/mozillazg/go-httpheader v0.2.1 h1:geV7TrjbL8KXSyvghnFm+NyTux/hxwueTSrwhe88TQQ=
github.com/mozillazg/go-httpheader v0.2.1/go.mod h1:jJ8xECTlalr6ValeXYdOF8fFUISeBAdw6E61aqQma60=
github.com/mschoch/smat v0.2.0 h1:8imxQsjDm8yFEAVBe7azKmKSgzSkZXDuKkSq9374khM=
github.com/mschoch/smat v0.2.0/go.mod h1:kc9mz7DoBKqDyiRL7VZN8KvXQMWeTaVnttLRXOlotKw=
github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822 h1:C3w9PqII01/Oq1c1nUAm88MOHcQC9l5mIlSMApZMrHA=
github.com/mwitkow/go-conntrack v0.0.0-20161129095857-cc309e4a2223/go.mod h1:qRWi+5nqEBWmkhHvq77mSJWrCKwh8bxhgT7d/eI7P4U=
github.com/mwitkow/go-conntrack v0.0.0-20190716064945-2f068394615f h1:KUppIJq7/+SVif2QVs3tOP0zanoHgBEVAwHxUSIzRqU=
Expand Down
185 changes: 185 additions & 0 deletions pkg/store/bitmap_postings.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,185 @@
// Copyright (c) The Thanos Authors.
// Licensed under the Apache License 2.0.

package store

import (
"encoding/binary"
"fmt"
"io"

"github.com/RoaringBitmap/roaring"
"github.com/pkg/errors"
"github.com/prometheus/prometheus/storage"
"github.com/prometheus/prometheus/tsdb/index"
)

type indexBitmapPostings interface {
index.Postings
GetBitmap() *roaring.Bitmap
}

type bitmapPostingsReader struct {
r io.Reader
postings []postingPtr

lastOffset int64
pi int
err error

start, length int64

cur *bitmapPostings
readBuf []byte

repeatFor int
}

type bitmapPostings struct {
cur storage.SeriesRef
bmap *roaring.Bitmap
bmapIterator roaring.IntIterable
}

func newBitmapPostingsReader(r io.Reader, postings []postingPtr, start, length int64) *bitmapPostingsReader {
return &bitmapPostingsReader{
r: r,
postings: postings,
start: start,
length: length,
readBuf: make([]byte, 4),
}
}

func getInt32(r io.Reader, buf []byte) (uint32, error) {
read, err := r.Read(buf)
if err != nil {
return 0, errors.Wrap(err, "reading")
}
if read != 4 {
return 0, fmt.Errorf("read got %d bytes instead of 4", read)
}
return binary.BigEndian.Uint32(buf), nil
}

func (r *bitmapPostingsReader) Err() error {
if errors.Is(r.err, io.EOF) {
return nil
}
return r.err
}

func (r *bitmapPostings) GetBitmap() *roaring.Bitmap {
return r.bmap
}

type errBitmapPostings struct {
e error
}

func (e errBitmapPostings) Next() bool { return false }
func (e errBitmapPostings) Seek(storage.SeriesRef) bool { return false }
func (e errBitmapPostings) At() storage.SeriesRef { return 0 }
func (e errBitmapPostings) Err() error { return e.e }
func (e errBitmapPostings) GetBitmap() *roaring.Bitmap { return nil }

func (r *bitmapPostingsReader) Next() bool {
if r.repeatFor > 0 {
r.repeatFor--
return true
}
if r.pi >= len(r.postings) {
return false
}
if r.Err() != nil {
return false
}
from := r.postings[r.pi].ptr.Start - r.start

if from-r.lastOffset < 0 {
panic("would have skipped negative bytes")
}

_, err := io.CopyN(io.Discard, r.r, from-r.lastOffset)
if err != nil {
return false
}
r.lastOffset += from - r.lastOffset

bmap := roaring.NewBitmap()

postingsCount, err := getInt32(r.r, r.readBuf[:])
if err != nil {
r.err = err
return false
}
r.lastOffset += 4

for i := 0; i < int(postingsCount); i++ {
posting, err := getInt32(r.r, r.readBuf[:])
if err != nil {
r.err = err
return false
}
r.lastOffset += 4

bmap.Add(posting)
}
r.cur = &bitmapPostings{
bmap: bmap,
}
r.pi++

for {
if r.pi >= len(r.postings) {
break
}

if r.postings[r.pi].ptr.Start == r.postings[r.pi-1].ptr.Start &&
r.postings[r.pi].ptr.End == r.postings[r.pi-1].ptr.End {
r.repeatFor++
r.pi++
continue
}

break
}

return true
}

func (r *bitmapPostingsReader) At() *bitmapPostings {
r.cur.bmapIterator = r.cur.bmap.Iterator()
return r.cur
}

func (bp *bitmapPostings) Next() bool {
ret := bp.bmapIterator.HasNext()
if ret == true {
bp.cur = storage.SeriesRef(bp.bmapIterator.Next())
return true
}
return false
}

func (bp *bitmapPostings) Seek(v storage.SeriesRef) bool {
if bp.At() >= v {
return true
}

for bp.Next() {
if bp.At() >= v {
return true
}
}

return false
}

func (bp *bitmapPostings) At() storage.SeriesRef {
return bp.cur
}

func (bp *bitmapPostings) Err() error {
return nil
}
Loading

0 comments on commit 6128daf

Please sign in to comment.