Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Start roaring64 validation #431

Merged
merged 2 commits into from
Jun 24, 2024
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 8 additions & 4 deletions roaring64/roaring64.go
Original file line number Diff line number Diff line change
@@ -12,8 +12,10 @@ import (
"github.com/RoaringBitmap/roaring/v2/internal"
)

const serialCookieNoRunContainer = 12346 // only arrays and bitmaps
const serialCookie = 12347 // runs, arrays, and bitmaps
// Cookie constants; the trailing comment on each value names the container
// kinds it stands for.
// NOTE(review): presumably these tag the serialized form — their use is not
// visible in this excerpt, confirm against the (de)serialization code.
const (
serialCookieNoRunContainer = 12346 // only arrays and bitmaps
serialCookie = 12347 // runs, arrays, and bitmaps
)

// Bitmap represents a compressed bitmap where you can add integers.
type Bitmap struct {
@@ -25,7 +27,6 @@ func (rb *Bitmap) ToBase64() (string, error) {
buf := new(bytes.Buffer)
_, err := rb.WriteTo(buf)
return base64.StdEncoding.EncodeToString(buf.Bytes()), err

}

// FromBase64 deserializes a bitmap from Base64
@@ -52,7 +53,6 @@ func (rb *Bitmap) ToBytes() ([]byte, error) {
// implementations (Java, Go, C++) and it has a specification :
// https://github.com/RoaringBitmap/RoaringFormatSpec#extention-for-64-bit-implementations
func (rb *Bitmap) WriteTo(stream io.Writer) (int64, error) {

var n int64
buf := make([]byte, 8)
binary.LittleEndian.PutUint64(buf, uint64(rb.highlowcontainer.size()))
@@ -1243,6 +1243,10 @@ func (rb *Bitmap) GetSerializedSizeInBytes() uint64 {
return rb.highlowcontainer.serializedSizeInBytes()
}

// Validate runs the integrity check of the underlying container array
// (rb.highlowcontainer.validate) and returns its error unchanged. A nil
// result means that check found no problem.
func (rb *Bitmap) Validate() error {
return rb.highlowcontainer.validate()
}

// Roaring32AsRoaring64 inserts a 32-bit roaring bitmap into
// a 64-bit roaring bitmap. No copy is made.
func Roaring32AsRoaring64(bm32 *roaring.Bitmap) *Bitmap {
77 changes: 72 additions & 5 deletions roaring64/roaring64_test.go
Original file line number Diff line number Diff line change
@@ -245,7 +245,7 @@ func TestRangeRemovalFromContent(t *testing.T) {
bm.RemoveRange(0, 30000)
c := bm.GetCardinality()

assert.EqualValues(t, 00, c)
assert.EqualValues(t, 0o0, c)
}

func TestFlipOnEmpty(t *testing.T) {
@@ -624,7 +624,6 @@ func TestBitmap(t *testing.T) {

assert.Equal(t, len(arrayres), len(arrayand))
assert.True(t, ok)

})

t.Run("Test AND 4", func(t *testing.T) {
@@ -1401,6 +1400,7 @@ func TestBitmap(t *testing.T) {
assert.True(t, valide)
})
}

func TestXORtest4(t *testing.T) {
t.Run("XORtest 4", func(t *testing.T) {
rb := NewBitmap()
@@ -1895,9 +1895,9 @@ func TestSerialization(t *testing.T) {
//assert.Nil(t, err)
//assert.True(t, bufBmp.Equals(bmp))

//var base64 string
//base64, err = bufBmp.ToBase64()
//assert.Nil(t, err)
// var base64 string
// base64, err = bufBmp.ToBase64()
// assert.Nil(t, err)

//base64Bmp := New()
//_, err = base64Bmp.FromBase64(base64)
@@ -1988,3 +1988,70 @@ func Test32As64(t *testing.T) {
assert.True(t, r32asr64.Equals(r64))
assert.True(t, r64.Equals(r32asr64))
}

// TestRoaringArray64Validation walks a roaringArray64 through the failure
// modes of validate one at a time: no keys, unsorted keys, and slices whose
// lengths disagree with len(keys).
func TestRoaringArray64Validation(t *testing.T) {
a := roaringArray64{}

// A zero-value array has no keys.
assert.ErrorIs(t, a.validate(), ErrEmptyKeys)

// 3 before 1 violates the strictly increasing key order.
a.keys = append(a.keys, uint32(3), uint32(1))
assert.ErrorIs(t, a.validate(), ErrKeySortOrder)
// presumably resets the array before the next scenario — clear is defined outside this excerpt
a.clear()

// build up cardinality coherent arrays
// Three sorted keys, but containers is still shorter than keys.
a.keys = append(a.keys, uint32(1), uint32(3), uint32(10))
assert.ErrorIs(t, a.validate(), ErrCardinalityConstraint)
// containers now matches keys, needCopyOnWrite does not yet.
a.containers = append(a.containers, roaring.NewBitmap(), roaring.NewBitmap(), roaring.NewBitmap())
assert.ErrorIs(t, a.validate(), ErrCardinalityConstraint)
a.needCopyOnWrite = append(a.needCopyOnWrite, true, false, true)
// All three slices agree in length, so validate now reaches the per-container check.
// NOTE(review): assert.Errorf only requires a non-nil error; "zero intervals"
// is the failure message, not an expected error text. If a specific error is
// intended, assert it with ErrorIs or ErrorContains — confirm which error
// the freshly created 32-bit bitmaps actually produce.
assert.Errorf(t, a.validate(), "zero intervals")
}

// TestBitMapValidation checks that Validate reports no error on a bitmap
// after each of three population steps: out-of-order range inserts, a batch
// of random values, and a batch of the sequential values 0..999.
func TestBitMapValidation(t *testing.T) {
	const batch = 1000

	bm := NewBitmap()
	// The ranges are added in non-ascending order on purpose.
	bm.AddRange(0, 100)
	bm.AddRange(306, 406)
	bm.AddRange(102, 202)
	bm.AddRange(204, 304)
	assert.NoError(t, bm.Validate())

	scattered := make([]uint64, batch)
	for i := range scattered {
		scattered[i] = rand.Uint64()
	}
	bm.AddMany(scattered)
	assert.NoError(t, bm.Validate())

	sequential := make([]uint64, batch)
	for i := range sequential {
		sequential[i] = uint64(i)
	}
	bm.AddMany(sequential)
	assert.NoError(t, bm.Validate())
}

// TestRoaringArray64SortOrder exercises checkKeysSorted on both outcomes:
// key slices it must accept (empty, single key, keys produced through the
// public Bitmap API) and key slices it must reject (descending and
// duplicated keys).
func TestRoaringArray64SortOrder(t *testing.T) {
	t.Run("Empty", func(t *testing.T) {
		a := roaringArray64{}
		assert.True(t, a.checkKeysSorted())
	})

	t.Run("Cardinality 1", func(t *testing.T) {
		bm := NewBitmap()
		bm.Add(65)

		assert.True(t, bm.highlowcontainer.checkKeysSorted())
	})

	t.Run("Many Entries", func(t *testing.T) {
		bm := NewBitmap()
		bm.AddRange(1, 129)
		bm.AddRange(511, 2049)

		assert.True(t, bm.highlowcontainer.checkKeysSorted())
	})

	// Negative cases: keys are set directly because the check only reads
	// ra.keys, so no containers are needed.
	t.Run("Out Of Order", func(t *testing.T) {
		a := roaringArray64{}
		a.keys = append(a.keys, uint32(3), uint32(1))

		assert.False(t, a.checkKeysSorted())
	})

	t.Run("Duplicate Keys", func(t *testing.T) {
		a := roaringArray64{}
		a.keys = append(a.keys, uint32(1), uint32(1))

		// The order must be strictly increasing, so equal neighbours fail.
		assert.False(t, a.checkKeysSorted())
	})
}
55 changes: 54 additions & 1 deletion roaring64/roaringarray64.go
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
package roaring64

import (
"errors"

"github.com/RoaringBitmap/roaring/v2"
)

@@ -11,6 +13,12 @@ type roaringArray64 struct {
copyOnWrite bool
}

// Sentinel errors returned by roaringArray64.validate.
var (
// ErrEmptyKeys is returned when the array holds no keys at all.
ErrEmptyKeys = errors.New("keys were empty")
// ErrKeySortOrder is returned when the keys are not in strictly
// increasing order.
ErrKeySortOrder = errors.New("keys were out of order")
// ErrCardinalityConstraint is returned when the containers or
// needCopyOnWrite slice does not have the same length as the keys.
ErrCardinalityConstraint = errors.New("size of arrays was not coherent")
)

// runOptimize compresses the element containers to minimize space consumed.
// Q: how does this interact with copyOnWrite and needCopyOnWrite?
// A: since we aren't changing the logical content, just the representation,
@@ -140,7 +148,6 @@ func (ra *roaringArray64) clear() {
}

func (ra *roaringArray64) clone() *roaringArray64 {

sa := roaringArray64{}
sa.copyOnWrite = ra.copyOnWrite

@@ -401,3 +408,49 @@ func (ra *roaringArray64) serializedSizeInBytes() uint64 {
}
return answer
}

// checkKeysSorted reports whether ra.keys is in strictly increasing order.
// Equal neighbouring keys count as unsorted. A key slice with zero or one
// element has no adjacent pair to compare and is therefore sorted.
func (ra *roaringArray64) checkKeysSorted() bool {
	keys := ra.keys
	for i := 1; i < len(keys); i++ {
		if keys[i-1] >= keys[i] {
			return false
		}
	}
	return true
}

// validate checks the structural integrity of the array and then of every
// container it holds. The checks run in this order, and the first failure
// is returned:
//
//   - ErrEmptyKeys when there are no keys,
//   - ErrKeySortOrder when the keys are not strictly increasing,
//   - ErrCardinalityConstraint when containers or needCopyOnWrite differ
//     in length from keys,
//   - otherwise the first non-nil error from a container's Validate.
//
// It returns nil when every check passes.
func (ra *roaringArray64) validate() error {
	switch n := len(ra.keys); {
	case n == 0:
		return ErrEmptyKeys
	case !ra.checkKeysSorted():
		return ErrKeySortOrder
	case n != len(ra.containers), n != len(ra.needCopyOnWrite):
		return ErrCardinalityConstraint
	}

	for _, c := range ra.containers {
		if err := c.Validate(); err != nil {
			return err
		}
	}
	return nil
}
5 changes: 2 additions & 3 deletions roaring64/serialization_test.go
Original file line number Diff line number Diff line change
@@ -78,7 +78,7 @@ func TestSerializationBasic037(t *testing.T) {
func TestSerializationToFile038(t *testing.T) {
rb := BitmapOf(1, 2, 3, 4, 5, 100, 1000)
fname := "myfile.bin"
fout, err := os.OpenFile(fname, os.O_RDWR|os.O_CREATE|os.O_TRUNC, 0660)
fout, err := os.OpenFile(fname, os.O_RDWR|os.O_CREATE|os.O_TRUNC, 0o660)
if err != nil {
fmt.Fprintf(os.Stderr, "\n\nIMPORTANT: For testing file IO, the roaring library requires disk access.\nWe omit some tests for now.\n\n")
return
@@ -233,7 +233,6 @@ func benchmarkUnserializeFunc(b *testing.B, name string, f func(*Bitmap, []byte)
}

_, err := rb.WriteTo(buf)

if err != nil {
b.Fatalf("Unexpected error occurs: %v", err)
}
@@ -284,7 +283,7 @@ func Test_tryReadFromRoaring32WithRoaring64_File(t *testing.T) {
}

name := filepath.Join(tempDir, "r32")
if err := ioutil.WriteFile(name, bs, 0600); err != nil {
if err := ioutil.WriteFile(name, bs, 0o600); err != nil {
t.Fatal(err)
}
file, err := os.Open(name)