From f0d4a7598a5f1d21b49c659d6c5751a41ea6a32e Mon Sep 17 00:00:00 2001 From: "GUY.MOLINARI" Date: Wed, 23 Oct 2024 12:24:03 +0000 Subject: [PATCH] All tests written and passing. --- go.mod | 1 + go.sum | 2 + roaring64/bsi64.go | 37 ----------- roaring64/bsi64_test.go | 138 +++++++++++++++++++++++++++------------- 4 files changed, 97 insertions(+), 81 deletions(-) diff --git a/go.mod b/go.mod index 2602690f..ac83ee91 100644 --- a/go.mod +++ b/go.mod @@ -4,6 +4,7 @@ go 1.14 require ( github.com/bits-and-blooms/bitset v1.12.0 + github.com/google/uuid v1.6.0 // indirect github.com/mschoch/smat v0.2.0 github.com/stretchr/testify v1.7.0 ) diff --git a/go.sum b/go.sum index 9d4dc8aa..3bfa1d50 100644 --- a/go.sum +++ b/go.sum @@ -2,6 +2,8 @@ github.com/bits-and-blooms/bitset v1.12.0 h1:U/q1fAF7xXRhFCrhROzIfffYnu+dlS38vCZ github.com/bits-and-blooms/bitset v1.12.0/go.mod h1:7hO7Gc7Pp1vODcmWvKMRA9BNmbv6a/7QIWpPxHddWR8= github.com/davecgh/go-spew v1.1.0 h1:ZDRjVQ15GmhC3fiQ8ni8+OwkZQO4DARzQgrnXU1Liz8= github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= +github.com/google/uuid v1.6.0 h1:NIvaJDMOsjHA8n1jAhLSgzrAzy1Hgr+hNrb57e+94F0= +github.com/google/uuid v1.6.0/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo= github.com/mschoch/smat v0.2.0 h1:8imxQsjDm8yFEAVBe7azKmKSgzSkZXDuKkSq9374khM= github.com/mschoch/smat v0.2.0/go.mod h1:kc9mz7DoBKqDyiRL7VZN8KvXQMWeTaVnttLRXOlotKw= github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM= diff --git a/roaring64/bsi64.go b/roaring64/bsi64.go index 229b85c4..89b9bc6d 100644 --- a/roaring64/bsi64.go +++ b/roaring64/bsi64.go @@ -8,13 +8,6 @@ import ( "sync" ) -const ( - // Min64BitSigned - Minimum 64 bit value - Min64BitSigned = -9223372036854775808 - // Max64BitSigned - Maximum 64 bit value - Max64BitSigned = 9223372036854775807 -) - // BSI is at its simplest is an array of bitmaps that represent an encoded // binary value. The advantage of a BSI is that comparisons can be made // across ranges of values whereas a bitmap can only represent the existence @@ -109,20 +102,13 @@ func (b *BSI) SetBigValue(columnID uint64, value *big.Int) { } } -//fmt.Printf(" text = %v\n", value.Text(2)) -//fmt.Printf("value = %b\n ->", value.Int64()) -//fmt.Println("->") - //for i := 0; i < b.BitCount(); i++ { for i := b.BitCount(); i >= 0; i-- { if value.Bit(i) == 0 { -//fmt.Print("0") b.bA[i].Remove(columnID) } else { -//fmt.Print("1") b.bA[i].Add(columnID) } } -//fmt.Println() b.eBM.Add(columnID) } @@ -137,7 +123,6 @@ func (b *BSI) GetValue(columnID uint64) (value int64, exists bool) { if !exists { return } -/* if !bv.IsInt64() { if bv.Sign() == -1 { msg := fmt.Errorf("can't represent a negative %d bit value as an int64", b.BitCount()) @@ -148,26 +133,9 @@ func (b *BSI) GetValue(columnID uint64) (value int64, exists bool) { panic(msg) } } -*/ return bv.Int64(), exists } -func (b *BSI) DumpBits(columnID uint64) { - exists := b.eBM.Contains(columnID) - if !exists { - return - } - fmt.Printf("BITS[") - for i := b.BitCount(); i >= 0; i-- { - if b.bA[i].Contains(columnID) { - fmt.Print("1") - } else { - fmt.Print("0") - } - } - fmt.Println("]") -} - // GetBigValue gets the value at the column ID. Second param will be false for non-existent values. func (b *BSI) GetBigValue(columnID uint64) (value *big.Int, exists bool) { exists = b.eBM.Contains(columnID) @@ -175,18 +143,13 @@ func (b *BSI) GetBigValue(columnID uint64) (value *big.Int, exists bool) { return } val := big.NewInt(0) -//fmt.Printf("LEN ARRAY = %d\n", len(b.bA)) -//fmt.Printf("ISNEG = %v\n", b.IsNegative(columnID)) -//fmt.Printf("GETVAL ->") for i := b.BitCount(); i >= 0; i-- { if b.bA[i].Contains(columnID) { bigBit := big.NewInt(1) bigBit.Lsh(bigBit, uint(i)) val.Or(val, bigBit) -//fmt.Printf("[%s]1 ", bigBit.Text(2)) } } -//fmt.Println() if b.IsNegative(columnID) { val = negativeTwosComplementToInt(val) diff --git a/roaring64/bsi64_test.go b/roaring64/bsi64_test.go index 840923db..03615a64 100644 --- a/roaring64/bsi64_test.go +++ b/roaring64/bsi64_test.go @@ -13,10 +13,19 @@ import ( "testing" "time" + "github.com/google/uuid" + "github.com/stretchr/testify/assert" "github.com/stretchr/testify/require" ) +const ( + // Min64BitSigned - Minimum 64 bit value + Min64BitSigned = -9223372036854775808 + // Max64BitSigned - Maximum 64 bit value + Max64BitSigned = 9223372036854775807 +) + func TestSetAndGetSimple(t *testing.T) { bsi := NewBSI(999, 0) @@ -31,62 +40,104 @@ func TestSetAndGetSimple(t *testing.T) { func TestSetAndGetBigValue(t *testing.T) { -/* + // Set a large UUID value--- bsi := NewDefaultBSI() - bigVal := big.NewInt(Max64BitSigned) - bsi.SetBigValue(1, bigVal) - gv, ok := bsi.GetBigValue(1) - assert.True(t, ok) - assert.True(t, gv.IsUint64()) - assert.Equal(t, bigVal, gv) - assert.Equal(t, bsi.BitCount(), 63) - - bigVal.Add(bigVal, big.NewInt(1)) - bsi.SetBigValue(1, bigVal) - assert.Equal(t, bsi.BitCount(), 64) - gv, ok = bsi.GetBigValue(1) - assert.True(t, ok) - assert.True(t, gv.IsUint64()) - assert.Equal(t, bigVal, gv) -*/ + bigUUID := big.NewInt(-578664753978847603) // Upper bits + bigUUID.Lsh(bigUUID, 64) + lowBits := big.NewInt(-5190910309365112881) // Lower bits + bigUUID.Add(bigUUID, lowBits) // Lower bits -/* How do we handle the scenario where we overflow into an int64 for GetValue after a large value was set? + bsi.SetBigValue(1, bigUUID) + assert.Equal(t, bigUUID.BitLen(), bsi.BitCount()) + bv, _ := bsi.GetBigValue(1) + assert.Equal(t, bigUUID, bv) + + // Any code past this point will expect a panic error. This will happen if a large value was set + // with SetBigValue() followed by a call to GetValue() where the set value exceeds 64 bits. defer func() { if r := recover(); r == nil { t.Errorf("The code did not panic") } }() - bsi.GetValue(1) -*/ + bsi.GetValue(1) // this should panic. If so the test will pass. +} - // Set a large UUID value--- +func TestSetAndGetUUIDValue(t *testing.T) { + uuidVal, _ := uuid.NewRandom() + b, errx := uuidVal.MarshalBinary() + assert.Nil(t, errx) + bigUUID := new(big.Int) + bigUUID.SetBytes(b) bsi := NewDefaultBSI() - //bigUUID := big.NewInt(0) - //b := make([]byte, 16) - //bigUUID.FillBytes(b) - //bigUUID.Set(big.NewInt(-578664753978847603)) // Upper bits - bigUUID := big.NewInt(-578664753978847603) // Upper bits - //bigUUID.SetInt64(50) // Upper bits - - //bigUUID := big.NewInt(Min64BitSigned ) - //bigUUID := big.NewInt(-1) - // Max64BitSigned - Maximum 64 bit value - //bigUUID := big.NewInt(-5) // Upper bits - bigUUID.Lsh(bigUUID, 64) - lowBits := big.NewInt(-5190910309365112881) // Lower bits - bigUUID.Add(bigUUID, lowBits) // Lower bits - -fmt.Printf("BIGUUID BITS = %d\n", bigUUID.BitLen()) -fmt.Printf("BIGUUID VALUE = %v\n", bigUUID.Text(10)) - bsi.SetBigValue(1, bigUUID) - //bsi.SetValue(1, bigUUID.Int64()) - fmt.Printf("BITS = %d\n", bsi.BitCount()) assert.Equal(t, bigUUID.BitLen(), bsi.BitCount()) bv, _ := bsi.GetBigValue(1) - //bv, _ := bsi.GetValue(1) - //assert.Equal(t, bigUUID.Int64(), bv) assert.Equal(t, bigUUID, bv) + + newUUID, err := uuid.FromBytes(bv.Bytes()) + assert.Nil(t, err) + + assert.Equal(t, uuidVal.String(), newUUID.String()) +} + +func secondsAndNanosToBigInt(seconds int64, nanos int32) *big.Int { + b := make([]byte, 12) + binary.BigEndian.PutUint64(b[:8], uint64(seconds)) + binary.BigEndian.PutUint32(b[8:], uint32(nanos)) + bigTime := new(big.Int) + bigTime.SetBytes(b) + return bigTime +} + +func bigIntToSecondsAndNanos(big *big.Int) (seconds int64, nanos int32) { + buf := make([]byte, 12) + big.FillBytes(buf) + seconds = int64(binary.BigEndian.Uint64(buf[:8])) + nanos = int32(binary.BigEndian.Uint32(buf[8:])) + return +} + +func TestSetAndGetBigTimestamp(t *testing.T) { + + // Store a timestamp in a BSI as 2 values, seconds as int64 and nanosecond interval as int32 (96 bits) + bigTime := secondsAndNanosToBigInt(int64(33286611346), int32(763295273)) + bsi := NewDefaultBSI() + bsi.SetBigValue(1, bigTime) + + // Recover and check the known timestamp + bv, _ := bsi.GetBigValue(1) + seconds, nanoseconds := bigIntToSecondsAndNanos(bv) + ts := time.Unix(seconds, int64(nanoseconds)) + assert.Equal(t, "3024-10-23T16:55:46.763295273Z", ts.Format(time.RFC3339Nano)) + assert.Equal(t, 67, bsi.BitCount()) +} + +func TestRangeBig(t *testing.T) { + + bsi := NewDefaultBSI() + + // Populate large timestamp values + for i := 0; i <= 100; i++ { + t := time.Now() + newTime := t.AddDate(1000, 0, 0) // Add 1000 years + secs := int64(newTime.UnixMilli() / 1000) + nano := int32(newTime.Nanosecond()) + bigTime := secondsAndNanosToBigInt(secs, nano) + bsi.SetBigValue(uint64(i), bigTime) + } + + start, _ := bsi.GetBigValue(uint64(45)) // starting value at columnID 45 + end, _ := bsi.GetBigValue(uint64(55)) // ending value at columnID 55 + set := bsi.CompareBigValue(0, RANGE, start, end, nil) + assert.Equal(t, uint64(11), set.GetCardinality()) + + i := set.Iterator() + for i.HasNext() { + v := i.Next() + assert.GreaterOrEqual(t, uint64(v), uint64(45)) + assert.LessOrEqual(t, uint64(v), uint64(55)) + } + assert.Equal(t, 67, bsi.BitCount()) } func setup() *BSI { @@ -446,7 +497,6 @@ func TestAdd(t *testing.T) { for i := 1; i <= 10; i++ { a, _ := bsi.GetValue(uint64(i)) b, _ := clone.GetValue(uint64(i)) -//fmt.Printf("ORIG = %d, CLONE = %d\n", a, b) assert.Equal(t, b*2, a) }