From dfbba84c5fe7e0385e2620e673d7f234a1bc353f Mon Sep 17 00:00:00 2001 From: Anton Okolnychyi Date: Mon, 21 Oct 2024 13:30:58 -0700 Subject: [PATCH 1/9] Core: Add portable Roaring bitmap for row positions --- LICENSE | 1 + .../RoaringPositionBitmapBenchmark.java | 165 +++++++++ .../deletes/RoaringPositionBitmap.java | 309 +++++++++++++++++ .../deletes/TestRoaringPositionBitmap.java | 323 ++++++++++++++++++ .../apache/iceberg/deletes/64map32bitvals.bin | Bin 0 -> 48 bytes .../org/apache/iceberg/deletes/64mapempty.bin | Bin 0 -> 8 bytes .../apache/iceberg/deletes/64maphighvals.bin | Bin 0 -> 1086 bytes .../iceberg/deletes/64mapspreadvals.bin | Bin 0 -> 408 bytes 8 files changed, 798 insertions(+) create mode 100644 core/src/jmh/java/org/apache/iceberg/deletes/RoaringPositionBitmapBenchmark.java create mode 100644 core/src/main/java/org/apache/iceberg/deletes/RoaringPositionBitmap.java create mode 100644 core/src/test/java/org/apache/iceberg/deletes/TestRoaringPositionBitmap.java create mode 100644 core/src/test/resources/org/apache/iceberg/deletes/64map32bitvals.bin create mode 100644 core/src/test/resources/org/apache/iceberg/deletes/64mapempty.bin create mode 100644 core/src/test/resources/org/apache/iceberg/deletes/64maphighvals.bin create mode 100644 core/src/test/resources/org/apache/iceberg/deletes/64mapspreadvals.bin diff --git a/LICENSE b/LICENSE index efb46dab44da..b6c2ec5c72f3 100644 --- a/LICENSE +++ b/LICENSE @@ -298,6 +298,7 @@ License: https://www.apache.org/licenses/LICENSE-2.0 This product includes code from Delta Lake. * AssignmentAlignmentSupport is an independent development but UpdateExpressionsSupport in Delta was used as a reference. +* RoaringPositionBitmap is an independent development but RoaringBitmapArray in Delta was used as a reference. Copyright: 2020 The Delta Lake Project Authors. Home page: https://delta.io/ diff --git a/core/src/jmh/java/org/apache/iceberg/deletes/RoaringPositionBitmapBenchmark.java b/core/src/jmh/java/org/apache/iceberg/deletes/RoaringPositionBitmapBenchmark.java new file mode 100644 index 000000000000..f4325311d8a5 --- /dev/null +++ b/core/src/jmh/java/org/apache/iceberg/deletes/RoaringPositionBitmapBenchmark.java @@ -0,0 +1,165 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.iceberg.deletes; + +import java.util.Random; +import java.util.concurrent.TimeUnit; +import org.openjdk.jmh.annotations.Benchmark; +import org.openjdk.jmh.annotations.BenchmarkMode; +import org.openjdk.jmh.annotations.Fork; +import org.openjdk.jmh.annotations.Measurement; +import org.openjdk.jmh.annotations.Mode; +import org.openjdk.jmh.annotations.Scope; +import org.openjdk.jmh.annotations.Setup; +import org.openjdk.jmh.annotations.State; +import org.openjdk.jmh.annotations.Threads; +import org.openjdk.jmh.annotations.Timeout; +import org.openjdk.jmh.annotations.Warmup; +import org.openjdk.jmh.infra.Blackhole; +import org.roaringbitmap.longlong.Roaring64Bitmap; + +/** + * A benchmark that evaluates the performance of {@link RoaringPositionBitmap}. + * + *

To run this benchmark: + * ./gradlew :iceberg-core:jmh + * -PjmhIncludeRegex=RoaringPositionBitmapBenchmark + * -PjmhOutputPath=benchmark/roaring-position-bitmap-benchmark.txt + * + */ +@Fork(1) +@State(Scope.Benchmark) +@Warmup(iterations = 3) +@Measurement(iterations = 5) +@BenchmarkMode(Mode.SingleShotTime) +@Timeout(time = 5, timeUnit = TimeUnit.MINUTES) +public class RoaringPositionBitmapBenchmark { + + private static final int TOTAL_POSITIONS = 5_000_000; + private static final long STEP = 5L; + + private long[] orderedPositions; + private long[] shuffledPositions; + + @Setup + public void setupBenchmark() { + this.orderedPositions = generateOrderedPositions(); + this.shuffledPositions = generateShuffledPositions(); + } + + @Benchmark + @Threads(1) + public void addOrderedPositionsIcebergBitmap(Blackhole blackhole) { + RoaringPositionBitmap bitmap = new RoaringPositionBitmap(); + for (long position : orderedPositions) { + bitmap.add(position); + } + blackhole.consume(bitmap); + } + + @Benchmark + @Threads(1) + public void addOrderedPositionsLibraryBitmap(Blackhole blackhole) { + Roaring64Bitmap bitmap = new Roaring64Bitmap(); + for (long position : orderedPositions) { + bitmap.add(position); + } + blackhole.consume(bitmap); + } + + @Benchmark + @Threads(1) + public void addShuffledPositionsIcebergBitmap(Blackhole blackhole) { + RoaringPositionBitmap bitmap = new RoaringPositionBitmap(); + for (long position : shuffledPositions) { + bitmap.add(position); + } + blackhole.consume(bitmap); + } + + @Benchmark + @Threads(1) + public void addShuffledPositionsLibraryBitmap(Blackhole blackhole) { + Roaring64Bitmap bitmap = new Roaring64Bitmap(); + for (long position : shuffledPositions) { + bitmap.add(position); + } + blackhole.consume(bitmap); + } + + @Benchmark + @Threads(1) + public void addAndCheckPositionsIcebergBitmap(Blackhole blackhole) { + RoaringPositionBitmap bitmap = new RoaringPositionBitmap(); + + for (long position : shuffledPositions) { + bitmap.add(position); + } + + for (long position = 0; position <= TOTAL_POSITIONS * STEP; position++) { + bitmap.contains(position); + } + + blackhole.consume(bitmap); + } + + @Benchmark + @Threads(1) + public void addAndCheckPositionsLibraryBitmap(Blackhole blackhole) { + Roaring64Bitmap bitmap = new Roaring64Bitmap(); + + for (long position : shuffledPositions) { + bitmap.add(position); + } + + for (long position = 0; position <= TOTAL_POSITIONS * STEP; position++) { + bitmap.contains(position); + } + + blackhole.consume(bitmap); + } + + private static long[] generateOrderedPositions() { + long[] positions = new long[TOTAL_POSITIONS]; + + for (int index = 0; index < TOTAL_POSITIONS; index++) { + positions[index] = index * STEP; + } + + return positions; + } + + private static long[] generateShuffledPositions() { + long[] positions = generateOrderedPositions(); + shuffle(positions); + return positions; + } + + private static void shuffle(long[] array) { + Random rand = new Random(); + + for (int index = array.length - 1; index > 0; index--) { + // swap with an element at a random index between 0 and index + int thatIndex = rand.nextInt(index + 1); + long temp = array[index]; + array[index] = array[thatIndex]; + array[thatIndex] = temp; + } + } +} diff --git a/core/src/main/java/org/apache/iceberg/deletes/RoaringPositionBitmap.java b/core/src/main/java/org/apache/iceberg/deletes/RoaringPositionBitmap.java new file mode 100644 index 000000000000..d8cc1dabc723 --- /dev/null +++ b/core/src/main/java/org/apache/iceberg/deletes/RoaringPositionBitmap.java @@ -0,0 +1,309 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.iceberg.deletes; + +import java.io.IOException; +import java.io.UncheckedIOException; +import java.nio.ByteBuffer; +import java.nio.ByteOrder; +import java.util.List; +import java.util.function.LongConsumer; +import org.apache.iceberg.relocated.com.google.common.base.Preconditions; +import org.apache.iceberg.relocated.com.google.common.collect.Lists; +import org.roaringbitmap.RoaringBitmap; + +/** + * A bitmap that supports positive 64-bit positions, but is optimized for cases where most positions + * fit in 32 bits by using an array of 32-bit Roaring bitmaps. The bitmap array is grown as needed + * to accommodate the largest value. + * + *

Incoming 64-bit positions are divided into a 32-bit "key" using the most significant 4 bytes + * and a 32-bit position using the least significant 4 bytes. For each key in the set of positions, + * a 32-bit Roaring bitmap is maintained to store a set of 32-bit positions for that key. + * + *

To test whether a certain position is set, its most significant 4 bytes (the key) are used to + * find a 32-bit bitmap and the least significant 4 bytes are tested for inclusion in the bitmap. If + * a bitmap is not found for the key, then the position is not set. + * + *

Note that positions must be greater than or equal to 0 add less than {@link #MAX_POSITION}. + * This bitmap does not support positions larger than {@link #MAX_POSITION} as the bitmap array + * length is a signed 32-bit integer that must be greater than or equal to 0. + */ +class RoaringPositionBitmap { + + static final long MAX_POSITION = toPosition(Integer.MAX_VALUE - 1, Integer.MIN_VALUE); + private static final RoaringBitmap[] EMPTY_BITMAP_ARRAY = new RoaringBitmap[] {}; + private static final long BITMAP_COUNT_SIZE_BYTES = 8L; + private static final long BITMAP_KEY_SIZE_BYTES = 4L; + + private RoaringBitmap[] bitmaps; + + RoaringPositionBitmap() { + this.bitmaps = EMPTY_BITMAP_ARRAY; + } + + private RoaringPositionBitmap(RoaringBitmap[] bitmaps) { + this.bitmaps = bitmaps; + } + + /** + * Adds a position to the bitmap. + * + * @param pos the row position + */ + public void add(long pos) { + validatePosition(pos); + int high = highBytes(pos); + int low = lowBytes(pos); + allocateBitmapsIfNeeded(high + 1 /* required number of bitmaps */); + bitmaps[high].add(low); + } + + /** + * Adds a range of positions to the bitmap. + * + * @param posStartInclusive the start position of the range (inclusive) + * @param posEndExclusive the end position of the range (exclusive) + */ + public void addRange(long posStartInclusive, long posEndExclusive) { + for (long pos = posStartInclusive; pos < posEndExclusive; pos++) { + add(pos); + } + } + + /** + * Checks if the bitmap contains a position. + * + * @param pos the row position + * @return true if the position is in this bitmap, false otherwise + */ + public boolean contains(long pos) { + validatePosition(pos); + int high = highBytes(pos); + int low = lowBytes(pos); + return high < bitmaps.length && bitmaps[high].contains(low); + } + + /** + * Adds all positions from the other bitmap to this bitmap, modifying this bitmap in place. + * + * @param that the other bitmap + */ + public void or(RoaringPositionBitmap that) { + allocateBitmapsIfNeeded(that.bitmaps.length); + for (int index = 0; index < that.bitmaps.length; index++) { + bitmaps[index].or(that.bitmaps[index]); + } + } + + /** + * Indicates whether the bitmap is empty. + * + * @return true if the bitmap is empty, false otherwise + */ + public boolean isEmpty() { + return cardinality() == 0; + } + + /** + * Returns the number of positions in the bitmap. + * + * @return the number of set positions + */ + public long cardinality() { + long cardinality = 0L; + for (RoaringBitmap bitmap : bitmaps) { + cardinality += bitmap.getLongCardinality(); + } + return cardinality; + } + + /** + * Run-length compresses the bitmap if it is more space efficient. + * + * @return whether the bitmap was compressed + */ + public boolean runOptimize() { + boolean changed = false; + for (RoaringBitmap bitmap : bitmaps) { + changed |= bitmap.runOptimize(); + } + return changed; + } + + /** + * Iterates over all positions in the bitmap. + * + * @param consumer a consumer for positions + */ + public void forEach(LongConsumer consumer) { + for (int index = 0; index < bitmaps.length; index++) { + forEach(bitmaps[index], index, consumer); + } + } + + /** + * Returns the number of bytes required to serialize the bitmap. + * + * @return the serialized size in bytes + */ + public long serializedSizeInBytes() { + long size = BITMAP_COUNT_SIZE_BYTES; + for (RoaringBitmap bitmap : bitmaps) { + size += BITMAP_KEY_SIZE_BYTES + bitmap.serializedSizeInBytes(); + } + return size; + } + + /** + * Serializes the bitmap using the portable serialization format described below. + * + *

+ * + *

Note the byte order of the buffer must be little-endian. + * + * @param buffer the buffer to write to + * @see Roaring bitmap spec + */ + public void serialize(ByteBuffer buffer) { + validateByteOrder(buffer); + buffer.putLong(bitmaps.length); + for (int index = 0; index < bitmaps.length; index++) { + buffer.putInt(index); + bitmaps[index].serialize(buffer); + } + } + + private void allocateBitmapsIfNeeded(int requiredLength) { + if (bitmaps.length < requiredLength) { + if (bitmaps.length == 0 && requiredLength == 1) { + this.bitmaps = new RoaringBitmap[] {new RoaringBitmap()}; + } else { + RoaringBitmap[] newBitmaps = new RoaringBitmap[requiredLength]; + System.arraycopy(bitmaps, 0, newBitmaps, 0, bitmaps.length); + for (int index = bitmaps.length; index < requiredLength; index++) { + newBitmaps[index] = new RoaringBitmap(); + } + this.bitmaps = newBitmaps; + } + } + } + + /** + * Deserializes a bitmap from a buffer, assuming the portable serialization format. + * + * @param buffer the buffer to read from + * @return a new bitmap instance with the deserialized data + */ + public static RoaringPositionBitmap deserialize(ByteBuffer buffer) { + validateByteOrder(buffer); + + // the bitmap array may be sparse with more elements than the number of read bitmaps + int remainingBitmapCount = readBitmapCount(buffer); + List bitmaps = Lists.newArrayListWithExpectedSize(remainingBitmapCount); + int lastKey = 0; + + while (remainingBitmapCount > 0) { + int key = readKey(buffer, lastKey); + + // fill gaps as the bitmap array may be sparse + while (lastKey < key) { + bitmaps.add(new RoaringBitmap()); + lastKey++; + } + + RoaringBitmap bitmap = readBitmap(buffer); + bitmaps.add(bitmap); + + lastKey++; + remainingBitmapCount--; + } + + return new RoaringPositionBitmap(bitmaps.toArray(EMPTY_BITMAP_ARRAY)); + } + + private static void validateByteOrder(ByteBuffer buffer) { + Preconditions.checkArgument( + buffer.order() == ByteOrder.LITTLE_ENDIAN, + "Roaring bitmap serialization requires little-endian byte order"); + } + + private static int readBitmapCount(ByteBuffer buffer) { + long bitmapCount = buffer.getLong(); + Preconditions.checkArgument( + bitmapCount >= 0 && bitmapCount <= Integer.MAX_VALUE, + "Invalid bitmap count: %s", + bitmapCount); + return (int) bitmapCount; + } + + private static int readKey(ByteBuffer buffer, int lastKey) { + int key = buffer.getInt(); + Preconditions.checkArgument(key >= 0, "Invalid unsigned key: %s", key); + Preconditions.checkArgument(key >= lastKey, "Keys must be sorted in ascending order"); + return key; + } + + private static RoaringBitmap readBitmap(ByteBuffer buffer) { + try { + RoaringBitmap bitmap = new RoaringBitmap(); + bitmap.deserialize(buffer); + buffer.position(buffer.position() + bitmap.serializedSizeInBytes()); + return bitmap; + } catch (IOException e) { + throw new UncheckedIOException(e); + } + } + + // extracts high 32 bits from a 64-bit position + private static int highBytes(long pos) { + return (int) (pos >> 32); + } + + // extracts low 32 bits from a 64-bit position + private static int lowBytes(long pos) { + return (int) pos; + } + + // combines high and low 32-bit values into a 64-bit position + // the low value must be bit-masked to avoid sign extension + private static long toPosition(int high, int low) { + return (((long) high) << 32) | (((long) low) & 0xFFFFFFFFL); + } + + // iterates over 64-bit positions, reconstructing them from high and low 32-bit values + private static void forEach(RoaringBitmap bitmap, int high, LongConsumer consumer) { + bitmap.forEach((int low) -> consumer.accept(toPosition(high, low))); + } + + private static void validatePosition(long pos) { + Preconditions.checkArgument( + pos >= 0 && pos <= MAX_POSITION, + "Bitmap supports positions that are >= 0 and <= %s: %s", + MAX_POSITION, + pos); + } +} diff --git a/core/src/test/java/org/apache/iceberg/deletes/TestRoaringPositionBitmap.java b/core/src/test/java/org/apache/iceberg/deletes/TestRoaringPositionBitmap.java new file mode 100644 index 000000000000..b0c47a90c233 --- /dev/null +++ b/core/src/test/java/org/apache/iceberg/deletes/TestRoaringPositionBitmap.java @@ -0,0 +1,323 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.iceberg.deletes; + +import static org.assertj.core.api.Assertions.assertThat; +import static org.assertj.core.api.Assertions.assertThatThrownBy; + +import java.io.IOException; +import java.net.URL; +import java.nio.ByteBuffer; +import java.nio.ByteOrder; +import java.util.Set; +import org.apache.iceberg.relocated.com.google.common.collect.ImmutableSet; +import org.apache.iceberg.relocated.com.google.common.io.Resources; +import org.junit.jupiter.api.Test; + +public class TestRoaringPositionBitmap { + + private static final long BITMAP_OFFSET = 0xFFFFFFFFL + 1L; + private static final long CONTAINER_OFFSET = Character.MAX_VALUE + 1L; + private static final Set SUPPORTED_OFFICIAL_EXAMPLE_FILES = + ImmutableSet.of("64map32bitvals.bin", "64mapempty.bin", "64mapspreadvals.bin"); + + @Test + public void testAdd() { + RoaringPositionBitmap bitmap = new RoaringPositionBitmap(); + + bitmap.add(10L); + assertThat(bitmap.contains(10L)).isTrue(); + + bitmap.add(0L); + assertThat(bitmap.contains(0L)).isTrue(); + + bitmap.add(10L); + assertThat(bitmap.contains(10L)).isTrue(); + } + + @Test + public void testAddPositionsRequiringMultipleBitmaps() { + RoaringPositionBitmap bitmap = new RoaringPositionBitmap(); + + // construct positions that differ in their high 32-bit parts (i.e. keys) + long pos1 = ((long) 0 << 32) | 10L; // high = 0, low = 10 + long pos2 = ((long) 1 << 32) | 20L; // high = 1, low = 20 + long pos3 = ((long) 2 << 32) | 30L; // high = 2, low = 30 + long pos4 = ((long) 100 << 32) | 40L; // high = 1000, low = 40 + + bitmap.add(pos1); + bitmap.add(pos2); + bitmap.add(pos3); + bitmap.add(pos4); + + assertThat(bitmap.contains(pos1)).isTrue(); + assertThat(bitmap.contains(pos2)).isTrue(); + assertThat(bitmap.contains(pos3)).isTrue(); + assertThat(bitmap.contains(pos4)).isTrue(); + + assertThat(bitmap.cardinality()).isEqualTo(4); + + assertThat(bitmap.serializedSizeInBytes()).isGreaterThan(4); + } + + @Test + public void testAddRange() { + RoaringPositionBitmap bitmap = new RoaringPositionBitmap(); + + long startInclusive = 10L; + long endExclusive = 20L; + bitmap.addRange(startInclusive, endExclusive); + + // assert that all positions in the range [10, 20) are added + for (long pos = startInclusive; pos < endExclusive; pos++) { + assertThat(bitmap.contains(pos)).isTrue(); + } + + // assert that positions outside the range are not present + assertThat(bitmap.contains(9L)).isFalse(); + assertThat(bitmap.contains(20L)).isFalse(); + + // assert that the cardinality is correct (10 positions in range [10, 20)) + assertThat(bitmap.cardinality()).isEqualTo(10); + } + + @Test + public void testAddEmptyRange() { + RoaringPositionBitmap bitmap = new RoaringPositionBitmap(); + bitmap.addRange(10, 10); + assertThat(bitmap.isEmpty()).isTrue(); + } + + @Test + public void testOr() { + RoaringPositionBitmap bitmap1 = new RoaringPositionBitmap(); + bitmap1.add(10L); + bitmap1.add(20L); + + RoaringPositionBitmap bitmap2 = new RoaringPositionBitmap(); + bitmap2.add(30L); + bitmap2.add(40L); + + bitmap1.or(bitmap2); + + assertThat(bitmap1.contains(10L)).isTrue(); + assertThat(bitmap1.contains(20L)).isTrue(); + assertThat(bitmap1.contains(30L)).isTrue(); + assertThat(bitmap1.contains(40L)).isTrue(); + + assertThat(bitmap1.cardinality()).isEqualTo(4); + } + + @Test + public void testOrWithEmptyBitmap() { + RoaringPositionBitmap bitmap1 = new RoaringPositionBitmap(); + bitmap1.add(10L); + bitmap1.add(20L); + + RoaringPositionBitmap emptyBitmap = new RoaringPositionBitmap(); + + bitmap1.or(emptyBitmap); + + assertThat(bitmap1.contains(10L)).isTrue(); + assertThat(bitmap1.contains(20L)).isTrue(); + + assertThat(bitmap1.cardinality()).isEqualTo(2); + } + + @Test + public void testOrWithOverlappingBitmap() { + RoaringPositionBitmap bitmap1 = new RoaringPositionBitmap(); + bitmap1.add(10L); + bitmap1.add(20L); + bitmap1.add(30L); + + RoaringPositionBitmap bitmap2 = new RoaringPositionBitmap(); + bitmap2.add(20L); + bitmap2.add(40L); + + bitmap1.or(bitmap2); + + assertThat(bitmap1.contains(10L)).isTrue(); + assertThat(bitmap1.contains(20L)).isTrue(); + assertThat(bitmap1.contains(30L)).isTrue(); + assertThat(bitmap1.contains(40L)).isTrue(); + + assertThat(bitmap1.cardinality()).isEqualTo(4); + } + + @Test + public void testOrSparseBitmaps() { + RoaringPositionBitmap bitmap1 = new RoaringPositionBitmap(); + bitmap1.add((long) 0 << 32 | 100L); // High = 0, Low = 100 + bitmap1.add((long) 1 << 32 | 200L); // High = 1, Low = 200 + + RoaringPositionBitmap bitmap2 = new RoaringPositionBitmap(); + bitmap2.add((long) 2 << 32 | 300L); // High = 2, Low = 300 + bitmap2.add((long) 3 << 32 | 400L); // High = 3, Low = 400 + + bitmap1.or(bitmap2); + + assertThat(bitmap1.contains((long) 0 << 32 | 100L)).isTrue(); + assertThat(bitmap1.contains((long) 1 << 32 | 200L)).isTrue(); + assertThat(bitmap1.contains((long) 2 << 32 | 300L)).isTrue(); + assertThat(bitmap1.contains((long) 3 << 32 | 400L)).isTrue(); + + assertThat(bitmap1.cardinality()).isEqualTo(4); + } + + @Test + public void testCardinality() { + RoaringPositionBitmap bitmap = new RoaringPositionBitmap(); + + assertThat(bitmap.cardinality()).isEqualTo(0); + + bitmap.add(10L); + bitmap.add(20L); + bitmap.add(30L); + + assertThat(bitmap.cardinality()).isEqualTo(3); + + bitmap.add(10L); // already exists + + assertThat(bitmap.cardinality()).isEqualTo(3); + } + + @Test + public void testCardinalitySparseBitmaps() { + RoaringPositionBitmap bitmap = new RoaringPositionBitmap(); + + bitmap.add((long) 0 << 32 | 100L); // high = 0, low = 100 + bitmap.add((long) 0 << 32 | 101L); // high = 0, low = 101 + bitmap.add((long) 0 << 32 | 105L); // high = 0, low = 101 + bitmap.add((long) 1 << 32 | 200L); // high = 1, low = 200 + bitmap.add((long) 100 << 32 | 300L); // high = 1000, low = 300 + + assertThat(bitmap.cardinality()).isEqualTo(5); + } + + @Test + public void testSerializeDeserializeAllContainerBitmap() { + RoaringPositionBitmap bitmap = new RoaringPositionBitmap(); + + // bitmap 0, container 0 (array) + bitmap.add(position(0 /* bitmap */, 0 /* container */, 5L)); + bitmap.add(position(0 /* bitmap */, 0 /* container */, 7L)); + + // bitmap 0, container 1 (array that can be compressed) + bitmap.addRange( + position(0 /* bitmap */, 1 /* container */, 1L), + position(0 /* bitmap */, 1 /* container */, 1000L)); + + // bitmap 1, container 2 (bitset) + bitmap.addRange( + position(0 /* bitmap */, 2 /* container */, 1L), + position(0 /* bitmap */, 2 /* container */, CONTAINER_OFFSET - 1L)); + + // bitmap 1, container 0 (array) + bitmap.add(position(1 /* bitmap */, 0 /* container */, 10L)); + bitmap.add(position(1 /* bitmap */, 0 /* container */, 20L)); + + // bitmap 1, container 1 (array that can be compressed) + bitmap.addRange( + position(1 /* bitmap */, 1 /* container */, 10L), + position(1 /* bitmap */, 1 /* container */, 500L)); + + // bitmap 1, container 2 (bitset) + bitmap.addRange( + position(1 /* bitmap */, 2 /* container */, 1L), + position(1 /* bitmap */, 2 /* container */, CONTAINER_OFFSET - 1)); + + assertThat(bitmap.runOptimize()).as("Bitmap must be RLE encoded").isTrue(); + + RoaringPositionBitmap bitmapCopy = roundTripSerialize(bitmap); + + assertThat(bitmapCopy.cardinality()).isEqualTo(bitmap.cardinality()); + bitmapCopy.forEach(position -> assertThat(bitmap.contains(position)).isTrue()); + bitmap.forEach(position -> assertThat(bitmapCopy.contains(position)).isTrue()); + } + + @Test + public void testDeserializeSupportedRoaringExamples() throws IOException { + for (String file : SUPPORTED_OFFICIAL_EXAMPLE_FILES) { + RoaringPositionBitmap bitmap = readBitmap(file); + assertThat(bitmap).isNotNull(); + } + } + + @Test + public void testDeserializeUnsupportedRoaringExample() { + // this file contains a value that is larger than the max supported value in our impl + assertThatThrownBy(() -> readBitmap("64maphighvals.bin")) + .isInstanceOf(IllegalArgumentException.class) + .hasMessageContaining("Invalid unsigned key"); + } + + @Test + public void testUnsupportedPositions() { + RoaringPositionBitmap bitmap = new RoaringPositionBitmap(); + + assertThatThrownBy(() -> bitmap.add(-1L)) + .isInstanceOf(IllegalArgumentException.class) + .hasMessageContaining( + "Bitmap supports positions that are >= 0 and <= %s", + RoaringPositionBitmap.MAX_POSITION); + + assertThatThrownBy(() -> bitmap.contains(-1L)) + .isInstanceOf(IllegalArgumentException.class) + .hasMessageContaining( + "Bitmap supports positions that are >= 0 and <= %s", + RoaringPositionBitmap.MAX_POSITION); + + assertThatThrownBy(() -> bitmap.add(RoaringPositionBitmap.MAX_POSITION + 1L)) + .isInstanceOf(IllegalArgumentException.class) + .hasMessageContaining( + "Bitmap supports positions that are >= 0 and <= %s", + RoaringPositionBitmap.MAX_POSITION); + + assertThatThrownBy(() -> bitmap.contains(RoaringPositionBitmap.MAX_POSITION + 1L)) + .isInstanceOf(IllegalArgumentException.class) + .hasMessageContaining( + "Bitmap supports positions that are >= 0 and <= %s", + RoaringPositionBitmap.MAX_POSITION); + } + + private static long position(int bitmapIndex, int containerIndex, long value) { + return bitmapIndex * BITMAP_OFFSET + containerIndex * CONTAINER_OFFSET + value; + } + + private static RoaringPositionBitmap roundTripSerialize(RoaringPositionBitmap bitmap) { + ByteBuffer buffer = ByteBuffer.allocate((int) bitmap.serializedSizeInBytes()); + buffer.order(ByteOrder.LITTLE_ENDIAN); + bitmap.serialize(buffer); + buffer.flip(); + return RoaringPositionBitmap.deserialize(buffer); + } + + private static RoaringPositionBitmap readBitmap(String resourceName) throws IOException { + byte[] bytes = readTestResource(resourceName); + ByteBuffer buffer = ByteBuffer.wrap(bytes); + buffer.order(ByteOrder.LITTLE_ENDIAN); + return RoaringPositionBitmap.deserialize(buffer); + } + + private static byte[] readTestResource(String resourceName) throws IOException { + URL resource = Resources.getResource(TestRoaringPositionBitmap.class, resourceName); + return Resources.toByteArray(resource); + } +} diff --git a/core/src/test/resources/org/apache/iceberg/deletes/64map32bitvals.bin b/core/src/test/resources/org/apache/iceberg/deletes/64map32bitvals.bin new file mode 100644 index 0000000000000000000000000000000000000000..475b894417e44cff61d8810057fc1530cef05718 GIT binary patch literal 48 ocmZQ%KmaQP1_nkjmy9 literal 0 HcmV?d00001 diff --git a/core/src/test/resources/org/apache/iceberg/deletes/64maphighvals.bin b/core/src/test/resources/org/apache/iceberg/deletes/64maphighvals.bin new file mode 100644 index 0000000000000000000000000000000000000000..d4312b8d22713991026a36d5d1293cf1960d89ed GIT binary patch literal 1086 zcmd;PfPnY=_rj5t0RsagP#7Y>#UKD@!S*SERgUMnOxf@r{zi~~v PF5QrBO1Grj(uH&%!J7vn literal 0 HcmV?d00001 From 41c769f6e934eac648377d8de8138dc0bcb9ef92 Mon Sep 17 00:00:00 2001 From: Anton Okolnychyi Date: Mon, 21 Oct 2024 15:28:14 -0700 Subject: [PATCH 2/9] Feedback --- LICENSE | 2 +- .../deletes/RoaringPositionBitmap.java | 48 ++++++++++--------- .../deletes/TestRoaringPositionBitmap.java | 17 ++++--- 3 files changed, 37 insertions(+), 30 deletions(-) diff --git a/LICENSE b/LICENSE index b6c2ec5c72f3..76f6113d9811 100644 --- a/LICENSE +++ b/LICENSE @@ -298,7 +298,7 @@ License: https://www.apache.org/licenses/LICENSE-2.0 This product includes code from Delta Lake. * AssignmentAlignmentSupport is an independent development but UpdateExpressionsSupport in Delta was used as a reference. -* RoaringPositionBitmap is an independent development but RoaringBitmapArray in Delta was used as a reference. +* RoaringPositionBitmap is a Java implementation of RoaringBitmapArray in Delta. Copyright: 2020 The Delta Lake Project Authors. Home page: https://delta.io/ diff --git a/core/src/main/java/org/apache/iceberg/deletes/RoaringPositionBitmap.java b/core/src/main/java/org/apache/iceberg/deletes/RoaringPositionBitmap.java index d8cc1dabc723..8d18c73d15ac 100644 --- a/core/src/main/java/org/apache/iceberg/deletes/RoaringPositionBitmap.java +++ b/core/src/main/java/org/apache/iceberg/deletes/RoaringPositionBitmap.java @@ -41,14 +41,15 @@ * find a 32-bit bitmap and the least significant 4 bytes are tested for inclusion in the bitmap. If * a bitmap is not found for the key, then the position is not set. * - *

Note that positions must be greater than or equal to 0 add less than {@link #MAX_POSITION}. - * This bitmap does not support positions larger than {@link #MAX_POSITION} as the bitmap array - * length is a signed 32-bit integer that must be greater than or equal to 0. + *

Positions must range from 0 (inclusive) to {@link #MAX_POSITION} (inclusive). The bitmap + * cannot handle positions exceeding {@link #MAX_POSITION} because the array size is a signed 32-bit + * integer, which must be non-negative. Allocating an array with size Integer.MAX_VALUE + 1 would + * overflow the integer and result in a negative length. */ class RoaringPositionBitmap { static final long MAX_POSITION = toPosition(Integer.MAX_VALUE - 1, Integer.MIN_VALUE); - private static final RoaringBitmap[] EMPTY_BITMAP_ARRAY = new RoaringBitmap[] {}; + private static final RoaringBitmap[] EMPTY_BITMAP_ARRAY = new RoaringBitmap[0]; private static final long BITMAP_COUNT_SIZE_BYTES = 8L; private static final long BITMAP_KEY_SIZE_BYTES = 4L; @@ -158,6 +159,21 @@ public void forEach(LongConsumer consumer) { } } + private void allocateBitmapsIfNeeded(int requiredLength) { + if (bitmaps.length < requiredLength) { + if (bitmaps.length == 0 && requiredLength == 1) { + this.bitmaps = new RoaringBitmap[] {new RoaringBitmap()}; + } else { + RoaringBitmap[] newBitmaps = new RoaringBitmap[requiredLength]; + System.arraycopy(bitmaps, 0, newBitmaps, 0, bitmaps.length); + for (int index = bitmaps.length; index < requiredLength; index++) { + newBitmaps[index] = new RoaringBitmap(); + } + this.bitmaps = newBitmaps; + } + } + } + /** * Returns the number of bytes required to serialize the bitmap. * @@ -197,21 +213,6 @@ public void serialize(ByteBuffer buffer) { } } - private void allocateBitmapsIfNeeded(int requiredLength) { - if (bitmaps.length < requiredLength) { - if (bitmaps.length == 0 && requiredLength == 1) { - this.bitmaps = new RoaringBitmap[] {new RoaringBitmap()}; - } else { - RoaringBitmap[] newBitmaps = new RoaringBitmap[requiredLength]; - System.arraycopy(bitmaps, 0, newBitmaps, 0, bitmaps.length); - for (int index = bitmaps.length; index < requiredLength; index++) { - newBitmaps[index] = new RoaringBitmap(); - } - this.bitmaps = newBitmaps; - } - } - } - /** * Deserializes a bitmap from a buffer, assuming the portable serialization format. * @@ -224,13 +225,13 @@ public static RoaringPositionBitmap deserialize(ByteBuffer buffer) { // the bitmap array may be sparse with more elements than the number of read bitmaps int remainingBitmapCount = readBitmapCount(buffer); List bitmaps = Lists.newArrayListWithExpectedSize(remainingBitmapCount); - int lastKey = 0; + int lastKey = -1; while (remainingBitmapCount > 0) { int key = readKey(buffer, lastKey); // fill gaps as the bitmap array may be sparse - while (lastKey < key) { + while (lastKey < key - 1) { bitmaps.add(new RoaringBitmap()); lastKey++; } @@ -238,7 +239,7 @@ public static RoaringPositionBitmap deserialize(ByteBuffer buffer) { RoaringBitmap bitmap = readBitmap(buffer); bitmaps.add(bitmap); - lastKey++; + lastKey = key; remainingBitmapCount--; } @@ -263,7 +264,8 @@ private static int readBitmapCount(ByteBuffer buffer) { private static int readKey(ByteBuffer buffer, int lastKey) { int key = buffer.getInt(); Preconditions.checkArgument(key >= 0, "Invalid unsigned key: %s", key); - Preconditions.checkArgument(key >= lastKey, "Keys must be sorted in ascending order"); + Preconditions.checkArgument(key <= Integer.MAX_VALUE - 1, "Key is too large: %s", key); + Preconditions.checkArgument(key > lastKey, "Keys must be sorted in ascending order"); return key; } diff --git a/core/src/test/java/org/apache/iceberg/deletes/TestRoaringPositionBitmap.java b/core/src/test/java/org/apache/iceberg/deletes/TestRoaringPositionBitmap.java index b0c47a90c233..a5942aeb9e26 100644 --- a/core/src/test/java/org/apache/iceberg/deletes/TestRoaringPositionBitmap.java +++ b/core/src/test/java/org/apache/iceberg/deletes/TestRoaringPositionBitmap.java @@ -59,7 +59,7 @@ public void testAddPositionsRequiringMultipleBitmaps() { long pos1 = ((long) 0 << 32) | 10L; // high = 0, low = 10 long pos2 = ((long) 1 << 32) | 20L; // high = 1, low = 20 long pos3 = ((long) 2 << 32) | 30L; // high = 2, low = 30 - long pos4 = ((long) 100 << 32) | 40L; // high = 1000, low = 40 + long pos4 = ((long) 100 << 32) | 40L; // high = 100, low = 40 bitmap.add(pos1); bitmap.add(pos2); @@ -120,8 +120,11 @@ public void testOr() { assertThat(bitmap1.contains(20L)).isTrue(); assertThat(bitmap1.contains(30L)).isTrue(); assertThat(bitmap1.contains(40L)).isTrue(); - assertThat(bitmap1.cardinality()).isEqualTo(4); + + assertThat(bitmap2.contains(10L)).isFalse(); + assertThat(bitmap2.contains(20L)).isFalse(); + assertThat(bitmap2.cardinality()).isEqualTo(2); } @Test @@ -136,8 +139,12 @@ public void testOrWithEmptyBitmap() { assertThat(bitmap1.contains(10L)).isTrue(); assertThat(bitmap1.contains(20L)).isTrue(); - assertThat(bitmap1.cardinality()).isEqualTo(2); + + assertThat(emptyBitmap.contains(10L)).isFalse(); + assertThat(emptyBitmap.contains(20L)).isFalse(); + assertThat(emptyBitmap.cardinality()).isEqualTo(0); + assertThat(emptyBitmap.isEmpty()).isTrue(); } @Test @@ -157,7 +164,6 @@ public void testOrWithOverlappingBitmap() { assertThat(bitmap1.contains(20L)).isTrue(); assertThat(bitmap1.contains(30L)).isTrue(); assertThat(bitmap1.contains(40L)).isTrue(); - assertThat(bitmap1.cardinality()).isEqualTo(4); } @@ -177,7 +183,6 @@ public void testOrSparseBitmaps() { assertThat(bitmap1.contains((long) 1 << 32 | 200L)).isTrue(); assertThat(bitmap1.contains((long) 2 << 32 | 300L)).isTrue(); assertThat(bitmap1.contains((long) 3 << 32 | 400L)).isTrue(); - assertThat(bitmap1.cardinality()).isEqualTo(4); } @@ -206,7 +211,7 @@ public void testCardinalitySparseBitmaps() { bitmap.add((long) 0 << 32 | 101L); // high = 0, low = 101 bitmap.add((long) 0 << 32 | 105L); // high = 0, low = 101 bitmap.add((long) 1 << 32 | 200L); // high = 1, low = 200 - bitmap.add((long) 100 << 32 | 300L); // high = 1000, low = 300 + bitmap.add((long) 100 << 32 | 300L); // high = 100, low = 300 assertThat(bitmap.cardinality()).isEqualTo(5); } From 5112ebb0adf68cd65838c75273a85a2843fcd0f1 Mon Sep 17 00:00:00 2001 From: Anton Okolnychyi Date: Mon, 21 Oct 2024 16:05:06 -0700 Subject: [PATCH 3/9] More tests and feedback --- .../deletes/RoaringPositionBitmap.java | 6 ++++ .../deletes/TestRoaringPositionBitmap.java | 36 ++++++++++++++++--- 2 files changed, 38 insertions(+), 4 deletions(-) diff --git a/core/src/main/java/org/apache/iceberg/deletes/RoaringPositionBitmap.java b/core/src/main/java/org/apache/iceberg/deletes/RoaringPositionBitmap.java index 8d18c73d15ac..8a75e48156b2 100644 --- a/core/src/main/java/org/apache/iceberg/deletes/RoaringPositionBitmap.java +++ b/core/src/main/java/org/apache/iceberg/deletes/RoaringPositionBitmap.java @@ -24,6 +24,7 @@ import java.nio.ByteOrder; import java.util.List; import java.util.function.LongConsumer; +import org.apache.iceberg.relocated.com.google.common.annotations.VisibleForTesting; import org.apache.iceberg.relocated.com.google.common.base.Preconditions; import org.apache.iceberg.relocated.com.google.common.collect.Lists; import org.roaringbitmap.RoaringBitmap; @@ -159,6 +160,11 @@ public void forEach(LongConsumer consumer) { } } + @VisibleForTesting + int allocatedBitmapCount() { + return bitmaps.length; + } + private void allocateBitmapsIfNeeded(int requiredLength) { if (bitmaps.length < requiredLength) { if (bitmaps.length == 0 && requiredLength == 1) { diff --git a/core/src/test/java/org/apache/iceberg/deletes/TestRoaringPositionBitmap.java b/core/src/test/java/org/apache/iceberg/deletes/TestRoaringPositionBitmap.java index a5942aeb9e26..57b914e91f1b 100644 --- a/core/src/test/java/org/apache/iceberg/deletes/TestRoaringPositionBitmap.java +++ b/core/src/test/java/org/apache/iceberg/deletes/TestRoaringPositionBitmap.java @@ -70,10 +70,9 @@ public void testAddPositionsRequiringMultipleBitmaps() { assertThat(bitmap.contains(pos2)).isTrue(); assertThat(bitmap.contains(pos3)).isTrue(); assertThat(bitmap.contains(pos4)).isTrue(); - assertThat(bitmap.cardinality()).isEqualTo(4); - assertThat(bitmap.serializedSizeInBytes()).isGreaterThan(4); + assertThat(bitmap.allocatedBitmapCount()).isEqualTo(101 /* max high + 1 */); } @Test @@ -97,6 +96,27 @@ public void testAddRange() { assertThat(bitmap.cardinality()).isEqualTo(10); } + @Test + public void testAddRangeAcrossKeys() { + RoaringPositionBitmap bitmap = new RoaringPositionBitmap(); + + long startInclusive = ((long) 1 << 32) - 5L; + long endExclusive = ((long) 1 << 32) + 5L; + bitmap.addRange(startInclusive, endExclusive); + + // assert that all positions in the range are added + for (long pos = startInclusive; pos < endExclusive; pos++) { + assertThat(bitmap.contains(pos)).isTrue(); + } + + // assert that positions outside the range are not present + assertThat(bitmap.contains(0)).isFalse(); + assertThat(bitmap.contains(endExclusive)).isFalse(); + + // assert that the cardinality is correct + assertThat(bitmap.cardinality()).isEqualTo(10); + } + @Test public void testAddEmptyRange() { RoaringPositionBitmap bitmap = new RoaringPositionBitmap(); @@ -113,6 +133,7 @@ public void testOr() { RoaringPositionBitmap bitmap2 = new RoaringPositionBitmap(); bitmap2.add(30L); bitmap2.add(40L); + bitmap2.add((long) 2 << 32); bitmap1.or(bitmap2); @@ -120,11 +141,12 @@ public void testOr() { assertThat(bitmap1.contains(20L)).isTrue(); assertThat(bitmap1.contains(30L)).isTrue(); assertThat(bitmap1.contains(40L)).isTrue(); - assertThat(bitmap1.cardinality()).isEqualTo(4); + assertThat(bitmap1.contains((long) 2 << 32)).isTrue(); + assertThat(bitmap1.cardinality()).isEqualTo(5); assertThat(bitmap2.contains(10L)).isFalse(); assertThat(bitmap2.contains(20L)).isFalse(); - assertThat(bitmap2.cardinality()).isEqualTo(2); + assertThat(bitmap2.cardinality()).isEqualTo(3); } @Test @@ -165,6 +187,12 @@ public void testOrWithOverlappingBitmap() { assertThat(bitmap1.contains(30L)).isTrue(); assertThat(bitmap1.contains(40L)).isTrue(); assertThat(bitmap1.cardinality()).isEqualTo(4); + + assertThat(bitmap2.contains(10L)).isFalse(); + assertThat(bitmap2.contains(20L)).isTrue(); + assertThat(bitmap2.contains(30L)).isFalse(); + assertThat(bitmap2.contains(40L)).isTrue(); + assertThat(bitmap2.cardinality()).isEqualTo(2); } @Test From 7961648970c57f54a5de30181f16ee0ce3ea9579 Mon Sep 17 00:00:00 2001 From: Anton Okolnychyi Date: Mon, 21 Oct 2024 16:21:05 -0700 Subject: [PATCH 4/9] addAll --- .../deletes/RoaringPositionBitmap.java | 24 +++++++++---------- .../deletes/TestRoaringPositionBitmap.java | 16 ++++++------- 2 files changed, 20 insertions(+), 20 deletions(-) diff --git a/core/src/main/java/org/apache/iceberg/deletes/RoaringPositionBitmap.java b/core/src/main/java/org/apache/iceberg/deletes/RoaringPositionBitmap.java index 8a75e48156b2..861f5dcbe30c 100644 --- a/core/src/main/java/org/apache/iceberg/deletes/RoaringPositionBitmap.java +++ b/core/src/main/java/org/apache/iceberg/deletes/RoaringPositionBitmap.java @@ -89,6 +89,18 @@ public void addRange(long posStartInclusive, long posEndExclusive) { } } + /** + * Adds all positions from the other bitmap to this bitmap, modifying this bitmap in place. + * + * @param that the other bitmap + */ + public void addAll(RoaringPositionBitmap that) { + allocateBitmapsIfNeeded(that.bitmaps.length); + for (int index = 0; index < that.bitmaps.length; index++) { + bitmaps[index].or(that.bitmaps[index]); + } + } + /** * Checks if the bitmap contains a position. * @@ -102,18 +114,6 @@ public boolean contains(long pos) { return high < bitmaps.length && bitmaps[high].contains(low); } - /** - * Adds all positions from the other bitmap to this bitmap, modifying this bitmap in place. - * - * @param that the other bitmap - */ - public void or(RoaringPositionBitmap that) { - allocateBitmapsIfNeeded(that.bitmaps.length); - for (int index = 0; index < that.bitmaps.length; index++) { - bitmaps[index].or(that.bitmaps[index]); - } - } - /** * Indicates whether the bitmap is empty. * diff --git a/core/src/test/java/org/apache/iceberg/deletes/TestRoaringPositionBitmap.java b/core/src/test/java/org/apache/iceberg/deletes/TestRoaringPositionBitmap.java index 57b914e91f1b..52db32a2c516 100644 --- a/core/src/test/java/org/apache/iceberg/deletes/TestRoaringPositionBitmap.java +++ b/core/src/test/java/org/apache/iceberg/deletes/TestRoaringPositionBitmap.java @@ -125,7 +125,7 @@ public void testAddEmptyRange() { } @Test - public void testOr() { + public void testAddAll() { RoaringPositionBitmap bitmap1 = new RoaringPositionBitmap(); bitmap1.add(10L); bitmap1.add(20L); @@ -135,7 +135,7 @@ public void testOr() { bitmap2.add(40L); bitmap2.add((long) 2 << 32); - bitmap1.or(bitmap2); + bitmap1.addAll(bitmap2); assertThat(bitmap1.contains(10L)).isTrue(); assertThat(bitmap1.contains(20L)).isTrue(); @@ -150,14 +150,14 @@ public void testOr() { } @Test - public void testOrWithEmptyBitmap() { + public void testAddAllWithEmptyBitmap() { RoaringPositionBitmap bitmap1 = new RoaringPositionBitmap(); bitmap1.add(10L); bitmap1.add(20L); RoaringPositionBitmap emptyBitmap = new RoaringPositionBitmap(); - bitmap1.or(emptyBitmap); + bitmap1.addAll(emptyBitmap); assertThat(bitmap1.contains(10L)).isTrue(); assertThat(bitmap1.contains(20L)).isTrue(); @@ -170,7 +170,7 @@ public void testOrWithEmptyBitmap() { } @Test - public void testOrWithOverlappingBitmap() { + public void testAddAllWithOverlappingBitmap() { RoaringPositionBitmap bitmap1 = new RoaringPositionBitmap(); bitmap1.add(10L); bitmap1.add(20L); @@ -180,7 +180,7 @@ public void testOrWithOverlappingBitmap() { bitmap2.add(20L); bitmap2.add(40L); - bitmap1.or(bitmap2); + bitmap1.addAll(bitmap2); assertThat(bitmap1.contains(10L)).isTrue(); assertThat(bitmap1.contains(20L)).isTrue(); @@ -196,7 +196,7 @@ public void testOrWithOverlappingBitmap() { } @Test - public void testOrSparseBitmaps() { + public void testAddAllSparseBitmaps() { RoaringPositionBitmap bitmap1 = new RoaringPositionBitmap(); bitmap1.add((long) 0 << 32 | 100L); // High = 0, Low = 100 bitmap1.add((long) 1 << 32 | 200L); // High = 1, Low = 200 @@ -205,7 +205,7 @@ public void testOrSparseBitmaps() { bitmap2.add((long) 2 << 32 | 300L); // High = 2, Low = 300 bitmap2.add((long) 3 << 32 | 400L); // High = 3, Low = 400 - bitmap1.or(bitmap2); + bitmap1.addAll(bitmap2); assertThat(bitmap1.contains((long) 0 << 32 | 100L)).isTrue(); assertThat(bitmap1.contains((long) 1 << 32 | 200L)).isTrue(); From 830b1b0c8d5901c7139449c44c98ea667aa71572 Mon Sep 17 00:00:00 2001 From: Anton Okolnychyi Date: Wed, 23 Oct 2024 16:11:59 +0200 Subject: [PATCH 5/9] Basic fuzz tests --- .../deletes/TestRoaringPositionBitmap.java | 207 +++++++++++++++--- 1 file changed, 180 insertions(+), 27 deletions(-) diff --git a/core/src/test/java/org/apache/iceberg/deletes/TestRoaringPositionBitmap.java b/core/src/test/java/org/apache/iceberg/deletes/TestRoaringPositionBitmap.java index 52db32a2c516..ebe0745f7b10 100644 --- a/core/src/test/java/org/apache/iceberg/deletes/TestRoaringPositionBitmap.java +++ b/core/src/test/java/org/apache/iceberg/deletes/TestRoaringPositionBitmap.java @@ -25,19 +25,48 @@ import java.net.URL; import java.nio.ByteBuffer; import java.nio.ByteOrder; +import java.util.List; +import java.util.Random; import java.util.Set; +import org.apache.iceberg.Parameter; +import org.apache.iceberg.ParameterizedTestExtension; +import org.apache.iceberg.Parameters; import org.apache.iceberg.relocated.com.google.common.collect.ImmutableSet; +import org.apache.iceberg.relocated.com.google.common.collect.Lists; +import org.apache.iceberg.relocated.com.google.common.collect.Sets; import org.apache.iceberg.relocated.com.google.common.io.Resources; -import org.junit.jupiter.api.Test; +import org.apache.iceberg.util.Pair; +import org.junit.jupiter.api.TestTemplate; +import org.junit.jupiter.api.extension.ExtendWith; +@ExtendWith(ParameterizedTestExtension.class) public class TestRoaringPositionBitmap { - private static final long BITMAP_OFFSET = 0xFFFFFFFFL + 1L; - private static final long CONTAINER_OFFSET = Character.MAX_VALUE + 1L; + private static final long BITMAP_SIZE = 0xFFFFFFFFL; + private static final long BITMAP_OFFSET = BITMAP_SIZE + 1L; + private static final long CONTAINER_SIZE = Character.MAX_VALUE; + private static final long CONTAINER_OFFSET = CONTAINER_SIZE + 1L; + private static final int VALIDATION_LOOKUP_COUNT = 20_000; private static final Set SUPPORTED_OFFICIAL_EXAMPLE_FILES = ImmutableSet.of("64map32bitvals.bin", "64mapempty.bin", "64mapspreadvals.bin"); - @Test + @Parameters(name = "seed = {0}, validationSeed = {1}") + protected static List parameters() { + List parameters = Lists.newArrayList(); + Random random = new Random(); + long seed = random.nextLong(); + long validationSeed = random.nextLong(); + parameters.add(new Object[] {seed, validationSeed}); + return parameters; + } + + @Parameter(index = 0) + private long seed; + + @Parameter(index = 1) + private long validationSeed; + + @TestTemplate public void testAdd() { RoaringPositionBitmap bitmap = new RoaringPositionBitmap(); @@ -51,7 +80,7 @@ public void testAdd() { assertThat(bitmap.contains(10L)).isTrue(); } - @Test + @TestTemplate public void testAddPositionsRequiringMultipleBitmaps() { RoaringPositionBitmap bitmap = new RoaringPositionBitmap(); @@ -75,16 +104,16 @@ public void testAddPositionsRequiringMultipleBitmaps() { assertThat(bitmap.allocatedBitmapCount()).isEqualTo(101 /* max high + 1 */); } - @Test + @TestTemplate public void testAddRange() { RoaringPositionBitmap bitmap = new RoaringPositionBitmap(); - long startInclusive = 10L; - long endExclusive = 20L; - bitmap.addRange(startInclusive, endExclusive); + long posStartInclusive = 10L; + long posEndExclusive = 20L; + bitmap.addRange(posStartInclusive, posEndExclusive); // assert that all positions in the range [10, 20) are added - for (long pos = startInclusive; pos < endExclusive; pos++) { + for (long pos = posStartInclusive; pos < posEndExclusive; pos++) { assertThat(bitmap.contains(pos)).isTrue(); } @@ -96,35 +125,35 @@ public void testAddRange() { assertThat(bitmap.cardinality()).isEqualTo(10); } - @Test + @TestTemplate public void testAddRangeAcrossKeys() { RoaringPositionBitmap bitmap = new RoaringPositionBitmap(); - long startInclusive = ((long) 1 << 32) - 5L; - long endExclusive = ((long) 1 << 32) + 5L; - bitmap.addRange(startInclusive, endExclusive); + long posStartInclusive = ((long) 1 << 32) - 5L; + long posEndExclusive = ((long) 1 << 32) + 5L; + bitmap.addRange(posStartInclusive, posEndExclusive); // assert that all positions in the range are added - for (long pos = startInclusive; pos < endExclusive; pos++) { + for (long pos = posStartInclusive; pos < posEndExclusive; pos++) { assertThat(bitmap.contains(pos)).isTrue(); } // assert that positions outside the range are not present assertThat(bitmap.contains(0)).isFalse(); - assertThat(bitmap.contains(endExclusive)).isFalse(); + assertThat(bitmap.contains(posEndExclusive)).isFalse(); // assert that the cardinality is correct assertThat(bitmap.cardinality()).isEqualTo(10); } - @Test + @TestTemplate public void testAddEmptyRange() { RoaringPositionBitmap bitmap = new RoaringPositionBitmap(); bitmap.addRange(10, 10); assertThat(bitmap.isEmpty()).isTrue(); } - @Test + @TestTemplate public void testAddAll() { RoaringPositionBitmap bitmap1 = new RoaringPositionBitmap(); bitmap1.add(10L); @@ -149,7 +178,7 @@ public void testAddAll() { assertThat(bitmap2.cardinality()).isEqualTo(3); } - @Test + @TestTemplate public void testAddAllWithEmptyBitmap() { RoaringPositionBitmap bitmap1 = new RoaringPositionBitmap(); bitmap1.add(10L); @@ -169,7 +198,7 @@ public void testAddAllWithEmptyBitmap() { assertThat(emptyBitmap.isEmpty()).isTrue(); } - @Test + @TestTemplate public void testAddAllWithOverlappingBitmap() { RoaringPositionBitmap bitmap1 = new RoaringPositionBitmap(); bitmap1.add(10L); @@ -195,7 +224,7 @@ public void testAddAllWithOverlappingBitmap() { assertThat(bitmap2.cardinality()).isEqualTo(2); } - @Test + @TestTemplate public void testAddAllSparseBitmaps() { RoaringPositionBitmap bitmap1 = new RoaringPositionBitmap(); bitmap1.add((long) 0 << 32 | 100L); // High = 0, Low = 100 @@ -214,7 +243,7 @@ public void testAddAllSparseBitmaps() { assertThat(bitmap1.cardinality()).isEqualTo(4); } - @Test + @TestTemplate public void testCardinality() { RoaringPositionBitmap bitmap = new RoaringPositionBitmap(); @@ -231,7 +260,7 @@ public void testCardinality() { assertThat(bitmap.cardinality()).isEqualTo(3); } - @Test + @TestTemplate public void testCardinalitySparseBitmaps() { RoaringPositionBitmap bitmap = new RoaringPositionBitmap(); @@ -244,7 +273,7 @@ public void testCardinalitySparseBitmaps() { assertThat(bitmap.cardinality()).isEqualTo(5); } - @Test + @TestTemplate public void testSerializeDeserializeAllContainerBitmap() { RoaringPositionBitmap bitmap = new RoaringPositionBitmap(); @@ -285,7 +314,7 @@ public void testSerializeDeserializeAllContainerBitmap() { bitmap.forEach(position -> assertThat(bitmapCopy.contains(position)).isTrue()); } - @Test + @TestTemplate public void testDeserializeSupportedRoaringExamples() throws IOException { for (String file : SUPPORTED_OFFICIAL_EXAMPLE_FILES) { RoaringPositionBitmap bitmap = readBitmap(file); @@ -293,7 +322,7 @@ public void testDeserializeSupportedRoaringExamples() throws IOException { } } - @Test + @TestTemplate public void testDeserializeUnsupportedRoaringExample() { // this file contains a value that is larger than the max supported value in our impl assertThatThrownBy(() -> readBitmap("64maphighvals.bin")) @@ -301,7 +330,7 @@ public void testDeserializeUnsupportedRoaringExample() { .hasMessageContaining("Invalid unsigned key"); } - @Test + @TestTemplate public void testUnsupportedPositions() { RoaringPositionBitmap bitmap = new RoaringPositionBitmap(); @@ -330,6 +359,112 @@ public void testUnsupportedPositions() { RoaringPositionBitmap.MAX_POSITION); } + @TestTemplate + public void testRandomSparseBitmap() { + Pair> bitmapAndPositions = + generateSparseBitmap( + 0L /* min position */, + (long) 5 << 32 /* max position must not need more than 5 bitmaps */, + 100_000 /* cardinality */); + RoaringPositionBitmap bitmap = bitmapAndPositions.first(); + Set positions = bitmapAndPositions.second(); + assertEqual(bitmap, positions); + assertRandomPositions(bitmap, positions); + } + + @TestTemplate + public void testRandomDenseBitmap() { + Pair> bitmapAndPositions = generateDenseBitmap(7); + RoaringPositionBitmap bitmap = bitmapAndPositions.first(); + Set positions = bitmapAndPositions.second(); + assertEqual(bitmap, positions); + assertRandomPositions(bitmap, positions); + } + + @TestTemplate + public void testRandomMixedBitmap() { + Pair> bitmapAndPositions = + generateSparseBitmap( + (long) 3 << 32 /* min position must need at least 3 bitmaps */, + (long) 5 << 32 /* max position must not need more than 5 bitmaps */, + 100_000 /* cardinality */); + RoaringPositionBitmap bitmap = bitmapAndPositions.first(); + Set positions = bitmapAndPositions.second(); + + Pair> pair1 = generateDenseBitmap(9); + bitmap.addAll(pair1.first()); + positions.addAll(pair1.second()); + + Pair> pair2 = + generateSparseBitmap( + 0 /* min position */, + (long) 3 << 32 /* max position must not need more than 3 bitmaps */, + 25_000 /* cardinality */); + bitmap.addAll(pair2.first()); + positions.addAll(pair2.second()); + + Pair> pair3 = generateDenseBitmap(3); + bitmap.addAll(pair3.first()); + positions.addAll(pair3.second()); + + Pair> pair4 = + generateSparseBitmap( + 0 /* min position */, + (long) 1 << 32 /* max position must not need more than 1 bitmap */, + 5_000 /* cardinality */); + bitmap.addAll(pair4.first()); + positions.addAll(pair4.second()); + + assertEqual(bitmap, positions); + assertRandomPositions(bitmap, positions); + } + + private Pair> generateSparseBitmap( + long minInclusive, long maxExclusive, int size) { + Random random = new Random(seed); + RoaringPositionBitmap bitmap = new RoaringPositionBitmap(); + Set positions = Sets.newHashSet(); + + while (positions.size() < size) { + long position = nextLong(random, minInclusive, maxExclusive); + positions.add(position); + bitmap.add(position); + } + + return Pair.of(bitmap, positions); + } + + private Pair> generateDenseBitmap(int requiredBitmapCount) { + Random random = new Random(seed); + RoaringPositionBitmap bitmap = new RoaringPositionBitmap(); + Set positions = Sets.newHashSet(); + long currentPosition = 0; + + while (bitmap.allocatedBitmapCount() <= requiredBitmapCount) { + long maxRunPosition = currentPosition + nextLong(random, 1000, 2 * CONTAINER_SIZE); + for (long position = currentPosition; position <= maxRunPosition; position++) { + bitmap.add(position); + positions.add(position); + } + long shift = nextLong(random, (long) (0.1 * BITMAP_SIZE), (long) (0.25 * BITMAP_SIZE)); + currentPosition = maxRunPosition + shift; + } + + return Pair.of(bitmap, positions); + } + + private void assertRandomPositions(RoaringPositionBitmap bitmap, Set positions) { + Random random = new Random(validationSeed); + for (int ordinal = 0; ordinal < VALIDATION_LOOKUP_COUNT; ordinal++) { + long position = nextLong(random, 0, RoaringPositionBitmap.MAX_POSITION); + assertThat(bitmap.contains(position)).isEqualTo(positions.contains(position)); + } + } + + private static long nextLong(Random random, long minInclusive, long maxExclusive) { + return minInclusive + (long) (random.nextDouble() * (maxExclusive - minInclusive)); + } + private static long position(int bitmapIndex, int containerIndex, long value) { return bitmapIndex * BITMAP_OFFSET + containerIndex * CONTAINER_OFFSET + value; } @@ -353,4 +488,22 @@ private static byte[] readTestResource(String resourceName) throws IOException { URL resource = Resources.getResource(TestRoaringPositionBitmap.class, resourceName); return Resources.toByteArray(resource); } + + private static void assertEqual(RoaringPositionBitmap bitmap, Set positions) { + assertThat(bitmap.cardinality()).isEqualTo(positions.size()); + positions.forEach(position -> assertThat(bitmap.contains(position)).isTrue()); + bitmap.forEach(position -> assertThat(positions.contains(position)).isTrue()); + + RoaringPositionBitmap bitmapCopy1 = roundTripSerialize(bitmap); + assertThat(bitmapCopy1.cardinality()).isEqualTo(positions.size()); + positions.forEach(position -> assertThat(bitmapCopy1.contains(position)).isTrue()); + bitmapCopy1.forEach(position -> assertThat(positions.contains(position)).isTrue()); + + bitmap.runOptimize(); + + RoaringPositionBitmap bitmapCopy2 = roundTripSerialize(bitmap); + assertThat(bitmapCopy2.cardinality()).isEqualTo(positions.size()); + positions.forEach(position -> assertThat(bitmapCopy2.contains(position)).isTrue()); + bitmapCopy2.forEach(position -> assertThat(positions.contains(position)).isTrue()); + } } From 9f236c9b014585fa7ab7cba41d468dd69016b375 Mon Sep 17 00:00:00 2001 From: Anton Okolnychyi Date: Wed, 23 Oct 2024 20:06:46 +0200 Subject: [PATCH 6/9] More renames --- .../RoaringPositionBitmapBenchmark.java | 5 +- .../deletes/RoaringPositionBitmap.java | 70 +++++++++---------- .../deletes/TestRoaringPositionBitmap.java | 11 ++- 3 files changed, 46 insertions(+), 40 deletions(-) diff --git a/core/src/jmh/java/org/apache/iceberg/deletes/RoaringPositionBitmapBenchmark.java b/core/src/jmh/java/org/apache/iceberg/deletes/RoaringPositionBitmapBenchmark.java index f4325311d8a5..9ad3adb44e8d 100644 --- a/core/src/jmh/java/org/apache/iceberg/deletes/RoaringPositionBitmapBenchmark.java +++ b/core/src/jmh/java/org/apache/iceberg/deletes/RoaringPositionBitmapBenchmark.java @@ -51,6 +51,7 @@ @Timeout(time = 5, timeUnit = TimeUnit.MINUTES) public class RoaringPositionBitmapBenchmark { + private static final Random RANDOM = new Random(); private static final int TOTAL_POSITIONS = 5_000_000; private static final long STEP = 5L; @@ -152,11 +153,9 @@ private static long[] generateShuffledPositions() { } private static void shuffle(long[] array) { - Random rand = new Random(); - for (int index = array.length - 1; index > 0; index--) { // swap with an element at a random index between 0 and index - int thatIndex = rand.nextInt(index + 1); + int thatIndex = RANDOM.nextInt(index + 1); long temp = array[index]; array[index] = array[thatIndex]; array[thatIndex] = temp; diff --git a/core/src/main/java/org/apache/iceberg/deletes/RoaringPositionBitmap.java b/core/src/main/java/org/apache/iceberg/deletes/RoaringPositionBitmap.java index 861f5dcbe30c..738d601a0b01 100644 --- a/core/src/main/java/org/apache/iceberg/deletes/RoaringPositionBitmap.java +++ b/core/src/main/java/org/apache/iceberg/deletes/RoaringPositionBitmap.java @@ -32,20 +32,20 @@ /** * A bitmap that supports positive 64-bit positions, but is optimized for cases where most positions * fit in 32 bits by using an array of 32-bit Roaring bitmaps. The bitmap array is grown as needed - * to accommodate the largest value. + * to accommodate the largest position. * *

Incoming 64-bit positions are divided into a 32-bit "key" using the most significant 4 bytes - * and a 32-bit position using the least significant 4 bytes. For each key in the set of positions, - * a 32-bit Roaring bitmap is maintained to store a set of 32-bit positions for that key. + * and a 32-bit "value" using the least significant 4 bytes. For each key in the set of positions, a + * 32-bit Roaring bitmap is maintained to store a set of 32-bit values for that key. * *

To test whether a certain position is set, its most significant 4 bytes (the key) are used to * find a 32-bit bitmap and the least significant 4 bytes are tested for inclusion in the bitmap. If * a bitmap is not found for the key, then the position is not set. * *

Positions must range from 0 (inclusive) to {@link #MAX_POSITION} (inclusive). The bitmap - * cannot handle positions exceeding {@link #MAX_POSITION} because the array size is a signed 32-bit - * integer, which must be non-negative. Allocating an array with size Integer.MAX_VALUE + 1 would - * overflow the integer and result in a negative length. + * cannot handle positions exceeding {@link #MAX_POSITION} because the bitmap array size is a signed + * 32-bit integer, which must be non-negative. Allocating an array with size Integer.MAX_VALUE + 1 + * would trigger an overflow and result in a negative length. */ class RoaringPositionBitmap { @@ -71,10 +71,10 @@ private RoaringPositionBitmap(RoaringBitmap[] bitmaps) { */ public void add(long pos) { validatePosition(pos); - int high = highBytes(pos); - int low = lowBytes(pos); - allocateBitmapsIfNeeded(high + 1 /* required number of bitmaps */); - bitmaps[high].add(low); + int key = key(pos); + int value = value(pos); + allocateBitmapsIfNeeded(key + 1 /* required number of bitmaps */); + bitmaps[key].add(value); } /** @@ -96,8 +96,8 @@ public void addRange(long posStartInclusive, long posEndExclusive) { */ public void addAll(RoaringPositionBitmap that) { allocateBitmapsIfNeeded(that.bitmaps.length); - for (int index = 0; index < that.bitmaps.length; index++) { - bitmaps[index].or(that.bitmaps[index]); + for (int key = 0; key < that.bitmaps.length; key++) { + bitmaps[key].or(that.bitmaps[key]); } } @@ -109,9 +109,9 @@ public void addAll(RoaringPositionBitmap that) { */ public boolean contains(long pos) { validatePosition(pos); - int high = highBytes(pos); - int low = lowBytes(pos); - return high < bitmaps.length && bitmaps[high].contains(low); + int key = key(pos); + int value = value(pos); + return key < bitmaps.length && bitmaps[key].contains(value); } /** @@ -137,11 +137,11 @@ public long cardinality() { } /** - * Run-length compresses the bitmap if it is more space efficient. + * Compresses the bitmap wherever it is more space efficient. * * @return whether the bitmap was compressed */ - public boolean runOptimize() { + public boolean compress() { boolean changed = false; for (RoaringBitmap bitmap : bitmaps) { changed |= bitmap.runOptimize(); @@ -155,8 +155,8 @@ public boolean runOptimize() { * @param consumer a consumer for positions */ public void forEach(LongConsumer consumer) { - for (int index = 0; index < bitmaps.length; index++) { - forEach(bitmaps[index], index, consumer); + for (int key = 0; key < bitmaps.length; key++) { + forEach(bitmaps[key], key, consumer); } } @@ -172,8 +172,8 @@ private void allocateBitmapsIfNeeded(int requiredLength) { } else { RoaringBitmap[] newBitmaps = new RoaringBitmap[requiredLength]; System.arraycopy(bitmaps, 0, newBitmaps, 0, bitmaps.length); - for (int index = bitmaps.length; index < requiredLength; index++) { - newBitmaps[index] = new RoaringBitmap(); + for (int key = bitmaps.length; key < requiredLength; key++) { + newBitmaps[key] = new RoaringBitmap(); } this.bitmaps = newBitmaps; } @@ -213,9 +213,9 @@ public long serializedSizeInBytes() { public void serialize(ByteBuffer buffer) { validateByteOrder(buffer); buffer.putLong(bitmaps.length); - for (int index = 0; index < bitmaps.length; index++) { - buffer.putInt(index); - bitmaps[index].serialize(buffer); + for (int key = 0; key < bitmaps.length; key++) { + buffer.putInt(key); + bitmaps[key].serialize(buffer); } } @@ -286,25 +286,25 @@ private static RoaringBitmap readBitmap(ByteBuffer buffer) { } } - // extracts high 32 bits from a 64-bit position - private static int highBytes(long pos) { + // extracts high 32 bits from a 64-bit position (i.e. key) + private static int key(long pos) { return (int) (pos >> 32); } - // extracts low 32 bits from a 64-bit position - private static int lowBytes(long pos) { + // extracts low 32 bits from a 64-bit position (i.e. value) + private static int value(long pos) { return (int) pos; } - // combines high and low 32-bit values into a 64-bit position - // the low value must be bit-masked to avoid sign extension - private static long toPosition(int high, int low) { - return (((long) high) << 32) | (((long) low) & 0xFFFFFFFFL); + // combines 32-bit key and value into a 64-bit position + // the value must be bit-masked to avoid sign extension + private static long toPosition(int key, int value) { + return (((long) key) << 32) | (((long) value) & 0xFFFFFFFFL); } - // iterates over 64-bit positions, reconstructing them from high and low 32-bit values - private static void forEach(RoaringBitmap bitmap, int high, LongConsumer consumer) { - bitmap.forEach((int low) -> consumer.accept(toPosition(high, low))); + // iterates over 64-bit positions, reconstructing them from 32-bit keys and values + private static void forEach(RoaringBitmap bitmap, int key, LongConsumer consumer) { + bitmap.forEach((int value) -> consumer.accept(toPosition(key, value))); } private static void validatePosition(long pos) { diff --git a/core/src/test/java/org/apache/iceberg/deletes/TestRoaringPositionBitmap.java b/core/src/test/java/org/apache/iceberg/deletes/TestRoaringPositionBitmap.java index ebe0745f7b10..5ee07e719f0e 100644 --- a/core/src/test/java/org/apache/iceberg/deletes/TestRoaringPositionBitmap.java +++ b/core/src/test/java/org/apache/iceberg/deletes/TestRoaringPositionBitmap.java @@ -305,7 +305,7 @@ public void testSerializeDeserializeAllContainerBitmap() { position(1 /* bitmap */, 2 /* container */, 1L), position(1 /* bitmap */, 2 /* container */, CONTAINER_OFFSET - 1)); - assertThat(bitmap.runOptimize()).as("Bitmap must be RLE encoded").isTrue(); + assertThat(bitmap.compress()).as("Bitmap must be RLE encoded").isTrue(); RoaringPositionBitmap bitmapCopy = roundTripSerialize(bitmap); @@ -359,6 +359,13 @@ public void testUnsupportedPositions() { RoaringPositionBitmap.MAX_POSITION); } + @TestTemplate + public void testInvalidSerializationByteOrder() { + assertThatThrownBy(() -> RoaringPositionBitmap.deserialize(ByteBuffer.allocate(4))) + .isInstanceOf(IllegalArgumentException.class) + .hasMessageContaining("serialization requires little-endian byte order"); + } + @TestTemplate public void testRandomSparseBitmap() { Pair> bitmapAndPositions = @@ -499,7 +506,7 @@ private static void assertEqual(RoaringPositionBitmap bitmap, Set position positions.forEach(position -> assertThat(bitmapCopy1.contains(position)).isTrue()); bitmapCopy1.forEach(position -> assertThat(positions.contains(position)).isTrue()); - bitmap.runOptimize(); + bitmap.compress(); RoaringPositionBitmap bitmapCopy2 = roundTripSerialize(bitmap); assertThat(bitmapCopy2.cardinality()).isEqualTo(positions.size()); From 4bbaf7d00283bbdb4c7edb920a26529c6210fb7b Mon Sep 17 00:00:00 2001 From: Anton Okolnychyi Date: Thu, 24 Oct 2024 06:24:51 +0200 Subject: [PATCH 7/9] More renames --- .../RoaringPositionBitmapBenchmark.java | 2 -- .../deletes/RoaringPositionBitmap.java | 28 +++++++++---------- .../deletes/TestRoaringPositionBitmap.java | 28 +++++++++---------- 3 files changed, 28 insertions(+), 30 deletions(-) diff --git a/core/src/jmh/java/org/apache/iceberg/deletes/RoaringPositionBitmapBenchmark.java b/core/src/jmh/java/org/apache/iceberg/deletes/RoaringPositionBitmapBenchmark.java index 9ad3adb44e8d..895b7bd9c3c2 100644 --- a/core/src/jmh/java/org/apache/iceberg/deletes/RoaringPositionBitmapBenchmark.java +++ b/core/src/jmh/java/org/apache/iceberg/deletes/RoaringPositionBitmapBenchmark.java @@ -138,11 +138,9 @@ public void addAndCheckPositionsLibraryBitmap(Blackhole blackhole) { private static long[] generateOrderedPositions() { long[] positions = new long[TOTAL_POSITIONS]; - for (int index = 0; index < TOTAL_POSITIONS; index++) { positions[index] = index * STEP; } - return positions; } diff --git a/core/src/main/java/org/apache/iceberg/deletes/RoaringPositionBitmap.java b/core/src/main/java/org/apache/iceberg/deletes/RoaringPositionBitmap.java index 738d601a0b01..7273036fd685 100644 --- a/core/src/main/java/org/apache/iceberg/deletes/RoaringPositionBitmap.java +++ b/core/src/main/java/org/apache/iceberg/deletes/RoaringPositionBitmap.java @@ -35,8 +35,8 @@ * to accommodate the largest position. * *

Incoming 64-bit positions are divided into a 32-bit "key" using the most significant 4 bytes - * and a 32-bit "value" using the least significant 4 bytes. For each key in the set of positions, a - * 32-bit Roaring bitmap is maintained to store a set of 32-bit values for that key. + * and a 32-bit position using the least significant 4 bytes. For each key in the set of positions, + * a 32-bit Roaring bitmap is maintained to store a set of 32-bit position for that key. * *

To test whether a certain position is set, its most significant 4 bytes (the key) are used to * find a 32-bit bitmap and the least significant 4 bytes are tested for inclusion in the bitmap. If @@ -72,9 +72,9 @@ private RoaringPositionBitmap(RoaringBitmap[] bitmaps) { public void add(long pos) { validatePosition(pos); int key = key(pos); - int value = value(pos); + int pos32Bits = pos32Bits(pos); allocateBitmapsIfNeeded(key + 1 /* required number of bitmaps */); - bitmaps[key].add(value); + bitmaps[key].add(pos32Bits); } /** @@ -110,8 +110,8 @@ public void addAll(RoaringPositionBitmap that) { public boolean contains(long pos) { validatePosition(pos); int key = key(pos); - int value = value(pos); - return key < bitmaps.length && bitmaps[key].contains(value); + int pos32Bits = pos32Bits(pos); + return key < bitmaps.length && bitmaps[key].contains(pos32Bits); } /** @@ -291,20 +291,20 @@ private static int key(long pos) { return (int) (pos >> 32); } - // extracts low 32 bits from a 64-bit position (i.e. value) - private static int value(long pos) { + // extracts low 32 bits from a 64-bit position + private static int pos32Bits(long pos) { return (int) pos; } - // combines 32-bit key and value into a 64-bit position - // the value must be bit-masked to avoid sign extension - private static long toPosition(int key, int value) { - return (((long) key) << 32) | (((long) value) & 0xFFFFFFFFL); + // combines high and low 32 bits into a 64-bit position + // the low 32 bits must be bit-masked to avoid sign extension + private static long toPosition(int key, int pos32Bits) { + return (((long) key) << 32) | (((long) pos32Bits) & 0xFFFFFFFFL); } - // iterates over 64-bit positions, reconstructing them from 32-bit keys and values + // iterates over 64-bit positions, reconstructing them from 32-bit keys and positions private static void forEach(RoaringBitmap bitmap, int key, LongConsumer consumer) { - bitmap.forEach((int value) -> consumer.accept(toPosition(key, value))); + bitmap.forEach((int pos32Bits) -> consumer.accept(toPosition(key, pos32Bits))); } private static void validatePosition(long pos) { diff --git a/core/src/test/java/org/apache/iceberg/deletes/TestRoaringPositionBitmap.java b/core/src/test/java/org/apache/iceberg/deletes/TestRoaringPositionBitmap.java index 5ee07e719f0e..00d0b28daaff 100644 --- a/core/src/test/java/org/apache/iceberg/deletes/TestRoaringPositionBitmap.java +++ b/core/src/test/java/org/apache/iceberg/deletes/TestRoaringPositionBitmap.java @@ -85,10 +85,10 @@ public void testAddPositionsRequiringMultipleBitmaps() { RoaringPositionBitmap bitmap = new RoaringPositionBitmap(); // construct positions that differ in their high 32-bit parts (i.e. keys) - long pos1 = ((long) 0 << 32) | 10L; // high = 0, low = 10 - long pos2 = ((long) 1 << 32) | 20L; // high = 1, low = 20 - long pos3 = ((long) 2 << 32) | 30L; // high = 2, low = 30 - long pos4 = ((long) 100 << 32) | 40L; // high = 100, low = 40 + long pos1 = ((long) 0 << 32) | 10L; // key = 0, low = 10 + long pos2 = ((long) 1 << 32) | 20L; // key = 1, low = 20 + long pos3 = ((long) 2 << 32) | 30L; // key = 2, low = 30 + long pos4 = ((long) 100 << 32) | 40L; // key = 100, low = 40 bitmap.add(pos1); bitmap.add(pos2); @@ -101,7 +101,7 @@ public void testAddPositionsRequiringMultipleBitmaps() { assertThat(bitmap.contains(pos4)).isTrue(); assertThat(bitmap.cardinality()).isEqualTo(4); assertThat(bitmap.serializedSizeInBytes()).isGreaterThan(4); - assertThat(bitmap.allocatedBitmapCount()).isEqualTo(101 /* max high + 1 */); + assertThat(bitmap.allocatedBitmapCount()).isEqualTo(101 /* max key + 1 */); } @TestTemplate @@ -227,12 +227,12 @@ public void testAddAllWithOverlappingBitmap() { @TestTemplate public void testAddAllSparseBitmaps() { RoaringPositionBitmap bitmap1 = new RoaringPositionBitmap(); - bitmap1.add((long) 0 << 32 | 100L); // High = 0, Low = 100 - bitmap1.add((long) 1 << 32 | 200L); // High = 1, Low = 200 + bitmap1.add((long) 0 << 32 | 100L); // key = 0, low = 100 + bitmap1.add((long) 1 << 32 | 200L); // key = 1, low = 200 RoaringPositionBitmap bitmap2 = new RoaringPositionBitmap(); - bitmap2.add((long) 2 << 32 | 300L); // High = 2, Low = 300 - bitmap2.add((long) 3 << 32 | 400L); // High = 3, Low = 400 + bitmap2.add((long) 2 << 32 | 300L); // key = 2, low = 300 + bitmap2.add((long) 3 << 32 | 400L); // key = 3, low = 400 bitmap1.addAll(bitmap2); @@ -264,11 +264,11 @@ public void testCardinality() { public void testCardinalitySparseBitmaps() { RoaringPositionBitmap bitmap = new RoaringPositionBitmap(); - bitmap.add((long) 0 << 32 | 100L); // high = 0, low = 100 - bitmap.add((long) 0 << 32 | 101L); // high = 0, low = 101 - bitmap.add((long) 0 << 32 | 105L); // high = 0, low = 101 - bitmap.add((long) 1 << 32 | 200L); // high = 1, low = 200 - bitmap.add((long) 100 << 32 | 300L); // high = 100, low = 300 + bitmap.add((long) 0 << 32 | 100L); // key = 0, low = 100 + bitmap.add((long) 0 << 32 | 101L); // key = 0, low = 101 + bitmap.add((long) 0 << 32 | 105L); // key = 0, low = 101 + bitmap.add((long) 1 << 32 | 200L); // key = 1, low = 200 + bitmap.add((long) 100 << 32 | 300L); // key = 100, low = 300 assertThat(bitmap.cardinality()).isEqualTo(5); } From 2d1b5469f0adc2f73d26b5b0127f92eab2988be5 Mon Sep 17 00:00:00 2001 From: Anton Okolnychyi Date: Mon, 28 Oct 2024 17:41:35 +0100 Subject: [PATCH 8/9] Polish --- .../RoaringPositionBitmapBenchmark.java | 6 +- .../deletes/RoaringPositionBitmap.java | 69 ++++----- .../deletes/TestRoaringPositionBitmap.java | 133 +++++++++--------- 3 files changed, 104 insertions(+), 104 deletions(-) diff --git a/core/src/jmh/java/org/apache/iceberg/deletes/RoaringPositionBitmapBenchmark.java b/core/src/jmh/java/org/apache/iceberg/deletes/RoaringPositionBitmapBenchmark.java index 895b7bd9c3c2..1cbc39583fbc 100644 --- a/core/src/jmh/java/org/apache/iceberg/deletes/RoaringPositionBitmapBenchmark.java +++ b/core/src/jmh/java/org/apache/iceberg/deletes/RoaringPositionBitmapBenchmark.java @@ -69,7 +69,7 @@ public void setupBenchmark() { public void addOrderedPositionsIcebergBitmap(Blackhole blackhole) { RoaringPositionBitmap bitmap = new RoaringPositionBitmap(); for (long position : orderedPositions) { - bitmap.add(position); + bitmap.set(position); } blackhole.consume(bitmap); } @@ -89,7 +89,7 @@ public void addOrderedPositionsLibraryBitmap(Blackhole blackhole) { public void addShuffledPositionsIcebergBitmap(Blackhole blackhole) { RoaringPositionBitmap bitmap = new RoaringPositionBitmap(); for (long position : shuffledPositions) { - bitmap.add(position); + bitmap.set(position); } blackhole.consume(bitmap); } @@ -110,7 +110,7 @@ public void addAndCheckPositionsIcebergBitmap(Blackhole blackhole) { RoaringPositionBitmap bitmap = new RoaringPositionBitmap(); for (long position : shuffledPositions) { - bitmap.add(position); + bitmap.set(position); } for (long position = 0; position <= TOTAL_POSITIONS * STEP; position++) { diff --git a/core/src/main/java/org/apache/iceberg/deletes/RoaringPositionBitmap.java b/core/src/main/java/org/apache/iceberg/deletes/RoaringPositionBitmap.java index 7273036fd685..b4a894e15b11 100644 --- a/core/src/main/java/org/apache/iceberg/deletes/RoaringPositionBitmap.java +++ b/core/src/main/java/org/apache/iceberg/deletes/RoaringPositionBitmap.java @@ -30,22 +30,23 @@ import org.roaringbitmap.RoaringBitmap; /** - * A bitmap that supports positive 64-bit positions, but is optimized for cases where most positions - * fit in 32 bits by using an array of 32-bit Roaring bitmaps. The bitmap array is grown as needed - * to accommodate the largest position. + * A bitmap that supports positive 64-bit positions (the most significant bit must be 0), but is + * optimized for cases where most positions fit in 32 bits by using an array of 32-bit Roaring + * bitmaps. The internal bitmap array is grown as needed to accommodate the largest position. * *

Incoming 64-bit positions are divided into a 32-bit "key" using the most significant 4 bytes * and a 32-bit position using the least significant 4 bytes. For each key in the set of positions, - * a 32-bit Roaring bitmap is maintained to store a set of 32-bit position for that key. + * a 32-bit Roaring bitmap is maintained to store a set of 32-bit positions for that key. * *

To test whether a certain position is set, its most significant 4 bytes (the key) are used to * find a 32-bit bitmap and the least significant 4 bytes are tested for inclusion in the bitmap. If * a bitmap is not found for the key, then the position is not set. * - *

Positions must range from 0 (inclusive) to {@link #MAX_POSITION} (inclusive). The bitmap - * cannot handle positions exceeding {@link #MAX_POSITION} because the bitmap array size is a signed - * 32-bit integer, which must be non-negative. Allocating an array with size Integer.MAX_VALUE + 1 - * would trigger an overflow and result in a negative length. + *

Positions must range from 0 (inclusive) to {@link #MAX_POSITION} (inclusive). This class + * cannot handle positions with the key equal to Integer.MAX_VALUE because the length of the + * internal bitmap array is a signed 32-bit integer, which must be greater than or equal to 0. + * Supporting Integer.MAX_VALUE as a key would require allocating a bitmap array with size + * Integer.MAX_VALUE + 1, triggering an integer overflow. */ class RoaringPositionBitmap { @@ -65,36 +66,36 @@ private RoaringPositionBitmap(RoaringBitmap[] bitmaps) { } /** - * Adds a position to the bitmap. + * Sets a position in the bitmap. * - * @param pos the row position + * @param pos the position */ - public void add(long pos) { + public void set(long pos) { validatePosition(pos); - int key = key(pos); - int pos32Bits = pos32Bits(pos); - allocateBitmapsIfNeeded(key + 1 /* required number of bitmaps */); + int key = extractKey(pos); + int pos32Bits = extract32Bits(pos); + allocateBitmapsIfNeeded(key + 1 /* required bitmap array length is key + 1 */); bitmaps[key].add(pos32Bits); } /** - * Adds a range of positions to the bitmap. + * Sets a range of positions in the bitmap. * * @param posStartInclusive the start position of the range (inclusive) * @param posEndExclusive the end position of the range (exclusive) */ - public void addRange(long posStartInclusive, long posEndExclusive) { + public void setRange(long posStartInclusive, long posEndExclusive) { for (long pos = posStartInclusive; pos < posEndExclusive; pos++) { - add(pos); + set(pos); } } /** - * Adds all positions from the other bitmap to this bitmap, modifying this bitmap in place. + * Sets all positions from the other bitmap in this bitmap, modifying this bitmap in place. * * @param that the other bitmap */ - public void addAll(RoaringPositionBitmap that) { + public void setAll(RoaringPositionBitmap that) { allocateBitmapsIfNeeded(that.bitmaps.length); for (int key = 0; key < that.bitmaps.length; key++) { bitmaps[key].or(that.bitmaps[key]); @@ -102,20 +103,20 @@ public void addAll(RoaringPositionBitmap that) { } /** - * Checks if the bitmap contains a position. + * Checks if a position is set in the bitmap. * - * @param pos the row position - * @return true if the position is in this bitmap, false otherwise + * @param pos the position + * @return true if the position is set in this bitmap, false otherwise */ public boolean contains(long pos) { validatePosition(pos); - int key = key(pos); - int pos32Bits = pos32Bits(pos); + int key = extractKey(pos); + int pos32Bits = extract32Bits(pos); return key < bitmaps.length && bitmaps[key].contains(pos32Bits); } /** - * Indicates whether the bitmap is empty. + * Indicates whether the bitmap has any positions set. * * @return true if the bitmap is empty, false otherwise */ @@ -124,7 +125,7 @@ public boolean isEmpty() { } /** - * Returns the number of positions in the bitmap. + * Returns the number of set positions in the bitmap. * * @return the number of set positions */ @@ -137,11 +138,11 @@ public long cardinality() { } /** - * Compresses the bitmap wherever it is more space efficient. + * Applies run-length encoding wherever it is more space efficient. * - * @return whether the bitmap was compressed + * @return whether the bitmap was changed */ - public boolean compress() { + public boolean runLengthEncode() { boolean changed = false; for (RoaringBitmap bitmap : bitmaps) { changed |= bitmap.runOptimize(); @@ -156,7 +157,7 @@ public boolean compress() { */ public void forEach(LongConsumer consumer) { for (int key = 0; key < bitmaps.length; key++) { - forEach(bitmaps[key], key, consumer); + forEach(key, bitmaps[key], consumer); } } @@ -287,12 +288,12 @@ private static RoaringBitmap readBitmap(ByteBuffer buffer) { } // extracts high 32 bits from a 64-bit position (i.e. key) - private static int key(long pos) { + private static int extractKey(long pos) { return (int) (pos >> 32); } // extracts low 32 bits from a 64-bit position - private static int pos32Bits(long pos) { + private static int extract32Bits(long pos) { return (int) pos; } @@ -302,8 +303,8 @@ private static long toPosition(int key, int pos32Bits) { return (((long) key) << 32) | (((long) pos32Bits) & 0xFFFFFFFFL); } - // iterates over 64-bit positions, reconstructing them from 32-bit keys and positions - private static void forEach(RoaringBitmap bitmap, int key, LongConsumer consumer) { + // iterates over 64-bit positions, reconstructing them from keys and 32-bit positions + private static void forEach(int key, RoaringBitmap bitmap, LongConsumer consumer) { bitmap.forEach((int pos32Bits) -> consumer.accept(toPosition(key, pos32Bits))); } diff --git a/core/src/test/java/org/apache/iceberg/deletes/TestRoaringPositionBitmap.java b/core/src/test/java/org/apache/iceberg/deletes/TestRoaringPositionBitmap.java index 00d0b28daaff..2daf0382973b 100644 --- a/core/src/test/java/org/apache/iceberg/deletes/TestRoaringPositionBitmap.java +++ b/core/src/test/java/org/apache/iceberg/deletes/TestRoaringPositionBitmap.java @@ -70,13 +70,13 @@ protected static List parameters() { public void testAdd() { RoaringPositionBitmap bitmap = new RoaringPositionBitmap(); - bitmap.add(10L); + bitmap.set(10L); assertThat(bitmap.contains(10L)).isTrue(); - bitmap.add(0L); + bitmap.set(0L); assertThat(bitmap.contains(0L)).isTrue(); - bitmap.add(10L); + bitmap.set(10L); assertThat(bitmap.contains(10L)).isTrue(); } @@ -90,10 +90,10 @@ public void testAddPositionsRequiringMultipleBitmaps() { long pos3 = ((long) 2 << 32) | 30L; // key = 2, low = 30 long pos4 = ((long) 100 << 32) | 40L; // key = 100, low = 40 - bitmap.add(pos1); - bitmap.add(pos2); - bitmap.add(pos3); - bitmap.add(pos4); + bitmap.set(pos1); + bitmap.set(pos2); + bitmap.set(pos3); + bitmap.set(pos4); assertThat(bitmap.contains(pos1)).isTrue(); assertThat(bitmap.contains(pos2)).isTrue(); @@ -110,7 +110,7 @@ public void testAddRange() { long posStartInclusive = 10L; long posEndExclusive = 20L; - bitmap.addRange(posStartInclusive, posEndExclusive); + bitmap.setRange(posStartInclusive, posEndExclusive); // assert that all positions in the range [10, 20) are added for (long pos = posStartInclusive; pos < posEndExclusive; pos++) { @@ -131,7 +131,7 @@ public void testAddRangeAcrossKeys() { long posStartInclusive = ((long) 1 << 32) - 5L; long posEndExclusive = ((long) 1 << 32) + 5L; - bitmap.addRange(posStartInclusive, posEndExclusive); + bitmap.setRange(posStartInclusive, posEndExclusive); // assert that all positions in the range are added for (long pos = posStartInclusive; pos < posEndExclusive; pos++) { @@ -149,22 +149,22 @@ public void testAddRangeAcrossKeys() { @TestTemplate public void testAddEmptyRange() { RoaringPositionBitmap bitmap = new RoaringPositionBitmap(); - bitmap.addRange(10, 10); + bitmap.setRange(10, 10); assertThat(bitmap.isEmpty()).isTrue(); } @TestTemplate public void testAddAll() { RoaringPositionBitmap bitmap1 = new RoaringPositionBitmap(); - bitmap1.add(10L); - bitmap1.add(20L); + bitmap1.set(10L); + bitmap1.set(20L); RoaringPositionBitmap bitmap2 = new RoaringPositionBitmap(); - bitmap2.add(30L); - bitmap2.add(40L); - bitmap2.add((long) 2 << 32); + bitmap2.set(30L); + bitmap2.set(40L); + bitmap2.set((long) 2 << 32); - bitmap1.addAll(bitmap2); + bitmap1.setAll(bitmap2); assertThat(bitmap1.contains(10L)).isTrue(); assertThat(bitmap1.contains(20L)).isTrue(); @@ -181,12 +181,12 @@ public void testAddAll() { @TestTemplate public void testAddAllWithEmptyBitmap() { RoaringPositionBitmap bitmap1 = new RoaringPositionBitmap(); - bitmap1.add(10L); - bitmap1.add(20L); + bitmap1.set(10L); + bitmap1.set(20L); RoaringPositionBitmap emptyBitmap = new RoaringPositionBitmap(); - bitmap1.addAll(emptyBitmap); + bitmap1.setAll(emptyBitmap); assertThat(bitmap1.contains(10L)).isTrue(); assertThat(bitmap1.contains(20L)).isTrue(); @@ -201,15 +201,15 @@ public void testAddAllWithEmptyBitmap() { @TestTemplate public void testAddAllWithOverlappingBitmap() { RoaringPositionBitmap bitmap1 = new RoaringPositionBitmap(); - bitmap1.add(10L); - bitmap1.add(20L); - bitmap1.add(30L); + bitmap1.set(10L); + bitmap1.set(20L); + bitmap1.set(30L); RoaringPositionBitmap bitmap2 = new RoaringPositionBitmap(); - bitmap2.add(20L); - bitmap2.add(40L); + bitmap2.set(20L); + bitmap2.set(40L); - bitmap1.addAll(bitmap2); + bitmap1.setAll(bitmap2); assertThat(bitmap1.contains(10L)).isTrue(); assertThat(bitmap1.contains(20L)).isTrue(); @@ -227,14 +227,14 @@ public void testAddAllWithOverlappingBitmap() { @TestTemplate public void testAddAllSparseBitmaps() { RoaringPositionBitmap bitmap1 = new RoaringPositionBitmap(); - bitmap1.add((long) 0 << 32 | 100L); // key = 0, low = 100 - bitmap1.add((long) 1 << 32 | 200L); // key = 1, low = 200 + bitmap1.set((long) 0 << 32 | 100L); // key = 0, low = 100 + bitmap1.set((long) 1 << 32 | 200L); // key = 1, low = 200 RoaringPositionBitmap bitmap2 = new RoaringPositionBitmap(); - bitmap2.add((long) 2 << 32 | 300L); // key = 2, low = 300 - bitmap2.add((long) 3 << 32 | 400L); // key = 3, low = 400 + bitmap2.set((long) 2 << 32 | 300L); // key = 2, low = 300 + bitmap2.set((long) 3 << 32 | 400L); // key = 3, low = 400 - bitmap1.addAll(bitmap2); + bitmap1.setAll(bitmap2); assertThat(bitmap1.contains((long) 0 << 32 | 100L)).isTrue(); assertThat(bitmap1.contains((long) 1 << 32 | 200L)).isTrue(); @@ -249,13 +249,13 @@ public void testCardinality() { assertThat(bitmap.cardinality()).isEqualTo(0); - bitmap.add(10L); - bitmap.add(20L); - bitmap.add(30L); + bitmap.set(10L); + bitmap.set(20L); + bitmap.set(30L); assertThat(bitmap.cardinality()).isEqualTo(3); - bitmap.add(10L); // already exists + bitmap.set(10L); // already exists assertThat(bitmap.cardinality()).isEqualTo(3); } @@ -264,11 +264,11 @@ public void testCardinality() { public void testCardinalitySparseBitmaps() { RoaringPositionBitmap bitmap = new RoaringPositionBitmap(); - bitmap.add((long) 0 << 32 | 100L); // key = 0, low = 100 - bitmap.add((long) 0 << 32 | 101L); // key = 0, low = 101 - bitmap.add((long) 0 << 32 | 105L); // key = 0, low = 101 - bitmap.add((long) 1 << 32 | 200L); // key = 1, low = 200 - bitmap.add((long) 100 << 32 | 300L); // key = 100, low = 300 + bitmap.set((long) 0 << 32 | 100L); // key = 0, low = 100 + bitmap.set((long) 0 << 32 | 101L); // key = 0, low = 101 + bitmap.set((long) 0 << 32 | 105L); // key = 0, low = 101 + bitmap.set((long) 1 << 32 | 200L); // key = 1, low = 200 + bitmap.set((long) 100 << 32 | 300L); // key = 100, low = 300 assertThat(bitmap.cardinality()).isEqualTo(5); } @@ -278,34 +278,34 @@ public void testSerializeDeserializeAllContainerBitmap() { RoaringPositionBitmap bitmap = new RoaringPositionBitmap(); // bitmap 0, container 0 (array) - bitmap.add(position(0 /* bitmap */, 0 /* container */, 5L)); - bitmap.add(position(0 /* bitmap */, 0 /* container */, 7L)); + bitmap.set(position(0 /* bitmap */, 0 /* container */, 5L)); + bitmap.set(position(0 /* bitmap */, 0 /* container */, 7L)); // bitmap 0, container 1 (array that can be compressed) - bitmap.addRange( + bitmap.setRange( position(0 /* bitmap */, 1 /* container */, 1L), position(0 /* bitmap */, 1 /* container */, 1000L)); // bitmap 1, container 2 (bitset) - bitmap.addRange( + bitmap.setRange( position(0 /* bitmap */, 2 /* container */, 1L), position(0 /* bitmap */, 2 /* container */, CONTAINER_OFFSET - 1L)); // bitmap 1, container 0 (array) - bitmap.add(position(1 /* bitmap */, 0 /* container */, 10L)); - bitmap.add(position(1 /* bitmap */, 0 /* container */, 20L)); + bitmap.set(position(1 /* bitmap */, 0 /* container */, 10L)); + bitmap.set(position(1 /* bitmap */, 0 /* container */, 20L)); // bitmap 1, container 1 (array that can be compressed) - bitmap.addRange( + bitmap.setRange( position(1 /* bitmap */, 1 /* container */, 10L), position(1 /* bitmap */, 1 /* container */, 500L)); // bitmap 1, container 2 (bitset) - bitmap.addRange( + bitmap.setRange( position(1 /* bitmap */, 2 /* container */, 1L), position(1 /* bitmap */, 2 /* container */, CONTAINER_OFFSET - 1)); - assertThat(bitmap.compress()).as("Bitmap must be RLE encoded").isTrue(); + assertThat(bitmap.runLengthEncode()).as("Bitmap must be RLE encoded").isTrue(); RoaringPositionBitmap bitmapCopy = roundTripSerialize(bitmap); @@ -334,7 +334,7 @@ public void testDeserializeUnsupportedRoaringExample() { public void testUnsupportedPositions() { RoaringPositionBitmap bitmap = new RoaringPositionBitmap(); - assertThatThrownBy(() -> bitmap.add(-1L)) + assertThatThrownBy(() -> bitmap.set(-1L)) .isInstanceOf(IllegalArgumentException.class) .hasMessageContaining( "Bitmap supports positions that are >= 0 and <= %s", @@ -346,7 +346,7 @@ public void testUnsupportedPositions() { "Bitmap supports positions that are >= 0 and <= %s", RoaringPositionBitmap.MAX_POSITION); - assertThatThrownBy(() -> bitmap.add(RoaringPositionBitmap.MAX_POSITION + 1L)) + assertThatThrownBy(() -> bitmap.set(RoaringPositionBitmap.MAX_POSITION + 1L)) .isInstanceOf(IllegalArgumentException.class) .hasMessageContaining( "Bitmap supports positions that are >= 0 and <= %s", @@ -399,7 +399,7 @@ public void testRandomMixedBitmap() { Set positions = bitmapAndPositions.second(); Pair> pair1 = generateDenseBitmap(9); - bitmap.addAll(pair1.first()); + bitmap.setAll(pair1.first()); positions.addAll(pair1.second()); Pair> pair2 = @@ -407,11 +407,11 @@ public void testRandomMixedBitmap() { 0 /* min position */, (long) 3 << 32 /* max position must not need more than 3 bitmaps */, 25_000 /* cardinality */); - bitmap.addAll(pair2.first()); + bitmap.setAll(pair2.first()); positions.addAll(pair2.second()); Pair> pair3 = generateDenseBitmap(3); - bitmap.addAll(pair3.first()); + bitmap.setAll(pair3.first()); positions.addAll(pair3.second()); Pair> pair4 = @@ -419,7 +419,7 @@ public void testRandomMixedBitmap() { 0 /* min position */, (long) 1 << 32 /* max position must not need more than 1 bitmap */, 5_000 /* cardinality */); - bitmap.addAll(pair4.first()); + bitmap.setAll(pair4.first()); positions.addAll(pair4.second()); assertEqual(bitmap, positions); @@ -435,7 +435,7 @@ private Pair> generateSparseBitmap( while (positions.size() < size) { long position = nextLong(random, minInclusive, maxExclusive); positions.add(position); - bitmap.add(position); + bitmap.set(position); } return Pair.of(bitmap, positions); @@ -450,7 +450,7 @@ private Pair> generateDenseBitmap(int requiredB while (bitmap.allocatedBitmapCount() <= requiredBitmapCount) { long maxRunPosition = currentPosition + nextLong(random, 1000, 2 * CONTAINER_SIZE); for (long position = currentPosition; position <= maxRunPosition; position++) { - bitmap.add(position); + bitmap.set(position); positions.add(position); } long shift = nextLong(random, (long) (0.1 * BITMAP_SIZE), (long) (0.25 * BITMAP_SIZE)); @@ -497,20 +497,19 @@ private static byte[] readTestResource(String resourceName) throws IOException { } private static void assertEqual(RoaringPositionBitmap bitmap, Set positions) { - assertThat(bitmap.cardinality()).isEqualTo(positions.size()); - positions.forEach(position -> assertThat(bitmap.contains(position)).isTrue()); - bitmap.forEach(position -> assertThat(positions.contains(position)).isTrue()); + assertEqualContent(bitmap, positions); RoaringPositionBitmap bitmapCopy1 = roundTripSerialize(bitmap); - assertThat(bitmapCopy1.cardinality()).isEqualTo(positions.size()); - positions.forEach(position -> assertThat(bitmapCopy1.contains(position)).isTrue()); - bitmapCopy1.forEach(position -> assertThat(positions.contains(position)).isTrue()); - - bitmap.compress(); + assertEqualContent(bitmapCopy1, positions); + bitmap.runLengthEncode(); RoaringPositionBitmap bitmapCopy2 = roundTripSerialize(bitmap); - assertThat(bitmapCopy2.cardinality()).isEqualTo(positions.size()); - positions.forEach(position -> assertThat(bitmapCopy2.contains(position)).isTrue()); - bitmapCopy2.forEach(position -> assertThat(positions.contains(position)).isTrue()); + assertEqualContent(bitmapCopy2, positions); + } + + private static void assertEqualContent(RoaringPositionBitmap bitmap, Set positions) { + assertThat(bitmap.cardinality()).isEqualTo(positions.size()); + positions.forEach(position -> assertThat(bitmap.contains(position)).isTrue()); + bitmap.forEach(position -> assertThat(positions.contains(position)).isTrue()); } } From 8a6729fc1bf5d44ac10623715bf3fc3a80a5f7db Mon Sep 17 00:00:00 2001 From: Anton Okolnychyi Date: Mon, 28 Oct 2024 20:55:53 +0100 Subject: [PATCH 9/9] Minor private method rename --- .../iceberg/deletes/RoaringPositionBitmap.java | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/core/src/main/java/org/apache/iceberg/deletes/RoaringPositionBitmap.java b/core/src/main/java/org/apache/iceberg/deletes/RoaringPositionBitmap.java index b4a894e15b11..eec130743d85 100644 --- a/core/src/main/java/org/apache/iceberg/deletes/RoaringPositionBitmap.java +++ b/core/src/main/java/org/apache/iceberg/deletes/RoaringPositionBitmap.java @@ -72,9 +72,9 @@ private RoaringPositionBitmap(RoaringBitmap[] bitmaps) { */ public void set(long pos) { validatePosition(pos); - int key = extractKey(pos); - int pos32Bits = extract32Bits(pos); - allocateBitmapsIfNeeded(key + 1 /* required bitmap array length is key + 1 */); + int key = key(pos); + int pos32Bits = pos32Bits(pos); + allocateBitmapsIfNeeded(key + 1 /* required bitmap array length */); bitmaps[key].add(pos32Bits); } @@ -110,8 +110,8 @@ public void setAll(RoaringPositionBitmap that) { */ public boolean contains(long pos) { validatePosition(pos); - int key = extractKey(pos); - int pos32Bits = extract32Bits(pos); + int key = key(pos); + int pos32Bits = pos32Bits(pos); return key < bitmaps.length && bitmaps[key].contains(pos32Bits); } @@ -288,12 +288,12 @@ private static RoaringBitmap readBitmap(ByteBuffer buffer) { } // extracts high 32 bits from a 64-bit position (i.e. key) - private static int extractKey(long pos) { + private static int key(long pos) { return (int) (pos >> 32); } - // extracts low 32 bits from a 64-bit position - private static int extract32Bits(long pos) { + // extracts low 32 bits from a 64-bit position (i.e. 32-bit position) + private static int pos32Bits(long pos) { return (int) pos; }