Skip to content

Commit

Permalink
Merge pull request #1 from Dr-Emann/dremann/from-bitmap-bytes
Browse files Browse the repository at this point in the history
Speed up from_bitmap_bytes, and use less unsafe
  • Loading branch information
lemolatoon authored Sep 12, 2024
2 parents 4e1409d + ed8f0cb commit aace6b8
Show file tree
Hide file tree
Showing 5 changed files with 186 additions and 123 deletions.
7 changes: 7 additions & 0 deletions roaring/src/bitmap/container.rs
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,13 @@ impl Container {
pub fn full(key: u16) -> Container {
Container { key, store: Store::full() }
}

/// Builds a container with the given `key` from a slice of LSB-first bytes,
/// placed `byte_offset` bytes into the container's 64 Kib bit space.
///
/// Returns `None` when no bit in `bytes` is set (an empty container is never
/// materialized); the store kind (array vs. bitmap) is chosen by
/// `Store::from_lsb0_bytes`.
pub fn from_lsb0_bytes(key: u16, bytes: &[u8], byte_offset: usize) -> Option<Self> {
    Store::from_lsb0_bytes(bytes, byte_offset).map(|store| Container { key, store })
}
}

impl Container {
Expand Down
203 changes: 80 additions & 123 deletions roaring/src/bitmap/serialization.rs
Original file line number Diff line number Diff line change
@@ -1,15 +1,14 @@
use crate::bitmap::container::{Container, ARRAY_LIMIT};
use crate::bitmap::store::{ArrayStore, BitmapStore, Store, BITMAP_LENGTH};
use crate::RoaringBitmap;
use bytemuck::cast_slice_mut;
use byteorder::{LittleEndian, ReadBytesExt, WriteBytesExt};
use core::convert::Infallible;
use core::ops::RangeInclusive;
use core::mem::size_of;
use std::error::Error;
use std::io;

use crate::bitmap::container::{Container, ARRAY_LIMIT};
use crate::bitmap::store::{ArrayStore, BitmapStore, Store, BITMAP_LENGTH};
use crate::bitmap::util;
use crate::RoaringBitmap;

pub const SERIAL_COOKIE_NO_RUNCONTAINER: u32 = 12346;
pub const SERIAL_COOKIE: u16 = 12347;
pub const NO_OFFSET_THRESHOLD: usize = 4;
Expand Down Expand Up @@ -65,7 +64,7 @@ impl RoaringBitmap {
///
/// # Panics
///
/// This function will panic if `offset` is not a multiple of 8.
/// This function will panic if `offset` is not a multiple of 8, or if `bytes.len() + offset` is greater than 2^32.
///
///
/// # Examples
Expand All @@ -90,134 +89,66 @@ impl RoaringBitmap {
/// assert!(rb.contains(17));
/// assert!(rb.contains(39));
/// ```
#[inline]
pub fn from_bitmap_bytes(offset: u32, bytes: &[u8]) -> RoaringBitmap {
#[inline(always)]
fn next_multiple_of_u32(n: u32, multiple: u32) -> u32 {
(n + multiple - 1) / multiple * multiple
}
#[inline(always)]
fn next_multiple_of_usize(n: usize, multiple: usize) -> usize {
(n + multiple - 1) / multiple * multiple
}
/// Copies bits from `src` to `dst` at `bits_offset` bits offset.
/// Safety: `src` must be smaller than or equal to `BYTES_IN_ONE_CONTAINER` u8s,
/// considering `byte_offset`.
#[inline(always)]
#[cfg(target_endian = "little")]
unsafe fn copy_bits(src: &[u8], dst: &mut [u64; BITMAP_LENGTH], byte_offset: usize) {
debug_assert!(src.len() + byte_offset <= BYTES_IN_ONE_CONTAINER);

// Safety:
// * `byte_offset` is within the bounds of `dst`, guaranteed by the caller.
let bits_ptr = unsafe { dst.as_mut_ptr().cast::<u8>().add(byte_offset) };
// Safety:
// * `src` is a slice of `bytes` and is guaranteed to be smaller than or equal to `BYTES_IN_ONE_CONTAINER` u8s considering `byte_offset`,
// guaranteed by the caller.
unsafe {
core::ptr::copy_nonoverlapping(src.as_ptr(), bits_ptr, src.len());
}
}
/// Copies bits from `src` to `dst` at `bits_offset` bits offset.
/// Safety: `src` must be smaller than or equal to `BYTES_IN_ONE_CONTAINER` u8s,
/// considering `byte_offset`.
#[inline(always)]
#[cfg(target_endian = "big")]
unsafe fn copy_bits(src: &[u8], dst: &mut [u64; BITMAP_LENGTH], byte_offset: usize) {
debug_assert!(src.len() + byte_offset <= BYTES_IN_ONE_CONTAINER);

if byte_offset % 8 != 0 {
let mut bytes = [0u8; 8];

let src_range = 0..(8 - byte_offset % 8).min(src.len());
bytes[8 - src_range.len()..8].copy_from_slice(&src[src_range]);
dst[byte_offset / 8] = u64::from_le_bytes(bytes);
}

let aligned_u64_offset = (byte_offset + 7) / 8;

// Iterate over the src data and copy it to dst as little-endian u64 values
for i in aligned_u64_offset..((byte_offset + src.len() + 7) / 8) {
let mut bytes = [0u8; 8];

let src_range =
(i - aligned_u64_offset) * 8..((i - aligned_u64_offset + 1) * 8).min(src.len());
// println!("src_range: {:?}", src_range);
bytes[0..src_range.len()].copy_from_slice(&src[src_range]);
// println!("bytes: {:x?}", &bytes);
pub fn from_bitmap_bytes(offset: u32, mut bytes: &[u8]) -> RoaringBitmap {
assert_eq!(offset % 8, 0, "offset must be a multiple of 8");

// Write the updated u64 value back to dst
dst[i] = u64::from_le_bytes(bytes);
}
if bytes.is_empty() {
return RoaringBitmap::new();
}

const BITS_IN_ONE_CONTAINER: usize = u64::BITS as usize * BITMAP_LENGTH;
const BYTES_IN_ONE_CONTAINER: usize = BITS_IN_ONE_CONTAINER / 8;
assert_eq!(offset % 8, 0, "offset must be a multiple of 8");
let byte_offset = offset as usize / 8;
let n_containers_needed =
(bytes.len() + (BYTES_IN_ONE_CONTAINER - 1)) / BYTES_IN_ONE_CONTAINER + 1;
// Using inclusive range avoids overflow: the max exclusive value is 2^32 (u32::MAX + 1).
let end_bit_inc = u32::try_from(bytes.len())
.ok()
.and_then(|len_bytes| len_bytes.checked_mul(8))
// `bytes` is non-empty, so len_bits is > 0
.and_then(|len_bits| offset.checked_add(len_bits - 1))
.expect("offset + bytes.len() must be <= 2^32");

// offsets are in bytes
let (mut start_container, start_offset) =
(offset as usize >> 16, (offset as usize % 0x1_0000) / 8);
let (end_container_inc, end_offset) =
(end_bit_inc as usize >> 16, (end_bit_inc as usize % 0x1_0000 + 1) / 8);

let n_containers_needed = end_container_inc + 1 - start_container;
let mut containers = Vec::with_capacity(n_containers_needed);

let (offset, bytes) = if byte_offset % BYTES_IN_ONE_CONTAINER == 0 {
(offset, bytes)
} else {
let next_container_byte_offset =
next_multiple_of_usize(byte_offset, BYTES_IN_ONE_CONTAINER);

let number_of_bytes_in_first_container = next_container_byte_offset - byte_offset;
let number_of_bytes_copied_to_first_container =
bytes.len().min(number_of_bytes_in_first_container);

let (first_container_bytes, bytes_left) =
bytes.split_at(number_of_bytes_copied_to_first_container);
let (first_container_key, _) = util::split(offset);

let len: u64 = first_container_bytes.iter().map(|&b| b.count_ones() as u64).sum();
if len != 0 {
let mut bits: Box<[u64; BITMAP_LENGTH]> = Box::new([0; BITMAP_LENGTH]);
// Safety:
// * `first_container_bytes` is a slice of `bytes` and is guaranteed to be smaller than or equal to `number_of_bytes_in_first_container`
unsafe {
copy_bits(
first_container_bytes,
bits.as_mut(),
BYTES_IN_ONE_CONTAINER - number_of_bytes_in_first_container,
)
};

let store = BitmapStore::from_unchecked(len, bits);
let mut container =
Container { key: first_container_key, store: Store::Bitmap(store) };
container.ensure_correct_store();
// Handle a partial first container
if start_offset != 0 {
let end_byte = if end_container_inc == start_container {
end_offset
} else {
BITMAP_LENGTH * size_of::<u64>()
};

let (src, rest) = bytes.split_at(end_byte - start_offset);
bytes = rest;

if let Some(container) =
Container::from_lsb0_bytes(start_container as u16, src, start_offset)
{
containers.push(container);
}

(next_multiple_of_u32(offset, BITS_IN_ONE_CONTAINER as u32), bytes_left)
};

let bitmap_store_chunks = bytes.chunks(BYTES_IN_ONE_CONTAINER);
start_container += 1;
}

// Handle all full containers
for full_container_key in start_container..end_container_inc {
let (src, rest) = bytes.split_at(BITMAP_LENGTH * size_of::<u64>());
bytes = rest;

let (offset_key, _) = util::split(offset);
for (i, chunk) in bitmap_store_chunks.enumerate() {
let len: u64 = chunk.iter().map(|&b| b.count_ones() as u64).sum();
if len == 0 {
continue;
}
let mut bits: Box<[u64; BITMAP_LENGTH]> = Box::new([0; BITMAP_LENGTH]);
// Safety:
// * `chunk` is a slice of `bytes` and is guaranteed to be smaller than `BITMAP_LENGTH` u64s
unsafe {
copy_bits(chunk, bits.as_mut(), 0);
if let Some(container) = Container::from_lsb0_bytes(full_container_key as u16, src, 0) {
containers.push(container);
}
let store = BitmapStore::from_unchecked(len, bits);

let mut container =
Container { key: offset_key + i as u16, store: Store::Bitmap(store) };
container.ensure_correct_store();
}

containers.push(container);
// Handle a last container
if !bytes.is_empty() {
if let Some(container) = Container::from_lsb0_bytes(end_container_inc as u16, bytes, 0)
{
containers.push(container);
}
}

RoaringBitmap { containers }
Expand Down Expand Up @@ -476,11 +407,37 @@ mod test {
let mut bytes = vec![0x00u8; CONTAINER_OFFSET_IN_BYTES as usize];
bytes.extend(&[0xff]);
let rb = RoaringBitmap::from_bitmap_bytes(0, &bytes);

assert_eq!(rb.min(), Some(CONTAINER_OFFSET));

let rb = RoaringBitmap::from_bitmap_bytes(8, &bytes);
assert_eq!(rb.min(), Some(CONTAINER_OFFSET + 8));


// Ensure we can set the last byte in an array container
let bytes = [0x80];
let rb = RoaringBitmap::from_bitmap_bytes(0xFFFFFFF8, &bytes);
assert_eq!(rb.len(), 1);
assert!(rb.contains(u32::MAX));

// Ensure we can set the last byte in a bitmap container
let bytes = vec![0xFF; 0x1_0000 / 8];
let rb = RoaringBitmap::from_bitmap_bytes(0xFFFF0000, &bytes);
assert_eq!(rb.len(), 0x1_0000);
assert!(rb.contains(u32::MAX));
}

#[test]
#[should_panic(expected = "multiple of 8")]
fn test_from_bitmap_bytes_invalid_offset() {
let bytes = [0x01];
RoaringBitmap::from_bitmap_bytes(1, &bytes);
}

#[test]
#[should_panic(expected = "<= 2^32")]
fn test_from_bitmap_bytes_overflow() {
let bytes = [0x01, 0x01];
RoaringBitmap::from_bitmap_bytes(u32::MAX - 7, &bytes);
}

#[test]
Expand Down
28 changes: 28 additions & 0 deletions roaring/src/bitmap/store/array_store/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@ use core::cmp::Ordering;
use core::cmp::Ordering::*;
use core::fmt::{Display, Formatter};
use core::ops::{BitAnd, BitAndAssign, BitOr, BitXor, RangeInclusive, Sub, SubAssign};
use core::mem::size_of;

#[cfg(not(feature = "std"))]
use alloc::vec::Vec;
Expand Down Expand Up @@ -47,6 +48,33 @@ impl ArrayStore {
}
}

/// Builds an `ArrayStore` from LSB-first bytes starting `byte_offset` bytes
/// into the container, collecting the index of every set bit in ascending order.
///
/// `bits_set` must be the number of set bits in `bytes`; it is used only to
/// pre-size the vector. The bulk of the input is scanned a `u64` word at a
/// time; the sub-word tail is scanned byte by byte.
pub fn from_lsb0_bytes(bytes: &[u8], byte_offset: usize, bits_set: u64) -> Self {
    type Word = u64;

    let mut values = Vec::with_capacity(bits_set as usize);

    let word_chunks = bytes.chunks_exact(size_of::<Word>());
    let tail = word_chunks.remainder();
    // Number of bytes consumed by the full-word chunks.
    let tail_start = bytes.len() - tail.len();

    for (chunk_idx, chunk) in word_chunks.enumerate() {
        let base_bit = ((byte_offset + chunk_idx * size_of::<Word>()) * 8) as u32;
        let mut remaining = Word::from_le_bytes(chunk.try_into().unwrap());
        // Peel off set bits lowest-first so indices come out sorted.
        while remaining != 0 {
            values.push((remaining.trailing_zeros() + base_bit) as u16);
            remaining &= remaining - 1; // clear the lowest set bit
        }
    }

    for (byte_idx, byte) in tail.iter().copied().enumerate() {
        let base_bit = ((byte_offset + tail_start + byte_idx) * 8) as u32;
        let mut remaining = byte;
        while remaining != 0 {
            values.push((remaining.trailing_zeros() + base_bit) as u16);
            remaining &= remaining - 1;
        }
    }

    Self::from_vec_unchecked(values)
}

pub fn insert(&mut self, index: u16) -> bool {
self.vec.binary_search(&index).map_err(|loc| self.vec.insert(loc, index)).is_err()
}
Expand Down
42 changes: 42 additions & 0 deletions roaring/src/bitmap/store/bitmap_store.rs
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
use core::borrow::Borrow;
use core::cmp::Ordering;
use core::fmt::{Display, Formatter};
use core::mem::size_of;
use core::ops::{BitAndAssign, BitOrAssign, BitXorAssign, RangeInclusive, SubAssign};

use super::ArrayStore;
Expand Down Expand Up @@ -36,6 +37,47 @@ impl BitmapStore {
}
}

/// Builds a `BitmapStore` from LSB-first bytes written at `byte_offset` bytes
/// into the 8 KiB bitmap.
///
/// `bits_set` must equal the number of set bits in `bytes`; this is not
/// verified (hence `_unchecked`), and a wrong value corrupts the store's
/// cached cardinality.
///
/// # Panics
///
/// Panics if `byte_offset + bytes.len()` overflows or exceeds the bitmap size.
pub fn from_lsb0_bytes_unchecked(bytes: &[u8], byte_offset: usize, bits_set: u64) -> Self {
    const BITMAP_BYTES: usize = BITMAP_LENGTH * size_of::<u64>();
    assert!(byte_offset.checked_add(bytes.len()).map_or(false, |sum| sum <= BITMAP_BYTES));

    // If we know we're writing the full bitmap, we can avoid the initial memset to 0
    let mut bits = if bytes.len() == BITMAP_BYTES {
        debug_assert_eq!(byte_offset, 0); // Must be true from the above assert

        // Safety: We've checked that the length is correct, and we use an unaligned load in case
        // the bytes are not 8 byte aligned.
        // The optimizer can see through this, and avoid the double copy to copy directly into
        // the allocated box from bytes with memcpy
        let bytes_as_words =
            unsafe { bytes.as_ptr().cast::<[u64; BITMAP_LENGTH]>().read_unaligned() };
        Box::new(bytes_as_words)
    } else {
        let mut bits = Box::new([0u64; BITMAP_LENGTH]);
        // Safety: It's safe to reinterpret u64s as u8s because u8 has less alignment requirements,
        // and has no padding/uninitialized data.
        // Use `core::slice` rather than `std::slice` to stay consistent with this
        // file's other `core::` imports and keep no_std builds working.
        let dst = unsafe {
            core::slice::from_raw_parts_mut(bits.as_mut_ptr().cast::<u8>(), BITMAP_BYTES)
        };
        let dst = &mut dst[byte_offset..][..bytes.len()];
        dst.copy_from_slice(bytes);
        bits
    };

    if !cfg!(target_endian = "little") {
        // Convert all words we touched (even partially) to little-endian
        let start_word = byte_offset / size_of::<u64>();
        let end_word = (byte_offset + bytes.len() + (size_of::<u64>() - 1)) / size_of::<u64>();

        // The 0th byte is the least significant byte, so we've written the bytes in little-endian
        for word in &mut bits[start_word..end_word] {
            *word = u64::from_le(*word);
        }
    }

    Self::from_unchecked(bits_set, bits)
}

///
/// Create a new BitmapStore from a given len and bits array
/// It is up to the caller to ensure len == cardinality of bits
Expand Down
29 changes: 29 additions & 0 deletions roaring/src/bitmap/store/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,35 @@ impl Store {
Store::Bitmap(BitmapStore::full())
}

/// Builds a store from LSB-first bytes written `byte_offset` bytes into the
/// container's bit space, choosing the cheaper representation by cardinality:
/// an array store for fewer than `ARRAY_LIMIT` set bits, a bitmap otherwise.
///
/// Returns `None` when no bit is set.
///
/// # Panics
///
/// Panics if `byte_offset + bytes.len()` exceeds the size of a full bitmap.
pub fn from_lsb0_bytes(bytes: &[u8], byte_offset: usize) -> Option<Self> {
    assert!(byte_offset + bytes.len() <= BITMAP_LENGTH * mem::size_of::<u64>());

    // It seems to be pretty considerably faster to count the bits
    // using u64s than for each byte. Native-endian loads are fine here:
    // popcount is endianness-independent.
    let chunks = bytes.chunks_exact(mem::size_of::<u64>());
    let remainder = chunks.remainder();
    let word_bits: u64 = chunks
        .map(|chunk| u64::from(u64::from_ne_bytes(chunk.try_into().unwrap()).count_ones()))
        .sum();
    let tail_bits: u64 = remainder.iter().map(|byte| u64::from(byte.count_ones())).sum();
    let bits_set = word_bits + tail_bits;

    if bits_set == 0 {
        None
    } else if bits_set < ARRAY_LIMIT {
        Some(Array(ArrayStore::from_lsb0_bytes(bytes, byte_offset, bits_set)))
    } else {
        Some(Bitmap(BitmapStore::from_lsb0_bytes_unchecked(bytes, byte_offset, bits_set)))
    }
}

pub fn insert(&mut self, index: u16) -> bool {
match self {
Array(vec) => vec.insert(index),
Expand Down

0 comments on commit aace6b8

Please sign in to comment.