-
Notifications
You must be signed in to change notification settings - Fork 839
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Split out arrow-buffer crate (#2594) #2693
Changes from 4 commits
b9cc1fc
972b802
49485ac
f47a878
19287a9
0a2e302
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,47 @@ | ||
# Licensed to the Apache Software Foundation (ASF) under one | ||
# or more contributor license agreements. See the NOTICE file | ||
# distributed with this work for additional information | ||
# regarding copyright ownership. The ASF licenses this file | ||
# to you under the Apache License, Version 2.0 (the | ||
# "License"); you may not use this file except in compliance | ||
# with the License. You may obtain a copy of the License at | ||
# | ||
# http://www.apache.org/licenses/LICENSE-2.0 | ||
# | ||
# Unless required by applicable law or agreed to in writing, | ||
# software distributed under the License is distributed on an | ||
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY | ||
# KIND, either express or implied. See the License for the | ||
# specific language governing permissions and limitations | ||
# under the License. | ||
|
||
[package] | ||
name = "arrow-buffer" | ||
version = "22.0.0" | ||
description = "Buffer abstractions for Apache Arrow" | ||
homepage = "https://github.com/apache/arrow-rs" | ||
repository = "https://github.com/apache/arrow-rs" | ||
authors = ["Apache Arrow <[email protected]>"] | ||
license = "Apache-2.0" | ||
keywords = ["arrow"] | ||
include = [ | ||
"benches/*.rs", | ||
"src/**/*.rs", | ||
"Cargo.toml", | ||
] | ||
edition = "2021" | ||
rust-version = "1.62" | ||
|
||
[lib] | ||
name = "arrow_buffer" | ||
path = "src/lib.rs" | ||
bench = false | ||
|
||
[dependencies] | ||
num = { version = "0.4", default-features = false, features = ["std"] } | ||
half = { version = "2.0", default-features = false } | ||
|
||
[dev-dependencies] | ||
rand = { version = "0.8", default-features = false, features = ["std", "std_rng"] } | ||
|
||
[build-dependencies] |
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -20,34 +20,29 @@ | |
|
||
use std::alloc::{handle_alloc_error, Layout}; | ||
use std::fmt::{Debug, Formatter}; | ||
use std::mem::size_of; | ||
use std::panic::RefUnwindSafe; | ||
use std::ptr::NonNull; | ||
use std::sync::Arc; | ||
|
||
mod alignment; | ||
mod types; | ||
|
||
pub use alignment::ALIGNMENT; | ||
pub use types::NativeType; | ||
|
||
#[inline] | ||
unsafe fn null_pointer<T: NativeType>() -> NonNull<T> { | ||
NonNull::new_unchecked(ALIGNMENT as *mut T) | ||
unsafe fn null_pointer() -> NonNull<u8> { | ||
NonNull::new_unchecked(ALIGNMENT as *mut u8) | ||
} | ||
|
||
/// Allocates a cache-aligned memory region of `size` bytes with uninitialized values. | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Looks like previously the doc is not totally correct as There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. But actually I don't see any allocate_aligned usage with types other than u8 so far. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Indeed, I believe Jorge opted to fork off arrow2 before getting further with integrating this |
||
/// This is more performant than using [allocate_aligned_zeroed] when all bytes will have | ||
/// an unknown or non-zero value and is semantically similar to `malloc`. | ||
pub fn allocate_aligned<T: NativeType>(size: usize) -> NonNull<T> { | ||
pub fn allocate_aligned(size: usize) -> NonNull<u8> { | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. So the caller needs to compute the allocation size by the type T they want? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Yes, although nothing was actually using this API to allocate anything other than bytes There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. maybe it was also related to a time when there were fewer things that were ArrowNativeType (I can't remember but that may have changed over time) |
||
unsafe { | ||
if size == 0 { | ||
null_pointer() | ||
} else { | ||
let size = size * size_of::<T>(); | ||
|
||
let layout = Layout::from_size_align_unchecked(size, ALIGNMENT); | ||
let raw_ptr = std::alloc::alloc(layout) as *mut T; | ||
let raw_ptr = std::alloc::alloc(layout); | ||
NonNull::new(raw_ptr).unwrap_or_else(|| handle_alloc_error(layout)) | ||
} | ||
} | ||
|
@@ -56,15 +51,13 @@ pub fn allocate_aligned<T: NativeType>(size: usize) -> NonNull<T> { | |
/// Allocates a cache-aligned memory region of `size` bytes with `0` on all of them. | ||
/// This is more performant than using [allocate_aligned] and setting all bytes to zero | ||
/// and is semantically similar to `calloc`. | ||
pub fn allocate_aligned_zeroed<T: NativeType>(size: usize) -> NonNull<T> { | ||
pub fn allocate_aligned_zeroed(size: usize) -> NonNull<u8> { | ||
unsafe { | ||
if size == 0 { | ||
null_pointer() | ||
} else { | ||
let size = size * size_of::<T>(); | ||
|
||
let layout = Layout::from_size_align_unchecked(size, ALIGNMENT); | ||
let raw_ptr = std::alloc::alloc_zeroed(layout) as *mut T; | ||
let raw_ptr = std::alloc::alloc_zeroed(layout); | ||
NonNull::new(raw_ptr).unwrap_or_else(|| handle_alloc_error(layout)) | ||
} | ||
} | ||
|
@@ -78,9 +71,8 @@ pub fn allocate_aligned_zeroed<T: NativeType>(size: usize) -> NonNull<T> { | |
/// * ptr must denote a block of memory currently allocated via this allocator, | ||
/// | ||
/// * size must be the same size that was used to allocate that block of memory, | ||
pub unsafe fn free_aligned<T: NativeType>(ptr: NonNull<T>, size: usize) { | ||
pub unsafe fn free_aligned(ptr: NonNull<u8>, size: usize) { | ||
if ptr != null_pointer() { | ||
let size = size * size_of::<T>(); | ||
std::alloc::dealloc( | ||
ptr.as_ptr() as *mut u8, | ||
Layout::from_size_align_unchecked(size, ALIGNMENT), | ||
|
@@ -99,13 +91,11 @@ pub unsafe fn free_aligned<T: NativeType>(ptr: NonNull<T>, size: usize) { | |
/// | ||
/// * new_size, when rounded up to the nearest multiple of [ALIGNMENT], must not overflow (i.e., | ||
/// the rounded value must be less than usize::MAX). | ||
pub unsafe fn reallocate<T: NativeType>( | ||
ptr: NonNull<T>, | ||
pub unsafe fn reallocate( | ||
ptr: NonNull<u8>, | ||
old_size: usize, | ||
new_size: usize, | ||
) -> NonNull<T> { | ||
let old_size = old_size * size_of::<T>(); | ||
let new_size = new_size * size_of::<T>(); | ||
) -> NonNull<u8> { | ||
if ptr == null_pointer() { | ||
return allocate_aligned(new_size); | ||
} | ||
|
@@ -119,7 +109,7 @@ pub unsafe fn reallocate<T: NativeType>( | |
ptr.as_ptr() as *mut u8, | ||
Layout::from_size_align_unchecked(old_size, ALIGNMENT), | ||
new_size, | ||
) as *mut T; | ||
); | ||
NonNull::new(raw_ptr).unwrap_or_else(|| { | ||
handle_alloc_error(Layout::from_size_align_unchecked(new_size, ALIGNMENT)) | ||
}) | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,29 @@ | ||
// Licensed to the Apache Software Foundation (ASF) under one | ||
// or more contributor license agreements. See the NOTICE file | ||
// distributed with this work for additional information | ||
// regarding copyright ownership. The ASF licenses this file | ||
// to you under the Apache License, Version 2.0 (the | ||
// "License"); you may not use this file except in compliance | ||
// with the License. You may obtain a copy of the License at | ||
// | ||
// http://www.apache.org/licenses/LICENSE-2.0 | ||
// | ||
// Unless required by applicable law or agreed to in writing, | ||
// software distributed under the License is distributed on an | ||
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY | ||
// KIND, either express or implied. See the License for the | ||
// specific language governing permissions and limitations | ||
// under the License. | ||
|
||
//! This module contains two main structs: [Buffer] and [MutableBuffer]. A buffer represents | ||
//! a contiguous memory region that can be shared via `offsets`. | ||
|
||
mod immutable; | ||
pub use immutable::*; | ||
mod mutable; | ||
pub use mutable::*; | ||
mod ops; | ||
mod scalar; | ||
pub use scalar::*; | ||
Comment on lines
+21
to
+27
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. i wonder if it makes sense to just collapse the There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I think as the crate still contains other public modules, like alloc, it would be confusing to lift the contents of buffer to the top-level |
||
|
||
pub use ops::*; |
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -20,26 +20,19 @@ use crate::util::bit_util::ceil; | |
|
||
/// Apply a bitwise operation `op` to four inputs and return the result as a Buffer. | ||
/// The inputs are treated as bitmaps, meaning that offsets and length are specified in number of bits. | ||
#[allow(clippy::too_many_arguments)] | ||
pub(crate) fn bitwise_quaternary_op_helper<F>( | ||
first: &Buffer, | ||
first_offset_in_bits: usize, | ||
second: &Buffer, | ||
second_offset_in_bits: usize, | ||
third: &Buffer, | ||
third_offset_in_bits: usize, | ||
fourth: &Buffer, | ||
fourth_offset_in_bits: usize, | ||
pub fn bitwise_quaternary_op_helper<F>( | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I wonder if we should profile with this change? It seems like it should not be any different given that |
||
buffers: [&Buffer; 4], | ||
offsets: [usize; 4], | ||
len_in_bits: usize, | ||
op: F, | ||
) -> Buffer | ||
where | ||
F: Fn(u64, u64, u64, u64) -> u64, | ||
{ | ||
let first_chunks = first.bit_chunks(first_offset_in_bits, len_in_bits); | ||
let second_chunks = second.bit_chunks(second_offset_in_bits, len_in_bits); | ||
let third_chunks = third.bit_chunks(third_offset_in_bits, len_in_bits); | ||
let fourth_chunks = fourth.bit_chunks(fourth_offset_in_bits, len_in_bits); | ||
let first_chunks = buffers[0].bit_chunks(offsets[0], len_in_bits); | ||
let second_chunks = buffers[1].bit_chunks(offsets[1], len_in_bits); | ||
let third_chunks = buffers[2].bit_chunks(offsets[2], len_in_bits); | ||
let fourth_chunks = buffers[3].bit_chunks(offsets[3], len_in_bits); | ||
|
||
let chunks = first_chunks | ||
.iter() | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,24 @@ | ||
// Licensed to the Apache Software Foundation (ASF) under one | ||
// or more contributor license agreements. See the NOTICE file | ||
// distributed with this work for additional information | ||
// regarding copyright ownership. The ASF licenses this file | ||
// to you under the Apache License, Version 2.0 (the | ||
// "License"); you may not use this file except in compliance | ||
// with the License. You may obtain a copy of the License at | ||
// | ||
// http://www.apache.org/licenses/LICENSE-2.0 | ||
// | ||
// Unless required by applicable law or agreed to in writing, | ||
// software distributed under the License is distributed on an | ||
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY | ||
// KIND, either express or implied. See the License for the | ||
// specific language governing permissions and limitations | ||
// under the License. | ||
|
||
//! Buffer abstractions for Apache Arrow | ||
tustvold marked this conversation as resolved.
Show resolved
Hide resolved
|
||
|
||
pub mod alloc; | ||
pub mod buffer; | ||
mod bytes; | ||
pub mod native; | ||
pub mod util; |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This is technically a breaking change, it appears to relate to early experiments by @jorgecarleitao r.e. arrow2. It isn't used anywhere, and so I think can just go. The removal is so that arrow-buffer doesn't depend on DataType