From bc751794b7a82bb2639b63d8065dfb25a951f458 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?J=C3=B6rn=20Horstmann?= Date: Sat, 15 Jan 2022 15:42:09 +0100 Subject: [PATCH 1/3] Extend arithmetic benchmarks --- arrow/benches/arithmetic_kernels.rs | 62 +++++++++++++++++-------- arrow/src/buffer/immutable.rs | 1 + arrow/src/compute/kernels/arithmetic.rs | 16 +++++++ 3 files changed, 60 insertions(+), 19 deletions(-) diff --git a/arrow/benches/arithmetic_kernels.rs b/arrow/benches/arithmetic_kernels.rs index bbe412366988..4be4a26933aa 100644 --- a/arrow/benches/arithmetic_kernels.rs +++ b/arrow/benches/arithmetic_kernels.rs @@ -24,7 +24,6 @@ use std::sync::Arc; extern crate arrow; -use arrow::compute::kernels::limit::*; use arrow::util::bench_util::*; use arrow::{array::*, datatypes::Float32Type}; use arrow::{compute::kernels::arithmetic::*, util::test_util::seedable_rng}; @@ -59,44 +58,69 @@ fn bench_divide(arr_a: &ArrayRef, arr_b: &ArrayRef) { criterion::black_box(divide(arr_a, arr_b).unwrap()); } +fn bench_divide_unchecked(arr_a: &ArrayRef, arr_b: &ArrayRef) { + let arr_a = arr_a.as_any().downcast_ref::().unwrap(); + let arr_b = arr_b.as_any().downcast_ref::().unwrap(); + criterion::black_box(divide_unchecked(arr_a, arr_b).unwrap()); +} + fn bench_divide_scalar(array: &ArrayRef, divisor: f32) { let array = array.as_any().downcast_ref::().unwrap(); criterion::black_box(divide_scalar(array, divisor).unwrap()); } -fn bench_limit(arr_a: &ArrayRef, max: usize) { - criterion::black_box(limit(arr_a, max)); +fn bench_modulo(arr_a: &ArrayRef, arr_b: &ArrayRef) { + let arr_a = arr_a.as_any().downcast_ref::().unwrap(); + let arr_b = arr_b.as_any().downcast_ref::().unwrap(); + criterion::black_box(modulus(arr_a, arr_b).unwrap()); +} + +fn bench_modulo_scalar(array: &ArrayRef, divisor: f32) { + let array = array.as_any().downcast_ref::().unwrap(); + criterion::black_box(modulus_scalar(array, divisor).unwrap()); } fn add_benchmark(c: &mut Criterion) { - let arr_a = create_array(512, false); - let arr_b = create_array(512, false); + const BATCH_SIZE: usize = 64 * 1024; + let arr_a = create_array(BATCH_SIZE, false); + let arr_b = create_array(BATCH_SIZE, false); let scalar = seedable_rng().gen(); - c.bench_function("add 512", |b| b.iter(|| bench_add(&arr_a, &arr_b))); - c.bench_function("subtract 512", |b| { - b.iter(|| bench_subtract(&arr_a, &arr_b)) + c.bench_function("add", |b| b.iter(|| bench_add(&arr_a, &arr_b))); + c.bench_function("subtract", |b| b.iter(|| bench_subtract(&arr_a, &arr_b))); + c.bench_function("multiply", |b| b.iter(|| bench_multiply(&arr_a, &arr_b))); + c.bench_function("divide", |b| b.iter(|| bench_divide(&arr_a, &arr_b))); + c.bench_function("divide_unchecked", |b| { + b.iter(|| bench_divide_unchecked(&arr_a, &arr_b)) }); - c.bench_function("multiply 512", |b| { - b.iter(|| bench_multiply(&arr_a, &arr_b)) - }); - c.bench_function("divide 512", |b| b.iter(|| bench_divide(&arr_a, &arr_b))); - c.bench_function("divide_scalar 512", |b| { + c.bench_function("divide_scalar", |b| { b.iter(|| bench_divide_scalar(&arr_a, scalar)) }); - c.bench_function("limit 512, 512", |b| b.iter(|| bench_limit(&arr_a, 512))); + c.bench_function("modulo", |b| b.iter(|| bench_modulo(&arr_a, &arr_b))); + c.bench_function("modulo_scalar", |b| { + b.iter(|| bench_modulo_scalar(&arr_a, scalar)) + }); - let arr_a_nulls = create_array(512, false); - let arr_b_nulls = create_array(512, false); - c.bench_function("add_nulls_512", |b| { + let arr_a_nulls = create_array(BATCH_SIZE, true); + let arr_b_nulls = create_array(BATCH_SIZE, true); + c.bench_function("add_nulls", |b| { b.iter(|| bench_add(&arr_a_nulls, &arr_b_nulls)) }); - c.bench_function("divide_nulls_512", |b| { + c.bench_function("divide_nulls", |b| { b.iter(|| bench_divide(&arr_a_nulls, &arr_b_nulls)) }); - c.bench_function("divide_scalar_nulls_512", |b| { + c.bench_function("divide_nulls_unchecked", |b| { + b.iter(|| bench_divide_unchecked(&arr_a_nulls, &arr_b_nulls)) + }); + c.bench_function("divide_scalar_nulls", |b| { b.iter(|| bench_divide_scalar(&arr_a_nulls, scalar)) }); + c.bench_function("modulo_nulls", |b| { + b.iter(|| bench_modulo(&arr_a_nulls, &arr_b_nulls)) + }); + c.bench_function("modulo_scalar_nulls", |b| { + b.iter(|| bench_modulo_scalar(&arr_a_nulls, scalar)) + }); } criterion_group!(benches, add_benchmark); diff --git a/arrow/src/buffer/immutable.rs b/arrow/src/buffer/immutable.rs index 0823de54f3fe..a3de0d496346 100644 --- a/arrow/src/buffer/immutable.rs +++ b/arrow/src/buffer/immutable.rs @@ -153,6 +153,7 @@ impl Buffer { /// /// Note that this should be used cautiously, and the returned pointer should not be /// stored anywhere, to avoid dangling pointers. + #[inline] pub fn as_ptr(&self) -> *const u8 { unsafe { self.data.ptr().as_ptr().add(self.offset) } } diff --git a/arrow/src/compute/kernels/arithmetic.rs b/arrow/src/compute/kernels/arithmetic.rs index 4eb957678892..71ed7af324e6 100644 --- a/arrow/src/compute/kernels/arithmetic.rs +++ b/arrow/src/compute/kernels/arithmetic.rs @@ -756,6 +756,22 @@ where return math_checked_divide_op(left, right, |a, b| a / b); } +/// Perform `left / right` operation on two arrays without checking for division by zero. +/// If either left or right value is null then the result is also null. If any right hand value is zero then the result of this +pub fn divide_unchecked( + left: &PrimitiveArray, + right: &PrimitiveArray, +) -> Result> +where + T: datatypes::ArrowFloatNumericType, + T::Native: Div, +{ + #[cfg(feature = "simd")] + return simd_math_op(&left, &right, |a, b| a / b, |a, b| a / b); + #[cfg(not(feature = "simd"))] + return math_op(left, right, |a, b| a / b); +} + /// Modulus every value in an array by a scalar. If any value in the array is null then the /// result is also null. If the scalar is zero then the result of this operation will be /// `Err(ArrowError::DivideByZero)`. From 551be27542b95b307ae2c647c04f2e881b9734c6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?J=C3=B6rn=20Horstmann?= Date: Sat, 22 Jan 2022 16:07:23 +0100 Subject: [PATCH 2/3] Remove explicit simd arithmetic except for div/mod because autovectorization generates better code --- arrow/src/compute/kernels/arithmetic.rs | 212 +----------------------- 1 file changed, 1 insertion(+), 211 deletions(-) diff --git a/arrow/src/compute/kernels/arithmetic.rs b/arrow/src/compute/kernels/arithmetic.rs index 71ed7af324e6..397ae159dd30 100644 --- a/arrow/src/compute/kernels/arithmetic.rs +++ b/arrow/src/compute/kernels/arithmetic.rs @@ -29,7 +29,6 @@ use num::{One, Zero}; use crate::buffer::Buffer; #[cfg(feature = "simd")] use crate::buffer::MutableBuffer; -#[cfg(not(feature = "simd"))] use crate::compute::kernels::arity::unary; use crate::compute::util::combine_option_bitmap; use crate::datatypes; @@ -42,65 +41,6 @@ use std::borrow::BorrowMut; #[cfg(feature = "simd")] use std::slice::{ChunksExact, ChunksExactMut}; -/// Creates a new PrimitiveArray by applying `simd_op` to the input `array`. -/// If the length of the array is not multiple of the number of vector lanes -/// then the remainder of the array will be calculated using `scalar_op`. -/// Any operation on a `NULL` value will result in a `NULL` value in the output. -#[cfg(feature = "simd")] -fn simd_unary_math_op( - array: &PrimitiveArray, - simd_op: SIMD_OP, - scalar_op: SCALAR_OP, -) -> PrimitiveArray -where - T: ArrowNumericType, - SIMD_OP: Fn(T::Simd) -> T::Simd, - SCALAR_OP: Fn(T::Native) -> T::Native, -{ - let lanes = T::lanes(); - let buffer_size = array.len() * std::mem::size_of::(); - - let mut result = MutableBuffer::new(buffer_size).with_bitset(buffer_size, false); - - // safety: result is newly created above, always written as a T below - let mut result_chunks = unsafe { result.typed_data_mut().chunks_exact_mut(lanes) }; - let mut array_chunks = array.values().chunks_exact(lanes); - - result_chunks - .borrow_mut() - .zip(array_chunks.borrow_mut()) - .for_each(|(result_slice, input_slice)| { - let simd_input = T::load(input_slice); - let simd_result = T::unary_op(simd_input, &simd_op); - T::write(simd_result, result_slice); - }); - - let result_remainder = result_chunks.into_remainder(); - let array_remainder = array_chunks.remainder(); - - result_remainder.into_iter().zip(array_remainder).for_each( - |(scalar_result, scalar_input)| { - *scalar_result = scalar_op(*scalar_input); - }, - ); - - let data = unsafe { - ArrayData::new_unchecked( - T::DATA_TYPE, - array.len(), - None, - array - .data_ref() - .null_buffer() - .map(|b| b.bit_slice(array.offset(), array.len())), - 0, - vec![result.into()], - vec![], - ) - }; - PrimitiveArray::::from(data) -} - /// Helper function to perform math lambda function on values from two arrays. If either /// left or right value is null then the output value is also null, so `1 + null` is /// `null`. @@ -227,79 +167,6 @@ where Ok(PrimitiveArray::::from(data)) } -/// Creates a new PrimitiveArray by applying `simd_op` to the `left` and `right` input arrays. -/// If the length of the arrays is not multiple of the number of vector lanes -/// then the remainder of the array will be calculated using `scalar_op`. -/// Any operation on a `NULL` value will result in a `NULL` value in the output. -/// -/// # Errors -/// -/// This function errors if the arrays have different lengths -#[cfg(feature = "simd")] -fn simd_math_op( - left: &PrimitiveArray, - right: &PrimitiveArray, - simd_op: SIMD_OP, - scalar_op: SCALAR_OP, -) -> Result> -where - T: ArrowNumericType, - SIMD_OP: Fn(T::Simd, T::Simd) -> T::Simd, - SCALAR_OP: Fn(T::Native, T::Native) -> T::Native, -{ - if left.len() != right.len() { - return Err(ArrowError::ComputeError( - "Cannot perform math operation on arrays of different length".to_string(), - )); - } - - let null_bit_buffer = - combine_option_bitmap(left.data_ref(), right.data_ref(), left.len())?; - - let lanes = T::lanes(); - let buffer_size = left.len() * std::mem::size_of::(); - let mut result = MutableBuffer::new(buffer_size).with_bitset(buffer_size, false); - - // safety: result is newly created above, always written as a T below - let mut result_chunks = unsafe { result.typed_data_mut().chunks_exact_mut(lanes) }; - let mut left_chunks = left.values().chunks_exact(lanes); - let mut right_chunks = right.values().chunks_exact(lanes); - - result_chunks - .borrow_mut() - .zip(left_chunks.borrow_mut().zip(right_chunks.borrow_mut())) - .for_each(|(result_slice, (left_slice, right_slice))| { - let simd_left = T::load(left_slice); - let simd_right = T::load(right_slice); - let simd_result = T::bin_op(simd_left, simd_right, &simd_op); - T::write(simd_result, result_slice); - }); - - let result_remainder = result_chunks.into_remainder(); - let left_remainder = left_chunks.remainder(); - let right_remainder = right_chunks.remainder(); - - result_remainder - .iter_mut() - .zip(left_remainder.iter().zip(right_remainder.iter())) - .for_each(|(scalar_result, (scalar_left, scalar_right))| { - *scalar_result = scalar_op(*scalar_left, *scalar_right); - }); - - let data = unsafe { - ArrayData::new_unchecked( - T::DATA_TYPE, - left.len(), - None, - null_bit_buffer, - 0, - vec![result.into()], - vec![], - ) - }; - Ok(PrimitiveArray::::from(data)) -} - /// Calculates the modulus operation `left % right` on two SIMD inputs. /// The lower-most bits of `valid_mask` specify which vector lanes are considered as valid. /// @@ -566,9 +433,6 @@ where T: ArrowNumericType, T::Native: Add, { - #[cfg(feature = "simd")] - return simd_math_op(&left, &right, |a, b| a + b, |a, b| a + b); - #[cfg(not(feature = "simd"))] return math_op(left, right, |a, b| a + b); } @@ -582,16 +446,6 @@ where T: datatypes::ArrowNumericType, T::Native: Add, { - #[cfg(feature = "simd")] - { - let scalar_vector = T::init(scalar); - return Ok(simd_unary_math_op( - array, - |x| x + scalar_vector, - |x| x + scalar, - )); - } - #[cfg(not(feature = "simd"))] return Ok(unary(array, |value| value + scalar)); } @@ -605,9 +459,6 @@ where T: datatypes::ArrowNumericType, T::Native: Sub, { - #[cfg(feature = "simd")] - return simd_math_op(&left, &right, |a, b| a - b, |a, b| a - b); - #[cfg(not(feature = "simd"))] return math_op(left, right, |a, b| a - b); } @@ -625,16 +476,6 @@ where + Div + Zero, { - #[cfg(feature = "simd")] - { - let scalar_vector = T::init(scalar); - return Ok(simd_unary_math_op( - array, - |x| x - scalar_vector, - |x| x - scalar, - )); - } - #[cfg(not(feature = "simd"))] return Ok(unary(array, |value| value - scalar)); } @@ -644,12 +485,6 @@ where T: datatypes::ArrowNumericType, T::Native: Neg, { - #[cfg(feature = "simd")] - { - let zero_vector = T::init(T::default_value()); - return Ok(simd_unary_math_op(array, |x| zero_vector - x, |x| -x)); - } - #[cfg(not(feature = "simd"))] return Ok(unary(array, |x| -x)); } @@ -662,16 +497,6 @@ where T: datatypes::ArrowFloatNumericType, T::Native: Pow, { - #[cfg(feature = "simd")] - { - let raise_vector = T::init(raise); - return Ok(simd_unary_math_op( - array, - |x| T::pow(x, raise_vector), - |x| x.pow(raise), - )); - } - #[cfg(not(feature = "simd"))] return Ok(unary(array, |x| x.pow(raise))); } @@ -685,9 +510,6 @@ where T: datatypes::ArrowNumericType, T::Native: Mul, { - #[cfg(feature = "simd")] - return simd_math_op(&left, &right, |a, b| a * b, |a, b| a * b); - #[cfg(not(feature = "simd"))] return math_op(left, right, |a, b| a * b); } @@ -707,16 +529,6 @@ where + Zero + One, { - #[cfg(feature = "simd")] - { - let scalar_vector = T::init(scalar); - return Ok(simd_unary_math_op( - array, - |x| x * scalar_vector, - |x| x * scalar, - )); - } - #[cfg(not(feature = "simd"))] return Ok(unary(array, |value| value * scalar)); } @@ -757,6 +569,7 @@ where } /// Perform `left / right` operation on two arrays without checking for division by zero. +/// The result of dividing by zero follows normal floating point rules. /// If either left or right value is null then the result is also null. If any right hand value is zero then the result of this pub fn divide_unchecked( left: &PrimitiveArray, @@ -766,9 +579,6 @@ where T: datatypes::ArrowFloatNumericType, T::Native: Div, { - #[cfg(feature = "simd")] - return simd_math_op(&left, &right, |a, b| a / b, |a, b| a / b); - #[cfg(not(feature = "simd"))] return math_op(left, right, |a, b| a / b); } @@ -787,16 +597,6 @@ where return Err(ArrowError::DivideByZero); } - #[cfg(feature = "simd")] - { - let modulo_vector = T::init(modulo); - return Ok(simd_unary_math_op( - &array, - |a| a % modulo_vector, - |a| a % modulo, - )); - } - #[cfg(not(feature = "simd"))] return Ok(unary(array, |a| a % modulo)); } @@ -814,16 +614,6 @@ where if divisor.is_zero() { return Err(ArrowError::DivideByZero); } - #[cfg(feature = "simd")] - { - let divisor_vector = T::init(divisor); - return Ok(simd_unary_math_op( - &array, - |a| a / divisor_vector, - |a| a / divisor, - )); - } - #[cfg(not(feature = "simd"))] return Ok(unary(array, |a| a / divisor)); } From ac6a599873e0547c0932726562f87ec210595cab Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?J=C3=B6rn=20Horstmann?= Date: Sat, 22 Jan 2022 16:54:28 +0100 Subject: [PATCH 3/3] Remove unneeded return keywords --- arrow/src/compute/kernels/arithmetic.rs | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/arrow/src/compute/kernels/arithmetic.rs b/arrow/src/compute/kernels/arithmetic.rs index 397ae159dd30..dcdfa952377d 100644 --- a/arrow/src/compute/kernels/arithmetic.rs +++ b/arrow/src/compute/kernels/arithmetic.rs @@ -433,7 +433,7 @@ where T: ArrowNumericType, T::Native: Add, { - return math_op(left, right, |a, b| a + b); + math_op(left, right, |a, b| a + b) } /// Add every value in an array by a scalar. If any value in the array is null then the @@ -446,7 +446,7 @@ where T: datatypes::ArrowNumericType, T::Native: Add, { - return Ok(unary(array, |value| value + scalar)); + Ok(unary(array, |value| value + scalar)) } /// Perform `left - right` operation on two arrays. If either left or right value is null @@ -459,7 +459,7 @@ where T: datatypes::ArrowNumericType, T::Native: Sub, { - return math_op(left, right, |a, b| a - b); + math_op(left, right, |a, b| a - b) } /// Subtract every value in an array by a scalar. If any value in the array is null then the @@ -476,7 +476,7 @@ where + Div + Zero, { - return Ok(unary(array, |value| value - scalar)); + Ok(unary(array, |value| value - scalar)) } /// Perform `-` operation on an array. If value is null then the result is also null. @@ -485,7 +485,7 @@ where T: datatypes::ArrowNumericType, T::Native: Neg, { - return Ok(unary(array, |x| -x)); + Ok(unary(array, |x| -x)) } /// Raise array with floating point values to the power of a scalar. @@ -497,7 +497,7 @@ where T: datatypes::ArrowFloatNumericType, T::Native: Pow, { - return Ok(unary(array, |x| x.pow(raise))); + Ok(unary(array, |x| x.pow(raise))) } /// Perform `left * right` operation on two arrays. If either left or right value is null @@ -510,7 +510,7 @@ where T: datatypes::ArrowNumericType, T::Native: Mul, { - return math_op(left, right, |a, b| a * b); + math_op(left, right, |a, b| a * b) } /// Multiply every value in an array by a scalar. If any value in the array is null then the @@ -529,7 +529,7 @@ where + Zero + One, { - return Ok(unary(array, |value| value * scalar)); + Ok(unary(array, |value| value * scalar)) } /// Perform `left % right` operation on two arrays. If either left or right value is null @@ -579,7 +579,7 @@ where T: datatypes::ArrowFloatNumericType, T::Native: Div, { - return math_op(left, right, |a, b| a / b); + math_op(left, right, |a, b| a / b) } /// Modulus every value in an array by a scalar. If any value in the array is null then the @@ -597,7 +597,7 @@ where return Err(ArrowError::DivideByZero); } - return Ok(unary(array, |a| a % modulo)); + Ok(unary(array, |a| a % modulo)) } /// Divide every value in an array by a scalar. If any value in the array is null then the @@ -614,7 +614,7 @@ where if divisor.is_zero() { return Err(ArrowError::DivideByZero); } - return Ok(unary(array, |a| a / divisor)); + Ok(unary(array, |a| a / divisor)) } #[cfg(test)]