From 48bac1645096ca92232e7f67b5cd56b7143397be Mon Sep 17 00:00:00 2001 From: BubbleCal Date: Tue, 26 Nov 2024 12:59:38 +0800 Subject: [PATCH 01/27] perf: in-register lookup table & SIMD for 4bit PQ Signed-off-by: BubbleCal --- rust/lance-index/src/vector/pq/distance.rs | 49 ++- rust/lance-linalg/src/simd.rs | 6 + rust/lance-linalg/src/simd/f32.rs | 32 +- rust/lance-linalg/src/simd/u8.rs | 346 +++++++++++++++++++++ 4 files changed, 418 insertions(+), 15 deletions(-) create mode 100644 rust/lance-linalg/src/simd/u8.rs diff --git a/rust/lance-index/src/vector/pq/distance.rs b/rust/lance-index/src/vector/pq/distance.rs index ddf98b099c..9b3ce4ce39 100644 --- a/rust/lance-index/src/vector/pq/distance.rs +++ b/rust/lance-index/src/vector/pq/distance.rs @@ -5,6 +5,9 @@ use core::panic; use std::cmp::min; use lance_linalg::distance::{dot_distance_batch, l2_distance_batch, Dot, L2}; +use lance_linalg::simd::f32::f32x16; +use lance_linalg::simd::u8::u8x16; +use lance_linalg::simd::{Shuffle, SIMD}; use lance_table::utils::LanceIteratorExtension; use super::{num_centroids, utils::get_sub_vector_centroids}; @@ -135,26 +138,44 @@ pub(super) fn compute_l2_distance_4bit( ) -> Vec { let num_vectors = code.len() * 2 / num_sub_vectors; let mut distances = vec![0.0_f32; num_vectors]; + let mut distance_chunks = distances.chunks_exact_mut(16); const NUM_CENTROIDS: usize = 2_usize.pow(4); for (sub_vec_idx, vec_indices) in code.chunks_exact(num_vectors).enumerate() { - let dist_table: &[f32; NUM_CENTROIDS] = &distance_table - [sub_vec_idx * 2 * NUM_CENTROIDS..(sub_vec_idx * 2 + 1) * NUM_CENTROIDS] - .try_into() - .unwrap(); - let dist_table_next: &[f32; NUM_CENTROIDS] = &distance_table - [(sub_vec_idx * 2 + 1) * NUM_CENTROIDS..(sub_vec_idx * 2 + 2) * NUM_CENTROIDS] - .try_into() - .unwrap(); + let vec_indices_chunks = vec_indices.chunks_exact(16); + for vec_indices_chunk in vec_indices_chunks { + let vec_indices_vec = unsafe { u8x16::load_unaligned(vec_indices_chunk.as_ptr()) }; + let low_indices = vec_indices_vec.bit_and(0x0F); + let dist_table = unsafe { + f32x16::load_unaligned(distance_table.as_ptr().add(sub_vec_idx * NUM_CENTROIDS)) + }; + let dists = dist_table.shuffle(low_indices); + + vec_indices_chunk + .iter() + .zip(distances.iter_mut()) + .for_each(|(¢roid_idx, sum)| { + *sum += dist_table[centroid_idx as usize]; + }); + } + debug_assert_eq!(vec_indices.len(), distances.len()); + let dist_table = + &distance_table[sub_vec_idx * 2 * NUM_CENTROIDS..(sub_vec_idx * 2 + 1) * NUM_CENTROIDS]; vec_indices .iter() + .map(|idx| idx & 0xF) .zip(distances.iter_mut()) - .for_each(|(¢roid_idx, sum)| { - // for 4bit PQ, `centroid_idx` is 2 index, each index is 4bit. - let current_idx = centroid_idx & 0xF; - let next_idx = centroid_idx >> 4; - *sum += dist_table[current_idx as usize]; - *sum += dist_table_next[next_idx as usize]; + .for_each(|(idx, sum)| { + *sum += dist_table[idx as usize]; + }); + let dist_table_next = &distance_table + [(sub_vec_idx * 2 + 1) * NUM_CENTROIDS..(sub_vec_idx * 2 + 2) * NUM_CENTROIDS]; + vec_indices + .iter() + .map(|idx| idx >> 4) + .zip(distances.iter_mut()) + .for_each(|(idx, sum)| { + *sum += dist_table_next[idx as usize]; }); } diff --git a/rust/lance-linalg/src/simd.rs b/rust/lance-linalg/src/simd.rs index 74c3b56d3b..d7f8d46e5e 100644 --- a/rust/lance-linalg/src/simd.rs +++ b/rust/lance-linalg/src/simd.rs @@ -16,8 +16,10 @@ use std::ops::{Add, AddAssign, Mul, Sub, SubAssign}; pub mod f32; pub mod i32; +pub mod u8; use num_traits::{Float, Num}; +use u8::u8x16; /// Lance SIMD lib /// @@ -95,3 +97,7 @@ pub trait FloatSimd: SIMD { /// c = a * b + c fn multiply_add(&mut self, a: Self, b: Self); } + +pub trait Shuffle { + fn shuffle(&self, indices: u8x16) -> Self; +} diff --git a/rust/lance-linalg/src/simd/f32.rs b/rust/lance-linalg/src/simd/f32.rs index 8deb50338b..88da97cbb8 100644 --- a/rust/lance-linalg/src/simd/f32.rs +++ b/rust/lance-linalg/src/simd/f32.rs @@ -15,7 +15,7 @@ use std::arch::x86_64::*; use std::mem::transmute; use std::ops::{Add, AddAssign, Mul, Sub, SubAssign}; -use super::{FloatSimd, SIMD}; +use super::{FloatSimd, Shuffle, SIMD}; /// 8 of 32-bit `f32` values. Use 256-bit SIMD if possible. #[allow(non_camel_case_types)] @@ -800,6 +800,36 @@ impl FloatSimd for f32x16 { } } +impl Shuffle for f32x16 { + fn shuffle(&self, indices: super::u8::u8x16) -> Self { + #[cfg(all(target_arch = "x86_64", target_feature = "avx512f"))] + unsafe { + // cast m128i to m512i + let extended_indices = _mm512_cvtepu8_epi32(indices); + Self(_mm512_permutexvar_ps(extended_indices, self.0)) + } + #[cfg(all(target_arch = "x86_64", not(target_feature = "avx512f")))] + unsafe { + let low_indices = _mm256_castsi128_si256(_mm_srli_si128(indices, 0)); + let high_indices = _mm256_castsi128_si256(_mm_srli_si128(indices, 8)); + let result_low = _mm256_permutevar8x32_ps(self.0, indices_low); + let result_high = _mm256_permutevar8x32_ps(self.1, indices_high); + Self(result_low, result_high) + } + #[cfg(target_arch = "aarch64")] + unsafe { + // aarch does not have shuffle instruction for floats + let values = self.as_array(); + let indices = indices.as_array(); + let mut result = [0.0; 16]; + for i in 0..16 { + result[i] = values[indices[i] as usize]; + } + Self::load_unaligned(result.as_ptr()) + } + } +} + impl Add for f32x16 { type Output = Self; diff --git a/rust/lance-linalg/src/simd/u8.rs b/rust/lance-linalg/src/simd/u8.rs new file mode 100644 index 0000000000..e3aa59c176 --- /dev/null +++ b/rust/lance-linalg/src/simd/u8.rs @@ -0,0 +1,346 @@ +// SPDX-License-Identifier: Apache-2.0 +// SPDX-FileCopyrightText: Copyright The Lance Authors + +//! `u8x8`, 8 of `u8` values + +use std::fmt::Formatter; + +#[cfg(target_arch = "aarch64")] +use std::arch::aarch64::*; +#[cfg(target_arch = "loongarch64")] +use std::arch::loongarch64::*; +#[cfg(target_arch = "x86_64")] +use std::arch::x86_64::*; +#[cfg(target_arch = "loongarch64")] +use std::mem::transmute; +use std::ops::{Add, AddAssign, Mul, Sub, SubAssign}; + +use super::SIMD; + +/// 16 of 8-bit `u8` values. +#[allow(non_camel_case_types)] +#[cfg(target_arch = "x86_64")] +#[derive(Clone, Copy)] +pub struct u8x16(pub __m128i); + +/// 16 of 32-bit `f32` values. Use 512-bit SIMD if possible. +#[allow(non_camel_case_types)] +#[cfg(target_arch = "aarch64")] +#[derive(Clone, Copy)] +pub struct u8x16(pub uint8x16_t); + +impl u8x16 { + #[inline] + pub fn bit_and(self, mask: u8) -> Self { + #[cfg(target_arch = "x86_64")] + unsafe { + Self(_mm_and_si128(self.0, _mm_set1_epi8(mask as i8))) + } + #[cfg(target_arch = "aarch64")] + unsafe { + Self(vandq_u8(self.0, vdupq_n_u8(mask))) + } + #[cfg(target_arch = "loongarch64")] + unsafe { + Self(lasx_xvfand_b(self.0, mask)) + } + } +} + +impl std::fmt::Debug for u8x16 { + fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { + let mut arr = [0u8; 16]; + unsafe { + self.store_unaligned(arr.as_mut_ptr()); + } + write!(f, "u8x16({:?})", arr) + } +} + +impl From<&[u8]> for u8x16 { + fn from(value: &[u8]) -> Self { + unsafe { Self::load_unaligned(value.as_ptr()) } + } +} + +impl<'a> From<&'a [u8; 16]> for u8x16 { + fn from(value: &'a [u8; 16]) -> Self { + unsafe { Self::load_unaligned(value.as_ptr()) } + } +} + +impl SIMD for u8x16 { + #[inline] + + fn splat(val: u8) -> Self { + #[cfg(target_arch = "x86_64")] + unsafe { + Self(_mm_set1_epi8(val as i8)) + } + #[cfg(target_arch = "aarch64")] + unsafe { + Self(vdupq_n_u8(val)) + } + } + + #[inline] + fn zeros() -> Self { + #[cfg(target_arch = "x86_64")] + unsafe { + Self(_mm_setzero_si128()) + } + #[cfg(target_arch = "aarch64")] + { + Self::splat(0) + } + } + + #[inline] + unsafe fn load(ptr: *const u8) -> Self { + #[cfg(target_arch = "x86_64")] + unsafe { + Self(_mm_loadu_epi8(ptr as *const i8)) + } + #[cfg(target_arch = "aarch64")] + { + Self::load_unaligned(ptr) + } + } + + #[inline] + unsafe fn load_unaligned(ptr: *const u8) -> Self { + #[cfg(all(target_arch = "x86_64", target_feature = "avx512f"))] + unsafe { + Self(_mm_loadu_epi8(ptr as *const i8)) + } + #[cfg(target_arch = "aarch64")] + { + Self(vld1q_u8(ptr)) + } + } + + #[inline] + unsafe fn store(&self, ptr: *mut u8) { + #[cfg(target_arch = "x86_64")] + unsafe { + _mm_storeu_epi8(ptr as *mut i8, self.0) + } + #[cfg(target_arch = "aarch64")] + unsafe { + vst1q_u8(ptr, self.0) + } + } + + #[inline] + + unsafe fn store_unaligned(&self, ptr: *mut u8) { + #[cfg(target_arch = "x86_64")] + unsafe { + _mm_storeu_epi8(ptr as *mut i8, self.0) + } + #[cfg(target_arch = "aarch64")] + unsafe { + vst1q_u8(ptr, self.0) + } + } + + fn reduce_sum(&self) -> u8 { + todo!("the signature of reduce_sum is not correct"); + // #[cfg(target_arch = "x86_64")] + // unsafe { + // let zeros = _mm_setzero_si128(); + // let sum = _mm_sad_epu8(self.0, zeros); + + // let lower = _mm_cvtsi128_si64(sum) as u32; + // let upper = _mm_extract_epi64(sum, 1) as u32; + // lower + upper + // } + // #[cfg(target_arch = "aarch64")] + // unsafe { + // let low = vget_low_u8(self.0); + // let high = vget_high_u8(self.0); + // let sum = vaddl_u8(low, high); + // let sum16 = vaddw_u16(vdupq_n_u32(0), sum); + // let total = vpadd_u32(vget_low_u32(sum16), vget_high_u32(sum16)); + // vget_lane_u32(total, 0) + vget_lane_u32(total, 1) + // } + } + + #[inline] + fn reduce_min(&self) -> u8 { + #[cfg(target_arch = "x86_64")] + unsafe { + let low = _mm_and_si128(vec, _mm_set1_epi8(0xFF)); + let high = _mm_srli_si128(vec, 8); + let min_low = _mm_min_epu8(low, high); + let min_low = _mm_min_epu8(min_low, _mm_srli_si128(min_low, 4)); + let min_low = _mm_min_epu8(min_low, _mm_srli_si128(min_low, 2)); + let min_low = _mm_min_epu8(min_low, _mm_srli_si128(min_low, 1)); + _mm_extract_epi8(min_low, 0) as u8 + } + + #[cfg(target_arch = "aarch64")] + unsafe { + vminvq_u8(self.0) + } + } + + #[inline] + fn min(&self, rhs: &Self) -> Self { + #[cfg(target_arch = "x86_64")] + unsafe { + Self(_mm_min_epu8(self.0, rhs.0)) + } + #[cfg(target_arch = "aarch64")] + unsafe { + Self(vminq_u8(self.0, rhs.0)) + } + } + + fn find(&self, _val: u8) -> Option { + todo!() + } +} + +impl Add for u8x16 { + type Output = Self; + + #[inline] + fn add(self, rhs: Self) -> Self::Output { + #[cfg(target_arch = "x86_64")] + unsafe { + Self(_mm_add_epi8(self.0, rhs.0)) + } + #[cfg(target_arch = "aarch64")] + unsafe { + Self(vaddq_u8(self.0, rhs.0)) + } + #[cfg(target_arch = "loongarch64")] + unsafe { + Self(lasx_xvfadd_b(self.0, rhs.0)) + } + } +} + +impl AddAssign for u8x16 { + #[inline] + fn add_assign(&mut self, rhs: Self) { + #[cfg(target_arch = "x86_64")] + unsafe { + self.0 = _mm_add_epi8(self.0, rhs.0) + } + #[cfg(target_arch = "aarch64")] + unsafe { + self.0 = vaddq_u8(self.0, rhs.0) + } + #[cfg(target_arch = "loongarch64")] + unsafe { + self.0 = lasx_xvfadd_b(self.0, rhs.0) + } + } +} + +impl Mul for u8x16 { + type Output = Self; + + #[inline] + fn mul(self, rhs: Self) -> Self::Output { + #[cfg(target_arch = "x86_64")] + unsafe { + Self(_mm_mullo_epi16(self.0, rhs.0)) + } + #[cfg(target_arch = "aarch64")] + unsafe { + Self(vmulq_u8(self.0, rhs.0)) + } + #[cfg(target_arch = "loongarch64")] + unsafe { + Self(lasx_xvfmul_b(self.0, rhs.0)) + } + } +} + +impl Sub for u8x16 { + type Output = Self; + + #[inline] + fn sub(self, rhs: Self) -> Self::Output { + #[cfg(target_arch = "x86_64")] + unsafe { + Self(_mm_sub_epi8(self.0, rhs.0)) + } + #[cfg(target_arch = "aarch64")] + unsafe { + Self(vsubq_u8(self.0, rhs.0)) + } + #[cfg(target_arch = "loongarch64")] + unsafe { + Self(lasx_xvfsub_b(self.0, rhs.0)) + } + } +} + +impl SubAssign for u8x16 { + #[inline] + fn sub_assign(&mut self, rhs: Self) { + #[cfg(target_arch = "x86_64")] + unsafe { + self.0 = _mm_sub_epi8(self.0, rhs.0) + } + #[cfg(target_arch = "aarch64")] + unsafe { + self.0 = vsubq_u8(self.0, rhs.0) + } + #[cfg(target_arch = "loongarch64")] + unsafe { + self.0 = lasx_xvfsub_b(self.0, rhs.0) + } + } +} + +#[cfg(test)] +mod tests { + + use super::*; + + #[test] + fn test_basic_ops() { + let a = (0..16).map(|f| f as u8).collect::>(); + let b = (16..32).map(|f| f as u8).collect::>(); + + let simd_a = unsafe { u8x16::load_unaligned(a.as_ptr()) }; + let simd_b = unsafe { u8x16::load_unaligned(b.as_ptr()) }; + + let simd_add = simd_a + simd_b; + (0..16) + .zip(simd_add.as_array().iter()) + .for_each(|(x, &y)| assert_eq!((x + x + 16) as u8, y)); + + let simd_mul = simd_a * simd_b; + (0..16) + .zip(simd_mul.as_array().iter()) + .for_each(|(x, &y)| assert_eq!((x * (x + 16)) as u8, y)); + + let simd_sub = simd_b - simd_a; + simd_sub.as_array().iter().for_each(|&v| assert_eq!(v, 16)); + } + + #[test] + fn test_basic_u8x16_ops() { + let a = (0..16).map(|f| f as u8).collect::>(); + let b = (16..32).map(|f| f as u8).collect::>(); + + let simd_a = unsafe { u8x16::load_unaligned(a.as_ptr()) }; + let simd_b = unsafe { u8x16::load_unaligned(b.as_ptr()) }; + + let simd_add = simd_a + simd_b; + assert!((0..16) + .zip(simd_add.as_array().iter()) + .all(|(x, &y)| (x + x + 16) as u8 == y)); + + let simd_mul = simd_a * simd_b; + assert!((0..16) + .zip(simd_mul.as_array().iter()) + .all(|(x, &y)| (x * (x + 16)) as u8 == y)); + } +} From e24084dade473f954710ab2dbb7a9ad8bacf3726 Mon Sep 17 00:00:00 2001 From: BubbleCal Date: Wed, 27 Nov 2024 14:04:01 +0800 Subject: [PATCH 02/27] quantize distance table Signed-off-by: BubbleCal --- rust/lance-index/src/vector/pq/distance.rs | 67 ++++++++++++---------- 1 file changed, 36 insertions(+), 31 deletions(-) diff --git a/rust/lance-index/src/vector/pq/distance.rs b/rust/lance-index/src/vector/pq/distance.rs index d441595a04..5c4bbb2d43 100644 --- a/rust/lance-index/src/vector/pq/distance.rs +++ b/rust/lance-index/src/vector/pq/distance.rs @@ -9,6 +9,7 @@ use lance_linalg::simd::f32::f32x16; use lance_linalg::simd::u8::u8x16; use lance_linalg::simd::{Shuffle, SIMD}; use lance_table::utils::LanceIteratorExtension; +use num_traits::ToPrimitive; use super::{num_centroids, utils::get_sub_vector_centroids}; @@ -137,50 +138,54 @@ pub(super) fn compute_l2_distance_4bit( num_sub_vectors: usize, code: &[u8], ) -> Vec { + let (_, _, distance_table) = quantize_distance_table(distance_table); let num_vectors = code.len() * 2 / num_sub_vectors; - let mut distances = vec![0.0_f32; num_vectors]; - let mut distance_chunks = distances.chunks_exact_mut(16); + // store the distances in u32 to avoid overflow + let mut distances = vec![0u32; num_vectors]; const NUM_CENTROIDS: usize = 2_usize.pow(4); for (sub_vec_idx, vec_indices) in code.chunks_exact(num_vectors).enumerate() { - let vec_indices_chunks = vec_indices.chunks_exact(16); - for vec_indices_chunk in vec_indices_chunks { - let vec_indices_vec = unsafe { u8x16::load_unaligned(vec_indices_chunk.as_ptr()) }; - let low_indices = vec_indices_vec.bit_and(0x0F); - let dist_table = unsafe { - f32x16::load_unaligned(distance_table.as_ptr().add(sub_vec_idx * NUM_CENTROIDS)) - }; - let dists = dist_table.shuffle(low_indices); - - vec_indices_chunk - .iter() - .zip(distances.iter_mut()) - .for_each(|(¢roid_idx, sum)| { - *sum += dist_table[centroid_idx as usize]; - }); - } - - debug_assert_eq!(vec_indices.len(), distances.len()); let dist_table = &distance_table[sub_vec_idx * 2 * NUM_CENTROIDS..(sub_vec_idx * 2 + 1) * NUM_CENTROIDS]; - vec_indices - .iter() - .map(|idx| idx & 0xF) - .zip(distances.iter_mut()) - .for_each(|(idx, sum)| { - *sum += dist_table[idx as usize]; - }); let dist_table_next = &distance_table [(sub_vec_idx * 2 + 1) * NUM_CENTROIDS..(sub_vec_idx * 2 + 2) * NUM_CENTROIDS]; + debug_assert_eq!(vec_indices.len(), distances.len()); vec_indices .iter() - .map(|idx| idx >> 4) .zip(distances.iter_mut()) - .for_each(|(idx, sum)| { - *sum += dist_table_next[idx as usize]; + .for_each(|(¢roid_idx, sum)| { + // for 4bit PQ, `centroid_idx` is 2 index, each index is 4bit. + let current_idx = centroid_idx & 0xF; + let next_idx = centroid_idx >> 4; + *sum += dist_table[current_idx as usize] as u32; + *sum += dist_table_next[next_idx as usize] as u32; }); } - distances + // cast u32 to f32 + // no need to convert because it's still comparable + distances.iter().map(|&dist| dist as f32).collect() +} + +// Quantize the distance table to u8 +// returns (min, max, quantized_distance_table) +#[inline] +fn quantize_distance_table(distance_table: &[f32]) -> (f32, f32, Vec) { + // don't use ScalarQuantizer here, because it would introduce some overhead. + let min_dist = distance_table.iter().cloned().fold(f32::INFINITY, f32::min); + let max_dist = distance_table + .iter() + .cloned() + .fold(f32::NEG_INFINITY, f32::max); + let quantized_dist_table = distance_table + .iter() + .map(|&dist| { + ((dist - min_dist) * 255.0 / (max_dist - min_dist)) + .round() + .to_u8() + .unwrap() + }) + .collect(); + (min_dist, max_dist, quantized_dist_table) } /// Compute L2 distance from the query to all code without transposing the code. From 607f16d55eb6cff1015234459aec1135a3a2f1db Mon Sep 17 00:00:00 2001 From: BubbleCal Date: Wed, 27 Nov 2024 17:03:52 +0800 Subject: [PATCH 03/27] done Signed-off-by: BubbleCal --- rust/lance-index/src/vector/pq.rs | 8 +- rust/lance-index/src/vector/pq/distance.rs | 138 ++++++++------------- rust/lance-index/src/vector/pq/storage.rs | 9 +- rust/lance-linalg/src/simd/u8.rs | 37 +++++- rust/lance/src/index/vector/ivf/v2.rs | 6 +- 5 files changed, 101 insertions(+), 97 deletions(-) diff --git a/rust/lance-index/src/vector/pq.rs b/rust/lance-index/src/vector/pq.rs index 467599157b..7e325c1397 100644 --- a/rust/lance-index/src/vector/pq.rs +++ b/rust/lance-index/src/vector/pq.rs @@ -11,7 +11,7 @@ use arrow_array::{cast::AsArray, Array, FixedSizeListArray, UInt8Array}; use arrow_array::{ArrayRef, Float32Array, PrimitiveArray}; use arrow_schema::DataType; use deepsize::DeepSizeOf; -use distance::{build_distance_table_dot, compute_dot_distance}; +use distance::build_distance_table_dot; use lance_arrow::*; use lance_core::{Error, Result}; use lance_linalg::distance::{DistanceType, Dot, L2}; @@ -28,7 +28,7 @@ pub mod storage; pub mod transform; pub(crate) mod utils; -use self::distance::{build_distance_table_l2, compute_l2_distance}; +use self::distance::{build_distance_table_l2, compute_pq_distance}; pub use self::utils::num_centroids; use super::quantizer::{ Quantization, QuantizationMetadata, QuantizationType, Quantizer, QuantizerBuildParams, @@ -267,7 +267,7 @@ impl ProductQuantizer { key.values(), ); - let distances = compute_dot_distance( + let distances = compute_pq_distance( &distance_table, self.num_bits, self.num_sub_vectors, @@ -327,7 +327,7 @@ impl ProductQuantizer { /// The squared L2 distance. #[inline] fn compute_l2_distance(&self, distance_table: &[f32], code: &[u8]) -> Float32Array { - Float32Array::from(compute_l2_distance( + Float32Array::from(compute_pq_distance( distance_table, self.num_bits, self.num_sub_vectors, diff --git a/rust/lance-index/src/vector/pq/distance.rs b/rust/lance-index/src/vector/pq/distance.rs index 5c4bbb2d43..44fa171cbc 100644 --- a/rust/lance-index/src/vector/pq/distance.rs +++ b/rust/lance-index/src/vector/pq/distance.rs @@ -5,7 +5,6 @@ use core::panic; use std::cmp::min; use lance_linalg::distance::{dot_distance_batch, l2_distance_batch, Dot, L2}; -use lance_linalg::simd::f32::f32x16; use lance_linalg::simd::u8::u8x16; use lance_linalg::simd::{Shuffle, SIMD}; use lance_table::utils::LanceIteratorExtension; @@ -100,14 +99,14 @@ pub fn build_distance_table_dot_impl( /// The squared L2 distance. /// #[inline] -pub(super) fn compute_l2_distance( +pub(super) fn compute_pq_distance( distance_table: &[f32], num_bits: u32, num_sub_vectors: usize, code: &[u8], ) -> Vec { if num_bits == 4 { - return compute_l2_distance_4bit(distance_table, num_sub_vectors, code); + return compute_pq_distance_4bit(distance_table, num_sub_vectors, code); } // here `code` has been transposed, // so code[i][j] is the code of i-th sub-vector of the j-th vector, @@ -133,7 +132,7 @@ pub(super) fn compute_l2_distance( } #[inline] -pub(super) fn compute_l2_distance_4bit( +pub(super) fn compute_pq_distance_4bit( distance_table: &[f32], num_sub_vectors: usize, code: &[u8], @@ -141,24 +140,50 @@ pub(super) fn compute_l2_distance_4bit( let (_, _, distance_table) = quantize_distance_table(distance_table); let num_vectors = code.len() * 2 / num_sub_vectors; // store the distances in u32 to avoid overflow - let mut distances = vec![0u32; num_vectors]; + let mut distances = vec![0u8; num_vectors]; const NUM_CENTROIDS: usize = 2_usize.pow(4); for (sub_vec_idx, vec_indices) in code.chunks_exact(num_vectors).enumerate() { - let dist_table = - &distance_table[sub_vec_idx * 2 * NUM_CENTROIDS..(sub_vec_idx * 2 + 1) * NUM_CENTROIDS]; - let dist_table_next = &distance_table - [(sub_vec_idx * 2 + 1) * NUM_CENTROIDS..(sub_vec_idx * 2 + 2) * NUM_CENTROIDS]; debug_assert_eq!(vec_indices.len(), distances.len()); - vec_indices - .iter() - .zip(distances.iter_mut()) - .for_each(|(¢roid_idx, sum)| { - // for 4bit PQ, `centroid_idx` is 2 index, each index is 4bit. + let dist_table = unsafe { + u8x16::load_unaligned(distance_table.as_ptr().add(sub_vec_idx * 2 * NUM_CENTROIDS)) + }; + let next_dist_table = unsafe { + u8x16::load_unaligned( + distance_table + .as_ptr() + .add((sub_vec_idx * 2 + 1) * NUM_CENTROIDS), + ) + }; + for i in (0..num_vectors - NUM_CENTROIDS + 1).step_by(NUM_CENTROIDS) { + let vec_indices = unsafe { u8x16::load_unaligned(vec_indices.as_ptr().add(i)) }; + let distances = &mut distances[i..i + NUM_CENTROIDS]; + let results = unsafe { u8x16::load_unaligned(distances.as_ptr()) }; + + // compute current distances + let current_indices = vec_indices.bit_and(0x0F); + let results = results + dist_table.shuffle(current_indices); + + // compute next distances + let next_indices = vec_indices.right_shift_4(); + let results = results + next_dist_table.shuffle(next_indices); + + unsafe { + results.store_unaligned(distances.as_mut_ptr()); + } + } + let remainder = num_vectors % NUM_CENTROIDS; + if remainder > 0 { + let vec_indices = &vec_indices[num_vectors - remainder..]; + let distances = &mut distances[num_vectors - remainder..]; + let dist_table = &distance_table[sub_vec_idx * 2 * NUM_CENTROIDS..]; + let next_dist_table = &distance_table[(sub_vec_idx * 2 + 1) * NUM_CENTROIDS..]; + for (i, ¢roid_idx) in vec_indices.iter().enumerate() { let current_idx = centroid_idx & 0xF; let next_idx = centroid_idx >> 4; - *sum += dist_table[current_idx as usize] as u32; - *sum += dist_table_next[next_idx as usize] as u32; - }); + distances[i] += dist_table[current_idx as usize]; + distances[i] += next_dist_table[next_idx as usize]; + } + } } // cast u32 to f32 @@ -168,24 +193,27 @@ pub(super) fn compute_l2_distance_4bit( // Quantize the distance table to u8 // returns (min, max, quantized_distance_table) +// used for only 4bit PQ so num_centroids must be 16 #[inline] fn quantize_distance_table(distance_table: &[f32]) -> (f32, f32, Vec) { - // don't use ScalarQuantizer here, because it would introduce some overhead. - let min_dist = distance_table.iter().cloned().fold(f32::INFINITY, f32::min); - let max_dist = distance_table - .iter() - .cloned() - .fold(f32::NEG_INFINITY, f32::max); + const NUM_CENTROIDS: usize = 16; + // we set qmax to the maximum possible distance, + // then no need to handle overflow + let qmin = distance_table.iter().cloned().fold(f32::INFINITY, f32::min); + let qmax = distance_table + .chunks(NUM_CENTROIDS) + .map(|chunk| chunk.iter().cloned().fold(f32::NEG_INFINITY, f32::max)) + .sum::(); let quantized_dist_table = distance_table .iter() .map(|&dist| { - ((dist - min_dist) * 255.0 / (max_dist - min_dist)) + ((dist - qmin) * 255.0 / (qmax - qmin)) .round() .to_u8() - .unwrap() + .unwrap_or(u8::MAX) }) .collect(); - (min_dist, max_dist, quantized_dist_table) + (qmin, qmax, quantized_dist_table) } /// Compute L2 distance from the query to all code without transposing the code. @@ -231,62 +259,6 @@ fn compute_l2_distance_without_transposing( distances.chain(remainder).collect() } -#[inline] -pub fn compute_dot_distance( - distance_table: &[f32], - num_bits: u32, - num_sub_vectors: usize, - code: &[u8], -) -> Vec { - if num_bits == 4 { - return compute_dot_distance_4bit(distance_table, num_sub_vectors, code); - } - let num_vectors = code.len() / num_sub_vectors; - let mut distances = vec![0.0; num_vectors]; - let num_centroids = num_centroids(num_bits); - for (sub_vec_idx, vec_indices) in code.chunks_exact(num_vectors).enumerate() { - let dist_table = &distance_table[sub_vec_idx * num_centroids..]; - vec_indices - .iter() - .zip(distances.iter_mut()) - .for_each(|(¢roid_idx, sum)| { - *sum += dist_table[centroid_idx as usize]; - }); - } - - distances -} - -#[inline] -pub fn compute_dot_distance_4bit( - distance_table: &[f32], - num_sub_vectors: usize, - code: &[u8], -) -> Vec { - let num_vectors = code.len() * 2 / num_sub_vectors; - let mut distances = vec![0.0; num_vectors]; - const NUM_CENTROIDS: usize = 2_usize.pow(4); - for (sub_vec_idx, vec_indices) in code.chunks_exact(num_vectors).enumerate() { - let dist_table = - &distance_table[sub_vec_idx * 2 * NUM_CENTROIDS..(sub_vec_idx * 2 + 1) * NUM_CENTROIDS]; - let dist_table_next = &distance_table - [(sub_vec_idx * 2 + 1) * NUM_CENTROIDS..(sub_vec_idx * 2 + 2) * NUM_CENTROIDS]; - debug_assert_eq!(vec_indices.len(), distances.len()); - vec_indices - .iter() - .zip(distances.iter_mut()) - .for_each(|(¢roid_idx, sum)| { - // for 4bit PQ, `centroid_idx` is 2 index, each index is 4bit. - let current_idx = centroid_idx & 0xF; - let next_idx = centroid_idx >> 4; - *sum += dist_table[current_idx as usize]; - *sum += dist_table_next[next_idx as usize]; - }); - } - - distances -} - #[cfg(test)] mod tests { use crate::vector::pq::storage::transpose; @@ -308,7 +280,7 @@ mod tests { let pq_codes = Vec::from_iter((0..num_vectors * num_sub_vectors).map(|v| v as u8)); let pq_codes = UInt8Array::from_iter_values(pq_codes); let transposed_codes = transpose(&pq_codes, num_vectors, num_sub_vectors); - let distances = compute_l2_distance( + let distances = compute_pq_distance( &distance_table, num_bits, num_sub_vectors, diff --git a/rust/lance-index/src/vector/pq/storage.rs b/rust/lance-index/src/vector/pq/storage.rs index ef3839aa3e..2ed2681917 100644 --- a/rust/lance-index/src/vector/pq/storage.rs +++ b/rust/lance-index/src/vector/pq/storage.rs @@ -32,8 +32,7 @@ use prost::Message; use serde::{Deserialize, Serialize}; use snafu::{location, Location}; -use super::distance::{build_distance_table_dot, compute_l2_distance}; -use super::distance::{build_distance_table_l2, compute_dot_distance}; +use super::distance::{build_distance_table_dot, build_distance_table_l2, compute_pq_distance}; use super::ProductQuantizer; use crate::vector::storage::STORAGE_METADATA_KEY; use crate::{ @@ -626,7 +625,7 @@ impl DistCalculator for PQDistCalculator { fn distance_all(&self) -> Vec { match self.distance_type { - DistanceType::L2 => compute_l2_distance( + DistanceType::L2 => compute_pq_distance( &self.distance_table, self.num_bits, self.num_sub_vectors, @@ -642,7 +641,7 @@ impl DistCalculator for PQDistCalculator { // L2 over normalized vectors: ||x - y|| = x^2 + y^2 - 2 * xy = 1 + 1 - 2 * xy = 2 * (1 - xy) // Cosine distance: 1 - |xy| / (||x|| * ||y||) = 1 - xy / (x^2 * y^2) = 1 - xy / (1 * 1) = 1 - xy // Therefore, Cosine = L2 / 2 - let l2_dists = compute_l2_distance( + let l2_dists = compute_pq_distance( &self.distance_table, self.num_bits, self.num_sub_vectors, @@ -650,7 +649,7 @@ impl DistCalculator for PQDistCalculator { ); l2_dists.into_iter().map(|v| v / 2.0).collect() } - DistanceType::Dot => compute_dot_distance( + DistanceType::Dot => compute_pq_distance( &self.distance_table, self.num_bits, self.num_sub_vectors, diff --git a/rust/lance-linalg/src/simd/u8.rs b/rust/lance-linalg/src/simd/u8.rs index e3aa59c176..1c56577f18 100644 --- a/rust/lance-linalg/src/simd/u8.rs +++ b/rust/lance-linalg/src/simd/u8.rs @@ -15,7 +15,7 @@ use std::arch::x86_64::*; use std::mem::transmute; use std::ops::{Add, AddAssign, Mul, Sub, SubAssign}; -use super::SIMD; +use super::{Shuffle, SIMD}; /// 16 of 8-bit `u8` values. #[allow(non_camel_case_types)] @@ -45,6 +45,22 @@ impl u8x16 { Self(lasx_xvfand_b(self.0, mask)) } } + + #[inline] + pub fn right_shift_4(self) -> Self { + #[cfg(target_arch = "x86_64")] + unsafe { + Self(_mm_srli_epi16(self.0, 4)) + } + #[cfg(target_arch = "aarch64")] + unsafe { + Self(vshrq_n_u8::<4>(self.0)) + } + #[cfg(target_arch = "loongarch64")] + unsafe { + Self(lasx_xvfrsh_b(self.0, 4)) + } + } } impl std::fmt::Debug for u8x16 { @@ -202,6 +218,23 @@ impl SIMD for u8x16 { } } +impl Shuffle for u8x16 { + fn shuffle(&self, indices: u8x16) -> Self { + #[cfg(target_arch = "x86_64")] + unsafe { + Self(_mm_shuffle_epi8(self.0, indices.0)) + } + #[cfg(target_arch = "aarch64")] + unsafe { + Self(vqtbl1q_u8(self.0, indices.0)) + } + #[cfg(target_arch = "loongarch64")] + unsafe { + Self(lasx_xvqtbl_b(self.0, indices.0)) + } + } +} + impl Add for u8x16 { type Output = Self; @@ -213,7 +246,7 @@ impl Add for u8x16 { } #[cfg(target_arch = "aarch64")] unsafe { - Self(vaddq_u8(self.0, rhs.0)) + Self(vqaddq_u8(self.0, rhs.0)) } #[cfg(target_arch = "loongarch64")] unsafe { diff --git a/rust/lance/src/index/vector/ivf/v2.rs b/rust/lance/src/index/vector/ivf/v2.rs index 727f50ecea..9cbc4b9d9b 100644 --- a/rust/lance/src/index/vector/ivf/v2.rs +++ b/rust/lance/src/index/vector/ivf/v2.rs @@ -692,9 +692,9 @@ mod tests { } #[rstest] - #[case(4, DistanceType::L2, 0.9)] - #[case(4, DistanceType::Cosine, 0.9)] - #[case(4, DistanceType::Dot, 0.8)] + #[case(4, DistanceType::L2, 0.8)] + #[case(4, DistanceType::Cosine, 0.8)] + #[case(4, DistanceType::Dot, 0.4)] #[tokio::test] async fn test_build_ivf_pq_4bit( #[case] nlist: usize, From 4504fbe0b418d873e270a183236d208d012ae270 Mon Sep 17 00:00:00 2001 From: BubbleCal Date: Wed, 27 Nov 2024 17:07:12 +0800 Subject: [PATCH 04/27] fix Signed-off-by: BubbleCal --- rust/lance-linalg/src/simd/f32.rs | 32 +------------------------------ rust/lance-linalg/src/simd/u8.rs | 4 ++-- 2 files changed, 3 insertions(+), 33 deletions(-) diff --git a/rust/lance-linalg/src/simd/f32.rs b/rust/lance-linalg/src/simd/f32.rs index 88da97cbb8..8deb50338b 100644 --- a/rust/lance-linalg/src/simd/f32.rs +++ b/rust/lance-linalg/src/simd/f32.rs @@ -15,7 +15,7 @@ use std::arch::x86_64::*; use std::mem::transmute; use std::ops::{Add, AddAssign, Mul, Sub, SubAssign}; -use super::{FloatSimd, Shuffle, SIMD}; +use super::{FloatSimd, SIMD}; /// 8 of 32-bit `f32` values. Use 256-bit SIMD if possible. #[allow(non_camel_case_types)] @@ -800,36 +800,6 @@ impl FloatSimd for f32x16 { } } -impl Shuffle for f32x16 { - fn shuffle(&self, indices: super::u8::u8x16) -> Self { - #[cfg(all(target_arch = "x86_64", target_feature = "avx512f"))] - unsafe { - // cast m128i to m512i - let extended_indices = _mm512_cvtepu8_epi32(indices); - Self(_mm512_permutexvar_ps(extended_indices, self.0)) - } - #[cfg(all(target_arch = "x86_64", not(target_feature = "avx512f")))] - unsafe { - let low_indices = _mm256_castsi128_si256(_mm_srli_si128(indices, 0)); - let high_indices = _mm256_castsi128_si256(_mm_srli_si128(indices, 8)); - let result_low = _mm256_permutevar8x32_ps(self.0, indices_low); - let result_high = _mm256_permutevar8x32_ps(self.1, indices_high); - Self(result_low, result_high) - } - #[cfg(target_arch = "aarch64")] - unsafe { - // aarch does not have shuffle instruction for floats - let values = self.as_array(); - let indices = indices.as_array(); - let mut result = [0.0; 16]; - for i in 0..16 { - result[i] = values[indices[i] as usize]; - } - Self::load_unaligned(result.as_ptr()) - } - } -} - impl Add for f32x16 { type Output = Self; diff --git a/rust/lance-linalg/src/simd/u8.rs b/rust/lance-linalg/src/simd/u8.rs index 1c56577f18..8ed0b1d823 100644 --- a/rust/lance-linalg/src/simd/u8.rs +++ b/rust/lance-linalg/src/simd/u8.rs @@ -186,8 +186,8 @@ impl SIMD for u8x16 { fn reduce_min(&self) -> u8 { #[cfg(target_arch = "x86_64")] unsafe { - let low = _mm_and_si128(vec, _mm_set1_epi8(0xFF)); - let high = _mm_srli_si128(vec, 8); + let low = _mm_and_si128(self.0, _mm_set1_epi8(0xFF)); + let high = _mm_srli_si128(self.0, 8); let min_low = _mm_min_epu8(low, high); let min_low = _mm_min_epu8(min_low, _mm_srli_si128(min_low, 4)); let min_low = _mm_min_epu8(min_low, _mm_srli_si128(min_low, 2)); From 6c13fc119daf35468c50b18ddd08cb7f38313da4 Mon Sep 17 00:00:00 2001 From: BubbleCal Date: Wed, 27 Nov 2024 17:20:47 +0800 Subject: [PATCH 05/27] fix Signed-off-by: BubbleCal --- rust/lance-linalg/src/simd/u8.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/rust/lance-linalg/src/simd/u8.rs b/rust/lance-linalg/src/simd/u8.rs index 8ed0b1d823..4b5abb29a4 100644 --- a/rust/lance-linalg/src/simd/u8.rs +++ b/rust/lance-linalg/src/simd/u8.rs @@ -113,7 +113,7 @@ impl SIMD for u8x16 { #[inline] unsafe fn load(ptr: *const u8) -> Self { - #[cfg(target_arch = "x86_64")] + #[cfg(all(target_arch = "x86_64", target_feature = "avx512f"))] unsafe { Self(_mm_loadu_epi8(ptr as *const i8)) } @@ -137,7 +137,7 @@ impl SIMD for u8x16 { #[inline] unsafe fn store(&self, ptr: *mut u8) { - #[cfg(target_arch = "x86_64")] + #[cfg(all(target_arch = "x86_64", target_feature = "avx512f"))] unsafe { _mm_storeu_epi8(ptr as *mut i8, self.0) } From 506d0528ac53ba3b36aeba147e4e261070009e33 Mon Sep 17 00:00:00 2001 From: BubbleCal Date: Wed, 27 Nov 2024 17:28:28 +0800 Subject: [PATCH 06/27] fix Signed-off-by: BubbleCal --- rust/lance-linalg/src/simd/u8.rs | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/rust/lance-linalg/src/simd/u8.rs b/rust/lance-linalg/src/simd/u8.rs index 4b5abb29a4..6cab31b571 100644 --- a/rust/lance-linalg/src/simd/u8.rs +++ b/rust/lance-linalg/src/simd/u8.rs @@ -113,9 +113,9 @@ impl SIMD for u8x16 { #[inline] unsafe fn load(ptr: *const u8) -> Self { - #[cfg(all(target_arch = "x86_64", target_feature = "avx512f"))] + #[cfg(target_arch = "x86_64")] unsafe { - Self(_mm_loadu_epi8(ptr as *const i8)) + Self(_mm_loadu_si128(ptr as *const __m128i)) } #[cfg(target_arch = "aarch64")] { @@ -125,9 +125,9 @@ impl SIMD for u8x16 { #[inline] unsafe fn load_unaligned(ptr: *const u8) -> Self { - #[cfg(all(target_arch = "x86_64", target_feature = "avx512f"))] + #[cfg(target_arch = "x86_64")] unsafe { - Self(_mm_loadu_epi8(ptr as *const i8)) + Self(_mm_loadu_si128(ptr as *const __m128i)) } #[cfg(target_arch = "aarch64")] { @@ -137,9 +137,9 @@ impl SIMD for u8x16 { #[inline] unsafe fn store(&self, ptr: *mut u8) { - #[cfg(all(target_arch = "x86_64", target_feature = "avx512f"))] + #[cfg(target_arch = "x86_64")] unsafe { - _mm_storeu_epi8(ptr as *mut i8, self.0) + _mm_storeu_si128(ptr as *mut i8, self.0) } #[cfg(target_arch = "aarch64")] unsafe { From aef5b11f237f4af2122166e368d7496db7e4ce51 Mon Sep 17 00:00:00 2001 From: BubbleCal Date: Wed, 27 Nov 2024 17:31:43 +0800 Subject: [PATCH 07/27] fix Signed-off-by: BubbleCal --- rust/lance-linalg/src/simd/u8.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/rust/lance-linalg/src/simd/u8.rs b/rust/lance-linalg/src/simd/u8.rs index 6cab31b571..311996618d 100644 --- a/rust/lance-linalg/src/simd/u8.rs +++ b/rust/lance-linalg/src/simd/u8.rs @@ -139,7 +139,7 @@ impl SIMD for u8x16 { unsafe fn store(&self, ptr: *mut u8) { #[cfg(target_arch = "x86_64")] unsafe { - _mm_storeu_si128(ptr as *mut i8, self.0) + _mm_storeu_si128(ptr as *mut __m128i, self.0) } #[cfg(target_arch = "aarch64")] unsafe { @@ -152,7 +152,7 @@ impl SIMD for u8x16 { unsafe fn store_unaligned(&self, ptr: *mut u8) { #[cfg(target_arch = "x86_64")] unsafe { - _mm_storeu_epi8(ptr as *mut i8, self.0) + _mm_storeu_si128(ptr as *mut __m128i, self.0) } #[cfg(target_arch = "aarch64")] unsafe { From a2bc89749c8feffd6d7d4c7644b1201fc4b757de Mon Sep 17 00:00:00 2001 From: BubbleCal Date: Wed, 27 Nov 2024 17:34:35 +0800 Subject: [PATCH 08/27] fix Signed-off-by: BubbleCal --- rust/lance-linalg/src/simd/u8.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/rust/lance-linalg/src/simd/u8.rs b/rust/lance-linalg/src/simd/u8.rs index 311996618d..21b1f4aeb2 100644 --- a/rust/lance-linalg/src/simd/u8.rs +++ b/rust/lance-linalg/src/simd/u8.rs @@ -186,7 +186,7 @@ impl SIMD for u8x16 { fn reduce_min(&self) -> u8 { #[cfg(target_arch = "x86_64")] unsafe { - let low = _mm_and_si128(self.0, _mm_set1_epi8(0xFF)); + let low = _mm_and_si128(self.0, _mm_set1_epi8(0xFF as i8)); let high = _mm_srli_si128(self.0, 8); let min_low = _mm_min_epu8(low, high); let min_low = _mm_min_epu8(min_low, _mm_srli_si128(min_low, 4)); From 22843d3c128c11105e2caebaa1710aceea882a37 Mon Sep 17 00:00:00 2001 From: BubbleCal Date: Wed, 27 Nov 2024 17:44:20 +0800 Subject: [PATCH 09/27] fix Signed-off-by: BubbleCal --- rust/lance-linalg/src/simd/u8.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/rust/lance-linalg/src/simd/u8.rs b/rust/lance-linalg/src/simd/u8.rs index 21b1f4aeb2..78efa06926 100644 --- a/rust/lance-linalg/src/simd/u8.rs +++ b/rust/lance-linalg/src/simd/u8.rs @@ -186,7 +186,7 @@ impl SIMD for u8x16 { fn reduce_min(&self) -> u8 { #[cfg(target_arch = "x86_64")] unsafe { - let low = _mm_and_si128(self.0, _mm_set1_epi8(0xFF as i8)); + let low = _mm_and_si128(self.0, _mm_set1_epi8(0xFF_i8)); let high = _mm_srli_si128(self.0, 8); let min_low = _mm_min_epu8(low, high); let min_low = _mm_min_epu8(min_low, _mm_srli_si128(min_low, 4)); From 9449c9d6765fd3ad5821b35bea877216dfcc4e66 Mon Sep 17 00:00:00 2001 From: BubbleCal Date: Wed, 27 Nov 2024 17:49:39 +0800 Subject: [PATCH 10/27] fix Signed-off-by: BubbleCal --- rust/lance-linalg/src/simd/u8.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/rust/lance-linalg/src/simd/u8.rs b/rust/lance-linalg/src/simd/u8.rs index 78efa06926..152c0fc190 100644 --- a/rust/lance-linalg/src/simd/u8.rs +++ b/rust/lance-linalg/src/simd/u8.rs @@ -186,7 +186,7 @@ impl SIMD for u8x16 { fn reduce_min(&self) -> u8 { #[cfg(target_arch = "x86_64")] unsafe { - let low = _mm_and_si128(self.0, _mm_set1_epi8(0xFF_i8)); + let low = _mm_and_si128(self.0, _mm_set1_epi8(0xFF_u8 as i8)); let high = _mm_srli_si128(self.0, 8); let min_low = _mm_min_epu8(low, high); let min_low = _mm_min_epu8(min_low, _mm_srli_si128(min_low, 4)); From 23ffa799cadfa63e267f9e79a833b3de6e1e3040 Mon Sep 17 00:00:00 2001 From: BubbleCal Date: Wed, 27 Nov 2024 19:43:50 +0800 Subject: [PATCH 11/27] fix x86 shift Signed-off-by: BubbleCal --- rust/lance-linalg/src/simd/u8.rs | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/rust/lance-linalg/src/simd/u8.rs b/rust/lance-linalg/src/simd/u8.rs index 152c0fc190..94864e95ab 100644 --- a/rust/lance-linalg/src/simd/u8.rs +++ b/rust/lance-linalg/src/simd/u8.rs @@ -50,7 +50,9 @@ impl u8x16 { pub fn right_shift_4(self) -> Self { #[cfg(target_arch = "x86_64")] unsafe { - Self(_mm_srli_epi16(self.0, 4)) + let shifted = _mm_srli_epi16(self.0, 4); + let mask = _mm_set1_epi8(0x0F); + Self(_mm_and_si128(shifted, mask)) } #[cfg(target_arch = "aarch64")] unsafe { From 31f53ed698fc8e4cffbb35a8b3bea9d92c2320c7 Mon Sep 17 00:00:00 2001 From: BubbleCal Date: Wed, 27 Nov 2024 20:44:57 +0800 Subject: [PATCH 12/27] fix x86 u8x16 multiply Signed-off-by: BubbleCal --- rust/lance-linalg/src/simd/u8.rs | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/rust/lance-linalg/src/simd/u8.rs b/rust/lance-linalg/src/simd/u8.rs index 94864e95ab..38241b32d5 100644 --- a/rust/lance-linalg/src/simd/u8.rs +++ b/rust/lance-linalg/src/simd/u8.rs @@ -282,7 +282,15 @@ impl Mul for u8x16 { fn mul(self, rhs: Self) -> Self::Output { #[cfg(target_arch = "x86_64")] unsafe { - Self(_mm_mullo_epi16(self.0, rhs.0)) + let a_lo = _mm_unpacklo_epi8(self.0, _mm_setzero_si128()); + let a_hi = _mm_unpackhi_epi8(self.0, _mm_setzero_si128()); + let b_lo = _mm_unpacklo_epi8(rhs.0, _mm_setzero_si128()); + let b_hi = _mm_unpackhi_epi8(rhs.0, _mm_setzero_si128()); + + let res_lo = _mm_mullo_epi16(a_lo, b_lo); + let res_hi = _mm_mullo_epi16(a_hi, b_hi); + + Self(_mm_packus_epi16(res_lo, res_hi)) } #[cfg(target_arch = "aarch64")] unsafe { From 8040a46fd4872a502d3eab33542e3a5597c1e724 Mon Sep 17 00:00:00 2001 From: BubbleCal Date: Thu, 28 Nov 2024 13:08:09 +0800 Subject: [PATCH 13/27] fix ut Signed-off-by: BubbleCal --- rust/lance-linalg/src/simd/u8.rs | 15 ++++++++++----- 1 file changed, 10 insertions(+), 5 deletions(-) diff --git a/rust/lance-linalg/src/simd/u8.rs b/rust/lance-linalg/src/simd/u8.rs index 38241b32d5..63f68df005 100644 --- a/rust/lance-linalg/src/simd/u8.rs +++ b/rust/lance-linalg/src/simd/u8.rs @@ -377,13 +377,18 @@ mod tests { let simd_b = unsafe { u8x16::load_unaligned(b.as_ptr()) }; let simd_add = simd_a + simd_b; - assert!((0..16) + (0..16) .zip(simd_add.as_array().iter()) - .all(|(x, &y)| (x + x + 16) as u8 == y)); + .for_each(|(x, &y)| assert_eq!((x + x + 16) as u8, y)); + // on x86_64, the result of simd_mul is saturated + // on aarch64, the result of simd_mul is not saturated let simd_mul = simd_a * simd_b; - assert!((0..16) - .zip(simd_mul.as_array().iter()) - .all(|(x, &y)| (x * (x + 16)) as u8 == y)); + (0..16).zip(simd_mul.as_array().iter()).for_each(|(x, &y)| { + #[cfg(target_arch = "x86_64")] + assert_eq!(std::cmp::min(x * (x + 16), 255) as u8, y); + #[cfg(target_arch = "aarch64")] + assert_eq!(x * (x + 16) as u8, y); + }); } } From e5f82791543c07c84e410138d33cdb2a549b77db Mon Sep 17 00:00:00 2001 From: BubbleCal Date: Thu, 28 Nov 2024 13:40:22 +0800 Subject: [PATCH 14/27] fix ut Signed-off-by: BubbleCal --- rust/lance-linalg/src/simd/u8.rs | 22 ---------------------- 1 file changed, 22 deletions(-) diff --git a/rust/lance-linalg/src/simd/u8.rs b/rust/lance-linalg/src/simd/u8.rs index 63f68df005..1c984ae199 100644 --- a/rust/lance-linalg/src/simd/u8.rs +++ b/rust/lance-linalg/src/simd/u8.rs @@ -346,28 +346,6 @@ mod tests { use super::*; - #[test] - fn test_basic_ops() { - let a = (0..16).map(|f| f as u8).collect::>(); - let b = (16..32).map(|f| f as u8).collect::>(); - - let simd_a = unsafe { u8x16::load_unaligned(a.as_ptr()) }; - let simd_b = unsafe { u8x16::load_unaligned(b.as_ptr()) }; - - let simd_add = simd_a + simd_b; - (0..16) - .zip(simd_add.as_array().iter()) - .for_each(|(x, &y)| assert_eq!((x + x + 16) as u8, y)); - - let simd_mul = simd_a * simd_b; - (0..16) - .zip(simd_mul.as_array().iter()) - .for_each(|(x, &y)| assert_eq!((x * (x + 16)) as u8, y)); - - let simd_sub = simd_b - simd_a; - simd_sub.as_array().iter().for_each(|&v| assert_eq!(v, 16)); - } - #[test] fn test_basic_u8x16_ops() { let a = (0..16).map(|f| f as u8).collect::>(); From f5371d897956b9b149125ddb2752284d572202dd Mon Sep 17 00:00:00 2001 From: BubbleCal Date: Thu, 28 Nov 2024 14:03:01 +0800 Subject: [PATCH 15/27] normalize vectors for tests Signed-off-by: BubbleCal --- rust/lance/src/index/vector/ivf/v2.rs | 2 ++ 1 file changed, 2 insertions(+) diff --git a/rust/lance/src/index/vector/ivf/v2.rs b/rust/lance/src/index/vector/ivf/v2.rs index 9cbc4b9d9b..414430125d 100644 --- a/rust/lance/src/index/vector/ivf/v2.rs +++ b/rust/lance/src/index/vector/ivf/v2.rs @@ -532,6 +532,7 @@ mod tests { use lance_index::vector::DIST_COL; use lance_index::{DatasetIndexExt, IndexType}; use lance_linalg::distance::DistanceType; + use lance_linalg::kernels::normalize_arrow; use lance_testing::datagen::generate_random_array_with_range; use rstest::rstest; use tempfile::tempdir; @@ -545,6 +546,7 @@ mod tests { range: Range, ) -> (Dataset, Arc) { let vectors = generate_random_array_with_range::(1000 * DIM, range); + let vectors = normalize_arrow(&vectors).unwrap(); let metadata: HashMap = vec![("test".to_string(), "ivf_pq".to_string())] .into_iter() .collect(); From b8a201699fd64a24d2bf5aa605635de27f5f7671 Mon Sep 17 00:00:00 2001 From: BubbleCal Date: Thu, 28 Nov 2024 14:27:48 +0800 Subject: [PATCH 16/27] fix ut Signed-off-by: BubbleCal --- rust/lance-linalg/src/simd/u8.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/rust/lance-linalg/src/simd/u8.rs b/rust/lance-linalg/src/simd/u8.rs index 1c984ae199..13d6d30ec8 100644 --- a/rust/lance-linalg/src/simd/u8.rs +++ b/rust/lance-linalg/src/simd/u8.rs @@ -364,9 +364,9 @@ mod tests { let simd_mul = simd_a * simd_b; (0..16).zip(simd_mul.as_array().iter()).for_each(|(x, &y)| { #[cfg(target_arch = "x86_64")] - assert_eq!(std::cmp::min(x * (x + 16), 255) as u8, y); + assert_eq!(std::cmp::min(x * (x + 16), 255_i32) as u8, y); #[cfg(target_arch = "aarch64")] - assert_eq!(x * (x + 16) as u8, y); + assert_eq!((x * (x + 16_i32)) as u8, y); }); } } From 4406c084c4f9b178b6a6e85b74c05a34940a5bbf Mon Sep 17 00:00:00 2001 From: BubbleCal Date: Thu, 28 Nov 2024 14:58:33 +0800 Subject: [PATCH 17/27] lower recall requirement Signed-off-by: BubbleCal --- rust/lance/src/index/vector/ivf/v2.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/rust/lance/src/index/vector/ivf/v2.rs b/rust/lance/src/index/vector/ivf/v2.rs index 414430125d..9f846cf3e9 100644 --- a/rust/lance/src/index/vector/ivf/v2.rs +++ b/rust/lance/src/index/vector/ivf/v2.rs @@ -694,8 +694,8 @@ mod tests { } #[rstest] - #[case(4, DistanceType::L2, 0.8)] - #[case(4, DistanceType::Cosine, 0.8)] + #[case(4, DistanceType::L2, 0.75)] + #[case(4, DistanceType::Cosine, 0.75)] #[case(4, DistanceType::Dot, 0.4)] #[tokio::test] async fn test_build_ivf_pq_4bit( From e22c21491ed222c93ab910587d67ab16b728bfb0 Mon Sep 17 00:00:00 2001 From: BubbleCal Date: Fri, 29 Nov 2024 19:45:10 +0800 Subject: [PATCH 18/27] fix recall Signed-off-by: BubbleCal --- rust/lance-index/src/vector/pq/distance.rs | 64 +++++++++++++--------- rust/lance/src/index/vector/ivf/v2.rs | 1 + 2 files changed, 40 insertions(+), 25 deletions(-) diff --git a/rust/lance-index/src/vector/pq/distance.rs b/rust/lance-index/src/vector/pq/distance.rs index 44fa171cbc..579dbefff1 100644 --- a/rust/lance-index/src/vector/pq/distance.rs +++ b/rust/lance-index/src/vector/pq/distance.rs @@ -6,9 +6,8 @@ use std::cmp::min; use lance_linalg::distance::{dot_distance_batch, l2_distance_batch, Dot, L2}; use lance_linalg::simd::u8::u8x16; -use lance_linalg::simd::{Shuffle, SIMD}; +use lance_linalg::simd::Shuffle; use lance_table::utils::LanceIteratorExtension; -use num_traits::ToPrimitive; use super::{num_centroids, utils::get_sub_vector_centroids}; @@ -137,17 +136,17 @@ pub(super) fn compute_pq_distance_4bit( num_sub_vectors: usize, code: &[u8], ) -> Vec { - let (_, _, distance_table) = quantize_distance_table(distance_table); + let (qmin, qmax, distance_table) = quantize_distance_table(distance_table); let num_vectors = code.len() * 2 / num_sub_vectors; // store the distances in u32 to avoid overflow - let mut distances = vec![0u8; num_vectors]; + let mut distances = vec![0.0f32; num_vectors]; const NUM_CENTROIDS: usize = 2_usize.pow(4); for (sub_vec_idx, vec_indices) in code.chunks_exact(num_vectors).enumerate() { debug_assert_eq!(vec_indices.len(), distances.len()); - let dist_table = unsafe { + let origin_dist_table = unsafe { u8x16::load_unaligned(distance_table.as_ptr().add(sub_vec_idx * 2 * NUM_CENTROIDS)) }; - let next_dist_table = unsafe { + let origin_next_dist_table = unsafe { u8x16::load_unaligned( distance_table .as_ptr() @@ -157,19 +156,25 @@ pub(super) fn compute_pq_distance_4bit( for i in (0..num_vectors - NUM_CENTROIDS + 1).step_by(NUM_CENTROIDS) { let vec_indices = unsafe { u8x16::load_unaligned(vec_indices.as_ptr().add(i)) }; let distances = &mut distances[i..i + NUM_CENTROIDS]; - let results = unsafe { u8x16::load_unaligned(distances.as_ptr()) }; // compute current distances let current_indices = vec_indices.bit_and(0x0F); - let results = results + dist_table.shuffle(current_indices); + let dist_table = origin_dist_table; + let results = dist_table.shuffle(current_indices); + debug_assert_eq!(dist_table.as_array(), origin_dist_table.as_array()); // compute next distances let next_indices = vec_indices.right_shift_4(); + let next_dist_table = origin_next_dist_table; let results = results + next_dist_table.shuffle(next_indices); - unsafe { - results.store_unaligned(distances.as_mut_ptr()); - } + results + .as_array() + .into_iter() + .zip(distances.iter_mut()) + .for_each(|(d, sum)| { + *sum += d as f32; + }); } let remainder = num_vectors % NUM_CENTROIDS; if remainder > 0 { @@ -180,39 +185,48 @@ pub(super) fn compute_pq_distance_4bit( for (i, ¢roid_idx) in vec_indices.iter().enumerate() { let current_idx = centroid_idx & 0xF; let next_idx = centroid_idx >> 4; - distances[i] += dist_table[current_idx as usize]; - distances[i] += next_dist_table[next_idx as usize]; + distances[i] += dist_table[current_idx as usize] as f32; + distances[i] += next_dist_table[next_idx as usize] as f32; } } } - // cast u32 to f32 - // no need to convert because it's still comparable - distances.iter().map(|&dist| dist as f32).collect() + // need to dequantize the distances + // to make the distances comparable to the others from the other partitions + distances.iter_mut().for_each(|d| { + *d = *d * (qmax - qmin) / 255.0 + qmin; + }); + distances } // Quantize the distance table to u8 -// returns (min, max, quantized_distance_table) +// returns quantized_distance_table // used for only 4bit PQ so num_centroids must be 16 #[inline] fn quantize_distance_table(distance_table: &[f32]) -> (f32, f32, Vec) { const NUM_CENTROIDS: usize = 16; // we set qmax to the maximum possible distance, // then no need to handle overflow + let qmin = distance_table.iter().cloned().fold(f32::INFINITY, f32::min); + // let qmax = distance_table + // .chunks(NUM_CENTROIDS) + // .tuple_windows() + // .map(|(a, b)| { + // let a_max = a.iter().cloned().fold(f32::NEG_INFINITY, f32::max); + // let b_max = b.iter().cloned().fold(f32::NEG_INFINITY, f32::max); + // a_max + b_max + // }) + // .fold(f32::NEG_INFINITY, f32::max); let qmax = distance_table - .chunks(NUM_CENTROIDS) - .map(|chunk| chunk.iter().cloned().fold(f32::NEG_INFINITY, f32::max)) + .chunks_exact(NUM_CENTROIDS) + .map(|c| c.iter().cloned().fold(f32::NEG_INFINITY, f32::max)) .sum::(); let quantized_dist_table = distance_table .iter() - .map(|&dist| { - ((dist - qmin) * 255.0 / (qmax - qmin)) - .round() - .to_u8() - .unwrap_or(u8::MAX) - }) + .map(|&d| ((d - qmin) * 255.0 / (qmax - qmin)).round() as u8) .collect(); + (qmin, qmax, quantized_dist_table) } diff --git a/rust/lance/src/index/vector/ivf/v2.rs b/rust/lance/src/index/vector/ivf/v2.rs index 9f846cf3e9..800e761fe4 100644 --- a/rust/lance/src/index/vector/ivf/v2.rs +++ b/rust/lance/src/index/vector/ivf/v2.rs @@ -636,6 +636,7 @@ mod tests { let gt_set = gt.iter().map(|r| r.1).collect::>(); let recall = row_ids.intersection(>_set).count() as f32 / k as f32; + println!("recall: {}", recall); assert!( recall >= recall_requirement, "recall: {}\n results: {:?}\n\ngt: {:?}", From 6f7cac9fb0cfff807f83812a7e4c78b778ffe680 Mon Sep 17 00:00:00 2001 From: BubbleCal Date: Fri, 29 Nov 2024 19:55:59 +0800 Subject: [PATCH 19/27] fix iport Signed-off-by: BubbleCal --- rust/lance-index/src/vector/pq/distance.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/rust/lance-index/src/vector/pq/distance.rs b/rust/lance-index/src/vector/pq/distance.rs index 579dbefff1..07c79e2047 100644 --- a/rust/lance-index/src/vector/pq/distance.rs +++ b/rust/lance-index/src/vector/pq/distance.rs @@ -6,7 +6,7 @@ use std::cmp::min; use lance_linalg::distance::{dot_distance_batch, l2_distance_batch, Dot, L2}; use lance_linalg::simd::u8::u8x16; -use lance_linalg::simd::Shuffle; +use lance_linalg::simd::{Shuffle, SIMD}; use lance_table::utils::LanceIteratorExtension; use super::{num_centroids, utils::get_sub_vector_centroids}; From 48f5989fc803da63d2cde1264de400ff8c86675b Mon Sep 17 00:00:00 2001 From: BubbleCal Date: Fri, 29 Nov 2024 20:04:36 +0800 Subject: [PATCH 20/27] fix clippy Signed-off-by: BubbleCal --- rust/lance/src/index/vector/ivf/v2.rs | 1 - 1 file changed, 1 deletion(-) diff --git a/rust/lance/src/index/vector/ivf/v2.rs b/rust/lance/src/index/vector/ivf/v2.rs index 800e761fe4..9f846cf3e9 100644 --- a/rust/lance/src/index/vector/ivf/v2.rs +++ b/rust/lance/src/index/vector/ivf/v2.rs @@ -636,7 +636,6 @@ mod tests { let gt_set = gt.iter().map(|r| r.1).collect::>(); let recall = row_ids.intersection(>_set).count() as f32 / k as f32; - println!("recall: {}", recall); assert!( recall >= recall_requirement, "recall: {}\n results: {:?}\n\ngt: {:?}", From 3548651dcc4cedf747eea9d4490dd12549dbc986 Mon Sep 17 00:00:00 2001 From: BubbleCal Date: Sun, 1 Dec 2024 12:38:29 +0800 Subject: [PATCH 21/27] fix Signed-off-by: BubbleCal --- rust/lance-index/src/vector/pq/distance.rs | 33 ++++++++----------- rust/lance-linalg/src/simd/u8.rs | 38 +--------------------- rust/lance/src/index/vector/ivf/v2.rs | 6 ++-- 3 files changed, 18 insertions(+), 59 deletions(-) diff --git a/rust/lance-index/src/vector/pq/distance.rs b/rust/lance-index/src/vector/pq/distance.rs index 07c79e2047..06cedff9dc 100644 --- a/rust/lance-index/src/vector/pq/distance.rs +++ b/rust/lance-index/src/vector/pq/distance.rs @@ -4,6 +4,7 @@ use core::panic; use std::cmp::min; +use itertools::Itertools; use lance_linalg::distance::{dot_distance_batch, l2_distance_batch, Dot, L2}; use lance_linalg::simd::u8::u8x16; use lance_linalg::simd::{Shuffle, SIMD}; @@ -138,7 +139,7 @@ pub(super) fn compute_pq_distance_4bit( ) -> Vec { let (qmin, qmax, distance_table) = quantize_distance_table(distance_table); let num_vectors = code.len() * 2 / num_sub_vectors; - // store the distances in u32 to avoid overflow + // store the distances in f32 to avoid overflow let mut distances = vec![0.0f32; num_vectors]; const NUM_CENTROIDS: usize = 2_usize.pow(4); for (sub_vec_idx, vec_indices) in code.chunks_exact(num_vectors).enumerate() { @@ -199,32 +200,26 @@ pub(super) fn compute_pq_distance_4bit( distances } -// Quantize the distance table to u8 -// returns quantized_distance_table +// Quantize the distance table to u8, +// map distance `d` to `(d-qmin) * 255 / (qmax-qmin)`m // used for only 4bit PQ so num_centroids must be 16 +// returns (qmin, qmax, quantized_distance_table) #[inline] fn quantize_distance_table(distance_table: &[f32]) -> (f32, f32, Vec) { const NUM_CENTROIDS: usize = 16; - // we set qmax to the maximum possible distance, - // then no need to handle overflow - let qmin = distance_table.iter().cloned().fold(f32::INFINITY, f32::min); - // let qmax = distance_table - // .chunks(NUM_CENTROIDS) - // .tuple_windows() - // .map(|(a, b)| { - // let a_max = a.iter().cloned().fold(f32::NEG_INFINITY, f32::max); - // let b_max = b.iter().cloned().fold(f32::NEG_INFINITY, f32::max); - // a_max + b_max - // }) - // .fold(f32::NEG_INFINITY, f32::max); let qmax = distance_table - .chunks_exact(NUM_CENTROIDS) - .map(|c| c.iter().cloned().fold(f32::NEG_INFINITY, f32::max)) - .sum::(); + .chunks(NUM_CENTROIDS) + .tuple_windows() + .map(|(a, b)| { + let a_max = a.iter().cloned().fold(f32::NEG_INFINITY, f32::max); + let b_max = b.iter().cloned().fold(f32::NEG_INFINITY, f32::max); + a_max + b_max + }) + .fold(f32::NEG_INFINITY, f32::max); let quantized_dist_table = distance_table .iter() - .map(|&d| ((d - qmin) * 255.0 / (qmax - qmin)).round() as u8) + .map(|&d| ((d - qmin) * 255.0 / (qmax - qmin)).ceil() as u8) .collect(); (qmin, qmax, quantized_dist_table) diff --git a/rust/lance-linalg/src/simd/u8.rs b/rust/lance-linalg/src/simd/u8.rs index 13d6d30ec8..5d0d92aa1e 100644 --- a/rust/lance-linalg/src/simd/u8.rs +++ b/rust/lance-linalg/src/simd/u8.rs @@ -7,12 +7,8 @@ use std::fmt::Formatter; #[cfg(target_arch = "aarch64")] use std::arch::aarch64::*; -#[cfg(target_arch = "loongarch64")] -use std::arch::loongarch64::*; #[cfg(target_arch = "x86_64")] use std::arch::x86_64::*; -#[cfg(target_arch = "loongarch64")] -use std::mem::transmute; use std::ops::{Add, AddAssign, Mul, Sub, SubAssign}; use super::{Shuffle, SIMD}; @@ -23,7 +19,7 @@ use super::{Shuffle, SIMD}; #[derive(Clone, Copy)] pub struct u8x16(pub __m128i); -/// 16 of 32-bit `f32` values. Use 512-bit SIMD if possible. +/// 16 of 8-bit `u8` values. #[allow(non_camel_case_types)] #[cfg(target_arch = "aarch64")] #[derive(Clone, Copy)] @@ -40,10 +36,6 @@ impl u8x16 { unsafe { Self(vandq_u8(self.0, vdupq_n_u8(mask))) } - #[cfg(target_arch = "loongarch64")] - unsafe { - Self(lasx_xvfand_b(self.0, mask)) - } } #[inline] @@ -58,10 +50,6 @@ impl u8x16 { unsafe { Self(vshrq_n_u8::<4>(self.0)) } - #[cfg(target_arch = "loongarch64")] - unsafe { - Self(lasx_xvfrsh_b(self.0, 4)) - } } } @@ -230,10 +218,6 @@ impl Shuffle for u8x16 { unsafe { Self(vqtbl1q_u8(self.0, indices.0)) } - #[cfg(target_arch = "loongarch64")] - unsafe { - Self(lasx_xvqtbl_b(self.0, indices.0)) - } } } @@ -250,10 +234,6 @@ impl Add for u8x16 { unsafe { Self(vqaddq_u8(self.0, rhs.0)) } - #[cfg(target_arch = "loongarch64")] - unsafe { - Self(lasx_xvfadd_b(self.0, rhs.0)) - } } } @@ -268,10 +248,6 @@ impl AddAssign for u8x16 { unsafe { self.0 = vaddq_u8(self.0, rhs.0) } - #[cfg(target_arch = "loongarch64")] - unsafe { - self.0 = lasx_xvfadd_b(self.0, rhs.0) - } } } @@ -296,10 +272,6 @@ impl Mul for u8x16 { unsafe { Self(vmulq_u8(self.0, rhs.0)) } - #[cfg(target_arch = "loongarch64")] - unsafe { - Self(lasx_xvfmul_b(self.0, rhs.0)) - } } } @@ -316,10 +288,6 @@ impl Sub for u8x16 { unsafe { Self(vsubq_u8(self.0, rhs.0)) } - #[cfg(target_arch = "loongarch64")] - unsafe { - Self(lasx_xvfsub_b(self.0, rhs.0)) - } } } @@ -334,10 +302,6 @@ impl SubAssign for u8x16 { unsafe { self.0 = vsubq_u8(self.0, rhs.0) } - #[cfg(target_arch = "loongarch64")] - unsafe { - self.0 = lasx_xvfsub_b(self.0, rhs.0) - } } } diff --git a/rust/lance/src/index/vector/ivf/v2.rs b/rust/lance/src/index/vector/ivf/v2.rs index 9f846cf3e9..f518d41bfc 100644 --- a/rust/lance/src/index/vector/ivf/v2.rs +++ b/rust/lance/src/index/vector/ivf/v2.rs @@ -694,9 +694,9 @@ mod tests { } #[rstest] - #[case(4, DistanceType::L2, 0.75)] - #[case(4, DistanceType::Cosine, 0.75)] - #[case(4, DistanceType::Dot, 0.4)] + #[case(4, DistanceType::L2, 0.9)] + #[case(4, DistanceType::Cosine, 0.9)] + #[case(4, DistanceType::Dot, 0.8)] #[tokio::test] async fn test_build_ivf_pq_4bit( #[case] nlist: usize, From aad20da36e9375e9a0142ac0b7c450f99987e9c0 Mon Sep 17 00:00:00 2001 From: BubbleCal Date: Mon, 2 Dec 2024 12:35:38 +0800 Subject: [PATCH 22/27] fallback non-simd impl Signed-off-by: BubbleCal --- rust/lance-linalg/src/simd/u8.rs | 133 ++++++++++++++++++++++++++----- 1 file changed, 113 insertions(+), 20 deletions(-) diff --git a/rust/lance-linalg/src/simd/u8.rs b/rust/lance-linalg/src/simd/u8.rs index 5d0d92aa1e..0c196441cd 100644 --- a/rust/lance-linalg/src/simd/u8.rs +++ b/rust/lance-linalg/src/simd/u8.rs @@ -25,6 +25,10 @@ pub struct u8x16(pub __m128i); #[derive(Clone, Copy)] pub struct u8x16(pub uint8x16_t); +#[cfg(not(any(target_arch = "x86_64", target_arch = "aarch64")))] +#[derive(Clone, Copy)] +pub struct u8x16(pub [u8; 16]); + impl u8x16 { #[inline] pub fn bit_and(self, mask: u8) -> Self { @@ -36,6 +40,12 @@ impl u8x16 { unsafe { Self(vandq_u8(self.0, vdupq_n_u8(mask))) } + #[cfg(not(any(target_arch = "x86_64", target_arch = "aarch64")))] + { + for i in 0..16 { + self.0[i] &= mask; + } + } } #[inline] @@ -50,6 +60,14 @@ impl u8x16 { unsafe { Self(vshrq_n_u8::<4>(self.0)) } + #[cfg(not(any(target_arch = "x86_64", target_arch = "aarch64")))] + { + let mut result = [0u8; 16]; + for i in 0..16 { + result[i] = self.0[i] >> 4; + } + Self(result) + } } } @@ -87,6 +105,14 @@ impl SIMD for u8x16 { unsafe { Self(vdupq_n_u8(val)) } + #[cfg(not(any(target_arch = "x86_64", target_arch = "aarch64")))] + { + let mut result = [0u8; 16]; + for i in 0..16 { + result[i] = val; + } + Self(result) + } } #[inline] @@ -99,6 +125,10 @@ impl SIMD for u8x16 { { Self::splat(0) } + #[cfg(not(any(target_arch = "x86_64", target_arch = "aarch64")))] + { + Self([0; 16]) + } } #[inline] @@ -111,6 +141,10 @@ impl SIMD for u8x16 { { Self::load_unaligned(ptr) } + #[cfg(not(any(target_arch = "x86_64", target_arch = "aarch64")))] + { + Self::load_unaligned(ptr) + } } #[inline] @@ -123,6 +157,14 @@ impl SIMD for u8x16 { { Self(vld1q_u8(ptr)) } + #[cfg(not(any(target_arch = "x86_64", target_arch = "aarch64")))] + { + let mut result = [0u8; 16]; + for i in 0..16 { + result[i] = *ptr.add(i); + } + Self(result) + } } #[inline] @@ -135,6 +177,10 @@ impl SIMD for u8x16 { unsafe { vst1q_u8(ptr, self.0) } + #[cfg(not(any(target_arch = "x86_64", target_arch = "aarch64")))] + { + self.store_unaligned(ptr); + } } #[inline] @@ -148,28 +194,16 @@ impl SIMD for u8x16 { unsafe { vst1q_u8(ptr, self.0) } + #[cfg(not(any(target_arch = "x86_64", target_arch = "aarch64")))] + { + for i in 0..16 { + *ptr.add(i) = self.0[i]; + } + } } fn reduce_sum(&self) -> u8 { - todo!("the signature of reduce_sum is not correct"); - // #[cfg(target_arch = "x86_64")] - // unsafe { - // let zeros = _mm_setzero_si128(); - // let sum = _mm_sad_epu8(self.0, zeros); - - // let lower = _mm_cvtsi128_si64(sum) as u32; - // let upper = _mm_extract_epi64(sum, 1) as u32; - // lower + upper - // } - // #[cfg(target_arch = "aarch64")] - // unsafe { - // let low = vget_low_u8(self.0); - // let high = vget_high_u8(self.0); - // let sum = vaddl_u8(low, high); - // let sum16 = vaddw_u16(vdupq_n_u32(0), sum); - // let total = vpadd_u32(vget_low_u32(sum16), vget_high_u32(sum16)); - // vget_lane_u32(total, 0) + vget_lane_u32(total, 1) - // } + todo!("it is not implemented yet"); } #[inline] @@ -184,11 +218,18 @@ impl SIMD for u8x16 { let min_low = _mm_min_epu8(min_low, _mm_srli_si128(min_low, 1)); _mm_extract_epi8(min_low, 0) as u8 } - #[cfg(target_arch = "aarch64")] unsafe { vminvq_u8(self.0) } + #[cfg(not(any(target_arch = "x86_64", target_arch = "aarch64")))] + { + let mut min = self.0[0]; + for i in 1..16 { + min = std::cmp::min(min, self.0[i]); + } + min + } } #[inline] @@ -201,6 +242,14 @@ impl SIMD for u8x16 { unsafe { Self(vminq_u8(self.0, rhs.0)) } + #[cfg(not(any(target_arch = "x86_64", target_arch = "aarch64")))] + { + let mut result = [0u8; 16]; + for i in 0..16 { + result[i] = std::cmp::min(self.0[i], rhs.0[i]); + } + Self(result) + } } fn find(&self, _val: u8) -> Option { @@ -218,6 +267,14 @@ impl Shuffle for u8x16 { unsafe { Self(vqtbl1q_u8(self.0, indices.0)) } + #[cfg(not(any(target_arch = "x86_64", target_arch = "aarch64")))] + { + let mut result = [0u8; 16]; + for i in 0..16 { + result[i] = self.0[indices.0[i] as usize]; + } + Self(result) + } } } @@ -234,6 +291,14 @@ impl Add for u8x16 { unsafe { Self(vqaddq_u8(self.0, rhs.0)) } + #[cfg(not(any(target_arch = "x86_64", target_arch = "aarch64")))] + { + let mut result = [0u8; 16]; + for i in 0..16 { + result[i] = self.0[i].saturating_add(rhs.0[i]); + } + Self(result) + } } } @@ -248,6 +313,12 @@ impl AddAssign for u8x16 { unsafe { self.0 = vaddq_u8(self.0, rhs.0) } + #[cfg(not(any(target_arch = "x86_64", target_arch = "aarch64")))] + { + for i in 0..16 { + self.0[i] = self.0[i].saturating_add(rhs.0[i]); + } + } } } @@ -272,6 +343,14 @@ impl Mul for u8x16 { unsafe { Self(vmulq_u8(self.0, rhs.0)) } + #[cfg(not(any(target_arch = "x86_64", target_arch = "aarch64")))] + { + let mut result = [0u8; 16]; + for i in 0..16 { + result[i] = self.0[i].wrapping_mul(rhs.0[i]); + } + Self(result) + } } } @@ -288,6 +367,14 @@ impl Sub for u8x16 { unsafe { Self(vsubq_u8(self.0, rhs.0)) } + #[cfg(not(any(target_arch = "x86_64", target_arch = "aarch64")))] + { + let mut result = [0u8; 16]; + for i in 0..16 { + result[i] = self.0[i].wrapping_sub(rhs.0[i]); + } + Self(result) + } } } @@ -302,6 +389,12 @@ impl SubAssign for u8x16 { unsafe { self.0 = vsubq_u8(self.0, rhs.0) } + #[cfg(not(any(target_arch = "x86_64", target_arch = "aarch64")))] + { + for i in 0..16 { + self.0[i] = self.0[i].wrapping_sub(rhs.0[i]); + } + } } } From c6508933a5ff4712cfcb110b62572a6a59bf8c65 Mon Sep 17 00:00:00 2001 From: BubbleCal Date: Thu, 5 Dec 2024 13:05:26 +0800 Subject: [PATCH 23/27] impl right shift Signed-off-by: BubbleCal --- rust/lance-index/src/vector/pq/distance.rs | 2 +- rust/lance-linalg/src/simd/u8.rs | 18 ++++++++++++++---- 2 files changed, 15 insertions(+), 5 deletions(-) diff --git a/rust/lance-index/src/vector/pq/distance.rs b/rust/lance-index/src/vector/pq/distance.rs index 06cedff9dc..b915db8b61 100644 --- a/rust/lance-index/src/vector/pq/distance.rs +++ b/rust/lance-index/src/vector/pq/distance.rs @@ -165,7 +165,7 @@ pub(super) fn compute_pq_distance_4bit( debug_assert_eq!(dist_table.as_array(), origin_dist_table.as_array()); // compute next distances - let next_indices = vec_indices.right_shift_4(); + let next_indices = vec_indices.right_shift(); let next_dist_table = origin_next_dist_table; let results = results + next_dist_table.shuffle(next_indices); diff --git a/rust/lance-linalg/src/simd/u8.rs b/rust/lance-linalg/src/simd/u8.rs index 0c196441cd..039f0b9f86 100644 --- a/rust/lance-linalg/src/simd/u8.rs +++ b/rust/lance-linalg/src/simd/u8.rs @@ -49,16 +49,26 @@ impl u8x16 { } #[inline] - pub fn right_shift_4(self) -> Self { + pub fn right_shift(self, nbits: i32) -> Self { #[cfg(target_arch = "x86_64")] unsafe { - let shifted = _mm_srli_epi16(self.0, 4); - let mask = _mm_set1_epi8(0x0F); + let shifted = _mm_srli_epi16(self.0, nbits); + let mask = _mm_set1_epi8(1_i8 << (8 - nbits) - 1); Self(_mm_and_si128(shifted, mask)) } #[cfg(target_arch = "aarch64")] unsafe { - Self(vshrq_n_u8::<4>(self.0)) + match nbits { + 1 => Self(vshrq_n_u8::<1>(self.0)), + 2 => Self(vshrq_n_u8::<2>(self.0)), + 3 => Self(vshrq_n_u8::<3>(self.0)), + 4 => Self(vshrq_n_u8::<4>(self.0)), + 5 => Self(vshrq_n_u8::<5>(self.0)), + 6 => Self(vshrq_n_u8::<6>(self.0)), + 7 => Self(vshrq_n_u8::<7>(self.0)), + 8 => Self(vshrq_n_u8::<8>(self.0)), + _ => unreachable!(), + } } #[cfg(not(any(target_arch = "x86_64", target_arch = "aarch64")))] { From 7d927cdeb4a0d9a3d1383fc43177a57e17ad51f7 Mon Sep 17 00:00:00 2001 From: BubbleCal Date: Thu, 5 Dec 2024 13:14:24 +0800 Subject: [PATCH 24/27] fix Signed-off-by: BubbleCal --- python/Cargo.lock | 340 +++++++++++---------- rust/lance-index/src/vector/pq/distance.rs | 2 +- 2 files changed, 176 insertions(+), 166 deletions(-) diff --git a/python/Cargo.lock b/python/Cargo.lock index 4bbf63f81b..f2f61767d3 100644 --- a/python/Cargo.lock +++ b/python/Cargo.lock @@ -57,9 +57,9 @@ dependencies = [ [[package]] name = "allocator-api2" -version = "0.2.20" +version = "0.2.21" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "45862d1c77f2228b9e10bc609d5bc203d86ebc9b87ad8d5d5167a6c9abf739d9" +checksum = "683d7910e743518b0e34f1186f92494becacb047c7b6bf616c96772180fef923" [[package]] name = "android-tzdata" @@ -78,9 +78,9 @@ dependencies = [ [[package]] name = "anyhow" -version = "1.0.93" +version = "1.0.94" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4c95c10ba0b00a02636238b814946408b1322d5ac4760326e6fb8ec956d85775" +checksum = "c1fd03a028ef38ba2276dce7e33fcd6369c158a1bca17946c4b1b701891c1ff7" [[package]] name = "arc-swap" @@ -347,9 +347,9 @@ dependencies = [ [[package]] name = "async-compression" -version = "0.4.17" +version = "0.4.18" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0cb8f1d480b0ea3783ab015936d2a55c87e219676f0c0b7dec61494043f21857" +checksum = "df895a515f70646414f4b45c0b79082783b80552b373a68283012928df56f522" dependencies = [ "bzip2", "flate2", @@ -438,7 +438,7 @@ checksum = "3b43422f69d8ff38f95f1b2bb76517c91589a924d1559a0e935d7c8ce0274c11" dependencies = [ "proc-macro2", "quote", - "syn 2.0.89", + "syn 2.0.90", ] [[package]] @@ -481,7 +481,7 @@ checksum = "721cae7de5c34fbb2acd27e21e6d2cf7b886dce0c27388d46c4e6c47ea4318dd" dependencies = [ "proc-macro2", "quote", - "syn 2.0.89", + "syn 2.0.90", ] [[package]] @@ -524,7 +524,7 @@ dependencies = [ "aws-sdk-sts", "aws-smithy-async", "aws-smithy-http", - "aws-smithy-json", + "aws-smithy-json 0.60.7", "aws-smithy-runtime", "aws-smithy-runtime-api", "aws-smithy-types", @@ -555,9 +555,9 @@ dependencies = [ [[package]] name = "aws-runtime" -version = "1.4.3" +version = "1.4.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a10d5c055aa540164d9561a0e2e74ad30f0dcf7393c3a92f6733ddf9c5762468" +checksum = "b5ac934720fbb46206292d2c75b57e67acfc56fe7dfd34fb9a02334af08409ea" dependencies = [ "aws-credential-types", "aws-sigv4", @@ -580,15 +580,15 @@ dependencies = [ [[package]] name = "aws-sdk-dynamodb" -version = "1.54.0" +version = "1.55.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8efdda6a491bb4640d35b99b0a4b93f75ce7d6e3a1937c3e902d3cb23d0a179c" +checksum = "a18e18b3cf6b75c1fcb15e677f6dbd2a6d8dfe4d168e0a36721f7a6167c6c829" dependencies = [ "aws-credential-types", "aws-runtime", "aws-smithy-async", "aws-smithy-http", - "aws-smithy-json", + "aws-smithy-json 0.61.1", "aws-smithy-runtime", "aws-smithy-runtime-api", "aws-smithy-types", @@ -603,15 +603,15 @@ dependencies = [ [[package]] name = "aws-sdk-sso" -version = "1.49.0" +version = "1.50.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "09677244a9da92172c8dc60109b4a9658597d4d298b188dd0018b6a66b410ca4" +checksum = "05ca43a4ef210894f93096039ef1d6fa4ad3edfabb3be92b80908b9f2e4b4eab" dependencies = [ "aws-credential-types", "aws-runtime", "aws-smithy-async", "aws-smithy-http", - "aws-smithy-json", + "aws-smithy-json 0.61.1", "aws-smithy-runtime", "aws-smithy-runtime-api", "aws-smithy-types", @@ -625,15 +625,15 @@ dependencies = [ [[package]] name = "aws-sdk-ssooidc" -version = "1.50.0" +version = "1.51.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "81fea2f3a8bb3bd10932ae7ad59cc59f65f270fc9183a7e91f501dc5efbef7ee" +checksum = "abaf490c2e48eed0bb8e2da2fb08405647bd7f253996e0f93b981958ea0f73b0" dependencies = [ "aws-credential-types", "aws-runtime", "aws-smithy-async", "aws-smithy-http", - "aws-smithy-json", + "aws-smithy-json 0.61.1", "aws-smithy-runtime", "aws-smithy-runtime-api", "aws-smithy-types", @@ -647,15 +647,15 @@ dependencies = [ [[package]] name = "aws-sdk-sts" -version = "1.50.0" +version = "1.51.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6ada54e5f26ac246dc79727def52f7f8ed38915cb47781e2a72213957dc3a7d5" +checksum = "b68fde0d69c8bfdc1060ea7da21df3e39f6014da316783336deff0a9ec28f4bf" dependencies = [ "aws-credential-types", "aws-runtime", "aws-smithy-async", "aws-smithy-http", - "aws-smithy-json", + "aws-smithy-json 0.61.1", "aws-smithy-query", "aws-smithy-runtime", "aws-smithy-runtime-api", @@ -670,9 +670,9 @@ dependencies = [ [[package]] name = "aws-sigv4" -version = "1.2.5" +version = "1.2.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5619742a0d8f253be760bfbb8e8e8368c69e3587e4637af5754e488a611499b1" +checksum = "7d3820e0c08d0737872ff3c7c1f21ebbb6693d832312d6152bf18ef50a5471c2" dependencies = [ "aws-credential-types", "aws-smithy-http", @@ -683,7 +683,7 @@ dependencies = [ "hex", "hmac", "http 0.2.12", - "http 1.1.0", + "http 1.2.0", "once_cell", "percent-encoding", "sha2", @@ -731,6 +731,15 @@ dependencies = [ "aws-smithy-types", ] +[[package]] +name = "aws-smithy-json" +version = "0.61.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ee4e69cc50921eb913c6b662f8d909131bb3e6ad6cb6090d3a39b66fc5c52095" +dependencies = [ + "aws-smithy-types", +] + [[package]] name = "aws-smithy-query" version = "0.60.7" @@ -743,9 +752,9 @@ dependencies = [ [[package]] name = "aws-smithy-runtime" -version = "1.7.3" +version = "1.7.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "be28bd063fa91fd871d131fc8b68d7cd4c5fa0869bea68daca50dcb1cbd76be2" +checksum = "9f20685047ca9d6f17b994a07f629c813f08b5bce65523e47124879e60103d45" dependencies = [ "aws-smithy-async", "aws-smithy-http", @@ -778,7 +787,7 @@ dependencies = [ "aws-smithy-types", "bytes", "http 0.2.12", - "http 1.1.0", + "http 1.2.0", "pin-project-lite", "tokio", "tracing", @@ -796,7 +805,7 @@ dependencies = [ "bytes-utils", "futures-core", "http 0.2.12", - "http 1.1.0", + "http 1.2.0", "http-body 0.4.6", "http-body 1.0.1", "http-body-util", @@ -915,9 +924,9 @@ dependencies = [ [[package]] name = "blake3" -version = "1.5.4" +version = "1.5.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d82033247fd8e890df8f740e407ad4d038debb9eb1f40533fffb32e7d17dc6f7" +checksum = "b8ee0c1824c4dea5b5f81736aff91bae041d2c07ee1192bec91054e10e3e601e" dependencies = [ "arrayref", "arrayvec", @@ -989,9 +998,9 @@ checksum = "1fd0f2584146f6f2ef48085050886acf353beff7305ebd1ae69500e27c67f64b" [[package]] name = "bytes" -version = "1.8.0" +version = "1.9.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9ac0150caa2ae65ca5bd83f25c7de183dea78d4d366469f148435e2acfbad0da" +checksum = "325918d6fe32f23b19878fe4b34794ae41fc19ddbe53b10571a4874d44ffd39b" [[package]] name = "bytes-utils" @@ -1026,9 +1035,9 @@ dependencies = [ [[package]] name = "cc" -version = "1.2.1" +version = "1.2.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fd9de9f2205d5ef3fd67e685b0df337994ddd4495e2a28d185500d0e1edfea47" +checksum = "f34d93e62b03caf570cccc334cbc6c2fceca82f39211051345108adcba3eebdc" dependencies = [ "jobserver", "libc", @@ -1725,7 +1734,7 @@ checksum = "97369cbbc041bc366949bc74d34658d6cda5621039731c6310521892a3a20ae0" dependencies = [ "proc-macro2", "quote", - "syn 2.0.89", + "syn 2.0.90", ] [[package]] @@ -1773,12 +1782,12 @@ checksum = "5443807d6dff69373d433ab9ef5378ad8df50ca6298caf15de6e52e24aaf54d5" [[package]] name = "errno" -version = "0.3.9" +version = "0.3.10" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "534c5cf6194dfab3db3242765c03bbe257cf92f22b38f6bc0c58d59108a820ba" +checksum = "33d852cb9b869c2a9b3df2f71a3074817f01e1844f839a144f5fcef059a4eb5d" dependencies = [ "libc", - "windows-sys 0.52.0", + "windows-sys 0.59.0", ] [[package]] @@ -1811,9 +1820,9 @@ dependencies = [ [[package]] name = "event-listener-strategy" -version = "0.5.2" +version = "0.5.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0f214dc438f977e6d4e3500aaa277f5ad94ca83fbbd9b1a15713ce2344ccc5a1" +checksum = "3c3e4e0dd3673c1139bf041f3008816d9cf2946bbfac2945c09e523b8d7b05b2" dependencies = [ "event-listener 5.3.1", "pin-project-lite", @@ -1982,7 +1991,7 @@ checksum = "162ee34ebcb7c64a8abebc059ce0fee27c2262618d7b60ed8faf72fef13c3650" dependencies = [ "proc-macro2", "quote", - "syn 2.0.89", + "syn 2.0.90", ] [[package]] @@ -2092,7 +2101,7 @@ dependencies = [ "fnv", "futures-core", "futures-sink", - "http 1.1.0", + "http 1.2.0", "indexmap", "slab", "tokio", @@ -2123,9 +2132,9 @@ dependencies = [ [[package]] name = "hashbrown" -version = "0.15.1" +version = "0.15.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3a9bfc1af68b1726ea47d3d5109de126281def866b33970e10fbab11b5dafab3" +checksum = "bf151400ff0baff5465007dd2f3e717f3fe502074ca563069ce3a6629d07b289" dependencies = [ "allocator-api2", "equivalent", @@ -2210,9 +2219,9 @@ dependencies = [ [[package]] name = "http" -version = "1.1.0" +version = "1.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "21b9ddb458710bc376481b842f5da65cdf31522de232c1ca8146abce2a358258" +checksum = "f16ca2af56261c99fba8bac40a10251ce8188205a4c448fbb745a2e4daa76fea" dependencies = [ "bytes", "fnv", @@ -2237,7 +2246,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "1efedce1fb8e6913f23e0c92de8e62cd5b772a67e7b3946df930a62566c93184" dependencies = [ "bytes", - "http 1.1.0", + "http 1.2.0", ] [[package]] @@ -2248,7 +2257,7 @@ checksum = "793429d76616a256bcb62c2a2ec2bed781c8307e797e2598c50010f2bee2544f" dependencies = [ "bytes", "futures-util", - "http 1.1.0", + "http 1.2.0", "http-body 1.0.1", "pin-project-lite", ] @@ -2305,7 +2314,7 @@ dependencies = [ "futures-channel", "futures-util", "h2 0.4.7", - "http 1.1.0", + "http 1.2.0", "http-body 1.0.1", "httparse", "itoa", @@ -2338,10 +2347,10 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "08afdbb5c31130e3034af566421053ab03787c640246a446327f550d11bcb333" dependencies = [ "futures-util", - "http 1.1.0", + "http 1.2.0", "hyper 1.5.1", "hyper-util", - "rustls 0.23.17", + "rustls 0.23.19", "rustls-native-certs 0.8.1", "rustls-pki-types", "tokio", @@ -2358,7 +2367,7 @@ dependencies = [ "bytes", "futures-channel", "futures-util", - "http 1.1.0", + "http 1.2.0", "http-body 1.0.1", "hyper 1.5.1", "pin-project-lite", @@ -2515,7 +2524,7 @@ checksum = "1ec89e9337638ecdc08744df490b221a7399bf8d164eb52a665454e60e075ad6" dependencies = [ "proc-macro2", "quote", - "syn 2.0.89", + "syn 2.0.90", ] [[package]] @@ -2541,12 +2550,12 @@ dependencies = [ [[package]] name = "indexmap" -version = "2.6.0" +version = "2.7.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "707907fe3c25f5424cce2cb7e1cbcafee6bdbe735ca90ef77c29e84591e5b9da" +checksum = "62f822373a4fe84d4bb149bf54e584a7f4abec90e072ed49cda0edea5b95471f" dependencies = [ "equivalent", - "hashbrown 0.15.1", + "hashbrown 0.15.2", ] [[package]] @@ -2634,9 +2643,9 @@ dependencies = [ [[package]] name = "itoa" -version = "1.0.13" +version = "1.0.14" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "540654e97a3f4470a492cd30ff187bc95d89557a903a2bbf112e2fae98104ef2" +checksum = "d75a2a4b1b190afb6f5425f10f6a8f959d2ea0b9c2b1d79553551850539e4674" [[package]] name = "jobserver" @@ -2649,10 +2658,11 @@ dependencies = [ [[package]] name = "js-sys" -version = "0.3.72" +version = "0.3.74" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6a88f1bda2bd75b0452a14784937d796722fdebfe50df998aeb3f0b7603019a9" +checksum = "a865e038f7f6ed956f788f0d7d60c541fff74c7bd74272c5d4cf15c63743e705" dependencies = [ + "once_cell", "wasm-bindgen", ] @@ -3120,9 +3130,9 @@ dependencies = [ [[package]] name = "libc" -version = "0.2.164" +version = "0.2.167" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "433bfe06b8c75da9b2e3fbea6e5329ff87748f0b144ef75306e674c3f6f7c13f" +checksum = "09d6582e104315a817dff97f75133544b2e094ee22447d2acf4a74e189ba06fc" [[package]] name = "libm" @@ -3149,9 +3159,9 @@ checksum = "78b3ae25bc7c8c38cec158d1f2757ee79e9b3740fbc7ccf0e59e4b08d793fa89" [[package]] name = "litemap" -version = "0.7.3" +version = "0.7.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "643cb0b8d4fcc284004d5fd0d67ccf61dfffadb7f75e1e71bc420f4688a3a704" +checksum = "4ee93343901ab17bd981295f2cf0026d4ad018c7c31ba84549a4ddbb47a45104" [[package]] name = "lock_api" @@ -3178,7 +3188,7 @@ version = "0.12.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "234cf4f4a04dc1f57e24b96cc0cd600cf2af460d4161ac5ecdd0af8e1f3b2a38" dependencies = [ - "hashbrown 0.15.1", + "hashbrown 0.15.2", ] [[package]] @@ -3274,11 +3284,10 @@ dependencies = [ [[package]] name = "mio" -version = "1.0.2" +version = "1.0.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "80e04d1dcff3aae0704555fe5fee3bcfaf3d1fdf8a7e521d5b9d2b42acb52cec" +checksum = "2886843bf800fba2e3377cff24abf6379b4c4d5c6681eaf9ea5b0d15090450bd" dependencies = [ - "hermit-abi 0.3.9", "libc", "wasi", "windows-sys 0.52.0", @@ -3754,7 +3763,7 @@ checksum = "3c0f5fad0874fc7abcd4d750e76917eaebbecaa2c20bde22e1dbeeba8beb758c" dependencies = [ "proc-macro2", "quote", - "syn 2.0.89", + "syn 2.0.90", ] [[package]] @@ -3803,9 +3812,9 @@ dependencies = [ [[package]] name = "portable-atomic" -version = "1.9.0" +version = "1.10.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "cc9c68a3f6da06753e9335d63e27f6b9754dd1920d941135b7ea8224f141adb2" +checksum = "280dc24453071f1b63954171985a0b0d30058d287960968b9b2aca264c8d4ee6" [[package]] name = "powerfmt" @@ -3839,7 +3848,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "64d1ec885c64d0457d564db4ec299b2dae3f9c02808b8ad9c3a089c591b18033" dependencies = [ "proc-macro2", - "syn 2.0.89", + "syn 2.0.90", ] [[package]] @@ -3910,7 +3919,7 @@ dependencies = [ "prost 0.12.6", "prost-types 0.12.6", "regex", - "syn 2.0.89", + "syn 2.0.90", "tempfile", ] @@ -3937,7 +3946,7 @@ dependencies = [ "itertools 0.12.1", "proc-macro2", "quote", - "syn 2.0.89", + "syn 2.0.90", ] [[package]] @@ -4048,7 +4057,7 @@ dependencies = [ "proc-macro2", "pyo3-macros-backend", "quote", - "syn 2.0.89", + "syn 2.0.90", ] [[package]] @@ -4061,7 +4070,7 @@ dependencies = [ "proc-macro2", "pyo3-build-config", "quote", - "syn 2.0.89", + "syn 2.0.90", ] [[package]] @@ -4099,10 +4108,10 @@ dependencies = [ "pin-project-lite", "quinn-proto", "quinn-udp", - "rustc-hash 2.0.0", - "rustls 0.23.17", + "rustc-hash 2.1.0", + "rustls 0.23.19", "socket2", - "thiserror 2.0.3", + "thiserror 2.0.4", "tokio", "tracing", ] @@ -4117,11 +4126,11 @@ dependencies = [ "getrandom", "rand", "ring", - "rustc-hash 2.0.0", - "rustls 0.23.17", + "rustc-hash 2.1.0", + "rustls 0.23.19", "rustls-pki-types", "slab", - "thiserror 2.0.3", + "thiserror 2.0.4", "tinyvec", "tracing", "web-time", @@ -4316,7 +4325,7 @@ dependencies = [ "futures-core", "futures-util", "h2 0.4.7", - "http 1.1.0", + "http 1.2.0", "http-body 1.0.1", "http-body-util", "hyper 1.5.1", @@ -4330,7 +4339,7 @@ dependencies = [ "percent-encoding", "pin-project-lite", "quinn", - "rustls 0.23.17", + "rustls 0.23.19", "rustls-native-certs 0.8.1", "rustls-pemfile 2.2.0", "rustls-pki-types", @@ -4367,9 +4376,9 @@ dependencies = [ [[package]] name = "roaring" -version = "0.10.6" +version = "0.10.7" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8f4b84ba6e838ceb47b41de5194a60244fac43d9fe03b71dbe8c5a201081d6d1" +checksum = "f81dc953b2244ddd5e7860cb0bb2a790494b898ef321d4aff8e260efab60cc88" dependencies = [ "bytemuck", "byteorder", @@ -4399,9 +4408,9 @@ checksum = "08d43f7aa6b08d49f382cde6a7982047c3426db949b1424bc4b7ec9ae12c6ce2" [[package]] name = "rustc-hash" -version = "2.0.0" +version = "2.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "583034fd73374156e66797ed8e5b0d5690409c9226b22d87cb7f19821c05d152" +checksum = "c7fb8039b3032c191086b10f11f319a6e99e1e82889c5cc6046f515c9db1d497" [[package]] name = "rustc_version" @@ -4439,9 +4448,9 @@ dependencies = [ [[package]] name = "rustls" -version = "0.23.17" +version = "0.23.19" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7f1a745511c54ba6d4465e8d5dfbd81b45791756de28d4981af70d6dca128f1e" +checksum = "934b404430bb06b3fae2cba809eb45a1ab1aecd64491213d7c3301b88393f8d1" dependencies = [ "log", "once_cell", @@ -4575,7 +4584,7 @@ dependencies = [ "proc-macro2", "quote", "serde_derive_internals", - "syn 2.0.89", + "syn 2.0.90", ] [[package]] @@ -4662,7 +4671,7 @@ checksum = "ad1e866f866923f252f05c889987993144fb74e722403468a4ebd70c3cd756c0" dependencies = [ "proc-macro2", "quote", - "syn 2.0.89", + "syn 2.0.90", ] [[package]] @@ -4673,7 +4682,7 @@ checksum = "18d26a20a969b9e3fdf2fc2d9f21eda6c40e2de84c9408bb5d3b05d499aae711" dependencies = [ "proc-macro2", "quote", - "syn 2.0.89", + "syn 2.0.90", ] [[package]] @@ -4697,7 +4706,7 @@ dependencies = [ "proc-macro2", "quote", "serde", - "syn 2.0.89", + "syn 2.0.90", ] [[package]] @@ -4829,9 +4838,9 @@ checksum = "1b6b67fb9a61334225b5b790716f609cd58395f895b3fe8b328786812a40bc3b" [[package]] name = "socket2" -version = "0.5.7" +version = "0.5.8" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ce305eb0b4296696835b71df73eb912e0f1ffd2556a501fcede6e0c50349191c" +checksum = "c970269d99b64e60ec3bd6ad27270092a5394c4e309314b18ae3fe575695fbe8" dependencies = [ "libc", "windows-sys 0.52.0", @@ -4861,7 +4870,7 @@ checksum = "01b2e185515564f15375f593fb966b5718bc624ba77fe49fa4616ad619690554" dependencies = [ "proc-macro2", "quote", - "syn 2.0.89", + "syn 2.0.90", ] [[package]] @@ -4907,7 +4916,7 @@ dependencies = [ "proc-macro2", "quote", "rustversion", - "syn 2.0.89", + "syn 2.0.90", ] [[package]] @@ -4929,7 +4938,7 @@ dependencies = [ "serde", "serde_json", "serde_yaml", - "syn 2.0.89", + "syn 2.0.90", "typify", "walkdir", ] @@ -4953,9 +4962,9 @@ dependencies = [ [[package]] name = "syn" -version = "2.0.89" +version = "2.0.90" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "44d46482f1c1c87acd84dea20c1bf5ebff4c757009ed6bf19cfd36fb10e92c4e" +checksum = "919d3b74a5dd0ccd15aeb8f93e7006bd9e14c295087c9896a110f490752bcf31" dependencies = [ "proc-macro2", "quote", @@ -4979,7 +4988,7 @@ checksum = "c8af7666ab7b6390ab78131fb5b0fce11d6b7a6951602017c35fa82800708971" dependencies = [ "proc-macro2", "quote", - "syn 2.0.89", + "syn 2.0.90", ] [[package]] @@ -5214,11 +5223,11 @@ dependencies = [ [[package]] name = "thiserror" -version = "2.0.3" +version = "2.0.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c006c85c7651b3cf2ada4584faa36773bd07bac24acfb39f3c431b36d7e667aa" +checksum = "2f49a1853cf82743e3b7950f77e0f4d622ca36cf4317cba00c767838bac8d490" dependencies = [ - "thiserror-impl 2.0.3", + "thiserror-impl 2.0.4", ] [[package]] @@ -5229,18 +5238,18 @@ checksum = "4fee6c4efc90059e10f81e6d42c60a18f76588c3d74cb83a0b242a2b6c7504c1" dependencies = [ "proc-macro2", "quote", - "syn 2.0.89", + "syn 2.0.90", ] [[package]] name = "thiserror-impl" -version = "2.0.3" +version = "2.0.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f077553d607adc1caf65430528a576c757a71ed73944b66ebb58ef2bbd243568" +checksum = "8381894bb3efe0c4acac3ded651301ceee58a15d47c2e34885ed1908ad667061" dependencies = [ "proc-macro2", "quote", - "syn 2.0.89", + "syn 2.0.90", ] [[package]] @@ -5266,9 +5275,9 @@ dependencies = [ [[package]] name = "time" -version = "0.3.36" +version = "0.3.37" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5dfd88e563464686c916c7e46e623e520ddc6d79fa6641390f2e3fa86e83e885" +checksum = "35e7868883861bd0e56d9ac6efcaaca0d6d5d82a2a7ec8209ff492c07cf37b21" dependencies = [ "deranged", "itoa", @@ -5287,9 +5296,9 @@ checksum = "ef927ca75afb808a4d64dd374f00a2adf8d0fcff8e7b184af886c3c87ec4a3f3" [[package]] name = "time-macros" -version = "0.2.18" +version = "0.2.19" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3f252a68540fde3a3877aeea552b832b40ab9a69e318efd078774a01ddee1ccf" +checksum = "2834e6017e3e5e4b9834939793b282bc03b37a3336245fa820e35e233e2a85de" dependencies = [ "num-conv", "time-core", @@ -5331,9 +5340,9 @@ checksum = "1f3ccbac311fea05f86f61904b462b55fb3df8837a366dfc601a0161d0532f20" [[package]] name = "tokio" -version = "1.41.1" +version = "1.42.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "22cfb5bee7a6a52939ca9224d6ac897bb669134078daa8735560897f69de4d33" +checksum = "5cec9b21b0450273377fc97bd4c33a8acffc8c996c987a7c5b319a0083707551" dependencies = [ "backtrace", "bytes", @@ -5354,7 +5363,7 @@ checksum = "693d596312e88961bc67d7f1f97af8a70227d9f90c31bba5806eec004978d752" dependencies = [ "proc-macro2", "quote", - "syn 2.0.89", + "syn 2.0.90", ] [[package]] @@ -5373,7 +5382,7 @@ version = "0.26.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "0c7bc40d0e5a97695bb96e27995cd3a08538541b0a846f65bba7a359f36700d4" dependencies = [ - "rustls 0.23.17", + "rustls 0.23.19", "rustls-pki-types", "tokio", ] @@ -5391,9 +5400,9 @@ dependencies = [ [[package]] name = "tokio-util" -version = "0.7.12" +version = "0.7.13" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "61e7c3654c13bcd040d4a03abee2c75b1d14a37b423cf5a813ceae1cc903ec6a" +checksum = "d7fcaa8d55a2bdd6b83ace262b016eca0d79ee02818c5c1bcdf0305114081078" dependencies = [ "bytes", "futures-core", @@ -5410,9 +5419,9 @@ checksum = "8df9b6e13f2d32c91b9bd719c00d1958837bc7dec474d94952798cc8e69eeec3" [[package]] name = "tracing" -version = "0.1.40" +version = "0.1.41" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c3523ab5a71916ccf420eebdf5521fcef02141234bbc0b8a49f2fdc4544364ef" +checksum = "784e0ac535deb450455cbfa28a6f0df145ea1bb7ae51b821cf5e7927fdcfbdd0" dependencies = [ "pin-project-lite", "tracing-attributes", @@ -5421,13 +5430,13 @@ dependencies = [ [[package]] name = "tracing-attributes" -version = "0.1.27" +version = "0.1.28" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "34704c8d6ebcbc939824180af020566b01a7c01f80641264eba0999f6c2b6be7" +checksum = "395ae124c09f9e6918a2310af6038fba074bcf474ac352496d5910dd59a2226d" dependencies = [ "proc-macro2", "quote", - "syn 2.0.89", + "syn 2.0.90", ] [[package]] @@ -5443,9 +5452,9 @@ dependencies = [ [[package]] name = "tracing-core" -version = "0.1.32" +version = "0.1.33" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c06d3da6113f116aaee68e4d601191614c9053067f9ab7f6edbcb161237daa54" +checksum = "e672c95779cf947c5311f83787af4fa8fffd12fb27e4993211a84bdfd9610f9c" dependencies = [ "once_cell", "valuable", @@ -5464,9 +5473,9 @@ dependencies = [ [[package]] name = "tracing-subscriber" -version = "0.3.18" +version = "0.3.19" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ad0f048c97dbd9faa9b7df56362b8ebcaa52adb06b498c050d2f4e32f90a7a8b" +checksum = "e8189decb5ac0fa7bc8b96b7cb9b2701d60d48805aca84a238004d665fcc4008" dependencies = [ "nu-ansi-term", "sharded-slab", @@ -5529,7 +5538,7 @@ dependencies = [ "semver", "serde", "serde_json", - "syn 2.0.89", + "syn 2.0.90", "thiserror 1.0.69", "unicode-ident", ] @@ -5547,7 +5556,7 @@ dependencies = [ "serde", "serde_json", "serde_tokenstream", - "syn 2.0.89", + "syn 2.0.90", "typify-impl", ] @@ -5589,15 +5598,15 @@ checksum = "8ecb6da28b8a351d773b68d5825ac39017e680750f980f3a1a85cd8dd28a47c1" [[package]] name = "ureq" -version = "2.10.1" +version = "2.12.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b74fc6b57825be3373f7054754755f03ac3a8f5d70015ccad699ba2029956f4a" +checksum = "02d1a66277ed75f640d608235660df48c8e3c19f3b4edb6a263315626cc3c01d" dependencies = [ "base64 0.22.1", "flate2", "log", "once_cell", - "rustls 0.23.17", + "rustls 0.23.19", "rustls-pki-types", "url", "webpki-roots", @@ -5605,9 +5614,9 @@ dependencies = [ [[package]] name = "url" -version = "2.5.3" +version = "2.5.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8d157f1b96d14500ffdc1f10ba712e780825526c03d9a49b4d0324b0d9113ada" +checksum = "32f8b686cadd1473f4bd0117a5d28d36b1ade384ea9b5069a1c40aefed7fda60" dependencies = [ "form_urlencoded", "idna", @@ -5699,9 +5708,9 @@ checksum = "9c8d87e72b64a3b4db28d11ce29237c246188f4f51057d65a7eab63b7987e423" [[package]] name = "wasm-bindgen" -version = "0.2.95" +version = "0.2.97" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "128d1e363af62632b8eb57219c8fd7877144af57558fb2ef0368d0087bddeb2e" +checksum = "d15e63b4482863c109d70a7b8706c1e364eb6ea449b201a76c5b89cedcec2d5c" dependencies = [ "cfg-if", "once_cell", @@ -5710,36 +5719,37 @@ dependencies = [ [[package]] name = "wasm-bindgen-backend" -version = "0.2.95" +version = "0.2.97" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "cb6dd4d3ca0ddffd1dd1c9c04f94b868c37ff5fac97c30b97cff2d74fce3a358" +checksum = "8d36ef12e3aaca16ddd3f67922bc63e48e953f126de60bd33ccc0101ef9998cd" dependencies = [ "bumpalo", "log", "once_cell", "proc-macro2", "quote", - "syn 2.0.89", + "syn 2.0.90", "wasm-bindgen-shared", ] [[package]] name = "wasm-bindgen-futures" -version = "0.4.45" +version = "0.4.47" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "cc7ec4f8827a71586374db3e87abdb5a2bb3a15afed140221307c3ec06b1f63b" +checksum = "9dfaf8f50e5f293737ee323940c7d8b08a66a95a419223d9f41610ca08b0833d" dependencies = [ "cfg-if", "js-sys", + "once_cell", "wasm-bindgen", "web-sys", ] [[package]] name = "wasm-bindgen-macro" -version = "0.2.95" +version = "0.2.97" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e79384be7f8f5a9dd5d7167216f022090cf1f9ec128e6e6a482a2cb5c5422c56" +checksum = "705440e08b42d3e4b36de7d66c944be628d579796b8090bfa3471478a2260051" dependencies = [ "quote", "wasm-bindgen-macro-support", @@ -5747,22 +5757,22 @@ dependencies = [ [[package]] name = "wasm-bindgen-macro-support" -version = "0.2.95" +version = "0.2.97" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "26c6ab57572f7a24a4985830b120de1594465e5d500f24afe89e16b4e833ef68" +checksum = "98c9ae5a76e46f4deecd0f0255cc223cfa18dc9b261213b8aa0c7b36f61b3f1d" dependencies = [ "proc-macro2", "quote", - "syn 2.0.89", + "syn 2.0.90", "wasm-bindgen-backend", "wasm-bindgen-shared", ] [[package]] name = "wasm-bindgen-shared" -version = "0.2.95" +version = "0.2.97" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "65fc09f10666a9f147042251e0dda9c18f166ff7de300607007e96bdebc1068d" +checksum = "6ee99da9c5ba11bd675621338ef6fa52296b76b83305e9b6e5c77d4c286d6d49" [[package]] name = "wasm-streams" @@ -5779,9 +5789,9 @@ dependencies = [ [[package]] name = "web-sys" -version = "0.3.72" +version = "0.3.74" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f6488b90108c040df0fe62fa815cbdee25124641df01814dd7282749234c6112" +checksum = "a98bc3c33f0fe7e59ad7cd041b89034fa82a7c2d4365ca538dda6cdaf513863c" dependencies = [ "js-sys", "wasm-bindgen", @@ -6085,9 +6095,9 @@ dependencies = [ [[package]] name = "yoke" -version = "0.7.4" +version = "0.7.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6c5b1314b079b0930c31e3af543d8ee1757b1951ae1e1565ec704403a7240ca5" +checksum = "120e6aef9aa629e3d4f52dc8cc43a015c7724194c97dfaf45180d2daf2b77f40" dependencies = [ "serde", "stable_deref_trait", @@ -6097,13 +6107,13 @@ dependencies = [ [[package]] name = "yoke-derive" -version = "0.7.4" +version = "0.7.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "28cc31741b18cb6f1d5ff12f5b7523e3d6eb0852bbbad19d73905511d9849b95" +checksum = "2380878cad4ac9aac1e2435f3eb4020e8374b5f13c296cb75b4620ff8e229154" dependencies = [ "proc-macro2", "quote", - "syn 2.0.89", + "syn 2.0.90", "synstructure", ] @@ -6125,27 +6135,27 @@ checksum = "fa4f8080344d4671fb4e831a13ad1e68092748387dfc4f55e356242fae12ce3e" dependencies = [ "proc-macro2", "quote", - "syn 2.0.89", + "syn 2.0.90", ] [[package]] name = "zerofrom" -version = "0.1.4" +version = "0.1.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "91ec111ce797d0e0784a1116d0ddcdbea84322cd79e5d5ad173daeba4f93ab55" +checksum = "cff3ee08c995dee1859d998dea82f7374f2826091dd9cd47def953cae446cd2e" dependencies = [ "zerofrom-derive", ] [[package]] name = "zerofrom-derive" -version = "0.1.4" +version = "0.1.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0ea7b4a3637ea8669cedf0f1fd5c286a17f3de97b8dd5a70a6c167a1730e63a5" +checksum = "595eed982f7d355beb85837f651fa22e90b3c044842dc7f2c2842c086f295808" dependencies = [ "proc-macro2", "quote", - "syn 2.0.89", + "syn 2.0.90", "synstructure", ] @@ -6174,7 +6184,7 @@ checksum = "6eafa6dfb17584ea3e2bd6e76e0cc15ad7af12b09abdd1ca55961bed9b1063c6" dependencies = [ "proc-macro2", "quote", - "syn 2.0.89", + "syn 2.0.90", ] [[package]] diff --git a/rust/lance-index/src/vector/pq/distance.rs b/rust/lance-index/src/vector/pq/distance.rs index b915db8b61..da89317916 100644 --- a/rust/lance-index/src/vector/pq/distance.rs +++ b/rust/lance-index/src/vector/pq/distance.rs @@ -165,7 +165,7 @@ pub(super) fn compute_pq_distance_4bit( debug_assert_eq!(dist_table.as_array(), origin_dist_table.as_array()); // compute next distances - let next_indices = vec_indices.right_shift(); + let next_indices = vec_indices.right_shift(4); let next_dist_table = origin_next_dist_table; let results = results + next_dist_table.shuffle(next_indices); From 7bfca294260c608f9094318b5d763d39548cbd7a Mon Sep 17 00:00:00 2001 From: BubbleCal Date: Thu, 5 Dec 2024 13:48:58 +0800 Subject: [PATCH 25/27] fix Signed-off-by: BubbleCal --- rust/lance-index/src/vector/pq/distance.rs | 2 +- rust/lance-linalg/src/simd/u8.rs | 20 +++++--------------- 2 files changed, 6 insertions(+), 16 deletions(-) diff --git a/rust/lance-index/src/vector/pq/distance.rs b/rust/lance-index/src/vector/pq/distance.rs index da89317916..0094d53a4a 100644 --- a/rust/lance-index/src/vector/pq/distance.rs +++ b/rust/lance-index/src/vector/pq/distance.rs @@ -165,7 +165,7 @@ pub(super) fn compute_pq_distance_4bit( debug_assert_eq!(dist_table.as_array(), origin_dist_table.as_array()); // compute next distances - let next_indices = vec_indices.right_shift(4); + let next_indices = vec_indices.right_shift::<4>(); let next_dist_table = origin_next_dist_table; let results = results + next_dist_table.shuffle(next_indices); diff --git a/rust/lance-linalg/src/simd/u8.rs b/rust/lance-linalg/src/simd/u8.rs index 039f0b9f86..a03b95a24c 100644 --- a/rust/lance-linalg/src/simd/u8.rs +++ b/rust/lance-linalg/src/simd/u8.rs @@ -49,32 +49,22 @@ impl u8x16 { } #[inline] - pub fn right_shift(self, nbits: i32) -> Self { + pub fn right_shift(self) -> Self { #[cfg(target_arch = "x86_64")] unsafe { - let shifted = _mm_srli_epi16(self.0, nbits); - let mask = _mm_set1_epi8(1_i8 << (8 - nbits) - 1); + let shifted = _mm_srli_epi16(self.0, N); + let mask = _mm_set1_epi8((1_i8 << (8 - nbits)) - 1); Self(_mm_and_si128(shifted, mask)) } #[cfg(target_arch = "aarch64")] unsafe { - match nbits { - 1 => Self(vshrq_n_u8::<1>(self.0)), - 2 => Self(vshrq_n_u8::<2>(self.0)), - 3 => Self(vshrq_n_u8::<3>(self.0)), - 4 => Self(vshrq_n_u8::<4>(self.0)), - 5 => Self(vshrq_n_u8::<5>(self.0)), - 6 => Self(vshrq_n_u8::<6>(self.0)), - 7 => Self(vshrq_n_u8::<7>(self.0)), - 8 => Self(vshrq_n_u8::<8>(self.0)), - _ => unreachable!(), - } + Self(vshrq_n_u8::(self.0)) } #[cfg(not(any(target_arch = "x86_64", target_arch = "aarch64")))] { let mut result = [0u8; 16]; for i in 0..16 { - result[i] = self.0[i] >> 4; + result[i] = self.0[i] >> N; } Self(result) } From d93e4f986bf3848666ada1d3fd56615160ba567e Mon Sep 17 00:00:00 2001 From: BubbleCal Date: Thu, 5 Dec 2024 13:53:45 +0800 Subject: [PATCH 26/27] fix Signed-off-by: BubbleCal --- rust/lance-linalg/src/simd/u8.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/rust/lance-linalg/src/simd/u8.rs b/rust/lance-linalg/src/simd/u8.rs index a03b95a24c..817340b9ec 100644 --- a/rust/lance-linalg/src/simd/u8.rs +++ b/rust/lance-linalg/src/simd/u8.rs @@ -53,7 +53,7 @@ impl u8x16 { #[cfg(target_arch = "x86_64")] unsafe { let shifted = _mm_srli_epi16(self.0, N); - let mask = _mm_set1_epi8((1_i8 << (8 - nbits)) - 1); + let mask = _mm_set1_epi8((1_i8 << (8 - N)) - 1); Self(_mm_and_si128(shifted, mask)) } #[cfg(target_arch = "aarch64")] From 5fc527c8dc17fc6708b372bd3934923fe0e02fd6 Mon Sep 17 00:00:00 2001 From: BubbleCal Date: Thu, 5 Dec 2024 14:01:14 +0800 Subject: [PATCH 27/27] fix Signed-off-by: BubbleCal --- rust/lance-linalg/src/simd/u8.rs | 2 -- 1 file changed, 2 deletions(-) diff --git a/rust/lance-linalg/src/simd/u8.rs b/rust/lance-linalg/src/simd/u8.rs index 817340b9ec..6a0449739b 100644 --- a/rust/lance-linalg/src/simd/u8.rs +++ b/rust/lance-linalg/src/simd/u8.rs @@ -95,7 +95,6 @@ impl<'a> From<&'a [u8; 16]> for u8x16 { impl SIMD for u8x16 { #[inline] - fn splat(val: u8) -> Self { #[cfg(target_arch = "x86_64")] unsafe { @@ -184,7 +183,6 @@ impl SIMD for u8x16 { } #[inline] - unsafe fn store_unaligned(&self, ptr: *mut u8) { #[cfg(target_arch = "x86_64")] unsafe {