diff --git a/cfavml/src/mem_loader.rs b/cfavml/src/mem_loader.rs index 352d680..3c4cbf7 100644 --- a/cfavml/src/mem_loader.rs +++ b/cfavml/src/mem_loader.rs @@ -1,5 +1,12 @@ use crate::danger::{DenseLane, SimdRegister}; +/// The stack scratch space used by the projecting buffer loader. +/// +/// This is calculated by effectively taking the maximum number of elements +/// that could be loaded from the widest supported register in CFAVML, in +/// this case; AVX512. +const SCRATCH_SPACE_SIZE: usize = 64; + /// A buffer or value that can be turned into a [MemLoader]. /// /// NOTE: You are not supposed to implement this trait yourself. @@ -72,11 +79,17 @@ where type Loader = PtrBufferLoader; fn into_projected_mem_loader(self, projected_len: usize) -> Self::Loader { + let slice = self.as_ref(); + assert_eq!( + slice.len(), projected_len, - self.as_ref().len(), - "Buffer cannot be projected outside of its existing dimensions currently", + "Input slice does not match target output length, \ + by default slices cannot be projected to a new size. \ + You can enable projection to new sizes by wrapping your value in \ + a `Projected` wrapper." ); + self.into_mem_loader() } @@ -90,6 +103,79 @@ where } } +/// A wrapper that enables extended projection of the input buffer +/// to a new shape / size. +/// +/// Please be aware that this type only supports projecting default +/// implementations of _slices_ provided by this library, it does +/// not support handling custom MemLoader implementations. +/// +/// ## Projection Rules +/// +/// A buffer can be projected to a new size providing +/// the _new_ size is a multiple of the _old_ size. 
+/// +/// For example, we can project any of the following: +/// +/// - `size:40 -> size:80` +/// - `size:4 -> size:16` +/// - `size:1 -> size:73` +/// +/// But we cannot project: +/// +/// - `size:3 -> size:4` +/// - `size:2 -> size:9` +/// +/// ## Projection Behaviour +/// +/// This projection system is _not_ like numpy broadcasting or other ndarray-like +/// broadcasting; it is only aware of the _length_ of the buffer, not whether it is a +/// matrix or a type which has a shape. +/// +/// Because of this, this routine may behave differently to what you expect. For example, +/// it allows a matrix (represented as a slice) of shape `(4, 4)` to be projected to +/// shape `(8, 4)`, because shapes `(2, 4, 4)` and `(8, 4)` hold the same number of +/// elements: the target length `32` is simply a multiple of the source length `16`. +/// +/// +pub struct Projected(pub T); + +impl<'a, B, T> IntoMemLoader for Projected<&'a B> +where + T: Copy + Default, + B: AsRef<[T]> + ?Sized, +{ + type Loader = ProjectedPtrBufferLoader; + + fn into_projected_mem_loader(self, projected_len: usize) -> Self::Loader { + let slice = self.0.as_ref(); + + assert_eq!( + projected_len % slice.len(), + 0, + "Cannot project slice into size {projected_len}, because it is not a multiple of {}", + slice.len(), + ); + + ProjectedPtrBufferLoader { + data: slice.as_ptr(), + data_len: slice.len(), + data_cursor: 0, + projected_len, + } + } + + fn into_mem_loader(self) -> Self::Loader { + let slice = self.0.as_ref(); + ProjectedPtrBufferLoader { + data: slice.as_ptr(), + data_len: slice.len(), + data_cursor: 0, + projected_len: slice.len(), + } + } +} + macro_rules! impl_scalar_buffer_loader { ($t:ty) => { impl IntoMemLoader<$t> for $t { @@ -124,7 +210,7 @@ impl_scalar_buffer_loader!(u32); impl_scalar_buffer_loader!(u64); /// A [MemLoader] implementation that reads from a contiguous buffer represented -/// as a data pointer. +/// as a data pointer; it cannot be projected to a size other than its own. 
pub struct PtrBufferLoader { data: *const T, data_len: usize, @@ -170,6 +256,93 @@ impl MemLoader for PtrBufferLoader { } } +/// A [MemLoader] implementation that reads from a contiguous buffer represented +/// as a data pointer which can be projected to a size greater than its own. +pub struct ProjectedPtrBufferLoader { + data: *const T, + data_len: usize, + + // Generator state machine + data_cursor: usize, + projected_len: usize, +} + +impl ProjectedPtrBufferLoader { + fn can_load_full_dense_lane>(&self) -> bool { + self.data_cursor + R::elements_per_dense() <= self.data_len + } + + fn can_load_full_lane>(&self) -> bool { + self.data_cursor + R::elements_per_lane() <= self.data_len + } + + fn advance_cursor(&mut self, by: usize) { + self.data_cursor = (self.data_cursor + by) % self.data_len; + } +} + +impl MemLoader for ProjectedPtrBufferLoader { + type Value = T; + + #[inline(always)] + fn true_len(&self) -> usize { + self.data_len + } + + #[inline(always)] + fn projected_len(&self) -> usize { + self.projected_len + } + + #[inline(always)] + unsafe fn load_dense>( + &mut self, + ) -> DenseLane { + if self.can_load_full_dense_lane::() { + let dense = R::load_dense(self.data.add(self.data_cursor)); + self.advance_cursor(R::elements_per_dense()); + return dense; + } + + DenseLane { + a: self.load::(), + b: self.load::(), + c: self.load::(), + d: self.load::(), + e: self.load::(), + f: self.load::(), + g: self.load::(), + h: self.load::(), + } + } + + #[inline(always)] + unsafe fn load>(&mut self) -> R::Register { + if self.can_load_full_lane::() { + let dense = R::load(self.data.add(self.data_cursor)); + self.advance_cursor(R::elements_per_lane()); + return dense; + } + + let mut temp_buffer = [T::default(); SCRATCH_SPACE_SIZE]; + + // elements_per_lane != SCRATCH_SPACE_SIZE, this is cleaner than an iter chain. 
+ #[allow(clippy::needless_range_loop)] + for i in 0..R::elements_per_lane() { + temp_buffer[i] = self.read(); + } + + R::load(temp_buffer.as_ptr()) + } + + #[inline(always)] + unsafe fn read(&mut self) -> Self::Value { + let value = self.data.add(self.data_cursor).read(); + self.advance_cursor(1); + value + } +} + /// A [MemLoader] implementation that holds a single value that has been broadcast /// to a desired size. pub struct ScalarBufferLoader { @@ -211,6 +384,7 @@ impl MemLoader for ScalarBufferLoader { #[cfg(test)] mod tests { use super::*; + use crate::danger::Fallback; #[test] fn test_value_broadcast_loader() { @@ -244,4 +418,370 @@ mod tests { let sample = [1.0, 2.0, 3.0]; let _loader = (&sample).into_projected_mem_loader(10); } + + #[test] + #[should_panic] + fn test_buffer_projection_creation_panic() { + let sample = [1.0, 2.0]; + let projected = Projected(&sample); + let _loader = projected.into_projected_mem_loader(5); + } + + #[test] + fn test_buffer_projection_basic_read() { + let sample = [1.0, 2.0]; + let projected = Projected(&sample); + let mut loader = projected.into_projected_mem_loader(4); + assert_eq!(loader.projected_len(), 4); + + unsafe { + assert_eq!(loader.read(), 1.0); + assert_eq!(loader.read(), 2.0); + assert_eq!(loader.read(), 1.0); + assert_eq!(loader.read(), 2.0); + } + } + + #[test] + fn test_buffer_projection_fallback_dense_load() { + let sample = [1.0, 2.0]; + let projected = Projected(&sample); + let mut loader = projected.into_projected_mem_loader(4); + assert_eq!(loader.projected_len(), 4); + + unsafe { + let dense = loader.load_dense::(); + assert_eq!(dense.a, 1.0); + assert_eq!(dense.b, 2.0); + assert_eq!(dense.c, 1.0); + assert_eq!(dense.d, 2.0); + assert_eq!(dense.e, 1.0); + assert_eq!(dense.f, 2.0); + assert_eq!(dense.g, 1.0); + assert_eq!(dense.h, 2.0); + } + } + + #[test] + fn test_buffer_projection_fallback_load() { + let sample = [1.0, 2.0]; + let projected = Projected(&sample); + let mut loader = 
projected.into_projected_mem_loader(4); + assert_eq!(loader.projected_len(), 4); + + unsafe { + let reg = loader.load::(); + assert_eq!(reg, 1.0); + let reg = loader.load::(); + assert_eq!(reg, 2.0); + let reg = loader.load::(); + assert_eq!(reg, 1.0); + let reg = loader.load::(); + assert_eq!(reg, 2.0); + } + } + + #[cfg(all( + any(target_arch = "x86", target_arch = "x86_64"), + target_feature = "avx2" + ))] + #[test] + fn test_buffer_projection_avx2_dense_load() { + let sample = [1.0f32, 2.0f32]; + let projected = Projected(&sample); + let mut loader = projected.into_projected_mem_loader(4); + assert_eq!(loader.projected_len(), 4); + + #[allow(clippy::missing_transmute_annotations)] + unsafe { + let dense = loader.load_dense::(); + assert_eq!( + core::mem::transmute::<_, [f32; 8]>(dense.a), + [1.0, 2.0, 1.0, 2.0, 1.0, 2.0, 1.0, 2.0] + ); + assert_eq!( + core::mem::transmute::<_, [f32; 8]>(dense.b), + [1.0, 2.0, 1.0, 2.0, 1.0, 2.0, 1.0, 2.0] + ); + assert_eq!( + core::mem::transmute::<_, [f32; 8]>(dense.c), + [1.0, 2.0, 1.0, 2.0, 1.0, 2.0, 1.0, 2.0] + ); + assert_eq!( + core::mem::transmute::<_, [f32; 8]>(dense.d), + [1.0, 2.0, 1.0, 2.0, 1.0, 2.0, 1.0, 2.0] + ); + assert_eq!( + core::mem::transmute::<_, [f32; 8]>(dense.e), + [1.0, 2.0, 1.0, 2.0, 1.0, 2.0, 1.0, 2.0] + ); + assert_eq!( + core::mem::transmute::<_, [f32; 8]>(dense.f), + [1.0, 2.0, 1.0, 2.0, 1.0, 2.0, 1.0, 2.0] + ); + assert_eq!( + core::mem::transmute::<_, [f32; 8]>(dense.g), + [1.0, 2.0, 1.0, 2.0, 1.0, 2.0, 1.0, 2.0] + ); + assert_eq!( + core::mem::transmute::<_, [f32; 8]>(dense.h), + [1.0, 2.0, 1.0, 2.0, 1.0, 2.0, 1.0, 2.0] + ); + } + + let sample = [1.0f32, 2.0f32, 3.0f32]; + let projected = Projected(&sample); + let mut loader = projected.into_projected_mem_loader(9); + assert_eq!(loader.projected_len(), 9); + + #[allow(clippy::missing_transmute_annotations)] + unsafe { + let dense = loader.load_dense::(); + assert_eq!( + core::mem::transmute::<_, [f32; 8]>(dense.a), + [1.0, 2.0, 3.0, 1.0, 2.0, 
3.0, 1.0, 2.0] + ); + assert_eq!( + core::mem::transmute::<_, [f32; 8]>(dense.b), + [3.0, 1.0, 2.0, 3.0, 1.0, 2.0, 3.0, 1.0] + ); + assert_eq!( + core::mem::transmute::<_, [f32; 8]>(dense.c), + [2.0, 3.0, 1.0, 2.0, 3.0, 1.0, 2.0, 3.0] + ); + assert_eq!( + core::mem::transmute::<_, [f32; 8]>(dense.d), + [1.0, 2.0, 3.0, 1.0, 2.0, 3.0, 1.0, 2.0] + ); + assert_eq!( + core::mem::transmute::<_, [f32; 8]>(dense.e), + [3.0, 1.0, 2.0, 3.0, 1.0, 2.0, 3.0, 1.0] + ); + assert_eq!( + core::mem::transmute::<_, [f32; 8]>(dense.f), + [2.0, 3.0, 1.0, 2.0, 3.0, 1.0, 2.0, 3.0] + ); + assert_eq!( + core::mem::transmute::<_, [f32; 8]>(dense.g), + [1.0, 2.0, 3.0, 1.0, 2.0, 3.0, 1.0, 2.0] + ); + assert_eq!( + core::mem::transmute::<_, [f32; 8]>(dense.h), + [3.0, 1.0, 2.0, 3.0, 1.0, 2.0, 3.0, 1.0] + ); + } + } + + #[cfg(all( + any(target_arch = "x86", target_arch = "x86_64"), + target_feature = "avx2" + ))] + #[test] + fn test_buffer_projection_avx2_load() { + let sample = [1.0f32, 2.0f32]; + let projected = Projected(&sample); + let mut loader = projected.into_projected_mem_loader(4); + assert_eq!(loader.projected_len(), 4); + + #[allow(clippy::missing_transmute_annotations)] + unsafe { + let reg = loader.load::(); + assert_eq!( + core::mem::transmute::<_, [f32; 8]>(reg), + [1.0, 2.0, 1.0, 2.0, 1.0, 2.0, 1.0, 2.0] + ); + let reg = loader.load::(); + assert_eq!( + core::mem::transmute::<_, [f32; 8]>(reg), + [1.0, 2.0, 1.0, 2.0, 1.0, 2.0, 1.0, 2.0] + ); + let reg = loader.load::(); + assert_eq!( + core::mem::transmute::<_, [f32; 8]>(reg), + [1.0, 2.0, 1.0, 2.0, 1.0, 2.0, 1.0, 2.0] + ); + let reg = loader.load::(); + assert_eq!( + core::mem::transmute::<_, [f32; 8]>(reg), + [1.0, 2.0, 1.0, 2.0, 1.0, 2.0, 1.0, 2.0] + ); + } + + let sample = [1.0f32, 2.0f32, 3.0f32]; + let projected = Projected(&sample); + let mut loader = projected.into_projected_mem_loader(9); + assert_eq!(loader.projected_len(), 9); + + #[allow(clippy::missing_transmute_annotations)] + unsafe { + let reg = loader.load::(); 
+ assert_eq!( + core::mem::transmute::<_, [f32; 8]>(reg), + [1.0, 2.0, 3.0, 1.0, 2.0, 3.0, 1.0, 2.0] + ); + let reg = loader.load::(); + assert_eq!( + core::mem::transmute::<_, [f32; 8]>(reg), + [3.0, 1.0, 2.0, 3.0, 1.0, 2.0, 3.0, 1.0] + ); + let reg = loader.load::(); + assert_eq!( + core::mem::transmute::<_, [f32; 8]>(reg), + [2.0, 3.0, 1.0, 2.0, 3.0, 1.0, 2.0, 3.0] + ); + } + } + + #[cfg(all( + any(target_arch = "x86", target_arch = "x86_64"), + target_feature = "avx512f", + feature = "nightly" + ))] + #[test] + fn test_buffer_projection_avx512_dense_load() { + let sample = [1.0f64, 2.0f64]; + let projected = Projected(&sample); + let mut loader = projected.into_projected_mem_loader(4); + assert_eq!(loader.projected_len(), 4); + + #[allow(clippy::missing_transmute_annotations)] + unsafe { + let dense = loader.load_dense::(); + assert_eq!( + core::mem::transmute::<_, [f64; 8]>(dense.a), + [1.0, 2.0, 1.0, 2.0, 1.0, 2.0, 1.0, 2.0] + ); + assert_eq!( + core::mem::transmute::<_, [f64; 8]>(dense.b), + [1.0, 2.0, 1.0, 2.0, 1.0, 2.0, 1.0, 2.0] + ); + assert_eq!( + core::mem::transmute::<_, [f64; 8]>(dense.c), + [1.0, 2.0, 1.0, 2.0, 1.0, 2.0, 1.0, 2.0] + ); + assert_eq!( + core::mem::transmute::<_, [f64; 8]>(dense.d), + [1.0, 2.0, 1.0, 2.0, 1.0, 2.0, 1.0, 2.0] + ); + assert_eq!( + core::mem::transmute::<_, [f64; 8]>(dense.e), + [1.0, 2.0, 1.0, 2.0, 1.0, 2.0, 1.0, 2.0] + ); + assert_eq!( + core::mem::transmute::<_, [f64; 8]>(dense.f), + [1.0, 2.0, 1.0, 2.0, 1.0, 2.0, 1.0, 2.0] + ); + assert_eq!( + core::mem::transmute::<_, [f64; 8]>(dense.g), + [1.0, 2.0, 1.0, 2.0, 1.0, 2.0, 1.0, 2.0] + ); + assert_eq!( + core::mem::transmute::<_, [f64; 8]>(dense.h), + [1.0, 2.0, 1.0, 2.0, 1.0, 2.0, 1.0, 2.0] + ); + } + + let sample = [1.0f64, 2.0f64, 3.0f64]; + let projected = Projected(&sample); + let mut loader = projected.into_projected_mem_loader(9); + assert_eq!(loader.projected_len(), 9); + + #[allow(clippy::missing_transmute_annotations)] + unsafe { + let dense = 
loader.load_dense::(); + assert_eq!( + core::mem::transmute::<_, [f64; 8]>(dense.a), + [1.0, 2.0, 3.0, 1.0, 2.0, 3.0, 1.0, 2.0] + ); + assert_eq!( + core::mem::transmute::<_, [f64; 8]>(dense.b), + [3.0, 1.0, 2.0, 3.0, 1.0, 2.0, 3.0, 1.0] + ); + assert_eq!( + core::mem::transmute::<_, [f64; 8]>(dense.c), + [2.0, 3.0, 1.0, 2.0, 3.0, 1.0, 2.0, 3.0] + ); + assert_eq!( + core::mem::transmute::<_, [f64; 8]>(dense.d), + [1.0, 2.0, 3.0, 1.0, 2.0, 3.0, 1.0, 2.0] + ); + assert_eq!( + core::mem::transmute::<_, [f64; 8]>(dense.e), + [3.0, 1.0, 2.0, 3.0, 1.0, 2.0, 3.0, 1.0] + ); + assert_eq!( + core::mem::transmute::<_, [f64; 8]>(dense.f), + [2.0, 3.0, 1.0, 2.0, 3.0, 1.0, 2.0, 3.0] + ); + assert_eq!( + core::mem::transmute::<_, [f64; 8]>(dense.g), + [1.0, 2.0, 3.0, 1.0, 2.0, 3.0, 1.0, 2.0] + ); + assert_eq!( + core::mem::transmute::<_, [f64; 8]>(dense.h), + [3.0, 1.0, 2.0, 3.0, 1.0, 2.0, 3.0, 1.0] + ); + } + } + + #[cfg(all( + any(target_arch = "x86", target_arch = "x86_64"), + target_feature = "avx512f", + feature = "nightly", + ))] + #[test] + fn test_buffer_projection_avx512_load() { + let sample = [1.0f64, 2.0f64]; + let projected = Projected(&sample); + let mut loader = projected.into_projected_mem_loader(4); + assert_eq!(loader.projected_len(), 4); + + #[allow(clippy::missing_transmute_annotations)] + unsafe { + let reg = loader.load::(); + assert_eq!( + core::mem::transmute::<_, [f64; 8]>(reg), + [1.0, 2.0, 1.0, 2.0, 1.0, 2.0, 1.0, 2.0] + ); + let reg = loader.load::(); + assert_eq!( + core::mem::transmute::<_, [f64; 8]>(reg), + [1.0, 2.0, 1.0, 2.0, 1.0, 2.0, 1.0, 2.0] + ); + let reg = loader.load::(); + assert_eq!( + core::mem::transmute::<_, [f64; 8]>(reg), + [1.0, 2.0, 1.0, 2.0, 1.0, 2.0, 1.0, 2.0] + ); + let reg = loader.load::(); + assert_eq!( + core::mem::transmute::<_, [f64; 8]>(reg), + [1.0, 2.0, 1.0, 2.0, 1.0, 2.0, 1.0, 2.0] + ); + } + + let sample = [1.0f64, 2.0f64, 3.0f64]; + let projected = Projected(&sample); + let mut loader = 
projected.into_projected_mem_loader(9); + assert_eq!(loader.projected_len(), 9); + + #[allow(clippy::missing_transmute_annotations)] + unsafe { + let reg = loader.load::(); + assert_eq!( + core::mem::transmute::<_, [f64; 8]>(reg), + [1.0, 2.0, 3.0, 1.0, 2.0, 3.0, 1.0, 2.0] + ); + let reg = loader.load::(); + assert_eq!( + core::mem::transmute::<_, [f64; 8]>(reg), + [3.0, 1.0, 2.0, 3.0, 1.0, 2.0, 3.0, 1.0] + ); + let reg = loader.load::(); + assert_eq!( + core::mem::transmute::<_, [f64; 8]>(reg), + [2.0, 3.0, 1.0, 2.0, 3.0, 1.0, 2.0, 3.0] + ); + } + } } diff --git a/cfavml/src/safe_function_ops.rs b/cfavml/src/safe_function_ops.rs index a447cc1..07cf46e 100644 --- a/cfavml/src/safe_function_ops.rs +++ b/cfavml/src/safe_function_ops.rs @@ -319,6 +319,23 @@ where /// - `0 + 1 == [1, 1, 1]` w/result_buffer_len=3 /// - `1 + 1 == [1]` w/result_buffer_len=1 /// +/// ###### Projecting Vectors +/// +/// CFAVML allows for working over a wide variety of buffers for applications, projection is effectively +/// broadcasting of two input buffers implementing `IntoMemLoader`. +/// +/// By default, you can provide _two slices_, _one slice and a broadcast value_, or _two broadcast values_, +/// which exhibit the standard behaviour as you might expect. +/// +/// When providing two slices as inputs they cannot be projected to a buffer +/// that is larger than their input sizes by default. This means providing two slices +/// of `128` elements in length must take a result buffer of `128` elements in length. +/// +/// You can wrap your inputs in a [Projected](crate::mem_loader::Projected) wrapper which +/// enables projecting of the input buffer to new sizes providing the new size is a +/// multiple of the original size. When this buffer is projected, it is effectively +/// repeated `N` times, where `N` is how many times the old size fits into the new size. 
+/// /// ### Examples /// /// ##### Two vectors @@ -350,6 +367,27 @@ where /// assert_eq!(result, [1.0; 4]); /// ``` /// +/// ##### With projected vectors +/// +/// Using the [Projected](crate::mem_loader::Projected) wrapper type, we can create larger output +/// vectors than the original inputs, which can allow for much better performance and +/// memory usage when working with matrices. +/// +/// NOTE: +/// Projection != Matrix broadcasting, please read the documentation of `Projected` to understand +/// how it behaves, it does not replace your matrix library's own broadcasting system. +/// +/// ```rust +/// use cfavml::mem_loader::Projected; +/// +/// let lhs = [1.0, 1.0, 0.5, 3.5]; // Pretend this is a 2x2 matrix +/// let rhs = [1.0, 2.5]; // Pretend this is a 1x2 matrix +/// +/// let mut result = [0.0; 4]; // Our output is a 2x2 matrix +/// cfavml::max_vertical(Projected(&lhs), Projected(&rhs), &mut result); +/// assert_eq!(result, [1.0, 2.5, 1.0, 3.5]); +/// ``` +/// /// ##### With `MaybeUninit` /// /// Often if you are working with new-allocations, you do not want to initialize the data twice, @@ -370,18 +408,6 @@ where /// assert_eq!(result, [2.0, 2.5, 1.0, 1.0]); /// ``` /// -/// ### Projecting Vectors -/// -/// CFAVML allows for working over a wide variety of buffers for applications, projection is effectively -/// broadcasting of two input buffers implementing `IntoMemLoader`. -/// -/// By default, you can provide _two slices_, _one slice and a broadcast value_, or _two broadcast values_, -/// which exhibit the standard behaviour as you might expect. -/// -/// When providing two slices as inputs they cannot be projected to a buffer -/// that is larger their input sizes by default. This means providing two slices -/// of `128` elements in length must take a result buffer of `128` elements in length. 
-/// /// ### Implementation Pseudocode /// /// _This is the logic of the routine being called._ @@ -485,6 +511,23 @@ where /// - `0 + 1 == [1, 1, 1]` w/result_buffer_len=3 /// - `1 + 1 == [1]` w/result_buffer_len=1 /// +/// ###### Projecting Vectors +/// +/// CFAVML allows for working over a wide variety of buffers for applications, projection is effectively +/// broadcasting of two input buffers implementing `IntoMemLoader`. +/// +/// By default, you can provide _two slices_, _one slice and a broadcast value_, or _two broadcast values_, +/// which exhibit the standard behaviour as you might expect. +/// +/// When providing two slices as inputs they cannot be projected to a buffer +/// that is larger than their input sizes by default. This means providing two slices +/// of `128` elements in length must take a result buffer of `128` elements in length. +/// +/// You can wrap your inputs in a [Projected](crate::mem_loader::Projected) wrapper which +/// enables projecting of the input buffer to new sizes providing the new size is a +/// multiple of the original size. When this buffer is projected, it is effectively +/// repeated `N` times, where `N` is how many times the old size fits into the new size. +/// /// ### Examples /// /// ##### Two vectors @@ -516,6 +559,27 @@ where /// assert_eq!(result, [-5.0; 4]); /// ``` /// +/// ##### With projected vectors +/// +/// Using the [Projected](crate::mem_loader::Projected) wrapper type, we can create larger output +/// vectors than the original inputs, which can allow for much better performance and +/// memory usage when working with matrices. +/// +/// NOTE: +/// Projection != Matrix broadcasting, please read the documentation of `Projected` to understand +/// how it behaves, it does not replace your matrix library's own broadcasting system. 
+/// +/// ```rust +/// use cfavml::mem_loader::Projected; +/// +/// let lhs = [1.0, 1.0, 0.5, 3.5]; // Pretend this is a 2x2 matrix +/// let rhs = [1.0, 2.5]; // Pretend this is a 1x2 matrix +/// +/// let mut result = [0.0; 4]; // Our output is a 2x2 matrix +/// cfavml::min_vertical(Projected(&lhs), Projected(&rhs), &mut result); +/// assert_eq!(result, [1.0, 1.0, 0.5, 2.5]); +/// ``` +/// /// ##### With `MaybeUninit` /// /// Often if you are working with new-allocations, you do not want to initialize the data twice, @@ -536,18 +600,6 @@ where /// assert_eq!(result, [1.0, -1.0, 0.5, -2.0]); /// ``` /// -/// ### Projecting Vectors -/// -/// CFAVML allows for working over a wide variety of buffers for applications, projection is effectively -/// broadcasting of two input buffers implementing `IntoMemLoader`. -/// -/// By default, you can provide _two slices_, _one slice and a broadcast value_, or _two broadcast values_, -/// which exhibit the standard behaviour as you might expect. -/// -/// When providing two slices as inputs they cannot be projected to a buffer -/// that is larger their input sizes by default. This means providing two slices -/// of `128` elements in length must take a result buffer of `128` elements in length. -/// /// ### Implementation Pseudocode /// /// _This is the logic of the routine being called._ @@ -608,6 +660,23 @@ where /// - `0 + 1 == [1, 1, 1]` w/result_buffer_len=3 /// - `1 + 1 == [1]` w/result_buffer_len=1 /// +/// ###### Projecting Vectors +/// +/// CFAVML allows for working over a wide variety of buffers for applications, projection is effectively +/// broadcasting of two input buffers implementing `IntoMemLoader`. +/// +/// By default, you can provide _two slices_, _one slice and a broadcast value_, or _two broadcast values_, +/// which exhibit the standard behaviour as you might expect. +/// +/// When providing two slices as inputs they cannot be projected to a buffer +/// that is larger than their input sizes by default. 
This means providing two slices +/// of `128` elements in length must take a result buffer of `128` elements in length. +/// +/// You can wrap your inputs in a [Projected](crate::mem_loader::Projected) wrapper which +/// enables projecting of the input buffer to new sizes providing the new size is a +/// multiple of the original size. When this buffer is projected, it is effectively +/// repeated `N` times, where `N` is how many times the old size fits into the new size. +/// /// ###### Masks /// /// CFAVML follows the same pattern as numpy, which is it representing boolean results as @@ -655,6 +724,27 @@ where /// assert_eq!(result, [0.0; 4]); /// ``` /// +/// ##### With projected vectors +/// +/// Using the [Projected](crate::mem_loader::Projected) wrapper type, we can create larger output +/// vectors than the original inputs, which can allow for much better performance and +/// memory usage when working with matrices. +/// +/// NOTE: +/// Projection != Matrix broadcasting, please read the documentation of `Projected` to understand +/// how it behaves, it does not replace your matrix library's own broadcasting system. +/// +/// ```rust +/// use cfavml::mem_loader::Projected; +/// +/// let lhs = [1.0, 1.0, 0.5, 3.5]; // Pretend this is a 2x2 matrix +/// let rhs = [1.0, 2.5]; // Pretend this is a 1x2 matrix +/// +/// let mut result = [0.0; 4]; // Our output is a 2x2 matrix +/// cfavml::eq_vertical(Projected(&lhs), Projected(&rhs), &mut result); +/// assert_eq!(result, [1.0, 0.0, 0.0, 0.0]); +/// ``` +/// /// ##### With `MaybeUninit` /// /// Often if you are working with new-allocations, you do not want to initialize the data twice, @@ -675,18 +765,6 @@ where /// assert_eq!(result, [1.0, 0.0, 1.0, 0.0]); /// ``` /// -/// ### Projecting Vectors -/// -/// CFAVML allows for working over a wide variety of buffers for applications, projection is effectively -/// broadcasting of two input buffers implementing `IntoMemLoader`. 
-/// -/// By default, you can provide _two slices_, _one slice and a broadcast value_, or _two broadcast values_, -/// which exhibit the standard behaviour as you might expect. -/// -/// When providing two slices as inputs they cannot be projected to a buffer -/// that is larger their input sizes by default. This means providing two slices -/// of `128` elements in length must take a result buffer of `128` elements in length. -/// /// ### Implementation Pseudocode /// /// _This is the logic of the routine being called._ @@ -756,6 +834,23 @@ where /// - `0 + 1 == [1, 1, 1]` w/result_buffer_len=3 /// - `1 + 1 == [1]` w/result_buffer_len=1 /// +/// ###### Projecting Vectors +/// +/// CFAVML allows for working over a wide variety of buffers for applications, projection is effectively +/// broadcasting of two input buffers implementing `IntoMemLoader`. +/// +/// By default, you can provide _two slices_, _one slice and a broadcast value_, or _two broadcast values_, +/// which exhibit the standard behaviour as you might expect. +/// +/// When providing two slices as inputs they cannot be projected to a buffer +/// that is larger than their input sizes by default. This means providing two slices +/// of `128` elements in length must take a result buffer of `128` elements in length. +/// +/// You can wrap your inputs in a [Projected](crate::mem_loader::Projected) wrapper which +/// enables projecting of the input buffer to new sizes providing the new size is a +/// multiple of the original size. When this buffer is projected, it is effectively +/// repeated `N` times, where `N` is how many times the old size fits into the new size. 
+/// /// ###### Masks /// /// CFAVML follows the same pattern as numpy, which is it representing boolean results as @@ -803,6 +898,27 @@ where /// assert_eq!(result, [1.0; 4]); /// ``` /// +/// ##### With projected vectors +/// +/// Using the [Projected](crate::mem_loader::Projected) wrapper type, we can create larger output +/// vectors than the original inputs, which can allow for much better performance and +/// memory usage when working with matrices. +/// +/// NOTE: +/// Projection != Matrix broadcasting, please read the documentation of `Projected` to understand +/// how it behaves, it does not replace your matrix library's own broadcasting system. +/// +/// ```rust +/// use cfavml::mem_loader::Projected; +/// +/// let lhs = [1.0, 1.0, 0.5, 3.5]; // Pretend this is a 2x2 matrix +/// let rhs = [1.0, 2.5]; // Pretend this is a 1x2 matrix +/// +/// let mut result = [0.0; 4]; // Our output is a 2x2 matrix +/// cfavml::neq_vertical(Projected(&lhs), Projected(&rhs), &mut result); +/// assert_eq!(result, [0.0, 1.0, 1.0, 1.0]); +/// ``` +/// /// ##### With `MaybeUninit` /// /// Often if you are working with new-allocations, you do not want to initialize the data twice, @@ -823,18 +939,6 @@ where /// assert_eq!(result, [0.0, 1.0, 0.0, 1.0]); /// ``` /// -/// ### Projecting Vectors -/// -/// CFAVML allows for working over a wide variety of buffers for applications, projection is effectively -/// broadcasting of two input buffers implementing `IntoMemLoader`. -/// -/// By default, you can provide _two slices_, _one slice and a broadcast value_, or _two broadcast values_, -/// which exhibit the standard behaviour as you might expect. -/// -/// When providing two slices as inputs they cannot be projected to a buffer -/// that is larger their input sizes by default. This means providing two slices -/// of `128` elements in length must take a result buffer of `128` elements in length. 
-/// /// ### Implementation Pseudocode /// /// _This is the logic of the routine being called._ @@ -904,6 +1008,23 @@ where /// - `0 + 1 == [1, 1, 1]` w/result_buffer_len=3 /// - `1 + 1 == [1]` w/result_buffer_len=1 /// +/// ###### Projecting Vectors +/// +/// CFAVML allows for working over a wide variety of buffers for applications, projection is effectively +/// broadcasting of two input buffers implementing `IntoMemLoader`. +/// +/// By default, you can provide _two slices_, _one slice and a broadcast value_, or _two broadcast values_, +/// which exhibit the standard behaviour as you might expect. +/// +/// When providing two slices as inputs they cannot be projected to a buffer +/// that is larger than their input sizes by default. This means providing two slices +/// of `128` elements in length must take a result buffer of `128` elements in length. +/// +/// You can wrap your inputs in a [Projected](crate::mem_loader::Projected) wrapper which +/// enables projecting of the input buffer to new sizes providing the new size is a +/// multiple of the original size. When this buffer is projected, it is effectively +/// repeated `N` times, where `N` is how many times the old size fits into the new size. +/// /// ###### Masks /// /// CFAVML follows the same pattern as numpy, which is it representing boolean results as @@ -951,6 +1072,27 @@ where /// assert_eq!(result, [1.0; 4]); /// ``` /// +/// ##### With projected vectors +/// +/// Using the [Projected](crate::mem_loader::Projected) wrapper type, we can create larger output +/// vectors than the original inputs, which can allow for much better performance and +/// memory usage when working with matrices. +/// +/// NOTE: +/// Projection != Matrix broadcasting, please read the documentation of `Projected` to understand +/// how it behaves, it does not replace your matrix library's own broadcasting system. 
+/// +/// ```rust +/// use cfavml::mem_loader::Projected; +/// +/// let lhs = [1.0, 1.0, 0.5, 3.5]; // Pretend this is a 2x2 matrix +/// let rhs = [1.0, 2.5]; // Pretend this is a 1x2 matrix +/// +/// let mut result = [0.0; 4]; // Our output is a 2x2 matrix +/// cfavml::lt_vertical(Projected(&lhs), Projected(&rhs), &mut result); +/// assert_eq!(result, [0.0, 1.0, 1.0, 0.0]); +/// ``` +/// /// ##### With `MaybeUninit` /// /// Often if you are working with new-allocations, you do not want to initialize the data twice, @@ -971,18 +1113,6 @@ where /// assert_eq!(result, [0.0, 1.0, 0.0, 0.0]); /// ``` /// -/// ### Projecting Vectors -/// -/// CFAVML allows for working over a wide variety of buffers for applications, projection is effectively -/// broadcasting of two input buffers implementing `IntoMemLoader`. -/// -/// By default, you can provide _two slices_, _one slice and a broadcast value_, or _two broadcast values_, -/// which exhibit the standard behaviour as you might expect. -/// -/// When providing two slices as inputs they cannot be projected to a buffer -/// that is larger their input sizes by default. This means providing two slices -/// of `128` elements in length must take a result buffer of `128` elements in length. -/// /// ### Implementation Pseudocode /// /// _This is the logic of the routine being called._ @@ -1053,6 +1183,23 @@ where /// - `0 + 1 == [1, 1, 1]` w/result_buffer_len=3 /// - `1 + 1 == [1]` w/result_buffer_len=1 /// +/// ###### Projecting Vectors +/// +/// CFAVML allows for working over a wide variety of buffers for applications, projection is effectively +/// broadcasting of two input buffers implementing `IntoMemLoader`. +/// +/// By default, you can provide _two slices_, _one slice and a broadcast value_, or _two broadcast values_, +/// which exhibit the standard behaviour as you might expect. +/// +/// When providing two slices as inputs they cannot be projected to a buffer +/// that is larger than their input sizes by default. 
This means providing two slices +/// of `128` elements in length must take a result buffer of `128` elements in length. +/// +/// You can wrap your inputs in a [Projected](crate::mem_loader::Projected) wrapper which +/// enables projecting of the input buffer to new sizes providing the new size is a +/// multiple of the original size. When this buffer is projected, it is effectively +/// repeated `N` times, where `N` is how many times the old size fits into the new size. +/// /// ###### Masks /// /// CFAVML follows the same pattern as numpy, which is it representing boolean results as @@ -1100,6 +1247,27 @@ where /// assert_eq!(result, [1.0; 4]); /// ``` /// +/// ##### With projected vectors +/// +/// Using the [Projected](crate::mem_loader::Projected) wrapper type, we can create larger output +/// vectors than the original inputs, which can allow for much better performance and +/// memory usage when working with matrices. +/// +/// NOTE: +/// Projection != Matrix broadcasting, please read the documentation of `Projected` to understand +/// how it behaves, it does not replace your matrix library's own broadcasting system. +/// +/// ```rust +/// use cfavml::mem_loader::Projected; +/// +/// let lhs = [1.0, 1.0, 0.5, 3.5]; // Pretend this is a 2x2 matrix +/// let rhs = [1.0, 2.5]; // Pretend this is a 1x2 matrix +/// +/// let mut result = [0.0; 4]; // Our output is a 2x2 matrix +/// cfavml::lte_vertical(Projected(&lhs), Projected(&rhs), &mut result); +/// assert_eq!(result, [1.0, 1.0, 1.0, 0.0]); +/// ``` +/// /// ##### With `MaybeUninit` /// /// Often if you are working with new-allocations, you do not want to initialize the data twice, @@ -1120,18 +1288,6 @@ where /// assert_eq!(result, [1.0, 1.0, 1.0, 0.0]); /// ``` /// -/// ### Projecting Vectors -/// -/// CFAVML allows for working over a wide variety of buffers for applications, projection is effectively -/// broadcasting of two input buffers implementing `IntoMemLoader`. 
-/// -/// By default, you can provide _two slices_, _one slice and a broadcast value_, or _two broadcast values_, -/// which exhibit the standard behaviour as you might expect. -/// -/// When providing two slices as inputs they cannot be projected to a buffer -/// that is larger their input sizes by default. This means providing two slices -/// of `128` elements in length must take a result buffer of `128` elements in length. -/// /// ### Implementation Pseudocode /// /// _This is the logic of the routine being called._ @@ -1203,6 +1359,23 @@ where /// - `0 + 1 == [1, 1, 1]` w/result_buffer_len=3 /// - `1 + 1 == [1]` w/result_buffer_len=1 /// +/// ###### Projecting Vectors +/// +/// CFAVML allows for working over a wide variety of buffers for applications, projection is effectively +/// broadcasting of two input buffers implementing `IntoMemLoader`. +/// +/// By default, you can provide _two slices_, _one slice and a broadcast value_, or _two broadcast values_, +/// which exhibit the standard behaviour as you might expect. +/// +/// When providing two slices as inputs they cannot be projected to a buffer +/// that is larger than their input sizes by default. This means providing two slices +/// of `128` elements in length must take a result buffer of `128` elements in length. +/// +/// You can wrap your inputs in a [Projected](crate::mem_loader::Projected) wrapper which +/// enables projecting of the input buffer to new sizes providing the new size is a +/// multiple of the original size. When this buffer is projected, it is effectively +/// repeated `N` times, where `N` is how many times the old size fits into the new size.
+/// /// ###### Masks /// /// CFAVML follows the same pattern as numpy, which is it representing boolean results as @@ -1250,6 +1423,27 @@ where /// assert_eq!(result, [1.0; 4]); /// ``` /// +/// ##### With projected vectors +/// +/// Using the [Projected](crate::mem_loader::Projected) wrapper type, we can create larger output +/// vectors than the original inputs, which can allow for much better performance and +/// memory usage when working with matrices. +/// +/// NOTE: +/// Projection != Matrix broadcasting, please read the documentation of `Projected` to understand +/// how it behaves, it does not replace your matrix library's own broadcasting system. +/// +/// ```rust +/// use cfavml::mem_loader::Projected; +/// +/// let lhs = [1.0, 1.0, 0.5, 3.5]; // Pretend this is a 2x2 matrix +/// let rhs = [1.0, 2.5]; // Pretend this is a 1x2 matrix +/// +/// let mut result = [0.0; 4]; // Our output is a 2x2 matrix +/// cfavml::gt_vertical(Projected(&lhs), Projected(&rhs), &mut result); +/// assert_eq!(result, [0.0, 0.0, 0.0, 1.0]); +/// ``` +/// /// ##### With `MaybeUninit` /// /// Often if you are working with new-allocations, you do not want to initialize the data twice, @@ -1270,18 +1464,6 @@ where /// assert_eq!(result, [0.0, 0.0, 0.0, 1.0]); /// ``` /// -/// ### Projecting Vectors -/// -/// CFAVML allows for working over a wide variety of buffers for applications, projection is effectively -/// broadcasting of two input buffers implementing `IntoMemLoader`. -/// -/// By default, you can provide _two slices_, _one slice and a broadcast value_, or _two broadcast values_, -/// which exhibit the standard behaviour as you might expect. -/// -/// When providing two slices as inputs they cannot be projected to a buffer -/// that is larger their input sizes by default. This means providing two slices -/// of `128` elements in length must take a result buffer of `128` elements in length. 
-/// /// ### Implementation Pseudocode /// /// _This is the logic of the routine being called._ @@ -1352,6 +1534,23 @@ where /// - `0 + 1 == [1, 1, 1]` w/result_buffer_len=3 /// - `1 + 1 == [1]` w/result_buffer_len=1 /// +/// ###### Projecting Vectors +/// +/// CFAVML allows for working over a wide variety of buffers for applications, projection is effectively +/// broadcasting of two input buffers implementing `IntoMemLoader`. +/// +/// By default, you can provide _two slices_, _one slice and a broadcast value_, or _two broadcast values_, +/// which exhibit the standard behaviour as you might expect. +/// +/// When providing two slices as inputs they cannot be projected to a buffer +/// that is larger than their input sizes by default. This means providing two slices +/// of `128` elements in length must take a result buffer of `128` elements in length. +/// +/// You can wrap your inputs in a [Projected](crate::mem_loader::Projected) wrapper which +/// enables projecting of the input buffer to new sizes providing the new size is a +/// multiple of the original size. When this buffer is projected, it is effectively +/// repeated `N` times, where `N` is how many times the old size fits into the new size. +/// /// ###### Masks /// /// CFAVML follows the same pattern as numpy, which is it representing boolean results as @@ -1399,6 +1598,27 @@ where /// assert_eq!(result, [1.0; 4]); /// ``` /// +/// ##### With projected vectors +/// +/// Using the [Projected](crate::mem_loader::Projected) wrapper type, we can create larger output +/// vectors than the original inputs, which can allow for much better performance and +/// memory usage when working with matrices. +/// +/// NOTE: +/// Projection != Matrix broadcasting, please read the documentation of `Projected` to understand +/// how it behaves, it does not replace your matrix library's own broadcasting system.
+/// +/// ```rust +/// use cfavml::mem_loader::Projected; +/// +/// let lhs = [1.0, 1.0, 0.5, 1.0]; // Pretend this is a 2x2 matrix +/// let rhs = [1.0, 2.5]; // Pretend this is a 1x2 matrix +/// +/// let mut result = [0.0; 4]; // Our output is a 2x2 matrix +/// cfavml::gte_vertical(Projected(&lhs), Projected(&rhs), &mut result); +/// assert_eq!(result, [1.0, 0.0, 0.0, 0.0]); +/// ``` +/// /// ##### With `MaybeUninit` /// /// Often if you are working with new-allocations, you do not want to initialize the data twice, @@ -1419,18 +1639,6 @@ where /// assert_eq!(result, [1.0, 0.0, 1.0, 1.0]); /// ``` /// -/// ### Projecting Vectors -/// -/// CFAVML allows for working over a wide variety of buffers for applications, projection is effectively -/// broadcasting of two input buffers implementing `IntoMemLoader`. -/// -/// By default, you can provide _two slices_, _one slice and a broadcast value_, or _two broadcast values_, -/// which exhibit the standard behaviour as you might expect. -/// -/// When providing two slices as inputs they cannot be projected to a buffer -/// that is larger their input sizes by default. This means providing two slices -/// of `128` elements in length must take a result buffer of `128` elements in length. -/// /// ### Implementation Pseudocode /// /// _This is the logic of the routine being called._ @@ -1499,6 +1707,23 @@ where /// - `0 + 1 == [1, 1, 1]` w/result_buffer_len=3 /// - `1 + 1 == [1]` w/result_buffer_len=1 /// +/// ###### Projecting Vectors +/// +/// CFAVML allows for working over a wide variety of buffers for applications, projection is effectively +/// broadcasting of two input buffers implementing `IntoMemLoader`. +/// +/// By default, you can provide _two slices_, _one slice and a broadcast value_, or _two broadcast values_, +/// which exhibit the standard behaviour as you might expect. +/// +/// When providing two slices as inputs they cannot be projected to a buffer +/// that is larger than their input sizes by default.
This means providing two slices +/// of `128` elements in length must take a result buffer of `128` elements in length. +/// +/// You can wrap your inputs in a [Projected](crate::mem_loader::Projected) wrapper which +/// enables projecting of the input buffer to new sizes providing the new size is a +/// multiple of the original size. When this buffer is projected, it is effectively +/// repeated `N` times, where `N` is how many times the old size fits into the new size. +/// /// ### Examples /// /// ##### Two vectors @@ -1530,6 +1755,27 @@ where /// assert_eq!(result, [10.0; 4]); /// ``` /// +/// ##### With projected vectors +/// +/// Using the [Projected](crate::mem_loader::Projected) wrapper type, we can create larger output +/// vectors than the original inputs, which can allow for much better performance and +/// memory usage when working with matrices. +/// +/// NOTE: +/// Projection != Matrix broadcasting, please read the documentation of `Projected` to understand +/// how it behaves, it does not replace your matrix library's own broadcasting system. +/// +/// ```rust +/// use cfavml::mem_loader::Projected; +/// +/// let lhs = [1.0, 1.0, 0.5, 1.0]; // Pretend this is a 2x2 matrix +/// let rhs = [1.0, 2.5]; // Pretend this is a 1x2 matrix +/// +/// let mut result = [0.0; 4]; // Our output is a 2x2 matrix +/// cfavml::add_vertical(Projected(&lhs), Projected(&rhs), &mut result); +/// assert_eq!(result, [2.0, 3.5, 1.5, 3.5]); +/// ``` +/// /// ##### With `MaybeUninit` /// /// Often if you are working with new-allocations, you do not want to initialize the data twice, @@ -1550,18 +1796,6 @@ where /// assert_eq!(result, [2.0, 1.5, 1.0, -1.0]); /// ``` /// -/// ### Projecting Vectors -/// -/// CFAVML allows for working over a wide variety of buffers for applications, projection is effectively -/// broadcasting of two input buffers implementing `IntoMemLoader`. 
-/// -/// By default, you can provide _two slices_, _one slice and a broadcast value_, or _two broadcast values_, -/// which exhibit the standard behaviour as you might expect. -/// -/// When providing two slices as inputs they cannot be projected to a buffer -/// that is larger their input sizes by default. This means providing two slices -/// of `128` elements in length must take a result buffer of `128` elements in length. -/// /// ### Implementation Pseudocode /// /// _This is the logic of the routine being called._ @@ -1620,6 +1854,23 @@ where /// - `0 + 1 == [1, 1, 1]` w/result_buffer_len=3 /// - `1 + 1 == [1]` w/result_buffer_len=1 /// +/// ###### Projecting Vectors +/// +/// CFAVML allows for working over a wide variety of buffers for applications, projection is effectively +/// broadcasting of two input buffers implementing `IntoMemLoader`. +/// +/// By default, you can provide _two slices_, _one slice and a broadcast value_, or _two broadcast values_, +/// which exhibit the standard behaviour as you might expect. +/// +/// When providing two slices as inputs they cannot be projected to a buffer +/// that is larger than their input sizes by default. This means providing two slices +/// of `128` elements in length must take a result buffer of `128` elements in length. +/// +/// You can wrap your inputs in a [Projected](crate::mem_loader::Projected) wrapper which +/// enables projecting of the input buffer to new sizes providing the new size is a +/// multiple of the original size. When this buffer is projected, it is effectively +/// repeated `N` times, where `N` is how many times the old size fits into the new size.
+/// /// ### Examples /// /// ##### Two vectors @@ -1651,6 +1902,27 @@ where /// assert_eq!(result, [0.0; 4]); /// ``` /// +/// ##### With projected vectors +/// +/// Using the [Projected](crate::mem_loader::Projected) wrapper type, we can create larger output +/// vectors than the original inputs, which can allow for much better performance and +/// memory usage when working with matrices. +/// +/// NOTE: +/// Projection != Matrix broadcasting, please read the documentation of `Projected` to understand +/// how it behaves, it does not replace your matrix library's own broadcasting system. +/// +/// ```rust +/// use cfavml::mem_loader::Projected; +/// +/// let lhs = [1.0, 1.0, 0.5, 1.0]; // Pretend this is a 2x2 matrix +/// let rhs = [1.0, 2.5]; // Pretend this is a 1x2 matrix +/// +/// let mut result = [0.0; 4]; // Our output is a 2x2 matrix +/// cfavml::sub_vertical(Projected(&lhs), Projected(&rhs), &mut result); +/// assert_eq!(result, [0.0, -1.5, -0.5, -1.5]); +/// ``` +/// /// ##### With `MaybeUninit` /// /// Often if you are working with new-allocations, you do not want to initialize the data twice, @@ -1671,18 +1943,6 @@ where /// assert_eq!(result, [0.0, -3.5, 0.0, 3.0]); /// ``` /// -/// ### Projecting Vectors -/// -/// CFAVML allows for working over a wide variety of buffers for applications, projection is effectively -/// broadcasting of two input buffers implementing `IntoMemLoader`. -/// -/// By default, you can provide _two slices_, _one slice and a broadcast value_, or _two broadcast values_, -/// which exhibit the standard behaviour as you might expect. -/// -/// When providing two slices as inputs they cannot be projected to a buffer -/// that is larger their input sizes by default. This means providing two slices -/// of `128` elements in length must take a result buffer of `128` elements in length. 
-/// /// ### Implementation Pseudocode /// /// _This is the logic of the routine being called._ @@ -1741,6 +2001,23 @@ where /// - `0 + 1 == [1, 1, 1]` w/result_buffer_len=3 /// - `1 + 1 == [1]` w/result_buffer_len=1 /// +/// ###### Projecting Vectors +/// +/// CFAVML allows for working over a wide variety of buffers for applications, projection is effectively +/// broadcasting of two input buffers implementing `IntoMemLoader`. +/// +/// By default, you can provide _two slices_, _one slice and a broadcast value_, or _two broadcast values_, +/// which exhibit the standard behaviour as you might expect. +/// +/// When providing two slices as inputs they cannot be projected to a buffer +/// that is larger than their input sizes by default. This means providing two slices +/// of `128` elements in length must take a result buffer of `128` elements in length. +/// +/// You can wrap your inputs in a [Projected](crate::mem_loader::Projected) wrapper which +/// enables projecting of the input buffer to new sizes providing the new size is a +/// multiple of the original size. When this buffer is projected, it is effectively +/// repeated `N` times, where `N` is how many times the old size fits into the new size. +/// /// ### Examples /// /// ##### Two vectors @@ -1772,6 +2049,27 @@ where /// assert_eq!(result, [25.0; 4]); /// ``` /// +/// ##### With projected vectors +/// +/// Using the [Projected](crate::mem_loader::Projected) wrapper type, we can create larger output +/// vectors than the original inputs, which can allow for much better performance and +/// memory usage when working with matrices. +/// +/// NOTE: +/// Projection != Matrix broadcasting, please read the documentation of `Projected` to understand +/// how it behaves, it does not replace your matrix library's own broadcasting system.
+/// +/// ```rust +/// use cfavml::mem_loader::Projected; +/// +/// let lhs = [1.0, 1.0, 0.5, 1.0]; // Pretend this is a 2x2 matrix +/// let rhs = [1.0, 2.5]; // Pretend this is a 1x2 matrix +/// +/// let mut result = [0.0; 4]; // Our output is a 2x2 matrix +/// cfavml::mul_vertical(Projected(&lhs), Projected(&rhs), &mut result); +/// assert_eq!(result, [1.0, 2.5, 0.5, 2.5]); +/// ``` +/// /// ##### With `MaybeUninit` /// /// Often if you are working with new-allocations, you do not want to initialize the data twice, @@ -1792,18 +2090,6 @@ where /// assert_eq!(result, [1.0, -2.5, 0.25, -2.0]); /// ``` /// -/// ### Projecting Vectors -/// -/// CFAVML allows for working over a wide variety of buffers for applications, projection is effectively -/// broadcasting of two input buffers implementing `IntoMemLoader`. -/// -/// By default, you can provide _two slices_, _one slice and a broadcast value_, or _two broadcast values_, -/// which exhibit the standard behaviour as you might expect. -/// -/// When providing two slices as inputs they cannot be projected to a buffer -/// that is larger their input sizes by default. This means providing two slices -/// of `128` elements in length must take a result buffer of `128` elements in length. -/// /// ### Implementation Pseudocode /// /// _This is the logic of the routine being called._ @@ -1882,6 +2168,23 @@ where /// - `0 + 1 == [1, 1, 1]` w/result_buffer_len=3 /// - `1 + 1 == [1]` w/result_buffer_len=1 /// +/// ###### Projecting Vectors +/// +/// CFAVML allows for working over a wide variety of buffers for applications, projection is effectively +/// broadcasting of two input buffers implementing `IntoMemLoader`. +/// +/// By default, you can provide _two slices_, _one slice and a broadcast value_, or _two broadcast values_, +/// which exhibit the standard behaviour as you might expect. +/// +/// When providing two slices as inputs they cannot be projected to a buffer +/// that is larger than their input sizes by default.
This means providing two slices +/// of `128` elements in length must take a result buffer of `128` elements in length. +/// +/// You can wrap your inputs in a [Projected](crate::mem_loader::Projected) wrapper which +/// enables projecting of the input buffer to new sizes providing the new size is a +/// multiple of the original size. When this buffer is projected, it is effectively +/// repeated `N` times, where `N` is how many times the old size fits into the new size. +/// /// ### Examples /// /// ##### Two vectors @@ -1913,6 +2216,27 @@ where /// assert_eq!(result, [1.0; 4]); /// ``` /// +/// ##### With projected vectors +/// +/// Using the [Projected](crate::mem_loader::Projected) wrapper type, we can create larger output +/// vectors than the original inputs, which can allow for much better performance and +/// memory usage when working with matrices. +/// +/// NOTE: +/// Projection != Matrix broadcasting, please read the documentation of `Projected` to understand +/// how it behaves, it does not replace your matrix library's own broadcasting system. +/// +/// ```rust +/// use cfavml::mem_loader::Projected; +/// +/// let lhs = [1.0, -1.0, 0.5, 1.0]; // Pretend this is a 2x2 matrix +/// let rhs = [1.0, 2.5]; // Pretend this is a 1x2 matrix +/// +/// let mut result = [0.0; 4]; // Our output is a 2x2 matrix +/// cfavml::div_vertical(Projected(&lhs), Projected(&rhs), &mut result); +/// assert_eq!(result, [1.0, -0.4, 0.5, 0.4]); +/// ``` +/// /// ##### With `MaybeUninit` /// /// Often if you are working with new-allocations, you do not want to initialize the data twice, @@ -1933,18 +2257,6 @@ where /// assert_eq!(result, [1.0, -0.4, 1.0, -0.5]); /// ``` /// -/// ### Projecting Vectors -/// -/// CFAVML allows for working over a wide variety of buffers for applications, projection is effectively -/// broadcasting of two input buffers implementing `IntoMemLoader`. 
-/// -/// By default, you can provide _two slices_, _one slice and a broadcast value_, or _two broadcast values_, -/// which exhibit the standard behaviour as you might expect. -/// -/// When providing two slices as inputs they cannot be projected to a buffer -/// that is larger their input sizes by default. This means providing two slices -/// of `128` elements in length must take a result buffer of `128` elements in length. -/// /// ### Implementation Pseudocode /// /// _This is the logic of the routine being called._ diff --git a/cfavml/src/safe_trait_arithmetic_ops.rs b/cfavml/src/safe_trait_arithmetic_ops.rs index 8a53d39..526bf20 100644 --- a/cfavml/src/safe_trait_arithmetic_ops.rs +++ b/cfavml/src/safe_trait_arithmetic_ops.rs @@ -12,6 +12,8 @@ pub trait ArithmeticOps: Sized + Copy { /// Performs an element wise addition of two input buffers `lhs` and `rhs` that can /// be projected to the desired output size of `result`. /// + /// See [cfavml::add_vertical](crate::add_vertical) for examples. + /// /// ### Projecting Vectors /// /// CFAVML allows for working over a wide variety of buffers for applications, projection is effectively @@ -24,6 +26,11 @@ pub trait ArithmeticOps: Sized + Copy { /// that is larger their input sizes by default. This means providing two slices /// of `128` elements in length must take a result buffer of `128` elements in length. /// + /// You can wrap your inputs in a [Projected](crate::mem_loader::Projected) wrapper which + /// enables projecting of the input buffer to new sizes providing the new size is a + /// multiple of the original size. When this buffer is projected, it is effectively + /// repeated `N` times, where `N` is how many times the old size fits into the new size. + /// /// ### Implementation Pseudocode /// /// ```ignore @@ -50,6 +57,8 @@ pub trait ArithmeticOps: Sized + Copy { /// Performs an element wise subtraction of two input buffers `a` and `b` that can /// be projected to the desired output size of `result`. 
/// + /// See [cfavml::sub_vertical](crate::sub_vertical) for examples. + /// /// ### Projecting Vectors /// /// CFAVML allows for working over a wide variety of buffers for applications, projection is effectively @@ -62,6 +71,11 @@ pub trait ArithmeticOps: Sized + Copy { /// that is larger their input sizes by default. This means providing two slices /// of `128` elements in length must take a result buffer of `128` elements in length. /// + /// You can wrap your inputs in a [Projected](crate::mem_loader::Projected) wrapper which + /// enables projecting of the input buffer to new sizes providing the new size is a + /// multiple of the original size. When this buffer is projected, it is effectively + /// repeated `N` times, where `N` is how many times the old size fits into the new size. + /// /// ### Implementation Pseudocode /// /// ```ignore @@ -88,6 +102,8 @@ pub trait ArithmeticOps: Sized + Copy { /// Performs an element wise multiply of two input buffers `a` and `b` that can /// be projected to the desired output size of `result`. /// + /// See [cfavml::mul_vertical](crate::mul_vertical) for examples. + /// /// ### Projecting Vectors /// /// CFAVML allows for working over a wide variety of buffers for applications, projection is effectively @@ -100,6 +116,11 @@ pub trait ArithmeticOps: Sized + Copy { /// that is larger their input sizes by default. This means providing two slices /// of `128` elements in length must take a result buffer of `128` elements in length. /// + /// You can wrap your inputs in a [Projected](crate::mem_loader::Projected) wrapper which + /// enables projecting of the input buffer to new sizes providing the new size is a + /// multiple of the original size. When this buffer is projected, it is effectively + /// repeated `N` times, where `N` is how many times the old size fits into the new size. 
+ /// /// ### Implementation Pseudocode /// /// ```ignore @@ -126,6 +147,8 @@ pub trait ArithmeticOps: Sized + Copy { /// Performs an element wise division of two input buffers `a` and `b` that can /// be projected to the desired output size of `result`. /// + /// See [cfavml::div_vertical](crate::div_vertical) for examples. + /// /// ### Projecting Vectors /// /// CFAVML allows for working over a wide variety of buffers for applications, projection is effectively @@ -138,6 +161,11 @@ pub trait ArithmeticOps: Sized + Copy { /// that is larger their input sizes by default. This means providing two slices /// of `128` elements in length must take a result buffer of `128` elements in length. /// + /// You can wrap your inputs in a [Projected](crate::mem_loader::Projected) wrapper which + /// enables projecting of the input buffer to new sizes providing the new size is a + /// multiple of the original size. When this buffer is projected, it is effectively + /// repeated `N` times, where `N` is how many times the old size fits into the new size. + /// /// ### Implementation Pseudocode /// /// ```ignore diff --git a/cfavml/src/safe_trait_cmp_ops.rs b/cfavml/src/safe_trait_cmp_ops.rs index 5953266..319b718 100644 --- a/cfavml/src/safe_trait_cmp_ops.rs +++ b/cfavml/src/safe_trait_cmp_ops.rs @@ -33,6 +33,26 @@ pub trait CmpOps: Sized + Copy { /// Performs an element wise max on each element of vector `a` and `b`, /// writing the result to `result`. /// + /// See [cfavml::max_vertical](crate::max_vertical) for examples. + /// + /// ### Projecting Vectors + /// + /// CFAVML allows for working over a wide variety of buffers for applications, projection is effectively + /// broadcasting of two input buffers implementing `IntoMemLoader`. + /// + /// By default, you can provide _two slices_, _one slice and a broadcast value_, or _two broadcast values_, + /// which exhibit the standard behaviour as you might expect. 
+ /// + /// When providing two slices as inputs they cannot be projected to a buffer + /// that is larger than their input sizes by default. This means providing two slices + /// of `128` elements in length must take a result buffer of `128` elements in length. + /// + /// You can wrap your inputs in a [Projected](crate::mem_loader::Projected) wrapper which + /// enables projecting of the input buffer to new sizes providing the new size is a + /// multiple of the original size. When this buffer is projected, it is effectively + /// repeated `N` times, where `N` is how many times the old size fits into the new size. + /// + /// /// ### Implementation Pseudocode /// /// ```ignore @@ -82,6 +102,25 @@ pub trait CmpOps: Sized + Copy { /// Performs an element wise min on each element of vector `a` and `b`, /// writing the result to `result`. /// + /// See [cfavml::min_vertical](crate::min_vertical) for examples. + /// + /// ### Projecting Vectors + /// + /// CFAVML allows for working over a wide variety of buffers for applications, projection is effectively + /// broadcasting of two input buffers implementing `IntoMemLoader`. + /// + /// By default, you can provide _two slices_, _one slice and a broadcast value_, or _two broadcast values_, + /// which exhibit the standard behaviour as you might expect. + /// + /// When providing two slices as inputs they cannot be projected to a buffer + /// that is larger than their input sizes by default. This means providing two slices + /// of `128` elements in length must take a result buffer of `128` elements in length. + /// + /// You can wrap your inputs in a [Projected](crate::mem_loader::Projected) wrapper which + /// enables projecting of the input buffer to new sizes providing the new size is a + /// multiple of the original size. When this buffer is projected, it is effectively + /// repeated `N` times, where `N` is how many times the old size fits into the new size.
+ /// /// ### Implementation Pseudocode /// /// ```ignore @@ -114,6 +153,25 @@ pub trait CmpOps: Sized + Copy { /// Checks each element pair from vectors `a` and `b` of size `dims` comparing /// if element `a` is **_equal to_** element `b` returning a mask vector of the same type. /// + /// See [cfavml::eq_vertical](crate::eq_vertical) for examples. + /// + /// ### Projecting Vectors + /// + /// CFAVML allows for working over a wide variety of buffers for applications, projection is effectively + /// broadcasting of two input buffers implementing `IntoMemLoader`. + /// + /// By default, you can provide _two slices_, _one slice and a broadcast value_, or _two broadcast values_, + /// which exhibit the standard behaviour as you might expect. + /// + /// When providing two slices as inputs they cannot be projected to a buffer + /// that is larger than their input sizes by default. This means providing two slices + /// of `128` elements in length must take a result buffer of `128` elements in length. + /// + /// You can wrap your inputs in a [Projected](crate::mem_loader::Projected) wrapper which + /// enables projecting of the input buffer to new sizes providing the new size is a + /// multiple of the original size. When this buffer is projected, it is effectively + /// repeated `N` times, where `N` is how many times the old size fits into the new size. + /// /// ### Implementation Pseudocode /// /// ```ignore @@ -155,6 +213,25 @@ pub trait CmpOps: Sized + Copy { /// Checks each element pair from vectors `a` and `b` of size `dims` comparing /// if element `a` is **_not equal to_** element `b` returning a mask vector of the same type. /// + /// See [cfavml::neq_vertical](crate::neq_vertical) for examples. + /// + /// ### Projecting Vectors + /// + /// CFAVML allows for working over a wide variety of buffers for applications, projection is effectively + /// broadcasting of two input buffers implementing `IntoMemLoader`.
+ /// + /// By default, you can provide _two slices_, _one slice and a broadcast value_, or _two broadcast values_, + /// which exhibit the standard behaviour as you might expect. + /// + /// When providing two slices as inputs they cannot be projected to a buffer + /// that is larger than their input sizes by default. This means providing two slices + /// of `128` elements in length must take a result buffer of `128` elements in length. + /// + /// You can wrap your inputs in a [Projected](crate::mem_loader::Projected) wrapper which + /// enables projecting of the input buffer to new sizes providing the new size is a + /// multiple of the original size. When this buffer is projected, it is effectively + /// repeated `N` times, where `N` is how many times the old size fits into the new size. + /// /// ### Implementation Pseudocode /// /// ```ignore @@ -196,6 +273,25 @@ pub trait CmpOps: Sized + Copy { /// Checks each element pair from vectors `a` and `b` of size `dims` comparing /// if element `a` is **_less than_** element `b` returning a mask vector of the same type. /// + /// See [cfavml::lt_vertical](crate::lt_vertical) for examples. + /// + /// ### Projecting Vectors + /// + /// CFAVML allows for working over a wide variety of buffers for applications, projection is effectively + /// broadcasting of two input buffers implementing `IntoMemLoader`. + /// + /// By default, you can provide _two slices_, _one slice and a broadcast value_, or _two broadcast values_, + /// which exhibit the standard behaviour as you might expect. + /// + /// When providing two slices as inputs they cannot be projected to a buffer + /// that is larger than their input sizes by default. This means providing two slices + /// of `128` elements in length must take a result buffer of `128` elements in length.
+ /// + /// You can wrap your inputs in a [Projected](crate::mem_loader::Projected) wrapper which + /// enables projecting of the input buffer to new sizes providing the new size is a + /// multiple of the original size. When this buffer is projected, it is effectively + /// repeated `N` times, where `N` is how many times the old size fits into the new size. + /// /// ### Implementation Pseudocode /// /// ```ignore @@ -237,6 +333,25 @@ pub trait CmpOps: Sized + Copy { /// Checks each element pair from vectors `a` and `b` of size `dims` comparing /// if element `a` is **_less than or equal to_** element `b` returning a mask vector of the same type. /// + /// See [cfavml::lte_vertical](crate::lte_vertical) for examples. + /// + /// ### Projecting Vectors + /// + /// CFAVML allows for working over a wide variety of buffers for applications, projection is effectively + /// broadcasting of two input buffers implementing `IntoMemLoader`. + /// + /// By default, you can provide _two slices_, _one slice and a broadcast value_, or _two broadcast values_, + /// which exhibit the standard behaviour as you might expect. + /// + /// When providing two slices as inputs they cannot be projected to a buffer + /// that is larger than their input sizes by default. This means providing two slices + /// of `128` elements in length must take a result buffer of `128` elements in length. + /// + /// You can wrap your inputs in a [Projected](crate::mem_loader::Projected) wrapper which + /// enables projecting of the input buffer to new sizes providing the new size is a + /// multiple of the original size. When this buffer is projected, it is effectively + /// repeated `N` times, where `N` is how many times the old size fits into the new size.
+ /// /// ### Implementation Pseudocode /// /// ```ignore @@ -278,6 +393,25 @@ pub trait CmpOps: Sized + Copy { /// Checks each element pair from vectors `a` and `b` of size `dims` comparing /// if element `a` is **_greater than_** element `b` returning a mask vector of the same type. /// + /// See [cfavml::gt_vertical](crate::gt_vertical) for examples. + /// + /// ### Projecting Vectors + /// + /// CFAVML allows for working over a wide variety of buffers for applications, projection is effectively + /// broadcasting of two input buffers implementing `IntoMemLoader`. + /// + /// By default, you can provide _two slices_, _one slice and a broadcast value_, or _two broadcast values_, + /// which exhibit the standard behaviour as you might expect. + /// + /// When providing two slices as inputs they cannot be projected to a buffer + /// that is larger than their input sizes by default. This means providing two slices + /// of `128` elements in length must take a result buffer of `128` elements in length. + /// + /// You can wrap your inputs in a [Projected](crate::mem_loader::Projected) wrapper which + /// enables projecting of the input buffer to new sizes providing the new size is a + /// multiple of the original size. When this buffer is projected, it is effectively + /// repeated `N` times, where `N` is how many times the old size fits into the new size. + /// /// ### Implementation Pseudocode /// /// ```ignore @@ -319,6 +453,25 @@ pub trait CmpOps: Sized + Copy { /// Checks each element pair from vectors `a` and `b` of size `dims` comparing /// if element `a` is **_greater than_** element `b` returning a mask vector of the same type. /// + /// See [cfavml::gte_vertical](crate::gte_vertical) for examples. + /// + /// ### Projecting Vectors + /// + /// CFAVML allows for working over a wide variety of buffers for applications, projection is effectively + /// broadcasting of two input buffers implementing `IntoMemLoader`.
+ /// + /// By default, you can provide _two slices_, _one slice and a broadcast value_, or _two broadcast values_, + /// which exhibit the standard behaviour as you might expect. + /// + /// When providing two slices as inputs they cannot be projected to a buffer + /// that is larger than their input sizes by default. This means providing two slices + /// of `128` elements in length must take a result buffer of `128` elements in length. + /// + /// You can wrap your inputs in a [Projected](crate::mem_loader::Projected) wrapper which + /// enables projecting of the input buffer to new sizes providing the new size is a + /// multiple of the original size. When this buffer is projected, it is effectively + /// repeated `N` times, where `N` is how many times the old size fits into the new size. + /// /// ### Implementation Pseudocode /// /// ```ignore