diff --git a/cfavml/src/mem_loader.rs b/cfavml/src/mem_loader.rs
index 352d680..3c4cbf7 100644
--- a/cfavml/src/mem_loader.rs
+++ b/cfavml/src/mem_loader.rs
@@ -1,5 +1,12 @@
 use crate::danger::{DenseLane, SimdRegister};
 
+/// Length, in elements, of the stack scratch buffer used by the projecting buffer loader.
+///
+/// This is calculated by effectively taking the maximum number of elements
+/// that could be loaded from the widest supported register in CFAVML
+/// (AVX512), so a single register's worth of values is intended to always fit.
+const SCRATCH_SPACE_SIZE: usize = 64;
+
 /// A buffer or value that can be turned into a [MemLoader].
 ///
 /// NOTE: You are not supposed to implement this trait yourself.
@@ -72,11 +79,17 @@ where
     type Loader = PtrBufferLoader<T>;
 
     fn into_projected_mem_loader(self, projected_len: usize) -> Self::Loader {
+        let slice = self.as_ref();
+
         assert_eq!(
+            slice.len(),
             projected_len,
-            self.as_ref().len(),
-            "Buffer cannot be projected outside of its existing dimensions currently",
+            "Input slice does not match target output length, \
+            by default slices cannot be projected to a new size. \
+            You can enable projection to new sizes by wrapping your value in \
+            a `Projected<T>` wrapper."
         );
+        // The lengths match, so no projection is needed and the plain loader is used.
         self.into_mem_loader()
     }
 
@@ -90,6 +103,79 @@ where
     }
 }
 
+/// A wrapper that enables extended projection of the input buffer
+/// to a new shape / size.
+///
+/// Please be aware that this type only supports projecting the default
+/// _slice_ implementations provided by this library; it does
+/// not support handling custom `MemLoader` implementations.
+///
+/// ## Projection Rules
+///
+/// A buffer can be projected to a new size provided that
+/// the _new_ size is a multiple of the _old_ size.
+///
+/// For example, we can project any of the following:
+///
+/// - `size:40 -> size:80`
+/// - `size:4 -> size:16`
+/// - `size:1 -> size:73`
+///
+/// But we cannot project:
+///
+/// - `size:3 -> size:4`
+/// - `size:2 -> size:9`
+///
+/// ## Projection Behaviour
+///
+/// This projection system is _not_ like numpy broadcasting or other ndarray-like
+/// broadcasting: it is only aware of the _length_ of the buffer, not whether it is a
+/// matrix or a type which has a shape. The buffer is simply repeated end-to-end.
+///
+/// Because of this, this routine may behave differently to what you expect. For
+/// example, a matrix (represented as a slice) of shape `(4, 4)` can be projected
+/// to shape `(8, 4)`, because _technically_ shapes `(2, 4, 4)` and `(8, 4)` hold the
+/// same number of elements; the new size is simply a multiple of `16` (`4 * 4`).
+///
+/// Requesting a size that is not a multiple of the buffer length panics.
+pub struct Projected<T>(pub T);
+
+impl<'a, B, T> IntoMemLoader<T> for Projected<&'a B>
+where
+    T: Copy + Default,
+    B: AsRef<[T]> + ?Sized,
+{
+    type Loader = ProjectedPtrBufferLoader<T>;
+
+    /// Repeats the slice up to `projected_len`; panics if it is empty or not an exact divisor.
+    fn into_projected_mem_loader(self, projected_len: usize) -> Self::Loader {
+        let slice = self.0.as_ref();
+        assert!(
+            !slice.is_empty() && projected_len % slice.len() == 0, // `%` by zero would panic
+            "Cannot project slice into size {projected_len}, because it is not a multiple of {}",
+            slice.len(),
+        );
+
+        ProjectedPtrBufferLoader {
+            data: slice.as_ptr(),
+            data_len: slice.len(),
+            data_cursor: 0,
+            projected_len,
+        }
+    }
+
+    /// Creates a loader over the slice at its own length, i.e. without any repetition.
+    fn into_mem_loader(self) -> Self::Loader {
+        let slice = self.0.as_ref();
+        ProjectedPtrBufferLoader {
+            data: slice.as_ptr(),
+            data_len: slice.len(),
+            data_cursor: 0,
+            projected_len: slice.len(),
+        }
+    }
+}
+
 macro_rules! impl_scalar_buffer_loader {
     ($t:ty) => {
         impl IntoMemLoader<$t> for $t {
@@ -124,7 +210,7 @@ impl_scalar_buffer_loader!(u32);
 impl_scalar_buffer_loader!(u64);
 
 /// A [MemLoader] implementation that reads from a contiguous buffer represented
-/// as a data pointer.
+/// as a data pointer; it cannot be projected to a size other than its own length.
 pub struct PtrBufferLoader<T> {
     data: *const T,
     data_len: usize,
@@ -170,6 +256,93 @@ impl<T: Copy> MemLoader for PtrBufferLoader<T> {
     }
 }
 
+/// A [MemLoader] implementation that reads from a contiguous buffer represented
+/// as a data pointer, repeating (wrapping around) the buffer to fill a projected length.
+pub struct ProjectedPtrBufferLoader<T> {
+    data: *const T,
+    data_len: usize,
+
+    // Read state: `data_cursor` is the next index into `data` and wraps modulo `data_len`.
+    data_cursor: usize,
+    projected_len: usize,
+}
+
+impl<T: Copy> ProjectedPtrBufferLoader<T> {
+    fn can_load_full_dense_lane<R: SimdRegister<T>>(&self) -> bool {
+        self.data_cursor + R::elements_per_dense() <= self.data_len // fits without wrapping
+    }
+    /// Like `can_load_full_dense_lane`, but for one register: true if it fits before the end.
+    fn can_load_full_lane<R: SimdRegister<T>>(&self) -> bool {
+        self.data_cursor + R::elements_per_lane() <= self.data_len
+    }
+    /// Moves the cursor forward by `by` elements, wrapping modulo `data_len`.
+    fn advance_cursor(&mut self, by: usize) {
+        self.data_cursor = (self.data_cursor + by) % self.data_len;
+    }
+}
+
+impl<T: Copy + Default> MemLoader for ProjectedPtrBufferLoader<T> {
+    type Value = T;
+    /// Number of elements in the underlying buffer, before projection.
+    #[inline(always)]
+    fn true_len(&self) -> usize {
+        self.data_len
+    }
+    /// Number of elements the loader was projected to.
+    #[inline(always)]
+    fn projected_len(&self) -> usize {
+        self.projected_len
+    }
+    /// Loads a dense lane: one contiguous read when it fits, otherwise eight `load` calls.
+    #[inline(always)]
+    unsafe fn load_dense<R: SimdRegister<Self::Value>>(
+        &mut self,
+    ) -> DenseLane<R::Register> {
+        if self.can_load_full_dense_lane::<R>() {
+            let dense = R::load_dense(self.data.add(self.data_cursor));
+            self.advance_cursor(R::elements_per_dense());
+            return dense;
+        }
+        // Fields are evaluated in written order, so the cursor advances from `a` through `h`.
+        DenseLane {
+            a: self.load::<R>(),
+            b: self.load::<R>(),
+            c: self.load::<R>(),
+            d: self.load::<R>(),
+            e: self.load::<R>(),
+            f: self.load::<R>(),
+            g: self.load::<R>(),
+            h: self.load::<R>(),
+        }
+    }
+    /// Loads one register: a contiguous read when it fits, otherwise a wrapped gather.
+    #[inline(always)]
+    unsafe fn load<R: SimdRegister<Self::Value>>(&mut self) -> R::Register {
+        if self.can_load_full_lane::<R>() {
+            let dense = R::load(self.data.add(self.data_cursor));
+            self.advance_cursor(R::elements_per_lane());
+            return dense;
+        }
+        // The read would cross the buffer end: gather wrapped values into stack scratch space.
+        let mut temp_buffer = [T::default(); SCRATCH_SPACE_SIZE];
+
+        // elements_per_lane != SCRATCH_SPACE_SIZE, this is cleaner than an iter chain.
+        #[allow(clippy::needless_range_loop)]
+        for i in 0..R::elements_per_lane() {
+            temp_buffer[i] = self.read();
+        }
+
+        R::load(temp_buffer.as_ptr())
+    }
+    /// Reads the element at the cursor, then advances the cursor by one (wrapping).
+    #[inline(always)]
+    unsafe fn read(&mut self) -> Self::Value {
+        let value = self.data.add(self.data_cursor).read();
+        self.advance_cursor(1);
+        value
+    }
+}
+
 /// A [MemLoader] implementation that holds a single value that has been broadcast
 /// to a desired size.
 pub struct ScalarBufferLoader<T> {
@@ -211,6 +384,7 @@ impl<T: Copy> MemLoader for ScalarBufferLoader<T> {
 #[cfg(test)]
 mod tests {
     use super::*;
+    use crate::danger::Fallback;
 
     #[test]
     fn test_value_broadcast_loader() {
@@ -244,4 +418,370 @@ mod tests {
         let sample = [1.0, 2.0, 3.0];
         let _loader = (&sample).into_projected_mem_loader(10);
     }
+
+    #[test]
+    #[should_panic]
+    fn test_buffer_projection_creation_panic() {
+        let sample = [1.0, 2.0];
+        let projected = Projected(&sample);
+        let _loader = projected.into_projected_mem_loader(5); // 5 is not a multiple of 2
+    }
+
+    #[test]
+    fn test_buffer_projection_basic_read() {
+        let sample = [1.0, 2.0];
+        let projected = Projected(&sample);
+        let mut loader = projected.into_projected_mem_loader(4);
+        assert_eq!(loader.projected_len(), 4);
+        // Reads past the end of the 2-element buffer wrap back to its start.
+        unsafe {
+            assert_eq!(loader.read(), 1.0);
+            assert_eq!(loader.read(), 2.0);
+            assert_eq!(loader.read(), 1.0);
+            assert_eq!(loader.read(), 2.0);
+        }
+    }
+
+    #[test]
+    fn test_buffer_projection_fallback_dense_load() {
+        let sample = [1.0, 2.0];
+        let projected = Projected(&sample);
+        let mut loader = projected.into_projected_mem_loader(4);
+        assert_eq!(loader.projected_len(), 4);
+        // With `Fallback`, each dense lane field is compared as a single value.
+        unsafe {
+            let dense = loader.load_dense::<Fallback>();
+            assert_eq!(dense.a, 1.0);
+            assert_eq!(dense.b, 2.0);
+            assert_eq!(dense.c, 1.0);
+            assert_eq!(dense.d, 2.0);
+            assert_eq!(dense.e, 1.0);
+            assert_eq!(dense.f, 2.0);
+            assert_eq!(dense.g, 1.0);
+            assert_eq!(dense.h, 2.0);
+        }
+    }
+
+    #[test]
+    fn test_buffer_projection_fallback_load() {
+        let sample = [1.0, 2.0];
+        let projected = Projected(&sample);
+        let mut loader = projected.into_projected_mem_loader(4);
+        assert_eq!(loader.projected_len(), 4);
+        // Each `Fallback` load yields a single value, cycling through the buffer.
+        unsafe {
+            let reg = loader.load::<Fallback>();
+            assert_eq!(reg, 1.0);
+            let reg = loader.load::<Fallback>();
+            assert_eq!(reg, 2.0);
+            let reg = loader.load::<Fallback>();
+            assert_eq!(reg, 1.0);
+            let reg = loader.load::<Fallback>();
+            assert_eq!(reg, 2.0);
+        }
+    }
+
+    #[cfg(all(
+        any(target_arch = "x86", target_arch = "x86_64"),
+        target_feature = "avx2"
+    ))]
+    #[test]
+    fn test_buffer_projection_avx2_dense_load() {
+        let sample = [1.0f32, 2.0f32];
+        let projected = Projected(&sample);
+        let mut loader = projected.into_projected_mem_loader(4);
+        assert_eq!(loader.projected_len(), 4);
+        // Each register holds 8 `f32`s, so the 2-element buffer repeats 4 times per register.
+        #[allow(clippy::missing_transmute_annotations)]
+        unsafe {
+            let dense = loader.load_dense::<crate::danger::Avx2>();
+            assert_eq!(
+                core::mem::transmute::<_, [f32; 8]>(dense.a),
+                [1.0, 2.0, 1.0, 2.0, 1.0, 2.0, 1.0, 2.0]
+            );
+            assert_eq!(
+                core::mem::transmute::<_, [f32; 8]>(dense.b),
+                [1.0, 2.0, 1.0, 2.0, 1.0, 2.0, 1.0, 2.0]
+            );
+            assert_eq!(
+                core::mem::transmute::<_, [f32; 8]>(dense.c),
+                [1.0, 2.0, 1.0, 2.0, 1.0, 2.0, 1.0, 2.0]
+            );
+            assert_eq!(
+                core::mem::transmute::<_, [f32; 8]>(dense.d),
+                [1.0, 2.0, 1.0, 2.0, 1.0, 2.0, 1.0, 2.0]
+            );
+            assert_eq!(
+                core::mem::transmute::<_, [f32; 8]>(dense.e),
+                [1.0, 2.0, 1.0, 2.0, 1.0, 2.0, 1.0, 2.0]
+            );
+            assert_eq!(
+                core::mem::transmute::<_, [f32; 8]>(dense.f),
+                [1.0, 2.0, 1.0, 2.0, 1.0, 2.0, 1.0, 2.0]
+            );
+            assert_eq!(
+                core::mem::transmute::<_, [f32; 8]>(dense.g),
+                [1.0, 2.0, 1.0, 2.0, 1.0, 2.0, 1.0, 2.0]
+            );
+            assert_eq!(
+                core::mem::transmute::<_, [f32; 8]>(dense.h),
+                [1.0, 2.0, 1.0, 2.0, 1.0, 2.0, 1.0, 2.0]
+            );
+        }
+        // A 3-element buffer does not divide 8, so the pattern shifts between registers.
+        let sample = [1.0f32, 2.0f32, 3.0f32];
+        let projected = Projected(&sample);
+        let mut loader = projected.into_projected_mem_loader(9);
+        assert_eq!(loader.projected_len(), 9);
+
+        #[allow(clippy::missing_transmute_annotations)]
+        unsafe {
+            let dense = loader.load_dense::<crate::danger::Avx2>();
+            assert_eq!(
+                core::mem::transmute::<_, [f32; 8]>(dense.a),
+                [1.0, 2.0, 3.0, 1.0, 2.0, 3.0, 1.0, 2.0]
+            );
+            assert_eq!(
+                core::mem::transmute::<_, [f32; 8]>(dense.b),
+                [3.0, 1.0, 2.0, 3.0, 1.0, 2.0, 3.0, 1.0]
+            );
+            assert_eq!(
+                core::mem::transmute::<_, [f32; 8]>(dense.c),
+                [2.0, 3.0, 1.0, 2.0, 3.0, 1.0, 2.0, 3.0]
+            );
+            assert_eq!(
+                core::mem::transmute::<_, [f32; 8]>(dense.d),
+                [1.0, 2.0, 3.0, 1.0, 2.0, 3.0, 1.0, 2.0]
+            );
+            assert_eq!(
+                core::mem::transmute::<_, [f32; 8]>(dense.e),
+                [3.0, 1.0, 2.0, 3.0, 1.0, 2.0, 3.0, 1.0]
+            );
+            assert_eq!(
+                core::mem::transmute::<_, [f32; 8]>(dense.f),
+                [2.0, 3.0, 1.0, 2.0, 3.0, 1.0, 2.0, 3.0]
+            );
+            assert_eq!(
+                core::mem::transmute::<_, [f32; 8]>(dense.g),
+                [1.0, 2.0, 3.0, 1.0, 2.0, 3.0, 1.0, 2.0]
+            );
+            assert_eq!(
+                core::mem::transmute::<_, [f32; 8]>(dense.h),
+                [3.0, 1.0, 2.0, 3.0, 1.0, 2.0, 3.0, 1.0]
+            );
+        }
+    }
+
+    #[cfg(all(
+        any(target_arch = "x86", target_arch = "x86_64"),
+        target_feature = "avx2"
+    ))]
+    #[test]
+    fn test_buffer_projection_avx2_load() {
+        let sample = [1.0f32, 2.0f32];
+        let projected = Projected(&sample);
+        let mut loader = projected.into_projected_mem_loader(4);
+        assert_eq!(loader.projected_len(), 4);
+        // Each register holds 8 `f32`s, so the 2-element buffer repeats 4 times per load.
+        #[allow(clippy::missing_transmute_annotations)]
+        unsafe {
+            let reg = loader.load::<crate::danger::Avx2>();
+            assert_eq!(
+                core::mem::transmute::<_, [f32; 8]>(reg),
+                [1.0, 2.0, 1.0, 2.0, 1.0, 2.0, 1.0, 2.0]
+            );
+            let reg = loader.load::<crate::danger::Avx2>();
+            assert_eq!(
+                core::mem::transmute::<_, [f32; 8]>(reg),
+                [1.0, 2.0, 1.0, 2.0, 1.0, 2.0, 1.0, 2.0]
+            );
+            let reg = loader.load::<crate::danger::Avx2>();
+            assert_eq!(
+                core::mem::transmute::<_, [f32; 8]>(reg),
+                [1.0, 2.0, 1.0, 2.0, 1.0, 2.0, 1.0, 2.0]
+            );
+            let reg = loader.load::<crate::danger::Avx2>();
+            assert_eq!(
+                core::mem::transmute::<_, [f32; 8]>(reg),
+                [1.0, 2.0, 1.0, 2.0, 1.0, 2.0, 1.0, 2.0]
+            );
+        }
+        // A 3-element buffer does not divide 8, so the pattern shifts between loads.
+        let sample = [1.0f32, 2.0f32, 3.0f32];
+        let projected = Projected(&sample);
+        let mut loader = projected.into_projected_mem_loader(9);
+        assert_eq!(loader.projected_len(), 9);
+
+        #[allow(clippy::missing_transmute_annotations)]
+        unsafe {
+            let reg = loader.load::<crate::danger::Avx2>();
+            assert_eq!(
+                core::mem::transmute::<_, [f32; 8]>(reg),
+                [1.0, 2.0, 3.0, 1.0, 2.0, 3.0, 1.0, 2.0]
+            );
+            let reg = loader.load::<crate::danger::Avx2>();
+            assert_eq!(
+                core::mem::transmute::<_, [f32; 8]>(reg),
+                [3.0, 1.0, 2.0, 3.0, 1.0, 2.0, 3.0, 1.0]
+            );
+            let reg = loader.load::<crate::danger::Avx2>();
+            assert_eq!(
+                core::mem::transmute::<_, [f32; 8]>(reg),
+                [2.0, 3.0, 1.0, 2.0, 3.0, 1.0, 2.0, 3.0]
+            );
+        }
+    }
+
+    #[cfg(all(
+        any(target_arch = "x86", target_arch = "x86_64"),
+        target_feature = "avx512f",
+        feature = "nightly"
+    ))]
+    #[test]
+    fn test_buffer_projection_avx512_dense_load() {
+        let sample = [1.0f64, 2.0f64];
+        let projected = Projected(&sample);
+        let mut loader = projected.into_projected_mem_loader(4);
+        assert_eq!(loader.projected_len(), 4);
+        // Each register holds 8 `f64`s, so the 2-element buffer repeats 4 times per register.
+        #[allow(clippy::missing_transmute_annotations)]
+        unsafe {
+            let dense = loader.load_dense::<crate::danger::Avx512>();
+            assert_eq!(
+                core::mem::transmute::<_, [f64; 8]>(dense.a),
+                [1.0, 2.0, 1.0, 2.0, 1.0, 2.0, 1.0, 2.0]
+            );
+            assert_eq!(
+                core::mem::transmute::<_, [f64; 8]>(dense.b),
+                [1.0, 2.0, 1.0, 2.0, 1.0, 2.0, 1.0, 2.0]
+            );
+            assert_eq!(
+                core::mem::transmute::<_, [f64; 8]>(dense.c),
+                [1.0, 2.0, 1.0, 2.0, 1.0, 2.0, 1.0, 2.0]
+            );
+            assert_eq!(
+                core::mem::transmute::<_, [f64; 8]>(dense.d),
+                [1.0, 2.0, 1.0, 2.0, 1.0, 2.0, 1.0, 2.0]
+            );
+            assert_eq!(
+                core::mem::transmute::<_, [f64; 8]>(dense.e),
+                [1.0, 2.0, 1.0, 2.0, 1.0, 2.0, 1.0, 2.0]
+            );
+            assert_eq!(
+                core::mem::transmute::<_, [f64; 8]>(dense.f),
+                [1.0, 2.0, 1.0, 2.0, 1.0, 2.0, 1.0, 2.0]
+            );
+            assert_eq!(
+                core::mem::transmute::<_, [f64; 8]>(dense.g),
+                [1.0, 2.0, 1.0, 2.0, 1.0, 2.0, 1.0, 2.0]
+            );
+            assert_eq!(
+                core::mem::transmute::<_, [f64; 8]>(dense.h),
+                [1.0, 2.0, 1.0, 2.0, 1.0, 2.0, 1.0, 2.0]
+            );
+        }
+        // A 3-element buffer does not divide 8, so the pattern shifts between registers.
+        let sample = [1.0f64, 2.0f64, 3.0f64];
+        let projected = Projected(&sample);
+        let mut loader = projected.into_projected_mem_loader(9);
+        assert_eq!(loader.projected_len(), 9);
+
+        #[allow(clippy::missing_transmute_annotations)]
+        unsafe {
+            let dense = loader.load_dense::<crate::danger::Avx512>();
+            assert_eq!(
+                core::mem::transmute::<_, [f64; 8]>(dense.a),
+                [1.0, 2.0, 3.0, 1.0, 2.0, 3.0, 1.0, 2.0]
+            );
+            assert_eq!(
+                core::mem::transmute::<_, [f64; 8]>(dense.b),
+                [3.0, 1.0, 2.0, 3.0, 1.0, 2.0, 3.0, 1.0]
+            );
+            assert_eq!(
+                core::mem::transmute::<_, [f64; 8]>(dense.c),
+                [2.0, 3.0, 1.0, 2.0, 3.0, 1.0, 2.0, 3.0]
+            );
+            assert_eq!(
+                core::mem::transmute::<_, [f64; 8]>(dense.d),
+                [1.0, 2.0, 3.0, 1.0, 2.0, 3.0, 1.0, 2.0]
+            );
+            assert_eq!(
+                core::mem::transmute::<_, [f64; 8]>(dense.e),
+                [3.0, 1.0, 2.0, 3.0, 1.0, 2.0, 3.0, 1.0]
+            );
+            assert_eq!(
+                core::mem::transmute::<_, [f64; 8]>(dense.f),
+                [2.0, 3.0, 1.0, 2.0, 3.0, 1.0, 2.0, 3.0]
+            );
+            assert_eq!(
+                core::mem::transmute::<_, [f64; 8]>(dense.g),
+                [1.0, 2.0, 3.0, 1.0, 2.0, 3.0, 1.0, 2.0]
+            );
+            assert_eq!(
+                core::mem::transmute::<_, [f64; 8]>(dense.h),
+                [3.0, 1.0, 2.0, 3.0, 1.0, 2.0, 3.0, 1.0]
+            );
+        }
+    }
+
+    #[cfg(all(
+        any(target_arch = "x86", target_arch = "x86_64"),
+        target_feature = "avx512f",
+        feature = "nightly",
+    ))]
+    #[test]
+    fn test_buffer_projection_avx512_load() {
+        let sample = [1.0f64, 2.0f64];
+        let projected = Projected(&sample);
+        let mut loader = projected.into_projected_mem_loader(4);
+        assert_eq!(loader.projected_len(), 4);
+        // Each register holds 8 `f64`s, so the 2-element buffer repeats 4 times per load.
+        #[allow(clippy::missing_transmute_annotations)]
+        unsafe {
+            let reg = loader.load::<crate::danger::Avx512>();
+            assert_eq!(
+                core::mem::transmute::<_, [f64; 8]>(reg),
+                [1.0, 2.0, 1.0, 2.0, 1.0, 2.0, 1.0, 2.0]
+            );
+            let reg = loader.load::<crate::danger::Avx512>();
+            assert_eq!(
+                core::mem::transmute::<_, [f64; 8]>(reg),
+                [1.0, 2.0, 1.0, 2.0, 1.0, 2.0, 1.0, 2.0]
+            );
+            let reg = loader.load::<crate::danger::Avx512>();
+            assert_eq!(
+                core::mem::transmute::<_, [f64; 8]>(reg),
+                [1.0, 2.0, 1.0, 2.0, 1.0, 2.0, 1.0, 2.0]
+            );
+            let reg = loader.load::<crate::danger::Avx512>();
+            assert_eq!(
+                core::mem::transmute::<_, [f64; 8]>(reg),
+                [1.0, 2.0, 1.0, 2.0, 1.0, 2.0, 1.0, 2.0]
+            );
+        }
+        // A 3-element buffer does not divide 8, so the pattern shifts between loads.
+        let sample = [1.0f64, 2.0f64, 3.0f64];
+        let projected = Projected(&sample);
+        let mut loader = projected.into_projected_mem_loader(9);
+        assert_eq!(loader.projected_len(), 9);
+
+        #[allow(clippy::missing_transmute_annotations)]
+        unsafe {
+            let reg = loader.load::<crate::danger::Avx512>();
+            assert_eq!(
+                core::mem::transmute::<_, [f64; 8]>(reg),
+                [1.0, 2.0, 3.0, 1.0, 2.0, 3.0, 1.0, 2.0]
+            );
+            let reg = loader.load::<crate::danger::Avx512>();
+            assert_eq!(
+                core::mem::transmute::<_, [f64; 8]>(reg),
+                [3.0, 1.0, 2.0, 3.0, 1.0, 2.0, 3.0, 1.0]
+            );
+            let reg = loader.load::<crate::danger::Avx512>();
+            assert_eq!(
+                core::mem::transmute::<_, [f64; 8]>(reg),
+                [2.0, 3.0, 1.0, 2.0, 3.0, 1.0, 2.0, 3.0]
+            );
+        }
+    }
 }
diff --git a/cfavml/src/safe_function_ops.rs b/cfavml/src/safe_function_ops.rs
index a447cc1..07cf46e 100644
--- a/cfavml/src/safe_function_ops.rs
+++ b/cfavml/src/safe_function_ops.rs
@@ -319,6 +319,23 @@ where
 /// - `0 + 1 == [1, 1, 1]`  w/result_buffer_len=3
 /// - `1 + 1 == [1]`  w/result_buffer_len=1
 ///
+/// ###### Projecting Vectors
+///
+/// CFAVML allows for working over a wide variety of buffers for applications, projection is effectively
+/// broadcasting of two input buffers implementing `IntoMemLoader<T>`.
+///
+/// By default, you can provide _two slices_, _one slice and a broadcast value_, or _two broadcast values_,
+/// which exhibit the standard behaviour as you might expect.
+///
+/// When providing two slices as inputs, they cannot be projected to a buffer
+/// that is larger than their input sizes by default. This means providing two slices
+/// of `128` elements in length must take a result buffer of `128` elements in length.
+///
+/// You can wrap your inputs in a [Projected](crate::mem_loader::Projected) wrapper which
+/// enables projecting of the input buffer to new sizes providing the new size is a
+/// multiple of the original size. When this buffer is projected, it is effectively
+/// repeated `N` times, where `N` is how many times the old size fits into the new size.
+///
 /// ### Examples
 ///
 /// ##### Two vectors
@@ -350,6 +367,27 @@ where
 /// assert_eq!(result, [1.0; 4]);
 /// ```
 ///
+/// ##### With projected vectors
+///
+/// Using the [Projected](crate::mem_loader::Projected) wrapper type, we can create larger output
+/// vectors than the original inputs, which can allow for much better performance and
+/// memory usage when working with matrices.
+///
+/// NOTE:
+/// Projection != Matrix broadcasting, please read the documentation of `Projected` to understand
+/// how it behaves, it does not replace your matrix library's own broadcasting system.
+///
+/// ```rust
+/// use cfavml::mem_loader::Projected;
+///
+/// let lhs = [1.0, 1.0, 0.5, 3.5];    // Pretend this is a 2x2 matrix
+/// let rhs = [1.0, 2.5];               // Pretend this is a 1x2 matrix
+///
+/// let mut result = [0.0; 4];          // Our output is a 2x2 matrix
+/// cfavml::max_vertical(Projected(&lhs), Projected(&rhs), &mut result);
+/// assert_eq!(result, [1.0, 2.5, 1.0, 3.5]);
+/// ```
+///
 /// ##### With `MaybeUninit`
 ///
 /// Often if you are working with new-allocations, you do not want to initialize the data twice,
@@ -370,18 +408,6 @@ where
 /// assert_eq!(result, [2.0, 2.5, 1.0, 1.0]);
 /// ```
 ///
-/// ### Projecting Vectors
-///
-/// CFAVML allows for working over a wide variety of buffers for applications, projection is effectively
-/// broadcasting of two input buffers implementing `IntoMemLoader<T>`.
-///
-/// By default, you can provide _two slices_, _one slice and a broadcast value_, or _two broadcast values_,
-/// which exhibit the standard behaviour as you might expect.
-///
-/// When providing two slices as inputs they cannot be projected to a buffer
-/// that is larger their input sizes by default. This means providing two slices
-/// of `128` elements in length must take a result buffer of `128` elements in length.
-///
 /// ### Implementation Pseudocode
 ///
 /// _This is the logic of the routine being called._
@@ -485,6 +511,23 @@ where
 /// - `0 + 1 == [1, 1, 1]`  w/result_buffer_len=3
 /// - `1 + 1 == [1]`  w/result_buffer_len=1
 ///
+/// ###### Projecting Vectors
+///
+/// CFAVML allows for working over a wide variety of buffers for applications, projection is effectively
+/// broadcasting of two input buffers implementing `IntoMemLoader<T>`.
+///
+/// By default, you can provide _two slices_, _one slice and a broadcast value_, or _two broadcast values_,
+/// which exhibit the standard behaviour as you might expect.
+///
+/// When providing two slices as inputs, they cannot be projected to a buffer
+/// that is larger than their input sizes by default. This means providing two slices
+/// of `128` elements in length must take a result buffer of `128` elements in length.
+///
+/// You can wrap your inputs in a [Projected](crate::mem_loader::Projected) wrapper which
+/// enables projecting of the input buffer to new sizes providing the new size is a
+/// multiple of the original size. When this buffer is projected, it is effectively
+/// repeated `N` times, where `N` is how many times the old size fits into the new size.
+///
 /// ### Examples
 ///
 /// ##### Two vectors
@@ -516,6 +559,27 @@ where
 /// assert_eq!(result, [-5.0; 4]);
 /// ```
 ///
+/// ##### With projected vectors
+///
+/// Using the [Projected](crate::mem_loader::Projected) wrapper type, we can create larger output
+/// vectors than the original inputs, which can allow for much better performance and
+/// memory usage when working with matrices.
+///
+/// NOTE:
+/// Projection != Matrix broadcasting, please read the documentation of `Projected` to understand
+/// how it behaves, it does not replace your matrix library's own broadcasting system.
+///
+/// ```rust
+/// use cfavml::mem_loader::Projected;
+///
+/// let lhs = [1.0, 1.0, 0.5, 3.5];    // Pretend this is a 2x2 matrix
+/// let rhs = [1.0, 2.5];               // Pretend this is a 1x2 matrix
+///
+/// let mut result = [0.0; 4];          // Our output is a 2x2 matrix
+/// cfavml::min_vertical(Projected(&lhs), Projected(&rhs), &mut result);
+/// assert_eq!(result, [1.0, 1.0, 0.5, 2.5]);
+/// ```
+///
 /// ##### With `MaybeUninit`
 ///
 /// Often if you are working with new-allocations, you do not want to initialize the data twice,
@@ -536,18 +600,6 @@ where
 /// assert_eq!(result, [1.0, -1.0, 0.5, -2.0]);
 /// ```
 ///
-/// ### Projecting Vectors
-///
-/// CFAVML allows for working over a wide variety of buffers for applications, projection is effectively
-/// broadcasting of two input buffers implementing `IntoMemLoader<T>`.
-///
-/// By default, you can provide _two slices_, _one slice and a broadcast value_, or _two broadcast values_,
-/// which exhibit the standard behaviour as you might expect.
-///
-/// When providing two slices as inputs they cannot be projected to a buffer
-/// that is larger their input sizes by default. This means providing two slices
-/// of `128` elements in length must take a result buffer of `128` elements in length.
-///
 /// ### Implementation Pseudocode
 ///
 /// _This is the logic of the routine being called._
@@ -608,6 +660,23 @@ where
 /// - `0 + 1 == [1, 1, 1]`  w/result_buffer_len=3
 /// - `1 + 1 == [1]`  w/result_buffer_len=1
 ///
+/// ###### Projecting Vectors
+///
+/// CFAVML allows for working over a wide variety of buffers for applications, projection is effectively
+/// broadcasting of two input buffers implementing `IntoMemLoader<T>`.
+///
+/// By default, you can provide _two slices_, _one slice and a broadcast value_, or _two broadcast values_,
+/// which exhibit the standard behaviour as you might expect.
+///
+/// When providing two slices as inputs, they cannot be projected to a buffer
+/// that is larger than their input sizes by default. This means providing two slices
+/// of `128` elements in length must take a result buffer of `128` elements in length.
+///
+/// You can wrap your inputs in a [Projected](crate::mem_loader::Projected) wrapper which
+/// enables projecting of the input buffer to new sizes providing the new size is a
+/// multiple of the original size. When this buffer is projected, it is effectively
+/// repeated `N` times, where `N` is how many times the old size fits into the new size.
+///
 /// ###### Masks
 ///
 /// CFAVML follows the same pattern as numpy, which is it representing boolean results as
@@ -655,6 +724,27 @@ where
 /// assert_eq!(result, [0.0; 4]);
 /// ```
 ///
+/// ##### With projected vectors
+///
+/// Using the [Projected](crate::mem_loader::Projected) wrapper type, we can create larger output
+/// vectors than the original inputs, which can allow for much better performance and
+/// memory usage when working with matrices.
+///
+/// NOTE:
+/// Projection != Matrix broadcasting, please read the documentation of `Projected` to understand
+/// how it behaves, it does not replace your matrix library's own broadcasting system.
+///
+/// ```rust
+/// use cfavml::mem_loader::Projected;
+///
+/// let lhs = [1.0, 1.0, 0.5, 3.5];    // Pretend this is a 2x2 matrix
+/// let rhs = [1.0, 2.5];               // Pretend this is a 1x2 matrix
+///
+/// let mut result = [0.0; 4];          // Our output is a 2x2 matrix
+/// cfavml::eq_vertical(Projected(&lhs), Projected(&rhs), &mut result);
+/// assert_eq!(result, [1.0, 0.0, 0.0, 0.0]);
+/// ```
+///
 /// ##### With `MaybeUninit`
 ///
 /// Often if you are working with new-allocations, you do not want to initialize the data twice,
@@ -675,18 +765,6 @@ where
 /// assert_eq!(result, [1.0, 0.0, 1.0, 0.0]);
 /// ```
 ///
-/// ### Projecting Vectors
-///
-/// CFAVML allows for working over a wide variety of buffers for applications, projection is effectively
-/// broadcasting of two input buffers implementing `IntoMemLoader<T>`.
-///
-/// By default, you can provide _two slices_, _one slice and a broadcast value_, or _two broadcast values_,
-/// which exhibit the standard behaviour as you might expect.
-///
-/// When providing two slices as inputs they cannot be projected to a buffer
-/// that is larger their input sizes by default. This means providing two slices
-/// of `128` elements in length must take a result buffer of `128` elements in length.
-///
 /// ### Implementation Pseudocode
 ///
 /// _This is the logic of the routine being called._
@@ -756,6 +834,23 @@ where
 /// - `0 + 1 == [1, 1, 1]`  w/result_buffer_len=3
 /// - `1 + 1 == [1]`  w/result_buffer_len=1
 ///
+/// ###### Projecting Vectors
+///
+/// CFAVML allows for working over a wide variety of buffers for applications, projection is effectively
+/// broadcasting of two input buffers implementing `IntoMemLoader<T>`.
+///
+/// By default, you can provide _two slices_, _one slice and a broadcast value_, or _two broadcast values_,
+/// which exhibit the standard behaviour as you might expect.
+///
+/// When providing two slices as inputs, they cannot be projected to a buffer
+/// that is larger than their input sizes by default. This means providing two slices
+/// of `128` elements in length must take a result buffer of `128` elements in length.
+///
+/// You can wrap your inputs in a [Projected](crate::mem_loader::Projected) wrapper which
+/// enables projecting of the input buffer to new sizes providing the new size is a
+/// multiple of the original size. When this buffer is projected, it is effectively
+/// repeated `N` times, where `N` is how many times the old size fits into the new size.
+///
 /// ###### Masks
 ///
 /// CFAVML follows the same pattern as numpy, which is it representing boolean results as
@@ -803,6 +898,27 @@ where
 /// assert_eq!(result, [1.0; 4]);
 /// ```
 ///
+/// ##### With projected vectors
+///
+/// Using the [Projected](crate::mem_loader::Projected) wrapper type, we can create larger output
+/// vectors than the original inputs, which can allow for much better performance and
+/// memory usage when working with matrices.
+///
+/// NOTE:
+/// Projection != Matrix broadcasting. Please read the documentation of `Projected` to understand
+/// how it behaves; it does not replace your matrix library's own broadcasting system.
+///
+/// ```rust
+/// use cfavml::mem_loader::Projected;
+///
+/// let lhs = [1.0, 1.0, 0.5, 3.5];    // Pretend this is a 2x2 matrix
+/// let rhs = [1.0, 2.5];               // Pretend this is a 1x2 matrix
+///
+/// let mut result = [0.0; 4];          // Our output is a 2x2 matrix
+/// cfavml::neq_vertical(Projected(&lhs), Projected(&rhs), &mut result);
+/// assert_eq!(result, [0.0, 1.0, 1.0, 1.0]);
+/// ```
+///
 /// ##### With `MaybeUninit`
 ///
 /// Often if you are working with new-allocations, you do not want to initialize the data twice,
@@ -823,18 +939,6 @@ where
 /// assert_eq!(result, [0.0, 1.0, 0.0, 1.0]);
 /// ```
 ///
-/// ### Projecting Vectors
-///
-/// CFAVML allows for working over a wide variety of buffers for applications, projection is effectively
-/// broadcasting of two input buffers implementing `IntoMemLoader<T>`.
-///
-/// By default, you can provide _two slices_, _one slice and a broadcast value_, or _two broadcast values_,
-/// which exhibit the standard behaviour as you might expect.
-///
-/// When providing two slices as inputs they cannot be projected to a buffer
-/// that is larger their input sizes by default. This means providing two slices
-/// of `128` elements in length must take a result buffer of `128` elements in length.
-///
 /// ### Implementation Pseudocode
 ///
 /// _This is the logic of the routine being called._
@@ -904,6 +1008,23 @@ where
 /// - `0 + 1 == [1, 1, 1]`  w/result_buffer_len=3
 /// - `1 + 1 == [1]`  w/result_buffer_len=1
 ///
+/// ###### Projecting Vectors
+///
+/// CFAVML allows for working over a wide variety of buffers for applications, projection is effectively
+/// broadcasting of two input buffers implementing `IntoMemLoader<T>`.
+///
+/// By default, you can provide _two slices_, _one slice and a broadcast value_, or _two broadcast values_,
+/// which exhibit the standard behaviour as you might expect.
+///
+/// When providing two slices as inputs they cannot be projected to a buffer
+/// that is larger than their input sizes by default. This means providing two slices
+/// of `128` elements in length must take a result buffer of `128` elements in length.
+///
+/// You can wrap your inputs in a [Projected](crate::mem_loader::Projected) wrapper which
+/// enables projecting of the input buffer to new sizes providing the new size is a
+/// multiple of the original size. When this buffer is projected, it is effectively
+/// repeated `N` times, where `N` is how many times the old size fits into the new size.
+///
 /// ###### Masks
 ///
 /// CFAVML follows the same pattern as numpy, which is it representing boolean results as
@@ -951,6 +1072,27 @@ where
 /// assert_eq!(result, [1.0; 4]);
 /// ```
 ///
+/// ##### With projected vectors
+///
+/// Using the [Projected](crate::mem_loader::Projected) wrapper type, we can create larger output
+/// vectors than the original inputs, which can allow for much better performance and
+/// memory usage when working with matrices.
+///
+/// NOTE:
+/// Projection != Matrix broadcasting. Please read the documentation of `Projected` to understand
+/// how it behaves; it does not replace your matrix library's own broadcasting system.
+///
+/// ```rust
+/// use cfavml::mem_loader::Projected;
+///
+/// let lhs = [1.0, 1.0, 0.5, 3.5];    // Pretend this is a 2x2 matrix
+/// let rhs = [1.0, 2.5];               // Pretend this is a 1x2 matrix
+///
+/// let mut result = [0.0; 4];          // Our output is a 2x2 matrix
+/// cfavml::lt_vertical(Projected(&lhs), Projected(&rhs), &mut result);
+/// assert_eq!(result, [0.0, 1.0, 1.0, 0.0]);
+/// ```
+///
 /// ##### With `MaybeUninit`
 ///
 /// Often if you are working with new-allocations, you do not want to initialize the data twice,
@@ -971,18 +1113,6 @@ where
 /// assert_eq!(result, [0.0, 1.0, 0.0, 0.0]);
 /// ```
 ///
-/// ### Projecting Vectors
-///
-/// CFAVML allows for working over a wide variety of buffers for applications, projection is effectively
-/// broadcasting of two input buffers implementing `IntoMemLoader<T>`.
-///
-/// By default, you can provide _two slices_, _one slice and a broadcast value_, or _two broadcast values_,
-/// which exhibit the standard behaviour as you might expect.
-///
-/// When providing two slices as inputs they cannot be projected to a buffer
-/// that is larger their input sizes by default. This means providing two slices
-/// of `128` elements in length must take a result buffer of `128` elements in length.
-///
 /// ### Implementation Pseudocode
 ///
 /// _This is the logic of the routine being called._
@@ -1053,6 +1183,23 @@ where
 /// - `0 + 1 == [1, 1, 1]`  w/result_buffer_len=3
 /// - `1 + 1 == [1]`  w/result_buffer_len=1
 ///
+/// ###### Projecting Vectors
+///
+/// CFAVML allows for working over a wide variety of buffers for applications, projection is effectively
+/// broadcasting of two input buffers implementing `IntoMemLoader<T>`.
+///
+/// By default, you can provide _two slices_, _one slice and a broadcast value_, or _two broadcast values_,
+/// which exhibit the standard behaviour as you might expect.
+///
+/// When providing two slices as inputs they cannot be projected to a buffer
+/// that is larger than their input sizes by default. This means providing two slices
+/// of `128` elements in length must take a result buffer of `128` elements in length.
+///
+/// You can wrap your inputs in a [Projected](crate::mem_loader::Projected) wrapper which
+/// enables projecting of the input buffer to new sizes providing the new size is a
+/// multiple of the original size. When this buffer is projected, it is effectively
+/// repeated `N` times, where `N` is how many times the old size fits into the new size.
+///
 /// ###### Masks
 ///
 /// CFAVML follows the same pattern as numpy, which is it representing boolean results as
@@ -1100,6 +1247,27 @@ where
 /// assert_eq!(result, [1.0; 4]);
 /// ```
 ///
+/// ##### With projected vectors
+///
+/// Using the [Projected](crate::mem_loader::Projected) wrapper type, we can create larger output
+/// vectors than the original inputs, which can allow for much better performance and
+/// memory usage when working with matrices.
+///
+/// NOTE:
+/// Projection != Matrix broadcasting. Please read the documentation of `Projected` to understand
+/// how it behaves; it does not replace your matrix library's own broadcasting system.
+///
+/// ```rust
+/// use cfavml::mem_loader::Projected;
+///
+/// let lhs = [1.0, 1.0, 0.5, 3.5];    // Pretend this is a 2x2 matrix
+/// let rhs = [1.0, 2.5];               // Pretend this is a 1x2 matrix
+///
+/// let mut result = [0.0; 4];          // Our output is a 2x2 matrix
+/// cfavml::lte_vertical(Projected(&lhs), Projected(&rhs), &mut result);
+/// assert_eq!(result, [1.0, 1.0, 1.0, 0.0]);
+/// ```
+///
 /// ##### With `MaybeUninit`
 ///
 /// Often if you are working with new-allocations, you do not want to initialize the data twice,
@@ -1120,18 +1288,6 @@ where
 /// assert_eq!(result, [1.0, 1.0, 1.0, 0.0]);
 /// ```
 ///
-/// ### Projecting Vectors
-///
-/// CFAVML allows for working over a wide variety of buffers for applications, projection is effectively
-/// broadcasting of two input buffers implementing `IntoMemLoader<T>`.
-///
-/// By default, you can provide _two slices_, _one slice and a broadcast value_, or _two broadcast values_,
-/// which exhibit the standard behaviour as you might expect.
-///
-/// When providing two slices as inputs they cannot be projected to a buffer
-/// that is larger their input sizes by default. This means providing two slices
-/// of `128` elements in length must take a result buffer of `128` elements in length.
-///
 /// ### Implementation Pseudocode
 ///
 /// _This is the logic of the routine being called._
@@ -1203,6 +1359,23 @@ where
 /// - `0 + 1 == [1, 1, 1]`  w/result_buffer_len=3
 /// - `1 + 1 == [1]`  w/result_buffer_len=1
 ///
+/// ###### Projecting Vectors
+///
+/// CFAVML allows for working over a wide variety of buffers for applications, projection is effectively
+/// broadcasting of two input buffers implementing `IntoMemLoader<T>`.
+///
+/// By default, you can provide _two slices_, _one slice and a broadcast value_, or _two broadcast values_,
+/// which exhibit the standard behaviour as you might expect.
+///
+/// When providing two slices as inputs they cannot be projected to a buffer
+/// that is larger than their input sizes by default. This means providing two slices
+/// of `128` elements in length must take a result buffer of `128` elements in length.
+///
+/// You can wrap your inputs in a [Projected](crate::mem_loader::Projected) wrapper which
+/// enables projecting of the input buffer to new sizes providing the new size is a
+/// multiple of the original size. When this buffer is projected, it is effectively
+/// repeated `N` times, where `N` is how many times the old size fits into the new size.
+///
 /// ###### Masks
 ///
 /// CFAVML follows the same pattern as numpy, which is it representing boolean results as
@@ -1250,6 +1423,27 @@ where
 /// assert_eq!(result, [1.0; 4]);
 /// ```
 ///
+/// ##### With projected vectors
+///
+/// Using the [Projected](crate::mem_loader::Projected) wrapper type, we can create larger output
+/// vectors than the original inputs, which can allow for much better performance and
+/// memory usage when working with matrices.
+///
+/// NOTE:
+/// Projection != Matrix broadcasting. Please read the documentation of `Projected` to understand
+/// how it behaves; it does not replace your matrix library's own broadcasting system.
+///
+/// ```rust
+/// use cfavml::mem_loader::Projected;
+///
+/// let lhs = [1.0, 1.0, 0.5, 3.5];    // Pretend this is a 2x2 matrix
+/// let rhs = [1.0, 2.5];               // Pretend this is a 1x2 matrix
+///
+/// let mut result = [0.0; 4];          // Our output is a 2x2 matrix
+/// cfavml::gt_vertical(Projected(&lhs), Projected(&rhs), &mut result);
+/// assert_eq!(result, [0.0, 0.0, 0.0, 1.0]);
+/// ```
+///
 /// ##### With `MaybeUninit`
 ///
 /// Often if you are working with new-allocations, you do not want to initialize the data twice,
@@ -1270,18 +1464,6 @@ where
 /// assert_eq!(result, [0.0, 0.0, 0.0, 1.0]);
 /// ```
 ///
-/// ### Projecting Vectors
-///
-/// CFAVML allows for working over a wide variety of buffers for applications, projection is effectively
-/// broadcasting of two input buffers implementing `IntoMemLoader<T>`.
-///
-/// By default, you can provide _two slices_, _one slice and a broadcast value_, or _two broadcast values_,
-/// which exhibit the standard behaviour as you might expect.
-///
-/// When providing two slices as inputs they cannot be projected to a buffer
-/// that is larger their input sizes by default. This means providing two slices
-/// of `128` elements in length must take a result buffer of `128` elements in length.
-///
 /// ### Implementation Pseudocode
 ///
 /// _This is the logic of the routine being called._
@@ -1352,6 +1534,23 @@ where
 /// - `0 + 1 == [1, 1, 1]`  w/result_buffer_len=3
 /// - `1 + 1 == [1]`  w/result_buffer_len=1
 ///
+/// ###### Projecting Vectors
+///
+/// CFAVML allows for working over a wide variety of buffers for applications, projection is effectively
+/// broadcasting of two input buffers implementing `IntoMemLoader<T>`.
+///
+/// By default, you can provide _two slices_, _one slice and a broadcast value_, or _two broadcast values_,
+/// which exhibit the standard behaviour as you might expect.
+///
+/// When providing two slices as inputs they cannot be projected to a buffer
+/// that is larger than their input sizes by default. This means providing two slices
+/// of `128` elements in length must take a result buffer of `128` elements in length.
+///
+/// You can wrap your inputs in a [Projected](crate::mem_loader::Projected) wrapper which
+/// enables projecting of the input buffer to new sizes providing the new size is a
+/// multiple of the original size. When this buffer is projected, it is effectively
+/// repeated `N` times, where `N` is how many times the old size fits into the new size.
+///
 /// ###### Masks
 ///
 /// CFAVML follows the same pattern as numpy, which is it representing boolean results as
@@ -1399,6 +1598,27 @@ where
 /// assert_eq!(result, [1.0; 4]);
 /// ```
 ///
+/// ##### With projected vectors
+///
+/// Using the [Projected](crate::mem_loader::Projected) wrapper type, we can create larger output
+/// vectors than the original inputs, which can allow for much better performance and
+/// memory usage when working with matrices.
+///
+/// NOTE:
+/// Projection != Matrix broadcasting. Please read the documentation of `Projected` to understand
+/// how it behaves; it does not replace your matrix library's own broadcasting system.
+///
+/// ```rust
+/// use cfavml::mem_loader::Projected;
+///
+/// let lhs = [1.0, 1.0, 0.5, 1.0];    // Pretend this is a 2x2 matrix
+/// let rhs = [1.0, 2.5];               // Pretend this is a 1x2 matrix
+///
+/// let mut result = [0.0; 4];          // Our output is a 2x2 matrix
+/// cfavml::gte_vertical(Projected(&lhs), Projected(&rhs), &mut result);
+/// assert_eq!(result, [1.0, 0.0, 0.0, 0.0]);
+/// ```
+///
 /// ##### With `MaybeUninit`
 ///
 /// Often if you are working with new-allocations, you do not want to initialize the data twice,
@@ -1419,18 +1639,6 @@ where
 /// assert_eq!(result, [1.0, 0.0, 1.0, 1.0]);
 /// ```
 ///
-/// ### Projecting Vectors
-///
-/// CFAVML allows for working over a wide variety of buffers for applications, projection is effectively
-/// broadcasting of two input buffers implementing `IntoMemLoader<T>`.
-///
-/// By default, you can provide _two slices_, _one slice and a broadcast value_, or _two broadcast values_,
-/// which exhibit the standard behaviour as you might expect.
-///
-/// When providing two slices as inputs they cannot be projected to a buffer
-/// that is larger their input sizes by default. This means providing two slices
-/// of `128` elements in length must take a result buffer of `128` elements in length.
-///
 /// ### Implementation Pseudocode
 ///
 /// _This is the logic of the routine being called._
@@ -1499,6 +1707,23 @@ where
 /// - `0 + 1 == [1, 1, 1]`  w/result_buffer_len=3
 /// - `1 + 1 == [1]`  w/result_buffer_len=1
 ///
+/// ###### Projecting Vectors
+///
+/// CFAVML allows for working over a wide variety of buffers for applications, projection is effectively
+/// broadcasting of two input buffers implementing `IntoMemLoader<T>`.
+///
+/// By default, you can provide _two slices_, _one slice and a broadcast value_, or _two broadcast values_,
+/// which exhibit the standard behaviour as you might expect.
+///
+/// When providing two slices as inputs they cannot be projected to a buffer
+/// that is larger than their input sizes by default. This means providing two slices
+/// of `128` elements in length must take a result buffer of `128` elements in length.
+///
+/// You can wrap your inputs in a [Projected](crate::mem_loader::Projected) wrapper which
+/// enables projecting of the input buffer to new sizes providing the new size is a
+/// multiple of the original size. When this buffer is projected, it is effectively
+/// repeated `N` times, where `N` is how many times the old size fits into the new size.
+///
 /// ### Examples
 ///
 /// ##### Two vectors
@@ -1530,6 +1755,27 @@ where
 /// assert_eq!(result, [10.0; 4]);
 /// ```
 ///
+/// ##### With projected vectors
+///
+/// Using the [Projected](crate::mem_loader::Projected) wrapper type, we can create larger output
+/// vectors than the original inputs, which can allow for much better performance and
+/// memory usage when working with matrices.
+///
+/// NOTE:
+/// Projection != Matrix broadcasting. Please read the documentation of `Projected` to understand
+/// how it behaves; it does not replace your matrix library's own broadcasting system.
+///
+/// ```rust
+/// use cfavml::mem_loader::Projected;
+///
+/// let lhs = [1.0, 1.0, 0.5, 1.0];    // Pretend this is a 2x2 matrix
+/// let rhs = [1.0, 2.5];               // Pretend this is a 1x2 matrix
+///
+/// let mut result = [0.0; 4];          // Our output is a 2x2 matrix
+/// cfavml::add_vertical(Projected(&lhs), Projected(&rhs), &mut result);
+/// assert_eq!(result, [2.0, 3.5, 1.5, 3.5]);
+/// ```
+///
 /// ##### With `MaybeUninit`
 ///
 /// Often if you are working with new-allocations, you do not want to initialize the data twice,
@@ -1550,18 +1796,6 @@ where
 /// assert_eq!(result, [2.0, 1.5, 1.0, -1.0]);
 /// ```
 ///
-/// ### Projecting Vectors
-///
-/// CFAVML allows for working over a wide variety of buffers for applications, projection is effectively
-/// broadcasting of two input buffers implementing `IntoMemLoader<T>`.
-///
-/// By default, you can provide _two slices_, _one slice and a broadcast value_, or _two broadcast values_,
-/// which exhibit the standard behaviour as you might expect.
-///
-/// When providing two slices as inputs they cannot be projected to a buffer
-/// that is larger their input sizes by default. This means providing two slices
-/// of `128` elements in length must take a result buffer of `128` elements in length.
-///
 /// ### Implementation Pseudocode
 ///
 /// _This is the logic of the routine being called._
@@ -1620,6 +1854,23 @@ where
 /// - `0 + 1 == [1, 1, 1]`  w/result_buffer_len=3
 /// - `1 + 1 == [1]`  w/result_buffer_len=1
 ///
+/// ###### Projecting Vectors
+///
+/// CFAVML allows for working over a wide variety of buffers for applications, projection is effectively
+/// broadcasting of two input buffers implementing `IntoMemLoader<T>`.
+///
+/// By default, you can provide _two slices_, _one slice and a broadcast value_, or _two broadcast values_,
+/// which exhibit the standard behaviour as you might expect.
+///
+/// When providing two slices as inputs they cannot be projected to a buffer
+/// that is larger than their input sizes by default. This means providing two slices
+/// of `128` elements in length must take a result buffer of `128` elements in length.
+///
+/// You can wrap your inputs in a [Projected](crate::mem_loader::Projected) wrapper which
+/// enables projecting of the input buffer to new sizes providing the new size is a
+/// multiple of the original size. When this buffer is projected, it is effectively
+/// repeated `N` times, where `N` is how many times the old size fits into the new size.
+///
 /// ### Examples
 ///
 /// ##### Two vectors
@@ -1651,6 +1902,27 @@ where
 /// assert_eq!(result, [0.0; 4]);
 /// ```
 ///
+/// ##### With projected vectors
+///
+/// Using the [Projected](crate::mem_loader::Projected) wrapper type, we can create larger output
+/// vectors than the original inputs, which can allow for much better performance and
+/// memory usage when working with matrices.
+///
+/// NOTE:
+/// Projection != Matrix broadcasting. Please read the documentation of `Projected` to understand
+/// how it behaves; it does not replace your matrix library's own broadcasting system.
+///
+/// ```rust
+/// use cfavml::mem_loader::Projected;
+///
+/// let lhs = [1.0, 1.0, 0.5, 1.0];    // Pretend this is a 2x2 matrix
+/// let rhs = [1.0, 2.5];               // Pretend this is a 1x2 matrix
+///
+/// let mut result = [0.0; 4];          // Our output is a 2x2 matrix
+/// cfavml::sub_vertical(Projected(&lhs), Projected(&rhs), &mut result);
+/// assert_eq!(result, [0.0, -1.5, -0.5, -1.5]);
+/// ```
+///
 /// ##### With `MaybeUninit`
 ///
 /// Often if you are working with new-allocations, you do not want to initialize the data twice,
@@ -1671,18 +1943,6 @@ where
 /// assert_eq!(result, [0.0, -3.5, 0.0, 3.0]);
 /// ```
 ///
-/// ### Projecting Vectors
-///
-/// CFAVML allows for working over a wide variety of buffers for applications, projection is effectively
-/// broadcasting of two input buffers implementing `IntoMemLoader<T>`.
-///
-/// By default, you can provide _two slices_, _one slice and a broadcast value_, or _two broadcast values_,
-/// which exhibit the standard behaviour as you might expect.
-///
-/// When providing two slices as inputs they cannot be projected to a buffer
-/// that is larger their input sizes by default. This means providing two slices
-/// of `128` elements in length must take a result buffer of `128` elements in length.
-///
 /// ### Implementation Pseudocode
 ///
 /// _This is the logic of the routine being called._
@@ -1741,6 +2001,23 @@ where
 /// - `0 + 1 == [1, 1, 1]`  w/result_buffer_len=3
 /// - `1 + 1 == [1]`  w/result_buffer_len=1
 ///
+/// ###### Projecting Vectors
+///
+/// CFAVML allows for working over a wide variety of buffers for applications, projection is effectively
+/// broadcasting of two input buffers implementing `IntoMemLoader<T>`.
+///
+/// By default, you can provide _two slices_, _one slice and a broadcast value_, or _two broadcast values_,
+/// which exhibit the standard behaviour as you might expect.
+///
+/// When providing two slices as inputs they cannot be projected to a buffer
+/// that is larger than their input sizes by default. This means providing two slices
+/// of `128` elements in length must take a result buffer of `128` elements in length.
+///
+/// You can wrap your inputs in a [Projected](crate::mem_loader::Projected) wrapper which
+/// enables projecting of the input buffer to new sizes providing the new size is a
+/// multiple of the original size. When this buffer is projected, it is effectively
+/// repeated `N` times, where `N` is how many times the old size fits into the new size.
+///
 /// ### Examples
 ///
 /// ##### Two vectors
@@ -1772,6 +2049,27 @@ where
 /// assert_eq!(result, [25.0; 4]);
 /// ```
 ///
+/// ##### With projected vectors
+///
+/// Using the [Projected](crate::mem_loader::Projected) wrapper type, we can create larger output
+/// vectors than the original inputs, which can allow for much better performance and
+/// memory usage when working with matrices.
+///
+/// NOTE:
+/// Projection != Matrix broadcasting. Please read the documentation of `Projected` to understand
+/// how it behaves; it does not replace your matrix library's own broadcasting system.
+///
+/// ```rust
+/// use cfavml::mem_loader::Projected;
+///
+/// let lhs = [1.0, 1.0, 0.5, 1.0];    // Pretend this is a 2x2 matrix
+/// let rhs = [1.0, 2.5];               // Pretend this is a 1x2 matrix
+///
+/// let mut result = [0.0; 4];          // Our output is a 2x2 matrix
+/// cfavml::mul_vertical(Projected(&lhs), Projected(&rhs), &mut result);
+/// assert_eq!(result, [1.0, 2.5, 0.5, 2.5]);
+/// ```
+///
 /// ##### With `MaybeUninit`
 ///
 /// Often if you are working with new-allocations, you do not want to initialize the data twice,
@@ -1792,18 +2090,6 @@ where
 /// assert_eq!(result, [1.0, -2.5, 0.25, -2.0]);
 /// ```
 ///
-/// ### Projecting Vectors
-///
-/// CFAVML allows for working over a wide variety of buffers for applications, projection is effectively
-/// broadcasting of two input buffers implementing `IntoMemLoader<T>`.
-///
-/// By default, you can provide _two slices_, _one slice and a broadcast value_, or _two broadcast values_,
-/// which exhibit the standard behaviour as you might expect.
-///
-/// When providing two slices as inputs they cannot be projected to a buffer
-/// that is larger their input sizes by default. This means providing two slices
-/// of `128` elements in length must take a result buffer of `128` elements in length.
-///
 /// ### Implementation Pseudocode
 ///
 /// _This is the logic of the routine being called._
@@ -1882,6 +2168,23 @@ where
 /// - `0 + 1 == [1, 1, 1]`  w/result_buffer_len=3
 /// - `1 + 1 == [1]`  w/result_buffer_len=1
 ///
+/// ###### Projecting Vectors
+///
+/// CFAVML allows for working over a wide variety of buffers for applications, projection is effectively
+/// broadcasting of two input buffers implementing `IntoMemLoader<T>`.
+///
+/// By default, you can provide _two slices_, _one slice and a broadcast value_, or _two broadcast values_,
+/// which exhibit the standard behaviour as you might expect.
+///
+/// When providing two slices as inputs they cannot be projected to a buffer
+/// that is larger than their input sizes by default. This means providing two slices
+/// of `128` elements in length must take a result buffer of `128` elements in length.
+///
+/// You can wrap your inputs in a [Projected](crate::mem_loader::Projected) wrapper which
+/// enables projecting of the input buffer to new sizes providing the new size is a
+/// multiple of the original size. When this buffer is projected, it is effectively
+/// repeated `N` times, where `N` is how many times the old size fits into the new size.
+///
 /// ### Examples
 ///
 /// ##### Two vectors
@@ -1913,6 +2216,27 @@ where
 /// assert_eq!(result, [1.0; 4]);
 /// ```
 ///
+/// ##### With projected vectors
+///
+/// Using the [Projected](crate::mem_loader::Projected) wrapper type, we can create larger output
+/// vectors than the original inputs, which can allow for much better performance and
+/// memory usage when working with matrices.
+///
+/// NOTE:
+/// Projection != Matrix broadcasting. Please read the documentation of `Projected` to understand
+/// how it behaves; it does not replace your matrix library's own broadcasting system.
+///
+/// ```rust
+/// use cfavml::mem_loader::Projected;
+///
+/// let lhs = [1.0, -1.0, 0.5, 1.0];    // Pretend this is a 2x2 matrix
+/// let rhs = [1.0, 2.5];               // Pretend this is a 1x2 matrix
+///
+/// let mut result = [0.0; 4];          // Our output is a 2x2 matrix
+/// cfavml::div_vertical(Projected(&lhs), Projected(&rhs), &mut result);
+/// assert_eq!(result, [1.0, -0.4, 0.5, 0.4]);
+/// ```
+///
 /// ##### With `MaybeUninit`
 ///
 /// Often if you are working with new-allocations, you do not want to initialize the data twice,
@@ -1933,18 +2257,6 @@ where
 /// assert_eq!(result, [1.0, -0.4, 1.0, -0.5]);
 /// ```
 ///
-/// ### Projecting Vectors
-///
-/// CFAVML allows for working over a wide variety of buffers for applications, projection is effectively
-/// broadcasting of two input buffers implementing `IntoMemLoader<T>`.
-///
-/// By default, you can provide _two slices_, _one slice and a broadcast value_, or _two broadcast values_,
-/// which exhibit the standard behaviour as you might expect.
-///
-/// When providing two slices as inputs they cannot be projected to a buffer
-/// that is larger their input sizes by default. This means providing two slices
-/// of `128` elements in length must take a result buffer of `128` elements in length.
-///
 /// ### Implementation Pseudocode
 ///
 /// _This is the logic of the routine being called._
diff --git a/cfavml/src/safe_trait_arithmetic_ops.rs b/cfavml/src/safe_trait_arithmetic_ops.rs
index 8a53d39..526bf20 100644
--- a/cfavml/src/safe_trait_arithmetic_ops.rs
+++ b/cfavml/src/safe_trait_arithmetic_ops.rs
@@ -12,6 +12,8 @@ pub trait ArithmeticOps: Sized + Copy {
     /// Performs an element wise addition of two input buffers `lhs` and `rhs` that can
     /// be projected to the desired output size of `result`.
     ///
+    /// See [cfavml::add_vertical](crate::add_vertical) for examples.
+    ///
     /// ### Projecting Vectors
     ///
     /// CFAVML allows for working over a wide variety of buffers for applications, projection is effectively
@@ -24,6 +26,11 @@ pub trait ArithmeticOps: Sized + Copy {
     /// that is larger their input sizes by default. This means providing two slices
     /// of `128` elements in length must take a result buffer of `128` elements in length.
     ///
+    /// You can wrap your inputs in a [Projected](crate::mem_loader::Projected) wrapper which
+    /// enables projecting of the input buffer to new sizes providing the new size is a
+    /// multiple of the original size. When this buffer is projected, it is effectively
+    /// repeated `N` times, where `N` is how many times the old size fits into the new size.
+    ///
     /// ### Implementation Pseudocode
     ///
     /// ```ignore
@@ -50,6 +57,8 @@ pub trait ArithmeticOps: Sized + Copy {
     /// Performs an element wise subtraction of two input buffers `a` and `b` that can
     /// be projected to the desired output size of `result`.
     ///
+    /// See [cfavml::sub_vertical](crate::sub_vertical) for examples.
+    ///
     /// ### Projecting Vectors
     ///
     /// CFAVML allows for working over a wide variety of buffers for applications, projection is effectively
@@ -62,6 +71,11 @@ pub trait ArithmeticOps: Sized + Copy {
     /// that is larger their input sizes by default. This means providing two slices
     /// of `128` elements in length must take a result buffer of `128` elements in length.
     ///
+    /// You can wrap your inputs in a [Projected](crate::mem_loader::Projected) wrapper which
+    /// enables projecting of the input buffer to new sizes providing the new size is a
+    /// multiple of the original size. When this buffer is projected, it is effectively
+    /// repeated `N` times, where `N` is how many times the old size fits into the new size.
+    ///
     /// ### Implementation Pseudocode
     ///
     /// ```ignore
@@ -88,6 +102,8 @@ pub trait ArithmeticOps: Sized + Copy {
     /// Performs an element wise multiply of two input buffers `a` and `b` that can
     /// be projected to the desired output size of `result`.
     ///
+    /// See [cfavml::mul_vertical](crate::mul_vertical) for examples.
+    ///
     /// ### Projecting Vectors
     ///
     /// CFAVML allows for working over a wide variety of buffers for applications, projection is effectively
@@ -100,6 +116,11 @@ pub trait ArithmeticOps: Sized + Copy {
     /// that is larger their input sizes by default. This means providing two slices
     /// of `128` elements in length must take a result buffer of `128` elements in length.
     ///
+    /// You can wrap your inputs in a [Projected](crate::mem_loader::Projected) wrapper which
+    /// enables projecting of the input buffer to new sizes providing the new size is a
+    /// multiple of the original size. When this buffer is projected, it is effectively
+    /// repeated `N` times, where `N` is how many times the old size fits into the new size.
+    ///
     /// ### Implementation Pseudocode
     ///
     /// ```ignore
@@ -126,6 +147,8 @@ pub trait ArithmeticOps: Sized + Copy {
     /// Performs an element wise division of two input buffers `a` and `b` that can
     /// be projected to the desired output size of `result`.
     ///
+    /// See [cfavml::div_vertical](crate::div_vertical) for examples.
+    ///
     /// ### Projecting Vectors
     ///
     /// CFAVML allows for working over a wide variety of buffers for applications, projection is effectively
@@ -138,6 +161,11 @@ pub trait ArithmeticOps: Sized + Copy {
     /// that is larger their input sizes by default. This means providing two slices
     /// of `128` elements in length must take a result buffer of `128` elements in length.
     ///
+    /// You can wrap your inputs in a [Projected](crate::mem_loader::Projected) wrapper which
+    /// enables projecting of the input buffer to new sizes providing the new size is a
+    /// multiple of the original size. When this buffer is projected, it is effectively
+    /// repeated `N` times, where `N` is how many times the old size fits into the new size.
+    ///
     /// ### Implementation Pseudocode
     ///
     /// ```ignore
diff --git a/cfavml/src/safe_trait_cmp_ops.rs b/cfavml/src/safe_trait_cmp_ops.rs
index 5953266..319b718 100644
--- a/cfavml/src/safe_trait_cmp_ops.rs
+++ b/cfavml/src/safe_trait_cmp_ops.rs
@@ -33,6 +33,26 @@ pub trait CmpOps: Sized + Copy {
     /// Performs an element wise max on each element of vector `a` and `b`,
     /// writing the result to `result`.
     ///
+    /// See [cfavml::max_vertical](crate::max_vertical) for examples.
+    ///
+    /// ### Projecting Vectors
+    ///
+    /// CFAVML allows for working over a wide variety of buffers for applications, projection is effectively
+    /// broadcasting of two input buffers implementing `IntoMemLoader<T>`.
+    ///
+    /// By default, you can provide _two slices_, _one slice and a broadcast value_, or _two broadcast values_,
+    /// which exhibit the standard behaviour as you might expect.
+    ///
+    /// When providing two slices as inputs they cannot be projected to a buffer
+    /// that is larger than their input sizes by default. This means providing two slices
+    /// of `128` elements in length must take a result buffer of `128` elements in length.
+    ///
+    /// You can wrap your inputs in a [Projected](crate::mem_loader::Projected) wrapper which
+    /// enables projecting of the input buffer to new sizes providing the new size is a
+    /// multiple of the original size. When this buffer is projected, it is effectively
+    /// repeated `N` times, where `N` is how many times the old size fits into the new size.
+    ///
+    ///
     /// ### Implementation Pseudocode
     ///
     /// ```ignore
@@ -82,6 +102,25 @@ pub trait CmpOps: Sized + Copy {
     /// Performs an element wise min on each element of vector `a` and `b`,
     /// writing the result to `result`.
     ///
+    /// See [cfavml::min_vertical](crate::min_vertical) for examples.
+    ///
+    /// ### Projecting Vectors
+    ///
+    /// CFAVML allows for working over a wide variety of buffers for applications, projection is effectively
+    /// broadcasting of two input buffers implementing `IntoMemLoader<T>`.
+    ///
+    /// By default, you can provide _two slices_, _one slice and a broadcast value_, or _two broadcast values_,
+    /// which exhibit the standard behaviour as you might expect.
+    ///
+    /// When providing two slices as inputs they cannot be projected to a buffer
+    /// that is larger than their input sizes by default. This means providing two slices
+    /// of `128` elements in length must take a result buffer of `128` elements in length.
+    ///
+    /// You can wrap your inputs in a [Projected](crate::mem_loader::Projected) wrapper which
+    /// enables projecting of the input buffer to new sizes providing the new size is a
+    /// multiple of the original size. When this buffer is projected, it is effectively
+    /// repeated `N` times, where `N` is how many times the old size fits into the new size.
+    ///
     /// ### Implementation Pseudocode
     ///
     /// ```ignore
@@ -114,6 +153,25 @@ pub trait CmpOps: Sized + Copy {
     /// Checks each element pair from vectors `a` and `b` of size `dims`  comparing
     /// if element `a` is **_equal to_** element `b` returning a mask vector of the same type.
     ///
+    /// See [cfavml::eq_vertical](crate::eq_vertical) for examples.
+    ///
+    /// ### Projecting Vectors
+    ///
+    /// CFAVML allows for working over a wide variety of buffers for applications, projection is effectively
+    /// broadcasting of two input buffers implementing `IntoMemLoader<T>`.
+    ///
+    /// By default, you can provide _two slices_, _one slice and a broadcast value_, or _two broadcast values_,
+    /// which exhibit the standard behaviour as you might expect.
+    ///
+    /// When providing two slices as inputs they cannot be projected to a buffer
+    /// that is larger than their input sizes by default. This means providing two slices
+    /// of `128` elements in length must take a result buffer of `128` elements in length.
+    ///
+    /// You can wrap your inputs in a [Projected](crate::mem_loader::Projected) wrapper which
+    /// enables projecting of the input buffer to new sizes providing the new size is a
+    /// multiple of the original size. When this buffer is projected, it is effectively
+    /// repeated `N` times, where `N` is how many times the old size fits into the new size.
+    ///
     /// ### Implementation Pseudocode
     ///
     /// ```ignore
@@ -155,6 +213,25 @@ pub trait CmpOps: Sized + Copy {
     /// Checks each element pair from vectors `a` and `b` of size `dims`  comparing
     /// if element `a` is **_not equal to_** element `b` returning a mask vector of the same type.
     ///
+    /// See [cfavml::neq_vertical](crate::neq_vertical) for examples.
+    ///
+    /// ### Projecting Vectors
+    ///
+    /// CFAVML allows for working over a wide variety of buffers for applications, projection is effectively
+    /// broadcasting of two input buffers implementing `IntoMemLoader<T>`.
+    ///
+    /// By default, you can provide _two slices_, _one slice and a broadcast value_, or _two broadcast values_,
+    /// which exhibit the standard behaviour as you might expect.
+    ///
+    /// When providing two slices as inputs they cannot be projected to a buffer
+    /// that is larger than their input sizes by default. This means providing two slices
+    /// of `128` elements in length must take a result buffer of `128` elements in length.
+    ///
+    /// You can wrap your inputs in a [Projected](crate::mem_loader::Projected) wrapper which
+    /// enables projecting of the input buffer to new sizes providing the new size is a
+    /// multiple of the original size. When this buffer is projected, it is effectively
+    /// repeated `N` times, where `N` is how many times the old size fits into the new size.
+    ///
     /// ### Implementation Pseudocode
     ///
     /// ```ignore
@@ -196,6 +273,25 @@ pub trait CmpOps: Sized + Copy {
     /// Checks each element pair from vectors `a` and `b` of size `dims`  comparing
     /// if element `a` is **_less than_** element `b` returning a mask vector of the same type.
     ///
+    /// See [cfavml::lt_vertical](crate::lt_vertical) for examples.
+    ///
+    /// ### Projecting Vectors
+    ///
+    /// CFAVML allows for working over a wide variety of buffers for applications, projection is effectively
+    /// broadcasting of two input buffers implementing `IntoMemLoader<T>`.
+    ///
+    /// By default, you can provide _two slices_, _one slice and a broadcast value_, or _two broadcast values_,
+    /// which exhibit the standard behaviour as you might expect.
+    ///
+    /// When providing two slices as inputs they cannot be projected to a buffer
+    /// that is larger than their input sizes by default. This means providing two slices
+    /// of `128` elements in length must take a result buffer of `128` elements in length.
+    ///
+    /// You can wrap your inputs in a [Projected](crate::mem_loader::Projected) wrapper which
+    /// enables projecting of the input buffer to new sizes providing the new size is a
+    /// multiple of the original size. When this buffer is projected, it is effectively
+    /// repeated `N` times, where `N` is how many times the old size fits into the new size.
+    ///
     /// ### Implementation Pseudocode
     ///
     /// ```ignore
@@ -237,6 +333,25 @@ pub trait CmpOps: Sized + Copy {
     /// Checks each element pair from vectors `a` and `b` of size `dims`  comparing
     /// if element `a` is **_less than or equal to_** element `b` returning a mask vector of the same type.
     ///
+    /// See [cfavml::lte_vertical](crate::lte_vertical) for examples.
+    ///
+    /// ### Projecting Vectors
+    ///
+    /// CFAVML allows for working over a wide variety of buffers for applications, projection is effectively
+    /// broadcasting of two input buffers implementing `IntoMemLoader<T>`.
+    ///
+    /// By default, you can provide _two slices_, _one slice and a broadcast value_, or _two broadcast values_,
+    /// which exhibit the standard behaviour as you might expect.
+    ///
+    /// When providing two slices as inputs they cannot be projected to a buffer
+    /// that is larger than their input sizes by default. This means providing two slices
+    /// of `128` elements in length must take a result buffer of `128` elements in length.
+    ///
+    /// You can wrap your inputs in a [Projected](crate::mem_loader::Projected) wrapper which
+    /// enables projecting of the input buffer to new sizes providing the new size is a
+    /// multiple of the original size. When this buffer is projected, it is effectively
+    /// repeated `N` times, where `N` is how many times the old size fits into the new size.
+    ///
     /// ### Implementation Pseudocode
     ///
     /// ```ignore
@@ -278,6 +393,25 @@ pub trait CmpOps: Sized + Copy {
     /// Checks each element pair from vectors `a` and `b` of size `dims`  comparing
     /// if element `a` is **_greater than_** element `b` returning a mask vector of the same type.
     ///
+    /// See [cfavml::gt_vertical](crate::gt_vertical) for examples.
+    ///
+    /// ### Projecting Vectors
+    ///
+    /// CFAVML allows for working over a wide variety of buffers for applications, projection is effectively
+    /// broadcasting of two input buffers implementing `IntoMemLoader<T>`.
+    ///
+    /// By default, you can provide _two slices_, _one slice and a broadcast value_, or _two broadcast values_,
+    /// which exhibit the standard behaviour as you might expect.
+    ///
+    /// When providing two slices as inputs they cannot be projected to a buffer
+    /// that is larger than their input sizes by default. This means providing two slices
+    /// of `128` elements in length must take a result buffer of `128` elements in length.
+    ///
+    /// You can wrap your inputs in a [Projected](crate::mem_loader::Projected) wrapper which
+    /// enables projecting of the input buffer to new sizes providing the new size is a
+    /// multiple of the original size. When this buffer is projected, it is effectively
+    /// repeated `N` times, where `N` is how many times the old size fits into the new size.
+    ///
     /// ### Implementation Pseudocode
     ///
     /// ```ignore
@@ -319,6 +453,25 @@ pub trait CmpOps: Sized + Copy {
     /// Checks each element pair from vectors `a` and `b` of size `dims`  comparing
     /// if element `a` is **_greater than_** element `b` returning a mask vector of the same type.
     ///
+    /// See [cfavml::gte_vertical](crate::gte_vertical) for examples.
+    ///
+    /// ### Projecting Vectors
+    ///
+    /// CFAVML allows for working over a wide variety of buffers for applications, projection is effectively
+    /// broadcasting of two input buffers implementing `IntoMemLoader<T>`.
+    ///
+    /// By default, you can provide _two slices_, _one slice and a broadcast value_, or _two broadcast values_,
+    /// which exhibit the standard behaviour as you might expect.
+    ///
+    /// When providing two slices as inputs they cannot be projected to a buffer
+    /// that is larger than their input sizes by default. This means providing two slices
+    /// of `128` elements in length must take a result buffer of `128` elements in length.
+    ///
+    /// You can wrap your inputs in a [Projected](crate::mem_loader::Projected) wrapper which
+    /// enables projecting of the input buffer to new sizes providing the new size is a
+    /// multiple of the original size. When this buffer is projected, it is effectively
+    /// repeated `N` times, where `N` is how many times the old size fits into the new size.
+    ///
     /// ### Implementation Pseudocode
     ///
     /// ```ignore