From 9e5a416d79a9e75eb474acb7f6d313c490a23036 Mon Sep 17 00:00:00 2001 From: Cesar Eduardo Barros Date: Thu, 6 Aug 2015 23:37:28 -0300 Subject: [PATCH] Port code to "SIMD groundwork part 1" Port the SIMD code to https://github.com/rust-lang/rust/pull/27169 --- Cargo.toml | 6 +-- src/lib.rs | 5 +- src/simd.rs | 143 ++++++++++++++++++++++++++++++++++++-------------- src/simdty.rs | 76 +++++++++++++++++++++++++++ 4 files changed, 185 insertions(+), 45 deletions(-) create mode 100644 src/simdty.rs diff --git a/Cargo.toml b/Cargo.toml index 1f483af..07ac881 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -10,16 +10,12 @@ license = "MIT" [features] bench = [] -simd = ["simdty"] +simd = [] simd_opt = ["simd"] simd_asm = ["simd_opt"] [dependencies] constant_time_eq = "0.1.0" -[dependencies.simdty] -version = "0.0.3" -optional = true - [dev-dependencies] rustc-serialize = "0.3.15" diff --git a/src/lib.rs b/src/lib.rs index 15951bc..b0a40a3 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -27,16 +27,17 @@ //! A pure Rust implementation of BLAKE2 based on the draft RFC. #![cfg_attr(all(feature = "bench", test), feature(test))] -#![cfg_attr(feature = "simd", feature(link_llvm_intrinsics, simd, simd_ffi))] +#![cfg_attr(feature = "simd", feature(platform_intrinsics, simd_basics))] +#![cfg_attr(feature = "simd_opt", feature(cfg_target_feature))] #![cfg_attr(feature = "simd_asm", feature(asm))] #[cfg(all(feature = "bench", test))] extern crate test; -#[cfg(feature = "simd")] extern crate simdty; extern crate constant_time_eq; mod as_bytes; mod bytes; +mod simdty; mod simd; #[macro_use] diff --git a/src/simd.rs b/src/simd.rs index 8e7a0c2..789574f 100644 --- a/src/simd.rs +++ b/src/simd.rs @@ -27,28 +27,64 @@ #[cfg(feature = "simd_opt")] use std::mem::transmute; -#[cfg(feature = "simd")] pub use simdty::{u32x4, u64x4}; -#[cfg(not(feature = "simd"))] -#[derive(Clone, Copy, Debug)] -#[repr(C)] -pub struct u32x4(pub u32, pub u32, pub u32, pub u32); +#[cfg(feature = "simd")] +extern "platform-intrinsic" { + fn simd_add(x: T, y: T) -> T; + fn simd_shl(x: T, y: T) -> T; + fn simd_shr(x: T, y: T) -> T; + fn simd_xor(x: T, y: T) -> T; +} -#[cfg(not(feature = "simd"))] -#[derive(Clone, Copy, Debug)] -#[repr(C)] -pub struct u64x4(pub u64, pub u64, pub u64, pub u64); +#[cfg(feature = "simd_opt")] +extern "platform-intrinsic" { + fn simd_shuffle8(v: T, w: T, + i0: u32, i1: u32, i2: u32, i3: u32, + i4: u32, i5: u32, i6: u32, i7: u32) -> T; -#[cfg(not(feature = "simd"))] -use std::ops::BitXor; + #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] + fn simd_shuffle16(v: T, w: T, + i0: u32, i1: u32, i2: u32, i3: u32, + i4: u32, i5: u32, i6: u32, i7: u32, + i8: u32, i9: u32, i10: u32, i11: u32, + i12: u32, i13: u32, i14: u32, i15: u32, + ) -> T; +} -macro_rules! impl_bitxor { +use std::ops::{Add, BitXor, Shl, Shr}; + +macro_rules! impl_ops { ($vec:ident) => { - #[cfg(not(feature = "simd"))] + impl Add for $vec { + type Output = Self; + + #[cfg(feature = "simd")] + #[inline(always)] + fn add(self, rhs: Self) -> Self::Output { + unsafe { simd_add(self, rhs) } + } + + #[cfg(not(feature = "simd"))] + #[inline(always)] + fn add(self, rhs: Self) -> Self::Output { + $vec(self.0.wrapping_add(rhs.0), + self.1.wrapping_add(rhs.1), + self.2.wrapping_add(rhs.2), + self.3.wrapping_add(rhs.3)) + } + } + impl BitXor for $vec { type Output = Self; + #[cfg(feature = "simd")] + #[inline(always)] + fn bitxor(self, rhs: Self) -> Self::Output { + unsafe { simd_xor(self, rhs) } + } + + #[cfg(not(feature = "simd"))] #[inline(always)] fn bitxor(self, rhs: Self) -> Self::Output { $vec(self.0 ^ rhs.0, @@ -57,11 +93,49 @@ macro_rules! impl_bitxor { self.3 ^ rhs.3) } } + + impl Shl<$vec> for $vec { + type Output = Self; + + #[cfg(feature = "simd")] + #[inline(always)] + fn shl(self, rhs: Self) -> Self::Output { + unsafe { simd_shl(self, rhs) } + } + + #[cfg(not(feature = "simd"))] + #[inline(always)] + fn shl(self, rhs: Self) -> Self::Output { + $vec(self.0 << rhs.0, + self.1 << rhs.1, + self.2 << rhs.2, + self.3 << rhs.3) + } + } + + impl Shr<$vec> for $vec { + type Output = Self; + + #[cfg(feature = "simd")] + #[inline(always)] + fn shr(self, rhs: Self) -> Self::Output { + unsafe { simd_shr(self, rhs) } + } + + #[cfg(not(feature = "simd"))] + #[inline(always)] + fn shr(self, rhs: Self) -> Self::Output { + $vec(self.0 >> rhs.0, + self.1 >> rhs.1, + self.2 >> rhs.2, + self.3 >> rhs.3) + } + } } } -impl_bitxor!(u32x4); -impl_bitxor!(u64x4); +impl_ops!(u32x4); +impl_ops!(u64x4); pub trait Vector4: Copy { fn gather(src: &[T], i0: usize, i1: usize, i2: usize, i3: usize) -> Self; @@ -118,26 +192,16 @@ macro_rules! impl_vector4_common { self.3.to_le()) } - #[cfg(feature = "simd")] #[inline(always)] fn wrapping_add(self, rhs: Self) -> Self { self + rhs } - #[cfg(not(feature = "simd"))] - #[inline(always)] - fn wrapping_add(self, rhs: Self) -> Self { - $vec(self.0.wrapping_add(rhs.0), - self.1.wrapping_add(rhs.1), - self.2.wrapping_add(rhs.2), - self.3.wrapping_add(rhs.3)) - } - #[cfg(feature = "simd")] #[inline(always)] fn rotate_right_any(self, n: u32) -> Self { let r = n as $word; let l = $bits - r; - (self >> $vec(r, r, r, r)) | (self << $vec(l, l, l, l)) + (self >> $vec(r, r, r, r)) ^ (self << $vec(l, l, l, l)) } #[cfg(not(feature = "simd"))] @@ -174,10 +238,11 @@ fn u32x4_rotate_right_16(vec: u32x4) -> u32x4 { use simdty::u16x8; unsafe { let tmp: u16x8 = transmute(vec); - transmute(u16x8(tmp.1, tmp.0, - tmp.3, tmp.2, - tmp.5, tmp.4, - tmp.7, tmp.6)) + transmute(simd_shuffle8::(tmp, tmp, + 1, 0, + 3, 2, + 5, 4, + 7, 6)) } } @@ -205,10 +270,11 @@ fn u64x4_rotate_right_32(vec: u64x4) -> u64x4 { use simdty::u32x8; unsafe { let tmp: u32x8 = transmute(vec); - transmute(u32x8(tmp.1, tmp.0, - tmp.3, tmp.2, - tmp.5, tmp.4, - tmp.7, tmp.6)) + transmute(simd_shuffle8::(tmp, tmp, + 1, 0, + 3, 2, + 5, 4, + 7, 6)) } } @@ -219,10 +285,11 @@ fn u64x4_rotate_right_16(vec: u64x4) -> u64x4 { use simdty::u16x16; unsafe { let tmp: u16x16 = transmute(vec); - transmute(u16x16(tmp.1, tmp.2, tmp.3, tmp.0, - tmp.5, tmp.6, tmp.7, tmp.4, - tmp.9, tmp.10, tmp.11, tmp.8, - tmp.13, tmp.14, tmp.15, tmp.12)) + transmute(simd_shuffle16::(tmp, tmp, + 1, 2, 3, 0, + 5, 6, 7, 4, + 9, 10, 11, 8, + 13, 14, 15, 12)) } } diff --git a/src/simdty.rs b/src/simdty.rs new file mode 100644 index 0000000..becb8d6 --- /dev/null +++ b/src/simdty.rs @@ -0,0 +1,76 @@ +// Copyright (c) 2015 Cesar Eduardo Barros +// +// Permission is hereby granted, free of charge, to any +// person obtaining a copy of this software and associated +// documentation files (the "Software"), to deal in the +// Software without restriction, including without +// limitation the rights to use, copy, modify, merge, +// publish, distribute, sublicense, and/or sell copies of +// the Software, and to permit persons to whom the Software +// is furnished to do so, subject to the following +// conditions: +// +// The above copyright notice and this permission notice +// shall be included in all copies or substantial portions +// of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF +// ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED +// TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A +// PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT +// SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY +// CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION +// OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR +// IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER +// DEALINGS IN THE SOFTWARE. + +#[cfg(feature = "simd")] +macro_rules! decl_vec { + ($($decl:item)*) => { + $( + #[allow(non_camel_case_types)] + #[derive(Clone, Copy, Debug)] + #[repr(simd)] + $decl + )* + } +} + +#[cfg(not(feature = "simd"))] +macro_rules! decl_vec { + ($($decl:item)*) => { + $( + #[derive(Clone, Copy, Debug)] + #[repr(C)] + $decl + )* + } +} + +decl_vec!{ + pub struct u32x4(pub u32, pub u32, pub u32, pub u32); + pub struct u64x4(pub u64, pub u64, pub u64, pub u64); +} + +#[cfg(feature = "simd_opt")] +decl_vec!{ + pub struct u16x8(pub u16, pub u16, pub u16, pub u16, + pub u16, pub u16, pub u16, pub u16); + pub struct u32x8(pub u32, pub u32, pub u32, pub u32, + pub u32, pub u32, pub u32, pub u32); +} + +#[cfg(feature = "simd_opt")] +#[cfg(any(target_arch = "x86", target_arch = "x86_64"))] +decl_vec!{ + pub struct u16x16(pub u16, pub u16, pub u16, pub u16, + pub u16, pub u16, pub u16, pub u16, + pub u16, pub u16, pub u16, pub u16, + pub u16, pub u16, pub u16, pub u16); +} + +#[cfg(feature = "simd_asm")] +#[cfg(target_arch = "arm")] +decl_vec!{ + pub struct u64x2(pub u64, pub u64); +}