Skip to content

Commit

Permalink
Port code to "SIMD groundwork part 1"
Browse files Browse the repository at this point in the history
Port the SIMD code to rust-lang/rust#27169
  • Loading branch information
cesarb committed Aug 7, 2015
1 parent 8d895f7 commit 9e5a416
Show file tree
Hide file tree
Showing 4 changed files with 185 additions and 45 deletions.
6 changes: 1 addition & 5 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -10,16 +10,12 @@ license = "MIT"

[features]
bench = []
simd = ["simdty"]
simd = []
simd_opt = ["simd"]
simd_asm = ["simd_opt"]

[dependencies]
constant_time_eq = "0.1.0"

[dependencies.simdty]
version = "0.0.3"
optional = true

[dev-dependencies]
rustc-serialize = "0.3.15"
5 changes: 3 additions & 2 deletions src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -27,16 +27,17 @@
//! A pure Rust implementation of BLAKE2 based on the draft RFC.
#![cfg_attr(all(feature = "bench", test), feature(test))]
#![cfg_attr(feature = "simd", feature(link_llvm_intrinsics, simd, simd_ffi))]
#![cfg_attr(feature = "simd", feature(platform_intrinsics, simd_basics))]
#![cfg_attr(feature = "simd_opt", feature(cfg_target_feature))]
#![cfg_attr(feature = "simd_asm", feature(asm))]

#[cfg(all(feature = "bench", test))] extern crate test;
#[cfg(feature = "simd")] extern crate simdty;

extern crate constant_time_eq;

mod as_bytes;
mod bytes;
mod simdty;
mod simd;

#[macro_use]
Expand Down
143 changes: 105 additions & 38 deletions src/simd.rs
Original file line number Diff line number Diff line change
Expand Up @@ -27,28 +27,64 @@
#[cfg(feature = "simd_opt")]
use std::mem::transmute;

#[cfg(feature = "simd")]
pub use simdty::{u32x4, u64x4};

#[cfg(not(feature = "simd"))]
#[derive(Clone, Copy, Debug)]
#[repr(C)]
pub struct u32x4(pub u32, pub u32, pub u32, pub u32);
// Generic vector operations declared as compiler "platform-intrinsic"
// functions. They are only declared when the `simd` feature is enabled.
// The operator impls generated by `impl_ops!` call them inside `unsafe`
// blocks, passing two vectors of the same type `T`.
#[cfg(feature = "simd")]
extern "platform-intrinsic" {
// Backs `Add::add` for the vector types.
fn simd_add<T>(x: T, y: T) -> T;
// Backs `Shl::shl`; the shift amounts are passed as a vector `y`.
fn simd_shl<T>(x: T, y: T) -> T;
// Backs `Shr::shr`; the shift amounts are passed as a vector `y`.
fn simd_shr<T>(x: T, y: T) -> T;
// Backs `BitXor::bitxor` for the vector types.
fn simd_xor<T>(x: T, y: T) -> T;
}

#[cfg(not(feature = "simd"))]
#[derive(Clone, Copy, Debug)]
#[repr(C)]
pub struct u64x4(pub u64, pub u64, pub u64, pub u64);
#[cfg(feature = "simd_opt")]
extern "platform-intrinsic" {
fn simd_shuffle8<T, Elem>(v: T, w: T,
i0: u32, i1: u32, i2: u32, i3: u32,
i4: u32, i5: u32, i6: u32, i7: u32) -> T;

#[cfg(not(feature = "simd"))]
use std::ops::BitXor;
#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
fn simd_shuffle16<T, Elem>(v: T, w: T,
i0: u32, i1: u32, i2: u32, i3: u32,
i4: u32, i5: u32, i6: u32, i7: u32,
i8: u32, i9: u32, i10: u32, i11: u32,
i12: u32, i13: u32, i14: u32, i15: u32,
) -> T;
}

macro_rules! impl_bitxor {
use std::ops::{Add, BitXor, Shl, Shr};

macro_rules! impl_ops {
($vec:ident) => {
#[cfg(not(feature = "simd"))]
impl Add for $vec {
type Output = Self;

#[cfg(feature = "simd")]
#[inline(always)]
fn add(self, rhs: Self) -> Self::Output {
unsafe { simd_add(self, rhs) }
}

#[cfg(not(feature = "simd"))]
#[inline(always)]
fn add(self, rhs: Self) -> Self::Output {
$vec(self.0.wrapping_add(rhs.0),
self.1.wrapping_add(rhs.1),
self.2.wrapping_add(rhs.2),
self.3.wrapping_add(rhs.3))
}
}

impl BitXor for $vec {
type Output = Self;

#[cfg(feature = "simd")]
#[inline(always)]
fn bitxor(self, rhs: Self) -> Self::Output {
unsafe { simd_xor(self, rhs) }
}

#[cfg(not(feature = "simd"))]
#[inline(always)]
fn bitxor(self, rhs: Self) -> Self::Output {
$vec(self.0 ^ rhs.0,
Expand All @@ -57,11 +93,49 @@ macro_rules! impl_bitxor {
self.3 ^ rhs.3)
}
}

impl Shl<$vec> for $vec {
type Output = Self;

#[cfg(feature = "simd")]
#[inline(always)]
fn shl(self, rhs: Self) -> Self::Output {
unsafe { simd_shl(self, rhs) }
}

#[cfg(not(feature = "simd"))]
#[inline(always)]
fn shl(self, rhs: Self) -> Self::Output {
$vec(self.0 << rhs.0,
self.1 << rhs.1,
self.2 << rhs.2,
self.3 << rhs.3)
}
}

impl Shr<$vec> for $vec {
type Output = Self;

#[cfg(feature = "simd")]
#[inline(always)]
fn shr(self, rhs: Self) -> Self::Output {
unsafe { simd_shr(self, rhs) }
}

#[cfg(not(feature = "simd"))]
#[inline(always)]
fn shr(self, rhs: Self) -> Self::Output {
$vec(self.0 >> rhs.0,
self.1 >> rhs.1,
self.2 >> rhs.2,
self.3 >> rhs.3)
}
}
}
}

impl_bitxor!(u32x4);
impl_bitxor!(u64x4);
impl_ops!(u32x4);
impl_ops!(u64x4);

pub trait Vector4<T>: Copy {
fn gather(src: &[T], i0: usize, i1: usize, i2: usize, i3: usize) -> Self;
Expand Down Expand Up @@ -118,26 +192,16 @@ macro_rules! impl_vector4_common {
self.3.to_le())
}

#[cfg(feature = "simd")]
#[inline(always)]
fn wrapping_add(self, rhs: Self) -> Self { self + rhs }

#[cfg(not(feature = "simd"))]
#[inline(always)]
fn wrapping_add(self, rhs: Self) -> Self {
$vec(self.0.wrapping_add(rhs.0),
self.1.wrapping_add(rhs.1),
self.2.wrapping_add(rhs.2),
self.3.wrapping_add(rhs.3))
}

#[cfg(feature = "simd")]
#[inline(always)]
fn rotate_right_any(self, n: u32) -> Self {
let r = n as $word;
let l = $bits - r;

(self >> $vec(r, r, r, r)) | (self << $vec(l, l, l, l))
(self >> $vec(r, r, r, r)) ^ (self << $vec(l, l, l, l))
}

#[cfg(not(feature = "simd"))]
Expand Down Expand Up @@ -174,10 +238,11 @@ fn u32x4_rotate_right_16(vec: u32x4) -> u32x4 {
use simdty::u16x8;
unsafe {
let tmp: u16x8 = transmute(vec);
transmute(u16x8(tmp.1, tmp.0,
tmp.3, tmp.2,
tmp.5, tmp.4,
tmp.7, tmp.6))
transmute(simd_shuffle8::<u16x8, u16>(tmp, tmp,
1, 0,
3, 2,
5, 4,
7, 6))
}
}

Expand Down Expand Up @@ -205,10 +270,11 @@ fn u64x4_rotate_right_32(vec: u64x4) -> u64x4 {
use simdty::u32x8;
unsafe {
let tmp: u32x8 = transmute(vec);
transmute(u32x8(tmp.1, tmp.0,
tmp.3, tmp.2,
tmp.5, tmp.4,
tmp.7, tmp.6))
transmute(simd_shuffle8::<u32x8, u32>(tmp, tmp,
1, 0,
3, 2,
5, 4,
7, 6))
}
}

Expand All @@ -219,10 +285,11 @@ fn u64x4_rotate_right_16(vec: u64x4) -> u64x4 {
use simdty::u16x16;
unsafe {
let tmp: u16x16 = transmute(vec);
transmute(u16x16(tmp.1, tmp.2, tmp.3, tmp.0,
tmp.5, tmp.6, tmp.7, tmp.4,
tmp.9, tmp.10, tmp.11, tmp.8,
tmp.13, tmp.14, tmp.15, tmp.12))
transmute(simd_shuffle16::<u16x16, u16>(tmp, tmp,
1, 2, 3, 0,
5, 6, 7, 4,
9, 10, 11, 8,
13, 14, 15, 12))
}
}

Expand Down
76 changes: 76 additions & 0 deletions src/simdty.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,76 @@
// Copyright (c) 2015 Cesar Eduardo Barros
//
// Permission is hereby granted, free of charge, to any
// person obtaining a copy of this software and associated
// documentation files (the "Software"), to deal in the
// Software without restriction, including without
// limitation the rights to use, copy, modify, merge,
// publish, distribute, sublicense, and/or sell copies of
// the Software, and to permit persons to whom the Software
// is furnished to do so, subject to the following
// conditions:
//
// The above copyright notice and this permission notice
// shall be included in all copies or substantial portions
// of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF
// ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED
// TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A
// PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT
// SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
// CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
// OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR
// IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
// DEALINGS IN THE SOFTWARE.

// `decl_vec!` variant used when the `simd` feature is enabled.
//
// Accepts any number of items and re-emits each one with three attributes
// attached: the lowercase type names are exempted from the
// `non_camel_case_types` lint, `Clone`, `Copy` and `Debug` are derived, and
// the item is given the `simd` representation.
#[cfg(feature = "simd")]
macro_rules! decl_vec {
($($decl:item)*) => {
$(
#[allow(non_camel_case_types)]
#[derive(Clone, Copy, Debug)]
#[repr(simd)]
$decl
)*
}
}

// `decl_vec!` variant used when the `simd` feature is disabled.
//
// Accepts any number of items and re-emits each one as a plain `#[repr(C)]`
// type deriving `Clone`, `Copy` and `Debug`, so the rest of the crate can use
// the same vector type names whether or not SIMD support is compiled in.
#[cfg(not(feature = "simd"))]
macro_rules! decl_vec {
    ($($decl:item)*) => {
        $(
            // The vector types deliberately use lowercase names (`u32x4`,
            // `u64x4`, ...). Silence the naming lint here exactly as the
            // `simd` variant of this macro does, so both configurations
            // build without warnings.
            #[allow(non_camel_case_types)]
            #[derive(Clone, Copy, Debug)]
            #[repr(C)]
            $decl
        )*
    }
}

// Four-lane vector types that are always declared, with or without the
// `simd` feature; `decl_vec!` chooses the representation and derives.
decl_vec!{
// Four `u32` lanes, accessed as tuple fields `.0` to `.3`.
pub struct u32x4(pub u32, pub u32, pub u32, pub u32);
// Four `u64` lanes, accessed as tuple fields `.0` to `.3`.
pub struct u64x4(pub u64, pub u64, pub u64, pub u64);
}

// Eight-lane vector types, declared only when the `simd_opt` feature is
// enabled. The rotation helpers in simd.rs transmute a `u32x4` into `u16x8`
// and a `u64x4` into `u32x8` before shuffling lanes.
#[cfg(feature = "simd_opt")]
decl_vec!{
// Eight `u16` lanes.
pub struct u16x8(pub u16, pub u16, pub u16, pub u16,
pub u16, pub u16, pub u16, pub u16);
// Eight `u32` lanes.
pub struct u32x8(pub u32, pub u32, pub u32, pub u32,
pub u32, pub u32, pub u32, pub u32);
}

// Sixteen-lane `u16` vector, declared only when the `simd_opt` feature is
// enabled and the target architecture is x86 or x86_64. simd.rs transmutes a
// `u64x4` into this type for its 16-bit rotation shuffle.
#[cfg(feature = "simd_opt")]
#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
decl_vec!{
pub struct u16x16(pub u16, pub u16, pub u16, pub u16,
pub u16, pub u16, pub u16, pub u16,
pub u16, pub u16, pub u16, pub u16,
pub u16, pub u16, pub u16, pub u16);
}

// Two-lane `u64` vector, declared only when the `simd_asm` feature is enabled
// and the target architecture is ARM.
// NOTE(review): its use site is not visible in this diff — presumably the
// ARM inline-assembly path; confirm against simd.rs.
#[cfg(feature = "simd_asm")]
#[cfg(target_arch = "arm")]
decl_vec!{
pub struct u64x2(pub u64, pub u64);
}

0 comments on commit 9e5a416

Please sign in to comment.