Skip to content

Commit

Permalink
softgpu: Use SSE in Vec?::Length().
Browse files Browse the repository at this point in the history
Minor perf boost but if I do everything in Vec things get slower.
  • Loading branch information
unknownbrackets committed Mar 17, 2014
1 parent 6ef0aa1 commit 4772852
Showing 1 changed file with 28 additions and 0 deletions.
28 changes: 28 additions & 0 deletions GPU/Math3D.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,16 @@ namespace Math3D {
// Returns the length of the vector: the square root of the sum of the
// squares of its first two lanes.
// NOTE(review): the SSE path assumes `vec` holds x in lane 0 and y in lane 1 —
// confirm against the Vec2 declaration.
template<>
float Vec2<float>::Length() const
{
#if defined(_M_SSE)
	// Square every lane at once.
	const __m128 sq = _mm_mul_ps(vec, vec);
	// Bring lane 1 down to lane 0 so the sum can be formed with a scalar add.
	const __m128 r2 = _mm_shuffle_ps(sq, sq, _MM_SHUFFLE(0, 0, 0, 1));
	const __m128 res = _mm_add_ss(sq, r2);
	// Only lane 0 carries the result.  It must be written with a scalar store:
	// _mm_store_ps would write 16 bytes into this 4-byte float (overrunning it)
	// and requires a 16-byte aligned destination.
	float ret;
	_mm_store_ss(&ret, _mm_sqrt_ss(res));
	return ret;
#else
	return sqrtf(Length2());
#endif
}

template<>
Expand Down Expand Up @@ -88,7 +97,17 @@ unsigned int Vec3<int>::ToRGB() const
// Returns the length of the vector: the square root of the sum of the
// squares of its first three lanes.
// NOTE(review): the SSE path assumes `vec` holds x, y, z in lanes 0..2 —
// confirm against the Vec3 declaration.
template<>
float Vec3<float>::Length() const
{
#if defined(_M_SSE)
	// Square every lane at once.
	const __m128 sq = _mm_mul_ps(vec, vec);
	// Bring lanes 1 and 2 down to lane 0 so they can be summed with scalar adds.
	const __m128 r2 = _mm_shuffle_ps(sq, sq, _MM_SHUFFLE(0, 0, 0, 1));
	const __m128 r3 = _mm_shuffle_ps(sq, sq, _MM_SHUFFLE(0, 0, 0, 2));
	const __m128 res = _mm_add_ss(sq, _mm_add_ss(r2, r3));
	// Only lane 0 carries the result.  It must be written with a scalar store:
	// _mm_store_ps would write 16 bytes into this 4-byte float (overrunning it)
	// and requires a 16-byte aligned destination.
	float ret;
	_mm_store_ss(&ret, _mm_sqrt_ss(res));
	return ret;
#else
	return sqrtf(Length2());
#endif
}

template<>
Expand Down Expand Up @@ -156,7 +175,16 @@ unsigned int Vec4<int>::ToRGBA() const
// Returns the length of the vector: the square root of the sum of the
// squares of all four lanes.
template<>
float Vec4<float>::Length() const
{
#if defined(_M_SSE)
	// Square every lane at once.
	const __m128 sq = _mm_mul_ps(vec, vec);
	// Fold the upper half onto the lower half:
	// lane 0 = sq[0] + sq[2], lane 1 = sq[1] + sq[3].
	const __m128 r2 = _mm_add_ps(sq, _mm_movehl_ps(sq, sq));
	// Add lane 1 into lane 0 to finish the horizontal sum.
	const __m128 res = _mm_add_ss(r2, _mm_shuffle_ps(r2, r2, _MM_SHUFFLE(0, 0, 0, 1)));
	// Only lane 0 carries the result.  It must be written with a scalar store:
	// _mm_store_ps would write 16 bytes into this 4-byte float (overrunning it)
	// and requires a 16-byte aligned destination.
	float ret;
	_mm_store_ss(&ret, _mm_sqrt_ss(res));
	return ret;
#else
	return sqrtf(Length2());
#endif
}

template<>
Expand Down

0 comments on commit 4772852

Please sign in to comment.