31 #ifndef _VECTORMATH_AOS_CPP_SSE_H
32 #define _VECTORMATH_AOS_CPP_SSE_H
35 #include <xmmintrin.h>
36 #include <emmintrin.h>
// Reference-type shorthands: each macro expands to the named class followed by '&',
// i.e. an lvalue-reference type. Vector3 and Matrix3 themselves are declared elsewhere.
39 #define Vector3Ref Vector3&
41 #define Matrix3Ref Matrix3&
// Compiler-specific configuration: 16-byte alignment annotations, a force-inline
// keyword, and the USE_SSE3_LDDQU switch.
// NOTE(review): the directives separating the two variants below (presumably #else /
// #endif) are not visible in this excerpt — confirm against the full file.
// First variant: selected when _WIN32 is defined and _MSC_VER is non-zero and >= 1400.
43 #if (defined (_WIN32) && (_MSC_VER) && _MSC_VER >= 1400)
44 #define USE_SSE3_LDDQU
// __declspec(align(16)) is written before the declared entity.
46 #define VM_ATTRIBUTE_ALIGNED_CLASS16(a) __declspec(align(16)) a
47 #define VM_ATTRIBUTE_ALIGN16 __declspec(align(16))
48 #define VECTORMATH_FORCE_INLINE __forceinline
// Second variant: __attribute__ spellings; the aligned attribute is written after the entity.
50 #define VM_ATTRIBUTE_ALIGNED_CLASS16(a) a __attribute__ ((aligned (16)))
51 #define VM_ATTRIBUTE_ALIGN16 __attribute__ ((aligned (16)))
52 #define VECTORMATH_FORCE_INLINE inline __attribute__ ((always_inline))
54 #define USE_SSE3_LDDQU
// <pmmintrin.h> is the SSE3 intrinsics header; the trailing #endif comment indicates
// it is included only under USE_SSE3_LDDQU (the opening #ifdef is not visible here).
60 #include <pmmintrin.h>
61 #endif //USE_SSE3_LDDQU
// Broadcasts one lane: shuffles x with itself using the same lane index e for all four
// selector fields, so every lane of the result holds lane e of x.
// e is passed to _MM_SHUFFLE, so it has to be a compile-time constant.
71 #define vec_splat(x, e) _mm_shuffle_ps(x, x, _MM_SHUFFLE(e,e,e,e))
// Rotates the four lanes of vec by i positions: result lane k takes source lane
// (i + k) mod 4. When i is a multiple of 4 the shuffle is skipped and vec is returned
// unchanged. i must be a compile-time constant because it feeds _MM_SHUFFLE.
// The parameter i is parenthesised at every use so that arguments containing
// low-precedence operators (e.g. `1<<1`, `n&3`, `c?1:2`) are evaluated as a unit
// before the lane offset is added.
// The (unsigned char) cast keeps the index non-negative before the modulo.
// vec is expanded more than once, so it should be free of side effects.
#define _mm_ror_ps(vec,i) \
    (((i)%4) ? (_mm_shuffle_ps(vec,vec, _MM_SHUFFLE((unsigned char)((i)+3)%4,(unsigned char)((i)+2)%4,(unsigned char)((i)+1)%4,(unsigned char)((i)+0)%4))) : (vec))
// Rotates the four lanes of vec by i positions in the opposite direction to
// _mm_ror_ps: result lane k takes source lane (4 - i + k) mod 4. When i is a multiple
// of 4 the shuffle is skipped and vec is returned unchanged. i must be a compile-time
// constant because it feeds _MM_SHUFFLE.
// The parameter i is parenthesised at every use: without that, an argument such as
// `a-b` would expand to `7-a-b` instead of `7-(a-b)` and select the wrong lanes.
// The (unsigned char) cast wraps a negative difference modulo 256 (a multiple of 4),
// so the subsequent %4 still yields a valid lane index.
// vec is expanded more than once, so it should be free of side effects.
#define _mm_rol_ps(vec,i) \
    (((i)%4) ? (_mm_shuffle_ps(vec,vec, _MM_SHUFFLE((unsigned char)(7-(i))%4,(unsigned char)(6-(i))%4,(unsigned char)(5-(i))%4,(unsigned char)(4-(i))%4))) : (vec))
// vec_sld: expands to a rotate of vec by x/4 lanes via _mm_ror_ps.
// NOTE(review): the vec2 argument does not appear in the expansion, so the result
// depends on vec only — confirm callers always pass the same vector twice.
78 #define vec_sld(vec,vec2,x) _mm_ror_ps(vec, ((x)/4))
// _mm_abs_ps: computes (~_MASKSIGN_) & vec; _mm_neg_ps: computes _MASKSIGN_ ^ vec.
// _MASKSIGN_ is defined outside this excerpt — presumably a mask with only the sign
// bit of each lane set; verify before relying on these.
80 #define _mm_abs_ps(vec) _mm_andnot_ps(_MASKSIGN_,vec)
81 #define _mm_neg_ps(vec) _mm_xor_ps(_MASKSIGN_,vec)
// vec_madd: lane-wise a*b + c, done as a separate multiply followed by an add.
83 #define vec_madd(a, b, c) _mm_add_ps(c, _mm_mul_ps(a, b) )
100 return _mm_or_ps(_mm_and_ps(mask, b), _mm_andnot_ps(mask, a));
104 return vec_sel(a, b, _mm_load_ps((
float *)_mask));
108 return vec_sel(a, b, _mm_set1_ps(*(
float *)&_mask));
113 return _mm_set1_ps( *(
float *)&x );
118 return _mm_and_ps( x,
toM128( 0x7fffffff ) );
159 __m128i result = _mm_cvtps_epi32(x);
160 return (__m128 &)result;
167 return _mm_cvtepi32_ps((__m128i &)x);
// Short aliases that forward directly to SSE intrinsics; every operation is lane-wise
// on four packed floats.
// vec_nmsub: c - a*b (multiply, then subtract from c).
170 #define vec_nmsub(a,b,c) _mm_sub_ps( c, _mm_mul_ps( a, b ) )
// Arithmetic and bitwise binary operations.
171 #define vec_sub(a,b) _mm_sub_ps( a, b )
172 #define vec_add(a,b) _mm_add_ps( a, b )
173 #define vec_mul(a,b) _mm_mul_ps( a, b )
174 #define vec_xor(a,b) _mm_xor_ps( a, b )
175 #define vec_and(a,b) _mm_and_ps( a, b )
// Comparisons: forward to the packed compare intrinsics (equal / greater-than).
176 #define vec_cmpeq(a,b) _mm_cmpeq_ps( a, b )
177 #define vec_cmpgt(a,b) _mm_cmpgt_ps( a, b )
// Interleaves: mergeh maps to unpacklo, mergel maps to unpackhi.
179 #define vec_mergeh(a,b) _mm_unpacklo_ps( a, b )
180 #define vec_mergel(a,b) _mm_unpackhi_ps( a, b )
// vec_andc: a & ~b — note the swapped argument order passed to _mm_andnot_ps,
// which complements its FIRST operand.
182 #define vec_andc(a,b) _mm_andnot_ps( b, a )
// Square root, reciprocal square root and reciprocal; the latter two forward to the
// _mm_rsqrt_ps / _mm_rcp_ps intrinsics.
184 #define sqrtf4(x) _mm_sqrt_ps( x )
185 #define rsqrtf4(x) _mm_rsqrt_ps( x )
186 #define recipf4(x) _mm_rcp_ps( x )
// negatef4: computed as 0 - x.
187 #define negatef4(x) _mm_sub_ps( _mm_setzero_ps(), x )
// Four-lane constants: each macro expands to an _mm_setr_ps call with the same value
// (0.5 and 3.0 respectively) in all four lanes. The call is re-evaluated at each use.
191 #define _half4 _mm_setr_ps(.5f,.5f,.5f,.5f)
192 #define _three _mm_setr_ps(3.f,3.f,3.f,3.f)
193 const __m128 approx = _mm_rsqrt_ps( v );
194 const __m128 muls = _mm_mul_ps(_mm_mul_ps(v, approx), approx);
195 return _mm_mul_ps(_mm_mul_ps(
_half4, approx), _mm_sub_ps(
_three, muls) );
201 __m128
select = _mm_cmplt_ps( x, _mm_setzero_ps() );
209 __m128 xabs2 = _mm_mul_ps(xabs, xabs);
210 __m128 xabs4 = _mm_mul_ps(xabs2, xabs2);
212 xabs, _mm_set1_ps(0.0066700901f)),
213 xabs, _mm_set1_ps(-0.0170881256f)),
214 xabs, _mm_set1_ps( 0.0308918810f));
216 xabs, _mm_set1_ps(0.0889789874f)),
217 xabs, _mm_set1_ps(-0.2145988016f)),
218 xabs, _mm_set1_ps( 1.5707963050f));
220 __m128 result =
vec_madd(hi, xabs4, lo);
225 vec_nmsub(t1, result, _mm_set1_ps(3.1415926535898f)),
// Single-precision constants for the sine/cosine routines; the code that consumes
// them is not visible in this excerpt.
// NOTE(review): the CC* values are close to -1/720, 1/24, -1/2 and the SC* values to
// -1/5040, 1/120, -1/6, so they are presumably polynomial coefficients for cosine and
// sine respectively — confirm against the evaluation code.
235 #define _SINCOS_CC0 -0.0013602249f
236 #define _SINCOS_CC1 0.0416566950f
237 #define _SINCOS_CC2 -0.4999990225f
238 #define _SINCOS_SC0 -0.0001950727f
239 #define _SINCOS_SC1 0.0083320758f
240 #define _SINCOS_SC2 -0.1666665247f
// KC1 + KC2 is approximately 1.5707963268 (pi/2); presumably pi/2 split into a large
// and a small part for range reduction — TODO confirm at the use site.
242 #define _SINCOS_KC1 1.57079625129f
243 #define _SINCOS_KC2 7.54978995489e-8f
249 xl =
vec_mul(x, _mm_set1_ps(0.63661977236f));
307 xl =
vec_mul(x, _mm_set1_ps(0.63661977236f));
319 __m128i temp = _mm_add_epi32(_mm_set1_epi32(1),(__m128i &)offsetSin);
320 offsetCos = (__m128 &)temp;
367 #ifdef _VECTORMATH_DEBUG
370 namespace Vectormath {
// Declaration only: takes eight Vector3 inputs by const reference and a pointer to
// vec_ushort8 named threeQuads; returns nothing.
// NOTE(review): judging by the name, this presumably converts the vectors to half
// precision and writes them through threeQuads — the definition is not visible here,
// so the output layout and any alignment requirement must be checked there.
711 VECTORMATH_FORCE_INLINE void storeHalfFloats(
const Vector3 &vec0,
const Vector3 &vec1,
const Vector3 &vec2,
const Vector3 &vec3,
const Vector3 &vec4,
const Vector3 &vec5,
const Vector3 &vec6,
const Vector3 &vec7,
vec_ushort8 * threeQuads );
713 #ifdef _VECTORMATH_DEBUG
1054 #ifdef _VECTORMATH_DEBUG
1309 #ifdef _VECTORMATH_DEBUG
// Declaration only: takes a float parameter t and four Quat inputs by const reference,
// and returns a Quat by (const) value.
// NOTE(review): the name and the unitQuat* parameter names suggest spherical
// quadrangle interpolation over unit-length quaternions — the definition is not
// visible here; confirm the expected range of t and the normalization precondition.
1630 VECTORMATH_FORCE_INLINE const Quat
squad(
float t,
const Quat &unitQuat0,
const Quat &unitQuat1,
const Quat &unitQuat2,
const Quat &unitQuat3 );
1650 #ifdef _VECTORMATH_DEBUG
1916 #ifdef _VECTORMATH_DEBUG
2267 #ifdef _VECTORMATH_DEBUG
2524 #ifdef _VECTORMATH_DEBUG