Bullet Collision Detection & Physics Library
btVector3.h
Go to the documentation of this file.
1 /*
2 Copyright (c) 2003-2006 Gino van den Bergen / Erwin Coumans http://continuousphysics.com/Bullet/
3 
4 This software is provided 'as-is', without any express or implied warranty.
5 In no event will the authors be held liable for any damages arising from the use of this software.
6 Permission is granted to anyone to use this software for any purpose,
7 including commercial applications, and to alter it and redistribute it freely,
8 subject to the following restrictions:
9 
10 1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
11 2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
12 3. This notice may not be removed or altered from any source distribution.
13 */
14 
15 
16 
17 #ifndef BT_VECTOR3_H
18 #define BT_VECTOR3_H
19 
20 //#include <stdint.h>
21 #include "btScalar.h"
22 #include "btMinMax.h"
23 #include "btAlignedAllocator.h"
24 
25 #ifdef BT_USE_DOUBLE_PRECISION
26 #define btVector3Data btVector3DoubleData
27 #define btVector3DataName "btVector3DoubleData"
28 #else
29 #define btVector3Data btVector3FloatData
30 #define btVector3DataName "btVector3FloatData"
31 #endif //BT_USE_DOUBLE_PRECISION
32 
33 #if defined BT_USE_SSE
34 
35 //typedef uint32_t __m128i __attribute__ ((vector_size(16)));
36 
37 #ifdef _MSC_VER
38 #pragma warning(disable: 4556) // value of intrinsic immediate argument '4294967239' is out of range '0 - 255'
39 #endif
40 
41 
42 #define BT_SHUFFLE(x,y,z,w) ((w)<<6 | (z)<<4 | (y)<<2 | (x))
43 //#define bt_pshufd_ps( _a, _mask ) (__m128) _mm_shuffle_epi32((__m128i)(_a), (_mask) )
44 #define bt_pshufd_ps( _a, _mask ) _mm_shuffle_ps((_a), (_a), (_mask) )
45 #define bt_splat3_ps( _a, _i ) bt_pshufd_ps((_a), BT_SHUFFLE(_i,_i,_i, 3) )
46 #define bt_splat_ps( _a, _i ) bt_pshufd_ps((_a), BT_SHUFFLE(_i,_i,_i,_i) )
47 
48 #define btv3AbsiMask (_mm_set_epi32(0x00000000, 0x7FFFFFFF, 0x7FFFFFFF, 0x7FFFFFFF))
49 #define btvAbsMask (_mm_set_epi32( 0x7FFFFFFF, 0x7FFFFFFF, 0x7FFFFFFF, 0x7FFFFFFF))
50 #define btvFFF0Mask (_mm_set_epi32(0x00000000, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF))
51 #define btv3AbsfMask btCastiTo128f(btv3AbsiMask)
52 #define btvFFF0fMask btCastiTo128f(btvFFF0Mask)
53 #define btvxyzMaskf btvFFF0fMask
54 #define btvAbsfMask btCastiTo128f(btvAbsMask)
55 
56 
57 
58 const __m128 ATTRIBUTE_ALIGNED16(btvMzeroMask) = {-0.0f, -0.0f, -0.0f, -0.0f};
59 const __m128 ATTRIBUTE_ALIGNED16(v1110) = {1.0f, 1.0f, 1.0f, 0.0f};
60 const __m128 ATTRIBUTE_ALIGNED16(vHalf) = {0.5f, 0.5f, 0.5f, 0.5f};
61 const __m128 ATTRIBUTE_ALIGNED16(v1_5) = {1.5f, 1.5f, 1.5f, 1.5f};
62 
63 #endif
64 
65 #ifdef BT_USE_NEON
66 
67 const float32x4_t ATTRIBUTE_ALIGNED16(btvMzeroMask) = (float32x4_t){-0.0f, -0.0f, -0.0f, -0.0f};
68 const int32x4_t ATTRIBUTE_ALIGNED16(btvFFF0Mask) = (int32x4_t){0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0x0};
69 const int32x4_t ATTRIBUTE_ALIGNED16(btvAbsMask) = (int32x4_t){0x7FFFFFFF, 0x7FFFFFFF, 0x7FFFFFFF, 0x7FFFFFFF};
70 const int32x4_t ATTRIBUTE_ALIGNED16(btv3AbsMask) = (int32x4_t){0x7FFFFFFF, 0x7FFFFFFF, 0x7FFFFFFF, 0x0};
71 
72 #endif
73 
79 {
80 public:
81 
83 
84 #if defined (__SPU__) && defined (__CELLOS_LV2__)
85  btScalar m_floats[4];
86 public:
87  SIMD_FORCE_INLINE const vec_float4& get128() const
88  {
89  return *((const vec_float4*)&m_floats[0]);
90  }
91 public:
92 #else //__CELLOS_LV2__ __SPU__
93  #if defined (BT_USE_SSE) || defined(BT_USE_NEON) // _WIN32 || ARM
94  union {
95  btSimdFloat4 mVec128;
96  btScalar m_floats[4];
97  };
98  SIMD_FORCE_INLINE btSimdFloat4 get128() const
99  {
100  return mVec128;
101  }
102  SIMD_FORCE_INLINE void set128(btSimdFloat4 v128)
103  {
104  mVec128 = v128;
105  }
106  #else
107  btScalar m_floats[4];
108  #endif
109 #endif //__CELLOS_LV2__ __SPU__
110 
111  public:
112 
115  {
116 
117  }
118 
119 
120 
126  SIMD_FORCE_INLINE btVector3(const btScalar& _x, const btScalar& _y, const btScalar& _z)
127  {
128  m_floats[0] = _x;
129  m_floats[1] = _y;
130  m_floats[2] = _z;
131  m_floats[3] = btScalar(0.f);
132  }
133 
#if (defined (BT_USE_SSE_IN_API) && defined (BT_USE_SSE) )|| defined (BT_USE_NEON)
	// Set Vector: construct directly from a 128-bit SIMD register
	SIMD_FORCE_INLINE btVector3( btSimdFloat4 v)
	{
		mVec128 = v;
	}

	// Copy constructor (signature restored; it was lost in the extracted listing)
	SIMD_FORCE_INLINE btVector3(const btVector3& rhs)
	{
		mVec128 = rhs.mVec128;
	}

	// Assignment Operator (return type restored from the listing's body)
	SIMD_FORCE_INLINE btVector3&
	operator=(const btVector3& v)
	{
		mVec128 = v.mVec128;

		return *this;
	}
#endif // #if defined (BT_USE_SSE_IN_API) || defined (BT_USE_NEON)
156 
160  {
161 #if defined(BT_USE_SSE_IN_API) && defined (BT_USE_SSE)
162  mVec128 = _mm_add_ps(mVec128, v.mVec128);
163 #elif defined(BT_USE_NEON)
164  mVec128 = vaddq_f32(mVec128, v.mVec128);
165 #else
166  m_floats[0] += v.m_floats[0];
167  m_floats[1] += v.m_floats[1];
168  m_floats[2] += v.m_floats[2];
169 #endif
170  return *this;
171  }
172 
173 
177  {
178 #if defined(BT_USE_SSE_IN_API) && defined (BT_USE_SSE)
179  mVec128 = _mm_sub_ps(mVec128, v.mVec128);
180 #elif defined(BT_USE_NEON)
181  mVec128 = vsubq_f32(mVec128, v.mVec128);
182 #else
183  m_floats[0] -= v.m_floats[0];
184  m_floats[1] -= v.m_floats[1];
185  m_floats[2] -= v.m_floats[2];
186 #endif
187  return *this;
188  }
189 
193  {
194 #if defined(BT_USE_SSE_IN_API) && defined (BT_USE_SSE)
195  __m128 vs = _mm_load_ss(&s); // (S 0 0 0)
196  vs = bt_pshufd_ps(vs, 0x80); // (S S S 0.0)
197  mVec128 = _mm_mul_ps(mVec128, vs);
198 #elif defined(BT_USE_NEON)
199  mVec128 = vmulq_n_f32(mVec128, s);
200 #else
201  m_floats[0] *= s;
202  m_floats[1] *= s;
203  m_floats[2] *= s;
204 #endif
205  return *this;
206  }
207 
211  {
212  btFullAssert(s != btScalar(0.0));
213 
214 #if 0 //defined(BT_USE_SSE_IN_API)
215 // this code is not faster !
216  __m128 vs = _mm_load_ss(&s);
217  vs = _mm_div_ss(v1110, vs);
218  vs = bt_pshufd_ps(vs, 0x00); // (S S S S)
219 
220  mVec128 = _mm_mul_ps(mVec128, vs);
221 
222  return *this;
223 #else
224  return *this *= btScalar(1.0) / s;
225 #endif
226  }
227 
231  {
232 #if defined(BT_USE_SSE_IN_API) && defined (BT_USE_SSE)
233  __m128 vd = _mm_mul_ps(mVec128, v.mVec128);
234  __m128 z = _mm_movehl_ps(vd, vd);
235  __m128 y = _mm_shuffle_ps(vd, vd, 0x55);
236  vd = _mm_add_ss(vd, y);
237  vd = _mm_add_ss(vd, z);
238  return _mm_cvtss_f32(vd);
239 #elif defined(BT_USE_NEON)
240  float32x4_t vd = vmulq_f32(mVec128, v.mVec128);
241  float32x2_t x = vpadd_f32(vget_low_f32(vd), vget_low_f32(vd));
242  x = vadd_f32(x, vget_high_f32(vd));
243  return vget_lane_f32(x, 0);
244 #else
245  return m_floats[0] * v.m_floats[0] +
246  m_floats[1] * v.m_floats[1] +
247  m_floats[2] * v.m_floats[2];
248 #endif
249  }
250 
253  {
254  return dot(*this);
255  }
256 
259  {
260  return btSqrt(length2());
261  }
262 
265  SIMD_FORCE_INLINE btScalar distance2(const btVector3& v) const;
266 
269  SIMD_FORCE_INLINE btScalar distance(const btVector3& v) const;
270 
272  {
273  btVector3 absVec = this->absolute();
274  int maxIndex = absVec.maxAxis();
275  if (absVec[maxIndex]>0)
276  {
277  *this /= absVec[maxIndex];
278  return *this /= length();
279  }
280  setValue(1,0,0);
281  return *this;
282  }
283 
287  {
288 #if defined(BT_USE_SSE_IN_API) && defined (BT_USE_SSE)
289  // dot product first
290  __m128 vd = _mm_mul_ps(mVec128, mVec128);
291  __m128 z = _mm_movehl_ps(vd, vd);
292  __m128 y = _mm_shuffle_ps(vd, vd, 0x55);
293  vd = _mm_add_ss(vd, y);
294  vd = _mm_add_ss(vd, z);
295 
296  #if 0
297  vd = _mm_sqrt_ss(vd);
298  vd = _mm_div_ss(v1110, vd);
299  vd = bt_splat_ps(vd, 0x80);
300  mVec128 = _mm_mul_ps(mVec128, vd);
301  #else
302 
303  // NR step 1/sqrt(x) - vd is x, y is output
304  y = _mm_rsqrt_ss(vd); // estimate
305 
306  // one step NR
307  z = v1_5;
308  vd = _mm_mul_ss(vd, vHalf); // vd * 0.5
309  //x2 = vd;
310  vd = _mm_mul_ss(vd, y); // vd * 0.5 * y0
311  vd = _mm_mul_ss(vd, y); // vd * 0.5 * y0 * y0
312  z = _mm_sub_ss(z, vd); // 1.5 - vd * 0.5 * y0 * y0
313 
314  y = _mm_mul_ss(y, z); // y0 * (1.5 - vd * 0.5 * y0 * y0)
315 
316  y = bt_splat_ps(y, 0x80);
317  mVec128 = _mm_mul_ps(mVec128, y);
318 
319  #endif
320 
321 
322  return *this;
323 #else
324  return *this /= length();
325 #endif
326  }
327 
329  SIMD_FORCE_INLINE btVector3 normalized() const;
330 
334  SIMD_FORCE_INLINE btVector3 rotate( const btVector3& wAxis, const btScalar angle ) const;
335 
339  {
340  btScalar s = btSqrt(length2() * v.length2());
341  btFullAssert(s != btScalar(0.0));
342  return btAcos(dot(v) / s);
343  }
344 
347  {
348 #if defined(BT_USE_SSE_IN_API) && defined (BT_USE_SSE)
349  return btVector3(_mm_and_ps(mVec128, btv3AbsfMask));
350 #elif defined(BT_USE_NEON)
351  return btVector3(vabsq_f32(mVec128));
352 #else
353  return btVector3(
354  btFabs(m_floats[0]),
355  btFabs(m_floats[1]),
356  btFabs(m_floats[2]));
357 #endif
358  }
359 
363  {
364 #if defined(BT_USE_SSE_IN_API) && defined (BT_USE_SSE)
365  __m128 T, V;
366 
367  T = bt_pshufd_ps(mVec128, BT_SHUFFLE(1, 2, 0, 3)); // (Y Z X 0)
368  V = bt_pshufd_ps(v.mVec128, BT_SHUFFLE(1, 2, 0, 3)); // (Y Z X 0)
369 
370  V = _mm_mul_ps(V, mVec128);
371  T = _mm_mul_ps(T, v.mVec128);
372  V = _mm_sub_ps(V, T);
373 
374  V = bt_pshufd_ps(V, BT_SHUFFLE(1, 2, 0, 3));
375  return btVector3(V);
376 #elif defined(BT_USE_NEON)
377  float32x4_t T, V;
378  // form (Y, Z, X, _) of mVec128 and v.mVec128
379  float32x2_t Tlow = vget_low_f32(mVec128);
380  float32x2_t Vlow = vget_low_f32(v.mVec128);
381  T = vcombine_f32(vext_f32(Tlow, vget_high_f32(mVec128), 1), Tlow);
382  V = vcombine_f32(vext_f32(Vlow, vget_high_f32(v.mVec128), 1), Vlow);
383 
384  V = vmulq_f32(V, mVec128);
385  T = vmulq_f32(T, v.mVec128);
386  V = vsubq_f32(V, T);
387  Vlow = vget_low_f32(V);
388  // form (Y, Z, X, _);
389  V = vcombine_f32(vext_f32(Vlow, vget_high_f32(V), 1), Vlow);
390  V = (float32x4_t)vandq_s32((int32x4_t)V, btvFFF0Mask);
391 
392  return btVector3(V);
393 #else
394  return btVector3(
395  m_floats[1] * v.m_floats[2] - m_floats[2] * v.m_floats[1],
396  m_floats[2] * v.m_floats[0] - m_floats[0] * v.m_floats[2],
397  m_floats[0] * v.m_floats[1] - m_floats[1] * v.m_floats[0]);
398 #endif
399  }
400 
402  {
403 #if defined(BT_USE_SSE_IN_API) && defined (BT_USE_SSE)
404  // cross:
405  __m128 T = _mm_shuffle_ps(v1.mVec128, v1.mVec128, BT_SHUFFLE(1, 2, 0, 3)); // (Y Z X 0)
406  __m128 V = _mm_shuffle_ps(v2.mVec128, v2.mVec128, BT_SHUFFLE(1, 2, 0, 3)); // (Y Z X 0)
407 
408  V = _mm_mul_ps(V, v1.mVec128);
409  T = _mm_mul_ps(T, v2.mVec128);
410  V = _mm_sub_ps(V, T);
411 
412  V = _mm_shuffle_ps(V, V, BT_SHUFFLE(1, 2, 0, 3));
413 
414  // dot:
415  V = _mm_mul_ps(V, mVec128);
416  __m128 z = _mm_movehl_ps(V, V);
417  __m128 y = _mm_shuffle_ps(V, V, 0x55);
418  V = _mm_add_ss(V, y);
419  V = _mm_add_ss(V, z);
420  return _mm_cvtss_f32(V);
421 
422 #elif defined(BT_USE_NEON)
423  // cross:
424  float32x4_t T, V;
425  // form (Y, Z, X, _) of mVec128 and v.mVec128
426  float32x2_t Tlow = vget_low_f32(v1.mVec128);
427  float32x2_t Vlow = vget_low_f32(v2.mVec128);
428  T = vcombine_f32(vext_f32(Tlow, vget_high_f32(v1.mVec128), 1), Tlow);
429  V = vcombine_f32(vext_f32(Vlow, vget_high_f32(v2.mVec128), 1), Vlow);
430 
431  V = vmulq_f32(V, v1.mVec128);
432  T = vmulq_f32(T, v2.mVec128);
433  V = vsubq_f32(V, T);
434  Vlow = vget_low_f32(V);
435  // form (Y, Z, X, _);
436  V = vcombine_f32(vext_f32(Vlow, vget_high_f32(V), 1), Vlow);
437 
438  // dot:
439  V = vmulq_f32(mVec128, V);
440  float32x2_t x = vpadd_f32(vget_low_f32(V), vget_low_f32(V));
441  x = vadd_f32(x, vget_high_f32(V));
442  return vget_lane_f32(x, 0);
443 #else
444  return
445  m_floats[0] * (v1.m_floats[1] * v2.m_floats[2] - v1.m_floats[2] * v2.m_floats[1]) +
446  m_floats[1] * (v1.m_floats[2] * v2.m_floats[0] - v1.m_floats[0] * v2.m_floats[2]) +
447  m_floats[2] * (v1.m_floats[0] * v2.m_floats[1] - v1.m_floats[1] * v2.m_floats[0]);
448 #endif
449  }
450 
454  {
455  return m_floats[0] < m_floats[1] ? (m_floats[0] <m_floats[2] ? 0 : 2) : (m_floats[1] <m_floats[2] ? 1 : 2);
456  }
457 
461  {
462  return m_floats[0] < m_floats[1] ? (m_floats[1] <m_floats[2] ? 2 : 1) : (m_floats[0] <m_floats[2] ? 2 : 0);
463  }
464 
466  {
467  return absolute().minAxis();
468  }
469 
471  {
472  return absolute().maxAxis();
473  }
474 
475 
477  {
478 #if defined(BT_USE_SSE_IN_API) && defined (BT_USE_SSE)
479  __m128 vrt = _mm_load_ss(&rt); // (rt 0 0 0)
480  btScalar s = btScalar(1.0) - rt;
481  __m128 vs = _mm_load_ss(&s); // (S 0 0 0)
482  vs = bt_pshufd_ps(vs, 0x80); // (S S S 0.0)
483  __m128 r0 = _mm_mul_ps(v0.mVec128, vs);
484  vrt = bt_pshufd_ps(vrt, 0x80); // (rt rt rt 0.0)
485  __m128 r1 = _mm_mul_ps(v1.mVec128, vrt);
486  __m128 tmp3 = _mm_add_ps(r0,r1);
487  mVec128 = tmp3;
488 #elif defined(BT_USE_NEON)
489  mVec128 = vsubq_f32(v1.mVec128, v0.mVec128);
490  mVec128 = vmulq_n_f32(mVec128, rt);
491  mVec128 = vaddq_f32(mVec128, v0.mVec128);
492 #else
493  btScalar s = btScalar(1.0) - rt;
494  m_floats[0] = s * v0.m_floats[0] + rt * v1.m_floats[0];
495  m_floats[1] = s * v0.m_floats[1] + rt * v1.m_floats[1];
496  m_floats[2] = s * v0.m_floats[2] + rt * v1.m_floats[2];
497  //don't do the unused w component
498  // m_co[3] = s * v0[3] + rt * v1[3];
499 #endif
500  }
501 
505  SIMD_FORCE_INLINE btVector3 lerp(const btVector3& v, const btScalar& t) const
506  {
507 #if defined(BT_USE_SSE_IN_API) && defined (BT_USE_SSE)
508  __m128 vt = _mm_load_ss(&t); // (t 0 0 0)
509  vt = bt_pshufd_ps(vt, 0x80); // (rt rt rt 0.0)
510  __m128 vl = _mm_sub_ps(v.mVec128, mVec128);
511  vl = _mm_mul_ps(vl, vt);
512  vl = _mm_add_ps(vl, mVec128);
513 
514  return btVector3(vl);
515 #elif defined(BT_USE_NEON)
516  float32x4_t vl = vsubq_f32(v.mVec128, mVec128);
517  vl = vmulq_n_f32(vl, t);
518  vl = vaddq_f32(vl, mVec128);
519 
520  return btVector3(vl);
521 #else
522  return
523  btVector3( m_floats[0] + (v.m_floats[0] - m_floats[0]) * t,
524  m_floats[1] + (v.m_floats[1] - m_floats[1]) * t,
525  m_floats[2] + (v.m_floats[2] - m_floats[2]) * t);
526 #endif
527  }
528 
532  {
533 #if defined(BT_USE_SSE_IN_API) && defined (BT_USE_SSE)
534  mVec128 = _mm_mul_ps(mVec128, v.mVec128);
535 #elif defined(BT_USE_NEON)
536  mVec128 = vmulq_f32(mVec128, v.mVec128);
537 #else
538  m_floats[0] *= v.m_floats[0];
539  m_floats[1] *= v.m_floats[1];
540  m_floats[2] *= v.m_floats[2];
541 #endif
542  return *this;
543  }
544 
546  SIMD_FORCE_INLINE const btScalar& getX() const { return m_floats[0]; }
548  SIMD_FORCE_INLINE const btScalar& getY() const { return m_floats[1]; }
550  SIMD_FORCE_INLINE const btScalar& getZ() const { return m_floats[2]; }
552  SIMD_FORCE_INLINE void setX(btScalar _x) { m_floats[0] = _x;};
554  SIMD_FORCE_INLINE void setY(btScalar _y) { m_floats[1] = _y;};
556  SIMD_FORCE_INLINE void setZ(btScalar _z) { m_floats[2] = _z;};
558  SIMD_FORCE_INLINE void setW(btScalar _w) { m_floats[3] = _w;};
560  SIMD_FORCE_INLINE const btScalar& x() const { return m_floats[0]; }
562  SIMD_FORCE_INLINE const btScalar& y() const { return m_floats[1]; }
564  SIMD_FORCE_INLINE const btScalar& z() const { return m_floats[2]; }
566  SIMD_FORCE_INLINE const btScalar& w() const { return m_floats[3]; }
567 
568  //SIMD_FORCE_INLINE btScalar& operator[](int i) { return (&m_floats[0])[i]; }
569  //SIMD_FORCE_INLINE const btScalar& operator[](int i) const { return (&m_floats[0])[i]; }
571  SIMD_FORCE_INLINE operator btScalar *() { return &m_floats[0]; }
572  SIMD_FORCE_INLINE operator const btScalar *() const { return &m_floats[0]; }
573 
574  SIMD_FORCE_INLINE bool operator==(const btVector3& other) const
575  {
576 #if defined(BT_USE_SSE_IN_API) && defined (BT_USE_SSE)
577  return (0xf == _mm_movemask_ps((__m128)_mm_cmpeq_ps(mVec128, other.mVec128)));
578 #else
579  return ((m_floats[3]==other.m_floats[3]) &&
580  (m_floats[2]==other.m_floats[2]) &&
581  (m_floats[1]==other.m_floats[1]) &&
582  (m_floats[0]==other.m_floats[0]));
583 #endif
584  }
585 
586  SIMD_FORCE_INLINE bool operator!=(const btVector3& other) const
587  {
588  return !(*this == other);
589  }
590 
594  SIMD_FORCE_INLINE void setMax(const btVector3& other)
595  {
596 #if defined(BT_USE_SSE_IN_API) && defined (BT_USE_SSE)
597  mVec128 = _mm_max_ps(mVec128, other.mVec128);
598 #elif defined(BT_USE_NEON)
599  mVec128 = vmaxq_f32(mVec128, other.mVec128);
600 #else
601  btSetMax(m_floats[0], other.m_floats[0]);
602  btSetMax(m_floats[1], other.m_floats[1]);
603  btSetMax(m_floats[2], other.m_floats[2]);
604  btSetMax(m_floats[3], other.w());
605 #endif
606  }
607 
611  SIMD_FORCE_INLINE void setMin(const btVector3& other)
612  {
613 #if defined(BT_USE_SSE_IN_API) && defined (BT_USE_SSE)
614  mVec128 = _mm_min_ps(mVec128, other.mVec128);
615 #elif defined(BT_USE_NEON)
616  mVec128 = vminq_f32(mVec128, other.mVec128);
617 #else
618  btSetMin(m_floats[0], other.m_floats[0]);
619  btSetMin(m_floats[1], other.m_floats[1]);
620  btSetMin(m_floats[2], other.m_floats[2]);
621  btSetMin(m_floats[3], other.w());
622 #endif
623  }
624 
625  SIMD_FORCE_INLINE void setValue(const btScalar& _x, const btScalar& _y, const btScalar& _z)
626  {
627  m_floats[0]=_x;
628  m_floats[1]=_y;
629  m_floats[2]=_z;
630  m_floats[3] = btScalar(0.f);
631  }
632 
634  {
635 #if defined(BT_USE_SSE_IN_API) && defined (BT_USE_SSE)
636 
637  __m128 V = _mm_and_ps(mVec128, btvFFF0fMask);
638  __m128 V0 = _mm_xor_ps(btvMzeroMask, V);
639  __m128 V2 = _mm_movelh_ps(V0, V);
640 
641  __m128 V1 = _mm_shuffle_ps(V, V0, 0xCE);
642 
643  V0 = _mm_shuffle_ps(V0, V, 0xDB);
644  V2 = _mm_shuffle_ps(V2, V, 0xF9);
645 
646  v0->mVec128 = V0;
647  v1->mVec128 = V1;
648  v2->mVec128 = V2;
649 #else
650  v0->setValue(0. ,-z() ,y());
651  v1->setValue(z() ,0. ,-x());
652  v2->setValue(-y() ,x() ,0.);
653 #endif
654  }
655 
656  void setZero()
657  {
658 #if defined(BT_USE_SSE_IN_API) && defined (BT_USE_SSE)
659  mVec128 = (__m128)_mm_xor_ps(mVec128, mVec128);
660 #elif defined(BT_USE_NEON)
661  int32x4_t vi = vdupq_n_s32(0);
662  mVec128 = vreinterpretq_f32_s32(vi);
663 #else
664  setValue(btScalar(0.),btScalar(0.),btScalar(0.));
665 #endif
666  }
667 
668  SIMD_FORCE_INLINE bool isZero() const
669  {
670  return m_floats[0] == btScalar(0) && m_floats[1] == btScalar(0) && m_floats[2] == btScalar(0);
671  }
672 
674  {
675  return length2() < SIMD_EPSILON;
676  }
677 
678  SIMD_FORCE_INLINE void serialize(struct btVector3Data& dataOut) const;
679 
680  SIMD_FORCE_INLINE void deSerialize(const struct btVector3Data& dataIn);
681 
682  SIMD_FORCE_INLINE void serializeFloat(struct btVector3FloatData& dataOut) const;
683 
684  SIMD_FORCE_INLINE void deSerializeFloat(const struct btVector3FloatData& dataIn);
685 
686  SIMD_FORCE_INLINE void serializeDouble(struct btVector3DoubleData& dataOut) const;
687 
688  SIMD_FORCE_INLINE void deSerializeDouble(const struct btVector3DoubleData& dataIn);
689 
694  SIMD_FORCE_INLINE long maxDot( const btVector3 *array, long array_count, btScalar &dotOut ) const;
695 
700  SIMD_FORCE_INLINE long minDot( const btVector3 *array, long array_count, btScalar &dotOut ) const;
701 
702  /* create a vector as btVector3( this->dot( btVector3 v0 ), this->dot( btVector3 v1), this->dot( btVector3 v2 )) */
703  SIMD_FORCE_INLINE btVector3 dot3( const btVector3 &v0, const btVector3 &v1, const btVector3 &v2 ) const
704  {
705 #if defined(BT_USE_SSE_IN_API) && defined (BT_USE_SSE)
706 
707  __m128 a0 = _mm_mul_ps( v0.mVec128, this->mVec128 );
708  __m128 a1 = _mm_mul_ps( v1.mVec128, this->mVec128 );
709  __m128 a2 = _mm_mul_ps( v2.mVec128, this->mVec128 );
710  __m128 b0 = _mm_unpacklo_ps( a0, a1 );
711  __m128 b1 = _mm_unpackhi_ps( a0, a1 );
712  __m128 b2 = _mm_unpacklo_ps( a2, _mm_setzero_ps() );
713  __m128 r = _mm_movelh_ps( b0, b2 );
714  r = _mm_add_ps( r, _mm_movehl_ps( b2, b0 ));
715  a2 = _mm_and_ps( a2, btvxyzMaskf);
716  r = _mm_add_ps( r, btCastdTo128f (_mm_move_sd( btCastfTo128d(a2), btCastfTo128d(b1) )));
717  return btVector3(r);
718 
719 #elif defined(BT_USE_NEON)
720  static const uint32x4_t xyzMask = (const uint32x4_t){ -1, -1, -1, 0 };
721  float32x4_t a0 = vmulq_f32( v0.mVec128, this->mVec128);
722  float32x4_t a1 = vmulq_f32( v1.mVec128, this->mVec128);
723  float32x4_t a2 = vmulq_f32( v2.mVec128, this->mVec128);
724  float32x2x2_t zLo = vtrn_f32( vget_high_f32(a0), vget_high_f32(a1));
725  a2 = (float32x4_t) vandq_u32((uint32x4_t) a2, xyzMask );
726  float32x2_t b0 = vadd_f32( vpadd_f32( vget_low_f32(a0), vget_low_f32(a1)), zLo.val[0] );
727  float32x2_t b1 = vpadd_f32( vpadd_f32( vget_low_f32(a2), vget_high_f32(a2)), vdup_n_f32(0.0f));
728  return btVector3( vcombine_f32(b0, b1) );
729 #else
730  return btVector3( dot(v0), dot(v1), dot(v2));
731 #endif
732  }
733 };
734 
737 operator+(const btVector3& v1, const btVector3& v2)
738 {
739 #if defined(BT_USE_SSE_IN_API) && defined (BT_USE_SSE)
740  return btVector3(_mm_add_ps(v1.mVec128, v2.mVec128));
741 #elif defined(BT_USE_NEON)
742  return btVector3(vaddq_f32(v1.mVec128, v2.mVec128));
743 #else
744  return btVector3(
745  v1.m_floats[0] + v2.m_floats[0],
746  v1.m_floats[1] + v2.m_floats[1],
747  v1.m_floats[2] + v2.m_floats[2]);
748 #endif
749 }
750 
753 operator*(const btVector3& v1, const btVector3& v2)
754 {
755 #if defined(BT_USE_SSE_IN_API) && defined (BT_USE_SSE)
756  return btVector3(_mm_mul_ps(v1.mVec128, v2.mVec128));
757 #elif defined(BT_USE_NEON)
758  return btVector3(vmulq_f32(v1.mVec128, v2.mVec128));
759 #else
760  return btVector3(
761  v1.m_floats[0] * v2.m_floats[0],
762  v1.m_floats[1] * v2.m_floats[1],
763  v1.m_floats[2] * v2.m_floats[2]);
764 #endif
765 }
766 
769 operator-(const btVector3& v1, const btVector3& v2)
770 {
771 #if (defined(BT_USE_SSE_IN_API) && defined(BT_USE_SSE))
772 
773  // without _mm_and_ps this code causes slowdown in Concave moving
774  __m128 r = _mm_sub_ps(v1.mVec128, v2.mVec128);
775  return btVector3(_mm_and_ps(r, btvFFF0fMask));
776 #elif defined(BT_USE_NEON)
777  float32x4_t r = vsubq_f32(v1.mVec128, v2.mVec128);
778  return btVector3((float32x4_t)vandq_s32((int32x4_t)r, btvFFF0Mask));
779 #else
780  return btVector3(
781  v1.m_floats[0] - v2.m_floats[0],
782  v1.m_floats[1] - v2.m_floats[1],
783  v1.m_floats[2] - v2.m_floats[2]);
784 #endif
785 }
786 
790 {
791 #if (defined(BT_USE_SSE_IN_API) && defined (BT_USE_SSE))
792  __m128 r = _mm_xor_ps(v.mVec128, btvMzeroMask);
793  return btVector3(_mm_and_ps(r, btvFFF0fMask));
794 #elif defined(BT_USE_NEON)
795  return btVector3((btSimdFloat4)veorq_s32((int32x4_t)v.mVec128, (int32x4_t)btvMzeroMask));
796 #else
797  return btVector3(-v.m_floats[0], -v.m_floats[1], -v.m_floats[2]);
798 #endif
799 }
800 
803 operator*(const btVector3& v, const btScalar& s)
804 {
805 #if defined(BT_USE_SSE_IN_API) && defined (BT_USE_SSE)
806  __m128 vs = _mm_load_ss(&s); // (S 0 0 0)
807  vs = bt_pshufd_ps(vs, 0x80); // (S S S 0.0)
808  return btVector3(_mm_mul_ps(v.mVec128, vs));
809 #elif defined(BT_USE_NEON)
810  float32x4_t r = vmulq_n_f32(v.mVec128, s);
811  return btVector3((float32x4_t)vandq_s32((int32x4_t)r, btvFFF0Mask));
812 #else
813  return btVector3(v.m_floats[0] * s, v.m_floats[1] * s, v.m_floats[2] * s);
814 #endif
815 }
816 
819 operator*(const btScalar& s, const btVector3& v)
820 {
821  return v * s;
822 }
823 
826 operator/(const btVector3& v, const btScalar& s)
827 {
828  btFullAssert(s != btScalar(0.0));
829 #if 0 //defined(BT_USE_SSE_IN_API)
830 // this code is not faster !
831  __m128 vs = _mm_load_ss(&s);
832  vs = _mm_div_ss(v1110, vs);
833  vs = bt_pshufd_ps(vs, 0x00); // (S S S S)
834 
835  return btVector3(_mm_mul_ps(v.mVec128, vs));
836 #else
837  return v * (btScalar(1.0) / s);
838 #endif
839 }
840 
843 operator/(const btVector3& v1, const btVector3& v2)
844 {
845 #if (defined(BT_USE_SSE_IN_API)&& defined (BT_USE_SSE))
846  __m128 vec = _mm_div_ps(v1.mVec128, v2.mVec128);
847  vec = _mm_and_ps(vec, btvFFF0fMask);
848  return btVector3(vec);
849 #elif defined(BT_USE_NEON)
850  float32x4_t x, y, v, m;
851 
852  x = v1.mVec128;
853  y = v2.mVec128;
854 
855  v = vrecpeq_f32(y); // v ~ 1/y
856  m = vrecpsq_f32(y, v); // m = (2-v*y)
857  v = vmulq_f32(v, m); // vv = v*m ~~ 1/y
858  m = vrecpsq_f32(y, v); // mm = (2-vv*y)
859  v = vmulq_f32(v, x); // x*vv
860  v = vmulq_f32(v, m); // (x*vv)*(2-vv*y) = x*(vv(2-vv*y)) ~~~ x/y
861 
862  return btVector3(v);
863 #else
864  return btVector3(
865  v1.m_floats[0] / v2.m_floats[0],
866  v1.m_floats[1] / v2.m_floats[1],
867  v1.m_floats[2] / v2.m_floats[2]);
868 #endif
869 }
870 
873 btDot(const btVector3& v1, const btVector3& v2)
874 {
875  return v1.dot(v2);
876 }
877 
878 
881 btDistance2(const btVector3& v1, const btVector3& v2)
882 {
883  return v1.distance2(v2);
884 }
885 
886 
889 btDistance(const btVector3& v1, const btVector3& v2)
890 {
891  return v1.distance(v2);
892 }
893 
896 btAngle(const btVector3& v1, const btVector3& v2)
897 {
898  return v1.angle(v2);
899 }
900 
903 btCross(const btVector3& v1, const btVector3& v2)
904 {
905  return v1.cross(v2);
906 }
907 
909 btTriple(const btVector3& v1, const btVector3& v2, const btVector3& v3)
910 {
911  return v1.triple(v2, v3);
912 }
913 
919 lerp(const btVector3& v1, const btVector3& v2, const btScalar& t)
920 {
921  return v1.lerp(v2, t);
922 }
923 
924 
925 
927 {
928  return (v - *this).length2();
929 }
930 
932 {
933  return (v - *this).length();
934 }
935 
937 {
938 #if defined(BT_USE_SSE_IN_API) && defined (BT_USE_SSE)
939  btVector3 norm = *this;
940 
941  return norm.normalize();
942 #else
943  return *this / length();
944 #endif
945 }
946 
948 {
949  // wAxis must be a unit lenght vector
950 
951 #if defined(BT_USE_SSE_IN_API) && defined (BT_USE_SSE)
952 
953  __m128 O = _mm_mul_ps(wAxis.mVec128, mVec128);
954  btScalar ssin = btSin( _angle );
955  __m128 C = wAxis.cross( mVec128 ).mVec128;
956  O = _mm_and_ps(O, btvFFF0fMask);
957  btScalar scos = btCos( _angle );
958 
959  __m128 vsin = _mm_load_ss(&ssin); // (S 0 0 0)
960  __m128 vcos = _mm_load_ss(&scos); // (S 0 0 0)
961 
962  __m128 Y = bt_pshufd_ps(O, 0xC9); // (Y Z X 0)
963  __m128 Z = bt_pshufd_ps(O, 0xD2); // (Z X Y 0)
964  O = _mm_add_ps(O, Y);
965  vsin = bt_pshufd_ps(vsin, 0x80); // (S S S 0)
966  O = _mm_add_ps(O, Z);
967  vcos = bt_pshufd_ps(vcos, 0x80); // (S S S 0)
968 
969  vsin = vsin * C;
970  O = O * wAxis.mVec128;
971  __m128 X = mVec128 - O;
972 
973  O = O + vsin;
974  vcos = vcos * X;
975  O = O + vcos;
976 
977  return btVector3(O);
978 #else
979  btVector3 o = wAxis * wAxis.dot( *this );
980  btVector3 _x = *this - o;
981  btVector3 _y;
982 
983  _y = wAxis.cross( *this );
984 
985  return ( o + _x * btCos( _angle ) + _y * btSin( _angle ) );
986 #endif
987 }
988 
989 SIMD_FORCE_INLINE long btVector3::maxDot( const btVector3 *array, long array_count, btScalar &dotOut ) const
990 {
991 #if defined (BT_USE_SSE) || defined (BT_USE_NEON)
992  #if defined _WIN32 || defined (BT_USE_SSE)
993  const long scalar_cutoff = 10;
994  long _maxdot_large( const float *array, const float *vec, unsigned long array_count, float *dotOut );
995  #elif defined BT_USE_NEON
996  const long scalar_cutoff = 4;
997  extern long (*_maxdot_large)( const float *array, const float *vec, unsigned long array_count, float *dotOut );
998  #endif
999  if( array_count < scalar_cutoff )
1000 #else
1001 
1002 #endif//BT_USE_SSE || BT_USE_NEON
1003  {
1005  int i = 0;
1006  int ptIndex = -1;
1007  for( i = 0; i < array_count; i++ )
1008  {
1009  btScalar dot = array[i].dot(*this);
1010 
1011  if( dot > maxDot )
1012  {
1013  maxDot = dot;
1014  ptIndex = i;
1015  }
1016  }
1017 
1018  dotOut = maxDot;
1019  return ptIndex;
1020  }
1021 #if defined (BT_USE_SSE) || defined (BT_USE_NEON)
1022  return _maxdot_large( (float*) array, (float*) &m_floats[0], array_count, &dotOut );
1023 #endif
1024 }
1025 
1026 SIMD_FORCE_INLINE long btVector3::minDot( const btVector3 *array, long array_count, btScalar &dotOut ) const
1027 {
1028 #if defined (BT_USE_SSE) || defined (BT_USE_NEON)
1029  #if defined BT_USE_SSE
1030  const long scalar_cutoff = 10;
1031  long _mindot_large( const float *array, const float *vec, unsigned long array_count, float *dotOut );
1032  #elif defined BT_USE_NEON
1033  const long scalar_cutoff = 4;
1034  extern long (*_mindot_large)( const float *array, const float *vec, unsigned long array_count, float *dotOut );
1035  #else
1036  #error unhandled arch!
1037  #endif
1038 
1039  if( array_count < scalar_cutoff )
1040 #endif//BT_USE_SSE || BT_USE_NEON
1041  {
1043  int i = 0;
1044  int ptIndex = -1;
1045 
1046  for( i = 0; i < array_count; i++ )
1047  {
1048  btScalar dot = array[i].dot(*this);
1049 
1050  if( dot < minDot )
1051  {
1052  minDot = dot;
1053  ptIndex = i;
1054  }
1055  }
1056 
1057  dotOut = minDot;
1058 
1059  return ptIndex;
1060  }
1061 #if defined (BT_USE_SSE) || defined (BT_USE_NEON)
1062  return _mindot_large( (float*) array, (float*) &m_floats[0], array_count, &dotOut );
1063 #endif
1064 }
1065 
1066 
1067 class btVector4 : public btVector3
1068 {
1069 public:
1070 
1072 
1073 
1074  SIMD_FORCE_INLINE btVector4(const btScalar& _x, const btScalar& _y, const btScalar& _z,const btScalar& _w)
1075  : btVector3(_x,_y,_z)
1076  {
1077  m_floats[3] = _w;
1078  }
1079 
1080 #if (defined (BT_USE_SSE_IN_API)&& defined (BT_USE_SSE)) || defined (BT_USE_NEON)
1081  SIMD_FORCE_INLINE btVector4(const btSimdFloat4 vec)
1082  {
1083  mVec128 = vec;
1084  }
1085 
1087  {
1088  mVec128 = rhs.mVec128;
1089  }
1090 
1092  operator=(const btVector4& v)
1093  {
1094  mVec128 = v.mVec128;
1095  return *this;
1096  }
1097 #endif // #if defined (BT_USE_SSE_IN_API) || defined (BT_USE_NEON)
1098 
1100  {
1101 #if defined(BT_USE_SSE_IN_API) && defined (BT_USE_SSE)
1102  return btVector4(_mm_and_ps(mVec128, btvAbsfMask));
1103 #elif defined(BT_USE_NEON)
1104  return btVector4(vabsq_f32(mVec128));
1105 #else
1106  return btVector4(
1107  btFabs(m_floats[0]),
1108  btFabs(m_floats[1]),
1109  btFabs(m_floats[2]),
1110  btFabs(m_floats[3]));
1111 #endif
1112  }
1113 
1114 
1115  btScalar getW() const { return m_floats[3];}
1116 
1117 
1119  {
1120  int maxIndex = -1;
1121  btScalar maxVal = btScalar(-BT_LARGE_FLOAT);
1122  if (m_floats[0] > maxVal)
1123  {
1124  maxIndex = 0;
1125  maxVal = m_floats[0];
1126  }
1127  if (m_floats[1] > maxVal)
1128  {
1129  maxIndex = 1;
1130  maxVal = m_floats[1];
1131  }
1132  if (m_floats[2] > maxVal)
1133  {
1134  maxIndex = 2;
1135  maxVal =m_floats[2];
1136  }
1137  if (m_floats[3] > maxVal)
1138  {
1139  maxIndex = 3;
1140  maxVal = m_floats[3];
1141  }
1142 
1143  return maxIndex;
1144  }
1145 
1146 
1148  {
1149  int minIndex = -1;
1150  btScalar minVal = btScalar(BT_LARGE_FLOAT);
1151  if (m_floats[0] < minVal)
1152  {
1153  minIndex = 0;
1154  minVal = m_floats[0];
1155  }
1156  if (m_floats[1] < minVal)
1157  {
1158  minIndex = 1;
1159  minVal = m_floats[1];
1160  }
1161  if (m_floats[2] < minVal)
1162  {
1163  minIndex = 2;
1164  minVal =m_floats[2];
1165  }
1166  if (m_floats[3] < minVal)
1167  {
1168  minIndex = 3;
1169  minVal = m_floats[3];
1170  }
1171 
1172  return minIndex;
1173  }
1174 
1175 
1177  {
1178  return absolute4().maxAxis4();
1179  }
1180 
1181 
1182 
1183 
1191 /* void getValue(btScalar *m) const
1192  {
1193  m[0] = m_floats[0];
1194  m[1] = m_floats[1];
1195  m[2] =m_floats[2];
1196  }
1197 */
1204  SIMD_FORCE_INLINE void setValue(const btScalar& _x, const btScalar& _y, const btScalar& _z,const btScalar& _w)
1205  {
1206  m_floats[0]=_x;
1207  m_floats[1]=_y;
1208  m_floats[2]=_z;
1209  m_floats[3]=_w;
1210  }
1211 
1212 
1213 };
1214 
1215 
1217 SIMD_FORCE_INLINE void btSwapScalarEndian(const btScalar& sourceVal, btScalar& destVal)
1218 {
1219  #ifdef BT_USE_DOUBLE_PRECISION
1220  unsigned char* dest = (unsigned char*) &destVal;
1221  unsigned char* src = (unsigned char*) &sourceVal;
1222  dest[0] = src[7];
1223  dest[1] = src[6];
1224  dest[2] = src[5];
1225  dest[3] = src[4];
1226  dest[4] = src[3];
1227  dest[5] = src[2];
1228  dest[6] = src[1];
1229  dest[7] = src[0];
1230 #else
1231  unsigned char* dest = (unsigned char*) &destVal;
1232  unsigned char* src = (unsigned char*) &sourceVal;
1233  dest[0] = src[3];
1234  dest[1] = src[2];
1235  dest[2] = src[1];
1236  dest[3] = src[0];
1237 #endif //BT_USE_DOUBLE_PRECISION
1238 }
1241 {
1242  for (int i=0;i<4;i++)
1243  {
1244  btSwapScalarEndian(sourceVec[i],destVec[i]);
1245  }
1246 
1247 }
1248 
1251 {
1252 
1253  btVector3 swappedVec;
1254  for (int i=0;i<4;i++)
1255  {
1256  btSwapScalarEndian(vector[i],swappedVec[i]);
1257  }
1258  vector = swappedVec;
1259 }
1260 
1261 template <class T>
1262 SIMD_FORCE_INLINE void btPlaneSpace1 (const T& n, T& p, T& q)
1263 {
1264  if (btFabs(n[2]) > SIMDSQRT12) {
1265  // choose p in y-z plane
1266  btScalar a = n[1]*n[1] + n[2]*n[2];
1267  btScalar k = btRecipSqrt (a);
1268  p[0] = 0;
1269  p[1] = -n[2]*k;
1270  p[2] = n[1]*k;
1271  // set q = n x p
1272  q[0] = a*k;
1273  q[1] = -n[0]*p[2];
1274  q[2] = n[0]*p[1];
1275  }
1276  else {
1277  // choose p in x-y plane
1278  btScalar a = n[0]*n[0] + n[1]*n[1];
1279  btScalar k = btRecipSqrt (a);
1280  p[0] = -n[1]*k;
1281  p[1] = n[0]*k;
1282  p[2] = 0;
1283  // set q = n x p
1284  q[0] = -n[2]*p[1];
1285  q[1] = n[2]*p[0];
1286  q[2] = a*k;
1287  }
1288 }
1289 
1290 
1292 {
1293  float m_floats[4];
1294 };
1295 
1297 {
1298  double m_floats[4];
1299 
1300 };
1301 
1303 {
1305  for (int i=0;i<4;i++)
1306  dataOut.m_floats[i] = float(m_floats[i]);
1307 }
1308 
1310 {
1311  for (int i=0;i<4;i++)
1312  m_floats[i] = btScalar(dataIn.m_floats[i]);
1313 }
1314 
1315 
1317 {
1319  for (int i=0;i<4;i++)
1320  dataOut.m_floats[i] = double(m_floats[i]);
1321 }
1322 
1324 {
1325  for (int i=0;i<4;i++)
1326  m_floats[i] = btScalar(dataIn.m_floats[i]);
1327 }
1328 
1329 
1331 {
1333  for (int i=0;i<4;i++)
1334  dataOut.m_floats[i] = m_floats[i];
1335 }
1336 
1338 {
1339  for (int i=0;i<4;i++)
1340  m_floats[i] = dataIn.m_floats[i];
1341 }
1342 
1343 #endif //BT_VECTOR3_H