Bullet Collision Detection & Physics Library
btQuaternion.h
/*
Copyright (c) 2003-2006 Gino van den Bergen / Erwin Coumans  http://continuousphysics.com/Bullet/

This software is provided 'as-is', without any express or implied warranty.
In no event will the authors be held liable for any damages arising from the use of this software.
Permission is granted to anyone to use this software for any purpose,
including commercial applications, and to alter it and redistribute it freely,
subject to the following restrictions:

1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
3. This notice may not be removed or altered from any source distribution.
*/

#ifndef BT_SIMD__QUATERNION_H_
#define BT_SIMD__QUATERNION_H_

#include "btVector3.h"
#include "btQuadWord.h"

#ifdef BT_USE_SSE

const __m128 ATTRIBUTE_ALIGNED16(vOnes) = {1.0f, 1.0f, 1.0f, 1.0f};

#endif

#if defined(BT_USE_SSE) || defined(BT_USE_NEON)

const btSimdFloat4 ATTRIBUTE_ALIGNED16(vQInv) = {-0.0f, -0.0f, -0.0f, +0.0f};
const btSimdFloat4 ATTRIBUTE_ALIGNED16(vPPPM) = {+0.0f, +0.0f, +0.0f, -0.0f};

#endif
/**@brief The btQuaternion implements quaternion to perform linear algebra rotations in combination with btMatrix3x3, btVector3 and btTransform. */
class btQuaternion : public btQuadWord {
public:
	/**@brief No initialization constructor */
	btQuaternion() {}

#if (defined(BT_USE_SSE_IN_API) && defined(BT_USE_SSE)) || defined(BT_USE_NEON)
	// Set Vector
	SIMD_FORCE_INLINE btQuaternion(const btSimdFloat4 vec)
	{
		mVec128 = vec;
	}

	// Copy constructor
	SIMD_FORCE_INLINE btQuaternion(const btQuaternion& rhs)
	{
		mVec128 = rhs.mVec128;
	}

	// Assignment operator
	SIMD_FORCE_INLINE btQuaternion&
	operator=(const btQuaternion& v)
	{
		mVec128 = v.mVec128;

		return *this;
	}

#endif

	// template <typename btScalar>
	// explicit Quaternion(const btScalar *v) : Tuple4<btScalar>(v) {}
	/**@brief Constructor from scalars */
	btQuaternion(const btScalar& _x, const btScalar& _y, const btScalar& _z, const btScalar& _w)
		: btQuadWord(_x, _y, _z, _w)
	{}
	/**@brief Axis-angle constructor
	 * @param _axis The axis the rotation is around
	 * @param _angle The magnitude of the rotation around the axis, in radians */
	btQuaternion(const btVector3& _axis, const btScalar& _angle)
	{
		setRotation(_axis, _angle);
	}
	/**@brief Constructor from Euler angles
	 * @param yaw Angle around Y unless BT_EULER_DEFAULT_ZYX is defined, then Z
	 * @param pitch Angle around X unless BT_EULER_DEFAULT_ZYX is defined, then Y
	 * @param roll Angle around Z unless BT_EULER_DEFAULT_ZYX is defined, then X */
	btQuaternion(const btScalar& yaw, const btScalar& pitch, const btScalar& roll)
	{
#ifndef BT_EULER_DEFAULT_ZYX
		setEuler(yaw, pitch, roll);
#else
		setEulerZYX(yaw, pitch, roll);
#endif
	}
	/**@brief Set the rotation using axis-angle notation
	 * @param axis The axis around which to rotate
	 * @param _angle The magnitude of the rotation in radians */
	void setRotation(const btVector3& axis, const btScalar& _angle)
	{
		btScalar d = axis.length();
		btAssert(d != btScalar(0.0));
		btScalar s = btSin(_angle * btScalar(0.5)) / d;
		setValue(axis.x() * s, axis.y() * s, axis.z() * s,
			btCos(_angle * btScalar(0.5)));
	}
	/**@brief Set the quaternion using Euler angles
	 * @param yaw Angle around Y
	 * @param pitch Angle around X
	 * @param roll Angle around Z */
	void setEuler(const btScalar& yaw, const btScalar& pitch, const btScalar& roll)
	{
		btScalar halfYaw = btScalar(yaw) * btScalar(0.5);
		btScalar halfPitch = btScalar(pitch) * btScalar(0.5);
		btScalar halfRoll = btScalar(roll) * btScalar(0.5);
		btScalar cosYaw = btCos(halfYaw);
		btScalar sinYaw = btSin(halfYaw);
		btScalar cosPitch = btCos(halfPitch);
		btScalar sinPitch = btSin(halfPitch);
		btScalar cosRoll = btCos(halfRoll);
		btScalar sinRoll = btSin(halfRoll);
		setValue(cosRoll * sinPitch * cosYaw + sinRoll * cosPitch * sinYaw,
			cosRoll * cosPitch * sinYaw - sinRoll * sinPitch * cosYaw,
			sinRoll * cosPitch * cosYaw - cosRoll * sinPitch * sinYaw,
			cosRoll * cosPitch * cosYaw + sinRoll * sinPitch * sinYaw);
	}
	/**@brief Set the quaternion using Euler angles in the ZYX convention
	 * @param yaw Angle around Z
	 * @param pitch Angle around Y
	 * @param roll Angle around X */
	void setEulerZYX(const btScalar& yaw, const btScalar& pitch, const btScalar& roll)
	{
		btScalar halfYaw = btScalar(yaw) * btScalar(0.5);
		btScalar halfPitch = btScalar(pitch) * btScalar(0.5);
		btScalar halfRoll = btScalar(roll) * btScalar(0.5);
		btScalar cosYaw = btCos(halfYaw);
		btScalar sinYaw = btSin(halfYaw);
		btScalar cosPitch = btCos(halfPitch);
		btScalar sinPitch = btSin(halfPitch);
		btScalar cosRoll = btCos(halfRoll);
		btScalar sinRoll = btSin(halfRoll);
		setValue(sinRoll * cosPitch * cosYaw - cosRoll * sinPitch * sinYaw, //x
			cosRoll * sinPitch * cosYaw + sinRoll * cosPitch * sinYaw, //y
			cosRoll * cosPitch * sinYaw - sinRoll * sinPitch * cosYaw, //z
			cosRoll * cosPitch * cosYaw + sinRoll * sinPitch * sinYaw); //formerly yzx
	}
	/**@brief Add two quaternions
	 * @param q The quaternion to add to this one */
	SIMD_FORCE_INLINE btQuaternion& operator+=(const btQuaternion& q)
	{
#if defined (BT_USE_SSE_IN_API) && defined (BT_USE_SSE)
		mVec128 = _mm_add_ps(mVec128, q.mVec128);
#elif defined(BT_USE_NEON)
		mVec128 = vaddq_f32(mVec128, q.mVec128);
#else
		m_floats[0] += q.x();
		m_floats[1] += q.y();
		m_floats[2] += q.z();
		m_floats[3] += q.m_floats[3];
#endif
		return *this;
	}

	/**@brief Subtract out a quaternion
	 * @param q The quaternion to subtract from this one */
	btQuaternion& operator-=(const btQuaternion& q)
	{
#if defined (BT_USE_SSE_IN_API) && defined (BT_USE_SSE)
		mVec128 = _mm_sub_ps(mVec128, q.mVec128);
#elif defined(BT_USE_NEON)
		mVec128 = vsubq_f32(mVec128, q.mVec128);
#else
		m_floats[0] -= q.x();
		m_floats[1] -= q.y();
		m_floats[2] -= q.z();
		m_floats[3] -= q.m_floats[3];
#endif
		return *this;
	}

	/**@brief Scale this quaternion
	 * @param s The scalar to scale by */
	btQuaternion& operator*=(const btScalar& s)
	{
#if defined (BT_USE_SSE_IN_API) && defined (BT_USE_SSE)
		__m128 vs = _mm_load_ss(&s); // (S 0 0 0)
		vs = bt_pshufd_ps(vs, 0); // (S S S S)
		mVec128 = _mm_mul_ps(mVec128, vs);
#elif defined(BT_USE_NEON)
		mVec128 = vmulq_n_f32(mVec128, s);
#else
		m_floats[0] *= s;
		m_floats[1] *= s;
		m_floats[2] *= s;
		m_floats[3] *= s;
#endif
		return *this;
	}

	/**@brief Multiply this quaternion by q on the right
	 * @param q The other quaternion
	 * Equivalent to this = this * q */
	btQuaternion& operator*=(const btQuaternion& q)
	{
#if defined (BT_USE_SSE_IN_API) && defined (BT_USE_SSE)
		__m128 vQ2 = q.get128();

		__m128 A1 = bt_pshufd_ps(mVec128, BT_SHUFFLE(0,1,2,0));
		__m128 B1 = bt_pshufd_ps(vQ2, BT_SHUFFLE(3,3,3,0));

		A1 = A1 * B1;

		__m128 A2 = bt_pshufd_ps(mVec128, BT_SHUFFLE(1,2,0,1));
		__m128 B2 = bt_pshufd_ps(vQ2, BT_SHUFFLE(2,0,1,1));

		A2 = A2 * B2;

		B1 = bt_pshufd_ps(mVec128, BT_SHUFFLE(2,0,1,2));
		B2 = bt_pshufd_ps(vQ2, BT_SHUFFLE(1,2,0,2));

		B1 = B1 * B2; // A3 *= B3

		mVec128 = bt_splat_ps(mVec128, 3); // A0
		mVec128 = mVec128 * vQ2; // A0 * B0

		A1 = A1 + A2; // AB12
		mVec128 = mVec128 - B1; // AB03 = AB0 - AB3
		A1 = _mm_xor_ps(A1, vPPPM); // change sign of the last element
		mVec128 = mVec128 + A1; // AB03 + AB12

#elif defined(BT_USE_NEON)

		float32x4_t vQ1 = mVec128;
		float32x4_t vQ2 = q.get128();
		float32x4_t A0, A1, B1, A2, B2, A3, B3;
		float32x2_t vQ1zx, vQ2wx, vQ1yz, vQ2zx, vQ2yz, vQ2xz;

		{
			float32x2x2_t tmp;
			tmp = vtrn_f32( vget_high_f32(vQ1), vget_low_f32(vQ1) ); // {z x}, {w y}
			vQ1zx = tmp.val[0];

			tmp = vtrn_f32( vget_high_f32(vQ2), vget_low_f32(vQ2) ); // {z x}, {w y}
			vQ2zx = tmp.val[0];
		}
		vQ2wx = vext_f32(vget_high_f32(vQ2), vget_low_f32(vQ2), 1);

		vQ1yz = vext_f32(vget_low_f32(vQ1), vget_high_f32(vQ1), 1);

		vQ2yz = vext_f32(vget_low_f32(vQ2), vget_high_f32(vQ2), 1);
		vQ2xz = vext_f32(vQ2zx, vQ2zx, 1);

		A1 = vcombine_f32(vget_low_f32(vQ1), vQ1zx); // X Y z x
		B1 = vcombine_f32(vdup_lane_f32(vget_high_f32(vQ2), 1), vQ2wx); // W W W X

		A2 = vcombine_f32(vQ1yz, vget_low_f32(vQ1));
		B2 = vcombine_f32(vQ2zx, vdup_lane_f32(vget_low_f32(vQ2), 1));

		A3 = vcombine_f32(vQ1zx, vQ1yz); // Z X Y Z
		B3 = vcombine_f32(vQ2yz, vQ2xz); // Y Z x z

		A1 = vmulq_f32(A1, B1);
		A2 = vmulq_f32(A2, B2);
		A3 = vmulq_f32(A3, B3); // A3 *= B3
		A0 = vmulq_lane_f32(vQ2, vget_high_f32(vQ1), 1); // A0 * B0

		A1 = vaddq_f32(A1, A2); // AB12 = AB1 + AB2
		A0 = vsubq_f32(A0, A3); // AB03 = AB0 - AB3

		// change the sign of the last element
		A1 = (btSimdFloat4)veorq_s32((int32x4_t)A1, (int32x4_t)vPPPM);
		A0 = vaddq_f32(A0, A1); // AB03 + AB12

		mVec128 = A0;
#else
		setValue(
			m_floats[3] * q.x() + m_floats[0] * q.m_floats[3] + m_floats[1] * q.z() - m_floats[2] * q.y(),
			m_floats[3] * q.y() + m_floats[1] * q.m_floats[3] + m_floats[2] * q.x() - m_floats[0] * q.z(),
			m_floats[3] * q.z() + m_floats[2] * q.m_floats[3] + m_floats[0] * q.y() - m_floats[1] * q.x(),
			m_floats[3] * q.m_floats[3] - m_floats[0] * q.x() - m_floats[1] * q.y() - m_floats[2] * q.z());
#endif
		return *this;
	}
	/**@brief Return the dot product between this quaternion and another
	 * @param q The other quaternion */
	btScalar dot(const btQuaternion& q) const
	{
#if defined (BT_USE_SSE_IN_API) && defined (BT_USE_SSE)
		__m128 vd;

		vd = _mm_mul_ps(mVec128, q.mVec128);

		__m128 t = _mm_movehl_ps(vd, vd);
		vd = _mm_add_ps(vd, t);
		t = _mm_shuffle_ps(vd, vd, 0x55);
		vd = _mm_add_ss(vd, t);

		return _mm_cvtss_f32(vd);
#elif defined(BT_USE_NEON)
		float32x4_t vd = vmulq_f32(mVec128, q.mVec128);
		float32x2_t x = vpadd_f32(vget_low_f32(vd), vget_high_f32(vd));
		x = vpadd_f32(x, x);
		return vget_lane_f32(x, 0);
#else
		return m_floats[0] * q.x() +
			m_floats[1] * q.y() +
			m_floats[2] * q.z() +
			m_floats[3] * q.m_floats[3];
#endif
	}

	/**@brief Return the length squared of the quaternion */
	btScalar length2() const
	{
		return dot(*this);
	}

	/**@brief Return the length of the quaternion */
	btScalar length() const
	{
		return btSqrt(length2());
	}

	/**@brief Normalize the quaternion
	 * such that x^2 + y^2 + z^2 + w^2 = 1 */
	btQuaternion& normalize()
	{
#if defined (BT_USE_SSE_IN_API) && defined (BT_USE_SSE)
		__m128 vd;

		vd = _mm_mul_ps(mVec128, mVec128);

		__m128 t = _mm_movehl_ps(vd, vd);
		vd = _mm_add_ps(vd, t);
		t = _mm_shuffle_ps(vd, vd, 0x55);
		vd = _mm_add_ss(vd, t);

		vd = _mm_sqrt_ss(vd);
		vd = _mm_div_ss(vOnes, vd);
		vd = bt_pshufd_ps(vd, 0); // splat
		mVec128 = _mm_mul_ps(mVec128, vd);

		return *this;
#else
		return *this /= length();
#endif
	}

	/**@brief Return a scaled version of this quaternion
	 * @param s The scale factor */
	SIMD_FORCE_INLINE btQuaternion
	operator*(const btScalar& s) const
	{
#if defined (BT_USE_SSE_IN_API) && defined (BT_USE_SSE)
		__m128 vs = _mm_load_ss(&s); // (S 0 0 0)
		vs = bt_pshufd_ps(vs, 0x00); // (S S S S)

		return btQuaternion(_mm_mul_ps(mVec128, vs));
#elif defined(BT_USE_NEON)
		return btQuaternion(vmulq_n_f32(mVec128, s));
#else
		return btQuaternion(x() * s, y() * s, z() * s, m_floats[3] * s);
#endif
	}

	/**@brief Return an inversely scaled version of this quaternion
	 * @param s The inverse scale factor */
	btQuaternion operator/(const btScalar& s) const
	{
		btAssert(s != btScalar(0.0));
		return *this * (btScalar(1.0) / s);
	}

	/**@brief Inversely scale this quaternion
	 * @param s The inverse scale factor */
	btQuaternion& operator/=(const btScalar& s)
	{
		btAssert(s != btScalar(0.0));
		return *this *= btScalar(1.0) / s;
	}

	/**@brief Return a normalized version of this quaternion */
	btQuaternion normalized() const
	{
		return *this / length();
	}
	/**@brief Return the angle between this quaternion and the other
	 * @param q The other quaternion */
	btScalar angle(const btQuaternion& q) const
	{
		btScalar s = btSqrt(length2() * q.length2());
		btAssert(s != btScalar(0.0));
		return btAcos(dot(q) / s);
	}
	/**@brief Return the angle of rotation represented by this quaternion */
	btScalar getAngle() const
	{
		btScalar s = btScalar(2.) * btAcos(m_floats[3]);
		return s;
	}

	/**@brief Return the axis of the rotation represented by this quaternion */
	btVector3 getAxis() const
	{
		btScalar s_squared = 1.f - m_floats[3] * m_floats[3];

		if (s_squared < btScalar(10.) * SIMD_EPSILON) //Check for divide by zero
			return btVector3(1.0, 0.0, 0.0); // Arbitrary
		btScalar s = 1.f / btSqrt(s_squared);
		return btVector3(m_floats[0] * s, m_floats[1] * s, m_floats[2] * s);
	}

	/**@brief Return the inverse of this quaternion */
	btQuaternion inverse() const
	{
#if defined (BT_USE_SSE_IN_API) && defined (BT_USE_SSE)
		return btQuaternion(_mm_xor_ps(mVec128, vQInv));
#elif defined(BT_USE_NEON)
		return btQuaternion((btSimdFloat4)veorq_s32((int32x4_t)mVec128, (int32x4_t)vQInv));
#else
		return btQuaternion(-m_floats[0], -m_floats[1], -m_floats[2], m_floats[3]);
#endif
	}

	/**@brief Return the sum of this quaternion and the other
	 * @param q2 The other quaternion */
	SIMD_FORCE_INLINE btQuaternion
	operator+(const btQuaternion& q2) const
	{
#if defined (BT_USE_SSE_IN_API) && defined (BT_USE_SSE)
		return btQuaternion(_mm_add_ps(mVec128, q2.mVec128));
#elif defined(BT_USE_NEON)
		return btQuaternion(vaddq_f32(mVec128, q2.mVec128));
#else
		const btQuaternion& q1 = *this;
		return btQuaternion(q1.x() + q2.x(), q1.y() + q2.y(), q1.z() + q2.z(), q1.m_floats[3] + q2.m_floats[3]);
#endif
	}

	/**@brief Return the difference between this quaternion and the other
	 * @param q2 The other quaternion */
	SIMD_FORCE_INLINE btQuaternion
	operator-(const btQuaternion& q2) const
	{
#if defined (BT_USE_SSE_IN_API) && defined (BT_USE_SSE)
		return btQuaternion(_mm_sub_ps(mVec128, q2.mVec128));
#elif defined(BT_USE_NEON)
		return btQuaternion(vsubq_f32(mVec128, q2.mVec128));
#else
		const btQuaternion& q1 = *this;
		return btQuaternion(q1.x() - q2.x(), q1.y() - q2.y(), q1.z() - q2.z(), q1.m_floats[3] - q2.m_floats[3]);
#endif
	}

	/**@brief Return the negative of this quaternion
	 * This simply negates each element */
	SIMD_FORCE_INLINE btQuaternion operator-() const
	{
#if defined (BT_USE_SSE_IN_API) && defined (BT_USE_SSE)
		return btQuaternion(_mm_xor_ps(mVec128, btvMzeroMask));
#elif defined(BT_USE_NEON)
		return btQuaternion((btSimdFloat4)veorq_s32((int32x4_t)mVec128, (int32x4_t)btvMzeroMask));
#else
		const btQuaternion& q2 = *this;
		return btQuaternion(-q2.x(), -q2.y(), -q2.z(), -q2.m_floats[3]);
#endif
	}
	/**@brief Return the representation of qd (qd or -qd) that is farthest from this quaternion
	 * @param qd The other quaternion */
	SIMD_FORCE_INLINE btQuaternion farthest(const btQuaternion& qd) const
	{
		btQuaternion diff, sum;
		diff = *this - qd;
		sum = *this + qd;
		if (diff.dot(diff) > sum.dot(sum))
			return qd;
		return (-qd);
	}

	/**@brief Return the representation of qd (qd or -qd) that is nearest to this quaternion
	 * @param qd The other quaternion */
	SIMD_FORCE_INLINE btQuaternion nearest(const btQuaternion& qd) const
	{
		btQuaternion diff, sum;
		diff = *this - qd;
		sum = *this + qd;
		if (diff.dot(diff) < sum.dot(sum))
			return qd;
		return (-qd);
	}

	/**@brief Return the quaternion which is the result of spherical linear interpolation between this and the other quaternion
	 * @param q The other quaternion to interpolate with
	 * @param t The interpolation ratio: if t = 0 the result is this, if t = 1 the result is q.
	 * Slerp interpolates assuming constant velocity. */
	btQuaternion slerp(const btQuaternion& q, const btScalar& t) const
	{
		btScalar magnitude = btSqrt(length2() * q.length2());
		btAssert(magnitude > btScalar(0));

		btScalar product = dot(q) / magnitude;
		if (btFabs(product) != btScalar(1))
		{
			// Take care of the long-angle case, see http://en.wikipedia.org/wiki/Slerp
			const btScalar sign = (product < 0) ? btScalar(-1) : btScalar(1);

			const btScalar theta = btAcos(sign * product);
			const btScalar s1 = btSin(sign * t * theta);
			const btScalar d = btScalar(1.0) / btSin(theta);
			const btScalar s0 = btSin((btScalar(1.0) - t) * theta);

			return btQuaternion(
				(m_floats[0] * s0 + q.x() * s1) * d,
				(m_floats[1] * s0 + q.y() * s1) * d,
				(m_floats[2] * s0 + q.z() * s1) * d,
				(m_floats[3] * s0 + q.m_floats[3] * s1) * d);
		}
		else
		{
			return *this;
		}
	}

	static const btQuaternion& getIdentity()
	{
		static const btQuaternion identityQuat(btScalar(0.), btScalar(0.), btScalar(0.), btScalar(1.));
		return identityQuat;
	}

	SIMD_FORCE_INLINE const btScalar& getW() const { return m_floats[3]; }

};
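
/* Illustrative usage sketch, not part of the original header: build a rotation
   from an axis-angle pair and read the angle and axis back out. The helper name
   below is hypothetical; SIMD_HALF_PI comes from btScalar.h, pulled in via
   btVector3.h. */
SIMD_FORCE_INLINE btQuaternion btQuaternionAxisAngleExample()
{
	btQuaternion q(btVector3(0, 0, 1), SIMD_HALF_PI); // 90 degrees about +Z
	btScalar angle = q.getAngle();                    // approximately SIMD_HALF_PI
	btVector3 axis = q.getAxis();                     // approximately (0, 0, 1)
	(void)angle;
	(void)axis;
	return q.normalized();
}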
/**@brief Return the product of two quaternions */
SIMD_FORCE_INLINE btQuaternion
operator*(const btQuaternion& q1, const btQuaternion& q2)
{
#if defined (BT_USE_SSE_IN_API) && defined (BT_USE_SSE)
	__m128 vQ1 = q1.get128();
	__m128 vQ2 = q2.get128();
	__m128 A0, A1, B1, A2, B2;

	A1 = bt_pshufd_ps(vQ1, BT_SHUFFLE(0,1,2,0)); // X Y z x // vtrn
	B1 = bt_pshufd_ps(vQ2, BT_SHUFFLE(3,3,3,0)); // W W W X // vdup vext

	A1 = A1 * B1;

	A2 = bt_pshufd_ps(vQ1, BT_SHUFFLE(1,2,0,1)); // Y Z X Y // vext
	B2 = bt_pshufd_ps(vQ2, BT_SHUFFLE(2,0,1,1)); // z x Y Y // vtrn vdup

	A2 = A2 * B2;

	B1 = bt_pshufd_ps(vQ1, BT_SHUFFLE(2,0,1,2)); // z x Y Z // vtrn vext
	B2 = bt_pshufd_ps(vQ2, BT_SHUFFLE(1,2,0,2)); // Y Z x z // vext vtrn

	B1 = B1 * B2; // A3 *= B3

	A0 = bt_splat_ps(vQ1, 3); // A0
	A0 = A0 * vQ2; // A0 * B0

	A1 = A1 + A2; // AB12
	A0 = A0 - B1; // AB03 = AB0 - AB3

	A1 = _mm_xor_ps(A1, vPPPM); // change sign of the last element
	A0 = A0 + A1; // AB03 + AB12

	return btQuaternion(A0);

#elif defined(BT_USE_NEON)

	float32x4_t vQ1 = q1.get128();
	float32x4_t vQ2 = q2.get128();
	float32x4_t A0, A1, B1, A2, B2, A3, B3;
	float32x2_t vQ1zx, vQ2wx, vQ1yz, vQ2zx, vQ2yz, vQ2xz;

	{
		float32x2x2_t tmp;
		tmp = vtrn_f32( vget_high_f32(vQ1), vget_low_f32(vQ1) ); // {z x}, {w y}
		vQ1zx = tmp.val[0];

		tmp = vtrn_f32( vget_high_f32(vQ2), vget_low_f32(vQ2) ); // {z x}, {w y}
		vQ2zx = tmp.val[0];
	}
	vQ2wx = vext_f32(vget_high_f32(vQ2), vget_low_f32(vQ2), 1);

	vQ1yz = vext_f32(vget_low_f32(vQ1), vget_high_f32(vQ1), 1);

	vQ2yz = vext_f32(vget_low_f32(vQ2), vget_high_f32(vQ2), 1);
	vQ2xz = vext_f32(vQ2zx, vQ2zx, 1);

	A1 = vcombine_f32(vget_low_f32(vQ1), vQ1zx); // X Y z x
	B1 = vcombine_f32(vdup_lane_f32(vget_high_f32(vQ2), 1), vQ2wx); // W W W X

	A2 = vcombine_f32(vQ1yz, vget_low_f32(vQ1));
	B2 = vcombine_f32(vQ2zx, vdup_lane_f32(vget_low_f32(vQ2), 1));

	A3 = vcombine_f32(vQ1zx, vQ1yz); // Z X Y Z
	B3 = vcombine_f32(vQ2yz, vQ2xz); // Y Z x z

	A1 = vmulq_f32(A1, B1);
	A2 = vmulq_f32(A2, B2);
	A3 = vmulq_f32(A3, B3); // A3 *= B3
	A0 = vmulq_lane_f32(vQ2, vget_high_f32(vQ1), 1); // A0 * B0

	A1 = vaddq_f32(A1, A2); // AB12 = AB1 + AB2
	A0 = vsubq_f32(A0, A3); // AB03 = AB0 - AB3

	// change the sign of the last element
	A1 = (btSimdFloat4)veorq_s32((int32x4_t)A1, (int32x4_t)vPPPM);
	A0 = vaddq_f32(A0, A1); // AB03 + AB12

	return btQuaternion(A0);

#else
	return btQuaternion(
		q1.w() * q2.x() + q1.x() * q2.w() + q1.y() * q2.z() - q1.z() * q2.y(),
		q1.w() * q2.y() + q1.y() * q2.w() + q1.z() * q2.x() - q1.x() * q2.z(),
		q1.w() * q2.z() + q1.z() * q2.w() + q1.x() * q2.y() - q1.y() * q2.x(),
		q1.w() * q2.w() - q1.x() * q2.x() - q1.y() * q2.y() - q1.z() * q2.z());
#endif
}
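
/* Illustrative sketch, not part of the original header: composing two rotations
   with the quaternion product above. The helper name is hypothetical. When the
   result is used to rotate vectors, the right-hand operand is applied first,
   i.e. (qA * qB) rotates by qB and then by qA. */
SIMD_FORCE_INLINE btQuaternion btComposeRotationsExample()
{
	btQuaternion aboutZ(btVector3(0, 0, 1), SIMD_HALF_PI); // 90 degrees about +Z
	btQuaternion aboutX(btVector3(1, 0, 0), SIMD_HALF_PI); // 90 degrees about +X
	return aboutZ * aboutX; // rotate about X first, then about Z
}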

SIMD_FORCE_INLINE btQuaternion
operator*(const btQuaternion& q, const btVector3& w)
{
#if defined (BT_USE_SSE_IN_API) && defined (BT_USE_SSE)
	__m128 vQ1 = q.get128();
	__m128 vQ2 = w.get128();
	__m128 A1, B1, A2, B2, A3, B3;

	A1 = bt_pshufd_ps(vQ1, BT_SHUFFLE(3,3,3,0));
	B1 = bt_pshufd_ps(vQ2, BT_SHUFFLE(0,1,2,0));

	A1 = A1 * B1;

	A2 = bt_pshufd_ps(vQ1, BT_SHUFFLE(1,2,0,1));
	B2 = bt_pshufd_ps(vQ2, BT_SHUFFLE(2,0,1,1));

	A2 = A2 * B2;

	A3 = bt_pshufd_ps(vQ1, BT_SHUFFLE(2,0,1,2));
	B3 = bt_pshufd_ps(vQ2, BT_SHUFFLE(1,2,0,2));

	A3 = A3 * B3; // A3 *= B3

	A1 = A1 + A2; // AB12
	A1 = _mm_xor_ps(A1, vPPPM); // change sign of the last element
	A1 = A1 - A3; // AB123 = AB12 - AB3

	return btQuaternion(A1);

#elif defined(BT_USE_NEON)

	float32x4_t vQ1 = q.get128();
	float32x4_t vQ2 = w.get128();
	float32x4_t A1, B1, A2, B2, A3, B3;
	float32x2_t vQ1wx, vQ2zx, vQ1yz, vQ2yz, vQ1zx, vQ2xz;

	vQ1wx = vext_f32(vget_high_f32(vQ1), vget_low_f32(vQ1), 1);
	{
		float32x2x2_t tmp;

		tmp = vtrn_f32( vget_high_f32(vQ2), vget_low_f32(vQ2) ); // {z x}, {w y}
		vQ2zx = tmp.val[0];

		tmp = vtrn_f32( vget_high_f32(vQ1), vget_low_f32(vQ1) ); // {z x}, {w y}
		vQ1zx = tmp.val[0];
	}

	vQ1yz = vext_f32(vget_low_f32(vQ1), vget_high_f32(vQ1), 1);

	vQ2yz = vext_f32(vget_low_f32(vQ2), vget_high_f32(vQ2), 1);
	vQ2xz = vext_f32(vQ2zx, vQ2zx, 1);

	A1 = vcombine_f32(vdup_lane_f32(vget_high_f32(vQ1), 1), vQ1wx); // W W W X
	B1 = vcombine_f32(vget_low_f32(vQ2), vQ2zx); // X Y z x

	A2 = vcombine_f32(vQ1yz, vget_low_f32(vQ1));
	B2 = vcombine_f32(vQ2zx, vdup_lane_f32(vget_low_f32(vQ2), 1));

	A3 = vcombine_f32(vQ1zx, vQ1yz); // Z X Y Z
	B3 = vcombine_f32(vQ2yz, vQ2xz); // Y Z x z

	A1 = vmulq_f32(A1, B1);
	A2 = vmulq_f32(A2, B2);
	A3 = vmulq_f32(A3, B3); // A3 *= B3

	A1 = vaddq_f32(A1, A2); // AB12 = AB1 + AB2

	// change the sign of the last element
	A1 = (btSimdFloat4)veorq_s32((int32x4_t)A1, (int32x4_t)vPPPM);

	A1 = vsubq_f32(A1, A3); // AB123 = AB12 - AB3

	return btQuaternion(A1);

#else
	return btQuaternion(
		q.w() * w.x() + q.y() * w.z() - q.z() * w.y(),
		q.w() * w.y() + q.z() * w.x() - q.x() * w.z(),
		q.w() * w.z() + q.x() * w.y() - q.y() * w.x(),
		-q.x() * w.x() - q.y() * w.y() - q.z() * w.z());
#endif
}

SIMD_FORCE_INLINE btQuaternion
operator*(const btVector3& w, const btQuaternion& q)
{
#if defined (BT_USE_SSE_IN_API) && defined (BT_USE_SSE)
	__m128 vQ1 = w.get128();
	__m128 vQ2 = q.get128();
	__m128 A1, B1, A2, B2, A3, B3;

	A1 = bt_pshufd_ps(vQ1, BT_SHUFFLE(0,1,2,0)); // X Y z x
	B1 = bt_pshufd_ps(vQ2, BT_SHUFFLE(3,3,3,0)); // W W W X

	A1 = A1 * B1;

	A2 = bt_pshufd_ps(vQ1, BT_SHUFFLE(1,2,0,1));
	B2 = bt_pshufd_ps(vQ2, BT_SHUFFLE(2,0,1,1));

	A2 = A2 * B2;

	A3 = bt_pshufd_ps(vQ1, BT_SHUFFLE(2,0,1,2));
	B3 = bt_pshufd_ps(vQ2, BT_SHUFFLE(1,2,0,2));

	A3 = A3 * B3; // A3 *= B3

	A1 = A1 + A2; // AB12
	A1 = _mm_xor_ps(A1, vPPPM); // change sign of the last element
	A1 = A1 - A3; // AB123 = AB12 - AB3

	return btQuaternion(A1);

#elif defined(BT_USE_NEON)

	float32x4_t vQ1 = w.get128();
	float32x4_t vQ2 = q.get128();
	float32x4_t A1, B1, A2, B2, A3, B3;
	float32x2_t vQ1zx, vQ2wx, vQ1yz, vQ2zx, vQ2yz, vQ2xz;

	{
		float32x2x2_t tmp;

		tmp = vtrn_f32( vget_high_f32(vQ1), vget_low_f32(vQ1) ); // {z x}, {w y}
		vQ1zx = tmp.val[0];

		tmp = vtrn_f32( vget_high_f32(vQ2), vget_low_f32(vQ2) ); // {z x}, {w y}
		vQ2zx = tmp.val[0];
	}
	vQ2wx = vext_f32(vget_high_f32(vQ2), vget_low_f32(vQ2), 1);

	vQ1yz = vext_f32(vget_low_f32(vQ1), vget_high_f32(vQ1), 1);

	vQ2yz = vext_f32(vget_low_f32(vQ2), vget_high_f32(vQ2), 1);
	vQ2xz = vext_f32(vQ2zx, vQ2zx, 1);

	A1 = vcombine_f32(vget_low_f32(vQ1), vQ1zx); // X Y z x
	B1 = vcombine_f32(vdup_lane_f32(vget_high_f32(vQ2), 1), vQ2wx); // W W W X

	A2 = vcombine_f32(vQ1yz, vget_low_f32(vQ1));
	B2 = vcombine_f32(vQ2zx, vdup_lane_f32(vget_low_f32(vQ2), 1));

	A3 = vcombine_f32(vQ1zx, vQ1yz); // Z X Y Z
	B3 = vcombine_f32(vQ2yz, vQ2xz); // Y Z x z

	A1 = vmulq_f32(A1, B1);
	A2 = vmulq_f32(A2, B2);
	A3 = vmulq_f32(A3, B3); // A3 *= B3

	A1 = vaddq_f32(A1, A2); // AB12 = AB1 + AB2

	// change the sign of the last element
	A1 = (btSimdFloat4)veorq_s32((int32x4_t)A1, (int32x4_t)vPPPM);

	A1 = vsubq_f32(A1, A3); // AB123 = AB12 - AB3

	return btQuaternion(A1);

#else
	return btQuaternion(
		+w.x() * q.w() + w.y() * q.z() - w.z() * q.y(),
		+w.y() * q.w() + w.z() * q.x() - w.x() * q.z(),
		+w.z() * q.w() + w.x() * q.y() - w.y() * q.x(),
		-w.x() * q.x() - w.y() * q.y() - w.z() * q.z());
#endif
}

/**@brief Calculate the dot product between two quaternions */
SIMD_FORCE_INLINE btScalar
dot(const btQuaternion& q1, const btQuaternion& q2)
{
	return q1.dot(q2);
}

/**@brief Return the length of a quaternion */
SIMD_FORCE_INLINE btScalar
length(const btQuaternion& q)
{
	return q.length();
}

/**@brief Return the angle between two quaternions */
SIMD_FORCE_INLINE btScalar
btAngle(const btQuaternion& q1, const btQuaternion& q2)
{
	return q1.angle(q2);
}

/**@brief Return the inverse of a quaternion */
SIMD_FORCE_INLINE btQuaternion
inverse(const btQuaternion& q)
{
	return q.inverse();
}

/**@brief Return the result of spherical linear interpolation between two quaternions
 * @param t The interpolation ratio: t = 0 yields q1, t = 1 yields q2 */
SIMD_FORCE_INLINE btQuaternion
slerp(const btQuaternion& q1, const btQuaternion& q2, const btScalar& t)
{
	return q1.slerp(q2, t);
}
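
/* Illustrative sketch, not part of the original header: interpolating between
   two orientations with slerp; t = 0 yields q0 and t = 1 yields q1. The helper
   name and the fixed interpolation factor are hypothetical. */
SIMD_FORCE_INLINE btQuaternion btSlerpQuarterExample(const btQuaternion& q0, const btQuaternion& q1)
{
	return slerp(q0, q1, btScalar(0.25)); // a quarter of the way from q0 towards q1
}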

/**@brief Rotate a vector by the rotation represented by a quaternion */
SIMD_FORCE_INLINE btVector3
quatRotate(const btQuaternion& rotation, const btVector3& v)
{
	btQuaternion q = rotation * v;
	q *= rotation.inverse();
#if defined (BT_USE_SSE_IN_API) && defined (BT_USE_SSE)
	return btVector3(_mm_and_ps(q.get128(), btvFFF0fMask));
#elif defined(BT_USE_NEON)
	return btVector3((float32x4_t)vandq_s32((int32x4_t)q.get128(), btvFFF0Mask));
#else
	return btVector3(q.getX(), q.getY(), q.getZ());
#endif
}
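
/* Illustrative sketch, not part of the original header: rotating a vector with a
   quaternion via quatRotate(), which evaluates rotation * v * rotation.inverse()
   and discards the w component. The helper name is hypothetical. */
SIMD_FORCE_INLINE btVector3 btQuatRotateExample()
{
	btQuaternion aboutZ(btVector3(0, 0, 1), SIMD_HALF_PI); // 90 degrees about +Z
	btVector3 v(1, 0, 0);
	return quatRotate(aboutZ, v); // approximately (0, 1, 0)
}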

SIMD_FORCE_INLINE btQuaternion
shortestArcQuat(const btVector3& v0, const btVector3& v1) // Game Programming Gems 2.10. make sure v0,v1 are normalized
{
	btVector3 c = v0.cross(v1);
	btScalar d = v0.dot(v1);

	if (d < -1.0 + SIMD_EPSILON)
	{
		btVector3 n, unused;
		btPlaneSpace1(v0, n, unused);
		return btQuaternion(n.x(), n.y(), n.z(), 0.0f); // just pick any vector that is orthogonal to v0
	}

	btScalar s = btSqrt((1.0f + d) * 2.0f);
	btScalar rs = 1.0f / s;

	return btQuaternion(c.getX() * rs, c.getY() * rs, c.getZ() * rs, s * 0.5f);
}
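
/* Illustrative sketch, not part of the original header: the shortest-arc rotation
   that maps one unit vector onto another. The inputs must already be normalized;
   shortestArcQuatNormalize2 below normalizes them first. The helper name is
   hypothetical. */
SIMD_FORCE_INLINE btQuaternion btShortestArcExample()
{
	btVector3 from(1, 0, 0);
	btVector3 to(0, 1, 0);
	return shortestArcQuat(from, to); // a 90 degree rotation about +Z
}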

SIMD_FORCE_INLINE btQuaternion
shortestArcQuatNormalize2(btVector3& v0, btVector3& v1)
{
	v0.normalize();
	v1.normalize();
	return shortestArcQuat(v0, v1);
}

#endif //BT_SIMD__QUATERNION_H_