Bullet Collision Detection & Physics Library
btMatrix3x3.h
Go to the documentation of this file.
1 /*
2 Copyright (c) 2003-2006 Gino van den Bergen / Erwin Coumans http://continuousphysics.com/Bullet/
3 
4 This software is provided 'as-is', without any express or implied warranty.
5 In no event will the authors be held liable for any damages arising from the use of this software.
6 Permission is granted to anyone to use this software for any purpose,
7 including commercial applications, and to alter it and redistribute it freely,
8 subject to the following restrictions:
9 
10 1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
11 2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
12 3. This notice may not be removed or altered from any source distribution.
13 */
14 
15 
16 #ifndef BT_MATRIX3x3_H
17 #define BT_MATRIX3x3_H
18 
19 #include "btVector3.h"
20 #include "btQuaternion.h"
21 #include <stdio.h>
22 
23 #ifdef BT_USE_SSE
// Sign-flip mask: xors the first lane's sign bit, leaves the other three lanes unchanged.
24 //const __m128 ATTRIBUTE_ALIGNED16(v2220) = {2.0f, 2.0f, 2.0f, 0.0f};
25 const __m128 ATTRIBUTE_ALIGNED16(vMPPP) = {-0.0f, +0.0f, +0.0f, +0.0f};
26 #endif
27 
// Rows of the 3x3 identity matrix as SIMD vectors (w lane kept at 0).
28 #if defined(BT_USE_SSE) || defined(BT_USE_NEON)
29 const btSimdFloat4 ATTRIBUTE_ALIGNED16(v1000) = {1.0f, 0.0f, 0.0f, 0.0f};
30 const btSimdFloat4 ATTRIBUTE_ALIGNED16(v0100) = {0.0f, 1.0f, 0.0f, 0.0f};
31 const btSimdFloat4 ATTRIBUTE_ALIGNED16(v0010) = {0.0f, 0.0f, 1.0f, 0.0f};
32 #endif
33 
// Select the serialization struct matching the precision btScalar is compiled with.
34 #ifdef BT_USE_DOUBLE_PRECISION
35 #define btMatrix3x3Data btMatrix3x3DoubleData
36 #else
37 #define btMatrix3x3Data btMatrix3x3FloatData
38 #endif //BT_USE_DOUBLE_PRECISION
39 
40 
44 
46  btVector3 m_el[3];
47 
48 public:
51 
52  // explicit btMatrix3x3(const btScalar *m) { setFromOpenGLSubMatrix(m); }
53 
// Constructor from a rotation quaternion; explicit to prevent implicit quaternion->matrix conversion.
 55  explicit btMatrix3x3(const btQuaternion& q) { setRotation(q); }
56  /*
57  template <typename btScalar>
58  Matrix3x3(const btScalar& yaw, const btScalar& pitch, const btScalar& roll)
59  {
60  setEulerYPR(yaw, pitch, roll);
61  }
62  */
// Row-major element constructor: (xx,xy,xz) is row 0, (yx,yy,yz) row 1, (zx,zy,zz) row 2.
 64  btMatrix3x3(const btScalar& xx, const btScalar& xy, const btScalar& xz,
 65  const btScalar& yx, const btScalar& yy, const btScalar& yz,
 66  const btScalar& zx, const btScalar& zy, const btScalar& zz)
 67  {
 68  setValue(xx, xy, xz,
 69  yx, yy, yz,
 70  zx, zy, zz);
 71  }
72 
73 #if (defined (BT_USE_SSE_IN_API) && defined (BT_USE_SSE))|| defined (BT_USE_NEON)
// SIMD row constructor: each btSimdFloat4 becomes one row's 128-bit payload (SSE/NEON builds only).
 74  SIMD_FORCE_INLINE btMatrix3x3 (const btSimdFloat4 v0, const btSimdFloat4 v1, const btSimdFloat4 v2 )
 75  {
 76  m_el[0].mVec128 = v0;
 77  m_el[1].mVec128 = v1;
 78  m_el[2].mVec128 = v2;
 79  }
80 
// Row-vector constructor: v0, v1, v2 are copied in as rows 0..2.
 81  SIMD_FORCE_INLINE btMatrix3x3 (const btVector3& v0, const btVector3& v1, const btVector3& v2 )
 82  {
 83  m_el[0] = v0;
 84  m_el[1] = v1;
 85  m_el[2] = v2;
 86  }
87 
88  // Copy constructor
90  {
91  m_el[0].mVec128 = rhs.m_el[0].mVec128;
92  m_el[1].mVec128 = rhs.m_el[1].mVec128;
93  m_el[2].mVec128 = rhs.m_el[2].mVec128;
94  }
95 
96  // Assignment Operator
 97  SIMD_FORCE_INLINE btMatrix3x3& operator=(const btMatrix3x3& m)
 98  {
// Copy the three rows as raw 128-bit payloads (keeps the w lanes bit-identical too).
 99  m_el[0].mVec128 = m.m_el[0].mVec128;
 100  m_el[1].mVec128 = m.m_el[1].mVec128;
 101  m_el[2].mVec128 = m.m_el[2].mVec128;
 102 
 103  return *this;
 104  }
105 
106 #else
107 
110  {
111  m_el[0] = other.m_el[0];
112  m_el[1] = other.m_el[1];
113  m_el[2] = other.m_el[2];
114  }
115 
118  {
119  m_el[0] = other.m_el[0];
120  m_el[1] = other.m_el[1];
121  m_el[2] = other.m_el[2];
122  return *this;
123  }
124 
125 #endif
126 
130  {
131  return btVector3(m_el[0][i],m_el[1][i],m_el[2][i]);
132  }
133 
134 
// Returns row i by const reference (rows are the storage, so no copy is made).
// i must be in [0,2]; checked only by btFullAssert in debug builds.
 137  SIMD_FORCE_INLINE const btVector3& getRow(int i) const
 138  {
 139  btFullAssert(0 <= i && i < 3);
 140  return m_el[i];
 141  }
142 
146  {
147  btFullAssert(0 <= i && i < 3);
148  return m_el[i];
149  }
150 
154  {
155  btFullAssert(0 <= i && i < 3);
156  return m_el[i];
157  }
158 
 162  btMatrix3x3& operator*=(const btMatrix3x3& m); // in-place matrix product: *this = *this * m
 163 
 167  btMatrix3x3& operator+=(const btMatrix3x3& m); // in-place element-wise addition
 168 
 172  btMatrix3x3& operator-=(const btMatrix3x3& m); // in-place element-wise subtraction
173 
177  {
178  m_el[0].setValue(m[0],m[4],m[8]);
179  m_el[1].setValue(m[1],m[5],m[9]);
180  m_el[2].setValue(m[2],m[6],m[10]);
181 
182  }
193  void setValue(const btScalar& xx, const btScalar& xy, const btScalar& xz,
194  const btScalar& yx, const btScalar& yy, const btScalar& yz,
195  const btScalar& zx, const btScalar& zy, const btScalar& zz)
196  {
197  m_el[0].setValue(xx,xy,xz);
198  m_el[1].setValue(yx,yy,yz);
199  m_el[2].setValue(zx,zy,zz);
200  }
201 
// Set this matrix to the rotation described by quaternion q.
// q need not be normalized: s = 2/|q|^2 folds the normalization into the
// standard quaternion-to-matrix formula. Zero-length quaternions are rejected
// only by a debug assert.
 204  void setRotation(const btQuaternion& q)
 205  {
 206  btScalar d = q.length2();
 207  btFullAssert(d != btScalar(0.0));
 208  btScalar s = btScalar(2.0) / d;
 209 
 210  #if defined (BT_USE_SSE_IN_API) && defined (BT_USE_SSE)
// Vectorized evaluation of the same formula as the scalar branch below;
// lane comments on the right track each register's contents.
 211  __m128 vs, Q = q.get128();
 212  __m128i Qi = btCastfTo128i(Q);
 213  __m128 Y, Z;
 214  __m128 V1, V2, V3;
 215  __m128 V11, V21, V31;
 216  __m128 NQ = _mm_xor_ps(Q, btvMzeroMask);
 217  __m128i NQi = btCastfTo128i(NQ);
 218 
 219  V1 = btCastiTo128f(_mm_shuffle_epi32 (Qi, BT_SHUFFLE(1,0,2,3))); // Y X Z W
 220  V2 = _mm_shuffle_ps(NQ, Q, BT_SHUFFLE(0,0,1,3)); // -X -X Y W
 221  V3 = btCastiTo128f(_mm_shuffle_epi32 (Qi, BT_SHUFFLE(2,1,0,3))); // Z Y X W
 222  V1 = _mm_xor_ps(V1, vMPPP); // change the sign of the first element
 223 
 224  V11 = btCastiTo128f(_mm_shuffle_epi32 (Qi, BT_SHUFFLE(1,1,0,3))); // Y Y X W
 225  V21 = _mm_unpackhi_ps(Q, Q); // Z Z W W
 226  V31 = _mm_shuffle_ps(Q, NQ, BT_SHUFFLE(0,2,0,3)); // X Z -X -W
 227 
 228  V2 = V2 * V1; //
 229  V1 = V1 * V11; //
 230  V3 = V3 * V31; //
 231 
 232  V11 = _mm_shuffle_ps(NQ, Q, BT_SHUFFLE(2,3,1,3)); // -Z -W Y W
 233  V11 = V11 * V21; //
 234  V21 = _mm_xor_ps(V21, vMPPP); // change the sign of the first element
 235  V31 = _mm_shuffle_ps(Q, NQ, BT_SHUFFLE(3,3,1,3)); // W W -Y -W
 236  V31 = _mm_xor_ps(V31, vMPPP); // change the sign of the first element
 237  Y = btCastiTo128f(_mm_shuffle_epi32 (NQi, BT_SHUFFLE(3,2,0,3))); // -W -Z -X -W
 238  Z = btCastiTo128f(_mm_shuffle_epi32 (Qi, BT_SHUFFLE(1,0,1,3))); // Y X Y W
 239 
 240  vs = _mm_load_ss(&s);
 241  V21 = V21 * Y;
 242  V31 = V31 * Z;
 243 
 244  V1 = V1 + V11;
 245  V2 = V2 + V21;
 246  V3 = V3 + V31;
 247 
// Broadcast s across the three meaningful lanes, then scale and add the
// identity rows (v1000/v0100/v0010) to finish the formula.
 248  vs = bt_splat3_ps(vs, 0);
 249  // s ready
 250  V1 = V1 * vs;
 251  V2 = V2 * vs;
 252  V3 = V3 * vs;
 253 
 254  V1 = V1 + v1000;
 255  V2 = V2 + v0100;
 256  V3 = V3 + v0010;
 257 
 258  m_el[0] = V1;
 259  m_el[1] = V2;
 260  m_el[2] = V3;
 261  #else
// Scalar path: textbook quaternion-to-rotation-matrix conversion.
 262  btScalar xs = q.x() * s, ys = q.y() * s, zs = q.z() * s;
 263  btScalar wx = q.w() * xs, wy = q.w() * ys, wz = q.w() * zs;
 264  btScalar xx = q.x() * xs, xy = q.x() * ys, xz = q.x() * zs;
 265  btScalar yy = q.y() * ys, yz = q.y() * zs, zz = q.z() * zs;
 266  setValue(
 267  btScalar(1.0) - (yy + zz), xy - wz, xz + wy,
 268  xy + wz, btScalar(1.0) - (xx + zz), yz - wx,
 269  xz - wy, yz + wx, btScalar(1.0) - (xx + yy));
 270  #endif
 271  }
272 
273 
279  void setEulerYPR(const btScalar& yaw, const btScalar& pitch, const btScalar& roll)
280  {
281  setEulerZYX(roll, pitch, yaw);
282  }
283 
293  void setEulerZYX(btScalar eulerX,btScalar eulerY,btScalar eulerZ) {
295  btScalar ci ( btCos(eulerX));
296  btScalar cj ( btCos(eulerY));
297  btScalar ch ( btCos(eulerZ));
298  btScalar si ( btSin(eulerX));
299  btScalar sj ( btSin(eulerY));
300  btScalar sh ( btSin(eulerZ));
301  btScalar cc = ci * ch;
302  btScalar cs = ci * sh;
303  btScalar sc = si * ch;
304  btScalar ss = si * sh;
305 
306  setValue(cj * ch, sj * sc - cs, sj * cc + ss,
307  cj * sh, sj * ss + cc, sj * cs - sc,
308  -sj, cj * si, cj * ci);
309  }
310 
// Reset this matrix to the identity rotation.
 312  void setIdentity()
 313  {
 314 #if (defined(BT_USE_SSE_IN_API)&& defined (BT_USE_SSE)) || defined(BT_USE_NEON)
// SIMD builds: copy the precomputed identity rows (w lane stays 0).
 315  m_el[0] = v1000;
 316  m_el[1] = v0100;
 317  m_el[2] = v0010;
 318 #else
 319  setValue(btScalar(1.0), btScalar(0.0), btScalar(0.0),
 320  btScalar(0.0), btScalar(1.0), btScalar(0.0),
 321  btScalar(0.0), btScalar(0.0), btScalar(1.0));
 322 #endif
 323  }
324 
// Returns a shared, lazily-constructed identity matrix (function-local static).
 325  static const btMatrix3x3& getIdentity()
 326  {
 327 #if (defined(BT_USE_SSE_IN_API)&& defined (BT_USE_SSE)) || defined(BT_USE_NEON)
 328  static const btMatrix3x3
 329  identityMatrix(v1000, v0100, v0010);
 330 #else
 331  static const btMatrix3x3
 332  identityMatrix(
 333  btScalar(1.0), btScalar(0.0), btScalar(0.0),
 334  btScalar(0.0), btScalar(1.0), btScalar(0.0),
 335  btScalar(0.0), btScalar(0.0), btScalar(1.0));
 336 #endif
 337  return identityMatrix;
 338  }
339 
// Write this matrix into the rotation part of a column-major OpenGL 4x4 array:
// fills m[0..11] (three columns of four), leaving m[12..15] untouched.
// Each column's 4th entry is written as 0.
// NOTE(review): the SSE path stores via __m128* — m presumably must be
// 16-byte aligned there; confirm at call sites.
 342  void getOpenGLSubMatrix(btScalar *m) const
 343  {
 344 #if defined (BT_USE_SSE_IN_API) && defined (BT_USE_SSE)
// SSE: transpose the three rows into three columns with unpack/shuffle.
 345  __m128 v0 = m_el[0].mVec128;
 346  __m128 v1 = m_el[1].mVec128;
 347  __m128 v2 = m_el[2].mVec128; // x2 y2 z2 w2
 348  __m128 *vm = (__m128 *)m;
 349  __m128 vT;
 350 
 351  v2 = _mm_and_ps(v2, btvFFF0fMask); // x2 y2 z2 0
 352 
 353  vT = _mm_unpackhi_ps(v0, v1); // z0 z1 * *
 354  v0 = _mm_unpacklo_ps(v0, v1); // x0 x1 y0 y1
 355 
 356  v1 = _mm_shuffle_ps(v0, v2, BT_SHUFFLE(2, 3, 1, 3) ); // y0 y1 y2 0
 357  v0 = _mm_shuffle_ps(v0, v2, BT_SHUFFLE(0, 1, 0, 3) ); // x0 x1 x2 0
 358  v2 = btCastdTo128f(_mm_move_sd(btCastfTo128d(v2), btCastfTo128d(vT))); // z0 z1 z2 0
 359 
 360  vm[0] = v0;
 361  vm[1] = v1;
 362  vm[2] = v2;
 363 #elif defined(BT_USE_NEON)
 364  // note: zeros the w channel. We can preserve it at the cost of two more vtrn instructions.
 365  static const uint32x2_t zMask = (const uint32x2_t) {-1, 0 };
 366  float32x4_t *vm = (float32x4_t *)m;
 367  float32x4x2_t top = vtrnq_f32( m_el[0].mVec128, m_el[1].mVec128 ); // {x0 x1 z0 z1}, {y0 y1 w0 w1}
 368  float32x2x2_t bl = vtrn_f32( vget_low_f32(m_el[2].mVec128), vdup_n_f32(0.0f) ); // {x2 0 }, {y2 0}
 369  float32x4_t v0 = vcombine_f32( vget_low_f32(top.val[0]), bl.val[0] );
 370  float32x4_t v1 = vcombine_f32( vget_low_f32(top.val[1]), bl.val[1] );
 371  float32x2_t q = (float32x2_t) vand_u32( (uint32x2_t) vget_high_f32( m_el[2].mVec128), zMask );
 372  float32x4_t v2 = vcombine_f32( vget_high_f32(top.val[0]), q ); // z0 z1 z2 0
 373 
 374  vm[0] = v0;
 375  vm[1] = v1;
 376  vm[2] = v2;
 377 #else
// Scalar: column j of the GL array is (row0[j], row1[j], row2[j], 0).
 378  m[0] = btScalar(m_el[0].x());
 379  m[1] = btScalar(m_el[1].x());
 380  m[2] = btScalar(m_el[2].x());
 381  m[3] = btScalar(0.0);
 382  m[4] = btScalar(m_el[0].y());
 383  m[5] = btScalar(m_el[1].y());
 384  m[6] = btScalar(m_el[2].y());
 385  m[7] = btScalar(0.0);
 386  m[8] = btScalar(m_el[0].z());
 387  m[9] = btScalar(m_el[1].z());
 388  m[10] = btScalar(m_el[2].z());
 389  m[11] = btScalar(0.0);
 390 #endif
 391  }
392 
// Extract the rotation of this matrix as a quaternion (Shepperd-style method:
// use the trace when it is positive, otherwise pivot on the largest diagonal
// element for numerical stability). Assumes the matrix is a pure rotation.
 395  void getRotation(btQuaternion& q) const
 396  {
 397 #if (defined (BT_USE_SSE_IN_API) && defined (BT_USE_SSE))|| defined (BT_USE_NEON)
 398  btScalar trace = m_el[0].x() + m_el[1].y() + m_el[2].z();
 399  btScalar s, x;
 400 
// Union lets the four components be filled as scalars and then loaded into
// the quaternion as one SIMD vector.
 401  union {
 402  btSimdFloat4 vec;
 403  btScalar f[4];
 404  } temp;
 405 
 406  if (trace > btScalar(0.0))
 407  {
 408  x = trace + btScalar(1.0);
 409 
 410  temp.f[0]=m_el[2].y() - m_el[1].z();
 411  temp.f[1]=m_el[0].z() - m_el[2].x();
 412  temp.f[2]=m_el[1].x() - m_el[0].y();
 413  temp.f[3]=x;
 414  //temp.f[3]= s * btScalar(0.5);
 415  }
 416  else
 417  {
// i indexes the largest diagonal element; j, k are the other two axes.
 418  int i, j, k;
 419  if(m_el[0].x() < m_el[1].y())
 420  {
 421  if( m_el[1].y() < m_el[2].z() )
 422  { i = 2; j = 0; k = 1; }
 423  else
 424  { i = 1; j = 2; k = 0; }
 425  }
 426  else
 427  {
 428  if( m_el[0].x() < m_el[2].z())
 429  { i = 2; j = 0; k = 1; }
 430  else
 431  { i = 0; j = 1; k = 2; }
 432  }
 433 
 434  x = m_el[i][i] - m_el[j][j] - m_el[k][k] + btScalar(1.0);
 435 
 436  temp.f[3] = (m_el[k][j] - m_el[j][k]);
 437  temp.f[j] = (m_el[j][i] + m_el[i][j]);
 438  temp.f[k] = (m_el[k][i] + m_el[i][k]);
 439  temp.f[i] = x;
 440  //temp.f[i] = s * btScalar(0.5);
 441  }
 442 
// Common scale: all four components are multiplied by 0.5/sqrt(x).
 443  s = btSqrt(x);
 444  q.set128(temp.vec);
 445  s = btScalar(0.5) / s;
 446 
 447  q *= s;
 448 #else
// Scalar path: same algorithm, with the pivot chosen via nested ternaries
// and j, k derived cyclically from i.
 449  btScalar trace = m_el[0].x() + m_el[1].y() + m_el[2].z();
 450 
 451  btScalar temp[4];
 452 
 453  if (trace > btScalar(0.0))
 454  {
 455  btScalar s = btSqrt(trace + btScalar(1.0));
 456  temp[3]=(s * btScalar(0.5));
 457  s = btScalar(0.5) / s;
 458 
 459  temp[0]=((m_el[2].y() - m_el[1].z()) * s);
 460  temp[1]=((m_el[0].z() - m_el[2].x()) * s);
 461  temp[2]=((m_el[1].x() - m_el[0].y()) * s);
 462  }
 463  else
 464  {
 465  int i = m_el[0].x() < m_el[1].y() ?
 466  (m_el[1].y() < m_el[2].z() ? 2 : 1) :
 467  (m_el[0].x() < m_el[2].z() ? 2 : 0);
 468  int j = (i + 1) % 3;
 469  int k = (i + 2) % 3;
 470 
 471  btScalar s = btSqrt(m_el[i][i] - m_el[j][j] - m_el[k][k] + btScalar(1.0));
 472  temp[i] = s * btScalar(0.5);
 473  s = btScalar(0.5) / s;
 474 
 475  temp[3] = (m_el[k][j] - m_el[j][k]) * s;
 476  temp[j] = (m_el[j][i] + m_el[i][j]) * s;
 477  temp[k] = (m_el[k][i] + m_el[i][k]) * s;
 478  }
 479  q.setValue(temp[0],temp[1],temp[2],temp[3]);
 480 #endif
 481  }
482 
// Decompose this rotation into yaw (about Z), pitch (about Y) and roll
// (about X), in radians, assuming the matrix is a pure rotation.
 487  void getEulerYPR(btScalar& yaw, btScalar& pitch, btScalar& roll) const
 488  {
 489 
 490  // first use the normal calculus
 491  yaw = btScalar(btAtan2(m_el[1].x(), m_el[0].x()));
 492  pitch = btScalar(btAsin(-m_el[2].x()));
 493  roll = btScalar(btAtan2(m_el[2].y(), m_el[2].z()));
 494 
 495  // on pitch = +/-HalfPI
// NOTE(review): exact float equality — this gimbal-lock branch only fires
// when btAsin returns exactly +/-SIMD_HALF_PI; near-singular pitches fall
// through to the normal result. Confirm this is the intended tolerance.
 496  if (btFabs(pitch)==SIMD_HALF_PI)
 497  {
 498  if (yaw>0)
 499  yaw-=SIMD_PI;
 500  else
 501  yaw+=SIMD_PI;
 502 
 503  if (roll>0)
 504  roll-=SIMD_PI;
 505  else
 506  roll+=SIMD_PI;
 507  }
 508  };
509 
510 
// Decompose this rotation into ZYX Euler angles. Every rotation matrix has
// two Euler-angle solutions; solution_number (1 or 2) selects which one is
// returned. Handles the gimbal-lock case |m_el[2].x()| >= 1 separately.
 516  void getEulerZYX(btScalar& yaw, btScalar& pitch, btScalar& roll, unsigned int solution_number = 1) const
 517  {
 518  struct Euler
 519  {
 520  btScalar yaw;
 521  btScalar pitch;
 522  btScalar roll;
 523  };
 524 
 525  Euler euler_out;
 526  Euler euler_out2; //second solution
 527  //get the pointer to the raw data
 528 
 529  // Check that pitch is not at a singularity
 530  if (btFabs(m_el[2].x()) >= 1)
 531  {
// Gimbal lock: yaw is indeterminate, so fix it to 0 and fold the remaining
// freedom into roll via the difference-of-angles delta.
 532  euler_out.yaw = 0;
 533  euler_out2.yaw = 0;
 534 
 535  // From difference of angles formula
 536  btScalar delta = btAtan2(m_el[0].x(),m_el[0].z());
 537  if (m_el[2].x() > 0) //gimbal locked up
 538  {
 539  euler_out.pitch = SIMD_PI / btScalar(2.0);
 540  euler_out2.pitch = SIMD_PI / btScalar(2.0);
 541  euler_out.roll = euler_out.pitch + delta;
 542  euler_out2.roll = euler_out.pitch + delta;
 543  }
 544  else // gimbal locked down
 545  {
 546  euler_out.pitch = -SIMD_PI / btScalar(2.0);
 547  euler_out2.pitch = -SIMD_PI / btScalar(2.0);
 548  euler_out.roll = -euler_out.pitch + delta;
 549  euler_out2.roll = -euler_out.pitch + delta;
 550  }
 551  }
 552  else
 553  {
// Regular case: pitch1 = -asin(m20); pitch2 = pi - pitch1 gives the
// second solution, and roll/yaw are recovered with cos(pitch) divided out.
 554  euler_out.pitch = - btAsin(m_el[2].x());
 555  euler_out2.pitch = SIMD_PI - euler_out.pitch;
 556 
 557  euler_out.roll = btAtan2(m_el[2].y()/btCos(euler_out.pitch),
 558  m_el[2].z()/btCos(euler_out.pitch));
 559  euler_out2.roll = btAtan2(m_el[2].y()/btCos(euler_out2.pitch),
 560  m_el[2].z()/btCos(euler_out2.pitch));
 561 
 562  euler_out.yaw = btAtan2(m_el[1].x()/btCos(euler_out.pitch),
 563  m_el[0].x()/btCos(euler_out.pitch));
 564  euler_out2.yaw = btAtan2(m_el[1].x()/btCos(euler_out2.pitch),
 565  m_el[0].x()/btCos(euler_out2.pitch));
 566  }
 567 
 568  if (solution_number == 1)
 569  {
 570  yaw = euler_out.yaw;
 571  pitch = euler_out.pitch;
 572  roll = euler_out.roll;
 573  }
 574  else
 575  {
 576  yaw = euler_out2.yaw;
 577  pitch = euler_out2.pitch;
 578  roll = euler_out2.roll;
 579  }
 580  }
581 
// Return a copy of this matrix with column j scaled by s[j]
// (equivalent to this * diag(s.x, s.y, s.z)); *this is unchanged.
 585  btMatrix3x3 scaled(const btVector3& s) const
 586  {
 587 #if (defined (BT_USE_SSE_IN_API) && defined (BT_USE_SSE))|| defined (BT_USE_NEON)
// SIMD: component-wise row * s does the per-column scaling in one op per row.
 588  return btMatrix3x3(m_el[0] * s, m_el[1] * s, m_el[2] * s);
 589 #else
 590  return btMatrix3x3(
 591  m_el[0].x() * s.x(), m_el[0].y() * s.y(), m_el[0].z() * s.z(),
 592  m_el[1].x() * s.x(), m_el[1].y() * s.y(), m_el[1].z() * s.z(),
 593  m_el[2].x() * s.x(), m_el[2].y() * s.y(), m_el[2].z() * s.z());
 594 #endif
 595  }
596 
 598  btScalar determinant() const; // determinant of the 3x3 matrix
 600  btMatrix3x3 adjoint() const; // adjoint (transpose of the cofactor matrix)
 602  btMatrix3x3 absolute() const; // element-wise absolute value
 604  btMatrix3x3 transpose() const; // transposed copy
 606  btMatrix3x3 inverse() const; // inverse (asserts non-zero determinant)
 607 
 608  btMatrix3x3 transposeTimes(const btMatrix3x3& m) const; // this^T * m
 609  btMatrix3x3 timesTranspose(const btMatrix3x3& m) const; // this * m^T
610 
612  {
613  return m_el[0].x() * v.x() + m_el[1].x() * v.y() + m_el[2].x() * v.z();
614  }
616  {
617  return m_el[0].y() * v.x() + m_el[1].y() * v.y() + m_el[2].y() * v.z();
618  }
620  {
621  return m_el[0].z() * v.x() + m_el[1].z() * v.y() + m_el[2].z() * v.z();
622  }
623 
624 
// Diagonalize this (assumed symmetric) matrix in place by cyclic Jacobi
// rotations. On return, rot holds the accumulated rotation such that
// old_this = rot * new_this * rot^T. Iteration stops after maxSteps sweeps
// or when the largest off-diagonal element falls below threshold times the
// sum of the diagonal magnitudes.
 634  void diagonalize(btMatrix3x3& rot, btScalar threshold, int maxSteps)
 635  {
 636  rot.setIdentity();
 637  for (int step = maxSteps; step > 0; step--)
 638  {
 639  // find off-diagonal element [p][q] with largest magnitude
 640  int p = 0;
 641  int q = 1;
 642  int r = 2;
 643  btScalar max = btFabs(m_el[0][1]);
 644  btScalar v = btFabs(m_el[0][2]);
 645  if (v > max)
 646  {
 647  q = 2;
 648  r = 1;
 649  max = v;
 650  }
 651  v = btFabs(m_el[1][2]);
 652  if (v > max)
 653  {
 654  p = 1;
 655  q = 2;
 656  r = 0;
 657  max = v;
 658  }
 659 
// Convergence test against the relative threshold; the inner check bails
// out entirely once the off-diagonal mass is negligible, otherwise one
// final rotation step is performed.
 660  btScalar t = threshold * (btFabs(m_el[0][0]) + btFabs(m_el[1][1]) + btFabs(m_el[2][2]));
 661  if (max <= t)
 662  {
 663  if (max <= SIMD_EPSILON * t)
 664  {
 665  return;
 666  }
 667  step = 1;
 668  }
 669 
 670  // compute Jacobi rotation J which leads to a zero for element [p][q]
 671  btScalar mpq = m_el[p][q];
 672  btScalar theta = (m_el[q][q] - m_el[p][p]) / (2 * mpq);
 673  btScalar theta2 = theta * theta;
// NOTE: local names 'cos' and 'sin' shadow the math functions in this scope.
 674  btScalar cos;
 675  btScalar sin;
 676  if (theta2 * theta2 < btScalar(10 / SIMD_EPSILON))
 677  {
 678  t = (theta >= 0) ? 1 / (theta + btSqrt(1 + theta2))
 679  : 1 / (theta - btSqrt(1 + theta2));
 680  cos = 1 / btSqrt(1 + t * t);
 681  sin = cos * t;
 682  }
 683  else
 684  {
 685  // approximation for large theta-value, i.e., a nearly diagonal matrix
 686  t = 1 / (theta * (2 + btScalar(0.5) / theta2));
 687  cos = 1 - btScalar(0.5) * t * t;
 688  sin = cos * t;
 689  }
 690 
 691  // apply rotation to matrix (this = J^T * this * J)
 692  m_el[p][q] = m_el[q][p] = 0;
 693  m_el[p][p] -= t * mpq;
 694  m_el[q][q] += t * mpq;
 695  btScalar mrp = m_el[r][p];
 696  btScalar mrq = m_el[r][q];
 697  m_el[r][p] = m_el[p][r] = cos * mrp - sin * mrq;
 698  m_el[r][q] = m_el[q][r] = cos * mrq + sin * mrp;
 699 
 700  // apply rotation to rot (rot = rot * J)
 701  for (int i = 0; i < 3; i++)
 702  {
 703  btVector3& row = rot[i];
 704  mrp = row[p];
 705  mrq = row[q];
 706  row[p] = cos * mrp - sin * mrq;
 707  row[q] = cos * mrq + sin * mrp;
 708  }
 709  }
 710  }
711 
712 
713 
714 
722  btScalar cofac(int r1, int c1, int r2, int c2) const
723  {
724  return m_el[r1][c1] * m_el[r2][c2] - m_el[r1][c2] * m_el[r2][c1];
725  }
726 
 727  void serialize(struct btMatrix3x3Data& dataOut) const; // write at native precision
 728 
 729  void serializeFloat(struct btMatrix3x3FloatData& dataOut) const; // write as single precision
 730 
 731  void deSerialize(const struct btMatrix3x3Data& dataIn); // read at native precision
 732 
 733  void deSerializeFloat(const struct btMatrix3x3FloatData& dataIn); // read from single precision
 734 
 735  void deSerializeDouble(const struct btMatrix3x3DoubleData& dataIn); // read from double precision
737 };
738 
739 
742 {
743 #if defined (BT_USE_SSE_IN_API) && defined (BT_USE_SSE)
744  __m128 rv00, rv01, rv02;
745  __m128 rv10, rv11, rv12;
746  __m128 rv20, rv21, rv22;
747  __m128 mv0, mv1, mv2;
748 
749  rv02 = m_el[0].mVec128;
750  rv12 = m_el[1].mVec128;
751  rv22 = m_el[2].mVec128;
752 
753  mv0 = _mm_and_ps(m[0].mVec128, btvFFF0fMask);
754  mv1 = _mm_and_ps(m[1].mVec128, btvFFF0fMask);
755  mv2 = _mm_and_ps(m[2].mVec128, btvFFF0fMask);
756 
757  // rv0
758  rv00 = bt_splat_ps(rv02, 0);
759  rv01 = bt_splat_ps(rv02, 1);
760  rv02 = bt_splat_ps(rv02, 2);
761 
762  rv00 = _mm_mul_ps(rv00, mv0);
763  rv01 = _mm_mul_ps(rv01, mv1);
764  rv02 = _mm_mul_ps(rv02, mv2);
765 
766  // rv1
767  rv10 = bt_splat_ps(rv12, 0);
768  rv11 = bt_splat_ps(rv12, 1);
769  rv12 = bt_splat_ps(rv12, 2);
770 
771  rv10 = _mm_mul_ps(rv10, mv0);
772  rv11 = _mm_mul_ps(rv11, mv1);
773  rv12 = _mm_mul_ps(rv12, mv2);
774 
775  // rv2
776  rv20 = bt_splat_ps(rv22, 0);
777  rv21 = bt_splat_ps(rv22, 1);
778  rv22 = bt_splat_ps(rv22, 2);
779 
780  rv20 = _mm_mul_ps(rv20, mv0);
781  rv21 = _mm_mul_ps(rv21, mv1);
782  rv22 = _mm_mul_ps(rv22, mv2);
783 
784  rv00 = _mm_add_ps(rv00, rv01);
785  rv10 = _mm_add_ps(rv10, rv11);
786  rv20 = _mm_add_ps(rv20, rv21);
787 
788  m_el[0].mVec128 = _mm_add_ps(rv00, rv02);
789  m_el[1].mVec128 = _mm_add_ps(rv10, rv12);
790  m_el[2].mVec128 = _mm_add_ps(rv20, rv22);
791 
792 #elif defined(BT_USE_NEON)
793 
794  float32x4_t rv0, rv1, rv2;
795  float32x4_t v0, v1, v2;
796  float32x4_t mv0, mv1, mv2;
797 
798  v0 = m_el[0].mVec128;
799  v1 = m_el[1].mVec128;
800  v2 = m_el[2].mVec128;
801 
802  mv0 = (float32x4_t) vandq_s32((int32x4_t)m[0].mVec128, btvFFF0Mask);
803  mv1 = (float32x4_t) vandq_s32((int32x4_t)m[1].mVec128, btvFFF0Mask);
804  mv2 = (float32x4_t) vandq_s32((int32x4_t)m[2].mVec128, btvFFF0Mask);
805 
806  rv0 = vmulq_lane_f32(mv0, vget_low_f32(v0), 0);
807  rv1 = vmulq_lane_f32(mv0, vget_low_f32(v1), 0);
808  rv2 = vmulq_lane_f32(mv0, vget_low_f32(v2), 0);
809 
810  rv0 = vmlaq_lane_f32(rv0, mv1, vget_low_f32(v0), 1);
811  rv1 = vmlaq_lane_f32(rv1, mv1, vget_low_f32(v1), 1);
812  rv2 = vmlaq_lane_f32(rv2, mv1, vget_low_f32(v2), 1);
813 
814  rv0 = vmlaq_lane_f32(rv0, mv2, vget_high_f32(v0), 0);
815  rv1 = vmlaq_lane_f32(rv1, mv2, vget_high_f32(v1), 0);
816  rv2 = vmlaq_lane_f32(rv2, mv2, vget_high_f32(v2), 0);
817 
818  m_el[0].mVec128 = rv0;
819  m_el[1].mVec128 = rv1;
820  m_el[2].mVec128 = rv2;
821 #else
822  setValue(
823  m.tdotx(m_el[0]), m.tdoty(m_el[0]), m.tdotz(m_el[0]),
824  m.tdotx(m_el[1]), m.tdoty(m_el[1]), m.tdotz(m_el[1]),
825  m.tdotx(m_el[2]), m.tdoty(m_el[2]), m.tdotz(m_el[2]));
826 #endif
827  return *this;
828 }
829 
832 {
833 #if (defined (BT_USE_SSE_IN_API) && defined (BT_USE_SSE))|| defined (BT_USE_NEON)
834  m_el[0].mVec128 = m_el[0].mVec128 + m.m_el[0].mVec128;
835  m_el[1].mVec128 = m_el[1].mVec128 + m.m_el[1].mVec128;
836  m_el[2].mVec128 = m_el[2].mVec128 + m.m_el[2].mVec128;
837 #else
838  setValue(
839  m_el[0][0]+m.m_el[0][0],
840  m_el[0][1]+m.m_el[0][1],
841  m_el[0][2]+m.m_el[0][2],
842  m_el[1][0]+m.m_el[1][0],
843  m_el[1][1]+m.m_el[1][1],
844  m_el[1][2]+m.m_el[1][2],
845  m_el[2][0]+m.m_el[2][0],
846  m_el[2][1]+m.m_el[2][1],
847  m_el[2][2]+m.m_el[2][2]);
848 #endif
849  return *this;
850 }
851 
853 operator*(const btMatrix3x3& m, const btScalar & k)
854 {
855 #if (defined (BT_USE_SSE_IN_API) && defined (BT_USE_SSE))
856  __m128 vk = bt_splat_ps(_mm_load_ss((float *)&k), 0x80);
857  return btMatrix3x3(
858  _mm_mul_ps(m[0].mVec128, vk),
859  _mm_mul_ps(m[1].mVec128, vk),
860  _mm_mul_ps(m[2].mVec128, vk));
861 #elif defined(BT_USE_NEON)
862  return btMatrix3x3(
863  vmulq_n_f32(m[0].mVec128, k),
864  vmulq_n_f32(m[1].mVec128, k),
865  vmulq_n_f32(m[2].mVec128, k));
866 #else
867  return btMatrix3x3(
868  m[0].x()*k,m[0].y()*k,m[0].z()*k,
869  m[1].x()*k,m[1].y()*k,m[1].z()*k,
870  m[2].x()*k,m[2].y()*k,m[2].z()*k);
871 #endif
872 }
873 
875 operator+(const btMatrix3x3& m1, const btMatrix3x3& m2)
876 {
877 #if (defined (BT_USE_SSE_IN_API) && defined (BT_USE_SSE))|| defined (BT_USE_NEON)
878  return btMatrix3x3(
879  m1[0].mVec128 + m2[0].mVec128,
880  m1[1].mVec128 + m2[1].mVec128,
881  m1[2].mVec128 + m2[2].mVec128);
882 #else
883  return btMatrix3x3(
884  m1[0][0]+m2[0][0],
885  m1[0][1]+m2[0][1],
886  m1[0][2]+m2[0][2],
887 
888  m1[1][0]+m2[1][0],
889  m1[1][1]+m2[1][1],
890  m1[1][2]+m2[1][2],
891 
892  m1[2][0]+m2[2][0],
893  m1[2][1]+m2[2][1],
894  m1[2][2]+m2[2][2]);
895 #endif
896 }
897 
899 operator-(const btMatrix3x3& m1, const btMatrix3x3& m2)
900 {
901 #if (defined (BT_USE_SSE_IN_API) && defined (BT_USE_SSE))|| defined (BT_USE_NEON)
902  return btMatrix3x3(
903  m1[0].mVec128 - m2[0].mVec128,
904  m1[1].mVec128 - m2[1].mVec128,
905  m1[2].mVec128 - m2[2].mVec128);
906 #else
907  return btMatrix3x3(
908  m1[0][0]-m2[0][0],
909  m1[0][1]-m2[0][1],
910  m1[0][2]-m2[0][2],
911 
912  m1[1][0]-m2[1][0],
913  m1[1][1]-m2[1][1],
914  m1[1][2]-m2[1][2],
915 
916  m1[2][0]-m2[2][0],
917  m1[2][1]-m2[2][1],
918  m1[2][2]-m2[2][2]);
919 #endif
920 }
921 
922 
925 {
926 #if (defined (BT_USE_SSE_IN_API) && defined (BT_USE_SSE))|| defined (BT_USE_NEON)
927  m_el[0].mVec128 = m_el[0].mVec128 - m.m_el[0].mVec128;
928  m_el[1].mVec128 = m_el[1].mVec128 - m.m_el[1].mVec128;
929  m_el[2].mVec128 = m_el[2].mVec128 - m.m_el[2].mVec128;
930 #else
931  setValue(
932  m_el[0][0]-m.m_el[0][0],
933  m_el[0][1]-m.m_el[0][1],
934  m_el[0][2]-m.m_el[0][2],
935  m_el[1][0]-m.m_el[1][0],
936  m_el[1][1]-m.m_el[1][1],
937  m_el[1][2]-m.m_el[1][2],
938  m_el[2][0]-m.m_el[2][0],
939  m_el[2][1]-m.m_el[2][1],
940  m_el[2][2]-m.m_el[2][2]);
941 #endif
942  return *this;
943 }
944 
945 
948 {
949  return btTriple((*this)[0], (*this)[1], (*this)[2]);
950 }
951 
952 
955 {
956 #if (defined (BT_USE_SSE_IN_API) && defined (BT_USE_SSE))
957  return btMatrix3x3(
958  _mm_and_ps(m_el[0].mVec128, btvAbsfMask),
959  _mm_and_ps(m_el[1].mVec128, btvAbsfMask),
960  _mm_and_ps(m_el[2].mVec128, btvAbsfMask));
961 #elif defined(BT_USE_NEON)
962  return btMatrix3x3(
963  (float32x4_t)vandq_s32((int32x4_t)m_el[0].mVec128, btv3AbsMask),
964  (float32x4_t)vandq_s32((int32x4_t)m_el[1].mVec128, btv3AbsMask),
965  (float32x4_t)vandq_s32((int32x4_t)m_el[2].mVec128, btv3AbsMask));
966 #else
967  return btMatrix3x3(
968  btFabs(m_el[0].x()), btFabs(m_el[0].y()), btFabs(m_el[0].z()),
969  btFabs(m_el[1].x()), btFabs(m_el[1].y()), btFabs(m_el[1].z()),
970  btFabs(m_el[2].x()), btFabs(m_el[2].y()), btFabs(m_el[2].z()));
971 #endif
972 }
973 
976 {
977 #if (defined (BT_USE_SSE_IN_API) && defined (BT_USE_SSE))
978  __m128 v0 = m_el[0].mVec128;
979  __m128 v1 = m_el[1].mVec128;
980  __m128 v2 = m_el[2].mVec128; // x2 y2 z2 w2
981  __m128 vT;
982 
983  v2 = _mm_and_ps(v2, btvFFF0fMask); // x2 y2 z2 0
984 
985  vT = _mm_unpackhi_ps(v0, v1); // z0 z1 * *
986  v0 = _mm_unpacklo_ps(v0, v1); // x0 x1 y0 y1
987 
988  v1 = _mm_shuffle_ps(v0, v2, BT_SHUFFLE(2, 3, 1, 3) ); // y0 y1 y2 0
989  v0 = _mm_shuffle_ps(v0, v2, BT_SHUFFLE(0, 1, 0, 3) ); // x0 x1 x2 0
990  v2 = btCastdTo128f(_mm_move_sd(btCastfTo128d(v2), btCastfTo128d(vT))); // z0 z1 z2 0
991 
992 
993  return btMatrix3x3( v0, v1, v2 );
994 #elif defined(BT_USE_NEON)
995  // note: zeros the w channel. We can preserve it at the cost of two more vtrn instructions.
996  static const uint32x2_t zMask = (const uint32x2_t) {-1, 0 };
997  float32x4x2_t top = vtrnq_f32( m_el[0].mVec128, m_el[1].mVec128 ); // {x0 x1 z0 z1}, {y0 y1 w0 w1}
998  float32x2x2_t bl = vtrn_f32( vget_low_f32(m_el[2].mVec128), vdup_n_f32(0.0f) ); // {x2 0 }, {y2 0}
999  float32x4_t v0 = vcombine_f32( vget_low_f32(top.val[0]), bl.val[0] );
1000  float32x4_t v1 = vcombine_f32( vget_low_f32(top.val[1]), bl.val[1] );
1001  float32x2_t q = (float32x2_t) vand_u32( (uint32x2_t) vget_high_f32( m_el[2].mVec128), zMask );
1002  float32x4_t v2 = vcombine_f32( vget_high_f32(top.val[0]), q ); // z0 z1 z2 0
1003  return btMatrix3x3( v0, v1, v2 );
1004 #else
1005  return btMatrix3x3( m_el[0].x(), m_el[1].x(), m_el[2].x(),
1006  m_el[0].y(), m_el[1].y(), m_el[2].y(),
1007  m_el[0].z(), m_el[1].z(), m_el[2].z());
1008 #endif
1009 }
1010 
1013 {
1014  return btMatrix3x3(cofac(1, 1, 2, 2), cofac(0, 2, 2, 1), cofac(0, 1, 1, 2),
1015  cofac(1, 2, 2, 0), cofac(0, 0, 2, 2), cofac(0, 2, 1, 0),
1016  cofac(1, 0, 2, 1), cofac(0, 1, 2, 0), cofac(0, 0, 1, 1));
1017 }
1018 
1021 {
1022  btVector3 co(cofac(1, 1, 2, 2), cofac(1, 2, 2, 0), cofac(1, 0, 2, 1));
1023  btScalar det = (*this)[0].dot(co);
1024  btFullAssert(det != btScalar(0.0));
1025  btScalar s = btScalar(1.0) / det;
1026  return btMatrix3x3(co.x() * s, cofac(0, 2, 2, 1) * s, cofac(0, 1, 1, 2) * s,
1027  co.y() * s, cofac(0, 0, 2, 2) * s, cofac(0, 2, 1, 0) * s,
1028  co.z() * s, cofac(0, 1, 2, 0) * s, cofac(0, 0, 1, 1) * s);
1029 }
1030 
1033 {
1034 #if (defined (BT_USE_SSE_IN_API) && defined (BT_USE_SSE))
1035  // zeros w
1036 // static const __m128i xyzMask = (const __m128i){ -1ULL, 0xffffffffULL };
1037  __m128 row = m_el[0].mVec128;
1038  __m128 m0 = _mm_and_ps( m.getRow(0).mVec128, btvFFF0fMask );
1039  __m128 m1 = _mm_and_ps( m.getRow(1).mVec128, btvFFF0fMask);
1040  __m128 m2 = _mm_and_ps( m.getRow(2).mVec128, btvFFF0fMask );
1041  __m128 r0 = _mm_mul_ps(m0, _mm_shuffle_ps(row, row, 0));
1042  __m128 r1 = _mm_mul_ps(m0, _mm_shuffle_ps(row, row, 0x55));
1043  __m128 r2 = _mm_mul_ps(m0, _mm_shuffle_ps(row, row, 0xaa));
1044  row = m_el[1].mVec128;
1045  r0 = _mm_add_ps( r0, _mm_mul_ps(m1, _mm_shuffle_ps(row, row, 0)));
1046  r1 = _mm_add_ps( r1, _mm_mul_ps(m1, _mm_shuffle_ps(row, row, 0x55)));
1047  r2 = _mm_add_ps( r2, _mm_mul_ps(m1, _mm_shuffle_ps(row, row, 0xaa)));
1048  row = m_el[2].mVec128;
1049  r0 = _mm_add_ps( r0, _mm_mul_ps(m2, _mm_shuffle_ps(row, row, 0)));
1050  r1 = _mm_add_ps( r1, _mm_mul_ps(m2, _mm_shuffle_ps(row, row, 0x55)));
1051  r2 = _mm_add_ps( r2, _mm_mul_ps(m2, _mm_shuffle_ps(row, row, 0xaa)));
1052  return btMatrix3x3( r0, r1, r2 );
1053 
1054 #elif defined BT_USE_NEON
1055  // zeros w
1056  static const uint32x4_t xyzMask = (const uint32x4_t){ -1, -1, -1, 0 };
1057  float32x4_t m0 = (float32x4_t) vandq_u32( (uint32x4_t) m.getRow(0).mVec128, xyzMask );
1058  float32x4_t m1 = (float32x4_t) vandq_u32( (uint32x4_t) m.getRow(1).mVec128, xyzMask );
1059  float32x4_t m2 = (float32x4_t) vandq_u32( (uint32x4_t) m.getRow(2).mVec128, xyzMask );
1060  float32x4_t row = m_el[0].mVec128;
1061  float32x4_t r0 = vmulq_lane_f32( m0, vget_low_f32(row), 0);
1062  float32x4_t r1 = vmulq_lane_f32( m0, vget_low_f32(row), 1);
1063  float32x4_t r2 = vmulq_lane_f32( m0, vget_high_f32(row), 0);
1064  row = m_el[1].mVec128;
1065  r0 = vmlaq_lane_f32( r0, m1, vget_low_f32(row), 0);
1066  r1 = vmlaq_lane_f32( r1, m1, vget_low_f32(row), 1);
1067  r2 = vmlaq_lane_f32( r2, m1, vget_high_f32(row), 0);
1068  row = m_el[2].mVec128;
1069  r0 = vmlaq_lane_f32( r0, m2, vget_low_f32(row), 0);
1070  r1 = vmlaq_lane_f32( r1, m2, vget_low_f32(row), 1);
1071  r2 = vmlaq_lane_f32( r2, m2, vget_high_f32(row), 0);
1072  return btMatrix3x3( r0, r1, r2 );
1073 #else
1074  return btMatrix3x3(
1075  m_el[0].x() * m[0].x() + m_el[1].x() * m[1].x() + m_el[2].x() * m[2].x(),
1076  m_el[0].x() * m[0].y() + m_el[1].x() * m[1].y() + m_el[2].x() * m[2].y(),
1077  m_el[0].x() * m[0].z() + m_el[1].x() * m[1].z() + m_el[2].x() * m[2].z(),
1078  m_el[0].y() * m[0].x() + m_el[1].y() * m[1].x() + m_el[2].y() * m[2].x(),
1079  m_el[0].y() * m[0].y() + m_el[1].y() * m[1].y() + m_el[2].y() * m[2].y(),
1080  m_el[0].y() * m[0].z() + m_el[1].y() * m[1].z() + m_el[2].y() * m[2].z(),
1081  m_el[0].z() * m[0].x() + m_el[1].z() * m[1].x() + m_el[2].z() * m[2].x(),
1082  m_el[0].z() * m[0].y() + m_el[1].z() * m[1].y() + m_el[2].z() * m[2].y(),
1083  m_el[0].z() * m[0].z() + m_el[1].z() * m[1].z() + m_el[2].z() * m[2].z());
1084 #endif
1085 }
1086 
1089 {
1090 #if (defined (BT_USE_SSE_IN_API) && defined (BT_USE_SSE))
1091  __m128 a0 = m_el[0].mVec128;
1092  __m128 a1 = m_el[1].mVec128;
1093  __m128 a2 = m_el[2].mVec128;
1094 
1095  btMatrix3x3 mT = m.transpose(); // we rely on transpose() zeroing w channel so that we don't have to do it here
1096  __m128 mx = mT[0].mVec128;
1097  __m128 my = mT[1].mVec128;
1098  __m128 mz = mT[2].mVec128;
1099 
1100  __m128 r0 = _mm_mul_ps(mx, _mm_shuffle_ps(a0, a0, 0x00));
1101  __m128 r1 = _mm_mul_ps(mx, _mm_shuffle_ps(a1, a1, 0x00));
1102  __m128 r2 = _mm_mul_ps(mx, _mm_shuffle_ps(a2, a2, 0x00));
1103  r0 = _mm_add_ps(r0, _mm_mul_ps(my, _mm_shuffle_ps(a0, a0, 0x55)));
1104  r1 = _mm_add_ps(r1, _mm_mul_ps(my, _mm_shuffle_ps(a1, a1, 0x55)));
1105  r2 = _mm_add_ps(r2, _mm_mul_ps(my, _mm_shuffle_ps(a2, a2, 0x55)));
1106  r0 = _mm_add_ps(r0, _mm_mul_ps(mz, _mm_shuffle_ps(a0, a0, 0xaa)));
1107  r1 = _mm_add_ps(r1, _mm_mul_ps(mz, _mm_shuffle_ps(a1, a1, 0xaa)));
1108  r2 = _mm_add_ps(r2, _mm_mul_ps(mz, _mm_shuffle_ps(a2, a2, 0xaa)));
1109  return btMatrix3x3( r0, r1, r2);
1110 
1111 #elif defined BT_USE_NEON
1112  float32x4_t a0 = m_el[0].mVec128;
1113  float32x4_t a1 = m_el[1].mVec128;
1114  float32x4_t a2 = m_el[2].mVec128;
1115 
1116  btMatrix3x3 mT = m.transpose(); // we rely on transpose() zeroing w channel so that we don't have to do it here
1117  float32x4_t mx = mT[0].mVec128;
1118  float32x4_t my = mT[1].mVec128;
1119  float32x4_t mz = mT[2].mVec128;
1120 
1121  float32x4_t r0 = vmulq_lane_f32( mx, vget_low_f32(a0), 0);
1122  float32x4_t r1 = vmulq_lane_f32( mx, vget_low_f32(a1), 0);
1123  float32x4_t r2 = vmulq_lane_f32( mx, vget_low_f32(a2), 0);
1124  r0 = vmlaq_lane_f32( r0, my, vget_low_f32(a0), 1);
1125  r1 = vmlaq_lane_f32( r1, my, vget_low_f32(a1), 1);
1126  r2 = vmlaq_lane_f32( r2, my, vget_low_f32(a2), 1);
1127  r0 = vmlaq_lane_f32( r0, mz, vget_high_f32(a0), 0);
1128  r1 = vmlaq_lane_f32( r1, mz, vget_high_f32(a1), 0);
1129  r2 = vmlaq_lane_f32( r2, mz, vget_high_f32(a2), 0);
1130  return btMatrix3x3( r0, r1, r2 );
1131 
1132 #else
1133  return btMatrix3x3(
1134  m_el[0].dot(m[0]), m_el[0].dot(m[1]), m_el[0].dot(m[2]),
1135  m_el[1].dot(m[0]), m_el[1].dot(m[1]), m_el[1].dot(m[2]),
1136  m_el[2].dot(m[0]), m_el[2].dot(m[1]), m_el[2].dot(m[2]));
1137 #endif
1138 }
1139 
1141 operator*(const btMatrix3x3& m, const btVector3& v)
1142 {
1143 #if (defined (BT_USE_SSE_IN_API) && defined (BT_USE_SSE))|| defined (BT_USE_NEON)
1144  return v.dot3(m[0], m[1], m[2]);
1145 #else
1146  return btVector3(m[0].dot(v), m[1].dot(v), m[2].dot(v));
1147 #endif
1148 }
1149 
1150 
operator*(const btVector3& v, const btMatrix3x3& m)
{
	// Row-vector * matrix (equivalently m^T * v): component i of the result is
	// v dotted with column i of m.
#if (defined (BT_USE_SSE_IN_API) && defined (BT_USE_SSE))

	const __m128 vv = v.mVec128;

	// Broadcast each component of v across a register.
	__m128 c0 = bt_splat_ps( vv, 0);
	__m128 c1 = bt_splat_ps( vv, 1);
	__m128 c2 = bt_splat_ps( vv, 2);

	// Scale each row of m by the matching component of v and sum the rows.
	// btvFFF0fMask zeroes the (unused) w lane so it cannot pollute the result.
	c0 = _mm_mul_ps(c0, _mm_and_ps(m[0].mVec128, btvFFF0fMask) );
	c1 = _mm_mul_ps(c1, _mm_and_ps(m[1].mVec128, btvFFF0fMask) );
	c0 = _mm_add_ps(c0, c1);
	c2 = _mm_mul_ps(c2, _mm_and_ps(m[2].mVec128, btvFFF0fMask) );

	return btVector3(_mm_add_ps(c0, c2));
#elif defined(BT_USE_NEON)
	const float32x4_t vv = v.mVec128;
	const float32x2_t vlo = vget_low_f32(vv);
	const float32x2_t vhi = vget_high_f32(vv);

	float32x4_t c0, c1, c2;

	// Zero the w lanes of the matrix rows before accumulating.
	c0 = (float32x4_t) vandq_s32((int32x4_t)m[0].mVec128, btvFFF0Mask);
	c1 = (float32x4_t) vandq_s32((int32x4_t)m[1].mVec128, btvFFF0Mask);
	c2 = (float32x4_t) vandq_s32((int32x4_t)m[2].mVec128, btvFFF0Mask);

	// row_i(m) * v[i], then sum the three scaled rows.
	c0 = vmulq_lane_f32(c0, vlo, 0);
	c1 = vmulq_lane_f32(c1, vlo, 1);
	c2 = vmulq_lane_f32(c2, vhi, 0);
	c0 = vaddq_f32(c0, c1);
	c0 = vaddq_f32(c0, c2);

	return btVector3(c0);
#else
	// Scalar fallback: tdotx/tdoty/tdotz dot v against the columns of m.
	return btVector3(m.tdotx(v), m.tdoty(v), m.tdotz(v));
#endif
}
1190 
operator*(const btMatrix3x3& m1, const btMatrix3x3& m2)
{
	// Matrix product m1 * m2. Each result row i is a linear combination of the
	// rows of m2, weighted by the components of row i of m1.
#if (defined (BT_USE_SSE_IN_API) && defined (BT_USE_SSE))

	__m128 m10 = m1[0].mVec128;
	__m128 m11 = m1[1].mVec128;
	__m128 m12 = m1[2].mVec128;

	// Row 0 of m2 with the w lane masked to zero.
	__m128 m2v = _mm_and_ps(m2[0].mVec128, btvFFF0fMask);

	// Broadcast m1[i].x and scale row 0 of m2 by it.
	__m128 c0 = bt_splat_ps( m10, 0);
	__m128 c1 = bt_splat_ps( m11, 0);
	__m128 c2 = bt_splat_ps( m12, 0);

	c0 = _mm_mul_ps(c0, m2v);
	c1 = _mm_mul_ps(c1, m2v);
	c2 = _mm_mul_ps(c2, m2v);

	// Accumulate m1[i].y * row 1 of m2.
	m2v = _mm_and_ps(m2[1].mVec128, btvFFF0fMask);

	__m128 c0_1 = bt_splat_ps( m10, 1);
	__m128 c1_1 = bt_splat_ps( m11, 1);
	__m128 c2_1 = bt_splat_ps( m12, 1);

	c0_1 = _mm_mul_ps(c0_1, m2v);
	c1_1 = _mm_mul_ps(c1_1, m2v);
	c2_1 = _mm_mul_ps(c2_1, m2v);

	m2v = _mm_and_ps(m2[2].mVec128, btvFFF0fMask);

	c0 = _mm_add_ps(c0, c0_1);
	c1 = _mm_add_ps(c1, c1_1);
	c2 = _mm_add_ps(c2, c2_1);

	// Accumulate m1[i].z * row 2 of m2 (m10..m12 are dead after the splat, so
	// they are reused as scratch registers here).
	m10 = bt_splat_ps( m10, 2);
	m11 = bt_splat_ps( m11, 2);
	m12 = bt_splat_ps( m12, 2);

	m10 = _mm_mul_ps(m10, m2v);
	m11 = _mm_mul_ps(m11, m2v);
	m12 = _mm_mul_ps(m12, m2v);

	c0 = _mm_add_ps(c0, m10);
	c1 = _mm_add_ps(c1, m11);
	c2 = _mm_add_ps(c2, m12);

	return btMatrix3x3(c0, c1, c2);

#elif defined(BT_USE_NEON)

	float32x4_t rv0, rv1, rv2;
	float32x4_t v0, v1, v2;
	float32x4_t mv0, mv1, mv2;

	v0 = m1[0].mVec128;
	v1 = m1[1].mVec128;
	v2 = m1[2].mVec128;

	// Rows of m2 with the w lanes masked to zero.
	mv0 = (float32x4_t) vandq_s32((int32x4_t)m2[0].mVec128, btvFFF0Mask);
	mv1 = (float32x4_t) vandq_s32((int32x4_t)m2[1].mVec128, btvFFF0Mask);
	mv2 = (float32x4_t) vandq_s32((int32x4_t)m2[2].mVec128, btvFFF0Mask);

	// rv_i = m1[i].x * row0(m2) + m1[i].y * row1(m2) + m1[i].z * row2(m2),
	// built up with lane-broadcast multiply and multiply-accumulate.
	rv0 = vmulq_lane_f32(mv0, vget_low_f32(v0), 0);
	rv1 = vmulq_lane_f32(mv0, vget_low_f32(v1), 0);
	rv2 = vmulq_lane_f32(mv0, vget_low_f32(v2), 0);

	rv0 = vmlaq_lane_f32(rv0, mv1, vget_low_f32(v0), 1);
	rv1 = vmlaq_lane_f32(rv1, mv1, vget_low_f32(v1), 1);
	rv2 = vmlaq_lane_f32(rv2, mv1, vget_low_f32(v2), 1);

	rv0 = vmlaq_lane_f32(rv0, mv2, vget_high_f32(v0), 0);
	rv1 = vmlaq_lane_f32(rv1, mv2, vget_high_f32(v1), 0);
	rv2 = vmlaq_lane_f32(rv2, mv2, vget_high_f32(v2), 0);

	return btMatrix3x3(rv0, rv1, rv2);

#else
	// Scalar fallback: entry (i,j) is row_i(m1) dot column_j(m2); tdotx/y/z
	// compute the column dot products.
	return btMatrix3x3(
		m2.tdotx( m1[0]), m2.tdoty( m1[0]), m2.tdotz( m1[0]),
		m2.tdotx( m1[1]), m2.tdoty( m1[1]), m2.tdotz( m1[1]),
		m2.tdotx( m1[2]), m2.tdoty( m1[2]), m2.tdotz( m1[2]));
#endif
}
1275 
1276 /*
1277 SIMD_FORCE_INLINE btMatrix3x3 btMultTransposeLeft(const btMatrix3x3& m1, const btMatrix3x3& m2) {
1278 return btMatrix3x3(
1279 m1[0][0] * m2[0][0] + m1[1][0] * m2[1][0] + m1[2][0] * m2[2][0],
1280 m1[0][0] * m2[0][1] + m1[1][0] * m2[1][1] + m1[2][0] * m2[2][1],
1281 m1[0][0] * m2[0][2] + m1[1][0] * m2[1][2] + m1[2][0] * m2[2][2],
1282 m1[0][1] * m2[0][0] + m1[1][1] * m2[1][0] + m1[2][1] * m2[2][0],
1283 m1[0][1] * m2[0][1] + m1[1][1] * m2[1][1] + m1[2][1] * m2[2][1],
1284 m1[0][1] * m2[0][2] + m1[1][1] * m2[1][2] + m1[2][1] * m2[2][2],
1285 m1[0][2] * m2[0][0] + m1[1][2] * m2[1][0] + m1[2][2] * m2[2][0],
1286 m1[0][2] * m2[0][1] + m1[1][2] * m2[1][1] + m1[2][2] * m2[2][1],
1287 m1[0][2] * m2[0][2] + m1[1][2] * m2[1][2] + m1[2][2] * m2[2][2]);
1288 }
1289 */
1290 
1294 {
1295 #if (defined (BT_USE_SSE_IN_API) && defined (BT_USE_SSE))
1296 
1297  __m128 c0, c1, c2;
1298 
1299  c0 = _mm_cmpeq_ps(m1[0].mVec128, m2[0].mVec128);
1300  c1 = _mm_cmpeq_ps(m1[1].mVec128, m2[1].mVec128);
1301  c2 = _mm_cmpeq_ps(m1[2].mVec128, m2[2].mVec128);
1302 
1303  c0 = _mm_and_ps(c0, c1);
1304  c0 = _mm_and_ps(c0, c2);
1305 
1306  return (0x7 == _mm_movemask_ps((__m128)c0));
1307 #else
1308  return
1309  ( m1[0][0] == m2[0][0] && m1[1][0] == m2[1][0] && m1[2][0] == m2[2][0] &&
1310  m1[0][1] == m2[0][1] && m1[1][1] == m2[1][1] && m1[2][1] == m2[2][1] &&
1311  m1[0][2] == m2[0][2] && m1[1][2] == m2[1][2] && m1[2][2] == m2[2][2] );
1312 #endif
1313 }
1314 
1317 {
1319 };
1320 
1323 {
1325 };
1326 
1327 
1328 
1329 
1331 {
1332  for (int i=0;i<3;i++)
1333  m_el[i].serialize(dataOut.m_el[i]);
1334 }
1335 
1337 {
1338  for (int i=0;i<3;i++)
1339  m_el[i].serializeFloat(dataOut.m_el[i]);
1340 }
1341 
1342 
1344 {
1345  for (int i=0;i<3;i++)
1346  m_el[i].deSerialize(dataIn.m_el[i]);
1347 }
1348 
1350 {
1351  for (int i=0;i<3;i++)
1352  m_el[i].deSerializeFloat(dataIn.m_el[i]);
1353 }
1354 
1356 {
1357  for (int i=0;i<3;i++)
1358  m_el[i].deSerializeDouble(dataIn.m_el[i]);
1359 }
1360 
1361 #endif //BT_MATRIX3x3_H
1362