Bullet Collision Detection & Physics Library
btMatrix3x3.h
Go to the documentation of this file.
1/*
2Copyright (c) 2003-2006 Gino van den Bergen / Erwin Coumans http://continuousphysics.com/Bullet/
3
4This software is provided 'as-is', without any express or implied warranty.
5In no event will the authors be held liable for any damages arising from the use of this software.
6Permission is granted to anyone to use this software for any purpose,
7including commercial applications, and to alter it and redistribute it freely,
8subject to the following restrictions:
9
101. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
112. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
123. This notice may not be removed or altered from any source distribution.
13*/
14
15
16#ifndef BT_MATRIX3x3_H
17#define BT_MATRIX3x3_H
18
19#include "btVector3.h"
20#include "btQuaternion.h"
21#include <stdio.h>
22
23#ifdef BT_USE_SSE
24//const __m128 ATTRIBUTE_ALIGNED16(v2220) = {2.0f, 2.0f, 2.0f, 0.0f};
25//const __m128 ATTRIBUTE_ALIGNED16(vMPPP) = {-0.0f, +0.0f, +0.0f, +0.0f};
26#define vMPPP (_mm_set_ps (+0.0f, +0.0f, +0.0f, -0.0f))
27#endif
28
29#if defined(BT_USE_SSE)
30#define v1000 (_mm_set_ps(0.0f,0.0f,0.0f,1.0f))
31#define v0100 (_mm_set_ps(0.0f,0.0f,1.0f,0.0f))
32#define v0010 (_mm_set_ps(0.0f,1.0f,0.0f,0.0f))
33#elif defined(BT_USE_NEON)
34const btSimdFloat4 ATTRIBUTE_ALIGNED16(v1000) = {1.0f, 0.0f, 0.0f, 0.0f};
35const btSimdFloat4 ATTRIBUTE_ALIGNED16(v0100) = {0.0f, 1.0f, 0.0f, 0.0f};
36const btSimdFloat4 ATTRIBUTE_ALIGNED16(v0010) = {0.0f, 0.0f, 1.0f, 0.0f};
37#endif
38
39#ifdef BT_USE_DOUBLE_PRECISION
40#define btMatrix3x3Data btMatrix3x3DoubleData
41#else
42#define btMatrix3x3Data btMatrix3x3FloatData
43#endif //BT_USE_DOUBLE_PRECISION
44
45
49
51 btVector3 m_el[3];
52
53public:
56
57 // explicit btMatrix3x3(const btScalar *m) { setFromOpenGLSubMatrix(m); }
58
60 explicit btMatrix3x3(const btQuaternion& q) { setRotation(q); }
61 /*
62 template <typename btScalar>
63 Matrix3x3(const btScalar& yaw, const btScalar& pitch, const btScalar& roll)
64 {
65 setEulerYPR(yaw, pitch, roll);
66 }
67 */
69 btMatrix3x3(const btScalar& xx, const btScalar& xy, const btScalar& xz,
70 const btScalar& yx, const btScalar& yy, const btScalar& yz,
71 const btScalar& zx, const btScalar& zy, const btScalar& zz)
72 {
73 setValue(xx, xy, xz,
74 yx, yy, yz,
75 zx, zy, zz);
76 }
77
78#if (defined (BT_USE_SSE_IN_API) && defined (BT_USE_SSE))|| defined (BT_USE_NEON)
79 SIMD_FORCE_INLINE btMatrix3x3 (const btSimdFloat4 v0, const btSimdFloat4 v1, const btSimdFloat4 v2 )
80 {
81 m_el[0].mVec128 = v0;
82 m_el[1].mVec128 = v1;
83 m_el[2].mVec128 = v2;
84 }
85
86 SIMD_FORCE_INLINE btMatrix3x3 (const btVector3& v0, const btVector3& v1, const btVector3& v2 )
87 {
88 m_el[0] = v0;
89 m_el[1] = v1;
90 m_el[2] = v2;
91 }
92
93 // Copy constructor
95 {
96 m_el[0].mVec128 = rhs.m_el[0].mVec128;
97 m_el[1].mVec128 = rhs.m_el[1].mVec128;
98 m_el[2].mVec128 = rhs.m_el[2].mVec128;
99 }
100
101 // Assignment Operator
102 SIMD_FORCE_INLINE btMatrix3x3& operator=(const btMatrix3x3& m)
103 {
104 m_el[0].mVec128 = m.m_el[0].mVec128;
105 m_el[1].mVec128 = m.m_el[1].mVec128;
106 m_el[2].mVec128 = m.m_el[2].mVec128;
107
108 return *this;
109 }
110
111#else
112
115 {
116 m_el[0] = other.m_el[0];
117 m_el[1] = other.m_el[1];
118 m_el[2] = other.m_el[2];
119 }
120
123 {
124 m_el[0] = other.m_el[0];
125 m_el[1] = other.m_el[1];
126 m_el[2] = other.m_el[2];
127 return *this;
128 }
129
130#endif
131
135 {
136 return btVector3(m_el[0][i],m_el[1][i],m_el[2][i]);
137 }
138
139
143 {
144 btFullAssert(0 <= i && i < 3);
145 return m_el[i];
146 }
147
151 {
152 btFullAssert(0 <= i && i < 3);
153 return m_el[i];
154 }
155
159 {
160 btFullAssert(0 <= i && i < 3);
161 return m_el[i];
162 }
163
167 btMatrix3x3& operator*=(const btMatrix3x3& m);
168
172 btMatrix3x3& operator+=(const btMatrix3x3& m);
173
177 btMatrix3x3& operator-=(const btMatrix3x3& m);
178
182 {
183 m_el[0].setValue(m[0],m[4],m[8]);
184 m_el[1].setValue(m[1],m[5],m[9]);
185 m_el[2].setValue(m[2],m[6],m[10]);
186
187 }
198 void setValue(const btScalar& xx, const btScalar& xy, const btScalar& xz,
199 const btScalar& yx, const btScalar& yy, const btScalar& yz,
200 const btScalar& zx, const btScalar& zy, const btScalar& zz)
201 {
202 m_el[0].setValue(xx,xy,xz);
203 m_el[1].setValue(yx,yy,yz);
204 m_el[2].setValue(zx,zy,zz);
205 }
206
209 void setRotation(const btQuaternion& q)
210 {
211 btScalar d = q.length2();
212 btFullAssert(d != btScalar(0.0));
213 btScalar s = btScalar(2.0) / d;
214
215 #if defined BT_USE_SIMD_VECTOR3 && defined (BT_USE_SSE_IN_API) && defined (BT_USE_SSE)
216 __m128 vs, Q = q.get128();
217 __m128i Qi = btCastfTo128i(Q);
218 __m128 Y, Z;
219 __m128 V1, V2, V3;
220 __m128 V11, V21, V31;
221 __m128 NQ = _mm_xor_ps(Q, btvMzeroMask);
222 __m128i NQi = btCastfTo128i(NQ);
223
224 V1 = btCastiTo128f(_mm_shuffle_epi32 (Qi, BT_SHUFFLE(1,0,2,3))); // Y X Z W
225 V2 = _mm_shuffle_ps(NQ, Q, BT_SHUFFLE(0,0,1,3)); // -X -X Y W
226 V3 = btCastiTo128f(_mm_shuffle_epi32 (Qi, BT_SHUFFLE(2,1,0,3))); // Z Y X W
227 V1 = _mm_xor_ps(V1, vMPPP); // change the sign of the first element
228
229 V11 = btCastiTo128f(_mm_shuffle_epi32 (Qi, BT_SHUFFLE(1,1,0,3))); // Y Y X W
230 V21 = _mm_unpackhi_ps(Q, Q); // Z Z W W
231 V31 = _mm_shuffle_ps(Q, NQ, BT_SHUFFLE(0,2,0,3)); // X Z -X -W
232
233 V2 = V2 * V1; //
234 V1 = V1 * V11; //
235 V3 = V3 * V31; //
236
237 V11 = _mm_shuffle_ps(NQ, Q, BT_SHUFFLE(2,3,1,3)); // -Z -W Y W
238 V11 = V11 * V21; //
239 V21 = _mm_xor_ps(V21, vMPPP); // change the sign of the first element
240 V31 = _mm_shuffle_ps(Q, NQ, BT_SHUFFLE(3,3,1,3)); // W W -Y -W
241 V31 = _mm_xor_ps(V31, vMPPP); // change the sign of the first element
242 Y = btCastiTo128f(_mm_shuffle_epi32 (NQi, BT_SHUFFLE(3,2,0,3))); // -W -Z -X -W
243 Z = btCastiTo128f(_mm_shuffle_epi32 (Qi, BT_SHUFFLE(1,0,1,3))); // Y X Y W
244
245 vs = _mm_load_ss(&s);
246 V21 = V21 * Y;
247 V31 = V31 * Z;
248
249 V1 = V1 + V11;
250 V2 = V2 + V21;
251 V3 = V3 + V31;
252
253 vs = bt_splat3_ps(vs, 0);
254 // s ready
255 V1 = V1 * vs;
256 V2 = V2 * vs;
257 V3 = V3 * vs;
258
259 V1 = V1 + v1000;
260 V2 = V2 + v0100;
261 V3 = V3 + v0010;
262
263 m_el[0] = V1;
264 m_el[1] = V2;
265 m_el[2] = V3;
266 #else
267 btScalar xs = q.x() * s, ys = q.y() * s, zs = q.z() * s;
268 btScalar wx = q.w() * xs, wy = q.w() * ys, wz = q.w() * zs;
269 btScalar xx = q.x() * xs, xy = q.x() * ys, xz = q.x() * zs;
270 btScalar yy = q.y() * ys, yz = q.y() * zs, zz = q.z() * zs;
271 setValue(
272 btScalar(1.0) - (yy + zz), xy - wz, xz + wy,
273 xy + wz, btScalar(1.0) - (xx + zz), yz - wx,
274 xz - wy, yz + wx, btScalar(1.0) - (xx + yy));
275 #endif
276 }
277
278
284 void setEulerYPR(const btScalar& yaw, const btScalar& pitch, const btScalar& roll)
285 {
286 setEulerZYX(roll, pitch, yaw);
287 }
288
298 void setEulerZYX(btScalar eulerX,btScalar eulerY,btScalar eulerZ) {
300 btScalar ci ( btCos(eulerX));
301 btScalar cj ( btCos(eulerY));
302 btScalar ch ( btCos(eulerZ));
303 btScalar si ( btSin(eulerX));
304 btScalar sj ( btSin(eulerY));
305 btScalar sh ( btSin(eulerZ));
306 btScalar cc = ci * ch;
307 btScalar cs = ci * sh;
308 btScalar sc = si * ch;
309 btScalar ss = si * sh;
310
311 setValue(cj * ch, sj * sc - cs, sj * cc + ss,
312 cj * sh, sj * ss + cc, sj * cs - sc,
313 -sj, cj * si, cj * ci);
314 }
315
318 {
319#if (defined(BT_USE_SSE_IN_API)&& defined (BT_USE_SSE)) || defined(BT_USE_NEON)
320 m_el[0] = v1000;
321 m_el[1] = v0100;
322 m_el[2] = v0010;
323#else
324 setValue(btScalar(1.0), btScalar(0.0), btScalar(0.0),
325 btScalar(0.0), btScalar(1.0), btScalar(0.0),
326 btScalar(0.0), btScalar(0.0), btScalar(1.0));
327#endif
328 }
329
330 static const btMatrix3x3& getIdentity()
331 {
332#if (defined(BT_USE_SSE_IN_API)&& defined (BT_USE_SSE)) || defined(BT_USE_NEON)
333 static const btMatrix3x3
334 identityMatrix(v1000, v0100, v0010);
335#else
336 static const btMatrix3x3
337 identityMatrix(
338 btScalar(1.0), btScalar(0.0), btScalar(0.0),
339 btScalar(0.0), btScalar(1.0), btScalar(0.0),
340 btScalar(0.0), btScalar(0.0), btScalar(1.0));
341#endif
342 return identityMatrix;
343 }
344
348 {
349#if defined BT_USE_SIMD_VECTOR3 && defined (BT_USE_SSE_IN_API) && defined (BT_USE_SSE)
350 __m128 v0 = m_el[0].mVec128;
351 __m128 v1 = m_el[1].mVec128;
352 __m128 v2 = m_el[2].mVec128; // x2 y2 z2 w2
353 __m128 *vm = (__m128 *)m;
354 __m128 vT;
355
356 v2 = _mm_and_ps(v2, btvFFF0fMask); // x2 y2 z2 0
357
358 vT = _mm_unpackhi_ps(v0, v1); // z0 z1 * *
359 v0 = _mm_unpacklo_ps(v0, v1); // x0 x1 y0 y1
360
361 v1 = _mm_shuffle_ps(v0, v2, BT_SHUFFLE(2, 3, 1, 3) ); // y0 y1 y2 0
362 v0 = _mm_shuffle_ps(v0, v2, BT_SHUFFLE(0, 1, 0, 3) ); // x0 x1 x2 0
363 v2 = btCastdTo128f(_mm_move_sd(btCastfTo128d(v2), btCastfTo128d(vT))); // z0 z1 z2 0
364
365 vm[0] = v0;
366 vm[1] = v1;
367 vm[2] = v2;
368#elif defined(BT_USE_NEON)
369 // note: zeros the w channel. We can preserve it at the cost of two more vtrn instructions.
370 static const uint32x2_t zMask = (const uint32x2_t) {static_cast<uint32_t>(-1), 0 };
371 float32x4_t *vm = (float32x4_t *)m;
372 float32x4x2_t top = vtrnq_f32( m_el[0].mVec128, m_el[1].mVec128 ); // {x0 x1 z0 z1}, {y0 y1 w0 w1}
373 float32x2x2_t bl = vtrn_f32( vget_low_f32(m_el[2].mVec128), vdup_n_f32(0.0f) ); // {x2 0 }, {y2 0}
374 float32x4_t v0 = vcombine_f32( vget_low_f32(top.val[0]), bl.val[0] );
375 float32x4_t v1 = vcombine_f32( vget_low_f32(top.val[1]), bl.val[1] );
376 float32x2_t q = (float32x2_t) vand_u32( (uint32x2_t) vget_high_f32( m_el[2].mVec128), zMask );
377 float32x4_t v2 = vcombine_f32( vget_high_f32(top.val[0]), q ); // z0 z1 z2 0
378
379 vm[0] = v0;
380 vm[1] = v1;
381 vm[2] = v2;
382#else
383 m[0] = btScalar(m_el[0].x());
384 m[1] = btScalar(m_el[1].x());
385 m[2] = btScalar(m_el[2].x());
386 m[3] = btScalar(0.0);
387 m[4] = btScalar(m_el[0].y());
388 m[5] = btScalar(m_el[1].y());
389 m[6] = btScalar(m_el[2].y());
390 m[7] = btScalar(0.0);
391 m[8] = btScalar(m_el[0].z());
392 m[9] = btScalar(m_el[1].z());
393 m[10] = btScalar(m_el[2].z());
394 m[11] = btScalar(0.0);
395#endif
396 }
397
401 {
402#if (defined (BT_USE_SSE_IN_API) && defined (BT_USE_SSE))|| defined (BT_USE_NEON)
403 btScalar trace = m_el[0].x() + m_el[1].y() + m_el[2].z();
404 btScalar s, x;
405
406 union {
407 btSimdFloat4 vec;
408 btScalar f[4];
409 } temp;
410
411 if (trace > btScalar(0.0))
412 {
413 x = trace + btScalar(1.0);
414
415 temp.f[0]=m_el[2].y() - m_el[1].z();
416 temp.f[1]=m_el[0].z() - m_el[2].x();
417 temp.f[2]=m_el[1].x() - m_el[0].y();
418 temp.f[3]=x;
419 //temp.f[3]= s * btScalar(0.5);
420 }
421 else
422 {
423 int i, j, k;
424 if(m_el[0].x() < m_el[1].y())
425 {
426 if( m_el[1].y() < m_el[2].z() )
427 { i = 2; j = 0; k = 1; }
428 else
429 { i = 1; j = 2; k = 0; }
430 }
431 else
432 {
433 if( m_el[0].x() < m_el[2].z())
434 { i = 2; j = 0; k = 1; }
435 else
436 { i = 0; j = 1; k = 2; }
437 }
438
439 x = m_el[i][i] - m_el[j][j] - m_el[k][k] + btScalar(1.0);
440
441 temp.f[3] = (m_el[k][j] - m_el[j][k]);
442 temp.f[j] = (m_el[j][i] + m_el[i][j]);
443 temp.f[k] = (m_el[k][i] + m_el[i][k]);
444 temp.f[i] = x;
445 //temp.f[i] = s * btScalar(0.5);
446 }
447
448 s = btSqrt(x);
449 q.set128(temp.vec);
450 s = btScalar(0.5) / s;
451
452 q *= s;
453#else
454 btScalar trace = m_el[0].x() + m_el[1].y() + m_el[2].z();
455
456 btScalar temp[4];
457
458 if (trace > btScalar(0.0))
459 {
460 btScalar s = btSqrt(trace + btScalar(1.0));
461 temp[3]=(s * btScalar(0.5));
462 s = btScalar(0.5) / s;
463
464 temp[0]=((m_el[2].y() - m_el[1].z()) * s);
465 temp[1]=((m_el[0].z() - m_el[2].x()) * s);
466 temp[2]=((m_el[1].x() - m_el[0].y()) * s);
467 }
468 else
469 {
470 int i = m_el[0].x() < m_el[1].y() ?
471 (m_el[1].y() < m_el[2].z() ? 2 : 1) :
472 (m_el[0].x() < m_el[2].z() ? 2 : 0);
473 int j = (i + 1) % 3;
474 int k = (i + 2) % 3;
475
476 btScalar s = btSqrt(m_el[i][i] - m_el[j][j] - m_el[k][k] + btScalar(1.0));
477 temp[i] = s * btScalar(0.5);
478 s = btScalar(0.5) / s;
479
480 temp[3] = (m_el[k][j] - m_el[j][k]) * s;
481 temp[j] = (m_el[j][i] + m_el[i][j]) * s;
482 temp[k] = (m_el[k][i] + m_el[i][k]) * s;
483 }
484 q.setValue(temp[0],temp[1],temp[2],temp[3]);
485#endif
486 }
487
492 void getEulerYPR(btScalar& yaw, btScalar& pitch, btScalar& roll) const
493 {
494
495 // first use the normal calculus
496 yaw = btScalar(btAtan2(m_el[1].x(), m_el[0].x()));
497 pitch = btScalar(btAsin(-m_el[2].x()));
498 roll = btScalar(btAtan2(m_el[2].y(), m_el[2].z()));
499
500 // on pitch = +/-HalfPI
501 if (btFabs(pitch)==SIMD_HALF_PI)
502 {
503 if (yaw>0)
504 yaw-=SIMD_PI;
505 else
506 yaw+=SIMD_PI;
507
508 if (roll>0)
509 roll-=SIMD_PI;
510 else
511 roll+=SIMD_PI;
512 }
513 };
514
515
521 void getEulerZYX(btScalar& yaw, btScalar& pitch, btScalar& roll, unsigned int solution_number = 1) const
522 {
523 struct Euler
524 {
525 btScalar yaw;
526 btScalar pitch;
527 btScalar roll;
528 };
529
530 Euler euler_out;
531 Euler euler_out2; //second solution
532 //get the pointer to the raw data
533
534 // Check that pitch is not at a singularity
535 if (btFabs(m_el[2].x()) >= 1)
536 {
537 euler_out.yaw = 0;
538 euler_out2.yaw = 0;
539
540 // From difference of angles formula
541 btScalar delta = btAtan2(m_el[0].x(),m_el[0].z());
542 if (m_el[2].x() > 0) //gimbal locked up
543 {
544 euler_out.pitch = SIMD_PI / btScalar(2.0);
545 euler_out2.pitch = SIMD_PI / btScalar(2.0);
546 euler_out.roll = euler_out.pitch + delta;
547 euler_out2.roll = euler_out.pitch + delta;
548 }
549 else // gimbal locked down
550 {
551 euler_out.pitch = -SIMD_PI / btScalar(2.0);
552 euler_out2.pitch = -SIMD_PI / btScalar(2.0);
553 euler_out.roll = -euler_out.pitch + delta;
554 euler_out2.roll = -euler_out.pitch + delta;
555 }
556 }
557 else
558 {
559 euler_out.pitch = - btAsin(m_el[2].x());
560 euler_out2.pitch = SIMD_PI - euler_out.pitch;
561
562 euler_out.roll = btAtan2(m_el[2].y()/btCos(euler_out.pitch),
563 m_el[2].z()/btCos(euler_out.pitch));
564 euler_out2.roll = btAtan2(m_el[2].y()/btCos(euler_out2.pitch),
565 m_el[2].z()/btCos(euler_out2.pitch));
566
567 euler_out.yaw = btAtan2(m_el[1].x()/btCos(euler_out.pitch),
568 m_el[0].x()/btCos(euler_out.pitch));
569 euler_out2.yaw = btAtan2(m_el[1].x()/btCos(euler_out2.pitch),
570 m_el[0].x()/btCos(euler_out2.pitch));
571 }
572
573 if (solution_number == 1)
574 {
575 yaw = euler_out.yaw;
576 pitch = euler_out.pitch;
577 roll = euler_out.roll;
578 }
579 else
580 {
581 yaw = euler_out2.yaw;
582 pitch = euler_out2.pitch;
583 roll = euler_out2.roll;
584 }
585 }
586
591 {
592#if (defined (BT_USE_SSE_IN_API) && defined (BT_USE_SSE))|| defined (BT_USE_NEON)
593 return btMatrix3x3(m_el[0] * s, m_el[1] * s, m_el[2] * s);
594#else
595 return btMatrix3x3(
596 m_el[0].x() * s.x(), m_el[0].y() * s.y(), m_el[0].z() * s.z(),
597 m_el[1].x() * s.x(), m_el[1].y() * s.y(), m_el[1].z() * s.z(),
598 m_el[2].x() * s.x(), m_el[2].y() * s.y(), m_el[2].z() * s.z());
599#endif
600 }
601
603 btScalar determinant() const;
605 btMatrix3x3 adjoint() const;
607 btMatrix3x3 absolute() const;
609 btMatrix3x3 transpose() const;
611 btMatrix3x3 inverse() const;
612
617 {
618 btVector3 col1 = getColumn(0);
619 btVector3 col2 = getColumn(1);
620 btVector3 col3 = getColumn(2);
621
622 btScalar det = btDot(col1, btCross(col2, col3));
623 if (btFabs(det)>SIMD_EPSILON)
624 {
625 det = 1.0f / det;
626 }
627 btVector3 x;
628 x[0] = det * btDot(b, btCross(col2, col3));
629 x[1] = det * btDot(col1, btCross(b, col3));
630 x[2] = det * btDot(col1, btCross(col2, b));
631 return x;
632 }
633
634 btMatrix3x3 transposeTimes(const btMatrix3x3& m) const;
635 btMatrix3x3 timesTranspose(const btMatrix3x3& m) const;
636
638 {
639 return m_el[0].x() * v.x() + m_el[1].x() * v.y() + m_el[2].x() * v.z();
640 }
642 {
643 return m_el[0].y() * v.x() + m_el[1].y() * v.y() + m_el[2].y() * v.z();
644 }
646 {
647 return m_el[0].z() * v.x() + m_el[1].z() * v.y() + m_el[2].z() * v.z();
648 }
649
652 SIMD_FORCE_INLINE void extractRotation(btQuaternion &q,btScalar tolerance = 1.0e-9, int maxIter=100)
653 {
654 int iter =0;
655 btScalar w;
656 const btMatrix3x3& A=*this;
657 for(iter = 0; iter < maxIter; iter++)
658 {
659 btMatrix3x3 R(q);
660 btVector3 omega = (R.getColumn(0).cross(A.getColumn(0)) + R.getColumn(1).cross(A.getColumn(1))
661 + R.getColumn(2).cross(A.getColumn(2))
662 ) * (btScalar(1.0) / btFabs(R.getColumn(0).dot(A.getColumn(0)) + R.getColumn
663 (1).dot(A.getColumn(1)) + R.getColumn(2).dot(A.getColumn(2))) +
664 tolerance);
665 w = omega.norm();
666 if(w < tolerance)
667 break;
668 q = btQuaternion(btVector3((btScalar(1.0) / w) * omega),w) *
669 q;
670 q.normalize();
671 }
672 }
673
674
675
682 void diagonalize(btMatrix3x3& rot, btScalar tolerance = 1.0e-9, int maxIter=100)
683 {
684 btQuaternion r;
686 extractRotation(r,tolerance,maxIter);
687 rot.setRotation(r);
688 btMatrix3x3 rotInv = btMatrix3x3(r.inverse());
689 btMatrix3x3 old = *this;
690 setValue(old.tdotx( rotInv[0]), old.tdoty( rotInv[0]), old.tdotz( rotInv[0]),
691 old.tdotx( rotInv[1]), old.tdoty( rotInv[1]), old.tdotz( rotInv[1]),
692 old.tdotx( rotInv[2]), old.tdoty( rotInv[2]), old.tdotz( rotInv[2]));
693 }
694
695
696
697
705 btScalar cofac(int r1, int c1, int r2, int c2) const
706 {
707 return m_el[r1][c1] * m_el[r2][c2] - m_el[r1][c2] * m_el[r2][c1];
708 }
709
710 void serialize(struct btMatrix3x3Data& dataOut) const;
711
712 void serializeFloat(struct btMatrix3x3FloatData& dataOut) const;
713
714 void deSerialize(const struct btMatrix3x3Data& dataIn);
715
716 void deSerializeFloat(const struct btMatrix3x3FloatData& dataIn);
717
718 void deSerializeDouble(const struct btMatrix3x3DoubleData& dataIn);
719
720};
721
722
725{
726#if defined BT_USE_SIMD_VECTOR3 && defined (BT_USE_SSE_IN_API) && defined (BT_USE_SSE)
727 __m128 rv00, rv01, rv02;
728 __m128 rv10, rv11, rv12;
729 __m128 rv20, rv21, rv22;
730 __m128 mv0, mv1, mv2;
731
732 rv02 = m_el[0].mVec128;
733 rv12 = m_el[1].mVec128;
734 rv22 = m_el[2].mVec128;
735
736 mv0 = _mm_and_ps(m[0].mVec128, btvFFF0fMask);
737 mv1 = _mm_and_ps(m[1].mVec128, btvFFF0fMask);
738 mv2 = _mm_and_ps(m[2].mVec128, btvFFF0fMask);
739
740 // rv0
741 rv00 = bt_splat_ps(rv02, 0);
742 rv01 = bt_splat_ps(rv02, 1);
743 rv02 = bt_splat_ps(rv02, 2);
744
745 rv00 = _mm_mul_ps(rv00, mv0);
746 rv01 = _mm_mul_ps(rv01, mv1);
747 rv02 = _mm_mul_ps(rv02, mv2);
748
749 // rv1
750 rv10 = bt_splat_ps(rv12, 0);
751 rv11 = bt_splat_ps(rv12, 1);
752 rv12 = bt_splat_ps(rv12, 2);
753
754 rv10 = _mm_mul_ps(rv10, mv0);
755 rv11 = _mm_mul_ps(rv11, mv1);
756 rv12 = _mm_mul_ps(rv12, mv2);
757
758 // rv2
759 rv20 = bt_splat_ps(rv22, 0);
760 rv21 = bt_splat_ps(rv22, 1);
761 rv22 = bt_splat_ps(rv22, 2);
762
763 rv20 = _mm_mul_ps(rv20, mv0);
764 rv21 = _mm_mul_ps(rv21, mv1);
765 rv22 = _mm_mul_ps(rv22, mv2);
766
767 rv00 = _mm_add_ps(rv00, rv01);
768 rv10 = _mm_add_ps(rv10, rv11);
769 rv20 = _mm_add_ps(rv20, rv21);
770
771 m_el[0].mVec128 = _mm_add_ps(rv00, rv02);
772 m_el[1].mVec128 = _mm_add_ps(rv10, rv12);
773 m_el[2].mVec128 = _mm_add_ps(rv20, rv22);
774
775#elif defined(BT_USE_NEON)
776
777 float32x4_t rv0, rv1, rv2;
778 float32x4_t v0, v1, v2;
779 float32x4_t mv0, mv1, mv2;
780
781 v0 = m_el[0].mVec128;
782 v1 = m_el[1].mVec128;
783 v2 = m_el[2].mVec128;
784
785 mv0 = (float32x4_t) vandq_s32((int32x4_t)m[0].mVec128, btvFFF0Mask);
786 mv1 = (float32x4_t) vandq_s32((int32x4_t)m[1].mVec128, btvFFF0Mask);
787 mv2 = (float32x4_t) vandq_s32((int32x4_t)m[2].mVec128, btvFFF0Mask);
788
789 rv0 = vmulq_lane_f32(mv0, vget_low_f32(v0), 0);
790 rv1 = vmulq_lane_f32(mv0, vget_low_f32(v1), 0);
791 rv2 = vmulq_lane_f32(mv0, vget_low_f32(v2), 0);
792
793 rv0 = vmlaq_lane_f32(rv0, mv1, vget_low_f32(v0), 1);
794 rv1 = vmlaq_lane_f32(rv1, mv1, vget_low_f32(v1), 1);
795 rv2 = vmlaq_lane_f32(rv2, mv1, vget_low_f32(v2), 1);
796
797 rv0 = vmlaq_lane_f32(rv0, mv2, vget_high_f32(v0), 0);
798 rv1 = vmlaq_lane_f32(rv1, mv2, vget_high_f32(v1), 0);
799 rv2 = vmlaq_lane_f32(rv2, mv2, vget_high_f32(v2), 0);
800
801 m_el[0].mVec128 = rv0;
802 m_el[1].mVec128 = rv1;
803 m_el[2].mVec128 = rv2;
804#else
805 setValue(
806 m.tdotx(m_el[0]), m.tdoty(m_el[0]), m.tdotz(m_el[0]),
807 m.tdotx(m_el[1]), m.tdoty(m_el[1]), m.tdotz(m_el[1]),
808 m.tdotx(m_el[2]), m.tdoty(m_el[2]), m.tdotz(m_el[2]));
809#endif
810 return *this;
811}
812
815{
816#if (defined (BT_USE_SSE_IN_API) && defined (BT_USE_SSE))|| defined (BT_USE_NEON)
817 m_el[0].mVec128 = m_el[0].mVec128 + m.m_el[0].mVec128;
818 m_el[1].mVec128 = m_el[1].mVec128 + m.m_el[1].mVec128;
819 m_el[2].mVec128 = m_el[2].mVec128 + m.m_el[2].mVec128;
820#else
821 setValue(
822 m_el[0][0]+m.m_el[0][0],
823 m_el[0][1]+m.m_el[0][1],
824 m_el[0][2]+m.m_el[0][2],
825 m_el[1][0]+m.m_el[1][0],
826 m_el[1][1]+m.m_el[1][1],
827 m_el[1][2]+m.m_el[1][2],
828 m_el[2][0]+m.m_el[2][0],
829 m_el[2][1]+m.m_el[2][1],
830 m_el[2][2]+m.m_el[2][2]);
831#endif
832 return *this;
833}
834
836operator*(const btMatrix3x3& m, const btScalar & k)
837{
838#if (defined (BT_USE_SSE_IN_API) && defined (BT_USE_SSE))
839 __m128 vk = bt_splat_ps(_mm_load_ss((float *)&k), 0x80);
840 return btMatrix3x3(
841 _mm_mul_ps(m[0].mVec128, vk),
842 _mm_mul_ps(m[1].mVec128, vk),
843 _mm_mul_ps(m[2].mVec128, vk));
844#elif defined(BT_USE_NEON)
845 return btMatrix3x3(
846 vmulq_n_f32(m[0].mVec128, k),
847 vmulq_n_f32(m[1].mVec128, k),
848 vmulq_n_f32(m[2].mVec128, k));
849#else
850 return btMatrix3x3(
851 m[0].x()*k,m[0].y()*k,m[0].z()*k,
852 m[1].x()*k,m[1].y()*k,m[1].z()*k,
853 m[2].x()*k,m[2].y()*k,m[2].z()*k);
854#endif
855}
856
858operator+(const btMatrix3x3& m1, const btMatrix3x3& m2)
859{
860#if (defined (BT_USE_SSE_IN_API) && defined (BT_USE_SSE))|| defined (BT_USE_NEON)
861 return btMatrix3x3(
862 m1[0].mVec128 + m2[0].mVec128,
863 m1[1].mVec128 + m2[1].mVec128,
864 m1[2].mVec128 + m2[2].mVec128);
865#else
866 return btMatrix3x3(
867 m1[0][0]+m2[0][0],
868 m1[0][1]+m2[0][1],
869 m1[0][2]+m2[0][2],
870
871 m1[1][0]+m2[1][0],
872 m1[1][1]+m2[1][1],
873 m1[1][2]+m2[1][2],
874
875 m1[2][0]+m2[2][0],
876 m1[2][1]+m2[2][1],
877 m1[2][2]+m2[2][2]);
878#endif
879}
880
882operator-(const btMatrix3x3& m1, const btMatrix3x3& m2)
883{
884#if (defined (BT_USE_SSE_IN_API) && defined (BT_USE_SSE))|| defined (BT_USE_NEON)
885 return btMatrix3x3(
886 m1[0].mVec128 - m2[0].mVec128,
887 m1[1].mVec128 - m2[1].mVec128,
888 m1[2].mVec128 - m2[2].mVec128);
889#else
890 return btMatrix3x3(
891 m1[0][0]-m2[0][0],
892 m1[0][1]-m2[0][1],
893 m1[0][2]-m2[0][2],
894
895 m1[1][0]-m2[1][0],
896 m1[1][1]-m2[1][1],
897 m1[1][2]-m2[1][2],
898
899 m1[2][0]-m2[2][0],
900 m1[2][1]-m2[2][1],
901 m1[2][2]-m2[2][2]);
902#endif
903}
904
905
908{
909#if (defined (BT_USE_SSE_IN_API) && defined (BT_USE_SSE))|| defined (BT_USE_NEON)
910 m_el[0].mVec128 = m_el[0].mVec128 - m.m_el[0].mVec128;
911 m_el[1].mVec128 = m_el[1].mVec128 - m.m_el[1].mVec128;
912 m_el[2].mVec128 = m_el[2].mVec128 - m.m_el[2].mVec128;
913#else
914 setValue(
915 m_el[0][0]-m.m_el[0][0],
916 m_el[0][1]-m.m_el[0][1],
917 m_el[0][2]-m.m_el[0][2],
918 m_el[1][0]-m.m_el[1][0],
919 m_el[1][1]-m.m_el[1][1],
920 m_el[1][2]-m.m_el[1][2],
921 m_el[2][0]-m.m_el[2][0],
922 m_el[2][1]-m.m_el[2][1],
923 m_el[2][2]-m.m_el[2][2]);
924#endif
925 return *this;
926}
927
928
931{
932 return btTriple((*this)[0], (*this)[1], (*this)[2]);
933}
934
935
938{
939#if defined BT_USE_SIMD_VECTOR3 && (defined (BT_USE_SSE_IN_API) && defined (BT_USE_SSE))
940 return btMatrix3x3(
941 _mm_and_ps(m_el[0].mVec128, btvAbsfMask),
942 _mm_and_ps(m_el[1].mVec128, btvAbsfMask),
943 _mm_and_ps(m_el[2].mVec128, btvAbsfMask));
944#elif defined(BT_USE_NEON)
945 return btMatrix3x3(
946 (float32x4_t)vandq_s32((int32x4_t)m_el[0].mVec128, btv3AbsMask),
947 (float32x4_t)vandq_s32((int32x4_t)m_el[1].mVec128, btv3AbsMask),
948 (float32x4_t)vandq_s32((int32x4_t)m_el[2].mVec128, btv3AbsMask));
949#else
950 return btMatrix3x3(
951 btFabs(m_el[0].x()), btFabs(m_el[0].y()), btFabs(m_el[0].z()),
952 btFabs(m_el[1].x()), btFabs(m_el[1].y()), btFabs(m_el[1].z()),
953 btFabs(m_el[2].x()), btFabs(m_el[2].y()), btFabs(m_el[2].z()));
954#endif
955}
956
959{
960#if defined BT_USE_SIMD_VECTOR3 && (defined (BT_USE_SSE_IN_API) && defined (BT_USE_SSE))
961 __m128 v0 = m_el[0].mVec128;
962 __m128 v1 = m_el[1].mVec128;
963 __m128 v2 = m_el[2].mVec128; // x2 y2 z2 w2
964 __m128 vT;
965
966 v2 = _mm_and_ps(v2, btvFFF0fMask); // x2 y2 z2 0
967
968 vT = _mm_unpackhi_ps(v0, v1); // z0 z1 * *
969 v0 = _mm_unpacklo_ps(v0, v1); // x0 x1 y0 y1
970
971 v1 = _mm_shuffle_ps(v0, v2, BT_SHUFFLE(2, 3, 1, 3) ); // y0 y1 y2 0
972 v0 = _mm_shuffle_ps(v0, v2, BT_SHUFFLE(0, 1, 0, 3) ); // x0 x1 x2 0
973 v2 = btCastdTo128f(_mm_move_sd(btCastfTo128d(v2), btCastfTo128d(vT))); // z0 z1 z2 0
974
975
976 return btMatrix3x3( v0, v1, v2 );
977#elif defined(BT_USE_NEON)
978 // note: zeros the w channel. We can preserve it at the cost of two more vtrn instructions.
979 static const uint32x2_t zMask = (const uint32x2_t) {static_cast<uint32_t>(-1), 0 };
980 float32x4x2_t top = vtrnq_f32( m_el[0].mVec128, m_el[1].mVec128 ); // {x0 x1 z0 z1}, {y0 y1 w0 w1}
981 float32x2x2_t bl = vtrn_f32( vget_low_f32(m_el[2].mVec128), vdup_n_f32(0.0f) ); // {x2 0 }, {y2 0}
982 float32x4_t v0 = vcombine_f32( vget_low_f32(top.val[0]), bl.val[0] );
983 float32x4_t v1 = vcombine_f32( vget_low_f32(top.val[1]), bl.val[1] );
984 float32x2_t q = (float32x2_t) vand_u32( (uint32x2_t) vget_high_f32( m_el[2].mVec128), zMask );
985 float32x4_t v2 = vcombine_f32( vget_high_f32(top.val[0]), q ); // z0 z1 z2 0
986 return btMatrix3x3( v0, v1, v2 );
987#else
988 return btMatrix3x3( m_el[0].x(), m_el[1].x(), m_el[2].x(),
989 m_el[0].y(), m_el[1].y(), m_el[2].y(),
990 m_el[0].z(), m_el[1].z(), m_el[2].z());
991#endif
992}
993
996{
997 return btMatrix3x3(cofac(1, 1, 2, 2), cofac(0, 2, 2, 1), cofac(0, 1, 1, 2),
998 cofac(1, 2, 2, 0), cofac(0, 0, 2, 2), cofac(0, 2, 1, 0),
999 cofac(1, 0, 2, 1), cofac(0, 1, 2, 0), cofac(0, 0, 1, 1));
1000}
1001
1004{
1005 btVector3 co(cofac(1, 1, 2, 2), cofac(1, 2, 2, 0), cofac(1, 0, 2, 1));
1006 btScalar det = (*this)[0].dot(co);
1007 //btFullAssert(det != btScalar(0.0));
1008 btAssert(det != btScalar(0.0));
1009 btScalar s = btScalar(1.0) / det;
1010 return btMatrix3x3(co.x() * s, cofac(0, 2, 2, 1) * s, cofac(0, 1, 1, 2) * s,
1011 co.y() * s, cofac(0, 0, 2, 2) * s, cofac(0, 2, 1, 0) * s,
1012 co.z() * s, cofac(0, 1, 2, 0) * s, cofac(0, 0, 1, 1) * s);
1013}
1014
1017{
1018#if defined BT_USE_SIMD_VECTOR3 && (defined (BT_USE_SSE_IN_API) && defined (BT_USE_SSE))
1019 // zeros w
1020// static const __m128i xyzMask = (const __m128i){ -1ULL, 0xffffffffULL };
1021 __m128 row = m_el[0].mVec128;
1022 __m128 m0 = _mm_and_ps( m.getRow(0).mVec128, btvFFF0fMask );
1023 __m128 m1 = _mm_and_ps( m.getRow(1).mVec128, btvFFF0fMask);
1024 __m128 m2 = _mm_and_ps( m.getRow(2).mVec128, btvFFF0fMask );
1025 __m128 r0 = _mm_mul_ps(m0, _mm_shuffle_ps(row, row, 0));
1026 __m128 r1 = _mm_mul_ps(m0, _mm_shuffle_ps(row, row, 0x55));
1027 __m128 r2 = _mm_mul_ps(m0, _mm_shuffle_ps(row, row, 0xaa));
1028 row = m_el[1].mVec128;
1029 r0 = _mm_add_ps( r0, _mm_mul_ps(m1, _mm_shuffle_ps(row, row, 0)));
1030 r1 = _mm_add_ps( r1, _mm_mul_ps(m1, _mm_shuffle_ps(row, row, 0x55)));
1031 r2 = _mm_add_ps( r2, _mm_mul_ps(m1, _mm_shuffle_ps(row, row, 0xaa)));
1032 row = m_el[2].mVec128;
1033 r0 = _mm_add_ps( r0, _mm_mul_ps(m2, _mm_shuffle_ps(row, row, 0)));
1034 r1 = _mm_add_ps( r1, _mm_mul_ps(m2, _mm_shuffle_ps(row, row, 0x55)));
1035 r2 = _mm_add_ps( r2, _mm_mul_ps(m2, _mm_shuffle_ps(row, row, 0xaa)));
1036 return btMatrix3x3( r0, r1, r2 );
1037
1038#elif defined BT_USE_NEON
1039 // zeros w
1040 static const uint32x4_t xyzMask = (const uint32x4_t){ static_cast<uint32_t>(-1), static_cast<uint32_t>(-1), static_cast<uint32_t>(-1), 0 };
1041 float32x4_t m0 = (float32x4_t) vandq_u32( (uint32x4_t) m.getRow(0).mVec128, xyzMask );
1042 float32x4_t m1 = (float32x4_t) vandq_u32( (uint32x4_t) m.getRow(1).mVec128, xyzMask );
1043 float32x4_t m2 = (float32x4_t) vandq_u32( (uint32x4_t) m.getRow(2).mVec128, xyzMask );
1044 float32x4_t row = m_el[0].mVec128;
1045 float32x4_t r0 = vmulq_lane_f32( m0, vget_low_f32(row), 0);
1046 float32x4_t r1 = vmulq_lane_f32( m0, vget_low_f32(row), 1);
1047 float32x4_t r2 = vmulq_lane_f32( m0, vget_high_f32(row), 0);
1048 row = m_el[1].mVec128;
1049 r0 = vmlaq_lane_f32( r0, m1, vget_low_f32(row), 0);
1050 r1 = vmlaq_lane_f32( r1, m1, vget_low_f32(row), 1);
1051 r2 = vmlaq_lane_f32( r2, m1, vget_high_f32(row), 0);
1052 row = m_el[2].mVec128;
1053 r0 = vmlaq_lane_f32( r0, m2, vget_low_f32(row), 0);
1054 r1 = vmlaq_lane_f32( r1, m2, vget_low_f32(row), 1);
1055 r2 = vmlaq_lane_f32( r2, m2, vget_high_f32(row), 0);
1056 return btMatrix3x3( r0, r1, r2 );
1057#else
1058 return btMatrix3x3(
1059 m_el[0].x() * m[0].x() + m_el[1].x() * m[1].x() + m_el[2].x() * m[2].x(),
1060 m_el[0].x() * m[0].y() + m_el[1].x() * m[1].y() + m_el[2].x() * m[2].y(),
1061 m_el[0].x() * m[0].z() + m_el[1].x() * m[1].z() + m_el[2].x() * m[2].z(),
1062 m_el[0].y() * m[0].x() + m_el[1].y() * m[1].x() + m_el[2].y() * m[2].x(),
1063 m_el[0].y() * m[0].y() + m_el[1].y() * m[1].y() + m_el[2].y() * m[2].y(),
1064 m_el[0].y() * m[0].z() + m_el[1].y() * m[1].z() + m_el[2].y() * m[2].z(),
1065 m_el[0].z() * m[0].x() + m_el[1].z() * m[1].x() + m_el[2].z() * m[2].x(),
1066 m_el[0].z() * m[0].y() + m_el[1].z() * m[1].y() + m_el[2].z() * m[2].y(),
1067 m_el[0].z() * m[0].z() + m_el[1].z() * m[1].z() + m_el[2].z() * m[2].z());
1068#endif
1069}
1070
1073{
1074#if (defined (BT_USE_SSE_IN_API) && defined (BT_USE_SSE))
1075 __m128 a0 = m_el[0].mVec128;
1076 __m128 a1 = m_el[1].mVec128;
1077 __m128 a2 = m_el[2].mVec128;
1078
1079 btMatrix3x3 mT = m.transpose(); // we rely on transpose() zeroing w channel so that we don't have to do it here
1080 __m128 mx = mT[0].mVec128;
1081 __m128 my = mT[1].mVec128;
1082 __m128 mz = mT[2].mVec128;
1083
1084 __m128 r0 = _mm_mul_ps(mx, _mm_shuffle_ps(a0, a0, 0x00));
1085 __m128 r1 = _mm_mul_ps(mx, _mm_shuffle_ps(a1, a1, 0x00));
1086 __m128 r2 = _mm_mul_ps(mx, _mm_shuffle_ps(a2, a2, 0x00));
1087 r0 = _mm_add_ps(r0, _mm_mul_ps(my, _mm_shuffle_ps(a0, a0, 0x55)));
1088 r1 = _mm_add_ps(r1, _mm_mul_ps(my, _mm_shuffle_ps(a1, a1, 0x55)));
1089 r2 = _mm_add_ps(r2, _mm_mul_ps(my, _mm_shuffle_ps(a2, a2, 0x55)));
1090 r0 = _mm_add_ps(r0, _mm_mul_ps(mz, _mm_shuffle_ps(a0, a0, 0xaa)));
1091 r1 = _mm_add_ps(r1, _mm_mul_ps(mz, _mm_shuffle_ps(a1, a1, 0xaa)));
1092 r2 = _mm_add_ps(r2, _mm_mul_ps(mz, _mm_shuffle_ps(a2, a2, 0xaa)));
1093 return btMatrix3x3( r0, r1, r2);
1094
1095#elif defined BT_USE_NEON
1096 float32x4_t a0 = m_el[0].mVec128;
1097 float32x4_t a1 = m_el[1].mVec128;
1098 float32x4_t a2 = m_el[2].mVec128;
1099
1100 btMatrix3x3 mT = m.transpose(); // we rely on transpose() zeroing w channel so that we don't have to do it here
1101 float32x4_t mx = mT[0].mVec128;
1102 float32x4_t my = mT[1].mVec128;
1103 float32x4_t mz = mT[2].mVec128;
1104
1105 float32x4_t r0 = vmulq_lane_f32( mx, vget_low_f32(a0), 0);
1106 float32x4_t r1 = vmulq_lane_f32( mx, vget_low_f32(a1), 0);
1107 float32x4_t r2 = vmulq_lane_f32( mx, vget_low_f32(a2), 0);
1108 r0 = vmlaq_lane_f32( r0, my, vget_low_f32(a0), 1);
1109 r1 = vmlaq_lane_f32( r1, my, vget_low_f32(a1), 1);
1110 r2 = vmlaq_lane_f32( r2, my, vget_low_f32(a2), 1);
1111 r0 = vmlaq_lane_f32( r0, mz, vget_high_f32(a0), 0);
1112 r1 = vmlaq_lane_f32( r1, mz, vget_high_f32(a1), 0);
1113 r2 = vmlaq_lane_f32( r2, mz, vget_high_f32(a2), 0);
1114 return btMatrix3x3( r0, r1, r2 );
1115
1116#else
1117 return btMatrix3x3(
1118 m_el[0].dot(m[0]), m_el[0].dot(m[1]), m_el[0].dot(m[2]),
1119 m_el[1].dot(m[0]), m_el[1].dot(m[1]), m_el[1].dot(m[2]),
1120 m_el[2].dot(m[0]), m_el[2].dot(m[1]), m_el[2].dot(m[2]));
1121#endif
1122}
1123
1125operator*(const btMatrix3x3& m, const btVector3& v)
1126{
1127#if (defined (BT_USE_SSE_IN_API) && defined (BT_USE_SSE))|| defined (BT_USE_NEON)
1128 return v.dot3(m[0], m[1], m[2]);
1129#else
1130 return btVector3(m[0].dot(v), m[1].dot(v), m[2].dot(v));
1131#endif
1132}
1133
1134
1137{
1138#if defined BT_USE_SIMD_VECTOR3 && (defined (BT_USE_SSE_IN_API) && defined (BT_USE_SSE))
1139
1140 const __m128 vv = v.mVec128;
1141
1142 __m128 c0 = bt_splat_ps( vv, 0);
1143 __m128 c1 = bt_splat_ps( vv, 1);
1144 __m128 c2 = bt_splat_ps( vv, 2);
1145
1146 c0 = _mm_mul_ps(c0, _mm_and_ps(m[0].mVec128, btvFFF0fMask) );
1147 c1 = _mm_mul_ps(c1, _mm_and_ps(m[1].mVec128, btvFFF0fMask) );
1148 c0 = _mm_add_ps(c0, c1);
1149 c2 = _mm_mul_ps(c2, _mm_and_ps(m[2].mVec128, btvFFF0fMask) );
1150
1151 return btVector3(_mm_add_ps(c0, c2));
1152#elif defined(BT_USE_NEON)
1153 const float32x4_t vv = v.mVec128;
1154 const float32x2_t vlo = vget_low_f32(vv);
1155 const float32x2_t vhi = vget_high_f32(vv);
1156
1157 float32x4_t c0, c1, c2;
1158
1159 c0 = (float32x4_t) vandq_s32((int32x4_t)m[0].mVec128, btvFFF0Mask);
1160 c1 = (float32x4_t) vandq_s32((int32x4_t)m[1].mVec128, btvFFF0Mask);
1161 c2 = (float32x4_t) vandq_s32((int32x4_t)m[2].mVec128, btvFFF0Mask);
1162
1163 c0 = vmulq_lane_f32(c0, vlo, 0);
1164 c1 = vmulq_lane_f32(c1, vlo, 1);
1165 c2 = vmulq_lane_f32(c2, vhi, 0);
1166 c0 = vaddq_f32(c0, c1);
1167 c0 = vaddq_f32(c0, c2);
1168
1169 return btVector3(c0);
1170#else
1171 return btVector3(m.tdotx(v), m.tdoty(v), m.tdotz(v));
1172#endif
1173}
1174
1176operator*(const btMatrix3x3& m1, const btMatrix3x3& m2)
1177{
1178#if defined BT_USE_SIMD_VECTOR3 && (defined (BT_USE_SSE_IN_API) && defined (BT_USE_SSE))
1179
1180 __m128 m10 = m1[0].mVec128;
1181 __m128 m11 = m1[1].mVec128;
1182 __m128 m12 = m1[2].mVec128;
1183
1184 __m128 m2v = _mm_and_ps(m2[0].mVec128, btvFFF0fMask);
1185
1186 __m128 c0 = bt_splat_ps( m10, 0);
1187 __m128 c1 = bt_splat_ps( m11, 0);
1188 __m128 c2 = bt_splat_ps( m12, 0);
1189
1190 c0 = _mm_mul_ps(c0, m2v);
1191 c1 = _mm_mul_ps(c1, m2v);
1192 c2 = _mm_mul_ps(c2, m2v);
1193
1194 m2v = _mm_and_ps(m2[1].mVec128, btvFFF0fMask);
1195
1196 __m128 c0_1 = bt_splat_ps( m10, 1);
1197 __m128 c1_1 = bt_splat_ps( m11, 1);
1198 __m128 c2_1 = bt_splat_ps( m12, 1);
1199
1200 c0_1 = _mm_mul_ps(c0_1, m2v);
1201 c1_1 = _mm_mul_ps(c1_1, m2v);
1202 c2_1 = _mm_mul_ps(c2_1, m2v);
1203
1204 m2v = _mm_and_ps(m2[2].mVec128, btvFFF0fMask);
1205
1206 c0 = _mm_add_ps(c0, c0_1);
1207 c1 = _mm_add_ps(c1, c1_1);
1208 c2 = _mm_add_ps(c2, c2_1);
1209
1210 m10 = bt_splat_ps( m10, 2);
1211 m11 = bt_splat_ps( m11, 2);
1212 m12 = bt_splat_ps( m12, 2);
1213
1214 m10 = _mm_mul_ps(m10, m2v);
1215 m11 = _mm_mul_ps(m11, m2v);
1216 m12 = _mm_mul_ps(m12, m2v);
1217
1218 c0 = _mm_add_ps(c0, m10);
1219 c1 = _mm_add_ps(c1, m11);
1220 c2 = _mm_add_ps(c2, m12);
1221
1222 return btMatrix3x3(c0, c1, c2);
1223
1224#elif defined(BT_USE_NEON)
1225
1226 float32x4_t rv0, rv1, rv2;
1227 float32x4_t v0, v1, v2;
1228 float32x4_t mv0, mv1, mv2;
1229
1230 v0 = m1[0].mVec128;
1231 v1 = m1[1].mVec128;
1232 v2 = m1[2].mVec128;
1233
1234 mv0 = (float32x4_t) vandq_s32((int32x4_t)m2[0].mVec128, btvFFF0Mask);
1235 mv1 = (float32x4_t) vandq_s32((int32x4_t)m2[1].mVec128, btvFFF0Mask);
1236 mv2 = (float32x4_t) vandq_s32((int32x4_t)m2[2].mVec128, btvFFF0Mask);
1237
1238 rv0 = vmulq_lane_f32(mv0, vget_low_f32(v0), 0);
1239 rv1 = vmulq_lane_f32(mv0, vget_low_f32(v1), 0);
1240 rv2 = vmulq_lane_f32(mv0, vget_low_f32(v2), 0);
1241
1242 rv0 = vmlaq_lane_f32(rv0, mv1, vget_low_f32(v0), 1);
1243 rv1 = vmlaq_lane_f32(rv1, mv1, vget_low_f32(v1), 1);
1244 rv2 = vmlaq_lane_f32(rv2, mv1, vget_low_f32(v2), 1);
1245
1246 rv0 = vmlaq_lane_f32(rv0, mv2, vget_high_f32(v0), 0);
1247 rv1 = vmlaq_lane_f32(rv1, mv2, vget_high_f32(v1), 0);
1248 rv2 = vmlaq_lane_f32(rv2, mv2, vget_high_f32(v2), 0);
1249
1250 return btMatrix3x3(rv0, rv1, rv2);
1251
1252#else
1253 return btMatrix3x3(
1254 m2.tdotx( m1[0]), m2.tdoty( m1[0]), m2.tdotz( m1[0]),
1255 m2.tdotx( m1[1]), m2.tdoty( m1[1]), m2.tdotz( m1[1]),
1256 m2.tdotx( m1[2]), m2.tdoty( m1[2]), m2.tdotz( m1[2]));
1257#endif
1258}
1259
1260/*
1261SIMD_FORCE_INLINE btMatrix3x3 btMultTransposeLeft(const btMatrix3x3& m1, const btMatrix3x3& m2) {
1262return btMatrix3x3(
1263m1[0][0] * m2[0][0] + m1[1][0] * m2[1][0] + m1[2][0] * m2[2][0],
1264m1[0][0] * m2[0][1] + m1[1][0] * m2[1][1] + m1[2][0] * m2[2][1],
1265m1[0][0] * m2[0][2] + m1[1][0] * m2[1][2] + m1[2][0] * m2[2][2],
1266m1[0][1] * m2[0][0] + m1[1][1] * m2[1][0] + m1[2][1] * m2[2][0],
1267m1[0][1] * m2[0][1] + m1[1][1] * m2[1][1] + m1[2][1] * m2[2][1],
1268m1[0][1] * m2[0][2] + m1[1][1] * m2[1][2] + m1[2][1] * m2[2][2],
1269m1[0][2] * m2[0][0] + m1[1][2] * m2[1][0] + m1[2][2] * m2[2][0],
1270m1[0][2] * m2[0][1] + m1[1][2] * m2[1][1] + m1[2][2] * m2[2][1],
1271m1[0][2] * m2[0][2] + m1[1][2] * m2[1][2] + m1[2][2] * m2[2][2]);
1272}
1273*/
1274
1278{
1279#if (defined (BT_USE_SSE_IN_API) && defined (BT_USE_SSE))
1280
1281 __m128 c0, c1, c2;
1282
1283 c0 = _mm_cmpeq_ps(m1[0].mVec128, m2[0].mVec128);
1284 c1 = _mm_cmpeq_ps(m1[1].mVec128, m2[1].mVec128);
1285 c2 = _mm_cmpeq_ps(m1[2].mVec128, m2[2].mVec128);
1286
1287 c0 = _mm_and_ps(c0, c1);
1288 c0 = _mm_and_ps(c0, c2);
1289
1290 int m = _mm_movemask_ps((__m128)c0);
1291 return (0x7 == (m & 0x7));
1292
1293#else
1294 return
1295 ( m1[0][0] == m2[0][0] && m1[1][0] == m2[1][0] && m1[2][0] == m2[2][0] &&
1296 m1[0][1] == m2[0][1] && m1[1][1] == m2[1][1] && m1[2][1] == m2[2][1] &&
1297 m1[0][2] == m2[0][2] && m1[1][2] == m2[1][2] && m1[2][2] == m2[2][2] );
1298#endif
1299}
1300
1303{
1305};
1306
1309{
1311};
1312
1313
1314
1315
1317{
1318 for (int i=0;i<3;i++)
1319 m_el[i].serialize(dataOut.m_el[i]);
1320}
1321
1323{
1324 for (int i=0;i<3;i++)
1325 m_el[i].serializeFloat(dataOut.m_el[i]);
1326}
1327
1328
1330{
1331 for (int i=0;i<3;i++)
1332 m_el[i].deSerialize(dataIn.m_el[i]);
1333}
1334
1336{
1337 for (int i=0;i<3;i++)
1338 m_el[i].deSerializeFloat(dataIn.m_el[i]);
1339}
1340
1342{
1343 for (int i=0;i<3;i++)
1344 m_el[i].deSerializeDouble(dataIn.m_el[i]);
1345}
1346
1347#endif //BT_MATRIX3x3_H
1348
unsigned int uint32_t
btMatrix3x3 operator*(const btMatrix3x3 &m, const btScalar &k)
Definition: btMatrix3x3.h:836
#define btMatrix3x3Data
Definition: btMatrix3x3.h:42
bool operator==(const btMatrix3x3 &m1, const btMatrix3x3 &m2)
Equality operator between two matrices It will test all elements are equal.
Definition: btMatrix3x3.h:1277
btMatrix3x3 operator+(const btMatrix3x3 &m1, const btMatrix3x3 &m2)
Definition: btMatrix3x3.h:858
btMatrix3x3 operator-(const btMatrix3x3 &m1, const btMatrix3x3 &m2)
Definition: btMatrix3x3.h:882
btScalar dot(const btQuaternion &q1, const btQuaternion &q2)
Calculate the dot product between two quaternions.
Definition: btQuaternion.h:878
btQuaternion inverse(const btQuaternion &q)
Return the inverse of a quaternion.
Definition: btQuaternion.h:900
#define SIMD_PI
Definition: btScalar.h:504
float btScalar
The btScalar type abstracts floating point numbers, to easily switch between double and single floati...
Definition: btScalar.h:292
#define ATTRIBUTE_ALIGNED16(a)
Definition: btScalar.h:82
btScalar btSqrt(btScalar y)
Definition: btScalar.h:444
btScalar btAtan2(btScalar x, btScalar y)
Definition: btScalar.h:496
btScalar btSin(btScalar x)
Definition: btScalar.h:477
btScalar btFabs(btScalar x)
Definition: btScalar.h:475
#define SIMD_FORCE_INLINE
Definition: btScalar.h:81
#define btFullAssert(x)
Definition: btScalar.h:134
btScalar btCos(btScalar x)
Definition: btScalar.h:476
#define SIMD_EPSILON
Definition: btScalar.h:521
btScalar btAsin(btScalar x)
Definition: btScalar.h:487
#define SIMD_HALF_PI
Definition: btScalar.h:506
#define btAssert(x)
Definition: btScalar.h:131
btScalar btDot(const btVector3 &v1, const btVector3 &v2)
Return the dot product between two vectors.
Definition: btVector3.h:901
btVector3 btCross(const btVector3 &v1, const btVector3 &v2)
Return the cross product of two vectors.
Definition: btVector3.h:931
btScalar btTriple(const btVector3 &v1, const btVector3 &v2, const btVector3 &v3)
Definition: btVector3.h:937
The btMatrix3x3 class implements a 3x3 rotation matrix, to perform linear algebra in combination with...
Definition: btMatrix3x3.h:48
void setEulerZYX(btScalar eulerX, btScalar eulerY, btScalar eulerZ)
Set the matrix from euler angles YPR around ZYX axes.
Definition: btMatrix3x3.h:298
btMatrix3x3 adjoint() const
Return the adjoint of the matrix.
Definition: btMatrix3x3.h:995
btMatrix3x3 inverse() const
Return the inverse of the matrix.
Definition: btMatrix3x3.h:1003
void getEulerYPR(btScalar &yaw, btScalar &pitch, btScalar &roll) const
Get the matrix represented as euler angles around YXZ, roundtrip with setEulerYPR.
Definition: btMatrix3x3.h:492
void setFromOpenGLSubMatrix(const btScalar *m)
Set from the rotational part of a 4x4 OpenGL matrix.
Definition: btMatrix3x3.h:181
btVector3 solve33(const btVector3 &b) const
Solve A * x = b, where b is a column vector.
Definition: btMatrix3x3.h:616
btMatrix3x3 transpose() const
Return the transpose of the matrix.
Definition: btMatrix3x3.h:958
void setEulerYPR(const btScalar &yaw, const btScalar &pitch, const btScalar &roll)
Set the matrix from euler angles using YPR around YXZ respectively.
Definition: btMatrix3x3.h:284
btMatrix3x3 & operator-=(const btMatrix3x3 &m)
Substractss by the target matrix on the right.
Definition: btMatrix3x3.h:907
void getRotation(btQuaternion &q) const
Get the matrix represented as a quaternion.
Definition: btMatrix3x3.h:400
btMatrix3x3(const btScalar &xx, const btScalar &xy, const btScalar &xz, const btScalar &yx, const btScalar &yy, const btScalar &yz, const btScalar &zx, const btScalar &zy, const btScalar &zz)
Constructor with row major formatting.
Definition: btMatrix3x3.h:69
void deSerializeFloat(const struct btMatrix3x3FloatData &dataIn)
Definition: btMatrix3x3.h:1335
btMatrix3x3 scaled(const btVector3 &s) const
Create a scaled copy of the matrix.
Definition: btMatrix3x3.h:590
btMatrix3x3 & operator=(const btMatrix3x3 &other)
Assignment Operator.
Definition: btMatrix3x3.h:122
btMatrix3x3(const btMatrix3x3 &other)
Copy constructor.
Definition: btMatrix3x3.h:114
void getEulerZYX(btScalar &yaw, btScalar &pitch, btScalar &roll, unsigned int solution_number=1) const
Get the matrix represented as euler angles around ZYX.
Definition: btMatrix3x3.h:521
static const btMatrix3x3 & getIdentity()
Definition: btMatrix3x3.h:330
btScalar tdotz(const btVector3 &v) const
Definition: btMatrix3x3.h:645
void setIdentity()
Set the matrix to the identity.
Definition: btMatrix3x3.h:317
btMatrix3x3 & operator+=(const btMatrix3x3 &m)
Adds by the target matrix on the right.
Definition: btMatrix3x3.h:814
btVector3 & operator[](int i)
Get a mutable reference to a row of the matrix as a vector.
Definition: btMatrix3x3.h:150
btScalar tdotx(const btVector3 &v) const
Definition: btMatrix3x3.h:637
void getOpenGLSubMatrix(btScalar *m) const
Fill the rotational part of an OpenGL matrix and clear the shear/perspective.
Definition: btMatrix3x3.h:347
btScalar determinant() const
Return the determinant of the matrix.
Definition: btMatrix3x3.h:930
const btVector3 & operator[](int i) const
Get a const reference to a row of the matrix as a vector.
Definition: btMatrix3x3.h:158
btMatrix3x3 transposeTimes(const btMatrix3x3 &m) const
Definition: btMatrix3x3.h:1016
void deSerializeDouble(const struct btMatrix3x3DoubleData &dataIn)
Definition: btMatrix3x3.h:1341
void serialize(struct btMatrix3x3Data &dataOut) const
Definition: btMatrix3x3.h:1316
void extractRotation(btQuaternion &q, btScalar tolerance=1.0e-9, int maxIter=100)
extractRotation is from "A robust method to extract the rotational part of deformations" See http://d...
Definition: btMatrix3x3.h:652
btScalar tdoty(const btVector3 &v) const
Definition: btMatrix3x3.h:641
btVector3 m_el[3]
Data storage for the matrix, each vector is a row of the matrix.
Definition: btMatrix3x3.h:51
void serializeFloat(struct btMatrix3x3FloatData &dataOut) const
Definition: btMatrix3x3.h:1322
btVector3 getColumn(int i) const
Get a column of the matrix as a vector.
Definition: btMatrix3x3.h:134
btMatrix3x3 & operator*=(const btMatrix3x3 &m)
Multiply by the target matrix on the right.
Definition: btMatrix3x3.h:724
void deSerialize(const struct btMatrix3x3Data &dataIn)
Definition: btMatrix3x3.h:1329
void diagonalize(btMatrix3x3 &rot, btScalar tolerance=1.0e-9, int maxIter=100)
diagonalizes this matrix
Definition: btMatrix3x3.h:682
void setValue(const btScalar &xx, const btScalar &xy, const btScalar &xz, const btScalar &yx, const btScalar &yy, const btScalar &yz, const btScalar &zx, const btScalar &zy, const btScalar &zz)
Set the values of the matrix explicitly (row major)
Definition: btMatrix3x3.h:198
btMatrix3x3()
No initializaion constructor.
Definition: btMatrix3x3.h:55
void setRotation(const btQuaternion &q)
Set the matrix from a quaternion.
Definition: btMatrix3x3.h:209
const btVector3 & getRow(int i) const
Get a row of the matrix as a vector.
Definition: btMatrix3x3.h:142
btScalar cofac(int r1, int c1, int r2, int c2) const
Calculate the matrix cofactor.
Definition: btMatrix3x3.h:705
btMatrix3x3(const btQuaternion &q)
Constructor from Quaternion.
Definition: btMatrix3x3.h:60
btMatrix3x3 timesTranspose(const btMatrix3x3 &m) const
Definition: btMatrix3x3.h:1072
btMatrix3x3 absolute() const
Return the matrix with all values non negative.
Definition: btMatrix3x3.h:937
const btScalar & w() const
Return the w value.
Definition: btQuadWord.h:122
const btScalar & z() const
Return the z value.
Definition: btQuadWord.h:120
const btScalar & y() const
Return the y value.
Definition: btQuadWord.h:118
void setValue(const btScalar &_x, const btScalar &_y, const btScalar &_z)
Set x,y,z and zero w.
Definition: btQuadWord.h:152
const btScalar & x() const
Return the x value.
Definition: btQuadWord.h:116
The btQuaternion implements quaternion to perform linear algebra rotations in combination with btMatr...
Definition: btQuaternion.h:55
static const btQuaternion & getIdentity()
Definition: btQuaternion.h:595
btScalar length2() const
Return the length squared of the quaternion.
Definition: btQuaternion.h:348
btQuaternion inverse() const
Return the inverse of this quaternion.
Definition: btQuaternion.h:482
btQuaternion & normalize()
Normalize the quaternion Such that x^2 + y^2 + z^2 +w^2 = 1.
Definition: btQuaternion.h:369
btVector3 can be used to represent 3D points and vectors.
Definition: btVector3.h:84
const btScalar & z() const
Return the z value.
Definition: btVector3.h:591
btVector3 cross(const btVector3 &v) const
Return the cross product between this and another vector.
Definition: btVector3.h:389
btScalar dot(const btVector3 &v) const
Return the dot product.
Definition: btVector3.h:235
btScalar norm() const
Return the norm (length) of the vector.
Definition: btVector3.h:269
btVector3 dot3(const btVector3 &v0, const btVector3 &v1, const btVector3 &v2) const
Definition: btVector3.h:731
void setValue(const btScalar &_x, const btScalar &_y, const btScalar &_z)
Definition: btVector3.h:652
const btScalar & x() const
Return the x value.
Definition: btVector3.h:587
const btScalar & y() const
Return the y value.
Definition: btVector3.h:589
for serialization
Definition: btMatrix3x3.h:1309
btVector3DoubleData m_el[3]
Definition: btMatrix3x3.h:1310
for serialization
Definition: btMatrix3x3.h:1303
btVector3FloatData m_el[3]
Definition: btMatrix3x3.h:1304