17#ifndef BT_SIMD__QUATERNION_H_
18#define BT_SIMD__QUATERNION_H_
25#ifdef BT_USE_DOUBLE_PRECISION
26#define btQuaternionData btQuaternionDoubleData
27#define btQuaternionDataName "btQuaternionDoubleData"
29#define btQuaternionData btQuaternionFloatData
30#define btQuaternionDataName "btQuaternionFloatData"
38#define vOnes (_mm_set_ps(1.0f, 1.0f, 1.0f, 1.0f))
42#if defined(BT_USE_SSE)
44#define vQInv (_mm_set_ps(+0.0f, -0.0f, -0.0f, -0.0f))
45#define vPPPM (_mm_set_ps(-0.0f, +0.0f, +0.0f, +0.0f))
47#elif defined(BT_USE_NEON)
60#if (defined(BT_USE_SSE_IN_API) && defined(BT_USE_SSE))|| defined(BT_USE_NEON)
70 mVec128 = rhs.mVec128;
103#ifndef BT_EULER_DEFAULT_ZYX
135 setValue(cosRoll * sinPitch * cosYaw + sinRoll * cosPitch * sinYaw,
136 cosRoll * cosPitch * sinYaw - sinRoll * sinPitch * cosYaw,
137 sinRoll * cosPitch * cosYaw - cosRoll * sinPitch * sinYaw,
138 cosRoll * cosPitch * cosYaw + sinRoll * sinPitch * sinYaw);
155 setValue(sinRoll * cosPitch * cosYaw - cosRoll * sinPitch * sinYaw,
156 cosRoll * sinPitch * cosYaw + sinRoll * cosPitch * sinYaw,
157 cosRoll * cosPitch * sinYaw - sinRoll * sinPitch * cosYaw,
158 cosRoll * cosPitch * cosYaw + sinRoll * sinPitch * sinYaw);
186#if defined (BT_USE_SSE_IN_API) && defined (BT_USE_SSE)
187 mVec128 = _mm_add_ps(mVec128, q.mVec128);
188#elif defined(BT_USE_NEON)
189 mVec128 = vaddq_f32(mVec128, q.mVec128);
203#if defined (BT_USE_SSE_IN_API) && defined (BT_USE_SSE)
204 mVec128 = _mm_sub_ps(mVec128, q.mVec128);
205#elif defined(BT_USE_NEON)
206 mVec128 = vsubq_f32(mVec128, q.mVec128);
220#if defined (BT_USE_SSE_IN_API) && defined (BT_USE_SSE)
221 __m128 vs = _mm_load_ss(&s);
222 vs = bt_pshufd_ps(vs, 0);
223 mVec128 = _mm_mul_ps(mVec128, vs);
224#elif defined(BT_USE_NEON)
225 mVec128 = vmulq_n_f32(mVec128, s);
240#if defined (BT_USE_SSE_IN_API) && defined (BT_USE_SSE)
241 __m128 vQ2 = q.get128();
243 __m128 A1 = bt_pshufd_ps(mVec128, BT_SHUFFLE(0,1,2,0));
244 __m128 B1 = bt_pshufd_ps(vQ2, BT_SHUFFLE(3,3,3,0));
248 __m128 A2 = bt_pshufd_ps(mVec128, BT_SHUFFLE(1,2,0,1));
249 __m128 B2 = bt_pshufd_ps(vQ2, BT_SHUFFLE(2,0,1,1));
253 B1 = bt_pshufd_ps(mVec128, BT_SHUFFLE(2,0,1,2));
254 B2 = bt_pshufd_ps(vQ2, BT_SHUFFLE(1,2,0,2));
258 mVec128 = bt_splat_ps(mVec128, 3);
259 mVec128 = mVec128 * vQ2;
262 mVec128 = mVec128 - B1;
263 A1 = _mm_xor_ps(A1, vPPPM);
264 mVec128 = mVec128+ A1;
266#elif defined(BT_USE_NEON)
268 float32x4_t vQ1 = mVec128;
269 float32x4_t vQ2 = q.get128();
270 float32x4_t A0, A1, B1, A2, B2, A3, B3;
271 float32x2_t vQ1zx, vQ2wx, vQ1yz, vQ2zx, vQ2yz, vQ2xz;
275 tmp = vtrn_f32( vget_high_f32(vQ1), vget_low_f32(vQ1) );
278 tmp = vtrn_f32( vget_high_f32(vQ2), vget_low_f32(vQ2) );
281 vQ2wx = vext_f32(vget_high_f32(vQ2), vget_low_f32(vQ2), 1);
283 vQ1yz = vext_f32(vget_low_f32(vQ1), vget_high_f32(vQ1), 1);
285 vQ2yz = vext_f32(vget_low_f32(vQ2), vget_high_f32(vQ2), 1);
286 vQ2xz = vext_f32(vQ2zx, vQ2zx, 1);
288 A1 = vcombine_f32(vget_low_f32(vQ1), vQ1zx);
289 B1 = vcombine_f32(vdup_lane_f32(vget_high_f32(vQ2), 1), vQ2wx);
291 A2 = vcombine_f32(vQ1yz, vget_low_f32(vQ1));
292 B2 = vcombine_f32(vQ2zx, vdup_lane_f32(vget_low_f32(vQ2), 1));
294 A3 = vcombine_f32(vQ1zx, vQ1yz);
295 B3 = vcombine_f32(vQ2yz, vQ2xz);
297 A1 = vmulq_f32(A1, B1);
298 A2 = vmulq_f32(A2, B2);
299 A3 = vmulq_f32(A3, B3);
300 A0 = vmulq_lane_f32(vQ2, vget_high_f32(vQ1), 1);
302 A1 = vaddq_f32(A1, A2);
303 A0 = vsubq_f32(A0, A3);
306 A1 = (btSimdFloat4)veorq_s32((int32x4_t)A1, (int32x4_t)vPPPM);
307 A0 = vaddq_f32(A0, A1);
323#if defined BT_USE_SIMD_VECTOR3 && defined (BT_USE_SSE_IN_API) && defined (BT_USE_SSE)
326 vd = _mm_mul_ps(mVec128, q.mVec128);
328 __m128 t = _mm_movehl_ps(vd, vd);
329 vd = _mm_add_ps(vd, t);
330 t = _mm_shuffle_ps(vd, vd, 0x55);
331 vd = _mm_add_ss(vd, t);
333 return _mm_cvtss_f32(vd);
334#elif defined(BT_USE_NEON)
335 float32x4_t vd = vmulq_f32(mVec128, q.mVec128);
336 float32x2_t
x = vpadd_f32(vget_low_f32(vd), vget_high_f32(vd));
338 return vget_lane_f32(
x, 0);
371#if defined (BT_USE_SSE_IN_API) && defined (BT_USE_SSE)
374 vd = _mm_mul_ps(mVec128, mVec128);
376 __m128 t = _mm_movehl_ps(vd, vd);
377 vd = _mm_add_ps(vd, t);
378 t = _mm_shuffle_ps(vd, vd, 0x55);
379 vd = _mm_add_ss(vd, t);
381 vd = _mm_sqrt_ss(vd);
382 vd = _mm_div_ss(vOnes, vd);
383 vd = bt_pshufd_ps(vd, 0);
384 mVec128 = _mm_mul_ps(mVec128, vd);
397#if defined (BT_USE_SSE_IN_API) && defined (BT_USE_SSE)
398 __m128 vs = _mm_load_ss(&s);
399 vs = bt_pshufd_ps(vs, 0x00);
402#elif defined(BT_USE_NEON)
484#if defined (BT_USE_SSE_IN_API) && defined (BT_USE_SSE)
486#elif defined(BT_USE_NEON)
487 return btQuaternion((btSimdFloat4)veorq_s32((int32x4_t)mVec128, (int32x4_t)vQInv));
498#if defined (BT_USE_SSE_IN_API) && defined (BT_USE_SSE)
500#elif defined(BT_USE_NEON)
513#if defined (BT_USE_SSE_IN_API) && defined (BT_USE_SSE)
515#elif defined(BT_USE_NEON)
527#if defined (BT_USE_SSE_IN_API) && defined (BT_USE_SSE)
529#elif defined(BT_USE_NEON)
530 return btQuaternion((btSimdFloat4)veorq_s32((int32x4_t)mVec128, (int32x4_t)btvMzeroMask) );
625#if defined (BT_USE_SSE_IN_API) && defined (BT_USE_SSE)
626 __m128 vQ1 = q1.get128();
627 __m128 vQ2 = q2.get128();
628 __m128 A0, A1, B1, A2, B2;
630 A1 = bt_pshufd_ps(vQ1, BT_SHUFFLE(0,1,2,0));
631 B1 = bt_pshufd_ps(vQ2, BT_SHUFFLE(3,3,3,0));
635 A2 = bt_pshufd_ps(vQ1, BT_SHUFFLE(1,2,0,1));
636 B2 = bt_pshufd_ps(vQ2, BT_SHUFFLE(2,0,1,1));
640 B1 = bt_pshufd_ps(vQ1, BT_SHUFFLE(2,0,1,2));
641 B2 = bt_pshufd_ps(vQ2, BT_SHUFFLE(1,2,0,2));
645 A0 = bt_splat_ps(vQ1, 3);
651 A1 = _mm_xor_ps(A1, vPPPM);
656#elif defined(BT_USE_NEON)
658 float32x4_t vQ1 = q1.get128();
659 float32x4_t vQ2 = q2.get128();
660 float32x4_t A0, A1, B1, A2, B2, A3, B3;
661 float32x2_t vQ1zx, vQ2wx, vQ1yz, vQ2zx, vQ2yz, vQ2xz;
665 tmp = vtrn_f32( vget_high_f32(vQ1), vget_low_f32(vQ1) );
668 tmp = vtrn_f32( vget_high_f32(vQ2), vget_low_f32(vQ2) );
671 vQ2wx = vext_f32(vget_high_f32(vQ2), vget_low_f32(vQ2), 1);
673 vQ1yz = vext_f32(vget_low_f32(vQ1), vget_high_f32(vQ1), 1);
675 vQ2yz = vext_f32(vget_low_f32(vQ2), vget_high_f32(vQ2), 1);
676 vQ2xz = vext_f32(vQ2zx, vQ2zx, 1);
678 A1 = vcombine_f32(vget_low_f32(vQ1), vQ1zx);
679 B1 = vcombine_f32(vdup_lane_f32(vget_high_f32(vQ2), 1), vQ2wx);
681 A2 = vcombine_f32(vQ1yz, vget_low_f32(vQ1));
682 B2 = vcombine_f32(vQ2zx, vdup_lane_f32(vget_low_f32(vQ2), 1));
684 A3 = vcombine_f32(vQ1zx, vQ1yz);
685 B3 = vcombine_f32(vQ2yz, vQ2xz);
687 A1 = vmulq_f32(A1, B1);
688 A2 = vmulq_f32(A2, B2);
689 A3 = vmulq_f32(A3, B3);
690 A0 = vmulq_lane_f32(vQ2, vget_high_f32(vQ1), 1);
692 A1 = vaddq_f32(A1, A2);
693 A0 = vsubq_f32(A0, A3);
696 A1 = (btSimdFloat4)veorq_s32((int32x4_t)A1, (int32x4_t)vPPPM);
697 A0 = vaddq_f32(A0, A1);
703 q1.
w() * q2.
x() + q1.
x() * q2.
w() + q1.
y() * q2.
z() - q1.
z() * q2.
y(),
704 q1.
w() * q2.
y() + q1.
y() * q2.
w() + q1.
z() * q2.
x() - q1.
x() * q2.
z(),
705 q1.
w() * q2.
z() + q1.
z() * q2.
w() + q1.
x() * q2.
y() - q1.
y() * q2.
x(),
706 q1.
w() * q2.
w() - q1.
x() * q2.
x() - q1.
y() * q2.
y() - q1.
z() * q2.
z());
713#if defined (BT_USE_SSE_IN_API) && defined (BT_USE_SSE)
714 __m128 vQ1 = q.get128();
715 __m128 vQ2 = w.get128();
716 __m128 A1, B1, A2, B2, A3, B3;
718 A1 = bt_pshufd_ps(vQ1, BT_SHUFFLE(3,3,3,0));
719 B1 = bt_pshufd_ps(vQ2, BT_SHUFFLE(0,1,2,0));
723 A2 = bt_pshufd_ps(vQ1, BT_SHUFFLE(1,2,0,1));
724 B2 = bt_pshufd_ps(vQ2, BT_SHUFFLE(2,0,1,1));
728 A3 = bt_pshufd_ps(vQ1, BT_SHUFFLE(2,0,1,2));
729 B3 = bt_pshufd_ps(vQ2, BT_SHUFFLE(1,2,0,2));
734 A1 = _mm_xor_ps(A1, vPPPM);
739#elif defined(BT_USE_NEON)
741 float32x4_t vQ1 = q.get128();
742 float32x4_t vQ2 = w.get128();
743 float32x4_t A1, B1, A2, B2, A3, B3;
744 float32x2_t vQ1wx, vQ2zx, vQ1yz, vQ2yz, vQ1zx, vQ2xz;
746 vQ1wx = vext_f32(vget_high_f32(vQ1), vget_low_f32(vQ1), 1);
750 tmp = vtrn_f32( vget_high_f32(vQ2), vget_low_f32(vQ2) );
753 tmp = vtrn_f32( vget_high_f32(vQ1), vget_low_f32(vQ1) );
757 vQ1yz = vext_f32(vget_low_f32(vQ1), vget_high_f32(vQ1), 1);
759 vQ2yz = vext_f32(vget_low_f32(vQ2), vget_high_f32(vQ2), 1);
760 vQ2xz = vext_f32(vQ2zx, vQ2zx, 1);
762 A1 = vcombine_f32(vdup_lane_f32(vget_high_f32(vQ1), 1), vQ1wx);
763 B1 = vcombine_f32(vget_low_f32(vQ2), vQ2zx);
765 A2 = vcombine_f32(vQ1yz, vget_low_f32(vQ1));
766 B2 = vcombine_f32(vQ2zx, vdup_lane_f32(vget_low_f32(vQ2), 1));
768 A3 = vcombine_f32(vQ1zx, vQ1yz);
769 B3 = vcombine_f32(vQ2yz, vQ2xz);
771 A1 = vmulq_f32(A1, B1);
772 A2 = vmulq_f32(A2, B2);
773 A3 = vmulq_f32(A3, B3);
775 A1 = vaddq_f32(A1, A2);
778 A1 = (btSimdFloat4)veorq_s32((int32x4_t)A1, (int32x4_t)vPPPM);
780 A1 = vsubq_f32(A1, A3);
786 q.
w() * w.
x() + q.
y() * w.
z() - q.
z() * w.
y(),
787 q.
w() * w.
y() + q.
z() * w.
x() - q.
x() * w.
z(),
788 q.
w() * w.
z() + q.
x() * w.
y() - q.
y() * w.
x(),
789 -q.
x() * w.
x() - q.
y() * w.
y() - q.
z() * w.
z());
796#if defined (BT_USE_SSE_IN_API) && defined (BT_USE_SSE)
797 __m128 vQ1 = w.get128();
798 __m128 vQ2 = q.get128();
799 __m128 A1, B1, A2, B2, A3, B3;
801 A1 = bt_pshufd_ps(vQ1, BT_SHUFFLE(0,1,2,0));
802 B1 = bt_pshufd_ps(vQ2, BT_SHUFFLE(3,3,3,0));
806 A2 = bt_pshufd_ps(vQ1, BT_SHUFFLE(1,2,0,1));
807 B2 = bt_pshufd_ps(vQ2, BT_SHUFFLE(2,0,1,1));
811 A3 = bt_pshufd_ps(vQ1, BT_SHUFFLE(2,0,1,2));
812 B3 = bt_pshufd_ps(vQ2, BT_SHUFFLE(1,2,0,2));
817 A1 = _mm_xor_ps(A1, vPPPM);
822#elif defined(BT_USE_NEON)
824 float32x4_t vQ1 = w.get128();
825 float32x4_t vQ2 = q.get128();
826 float32x4_t A1, B1, A2, B2, A3, B3;
827 float32x2_t vQ1zx, vQ2wx, vQ1yz, vQ2zx, vQ2yz, vQ2xz;
832 tmp = vtrn_f32( vget_high_f32(vQ1), vget_low_f32(vQ1) );
835 tmp = vtrn_f32( vget_high_f32(vQ2), vget_low_f32(vQ2) );
838 vQ2wx = vext_f32(vget_high_f32(vQ2), vget_low_f32(vQ2), 1);
840 vQ1yz = vext_f32(vget_low_f32(vQ1), vget_high_f32(vQ1), 1);
842 vQ2yz = vext_f32(vget_low_f32(vQ2), vget_high_f32(vQ2), 1);
843 vQ2xz = vext_f32(vQ2zx, vQ2zx, 1);
845 A1 = vcombine_f32(vget_low_f32(vQ1), vQ1zx);
846 B1 = vcombine_f32(vdup_lane_f32(vget_high_f32(vQ2), 1), vQ2wx);
848 A2 = vcombine_f32(vQ1yz, vget_low_f32(vQ1));
849 B2 = vcombine_f32(vQ2zx, vdup_lane_f32(vget_low_f32(vQ2), 1));
851 A3 = vcombine_f32(vQ1zx, vQ1yz);
852 B3 = vcombine_f32(vQ2yz, vQ2xz);
854 A1 = vmulq_f32(A1, B1);
855 A2 = vmulq_f32(A2, B2);
856 A3 = vmulq_f32(A3, B3);
858 A1 = vaddq_f32(A1, A2);
861 A1 = (btSimdFloat4)veorq_s32((int32x4_t)A1, (int32x4_t)vPPPM);
863 A1 = vsubq_f32(A1, A3);
869 +w.
x() * q.
w() + w.
y() * q.
z() - w.
z() * q.
y(),
870 +w.
y() * q.
w() + w.
z() * q.
x() - w.
x() * q.
z(),
871 +w.
z() * q.
w() + w.
x() * q.
y() - w.
y() * q.
x(),
872 -w.
x() * q.
x() - w.
y() * q.
y() - w.
z() * q.
z());
913 return q1.
slerp(q2, t);
921#if defined BT_USE_SIMD_VECTOR3 && defined (BT_USE_SSE_IN_API) && defined (BT_USE_SSE)
922 return btVector3(_mm_and_ps(q.get128(), btvFFF0fMask));
923#elif defined(BT_USE_NEON)
924 return btVector3((float32x4_t)vandq_s32((int32x4_t)q.get128(), btvFFF0Mask));
974 for (
int i=0;i<4;i++)
980 for (
int i=0;i<4;i++)
988 for (
int i=0;i<4;i++)
994 for (
int i=0;i<4;i++)
1002 for (
int i=0;i<4;i++)
1008 for (
int i=0;i<4;i++)
btScalar dot(const btQuaternion &q1, const btQuaternion &q2)
Calculate the dot product between two quaternions.
btQuaternion slerp(const btQuaternion &q1, const btQuaternion &q2, const btScalar &t)
Return the result of spherical linear interpolation between two quaternions.
btQuaternion shortestArcQuat(const btVector3 &v0, const btVector3 &v1)
btQuaternion shortestArcQuatNormalize2(btVector3 &v0, btVector3 &v1)
btQuaternion inverse(const btQuaternion &q)
Return the inverse of a quaternion.
btVector3 quatRotate(const btQuaternion &rotation, const btVector3 &v)
btQuaternion operator*(const btQuaternion &q1, const btQuaternion &q2)
Return the product of two quaternions.
btScalar length(const btQuaternion &q)
Return the length of a quaternion.
btScalar btAngle(const btQuaternion &q1, const btQuaternion &q2)
Return the angle between two quaternions.
float btScalar
The btScalar type abstracts floating point numbers, to easily switch between double and single floating point precision.
#define ATTRIBUTE_ALIGNED16(a)
btScalar btSqrt(btScalar y)
btScalar btAtan2(btScalar x, btScalar y)
btScalar btSin(btScalar x)
btScalar btFabs(btScalar x)
#define SIMD_FORCE_INLINE
btScalar btCos(btScalar x)
btScalar btAcos(btScalar x)
btScalar btAsin(btScalar x)
static T sum(const btAlignedObjectArray< T > &items)
void btPlaneSpace1(const T &n, T &p, T &q)
The btQuadWord class is base class for btVector3 and btQuaternion.
const btScalar & w() const
Return the w value.
const btScalar & getY() const
Return the y value.
const btScalar & getX() const
Return the x value.
const btScalar & getZ() const
Return the z value.
const btScalar & z() const
Return the z value.
const btScalar & y() const
Return the y value.
void setValue(const btScalar &_x, const btScalar &_y, const btScalar &_z)
Set x,y,z and zero w.
const btScalar & x() const
Return the x value.
The btQuaternion implements quaternion to perform linear algebra rotations in combination with btMatrix3x3, btVector3 and btTransform.
btQuaternion operator-(const btQuaternion &q2) const
Return the difference between this quaternion and the other.
btQuaternion operator/(const btScalar &s) const
Return an inversely scaled version of this quaternion.
btQuaternion slerp(const btQuaternion &q, const btScalar &t) const
Return the quaternion which is the result of Spherical Linear Interpolation between this and the other quaternion.
btQuaternion operator*(const btScalar &s) const
Return a scaled version of this quaternion.
void getEulerZYX(btScalar &yawZ, btScalar &pitchY, btScalar &rollX) const
Get the Euler angles from this quaternion.
btScalar angleShortestPath(const btQuaternion &q) const
Return the angle between this quaternion and the other along the shortest path.
void deSerializeFloat(const struct btQuaternionFloatData &dataIn)
static const btQuaternion & getIdentity()
btQuaternion & operator*=(const btQuaternion &q)
Multiply this quaternion by q on the right.
btScalar length() const
Return the length of the quaternion.
btQuaternion(const btVector3 &_axis, const btScalar &_angle)
Axis angle Constructor.
btQuaternion farthest(const btQuaternion &qd) const
void deSerialize(const struct btQuaternionData &dataIn)
btQuaternion()
No initialization constructor.
const btScalar & getW() const
btScalar dot(const btQuaternion &q) const
Return the dot product between this quaternion and another.
void serialize(struct btQuaternionData &dataOut) const
void deSerializeDouble(const struct btQuaternionDoubleData &dataIn)
btScalar getAngle() const
Return the angle [0, 2Pi] of rotation represented by this quaternion.
void serializeDouble(struct btQuaternionDoubleData &dataOut) const
btQuaternion operator+(const btQuaternion &q2) const
Return the sum of this quaternion and the other.
btQuaternion(const btScalar &_x, const btScalar &_y, const btScalar &_z, const btScalar &_w)
Constructor from scalars.
btScalar angle(const btQuaternion &q) const
Return the half angle between this quaternion and the other.
btQuaternion nearest(const btQuaternion &qd) const
btQuaternion & operator*=(const btScalar &s)
Scale this quaternion.
btQuaternion(const btScalar &yaw, const btScalar &pitch, const btScalar &roll)
Constructor from Euler angles.
btScalar length2() const
Return the length squared of the quaternion.
btQuaternion normalized() const
Return a normalized version of this quaternion.
btQuaternion operator-() const
Return the negative of this quaternion. This simply negates each element.
btQuaternion & safeNormalize()
btQuaternion inverse() const
Return the inverse of this quaternion.
btQuaternion & operator/=(const btScalar &s)
Inversely scale this quaternion.
btScalar getAngleShortestPath() const
Return the angle [0, Pi] of rotation represented by this quaternion along the shortest path.
btQuaternion & operator-=(const btQuaternion &q)
Subtract out a quaternion.
btVector3 getAxis() const
Return the axis of the rotation represented by this quaternion.
void setRotation(const btVector3 &axis, const btScalar &_angle)
Set the rotation using axis angle notation.
void setEulerZYX(const btScalar &yawZ, const btScalar &pitchY, const btScalar &rollX)
Set the quaternion using Euler angles.
btQuaternion & normalize()
Normalize the quaternion such that x^2 + y^2 + z^2 + w^2 = 1.
void setEuler(const btScalar &yaw, const btScalar &pitch, const btScalar &roll)
Set the quaternion using Euler angles.
btQuaternion & operator+=(const btQuaternion &q)
Add two quaternions.
void serializeFloat(struct btQuaternionFloatData &dataOut) const
btVector3 can be used to represent 3D points and vectors.
const btScalar & getZ() const
Return the z value.
const btScalar & z() const
Return the z value.
btScalar length() const
Return the length of the vector.
btVector3 cross(const btVector3 &v) const
Return the cross product between this and another vector.
btScalar dot(const btVector3 &v) const
Return the dot product.
const btScalar & getY() const
Return the y value.
const btScalar & x() const
Return the x value.
btVector3 & normalize()
Normalize this vector such that x^2 + y^2 + z^2 = 1.
const btScalar & getX() const
Return the x value.
const btScalar & y() const
Return the y value.