The game where you go into mines and start crafting — but for consoles (forked directly from smartcmd's GitHub).
1//-------------------------------------------------------------------------------------
2// DirectXMathMisc.inl -- SIMD C++ Math library
3//
4// THIS CODE AND INFORMATION IS PROVIDED "AS IS" WITHOUT WARRANTY OF
5// ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING BUT NOT LIMITED TO
6// THE IMPLIED WARRANTIES OF MERCHANTABILITY AND/OR FITNESS FOR A
7// PARTICULAR PURPOSE.
8//
9// Copyright (c) Microsoft Corporation. All rights reserved.
10//-------------------------------------------------------------------------------------
11
12#ifdef _MSC_VER
13#pragma once
14#endif
15
16/****************************************************************************
17 *
18 * Quaternion
19 *
20 ****************************************************************************/
21
22//------------------------------------------------------------------------------
23// Comparison operations
24//------------------------------------------------------------------------------
25
26//------------------------------------------------------------------------------
27
28inline bool XMQuaternionEqual
29(
30 FXMVECTOR Q1,
31 FXMVECTOR Q2
32)
33{
34 return XMVector4Equal(Q1, Q2);
35}
36
37//------------------------------------------------------------------------------
38
39inline bool XMQuaternionNotEqual
40(
41 FXMVECTOR Q1,
42 FXMVECTOR Q2
43)
44{
45 return XMVector4NotEqual(Q1, Q2);
46}
47
48//------------------------------------------------------------------------------
49
50inline bool XMQuaternionIsNaN
51(
52 FXMVECTOR Q
53)
54{
55 return XMVector4IsNaN(Q);
56}
57
58//------------------------------------------------------------------------------
59
60inline bool XMQuaternionIsInfinite
61(
62 FXMVECTOR Q
63)
64{
65 return XMVector4IsInfinite(Q);
66}
67
68//------------------------------------------------------------------------------
69
70inline bool XMQuaternionIsIdentity
71(
72 FXMVECTOR Q
73)
74{
75#if defined(_XM_NO_INTRINSICS_) || defined(_XM_SSE_INTRINSICS_) || defined(_XM_ARM_NEON_INTRINSICS_)
76 return XMVector4Equal(Q, g_XMIdentityR3.v);
77#else // _XM_VMX128_INTRINSICS_
78#endif // _XM_VMX128_INTRINSICS_
79}
80
81//------------------------------------------------------------------------------
82// Computation operations
83//------------------------------------------------------------------------------
84
85//------------------------------------------------------------------------------
86
87inline XMVECTOR XMQuaternionDot
88(
89 FXMVECTOR Q1,
90 FXMVECTOR Q2
91)
92{
93 return XMVector4Dot(Q1, Q2);
94}
95
96//------------------------------------------------------------------------------
97
98inline XMVECTOR XMQuaternionMultiply
99(
100 FXMVECTOR Q1,
101 FXMVECTOR Q2
102)
103{
104 // Returns the product Q2*Q1 (which is the concatenation of a rotation Q1 followed by the rotation Q2)
105
106 // [ (Q2.w * Q1.x) + (Q2.x * Q1.w) + (Q2.y * Q1.z) - (Q2.z * Q1.y),
107 // (Q2.w * Q1.y) - (Q2.x * Q1.z) + (Q2.y * Q1.w) + (Q2.z * Q1.x),
108 // (Q2.w * Q1.z) + (Q2.x * Q1.y) - (Q2.y * Q1.x) + (Q2.z * Q1.w),
109 // (Q2.w * Q1.w) - (Q2.x * Q1.x) - (Q2.y * Q1.y) - (Q2.z * Q1.z) ]
110
111#if defined(_XM_NO_INTRINSICS_)
112 XMVECTOR Result = {
113 (Q2.vector4_f32[3] * Q1.vector4_f32[0]) + (Q2.vector4_f32[0] * Q1.vector4_f32[3]) + (Q2.vector4_f32[1] * Q1.vector4_f32[2]) - (Q2.vector4_f32[2] * Q1.vector4_f32[1]),
114 (Q2.vector4_f32[3] * Q1.vector4_f32[1]) - (Q2.vector4_f32[0] * Q1.vector4_f32[2]) + (Q2.vector4_f32[1] * Q1.vector4_f32[3]) + (Q2.vector4_f32[2] * Q1.vector4_f32[0]),
115 (Q2.vector4_f32[3] * Q1.vector4_f32[2]) + (Q2.vector4_f32[0] * Q1.vector4_f32[1]) - (Q2.vector4_f32[1] * Q1.vector4_f32[0]) + (Q2.vector4_f32[2] * Q1.vector4_f32[3]),
116 (Q2.vector4_f32[3] * Q1.vector4_f32[3]) - (Q2.vector4_f32[0] * Q1.vector4_f32[0]) - (Q2.vector4_f32[1] * Q1.vector4_f32[1]) - (Q2.vector4_f32[2] * Q1.vector4_f32[2]) };
117 return Result;
118#elif defined(_XM_ARM_NEON_INTRINSICS_)
119 static const XMVECTORF32 ControlWZYX = { 1.0f,-1.0f, 1.0f,-1.0f};
120 static const XMVECTORF32 ControlZWXY = { 1.0f, 1.0f,-1.0f,-1.0f};
121 static const XMVECTORF32 ControlYXWZ = {-1.0f, 1.0f, 1.0f,-1.0f};
122
123 __n64 Q2L = vget_low_f32(Q2);
124 __n64 Q2H = vget_high_f32(Q2);
125
126 __n128 Q2X = vdupq_lane_f32( Q2L, 0 );
127 __n128 Q2Y = vdupq_lane_f32( Q2L, 1 );
128 __n128 Q2Z = vdupq_lane_f32( Q2H, 0 );
129 __n128 vResult = vdupq_lane_f32( Q2H, 1 );
130 vResult = vmulq_f32(vResult,Q1);
131
132 // Mul by Q1WZYX
133 __n128 vTemp = vrev64q_u32(Q1);
134 vTemp = vcombine_f32( vget_high_f32(vTemp), vget_low_f32(vTemp) );
135 Q2X = vmulq_f32(Q2X,vTemp);
136 vResult = vmlaq_f32( vResult, Q2X, ControlWZYX );
137
138 // Mul by Q1ZWXY
139 vTemp = vrev64q_u32(vTemp);
140 Q2Y = vmulq_f32(Q2Y,vTemp);
141 vResult = vmlaq_f32(vResult, Q2Y, ControlZWXY);
142
143 // Mul by Q1YXWZ
144 vTemp = vrev64q_u32(vTemp);
145 vTemp = vcombine_f32(vget_high_f32(vTemp), vget_low_f32(vTemp));
146 Q2Z = vmulq_f32(Q2Z,vTemp);
147 vResult = vmlaq_f32(vResult, Q2Z, ControlYXWZ);
148 return vResult;
149#elif defined(_XM_SSE_INTRINSICS_)
150 static const XMVECTORF32 ControlWZYX = { 1.0f,-1.0f, 1.0f,-1.0f};
151 static const XMVECTORF32 ControlZWXY = { 1.0f, 1.0f,-1.0f,-1.0f};
152 static const XMVECTORF32 ControlYXWZ = {-1.0f, 1.0f, 1.0f,-1.0f};
153 // Copy to SSE registers and use as few as possible for x86
154 XMVECTOR Q2X = Q2;
155 XMVECTOR Q2Y = Q2;
156 XMVECTOR Q2Z = Q2;
157 XMVECTOR vResult = Q2;
158 // Splat with one instruction
159 vResult = XM_PERMUTE_PS(vResult,_MM_SHUFFLE(3,3,3,3));
160 Q2X = XM_PERMUTE_PS(Q2X,_MM_SHUFFLE(0,0,0,0));
161 Q2Y = XM_PERMUTE_PS(Q2Y,_MM_SHUFFLE(1,1,1,1));
162 Q2Z = XM_PERMUTE_PS(Q2Z,_MM_SHUFFLE(2,2,2,2));
163 // Retire Q1 and perform Q1*Q2W
164 vResult = _mm_mul_ps(vResult,Q1);
165 XMVECTOR Q1Shuffle = Q1;
166 // Shuffle the copies of Q1
167 Q1Shuffle = XM_PERMUTE_PS(Q1Shuffle,_MM_SHUFFLE(0,1,2,3));
168 // Mul by Q1WZYX
169 Q2X = _mm_mul_ps(Q2X,Q1Shuffle);
170 Q1Shuffle = XM_PERMUTE_PS(Q1Shuffle,_MM_SHUFFLE(2,3,0,1));
171 // Flip the signs on y and z
172 Q2X = _mm_mul_ps(Q2X,ControlWZYX);
173 // Mul by Q1ZWXY
174 Q2Y = _mm_mul_ps(Q2Y,Q1Shuffle);
175 Q1Shuffle = XM_PERMUTE_PS(Q1Shuffle,_MM_SHUFFLE(0,1,2,3));
176 // Flip the signs on z and w
177 Q2Y = _mm_mul_ps(Q2Y,ControlZWXY);
178 // Mul by Q1YXWZ
179 Q2Z = _mm_mul_ps(Q2Z,Q1Shuffle);
180 vResult = _mm_add_ps(vResult,Q2X);
181 // Flip the signs on x and w
182 Q2Z = _mm_mul_ps(Q2Z,ControlYXWZ);
183 Q2Y = _mm_add_ps(Q2Y,Q2Z);
184 vResult = _mm_add_ps(vResult,Q2Y);
185 return vResult;
186#else // _XM_VMX128_INTRINSICS_
187#endif // _XM_VMX128_INTRINSICS_
188}
189
190//------------------------------------------------------------------------------
191
192inline XMVECTOR XMQuaternionLengthSq
193(
194 FXMVECTOR Q
195)
196{
197 return XMVector4LengthSq(Q);
198}
199
200//------------------------------------------------------------------------------
201
202inline XMVECTOR XMQuaternionReciprocalLength
203(
204 FXMVECTOR Q
205)
206{
207 return XMVector4ReciprocalLength(Q);
208}
209
210//------------------------------------------------------------------------------
211
212inline XMVECTOR XMQuaternionLength
213(
214 FXMVECTOR Q
215)
216{
217 return XMVector4Length(Q);
218}
219
220//------------------------------------------------------------------------------
221
222inline XMVECTOR XMQuaternionNormalizeEst
223(
224 FXMVECTOR Q
225)
226{
227 return XMVector4NormalizeEst(Q);
228}
229
230//------------------------------------------------------------------------------
231
232inline XMVECTOR XMQuaternionNormalize
233(
234 FXMVECTOR Q
235)
236{
237 return XMVector4Normalize(Q);
238}
239
240//------------------------------------------------------------------------------
241
242inline XMVECTOR XMQuaternionConjugate
243(
244 FXMVECTOR Q
245)
246{
247#if defined(_XM_NO_INTRINSICS_)
248 XMVECTOR Result = {
249 -Q.vector4_f32[0],
250 -Q.vector4_f32[1],
251 -Q.vector4_f32[2],
252 Q.vector4_f32[3]
253 };
254 return Result;
255#elif defined(_XM_ARM_NEON_INTRINSICS_)
256 static const XMVECTORF32 NegativeOne3 = {-1.0f,-1.0f,-1.0f,1.0f};
257 return vmulq_f32(Q, NegativeOne3.v );
258#elif defined(_XM_SSE_INTRINSICS_)
259 static const XMVECTORF32 NegativeOne3 = {-1.0f,-1.0f,-1.0f,1.0f};
260 return _mm_mul_ps(Q,NegativeOne3);
261#else // _XM_VMX128_INTRINSICS_
262#endif // _XM_VMX128_INTRINSICS_
263}
264
265//------------------------------------------------------------------------------
266
267inline XMVECTOR XMQuaternionInverse
268(
269 FXMVECTOR Q
270)
271{
272#if defined(_XM_NO_INTRINSICS_) || defined(_XM_SSE_INTRINSICS_) || defined(_XM_ARM_NEON_INTRINSICS_)
273
274 const XMVECTOR Zero = XMVectorZero();
275
276 XMVECTOR L = XMVector4LengthSq(Q);
277 XMVECTOR Conjugate = XMQuaternionConjugate(Q);
278
279 XMVECTOR Control = XMVectorLessOrEqual(L, g_XMEpsilon.v);
280
281 XMVECTOR Result = XMVectorDivide(Conjugate, L);
282
283 Result = XMVectorSelect(Result, Zero, Control);
284
285 return Result;
286
287#else // _XM_VMX128_INTRINSICS_
288#endif // _XM_VMX128_INTRINSICS_
289}
290
291//------------------------------------------------------------------------------
292
293inline XMVECTOR XMQuaternionLn
294(
295 FXMVECTOR Q
296)
297{
298#if defined(_XM_NO_INTRINSICS_) || defined(_XM_SSE_INTRINSICS_) || defined(_XM_ARM_NEON_INTRINSICS_)
299
300 static const XMVECTORF32 OneMinusEpsilon = {1.0f - 0.00001f, 1.0f - 0.00001f, 1.0f - 0.00001f, 1.0f - 0.00001f};
301
302 XMVECTOR QW = XMVectorSplatW(Q);
303 XMVECTOR Q0 = XMVectorSelect(g_XMSelect1110.v, Q, g_XMSelect1110.v);
304
305 XMVECTOR ControlW = XMVectorInBounds(QW, OneMinusEpsilon.v);
306
307 XMVECTOR Theta = XMVectorACos(QW);
308 XMVECTOR SinTheta = XMVectorSin(Theta);
309
310 XMVECTOR S = XMVectorDivide(Theta,SinTheta);
311
312 XMVECTOR Result = XMVectorMultiply(Q0, S);
313 Result = XMVectorSelect(Q0, Result, ControlW);
314
315 return Result;
316
317#else // _XM_VMX128_INTRINSICS_
318#endif // _XM_VMX128_INTRINSICS_
319}
320
321//------------------------------------------------------------------------------
322
323inline XMVECTOR XMQuaternionExp
324(
325 FXMVECTOR Q
326)
327{
328#if defined(_XM_NO_INTRINSICS_) || defined(_XM_SSE_INTRINSICS_) || defined(_XM_ARM_NEON_INTRINSICS_)
329
330 XMVECTOR Theta = XMVector3Length(Q);
331
332 XMVECTOR SinTheta, CosTheta;
333 XMVectorSinCos(&SinTheta, &CosTheta, Theta);
334
335 XMVECTOR S = XMVectorDivide(SinTheta, Theta);
336
337 XMVECTOR Result = XMVectorMultiply(Q, S);
338
339 const XMVECTOR Zero = XMVectorZero();
340 XMVECTOR Control = XMVectorNearEqual(Theta, Zero, g_XMEpsilon.v);
341 Result = XMVectorSelect(Result, Q, Control);
342
343 Result = XMVectorSelect(CosTheta, Result, g_XMSelect1110.v);
344
345 return Result;
346
347#else // _XM_VMX128_INTRINSICS_
348#endif // _XM_VMX128_INTRINSICS_
349}
350
351//------------------------------------------------------------------------------
352
353inline XMVECTOR XMQuaternionSlerp
354(
355 FXMVECTOR Q0,
356 FXMVECTOR Q1,
357 float t
358)
359{
360 XMVECTOR T = XMVectorReplicate(t);
361 return XMQuaternionSlerpV(Q0, Q1, T);
362}
363
364//------------------------------------------------------------------------------
365
366inline XMVECTOR XMQuaternionSlerpV
367(
368 FXMVECTOR Q0,
369 FXMVECTOR Q1,
370 FXMVECTOR T
371)
372{
373 assert((XMVectorGetY(T) == XMVectorGetX(T)) && (XMVectorGetZ(T) == XMVectorGetX(T)) && (XMVectorGetW(T) == XMVectorGetX(T)));
374
375 // Result = Q0 * sin((1.0 - t) * Omega) / sin(Omega) + Q1 * sin(t * Omega) / sin(Omega)
376
377#if defined(_XM_NO_INTRINSICS_) || defined(_XM_ARM_NEON_INTRINSICS_)
378
379 const XMVECTORF32 OneMinusEpsilon = {1.0f - 0.00001f, 1.0f - 0.00001f, 1.0f - 0.00001f, 1.0f - 0.00001f};
380
381 XMVECTOR CosOmega = XMQuaternionDot(Q0, Q1);
382
383 const XMVECTOR Zero = XMVectorZero();
384 XMVECTOR Control = XMVectorLess(CosOmega, Zero);
385 XMVECTOR Sign = XMVectorSelect(g_XMOne.v, g_XMNegativeOne.v, Control);
386
387 CosOmega = XMVectorMultiply(CosOmega, Sign);
388
389 Control = XMVectorLess(CosOmega, OneMinusEpsilon);
390
391 XMVECTOR SinOmega = XMVectorNegativeMultiplySubtract(CosOmega, CosOmega, g_XMOne.v);
392 SinOmega = XMVectorSqrt(SinOmega);
393
394 XMVECTOR Omega = XMVectorATan2(SinOmega, CosOmega);
395
396 XMVECTOR SignMask = XMVectorSplatSignMask();
397 XMVECTOR V01 = XMVectorShiftLeft(T, Zero, 2);
398 SignMask = XMVectorShiftLeft(SignMask, Zero, 3);
399 V01 = XMVectorXorInt(V01, SignMask);
400 V01 = XMVectorAdd(g_XMIdentityR0.v, V01);
401
402 XMVECTOR InvSinOmega = XMVectorReciprocal(SinOmega);
403
404 XMVECTOR S0 = XMVectorMultiply(V01, Omega);
405 S0 = XMVectorSin(S0);
406 S0 = XMVectorMultiply(S0, InvSinOmega);
407
408 S0 = XMVectorSelect(V01, S0, Control);
409
410 XMVECTOR S1 = XMVectorSplatY(S0);
411 S0 = XMVectorSplatX(S0);
412
413 S1 = XMVectorMultiply(S1, Sign);
414
415 XMVECTOR Result = XMVectorMultiply(Q0, S0);
416 Result = XMVectorMultiplyAdd(Q1, S1, Result);
417
418 return Result;
419
420#elif defined(_XM_SSE_INTRINSICS_)
421 static const XMVECTORF32 OneMinusEpsilon = {1.0f - 0.00001f, 1.0f - 0.00001f, 1.0f - 0.00001f, 1.0f - 0.00001f};
422 static const XMVECTORI32 SignMask2 = {0x80000000,0x00000000,0x00000000,0x00000000};
423 static const XMVECTORI32 MaskXY = {0xFFFFFFFF,0xFFFFFFFF,0x00000000,0x00000000};
424
425 XMVECTOR CosOmega = XMQuaternionDot(Q0, Q1);
426
427 const XMVECTOR Zero = XMVectorZero();
428 XMVECTOR Control = XMVectorLess(CosOmega, Zero);
429 XMVECTOR Sign = XMVectorSelect(g_XMOne, g_XMNegativeOne, Control);
430
431 CosOmega = _mm_mul_ps(CosOmega, Sign);
432
433 Control = XMVectorLess(CosOmega, OneMinusEpsilon);
434
435 XMVECTOR SinOmega = _mm_mul_ps(CosOmega,CosOmega);
436 SinOmega = _mm_sub_ps(g_XMOne,SinOmega);
437 SinOmega = _mm_sqrt_ps(SinOmega);
438
439 XMVECTOR Omega = XMVectorATan2(SinOmega, CosOmega);
440
441 XMVECTOR V01 = XM_PERMUTE_PS(T,_MM_SHUFFLE(2,3,0,1));
442 V01 = _mm_and_ps(V01,MaskXY);
443 V01 = _mm_xor_ps(V01,SignMask2);
444 V01 = _mm_add_ps(g_XMIdentityR0, V01);
445
446 XMVECTOR S0 = _mm_mul_ps(V01, Omega);
447 S0 = XMVectorSin(S0);
448 S0 = _mm_div_ps(S0, SinOmega);
449
450 S0 = XMVectorSelect(V01, S0, Control);
451
452 XMVECTOR S1 = XMVectorSplatY(S0);
453 S0 = XMVectorSplatX(S0);
454
455 S1 = _mm_mul_ps(S1, Sign);
456 XMVECTOR Result = _mm_mul_ps(Q0, S0);
457 S1 = _mm_mul_ps(S1, Q1);
458 Result = _mm_add_ps(Result,S1);
459 return Result;
460#else // _XM_VMX128_INTRINSICS_
461#endif // _XM_VMX128_INTRINSICS_
462}
463
464//------------------------------------------------------------------------------
465
466inline XMVECTOR XMQuaternionSquad
467(
468 FXMVECTOR Q0,
469 FXMVECTOR Q1,
470 FXMVECTOR Q2,
471 GXMVECTOR Q3,
472 float t
473)
474{
475 XMVECTOR T = XMVectorReplicate(t);
476 return XMQuaternionSquadV(Q0, Q1, Q2, Q3, T);
477}
478
479//------------------------------------------------------------------------------
480
481inline XMVECTOR XMQuaternionSquadV
482(
483 FXMVECTOR Q0,
484 FXMVECTOR Q1,
485 FXMVECTOR Q2,
486 GXMVECTOR Q3,
487 CXMVECTOR T
488)
489{
490 assert( (XMVectorGetY(T) == XMVectorGetX(T)) && (XMVectorGetZ(T) == XMVectorGetX(T)) && (XMVectorGetW(T) == XMVectorGetX(T)) );
491
492 XMVECTOR TP = T;
493 const XMVECTOR Two = XMVectorSplatConstant(2, 0);
494
495 XMVECTOR Q03 = XMQuaternionSlerpV(Q0, Q3, T);
496 XMVECTOR Q12 = XMQuaternionSlerpV(Q1, Q2, T);
497
498 TP = XMVectorNegativeMultiplySubtract(TP, TP, TP);
499 TP = XMVectorMultiply(TP, Two);
500
501 XMVECTOR Result = XMQuaternionSlerpV(Q03, Q12, TP);
502
503 return Result;
504}
505
506//------------------------------------------------------------------------------
507_Use_decl_annotations_
508inline void XMQuaternionSquadSetup
509(
510 XMVECTOR* pA,
511 XMVECTOR* pB,
512 XMVECTOR* pC,
513 FXMVECTOR Q0,
514 FXMVECTOR Q1,
515 FXMVECTOR Q2,
516 GXMVECTOR Q3
517)
518{
519 assert(pA);
520 assert(pB);
521 assert(pC);
522
523 XMVECTOR LS12 = XMQuaternionLengthSq(XMVectorAdd(Q1, Q2));
524 XMVECTOR LD12 = XMQuaternionLengthSq(XMVectorSubtract(Q1, Q2));
525 XMVECTOR SQ2 = XMVectorNegate(Q2);
526
527 XMVECTOR Control1 = XMVectorLess(LS12, LD12);
528 SQ2 = XMVectorSelect(Q2, SQ2, Control1);
529
530 XMVECTOR LS01 = XMQuaternionLengthSq(XMVectorAdd(Q0, Q1));
531 XMVECTOR LD01 = XMQuaternionLengthSq(XMVectorSubtract(Q0, Q1));
532 XMVECTOR SQ0 = XMVectorNegate(Q0);
533
534 XMVECTOR LS23 = XMQuaternionLengthSq(XMVectorAdd(SQ2, Q3));
535 XMVECTOR LD23 = XMQuaternionLengthSq(XMVectorSubtract(SQ2, Q3));
536 XMVECTOR SQ3 = XMVectorNegate(Q3);
537
538 XMVECTOR Control0 = XMVectorLess(LS01, LD01);
539 XMVECTOR Control2 = XMVectorLess(LS23, LD23);
540
541 SQ0 = XMVectorSelect(Q0, SQ0, Control0);
542 SQ3 = XMVectorSelect(Q3, SQ3, Control2);
543
544 XMVECTOR InvQ1 = XMQuaternionInverse(Q1);
545 XMVECTOR InvQ2 = XMQuaternionInverse(SQ2);
546
547 XMVECTOR LnQ0 = XMQuaternionLn(XMQuaternionMultiply(InvQ1, SQ0));
548 XMVECTOR LnQ2 = XMQuaternionLn(XMQuaternionMultiply(InvQ1, SQ2));
549 XMVECTOR LnQ1 = XMQuaternionLn(XMQuaternionMultiply(InvQ2, Q1));
550 XMVECTOR LnQ3 = XMQuaternionLn(XMQuaternionMultiply(InvQ2, SQ3));
551
552 const XMVECTOR NegativeOneQuarter = XMVectorSplatConstant(-1, 2);
553
554 XMVECTOR ExpQ02 = XMVectorMultiply(XMVectorAdd(LnQ0, LnQ2), NegativeOneQuarter);
555 XMVECTOR ExpQ13 = XMVectorMultiply(XMVectorAdd(LnQ1, LnQ3), NegativeOneQuarter);
556 ExpQ02 = XMQuaternionExp(ExpQ02);
557 ExpQ13 = XMQuaternionExp(ExpQ13);
558
559 *pA = XMQuaternionMultiply(Q1, ExpQ02);
560 *pB = XMQuaternionMultiply(SQ2, ExpQ13);
561 *pC = SQ2;
562}
563
564//------------------------------------------------------------------------------
565
566inline XMVECTOR XMQuaternionBaryCentric
567(
568 FXMVECTOR Q0,
569 FXMVECTOR Q1,
570 FXMVECTOR Q2,
571 float f,
572 float g
573)
574{
575 float s = f + g;
576
577 XMVECTOR Result;
578 if ((s < 0.00001f) && (s > -0.00001f))
579 {
580 Result = Q0;
581 }
582 else
583 {
584 XMVECTOR Q01 = XMQuaternionSlerp(Q0, Q1, s);
585 XMVECTOR Q02 = XMQuaternionSlerp(Q0, Q2, s);
586
587 Result = XMQuaternionSlerp(Q01, Q02, g / s);
588 }
589
590 return Result;
591}
592
593//------------------------------------------------------------------------------
594
595inline XMVECTOR XMQuaternionBaryCentricV
596(
597 FXMVECTOR Q0,
598 FXMVECTOR Q1,
599 FXMVECTOR Q2,
600 GXMVECTOR F,
601 CXMVECTOR G
602)
603{
604 assert( (XMVectorGetY(F) == XMVectorGetX(F)) && (XMVectorGetZ(F) == XMVectorGetX(F)) && (XMVectorGetW(F) == XMVectorGetX(F)) );
605 assert( (XMVectorGetY(G) == XMVectorGetX(G)) && (XMVectorGetZ(G) == XMVectorGetX(G)) && (XMVectorGetW(G) == XMVectorGetX(G)) );
606
607 const XMVECTOR Epsilon = XMVectorSplatConstant(1, 16);
608
609 XMVECTOR S = XMVectorAdd(F, G);
610
611 XMVECTOR Result;
612 if (XMVector4InBounds(S, Epsilon))
613 {
614 Result = Q0;
615 }
616 else
617 {
618 XMVECTOR Q01 = XMQuaternionSlerpV(Q0, Q1, S);
619 XMVECTOR Q02 = XMQuaternionSlerpV(Q0, Q2, S);
620 XMVECTOR GS = XMVectorReciprocal(S);
621 GS = XMVectorMultiply(G, GS);
622
623 Result = XMQuaternionSlerpV(Q01, Q02, GS);
624 }
625
626 return Result;
627}
628
629//------------------------------------------------------------------------------
630// Transformation operations
631//------------------------------------------------------------------------------
632
633//------------------------------------------------------------------------------
634
635inline XMVECTOR XMQuaternionIdentity()
636{
637#if defined(_XM_NO_INTRINSICS_) || defined(_XM_SSE_INTRINSICS_) || defined(_XM_ARM_NEON_INTRINSICS_)
638 return g_XMIdentityR3.v;
639#else // _XM_VMX128_INTRINSICS_
640#endif // _XM_VMX128_INTRINSICS_
641}
642
643//------------------------------------------------------------------------------
644
645inline XMVECTOR XMQuaternionRotationRollPitchYaw
646(
647 float Pitch,
648 float Yaw,
649 float Roll
650)
651{
652 XMVECTOR Angles = XMVectorSet(Pitch, Yaw, Roll, 0.0f);
653 XMVECTOR Q = XMQuaternionRotationRollPitchYawFromVector(Angles);
654 return Q;
655}
656
657//------------------------------------------------------------------------------
658
659inline XMVECTOR XMQuaternionRotationRollPitchYawFromVector
660(
661 FXMVECTOR Angles // <Pitch, Yaw, Roll, 0>
662)
663{
664#if defined(_XM_NO_INTRINSICS_) || defined(_XM_SSE_INTRINSICS_) || defined(_XM_ARM_NEON_INTRINSICS_)
665
666 static const XMVECTORF32 Sign = {1.0f, -1.0f, -1.0f, 1.0f};
667
668 XMVECTOR HalfAngles = XMVectorMultiply(Angles, g_XMOneHalf.v);
669
670 XMVECTOR SinAngles, CosAngles;
671 XMVectorSinCos(&SinAngles, &CosAngles, HalfAngles);
672
673 XMVECTOR P0 = XMVectorPermute<XM_PERMUTE_0X, XM_PERMUTE_1X, XM_PERMUTE_1X, XM_PERMUTE_1X>(SinAngles, CosAngles);
674 XMVECTOR Y0 = XMVectorPermute<XM_PERMUTE_1Y, XM_PERMUTE_0Y, XM_PERMUTE_1Y, XM_PERMUTE_1Y>(SinAngles, CosAngles);
675 XMVECTOR R0 = XMVectorPermute<XM_PERMUTE_1Z, XM_PERMUTE_1Z, XM_PERMUTE_0Z, XM_PERMUTE_1Z>(SinAngles, CosAngles);
676 XMVECTOR P1 = XMVectorPermute<XM_PERMUTE_0X, XM_PERMUTE_1X, XM_PERMUTE_1X, XM_PERMUTE_1X>(CosAngles, SinAngles);
677 XMVECTOR Y1 = XMVectorPermute<XM_PERMUTE_1Y, XM_PERMUTE_0Y, XM_PERMUTE_1Y, XM_PERMUTE_1Y>(CosAngles, SinAngles);
678 XMVECTOR R1 = XMVectorPermute<XM_PERMUTE_1Z, XM_PERMUTE_1Z, XM_PERMUTE_0Z, XM_PERMUTE_1Z>(CosAngles, SinAngles);
679
680 XMVECTOR Q1 = XMVectorMultiply(P1, Sign.v);
681 XMVECTOR Q0 = XMVectorMultiply(P0, Y0);
682 Q1 = XMVectorMultiply(Q1, Y1);
683 Q0 = XMVectorMultiply(Q0, R0);
684 XMVECTOR Q = XMVectorMultiplyAdd(Q1, R1, Q0);
685
686 return Q;
687
688#else // _XM_VMX128_INTRINSICS_
689#endif // _XM_VMX128_INTRINSICS_
690}
691
692//------------------------------------------------------------------------------
693
694inline XMVECTOR XMQuaternionRotationNormal
695(
696 FXMVECTOR NormalAxis,
697 float Angle
698)
699{
700#if defined(_XM_NO_INTRINSICS_) || defined(_XM_ARM_NEON_INTRINSICS_)
701
702 XMVECTOR N = XMVectorSelect(g_XMOne.v, NormalAxis, g_XMSelect1110.v);
703
704 float SinV, CosV;
705 XMScalarSinCos(&SinV, &CosV, 0.5f * Angle);
706
707 XMVECTOR Scale = XMVectorSet( SinV, SinV, SinV, CosV );
708 return XMVectorMultiply(N, Scale);
709#elif defined(_XM_SSE_INTRINSICS_)
710 XMVECTOR N = _mm_and_ps(NormalAxis,g_XMMask3);
711 N = _mm_or_ps(N,g_XMIdentityR3);
712 XMVECTOR Scale = _mm_set_ps1(0.5f * Angle);
713 XMVECTOR vSine;
714 XMVECTOR vCosine;
715 XMVectorSinCos(&vSine,&vCosine,Scale);
716 Scale = _mm_and_ps(vSine,g_XMMask3);
717 vCosine = _mm_and_ps(vCosine,g_XMMaskW);
718 Scale = _mm_or_ps(Scale,vCosine);
719 N = _mm_mul_ps(N,Scale);
720 return N;
721#else // _XM_VMX128_INTRINSICS_
722#endif // _XM_VMX128_INTRINSICS_
723}
724
725//------------------------------------------------------------------------------
726
727inline XMVECTOR XMQuaternionRotationAxis
728(
729 FXMVECTOR Axis,
730 float Angle
731)
732{
733 assert(!XMVector3Equal(Axis, XMVectorZero()));
734 assert(!XMVector3IsInfinite(Axis));
735
736#if defined(_XM_NO_INTRINSICS_) || defined(_XM_SSE_INTRINSICS_) || defined(_XM_ARM_NEON_INTRINSICS_)
737 XMVECTOR Normal = XMVector3Normalize(Axis);
738 XMVECTOR Q = XMQuaternionRotationNormal(Normal, Angle);
739 return Q;
740#else // _XM_VMX128_INTRINSICS_
741#endif // _XM_VMX128_INTRINSICS_
742}
743
744//------------------------------------------------------------------------------
745
746inline XMVECTOR XMQuaternionRotationMatrix
747(
748 CXMMATRIX M
749)
750{
751#if defined(_XM_NO_INTRINSICS_)
752
753 XMVECTORF32 q;
754 float r22 = M.m[2][2];
755 if (r22 <= 0.f) // x^2 + y^2 >= z^2 + w^2
756 {
757 float dif10 = M.m[1][1] - M.m[0][0];
758 float omr22 = 1.f - r22;
759 if (dif10 <= 0.f) // x^2 >= y^2
760 {
761 float fourXSqr = omr22 - dif10;
762 float inv4x = 0.5f / sqrtf(fourXSqr);
763 q.f[0] = fourXSqr*inv4x;
764 q.f[1] = (M.m[0][1] + M.m[1][0])*inv4x;
765 q.f[2] = (M.m[0][2] + M.m[2][0])*inv4x;
766 q.f[3] = (M.m[1][2] - M.m[2][1])*inv4x;
767 }
768 else // y^2 >= x^2
769 {
770 float fourYSqr = omr22 + dif10;
771 float inv4y = 0.5f / sqrtf(fourYSqr);
772 q.f[0] = (M.m[0][1] + M.m[1][0])*inv4y;
773 q.f[1] = fourYSqr*inv4y;
774 q.f[2] = (M.m[1][2] + M.m[2][1])*inv4y;
775 q.f[3] = (M.m[2][0] - M.m[0][2])*inv4y;
776 }
777 }
778 else // z^2 + w^2 >= x^2 + y^2
779 {
780 float sum10 = M.m[1][1] + M.m[0][0];
781 float opr22 = 1.f + r22;
782 if (sum10 <= 0.f) // z^2 >= w^2
783 {
784 float fourZSqr = opr22 - sum10;
785 float inv4z = 0.5f / sqrtf(fourZSqr);
786 q.f[0] = (M.m[0][2] + M.m[2][0])*inv4z;
787 q.f[1] = (M.m[1][2] + M.m[2][1])*inv4z;
788 q.f[2] = fourZSqr*inv4z;
789 q.f[3] = (M.m[0][1] - M.m[1][0])*inv4z;
790 }
791 else // w^2 >= z^2
792 {
793 float fourWSqr = opr22 + sum10;
794 float inv4w = 0.5f / sqrtf(fourWSqr);
795 q.f[0] = (M.m[1][2] - M.m[2][1])*inv4w;
796 q.f[1] = (M.m[2][0] - M.m[0][2])*inv4w;
797 q.f[2] = (M.m[0][1] - M.m[1][0])*inv4w;
798 q.f[3] = fourWSqr*inv4w;
799 }
800 }
801 return q.v;
802
803#elif defined(_XM_ARM_NEON_INTRINSICS_)
804 static const XMVECTORF32 XMPMMP = {+1.0f, -1.0f, -1.0f, +1.0f};
805 static const XMVECTORF32 XMMPMP = {-1.0f, +1.0f, -1.0f, +1.0f};
806 static const XMVECTORF32 XMMMPP = {-1.0f, -1.0f, +1.0f, +1.0f};
807 static const XMVECTORU32 Select0110 = { XM_SELECT_0, XM_SELECT_1, XM_SELECT_1, XM_SELECT_0 };
808 static const XMVECTORU32 Select0010 = { XM_SELECT_0, XM_SELECT_0, XM_SELECT_1, XM_SELECT_0 };
809
810 XMVECTOR r0 = M.r[0];
811 XMVECTOR r1 = M.r[1];
812 XMVECTOR r2 = M.r[2];
813
814 XMVECTOR r00 = vdupq_lane_f32(vget_low_f32(r0), 0);
815 XMVECTOR r11 = vdupq_lane_f32(vget_low_f32(r1), 1);
816 XMVECTOR r22 = vdupq_lane_f32(vget_high_f32(r2), 0);
817
818 // x^2 >= y^2 equivalent to r11 - r00 <= 0
819 XMVECTOR r11mr00 = vsubq_f32(r11, r00);
820 XMVECTOR x2gey2 = vcleq_f32(r11mr00, g_XMZero);
821
822 // z^2 >= w^2 equivalent to r11 + r00 <= 0
823 XMVECTOR r11pr00 = vaddq_f32(r11, r00);
824 XMVECTOR z2gew2 = vcleq_f32(r11pr00, g_XMZero);
825
826 // x^2 + y^2 >= z^2 + w^2 equivalent to r22 <= 0
827 XMVECTOR x2py2gez2pw2 = vcleq_f32(r22, g_XMZero);
828
829 // (4*x^2, 4*y^2, 4*z^2, 4*w^2)
830 XMVECTOR t0 = vmulq_f32( XMPMMP, r00 );
831 XMVECTOR x2y2z2w2 = vmlaq_f32( t0, XMMPMP, r11 );
832 x2y2z2w2 = vmlaq_f32( x2y2z2w2, XMMMPP, r22 );
833 x2y2z2w2 = vaddq_f32( x2y2z2w2, g_XMOne );
834
835 // (r01, r02, r12, r11)
836 t0 = vextq_f32(r0, r0, 1);
837 XMVECTOR t1 = vextq_f32(r1, r1, 1);
838 t0 = vcombine_f32( vget_low_f32(t0), vrev64_f32( vget_low_f32( t1 ) ) );
839
840 // (r10, r20, r21, r10)
841 t1 = vextq_f32(r2, r2, 3);
842 XMVECTOR r10 = vdupq_lane_f32( vget_low_f32(r1), 0 );
843 t1 = vbslq_f32( Select0110, t1, r10 );
844
845 // (4*x*y, 4*x*z, 4*y*z, unused)
846 XMVECTOR xyxzyz = vaddq_f32(t0, t1);
847
848 // (r21, r20, r10, r10)
849 t0 = vcombine_f32( vrev64_f32( vget_low_f32(r2) ), vget_low_f32(r10) );
850
851 // (r12, r02, r01, r12)
852 XMVECTOR t2 = vcombine_f32( vrev64_f32( vget_high_f32(r0) ), vrev64_f32( vget_low_f32(r0) ) );
853 XMVECTOR t3 = vdupq_lane_f32( vget_high_f32(r1), 0 );
854 t1 = vbslq_f32( Select0110, t2, t3 );
855
856 // (4*x*w, 4*y*w, 4*z*w, unused)
857 XMVECTOR xwywzw = vsubq_f32(t0, t1);
858 xwywzw = vmulq_f32(XMMPMP, xwywzw);
859
860 // (4*x*x, 4*x*y, 4*x*z, 4*x*w)
861 t0 = vextq_f32( xyxzyz, xyxzyz, 3 );
862 t1 = vbslq_f32( Select0110, t0, x2y2z2w2 );
863 t2 = vdupq_lane_f32( vget_low_f32(xwywzw), 0 );
864 XMVECTOR tensor0 = vbslq_f32( g_XMSelect1110, t1, t2 );
865
866 // (4*y*x, 4*y*y, 4*y*z, 4*y*w)
867 t0 = vbslq_f32( g_XMSelect1011, xyxzyz, x2y2z2w2 );
868 t1 = vdupq_lane_f32( vget_low_f32(xwywzw), 1 );
869 XMVECTOR tensor1 = vbslq_f32( g_XMSelect1110, t0, t1 );
870
871 // (4*z*x, 4*z*y, 4*z*z, 4*z*w)
872 t0 = vextq_f32(xyxzyz, xyxzyz, 1);
873 t1 = vcombine_f32( vget_low_f32(t0), vrev64_f32( vget_high_f32(xwywzw) ) );
874 XMVECTOR tensor2 = vbslq_f32( Select0010, x2y2z2w2, t1 );
875
876 // (4*w*x, 4*w*y, 4*w*z, 4*w*w)
877 XMVECTOR tensor3 = vbslq_f32( g_XMSelect1110, xwywzw, x2y2z2w2 );
878
879 // Select the row of the tensor-product matrix that has the largest
880 // magnitude.
881 t0 = vbslq_f32( x2gey2, tensor0, tensor1 );
882 t1 = vbslq_f32( z2gew2, tensor2, tensor3 );
883 t2 = vbslq_f32( x2py2gez2pw2, t0, t1 );
884
885 // Normalize the row. No division by zero is possible because the
886 // quaternion is unit-length (and the row is a nonzero multiple of
887 // the quaternion).
888 t0 = XMVector4Length(t2);
889 return XMVectorDivide(t2, t0);
890#elif defined(_XM_SSE_INTRINSICS_)
891 static const XMVECTORF32 XMPMMP = {+1.0f, -1.0f, -1.0f, +1.0f};
892 static const XMVECTORF32 XMMPMP = {-1.0f, +1.0f, -1.0f, +1.0f};
893 static const XMVECTORF32 XMMMPP = {-1.0f, -1.0f, +1.0f, +1.0f};
894
895 XMVECTOR r0 = M.r[0]; // (r00, r01, r02, 0)
896 XMVECTOR r1 = M.r[1]; // (r10, r11, r12, 0)
897 XMVECTOR r2 = M.r[2]; // (r20, r21, r22, 0)
898
899 // (r00, r00, r00, r00)
900 XMVECTOR r00 = XM_PERMUTE_PS(r0, _MM_SHUFFLE(0,0,0,0));
901 // (r11, r11, r11, r11)
902 XMVECTOR r11 = XM_PERMUTE_PS(r1, _MM_SHUFFLE(1,1,1,1));
903 // (r22, r22, r22, r22)
904 XMVECTOR r22 = XM_PERMUTE_PS(r2, _MM_SHUFFLE(2,2,2,2));
905
906 // x^2 >= y^2 equivalent to r11 - r00 <= 0
907 // (r11 - r00, r11 - r00, r11 - r00, r11 - r00)
908 XMVECTOR r11mr00 = _mm_sub_ps(r11, r00);
909 XMVECTOR x2gey2 = _mm_cmple_ps(r11mr00, g_XMZero);
910
911 // z^2 >= w^2 equivalent to r11 + r00 <= 0
912 // (r11 + r00, r11 + r00, r11 + r00, r11 + r00)
913 XMVECTOR r11pr00 = _mm_add_ps(r11, r00);
914 XMVECTOR z2gew2 = _mm_cmple_ps(r11pr00, g_XMZero);
915
916 // x^2 + y^2 >= z^2 + w^2 equivalent to r22 <= 0
917 XMVECTOR x2py2gez2pw2 = _mm_cmple_ps(r22, g_XMZero);
918
919 // (+r00, -r00, -r00, +r00)
920 XMVECTOR t0 = _mm_mul_ps(XMPMMP, r00);
921
922 // (-r11, +r11, -r11, +r11)
923 XMVECTOR t1 = _mm_mul_ps(XMMPMP, r11);
924
925 // (-r22, -r22, +r22, +r22)
926 XMVECTOR t2 = _mm_mul_ps(XMMMPP, r22);
927
928 // (4*x^2, 4*y^2, 4*z^2, 4*w^2)
929 XMVECTOR x2y2z2w2 = _mm_add_ps(t0, t1);
930 x2y2z2w2 = _mm_add_ps(t2, x2y2z2w2);
931 x2y2z2w2 = _mm_add_ps(x2y2z2w2, g_XMOne);
932
933 // (r01, r02, r12, r11)
934 t0 = _mm_shuffle_ps(r0, r1, _MM_SHUFFLE(1,2,2,1));
935 // (r10, r10, r20, r21)
936 t1 = _mm_shuffle_ps(r1, r2, _MM_SHUFFLE(1,0,0,0));
937 // (r10, r20, r21, r10)
938 t1 = XM_PERMUTE_PS(t1, _MM_SHUFFLE(1,3,2,0));
939 // (4*x*y, 4*x*z, 4*y*z, unused)
940 XMVECTOR xyxzyz = _mm_add_ps(t0, t1);
941
942 // (r21, r20, r10, r10)
943 t0 = _mm_shuffle_ps(r2, r1, _MM_SHUFFLE(0,0,0,1));
944 // (r12, r12, r02, r01)
945 t1 = _mm_shuffle_ps(r1, r0, _MM_SHUFFLE(1,2,2,2));
946 // (r12, r02, r01, r12)
947 t1 = XM_PERMUTE_PS(t1, _MM_SHUFFLE(1,3,2,0));
948 // (4*x*w, 4*y*w, 4*z*w, unused)
949 XMVECTOR xwywzw = _mm_sub_ps(t0, t1);
950 xwywzw = _mm_mul_ps(XMMPMP, xwywzw);
951
952 // (4*x^2, 4*y^2, 4*x*y, unused)
953 t0 = _mm_shuffle_ps(x2y2z2w2, xyxzyz, _MM_SHUFFLE(0,0,1,0));
954 // (4*z^2, 4*w^2, 4*z*w, unused)
955 t1 = _mm_shuffle_ps(x2y2z2w2, xwywzw, _MM_SHUFFLE(0,2,3,2));
956 // (4*x*z, 4*y*z, 4*x*w, 4*y*w)
957 t2 = _mm_shuffle_ps(xyxzyz, xwywzw, _MM_SHUFFLE(1,0,2,1));
958
959 // (4*x*x, 4*x*y, 4*x*z, 4*x*w)
960 XMVECTOR tensor0 = _mm_shuffle_ps(t0, t2, _MM_SHUFFLE(2,0,2,0));
961 // (4*y*x, 4*y*y, 4*y*z, 4*y*w)
962 XMVECTOR tensor1 = _mm_shuffle_ps(t0, t2, _MM_SHUFFLE(3,1,1,2));
963 // (4*z*x, 4*z*y, 4*z*z, 4*z*w)
964 XMVECTOR tensor2 = _mm_shuffle_ps(t2, t1, _MM_SHUFFLE(2,0,1,0));
965 // (4*w*x, 4*w*y, 4*w*z, 4*w*w)
966 XMVECTOR tensor3 = _mm_shuffle_ps(t2, t1, _MM_SHUFFLE(1,2,3,2));
967
968 // Select the row of the tensor-product matrix that has the largest
969 // magnitude.
970 t0 = _mm_and_ps(x2gey2, tensor0);
971 t1 = _mm_andnot_ps(x2gey2, tensor1);
972 t0 = _mm_or_ps(t0, t1);
973 t1 = _mm_and_ps(z2gew2, tensor2);
974 t2 = _mm_andnot_ps(z2gew2, tensor3);
975 t1 = _mm_or_ps(t1, t2);
976 t0 = _mm_and_ps(x2py2gez2pw2, t0);
977 t1 = _mm_andnot_ps(x2py2gez2pw2, t1);
978 t2 = _mm_or_ps(t0, t1);
979
980 // Normalize the row. No division by zero is possible because the
981 // quaternion is unit-length (and the row is a nonzero multiple of
982 // the quaternion).
983 t0 = XMVector4Length(t2);
984 return _mm_div_ps(t2, t0);
985#else // _XM_VMX128_INTRINSICS_
986#endif // _XM_VMX128_INTRINSICS_
987}
988
989//------------------------------------------------------------------------------
990// Conversion operations
991//------------------------------------------------------------------------------
992
993//------------------------------------------------------------------------------
994_Use_decl_annotations_
995inline void XMQuaternionToAxisAngle
996(
997 XMVECTOR* pAxis,
998 float* pAngle,
999 FXMVECTOR Q
1000)
1001{
1002 assert(pAxis);
1003 assert(pAngle);
1004
1005 *pAxis = Q;
1006
1007 *pAngle = 2.0f * XMScalarACos(XMVectorGetW(Q));
1008}
1009
1010/****************************************************************************
1011 *
1012 * Plane
1013 *
1014 ****************************************************************************/
1015
1016//------------------------------------------------------------------------------
1017// Comparison operations
1018//------------------------------------------------------------------------------
1019
1020//------------------------------------------------------------------------------
1021
1022inline bool XMPlaneEqual
1023(
1024 FXMVECTOR P1,
1025 FXMVECTOR P2
1026)
1027{
1028 return XMVector4Equal(P1, P2);
1029}
1030
1031//------------------------------------------------------------------------------
1032
1033inline bool XMPlaneNearEqual
1034(
1035 FXMVECTOR P1,
1036 FXMVECTOR P2,
1037 FXMVECTOR Epsilon
1038)
1039{
1040 XMVECTOR NP1 = XMPlaneNormalize(P1);
1041 XMVECTOR NP2 = XMPlaneNormalize(P2);
1042 return XMVector4NearEqual(NP1, NP2, Epsilon);
1043}
1044
1045//------------------------------------------------------------------------------
1046
1047inline bool XMPlaneNotEqual
1048(
1049 FXMVECTOR P1,
1050 FXMVECTOR P2
1051)
1052{
1053 return XMVector4NotEqual(P1, P2);
1054}
1055
1056//------------------------------------------------------------------------------
1057
1058inline bool XMPlaneIsNaN
1059(
1060 FXMVECTOR P
1061)
1062{
1063 return XMVector4IsNaN(P);
1064}
1065
1066//------------------------------------------------------------------------------
1067
1068inline bool XMPlaneIsInfinite
1069(
1070 FXMVECTOR P
1071)
1072{
1073 return XMVector4IsInfinite(P);
1074}
1075
1076//------------------------------------------------------------------------------
1077// Computation operations
1078//------------------------------------------------------------------------------
1079
1080//------------------------------------------------------------------------------
1081
1082inline XMVECTOR XMPlaneDot
1083(
1084 FXMVECTOR P,
1085 FXMVECTOR V
1086)
1087{
1088 return XMVector4Dot(P, V);
1089}
1090
1091//------------------------------------------------------------------------------
1092
1093inline XMVECTOR XMPlaneDotCoord
1094(
1095 FXMVECTOR P,
1096 FXMVECTOR V
1097)
1098{
1099 // Result = P[0] * V[0] + P[1] * V[1] + P[2] * V[2] + P[3]
1100
1101#if defined(_XM_NO_INTRINSICS_) || defined(_XM_SSE_INTRINSICS_) || defined(_XM_ARM_NEON_INTRINSICS_)
1102
1103 XMVECTOR V3 = XMVectorSelect(g_XMOne.v, V, g_XMSelect1110.v);
1104 XMVECTOR Result = XMVector4Dot(P, V3);
1105 return Result;
1106
1107#else // _XM_VMX128_INTRINSICS_
1108#endif // _XM_VMX128_INTRINSICS_
1109}
1110
1111//------------------------------------------------------------------------------
1112
1113inline XMVECTOR XMPlaneDotNormal
1114(
1115 FXMVECTOR P,
1116 FXMVECTOR V
1117)
1118{
1119 return XMVector3Dot(P, V);
1120}
1121
1122//------------------------------------------------------------------------------
1123// XMPlaneNormalizeEst uses a reciprocal estimate and
1124// returns QNaN on zero and infinite vectors.
1125
1126inline XMVECTOR XMPlaneNormalizeEst
1127(
1128 FXMVECTOR P
1129)
1130{
1131#if defined(_XM_NO_INTRINSICS_) || defined(_XM_ARM_NEON_INTRINSICS_)
1132
1133 XMVECTOR Result = XMVector3ReciprocalLengthEst(P);
1134 return XMVectorMultiply(P, Result);
1135
1136#elif defined(_XM_SSE_INTRINSICS_)
1137 // Perform the dot product
1138 XMVECTOR vDot = _mm_mul_ps(P,P);
1139 // x=Dot.y, y=Dot.z
1140 XMVECTOR vTemp = XM_PERMUTE_PS(vDot,_MM_SHUFFLE(2,1,2,1));
1141 // Result.x = x+y
1142 vDot = _mm_add_ss(vDot,vTemp);
1143 // x=Dot.z
1144 vTemp = XM_PERMUTE_PS(vTemp,_MM_SHUFFLE(1,1,1,1));
1145 // Result.x = (x+y)+z
1146 vDot = _mm_add_ss(vDot,vTemp);
1147 // Splat x
1148 vDot = XM_PERMUTE_PS(vDot,_MM_SHUFFLE(0,0,0,0));
1149 // Get the reciprocal
1150 vDot = _mm_rsqrt_ps(vDot);
1151 // Get the reciprocal
1152 vDot = _mm_mul_ps(vDot,P);
1153 return vDot;
1154#else // _XM_VMX128_INTRINSICS_
1155#endif // _XM_VMX128_INTRINSICS_
1156}
1157
1158//------------------------------------------------------------------------------
1159
1160inline XMVECTOR XMPlaneNormalize
1161(
1162 FXMVECTOR P
1163)
1164{
1165#if defined(_XM_NO_INTRINSICS_)
1166 float fLengthSq = sqrtf((P.vector4_f32[0]*P.vector4_f32[0])+(P.vector4_f32[1]*P.vector4_f32[1])+(P.vector4_f32[2]*P.vector4_f32[2]));
1167 // Prevent divide by zero
1168 if (fLengthSq) {
1169 fLengthSq = 1.0f/fLengthSq;
1170 }
1171 {
1172 XMVECTOR vResult = {
1173 P.vector4_f32[0]*fLengthSq,
1174 P.vector4_f32[1]*fLengthSq,
1175 P.vector4_f32[2]*fLengthSq,
1176 P.vector4_f32[3]*fLengthSq
1177 };
1178 return vResult;
1179 }
1180#elif defined(_XM_ARM_NEON_INTRINSICS_)
1181 XMVECTOR vLength = XMVector3ReciprocalLength(P);
1182 return XMVectorMultiply( P, vLength );
1183#elif defined(_XM_SSE_INTRINSICS_)
1184 // Perform the dot product on x,y and z only
1185 XMVECTOR vLengthSq = _mm_mul_ps(P,P);
1186 XMVECTOR vTemp = XM_PERMUTE_PS(vLengthSq,_MM_SHUFFLE(2,1,2,1));
1187 vLengthSq = _mm_add_ss(vLengthSq,vTemp);
1188 vTemp = XM_PERMUTE_PS(vTemp,_MM_SHUFFLE(1,1,1,1));
1189 vLengthSq = _mm_add_ss(vLengthSq,vTemp);
1190 vLengthSq = XM_PERMUTE_PS(vLengthSq,_MM_SHUFFLE(0,0,0,0));
1191 // Prepare for the division
1192 XMVECTOR vResult = _mm_sqrt_ps(vLengthSq);
1193 // Failsafe on zero (Or epsilon) length planes
1194 // If the length is infinity, set the elements to zero
1195 vLengthSq = _mm_cmpneq_ps(vLengthSq,g_XMInfinity);
1196 // Reciprocal mul to perform the normalization
1197 vResult = _mm_div_ps(P,vResult);
1198 // Any that are infinity, set to zero
1199 vResult = _mm_and_ps(vResult,vLengthSq);
1200 return vResult;
1201#else // _XM_VMX128_INTRINSICS_
1202#endif // _XM_VMX128_INTRINSICS_
1203}
1204
1205//------------------------------------------------------------------------------
1206
1207inline XMVECTOR XMPlaneIntersectLine
1208(
1209 FXMVECTOR P,
1210 FXMVECTOR LinePoint1,
1211 FXMVECTOR LinePoint2
1212)
1213{
1214#if defined(_XM_NO_INTRINSICS_) || defined(_XM_SSE_INTRINSICS_) || defined(_XM_ARM_NEON_INTRINSICS_)
1215
1216 XMVECTOR V1 = XMVector3Dot(P, LinePoint1);
1217 XMVECTOR V2 = XMVector3Dot(P, LinePoint2);
1218 XMVECTOR D = XMVectorSubtract(V1, V2);
1219
1220 XMVECTOR VT = XMPlaneDotCoord(P, LinePoint1);
1221 VT = XMVectorDivide(VT, D);
1222
1223 XMVECTOR Point = XMVectorSubtract(LinePoint2, LinePoint1);
1224 Point = XMVectorMultiplyAdd(Point, VT, LinePoint1);
1225
1226 const XMVECTOR Zero = XMVectorZero();
1227 XMVECTOR Control = XMVectorNearEqual(D, Zero, g_XMEpsilon.v);
1228
1229 return XMVectorSelect(Point, g_XMQNaN.v, Control);
1230
1231#else // _XM_VMX128_INTRINSICS_
1232#endif // _XM_VMX128_INTRINSICS_
1233}
1234
1235//------------------------------------------------------------------------------
1236_Use_decl_annotations_
1237inline void XMPlaneIntersectPlane
1238(
1239 XMVECTOR* pLinePoint1,
1240 XMVECTOR* pLinePoint2,
1241 FXMVECTOR P1,
1242 FXMVECTOR P2
1243)
1244{
1245 assert(pLinePoint1);
1246 assert(pLinePoint2);
1247#if defined(_XM_NO_INTRINSICS_) || defined(_XM_SSE_INTRINSICS_) || defined(_XM_ARM_NEON_INTRINSICS_)
1248
1249 XMVECTOR V1 = XMVector3Cross(P2, P1);
1250
1251 XMVECTOR LengthSq = XMVector3LengthSq(V1);
1252
1253 XMVECTOR V2 = XMVector3Cross(P2, V1);
1254
1255 XMVECTOR P1W = XMVectorSplatW(P1);
1256 XMVECTOR Point = XMVectorMultiply(V2, P1W);
1257
1258 XMVECTOR V3 = XMVector3Cross(V1, P1);
1259
1260 XMVECTOR P2W = XMVectorSplatW(P2);
1261 Point = XMVectorMultiplyAdd(V3, P2W, Point);
1262
1263 XMVECTOR LinePoint1 = XMVectorDivide(Point, LengthSq);
1264
1265 XMVECTOR LinePoint2 = XMVectorAdd(LinePoint1, V1);
1266
1267 XMVECTOR Control = XMVectorLessOrEqual(LengthSq, g_XMEpsilon.v);
1268 *pLinePoint1 = XMVectorSelect(LinePoint1,g_XMQNaN.v, Control);
1269 *pLinePoint2 = XMVectorSelect(LinePoint2,g_XMQNaN.v, Control);
1270
1271#else // _XM_VMX128_INTRINSICS_
1272#endif // _XM_VMX128_INTRINSICS_
1273}
1274
1275//------------------------------------------------------------------------------
1276
1277inline XMVECTOR XMPlaneTransform
1278(
1279 FXMVECTOR P,
1280 CXMMATRIX M
1281)
1282{
1283#if defined(_XM_NO_INTRINSICS_) || defined(_XM_SSE_INTRINSICS_) || defined(_XM_ARM_NEON_INTRINSICS_)
1284
1285 XMVECTOR W = XMVectorSplatW(P);
1286 XMVECTOR Z = XMVectorSplatZ(P);
1287 XMVECTOR Y = XMVectorSplatY(P);
1288 XMVECTOR X = XMVectorSplatX(P);
1289
1290 XMVECTOR Result = XMVectorMultiply(W, M.r[3]);
1291 Result = XMVectorMultiplyAdd(Z, M.r[2], Result);
1292 Result = XMVectorMultiplyAdd(Y, M.r[1], Result);
1293 Result = XMVectorMultiplyAdd(X, M.r[0], Result);
1294 return Result;
1295
1296#else // _XM_VMX128_INTRINSICS_
1297#endif // _XM_VMX128_INTRINSICS_
1298}
1299
1300//------------------------------------------------------------------------------
1301_Use_decl_annotations_
1302inline XMFLOAT4* XMPlaneTransformStream
1303(
1304 XMFLOAT4* pOutputStream,
1305 size_t OutputStride,
1306 const XMFLOAT4* pInputStream,
1307 size_t InputStride,
1308 size_t PlaneCount,
1309 CXMMATRIX M
1310)
1311{
1312 return XMVector4TransformStream(pOutputStream,
1313 OutputStride,
1314 pInputStream,
1315 InputStride,
1316 PlaneCount,
1317 M);
1318}
1319
1320//------------------------------------------------------------------------------
1321// Conversion operations
1322//------------------------------------------------------------------------------
1323
1324//------------------------------------------------------------------------------
1325
1326inline XMVECTOR XMPlaneFromPointNormal
1327(
1328 FXMVECTOR Point,
1329 FXMVECTOR Normal
1330)
1331{
1332 XMVECTOR W = XMVector3Dot(Point, Normal);
1333 W = XMVectorNegate(W);
1334 return XMVectorSelect(W, Normal, g_XMSelect1110.v);
1335}
1336
1337//------------------------------------------------------------------------------
1338
1339inline XMVECTOR XMPlaneFromPoints
1340(
1341 FXMVECTOR Point1,
1342 FXMVECTOR Point2,
1343 FXMVECTOR Point3
1344)
1345{
1346#if defined(_XM_NO_INTRINSICS_) || defined(_XM_SSE_INTRINSICS_) || defined(_XM_ARM_NEON_INTRINSICS_)
1347
1348 XMVECTOR V21 = XMVectorSubtract(Point1, Point2);
1349 XMVECTOR V31 = XMVectorSubtract(Point1, Point3);
1350
1351 XMVECTOR N = XMVector3Cross(V21, V31);
1352 N = XMVector3Normalize(N);
1353
1354 XMVECTOR D = XMPlaneDotNormal(N, Point1);
1355 D = XMVectorNegate(D);
1356
1357 XMVECTOR Result = XMVectorSelect(D, N, g_XMSelect1110.v);
1358
1359 return Result;
1360
1361#else // _XM_VMX128_INTRINSICS_
1362#endif // _XM_VMX128_INTRINSICS_
1363}
1364
1365/****************************************************************************
1366 *
1367 * Color
1368 *
1369 ****************************************************************************/
1370
1371//------------------------------------------------------------------------------
1372// Comparison operations
1373//------------------------------------------------------------------------------
1374
1375//------------------------------------------------------------------------------
1376
1377inline bool XMColorEqual
1378(
1379 FXMVECTOR C1,
1380 FXMVECTOR C2
1381)
1382{
1383 return XMVector4Equal(C1, C2);
1384}
1385
1386//------------------------------------------------------------------------------
1387
1388inline bool XMColorNotEqual
1389(
1390 FXMVECTOR C1,
1391 FXMVECTOR C2
1392)
1393{
1394 return XMVector4NotEqual(C1, C2);
1395}
1396
1397//------------------------------------------------------------------------------
1398
1399inline bool XMColorGreater
1400(
1401 FXMVECTOR C1,
1402 FXMVECTOR C2
1403)
1404{
1405 return XMVector4Greater(C1, C2);
1406}
1407
1408//------------------------------------------------------------------------------
1409
1410inline bool XMColorGreaterOrEqual
1411(
1412 FXMVECTOR C1,
1413 FXMVECTOR C2
1414)
1415{
1416 return XMVector4GreaterOrEqual(C1, C2);
1417}
1418
1419//------------------------------------------------------------------------------
1420
1421inline bool XMColorLess
1422(
1423 FXMVECTOR C1,
1424 FXMVECTOR C2
1425)
1426{
1427 return XMVector4Less(C1, C2);
1428}
1429
1430//------------------------------------------------------------------------------
1431
1432inline bool XMColorLessOrEqual
1433(
1434 FXMVECTOR C1,
1435 FXMVECTOR C2
1436)
1437{
1438 return XMVector4LessOrEqual(C1, C2);
1439}
1440
1441//------------------------------------------------------------------------------
1442
1443inline bool XMColorIsNaN
1444(
1445 FXMVECTOR C
1446)
1447{
1448 return XMVector4IsNaN(C);
1449}
1450
1451//------------------------------------------------------------------------------
1452
1453inline bool XMColorIsInfinite
1454(
1455 FXMVECTOR C
1456)
1457{
1458 return XMVector4IsInfinite(C);
1459}
1460
1461//------------------------------------------------------------------------------
1462// Computation operations
1463//------------------------------------------------------------------------------
1464
1465//------------------------------------------------------------------------------
1466
1467inline XMVECTOR XMColorNegative
1468(
1469 FXMVECTOR vColor
1470)
1471{
1472#if defined(_XM_NO_INTRINSICS_)
1473 XMVECTORF32 vResult = {
1474 1.0f - vColor.vector4_f32[0],
1475 1.0f - vColor.vector4_f32[1],
1476 1.0f - vColor.vector4_f32[2],
1477 vColor.vector4_f32[3]
1478 };
1479 return vResult.v;
1480#elif defined(_XM_ARM_NEON_INTRINSICS_)
1481 XMVECTOR vTemp = veorq_u32(vColor,g_XMNegate3);
1482 return vaddq_f32(vTemp,g_XMOne3);
1483#elif defined(_XM_SSE_INTRINSICS_)
1484 // Negate only x,y and z.
1485 XMVECTOR vTemp = _mm_xor_ps(vColor,g_XMNegate3);
1486 // Add 1,1,1,0 to -x,-y,-z,w
1487 return _mm_add_ps(vTemp,g_XMOne3);
1488#else // _XM_VMX128_INTRINSICS_
1489#endif // _XM_VMX128_INTRINSICS_
1490}
1491
1492//------------------------------------------------------------------------------
1493
1494inline XMVECTOR XMColorModulate
1495(
1496 FXMVECTOR C1,
1497 FXMVECTOR C2
1498)
1499{
1500 return XMVectorMultiply(C1, C2);
1501}
1502
1503//------------------------------------------------------------------------------
1504
1505inline XMVECTOR XMColorAdjustSaturation
1506(
1507 FXMVECTOR vColor,
1508 float fSaturation
1509)
1510{
1511 // Luminance = 0.2125f * C[0] + 0.7154f * C[1] + 0.0721f * C[2];
1512 // Result = (C - Luminance) * Saturation + Luminance;
1513
1514#if defined(_XM_NO_INTRINSICS_)
1515 const XMVECTORF32 gvLuminance = {0.2125f, 0.7154f, 0.0721f, 0.0f};
1516
1517 float fLuminance = (vColor.vector4_f32[0]*gvLuminance.f[0])+(vColor.vector4_f32[1]*gvLuminance.f[1])+(vColor.vector4_f32[2]*gvLuminance.f[2]);
1518 XMVECTORF32 vResult = {
1519 ((vColor.vector4_f32[0] - fLuminance)*fSaturation)+fLuminance,
1520 ((vColor.vector4_f32[1] - fLuminance)*fSaturation)+fLuminance,
1521 ((vColor.vector4_f32[2] - fLuminance)*fSaturation)+fLuminance,
1522 vColor.vector4_f32[3]};
1523 return vResult.v;
1524
1525#elif defined(_XM_ARM_NEON_INTRINSICS_)
1526 static const XMVECTORF32 gvLuminance = {0.2125f, 0.7154f, 0.0721f, 0.0f};
1527 XMVECTOR vLuminance = XMVector3Dot( vColor, gvLuminance );
1528 XMVECTOR vResult = vsubq_f32(vColor, vLuminance);
1529 XMVECTOR vSaturation = vdupq_n_f32(fSaturation);
1530 vResult = vmlaq_f32( vLuminance, vResult, vSaturation );
1531 return vbslq_f32( g_XMSelect1110, vResult, vColor );
1532#elif defined(_XM_SSE_INTRINSICS_)
1533 static const XMVECTORF32 gvLuminance = {0.2125f, 0.7154f, 0.0721f, 0.0f};
1534 XMVECTOR vLuminance = XMVector3Dot( vColor, gvLuminance );
1535// Splat fSaturation
1536 XMVECTOR vSaturation = _mm_set_ps1(fSaturation);
1537// vResult = ((vColor-vLuminance)*vSaturation)+vLuminance;
1538 XMVECTOR vResult = _mm_sub_ps(vColor,vLuminance);
1539 vResult = _mm_mul_ps(vResult,vSaturation);
1540 vResult = _mm_add_ps(vResult,vLuminance);
1541// Retain w from the source color
1542 vLuminance = _mm_shuffle_ps(vResult,vColor,_MM_SHUFFLE(3,2,2,2)); // x = vResult.z,y = vResult.z,z = vColor.z,w=vColor.w
1543 vResult = _mm_shuffle_ps(vResult,vLuminance,_MM_SHUFFLE(3,0,1,0)); // x = vResult.x,y = vResult.y,z = vResult.z,w=vColor.w
1544 return vResult;
1545#elif defined(XM_NO_MISALIGNED_VECTOR_ACCESS)
1546#endif // _XM_VMX128_INTRINSICS_
1547}
1548
1549//------------------------------------------------------------------------------
1550
1551inline XMVECTOR XMColorAdjustContrast
1552(
1553 FXMVECTOR vColor,
1554 float fContrast
1555)
1556{
1557 // Result = (vColor - 0.5f) * fContrast + 0.5f;
1558
1559#if defined(_XM_NO_INTRINSICS_)
1560 XMVECTORF32 vResult = {
1561 ((vColor.vector4_f32[0]-0.5f) * fContrast) + 0.5f,
1562 ((vColor.vector4_f32[1]-0.5f) * fContrast) + 0.5f,
1563 ((vColor.vector4_f32[2]-0.5f) * fContrast) + 0.5f,
1564 vColor.vector4_f32[3] // Leave W untouched
1565 };
1566 return vResult.v;
1567#elif defined(_XM_ARM_NEON_INTRINSICS_)
1568 XMVECTOR vResult = vsubq_f32(vColor, g_XMOneHalf.v);
1569 XMVECTOR vContrast = vdupq_n_f32(fContrast);
1570 vResult = vmlaq_f32( g_XMOneHalf.v, vResult, vContrast );
1571 return vbslq_f32( g_XMSelect1110, vResult, vColor );
1572#elif defined(_XM_SSE_INTRINSICS_)
1573 XMVECTOR vScale = _mm_set_ps1(fContrast); // Splat the scale
1574 XMVECTOR vResult = _mm_sub_ps(vColor,g_XMOneHalf); // Subtract 0.5f from the source (Saving source)
1575 vResult = _mm_mul_ps(vResult,vScale); // Mul by scale
1576 vResult = _mm_add_ps(vResult,g_XMOneHalf); // Add 0.5f
1577// Retain w from the source color
1578 vScale = _mm_shuffle_ps(vResult,vColor,_MM_SHUFFLE(3,2,2,2)); // x = vResult.z,y = vResult.z,z = vColor.z,w=vColor.w
1579 vResult = _mm_shuffle_ps(vResult,vScale,_MM_SHUFFLE(3,0,1,0)); // x = vResult.x,y = vResult.y,z = vResult.z,w=vColor.w
1580 return vResult;
1581#elif defined(XM_NO_MISALIGNED_VECTOR_ACCESS)
1582#endif // _XM_VMX128_INTRINSICS_
1583}
1584
1585//------------------------------------------------------------------------------
1586
1587inline XMVECTOR XMColorRGBToHSL( FXMVECTOR rgb )
1588{
1589 XMVECTOR r = XMVectorSplatX( rgb );
1590 XMVECTOR g = XMVectorSplatY( rgb );
1591 XMVECTOR b = XMVectorSplatZ( rgb );
1592
1593 XMVECTOR min = XMVectorMin( r, XMVectorMin( g, b ) );
1594 XMVECTOR max = XMVectorMax( r, XMVectorMax( g, b ) );
1595
1596 XMVECTOR l = XMVectorMultiply( XMVectorAdd( min, max ), g_XMOneHalf );
1597
1598 XMVECTOR d = XMVectorSubtract( max, min );
1599
1600 XMVECTOR la = XMVectorSelect( rgb, l, g_XMSelect1110 );
1601
1602 if ( XMVector3Less( d, g_XMEpsilon ) )
1603 {
1604 // Achromatic, assume H and S of 0
1605 return XMVectorSelect( la, g_XMZero, g_XMSelect1100 );
1606 }
1607 else
1608 {
1609 XMVECTOR s, h;
1610
1611 XMVECTOR d2 = XMVectorAdd( min, max );
1612
1613 if ( XMVector3Greater( l, g_XMOneHalf ) )
1614 {
1615 // d / (2-max-min)
1616 s = XMVectorDivide( d, XMVectorSubtract( g_XMTwo, d2 ) );
1617 }
1618 else
1619 {
1620 // d / (max+min)
1621 s = XMVectorDivide( d, d2 );
1622 }
1623
1624 if ( XMVector3Equal( r, max ) )
1625 {
1626 // Red is max
1627 h = XMVectorDivide( XMVectorSubtract( g, b ), d );
1628 }
1629 else if ( XMVector3Equal( g, max ) )
1630 {
1631 // Green is max
1632 h = XMVectorDivide( XMVectorSubtract( b, r ), d );
1633 h = XMVectorAdd( h, g_XMTwo );
1634 }
1635 else
1636 {
1637 // Blue is max
1638 h = XMVectorDivide( XMVectorSubtract( r, g ), d );
1639 h = XMVectorAdd( h, g_XMFour );
1640 }
1641
1642 h = XMVectorDivide( h, g_XMSix );
1643
1644 if ( XMVector3Less( h, g_XMZero ) )
1645 h = XMVectorAdd( h, g_XMOne );
1646
1647 XMVECTOR lha = XMVectorSelect( la, h, g_XMSelect1100 );
1648 return XMVectorSelect( s, lha, g_XMSelect1011 );
1649 }
1650}
1651
1652//------------------------------------------------------------------------------
1653
1654namespace Internal
1655{
1656
1657inline XMVECTOR XMColorHue2Clr( FXMVECTOR p, FXMVECTOR q, FXMVECTOR h )
1658{
1659 static const XMVECTORF32 oneSixth = { 1.0f/6.0f, 1.0f/6.0f, 1.0f/6.0f, 1.0f/6.0f };
1660 static const XMVECTORF32 twoThirds = { 2.0f/3.0f, 2.0f/3.0f, 2.0f/3.0f, 2.0f/3.0f };
1661
1662 XMVECTOR t = h;
1663
1664 if ( XMVector3Less( t, g_XMZero ) )
1665 t = XMVectorAdd( t, g_XMOne );
1666
1667 if ( XMVector3Greater( t, g_XMOne ) )
1668 t = XMVectorSubtract( t, g_XMOne );
1669
1670 if ( XMVector3Less( t, oneSixth ) )
1671 {
1672 // p + (q - p) * 6 * t
1673 XMVECTOR t1 = XMVectorSubtract( q, p );
1674 XMVECTOR t2 = XMVectorMultiply( g_XMSix, t );
1675 return XMVectorMultiplyAdd( t1, t2, p );
1676 }
1677
1678 if ( XMVector3Less( t, g_XMOneHalf ) )
1679 return q;
1680
1681 if ( XMVector3Less( t, twoThirds ) )
1682 {
1683 // p + (q - p) * 6 * (2/3 - t)
1684 XMVECTOR t1 = XMVectorSubtract( q, p );
1685 XMVECTOR t2 = XMVectorMultiply( g_XMSix, XMVectorSubtract( twoThirds, t ) );
1686 return XMVectorMultiplyAdd( t1, t2, p );
1687 }
1688
1689 return p;
1690}
1691
1692}; // namespace Internal
1693
1694inline XMVECTOR XMColorHSLToRGB( FXMVECTOR hsl )
1695{
1696 static const XMVECTORF32 oneThird = { 1.0f/3.0f, 1.0f/3.0f, 1.0f/3.0f, 1.0f/3.0f };
1697
1698 XMVECTOR s = XMVectorSplatY( hsl );
1699 XMVECTOR l = XMVectorSplatZ( hsl );
1700
1701 if ( XMVector3NearEqual( s, g_XMZero, g_XMEpsilon ) )
1702 {
1703 // Achromatic
1704 return XMVectorSelect( hsl, l, g_XMSelect1110 );
1705 }
1706 else
1707 {
1708 XMVECTOR h = XMVectorSplatX( hsl );
1709
1710 XMVECTOR q;
1711 if ( XMVector3Less( l, g_XMOneHalf ) )
1712 {
1713 q = XMVectorMultiply( l, XMVectorAdd ( g_XMOne, s ) );
1714 }
1715 else
1716 {
1717 q = XMVectorSubtract( XMVectorAdd( l, s ), XMVectorMultiply( l, s ) );
1718 }
1719
1720 XMVECTOR p = XMVectorSubtract( XMVectorMultiply( g_XMTwo, l ), q );
1721
1722 XMVECTOR r = DirectX::Internal::XMColorHue2Clr( p, q, XMVectorAdd( h, oneThird ) );
1723 XMVECTOR g = DirectX::Internal::XMColorHue2Clr( p, q, h );
1724 XMVECTOR b = DirectX::Internal::XMColorHue2Clr( p, q, XMVectorSubtract( h, oneThird ) );
1725
1726 XMVECTOR rg = XMVectorSelect( g, r, g_XMSelect1000 );
1727 XMVECTOR ba = XMVectorSelect( hsl, b, g_XMSelect1110 );
1728
1729 return XMVectorSelect( ba, rg, g_XMSelect1100 );
1730 }
1731}
1732
1733//------------------------------------------------------------------------------
1734
1735inline XMVECTOR XMColorRGBToHSV( FXMVECTOR rgb )
1736{
1737 XMVECTOR r = XMVectorSplatX( rgb );
1738 XMVECTOR g = XMVectorSplatY( rgb );
1739 XMVECTOR b = XMVectorSplatZ( rgb );
1740
1741 XMVECTOR min = XMVectorMin( r, XMVectorMin( g, b ) );
1742 XMVECTOR v = XMVectorMax( r, XMVectorMax( g, b ) );
1743
1744 XMVECTOR d = XMVectorSubtract( v, min );
1745
1746 XMVECTOR s = ( XMVector3NearEqual( v, g_XMZero, g_XMEpsilon ) ) ? g_XMZero : XMVectorDivide( d, v );
1747
1748 if ( XMVector3Less( d, g_XMEpsilon ) )
1749 {
1750 // Achromatic, assume H of 0
1751 XMVECTOR hv = XMVectorSelect( v, g_XMZero, g_XMSelect1000 );
1752 XMVECTOR hva = XMVectorSelect( rgb, hv, g_XMSelect1110 );
1753 return XMVectorSelect( s, hva, g_XMSelect1011 );
1754 }
1755 else
1756 {
1757 XMVECTOR h;
1758
1759 if ( XMVector3Equal( r, v ) )
1760 {
1761 // Red is max
1762 h = XMVectorDivide( XMVectorSubtract( g, b ), d );
1763
1764 if ( XMVector3Less( g, b ) )
1765 h = XMVectorAdd( h, g_XMSix );
1766 }
1767 else if ( XMVector3Equal( g, v ) )
1768 {
1769 // Green is max
1770 h = XMVectorDivide( XMVectorSubtract( b, r ), d );
1771 h = XMVectorAdd( h, g_XMTwo );
1772 }
1773 else
1774 {
1775 // Blue is max
1776 h = XMVectorDivide( XMVectorSubtract( r, g ), d );
1777 h = XMVectorAdd( h, g_XMFour );
1778 }
1779
1780 h = XMVectorDivide( h, g_XMSix );
1781
1782 XMVECTOR hv = XMVectorSelect( v, h, g_XMSelect1000 );
1783 XMVECTOR hva = XMVectorSelect( rgb, hv, g_XMSelect1110 );
1784 return XMVectorSelect( s, hva, g_XMSelect1011 );
1785 }
1786}
1787
1788//------------------------------------------------------------------------------
1789
1790inline XMVECTOR XMColorHSVToRGB( FXMVECTOR hsv )
1791{
1792 XMVECTOR h = XMVectorSplatX( hsv );
1793 XMVECTOR s = XMVectorSplatY( hsv );
1794 XMVECTOR v = XMVectorSplatZ( hsv );
1795
1796 XMVECTOR h6 = XMVectorMultiply( h, g_XMSix );
1797
1798 XMVECTOR i = XMVectorFloor( h6 );
1799 XMVECTOR f = XMVectorSubtract( h6, i );
1800
1801 // p = v* (1-s)
1802 XMVECTOR p = XMVectorMultiply( v, XMVectorSubtract( g_XMOne, s ) );
1803
1804 // q = v*(1-f*s)
1805 XMVECTOR q = XMVectorMultiply( v, XMVectorSubtract( g_XMOne, XMVectorMultiply( f, s ) ) );
1806
1807 // t = v*(1 - (1-f)*s)
1808 XMVECTOR t = XMVectorMultiply( v, XMVectorSubtract( g_XMOne, XMVectorMultiply( XMVectorSubtract( g_XMOne, f ), s ) ) );
1809
1810 int ii = static_cast<int>( XMVectorGetX( XMVectorMod( i, g_XMSix ) ) );
1811
1812 XMVECTOR _rgb;
1813
1814 switch (ii)
1815 {
1816 case 0: // rgb = vtp
1817 {
1818 XMVECTOR vt = XMVectorSelect( t, v, g_XMSelect1000 );
1819 _rgb = XMVectorSelect( p, vt, g_XMSelect1100 );
1820 }
1821 break;
1822 case 1: // rgb = qvp
1823 {
1824 XMVECTOR qv = XMVectorSelect( v, q, g_XMSelect1000 );
1825 _rgb = XMVectorSelect( p, qv, g_XMSelect1100 );
1826 }
1827 break;
1828 case 2: // rgb = pvt
1829 {
1830 XMVECTOR pv = XMVectorSelect( v, p, g_XMSelect1000 );
1831 _rgb = XMVectorSelect( t, pv, g_XMSelect1100 );
1832 }
1833 break;
1834 case 3: // rgb = pqv
1835 {
1836 XMVECTOR pq = XMVectorSelect( q, p, g_XMSelect1000 );
1837 _rgb = XMVectorSelect( v, pq, g_XMSelect1100 );
1838 }
1839 break;
1840 case 4: // rgb = tpv
1841 {
1842 XMVECTOR tp = XMVectorSelect( p, t, g_XMSelect1000 );
1843 _rgb = XMVectorSelect( v, tp, g_XMSelect1100 );
1844 }
1845 break;
1846 default: // rgb = vpq
1847 {
1848 XMVECTOR vp = XMVectorSelect( p, v, g_XMSelect1000 );
1849 _rgb = XMVectorSelect( q, vp, g_XMSelect1100 );
1850 }
1851 break;
1852 }
1853
1854 return XMVectorSelect( hsv, _rgb, g_XMSelect1110 );
1855}
1856
1857//------------------------------------------------------------------------------
1858
1859inline XMVECTOR XMColorRGBToYUV( FXMVECTOR rgb )
1860{
1861 static const XMVECTORF32 Scale0 = { 0.299f, -0.147f, 0.615f, 0.0f };
1862 static const XMVECTORF32 Scale1 = { 0.587f, -0.289f, -0.515f, 0.0f };
1863 static const XMVECTORF32 Scale2 = { 0.114f, 0.436f, -0.100f, 0.0f };
1864
1865 XMMATRIX M( Scale0, Scale1, Scale2, g_XMZero );
1866 XMVECTOR clr = XMVector3Transform( rgb, M );
1867
1868 return XMVectorSelect( rgb, clr, g_XMSelect1110 );
1869}
1870
1871//------------------------------------------------------------------------------
1872
1873inline XMVECTOR XMColorYUVToRGB( FXMVECTOR yuv )
1874{
1875 static const XMVECTORF32 Scale1 = { 0.0f, -0.395f, 2.032f, 0.0f };
1876 static const XMVECTORF32 Scale2 = { 1.140f, -0.581f, 0.0f, 0.0f };
1877
1878 XMMATRIX M( g_XMOne, Scale1, Scale2, g_XMZero );
1879 XMVECTOR clr = XMVector3Transform( yuv, M );
1880
1881 return XMVectorSelect( yuv, clr, g_XMSelect1110 );
1882}
1883
1884//------------------------------------------------------------------------------
1885
1886inline XMVECTOR XMColorRGBToYUV_HD( FXMVECTOR rgb )
1887{
1888 static const XMVECTORF32 Scale0 = { 0.2126f, -0.0997f, 0.6150f, 0.0f };
1889 static const XMVECTORF32 Scale1 = { 0.7152f, -0.3354f, -0.5586f, 0.0f };
1890 static const XMVECTORF32 Scale2 = { 0.0722f, 0.4351f, -0.0564f, 0.0f };
1891
1892 XMMATRIX M( Scale0, Scale1, Scale2, g_XMZero );
1893 XMVECTOR clr = XMVector3Transform( rgb, M );
1894
1895 return XMVectorSelect( rgb, clr, g_XMSelect1110 );
1896}
1897
1898//------------------------------------------------------------------------------
1899
1900inline XMVECTOR XMColorYUVToRGB_HD( FXMVECTOR yuv )
1901{
1902 static const XMVECTORF32 Scale1 = { 0.0f, -0.2153f, 2.1324f, 0.0f };
1903 static const XMVECTORF32 Scale2 = { 1.2803f, -0.3806f, 0.0f, 0.0f };
1904
1905 XMMATRIX M( g_XMOne, Scale1, Scale2, g_XMZero );
1906 XMVECTOR clr = XMVector3Transform( yuv, M );
1907
1908 return XMVectorSelect( yuv, clr, g_XMSelect1110 );
1909}
1910
1911//------------------------------------------------------------------------------
1912
1913inline XMVECTOR XMColorRGBToXYZ( FXMVECTOR rgb )
1914{
1915 static const XMVECTORF32 Scale0 = { 0.4887180f, 0.1762044f, 0.0000000f, 0.0f };
1916 static const XMVECTORF32 Scale1 = { 0.3106803f, 0.8129847f, 0.0102048f, 0.0f };
1917 static const XMVECTORF32 Scale2 = { 0.2006017f, 0.0108109f, 0.9897952f, 0.0f };
1918 static const XMVECTORF32 Scale = { 1.f/0.17697f, 1.f/0.17697f, 1.f/0.17697f, 0.0f };
1919
1920 XMMATRIX M( Scale0, Scale1, Scale2, g_XMZero );
1921 XMVECTOR clr = XMVectorMultiply( XMVector3Transform( rgb, M ), Scale );
1922
1923 return XMVectorSelect( rgb, clr, g_XMSelect1110 );
1924}
1925
1926inline XMVECTOR XMColorXYZToRGB( FXMVECTOR xyz )
1927{
1928 static const XMVECTORF32 Scale0 = { 2.3706743f, -0.5138850f, 0.0052982f, 0.0f };
1929 static const XMVECTORF32 Scale1 = { -0.9000405f, 1.4253036f, -0.0146949f, 0.0f };
1930 static const XMVECTORF32 Scale2 = { -0.4706338f, 0.0885814f, 1.0093968f, 0.0f };
1931 static const XMVECTORF32 Scale = { 0.17697f, 0.17697f, 0.17697f, 0.0f };
1932
1933 XMMATRIX M( Scale0, Scale1, Scale2, g_XMZero );
1934 XMVECTOR clr = XMVector3Transform( XMVectorMultiply( xyz, Scale ), M );
1935
1936 return XMVectorSelect( xyz, clr, g_XMSelect1110 );
1937}
1938
1939//------------------------------------------------------------------------------
1940
1941inline XMVECTOR XMColorXYZToSRGB( FXMVECTOR xyz )
1942{
1943 static const XMVECTORF32 Scale0 = { 3.2406f, -0.9689f, 0.0557f, 0.0f };
1944 static const XMVECTORF32 Scale1 = { -1.5372f, 1.8758f, -0.2040f, 0.0f };
1945 static const XMVECTORF32 Scale2 = { -0.4986f, 0.0415f, 1.0570f, 0.0f };
1946 static const XMVECTORF32 Cutoff = { 0.0031308f, 0.0031308f, 0.0031308f, 0.0f };
1947 static const XMVECTORF32 Exp = { 1.0f/2.4f, 1.0f/2.4f, 1.0f/2.4f, 1.0f };
1948
1949 XMMATRIX M( Scale0, Scale1, Scale2, g_XMZero );
1950 XMVECTOR lclr = XMVector3Transform( xyz, M );
1951
1952 XMVECTOR sel = XMVectorGreater( lclr, Cutoff );
1953
1954 // clr = 12.92 * lclr for lclr <= 0.0031308f
1955 XMVECTOR smallC = XMVectorMultiply( lclr, g_XMsrgbScale );
1956
1957 // clr = (1+a)*pow(lclr, 1/2.4) - a for lclr > 0.0031308 (where a = 0.055)
1958 XMVECTOR largeC = XMVectorSubtract( XMVectorMultiply( g_XMsrgbA1, XMVectorPow( lclr, Exp ) ), g_XMsrgbA );
1959
1960 XMVECTOR clr = XMVectorSelect( smallC, largeC, sel );
1961
1962 return XMVectorSelect( xyz, clr, g_XMSelect1110 );
1963}
1964
1965//------------------------------------------------------------------------------
1966
1967inline XMVECTOR XMColorSRGBToXYZ( FXMVECTOR srgb )
1968{
1969 static const XMVECTORF32 Scale0 = { 0.4124f, 0.2126f, 0.0193f, 0.0f };
1970 static const XMVECTORF32 Scale1 = { 0.3576f, 0.7152f, 0.1192f, 0.0f };
1971 static const XMVECTORF32 Scale2 = { 0.1805f, 0.0722f, 0.9505f, 0.0f };
1972 static const XMVECTORF32 Cutoff = { 0.04045f, 0.04045f, 0.04045f, 0.0f };
1973 static const XMVECTORF32 Exp = { 2.4f, 2.4f, 2.4f, 1.0f };
1974
1975 XMVECTOR sel = XMVectorGreater( srgb, Cutoff );
1976
1977 // lclr = clr / 12.92
1978 XMVECTOR smallC = XMVectorDivide( srgb, g_XMsrgbScale );
1979
1980 // lclr = pow( (clr + a) / (1+a), 2.4 )
1981 XMVECTOR largeC = XMVectorPow( XMVectorDivide( XMVectorAdd( srgb, g_XMsrgbA ), g_XMsrgbA1 ), Exp );
1982
1983 XMVECTOR lclr = XMVectorSelect( smallC, largeC, sel );
1984
1985 XMMATRIX M( Scale0, Scale1, Scale2, g_XMZero );
1986 XMVECTOR clr = XMVector3Transform( lclr, M );
1987
1988 return XMVectorSelect( srgb, clr, g_XMSelect1110 );
1989}
1990
1991/****************************************************************************
1992 *
1993 * Miscellaneous
1994 *
1995 ****************************************************************************/
1996
1997//------------------------------------------------------------------------------
1998
// Reports whether the CPU supports the instruction set this build of the
// library was configured for. Builds without SSE/NEON intrinsics (or with
// _XM_NO_INTRINSICS_ defined) always report true.
inline bool XMVerifyCPUSupport()
{
#if defined(_XM_NO_INTRINSICS_)
    // Scalar build: nothing to verify
    return true;
#elif defined(_XM_SSE_INTRINSICS_)
  #if defined(_M_AMD64)
    // The X64 processor model requires SSE2 support
    return true;
  #elif defined(PF_XMMI_INSTRUCTIONS_AVAILABLE)
    // Note that on Windows 2000 or older, SSE2 detection is not supported so this will always fail
    // Detecting SSE2 on older versions of Windows would require using cpuid directly
    if ( IsProcessorFeaturePresent( PF_XMMI_INSTRUCTIONS_AVAILABLE ) == 0 )
    {
        return false;
    }
    return ( IsProcessorFeaturePresent( PF_XMMI64_INSTRUCTIONS_AVAILABLE ) != 0 );
  #else
    // If windows.h is not included, we return false (likely a false negative)
    return false;
  #endif
#elif defined(_XM_ARM_NEON_INTRINSICS_)
  #ifdef PF_ARM_NEON_INSTRUCTIONS_AVAILABLE
    return ( IsProcessorFeaturePresent( PF_ARM_NEON_INSTRUCTIONS_AVAILABLE ) != 0 );
  #else
    // If windows.h is not included, we return false (likely a false negative)
    return false;
  #endif
#else
    return true;
#endif
}
2024
2025//------------------------------------------------------------------------------
2026
2027inline XMVECTOR XMFresnelTerm
2028(
2029 FXMVECTOR CosIncidentAngle,
2030 FXMVECTOR RefractionIndex
2031)
2032{
2033 assert(!XMVector4IsInfinite(CosIncidentAngle));
2034
2035 // Result = 0.5f * (g - c)^2 / (g + c)^2 * ((c * (g + c) - 1)^2 / (c * (g - c) + 1)^2 + 1) where
2036 // c = CosIncidentAngle
2037 // g = sqrt(c^2 + RefractionIndex^2 - 1)
2038
2039#if defined(_XM_NO_INTRINSICS_) || defined(_XM_ARM_NEON_INTRINSICS_)
2040
2041 XMVECTOR G = XMVectorMultiplyAdd(RefractionIndex, RefractionIndex, g_XMNegativeOne.v);
2042 G = XMVectorMultiplyAdd(CosIncidentAngle, CosIncidentAngle, G);
2043 G = XMVectorAbs(G);
2044 G = XMVectorSqrt(G);
2045
2046 XMVECTOR S = XMVectorAdd(G, CosIncidentAngle);
2047 XMVECTOR D = XMVectorSubtract(G, CosIncidentAngle);
2048
2049 XMVECTOR V0 = XMVectorMultiply(D, D);
2050 XMVECTOR V1 = XMVectorMultiply(S, S);
2051 V1 = XMVectorReciprocal(V1);
2052 V0 = XMVectorMultiply(g_XMOneHalf.v, V0);
2053 V0 = XMVectorMultiply(V0, V1);
2054
2055 XMVECTOR V2 = XMVectorMultiplyAdd(CosIncidentAngle, S, g_XMNegativeOne.v);
2056 XMVECTOR V3 = XMVectorMultiplyAdd(CosIncidentAngle, D, g_XMOne.v);
2057 V2 = XMVectorMultiply(V2, V2);
2058 V3 = XMVectorMultiply(V3, V3);
2059 V3 = XMVectorReciprocal(V3);
2060 V2 = XMVectorMultiplyAdd(V2, V3, g_XMOne.v);
2061
2062 XMVECTOR Result = XMVectorMultiply(V0, V2);
2063
2064 Result = XMVectorSaturate(Result);
2065
2066 return Result;
2067
2068#elif defined(_XM_SSE_INTRINSICS_)
2069 // G = sqrt(abs((RefractionIndex^2-1) + CosIncidentAngle^2))
2070 XMVECTOR G = _mm_mul_ps(RefractionIndex,RefractionIndex);
2071 XMVECTOR vTemp = _mm_mul_ps(CosIncidentAngle,CosIncidentAngle);
2072 G = _mm_sub_ps(G,g_XMOne);
2073 vTemp = _mm_add_ps(vTemp,G);
2074 // max((0-vTemp),vTemp) == abs(vTemp)
2075 // The abs is needed to deal with refraction and cosine being zero
2076 G = _mm_setzero_ps();
2077 G = _mm_sub_ps(G,vTemp);
2078 G = _mm_max_ps(G,vTemp);
2079 // Last operation, the sqrt()
2080 G = _mm_sqrt_ps(G);
2081
2082 // Calc G-C and G+C
2083 XMVECTOR GAddC = _mm_add_ps(G,CosIncidentAngle);
2084 XMVECTOR GSubC = _mm_sub_ps(G,CosIncidentAngle);
2085 // Perform the term (0.5f *(g - c)^2) / (g + c)^2
2086 XMVECTOR vResult = _mm_mul_ps(GSubC,GSubC);
2087 vTemp = _mm_mul_ps(GAddC,GAddC);
2088 vResult = _mm_mul_ps(vResult,g_XMOneHalf);
2089 vResult = _mm_div_ps(vResult,vTemp);
2090 // Perform the term ((c * (g + c) - 1)^2 / (c * (g - c) + 1)^2 + 1)
2091 GAddC = _mm_mul_ps(GAddC,CosIncidentAngle);
2092 GSubC = _mm_mul_ps(GSubC,CosIncidentAngle);
2093 GAddC = _mm_sub_ps(GAddC,g_XMOne);
2094 GSubC = _mm_add_ps(GSubC,g_XMOne);
2095 GAddC = _mm_mul_ps(GAddC,GAddC);
2096 GSubC = _mm_mul_ps(GSubC,GSubC);
2097 GAddC = _mm_div_ps(GAddC,GSubC);
2098 GAddC = _mm_add_ps(GAddC,g_XMOne);
2099 // Multiply the two term parts
2100 vResult = _mm_mul_ps(vResult,GAddC);
2101 // Clamp to 0.0 - 1.0f
2102 vResult = _mm_max_ps(vResult,g_XMZero);
2103 vResult = _mm_min_ps(vResult,g_XMOne);
2104 return vResult;
2105#else // _XM_VMX128_INTRINSICS_
2106#endif // _XM_VMX128_INTRINSICS_
2107}
2108
2109//------------------------------------------------------------------------------
2110
// Returns true when S1 and S2 differ by no more than Epsilon, i.e. when
// |S1 - S2| <= Epsilon. A NaN difference or NaN Epsilon yields false.
inline bool XMScalarNearEqual
(
    float S1,
    float S2,
    float Epsilon
)
{
    const float distance = fabsf(S1 - S2);
    return (Epsilon >= distance);
}
2121
2122//------------------------------------------------------------------------------
2123// Modulo the range of the given angle such that -XM_PI <= Angle < XM_PI
2124inline float XMScalarModAngle
2125(
2126 float Angle
2127)
2128{
2129 // Note: The modulo is performed with unsigned math only to work
2130 // around a precision error on numbers that are close to PI
2131
2132 // Normalize the range from 0.0f to XM_2PI
2133 Angle = Angle + XM_PI;
2134 // Perform the modulo, unsigned
2135 float fTemp = fabsf(Angle);
2136 fTemp = fTemp - (XM_2PI * (float)((int32_t)(fTemp/XM_2PI)));
2137 // Restore the number to the range of -XM_PI to XM_PI-epsilon
2138 fTemp = fTemp - XM_PI;
2139 // If the modulo'd value was negative, restore negation
2140 if (Angle<0.0f) {
2141 fTemp = -fTemp;
2142 }
2143 return fTemp;
2144}
2145
2146//------------------------------------------------------------------------------
2147
2148inline float XMScalarSin
2149(
2150 float Value
2151)
2152{
2153 // Map Value to y in [-pi,pi], x = 2*pi*quotient + remainder.
2154 float quotient = XM_1DIV2PI*Value;
2155 if (Value >= 0.0f)
2156 {
2157 quotient = (float)((int)(quotient + 0.5f));
2158 }
2159 else
2160 {
2161 quotient = (float)((int)(quotient - 0.5f));
2162 }
2163 float y = Value - XM_2PI*quotient;
2164
2165 // Map y to [-pi/2,pi/2] with sin(y) = sin(Value).
2166 if (y > XM_PIDIV2)
2167 {
2168 y = XM_PI - y;
2169 }
2170 else if (y < -XM_PIDIV2)
2171 {
2172 y = -XM_PI - y;
2173 }
2174
2175 // 11-degree minimax approximation
2176 float y2 = y * y;
2177 return ( ( ( ( (-2.3889859e-08f * y2 + 2.7525562e-06f) * y2 - 0.00019840874f ) * y2 + 0.0083333310f ) * y2 - 0.16666667f ) * y2 + 1.0f ) * y;
2178}
2179
2180//------------------------------------------------------------------------------
2181
2182inline float XMScalarSinEst
2183(
2184 float Value
2185)
2186{
2187 // Map Value to y in [-pi,pi], x = 2*pi*quotient + remainder.
2188 float quotient = XM_1DIV2PI*Value;
2189 if (Value >= 0.0f)
2190 {
2191 quotient = (float)((int)(quotient + 0.5f));
2192 }
2193 else
2194 {
2195 quotient = (float)((int)(quotient - 0.5f));
2196 }
2197 float y = Value - XM_2PI*quotient;
2198
2199 // Map y to [-pi/2,pi/2] with sin(y) = sin(Value).
2200 if (y > XM_PIDIV2)
2201 {
2202 y = XM_PI - y;
2203 }
2204 else if (y < -XM_PIDIV2)
2205 {
2206 y = -XM_PI - y;
2207 }
2208
2209 // 7-degree minimax approximation
2210 float y2 = y * y;
2211 return ( ( ( -0.00018524670f * y2 + 0.0083139502f ) * y2 - 0.16665852f ) * y2 + 1.0f ) * y;
2212}
2213
2214//------------------------------------------------------------------------------
2215
2216inline float XMScalarCos
2217(
2218 float Value
2219)
2220{
2221 // Map Value to y in [-pi,pi], x = 2*pi*quotient + remainder.
2222 float quotient = XM_1DIV2PI*Value;
2223 if (Value >= 0.0f)
2224 {
2225 quotient = (float)((int)(quotient + 0.5f));
2226 }
2227 else
2228 {
2229 quotient = (float)((int)(quotient - 0.5f));
2230 }
2231 float y = Value - XM_2PI*quotient;
2232
2233 // Map y to [-pi/2,pi/2] with cos(y) = sign*cos(x).
2234 float sign;
2235 if (y > XM_PIDIV2)
2236 {
2237 y = XM_PI - y;
2238 sign = -1.0f;
2239 }
2240 else if (y < -XM_PIDIV2)
2241 {
2242 y = -XM_PI - y;
2243 sign = -1.0f;
2244 }
2245 else
2246 {
2247 sign = +1.0f;
2248 }
2249
2250 // 10-degree minimax approximation
2251 float y2 = y*y;
2252 float p = ( ( ( ( -2.6051615e-07f * y2 + 2.4760495e-05f ) * y2 - 0.0013888378f ) * y2 + 0.041666638f ) * y2 - 0.5f ) * y2 + 1.0f;
2253 return sign*p;
2254}
2255
2256//------------------------------------------------------------------------------
2257
2258inline float XMScalarCosEst
2259(
2260 float Value
2261)
2262{
2263 // Map Value to y in [-pi,pi], x = 2*pi*quotient + remainder.
2264 float quotient = XM_1DIV2PI*Value;
2265 if (Value >= 0.0f)
2266 {
2267 quotient = (float)((int)(quotient + 0.5f));
2268 }
2269 else
2270 {
2271 quotient = (float)((int)(quotient - 0.5f));
2272 }
2273 float y = Value - XM_2PI*quotient;
2274
2275 // Map y to [-pi/2,pi/2] with cos(y) = sign*cos(x).
2276 float sign;
2277 if (y > XM_PIDIV2)
2278 {
2279 y = XM_PI - y;
2280 sign = -1.0f;
2281 }
2282 else if (y < -XM_PIDIV2)
2283 {
2284 y = -XM_PI - y;
2285 sign = -1.0f;
2286 }
2287 else
2288 {
2289 sign = +1.0f;
2290 }
2291
2292 // 6-degree minimax approximation
2293 float y2 = y * y;
2294 float p = ( ( -0.0012712436f * y2 + 0.041493919f ) * y2 - 0.49992746f ) * y2 + 1.0f;
2295 return sign*p;
2296}
2297
2298//------------------------------------------------------------------------------
2299
2300_Use_decl_annotations_
2301inline void XMScalarSinCos
2302(
2303 float* pSin,
2304 float* pCos,
2305 float Value
2306)
2307{
2308 assert(pSin);
2309 assert(pCos);
2310
2311 // Map Value to y in [-pi,pi], x = 2*pi*quotient + remainder.
2312 float quotient = XM_1DIV2PI*Value;
2313 if (Value >= 0.0f)
2314 {
2315 quotient = (float)((int)(quotient + 0.5f));
2316 }
2317 else
2318 {
2319 quotient = (float)((int)(quotient - 0.5f));
2320 }
2321 float y = Value - XM_2PI*quotient;
2322
2323 // Map y to [-pi/2,pi/2] with sin(y) = sin(Value).
2324 float sign;
2325 if (y > XM_PIDIV2)
2326 {
2327 y = XM_PI - y;
2328 sign = -1.0f;
2329 }
2330 else if (y < -XM_PIDIV2)
2331 {
2332 y = -XM_PI - y;
2333 sign = -1.0f;
2334 }
2335 else
2336 {
2337 sign = +1.0f;
2338 }
2339
2340 float y2 = y * y;
2341
2342 // 11-degree minimax approximation
2343 *pSin = ( ( ( ( (-2.3889859e-08f * y2 + 2.7525562e-06f) * y2 - 0.00019840874f ) * y2 + 0.0083333310f ) * y2 - 0.16666667f ) * y2 + 1.0f ) * y;
2344
2345 // 10-degree minimax approximation
2346 float p = ( ( ( ( -2.6051615e-07f * y2 + 2.4760495e-05f ) * y2 - 0.0013888378f ) * y2 + 0.041666638f ) * y2 - 0.5f ) * y2 + 1.0f;
2347 *pCos = sign*p;
2348}
2349
2350//------------------------------------------------------------------------------
2351
2352_Use_decl_annotations_
2353inline void XMScalarSinCosEst
2354(
2355 float* pSin,
2356 float* pCos,
2357 float Value
2358)
2359{
2360 assert(pSin);
2361 assert(pCos);
2362
2363 // Map Value to y in [-pi,pi], x = 2*pi*quotient + remainder.
2364 float quotient = XM_1DIV2PI*Value;
2365 if (Value >= 0.0f)
2366 {
2367 quotient = (float)((int)(quotient + 0.5f));
2368 }
2369 else
2370 {
2371 quotient = (float)((int)(quotient - 0.5f));
2372 }
2373 float y = Value - XM_2PI*quotient;
2374
2375 // Map y to [-pi/2,pi/2] with sin(y) = sin(Value).
2376 float sign;
2377 if (y > XM_PIDIV2)
2378 {
2379 y = XM_PI - y;
2380 sign = -1.0f;
2381 }
2382 else if (y < -XM_PIDIV2)
2383 {
2384 y = -XM_PI - y;
2385 sign = -1.0f;
2386 }
2387 else
2388 {
2389 sign = +1.0f;
2390 }
2391
2392 float y2 = y * y;
2393
2394 // 7-degree minimax approximation
2395 *pSin = ( ( ( -0.00018524670f * y2 + 0.0083139502f ) * y2 - 0.16665852f ) * y2 + 1.0f ) * y;
2396
2397 // 6-degree minimax approximation
2398 float p = ( ( -0.0012712436f * y2 + 0.041493919f ) * y2 - 0.49992746f ) * y2 + 1.0f;
2399 *pCos = sign*p;
2400}
2401
2402//------------------------------------------------------------------------------
2403
2404inline float XMScalarASin
2405(
2406 float Value
2407)
2408{
2409 // Clamp input to [-1,1].
2410 bool nonnegative = (Value >= 0.0f);
2411 float x = fabsf(Value);
2412 float omx = 1.0f - x;
2413 if (omx < 0.0f)
2414 {
2415 omx = 0.0f;
2416 }
2417 float root = sqrt(omx);
2418
2419 // 7-degree minimax approximation
2420 float result = ( ( ( ( ( ( -0.0012624911f * x + 0.0066700901f ) * x - 0.0170881256f ) * x + 0.0308918810f ) * x - 0.0501743046f ) * x + 0.0889789874f ) * x - 0.2145988016f ) * x + 1.5707963050f;
2421 result *= root; // acos(|x|)
2422
2423 // acos(x) = pi - acos(-x) when x < 0, asin(x) = pi/2 - acos(x)
2424 return (nonnegative ? XM_PIDIV2 - result : result - XM_PIDIV2);
2425}
2426
2427//------------------------------------------------------------------------------
2428
2429inline float XMScalarASinEst
2430(
2431 float Value
2432)
2433{
2434 // Clamp input to [-1,1].
2435 bool nonnegative = (Value >= 0.0f);
2436 float x = fabsf(Value);
2437 float omx = 1.0f - x;
2438 if (omx < 0.0f)
2439 {
2440 omx = 0.0f;
2441 }
2442 float root = sqrt(omx);
2443
2444 // 3-degree minimax approximation
2445 float result = ((-0.0187293f*x+0.0742610f)*x-0.2121144f)*x+1.5707288f;
2446 result *= root; // acos(|x|)
2447
2448 // acos(x) = pi - acos(-x) when x < 0, asin(x) = pi/2 - acos(x)
2449 return (nonnegative ? XM_PIDIV2 - result : result - XM_PIDIV2);
2450}
2451
2452//------------------------------------------------------------------------------
2453
2454inline float XMScalarACos
2455(
2456 float Value
2457)
2458{
2459 // Clamp input to [-1,1].
2460 bool nonnegative = (Value >= 0.0f);
2461 float x = fabsf(Value);
2462 float omx = 1.0f - x;
2463 if (omx < 0.0f)
2464 {
2465 omx = 0.0f;
2466 }
2467 float root = sqrtf(omx);
2468
2469 // 7-degree minimax approximation
2470 float result = ( ( ( ( ( ( -0.0012624911f * x + 0.0066700901f ) * x - 0.0170881256f ) * x + 0.0308918810f ) * x - 0.0501743046f ) * x + 0.0889789874f ) * x - 0.2145988016f ) * x + 1.5707963050f;
2471 result *= root;
2472
2473 // acos(x) = pi - acos(-x) when x < 0
2474 return (nonnegative ? result : XM_PI - result);
2475}
2476
2477//------------------------------------------------------------------------------
2478
2479inline float XMScalarACosEst
2480(
2481 float Value
2482)
2483{
2484 // Clamp input to [-1,1].
2485 bool nonnegative = (Value >= 0.0f);
2486 float x = fabsf(Value);
2487 float omx = 1.0f - x;
2488 if (omx < 0.0f)
2489 {
2490 omx = 0.0f;
2491 }
2492 float root = sqrtf(omx);
2493
2494 // 3-degree minimax approximation
2495 float result = ( ( -0.0187293f * x + 0.0742610f ) * x - 0.2121144f ) * x + 1.5707288f;
2496 result *= root;
2497
2498 // acos(x) = pi - acos(-x) when x < 0
2499 return (nonnegative ? result : XM_PI - result);
2500}
2501