The game where you go into mines and start crafting, but for consoles (forked directly from smartcmd's GitHub).
at main 2501 lines 75 kB view raw
//-------------------------------------------------------------------------------------
// DirectXMathMisc.inl -- SIMD C++ Math library
//
// THIS CODE AND INFORMATION IS PROVIDED "AS IS" WITHOUT WARRANTY OF
// ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING BUT NOT LIMITED TO
// THE IMPLIED WARRANTIES OF MERCHANTABILITY AND/OR FITNESS FOR A
// PARTICULAR PURPOSE.
//
// Copyright (c) Microsoft Corporation. All rights reserved.
//-------------------------------------------------------------------------------------

#ifdef _MSC_VER
#pragma once
#endif

/****************************************************************************
 *
 * Quaternion
 *
 ****************************************************************************/

//------------------------------------------------------------------------------
// Comparison operations
//------------------------------------------------------------------------------

//------------------------------------------------------------------------------
// Quaternion equality test; forwards both operands to XMVector4Equal.

inline bool XMQuaternionEqual
(
    FXMVECTOR Q1,
    FXMVECTOR Q2
)
{
    return XMVector4Equal(Q1, Q2);
}

//------------------------------------------------------------------------------
// Quaternion inequality test; forwards both operands to XMVector4NotEqual.

inline bool XMQuaternionNotEqual
(
    FXMVECTOR Q1,
    FXMVECTOR Q2
)
{
    return XMVector4NotEqual(Q1, Q2);
}

//------------------------------------------------------------------------------
// NaN test on the quaternion; forwards to XMVector4IsNaN.

inline bool XMQuaternionIsNaN
(
    FXMVECTOR Q
)
{
    return XMVector4IsNaN(Q);
}

//------------------------------------------------------------------------------
// Infinity test on the quaternion; forwards to XMVector4IsInfinite.

inline bool XMQuaternionIsInfinite
(
    FXMVECTOR Q
)
{
    return XMVector4IsInfinite(Q);
}

//------------------------------------------------------------------------------
// Compares Q against g_XMIdentityR3, the same constant that
// XMQuaternionIdentity() returns. The comparison is XMVector4Equal, so no
// tolerance is applied.
// NOTE(review): the _XM_VMX128_INTRINSICS_ branch is empty; on that path the
// function reaches its closing brace without returning a value.

inline bool XMQuaternionIsIdentity
(
    FXMVECTOR Q
)
{
#if defined(_XM_NO_INTRINSICS_) || defined(_XM_SSE_INTRINSICS_) || defined(_XM_ARM_NEON_INTRINSICS_)
    return XMVector4Equal(Q, g_XMIdentityR3.v);
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}

//------------------------------------------------------------------------------
// Computation operations
//------------------------------------------------------------------------------

//------------------------------------------------------------------------------
// Four-component dot product of the two quaternions; forwards to XMVector4Dot.

inline XMVECTOR XMQuaternionDot
(
    FXMVECTOR Q1,
    FXMVECTOR Q2
)
{
    return XMVector4Dot(Q1, Q2);
}

//------------------------------------------------------------------------------
// Quaternion product. Note the operand order: the result is Q2*Q1, not Q1*Q2.
// All three implemented paths evaluate the same four sums listed below; the
// SIMD paths do so by splatting each component of Q2, multiplying it by a
// permutation of Q1, and applying a per-lane sign vector.
// NOTE(review): the _XM_VMX128_INTRINSICS_ branch is empty (no return).

inline XMVECTOR XMQuaternionMultiply
(
    FXMVECTOR Q1,
    FXMVECTOR Q2
)
{
    // Returns the product Q2*Q1 (which is the concatenation of a rotation Q1 followed by the rotation Q2)

    // [ (Q2.w * Q1.x) + (Q2.x * Q1.w) + (Q2.y * Q1.z) - (Q2.z * Q1.y),
    //   (Q2.w * Q1.y) - (Q2.x * Q1.z) + (Q2.y * Q1.w) + (Q2.z * Q1.x),
    //   (Q2.w * Q1.z) + (Q2.x * Q1.y) - (Q2.y * Q1.x) + (Q2.z * Q1.w),
    //   (Q2.w * Q1.w) - (Q2.x * Q1.x) - (Q2.y * Q1.y) - (Q2.z * Q1.z) ]

#if defined(_XM_NO_INTRINSICS_)
    XMVECTOR Result = {
        (Q2.vector4_f32[3] * Q1.vector4_f32[0]) + (Q2.vector4_f32[0] * Q1.vector4_f32[3]) + (Q2.vector4_f32[1] * Q1.vector4_f32[2]) - (Q2.vector4_f32[2] * Q1.vector4_f32[1]),
        (Q2.vector4_f32[3] * Q1.vector4_f32[1]) - (Q2.vector4_f32[0] * Q1.vector4_f32[2]) + (Q2.vector4_f32[1] * Q1.vector4_f32[3]) + (Q2.vector4_f32[2] * Q1.vector4_f32[0]),
        (Q2.vector4_f32[3] * Q1.vector4_f32[2]) + (Q2.vector4_f32[0] * Q1.vector4_f32[1]) - (Q2.vector4_f32[1] * Q1.vector4_f32[0]) + (Q2.vector4_f32[2] * Q1.vector4_f32[3]),
        (Q2.vector4_f32[3] * Q1.vector4_f32[3]) - (Q2.vector4_f32[0] * Q1.vector4_f32[0]) - (Q2.vector4_f32[1] * Q1.vector4_f32[1]) - (Q2.vector4_f32[2] * Q1.vector4_f32[2]) };
    return Result;
#elif defined(_XM_ARM_NEON_INTRINSICS_)
    // Per-lane signs applied to the Q2.x, Q2.y and Q2.z partial products.
    static const XMVECTORF32 ControlWZYX = { 1.0f,-1.0f, 1.0f,-1.0f};
    static const XMVECTORF32 ControlZWXY = { 1.0f, 1.0f,-1.0f,-1.0f};
    static const XMVECTORF32 ControlYXWZ = {-1.0f, 1.0f, 1.0f,-1.0f};

    __n64 Q2L = vget_low_f32(Q2);
    __n64 Q2H = vget_high_f32(Q2);

    // Broadcast each component of Q2; vResult starts as Q2.w * Q1.
    __n128 Q2X = vdupq_lane_f32( Q2L, 0 );
    __n128 Q2Y = vdupq_lane_f32( Q2L, 1 );
    __n128 Q2Z = vdupq_lane_f32( Q2H, 0 );
    __n128 vResult = vdupq_lane_f32( Q2H, 1 );
    vResult = vmulq_f32(vResult,Q1);

    // Mul by Q1WZYX
    __n128 vTemp = vrev64q_u32(Q1);
    vTemp = vcombine_f32( vget_high_f32(vTemp), vget_low_f32(vTemp) );
    Q2X = vmulq_f32(Q2X,vTemp);
    vResult = vmlaq_f32( vResult, Q2X, ControlWZYX );

    // Mul by Q1ZWXY
    vTemp = vrev64q_u32(vTemp);
    Q2Y = vmulq_f32(Q2Y,vTemp);
    vResult = vmlaq_f32(vResult, Q2Y, ControlZWXY);

    // Mul by Q1YXWZ
    vTemp = vrev64q_u32(vTemp);
    vTemp = vcombine_f32(vget_high_f32(vTemp), vget_low_f32(vTemp));
    Q2Z = vmulq_f32(Q2Z,vTemp);
    vResult = vmlaq_f32(vResult, Q2Z, ControlYXWZ);
    return vResult;
#elif defined(_XM_SSE_INTRINSICS_)
    // Per-lane signs applied to the Q2.x, Q2.y and Q2.z partial products.
    static const XMVECTORF32 ControlWZYX = { 1.0f,-1.0f, 1.0f,-1.0f};
    static const XMVECTORF32 ControlZWXY = { 1.0f, 1.0f,-1.0f,-1.0f};
    static const XMVECTORF32 ControlYXWZ = {-1.0f, 1.0f, 1.0f,-1.0f};
    // Copy to SSE registers and use as few as possible for x86
    XMVECTOR Q2X = Q2;
    XMVECTOR Q2Y = Q2;
    XMVECTOR Q2Z = Q2;
    XMVECTOR vResult = Q2;
    // Splat with one instruction
    vResult = XM_PERMUTE_PS(vResult,_MM_SHUFFLE(3,3,3,3));
    Q2X = XM_PERMUTE_PS(Q2X,_MM_SHUFFLE(0,0,0,0));
    Q2Y = XM_PERMUTE_PS(Q2Y,_MM_SHUFFLE(1,1,1,1));
    Q2Z = XM_PERMUTE_PS(Q2Z,_MM_SHUFFLE(2,2,2,2));
    // Retire Q1 and perform Q1*Q2W
    vResult = _mm_mul_ps(vResult,Q1);
    XMVECTOR Q1Shuffle = Q1;
    // Shuffle the copies of Q1
    Q1Shuffle = XM_PERMUTE_PS(Q1Shuffle,_MM_SHUFFLE(0,1,2,3));
    // Mul by Q1WZYX
    Q2X = _mm_mul_ps(Q2X,Q1Shuffle);
    Q1Shuffle = XM_PERMUTE_PS(Q1Shuffle,_MM_SHUFFLE(2,3,0,1));
    // Flip the signs on y and w (ControlWZYX is {+,-,+,-})
    Q2X = _mm_mul_ps(Q2X,ControlWZYX);
    // Mul by Q1ZWXY
    Q2Y = _mm_mul_ps(Q2Y,Q1Shuffle);
    Q1Shuffle = XM_PERMUTE_PS(Q1Shuffle,_MM_SHUFFLE(0,1,2,3));
    // Flip the signs on z and w
    Q2Y = _mm_mul_ps(Q2Y,ControlZWXY);
    // Mul by Q1YXWZ
    Q2Z = _mm_mul_ps(Q2Z,Q1Shuffle);
    vResult = _mm_add_ps(vResult,Q2X);
    // Flip the signs on x and w
    Q2Z = _mm_mul_ps(Q2Z,ControlYXWZ);
    Q2Y = _mm_add_ps(Q2Y,Q2Z);
    vResult = _mm_add_ps(vResult,Q2Y);
    return vResult;
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}

//------------------------------------------------------------------------------
// Squared length of the quaternion; forwards to XMVector4LengthSq.

inline XMVECTOR XMQuaternionLengthSq
(
    FXMVECTOR Q
)
{
    return XMVector4LengthSq(Q);
}

//------------------------------------------------------------------------------
// Reciprocal of the quaternion length; forwards to XMVector4ReciprocalLength.

inline XMVECTOR XMQuaternionReciprocalLength
(
    FXMVECTOR Q
)
{
    return XMVector4ReciprocalLength(Q);
}

//------------------------------------------------------------------------------
// Length of the quaternion; forwards to XMVector4Length.

inline XMVECTOR XMQuaternionLength
(
    FXMVECTOR Q
)
{
    return XMVector4Length(Q);
}

//------------------------------------------------------------------------------
// Estimated normalization; forwards to XMVector4NormalizeEst.

inline XMVECTOR XMQuaternionNormalizeEst
(
    FXMVECTOR Q
)
{
    return XMVector4NormalizeEst(Q);
}

//------------------------------------------------------------------------------
// Normalization; forwards to XMVector4Normalize.

inline XMVECTOR XMQuaternionNormalize
(
    FXMVECTOR Q
)
{
    return XMVector4Normalize(Q);
}

//------------------------------------------------------------------------------
// Conjugate: negates x, y and z and leaves w unchanged. The SIMD paths do
// this with a multiply by (-1, -1, -1, 1).
// NOTE(review): the _XM_VMX128_INTRINSICS_ branch is empty (no return).

inline XMVECTOR XMQuaternionConjugate
(
    FXMVECTOR Q
)
{
#if defined(_XM_NO_INTRINSICS_)
    XMVECTOR Result = {
        -Q.vector4_f32[0],
        -Q.vector4_f32[1],
        -Q.vector4_f32[2],
        Q.vector4_f32[3]
    };
    return Result;
#elif defined(_XM_ARM_NEON_INTRINSICS_)
    static const XMVECTORF32 NegativeOne3 = {-1.0f,-1.0f,-1.0f,1.0f};
    return vmulq_f32(Q, NegativeOne3.v );
#elif defined(_XM_SSE_INTRINSICS_)
    static const XMVECTORF32 NegativeOne3 = {-1.0f,-1.0f,-1.0f,1.0f};
    return _mm_mul_ps(Q,NegativeOne3);
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}

//------------------------------------------------------------------------------
// Inverse: conjugate(Q) divided by the squared length of Q. Where the squared
// length is <= g_XMEpsilon the zero vector is substituted for the quotient,
// so a (near-)zero quaternion yields zero rather than the result of the
// division.
// NOTE(review): the _XM_VMX128_INTRINSICS_ branch is empty (no return).

inline XMVECTOR XMQuaternionInverse
(
    FXMVECTOR Q
)
{
#if defined(_XM_NO_INTRINSICS_) || defined(_XM_SSE_INTRINSICS_) || defined(_XM_ARM_NEON_INTRINSICS_)

    const XMVECTOR Zero = XMVectorZero();

    XMVECTOR L = XMVector4LengthSq(Q);
    XMVECTOR Conjugate = XMQuaternionConjugate(Q);

    // Mask of lanes whose squared length is too small to divide by.
    XMVECTOR Control = XMVectorLessOrEqual(L, g_XMEpsilon.v);

    // The division is performed unconditionally; masked lanes are replaced below.
    XMVECTOR Result = XMVectorDivide(Conjugate, L);

    Result = XMVectorSelect(Result, Zero, Control);

    return Result;

#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}

//------------------------------------------------------------------------------
// Quaternion natural logarithm. With Theta = acos(Q.w), the result is
// Q.xyz * (Theta / sin(Theta)) when Q.w is within +/-(1 - 0.00001); otherwise
// the xyz part of Q is passed through unscaled, which avoids using the
// Theta / sin(Theta) quotient when sin(Theta) is close to zero.
// NOTE(review): the w lane of the result comes from g_XMSelect1110's w lane
// (presumably all-zero bits, i.e. 0.0f) - confirm against the constant's
// definition. The input is not normalized here; callers presumably pass a
// unit quaternion.
// NOTE(review): the _XM_VMX128_INTRINSICS_ branch is empty (no return).

inline XMVECTOR XMQuaternionLn
(
    FXMVECTOR Q
)
{
#if defined(_XM_NO_INTRINSICS_) || defined(_XM_SSE_INTRINSICS_) || defined(_XM_ARM_NEON_INTRINSICS_)

    static const XMVECTORF32 OneMinusEpsilon = {1.0f - 0.00001f, 1.0f - 0.00001f, 1.0f - 0.00001f, 1.0f - 0.00001f};

    XMVECTOR QW = XMVectorSplatW(Q);
    // Q0 takes x, y, z from Q; the select mask itself supplies the w lane.
    XMVECTOR Q0 = XMVectorSelect(g_XMSelect1110.v, Q, g_XMSelect1110.v);

    // Lanes where |Q.w| <= 1 - epsilon, i.e. where the scaled form is used.
    XMVECTOR ControlW = XMVectorInBounds(QW, OneMinusEpsilon.v);

    XMVECTOR Theta = XMVectorACos(QW);
    XMVECTOR SinTheta = XMVectorSin(Theta);

    XMVECTOR S = XMVectorDivide(Theta,SinTheta);

    XMVECTOR Result = XMVectorMultiply(Q0, S);
    Result = XMVectorSelect(Q0, Result, ControlW);

    return Result;

#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}

//------------------------------------------------------------------------------
// Quaternion exponential of a pure quaternion (only x, y, z of Q are used for
// the angle). With Theta = |Q.xyz|, the xyz part of the result is
// Q.xyz * (sin(Theta) / Theta) and the w part is cos(Theta). When Theta is
// within g_XMEpsilon of zero the xyz part is Q passed through unscaled, which
// avoids using the sin(Theta) / Theta quotient at Theta == 0.
// NOTE(review): the _XM_VMX128_INTRINSICS_ branch is empty (no return).

inline XMVECTOR XMQuaternionExp
(
    FXMVECTOR Q
)
{
#if defined(_XM_NO_INTRINSICS_) || defined(_XM_SSE_INTRINSICS_) || defined(_XM_ARM_NEON_INTRINSICS_)

    XMVECTOR Theta = XMVector3Length(Q);

    XMVECTOR SinTheta, CosTheta;
    XMVectorSinCos(&SinTheta, &CosTheta, Theta);

    // Computed unconditionally; near-zero Theta lanes are replaced below.
    XMVECTOR S = XMVectorDivide(SinTheta, Theta);

    XMVECTOR Result = XMVectorMultiply(Q, S);

    const XMVECTOR Zero = XMVectorZero();
    XMVECTOR Control = XMVectorNearEqual(Theta, Zero, g_XMEpsilon.v);
    Result = XMVectorSelect(Result, Q, Control);

    // Keep x, y, z from Result and take w from CosTheta.
    Result = XMVectorSelect(CosTheta, Result, g_XMSelect1110.v);

    return Result;

#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}

//------------------------------------------------------------------------------
// Spherical linear interpolation with a scalar parameter: replicates t into
// all four lanes and forwards to XMQuaternionSlerpV.

inline XMVECTOR XMQuaternionSlerp
(
    FXMVECTOR Q0,
    FXMVECTOR Q1,
    float t
)
{
    XMVECTOR T = XMVectorReplicate(t);
    return XMQuaternionSlerpV(Q0, Q1, T);
}

//------------------------------------------------------------------------------
// Spherical linear interpolation between Q0 and Q1. T must hold the same
// value in all four lanes (asserted).
//
// Steps common to both implemented paths:
//   1. CosOmega = dot(Q0, Q1). If it is negative, its sign is flipped and the
//      same sign is later applied to the Q1 weight, so the interpolation is
//      done against -Q1.
//   2. SinOmega = sqrt(1 - CosOmega^2), Omega = atan2(SinOmega, CosOmega).
//   3. V01 is built as (1 - t, t, 0, 0) (assuming g_XMIdentityR0 is
//      (1, 0, 0, 0)); the weights are sin(V01 * Omega) / SinOmega.
//   4. When CosOmega >= 1 - 0.00001 the weights fall back to V01 itself,
//      i.e. plain linear weights, instead of the quotient by SinOmega.
// The result is not re-normalized.
// NOTE(review): the _XM_VMX128_INTRINSICS_ branch is empty (no return).

inline XMVECTOR XMQuaternionSlerpV
(
    FXMVECTOR Q0,
    FXMVECTOR Q1,
    FXMVECTOR T
)
{
    assert((XMVectorGetY(T) == XMVectorGetX(T)) && (XMVectorGetZ(T) == XMVectorGetX(T)) && (XMVectorGetW(T) == XMVectorGetX(T)));

    // Result = Q0 * sin((1.0 - t) * Omega) / sin(Omega) + Q1 * sin(t * Omega) / sin(Omega)

#if defined(_XM_NO_INTRINSICS_) || defined(_XM_ARM_NEON_INTRINSICS_)

    const XMVECTORF32 OneMinusEpsilon = {1.0f - 0.00001f, 1.0f - 0.00001f, 1.0f - 0.00001f, 1.0f - 0.00001f};

    XMVECTOR CosOmega = XMQuaternionDot(Q0, Q1);

    // Sign is -1 where the dot product is negative, +1 elsewhere.
    const XMVECTOR Zero = XMVectorZero();
    XMVECTOR Control = XMVectorLess(CosOmega, Zero);
    XMVECTOR Sign = XMVectorSelect(g_XMOne.v, g_XMNegativeOne.v, Control);

    CosOmega = XMVectorMultiply(CosOmega, Sign);

    // Control now marks lanes where the spherical weights are used.
    Control = XMVectorLess(CosOmega, OneMinusEpsilon);

    XMVECTOR SinOmega = XMVectorNegativeMultiplySubtract(CosOmega, CosOmega, g_XMOne.v);
    SinOmega = XMVectorSqrt(SinOmega);

    XMVECTOR Omega = XMVectorATan2(SinOmega, CosOmega);

    // Build V01; the SSE path below constructs the same value as
    // (1 - t, t, 0, 0) with explicit shuffles and masks.
    XMVECTOR SignMask = XMVectorSplatSignMask();
    XMVECTOR V01 = XMVectorShiftLeft(T, Zero, 2);
    SignMask = XMVectorShiftLeft(SignMask, Zero, 3);
    V01 = XMVectorXorInt(V01, SignMask);
    V01 = XMVectorAdd(g_XMIdentityR0.v, V01);

    XMVECTOR InvSinOmega = XMVectorReciprocal(SinOmega);

    XMVECTOR S0 = XMVectorMultiply(V01, Omega);
    S0 = XMVectorSin(S0);
    S0 = XMVectorMultiply(S0, InvSinOmega);

    S0 = XMVectorSelect(V01, S0, Control);

    // x lane is the Q0 weight, y lane is the Q1 weight.
    XMVECTOR S1 = XMVectorSplatY(S0);
    S0 = XMVectorSplatX(S0);

    S1 = XMVectorMultiply(S1, Sign);

    XMVECTOR Result = XMVectorMultiply(Q0, S0);
    Result = XMVectorMultiplyAdd(Q1, S1, Result);

    return Result;

#elif defined(_XM_SSE_INTRINSICS_)
    static const XMVECTORF32 OneMinusEpsilon = {1.0f - 0.00001f, 1.0f - 0.00001f, 1.0f - 0.00001f, 1.0f - 0.00001f};
    static const XMVECTORI32 SignMask2 = {0x80000000,0x00000000,0x00000000,0x00000000};
    static const XMVECTORI32 MaskXY = {0xFFFFFFFF,0xFFFFFFFF,0x00000000,0x00000000};

    XMVECTOR CosOmega = XMQuaternionDot(Q0, Q1);

    // Sign is -1 where the dot product is negative, +1 elsewhere.
    const XMVECTOR Zero = XMVectorZero();
    XMVECTOR Control = XMVectorLess(CosOmega, Zero);
    XMVECTOR Sign = XMVectorSelect(g_XMOne, g_XMNegativeOne, Control);

    CosOmega = _mm_mul_ps(CosOmega, Sign);

    // Control now marks lanes where the spherical weights are used.
    Control = XMVectorLess(CosOmega, OneMinusEpsilon);

    // SinOmega = sqrt(1 - CosOmega^2)
    XMVECTOR SinOmega = _mm_mul_ps(CosOmega,CosOmega);
    SinOmega = _mm_sub_ps(g_XMOne,SinOmega);
    SinOmega = _mm_sqrt_ps(SinOmega);

    XMVECTOR Omega = XMVectorATan2(SinOmega, CosOmega);

    // V01 = (t, t, 0, 0) -> (-t, t, 0, 0) -> g_XMIdentityR0 + that
    XMVECTOR V01 = XM_PERMUTE_PS(T,_MM_SHUFFLE(2,3,0,1));
    V01 = _mm_and_ps(V01,MaskXY);
    V01 = _mm_xor_ps(V01,SignMask2);
    V01 = _mm_add_ps(g_XMIdentityR0, V01);

    XMVECTOR S0 = _mm_mul_ps(V01, Omega);
    S0 = XMVectorSin(S0);
    S0 = _mm_div_ps(S0, SinOmega);

    S0 = XMVectorSelect(V01, S0, Control);

    // x lane is the Q0 weight, y lane is the Q1 weight.
    XMVECTOR S1 = XMVectorSplatY(S0);
    S0 = XMVectorSplatX(S0);

    S1 = _mm_mul_ps(S1, Sign);
    XMVECTOR Result = _mm_mul_ps(Q0, S0);
    S1 = _mm_mul_ps(S1, Q1);
    Result = _mm_add_ps(Result,S1);
    return Result;
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}

//------------------------------------------------------------------------------
// Spherical quadrangle interpolation with a scalar parameter: replicates t
// into all four lanes and forwards to XMQuaternionSquadV.

inline XMVECTOR XMQuaternionSquad
(
    FXMVECTOR Q0,
    FXMVECTOR Q1,
    FXMVECTOR Q2,
    GXMVECTOR Q3,
    float t
)
{
    XMVECTOR T = XMVectorReplicate(t);
    return XMQuaternionSquadV(Q0, Q1, Q2, Q3, T);
}

//------------------------------------------------------------------------------
// Spherical quadrangle interpolation built from three slerps:
//   Q03 = slerp(Q0, Q3, T), Q12 = slerp(Q1, Q2, T),
//   result = slerp(Q03, Q12, TP)
// where TP is derived from T as (T - T*T) scaled by the constant named Two,
// i.e. 2t(1 - t) assuming XMVectorSplatConstant(2, 0) yields 2.0.
// T must hold the same value in all four lanes (asserted).

inline XMVECTOR XMQuaternionSquadV
(
    FXMVECTOR Q0,
    FXMVECTOR Q1,
    FXMVECTOR Q2,
    GXMVECTOR Q3,
    CXMVECTOR T
)
{
    assert( (XMVectorGetY(T) == XMVectorGetX(T)) && (XMVectorGetZ(T) == XMVectorGetX(T)) && (XMVectorGetW(T) == XMVectorGetX(T)) );

    XMVECTOR TP = T;
    const XMVECTOR Two = XMVectorSplatConstant(2, 0);

    XMVECTOR Q03 = XMQuaternionSlerpV(Q0, Q3, T);
    XMVECTOR Q12 = XMQuaternionSlerpV(Q1, Q2, T);

    // TP = TP - TP*TP, then doubled.
    TP = XMVectorNegativeMultiplySubtract(TP, TP, TP);
    TP = XMVectorMultiply(TP, Two);

    XMVECTOR Result = XMQuaternionSlerpV(Q03, Q12, TP);

    return Result;
}

//------------------------------------------------------------------------------
// Computes the three control values written to *pA, *pB and *pC from the
// four quaternions Q0..Q3. All three output pointers must be non-null
// (asserted).
//
// Outline of what the code does:
//   - Q2, Q0 and Q3 are each replaced by their negation when the squared
//     length of the sum with their neighbour (Q1, Q1 and the adjusted Q2
//     respectively) is less than that of the difference. These adjusted
//     values are SQ2, SQ0 and SQ3.
//   - Logs of the relative rotations inv(Q1)*SQ0, inv(Q1)*SQ2, inv(SQ2)*Q1
//     and inv(SQ2)*SQ3 are summed pairwise, scaled by the constant named
//     NegativeOneQuarter, and exponentiated. (These are written as
//     mathematical products; the XMQuaternionMultiply calls pass the
//     operands the other way round because it returns Q2*Q1.)
//   - *pA = Q1 combined with the first exponential, *pB = SQ2 combined with
//     the second, *pC = SQ2.

_Use_decl_annotations_
inline void XMQuaternionSquadSetup
(
    XMVECTOR* pA,
    XMVECTOR* pB,
    XMVECTOR* pC,
    FXMVECTOR Q0,
    FXMVECTOR Q1,
    FXMVECTOR Q2,
    GXMVECTOR Q3
)
{
    assert(pA);
    assert(pB);
    assert(pC);

    XMVECTOR LS12 = XMQuaternionLengthSq(XMVectorAdd(Q1, Q2));
    XMVECTOR LD12 = XMQuaternionLengthSq(XMVectorSubtract(Q1, Q2));
    XMVECTOR SQ2 = XMVectorNegate(Q2);

    // Use -Q2 when |Q1 + Q2|^2 < |Q1 - Q2|^2.
    XMVECTOR Control1 = XMVectorLess(LS12, LD12);
    SQ2 = XMVectorSelect(Q2, SQ2, Control1);

    XMVECTOR LS01 = XMQuaternionLengthSq(XMVectorAdd(Q0, Q1));
    XMVECTOR LD01 = XMQuaternionLengthSq(XMVectorSubtract(Q0, Q1));
    XMVECTOR SQ0 = XMVectorNegate(Q0);

    // Q3 is compared against the already-adjusted SQ2, not the original Q2.
    XMVECTOR LS23 = XMQuaternionLengthSq(XMVectorAdd(SQ2, Q3));
    XMVECTOR LD23 = XMQuaternionLengthSq(XMVectorSubtract(SQ2, Q3));
    XMVECTOR SQ3 = XMVectorNegate(Q3);

    XMVECTOR Control0 = XMVectorLess(LS01, LD01);
    XMVECTOR Control2 = XMVectorLess(LS23, LD23);

    SQ0 = XMVectorSelect(Q0, SQ0, Control0);
    SQ3 = XMVectorSelect(Q3, SQ3, Control2);

    XMVECTOR InvQ1 = XMQuaternionInverse(Q1);
    XMVECTOR InvQ2 = XMQuaternionInverse(SQ2);

    XMVECTOR LnQ0 = XMQuaternionLn(XMQuaternionMultiply(InvQ1, SQ0));
    XMVECTOR LnQ2 = XMQuaternionLn(XMQuaternionMultiply(InvQ1, SQ2));
    XMVECTOR LnQ1 = XMQuaternionLn(XMQuaternionMultiply(InvQ2, Q1));
    XMVECTOR LnQ3 = XMQuaternionLn(XMQuaternionMultiply(InvQ2, SQ3));

    // presumably -1/4 (XMVectorSplatConstant(-1, 2)) - confirm against its definition
    const XMVECTOR NegativeOneQuarter = XMVectorSplatConstant(-1, 2);

    XMVECTOR ExpQ02 = XMVectorMultiply(XMVectorAdd(LnQ0, LnQ2), NegativeOneQuarter);
    XMVECTOR ExpQ13 = XMVectorMultiply(XMVectorAdd(LnQ1, LnQ3), NegativeOneQuarter);
    ExpQ02 = XMQuaternionExp(ExpQ02);
    ExpQ13 = XMQuaternionExp(ExpQ13);

    *pA = XMQuaternionMultiply(Q1, ExpQ02);
    *pB = XMQuaternionMultiply(SQ2, ExpQ13);
    *pC = SQ2;
}

//------------------------------------------------------------------------------
// Barycentric blend of three quaternions with scalar weights f and g.
// With s = f + g: if |s| < 0.00001 the result is Q0; otherwise it is
// slerp(slerp(Q0, Q1, s), slerp(Q0, Q2, s), g / s). The early-out also keeps
// g / s from dividing by a near-zero s.

inline XMVECTOR XMQuaternionBaryCentric
(
    FXMVECTOR Q0,
    FXMVECTOR Q1,
    FXMVECTOR Q2,
    float f,
    float g
)
{
    float s = f + g;

    XMVECTOR Result;
    if ((s < 0.00001f) && (s > -0.00001f))
    {
        Result = Q0;
    }
    else
    {
        XMVECTOR Q01 = XMQuaternionSlerp(Q0, Q1, s);
        XMVECTOR Q02 = XMQuaternionSlerp(Q0, Q2, s);

        Result = XMQuaternionSlerp(Q01, Q02, g / s);
    }

    return Result;
}

//------------------------------------------------------------------------------
// Vector-parameter form of XMQuaternionBaryCentric. F and G must each hold
// the same value in all four lanes (asserted). With S = F + G: if S is within
// +/-Epsilon the result is Q0; otherwise it is
// slerp(slerp(Q0, Q1, S), slerp(Q0, Q2, S), G * (1 / S)).
// NOTE(review): Epsilon here is XMVectorSplatConstant(1, 16) (presumably
// 1/65536), which is not the same threshold as the 0.00001f used by the
// scalar overload - confirm whether the difference is intentional.

inline XMVECTOR XMQuaternionBaryCentricV
(
    FXMVECTOR Q0,
    FXMVECTOR Q1,
    FXMVECTOR Q2,
    GXMVECTOR F,
    CXMVECTOR G
)
{
    assert( (XMVectorGetY(F) == XMVectorGetX(F)) && (XMVectorGetZ(F) == XMVectorGetX(F)) && (XMVectorGetW(F) == XMVectorGetX(F)) );
    assert( (XMVectorGetY(G) == XMVectorGetX(G)) && (XMVectorGetZ(G) == XMVectorGetX(G)) && (XMVectorGetW(G) == XMVectorGetX(G)) );

    const XMVECTOR Epsilon = XMVectorSplatConstant(1, 16);

    XMVECTOR S = XMVectorAdd(F, G);

    XMVECTOR Result;
    if (XMVector4InBounds(S, Epsilon))
    {
        Result = Q0;
    }
    else
    {
        XMVECTOR Q01 = XMQuaternionSlerpV(Q0, Q1, S);
        XMVECTOR Q02 = XMQuaternionSlerpV(Q0, Q2, S);
        // GS = G / S, computed as G * reciprocal(S)
        XMVECTOR GS = XMVectorReciprocal(S);
        GS = XMVectorMultiply(G, GS);

        Result = XMQuaternionSlerpV(Q01, Q02, GS);
    }

    return Result;
}

//------------------------------------------------------------------------------
// Transformation operations
//------------------------------------------------------------------------------

//------------------------------------------------------------------------------
// Returns the constant g_XMIdentityR3 as the identity quaternion.
// NOTE(review): the _XM_VMX128_INTRINSICS_ branch is empty (no return).

inline XMVECTOR XMQuaternionIdentity()
{
#if defined(_XM_NO_INTRINSICS_) || defined(_XM_SSE_INTRINSICS_) || defined(_XM_ARM_NEON_INTRINSICS_)
    return g_XMIdentityR3.v;
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}

//------------------------------------------------------------------------------
// Packs the three angles as <Pitch, Yaw, Roll, 0> and forwards to
// XMQuaternionRotationRollPitchYawFromVector. Note the parameter order is
// (Pitch, Yaw, Roll) despite the function name.

inline XMVECTOR XMQuaternionRotationRollPitchYaw
(
    float Pitch,
    float Yaw,
    float Roll
)
{
    XMVECTOR Angles = XMVectorSet(Pitch, Yaw, Roll, 0.0f);
    XMVECTOR Q = XMQuaternionRotationRollPitchYawFromVector(Angles);
    return Q;
}

//------------------------------------------------------------------------------
// Builds a rotation quaternion from a vector of angles laid out as
// <Pitch, Yaw, Roll, 0>. The angles are halved, their sines and cosines are
// taken, and the quaternion is assembled as Q0 + Q1 * R1 where
//   Q0 = P0 * Y0 * R0 and Q1 = (P1 * Sign) * Y1,
// with each P/Y/R vector holding the sine of its angle in one lane and the
// cosine in the others (P0/Y0/R0), or the reverse (P1/Y1/R1).
// NOTE(review): the _XM_VMX128_INTRINSICS_ branch is empty (no return).

inline XMVECTOR XMQuaternionRotationRollPitchYawFromVector
(
    FXMVECTOR Angles // <Pitch, Yaw, Roll, 0>
)
{
#if defined(_XM_NO_INTRINSICS_) || defined(_XM_SSE_INTRINSICS_) || defined(_XM_ARM_NEON_INTRINSICS_)

    static const XMVECTORF32 Sign = {1.0f, -1.0f, -1.0f, 1.0f};

    XMVECTOR HalfAngles = XMVectorMultiply(Angles, g_XMOneHalf.v);

    XMVECTOR SinAngles, CosAngles;
    XMVectorSinCos(&SinAngles, &CosAngles, HalfAngles);

    // Same permute patterns applied twice, with the sine/cosine operands
    // swapped for the second set.
    XMVECTOR P0 = XMVectorPermute<XM_PERMUTE_0X, XM_PERMUTE_1X, XM_PERMUTE_1X, XM_PERMUTE_1X>(SinAngles, CosAngles);
    XMVECTOR Y0 = XMVectorPermute<XM_PERMUTE_1Y, XM_PERMUTE_0Y, XM_PERMUTE_1Y, XM_PERMUTE_1Y>(SinAngles, CosAngles);
    XMVECTOR R0 = XMVectorPermute<XM_PERMUTE_1Z, XM_PERMUTE_1Z, XM_PERMUTE_0Z, XM_PERMUTE_1Z>(SinAngles, CosAngles);
    XMVECTOR P1 = XMVectorPermute<XM_PERMUTE_0X, XM_PERMUTE_1X, XM_PERMUTE_1X, XM_PERMUTE_1X>(CosAngles, SinAngles);
    XMVECTOR Y1 = XMVectorPermute<XM_PERMUTE_1Y, XM_PERMUTE_0Y, XM_PERMUTE_1Y, XM_PERMUTE_1Y>(CosAngles, SinAngles);
    XMVECTOR R1 = XMVectorPermute<XM_PERMUTE_1Z, XM_PERMUTE_1Z, XM_PERMUTE_0Z, XM_PERMUTE_1Z>(CosAngles, SinAngles);

    XMVECTOR Q1 = XMVectorMultiply(P1, Sign.v);
    XMVECTOR Q0 = XMVectorMultiply(P0, Y0);
    Q1 = XMVectorMultiply(Q1, Y1);
    Q0 = XMVectorMultiply(Q0, R0);
    XMVECTOR Q = XMVectorMultiplyAdd(Q1, R1, Q0);

    return Q;

#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}

//------------------------------------------------------------------------------
// Builds a rotation quaternion from an axis and an angle without normalizing
// the axis (the name indicates the caller supplies a normalized axis). The
// result is (axis.xyz * sin(Angle / 2), cos(Angle / 2)): the axis has its w
// lane forced to 1 and is multiplied by (sin, sin, sin, cos) of the half
// angle.
// NOTE(review): the _XM_VMX128_INTRINSICS_ branch is empty (no return).

inline XMVECTOR XMQuaternionRotationNormal
(
    FXMVECTOR NormalAxis,
    float Angle
)
{
#if defined(_XM_NO_INTRINSICS_) || defined(_XM_ARM_NEON_INTRINSICS_)

    // N = (axis.x, axis.y, axis.z, 1)
    XMVECTOR N = XMVectorSelect(g_XMOne.v, NormalAxis, g_XMSelect1110.v);

    float SinV, CosV;
    XMScalarSinCos(&SinV, &CosV, 0.5f * Angle);

    XMVECTOR Scale = XMVectorSet( SinV, SinV, SinV, CosV );
    return XMVectorMultiply(N, Scale);
#elif defined(_XM_SSE_INTRINSICS_)
    // Clear w of the axis, then OR in g_XMIdentityR3 to set w to 1.
    XMVECTOR N = _mm_and_ps(NormalAxis,g_XMMask3);
    N = _mm_or_ps(N,g_XMIdentityR3);
    XMVECTOR Scale = _mm_set_ps1(0.5f * Angle);
    XMVECTOR vSine;
    XMVECTOR vCosine;
    XMVectorSinCos(&vSine,&vCosine,Scale);
    // Scale = (sin, sin, sin, cos): sine masked to xyz, cosine masked to w.
    Scale = _mm_and_ps(vSine,g_XMMask3);
    vCosine = _mm_and_ps(vCosine,g_XMMaskW);
    Scale = _mm_or_ps(Scale,vCosine);
    N = _mm_mul_ps(N,Scale);
    return N;
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}

//------------------------------------------------------------------------------
// Builds a rotation quaternion from an arbitrary axis and an angle: the axis
// is normalized with XMVector3Normalize and passed to
// XMQuaternionRotationNormal. The axis must be non-zero and must not be
// infinite (both asserted).
// NOTE(review): the _XM_VMX128_INTRINSICS_ branch is empty (no return).

inline XMVECTOR XMQuaternionRotationAxis
(
    FXMVECTOR Axis,
    float Angle
)
{
    assert(!XMVector3Equal(Axis, XMVectorZero()));
    assert(!XMVector3IsInfinite(Axis));

#if defined(_XM_NO_INTRINSICS_) || defined(_XM_SSE_INTRINSICS_) || defined(_XM_ARM_NEON_INTRINSICS_)
    XMVECTOR Normal = XMVector3Normalize(Axis);
    XMVECTOR Q = XMQuaternionRotationNormal(Normal, Angle);
    return Q;
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}

//------------------------------------------------------------------------------
// Extracts a quaternion from the upper-left 3x3 of M (only rows 0-2 and
// columns 0-2 are read).
//
// All paths use the same case split, driven by the diagonal:
//   r22 <= 0        -> x^2 + y^2 >= z^2 + w^2, then r11 - r00 <= 0 picks x over y
//   r22 >  0        -> z^2 + w^2 >= x^2 + y^2, then r11 + r00 <= 0 picks z over w
// so the component chosen as the divisor is the one the tests identify as
// largest. The scalar path divides by 2*sqrt(4*c^2) for that component c; the
// SIMD paths build all four rows of 4*q*q^T, pick the matching row with
// masks, and normalize it.
// NOTE(review): the _XM_VMX128_INTRINSICS_ branch is empty (no return).

inline XMVECTOR XMQuaternionRotationMatrix
(
    CXMMATRIX M
)
{
#if defined(_XM_NO_INTRINSICS_)

    XMVECTORF32 q;
    float r22 = M.m[2][2];
    if (r22 <= 0.f)  // x^2 + y^2 >= z^2 + w^2
    {
        float dif10 = M.m[1][1] - M.m[0][0];
        float omr22 = 1.f - r22;
        if (dif10 <= 0.f)  // x^2 >= y^2
        {
            float fourXSqr = omr22 - dif10;
            float inv4x = 0.5f / sqrtf(fourXSqr);
            q.f[0] = fourXSqr*inv4x;
            q.f[1] = (M.m[0][1] + M.m[1][0])*inv4x;
            q.f[2] = (M.m[0][2] + M.m[2][0])*inv4x;
            q.f[3] = (M.m[1][2] - M.m[2][1])*inv4x;
        }
        else  // y^2 >= x^2
        {
            float fourYSqr = omr22 + dif10;
            float inv4y = 0.5f / sqrtf(fourYSqr);
            q.f[0] = (M.m[0][1] + M.m[1][0])*inv4y;
            q.f[1] = fourYSqr*inv4y;
            q.f[2] = (M.m[1][2] + M.m[2][1])*inv4y;
            q.f[3] = (M.m[2][0] - M.m[0][2])*inv4y;
        }
    }
    else  // z^2 + w^2 >= x^2 + y^2
    {
        float sum10 = M.m[1][1] + M.m[0][0];
        float opr22 = 1.f + r22;
        if (sum10 <= 0.f)  // z^2 >= w^2
        {
            float fourZSqr = opr22 - sum10;
            float inv4z = 0.5f / sqrtf(fourZSqr);
            q.f[0] = (M.m[0][2] + M.m[2][0])*inv4z;
            q.f[1] = (M.m[1][2] + M.m[2][1])*inv4z;
            q.f[2] = fourZSqr*inv4z;
            q.f[3] = (M.m[0][1] - M.m[1][0])*inv4z;
        }
        else  // w^2 >= z^2
        {
            float fourWSqr = opr22 + sum10;
            float inv4w = 0.5f / sqrtf(fourWSqr);
            q.f[0] = (M.m[1][2] - M.m[2][1])*inv4w;
            q.f[1] = (M.m[2][0] - M.m[0][2])*inv4w;
            q.f[2] = (M.m[0][1] - M.m[1][0])*inv4w;
            q.f[3] = fourWSqr*inv4w;
        }
    }
    return q.v;

#elif defined(_XM_ARM_NEON_INTRINSICS_)
    // Sign patterns (P = +1, M = -1) used to form the four diagonal sums.
    static const XMVECTORF32 XMPMMP = {+1.0f, -1.0f, -1.0f, +1.0f};
    static const XMVECTORF32 XMMPMP = {-1.0f, +1.0f, -1.0f, +1.0f};
    static const XMVECTORF32 XMMMPP = {-1.0f, -1.0f, +1.0f, +1.0f};
    static const XMVECTORU32 Select0110 = { XM_SELECT_0, XM_SELECT_1, XM_SELECT_1, XM_SELECT_0 };
    static const XMVECTORU32 Select0010 = { XM_SELECT_0, XM_SELECT_0, XM_SELECT_1, XM_SELECT_0 };

    XMVECTOR r0 = M.r[0];
    XMVECTOR r1 = M.r[1];
    XMVECTOR r2 = M.r[2];

    // Broadcast the three diagonal elements.
    XMVECTOR r00 = vdupq_lane_f32(vget_low_f32(r0), 0);
    XMVECTOR r11 = vdupq_lane_f32(vget_low_f32(r1), 1);
    XMVECTOR r22 = vdupq_lane_f32(vget_high_f32(r2), 0);

    // x^2 >= y^2 equivalent to r11 - r00 <= 0
    XMVECTOR r11mr00 = vsubq_f32(r11, r00);
    XMVECTOR x2gey2 = vcleq_f32(r11mr00, g_XMZero);

    // z^2 >= w^2 equivalent to r11 + r00 <= 0
    XMVECTOR r11pr00 = vaddq_f32(r11, r00);
    XMVECTOR z2gew2 = vcleq_f32(r11pr00, g_XMZero);

    // x^2 + y^2 >= z^2 + w^2 equivalent to r22 <= 0
    XMVECTOR x2py2gez2pw2 = vcleq_f32(r22, g_XMZero);

    // (4*x^2, 4*y^2, 4*z^2, 4*w^2)
    XMVECTOR t0 = vmulq_f32( XMPMMP, r00 );
    XMVECTOR x2y2z2w2 = vmlaq_f32( t0, XMMPMP, r11 );
    x2y2z2w2 = vmlaq_f32( x2y2z2w2, XMMMPP, r22 );
    x2y2z2w2 = vaddq_f32( x2y2z2w2, g_XMOne );

    // (r01, r02, r12, r11)
    t0 = vextq_f32(r0, r0, 1);
    XMVECTOR t1 = vextq_f32(r1, r1, 1);
    t0 = vcombine_f32( vget_low_f32(t0), vrev64_f32( vget_low_f32( t1 ) ) );

    // (r10, r20, r21, r10)
    t1 = vextq_f32(r2, r2, 3);
    XMVECTOR r10 = vdupq_lane_f32( vget_low_f32(r1), 0 );
    t1 = vbslq_f32( Select0110, t1, r10 );

    // (4*x*y, 4*x*z, 4*y*z, unused)
    XMVECTOR xyxzyz = vaddq_f32(t0, t1);

    // (r21, r20, r10, r10)
    t0 = vcombine_f32( vrev64_f32( vget_low_f32(r2) ), vget_low_f32(r10) );

    // (r12, r02, r01, r12)
    XMVECTOR t2 = vcombine_f32( vrev64_f32( vget_high_f32(r0) ), vrev64_f32( vget_low_f32(r0) ) );
    XMVECTOR t3 = vdupq_lane_f32( vget_high_f32(r1), 0 );
    t1 = vbslq_f32( Select0110, t2, t3 );

    // (4*x*w, 4*y*w, 4*z*w, unused)
    XMVECTOR xwywzw = vsubq_f32(t0, t1);
    xwywzw = vmulq_f32(XMMPMP, xwywzw);

    // (4*x*x, 4*x*y, 4*x*z, 4*x*w)
    t0 = vextq_f32( xyxzyz, xyxzyz, 3 );
    t1 = vbslq_f32( Select0110, t0, x2y2z2w2 );
    t2 = vdupq_lane_f32( vget_low_f32(xwywzw), 0 );
    XMVECTOR tensor0 = vbslq_f32( g_XMSelect1110, t1, t2 );

    // (4*y*x, 4*y*y, 4*y*z, 4*y*w)
    t0 = vbslq_f32( g_XMSelect1011, xyxzyz, x2y2z2w2 );
    t1 = vdupq_lane_f32( vget_low_f32(xwywzw), 1 );
    XMVECTOR tensor1 = vbslq_f32( g_XMSelect1110, t0, t1 );

    // (4*z*x, 4*z*y, 4*z*z, 4*z*w)
    t0 = vextq_f32(xyxzyz, xyxzyz, 1);
    t1 = vcombine_f32( vget_low_f32(t0), vrev64_f32( vget_high_f32(xwywzw) ) );
    XMVECTOR tensor2 = vbslq_f32( Select0010, x2y2z2w2, t1 );

    // (4*w*x, 4*w*y, 4*w*z, 4*w*w)
    XMVECTOR tensor3 = vbslq_f32( g_XMSelect1110, xwywzw, x2y2z2w2 );

    // Select the row of the tensor-product matrix that has the largest
    // magnitude.
    t0 = vbslq_f32( x2gey2, tensor0, tensor1 );
    t1 = vbslq_f32( z2gew2, tensor2, tensor3 );
    t2 = vbslq_f32( x2py2gez2pw2, t0, t1 );

    // Normalize the row.  No division by zero is possible because the
    // quaternion is unit-length (and the row is a nonzero multiple of
    // the quaternion).
    t0 = XMVector4Length(t2);
    return XMVectorDivide(t2, t0);
#elif defined(_XM_SSE_INTRINSICS_)
    // Sign patterns (P = +1, M = -1) used to form the four diagonal sums.
    static const XMVECTORF32 XMPMMP = {+1.0f, -1.0f, -1.0f, +1.0f};
    static const XMVECTORF32 XMMPMP = {-1.0f, +1.0f, -1.0f, +1.0f};
    static const XMVECTORF32 XMMMPP = {-1.0f, -1.0f, +1.0f, +1.0f};

    XMVECTOR r0 = M.r[0];  // (r00, r01, r02, 0)
    XMVECTOR r1 = M.r[1];  // (r10, r11, r12, 0)
    XMVECTOR r2 = M.r[2];  // (r20, r21, r22, 0)

    // (r00, r00, r00, r00)
    XMVECTOR r00 = XM_PERMUTE_PS(r0, _MM_SHUFFLE(0,0,0,0));
    // (r11, r11, r11, r11)
    XMVECTOR r11 = XM_PERMUTE_PS(r1, _MM_SHUFFLE(1,1,1,1));
    // (r22, r22, r22, r22)
    XMVECTOR r22 = XM_PERMUTE_PS(r2, _MM_SHUFFLE(2,2,2,2));

    // x^2 >= y^2 equivalent to r11 - r00 <= 0
    // (r11 - r00, r11 - r00, r11 - r00, r11 - r00)
    XMVECTOR r11mr00 = _mm_sub_ps(r11, r00);
    XMVECTOR x2gey2 = _mm_cmple_ps(r11mr00, g_XMZero);

    // z^2 >= w^2 equivalent to r11 + r00 <= 0
    // (r11 + r00, r11 + r00, r11 + r00, r11 + r00)
    XMVECTOR r11pr00 = _mm_add_ps(r11, r00);
    XMVECTOR z2gew2 = _mm_cmple_ps(r11pr00, g_XMZero);

    // x^2 + y^2 >= z^2 + w^2 equivalent to r22 <= 0
    XMVECTOR x2py2gez2pw2 = _mm_cmple_ps(r22, g_XMZero);

    // (+r00, -r00, -r00, +r00)
    XMVECTOR t0 = _mm_mul_ps(XMPMMP, r00);

    // (-r11, +r11, -r11, +r11)
    XMVECTOR t1 = _mm_mul_ps(XMMPMP, r11);

    // (-r22, -r22, +r22, +r22)
    XMVECTOR t2 = _mm_mul_ps(XMMMPP, r22);

    // (4*x^2, 4*y^2, 4*z^2, 4*w^2)
    XMVECTOR x2y2z2w2 = _mm_add_ps(t0, t1);
    x2y2z2w2 = _mm_add_ps(t2, x2y2z2w2);
    x2y2z2w2 = _mm_add_ps(x2y2z2w2, g_XMOne);

    // (r01, r02, r12, r11)
    t0 = _mm_shuffle_ps(r0, r1, _MM_SHUFFLE(1,2,2,1));
    // (r10, r10, r20, r21)
    t1 = _mm_shuffle_ps(r1, r2, _MM_SHUFFLE(1,0,0,0));
    // (r10, r20, r21, r10)
    t1 = XM_PERMUTE_PS(t1, _MM_SHUFFLE(1,3,2,0));
    // (4*x*y, 4*x*z, 4*y*z, unused)
    XMVECTOR xyxzyz = _mm_add_ps(t0, t1);

    // (r21, r20, r10, r10)
    t0 = _mm_shuffle_ps(r2, r1, _MM_SHUFFLE(0,0,0,1));
    // (r12, r12, r02, r01)
    t1 = _mm_shuffle_ps(r1, r0, _MM_SHUFFLE(1,2,2,2));
    // (r12, r02, r01, r12)
    t1 = XM_PERMUTE_PS(t1, _MM_SHUFFLE(1,3,2,0));
    // (4*x*w, 4*y*w, 4*z*w, unused)
    XMVECTOR xwywzw = _mm_sub_ps(t0, t1);
    xwywzw = _mm_mul_ps(XMMPMP, xwywzw);

    // (4*x^2, 4*y^2, 4*x*y, unused)
    t0 = _mm_shuffle_ps(x2y2z2w2, xyxzyz, _MM_SHUFFLE(0,0,1,0));
    // (4*z^2, 4*w^2, 4*z*w, unused)
    t1 = _mm_shuffle_ps(x2y2z2w2, xwywzw, _MM_SHUFFLE(0,2,3,2));
    // (4*x*z, 4*y*z, 4*x*w, 4*y*w)
    t2 = _mm_shuffle_ps(xyxzyz, xwywzw, _MM_SHUFFLE(1,0,2,1));

    // (4*x*x, 4*x*y, 4*x*z, 4*x*w)
    XMVECTOR tensor0 = _mm_shuffle_ps(t0, t2, _MM_SHUFFLE(2,0,2,0));
    // (4*y*x, 4*y*y, 4*y*z, 4*y*w)
    XMVECTOR tensor1 = _mm_shuffle_ps(t0, t2, _MM_SHUFFLE(3,1,1,2));
    // (4*z*x, 4*z*y, 4*z*z, 4*z*w)
    XMVECTOR tensor2 = _mm_shuffle_ps(t2, t1, _MM_SHUFFLE(2,0,1,0));
    // (4*w*x, 4*w*y, 4*w*z, 4*w*w)
    XMVECTOR tensor3 = _mm_shuffle_ps(t2, t1, _MM_SHUFFLE(1,2,3,2));

    // Select the row of the tensor-product matrix that has the largest
    // magnitude.
    t0 = _mm_and_ps(x2gey2, tensor0);
    t1 = _mm_andnot_ps(x2gey2, tensor1);
    t0 = _mm_or_ps(t0, t1);
    t1 = _mm_and_ps(z2gew2, tensor2);
    t2 = _mm_andnot_ps(z2gew2, tensor3);
    t1 = _mm_or_ps(t1, t2);
    t0 = _mm_and_ps(x2py2gez2pw2, t0);
    t1 = _mm_andnot_ps(x2py2gez2pw2, t1);
    t2 = _mm_or_ps(t0, t1);

    // Normalize the row.  No division by zero is possible because the
    // quaternion is unit-length (and the row is a nonzero multiple of
    // the quaternion).
    t0 = XMVector4Length(t2);
    return _mm_div_ps(t2, t0);
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}

//------------------------------------------------------------------------------
// Conversion operations
//------------------------------------------------------------------------------

//------------------------------------------------------------------------------
// Writes an axis and an angle for quaternion Q. Both output pointers must be
// non-null (asserted).
//   *pAxis  receives Q unchanged: it is not normalized and its w lane still
//           holds Q.w, so callers should use only x, y, z.
//   *pAngle receives 2 * acos(Q.w).
// NOTE(review): Q.w is passed to XMScalarACos without clamping; this presumably
// relies on Q being unit-length so that Q.w lies in [-1, 1] - confirm.

_Use_decl_annotations_
inline void XMQuaternionToAxisAngle
(
    XMVECTOR* pAxis,
    float* pAngle,
    FXMVECTOR Q
)
{
    assert(pAxis);
    assert(pAngle);

    *pAxis = Q;

    *pAngle = 2.0f * XMScalarACos(XMVectorGetW(Q));
}

/****************************************************************************
 *
 * Plane
 *
 ****************************************************************************/

//------------------------------------------------------------------------------
// Comparison operations
//------------------------------------------------------------------------------

//------------------------------------------------------------------------------
// Plane equality test; forwards both operands to XMVector4Equal. The planes
// are compared as stored, without normalization.

inline bool XMPlaneEqual
(
    FXMVECTOR P1,
    FXMVECTOR P2
)
{
    return XMVector4Equal(P1, P2);
}

//------------------------------------------------------------------------------
// Approximate plane equality: both planes are normalized with
// XMPlaneNormalize first, then compared with XMVector4NearEqual using the
// caller-supplied per-component Epsilon.

inline bool XMPlaneNearEqual
(
    FXMVECTOR P1,
    FXMVECTOR P2,
    FXMVECTOR Epsilon
)
{
    XMVECTOR NP1 = XMPlaneNormalize(P1);
    XMVECTOR NP2 = XMPlaneNormalize(P2);
    return XMVector4NearEqual(NP1, NP2, Epsilon);
}

//------------------------------------------------------------------------------
// Plane inequality test; forwards both operands to XMVector4NotEqual.

inline bool XMPlaneNotEqual
(
    FXMVECTOR P1,
    FXMVECTOR P2
)
{
    return XMVector4NotEqual(P1, P2);
}

//------------------------------------------------------------------------------
// NaN test on the plane coefficients; forwards to XMVector4IsNaN.

inline bool XMPlaneIsNaN
(
    FXMVECTOR P
)
{
    return XMVector4IsNaN(P);
}

//------------------------------------------------------------------------------
// Infinity test on the plane coefficients; forwards to XMVector4IsInfinite.

inline bool XMPlaneIsInfinite
(
    FXMVECTOR P
)
{
    return XMVector4IsInfinite(P);
}

//------------------------------------------------------------------------------
// Computation operations
//------------------------------------------------------------------------------

//------------------------------------------------------------------------------
// Four-component dot product of the plane and V; forwards to XMVector4Dot.

inline XMVECTOR XMPlaneDot
(
    FXMVECTOR P,
    FXMVECTOR V
)
{
    return XMVector4Dot(P, V);
}

//------------------------------------------------------------------------------
// Dot product of the plane with V treated as a point: V's w lane is replaced
// by 1 before the four-component dot product, so V.w as passed in is ignored.
// NOTE(review): the _XM_VMX128_INTRINSICS_ branch is empty (no return).

inline XMVECTOR XMPlaneDotCoord
(
    FXMVECTOR P,
    FXMVECTOR V
)
{
    // Result = P[0] * V[0] + P[1] * V[1] + P[2] * V[2] + P[3]

#if defined(_XM_NO_INTRINSICS_) || defined(_XM_SSE_INTRINSICS_) || defined(_XM_ARM_NEON_INTRINSICS_)

    // V3 = (V.x, V.y, V.z, 1)
    XMVECTOR V3 = XMVectorSelect(g_XMOne.v, V, g_XMSelect1110.v);
    XMVECTOR Result = XMVector4Dot(P, V3);
    return Result;

#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}

//------------------------------------------------------------------------------
// Dot product of the plane's first three coefficients with V; forwards to
// XMVector3Dot, so the w lanes of both operands do not contribute.

inline XMVECTOR XMPlaneDotNormal
(
    FXMVECTOR P,
    FXMVECTOR V
)
{
    return XMVector3Dot(P, V);
}

//------------------------------------------------------------------------------
// XMPlaneNormalizeEst uses a reciprocal estimate and
// returns QNaN on zero and infinite vectors.
// All four components of P (w included) are multiplied by the same
// estimated factor; on the SSE path that factor is _mm_rsqrt_ps applied to
// x*x + y*y + z*z.

inline XMVECTOR XMPlaneNormalizeEst
(
    FXMVECTOR P
)
{
#if defined(_XM_NO_INTRINSICS_) || defined(_XM_ARM_NEON_INTRINSICS_)

    // Scale P by whatever XMVector3ReciprocalLengthEst reports for it
    XMVECTOR Result = XMVector3ReciprocalLengthEst(P);
    return XMVectorMultiply(P, Result);

#elif defined(_XM_SSE_INTRINSICS_)
    // Perform the dot product
    XMVECTOR vDot = _mm_mul_ps(P,P);
    // x=Dot.y, y=Dot.z
    XMVECTOR vTemp = XM_PERMUTE_PS(vDot,_MM_SHUFFLE(2,1,2,1));
    // Result.x = x+y
    vDot = _mm_add_ss(vDot,vTemp);
    // x=Dot.z
    vTemp = XM_PERMUTE_PS(vTemp,_MM_SHUFFLE(1,1,1,1));
    // Result.x = (x+y)+z
    vDot = _mm_add_ss(vDot,vTemp);
    // Splat x
    vDot = XM_PERMUTE_PS(vDot,_MM_SHUFFLE(0,0,0,0));
    // Get the reciprocal square root (estimate) of the squared length
    vDot = _mm_rsqrt_ps(vDot);
    // Scale the plane by the estimated reciprocal length
    vDot = _mm_mul_ps(vDot,P);
    return vDot;
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}

//------------------------------------------------------------------------------
// Divides all four components of P by the length of its x,y,z part.
// Scalar path: when that length is zero the multiplier stays zero, so every
// component is multiplied by 0 instead of dividing by zero.
// SSE path: result lanes are masked to zero when the squared length compares
// equal to g_XMInfinity.

inline XMVECTOR XMPlaneNormalize
(
    FXMVECTOR P
)
{
#if defined(_XM_NO_INTRINSICS_)
    // NOTE: despite its name, fLengthSq holds the length (the sqrtf of the
    // squared sum) and is then reused to hold its reciprocal.
    float fLengthSq = sqrtf((P.vector4_f32[0]*P.vector4_f32[0])+(P.vector4_f32[1]*P.vector4_f32[1])+(P.vector4_f32[2]*P.vector4_f32[2]));
    // Prevent divide by zero
    if (fLengthSq) {
        fLengthSq = 1.0f/fLengthSq;
    }
    {
    XMVECTOR vResult = {
        P.vector4_f32[0]*fLengthSq,
        P.vector4_f32[1]*fLengthSq,
        P.vector4_f32[2]*fLengthSq,
        P.vector4_f32[3]*fLengthSq
    };
    return vResult;
    }
#elif defined(_XM_ARM_NEON_INTRINSICS_)
    XMVECTOR vLength = XMVector3ReciprocalLength(P);
    return XMVectorMultiply( P, vLength );
#elif defined(_XM_SSE_INTRINSICS_)
    // Perform the dot product on x,y and z only
    XMVECTOR vLengthSq = _mm_mul_ps(P,P);
    XMVECTOR vTemp = XM_PERMUTE_PS(vLengthSq,_MM_SHUFFLE(2,1,2,1));
    vLengthSq = _mm_add_ss(vLengthSq,vTemp);
    vTemp =
XM_PERMUTE_PS(vTemp,_MM_SHUFFLE(1,1,1,1));
    vLengthSq = _mm_add_ss(vLengthSq,vTemp);
    vLengthSq = XM_PERMUTE_PS(vLengthSq,_MM_SHUFFLE(0,0,0,0));
    // Prepare for the division
    XMVECTOR vResult = _mm_sqrt_ps(vLengthSq);
    // Build a mask that is clear in lanes whose squared length equals
    // infinity (only infinity is tested here; zero or epsilon lengths are
    // not special-cased on this path)
    vLengthSq = _mm_cmpneq_ps(vLengthSq,g_XMInfinity);
    // Divide by the length to perform the normalization
    vResult = _mm_div_ps(P,vResult);
    // Any that are infinity, set to zero
    vResult = _mm_and_ps(vResult,vLengthSq);
    return vResult;
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}

//------------------------------------------------------------------------------
// Intersects plane P with the line through LinePoint1 and LinePoint2.
// Returns LinePoint1 + (LinePoint2 - LinePoint1) * VT, where
// VT = XMPlaneDotCoord(P, LinePoint1) / D and D is the difference of the two
// XMVector3Dot(P, LinePointN) values. Where D is within g_XMEpsilon of zero
// the result is g_XMQNaN instead.

inline XMVECTOR XMPlaneIntersectLine
(
    FXMVECTOR P,
    FXMVECTOR LinePoint1,
    FXMVECTOR LinePoint2
)
{
#if defined(_XM_NO_INTRINSICS_) || defined(_XM_SSE_INTRINSICS_) || defined(_XM_ARM_NEON_INTRINSICS_)

    XMVECTOR V1 = XMVector3Dot(P, LinePoint1);
    XMVECTOR V2 = XMVector3Dot(P, LinePoint2);
    XMVECTOR D = XMVectorSubtract(V1, V2);

    XMVECTOR VT = XMPlaneDotCoord(P, LinePoint1);
    VT = XMVectorDivide(VT, D);

    XMVECTOR Point = XMVectorSubtract(LinePoint2, LinePoint1);
    Point = XMVectorMultiplyAdd(Point, VT, LinePoint1);

    // The division above is unconditional; a near-zero D is handled
    // afterwards by substituting QNaN for the computed point.
    const XMVECTOR Zero = XMVectorZero();
    XMVECTOR Control = XMVectorNearEqual(D, Zero, g_XMEpsilon.v);

    return XMVectorSelect(Point, g_XMQNaN.v, Control);

#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}

//------------------------------------------------------------------------------
// Writes two points derived from planes P1 and P2 to *pLinePoint1 and
// *pLinePoint2. With V1 = cross(P2, P1), the second point is the first
// plus V1. Where the squared length of V1 is <= g_XMEpsilon, both outputs
// are g_XMQNaN instead. Both output pointers are asserted to be non-null.
_Use_decl_annotations_
inline void XMPlaneIntersectPlane
(
    XMVECTOR* pLinePoint1,
    XMVECTOR* pLinePoint2,
    FXMVECTOR  P1,
    FXMVECTOR  P2
)
{
    assert(pLinePoint1);
    assert(pLinePoint2);
#if defined(_XM_NO_INTRINSICS_) || defined(_XM_SSE_INTRINSICS_) || defined(_XM_ARM_NEON_INTRINSICS_)

    XMVECTOR V1 = XMVector3Cross(P2, P1);

    XMVECTOR LengthSq = XMVector3LengthSq(V1);

    XMVECTOR V2 = XMVector3Cross(P2, V1);

    XMVECTOR P1W = XMVectorSplatW(P1);
    XMVECTOR Point = XMVectorMultiply(V2, P1W);

    XMVECTOR V3 = XMVector3Cross(V1, P1);

    XMVECTOR P2W = XMVectorSplatW(P2);
    Point = XMVectorMultiplyAdd(V3, P2W, Point);

    // Point = V2 * P1.w + V3 * P2.w, divided by |V1|^2
    XMVECTOR LinePoint1 = XMVectorDivide(Point, LengthSq);

    XMVECTOR LinePoint2 = XMVectorAdd(LinePoint1, V1);

    // The division above is unconditional; the degenerate case is handled
    // by replacing both results with QNaN.
    XMVECTOR Control = XMVectorLessOrEqual(LengthSq, g_XMEpsilon.v);
    *pLinePoint1 = XMVectorSelect(LinePoint1,g_XMQNaN.v, Control);
    *pLinePoint2 = XMVectorSelect(LinePoint2,g_XMQNaN.v, Control);

#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}

//------------------------------------------------------------------------------
// Result = P.x * M.r[0] + P.y * M.r[1] + P.z * M.r[2] + P.w * M.r[3]
// (accumulated starting from the w term).

inline XMVECTOR XMPlaneTransform
(
    FXMVECTOR P,
    CXMMATRIX M
)
{
#if defined(_XM_NO_INTRINSICS_) || defined(_XM_SSE_INTRINSICS_) || defined(_XM_ARM_NEON_INTRINSICS_)

    XMVECTOR W = XMVectorSplatW(P);
    XMVECTOR Z = XMVectorSplatZ(P);
    XMVECTOR Y = XMVectorSplatY(P);
    XMVECTOR X = XMVectorSplatX(P);

    XMVECTOR Result = XMVectorMultiply(W, M.r[3]);
    Result = XMVectorMultiplyAdd(Z, M.r[2], Result);
    Result = XMVectorMultiplyAdd(Y, M.r[1], Result);
    Result = XMVectorMultiplyAdd(X, M.r[0], Result);
    return Result;

#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}

//------------------------------------------------------------------------------
// Forwards every argument unchanged to XMVector4TransformStream and returns
// its result.
_Use_decl_annotations_
inline XMFLOAT4* XMPlaneTransformStream
(
    XMFLOAT4*       pOutputStream,
    size_t          OutputStride,
    const XMFLOAT4* pInputStream,
    size_t          InputStride,
    size_t          PlaneCount,
    CXMMATRIX       M
)
{
    return
XMVector4TransformStream(pOutputStream, 1313 OutputStride, 1314 pInputStream, 1315 InputStride, 1316 PlaneCount, 1317 M); 1318} 1319 1320//------------------------------------------------------------------------------ 1321// Conversion operations 1322//------------------------------------------------------------------------------ 1323 1324//------------------------------------------------------------------------------ 1325 1326inline XMVECTOR XMPlaneFromPointNormal 1327( 1328 FXMVECTOR Point, 1329 FXMVECTOR Normal 1330) 1331{ 1332 XMVECTOR W = XMVector3Dot(Point, Normal); 1333 W = XMVectorNegate(W); 1334 return XMVectorSelect(W, Normal, g_XMSelect1110.v); 1335} 1336 1337//------------------------------------------------------------------------------ 1338 1339inline XMVECTOR XMPlaneFromPoints 1340( 1341 FXMVECTOR Point1, 1342 FXMVECTOR Point2, 1343 FXMVECTOR Point3 1344) 1345{ 1346#if defined(_XM_NO_INTRINSICS_) || defined(_XM_SSE_INTRINSICS_) || defined(_XM_ARM_NEON_INTRINSICS_) 1347 1348 XMVECTOR V21 = XMVectorSubtract(Point1, Point2); 1349 XMVECTOR V31 = XMVectorSubtract(Point1, Point3); 1350 1351 XMVECTOR N = XMVector3Cross(V21, V31); 1352 N = XMVector3Normalize(N); 1353 1354 XMVECTOR D = XMPlaneDotNormal(N, Point1); 1355 D = XMVectorNegate(D); 1356 1357 XMVECTOR Result = XMVectorSelect(D, N, g_XMSelect1110.v); 1358 1359 return Result; 1360 1361#else // _XM_VMX128_INTRINSICS_ 1362#endif // _XM_VMX128_INTRINSICS_ 1363} 1364 1365/**************************************************************************** 1366 * 1367 * Color 1368 * 1369 ****************************************************************************/ 1370 1371//------------------------------------------------------------------------------ 1372// Comparison operations 1373//------------------------------------------------------------------------------ 1374 1375//------------------------------------------------------------------------------ 1376 1377inline bool XMColorEqual 1378( 1379 FXMVECTOR C1, 1380 
FXMVECTOR C2
)
{
    // Forwards to XMVector4Equal
    return XMVector4Equal(C1, C2);
}

//------------------------------------------------------------------------------
// Forwards both colors to XMVector4NotEqual.

inline bool XMColorNotEqual
(
    FXMVECTOR C1,
    FXMVECTOR C2
)
{
    return XMVector4NotEqual(C1, C2);
}

//------------------------------------------------------------------------------
// Forwards both colors to XMVector4Greater.

inline bool XMColorGreater
(
    FXMVECTOR C1,
    FXMVECTOR C2
)
{
    return XMVector4Greater(C1, C2);
}

//------------------------------------------------------------------------------
// Forwards both colors to XMVector4GreaterOrEqual.

inline bool XMColorGreaterOrEqual
(
    FXMVECTOR C1,
    FXMVECTOR C2
)
{
    return XMVector4GreaterOrEqual(C1, C2);
}

//------------------------------------------------------------------------------
// Forwards both colors to XMVector4Less.

inline bool XMColorLess
(
    FXMVECTOR C1,
    FXMVECTOR C2
)
{
    return XMVector4Less(C1, C2);
}

//------------------------------------------------------------------------------
// Forwards both colors to XMVector4LessOrEqual.

inline bool XMColorLessOrEqual
(
    FXMVECTOR C1,
    FXMVECTOR C2
)
{
    return XMVector4LessOrEqual(C1, C2);
}

//------------------------------------------------------------------------------
// Forwards the color to XMVector4IsNaN.

inline bool XMColorIsNaN
(
    FXMVECTOR C
)
{
    return XMVector4IsNaN(C);
}

//------------------------------------------------------------------------------
// Forwards the color to XMVector4IsInfinite.

inline bool XMColorIsInfinite
(
    FXMVECTOR C
)
{
    return XMVector4IsInfinite(C);
}

//------------------------------------------------------------------------------
// Computation operations
//------------------------------------------------------------------------------

//------------------------------------------------------------------------------
// Returns (1 - x, 1 - y, 1 - z, w) for the given color; w passes through.

inline XMVECTOR XMColorNegative
(
    FXMVECTOR vColor
)
{
#if defined(_XM_NO_INTRINSICS_)
    XMVECTORF32 vResult = {
        1.0f - vColor.vector4_f32[0],
        1.0f - vColor.vector4_f32[1],
        1.0f - vColor.vector4_f32[2],
        vColor.vector4_f32[3]
    };
    return vResult.v;
#elif defined(_XM_ARM_NEON_INTRINSICS_)
    // Same two steps as the SSE path below: XOR with g_XMNegate3, then add
    // g_XMOne3.
    XMVECTOR vTemp = veorq_u32(vColor,g_XMNegate3);
    return vaddq_f32(vTemp,g_XMOne3);
#elif defined(_XM_SSE_INTRINSICS_)
    // Negate only x,y and z.
    XMVECTOR vTemp = _mm_xor_ps(vColor,g_XMNegate3);
    // Add 1,1,1,0 to -x,-y,-z,w
    return _mm_add_ps(vTemp,g_XMOne3);
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}

//------------------------------------------------------------------------------
// Forwards both colors to XMVectorMultiply.

inline XMVECTOR XMColorModulate
(
    FXMVECTOR C1,
    FXMVECTOR C2
)
{
    return XMVectorMultiply(C1, C2);
}

//------------------------------------------------------------------------------
// Moves x, y and z of vColor toward or away from the color's luminance by the
// factor fSaturation (see the formula below); w is taken from vColor
// unchanged.

inline XMVECTOR XMColorAdjustSaturation
(
    FXMVECTOR vColor,
    float    fSaturation
)
{
    // Luminance = 0.2125f * C[0] + 0.7154f * C[1] + 0.0721f * C[2];
    // Result = (C - Luminance) * Saturation + Luminance;

#if defined(_XM_NO_INTRINSICS_)
    const XMVECTORF32 gvLuminance = {0.2125f, 0.7154f, 0.0721f, 0.0f};

    float fLuminance = (vColor.vector4_f32[0]*gvLuminance.f[0])+(vColor.vector4_f32[1]*gvLuminance.f[1])+(vColor.vector4_f32[2]*gvLuminance.f[2]);
    XMVECTORF32 vResult = {
        ((vColor.vector4_f32[0] - fLuminance)*fSaturation)+fLuminance,
        ((vColor.vector4_f32[1] - fLuminance)*fSaturation)+fLuminance,
        ((vColor.vector4_f32[2] - fLuminance)*fSaturation)+fLuminance,
        vColor.vector4_f32[3]};
    return vResult.v;

#elif defined(_XM_ARM_NEON_INTRINSICS_)
    static const XMVECTORF32 gvLuminance = {0.2125f, 0.7154f, 0.0721f, 0.0f};
    XMVECTOR vLuminance = XMVector3Dot( vColor, gvLuminance );
    XMVECTOR vResult = vsubq_f32(vColor, vLuminance);
XMVECTOR vSaturation = vdupq_n_f32(fSaturation);
    // vResult = vLuminance + (vColor - vLuminance) * vSaturation
    vResult = vmlaq_f32( vLuminance, vResult, vSaturation );
    // Blend the adjusted value with the source color through the 1110 mask
    return vbslq_f32( g_XMSelect1110, vResult, vColor );
#elif defined(_XM_SSE_INTRINSICS_)
    static const XMVECTORF32 gvLuminance = {0.2125f, 0.7154f, 0.0721f, 0.0f};
    XMVECTOR vLuminance = XMVector3Dot( vColor, gvLuminance );
// Splat fSaturation
    XMVECTOR vSaturation = _mm_set_ps1(fSaturation);
// vResult = ((vColor-vLuminance)*vSaturation)+vLuminance;
    XMVECTOR vResult = _mm_sub_ps(vColor,vLuminance);
    vResult = _mm_mul_ps(vResult,vSaturation);
    vResult = _mm_add_ps(vResult,vLuminance);
// Retain w from the source color
    vLuminance = _mm_shuffle_ps(vResult,vColor,_MM_SHUFFLE(3,2,2,2));   // x = vResult.z,y = vResult.z,z = vColor.z,w=vColor.w
    vResult = _mm_shuffle_ps(vResult,vLuminance,_MM_SHUFFLE(3,0,1,0));  // x = vResult.x,y = vResult.y,z = vResult.z,w=vColor.w
    return vResult;
// NOTE(review): this last branch is empty and tests
// XM_NO_MISALIGNED_VECTOR_ACCESS, while the closing #endif comment names
// _XM_VMX128_INTRINSICS_; other functions in this file use a plain #else
// here - confirm which was intended.
#elif defined(XM_NO_MISALIGNED_VECTOR_ACCESS)
#endif // _XM_VMX128_INTRINSICS_
}

//------------------------------------------------------------------------------
// Scales x, y and z of vColor about 0.5 by fContrast (see the formula below);
// w is taken from vColor unchanged.

inline XMVECTOR XMColorAdjustContrast
(
    FXMVECTOR vColor,
    float    fContrast
)
{
    // Result = (vColor - 0.5f) * fContrast + 0.5f;

#if defined(_XM_NO_INTRINSICS_)
    XMVECTORF32 vResult = {
        ((vColor.vector4_f32[0]-0.5f) * fContrast) + 0.5f,
        ((vColor.vector4_f32[1]-0.5f) * fContrast) + 0.5f,
        ((vColor.vector4_f32[2]-0.5f) * fContrast) + 0.5f,
        vColor.vector4_f32[3]        // Leave W untouched
    };
    return vResult.v;
#elif defined(_XM_ARM_NEON_INTRINSICS_)
    XMVECTOR vResult = vsubq_f32(vColor, g_XMOneHalf.v);
    XMVECTOR vContrast = vdupq_n_f32(fContrast);
    // vResult = 0.5 + (vColor - 0.5) * vContrast
    vResult = vmlaq_f32( g_XMOneHalf.v, vResult, vContrast );
    // Blend the adjusted value with the source color through the 1110 mask
    return vbslq_f32( g_XMSelect1110, vResult, vColor );
#elif defined(_XM_SSE_INTRINSICS_)
    XMVECTOR vScale = _mm_set_ps1(fContrast);           // Splat the scale
XMVECTOR vResult = _mm_sub_ps(vColor,g_XMOneHalf);  // Subtract 0.5f from the source (Saving source)
    vResult = _mm_mul_ps(vResult,vScale);               // Mul by scale
    vResult = _mm_add_ps(vResult,g_XMOneHalf);          // Add 0.5f
// Retain w from the source color
    vScale = _mm_shuffle_ps(vResult,vColor,_MM_SHUFFLE(3,2,2,2));   // x = vResult.z,y = vResult.z,z = vColor.z,w=vColor.w
    vResult = _mm_shuffle_ps(vResult,vScale,_MM_SHUFFLE(3,0,1,0));  // x = vResult.x,y = vResult.y,z = vResult.z,w=vColor.w
    return vResult;
// NOTE(review): this last branch is empty and tests
// XM_NO_MISALIGNED_VECTOR_ACCESS, while the closing #endif comment names
// _XM_VMX128_INTRINSICS_ - confirm which was intended.
#elif defined(XM_NO_MISALIGNED_VECTOR_ACCESS)
#endif // _XM_VMX128_INTRINSICS_
}

//------------------------------------------------------------------------------
// Converts an RGB color (r, g, b taken from x, y, z) to hue / saturation /
// lightness. Going by the local names (la, lha) and the select masks, the
// result is laid out as (h, s, l, a) with a carried over from rgb.
// When max - min is below g_XMEpsilon the color is treated as achromatic and
// h and s are zero.

inline XMVECTOR XMColorRGBToHSL( FXMVECTOR rgb )
{
    XMVECTOR r = XMVectorSplatX( rgb );
    XMVECTOR g = XMVectorSplatY( rgb );
    XMVECTOR b = XMVectorSplatZ( rgb );

    XMVECTOR min = XMVectorMin( r, XMVectorMin( g, b ) );
    XMVECTOR max = XMVectorMax( r, XMVectorMax( g, b ) );

    // l = (min + max) / 2
    XMVECTOR l = XMVectorMultiply( XMVectorAdd( min, max ), g_XMOneHalf );

    XMVECTOR d = XMVectorSubtract( max, min );

    XMVECTOR la = XMVectorSelect( rgb, l, g_XMSelect1110 );

    if ( XMVector3Less( d, g_XMEpsilon ) )
    {
        // Achromatic, assume H and S of 0
        return XMVectorSelect( la, g_XMZero, g_XMSelect1100 );
    }
    else
    {
        XMVECTOR s, h;

        XMVECTOR d2 = XMVectorAdd( min, max );

        if ( XMVector3Greater( l, g_XMOneHalf ) )
        {
            // d / (2-max-min)
            s = XMVectorDivide( d, XMVectorSubtract( g_XMTwo, d2 ) );
        }
        else
        {
            // d / (max+min)
            s = XMVectorDivide( d, d2 );
        }

        // Hue is first computed on a 0..6 scale, offset by 0, 2 or 4
        // depending on which channel is the maximum.
        if ( XMVector3Equal( r, max ) )
        {
            // Red is max
            h = XMVectorDivide( XMVectorSubtract( g, b ), d );
        }
        else if ( XMVector3Equal( g, max ) )
        {
            // Green is max
            h = XMVectorDivide( XMVectorSubtract( b, r ), d );
            h = XMVectorAdd( h, g_XMTwo );
}
        else
        {
            // Blue is max
            h = XMVectorDivide( XMVectorSubtract( r, g ), d );
            h = XMVectorAdd( h, g_XMFour );
        }

        // Rescale hue from the 0..6 scale to 0..1
        h = XMVectorDivide( h, g_XMSix );

        // Wrap a negative hue around by adding 1
        if ( XMVector3Less( h, g_XMZero ) )
            h = XMVectorAdd( h, g_XMOne );

        XMVECTOR lha = XMVectorSelect( la, h, g_XMSelect1100 );
        return XMVectorSelect( s, lha, g_XMSelect1011 );
    }
}

//------------------------------------------------------------------------------

namespace Internal
{

// Helper for XMColorHSLToRGB: maps a hue value h to a channel value that
// lies between p and q. h is first wrapped by +/-1 when it is below 0 or
// above 1, then one of four piecewise segments is chosen by comparing
// against 1/6, 1/2 and 2/3.
inline XMVECTOR XMColorHue2Clr( FXMVECTOR p, FXMVECTOR q, FXMVECTOR h )
{
    static const XMVECTORF32 oneSixth  = { 1.0f/6.0f, 1.0f/6.0f, 1.0f/6.0f, 1.0f/6.0f };
    static const XMVECTORF32 twoThirds = { 2.0f/3.0f, 2.0f/3.0f, 2.0f/3.0f, 2.0f/3.0f };

    XMVECTOR t = h;

    if ( XMVector3Less( t, g_XMZero ) )
        t = XMVectorAdd( t, g_XMOne );

    if ( XMVector3Greater( t, g_XMOne ) )
        t = XMVectorSubtract( t, g_XMOne );

    if ( XMVector3Less( t, oneSixth ) )
    {
        // p + (q - p) * 6 * t
        XMVECTOR t1 = XMVectorSubtract( q, p );
        XMVECTOR t2 = XMVectorMultiply( g_XMSix, t );
        return XMVectorMultiplyAdd( t1, t2, p );
    }

    if ( XMVector3Less( t, g_XMOneHalf ) )
        return q;

    if ( XMVector3Less( t, twoThirds ) )
    {
        // p + (q - p) * 6 * (2/3 - t)
        XMVECTOR t1 = XMVectorSubtract( q, p );
        XMVECTOR t2 = XMVectorMultiply( g_XMSix, XMVectorSubtract( twoThirds, t ) );
        return XMVectorMultiplyAdd( t1, t2, p );
    }

    return p;
}

}; // namespace Internal

// Converts a hue / saturation / lightness color (h, s, l taken from x, y, z)
// to RGB. When s is within g_XMEpsilon of zero the color is achromatic and l
// is used for the color channels. The remaining component is taken from hsl.
inline XMVECTOR XMColorHSLToRGB( FXMVECTOR hsl )
{
    static const XMVECTORF32 oneThird = { 1.0f/3.0f, 1.0f/3.0f, 1.0f/3.0f, 1.0f/3.0f };

    XMVECTOR s = XMVectorSplatY( hsl );
    XMVECTOR l = XMVectorSplatZ( hsl );

    if ( XMVector3NearEqual( s, g_XMZero, g_XMEpsilon ) )
    {
        // Achromatic
        return XMVectorSelect( hsl, l, g_XMSelect1110 );
}
    else
    {
        XMVECTOR h = XMVectorSplatX( hsl );

        // q = l * (1 + s) when l < 0.5, otherwise l + s - l * s
        XMVECTOR q;
        if ( XMVector3Less( l, g_XMOneHalf ) )
        {
            q = XMVectorMultiply( l, XMVectorAdd ( g_XMOne, s ) );
        }
        else
        {
            q = XMVectorSubtract( XMVectorAdd( l, s ), XMVectorMultiply( l, s ) );
        }

        // p = 2 * l - q
        XMVECTOR p = XMVectorSubtract( XMVectorMultiply( g_XMTwo, l ), q );

        // Each channel samples the hue helper at h + 1/3, h and h - 1/3
        XMVECTOR r = DirectX::Internal::XMColorHue2Clr( p, q, XMVectorAdd( h, oneThird ) );
        XMVECTOR g = DirectX::Internal::XMColorHue2Clr( p, q, h );
        XMVECTOR b = DirectX::Internal::XMColorHue2Clr( p, q, XMVectorSubtract( h, oneThird ) );

        // Assemble the result from r, g, b and the input hsl
        XMVECTOR rg = XMVectorSelect( g, r, g_XMSelect1000 );
        XMVECTOR ba = XMVectorSelect( hsl, b, g_XMSelect1110 );

        return XMVectorSelect( ba, rg, g_XMSelect1100 );
    }
}

//------------------------------------------------------------------------------
// Converts an RGB color (r, g, b taken from x, y, z) to hue / saturation /
// value. Going by the local names (hv, hva) and the select masks, the result
// is laid out as (h, s, v, a) with a carried over from rgb.
// s is zero when v is within g_XMEpsilon of zero; h is zero when max - min
// is below g_XMEpsilon.

inline XMVECTOR XMColorRGBToHSV( FXMVECTOR rgb )
{
    XMVECTOR r = XMVectorSplatX( rgb );
    XMVECTOR g = XMVectorSplatY( rgb );
    XMVECTOR b = XMVectorSplatZ( rgb );

    XMVECTOR min = XMVectorMin( r, XMVectorMin( g, b ) );
    XMVECTOR v = XMVectorMax( r, XMVectorMax( g, b ) );

    XMVECTOR d = XMVectorSubtract( v, min );

    // s = d / v, guarded against a (near) zero v
    XMVECTOR s = ( XMVector3NearEqual( v, g_XMZero, g_XMEpsilon ) ) ?
g_XMZero : XMVectorDivide( d, v );

    if ( XMVector3Less( d, g_XMEpsilon ) )
    {
        // Achromatic, assume H of 0
        XMVECTOR hv = XMVectorSelect( v, g_XMZero, g_XMSelect1000 );
        XMVECTOR hva = XMVectorSelect( rgb, hv, g_XMSelect1110 );
        return XMVectorSelect( s, hva, g_XMSelect1011 );
    }
    else
    {
        XMVECTOR h;

        // Hue is first computed on a 0..6 scale, offset by 0, 2 or 4
        // depending on which channel is the maximum.
        if ( XMVector3Equal( r, v ) )
        {
            // Red is max
            h = XMVectorDivide( XMVectorSubtract( g, b ), d );

            // Keep the hue non-negative when g < b
            if ( XMVector3Less( g, b ) )
                h = XMVectorAdd( h, g_XMSix );
        }
        else if ( XMVector3Equal( g, v ) )
        {
            // Green is max
            h = XMVectorDivide( XMVectorSubtract( b, r ), d );
            h = XMVectorAdd( h, g_XMTwo );
        }
        else
        {
            // Blue is max
            h = XMVectorDivide( XMVectorSubtract( r, g ), d );
            h = XMVectorAdd( h, g_XMFour );
        }

        // Rescale hue from the 0..6 scale to 0..1
        h = XMVectorDivide( h, g_XMSix );

        XMVECTOR hv = XMVectorSelect( v, h, g_XMSelect1000 );
        XMVECTOR hva = XMVectorSelect( rgb, hv, g_XMSelect1110 );
        return XMVectorSelect( s, hva, g_XMSelect1011 );
    }
}

//------------------------------------------------------------------------------
// Converts a hue / saturation / value color (h, s, v taken from x, y, z) to
// RGB. The hue is multiplied by six and split into an integer sector i and a
// fraction f; the sector selects which of v, p, q, t feeds each channel.
// The remaining component is taken from hsv.

inline XMVECTOR XMColorHSVToRGB( FXMVECTOR hsv )
{
    XMVECTOR h = XMVectorSplatX( hsv );
    XMVECTOR s = XMVectorSplatY( hsv );
    XMVECTOR v = XMVectorSplatZ( hsv );

    XMVECTOR h6 = XMVectorMultiply( h, g_XMSix );

    XMVECTOR i = XMVectorFloor( h6 );
    XMVECTOR f = XMVectorSubtract( h6, i );

    // p = v* (1-s)
    XMVECTOR p = XMVectorMultiply( v, XMVectorSubtract( g_XMOne, s ) );

    // q = v*(1-f*s)
    XMVECTOR q = XMVectorMultiply( v, XMVectorSubtract( g_XMOne, XMVectorMultiply( f, s ) ) );

    // t = v*(1 - (1-f)*s)
    XMVECTOR t = XMVectorMultiply( v, XMVectorSubtract( g_XMOne, XMVectorMultiply( XMVectorSubtract( g_XMOne, f ), s ) ) );

    // Sector index; any value outside 0..4 falls through to the default
    // case of the switch that follows.
    int ii = static_cast<int>( XMVectorGetX( XMVectorMod( i, g_XMSix ) ) );

    XMVECTOR
_rgb;

    // Pick the channel arrangement for the hue sector computed above
    switch (ii)
    {
    case 0: // rgb = vtp
        {
            XMVECTOR vt = XMVectorSelect( t, v, g_XMSelect1000 );
            _rgb = XMVectorSelect( p, vt, g_XMSelect1100 );
        }
        break;
    case 1: // rgb = qvp
        {
            XMVECTOR qv = XMVectorSelect( v, q, g_XMSelect1000 );
            _rgb = XMVectorSelect( p, qv, g_XMSelect1100 );
        }
        break;
    case 2: // rgb = pvt
        {
            XMVECTOR pv = XMVectorSelect( v, p, g_XMSelect1000 );
            _rgb = XMVectorSelect( t, pv, g_XMSelect1100 );
        }
        break;
    case 3: // rgb = pqv
        {
            XMVECTOR pq = XMVectorSelect( q, p, g_XMSelect1000 );
            _rgb = XMVectorSelect( v, pq, g_XMSelect1100 );
        }
        break;
    case 4: // rgb = tpv
        {
            XMVECTOR tp = XMVectorSelect( p, t, g_XMSelect1000 );
            _rgb = XMVectorSelect( v, tp, g_XMSelect1100 );
        }
        break;
    default: // rgb = vpq
        {
            XMVECTOR vp = XMVectorSelect( p, v, g_XMSelect1000 );
            _rgb = XMVectorSelect( q, vp, g_XMSelect1100 );
        }
        break;
    }

    // Merge the computed channels with the input through the 1110 mask
    return XMVectorSelect( hsv, _rgb, g_XMSelect1110 );
}

//------------------------------------------------------------------------------
// Transforms rgb by the 3x3 coefficient table below (via XMVector3Transform)
// and merges the result with the input through the 1110 select mask.

inline XMVECTOR XMColorRGBToYUV( FXMVECTOR rgb )
{
    static const XMVECTORF32 Scale0 = { 0.299f, -0.147f,  0.615f, 0.0f };
    static const XMVECTORF32 Scale1 = { 0.587f, -0.289f, -0.515f, 0.0f };
    static const XMVECTORF32 Scale2 = { 0.114f,  0.436f, -0.100f, 0.0f };

    XMMATRIX M( Scale0, Scale1, Scale2, g_XMZero );
    XMVECTOR clr = XMVector3Transform( rgb, M );

    return XMVectorSelect( rgb, clr, g_XMSelect1110 );
}

//------------------------------------------------------------------------------
// Transforms yuv by the coefficient table below (first row is g_XMOne) and
// merges the result with the input through the 1110 select mask.

inline XMVECTOR XMColorYUVToRGB( FXMVECTOR yuv )
{
    static const XMVECTORF32 Scale1 = { 0.0f, -0.395f, 2.032f, 0.0f };
    static const XMVECTORF32 Scale2 = { 1.140f, -0.581f, 0.0f, 0.0f };

    XMMATRIX M( g_XMOne, Scale1, Scale2,
g_XMZero );
    XMVECTOR clr = XMVector3Transform( yuv, M );

    return XMVectorSelect( yuv, clr, g_XMSelect1110 );
}

//------------------------------------------------------------------------------
// Same structure as XMColorRGBToYUV, using the coefficient table below
// (the _HD suffix suggests an HD variant of the conversion).

inline XMVECTOR XMColorRGBToYUV_HD( FXMVECTOR rgb )
{
    static const XMVECTORF32 Scale0 = { 0.2126f, -0.0997f,  0.6150f, 0.0f };
    static const XMVECTORF32 Scale1 = { 0.7152f, -0.3354f, -0.5586f, 0.0f };
    static const XMVECTORF32 Scale2 = { 0.0722f,  0.4351f, -0.0564f, 0.0f };

    XMMATRIX M( Scale0, Scale1, Scale2, g_XMZero );
    XMVECTOR clr = XMVector3Transform( rgb, M );

    return XMVectorSelect( rgb, clr, g_XMSelect1110 );
}

//------------------------------------------------------------------------------
// Same structure as XMColorYUVToRGB, using the coefficient table below.

inline XMVECTOR XMColorYUVToRGB_HD( FXMVECTOR yuv )
{
    static const XMVECTORF32 Scale1 = { 0.0f, -0.2153f, 2.1324f, 0.0f };
    static const XMVECTORF32 Scale2 = { 1.2803f, -0.3806f, 0.0f, 0.0f };

    XMMATRIX M( g_XMOne, Scale1, Scale2, g_XMZero );
    XMVECTOR clr = XMVector3Transform( yuv, M );

    return XMVectorSelect( yuv, clr, g_XMSelect1110 );
}

//------------------------------------------------------------------------------
// Transforms rgb by the coefficient table below, then multiplies x, y, z by
// 1 / 0.17697, and merges the result with the input through the 1110 select
// mask.

inline XMVECTOR XMColorRGBToXYZ( FXMVECTOR rgb )
{
    static const XMVECTORF32 Scale0 = { 0.4887180f, 0.1762044f, 0.0000000f, 0.0f };
    static const XMVECTORF32 Scale1 = { 0.3106803f, 0.8129847f, 0.0102048f, 0.0f };
    static const XMVECTORF32 Scale2 = { 0.2006017f, 0.0108109f, 0.9897952f, 0.0f };
    static const XMVECTORF32 Scale = { 1.f/0.17697f, 1.f/0.17697f, 1.f/0.17697f, 0.0f };

    XMMATRIX M( Scale0, Scale1, Scale2, g_XMZero );
    XMVECTOR clr = XMVectorMultiply( XMVector3Transform( rgb, M ), Scale );

    return XMVectorSelect( rgb, clr, g_XMSelect1110 );
}

// Counterpart of XMColorRGBToXYZ: multiplies x, y, z by 0.17697 first, then
// transforms by the coefficient table below.
inline XMVECTOR XMColorXYZToRGB( FXMVECTOR xyz )
{
    static const XMVECTORF32 Scale0 = { 2.3706743f,
-0.5138850f, 0.0052982f, 0.0f };
    static const XMVECTORF32 Scale1 = { -0.9000405f, 1.4253036f, -0.0146949f, 0.0f };
    static const XMVECTORF32 Scale2 = { -0.4706338f, 0.0885814f, 1.0093968f, 0.0f };
    static const XMVECTORF32 Scale = { 0.17697f, 0.17697f, 0.17697f, 0.0f };

    XMMATRIX M( Scale0, Scale1, Scale2, g_XMZero );
    XMVECTOR clr = XMVector3Transform( XMVectorMultiply( xyz, Scale ), M );

    return XMVectorSelect( xyz, clr, g_XMSelect1110 );
}

//------------------------------------------------------------------------------
// Transforms xyz by the coefficient table below to a linear color, then
// applies the piecewise encoding described in the comments: a linear segment
// at or below the 0.0031308 cutoff and a power segment above it. The result
// is merged with the input through the 1110 select mask.

inline XMVECTOR XMColorXYZToSRGB( FXMVECTOR xyz )
{
    static const XMVECTORF32 Scale0 = { 3.2406f, -0.9689f, 0.0557f, 0.0f };
    static const XMVECTORF32 Scale1 = { -1.5372f, 1.8758f, -0.2040f, 0.0f };
    static const XMVECTORF32 Scale2 = { -0.4986f, 0.0415f, 1.0570f, 0.0f };
    static const XMVECTORF32 Cutoff = { 0.0031308f, 0.0031308f, 0.0031308f, 0.0f };
    static const XMVECTORF32 Exp = { 1.0f/2.4f, 1.0f/2.4f, 1.0f/2.4f, 1.0f };

    XMMATRIX M( Scale0, Scale1, Scale2, g_XMZero );
    XMVECTOR lclr = XMVector3Transform( xyz, M );

    // Per-lane mask: set where the linear value is above the cutoff
    XMVECTOR sel = XMVectorGreater( lclr, Cutoff );

    // clr = 12.92 * lclr for lclr <= 0.0031308f
    XMVECTOR smallC = XMVectorMultiply( lclr, g_XMsrgbScale );

    // clr = (1+a)*pow(lclr, 1/2.4) - a for lclr > 0.0031308 (where a = 0.055)
    XMVECTOR largeC = XMVectorSubtract( XMVectorMultiply( g_XMsrgbA1, XMVectorPow( lclr, Exp ) ), g_XMsrgbA );

    // Both segments are computed for every lane; sel picks between them
    XMVECTOR clr = XMVectorSelect( smallC, largeC, sel );

    return XMVectorSelect( xyz, clr, g_XMSelect1110 );
}

//------------------------------------------------------------------------------
// Counterpart of XMColorXYZToSRGB: decodes srgb with the piecewise curve
// (linear segment at or below the 0.04045 cutoff, power segment above it),
// then transforms the linear color by the coefficient table below.

inline XMVECTOR XMColorSRGBToXYZ( FXMVECTOR srgb )
{
    static const XMVECTORF32 Scale0 = { 0.4124f, 0.2126f, 0.0193f, 0.0f };
    static const XMVECTORF32 Scale1 = { 0.3576f, 0.7152f, 0.1192f, 0.0f };
    static const XMVECTORF32 Scale2 = {
0.1805f, 0.0722f, 0.9505f, 0.0f };
    static const XMVECTORF32 Cutoff = { 0.04045f, 0.04045f, 0.04045f, 0.0f };
    static const XMVECTORF32 Exp = { 2.4f, 2.4f, 2.4f, 1.0f };

    // Per-lane mask: set where the encoded value is above the cutoff
    XMVECTOR sel = XMVectorGreater( srgb, Cutoff );

    // lclr = clr / 12.92
    XMVECTOR smallC = XMVectorDivide( srgb, g_XMsrgbScale );

    // lclr = pow( (clr + a) / (1+a), 2.4 )
    XMVECTOR largeC = XMVectorPow( XMVectorDivide( XMVectorAdd( srgb, g_XMsrgbA ), g_XMsrgbA1 ), Exp );

    // Both segments are computed for every lane; sel picks between them
    XMVECTOR lclr = XMVectorSelect( smallC, largeC, sel );

    XMMATRIX M( Scale0, Scale1, Scale2, g_XMZero );
    XMVECTOR clr = XMVector3Transform( lclr, M );

    return XMVectorSelect( srgb, clr, g_XMSelect1110 );
}

/****************************************************************************
 *
 * Miscellaneous
 *
 ****************************************************************************/

//------------------------------------------------------------------------------
// Reports whether the CPU supports the instruction set this build targets.
//   SSE builds:  true on _M_AMD64; otherwise asks IsProcessorFeaturePresent
//                for both PF_XMMI_INSTRUCTIONS_AVAILABLE and
//                PF_XMMI64_INSTRUCTIONS_AVAILABLE when those macros are
//                defined, else returns false.
//   NEON builds: asks IsProcessorFeaturePresent for
//                PF_ARM_NEON_INSTRUCTIONS_AVAILABLE when that macro is
//                defined, else returns false.
//   Otherwise:   true.

inline bool XMVerifyCPUSupport()
{
#if defined(_XM_SSE_INTRINSICS_) && !defined(_XM_NO_INTRINSICS_)
#if defined(_M_AMD64)
    // The X64 processor model requires SSE2 support
    return true;
#elif defined(PF_XMMI_INSTRUCTIONS_AVAILABLE)
    // Note that on Windows 2000 or older, SSE2 detection is not supported so this will always fail
    // Detecting SSE2 on older versions of Windows would require using cpuid directly
    return ( IsProcessorFeaturePresent( PF_XMMI_INSTRUCTIONS_AVAILABLE ) != 0 && IsProcessorFeaturePresent( PF_XMMI64_INSTRUCTIONS_AVAILABLE ) != 0 );
#else
    // If windows.h is not included, we return false (likely a false negative)
    return false;
#endif
#elif defined(_XM_ARM_NEON_INTRINSICS_) && !defined(_XM_NO_INTRINSICS_)
#ifdef PF_ARM_NEON_INSTRUCTIONS_AVAILABLE
    return ( IsProcessorFeaturePresent( PF_ARM_NEON_INSTRUCTIONS_AVAILABLE ) != 0 );
#else
    // If windows.h is not included, we return false (likely a false negative)
    return false;
#endif
#else
    return true;
#endif
}

//------------------------------------------------------------------------------
// Evaluates the Fresnel formula given below per component and clamps the
// result to the range 0..1. CosIncidentAngle is asserted not to be infinite.

inline XMVECTOR XMFresnelTerm
(
    FXMVECTOR CosIncidentAngle,
    FXMVECTOR RefractionIndex
)
{
    assert(!XMVector4IsInfinite(CosIncidentAngle));

    // Result = 0.5f * (g - c)^2 / (g + c)^2 * ((c * (g + c) - 1)^2 / (c * (g - c) + 1)^2 + 1) where
    // c = CosIncidentAngle
    // g = sqrt(c^2 + RefractionIndex^2 - 1)

#if defined(_XM_NO_INTRINSICS_) || defined(_XM_ARM_NEON_INTRINSICS_)

    // G = sqrt(abs(RefractionIndex^2 - 1 + CosIncidentAngle^2))
    XMVECTOR G = XMVectorMultiplyAdd(RefractionIndex, RefractionIndex, g_XMNegativeOne.v);
    G = XMVectorMultiplyAdd(CosIncidentAngle, CosIncidentAngle, G);
    G = XMVectorAbs(G);
    G = XMVectorSqrt(G);

    // S = g + c, D = g - c
    XMVECTOR S = XMVectorAdd(G, CosIncidentAngle);
    XMVECTOR D = XMVectorSubtract(G, CosIncidentAngle);

    // V0 = 0.5 * D^2 / S^2
    XMVECTOR V0 = XMVectorMultiply(D, D);
    XMVECTOR V1 = XMVectorMultiply(S, S);
    V1 = XMVectorReciprocal(V1);
    V0 = XMVectorMultiply(g_XMOneHalf.v, V0);
    V0 = XMVectorMultiply(V0, V1);

    // V2 = (c * S - 1)^2 / (c * D + 1)^2 + 1
    XMVECTOR V2 = XMVectorMultiplyAdd(CosIncidentAngle, S, g_XMNegativeOne.v);
    XMVECTOR V3 = XMVectorMultiplyAdd(CosIncidentAngle, D, g_XMOne.v);
    V2 = XMVectorMultiply(V2, V2);
    V3 = XMVectorMultiply(V3, V3);
    V3 = XMVectorReciprocal(V3);
    V2 = XMVectorMultiplyAdd(V2, V3, g_XMOne.v);

    XMVECTOR Result = XMVectorMultiply(V0, V2);

    Result = XMVectorSaturate(Result);

    return Result;

#elif defined(_XM_SSE_INTRINSICS_)
    // G = sqrt(abs((RefractionIndex^2-1) + CosIncidentAngle^2))
    XMVECTOR G = _mm_mul_ps(RefractionIndex,RefractionIndex);
    XMVECTOR vTemp = _mm_mul_ps(CosIncidentAngle,CosIncidentAngle);
    G = _mm_sub_ps(G,g_XMOne);
    vTemp = _mm_add_ps(vTemp,G);
    // max((0-vTemp),vTemp) == abs(vTemp)
    // The abs is needed to deal with refraction and cosine being zero
    G = _mm_setzero_ps();
    G = _mm_sub_ps(G,vTemp);
    G = _mm_max_ps(G,vTemp);
    // Last operation, the sqrt()
    G = _mm_sqrt_ps(G);

    // Calc G-C and G+C
    XMVECTOR GAddC = _mm_add_ps(G,CosIncidentAngle);
    XMVECTOR GSubC = _mm_sub_ps(G,CosIncidentAngle);
    // Perform the term (0.5f *(g - c)^2) / (g + c)^2
    XMVECTOR vResult = _mm_mul_ps(GSubC,GSubC);
    vTemp = _mm_mul_ps(GAddC,GAddC);
    vResult = _mm_mul_ps(vResult,g_XMOneHalf);
    vResult = _mm_div_ps(vResult,vTemp);
    // Perform the term ((c * (g + c) - 1)^2 / (c * (g - c) + 1)^2 + 1)
    GAddC = _mm_mul_ps(GAddC,CosIncidentAngle);
    GSubC = _mm_mul_ps(GSubC,CosIncidentAngle);
    GAddC = _mm_sub_ps(GAddC,g_XMOne);
    GSubC = _mm_add_ps(GSubC,g_XMOne);
    GAddC = _mm_mul_ps(GAddC,GAddC);
    GSubC = _mm_mul_ps(GSubC,GSubC);
    GAddC = _mm_div_ps(GAddC,GSubC);
    GAddC = _mm_add_ps(GAddC,g_XMOne);
    // Multiply the two term parts
    vResult = _mm_mul_ps(vResult,GAddC);
    // Clamp to 0.0 - 1.0f
    vResult = _mm_max_ps(vResult,g_XMZero);
    vResult = _mm_min_ps(vResult,g_XMOne);
    return vResult;
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}

//------------------------------------------------------------------------------
// Returns true when |S1 - S2| <= Epsilon.

inline bool XMScalarNearEqual
(
    float S1,
    float S2,
    float Epsilon
)
{
    float Delta = S1 - S2;
    return (fabsf(Delta) <= Epsilon);
}

//------------------------------------------------------------------------------
// Modulo the range of the given angle such that -XM_PI <= Angle < XM_PI
inline float XMScalarModAngle
(
    float Angle
)
{
    // Note: The modulo is performed with unsigned math only to work
    // around a precision error on numbers that are close to PI

    // Normalize the range from 0.0f to XM_2PI
    Angle = Angle + XM_PI;
    //
Perform the modulo, unsigned 2135 float fTemp = fabsf(Angle); 2136 fTemp = fTemp - (XM_2PI * (float)((int32_t)(fTemp/XM_2PI))); 2137 // Restore the number to the range of -XM_PI to XM_PI-epsilon 2138 fTemp = fTemp - XM_PI; 2139 // If the modulo'd value was negative, restore negation 2140 if (Angle<0.0f) { 2141 fTemp = -fTemp; 2142 } 2143 return fTemp; 2144} 2145 2146//------------------------------------------------------------------------------ 2147 2148inline float XMScalarSin 2149( 2150 float Value 2151) 2152{ 2153 // Map Value to y in [-pi,pi], x = 2*pi*quotient + remainder. 2154 float quotient = XM_1DIV2PI*Value; 2155 if (Value >= 0.0f) 2156 { 2157 quotient = (float)((int)(quotient + 0.5f)); 2158 } 2159 else 2160 { 2161 quotient = (float)((int)(quotient - 0.5f)); 2162 } 2163 float y = Value - XM_2PI*quotient; 2164 2165 // Map y to [-pi/2,pi/2] with sin(y) = sin(Value). 2166 if (y > XM_PIDIV2) 2167 { 2168 y = XM_PI - y; 2169 } 2170 else if (y < -XM_PIDIV2) 2171 { 2172 y = -XM_PI - y; 2173 } 2174 2175 // 11-degree minimax approximation 2176 float y2 = y * y; 2177 return ( ( ( ( (-2.3889859e-08f * y2 + 2.7525562e-06f) * y2 - 0.00019840874f ) * y2 + 0.0083333310f ) * y2 - 0.16666667f ) * y2 + 1.0f ) * y; 2178} 2179 2180//------------------------------------------------------------------------------ 2181 2182inline float XMScalarSinEst 2183( 2184 float Value 2185) 2186{ 2187 // Map Value to y in [-pi,pi], x = 2*pi*quotient + remainder. 2188 float quotient = XM_1DIV2PI*Value; 2189 if (Value >= 0.0f) 2190 { 2191 quotient = (float)((int)(quotient + 0.5f)); 2192 } 2193 else 2194 { 2195 quotient = (float)((int)(quotient - 0.5f)); 2196 } 2197 float y = Value - XM_2PI*quotient; 2198 2199 // Map y to [-pi/2,pi/2] with sin(y) = sin(Value). 
2200 if (y > XM_PIDIV2) 2201 { 2202 y = XM_PI - y; 2203 } 2204 else if (y < -XM_PIDIV2) 2205 { 2206 y = -XM_PI - y; 2207 } 2208 2209 // 7-degree minimax approximation 2210 float y2 = y * y; 2211 return ( ( ( -0.00018524670f * y2 + 0.0083139502f ) * y2 - 0.16665852f ) * y2 + 1.0f ) * y; 2212} 2213 2214//------------------------------------------------------------------------------ 2215 2216inline float XMScalarCos 2217( 2218 float Value 2219) 2220{ 2221 // Map Value to y in [-pi,pi], x = 2*pi*quotient + remainder. 2222 float quotient = XM_1DIV2PI*Value; 2223 if (Value >= 0.0f) 2224 { 2225 quotient = (float)((int)(quotient + 0.5f)); 2226 } 2227 else 2228 { 2229 quotient = (float)((int)(quotient - 0.5f)); 2230 } 2231 float y = Value - XM_2PI*quotient; 2232 2233 // Map y to [-pi/2,pi/2] with cos(y) = sign*cos(x). 2234 float sign; 2235 if (y > XM_PIDIV2) 2236 { 2237 y = XM_PI - y; 2238 sign = -1.0f; 2239 } 2240 else if (y < -XM_PIDIV2) 2241 { 2242 y = -XM_PI - y; 2243 sign = -1.0f; 2244 } 2245 else 2246 { 2247 sign = +1.0f; 2248 } 2249 2250 // 10-degree minimax approximation 2251 float y2 = y*y; 2252 float p = ( ( ( ( -2.6051615e-07f * y2 + 2.4760495e-05f ) * y2 - 0.0013888378f ) * y2 + 0.041666638f ) * y2 - 0.5f ) * y2 + 1.0f; 2253 return sign*p; 2254} 2255 2256//------------------------------------------------------------------------------ 2257 2258inline float XMScalarCosEst 2259( 2260 float Value 2261) 2262{ 2263 // Map Value to y in [-pi,pi], x = 2*pi*quotient + remainder. 2264 float quotient = XM_1DIV2PI*Value; 2265 if (Value >= 0.0f) 2266 { 2267 quotient = (float)((int)(quotient + 0.5f)); 2268 } 2269 else 2270 { 2271 quotient = (float)((int)(quotient - 0.5f)); 2272 } 2273 float y = Value - XM_2PI*quotient; 2274 2275 // Map y to [-pi/2,pi/2] with cos(y) = sign*cos(x). 
2276 float sign; 2277 if (y > XM_PIDIV2) 2278 { 2279 y = XM_PI - y; 2280 sign = -1.0f; 2281 } 2282 else if (y < -XM_PIDIV2) 2283 { 2284 y = -XM_PI - y; 2285 sign = -1.0f; 2286 } 2287 else 2288 { 2289 sign = +1.0f; 2290 } 2291 2292 // 6-degree minimax approximation 2293 float y2 = y * y; 2294 float p = ( ( -0.0012712436f * y2 + 0.041493919f ) * y2 - 0.49992746f ) * y2 + 1.0f; 2295 return sign*p; 2296} 2297 2298//------------------------------------------------------------------------------ 2299 2300_Use_decl_annotations_ 2301inline void XMScalarSinCos 2302( 2303 float* pSin, 2304 float* pCos, 2305 float Value 2306) 2307{ 2308 assert(pSin); 2309 assert(pCos); 2310 2311 // Map Value to y in [-pi,pi], x = 2*pi*quotient + remainder. 2312 float quotient = XM_1DIV2PI*Value; 2313 if (Value >= 0.0f) 2314 { 2315 quotient = (float)((int)(quotient + 0.5f)); 2316 } 2317 else 2318 { 2319 quotient = (float)((int)(quotient - 0.5f)); 2320 } 2321 float y = Value - XM_2PI*quotient; 2322 2323 // Map y to [-pi/2,pi/2] with sin(y) = sin(Value). 
2324 float sign; 2325 if (y > XM_PIDIV2) 2326 { 2327 y = XM_PI - y; 2328 sign = -1.0f; 2329 } 2330 else if (y < -XM_PIDIV2) 2331 { 2332 y = -XM_PI - y; 2333 sign = -1.0f; 2334 } 2335 else 2336 { 2337 sign = +1.0f; 2338 } 2339 2340 float y2 = y * y; 2341 2342 // 11-degree minimax approximation 2343 *pSin = ( ( ( ( (-2.3889859e-08f * y2 + 2.7525562e-06f) * y2 - 0.00019840874f ) * y2 + 0.0083333310f ) * y2 - 0.16666667f ) * y2 + 1.0f ) * y; 2344 2345 // 10-degree minimax approximation 2346 float p = ( ( ( ( -2.6051615e-07f * y2 + 2.4760495e-05f ) * y2 - 0.0013888378f ) * y2 + 0.041666638f ) * y2 - 0.5f ) * y2 + 1.0f; 2347 *pCos = sign*p; 2348} 2349 2350//------------------------------------------------------------------------------ 2351 2352_Use_decl_annotations_ 2353inline void XMScalarSinCosEst 2354( 2355 float* pSin, 2356 float* pCos, 2357 float Value 2358) 2359{ 2360 assert(pSin); 2361 assert(pCos); 2362 2363 // Map Value to y in [-pi,pi], x = 2*pi*quotient + remainder. 2364 float quotient = XM_1DIV2PI*Value; 2365 if (Value >= 0.0f) 2366 { 2367 quotient = (float)((int)(quotient + 0.5f)); 2368 } 2369 else 2370 { 2371 quotient = (float)((int)(quotient - 0.5f)); 2372 } 2373 float y = Value - XM_2PI*quotient; 2374 2375 // Map y to [-pi/2,pi/2] with sin(y) = sin(Value). 
2376 float sign; 2377 if (y > XM_PIDIV2) 2378 { 2379 y = XM_PI - y; 2380 sign = -1.0f; 2381 } 2382 else if (y < -XM_PIDIV2) 2383 { 2384 y = -XM_PI - y; 2385 sign = -1.0f; 2386 } 2387 else 2388 { 2389 sign = +1.0f; 2390 } 2391 2392 float y2 = y * y; 2393 2394 // 7-degree minimax approximation 2395 *pSin = ( ( ( -0.00018524670f * y2 + 0.0083139502f ) * y2 - 0.16665852f ) * y2 + 1.0f ) * y; 2396 2397 // 6-degree minimax approximation 2398 float p = ( ( -0.0012712436f * y2 + 0.041493919f ) * y2 - 0.49992746f ) * y2 + 1.0f; 2399 *pCos = sign*p; 2400} 2401 2402//------------------------------------------------------------------------------ 2403 2404inline float XMScalarASin 2405( 2406 float Value 2407) 2408{ 2409 // Clamp input to [-1,1]. 2410 bool nonnegative = (Value >= 0.0f); 2411 float x = fabsf(Value); 2412 float omx = 1.0f - x; 2413 if (omx < 0.0f) 2414 { 2415 omx = 0.0f; 2416 } 2417 float root = sqrt(omx); 2418 2419 // 7-degree minimax approximation 2420 float result = ( ( ( ( ( ( -0.0012624911f * x + 0.0066700901f ) * x - 0.0170881256f ) * x + 0.0308918810f ) * x - 0.0501743046f ) * x + 0.0889789874f ) * x - 0.2145988016f ) * x + 1.5707963050f; 2421 result *= root; // acos(|x|) 2422 2423 // acos(x) = pi - acos(-x) when x < 0, asin(x) = pi/2 - acos(x) 2424 return (nonnegative ? XM_PIDIV2 - result : result - XM_PIDIV2); 2425} 2426 2427//------------------------------------------------------------------------------ 2428 2429inline float XMScalarASinEst 2430( 2431 float Value 2432) 2433{ 2434 // Clamp input to [-1,1]. 2435 bool nonnegative = (Value >= 0.0f); 2436 float x = fabsf(Value); 2437 float omx = 1.0f - x; 2438 if (omx < 0.0f) 2439 { 2440 omx = 0.0f; 2441 } 2442 float root = sqrt(omx); 2443 2444 // 3-degree minimax approximation 2445 float result = ((-0.0187293f*x+0.0742610f)*x-0.2121144f)*x+1.5707288f; 2446 result *= root; // acos(|x|) 2447 2448 // acos(x) = pi - acos(-x) when x < 0, asin(x) = pi/2 - acos(x) 2449 return (nonnegative ? 
XM_PIDIV2 - result : result - XM_PIDIV2); 2450} 2451 2452//------------------------------------------------------------------------------ 2453 2454inline float XMScalarACos 2455( 2456 float Value 2457) 2458{ 2459 // Clamp input to [-1,1]. 2460 bool nonnegative = (Value >= 0.0f); 2461 float x = fabsf(Value); 2462 float omx = 1.0f - x; 2463 if (omx < 0.0f) 2464 { 2465 omx = 0.0f; 2466 } 2467 float root = sqrtf(omx); 2468 2469 // 7-degree minimax approximation 2470 float result = ( ( ( ( ( ( -0.0012624911f * x + 0.0066700901f ) * x - 0.0170881256f ) * x + 0.0308918810f ) * x - 0.0501743046f ) * x + 0.0889789874f ) * x - 0.2145988016f ) * x + 1.5707963050f; 2471 result *= root; 2472 2473 // acos(x) = pi - acos(-x) when x < 0 2474 return (nonnegative ? result : XM_PI - result); 2475} 2476 2477//------------------------------------------------------------------------------ 2478 2479inline float XMScalarACosEst 2480( 2481 float Value 2482) 2483{ 2484 // Clamp input to [-1,1]. 2485 bool nonnegative = (Value >= 0.0f); 2486 float x = fabsf(Value); 2487 float omx = 1.0f - x; 2488 if (omx < 0.0f) 2489 { 2490 omx = 0.0f; 2491 } 2492 float root = sqrtf(omx); 2493 2494 // 3-degree minimax approximation 2495 float result = ( ( -0.0187293f * x + 0.0742610f ) * x - 0.2121144f ) * x + 1.5707288f; 2496 result *= root; 2497 2498 // acos(x) = pi - acos(-x) when x < 0 2499 return (nonnegative ? result : XM_PI - result); 2500} 2501