A mining-and-crafting game, ported to consoles (forked directly from smartcmd's GitHub repository).
Branch: main — 3,414 lines, 107 kB (view raw)
1//------------------------------------------------------------------------------------- 2// DirectXMathMatrix.inl -- SIMD C++ Math library 3// 4// THIS CODE AND INFORMATION IS PROVIDED "AS IS" WITHOUT WARRANTY OF 5// ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING BUT NOT LIMITED TO 6// THE IMPLIED WARRANTIES OF MERCHANTABILITY AND/OR FITNESS FOR A 7// PARTICULAR PURPOSE. 8// 9// Copyright (c) Microsoft Corporation. All rights reserved. 10//------------------------------------------------------------------------------------- 11 12#ifdef _MSC_VER 13#pragma once 14#endif 15 16/**************************************************************************** 17 * 18 * Matrix 19 * 20 ****************************************************************************/ 21 22//------------------------------------------------------------------------------ 23// Comparison operations 24//------------------------------------------------------------------------------ 25 26//------------------------------------------------------------------------------ 27 28// Return true if any entry in the matrix is NaN 29inline bool XMMatrixIsNaN 30( 31 CXMMATRIX M 32) 33{ 34#if defined(_XM_NO_INTRINSICS_) 35 size_t i = 16; 36 const uint32_t *pWork = (const uint32_t *)(&M.m[0][0]); 37 do { 38 // Fetch value into integer unit 39 uint32_t uTest = pWork[0]; 40 // Remove sign 41 uTest &= 0x7FFFFFFFU; 42 // NaN is 0x7F800001 through 0x7FFFFFFF inclusive 43 uTest -= 0x7F800001U; 44 if (uTest<0x007FFFFFU) { 45 break; // NaN found 46 } 47 ++pWork; // Next entry 48 } while (--i); 49 return (i!=0); // i == 0 if nothing matched 50#elif defined(_XM_ARM_NEON_INTRINSICS_) 51 // Load in registers 52 XMVECTOR vX = M.r[0]; 53 XMVECTOR vY = M.r[1]; 54 XMVECTOR vZ = M.r[2]; 55 XMVECTOR vW = M.r[3]; 56 // Test themselves to check for NaN 57 vX = vmvnq_u32(vceqq_f32(vX, vX)); 58 vY = vmvnq_u32(vceqq_f32(vY, vY)); 59 vZ = vmvnq_u32(vceqq_f32(vZ, vZ)); 60 vW = vmvnq_u32(vceqq_f32(vW, vW)); 61 // Or all the results 62 vX = 
vorrq_u32(vX,vZ); 63 vY = vorrq_u32(vY,vW); 64 vX = vorrq_u32(vX,vY); 65 // If any tested true, return true 66 int8x8x2_t vTemp = vzip_u8(vget_low_u8(vX), vget_high_u8(vX)); 67 vTemp = vzip_u16(vTemp.val[0], vTemp.val[1]); 68 uint32_t r = vget_lane_u32(vTemp.val[1], 1); 69 return (r != 0); 70#elif defined(_XM_SSE_INTRINSICS_) 71 // Load in registers 72 XMVECTOR vX = M.r[0]; 73 XMVECTOR vY = M.r[1]; 74 XMVECTOR vZ = M.r[2]; 75 XMVECTOR vW = M.r[3]; 76 // Test themselves to check for NaN 77 vX = _mm_cmpneq_ps(vX,vX); 78 vY = _mm_cmpneq_ps(vY,vY); 79 vZ = _mm_cmpneq_ps(vZ,vZ); 80 vW = _mm_cmpneq_ps(vW,vW); 81 // Or all the results 82 vX = _mm_or_ps(vX,vZ); 83 vY = _mm_or_ps(vY,vW); 84 vX = _mm_or_ps(vX,vY); 85 // If any tested true, return true 86 return (_mm_movemask_ps(vX)!=0); 87#else 88#endif 89} 90 91//------------------------------------------------------------------------------ 92 93// Return true if any entry in the matrix is +/-INF 94inline bool XMMatrixIsInfinite 95( 96 CXMMATRIX M 97) 98{ 99#if defined(_XM_NO_INTRINSICS_) 100 size_t i = 16; 101 const uint32_t *pWork = (const uint32_t *)(&M.m[0][0]); 102 do { 103 // Fetch value into integer unit 104 uint32_t uTest = pWork[0]; 105 // Remove sign 106 uTest &= 0x7FFFFFFFU; 107 // INF is 0x7F800000 108 if (uTest==0x7F800000U) { 109 break; // INF found 110 } 111 ++pWork; // Next entry 112 } while (--i); 113 return (i!=0); // i == 0 if nothing matched 114#elif defined(_XM_ARM_NEON_INTRINSICS_) 115 // Mask off the sign bits 116 XMVECTOR vTemp1 = vandq_u32(M.r[0],g_XMAbsMask); 117 XMVECTOR vTemp2 = vandq_u32(M.r[1],g_XMAbsMask); 118 XMVECTOR vTemp3 = vandq_u32(M.r[2],g_XMAbsMask); 119 XMVECTOR vTemp4 = vandq_u32(M.r[3],g_XMAbsMask); 120 // Compare to infinity 121 vTemp1 = vceqq_f32(vTemp1,g_XMInfinity); 122 vTemp2 = vceqq_f32(vTemp2,g_XMInfinity); 123 vTemp3 = vceqq_f32(vTemp3,g_XMInfinity); 124 vTemp4 = vceqq_f32(vTemp4,g_XMInfinity); 125 // Or the answers together 126 vTemp1 = vorrq_u32(vTemp1,vTemp2); 127 vTemp3 
= vorrq_u32(vTemp3,vTemp4); 128 vTemp1 = vorrq_u32(vTemp1,vTemp3); 129 // If any are infinity, the signs are true. 130 int8x8x2_t vTemp = vzip_u8(vget_low_u8(vTemp1), vget_high_u8(vTemp1)); 131 vTemp = vzip_u16(vTemp.val[0], vTemp.val[1]); 132 uint32_t r = vget_lane_u32(vTemp.val[1], 1); 133 return (r != 0); 134#elif defined(_XM_SSE_INTRINSICS_) 135 // Mask off the sign bits 136 XMVECTOR vTemp1 = _mm_and_ps(M.r[0],g_XMAbsMask); 137 XMVECTOR vTemp2 = _mm_and_ps(M.r[1],g_XMAbsMask); 138 XMVECTOR vTemp3 = _mm_and_ps(M.r[2],g_XMAbsMask); 139 XMVECTOR vTemp4 = _mm_and_ps(M.r[3],g_XMAbsMask); 140 // Compare to infinity 141 vTemp1 = _mm_cmpeq_ps(vTemp1,g_XMInfinity); 142 vTemp2 = _mm_cmpeq_ps(vTemp2,g_XMInfinity); 143 vTemp3 = _mm_cmpeq_ps(vTemp3,g_XMInfinity); 144 vTemp4 = _mm_cmpeq_ps(vTemp4,g_XMInfinity); 145 // Or the answers together 146 vTemp1 = _mm_or_ps(vTemp1,vTemp2); 147 vTemp3 = _mm_or_ps(vTemp3,vTemp4); 148 vTemp1 = _mm_or_ps(vTemp1,vTemp3); 149 // If any are infinity, the signs are true. 
150 return (_mm_movemask_ps(vTemp1)!=0); 151#else // _XM_VMX128_INTRINSICS_ 152#endif // _XM_VMX128_INTRINSICS_ 153} 154 155//------------------------------------------------------------------------------ 156 157// Return true if the XMMatrix is equal to identity 158inline bool XMMatrixIsIdentity 159( 160 CXMMATRIX M 161) 162{ 163#if defined(_XM_NO_INTRINSICS_) 164 // Use the integer pipeline to reduce branching to a minimum 165 const uint32_t *pWork = (const uint32_t*)(&M.m[0][0]); 166 // Convert 1.0f to zero and or them together 167 uint32_t uOne = pWork[0]^0x3F800000U; 168 // Or all the 0.0f entries together 169 uint32_t uZero = pWork[1]; 170 uZero |= pWork[2]; 171 uZero |= pWork[3]; 172 // 2nd row 173 uZero |= pWork[4]; 174 uOne |= pWork[5]^0x3F800000U; 175 uZero |= pWork[6]; 176 uZero |= pWork[7]; 177 // 3rd row 178 uZero |= pWork[8]; 179 uZero |= pWork[9]; 180 uOne |= pWork[10]^0x3F800000U; 181 uZero |= pWork[11]; 182 // 4th row 183 uZero |= pWork[12]; 184 uZero |= pWork[13]; 185 uZero |= pWork[14]; 186 uOne |= pWork[15]^0x3F800000U; 187 // If all zero entries are zero, the uZero==0 188 uZero &= 0x7FFFFFFF; // Allow -0.0f 189 // If all 1.0f entries are 1.0f, then uOne==0 190 uOne |= uZero; 191 return (uOne==0); 192#elif defined(_XM_ARM_NEON_INTRINSICS_) 193 XMVECTOR vTemp1 = vceqq_f32(M.r[0],g_XMIdentityR0); 194 XMVECTOR vTemp2 = vceqq_f32(M.r[1],g_XMIdentityR1); 195 XMVECTOR vTemp3 = vceqq_f32(M.r[2],g_XMIdentityR2); 196 XMVECTOR vTemp4 = vceqq_f32(M.r[3],g_XMIdentityR3); 197 vTemp1 = vandq_u32(vTemp1,vTemp2); 198 vTemp3 = vandq_u32(vTemp3,vTemp4); 199 vTemp1 = vandq_u32(vTemp1,vTemp3); 200 int8x8x2_t vTemp = vzip_u8(vget_low_u8(vTemp1), vget_high_u8(vTemp1)); 201 vTemp = vzip_u16(vTemp.val[0], vTemp.val[1]); 202 uint32_t r = vget_lane_u32(vTemp.val[1], 1); 203 return ( r == 0xFFFFFFFFU ); 204#elif defined(_XM_SSE_INTRINSICS_) 205 XMVECTOR vTemp1 = _mm_cmpeq_ps(M.r[0],g_XMIdentityR0); 206 XMVECTOR vTemp2 = _mm_cmpeq_ps(M.r[1],g_XMIdentityR1); 207 XMVECTOR 
vTemp3 = _mm_cmpeq_ps(M.r[2],g_XMIdentityR2); 208 XMVECTOR vTemp4 = _mm_cmpeq_ps(M.r[3],g_XMIdentityR3); 209 vTemp1 = _mm_and_ps(vTemp1,vTemp2); 210 vTemp3 = _mm_and_ps(vTemp3,vTemp4); 211 vTemp1 = _mm_and_ps(vTemp1,vTemp3); 212 return (_mm_movemask_ps(vTemp1)==0x0f); 213#else // _XM_VMX128_INTRINSICS_ 214#endif // _XM_VMX128_INTRINSICS_ 215} 216 217//------------------------------------------------------------------------------ 218// Computation operations 219//------------------------------------------------------------------------------ 220 221//------------------------------------------------------------------------------ 222// Perform a 4x4 matrix multiply by a 4x4 matrix 223inline XMMATRIX XMMatrixMultiply 224( 225 CXMMATRIX M1, 226 CXMMATRIX M2 227) 228{ 229#if defined(_XM_NO_INTRINSICS_) 230 XMMATRIX mResult; 231 // Cache the invariants in registers 232 float x = M1.m[0][0]; 233 float y = M1.m[0][1]; 234 float z = M1.m[0][2]; 235 float w = M1.m[0][3]; 236 // Perform the operation on the first row 237 mResult.m[0][0] = (M2.m[0][0]*x)+(M2.m[1][0]*y)+(M2.m[2][0]*z)+(M2.m[3][0]*w); 238 mResult.m[0][1] = (M2.m[0][1]*x)+(M2.m[1][1]*y)+(M2.m[2][1]*z)+(M2.m[3][1]*w); 239 mResult.m[0][2] = (M2.m[0][2]*x)+(M2.m[1][2]*y)+(M2.m[2][2]*z)+(M2.m[3][2]*w); 240 mResult.m[0][3] = (M2.m[0][3]*x)+(M2.m[1][3]*y)+(M2.m[2][3]*z)+(M2.m[3][3]*w); 241 // Repeat for all the other rows 242 x = M1.m[1][0]; 243 y = M1.m[1][1]; 244 z = M1.m[1][2]; 245 w = M1.m[1][3]; 246 mResult.m[1][0] = (M2.m[0][0]*x)+(M2.m[1][0]*y)+(M2.m[2][0]*z)+(M2.m[3][0]*w); 247 mResult.m[1][1] = (M2.m[0][1]*x)+(M2.m[1][1]*y)+(M2.m[2][1]*z)+(M2.m[3][1]*w); 248 mResult.m[1][2] = (M2.m[0][2]*x)+(M2.m[1][2]*y)+(M2.m[2][2]*z)+(M2.m[3][2]*w); 249 mResult.m[1][3] = (M2.m[0][3]*x)+(M2.m[1][3]*y)+(M2.m[2][3]*z)+(M2.m[3][3]*w); 250 x = M1.m[2][0]; 251 y = M1.m[2][1]; 252 z = M1.m[2][2]; 253 w = M1.m[2][3]; 254 mResult.m[2][0] = (M2.m[0][0]*x)+(M2.m[1][0]*y)+(M2.m[2][0]*z)+(M2.m[3][0]*w); 255 mResult.m[2][1] = 
(M2.m[0][1]*x)+(M2.m[1][1]*y)+(M2.m[2][1]*z)+(M2.m[3][1]*w); 256 mResult.m[2][2] = (M2.m[0][2]*x)+(M2.m[1][2]*y)+(M2.m[2][2]*z)+(M2.m[3][2]*w); 257 mResult.m[2][3] = (M2.m[0][3]*x)+(M2.m[1][3]*y)+(M2.m[2][3]*z)+(M2.m[3][3]*w); 258 x = M1.m[3][0]; 259 y = M1.m[3][1]; 260 z = M1.m[3][2]; 261 w = M1.m[3][3]; 262 mResult.m[3][0] = (M2.m[0][0]*x)+(M2.m[1][0]*y)+(M2.m[2][0]*z)+(M2.m[3][0]*w); 263 mResult.m[3][1] = (M2.m[0][1]*x)+(M2.m[1][1]*y)+(M2.m[2][1]*z)+(M2.m[3][1]*w); 264 mResult.m[3][2] = (M2.m[0][2]*x)+(M2.m[1][2]*y)+(M2.m[2][2]*z)+(M2.m[3][2]*w); 265 mResult.m[3][3] = (M2.m[0][3]*x)+(M2.m[1][3]*y)+(M2.m[2][3]*z)+(M2.m[3][3]*w); 266 return mResult; 267#elif defined(_XM_ARM_NEON_INTRINSICS_) 268 XMMATRIX mResult; 269 __n64 VL = vget_low_f32( M1.r[0] ); 270 __n64 VH = vget_high_f32( M1.r[0] ); 271 // Splat the component X,Y,Z then W 272 XMVECTOR vX = vdupq_lane_f32(VL, 0); 273 XMVECTOR vY = vdupq_lane_f32(VL, 1); 274 XMVECTOR vZ = vdupq_lane_f32(VH, 0); 275 XMVECTOR vW = vdupq_lane_f32(VH, 1); 276 // Perform the operation on the first row 277 vX = vmulq_f32(vX,M2.r[0]); 278 vY = vmulq_f32(vY,M2.r[1]); 279 vZ = vmlaq_f32(vX,vZ,M2.r[2]); 280 vW = vmlaq_f32(vY,vW,M2.r[3]); 281 mResult.r[0] = vaddq_f32( vZ, vW ); 282 // Repeat for the other 3 rows 283 VL = vget_low_f32( M1.r[1] ); 284 VH = vget_high_f32( M1.r[1] ); 285 vX = vdupq_lane_f32(VL, 0); 286 vY = vdupq_lane_f32(VL, 1); 287 vZ = vdupq_lane_f32(VH, 0); 288 vW = vdupq_lane_f32(VH, 1); 289 vX = vmulq_f32(vX,M2.r[0]); 290 vY = vmulq_f32(vY,M2.r[1]); 291 vZ = vmlaq_f32(vX,vZ,M2.r[2]); 292 vW = vmlaq_f32(vY,vW,M2.r[3]); 293 mResult.r[1] = vaddq_f32( vZ, vW ); 294 VL = vget_low_f32( M1.r[2] ); 295 VH = vget_high_f32( M1.r[2] ); 296 vX = vdupq_lane_f32(VL, 0); 297 vY = vdupq_lane_f32(VL, 1); 298 vZ = vdupq_lane_f32(VH, 0); 299 vW = vdupq_lane_f32(VH, 1); 300 vX = vmulq_f32(vX,M2.r[0]); 301 vY = vmulq_f32(vY,M2.r[1]); 302 vZ = vmlaq_f32(vX,vZ,M2.r[2]); 303 vW = vmlaq_f32(vY,vW,M2.r[3]); 304 mResult.r[2] = vaddq_f32( vZ, 
vW ); 305 VL = vget_low_f32( M1.r[3] ); 306 VH = vget_high_f32( M1.r[3] ); 307 vX = vdupq_lane_f32(VL, 0); 308 vY = vdupq_lane_f32(VL, 1); 309 vZ = vdupq_lane_f32(VH, 0); 310 vW = vdupq_lane_f32(VH, 1); 311 vX = vmulq_f32(vX,M2.r[0]); 312 vY = vmulq_f32(vY,M2.r[1]); 313 vZ = vmlaq_f32(vX,vZ,M2.r[2]); 314 vW = vmlaq_f32(vY,vW,M2.r[3]); 315 mResult.r[3] = vaddq_f32( vZ, vW ); 316 return mResult; 317#elif defined(_XM_SSE_INTRINSICS_) 318 XMMATRIX mResult; 319 // Use vW to hold the original row 320 XMVECTOR vW = M1.r[0]; 321 // Splat the component X,Y,Z then W 322 XMVECTOR vX = XM_PERMUTE_PS(vW,_MM_SHUFFLE(0,0,0,0)); 323 XMVECTOR vY = XM_PERMUTE_PS(vW,_MM_SHUFFLE(1,1,1,1)); 324 XMVECTOR vZ = XM_PERMUTE_PS(vW,_MM_SHUFFLE(2,2,2,2)); 325 vW = XM_PERMUTE_PS(vW,_MM_SHUFFLE(3,3,3,3)); 326 // Perform the operation on the first row 327 vX = _mm_mul_ps(vX,M2.r[0]); 328 vY = _mm_mul_ps(vY,M2.r[1]); 329 vZ = _mm_mul_ps(vZ,M2.r[2]); 330 vW = _mm_mul_ps(vW,M2.r[3]); 331 // Perform a binary add to reduce cumulative errors 332 vX = _mm_add_ps(vX,vZ); 333 vY = _mm_add_ps(vY,vW); 334 vX = _mm_add_ps(vX,vY); 335 mResult.r[0] = vX; 336 // Repeat for the other 3 rows 337 vW = M1.r[1]; 338 vX = XM_PERMUTE_PS(vW,_MM_SHUFFLE(0,0,0,0)); 339 vY = XM_PERMUTE_PS(vW,_MM_SHUFFLE(1,1,1,1)); 340 vZ = XM_PERMUTE_PS(vW,_MM_SHUFFLE(2,2,2,2)); 341 vW = XM_PERMUTE_PS(vW,_MM_SHUFFLE(3,3,3,3)); 342 vX = _mm_mul_ps(vX,M2.r[0]); 343 vY = _mm_mul_ps(vY,M2.r[1]); 344 vZ = _mm_mul_ps(vZ,M2.r[2]); 345 vW = _mm_mul_ps(vW,M2.r[3]); 346 vX = _mm_add_ps(vX,vZ); 347 vY = _mm_add_ps(vY,vW); 348 vX = _mm_add_ps(vX,vY); 349 mResult.r[1] = vX; 350 vW = M1.r[2]; 351 vX = XM_PERMUTE_PS(vW,_MM_SHUFFLE(0,0,0,0)); 352 vY = XM_PERMUTE_PS(vW,_MM_SHUFFLE(1,1,1,1)); 353 vZ = XM_PERMUTE_PS(vW,_MM_SHUFFLE(2,2,2,2)); 354 vW = XM_PERMUTE_PS(vW,_MM_SHUFFLE(3,3,3,3)); 355 vX = _mm_mul_ps(vX,M2.r[0]); 356 vY = _mm_mul_ps(vY,M2.r[1]); 357 vZ = _mm_mul_ps(vZ,M2.r[2]); 358 vW = _mm_mul_ps(vW,M2.r[3]); 359 vX = _mm_add_ps(vX,vZ); 360 vY = 
_mm_add_ps(vY,vW); 361 vX = _mm_add_ps(vX,vY); 362 mResult.r[2] = vX; 363 vW = M1.r[3]; 364 vX = XM_PERMUTE_PS(vW,_MM_SHUFFLE(0,0,0,0)); 365 vY = XM_PERMUTE_PS(vW,_MM_SHUFFLE(1,1,1,1)); 366 vZ = XM_PERMUTE_PS(vW,_MM_SHUFFLE(2,2,2,2)); 367 vW = XM_PERMUTE_PS(vW,_MM_SHUFFLE(3,3,3,3)); 368 vX = _mm_mul_ps(vX,M2.r[0]); 369 vY = _mm_mul_ps(vY,M2.r[1]); 370 vZ = _mm_mul_ps(vZ,M2.r[2]); 371 vW = _mm_mul_ps(vW,M2.r[3]); 372 vX = _mm_add_ps(vX,vZ); 373 vY = _mm_add_ps(vY,vW); 374 vX = _mm_add_ps(vX,vY); 375 mResult.r[3] = vX; 376 return mResult; 377#else // _XM_VMX128_INTRINSICS_ 378#endif // _XM_VMX128_INTRINSICS_ 379} 380 381//------------------------------------------------------------------------------ 382 383inline XMMATRIX XMMatrixMultiplyTranspose 384( 385 CXMMATRIX M1, 386 CXMMATRIX M2 387) 388{ 389#if defined(_XM_NO_INTRINSICS_) 390 XMMATRIX mResult; 391 // Cache the invariants in registers 392 float x = M2.m[0][0]; 393 float y = M2.m[1][0]; 394 float z = M2.m[2][0]; 395 float w = M2.m[3][0]; 396 // Perform the operation on the first row 397 mResult.m[0][0] = (M1.m[0][0]*x)+(M1.m[0][1]*y)+(M1.m[0][2]*z)+(M1.m[0][3]*w); 398 mResult.m[0][1] = (M1.m[1][0]*x)+(M1.m[1][1]*y)+(M1.m[1][2]*z)+(M1.m[1][3]*w); 399 mResult.m[0][2] = (M1.m[2][0]*x)+(M1.m[2][1]*y)+(M1.m[2][2]*z)+(M1.m[2][3]*w); 400 mResult.m[0][3] = (M1.m[3][0]*x)+(M1.m[3][1]*y)+(M1.m[3][2]*z)+(M1.m[3][3]*w); 401 // Repeat for all the other rows 402 x = M2.m[0][1]; 403 y = M2.m[1][1]; 404 z = M2.m[2][1]; 405 w = M2.m[3][1]; 406 mResult.m[1][0] = (M1.m[0][0]*x)+(M1.m[0][1]*y)+(M1.m[0][2]*z)+(M1.m[0][3]*w); 407 mResult.m[1][1] = (M1.m[1][0]*x)+(M1.m[1][1]*y)+(M1.m[1][2]*z)+(M1.m[1][3]*w); 408 mResult.m[1][2] = (M1.m[2][0]*x)+(M1.m[2][1]*y)+(M1.m[2][2]*z)+(M1.m[2][3]*w); 409 mResult.m[1][3] = (M1.m[3][0]*x)+(M1.m[3][1]*y)+(M1.m[3][2]*z)+(M1.m[3][3]*w); 410 x = M2.m[0][2]; 411 y = M2.m[1][2]; 412 z = M2.m[2][2]; 413 w = M2.m[3][2]; 414 mResult.m[2][0] = (M1.m[0][0]*x)+(M1.m[0][1]*y)+(M1.m[0][2]*z)+(M1.m[0][3]*w); 
415 mResult.m[2][1] = (M1.m[1][0]*x)+(M1.m[1][1]*y)+(M1.m[1][2]*z)+(M1.m[1][3]*w); 416 mResult.m[2][2] = (M1.m[2][0]*x)+(M1.m[2][1]*y)+(M1.m[2][2]*z)+(M1.m[2][3]*w); 417 mResult.m[2][3] = (M1.m[3][0]*x)+(M1.m[3][1]*y)+(M1.m[3][2]*z)+(M1.m[3][3]*w); 418 x = M2.m[0][3]; 419 y = M2.m[1][3]; 420 z = M2.m[2][3]; 421 w = M2.m[3][3]; 422 mResult.m[3][0] = (M1.m[0][0]*x)+(M1.m[0][1]*y)+(M1.m[0][2]*z)+(M1.m[0][3]*w); 423 mResult.m[3][1] = (M1.m[1][0]*x)+(M1.m[1][1]*y)+(M1.m[1][2]*z)+(M1.m[1][3]*w); 424 mResult.m[3][2] = (M1.m[2][0]*x)+(M1.m[2][1]*y)+(M1.m[2][2]*z)+(M1.m[2][3]*w); 425 mResult.m[3][3] = (M1.m[3][0]*x)+(M1.m[3][1]*y)+(M1.m[3][2]*z)+(M1.m[3][3]*w); 426 return mResult; 427#elif defined(_XM_ARM_NEON_INTRINSICS_) 428 __n64 VL = vget_low_f32( M1.r[0] ); 429 __n64 VH = vget_high_f32( M1.r[0] ); 430 // Splat the component X,Y,Z then W 431 XMVECTOR vX = vdupq_lane_f32(VL, 0); 432 XMVECTOR vY = vdupq_lane_f32(VL, 1); 433 XMVECTOR vZ = vdupq_lane_f32(VH, 0); 434 XMVECTOR vW = vdupq_lane_f32(VH, 1); 435 // Perform the operation on the first row 436 vX = vmulq_f32(vX,M2.r[0]); 437 vY = vmulq_f32(vY,M2.r[1]); 438 vZ = vmlaq_f32(vX,vZ,M2.r[2]); 439 vW = vmlaq_f32(vY,vW,M2.r[3]); 440 __n128 r0 = vaddq_f32( vZ, vW ); 441 // Repeat for the other 3 rows 442 VL = vget_low_f32( M1.r[1] ); 443 VH = vget_high_f32( M1.r[1] ); 444 vX = vdupq_lane_f32(VL, 0); 445 vY = vdupq_lane_f32(VL, 1); 446 vZ = vdupq_lane_f32(VH, 0); 447 vW = vdupq_lane_f32(VH, 1); 448 vX = vmulq_f32(vX,M2.r[0]); 449 vY = vmulq_f32(vY,M2.r[1]); 450 vZ = vmlaq_f32(vX,vZ,M2.r[2]); 451 vW = vmlaq_f32(vY,vW,M2.r[3]); 452 __n128 r1 = vaddq_f32( vZ, vW ); 453 VL = vget_low_f32( M1.r[2] ); 454 VH = vget_high_f32( M1.r[2] ); 455 vX = vdupq_lane_f32(VL, 0); 456 vY = vdupq_lane_f32(VL, 1); 457 vZ = vdupq_lane_f32(VH, 0); 458 vW = vdupq_lane_f32(VH, 1); 459 vX = vmulq_f32(vX,M2.r[0]); 460 vY = vmulq_f32(vY,M2.r[1]); 461 vZ = vmlaq_f32(vX,vZ,M2.r[2]); 462 vW = vmlaq_f32(vY,vW,M2.r[3]); 463 __n128 r2 = vaddq_f32( vZ, vW ); 
464 VL = vget_low_f32( M1.r[3] ); 465 VH = vget_high_f32( M1.r[3] ); 466 vX = vdupq_lane_f32(VL, 0); 467 vY = vdupq_lane_f32(VL, 1); 468 vZ = vdupq_lane_f32(VH, 0); 469 vW = vdupq_lane_f32(VH, 1); 470 vX = vmulq_f32(vX,M2.r[0]); 471 vY = vmulq_f32(vY,M2.r[1]); 472 vZ = vmlaq_f32(vX,vZ,M2.r[2]); 473 vW = vmlaq_f32(vY,vW,M2.r[3]); 474 __n128 r3 = vaddq_f32( vZ, vW ); 475 476 // Transpose result 477 float32x4x2_t P0 = vzipq_f32( r0, r2 ); 478 float32x4x2_t P1 = vzipq_f32( r1, r3 ); 479 480 float32x4x2_t T0 = vzipq_f32( P0.val[0], P1.val[0] ); 481 float32x4x2_t T1 = vzipq_f32( P0.val[1], P1.val[1] ); 482 483 XMMATRIX mResult; 484 mResult.r[0] = T0.val[0]; 485 mResult.r[1] = T0.val[1]; 486 mResult.r[2] = T1.val[0]; 487 mResult.r[3] = T1.val[1]; 488 return mResult; 489#elif defined(_XM_SSE_INTRINSICS_) 490 // Use vW to hold the original row 491 XMVECTOR vW = M1.r[0]; 492 // Splat the component X,Y,Z then W 493 XMVECTOR vX = XM_PERMUTE_PS(vW,_MM_SHUFFLE(0,0,0,0)); 494 XMVECTOR vY = XM_PERMUTE_PS(vW,_MM_SHUFFLE(1,1,1,1)); 495 XMVECTOR vZ = XM_PERMUTE_PS(vW,_MM_SHUFFLE(2,2,2,2)); 496 vW = XM_PERMUTE_PS(vW,_MM_SHUFFLE(3,3,3,3)); 497 // Perform the operation on the first row 498 vX = _mm_mul_ps(vX,M2.r[0]); 499 vY = _mm_mul_ps(vY,M2.r[1]); 500 vZ = _mm_mul_ps(vZ,M2.r[2]); 501 vW = _mm_mul_ps(vW,M2.r[3]); 502 // Perform a binary add to reduce cumulative errors 503 vX = _mm_add_ps(vX,vZ); 504 vY = _mm_add_ps(vY,vW); 505 vX = _mm_add_ps(vX,vY); 506 __m128 r0 = vX; 507 // Repeat for the other 3 rows 508 vW = M1.r[1]; 509 vX = XM_PERMUTE_PS(vW,_MM_SHUFFLE(0,0,0,0)); 510 vY = XM_PERMUTE_PS(vW,_MM_SHUFFLE(1,1,1,1)); 511 vZ = XM_PERMUTE_PS(vW,_MM_SHUFFLE(2,2,2,2)); 512 vW = XM_PERMUTE_PS(vW,_MM_SHUFFLE(3,3,3,3)); 513 vX = _mm_mul_ps(vX,M2.r[0]); 514 vY = _mm_mul_ps(vY,M2.r[1]); 515 vZ = _mm_mul_ps(vZ,M2.r[2]); 516 vW = _mm_mul_ps(vW,M2.r[3]); 517 vX = _mm_add_ps(vX,vZ); 518 vY = _mm_add_ps(vY,vW); 519 vX = _mm_add_ps(vX,vY); 520 __m128 r1 = vX; 521 vW = M1.r[2]; 522 vX = 
XM_PERMUTE_PS(vW,_MM_SHUFFLE(0,0,0,0)); 523 vY = XM_PERMUTE_PS(vW,_MM_SHUFFLE(1,1,1,1)); 524 vZ = XM_PERMUTE_PS(vW,_MM_SHUFFLE(2,2,2,2)); 525 vW = XM_PERMUTE_PS(vW,_MM_SHUFFLE(3,3,3,3)); 526 vX = _mm_mul_ps(vX,M2.r[0]); 527 vY = _mm_mul_ps(vY,M2.r[1]); 528 vZ = _mm_mul_ps(vZ,M2.r[2]); 529 vW = _mm_mul_ps(vW,M2.r[3]); 530 vX = _mm_add_ps(vX,vZ); 531 vY = _mm_add_ps(vY,vW); 532 vX = _mm_add_ps(vX,vY); 533 __m128 r2 = vX; 534 vW = M1.r[3]; 535 vX = XM_PERMUTE_PS(vW,_MM_SHUFFLE(0,0,0,0)); 536 vY = XM_PERMUTE_PS(vW,_MM_SHUFFLE(1,1,1,1)); 537 vZ = XM_PERMUTE_PS(vW,_MM_SHUFFLE(2,2,2,2)); 538 vW = XM_PERMUTE_PS(vW,_MM_SHUFFLE(3,3,3,3)); 539 vX = _mm_mul_ps(vX,M2.r[0]); 540 vY = _mm_mul_ps(vY,M2.r[1]); 541 vZ = _mm_mul_ps(vZ,M2.r[2]); 542 vW = _mm_mul_ps(vW,M2.r[3]); 543 vX = _mm_add_ps(vX,vZ); 544 vY = _mm_add_ps(vY,vW); 545 vX = _mm_add_ps(vX,vY); 546 __m128 r3 = vX; 547 548 // x.x,x.y,y.x,y.y 549 XMVECTOR vTemp1 = _mm_shuffle_ps(r0,r1,_MM_SHUFFLE(1,0,1,0)); 550 // x.z,x.w,y.z,y.w 551 XMVECTOR vTemp3 = _mm_shuffle_ps(r0,r1,_MM_SHUFFLE(3,2,3,2)); 552 // z.x,z.y,w.x,w.y 553 XMVECTOR vTemp2 = _mm_shuffle_ps(r2,r3,_MM_SHUFFLE(1,0,1,0)); 554 // z.z,z.w,w.z,w.w 555 XMVECTOR vTemp4 = _mm_shuffle_ps(r2,r3,_MM_SHUFFLE(3,2,3,2)); 556 557 XMMATRIX mResult; 558 // x.x,y.x,z.x,w.x 559 mResult.r[0] = _mm_shuffle_ps(vTemp1, vTemp2,_MM_SHUFFLE(2,0,2,0)); 560 // x.y,y.y,z.y,w.y 561 mResult.r[1] = _mm_shuffle_ps(vTemp1, vTemp2,_MM_SHUFFLE(3,1,3,1)); 562 // x.z,y.z,z.z,w.z 563 mResult.r[2] = _mm_shuffle_ps(vTemp3, vTemp4,_MM_SHUFFLE(2,0,2,0)); 564 // x.w,y.w,z.w,w.w 565 mResult.r[3] = _mm_shuffle_ps(vTemp3, vTemp4,_MM_SHUFFLE(3,1,3,1)); 566 return mResult; 567#else // _XM_VMX128_INTRINSICS_ 568#endif // _XM_VMX128_INTRINSICS_ 569} 570 571//------------------------------------------------------------------------------ 572 573inline XMMATRIX XMMatrixTranspose 574( 575 CXMMATRIX M 576) 577{ 578#if defined(_XM_NO_INTRINSICS_) 579 580 // Original matrix: 581 // 582 // m00m01m02m03 583 // 
m10m11m12m13 584 // m20m21m22m23 585 // m30m31m32m33 586 587 XMMATRIX P; 588 P.r[0] = XMVectorMergeXY(M.r[0], M.r[2]); // m00m20m01m21 589 P.r[1] = XMVectorMergeXY(M.r[1], M.r[3]); // m10m30m11m31 590 P.r[2] = XMVectorMergeZW(M.r[0], M.r[2]); // m02m22m03m23 591 P.r[3] = XMVectorMergeZW(M.r[1], M.r[3]); // m12m32m13m33 592 593 XMMATRIX MT; 594 MT.r[0] = XMVectorMergeXY(P.r[0], P.r[1]); // m00m10m20m30 595 MT.r[1] = XMVectorMergeZW(P.r[0], P.r[1]); // m01m11m21m31 596 MT.r[2] = XMVectorMergeXY(P.r[2], P.r[3]); // m02m12m22m32 597 MT.r[3] = XMVectorMergeZW(P.r[2], P.r[3]); // m03m13m23m33 598 return MT; 599 600#elif defined(_XM_ARM_NEON_INTRINSICS_) 601 float32x4x2_t P0 = vzipq_f32( M.r[0], M.r[2] ); 602 float32x4x2_t P1 = vzipq_f32( M.r[1], M.r[3] ); 603 604 float32x4x2_t T0 = vzipq_f32( P0.val[0], P1.val[0] ); 605 float32x4x2_t T1 = vzipq_f32( P0.val[1], P1.val[1] ); 606 607 XMMATRIX mResult; 608 mResult.r[0] = T0.val[0]; 609 mResult.r[1] = T0.val[1]; 610 mResult.r[2] = T1.val[0]; 611 mResult.r[3] = T1.val[1]; 612 return mResult; 613#elif defined(_XM_SSE_INTRINSICS_) 614 // x.x,x.y,y.x,y.y 615 XMVECTOR vTemp1 = _mm_shuffle_ps(M.r[0],M.r[1],_MM_SHUFFLE(1,0,1,0)); 616 // x.z,x.w,y.z,y.w 617 XMVECTOR vTemp3 = _mm_shuffle_ps(M.r[0],M.r[1],_MM_SHUFFLE(3,2,3,2)); 618 // z.x,z.y,w.x,w.y 619 XMVECTOR vTemp2 = _mm_shuffle_ps(M.r[2],M.r[3],_MM_SHUFFLE(1,0,1,0)); 620 // z.z,z.w,w.z,w.w 621 XMVECTOR vTemp4 = _mm_shuffle_ps(M.r[2],M.r[3],_MM_SHUFFLE(3,2,3,2)); 622 XMMATRIX mResult; 623 624 // x.x,y.x,z.x,w.x 625 mResult.r[0] = _mm_shuffle_ps(vTemp1, vTemp2,_MM_SHUFFLE(2,0,2,0)); 626 // x.y,y.y,z.y,w.y 627 mResult.r[1] = _mm_shuffle_ps(vTemp1, vTemp2,_MM_SHUFFLE(3,1,3,1)); 628 // x.z,y.z,z.z,w.z 629 mResult.r[2] = _mm_shuffle_ps(vTemp3, vTemp4,_MM_SHUFFLE(2,0,2,0)); 630 // x.w,y.w,z.w,w.w 631 mResult.r[3] = _mm_shuffle_ps(vTemp3, vTemp4,_MM_SHUFFLE(3,1,3,1)); 632 return mResult; 633#else // _XM_VMX128_INTRINSICS_ 634#endif // _XM_VMX128_INTRINSICS_ 635} 636 
637//------------------------------------------------------------------------------ 638// Return the inverse and the determinant of a 4x4 matrix 639_Use_decl_annotations_ 640inline XMMATRIX XMMatrixInverse 641( 642 XMVECTOR* pDeterminant, 643 CXMMATRIX M 644) 645{ 646#if defined(_XM_NO_INTRINSICS_) || defined(_XM_ARM_NEON_INTRINSICS_) 647 648 XMMATRIX MT = XMMatrixTranspose(M); 649 650 XMVECTOR V0[4], V1[4]; 651 V0[0] = XMVectorSwizzle<XM_SWIZZLE_X, XM_SWIZZLE_X, XM_SWIZZLE_Y, XM_SWIZZLE_Y>(MT.r[2]); 652 V1[0] = XMVectorSwizzle<XM_SWIZZLE_Z, XM_SWIZZLE_W, XM_SWIZZLE_Z, XM_SWIZZLE_W>(MT.r[3]); 653 V0[1] = XMVectorSwizzle<XM_SWIZZLE_X, XM_SWIZZLE_X, XM_SWIZZLE_Y, XM_SWIZZLE_Y>(MT.r[0]); 654 V1[1] = XMVectorSwizzle<XM_SWIZZLE_Z, XM_SWIZZLE_W, XM_SWIZZLE_Z, XM_SWIZZLE_W>(MT.r[1]); 655 V0[2] = XMVectorPermute<XM_PERMUTE_0X, XM_PERMUTE_0Z, XM_PERMUTE_1X, XM_PERMUTE_1Z>(MT.r[2], MT.r[0]); 656 V1[2] = XMVectorPermute<XM_PERMUTE_0Y, XM_PERMUTE_0W, XM_PERMUTE_1Y, XM_PERMUTE_1W>(MT.r[3], MT.r[1]); 657 658 XMVECTOR D0 = XMVectorMultiply(V0[0], V1[0]); 659 XMVECTOR D1 = XMVectorMultiply(V0[1], V1[1]); 660 XMVECTOR D2 = XMVectorMultiply(V0[2], V1[2]); 661 662 V0[0] = XMVectorSwizzle<XM_SWIZZLE_Z, XM_SWIZZLE_W, XM_SWIZZLE_Z, XM_SWIZZLE_W>(MT.r[2]); 663 V1[0] = XMVectorSwizzle<XM_SWIZZLE_X, XM_SWIZZLE_X, XM_SWIZZLE_Y, XM_SWIZZLE_Y>(MT.r[3]); 664 V0[1] = XMVectorSwizzle<XM_SWIZZLE_Z, XM_SWIZZLE_W, XM_SWIZZLE_Z, XM_SWIZZLE_W>(MT.r[0]); 665 V1[1] = XMVectorSwizzle<XM_SWIZZLE_X, XM_SWIZZLE_X, XM_SWIZZLE_Y, XM_SWIZZLE_Y>(MT.r[1]); 666 V0[2] = XMVectorPermute<XM_PERMUTE_0Y, XM_PERMUTE_0W, XM_PERMUTE_1Y, XM_PERMUTE_1W>(MT.r[2], MT.r[0]); 667 V1[2] = XMVectorPermute<XM_PERMUTE_0X, XM_PERMUTE_0Z, XM_PERMUTE_1X, XM_PERMUTE_1Z>(MT.r[3], MT.r[1]); 668 669 D0 = XMVectorNegativeMultiplySubtract(V0[0], V1[0], D0); 670 D1 = XMVectorNegativeMultiplySubtract(V0[1], V1[1], D1); 671 D2 = XMVectorNegativeMultiplySubtract(V0[2], V1[2], D2); 672 673 V0[0] = XMVectorSwizzle<XM_SWIZZLE_Y, XM_SWIZZLE_Z, 
XM_SWIZZLE_X, XM_SWIZZLE_Y>(MT.r[1]); 674 V1[0] = XMVectorPermute<XM_PERMUTE_1Y, XM_PERMUTE_0Y, XM_PERMUTE_0W, XM_PERMUTE_0X>(D0, D2); 675 V0[1] = XMVectorSwizzle<XM_SWIZZLE_Z, XM_SWIZZLE_X, XM_SWIZZLE_Y, XM_SWIZZLE_X>(MT.r[0]); 676 V1[1] = XMVectorPermute<XM_PERMUTE_0W, XM_PERMUTE_1Y, XM_PERMUTE_0Y, XM_PERMUTE_0Z>(D0, D2); 677 V0[2] = XMVectorSwizzle<XM_SWIZZLE_Y, XM_SWIZZLE_Z, XM_SWIZZLE_X, XM_SWIZZLE_Y>(MT.r[3]); 678 V1[2] = XMVectorPermute<XM_PERMUTE_1W, XM_PERMUTE_0Y, XM_PERMUTE_0W, XM_PERMUTE_0X>(D1, D2); 679 V0[3] = XMVectorSwizzle<XM_SWIZZLE_Z, XM_SWIZZLE_X, XM_SWIZZLE_Y, XM_SWIZZLE_X>(MT.r[2]); 680 V1[3] = XMVectorPermute<XM_PERMUTE_0W, XM_PERMUTE_1W, XM_PERMUTE_0Y, XM_PERMUTE_0Z>(D1, D2); 681 682 XMVECTOR C0 = XMVectorMultiply(V0[0], V1[0]); 683 XMVECTOR C2 = XMVectorMultiply(V0[1], V1[1]); 684 XMVECTOR C4 = XMVectorMultiply(V0[2], V1[2]); 685 XMVECTOR C6 = XMVectorMultiply(V0[3], V1[3]); 686 687 V0[0] = XMVectorSwizzle<XM_SWIZZLE_Z, XM_SWIZZLE_W, XM_SWIZZLE_Y, XM_SWIZZLE_Z>(MT.r[1]); 688 V1[0] = XMVectorPermute<XM_PERMUTE_0W, XM_PERMUTE_0X, XM_PERMUTE_0Y, XM_PERMUTE_1X>(D0, D2); 689 V0[1] = XMVectorSwizzle<XM_SWIZZLE_W, XM_SWIZZLE_Z, XM_SWIZZLE_W, XM_SWIZZLE_Y>(MT.r[0]); 690 V1[1] = XMVectorPermute<XM_PERMUTE_0Z, XM_PERMUTE_0Y, XM_PERMUTE_1X, XM_PERMUTE_0X>(D0, D2); 691 V0[2] = XMVectorSwizzle<XM_SWIZZLE_Z, XM_SWIZZLE_W, XM_SWIZZLE_Y, XM_SWIZZLE_Z>(MT.r[3]); 692 V1[2] = XMVectorPermute<XM_PERMUTE_0W, XM_PERMUTE_0X, XM_PERMUTE_0Y, XM_PERMUTE_1Z>(D1, D2); 693 V0[3] = XMVectorSwizzle<XM_SWIZZLE_W, XM_SWIZZLE_Z, XM_SWIZZLE_W, XM_SWIZZLE_Y>(MT.r[2]); 694 V1[3] = XMVectorPermute<XM_PERMUTE_0Z, XM_PERMUTE_0Y, XM_PERMUTE_1Z, XM_PERMUTE_0X>(D1, D2); 695 696 C0 = XMVectorNegativeMultiplySubtract(V0[0], V1[0], C0); 697 C2 = XMVectorNegativeMultiplySubtract(V0[1], V1[1], C2); 698 C4 = XMVectorNegativeMultiplySubtract(V0[2], V1[2], C4); 699 C6 = XMVectorNegativeMultiplySubtract(V0[3], V1[3], C6); 700 701 V0[0] = XMVectorSwizzle<XM_SWIZZLE_W, XM_SWIZZLE_X, 
XM_SWIZZLE_W, XM_SWIZZLE_X>(MT.r[1]); 702 V1[0] = XMVectorPermute<XM_PERMUTE_0Z, XM_PERMUTE_1Y, XM_PERMUTE_1X, XM_PERMUTE_0Z>(D0, D2); 703 V0[1] = XMVectorSwizzle<XM_SWIZZLE_Y, XM_SWIZZLE_W, XM_SWIZZLE_X, XM_SWIZZLE_Z>(MT.r[0]); 704 V1[1] = XMVectorPermute<XM_PERMUTE_1Y, XM_PERMUTE_0X, XM_PERMUTE_0W, XM_PERMUTE_1X>(D0, D2); 705 V0[2] = XMVectorSwizzle<XM_SWIZZLE_W, XM_SWIZZLE_X, XM_SWIZZLE_W, XM_SWIZZLE_X>(MT.r[3]); 706 V1[2] = XMVectorPermute<XM_PERMUTE_0Z, XM_PERMUTE_1W, XM_PERMUTE_1Z, XM_PERMUTE_0Z>(D1, D2); 707 V0[3] = XMVectorSwizzle<XM_SWIZZLE_Y, XM_SWIZZLE_W, XM_SWIZZLE_X, XM_SWIZZLE_Z>(MT.r[2]); 708 V1[3] = XMVectorPermute<XM_PERMUTE_1W, XM_PERMUTE_0X, XM_PERMUTE_0W, XM_PERMUTE_1Z>(D1, D2); 709 710 XMVECTOR C1 = XMVectorNegativeMultiplySubtract(V0[0], V1[0], C0); 711 C0 = XMVectorMultiplyAdd(V0[0], V1[0], C0); 712 XMVECTOR C3 = XMVectorMultiplyAdd(V0[1], V1[1], C2); 713 C2 = XMVectorNegativeMultiplySubtract(V0[1], V1[1], C2); 714 XMVECTOR C5 = XMVectorNegativeMultiplySubtract(V0[2], V1[2], C4); 715 C4 = XMVectorMultiplyAdd(V0[2], V1[2], C4); 716 XMVECTOR C7 = XMVectorMultiplyAdd(V0[3], V1[3], C6); 717 C6 = XMVectorNegativeMultiplySubtract(V0[3], V1[3], C6); 718 719 XMMATRIX R; 720 R.r[0] = XMVectorSelect(C0, C1, g_XMSelect0101.v); 721 R.r[1] = XMVectorSelect(C2, C3, g_XMSelect0101.v); 722 R.r[2] = XMVectorSelect(C4, C5, g_XMSelect0101.v); 723 R.r[3] = XMVectorSelect(C6, C7, g_XMSelect0101.v); 724 725 XMVECTOR Determinant = XMVector4Dot(R.r[0], MT.r[0]); 726 727 if (pDeterminant != NULL) 728 *pDeterminant = Determinant; 729 730 XMVECTOR Reciprocal = XMVectorReciprocal(Determinant); 731 732 XMMATRIX Result; 733 Result.r[0] = XMVectorMultiply(R.r[0], Reciprocal); 734 Result.r[1] = XMVectorMultiply(R.r[1], Reciprocal); 735 Result.r[2] = XMVectorMultiply(R.r[2], Reciprocal); 736 Result.r[3] = XMVectorMultiply(R.r[3], Reciprocal); 737 return Result; 738 739#elif defined(_XM_SSE_INTRINSICS_) 740 XMMATRIX MT = XMMatrixTranspose(M); 741 XMVECTOR V00 = 
XM_PERMUTE_PS(MT.r[2],_MM_SHUFFLE(1,1,0,0)); 742 XMVECTOR V10 = XM_PERMUTE_PS(MT.r[3],_MM_SHUFFLE(3,2,3,2)); 743 XMVECTOR V01 = XM_PERMUTE_PS(MT.r[0],_MM_SHUFFLE(1,1,0,0)); 744 XMVECTOR V11 = XM_PERMUTE_PS(MT.r[1],_MM_SHUFFLE(3,2,3,2)); 745 XMVECTOR V02 = _mm_shuffle_ps(MT.r[2], MT.r[0],_MM_SHUFFLE(2,0,2,0)); 746 XMVECTOR V12 = _mm_shuffle_ps(MT.r[3], MT.r[1],_MM_SHUFFLE(3,1,3,1)); 747 748 XMVECTOR D0 = _mm_mul_ps(V00,V10); 749 XMVECTOR D1 = _mm_mul_ps(V01,V11); 750 XMVECTOR D2 = _mm_mul_ps(V02,V12); 751 752 V00 = XM_PERMUTE_PS(MT.r[2],_MM_SHUFFLE(3,2,3,2)); 753 V10 = XM_PERMUTE_PS(MT.r[3],_MM_SHUFFLE(1,1,0,0)); 754 V01 = XM_PERMUTE_PS(MT.r[0],_MM_SHUFFLE(3,2,3,2)); 755 V11 = XM_PERMUTE_PS(MT.r[1],_MM_SHUFFLE(1,1,0,0)); 756 V02 = _mm_shuffle_ps(MT.r[2],MT.r[0],_MM_SHUFFLE(3,1,3,1)); 757 V12 = _mm_shuffle_ps(MT.r[3],MT.r[1],_MM_SHUFFLE(2,0,2,0)); 758 759 V00 = _mm_mul_ps(V00,V10); 760 V01 = _mm_mul_ps(V01,V11); 761 V02 = _mm_mul_ps(V02,V12); 762 D0 = _mm_sub_ps(D0,V00); 763 D1 = _mm_sub_ps(D1,V01); 764 D2 = _mm_sub_ps(D2,V02); 765 // V11 = D0Y,D0W,D2Y,D2Y 766 V11 = _mm_shuffle_ps(D0,D2,_MM_SHUFFLE(1,1,3,1)); 767 V00 = XM_PERMUTE_PS(MT.r[1], _MM_SHUFFLE(1,0,2,1)); 768 V10 = _mm_shuffle_ps(V11,D0,_MM_SHUFFLE(0,3,0,2)); 769 V01 = XM_PERMUTE_PS(MT.r[0], _MM_SHUFFLE(0,1,0,2)); 770 V11 = _mm_shuffle_ps(V11,D0,_MM_SHUFFLE(2,1,2,1)); 771 // V13 = D1Y,D1W,D2W,D2W 772 XMVECTOR V13 = _mm_shuffle_ps(D1,D2,_MM_SHUFFLE(3,3,3,1)); 773 V02 = XM_PERMUTE_PS(MT.r[3], _MM_SHUFFLE(1,0,2,1)); 774 V12 = _mm_shuffle_ps(V13,D1,_MM_SHUFFLE(0,3,0,2)); 775 XMVECTOR V03 = XM_PERMUTE_PS(MT.r[2],_MM_SHUFFLE(0,1,0,2)); 776 V13 = _mm_shuffle_ps(V13,D1,_MM_SHUFFLE(2,1,2,1)); 777 778 XMVECTOR C0 = _mm_mul_ps(V00,V10); 779 XMVECTOR C2 = _mm_mul_ps(V01,V11); 780 XMVECTOR C4 = _mm_mul_ps(V02,V12); 781 XMVECTOR C6 = _mm_mul_ps(V03,V13); 782 783 // V11 = D0X,D0Y,D2X,D2X 784 V11 = _mm_shuffle_ps(D0,D2,_MM_SHUFFLE(0,0,1,0)); 785 V00 = XM_PERMUTE_PS(MT.r[1], _MM_SHUFFLE(2,1,3,2)); 786 V10 = 
_mm_shuffle_ps(D0,V11,_MM_SHUFFLE(2,1,0,3)); 787 V01 = XM_PERMUTE_PS(MT.r[0], _MM_SHUFFLE(1,3,2,3)); 788 V11 = _mm_shuffle_ps(D0,V11,_MM_SHUFFLE(0,2,1,2)); 789 // V13 = D1X,D1Y,D2Z,D2Z 790 V13 = _mm_shuffle_ps(D1,D2,_MM_SHUFFLE(2,2,1,0)); 791 V02 = XM_PERMUTE_PS(MT.r[3], _MM_SHUFFLE(2,1,3,2)); 792 V12 = _mm_shuffle_ps(D1,V13,_MM_SHUFFLE(2,1,0,3)); 793 V03 = XM_PERMUTE_PS(MT.r[2],_MM_SHUFFLE(1,3,2,3)); 794 V13 = _mm_shuffle_ps(D1,V13,_MM_SHUFFLE(0,2,1,2)); 795 796 V00 = _mm_mul_ps(V00,V10); 797 V01 = _mm_mul_ps(V01,V11); 798 V02 = _mm_mul_ps(V02,V12); 799 V03 = _mm_mul_ps(V03,V13); 800 C0 = _mm_sub_ps(C0,V00); 801 C2 = _mm_sub_ps(C2,V01); 802 C4 = _mm_sub_ps(C4,V02); 803 C6 = _mm_sub_ps(C6,V03); 804 805 V00 = XM_PERMUTE_PS(MT.r[1],_MM_SHUFFLE(0,3,0,3)); 806 // V10 = D0Z,D0Z,D2X,D2Y 807 V10 = _mm_shuffle_ps(D0,D2,_MM_SHUFFLE(1,0,2,2)); 808 V10 = XM_PERMUTE_PS(V10,_MM_SHUFFLE(0,2,3,0)); 809 V01 = XM_PERMUTE_PS(MT.r[0],_MM_SHUFFLE(2,0,3,1)); 810 // V11 = D0X,D0W,D2X,D2Y 811 V11 = _mm_shuffle_ps(D0,D2,_MM_SHUFFLE(1,0,3,0)); 812 V11 = XM_PERMUTE_PS(V11,_MM_SHUFFLE(2,1,0,3)); 813 V02 = XM_PERMUTE_PS(MT.r[3],_MM_SHUFFLE(0,3,0,3)); 814 // V12 = D1Z,D1Z,D2Z,D2W 815 V12 = _mm_shuffle_ps(D1,D2,_MM_SHUFFLE(3,2,2,2)); 816 V12 = XM_PERMUTE_PS(V12,_MM_SHUFFLE(0,2,3,0)); 817 V03 = XM_PERMUTE_PS(MT.r[2],_MM_SHUFFLE(2,0,3,1)); 818 // V13 = D1X,D1W,D2Z,D2W 819 V13 = _mm_shuffle_ps(D1,D2,_MM_SHUFFLE(3,2,3,0)); 820 V13 = XM_PERMUTE_PS(V13,_MM_SHUFFLE(2,1,0,3)); 821 822 V00 = _mm_mul_ps(V00,V10); 823 V01 = _mm_mul_ps(V01,V11); 824 V02 = _mm_mul_ps(V02,V12); 825 V03 = _mm_mul_ps(V03,V13); 826 XMVECTOR C1 = _mm_sub_ps(C0,V00); 827 C0 = _mm_add_ps(C0,V00); 828 XMVECTOR C3 = _mm_add_ps(C2,V01); 829 C2 = _mm_sub_ps(C2,V01); 830 XMVECTOR C5 = _mm_sub_ps(C4,V02); 831 C4 = _mm_add_ps(C4,V02); 832 XMVECTOR C7 = _mm_add_ps(C6,V03); 833 C6 = _mm_sub_ps(C6,V03); 834 835 C0 = _mm_shuffle_ps(C0,C1,_MM_SHUFFLE(3,1,2,0)); 836 C2 = _mm_shuffle_ps(C2,C3,_MM_SHUFFLE(3,1,2,0)); 837 C4 = 
_mm_shuffle_ps(C4,C5,_MM_SHUFFLE(3,1,2,0)); 838 C6 = _mm_shuffle_ps(C6,C7,_MM_SHUFFLE(3,1,2,0)); 839 C0 = XM_PERMUTE_PS(C0,_MM_SHUFFLE(3,1,2,0)); 840 C2 = XM_PERMUTE_PS(C2,_MM_SHUFFLE(3,1,2,0)); 841 C4 = XM_PERMUTE_PS(C4,_MM_SHUFFLE(3,1,2,0)); 842 C6 = XM_PERMUTE_PS(C6,_MM_SHUFFLE(3,1,2,0)); 843 // Get the determinate 844 XMVECTOR vTemp = XMVector4Dot(C0,MT.r[0]); 845 if (pDeterminant != NULL) 846 *pDeterminant = vTemp; 847 vTemp = _mm_div_ps(g_XMOne,vTemp); 848 XMMATRIX mResult; 849 mResult.r[0] = _mm_mul_ps(C0,vTemp); 850 mResult.r[1] = _mm_mul_ps(C2,vTemp); 851 mResult.r[2] = _mm_mul_ps(C4,vTemp); 852 mResult.r[3] = _mm_mul_ps(C6,vTemp); 853 return mResult; 854#else // _XM_VMX128_INTRINSICS_ 855#endif // _XM_VMX128_INTRINSICS_ 856} 857 858//------------------------------------------------------------------------------ 859 860inline XMVECTOR XMMatrixDeterminant 861( 862 CXMMATRIX M 863) 864{ 865#if defined(_XM_NO_INTRINSICS_) || defined(_XM_SSE_INTRINSICS_) || defined(_XM_ARM_NEON_INTRINSICS_) 866 867 static const XMVECTORF32 Sign = {1.0f, -1.0f, 1.0f, -1.0f}; 868 869 XMVECTOR V0 = XMVectorSwizzle<XM_SWIZZLE_Y, XM_SWIZZLE_X, XM_SWIZZLE_X, XM_SWIZZLE_X>(M.r[2]); 870 XMVECTOR V1 = XMVectorSwizzle<XM_SWIZZLE_Z, XM_SWIZZLE_Z, XM_SWIZZLE_Y, XM_SWIZZLE_Y>(M.r[3]); 871 XMVECTOR V2 = XMVectorSwizzle<XM_SWIZZLE_Y, XM_SWIZZLE_X, XM_SWIZZLE_X, XM_SWIZZLE_X>(M.r[2]); 872 XMVECTOR V3 = XMVectorSwizzle<XM_SWIZZLE_W, XM_SWIZZLE_W, XM_SWIZZLE_W, XM_SWIZZLE_Z>(M.r[3]); 873 XMVECTOR V4 = XMVectorSwizzle<XM_SWIZZLE_Z, XM_SWIZZLE_Z, XM_SWIZZLE_Y, XM_SWIZZLE_Y>(M.r[2]); 874 XMVECTOR V5 = XMVectorSwizzle<XM_SWIZZLE_W, XM_SWIZZLE_W, XM_SWIZZLE_W, XM_SWIZZLE_Z>(M.r[3]); 875 876 XMVECTOR P0 = XMVectorMultiply(V0, V1); 877 XMVECTOR P1 = XMVectorMultiply(V2, V3); 878 XMVECTOR P2 = XMVectorMultiply(V4, V5); 879 880 V0 = XMVectorSwizzle<XM_SWIZZLE_Z, XM_SWIZZLE_Z, XM_SWIZZLE_Y, XM_SWIZZLE_Y>(M.r[2]); 881 V1 = XMVectorSwizzle<XM_SWIZZLE_Y, XM_SWIZZLE_X, XM_SWIZZLE_X, XM_SWIZZLE_X>(M.r[3]); 
882 V2 = XMVectorSwizzle<XM_SWIZZLE_W, XM_SWIZZLE_W, XM_SWIZZLE_W, XM_SWIZZLE_Z>(M.r[2]); 883 V3 = XMVectorSwizzle<XM_SWIZZLE_Y, XM_SWIZZLE_X, XM_SWIZZLE_X, XM_SWIZZLE_X>(M.r[3]); 884 V4 = XMVectorSwizzle<XM_SWIZZLE_W, XM_SWIZZLE_W, XM_SWIZZLE_W, XM_SWIZZLE_Z>(M.r[2]); 885 V5 = XMVectorSwizzle<XM_SWIZZLE_Z, XM_SWIZZLE_Z, XM_SWIZZLE_Y, XM_SWIZZLE_Y>(M.r[3]); 886 887 P0 = XMVectorNegativeMultiplySubtract(V0, V1, P0); 888 P1 = XMVectorNegativeMultiplySubtract(V2, V3, P1); 889 P2 = XMVectorNegativeMultiplySubtract(V4, V5, P2); 890 891 V0 = XMVectorSwizzle<XM_SWIZZLE_W, XM_SWIZZLE_W, XM_SWIZZLE_W, XM_SWIZZLE_Z>(M.r[1]); 892 V1 = XMVectorSwizzle<XM_SWIZZLE_Z, XM_SWIZZLE_Z, XM_SWIZZLE_Y, XM_SWIZZLE_Y>(M.r[1]); 893 V2 = XMVectorSwizzle<XM_SWIZZLE_Y, XM_SWIZZLE_X, XM_SWIZZLE_X, XM_SWIZZLE_X>(M.r[1]); 894 895 XMVECTOR S = XMVectorMultiply(M.r[0], Sign.v); 896 XMVECTOR R = XMVectorMultiply(V0, P0); 897 R = XMVectorNegativeMultiplySubtract(V1, P1, R); 898 R = XMVectorMultiplyAdd(V2, P2, R); 899 900 return XMVector4Dot(S, R); 901 902#else // _XM_VMX128_INTRINSICS_ 903#endif // _XM_VMX128_INTRINSICS_ 904} 905 906#define XM3RANKDECOMPOSE(a, b, c, x, y, z) \ 907 if((x) < (y)) \ 908 { \ 909 if((y) < (z)) \ 910 { \ 911 (a) = 2; \ 912 (b) = 1; \ 913 (c) = 0; \ 914 } \ 915 else \ 916 { \ 917 (a) = 1; \ 918 \ 919 if((x) < (z)) \ 920 { \ 921 (b) = 2; \ 922 (c) = 0; \ 923 } \ 924 else \ 925 { \ 926 (b) = 0; \ 927 (c) = 2; \ 928 } \ 929 } \ 930 } \ 931 else \ 932 { \ 933 if((x) < (z)) \ 934 { \ 935 (a) = 2; \ 936 (b) = 0; \ 937 (c) = 1; \ 938 } \ 939 else \ 940 { \ 941 (a) = 0; \ 942 \ 943 if((y) < (z)) \ 944 { \ 945 (b) = 2; \ 946 (c) = 1; \ 947 } \ 948 else \ 949 { \ 950 (b) = 1; \ 951 (c) = 2; \ 952 } \ 953 } \ 954 } 955 956#define XM3_DECOMP_EPSILON 0.0001f 957 958_Use_decl_annotations_ 959inline bool XMMatrixDecompose 960( 961 XMVECTOR *outScale, 962 XMVECTOR *outRotQuat, 963 XMVECTOR *outTrans, 964 CXMMATRIX M 965) 966{ 967 static const XMVECTOR *pvCanonicalBasis[3] = { 968 
&g_XMIdentityR0.v, 969 &g_XMIdentityR1.v, 970 &g_XMIdentityR2.v 971 }; 972 973 assert( outScale != NULL ); 974 assert( outRotQuat != NULL ); 975 assert( outTrans != NULL ); 976 977 // Get the translation 978 outTrans[0] = M.r[3]; 979 980 XMVECTOR *ppvBasis[3]; 981 XMMATRIX matTemp; 982 ppvBasis[0] = &matTemp.r[0]; 983 ppvBasis[1] = &matTemp.r[1]; 984 ppvBasis[2] = &matTemp.r[2]; 985 986 matTemp.r[0] = M.r[0]; 987 matTemp.r[1] = M.r[1]; 988 matTemp.r[2] = M.r[2]; 989 matTemp.r[3] = g_XMIdentityR3.v; 990 991 float *pfScales = (float *)outScale; 992 993 size_t a, b, c; 994 XMVectorGetXPtr(&pfScales[0],XMVector3Length(ppvBasis[0][0])); 995 XMVectorGetXPtr(&pfScales[1],XMVector3Length(ppvBasis[1][0])); 996 XMVectorGetXPtr(&pfScales[2],XMVector3Length(ppvBasis[2][0])); 997 pfScales[3] = 0.f; 998 999 XM3RANKDECOMPOSE(a, b, c, pfScales[0], pfScales[1], pfScales[2]) 1000 1001 if(pfScales[a] < XM3_DECOMP_EPSILON) 1002 { 1003 ppvBasis[a][0] = pvCanonicalBasis[a][0]; 1004 } 1005 ppvBasis[a][0] = XMVector3Normalize(ppvBasis[a][0]); 1006 1007 if(pfScales[b] < XM3_DECOMP_EPSILON) 1008 { 1009 size_t aa, bb, cc; 1010 float fAbsX, fAbsY, fAbsZ; 1011 1012 fAbsX = fabsf(XMVectorGetX(ppvBasis[a][0])); 1013 fAbsY = fabsf(XMVectorGetY(ppvBasis[a][0])); 1014 fAbsZ = fabsf(XMVectorGetZ(ppvBasis[a][0])); 1015 1016 XM3RANKDECOMPOSE(aa, bb, cc, fAbsX, fAbsY, fAbsZ) 1017 1018 ppvBasis[b][0] = XMVector3Cross(ppvBasis[a][0],pvCanonicalBasis[cc][0]); 1019 } 1020 1021 ppvBasis[b][0] = XMVector3Normalize(ppvBasis[b][0]); 1022 1023 if(pfScales[c] < XM3_DECOMP_EPSILON) 1024 { 1025 ppvBasis[c][0] = XMVector3Cross(ppvBasis[a][0],ppvBasis[b][0]); 1026 } 1027 1028 ppvBasis[c][0] = XMVector3Normalize(ppvBasis[c][0]); 1029 1030 float fDet = XMVectorGetX(XMMatrixDeterminant(matTemp)); 1031 1032 // use Kramer's rule to check for handedness of coordinate system 1033 if(fDet < 0.0f) 1034 { 1035 // switch coordinate system by negating the scale and inverting the basis vector on the x-axis 1036 pfScales[a] = 
-pfScales[a]; 1037 ppvBasis[a][0] = XMVectorNegate(ppvBasis[a][0]); 1038 1039 fDet = -fDet; 1040 } 1041 1042 fDet -= 1.0f; 1043 fDet *= fDet; 1044 1045 if(XM3_DECOMP_EPSILON < fDet) 1046 { 1047 // Non-SRT matrix encountered 1048 return false; 1049 } 1050 1051 // generate the quaternion from the matrix 1052 outRotQuat[0] = XMQuaternionRotationMatrix(matTemp); 1053 return true; 1054} 1055 1056#undef XM3_DECOMP_EPSILON 1057#undef XM3RANKDECOMPOSE 1058 1059//------------------------------------------------------------------------------ 1060// Transformation operations 1061//------------------------------------------------------------------------------ 1062 1063//------------------------------------------------------------------------------ 1064 1065inline XMMATRIX XMMatrixIdentity() 1066{ 1067#if defined(_XM_NO_INTRINSICS_) || defined(_XM_SSE_INTRINSICS_) || defined(_XM_ARM_NEON_INTRINSICS_) 1068 1069 XMMATRIX M; 1070 M.r[0] = g_XMIdentityR0.v; 1071 M.r[1] = g_XMIdentityR1.v; 1072 M.r[2] = g_XMIdentityR2.v; 1073 M.r[3] = g_XMIdentityR3.v; 1074 return M; 1075 1076#else // _XM_VMX128_INTRINSICS_ 1077#endif // _XM_VMX128_INTRINSICS_ 1078} 1079 1080//------------------------------------------------------------------------------ 1081 1082inline XMMATRIX XMMatrixSet 1083( 1084 float m00, float m01, float m02, float m03, 1085 float m10, float m11, float m12, float m13, 1086 float m20, float m21, float m22, float m23, 1087 float m30, float m31, float m32, float m33 1088) 1089{ 1090 XMMATRIX M; 1091#if defined(_XM_NO_INTRINSICS_) 1092 M.m[0][0] = m00; M.m[0][1] = m01; M.m[0][2] = m02; M.m[0][3] = m03; 1093 M.m[1][0] = m10; M.m[1][1] = m11; M.m[1][2] = m12; M.m[1][3] = m13; 1094 M.m[2][0] = m20; M.m[2][1] = m21; M.m[2][2] = m22; M.m[2][3] = m23; 1095 M.m[3][0] = m30; M.m[3][1] = m31; M.m[3][2] = m32; M.m[3][3] = m33; 1096#else 1097 M.r[0] = XMVectorSet(m00, m01, m02, m03); 1098 M.r[1] = XMVectorSet(m10, m11, m12, m13); 1099 M.r[2] = XMVectorSet(m20, m21, m22, m23); 1100 M.r[3] = 
XMVectorSet(m30, m31, m32, m33); 1101#endif 1102 return M; 1103} 1104 1105//------------------------------------------------------------------------------ 1106 1107inline XMMATRIX XMMatrixTranslation 1108( 1109 float OffsetX, 1110 float OffsetY, 1111 float OffsetZ 1112) 1113{ 1114#if defined(_XM_NO_INTRINSICS_) 1115 1116 XMMATRIX M; 1117 M.m[0][0] = 1.0f; 1118 M.m[0][1] = 0.0f; 1119 M.m[0][2] = 0.0f; 1120 M.m[0][3] = 0.0f; 1121 1122 M.m[1][0] = 0.0f; 1123 M.m[1][1] = 1.0f; 1124 M.m[1][2] = 0.0f; 1125 M.m[1][3] = 0.0f; 1126 1127 M.m[2][0] = 0.0f; 1128 M.m[2][1] = 0.0f; 1129 M.m[2][2] = 1.0f; 1130 M.m[2][3] = 0.0f; 1131 1132 M.m[3][0] = OffsetX; 1133 M.m[3][1] = OffsetY; 1134 M.m[3][2] = OffsetZ; 1135 M.m[3][3] = 1.0f; 1136 return M; 1137 1138#elif defined(_XM_SSE_INTRINSICS_) || defined(_XM_ARM_NEON_INTRINSICS_) 1139 XMMATRIX M; 1140 M.r[0] = g_XMIdentityR0.v; 1141 M.r[1] = g_XMIdentityR1.v; 1142 M.r[2] = g_XMIdentityR2.v; 1143 M.r[3] = XMVectorSet(OffsetX, OffsetY, OffsetZ, 1.f ); 1144 return M; 1145#else // _XM_VMX128_INTRINSICS_ 1146#endif // _XM_VMX128_INTRINSICS_ 1147} 1148 1149 1150//------------------------------------------------------------------------------ 1151 1152inline XMMATRIX XMMatrixTranslationFromVector 1153( 1154 FXMVECTOR Offset 1155) 1156{ 1157#if defined(_XM_NO_INTRINSICS_) 1158 1159 XMMATRIX M; 1160 M.m[0][0] = 1.0f; 1161 M.m[0][1] = 0.0f; 1162 M.m[0][2] = 0.0f; 1163 M.m[0][3] = 0.0f; 1164 1165 M.m[1][0] = 0.0f; 1166 M.m[1][1] = 1.0f; 1167 M.m[1][2] = 0.0f; 1168 M.m[1][3] = 0.0f; 1169 1170 M.m[2][0] = 0.0f; 1171 M.m[2][1] = 0.0f; 1172 M.m[2][2] = 1.0f; 1173 M.m[2][3] = 0.0f; 1174 1175 M.m[3][0] = Offset.vector4_f32[0]; 1176 M.m[3][1] = Offset.vector4_f32[1]; 1177 M.m[3][2] = Offset.vector4_f32[2]; 1178 M.m[3][3] = 1.0f; 1179 return M; 1180 1181#elif defined(_XM_SSE_INTRINSICS_) || defined(_XM_ARM_NEON_INTRINSICS_) 1182 XMMATRIX M; 1183 M.r[0] = g_XMIdentityR0.v; 1184 M.r[1] = g_XMIdentityR1.v; 1185 M.r[2] = g_XMIdentityR2.v; 1186 M.r[3] = 
XMVectorSelect( g_XMIdentityR3.v, Offset, g_XMSelect1110.v ); 1187 return M; 1188#else // _XM_VMX128_INTRINSICS_ 1189#endif // _XM_VMX128_INTRINSICS_ 1190} 1191 1192//------------------------------------------------------------------------------ 1193 1194inline XMMATRIX XMMatrixScaling 1195( 1196 float ScaleX, 1197 float ScaleY, 1198 float ScaleZ 1199) 1200{ 1201#if defined(_XM_NO_INTRINSICS_) 1202 1203 XMMATRIX M; 1204 M.m[0][0] = ScaleX; 1205 M.m[0][1] = 0.0f; 1206 M.m[0][2] = 0.0f; 1207 M.m[0][3] = 0.0f; 1208 1209 M.m[1][0] = 0.0f; 1210 M.m[1][1] = ScaleY; 1211 M.m[1][2] = 0.0f; 1212 M.m[1][3] = 0.0f; 1213 1214 M.m[2][0] = 0.0f; 1215 M.m[2][1] = 0.0f; 1216 M.m[2][2] = ScaleZ; 1217 M.m[2][3] = 0.0f; 1218 1219 M.m[3][0] = 0.0f; 1220 M.m[3][1] = 0.0f; 1221 M.m[3][2] = 0.0f; 1222 M.m[3][3] = 1.0f; 1223 return M; 1224 1225#elif defined(_XM_ARM_NEON_INTRINSICS_) 1226 const XMVECTOR Zero = vdupq_n_f32(0); 1227 XMMATRIX M; 1228 M.r[0] = vsetq_lane_f32( ScaleX, Zero, 0 ); 1229 M.r[1] = vsetq_lane_f32( ScaleY, Zero, 1 ); 1230 M.r[2] = vsetq_lane_f32( ScaleZ, Zero, 2 ); 1231 M.r[3] = g_XMIdentityR3.v; 1232 return M; 1233#elif defined(_XM_SSE_INTRINSICS_) 1234 XMMATRIX M; 1235 M.r[0] = _mm_set_ps( 0, 0, 0, ScaleX ); 1236 M.r[1] = _mm_set_ps( 0, 0, ScaleY, 0 ); 1237 M.r[2] = _mm_set_ps( 0, ScaleZ, 0, 0 ); 1238 M.r[3] = g_XMIdentityR3.v; 1239 return M; 1240#elif defined(XM_NO_MISALIGNED_VECTOR_ACCESS) 1241#endif // _XM_VMX128_INTRINSICS_ 1242} 1243 1244//------------------------------------------------------------------------------ 1245 1246inline XMMATRIX XMMatrixScalingFromVector 1247( 1248 FXMVECTOR Scale 1249) 1250{ 1251#if defined(_XM_NO_INTRINSICS_) 1252 1253 XMMATRIX M; 1254 M.m[0][0] = Scale.vector4_f32[0]; 1255 M.m[0][1] = 0.0f; 1256 M.m[0][2] = 0.0f; 1257 M.m[0][3] = 0.0f; 1258 1259 M.m[1][0] = 0.0f; 1260 M.m[1][1] = Scale.vector4_f32[1]; 1261 M.m[1][2] = 0.0f; 1262 M.m[1][3] = 0.0f; 1263 1264 M.m[2][0] = 0.0f; 1265 M.m[2][1] = 0.0f; 1266 M.m[2][2] = 
Scale.vector4_f32[2]; 1267 M.m[2][3] = 0.0f; 1268 1269 M.m[3][0] = 0.0f; 1270 M.m[3][1] = 0.0f; 1271 M.m[3][2] = 0.0f; 1272 M.m[3][3] = 1.0f; 1273 return M; 1274 1275#elif defined(_XM_ARM_NEON_INTRINSICS_) 1276 XMMATRIX M; 1277 M.r[0] = vandq_u32(Scale,g_XMMaskX); 1278 M.r[1] = vandq_u32(Scale,g_XMMaskY); 1279 M.r[2] = vandq_u32(Scale,g_XMMaskZ); 1280 M.r[3] = g_XMIdentityR3.v; 1281 return M; 1282#elif defined(_XM_SSE_INTRINSICS_) 1283 XMMATRIX M; 1284 M.r[0] = _mm_and_ps(Scale,g_XMMaskX); 1285 M.r[1] = _mm_and_ps(Scale,g_XMMaskY); 1286 M.r[2] = _mm_and_ps(Scale,g_XMMaskZ); 1287 M.r[3] = g_XMIdentityR3.v; 1288 return M; 1289#else // _XM_VMX128_INTRINSICS_ 1290#endif // _XM_VMX128_INTRINSICS_ 1291} 1292 1293//------------------------------------------------------------------------------ 1294 1295inline XMMATRIX XMMatrixRotationX 1296( 1297 float Angle 1298) 1299{ 1300#if defined(_XM_NO_INTRINSICS_) 1301 1302 float fSinAngle; 1303 float fCosAngle; 1304 XMScalarSinCos(&fSinAngle, &fCosAngle, Angle); 1305 1306 XMMATRIX M; 1307 M.m[0][0] = 1.0f; 1308 M.m[0][1] = 0.0f; 1309 M.m[0][2] = 0.0f; 1310 M.m[0][3] = 0.0f; 1311 1312 M.m[1][0] = 0.0f; 1313 M.m[1][1] = fCosAngle; 1314 M.m[1][2] = fSinAngle; 1315 M.m[1][3] = 0.0f; 1316 1317 M.m[2][0] = 0.0f; 1318 M.m[2][1] = -fSinAngle; 1319 M.m[2][2] = fCosAngle; 1320 M.m[2][3] = 0.0f; 1321 1322 M.m[3][0] = 0.0f; 1323 M.m[3][1] = 0.0f; 1324 M.m[3][2] = 0.0f; 1325 M.m[3][3] = 1.0f; 1326 return M; 1327 1328#elif defined(_XM_ARM_NEON_INTRINSICS_) 1329 float fSinAngle; 1330 float fCosAngle; 1331 XMScalarSinCos(&fSinAngle, &fCosAngle, Angle); 1332 1333 const XMVECTOR Zero = vdupq_n_f32(0); 1334 1335 XMVECTOR T1 = vsetq_lane_f32( fCosAngle, Zero, 1 ); 1336 T1 = vsetq_lane_f32( fSinAngle, T1, 2 ); 1337 1338 XMVECTOR T2 = vsetq_lane_f32( -fSinAngle, Zero, 1 ); 1339 T2 = vsetq_lane_f32( fCosAngle, T2, 2 ); 1340 1341 XMMATRIX M; 1342 M.r[0] = g_XMIdentityR0.v; 1343 M.r[1] = T1; 1344 M.r[2] = T2; 1345 M.r[3] = g_XMIdentityR3.v; 1346 return M; 
1347#elif defined(_XM_SSE_INTRINSICS_) 1348 float SinAngle; 1349 float CosAngle; 1350 XMScalarSinCos(&SinAngle, &CosAngle, Angle); 1351 1352 XMVECTOR vSin = _mm_set_ss(SinAngle); 1353 XMVECTOR vCos = _mm_set_ss(CosAngle); 1354 // x = 0,y = cos,z = sin, w = 0 1355 vCos = _mm_shuffle_ps(vCos,vSin,_MM_SHUFFLE(3,0,0,3)); 1356 XMMATRIX M; 1357 M.r[0] = g_XMIdentityR0; 1358 M.r[1] = vCos; 1359 // x = 0,y = sin,z = cos, w = 0 1360 vCos = XM_PERMUTE_PS(vCos,_MM_SHUFFLE(3,1,2,0)); 1361 // x = 0,y = -sin,z = cos, w = 0 1362 vCos = _mm_mul_ps(vCos,g_XMNegateY); 1363 M.r[2] = vCos; 1364 M.r[3] = g_XMIdentityR3; 1365 return M; 1366#else // _XM_VMX128_INTRINSICS_ 1367#endif // _XM_VMX128_INTRINSICS_ 1368} 1369 1370//------------------------------------------------------------------------------ 1371 1372inline XMMATRIX XMMatrixRotationY 1373( 1374 float Angle 1375) 1376{ 1377#if defined(_XM_NO_INTRINSICS_) 1378 1379 float fSinAngle; 1380 float fCosAngle; 1381 XMScalarSinCos(&fSinAngle, &fCosAngle, Angle); 1382 1383 XMMATRIX M; 1384 M.m[0][0] = fCosAngle; 1385 M.m[0][1] = 0.0f; 1386 M.m[0][2] = -fSinAngle; 1387 M.m[0][3] = 0.0f; 1388 1389 M.m[1][0] = 0.0f; 1390 M.m[1][1] = 1.0f; 1391 M.m[1][2] = 0.0f; 1392 M.m[1][3] = 0.0f; 1393 1394 M.m[2][0] = fSinAngle; 1395 M.m[2][1] = 0.0f; 1396 M.m[2][2] = fCosAngle; 1397 M.m[2][3] = 0.0f; 1398 1399 M.m[3][0] = 0.0f; 1400 M.m[3][1] = 0.0f; 1401 M.m[3][2] = 0.0f; 1402 M.m[3][3] = 1.0f; 1403 return M; 1404 1405#elif defined(_XM_ARM_NEON_INTRINSICS_) 1406 float fSinAngle; 1407 float fCosAngle; 1408 XMScalarSinCos(&fSinAngle, &fCosAngle, Angle); 1409 1410 const XMVECTOR Zero = vdupq_n_f32(0); 1411 1412 XMVECTOR T0 = vsetq_lane_f32( fCosAngle, Zero, 0 ); 1413 T0 = vsetq_lane_f32( -fSinAngle, T0, 2 ); 1414 1415 XMVECTOR T2 = vsetq_lane_f32( fSinAngle, Zero, 0 ); 1416 T2 = vsetq_lane_f32( fCosAngle, T2, 2 ); 1417 1418 XMMATRIX M; 1419 M.r[0] = T0; 1420 M.r[1] = g_XMIdentityR1.v; 1421 M.r[2] = T2; 1422 M.r[3] = g_XMIdentityR3.v; 1423 return M; 
1424#elif defined(_XM_SSE_INTRINSICS_) 1425 float SinAngle; 1426 float CosAngle; 1427 XMScalarSinCos(&SinAngle, &CosAngle, Angle); 1428 1429 XMVECTOR vSin = _mm_set_ss(SinAngle); 1430 XMVECTOR vCos = _mm_set_ss(CosAngle); 1431 // x = sin,y = 0,z = cos, w = 0 1432 vSin = _mm_shuffle_ps(vSin,vCos,_MM_SHUFFLE(3,0,3,0)); 1433 XMMATRIX M; 1434 M.r[2] = vSin; 1435 M.r[1] = g_XMIdentityR1; 1436 // x = cos,y = 0,z = sin, w = 0 1437 vSin = XM_PERMUTE_PS(vSin,_MM_SHUFFLE(3,0,1,2)); 1438 // x = cos,y = 0,z = -sin, w = 0 1439 vSin = _mm_mul_ps(vSin,g_XMNegateZ); 1440 M.r[0] = vSin; 1441 M.r[3] = g_XMIdentityR3; 1442 return M; 1443#else // _XM_VMX128_INTRINSICS_ 1444#endif // _XM_VMX128_INTRINSICS_ 1445} 1446 1447//------------------------------------------------------------------------------ 1448 1449inline XMMATRIX XMMatrixRotationZ 1450( 1451 float Angle 1452) 1453{ 1454#if defined(_XM_NO_INTRINSICS_) 1455 1456 float fSinAngle; 1457 float fCosAngle; 1458 XMScalarSinCos(&fSinAngle, &fCosAngle, Angle); 1459 1460 XMMATRIX M; 1461 M.m[0][0] = fCosAngle; 1462 M.m[0][1] = fSinAngle; 1463 M.m[0][2] = 0.0f; 1464 M.m[0][3] = 0.0f; 1465 1466 M.m[1][0] = -fSinAngle; 1467 M.m[1][1] = fCosAngle; 1468 M.m[1][2] = 0.0f; 1469 M.m[1][3] = 0.0f; 1470 1471 M.m[2][0] = 0.0f; 1472 M.m[2][1] = 0.0f; 1473 M.m[2][2] = 1.0f; 1474 M.m[2][3] = 0.0f; 1475 1476 M.m[3][0] = 0.0f; 1477 M.m[3][1] = 0.0f; 1478 M.m[3][2] = 0.0f; 1479 M.m[3][3] = 1.0f; 1480 return M; 1481 1482#elif defined(_XM_ARM_NEON_INTRINSICS_) 1483 float fSinAngle; 1484 float fCosAngle; 1485 XMScalarSinCos(&fSinAngle, &fCosAngle, Angle); 1486 1487 const XMVECTOR Zero = vdupq_n_f32(0); 1488 1489 XMVECTOR T0 = vsetq_lane_f32( fCosAngle, Zero, 0 ); 1490 T0 = vsetq_lane_f32( fSinAngle, T0, 1 ); 1491 1492 XMVECTOR T1 = vsetq_lane_f32( -fSinAngle, Zero, 0 ); 1493 T1 = vsetq_lane_f32( fCosAngle, T1, 1 ); 1494 1495 XMMATRIX M; 1496 M.r[0] = T0; 1497 M.r[1] = T1; 1498 M.r[2] = g_XMIdentityR2.v; 1499 M.r[3] = g_XMIdentityR3.v; 1500 return M; 
1501#elif defined(_XM_SSE_INTRINSICS_) 1502 float SinAngle; 1503 float CosAngle; 1504 XMScalarSinCos(&SinAngle, &CosAngle, Angle); 1505 1506 XMVECTOR vSin = _mm_set_ss(SinAngle); 1507 XMVECTOR vCos = _mm_set_ss(CosAngle); 1508 // x = cos,y = sin,z = 0, w = 0 1509 vCos = _mm_unpacklo_ps(vCos,vSin); 1510 XMMATRIX M; 1511 M.r[0] = vCos; 1512 // x = sin,y = cos,z = 0, w = 0 1513 vCos = XM_PERMUTE_PS(vCos,_MM_SHUFFLE(3,2,0,1)); 1514 // x = cos,y = -sin,z = 0, w = 0 1515 vCos = _mm_mul_ps(vCos,g_XMNegateX); 1516 M.r[1] = vCos; 1517 M.r[2] = g_XMIdentityR2; 1518 M.r[3] = g_XMIdentityR3; 1519 return M; 1520#else // _XM_VMX128_INTRINSICS_ 1521#endif // _XM_VMX128_INTRINSICS_ 1522} 1523 1524//------------------------------------------------------------------------------ 1525 1526inline XMMATRIX XMMatrixRotationRollPitchYaw 1527( 1528 float Pitch, 1529 float Yaw, 1530 float Roll 1531) 1532{ 1533 XMVECTOR Angles = XMVectorSet(Pitch, Yaw, Roll, 0.0f); 1534 return XMMatrixRotationRollPitchYawFromVector(Angles); 1535} 1536 1537//------------------------------------------------------------------------------ 1538 1539inline XMMATRIX XMMatrixRotationRollPitchYawFromVector 1540( 1541 FXMVECTOR Angles // <Pitch, Yaw, Roll, undefined> 1542) 1543{ 1544 XMVECTOR Q = XMQuaternionRotationRollPitchYawFromVector(Angles); 1545 return XMMatrixRotationQuaternion(Q); 1546} 1547 1548//------------------------------------------------------------------------------ 1549 1550inline XMMATRIX XMMatrixRotationNormal 1551( 1552 FXMVECTOR NormalAxis, 1553 float Angle 1554) 1555{ 1556#if defined(_XM_NO_INTRINSICS_) || defined(_XM_ARM_NEON_INTRINSICS_) 1557 1558 float fSinAngle; 1559 float fCosAngle; 1560 XMScalarSinCos(&fSinAngle, &fCosAngle, Angle); 1561 1562 XMVECTOR A = XMVectorSet(fSinAngle, fCosAngle, 1.0f - fCosAngle, 0.0f); 1563 1564 XMVECTOR C2 = XMVectorSplatZ(A); 1565 XMVECTOR C1 = XMVectorSplatY(A); 1566 XMVECTOR C0 = XMVectorSplatX(A); 1567 1568 XMVECTOR N0 = XMVectorSwizzle<XM_SWIZZLE_Y, 
XM_SWIZZLE_Z, XM_SWIZZLE_X, XM_SWIZZLE_W>(NormalAxis); 1569 XMVECTOR N1 = XMVectorSwizzle<XM_SWIZZLE_Z, XM_SWIZZLE_X, XM_SWIZZLE_Y, XM_SWIZZLE_W>(NormalAxis); 1570 1571 XMVECTOR V0 = XMVectorMultiply(C2, N0); 1572 V0 = XMVectorMultiply(V0, N1); 1573 1574 XMVECTOR R0 = XMVectorMultiply(C2, NormalAxis); 1575 R0 = XMVectorMultiplyAdd(R0, NormalAxis, C1); 1576 1577 XMVECTOR R1 = XMVectorMultiplyAdd(C0, NormalAxis, V0); 1578 XMVECTOR R2 = XMVectorNegativeMultiplySubtract(C0, NormalAxis, V0); 1579 1580 V0 = XMVectorSelect(A, R0, g_XMSelect1110.v); 1581 XMVECTOR V1 = XMVectorPermute<XM_PERMUTE_0Z, XM_PERMUTE_1Y, XM_PERMUTE_1Z, XM_PERMUTE_0X>(R1, R2); 1582 XMVECTOR V2 = XMVectorPermute<XM_PERMUTE_0Y, XM_PERMUTE_1X, XM_PERMUTE_0Y, XM_PERMUTE_1X>(R1, R2); 1583 1584 XMMATRIX M; 1585 M.r[0] = XMVectorPermute<XM_PERMUTE_0X, XM_PERMUTE_1X, XM_PERMUTE_1Y, XM_PERMUTE_0W>(V0, V1); 1586 M.r[1] = XMVectorPermute<XM_PERMUTE_1Z, XM_PERMUTE_0Y, XM_PERMUTE_1W, XM_PERMUTE_0W>(V0, V1); 1587 M.r[2] = XMVectorPermute<XM_PERMUTE_1X, XM_PERMUTE_1Y, XM_PERMUTE_0Z, XM_PERMUTE_0W>(V0, V2); 1588 M.r[3] = g_XMIdentityR3.v; 1589 return M; 1590 1591#elif defined(_XM_SSE_INTRINSICS_) 1592 float fSinAngle; 1593 float fCosAngle; 1594 XMScalarSinCos(&fSinAngle, &fCosAngle, Angle); 1595 1596 XMVECTOR C2 = _mm_set_ps1(1.0f - fCosAngle); 1597 XMVECTOR C1 = _mm_set_ps1(fCosAngle); 1598 XMVECTOR C0 = _mm_set_ps1(fSinAngle); 1599 1600 XMVECTOR N0 = XM_PERMUTE_PS(NormalAxis,_MM_SHUFFLE(3,0,2,1)); 1601 XMVECTOR N1 = XM_PERMUTE_PS(NormalAxis,_MM_SHUFFLE(3,1,0,2)); 1602 1603 XMVECTOR V0 = _mm_mul_ps(C2, N0); 1604 V0 = _mm_mul_ps(V0, N1); 1605 1606 XMVECTOR R0 = _mm_mul_ps(C2, NormalAxis); 1607 R0 = _mm_mul_ps(R0, NormalAxis); 1608 R0 = _mm_add_ps(R0, C1); 1609 1610 XMVECTOR R1 = _mm_mul_ps(C0, NormalAxis); 1611 R1 = _mm_add_ps(R1, V0); 1612 XMVECTOR R2 = _mm_mul_ps(C0, NormalAxis); 1613 R2 = _mm_sub_ps(V0,R2); 1614 1615 V0 = _mm_and_ps(R0,g_XMMask3); 1616 XMVECTOR V1 = _mm_shuffle_ps(R1,R2,_MM_SHUFFLE(2,1,2,0)); 
1617 V1 = XM_PERMUTE_PS(V1,_MM_SHUFFLE(0,3,2,1)); 1618 XMVECTOR V2 = _mm_shuffle_ps(R1,R2,_MM_SHUFFLE(0,0,1,1)); 1619 V2 = XM_PERMUTE_PS(V2,_MM_SHUFFLE(2,0,2,0)); 1620 1621 R2 = _mm_shuffle_ps(V0,V1,_MM_SHUFFLE(1,0,3,0)); 1622 R2 = XM_PERMUTE_PS(R2,_MM_SHUFFLE(1,3,2,0)); 1623 1624 XMMATRIX M; 1625 M.r[0] = R2; 1626 1627 R2 = _mm_shuffle_ps(V0,V1,_MM_SHUFFLE(3,2,3,1)); 1628 R2 = XM_PERMUTE_PS(R2,_MM_SHUFFLE(1,3,0,2)); 1629 M.r[1] = R2; 1630 1631 V2 = _mm_shuffle_ps(V2,V0,_MM_SHUFFLE(3,2,1,0)); 1632 M.r[2] = V2; 1633 M.r[3] = g_XMIdentityR3.v; 1634 return M; 1635#else // _XM_VMX128_INTRINSICS_ 1636#endif // _XM_VMX128_INTRINSICS_ 1637} 1638 1639//------------------------------------------------------------------------------ 1640 1641inline XMMATRIX XMMatrixRotationAxis 1642( 1643 FXMVECTOR Axis, 1644 float Angle 1645) 1646{ 1647 assert(!XMVector3Equal(Axis, XMVectorZero())); 1648 assert(!XMVector3IsInfinite(Axis)); 1649 1650#if defined(_XM_NO_INTRINSICS_) || defined(_XM_SSE_INTRINSICS_) || defined(_XM_ARM_NEON_INTRINSICS_) 1651 1652 XMVECTOR Normal = XMVector3Normalize(Axis); 1653 return XMMatrixRotationNormal(Normal, Angle); 1654 1655#else // _XM_VMX128_INTRINSICS_ 1656#endif // _XM_VMX128_INTRINSICS_ 1657} 1658 1659//------------------------------------------------------------------------------ 1660 1661inline XMMATRIX XMMatrixRotationQuaternion 1662( 1663 FXMVECTOR Quaternion 1664) 1665{ 1666#if defined(_XM_NO_INTRINSICS_) || defined(_XM_ARM_NEON_INTRINSICS_) 1667 1668 static const XMVECTORF32 Constant1110 = {1.0f, 1.0f, 1.0f, 0.0f}; 1669 1670 XMVECTOR Q0 = XMVectorAdd(Quaternion, Quaternion); 1671 XMVECTOR Q1 = XMVectorMultiply(Quaternion, Q0); 1672 1673 XMVECTOR V0 = XMVectorPermute<XM_PERMUTE_0Y, XM_PERMUTE_0X, XM_PERMUTE_0X, XM_PERMUTE_1W>(Q1, Constant1110.v); 1674 XMVECTOR V1 = XMVectorPermute<XM_PERMUTE_0Z, XM_PERMUTE_0Z, XM_PERMUTE_0Y, XM_PERMUTE_1W>(Q1, Constant1110.v); 1675 XMVECTOR R0 = XMVectorSubtract(Constant1110, V0); 1676 R0 = XMVectorSubtract(R0, 
V1); 1677 1678 V0 = XMVectorSwizzle<XM_SWIZZLE_X, XM_SWIZZLE_X, XM_SWIZZLE_Y, XM_SWIZZLE_W>(Quaternion); 1679 V1 = XMVectorSwizzle<XM_SWIZZLE_Z, XM_SWIZZLE_Y, XM_SWIZZLE_Z, XM_SWIZZLE_W>(Q0); 1680 V0 = XMVectorMultiply(V0, V1); 1681 1682 V1 = XMVectorSplatW(Quaternion); 1683 XMVECTOR V2 = XMVectorSwizzle<XM_SWIZZLE_Y, XM_SWIZZLE_Z, XM_SWIZZLE_X, XM_SWIZZLE_W>(Q0); 1684 V1 = XMVectorMultiply(V1, V2); 1685 1686 XMVECTOR R1 = XMVectorAdd(V0, V1); 1687 XMVECTOR R2 = XMVectorSubtract(V0, V1); 1688 1689 V0 = XMVectorPermute<XM_PERMUTE_0Y, XM_PERMUTE_1X, XM_PERMUTE_1Y, XM_PERMUTE_0Z>(R1, R2); 1690 V1 = XMVectorPermute<XM_PERMUTE_0X, XM_PERMUTE_1Z, XM_PERMUTE_0X, XM_PERMUTE_1Z>(R1, R2); 1691 1692 XMMATRIX M; 1693 M.r[0] = XMVectorPermute<XM_PERMUTE_0X, XM_PERMUTE_1X, XM_PERMUTE_1Y, XM_PERMUTE_0W>(R0, V0); 1694 M.r[1] = XMVectorPermute<XM_PERMUTE_1Z, XM_PERMUTE_0Y, XM_PERMUTE_1W, XM_PERMUTE_0W>(R0, V0); 1695 M.r[2] = XMVectorPermute<XM_PERMUTE_1X, XM_PERMUTE_1Y, XM_PERMUTE_0Z, XM_PERMUTE_0W>(R0, V1); 1696 M.r[3] = g_XMIdentityR3.v; 1697 return M; 1698 1699#elif defined(_XM_SSE_INTRINSICS_) 1700 static const XMVECTORF32 Constant1110 = {1.0f, 1.0f, 1.0f, 0.0f}; 1701 1702 XMVECTOR Q0 = _mm_add_ps(Quaternion,Quaternion); 1703 XMVECTOR Q1 = _mm_mul_ps(Quaternion,Q0); 1704 1705 XMVECTOR V0 = XM_PERMUTE_PS(Q1,_MM_SHUFFLE(3,0,0,1)); 1706 V0 = _mm_and_ps(V0,g_XMMask3); 1707 XMVECTOR V1 = XM_PERMUTE_PS(Q1,_MM_SHUFFLE(3,1,2,2)); 1708 V1 = _mm_and_ps(V1,g_XMMask3); 1709 XMVECTOR R0 = _mm_sub_ps(Constant1110,V0); 1710 R0 = _mm_sub_ps(R0, V1); 1711 1712 V0 = XM_PERMUTE_PS(Quaternion,_MM_SHUFFLE(3,1,0,0)); 1713 V1 = XM_PERMUTE_PS(Q0,_MM_SHUFFLE(3,2,1,2)); 1714 V0 = _mm_mul_ps(V0, V1); 1715 1716 V1 = XM_PERMUTE_PS(Quaternion,_MM_SHUFFLE(3,3,3,3)); 1717 XMVECTOR V2 = XM_PERMUTE_PS(Q0,_MM_SHUFFLE(3,0,2,1)); 1718 V1 = _mm_mul_ps(V1, V2); 1719 1720 XMVECTOR R1 = _mm_add_ps(V0, V1); 1721 XMVECTOR R2 = _mm_sub_ps(V0, V1); 1722 1723 V0 = _mm_shuffle_ps(R1,R2,_MM_SHUFFLE(1,0,2,1)); 1724 V0 = 
XM_PERMUTE_PS(V0,_MM_SHUFFLE(1,3,2,0)); 1725 V1 = _mm_shuffle_ps(R1,R2,_MM_SHUFFLE(2,2,0,0)); 1726 V1 = XM_PERMUTE_PS(V1,_MM_SHUFFLE(2,0,2,0)); 1727 1728 Q1 = _mm_shuffle_ps(R0,V0,_MM_SHUFFLE(1,0,3,0)); 1729 Q1 = XM_PERMUTE_PS(Q1,_MM_SHUFFLE(1,3,2,0)); 1730 1731 XMMATRIX M; 1732 M.r[0] = Q1; 1733 1734 Q1 = _mm_shuffle_ps(R0,V0,_MM_SHUFFLE(3,2,3,1)); 1735 Q1 = XM_PERMUTE_PS(Q1,_MM_SHUFFLE(1,3,0,2)); 1736 M.r[1] = Q1; 1737 1738 Q1 = _mm_shuffle_ps(V1,R0,_MM_SHUFFLE(3,2,1,0)); 1739 M.r[2] = Q1; 1740 M.r[3] = g_XMIdentityR3; 1741 return M; 1742#else // _XM_VMX128_INTRINSICS_ 1743#endif // _XM_VMX128_INTRINSICS_ 1744} 1745 1746//------------------------------------------------------------------------------ 1747 1748inline XMMATRIX XMMatrixTransformation2D 1749( 1750 FXMVECTOR ScalingOrigin, 1751 float ScalingOrientation, 1752 FXMVECTOR Scaling, 1753 FXMVECTOR RotationOrigin, 1754 float Rotation, 1755 GXMVECTOR Translation 1756) 1757{ 1758 // M = Inverse(MScalingOrigin) * Transpose(MScalingOrientation) * MScaling * MScalingOrientation * 1759 // MScalingOrigin * Inverse(MRotationOrigin) * MRotation * MRotationOrigin * MTranslation; 1760 1761 XMVECTOR VScalingOrigin = XMVectorSelect(g_XMSelect1100.v, ScalingOrigin, g_XMSelect1100.v); 1762 XMVECTOR NegScalingOrigin = XMVectorNegate(VScalingOrigin); 1763 1764 XMMATRIX MScalingOriginI = XMMatrixTranslationFromVector(NegScalingOrigin); 1765 XMMATRIX MScalingOrientation = XMMatrixRotationZ(ScalingOrientation); 1766 XMMATRIX MScalingOrientationT = XMMatrixTranspose(MScalingOrientation); 1767 XMVECTOR VScaling = XMVectorSelect(g_XMOne.v, Scaling, g_XMSelect1100.v); 1768 XMMATRIX MScaling = XMMatrixScalingFromVector(VScaling); 1769 XMVECTOR VRotationOrigin = XMVectorSelect(g_XMSelect1100.v, RotationOrigin, g_XMSelect1100.v); 1770 XMMATRIX MRotation = XMMatrixRotationZ(Rotation); 1771 XMVECTOR VTranslation = XMVectorSelect(g_XMSelect1100.v, Translation,g_XMSelect1100.v); 1772 1773 XMMATRIX M = XMMatrixMultiply(MScalingOriginI, 
MScalingOrientationT); 1774 M = XMMatrixMultiply(M, MScaling); 1775 M = XMMatrixMultiply(M, MScalingOrientation); 1776 M.r[3] = XMVectorAdd(M.r[3], VScalingOrigin); 1777 M.r[3] = XMVectorSubtract(M.r[3], VRotationOrigin); 1778 M = XMMatrixMultiply(M, MRotation); 1779 M.r[3] = XMVectorAdd(M.r[3], VRotationOrigin); 1780 M.r[3] = XMVectorAdd(M.r[3], VTranslation); 1781 1782 return M; 1783} 1784 1785//------------------------------------------------------------------------------ 1786 1787inline XMMATRIX XMMatrixTransformation 1788( 1789 FXMVECTOR ScalingOrigin, 1790 FXMVECTOR ScalingOrientationQuaternion, 1791 FXMVECTOR Scaling, 1792 GXMVECTOR RotationOrigin, 1793 CXMVECTOR RotationQuaternion, 1794 CXMVECTOR Translation 1795) 1796{ 1797 // M = Inverse(MScalingOrigin) * Transpose(MScalingOrientation) * MScaling * MScalingOrientation * 1798 // MScalingOrigin * Inverse(MRotationOrigin) * MRotation * MRotationOrigin * MTranslation; 1799 1800 XMVECTOR VScalingOrigin = XMVectorSelect(g_XMSelect1110.v, ScalingOrigin, g_XMSelect1110.v); 1801 XMVECTOR NegScalingOrigin = XMVectorNegate(ScalingOrigin); 1802 1803 XMMATRIX MScalingOriginI = XMMatrixTranslationFromVector(NegScalingOrigin); 1804 XMMATRIX MScalingOrientation = XMMatrixRotationQuaternion(ScalingOrientationQuaternion); 1805 XMMATRIX MScalingOrientationT = XMMatrixTranspose(MScalingOrientation); 1806 XMMATRIX MScaling = XMMatrixScalingFromVector(Scaling); 1807 XMVECTOR VRotationOrigin = XMVectorSelect(g_XMSelect1110.v, RotationOrigin, g_XMSelect1110.v); 1808 XMMATRIX MRotation = XMMatrixRotationQuaternion(RotationQuaternion); 1809 XMVECTOR VTranslation = XMVectorSelect(g_XMSelect1110.v, Translation, g_XMSelect1110.v); 1810 1811 XMMATRIX M; 1812 M = XMMatrixMultiply(MScalingOriginI, MScalingOrientationT); 1813 M = XMMatrixMultiply(M, MScaling); 1814 M = XMMatrixMultiply(M, MScalingOrientation); 1815 M.r[3] = XMVectorAdd(M.r[3], VScalingOrigin); 1816 M.r[3] = XMVectorSubtract(M.r[3], VRotationOrigin); 1817 M = 
XMMatrixMultiply(M, MRotation); 1818 M.r[3] = XMVectorAdd(M.r[3], VRotationOrigin); 1819 M.r[3] = XMVectorAdd(M.r[3], VTranslation); 1820 return M; 1821} 1822 1823//------------------------------------------------------------------------------ 1824 1825inline XMMATRIX XMMatrixAffineTransformation2D 1826( 1827 FXMVECTOR Scaling, 1828 FXMVECTOR RotationOrigin, 1829 float Rotation, 1830 FXMVECTOR Translation 1831) 1832{ 1833 // M = MScaling * Inverse(MRotationOrigin) * MRotation * MRotationOrigin * MTranslation; 1834 1835 XMVECTOR VScaling = XMVectorSelect(g_XMOne.v, Scaling, g_XMSelect1100.v); 1836 XMMATRIX MScaling = XMMatrixScalingFromVector(VScaling); 1837 XMVECTOR VRotationOrigin = XMVectorSelect(g_XMSelect1100.v, RotationOrigin, g_XMSelect1100.v); 1838 XMMATRIX MRotation = XMMatrixRotationZ(Rotation); 1839 XMVECTOR VTranslation = XMVectorSelect(g_XMSelect1100.v, Translation,g_XMSelect1100.v); 1840 1841 XMMATRIX M; 1842 M = MScaling; 1843 M.r[3] = XMVectorSubtract(M.r[3], VRotationOrigin); 1844 M = XMMatrixMultiply(M, MRotation); 1845 M.r[3] = XMVectorAdd(M.r[3], VRotationOrigin); 1846 M.r[3] = XMVectorAdd(M.r[3], VTranslation); 1847 return M; 1848} 1849 1850//------------------------------------------------------------------------------ 1851 1852inline XMMATRIX XMMatrixAffineTransformation 1853( 1854 FXMVECTOR Scaling, 1855 FXMVECTOR RotationOrigin, 1856 FXMVECTOR RotationQuaternion, 1857 GXMVECTOR Translation 1858) 1859{ 1860 // M = MScaling * Inverse(MRotationOrigin) * MRotation * MRotationOrigin * MTranslation; 1861 1862 XMMATRIX MScaling = XMMatrixScalingFromVector(Scaling); 1863 XMVECTOR VRotationOrigin = XMVectorSelect(g_XMSelect1110.v, RotationOrigin,g_XMSelect1110.v); 1864 XMMATRIX MRotation = XMMatrixRotationQuaternion(RotationQuaternion); 1865 XMVECTOR VTranslation = XMVectorSelect(g_XMSelect1110.v, Translation,g_XMSelect1110.v); 1866 1867 XMMATRIX M; 1868 M = MScaling; 1869 M.r[3] = XMVectorSubtract(M.r[3], VRotationOrigin); 1870 M = 
XMMatrixMultiply(M, MRotation); 1871 M.r[3] = XMVectorAdd(M.r[3], VRotationOrigin); 1872 M.r[3] = XMVectorAdd(M.r[3], VTranslation); 1873 return M; 1874} 1875 1876//------------------------------------------------------------------------------ 1877 1878inline XMMATRIX XMMatrixReflect 1879( 1880 FXMVECTOR ReflectionPlane 1881) 1882{ 1883 assert(!XMVector3Equal(ReflectionPlane, XMVectorZero())); 1884 assert(!XMPlaneIsInfinite(ReflectionPlane)); 1885 1886#if defined(_XM_NO_INTRINSICS_) || defined(_XM_SSE_INTRINSICS_) || defined(_XM_ARM_NEON_INTRINSICS_) 1887 1888 static const XMVECTORF32 NegativeTwo = {-2.0f, -2.0f, -2.0f, 0.0f}; 1889 1890 XMVECTOR P = XMPlaneNormalize(ReflectionPlane); 1891 XMVECTOR S = XMVectorMultiply(P, NegativeTwo); 1892 1893 XMVECTOR A = XMVectorSplatX(P); 1894 XMVECTOR B = XMVectorSplatY(P); 1895 XMVECTOR C = XMVectorSplatZ(P); 1896 XMVECTOR D = XMVectorSplatW(P); 1897 1898 XMMATRIX M; 1899 M.r[0] = XMVectorMultiplyAdd(A, S, g_XMIdentityR0.v); 1900 M.r[1] = XMVectorMultiplyAdd(B, S, g_XMIdentityR1.v); 1901 M.r[2] = XMVectorMultiplyAdd(C, S, g_XMIdentityR2.v); 1902 M.r[3] = XMVectorMultiplyAdd(D, S, g_XMIdentityR3.v); 1903 return M; 1904 1905#else // _XM_VMX128_INTRINSICS_ 1906#endif // _XM_VMX128_INTRINSICS_ 1907} 1908 1909//------------------------------------------------------------------------------ 1910 1911inline XMMATRIX XMMatrixShadow 1912( 1913 FXMVECTOR ShadowPlane, 1914 FXMVECTOR LightPosition 1915) 1916{ 1917 static const XMVECTORU32 Select0001 = {XM_SELECT_0, XM_SELECT_0, XM_SELECT_0, XM_SELECT_1}; 1918 1919 assert(!XMVector3Equal(ShadowPlane, XMVectorZero())); 1920 assert(!XMPlaneIsInfinite(ShadowPlane)); 1921 1922 XMVECTOR P = XMPlaneNormalize(ShadowPlane); 1923 XMVECTOR Dot = XMPlaneDot(P, LightPosition); 1924 P = XMVectorNegate(P); 1925 XMVECTOR D = XMVectorSplatW(P); 1926 XMVECTOR C = XMVectorSplatZ(P); 1927 XMVECTOR B = XMVectorSplatY(P); 1928 XMVECTOR A = XMVectorSplatX(P); 1929 Dot = XMVectorSelect(Select0001.v, Dot, 
Select0001.v); 1930 1931 XMMATRIX M; 1932 M.r[3] = XMVectorMultiplyAdd(D, LightPosition, Dot); 1933 Dot = XMVectorRotateLeft(Dot, 1); 1934 M.r[2] = XMVectorMultiplyAdd(C, LightPosition, Dot); 1935 Dot = XMVectorRotateLeft(Dot, 1); 1936 M.r[1] = XMVectorMultiplyAdd(B, LightPosition, Dot); 1937 Dot = XMVectorRotateLeft(Dot, 1); 1938 M.r[0] = XMVectorMultiplyAdd(A, LightPosition, Dot); 1939 return M; 1940} 1941 1942//------------------------------------------------------------------------------ 1943// View and projection initialization operations 1944//------------------------------------------------------------------------------ 1945 1946inline XMMATRIX XMMatrixLookAtLH 1947( 1948 FXMVECTOR EyePosition, 1949 FXMVECTOR FocusPosition, 1950 FXMVECTOR UpDirection 1951) 1952{ 1953 XMVECTOR EyeDirection = XMVectorSubtract(FocusPosition, EyePosition); 1954 return XMMatrixLookToLH(EyePosition, EyeDirection, UpDirection); 1955} 1956 1957//------------------------------------------------------------------------------ 1958 1959inline XMMATRIX XMMatrixLookAtRH 1960( 1961 FXMVECTOR EyePosition, 1962 FXMVECTOR FocusPosition, 1963 FXMVECTOR UpDirection 1964) 1965{ 1966 XMVECTOR NegEyeDirection = XMVectorSubtract(EyePosition, FocusPosition); 1967 return XMMatrixLookToLH(EyePosition, NegEyeDirection, UpDirection); 1968} 1969 1970//------------------------------------------------------------------------------ 1971 1972inline XMMATRIX XMMatrixLookToLH 1973( 1974 FXMVECTOR EyePosition, 1975 FXMVECTOR EyeDirection, 1976 FXMVECTOR UpDirection 1977) 1978{ 1979 assert(!XMVector3Equal(EyeDirection, XMVectorZero())); 1980 assert(!XMVector3IsInfinite(EyeDirection)); 1981 assert(!XMVector3Equal(UpDirection, XMVectorZero())); 1982 assert(!XMVector3IsInfinite(UpDirection)); 1983 1984#if defined(_XM_NO_INTRINSICS_) || defined(_XM_SSE_INTRINSICS_) || defined(_XM_ARM_NEON_INTRINSICS_) 1985 1986 XMVECTOR R2 = XMVector3Normalize(EyeDirection); 1987 1988 XMVECTOR R0 = XMVector3Cross(UpDirection, R2); 
1989 R0 = XMVector3Normalize(R0); 1990 1991 XMVECTOR R1 = XMVector3Cross(R2, R0); 1992 1993 XMVECTOR NegEyePosition = XMVectorNegate(EyePosition); 1994 1995 XMVECTOR D0 = XMVector3Dot(R0, NegEyePosition); 1996 XMVECTOR D1 = XMVector3Dot(R1, NegEyePosition); 1997 XMVECTOR D2 = XMVector3Dot(R2, NegEyePosition); 1998 1999 XMMATRIX M; 2000 M.r[0] = XMVectorSelect(D0, R0, g_XMSelect1110.v); 2001 M.r[1] = XMVectorSelect(D1, R1, g_XMSelect1110.v); 2002 M.r[2] = XMVectorSelect(D2, R2, g_XMSelect1110.v); 2003 M.r[3] = g_XMIdentityR3.v; 2004 2005 M = XMMatrixTranspose(M); 2006 2007 return M; 2008 2009#else // _XM_VMX128_INTRINSICS_ 2010#endif // _XM_VMX128_INTRINSICS_ 2011} 2012 2013//------------------------------------------------------------------------------ 2014 2015inline XMMATRIX XMMatrixLookToRH 2016( 2017 FXMVECTOR EyePosition, 2018 FXMVECTOR EyeDirection, 2019 FXMVECTOR UpDirection 2020) 2021{ 2022 XMVECTOR NegEyeDirection = XMVectorNegate(EyeDirection); 2023 return XMMatrixLookToLH(EyePosition, NegEyeDirection, UpDirection); 2024} 2025 2026//------------------------------------------------------------------------------ 2027 2028inline XMMATRIX XMMatrixPerspectiveLH 2029( 2030 float ViewWidth, 2031 float ViewHeight, 2032 float NearZ, 2033 float FarZ 2034) 2035{ 2036 assert(!XMScalarNearEqual(ViewWidth, 0.0f, 0.00001f)); 2037 assert(!XMScalarNearEqual(ViewHeight, 0.0f, 0.00001f)); 2038 assert(!XMScalarNearEqual(FarZ, NearZ, 0.00001f)); 2039 2040#if defined(_XM_NO_INTRINSICS_) 2041 2042 float TwoNearZ = NearZ + NearZ; 2043 float fRange = FarZ / (FarZ - NearZ); 2044 2045 XMMATRIX M; 2046 M.m[0][0] = TwoNearZ / ViewWidth; 2047 M.m[0][1] = 0.0f; 2048 M.m[0][2] = 0.0f; 2049 M.m[0][3] = 0.0f; 2050 2051 M.m[1][0] = 0.0f; 2052 M.m[1][1] = TwoNearZ / ViewHeight; 2053 M.m[1][2] = 0.0f; 2054 M.m[1][3] = 0.0f; 2055 2056 M.m[2][0] = 0.0f; 2057 M.m[2][1] = 0.0f; 2058 M.m[2][2] = fRange; 2059 M.m[2][3] = 1.0f; 2060 2061 M.m[3][0] = 0.0f; 2062 M.m[3][1] = 0.0f; 2063 M.m[3][2] = 
-fRange * NearZ; 2064 M.m[3][3] = 0.0f; 2065 return M; 2066 2067#elif defined(_XM_ARM_NEON_INTRINSICS_) 2068 float TwoNearZ = NearZ + NearZ; 2069 float fRange = FarZ / (FarZ - NearZ); 2070 const XMVECTOR Zero = vdupq_n_f32(0); 2071 XMMATRIX M; 2072 M.r[0] = vsetq_lane_f32( TwoNearZ / ViewWidth, Zero, 0 ); 2073 M.r[1] = vsetq_lane_f32( TwoNearZ / ViewHeight, Zero, 1 ); 2074 M.r[2] = vsetq_lane_f32( fRange, g_XMIdentityR3.v, 2 ); 2075 M.r[3] = vsetq_lane_f32( -fRange * NearZ, Zero, 2 ); 2076 return M; 2077#elif defined(_XM_SSE_INTRINSICS_) 2078 XMMATRIX M; 2079 float TwoNearZ = NearZ + NearZ; 2080 float fRange = FarZ / (FarZ - NearZ); 2081 // Note: This is recorded on the stack 2082 XMVECTOR rMem = { 2083 TwoNearZ / ViewWidth, 2084 TwoNearZ / ViewHeight, 2085 fRange, 2086 -fRange * NearZ 2087 }; 2088 // Copy from memory to SSE register 2089 XMVECTOR vValues = rMem; 2090 XMVECTOR vTemp = _mm_setzero_ps(); 2091 // Copy x only 2092 vTemp = _mm_move_ss(vTemp,vValues); 2093 // TwoNearZ / ViewWidth,0,0,0 2094 M.r[0] = vTemp; 2095 // 0,TwoNearZ / ViewHeight,0,0 2096 vTemp = vValues; 2097 vTemp = _mm_and_ps(vTemp,g_XMMaskY); 2098 M.r[1] = vTemp; 2099 // x=fRange,y=-fRange * NearZ,0,1.0f 2100 vValues = _mm_shuffle_ps(vValues,g_XMIdentityR3,_MM_SHUFFLE(3,2,3,2)); 2101 // 0,0,fRange,1.0f 2102 vTemp = _mm_setzero_ps(); 2103 vTemp = _mm_shuffle_ps(vTemp,vValues,_MM_SHUFFLE(3,0,0,0)); 2104 M.r[2] = vTemp; 2105 // 0,0,-fRange * NearZ,0 2106 vTemp = _mm_shuffle_ps(vTemp,vValues,_MM_SHUFFLE(2,1,0,0)); 2107 M.r[3] = vTemp; 2108 2109 return M; 2110#else // _XM_VMX128_INTRINSICS_ 2111#endif // _XM_VMX128_INTRINSICS_ 2112} 2113 2114//------------------------------------------------------------------------------ 2115 2116inline XMMATRIX XMMatrixPerspectiveRH 2117( 2118 float ViewWidth, 2119 float ViewHeight, 2120 float NearZ, 2121 float FarZ 2122) 2123{ 2124 assert(!XMScalarNearEqual(ViewWidth, 0.0f, 0.00001f)); 2125 assert(!XMScalarNearEqual(ViewHeight, 0.0f, 0.00001f)); 2126 
assert(!XMScalarNearEqual(FarZ, NearZ, 0.00001f)); 2127 2128#if defined(_XM_NO_INTRINSICS_) 2129 2130 float TwoNearZ = NearZ + NearZ; 2131 float fRange = FarZ / (NearZ - FarZ); 2132 2133 XMMATRIX M; 2134 M.m[0][0] = TwoNearZ / ViewWidth; 2135 M.m[0][1] = 0.0f; 2136 M.m[0][2] = 0.0f; 2137 M.m[0][3] = 0.0f; 2138 2139 M.m[1][0] = 0.0f; 2140 M.m[1][1] = TwoNearZ / ViewHeight; 2141 M.m[1][2] = 0.0f; 2142 M.m[1][3] = 0.0f; 2143 2144 M.m[2][0] = 0.0f; 2145 M.m[2][1] = 0.0f; 2146 M.m[2][2] = fRange; 2147 M.m[2][3] = -1.0f; 2148 2149 M.m[3][0] = 0.0f; 2150 M.m[3][1] = 0.0f; 2151 M.m[3][2] = fRange * NearZ; 2152 M.m[3][3] = 0.0f; 2153 return M; 2154 2155#elif defined(_XM_ARM_NEON_INTRINSICS_) 2156 float TwoNearZ = NearZ + NearZ; 2157 float fRange = FarZ / (NearZ - FarZ); 2158 const XMVECTOR Zero = vdupq_n_f32(0); 2159 2160 XMMATRIX M; 2161 M.r[0] = vsetq_lane_f32( TwoNearZ / ViewWidth, Zero, 0 ); 2162 M.r[1] = vsetq_lane_f32( TwoNearZ / ViewHeight, Zero, 1 ); 2163 M.r[2] = vsetq_lane_f32( fRange, g_XMNegIdentityR3.v, 2 ); 2164 M.r[3] = vsetq_lane_f32( fRange * NearZ, Zero, 2 ); 2165 return M; 2166#elif defined(_XM_SSE_INTRINSICS_) 2167 XMMATRIX M; 2168 float TwoNearZ = NearZ + NearZ; 2169 float fRange = FarZ / (NearZ-FarZ); 2170 // Note: This is recorded on the stack 2171 XMVECTOR rMem = { 2172 TwoNearZ / ViewWidth, 2173 TwoNearZ / ViewHeight, 2174 fRange, 2175 fRange * NearZ 2176 }; 2177 // Copy from memory to SSE register 2178 XMVECTOR vValues = rMem; 2179 XMVECTOR vTemp = _mm_setzero_ps(); 2180 // Copy x only 2181 vTemp = _mm_move_ss(vTemp,vValues); 2182 // TwoNearZ / ViewWidth,0,0,0 2183 M.r[0] = vTemp; 2184 // 0,TwoNearZ / ViewHeight,0,0 2185 vTemp = vValues; 2186 vTemp = _mm_and_ps(vTemp,g_XMMaskY); 2187 M.r[1] = vTemp; 2188 // x=fRange,y=-fRange * NearZ,0,-1.0f 2189 vValues = _mm_shuffle_ps(vValues,g_XMNegIdentityR3,_MM_SHUFFLE(3,2,3,2)); 2190 // 0,0,fRange,-1.0f 2191 vTemp = _mm_setzero_ps(); 2192 vTemp = _mm_shuffle_ps(vTemp,vValues,_MM_SHUFFLE(3,0,0,0)); 2193 
M.r[2] = vTemp; 2194 // 0,0,-fRange * NearZ,0 2195 vTemp = _mm_shuffle_ps(vTemp,vValues,_MM_SHUFFLE(2,1,0,0)); 2196 M.r[3] = vTemp; 2197 return M; 2198#else // _XM_VMX128_INTRINSICS_ 2199#endif // _XM_VMX128_INTRINSICS_ 2200} 2201 2202//------------------------------------------------------------------------------ 2203 2204inline XMMATRIX XMMatrixPerspectiveFovLH 2205( 2206 float FovAngleY, 2207 float AspectHByW, 2208 float NearZ, 2209 float FarZ 2210) 2211{ 2212 assert(!XMScalarNearEqual(FovAngleY, 0.0f, 0.00001f * 2.0f)); 2213 assert(!XMScalarNearEqual(AspectHByW, 0.0f, 0.00001f)); 2214 assert(!XMScalarNearEqual(FarZ, NearZ, 0.00001f)); 2215 2216#if defined(_XM_NO_INTRINSICS_) 2217 2218 float SinFov; 2219 float CosFov; 2220 XMScalarSinCos(&SinFov, &CosFov, 0.5f * FovAngleY); 2221 2222 float Height = CosFov / SinFov; 2223 float Width = Height / AspectHByW; 2224 float fRange = FarZ / (FarZ-NearZ); 2225 2226 XMMATRIX M; 2227 M.m[0][0] = Width; 2228 M.m[0][1] = 0.0f; 2229 M.m[0][2] = 0.0f; 2230 M.m[0][3] = 0.0f; 2231 2232 M.m[1][0] = 0.0f; 2233 M.m[1][1] = Height; 2234 M.m[1][2] = 0.0f; 2235 M.m[1][3] = 0.0f; 2236 2237 M.m[2][0] = 0.0f; 2238 M.m[2][1] = 0.0f; 2239 M.m[2][2] = fRange; 2240 M.m[2][3] = 1.0f; 2241 2242 M.m[3][0] = 0.0f; 2243 M.m[3][1] = 0.0f; 2244 M.m[3][2] = -fRange * NearZ; 2245 M.m[3][3] = 0.0f; 2246 return M; 2247 2248#elif defined(_XM_ARM_NEON_INTRINSICS_) 2249 float SinFov; 2250 float CosFov; 2251 XMScalarSinCos(&SinFov, &CosFov, 0.5f * FovAngleY); 2252 2253 float fRange = FarZ / (FarZ-NearZ); 2254 float Height = CosFov / SinFov; 2255 float Width = Height / AspectHByW; 2256 const XMVECTOR Zero = vdupq_n_f32(0); 2257 2258 XMMATRIX M; 2259 M.r[0] = vsetq_lane_f32( Width, Zero, 0 ); 2260 M.r[1] = vsetq_lane_f32( Height, Zero, 1 ); 2261 M.r[2] = vsetq_lane_f32( fRange, g_XMIdentityR3.v, 2 ); 2262 M.r[3] = vsetq_lane_f32( -fRange * NearZ, Zero, 2 ); 2263 return M; 2264#elif defined(_XM_SSE_INTRINSICS_) 2265 float SinFov; 2266 float CosFov; 2267 
XMScalarSinCos(&SinFov, &CosFov, 0.5f * FovAngleY); 2268 2269 float fRange = FarZ / (FarZ-NearZ); 2270 // Note: This is recorded on the stack 2271 float Height = CosFov / SinFov; 2272 XMVECTOR rMem = { 2273 Height / AspectHByW, 2274 Height, 2275 fRange, 2276 -fRange * NearZ 2277 }; 2278 // Copy from memory to SSE register 2279 XMVECTOR vValues = rMem; 2280 XMVECTOR vTemp = _mm_setzero_ps(); 2281 // Copy x only 2282 vTemp = _mm_move_ss(vTemp,vValues); 2283 // CosFov / SinFov,0,0,0 2284 XMMATRIX M; 2285 M.r[0] = vTemp; 2286 // 0,Height / AspectHByW,0,0 2287 vTemp = vValues; 2288 vTemp = _mm_and_ps(vTemp,g_XMMaskY); 2289 M.r[1] = vTemp; 2290 // x=fRange,y=-fRange * NearZ,0,1.0f 2291 vTemp = _mm_setzero_ps(); 2292 vValues = _mm_shuffle_ps(vValues,g_XMIdentityR3,_MM_SHUFFLE(3,2,3,2)); 2293 // 0,0,fRange,1.0f 2294 vTemp = _mm_shuffle_ps(vTemp,vValues,_MM_SHUFFLE(3,0,0,0)); 2295 M.r[2] = vTemp; 2296 // 0,0,-fRange * NearZ,0.0f 2297 vTemp = _mm_shuffle_ps(vTemp,vValues,_MM_SHUFFLE(2,1,0,0)); 2298 M.r[3] = vTemp; 2299 return M; 2300#else // _XM_VMX128_INTRINSICS_ 2301#endif // _XM_VMX128_INTRINSICS_ 2302} 2303 2304//------------------------------------------------------------------------------ 2305 2306inline XMMATRIX XMMatrixPerspectiveFovRH 2307( 2308 float FovAngleY, 2309 float AspectHByW, 2310 float NearZ, 2311 float FarZ 2312) 2313{ 2314 assert(!XMScalarNearEqual(FovAngleY, 0.0f, 0.00001f * 2.0f)); 2315 assert(!XMScalarNearEqual(AspectHByW, 0.0f, 0.00001f)); 2316 assert(!XMScalarNearEqual(FarZ, NearZ, 0.00001f)); 2317 2318#if defined(_XM_NO_INTRINSICS_) 2319 2320 float SinFov; 2321 float CosFov; 2322 XMScalarSinCos(&SinFov, &CosFov, 0.5f * FovAngleY); 2323 2324 float Height = CosFov / SinFov; 2325 float Width = Height / AspectHByW; 2326 float fRange = FarZ / (NearZ-FarZ); 2327 2328 XMMATRIX M; 2329 M.m[0][0] = Width; 2330 M.m[0][1] = 0.0f; 2331 M.m[0][2] = 0.0f; 2332 M.m[0][3] = 0.0f; 2333 2334 M.m[1][0] = 0.0f; 2335 M.m[1][1] = Height; 2336 M.m[1][2] = 0.0f; 2337 
M.m[1][3] = 0.0f; 2338 2339 M.m[2][0] = 0.0f; 2340 M.m[2][1] = 0.0f; 2341 M.m[2][2] = fRange; 2342 M.m[2][3] = -1.0f; 2343 2344 M.m[3][0] = 0.0f; 2345 M.m[3][1] = 0.0f; 2346 M.m[3][2] = fRange * NearZ; 2347 M.m[3][3] = 0.0f; 2348 return M; 2349 2350#elif defined(_XM_ARM_NEON_INTRINSICS_) 2351 float SinFov; 2352 float CosFov; 2353 XMScalarSinCos(&SinFov, &CosFov, 0.5f * FovAngleY); 2354 float fRange = FarZ / (NearZ-FarZ); 2355 float Height = CosFov / SinFov; 2356 float Width = Height / AspectHByW; 2357 const XMVECTOR Zero = vdupq_n_f32(0); 2358 2359 XMMATRIX M; 2360 M.r[0] = vsetq_lane_f32( Width, Zero, 0 ); 2361 M.r[1] = vsetq_lane_f32( Height, Zero, 1 ); 2362 M.r[2] = vsetq_lane_f32( fRange, g_XMNegIdentityR3.v, 2 ); 2363 M.r[3] = vsetq_lane_f32( fRange * NearZ, Zero, 2 ); 2364 return M; 2365#elif defined(_XM_SSE_INTRINSICS_) 2366 float SinFov; 2367 float CosFov; 2368 XMScalarSinCos(&SinFov, &CosFov, 0.5f * FovAngleY); 2369 float fRange = FarZ / (NearZ-FarZ); 2370 // Note: This is recorded on the stack 2371 float Height = CosFov / SinFov; 2372 XMVECTOR rMem = { 2373 Height / AspectHByW, 2374 Height, 2375 fRange, 2376 fRange * NearZ 2377 }; 2378 // Copy from memory to SSE register 2379 XMVECTOR vValues = rMem; 2380 XMVECTOR vTemp = _mm_setzero_ps(); 2381 // Copy x only 2382 vTemp = _mm_move_ss(vTemp,vValues); 2383 // CosFov / SinFov,0,0,0 2384 XMMATRIX M; 2385 M.r[0] = vTemp; 2386 // 0,Height / AspectHByW,0,0 2387 vTemp = vValues; 2388 vTemp = _mm_and_ps(vTemp,g_XMMaskY); 2389 M.r[1] = vTemp; 2390 // x=fRange,y=-fRange * NearZ,0,-1.0f 2391 vTemp = _mm_setzero_ps(); 2392 vValues = _mm_shuffle_ps(vValues,g_XMNegIdentityR3,_MM_SHUFFLE(3,2,3,2)); 2393 // 0,0,fRange,-1.0f 2394 vTemp = _mm_shuffle_ps(vTemp,vValues,_MM_SHUFFLE(3,0,0,0)); 2395 M.r[2] = vTemp; 2396 // 0,0,fRange * NearZ,0.0f 2397 vTemp = _mm_shuffle_ps(vTemp,vValues,_MM_SHUFFLE(2,1,0,0)); 2398 M.r[3] = vTemp; 2399 return M; 2400#else // _XM_VMX128_INTRINSICS_ 2401#endif // _XM_VMX128_INTRINSICS_ 2402} 2403 
2404//------------------------------------------------------------------------------ 2405 2406inline XMMATRIX XMMatrixPerspectiveOffCenterLH 2407( 2408 float ViewLeft, 2409 float ViewRight, 2410 float ViewBottom, 2411 float ViewTop, 2412 float NearZ, 2413 float FarZ 2414) 2415{ 2416 assert(!XMScalarNearEqual(ViewRight, ViewLeft, 0.00001f)); 2417 assert(!XMScalarNearEqual(ViewTop, ViewBottom, 0.00001f)); 2418 assert(!XMScalarNearEqual(FarZ, NearZ, 0.00001f)); 2419 2420#if defined(_XM_NO_INTRINSICS_) 2421 2422 float TwoNearZ = NearZ + NearZ; 2423 float ReciprocalWidth = 1.0f / (ViewRight - ViewLeft); 2424 float ReciprocalHeight = 1.0f / (ViewTop - ViewBottom); 2425 float fRange = FarZ / (FarZ-NearZ); 2426 2427 XMMATRIX M; 2428 M.m[0][0] = TwoNearZ * ReciprocalWidth; 2429 M.m[0][1] = 0.0f; 2430 M.m[0][2] = 0.0f; 2431 M.m[0][3] = 0.0f; 2432 2433 M.m[1][0] = 0.0f; 2434 M.m[1][1] = TwoNearZ * ReciprocalHeight; 2435 M.m[1][2] = 0.0f; 2436 M.m[1][3] = 0.0f; 2437 2438 M.m[2][0] = -(ViewLeft + ViewRight) * ReciprocalWidth; 2439 M.m[2][1] = -(ViewTop + ViewBottom) * ReciprocalHeight; 2440 M.m[2][2] = fRange; 2441 M.m[2][3] = 1.0f; 2442 2443 M.m[3][0] = 0.0f; 2444 M.m[3][1] = 0.0f; 2445 M.m[3][2] = -fRange * NearZ; 2446 M.m[3][3] = 0.0f; 2447 return M; 2448 2449#elif defined(_XM_ARM_NEON_INTRINSICS_) 2450 float TwoNearZ = NearZ + NearZ; 2451 float ReciprocalWidth = 1.0f / (ViewRight - ViewLeft); 2452 float ReciprocalHeight = 1.0f / (ViewTop - ViewBottom); 2453 float fRange = FarZ / (FarZ-NearZ); 2454 const XMVECTOR Zero = vdupq_n_f32(0); 2455 2456 XMMATRIX M; 2457 M.r[0] = vsetq_lane_f32( TwoNearZ * ReciprocalWidth, Zero, 0 ); 2458 M.r[1] = vsetq_lane_f32( TwoNearZ * ReciprocalHeight, Zero, 1 ); 2459 M.r[2] = XMVectorSet(-(ViewLeft + ViewRight) * ReciprocalWidth, 2460 -(ViewTop + ViewBottom) * ReciprocalHeight, 2461 fRange, 2462 1.0f); 2463 M.r[3] = vsetq_lane_f32( -fRange * NearZ, Zero, 2 ); 2464 return M; 2465#elif defined(_XM_SSE_INTRINSICS_) 2466 XMMATRIX M; 2467 float 
TwoNearZ = NearZ+NearZ; 2468 float ReciprocalWidth = 1.0f / (ViewRight - ViewLeft); 2469 float ReciprocalHeight = 1.0f / (ViewTop - ViewBottom); 2470 float fRange = FarZ / (FarZ-NearZ); 2471 // Note: This is recorded on the stack 2472 XMVECTOR rMem = { 2473 TwoNearZ*ReciprocalWidth, 2474 TwoNearZ*ReciprocalHeight, 2475 -fRange * NearZ, 2476 0 2477 }; 2478 // Copy from memory to SSE register 2479 XMVECTOR vValues = rMem; 2480 XMVECTOR vTemp = _mm_setzero_ps(); 2481 // Copy x only 2482 vTemp = _mm_move_ss(vTemp,vValues); 2483 // TwoNearZ*ReciprocalWidth,0,0,0 2484 M.r[0] = vTemp; 2485 // 0,TwoNearZ*ReciprocalHeight,0,0 2486 vTemp = vValues; 2487 vTemp = _mm_and_ps(vTemp,g_XMMaskY); 2488 M.r[1] = vTemp; 2489 // 0,0,fRange,1.0f 2490 M.r[2] = XMVectorSet( -(ViewLeft + ViewRight) * ReciprocalWidth, 2491 -(ViewTop + ViewBottom) * ReciprocalHeight, 2492 fRange, 2493 1.0f ); 2494 // 0,0,-fRange * NearZ,0.0f 2495 vValues = _mm_and_ps(vValues,g_XMMaskZ); 2496 M.r[3] = vValues; 2497 return M; 2498#else // _XM_VMX128_INTRINSICS_ 2499#endif // _XM_VMX128_INTRINSICS_ 2500} 2501 2502//------------------------------------------------------------------------------ 2503 2504inline XMMATRIX XMMatrixPerspectiveOffCenterRH 2505( 2506 float ViewLeft, 2507 float ViewRight, 2508 float ViewBottom, 2509 float ViewTop, 2510 float NearZ, 2511 float FarZ 2512) 2513{ 2514 assert(!XMScalarNearEqual(ViewRight, ViewLeft, 0.00001f)); 2515 assert(!XMScalarNearEqual(ViewTop, ViewBottom, 0.00001f)); 2516 assert(!XMScalarNearEqual(FarZ, NearZ, 0.00001f)); 2517 2518#if defined(_XM_NO_INTRINSICS_) 2519 2520 float TwoNearZ = NearZ + NearZ; 2521 float ReciprocalWidth = 1.0f / (ViewRight - ViewLeft); 2522 float ReciprocalHeight = 1.0f / (ViewTop - ViewBottom); 2523 float fRange = FarZ / (NearZ-FarZ); 2524 2525 XMMATRIX M; 2526 M.m[0][0] = TwoNearZ * ReciprocalWidth; 2527 M.m[0][1] = 0.0f; 2528 M.m[0][2] = 0.0f; 2529 M.m[0][3] = 0.0f; 2530 2531 M.m[1][0] = 0.0f; 2532 M.m[1][1] = TwoNearZ * ReciprocalHeight; 
2533 M.m[1][2] = 0.0f; 2534 M.m[1][3] = 0.0f; 2535 2536 M.m[2][0] = (ViewLeft + ViewRight) * ReciprocalWidth; 2537 M.m[2][1] = (ViewTop + ViewBottom) * ReciprocalHeight; 2538 M.m[2][2] = fRange; 2539 M.m[2][3] = -1.0f; 2540 2541 M.m[3][0] = 0.0f; 2542 M.m[3][1] = 0.0f; 2543 M.m[3][2] = fRange * NearZ; 2544 M.m[3][3] = 0.0f; 2545 return M; 2546 2547#elif defined(_XM_ARM_NEON_INTRINSICS_) 2548 float TwoNearZ = NearZ + NearZ; 2549 float ReciprocalWidth = 1.0f / (ViewRight - ViewLeft); 2550 float ReciprocalHeight = 1.0f / (ViewTop - ViewBottom); 2551 float fRange = FarZ / (NearZ-FarZ); 2552 const XMVECTOR Zero = vdupq_n_f32(0); 2553 2554 XMMATRIX M; 2555 M.r[0] = vsetq_lane_f32( TwoNearZ * ReciprocalWidth, Zero, 0 ); 2556 M.r[1] = vsetq_lane_f32( TwoNearZ * ReciprocalHeight, Zero, 1 ); 2557 M.r[2] = XMVectorSet((ViewLeft + ViewRight) * ReciprocalWidth, 2558 (ViewTop + ViewBottom) * ReciprocalHeight, 2559 fRange, 2560 -1.0f); 2561 M.r[3] = vsetq_lane_f32( fRange * NearZ, Zero, 2 ); 2562 return M; 2563#elif defined(_XM_SSE_INTRINSICS_) 2564 XMMATRIX M; 2565 float TwoNearZ = NearZ+NearZ; 2566 float ReciprocalWidth = 1.0f / (ViewRight - ViewLeft); 2567 float ReciprocalHeight = 1.0f / (ViewTop - ViewBottom); 2568 float fRange = FarZ / (NearZ-FarZ); 2569 // Note: This is recorded on the stack 2570 XMVECTOR rMem = { 2571 TwoNearZ*ReciprocalWidth, 2572 TwoNearZ*ReciprocalHeight, 2573 fRange * NearZ, 2574 0 2575 }; 2576 // Copy from memory to SSE register 2577 XMVECTOR vValues = rMem; 2578 XMVECTOR vTemp = _mm_setzero_ps(); 2579 // Copy x only 2580 vTemp = _mm_move_ss(vTemp,vValues); 2581 // TwoNearZ*ReciprocalWidth,0,0,0 2582 M.r[0] = vTemp; 2583 // 0,TwoNearZ*ReciprocalHeight,0,0 2584 vTemp = vValues; 2585 vTemp = _mm_and_ps(vTemp,g_XMMaskY); 2586 M.r[1] = vTemp; 2587 // 0,0,fRange,1.0f 2588 M.r[2] = XMVectorSet( (ViewLeft + ViewRight) * ReciprocalWidth, 2589 (ViewTop + ViewBottom) * ReciprocalHeight, 2590 fRange, 2591 -1.0f ); 2592 // 0,0,-fRange * NearZ,0.0f 2593 vValues = 
_mm_and_ps(vValues,g_XMMaskZ); 2594 M.r[3] = vValues; 2595 return M; 2596#else // _XM_VMX128_INTRINSICS_ 2597#endif // _XM_VMX128_INTRINSICS_ 2598} 2599 2600//------------------------------------------------------------------------------ 2601 2602inline XMMATRIX XMMatrixOrthographicLH 2603( 2604 float ViewWidth, 2605 float ViewHeight, 2606 float NearZ, 2607 float FarZ 2608) 2609{ 2610 assert(!XMScalarNearEqual(ViewWidth, 0.0f, 0.00001f)); 2611 assert(!XMScalarNearEqual(ViewHeight, 0.0f, 0.00001f)); 2612 assert(!XMScalarNearEqual(FarZ, NearZ, 0.00001f)); 2613 2614#if defined(_XM_NO_INTRINSICS_) 2615 2616 float fRange = 1.0f / (FarZ-NearZ); 2617 2618 XMMATRIX M; 2619 M.m[0][0] = 2.0f / ViewWidth; 2620 M.m[0][1] = 0.0f; 2621 M.m[0][2] = 0.0f; 2622 M.m[0][3] = 0.0f; 2623 2624 M.m[1][0] = 0.0f; 2625 M.m[1][1] = 2.0f / ViewHeight; 2626 M.m[1][2] = 0.0f; 2627 M.m[1][3] = 0.0f; 2628 2629 M.m[2][0] = 0.0f; 2630 M.m[2][1] = 0.0f; 2631 M.m[2][2] = fRange; 2632 M.m[2][3] = 0.0f; 2633 2634 M.m[3][0] = 0.0f; 2635 M.m[3][1] = 0.0f; 2636 M.m[3][2] = -fRange * NearZ; 2637 M.m[3][3] = 1.0f; 2638 return M; 2639 2640#elif defined(_XM_ARM_NEON_INTRINSICS_) 2641 float fRange = 1.0f / (FarZ-NearZ); 2642 2643 const XMVECTOR Zero = vdupq_n_f32(0); 2644 XMMATRIX M; 2645 M.r[0] = vsetq_lane_f32( 2.0f / ViewWidth, Zero, 0 ); 2646 M.r[1] = vsetq_lane_f32( 2.0f / ViewHeight, Zero, 1 ); 2647 M.r[2] = vsetq_lane_f32( fRange, Zero, 2 ); 2648 M.r[3] = vsetq_lane_f32( -fRange * NearZ, g_XMIdentityR3.v, 2 ); 2649 return M; 2650#elif defined(_XM_SSE_INTRINSICS_) 2651 XMMATRIX M; 2652 float fRange = 1.0f / (FarZ-NearZ); 2653 // Note: This is recorded on the stack 2654 XMVECTOR rMem = { 2655 2.0f / ViewWidth, 2656 2.0f / ViewHeight, 2657 fRange, 2658 -fRange * NearZ 2659 }; 2660 // Copy from memory to SSE register 2661 XMVECTOR vValues = rMem; 2662 XMVECTOR vTemp = _mm_setzero_ps(); 2663 // Copy x only 2664 vTemp = _mm_move_ss(vTemp,vValues); 2665 // 2.0f / ViewWidth,0,0,0 2666 M.r[0] = vTemp; 2667 // 
0,2.0f / ViewHeight,0,0 2668 vTemp = vValues; 2669 vTemp = _mm_and_ps(vTemp,g_XMMaskY); 2670 M.r[1] = vTemp; 2671 // x=fRange,y=-fRange * NearZ,0,1.0f 2672 vTemp = _mm_setzero_ps(); 2673 vValues = _mm_shuffle_ps(vValues,g_XMIdentityR3,_MM_SHUFFLE(3,2,3,2)); 2674 // 0,0,fRange,0.0f 2675 vTemp = _mm_shuffle_ps(vTemp,vValues,_MM_SHUFFLE(2,0,0,0)); 2676 M.r[2] = vTemp; 2677 // 0,0,-fRange * NearZ,1.0f 2678 vTemp = _mm_shuffle_ps(vTemp,vValues,_MM_SHUFFLE(3,1,0,0)); 2679 M.r[3] = vTemp; 2680 return M; 2681#else // _XM_VMX128_INTRINSICS_ 2682#endif // _XM_VMX128_INTRINSICS_ 2683} 2684 2685//------------------------------------------------------------------------------ 2686 2687inline XMMATRIX XMMatrixOrthographicRH 2688( 2689 float ViewWidth, 2690 float ViewHeight, 2691 float NearZ, 2692 float FarZ 2693) 2694{ 2695 assert(!XMScalarNearEqual(ViewWidth, 0.0f, 0.00001f)); 2696 assert(!XMScalarNearEqual(ViewHeight, 0.0f, 0.00001f)); 2697 assert(!XMScalarNearEqual(FarZ, NearZ, 0.00001f)); 2698 2699#if defined(_XM_NO_INTRINSICS_) 2700 2701 float fRange = 1.0f / (NearZ-FarZ); 2702 2703 XMMATRIX M; 2704 M.m[0][0] = 2.0f / ViewWidth; 2705 M.m[0][1] = 0.0f; 2706 M.m[0][2] = 0.0f; 2707 M.m[0][3] = 0.0f; 2708 2709 M.m[1][0] = 0.0f; 2710 M.m[1][1] = 2.0f / ViewHeight; 2711 M.m[1][2] = 0.0f; 2712 M.m[1][3] = 0.0f; 2713 2714 M.m[2][0] = 0.0f; 2715 M.m[2][1] = 0.0f; 2716 M.m[2][2] = fRange; 2717 M.m[2][3] = 0.0f; 2718 2719 M.m[3][0] = 0.0f; 2720 M.m[3][1] = 0.0f; 2721 M.m[3][2] = fRange * NearZ; 2722 M.m[3][3] = 1.0f; 2723 return M; 2724 2725#elif defined(_XM_ARM_NEON_INTRINSICS_) 2726 float fRange = 1.0f / (NearZ-FarZ); 2727 2728 const XMVECTOR Zero = vdupq_n_f32(0); 2729 XMMATRIX M; 2730 M.r[0] = vsetq_lane_f32( 2.0f / ViewWidth, Zero, 0 ); 2731 M.r[1] = vsetq_lane_f32( 2.0f / ViewHeight, Zero, 1 ); 2732 M.r[2] = vsetq_lane_f32( fRange, Zero, 2 ); 2733 M.r[3] = vsetq_lane_f32( fRange * NearZ, g_XMIdentityR3.v, 2 ); 2734 return M; 2735#elif defined(_XM_SSE_INTRINSICS_) 2736 XMMATRIX M; 
2737 float fRange = 1.0f / (NearZ-FarZ); 2738 // Note: This is recorded on the stack 2739 XMVECTOR rMem = { 2740 2.0f / ViewWidth, 2741 2.0f / ViewHeight, 2742 fRange, 2743 fRange * NearZ 2744 }; 2745 // Copy from memory to SSE register 2746 XMVECTOR vValues = rMem; 2747 XMVECTOR vTemp = _mm_setzero_ps(); 2748 // Copy x only 2749 vTemp = _mm_move_ss(vTemp,vValues); 2750 // 2.0f / ViewWidth,0,0,0 2751 M.r[0] = vTemp; 2752 // 0,2.0f / ViewHeight,0,0 2753 vTemp = vValues; 2754 vTemp = _mm_and_ps(vTemp,g_XMMaskY); 2755 M.r[1] = vTemp; 2756 // x=fRange,y=fRange * NearZ,0,1.0f 2757 vTemp = _mm_setzero_ps(); 2758 vValues = _mm_shuffle_ps(vValues,g_XMIdentityR3,_MM_SHUFFLE(3,2,3,2)); 2759 // 0,0,fRange,0.0f 2760 vTemp = _mm_shuffle_ps(vTemp,vValues,_MM_SHUFFLE(2,0,0,0)); 2761 M.r[2] = vTemp; 2762 // 0,0,fRange * NearZ,1.0f 2763 vTemp = _mm_shuffle_ps(vTemp,vValues,_MM_SHUFFLE(3,1,0,0)); 2764 M.r[3] = vTemp; 2765 return M; 2766#else // _XM_VMX128_INTRINSICS_ 2767#endif // _XM_VMX128_INTRINSICS_ 2768} 2769 2770//------------------------------------------------------------------------------ 2771 2772inline XMMATRIX XMMatrixOrthographicOffCenterLH 2773( 2774 float ViewLeft, 2775 float ViewRight, 2776 float ViewBottom, 2777 float ViewTop, 2778 float NearZ, 2779 float FarZ 2780) 2781{ 2782 assert(!XMScalarNearEqual(ViewRight, ViewLeft, 0.00001f)); 2783 assert(!XMScalarNearEqual(ViewTop, ViewBottom, 0.00001f)); 2784 assert(!XMScalarNearEqual(FarZ, NearZ, 0.00001f)); 2785 2786#if defined(_XM_NO_INTRINSICS_) 2787 2788 float ReciprocalWidth = 1.0f / (ViewRight - ViewLeft); 2789 float ReciprocalHeight = 1.0f / (ViewTop - ViewBottom); 2790 float fRange = 1.0f / (FarZ-NearZ); 2791 2792 XMMATRIX M; 2793 M.m[0][0] = ReciprocalWidth + ReciprocalWidth; 2794 M.m[0][1] = 0.0f; 2795 M.m[0][2] = 0.0f; 2796 M.m[0][3] = 0.0f; 2797 2798 M.m[1][0] = 0.0f; 2799 M.m[1][1] = ReciprocalHeight + ReciprocalHeight; 2800 M.m[1][2] = 0.0f; 2801 M.m[1][3] = 0.0f; 2802 2803 M.m[2][0] = 0.0f; 2804 M.m[2][1] = 
0.0f; 2805 M.m[2][2] = fRange; 2806 M.m[2][3] = 0.0f; 2807 2808 M.m[3][0] = -(ViewLeft + ViewRight) * ReciprocalWidth; 2809 M.m[3][1] = -(ViewTop + ViewBottom) * ReciprocalHeight; 2810 M.m[3][2] = -fRange * NearZ; 2811 M.m[3][3] = 1.0f; 2812 return M; 2813 2814#elif defined(_XM_ARM_NEON_INTRINSICS_) 2815 float ReciprocalWidth = 1.0f / (ViewRight - ViewLeft); 2816 float ReciprocalHeight = 1.0f / (ViewTop - ViewBottom); 2817 float fRange = 1.0f / (FarZ-NearZ); 2818 const XMVECTOR Zero = vdupq_n_f32(0); 2819 XMMATRIX M; 2820 M.r[0] = vsetq_lane_f32( ReciprocalWidth + ReciprocalWidth, Zero, 0 ); 2821 M.r[1] = vsetq_lane_f32( ReciprocalHeight + ReciprocalHeight, Zero, 1 ); 2822 M.r[2] = vsetq_lane_f32( fRange, Zero, 2 ); 2823 M.r[3] = XMVectorSet(-(ViewLeft + ViewRight) * ReciprocalWidth, 2824 -(ViewTop + ViewBottom) * ReciprocalHeight, 2825 -fRange * NearZ, 2826 1.0f); 2827 return M; 2828#elif defined(_XM_SSE_INTRINSICS_) 2829 XMMATRIX M; 2830 float fReciprocalWidth = 1.0f / (ViewRight - ViewLeft); 2831 float fReciprocalHeight = 1.0f / (ViewTop - ViewBottom); 2832 float fRange = 1.0f / (FarZ-NearZ); 2833 // Note: This is recorded on the stack 2834 XMVECTOR rMem = { 2835 fReciprocalWidth, 2836 fReciprocalHeight, 2837 fRange, 2838 1.0f 2839 }; 2840 XMVECTOR rMem2 = { 2841 -(ViewLeft + ViewRight), 2842 -(ViewTop + ViewBottom), 2843 -NearZ, 2844 1.0f 2845 }; 2846 // Copy from memory to SSE register 2847 XMVECTOR vValues = rMem; 2848 XMVECTOR vTemp = _mm_setzero_ps(); 2849 // Copy x only 2850 vTemp = _mm_move_ss(vTemp,vValues); 2851 // fReciprocalWidth*2,0,0,0 2852 vTemp = _mm_add_ss(vTemp,vTemp); 2853 M.r[0] = vTemp; 2854 // 0,fReciprocalHeight*2,0,0 2855 vTemp = vValues; 2856 vTemp = _mm_and_ps(vTemp,g_XMMaskY); 2857 vTemp = _mm_add_ps(vTemp,vTemp); 2858 M.r[1] = vTemp; 2859 // 0,0,fRange,0.0f 2860 vTemp = vValues; 2861 vTemp = _mm_and_ps(vTemp,g_XMMaskZ); 2862 M.r[2] = vTemp; 2863 // -(ViewLeft + ViewRight)*fReciprocalWidth,-(ViewTop + 
ViewBottom)*fReciprocalHeight,fRange*-NearZ,1.0f 2864 vValues = _mm_mul_ps(vValues,rMem2); 2865 M.r[3] = vValues; 2866 return M; 2867#else // _XM_VMX128_INTRINSICS_ 2868#endif // _XM_VMX128_INTRINSICS_ 2869} 2870 2871//------------------------------------------------------------------------------ 2872 2873inline XMMATRIX XMMatrixOrthographicOffCenterRH 2874( 2875 float ViewLeft, 2876 float ViewRight, 2877 float ViewBottom, 2878 float ViewTop, 2879 float NearZ, 2880 float FarZ 2881) 2882{ 2883 assert(!XMScalarNearEqual(ViewRight, ViewLeft, 0.00001f)); 2884 assert(!XMScalarNearEqual(ViewTop, ViewBottom, 0.00001f)); 2885 assert(!XMScalarNearEqual(FarZ, NearZ, 0.00001f)); 2886 2887#if defined(_XM_NO_INTRINSICS_) 2888 2889 float ReciprocalWidth = 1.0f / (ViewRight - ViewLeft); 2890 float ReciprocalHeight = 1.0f / (ViewTop - ViewBottom); 2891 float fRange = 1.0f / (NearZ-FarZ); 2892 2893 XMMATRIX M; 2894 M.m[0][0] = ReciprocalWidth + ReciprocalWidth; 2895 M.m[0][1] = 0.0f; 2896 M.m[0][2] = 0.0f; 2897 M.m[0][3] = 0.0f; 2898 2899 M.m[1][0] = 0.0f; 2900 M.m[1][1] = ReciprocalHeight + ReciprocalHeight; 2901 M.m[1][2] = 0.0f; 2902 M.m[1][3] = 0.0f; 2903 2904 M.m[2][0] = 0.0f; 2905 M.m[2][1] = 0.0f; 2906 M.m[2][2] = fRange; 2907 M.m[2][3] = 0.0f; 2908 2909 M.r[3] = XMVectorSet(-(ViewLeft + ViewRight) * ReciprocalWidth, 2910 -(ViewTop + ViewBottom) * ReciprocalHeight, 2911 fRange * NearZ, 2912 1.0f); 2913 return M; 2914 2915#elif defined(_XM_ARM_NEON_INTRINSICS_) 2916 float ReciprocalWidth = 1.0f / (ViewRight - ViewLeft); 2917 float ReciprocalHeight = 1.0f / (ViewTop - ViewBottom); 2918 float fRange = 1.0f / (NearZ-FarZ); 2919 const XMVECTOR Zero = vdupq_n_f32(0); 2920 XMMATRIX M; 2921 M.r[0] = vsetq_lane_f32( ReciprocalWidth + ReciprocalWidth, Zero, 0 ); 2922 M.r[1] = vsetq_lane_f32( ReciprocalHeight + ReciprocalHeight, Zero, 1 ); 2923 M.r[2] = vsetq_lane_f32( fRange, Zero, 2 ); 2924 M.r[3] = XMVectorSet(-(ViewLeft + ViewRight) * ReciprocalWidth, 2925 -(ViewTop + ViewBottom) * 
ReciprocalHeight,
        fRange * NearZ,
        1.0f);
    return M;
#elif defined(_XM_SSE_INTRINSICS_)
    XMMATRIX M;
    float fReciprocalWidth = 1.0f / (ViewRight - ViewLeft);
    float fReciprocalHeight = 1.0f / (ViewTop - ViewBottom);
    float fRange = 1.0f / (NearZ-FarZ);
    // Note: This is recorded on the stack
    XMVECTOR rMem = {
        fReciprocalWidth,
        fReciprocalHeight,
        fRange,
        1.0f
    };
    XMVECTOR rMem2 = {
        -(ViewLeft + ViewRight),
        -(ViewTop + ViewBottom),
        NearZ,
        1.0f
    };
    // Copy from memory to SSE register
    XMVECTOR vValues = rMem;
    XMVECTOR vTemp = _mm_setzero_ps();
    // Copy x only
    vTemp = _mm_move_ss(vTemp,vValues);
    // fReciprocalWidth*2,0,0,0
    vTemp = _mm_add_ss(vTemp,vTemp);
    M.r[0] = vTemp;
    // 0,fReciprocalHeight*2,0,0
    vTemp = vValues;
    vTemp = _mm_and_ps(vTemp,g_XMMaskY);
    vTemp = _mm_add_ps(vTemp,vTemp);
    M.r[1] = vTemp;
    // 0,0,fRange,0.0f
    vTemp = vValues;
    vTemp = _mm_and_ps(vTemp,g_XMMaskZ);
    M.r[2] = vTemp;
    // -(ViewLeft + ViewRight)*fReciprocalWidth,-(ViewTop + ViewBottom)*fReciprocalHeight,fRange*NearZ,1.0f
    // NOTE(review): the original comment here read "fRange*-NearZ", but the multiply
    // below (vValues.z == fRange, rMem2.z == NearZ) and the scalar path above both
    // produce fRange*NearZ; the comment is corrected to match the code.
    vValues = _mm_mul_ps(vValues,rMem2);
    M.r[3] = vValues;
    return M;
#else // _XM_VMX128_INTRINSICS_
    // No VMX128 implementation present in this path.
#endif // _XM_VMX128_INTRINSICS_
}


/****************************************************************************
 *
 * XMMATRIX operators and methods
 *
 ****************************************************************************/

//------------------------------------------------------------------------------

// Construct a matrix from 16 scalars given in row-major order:
// mRC is row R, column C.
inline XMMATRIX::XMMATRIX
(
    float m00, float m01, float m02, float m03,
    float m10, float m11, float m12, float m13,
    float m20, float m21, float m22, float m23,
    float m30, float m31, float m32, float m33
)
{
    r[0] = XMVectorSet(m00, m01, m02, m03);
    r[1] = XMVectorSet(m10, m11, m12, m13);
    r[2] = XMVectorSet(m20, m21, m22, m23);
    r[3] = XMVectorSet(m30, m31, m32, m33);
}

//------------------------------------------------------------------------------
// Construct a matrix from an array of (at least) 16 floats in row-major order.
// The array is read with unaligned 4-float loads, one per row; it need not be
// 16-byte aligned.
_Use_decl_annotations_
inline XMMATRIX::XMMATRIX
(
    const float* pArray
)
{
    assert( pArray != NULL );
    r[0] = XMLoadFloat4((const XMFLOAT4*)pArray);
    r[1] = XMLoadFloat4((const XMFLOAT4*)(pArray + 4));
    r[2] = XMLoadFloat4((const XMFLOAT4*)(pArray + 8));
    r[3] = XMLoadFloat4((const XMFLOAT4*)(pArray + 12));
}

//------------------------------------------------------------------------------

// Unary minus: return a matrix with every element negated.
inline XMMATRIX XMMATRIX::operator- () const
{
    XMMATRIX R;
    R.r[0] = XMVectorNegate( r[0] );
    R.r[1] = XMVectorNegate( r[1] );
    R.r[2] = XMVectorNegate( r[2] );
    R.r[3] = XMVectorNegate( r[3] );
    return R;
}

//------------------------------------------------------------------------------

// Element-wise matrix addition, in place.
inline XMMATRIX& XMMATRIX::operator+= (CXMMATRIX M)
{
    r[0] = XMVectorAdd( r[0], M.r[0] );
    r[1] = XMVectorAdd( r[1], M.r[1] );
    r[2] = XMVectorAdd( r[2], M.r[2] );
    r[3] = XMVectorAdd( r[3], M.r[3] );
    return *this;
}

//------------------------------------------------------------------------------

// Element-wise matrix subtraction, in place.
inline XMMATRIX& XMMATRIX::operator-= (CXMMATRIX M)
{
    r[0] = XMVectorSubtract( r[0], M.r[0] );
    r[1] = XMVectorSubtract( r[1], M.r[1] );
    r[2] = XMVectorSubtract( r[2], M.r[2] );
    r[3] = XMVectorSubtract( r[3], M.r[3] );
    return *this;
}

//------------------------------------------------------------------------------

// Matrix product, in place: *this = (*this) * M.
inline XMMATRIX& XMMATRIX::operator*=(CXMMATRIX M)
{
    *this = XMMatrixMultiply( *this, M );
    return *this;
}

//------------------------------------------------------------------------------

// Scale every element by S, in place.
inline XMMATRIX& XMMATRIX::operator*= (float S)
{
    r[0] = XMVectorScale( r[0], S );
    r[1] = XMVectorScale( r[1], S );
    r[2] = XMVectorScale( r[2], S );
    r[3] = XMVectorScale( r[3], S );
    return *this;
}

//------------------------------------------------------------------------------

// Divide every element by S, in place. Implemented as a single reciprocal
// plus four scales; S must be nonzero (asserted in debug builds only).
inline XMMATRIX& XMMATRIX::operator/= (float S)
{
    assert( S != 0.0f );
    float t = 1.0f / S;
    r[0] = XMVectorScale( r[0], t );
    r[1] = XMVectorScale( r[1], t );
    r[2] = XMVectorScale( r[2], t );
    r[3] = XMVectorScale( r[3], t );
    return *this;
}

//------------------------------------------------------------------------------

// Element-wise matrix addition.
inline XMMATRIX XMMATRIX::operator+ (CXMMATRIX M) const
{
    XMMATRIX R;
    R.r[0] = XMVectorAdd( r[0], M.r[0] );
    R.r[1] = XMVectorAdd( r[1], M.r[1] );
    R.r[2] = XMVectorAdd( r[2], M.r[2] );
    R.r[3] = XMVectorAdd( r[3], M.r[3] );
    return R;
}

//------------------------------------------------------------------------------

// Element-wise matrix subtraction.
inline XMMATRIX XMMATRIX::operator- (CXMMATRIX M) const
{
    XMMATRIX R;
    R.r[0] = XMVectorSubtract( r[0], M.r[0] );
    R.r[1] = XMVectorSubtract( r[1], M.r[1] );
    R.r[2] = XMVectorSubtract( r[2], M.r[2] );
    R.r[3] = XMVectorSubtract( r[3], M.r[3] );
    return R;
}

//------------------------------------------------------------------------------

// Matrix product: (*this) * M.
inline XMMATRIX XMMATRIX::operator*(CXMMATRIX M) const
{
    return XMMatrixMultiply(*this, M);
}

//------------------------------------------------------------------------------

// Scale every element by S.
inline XMMATRIX XMMATRIX::operator* (float S) const
{
    XMMATRIX R;
    R.r[0] = XMVectorScale( r[0], S );
    R.r[1] = XMVectorScale( r[1], S );
    R.r[2] = XMVectorScale( r[2], S );
    R.r[3] = XMVectorScale( r[3], S );
    return R;
}

//------------------------------------------------------------------------------

// Divide every element by S (single reciprocal, four scales).
// S must be nonzero (asserted in debug builds only).
inline XMMATRIX XMMATRIX::operator/ (float S) const
{
    assert( S != 0.0f );
    XMMATRIX R;
    float t = 1.0f / S;
    R.r[0] = XMVectorScale( r[0], t );
    R.r[1] = XMVectorScale( r[1], t );
    R.r[2] = XMVectorScale( r[2], t );
    R.r[3] = XMVectorScale( r[3], t );
    return R;
}

//------------------------------------------------------------------------------

// Free-function scalar-on-the-left multiply: S * M == M * S.
inline XMMATRIX operator*
(
    float S,
    CXMMATRIX M
)
{
    XMMATRIX R;
    R.r[0] = XMVectorScale( M.r[0], S );
    R.r[1] = XMVectorScale( M.r[1], S );
    R.r[2] = XMVectorScale( M.r[2], S );
    R.r[3] = XMVectorScale( M.r[3], S );
    return R;
}

/****************************************************************************
 *
 * XMFLOAT3X3 operators
 *
 ****************************************************************************/

//------------------------------------------------------------------------------

// Construct a 3x3 matrix from 9 scalars in row-major order (mRC = row R, col C).
inline XMFLOAT3X3::XMFLOAT3X3
(
    float m00, float m01, float m02,
    float m10, float m11, float m12,
    float m20, float m21, float m22
)
{
    m[0][0] = m00;
    m[0][1] = m01;
    m[0][2] = m02;

    m[1][0] = m10;
    m[1][1] = m11;
    m[1][2] = m12;

    m[2][0] = m20;
    m[2][1] = m21;
    m[2][2] = m22;
}

//------------------------------------------------------------------------------
// Construct a 3x3 matrix from an array of (at least) 9 floats, row-major.
_Use_decl_annotations_
inline XMFLOAT3X3::XMFLOAT3X3
(
    const float* pArray
)
{
    assert( pArray != NULL );
    for (size_t Row = 0; Row < 3; Row++)
    {
        for (size_t Column = 0; Column < 3; Column++)
        {
            m[Row][Column] = pArray[Row * 3 + Column];
        }
    }
}

//------------------------------------------------------------------------------

// Member-by-member copy assignment (9 scalar copies; no SIMD).
inline XMFLOAT3X3& XMFLOAT3X3::operator=
(
    const XMFLOAT3X3& Float3x3
)
{
    _11 = Float3x3._11;
    _12 = Float3x3._12;
    _13 = Float3x3._13;
    _21 = Float3x3._21;
    _22 = Float3x3._22;
    _23 = Float3x3._23;
    _31 = Float3x3._31;
    _32 = Float3x3._32;
    _33 = Float3x3._33;

    return *this;
}

/****************************************************************************
 *
 * XMFLOAT4X3 operators
 *
 ****************************************************************************/

//------------------------------------------------------------------------------

// Construct a 4x3 matrix from 12 scalars in row-major order (mRC = row R, col C).
inline XMFLOAT4X3::XMFLOAT4X3
(
    float m00, float m01, float m02,
    float m10, float m11, float m12,
    float m20, float m21, float m22,
    float m30, float m31, float m32
)
{
    m[0][0] = m00;
    m[0][1] = m01;
    m[0][2] = m02;

    m[1][0] = m10;
    m[1][1] = m11;
    m[1][2] = m12;

    m[2][0] = m20;
    m[2][1] = m21;
    m[2][2] = m22;

    m[3][0] = m30;
    m[3][1] = m31;
    m[3][2] = m32;
}

//------------------------------------------------------------------------------
// Construct a 4x3 matrix from an array of (at least) 12 floats, row-major.
_Use_decl_annotations_
inline XMFLOAT4X3::XMFLOAT4X3
(
    const float* pArray
)
{
    assert( pArray != NULL );

    m[0][0] = pArray[0];
    m[0][1] = pArray[1];
    m[0][2] = pArray[2];

    m[1][0] = pArray[3];
    m[1][1] = pArray[4];
    m[1][2] = pArray[5];

    m[2][0] = pArray[6];
    m[2][1] = pArray[7];
    m[2][2] = pArray[8];

    m[3][0] = pArray[9];
    m[3][1] = pArray[10];
    m[3][2] = pArray[11];
}

//------------------------------------------------------------------------------

// Copy assignment via three overlapping 4-float loads/stores:
// &_11 spans _11.._21, &_22 spans _22.._32, &_33 spans _33.._43,
// covering all 12 contiguous floats exactly once (assumes the struct
// is tightly packed, i.e. the 12 floats are contiguous in memory).
inline XMFLOAT4X3& XMFLOAT4X3::operator=
(
    const XMFLOAT4X3& Float4x3
)
{
    XMVECTOR V1 = XMLoadFloat4((const XMFLOAT4*)&Float4x3._11);
    XMVECTOR V2 = XMLoadFloat4((const XMFLOAT4*)&Float4x3._22);
    XMVECTOR V3 = XMLoadFloat4((const XMFLOAT4*)&Float4x3._33);

    XMStoreFloat4((XMFLOAT4*)&_11, V1);
    XMStoreFloat4((XMFLOAT4*)&_22, V2);
    XMStoreFloat4((XMFLOAT4*)&_33, V3);

    return *this;
}

//------------------------------------------------------------------------------

// Aligned variant of the 4x3 copy assignment above: same overlapping
// three-load/three-store scheme, but using the XMLoadFloat4A/XMStoreFloat4A
// aligned forms (XMFLOAT4X3A storage is 16-byte aligned).
inline XMFLOAT4X3A& XMFLOAT4X3A::operator=
(
    const XMFLOAT4X3A& Float4x3
)
{
    XMVECTOR V1 = XMLoadFloat4A((const XMFLOAT4A*)&Float4x3._11);
    XMVECTOR V2 = XMLoadFloat4A((const XMFLOAT4A*)&Float4x3._22);
    XMVECTOR V3 = XMLoadFloat4A((const XMFLOAT4A*)&Float4x3._33);

    XMStoreFloat4A((XMFLOAT4A*)&_11, V1);
    XMStoreFloat4A((XMFLOAT4A*)&_22, V2);
    XMStoreFloat4A((XMFLOAT4A*)&_33, V3);

    return *this;
}

/****************************************************************************
 *
 * XMFLOAT4X4 operators
 *
 ****************************************************************************/

//------------------------------------------------------------------------------

// Construct a 4x4 matrix from 16 scalars in row-major order (mRC = row R, col C).
inline XMFLOAT4X4::XMFLOAT4X4
(
    float m00, float m01, float m02, float m03,
    float m10, float m11, float m12, float m13,
    float m20, float m21, float m22, float m23,
    float m30, float m31, float m32, float m33
)
{
    m[0][0] = m00;
    m[0][1] = m01;
    m[0][2] = m02;
    m[0][3] = m03;

    m[1][0] = m10;
    m[1][1] = m11;
    m[1][2] = m12;
    m[1][3] = m13;

    m[2][0] = m20;
    m[2][1] = m21;
    m[2][2] = m22;
    m[2][3] = m23;

    m[3][0] = m30;
    m[3][1] = m31;
    m[3][2] = m32;
    m[3][3] = m33;
}

//------------------------------------------------------------------------------
// Construct a 4x4 matrix from an array of (at least) 16 floats, row-major.
_Use_decl_annotations_
inline XMFLOAT4X4::XMFLOAT4X4
(
    const float* pArray
)
{
    assert( pArray != NULL );

    m[0][0] = pArray[0];
    m[0][1] = pArray[1];
    m[0][2] = pArray[2];
    m[0][3] = pArray[3];

    m[1][0] = pArray[4];
    m[1][1] = pArray[5];
    m[1][2] = pArray[6];
    m[1][3] = pArray[7];

    m[2][0] = pArray[8];
    m[2][1] = pArray[9];
    m[2][2] = pArray[10];
    m[2][3] = pArray[11];

    m[3][0] = pArray[12];
    m[3][1] = pArray[13];
    m[3][2] = pArray[14];
    m[3][3] = pArray[15];
}

//------------------------------------------------------------------------------

// Copy assignment as four unaligned row loads/stores (one XMFLOAT4 per row).
inline XMFLOAT4X4& XMFLOAT4X4::operator=
(
    const XMFLOAT4X4& Float4x4
)
{
    XMVECTOR V1 = XMLoadFloat4((const XMFLOAT4*)&Float4x4._11);
    XMVECTOR V2 = XMLoadFloat4((const XMFLOAT4*)&Float4x4._21);
    XMVECTOR V3 = XMLoadFloat4((const XMFLOAT4*)&Float4x4._31);
    XMVECTOR V4 = XMLoadFloat4((const XMFLOAT4*)&Float4x4._41);

    XMStoreFloat4((XMFLOAT4*)&_11, V1);
    XMStoreFloat4((XMFLOAT4*)&_21, V2);
    XMStoreFloat4((XMFLOAT4*)&_31, V3);
    XMStoreFloat4((XMFLOAT4*)&_41, V4);

    return *this;
}

//------------------------------------------------------------------------------

// Aligned variant of the 4x4 copy assignment: four aligned row loads/stores
// (XMFLOAT4X4A storage is 16-byte aligned).
inline XMFLOAT4X4A& XMFLOAT4X4A::operator=
(
    const XMFLOAT4X4A& Float4x4
)
{
    XMVECTOR V1 = XMLoadFloat4A((const XMFLOAT4A*)&Float4x4._11);
    XMVECTOR V2 = XMLoadFloat4A((const XMFLOAT4A*)&Float4x4._21);
    XMVECTOR V3 = XMLoadFloat4A((const XMFLOAT4A*)&Float4x4._31);
    XMVECTOR V4 = XMLoadFloat4A((const XMFLOAT4A*)&Float4x4._41);

    XMStoreFloat4A((XMFLOAT4A*)&_11, V1);
    XMStoreFloat4A((XMFLOAT4A*)&_21, V2);
    XMStoreFloat4A((XMFLOAT4A*)&_31, V3);
    XMStoreFloat4A((XMFLOAT4A*)&_41, V4);

    return *this;
}