// The game where you go into mines and start crafting — but for consoles (forked directly from smartcmd's GitHub).
//-------------------------------------------------------------------------------------
// DirectXMathMatrix.inl -- SIMD C++ Math library
//
// THIS CODE AND INFORMATION IS PROVIDED "AS IS" WITHOUT WARRANTY OF
// ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING BUT NOT LIMITED TO
// THE IMPLIED WARRANTIES OF MERCHANTABILITY AND/OR FITNESS FOR A
// PARTICULAR PURPOSE.
//
// Copyright (c) Microsoft Corporation. All rights reserved.
//-------------------------------------------------------------------------------------

#ifdef _MSC_VER
#pragma once
#endif

/****************************************************************************
 *
 * Matrix
 *
 ****************************************************************************/

//------------------------------------------------------------------------------
// Comparison operations
//------------------------------------------------------------------------------

//------------------------------------------------------------------------------
28// Return true if any entry in the matrix is NaN
29inline bool XMMatrixIsNaN
30(
31 CXMMATRIX M
32)
33{
34#if defined(_XM_NO_INTRINSICS_)
35 size_t i = 16;
36 const uint32_t *pWork = (const uint32_t *)(&M.m[0][0]);
37 do {
38 // Fetch value into integer unit
39 uint32_t uTest = pWork[0];
40 // Remove sign
41 uTest &= 0x7FFFFFFFU;
42 // NaN is 0x7F800001 through 0x7FFFFFFF inclusive
43 uTest -= 0x7F800001U;
44 if (uTest<0x007FFFFFU) {
45 break; // NaN found
46 }
47 ++pWork; // Next entry
48 } while (--i);
49 return (i!=0); // i == 0 if nothing matched
50#elif defined(_XM_ARM_NEON_INTRINSICS_)
51 // Load in registers
52 XMVECTOR vX = M.r[0];
53 XMVECTOR vY = M.r[1];
54 XMVECTOR vZ = M.r[2];
55 XMVECTOR vW = M.r[3];
56 // Test themselves to check for NaN
57 vX = vmvnq_u32(vceqq_f32(vX, vX));
58 vY = vmvnq_u32(vceqq_f32(vY, vY));
59 vZ = vmvnq_u32(vceqq_f32(vZ, vZ));
60 vW = vmvnq_u32(vceqq_f32(vW, vW));
61 // Or all the results
62 vX = vorrq_u32(vX,vZ);
63 vY = vorrq_u32(vY,vW);
64 vX = vorrq_u32(vX,vY);
65 // If any tested true, return true
66 int8x8x2_t vTemp = vzip_u8(vget_low_u8(vX), vget_high_u8(vX));
67 vTemp = vzip_u16(vTemp.val[0], vTemp.val[1]);
68 uint32_t r = vget_lane_u32(vTemp.val[1], 1);
69 return (r != 0);
70#elif defined(_XM_SSE_INTRINSICS_)
71 // Load in registers
72 XMVECTOR vX = M.r[0];
73 XMVECTOR vY = M.r[1];
74 XMVECTOR vZ = M.r[2];
75 XMVECTOR vW = M.r[3];
76 // Test themselves to check for NaN
77 vX = _mm_cmpneq_ps(vX,vX);
78 vY = _mm_cmpneq_ps(vY,vY);
79 vZ = _mm_cmpneq_ps(vZ,vZ);
80 vW = _mm_cmpneq_ps(vW,vW);
81 // Or all the results
82 vX = _mm_or_ps(vX,vZ);
83 vY = _mm_or_ps(vY,vW);
84 vX = _mm_or_ps(vX,vY);
85 // If any tested true, return true
86 return (_mm_movemask_ps(vX)!=0);
87#else
88#endif
89}
90
91//------------------------------------------------------------------------------
92
93// Return true if any entry in the matrix is +/-INF
94inline bool XMMatrixIsInfinite
95(
96 CXMMATRIX M
97)
98{
99#if defined(_XM_NO_INTRINSICS_)
100 size_t i = 16;
101 const uint32_t *pWork = (const uint32_t *)(&M.m[0][0]);
102 do {
103 // Fetch value into integer unit
104 uint32_t uTest = pWork[0];
105 // Remove sign
106 uTest &= 0x7FFFFFFFU;
107 // INF is 0x7F800000
108 if (uTest==0x7F800000U) {
109 break; // INF found
110 }
111 ++pWork; // Next entry
112 } while (--i);
113 return (i!=0); // i == 0 if nothing matched
114#elif defined(_XM_ARM_NEON_INTRINSICS_)
115 // Mask off the sign bits
116 XMVECTOR vTemp1 = vandq_u32(M.r[0],g_XMAbsMask);
117 XMVECTOR vTemp2 = vandq_u32(M.r[1],g_XMAbsMask);
118 XMVECTOR vTemp3 = vandq_u32(M.r[2],g_XMAbsMask);
119 XMVECTOR vTemp4 = vandq_u32(M.r[3],g_XMAbsMask);
120 // Compare to infinity
121 vTemp1 = vceqq_f32(vTemp1,g_XMInfinity);
122 vTemp2 = vceqq_f32(vTemp2,g_XMInfinity);
123 vTemp3 = vceqq_f32(vTemp3,g_XMInfinity);
124 vTemp4 = vceqq_f32(vTemp4,g_XMInfinity);
125 // Or the answers together
126 vTemp1 = vorrq_u32(vTemp1,vTemp2);
127 vTemp3 = vorrq_u32(vTemp3,vTemp4);
128 vTemp1 = vorrq_u32(vTemp1,vTemp3);
129 // If any are infinity, the signs are true.
130 int8x8x2_t vTemp = vzip_u8(vget_low_u8(vTemp1), vget_high_u8(vTemp1));
131 vTemp = vzip_u16(vTemp.val[0], vTemp.val[1]);
132 uint32_t r = vget_lane_u32(vTemp.val[1], 1);
133 return (r != 0);
134#elif defined(_XM_SSE_INTRINSICS_)
135 // Mask off the sign bits
136 XMVECTOR vTemp1 = _mm_and_ps(M.r[0],g_XMAbsMask);
137 XMVECTOR vTemp2 = _mm_and_ps(M.r[1],g_XMAbsMask);
138 XMVECTOR vTemp3 = _mm_and_ps(M.r[2],g_XMAbsMask);
139 XMVECTOR vTemp4 = _mm_and_ps(M.r[3],g_XMAbsMask);
140 // Compare to infinity
141 vTemp1 = _mm_cmpeq_ps(vTemp1,g_XMInfinity);
142 vTemp2 = _mm_cmpeq_ps(vTemp2,g_XMInfinity);
143 vTemp3 = _mm_cmpeq_ps(vTemp3,g_XMInfinity);
144 vTemp4 = _mm_cmpeq_ps(vTemp4,g_XMInfinity);
145 // Or the answers together
146 vTemp1 = _mm_or_ps(vTemp1,vTemp2);
147 vTemp3 = _mm_or_ps(vTemp3,vTemp4);
148 vTemp1 = _mm_or_ps(vTemp1,vTemp3);
149 // If any are infinity, the signs are true.
150 return (_mm_movemask_ps(vTemp1)!=0);
151#else // _XM_VMX128_INTRINSICS_
152#endif // _XM_VMX128_INTRINSICS_
153}
154
155//------------------------------------------------------------------------------
156
157// Return true if the XMMatrix is equal to identity
158inline bool XMMatrixIsIdentity
159(
160 CXMMATRIX M
161)
162{
163#if defined(_XM_NO_INTRINSICS_)
164 // Use the integer pipeline to reduce branching to a minimum
165 const uint32_t *pWork = (const uint32_t*)(&M.m[0][0]);
166 // Convert 1.0f to zero and or them together
167 uint32_t uOne = pWork[0]^0x3F800000U;
168 // Or all the 0.0f entries together
169 uint32_t uZero = pWork[1];
170 uZero |= pWork[2];
171 uZero |= pWork[3];
172 // 2nd row
173 uZero |= pWork[4];
174 uOne |= pWork[5]^0x3F800000U;
175 uZero |= pWork[6];
176 uZero |= pWork[7];
177 // 3rd row
178 uZero |= pWork[8];
179 uZero |= pWork[9];
180 uOne |= pWork[10]^0x3F800000U;
181 uZero |= pWork[11];
182 // 4th row
183 uZero |= pWork[12];
184 uZero |= pWork[13];
185 uZero |= pWork[14];
186 uOne |= pWork[15]^0x3F800000U;
187 // If all zero entries are zero, the uZero==0
188 uZero &= 0x7FFFFFFF; // Allow -0.0f
189 // If all 1.0f entries are 1.0f, then uOne==0
190 uOne |= uZero;
191 return (uOne==0);
192#elif defined(_XM_ARM_NEON_INTRINSICS_)
193 XMVECTOR vTemp1 = vceqq_f32(M.r[0],g_XMIdentityR0);
194 XMVECTOR vTemp2 = vceqq_f32(M.r[1],g_XMIdentityR1);
195 XMVECTOR vTemp3 = vceqq_f32(M.r[2],g_XMIdentityR2);
196 XMVECTOR vTemp4 = vceqq_f32(M.r[3],g_XMIdentityR3);
197 vTemp1 = vandq_u32(vTemp1,vTemp2);
198 vTemp3 = vandq_u32(vTemp3,vTemp4);
199 vTemp1 = vandq_u32(vTemp1,vTemp3);
200 int8x8x2_t vTemp = vzip_u8(vget_low_u8(vTemp1), vget_high_u8(vTemp1));
201 vTemp = vzip_u16(vTemp.val[0], vTemp.val[1]);
202 uint32_t r = vget_lane_u32(vTemp.val[1], 1);
203 return ( r == 0xFFFFFFFFU );
204#elif defined(_XM_SSE_INTRINSICS_)
205 XMVECTOR vTemp1 = _mm_cmpeq_ps(M.r[0],g_XMIdentityR0);
206 XMVECTOR vTemp2 = _mm_cmpeq_ps(M.r[1],g_XMIdentityR1);
207 XMVECTOR vTemp3 = _mm_cmpeq_ps(M.r[2],g_XMIdentityR2);
208 XMVECTOR vTemp4 = _mm_cmpeq_ps(M.r[3],g_XMIdentityR3);
209 vTemp1 = _mm_and_ps(vTemp1,vTemp2);
210 vTemp3 = _mm_and_ps(vTemp3,vTemp4);
211 vTemp1 = _mm_and_ps(vTemp1,vTemp3);
212 return (_mm_movemask_ps(vTemp1)==0x0f);
213#else // _XM_VMX128_INTRINSICS_
214#endif // _XM_VMX128_INTRINSICS_
215}
216
217//------------------------------------------------------------------------------
218// Computation operations
219//------------------------------------------------------------------------------
220
221//------------------------------------------------------------------------------
222// Perform a 4x4 matrix multiply by a 4x4 matrix
223inline XMMATRIX XMMatrixMultiply
224(
225 CXMMATRIX M1,
226 CXMMATRIX M2
227)
228{
229#if defined(_XM_NO_INTRINSICS_)
230 XMMATRIX mResult;
231 // Cache the invariants in registers
232 float x = M1.m[0][0];
233 float y = M1.m[0][1];
234 float z = M1.m[0][2];
235 float w = M1.m[0][3];
236 // Perform the operation on the first row
237 mResult.m[0][0] = (M2.m[0][0]*x)+(M2.m[1][0]*y)+(M2.m[2][0]*z)+(M2.m[3][0]*w);
238 mResult.m[0][1] = (M2.m[0][1]*x)+(M2.m[1][1]*y)+(M2.m[2][1]*z)+(M2.m[3][1]*w);
239 mResult.m[0][2] = (M2.m[0][2]*x)+(M2.m[1][2]*y)+(M2.m[2][2]*z)+(M2.m[3][2]*w);
240 mResult.m[0][3] = (M2.m[0][3]*x)+(M2.m[1][3]*y)+(M2.m[2][3]*z)+(M2.m[3][3]*w);
241 // Repeat for all the other rows
242 x = M1.m[1][0];
243 y = M1.m[1][1];
244 z = M1.m[1][2];
245 w = M1.m[1][3];
246 mResult.m[1][0] = (M2.m[0][0]*x)+(M2.m[1][0]*y)+(M2.m[2][0]*z)+(M2.m[3][0]*w);
247 mResult.m[1][1] = (M2.m[0][1]*x)+(M2.m[1][1]*y)+(M2.m[2][1]*z)+(M2.m[3][1]*w);
248 mResult.m[1][2] = (M2.m[0][2]*x)+(M2.m[1][2]*y)+(M2.m[2][2]*z)+(M2.m[3][2]*w);
249 mResult.m[1][3] = (M2.m[0][3]*x)+(M2.m[1][3]*y)+(M2.m[2][3]*z)+(M2.m[3][3]*w);
250 x = M1.m[2][0];
251 y = M1.m[2][1];
252 z = M1.m[2][2];
253 w = M1.m[2][3];
254 mResult.m[2][0] = (M2.m[0][0]*x)+(M2.m[1][0]*y)+(M2.m[2][0]*z)+(M2.m[3][0]*w);
255 mResult.m[2][1] = (M2.m[0][1]*x)+(M2.m[1][1]*y)+(M2.m[2][1]*z)+(M2.m[3][1]*w);
256 mResult.m[2][2] = (M2.m[0][2]*x)+(M2.m[1][2]*y)+(M2.m[2][2]*z)+(M2.m[3][2]*w);
257 mResult.m[2][3] = (M2.m[0][3]*x)+(M2.m[1][3]*y)+(M2.m[2][3]*z)+(M2.m[3][3]*w);
258 x = M1.m[3][0];
259 y = M1.m[3][1];
260 z = M1.m[3][2];
261 w = M1.m[3][3];
262 mResult.m[3][0] = (M2.m[0][0]*x)+(M2.m[1][0]*y)+(M2.m[2][0]*z)+(M2.m[3][0]*w);
263 mResult.m[3][1] = (M2.m[0][1]*x)+(M2.m[1][1]*y)+(M2.m[2][1]*z)+(M2.m[3][1]*w);
264 mResult.m[3][2] = (M2.m[0][2]*x)+(M2.m[1][2]*y)+(M2.m[2][2]*z)+(M2.m[3][2]*w);
265 mResult.m[3][3] = (M2.m[0][3]*x)+(M2.m[1][3]*y)+(M2.m[2][3]*z)+(M2.m[3][3]*w);
266 return mResult;
267#elif defined(_XM_ARM_NEON_INTRINSICS_)
268 XMMATRIX mResult;
269 __n64 VL = vget_low_f32( M1.r[0] );
270 __n64 VH = vget_high_f32( M1.r[0] );
271 // Splat the component X,Y,Z then W
272 XMVECTOR vX = vdupq_lane_f32(VL, 0);
273 XMVECTOR vY = vdupq_lane_f32(VL, 1);
274 XMVECTOR vZ = vdupq_lane_f32(VH, 0);
275 XMVECTOR vW = vdupq_lane_f32(VH, 1);
276 // Perform the operation on the first row
277 vX = vmulq_f32(vX,M2.r[0]);
278 vY = vmulq_f32(vY,M2.r[1]);
279 vZ = vmlaq_f32(vX,vZ,M2.r[2]);
280 vW = vmlaq_f32(vY,vW,M2.r[3]);
281 mResult.r[0] = vaddq_f32( vZ, vW );
282 // Repeat for the other 3 rows
283 VL = vget_low_f32( M1.r[1] );
284 VH = vget_high_f32( M1.r[1] );
285 vX = vdupq_lane_f32(VL, 0);
286 vY = vdupq_lane_f32(VL, 1);
287 vZ = vdupq_lane_f32(VH, 0);
288 vW = vdupq_lane_f32(VH, 1);
289 vX = vmulq_f32(vX,M2.r[0]);
290 vY = vmulq_f32(vY,M2.r[1]);
291 vZ = vmlaq_f32(vX,vZ,M2.r[2]);
292 vW = vmlaq_f32(vY,vW,M2.r[3]);
293 mResult.r[1] = vaddq_f32( vZ, vW );
294 VL = vget_low_f32( M1.r[2] );
295 VH = vget_high_f32( M1.r[2] );
296 vX = vdupq_lane_f32(VL, 0);
297 vY = vdupq_lane_f32(VL, 1);
298 vZ = vdupq_lane_f32(VH, 0);
299 vW = vdupq_lane_f32(VH, 1);
300 vX = vmulq_f32(vX,M2.r[0]);
301 vY = vmulq_f32(vY,M2.r[1]);
302 vZ = vmlaq_f32(vX,vZ,M2.r[2]);
303 vW = vmlaq_f32(vY,vW,M2.r[3]);
304 mResult.r[2] = vaddq_f32( vZ, vW );
305 VL = vget_low_f32( M1.r[3] );
306 VH = vget_high_f32( M1.r[3] );
307 vX = vdupq_lane_f32(VL, 0);
308 vY = vdupq_lane_f32(VL, 1);
309 vZ = vdupq_lane_f32(VH, 0);
310 vW = vdupq_lane_f32(VH, 1);
311 vX = vmulq_f32(vX,M2.r[0]);
312 vY = vmulq_f32(vY,M2.r[1]);
313 vZ = vmlaq_f32(vX,vZ,M2.r[2]);
314 vW = vmlaq_f32(vY,vW,M2.r[3]);
315 mResult.r[3] = vaddq_f32( vZ, vW );
316 return mResult;
317#elif defined(_XM_SSE_INTRINSICS_)
318 XMMATRIX mResult;
319 // Use vW to hold the original row
320 XMVECTOR vW = M1.r[0];
321 // Splat the component X,Y,Z then W
322 XMVECTOR vX = XM_PERMUTE_PS(vW,_MM_SHUFFLE(0,0,0,0));
323 XMVECTOR vY = XM_PERMUTE_PS(vW,_MM_SHUFFLE(1,1,1,1));
324 XMVECTOR vZ = XM_PERMUTE_PS(vW,_MM_SHUFFLE(2,2,2,2));
325 vW = XM_PERMUTE_PS(vW,_MM_SHUFFLE(3,3,3,3));
326 // Perform the operation on the first row
327 vX = _mm_mul_ps(vX,M2.r[0]);
328 vY = _mm_mul_ps(vY,M2.r[1]);
329 vZ = _mm_mul_ps(vZ,M2.r[2]);
330 vW = _mm_mul_ps(vW,M2.r[3]);
331 // Perform a binary add to reduce cumulative errors
332 vX = _mm_add_ps(vX,vZ);
333 vY = _mm_add_ps(vY,vW);
334 vX = _mm_add_ps(vX,vY);
335 mResult.r[0] = vX;
336 // Repeat for the other 3 rows
337 vW = M1.r[1];
338 vX = XM_PERMUTE_PS(vW,_MM_SHUFFLE(0,0,0,0));
339 vY = XM_PERMUTE_PS(vW,_MM_SHUFFLE(1,1,1,1));
340 vZ = XM_PERMUTE_PS(vW,_MM_SHUFFLE(2,2,2,2));
341 vW = XM_PERMUTE_PS(vW,_MM_SHUFFLE(3,3,3,3));
342 vX = _mm_mul_ps(vX,M2.r[0]);
343 vY = _mm_mul_ps(vY,M2.r[1]);
344 vZ = _mm_mul_ps(vZ,M2.r[2]);
345 vW = _mm_mul_ps(vW,M2.r[3]);
346 vX = _mm_add_ps(vX,vZ);
347 vY = _mm_add_ps(vY,vW);
348 vX = _mm_add_ps(vX,vY);
349 mResult.r[1] = vX;
350 vW = M1.r[2];
351 vX = XM_PERMUTE_PS(vW,_MM_SHUFFLE(0,0,0,0));
352 vY = XM_PERMUTE_PS(vW,_MM_SHUFFLE(1,1,1,1));
353 vZ = XM_PERMUTE_PS(vW,_MM_SHUFFLE(2,2,2,2));
354 vW = XM_PERMUTE_PS(vW,_MM_SHUFFLE(3,3,3,3));
355 vX = _mm_mul_ps(vX,M2.r[0]);
356 vY = _mm_mul_ps(vY,M2.r[1]);
357 vZ = _mm_mul_ps(vZ,M2.r[2]);
358 vW = _mm_mul_ps(vW,M2.r[3]);
359 vX = _mm_add_ps(vX,vZ);
360 vY = _mm_add_ps(vY,vW);
361 vX = _mm_add_ps(vX,vY);
362 mResult.r[2] = vX;
363 vW = M1.r[3];
364 vX = XM_PERMUTE_PS(vW,_MM_SHUFFLE(0,0,0,0));
365 vY = XM_PERMUTE_PS(vW,_MM_SHUFFLE(1,1,1,1));
366 vZ = XM_PERMUTE_PS(vW,_MM_SHUFFLE(2,2,2,2));
367 vW = XM_PERMUTE_PS(vW,_MM_SHUFFLE(3,3,3,3));
368 vX = _mm_mul_ps(vX,M2.r[0]);
369 vY = _mm_mul_ps(vY,M2.r[1]);
370 vZ = _mm_mul_ps(vZ,M2.r[2]);
371 vW = _mm_mul_ps(vW,M2.r[3]);
372 vX = _mm_add_ps(vX,vZ);
373 vY = _mm_add_ps(vY,vW);
374 vX = _mm_add_ps(vX,vY);
375 mResult.r[3] = vX;
376 return mResult;
377#else // _XM_VMX128_INTRINSICS_
378#endif // _XM_VMX128_INTRINSICS_
379}
380
381//------------------------------------------------------------------------------
382
383inline XMMATRIX XMMatrixMultiplyTranspose
384(
385 CXMMATRIX M1,
386 CXMMATRIX M2
387)
388{
389#if defined(_XM_NO_INTRINSICS_)
390 XMMATRIX mResult;
391 // Cache the invariants in registers
392 float x = M2.m[0][0];
393 float y = M2.m[1][0];
394 float z = M2.m[2][0];
395 float w = M2.m[3][0];
396 // Perform the operation on the first row
397 mResult.m[0][0] = (M1.m[0][0]*x)+(M1.m[0][1]*y)+(M1.m[0][2]*z)+(M1.m[0][3]*w);
398 mResult.m[0][1] = (M1.m[1][0]*x)+(M1.m[1][1]*y)+(M1.m[1][2]*z)+(M1.m[1][3]*w);
399 mResult.m[0][2] = (M1.m[2][0]*x)+(M1.m[2][1]*y)+(M1.m[2][2]*z)+(M1.m[2][3]*w);
400 mResult.m[0][3] = (M1.m[3][0]*x)+(M1.m[3][1]*y)+(M1.m[3][2]*z)+(M1.m[3][3]*w);
401 // Repeat for all the other rows
402 x = M2.m[0][1];
403 y = M2.m[1][1];
404 z = M2.m[2][1];
405 w = M2.m[3][1];
406 mResult.m[1][0] = (M1.m[0][0]*x)+(M1.m[0][1]*y)+(M1.m[0][2]*z)+(M1.m[0][3]*w);
407 mResult.m[1][1] = (M1.m[1][0]*x)+(M1.m[1][1]*y)+(M1.m[1][2]*z)+(M1.m[1][3]*w);
408 mResult.m[1][2] = (M1.m[2][0]*x)+(M1.m[2][1]*y)+(M1.m[2][2]*z)+(M1.m[2][3]*w);
409 mResult.m[1][3] = (M1.m[3][0]*x)+(M1.m[3][1]*y)+(M1.m[3][2]*z)+(M1.m[3][3]*w);
410 x = M2.m[0][2];
411 y = M2.m[1][2];
412 z = M2.m[2][2];
413 w = M2.m[3][2];
414 mResult.m[2][0] = (M1.m[0][0]*x)+(M1.m[0][1]*y)+(M1.m[0][2]*z)+(M1.m[0][3]*w);
415 mResult.m[2][1] = (M1.m[1][0]*x)+(M1.m[1][1]*y)+(M1.m[1][2]*z)+(M1.m[1][3]*w);
416 mResult.m[2][2] = (M1.m[2][0]*x)+(M1.m[2][1]*y)+(M1.m[2][2]*z)+(M1.m[2][3]*w);
417 mResult.m[2][3] = (M1.m[3][0]*x)+(M1.m[3][1]*y)+(M1.m[3][2]*z)+(M1.m[3][3]*w);
418 x = M2.m[0][3];
419 y = M2.m[1][3];
420 z = M2.m[2][3];
421 w = M2.m[3][3];
422 mResult.m[3][0] = (M1.m[0][0]*x)+(M1.m[0][1]*y)+(M1.m[0][2]*z)+(M1.m[0][3]*w);
423 mResult.m[3][1] = (M1.m[1][0]*x)+(M1.m[1][1]*y)+(M1.m[1][2]*z)+(M1.m[1][3]*w);
424 mResult.m[3][2] = (M1.m[2][0]*x)+(M1.m[2][1]*y)+(M1.m[2][2]*z)+(M1.m[2][3]*w);
425 mResult.m[3][3] = (M1.m[3][0]*x)+(M1.m[3][1]*y)+(M1.m[3][2]*z)+(M1.m[3][3]*w);
426 return mResult;
427#elif defined(_XM_ARM_NEON_INTRINSICS_)
428 __n64 VL = vget_low_f32( M1.r[0] );
429 __n64 VH = vget_high_f32( M1.r[0] );
430 // Splat the component X,Y,Z then W
431 XMVECTOR vX = vdupq_lane_f32(VL, 0);
432 XMVECTOR vY = vdupq_lane_f32(VL, 1);
433 XMVECTOR vZ = vdupq_lane_f32(VH, 0);
434 XMVECTOR vW = vdupq_lane_f32(VH, 1);
435 // Perform the operation on the first row
436 vX = vmulq_f32(vX,M2.r[0]);
437 vY = vmulq_f32(vY,M2.r[1]);
438 vZ = vmlaq_f32(vX,vZ,M2.r[2]);
439 vW = vmlaq_f32(vY,vW,M2.r[3]);
440 __n128 r0 = vaddq_f32( vZ, vW );
441 // Repeat for the other 3 rows
442 VL = vget_low_f32( M1.r[1] );
443 VH = vget_high_f32( M1.r[1] );
444 vX = vdupq_lane_f32(VL, 0);
445 vY = vdupq_lane_f32(VL, 1);
446 vZ = vdupq_lane_f32(VH, 0);
447 vW = vdupq_lane_f32(VH, 1);
448 vX = vmulq_f32(vX,M2.r[0]);
449 vY = vmulq_f32(vY,M2.r[1]);
450 vZ = vmlaq_f32(vX,vZ,M2.r[2]);
451 vW = vmlaq_f32(vY,vW,M2.r[3]);
452 __n128 r1 = vaddq_f32( vZ, vW );
453 VL = vget_low_f32( M1.r[2] );
454 VH = vget_high_f32( M1.r[2] );
455 vX = vdupq_lane_f32(VL, 0);
456 vY = vdupq_lane_f32(VL, 1);
457 vZ = vdupq_lane_f32(VH, 0);
458 vW = vdupq_lane_f32(VH, 1);
459 vX = vmulq_f32(vX,M2.r[0]);
460 vY = vmulq_f32(vY,M2.r[1]);
461 vZ = vmlaq_f32(vX,vZ,M2.r[2]);
462 vW = vmlaq_f32(vY,vW,M2.r[3]);
463 __n128 r2 = vaddq_f32( vZ, vW );
464 VL = vget_low_f32( M1.r[3] );
465 VH = vget_high_f32( M1.r[3] );
466 vX = vdupq_lane_f32(VL, 0);
467 vY = vdupq_lane_f32(VL, 1);
468 vZ = vdupq_lane_f32(VH, 0);
469 vW = vdupq_lane_f32(VH, 1);
470 vX = vmulq_f32(vX,M2.r[0]);
471 vY = vmulq_f32(vY,M2.r[1]);
472 vZ = vmlaq_f32(vX,vZ,M2.r[2]);
473 vW = vmlaq_f32(vY,vW,M2.r[3]);
474 __n128 r3 = vaddq_f32( vZ, vW );
475
476 // Transpose result
477 float32x4x2_t P0 = vzipq_f32( r0, r2 );
478 float32x4x2_t P1 = vzipq_f32( r1, r3 );
479
480 float32x4x2_t T0 = vzipq_f32( P0.val[0], P1.val[0] );
481 float32x4x2_t T1 = vzipq_f32( P0.val[1], P1.val[1] );
482
483 XMMATRIX mResult;
484 mResult.r[0] = T0.val[0];
485 mResult.r[1] = T0.val[1];
486 mResult.r[2] = T1.val[0];
487 mResult.r[3] = T1.val[1];
488 return mResult;
489#elif defined(_XM_SSE_INTRINSICS_)
490 // Use vW to hold the original row
491 XMVECTOR vW = M1.r[0];
492 // Splat the component X,Y,Z then W
493 XMVECTOR vX = XM_PERMUTE_PS(vW,_MM_SHUFFLE(0,0,0,0));
494 XMVECTOR vY = XM_PERMUTE_PS(vW,_MM_SHUFFLE(1,1,1,1));
495 XMVECTOR vZ = XM_PERMUTE_PS(vW,_MM_SHUFFLE(2,2,2,2));
496 vW = XM_PERMUTE_PS(vW,_MM_SHUFFLE(3,3,3,3));
497 // Perform the operation on the first row
498 vX = _mm_mul_ps(vX,M2.r[0]);
499 vY = _mm_mul_ps(vY,M2.r[1]);
500 vZ = _mm_mul_ps(vZ,M2.r[2]);
501 vW = _mm_mul_ps(vW,M2.r[3]);
502 // Perform a binary add to reduce cumulative errors
503 vX = _mm_add_ps(vX,vZ);
504 vY = _mm_add_ps(vY,vW);
505 vX = _mm_add_ps(vX,vY);
506 __m128 r0 = vX;
507 // Repeat for the other 3 rows
508 vW = M1.r[1];
509 vX = XM_PERMUTE_PS(vW,_MM_SHUFFLE(0,0,0,0));
510 vY = XM_PERMUTE_PS(vW,_MM_SHUFFLE(1,1,1,1));
511 vZ = XM_PERMUTE_PS(vW,_MM_SHUFFLE(2,2,2,2));
512 vW = XM_PERMUTE_PS(vW,_MM_SHUFFLE(3,3,3,3));
513 vX = _mm_mul_ps(vX,M2.r[0]);
514 vY = _mm_mul_ps(vY,M2.r[1]);
515 vZ = _mm_mul_ps(vZ,M2.r[2]);
516 vW = _mm_mul_ps(vW,M2.r[3]);
517 vX = _mm_add_ps(vX,vZ);
518 vY = _mm_add_ps(vY,vW);
519 vX = _mm_add_ps(vX,vY);
520 __m128 r1 = vX;
521 vW = M1.r[2];
522 vX = XM_PERMUTE_PS(vW,_MM_SHUFFLE(0,0,0,0));
523 vY = XM_PERMUTE_PS(vW,_MM_SHUFFLE(1,1,1,1));
524 vZ = XM_PERMUTE_PS(vW,_MM_SHUFFLE(2,2,2,2));
525 vW = XM_PERMUTE_PS(vW,_MM_SHUFFLE(3,3,3,3));
526 vX = _mm_mul_ps(vX,M2.r[0]);
527 vY = _mm_mul_ps(vY,M2.r[1]);
528 vZ = _mm_mul_ps(vZ,M2.r[2]);
529 vW = _mm_mul_ps(vW,M2.r[3]);
530 vX = _mm_add_ps(vX,vZ);
531 vY = _mm_add_ps(vY,vW);
532 vX = _mm_add_ps(vX,vY);
533 __m128 r2 = vX;
534 vW = M1.r[3];
535 vX = XM_PERMUTE_PS(vW,_MM_SHUFFLE(0,0,0,0));
536 vY = XM_PERMUTE_PS(vW,_MM_SHUFFLE(1,1,1,1));
537 vZ = XM_PERMUTE_PS(vW,_MM_SHUFFLE(2,2,2,2));
538 vW = XM_PERMUTE_PS(vW,_MM_SHUFFLE(3,3,3,3));
539 vX = _mm_mul_ps(vX,M2.r[0]);
540 vY = _mm_mul_ps(vY,M2.r[1]);
541 vZ = _mm_mul_ps(vZ,M2.r[2]);
542 vW = _mm_mul_ps(vW,M2.r[3]);
543 vX = _mm_add_ps(vX,vZ);
544 vY = _mm_add_ps(vY,vW);
545 vX = _mm_add_ps(vX,vY);
546 __m128 r3 = vX;
547
548 // x.x,x.y,y.x,y.y
549 XMVECTOR vTemp1 = _mm_shuffle_ps(r0,r1,_MM_SHUFFLE(1,0,1,0));
550 // x.z,x.w,y.z,y.w
551 XMVECTOR vTemp3 = _mm_shuffle_ps(r0,r1,_MM_SHUFFLE(3,2,3,2));
552 // z.x,z.y,w.x,w.y
553 XMVECTOR vTemp2 = _mm_shuffle_ps(r2,r3,_MM_SHUFFLE(1,0,1,0));
554 // z.z,z.w,w.z,w.w
555 XMVECTOR vTemp4 = _mm_shuffle_ps(r2,r3,_MM_SHUFFLE(3,2,3,2));
556
557 XMMATRIX mResult;
558 // x.x,y.x,z.x,w.x
559 mResult.r[0] = _mm_shuffle_ps(vTemp1, vTemp2,_MM_SHUFFLE(2,0,2,0));
560 // x.y,y.y,z.y,w.y
561 mResult.r[1] = _mm_shuffle_ps(vTemp1, vTemp2,_MM_SHUFFLE(3,1,3,1));
562 // x.z,y.z,z.z,w.z
563 mResult.r[2] = _mm_shuffle_ps(vTemp3, vTemp4,_MM_SHUFFLE(2,0,2,0));
564 // x.w,y.w,z.w,w.w
565 mResult.r[3] = _mm_shuffle_ps(vTemp3, vTemp4,_MM_SHUFFLE(3,1,3,1));
566 return mResult;
567#else // _XM_VMX128_INTRINSICS_
568#endif // _XM_VMX128_INTRINSICS_
569}
570
571//------------------------------------------------------------------------------
572
573inline XMMATRIX XMMatrixTranspose
574(
575 CXMMATRIX M
576)
577{
578#if defined(_XM_NO_INTRINSICS_)
579
580 // Original matrix:
581 //
582 // m00m01m02m03
583 // m10m11m12m13
584 // m20m21m22m23
585 // m30m31m32m33
586
587 XMMATRIX P;
588 P.r[0] = XMVectorMergeXY(M.r[0], M.r[2]); // m00m20m01m21
589 P.r[1] = XMVectorMergeXY(M.r[1], M.r[3]); // m10m30m11m31
590 P.r[2] = XMVectorMergeZW(M.r[0], M.r[2]); // m02m22m03m23
591 P.r[3] = XMVectorMergeZW(M.r[1], M.r[3]); // m12m32m13m33
592
593 XMMATRIX MT;
594 MT.r[0] = XMVectorMergeXY(P.r[0], P.r[1]); // m00m10m20m30
595 MT.r[1] = XMVectorMergeZW(P.r[0], P.r[1]); // m01m11m21m31
596 MT.r[2] = XMVectorMergeXY(P.r[2], P.r[3]); // m02m12m22m32
597 MT.r[3] = XMVectorMergeZW(P.r[2], P.r[3]); // m03m13m23m33
598 return MT;
599
600#elif defined(_XM_ARM_NEON_INTRINSICS_)
601 float32x4x2_t P0 = vzipq_f32( M.r[0], M.r[2] );
602 float32x4x2_t P1 = vzipq_f32( M.r[1], M.r[3] );
603
604 float32x4x2_t T0 = vzipq_f32( P0.val[0], P1.val[0] );
605 float32x4x2_t T1 = vzipq_f32( P0.val[1], P1.val[1] );
606
607 XMMATRIX mResult;
608 mResult.r[0] = T0.val[0];
609 mResult.r[1] = T0.val[1];
610 mResult.r[2] = T1.val[0];
611 mResult.r[3] = T1.val[1];
612 return mResult;
613#elif defined(_XM_SSE_INTRINSICS_)
614 // x.x,x.y,y.x,y.y
615 XMVECTOR vTemp1 = _mm_shuffle_ps(M.r[0],M.r[1],_MM_SHUFFLE(1,0,1,0));
616 // x.z,x.w,y.z,y.w
617 XMVECTOR vTemp3 = _mm_shuffle_ps(M.r[0],M.r[1],_MM_SHUFFLE(3,2,3,2));
618 // z.x,z.y,w.x,w.y
619 XMVECTOR vTemp2 = _mm_shuffle_ps(M.r[2],M.r[3],_MM_SHUFFLE(1,0,1,0));
620 // z.z,z.w,w.z,w.w
621 XMVECTOR vTemp4 = _mm_shuffle_ps(M.r[2],M.r[3],_MM_SHUFFLE(3,2,3,2));
622 XMMATRIX mResult;
623
624 // x.x,y.x,z.x,w.x
625 mResult.r[0] = _mm_shuffle_ps(vTemp1, vTemp2,_MM_SHUFFLE(2,0,2,0));
626 // x.y,y.y,z.y,w.y
627 mResult.r[1] = _mm_shuffle_ps(vTemp1, vTemp2,_MM_SHUFFLE(3,1,3,1));
628 // x.z,y.z,z.z,w.z
629 mResult.r[2] = _mm_shuffle_ps(vTemp3, vTemp4,_MM_SHUFFLE(2,0,2,0));
630 // x.w,y.w,z.w,w.w
631 mResult.r[3] = _mm_shuffle_ps(vTemp3, vTemp4,_MM_SHUFFLE(3,1,3,1));
632 return mResult;
633#else // _XM_VMX128_INTRINSICS_
634#endif // _XM_VMX128_INTRINSICS_
635}
636
637//------------------------------------------------------------------------------
638// Return the inverse and the determinant of a 4x4 matrix
639_Use_decl_annotations_
640inline XMMATRIX XMMatrixInverse
641(
642 XMVECTOR* pDeterminant,
643 CXMMATRIX M
644)
645{
646#if defined(_XM_NO_INTRINSICS_) || defined(_XM_ARM_NEON_INTRINSICS_)
647
648 XMMATRIX MT = XMMatrixTranspose(M);
649
650 XMVECTOR V0[4], V1[4];
651 V0[0] = XMVectorSwizzle<XM_SWIZZLE_X, XM_SWIZZLE_X, XM_SWIZZLE_Y, XM_SWIZZLE_Y>(MT.r[2]);
652 V1[0] = XMVectorSwizzle<XM_SWIZZLE_Z, XM_SWIZZLE_W, XM_SWIZZLE_Z, XM_SWIZZLE_W>(MT.r[3]);
653 V0[1] = XMVectorSwizzle<XM_SWIZZLE_X, XM_SWIZZLE_X, XM_SWIZZLE_Y, XM_SWIZZLE_Y>(MT.r[0]);
654 V1[1] = XMVectorSwizzle<XM_SWIZZLE_Z, XM_SWIZZLE_W, XM_SWIZZLE_Z, XM_SWIZZLE_W>(MT.r[1]);
655 V0[2] = XMVectorPermute<XM_PERMUTE_0X, XM_PERMUTE_0Z, XM_PERMUTE_1X, XM_PERMUTE_1Z>(MT.r[2], MT.r[0]);
656 V1[2] = XMVectorPermute<XM_PERMUTE_0Y, XM_PERMUTE_0W, XM_PERMUTE_1Y, XM_PERMUTE_1W>(MT.r[3], MT.r[1]);
657
658 XMVECTOR D0 = XMVectorMultiply(V0[0], V1[0]);
659 XMVECTOR D1 = XMVectorMultiply(V0[1], V1[1]);
660 XMVECTOR D2 = XMVectorMultiply(V0[2], V1[2]);
661
662 V0[0] = XMVectorSwizzle<XM_SWIZZLE_Z, XM_SWIZZLE_W, XM_SWIZZLE_Z, XM_SWIZZLE_W>(MT.r[2]);
663 V1[0] = XMVectorSwizzle<XM_SWIZZLE_X, XM_SWIZZLE_X, XM_SWIZZLE_Y, XM_SWIZZLE_Y>(MT.r[3]);
664 V0[1] = XMVectorSwizzle<XM_SWIZZLE_Z, XM_SWIZZLE_W, XM_SWIZZLE_Z, XM_SWIZZLE_W>(MT.r[0]);
665 V1[1] = XMVectorSwizzle<XM_SWIZZLE_X, XM_SWIZZLE_X, XM_SWIZZLE_Y, XM_SWIZZLE_Y>(MT.r[1]);
666 V0[2] = XMVectorPermute<XM_PERMUTE_0Y, XM_PERMUTE_0W, XM_PERMUTE_1Y, XM_PERMUTE_1W>(MT.r[2], MT.r[0]);
667 V1[2] = XMVectorPermute<XM_PERMUTE_0X, XM_PERMUTE_0Z, XM_PERMUTE_1X, XM_PERMUTE_1Z>(MT.r[3], MT.r[1]);
668
669 D0 = XMVectorNegativeMultiplySubtract(V0[0], V1[0], D0);
670 D1 = XMVectorNegativeMultiplySubtract(V0[1], V1[1], D1);
671 D2 = XMVectorNegativeMultiplySubtract(V0[2], V1[2], D2);
672
673 V0[0] = XMVectorSwizzle<XM_SWIZZLE_Y, XM_SWIZZLE_Z, XM_SWIZZLE_X, XM_SWIZZLE_Y>(MT.r[1]);
674 V1[0] = XMVectorPermute<XM_PERMUTE_1Y, XM_PERMUTE_0Y, XM_PERMUTE_0W, XM_PERMUTE_0X>(D0, D2);
675 V0[1] = XMVectorSwizzle<XM_SWIZZLE_Z, XM_SWIZZLE_X, XM_SWIZZLE_Y, XM_SWIZZLE_X>(MT.r[0]);
676 V1[1] = XMVectorPermute<XM_PERMUTE_0W, XM_PERMUTE_1Y, XM_PERMUTE_0Y, XM_PERMUTE_0Z>(D0, D2);
677 V0[2] = XMVectorSwizzle<XM_SWIZZLE_Y, XM_SWIZZLE_Z, XM_SWIZZLE_X, XM_SWIZZLE_Y>(MT.r[3]);
678 V1[2] = XMVectorPermute<XM_PERMUTE_1W, XM_PERMUTE_0Y, XM_PERMUTE_0W, XM_PERMUTE_0X>(D1, D2);
679 V0[3] = XMVectorSwizzle<XM_SWIZZLE_Z, XM_SWIZZLE_X, XM_SWIZZLE_Y, XM_SWIZZLE_X>(MT.r[2]);
680 V1[3] = XMVectorPermute<XM_PERMUTE_0W, XM_PERMUTE_1W, XM_PERMUTE_0Y, XM_PERMUTE_0Z>(D1, D2);
681
682 XMVECTOR C0 = XMVectorMultiply(V0[0], V1[0]);
683 XMVECTOR C2 = XMVectorMultiply(V0[1], V1[1]);
684 XMVECTOR C4 = XMVectorMultiply(V0[2], V1[2]);
685 XMVECTOR C6 = XMVectorMultiply(V0[3], V1[3]);
686
687 V0[0] = XMVectorSwizzle<XM_SWIZZLE_Z, XM_SWIZZLE_W, XM_SWIZZLE_Y, XM_SWIZZLE_Z>(MT.r[1]);
688 V1[0] = XMVectorPermute<XM_PERMUTE_0W, XM_PERMUTE_0X, XM_PERMUTE_0Y, XM_PERMUTE_1X>(D0, D2);
689 V0[1] = XMVectorSwizzle<XM_SWIZZLE_W, XM_SWIZZLE_Z, XM_SWIZZLE_W, XM_SWIZZLE_Y>(MT.r[0]);
690 V1[1] = XMVectorPermute<XM_PERMUTE_0Z, XM_PERMUTE_0Y, XM_PERMUTE_1X, XM_PERMUTE_0X>(D0, D2);
691 V0[2] = XMVectorSwizzle<XM_SWIZZLE_Z, XM_SWIZZLE_W, XM_SWIZZLE_Y, XM_SWIZZLE_Z>(MT.r[3]);
692 V1[2] = XMVectorPermute<XM_PERMUTE_0W, XM_PERMUTE_0X, XM_PERMUTE_0Y, XM_PERMUTE_1Z>(D1, D2);
693 V0[3] = XMVectorSwizzle<XM_SWIZZLE_W, XM_SWIZZLE_Z, XM_SWIZZLE_W, XM_SWIZZLE_Y>(MT.r[2]);
694 V1[3] = XMVectorPermute<XM_PERMUTE_0Z, XM_PERMUTE_0Y, XM_PERMUTE_1Z, XM_PERMUTE_0X>(D1, D2);
695
696 C0 = XMVectorNegativeMultiplySubtract(V0[0], V1[0], C0);
697 C2 = XMVectorNegativeMultiplySubtract(V0[1], V1[1], C2);
698 C4 = XMVectorNegativeMultiplySubtract(V0[2], V1[2], C4);
699 C6 = XMVectorNegativeMultiplySubtract(V0[3], V1[3], C6);
700
701 V0[0] = XMVectorSwizzle<XM_SWIZZLE_W, XM_SWIZZLE_X, XM_SWIZZLE_W, XM_SWIZZLE_X>(MT.r[1]);
702 V1[0] = XMVectorPermute<XM_PERMUTE_0Z, XM_PERMUTE_1Y, XM_PERMUTE_1X, XM_PERMUTE_0Z>(D0, D2);
703 V0[1] = XMVectorSwizzle<XM_SWIZZLE_Y, XM_SWIZZLE_W, XM_SWIZZLE_X, XM_SWIZZLE_Z>(MT.r[0]);
704 V1[1] = XMVectorPermute<XM_PERMUTE_1Y, XM_PERMUTE_0X, XM_PERMUTE_0W, XM_PERMUTE_1X>(D0, D2);
705 V0[2] = XMVectorSwizzle<XM_SWIZZLE_W, XM_SWIZZLE_X, XM_SWIZZLE_W, XM_SWIZZLE_X>(MT.r[3]);
706 V1[2] = XMVectorPermute<XM_PERMUTE_0Z, XM_PERMUTE_1W, XM_PERMUTE_1Z, XM_PERMUTE_0Z>(D1, D2);
707 V0[3] = XMVectorSwizzle<XM_SWIZZLE_Y, XM_SWIZZLE_W, XM_SWIZZLE_X, XM_SWIZZLE_Z>(MT.r[2]);
708 V1[3] = XMVectorPermute<XM_PERMUTE_1W, XM_PERMUTE_0X, XM_PERMUTE_0W, XM_PERMUTE_1Z>(D1, D2);
709
710 XMVECTOR C1 = XMVectorNegativeMultiplySubtract(V0[0], V1[0], C0);
711 C0 = XMVectorMultiplyAdd(V0[0], V1[0], C0);
712 XMVECTOR C3 = XMVectorMultiplyAdd(V0[1], V1[1], C2);
713 C2 = XMVectorNegativeMultiplySubtract(V0[1], V1[1], C2);
714 XMVECTOR C5 = XMVectorNegativeMultiplySubtract(V0[2], V1[2], C4);
715 C4 = XMVectorMultiplyAdd(V0[2], V1[2], C4);
716 XMVECTOR C7 = XMVectorMultiplyAdd(V0[3], V1[3], C6);
717 C6 = XMVectorNegativeMultiplySubtract(V0[3], V1[3], C6);
718
719 XMMATRIX R;
720 R.r[0] = XMVectorSelect(C0, C1, g_XMSelect0101.v);
721 R.r[1] = XMVectorSelect(C2, C3, g_XMSelect0101.v);
722 R.r[2] = XMVectorSelect(C4, C5, g_XMSelect0101.v);
723 R.r[3] = XMVectorSelect(C6, C7, g_XMSelect0101.v);
724
725 XMVECTOR Determinant = XMVector4Dot(R.r[0], MT.r[0]);
726
727 if (pDeterminant != NULL)
728 *pDeterminant = Determinant;
729
730 XMVECTOR Reciprocal = XMVectorReciprocal(Determinant);
731
732 XMMATRIX Result;
733 Result.r[0] = XMVectorMultiply(R.r[0], Reciprocal);
734 Result.r[1] = XMVectorMultiply(R.r[1], Reciprocal);
735 Result.r[2] = XMVectorMultiply(R.r[2], Reciprocal);
736 Result.r[3] = XMVectorMultiply(R.r[3], Reciprocal);
737 return Result;
738
739#elif defined(_XM_SSE_INTRINSICS_)
740 XMMATRIX MT = XMMatrixTranspose(M);
741 XMVECTOR V00 = XM_PERMUTE_PS(MT.r[2],_MM_SHUFFLE(1,1,0,0));
742 XMVECTOR V10 = XM_PERMUTE_PS(MT.r[3],_MM_SHUFFLE(3,2,3,2));
743 XMVECTOR V01 = XM_PERMUTE_PS(MT.r[0],_MM_SHUFFLE(1,1,0,0));
744 XMVECTOR V11 = XM_PERMUTE_PS(MT.r[1],_MM_SHUFFLE(3,2,3,2));
745 XMVECTOR V02 = _mm_shuffle_ps(MT.r[2], MT.r[0],_MM_SHUFFLE(2,0,2,0));
746 XMVECTOR V12 = _mm_shuffle_ps(MT.r[3], MT.r[1],_MM_SHUFFLE(3,1,3,1));
747
748 XMVECTOR D0 = _mm_mul_ps(V00,V10);
749 XMVECTOR D1 = _mm_mul_ps(V01,V11);
750 XMVECTOR D2 = _mm_mul_ps(V02,V12);
751
752 V00 = XM_PERMUTE_PS(MT.r[2],_MM_SHUFFLE(3,2,3,2));
753 V10 = XM_PERMUTE_PS(MT.r[3],_MM_SHUFFLE(1,1,0,0));
754 V01 = XM_PERMUTE_PS(MT.r[0],_MM_SHUFFLE(3,2,3,2));
755 V11 = XM_PERMUTE_PS(MT.r[1],_MM_SHUFFLE(1,1,0,0));
756 V02 = _mm_shuffle_ps(MT.r[2],MT.r[0],_MM_SHUFFLE(3,1,3,1));
757 V12 = _mm_shuffle_ps(MT.r[3],MT.r[1],_MM_SHUFFLE(2,0,2,0));
758
759 V00 = _mm_mul_ps(V00,V10);
760 V01 = _mm_mul_ps(V01,V11);
761 V02 = _mm_mul_ps(V02,V12);
762 D0 = _mm_sub_ps(D0,V00);
763 D1 = _mm_sub_ps(D1,V01);
764 D2 = _mm_sub_ps(D2,V02);
765 // V11 = D0Y,D0W,D2Y,D2Y
766 V11 = _mm_shuffle_ps(D0,D2,_MM_SHUFFLE(1,1,3,1));
767 V00 = XM_PERMUTE_PS(MT.r[1], _MM_SHUFFLE(1,0,2,1));
768 V10 = _mm_shuffle_ps(V11,D0,_MM_SHUFFLE(0,3,0,2));
769 V01 = XM_PERMUTE_PS(MT.r[0], _MM_SHUFFLE(0,1,0,2));
770 V11 = _mm_shuffle_ps(V11,D0,_MM_SHUFFLE(2,1,2,1));
771 // V13 = D1Y,D1W,D2W,D2W
772 XMVECTOR V13 = _mm_shuffle_ps(D1,D2,_MM_SHUFFLE(3,3,3,1));
773 V02 = XM_PERMUTE_PS(MT.r[3], _MM_SHUFFLE(1,0,2,1));
774 V12 = _mm_shuffle_ps(V13,D1,_MM_SHUFFLE(0,3,0,2));
775 XMVECTOR V03 = XM_PERMUTE_PS(MT.r[2],_MM_SHUFFLE(0,1,0,2));
776 V13 = _mm_shuffle_ps(V13,D1,_MM_SHUFFLE(2,1,2,1));
777
778 XMVECTOR C0 = _mm_mul_ps(V00,V10);
779 XMVECTOR C2 = _mm_mul_ps(V01,V11);
780 XMVECTOR C4 = _mm_mul_ps(V02,V12);
781 XMVECTOR C6 = _mm_mul_ps(V03,V13);
782
783 // V11 = D0X,D0Y,D2X,D2X
784 V11 = _mm_shuffle_ps(D0,D2,_MM_SHUFFLE(0,0,1,0));
785 V00 = XM_PERMUTE_PS(MT.r[1], _MM_SHUFFLE(2,1,3,2));
786 V10 = _mm_shuffle_ps(D0,V11,_MM_SHUFFLE(2,1,0,3));
787 V01 = XM_PERMUTE_PS(MT.r[0], _MM_SHUFFLE(1,3,2,3));
788 V11 = _mm_shuffle_ps(D0,V11,_MM_SHUFFLE(0,2,1,2));
789 // V13 = D1X,D1Y,D2Z,D2Z
790 V13 = _mm_shuffle_ps(D1,D2,_MM_SHUFFLE(2,2,1,0));
791 V02 = XM_PERMUTE_PS(MT.r[3], _MM_SHUFFLE(2,1,3,2));
792 V12 = _mm_shuffle_ps(D1,V13,_MM_SHUFFLE(2,1,0,3));
793 V03 = XM_PERMUTE_PS(MT.r[2],_MM_SHUFFLE(1,3,2,3));
794 V13 = _mm_shuffle_ps(D1,V13,_MM_SHUFFLE(0,2,1,2));
795
796 V00 = _mm_mul_ps(V00,V10);
797 V01 = _mm_mul_ps(V01,V11);
798 V02 = _mm_mul_ps(V02,V12);
799 V03 = _mm_mul_ps(V03,V13);
800 C0 = _mm_sub_ps(C0,V00);
801 C2 = _mm_sub_ps(C2,V01);
802 C4 = _mm_sub_ps(C4,V02);
803 C6 = _mm_sub_ps(C6,V03);
804
805 V00 = XM_PERMUTE_PS(MT.r[1],_MM_SHUFFLE(0,3,0,3));
806 // V10 = D0Z,D0Z,D2X,D2Y
807 V10 = _mm_shuffle_ps(D0,D2,_MM_SHUFFLE(1,0,2,2));
808 V10 = XM_PERMUTE_PS(V10,_MM_SHUFFLE(0,2,3,0));
809 V01 = XM_PERMUTE_PS(MT.r[0],_MM_SHUFFLE(2,0,3,1));
810 // V11 = D0X,D0W,D2X,D2Y
811 V11 = _mm_shuffle_ps(D0,D2,_MM_SHUFFLE(1,0,3,0));
812 V11 = XM_PERMUTE_PS(V11,_MM_SHUFFLE(2,1,0,3));
813 V02 = XM_PERMUTE_PS(MT.r[3],_MM_SHUFFLE(0,3,0,3));
814 // V12 = D1Z,D1Z,D2Z,D2W
815 V12 = _mm_shuffle_ps(D1,D2,_MM_SHUFFLE(3,2,2,2));
816 V12 = XM_PERMUTE_PS(V12,_MM_SHUFFLE(0,2,3,0));
817 V03 = XM_PERMUTE_PS(MT.r[2],_MM_SHUFFLE(2,0,3,1));
818 // V13 = D1X,D1W,D2Z,D2W
819 V13 = _mm_shuffle_ps(D1,D2,_MM_SHUFFLE(3,2,3,0));
820 V13 = XM_PERMUTE_PS(V13,_MM_SHUFFLE(2,1,0,3));
821
822 V00 = _mm_mul_ps(V00,V10);
823 V01 = _mm_mul_ps(V01,V11);
824 V02 = _mm_mul_ps(V02,V12);
825 V03 = _mm_mul_ps(V03,V13);
826 XMVECTOR C1 = _mm_sub_ps(C0,V00);
827 C0 = _mm_add_ps(C0,V00);
828 XMVECTOR C3 = _mm_add_ps(C2,V01);
829 C2 = _mm_sub_ps(C2,V01);
830 XMVECTOR C5 = _mm_sub_ps(C4,V02);
831 C4 = _mm_add_ps(C4,V02);
832 XMVECTOR C7 = _mm_add_ps(C6,V03);
833 C6 = _mm_sub_ps(C6,V03);
834
835 C0 = _mm_shuffle_ps(C0,C1,_MM_SHUFFLE(3,1,2,0));
836 C2 = _mm_shuffle_ps(C2,C3,_MM_SHUFFLE(3,1,2,0));
837 C4 = _mm_shuffle_ps(C4,C5,_MM_SHUFFLE(3,1,2,0));
838 C6 = _mm_shuffle_ps(C6,C7,_MM_SHUFFLE(3,1,2,0));
839 C0 = XM_PERMUTE_PS(C0,_MM_SHUFFLE(3,1,2,0));
840 C2 = XM_PERMUTE_PS(C2,_MM_SHUFFLE(3,1,2,0));
841 C4 = XM_PERMUTE_PS(C4,_MM_SHUFFLE(3,1,2,0));
842 C6 = XM_PERMUTE_PS(C6,_MM_SHUFFLE(3,1,2,0));
843 // Get the determinate
844 XMVECTOR vTemp = XMVector4Dot(C0,MT.r[0]);
845 if (pDeterminant != NULL)
846 *pDeterminant = vTemp;
847 vTemp = _mm_div_ps(g_XMOne,vTemp);
848 XMMATRIX mResult;
849 mResult.r[0] = _mm_mul_ps(C0,vTemp);
850 mResult.r[1] = _mm_mul_ps(C2,vTemp);
851 mResult.r[2] = _mm_mul_ps(C4,vTemp);
852 mResult.r[3] = _mm_mul_ps(C6,vTemp);
853 return mResult;
854#else // _XM_VMX128_INTRINSICS_
855#endif // _XM_VMX128_INTRINSICS_
856}
857
858//------------------------------------------------------------------------------
859
// Computes the determinant of M, replicated into all four lanes of the
// returned vector. Implemented as a cofactor expansion along row 0: vectors
// of 2x2 sub-determinants built from rows 2 and 3 are combined with swizzles
// of row 1 to form the four 3x3 cofactors, which are then dotted with the
// sign-alternated row 0.
inline XMVECTOR XMMatrixDeterminant
(
    CXMMATRIX M
)
{
#if defined(_XM_NO_INTRINSICS_) || defined(_XM_SSE_INTRINSICS_) || defined(_XM_ARM_NEON_INTRINSICS_)

    // Alternating cofactor signs applied to row 0.
    static const XMVECTORF32 Sign = {1.0f, -1.0f, 1.0f, -1.0f};

    XMVECTOR V0 = XMVectorSwizzle<XM_SWIZZLE_Y, XM_SWIZZLE_X, XM_SWIZZLE_X, XM_SWIZZLE_X>(M.r[2]);
    XMVECTOR V1 = XMVectorSwizzle<XM_SWIZZLE_Z, XM_SWIZZLE_Z, XM_SWIZZLE_Y, XM_SWIZZLE_Y>(M.r[3]);
    XMVECTOR V2 = XMVectorSwizzle<XM_SWIZZLE_Y, XM_SWIZZLE_X, XM_SWIZZLE_X, XM_SWIZZLE_X>(M.r[2]);
    XMVECTOR V3 = XMVectorSwizzle<XM_SWIZZLE_W, XM_SWIZZLE_W, XM_SWIZZLE_W, XM_SWIZZLE_Z>(M.r[3]);
    XMVECTOR V4 = XMVectorSwizzle<XM_SWIZZLE_Z, XM_SWIZZLE_Z, XM_SWIZZLE_Y, XM_SWIZZLE_Y>(M.r[2]);
    XMVECTOR V5 = XMVectorSwizzle<XM_SWIZZLE_W, XM_SWIZZLE_W, XM_SWIZZLE_W, XM_SWIZZLE_Z>(M.r[3]);

    // First halves (a*d) of the 2x2 sub-determinants of rows 2 and 3.
    XMVECTOR P0 = XMVectorMultiply(V0, V1);
    XMVECTOR P1 = XMVectorMultiply(V2, V3);
    XMVECTOR P2 = XMVectorMultiply(V4, V5);

    V0 = XMVectorSwizzle<XM_SWIZZLE_Z, XM_SWIZZLE_Z, XM_SWIZZLE_Y, XM_SWIZZLE_Y>(M.r[2]);
    V1 = XMVectorSwizzle<XM_SWIZZLE_Y, XM_SWIZZLE_X, XM_SWIZZLE_X, XM_SWIZZLE_X>(M.r[3]);
    V2 = XMVectorSwizzle<XM_SWIZZLE_W, XM_SWIZZLE_W, XM_SWIZZLE_W, XM_SWIZZLE_Z>(M.r[2]);
    V3 = XMVectorSwizzle<XM_SWIZZLE_Y, XM_SWIZZLE_X, XM_SWIZZLE_X, XM_SWIZZLE_X>(M.r[3]);
    V4 = XMVectorSwizzle<XM_SWIZZLE_W, XM_SWIZZLE_W, XM_SWIZZLE_W, XM_SWIZZLE_Z>(M.r[2]);
    V5 = XMVectorSwizzle<XM_SWIZZLE_Z, XM_SWIZZLE_Z, XM_SWIZZLE_Y, XM_SWIZZLE_Y>(M.r[3]);

    // Complete each sub-determinant: P = a*d - b*c.
    P0 = XMVectorNegativeMultiplySubtract(V0, V1, P0);
    P1 = XMVectorNegativeMultiplySubtract(V2, V3, P1);
    P2 = XMVectorNegativeMultiplySubtract(V4, V5, P2);

    // Combine the sub-determinants with swizzles of row 1 to form the
    // four 3x3 cofactors of row 0.
    V0 = XMVectorSwizzle<XM_SWIZZLE_W, XM_SWIZZLE_W, XM_SWIZZLE_W, XM_SWIZZLE_Z>(M.r[1]);
    V1 = XMVectorSwizzle<XM_SWIZZLE_Z, XM_SWIZZLE_Z, XM_SWIZZLE_Y, XM_SWIZZLE_Y>(M.r[1]);
    V2 = XMVectorSwizzle<XM_SWIZZLE_Y, XM_SWIZZLE_X, XM_SWIZZLE_X, XM_SWIZZLE_X>(M.r[1]);

    XMVECTOR S = XMVectorMultiply(M.r[0], Sign.v);
    XMVECTOR R = XMVectorMultiply(V0, P0);
    R = XMVectorNegativeMultiplySubtract(V1, P1, R);
    R = XMVectorMultiplyAdd(V2, P2, R);

    // det = dot(sign-adjusted row 0, cofactors), splatted across all lanes.
    return XMVector4Dot(S, R);

#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}
905
// Ranks three values by magnitude: after expansion, (a) holds the index
// (0 = x, 1 = y, 2 = z) of the largest of x/y/z, (b) the middle one, and
// (c) the smallest. Used by XMMatrixDecompose to decide which basis axes are
// reliable. Implemented as a macro so it can assign to arbitrary lvalues;
// comments cannot appear inside the continuation lines.
#define XM3RANKDECOMPOSE(a, b, c, x, y, z) \
    if((x) < (y)) \
    { \
        if((y) < (z)) \
        { \
            (a) = 2; \
            (b) = 1; \
            (c) = 0; \
        } \
        else \
        { \
            (a) = 1; \
 \
            if((x) < (z)) \
            { \
                (b) = 2; \
                (c) = 0; \
            } \
            else \
            { \
                (b) = 0; \
                (c) = 2; \
            } \
        } \
    } \
    else \
    { \
        if((x) < (z)) \
        { \
            (a) = 2; \
            (b) = 0; \
            (c) = 1; \
        } \
        else \
        { \
            (a) = 0; \
 \
            if((y) < (z)) \
            { \
                (b) = 2; \
                (c) = 1; \
            } \
            else \
            { \
                (b) = 1; \
                (c) = 2; \
            } \
        } \
    }

// Threshold below which a scale factor (or the deviation of the determinant
// from 1) is treated as degenerate in XMMatrixDecompose.
#define XM3_DECOMP_EPSILON 0.0001f
957
// Factors an affine transformation matrix into per-axis scale, rotation
// quaternion and translation components. Returns false when the matrix is
// not a valid scale/rotation/translation product (after normalization its
// determinant is not ~1, e.g. it contains shear or projection). Near-zero
// scale axes are reconstructed from the remaining axes, and a negative
// determinant is folded into a negated scale on one axis so the rotation
// stays proper.
_Use_decl_annotations_
inline bool XMMatrixDecompose
(
    XMVECTOR *outScale,
    XMVECTOR *outRotQuat,
    XMVECTOR *outTrans,
    CXMMATRIX M
)
{
    // Unit axes used to rebuild basis vectors whose scale is near zero.
    static const XMVECTOR *pvCanonicalBasis[3] = {
        &g_XMIdentityR0.v,
        &g_XMIdentityR1.v,
        &g_XMIdentityR2.v
    };

    assert( outScale != NULL );
    assert( outRotQuat != NULL );
    assert( outTrans != NULL );

    // Get the translation (row 3 of the matrix).
    outTrans[0] = M.r[3];

    // ppvBasis aliases the rows of matTemp, a working copy of the upper 3x3.
    XMVECTOR *ppvBasis[3];
    XMMATRIX matTemp;
    ppvBasis[0] = &matTemp.r[0];
    ppvBasis[1] = &matTemp.r[1];
    ppvBasis[2] = &matTemp.r[2];

    matTemp.r[0] = M.r[0];
    matTemp.r[1] = M.r[1];
    matTemp.r[2] = M.r[2];
    matTemp.r[3] = g_XMIdentityR3.v;

    // pfScales aliases outScale so each scale can be written per component.
    float *pfScales = (float *)outScale;

    // The scale of each axis is the length of the corresponding basis row.
    size_t a, b, c;
    XMVectorGetXPtr(&pfScales[0],XMVector3Length(ppvBasis[0][0]));
    XMVectorGetXPtr(&pfScales[1],XMVector3Length(ppvBasis[1][0]));
    XMVectorGetXPtr(&pfScales[2],XMVector3Length(ppvBasis[2][0]));
    pfScales[3] = 0.f;

    // Rank the axes: a = largest scale, b = middle, c = smallest.
    XM3RANKDECOMPOSE(a, b, c, pfScales[0], pfScales[1], pfScales[2])

    // If even the dominant axis is degenerate, fall back to its canonical axis.
    if(pfScales[a] < XM3_DECOMP_EPSILON)
    {
        ppvBasis[a][0] = pvCanonicalBasis[a][0];
    }
    ppvBasis[a][0] = XMVector3Normalize(ppvBasis[a][0]);

    if(pfScales[b] < XM3_DECOMP_EPSILON)
    {
        // Rebuild the middle axis perpendicular to the dominant one by
        // crossing with the canonical axis least aligned with it.
        size_t aa, bb, cc;
        float fAbsX, fAbsY, fAbsZ;

        fAbsX = fabsf(XMVectorGetX(ppvBasis[a][0]));
        fAbsY = fabsf(XMVectorGetY(ppvBasis[a][0]));
        fAbsZ = fabsf(XMVectorGetZ(ppvBasis[a][0]));

        XM3RANKDECOMPOSE(aa, bb, cc, fAbsX, fAbsY, fAbsZ)

        ppvBasis[b][0] = XMVector3Cross(ppvBasis[a][0],pvCanonicalBasis[cc][0]);
    }

    ppvBasis[b][0] = XMVector3Normalize(ppvBasis[b][0]);

    // The smallest axis can always be rebuilt as the cross of the other two.
    if(pfScales[c] < XM3_DECOMP_EPSILON)
    {
        ppvBasis[c][0] = XMVector3Cross(ppvBasis[a][0],ppvBasis[b][0]);
    }

    ppvBasis[c][0] = XMVector3Normalize(ppvBasis[c][0]);

    float fDet = XMVectorGetX(XMMatrixDeterminant(matTemp));

    // A negative determinant means the basis flips handedness (Cramer's rule
    // sign check); fold the flip into one axis and its scale so the
    // remaining rotation is proper.
    if(fDet < 0.0f)
    {
        // switch coordinate system by negating the scale and inverting the basis vector on the x-axis
        pfScales[a] = -pfScales[a];
        ppvBasis[a][0] = XMVectorNegate(ppvBasis[a][0]);

        fDet = -fDet;
    }

    // Squared distance of the determinant from 1: ~0 for a pure rotation.
    fDet -= 1.0f;
    fDet *= fDet;

    if(XM3_DECOMP_EPSILON < fDet)
    {
        // Non-SRT matrix encountered
        return false;
    }

    // generate the quaternion from the matrix
    outRotQuat[0] = XMQuaternionRotationMatrix(matTemp);
    return true;
}
1055
1056#undef XM3_DECOMP_EPSILON
1057#undef XM3RANKDECOMPOSE
1058
1059//------------------------------------------------------------------------------
1060// Transformation operations
1061//------------------------------------------------------------------------------
1062
1063//------------------------------------------------------------------------------
1064
1065inline XMMATRIX XMMatrixIdentity()
1066{
1067#if defined(_XM_NO_INTRINSICS_) || defined(_XM_SSE_INTRINSICS_) || defined(_XM_ARM_NEON_INTRINSICS_)
1068
1069 XMMATRIX M;
1070 M.r[0] = g_XMIdentityR0.v;
1071 M.r[1] = g_XMIdentityR1.v;
1072 M.r[2] = g_XMIdentityR2.v;
1073 M.r[3] = g_XMIdentityR3.v;
1074 return M;
1075
1076#else // _XM_VMX128_INTRINSICS_
1077#endif // _XM_VMX128_INTRINSICS_
1078}
1079
1080//------------------------------------------------------------------------------
1081
1082inline XMMATRIX XMMatrixSet
1083(
1084 float m00, float m01, float m02, float m03,
1085 float m10, float m11, float m12, float m13,
1086 float m20, float m21, float m22, float m23,
1087 float m30, float m31, float m32, float m33
1088)
1089{
1090 XMMATRIX M;
1091#if defined(_XM_NO_INTRINSICS_)
1092 M.m[0][0] = m00; M.m[0][1] = m01; M.m[0][2] = m02; M.m[0][3] = m03;
1093 M.m[1][0] = m10; M.m[1][1] = m11; M.m[1][2] = m12; M.m[1][3] = m13;
1094 M.m[2][0] = m20; M.m[2][1] = m21; M.m[2][2] = m22; M.m[2][3] = m23;
1095 M.m[3][0] = m30; M.m[3][1] = m31; M.m[3][2] = m32; M.m[3][3] = m33;
1096#else
1097 M.r[0] = XMVectorSet(m00, m01, m02, m03);
1098 M.r[1] = XMVectorSet(m10, m11, m12, m13);
1099 M.r[2] = XMVectorSet(m20, m21, m22, m23);
1100 M.r[3] = XMVectorSet(m30, m31, m32, m33);
1101#endif
1102 return M;
1103}
1104
1105//------------------------------------------------------------------------------
1106
1107inline XMMATRIX XMMatrixTranslation
1108(
1109 float OffsetX,
1110 float OffsetY,
1111 float OffsetZ
1112)
1113{
1114#if defined(_XM_NO_INTRINSICS_)
1115
1116 XMMATRIX M;
1117 M.m[0][0] = 1.0f;
1118 M.m[0][1] = 0.0f;
1119 M.m[0][2] = 0.0f;
1120 M.m[0][3] = 0.0f;
1121
1122 M.m[1][0] = 0.0f;
1123 M.m[1][1] = 1.0f;
1124 M.m[1][2] = 0.0f;
1125 M.m[1][3] = 0.0f;
1126
1127 M.m[2][0] = 0.0f;
1128 M.m[2][1] = 0.0f;
1129 M.m[2][2] = 1.0f;
1130 M.m[2][3] = 0.0f;
1131
1132 M.m[3][0] = OffsetX;
1133 M.m[3][1] = OffsetY;
1134 M.m[3][2] = OffsetZ;
1135 M.m[3][3] = 1.0f;
1136 return M;
1137
1138#elif defined(_XM_SSE_INTRINSICS_) || defined(_XM_ARM_NEON_INTRINSICS_)
1139 XMMATRIX M;
1140 M.r[0] = g_XMIdentityR0.v;
1141 M.r[1] = g_XMIdentityR1.v;
1142 M.r[2] = g_XMIdentityR2.v;
1143 M.r[3] = XMVectorSet(OffsetX, OffsetY, OffsetZ, 1.f );
1144 return M;
1145#else // _XM_VMX128_INTRINSICS_
1146#endif // _XM_VMX128_INTRINSICS_
1147}
1148
1149
1150//------------------------------------------------------------------------------
1151
// Builds a translation matrix from the x, y and z components of Offset.
// The w component of Offset is ignored.
inline XMMATRIX XMMatrixTranslationFromVector
(
    FXMVECTOR Offset
)
{
#if defined(_XM_NO_INTRINSICS_)

    XMMATRIX M;
    M.m[0][0] = 1.0f;
    M.m[0][1] = 0.0f;
    M.m[0][2] = 0.0f;
    M.m[0][3] = 0.0f;

    M.m[1][0] = 0.0f;
    M.m[1][1] = 1.0f;
    M.m[1][2] = 0.0f;
    M.m[1][3] = 0.0f;

    M.m[2][0] = 0.0f;
    M.m[2][1] = 0.0f;
    M.m[2][2] = 1.0f;
    M.m[2][3] = 0.0f;

    // Translation lives in row 3 (row-vector convention); w is forced to 1.
    M.m[3][0] = Offset.vector4_f32[0];
    M.m[3][1] = Offset.vector4_f32[1];
    M.m[3][2] = Offset.vector4_f32[2];
    M.m[3][3] = 1.0f;
    return M;

#elif defined(_XM_SSE_INTRINSICS_) || defined(_XM_ARM_NEON_INTRINSICS_)
    XMMATRIX M;
    M.r[0] = g_XMIdentityR0.v;
    M.r[1] = g_XMIdentityR1.v;
    M.r[2] = g_XMIdentityR2.v;
    // Row 3 = (Offset.x, Offset.y, Offset.z, 1): xyz from Offset, w from identity.
    M.r[3] = XMVectorSelect( g_XMIdentityR3.v, Offset, g_XMSelect1110.v );
    return M;
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}
1191
1192//------------------------------------------------------------------------------
1193
1194inline XMMATRIX XMMatrixScaling
1195(
1196 float ScaleX,
1197 float ScaleY,
1198 float ScaleZ
1199)
1200{
1201#if defined(_XM_NO_INTRINSICS_)
1202
1203 XMMATRIX M;
1204 M.m[0][0] = ScaleX;
1205 M.m[0][1] = 0.0f;
1206 M.m[0][2] = 0.0f;
1207 M.m[0][3] = 0.0f;
1208
1209 M.m[1][0] = 0.0f;
1210 M.m[1][1] = ScaleY;
1211 M.m[1][2] = 0.0f;
1212 M.m[1][3] = 0.0f;
1213
1214 M.m[2][0] = 0.0f;
1215 M.m[2][1] = 0.0f;
1216 M.m[2][2] = ScaleZ;
1217 M.m[2][3] = 0.0f;
1218
1219 M.m[3][0] = 0.0f;
1220 M.m[3][1] = 0.0f;
1221 M.m[3][2] = 0.0f;
1222 M.m[3][3] = 1.0f;
1223 return M;
1224
1225#elif defined(_XM_ARM_NEON_INTRINSICS_)
1226 const XMVECTOR Zero = vdupq_n_f32(0);
1227 XMMATRIX M;
1228 M.r[0] = vsetq_lane_f32( ScaleX, Zero, 0 );
1229 M.r[1] = vsetq_lane_f32( ScaleY, Zero, 1 );
1230 M.r[2] = vsetq_lane_f32( ScaleZ, Zero, 2 );
1231 M.r[3] = g_XMIdentityR3.v;
1232 return M;
1233#elif defined(_XM_SSE_INTRINSICS_)
1234 XMMATRIX M;
1235 M.r[0] = _mm_set_ps( 0, 0, 0, ScaleX );
1236 M.r[1] = _mm_set_ps( 0, 0, ScaleY, 0 );
1237 M.r[2] = _mm_set_ps( 0, ScaleZ, 0, 0 );
1238 M.r[3] = g_XMIdentityR3.v;
1239 return M;
1240#elif defined(XM_NO_MISALIGNED_VECTOR_ACCESS)
1241#endif // _XM_VMX128_INTRINSICS_
1242}
1243
1244//------------------------------------------------------------------------------
1245
// Builds a scaling matrix from the x, y and z components of Scale.
// The w component of Scale is ignored (m33 is forced to 1).
inline XMMATRIX XMMatrixScalingFromVector
(
    FXMVECTOR Scale
)
{
#if defined(_XM_NO_INTRINSICS_)

    XMMATRIX M;
    M.m[0][0] = Scale.vector4_f32[0];
    M.m[0][1] = 0.0f;
    M.m[0][2] = 0.0f;
    M.m[0][3] = 0.0f;

    M.m[1][0] = 0.0f;
    M.m[1][1] = Scale.vector4_f32[1];
    M.m[1][2] = 0.0f;
    M.m[1][3] = 0.0f;

    M.m[2][0] = 0.0f;
    M.m[2][1] = 0.0f;
    M.m[2][2] = Scale.vector4_f32[2];
    M.m[2][3] = 0.0f;

    M.m[3][0] = 0.0f;
    M.m[3][1] = 0.0f;
    M.m[3][2] = 0.0f;
    M.m[3][3] = 1.0f;
    return M;

#elif defined(_XM_ARM_NEON_INTRINSICS_)
    // Bit-mask each row so only the matching diagonal lane of Scale survives.
    XMMATRIX M;
    M.r[0] = vandq_u32(Scale,g_XMMaskX);
    M.r[1] = vandq_u32(Scale,g_XMMaskY);
    M.r[2] = vandq_u32(Scale,g_XMMaskZ);
    M.r[3] = g_XMIdentityR3.v;
    return M;
#elif defined(_XM_SSE_INTRINSICS_)
    // Same masking trick with SSE bitwise AND.
    XMMATRIX M;
    M.r[0] = _mm_and_ps(Scale,g_XMMaskX);
    M.r[1] = _mm_and_ps(Scale,g_XMMaskY);
    M.r[2] = _mm_and_ps(Scale,g_XMMaskZ);
    M.r[3] = g_XMIdentityR3.v;
    return M;
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}
1292
1293//------------------------------------------------------------------------------
1294
// Builds a matrix that rotates about the x-axis by Angle radians.
// Rows: (1,0,0,0), (0,cos,sin,0), (0,-sin,cos,0), (0,0,0,1).
inline XMMATRIX XMMatrixRotationX
(
    float Angle
)
{
#if defined(_XM_NO_INTRINSICS_)

    float fSinAngle;
    float fCosAngle;
    XMScalarSinCos(&fSinAngle, &fCosAngle, Angle);

    // Row 0 (x axis) and row 3 stay identity; rows 1 and 2 rotate y/z.
    XMMATRIX M;
    M.m[0][0] = 1.0f;
    M.m[0][1] = 0.0f;
    M.m[0][2] = 0.0f;
    M.m[0][3] = 0.0f;

    M.m[1][0] = 0.0f;
    M.m[1][1] = fCosAngle;
    M.m[1][2] = fSinAngle;
    M.m[1][3] = 0.0f;

    M.m[2][0] = 0.0f;
    M.m[2][1] = -fSinAngle;
    M.m[2][2] = fCosAngle;
    M.m[2][3] = 0.0f;

    M.m[3][0] = 0.0f;
    M.m[3][1] = 0.0f;
    M.m[3][2] = 0.0f;
    M.m[3][3] = 1.0f;
    return M;

#elif defined(_XM_ARM_NEON_INTRINSICS_)
    float fSinAngle;
    float fCosAngle;
    XMScalarSinCos(&fSinAngle, &fCosAngle, Angle);

    const XMVECTOR Zero = vdupq_n_f32(0);

    // T1 = (0, cos, sin, 0)
    XMVECTOR T1 = vsetq_lane_f32( fCosAngle, Zero, 1 );
    T1 = vsetq_lane_f32( fSinAngle, T1, 2 );

    // T2 = (0, -sin, cos, 0)
    XMVECTOR T2 = vsetq_lane_f32( -fSinAngle, Zero, 1 );
    T2 = vsetq_lane_f32( fCosAngle, T2, 2 );

    XMMATRIX M;
    M.r[0] = g_XMIdentityR0.v;
    M.r[1] = T1;
    M.r[2] = T2;
    M.r[3] = g_XMIdentityR3.v;
    return M;
#elif defined(_XM_SSE_INTRINSICS_)
    float SinAngle;
    float CosAngle;
    XMScalarSinCos(&SinAngle, &CosAngle, Angle);

    XMVECTOR vSin = _mm_set_ss(SinAngle);
    XMVECTOR vCos = _mm_set_ss(CosAngle);
    // x = 0,y = cos,z = sin, w = 0
    vCos = _mm_shuffle_ps(vCos,vSin,_MM_SHUFFLE(3,0,0,3));
    XMMATRIX M;
    M.r[0] = g_XMIdentityR0;
    M.r[1] = vCos;
    // x = 0,y = sin,z = cos, w = 0
    vCos = XM_PERMUTE_PS(vCos,_MM_SHUFFLE(3,1,2,0));
    // x = 0,y = -sin,z = cos, w = 0
    vCos = _mm_mul_ps(vCos,g_XMNegateY);
    M.r[2] = vCos;
    M.r[3] = g_XMIdentityR3;
    return M;
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}
1369
1370//------------------------------------------------------------------------------
1371
// Builds a matrix that rotates about the y-axis by Angle radians.
// Rows: (cos,0,-sin,0), (0,1,0,0), (sin,0,cos,0), (0,0,0,1).
inline XMMATRIX XMMatrixRotationY
(
    float Angle
)
{
#if defined(_XM_NO_INTRINSICS_)

    float fSinAngle;
    float fCosAngle;
    XMScalarSinCos(&fSinAngle, &fCosAngle, Angle);

    // Row 1 (y axis) and row 3 stay identity; rows 0 and 2 rotate x/z.
    XMMATRIX M;
    M.m[0][0] = fCosAngle;
    M.m[0][1] = 0.0f;
    M.m[0][2] = -fSinAngle;
    M.m[0][3] = 0.0f;

    M.m[1][0] = 0.0f;
    M.m[1][1] = 1.0f;
    M.m[1][2] = 0.0f;
    M.m[1][3] = 0.0f;

    M.m[2][0] = fSinAngle;
    M.m[2][1] = 0.0f;
    M.m[2][2] = fCosAngle;
    M.m[2][3] = 0.0f;

    M.m[3][0] = 0.0f;
    M.m[3][1] = 0.0f;
    M.m[3][2] = 0.0f;
    M.m[3][3] = 1.0f;
    return M;

#elif defined(_XM_ARM_NEON_INTRINSICS_)
    float fSinAngle;
    float fCosAngle;
    XMScalarSinCos(&fSinAngle, &fCosAngle, Angle);

    const XMVECTOR Zero = vdupq_n_f32(0);

    // T0 = (cos, 0, -sin, 0)
    XMVECTOR T0 = vsetq_lane_f32( fCosAngle, Zero, 0 );
    T0 = vsetq_lane_f32( -fSinAngle, T0, 2 );

    // T2 = (sin, 0, cos, 0)
    XMVECTOR T2 = vsetq_lane_f32( fSinAngle, Zero, 0 );
    T2 = vsetq_lane_f32( fCosAngle, T2, 2 );

    XMMATRIX M;
    M.r[0] = T0;
    M.r[1] = g_XMIdentityR1.v;
    M.r[2] = T2;
    M.r[3] = g_XMIdentityR3.v;
    return M;
#elif defined(_XM_SSE_INTRINSICS_)
    float SinAngle;
    float CosAngle;
    XMScalarSinCos(&SinAngle, &CosAngle, Angle);

    XMVECTOR vSin = _mm_set_ss(SinAngle);
    XMVECTOR vCos = _mm_set_ss(CosAngle);
    // x = sin,y = 0,z = cos, w = 0
    vSin = _mm_shuffle_ps(vSin,vCos,_MM_SHUFFLE(3,0,3,0));
    XMMATRIX M;
    M.r[2] = vSin;
    M.r[1] = g_XMIdentityR1;
    // x = cos,y = 0,z = sin, w = 0
    vSin = XM_PERMUTE_PS(vSin,_MM_SHUFFLE(3,0,1,2));
    // x = cos,y = 0,z = -sin, w = 0
    vSin = _mm_mul_ps(vSin,g_XMNegateZ);
    M.r[0] = vSin;
    M.r[3] = g_XMIdentityR3;
    return M;
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}
1446
1447//------------------------------------------------------------------------------
1448
// Builds a matrix that rotates about the z-axis by Angle radians.
// Rows: (cos,sin,0,0), (-sin,cos,0,0), (0,0,1,0), (0,0,0,1).
inline XMMATRIX XMMatrixRotationZ
(
    float Angle
)
{
#if defined(_XM_NO_INTRINSICS_)

    float fSinAngle;
    float fCosAngle;
    XMScalarSinCos(&fSinAngle, &fCosAngle, Angle);

    // Row 2 (z axis) and row 3 stay identity; rows 0 and 1 rotate x/y.
    XMMATRIX M;
    M.m[0][0] = fCosAngle;
    M.m[0][1] = fSinAngle;
    M.m[0][2] = 0.0f;
    M.m[0][3] = 0.0f;

    M.m[1][0] = -fSinAngle;
    M.m[1][1] = fCosAngle;
    M.m[1][2] = 0.0f;
    M.m[1][3] = 0.0f;

    M.m[2][0] = 0.0f;
    M.m[2][1] = 0.0f;
    M.m[2][2] = 1.0f;
    M.m[2][3] = 0.0f;

    M.m[3][0] = 0.0f;
    M.m[3][1] = 0.0f;
    M.m[3][2] = 0.0f;
    M.m[3][3] = 1.0f;
    return M;

#elif defined(_XM_ARM_NEON_INTRINSICS_)
    float fSinAngle;
    float fCosAngle;
    XMScalarSinCos(&fSinAngle, &fCosAngle, Angle);

    const XMVECTOR Zero = vdupq_n_f32(0);

    // T0 = (cos, sin, 0, 0)
    XMVECTOR T0 = vsetq_lane_f32( fCosAngle, Zero, 0 );
    T0 = vsetq_lane_f32( fSinAngle, T0, 1 );

    // T1 = (-sin, cos, 0, 0)
    XMVECTOR T1 = vsetq_lane_f32( -fSinAngle, Zero, 0 );
    T1 = vsetq_lane_f32( fCosAngle, T1, 1 );

    XMMATRIX M;
    M.r[0] = T0;
    M.r[1] = T1;
    M.r[2] = g_XMIdentityR2.v;
    M.r[3] = g_XMIdentityR3.v;
    return M;
#elif defined(_XM_SSE_INTRINSICS_)
    float SinAngle;
    float CosAngle;
    XMScalarSinCos(&SinAngle, &CosAngle, Angle);

    XMVECTOR vSin = _mm_set_ss(SinAngle);
    XMVECTOR vCos = _mm_set_ss(CosAngle);
    // x = cos,y = sin,z = 0, w = 0
    vCos = _mm_unpacklo_ps(vCos,vSin);
    XMMATRIX M;
    M.r[0] = vCos;
    // x = sin,y = cos,z = 0, w = 0
    vCos = XM_PERMUTE_PS(vCos,_MM_SHUFFLE(3,2,0,1));
    // x = cos,y = -sin,z = 0, w = 0
    vCos = _mm_mul_ps(vCos,g_XMNegateX);
    M.r[1] = vCos;
    M.r[2] = g_XMIdentityR2;
    M.r[3] = g_XMIdentityR3;
    return M;
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}
1523
1524//------------------------------------------------------------------------------
1525
1526inline XMMATRIX XMMatrixRotationRollPitchYaw
1527(
1528 float Pitch,
1529 float Yaw,
1530 float Roll
1531)
1532{
1533 XMVECTOR Angles = XMVectorSet(Pitch, Yaw, Roll, 0.0f);
1534 return XMMatrixRotationRollPitchYawFromVector(Angles);
1535}
1536
1537//------------------------------------------------------------------------------
1538
1539inline XMMATRIX XMMatrixRotationRollPitchYawFromVector
1540(
1541 FXMVECTOR Angles // <Pitch, Yaw, Roll, undefined>
1542)
1543{
1544 XMVECTOR Q = XMQuaternionRotationRollPitchYawFromVector(Angles);
1545 return XMMatrixRotationQuaternion(Q);
1546}
1547
1548//------------------------------------------------------------------------------
1549
// Builds a matrix that rotates about an arbitrary axis by Angle radians.
// NormalAxis is assumed to already be unit length (not checked here;
// XMMatrixRotationAxis normalizes before calling). The construction expands
// the axis-angle rotation into diagonal terms cos + (1-cos)*n_i^2 and
// off-diagonal terms (1-cos)*n_i*n_j +/- sin*n_k.
inline XMMATRIX XMMatrixRotationNormal
(
    FXMVECTOR NormalAxis,
    float Angle
)
{
#if defined(_XM_NO_INTRINSICS_) || defined(_XM_ARM_NEON_INTRINSICS_)

    float fSinAngle;
    float fCosAngle;
    XMScalarSinCos(&fSinAngle, &fCosAngle, Angle);

    // A = (sin, cos, 1-cos, 0); C0/C1/C2 are those scalars splatted.
    XMVECTOR A = XMVectorSet(fSinAngle, fCosAngle, 1.0f - fCosAngle, 0.0f);

    XMVECTOR C2 = XMVectorSplatZ(A);
    XMVECTOR C1 = XMVectorSplatY(A);
    XMVECTOR C0 = XMVectorSplatX(A);

    // Rotations of the axis components used for the cross-products below.
    XMVECTOR N0 = XMVectorSwizzle<XM_SWIZZLE_Y, XM_SWIZZLE_Z, XM_SWIZZLE_X, XM_SWIZZLE_W>(NormalAxis);
    XMVECTOR N1 = XMVectorSwizzle<XM_SWIZZLE_Z, XM_SWIZZLE_X, XM_SWIZZLE_Y, XM_SWIZZLE_W>(NormalAxis);

    // V0 = (1-cos) * n_i * n_j for the off-diagonal pairs.
    XMVECTOR V0 = XMVectorMultiply(C2, N0);
    V0 = XMVectorMultiply(V0, N1);

    // R0 = (1-cos) * n^2 + cos : the diagonal terms.
    XMVECTOR R0 = XMVectorMultiply(C2, NormalAxis);
    R0 = XMVectorMultiplyAdd(R0, NormalAxis, C1);

    // R1/R2 = off-diagonal terms with +sin*n and -sin*n respectively.
    XMVECTOR R1 = XMVectorMultiplyAdd(C0, NormalAxis, V0);
    XMVECTOR R2 = XMVectorNegativeMultiplySubtract(C0, NormalAxis, V0);

    // Scatter the diagonal and off-diagonal terms into the three rows.
    V0 = XMVectorSelect(A, R0, g_XMSelect1110.v);
    XMVECTOR V1 = XMVectorPermute<XM_PERMUTE_0Z, XM_PERMUTE_1Y, XM_PERMUTE_1Z, XM_PERMUTE_0X>(R1, R2);
    XMVECTOR V2 = XMVectorPermute<XM_PERMUTE_0Y, XM_PERMUTE_1X, XM_PERMUTE_0Y, XM_PERMUTE_1X>(R1, R2);

    XMMATRIX M;
    M.r[0] = XMVectorPermute<XM_PERMUTE_0X, XM_PERMUTE_1X, XM_PERMUTE_1Y, XM_PERMUTE_0W>(V0, V1);
    M.r[1] = XMVectorPermute<XM_PERMUTE_1Z, XM_PERMUTE_0Y, XM_PERMUTE_1W, XM_PERMUTE_0W>(V0, V1);
    M.r[2] = XMVectorPermute<XM_PERMUTE_1X, XM_PERMUTE_1Y, XM_PERMUTE_0Z, XM_PERMUTE_0W>(V0, V2);
    M.r[3] = g_XMIdentityR3.v;
    return M;

#elif defined(_XM_SSE_INTRINSICS_)
    float fSinAngle;
    float fCosAngle;
    XMScalarSinCos(&fSinAngle, &fCosAngle, Angle);

    // C0 = sin, C1 = cos, C2 = 1-cos, each splatted to all lanes.
    XMVECTOR C2 = _mm_set_ps1(1.0f - fCosAngle);
    XMVECTOR C1 = _mm_set_ps1(fCosAngle);
    XMVECTOR C0 = _mm_set_ps1(fSinAngle);

    // N0 = (y,z,x,w), N1 = (z,x,y,w) of the axis.
    XMVECTOR N0 = XM_PERMUTE_PS(NormalAxis,_MM_SHUFFLE(3,0,2,1));
    XMVECTOR N1 = XM_PERMUTE_PS(NormalAxis,_MM_SHUFFLE(3,1,0,2));

    // V0 = (1-cos) * n_i * n_j for the off-diagonal pairs.
    XMVECTOR V0 = _mm_mul_ps(C2, N0);
    V0 = _mm_mul_ps(V0, N1);

    // R0 = (1-cos) * n^2 + cos : the diagonal terms.
    XMVECTOR R0 = _mm_mul_ps(C2, NormalAxis);
    R0 = _mm_mul_ps(R0, NormalAxis);
    R0 = _mm_add_ps(R0, C1);

    // R1/R2 = off-diagonal terms with +sin*n and -sin*n respectively.
    XMVECTOR R1 = _mm_mul_ps(C0, NormalAxis);
    R1 = _mm_add_ps(R1, V0);
    XMVECTOR R2 = _mm_mul_ps(C0, NormalAxis);
    R2 = _mm_sub_ps(V0,R2);

    // Scatter the terms into the three rotation rows.
    V0 = _mm_and_ps(R0,g_XMMask3);
    XMVECTOR V1 = _mm_shuffle_ps(R1,R2,_MM_SHUFFLE(2,1,2,0));
    V1 = XM_PERMUTE_PS(V1,_MM_SHUFFLE(0,3,2,1));
    XMVECTOR V2 = _mm_shuffle_ps(R1,R2,_MM_SHUFFLE(0,0,1,1));
    V2 = XM_PERMUTE_PS(V2,_MM_SHUFFLE(2,0,2,0));

    R2 = _mm_shuffle_ps(V0,V1,_MM_SHUFFLE(1,0,3,0));
    R2 = XM_PERMUTE_PS(R2,_MM_SHUFFLE(1,3,2,0));

    XMMATRIX M;
    M.r[0] = R2;

    R2 = _mm_shuffle_ps(V0,V1,_MM_SHUFFLE(3,2,3,1));
    R2 = XM_PERMUTE_PS(R2,_MM_SHUFFLE(1,3,0,2));
    M.r[1] = R2;

    V2 = _mm_shuffle_ps(V2,V0,_MM_SHUFFLE(3,2,1,0));
    M.r[2] = V2;
    M.r[3] = g_XMIdentityR3.v;
    return M;
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}
1638
1639//------------------------------------------------------------------------------
1640
1641inline XMMATRIX XMMatrixRotationAxis
1642(
1643 FXMVECTOR Axis,
1644 float Angle
1645)
1646{
1647 assert(!XMVector3Equal(Axis, XMVectorZero()));
1648 assert(!XMVector3IsInfinite(Axis));
1649
1650#if defined(_XM_NO_INTRINSICS_) || defined(_XM_SSE_INTRINSICS_) || defined(_XM_ARM_NEON_INTRINSICS_)
1651
1652 XMVECTOR Normal = XMVector3Normalize(Axis);
1653 return XMMatrixRotationNormal(Normal, Angle);
1654
1655#else // _XM_VMX128_INTRINSICS_
1656#endif // _XM_VMX128_INTRINSICS_
1657}
1658
1659//------------------------------------------------------------------------------
1660
// Builds a rotation matrix from a quaternion (assumed normalized — TODO
// confirm; no normalization is performed here). Standard expansion:
// diagonal terms 1 - 2(q_j^2 + q_k^2), off-diagonal terms 2(q_i q_j ± w q_k).
inline XMMATRIX XMMatrixRotationQuaternion
(
    FXMVECTOR Quaternion
)
{
#if defined(_XM_NO_INTRINSICS_) || defined(_XM_ARM_NEON_INTRINSICS_)

    static const XMVECTORF32 Constant1110 = {1.0f, 1.0f, 1.0f, 0.0f};

    // Q0 = 2q, Q1 = 2q*q (component-wise squares times two).
    XMVECTOR Q0 = XMVectorAdd(Quaternion, Quaternion);
    XMVECTOR Q1 = XMVectorMultiply(Quaternion, Q0);

    // R0 = diagonal terms: 1 - 2(y^2+z^2), 1 - 2(x^2+z^2), 1 - 2(x^2+y^2), 0.
    XMVECTOR V0 = XMVectorPermute<XM_PERMUTE_0Y, XM_PERMUTE_0X, XM_PERMUTE_0X, XM_PERMUTE_1W>(Q1, Constant1110.v);
    XMVECTOR V1 = XMVectorPermute<XM_PERMUTE_0Z, XM_PERMUTE_0Z, XM_PERMUTE_0Y, XM_PERMUTE_1W>(Q1, Constant1110.v);
    XMVECTOR R0 = XMVectorSubtract(Constant1110, V0);
    R0 = XMVectorSubtract(R0, V1);

    // V0 = 2*(xz, xy, yz): the symmetric off-diagonal products.
    V0 = XMVectorSwizzle<XM_SWIZZLE_X, XM_SWIZZLE_X, XM_SWIZZLE_Y, XM_SWIZZLE_W>(Quaternion);
    V1 = XMVectorSwizzle<XM_SWIZZLE_Z, XM_SWIZZLE_Y, XM_SWIZZLE_Z, XM_SWIZZLE_W>(Q0);
    V0 = XMVectorMultiply(V0, V1);

    // V1 = 2*w*(y, z, x): the antisymmetric terms.
    V1 = XMVectorSplatW(Quaternion);
    XMVECTOR V2 = XMVectorSwizzle<XM_SWIZZLE_Y, XM_SWIZZLE_Z, XM_SWIZZLE_X, XM_SWIZZLE_W>(Q0);
    V1 = XMVectorMultiply(V1, V2);

    // Sums and differences give the two off-diagonal triangles.
    XMVECTOR R1 = XMVectorAdd(V0, V1);
    XMVECTOR R2 = XMVectorSubtract(V0, V1);

    // Scatter diagonal and off-diagonal terms into the three rows.
    V0 = XMVectorPermute<XM_PERMUTE_0Y, XM_PERMUTE_1X, XM_PERMUTE_1Y, XM_PERMUTE_0Z>(R1, R2);
    V1 = XMVectorPermute<XM_PERMUTE_0X, XM_PERMUTE_1Z, XM_PERMUTE_0X, XM_PERMUTE_1Z>(R1, R2);

    XMMATRIX M;
    M.r[0] = XMVectorPermute<XM_PERMUTE_0X, XM_PERMUTE_1X, XM_PERMUTE_1Y, XM_PERMUTE_0W>(R0, V0);
    M.r[1] = XMVectorPermute<XM_PERMUTE_1Z, XM_PERMUTE_0Y, XM_PERMUTE_1W, XM_PERMUTE_0W>(R0, V0);
    M.r[2] = XMVectorPermute<XM_PERMUTE_1X, XM_PERMUTE_1Y, XM_PERMUTE_0Z, XM_PERMUTE_0W>(R0, V1);
    M.r[3] = g_XMIdentityR3.v;
    return M;

#elif defined(_XM_SSE_INTRINSICS_)
    static const XMVECTORF32 Constant1110 = {1.0f, 1.0f, 1.0f, 0.0f};

    // Q0 = 2q, Q1 = 2q*q.
    XMVECTOR Q0 = _mm_add_ps(Quaternion,Quaternion);
    XMVECTOR Q1 = _mm_mul_ps(Quaternion,Q0);

    // R0 = diagonal terms 1 - 2(q_j^2 + q_k^2), with w lane zeroed.
    XMVECTOR V0 = XM_PERMUTE_PS(Q1,_MM_SHUFFLE(3,0,0,1));
    V0 = _mm_and_ps(V0,g_XMMask3);
    XMVECTOR V1 = XM_PERMUTE_PS(Q1,_MM_SHUFFLE(3,1,2,2));
    V1 = _mm_and_ps(V1,g_XMMask3);
    XMVECTOR R0 = _mm_sub_ps(Constant1110,V0);
    R0 = _mm_sub_ps(R0, V1);

    // V0 = 2*(xz, xy, yz), V1 = 2*w*(y, z, x).
    V0 = XM_PERMUTE_PS(Quaternion,_MM_SHUFFLE(3,1,0,0));
    V1 = XM_PERMUTE_PS(Q0,_MM_SHUFFLE(3,2,1,2));
    V0 = _mm_mul_ps(V0, V1);

    V1 = XM_PERMUTE_PS(Quaternion,_MM_SHUFFLE(3,3,3,3));
    XMVECTOR V2 = XM_PERMUTE_PS(Q0,_MM_SHUFFLE(3,0,2,1));
    V1 = _mm_mul_ps(V1, V2);

    // Sums and differences give the two off-diagonal triangles.
    XMVECTOR R1 = _mm_add_ps(V0, V1);
    XMVECTOR R2 = _mm_sub_ps(V0, V1);

    // Scatter the terms into the three rotation rows.
    V0 = _mm_shuffle_ps(R1,R2,_MM_SHUFFLE(1,0,2,1));
    V0 = XM_PERMUTE_PS(V0,_MM_SHUFFLE(1,3,2,0));
    V1 = _mm_shuffle_ps(R1,R2,_MM_SHUFFLE(2,2,0,0));
    V1 = XM_PERMUTE_PS(V1,_MM_SHUFFLE(2,0,2,0));

    Q1 = _mm_shuffle_ps(R0,V0,_MM_SHUFFLE(1,0,3,0));
    Q1 = XM_PERMUTE_PS(Q1,_MM_SHUFFLE(1,3,2,0));

    XMMATRIX M;
    M.r[0] = Q1;

    Q1 = _mm_shuffle_ps(R0,V0,_MM_SHUFFLE(3,2,3,1));
    Q1 = XM_PERMUTE_PS(Q1,_MM_SHUFFLE(1,3,0,2));
    M.r[1] = Q1;

    Q1 = _mm_shuffle_ps(V1,R0,_MM_SHUFFLE(3,2,1,0));
    M.r[2] = Q1;
    M.r[3] = g_XMIdentityR3;
    return M;
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}
1745
1746//------------------------------------------------------------------------------
1747
// Builds a 2D (xy-plane) transformation matrix composed of a scaling about a
// scaling origin with its own orientation, a rotation (about the z-axis, in
// radians) around a rotation origin, and a translation. Only the x and y
// components of the vector parameters are used.
inline XMMATRIX XMMatrixTransformation2D
(
    FXMVECTOR ScalingOrigin,
    float ScalingOrientation,
    FXMVECTOR Scaling,
    FXMVECTOR RotationOrigin,
    float Rotation,
    GXMVECTOR Translation
)
{
    // M = Inverse(MScalingOrigin) * Transpose(MScalingOrientation) * MScaling * MScalingOrientation *
    //         MScalingOrigin * Inverse(MRotationOrigin) * MRotation * MRotationOrigin * MTranslation;

    // Keep only the x,y lanes of the scaling origin (z,w forced to 0).
    XMVECTOR VScalingOrigin = XMVectorSelect(g_XMSelect1100.v, ScalingOrigin, g_XMSelect1100.v);
    XMVECTOR NegScalingOrigin = XMVectorNegate(VScalingOrigin);

    XMMATRIX MScalingOriginI = XMMatrixTranslationFromVector(NegScalingOrigin);
    XMMATRIX MScalingOrientation = XMMatrixRotationZ(ScalingOrientation);
    // The transpose of a rotation matrix is its inverse.
    XMMATRIX MScalingOrientationT = XMMatrixTranspose(MScalingOrientation);
    // Scale only in x,y; z scale stays 1.
    XMVECTOR VScaling = XMVectorSelect(g_XMOne.v, Scaling, g_XMSelect1100.v);
    XMMATRIX MScaling = XMMatrixScalingFromVector(VScaling);
    XMVECTOR VRotationOrigin = XMVectorSelect(g_XMSelect1100.v, RotationOrigin, g_XMSelect1100.v);
    XMMATRIX MRotation = XMMatrixRotationZ(Rotation);
    XMVECTOR VTranslation = XMVectorSelect(g_XMSelect1100.v, Translation,g_XMSelect1100.v);

    // The remaining origin translations are applied by offsetting row 3
    // directly instead of multiplying by full translation matrices.
    XMMATRIX M = XMMatrixMultiply(MScalingOriginI, MScalingOrientationT);
    M = XMMatrixMultiply(M, MScaling);
    M = XMMatrixMultiply(M, MScalingOrientation);
    M.r[3] = XMVectorAdd(M.r[3], VScalingOrigin);
    M.r[3] = XMVectorSubtract(M.r[3], VRotationOrigin);
    M = XMMatrixMultiply(M, MRotation);
    M.r[3] = XMVectorAdd(M.r[3], VRotationOrigin);
    M.r[3] = XMVectorAdd(M.r[3], VTranslation);

    return M;
}
1784
1785//------------------------------------------------------------------------------
1786
1787inline XMMATRIX XMMatrixTransformation
1788(
1789 FXMVECTOR ScalingOrigin,
1790 FXMVECTOR ScalingOrientationQuaternion,
1791 FXMVECTOR Scaling,
1792 GXMVECTOR RotationOrigin,
1793 CXMVECTOR RotationQuaternion,
1794 CXMVECTOR Translation
1795)
1796{
1797 // M = Inverse(MScalingOrigin) * Transpose(MScalingOrientation) * MScaling * MScalingOrientation *
1798 // MScalingOrigin * Inverse(MRotationOrigin) * MRotation * MRotationOrigin * MTranslation;
1799
1800 XMVECTOR VScalingOrigin = XMVectorSelect(g_XMSelect1110.v, ScalingOrigin, g_XMSelect1110.v);
1801 XMVECTOR NegScalingOrigin = XMVectorNegate(ScalingOrigin);
1802
1803 XMMATRIX MScalingOriginI = XMMatrixTranslationFromVector(NegScalingOrigin);
1804 XMMATRIX MScalingOrientation = XMMatrixRotationQuaternion(ScalingOrientationQuaternion);
1805 XMMATRIX MScalingOrientationT = XMMatrixTranspose(MScalingOrientation);
1806 XMMATRIX MScaling = XMMatrixScalingFromVector(Scaling);
1807 XMVECTOR VRotationOrigin = XMVectorSelect(g_XMSelect1110.v, RotationOrigin, g_XMSelect1110.v);
1808 XMMATRIX MRotation = XMMatrixRotationQuaternion(RotationQuaternion);
1809 XMVECTOR VTranslation = XMVectorSelect(g_XMSelect1110.v, Translation, g_XMSelect1110.v);
1810
1811 XMMATRIX M;
1812 M = XMMatrixMultiply(MScalingOriginI, MScalingOrientationT);
1813 M = XMMatrixMultiply(M, MScaling);
1814 M = XMMatrixMultiply(M, MScalingOrientation);
1815 M.r[3] = XMVectorAdd(M.r[3], VScalingOrigin);
1816 M.r[3] = XMVectorSubtract(M.r[3], VRotationOrigin);
1817 M = XMMatrixMultiply(M, MRotation);
1818 M.r[3] = XMVectorAdd(M.r[3], VRotationOrigin);
1819 M.r[3] = XMVectorAdd(M.r[3], VTranslation);
1820 return M;
1821}
1822
1823//------------------------------------------------------------------------------
1824
// Builds a 2D (xy-plane) affine transformation matrix from a scale, a
// rotation (about the z-axis, in radians) around a rotation origin, and a
// translation. Only the x and y components of the vector parameters are used.
inline XMMATRIX XMMatrixAffineTransformation2D
(
    FXMVECTOR Scaling,
    FXMVECTOR RotationOrigin,
    float Rotation,
    FXMVECTOR Translation
)
{
    // M = MScaling * Inverse(MRotationOrigin) * MRotation * MRotationOrigin * MTranslation;

    // Scale only in x,y (z scale stays 1); keep only x,y of origin/translation.
    XMVECTOR VScaling = XMVectorSelect(g_XMOne.v, Scaling, g_XMSelect1100.v);
    XMMATRIX MScaling = XMMatrixScalingFromVector(VScaling);
    XMVECTOR VRotationOrigin = XMVectorSelect(g_XMSelect1100.v, RotationOrigin, g_XMSelect1100.v);
    XMMATRIX MRotation = XMMatrixRotationZ(Rotation);
    XMVECTOR VTranslation = XMVectorSelect(g_XMSelect1100.v, Translation,g_XMSelect1100.v);

    // The origin/translation matrices are applied by offsetting row 3
    // directly instead of multiplying by full translation matrices.
    XMMATRIX M;
    M = MScaling;
    M.r[3] = XMVectorSubtract(M.r[3], VRotationOrigin);
    M = XMMatrixMultiply(M, MRotation);
    M.r[3] = XMVectorAdd(M.r[3], VRotationOrigin);
    M.r[3] = XMVectorAdd(M.r[3], VTranslation);
    return M;
}
1849
1850//------------------------------------------------------------------------------
1851
1852inline XMMATRIX XMMatrixAffineTransformation
1853(
1854 FXMVECTOR Scaling,
1855 FXMVECTOR RotationOrigin,
1856 FXMVECTOR RotationQuaternion,
1857 GXMVECTOR Translation
1858)
1859{
1860 // M = MScaling * Inverse(MRotationOrigin) * MRotation * MRotationOrigin * MTranslation;
1861
1862 XMMATRIX MScaling = XMMatrixScalingFromVector(Scaling);
1863 XMVECTOR VRotationOrigin = XMVectorSelect(g_XMSelect1110.v, RotationOrigin,g_XMSelect1110.v);
1864 XMMATRIX MRotation = XMMatrixRotationQuaternion(RotationQuaternion);
1865 XMVECTOR VTranslation = XMVectorSelect(g_XMSelect1110.v, Translation,g_XMSelect1110.v);
1866
1867 XMMATRIX M;
1868 M = MScaling;
1869 M.r[3] = XMVectorSubtract(M.r[3], VRotationOrigin);
1870 M = XMMatrixMultiply(M, MRotation);
1871 M.r[3] = XMVectorAdd(M.r[3], VRotationOrigin);
1872 M.r[3] = XMVectorAdd(M.r[3], VTranslation);
1873 return M;
1874}
1875
1876//------------------------------------------------------------------------------
1877
1878inline XMMATRIX XMMatrixReflect
1879(
1880 FXMVECTOR ReflectionPlane
1881)
1882{
1883 assert(!XMVector3Equal(ReflectionPlane, XMVectorZero()));
1884 assert(!XMPlaneIsInfinite(ReflectionPlane));
1885
1886#if defined(_XM_NO_INTRINSICS_) || defined(_XM_SSE_INTRINSICS_) || defined(_XM_ARM_NEON_INTRINSICS_)
1887
1888 static const XMVECTORF32 NegativeTwo = {-2.0f, -2.0f, -2.0f, 0.0f};
1889
1890 XMVECTOR P = XMPlaneNormalize(ReflectionPlane);
1891 XMVECTOR S = XMVectorMultiply(P, NegativeTwo);
1892
1893 XMVECTOR A = XMVectorSplatX(P);
1894 XMVECTOR B = XMVectorSplatY(P);
1895 XMVECTOR C = XMVectorSplatZ(P);
1896 XMVECTOR D = XMVectorSplatW(P);
1897
1898 XMMATRIX M;
1899 M.r[0] = XMVectorMultiplyAdd(A, S, g_XMIdentityR0.v);
1900 M.r[1] = XMVectorMultiplyAdd(B, S, g_XMIdentityR1.v);
1901 M.r[2] = XMVectorMultiplyAdd(C, S, g_XMIdentityR2.v);
1902 M.r[3] = XMVectorMultiplyAdd(D, S, g_XMIdentityR3.v);
1903 return M;
1904
1905#else // _XM_VMX128_INTRINSICS_
1906#endif // _XM_VMX128_INTRINSICS_
1907}
1908
1909//------------------------------------------------------------------------------
1910
// Builds a matrix that flattens geometry onto ShadowPlane as lit from
// LightPosition.  Mathematically M = dot(P, L) * I - P * L^T where P is the
// normalized plane and L the light position.
inline XMMATRIX XMMatrixShadow
(
    FXMVECTOR ShadowPlane,
    FXMVECTOR LightPosition
)
{
    static const XMVECTORU32 Select0001 = {XM_SELECT_0, XM_SELECT_0, XM_SELECT_0, XM_SELECT_1};

    assert(!XMVector3Equal(ShadowPlane, XMVectorZero()));
    assert(!XMPlaneIsInfinite(ShadowPlane));

    XMVECTOR P = XMPlaneNormalize(ShadowPlane);
    XMVECTOR Dot = XMPlaneDot(P, LightPosition);
    P = XMVectorNegate(P);
    // Broadcast each component of -P into its own register.
    XMVECTOR D = XMVectorSplatW(P);
    XMVECTOR C = XMVectorSplatZ(P);
    XMVECTOR B = XMVectorSplatY(P);
    XMVECTOR A = XMVectorSplatX(P);
    // Keep the plane dot product in the w lane only: Dot = (0, 0, 0, dot).
    Dot = XMVectorSelect(Select0001.v, Dot, Select0001.v);

    XMMATRIX M;
    // Rows are built from w down to x; each RotateLeft moves the dot product
    // into the diagonal lane of the next row, so statement order matters.
    M.r[3] = XMVectorMultiplyAdd(D, LightPosition, Dot);
    Dot = XMVectorRotateLeft(Dot, 1);
    M.r[2] = XMVectorMultiplyAdd(C, LightPosition, Dot);
    Dot = XMVectorRotateLeft(Dot, 1);
    M.r[1] = XMVectorMultiplyAdd(B, LightPosition, Dot);
    Dot = XMVectorRotateLeft(Dot, 1);
    M.r[0] = XMVectorMultiplyAdd(A, LightPosition, Dot);
    return M;
}
1941
1942//------------------------------------------------------------------------------
1943// View and projection initialization operations
1944//------------------------------------------------------------------------------
1945
1946inline XMMATRIX XMMatrixLookAtLH
1947(
1948 FXMVECTOR EyePosition,
1949 FXMVECTOR FocusPosition,
1950 FXMVECTOR UpDirection
1951)
1952{
1953 XMVECTOR EyeDirection = XMVectorSubtract(FocusPosition, EyePosition);
1954 return XMMatrixLookToLH(EyePosition, EyeDirection, UpDirection);
1955}
1956
1957//------------------------------------------------------------------------------
1958
1959inline XMMATRIX XMMatrixLookAtRH
1960(
1961 FXMVECTOR EyePosition,
1962 FXMVECTOR FocusPosition,
1963 FXMVECTOR UpDirection
1964)
1965{
1966 XMVECTOR NegEyeDirection = XMVectorSubtract(EyePosition, FocusPosition);
1967 return XMMatrixLookToLH(EyePosition, NegEyeDirection, UpDirection);
1968}
1969
1970//------------------------------------------------------------------------------
1971
// Builds a left-handed view matrix from an eye position, a view direction and
// an up hint.  The rotation columns are the orthonormal camera basis
// (right, up, forward) and the last row is the eye projected into that basis.
inline XMMATRIX XMMatrixLookToLH
(
    FXMVECTOR EyePosition,
    FXMVECTOR EyeDirection,
    FXMVECTOR UpDirection
)
{
    assert(!XMVector3Equal(EyeDirection, XMVectorZero()));
    assert(!XMVector3IsInfinite(EyeDirection));
    assert(!XMVector3Equal(UpDirection, XMVectorZero()));
    assert(!XMVector3IsInfinite(UpDirection));

#if defined(_XM_NO_INTRINSICS_) || defined(_XM_SSE_INTRINSICS_) || defined(_XM_ARM_NEON_INTRINSICS_)

    // Orthonormal basis: R2 = forward, R0 = right, R1 = recomputed up.
    XMVECTOR R2 = XMVector3Normalize(EyeDirection);

    XMVECTOR R0 = XMVector3Cross(UpDirection, R2);
    R0 = XMVector3Normalize(R0);

    XMVECTOR R1 = XMVector3Cross(R2, R0);

    XMVECTOR NegEyePosition = XMVectorNegate(EyePosition);

    // Translation terms: -eye projected onto each basis vector.
    XMVECTOR D0 = XMVector3Dot(R0, NegEyePosition);
    XMVECTOR D1 = XMVector3Dot(R1, NegEyePosition);
    XMVECTOR D2 = XMVector3Dot(R2, NegEyePosition);

    XMMATRIX M;
    // Pack each row as (basis.xyz, dot) and transpose so the basis vectors
    // become columns with the dots landing in the translation row.
    M.r[0] = XMVectorSelect(D0, R0, g_XMSelect1110.v);
    M.r[1] = XMVectorSelect(D1, R1, g_XMSelect1110.v);
    M.r[2] = XMVectorSelect(D2, R2, g_XMSelect1110.v);
    M.r[3] = g_XMIdentityR3.v;

    M = XMMatrixTranspose(M);

    return M;

#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}
2012
2013//------------------------------------------------------------------------------
2014
2015inline XMMATRIX XMMatrixLookToRH
2016(
2017 FXMVECTOR EyePosition,
2018 FXMVECTOR EyeDirection,
2019 FXMVECTOR UpDirection
2020)
2021{
2022 XMVECTOR NegEyeDirection = XMVectorNegate(EyeDirection);
2023 return XMMatrixLookToLH(EyePosition, NegEyeDirection, UpDirection);
2024}
2025
2026//------------------------------------------------------------------------------
2027
// Builds a left-handed perspective projection matrix from the width and
// height of the view volume at the near clipping plane.  Depth maps
// NearZ -> 0 and FarZ -> 1 after the perspective divide (z/w in [0,1]).
inline XMMATRIX XMMatrixPerspectiveLH
(
    float ViewWidth,
    float ViewHeight,
    float NearZ,
    float FarZ
)
{
    assert(!XMScalarNearEqual(ViewWidth, 0.0f, 0.00001f));
    assert(!XMScalarNearEqual(ViewHeight, 0.0f, 0.00001f));
    assert(!XMScalarNearEqual(FarZ, NearZ, 0.00001f));

#if defined(_XM_NO_INTRINSICS_)

    float TwoNearZ = NearZ + NearZ;
    float fRange = FarZ / (FarZ - NearZ);

    XMMATRIX M;
    M.m[0][0] = TwoNearZ / ViewWidth;
    M.m[0][1] = 0.0f;
    M.m[0][2] = 0.0f;
    M.m[0][3] = 0.0f;

    M.m[1][0] = 0.0f;
    M.m[1][1] = TwoNearZ / ViewHeight;
    M.m[1][2] = 0.0f;
    M.m[1][3] = 0.0f;

    M.m[2][0] = 0.0f;
    M.m[2][1] = 0.0f;
    M.m[2][2] = fRange;
    M.m[2][3] = 1.0f;        // w' = z: copies view-space z into w for the divide

    M.m[3][0] = 0.0f;
    M.m[3][1] = 0.0f;
    M.m[3][2] = -fRange * NearZ;
    M.m[3][3] = 0.0f;
    return M;

#elif defined(_XM_ARM_NEON_INTRINSICS_)
    float TwoNearZ = NearZ + NearZ;
    float fRange = FarZ / (FarZ - NearZ);
    const XMVECTOR Zero = vdupq_n_f32(0);
    XMMATRIX M;
    // Each row is zero except one lane; row 2 starts from (0,0,0,1) so its
    // w lane carries the 1 that feeds the perspective divide.
    M.r[0] = vsetq_lane_f32( TwoNearZ / ViewWidth, Zero, 0 );
    M.r[1] = vsetq_lane_f32( TwoNearZ / ViewHeight, Zero, 1 );
    M.r[2] = vsetq_lane_f32( fRange, g_XMIdentityR3.v, 2 );
    M.r[3] = vsetq_lane_f32( -fRange * NearZ, Zero, 2 );
    return M;
#elif defined(_XM_SSE_INTRINSICS_)
    XMMATRIX M;
    float TwoNearZ = NearZ + NearZ;
    float fRange = FarZ / (FarZ - NearZ);
    // Note: This is recorded on the stack
    XMVECTOR rMem = {
        TwoNearZ / ViewWidth,
        TwoNearZ / ViewHeight,
        fRange,
        -fRange * NearZ
    };
    // Copy from memory to SSE register
    XMVECTOR vValues = rMem;
    XMVECTOR vTemp = _mm_setzero_ps();
    // Copy x only
    vTemp = _mm_move_ss(vTemp,vValues);
    // TwoNearZ / ViewWidth,0,0,0
    M.r[0] = vTemp;
    // 0,TwoNearZ / ViewHeight,0,0
    vTemp = vValues;
    vTemp = _mm_and_ps(vTemp,g_XMMaskY);
    M.r[1] = vTemp;
    // x=fRange,y=-fRange * NearZ,0,1.0f
    vValues = _mm_shuffle_ps(vValues,g_XMIdentityR3,_MM_SHUFFLE(3,2,3,2));
    // 0,0,fRange,1.0f
    vTemp = _mm_setzero_ps();
    vTemp = _mm_shuffle_ps(vTemp,vValues,_MM_SHUFFLE(3,0,0,0));
    M.r[2] = vTemp;
    // 0,0,-fRange * NearZ,0
    vTemp = _mm_shuffle_ps(vTemp,vValues,_MM_SHUFFLE(2,1,0,0));
    M.r[3] = vTemp;

    return M;
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}
2113
2114//------------------------------------------------------------------------------
2115
// Builds a right-handed perspective projection matrix from the width and
// height of the view volume at the near clipping plane.  Depth maps
// -NearZ -> 0 and -FarZ -> 1 after the perspective divide (z/w in [0,1]).
inline XMMATRIX XMMatrixPerspectiveRH
(
    float ViewWidth,
    float ViewHeight,
    float NearZ,
    float FarZ
)
{
    assert(!XMScalarNearEqual(ViewWidth, 0.0f, 0.00001f));
    assert(!XMScalarNearEqual(ViewHeight, 0.0f, 0.00001f));
    assert(!XMScalarNearEqual(FarZ, NearZ, 0.00001f));

#if defined(_XM_NO_INTRINSICS_)

    float TwoNearZ = NearZ + NearZ;
    float fRange = FarZ / (NearZ - FarZ);   // negative range: RH looks down -z

    XMMATRIX M;
    M.m[0][0] = TwoNearZ / ViewWidth;
    M.m[0][1] = 0.0f;
    M.m[0][2] = 0.0f;
    M.m[0][3] = 0.0f;

    M.m[1][0] = 0.0f;
    M.m[1][1] = TwoNearZ / ViewHeight;
    M.m[1][2] = 0.0f;
    M.m[1][3] = 0.0f;

    M.m[2][0] = 0.0f;
    M.m[2][1] = 0.0f;
    M.m[2][2] = fRange;
    M.m[2][3] = -1.0f;       // w' = -z for the right-handed perspective divide

    M.m[3][0] = 0.0f;
    M.m[3][1] = 0.0f;
    M.m[3][2] = fRange * NearZ;
    M.m[3][3] = 0.0f;
    return M;

#elif defined(_XM_ARM_NEON_INTRINSICS_)
    float TwoNearZ = NearZ + NearZ;
    float fRange = FarZ / (NearZ - FarZ);
    const XMVECTOR Zero = vdupq_n_f32(0);

    XMMATRIX M;
    // Row 2 starts from (0,0,0,-1) so its w lane carries the -1 divide term.
    M.r[0] = vsetq_lane_f32( TwoNearZ / ViewWidth, Zero, 0 );
    M.r[1] = vsetq_lane_f32( TwoNearZ / ViewHeight, Zero, 1 );
    M.r[2] = vsetq_lane_f32( fRange, g_XMNegIdentityR3.v, 2 );
    M.r[3] = vsetq_lane_f32( fRange * NearZ, Zero, 2 );
    return M;
#elif defined(_XM_SSE_INTRINSICS_)
    XMMATRIX M;
    float TwoNearZ = NearZ + NearZ;
    float fRange = FarZ / (NearZ-FarZ);
    // Note: This is recorded on the stack
    XMVECTOR rMem = {
        TwoNearZ / ViewWidth,
        TwoNearZ / ViewHeight,
        fRange,
        fRange * NearZ
    };
    // Copy from memory to SSE register
    XMVECTOR vValues = rMem;
    XMVECTOR vTemp = _mm_setzero_ps();
    // Copy x only
    vTemp = _mm_move_ss(vTemp,vValues);
    // TwoNearZ / ViewWidth,0,0,0
    M.r[0] = vTemp;
    // 0,TwoNearZ / ViewHeight,0,0
    vTemp = vValues;
    vTemp = _mm_and_ps(vTemp,g_XMMaskY);
    M.r[1] = vTemp;
    // x=fRange,y=fRange * NearZ,0,-1.0f
    vValues = _mm_shuffle_ps(vValues,g_XMNegIdentityR3,_MM_SHUFFLE(3,2,3,2));
    // 0,0,fRange,-1.0f
    vTemp = _mm_setzero_ps();
    vTemp = _mm_shuffle_ps(vTemp,vValues,_MM_SHUFFLE(3,0,0,0));
    M.r[2] = vTemp;
    // 0,0,fRange * NearZ,0
    vTemp = _mm_shuffle_ps(vTemp,vValues,_MM_SHUFFLE(2,1,0,0));
    M.r[3] = vTemp;
    return M;
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}
2201
2202//------------------------------------------------------------------------------
2203
// Builds a left-handed perspective projection matrix from a vertical field of
// view (radians), an aspect ratio (height / width, per the parameter name),
// and near/far distances.  Depth maps NearZ -> 0 and FarZ -> 1 (z/w in [0,1]).
inline XMMATRIX XMMatrixPerspectiveFovLH
(
    float FovAngleY,
    float AspectHByW,
    float NearZ,
    float FarZ
)
{
    assert(!XMScalarNearEqual(FovAngleY, 0.0f, 0.00001f * 2.0f));
    assert(!XMScalarNearEqual(AspectHByW, 0.0f, 0.00001f));
    assert(!XMScalarNearEqual(FarZ, NearZ, 0.00001f));

#if defined(_XM_NO_INTRINSICS_)

    float SinFov;
    float CosFov;
    XMScalarSinCos(&SinFov, &CosFov, 0.5f * FovAngleY);

    // Height = cot(FovAngleY / 2): the y focal length.
    float Height = CosFov / SinFov;
    float Width = Height / AspectHByW;
    float fRange = FarZ / (FarZ-NearZ);

    XMMATRIX M;
    M.m[0][0] = Width;
    M.m[0][1] = 0.0f;
    M.m[0][2] = 0.0f;
    M.m[0][3] = 0.0f;

    M.m[1][0] = 0.0f;
    M.m[1][1] = Height;
    M.m[1][2] = 0.0f;
    M.m[1][3] = 0.0f;

    M.m[2][0] = 0.0f;
    M.m[2][1] = 0.0f;
    M.m[2][2] = fRange;
    M.m[2][3] = 1.0f;        // w' = z for the perspective divide

    M.m[3][0] = 0.0f;
    M.m[3][1] = 0.0f;
    M.m[3][2] = -fRange * NearZ;
    M.m[3][3] = 0.0f;
    return M;

#elif defined(_XM_ARM_NEON_INTRINSICS_)
    float SinFov;
    float CosFov;
    XMScalarSinCos(&SinFov, &CosFov, 0.5f * FovAngleY);

    float fRange = FarZ / (FarZ-NearZ);
    float Height = CosFov / SinFov;
    float Width = Height / AspectHByW;
    const XMVECTOR Zero = vdupq_n_f32(0);

    XMMATRIX M;
    // Row 2 starts from (0,0,0,1) so its w lane carries the divide term.
    M.r[0] = vsetq_lane_f32( Width, Zero, 0 );
    M.r[1] = vsetq_lane_f32( Height, Zero, 1 );
    M.r[2] = vsetq_lane_f32( fRange, g_XMIdentityR3.v, 2 );
    M.r[3] = vsetq_lane_f32( -fRange * NearZ, Zero, 2 );
    return M;
#elif defined(_XM_SSE_INTRINSICS_)
    float SinFov;
    float CosFov;
    XMScalarSinCos(&SinFov, &CosFov, 0.5f * FovAngleY);

    float fRange = FarZ / (FarZ-NearZ);
    // Note: This is recorded on the stack
    float Height = CosFov / SinFov;
    XMVECTOR rMem = {
        Height / AspectHByW,
        Height,
        fRange,
        -fRange * NearZ
    };
    // Copy from memory to SSE register
    XMVECTOR vValues = rMem;
    XMVECTOR vTemp = _mm_setzero_ps();
    // Copy x only
    vTemp = _mm_move_ss(vTemp,vValues);
    // Height / AspectHByW,0,0,0
    XMMATRIX M;
    M.r[0] = vTemp;
    // 0,Height,0,0
    vTemp = vValues;
    vTemp = _mm_and_ps(vTemp,g_XMMaskY);
    M.r[1] = vTemp;
    // x=fRange,y=-fRange * NearZ,0,1.0f
    vTemp = _mm_setzero_ps();
    vValues = _mm_shuffle_ps(vValues,g_XMIdentityR3,_MM_SHUFFLE(3,2,3,2));
    // 0,0,fRange,1.0f
    vTemp = _mm_shuffle_ps(vTemp,vValues,_MM_SHUFFLE(3,0,0,0));
    M.r[2] = vTemp;
    // 0,0,-fRange * NearZ,0.0f
    vTemp = _mm_shuffle_ps(vTemp,vValues,_MM_SHUFFLE(2,1,0,0));
    M.r[3] = vTemp;
    return M;
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}
2303
2304//------------------------------------------------------------------------------
2305
// Builds a right-handed perspective projection matrix from a vertical field
// of view (radians), an aspect ratio, and near/far distances.  Depth maps
// -NearZ -> 0 and -FarZ -> 1 after the perspective divide (z/w in [0,1]).
inline XMMATRIX XMMatrixPerspectiveFovRH
(
    float FovAngleY,
    float AspectHByW,
    float NearZ,
    float FarZ
)
{
    assert(!XMScalarNearEqual(FovAngleY, 0.0f, 0.00001f * 2.0f));
    assert(!XMScalarNearEqual(AspectHByW, 0.0f, 0.00001f));
    assert(!XMScalarNearEqual(FarZ, NearZ, 0.00001f));

#if defined(_XM_NO_INTRINSICS_)

    float SinFov;
    float CosFov;
    XMScalarSinCos(&SinFov, &CosFov, 0.5f * FovAngleY);

    // Height = cot(FovAngleY / 2): the y focal length.
    float Height = CosFov / SinFov;
    float Width = Height / AspectHByW;
    float fRange = FarZ / (NearZ-FarZ);   // negative range: RH looks down -z

    XMMATRIX M;
    M.m[0][0] = Width;
    M.m[0][1] = 0.0f;
    M.m[0][2] = 0.0f;
    M.m[0][3] = 0.0f;

    M.m[1][0] = 0.0f;
    M.m[1][1] = Height;
    M.m[1][2] = 0.0f;
    M.m[1][3] = 0.0f;

    M.m[2][0] = 0.0f;
    M.m[2][1] = 0.0f;
    M.m[2][2] = fRange;
    M.m[2][3] = -1.0f;       // w' = -z for the right-handed perspective divide

    M.m[3][0] = 0.0f;
    M.m[3][1] = 0.0f;
    M.m[3][2] = fRange * NearZ;
    M.m[3][3] = 0.0f;
    return M;

#elif defined(_XM_ARM_NEON_INTRINSICS_)
    float SinFov;
    float CosFov;
    XMScalarSinCos(&SinFov, &CosFov, 0.5f * FovAngleY);
    float fRange = FarZ / (NearZ-FarZ);
    float Height = CosFov / SinFov;
    float Width = Height / AspectHByW;
    const XMVECTOR Zero = vdupq_n_f32(0);

    XMMATRIX M;
    // Row 2 starts from (0,0,0,-1) so its w lane carries the -1 divide term.
    M.r[0] = vsetq_lane_f32( Width, Zero, 0 );
    M.r[1] = vsetq_lane_f32( Height, Zero, 1 );
    M.r[2] = vsetq_lane_f32( fRange, g_XMNegIdentityR3.v, 2 );
    M.r[3] = vsetq_lane_f32( fRange * NearZ, Zero, 2 );
    return M;
#elif defined(_XM_SSE_INTRINSICS_)
    float SinFov;
    float CosFov;
    XMScalarSinCos(&SinFov, &CosFov, 0.5f * FovAngleY);
    float fRange = FarZ / (NearZ-FarZ);
    // Note: This is recorded on the stack
    float Height = CosFov / SinFov;
    XMVECTOR rMem = {
        Height / AspectHByW,
        Height,
        fRange,
        fRange * NearZ
    };
    // Copy from memory to SSE register
    XMVECTOR vValues = rMem;
    XMVECTOR vTemp = _mm_setzero_ps();
    // Copy x only
    vTemp = _mm_move_ss(vTemp,vValues);
    // Height / AspectHByW,0,0,0
    XMMATRIX M;
    M.r[0] = vTemp;
    // 0,Height,0,0
    vTemp = vValues;
    vTemp = _mm_and_ps(vTemp,g_XMMaskY);
    M.r[1] = vTemp;
    // x=fRange,y=fRange * NearZ,0,-1.0f
    vTemp = _mm_setzero_ps();
    vValues = _mm_shuffle_ps(vValues,g_XMNegIdentityR3,_MM_SHUFFLE(3,2,3,2));
    // 0,0,fRange,-1.0f
    vTemp = _mm_shuffle_ps(vTemp,vValues,_MM_SHUFFLE(3,0,0,0));
    M.r[2] = vTemp;
    // 0,0,fRange * NearZ,0.0f
    vTemp = _mm_shuffle_ps(vTemp,vValues,_MM_SHUFFLE(2,1,0,0));
    M.r[3] = vTemp;
    return M;
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}
2403
2404//------------------------------------------------------------------------------
2405
// Builds a left-handed, possibly off-center perspective projection matrix
// from explicit left/right/bottom/top extents of the view volume at the near
// plane.  Row 2 carries the off-center shear terms; depth maps NearZ -> 0,
// FarZ -> 1 (z/w in [0,1]).
inline XMMATRIX XMMatrixPerspectiveOffCenterLH
(
    float ViewLeft,
    float ViewRight,
    float ViewBottom,
    float ViewTop,
    float NearZ,
    float FarZ
)
{
    assert(!XMScalarNearEqual(ViewRight, ViewLeft, 0.00001f));
    assert(!XMScalarNearEqual(ViewTop, ViewBottom, 0.00001f));
    assert(!XMScalarNearEqual(FarZ, NearZ, 0.00001f));

#if defined(_XM_NO_INTRINSICS_)

    float TwoNearZ = NearZ + NearZ;
    float ReciprocalWidth = 1.0f / (ViewRight - ViewLeft);
    float ReciprocalHeight = 1.0f / (ViewTop - ViewBottom);
    float fRange = FarZ / (FarZ-NearZ);

    XMMATRIX M;
    M.m[0][0] = TwoNearZ * ReciprocalWidth;
    M.m[0][1] = 0.0f;
    M.m[0][2] = 0.0f;
    M.m[0][3] = 0.0f;

    M.m[1][0] = 0.0f;
    M.m[1][1] = TwoNearZ * ReciprocalHeight;
    M.m[1][2] = 0.0f;
    M.m[1][3] = 0.0f;

    // Off-center shear: recenters the asymmetric frustum.
    M.m[2][0] = -(ViewLeft + ViewRight) * ReciprocalWidth;
    M.m[2][1] = -(ViewTop + ViewBottom) * ReciprocalHeight;
    M.m[2][2] = fRange;
    M.m[2][3] = 1.0f;

    M.m[3][0] = 0.0f;
    M.m[3][1] = 0.0f;
    M.m[3][2] = -fRange * NearZ;
    M.m[3][3] = 0.0f;
    return M;

#elif defined(_XM_ARM_NEON_INTRINSICS_)
    float TwoNearZ = NearZ + NearZ;
    float ReciprocalWidth = 1.0f / (ViewRight - ViewLeft);
    float ReciprocalHeight = 1.0f / (ViewTop - ViewBottom);
    float fRange = FarZ / (FarZ-NearZ);
    const XMVECTOR Zero = vdupq_n_f32(0);

    XMMATRIX M;
    M.r[0] = vsetq_lane_f32( TwoNearZ * ReciprocalWidth, Zero, 0 );
    M.r[1] = vsetq_lane_f32( TwoNearZ * ReciprocalHeight, Zero, 1 );
    M.r[2] = XMVectorSet(-(ViewLeft + ViewRight) * ReciprocalWidth,
                         -(ViewTop + ViewBottom) * ReciprocalHeight,
                         fRange,
                         1.0f);
    M.r[3] = vsetq_lane_f32( -fRange * NearZ, Zero, 2 );
    return M;
#elif defined(_XM_SSE_INTRINSICS_)
    XMMATRIX M;
    float TwoNearZ = NearZ+NearZ;
    float ReciprocalWidth = 1.0f / (ViewRight - ViewLeft);
    float ReciprocalHeight = 1.0f / (ViewTop - ViewBottom);
    float fRange = FarZ / (FarZ-NearZ);
    // Note: This is recorded on the stack
    XMVECTOR rMem = {
        TwoNearZ*ReciprocalWidth,
        TwoNearZ*ReciprocalHeight,
        -fRange * NearZ,
        0
    };
    // Copy from memory to SSE register
    XMVECTOR vValues = rMem;
    XMVECTOR vTemp = _mm_setzero_ps();
    // Copy x only
    vTemp = _mm_move_ss(vTemp,vValues);
    // TwoNearZ*ReciprocalWidth,0,0,0
    M.r[0] = vTemp;
    // 0,TwoNearZ*ReciprocalHeight,0,0
    vTemp = vValues;
    vTemp = _mm_and_ps(vTemp,g_XMMaskY);
    M.r[1] = vTemp;
    // Row 2: off-center shear terms, fRange, 1.0f
    M.r[2] = XMVectorSet( -(ViewLeft + ViewRight) * ReciprocalWidth,
                          -(ViewTop + ViewBottom) * ReciprocalHeight,
                          fRange,
                          1.0f );
    // 0,0,-fRange * NearZ,0.0f
    vValues = _mm_and_ps(vValues,g_XMMaskZ);
    M.r[3] = vValues;
    return M;
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}
2501
2502//------------------------------------------------------------------------------
2503
// Builds a right-handed, possibly off-center perspective projection matrix
// from explicit left/right/bottom/top extents of the view volume at the near
// plane.  Row 2 carries the off-center shear terms; depth maps -NearZ -> 0,
// -FarZ -> 1 (z/w in [0,1]).
inline XMMATRIX XMMatrixPerspectiveOffCenterRH
(
    float ViewLeft,
    float ViewRight,
    float ViewBottom,
    float ViewTop,
    float NearZ,
    float FarZ
)
{
    assert(!XMScalarNearEqual(ViewRight, ViewLeft, 0.00001f));
    assert(!XMScalarNearEqual(ViewTop, ViewBottom, 0.00001f));
    assert(!XMScalarNearEqual(FarZ, NearZ, 0.00001f));

#if defined(_XM_NO_INTRINSICS_)

    float TwoNearZ = NearZ + NearZ;
    float ReciprocalWidth = 1.0f / (ViewRight - ViewLeft);
    float ReciprocalHeight = 1.0f / (ViewTop - ViewBottom);
    float fRange = FarZ / (NearZ-FarZ);   // negative range: RH looks down -z

    XMMATRIX M;
    M.m[0][0] = TwoNearZ * ReciprocalWidth;
    M.m[0][1] = 0.0f;
    M.m[0][2] = 0.0f;
    M.m[0][3] = 0.0f;

    M.m[1][0] = 0.0f;
    M.m[1][1] = TwoNearZ * ReciprocalHeight;
    M.m[1][2] = 0.0f;
    M.m[1][3] = 0.0f;

    // Off-center shear: recenters the asymmetric frustum.
    M.m[2][0] = (ViewLeft + ViewRight) * ReciprocalWidth;
    M.m[2][1] = (ViewTop + ViewBottom) * ReciprocalHeight;
    M.m[2][2] = fRange;
    M.m[2][3] = -1.0f;

    M.m[3][0] = 0.0f;
    M.m[3][1] = 0.0f;
    M.m[3][2] = fRange * NearZ;
    M.m[3][3] = 0.0f;
    return M;

#elif defined(_XM_ARM_NEON_INTRINSICS_)
    float TwoNearZ = NearZ + NearZ;
    float ReciprocalWidth = 1.0f / (ViewRight - ViewLeft);
    float ReciprocalHeight = 1.0f / (ViewTop - ViewBottom);
    float fRange = FarZ / (NearZ-FarZ);
    const XMVECTOR Zero = vdupq_n_f32(0);

    XMMATRIX M;
    M.r[0] = vsetq_lane_f32( TwoNearZ * ReciprocalWidth, Zero, 0 );
    M.r[1] = vsetq_lane_f32( TwoNearZ * ReciprocalHeight, Zero, 1 );
    M.r[2] = XMVectorSet((ViewLeft + ViewRight) * ReciprocalWidth,
                         (ViewTop + ViewBottom) * ReciprocalHeight,
                         fRange,
                         -1.0f);
    M.r[3] = vsetq_lane_f32( fRange * NearZ, Zero, 2 );
    return M;
#elif defined(_XM_SSE_INTRINSICS_)
    XMMATRIX M;
    float TwoNearZ = NearZ+NearZ;
    float ReciprocalWidth = 1.0f / (ViewRight - ViewLeft);
    float ReciprocalHeight = 1.0f / (ViewTop - ViewBottom);
    float fRange = FarZ / (NearZ-FarZ);
    // Note: This is recorded on the stack
    XMVECTOR rMem = {
        TwoNearZ*ReciprocalWidth,
        TwoNearZ*ReciprocalHeight,
        fRange * NearZ,
        0
    };
    // Copy from memory to SSE register
    XMVECTOR vValues = rMem;
    XMVECTOR vTemp = _mm_setzero_ps();
    // Copy x only
    vTemp = _mm_move_ss(vTemp,vValues);
    // TwoNearZ*ReciprocalWidth,0,0,0
    M.r[0] = vTemp;
    // 0,TwoNearZ*ReciprocalHeight,0,0
    vTemp = vValues;
    vTemp = _mm_and_ps(vTemp,g_XMMaskY);
    M.r[1] = vTemp;
    // Row 2: off-center shear terms, fRange, -1.0f
    M.r[2] = XMVectorSet( (ViewLeft + ViewRight) * ReciprocalWidth,
                          (ViewTop + ViewBottom) * ReciprocalHeight,
                          fRange,
                          -1.0f );
    // 0,0,fRange * NearZ,0.0f
    vValues = _mm_and_ps(vValues,g_XMMaskZ);
    M.r[3] = vValues;
    return M;
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}
2599
2600//------------------------------------------------------------------------------
2601
// Builds a left-handed orthographic projection matrix from the width and
// height of the view volume.  Depth maps NearZ -> 0 and FarZ -> 1 linearly
// (no perspective divide; w stays 1).
inline XMMATRIX XMMatrixOrthographicLH
(
    float ViewWidth,
    float ViewHeight,
    float NearZ,
    float FarZ
)
{
    assert(!XMScalarNearEqual(ViewWidth, 0.0f, 0.00001f));
    assert(!XMScalarNearEqual(ViewHeight, 0.0f, 0.00001f));
    assert(!XMScalarNearEqual(FarZ, NearZ, 0.00001f));

#if defined(_XM_NO_INTRINSICS_)

    float fRange = 1.0f / (FarZ-NearZ);

    XMMATRIX M;
    M.m[0][0] = 2.0f / ViewWidth;
    M.m[0][1] = 0.0f;
    M.m[0][2] = 0.0f;
    M.m[0][3] = 0.0f;

    M.m[1][0] = 0.0f;
    M.m[1][1] = 2.0f / ViewHeight;
    M.m[1][2] = 0.0f;
    M.m[1][3] = 0.0f;

    M.m[2][0] = 0.0f;
    M.m[2][1] = 0.0f;
    M.m[2][2] = fRange;
    M.m[2][3] = 0.0f;

    M.m[3][0] = 0.0f;
    M.m[3][1] = 0.0f;
    M.m[3][2] = -fRange * NearZ;
    M.m[3][3] = 1.0f;
    return M;

#elif defined(_XM_ARM_NEON_INTRINSICS_)
    float fRange = 1.0f / (FarZ-NearZ);

    const XMVECTOR Zero = vdupq_n_f32(0);
    XMMATRIX M;
    // Row 3 starts from (0,0,0,1) so its w lane keeps the affine 1.
    M.r[0] = vsetq_lane_f32( 2.0f / ViewWidth, Zero, 0 );
    M.r[1] = vsetq_lane_f32( 2.0f / ViewHeight, Zero, 1 );
    M.r[2] = vsetq_lane_f32( fRange, Zero, 2 );
    M.r[3] = vsetq_lane_f32( -fRange * NearZ, g_XMIdentityR3.v, 2 );
    return M;
#elif defined(_XM_SSE_INTRINSICS_)
    XMMATRIX M;
    float fRange = 1.0f / (FarZ-NearZ);
    // Note: This is recorded on the stack
    XMVECTOR rMem = {
        2.0f / ViewWidth,
        2.0f / ViewHeight,
        fRange,
        -fRange * NearZ
    };
    // Copy from memory to SSE register
    XMVECTOR vValues = rMem;
    XMVECTOR vTemp = _mm_setzero_ps();
    // Copy x only
    vTemp = _mm_move_ss(vTemp,vValues);
    // 2.0f / ViewWidth,0,0,0
    M.r[0] = vTemp;
    // 0,2.0f / ViewHeight,0,0
    vTemp = vValues;
    vTemp = _mm_and_ps(vTemp,g_XMMaskY);
    M.r[1] = vTemp;
    // x=fRange,y=-fRange * NearZ,0,1.0f
    vTemp = _mm_setzero_ps();
    vValues = _mm_shuffle_ps(vValues,g_XMIdentityR3,_MM_SHUFFLE(3,2,3,2));
    // 0,0,fRange,0.0f
    vTemp = _mm_shuffle_ps(vTemp,vValues,_MM_SHUFFLE(2,0,0,0));
    M.r[2] = vTemp;
    // 0,0,-fRange * NearZ,1.0f
    vTemp = _mm_shuffle_ps(vTemp,vValues,_MM_SHUFFLE(3,1,0,0));
    M.r[3] = vTemp;
    return M;
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}
2684
2685//------------------------------------------------------------------------------
2686
// Builds a right-handed orthographic projection matrix from the width and
// height of the view volume.  Depth maps -NearZ -> 0 and -FarZ -> 1 linearly
// (no perspective divide; w stays 1).
inline XMMATRIX XMMatrixOrthographicRH
(
    float ViewWidth,
    float ViewHeight,
    float NearZ,
    float FarZ
)
{
    assert(!XMScalarNearEqual(ViewWidth, 0.0f, 0.00001f));
    assert(!XMScalarNearEqual(ViewHeight, 0.0f, 0.00001f));
    assert(!XMScalarNearEqual(FarZ, NearZ, 0.00001f));

#if defined(_XM_NO_INTRINSICS_)

    float fRange = 1.0f / (NearZ-FarZ);   // negative range: RH looks down -z

    XMMATRIX M;
    M.m[0][0] = 2.0f / ViewWidth;
    M.m[0][1] = 0.0f;
    M.m[0][2] = 0.0f;
    M.m[0][3] = 0.0f;

    M.m[1][0] = 0.0f;
    M.m[1][1] = 2.0f / ViewHeight;
    M.m[1][2] = 0.0f;
    M.m[1][3] = 0.0f;

    M.m[2][0] = 0.0f;
    M.m[2][1] = 0.0f;
    M.m[2][2] = fRange;
    M.m[2][3] = 0.0f;

    M.m[3][0] = 0.0f;
    M.m[3][1] = 0.0f;
    M.m[3][2] = fRange * NearZ;
    M.m[3][3] = 1.0f;
    return M;

#elif defined(_XM_ARM_NEON_INTRINSICS_)
    float fRange = 1.0f / (NearZ-FarZ);

    const XMVECTOR Zero = vdupq_n_f32(0);
    XMMATRIX M;
    // Row 3 starts from (0,0,0,1) so its w lane keeps the affine 1.
    M.r[0] = vsetq_lane_f32( 2.0f / ViewWidth, Zero, 0 );
    M.r[1] = vsetq_lane_f32( 2.0f / ViewHeight, Zero, 1 );
    M.r[2] = vsetq_lane_f32( fRange, Zero, 2 );
    M.r[3] = vsetq_lane_f32( fRange * NearZ, g_XMIdentityR3.v, 2 );
    return M;
#elif defined(_XM_SSE_INTRINSICS_)
    XMMATRIX M;
    float fRange = 1.0f / (NearZ-FarZ);
    // Note: This is recorded on the stack
    XMVECTOR rMem = {
        2.0f / ViewWidth,
        2.0f / ViewHeight,
        fRange,
        fRange * NearZ
    };
    // Copy from memory to SSE register
    XMVECTOR vValues = rMem;
    XMVECTOR vTemp = _mm_setzero_ps();
    // Copy x only
    vTemp = _mm_move_ss(vTemp,vValues);
    // 2.0f / ViewWidth,0,0,0
    M.r[0] = vTemp;
    // 0,2.0f / ViewHeight,0,0
    vTemp = vValues;
    vTemp = _mm_and_ps(vTemp,g_XMMaskY);
    M.r[1] = vTemp;
    // x=fRange,y=fRange * NearZ,0,1.0f
    vTemp = _mm_setzero_ps();
    vValues = _mm_shuffle_ps(vValues,g_XMIdentityR3,_MM_SHUFFLE(3,2,3,2));
    // 0,0,fRange,0.0f
    vTemp = _mm_shuffle_ps(vTemp,vValues,_MM_SHUFFLE(2,0,0,0));
    M.r[2] = vTemp;
    // 0,0,fRange * NearZ,1.0f
    vTemp = _mm_shuffle_ps(vTemp,vValues,_MM_SHUFFLE(3,1,0,0));
    M.r[3] = vTemp;
    return M;
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}
2769
2770//------------------------------------------------------------------------------
2771
// Builds a left-handed, possibly off-center orthographic projection matrix
// from explicit left/right/bottom/top extents of the view volume.  Row 3
// carries the recentering translation; depth maps NearZ -> 0, FarZ -> 1.
inline XMMATRIX XMMatrixOrthographicOffCenterLH
(
    float ViewLeft,
    float ViewRight,
    float ViewBottom,
    float ViewTop,
    float NearZ,
    float FarZ
)
{
    assert(!XMScalarNearEqual(ViewRight, ViewLeft, 0.00001f));
    assert(!XMScalarNearEqual(ViewTop, ViewBottom, 0.00001f));
    assert(!XMScalarNearEqual(FarZ, NearZ, 0.00001f));

#if defined(_XM_NO_INTRINSICS_)

    float ReciprocalWidth = 1.0f / (ViewRight - ViewLeft);
    float ReciprocalHeight = 1.0f / (ViewTop - ViewBottom);
    float fRange = 1.0f / (FarZ-NearZ);

    XMMATRIX M;
    M.m[0][0] = ReciprocalWidth + ReciprocalWidth;
    M.m[0][1] = 0.0f;
    M.m[0][2] = 0.0f;
    M.m[0][3] = 0.0f;

    M.m[1][0] = 0.0f;
    M.m[1][1] = ReciprocalHeight + ReciprocalHeight;
    M.m[1][2] = 0.0f;
    M.m[1][3] = 0.0f;

    M.m[2][0] = 0.0f;
    M.m[2][1] = 0.0f;
    M.m[2][2] = fRange;
    M.m[2][3] = 0.0f;

    // Translation that recenters the asymmetric volume.
    M.m[3][0] = -(ViewLeft + ViewRight) * ReciprocalWidth;
    M.m[3][1] = -(ViewTop + ViewBottom) * ReciprocalHeight;
    M.m[3][2] = -fRange * NearZ;
    M.m[3][3] = 1.0f;
    return M;

#elif defined(_XM_ARM_NEON_INTRINSICS_)
    float ReciprocalWidth = 1.0f / (ViewRight - ViewLeft);
    float ReciprocalHeight = 1.0f / (ViewTop - ViewBottom);
    float fRange = 1.0f / (FarZ-NearZ);
    const XMVECTOR Zero = vdupq_n_f32(0);
    XMMATRIX M;
    M.r[0] = vsetq_lane_f32( ReciprocalWidth + ReciprocalWidth, Zero, 0 );
    M.r[1] = vsetq_lane_f32( ReciprocalHeight + ReciprocalHeight, Zero, 1 );
    M.r[2] = vsetq_lane_f32( fRange, Zero, 2 );
    M.r[3] = XMVectorSet(-(ViewLeft + ViewRight) * ReciprocalWidth,
                         -(ViewTop + ViewBottom) * ReciprocalHeight,
                         -fRange * NearZ,
                         1.0f);
    return M;
#elif defined(_XM_SSE_INTRINSICS_)
    XMMATRIX M;
    float fReciprocalWidth = 1.0f / (ViewRight - ViewLeft);
    float fReciprocalHeight = 1.0f / (ViewTop - ViewBottom);
    float fRange = 1.0f / (FarZ-NearZ);
    // Note: This is recorded on the stack
    XMVECTOR rMem = {
        fReciprocalWidth,
        fReciprocalHeight,
        fRange,
        1.0f
    };
    // Second operand of the row-3 product below; the componentwise multiply
    // rMem * rMem2 yields the translation row in one instruction.
    XMVECTOR rMem2 = {
        -(ViewLeft + ViewRight),
        -(ViewTop + ViewBottom),
        -NearZ,
        1.0f
    };
    // Copy from memory to SSE register
    XMVECTOR vValues = rMem;
    XMVECTOR vTemp = _mm_setzero_ps();
    // Copy x only
    vTemp = _mm_move_ss(vTemp,vValues);
    // fReciprocalWidth*2,0,0,0
    vTemp = _mm_add_ss(vTemp,vTemp);
    M.r[0] = vTemp;
    // 0,fReciprocalHeight*2,0,0
    vTemp = vValues;
    vTemp = _mm_and_ps(vTemp,g_XMMaskY);
    vTemp = _mm_add_ps(vTemp,vTemp);
    M.r[1] = vTemp;
    // 0,0,fRange,0.0f
    vTemp = vValues;
    vTemp = _mm_and_ps(vTemp,g_XMMaskZ);
    M.r[2] = vTemp;
    // -(ViewLeft + ViewRight)*fReciprocalWidth,-(ViewTop + ViewBottom)*fReciprocalHeight,fRange*-NearZ,1.0f
    vValues = _mm_mul_ps(vValues,rMem2);
    M.r[3] = vValues;
    return M;
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}
2870
2871//------------------------------------------------------------------------------
2872
// Builds a right-handed orthographic projection matrix for the off-center
// view volume [ViewLeft,ViewRight] x [ViewBottom,ViewTop] with depth range
// [NearZ,FarZ].  Differs from the LH variant only in the z terms:
// fRange = 1/(NearZ-FarZ) and the translation row carries fRange*NearZ.
inline XMMATRIX XMMatrixOrthographicOffCenterRH
(
    float ViewLeft,
    float ViewRight,
    float ViewBottom,
    float ViewTop,
    float NearZ,
    float FarZ
)
{
    // A degenerate (zero-extent) volume would divide by zero below.
    assert(!XMScalarNearEqual(ViewRight, ViewLeft, 0.00001f));
    assert(!XMScalarNearEqual(ViewTop, ViewBottom, 0.00001f));
    assert(!XMScalarNearEqual(FarZ, NearZ, 0.00001f));

#if defined(_XM_NO_INTRINSICS_)

    float ReciprocalWidth = 1.0f / (ViewRight - ViewLeft);
    float ReciprocalHeight = 1.0f / (ViewTop - ViewBottom);
    float fRange = 1.0f / (NearZ-FarZ);

    // Diagonal scales plus a translation row; all other entries are zero.
    XMMATRIX M;
    M.m[0][0] = ReciprocalWidth + ReciprocalWidth;
    M.m[0][1] = 0.0f;
    M.m[0][2] = 0.0f;
    M.m[0][3] = 0.0f;

    M.m[1][0] = 0.0f;
    M.m[1][1] = ReciprocalHeight + ReciprocalHeight;
    M.m[1][2] = 0.0f;
    M.m[1][3] = 0.0f;

    M.m[2][0] = 0.0f;
    M.m[2][1] = 0.0f;
    M.m[2][2] = fRange;
    M.m[2][3] = 0.0f;

    M.r[3] = XMVectorSet(-(ViewLeft + ViewRight) * ReciprocalWidth,
                         -(ViewTop + ViewBottom) * ReciprocalHeight,
                         fRange * NearZ,
                         1.0f);
    return M;

#elif defined(_XM_ARM_NEON_INTRINSICS_)
    float ReciprocalWidth = 1.0f / (ViewRight - ViewLeft);
    float ReciprocalHeight = 1.0f / (ViewTop - ViewBottom);
    float fRange = 1.0f / (NearZ-FarZ);
    const XMVECTOR Zero = vdupq_n_f32(0);
    XMMATRIX M;
    // Each of the first three rows has a single non-zero lane (the diagonal).
    M.r[0] = vsetq_lane_f32( ReciprocalWidth + ReciprocalWidth, Zero, 0 );
    M.r[1] = vsetq_lane_f32( ReciprocalHeight + ReciprocalHeight, Zero, 1 );
    M.r[2] = vsetq_lane_f32( fRange, Zero, 2 );
    M.r[3] = XMVectorSet(-(ViewLeft + ViewRight) * ReciprocalWidth,
                         -(ViewTop + ViewBottom) * ReciprocalHeight,
                         fRange * NearZ,
                         1.0f);
    return M;
#elif defined(_XM_SSE_INTRINSICS_)
    XMMATRIX M;
    float fReciprocalWidth = 1.0f / (ViewRight - ViewLeft);
    float fReciprocalHeight = 1.0f / (ViewTop - ViewBottom);
    float fRange = 1.0f / (NearZ-FarZ);
    // Note: This is recorded on the stack
    XMVECTOR rMem = {
        fReciprocalWidth,
        fReciprocalHeight,
        fRange,
        1.0f
    };
    // rMem2 holds the numerators of the translation row; multiplying it by
    // rMem component-wise yields row 3 in a single _mm_mul_ps.
    XMVECTOR rMem2 = {
        -(ViewLeft + ViewRight),
        -(ViewTop + ViewBottom),
        NearZ,
        1.0f
    };
    // Copy from memory to SSE register
    XMVECTOR vValues = rMem;
    XMVECTOR vTemp = _mm_setzero_ps();
    // Copy x only
    vTemp = _mm_move_ss(vTemp,vValues);
    // fReciprocalWidth*2,0,0,0
    vTemp = _mm_add_ss(vTemp,vTemp);
    M.r[0] = vTemp;
    // 0,fReciprocalHeight*2,0,0
    vTemp = vValues;
    vTemp = _mm_and_ps(vTemp,g_XMMaskY);
    vTemp = _mm_add_ps(vTemp,vTemp);
    M.r[1] = vTemp;
    // 0,0,fRange,0.0f
    vTemp = vValues;
    vTemp = _mm_and_ps(vTemp,g_XMMaskZ);
    M.r[2] = vTemp;
    // -(ViewLeft + ViewRight)*fReciprocalWidth,-(ViewTop + ViewBottom)*fReciprocalHeight,fRange*NearZ,1.0f
    vValues = _mm_mul_ps(vValues,rMem2);
    M.r[3] = vValues;
    return M;
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}
2971
2972
2973/****************************************************************************
2974 *
2975 * XMMATRIX operators and methods
2976 *
2977 ****************************************************************************/
2978
2979//------------------------------------------------------------------------------
2980
2981inline XMMATRIX::XMMATRIX
2982(
2983 float m00, float m01, float m02, float m03,
2984 float m10, float m11, float m12, float m13,
2985 float m20, float m21, float m22, float m23,
2986 float m30, float m31, float m32, float m33
2987)
2988{
2989 r[0] = XMVectorSet(m00, m01, m02, m03);
2990 r[1] = XMVectorSet(m10, m11, m12, m13);
2991 r[2] = XMVectorSet(m20, m21, m22, m23);
2992 r[3] = XMVectorSet(m30, m31, m32, m33);
2993}
2994
2995//------------------------------------------------------------------------------
2996_Use_decl_annotations_
2997inline XMMATRIX::XMMATRIX
2998(
2999 const float* pArray
3000)
3001{
3002 assert( pArray != NULL );
3003 r[0] = XMLoadFloat4((const XMFLOAT4*)pArray);
3004 r[1] = XMLoadFloat4((const XMFLOAT4*)(pArray + 4));
3005 r[2] = XMLoadFloat4((const XMFLOAT4*)(pArray + 8));
3006 r[3] = XMLoadFloat4((const XMFLOAT4*)(pArray + 12));
3007}
3008
3009//------------------------------------------------------------------------------
3010
3011inline XMMATRIX XMMATRIX::operator- () const
3012{
3013 XMMATRIX R;
3014 R.r[0] = XMVectorNegate( r[0] );
3015 R.r[1] = XMVectorNegate( r[1] );
3016 R.r[2] = XMVectorNegate( r[2] );
3017 R.r[3] = XMVectorNegate( r[3] );
3018 return R;
3019}
3020
3021//------------------------------------------------------------------------------
3022
3023inline XMMATRIX& XMMATRIX::operator+= (CXMMATRIX M)
3024{
3025 r[0] = XMVectorAdd( r[0], M.r[0] );
3026 r[1] = XMVectorAdd( r[1], M.r[1] );
3027 r[2] = XMVectorAdd( r[2], M.r[2] );
3028 r[3] = XMVectorAdd( r[3], M.r[3] );
3029 return *this;
3030}
3031
3032//------------------------------------------------------------------------------
3033
3034inline XMMATRIX& XMMATRIX::operator-= (CXMMATRIX M)
3035{
3036 r[0] = XMVectorSubtract( r[0], M.r[0] );
3037 r[1] = XMVectorSubtract( r[1], M.r[1] );
3038 r[2] = XMVectorSubtract( r[2], M.r[2] );
3039 r[3] = XMVectorSubtract( r[3], M.r[3] );
3040 return *this;
3041}
3042
3043//------------------------------------------------------------------------------
3044
3045inline XMMATRIX& XMMATRIX::operator*=(CXMMATRIX M)
3046{
3047 *this = XMMatrixMultiply( *this, M );
3048 return *this;
3049}
3050
3051//------------------------------------------------------------------------------
3052
3053inline XMMATRIX& XMMATRIX::operator*= (float S)
3054{
3055 r[0] = XMVectorScale( r[0], S );
3056 r[1] = XMVectorScale( r[1], S );
3057 r[2] = XMVectorScale( r[2], S );
3058 r[3] = XMVectorScale( r[3], S );
3059 return *this;
3060}
3061
3062//------------------------------------------------------------------------------
3063
3064inline XMMATRIX& XMMATRIX::operator/= (float S)
3065{
3066 assert( S != 0.0f );
3067 float t = 1.0f / S;
3068 r[0] = XMVectorScale( r[0], t );
3069 r[1] = XMVectorScale( r[1], t );
3070 r[2] = XMVectorScale( r[2], t );
3071 r[3] = XMVectorScale( r[3], t );
3072 return *this;
3073}
3074
3075//------------------------------------------------------------------------------
3076
3077inline XMMATRIX XMMATRIX::operator+ (CXMMATRIX M) const
3078{
3079 XMMATRIX R;
3080 R.r[0] = XMVectorAdd( r[0], M.r[0] );
3081 R.r[1] = XMVectorAdd( r[1], M.r[1] );
3082 R.r[2] = XMVectorAdd( r[2], M.r[2] );
3083 R.r[3] = XMVectorAdd( r[3], M.r[3] );
3084 return R;
3085}
3086
3087//------------------------------------------------------------------------------
3088
3089inline XMMATRIX XMMATRIX::operator- (CXMMATRIX M) const
3090{
3091 XMMATRIX R;
3092 R.r[0] = XMVectorSubtract( r[0], M.r[0] );
3093 R.r[1] = XMVectorSubtract( r[1], M.r[1] );
3094 R.r[2] = XMVectorSubtract( r[2], M.r[2] );
3095 R.r[3] = XMVectorSubtract( r[3], M.r[3] );
3096 return R;
3097}
3098
3099//------------------------------------------------------------------------------
3100
3101inline XMMATRIX XMMATRIX::operator*(CXMMATRIX M) const
3102{
3103 return XMMatrixMultiply(*this, M);
3104}
3105
3106//------------------------------------------------------------------------------
3107
3108inline XMMATRIX XMMATRIX::operator* (float S) const
3109{
3110 XMMATRIX R;
3111 R.r[0] = XMVectorScale( r[0], S );
3112 R.r[1] = XMVectorScale( r[1], S );
3113 R.r[2] = XMVectorScale( r[2], S );
3114 R.r[3] = XMVectorScale( r[3], S );
3115 return R;
3116}
3117
3118//------------------------------------------------------------------------------
3119
3120inline XMMATRIX XMMATRIX::operator/ (float S) const
3121{
3122 assert( S != 0.0f );
3123 XMMATRIX R;
3124 float t = 1.0f / S;
3125 R.r[0] = XMVectorScale( r[0], t );
3126 R.r[1] = XMVectorScale( r[1], t );
3127 R.r[2] = XMVectorScale( r[2], t );
3128 R.r[3] = XMVectorScale( r[3], t );
3129 return R;
3130}
3131
3132//------------------------------------------------------------------------------
3133
3134inline XMMATRIX operator*
3135(
3136 float S,
3137 CXMMATRIX M
3138)
3139{
3140 XMMATRIX R;
3141 R.r[0] = XMVectorScale( M.r[0], S );
3142 R.r[1] = XMVectorScale( M.r[1], S );
3143 R.r[2] = XMVectorScale( M.r[2], S );
3144 R.r[3] = XMVectorScale( M.r[3], S );
3145 return R;
3146}
3147
3148/****************************************************************************
3149 *
3150 * XMFLOAT3X3 operators
3151 *
3152 ****************************************************************************/
3153
3154//------------------------------------------------------------------------------
3155
3156inline XMFLOAT3X3::XMFLOAT3X3
3157(
3158 float m00, float m01, float m02,
3159 float m10, float m11, float m12,
3160 float m20, float m21, float m22
3161)
3162{
3163 m[0][0] = m00;
3164 m[0][1] = m01;
3165 m[0][2] = m02;
3166
3167 m[1][0] = m10;
3168 m[1][1] = m11;
3169 m[1][2] = m12;
3170
3171 m[2][0] = m20;
3172 m[2][1] = m21;
3173 m[2][2] = m22;
3174}
3175
3176//------------------------------------------------------------------------------
3177_Use_decl_annotations_
3178inline XMFLOAT3X3::XMFLOAT3X3
3179(
3180 const float* pArray
3181)
3182{
3183 assert( pArray != NULL );
3184 for (size_t Row = 0; Row < 3; Row++)
3185 {
3186 for (size_t Column = 0; Column < 3; Column++)
3187 {
3188 m[Row][Column] = pArray[Row * 3 + Column];
3189 }
3190 }
3191}
3192
3193//------------------------------------------------------------------------------
3194
3195inline XMFLOAT3X3& XMFLOAT3X3::operator=
3196(
3197 const XMFLOAT3X3& Float3x3
3198)
3199{
3200 _11 = Float3x3._11;
3201 _12 = Float3x3._12;
3202 _13 = Float3x3._13;
3203 _21 = Float3x3._21;
3204 _22 = Float3x3._22;
3205 _23 = Float3x3._23;
3206 _31 = Float3x3._31;
3207 _32 = Float3x3._32;
3208 _33 = Float3x3._33;
3209
3210 return *this;
3211}
3212
3213/****************************************************************************
3214 *
3215 * XMFLOAT4X3 operators
3216 *
3217 ****************************************************************************/
3218
3219//------------------------------------------------------------------------------
3220
3221inline XMFLOAT4X3::XMFLOAT4X3
3222(
3223 float m00, float m01, float m02,
3224 float m10, float m11, float m12,
3225 float m20, float m21, float m22,
3226 float m30, float m31, float m32
3227)
3228{
3229 m[0][0] = m00;
3230 m[0][1] = m01;
3231 m[0][2] = m02;
3232
3233 m[1][0] = m10;
3234 m[1][1] = m11;
3235 m[1][2] = m12;
3236
3237 m[2][0] = m20;
3238 m[2][1] = m21;
3239 m[2][2] = m22;
3240
3241 m[3][0] = m30;
3242 m[3][1] = m31;
3243 m[3][2] = m32;
3244}
3245
3246//------------------------------------------------------------------------------
3247_Use_decl_annotations_
3248inline XMFLOAT4X3::XMFLOAT4X3
3249(
3250 const float* pArray
3251)
3252{
3253 assert( pArray != NULL );
3254
3255 m[0][0] = pArray[0];
3256 m[0][1] = pArray[1];
3257 m[0][2] = pArray[2];
3258
3259 m[1][0] = pArray[3];
3260 m[1][1] = pArray[4];
3261 m[1][2] = pArray[5];
3262
3263 m[2][0] = pArray[6];
3264 m[2][1] = pArray[7];
3265 m[2][2] = pArray[8];
3266
3267 m[3][0] = pArray[9];
3268 m[3][1] = pArray[10];
3269 m[3][2] = pArray[11];
3270}
3271
3272//------------------------------------------------------------------------------
3273
// Copy assignment: copies all 12 floats of the 4x3 matrix.
// The 12 contiguous floats are moved as three 4-float vector loads/stores
// starting at _11, _22 and _33 (float offsets 0, 4 and 8), which together
// cover _11.._43 -- six vector ops instead of twelve scalar copies.  The
// casts rely on the elements being laid out contiguously in memory.
inline XMFLOAT4X3& XMFLOAT4X3::operator=
(
    const XMFLOAT4X3& Float4x3
)
{
    XMVECTOR V1 = XMLoadFloat4((const XMFLOAT4*)&Float4x3._11);
    XMVECTOR V2 = XMLoadFloat4((const XMFLOAT4*)&Float4x3._22);
    XMVECTOR V3 = XMLoadFloat4((const XMFLOAT4*)&Float4x3._33);

    XMStoreFloat4((XMFLOAT4*)&_11, V1);
    XMStoreFloat4((XMFLOAT4*)&_22, V2);
    XMStoreFloat4((XMFLOAT4*)&_33, V3);

    return *this;
}
3289
3290//------------------------------------------------------------------------------
3291
// Copy assignment for the aligned 4x3 type: copies all 12 floats.
// Same chunking as the unaligned XMFLOAT4X3 version -- three 4-float
// vectors at _11, _22 and _33 (float offsets 0, 4 and 8) covering
// _11.._43 -- but using the aligned load/store variants, which require
// each chunk to start on a 16-byte boundary (guaranteed by the A type's
// alignment plus the 16-byte chunk stride).
inline XMFLOAT4X3A& XMFLOAT4X3A::operator=
(
    const XMFLOAT4X3A& Float4x3
)
{
    XMVECTOR V1 = XMLoadFloat4A((const XMFLOAT4A*)&Float4x3._11);
    XMVECTOR V2 = XMLoadFloat4A((const XMFLOAT4A*)&Float4x3._22);
    XMVECTOR V3 = XMLoadFloat4A((const XMFLOAT4A*)&Float4x3._33);

    XMStoreFloat4A((XMFLOAT4A*)&_11, V1);
    XMStoreFloat4A((XMFLOAT4A*)&_22, V2);
    XMStoreFloat4A((XMFLOAT4A*)&_33, V3);

    return *this;
}
3307
3308/****************************************************************************
3309 *
3310 * XMFLOAT4X4 operators
3311 *
3312 ****************************************************************************/
3313
3314//------------------------------------------------------------------------------
3315
3316inline XMFLOAT4X4::XMFLOAT4X4
3317(
3318 float m00, float m01, float m02, float m03,
3319 float m10, float m11, float m12, float m13,
3320 float m20, float m21, float m22, float m23,
3321 float m30, float m31, float m32, float m33
3322)
3323{
3324 m[0][0] = m00;
3325 m[0][1] = m01;
3326 m[0][2] = m02;
3327 m[0][3] = m03;
3328
3329 m[1][0] = m10;
3330 m[1][1] = m11;
3331 m[1][2] = m12;
3332 m[1][3] = m13;
3333
3334 m[2][0] = m20;
3335 m[2][1] = m21;
3336 m[2][2] = m22;
3337 m[2][3] = m23;
3338
3339 m[3][0] = m30;
3340 m[3][1] = m31;
3341 m[3][2] = m32;
3342 m[3][3] = m33;
3343}
3344
3345//------------------------------------------------------------------------------
3346_Use_decl_annotations_
3347inline XMFLOAT4X4::XMFLOAT4X4
3348(
3349 const float* pArray
3350)
3351{
3352 assert( pArray != NULL );
3353
3354 m[0][0] = pArray[0];
3355 m[0][1] = pArray[1];
3356 m[0][2] = pArray[2];
3357 m[0][3] = pArray[3];
3358
3359 m[1][0] = pArray[4];
3360 m[1][1] = pArray[5];
3361 m[1][2] = pArray[6];
3362 m[1][3] = pArray[7];
3363
3364 m[2][0] = pArray[8];
3365 m[2][1] = pArray[9];
3366 m[2][2] = pArray[10];
3367 m[2][3] = pArray[11];
3368
3369 m[3][0] = pArray[12];
3370 m[3][1] = pArray[13];
3371 m[3][2] = pArray[14];
3372 m[3][3] = pArray[15];
3373}
3374
3375//------------------------------------------------------------------------------
3376
3377inline XMFLOAT4X4& XMFLOAT4X4::operator=
3378(
3379 const XMFLOAT4X4& Float4x4
3380)
3381{
3382 XMVECTOR V1 = XMLoadFloat4((const XMFLOAT4*)&Float4x4._11);
3383 XMVECTOR V2 = XMLoadFloat4((const XMFLOAT4*)&Float4x4._21);
3384 XMVECTOR V3 = XMLoadFloat4((const XMFLOAT4*)&Float4x4._31);
3385 XMVECTOR V4 = XMLoadFloat4((const XMFLOAT4*)&Float4x4._41);
3386
3387 XMStoreFloat4((XMFLOAT4*)&_11, V1);
3388 XMStoreFloat4((XMFLOAT4*)&_21, V2);
3389 XMStoreFloat4((XMFLOAT4*)&_31, V3);
3390 XMStoreFloat4((XMFLOAT4*)&_41, V4);
3391
3392 return *this;
3393}
3394
3395//------------------------------------------------------------------------------
3396
3397inline XMFLOAT4X4A& XMFLOAT4X4A::operator=
3398(
3399 const XMFLOAT4X4A& Float4x4
3400)
3401{
3402 XMVECTOR V1 = XMLoadFloat4A((const XMFLOAT4A*)&Float4x4._11);
3403 XMVECTOR V2 = XMLoadFloat4A((const XMFLOAT4A*)&Float4x4._21);
3404 XMVECTOR V3 = XMLoadFloat4A((const XMFLOAT4A*)&Float4x4._31);
3405 XMVECTOR V4 = XMLoadFloat4A((const XMFLOAT4A*)&Float4x4._41);
3406
3407 XMStoreFloat4A((XMFLOAT4A*)&_11, V1);
3408 XMStoreFloat4A((XMFLOAT4A*)&_21, V2);
3409 XMStoreFloat4A((XMFLOAT4A*)&_31, V3);
3410 XMStoreFloat4A((XMFLOAT4A*)&_41, V4);
3411
3412 return *this;
3413}
3414