// Minecraft for consoles — the game where you go into mines and start crafting! (forked directly from smartcmd's GitHub)
1//-------------------------------------------------------------------------------------
2// DirectXPackedVector.inl -- SIMD C++ Math library
3//
4// THIS CODE AND INFORMATION IS PROVIDED "AS IS" WITHOUT WARRANTY OF
5// ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING BUT NOT LIMITED TO
6// THE IMPLIED WARRANTIES OF MERCHANTABILITY AND/OR FITNESS FOR A
7// PARTICULAR PURPOSE.
8//
9// Copyright (c) Microsoft Corporation. All rights reserved.
10//-------------------------------------------------------------------------------------
11
12#ifdef _MSC_VER
13#pragma once
14#endif
15
16
17/****************************************************************************
18 *
19 * Data conversion
20 *
21 ****************************************************************************/
22
23//------------------------------------------------------------------------------
24
// Converts a half-precision (IEEE 754 binary16: 1 sign, 5 exponent, 10
// mantissa bits) value to a 32-bit single-precision float.
inline float PackedVector::XMConvertHalfToFloat
(
    HALF Value
)
{
#if defined(_XM_NO_INTRINSICS_) || defined(_XM_SSE_INTRINSICS_) || defined(_XM_ARM_NEON_INTRINSICS_)

    uint32_t Mantissa = (uint32_t)(Value & 0x03FF);

    uint32_t Exponent;
    if ((Value & 0x7C00) != 0) // The value is normalized
    {
        Exponent = (uint32_t)((Value >> 10) & 0x1F);
    }
    else if (Mantissa != 0) // The value is denormalized
    {
        // Normalize the value in the resulting float
        Exponent = 1;

        do
        {
            Exponent--;
            Mantissa <<= 1;
        } while ((Mantissa & 0x0400) == 0);

        Mantissa &= 0x03FF;
    }
    else // The value is zero
    {
        // -112 cancels the +112 rebias below so the float exponent field is 0.
        Exponent = (uint32_t)-112;
    }

    // Rebias exponent from half (bias 15) to float (bias 127): +112.
    // Sign moves from bit 15 to bit 31; 10-bit mantissa shifts up to 23 bits.
    uint32_t Result = ((Value & 0x8000) << 16) | // Sign
                      ((Exponent + 112) << 23) | // Exponent
                      (Mantissa << 13); // Mantissa

    // NOTE(review): type punning via reinterpret_cast relies on MSVC's lenient
    // aliasing rules; not strictly portable C++.
    return reinterpret_cast<float*>(&Result)[0];
#elif defined(XM_NO_MISALIGNED_VECTOR_ACCESS)
#endif
}
65
66//------------------------------------------------------------------------------
67_Use_decl_annotations_
68inline float* PackedVector::XMConvertHalfToFloatStream
69(
70 float* pOutputStream,
71 size_t OutputStride,
72 const HALF* pInputStream,
73 size_t InputStride,
74 size_t HalfCount
75)
76{
77 assert(pOutputStream);
78 assert(pInputStream);
79#if defined(_XM_NO_INTRINSICS_) || defined(_XM_SSE_INTRINSICS_) || defined(_XM_ARM_NEON_INTRINSICS_) || defined(XM_NO_MISALIGNED_VECTOR_ACCESS)
80
81 const uint8_t* pHalf = reinterpret_cast<const uint8_t*>(pInputStream);
82 uint8_t* pFloat = reinterpret_cast<uint8_t*>(pOutputStream);
83
84 for (size_t i = 0; i < HalfCount; i++)
85 {
86 *reinterpret_cast<float*>(pFloat) = XMConvertHalfToFloat(reinterpret_cast<const HALF*>(pHalf)[0]);
87 pHalf += InputStride;
88 pFloat += OutputStride;
89 }
90
91 return pOutputStream;
92
93#else // _XM_VMX128_INTRINSICS_
94#endif // _XM_VMX128_INTRINSICS_
95}
96
97//------------------------------------------------------------------------------
98
// Converts a 32-bit float to half-precision (IEEE 754 binary16), rounding
// to nearest (ties handled by the +0x0FFF + LSB trick below) and saturating
// out-of-range magnitudes.
inline PackedVector::HALF PackedVector::XMConvertFloatToHalf
(
    float Value
)
{
#if defined(_XM_NO_INTRINSICS_) || defined(_XM_SSE_INTRINSICS_) || defined(_XM_ARM_NEON_INTRINSICS_)
    uint32_t Result;

    uint32_t IValue = reinterpret_cast<uint32_t *>(&Value)[0];
    uint32_t Sign = (IValue & 0x80000000U) >> 16U;
    IValue = IValue & 0x7FFFFFFFU; // Hack off the sign

    // 0x47FFEFFF is the largest float bit pattern that still rounds below
    // the half saturation value.
    if (IValue > 0x47FFEFFFU)
    {
        // The number is too large to be represented as a half. Saturate to infinity.
        // NOTE(review): 0x7FFF has all exponent AND mantissa bits set, which is
        // a NaN bit pattern rather than infinity (0x7C00). This matches the
        // original library's behavior; confirm before changing.
        Result = 0x7FFFU;
    }
    else
    {
        if (IValue < 0x38800000U) // below 2^-14, the smallest normalized half
        {
            // The number is too small to be represented as a normalized half.
            // Convert it to a denormalized value.
            uint32_t Shift = 113U - (IValue >> 23U);
            IValue = (0x800000U | (IValue & 0x7FFFFFU)) >> Shift;
        }
        else
        {
            // Rebias the exponent to represent the value as a normalized half.
            // 0xC8000000 subtracts 112 (float bias 127 - half bias 15) from the
            // exponent field via unsigned wraparound.
            IValue += 0xC8000000U;
        }

        // Shift 23-bit mantissa down to 10 bits with round-to-nearest-even.
        Result = ((IValue + 0x0FFFU + ((IValue >> 13U) & 1U)) >> 13U)&0x7FFFU;
    }
    return (HALF)(Result|Sign);
#elif defined(XM_NO_MISALIGNED_VECTOR_ACCESS)
#endif
}
137
138//------------------------------------------------------------------------------
139_Use_decl_annotations_
140inline PackedVector::HALF* PackedVector::XMConvertFloatToHalfStream
141(
142 HALF* pOutputStream,
143 size_t OutputStride,
144 const float* pInputStream,
145 size_t InputStride,
146 size_t FloatCount
147)
148{
149 assert(pOutputStream);
150 assert(pInputStream);
151#if defined(_XM_NO_INTRINSICS_) || defined(_XM_SSE_INTRINSICS_) || defined(_XM_ARM_NEON_INTRINSICS_) || defined(XM_NO_MISALIGNED_VECTOR_ACCESS)
152
153 const uint8_t* pFloat = reinterpret_cast<const uint8_t*>(pInputStream);
154 uint8_t* pHalf = reinterpret_cast<uint8_t*>(pOutputStream);
155
156 for (size_t i = 0; i < FloatCount; i++)
157 {
158 *reinterpret_cast<HALF*>(pHalf) = XMConvertFloatToHalf(reinterpret_cast<const float*>(pFloat)[0]);
159 pFloat += InputStride;
160 pHalf += OutputStride;
161 }
162 return pOutputStream;
163
164#else // _XM_VMX128_INTRINSICS_
165#endif // _XM_VMX128_INTRINSICS_
166}
167
168/****************************************************************************
169 *
170 * Vector and matrix load operations
171 *
172 ****************************************************************************/
_Use_decl_annotations_
// Loads a packed A8R8G8B8 color and expands it to a float4 (r,g,b,a) with
// each 0-255 channel normalized to 0.0f-1.0f.
inline XMVECTOR PackedVector::XMLoadColor
(
    const XMCOLOR* pSource
)
{
    assert(pSource);
#if defined(_XM_NO_INTRINSICS_) || defined(_XM_ARM_NEON_INTRINSICS_)
    // int32_t -> Float conversions are done in one instruction.
    // uint32_t -> Float calls a runtime function. Keep in int32_t
    int32_t iColor = (int32_t)(pSource->c);
    XMVECTORF32 vColor = {
        (float)((iColor >> 16) & 0xFF) * (1.0f/255.0f),
        (float)((iColor >> 8) & 0xFF) * (1.0f/255.0f),
        (float)(iColor & 0xFF) * (1.0f/255.0f),
        (float)((iColor >> 24) & 0xFF) * (1.0f/255.0f)
    };
    return vColor.v;
#elif defined(_XM_SSE_INTRINSICS_)
    // Splat the color in all four entries
    __m128i vInt = _mm_set1_epi32(pSource->c);
    // Shift R&0xFF0000, G&0xFF00, B&0xFF, A&0xFF000000
    vInt = _mm_and_si128(vInt,g_XMMaskA8R8G8B8);
    // a is unsigned! Flip the bit to convert the order to signed
    vInt = _mm_xor_si128(vInt,g_XMFlipA8R8G8B8);
    // Convert to floating point numbers
    XMVECTOR vTemp = _mm_cvtepi32_ps(vInt);
    // RGB + 0, A + 0x80000000.f to undo the signed order.
    vTemp = _mm_add_ps(vTemp,g_XMFixAA8R8G8B8);
    // Convert 0-255 to 0.0f-1.0f
    return _mm_mul_ps(vTemp,g_XMNormalizeA8R8G8B8);
#elif defined(XM_NO_MISALIGNED_VECTOR_ACCESS)
#endif // _XM_VMX128_INTRINSICS_
}
207
208//------------------------------------------------------------------------------
209_Use_decl_annotations_
210inline XMVECTOR PackedVector::XMLoadHalf2
211(
212 const XMHALF2* pSource
213)
214{
215 assert(pSource);
216#if defined(_XM_NO_INTRINSICS_) || defined(_XM_SSE_INTRINSICS_) || defined(_XM_ARM_NEON_INTRINSICS_)
217 XMVECTORF32 vResult = {
218 XMConvertHalfToFloat(pSource->x),
219 XMConvertHalfToFloat(pSource->y),
220 0.0f,
221 0.0f
222 };
223 return vResult.v;
224#elif defined(XM_NO_MISALIGNED_VECTOR_ACCESS)
225#endif // _XM_VMX128_INTRINSICS_
226}
227
228//------------------------------------------------------------------------------
_Use_decl_annotations_
// Loads an XMSHORTN2 (two signed normalized 16-bit ints) into a float4.
// Maps [-32767,32767] to [-1,1]; -32768 is clamped to -1. z and w are zero.
inline XMVECTOR PackedVector::XMLoadShortN2
(
    const XMSHORTN2* pSource
)
{
    assert(pSource);
#if defined(_XM_NO_INTRINSICS_) || defined(_XM_ARM_NEON_INTRINSICS_)
    XMVECTORF32 vResult = {
        (pSource->x == -32768) ? -1.f : ((float)pSource->x * (1.0f/32767.0f)),
        (pSource->y == -32768) ? -1.f : ((float)pSource->y * (1.0f/32767.0f)),
        0.0f,
        0.0f
    };
    return vResult.v;
#elif defined(_XM_SSE_INTRINSICS_)
    // Splat the two shorts in all four entries (WORD alignment okay,
    // DWORD alignment preferred)
    __m128 vTemp = _mm_load_ps1(reinterpret_cast<const float *>(&pSource->x));
    // Mask x&0xFFFF, y&0xFFFF0000,z&0,w&0
    vTemp = _mm_and_ps(vTemp,g_XMMaskX16Y16);
    // x needs to be sign extended
    vTemp = _mm_xor_ps(vTemp,g_XMFlipX16Y16);
    // Convert to floating point numbers
    vTemp = _mm_cvtepi32_ps(_mm_castps_si128(vTemp));
    // x - 0x8000 to undo the signed order.
    vTemp = _mm_add_ps(vTemp,g_XMFixX16Y16);
    // Convert -1.0f - 1.0f
    vTemp = _mm_mul_ps(vTemp,g_XMNormalizeX16Y16);
    // Clamp result (for case of -32768)
    return _mm_max_ps( vTemp, g_XMNegativeOne );
#elif defined(XM_NO_MISALIGNED_VECTOR_ACCESS)
#endif // _XM_VMX128_INTRINSICS_
}
263
264//------------------------------------------------------------------------------
_Use_decl_annotations_
// Loads an XMSHORT2 (two signed 16-bit ints) into a float4 without
// normalization. z and w are zero.
inline XMVECTOR PackedVector::XMLoadShort2
(
    const XMSHORT2* pSource
)
{
    assert(pSource);
#if defined(_XM_NO_INTRINSICS_) || defined(_XM_ARM_NEON_INTRINSICS_)
    XMVECTORF32 vResult = {
        (float)pSource->x,
        (float)pSource->y,
        0.f,
        0.f
    };
    return vResult.v;
#elif defined(_XM_SSE_INTRINSICS_)
    // Splat the two shorts in all four entries (WORD alignment okay,
    // DWORD alignment preferred)
    __m128 vTemp = _mm_load_ps1(reinterpret_cast<const float *>(&pSource->x));
    // Mask x&0xFFFF, y&0xFFFF0000,z&0,w&0
    vTemp = _mm_and_ps(vTemp,g_XMMaskX16Y16);
    // x needs to be sign extended
    vTemp = _mm_xor_ps(vTemp,g_XMFlipX16Y16);
    // Convert to floating point numbers
    vTemp = _mm_cvtepi32_ps(_mm_castps_si128(vTemp));
    // x - 0x8000 to undo the signed order.
    vTemp = _mm_add_ps(vTemp,g_XMFixX16Y16);
    // Y is 65536 too large
    return _mm_mul_ps(vTemp,g_XMFixupY16);
#elif defined(XM_NO_MISALIGNED_VECTOR_ACCESS)
#endif // _XM_VMX128_INTRINSICS_
}
297
298//------------------------------------------------------------------------------
_Use_decl_annotations_
// Loads an XMUSHORTN2 (two unsigned normalized 16-bit ints) into a float4.
// Maps [0,65535] to [0,1]. z and w are zero.
inline XMVECTOR PackedVector::XMLoadUShortN2
(
    const XMUSHORTN2* pSource
)
{
    assert(pSource);
#if defined(_XM_NO_INTRINSICS_) || defined(_XM_ARM_NEON_INTRINSICS_)
    XMVECTORF32 vResult = {
        (float)pSource->x / 65535.0f,
        (float)pSource->y / 65535.0f,
        0.f,
        0.f
    };
    return vResult.v;
#elif defined(_XM_SSE_INTRINSICS_)
    // Second lane also folds out the 65536x scale left by the masked y word.
    static const XMVECTORF32 FixupY16 = {1.0f/65535.0f,1.0f/(65535.0f*65536.0f),0.0f,0.0f};
    static const XMVECTORF32 FixaddY16 = {0,32768.0f*65536.0f,0,0};
    // Splat the two shorts in all four entries (WORD alignment okay,
    // DWORD alignment preferred)
    __m128 vTemp = _mm_load_ps1(reinterpret_cast<const float *>(&pSource->x));
    // Mask x&0xFFFF, y&0xFFFF0000,z&0,w&0
    vTemp = _mm_and_ps(vTemp,g_XMMaskX16Y16);
    // y needs to be sign flipped
    vTemp = _mm_xor_ps(vTemp,g_XMFlipY);
    // Convert to floating point numbers
    vTemp = _mm_cvtepi32_ps(_mm_castps_si128(vTemp));
    // y + 0x8000 to undo the signed order.
    vTemp = _mm_add_ps(vTemp,FixaddY16);
    // Y is 65536 times too large
    vTemp = _mm_mul_ps(vTemp,FixupY16);
    return vTemp;
#elif defined(XM_NO_MISALIGNED_VECTOR_ACCESS)
#endif // _XM_VMX128_INTRINSICS_
}
334
335//------------------------------------------------------------------------------
_Use_decl_annotations_
// Loads an XMUSHORT2 (two unsigned 16-bit ints) into a float4 without
// normalization. z and w are zero.
inline XMVECTOR PackedVector::XMLoadUShort2
(
    const XMUSHORT2* pSource
)
{
    assert(pSource);
#if defined(_XM_NO_INTRINSICS_) || defined(_XM_ARM_NEON_INTRINSICS_)
    XMVECTORF32 vResult = {
        (float)pSource->x,
        (float)pSource->y,
        0.f,
        0.f
    };
    return vResult.v;
#elif defined(_XM_SSE_INTRINSICS_)
    static const XMVECTORF32 FixaddY16 = {0,32768.0f,0,0};
    // Splat the two shorts in all four entries (WORD alignment okay,
    // DWORD alignment preferred)
    __m128 vTemp = _mm_load_ps1(reinterpret_cast<const float *>(&pSource->x));
    // Mask x&0xFFFF, y&0xFFFF0000,z&0,w&0
    vTemp = _mm_and_ps(vTemp,g_XMMaskX16Y16);
    // y needs to be sign flipped
    vTemp = _mm_xor_ps(vTemp,g_XMFlipY);
    // Convert to floating point numbers
    vTemp = _mm_cvtepi32_ps(_mm_castps_si128(vTemp));
    // Y is 65536 times too large
    vTemp = _mm_mul_ps(vTemp,g_XMFixupY16);
    // y + 0x8000 to undo the signed order.
    vTemp = _mm_add_ps(vTemp,FixaddY16);
    return vTemp;
#elif defined(XM_NO_MISALIGNED_VECTOR_ACCESS)
#endif // _XM_VMX128_INTRINSICS_
}
370
371//------------------------------------------------------------------------------
372_Use_decl_annotations_
373inline XMVECTOR PackedVector::XMLoadByteN2
374(
375 const XMBYTEN2* pSource
376)
377{
378 assert(pSource);
379 XMVECTORF32 vResult = {
380 (pSource->x == -128) ? -1.f : ((float)pSource->x * (1.0f/127.0f)),
381 (pSource->y == -128) ? -1.f : ((float)pSource->y * (1.0f/127.0f)),
382 0.0f,
383 0.0f
384 };
385 return vResult.v;
386}
387
388//------------------------------------------------------------------------------
389_Use_decl_annotations_
390inline XMVECTOR PackedVector::XMLoadByte2
391(
392 const XMBYTE2* pSource
393)
394{
395 assert(pSource);
396 XMVECTORF32 vResult = {
397 (float)pSource->x,
398 (float)pSource->y,
399 0.0f,
400 0.0f
401 };
402 return vResult.v;
403}
404
405//------------------------------------------------------------------------------
406_Use_decl_annotations_
407inline XMVECTOR PackedVector::XMLoadUByteN2
408(
409 const XMUBYTEN2* pSource
410)
411{
412 assert(pSource);
413 XMVECTORF32 vResult = {
414 (float)pSource->x * (1.0f/255.0f),
415 (float)pSource->y * (1.0f/255.0f),
416 0.0f,
417 0.0f
418 };
419 return vResult.v;
420}
421
422//------------------------------------------------------------------------------
423_Use_decl_annotations_
424inline XMVECTOR PackedVector::XMLoadUByte2
425(
426 const XMUBYTE2* pSource
427)
428{
429 assert(pSource);
430 XMVECTORF32 vResult = {
431 (float)pSource->x,
432 (float)pSource->y,
433 0.0f,
434 0.0f
435 };
436 return vResult.v;
437}
438
439//------------------------------------------------------------------------------
_Use_decl_annotations_
// Loads a packed 5:6:5 value into a float4 as raw (un-normalized) integer
// magnitudes: x = 5-bit field, y = 6-bit field, z = 5-bit field, w = 0.
inline XMVECTOR PackedVector::XMLoadU565
(
    const XMU565* pSource
)
{
    assert(pSource);
#if defined(_XM_SSE_INTRINSICS_) && !defined(_XM_NO_INTRINSICS_)
    static const XMVECTORI32 U565And = {0x1F,0x3F<<5,0x1F<<11,0};
    // Per-lane reciprocal of each field's shift (1, 2^5, 2^11) to move the
    // masked bits back down to integer magnitude.
    static const XMVECTORF32 U565Mul = {1.0f,1.0f/32.0f,1.0f/2048.f,0};
    // Get the 32 bit value and splat it
    XMVECTOR vResult = _mm_load_ps1(reinterpret_cast<const float *>(&pSource->v));
    // Mask off x, y and z
    vResult = _mm_and_ps(vResult,U565And);
    // Convert to float
    vResult = _mm_cvtepi32_ps(_mm_castps_si128(vResult));
    // Normalize x, y, and z
    vResult = _mm_mul_ps(vResult,U565Mul);
    return vResult;
#else
    XMVECTORF32 vResult = {
        float(pSource->v & 0x1F),
        float((pSource->v >> 5) & 0x3F),
        float((pSource->v >> 11) & 0x1F),
        0.f,
    };
    return vResult.v;
#endif // !_XM_SSE_INTRINSICS_
}
469
470//------------------------------------------------------------------------------
_Use_decl_annotations_
// Loads an XMFLOAT3PK (packed R11G11B10 float: x/y have 6-bit mantissa +
// 5-bit exponent, z has 5-bit mantissa + 5-bit exponent, all unsigned) and
// expands each channel to a full 32-bit float. w is whatever XMLoadFloat3A
// produces for the unset fourth element of the scratch array.
inline XMVECTOR PackedVector::XMLoadFloat3PK
(
    const XMFLOAT3PK* pSource
)
{
    assert(pSource);

    __declspec(align(16)) uint32_t Result[4];
    uint32_t Mantissa;
    uint32_t Exponent;

    // X Channel (6-bit mantissa)
    Mantissa = pSource->xm;

    if ( pSource->xe == 0x1f ) // INF or NAN
    {
        // Exponent all ones: preserve INF (mantissa 0) or NaN (mantissa != 0).
        Result[0] = 0x7f800000 | (pSource->xm << 17);
    }
    else
    {
        if ( pSource->xe != 0 ) // The value is normalized
        {
            Exponent = pSource->xe;
        }
        else if (Mantissa != 0) // The value is denormalized
        {
            // Normalize the value in the resulting float
            Exponent = 1;

            do
            {
                Exponent--;
                Mantissa <<= 1;
            } while ((Mantissa & 0x40) == 0);

            Mantissa &= 0x3F;
        }
        else // The value is zero
        {
            // -112 cancels the +112 rebias below, yielding float exponent 0.
            Exponent = (uint32_t)-112;
        }

        // Rebias exponent (packed bias 15 -> float bias 127); 6-bit mantissa
        // shifts up to the top of the 23-bit float mantissa (23 - 6 = 17).
        Result[0] = ((Exponent + 112) << 23) | (Mantissa << 17);
    }

    // Y Channel (6-bit mantissa)
    Mantissa = pSource->ym;

    if ( pSource->ye == 0x1f ) // INF or NAN
    {
        Result[1] = 0x7f800000 | (pSource->ym << 17);
    }
    else
    {
        if ( pSource->ye != 0 ) // The value is normalized
        {
            Exponent = pSource->ye;
        }
        else if (Mantissa != 0) // The value is denormalized
        {
            // Normalize the value in the resulting float
            Exponent = 1;

            do
            {
                Exponent--;
                Mantissa <<= 1;
            } while ((Mantissa & 0x40) == 0);

            Mantissa &= 0x3F;
        }
        else // The value is zero
        {
            Exponent = (uint32_t)-112;
        }

        Result[1] = ((Exponent + 112) << 23) | (Mantissa << 17);
    }

    // Z Channel (5-bit mantissa)
    Mantissa = pSource->zm;

    if ( pSource->ze == 0x1f ) // INF or NAN
    {
        // NOTE(review): shift is 17 here but 18 in the normalized path below;
        // any nonzero mantissa still yields a NaN and zero still yields INF,
        // so classification is preserved -- confirm against upstream intent.
        Result[2] = 0x7f800000 | (pSource->zm << 17);
    }
    else
    {
        if ( pSource->ze != 0 ) // The value is normalized
        {
            Exponent = pSource->ze;
        }
        else if (Mantissa != 0) // The value is denormalized
        {
            // Normalize the value in the resulting float
            Exponent = 1;

            do
            {
                Exponent--;
                Mantissa <<= 1;
            } while ((Mantissa & 0x20) == 0);

            Mantissa &= 0x1F;
        }
        else // The value is zero
        {
            Exponent = (uint32_t)-112;
        }

        // 5-bit mantissa shifts by 23 - 5 = 18.
        Result[2] = ((Exponent + 112) << 23) | (Mantissa << 18);
    }

    return XMLoadFloat3A( reinterpret_cast<const XMFLOAT3A*>(&Result) );
}
587
588//------------------------------------------------------------------------------
_Use_decl_annotations_
// Loads an XMFLOAT3SE (three 9-bit mantissas sharing one 5-bit exponent)
// and expands each channel to a full 32-bit float.
inline XMVECTOR PackedVector::XMLoadFloat3SE
(
    const XMFLOAT3SE* pSource
)
{
    assert(pSource);

    __declspec(align(16)) uint32_t Result[4];
    uint32_t Mantissa;
    uint32_t Exponent, ExpBits;

    if ( pSource->e == 0x1f ) // INF or NAN
    {
        // Shared exponent all ones: each channel becomes INF (mantissa 0)
        // or NaN (mantissa != 0).
        Result[0] = 0x7f800000 | (pSource->xm << 14);
        Result[1] = 0x7f800000 | (pSource->ym << 14);
        Result[2] = 0x7f800000 | (pSource->zm << 14);
    }
    else if ( pSource->e != 0 ) // The values are all normalized
    {
        Exponent = pSource->e;

        // Rebias shared exponent (packed bias 15 -> float bias 127).
        ExpBits = (Exponent + 112) << 23;

        // 9-bit mantissas shift by 23 - 9 = 14.
        Mantissa = pSource->xm;
        Result[0] = ExpBits | (Mantissa << 14);

        Mantissa = pSource->ym;
        Result[1] = ExpBits | (Mantissa << 14);

        Mantissa = pSource->zm;
        Result[2] = ExpBits | (Mantissa << 14);
    }
    else
    {
        // Shared exponent is zero: each channel is denormalized or zero and
        // must be normalized independently.
        // X Channel
        Mantissa = pSource->xm;

        if (Mantissa != 0) // The value is denormalized
        {
            // Normalize the value in the resulting float
            Exponent = 1;

            do
            {
                Exponent--;
                Mantissa <<= 1;
            } while ((Mantissa & 0x200) == 0);

            Mantissa &= 0x1FF;
        }
        else // The value is zero
        {
            Exponent = (uint32_t)-112;
        }

        Result[0] = ((Exponent + 112) << 23) | (Mantissa << 14);

        // Y Channel
        Mantissa = pSource->ym;

        if (Mantissa != 0) // The value is denormalized
        {
            // Normalize the value in the resulting float
            Exponent = 1;

            do
            {
                Exponent--;
                Mantissa <<= 1;
            } while ((Mantissa & 0x200) == 0);

            Mantissa &= 0x1FF;
        }
        else // The value is zero
        {
            Exponent = (uint32_t)-112;
        }

        Result[1] = ((Exponent + 112) << 23) | (Mantissa << 14);

        // Z Channel
        Mantissa = pSource->zm;

        if (Mantissa != 0) // The value is denormalized
        {
            // Normalize the value in the resulting float
            Exponent = 1;

            do
            {
                Exponent--;
                Mantissa <<= 1;
            } while ((Mantissa & 0x200) == 0);

            Mantissa &= 0x1FF;
        }
        else // The value is zero
        {
            Exponent = (uint32_t)-112;
        }

        Result[2] = ((Exponent + 112) << 23) | (Mantissa << 14);
    }

    return XMLoadFloat3A( reinterpret_cast<const XMFLOAT3A*>(&Result) );
}
696
697//------------------------------------------------------------------------------
698_Use_decl_annotations_
699inline XMVECTOR PackedVector::XMLoadHalf4
700(
701 const XMHALF4* pSource
702)
703{
704 assert(pSource);
705#if defined(_XM_NO_INTRINSICS_) || defined(_XM_SSE_INTRINSICS_) || defined(_XM_ARM_NEON_INTRINSICS_)
706 XMVECTORF32 vResult = {
707 XMConvertHalfToFloat(pSource->x),
708 XMConvertHalfToFloat(pSource->y),
709 XMConvertHalfToFloat(pSource->z),
710 XMConvertHalfToFloat(pSource->w)
711 };
712 return vResult.v;
713#elif defined(XM_NO_MISALIGNED_VECTOR_ACCESS)
714#endif // _XM_VMX128_INTRINSICS_
715}
716
717//------------------------------------------------------------------------------
_Use_decl_annotations_
// Loads an XMSHORTN4 (four signed normalized 16-bit ints) into a float4.
// Maps [-32767,32767] to [-1,1]; -32768 is clamped to -1.
inline XMVECTOR PackedVector::XMLoadShortN4
(
    const XMSHORTN4* pSource
)
{
    assert(pSource);
#if defined(_XM_NO_INTRINSICS_)
    XMVECTORF32 vResult = {
        (pSource->x == -32768) ? -1.f : ((float)pSource->x * (1.0f/32767.0f)),
        (pSource->y == -32768) ? -1.f : ((float)pSource->y * (1.0f/32767.0f)),
        (pSource->z == -32768) ? -1.f : ((float)pSource->z * (1.0f/32767.0f)),
        (pSource->w == -32768) ? -1.f : ((float)pSource->w * (1.0f/32767.0f))
    };
    return vResult.v;
#elif defined(_XM_ARM_NEON_INTRINSICS_)
    __n64 vInt = vld1_s16( (const int16_t*)pSource );
    __n128 V = vmovl_s16( vInt );
    V = vcvtq_f32_s32( V );
    const __n128 Scale = vdupq_n_f32( 1.0f/32767.0f );
    V = vmulq_f32( V, Scale );
    return vmaxq_f32( V, g_XMNegativeOne );
#elif defined(_XM_SSE_INTRINSICS_)
    // Splat the color in all four entries (x,z,y,w)
    __m128d vIntd = _mm_load1_pd(reinterpret_cast<const double *>(&pSource->x));
    // Shift x&0ffff,z&0xffff,y&0xffff0000,w&0xffff0000
    __m128 vTemp = _mm_and_ps(_mm_castpd_ps(vIntd),g_XMMaskX16Y16Z16W16);
    // x and z are unsigned! Flip the bits to convert the order to signed
    vTemp = _mm_xor_ps(vTemp,g_XMFlipX16Y16Z16W16);
    // Convert to floating point numbers
    vTemp = _mm_cvtepi32_ps(_mm_castps_si128(vTemp));
    // x and z - 0x8000 to complete the conversion
    vTemp = _mm_add_ps(vTemp,g_XMFixX16Y16Z16W16);
    // Convert to -1.0f - 1.0f
    vTemp = _mm_mul_ps(vTemp,g_XMNormalizeX16Y16Z16W16);
    // Very important! The entries are x,z,y,w, flip it to x,y,z,w
    vTemp = XM_PERMUTE_PS(vTemp,_MM_SHUFFLE(3,1,2,0));
    // Clamp result (for case of -32768)
    return _mm_max_ps( vTemp, g_XMNegativeOne );
#elif defined(XM_NO_MISALIGNED_VECTOR_ACCESS)
#endif // _XM_VMX128_INTRINSICS_
}
760
761//------------------------------------------------------------------------------
_Use_decl_annotations_
// Loads an XMSHORT4 (four signed 16-bit ints) into a float4 without
// normalization.
inline XMVECTOR PackedVector::XMLoadShort4
(
    const XMSHORT4* pSource
)
{
    assert(pSource);
#if defined(_XM_NO_INTRINSICS_)
    XMVECTORF32 vResult = {
        (float)pSource->x,
        (float)pSource->y,
        (float)pSource->z,
        (float)pSource->w
    };
    return vResult.v;
#elif defined(_XM_ARM_NEON_INTRINSICS_)
    __n64 vInt = vld1_s16( (const int16_t*)pSource );
    __n128 V = vmovl_s16( vInt );
    return vcvtq_f32_s32( V );
#elif defined(_XM_SSE_INTRINSICS_)
    // Splat the color in all four entries (x,z,y,w)
    __m128d vIntd = _mm_load1_pd(reinterpret_cast<const double *>(&pSource->x));
    // Shift x&0ffff,z&0xffff,y&0xffff0000,w&0xffff0000
    __m128 vTemp = _mm_and_ps(_mm_castpd_ps(vIntd),g_XMMaskX16Y16Z16W16);
    // x and z are unsigned! Flip the bits to convert the order to signed
    vTemp = _mm_xor_ps(vTemp,g_XMFlipX16Y16Z16W16);
    // Convert to floating point numbers
    vTemp = _mm_cvtepi32_ps(_mm_castps_si128(vTemp));
    // x and z - 0x8000 to complete the conversion
    vTemp = _mm_add_ps(vTemp,g_XMFixX16Y16Z16W16);
    // Fix y and w because they are 65536 too large
    vTemp = _mm_mul_ps(vTemp,g_XMFixupY16W16);
    // Very important! The entries are x,z,y,w, flip it to x,y,z,w
    return XM_PERMUTE_PS(vTemp,_MM_SHUFFLE(3,1,2,0));
#elif defined(XM_NO_MISALIGNED_VECTOR_ACCESS)
#endif // _XM_VMX128_INTRINSICS_
}
799
800//------------------------------------------------------------------------------
_Use_decl_annotations_
// Loads an XMUSHORTN4 (four unsigned normalized 16-bit ints) into a float4.
// Maps [0,65535] to [0,1].
inline XMVECTOR PackedVector::XMLoadUShortN4
(
    const XMUSHORTN4* pSource
)
{
    assert(pSource);
#if defined(_XM_NO_INTRINSICS_)
    XMVECTORF32 vResult = {
        (float)pSource->x / 65535.0f,
        (float)pSource->y / 65535.0f,
        (float)pSource->z / 65535.0f,
        (float)pSource->w / 65535.0f
    };
    return vResult.v;
#elif defined(_XM_ARM_NEON_INTRINSICS_)
    __n64 vInt = vld1_u16( (const uint16_t*)pSource );
    __n128 V = vmovl_u16( vInt );
    V = vcvtq_f32_u32( V );
    const __n128 Scale = vdupq_n_f32( 1.0f/65535.0f );
    return vmulq_f32( V, Scale );
#elif defined(_XM_SSE_INTRINSICS_)
    // y/w lanes also fold out the 65536x scale left by the masked high words.
    static const XMVECTORF32 FixupY16W16 = {1.0f/65535.0f,1.0f/65535.0f,1.0f/(65535.0f*65536.0f),1.0f/(65535.0f*65536.0f)};
    static const XMVECTORF32 FixaddY16W16 = {0,0,32768.0f*65536.0f,32768.0f*65536.0f};
    // Splat the color in all four entries (x,z,y,w)
    __m128d vIntd = _mm_load1_pd(reinterpret_cast<const double *>(&pSource->x));
    // Shift x&0ffff,z&0xffff,y&0xffff0000,w&0xffff0000
    __m128 vTemp = _mm_and_ps(_mm_castpd_ps(vIntd),g_XMMaskX16Y16Z16W16);
    // y and w are signed! Flip the bits to convert the order to unsigned
    vTemp = _mm_xor_ps(vTemp,g_XMFlipZW);
    // Convert to floating point numbers
    vTemp = _mm_cvtepi32_ps(_mm_castps_si128(vTemp));
    // y and w + 0x8000 to complete the conversion
    vTemp = _mm_add_ps(vTemp,FixaddY16W16);
    // Fix y and w because they are 65536 too large
    vTemp = _mm_mul_ps(vTemp,FixupY16W16);
    // Very important! The entries are x,z,y,w, flip it to x,y,z,w
    return XM_PERMUTE_PS(vTemp,_MM_SHUFFLE(3,1,2,0));
#elif defined(XM_NO_MISALIGNED_VECTOR_ACCESS)
#endif // _XM_VMX128_INTRINSICS_
}
842
843//------------------------------------------------------------------------------
_Use_decl_annotations_
// Loads an XMUSHORT4 (four unsigned 16-bit ints) into a float4 without
// normalization.
inline XMVECTOR PackedVector::XMLoadUShort4
(
    const XMUSHORT4* pSource
)
{
    assert(pSource);
#if defined(_XM_NO_INTRINSICS_)
    XMVECTORF32 vResult = {
        (float)pSource->x,
        (float)pSource->y,
        (float)pSource->z,
        (float)pSource->w
    };
    return vResult.v;
#elif defined(_XM_ARM_NEON_INTRINSICS_)
    __n64 vInt = vld1_u16( (const uint16_t*)pSource );
    __n128 V = vmovl_u16( vInt );
    return vcvtq_f32_u32( V );
#elif defined(_XM_SSE_INTRINSICS_)
    static const XMVECTORF32 FixaddY16W16 = {0,0,32768.0f,32768.0f};
    // Splat the color in all four entries (x,z,y,w)
    __m128d vIntd = _mm_load1_pd(reinterpret_cast<const double *>(&pSource->x));
    // Shift x&0ffff,z&0xffff,y&0xffff0000,w&0xffff0000
    __m128 vTemp = _mm_and_ps(_mm_castpd_ps(vIntd),g_XMMaskX16Y16Z16W16);
    // y and w are signed! Flip the bits to convert the order to unsigned
    vTemp = _mm_xor_ps(vTemp,g_XMFlipZW);
    // Convert to floating point numbers
    vTemp = _mm_cvtepi32_ps(_mm_castps_si128(vTemp));
    // Fix y and w because they are 65536 too large
    vTemp = _mm_mul_ps(vTemp,g_XMFixupY16W16);
    // y and w + 0x8000 to complete the conversion
    vTemp = _mm_add_ps(vTemp,FixaddY16W16);
    // Very important! The entries are x,z,y,w, flip it to x,y,z,w
    return XM_PERMUTE_PS(vTemp,_MM_SHUFFLE(3,1,2,0));
#elif defined(XM_NO_MISALIGNED_VECTOR_ACCESS)
#endif // _XM_VMX128_INTRINSICS_
}
882
883//------------------------------------------------------------------------------
_Use_decl_annotations_
// Loads an XMXDECN4 (signed normalized 10:10:10 xyz + unsigned 2-bit w).
// Maps xyz from [-511,511] to [-1,1] (-512 clamps to -1) and w from [0,3]
// to [0,1].
inline XMVECTOR PackedVector::XMLoadXDecN4
(
    const XMXDECN4* pSource
)
{
    assert(pSource);
#if defined(_XM_NO_INTRINSICS_) || defined(_XM_ARM_NEON_INTRINSICS_)
    // Indexed by the 10-bit value's sign bit to extend it to 16 bits.
    static const uint32_t SignExtend[] = {0x00000000, 0xFFFFFC00};

    uint32_t ElementX = pSource->v & 0x3FF;
    uint32_t ElementY = (pSource->v >> 10) & 0x3FF;
    uint32_t ElementZ = (pSource->v >> 20) & 0x3FF;

    XMVECTORF32 vResult = {
        (ElementX == 0x200) ? -1.f : ((float)(int16_t)(ElementX | SignExtend[ElementX >> 9]) / 511.0f),
        (ElementY == 0x200) ? -1.f : ((float)(int16_t)(ElementY | SignExtend[ElementY >> 9]) / 511.0f),
        (ElementZ == 0x200) ? -1.f : ((float)(int16_t)(ElementZ | SignExtend[ElementZ >> 9]) / 511.0f),
        (float)(pSource->v >> 30) / 3.0f
    };
    return vResult.v;
#elif defined(_XM_SSE_INTRINSICS_)
    // Splat the color in all four entries
    __m128 vTemp = _mm_load_ps1(reinterpret_cast<const float *>(&pSource->v));
    // Shift R&0xFF0000, G&0xFF00, B&0xFF, A&0xFF000000
    vTemp = _mm_and_ps(vTemp,g_XMMaskA2B10G10R10);
    // a is unsigned! Flip the bit to convert the order to signed
    vTemp = _mm_xor_ps(vTemp,g_XMFlipA2B10G10R10);
    // Convert to floating point numbers
    vTemp = _mm_cvtepi32_ps(_mm_castps_si128(vTemp));
    // RGB + 0, A + 0x80000000.f to undo the signed order.
    vTemp = _mm_add_ps(vTemp,g_XMFixAA2B10G10R10);
    // Convert 0-255 to 0.0f-1.0f
    vTemp = _mm_mul_ps(vTemp,g_XMNormalizeA2B10G10R10);
    // Clamp result (for case of -512)
    return _mm_max_ps( vTemp, g_XMNegativeOne );
#elif defined(XM_NO_MISALIGNED_VECTOR_ACCESS)
#endif // _XM_VMX128_INTRINSICS_
}
923
924//------------------------------------------------------------------------------
_Use_decl_annotations_
// Loads an XMXDEC4 (signed 10:10:10 xyz + unsigned 2-bit w) into a float4
// without normalization: xyz in [-512,511], w in [0,3].
inline XMVECTOR PackedVector::XMLoadXDec4
(
    const XMXDEC4* pSource
)
{
    assert(pSource);
#if defined(_XM_NO_INTRINSICS_) || defined(_XM_ARM_NEON_INTRINSICS_)
    // Indexed by the 10-bit value's sign bit to extend it to 16 bits.
    static const uint32_t SignExtend[] = {0x00000000, 0xFFFFFC00};

    uint32_t ElementX = pSource->v & 0x3FF;
    uint32_t ElementY = (pSource->v >> 10) & 0x3FF;
    uint32_t ElementZ = (pSource->v >> 20) & 0x3FF;

    XMVECTORF32 vResult = {
        (float)(int16_t)(ElementX | SignExtend[ElementX >> 9]),
        (float)(int16_t)(ElementY | SignExtend[ElementY >> 9]),
        (float)(int16_t)(ElementZ | SignExtend[ElementZ >> 9]),
        (float)(pSource->v >> 30)
    };
    return vResult.v;
#elif defined(_XM_SSE_INTRINSICS_)
    static const XMVECTORI32 XDec4Xor = {0x200, 0x200<<10, 0x200<<20, 0x80000000};
    static const XMVECTORF32 XDec4Add = {-512.0f,-512.0f*1024.0f,-512.0f*1024.0f*1024.0f,32768*65536.0f};
    // Splat the color in all four entries
    XMVECTOR vTemp = _mm_load_ps1(reinterpret_cast<const float *>(&pSource->v));
    // Shift R&0xFF0000, G&0xFF00, B&0xFF, A&0xFF000000
    vTemp = _mm_and_ps(vTemp,g_XMMaskDec4);
    // a is unsigned! Flip the bit to convert the order to signed
    vTemp = _mm_xor_ps(vTemp,XDec4Xor);
    // Convert to floating point numbers
    vTemp = _mm_cvtepi32_ps(_mm_castps_si128(vTemp));
    // RGB + 0, A + 0x80000000.f to undo the signed order.
    vTemp = _mm_add_ps(vTemp,XDec4Add);
    // Convert 0-255 to 0.0f-1.0f
    vTemp = _mm_mul_ps(vTemp,g_XMMulDec4);
    return vTemp;
#elif defined(XM_NO_MISALIGNED_VECTOR_ACCESS)
#endif // _XM_VMX128_INTRINSICS_
}
965
966//------------------------------------------------------------------------------
_Use_decl_annotations_
// Loads an XMUDECN4 (unsigned normalized 10:10:10:2) into a float4.
// Maps xyz from [0,1023] to [0,1] and w from [0,3] to [0,1].
inline XMVECTOR PackedVector::XMLoadUDecN4
(
    const XMUDECN4* pSource
)
{
    assert(pSource);
#if defined(_XM_NO_INTRINSICS_) || defined(_XM_ARM_NEON_INTRINSICS_)

    uint32_t ElementX = pSource->v & 0x3FF;
    uint32_t ElementY = (pSource->v >> 10) & 0x3FF;
    uint32_t ElementZ = (pSource->v >> 20) & 0x3FF;

    XMVECTORF32 vResult = {
        (float)ElementX / 1023.0f,
        (float)ElementY / 1023.0f,
        (float)ElementZ / 1023.0f,
        (float)(pSource->v >> 30) / 3.0f
    };
    return vResult.v;
#elif defined(_XM_SSE_INTRINSICS_)
    // Each lane folds out both its field's shift and its normalization range.
    static const XMVECTORF32 UDecN4Mul = {1.0f/1023.0f,1.0f/(1023.0f*1024.0f),1.0f/(1023.0f*1024.0f*1024.0f),1.0f/(3.0f*1024.0f*1024.0f*1024.0f)};
    // Splat the color in all four entries
    XMVECTOR vTemp = _mm_load_ps1(reinterpret_cast<const float *>(&pSource->v));
    // Shift R&0xFF0000, G&0xFF00, B&0xFF, A&0xFF000000
    vTemp = _mm_and_ps(vTemp,g_XMMaskDec4);
    // a is unsigned! Flip the bit to convert the order to signed
    vTemp = _mm_xor_ps(vTemp,g_XMFlipW);
    // Convert to floating point numbers
    vTemp = _mm_cvtepi32_ps(_mm_castps_si128(vTemp));
    // RGB + 0, A + 0x80000000.f to undo the signed order.
    vTemp = _mm_add_ps(vTemp,g_XMAddUDec4);
    // Convert 0-255 to 0.0f-1.0f
    vTemp = _mm_mul_ps(vTemp,UDecN4Mul);
    return vTemp;
#elif defined(XM_NO_MISALIGNED_VECTOR_ACCESS)
#endif // _XM_VMX128_INTRINSICS_
}
1005
1006//------------------------------------------------------------------------------
_Use_decl_annotations_
// Loads an XMUDEC4 (x,y,z: unsigned 10-bit integer; w: unsigned 2-bit
// integer) into an XMVECTOR holding the raw integer values
// (x,y,z in 0-1023; w in 0-3) as floats.
inline XMVECTOR PackedVector::XMLoadUDec4
(
    const XMUDEC4* pSource
)
{
    assert(pSource);
#if defined(_XM_NO_INTRINSICS_) || defined(_XM_ARM_NEON_INTRINSICS_)
    // Unpack the three 10-bit fields; w is the top 2 bits of the dword.
    uint32_t ElementX = pSource->v & 0x3FF;
    uint32_t ElementY = (pSource->v >> 10) & 0x3FF;
    uint32_t ElementZ = (pSource->v >> 20) & 0x3FF;

    XMVECTORF32 vResult = {
        (float)ElementX,
        (float)ElementY,
        (float)ElementZ,
        (float)(pSource->v >> 30)
    };
    return vResult.v;
#elif defined(_XM_SSE_INTRINSICS_)
    // Splat the packed 32-bit value into all four lanes
    XMVECTOR vTemp = _mm_load_ps1(reinterpret_cast<const float *>(&pSource->v));
    // Isolate x&0x3FF, y&(0x3FF<<10), z&(0x3FF<<20), w&(0x3<<30) in place
    vTemp = _mm_and_ps(vTemp,g_XMMaskDec4);
    // w overlaps the sign bit: flip it so the signed int->float conversion is correct
    vTemp = _mm_xor_ps(vTemp,g_XMFlipW);
    // Convert to floating point numbers
    vTemp = _mm_cvtepi32_ps(_mm_castps_si128(vTemp));
    // x,y,z + 0; w + 0x80000000.f to undo the sign-bit flip
    vTemp = _mm_add_ps(vTemp,g_XMAddUDec4);
    // Divide out each field's bit offset, leaving the raw integer values
    vTemp = _mm_mul_ps(vTemp,g_XMMulDec4);
    return vTemp;
#elif defined(XM_NO_MISALIGNED_VECTOR_ACCESS)
#endif // _XM_VMX128_INTRINSICS_
}
1043
1044//------------------------------------------------------------------------------
_Use_decl_annotations_
// Loads an XMDECN4 (x,y,z: signed 10-bit normalized; w: signed 2-bit
// normalized) into an XMVECTOR in [-1,1]. The most negative codes
// (-512 for x/y/z, -2 for w) clamp to -1 so the range is symmetric.
inline XMVECTOR PackedVector::XMLoadDecN4
(
    const XMDECN4* pSource
)
{
    assert(pSource);
#if defined(_XM_NO_INTRINSICS_) || defined(_XM_ARM_NEON_INTRINSICS_)
    // Tables used to sign-extend a 10-bit / 2-bit two's-complement field,
    // indexed by the field's sign bit.
    static const uint32_t SignExtend[] = {0x00000000, 0xFFFFFC00};
    static const uint32_t SignExtendW[] = {0x00000000, 0xFFFFFFFC};

    uint32_t ElementX = pSource->v & 0x3FF;
    uint32_t ElementY = (pSource->v >> 10) & 0x3FF;
    uint32_t ElementZ = (pSource->v >> 20) & 0x3FF;
    uint32_t ElementW = pSource->v >> 30;

    // 0x200 (-512) and 0x2 (-2) are the most negative codes; clamp to -1.
    XMVECTORF32 vResult = {
        (ElementX == 0x200) ? -1.f : ((float)(int16_t)(ElementX | SignExtend[ElementX >> 9]) / 511.0f),
        (ElementY == 0x200) ? -1.f : ((float)(int16_t)(ElementY | SignExtend[ElementY >> 9]) / 511.0f),
        (ElementZ == 0x200) ? -1.f : ((float)(int16_t)(ElementZ | SignExtend[ElementZ >> 9]) / 511.0f),
        (ElementW == 0x2) ? -1.f : ((float)(int16_t)(ElementW | SignExtendW[(ElementW >> 1) & 1]))
    };
    return vResult.v;
#elif defined(_XM_SSE_INTRINSICS_)
    // Per-lane reciprocals; y/z/w scales also divide out each field's bit offset.
    static const XMVECTORF32 DecN4Mul = {1.0f/511.0f,1.0f/(511.0f*1024.0f),1.0f/(511.0f*1024.0f*1024.0f),1.0f/(1024.0f*1024.0f*1024.0f)};
    // Splat the packed 32-bit value into all four lanes
    XMVECTOR vTemp = _mm_load_ps1(reinterpret_cast<const float *>(&pSource->v));
    // Isolate x&0x3FF, y&(0x3FF<<10), z&(0x3FF<<20), w&(0x3<<30) in place
    vTemp = _mm_and_ps(vTemp,g_XMMaskDec4);
    // Flip each field's sign bit so the values become biased (unsigned) order
    vTemp = _mm_xor_ps(vTemp,g_XMXorDec4);
    // Convert to floating point numbers
    vTemp = _mm_cvtepi32_ps(_mm_castps_si128(vTemp));
    // Subtract the bias from each lane to restore the signed values
    vTemp = _mm_add_ps(vTemp,g_XMAddDec4);
    // Normalize to -1.0f - 1.0f, dividing out the field offsets
    vTemp = _mm_mul_ps(vTemp,DecN4Mul);
    // Clamp result (for case of -512/-2 mapping below -1)
    return _mm_max_ps( vTemp, g_XMNegativeOne );
#elif defined(XM_NO_MISALIGNED_VECTOR_ACCESS)
#endif // _XM_VMX128_INTRINSICS_
}
1087
1088//------------------------------------------------------------------------------
_Use_decl_annotations_
// Loads an XMDEC4 (x,y,z: signed 10-bit integer; w: signed 2-bit integer)
// into an XMVECTOR holding the raw signed integer values
// (x,y,z in -512..511; w in -2..1) as floats.
inline XMVECTOR PackedVector::XMLoadDec4
(
    const XMDEC4* pSource
)
{
    assert(pSource);
#if defined(_XM_NO_INTRINSICS_) || defined(_XM_ARM_NEON_INTRINSICS_)
    // Tables used to sign-extend a 10-bit / 2-bit two's-complement field,
    // indexed by the field's sign bit.
    static const uint32_t SignExtend[] = {0x00000000, 0xFFFFFC00};
    static const uint32_t SignExtendW[] = {0x00000000, 0xFFFFFFFC};

    uint32_t ElementX = pSource->v & 0x3FF;
    uint32_t ElementY = (pSource->v >> 10) & 0x3FF;
    uint32_t ElementZ = (pSource->v >> 20) & 0x3FF;
    uint32_t ElementW = pSource->v >> 30;

    XMVECTORF32 vResult = {
        (float)(int16_t)(ElementX | SignExtend[ElementX >> 9]),
        (float)(int16_t)(ElementY | SignExtend[ElementY >> 9]),
        (float)(int16_t)(ElementZ | SignExtend[ElementZ >> 9]),
        (float)(int16_t)(ElementW | SignExtendW[ElementW >> 1])
    };
    return vResult.v;
#elif defined(_XM_SSE_INTRINSICS_)
    // Splat the packed 32-bit value into all four lanes
    XMVECTOR vTemp = _mm_load_ps1(reinterpret_cast<const float *>(&pSource->v));
    // Isolate x&0x3FF, y&(0x3FF<<10), z&(0x3FF<<20), w&(0x3<<30) in place
    vTemp = _mm_and_ps(vTemp,g_XMMaskDec4);
    // Flip each field's sign bit so the values become biased (unsigned) order
    vTemp = _mm_xor_ps(vTemp,g_XMXorDec4);
    // Convert to floating point numbers
    vTemp = _mm_cvtepi32_ps(_mm_castps_si128(vTemp));
    // Subtract the bias from each lane to restore the signed values
    vTemp = _mm_add_ps(vTemp,g_XMAddDec4);
    // Divide out each field's bit offset, leaving the raw integer values
    vTemp = _mm_mul_ps(vTemp,g_XMMulDec4);
    return vTemp;
#elif defined(XM_NO_MISALIGNED_VECTOR_ACCESS)
#endif // _XM_VMX128_INTRINSICS_
}
1129
1130//------------------------------------------------------------------------------
_Use_decl_annotations_
// Loads an XMUBYTEN4 (four unsigned 8-bit normalized components) into an
// XMVECTOR with each component mapped to [0,1] via /255.
inline XMVECTOR PackedVector::XMLoadUByteN4
(
    const XMUBYTEN4* pSource
)
{
    assert(pSource);
#if defined(_XM_NO_INTRINSICS_) || defined(_XM_ARM_NEON_INTRINSICS_)
    XMVECTORF32 vResult = {
        (float)pSource->x / 255.0f,
        (float)pSource->y / 255.0f,
        (float)pSource->z / 255.0f,
        (float)pSource->w / 255.0f
    };
    return vResult.v;
#elif defined(_XM_SSE_INTRINSICS_)
    // Per-lane reciprocals: each byte is masked in place (not shifted down),
    // so the y/z/w scales also divide out the byte's position.
    static const XMVECTORF32 LoadUByteN4Mul = {1.0f/255.0f,1.0f/(255.0f*256.0f),1.0f/(255.0f*65536.0f),1.0f/(255.0f*65536.0f*256.0f)};
    // Splat the packed 32-bit value into all four lanes
    XMVECTOR vTemp = _mm_load1_ps(reinterpret_cast<const float *>(&pSource->x));
    // Mask x&0xff,y&0xff00,z&0xff0000,w&0xff000000
    vTemp = _mm_and_ps(vTemp,g_XMMaskByte4);
    // w overlaps the sign bit: flip it so the signed int->float conversion is correct
    vTemp = _mm_xor_ps(vTemp,g_XMFlipW);
    // Convert to floating point numbers
    vTemp = _mm_cvtepi32_ps(_mm_castps_si128(vTemp));
    // w + 0x80000000.f to undo the sign-bit flip
    vTemp = _mm_add_ps(vTemp,g_XMAddUDec4);
    // Normalize to 0.0f-1.0f, dividing out the byte positions of y, z and w
    vTemp = _mm_mul_ps(vTemp,LoadUByteN4Mul);
    return vTemp;
#elif defined(XM_NO_MISALIGNED_VECTOR_ACCESS)
#endif // _XM_VMX128_INTRINSICS_
}
1164
1165//------------------------------------------------------------------------------
_Use_decl_annotations_
// Loads an XMUBYTE4 (four unsigned 8-bit integers) into an XMVECTOR
// holding the raw 0-255 values as floats.
inline XMVECTOR PackedVector::XMLoadUByte4
(
    const XMUBYTE4* pSource
)
{
    assert(pSource);
#if defined(_XM_NO_INTRINSICS_) || defined(_XM_ARM_NEON_INTRINSICS_)
    XMVECTORF32 vResult = {
        (float)pSource->x,
        (float)pSource->y,
        (float)pSource->z,
        (float)pSource->w
    };
    return vResult.v;
#elif defined(_XM_SSE_INTRINSICS_)
    // Divides out each byte's position (bytes are masked in place below)
    static const XMVECTORF32 LoadUByte4Mul = {1.0f,1.0f/256.0f,1.0f/65536.0f,1.0f/(65536.0f*256.0f)};
    // Splat the packed 32-bit value into all four lanes
    XMVECTOR vTemp = _mm_load1_ps(reinterpret_cast<const float *>(&pSource->x));
    // Mask x&0xff,y&0xff00,z&0xff0000,w&0xff000000
    vTemp = _mm_and_ps(vTemp,g_XMMaskByte4);
    // w overlaps the sign bit: flip it so the signed int->float conversion is correct
    vTemp = _mm_xor_ps(vTemp,g_XMFlipW);
    // Convert to floating point numbers
    vTemp = _mm_cvtepi32_ps(_mm_castps_si128(vTemp));
    // w + 0x80000000.f to undo the sign-bit flip
    vTemp = _mm_add_ps(vTemp,g_XMAddUDec4);
    // Divide out the byte positions of y, z and w, leaving raw 0-255 values
    vTemp = _mm_mul_ps(vTemp,LoadUByte4Mul);
    return vTemp;
#elif defined(XM_NO_MISALIGNED_VECTOR_ACCESS)
#endif // _XM_VMX128_INTRINSICS_
}
1199
1200//------------------------------------------------------------------------------
_Use_decl_annotations_
// Loads an XMBYTEN4 (four signed 8-bit normalized components) into an
// XMVECTOR in [-1,1]; the most negative code (-128) clamps to -1 so the
// range is symmetric.
inline XMVECTOR PackedVector::XMLoadByteN4
(
    const XMBYTEN4* pSource
)
{
    assert(pSource);
#if defined(_XM_NO_INTRINSICS_) || defined(_XM_ARM_NEON_INTRINSICS_)
    XMVECTORF32 vResult = {
        (pSource->x == -128) ? -1.f : ((float)pSource->x / 127.0f),
        (pSource->y == -128) ? -1.f : ((float)pSource->y / 127.0f),
        (pSource->z == -128) ? -1.f : ((float)pSource->z / 127.0f),
        (pSource->w == -128) ? -1.f : ((float)pSource->w / 127.0f)
    };
    return vResult.v;
#elif defined(_XM_SSE_INTRINSICS_)
    // Per-lane reciprocals; y/z/w scales also divide out the byte positions.
    static const XMVECTORF32 LoadByteN4Mul = {1.0f/127.0f,1.0f/(127.0f*256.0f),1.0f/(127.0f*65536.0f),1.0f/(127.0f*65536.0f*256.0f)};
    // Splat the packed 32-bit value into all four lanes
    XMVECTOR vTemp = _mm_load1_ps(reinterpret_cast<const float *>(&pSource->x));
    // Mask x&0xff,y&0xff00,z&0xff0000,w&0xff000000
    vTemp = _mm_and_ps(vTemp,g_XMMaskByte4);
    // Flip each byte's sign bit so the values become biased (unsigned) order
    vTemp = _mm_xor_ps(vTemp,g_XMXorByte4);
    // Convert to floating point numbers
    vTemp = _mm_cvtepi32_ps(_mm_castps_si128(vTemp));
    // Subtract the bias from each lane to restore the signed values
    vTemp = _mm_add_ps(vTemp,g_XMAddByte4);
    // Normalize to -1.0f - 1.0f, dividing out the byte positions
    vTemp = _mm_mul_ps(vTemp,LoadByteN4Mul);
    // Clamp result (for case of -128 mapping below -1)
    return _mm_max_ps( vTemp, g_XMNegativeOne );
#elif defined(XM_NO_MISALIGNED_VECTOR_ACCESS)
#endif // _XM_VMX128_INTRINSICS_
}
1235
1236//------------------------------------------------------------------------------
_Use_decl_annotations_
// Loads an XMBYTE4 (four signed 8-bit integers) into an XMVECTOR holding
// the raw -128..127 values as floats.
inline XMVECTOR PackedVector::XMLoadByte4
(
    const XMBYTE4* pSource
)
{
    assert(pSource);
#if defined(_XM_NO_INTRINSICS_) || defined(_XM_ARM_NEON_INTRINSICS_)
    XMVECTORF32 vResult = {
        (float)pSource->x,
        (float)pSource->y,
        (float)pSource->z,
        (float)pSource->w
    };
    return vResult.v;
#elif defined(_XM_SSE_INTRINSICS_)
    // Divides out each byte's position (bytes are masked in place below)
    static const XMVECTORF32 LoadByte4Mul = {1.0f,1.0f/256.0f,1.0f/65536.0f,1.0f/(65536.0f*256.0f)};
    // Splat the packed 32-bit value into all four lanes
    XMVECTOR vTemp = _mm_load1_ps(reinterpret_cast<const float *>(&pSource->x));
    // Mask x&0xff,y&0xff00,z&0xff0000,w&0xff000000
    vTemp = _mm_and_ps(vTemp,g_XMMaskByte4);
    // Flip each byte's sign bit so the values become biased (unsigned) order
    vTemp = _mm_xor_ps(vTemp,g_XMXorByte4);
    // Convert to floating point numbers
    vTemp = _mm_cvtepi32_ps(_mm_castps_si128(vTemp));
    // Subtract the bias from each lane to restore the signed values
    vTemp = _mm_add_ps(vTemp,g_XMAddByte4);
    // Divide out the byte positions of y, z and w, leaving the raw values
    vTemp = _mm_mul_ps(vTemp,LoadByte4Mul);
    return vTemp;
#elif defined(XM_NO_MISALIGNED_VECTOR_ACCESS)
#endif // _XM_VMX128_INTRINSICS_
}
1270
1271//------------------------------------------------------------------------------
_Use_decl_annotations_
// Loads an XMUNIBBLE4 (four unsigned 4-bit integers packed in 16 bits)
// into an XMVECTOR holding the raw 0-15 values as floats.
inline XMVECTOR PackedVector::XMLoadUNibble4
(
    const XMUNIBBLE4* pSource
)
{
    assert(pSource);
#if defined(_XM_SSE_INTRINSICS_) && !defined(_XM_NO_INTRINSICS_)
    // Nibbles are masked in place; the multiplies divide out each offset.
    static const XMVECTORI32 UNibble4And = {0xF,0xF0,0xF00,0xF000};
    static const XMVECTORF32 UNibble4Mul = {1.0f,1.0f/16.f,1.0f/256.f,1.0f/4096.f};
    // Get the 32 bit value and splat it
    XMVECTOR vResult = _mm_load_ps1(reinterpret_cast<const float *>(&pSource->v));
    // Mask each nibble into its own lane
    vResult = _mm_and_ps(vResult,UNibble4And);
    // Convert to float
    vResult = _mm_cvtepi32_ps(_mm_castps_si128(vResult));
    // Divide out the nibble offsets, leaving the raw 0-15 values
    vResult = _mm_mul_ps(vResult,UNibble4Mul);
    return vResult;
#else
    XMVECTORF32 vResult = {
        float(pSource->v & 0xF),
        float((pSource->v >> 4) & 0xF),
        float((pSource->v >> 8) & 0xF),
        float((pSource->v >> 12) & 0xF)
    };
    return vResult.v;
#endif // !_XM_SSE_INTRINSICS_
}
1301
1302//------------------------------------------------------------------------------
_Use_decl_annotations_
// Loads an XMU555 (x,y,z: unsigned 5-bit integers; w: 1-bit flag) into an
// XMVECTOR holding the raw 0-31 values (w: 0 or 1) as floats.
inline XMVECTOR PackedVector::XMLoadU555
(
    const XMU555* pSource
)
{
    assert(pSource);
#if defined(_XM_SSE_INTRINSICS_) && !defined(_XM_NO_INTRINSICS_)
    // Fields are masked in place; the multiplies divide out each offset.
    static const XMVECTORI32 U555And = {0x1F,0x1F<<5,0x1F<<10,0x8000};
    static const XMVECTORF32 U555Mul = {1.0f,1.0f/32.f,1.0f/1024.f,1.0f/32768.f};
    // Get the 32 bit value and splat it
    XMVECTOR vResult = _mm_load_ps1(reinterpret_cast<const float *>(&pSource->v));
    // Mask each field into its own lane
    vResult = _mm_and_ps(vResult,U555And);
    // Convert to float
    vResult = _mm_cvtepi32_ps(_mm_castps_si128(vResult));
    // Divide out the field offsets, leaving the raw values
    vResult = _mm_mul_ps(vResult,U555Mul);
    return vResult;
#else
    XMVECTORF32 vResult = {
        float(pSource->v & 0x1F),
        float((pSource->v >> 5) & 0x1F),
        float((pSource->v >> 10) & 0x1F),
        float((pSource->v >> 15) & 0x1)
    };
    return vResult.v;
#endif // !_XM_SSE_INTRINSICS_
}
1332
1333
1334/****************************************************************************
1335 *
1336 * Vector and matrix store operations
1337 *
1338 ****************************************************************************/
1339_Use_decl_annotations_
1340inline void PackedVector::XMStoreColor
1341(
1342 XMCOLOR* pDestination,
1343 FXMVECTOR V
1344)
1345{
1346 assert(pDestination);
1347#if defined(_XM_NO_INTRINSICS_) || defined(_XM_ARM_NEON_INTRINSICS_)
1348
1349 static const XMVECTORF32 Scale = {255.0f, 255.0f, 255.0f, 255.0f};
1350
1351 XMVECTOR N = XMVectorSaturate(V);
1352 N = XMVectorMultiply(N, Scale.v);
1353 N = XMVectorRound(N);
1354
1355 XMFLOAT4A tmp;
1356 XMStoreFloat4A( &tmp, N );
1357
1358 pDestination->c = ((uint32_t)tmp.w << 24) |
1359 ((uint32_t)tmp.x << 16) |
1360 ((uint32_t)tmp.y << 8) |
1361 ((uint32_t)tmp.z);
1362
1363#elif defined(_XM_SSE_INTRINSICS_)
1364 static const XMVECTORF32 Scale = {255.0f,255.0f,255.0f,255.0f};
1365 // Set <0 to 0
1366 XMVECTOR vResult = _mm_max_ps(V,g_XMZero);
1367 // Set>1 to 1
1368 vResult = _mm_min_ps(vResult,g_XMOne);
1369 // Convert to 0-255
1370 vResult = _mm_mul_ps(vResult,Scale);
1371 // Shuffle RGBA to ARGB
1372 vResult = XM_PERMUTE_PS(vResult,_MM_SHUFFLE(3,0,1,2));
1373 // Convert to int
1374 __m128i vInt = _mm_cvtps_epi32(vResult);
1375 // Mash to shorts
1376 vInt = _mm_packs_epi32(vInt,vInt);
1377 // Mash to bytes
1378 vInt = _mm_packus_epi16(vInt,vInt);
1379 // Store the color
1380 _mm_store_ss(reinterpret_cast<float *>(&pDestination->c),reinterpret_cast<__m128 *>(&vInt)[0]);
1381#else // _XM_VMX128_INTRINSICS_
1382#endif // _XM_VMX128_INTRINSICS_
1383}
1384
1385//------------------------------------------------------------------------------
1386_Use_decl_annotations_
1387inline void PackedVector::XMStoreHalf2
1388(
1389 XMHALF2* pDestination,
1390 FXMVECTOR V
1391)
1392{
1393 assert(pDestination);
1394#if defined(_XM_NO_INTRINSICS_) || defined(_XM_SSE_INTRINSICS_) || defined(_XM_ARM_NEON_INTRINSICS_)
1395
1396 pDestination->x = XMConvertFloatToHalf(XMVectorGetX(V));
1397 pDestination->y = XMConvertFloatToHalf(XMVectorGetY(V));
1398
1399#else // _XM_VMX128_INTRINSICS_
1400#endif // _XM_VMX128_INTRINSICS_
1401}
1402
1403//------------------------------------------------------------------------------
_Use_decl_annotations_
// Stores a vector as two 16-bit signed normalized integers (XMSHORTN2):
// each component is clamped to [-1,1] and scaled by 32767.
inline void PackedVector::XMStoreShortN2
(
    XMSHORTN2* pDestination,
    FXMVECTOR V
)
{
    assert(pDestination);
#if defined(_XM_NO_INTRINSICS_) || defined(_XM_ARM_NEON_INTRINSICS_)

    static const XMVECTORF32 Scale = {32767.0f, 32767.0f, 32767.0f, 32767.0f};

    XMVECTOR N = XMVectorClamp(V, g_XMNegativeOne.v, g_XMOne.v);
    N = XMVectorMultiply(N, Scale.v);
    N = XMVectorRound(N);

    XMFLOAT4A tmp;
    XMStoreFloat4A( &tmp, N );

    pDestination->x = (int16_t)tmp.x;
    pDestination->y = (int16_t)tmp.y;

#elif defined(_XM_SSE_INTRINSICS_)
    static const XMVECTORF32 Scale = {32767.0f, 32767.0f, 32767.0f, 32767.0f};

    // Clamp to [-1,1], scale to [-32767,32767]
    XMVECTOR vResult = _mm_max_ps(V,g_XMNegativeOne);
    vResult = _mm_min_ps(vResult,g_XMOne);
    vResult = _mm_mul_ps(vResult,Scale);
    // Convert to int with rounding, then pack the dwords to words
    __m128i vResulti = _mm_cvtps_epi32(vResult);
    vResulti = _mm_packs_epi32(vResulti,vResulti);
    // Store the low 32 bits (x and y)
    _mm_store_ss(reinterpret_cast<float *>(&pDestination->x),_mm_castsi128_ps(vResulti));
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}
1438
1439//------------------------------------------------------------------------------
_Use_decl_annotations_
// Stores a vector as two 16-bit signed integers (XMSHORT2), clamping each
// component to [-32767, 32767].
inline void PackedVector::XMStoreShort2
(
    XMSHORT2* pDestination,
    FXMVECTOR V
)
{
    assert(pDestination);
#if defined(_XM_NO_INTRINSICS_) || defined(_XM_ARM_NEON_INTRINSICS_)

    static const XMVECTORF32 Min = {-32767.0f, -32767.0f, -32767.0f, -32767.0f};
    static const XMVECTORF32 Max = {32767.0f, 32767.0f, 32767.0f, 32767.0f};

    XMVECTOR N = XMVectorClamp(V, Min, Max);
    N = XMVectorRound(N);

    XMFLOAT4A tmp;
    XMStoreFloat4A( &tmp, N );

    pDestination->x = (int16_t)tmp.x;
    pDestination->y = (int16_t)tmp.y;

#elif defined(_XM_SSE_INTRINSICS_)
    static const XMVECTORF32 Min = {-32767.0f, -32767.0f, -32767.0f, -32767.0f};
    static const XMVECTORF32 Max = {32767.0f, 32767.0f, 32767.0f, 32767.0f};
    // Bounds check
    XMVECTOR vResult = _mm_max_ps(V,Min);
    vResult = _mm_min_ps(vResult,Max);
    // Convert to int with rounding
    __m128i vInt = _mm_cvtps_epi32(vResult);
    // Pack the ints into shorts
    vInt = _mm_packs_epi32(vInt,vInt);
    // Store the low 32 bits (x and y)
    _mm_store_ss(reinterpret_cast<float *>(&pDestination->x),_mm_castsi128_ps(vInt));
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}
1476
1477//------------------------------------------------------------------------------
1478_Use_decl_annotations_
1479inline void PackedVector::XMStoreUShortN2
1480(
1481 XMUSHORTN2* pDestination,
1482 FXMVECTOR V
1483)
1484{
1485 assert(pDestination);
1486#if defined(_XM_NO_INTRINSICS_) || defined(_XM_ARM_NEON_INTRINSICS_)
1487
1488 static const XMVECTORF32 Scale = {65535.0f, 65535.0f, 65535.0f, 65535.0f};
1489
1490 XMVECTOR N = XMVectorSaturate(V);
1491 N = XMVectorMultiplyAdd(N, Scale.v, g_XMOneHalf.v);
1492 N = XMVectorTruncate(N);
1493
1494 XMFLOAT4A tmp;
1495 XMStoreFloat4A( &tmp, N );
1496
1497 pDestination->x = (int16_t)tmp.x;
1498 pDestination->y = (int16_t)tmp.y;
1499
1500#elif defined(_XM_SSE_INTRINSICS_)
1501 static const XMVECTORF32 Scale = {65535.0f, 65535.0f, 65535.0f, 65535.0f};
1502 // Bounds check
1503 XMVECTOR vResult = _mm_max_ps(V,g_XMZero);
1504 vResult = _mm_min_ps(vResult,g_XMOne);
1505 vResult = _mm_mul_ps(vResult,Scale);
1506 // Convert to int with rounding
1507 __m128i vInt = _mm_cvtps_epi32(vResult);
1508 // Since the SSE pack instruction clamps using signed rules,
1509 // manually extract the values to store them to memory
1510 pDestination->x = static_cast<int16_t>(_mm_extract_epi16(vInt,0));
1511 pDestination->y = static_cast<int16_t>(_mm_extract_epi16(vInt,2));
1512#else // _XM_VMX128_INTRINSICS_
1513#endif // _XM_VMX128_INTRINSICS_
1514}
1515
1516//------------------------------------------------------------------------------
1517_Use_decl_annotations_
1518inline void PackedVector::XMStoreUShort2
1519(
1520 XMUSHORT2* pDestination,
1521 FXMVECTOR V
1522)
1523{
1524 assert(pDestination);
1525#if defined(_XM_NO_INTRINSICS_) || defined(_XM_ARM_NEON_INTRINSICS_)
1526
1527 static const XMVECTORF32 Max = {65535.0f, 65535.0f, 65535.0f, 65535.0f};
1528
1529 XMVECTOR N = XMVectorClamp(V, XMVectorZero(), Max);
1530 N = XMVectorRound(N);
1531
1532 XMFLOAT4A tmp;
1533 XMStoreFloat4A( &tmp, N );
1534
1535 pDestination->x = (int16_t)tmp.x;
1536 pDestination->y = (int16_t)tmp.y;
1537
1538#elif defined(_XM_SSE_INTRINSICS_)
1539 static const XMVECTORF32 Max = {65535.0f, 65535.0f, 65535.0f, 65535.0f};
1540 // Bounds check
1541 XMVECTOR vResult = _mm_max_ps(V,g_XMZero);
1542 vResult = _mm_min_ps(vResult,Max);
1543 // Convert to int with rounding
1544 __m128i vInt = _mm_cvtps_epi32(vResult);
1545 // Since the SSE pack instruction clamps using signed rules,
1546 // manually extract the values to store them to memory
1547 pDestination->x = static_cast<int16_t>(_mm_extract_epi16(vInt,0));
1548 pDestination->y = static_cast<int16_t>(_mm_extract_epi16(vInt,2));
1549#else // _XM_VMX128_INTRINSICS_
1550#endif // _XM_VMX128_INTRINSICS_
1551}
1552
1553//------------------------------------------------------------------------------
1554_Use_decl_annotations_
1555inline void PackedVector::XMStoreByteN2
1556(
1557 XMBYTEN2* pDestination,
1558 FXMVECTOR V
1559)
1560{
1561 assert(pDestination);
1562
1563 static const XMVECTORF32 Scale = {127.0f, 127.0f, 127.0f, 127.0f};
1564
1565 XMVECTOR N = XMVectorClamp(V, g_XMNegativeOne.v, g_XMOne.v);
1566 N = XMVectorMultiply(N, Scale.v);
1567 N = XMVectorRound(N);
1568
1569 XMFLOAT4A tmp;
1570 XMStoreFloat4A( &tmp, N );
1571
1572 pDestination->x = (int8_t)tmp.x;
1573 pDestination->y = (int8_t)tmp.y;
1574}
1575
1576//------------------------------------------------------------------------------
1577_Use_decl_annotations_
1578inline void PackedVector::XMStoreByte2
1579(
1580 XMBYTE2* pDestination,
1581 FXMVECTOR V
1582)
1583{
1584 assert(pDestination);
1585
1586 static const XMVECTORF32 Min = {-127.0f, -127.0f, -127.0f, -127.0f};
1587 static const XMVECTORF32 Max = {127.0f, 127.0f, 127.0f, 127.0f};
1588
1589 XMVECTOR N = XMVectorClamp(V, Min, Max);
1590 N = XMVectorRound(N);
1591
1592 XMFLOAT4A tmp;
1593 XMStoreFloat4A( &tmp, N );
1594
1595 pDestination->x = (int8_t)tmp.x;
1596 pDestination->y = (int8_t)tmp.y;
1597}
1598
1599//------------------------------------------------------------------------------
1600_Use_decl_annotations_
1601inline void PackedVector::XMStoreUByteN2
1602(
1603 XMUBYTEN2* pDestination,
1604 FXMVECTOR V
1605)
1606{
1607 assert(pDestination);
1608
1609 static const XMVECTORF32 Scale = {255.0f, 255.0f, 255.0f, 255.0f};
1610
1611 XMVECTOR N = XMVectorSaturate(V);
1612 N = XMVectorMultiplyAdd(N, Scale.v, g_XMOneHalf.v);
1613 N = XMVectorTruncate(N);
1614
1615 XMFLOAT4A tmp;
1616 XMStoreFloat4A( &tmp, N );
1617
1618 pDestination->x = (uint8_t)tmp.x;
1619 pDestination->y = (uint8_t)tmp.y;
1620}
1621
1622//------------------------------------------------------------------------------
1623_Use_decl_annotations_
1624inline void PackedVector::XMStoreUByte2
1625(
1626 XMUBYTE2* pDestination,
1627 FXMVECTOR V
1628)
1629{
1630 assert(pDestination);
1631
1632 static const XMVECTORF32 Max = {255.0f, 255.0f, 255.0f, 255.0f};
1633
1634 XMVECTOR N = XMVectorClamp(V, XMVectorZero(), Max);
1635 N = XMVectorRound(N);
1636
1637 XMFLOAT4A tmp;
1638 XMStoreFloat4A( &tmp, N );
1639
1640 pDestination->x = (uint8_t)tmp.x;
1641 pDestination->y = (uint8_t)tmp.y;
1642}
1643
1644//------------------------------------------------------------------------------
1645_Use_decl_annotations_
1646inline void PackedVector::XMStoreU565
1647(
1648 XMU565* pDestination,
1649 FXMVECTOR V
1650)
1651{
1652 assert(pDestination);
1653#if defined(_XM_SSE_INTRINSICS_) && !defined(_XM_NO_INTRINSICS_)
1654 static const XMVECTORF32 Max = {31.0f, 63.0f, 31.0f, 0.0f};
1655 // Bounds check
1656 XMVECTOR vResult = _mm_max_ps(V,g_XMZero);
1657 vResult = _mm_min_ps(vResult,Max);
1658 // Convert to int with rounding
1659 __m128i vInt = _mm_cvtps_epi32(vResult);
1660 // No SSE operations will write to 16-bit values, so we have to extract them manually
1661 uint16_t x = static_cast<uint16_t>(_mm_extract_epi16(vInt,0));
1662 uint16_t y = static_cast<uint16_t>(_mm_extract_epi16(vInt,2));
1663 uint16_t z = static_cast<uint16_t>(_mm_extract_epi16(vInt,4));
1664 pDestination->v = ((z & 0x1F) << 11) |
1665 ((y & 0x3F) << 5) |
1666 ((x & 0x1F));
1667#else
1668 static const XMVECTORF32 Max = {31.0f, 63.0f, 31.0f, 0.0f};
1669
1670 XMVECTOR N = XMVectorClamp(V, XMVectorZero(), Max.v);
1671 N = XMVectorRound(N);
1672
1673 XMFLOAT4A tmp;
1674 XMStoreFloat4A( &tmp, N );
1675
1676 pDestination->v = (((uint16_t)tmp.z & 0x1F) << 11) |
1677 (((uint16_t)tmp.y & 0x3F) << 5) |
1678 (((uint16_t)tmp.x & 0x1F));
1679#endif !_XM_SSE_INTRINSICS_
1680}
1681
1682//------------------------------------------------------------------------------
1683_Use_decl_annotations_
1684inline void PackedVector::XMStoreFloat3PK
1685(
1686 XMFLOAT3PK* pDestination,
1687 FXMVECTOR V
1688)
1689{
1690 assert(pDestination);
1691
1692 __declspec(align(16)) uint32_t IValue[4];
1693 XMStoreFloat3A( reinterpret_cast<XMFLOAT3A*>(&IValue), V );
1694
1695 uint32_t Result[3];
1696
1697 // X & Y Channels (5-bit exponent, 6-bit mantissa)
1698 for(uint32_t j=0; j < 2; ++j)
1699 {
1700 uint32_t Sign = IValue[j] & 0x80000000;
1701 uint32_t I = IValue[j] & 0x7FFFFFFF;
1702
1703 if ((I & 0x7F800000) == 0x7F800000)
1704 {
1705 // INF or NAN
1706 Result[j] = 0x7c0;
1707 if (( I & 0x7FFFFF ) != 0)
1708 {
1709 Result[j] = 0x7c0 | (((I>>17)|(I>11)|(I>>6)|(I))&0x3f);
1710 }
1711 else if ( Sign )
1712 {
1713 // -INF is clamped to 0 since 3PK is positive only
1714 Result[j] = 0;
1715 }
1716 }
1717 else if ( Sign )
1718 {
1719 // 3PK is positive only, so clamp to zero
1720 Result[j] = 0;
1721 }
1722 else if (I > 0x477E0000U)
1723 {
1724 // The number is too large to be represented as a float11, set to max
1725 Result[j] = 0x7BF;
1726 }
1727 else
1728 {
1729 if (I < 0x38800000U)
1730 {
1731 // The number is too small to be represented as a normalized float11
1732 // Convert it to a denormalized value.
1733 uint32_t Shift = 113U - (I >> 23U);
1734 I = (0x800000U | (I & 0x7FFFFFU)) >> Shift;
1735 }
1736 else
1737 {
1738 // Rebias the exponent to represent the value as a normalized float11
1739 I += 0xC8000000U;
1740 }
1741
1742 Result[j] = ((I + 0xFFFFU + ((I >> 17U) & 1U)) >> 17U)&0x7ffU;
1743 }
1744 }
1745
1746 // Z Channel (5-bit exponent, 5-bit mantissa)
1747 uint32_t Sign = IValue[2] & 0x80000000;
1748 uint32_t I = IValue[2] & 0x7FFFFFFF;
1749
1750 if ((I & 0x7F800000) == 0x7F800000)
1751 {
1752 // INF or NAN
1753 Result[2] = 0x3e0;
1754 if ( I & 0x7FFFFF )
1755 {
1756 Result[2] = 0x3e0 | (((I>>18)|(I>13)|(I>>3)|(I))&0x1f);
1757 }
1758 else if ( Sign )
1759 {
1760 // -INF is clamped to 0 since 3PK is positive only
1761 Result[2] = 0;
1762 }
1763 }
1764 else if ( Sign )
1765 {
1766 // 3PK is positive only, so clamp to zero
1767 Result[2] = 0;
1768 }
1769 else if (I > 0x477C0000U)
1770 {
1771 // The number is too large to be represented as a float10, set to max
1772 Result[2] = 0x3df;
1773 }
1774 else
1775 {
1776 if (I < 0x38800000U)
1777 {
1778 // The number is too small to be represented as a normalized float10
1779 // Convert it to a denormalized value.
1780 uint32_t Shift = 113U - (I >> 23U);
1781 I = (0x800000U | (I & 0x7FFFFFU)) >> Shift;
1782 }
1783 else
1784 {
1785 // Rebias the exponent to represent the value as a normalized float10
1786 I += 0xC8000000U;
1787 }
1788
1789 Result[2] = ((I + 0x1FFFFU + ((I >> 18U) & 1U)) >> 18U)&0x3ffU;
1790 }
1791
1792 // Pack Result into memory
1793 pDestination->v = (Result[0] & 0x7ff)
1794 | ( (Result[1] & 0x7ff) << 11 )
1795 | ( (Result[2] & 0x3ff) << 22 );
1796}
1797
1798//------------------------------------------------------------------------------
1799_Use_decl_annotations_
1800inline void PackedVector::XMStoreFloat3SE
1801(
1802 XMFLOAT3SE* pDestination,
1803 FXMVECTOR V
1804)
1805{
1806 assert(pDestination);
1807
1808 __declspec(align(16)) uint32_t IValue[4];
1809 XMStoreFloat3A( reinterpret_cast<XMFLOAT3A*>(&IValue), V );
1810
1811 uint32_t Exp[3];
1812 uint32_t Frac[3];
1813
1814 // X, Y, Z Channels (5-bit exponent, 9-bit mantissa)
1815 for(uint32_t j=0; j < 3; ++j)
1816 {
1817 uint32_t Sign = IValue[j] & 0x80000000;
1818 uint32_t I = IValue[j] & 0x7FFFFFFF;
1819
1820 if ((I & 0x7F800000) == 0x7F800000)
1821 {
1822 // INF or NAN
1823 Exp[j] = 0x1f;
1824 if (( I & 0x7FFFFF ) != 0)
1825 {
1826 Frac[j] = ((I>>14)|(I>5)|(I))&0x1ff;
1827 }
1828 else if ( Sign )
1829 {
1830 // -INF is clamped to 0 since 3SE is positive only
1831 Exp[j] = Frac[j] = 0;
1832 }
1833 }
1834 else if ( Sign )
1835 {
1836 // 3SE is positive only, so clamp to zero
1837 Exp[j] = Frac[j] = 0;
1838 }
1839 else if (I > 0x477FC000U)
1840 {
1841 // The number is too large, set to max
1842 Exp[j] = 0x1e;
1843 Frac[j] = 0x1ff;
1844 }
1845 else
1846 {
1847 if (I < 0x38800000U)
1848 {
1849 // The number is too small to be represented as a normalized float11
1850 // Convert it to a denormalized value.
1851 uint32_t Shift = 113U - (I >> 23U);
1852 I = (0x800000U | (I & 0x7FFFFFU)) >> Shift;
1853 }
1854 else
1855 {
1856 // Rebias the exponent to represent the value as a normalized float11
1857 I += 0xC8000000U;
1858 }
1859
1860 uint32_t T = ((I + 0x1FFFU + ((I >> 14U) & 1U)) >> 14U)&0x3fffU;
1861
1862 Exp[j] = (T & 0x3E00) >> 9;
1863 Frac[j] = T & 0x1ff;
1864 }
1865 }
1866
1867 // Adjust to a shared exponent
1868 uint32_t T = XMMax( Exp[0], XMMax( Exp[1], Exp[2] ) );
1869
1870 Frac[0] = Frac[0] >> (T - Exp[0]);
1871 Frac[1] = Frac[1] >> (T - Exp[1]);
1872 Frac[2] = Frac[2] >> (T - Exp[2]);
1873
1874 // Store packed into memory
1875 pDestination->xm = Frac[0];
1876 pDestination->ym = Frac[1];
1877 pDestination->zm = Frac[2];
1878 pDestination->e = T;
1879}
1880
1881//------------------------------------------------------------------------------
1882_Use_decl_annotations_
1883inline void PackedVector::XMStoreHalf4
1884(
1885 XMHALF4* pDestination,
1886 FXMVECTOR V
1887)
1888{
1889 assert(pDestination);
1890#if defined(_XM_NO_INTRINSICS_) || defined(_XM_SSE_INTRINSICS_) || defined(_XM_ARM_NEON_INTRINSICS_)
1891
1892 XMFLOAT4A t;
1893 XMStoreFloat4A(&t, V );
1894
1895 pDestination->x = XMConvertFloatToHalf(t.x);
1896 pDestination->y = XMConvertFloatToHalf(t.y);
1897 pDestination->z = XMConvertFloatToHalf(t.z);
1898 pDestination->w = XMConvertFloatToHalf(t.w);
1899
1900#else // _XM_VMX128_INTRINSICS_
1901#endif // _XM_VMX128_INTRINSICS_
1902}
1903
1904//------------------------------------------------------------------------------
_Use_decl_annotations_
// Stores a vector as four 16-bit signed normalized integers (XMSHORTN4):
// each component is clamped to [-1,1] and scaled by 32767.
inline void PackedVector::XMStoreShortN4
(
    XMSHORTN4* pDestination,
    FXMVECTOR V
)
{
    assert(pDestination);
#if defined(_XM_NO_INTRINSICS_)

    static const XMVECTORF32 Scale = {32767.0f, 32767.0f, 32767.0f, 32767.0f};

    XMVECTOR N = XMVectorClamp(V, g_XMNegativeOne.v, g_XMOne.v);
    N = XMVectorMultiply(N, Scale.v);
    N = XMVectorRound(N);

    XMFLOAT4A tmp;
    XMStoreFloat4A(&tmp, N );

    pDestination->x = (int16_t)tmp.x;
    pDestination->y = (int16_t)tmp.y;
    pDestination->z = (int16_t)tmp.z;
    pDestination->w = (int16_t)tmp.w;

#elif defined(_XM_ARM_NEON_INTRINSICS_)
    // Clamp to [-1,1], scale, convert to int32, narrow to int16, store all four
    __n128 vResult = vmaxq_f32( V, g_XMNegativeOne );
    vResult = vminq_f32( vResult, g_XMOne );
    const __n128 Scale = vdupq_n_f32( 32767.0f );
    vResult = vmulq_f32( vResult, Scale );
    vResult = vcvtq_s32_f32( vResult );
    __n64 vInt = vmovn_s32( vResult );
    vst1_s16( (int16_t*)pDestination, vInt );
#elif defined(_XM_SSE_INTRINSICS_)
    static const XMVECTORF32 Scale = {32767.0f, 32767.0f, 32767.0f, 32767.0f};

    // Clamp to [-1,1] and scale to [-32767,32767]
    XMVECTOR vResult = _mm_max_ps(V,g_XMNegativeOne);
    vResult = _mm_min_ps(vResult,g_XMOne);
    vResult = _mm_mul_ps(vResult,Scale);
    // Convert to int with rounding, pack the dwords to words
    __m128i vResulti = _mm_cvtps_epi32(vResult);
    vResulti = _mm_packs_epi32(vResulti,vResulti);
    // Store the low 64 bits (all four shorts)
    _mm_store_sd(reinterpret_cast<double *>(&pDestination->x),_mm_castsi128_pd(vResulti));
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}
1949
1950//------------------------------------------------------------------------------
_Use_decl_annotations_
// Stores a vector as four 16-bit signed integers (XMSHORT4), clamping each
// component to [-32767, 32767].
inline void PackedVector::XMStoreShort4
(
    XMSHORT4* pDestination,
    FXMVECTOR V
)
{
    assert(pDestination);
#if defined(_XM_NO_INTRINSICS_)

    static const XMVECTORF32 Min = {-32767.0f, -32767.0f, -32767.0f, -32767.0f};
    static const XMVECTORF32 Max = {32767.0f, 32767.0f, 32767.0f, 32767.0f};

    XMVECTOR N = XMVectorClamp(V, Min, Max);
    N = XMVectorRound(N);

    XMFLOAT4A tmp;
    XMStoreFloat4A(&tmp, N );

    pDestination->x = (int16_t)tmp.x;
    pDestination->y = (int16_t)tmp.y;
    pDestination->z = (int16_t)tmp.z;
    pDestination->w = (int16_t)tmp.w;

#elif defined(_XM_ARM_NEON_INTRINSICS_)
    static const XMVECTORF32 Min = {-32767.0f, -32767.0f, -32767.0f, -32767.0f};
    static const XMVECTORF32 Max = {32767.0f, 32767.0f, 32767.0f, 32767.0f};

    // Clamp, convert to int32, narrow to int16, store all four
    __n128 vResult = vmaxq_f32( V, Min );
    vResult = vminq_f32( vResult, Max );
    vResult = vcvtq_s32_f32( vResult );
    __n64 vInt = vmovn_s32( vResult );
    vst1_s16( (int16_t*)pDestination, vInt );
#elif defined(_XM_SSE_INTRINSICS_)
    static const XMVECTORF32 Min = {-32767.0f, -32767.0f, -32767.0f, -32767.0f};
    static const XMVECTORF32 Max = {32767.0f, 32767.0f, 32767.0f, 32767.0f};
    // Bounds check
    XMVECTOR vResult = _mm_max_ps(V,Min);
    vResult = _mm_min_ps(vResult,Max);
    // Convert to int with rounding
    __m128i vInt = _mm_cvtps_epi32(vResult);
    // Pack the ints into shorts
    vInt = _mm_packs_epi32(vInt,vInt);
    // Store the low 64 bits (all four shorts)
    _mm_store_sd(reinterpret_cast<double *>(&pDestination->x),_mm_castsi128_pd(vInt));
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}
1998
1999//------------------------------------------------------------------------------
2000_Use_decl_annotations_
2001inline void PackedVector::XMStoreUShortN4
2002(
2003 XMUSHORTN4* pDestination,
2004 FXMVECTOR V
2005)
2006{
2007 assert(pDestination);
2008#if defined(_XM_NO_INTRINSICS_)
2009
2010 static const XMVECTORF32 Scale = {65535.0f, 65535.0f, 65535.0f, 65535.0f};
2011
2012 XMVECTOR N = XMVectorSaturate(V);
2013 N = XMVectorMultiplyAdd(N, Scale.v, g_XMOneHalf.v);
2014 N = XMVectorTruncate(N);
2015
2016 XMFLOAT4A tmp;
2017 XMStoreFloat4A(&tmp, N );
2018
2019 pDestination->x = (int16_t)tmp.x;
2020 pDestination->y = (int16_t)tmp.y;
2021 pDestination->z = (int16_t)tmp.z;
2022 pDestination->w = (int16_t)tmp.w;
2023
2024#elif defined(_XM_ARM_NEON_INTRINSICS_)
2025 __n128 vResult = vmaxq_f32( V, g_XMZero );
2026 vResult = vminq_f32( vResult, g_XMOne );
2027 const __n128 Scale = vdupq_n_f32( 65535.0f );
2028 vResult = vmulq_f32( vResult, Scale );
2029 vResult = vcvtq_u32_f32( vResult );
2030 __n64 vInt = vmovn_u32( vResult );
2031 vst1_u16( (uint16_t*)pDestination, vInt );
2032#elif defined(_XM_SSE_INTRINSICS_)
2033 static const XMVECTORF32 Scale = {65535.0f, 65535.0f, 65535.0f, 65535.0f};
2034 // Bounds check
2035 XMVECTOR vResult = _mm_max_ps(V,g_XMZero);
2036 vResult = _mm_min_ps(vResult,g_XMOne);
2037 vResult = _mm_mul_ps(vResult,Scale);
2038 // Convert to int with rounding
2039 __m128i vInt = _mm_cvtps_epi32(vResult);
2040 // Since the SSE pack instruction clamps using signed rules,
2041 // manually extract the values to store them to memory
2042 pDestination->x = static_cast<int16_t>(_mm_extract_epi16(vInt,0));
2043 pDestination->y = static_cast<int16_t>(_mm_extract_epi16(vInt,2));
2044 pDestination->z = static_cast<int16_t>(_mm_extract_epi16(vInt,4));
2045 pDestination->w = static_cast<int16_t>(_mm_extract_epi16(vInt,6));
2046#else // _XM_VMX128_INTRINSICS_
2047#endif // _XM_VMX128_INTRINSICS_
2048}
2049
2050//------------------------------------------------------------------------------
2051_Use_decl_annotations_
2052inline void PackedVector::XMStoreUShort4
2053(
2054 XMUSHORT4* pDestination,
2055 FXMVECTOR V
2056)
2057{
2058 assert(pDestination);
2059#if defined(_XM_NO_INTRINSICS_)
2060
2061 static const XMVECTORF32 Max = {65535.0f, 65535.0f, 65535.0f, 65535.0f};
2062
2063 XMVECTOR N = XMVectorClamp(V, XMVectorZero(), Max);
2064 N = XMVectorRound(N);
2065
2066 XMFLOAT4A tmp;
2067 XMStoreFloat4A(&tmp, N );
2068
2069 pDestination->x = (int16_t)tmp.x;
2070 pDestination->y = (int16_t)tmp.y;
2071 pDestination->z = (int16_t)tmp.z;
2072 pDestination->w = (int16_t)tmp.w;
2073
2074#elif defined(_XM_ARM_NEON_INTRINSICS_)
2075 static const XMVECTORF32 Max = {65535.0f, 65535.0f, 65535.0f, 65535.0f};
2076
2077 __n128 vResult = vmaxq_f32( V, g_XMZero );
2078 vResult = vminq_f32( vResult, Max );
2079 vResult = vcvtq_u32_f32( vResult );
2080 __n64 vInt = vmovn_u32( vResult );
2081 vst1_u16( (uint16_t*)pDestination, vInt );
2082#elif defined(_XM_SSE_INTRINSICS_)
2083 static const XMVECTORF32 Max = {65535.0f, 65535.0f, 65535.0f, 65535.0f};
2084 // Bounds check
2085 XMVECTOR vResult = _mm_max_ps(V,g_XMZero);
2086 vResult = _mm_min_ps(vResult,Max);
2087 // Convert to int with rounding
2088 __m128i vInt = _mm_cvtps_epi32(vResult);
2089 // Since the SSE pack instruction clamps using signed rules,
2090 // manually extract the values to store them to memory
2091 pDestination->x = static_cast<int16_t>(_mm_extract_epi16(vInt,0));
2092 pDestination->y = static_cast<int16_t>(_mm_extract_epi16(vInt,2));
2093 pDestination->z = static_cast<int16_t>(_mm_extract_epi16(vInt,4));
2094 pDestination->w = static_cast<int16_t>(_mm_extract_epi16(vInt,6));
2095#else // _XM_VMX128_INTRINSICS_
2096#endif // _XM_VMX128_INTRINSICS_
2097}
2098
2099//------------------------------------------------------------------------------
_Use_decl_annotations_
// Stores a vector into 10:10:10:2 format: x/y/z are signed normalized
// in [-1,1] (10 bits each), w is unsigned in [0,1] mapped to [0,3] (2 bits).
inline void PackedVector::XMStoreXDecN4
(
    XMXDECN4* pDestination,
    FXMVECTOR V
)
{
    assert(pDestination);
#if defined(_XM_NO_INTRINSICS_) || defined(_XM_ARM_NEON_INTRINSICS_)

    static const XMVECTORF32 Min = {-1.0f, -1.0f, -1.0f, 0.0f};
    static const XMVECTORF32 Scale = {511.0f, 511.0f, 511.0f, 3.0f};

    XMVECTOR N = XMVectorClamp(V, Min.v, g_XMOne.v);
    N = XMVectorMultiply(N, Scale.v);
    N = XMVectorRound(N);

    XMFLOAT4A tmp;
    XMStoreFloat4A(&tmp, N );

    // Pack w:z:y:x into a single 32-bit word (two's complement fields
    // for x/y/z are masked down to 10 bits each)
    pDestination->v = ((uint32_t)tmp.w << 30) |
                       (((int32_t)tmp.z & 0x3FF) << 20) |
                       (((int32_t)tmp.y & 0x3FF) << 10) |
                       (((int32_t)tmp.x & 0x3FF));

#elif defined(_XM_SSE_INTRINSICS_)
    static const XMVECTORF32 Min = {-1.0f, -1.0f, -1.0f, 0.0f};
    // Per-component scale folds in the field's bit position (x1, x2^10, x2^20, x2^29)
    static const XMVECTORF32 Scale = {511.0f, 511.0f*1024.0f, 511.0f*1048576.0f,3.0f*536870912.0f};
    static const XMVECTORI32 ScaleMask = {0x3FF,0x3FF<<10,0x3FF<<20,0x3<<29};
    XMVECTOR vResult = _mm_max_ps(V,Min);
    vResult = _mm_min_ps(vResult,g_XMOne);
    // Scale by multiplication
    vResult = _mm_mul_ps(vResult,Scale);
    // Convert to int (W is unsigned)
    __m128i vResulti = _mm_cvtps_epi32(vResult);
    // Mask off any fraction
    vResulti = _mm_and_si128(vResulti,ScaleMask);
    // To fix W, add itself to shift it up to <<30 instead of <<29
    __m128i vResultw = _mm_and_si128(vResulti,g_XMMaskW);
    vResulti = _mm_add_epi32(vResulti,vResultw);
    // Do a horizontal or of all 4 entries
    vResult = XM_PERMUTE_PS(_mm_castsi128_ps(vResulti),_MM_SHUFFLE(0,3,2,1));
    vResulti = _mm_or_si128(vResulti,_mm_castps_si128(vResult));
    vResult = XM_PERMUTE_PS(vResult,_MM_SHUFFLE(0,3,2,1));
    vResulti = _mm_or_si128(vResulti,_mm_castps_si128(vResult));
    vResult = XM_PERMUTE_PS(vResult,_MM_SHUFFLE(0,3,2,1));
    vResulti = _mm_or_si128(vResulti,_mm_castps_si128(vResult));
    _mm_store_ss(reinterpret_cast<float *>(&pDestination->v),_mm_castsi128_ps(vResulti));
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}
2151
2152//------------------------------------------------------------------------------
_Use_decl_annotations_
// Stores a vector into 10:10:10:2 integer format: x/y/z are signed
// integers clamped to [-511,511], w is unsigned clamped to [0,3].
inline void PackedVector::XMStoreXDec4
(
    XMXDEC4* pDestination,
    FXMVECTOR V
)
{
    assert(pDestination);
#if defined(_XM_NO_INTRINSICS_) || defined(_XM_ARM_NEON_INTRINSICS_)

    static const XMVECTORF32 Min = {-511.0f, -511.0f, -511.0f, 0.0f};
    static const XMVECTORF32 Max = {511.0f, 511.0f, 511.0f, 3.0f};

    XMVECTOR N = XMVectorClamp(V, Min, Max);

    XMFLOAT4A tmp;
    XMStoreFloat4A(&tmp, N );

    // Pack w:z:y:x into a single 32-bit word; x/y/z fields are masked
    // down to 10 bits of their two's-complement representation
    pDestination->v = ((uint32_t)tmp.w << 30) |
                       (((int32_t)tmp.z & 0x3FF) << 20) |
                       (((int32_t)tmp.y & 0x3FF) << 10) |
                       (((int32_t)tmp.x & 0x3FF));

#elif defined(_XM_SSE_INTRINSICS_)
    static const XMVECTORF32 MinXDec4 = {-511.0f,-511.0f,-511.0f, 0.0f};
    static const XMVECTORF32 MaxXDec4 = { 511.0f, 511.0f, 511.0f, 3.0f};
    // y and w are scaled into place one bit short of their final position
    // so signed values survive; the later add-to-self finishes the shift
    static const XMVECTORF32 ScaleXDec4 = {1.0f,1024.0f/2.0f,1024.0f*1024.0f,1024.0f*1024.0f*1024.0f/2.0f};
    static const XMVECTORI32 MaskXDec4= {0x3FF,0x3FF<<(10-1),0x3FF<<20,0x3<<(30-1)};
    // Clamp to bounds
    XMVECTOR vResult = _mm_max_ps(V,MinXDec4);
    vResult = _mm_min_ps(vResult,MaxXDec4);
    // Scale by multiplication
    vResult = _mm_mul_ps(vResult,ScaleXDec4);
    // Convert to int
    __m128i vResulti = _mm_cvttps_epi32(vResult);
    // Mask off any fraction
    vResulti = _mm_and_si128(vResulti,MaskXDec4);
    // Do a horizontal or of 4 entries
    __m128i vResulti2 = _mm_shuffle_epi32(vResulti,_MM_SHUFFLE(3,2,3,2));
    // x = x|z, y = y|w
    vResulti = _mm_or_si128(vResulti,vResulti2);
    // Move Z to the x position
    vResulti2 = _mm_shuffle_epi32(vResulti,_MM_SHUFFLE(1,1,1,1));
    // Perform a single bit left shift on y|w
    vResulti2 = _mm_add_epi32(vResulti2,vResulti2);
    // i = x|y|z|w
    vResulti = _mm_or_si128(vResulti,vResulti2);
    _mm_store_ss(reinterpret_cast<float *>(&pDestination->v),_mm_castsi128_ps(vResulti));
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}
2204
2205//------------------------------------------------------------------------------
_Use_decl_annotations_
// Stores a vector into 10:10:10:2 unsigned normalized format.
// x/y/z are saturated to [0,1] and scaled to [0,1023]; w to [0,3].
inline void PackedVector::XMStoreUDecN4
(
    XMUDECN4* pDestination,
    FXMVECTOR V
)
{
    assert(pDestination);
#if defined(_XM_NO_INTRINSICS_) || defined(_XM_ARM_NEON_INTRINSICS_)

    static const XMVECTORF32 Scale = {1023.0f, 1023.0f, 1023.0f, 3.0f};

    XMVECTOR N = XMVectorSaturate(V);
    N = XMVectorMultiply(N, Scale.v);

    XMFLOAT4A tmp;
    XMStoreFloat4A(&tmp, N );

    // Pack w:z:y:x into a single 32-bit word
    pDestination->v = ((uint32_t)tmp.w << 30) |
                       (((uint32_t)tmp.z & 0x3FF) << 20) |
                       (((uint32_t)tmp.y & 0x3FF) << 10) |
                       (((uint32_t)tmp.x & 0x3FF));

#elif defined(_XM_SSE_INTRINSICS_)
    // Scales fold in the field bit positions; y and w land one bit short
    // and are finished by the add-to-self below
    static const XMVECTORF32 ScaleUDecN4 = {1023.0f,1023.0f*1024.0f*0.5f,1023.0f*1024.0f*1024.0f,3.0f*1024.0f*1024.0f*1024.0f*0.5f};
    static const XMVECTORI32 MaskUDecN4= {0x3FF,0x3FF<<(10-1),0x3FF<<20,0x3<<(30-1)};
    // Clamp to bounds
    XMVECTOR vResult = _mm_max_ps(V,g_XMZero);
    vResult = _mm_min_ps(vResult,g_XMOne);
    // Scale by multiplication
    vResult = _mm_mul_ps(vResult,ScaleUDecN4);
    // Convert to int
    __m128i vResulti = _mm_cvttps_epi32(vResult);
    // Mask off any fraction
    vResulti = _mm_and_si128(vResulti,MaskUDecN4);
    // Do a horizontal or of 4 entries
    __m128i vResulti2 = _mm_shuffle_epi32(vResulti,_MM_SHUFFLE(3,2,3,2));
    // x = x|z, y = y|w
    vResulti = _mm_or_si128(vResulti,vResulti2);
    // Move Z to the x position
    vResulti2 = _mm_shuffle_epi32(vResulti,_MM_SHUFFLE(1,1,1,1));
    // Perform a left shift by one bit on y|w
    vResulti2 = _mm_add_epi32(vResulti2,vResulti2);
    // i = x|y|z|w
    vResulti = _mm_or_si128(vResulti,vResulti2);
    _mm_store_ss(reinterpret_cast<float *>(&pDestination->v),_mm_castsi128_ps(vResulti));
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}
2255
2256//------------------------------------------------------------------------------
_Use_decl_annotations_
// Stores a vector into 10:10:10:2 unsigned integer format.
// x/y/z are clamped to [0,1023]; w is clamped to [0,3].
inline void PackedVector::XMStoreUDec4
(
    XMUDEC4* pDestination,
    FXMVECTOR V
)
{
    assert(pDestination);
#if defined(_XM_NO_INTRINSICS_) || defined(_XM_ARM_NEON_INTRINSICS_)

    static const XMVECTORF32 Max = {1023.0f, 1023.0f, 1023.0f, 3.0f};

    XMVECTOR N = XMVectorClamp(V, XMVectorZero(), Max);

    XMFLOAT4A tmp;
    XMStoreFloat4A(&tmp, N );

    // Pack w:z:y:x into a single 32-bit word
    pDestination->v = ((uint32_t)tmp.w << 30) |
                       (((uint32_t)tmp.z & 0x3FF) << 20) |
                       (((uint32_t)tmp.y & 0x3FF) << 10) |
                       (((uint32_t)tmp.x & 0x3FF));

#elif defined(_XM_SSE_INTRINSICS_)
    static const XMVECTORF32 MaxUDec4 = { 1023.0f, 1023.0f, 1023.0f, 3.0f};
    // y and w are scaled one bit short of their final field position;
    // the add-to-self below completes the shift
    static const XMVECTORF32 ScaleUDec4 = {1.0f,1024.0f/2.0f,1024.0f*1024.0f,1024.0f*1024.0f*1024.0f/2.0f};
    static const XMVECTORI32 MaskUDec4= {0x3FF,0x3FF<<(10-1),0x3FF<<20,0x3<<(30-1)};
    // Clamp to bounds
    XMVECTOR vResult = _mm_max_ps(V,g_XMZero);
    vResult = _mm_min_ps(vResult,MaxUDec4);
    // Scale by multiplication
    vResult = _mm_mul_ps(vResult,ScaleUDec4);
    // Convert to int
    __m128i vResulti = _mm_cvttps_epi32(vResult);
    // Mask off any fraction
    vResulti = _mm_and_si128(vResulti,MaskUDec4);
    // Do a horizontal or of 4 entries
    __m128i vResulti2 = _mm_shuffle_epi32(vResulti,_MM_SHUFFLE(3,2,3,2));
    // x = x|z, y = y|w
    vResulti = _mm_or_si128(vResulti,vResulti2);
    // Move Z to the x position
    vResulti2 = _mm_shuffle_epi32(vResulti,_MM_SHUFFLE(1,1,1,1));
    // Perform a left shift by one bit on y|w
    vResulti2 = _mm_add_epi32(vResulti2,vResulti2);
    // i = x|y|z|w
    vResulti = _mm_or_si128(vResulti,vResulti2);
    _mm_store_ss(reinterpret_cast<float *>(&pDestination->v),_mm_castsi128_ps(vResulti));
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}
2306
2307//------------------------------------------------------------------------------
_Use_decl_annotations_
// Stores a vector into 10:10:10:2 signed normalized format.
// x/y/z are clamped to [-1,1] and scaled to [-511,511]; w maps to [-1,1].
inline void PackedVector::XMStoreDecN4
(
    XMDECN4* pDestination,
    FXMVECTOR V
)
{
    assert(pDestination);
#if defined(_XM_NO_INTRINSICS_) || defined(_XM_ARM_NEON_INTRINSICS_)

    static const XMVECTORF32 Scale = {511.0f, 511.0f, 511.0f, 1.0f};

    XMVECTOR N = XMVectorClamp(V, g_XMNegativeOne.v, g_XMOne.v);
    N = XMVectorMultiply(N, Scale.v);

    XMFLOAT4A tmp;
    XMStoreFloat4A(&tmp, N );

    // Pack w:z:y:x into a single 32-bit word.
    // NOTE(review): left-shifting a negative w by 30 is technically
    // implementation-defined; this matches the library's long-standing code.
    pDestination->v = ((int32_t)tmp.w << 30) |
                       (((int32_t)tmp.z & 0x3FF) << 20) |
                       (((int32_t)tmp.y & 0x3FF) << 10) |
                       (((int32_t)tmp.x & 0x3FF));

#elif defined(_XM_SSE_INTRINSICS_)
    // Per-component scale folds in the field's bit position
    static const XMVECTORF32 ScaleDecN4 = {511.0f,511.0f*1024.0f,511.0f*1024.0f*1024.0f,1.0f*1024.0f*1024.0f*1024.0f};
    static const XMVECTORI32 MaskDecN4= {0x3FF,0x3FF<<10,0x3FF<<20,0x3<<30};
    // Clamp to bounds
    XMVECTOR vResult = _mm_max_ps(V,g_XMNegativeOne);
    vResult = _mm_min_ps(vResult,g_XMOne);
    // Scale by multiplication
    vResult = _mm_mul_ps(vResult,ScaleDecN4);
    // Convert to int
    __m128i vResulti = _mm_cvttps_epi32(vResult);
    // Mask off any fraction
    vResulti = _mm_and_si128(vResulti,MaskDecN4);
    // Do a horizontal or of 4 entries
    __m128i vResulti2 = _mm_shuffle_epi32(vResulti,_MM_SHUFFLE(3,2,3,2));
    // x = x|z, y = y|w
    vResulti = _mm_or_si128(vResulti,vResulti2);
    // Move Z to the x position
    vResulti2 = _mm_shuffle_epi32(vResulti,_MM_SHUFFLE(1,1,1,1));
    // i = x|y|z|w
    vResulti = _mm_or_si128(vResulti,vResulti2);
    _mm_store_ss(reinterpret_cast<float *>(&pDestination->v),_mm_castsi128_ps(vResulti));
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}
2355
2356//------------------------------------------------------------------------------
_Use_decl_annotations_
// Stores a vector into 10:10:10:2 signed integer format.
// x/y/z are clamped to [-511,511]; w is clamped to [-1,1].
inline void PackedVector::XMStoreDec4
(
    XMDEC4* pDestination,
    FXMVECTOR V
)
{
    assert(pDestination);
#if defined(_XM_NO_INTRINSICS_) || defined(_XM_ARM_NEON_INTRINSICS_)

    static const XMVECTORF32 Min = {-511.0f, -511.0f, -511.0f, -1.0f};
    static const XMVECTORF32 Max = {511.0f, 511.0f, 511.0f, 1.0f};

    XMVECTOR N = XMVectorClamp(V, Min, Max);

    XMFLOAT4A tmp;
    XMStoreFloat4A(&tmp, N );

    // Pack w:z:y:x into a single 32-bit word; fields keep 10 (or 2) bits
    // of the two's-complement representation
    pDestination->v = ((int32_t)tmp.w << 30) |
                       (((int32_t)tmp.z & 0x3FF) << 20) |
                       (((int32_t)tmp.y & 0x3FF) << 10) |
                       (((int32_t)tmp.x & 0x3FF));

#elif defined(_XM_SSE_INTRINSICS_)
    static const XMVECTORF32 MinDec4 = {-511.0f,-511.0f,-511.0f,-1.0f};
    static const XMVECTORF32 MaxDec4 = { 511.0f, 511.0f, 511.0f, 1.0f};
    // Per-component scale folds in the field's bit position
    static const XMVECTORF32 ScaleDec4 = {1.0f,1024.0f,1024.0f*1024.0f,1024.0f*1024.0f*1024.0f};
    static const XMVECTORI32 MaskDec4= {0x3FF,0x3FF<<10,0x3FF<<20,0x3<<30};
    // Clamp to bounds
    XMVECTOR vResult = _mm_max_ps(V,MinDec4);
    vResult = _mm_min_ps(vResult,MaxDec4);
    // Scale by multiplication
    vResult = _mm_mul_ps(vResult,ScaleDec4);
    // Convert to int
    __m128i vResulti = _mm_cvttps_epi32(vResult);
    // Mask off any fraction
    vResulti = _mm_and_si128(vResulti,MaskDec4);
    // Do a horizontal or of 4 entries
    __m128i vResulti2 = _mm_shuffle_epi32(vResulti,_MM_SHUFFLE(3,2,3,2));
    // x = x|z, y = y|w
    vResulti = _mm_or_si128(vResulti,vResulti2);
    // Move Z to the x position
    vResulti2 = _mm_shuffle_epi32(vResulti,_MM_SHUFFLE(1,1,1,1));
    // i = x|y|z|w
    vResulti = _mm_or_si128(vResulti,vResulti2);
    _mm_store_ss(reinterpret_cast<float *>(&pDestination->v),_mm_castsi128_ps(vResulti));
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}
2406
2407//------------------------------------------------------------------------------
_Use_decl_annotations_
// Stores a vector into 8:8:8:8 unsigned normalized integer format.
// Each component of V is saturated to [0,1] and scaled to [0,255].
inline void PackedVector::XMStoreUByteN4
(
    XMUBYTEN4* pDestination,
    FXMVECTOR V
)
{
    assert(pDestination);
#if defined(_XM_NO_INTRINSICS_) || defined(_XM_ARM_NEON_INTRINSICS_)

    static const XMVECTORF32 Scale = {255.0f, 255.0f, 255.0f, 255.0f};

    XMVECTOR N = XMVectorSaturate(V);
    N = XMVectorMultiply(N, Scale.v);
    N = XMVectorRound(N);

    XMFLOAT4A tmp;
    XMStoreFloat4A(&tmp, N );

    pDestination->x = (uint8_t)tmp.x;
    pDestination->y = (uint8_t)tmp.y;
    pDestination->z = (uint8_t)tmp.z;
    pDestination->w = (uint8_t)tmp.w;

#elif defined(_XM_SSE_INTRINSICS_)
    // y and w are scaled one bit short of their byte position and fixed up
    // by the add-to-self below
    static const XMVECTORF32 ScaleUByteN4 = {255.0f,255.0f*256.0f*0.5f,255.0f*256.0f*256.0f,255.0f*256.0f*256.0f*256.0f*0.5f};
    static const XMVECTORI32 MaskUByteN4 = {0xFF,0xFF<<(8-1),0xFF<<16,0xFF<<(24-1)};
    // Clamp to bounds
    XMVECTOR vResult = _mm_max_ps(V,g_XMZero);
    vResult = _mm_min_ps(vResult,g_XMOne);
    // Scale by multiplication
    vResult = _mm_mul_ps(vResult,ScaleUByteN4);
    // Convert to int
    __m128i vResulti = _mm_cvttps_epi32(vResult);
    // Mask off any fraction
    vResulti = _mm_and_si128(vResulti,MaskUByteN4);
    // Do a horizontal or of 4 entries
    __m128i vResulti2 = _mm_shuffle_epi32(vResulti,_MM_SHUFFLE(3,2,3,2));
    // x = x|z, y = y|w
    vResulti = _mm_or_si128(vResulti,vResulti2);
    // Move Z to the x position
    vResulti2 = _mm_shuffle_epi32(vResulti,_MM_SHUFFLE(1,1,1,1));
    // Perform a single bit left shift to fix y|w
    vResulti2 = _mm_add_epi32(vResulti2,vResulti2);
    // i = x|y|z|w
    vResulti = _mm_or_si128(vResulti,vResulti2);
    _mm_store_ss(reinterpret_cast<float *>(&pDestination->v),_mm_castsi128_ps(vResulti));
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}
2458
2459//------------------------------------------------------------------------------
_Use_decl_annotations_
// Stores a vector into 8:8:8:8 unsigned integer format.
// Each component of V is clamped to [0,255] and rounded.
inline void PackedVector::XMStoreUByte4
(
    XMUBYTE4* pDestination,
    FXMVECTOR V
)
{
    assert(pDestination);
#if defined(_XM_NO_INTRINSICS_) || defined(_XM_ARM_NEON_INTRINSICS_)

    static const XMVECTORF32 Max = {255.0f, 255.0f, 255.0f, 255.0f};

    XMVECTOR N = XMVectorClamp(V, XMVectorZero(), Max);
    N = XMVectorRound(N);

    XMFLOAT4A tmp;
    XMStoreFloat4A(&tmp, N );

    pDestination->x = (uint8_t)tmp.x;
    pDestination->y = (uint8_t)tmp.y;
    pDestination->z = (uint8_t)tmp.z;
    pDestination->w = (uint8_t)tmp.w;

#elif defined(_XM_SSE_INTRINSICS_)
    static const XMVECTORF32 MaxUByte4 = { 255.0f, 255.0f, 255.0f, 255.0f};
    // y and w are scaled one bit short of their byte position and fixed up
    // by the add-to-self below
    static const XMVECTORF32 ScaleUByte4 = {1.0f,256.0f*0.5f,256.0f*256.0f,256.0f*256.0f*256.0f*0.5f};
    static const XMVECTORI32 MaskUByte4 = {0xFF,0xFF<<(8-1),0xFF<<16,0xFF<<(24-1)};
    // Clamp to bounds
    XMVECTOR vResult = _mm_max_ps(V,g_XMZero);
    vResult = _mm_min_ps(vResult,MaxUByte4);
    // Scale by multiplication
    vResult = _mm_mul_ps(vResult,ScaleUByte4);
    // Convert to int
    __m128i vResulti = _mm_cvttps_epi32(vResult);
    // Mask off any fraction
    vResulti = _mm_and_si128(vResulti,MaskUByte4);
    // Do a horizontal or of 4 entries
    __m128i vResulti2 = _mm_shuffle_epi32(vResulti,_MM_SHUFFLE(3,2,3,2));
    // x = x|z, y = y|w
    vResulti = _mm_or_si128(vResulti,vResulti2);
    // Move Z to the x position
    vResulti2 = _mm_shuffle_epi32(vResulti,_MM_SHUFFLE(1,1,1,1));
    // Perform a single bit left shift to fix y|w
    vResulti2 = _mm_add_epi32(vResulti2,vResulti2);
    // i = x|y|z|w
    vResulti = _mm_or_si128(vResulti,vResulti2);
    _mm_store_ss(reinterpret_cast<float *>(&pDestination->v),_mm_castsi128_ps(vResulti));
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}
2510
2511//------------------------------------------------------------------------------
2512_Use_decl_annotations_
2513inline void PackedVector::XMStoreByteN4
2514(
2515 XMBYTEN4* pDestination,
2516 FXMVECTOR V
2517)
2518{
2519 assert(pDestination);
2520#if defined(_XM_NO_INTRINSICS_) || defined(_XM_ARM_NEON_INTRINSICS_)
2521
2522 static const XMVECTORF32 Scale = {127.0f, 127.0f, 127.0f, 127.0f};
2523
2524 XMVECTOR N = XMVectorClamp(V, g_XMNegativeOne.v, g_XMOne.v);
2525 N = XMVectorMultiply(V, Scale.v);
2526 N = XMVectorRound(N);
2527
2528 XMFLOAT4A tmp;
2529 XMStoreFloat4A(&tmp, N );
2530
2531 pDestination->x = (int8_t)tmp.x;
2532 pDestination->y = (int8_t)tmp.y;
2533 pDestination->z = (int8_t)tmp.z;
2534 pDestination->w = (int8_t)tmp.w;
2535
2536#elif defined(_XM_SSE_INTRINSICS_)
2537 static const XMVECTORF32 ScaleByteN4 = {127.0f,127.0f*256.0f,127.0f*256.0f*256.0f,127.0f*256.0f*256.0f*256.0f};
2538 static const XMVECTORI32 MaskByteN4 = {0xFF,0xFF<<8,0xFF<<16,0xFF<<24};
2539 // Clamp to bounds
2540 XMVECTOR vResult = _mm_max_ps(V,g_XMNegativeOne);
2541 vResult = _mm_min_ps(vResult,g_XMOne);
2542 // Scale by multiplication
2543 vResult = _mm_mul_ps(vResult,ScaleByteN4);
2544 // Convert to int
2545 __m128i vResulti = _mm_cvttps_epi32(vResult);
2546 // Mask off any fraction
2547 vResulti = _mm_and_si128(vResulti,MaskByteN4);
2548 // Do a horizontal or of 4 entries
2549 __m128i vResulti2 = _mm_shuffle_epi32(vResulti,_MM_SHUFFLE(3,2,3,2));
2550 // x = x|z, y = y|w
2551 vResulti = _mm_or_si128(vResulti,vResulti2);
2552 // Move Z to the x position
2553 vResulti2 = _mm_shuffle_epi32(vResulti,_MM_SHUFFLE(1,1,1,1));
2554 // i = x|y|z|w
2555 vResulti = _mm_or_si128(vResulti,vResulti2);
2556 _mm_store_ss(reinterpret_cast<float *>(&pDestination->v),_mm_castsi128_ps(vResulti));
2557#else // _XM_VMX128_INTRINSICS_
2558#endif // _XM_VMX128_INTRINSICS_
2559}
2560
2561//------------------------------------------------------------------------------
_Use_decl_annotations_
// Stores a vector into 8:8:8:8 signed integer format.
// Each component of V is clamped to [-127,127] and rounded.
inline void PackedVector::XMStoreByte4
(
    XMBYTE4* pDestination,
    FXMVECTOR V
)
{
    assert(pDestination);
#if defined(_XM_NO_INTRINSICS_) || defined(_XM_ARM_NEON_INTRINSICS_)

    static const XMVECTORF32 Min = {-127.0f, -127.0f, -127.0f, -127.0f};
    static const XMVECTORF32 Max = {127.0f, 127.0f, 127.0f, 127.0f};

    XMVECTOR N = XMVectorClamp(V, Min, Max);
    N = XMVectorRound(N);

    XMFLOAT4A tmp;
    XMStoreFloat4A(&tmp, N );

    pDestination->x = (int8_t)tmp.x;
    pDestination->y = (int8_t)tmp.y;
    pDestination->z = (int8_t)tmp.z;
    pDestination->w = (int8_t)tmp.w;

#elif defined(_XM_SSE_INTRINSICS_)
    static const XMVECTORF32 MinByte4 = {-127.0f,-127.0f,-127.0f,-127.0f};
    static const XMVECTORF32 MaxByte4 = { 127.0f, 127.0f, 127.0f, 127.0f};
    // Per-component scale folds in the field's byte position
    static const XMVECTORF32 ScaleByte4 = {1.0f,256.0f,256.0f*256.0f,256.0f*256.0f*256.0f};
    static const XMVECTORI32 MaskByte4 = {0xFF,0xFF<<8,0xFF<<16,0xFF<<24};
    // Clamp to bounds
    XMVECTOR vResult = _mm_max_ps(V,MinByte4);
    vResult = _mm_min_ps(vResult,MaxByte4);
    // Scale by multiplication
    vResult = _mm_mul_ps(vResult,ScaleByte4);
    // Convert to int
    __m128i vResulti = _mm_cvttps_epi32(vResult);
    // Mask off any fraction
    vResulti = _mm_and_si128(vResulti,MaskByte4);
    // Do a horizontal or of 4 entries
    __m128i vResulti2 = _mm_shuffle_epi32(vResulti,_MM_SHUFFLE(3,2,3,2));
    // x = x|z, y = y|w
    vResulti = _mm_or_si128(vResulti,vResulti2);
    // Move Z to the x position
    vResulti2 = _mm_shuffle_epi32(vResulti,_MM_SHUFFLE(1,1,1,1));
    // i = x|y|z|w
    vResulti = _mm_or_si128(vResulti,vResulti2);
    _mm_store_ss(reinterpret_cast<float *>(&pDestination->v),_mm_castsi128_ps(vResulti));
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}
2612
2613//------------------------------------------------------------------------------
2614_Use_decl_annotations_
2615inline void PackedVector::XMStoreUNibble4
2616(
2617 XMUNIBBLE4* pDestination,
2618 FXMVECTOR V
2619)
2620{
2621 assert(pDestination);
2622#if defined(_XM_SSE_INTRINSICS_) && !defined(_XM_NO_INTRINSICS_)
2623 static const XMVECTORF32 Max = {15.0f,15.0f,15.0f,15.0f};
2624 // Bounds check
2625 XMVECTOR vResult = _mm_max_ps(V,g_XMZero);
2626 vResult = _mm_min_ps(vResult,Max);
2627 // Convert to int with rounding
2628 __m128i vInt = _mm_cvtps_epi32(vResult);
2629 // No SSE operations will write to 16-bit values, so we have to extract them manually
2630 uint16_t x = static_cast<uint16_t>(_mm_extract_epi16(vInt,0));
2631 uint16_t y = static_cast<uint16_t>(_mm_extract_epi16(vInt,2));
2632 uint16_t z = static_cast<uint16_t>(_mm_extract_epi16(vInt,4));
2633 uint16_t w = static_cast<uint16_t>(_mm_extract_epi16(vInt,6));
2634 pDestination->v = ((w & 0xF) << 12) |
2635 ((z & 0xF) << 8) |
2636 ((y & 0xF) << 4) |
2637 ((x & 0xF));
2638#else
2639 static const XMVECTORF32 Max = {15.0f,15.0f,15.0f,15.0f};
2640
2641 XMVECTOR N = XMVectorClamp(V, XMVectorZero(), Max.v);
2642 N = XMVectorRound(N);
2643
2644 XMFLOAT4A tmp;
2645 XMStoreFloat4A(&tmp, N );
2646
2647 pDestination->v = (((uint16_t)tmp.w & 0xF) << 12) |
2648 (((uint16_t)tmp.z & 0xF) << 8) |
2649 (((uint16_t)tmp.y & 0xF) << 4) |
2650 (((uint16_t)tmp.x & 0xF));
2651#endif !_XM_SSE_INTRINSICS_
2652}
2653
2654//------------------------------------------------------------------------------
2655_Use_decl_annotations_
2656inline void PackedVector::XMStoreU555
2657(
2658 XMU555* pDestination,
2659 FXMVECTOR V
2660)
2661{
2662 assert(pDestination);
2663#if defined(_XM_SSE_INTRINSICS_) && !defined(_XM_NO_INTRINSICS_)
2664 static const XMVECTORF32 Max = {31.0f, 31.0f, 31.0f, 1.0f};
2665 // Bounds check
2666 XMVECTOR vResult = _mm_max_ps(V,g_XMZero);
2667 vResult = _mm_min_ps(vResult,Max);
2668 // Convert to int with rounding
2669 __m128i vInt = _mm_cvtps_epi32(vResult);
2670 // No SSE operations will write to 16-bit values, so we have to extract them manually
2671 uint16_t x = static_cast<uint16_t>(_mm_extract_epi16(vInt,0));
2672 uint16_t y = static_cast<uint16_t>(_mm_extract_epi16(vInt,2));
2673 uint16_t z = static_cast<uint16_t>(_mm_extract_epi16(vInt,4));
2674 uint16_t w = static_cast<uint16_t>(_mm_extract_epi16(vInt,6));
2675 pDestination->v = ((w) ? 0x8000 : 0) |
2676 ((z & 0x1F) << 10) |
2677 ((y & 0x1F) << 5) |
2678 ((x & 0x1F));
2679#else
2680 static const XMVECTORF32 Max = {31.0f, 31.0f, 31.0f, 1.0f};
2681
2682 XMVECTOR N = XMVectorClamp(V, XMVectorZero(), Max.v);
2683 N = XMVectorRound(N);
2684
2685 XMFLOAT4A tmp;
2686 XMStoreFloat4A(&tmp, N );
2687
2688 pDestination->v = ((tmp.w > 0.f) ? 0x8000 : 0) |
2689 (((uint16_t)tmp.z & 0x1F) << 10) |
2690 (((uint16_t)tmp.y & 0x1F) << 5) |
2691 (((uint16_t)tmp.x & 0x1F));
2692#endif !_XM_SSE_INTRINSICS_
2693}
2694
2695
2696/****************************************************************************
2697 *
2698 * XMCOLOR operators
2699 *
2700 ****************************************************************************/
2701
2702//------------------------------------------------------------------------------
2703
2704inline PackedVector::XMCOLOR::XMCOLOR
2705(
2706 float _r,
2707 float _g,
2708 float _b,
2709 float _a
2710)
2711{
2712 XMStoreColor(this, XMVectorSet(_r, _g, _b, _a));
2713}
2714
2715//------------------------------------------------------------------------------
2716_Use_decl_annotations_
2717inline PackedVector::XMCOLOR::XMCOLOR
2718(
2719 const float* pArray
2720)
2721{
2722 XMStoreColor(this, XMLoadFloat4(reinterpret_cast<const XMFLOAT4*>(pArray)));
2723}
2724
2725/****************************************************************************
2726 *
2727 * XMHALF2 operators
2728 *
2729 ****************************************************************************/
2730
2731//------------------------------------------------------------------------------
2732
2733inline PackedVector::XMHALF2::XMHALF2
2734(
2735 float _x,
2736 float _y
2737)
2738{
2739 x = XMConvertFloatToHalf(_x);
2740 y = XMConvertFloatToHalf(_y);
2741}
2742
2743//------------------------------------------------------------------------------
2744_Use_decl_annotations_
2745inline PackedVector::XMHALF2::XMHALF2
2746(
2747 const float* pArray
2748)
2749{
2750 assert( pArray != nullptr );
2751 x = XMConvertFloatToHalf(pArray[0]);
2752 y = XMConvertFloatToHalf(pArray[1]);
2753}
2754
2755/****************************************************************************
2756 *
2757 * XMSHORTN2 operators
2758 *
2759 ****************************************************************************/
2760
2761//------------------------------------------------------------------------------
2762
2763inline PackedVector::XMSHORTN2::XMSHORTN2
2764(
2765 float _x,
2766 float _y
2767)
2768{
2769 XMStoreShortN2(this, XMVectorSet(_x, _y, 0.0f, 0.0f));
2770}
2771
2772//------------------------------------------------------------------------------
2773_Use_decl_annotations_
2774inline PackedVector::XMSHORTN2::XMSHORTN2
2775(
2776 const float* pArray
2777)
2778{
2779 XMStoreShortN2(this, XMLoadFloat2(reinterpret_cast<const XMFLOAT2*>(pArray)));
2780}
2781
2782/****************************************************************************
2783 *
2784 * XMSHORT2 operators
2785 *
2786 ****************************************************************************/
2787
2788//------------------------------------------------------------------------------
2789
2790inline PackedVector::XMSHORT2::XMSHORT2
2791(
2792 float _x,
2793 float _y
2794)
2795{
2796 XMStoreShort2(this, XMVectorSet(_x, _y, 0.0f, 0.0f));
2797}
2798
2799//------------------------------------------------------------------------------
2800_Use_decl_annotations_
2801inline PackedVector::XMSHORT2::XMSHORT2
2802(
2803 const float* pArray
2804)
2805{
2806 XMStoreShort2(this, XMLoadFloat2(reinterpret_cast<const XMFLOAT2*>(pArray)));
2807}
2808
2809/****************************************************************************
2810 *
2811 * XMUSHORTN2 operators
2812 *
2813 ****************************************************************************/
2814
2815//------------------------------------------------------------------------------
2816
2817inline PackedVector::XMUSHORTN2::XMUSHORTN2
2818(
2819 float _x,
2820 float _y
2821)
2822{
2823 XMStoreUShortN2(this, XMVectorSet(_x, _y, 0.0f, 0.0f));
2824}
2825
2826//------------------------------------------------------------------------------
2827_Use_decl_annotations_
2828inline PackedVector::XMUSHORTN2::XMUSHORTN2
2829(
2830 const float* pArray
2831)
2832{
2833 XMStoreUShortN2(this, XMLoadFloat2(reinterpret_cast<const XMFLOAT2*>(pArray)));
2834}
2835
2836/****************************************************************************
2837 *
2838 * XMUSHORT2 operators
2839 *
2840 ****************************************************************************/
2841
2842//------------------------------------------------------------------------------
2843
2844inline PackedVector::XMUSHORT2::XMUSHORT2
2845(
2846 float _x,
2847 float _y
2848)
2849{
2850 XMStoreUShort2(this, XMVectorSet(_x, _y, 0.0f, 0.0f));
2851}
2852
2853//------------------------------------------------------------------------------
2854_Use_decl_annotations_
2855inline PackedVector::XMUSHORT2::XMUSHORT2
2856(
2857 const float* pArray
2858)
2859{
2860 XMStoreUShort2(this, XMLoadFloat2(reinterpret_cast<const XMFLOAT2*>(pArray)));
2861}
2862
2863/****************************************************************************
2864 *
2865 * XMBYTEN2 operators
2866 *
2867 ****************************************************************************/
2868
2869//------------------------------------------------------------------------------
2870
2871inline PackedVector::XMBYTEN2::XMBYTEN2
2872(
2873 float _x,
2874 float _y
2875)
2876{
2877 XMStoreByteN2(this, XMVectorSet(_x, _y, 0.0f, 0.0f));
2878}
2879
2880//------------------------------------------------------------------------------
2881_Use_decl_annotations_
2882inline PackedVector::XMBYTEN2::XMBYTEN2
2883(
2884 const float* pArray
2885)
2886{
2887 XMStoreByteN2(this, XMLoadFloat2(reinterpret_cast<const XMFLOAT2*>(pArray)));
2888}
2889
2890/****************************************************************************
2891 *
2892 * XMBYTE2 operators
2893 *
2894 ****************************************************************************/
2895
2896//------------------------------------------------------------------------------
2897
2898inline PackedVector::XMBYTE2::XMBYTE2
2899(
2900 float _x,
2901 float _y
2902)
2903{
2904 XMStoreByte2(this, XMVectorSet(_x, _y, 0.0f, 0.0f));
2905}
2906
2907//------------------------------------------------------------------------------
2908_Use_decl_annotations_
2909inline PackedVector::XMBYTE2::XMBYTE2
2910(
2911 const float* pArray
2912)
2913{
2914 XMStoreByte2(this, XMLoadFloat2(reinterpret_cast<const XMFLOAT2*>(pArray)));
2915}
2916
2917/****************************************************************************
2918 *
2919 * XMUBYTEN2 operators
2920 *
2921 ****************************************************************************/
2922
2923//------------------------------------------------------------------------------
2924
2925inline PackedVector::XMUBYTEN2::XMUBYTEN2
2926(
2927 float _x,
2928 float _y
2929)
2930{
2931 XMStoreUByteN2(this, XMVectorSet(_x, _y, 0.0f, 0.0f));
2932}
2933
2934//------------------------------------------------------------------------------
2935_Use_decl_annotations_
2936inline PackedVector::XMUBYTEN2::XMUBYTEN2
2937(
2938 const float* pArray
2939)
2940{
2941 XMStoreUByteN2(this, XMLoadFloat2(reinterpret_cast<const XMFLOAT2*>(pArray)));
2942}
2943
2944/****************************************************************************
2945 *
2946 * XMUBYTE2 operators
2947 *
2948 ****************************************************************************/
2949
2950//------------------------------------------------------------------------------
2951
2952inline PackedVector::XMUBYTE2::XMUBYTE2
2953(
2954 float _x,
2955 float _y
2956)
2957{
2958 XMStoreUByte2(this, XMVectorSet(_x, _y, 0.0f, 0.0f));
2959}
2960
2961//------------------------------------------------------------------------------
2962_Use_decl_annotations_
2963inline PackedVector::XMUBYTE2::XMUBYTE2
2964(
2965 const float* pArray
2966)
2967{
2968 XMStoreUByte2(this, XMLoadFloat2(reinterpret_cast<const XMFLOAT2*>(pArray)));
2969}
2970
2971/****************************************************************************
2972 *
2973 * XMU565 operators
2974 *
2975 ****************************************************************************/
2976
2977inline PackedVector::XMU565::XMU565
2978(
2979 float _x,
2980 float _y,
2981 float _z
2982)
2983{
2984 XMStoreU565(this, XMVectorSet( _x, _y, _z, 0.0f ));
2985}
2986
2987_Use_decl_annotations_
2988inline PackedVector::XMU565::XMU565
2989(
2990 const float *pArray
2991)
2992{
2993 XMStoreU565(this, XMLoadFloat3(reinterpret_cast<const XMFLOAT3*>(pArray)));
2994}
2995
2996/****************************************************************************
2997 *
2998 * XMFLOAT3PK operators
2999 *
3000 ****************************************************************************/
3001
3002inline PackedVector::XMFLOAT3PK::XMFLOAT3PK
3003(
3004 float _x,
3005 float _y,
3006 float _z
3007)
3008{
3009 XMStoreFloat3PK(this, XMVectorSet( _x, _y, _z, 0.0f ));
3010}
3011
3012_Use_decl_annotations_
3013inline PackedVector::XMFLOAT3PK::XMFLOAT3PK
3014(
3015 const float *pArray
3016)
3017{
3018 XMStoreFloat3PK(this, XMLoadFloat3(reinterpret_cast<const XMFLOAT3*>(pArray)));
3019}
3020
3021/****************************************************************************
3022 *
3023 * XMFLOAT3SE operators
3024 *
3025 ****************************************************************************/
3026
3027inline PackedVector::XMFLOAT3SE::XMFLOAT3SE
3028(
3029 float _x,
3030 float _y,
3031 float _z
3032)
3033{
3034 XMStoreFloat3SE(this, XMVectorSet( _x, _y, _z, 0.0f ));
3035}
3036
3037_Use_decl_annotations_
3038inline PackedVector::XMFLOAT3SE::XMFLOAT3SE
3039(
3040 const float *pArray
3041)
3042{
3043 XMStoreFloat3SE(this, XMLoadFloat3(reinterpret_cast<const XMFLOAT3*>(pArray)));
3044}
3045
3046/****************************************************************************
3047 *
3048 * XMHALF4 operators
3049 *
3050 ****************************************************************************/
3051
3052//------------------------------------------------------------------------------
3053
3054inline PackedVector::XMHALF4::XMHALF4
3055(
3056 float _x,
3057 float _y,
3058 float _z,
3059 float _w
3060)
3061{
3062 x = XMConvertFloatToHalf(_x);
3063 y = XMConvertFloatToHalf(_y);
3064 z = XMConvertFloatToHalf(_z);
3065 w = XMConvertFloatToHalf(_w);
3066}
3067
3068//------------------------------------------------------------------------------
3069
3070_Use_decl_annotations_
3071inline PackedVector::XMHALF4::XMHALF4
3072(
3073 const float* pArray
3074)
3075{
3076 XMConvertFloatToHalfStream(&x, sizeof(HALF), pArray, sizeof(float), 4);
3077}
3078
3079/****************************************************************************
3080 *
3081 * XMSHORTN4 operators
3082 *
3083 ****************************************************************************/
3084
3085//------------------------------------------------------------------------------
3086
3087inline PackedVector::XMSHORTN4::XMSHORTN4
3088(
3089 float _x,
3090 float _y,
3091 float _z,
3092 float _w
3093)
3094{
3095 XMStoreShortN4(this, XMVectorSet(_x, _y, _z, _w));
3096}
3097
3098//------------------------------------------------------------------------------
3099_Use_decl_annotations_
3100inline PackedVector::XMSHORTN4::XMSHORTN4
3101(
3102 const float* pArray
3103)
3104{
3105 XMStoreShortN4(this, XMLoadFloat4(reinterpret_cast<const XMFLOAT4*>(pArray)));
3106}
3107
3108/****************************************************************************
3109 *
3110 * XMSHORT4 operators
3111 *
3112 ****************************************************************************/
3113
3114//------------------------------------------------------------------------------
3115
3116inline PackedVector::XMSHORT4::XMSHORT4
3117(
3118 float _x,
3119 float _y,
3120 float _z,
3121 float _w
3122)
3123{
3124 XMStoreShort4(this, XMVectorSet(_x, _y, _z, _w));
3125}
3126
3127//------------------------------------------------------------------------------
3128_Use_decl_annotations_
3129inline PackedVector::XMSHORT4::XMSHORT4
3130(
3131 const float* pArray
3132)
3133{
3134 XMStoreShort4(this, XMLoadFloat4(reinterpret_cast<const XMFLOAT4*>(pArray)));
3135}
3136
3137/****************************************************************************
3138 *
3139 * XMUSHORTN4 operators
3140 *
3141 ****************************************************************************/
3142
3143//------------------------------------------------------------------------------
3144
3145inline PackedVector::XMUSHORTN4::XMUSHORTN4
3146(
3147 float _x,
3148 float _y,
3149 float _z,
3150 float _w
3151)
3152{
3153 XMStoreUShortN4(this, XMVectorSet(_x, _y, _z, _w));
3154}
3155
3156//------------------------------------------------------------------------------
3157_Use_decl_annotations_
3158inline PackedVector::XMUSHORTN4::XMUSHORTN4
3159(
3160 const float* pArray
3161)
3162{
3163 XMStoreUShortN4(this, XMLoadFloat4(reinterpret_cast<const XMFLOAT4*>(pArray)));
3164}
3165
3166/****************************************************************************
3167 *
3168 * XMUSHORT4 operators
3169 *
3170 ****************************************************************************/
3171
3172//------------------------------------------------------------------------------
3173
3174inline PackedVector::XMUSHORT4::XMUSHORT4
3175(
3176 float _x,
3177 float _y,
3178 float _z,
3179 float _w
3180)
3181{
3182 XMStoreUShort4(this, XMVectorSet(_x, _y, _z, _w));
3183}
3184
3185//------------------------------------------------------------------------------
3186_Use_decl_annotations_
3187inline PackedVector::XMUSHORT4::XMUSHORT4
3188(
3189 const float* pArray
3190)
3191{
3192 XMStoreUShort4(this, XMLoadFloat4(reinterpret_cast<const XMFLOAT4*>(pArray)));
3193}
3194
3195/****************************************************************************
3196 *
3197 * XMXDECN4 operators
3198 *
3199 ****************************************************************************/
3200
3201//------------------------------------------------------------------------------
3202
3203inline PackedVector::XMXDECN4::XMXDECN4
3204(
3205 float _x,
3206 float _y,
3207 float _z,
3208 float _w
3209)
3210{
3211 XMStoreXDecN4(this, XMVectorSet(_x, _y, _z, _w));
3212}
3213
3214//------------------------------------------------------------------------------
3215_Use_decl_annotations_
3216inline PackedVector::XMXDECN4::XMXDECN4
3217(
3218 const float* pArray
3219)
3220{
3221 XMStoreXDecN4(this, XMLoadFloat4(reinterpret_cast<const XMFLOAT4*>(pArray)));
3222}
3223
3224/****************************************************************************
3225 *
3226 * XMXDEC4 operators
3227 *
3228 ****************************************************************************/
3229
3230//------------------------------------------------------------------------------
3231
3232inline PackedVector::XMXDEC4::XMXDEC4
3233(
3234 float _x,
3235 float _y,
3236 float _z,
3237 float _w
3238)
3239{
3240 XMStoreXDec4(this, XMVectorSet(_x, _y, _z, _w));
3241}
3242
3243//------------------------------------------------------------------------------
3244_Use_decl_annotations_
3245inline PackedVector::XMXDEC4::XMXDEC4
3246(
3247 const float* pArray
3248)
3249{
3250 XMStoreXDec4(this, XMLoadFloat4(reinterpret_cast<const XMFLOAT4*>(pArray)));
3251}
3252
3253/****************************************************************************
3254 *
3255 * XMDECN4 operators
3256 *
3257 ****************************************************************************/
3258
3259//------------------------------------------------------------------------------
3260
3261inline PackedVector::XMDECN4::XMDECN4
3262(
3263 float _x,
3264 float _y,
3265 float _z,
3266 float _w
3267)
3268{
3269 XMStoreDecN4(this, XMVectorSet(_x, _y, _z, _w));
3270}
3271
3272//------------------------------------------------------------------------------
3273_Use_decl_annotations_
3274inline PackedVector::XMDECN4::XMDECN4
3275(
3276 const float* pArray
3277)
3278{
3279 XMStoreDecN4(this, XMLoadFloat4(reinterpret_cast<const XMFLOAT4*>(pArray)));
3280}
3281
3282/****************************************************************************
3283 *
3284 * XMDEC4 operators
3285 *
3286 ****************************************************************************/
3287
3288//------------------------------------------------------------------------------
3289
3290inline PackedVector::XMDEC4::XMDEC4
3291(
3292 float _x,
3293 float _y,
3294 float _z,
3295 float _w
3296)
3297{
3298 XMStoreDec4(this, XMVectorSet(_x, _y, _z, _w));
3299}
3300
3301//------------------------------------------------------------------------------
3302_Use_decl_annotations_
3303inline PackedVector::XMDEC4::XMDEC4
3304(
3305 const float* pArray
3306)
3307{
3308 XMStoreDec4(this, XMLoadFloat4(reinterpret_cast<const XMFLOAT4*>(pArray)));
3309}
3310
3311/****************************************************************************
3312 *
3313 * XMUDECN4 operators
3314 *
3315 ****************************************************************************/
3316
3317//------------------------------------------------------------------------------
3318
3319inline PackedVector::XMUDECN4::XMUDECN4
3320(
3321 float _x,
3322 float _y,
3323 float _z,
3324 float _w
3325)
3326{
3327 XMStoreUDecN4(this, XMVectorSet(_x, _y, _z, _w));
3328}
3329
3330//------------------------------------------------------------------------------
3331_Use_decl_annotations_
3332inline PackedVector::XMUDECN4::XMUDECN4
3333(
3334 const float* pArray
3335)
3336{
3337 XMStoreUDecN4(this, XMLoadFloat4(reinterpret_cast<const XMFLOAT4*>(pArray)));
3338}
3339
3340/****************************************************************************
3341 *
3342 * XMUDEC4 operators
3343 *
3344 ****************************************************************************/
3345
3346//------------------------------------------------------------------------------
3347
3348inline PackedVector::XMUDEC4::XMUDEC4
3349(
3350 float _x,
3351 float _y,
3352 float _z,
3353 float _w
3354)
3355{
3356 XMStoreUDec4(this, XMVectorSet(_x, _y, _z, _w));
3357}
3358
3359//------------------------------------------------------------------------------
3360_Use_decl_annotations_
3361inline PackedVector::XMUDEC4::XMUDEC4
3362(
3363 const float* pArray
3364)
3365{
3366 XMStoreUDec4(this, XMLoadFloat4(reinterpret_cast<const XMFLOAT4*>(pArray)));
3367}
3368
3369/****************************************************************************
3370 *
3371 * XMBYTEN4 operators
3372 *
3373 ****************************************************************************/
3374
3375//------------------------------------------------------------------------------
3376
3377inline PackedVector::XMBYTEN4::XMBYTEN4
3378(
3379 float _x,
3380 float _y,
3381 float _z,
3382 float _w
3383)
3384{
3385 XMStoreByteN4(this, XMVectorSet(_x, _y, _z, _w));
3386}
3387
3388//------------------------------------------------------------------------------
3389_Use_decl_annotations_
3390inline PackedVector::XMBYTEN4::XMBYTEN4
3391(
3392 const float* pArray
3393)
3394{
3395 XMStoreByteN4(this, XMLoadFloat4(reinterpret_cast<const XMFLOAT4*>(pArray)));
3396}
3397
3398/****************************************************************************
3399 *
3400 * XMBYTE4 operators
3401 *
3402 ****************************************************************************/
3403
3404//------------------------------------------------------------------------------
3405
3406inline PackedVector::XMBYTE4::XMBYTE4
3407(
3408 float _x,
3409 float _y,
3410 float _z,
3411 float _w
3412)
3413{
3414 XMStoreByte4(this, XMVectorSet(_x, _y, _z, _w));
3415}
3416
3417//------------------------------------------------------------------------------
3418_Use_decl_annotations_
3419inline PackedVector::XMBYTE4::XMBYTE4
3420(
3421 const float* pArray
3422)
3423{
3424 XMStoreByte4(this, XMLoadFloat4(reinterpret_cast<const XMFLOAT4*>(pArray)));
3425}
3426
3427/****************************************************************************
3428 *
3429 * XMUBYTEN4 operators
3430 *
3431 ****************************************************************************/
3432
3433//------------------------------------------------------------------------------
3434
3435inline PackedVector::XMUBYTEN4::XMUBYTEN4
3436(
3437 float _x,
3438 float _y,
3439 float _z,
3440 float _w
3441)
3442{
3443 XMStoreUByteN4(this, XMVectorSet(_x, _y, _z, _w));
3444}
3445
3446//------------------------------------------------------------------------------
3447_Use_decl_annotations_
3448inline PackedVector::XMUBYTEN4::XMUBYTEN4
3449(
3450 const float* pArray
3451)
3452{
3453 XMStoreUByteN4(this, XMLoadFloat4(reinterpret_cast<const XMFLOAT4*>(pArray)));
3454}
3455
3456/****************************************************************************
3457 *
3458 * XMUBYTE4 operators
3459 *
3460 ****************************************************************************/
3461
3462//------------------------------------------------------------------------------
3463
3464inline PackedVector::XMUBYTE4::XMUBYTE4
3465(
3466 float _x,
3467 float _y,
3468 float _z,
3469 float _w
3470)
3471{
3472 XMStoreUByte4(this, XMVectorSet(_x, _y, _z, _w));
3473}
3474
3475//------------------------------------------------------------------------------
3476_Use_decl_annotations_
3477inline PackedVector::XMUBYTE4::XMUBYTE4
3478(
3479 const float* pArray
3480)
3481{
3482 XMStoreUByte4(this, XMLoadFloat4(reinterpret_cast<const XMFLOAT4*>(pArray)));
3483}
3484
3485/****************************************************************************
3486 *
3487 * XMUNIBBLE4 operators
3488 *
3489 ****************************************************************************/
3490
3491//------------------------------------------------------------------------------
3492
3493inline PackedVector::XMUNIBBLE4::XMUNIBBLE4
3494(
3495 float _x,
3496 float _y,
3497 float _z,
3498 float _w
3499)
3500{
3501 XMStoreUNibble4(this, XMVectorSet( _x, _y, _z, _w ));
3502}
3503
3504//------------------------------------------------------------------------------
3505_Use_decl_annotations_
3506inline PackedVector::XMUNIBBLE4::XMUNIBBLE4
3507(
3508 const float *pArray
3509)
3510{
3511 XMStoreUNibble4(this, XMLoadFloat4(reinterpret_cast<const XMFLOAT4*>(pArray)));
3512}
3513
3514/****************************************************************************
3515 *
3516 * XMU555 operators
3517 *
3518 ****************************************************************************/
3519
3520//------------------------------------------------------------------------------
3521
3522inline PackedVector::XMU555::XMU555
3523(
3524 float _x,
3525 float _y,
3526 float _z,
3527 bool _w
3528)
3529{
3530 XMStoreU555(this, XMVectorSet(_x, _y, _z, ((_w) ? 1.0f : 0.0f) ));
3531}
3532
3533//------------------------------------------------------------------------------
3534_Use_decl_annotations_
3535inline PackedVector::XMU555::XMU555
3536(
3537 const float *pArray,
3538 bool _w
3539)
3540{
3541 XMVECTOR V = XMLoadFloat3(reinterpret_cast<const XMFLOAT3*>(pArray));
3542 XMStoreU555(this, XMVectorSetW(V, ((_w) ? 1.0f : 0.0f) ));
3543}
3544
3545