A Minecraft-style game — go into mines and start crafting — but for consoles (forked directly from smartcmd's GitHub repository).
Branch: master — 3545 lines, 111 kB (view raw)
1//------------------------------------------------------------------------------------- 2// DirectXPackedVector.inl -- SIMD C++ Math library 3// 4// THIS CODE AND INFORMATION IS PROVIDED "AS IS" WITHOUT WARRANTY OF 5// ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING BUT NOT LIMITED TO 6// THE IMPLIED WARRANTIES OF MERCHANTABILITY AND/OR FITNESS FOR A 7// PARTICULAR PURPOSE. 8// 9// Copyright (c) Microsoft Corporation. All rights reserved. 10//------------------------------------------------------------------------------------- 11 12#ifdef _MSC_VER 13#pragma once 14#endif 15 16 17/**************************************************************************** 18 * 19 * Data conversion 20 * 21 ****************************************************************************/ 22 23//------------------------------------------------------------------------------ 24 25inline float PackedVector::XMConvertHalfToFloat 26( 27 HALF Value 28) 29{ 30#if defined(_XM_NO_INTRINSICS_) || defined(_XM_SSE_INTRINSICS_) || defined(_XM_ARM_NEON_INTRINSICS_) 31 32 uint32_t Mantissa = (uint32_t)(Value & 0x03FF); 33 34 uint32_t Exponent; 35 if ((Value & 0x7C00) != 0) // The value is normalized 36 { 37 Exponent = (uint32_t)((Value >> 10) & 0x1F); 38 } 39 else if (Mantissa != 0) // The value is denormalized 40 { 41 // Normalize the value in the resulting float 42 Exponent = 1; 43 44 do 45 { 46 Exponent--; 47 Mantissa <<= 1; 48 } while ((Mantissa & 0x0400) == 0); 49 50 Mantissa &= 0x03FF; 51 } 52 else // The value is zero 53 { 54 Exponent = (uint32_t)-112; 55 } 56 57 uint32_t Result = ((Value & 0x8000) << 16) | // Sign 58 ((Exponent + 112) << 23) | // Exponent 59 (Mantissa << 13); // Mantissa 60 61 return reinterpret_cast<float*>(&Result)[0]; 62#elif defined(XM_NO_MISALIGNED_VECTOR_ACCESS) 63#endif 64} 65 66//------------------------------------------------------------------------------ 67_Use_decl_annotations_ 68inline float* PackedVector::XMConvertHalfToFloatStream 69( 70 float* pOutputStream, 71 
size_t OutputStride, 72 const HALF* pInputStream, 73 size_t InputStride, 74 size_t HalfCount 75) 76{ 77 assert(pOutputStream); 78 assert(pInputStream); 79#if defined(_XM_NO_INTRINSICS_) || defined(_XM_SSE_INTRINSICS_) || defined(_XM_ARM_NEON_INTRINSICS_) || defined(XM_NO_MISALIGNED_VECTOR_ACCESS) 80 81 const uint8_t* pHalf = reinterpret_cast<const uint8_t*>(pInputStream); 82 uint8_t* pFloat = reinterpret_cast<uint8_t*>(pOutputStream); 83 84 for (size_t i = 0; i < HalfCount; i++) 85 { 86 *reinterpret_cast<float*>(pFloat) = XMConvertHalfToFloat(reinterpret_cast<const HALF*>(pHalf)[0]); 87 pHalf += InputStride; 88 pFloat += OutputStride; 89 } 90 91 return pOutputStream; 92 93#else // _XM_VMX128_INTRINSICS_ 94#endif // _XM_VMX128_INTRINSICS_ 95} 96 97//------------------------------------------------------------------------------ 98 99inline PackedVector::HALF PackedVector::XMConvertFloatToHalf 100( 101 float Value 102) 103{ 104#if defined(_XM_NO_INTRINSICS_) || defined(_XM_SSE_INTRINSICS_) || defined(_XM_ARM_NEON_INTRINSICS_) 105 uint32_t Result; 106 107 uint32_t IValue = reinterpret_cast<uint32_t *>(&Value)[0]; 108 uint32_t Sign = (IValue & 0x80000000U) >> 16U; 109 IValue = IValue & 0x7FFFFFFFU; // Hack off the sign 110 111 if (IValue > 0x47FFEFFFU) 112 { 113 // The number is too large to be represented as a half. Saturate to infinity. 114 Result = 0x7FFFU; 115 } 116 else 117 { 118 if (IValue < 0x38800000U) 119 { 120 // The number is too small to be represented as a normalized half. 121 // Convert it to a denormalized value. 122 uint32_t Shift = 113U - (IValue >> 23U); 123 IValue = (0x800000U | (IValue & 0x7FFFFFU)) >> Shift; 124 } 125 else 126 { 127 // Rebias the exponent to represent the value as a normalized half. 
128 IValue += 0xC8000000U; 129 } 130 131 Result = ((IValue + 0x0FFFU + ((IValue >> 13U) & 1U)) >> 13U)&0x7FFFU; 132 } 133 return (HALF)(Result|Sign); 134#elif defined(XM_NO_MISALIGNED_VECTOR_ACCESS) 135#endif 136} 137 138//------------------------------------------------------------------------------ 139_Use_decl_annotations_ 140inline PackedVector::HALF* PackedVector::XMConvertFloatToHalfStream 141( 142 HALF* pOutputStream, 143 size_t OutputStride, 144 const float* pInputStream, 145 size_t InputStride, 146 size_t FloatCount 147) 148{ 149 assert(pOutputStream); 150 assert(pInputStream); 151#if defined(_XM_NO_INTRINSICS_) || defined(_XM_SSE_INTRINSICS_) || defined(_XM_ARM_NEON_INTRINSICS_) || defined(XM_NO_MISALIGNED_VECTOR_ACCESS) 152 153 const uint8_t* pFloat = reinterpret_cast<const uint8_t*>(pInputStream); 154 uint8_t* pHalf = reinterpret_cast<uint8_t*>(pOutputStream); 155 156 for (size_t i = 0; i < FloatCount; i++) 157 { 158 *reinterpret_cast<HALF*>(pHalf) = XMConvertFloatToHalf(reinterpret_cast<const float*>(pFloat)[0]); 159 pFloat += InputStride; 160 pHalf += OutputStride; 161 } 162 return pOutputStream; 163 164#else // _XM_VMX128_INTRINSICS_ 165#endif // _XM_VMX128_INTRINSICS_ 166} 167 168/**************************************************************************** 169 * 170 * Vector and matrix load operations 171 * 172 ****************************************************************************/ 173_Use_decl_annotations_ 174inline XMVECTOR PackedVector::XMLoadColor 175( 176 const XMCOLOR* pSource 177) 178{ 179 assert(pSource); 180#if defined(_XM_NO_INTRINSICS_) || defined(_XM_ARM_NEON_INTRINSICS_) 181 // int32_t -> Float conversions are done in one instruction. 182 // uint32_t -> Float calls a runtime function. 
Keep in int32_t 183 int32_t iColor = (int32_t)(pSource->c); 184 XMVECTORF32 vColor = { 185 (float)((iColor >> 16) & 0xFF) * (1.0f/255.0f), 186 (float)((iColor >> 8) & 0xFF) * (1.0f/255.0f), 187 (float)(iColor & 0xFF) * (1.0f/255.0f), 188 (float)((iColor >> 24) & 0xFF) * (1.0f/255.0f) 189 }; 190 return vColor.v; 191#elif defined(_XM_SSE_INTRINSICS_) 192 // Splat the color in all four entries 193 __m128i vInt = _mm_set1_epi32(pSource->c); 194 // Shift R&0xFF0000, G&0xFF00, B&0xFF, A&0xFF000000 195 vInt = _mm_and_si128(vInt,g_XMMaskA8R8G8B8); 196 // a is unsigned! Flip the bit to convert the order to signed 197 vInt = _mm_xor_si128(vInt,g_XMFlipA8R8G8B8); 198 // Convert to floating point numbers 199 XMVECTOR vTemp = _mm_cvtepi32_ps(vInt); 200 // RGB + 0, A + 0x80000000.f to undo the signed order. 201 vTemp = _mm_add_ps(vTemp,g_XMFixAA8R8G8B8); 202 // Convert 0-255 to 0.0f-1.0f 203 return _mm_mul_ps(vTemp,g_XMNormalizeA8R8G8B8); 204#elif defined(XM_NO_MISALIGNED_VECTOR_ACCESS) 205#endif // _XM_VMX128_INTRINSICS_ 206} 207 208//------------------------------------------------------------------------------ 209_Use_decl_annotations_ 210inline XMVECTOR PackedVector::XMLoadHalf2 211( 212 const XMHALF2* pSource 213) 214{ 215 assert(pSource); 216#if defined(_XM_NO_INTRINSICS_) || defined(_XM_SSE_INTRINSICS_) || defined(_XM_ARM_NEON_INTRINSICS_) 217 XMVECTORF32 vResult = { 218 XMConvertHalfToFloat(pSource->x), 219 XMConvertHalfToFloat(pSource->y), 220 0.0f, 221 0.0f 222 }; 223 return vResult.v; 224#elif defined(XM_NO_MISALIGNED_VECTOR_ACCESS) 225#endif // _XM_VMX128_INTRINSICS_ 226} 227 228//------------------------------------------------------------------------------ 229_Use_decl_annotations_ 230inline XMVECTOR PackedVector::XMLoadShortN2 231( 232 const XMSHORTN2* pSource 233) 234{ 235 assert(pSource); 236#if defined(_XM_NO_INTRINSICS_) || defined(_XM_ARM_NEON_INTRINSICS_) 237 XMVECTORF32 vResult = { 238 (pSource->x == -32768) ? 
-1.f : ((float)pSource->x * (1.0f/32767.0f)), 239 (pSource->y == -32768) ? -1.f : ((float)pSource->y * (1.0f/32767.0f)), 240 0.0f, 241 0.0f 242 }; 243 return vResult.v; 244#elif defined(_XM_SSE_INTRINSICS_) 245 // Splat the two shorts in all four entries (WORD alignment okay, 246 // DWORD alignment preferred) 247 __m128 vTemp = _mm_load_ps1(reinterpret_cast<const float *>(&pSource->x)); 248 // Mask x&0xFFFF, y&0xFFFF0000,z&0,w&0 249 vTemp = _mm_and_ps(vTemp,g_XMMaskX16Y16); 250 // x needs to be sign extended 251 vTemp = _mm_xor_ps(vTemp,g_XMFlipX16Y16); 252 // Convert to floating point numbers 253 vTemp = _mm_cvtepi32_ps(_mm_castps_si128(vTemp)); 254 // x - 0x8000 to undo the signed order. 255 vTemp = _mm_add_ps(vTemp,g_XMFixX16Y16); 256 // Convert -1.0f - 1.0f 257 vTemp = _mm_mul_ps(vTemp,g_XMNormalizeX16Y16); 258 // Clamp result (for case of -32768) 259 return _mm_max_ps( vTemp, g_XMNegativeOne ); 260#elif defined(XM_NO_MISALIGNED_VECTOR_ACCESS) 261#endif // _XM_VMX128_INTRINSICS_ 262} 263 264//------------------------------------------------------------------------------ 265_Use_decl_annotations_ 266inline XMVECTOR PackedVector::XMLoadShort2 267( 268 const XMSHORT2* pSource 269) 270{ 271 assert(pSource); 272#if defined(_XM_NO_INTRINSICS_) || defined(_XM_ARM_NEON_INTRINSICS_) 273 XMVECTORF32 vResult = { 274 (float)pSource->x, 275 (float)pSource->y, 276 0.f, 277 0.f 278 }; 279 return vResult.v; 280#elif defined(_XM_SSE_INTRINSICS_) 281 // Splat the two shorts in all four entries (WORD alignment okay, 282 // DWORD alignment preferred) 283 __m128 vTemp = _mm_load_ps1(reinterpret_cast<const float *>(&pSource->x)); 284 // Mask x&0xFFFF, y&0xFFFF0000,z&0,w&0 285 vTemp = _mm_and_ps(vTemp,g_XMMaskX16Y16); 286 // x needs to be sign extended 287 vTemp = _mm_xor_ps(vTemp,g_XMFlipX16Y16); 288 // Convert to floating point numbers 289 vTemp = _mm_cvtepi32_ps(_mm_castps_si128(vTemp)); 290 // x - 0x8000 to undo the signed order. 
291 vTemp = _mm_add_ps(vTemp,g_XMFixX16Y16); 292 // Y is 65536 too large 293 return _mm_mul_ps(vTemp,g_XMFixupY16); 294#elif defined(XM_NO_MISALIGNED_VECTOR_ACCESS) 295#endif // _XM_VMX128_INTRINSICS_ 296} 297 298//------------------------------------------------------------------------------ 299_Use_decl_annotations_ 300inline XMVECTOR PackedVector::XMLoadUShortN2 301( 302 const XMUSHORTN2* pSource 303) 304{ 305 assert(pSource); 306#if defined(_XM_NO_INTRINSICS_) || defined(_XM_ARM_NEON_INTRINSICS_) 307 XMVECTORF32 vResult = { 308 (float)pSource->x / 65535.0f, 309 (float)pSource->y / 65535.0f, 310 0.f, 311 0.f 312 }; 313 return vResult.v; 314#elif defined(_XM_SSE_INTRINSICS_) 315 static const XMVECTORF32 FixupY16 = {1.0f/65535.0f,1.0f/(65535.0f*65536.0f),0.0f,0.0f}; 316 static const XMVECTORF32 FixaddY16 = {0,32768.0f*65536.0f,0,0}; 317 // Splat the two shorts in all four entries (WORD alignment okay, 318 // DWORD alignment preferred) 319 __m128 vTemp = _mm_load_ps1(reinterpret_cast<const float *>(&pSource->x)); 320 // Mask x&0xFFFF, y&0xFFFF0000,z&0,w&0 321 vTemp = _mm_and_ps(vTemp,g_XMMaskX16Y16); 322 // y needs to be sign flipped 323 vTemp = _mm_xor_ps(vTemp,g_XMFlipY); 324 // Convert to floating point numbers 325 vTemp = _mm_cvtepi32_ps(_mm_castps_si128(vTemp)); 326 // y + 0x8000 to undo the signed order. 
327 vTemp = _mm_add_ps(vTemp,FixaddY16); 328 // Y is 65536 times too large 329 vTemp = _mm_mul_ps(vTemp,FixupY16); 330 return vTemp; 331#elif defined(XM_NO_MISALIGNED_VECTOR_ACCESS) 332#endif // _XM_VMX128_INTRINSICS_ 333} 334 335//------------------------------------------------------------------------------ 336_Use_decl_annotations_ 337inline XMVECTOR PackedVector::XMLoadUShort2 338( 339 const XMUSHORT2* pSource 340) 341{ 342 assert(pSource); 343#if defined(_XM_NO_INTRINSICS_) || defined(_XM_ARM_NEON_INTRINSICS_) 344 XMVECTORF32 vResult = { 345 (float)pSource->x, 346 (float)pSource->y, 347 0.f, 348 0.f 349 }; 350 return vResult.v; 351#elif defined(_XM_SSE_INTRINSICS_) 352 static const XMVECTORF32 FixaddY16 = {0,32768.0f,0,0}; 353 // Splat the two shorts in all four entries (WORD alignment okay, 354 // DWORD alignment preferred) 355 __m128 vTemp = _mm_load_ps1(reinterpret_cast<const float *>(&pSource->x)); 356 // Mask x&0xFFFF, y&0xFFFF0000,z&0,w&0 357 vTemp = _mm_and_ps(vTemp,g_XMMaskX16Y16); 358 // y needs to be sign flipped 359 vTemp = _mm_xor_ps(vTemp,g_XMFlipY); 360 // Convert to floating point numbers 361 vTemp = _mm_cvtepi32_ps(_mm_castps_si128(vTemp)); 362 // Y is 65536 times too large 363 vTemp = _mm_mul_ps(vTemp,g_XMFixupY16); 364 // y + 0x8000 to undo the signed order. 365 vTemp = _mm_add_ps(vTemp,FixaddY16); 366 return vTemp; 367#elif defined(XM_NO_MISALIGNED_VECTOR_ACCESS) 368#endif // _XM_VMX128_INTRINSICS_ 369} 370 371//------------------------------------------------------------------------------ 372_Use_decl_annotations_ 373inline XMVECTOR PackedVector::XMLoadByteN2 374( 375 const XMBYTEN2* pSource 376) 377{ 378 assert(pSource); 379 XMVECTORF32 vResult = { 380 (pSource->x == -128) ? -1.f : ((float)pSource->x * (1.0f/127.0f)), 381 (pSource->y == -128) ? 
-1.f : ((float)pSource->y * (1.0f/127.0f)), 382 0.0f, 383 0.0f 384 }; 385 return vResult.v; 386} 387 388//------------------------------------------------------------------------------ 389_Use_decl_annotations_ 390inline XMVECTOR PackedVector::XMLoadByte2 391( 392 const XMBYTE2* pSource 393) 394{ 395 assert(pSource); 396 XMVECTORF32 vResult = { 397 (float)pSource->x, 398 (float)pSource->y, 399 0.0f, 400 0.0f 401 }; 402 return vResult.v; 403} 404 405//------------------------------------------------------------------------------ 406_Use_decl_annotations_ 407inline XMVECTOR PackedVector::XMLoadUByteN2 408( 409 const XMUBYTEN2* pSource 410) 411{ 412 assert(pSource); 413 XMVECTORF32 vResult = { 414 (float)pSource->x * (1.0f/255.0f), 415 (float)pSource->y * (1.0f/255.0f), 416 0.0f, 417 0.0f 418 }; 419 return vResult.v; 420} 421 422//------------------------------------------------------------------------------ 423_Use_decl_annotations_ 424inline XMVECTOR PackedVector::XMLoadUByte2 425( 426 const XMUBYTE2* pSource 427) 428{ 429 assert(pSource); 430 XMVECTORF32 vResult = { 431 (float)pSource->x, 432 (float)pSource->y, 433 0.0f, 434 0.0f 435 }; 436 return vResult.v; 437} 438 439//------------------------------------------------------------------------------ 440_Use_decl_annotations_ 441inline XMVECTOR PackedVector::XMLoadU565 442( 443 const XMU565* pSource 444) 445{ 446 assert(pSource); 447#if defined(_XM_SSE_INTRINSICS_) && !defined(_XM_NO_INTRINSICS_) 448 static const XMVECTORI32 U565And = {0x1F,0x3F<<5,0x1F<<11,0}; 449 static const XMVECTORF32 U565Mul = {1.0f,1.0f/32.0f,1.0f/2048.f,0}; 450 // Get the 32 bit value and splat it 451 XMVECTOR vResult = _mm_load_ps1(reinterpret_cast<const float *>(&pSource->v)); 452 // Mask off x, y and z 453 vResult = _mm_and_ps(vResult,U565And); 454 // Convert to float 455 vResult = _mm_cvtepi32_ps(_mm_castps_si128(vResult)); 456 // Normalize x, y, and z 457 vResult = _mm_mul_ps(vResult,U565Mul); 458 return vResult; 459#else 460 
XMVECTORF32 vResult = { 461 float(pSource->v & 0x1F), 462 float((pSource->v >> 5) & 0x3F), 463 float((pSource->v >> 11) & 0x1F), 464 0.f, 465 }; 466 return vResult.v; 467#endif // !_XM_SSE_INTRINSICS_ 468} 469 470//------------------------------------------------------------------------------ 471_Use_decl_annotations_ 472inline XMVECTOR PackedVector::XMLoadFloat3PK 473( 474 const XMFLOAT3PK* pSource 475) 476{ 477 assert(pSource); 478 479 __declspec(align(16)) uint32_t Result[4]; 480 uint32_t Mantissa; 481 uint32_t Exponent; 482 483 // X Channel (6-bit mantissa) 484 Mantissa = pSource->xm; 485 486 if ( pSource->xe == 0x1f ) // INF or NAN 487 { 488 Result[0] = 0x7f800000 | (pSource->xm << 17); 489 } 490 else 491 { 492 if ( pSource->xe != 0 ) // The value is normalized 493 { 494 Exponent = pSource->xe; 495 } 496 else if (Mantissa != 0) // The value is denormalized 497 { 498 // Normalize the value in the resulting float 499 Exponent = 1; 500 501 do 502 { 503 Exponent--; 504 Mantissa <<= 1; 505 } while ((Mantissa & 0x40) == 0); 506 507 Mantissa &= 0x3F; 508 } 509 else // The value is zero 510 { 511 Exponent = (uint32_t)-112; 512 } 513 514 Result[0] = ((Exponent + 112) << 23) | (Mantissa << 17); 515 } 516 517 // Y Channel (6-bit mantissa) 518 Mantissa = pSource->ym; 519 520 if ( pSource->ye == 0x1f ) // INF or NAN 521 { 522 Result[1] = 0x7f800000 | (pSource->ym << 17); 523 } 524 else 525 { 526 if ( pSource->ye != 0 ) // The value is normalized 527 { 528 Exponent = pSource->ye; 529 } 530 else if (Mantissa != 0) // The value is denormalized 531 { 532 // Normalize the value in the resulting float 533 Exponent = 1; 534 535 do 536 { 537 Exponent--; 538 Mantissa <<= 1; 539 } while ((Mantissa & 0x40) == 0); 540 541 Mantissa &= 0x3F; 542 } 543 else // The value is zero 544 { 545 Exponent = (uint32_t)-112; 546 } 547 548 Result[1] = ((Exponent + 112) << 23) | (Mantissa << 17); 549 } 550 551 // Z Channel (5-bit mantissa) 552 Mantissa = pSource->zm; 553 554 if ( pSource->ze == 0x1f 
) // INF or NAN 555 { 556 Result[2] = 0x7f800000 | (pSource->zm << 17); 557 } 558 else 559 { 560 if ( pSource->ze != 0 ) // The value is normalized 561 { 562 Exponent = pSource->ze; 563 } 564 else if (Mantissa != 0) // The value is denormalized 565 { 566 // Normalize the value in the resulting float 567 Exponent = 1; 568 569 do 570 { 571 Exponent--; 572 Mantissa <<= 1; 573 } while ((Mantissa & 0x20) == 0); 574 575 Mantissa &= 0x1F; 576 } 577 else // The value is zero 578 { 579 Exponent = (uint32_t)-112; 580 } 581 582 Result[2] = ((Exponent + 112) << 23) | (Mantissa << 18); 583 } 584 585 return XMLoadFloat3A( reinterpret_cast<const XMFLOAT3A*>(&Result) ); 586} 587 588//------------------------------------------------------------------------------ 589_Use_decl_annotations_ 590inline XMVECTOR PackedVector::XMLoadFloat3SE 591( 592 const XMFLOAT3SE* pSource 593) 594{ 595 assert(pSource); 596 597 __declspec(align(16)) uint32_t Result[4]; 598 uint32_t Mantissa; 599 uint32_t Exponent, ExpBits; 600 601 if ( pSource->e == 0x1f ) // INF or NAN 602 { 603 Result[0] = 0x7f800000 | (pSource->xm << 14); 604 Result[1] = 0x7f800000 | (pSource->ym << 14); 605 Result[2] = 0x7f800000 | (pSource->zm << 14); 606 } 607 else if ( pSource->e != 0 ) // The values are all normalized 608 { 609 Exponent = pSource->e; 610 611 ExpBits = (Exponent + 112) << 23; 612 613 Mantissa = pSource->xm; 614 Result[0] = ExpBits | (Mantissa << 14); 615 616 Mantissa = pSource->ym; 617 Result[1] = ExpBits | (Mantissa << 14); 618 619 Mantissa = pSource->zm; 620 Result[2] = ExpBits | (Mantissa << 14); 621 } 622 else 623 { 624 // X Channel 625 Mantissa = pSource->xm; 626 627 if (Mantissa != 0) // The value is denormalized 628 { 629 // Normalize the value in the resulting float 630 Exponent = 1; 631 632 do 633 { 634 Exponent--; 635 Mantissa <<= 1; 636 } while ((Mantissa & 0x200) == 0); 637 638 Mantissa &= 0x1FF; 639 } 640 else // The value is zero 641 { 642 Exponent = (uint32_t)-112; 643 } 644 645 Result[0] = 
((Exponent + 112) << 23) | (Mantissa << 14); 646 647 // Y Channel 648 Mantissa = pSource->ym; 649 650 if (Mantissa != 0) // The value is denormalized 651 { 652 // Normalize the value in the resulting float 653 Exponent = 1; 654 655 do 656 { 657 Exponent--; 658 Mantissa <<= 1; 659 } while ((Mantissa & 0x200) == 0); 660 661 Mantissa &= 0x1FF; 662 } 663 else // The value is zero 664 { 665 Exponent = (uint32_t)-112; 666 } 667 668 Result[1] = ((Exponent + 112) << 23) | (Mantissa << 14); 669 670 // Z Channel 671 Mantissa = pSource->zm; 672 673 if (Mantissa != 0) // The value is denormalized 674 { 675 // Normalize the value in the resulting float 676 Exponent = 1; 677 678 do 679 { 680 Exponent--; 681 Mantissa <<= 1; 682 } while ((Mantissa & 0x200) == 0); 683 684 Mantissa &= 0x1FF; 685 } 686 else // The value is zero 687 { 688 Exponent = (uint32_t)-112; 689 } 690 691 Result[2] = ((Exponent + 112) << 23) | (Mantissa << 14); 692 } 693 694 return XMLoadFloat3A( reinterpret_cast<const XMFLOAT3A*>(&Result) ); 695} 696 697//------------------------------------------------------------------------------ 698_Use_decl_annotations_ 699inline XMVECTOR PackedVector::XMLoadHalf4 700( 701 const XMHALF4* pSource 702) 703{ 704 assert(pSource); 705#if defined(_XM_NO_INTRINSICS_) || defined(_XM_SSE_INTRINSICS_) || defined(_XM_ARM_NEON_INTRINSICS_) 706 XMVECTORF32 vResult = { 707 XMConvertHalfToFloat(pSource->x), 708 XMConvertHalfToFloat(pSource->y), 709 XMConvertHalfToFloat(pSource->z), 710 XMConvertHalfToFloat(pSource->w) 711 }; 712 return vResult.v; 713#elif defined(XM_NO_MISALIGNED_VECTOR_ACCESS) 714#endif // _XM_VMX128_INTRINSICS_ 715} 716 717//------------------------------------------------------------------------------ 718_Use_decl_annotations_ 719inline XMVECTOR PackedVector::XMLoadShortN4 720( 721 const XMSHORTN4* pSource 722) 723{ 724 assert(pSource); 725#if defined(_XM_NO_INTRINSICS_) 726 XMVECTORF32 vResult = { 727 (pSource->x == -32768) ? 
-1.f : ((float)pSource->x * (1.0f/32767.0f)), 728 (pSource->y == -32768) ? -1.f : ((float)pSource->y * (1.0f/32767.0f)), 729 (pSource->z == -32768) ? -1.f : ((float)pSource->z * (1.0f/32767.0f)), 730 (pSource->w == -32768) ? -1.f : ((float)pSource->w * (1.0f/32767.0f)) 731 }; 732 return vResult.v; 733#elif defined(_XM_ARM_NEON_INTRINSICS_) 734 __n64 vInt = vld1_s16( (const int16_t*)pSource ); 735 __n128 V = vmovl_s16( vInt ); 736 V = vcvtq_f32_s32( V ); 737 const __n128 Scale = vdupq_n_f32( 1.0f/32767.0f ); 738 V = vmulq_f32( V, Scale ); 739 return vmaxq_f32( V, g_XMNegativeOne ); 740#elif defined(_XM_SSE_INTRINSICS_) 741 // Splat the color in all four entries (x,z,y,w) 742 __m128d vIntd = _mm_load1_pd(reinterpret_cast<const double *>(&pSource->x)); 743 // Shift x&0ffff,z&0xffff,y&0xffff0000,w&0xffff0000 744 __m128 vTemp = _mm_and_ps(_mm_castpd_ps(vIntd),g_XMMaskX16Y16Z16W16); 745 // x and z are unsigned! Flip the bits to convert the order to signed 746 vTemp = _mm_xor_ps(vTemp,g_XMFlipX16Y16Z16W16); 747 // Convert to floating point numbers 748 vTemp = _mm_cvtepi32_ps(_mm_castps_si128(vTemp)); 749 // x and z - 0x8000 to complete the conversion 750 vTemp = _mm_add_ps(vTemp,g_XMFixX16Y16Z16W16); 751 // Convert to -1.0f - 1.0f 752 vTemp = _mm_mul_ps(vTemp,g_XMNormalizeX16Y16Z16W16); 753 // Very important! 
The entries are x,z,y,w, flip it to x,y,z,w 754 vTemp = XM_PERMUTE_PS(vTemp,_MM_SHUFFLE(3,1,2,0)); 755 // Clamp result (for case of -32768) 756 return _mm_max_ps( vTemp, g_XMNegativeOne ); 757#elif defined(XM_NO_MISALIGNED_VECTOR_ACCESS) 758#endif // _XM_VMX128_INTRINSICS_ 759} 760 761//------------------------------------------------------------------------------ 762_Use_decl_annotations_ 763inline XMVECTOR PackedVector::XMLoadShort4 764( 765 const XMSHORT4* pSource 766) 767{ 768 assert(pSource); 769#if defined(_XM_NO_INTRINSICS_) 770 XMVECTORF32 vResult = { 771 (float)pSource->x, 772 (float)pSource->y, 773 (float)pSource->z, 774 (float)pSource->w 775 }; 776 return vResult.v; 777#elif defined(_XM_ARM_NEON_INTRINSICS_) 778 __n64 vInt = vld1_s16( (const int16_t*)pSource ); 779 __n128 V = vmovl_s16( vInt ); 780 return vcvtq_f32_s32( V ); 781#elif defined(_XM_SSE_INTRINSICS_) 782 // Splat the color in all four entries (x,z,y,w) 783 __m128d vIntd = _mm_load1_pd(reinterpret_cast<const double *>(&pSource->x)); 784 // Shift x&0ffff,z&0xffff,y&0xffff0000,w&0xffff0000 785 __m128 vTemp = _mm_and_ps(_mm_castpd_ps(vIntd),g_XMMaskX16Y16Z16W16); 786 // x and z are unsigned! Flip the bits to convert the order to signed 787 vTemp = _mm_xor_ps(vTemp,g_XMFlipX16Y16Z16W16); 788 // Convert to floating point numbers 789 vTemp = _mm_cvtepi32_ps(_mm_castps_si128(vTemp)); 790 // x and z - 0x8000 to complete the conversion 791 vTemp = _mm_add_ps(vTemp,g_XMFixX16Y16Z16W16); 792 // Fix y and w because they are 65536 too large 793 vTemp = _mm_mul_ps(vTemp,g_XMFixupY16W16); 794 // Very important! 
The entries are x,z,y,w, flip it to x,y,z,w 795 return XM_PERMUTE_PS(vTemp,_MM_SHUFFLE(3,1,2,0)); 796#elif defined(XM_NO_MISALIGNED_VECTOR_ACCESS) 797#endif // _XM_VMX128_INTRINSICS_ 798} 799 800//------------------------------------------------------------------------------ 801_Use_decl_annotations_ 802inline XMVECTOR PackedVector::XMLoadUShortN4 803( 804 const XMUSHORTN4* pSource 805) 806{ 807 assert(pSource); 808#if defined(_XM_NO_INTRINSICS_) 809 XMVECTORF32 vResult = { 810 (float)pSource->x / 65535.0f, 811 (float)pSource->y / 65535.0f, 812 (float)pSource->z / 65535.0f, 813 (float)pSource->w / 65535.0f 814 }; 815 return vResult.v; 816#elif defined(_XM_ARM_NEON_INTRINSICS_) 817 __n64 vInt = vld1_u16( (const uint16_t*)pSource ); 818 __n128 V = vmovl_u16( vInt ); 819 V = vcvtq_f32_u32( V ); 820 const __n128 Scale = vdupq_n_f32( 1.0f/65535.0f ); 821 return vmulq_f32( V, Scale ); 822#elif defined(_XM_SSE_INTRINSICS_) 823 static const XMVECTORF32 FixupY16W16 = {1.0f/65535.0f,1.0f/65535.0f,1.0f/(65535.0f*65536.0f),1.0f/(65535.0f*65536.0f)}; 824 static const XMVECTORF32 FixaddY16W16 = {0,0,32768.0f*65536.0f,32768.0f*65536.0f}; 825 // Splat the color in all four entries (x,z,y,w) 826 __m128d vIntd = _mm_load1_pd(reinterpret_cast<const double *>(&pSource->x)); 827 // Shift x&0ffff,z&0xffff,y&0xffff0000,w&0xffff0000 828 __m128 vTemp = _mm_and_ps(_mm_castpd_ps(vIntd),g_XMMaskX16Y16Z16W16); 829 // y and w are signed! Flip the bits to convert the order to unsigned 830 vTemp = _mm_xor_ps(vTemp,g_XMFlipZW); 831 // Convert to floating point numbers 832 vTemp = _mm_cvtepi32_ps(_mm_castps_si128(vTemp)); 833 // y and w + 0x8000 to complete the conversion 834 vTemp = _mm_add_ps(vTemp,FixaddY16W16); 835 // Fix y and w because they are 65536 too large 836 vTemp = _mm_mul_ps(vTemp,FixupY16W16); 837 // Very important! 
The entries are x,z,y,w, flip it to x,y,z,w 838 return XM_PERMUTE_PS(vTemp,_MM_SHUFFLE(3,1,2,0)); 839#elif defined(XM_NO_MISALIGNED_VECTOR_ACCESS) 840#endif // _XM_VMX128_INTRINSICS_ 841} 842 843//------------------------------------------------------------------------------ 844_Use_decl_annotations_ 845inline XMVECTOR PackedVector::XMLoadUShort4 846( 847 const XMUSHORT4* pSource 848) 849{ 850 assert(pSource); 851#if defined(_XM_NO_INTRINSICS_) 852 XMVECTORF32 vResult = { 853 (float)pSource->x, 854 (float)pSource->y, 855 (float)pSource->z, 856 (float)pSource->w 857 }; 858 return vResult.v; 859#elif defined(_XM_ARM_NEON_INTRINSICS_) 860 __n64 vInt = vld1_u16( (const uint16_t*)pSource ); 861 __n128 V = vmovl_u16( vInt ); 862 return vcvtq_f32_u32( V ); 863#elif defined(_XM_SSE_INTRINSICS_) 864 static const XMVECTORF32 FixaddY16W16 = {0,0,32768.0f,32768.0f}; 865 // Splat the color in all four entries (x,z,y,w) 866 __m128d vIntd = _mm_load1_pd(reinterpret_cast<const double *>(&pSource->x)); 867 // Shift x&0ffff,z&0xffff,y&0xffff0000,w&0xffff0000 868 __m128 vTemp = _mm_and_ps(_mm_castpd_ps(vIntd),g_XMMaskX16Y16Z16W16); 869 // y and w are signed! Flip the bits to convert the order to unsigned 870 vTemp = _mm_xor_ps(vTemp,g_XMFlipZW); 871 // Convert to floating point numbers 872 vTemp = _mm_cvtepi32_ps(_mm_castps_si128(vTemp)); 873 // Fix y and w because they are 65536 too large 874 vTemp = _mm_mul_ps(vTemp,g_XMFixupY16W16); 875 // y and w + 0x8000 to complete the conversion 876 vTemp = _mm_add_ps(vTemp,FixaddY16W16); 877 // Very important! 
The entries are x,z,y,w, flip it to x,y,z,w 878 return XM_PERMUTE_PS(vTemp,_MM_SHUFFLE(3,1,2,0)); 879#elif defined(XM_NO_MISALIGNED_VECTOR_ACCESS) 880#endif // _XM_VMX128_INTRINSICS_ 881} 882 883//------------------------------------------------------------------------------ 884_Use_decl_annotations_ 885inline XMVECTOR PackedVector::XMLoadXDecN4 886( 887 const XMXDECN4* pSource 888) 889{ 890 assert(pSource); 891#if defined(_XM_NO_INTRINSICS_) || defined(_XM_ARM_NEON_INTRINSICS_) 892 static const uint32_t SignExtend[] = {0x00000000, 0xFFFFFC00}; 893 894 uint32_t ElementX = pSource->v & 0x3FF; 895 uint32_t ElementY = (pSource->v >> 10) & 0x3FF; 896 uint32_t ElementZ = (pSource->v >> 20) & 0x3FF; 897 898 XMVECTORF32 vResult = { 899 (ElementX == 0x200) ? -1.f : ((float)(int16_t)(ElementX | SignExtend[ElementX >> 9]) / 511.0f), 900 (ElementY == 0x200) ? -1.f : ((float)(int16_t)(ElementY | SignExtend[ElementY >> 9]) / 511.0f), 901 (ElementZ == 0x200) ? -1.f : ((float)(int16_t)(ElementZ | SignExtend[ElementZ >> 9]) / 511.0f), 902 (float)(pSource->v >> 30) / 3.0f 903 }; 904 return vResult.v; 905#elif defined(_XM_SSE_INTRINSICS_) 906 // Splat the color in all four entries 907 __m128 vTemp = _mm_load_ps1(reinterpret_cast<const float *>(&pSource->v)); 908 // Shift R&0xFF0000, G&0xFF00, B&0xFF, A&0xFF000000 909 vTemp = _mm_and_ps(vTemp,g_XMMaskA2B10G10R10); 910 // a is unsigned! Flip the bit to convert the order to signed 911 vTemp = _mm_xor_ps(vTemp,g_XMFlipA2B10G10R10); 912 // Convert to floating point numbers 913 vTemp = _mm_cvtepi32_ps(_mm_castps_si128(vTemp)); 914 // RGB + 0, A + 0x80000000.f to undo the signed order. 
915 vTemp = _mm_add_ps(vTemp,g_XMFixAA2B10G10R10); 916 // Convert 0-255 to 0.0f-1.0f 917 vTemp = _mm_mul_ps(vTemp,g_XMNormalizeA2B10G10R10); 918 // Clamp result (for case of -512) 919 return _mm_max_ps( vTemp, g_XMNegativeOne ); 920#elif defined(XM_NO_MISALIGNED_VECTOR_ACCESS) 921#endif // _XM_VMX128_INTRINSICS_ 922} 923 924//------------------------------------------------------------------------------ 925_Use_decl_annotations_ 926inline XMVECTOR PackedVector::XMLoadXDec4 927( 928 const XMXDEC4* pSource 929) 930{ 931 assert(pSource); 932#if defined(_XM_NO_INTRINSICS_) || defined(_XM_ARM_NEON_INTRINSICS_) 933 static const uint32_t SignExtend[] = {0x00000000, 0xFFFFFC00}; 934 935 uint32_t ElementX = pSource->v & 0x3FF; 936 uint32_t ElementY = (pSource->v >> 10) & 0x3FF; 937 uint32_t ElementZ = (pSource->v >> 20) & 0x3FF; 938 939 XMVECTORF32 vResult = { 940 (float)(int16_t)(ElementX | SignExtend[ElementX >> 9]), 941 (float)(int16_t)(ElementY | SignExtend[ElementY >> 9]), 942 (float)(int16_t)(ElementZ | SignExtend[ElementZ >> 9]), 943 (float)(pSource->v >> 30) 944 }; 945 return vResult.v; 946#elif defined(_XM_SSE_INTRINSICS_) 947 static const XMVECTORI32 XDec4Xor = {0x200, 0x200<<10, 0x200<<20, 0x80000000}; 948 static const XMVECTORF32 XDec4Add = {-512.0f,-512.0f*1024.0f,-512.0f*1024.0f*1024.0f,32768*65536.0f}; 949 // Splat the color in all four entries 950 XMVECTOR vTemp = _mm_load_ps1(reinterpret_cast<const float *>(&pSource->v)); 951 // Shift R&0xFF0000, G&0xFF00, B&0xFF, A&0xFF000000 952 vTemp = _mm_and_ps(vTemp,g_XMMaskDec4); 953 // a is unsigned! Flip the bit to convert the order to signed 954 vTemp = _mm_xor_ps(vTemp,XDec4Xor); 955 // Convert to floating point numbers 956 vTemp = _mm_cvtepi32_ps(_mm_castps_si128(vTemp)); 957 // RGB + 0, A + 0x80000000.f to undo the signed order. 
    // (tail of the preceding Dec4 loader)
    vTemp = _mm_add_ps(vTemp,XDec4Add);
    // Undo the field-position scaling of y, z and w
    vTemp = _mm_mul_ps(vTemp,g_XMMulDec4);
    return vTemp;
#elif defined(XM_NO_MISALIGNED_VECTOR_ACCESS)
#endif // _XM_VMX128_INTRINSICS_
}

//------------------------------------------------------------------------------
// Loads an XMUDECN4 (normalized unsigned 10:10:10:2) into an XMVECTOR:
// x/y/z map to [0,1] via /1023, w maps to [0,1] via /3.
_Use_decl_annotations_
inline XMVECTOR PackedVector::XMLoadUDecN4
(
    const XMUDECN4* pSource
)
{
    assert(pSource);
#if defined(_XM_NO_INTRINSICS_) || defined(_XM_ARM_NEON_INTRINSICS_)

    uint32_t ElementX = pSource->v & 0x3FF;
    uint32_t ElementY = (pSource->v >> 10) & 0x3FF;
    uint32_t ElementZ = (pSource->v >> 20) & 0x3FF;

    XMVECTORF32 vResult = {
        (float)ElementX / 1023.0f,
        (float)ElementY / 1023.0f,
        (float)ElementZ / 1023.0f,
        (float)(pSource->v >> 30) / 3.0f
    };
    return vResult.v;
#elif defined(_XM_SSE_INTRINSICS_)
    // Per-component reciprocal scale; y/z/w also undo their bit-position shifts.
    static const XMVECTORF32 UDecN4Mul = {1.0f/1023.0f,1.0f/(1023.0f*1024.0f),1.0f/(1023.0f*1024.0f*1024.0f),1.0f/(3.0f*1024.0f*1024.0f*1024.0f)};
    // Splat the 32-bit value in all four entries
    XMVECTOR vTemp = _mm_load_ps1(reinterpret_cast<const float *>(&pSource->v));
    // Mask the 10:10:10:2 fields in place
    vTemp = _mm_and_ps(vTemp,g_XMMaskDec4);
    // w occupies the sign bits; flip so the signed int conversion is correct
    vTemp = _mm_xor_ps(vTemp,g_XMFlipW);
    // Convert to floating point numbers
    vTemp = _mm_cvtepi32_ps(_mm_castps_si128(vTemp));
    // x,y,z + 0, w + 0x80000000.f to undo the signed order.
    vTemp = _mm_add_ps(vTemp,g_XMAddUDec4);
    // Normalize each field into its 0.0f-1.0f range
    vTemp = _mm_mul_ps(vTemp,UDecN4Mul);
    return vTemp;
#elif defined(XM_NO_MISALIGNED_VECTOR_ACCESS)
#endif // _XM_VMX128_INTRINSICS_
}

//------------------------------------------------------------------------------
// Loads an XMUDEC4 (unsigned 10:10:10:2, unnormalized integer values).
_Use_decl_annotations_
inline XMVECTOR PackedVector::XMLoadUDec4
(
    const XMUDEC4* pSource
)
{
    assert(pSource);
#if defined(_XM_NO_INTRINSICS_) || defined(_XM_ARM_NEON_INTRINSICS_)
    uint32_t ElementX = pSource->v & 0x3FF;
    uint32_t ElementY = (pSource->v >> 10) & 0x3FF;
    uint32_t ElementZ = (pSource->v >> 20) & 0x3FF;

    XMVECTORF32 vResult = {
        (float)ElementX,
        (float)ElementY,
        (float)ElementZ,
        (float)(pSource->v >> 30)
    };
    return vResult.v;
#elif defined(_XM_SSE_INTRINSICS_)
    // Splat the 32-bit value in all four entries
    XMVECTOR vTemp = _mm_load_ps1(reinterpret_cast<const float *>(&pSource->v));
    // Mask the 10:10:10:2 fields in place
    vTemp = _mm_and_ps(vTemp,g_XMMaskDec4);
    // w occupies the sign bits; flip so the signed int conversion is correct
    vTemp = _mm_xor_ps(vTemp,g_XMFlipW);
    // Convert to floating point numbers
    vTemp = _mm_cvtepi32_ps(_mm_castps_si128(vTemp));
    // x,y,z + 0, w + 0x80000000.f to undo the signed order.
    vTemp = _mm_add_ps(vTemp,g_XMAddUDec4);
    // Undo the field-position scaling (y/2^10, z/2^20, w/2^30)
    vTemp = _mm_mul_ps(vTemp,g_XMMulDec4);
    return vTemp;
#elif defined(XM_NO_MISALIGNED_VECTOR_ACCESS)
#endif // _XM_VMX128_INTRINSICS_
}

//------------------------------------------------------------------------------
// Loads an XMDECN4 (normalized signed 10:10:10:2); -512 and 2-bit -2 clamp to -1.
_Use_decl_annotations_
inline XMVECTOR PackedVector::XMLoadDecN4
(
    const XMDECN4* pSource
)
{
    assert(pSource);
#if defined(_XM_NO_INTRINSICS_) || defined(_XM_ARM_NEON_INTRINSICS_)
    // Lookup tables to sign-extend the 10-bit and 2-bit fields
    static const uint32_t SignExtend[] = {0x00000000, 0xFFFFFC00};
    static const uint32_t SignExtendW[] = {0x00000000, 0xFFFFFFFC};

    uint32_t ElementX = pSource->v & 0x3FF;
    uint32_t ElementY = (pSource->v >> 10) & 0x3FF;
    uint32_t ElementZ = (pSource->v >> 20) & 0x3FF;
    uint32_t ElementW = pSource->v >> 30;

    XMVECTORF32 vResult = {
        (ElementX == 0x200) ? -1.f : ((float)(int16_t)(ElementX | SignExtend[ElementX >> 9]) / 511.0f),
        (ElementY == 0x200) ? -1.f : ((float)(int16_t)(ElementY | SignExtend[ElementY >> 9]) / 511.0f),
        (ElementZ == 0x200) ? -1.f : ((float)(int16_t)(ElementZ | SignExtend[ElementZ >> 9]) / 511.0f),
        (ElementW == 0x2) ? -1.f : ((float)(int16_t)(ElementW | SignExtendW[(ElementW >> 1) & 1]))
    };
    return vResult.v;
#elif defined(_XM_SSE_INTRINSICS_)
    static const XMVECTORF32 DecN4Mul = {1.0f/511.0f,1.0f/(511.0f*1024.0f),1.0f/(511.0f*1024.0f*1024.0f),1.0f/(1024.0f*1024.0f*1024.0f)};
    // Splat the 32-bit value in all four entries
    XMVECTOR vTemp = _mm_load_ps1(reinterpret_cast<const float *>(&pSource->v));
    // Mask the 10:10:10:2 fields in place
    vTemp = _mm_and_ps(vTemp,g_XMMaskDec4);
    // Flip the sign bit of each field so the values order as signed
    vTemp = _mm_xor_ps(vTemp,g_XMXorDec4);
    // Convert to floating point numbers
    vTemp = _mm_cvtepi32_ps(_mm_castps_si128(vTemp));
    // Undo the sign-bit flip in float space
    vTemp = _mm_add_ps(vTemp,g_XMAddDec4);
    // Normalize each field into its -1.0f..1.0f range
    vTemp = _mm_mul_ps(vTemp,DecN4Mul);
    // Clamp result (for case of -512/-1)
    return _mm_max_ps( vTemp, g_XMNegativeOne );
#elif defined(XM_NO_MISALIGNED_VECTOR_ACCESS)
#endif // _XM_VMX128_INTRINSICS_
}

//------------------------------------------------------------------------------
// Loads an XMDEC4 (signed 10:10:10:2, unnormalized integer values).
_Use_decl_annotations_
inline XMVECTOR PackedVector::XMLoadDec4
(
    const XMDEC4* pSource
)
{
    assert(pSource);
#if defined(_XM_NO_INTRINSICS_) || defined(_XM_ARM_NEON_INTRINSICS_)
    // Lookup tables to sign-extend the 10-bit and 2-bit fields
    static const uint32_t SignExtend[] = {0x00000000, 0xFFFFFC00};
    static const uint32_t SignExtendW[] = {0x00000000, 0xFFFFFFFC};

    uint32_t ElementX = pSource->v & 0x3FF;
    uint32_t ElementY = (pSource->v >> 10) & 0x3FF;
    uint32_t ElementZ = (pSource->v >> 20) & 0x3FF;
    uint32_t ElementW = pSource->v >> 30;

    XMVECTORF32 vResult = {
        (float)(int16_t)(ElementX | SignExtend[ElementX >> 9]),
        (float)(int16_t)(ElementY | SignExtend[ElementY >> 9]),
        (float)(int16_t)(ElementZ | SignExtend[ElementZ >> 9]),
        (float)(int16_t)(ElementW | SignExtendW[ElementW >> 1])
    };
    return vResult.v;
#elif defined(_XM_SSE_INTRINSICS_)
    // Splat the 32-bit value in all four entries
    XMVECTOR vTemp = _mm_load_ps1(reinterpret_cast<const float *>(&pSource->v));
    // Mask the 10:10:10:2 fields in place
    vTemp = _mm_and_ps(vTemp,g_XMMaskDec4);
    // Flip the sign bit of each field so the values order as signed
    vTemp = _mm_xor_ps(vTemp,g_XMXorDec4);
    // Convert to floating point numbers
    vTemp = _mm_cvtepi32_ps(_mm_castps_si128(vTemp));
    // Undo the sign-bit flip in float space
    vTemp = _mm_add_ps(vTemp,g_XMAddDec4);
    // Undo the field-position scaling (y/2^10, z/2^20, w/2^30)
    vTemp = _mm_mul_ps(vTemp,g_XMMulDec4);
    return vTemp;
#elif defined(XM_NO_MISALIGNED_VECTOR_ACCESS)
#endif // _XM_VMX128_INTRINSICS_
}

//------------------------------------------------------------------------------
// Loads an XMUBYTEN4 (normalized unsigned 8:8:8:8) — each byte maps to [0,1].
_Use_decl_annotations_
inline XMVECTOR PackedVector::XMLoadUByteN4
(
    const XMUBYTEN4* pSource
)
{
    assert(pSource);
#if defined(_XM_NO_INTRINSICS_) || defined(_XM_ARM_NEON_INTRINSICS_)
    XMVECTORF32 vResult = {
        (float)pSource->x / 255.0f,
        (float)pSource->y / 255.0f,
        (float)pSource->z / 255.0f,
        (float)pSource->w / 255.0f
    };
    return vResult.v;
#elif defined(_XM_SSE_INTRINSICS_)
    // Per-component 1/255 scale; y/z/w also undo their byte-position shifts.
    static const XMVECTORF32 LoadUByteN4Mul = {1.0f/255.0f,1.0f/(255.0f*256.0f),1.0f/(255.0f*65536.0f),1.0f/(255.0f*65536.0f*256.0f)};
    // Splat the 32-bit value in all four entries
    XMVECTOR vTemp = _mm_load1_ps(reinterpret_cast<const float *>(&pSource->x));
    // Mask x&0xff,y&0xff00,z&0xff0000,w&0xff000000
    vTemp = _mm_and_ps(vTemp,g_XMMaskByte4);
    // w occupies the sign bits; flip so the signed int conversion is correct
    vTemp = _mm_xor_ps(vTemp,g_XMFlipW);
    // Convert to floating point numbers
    vTemp = _mm_cvtepi32_ps(_mm_castps_si128(vTemp));
    // w + 0x80000000.f to complete the conversion
    vTemp = _mm_add_ps(vTemp,g_XMAddUDec4);
    // Normalize, and fix y, z and w because they are scaled by their bit position
    vTemp = _mm_mul_ps(vTemp,LoadUByteN4Mul);
    return vTemp;
#elif defined(XM_NO_MISALIGNED_VECTOR_ACCESS)
#endif // _XM_VMX128_INTRINSICS_
}

//------------------------------------------------------------------------------
// Loads an XMUBYTE4 (unsigned 8:8:8:8, unnormalized integer values).
_Use_decl_annotations_
inline XMVECTOR PackedVector::XMLoadUByte4
(
    const XMUBYTE4* pSource
)
{
    assert(pSource);
#if defined(_XM_NO_INTRINSICS_) || defined(_XM_ARM_NEON_INTRINSICS_)
    XMVECTORF32 vResult = {
        (float)pSource->x,
        (float)pSource->y,
        (float)pSource->z,
        (float)pSource->w
    };
    return vResult.v;
#elif defined(_XM_SSE_INTRINSICS_)
    // Undo the byte-position scaling (y/2^8, z/2^16, w/2^24)
    static const XMVECTORF32 LoadUByte4Mul = {1.0f,1.0f/256.0f,1.0f/65536.0f,1.0f/(65536.0f*256.0f)};
    // Splat the 32-bit value in all four entries
    XMVECTOR vTemp = _mm_load1_ps(reinterpret_cast<const float *>(&pSource->x));
    // Mask x&0xff,y&0xff00,z&0xff0000,w&0xff000000
    vTemp = _mm_and_ps(vTemp,g_XMMaskByte4);
    // w occupies the sign bits; flip so the signed int conversion is correct
    vTemp = _mm_xor_ps(vTemp,g_XMFlipW);
    // Convert to floating point numbers
    vTemp = _mm_cvtepi32_ps(_mm_castps_si128(vTemp));
    // w + 0x80000000.f to complete the conversion
    vTemp = _mm_add_ps(vTemp,g_XMAddUDec4);
    // Fix y, z and w because they are scaled by their bit position
    vTemp = _mm_mul_ps(vTemp,LoadUByte4Mul);
    return vTemp;
#elif defined(XM_NO_MISALIGNED_VECTOR_ACCESS)
#endif // _XM_VMX128_INTRINSICS_
}

//------------------------------------------------------------------------------
// Loads an XMBYTEN4 (normalized signed 8:8:8:8); -128 clamps to -1.
_Use_decl_annotations_
inline XMVECTOR PackedVector::XMLoadByteN4
(
    const XMBYTEN4* pSource
)
{
    assert(pSource);
#if defined(_XM_NO_INTRINSICS_) || defined(_XM_ARM_NEON_INTRINSICS_)
    XMVECTORF32 vResult = {
        (pSource->x == -128) ? -1.f : ((float)pSource->x / 127.0f),
        (pSource->y == -128) ? -1.f : ((float)pSource->y / 127.0f),
        (pSource->z == -128) ? -1.f : ((float)pSource->z / 127.0f),
        (pSource->w == -128) ? -1.f : ((float)pSource->w / 127.0f)
    };
    return vResult.v;
#elif defined(_XM_SSE_INTRINSICS_)
    // Per-component 1/127 scale; y/z/w also undo their byte-position shifts.
    static const XMVECTORF32 LoadByteN4Mul = {1.0f/127.0f,1.0f/(127.0f*256.0f),1.0f/(127.0f*65536.0f),1.0f/(127.0f*65536.0f*256.0f)};
    // Splat the 32-bit value in all four entries
    XMVECTOR vTemp = _mm_load1_ps(reinterpret_cast<const float *>(&pSource->x));
    // Mask x&0xff,y&0xff00,z&0xff0000,w&0xff000000
    vTemp = _mm_and_ps(vTemp,g_XMMaskByte4);
    // Flip the sign bit of x, y and z so the values order as signed
    vTemp = _mm_xor_ps(vTemp,g_XMXorByte4);
    // Convert to floating point numbers
    vTemp = _mm_cvtepi32_ps(_mm_castps_si128(vTemp));
    // x, y and z - 0x80 to complete the conversion
    vTemp = _mm_add_ps(vTemp,g_XMAddByte4);
    // Normalize, and fix y, z and w because they are scaled by their bit position
    vTemp = _mm_mul_ps(vTemp,LoadByteN4Mul);
    // Clamp result (for case of -128)
    return _mm_max_ps( vTemp, g_XMNegativeOne );
#elif defined(XM_NO_MISALIGNED_VECTOR_ACCESS)
#endif // _XM_VMX128_INTRINSICS_
}

//------------------------------------------------------------------------------
// Loads an XMBYTE4 (signed 8:8:8:8, unnormalized integer values).
_Use_decl_annotations_
inline XMVECTOR PackedVector::XMLoadByte4
(
    const XMBYTE4* pSource
)
{
    assert(pSource);
#if defined(_XM_NO_INTRINSICS_) || defined(_XM_ARM_NEON_INTRINSICS_)
    XMVECTORF32 vResult = {
        (float)pSource->x,
        (float)pSource->y,
        (float)pSource->z,
        (float)pSource->w
    };
    return vResult.v;
#elif defined(_XM_SSE_INTRINSICS_)
    // Undo the byte-position scaling (y/2^8, z/2^16, w/2^24)
    static const XMVECTORF32 LoadByte4Mul = {1.0f,1.0f/256.0f,1.0f/65536.0f,1.0f/(65536.0f*256.0f)};
    // Splat the 32-bit value in all four entries
    XMVECTOR vTemp = _mm_load1_ps(reinterpret_cast<const float *>(&pSource->x));
    // Mask x&0xff,y&0xff00,z&0xff0000,w&0xff000000
    vTemp = _mm_and_ps(vTemp,g_XMMaskByte4);
    // Flip the sign bit of x, y and z so the values order as signed
    vTemp = _mm_xor_ps(vTemp,g_XMXorByte4);
    // Convert to floating point numbers
    vTemp = _mm_cvtepi32_ps(_mm_castps_si128(vTemp));
    // x, y and z - 0x80 to complete the conversion
    vTemp = _mm_add_ps(vTemp,g_XMAddByte4);
    // Fix y, z and w because they are scaled by their bit position
    vTemp = _mm_mul_ps(vTemp,LoadByte4Mul);
    return vTemp;
#elif defined(XM_NO_MISALIGNED_VECTOR_ACCESS)
#endif // _XM_VMX128_INTRINSICS_
}

//------------------------------------------------------------------------------
// Loads an XMUNIBBLE4 (unsigned 4:4:4:4, unnormalized integer values 0..15).
_Use_decl_annotations_
inline XMVECTOR PackedVector::XMLoadUNibble4
(
    const XMUNIBBLE4* pSource
)
{
    assert(pSource);
#if defined(_XM_SSE_INTRINSICS_) && !defined(_XM_NO_INTRINSICS_)
    static const XMVECTORI32 UNibble4And = {0xF,0xF0,0xF00,0xF000};
    // Undo the nibble-position scaling (y/2^4, z/2^8, w/2^12)
    static const XMVECTORF32 UNibble4Mul = {1.0f,1.0f/16.f,1.0f/256.f,1.0f/4096.f};
    // Get the 16 bit value and splat it
    XMVECTOR vResult = _mm_load_ps1(reinterpret_cast<const float *>(&pSource->v));
    // Mask off the four nibbles in place
    vResult = _mm_and_ps(vResult,UNibble4And);
    // Convert to float
    vResult = _mm_cvtepi32_ps(_mm_castps_si128(vResult));
    // Fix y, z and w because they are scaled by their bit position
    vResult = _mm_mul_ps(vResult,UNibble4Mul);
    return vResult;
#else
    XMVECTORF32 vResult = {
        float(pSource->v & 0xF),
        float((pSource->v >> 4) & 0xF),
        float((pSource->v >> 8) & 0xF),
        float((pSource->v >> 12) & 0xF)
    };
    return vResult.v;
#endif // !_XM_SSE_INTRINSICS_
}

//------------------------------------------------------------------------------
// Loads an XMU555 (unsigned 5:5:5:1, unnormalized integer values).
_Use_decl_annotations_
inline XMVECTOR PackedVector::XMLoadU555
(
    const XMU555* pSource
)
{
    assert(pSource);
#if defined(_XM_SSE_INTRINSICS_) && !defined(_XM_NO_INTRINSICS_)
    static const XMVECTORI32 U555And = {0x1F,0x1F<<5,0x1F<<10,0x8000};
    // Undo the field-position scaling (y/2^5, z/2^10, w/2^15)
    static const XMVECTORF32 U555Mul =
{1.0f,1.0f/32.f,1.0f/1024.f,1.0f/32768.f}; 1313 // Get the 32 bit value and splat it 1314 XMVECTOR vResult = _mm_load_ps1(reinterpret_cast<const float *>(&pSource->v)); 1315 // Mask off x, y and z 1316 vResult = _mm_and_ps(vResult,U555And); 1317 // Convert to float 1318 vResult = _mm_cvtepi32_ps(_mm_castps_si128(vResult)); 1319 // Normalize x, y, and z 1320 vResult = _mm_mul_ps(vResult,U555Mul); 1321 return vResult; 1322#else 1323 XMVECTORF32 vResult = { 1324 float(pSource->v & 0x1F), 1325 float((pSource->v >> 5) & 0x1F), 1326 float((pSource->v >> 10) & 0x1F), 1327 float((pSource->v >> 15) & 0x1) 1328 }; 1329 return vResult.v; 1330#endif // !_XM_SSE_INTRISICS_ 1331} 1332 1333 1334/**************************************************************************** 1335 * 1336 * Vector and matrix store operations 1337 * 1338 ****************************************************************************/ 1339_Use_decl_annotations_ 1340inline void PackedVector::XMStoreColor 1341( 1342 XMCOLOR* pDestination, 1343 FXMVECTOR V 1344) 1345{ 1346 assert(pDestination); 1347#if defined(_XM_NO_INTRINSICS_) || defined(_XM_ARM_NEON_INTRINSICS_) 1348 1349 static const XMVECTORF32 Scale = {255.0f, 255.0f, 255.0f, 255.0f}; 1350 1351 XMVECTOR N = XMVectorSaturate(V); 1352 N = XMVectorMultiply(N, Scale.v); 1353 N = XMVectorRound(N); 1354 1355 XMFLOAT4A tmp; 1356 XMStoreFloat4A( &tmp, N ); 1357 1358 pDestination->c = ((uint32_t)tmp.w << 24) | 1359 ((uint32_t)tmp.x << 16) | 1360 ((uint32_t)tmp.y << 8) | 1361 ((uint32_t)tmp.z); 1362 1363#elif defined(_XM_SSE_INTRINSICS_) 1364 static const XMVECTORF32 Scale = {255.0f,255.0f,255.0f,255.0f}; 1365 // Set <0 to 0 1366 XMVECTOR vResult = _mm_max_ps(V,g_XMZero); 1367 // Set>1 to 1 1368 vResult = _mm_min_ps(vResult,g_XMOne); 1369 // Convert to 0-255 1370 vResult = _mm_mul_ps(vResult,Scale); 1371 // Shuffle RGBA to ARGB 1372 vResult = XM_PERMUTE_PS(vResult,_MM_SHUFFLE(3,0,1,2)); 1373 // Convert to int 1374 __m128i vInt = _mm_cvtps_epi32(vResult); 1375 // 
Mash to shorts 1376 vInt = _mm_packs_epi32(vInt,vInt); 1377 // Mash to bytes 1378 vInt = _mm_packus_epi16(vInt,vInt); 1379 // Store the color 1380 _mm_store_ss(reinterpret_cast<float *>(&pDestination->c),reinterpret_cast<__m128 *>(&vInt)[0]); 1381#else // _XM_VMX128_INTRINSICS_ 1382#endif // _XM_VMX128_INTRINSICS_ 1383} 1384 1385//------------------------------------------------------------------------------ 1386_Use_decl_annotations_ 1387inline void PackedVector::XMStoreHalf2 1388( 1389 XMHALF2* pDestination, 1390 FXMVECTOR V 1391) 1392{ 1393 assert(pDestination); 1394#if defined(_XM_NO_INTRINSICS_) || defined(_XM_SSE_INTRINSICS_) || defined(_XM_ARM_NEON_INTRINSICS_) 1395 1396 pDestination->x = XMConvertFloatToHalf(XMVectorGetX(V)); 1397 pDestination->y = XMConvertFloatToHalf(XMVectorGetY(V)); 1398 1399#else // _XM_VMX128_INTRINSICS_ 1400#endif // _XM_VMX128_INTRINSICS_ 1401} 1402 1403//------------------------------------------------------------------------------ 1404_Use_decl_annotations_ 1405inline void PackedVector::XMStoreShortN2 1406( 1407 XMSHORTN2* pDestination, 1408 FXMVECTOR V 1409) 1410{ 1411 assert(pDestination); 1412#if defined(_XM_NO_INTRINSICS_) || defined(_XM_ARM_NEON_INTRINSICS_) 1413 1414 static const XMVECTORF32 Scale = {32767.0f, 32767.0f, 32767.0f, 32767.0f}; 1415 1416 XMVECTOR N = XMVectorClamp(V, g_XMNegativeOne.v, g_XMOne.v); 1417 N = XMVectorMultiply(N, Scale.v); 1418 N = XMVectorRound(N); 1419 1420 XMFLOAT4A tmp; 1421 XMStoreFloat4A( &tmp, N ); 1422 1423 pDestination->x = (int16_t)tmp.x; 1424 pDestination->y = (int16_t)tmp.y; 1425 1426#elif defined(_XM_SSE_INTRINSICS_) 1427 static const XMVECTORF32 Scale = {32767.0f, 32767.0f, 32767.0f, 32767.0f}; 1428 1429 XMVECTOR vResult = _mm_max_ps(V,g_XMNegativeOne); 1430 vResult = _mm_min_ps(vResult,g_XMOne); 1431 vResult = _mm_mul_ps(vResult,Scale); 1432 __m128i vResulti = _mm_cvtps_epi32(vResult); 1433 vResulti = _mm_packs_epi32(vResulti,vResulti); 1434 _mm_store_ss(reinterpret_cast<float 
*>(&pDestination->x),_mm_castsi128_ps(vResulti)); 1435#else // _XM_VMX128_INTRINSICS_ 1436#endif // _XM_VMX128_INTRINSICS_ 1437} 1438 1439//------------------------------------------------------------------------------ 1440_Use_decl_annotations_ 1441inline void PackedVector::XMStoreShort2 1442( 1443 XMSHORT2* pDestination, 1444 FXMVECTOR V 1445) 1446{ 1447 assert(pDestination); 1448#if defined(_XM_NO_INTRINSICS_) || defined(_XM_ARM_NEON_INTRINSICS_) 1449 1450 static const XMVECTORF32 Min = {-32767.0f, -32767.0f, -32767.0f, -32767.0f}; 1451 static const XMVECTORF32 Max = {32767.0f, 32767.0f, 32767.0f, 32767.0f}; 1452 1453 XMVECTOR N = XMVectorClamp(V, Min, Max); 1454 N = XMVectorRound(N); 1455 1456 XMFLOAT4A tmp; 1457 XMStoreFloat4A( &tmp, N ); 1458 1459 pDestination->x = (int16_t)tmp.x; 1460 pDestination->y = (int16_t)tmp.y; 1461 1462#elif defined(_XM_SSE_INTRINSICS_) 1463 static const XMVECTORF32 Min = {-32767.0f, -32767.0f, -32767.0f, -32767.0f}; 1464 static const XMVECTORF32 Max = {32767.0f, 32767.0f, 32767.0f, 32767.0f}; 1465 // Bounds check 1466 XMVECTOR vResult = _mm_max_ps(V,Min); 1467 vResult = _mm_min_ps(vResult,Max); 1468 // Convert to int with rounding 1469 __m128i vInt = _mm_cvtps_epi32(vResult); 1470 // Pack the ints into shorts 1471 vInt = _mm_packs_epi32(vInt,vInt); 1472 _mm_store_ss(reinterpret_cast<float *>(&pDestination->x),_mm_castsi128_ps(vInt)); 1473#else // _XM_VMX128_INTRINSICS_ 1474#endif // _XM_VMX128_INTRINSICS_ 1475} 1476 1477//------------------------------------------------------------------------------ 1478_Use_decl_annotations_ 1479inline void PackedVector::XMStoreUShortN2 1480( 1481 XMUSHORTN2* pDestination, 1482 FXMVECTOR V 1483) 1484{ 1485 assert(pDestination); 1486#if defined(_XM_NO_INTRINSICS_) || defined(_XM_ARM_NEON_INTRINSICS_) 1487 1488 static const XMVECTORF32 Scale = {65535.0f, 65535.0f, 65535.0f, 65535.0f}; 1489 1490 XMVECTOR N = XMVectorSaturate(V); 1491 N = XMVectorMultiplyAdd(N, Scale.v, g_XMOneHalf.v); 1492 N = 
XMVectorTruncate(N); 1493 1494 XMFLOAT4A tmp; 1495 XMStoreFloat4A( &tmp, N ); 1496 1497 pDestination->x = (int16_t)tmp.x; 1498 pDestination->y = (int16_t)tmp.y; 1499 1500#elif defined(_XM_SSE_INTRINSICS_) 1501 static const XMVECTORF32 Scale = {65535.0f, 65535.0f, 65535.0f, 65535.0f}; 1502 // Bounds check 1503 XMVECTOR vResult = _mm_max_ps(V,g_XMZero); 1504 vResult = _mm_min_ps(vResult,g_XMOne); 1505 vResult = _mm_mul_ps(vResult,Scale); 1506 // Convert to int with rounding 1507 __m128i vInt = _mm_cvtps_epi32(vResult); 1508 // Since the SSE pack instruction clamps using signed rules, 1509 // manually extract the values to store them to memory 1510 pDestination->x = static_cast<int16_t>(_mm_extract_epi16(vInt,0)); 1511 pDestination->y = static_cast<int16_t>(_mm_extract_epi16(vInt,2)); 1512#else // _XM_VMX128_INTRINSICS_ 1513#endif // _XM_VMX128_INTRINSICS_ 1514} 1515 1516//------------------------------------------------------------------------------ 1517_Use_decl_annotations_ 1518inline void PackedVector::XMStoreUShort2 1519( 1520 XMUSHORT2* pDestination, 1521 FXMVECTOR V 1522) 1523{ 1524 assert(pDestination); 1525#if defined(_XM_NO_INTRINSICS_) || defined(_XM_ARM_NEON_INTRINSICS_) 1526 1527 static const XMVECTORF32 Max = {65535.0f, 65535.0f, 65535.0f, 65535.0f}; 1528 1529 XMVECTOR N = XMVectorClamp(V, XMVectorZero(), Max); 1530 N = XMVectorRound(N); 1531 1532 XMFLOAT4A tmp; 1533 XMStoreFloat4A( &tmp, N ); 1534 1535 pDestination->x = (int16_t)tmp.x; 1536 pDestination->y = (int16_t)tmp.y; 1537 1538#elif defined(_XM_SSE_INTRINSICS_) 1539 static const XMVECTORF32 Max = {65535.0f, 65535.0f, 65535.0f, 65535.0f}; 1540 // Bounds check 1541 XMVECTOR vResult = _mm_max_ps(V,g_XMZero); 1542 vResult = _mm_min_ps(vResult,Max); 1543 // Convert to int with rounding 1544 __m128i vInt = _mm_cvtps_epi32(vResult); 1545 // Since the SSE pack instruction clamps using signed rules, 1546 // manually extract the values to store them to memory 1547 pDestination->x = 
static_cast<int16_t>(_mm_extract_epi16(vInt,0)); 1548 pDestination->y = static_cast<int16_t>(_mm_extract_epi16(vInt,2)); 1549#else // _XM_VMX128_INTRINSICS_ 1550#endif // _XM_VMX128_INTRINSICS_ 1551} 1552 1553//------------------------------------------------------------------------------ 1554_Use_decl_annotations_ 1555inline void PackedVector::XMStoreByteN2 1556( 1557 XMBYTEN2* pDestination, 1558 FXMVECTOR V 1559) 1560{ 1561 assert(pDestination); 1562 1563 static const XMVECTORF32 Scale = {127.0f, 127.0f, 127.0f, 127.0f}; 1564 1565 XMVECTOR N = XMVectorClamp(V, g_XMNegativeOne.v, g_XMOne.v); 1566 N = XMVectorMultiply(N, Scale.v); 1567 N = XMVectorRound(N); 1568 1569 XMFLOAT4A tmp; 1570 XMStoreFloat4A( &tmp, N ); 1571 1572 pDestination->x = (int8_t)tmp.x; 1573 pDestination->y = (int8_t)tmp.y; 1574} 1575 1576//------------------------------------------------------------------------------ 1577_Use_decl_annotations_ 1578inline void PackedVector::XMStoreByte2 1579( 1580 XMBYTE2* pDestination, 1581 FXMVECTOR V 1582) 1583{ 1584 assert(pDestination); 1585 1586 static const XMVECTORF32 Min = {-127.0f, -127.0f, -127.0f, -127.0f}; 1587 static const XMVECTORF32 Max = {127.0f, 127.0f, 127.0f, 127.0f}; 1588 1589 XMVECTOR N = XMVectorClamp(V, Min, Max); 1590 N = XMVectorRound(N); 1591 1592 XMFLOAT4A tmp; 1593 XMStoreFloat4A( &tmp, N ); 1594 1595 pDestination->x = (int8_t)tmp.x; 1596 pDestination->y = (int8_t)tmp.y; 1597} 1598 1599//------------------------------------------------------------------------------ 1600_Use_decl_annotations_ 1601inline void PackedVector::XMStoreUByteN2 1602( 1603 XMUBYTEN2* pDestination, 1604 FXMVECTOR V 1605) 1606{ 1607 assert(pDestination); 1608 1609 static const XMVECTORF32 Scale = {255.0f, 255.0f, 255.0f, 255.0f}; 1610 1611 XMVECTOR N = XMVectorSaturate(V); 1612 N = XMVectorMultiplyAdd(N, Scale.v, g_XMOneHalf.v); 1613 N = XMVectorTruncate(N); 1614 1615 XMFLOAT4A tmp; 1616 XMStoreFloat4A( &tmp, N ); 1617 1618 pDestination->x = (uint8_t)tmp.x; 1619 
pDestination->y = (uint8_t)tmp.y; 1620} 1621 1622//------------------------------------------------------------------------------ 1623_Use_decl_annotations_ 1624inline void PackedVector::XMStoreUByte2 1625( 1626 XMUBYTE2* pDestination, 1627 FXMVECTOR V 1628) 1629{ 1630 assert(pDestination); 1631 1632 static const XMVECTORF32 Max = {255.0f, 255.0f, 255.0f, 255.0f}; 1633 1634 XMVECTOR N = XMVectorClamp(V, XMVectorZero(), Max); 1635 N = XMVectorRound(N); 1636 1637 XMFLOAT4A tmp; 1638 XMStoreFloat4A( &tmp, N ); 1639 1640 pDestination->x = (uint8_t)tmp.x; 1641 pDestination->y = (uint8_t)tmp.y; 1642} 1643 1644//------------------------------------------------------------------------------ 1645_Use_decl_annotations_ 1646inline void PackedVector::XMStoreU565 1647( 1648 XMU565* pDestination, 1649 FXMVECTOR V 1650) 1651{ 1652 assert(pDestination); 1653#if defined(_XM_SSE_INTRINSICS_) && !defined(_XM_NO_INTRINSICS_) 1654 static const XMVECTORF32 Max = {31.0f, 63.0f, 31.0f, 0.0f}; 1655 // Bounds check 1656 XMVECTOR vResult = _mm_max_ps(V,g_XMZero); 1657 vResult = _mm_min_ps(vResult,Max); 1658 // Convert to int with rounding 1659 __m128i vInt = _mm_cvtps_epi32(vResult); 1660 // No SSE operations will write to 16-bit values, so we have to extract them manually 1661 uint16_t x = static_cast<uint16_t>(_mm_extract_epi16(vInt,0)); 1662 uint16_t y = static_cast<uint16_t>(_mm_extract_epi16(vInt,2)); 1663 uint16_t z = static_cast<uint16_t>(_mm_extract_epi16(vInt,4)); 1664 pDestination->v = ((z & 0x1F) << 11) | 1665 ((y & 0x3F) << 5) | 1666 ((x & 0x1F)); 1667#else 1668 static const XMVECTORF32 Max = {31.0f, 63.0f, 31.0f, 0.0f}; 1669 1670 XMVECTOR N = XMVectorClamp(V, XMVectorZero(), Max.v); 1671 N = XMVectorRound(N); 1672 1673 XMFLOAT4A tmp; 1674 XMStoreFloat4A( &tmp, N ); 1675 1676 pDestination->v = (((uint16_t)tmp.z & 0x1F) << 11) | 1677 (((uint16_t)tmp.y & 0x3F) << 5) | 1678 (((uint16_t)tmp.x & 0x1F)); 1679#endif !_XM_SSE_INTRINSICS_ 1680} 1681 
1682//------------------------------------------------------------------------------ 1683_Use_decl_annotations_ 1684inline void PackedVector::XMStoreFloat3PK 1685( 1686 XMFLOAT3PK* pDestination, 1687 FXMVECTOR V 1688) 1689{ 1690 assert(pDestination); 1691 1692 __declspec(align(16)) uint32_t IValue[4]; 1693 XMStoreFloat3A( reinterpret_cast<XMFLOAT3A*>(&IValue), V ); 1694 1695 uint32_t Result[3]; 1696 1697 // X & Y Channels (5-bit exponent, 6-bit mantissa) 1698 for(uint32_t j=0; j < 2; ++j) 1699 { 1700 uint32_t Sign = IValue[j] & 0x80000000; 1701 uint32_t I = IValue[j] & 0x7FFFFFFF; 1702 1703 if ((I & 0x7F800000) == 0x7F800000) 1704 { 1705 // INF or NAN 1706 Result[j] = 0x7c0; 1707 if (( I & 0x7FFFFF ) != 0) 1708 { 1709 Result[j] = 0x7c0 | (((I>>17)|(I>11)|(I>>6)|(I))&0x3f); 1710 } 1711 else if ( Sign ) 1712 { 1713 // -INF is clamped to 0 since 3PK is positive only 1714 Result[j] = 0; 1715 } 1716 } 1717 else if ( Sign ) 1718 { 1719 // 3PK is positive only, so clamp to zero 1720 Result[j] = 0; 1721 } 1722 else if (I > 0x477E0000U) 1723 { 1724 // The number is too large to be represented as a float11, set to max 1725 Result[j] = 0x7BF; 1726 } 1727 else 1728 { 1729 if (I < 0x38800000U) 1730 { 1731 // The number is too small to be represented as a normalized float11 1732 // Convert it to a denormalized value. 
1733 uint32_t Shift = 113U - (I >> 23U); 1734 I = (0x800000U | (I & 0x7FFFFFU)) >> Shift; 1735 } 1736 else 1737 { 1738 // Rebias the exponent to represent the value as a normalized float11 1739 I += 0xC8000000U; 1740 } 1741 1742 Result[j] = ((I + 0xFFFFU + ((I >> 17U) & 1U)) >> 17U)&0x7ffU; 1743 } 1744 } 1745 1746 // Z Channel (5-bit exponent, 5-bit mantissa) 1747 uint32_t Sign = IValue[2] & 0x80000000; 1748 uint32_t I = IValue[2] & 0x7FFFFFFF; 1749 1750 if ((I & 0x7F800000) == 0x7F800000) 1751 { 1752 // INF or NAN 1753 Result[2] = 0x3e0; 1754 if ( I & 0x7FFFFF ) 1755 { 1756 Result[2] = 0x3e0 | (((I>>18)|(I>13)|(I>>3)|(I))&0x1f); 1757 } 1758 else if ( Sign ) 1759 { 1760 // -INF is clamped to 0 since 3PK is positive only 1761 Result[2] = 0; 1762 } 1763 } 1764 else if ( Sign ) 1765 { 1766 // 3PK is positive only, so clamp to zero 1767 Result[2] = 0; 1768 } 1769 else if (I > 0x477C0000U) 1770 { 1771 // The number is too large to be represented as a float10, set to max 1772 Result[2] = 0x3df; 1773 } 1774 else 1775 { 1776 if (I < 0x38800000U) 1777 { 1778 // The number is too small to be represented as a normalized float10 1779 // Convert it to a denormalized value. 
1780 uint32_t Shift = 113U - (I >> 23U); 1781 I = (0x800000U | (I & 0x7FFFFFU)) >> Shift; 1782 } 1783 else 1784 { 1785 // Rebias the exponent to represent the value as a normalized float10 1786 I += 0xC8000000U; 1787 } 1788 1789 Result[2] = ((I + 0x1FFFFU + ((I >> 18U) & 1U)) >> 18U)&0x3ffU; 1790 } 1791 1792 // Pack Result into memory 1793 pDestination->v = (Result[0] & 0x7ff) 1794 | ( (Result[1] & 0x7ff) << 11 ) 1795 | ( (Result[2] & 0x3ff) << 22 ); 1796} 1797 1798//------------------------------------------------------------------------------ 1799_Use_decl_annotations_ 1800inline void PackedVector::XMStoreFloat3SE 1801( 1802 XMFLOAT3SE* pDestination, 1803 FXMVECTOR V 1804) 1805{ 1806 assert(pDestination); 1807 1808 __declspec(align(16)) uint32_t IValue[4]; 1809 XMStoreFloat3A( reinterpret_cast<XMFLOAT3A*>(&IValue), V ); 1810 1811 uint32_t Exp[3]; 1812 uint32_t Frac[3]; 1813 1814 // X, Y, Z Channels (5-bit exponent, 9-bit mantissa) 1815 for(uint32_t j=0; j < 3; ++j) 1816 { 1817 uint32_t Sign = IValue[j] & 0x80000000; 1818 uint32_t I = IValue[j] & 0x7FFFFFFF; 1819 1820 if ((I & 0x7F800000) == 0x7F800000) 1821 { 1822 // INF or NAN 1823 Exp[j] = 0x1f; 1824 if (( I & 0x7FFFFF ) != 0) 1825 { 1826 Frac[j] = ((I>>14)|(I>5)|(I))&0x1ff; 1827 } 1828 else if ( Sign ) 1829 { 1830 // -INF is clamped to 0 since 3SE is positive only 1831 Exp[j] = Frac[j] = 0; 1832 } 1833 } 1834 else if ( Sign ) 1835 { 1836 // 3SE is positive only, so clamp to zero 1837 Exp[j] = Frac[j] = 0; 1838 } 1839 else if (I > 0x477FC000U) 1840 { 1841 // The number is too large, set to max 1842 Exp[j] = 0x1e; 1843 Frac[j] = 0x1ff; 1844 } 1845 else 1846 { 1847 if (I < 0x38800000U) 1848 { 1849 // The number is too small to be represented as a normalized float11 1850 // Convert it to a denormalized value. 
1851 uint32_t Shift = 113U - (I >> 23U); 1852 I = (0x800000U | (I & 0x7FFFFFU)) >> Shift; 1853 } 1854 else 1855 { 1856 // Rebias the exponent to represent the value as a normalized float11 1857 I += 0xC8000000U; 1858 } 1859 1860 uint32_t T = ((I + 0x1FFFU + ((I >> 14U) & 1U)) >> 14U)&0x3fffU; 1861 1862 Exp[j] = (T & 0x3E00) >> 9; 1863 Frac[j] = T & 0x1ff; 1864 } 1865 } 1866 1867 // Adjust to a shared exponent 1868 uint32_t T = XMMax( Exp[0], XMMax( Exp[1], Exp[2] ) ); 1869 1870 Frac[0] = Frac[0] >> (T - Exp[0]); 1871 Frac[1] = Frac[1] >> (T - Exp[1]); 1872 Frac[2] = Frac[2] >> (T - Exp[2]); 1873 1874 // Store packed into memory 1875 pDestination->xm = Frac[0]; 1876 pDestination->ym = Frac[1]; 1877 pDestination->zm = Frac[2]; 1878 pDestination->e = T; 1879} 1880 1881//------------------------------------------------------------------------------ 1882_Use_decl_annotations_ 1883inline void PackedVector::XMStoreHalf4 1884( 1885 XMHALF4* pDestination, 1886 FXMVECTOR V 1887) 1888{ 1889 assert(pDestination); 1890#if defined(_XM_NO_INTRINSICS_) || defined(_XM_SSE_INTRINSICS_) || defined(_XM_ARM_NEON_INTRINSICS_) 1891 1892 XMFLOAT4A t; 1893 XMStoreFloat4A(&t, V ); 1894 1895 pDestination->x = XMConvertFloatToHalf(t.x); 1896 pDestination->y = XMConvertFloatToHalf(t.y); 1897 pDestination->z = XMConvertFloatToHalf(t.z); 1898 pDestination->w = XMConvertFloatToHalf(t.w); 1899 1900#else // _XM_VMX128_INTRINSICS_ 1901#endif // _XM_VMX128_INTRINSICS_ 1902} 1903 1904//------------------------------------------------------------------------------ 1905_Use_decl_annotations_ 1906inline void PackedVector::XMStoreShortN4 1907( 1908 XMSHORTN4* pDestination, 1909 FXMVECTOR V 1910) 1911{ 1912 assert(pDestination); 1913#if defined(_XM_NO_INTRINSICS_) 1914 1915 static const XMVECTORF32 Scale = {32767.0f, 32767.0f, 32767.0f, 32767.0f}; 1916 1917 XMVECTOR N = XMVectorClamp(V, g_XMNegativeOne.v, g_XMOne.v); 1918 N = XMVectorMultiply(N, Scale.v); 1919 N = XMVectorRound(N); 1920 1921 XMFLOAT4A tmp; 
    XMStoreFloat4A(&tmp, N );

    pDestination->x = (int16_t)tmp.x;
    pDestination->y = (int16_t)tmp.y;
    pDestination->z = (int16_t)tmp.z;
    pDestination->w = (int16_t)tmp.w;

#elif defined(_XM_ARM_NEON_INTRINSICS_)
    __n128 vResult = vmaxq_f32( V, g_XMNegativeOne );
    vResult = vminq_f32( vResult, g_XMOne );
    const __n128 Scale = vdupq_n_f32( 32767.0f );
    vResult = vmulq_f32( vResult, Scale );
    vResult = vcvtq_s32_f32( vResult );
    __n64 vInt = vmovn_s32( vResult );
    vst1_s16( (int16_t*)pDestination, vInt );
#elif defined(_XM_SSE_INTRINSICS_)
    static const XMVECTORF32 Scale = {32767.0f, 32767.0f, 32767.0f, 32767.0f};

    XMVECTOR vResult = _mm_max_ps(V,g_XMNegativeOne);
    vResult = _mm_min_ps(vResult,g_XMOne);
    vResult = _mm_mul_ps(vResult,Scale);
    __m128i vResulti = _mm_cvtps_epi32(vResult);
    // Pack the four ints into the low 64 bits as shorts, then store them
    vResulti = _mm_packs_epi32(vResulti,vResulti);
    _mm_store_sd(reinterpret_cast<double *>(&pDestination->x),_mm_castsi128_pd(vResulti));
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}

//------------------------------------------------------------------------------
// Stores all four components of V as signed 16-bit integers (clamped to
// +/-32767).
_Use_decl_annotations_
inline void PackedVector::XMStoreShort4
(
    XMSHORT4* pDestination,
    FXMVECTOR V
)
{
    assert(pDestination);
#if defined(_XM_NO_INTRINSICS_)

    static const XMVECTORF32 Min = {-32767.0f, -32767.0f, -32767.0f, -32767.0f};
    static const XMVECTORF32 Max = {32767.0f, 32767.0f, 32767.0f, 32767.0f};

    XMVECTOR N = XMVectorClamp(V, Min, Max);
    N = XMVectorRound(N);

    XMFLOAT4A tmp;
    XMStoreFloat4A(&tmp, N );

    pDestination->x = (int16_t)tmp.x;
    pDestination->y = (int16_t)tmp.y;
    pDestination->z = (int16_t)tmp.z;
    pDestination->w = (int16_t)tmp.w;

#elif defined(_XM_ARM_NEON_INTRINSICS_)
    static const XMVECTORF32 Min = {-32767.0f, -32767.0f, -32767.0f, -32767.0f};
    static const XMVECTORF32 Max = {32767.0f, 32767.0f, 32767.0f, 32767.0f};

    __n128 vResult = vmaxq_f32( V, Min );
    vResult = vminq_f32( vResult, Max );
    vResult = vcvtq_s32_f32( vResult );
    __n64 vInt = vmovn_s32( vResult );
    vst1_s16( (int16_t*)pDestination, vInt );
#elif defined(_XM_SSE_INTRINSICS_)
    static const XMVECTORF32 Min = {-32767.0f, -32767.0f, -32767.0f, -32767.0f};
    static const XMVECTORF32 Max = {32767.0f, 32767.0f, 32767.0f, 32767.0f};
    // Bounds check
    XMVECTOR vResult = _mm_max_ps(V,Min);
    vResult = _mm_min_ps(vResult,Max);
    // Convert to int with rounding
    __m128i vInt = _mm_cvtps_epi32(vResult);
    // Pack the ints into shorts
    vInt = _mm_packs_epi32(vInt,vInt);
    _mm_store_sd(reinterpret_cast<double *>(&pDestination->x),_mm_castsi128_pd(vInt));
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}

//------------------------------------------------------------------------------
// Stores all four components of V as normalized unsigned 16-bit values
// (saturated to [0,1], scaled by 65535).
_Use_decl_annotations_
inline void PackedVector::XMStoreUShortN4
(
    XMUSHORTN4* pDestination,
    FXMVECTOR V
)
{
    assert(pDestination);
#if defined(_XM_NO_INTRINSICS_)

    static const XMVECTORF32 Scale = {65535.0f, 65535.0f, 65535.0f, 65535.0f};

    XMVECTOR N = XMVectorSaturate(V);
    N = XMVectorMultiplyAdd(N, Scale.v, g_XMOneHalf.v);
    N = XMVectorTruncate(N);

    XMFLOAT4A tmp;
    XMStoreFloat4A(&tmp, N );

    // NOTE(review): casts via int16_t into what appears to be an unsigned
    // destination — matches the SSE path's bit pattern; confirm vs XMUSHORTN4.
    pDestination->x = (int16_t)tmp.x;
    pDestination->y = (int16_t)tmp.y;
    pDestination->z = (int16_t)tmp.z;
    pDestination->w = (int16_t)tmp.w;

#elif defined(_XM_ARM_NEON_INTRINSICS_)
    __n128 vResult = vmaxq_f32( V, g_XMZero );
    vResult = vminq_f32( vResult, g_XMOne );
    const __n128 Scale = vdupq_n_f32( 65535.0f );
    vResult = vmulq_f32( vResult, Scale );
    vResult = vcvtq_u32_f32( vResult );
    __n64 vInt = vmovn_u32( vResult );
    vst1_u16( (uint16_t*)pDestination, vInt );
#elif defined(_XM_SSE_INTRINSICS_)
    static const XMVECTORF32 Scale = {65535.0f, 65535.0f, 65535.0f, 65535.0f};
    // Bounds check
    XMVECTOR vResult = _mm_max_ps(V,g_XMZero);
    vResult = _mm_min_ps(vResult,g_XMOne);
    vResult = _mm_mul_ps(vResult,Scale);
    // Convert to int with rounding
    __m128i vInt = _mm_cvtps_epi32(vResult);
    // Since the SSE pack instruction clamps using signed rules,
    // manually extract the values to store them to memory
    pDestination->x = static_cast<int16_t>(_mm_extract_epi16(vInt,0));
    pDestination->y = static_cast<int16_t>(_mm_extract_epi16(vInt,2));
    pDestination->z = static_cast<int16_t>(_mm_extract_epi16(vInt,4));
    pDestination->w = static_cast<int16_t>(_mm_extract_epi16(vInt,6));
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}

//------------------------------------------------------------------------------
// Stores all four components of V as unsigned 16-bit integers (clamped to
// 0..65535).
_Use_decl_annotations_
inline void PackedVector::XMStoreUShort4
(
    XMUSHORT4* pDestination,
    FXMVECTOR V
)
{
    assert(pDestination);
#if defined(_XM_NO_INTRINSICS_)

    static const XMVECTORF32 Max = {65535.0f, 65535.0f, 65535.0f, 65535.0f};

    XMVECTOR N = XMVectorClamp(V, XMVectorZero(), Max);
    N = XMVectorRound(N);

    XMFLOAT4A tmp;
    XMStoreFloat4A(&tmp, N );

    pDestination->x = (int16_t)tmp.x;
    pDestination->y = (int16_t)tmp.y;
    pDestination->z = (int16_t)tmp.z;
    pDestination->w = (int16_t)tmp.w;

#elif defined(_XM_ARM_NEON_INTRINSICS_)
    static const XMVECTORF32 Max = {65535.0f, 65535.0f, 65535.0f, 65535.0f};

    __n128 vResult = vmaxq_f32( V, g_XMZero );
    vResult = vminq_f32( vResult, Max );
    vResult = vcvtq_u32_f32( vResult );
    __n64 vInt = vmovn_u32( vResult );
    vst1_u16( (uint16_t*)pDestination, vInt );
#elif defined(_XM_SSE_INTRINSICS_)
    static const XMVECTORF32 Max = {65535.0f, 65535.0f, 65535.0f, 65535.0f};
    // Bounds check
    XMVECTOR vResult = _mm_max_ps(V,g_XMZero);
    vResult = _mm_min_ps(vResult,Max);
    // Convert to int with rounding
    __m128i vInt = _mm_cvtps_epi32(vResult);
    // Since the SSE pack instruction clamps using signed rules,
    // manually extract the values to store them to memory
    pDestination->x = static_cast<int16_t>(_mm_extract_epi16(vInt,0));
    pDestination->y = static_cast<int16_t>(_mm_extract_epi16(vInt,2));
    pDestination->z = static_cast<int16_t>(_mm_extract_epi16(vInt,4));
    pDestination->w = static_cast<int16_t>(_mm_extract_epi16(vInt,6));
#else // _XM_VMX128_INTRINSICS_
#endif // _XM_VMX128_INTRINSICS_
}

//------------------------------------------------------------------------------
// Stores V as a signed-xyz/unsigned-w 10:10:10:2 value (x/y/z clamped to
// [-1,1] and scaled by 511; w clamped to [0,1] and scaled by 3).
_Use_decl_annotations_
inline void PackedVector::XMStoreXDecN4
(
    XMXDECN4* pDestination,
    FXMVECTOR V
)
{
    assert(pDestination);
#if defined(_XM_NO_INTRINSICS_) || defined(_XM_ARM_NEON_INTRINSICS_)

    static const XMVECTORF32 Min = {-1.0f, -1.0f, -1.0f, 0.0f};
    static const XMVECTORF32 Scale = {511.0f, 511.0f, 511.0f, 3.0f};

    XMVECTOR N = XMVectorClamp(V, Min.v, g_XMOne.v);
    N = XMVectorMultiply(N, Scale.v);
    N = XMVectorRound(N);

    XMFLOAT4A tmp;
    XMStoreFloat4A(&tmp, N );

    pDestination->v = ((uint32_t)tmp.w << 30) |
                      (((int32_t)tmp.z & 0x3FF) << 20) |
                      (((int32_t)tmp.y & 0x3FF) << 10) |
                      (((int32_t)tmp.x & 0x3FF));

#elif defined(_XM_SSE_INTRINSICS_)
    static const XMVECTORF32 Min = {-1.0f, -1.0f, -1.0f, 0.0f};
    // Scales fold in each field's bit-position shift (2^10, 2^20, 2^29)
    static const XMVECTORF32 Scale = {511.0f, 511.0f*1024.0f, 511.0f*1048576.0f,3.0f*536870912.0f};
    static const XMVECTORI32 ScaleMask = {0x3FF,0x3FF<<10,0x3FF<<20,0x3<<29};
    XMVECTOR vResult = _mm_max_ps(V,Min);
    vResult = _mm_min_ps(vResult,g_XMOne);
    // Scale by multiplication
    vResult = _mm_mul_ps(vResult,Scale);
    // Convert to int (W is unsigned)
    __m128i
vResulti = _mm_cvtps_epi32(vResult); 2135 // Mask off any fraction 2136 vResulti = _mm_and_si128(vResulti,ScaleMask); 2137 // To fix W, add itself to shift it up to <<30 instead of <<29 2138 __m128i vResultw = _mm_and_si128(vResulti,g_XMMaskW); 2139 vResulti = _mm_add_epi32(vResulti,vResultw); 2140 // Do a horizontal or of all 4 entries 2141 vResult = XM_PERMUTE_PS(_mm_castsi128_ps(vResulti),_MM_SHUFFLE(0,3,2,1)); 2142 vResulti = _mm_or_si128(vResulti,_mm_castps_si128(vResult)); 2143 vResult = XM_PERMUTE_PS(vResult,_MM_SHUFFLE(0,3,2,1)); 2144 vResulti = _mm_or_si128(vResulti,_mm_castps_si128(vResult)); 2145 vResult = XM_PERMUTE_PS(vResult,_MM_SHUFFLE(0,3,2,1)); 2146 vResulti = _mm_or_si128(vResulti,_mm_castps_si128(vResult)); 2147 _mm_store_ss(reinterpret_cast<float *>(&pDestination->v),_mm_castsi128_ps(vResulti)); 2148#else // _XM_VMX128_INTRINSICS_ 2149#endif // _XM_VMX128_INTRINSICS_ 2150} 2151 2152//------------------------------------------------------------------------------ 2153_Use_decl_annotations_ 2154inline void PackedVector::XMStoreXDec4 2155( 2156 XMXDEC4* pDestination, 2157 FXMVECTOR V 2158) 2159{ 2160 assert(pDestination); 2161#if defined(_XM_NO_INTRINSICS_) || defined(_XM_ARM_NEON_INTRINSICS_) 2162 2163 static const XMVECTORF32 Min = {-511.0f, -511.0f, -511.0f, 0.0f}; 2164 static const XMVECTORF32 Max = {511.0f, 511.0f, 511.0f, 3.0f}; 2165 2166 XMVECTOR N = XMVectorClamp(V, Min, Max); 2167 2168 XMFLOAT4A tmp; 2169 XMStoreFloat4A(&tmp, N ); 2170 2171 pDestination->v = ((uint32_t)tmp.w << 30) | 2172 (((int32_t)tmp.z & 0x3FF) << 20) | 2173 (((int32_t)tmp.y & 0x3FF) << 10) | 2174 (((int32_t)tmp.x & 0x3FF)); 2175 2176#elif defined(_XM_SSE_INTRINSICS_) 2177 static const XMVECTORF32 MinXDec4 = {-511.0f,-511.0f,-511.0f, 0.0f}; 2178 static const XMVECTORF32 MaxXDec4 = { 511.0f, 511.0f, 511.0f, 3.0f}; 2179 static const XMVECTORF32 ScaleXDec4 = {1.0f,1024.0f/2.0f,1024.0f*1024.0f,1024.0f*1024.0f*1024.0f/2.0f}; 2180 static const XMVECTORI32 MaskXDec4= 
{0x3FF,0x3FF<<(10-1),0x3FF<<20,0x3<<(30-1)}; 2181 // Clamp to bounds 2182 XMVECTOR vResult = _mm_max_ps(V,MinXDec4); 2183 vResult = _mm_min_ps(vResult,MaxXDec4); 2184 // Scale by multiplication 2185 vResult = _mm_mul_ps(vResult,ScaleXDec4); 2186 // Convert to int 2187 __m128i vResulti = _mm_cvttps_epi32(vResult); 2188 // Mask off any fraction 2189 vResulti = _mm_and_si128(vResulti,MaskXDec4); 2190 // Do a horizontal or of 4 entries 2191 __m128i vResulti2 = _mm_shuffle_epi32(vResulti,_MM_SHUFFLE(3,2,3,2)); 2192 // x = x|z, y = y|w 2193 vResulti = _mm_or_si128(vResulti,vResulti2); 2194 // Move Z to the x position 2195 vResulti2 = _mm_shuffle_epi32(vResulti,_MM_SHUFFLE(1,1,1,1)); 2196 // Perform a single bit left shift on y|w 2197 vResulti2 = _mm_add_epi32(vResulti2,vResulti2); 2198 // i = x|y|z|w 2199 vResulti = _mm_or_si128(vResulti,vResulti2); 2200 _mm_store_ss(reinterpret_cast<float *>(&pDestination->v),_mm_castsi128_ps(vResulti)); 2201#else // _XM_VMX128_INTRINSICS_ 2202#endif // _XM_VMX128_INTRINSICS_ 2203} 2204 2205//------------------------------------------------------------------------------ 2206_Use_decl_annotations_ 2207inline void PackedVector::XMStoreUDecN4 2208( 2209 XMUDECN4* pDestination, 2210 FXMVECTOR V 2211) 2212{ 2213 assert(pDestination); 2214#if defined(_XM_NO_INTRINSICS_) || defined(_XM_ARM_NEON_INTRINSICS_) 2215 2216 static const XMVECTORF32 Scale = {1023.0f, 1023.0f, 1023.0f, 3.0f}; 2217 2218 XMVECTOR N = XMVectorSaturate(V); 2219 N = XMVectorMultiply(N, Scale.v); 2220 2221 XMFLOAT4A tmp; 2222 XMStoreFloat4A(&tmp, N ); 2223 2224 pDestination->v = ((uint32_t)tmp.w << 30) | 2225 (((uint32_t)tmp.z & 0x3FF) << 20) | 2226 (((uint32_t)tmp.y & 0x3FF) << 10) | 2227 (((uint32_t)tmp.x & 0x3FF)); 2228 2229#elif defined(_XM_SSE_INTRINSICS_) 2230 static const XMVECTORF32 ScaleUDecN4 = {1023.0f,1023.0f*1024.0f*0.5f,1023.0f*1024.0f*1024.0f,3.0f*1024.0f*1024.0f*1024.0f*0.5f}; 2231 static const XMVECTORI32 MaskUDecN4= 
{0x3FF,0x3FF<<(10-1),0x3FF<<20,0x3<<(30-1)}; 2232 // Clamp to bounds 2233 XMVECTOR vResult = _mm_max_ps(V,g_XMZero); 2234 vResult = _mm_min_ps(vResult,g_XMOne); 2235 // Scale by multiplication 2236 vResult = _mm_mul_ps(vResult,ScaleUDecN4); 2237 // Convert to int 2238 __m128i vResulti = _mm_cvttps_epi32(vResult); 2239 // Mask off any fraction 2240 vResulti = _mm_and_si128(vResulti,MaskUDecN4); 2241 // Do a horizontal or of 4 entries 2242 __m128i vResulti2 = _mm_shuffle_epi32(vResulti,_MM_SHUFFLE(3,2,3,2)); 2243 // x = x|z, y = y|w 2244 vResulti = _mm_or_si128(vResulti,vResulti2); 2245 // Move Z to the x position 2246 vResulti2 = _mm_shuffle_epi32(vResulti,_MM_SHUFFLE(1,1,1,1)); 2247 // Perform a left shift by one bit on y|w 2248 vResulti2 = _mm_add_epi32(vResulti2,vResulti2); 2249 // i = x|y|z|w 2250 vResulti = _mm_or_si128(vResulti,vResulti2); 2251 _mm_store_ss(reinterpret_cast<float *>(&pDestination->v),_mm_castsi128_ps(vResulti)); 2252#else // _XM_VMX128_INTRINSICS_ 2253#endif // _XM_VMX128_INTRINSICS_ 2254} 2255 2256//------------------------------------------------------------------------------ 2257_Use_decl_annotations_ 2258inline void PackedVector::XMStoreUDec4 2259( 2260 XMUDEC4* pDestination, 2261 FXMVECTOR V 2262) 2263{ 2264 assert(pDestination); 2265#if defined(_XM_NO_INTRINSICS_) || defined(_XM_ARM_NEON_INTRINSICS_) 2266 2267 static const XMVECTORF32 Max = {1023.0f, 1023.0f, 1023.0f, 3.0f}; 2268 2269 XMVECTOR N = XMVectorClamp(V, XMVectorZero(), Max); 2270 2271 XMFLOAT4A tmp; 2272 XMStoreFloat4A(&tmp, N ); 2273 2274 pDestination->v = ((uint32_t)tmp.w << 30) | 2275 (((uint32_t)tmp.z & 0x3FF) << 20) | 2276 (((uint32_t)tmp.y & 0x3FF) << 10) | 2277 (((uint32_t)tmp.x & 0x3FF)); 2278 2279#elif defined(_XM_SSE_INTRINSICS_) 2280 static const XMVECTORF32 MaxUDec4 = { 1023.0f, 1023.0f, 1023.0f, 3.0f}; 2281 static const XMVECTORF32 ScaleUDec4 = {1.0f,1024.0f/2.0f,1024.0f*1024.0f,1024.0f*1024.0f*1024.0f/2.0f}; 2282 static const XMVECTORI32 MaskUDec4= 
{0x3FF,0x3FF<<(10-1),0x3FF<<20,0x3<<(30-1)}; 2283 // Clamp to bounds 2284 XMVECTOR vResult = _mm_max_ps(V,g_XMZero); 2285 vResult = _mm_min_ps(vResult,MaxUDec4); 2286 // Scale by multiplication 2287 vResult = _mm_mul_ps(vResult,ScaleUDec4); 2288 // Convert to int 2289 __m128i vResulti = _mm_cvttps_epi32(vResult); 2290 // Mask off any fraction 2291 vResulti = _mm_and_si128(vResulti,MaskUDec4); 2292 // Do a horizontal or of 4 entries 2293 __m128i vResulti2 = _mm_shuffle_epi32(vResulti,_MM_SHUFFLE(3,2,3,2)); 2294 // x = x|z, y = y|w 2295 vResulti = _mm_or_si128(vResulti,vResulti2); 2296 // Move Z to the x position 2297 vResulti2 = _mm_shuffle_epi32(vResulti,_MM_SHUFFLE(1,1,1,1)); 2298 // Perform a left shift by one bit on y|w 2299 vResulti2 = _mm_add_epi32(vResulti2,vResulti2); 2300 // i = x|y|z|w 2301 vResulti = _mm_or_si128(vResulti,vResulti2); 2302 _mm_store_ss(reinterpret_cast<float *>(&pDestination->v),_mm_castsi128_ps(vResulti)); 2303#else // _XM_VMX128_INTRINSICS_ 2304#endif // _XM_VMX128_INTRINSICS_ 2305} 2306 2307//------------------------------------------------------------------------------ 2308_Use_decl_annotations_ 2309inline void PackedVector::XMStoreDecN4 2310( 2311 XMDECN4* pDestination, 2312 FXMVECTOR V 2313) 2314{ 2315 assert(pDestination); 2316#if defined(_XM_NO_INTRINSICS_) || defined(_XM_ARM_NEON_INTRINSICS_) 2317 2318 static const XMVECTORF32 Scale = {511.0f, 511.0f, 511.0f, 1.0f}; 2319 2320 XMVECTOR N = XMVectorClamp(V, g_XMNegativeOne.v, g_XMOne.v); 2321 N = XMVectorMultiply(N, Scale.v); 2322 2323 XMFLOAT4A tmp; 2324 XMStoreFloat4A(&tmp, N ); 2325 2326 pDestination->v = ((int32_t)tmp.w << 30) | 2327 (((int32_t)tmp.z & 0x3FF) << 20) | 2328 (((int32_t)tmp.y & 0x3FF) << 10) | 2329 (((int32_t)tmp.x & 0x3FF)); 2330 2331#elif defined(_XM_SSE_INTRINSICS_) 2332 static const XMVECTORF32 ScaleDecN4 = {511.0f,511.0f*1024.0f,511.0f*1024.0f*1024.0f,1.0f*1024.0f*1024.0f*1024.0f}; 2333 static const XMVECTORI32 MaskDecN4= {0x3FF,0x3FF<<10,0x3FF<<20,0x3<<30}; 
2334 // Clamp to bounds 2335 XMVECTOR vResult = _mm_max_ps(V,g_XMNegativeOne); 2336 vResult = _mm_min_ps(vResult,g_XMOne); 2337 // Scale by multiplication 2338 vResult = _mm_mul_ps(vResult,ScaleDecN4); 2339 // Convert to int 2340 __m128i vResulti = _mm_cvttps_epi32(vResult); 2341 // Mask off any fraction 2342 vResulti = _mm_and_si128(vResulti,MaskDecN4); 2343 // Do a horizontal or of 4 entries 2344 __m128i vResulti2 = _mm_shuffle_epi32(vResulti,_MM_SHUFFLE(3,2,3,2)); 2345 // x = x|z, y = y|w 2346 vResulti = _mm_or_si128(vResulti,vResulti2); 2347 // Move Z to the x position 2348 vResulti2 = _mm_shuffle_epi32(vResulti,_MM_SHUFFLE(1,1,1,1)); 2349 // i = x|y|z|w 2350 vResulti = _mm_or_si128(vResulti,vResulti2); 2351 _mm_store_ss(reinterpret_cast<float *>(&pDestination->v),_mm_castsi128_ps(vResulti)); 2352#else // _XM_VMX128_INTRINSICS_ 2353#endif // _XM_VMX128_INTRINSICS_ 2354} 2355 2356//------------------------------------------------------------------------------ 2357_Use_decl_annotations_ 2358inline void PackedVector::XMStoreDec4 2359( 2360 XMDEC4* pDestination, 2361 FXMVECTOR V 2362) 2363{ 2364 assert(pDestination); 2365#if defined(_XM_NO_INTRINSICS_) || defined(_XM_ARM_NEON_INTRINSICS_) 2366 2367 static const XMVECTORF32 Min = {-511.0f, -511.0f, -511.0f, -1.0f}; 2368 static const XMVECTORF32 Max = {511.0f, 511.0f, 511.0f, 1.0f}; 2369 2370 XMVECTOR N = XMVectorClamp(V, Min, Max); 2371 2372 XMFLOAT4A tmp; 2373 XMStoreFloat4A(&tmp, N ); 2374 2375 pDestination->v = ((int32_t)tmp.w << 30) | 2376 (((int32_t)tmp.z & 0x3FF) << 20) | 2377 (((int32_t)tmp.y & 0x3FF) << 10) | 2378 (((int32_t)tmp.x & 0x3FF)); 2379 2380#elif defined(_XM_SSE_INTRINSICS_) 2381 static const XMVECTORF32 MinDec4 = {-511.0f,-511.0f,-511.0f,-1.0f}; 2382 static const XMVECTORF32 MaxDec4 = { 511.0f, 511.0f, 511.0f, 1.0f}; 2383 static const XMVECTORF32 ScaleDec4 = {1.0f,1024.0f,1024.0f*1024.0f,1024.0f*1024.0f*1024.0f}; 2384 static const XMVECTORI32 MaskDec4= {0x3FF,0x3FF<<10,0x3FF<<20,0x3<<30}; 2385 // 
Clamp to bounds 2386 XMVECTOR vResult = _mm_max_ps(V,MinDec4); 2387 vResult = _mm_min_ps(vResult,MaxDec4); 2388 // Scale by multiplication 2389 vResult = _mm_mul_ps(vResult,ScaleDec4); 2390 // Convert to int 2391 __m128i vResulti = _mm_cvttps_epi32(vResult); 2392 // Mask off any fraction 2393 vResulti = _mm_and_si128(vResulti,MaskDec4); 2394 // Do a horizontal or of 4 entries 2395 __m128i vResulti2 = _mm_shuffle_epi32(vResulti,_MM_SHUFFLE(3,2,3,2)); 2396 // x = x|z, y = y|w 2397 vResulti = _mm_or_si128(vResulti,vResulti2); 2398 // Move Z to the x position 2399 vResulti2 = _mm_shuffle_epi32(vResulti,_MM_SHUFFLE(1,1,1,1)); 2400 // i = x|y|z|w 2401 vResulti = _mm_or_si128(vResulti,vResulti2); 2402 _mm_store_ss(reinterpret_cast<float *>(&pDestination->v),_mm_castsi128_ps(vResulti)); 2403#else // _XM_VMX128_INTRINSICS_ 2404#endif // _XM_VMX128_INTRINSICS_ 2405} 2406 2407//------------------------------------------------------------------------------ 2408_Use_decl_annotations_ 2409inline void PackedVector::XMStoreUByteN4 2410( 2411 XMUBYTEN4* pDestination, 2412 FXMVECTOR V 2413) 2414{ 2415 assert(pDestination); 2416#if defined(_XM_NO_INTRINSICS_) || defined(_XM_ARM_NEON_INTRINSICS_) 2417 2418 static const XMVECTORF32 Scale = {255.0f, 255.0f, 255.0f, 255.0f}; 2419 2420 XMVECTOR N = XMVectorSaturate(V); 2421 N = XMVectorMultiply(N, Scale.v); 2422 N = XMVectorRound(N); 2423 2424 XMFLOAT4A tmp; 2425 XMStoreFloat4A(&tmp, N ); 2426 2427 pDestination->x = (uint8_t)tmp.x; 2428 pDestination->y = (uint8_t)tmp.y; 2429 pDestination->z = (uint8_t)tmp.z; 2430 pDestination->w = (uint8_t)tmp.w; 2431 2432#elif defined(_XM_SSE_INTRINSICS_) 2433 static const XMVECTORF32 ScaleUByteN4 = {255.0f,255.0f*256.0f*0.5f,255.0f*256.0f*256.0f,255.0f*256.0f*256.0f*256.0f*0.5f}; 2434 static const XMVECTORI32 MaskUByteN4 = {0xFF,0xFF<<(8-1),0xFF<<16,0xFF<<(24-1)}; 2435 // Clamp to bounds 2436 XMVECTOR vResult = _mm_max_ps(V,g_XMZero); 2437 vResult = _mm_min_ps(vResult,g_XMOne); 2438 // Scale by 
multiplication 2439 vResult = _mm_mul_ps(vResult,ScaleUByteN4); 2440 // Convert to int 2441 __m128i vResulti = _mm_cvttps_epi32(vResult); 2442 // Mask off any fraction 2443 vResulti = _mm_and_si128(vResulti,MaskUByteN4); 2444 // Do a horizontal or of 4 entries 2445 __m128i vResulti2 = _mm_shuffle_epi32(vResulti,_MM_SHUFFLE(3,2,3,2)); 2446 // x = x|z, y = y|w 2447 vResulti = _mm_or_si128(vResulti,vResulti2); 2448 // Move Z to the x position 2449 vResulti2 = _mm_shuffle_epi32(vResulti,_MM_SHUFFLE(1,1,1,1)); 2450 // Perform a single bit left shift to fix y|w 2451 vResulti2 = _mm_add_epi32(vResulti2,vResulti2); 2452 // i = x|y|z|w 2453 vResulti = _mm_or_si128(vResulti,vResulti2); 2454 _mm_store_ss(reinterpret_cast<float *>(&pDestination->v),_mm_castsi128_ps(vResulti)); 2455#else // _XM_VMX128_INTRINSICS_ 2456#endif // _XM_VMX128_INTRINSICS_ 2457} 2458 2459//------------------------------------------------------------------------------ 2460_Use_decl_annotations_ 2461inline void PackedVector::XMStoreUByte4 2462( 2463 XMUBYTE4* pDestination, 2464 FXMVECTOR V 2465) 2466{ 2467 assert(pDestination); 2468#if defined(_XM_NO_INTRINSICS_) || defined(_XM_ARM_NEON_INTRINSICS_) 2469 2470 static const XMVECTORF32 Max = {255.0f, 255.0f, 255.0f, 255.0f}; 2471 2472 XMVECTOR N = XMVectorClamp(V, XMVectorZero(), Max); 2473 N = XMVectorRound(N); 2474 2475 XMFLOAT4A tmp; 2476 XMStoreFloat4A(&tmp, N ); 2477 2478 pDestination->x = (uint8_t)tmp.x; 2479 pDestination->y = (uint8_t)tmp.y; 2480 pDestination->z = (uint8_t)tmp.z; 2481 pDestination->w = (uint8_t)tmp.w; 2482 2483#elif defined(_XM_SSE_INTRINSICS_) 2484 static const XMVECTORF32 MaxUByte4 = { 255.0f, 255.0f, 255.0f, 255.0f}; 2485 static const XMVECTORF32 ScaleUByte4 = {1.0f,256.0f*0.5f,256.0f*256.0f,256.0f*256.0f*256.0f*0.5f}; 2486 static const XMVECTORI32 MaskUByte4 = {0xFF,0xFF<<(8-1),0xFF<<16,0xFF<<(24-1)}; 2487 // Clamp to bounds 2488 XMVECTOR vResult = _mm_max_ps(V,g_XMZero); 2489 vResult = _mm_min_ps(vResult,MaxUByte4); 2490 // 
Scale by multiplication 2491 vResult = _mm_mul_ps(vResult,ScaleUByte4); 2492 // Convert to int 2493 __m128i vResulti = _mm_cvttps_epi32(vResult); 2494 // Mask off any fraction 2495 vResulti = _mm_and_si128(vResulti,MaskUByte4); 2496 // Do a horizontal or of 4 entries 2497 __m128i vResulti2 = _mm_shuffle_epi32(vResulti,_MM_SHUFFLE(3,2,3,2)); 2498 // x = x|z, y = y|w 2499 vResulti = _mm_or_si128(vResulti,vResulti2); 2500 // Move Z to the x position 2501 vResulti2 = _mm_shuffle_epi32(vResulti,_MM_SHUFFLE(1,1,1,1)); 2502 // Perform a single bit left shift to fix y|w 2503 vResulti2 = _mm_add_epi32(vResulti2,vResulti2); 2504 // i = x|y|z|w 2505 vResulti = _mm_or_si128(vResulti,vResulti2); 2506 _mm_store_ss(reinterpret_cast<float *>(&pDestination->v),_mm_castsi128_ps(vResulti)); 2507#else // _XM_VMX128_INTRINSICS_ 2508#endif // _XM_VMX128_INTRINSICS_ 2509} 2510 2511//------------------------------------------------------------------------------ 2512_Use_decl_annotations_ 2513inline void PackedVector::XMStoreByteN4 2514( 2515 XMBYTEN4* pDestination, 2516 FXMVECTOR V 2517) 2518{ 2519 assert(pDestination); 2520#if defined(_XM_NO_INTRINSICS_) || defined(_XM_ARM_NEON_INTRINSICS_) 2521 2522 static const XMVECTORF32 Scale = {127.0f, 127.0f, 127.0f, 127.0f}; 2523 2524 XMVECTOR N = XMVectorClamp(V, g_XMNegativeOne.v, g_XMOne.v); 2525 N = XMVectorMultiply(V, Scale.v); 2526 N = XMVectorRound(N); 2527 2528 XMFLOAT4A tmp; 2529 XMStoreFloat4A(&tmp, N ); 2530 2531 pDestination->x = (int8_t)tmp.x; 2532 pDestination->y = (int8_t)tmp.y; 2533 pDestination->z = (int8_t)tmp.z; 2534 pDestination->w = (int8_t)tmp.w; 2535 2536#elif defined(_XM_SSE_INTRINSICS_) 2537 static const XMVECTORF32 ScaleByteN4 = {127.0f,127.0f*256.0f,127.0f*256.0f*256.0f,127.0f*256.0f*256.0f*256.0f}; 2538 static const XMVECTORI32 MaskByteN4 = {0xFF,0xFF<<8,0xFF<<16,0xFF<<24}; 2539 // Clamp to bounds 2540 XMVECTOR vResult = _mm_max_ps(V,g_XMNegativeOne); 2541 vResult = _mm_min_ps(vResult,g_XMOne); 2542 // Scale by 
multiplication 2543 vResult = _mm_mul_ps(vResult,ScaleByteN4); 2544 // Convert to int 2545 __m128i vResulti = _mm_cvttps_epi32(vResult); 2546 // Mask off any fraction 2547 vResulti = _mm_and_si128(vResulti,MaskByteN4); 2548 // Do a horizontal or of 4 entries 2549 __m128i vResulti2 = _mm_shuffle_epi32(vResulti,_MM_SHUFFLE(3,2,3,2)); 2550 // x = x|z, y = y|w 2551 vResulti = _mm_or_si128(vResulti,vResulti2); 2552 // Move Z to the x position 2553 vResulti2 = _mm_shuffle_epi32(vResulti,_MM_SHUFFLE(1,1,1,1)); 2554 // i = x|y|z|w 2555 vResulti = _mm_or_si128(vResulti,vResulti2); 2556 _mm_store_ss(reinterpret_cast<float *>(&pDestination->v),_mm_castsi128_ps(vResulti)); 2557#else // _XM_VMX128_INTRINSICS_ 2558#endif // _XM_VMX128_INTRINSICS_ 2559} 2560 2561//------------------------------------------------------------------------------ 2562_Use_decl_annotations_ 2563inline void PackedVector::XMStoreByte4 2564( 2565 XMBYTE4* pDestination, 2566 FXMVECTOR V 2567) 2568{ 2569 assert(pDestination); 2570#if defined(_XM_NO_INTRINSICS_) || defined(_XM_ARM_NEON_INTRINSICS_) 2571 2572 static const XMVECTORF32 Min = {-127.0f, -127.0f, -127.0f, -127.0f}; 2573 static const XMVECTORF32 Max = {127.0f, 127.0f, 127.0f, 127.0f}; 2574 2575 XMVECTOR N = XMVectorClamp(V, Min, Max); 2576 N = XMVectorRound(N); 2577 2578 XMFLOAT4A tmp; 2579 XMStoreFloat4A(&tmp, N ); 2580 2581 pDestination->x = (int8_t)tmp.x; 2582 pDestination->y = (int8_t)tmp.y; 2583 pDestination->z = (int8_t)tmp.z; 2584 pDestination->w = (int8_t)tmp.w; 2585 2586#elif defined(_XM_SSE_INTRINSICS_) 2587 static const XMVECTORF32 MinByte4 = {-127.0f,-127.0f,-127.0f,-127.0f}; 2588 static const XMVECTORF32 MaxByte4 = { 127.0f, 127.0f, 127.0f, 127.0f}; 2589 static const XMVECTORF32 ScaleByte4 = {1.0f,256.0f,256.0f*256.0f,256.0f*256.0f*256.0f}; 2590 static const XMVECTORI32 MaskByte4 = {0xFF,0xFF<<8,0xFF<<16,0xFF<<24}; 2591 // Clamp to bounds 2592 XMVECTOR vResult = _mm_max_ps(V,MinByte4); 2593 vResult = _mm_min_ps(vResult,MaxByte4); 2594 
// Scale by multiplication 2595 vResult = _mm_mul_ps(vResult,ScaleByte4); 2596 // Convert to int 2597 __m128i vResulti = _mm_cvttps_epi32(vResult); 2598 // Mask off any fraction 2599 vResulti = _mm_and_si128(vResulti,MaskByte4); 2600 // Do a horizontal or of 4 entries 2601 __m128i vResulti2 = _mm_shuffle_epi32(vResulti,_MM_SHUFFLE(3,2,3,2)); 2602 // x = x|z, y = y|w 2603 vResulti = _mm_or_si128(vResulti,vResulti2); 2604 // Move Z to the x position 2605 vResulti2 = _mm_shuffle_epi32(vResulti,_MM_SHUFFLE(1,1,1,1)); 2606 // i = x|y|z|w 2607 vResulti = _mm_or_si128(vResulti,vResulti2); 2608 _mm_store_ss(reinterpret_cast<float *>(&pDestination->v),_mm_castsi128_ps(vResulti)); 2609#else // _XM_VMX128_INTRINSICS_ 2610#endif // _XM_VMX128_INTRINSICS_ 2611} 2612 2613//------------------------------------------------------------------------------ 2614_Use_decl_annotations_ 2615inline void PackedVector::XMStoreUNibble4 2616( 2617 XMUNIBBLE4* pDestination, 2618 FXMVECTOR V 2619) 2620{ 2621 assert(pDestination); 2622#if defined(_XM_SSE_INTRINSICS_) && !defined(_XM_NO_INTRINSICS_) 2623 static const XMVECTORF32 Max = {15.0f,15.0f,15.0f,15.0f}; 2624 // Bounds check 2625 XMVECTOR vResult = _mm_max_ps(V,g_XMZero); 2626 vResult = _mm_min_ps(vResult,Max); 2627 // Convert to int with rounding 2628 __m128i vInt = _mm_cvtps_epi32(vResult); 2629 // No SSE operations will write to 16-bit values, so we have to extract them manually 2630 uint16_t x = static_cast<uint16_t>(_mm_extract_epi16(vInt,0)); 2631 uint16_t y = static_cast<uint16_t>(_mm_extract_epi16(vInt,2)); 2632 uint16_t z = static_cast<uint16_t>(_mm_extract_epi16(vInt,4)); 2633 uint16_t w = static_cast<uint16_t>(_mm_extract_epi16(vInt,6)); 2634 pDestination->v = ((w & 0xF) << 12) | 2635 ((z & 0xF) << 8) | 2636 ((y & 0xF) << 4) | 2637 ((x & 0xF)); 2638#else 2639 static const XMVECTORF32 Max = {15.0f,15.0f,15.0f,15.0f}; 2640 2641 XMVECTOR N = XMVectorClamp(V, XMVectorZero(), Max.v); 2642 N = XMVectorRound(N); 2643 2644 XMFLOAT4A tmp; 
2645 XMStoreFloat4A(&tmp, N ); 2646 2647 pDestination->v = (((uint16_t)tmp.w & 0xF) << 12) | 2648 (((uint16_t)tmp.z & 0xF) << 8) | 2649 (((uint16_t)tmp.y & 0xF) << 4) | 2650 (((uint16_t)tmp.x & 0xF)); 2651#endif !_XM_SSE_INTRINSICS_ 2652} 2653 2654//------------------------------------------------------------------------------ 2655_Use_decl_annotations_ 2656inline void PackedVector::XMStoreU555 2657( 2658 XMU555* pDestination, 2659 FXMVECTOR V 2660) 2661{ 2662 assert(pDestination); 2663#if defined(_XM_SSE_INTRINSICS_) && !defined(_XM_NO_INTRINSICS_) 2664 static const XMVECTORF32 Max = {31.0f, 31.0f, 31.0f, 1.0f}; 2665 // Bounds check 2666 XMVECTOR vResult = _mm_max_ps(V,g_XMZero); 2667 vResult = _mm_min_ps(vResult,Max); 2668 // Convert to int with rounding 2669 __m128i vInt = _mm_cvtps_epi32(vResult); 2670 // No SSE operations will write to 16-bit values, so we have to extract them manually 2671 uint16_t x = static_cast<uint16_t>(_mm_extract_epi16(vInt,0)); 2672 uint16_t y = static_cast<uint16_t>(_mm_extract_epi16(vInt,2)); 2673 uint16_t z = static_cast<uint16_t>(_mm_extract_epi16(vInt,4)); 2674 uint16_t w = static_cast<uint16_t>(_mm_extract_epi16(vInt,6)); 2675 pDestination->v = ((w) ? 0x8000 : 0) | 2676 ((z & 0x1F) << 10) | 2677 ((y & 0x1F) << 5) | 2678 ((x & 0x1F)); 2679#else 2680 static const XMVECTORF32 Max = {31.0f, 31.0f, 31.0f, 1.0f}; 2681 2682 XMVECTOR N = XMVectorClamp(V, XMVectorZero(), Max.v); 2683 N = XMVectorRound(N); 2684 2685 XMFLOAT4A tmp; 2686 XMStoreFloat4A(&tmp, N ); 2687 2688 pDestination->v = ((tmp.w > 0.f) ? 
0x8000 : 0) | 2689 (((uint16_t)tmp.z & 0x1F) << 10) | 2690 (((uint16_t)tmp.y & 0x1F) << 5) | 2691 (((uint16_t)tmp.x & 0x1F)); 2692#endif !_XM_SSE_INTRINSICS_ 2693} 2694 2695 2696/**************************************************************************** 2697 * 2698 * XMCOLOR operators 2699 * 2700 ****************************************************************************/ 2701 2702//------------------------------------------------------------------------------ 2703 2704inline PackedVector::XMCOLOR::XMCOLOR 2705( 2706 float _r, 2707 float _g, 2708 float _b, 2709 float _a 2710) 2711{ 2712 XMStoreColor(this, XMVectorSet(_r, _g, _b, _a)); 2713} 2714 2715//------------------------------------------------------------------------------ 2716_Use_decl_annotations_ 2717inline PackedVector::XMCOLOR::XMCOLOR 2718( 2719 const float* pArray 2720) 2721{ 2722 XMStoreColor(this, XMLoadFloat4(reinterpret_cast<const XMFLOAT4*>(pArray))); 2723} 2724 2725/**************************************************************************** 2726 * 2727 * XMHALF2 operators 2728 * 2729 ****************************************************************************/ 2730 2731//------------------------------------------------------------------------------ 2732 2733inline PackedVector::XMHALF2::XMHALF2 2734( 2735 float _x, 2736 float _y 2737) 2738{ 2739 x = XMConvertFloatToHalf(_x); 2740 y = XMConvertFloatToHalf(_y); 2741} 2742 2743//------------------------------------------------------------------------------ 2744_Use_decl_annotations_ 2745inline PackedVector::XMHALF2::XMHALF2 2746( 2747 const float* pArray 2748) 2749{ 2750 assert( pArray != nullptr ); 2751 x = XMConvertFloatToHalf(pArray[0]); 2752 y = XMConvertFloatToHalf(pArray[1]); 2753} 2754 2755/**************************************************************************** 2756 * 2757 * XMSHORTN2 operators 2758 * 2759 ****************************************************************************/ 2760 
2761//------------------------------------------------------------------------------ 2762 2763inline PackedVector::XMSHORTN2::XMSHORTN2 2764( 2765 float _x, 2766 float _y 2767) 2768{ 2769 XMStoreShortN2(this, XMVectorSet(_x, _y, 0.0f, 0.0f)); 2770} 2771 2772//------------------------------------------------------------------------------ 2773_Use_decl_annotations_ 2774inline PackedVector::XMSHORTN2::XMSHORTN2 2775( 2776 const float* pArray 2777) 2778{ 2779 XMStoreShortN2(this, XMLoadFloat2(reinterpret_cast<const XMFLOAT2*>(pArray))); 2780} 2781 2782/**************************************************************************** 2783 * 2784 * XMSHORT2 operators 2785 * 2786 ****************************************************************************/ 2787 2788//------------------------------------------------------------------------------ 2789 2790inline PackedVector::XMSHORT2::XMSHORT2 2791( 2792 float _x, 2793 float _y 2794) 2795{ 2796 XMStoreShort2(this, XMVectorSet(_x, _y, 0.0f, 0.0f)); 2797} 2798 2799//------------------------------------------------------------------------------ 2800_Use_decl_annotations_ 2801inline PackedVector::XMSHORT2::XMSHORT2 2802( 2803 const float* pArray 2804) 2805{ 2806 XMStoreShort2(this, XMLoadFloat2(reinterpret_cast<const XMFLOAT2*>(pArray))); 2807} 2808 2809/**************************************************************************** 2810 * 2811 * XMUSHORTN2 operators 2812 * 2813 ****************************************************************************/ 2814 2815//------------------------------------------------------------------------------ 2816 2817inline PackedVector::XMUSHORTN2::XMUSHORTN2 2818( 2819 float _x, 2820 float _y 2821) 2822{ 2823 XMStoreUShortN2(this, XMVectorSet(_x, _y, 0.0f, 0.0f)); 2824} 2825 2826//------------------------------------------------------------------------------ 2827_Use_decl_annotations_ 2828inline PackedVector::XMUSHORTN2::XMUSHORTN2 2829( 2830 const float* pArray 2831) 2832{ 2833 
XMStoreUShortN2(this, XMLoadFloat2(reinterpret_cast<const XMFLOAT2*>(pArray))); 2834} 2835 2836/**************************************************************************** 2837 * 2838 * XMUSHORT2 operators 2839 * 2840 ****************************************************************************/ 2841 2842//------------------------------------------------------------------------------ 2843 2844inline PackedVector::XMUSHORT2::XMUSHORT2 2845( 2846 float _x, 2847 float _y 2848) 2849{ 2850 XMStoreUShort2(this, XMVectorSet(_x, _y, 0.0f, 0.0f)); 2851} 2852 2853//------------------------------------------------------------------------------ 2854_Use_decl_annotations_ 2855inline PackedVector::XMUSHORT2::XMUSHORT2 2856( 2857 const float* pArray 2858) 2859{ 2860 XMStoreUShort2(this, XMLoadFloat2(reinterpret_cast<const XMFLOAT2*>(pArray))); 2861} 2862 2863/**************************************************************************** 2864 * 2865 * XMBYTEN2 operators 2866 * 2867 ****************************************************************************/ 2868 2869//------------------------------------------------------------------------------ 2870 2871inline PackedVector::XMBYTEN2::XMBYTEN2 2872( 2873 float _x, 2874 float _y 2875) 2876{ 2877 XMStoreByteN2(this, XMVectorSet(_x, _y, 0.0f, 0.0f)); 2878} 2879 2880//------------------------------------------------------------------------------ 2881_Use_decl_annotations_ 2882inline PackedVector::XMBYTEN2::XMBYTEN2 2883( 2884 const float* pArray 2885) 2886{ 2887 XMStoreByteN2(this, XMLoadFloat2(reinterpret_cast<const XMFLOAT2*>(pArray))); 2888} 2889 2890/**************************************************************************** 2891 * 2892 * XMBYTE2 operators 2893 * 2894 ****************************************************************************/ 2895 2896//------------------------------------------------------------------------------ 2897 2898inline PackedVector::XMBYTE2::XMBYTE2 2899( 2900 float _x, 2901 float _y 2902) 2903{ 2904 
XMStoreByte2(this, XMVectorSet(_x, _y, 0.0f, 0.0f)); 2905} 2906 2907//------------------------------------------------------------------------------ 2908_Use_decl_annotations_ 2909inline PackedVector::XMBYTE2::XMBYTE2 2910( 2911 const float* pArray 2912) 2913{ 2914 XMStoreByte2(this, XMLoadFloat2(reinterpret_cast<const XMFLOAT2*>(pArray))); 2915} 2916 2917/**************************************************************************** 2918 * 2919 * XMUBYTEN2 operators 2920 * 2921 ****************************************************************************/ 2922 2923//------------------------------------------------------------------------------ 2924 2925inline PackedVector::XMUBYTEN2::XMUBYTEN2 2926( 2927 float _x, 2928 float _y 2929) 2930{ 2931 XMStoreUByteN2(this, XMVectorSet(_x, _y, 0.0f, 0.0f)); 2932} 2933 2934//------------------------------------------------------------------------------ 2935_Use_decl_annotations_ 2936inline PackedVector::XMUBYTEN2::XMUBYTEN2 2937( 2938 const float* pArray 2939) 2940{ 2941 XMStoreUByteN2(this, XMLoadFloat2(reinterpret_cast<const XMFLOAT2*>(pArray))); 2942} 2943 2944/**************************************************************************** 2945 * 2946 * XMUBYTE2 operators 2947 * 2948 ****************************************************************************/ 2949 2950//------------------------------------------------------------------------------ 2951 2952inline PackedVector::XMUBYTE2::XMUBYTE2 2953( 2954 float _x, 2955 float _y 2956) 2957{ 2958 XMStoreUByte2(this, XMVectorSet(_x, _y, 0.0f, 0.0f)); 2959} 2960 2961//------------------------------------------------------------------------------ 2962_Use_decl_annotations_ 2963inline PackedVector::XMUBYTE2::XMUBYTE2 2964( 2965 const float* pArray 2966) 2967{ 2968 XMStoreUByte2(this, XMLoadFloat2(reinterpret_cast<const XMFLOAT2*>(pArray))); 2969} 2970 2971/**************************************************************************** 2972 * 2973 * XMU565 operators 2974 * 2975 
****************************************************************************/ 2976 2977inline PackedVector::XMU565::XMU565 2978( 2979 float _x, 2980 float _y, 2981 float _z 2982) 2983{ 2984 XMStoreU565(this, XMVectorSet( _x, _y, _z, 0.0f )); 2985} 2986 2987_Use_decl_annotations_ 2988inline PackedVector::XMU565::XMU565 2989( 2990 const float *pArray 2991) 2992{ 2993 XMStoreU565(this, XMLoadFloat3(reinterpret_cast<const XMFLOAT3*>(pArray))); 2994} 2995 2996/**************************************************************************** 2997 * 2998 * XMFLOAT3PK operators 2999 * 3000 ****************************************************************************/ 3001 3002inline PackedVector::XMFLOAT3PK::XMFLOAT3PK 3003( 3004 float _x, 3005 float _y, 3006 float _z 3007) 3008{ 3009 XMStoreFloat3PK(this, XMVectorSet( _x, _y, _z, 0.0f )); 3010} 3011 3012_Use_decl_annotations_ 3013inline PackedVector::XMFLOAT3PK::XMFLOAT3PK 3014( 3015 const float *pArray 3016) 3017{ 3018 XMStoreFloat3PK(this, XMLoadFloat3(reinterpret_cast<const XMFLOAT3*>(pArray))); 3019} 3020 3021/**************************************************************************** 3022 * 3023 * XMFLOAT3SE operators 3024 * 3025 ****************************************************************************/ 3026 3027inline PackedVector::XMFLOAT3SE::XMFLOAT3SE 3028( 3029 float _x, 3030 float _y, 3031 float _z 3032) 3033{ 3034 XMStoreFloat3SE(this, XMVectorSet( _x, _y, _z, 0.0f )); 3035} 3036 3037_Use_decl_annotations_ 3038inline PackedVector::XMFLOAT3SE::XMFLOAT3SE 3039( 3040 const float *pArray 3041) 3042{ 3043 XMStoreFloat3SE(this, XMLoadFloat3(reinterpret_cast<const XMFLOAT3*>(pArray))); 3044} 3045 3046/**************************************************************************** 3047 * 3048 * XMHALF4 operators 3049 * 3050 ****************************************************************************/ 3051 3052//------------------------------------------------------------------------------ 3053 3054inline 
PackedVector::XMHALF4::XMHALF4 3055( 3056 float _x, 3057 float _y, 3058 float _z, 3059 float _w 3060) 3061{ 3062 x = XMConvertFloatToHalf(_x); 3063 y = XMConvertFloatToHalf(_y); 3064 z = XMConvertFloatToHalf(_z); 3065 w = XMConvertFloatToHalf(_w); 3066} 3067 3068//------------------------------------------------------------------------------ 3069 3070_Use_decl_annotations_ 3071inline PackedVector::XMHALF4::XMHALF4 3072( 3073 const float* pArray 3074) 3075{ 3076 XMConvertFloatToHalfStream(&x, sizeof(HALF), pArray, sizeof(float), 4); 3077} 3078 3079/**************************************************************************** 3080 * 3081 * XMSHORTN4 operators 3082 * 3083 ****************************************************************************/ 3084 3085//------------------------------------------------------------------------------ 3086 3087inline PackedVector::XMSHORTN4::XMSHORTN4 3088( 3089 float _x, 3090 float _y, 3091 float _z, 3092 float _w 3093) 3094{ 3095 XMStoreShortN4(this, XMVectorSet(_x, _y, _z, _w)); 3096} 3097 3098//------------------------------------------------------------------------------ 3099_Use_decl_annotations_ 3100inline PackedVector::XMSHORTN4::XMSHORTN4 3101( 3102 const float* pArray 3103) 3104{ 3105 XMStoreShortN4(this, XMLoadFloat4(reinterpret_cast<const XMFLOAT4*>(pArray))); 3106} 3107 3108/**************************************************************************** 3109 * 3110 * XMSHORT4 operators 3111 * 3112 ****************************************************************************/ 3113 3114//------------------------------------------------------------------------------ 3115 3116inline PackedVector::XMSHORT4::XMSHORT4 3117( 3118 float _x, 3119 float _y, 3120 float _z, 3121 float _w 3122) 3123{ 3124 XMStoreShort4(this, XMVectorSet(_x, _y, _z, _w)); 3125} 3126 3127//------------------------------------------------------------------------------ 3128_Use_decl_annotations_ 3129inline PackedVector::XMSHORT4::XMSHORT4 3130( 3131 const 
float* pArray 3132) 3133{ 3134 XMStoreShort4(this, XMLoadFloat4(reinterpret_cast<const XMFLOAT4*>(pArray))); 3135} 3136 3137/**************************************************************************** 3138 * 3139 * XMUSHORTN4 operators 3140 * 3141 ****************************************************************************/ 3142 3143//------------------------------------------------------------------------------ 3144 3145inline PackedVector::XMUSHORTN4::XMUSHORTN4 3146( 3147 float _x, 3148 float _y, 3149 float _z, 3150 float _w 3151) 3152{ 3153 XMStoreUShortN4(this, XMVectorSet(_x, _y, _z, _w)); 3154} 3155 3156//------------------------------------------------------------------------------ 3157_Use_decl_annotations_ 3158inline PackedVector::XMUSHORTN4::XMUSHORTN4 3159( 3160 const float* pArray 3161) 3162{ 3163 XMStoreUShortN4(this, XMLoadFloat4(reinterpret_cast<const XMFLOAT4*>(pArray))); 3164} 3165 3166/**************************************************************************** 3167 * 3168 * XMUSHORT4 operators 3169 * 3170 ****************************************************************************/ 3171 3172//------------------------------------------------------------------------------ 3173 3174inline PackedVector::XMUSHORT4::XMUSHORT4 3175( 3176 float _x, 3177 float _y, 3178 float _z, 3179 float _w 3180) 3181{ 3182 XMStoreUShort4(this, XMVectorSet(_x, _y, _z, _w)); 3183} 3184 3185//------------------------------------------------------------------------------ 3186_Use_decl_annotations_ 3187inline PackedVector::XMUSHORT4::XMUSHORT4 3188( 3189 const float* pArray 3190) 3191{ 3192 XMStoreUShort4(this, XMLoadFloat4(reinterpret_cast<const XMFLOAT4*>(pArray))); 3193} 3194 3195/**************************************************************************** 3196 * 3197 * XMXDECN4 operators 3198 * 3199 ****************************************************************************/ 3200 3201//------------------------------------------------------------------------------ 
3202 3203inline PackedVector::XMXDECN4::XMXDECN4 3204( 3205 float _x, 3206 float _y, 3207 float _z, 3208 float _w 3209) 3210{ 3211 XMStoreXDecN4(this, XMVectorSet(_x, _y, _z, _w)); 3212} 3213 3214//------------------------------------------------------------------------------ 3215_Use_decl_annotations_ 3216inline PackedVector::XMXDECN4::XMXDECN4 3217( 3218 const float* pArray 3219) 3220{ 3221 XMStoreXDecN4(this, XMLoadFloat4(reinterpret_cast<const XMFLOAT4*>(pArray))); 3222} 3223 3224/**************************************************************************** 3225 * 3226 * XMXDEC4 operators 3227 * 3228 ****************************************************************************/ 3229 3230//------------------------------------------------------------------------------ 3231 3232inline PackedVector::XMXDEC4::XMXDEC4 3233( 3234 float _x, 3235 float _y, 3236 float _z, 3237 float _w 3238) 3239{ 3240 XMStoreXDec4(this, XMVectorSet(_x, _y, _z, _w)); 3241} 3242 3243//------------------------------------------------------------------------------ 3244_Use_decl_annotations_ 3245inline PackedVector::XMXDEC4::XMXDEC4 3246( 3247 const float* pArray 3248) 3249{ 3250 XMStoreXDec4(this, XMLoadFloat4(reinterpret_cast<const XMFLOAT4*>(pArray))); 3251} 3252 3253/**************************************************************************** 3254 * 3255 * XMDECN4 operators 3256 * 3257 ****************************************************************************/ 3258 3259//------------------------------------------------------------------------------ 3260 3261inline PackedVector::XMDECN4::XMDECN4 3262( 3263 float _x, 3264 float _y, 3265 float _z, 3266 float _w 3267) 3268{ 3269 XMStoreDecN4(this, XMVectorSet(_x, _y, _z, _w)); 3270} 3271 3272//------------------------------------------------------------------------------ 3273_Use_decl_annotations_ 3274inline PackedVector::XMDECN4::XMDECN4 3275( 3276 const float* pArray 3277) 3278{ 3279 XMStoreDecN4(this, XMLoadFloat4(reinterpret_cast<const 
XMFLOAT4*>(pArray))); 3280} 3281 3282/**************************************************************************** 3283 * 3284 * XMDEC4 operators 3285 * 3286 ****************************************************************************/ 3287 3288//------------------------------------------------------------------------------ 3289 3290inline PackedVector::XMDEC4::XMDEC4 3291( 3292 float _x, 3293 float _y, 3294 float _z, 3295 float _w 3296) 3297{ 3298 XMStoreDec4(this, XMVectorSet(_x, _y, _z, _w)); 3299} 3300 3301//------------------------------------------------------------------------------ 3302_Use_decl_annotations_ 3303inline PackedVector::XMDEC4::XMDEC4 3304( 3305 const float* pArray 3306) 3307{ 3308 XMStoreDec4(this, XMLoadFloat4(reinterpret_cast<const XMFLOAT4*>(pArray))); 3309} 3310 3311/**************************************************************************** 3312 * 3313 * XMUDECN4 operators 3314 * 3315 ****************************************************************************/ 3316 3317//------------------------------------------------------------------------------ 3318 3319inline PackedVector::XMUDECN4::XMUDECN4 3320( 3321 float _x, 3322 float _y, 3323 float _z, 3324 float _w 3325) 3326{ 3327 XMStoreUDecN4(this, XMVectorSet(_x, _y, _z, _w)); 3328} 3329 3330//------------------------------------------------------------------------------ 3331_Use_decl_annotations_ 3332inline PackedVector::XMUDECN4::XMUDECN4 3333( 3334 const float* pArray 3335) 3336{ 3337 XMStoreUDecN4(this, XMLoadFloat4(reinterpret_cast<const XMFLOAT4*>(pArray))); 3338} 3339 3340/**************************************************************************** 3341 * 3342 * XMUDEC4 operators 3343 * 3344 ****************************************************************************/ 3345 3346//------------------------------------------------------------------------------ 3347 3348inline PackedVector::XMUDEC4::XMUDEC4 3349( 3350 float _x, 3351 float _y, 3352 float _z, 3353 float _w 3354) 3355{ 
3356 XMStoreUDec4(this, XMVectorSet(_x, _y, _z, _w)); 3357} 3358 3359//------------------------------------------------------------------------------ 3360_Use_decl_annotations_ 3361inline PackedVector::XMUDEC4::XMUDEC4 3362( 3363 const float* pArray 3364) 3365{ 3366 XMStoreUDec4(this, XMLoadFloat4(reinterpret_cast<const XMFLOAT4*>(pArray))); 3367} 3368 3369/**************************************************************************** 3370 * 3371 * XMBYTEN4 operators 3372 * 3373 ****************************************************************************/ 3374 3375//------------------------------------------------------------------------------ 3376 3377inline PackedVector::XMBYTEN4::XMBYTEN4 3378( 3379 float _x, 3380 float _y, 3381 float _z, 3382 float _w 3383) 3384{ 3385 XMStoreByteN4(this, XMVectorSet(_x, _y, _z, _w)); 3386} 3387 3388//------------------------------------------------------------------------------ 3389_Use_decl_annotations_ 3390inline PackedVector::XMBYTEN4::XMBYTEN4 3391( 3392 const float* pArray 3393) 3394{ 3395 XMStoreByteN4(this, XMLoadFloat4(reinterpret_cast<const XMFLOAT4*>(pArray))); 3396} 3397 3398/**************************************************************************** 3399 * 3400 * XMBYTE4 operators 3401 * 3402 ****************************************************************************/ 3403 3404//------------------------------------------------------------------------------ 3405 3406inline PackedVector::XMBYTE4::XMBYTE4 3407( 3408 float _x, 3409 float _y, 3410 float _z, 3411 float _w 3412) 3413{ 3414 XMStoreByte4(this, XMVectorSet(_x, _y, _z, _w)); 3415} 3416 3417//------------------------------------------------------------------------------ 3418_Use_decl_annotations_ 3419inline PackedVector::XMBYTE4::XMBYTE4 3420( 3421 const float* pArray 3422) 3423{ 3424 XMStoreByte4(this, XMLoadFloat4(reinterpret_cast<const XMFLOAT4*>(pArray))); 3425} 3426 3427/**************************************************************************** 3428 * 
3429 * XMUBYTEN4 operators 3430 * 3431 ****************************************************************************/ 3432 3433//------------------------------------------------------------------------------ 3434 3435inline PackedVector::XMUBYTEN4::XMUBYTEN4 3436( 3437 float _x, 3438 float _y, 3439 float _z, 3440 float _w 3441) 3442{ 3443 XMStoreUByteN4(this, XMVectorSet(_x, _y, _z, _w)); 3444} 3445 3446//------------------------------------------------------------------------------ 3447_Use_decl_annotations_ 3448inline PackedVector::XMUBYTEN4::XMUBYTEN4 3449( 3450 const float* pArray 3451) 3452{ 3453 XMStoreUByteN4(this, XMLoadFloat4(reinterpret_cast<const XMFLOAT4*>(pArray))); 3454} 3455 3456/**************************************************************************** 3457 * 3458 * XMUBYTE4 operators 3459 * 3460 ****************************************************************************/ 3461 3462//------------------------------------------------------------------------------ 3463 3464inline PackedVector::XMUBYTE4::XMUBYTE4 3465( 3466 float _x, 3467 float _y, 3468 float _z, 3469 float _w 3470) 3471{ 3472 XMStoreUByte4(this, XMVectorSet(_x, _y, _z, _w)); 3473} 3474 3475//------------------------------------------------------------------------------ 3476_Use_decl_annotations_ 3477inline PackedVector::XMUBYTE4::XMUBYTE4 3478( 3479 const float* pArray 3480) 3481{ 3482 XMStoreUByte4(this, XMLoadFloat4(reinterpret_cast<const XMFLOAT4*>(pArray))); 3483} 3484 3485/**************************************************************************** 3486 * 3487 * XMUNIBBLE4 operators 3488 * 3489 ****************************************************************************/ 3490 3491//------------------------------------------------------------------------------ 3492 3493inline PackedVector::XMUNIBBLE4::XMUNIBBLE4 3494( 3495 float _x, 3496 float _y, 3497 float _z, 3498 float _w 3499) 3500{ 3501 XMStoreUNibble4(this, XMVectorSet( _x, _y, _z, _w )); 3502} 3503 
3504//------------------------------------------------------------------------------ 3505_Use_decl_annotations_ 3506inline PackedVector::XMUNIBBLE4::XMUNIBBLE4 3507( 3508 const float *pArray 3509) 3510{ 3511 XMStoreUNibble4(this, XMLoadFloat4(reinterpret_cast<const XMFLOAT4*>(pArray))); 3512} 3513 3514/**************************************************************************** 3515 * 3516 * XMU555 operators 3517 * 3518 ****************************************************************************/ 3519 3520//------------------------------------------------------------------------------ 3521 3522inline PackedVector::XMU555::XMU555 3523( 3524 float _x, 3525 float _y, 3526 float _z, 3527 bool _w 3528) 3529{ 3530 XMStoreU555(this, XMVectorSet(_x, _y, _z, ((_w) ? 1.0f : 0.0f) )); 3531} 3532 3533//------------------------------------------------------------------------------ 3534_Use_decl_annotations_ 3535inline PackedVector::XMU555::XMU555 3536( 3537 const float *pArray, 3538 bool _w 3539) 3540{ 3541 XMVECTOR V = XMLoadFloat3(reinterpret_cast<const XMFLOAT3*>(pArray)); 3542 XMStoreU555(this, XMVectorSetW(V, ((_w) ? 1.0f : 0.0f) )); 3543} 3544 3545