Miscellaneous Intrinsics Using Streaming SIMD Extensions

The prototypes for Streaming SIMD Extensions intrinsics are in the xmmintrin.h header file.

| Intrinsic Name | Operation | Corresponding Instruction |
|---|---|---|
| _mm_shuffle_ps | Shuffle | SHUFPS |
| _mm_unpackhi_ps | Unpack High | UNPCKHPS |
| _mm_unpacklo_ps | Unpack Low | UNPCKLPS |
| _mm_loadh_pi | Load High | MOVHPS reg, mem |
| _mm_storeh_pi | Store High | MOVHPS mem, reg |
| _mm_movehl_ps | Move High to Low | MOVHLPS |
| _mm_movelh_ps | Move Low to High | MOVLHPS |
| _mm_loadl_pi | Load Low | MOVLPS reg, mem |
| _mm_storel_pi | Store Low | MOVLPS mem, reg |
| _mm_movemask_ps | Create four-bit mask | MOVMSKPS |

__m128 _mm_shuffle_ps(__m128 a, __m128 b, unsigned int imm8)

Selects four specific SP FP values from a and b, based on the mask imm8. The mask must be an immediate. See Macro Function for Shuffle Using Streaming SIMD Extensions for a description of the shuffle semantics.

__m128 _mm_unpackhi_ps(__m128 a, __m128 b)

Selects and interleaves the upper two SP FP values from a and b.
r0 := a2

r1 := b2

r2 := a3

r3 := b3

__m128 _mm_unpacklo_ps(__m128 a, __m128 b)

Selects and interleaves the lower two SP FP values from a and b.
r0 := a0

r1 := b0

r2 := a1

r3 := b1

__m128 _mm_loadh_pi(__m128 a, __m64 const *p)

Sets the upper two SP FP values with 64 bits of data loaded from the address p; the lower two values are passed through from a.
r0 := a0

r1 := a1

r2 := *p0

r3 := *p1

void _mm_storeh_pi(__m64 *p, __m128 a)

Stores the upper two SP FP values of a to the address p.
*p0 := a2

*p1 := a3

__m128 _mm_movehl_ps(__m128 a, __m128 b)

Moves the upper 2 SP FP values of b to the lower 2 SP FP values of the result. The upper 2 SP FP values of a are passed through to the result.
r3 := a3

r2 := a2

r1 := b3

r0 := b2

__m128 _mm_movelh_ps(__m128 a, __m128 b)

Moves the lower 2 SP FP values of b to the upper 2 SP FP values of the result. The lower 2 SP FP values of a are passed through to the result.
r3 := b1

r2 := b0

r1 := a1

r0 := a0

__m128 _mm_loadl_pi(__m128 a, __m64 const *p)

Sets the lower two SP FP values with 64 bits of data loaded from the address p; the upper two values are passed through from a.
r0 := *p0

r1 := *p1

r2 := a2

r3 := a3

void _mm_storel_pi(__m64 *p, __m128 a)

Stores the lower two SP FP values of a to the address p.
*p0 := a0

*p1 := a1

int _mm_movemask_ps(__m128 a)

Creates a 4-bit mask from the most significant bits of the four SP FP values.
r := sign(a3)<<3 | sign(a2)<<2 | sign(a1)<<1 | sign(a0)