Arithmetic Operations for Streaming SIMD Extensions

Intrinsic Instruction Operation R0 R1 R2 R3

_mm_add_ss

ADDSS

Addition

a0 [op] b0

a1

a2

a3

_mm_add_ps

ADDPS

Addition

a0 [op] b0

a1 [op] b1

a2 [op] b2

a3 [op] b3

_mm_sub_ss

SUBSS

Subtraction

a0 [op] b0

a1

a2

a3

_mm_sub_ps

SUBPS

Subtraction

a0 [op] b0

a1 [op] b1

a2 [op] b2

a3 [op] b3

_mm_mul_ss

MULSS

Multiplication

a0 [op] b0

a1

a2

a3

_mm_mul_ps

MULPS

Multiplication

a0 [op] b0

a1 [op] b1

a2 [op] b2

a3 [op] b3

_mm_div_ss

DIVSS

Division

a0 [op] b0

a1

a2

a3

_mm_div_ps

DIVPS

Division

a0 [op] b0

a1 [op] b1

a2 [op] b2

a3 [op] b3

_mm_sqrt_ss

SQRTSS

Square Root

[op] a0

a1

a2

a3

_mm_sqrt_ps

SQRTPS

Square Root

[op] a0

[op] a1

[op] a2

[op] a3

_mm_rcp_ss

RCPSS

Reciprocal

[op] a0

a1

a2

a3

_mm_rcp_ps

RCPPS

Reciprocal

[op] a0

[op] a1

[op] a2

[op] a3

_mm_rsqrt_ss

RSQRTSS

Reciprocal Square Root

[op] a0

a1

a2

a3

_mm_rsqrt_ps

RSQRTPS

Reciprocal Square Root

[op] a0

[op] a1

[op] a2

[op] a3

_mm_min_ss

MINSS

Computes Minimum

[op] (a0, b0)

a1

a2

a3

_mm_min_ps

MINPS

Computes Minimum

[op] (a0, b0)

[op] (a1, b1)

[op] (a2, b2)

[op] (a3, b3)

_mm_max_ss

MAXSS

Computes Maximum

[op] (a0, b0)

a1

a2

a3

_mm_max_ps

MAXPS

Computes Maximum

[op] (a0, b0)

[op] (a1, b1)

[op] (a2, b2)

[op] (a3, b3)

 

__m128 _mm_add_ss(__m128 a, __m128 b )

Adds the lower SP FP (single-precision, floating-point) values of a and b; the upper 3 SP FP values are passed through from a.

r0 := a0 + b0

r1 := a1 ; r2 := a2 ; r3 := a3

 

__m128 _mm_add_ps(__m128 a, __m128 b )

Adds the four SP FP values of a and b.

r0 := a0 + b0

r1 := a1 + b1

r2 := a2 + b2

r3 := a3 + b3

 

__m128 _mm_sub_ss(__m128 a, __m128 b )

Subtracts the lower SP FP value of b from the lower SP FP value of a; the upper 3 SP FP values are passed through from a.

r0 := a0 - b0

r1 := a1 ; r2 := a2 ; r3 := a3

 

__m128 _mm_sub_ps(__m128 a, __m128 b )

Subtracts the four SP FP values of b from the corresponding four SP FP values of a.

r0 := a0 - b0

r1 := a1 - b1

r2 := a2 - b2

r3 := a3 - b3

 

__m128 _mm_mul_ss(__m128 a, __m128 b )

Multiplies the lower SP FP values of a and b; the upper 3 SP FP values are passed through from a.

r0 := a0 * b0

r1 := a1 ; r2 := a2 ; r3 := a3

 

__m128 _mm_mul_ps(__m128 a, __m128 b )

Multiplies the four SP FP values of a and b.

r0 := a0 * b0

r1 := a1 * b1

r2 := a2 * b2

r3 := a3 * b3

 

__m128 _mm_div_ss(__m128 a, __m128 b )

Divides the lower SP FP value of a by the lower SP FP value of b; the upper 3 SP FP values are passed through from a.

r0 := a0 / b0

r1 := a1 ; r2 := a2 ; r3 := a3

 

__m128 _mm_div_ps(__m128 a, __m128 b )

Divides the four SP FP values of a by the corresponding four SP FP values of b.

r0 := a0 / b0

r1 := a1 / b1

r2 := a2 / b2

r3 := a3 / b3

 

__m128 _mm_sqrt_ss(__m128 a )

Computes the square root of the lower SP FP value of a; the upper 3 SP FP values are passed through.

r0 := sqrt(a0)

r1 := a1 ; r2 := a2 ; r3 := a3

 

__m128 _mm_sqrt_ps(__m128 a )

Computes the square roots of the four SP FP values of a.

r0 := sqrt(a0)

r1 := sqrt(a1)

r2 := sqrt(a2)

r3 := sqrt(a3)

 

__m128 _mm_rcp_ss(__m128 a )

Computes the approximation of the reciprocal of the lower SP FP value of a; the upper 3 SP FP values are passed through.

r0 := recip(a0)

r1 := a1 ; r2 := a2 ; r3 := a3

 

__m128 _mm_rcp_ps(__m128 a )

Computes the approximations of reciprocals of the four SP FP values of a.

r0 := recip(a0)

r1 := recip(a1)

r2 := recip(a2)

r3 := recip(a3)

 

__m128 _mm_rsqrt_ss(__m128 a )

Computes the approximation of the reciprocal of the square root of the lower SP FP value of a; the upper 3 SP FP values are passed through.

r0 := recip(sqrt(a0))

r1 := a1 ; r2 := a2 ; r3 := a3

 

__m128 _mm_rsqrt_ps(__m128 a )

Computes the approximations of the reciprocals of the square roots of the four SP FP values of a.

r0 := recip(sqrt(a0))

r1 := recip(sqrt(a1))

r2 := recip(sqrt(a2))

r3 := recip(sqrt(a3))

 

__m128 _mm_min_ss(__m128 a, __m128 b )

Computes the minimum of the lower SP FP values of a and b; the upper 3 SP FP values are passed through from a.

r0 := min(a0, b0)

r1 := a1 ; r2 := a2 ; r3 := a3

 

__m128 _mm_min_ps(__m128 a, __m128 b )

Computes the minima of the four SP FP values of a and b.

r0 := min(a0, b0)

r1 := min(a1, b1)

r2 := min(a2, b2)

r3 := min(a3, b3)

 

__m128 _mm_max_ss(__m128 a, __m128 b )

Computes the maximum of the lower SP FP values of a and b; the upper 3 SP FP values are passed through from a.

r0 := max(a0, b0)

r1 := a1 ; r2 := a2 ; r3 := a3

 

__m128 _mm_max_ps(__m128 a, __m128 b )

Computes the maxima of the four SP FP values of a and b.

r0 := max(a0, b0)

r1 := max(a1, b1)

r2 := max(a2, b2)

r3 := max(a3, b3)