SSE_ALL-Miscellaneous-XMM#

_mm_sad_pu8#

Tech:: SSE_ALL
Category:: Miscellaneous
Header:: xmmintrin.h
Searchable:: SSE_ALL-Miscellaneous-XMM
Register:: XMM 128 bit
Return Type:: __m64
Param Types:: __m64 a, __m64 b
Param ETypes:: UI8 a, UI8 b

__m64 _mm_sad_pu8(__m64 a, __m64 b);

Intel Description

Compute the absolute differences of packed unsigned 8-bit integers in “a” and “b”, then horizontally sum the 8 differences to produce a single unsigned 16-bit integer, and store this unsigned 16-bit integer in the low 16 bits of “dst”; the upper 48 bits of “dst” are set to zero.

Intel Implementation Pseudo-Code

FOR j := 0 to 7
        i := j*8
        tmp[i+7:i] := ABS(a[i+7:i] - b[i+7:i])
ENDFOR
dst[15:0] := tmp[7:0] + tmp[15:8] + tmp[23:16] + tmp[31:24] + tmp[39:32] + tmp[47:40] + tmp[55:48] + tmp[63:56]
dst[63:16] := 0

_mm_movemask_pi8#

Tech:: SSE_ALL
Category:: Miscellaneous
Header:: xmmintrin.h
Searchable:: SSE_ALL-Miscellaneous-XMM
Register:: XMM 128 bit
Return Type:: int
Param Types:: __m64 a
Param ETypes:: UI8 a

int _mm_movemask_pi8(__m64 a);

Intel Description

Create mask from the most significant bit of each 8-bit element in “a”, and store the result in “dst”.

Intel Implementation Pseudo-Code

FOR j := 0 to 7
        i := j*8
        dst[j] := a[i+7]
ENDFOR
dst[MAX:8] := 0

_mm_movemask_ps#

Tech:: SSE_ALL
Category:: Miscellaneous
Header:: xmmintrin.h
Searchable:: SSE_ALL-Miscellaneous-XMM
Register:: XMM 128 bit
Return Type:: int
Param Types:: __m128 a
Param ETypes:: FP32 a

int _mm_movemask_ps(__m128 a);

Intel Description

Set each bit of mask “dst” based on the most significant bit of the corresponding packed single-precision (32-bit) floating-point element in “a”.

Intel Implementation Pseudo-Code

FOR j := 0 to 3
        i := j*32
        IF a[i+31]
                dst[j] := 1
        ELSE
                dst[j] := 0
        FI
ENDFOR
dst[MAX:4] := 0

_mm_sad_epu8#

Tech:: SSE_ALL
Category:: Miscellaneous
Header:: emmintrin.h
Searchable:: SSE_ALL-Miscellaneous-XMM
Register:: XMM 128 bit
Return Type:: __m128i
Param Types:: __m128i a, __m128i b
Param ETypes:: UI8 a, UI8 b

__m128i _mm_sad_epu8(__m128i a, __m128i b);

Intel Description

Compute the absolute differences of packed unsigned 8-bit integers in “a” and “b”, then horizontally sum each consecutive 8 differences to produce two unsigned 16-bit integers, and pack these unsigned 16-bit integers in the low 16 bits of 64-bit elements in “dst”.

Intel Implementation Pseudo-Code

FOR j := 0 to 15
        i := j*8
        tmp[i+7:i] := ABS(a[i+7:i] - b[i+7:i])
ENDFOR
FOR j := 0 to 1
        i := j*64
        dst[i+15:i] := tmp[i+7:i] + tmp[i+15:i+8] + tmp[i+23:i+16] + tmp[i+31:i+24] + \
                       tmp[i+39:i+32] + tmp[i+47:i+40] + tmp[i+55:i+48] + tmp[i+63:i+56]
        dst[i+63:i+16] := 0
ENDFOR

_mm_movepi64_pi64#

Tech:: SSE_ALL
Category:: Miscellaneous
Header:: emmintrin.h
Searchable:: SSE_ALL-Miscellaneous-XMM
Register:: XMM 128 bit
Return Type:: __m64
Param Types:: __m128i a
Param ETypes:: UI64 a

__m64 _mm_movepi64_pi64(__m128i a);

Intel Description

Copy the lower 64-bit integer in “a” to “dst”.

Intel Implementation Pseudo-Code

dst[63:0] := a[63:0]

_mm_packs_epi16#

Tech:: SSE_ALL
Category:: Miscellaneous
Header:: emmintrin.h
Searchable:: SSE_ALL-Miscellaneous-XMM
Register:: XMM 128 bit
Return Type:: __m128i
Param Types:: __m128i a, __m128i b
Param ETypes:: SI16 a, SI16 b

__m128i _mm_packs_epi16(__m128i a, __m128i b);

Intel Description

Convert packed signed 16-bit integers from “a” and “b” to packed 8-bit integers using signed saturation, and store the results in “dst”.

Intel Implementation Pseudo-Code

dst[7:0] := Saturate8(a[15:0])
dst[15:8] := Saturate8(a[31:16])
dst[23:16] := Saturate8(a[47:32])
dst[31:24] := Saturate8(a[63:48])
dst[39:32] := Saturate8(a[79:64])
dst[47:40] := Saturate8(a[95:80])
dst[55:48] := Saturate8(a[111:96])
dst[63:56] := Saturate8(a[127:112])
dst[71:64] := Saturate8(b[15:0])
dst[79:72] := Saturate8(b[31:16])
dst[87:80] := Saturate8(b[47:32])
dst[95:88] := Saturate8(b[63:48])
dst[103:96] := Saturate8(b[79:64])
dst[111:104] := Saturate8(b[95:80])
dst[119:112] := Saturate8(b[111:96])
dst[127:120] := Saturate8(b[127:112])

_mm_packs_epi32#

Tech:: SSE_ALL
Category:: Miscellaneous
Header:: emmintrin.h
Searchable:: SSE_ALL-Miscellaneous-XMM
Register:: XMM 128 bit
Return Type:: __m128i
Param Types:: __m128i a, __m128i b
Param ETypes:: SI32 a, SI32 b

__m128i _mm_packs_epi32(__m128i a, __m128i b);

Intel Description

Convert packed signed 32-bit integers from “a” and “b” to packed 16-bit integers using signed saturation, and store the results in “dst”.

Intel Implementation Pseudo-Code

dst[15:0] := Saturate16(a[31:0])
dst[31:16] := Saturate16(a[63:32])
dst[47:32] := Saturate16(a[95:64])
dst[63:48] := Saturate16(a[127:96])
dst[79:64] := Saturate16(b[31:0])
dst[95:80] := Saturate16(b[63:32])
dst[111:96] := Saturate16(b[95:64])
dst[127:112] := Saturate16(b[127:96])

_mm_packus_epi16#

Tech:: SSE_ALL
Category:: Miscellaneous
Header:: emmintrin.h
Searchable:: SSE_ALL-Miscellaneous-XMM
Register:: XMM 128 bit
Return Type:: __m128i
Param Types:: __m128i a, __m128i b
Param ETypes:: SI16 a, SI16 b

__m128i _mm_packus_epi16(__m128i a, __m128i b);

Intel Description

Convert packed signed 16-bit integers from “a” and “b” to packed 8-bit integers using unsigned saturation, and store the results in “dst”.

Intel Implementation Pseudo-Code

dst[7:0] := SaturateU8(a[15:0])
dst[15:8] := SaturateU8(a[31:16])
dst[23:16] := SaturateU8(a[47:32])
dst[31:24] := SaturateU8(a[63:48])
dst[39:32] := SaturateU8(a[79:64])
dst[47:40] := SaturateU8(a[95:80])
dst[55:48] := SaturateU8(a[111:96])
dst[63:56] := SaturateU8(a[127:112])
dst[71:64] := SaturateU8(b[15:0])
dst[79:72] := SaturateU8(b[31:16])
dst[87:80] := SaturateU8(b[47:32])
dst[95:88] := SaturateU8(b[63:48])
dst[103:96] := SaturateU8(b[79:64])
dst[111:104] := SaturateU8(b[95:80])
dst[119:112] := SaturateU8(b[111:96])
dst[127:120] := SaturateU8(b[127:112])

_mm_movemask_epi8#

Tech:: SSE_ALL
Category:: Miscellaneous
Header:: emmintrin.h
Searchable:: SSE_ALL-Miscellaneous-XMM
Register:: XMM 128 bit
Return Type:: int
Param Types:: __m128i a
Param ETypes:: UI8 a

int _mm_movemask_epi8(__m128i a);

Intel Description

Create mask from the most significant bit of each 8-bit element in “a”, and store the result in “dst”.

Intel Implementation Pseudo-Code

FOR j := 0 to 15
        i := j*8
        dst[j] := a[i+7]
ENDFOR
dst[MAX:16] := 0

_mm_movemask_pd#

Tech:: SSE_ALL
Category:: Miscellaneous
Header:: emmintrin.h
Searchable:: SSE_ALL-Miscellaneous-XMM
Register:: XMM 128 bit
Return Type:: int
Param Types:: __m128d a
Param ETypes:: FP64 a

int _mm_movemask_pd(__m128d a);

Intel Description

Set each bit of mask “dst” based on the most significant bit of the corresponding packed double-precision (64-bit) floating-point element in “a”.

Intel Implementation Pseudo-Code

FOR j := 0 to 1
        i := j*64
        IF a[i+63]
                dst[j] := 1
        ELSE
                dst[j] := 0
        FI
ENDFOR
dst[MAX:2] := 0

_mm_mpsadbw_epu8#

Tech:: SSE_ALL
Category:: Miscellaneous
Header:: smmintrin.h
Searchable:: SSE_ALL-Miscellaneous-XMM
Register:: XMM 128 bit
Return Type:: __m128i
Param Types:: __m128i a, __m128i b, const int imm8
Param ETypes:: UI8 a, UI8 b, IMM imm8

__m128i _mm_mpsadbw_epu8(__m128i a, __m128i b,
                         const int imm8);

Intel Description

Compute the sum of absolute differences (SADs) of quadruplets of unsigned 8-bit integers in “a” compared to those in “b”, and store the 16-bit results in “dst”. Eight SADs are performed using one quadruplet from “b” and eight quadruplets from “a”. One quadruplet is selected from “b” starting at the offset specified in “imm8” (bits [1:0], in units of 32 bits). Eight quadruplets are formed from sequential 8-bit integers selected from “a” starting at the offset specified in “imm8” (bit [2], in units of 32 bits).

Intel Implementation Pseudo-Code

DEFINE MPSADBW(a[127:0], b[127:0], imm8[2:0]) {
        a_offset := imm8[2]*32
        b_offset := imm8[1:0]*32
        FOR j := 0 to 7
                i := j*8
                k := a_offset+i
                l := b_offset
                tmp[i*2+15:i*2] := ABS(Signed(a[k+7:k] - b[l+7:l])) + ABS(Signed(a[k+15:k+8] - b[l+15:l+8])) + \
                                   ABS(Signed(a[k+23:k+16] - b[l+23:l+16])) + ABS(Signed(a[k+31:k+24] - b[l+31:l+24]))
        ENDFOR
        RETURN tmp[127:0]
}
dst[127:0] := MPSADBW(a[127:0], b[127:0], imm8[2:0])

_mm_packus_epi32#

Tech:: SSE_ALL
Category:: Miscellaneous
Header:: smmintrin.h
Searchable:: SSE_ALL-Miscellaneous-XMM
Register:: XMM 128 bit
Return Type:: __m128i
Param Types:: __m128i a, __m128i b
Param ETypes:: SI32 a, SI32 b

__m128i _mm_packus_epi32(__m128i a, __m128i b);

Intel Description

Convert packed signed 32-bit integers from “a” and “b” to packed 16-bit integers using unsigned saturation, and store the results in “dst”.

Intel Implementation Pseudo-Code

dst[15:0] := SaturateU16(a[31:0])
dst[31:16] := SaturateU16(a[63:32])
dst[47:32] := SaturateU16(a[95:64])
dst[63:48] := SaturateU16(a[127:96])
dst[79:64] := SaturateU16(b[31:0])
dst[95:80] := SaturateU16(b[63:32])
dst[111:96] := SaturateU16(b[95:64])
dst[127:112] := SaturateU16(b[127:96])

_mm_minpos_epu16#

Tech:: SSE_ALL
Category:: Miscellaneous
Header:: smmintrin.h
Searchable:: SSE_ALL-Miscellaneous-XMM
Register:: XMM 128 bit
Return Type:: __m128i
Param Types:: __m128i a
Param ETypes:: UI16 a

__m128i _mm_minpos_epu16(__m128i a);

Intel Description

Horizontally compute the minimum amongst the packed unsigned 16-bit integers in “a”, store the minimum and index in “dst”, and zero the remaining bits in “dst”.

Intel Implementation Pseudo-Code

index[2:0] := 0
min[15:0] := a[15:0]
FOR j := 0 to 7
        i := j*16
        IF a[i+15:i] < min[15:0]
                index[2:0] := j
                min[15:0] := a[i+15:i]
        FI
ENDFOR
dst[15:0] := min[15:0]
dst[18:16] := index[2:0]
dst[127:19] := 0

_mm_alignr_epi8#

Tech:: SSE_ALL
Category:: Miscellaneous
Header:: tmmintrin.h
Searchable:: SSE_ALL-Miscellaneous-XMM
Register:: XMM 128 bit
Return Type:: __m128i
Param Types:: __m128i a, __m128i b, int imm8
Param ETypes:: UI8 a, UI8 b, IMM imm8

__m128i _mm_alignr_epi8(__m128i a, __m128i b, int imm8);

Intel Description

Concatenate 16-byte blocks in “a” and “b” into a 32-byte temporary result, shift the result right by “imm8” bytes, and store the low 16 bytes in “dst”.

Intel Implementation Pseudo-Code

tmp[255:0] := ((a[127:0] << 128)[255:0] OR b[127:0]) >> (imm8*8)
dst[127:0] := tmp[127:0]

_mm_alignr_pi8#

Tech:: SSE_ALL
Category:: Miscellaneous
Header:: tmmintrin.h
Searchable:: SSE_ALL-Miscellaneous-XMM
Register:: XMM 128 bit
Return Type:: __m64
Param Types:: __m64 a, __m64 b, int imm8
Param ETypes:: UI8 a, UI8 b, IMM imm8

__m64 _mm_alignr_pi8(__m64 a, __m64 b, int imm8);

Intel Description

Concatenate 8-byte blocks in “a” and “b” into a 16-byte temporary result, shift the result right by “imm8” bytes, and store the low 8 bytes in “dst”.

Intel Implementation Pseudo-Code

tmp[127:0] := ((a[63:0] << 64)[127:0] OR b[63:0]) >> (imm8*8)
dst[63:0] := tmp[63:0]

SSE_ALL-Miscellaneous-XMM

Contents

SSE_ALL-Miscellaneous-XMM#

_mm_sad_pu8#

_mm_movemask_pi8#

_mm_movemask_ps#

_mm_sad_epu8#

_mm_movepi64_pi64#

_mm_packs_epi16#

_mm_packs_epi32#

_mm_packus_epi16#

_mm_movemask_epi8#

_mm_movemask_pd#

_mm_mpsadbw_epu8#

_mm_packus_epi32#

_mm_minpos_epu16#

_mm_alignr_epi8#

_mm_alignr_pi8#