AVX_ALL-Miscellaneous-YMM#

_mm256_movemask_pd#

Tech:

AVX_ALL

Category:

Miscellaneous

Header:

immintrin.h

Searchable:

AVX_ALL-Miscellaneous-YMM

Register:

YMM 256 bit

Return Type:

int

Param Types:

__m256d a

Param ETypes:

FP64 a

int _mm256_movemask_pd(__m256d a);

Intel Description

Set each bit of mask “dst” based on the most significant bit of the corresponding packed double-precision (64-bit) floating-point element in “a”.

Intel Implementation Psudeo-Code

FOR j := 0 to 3
        i := j*64
        IF a[i+63]
                dst[j] := 1
        ELSE
                dst[j] := 0
        FI
ENDFOR
dst[MAX:4] := 0

_mm256_movemask_ps#

Tech:

AVX_ALL

Category:

Miscellaneous

Header:

immintrin.h

Searchable:

AVX_ALL-Miscellaneous-YMM

Register:

YMM 256 bit

Return Type:

int

Param Types:

__m256 a

Param ETypes:

FP32 a

int _mm256_movemask_ps(__m256 a);

Intel Description

Set each bit of mask “dst” based on the most significant bit of the corresponding packed single-precision (32-bit) floating-point element in “a”.

Intel Implementation Psudeo-Code

FOR j := 0 to 7
        i := j*32
        IF a[i+31]
                dst[j] := 1
        ELSE
                dst[j] := 0
        FI
ENDFOR
dst[MAX:8] := 0

_mm256_alignr_epi8#

Tech:

AVX_ALL

Category:

Miscellaneous

Header:

immintrin.h

Searchable:

AVX_ALL-Miscellaneous-YMM

Register:

YMM 256 bit

Return Type:

__m256i

Param Types:

__m256i a, __m256i b, const int imm8

Param ETypes:

UI8 a, UI8 b, IMM imm8

__m256i _mm256_alignr_epi8(__m256i a, __m256i b,
                           const int imm8)

Intel Description

Concatenate pairs of 16-byte blocks in “a” and “b” into a 32-byte temporary result, shift the result right by “imm8” bytes, and store the low 16 bytes in “dst”.

Intel Implementation Psudeo-Code

FOR j := 0 to 1
        i := j*128
        tmp[255:0] := ((a[i+127:i] << 128)[255:0] OR b[i+127:i]) >> (imm8*8)
        dst[i+127:i] := tmp[127:0]
ENDFOR
dst[MAX:256] := 0

_mm256_movemask_epi8#

Tech:

AVX_ALL

Category:

Miscellaneous

Header:

immintrin.h

Searchable:

AVX_ALL-Miscellaneous-YMM

Register:

YMM 256 bit

Return Type:

int

Param Types:

__m256i a

Param ETypes:

UI8 a

int _mm256_movemask_epi8(__m256i a);

Intel Description

Create mask from the most significant bit of each 8-bit element in “a”, and store the result in “dst”.

Intel Implementation Psudeo-Code

FOR j := 0 to 31
        i := j*8
        dst[j] := a[i+7]
ENDFOR

_mm256_mpsadbw_epu8#

Tech:

AVX_ALL

Category:

Miscellaneous

Header:

immintrin.h

Searchable:

AVX_ALL-Miscellaneous-YMM

Register:

YMM 256 bit

Return Type:

__m256i

Param Types:

__m256i a, __m256i b, const int imm8

Param ETypes:

UI8 a, UI8 b, IMM imm8

__m256i _mm256_mpsadbw_epu8(__m256i a, __m256i b,
                            const int imm8)

Intel Description

Compute the sum of absolute differences (SADs) of quadruplets of unsigned 8-bit integers in “a” compared to those in “b”, and store the 16-bit results in “dst”.

Eight SADs are performed for each 128-bit lane using one quadruplet from “b” and eight quadruplets from “a”. One quadruplet is selected from “b” starting at on the offset specified in “imm8”. Eight quadruplets are formed from sequential 8-bit integers selected from “a” starting at the offset specified in “imm8”.

Intel Implementation Psudeo-Code

DEFINE MPSADBW(a[127:0], b[127:0], imm8[2:0]) {
        a_offset := imm8[2]*32
        b_offset := imm8[1:0]*32
        FOR j := 0 to 7
                i := j*8
                k := a_offset+i
                l := b_offset
                tmp[i*2+15:i*2] := ABS(Signed(a[k+7:k] - b[l+7:l])) + ABS(Signed(a[k+15:k+8] - b[l+15:l+8])) + \
                                   ABS(Signed(a[k+23:k+16] - b[l+23:l+16])) + ABS(Signed(a[k+31:k+24] - b[l+31:l+24]))
        ENDFOR
        RETURN tmp[127:0]
}
dst[127:0] := MPSADBW(a[127:0], b[127:0], imm8[2:0])
dst[255:128] := MPSADBW(a[255:128], b[255:128], imm8[5:3])
dst[MAX:256] := 0

_mm256_packs_epi16#

Tech:

AVX_ALL

Category:

Miscellaneous

Header:

immintrin.h

Searchable:

AVX_ALL-Miscellaneous-YMM

Register:

YMM 256 bit

Return Type:

__m256i

Param Types:

__m256i a, __m256i b

Param ETypes:

SI16 a, SI16 b

__m256i _mm256_packs_epi16(__m256i a, __m256i b);

Intel Description

Convert packed signed 16-bit integers from “a” and “b” to packed 8-bit integers using signed saturation, and store the results in “dst”.

Intel Implementation Psudeo-Code

dst[7:0] := Saturate8(a[15:0])
dst[15:8] := Saturate8(a[31:16])
dst[23:16] := Saturate8(a[47:32])
dst[31:24] := Saturate8(a[63:48])
dst[39:32] := Saturate8(a[79:64])
dst[47:40] := Saturate8(a[95:80])
dst[55:48] := Saturate8(a[111:96])
dst[63:56] := Saturate8(a[127:112])
dst[71:64] := Saturate8(b[15:0])
dst[79:72] := Saturate8(b[31:16])
dst[87:80] := Saturate8(b[47:32])
dst[95:88] := Saturate8(b[63:48])
dst[103:96] := Saturate8(b[79:64])
dst[111:104] := Saturate8(b[95:80])
dst[119:112] := Saturate8(b[111:96])
dst[127:120] := Saturate8(b[127:112])
dst[135:128] := Saturate8(a[143:128])
dst[143:136] := Saturate8(a[159:144])
dst[151:144] := Saturate8(a[175:160])
dst[159:152] := Saturate8(a[191:176])
dst[167:160] := Saturate8(a[207:192])
dst[175:168] := Saturate8(a[223:208])
dst[183:176] := Saturate8(a[239:224])
dst[191:184] := Saturate8(a[255:240])
dst[199:192] := Saturate8(b[143:128])
dst[207:200] := Saturate8(b[159:144])
dst[215:208] := Saturate8(b[175:160])
dst[223:216] := Saturate8(b[191:176])
dst[231:224] := Saturate8(b[207:192])
dst[239:232] := Saturate8(b[223:208])
dst[247:240] := Saturate8(b[239:224])
dst[255:248] := Saturate8(b[255:240])
dst[MAX:256] := 0

_mm256_packs_epi32#

Tech:

AVX_ALL

Category:

Miscellaneous

Header:

immintrin.h

Searchable:

AVX_ALL-Miscellaneous-YMM

Register:

YMM 256 bit

Return Type:

__m256i

Param Types:

__m256i a, __m256i b

Param ETypes:

SI32 a, SI32 b

__m256i _mm256_packs_epi32(__m256i a, __m256i b);

Intel Description

Convert packed signed 32-bit integers from “a” and “b” to packed 16-bit integers using signed saturation, and store the results in “dst”.

Intel Implementation Psudeo-Code

dst[15:0] := Saturate16(a[31:0])
dst[31:16] := Saturate16(a[63:32])
dst[47:32] := Saturate16(a[95:64])
dst[63:48] := Saturate16(a[127:96])
dst[79:64] := Saturate16(b[31:0])
dst[95:80] := Saturate16(b[63:32])
dst[111:96] := Saturate16(b[95:64])
dst[127:112] := Saturate16(b[127:96])
dst[143:128] := Saturate16(a[159:128])
dst[159:144] := Saturate16(a[191:160])
dst[175:160] := Saturate16(a[223:192])
dst[191:176] := Saturate16(a[255:224])
dst[207:192] := Saturate16(b[159:128])
dst[223:208] := Saturate16(b[191:160])
dst[239:224] := Saturate16(b[223:192])
dst[255:240] := Saturate16(b[255:224])
dst[MAX:256] := 0

_mm256_packus_epi16#

Tech:

AVX_ALL

Category:

Miscellaneous

Header:

immintrin.h

Searchable:

AVX_ALL-Miscellaneous-YMM

Register:

YMM 256 bit

Return Type:

__m256i

Param Types:

__m256i a, __m256i b

Param ETypes:

SI16 a, SI16 b

__m256i _mm256_packus_epi16(__m256i a, __m256i b);

Intel Description

Convert packed signed 16-bit integers from “a” and “b” to packed 8-bit integers using unsigned saturation, and store the results in “dst”.

Intel Implementation Psudeo-Code

dst[7:0] := SaturateU8(a[15:0])
dst[15:8] := SaturateU8(a[31:16])
dst[23:16] := SaturateU8(a[47:32])
dst[31:24] := SaturateU8(a[63:48])
dst[39:32] := SaturateU8(a[79:64])
dst[47:40] := SaturateU8(a[95:80])
dst[55:48] := SaturateU8(a[111:96])
dst[63:56] := SaturateU8(a[127:112])
dst[71:64] := SaturateU8(b[15:0])
dst[79:72] := SaturateU8(b[31:16])
dst[87:80] := SaturateU8(b[47:32])
dst[95:88] := SaturateU8(b[63:48])
dst[103:96] := SaturateU8(b[79:64])
dst[111:104] := SaturateU8(b[95:80])
dst[119:112] := SaturateU8(b[111:96])
dst[127:120] := SaturateU8(b[127:112])
dst[135:128] := SaturateU8(a[143:128])
dst[143:136] := SaturateU8(a[159:144])
dst[151:144] := SaturateU8(a[175:160])
dst[159:152] := SaturateU8(a[191:176])
dst[167:160] := SaturateU8(a[207:192])
dst[175:168] := SaturateU8(a[223:208])
dst[183:176] := SaturateU8(a[239:224])
dst[191:184] := SaturateU8(a[255:240])
dst[199:192] := SaturateU8(b[143:128])
dst[207:200] := SaturateU8(b[159:144])
dst[215:208] := SaturateU8(b[175:160])
dst[223:216] := SaturateU8(b[191:176])
dst[231:224] := SaturateU8(b[207:192])
dst[239:232] := SaturateU8(b[223:208])
dst[247:240] := SaturateU8(b[239:224])
dst[255:248] := SaturateU8(b[255:240])
dst[MAX:256] := 0

_mm256_packus_epi32#

Tech:

AVX_ALL

Category:

Miscellaneous

Header:

immintrin.h

Searchable:

AVX_ALL-Miscellaneous-YMM

Register:

YMM 256 bit

Return Type:

__m256i

Param Types:

__m256i a, __m256i b

Param ETypes:

SI32 a, SI32 b

__m256i _mm256_packus_epi32(__m256i a, __m256i b);

Intel Description

Convert packed signed 32-bit integers from “a” and “b” to packed 16-bit integers using unsigned saturation, and store the results in “dst”.

Intel Implementation Psudeo-Code

dst[15:0] := SaturateU16(a[31:0])
dst[31:16] := SaturateU16(a[63:32])
dst[47:32] := SaturateU16(a[95:64])
dst[63:48] := SaturateU16(a[127:96])
dst[79:64] := SaturateU16(b[31:0])
dst[95:80] := SaturateU16(b[63:32])
dst[111:96] := SaturateU16(b[95:64])
dst[127:112] := SaturateU16(b[127:96])
dst[143:128] := SaturateU16(a[159:128])
dst[159:144] := SaturateU16(a[191:160])
dst[175:160] := SaturateU16(a[223:192])
dst[191:176] := SaturateU16(a[255:224])
dst[207:192] := SaturateU16(b[159:128])
dst[223:208] := SaturateU16(b[191:160])
dst[239:224] := SaturateU16(b[223:192])
dst[255:240] := SaturateU16(b[255:224])
dst[MAX:256] := 0