AVX_ALL-Swizzle-YMM#

_mm256_blend_pd#

Tech:: AVX_ALL
Category:: Swizzle
Header:: immintrin.h
Searchable:: AVX_ALL-Swizzle-YMM
Register:: YMM 256 bit
Return Type:: __m256d
Param Types:: __m256d a, __m256d b, const int imm8
Param ETypes:: FP64 a, FP64 b, IMM imm8

__m256d _mm256_blend_pd(__m256d a, __m256d b,
                        const int imm8);

Intel Description

Blend packed double-precision (64-bit) floating-point elements from “a” and “b” using control mask “imm8”, and store the results in “dst”.

Intel Implementation Pseudo-Code

FOR j := 0 to 3
        i := j*64
        IF imm8[j]
                dst[i+63:i] := b[i+63:i]
        ELSE
                dst[i+63:i] := a[i+63:i]
        FI
ENDFOR
dst[MAX:256] := 0

_mm256_blend_ps#

Tech:: AVX_ALL
Category:: Swizzle
Header:: immintrin.h
Searchable:: AVX_ALL-Swizzle-YMM
Register:: YMM 256 bit
Return Type:: __m256
Param Types:: __m256 a, __m256 b, const int imm8
Param ETypes:: FP32 a, FP32 b, IMM imm8

__m256 _mm256_blend_ps(__m256 a, __m256 b, const int imm8);

Intel Description

Blend packed single-precision (32-bit) floating-point elements from “a” and “b” using control mask “imm8”, and store the results in “dst”.

Intel Implementation Pseudo-Code

FOR j := 0 to 7
        i := j*32
        IF imm8[j]
                dst[i+31:i] := b[i+31:i]
        ELSE
                dst[i+31:i] := a[i+31:i]
        FI
ENDFOR
dst[MAX:256] := 0

_mm256_blendv_pd#

Tech:: AVX_ALL
Category:: Swizzle
Header:: immintrin.h
Searchable:: AVX_ALL-Swizzle-YMM
Register:: YMM 256 bit
Return Type:: __m256d
Param Types:: __m256d a, __m256d b, __m256d mask
Param ETypes:: FP64 a, FP64 b, MASK mask

__m256d _mm256_blendv_pd(__m256d a, __m256d b,
                         __m256d mask);

Intel Description

Blend packed double-precision (64-bit) floating-point elements from “a” and “b” using “mask”, and store the results in “dst”.

Intel Implementation Pseudo-Code

FOR j := 0 to 3
        i := j*64
        IF mask[i+63]
                dst[i+63:i] := b[i+63:i]
        ELSE
                dst[i+63:i] := a[i+63:i]
        FI
ENDFOR
dst[MAX:256] := 0

_mm256_blendv_ps#

Tech:: AVX_ALL
Category:: Swizzle
Header:: immintrin.h
Searchable:: AVX_ALL-Swizzle-YMM
Register:: YMM 256 bit
Return Type:: __m256
Param Types:: __m256 a, __m256 b, __m256 mask
Param ETypes:: FP32 a, FP32 b, MASK mask

__m256 _mm256_blendv_ps(__m256 a, __m256 b, __m256 mask);

Intel Description

Blend packed single-precision (32-bit) floating-point elements from “a” and “b” using “mask”, and store the results in “dst”.

Intel Implementation Pseudo-Code

FOR j := 0 to 7
        i := j*32
        IF mask[i+31]
                dst[i+31:i] := b[i+31:i]
        ELSE
                dst[i+31:i] := a[i+31:i]
        FI
ENDFOR
dst[MAX:256] := 0

_mm256_shuffle_pd#

Tech:: AVX_ALL
Category:: Swizzle
Header:: immintrin.h
Searchable:: AVX_ALL-Swizzle-YMM
Register:: YMM 256 bit
Return Type:: __m256d
Param Types:: __m256d a, __m256d b, const int imm8
Param ETypes:: FP64 a, FP64 b, IMM imm8

__m256d _mm256_shuffle_pd(__m256d a, __m256d b,
                          const int imm8);

Intel Description

Shuffle double-precision (64-bit) floating-point elements from “a” and “b” within 128-bit lanes using the control in “imm8”, and store the results in “dst”.

Intel Implementation Pseudo-Code

dst[63:0] := (imm8[0] == 0) ? a[63:0] : a[127:64]
dst[127:64] := (imm8[1] == 0) ? b[63:0] : b[127:64]
dst[191:128] := (imm8[2] == 0) ? a[191:128] : a[255:192]
dst[255:192] := (imm8[3] == 0) ? b[191:128] : b[255:192]
dst[MAX:256] := 0

_mm256_shuffle_ps#

Tech:: AVX_ALL
Category:: Swizzle
Header:: immintrin.h
Searchable:: AVX_ALL-Swizzle-YMM
Register:: YMM 256 bit
Return Type:: __m256
Param Types:: __m256 a, __m256 b, const int imm8
Param ETypes:: FP32 a, FP32 b, IMM imm8

__m256 _mm256_shuffle_ps(__m256 a, __m256 b,
                         const int imm8);

Intel Description

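Shuffle single-precision (32-bit) floating-point elements from “a” and “b” within 128-bit lanes using the control in “imm8”, and store the results in “dst”. Within each lane, the two low result elements are selected from “a” and the two high result elements from “b”.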

Intel Implementation Pseudo-Code

DEFINE SELECT4(src, control) {
        CASE(control[1:0]) OF
        0:      tmp[31:0] := src[31:0]
        1:      tmp[31:0] := src[63:32]
        2:      tmp[31:0] := src[95:64]
        3:      tmp[31:0] := src[127:96]
        ESAC
        RETURN tmp[31:0]
}
dst[31:0] := SELECT4(a[127:0], imm8[1:0])
dst[63:32] := SELECT4(a[127:0], imm8[3:2])
dst[95:64] := SELECT4(b[127:0], imm8[5:4])
dst[127:96] := SELECT4(b[127:0], imm8[7:6])
dst[159:128] := SELECT4(a[255:128], imm8[1:0])
dst[191:160] := SELECT4(a[255:128], imm8[3:2])
dst[223:192] := SELECT4(b[255:128], imm8[5:4])
dst[255:224] := SELECT4(b[255:128], imm8[7:6])
dst[MAX:256] := 0

_mm256_extractf128_ps#

Tech:: AVX_ALL
Category:: Swizzle
Header:: immintrin.h
Searchable:: AVX_ALL-Swizzle-YMM
Register:: YMM 256 bit
Return Type:: __m128
Param Types:: __m256 a, const int imm8
Param ETypes:: FP32 a, IMM imm8

__m128 _mm256_extractf128_ps(__m256 a, const int imm8);

Intel Description

Extract 128 bits (composed of 4 packed single-precision (32-bit) floating-point elements) from “a”, selected with “imm8”, and store the result in “dst”.

Intel Implementation Pseudo-Code

CASE imm8[0] OF
0: dst[127:0] := a[127:0]
1: dst[127:0] := a[255:128]
ESAC
dst[MAX:128] := 0

_mm256_extractf128_pd#

Tech:: AVX_ALL
Category:: Swizzle
Header:: immintrin.h
Searchable:: AVX_ALL-Swizzle-YMM
Register:: YMM 256 bit
Return Type:: __m128d
Param Types:: __m256d a, const int imm8
Param ETypes:: FP64 a, IMM imm8

__m128d _mm256_extractf128_pd(__m256d a, const int imm8);

Intel Description

Extract 128 bits (composed of 2 packed double-precision (64-bit) floating-point elements) from “a”, selected with “imm8”, and store the result in “dst”.

Intel Implementation Pseudo-Code

CASE imm8[0] OF
0: dst[127:0] := a[127:0]
1: dst[127:0] := a[255:128]
ESAC
dst[MAX:128] := 0

_mm256_extractf128_si256#

Tech:: AVX_ALL
Category:: Swizzle
Header:: immintrin.h
Searchable:: AVX_ALL-Swizzle-YMM
Register:: YMM 256 bit
Return Type:: __m128i
Param Types:: __m256i a, const int imm8
Param ETypes:: M128 a, IMM imm8

__m128i _mm256_extractf128_si256(__m256i a, const int imm8);

Intel Description

Extract 128 bits (composed of integer data) from “a”, selected with “imm8”, and store the result in “dst”.

Intel Implementation Pseudo-Code

CASE imm8[0] OF
0: dst[127:0] := a[127:0]
1: dst[127:0] := a[255:128]
ESAC
dst[MAX:128] := 0

_mm256_extract_epi32#

Tech:: AVX_ALL
Category:: Swizzle
Header:: immintrin.h
Searchable:: AVX_ALL-Swizzle-YMM
Register:: YMM 256 bit
Return Type:: __int32
Param Types:: __m256i a, const int index
Param ETypes:: UI32 a, IMM index

__int32 _mm256_extract_epi32(__m256i a, const int index);

Intel Description

Extract a 32-bit integer from “a”, selected with “index”, and store the result in “dst”.

Intel Implementation Pseudo-Code

dst[31:0] := (a[255:0] >> (index[2:0] * 32))[31:0]

_mm256_extract_epi64#

Tech:: AVX_ALL
Category:: Swizzle
Header:: immintrin.h
Searchable:: AVX_ALL-Swizzle-YMM
Register:: YMM 256 bit
Return Type:: __int64
Param Types:: __m256i a, const int index
Param ETypes:: UI64 a, IMM index

__int64 _mm256_extract_epi64(__m256i a, const int index);

Intel Description

Extract a 64-bit integer from “a”, selected with “index”, and store the result in “dst”.

Intel Implementation Pseudo-Code

dst[63:0] := (a[255:0] >> (index[1:0] * 64))[63:0]

_mm256_permutevar_ps#

Tech:: AVX_ALL
Category:: Swizzle
Header:: immintrin.h
Searchable:: AVX_ALL-Swizzle-YMM
Register:: YMM 256 bit
Return Type:: __m256
Param Types:: __m256 a, __m256i b
Param ETypes:: FP32 a, UI32 b

__m256 _mm256_permutevar_ps(__m256 a, __m256i b);

Intel Description

Shuffle single-precision (32-bit) floating-point elements in “a” within 128-bit lanes using the control in “b”, and store the results in “dst”.

Intel Implementation Pseudo-Code

DEFINE SELECT4(src, control) {
        CASE(control[1:0]) OF
        0:      tmp[31:0] := src[31:0]
        1:      tmp[31:0] := src[63:32]
        2:      tmp[31:0] := src[95:64]
        3:      tmp[31:0] := src[127:96]
        ESAC
        RETURN tmp[31:0]
}
dst[31:0] := SELECT4(a[127:0], b[1:0])
dst[63:32] := SELECT4(a[127:0], b[33:32])
dst[95:64] := SELECT4(a[127:0], b[65:64])
dst[127:96] := SELECT4(a[127:0], b[97:96])
dst[159:128] := SELECT4(a[255:128], b[129:128])
dst[191:160] := SELECT4(a[255:128], b[161:160])
dst[223:192] := SELECT4(a[255:128], b[193:192])
dst[255:224] := SELECT4(a[255:128], b[225:224])
dst[MAX:256] := 0

_mm256_permute_ps#

Tech:: AVX_ALL
Category:: Swizzle
Header:: immintrin.h
Searchable:: AVX_ALL-Swizzle-YMM
Register:: YMM 256 bit
Return Type:: __m256
Param Types:: __m256 a, int imm8
Param ETypes:: FP32 a, IMM imm8

__m256 _mm256_permute_ps(__m256 a, int imm8);

Intel Description

Shuffle single-precision (32-bit) floating-point elements in “a” within 128-bit lanes using the control in “imm8”, and store the results in “dst”.

Intel Implementation Pseudo-Code

DEFINE SELECT4(src, control) {
        CASE(control[1:0]) OF
        0:      tmp[31:0] := src[31:0]
        1:      tmp[31:0] := src[63:32]
        2:      tmp[31:0] := src[95:64]
        3:      tmp[31:0] := src[127:96]
        ESAC
        RETURN tmp[31:0]
}
dst[31:0] := SELECT4(a[127:0], imm8[1:0])
dst[63:32] := SELECT4(a[127:0], imm8[3:2])
dst[95:64] := SELECT4(a[127:0], imm8[5:4])
dst[127:96] := SELECT4(a[127:0], imm8[7:6])
dst[159:128] := SELECT4(a[255:128], imm8[1:0])
dst[191:160] := SELECT4(a[255:128], imm8[3:2])
dst[223:192] := SELECT4(a[255:128], imm8[5:4])
dst[255:224] := SELECT4(a[255:128], imm8[7:6])
dst[MAX:256] := 0

_mm256_permutevar_pd#

Tech:: AVX_ALL
Category:: Swizzle
Header:: immintrin.h
Searchable:: AVX_ALL-Swizzle-YMM
Register:: YMM 256 bit
Return Type:: __m256d
Param Types:: __m256d a, __m256i b
Param ETypes:: FP64 a, UI64 b

__m256d _mm256_permutevar_pd(__m256d a, __m256i b);

Intel Description

Shuffle double-precision (64-bit) floating-point elements in “a” within 128-bit lanes using the control in “b”, and store the results in “dst”.

Intel Implementation Pseudo-Code

IF (b[1] == 0) dst[63:0] := a[63:0]; FI
IF (b[1] == 1) dst[63:0] := a[127:64]; FI
IF (b[65] == 0) dst[127:64] := a[63:0]; FI
IF (b[65] == 1) dst[127:64] := a[127:64]; FI
IF (b[129] == 0) dst[191:128] := a[191:128]; FI
IF (b[129] == 1) dst[191:128] := a[255:192]; FI
IF (b[193] == 0) dst[255:192] := a[191:128]; FI
IF (b[193] == 1) dst[255:192] := a[255:192]; FI
dst[MAX:256] := 0

_mm256_permute_pd#

Tech:: AVX_ALL
Category:: Swizzle
Header:: immintrin.h
Searchable:: AVX_ALL-Swizzle-YMM
Register:: YMM 256 bit
Return Type:: __m256d
Param Types:: __m256d a, int imm8
Param ETypes:: FP64 a, IMM imm8

__m256d _mm256_permute_pd(__m256d a, int imm8);

Intel Description

Shuffle double-precision (64-bit) floating-point elements in “a” within 128-bit lanes using the control in “imm8”, and store the results in “dst”.

Intel Implementation Pseudo-Code

IF (imm8[0] == 0) dst[63:0] := a[63:0]; FI
IF (imm8[0] == 1) dst[63:0] := a[127:64]; FI
IF (imm8[1] == 0) dst[127:64] := a[63:0]; FI
IF (imm8[1] == 1) dst[127:64] := a[127:64]; FI
IF (imm8[2] == 0) dst[191:128] := a[191:128]; FI
IF (imm8[2] == 1) dst[191:128] := a[255:192]; FI
IF (imm8[3] == 0) dst[255:192] := a[191:128]; FI
IF (imm8[3] == 1) dst[255:192] := a[255:192]; FI
dst[MAX:256] := 0

_mm256_permute2f128_ps#

Tech:: AVX_ALL
Category:: Swizzle
Header:: immintrin.h
Searchable:: AVX_ALL-Swizzle-YMM
Register:: YMM 256 bit
Return Type:: __m256
Param Types:: __m256 a, __m256 b, int imm8
Param ETypes:: FP32 a, FP32 b, IMM imm8

__m256 _mm256_permute2f128_ps(__m256 a, __m256 b, int imm8);

Intel Description

Shuffle 128 bits (composed of 4 packed single-precision (32-bit) floating-point elements) selected by “imm8” from “a” and “b”, and store the results in “dst”.

Intel Implementation Pseudo-Code

DEFINE SELECT4(src1, src2, control) {
        CASE(control[1:0]) OF
        0:      tmp[127:0] := src1[127:0]
        1:      tmp[127:0] := src1[255:128]
        2:      tmp[127:0] := src2[127:0]
        3:      tmp[127:0] := src2[255:128]
        ESAC
        IF control[3]
                tmp[127:0] := 0
        FI
        RETURN tmp[127:0]
}
dst[127:0] := SELECT4(a[255:0], b[255:0], imm8[3:0])
dst[255:128] := SELECT4(a[255:0], b[255:0], imm8[7:4])
dst[MAX:256] := 0

_mm256_permute2f128_pd#

Tech:: AVX_ALL
Category:: Swizzle
Header:: immintrin.h
Searchable:: AVX_ALL-Swizzle-YMM
Register:: YMM 256 bit
Return Type:: __m256d
Param Types:: __m256d a, __m256d b, int imm8
Param ETypes:: FP64 a, FP64 b, IMM imm8

__m256d _mm256_permute2f128_pd(__m256d a, __m256d b,
                               int imm8);

Intel Description

Shuffle 128 bits (composed of 2 packed double-precision (64-bit) floating-point elements) selected by “imm8” from “a” and “b”, and store the results in “dst”.

Intel Implementation Pseudo-Code

DEFINE SELECT4(src1, src2, control) {
        CASE(control[1:0]) OF
        0:      tmp[127:0] := src1[127:0]
        1:      tmp[127:0] := src1[255:128]
        2:      tmp[127:0] := src2[127:0]
        3:      tmp[127:0] := src2[255:128]
        ESAC
        IF control[3]
                tmp[127:0] := 0
        FI
        RETURN tmp[127:0]
}
dst[127:0] := SELECT4(a[255:0], b[255:0], imm8[3:0])
dst[255:128] := SELECT4(a[255:0], b[255:0], imm8[7:4])
dst[MAX:256] := 0

_mm256_permute2f128_si256#

Tech:: AVX_ALL
Category:: Swizzle
Header:: immintrin.h
Searchable:: AVX_ALL-Swizzle-YMM
Register:: YMM 256 bit
Return Type:: __m256i
Param Types:: __m256i a, __m256i b, int imm8
Param ETypes:: M256 a, M256 b, IMM imm8

__m256i _mm256_permute2f128_si256(__m256i a, __m256i b,
                                  int imm8);

Intel Description

Shuffle 128 bits (composed of integer data) selected by “imm8” from “a” and “b”, and store the results in “dst”.

Intel Implementation Pseudo-Code

DEFINE SELECT4(src1, src2, control) {
        CASE(control[1:0]) OF
        0:      tmp[127:0] := src1[127:0]
        1:      tmp[127:0] := src1[255:128]
        2:      tmp[127:0] := src2[127:0]
        3:      tmp[127:0] := src2[255:128]
        ESAC
        IF control[3]
                tmp[127:0] := 0
        FI
        RETURN tmp[127:0]
}
dst[127:0] := SELECT4(a[255:0], b[255:0], imm8[3:0])
dst[255:128] := SELECT4(a[255:0], b[255:0], imm8[7:4])
dst[MAX:256] := 0

_mm256_insertf128_ps#

Tech:: AVX_ALL
Category:: Swizzle
Header:: immintrin.h
Searchable:: AVX_ALL-Swizzle-YMM
Register:: YMM 256 bit
Return Type:: __m256
Param Types:: __m256 a, __m128 b, int imm8
Param ETypes:: FP32 a, FP32 b, IMM imm8

__m256 _mm256_insertf128_ps(__m256 a, __m128 b, int imm8);

Intel Description

Copy “a” to “dst”, then insert 128 bits (composed of 4 packed single-precision (32-bit) floating-point elements) from “b” into “dst” at the location specified by “imm8”.

Intel Implementation Pseudo-Code

dst[255:0] := a[255:0]
CASE (imm8[0]) OF
0: dst[127:0] := b[127:0]
1: dst[255:128] := b[127:0]
ESAC
dst[MAX:256] := 0

_mm256_insertf128_pd#

Tech:: AVX_ALL
Category:: Swizzle
Header:: immintrin.h
Searchable:: AVX_ALL-Swizzle-YMM
Register:: YMM 256 bit
Return Type:: __m256d
Param Types:: __m256d a, __m128d b, int imm8
Param ETypes:: FP64 a, FP64 b, IMM imm8

__m256d _mm256_insertf128_pd(__m256d a, __m128d b,
                             int imm8);

Intel Description

Copy “a” to “dst”, then insert 128 bits (composed of 2 packed double-precision (64-bit) floating-point elements) from “b” into “dst” at the location specified by “imm8”.

Intel Implementation Pseudo-Code

dst[255:0] := a[255:0]
CASE imm8[0] OF
0: dst[127:0] := b[127:0]
1: dst[255:128] := b[127:0]
ESAC
dst[MAX:256] := 0

_mm256_insertf128_si256#

Tech:: AVX_ALL
Category:: Swizzle
Header:: immintrin.h
Searchable:: AVX_ALL-Swizzle-YMM
Register:: YMM 256 bit
Return Type:: __m256i
Param Types:: __m256i a, __m128i b, int imm8
Param ETypes:: M256 a, M128 b, IMM imm8

__m256i _mm256_insertf128_si256(__m256i a, __m128i b,
                                int imm8);

Intel Description

Copy “a” to “dst”, then insert 128 bits from “b” into “dst” at the location specified by “imm8”.

Intel Implementation Pseudo-Code

dst[255:0] := a[255:0]
CASE (imm8[0]) OF
0: dst[127:0] := b[127:0]
1: dst[255:128] := b[127:0]
ESAC
dst[MAX:256] := 0

_mm256_insert_epi8#

Tech:: AVX_ALL
Category:: Swizzle
Header:: immintrin.h
Searchable:: AVX_ALL-Swizzle-YMM
Register:: YMM 256 bit
Return Type:: __m256i
Param Types:: __m256i a, __int8 i, const int index
Param ETypes:: UI8 a, UI8 i, IMM index

__m256i _mm256_insert_epi8(__m256i a, __int8 i,
                           const int index);

Intel Description

Copy “a” to “dst”, and insert the 8-bit integer “i” into “dst” at the location specified by “index”.

Intel Implementation Pseudo-Code

dst[255:0] := a[255:0]
sel := index[4:0]*8
dst[sel+7:sel] := i[7:0]

_mm256_insert_epi16#

Tech:: AVX_ALL
Category:: Swizzle
Header:: immintrin.h
Searchable:: AVX_ALL-Swizzle-YMM
Register:: YMM 256 bit
Return Type:: __m256i
Param Types:: __m256i a, __int16 i, const int index
Param ETypes:: UI16 a, UI16 i, IMM index

__m256i _mm256_insert_epi16(__m256i a, __int16 i,
                            const int index);

Intel Description

Copy “a” to “dst”, and insert the 16-bit integer “i” into “dst” at the location specified by “index”.

Intel Implementation Pseudo-Code

dst[255:0] := a[255:0]
sel := index[3:0]*16
dst[sel+15:sel] := i[15:0]

_mm256_insert_epi32#

Tech:: AVX_ALL
Category:: Swizzle
Header:: immintrin.h
Searchable:: AVX_ALL-Swizzle-YMM
Register:: YMM 256 bit
Return Type:: __m256i
Param Types:: __m256i a, __int32 i, const int index
Param ETypes:: UI32 a, UI32 i, IMM index

__m256i _mm256_insert_epi32(__m256i a, __int32 i,
                            const int index);

Intel Description

Copy “a” to “dst”, and insert the 32-bit integer “i” into “dst” at the location specified by “index”.

Intel Implementation Pseudo-Code

dst[255:0] := a[255:0]
sel := index[2:0]*32
dst[sel+31:sel] := i[31:0]

_mm256_insert_epi64#

Tech:: AVX_ALL
Category:: Swizzle
Header:: immintrin.h
Searchable:: AVX_ALL-Swizzle-YMM
Register:: YMM 256 bit
Return Type:: __m256i
Param Types:: __m256i a, __int64 i, const int index
Param ETypes:: UI64 a, UI64 i, IMM index

__m256i _mm256_insert_epi64(__m256i a, __int64 i,
                            const int index);

Intel Description

Copy “a” to “dst”, and insert the 64-bit integer “i” into “dst” at the location specified by “index”.

Intel Implementation Pseudo-Code

dst[255:0] := a[255:0]
sel := index[1:0]*64
dst[sel+63:sel] := i[63:0]

_mm256_unpackhi_pd#

Tech:: AVX_ALL
Category:: Swizzle
Header:: immintrin.h
Searchable:: AVX_ALL-Swizzle-YMM
Register:: YMM 256 bit
Return Type:: __m256d
Param Types:: __m256d a, __m256d b
Param ETypes:: FP64 a, FP64 b

__m256d _mm256_unpackhi_pd(__m256d a, __m256d b);

Intel Description

Unpack and interleave double-precision (64-bit) floating-point elements from the high half of each 128-bit lane in “a” and “b”, and store the results in “dst”.

Intel Implementation Pseudo-Code

DEFINE INTERLEAVE_HIGH_QWORDS(src1[127:0], src2[127:0]) {
        dst[63:0] := src1[127:64]
        dst[127:64] := src2[127:64]
        RETURN dst[127:0]
}
dst[127:0] := INTERLEAVE_HIGH_QWORDS(a[127:0], b[127:0])
dst[255:128] := INTERLEAVE_HIGH_QWORDS(a[255:128], b[255:128])
dst[MAX:256] := 0

_mm256_unpackhi_ps#

Tech:: AVX_ALL
Category:: Swizzle
Header:: immintrin.h
Searchable:: AVX_ALL-Swizzle-YMM
Register:: YMM 256 bit
Return Type:: __m256
Param Types:: __m256 a, __m256 b
Param ETypes:: FP32 a, FP32 b

__m256 _mm256_unpackhi_ps(__m256 a, __m256 b);

Intel Description

Unpack and interleave single-precision (32-bit) floating-point elements from the high half of each 128-bit lane in “a” and “b”, and store the results in “dst”.

Intel Implementation Pseudo-Code

DEFINE INTERLEAVE_HIGH_DWORDS(src1[127:0], src2[127:0]) {
        dst[31:0] := src1[95:64]
        dst[63:32] := src2[95:64]
        dst[95:64] := src1[127:96]
        dst[127:96] := src2[127:96]
        RETURN dst[127:0]
}
dst[127:0] := INTERLEAVE_HIGH_DWORDS(a[127:0], b[127:0])
dst[255:128] := INTERLEAVE_HIGH_DWORDS(a[255:128], b[255:128])
dst[MAX:256] := 0

_mm256_unpacklo_pd#

Tech:: AVX_ALL
Category:: Swizzle
Header:: immintrin.h
Searchable:: AVX_ALL-Swizzle-YMM
Register:: YMM 256 bit
Return Type:: __m256d
Param Types:: __m256d a, __m256d b
Param ETypes:: FP64 a, FP64 b

__m256d _mm256_unpacklo_pd(__m256d a, __m256d b);

Intel Description

Unpack and interleave double-precision (64-bit) floating-point elements from the low half of each 128-bit lane in “a” and “b”, and store the results in “dst”.

Intel Implementation Pseudo-Code

DEFINE INTERLEAVE_QWORDS(src1[127:0], src2[127:0]) {
        dst[63:0] := src1[63:0]
        dst[127:64] := src2[63:0]
        RETURN dst[127:0]
}
dst[127:0] := INTERLEAVE_QWORDS(a[127:0], b[127:0])
dst[255:128] := INTERLEAVE_QWORDS(a[255:128], b[255:128])
dst[MAX:256] := 0

_mm256_unpacklo_ps#

Tech:: AVX_ALL
Category:: Swizzle
Header:: immintrin.h
Searchable:: AVX_ALL-Swizzle-YMM
Register:: YMM 256 bit
Return Type:: __m256
Param Types:: __m256 a, __m256 b
Param ETypes:: FP32 a, FP32 b

__m256 _mm256_unpacklo_ps(__m256 a, __m256 b);

Intel Description

Unpack and interleave single-precision (32-bit) floating-point elements from the low half of each 128-bit lane in “a” and “b”, and store the results in “dst”.

Intel Implementation Pseudo-Code

DEFINE INTERLEAVE_DWORDS(src1[127:0], src2[127:0]) {
        dst[31:0] := src1[31:0]
        dst[63:32] := src2[31:0]
        dst[95:64] := src1[63:32]
        dst[127:96] := src2[63:32]
        RETURN dst[127:0]
}
dst[127:0] := INTERLEAVE_DWORDS(a[127:0], b[127:0])
dst[255:128] := INTERLEAVE_DWORDS(a[255:128], b[255:128])
dst[MAX:256] := 0

_mm256_broadcast_sd#

Tech:: AVX_ALL
Category:: Swizzle
Header:: immintrin.h
Searchable:: AVX_ALL-Swizzle-YMM
Register:: YMM 256 bit
Return Type:: __m256d
Param Types:: double const * mem_addr
Param ETypes:: FP64 mem_addr

__m256d _mm256_broadcast_sd(double const * mem_addr);

Intel Description

Broadcast a double-precision (64-bit) floating-point element from memory to all elements of “dst”.

Intel Implementation Pseudo-Code

tmp[63:0] := MEM[mem_addr+63:mem_addr]
FOR j := 0 to 3
        i := j*64
        dst[i+63:i] := tmp[63:0]
ENDFOR
dst[MAX:256] := 0

_mm256_broadcast_ps#

Tech:: AVX_ALL
Category:: Swizzle
Header:: immintrin.h
Searchable:: AVX_ALL-Swizzle-YMM
Register:: YMM 256 bit
Return Type:: __m256
Param Types:: __m128 const * mem_addr
Param ETypes:: FP32 mem_addr

__m256 _mm256_broadcast_ps(__m128 const * mem_addr);

Intel Description

Broadcast 128 bits from memory (composed of 4 packed single-precision (32-bit) floating-point elements) to all elements of “dst”.

Intel Implementation Pseudo-Code

tmp[127:0] := MEM[mem_addr+127:mem_addr]
dst[127:0] := tmp[127:0]
dst[255:128] := tmp[127:0]
dst[MAX:256] := 0

_mm256_broadcast_pd#

Tech:: AVX_ALL
Category:: Swizzle
Header:: immintrin.h
Searchable:: AVX_ALL-Swizzle-YMM
Register:: YMM 256 bit
Return Type:: __m256d
Param Types:: __m128d const * mem_addr
Param ETypes:: FP64 mem_addr

__m256d _mm256_broadcast_pd(__m128d const * mem_addr);

Intel Description

Broadcast 128 bits from memory (composed of 2 packed double-precision (64-bit) floating-point elements) to all elements of “dst”.

Intel Implementation Pseudo-Code

tmp[127:0] := MEM[mem_addr+127:mem_addr]
dst[127:0] := tmp[127:0]
dst[255:128] := tmp[127:0]
dst[MAX:256] := 0

_mm256_extract_epi8#

Tech:: AVX_ALL
Category:: Swizzle
Header:: immintrin.h
Searchable:: AVX_ALL-Swizzle-YMM
Register:: YMM 256 bit
Return Type:: int
Param Types:: __m256i a, const int index
Param ETypes:: UI8 a, IMM index

int _mm256_extract_epi8(__m256i a, const int index);

Intel Description

Extract an 8-bit integer from “a”, selected with “index”, and store the result in “dst”.

Intel Implementation Pseudo-Code

dst[7:0] := (a[255:0] >> (index[4:0] * 8))[7:0]

_mm256_extract_epi16#

Tech:: AVX_ALL
Category:: Swizzle
Header:: immintrin.h
Searchable:: AVX_ALL-Swizzle-YMM
Register:: YMM 256 bit
Return Type:: int
Param Types:: __m256i a, const int index
Param ETypes:: UI16 a, IMM index

int _mm256_extract_epi16(__m256i a, const int index);

Intel Description

Extract a 16-bit integer from “a”, selected with “index”, and store the result in “dst”.

Intel Implementation Pseudo-Code

dst[15:0] := (a[255:0] >> (index[3:0] * 16))[15:0]

_mm256_blend_epi16#

Tech:: AVX_ALL
Category:: Swizzle
Header:: immintrin.h
Searchable:: AVX_ALL-Swizzle-YMM
Register:: YMM 256 bit
Return Type:: __m256i
Param Types:: __m256i a, __m256i b, const int imm8
Param ETypes:: UI16 a, UI16 b, IMM imm8

__m256i _mm256_blend_epi16(__m256i a, __m256i b,
                           const int imm8);

Intel Description

Blend packed 16-bit integers from “a” and “b” within 128-bit lanes using control mask “imm8”, and store the results in “dst”.

Intel Implementation Pseudo-Code

FOR j := 0 to 15
        i := j*16
        IF imm8[j%8]
                dst[i+15:i] := b[i+15:i]
        ELSE
                dst[i+15:i] := a[i+15:i]
        FI
ENDFOR
dst[MAX:256] := 0

_mm256_blend_epi32#

Tech:: AVX_ALL
Category:: Swizzle
Header:: immintrin.h
Searchable:: AVX_ALL-Swizzle-YMM
Register:: YMM 256 bit
Return Type:: __m256i
Param Types:: __m256i a, __m256i b, const int imm8
Param ETypes:: UI32 a, UI32 b, IMM imm8

__m256i _mm256_blend_epi32(__m256i a, __m256i b,
                           const int imm8);

Intel Description

Blend packed 32-bit integers from “a” and “b” using control mask “imm8”, and store the results in “dst”.

Intel Implementation Pseudo-Code

FOR j := 0 to 7
        i := j*32
        IF imm8[j]
                dst[i+31:i] := b[i+31:i]
        ELSE
                dst[i+31:i] := a[i+31:i]
        FI
ENDFOR
dst[MAX:256] := 0

_mm256_blendv_epi8#

Tech:: AVX_ALL
Category:: Swizzle
Header:: immintrin.h
Searchable:: AVX_ALL-Swizzle-YMM
Register:: YMM 256 bit
Return Type:: __m256i
Param Types:: __m256i a, __m256i b, __m256i mask
Param ETypes:: UI8 a, UI8 b, MASK mask

__m256i _mm256_blendv_epi8(__m256i a, __m256i b,
                           __m256i mask);

Intel Description

Blend packed 8-bit integers from “a” and “b” using “mask”, and store the results in “dst”.

Intel Implementation Pseudo-Code

FOR j := 0 to 31
        i := j*8
        IF mask[i+7]
                dst[i+7:i] := b[i+7:i]
        ELSE
                dst[i+7:i] := a[i+7:i]
        FI
ENDFOR
dst[MAX:256] := 0

_mm256_broadcastb_epi8#

Tech:: AVX_ALL
Category:: Swizzle
Header:: immintrin.h
Searchable:: AVX_ALL-Swizzle-YMM
Register:: YMM 256 bit
Return Type:: __m256i
Param Types:: __m128i a
Param ETypes:: UI8 a

__m256i _mm256_broadcastb_epi8(__m128i a);

Intel Description

Broadcast the low packed 8-bit integer from “a” to all elements of “dst”.

Intel Implementation Pseudo-Code

FOR j := 0 to 31
        i := j*8
        dst[i+7:i] := a[7:0]
ENDFOR
dst[MAX:256] := 0

_mm256_broadcastd_epi32#

Tech:: AVX_ALL
Category:: Swizzle
Header:: immintrin.h
Searchable:: AVX_ALL-Swizzle-YMM
Register:: YMM 256 bit
Return Type:: __m256i
Param Types:: __m128i a
Param ETypes:: UI32 a

__m256i _mm256_broadcastd_epi32(__m128i a);

Intel Description

Broadcast the low packed 32-bit integer from “a” to all elements of “dst”.

Intel Implementation Pseudo-Code

FOR j := 0 to 7
        i := j*32
        dst[i+31:i] := a[31:0]
ENDFOR
dst[MAX:256] := 0

_mm256_broadcastq_epi64#

Tech:: AVX_ALL
Category:: Swizzle
Header:: immintrin.h
Searchable:: AVX_ALL-Swizzle-YMM
Register:: YMM 256 bit
Return Type:: __m256i
Param Types:: __m128i a
Param ETypes:: UI64 a

__m256i _mm256_broadcastq_epi64(__m128i a);

Intel Description

Broadcast the low packed 64-bit integer from “a” to all elements of “dst”.

Intel Implementation Pseudo-Code

FOR j := 0 to 3
        i := j*64
        dst[i+63:i] := a[63:0]
ENDFOR
dst[MAX:256] := 0

_mm256_broadcastsd_pd#

Tech:: AVX_ALL
Category:: Swizzle
Header:: immintrin.h
Searchable:: AVX_ALL-Swizzle-YMM
Register:: YMM 256 bit
Return Type:: __m256d
Param Types:: __m128d a
Param ETypes:: FP64 a

__m256d _mm256_broadcastsd_pd(__m128d a);

Intel Description

Broadcast the low double-precision (64-bit) floating-point element from “a” to all elements of “dst”.

Intel Implementation Pseudo-Code

FOR j := 0 to 3
        i := j*64
        dst[i+63:i] := a[63:0]
ENDFOR
dst[MAX:256] := 0

_mm256_broadcastsi128_si256#

Tech:: AVX_ALL
Category:: Swizzle
Header:: immintrin.h
Searchable:: AVX_ALL-Swizzle-YMM
Register:: YMM 256 bit
Return Type:: __m256i
Param Types:: __m128i a
Param ETypes:: M128 a

__m256i _mm256_broadcastsi128_si256(__m128i a);

Intel Description

Broadcast 128 bits of integer data from “a” to all 128-bit lanes in “dst”.

Intel Implementation Pseudo-Code

dst[127:0] := a[127:0]
dst[255:128] := a[127:0]
dst[MAX:256] := 0

_mm256_broadcastss_ps#

Tech:: AVX_ALL
Category:: Swizzle
Header:: immintrin.h
Searchable:: AVX_ALL-Swizzle-YMM
Register:: YMM 256 bit
Return Type:: __m256
Param Types:: __m128 a
Param ETypes:: FP32 a

__m256 _mm256_broadcastss_ps(__m128 a);

Intel Description

Broadcast the low single-precision (32-bit) floating-point element from “a” to all elements of “dst”.

Intel Implementation Pseudo-Code

FOR j := 0 to 7
        i := j*32
        dst[i+31:i] := a[31:0]
ENDFOR
dst[MAX:256] := 0

_mm256_broadcastw_epi16#

Tech:: AVX_ALL
Category:: Swizzle
Header:: immintrin.h
Searchable:: AVX_ALL-Swizzle-YMM
Register:: YMM 256 bit
Return Type:: __m256i
Param Types:: __m128i a
Param ETypes:: UI16 a

__m256i _mm256_broadcastw_epi16(__m128i a);

Intel Description

Broadcast the low packed 16-bit integer from “a” to all elements of “dst”.

Intel Implementation Pseudo-Code

FOR j := 0 to 15
        i := j*16
        dst[i+15:i] := a[15:0]
ENDFOR
dst[MAX:256] := 0

_mm256_extracti128_si256#

Tech:: AVX_ALL
Category:: Swizzle
Header:: immintrin.h
Searchable:: AVX_ALL-Swizzle-YMM
Register:: YMM 256 bit
Return Type:: __m128i
Param Types:: __m256i a, const int imm8
Param ETypes:: M128 a, IMM imm8

__m128i _mm256_extracti128_si256(__m256i a, const int imm8);

Intel Description

Extract 128 bits (composed of integer data) from “a”, selected with “imm8”, and store the result in “dst”.

Intel Implementation Pseudo-Code

CASE imm8[0] OF
0: dst[127:0] := a[127:0]
1: dst[127:0] := a[255:128]
ESAC
dst[MAX:128] := 0

_mm256_inserti128_si256#

Tech:: AVX_ALL
Category:: Swizzle
Header:: immintrin.h
Searchable:: AVX_ALL-Swizzle-YMM
Register:: YMM 256 bit
Return Type:: __m256i
Param Types:: __m256i a, __m128i b, const int imm8
Param ETypes:: M256 a, M128 b, IMM imm8

__m256i _mm256_inserti128_si256(__m256i a, __m128i b,
                                const int imm8);

Intel Description

Copy “a” to “dst”, then insert 128 bits (composed of integer data) from “b” into “dst” at the location specified by “imm8”.

Intel Implementation Pseudo-Code

dst[255:0] := a[255:0]
CASE (imm8[0]) OF
0: dst[127:0] := b[127:0]
1: dst[255:128] := b[127:0]
ESAC
dst[MAX:256] := 0

_mm256_permute2x128_si256#

Tech:: AVX_ALL
Category:: Swizzle
Header:: immintrin.h
Searchable:: AVX_ALL-Swizzle-YMM
Register:: YMM 256 bit
Return Type:: __m256i
Param Types:: __m256i a, __m256i b, const int imm8
Param ETypes:: M256 a, M256 b, IMM imm8

__m256i _mm256_permute2x128_si256(__m256i a, __m256i b,
                                  const int imm8);

Intel Description

Shuffle 128 bits (composed of integer data) selected by “imm8” from “a” and “b”, and store the results in “dst”.

Intel Implementation Pseudo-Code

DEFINE SELECT4(src1, src2, control) {
        CASE(control[1:0]) OF
        0:      tmp[127:0] := src1[127:0]
        1:      tmp[127:0] := src1[255:128]
        2:      tmp[127:0] := src2[127:0]
        3:      tmp[127:0] := src2[255:128]
        ESAC
        IF control[3]
                tmp[127:0] := 0
        FI
        RETURN tmp[127:0]
}
dst[127:0] := SELECT4(a[255:0], b[255:0], imm8[3:0])
dst[255:128] := SELECT4(a[255:0], b[255:0], imm8[7:4])
dst[MAX:256] := 0

_mm256_permute4x64_epi64#

Tech:: AVX_ALL
Category:: Swizzle
Header:: immintrin.h
Searchable:: AVX_ALL-Swizzle-YMM
Register:: YMM 256 bit
Return Type:: __m256i
Param Types:: __m256i a, const int imm8
Param ETypes:: UI64 a, IMM imm8

__m256i _mm256_permute4x64_epi64(__m256i a, const int imm8);

Intel Description

Shuffle 64-bit integers in “a” across lanes using the control in “imm8”, and store the results in “dst”.

Intel Implementation Pseudo-Code

DEFINE SELECT4(src, control) {
        CASE(control[1:0]) OF
        0:      tmp[63:0] := src[63:0]
        1:      tmp[63:0] := src[127:64]
        2:      tmp[63:0] := src[191:128]
        3:      tmp[63:0] := src[255:192]
        ESAC
        RETURN tmp[63:0]
}
dst[63:0] := SELECT4(a[255:0], imm8[1:0])
dst[127:64] := SELECT4(a[255:0], imm8[3:2])
dst[191:128] := SELECT4(a[255:0], imm8[5:4])
dst[255:192] := SELECT4(a[255:0], imm8[7:6])
dst[MAX:256] := 0

_mm256_permute4x64_pd#

Tech:: AVX_ALL
Category:: Swizzle
Header:: immintrin.h
Searchable:: AVX_ALL-Swizzle-YMM
Register:: YMM 256 bit
Return Type:: __m256d
Param Types:: __m256d a, const int imm8
Param ETypes:: FP64 a, IMM imm8

__m256d _mm256_permute4x64_pd(__m256d a, const int imm8);

Intel Description

Shuffle double-precision (64-bit) floating-point elements in “a” across lanes using the control in “imm8”, and store the results in “dst”.

Intel Implementation Pseudo-Code

DEFINE SELECT4(src, control) {
        CASE(control[1:0]) OF
        0:      tmp[63:0] := src[63:0]
        1:      tmp[63:0] := src[127:64]
        2:      tmp[63:0] := src[191:128]
        3:      tmp[63:0] := src[255:192]
        ESAC
        RETURN tmp[63:0]
}
dst[63:0] := SELECT4(a[255:0], imm8[1:0])
dst[127:64] := SELECT4(a[255:0], imm8[3:2])
dst[191:128] := SELECT4(a[255:0], imm8[5:4])
dst[255:192] := SELECT4(a[255:0], imm8[7:6])
dst[MAX:256] := 0

_mm256_permutevar8x32_epi32#

Tech:: AVX_ALL
Category:: Swizzle
Header:: immintrin.h
Searchable:: AVX_ALL-Swizzle-YMM
Register:: YMM 256 bit
Return Type:: __m256i
Param Types:: __m256i a, __m256i idx
Param ETypes:: UI32 a, UI32 idx

__m256i _mm256_permutevar8x32_epi32(__m256i a, __m256i idx);

Intel Description

Shuffle 32-bit integers in “a” across lanes using the corresponding index in “idx”, and store the results in “dst”.

Intel Implementation Pseudo-Code

FOR j := 0 to 7
        i := j*32
        id := idx[i+2:i]*32
        dst[i+31:i] := a[id+31:id]
ENDFOR
dst[MAX:256] := 0

_mm256_permutevar8x32_ps#

Tech:: AVX_ALL
Category:: Swizzle
Header:: immintrin.h
Searchable:: AVX_ALL-Swizzle-YMM
Register:: YMM 256 bit
Return Type:: __m256
Param Types:: __m256 a, __m256i idx
Param ETypes:: FP32 a, UI32 idx

__m256 _mm256_permutevar8x32_ps(__m256 a, __m256i idx);

Intel Description

Shuffle single-precision (32-bit) floating-point elements in “a” across lanes using the corresponding index in “idx”, and store the results in “dst”.

Intel Implementation Pseudo-Code

FOR j := 0 to 7
        i := j*32
        id := idx[i+2:i]*32
        dst[i+31:i] := a[id+31:id]
ENDFOR
dst[MAX:256] := 0

_mm256_shuffle_epi32#

Tech:: AVX_ALL
Category:: Swizzle
Header:: immintrin.h
Searchable:: AVX_ALL-Swizzle-YMM
Register:: YMM 256 bit
Return Type:: __m256i
Param Types:: __m256i a, const int imm8
Param ETypes:: UI32 a, IMM imm8

__m256i _mm256_shuffle_epi32(__m256i a, const int imm8);

Intel Description

Shuffle 32-bit integers in “a” within 128-bit lanes using the control in “imm8”, and store the results in “dst”.

Intel Implementation Pseudo-Code

DEFINE SELECT4(src, control) {
        CASE(control[1:0]) OF
        0:      tmp[31:0] := src[31:0]
        1:      tmp[31:0] := src[63:32]
        2:      tmp[31:0] := src[95:64]
        3:      tmp[31:0] := src[127:96]
        ESAC
        RETURN tmp[31:0]
}
dst[31:0] := SELECT4(a[127:0], imm8[1:0])
dst[63:32] := SELECT4(a[127:0], imm8[3:2])
dst[95:64] := SELECT4(a[127:0], imm8[5:4])
dst[127:96] := SELECT4(a[127:0], imm8[7:6])
dst[159:128] := SELECT4(a[255:128], imm8[1:0])
dst[191:160] := SELECT4(a[255:128], imm8[3:2])
dst[223:192] := SELECT4(a[255:128], imm8[5:4])
dst[255:224] := SELECT4(a[255:128], imm8[7:6])
dst[MAX:256] := 0

_mm256_shuffle_epi8#

Tech:: AVX_ALL
Category:: Swizzle
Header:: immintrin.h
Searchable:: AVX_ALL-Swizzle-YMM
Register:: YMM 256 bit
Return Type:: __m256i
Param Types:: __m256i a, __m256i b
Param ETypes:: UI8 a, UI8 b

__m256i _mm256_shuffle_epi8(__m256i a, __m256i b);

Intel Description

Shuffle 8-bit integers in “a” within 128-bit lanes according to shuffle control mask in the corresponding 8-bit element of “b”, and store the results in “dst”.

Intel Implementation Pseudo-Code

FOR j := 0 to 15
        i := j*8
        IF b[i+7] == 1
                dst[i+7:i] := 0
        ELSE
                index[3:0] := b[i+3:i]
                dst[i+7:i] := a[index*8+7:index*8]
        FI
        IF b[128+i+7] == 1
                dst[128+i+7:128+i] := 0
        ELSE
                index[3:0] := b[128+i+3:128+i]
                dst[128+i+7:128+i] := a[128+index*8+7:128+index*8]
        FI
ENDFOR
dst[MAX:256] := 0

_mm256_shufflehi_epi16#

Tech:: AVX_ALL
Category:: Swizzle
Header:: immintrin.h
Searchable:: AVX_ALL-Swizzle-YMM
Register:: YMM 256 bit
Return Type:: __m256i
Param Types:: __m256i a, const int imm8
Param ETypes:: UI16 a, IMM imm8

__m256i _mm256_shufflehi_epi16(__m256i a, const int imm8);

Intel Description

Shuffle 16-bit integers in the high 64 bits of 128-bit lanes of “a” using the control in “imm8”. Store the results in the high 64 bits of 128-bit lanes of “dst”, with the low 64 bits of 128-bit lanes being copied from “a” to “dst”.

Intel Implementation Pseudo-Code

dst[63:0] := a[63:0]
dst[79:64] := (a >> (imm8[1:0] * 16))[79:64]
dst[95:80] := (a >> (imm8[3:2] * 16))[79:64]
dst[111:96] := (a >> (imm8[5:4] * 16))[79:64]
dst[127:112] := (a >> (imm8[7:6] * 16))[79:64]
dst[191:128] := a[191:128]
dst[207:192] := (a >> (imm8[1:0] * 16))[207:192]
dst[223:208] := (a >> (imm8[3:2] * 16))[207:192]
dst[239:224] := (a >> (imm8[5:4] * 16))[207:192]
dst[255:240] := (a >> (imm8[7:6] * 16))[207:192]
dst[MAX:256] := 0

_mm256_shufflelo_epi16#

Tech:: AVX_ALL
Category:: Swizzle
Header:: immintrin.h
Searchable:: AVX_ALL-Swizzle-YMM
Register:: YMM 256 bit
Return Type:: __m256i
Param Types:: __m256i a, const int imm8
Param ETypes:: UI16 a, IMM imm8

__m256i _mm256_shufflelo_epi16(__m256i a, const int imm8);

Intel Description

Shuffle 16-bit integers in the low 64 bits of 128-bit lanes of “a” using the control in “imm8”. Store the results in the low 64 bits of 128-bit lanes of “dst”, with the high 64 bits of 128-bit lanes being copied from “a” to “dst”.

Intel Implementation Pseudo-Code

dst[15:0] := (a >> (imm8[1:0] * 16))[15:0]
dst[31:16] := (a >> (imm8[3:2] * 16))[15:0]
dst[47:32] := (a >> (imm8[5:4] * 16))[15:0]
dst[63:48] := (a >> (imm8[7:6] * 16))[15:0]
dst[127:64] := a[127:64]
dst[143:128] := (a >> (imm8[1:0] * 16))[143:128]
dst[159:144] := (a >> (imm8[3:2] * 16))[143:128]
dst[175:160] := (a >> (imm8[5:4] * 16))[143:128]
dst[191:176] := (a >> (imm8[7:6] * 16))[143:128]
dst[255:192] := a[255:192]
dst[MAX:256] := 0

_mm256_unpackhi_epi8#

Tech:: AVX_ALL
Category:: Swizzle
Header:: immintrin.h
Searchable:: AVX_ALL-Swizzle-YMM
Register:: YMM 256 bit
Return Type:: __m256i
Param Types:: __m256i a, __m256i b
Param ETypes:: UI8 a, UI8 b

__m256i _mm256_unpackhi_epi8(__m256i a, __m256i b);

Intel Description

Unpack and interleave 8-bit integers from the high half of each 128-bit lane in “a” and “b”, and store the results in “dst”.

Intel Implementation Pseudo-Code

DEFINE INTERLEAVE_HIGH_BYTES(src1[127:0], src2[127:0]) {
        dst[7:0] := src1[71:64]
        dst[15:8] := src2[71:64]
        dst[23:16] := src1[79:72]
        dst[31:24] := src2[79:72]
        dst[39:32] := src1[87:80]
        dst[47:40] := src2[87:80]
        dst[55:48] := src1[95:88]
        dst[63:56] := src2[95:88]
        dst[71:64] := src1[103:96]
        dst[79:72] := src2[103:96]
        dst[87:80] := src1[111:104]
        dst[95:88] := src2[111:104]
        dst[103:96] := src1[119:112]
        dst[111:104] := src2[119:112]
        dst[119:112] := src1[127:120]
        dst[127:120] := src2[127:120]
        RETURN dst[127:0]
}
dst[127:0] := INTERLEAVE_HIGH_BYTES(a[127:0], b[127:0])
dst[255:128] := INTERLEAVE_HIGH_BYTES(a[255:128], b[255:128])
dst[MAX:256] := 0

_mm256_unpackhi_epi16#

Tech:: AVX_ALL
Category:: Swizzle
Header:: immintrin.h
Searchable:: AVX_ALL-Swizzle-YMM
Register:: YMM 256 bit
Return Type:: __m256i
Param Types:: __m256i a, __m256i b
Param ETypes:: UI16 a, UI16 b

__m256i _mm256_unpackhi_epi16(__m256i a, __m256i b);

Intel Description

Unpack and interleave 16-bit integers from the high half of each 128-bit lane in “a” and “b”, and store the results in “dst”.

Intel Implementation Pseudo-Code

DEFINE INTERLEAVE_HIGH_WORDS(src1[127:0], src2[127:0]) {
        dst[15:0] := src1[79:64]
        dst[31:16] := src2[79:64]
        dst[47:32] := src1[95:80]
        dst[63:48] := src2[95:80]
        dst[79:64] := src1[111:96]
        dst[95:80] := src2[111:96]
        dst[111:96] := src1[127:112]
        dst[127:112] := src2[127:112]
        RETURN dst[127:0]
}
dst[127:0] := INTERLEAVE_HIGH_WORDS(a[127:0], b[127:0])
dst[255:128] := INTERLEAVE_HIGH_WORDS(a[255:128], b[255:128])
dst[MAX:256] := 0

_mm256_unpackhi_epi32#

Tech:: AVX_ALL
Category:: Swizzle
Header:: immintrin.h
Searchable:: AVX_ALL-Swizzle-YMM
Register:: YMM 256 bit
Return Type:: __m256i
Param Types:: __m256i a, __m256i b
Param ETypes:: UI32 a, UI32 b

__m256i _mm256_unpackhi_epi32(__m256i a, __m256i b);

Intel Description

Unpack and interleave 32-bit integers from the high half of each 128-bit lane in “a” and “b”, and store the results in “dst”.

Intel Implementation Pseudo-Code

DEFINE INTERLEAVE_HIGH_DWORDS(src1[127:0], src2[127:0]) {
        dst[31:0] := src1[95:64]
        dst[63:32] := src2[95:64]
        dst[95:64] := src1[127:96]
        dst[127:96] := src2[127:96]
        RETURN dst[127:0]
}
dst[127:0] := INTERLEAVE_HIGH_DWORDS(a[127:0], b[127:0])
dst[255:128] := INTERLEAVE_HIGH_DWORDS(a[255:128], b[255:128])
dst[MAX:256] := 0

_mm256_unpackhi_epi64#

Tech:: AVX_ALL
Category:: Swizzle
Header:: immintrin.h
Searchable:: AVX_ALL-Swizzle-YMM
Register:: YMM 256 bit
Return Type:: __m256i
Param Types:: __m256i a, __m256i b
Param ETypes:: UI64 a, UI64 b

__m256i _mm256_unpackhi_epi64(__m256i a, __m256i b);

Intel Description

Unpack and interleave 64-bit integers from the high half of each 128-bit lane in “a” and “b”, and store the results in “dst”.

Intel Implementation Pseudo-Code

DEFINE INTERLEAVE_HIGH_QWORDS(src1[127:0], src2[127:0]) {
        dst[63:0] := src1[127:64]
        dst[127:64] := src2[127:64]
        RETURN dst[127:0]
}
dst[127:0] := INTERLEAVE_HIGH_QWORDS(a[127:0], b[127:0])
dst[255:128] := INTERLEAVE_HIGH_QWORDS(a[255:128], b[255:128])
dst[MAX:256] := 0

_mm256_unpacklo_epi8#

Tech:: AVX_ALL
Category:: Swizzle
Header:: immintrin.h
Searchable:: AVX_ALL-Swizzle-YMM
Register:: YMM 256 bit
Return Type:: __m256i
Param Types:: __m256i a, __m256i b
Param ETypes:: UI8 a, UI8 b

__m256i _mm256_unpacklo_epi8(__m256i a, __m256i b);

Intel Description

Unpack and interleave 8-bit integers from the low half of each 128-bit lane in “a” and “b”, and store the results in “dst”.

Intel Implementation Pseudo-Code

DEFINE INTERLEAVE_BYTES(src1[127:0], src2[127:0]) {
        dst[7:0] := src1[7:0]
        dst[15:8] := src2[7:0]
        dst[23:16] := src1[15:8]
        dst[31:24] := src2[15:8]
        dst[39:32] := src1[23:16]
        dst[47:40] := src2[23:16]
        dst[55:48] := src1[31:24]
        dst[63:56] := src2[31:24]
        dst[71:64] := src1[39:32]
        dst[79:72] := src2[39:32]
        dst[87:80] := src1[47:40]
        dst[95:88] := src2[47:40]
        dst[103:96] := src1[55:48]
        dst[111:104] := src2[55:48]
        dst[119:112] := src1[63:56]
        dst[127:120] := src2[63:56]
        RETURN dst[127:0]
}
dst[127:0] := INTERLEAVE_BYTES(a[127:0], b[127:0])
dst[255:128] := INTERLEAVE_BYTES(a[255:128], b[255:128])
dst[MAX:256] := 0

_mm256_unpacklo_epi16#

Tech:: AVX_ALL
Category:: Swizzle
Header:: immintrin.h
Searchable:: AVX_ALL-Swizzle-YMM
Register:: YMM 256 bit
Return Type:: __m256i
Param Types:: __m256i a, __m256i b
Param ETypes:: UI16 a, UI16 b

__m256i _mm256_unpacklo_epi16(__m256i a, __m256i b);

Intel Description

Unpack and interleave 16-bit integers from the low half of each 128-bit lane in “a” and “b”, and store the results in “dst”.

Intel Implementation Pseudo-Code

DEFINE INTERLEAVE_WORDS(src1[127:0], src2[127:0]) {
        dst[15:0] := src1[15:0]
        dst[31:16] := src2[15:0]
        dst[47:32] := src1[31:16]
        dst[63:48] := src2[31:16]
        dst[79:64] := src1[47:32]
        dst[95:80] := src2[47:32]
        dst[111:96] := src1[63:48]
        dst[127:112] := src2[63:48]
        RETURN dst[127:0]
}
dst[127:0] := INTERLEAVE_WORDS(a[127:0], b[127:0])
dst[255:128] := INTERLEAVE_WORDS(a[255:128], b[255:128])
dst[MAX:256] := 0

_mm256_unpacklo_epi32#

Tech:: AVX_ALL
Category:: Swizzle
Header:: immintrin.h
Searchable:: AVX_ALL-Swizzle-YMM
Register:: YMM 256 bit
Return Type:: __m256i
Param Types:: __m256i a, __m256i b
Param ETypes:: UI32 a, UI32 b

__m256i _mm256_unpacklo_epi32(__m256i a, __m256i b);

Intel Description

Unpack and interleave 32-bit integers from the low half of each 128-bit lane in “a” and “b”, and store the results in “dst”.

Intel Implementation Pseudo-Code

DEFINE INTERLEAVE_DWORDS(src1[127:0], src2[127:0]) {
        dst[31:0] := src1[31:0]
        dst[63:32] := src2[31:0]
        dst[95:64] := src1[63:32]
        dst[127:96] := src2[63:32]
        RETURN dst[127:0]
}
dst[127:0] := INTERLEAVE_DWORDS(a[127:0], b[127:0])
dst[255:128] := INTERLEAVE_DWORDS(a[255:128], b[255:128])
dst[MAX:256] := 0

_mm256_unpacklo_epi64#

Tech:: AVX_ALL
Category:: Swizzle
Header:: immintrin.h
Searchable:: AVX_ALL-Swizzle-YMM
Register:: YMM 256 bit
Return Type:: __m256i
Param Types:: __m256i a, __m256i b
Param ETypes:: UI64 a, UI64 b

__m256i _mm256_unpacklo_epi64(__m256i a, __m256i b);

Intel Description

Unpack and interleave 64-bit integers from the low half of each 128-bit lane in “a” and “b”, and store the results in “dst”.

Intel Implementation Pseudo-Code

DEFINE INTERLEAVE_QWORDS(src1[127:0], src2[127:0]) {
        dst[63:0] := src1[63:0]
        dst[127:64] := src2[63:0]
        RETURN dst[127:0]
}
dst[127:0] := INTERLEAVE_QWORDS(a[127:0], b[127:0])
dst[255:128] := INTERLEAVE_QWORDS(a[255:128], b[255:128])
dst[MAX:256] := 0

AVX_ALL-Swizzle-YMM

Contents

AVX_ALL-Swizzle-YMM#

_mm256_blend_pd#

_mm256_blend_ps#

_mm256_blendv_pd#

_mm256_blendv_ps#

_mm256_shuffle_pd#

_mm256_shuffle_ps#

_mm256_extractf128_ps#

_mm256_extractf128_pd#

_mm256_extractf128_si256#

_mm256_extract_epi32#

_mm256_extract_epi64#

_mm256_permutevar_ps#

_mm256_permute_ps#

_mm256_permutevar_pd#

_mm256_permute_pd#

_mm256_permute2f128_ps#

_mm256_permute2f128_pd#

_mm256_permute2f128_si256#

_mm256_insertf128_ps#

_mm256_insertf128_pd#

_mm256_insertf128_si256#

_mm256_insert_epi8#

_mm256_insert_epi16#

_mm256_insert_epi32#

_mm256_insert_epi64#

_mm256_unpackhi_pd#

_mm256_unpackhi_ps#

_mm256_unpacklo_pd#

_mm256_unpacklo_ps#

_mm256_broadcast_sd#

_mm256_broadcast_ps#

_mm256_broadcast_pd#

_mm256_extract_epi8#

_mm256_extract_epi16#

_mm256_blend_epi16#

_mm256_blend_epi32#

_mm256_blendv_epi8#

_mm256_broadcastb_epi8#

_mm256_broadcastd_epi32#

_mm256_broadcastq_epi64#

_mm256_broadcastsd_pd#

_mm256_broadcastsi128_si256#

_mm256_broadcastss_ps#

_mm256_broadcastw_epi16#

_mm256_extracti128_si256#

_mm256_inserti128_si256#

_mm256_permute2x128_si256#

_mm256_permute4x64_epi64#

_mm256_permute4x64_pd#

_mm256_permutevar8x32_epi32#

_mm256_permutevar8x32_ps#

_mm256_shuffle_epi32#

_mm256_shuffle_epi8#

_mm256_shufflehi_epi16#

_mm256_shufflelo_epi16#

_mm256_unpackhi_epi8#

_mm256_unpackhi_epi16#

_mm256_unpackhi_epi32#

_mm256_unpackhi_epi64#

_mm256_unpacklo_epi8#

_mm256_unpacklo_epi16#

_mm256_unpacklo_epi32#

_mm256_unpacklo_epi64#