AVX-512-Store-YMM

Contents

AVX-512-Store-YMM#

_mm256_mask_storeu_epi16#

Tech:

AVX-512

Category:

Store

Header:

immintrin.h

Searchable:

AVX-512-Store-YMM

Register:

YMM 256 bit

Return Type:

void

Param Types:

void* mem_addr, __mmask16 k, __m256i a

Param ETypes:

UI16 mem_addr, MASK k, UI16 a

void _mm256_mask_storeu_epi16(void* mem_addr, __mmask16 k,
                              __m256i a)

Intel Description

Store packed 16-bit integers from “a” into memory using writemask “k”.

“mem_addr” does not need to be aligned on any particular boundary.

Intel Implementation Pseudo-Code

FOR j := 0 to 15
        i := j*16
        IF k[j]
                MEM[mem_addr+i+15:mem_addr+i] := a[i+15:i]
        FI
ENDFOR

_mm256_mask_storeu_epi8#

Tech:

AVX-512

Category:

Store

Header:

immintrin.h

Searchable:

AVX-512-Store-YMM

Register:

YMM 256 bit

Return Type:

void

Param Types:

void* mem_addr, __mmask32 k, __m256i a

Param ETypes:

UI8 mem_addr, MASK k, UI8 a

void _mm256_mask_storeu_epi8(void* mem_addr, __mmask32 k,
                             __m256i a)

Intel Description

Store packed 8-bit integers from “a” into memory using writemask “k”.

“mem_addr” does not need to be aligned on any particular boundary.

Intel Implementation Pseudo-Code

FOR j := 0 to 31
        i := j*8
        IF k[j]
                MEM[mem_addr+i+7:mem_addr+i] := a[i+7:i]
        FI
ENDFOR

_mm256_storeu_epi16#

Tech:

AVX-512

Category:

Store

Header:

immintrin.h

Searchable:

AVX-512-Store-YMM

Register:

YMM 256 bit

Return Type:

void

Param Types:

void* mem_addr, __m256i a

Param ETypes:

UI16 mem_addr, UI16 a

void _mm256_storeu_epi16(void* mem_addr, __m256i a);

Intel Description

Store 256-bits (composed of 16 packed 16-bit integers) from “a” into memory.

“mem_addr” does not need to be aligned on any particular boundary.

Intel Implementation Pseudo-Code

MEM[mem_addr+255:mem_addr] := a[255:0]

_mm256_storeu_epi8#

Tech:

AVX-512

Category:

Store

Header:

immintrin.h

Searchable:

AVX-512-Store-YMM

Register:

YMM 256 bit

Return Type:

void

Param Types:

void* mem_addr, __m256i a

Param ETypes:

UI8 mem_addr, UI8 a

void _mm256_storeu_epi8(void* mem_addr, __m256i a);

Intel Description

Store 256-bits (composed of 32 packed 8-bit integers) from “a” into memory.

“mem_addr” does not need to be aligned on any particular boundary.

Intel Implementation Pseudo-Code

MEM[mem_addr+255:mem_addr] := a[255:0]

_mm256_mask_cvtsepi16_storeu_epi8#

Tech:

AVX-512

Category:

Store

Header:

immintrin.h

Searchable:

AVX-512-Store-YMM

Register:

YMM 256 bit

Return Type:

void

Param Types:

void* base_addr, __mmask16 k, __m256i a

Param ETypes:

SI8 base_addr, MASK k, SI16 a

void _mm256_mask_cvtsepi16_storeu_epi8(void* base_addr,
                                       __mmask16 k,
                                       __m256i a)

Intel Description

Convert packed signed 16-bit integers in “a” to packed 8-bit integers with signed saturation, and store the active results (those with their respective bit set in writemask “k”) to unaligned memory at “base_addr”.

Intel Implementation Pseudo-Code

FOR j := 0 to 15
        i := 16*j
        l := 8*j
        IF k[j]
                MEM[base_addr+l+7:base_addr+l] := Saturate8(a[i+15:i])
        FI
ENDFOR

_mm256_mask_cvtusepi16_storeu_epi8#

Tech:

AVX-512

Category:

Store

Header:

immintrin.h

Searchable:

AVX-512-Store-YMM

Register:

YMM 256 bit

Return Type:

void

Param Types:

void* base_addr, __mmask16 k, __m256i a

Param ETypes:

UI8 base_addr, MASK k, UI16 a

void _mm256_mask_cvtusepi16_storeu_epi8(void* base_addr,
                                        __mmask16 k,
                                        __m256i a)

Intel Description

Convert packed unsigned 16-bit integers in “a” to packed unsigned 8-bit integers with unsigned saturation, and store the active results (those with their respective bit set in writemask “k”) to unaligned memory at “base_addr”.

Intel Implementation Pseudo-Code

FOR j := 0 to 15
        i := 16*j
        l := 8*j
        IF k[j]
                MEM[base_addr+l+7:base_addr+l] := SaturateU8(a[i+15:i])
        FI
ENDFOR

_mm256_mask_cvtepi16_storeu_epi8#

Tech:

AVX-512

Category:

Store

Header:

immintrin.h

Searchable:

AVX-512-Store-YMM

Register:

YMM 256 bit

Return Type:

void

Param Types:

void* base_addr, __mmask16 k, __m256i a

Param ETypes:

UI8 base_addr, MASK k, UI16 a

void _mm256_mask_cvtepi16_storeu_epi8(void* base_addr,
                                      __mmask16 k,
                                      __m256i a)

Intel Description

Convert packed 16-bit integers in “a” to packed 8-bit integers with truncation, and store the active results (those with their respective bit set in writemask “k”) to unaligned memory at “base_addr”.

Intel Implementation Pseudo-Code

FOR j := 0 to 15
        i := 16*j
        l := 8*j
        IF k[j]
                MEM[base_addr+l+7:base_addr+l] := Truncate8(a[i+15:i])
        FI
ENDFOR

_mm256_mask_compressstoreu_pd#

Tech:

AVX-512

Category:

Store

Header:

immintrin.h

Searchable:

AVX-512-Store-YMM

Register:

YMM 256 bit

Return Type:

void

Param Types:

void* base_addr, __mmask8 k, __m256d a

Param ETypes:

FP64 base_addr, MASK k, FP64 a

void _mm256_mask_compressstoreu_pd(void* base_addr,
                                   __mmask8 k, __m256d a)

Intel Description

Contiguously store the active double-precision (64-bit) floating-point elements in “a” (those with their respective bit set in writemask “k”) to unaligned memory at “base_addr”.

Intel Implementation Pseudo-Code

size := 64
m := base_addr
FOR j := 0 to 3
        i := j*64
        IF k[j]
                MEM[m+size-1:m] := a[i+63:i]
                m := m + size
        FI
ENDFOR

_mm256_mask_compressstoreu_ps#

Tech:

AVX-512

Category:

Store

Header:

immintrin.h

Searchable:

AVX-512-Store-YMM

Register:

YMM 256 bit

Return Type:

void

Param Types:

void* base_addr, __mmask8 k, __m256 a

Param ETypes:

FP32 base_addr, MASK k, FP32 a

void _mm256_mask_compressstoreu_ps(void* base_addr,
                                   __mmask8 k, __m256 a)

Intel Description

Contiguously store the active single-precision (32-bit) floating-point elements in “a” (those with their respective bit set in writemask “k”) to unaligned memory at “base_addr”.

Intel Implementation Pseudo-Code

size := 32
m := base_addr
FOR j := 0 to 7
        i := j*32
        IF k[j]
                MEM[m+size-1:m] := a[i+31:i]
                m := m + size
        FI
ENDFOR

_mm256_mask_store_pd#

Tech:

AVX-512

Category:

Store

Header:

immintrin.h

Searchable:

AVX-512-Store-YMM

Register:

YMM 256 bit

Return Type:

void

Param Types:

void* mem_addr, __mmask8 k, __m256d a

Param ETypes:

FP64 mem_addr, MASK k, FP64 a

void _mm256_mask_store_pd(void* mem_addr, __mmask8 k,
                          __m256d a)

Intel Description

Store packed double-precision (64-bit) floating-point elements from “a” into memory using writemask “k”.

“mem_addr” must be aligned on a 32-byte boundary or a general-protection exception may be generated.

Intel Implementation Pseudo-Code

FOR j := 0 to 3
        i := j*64
        IF k[j]
                MEM[mem_addr+i+63:mem_addr+i] := a[i+63:i]
        FI
ENDFOR

_mm256_mask_store_ps#

Tech:

AVX-512

Category:

Store

Header:

immintrin.h

Searchable:

AVX-512-Store-YMM

Register:

YMM 256 bit

Return Type:

void

Param Types:

void* mem_addr, __mmask8 k, __m256 a

Param ETypes:

FP32 mem_addr, MASK k, FP32 a

void _mm256_mask_store_ps(void* mem_addr, __mmask8 k,
                          __m256 a)

Intel Description

Store packed single-precision (32-bit) floating-point elements from “a” into memory using writemask “k”.

“mem_addr” must be aligned on a 32-byte boundary or a general-protection exception may be generated.

Intel Implementation Pseudo-Code

FOR j := 0 to 7
        i := j*32
        IF k[j]
                MEM[mem_addr+i+31:mem_addr+i] := a[i+31:i]
        FI
ENDFOR

_mm256_mask_store_epi32#

Tech:

AVX-512

Category:

Store

Header:

immintrin.h

Searchable:

AVX-512-Store-YMM

Register:

YMM 256 bit

Return Type:

void

Param Types:

void* mem_addr, __mmask8 k, __m256i a

Param ETypes:

UI32 mem_addr, MASK k, UI32 a

void _mm256_mask_store_epi32(void* mem_addr, __mmask8 k,
                             __m256i a)

Intel Description

Store packed 32-bit integers from “a” into memory using writemask “k”.

“mem_addr” must be aligned on a 32-byte boundary or a general-protection exception may be generated.

Intel Implementation Pseudo-Code

FOR j := 0 to 7
        i := j*32
        IF k[j]
                MEM[mem_addr+i+31:mem_addr+i] := a[i+31:i]
        FI
ENDFOR

_mm256_mask_store_epi64#

Tech:

AVX-512

Category:

Store

Header:

immintrin.h

Searchable:

AVX-512-Store-YMM

Register:

YMM 256 bit

Return Type:

void

Param Types:

void* mem_addr, __mmask8 k, __m256i a

Param ETypes:

UI64 mem_addr, MASK k, UI64 a

void _mm256_mask_store_epi64(void* mem_addr, __mmask8 k,
                             __m256i a)

Intel Description

Store packed 64-bit integers from “a” into memory using writemask “k”.

“mem_addr” must be aligned on a 32-byte boundary or a general-protection exception may be generated.

Intel Implementation Pseudo-Code

FOR j := 0 to 3
        i := j*64
        IF k[j]
                MEM[mem_addr+i+63:mem_addr+i] := a[i+63:i]
        FI
ENDFOR

_mm256_mask_storeu_epi32#

Tech:

AVX-512

Category:

Store

Header:

immintrin.h

Searchable:

AVX-512-Store-YMM

Register:

YMM 256 bit

Return Type:

void

Param Types:

void* mem_addr, __mmask8 k, __m256i a

Param ETypes:

UI32 mem_addr, MASK k, UI32 a

void _mm256_mask_storeu_epi32(void* mem_addr, __mmask8 k,
                              __m256i a)

Intel Description

Store packed 32-bit integers from “a” into memory using writemask “k”.

“mem_addr” does not need to be aligned on any particular boundary.

Intel Implementation Pseudo-Code

FOR j := 0 to 7
        i := j*32
        IF k[j]
                MEM[mem_addr+i+31:mem_addr+i] := a[i+31:i]
        FI
ENDFOR

_mm256_mask_storeu_epi64#

Tech:

AVX-512

Category:

Store

Header:

immintrin.h

Searchable:

AVX-512-Store-YMM

Register:

YMM 256 bit

Return Type:

void

Param Types:

void* mem_addr, __mmask8 k, __m256i a

Param ETypes:

UI64 mem_addr, MASK k, UI64 a

void _mm256_mask_storeu_epi64(void* mem_addr, __mmask8 k,
                              __m256i a)

Intel Description

Store packed 64-bit integers from “a” into memory using writemask “k”.

“mem_addr” does not need to be aligned on any particular boundary.

Intel Implementation Pseudo-Code

FOR j := 0 to 3
        i := j*64
        IF k[j]
                MEM[mem_addr+i+63:mem_addr+i] := a[i+63:i]
        FI
ENDFOR

_mm256_mask_storeu_pd#

Tech:

AVX-512

Category:

Store

Header:

immintrin.h

Searchable:

AVX-512-Store-YMM

Register:

YMM 256 bit

Return Type:

void

Param Types:

void* mem_addr, __mmask8 k, __m256d a

Param ETypes:

FP64 mem_addr, MASK k, FP64 a

void _mm256_mask_storeu_pd(void* mem_addr, __mmask8 k,
                           __m256d a)

Intel Description

Store packed double-precision (64-bit) floating-point elements from “a” into memory using writemask “k”.

“mem_addr” does not need to be aligned on any particular boundary.

Intel Implementation Pseudo-Code

FOR j := 0 to 3
        i := j*64
        IF k[j]
                MEM[mem_addr+i+63:mem_addr+i] := a[i+63:i]
        FI
ENDFOR

_mm256_mask_storeu_ps#

Tech:

AVX-512

Category:

Store

Header:

immintrin.h

Searchable:

AVX-512-Store-YMM

Register:

YMM 256 bit

Return Type:

void

Param Types:

void* mem_addr, __mmask8 k, __m256 a

Param ETypes:

FP32 mem_addr, MASK k, FP32 a

void _mm256_mask_storeu_ps(void* mem_addr, __mmask8 k,
                           __m256 a)

Intel Description

Store packed single-precision (32-bit) floating-point elements from “a” into memory using writemask “k”.

“mem_addr” does not need to be aligned on any particular boundary.

Intel Implementation Pseudo-Code

FOR j := 0 to 7
        i := j*32
        IF k[j]
                MEM[mem_addr+i+31:mem_addr+i] := a[i+31:i]
        FI
ENDFOR

_mm256_mask_compressstoreu_epi32#

Tech:

AVX-512

Category:

Store

Header:

immintrin.h

Searchable:

AVX-512-Store-YMM

Register:

YMM 256 bit

Return Type:

void

Param Types:

void* base_addr, __mmask8 k, __m256i a

Param ETypes:

UI32 base_addr, MASK k, UI32 a

void _mm256_mask_compressstoreu_epi32(void* base_addr,
                                      __mmask8 k,
                                      __m256i a)

Intel Description

Contiguously store the active 32-bit integers in “a” (those with their respective bit set in writemask “k”) to unaligned memory at “base_addr”.

Intel Implementation Pseudo-Code

size := 32
m := base_addr
FOR j := 0 to 7
        i := j*32
        IF k[j]
                MEM[m+size-1:m] := a[i+31:i]
                m := m + size
        FI
ENDFOR

_mm256_mask_compressstoreu_epi64#

Tech:

AVX-512

Category:

Store

Header:

immintrin.h

Searchable:

AVX-512-Store-YMM

Register:

YMM 256 bit

Return Type:

void

Param Types:

void* base_addr, __mmask8 k, __m256i a

Param ETypes:

UI64 base_addr, MASK k, UI64 a

void _mm256_mask_compressstoreu_epi64(void* base_addr,
                                      __mmask8 k,
                                      __m256i a)

Intel Description

Contiguously store the active 64-bit integers in “a” (those with their respective bit set in writemask “k”) to unaligned memory at “base_addr”.

Intel Implementation Pseudo-Code

size := 64
m := base_addr
FOR j := 0 to 3
        i := j*64
        IF k[j]
                MEM[m+size-1:m] := a[i+63:i]
                m := m + size
        FI
ENDFOR

_mm256_i32scatter_epi32#

Tech:

AVX-512

Category:

Store

Header:

immintrin.h

Searchable:

AVX-512-Store-YMM

Register:

YMM 256 bit

Return Type:

void

Param Types:

void* base_addr, __m256i vindex, __m256i a, const int scale

Param ETypes:

UI32 base_addr, SI32 vindex, UI32 a, IMM scale

void _mm256_i32scatter_epi32(void* base_addr,
                             __m256i vindex, __m256i a,
                             const int scale)

Intel Description

Scatter 32-bit integers from “a” into memory using 32-bit indices. 32-bit elements are stored at addresses starting at “base_addr” and offset by each 32-bit element in “vindex” (each index is scaled by the factor in “scale”). “scale” should be 1, 2, 4 or 8.

Intel Implementation Pseudo-Code

FOR j := 0 to 7
        i := j*32
        m := j*32
        addr := base_addr + SignExtend64(vindex[m+31:m]) * ZeroExtend64(scale) * 8
        MEM[addr+31:addr] := a[i+31:i]
ENDFOR

_mm256_mask_i32scatter_epi32#

Tech:

AVX-512

Category:

Store

Header:

immintrin.h

Searchable:

AVX-512-Store-YMM

Register:

YMM 256 bit

Return Type:

void

Param Types:

void* base_addr, __mmask8 k, __m256i vindex, __m256i a, const int scale

Param ETypes:

UI32 base_addr, MASK k, SI32 vindex, UI32 a, IMM scale

void _mm256_mask_i32scatter_epi32(void* base_addr,
                                  __mmask8 k,
                                  __m256i vindex, __m256i a,
                                  const int scale)

Intel Description

Scatter 32-bit integers from “a” into memory using 32-bit indices. 32-bit elements are stored at addresses starting at “base_addr” and offset by each 32-bit element in “vindex” (each index is scaled by the factor in “scale”) subject to mask “k” (elements are not stored when the corresponding mask bit is not set). “scale” should be 1, 2, 4 or 8.

Intel Implementation Pseudo-Code

FOR j := 0 to 7
        i := j*32
        m := j*32
        IF k[j]
                addr := base_addr + SignExtend64(vindex[m+31:m]) * ZeroExtend64(scale) * 8
                MEM[addr+31:addr] := a[i+31:i]
        FI
ENDFOR

_mm256_i32scatter_epi64#

Tech:

AVX-512

Category:

Store

Header:

immintrin.h

Searchable:

AVX-512-Store-YMM

Register:

YMM 256 bit

Return Type:

void

Param Types:

void* base_addr, __m128i vindex, __m256i a, const int scale

Param ETypes:

UI64 base_addr, SI32 vindex, UI64 a, IMM scale

void _mm256_i32scatter_epi64(void* base_addr,
                             __m128i vindex, __m256i a,
                             const int scale)

Intel Description

Scatter 64-bit integers from “a” into memory using 32-bit indices. 64-bit elements are stored at addresses starting at “base_addr” and offset by each 32-bit element in “vindex” (each index is scaled by the factor in “scale”). “scale” should be 1, 2, 4 or 8.

Intel Implementation Pseudo-Code

FOR j := 0 to 3
        i := j*64
        m := j*32
        addr := base_addr + SignExtend64(vindex[m+31:m]) * ZeroExtend64(scale) * 8
        MEM[addr+63:addr] := a[i+63:i]
ENDFOR

_mm256_mask_i32scatter_epi64#

Tech:

AVX-512

Category:

Store

Header:

immintrin.h

Searchable:

AVX-512-Store-YMM

Register:

YMM 256 bit

Return Type:

void

Param Types:

void* base_addr, __mmask8 k, __m128i vindex, __m256i a, const int scale

Param ETypes:

UI64 base_addr, MASK k, SI32 vindex, UI64 a, IMM scale

void _mm256_mask_i32scatter_epi64(void* base_addr,
                                  __mmask8 k,
                                  __m128i vindex, __m256i a,
                                  const int scale)

Intel Description

Scatter 64-bit integers from “a” into memory using 32-bit indices. 64-bit elements are stored at addresses starting at “base_addr” and offset by each 32-bit element in “vindex” (each index is scaled by the factor in “scale”) subject to mask “k” (elements are not stored when the corresponding mask bit is not set). “scale” should be 1, 2, 4 or 8.

Intel Implementation Pseudo-Code

FOR j := 0 to 3
        i := j*64
        m := j*32
        IF k[j]
                addr := base_addr + SignExtend64(vindex[m+31:m]) * ZeroExtend64(scale) * 8
                MEM[addr+63:addr] := a[i+63:i]
        FI
ENDFOR

_mm256_i64scatter_epi32#

Tech:

AVX-512

Category:

Store

Header:

immintrin.h

Searchable:

AVX-512-Store-YMM

Register:

YMM 256 bit

Return Type:

void

Param Types:

void* base_addr, __m256i vindex, __m128i a, const int scale

Param ETypes:

UI32 base_addr, SI64 vindex, UI32 a, IMM scale

void _mm256_i64scatter_epi32(void* base_addr,
                             __m256i vindex, __m128i a,
                             const int scale)

Intel Description

Scatter 32-bit integers from “a” into memory using 64-bit indices. 32-bit elements are stored at addresses starting at “base_addr” and offset by each 64-bit element in “vindex” (each index is scaled by the factor in “scale”). “scale” should be 1, 2, 4 or 8.

Intel Implementation Pseudo-Code

FOR j := 0 to 3
        i := j*32
        m := j*64
        addr := base_addr + vindex[m+63:m] * ZeroExtend64(scale) * 8
        MEM[addr+31:addr] := a[i+31:i]
ENDFOR

_mm256_mask_i64scatter_epi32#

Tech:

AVX-512

Category:

Store

Header:

immintrin.h

Searchable:

AVX-512-Store-YMM

Register:

YMM 256 bit

Return Type:

void

Param Types:

void* base_addr, __mmask8 k, __m256i vindex, __m128i a, const int scale

Param ETypes:

UI32 base_addr, MASK k, SI64 vindex, UI32 a, IMM scale

void _mm256_mask_i64scatter_epi32(void* base_addr,
                                  __mmask8 k,
                                  __m256i vindex, __m128i a,
                                  const int scale)

Intel Description

Scatter 32-bit integers from “a” into memory using 64-bit indices. 32-bit elements are stored at addresses starting at “base_addr” and offset by each 64-bit element in “vindex” (each index is scaled by the factor in “scale”) subject to mask “k” (elements are not stored when the corresponding mask bit is not set). “scale” should be 1, 2, 4 or 8.

Intel Implementation Pseudo-Code

FOR j := 0 to 3
        i := j*32
        m := j*64
        IF k[j]
                addr := base_addr + vindex[m+63:m] * ZeroExtend64(scale) * 8
                MEM[addr+31:addr] := a[i+31:i]
        FI
ENDFOR

_mm256_i64scatter_epi64#

Tech:

AVX-512

Category:

Store

Header:

immintrin.h

Searchable:

AVX-512-Store-YMM

Register:

YMM 256 bit

Return Type:

void

Param Types:

void* base_addr, __m256i vindex, __m256i a, const int scale

Param ETypes:

UI64 base_addr, SI64 vindex, UI64 a, IMM scale

void _mm256_i64scatter_epi64(void* base_addr,
                             __m256i vindex, __m256i a,
                             const int scale)

Intel Description

Scatter 64-bit integers from “a” into memory using 64-bit indices. 64-bit elements are stored at addresses starting at “base_addr” and offset by each 64-bit element in “vindex” (each index is scaled by the factor in “scale”). “scale” should be 1, 2, 4 or 8.

Intel Implementation Pseudo-Code

FOR j := 0 to 3
        i := j*64
        m := j*64
        addr := base_addr + vindex[m+63:m] * ZeroExtend64(scale) * 8
        MEM[addr+63:addr] := a[i+63:i]
ENDFOR

_mm256_mask_i64scatter_epi64#

Tech:

AVX-512

Category:

Store

Header:

immintrin.h

Searchable:

AVX-512-Store-YMM

Register:

YMM 256 bit

Return Type:

void

Param Types:

void* base_addr, __mmask8 k, __m256i vindex, __m256i a, const int scale

Param ETypes:

UI64 base_addr, MASK k, SI64 vindex, UI64 a, IMM scale

void _mm256_mask_i64scatter_epi64(void* base_addr,
                                  __mmask8 k,
                                  __m256i vindex, __m256i a,
                                  const int scale)

Intel Description

Scatter 64-bit integers from “a” into memory using 64-bit indices. 64-bit elements are stored at addresses starting at “base_addr” and offset by each 64-bit element in “vindex” (each index is scaled by the factor in “scale”) subject to mask “k” (elements are not stored when the corresponding mask bit is not set). “scale” should be 1, 2, 4 or 8.

Intel Implementation Pseudo-Code

FOR j := 0 to 3
        i := j*64
        m := j*64
        IF k[j]
                addr := base_addr + vindex[m+63:m] * ZeroExtend64(scale) * 8
                MEM[addr+63:addr] := a[i+63:i]
        FI
ENDFOR

_mm256_i32scatter_pd#

Tech:

AVX-512

Category:

Store

Header:

immintrin.h

Searchable:

AVX-512-Store-YMM

Register:

YMM 256 bit

Return Type:

void

Param Types:

void* base_addr, __m128i vindex, __m256d a, const int scale

Param ETypes:

FP64 base_addr, SI32 vindex, FP64 a, IMM scale

void _mm256_i32scatter_pd(void* base_addr, __m128i vindex,
                          __m256d a, const int scale)

Intel Description

Scatter double-precision (64-bit) floating-point elements from “a” into memory using 32-bit indices. 64-bit elements are stored at addresses starting at “base_addr” and offset by each 32-bit element in “vindex” (each index is scaled by the factor in “scale”). “scale” should be 1, 2, 4 or 8.

Intel Implementation Pseudo-Code

FOR j := 0 to 3
        i := j*64
        m := j*32
        addr := base_addr + SignExtend64(vindex[m+31:m]) * ZeroExtend64(scale) * 8
        MEM[addr+63:addr] := a[i+63:i]
ENDFOR

_mm256_mask_i32scatter_pd#

Tech:

AVX-512

Category:

Store

Header:

immintrin.h

Searchable:

AVX-512-Store-YMM

Register:

YMM 256 bit

Return Type:

void

Param Types:

void* base_addr, __mmask8 k, __m128i vindex, __m256d a, const int scale

Param ETypes:

FP64 base_addr, MASK k, SI32 vindex, FP64 a, IMM scale

void _mm256_mask_i32scatter_pd(void* base_addr, __mmask8 k,
                               __m128i vindex, __m256d a,
                               const int scale)

Intel Description

Scatter double-precision (64-bit) floating-point elements from “a” into memory using 32-bit indices. 64-bit elements are stored at addresses starting at “base_addr” and offset by each 32-bit element in “vindex” (each index is scaled by the factor in “scale”) subject to mask “k” (elements are not stored when the corresponding mask bit is not set). “scale” should be 1, 2, 4 or 8.

Intel Implementation Pseudo-Code

FOR j := 0 to 3
        i := j*64
        m := j*32
        IF k[j]
                addr := base_addr + SignExtend64(vindex[m+31:m]) * ZeroExtend64(scale) * 8
                MEM[addr+63:addr] := a[i+63:i]
        FI
ENDFOR

_mm256_i32scatter_ps#

Tech:

AVX-512

Category:

Store

Header:

immintrin.h

Searchable:

AVX-512-Store-YMM

Register:

YMM 256 bit

Return Type:

void

Param Types:

void* base_addr, __m256i vindex, __m256 a, const int scale

Param ETypes:

FP32 base_addr, SI32 vindex, FP32 a, IMM scale

void _mm256_i32scatter_ps(void* base_addr, __m256i vindex,
                          __m256 a, const int scale)

Intel Description

Scatter single-precision (32-bit) floating-point elements from “a” into memory using 32-bit indices. 32-bit elements are stored at addresses starting at “base_addr” and offset by each 32-bit element in “vindex” (each index is scaled by the factor in “scale”). “scale” should be 1, 2, 4 or 8.

Intel Implementation Pseudo-Code

FOR j := 0 to 7
        i := j*32
        m := j*32
        addr := base_addr + SignExtend64(vindex[m+31:m]) * ZeroExtend64(scale) * 8
        MEM[addr+31:addr] := a[i+31:i]
ENDFOR

_mm256_mask_i32scatter_ps#

Tech:

AVX-512

Category:

Store

Header:

immintrin.h

Searchable:

AVX-512-Store-YMM

Register:

YMM 256 bit

Return Type:

void

Param Types:

void* base_addr, __mmask8 k, __m256i vindex, __m256 a, const int scale

Param ETypes:

FP32 base_addr, MASK k, SI32 vindex, FP32 a, IMM scale

void _mm256_mask_i32scatter_ps(void* base_addr, __mmask8 k,
                               __m256i vindex, __m256 a,
                               const int scale)

Intel Description

Scatter single-precision (32-bit) floating-point elements from “a” into memory using 32-bit indices. 32-bit elements are stored at addresses starting at “base_addr” and offset by each 32-bit element in “vindex” (each index is scaled by the factor in “scale”) subject to mask “k” (elements are not stored when the corresponding mask bit is not set). “scale” should be 1, 2, 4 or 8.

Intel Implementation Pseudo-Code

FOR j := 0 to 7
        i := j*32
        m := j*32
        IF k[j]
                addr := base_addr + SignExtend64(vindex[m+31:m]) * ZeroExtend64(scale) * 8
                MEM[addr+31:addr] := a[i+31:i]
        FI
ENDFOR

_mm256_i64scatter_pd#

Tech:

AVX-512

Category:

Store

Header:

immintrin.h

Searchable:

AVX-512-Store-YMM

Register:

YMM 256 bit

Return Type:

void

Param Types:

void* base_addr, __m256i vindex, __m256d a, const int scale

Param ETypes:

FP64 base_addr, SI64 vindex, FP64 a, IMM scale

void _mm256_i64scatter_pd(void* base_addr, __m256i vindex,
                          __m256d a, const int scale)

Intel Description

Scatter double-precision (64-bit) floating-point elements from “a” into memory using 64-bit indices. 64-bit elements are stored at addresses starting at “base_addr” and offset by each 64-bit element in “vindex” (each index is scaled by the factor in “scale”). “scale” should be 1, 2, 4 or 8.

Intel Implementation Pseudo-Code

FOR j := 0 to 3
        i := j*64
        m := j*64
        addr := base_addr + vindex[m+63:m] * ZeroExtend64(scale) * 8
        MEM[addr+63:addr] := a[i+63:i]
ENDFOR

_mm256_mask_i64scatter_pd#

Tech:

AVX-512

Category:

Store

Header:

immintrin.h

Searchable:

AVX-512-Store-YMM

Register:

YMM 256 bit

Return Type:

void

Param Types:

void* base_addr, __mmask8 k, __m256i vindex, __m256d a, const int scale

Param ETypes:

FP64 base_addr, MASK k, SI64 vindex, FP64 a, IMM scale

void _mm256_mask_i64scatter_pd(void* base_addr, __mmask8 k,
                               __m256i vindex, __m256d a,
                               const int scale)

Intel Description

Scatter double-precision (64-bit) floating-point elements from “a” into memory using 64-bit indices. 64-bit elements are stored at addresses starting at “base_addr” and offset by each 64-bit element in “vindex” (each index is scaled by the factor in “scale”) subject to mask “k” (elements are not stored when the corresponding mask bit is not set). “scale” should be 1, 2, 4 or 8.

Intel Implementation Pseudo-Code

FOR j := 0 to 3
        i := j*64
        m := j*64
        IF k[j]
                addr := base_addr + vindex[m+63:m] * ZeroExtend64(scale) * 8
                MEM[addr+63:addr] := a[i+63:i]
        FI
ENDFOR

_mm256_i64scatter_ps#

Tech:

AVX-512

Category:

Store

Header:

immintrin.h

Searchable:

AVX-512-Store-YMM

Register:

YMM 256 bit

Return Type:

void

Param Types:

void* base_addr, __m256i vindex, __m128 a, const int scale

Param ETypes:

FP32 base_addr, SI64 vindex, FP32 a, IMM scale

void _mm256_i64scatter_ps(void* base_addr, __m256i vindex,
                          __m128 a, const int scale)

Intel Description

Scatter single-precision (32-bit) floating-point elements from “a” into memory using 64-bit indices. 32-bit elements are stored at addresses starting at “base_addr” and offset by each 64-bit element in “vindex” (each index is scaled by the factor in “scale”). “scale” should be 1, 2, 4 or 8.

Intel Implementation Pseudo-Code

FOR j := 0 to 3
        i := j*32
        m := j*64
        addr := base_addr + vindex[m+63:m] * ZeroExtend64(scale) * 8
        MEM[addr+31:addr] := a[i+31:i]
ENDFOR

_mm256_mask_i64scatter_ps#

Tech:

AVX-512

Category:

Store

Header:

immintrin.h

Searchable:

AVX-512-Store-YMM

Register:

YMM 256 bit

Return Type:

void

Param Types:

void* base_addr, __mmask8 k, __m256i vindex, __m128 a, const int scale

Param ETypes:

FP32 base_addr, MASK k, SI64 vindex, FP32 a, IMM scale

void _mm256_mask_i64scatter_ps(void* base_addr, __mmask8 k,
                               __m256i vindex, __m128 a,
                               const int scale)

Intel Description

Scatter single-precision (32-bit) floating-point elements from “a” into memory using 64-bit indices. 32-bit elements are stored at addresses starting at “base_addr” and offset by each 64-bit element in “vindex” (each index is scaled by the factor in “scale”) subject to mask “k” (elements are not stored when the corresponding mask bit is not set). “scale” should be 1, 2, 4 or 8.

Intel Implementation Pseudo-Code

FOR j := 0 to 3
        i := j*32
        m := j*64
        IF k[j]
                addr := base_addr + vindex[m+63:m] * ZeroExtend64(scale) * 8
                MEM[addr+31:addr] := a[i+31:i]
        FI
ENDFOR

_mm256_storeu_epi64#

Tech:

AVX-512

Category:

Store

Header:

immintrin.h

Searchable:

AVX-512-Store-YMM

Register:

YMM 256 bit

Return Type:

void

Param Types:

void* mem_addr, __m256i a

Param ETypes:

UI64 mem_addr, UI64 a

void _mm256_storeu_epi64(void* mem_addr, __m256i a);

Intel Description

Store 256-bits (composed of 4 packed 64-bit integers) from “a” into memory.

“mem_addr” does not need to be aligned on any particular boundary.

Intel Implementation Pseudo-Code

MEM[mem_addr+255:mem_addr] := a[255:0]

_mm256_storeu_epi32#

Tech:

AVX-512

Category:

Store

Header:

immintrin.h

Searchable:

AVX-512-Store-YMM

Register:

YMM 256 bit

Return Type:

void

Param Types:

void* mem_addr, __m256i a

Param ETypes:

UI32 mem_addr, UI32 a

void _mm256_storeu_epi32(void* mem_addr, __m256i a);

Intel Description

Store 256-bits (composed of 8 packed 32-bit integers) from “a” into memory.

“mem_addr” does not need to be aligned on any particular boundary.

Intel Implementation Pseudo-Code

MEM[mem_addr+255:mem_addr] := a[255:0]

_mm256_store_epi64#

Tech:

AVX-512

Category:

Store

Header:

immintrin.h

Searchable:

AVX-512-Store-YMM

Register:

YMM 256 bit

Return Type:

void

Param Types:

void* mem_addr, __m256i a

Param ETypes:

UI64 mem_addr, UI64 a

void _mm256_store_epi64(void* mem_addr, __m256i a);

Intel Description

Store 256-bits (composed of 4 packed 64-bit integers) from “a” into memory.

“mem_addr” must be aligned on a 32-byte boundary or a general-protection exception may be generated.

Intel Implementation Pseudo-Code

MEM[mem_addr+255:mem_addr] := a[255:0]

_mm256_store_epi32#

Tech:

AVX-512

Category:

Store

Header:

immintrin.h

Searchable:

AVX-512-Store-YMM

Register:

YMM 256 bit

Return Type:

void

Param Types:

void* mem_addr, __m256i a

Param ETypes:

UI32 mem_addr, UI32 a

void _mm256_store_epi32(void* mem_addr, __m256i a);

Intel Description

Store 256-bits (composed of 8 packed 32-bit integers) from “a” into memory.

“mem_addr” must be aligned on a 32-byte boundary or a general-protection exception may be generated.

Intel Implementation Pseudo-Code

MEM[mem_addr+255:mem_addr] := a[255:0]

_mm256_mask_cvtepi32_storeu_epi8#

Tech:

AVX-512

Category:

Store

Header:

immintrin.h

Searchable:

AVX-512-Store-YMM

Register:

YMM 256 bit

Return Type:

void

Param Types:

void* base_addr, __mmask8 k, __m256i a

Param ETypes:

UI8 base_addr, MASK k, UI32 a

void _mm256_mask_cvtepi32_storeu_epi8(void* base_addr,
                                      __mmask8 k,
                                      __m256i a);

Intel Description

Convert packed 32-bit integers in “a” to packed 8-bit integers with truncation, and store the active results (those with their respective bit set in writemask “k”) to unaligned memory at “base_addr”.

Intel Implementation Pseudo-Code

FOR j := 0 to 7
        i := 32*j
        l := 8*j
        IF k[j]
                MEM[base_addr+l+7:base_addr+l] := Truncate8(a[i+31:i])
        FI
ENDFOR

_mm256_mask_cvtepi32_storeu_epi16#

Tech:

AVX-512

Category:

Store

Header:

immintrin.h

Searchable:

AVX-512-Store-YMM

Register:

YMM 256 bit

Return Type:

void

Param Types:

void* base_addr, __mmask8 k, __m256i a

Param ETypes:

UI16 base_addr, MASK k, UI32 a

void _mm256_mask_cvtepi32_storeu_epi16(void* base_addr,
                                       __mmask8 k,
                                       __m256i a);

Intel Description

Convert packed 32-bit integers in “a” to packed 16-bit integers with truncation, and store the active results (those with their respective bit set in writemask “k”) to unaligned memory at “base_addr”.

Intel Implementation Pseudo-Code

FOR j := 0 to 7
        i := 32*j
        l := 16*j
        IF k[j]
                MEM[base_addr+l+15:base_addr+l] := Truncate16(a[i+31:i])
        FI
ENDFOR

_mm256_mask_cvtepi64_storeu_epi8#

Tech:

AVX-512

Category:

Store

Header:

immintrin.h

Searchable:

AVX-512-Store-YMM

Register:

YMM 256 bit

Return Type:

void

Param Types:

void* base_addr, __mmask8 k, __m256i a

Param ETypes:

UI8 base_addr, MASK k, UI64 a

void _mm256_mask_cvtepi64_storeu_epi8(void* base_addr,
                                      __mmask8 k,
                                      __m256i a);

Intel Description

Convert packed 64-bit integers in “a” to packed 8-bit integers with truncation, and store the active results (those with their respective bit set in writemask “k”) to unaligned memory at “base_addr”.

Intel Implementation Pseudo-Code

FOR j := 0 to 3
        i := 64*j
        l := 8*j
        IF k[j]
                MEM[base_addr+l+7:base_addr+l] := Truncate8(a[i+63:i])
        FI
ENDFOR

_mm256_mask_cvtepi64_storeu_epi32#

Tech:

AVX-512

Category:

Store

Header:

immintrin.h

Searchable:

AVX-512-Store-YMM

Register:

YMM 256 bit

Return Type:

void

Param Types:

void* base_addr, __mmask8 k, __m256i a

Param ETypes:

UI32 base_addr, MASK k, UI64 a

void _mm256_mask_cvtepi64_storeu_epi32(void* base_addr,
                                       __mmask8 k,
                                       __m256i a);

Intel Description

Convert packed 64-bit integers in “a” to packed 32-bit integers with truncation, and store the active results (those with their respective bit set in writemask “k”) to unaligned memory at “base_addr”.

Intel Implementation Pseudo-Code

FOR j := 0 to 3
        i := 64*j
        l := 32*j
        IF k[j]
                MEM[base_addr+l+31:base_addr+l] := Truncate32(a[i+63:i])
        FI
ENDFOR

_mm256_mask_cvtepi64_storeu_epi16#

Tech:

AVX-512

Category:

Store

Header:

immintrin.h

Searchable:

AVX-512-Store-YMM

Register:

YMM 256 bit

Return Type:

void

Param Types:

void* base_addr, __mmask8 k, __m256i a

Param ETypes:

UI16 base_addr, MASK k, UI64 a

void _mm256_mask_cvtepi64_storeu_epi16(void* base_addr,
                                       __mmask8 k,
                                       __m256i a);

Intel Description

Convert packed 64-bit integers in “a” to packed 16-bit integers with truncation, and store the active results (those with their respective bit set in writemask “k”) to unaligned memory at “base_addr”.

Intel Implementation Pseudo-Code

FOR j := 0 to 3
        i := 64*j
        l := 16*j
        IF k[j]
                MEM[base_addr+l+15:base_addr+l] := Truncate16(a[i+63:i])
        FI
ENDFOR

_mm256_mask_cvtsepi32_storeu_epi8#

Tech:

AVX-512

Category:

Store

Header:

immintrin.h

Searchable:

AVX-512-Store-YMM

Register:

YMM 256 bit

Return Type:

void

Param Types:

void* base_addr, __mmask8 k, __m256i a

Param ETypes:

SI8 base_addr, MASK k, SI32 a

void _mm256_mask_cvtsepi32_storeu_epi8(void* base_addr,
                                       __mmask8 k,
                                       __m256i a);

Intel Description

Convert packed signed 32-bit integers in “a” to packed 8-bit integers with signed saturation, and store the active results (those with their respective bit set in writemask “k”) to unaligned memory at “base_addr”.

Intel Implementation Pseudo-Code

FOR j := 0 to 7
        i := 32*j
        l := 8*j
        IF k[j]
                MEM[base_addr+l+7:base_addr+l] := Saturate8(a[i+31:i])
        FI
ENDFOR

_mm256_mask_cvtsepi32_storeu_epi16#

Tech:

AVX-512

Category:

Store

Header:

immintrin.h

Searchable:

AVX-512-Store-YMM

Register:

YMM 256 bit

Return Type:

void

Param Types:

void* base_addr, __mmask8 k, __m256i a

Param ETypes:

SI16 base_addr, MASK k, SI32 a

void _mm256_mask_cvtsepi32_storeu_epi16(void* base_addr,
                                        __mmask8 k,
                                        __m256i a);

Intel Description

Convert packed signed 32-bit integers in “a” to packed 16-bit integers with signed saturation, and store the active results (those with their respective bit set in writemask “k”) to unaligned memory at “base_addr”.

Intel Implementation Pseudo-Code

FOR j := 0 to 7
        i := 32*j
        l := 16*j
        IF k[j]
                MEM[base_addr+l+15:base_addr+l] := Saturate16(a[i+31:i])
        FI
ENDFOR

_mm256_mask_cvtsepi64_storeu_epi8#

Tech:

AVX-512

Category:

Store

Header:

immintrin.h

Searchable:

AVX-512-Store-YMM

Register:

YMM 256 bit

Return Type:

void

Param Types:

void* base_addr, __mmask8 k, __m256i a

Param ETypes:

SI8 base_addr, MASK k, SI64 a

void _mm256_mask_cvtsepi64_storeu_epi8(void* base_addr,
                                       __mmask8 k,
                                       __m256i a);

Intel Description

Convert packed signed 64-bit integers in “a” to packed 8-bit integers with signed saturation, and store the active results (those with their respective bit set in writemask “k”) to unaligned memory at “base_addr”.

Intel Implementation Pseudo-Code

FOR j := 0 to 3
        i := 64*j
        l := 8*j
        IF k[j]
                MEM[base_addr+l+7:base_addr+l] := Saturate8(a[i+63:i])
        FI
ENDFOR

_mm256_mask_cvtsepi64_storeu_epi32#

Tech:

AVX-512

Category:

Store

Header:

immintrin.h

Searchable:

AVX-512-Store-YMM

Register:

YMM 256 bit

Return Type:

void

Param Types:

void* base_addr, __mmask8 k, __m256i a

Param ETypes:

SI32 base_addr, MASK k, SI64 a

void _mm256_mask_cvtsepi64_storeu_epi32(void* base_addr,
                                        __mmask8 k,
                                        __m256i a);

Intel Description

Convert packed signed 64-bit integers in “a” to packed 32-bit integers with signed saturation, and store the active results (those with their respective bit set in writemask “k”) to unaligned memory at “base_addr”.

Intel Implementation Pseudo-Code

FOR j := 0 to 3
        i := 64*j
        l := 32*j
        IF k[j]
                MEM[base_addr+l+31:base_addr+l] := Saturate32(a[i+63:i])
        FI
ENDFOR

_mm256_mask_cvtsepi64_storeu_epi16#

Tech:

AVX-512

Category:

Store

Header:

immintrin.h

Searchable:

AVX-512-Store-YMM

Register:

YMM 256 bit

Return Type:

void

Param Types:

void* base_addr, __mmask8 k, __m256i a

Param ETypes:

SI16 base_addr, MASK k, SI64 a

void _mm256_mask_cvtsepi64_storeu_epi16(void* base_addr,
                                        __mmask8 k,
                                        __m256i a);

Intel Description

Convert packed signed 64-bit integers in “a” to packed 16-bit integers with signed saturation, and store the active results (those with their respective bit set in writemask “k”) to unaligned memory at “base_addr”.

Intel Implementation Pseudo-Code

FOR j := 0 to 3
        i := 64*j
        l := 16*j
        IF k[j]
                MEM[base_addr+l+15:base_addr+l] := Saturate16(a[i+63:i])
        FI
ENDFOR

_mm256_mask_cvtusepi32_storeu_epi8#

Tech:

AVX-512

Category:

Store

Header:

immintrin.h

Searchable:

AVX-512-Store-YMM

Register:

YMM 256 bit

Return Type:

void

Param Types:

void* base_addr, __mmask8 k, __m256i a

Param ETypes:

UI8 base_addr, MASK k, UI32 a

void _mm256_mask_cvtusepi32_storeu_epi8(void* base_addr,
                                        __mmask8 k,
                                        __m256i a);

Intel Description

Convert packed unsigned 32-bit integers in “a” to packed unsigned 8-bit integers with unsigned saturation, and store the active results (those with their respective bit set in writemask “k”) to unaligned memory at “base_addr”.

Intel Implementation Pseudo-Code

FOR j := 0 to 7
        i := 32*j
        l := 8*j
        IF k[j]
                MEM[base_addr+l+7:base_addr+l] := SaturateU8(a[i+31:i])
        FI
ENDFOR

_mm256_mask_cvtusepi32_storeu_epi16#

Tech:

AVX-512

Category:

Store

Header:

immintrin.h

Searchable:

AVX-512-Store-YMM

Register:

YMM 256 bit

Return Type:

void

Param Types:

void* base_addr, __mmask8 k, __m256i a

Param ETypes:

UI16 base_addr, MASK k, UI32 a

void _mm256_mask_cvtusepi32_storeu_epi16(void* base_addr,
                                         __mmask8 k,
                                         __m256i a);

Intel Description

Convert packed unsigned 32-bit integers in “a” to packed unsigned 16-bit integers with unsigned saturation, and store the active results (those with their respective bit set in writemask “k”) to unaligned memory at “base_addr”.

Intel Implementation Pseudo-Code

FOR j := 0 to 7
        i := 32*j
        l := 16*j
        IF k[j]
                MEM[base_addr+l+15:base_addr+l] := SaturateU16(a[i+31:i])
        FI
ENDFOR

_mm256_mask_cvtusepi64_storeu_epi8#

Tech:

AVX-512

Category:

Store

Header:

immintrin.h

Searchable:

AVX-512-Store-YMM

Register:

YMM 256 bit

Return Type:

void

Param Types:

void* base_addr, __mmask8 k, __m256i a

Param ETypes:

UI8 base_addr, MASK k, UI64 a

void _mm256_mask_cvtusepi64_storeu_epi8(void* base_addr,
                                        __mmask8 k,
                                        __m256i a);

Intel Description

Convert packed unsigned 64-bit integers in “a” to packed unsigned 8-bit integers with unsigned saturation, and store the active results (those with their respective bit set in writemask “k”) to unaligned memory at “base_addr”.

Intel Implementation Pseudo-Code

FOR j := 0 to 3
        i := 64*j
        l := 8*j
        IF k[j]
                MEM[base_addr+l+7:base_addr+l] := SaturateU8(a[i+63:i])
        FI
ENDFOR

_mm256_mask_cvtusepi64_storeu_epi32#

Tech:

AVX-512

Category:

Store

Header:

immintrin.h

Searchable:

AVX-512-Store-YMM

Register:

YMM 256 bit

Return Type:

void

Param Types:

void* base_addr, __mmask8 k, __m256i a

Param ETypes:

UI32 base_addr, MASK k, UI64 a

void _mm256_mask_cvtusepi64_storeu_epi32(void* base_addr,
                                         __mmask8 k,
                                         __m256i a);

Intel Description

Convert packed unsigned 64-bit integers in “a” to packed unsigned 32-bit integers with unsigned saturation, and store the active results (those with their respective bit set in writemask “k”) to unaligned memory at “base_addr”.

Intel Implementation Pseudo-Code

FOR j := 0 to 3
        i := 64*j
        l := 32*j
        IF k[j]
                MEM[base_addr+l+31:base_addr+l] := SaturateU32(a[i+63:i])
        FI
ENDFOR

_mm256_mask_cvtusepi64_storeu_epi16#

Tech:

AVX-512

Category:

Store

Header:

immintrin.h

Searchable:

AVX-512-Store-YMM

Register:

YMM 256 bit

Return Type:

void

Param Types:

void* base_addr, __mmask8 k, __m256i a

Param ETypes:

UI16 base_addr, MASK k, UI64 a

void _mm256_mask_cvtusepi64_storeu_epi16(void* base_addr,
                                         __mmask8 k,
                                         __m256i a);

Intel Description

Convert packed unsigned 64-bit integers in “a” to packed unsigned 16-bit integers with unsigned saturation, and store the active results (those with their respective bit set in writemask “k”) to unaligned memory at “base_addr”.

Intel Implementation Pseudo-Code

FOR j := 0 to 3
        i := 64*j
        l := 16*j
        IF k[j]
                MEM[base_addr+l+15:base_addr+l] := SaturateU16(a[i+63:i])
        FI
ENDFOR

_mm256_store_ph#

Tech:

AVX-512

Category:

Store

Header:

immintrin.h

Searchable:

AVX-512-Store-YMM

Register:

YMM 256 bit

Return Type:

void

Param Types:

void * mem_addr, __m256h a

Param ETypes:

FP16 mem_addr, FP16 a

void _mm256_store_ph(void * mem_addr, __m256h a);

Intel Description

Store 256-bits (composed of 16 packed half-precision (16-bit) floating-point elements) from “a” into memory.

“mem_addr” must be aligned on a 32-byte boundary or a general-protection exception may be generated.

Intel Implementation Pseudo-Code

MEM[mem_addr+255:mem_addr] := a[255:0]

_mm256_storeu_ph#

Tech:

AVX-512

Category:

Store

Header:

immintrin.h

Searchable:

AVX-512-Store-YMM

Register:

YMM 256 bit

Return Type:

void

Param Types:

void * mem_addr, __m256h a

Param ETypes:

FP16 mem_addr, FP16 a

void _mm256_storeu_ph(void * mem_addr, __m256h a);

Intel Description

Store 256-bits (composed of 16 packed half-precision (16-bit) floating-point elements) from “a” into memory.

“mem_addr” does not need to be aligned on any particular boundary.

Intel Implementation Pseudo-Code

MEM[mem_addr+255:mem_addr] := a[255:0]