AVX-512-Store-YMM#
_mm256_mask_storeu_epi16#
- Tech:
AVX-512
- Category:
Store
- Header:
immintrin.h
- Searchable:
AVX-512-Store-YMM
- Register:
YMM 256 bit
- Return Type:
void
- Param Types:
void* mem_addr, __mmask16 k, __m256i a
- Param ETypes:
UI16 mem_addr, MASK k, UI16 a
void _mm256_mask_storeu_epi16(void* mem_addr, __mmask16 k,
__m256i a)
Intel Description
- Store packed 16-bit integers from “a” into memory using writemask “k”.
“mem_addr” does not need to be aligned on any particular boundary.
Intel Implementation Pseudo-Code
FOR j := 0 to 15
i := j*16
IF k[j]
MEM[mem_addr+i+15:mem_addr+i] := a[i+15:i]
FI
ENDFOR
_mm256_mask_storeu_epi8#
- Tech:
AVX-512
- Category:
Store
- Header:
immintrin.h
- Searchable:
AVX-512-Store-YMM
- Register:
YMM 256 bit
- Return Type:
void
- Param Types:
void* mem_addr, __mmask32 k, __m256i a
- Param ETypes:
UI8 mem_addr, MASK k, UI8 a
void _mm256_mask_storeu_epi8(void* mem_addr, __mmask32 k,
__m256i a)
Intel Description
- Store packed 8-bit integers from “a” into memory using writemask “k”.
“mem_addr” does not need to be aligned on any particular boundary.
Intel Implementation Pseudo-Code
FOR j := 0 to 31
i := j*8
IF k[j]
MEM[mem_addr+i+7:mem_addr+i] := a[i+7:i]
FI
ENDFOR
_mm256_storeu_epi16#
- Tech:
AVX-512
- Category:
Store
- Header:
immintrin.h
- Searchable:
AVX-512-Store-YMM
- Register:
YMM 256 bit
- Return Type:
void
- Param Types:
void* mem_addr, __m256i a
- Param ETypes:
UI16 mem_addr, UI16 a
void _mm256_storeu_epi16(void* mem_addr, __m256i a);
Intel Description
- Store 256-bits (composed of 16 packed 16-bit integers) from “a” into memory.
“mem_addr” does not need to be aligned on any particular boundary.
Intel Implementation Pseudo-Code
MEM[mem_addr+255:mem_addr] := a[255:0]
_mm256_storeu_epi8#
- Tech:
AVX-512
- Category:
Store
- Header:
immintrin.h
- Searchable:
AVX-512-Store-YMM
- Register:
YMM 256 bit
- Return Type:
void
- Param Types:
void* mem_addr, __m256i a
- Param ETypes:
UI8 mem_addr, UI8 a
void _mm256_storeu_epi8(void* mem_addr, __m256i a);
Intel Description
- Store 256-bits (composed of 32 packed 8-bit integers) from “a” into memory.
“mem_addr” does not need to be aligned on any particular boundary.
Intel Implementation Pseudo-Code
MEM[mem_addr+255:mem_addr] := a[255:0]
_mm256_mask_cvtsepi16_storeu_epi8#
- Tech:
AVX-512
- Category:
Store
- Header:
immintrin.h
- Searchable:
AVX-512-Store-YMM
- Register:
YMM 256 bit
- Return Type:
void
- Param Types:
void* base_addr, __mmask16 k, __m256i a
- Param ETypes:
SI8 base_addr, MASK k, SI16 a
void _mm256_mask_cvtsepi16_storeu_epi8(void* base_addr,
__mmask16 k,
__m256i a)
Intel Description
Convert packed signed 16-bit integers in “a” to packed 8-bit integers with signed saturation, and store the active results (those with their respective bit set in writemask “k”) to unaligned memory at “base_addr”.
Intel Implementation Pseudo-Code
FOR j := 0 to 15
i := 16*j
l := 8*j
IF k[j]
MEM[base_addr+l+7:base_addr+l] := Saturate8(a[i+15:i])
FI
ENDFOR
_mm256_mask_cvtusepi16_storeu_epi8#
- Tech:
AVX-512
- Category:
Store
- Header:
immintrin.h
- Searchable:
AVX-512-Store-YMM
- Register:
YMM 256 bit
- Return Type:
void
- Param Types:
void* base_addr, __mmask16 k, __m256i a
- Param ETypes:
UI8 base_addr, MASK k, UI16 a
void _mm256_mask_cvtusepi16_storeu_epi8(void* base_addr,
__mmask16 k,
__m256i a)
Intel Description
Convert packed unsigned 16-bit integers in “a” to packed unsigned 8-bit integers with unsigned saturation, and store the active results (those with their respective bit set in writemask “k”) to unaligned memory at “base_addr”.
Intel Implementation Pseudo-Code
FOR j := 0 to 15
i := 16*j
l := 8*j
IF k[j]
MEM[base_addr+l+7:base_addr+l] := SaturateU8(a[i+15:i])
FI
ENDFOR
_mm256_mask_cvtepi16_storeu_epi8#
- Tech:
AVX-512
- Category:
Store
- Header:
immintrin.h
- Searchable:
AVX-512-Store-YMM
- Register:
YMM 256 bit
- Return Type:
void
- Param Types:
void* base_addr, __mmask16 k, __m256i a
- Param ETypes:
UI8 base_addr, MASK k, UI16 a
void _mm256_mask_cvtepi16_storeu_epi8(void* base_addr,
__mmask16 k,
__m256i a)
Intel Description
Convert packed 16-bit integers in “a” to packed 8-bit integers with truncation, and store the active results (those with their respective bit set in writemask “k”) to unaligned memory at “base_addr”.
Intel Implementation Pseudo-Code
FOR j := 0 to 15
i := 16*j
l := 8*j
IF k[j]
MEM[base_addr+l+7:base_addr+l] := Truncate8(a[i+15:i])
FI
ENDFOR
_mm256_mask_compressstoreu_pd#
- Tech:
AVX-512
- Category:
Store
- Header:
immintrin.h
- Searchable:
AVX-512-Store-YMM
- Register:
YMM 256 bit
- Return Type:
void
- Param Types:
void* base_addr, __mmask8 k, __m256d a
- Param ETypes:
FP64 base_addr, MASK k, FP64 a
void _mm256_mask_compressstoreu_pd(void* base_addr,
__mmask8 k, __m256d a)
Intel Description
Contiguously store the active double-precision (64-bit) floating-point elements in “a” (those with their respective bit set in writemask “k”) to unaligned memory at “base_addr”.
Intel Implementation Pseudo-Code
size := 64
m := base_addr
FOR j := 0 to 3
i := j*64
IF k[j]
MEM[m+size-1:m] := a[i+63:i]
m := m + size
FI
ENDFOR
_mm256_mask_compressstoreu_ps#
- Tech:
AVX-512
- Category:
Store
- Header:
immintrin.h
- Searchable:
AVX-512-Store-YMM
- Register:
YMM 256 bit
- Return Type:
void
- Param Types:
void* base_addr, __mmask8 k, __m256 a
- Param ETypes:
FP32 base_addr, MASK k, FP32 a
void _mm256_mask_compressstoreu_ps(void* base_addr,
__mmask8 k, __m256 a)
Intel Description
Contiguously store the active single-precision (32-bit) floating-point elements in “a” (those with their respective bit set in writemask “k”) to unaligned memory at “base_addr”.
Intel Implementation Pseudo-Code
size := 32
m := base_addr
FOR j := 0 to 7
i := j*32
IF k[j]
MEM[m+size-1:m] := a[i+31:i]
m := m + size
FI
ENDFOR
_mm256_mask_store_pd#
- Tech:
AVX-512
- Category:
Store
- Header:
immintrin.h
- Searchable:
AVX-512-Store-YMM
- Register:
YMM 256 bit
- Return Type:
void
- Param Types:
void* mem_addr, __mmask8 k, __m256d a
- Param ETypes:
FP64 mem_addr, MASK k, FP64 a
void _mm256_mask_store_pd(void* mem_addr, __mmask8 k,
__m256d a)
Intel Description
- Store packed double-precision (64-bit) floating-point elements from “a” into memory using writemask “k”.
“mem_addr” must be aligned on a 32-byte boundary or a general-protection exception may be generated.
Intel Implementation Pseudo-Code
FOR j := 0 to 3
i := j*64
IF k[j]
MEM[mem_addr+i+63:mem_addr+i] := a[i+63:i]
FI
ENDFOR
_mm256_mask_store_ps#
- Tech:
AVX-512
- Category:
Store
- Header:
immintrin.h
- Searchable:
AVX-512-Store-YMM
- Register:
YMM 256 bit
- Return Type:
void
- Param Types:
void* mem_addr, __mmask8 k, __m256 a
- Param ETypes:
FP32 mem_addr, MASK k, FP32 a
void _mm256_mask_store_ps(void* mem_addr, __mmask8 k,
__m256 a)
Intel Description
- Store packed single-precision (32-bit) floating-point elements from “a” into memory using writemask “k”.
“mem_addr” must be aligned on a 32-byte boundary or a general-protection exception may be generated.
Intel Implementation Pseudo-Code
FOR j := 0 to 7
i := j*32
IF k[j]
MEM[mem_addr+i+31:mem_addr+i] := a[i+31:i]
FI
ENDFOR
_mm256_mask_store_epi32#
- Tech:
AVX-512
- Category:
Store
- Header:
immintrin.h
- Searchable:
AVX-512-Store-YMM
- Register:
YMM 256 bit
- Return Type:
void
- Param Types:
void* mem_addr, __mmask8 k, __m256i a
- Param ETypes:
UI32 mem_addr, MASK k, UI32 a
void _mm256_mask_store_epi32(void* mem_addr, __mmask8 k,
__m256i a)
Intel Description
- Store packed 32-bit integers from “a” into memory using writemask “k”.
“mem_addr” must be aligned on a 32-byte boundary or a general-protection exception may be generated.
Intel Implementation Pseudo-Code
FOR j := 0 to 7
i := j*32
IF k[j]
MEM[mem_addr+i+31:mem_addr+i] := a[i+31:i]
FI
ENDFOR
_mm256_mask_store_epi64#
- Tech:
AVX-512
- Category:
Store
- Header:
immintrin.h
- Searchable:
AVX-512-Store-YMM
- Register:
YMM 256 bit
- Return Type:
void
- Param Types:
void* mem_addr, __mmask8 k, __m256i a
- Param ETypes:
UI64 mem_addr, MASK k, UI64 a
void _mm256_mask_store_epi64(void* mem_addr, __mmask8 k,
__m256i a)
Intel Description
- Store packed 64-bit integers from “a” into memory using writemask “k”.
“mem_addr” must be aligned on a 32-byte boundary or a general-protection exception may be generated.
Intel Implementation Pseudo-Code
FOR j := 0 to 3
i := j*64
IF k[j]
MEM[mem_addr+i+63:mem_addr+i] := a[i+63:i]
FI
ENDFOR
_mm256_mask_storeu_epi32#
- Tech:
AVX-512
- Category:
Store
- Header:
immintrin.h
- Searchable:
AVX-512-Store-YMM
- Register:
YMM 256 bit
- Return Type:
void
- Param Types:
void* mem_addr, __mmask8 k, __m256i a
- Param ETypes:
UI32 mem_addr, MASK k, UI32 a
void _mm256_mask_storeu_epi32(void* mem_addr, __mmask8 k,
__m256i a)
Intel Description
- Store packed 32-bit integers from “a” into memory using writemask “k”.
“mem_addr” does not need to be aligned on any particular boundary.
Intel Implementation Pseudo-Code
FOR j := 0 to 7
i := j*32
IF k[j]
MEM[mem_addr+i+31:mem_addr+i] := a[i+31:i]
FI
ENDFOR
_mm256_mask_storeu_epi64#
- Tech:
AVX-512
- Category:
Store
- Header:
immintrin.h
- Searchable:
AVX-512-Store-YMM
- Register:
YMM 256 bit
- Return Type:
void
- Param Types:
void* mem_addr, __mmask8 k, __m256i a
- Param ETypes:
UI64 mem_addr, MASK k, UI64 a
void _mm256_mask_storeu_epi64(void* mem_addr, __mmask8 k,
__m256i a)
Intel Description
- Store packed 64-bit integers from “a” into memory using writemask “k”.
“mem_addr” does not need to be aligned on any particular boundary.
Intel Implementation Pseudo-Code
FOR j := 0 to 3
i := j*64
IF k[j]
MEM[mem_addr+i+63:mem_addr+i] := a[i+63:i]
FI
ENDFOR
_mm256_mask_storeu_pd#
- Tech:
AVX-512
- Category:
Store
- Header:
immintrin.h
- Searchable:
AVX-512-Store-YMM
- Register:
YMM 256 bit
- Return Type:
void
- Param Types:
void* mem_addr, __mmask8 k, __m256d a
- Param ETypes:
FP64 mem_addr, MASK k, FP64 a
void _mm256_mask_storeu_pd(void* mem_addr, __mmask8 k,
__m256d a)
Intel Description
- Store packed double-precision (64-bit) floating-point elements from “a” into memory using writemask “k”.
“mem_addr” does not need to be aligned on any particular boundary.
Intel Implementation Pseudo-Code
FOR j := 0 to 3
i := j*64
IF k[j]
MEM[mem_addr+i+63:mem_addr+i] := a[i+63:i]
FI
ENDFOR
_mm256_mask_storeu_ps#
- Tech:
AVX-512
- Category:
Store
- Header:
immintrin.h
- Searchable:
AVX-512-Store-YMM
- Register:
YMM 256 bit
- Return Type:
void
- Param Types:
void* mem_addr, __mmask8 k, __m256 a
- Param ETypes:
FP32 mem_addr, MASK k, FP32 a
void _mm256_mask_storeu_ps(void* mem_addr, __mmask8 k,
__m256 a)
Intel Description
- Store packed single-precision (32-bit) floating-point elements from “a” into memory using writemask “k”.
“mem_addr” does not need to be aligned on any particular boundary.
Intel Implementation Pseudo-Code
FOR j := 0 to 7
i := j*32
IF k[j]
MEM[mem_addr+i+31:mem_addr+i] := a[i+31:i]
FI
ENDFOR
_mm256_mask_compressstoreu_epi32#
- Tech:
AVX-512
- Category:
Store
- Header:
immintrin.h
- Searchable:
AVX-512-Store-YMM
- Register:
YMM 256 bit
- Return Type:
void
- Param Types:
void* base_addr, __mmask8 k, __m256i a
- Param ETypes:
UI32 base_addr, MASK k, UI32 a
void _mm256_mask_compressstoreu_epi32(void* base_addr,
__mmask8 k,
__m256i a)
Intel Description
Contiguously store the active 32-bit integers in “a” (those with their respective bit set in writemask “k”) to unaligned memory at “base_addr”.
Intel Implementation Pseudo-Code
size := 32
m := base_addr
FOR j := 0 to 7
i := j*32
IF k[j]
MEM[m+size-1:m] := a[i+31:i]
m := m + size
FI
ENDFOR
_mm256_mask_compressstoreu_epi64#
- Tech:
AVX-512
- Category:
Store
- Header:
immintrin.h
- Searchable:
AVX-512-Store-YMM
- Register:
YMM 256 bit
- Return Type:
void
- Param Types:
void* base_addr, __mmask8 k, __m256i a
- Param ETypes:
UI64 base_addr, MASK k, UI64 a
void _mm256_mask_compressstoreu_epi64(void* base_addr,
__mmask8 k,
__m256i a)
Intel Description
Contiguously store the active 64-bit integers in “a” (those with their respective bit set in writemask “k”) to unaligned memory at “base_addr”.
Intel Implementation Pseudo-Code
size := 64
m := base_addr
FOR j := 0 to 3
i := j*64
IF k[j]
MEM[m+size-1:m] := a[i+63:i]
m := m + size
FI
ENDFOR
_mm256_i32scatter_epi32#
- Tech:
AVX-512
- Category:
Store
- Header:
immintrin.h
- Searchable:
AVX-512-Store-YMM
- Register:
YMM 256 bit
- Return Type:
void
- Param Types:
void* base_addr, __m256i vindex, __m256i a, const int scale
- Param ETypes:
UI32 base_addr, SI32 vindex, UI32 a, IMM scale
void _mm256_i32scatter_epi32(void* base_addr,
__m256i vindex, __m256i a,
const int scale)
Intel Description
Scatter 32-bit integers from “a” into memory using 32-bit indices. 32-bit elements are stored at addresses starting at “base_addr” and offset by each 32-bit element in “vindex” (each index is scaled by the factor in “scale”). “scale” should be 1, 2, 4 or 8.
Intel Implementation Pseudo-Code
FOR j := 0 to 7
i := j*32
m := j*32
addr := base_addr + SignExtend64(vindex[m+31:m]) * ZeroExtend64(scale) * 8
MEM[addr+31:addr] := a[i+31:i]
ENDFOR
_mm256_mask_i32scatter_epi32#
- Tech:
AVX-512
- Category:
Store
- Header:
immintrin.h
- Searchable:
AVX-512-Store-YMM
- Register:
YMM 256 bit
- Return Type:
void
- Param Types:
void* base_addr, __mmask8 k, __m256i vindex, __m256i a, const int scale
- Param ETypes:
UI32 base_addr, MASK k, SI32 vindex, UI32 a, IMM scale
void _mm256_mask_i32scatter_epi32(void* base_addr,
__mmask8 k,
__m256i vindex, __m256i a,
const int scale)
Intel Description
Scatter 32-bit integers from “a” into memory using 32-bit indices. 32-bit elements are stored at addresses starting at “base_addr” and offset by each 32-bit element in “vindex” (each index is scaled by the factor in “scale”) subject to mask “k” (elements are not stored when the corresponding mask bit is not set). “scale” should be 1, 2, 4 or 8.
Intel Implementation Pseudo-Code
FOR j := 0 to 7
i := j*32
m := j*32
IF k[j]
addr := base_addr + SignExtend64(vindex[m+31:m]) * ZeroExtend64(scale) * 8
MEM[addr+31:addr] := a[i+31:i]
FI
ENDFOR
_mm256_i32scatter_epi64#
- Tech:
AVX-512
- Category:
Store
- Header:
immintrin.h
- Searchable:
AVX-512-Store-YMM
- Register:
YMM 256 bit
- Return Type:
void
- Param Types:
void* base_addr, __m128i vindex, __m256i a, const int scale
- Param ETypes:
UI64 base_addr, SI32 vindex, UI64 a, IMM scale
void _mm256_i32scatter_epi64(void* base_addr,
__m128i vindex, __m256i a,
const int scale)
Intel Description
Scatter 64-bit integers from “a” into memory using 32-bit indices. 64-bit elements are stored at addresses starting at “base_addr” and offset by each 32-bit element in “vindex” (each index is scaled by the factor in “scale”). “scale” should be 1, 2, 4 or 8.
Intel Implementation Pseudo-Code
FOR j := 0 to 3
i := j*64
m := j*32
addr := base_addr + SignExtend64(vindex[m+31:m]) * ZeroExtend64(scale) * 8
MEM[addr+63:addr] := a[i+63:i]
ENDFOR
_mm256_mask_i32scatter_epi64#
- Tech:
AVX-512
- Category:
Store
- Header:
immintrin.h
- Searchable:
AVX-512-Store-YMM
- Register:
YMM 256 bit
- Return Type:
void
- Param Types:
void* base_addr, __mmask8 k, __m128i vindex, __m256i a, const int scale
- Param ETypes:
UI64 base_addr, MASK k, SI32 vindex, UI64 a, IMM scale
void _mm256_mask_i32scatter_epi64(void* base_addr,
__mmask8 k,
__m128i vindex, __m256i a,
const int scale)
Intel Description
Scatter 64-bit integers from “a” into memory using 32-bit indices. 64-bit elements are stored at addresses starting at “base_addr” and offset by each 32-bit element in “vindex” (each index is scaled by the factor in “scale”) subject to mask “k” (elements are not stored when the corresponding mask bit is not set). “scale” should be 1, 2, 4 or 8.
Intel Implementation Pseudo-Code
FOR j := 0 to 3
i := j*64
m := j*32
IF k[j]
addr := base_addr + SignExtend64(vindex[m+31:m]) * ZeroExtend64(scale) * 8
MEM[addr+63:addr] := a[i+63:i]
FI
ENDFOR
_mm256_i64scatter_epi32#
- Tech:
AVX-512
- Category:
Store
- Header:
immintrin.h
- Searchable:
AVX-512-Store-YMM
- Register:
YMM 256 bit
- Return Type:
void
- Param Types:
void* base_addr, __m256i vindex, __m128i a, const int scale
- Param ETypes:
UI32 base_addr, SI64 vindex, UI32 a, IMM scale
void _mm256_i64scatter_epi32(void* base_addr,
__m256i vindex, __m128i a,
const int scale)
Intel Description
Scatter 32-bit integers from “a” into memory using 64-bit indices. 32-bit elements are stored at addresses starting at “base_addr” and offset by each 64-bit element in “vindex” (each index is scaled by the factor in “scale”). “scale” should be 1, 2, 4 or 8.
Intel Implementation Pseudo-Code
FOR j := 0 to 3
i := j*32
m := j*64
addr := base_addr + vindex[m+63:m] * ZeroExtend64(scale) * 8
MEM[addr+31:addr] := a[i+31:i]
ENDFOR
_mm256_mask_i64scatter_epi32#
- Tech:
AVX-512
- Category:
Store
- Header:
immintrin.h
- Searchable:
AVX-512-Store-YMM
- Register:
YMM 256 bit
- Return Type:
void
- Param Types:
void* base_addr, __mmask8 k, __m256i vindex, __m128i a, const int scale
- Param ETypes:
UI32 base_addr, MASK k, SI64 vindex, UI32 a, IMM scale
void _mm256_mask_i64scatter_epi32(void* base_addr,
__mmask8 k,
__m256i vindex, __m128i a,
const int scale)
Intel Description
Scatter 32-bit integers from “a” into memory using 64-bit indices. 32-bit elements are stored at addresses starting at “base_addr” and offset by each 64-bit element in “vindex” (each index is scaled by the factor in “scale”) subject to mask “k” (elements are not stored when the corresponding mask bit is not set). “scale” should be 1, 2, 4 or 8.
Intel Implementation Pseudo-Code
FOR j := 0 to 3
i := j*32
m := j*64
IF k[j]
addr := base_addr + vindex[m+63:m] * ZeroExtend64(scale) * 8
MEM[addr+31:addr] := a[i+31:i]
FI
ENDFOR
_mm256_i64scatter_epi64#
- Tech:
AVX-512
- Category:
Store
- Header:
immintrin.h
- Searchable:
AVX-512-Store-YMM
- Register:
YMM 256 bit
- Return Type:
void
- Param Types:
void* base_addr, __m256i vindex, __m256i a, const int scale
- Param ETypes:
UI64 base_addr, SI64 vindex, UI64 a, IMM scale
void _mm256_i64scatter_epi64(void* base_addr,
__m256i vindex, __m256i a,
const int scale)
Intel Description
Scatter 64-bit integers from “a” into memory using 64-bit indices. 64-bit elements are stored at addresses starting at “base_addr” and offset by each 64-bit element in “vindex” (each index is scaled by the factor in “scale”). “scale” should be 1, 2, 4 or 8.
Intel Implementation Pseudo-Code
FOR j := 0 to 3
i := j*64
m := j*64
addr := base_addr + vindex[m+63:m] * ZeroExtend64(scale) * 8
MEM[addr+63:addr] := a[i+63:i]
ENDFOR
_mm256_mask_i64scatter_epi64#
- Tech:
AVX-512
- Category:
Store
- Header:
immintrin.h
- Searchable:
AVX-512-Store-YMM
- Register:
YMM 256 bit
- Return Type:
void
- Param Types:
void* base_addr, __mmask8 k, __m256i vindex, __m256i a, const int scale
- Param ETypes:
UI64 base_addr, MASK k, SI64 vindex, UI64 a, IMM scale
void _mm256_mask_i64scatter_epi64(void* base_addr,
__mmask8 k,
__m256i vindex, __m256i a,
const int scale)
Intel Description
Scatter 64-bit integers from “a” into memory using 64-bit indices. 64-bit elements are stored at addresses starting at “base_addr” and offset by each 64-bit element in “vindex” (each index is scaled by the factor in “scale”) subject to mask “k” (elements are not stored when the corresponding mask bit is not set). “scale” should be 1, 2, 4 or 8.
Intel Implementation Pseudo-Code
FOR j := 0 to 3
i := j*64
m := j*64
IF k[j]
addr := base_addr + vindex[m+63:m] * ZeroExtend64(scale) * 8
MEM[addr+63:addr] := a[i+63:i]
FI
ENDFOR
_mm256_i32scatter_pd#
- Tech:
AVX-512
- Category:
Store
- Header:
immintrin.h
- Searchable:
AVX-512-Store-YMM
- Register:
YMM 256 bit
- Return Type:
void
- Param Types:
void* base_addr, __m128i vindex, __m256d a, const int scale
- Param ETypes:
FP64 base_addr, SI32 vindex, FP64 a, IMM scale
void _mm256_i32scatter_pd(void* base_addr, __m128i vindex,
__m256d a, const int scale)
Intel Description
Scatter double-precision (64-bit) floating-point elements from “a” into memory using 32-bit indices. 64-bit elements are stored at addresses starting at “base_addr” and offset by each 32-bit element in “vindex” (each index is scaled by the factor in “scale”). “scale” should be 1, 2, 4 or 8.
Intel Implementation Pseudo-Code
FOR j := 0 to 3
i := j*64
m := j*32
addr := base_addr + SignExtend64(vindex[m+31:m]) * ZeroExtend64(scale) * 8
MEM[addr+63:addr] := a[i+63:i]
ENDFOR
_mm256_mask_i32scatter_pd#
- Tech:
AVX-512
- Category:
Store
- Header:
immintrin.h
- Searchable:
AVX-512-Store-YMM
- Register:
YMM 256 bit
- Return Type:
void
- Param Types:
void* base_addr, __mmask8 k, __m128i vindex, __m256d a, const int scale
- Param ETypes:
FP64 base_addr, MASK k, SI32 vindex, FP64 a, IMM scale
void _mm256_mask_i32scatter_pd(void* base_addr, __mmask8 k,
__m128i vindex, __m256d a,
const int scale)
Intel Description
Scatter double-precision (64-bit) floating-point elements from “a” into memory using 32-bit indices. 64-bit elements are stored at addresses starting at “base_addr” and offset by each 32-bit element in “vindex” (each index is scaled by the factor in “scale”) subject to mask “k” (elements are not stored when the corresponding mask bit is not set). “scale” should be 1, 2, 4 or 8.
Intel Implementation Pseudo-Code
FOR j := 0 to 3
i := j*64
m := j*32
IF k[j]
addr := base_addr + SignExtend64(vindex[m+31:m]) * ZeroExtend64(scale) * 8
MEM[addr+63:addr] := a[i+63:i]
FI
ENDFOR
_mm256_i32scatter_ps#
- Tech:
AVX-512
- Category:
Store
- Header:
immintrin.h
- Searchable:
AVX-512-Store-YMM
- Register:
YMM 256 bit
- Return Type:
void
- Param Types:
void* base_addr, __m256i vindex, __m256 a, const int scale
- Param ETypes:
FP32 base_addr, SI32 vindex, FP32 a, IMM scale
void _mm256_i32scatter_ps(void* base_addr, __m256i vindex,
__m256 a, const int scale)
Intel Description
Scatter single-precision (32-bit) floating-point elements from “a” into memory using 32-bit indices. 32-bit elements are stored at addresses starting at “base_addr” and offset by each 32-bit element in “vindex” (each index is scaled by the factor in “scale”). “scale” should be 1, 2, 4 or 8.
Intel Implementation Pseudo-Code
FOR j := 0 to 7
i := j*32
m := j*32
addr := base_addr + SignExtend64(vindex[m+31:m]) * ZeroExtend64(scale) * 8
MEM[addr+31:addr] := a[i+31:i]
ENDFOR
_mm256_mask_i32scatter_ps#
- Tech:
AVX-512
- Category:
Store
- Header:
immintrin.h
- Searchable:
AVX-512-Store-YMM
- Register:
YMM 256 bit
- Return Type:
void
- Param Types:
void* base_addr, __mmask8 k, __m256i vindex, __m256 a, const int scale
- Param ETypes:
FP32 base_addr, MASK k, SI32 vindex, FP32 a, IMM scale
void _mm256_mask_i32scatter_ps(void* base_addr, __mmask8 k,
__m256i vindex, __m256 a,
const int scale)
Intel Description
Scatter single-precision (32-bit) floating-point elements from “a” into memory using 32-bit indices. 32-bit elements are stored at addresses starting at “base_addr” and offset by each 32-bit element in “vindex” (each index is scaled by the factor in “scale”) subject to mask “k” (elements are not stored when the corresponding mask bit is not set). “scale” should be 1, 2, 4 or 8.
Intel Implementation Pseudo-Code
FOR j := 0 to 7
i := j*32
m := j*32
IF k[j]
addr := base_addr + SignExtend64(vindex[m+31:m]) * ZeroExtend64(scale) * 8
MEM[addr+31:addr] := a[i+31:i]
FI
ENDFOR
_mm256_i64scatter_pd#
- Tech:
AVX-512
- Category:
Store
- Header:
immintrin.h
- Searchable:
AVX-512-Store-YMM
- Register:
YMM 256 bit
- Return Type:
void
- Param Types:
void* base_addr, __m256i vindex, __m256d a, const int scale
- Param ETypes:
FP64 base_addr, SI64 vindex, FP64 a, IMM scale
void _mm256_i64scatter_pd(void* base_addr, __m256i vindex,
__m256d a, const int scale)
Intel Description
Scatter double-precision (64-bit) floating-point elements from “a” into memory using 64-bit indices. 64-bit elements are stored at addresses starting at “base_addr” and offset by each 64-bit element in “vindex” (each index is scaled by the factor in “scale”). “scale” should be 1, 2, 4 or 8.
Intel Implementation Pseudo-Code
FOR j := 0 to 3
i := j*64
m := j*64
addr := base_addr + vindex[m+63:m] * ZeroExtend64(scale) * 8
MEM[addr+63:addr] := a[i+63:i]
ENDFOR
_mm256_mask_i64scatter_pd#
- Tech:
AVX-512
- Category:
Store
- Header:
immintrin.h
- Searchable:
AVX-512-Store-YMM
- Register:
YMM 256 bit
- Return Type:
void
- Param Types:
void* base_addr, __mmask8 k, __m256i vindex, __m256d a, const int scale
- Param ETypes:
FP64 base_addr, MASK k, SI64 vindex, FP64 a, IMM scale
void _mm256_mask_i64scatter_pd(void* base_addr, __mmask8 k,
__m256i vindex, __m256d a,
const int scale)
Intel Description
Scatter double-precision (64-bit) floating-point elements from “a” into memory using 64-bit indices. 64-bit elements are stored at addresses starting at “base_addr” and offset by each 64-bit element in “vindex” (each index is scaled by the factor in “scale”) subject to mask “k” (elements are not stored when the corresponding mask bit is not set). “scale” should be 1, 2, 4 or 8.
Intel Implementation Pseudo-Code
FOR j := 0 to 3
i := j*64
m := j*64
IF k[j]
addr := base_addr + vindex[m+63:m] * ZeroExtend64(scale) * 8
MEM[addr+63:addr] := a[i+63:i]
FI
ENDFOR
_mm256_i64scatter_ps#
- Tech:
AVX-512
- Category:
Store
- Header:
immintrin.h
- Searchable:
AVX-512-Store-YMM
- Register:
YMM 256 bit
- Return Type:
void
- Param Types:
void* base_addr, __m256i vindex, __m128 a, const int scale
- Param ETypes:
FP32 base_addr, SI64 vindex, FP32 a, IMM scale
void _mm256_i64scatter_ps(void* base_addr, __m256i vindex,
__m128 a, const int scale)
Intel Description
Scatter single-precision (32-bit) floating-point elements from “a” into memory using 64-bit indices. 32-bit elements are stored at addresses starting at “base_addr” and offset by each 64-bit element in “vindex” (each index is scaled by the factor in “scale”). “scale” should be 1, 2, 4 or 8.
Intel Implementation Pseudo-Code
FOR j := 0 to 3
i := j*32
m := j*64
addr := base_addr + vindex[m+63:m] * ZeroExtend64(scale) * 8
MEM[addr+31:addr] := a[i+31:i]
ENDFOR
_mm256_mask_i64scatter_ps#
- Tech:
AVX-512
- Category:
Store
- Header:
immintrin.h
- Searchable:
AVX-512-Store-YMM
- Register:
YMM 256 bit
- Return Type:
void
- Param Types:
void* base_addr, __mmask8 k, __m256i vindex, __m128 a, const int scale
- Param ETypes:
FP32 base_addr, MASK k, SI64 vindex, FP32 a, IMM scale
void _mm256_mask_i64scatter_ps(void* base_addr, __mmask8 k,
__m256i vindex, __m128 a,
const int scale)
Intel Description
Scatter single-precision (32-bit) floating-point elements from “a” into memory using 64-bit indices. 32-bit elements are stored at addresses starting at “base_addr” and offset by each 64-bit element in “vindex” (each index is scaled by the factor in “scale”) subject to mask “k” (elements are not stored when the corresponding mask bit is not set). “scale” should be 1, 2, 4 or 8.
Intel Implementation Pseudo-Code
FOR j := 0 to 3
i := j*32
m := j*64
IF k[j]
addr := base_addr + vindex[m+63:m] * ZeroExtend64(scale) * 8
MEM[addr+31:addr] := a[i+31:i]
FI
ENDFOR
_mm256_storeu_epi64#
- Tech:
AVX-512
- Category:
Store
- Header:
immintrin.h
- Searchable:
AVX-512-Store-YMM
- Register:
YMM 256 bit
- Return Type:
void
- Param Types:
void* mem_addr, __m256i a
- Param ETypes:
UI64 mem_addr, UI64 a
void _mm256_storeu_epi64(void* mem_addr, __m256i a);
Intel Description
- Store 256-bits (composed of 4 packed 64-bit integers) from “a” into memory.
“mem_addr” does not need to be aligned on any particular boundary.
Intel Implementation Pseudo-Code
MEM[mem_addr+255:mem_addr] := a[255:0]
_mm256_storeu_epi32#
- Tech:
AVX-512
- Category:
Store
- Header:
immintrin.h
- Searchable:
AVX-512-Store-YMM
- Register:
YMM 256 bit
- Return Type:
void
- Param Types:
void* mem_addr, __m256i a
- Param ETypes:
UI32 mem_addr, UI32 a
void _mm256_storeu_epi32(void* mem_addr, __m256i a);
Intel Description
- Store 256-bits (composed of 8 packed 32-bit integers) from “a” into memory.
“mem_addr” does not need to be aligned on any particular boundary.
Intel Implementation Pseudo-Code
MEM[mem_addr+255:mem_addr] := a[255:0]
_mm256_store_epi64#
- Tech:
AVX-512
- Category:
Store
- Header:
immintrin.h
- Searchable:
AVX-512-Store-YMM
- Register:
YMM 256 bit
- Return Type:
void
- Param Types:
void* mem_addr, __m256i a
- Param ETypes:
UI64 mem_addr, UI64 a
void _mm256_store_epi64(void* mem_addr, __m256i a);
Intel Description
- Store 256-bits (composed of 4 packed 64-bit integers) from “a” into memory.
“mem_addr” must be aligned on a 32-byte boundary or a general-protection exception may be generated.
Intel Implementation Pseudo-Code
MEM[mem_addr+255:mem_addr] := a[255:0]
_mm256_store_epi32#
- Tech:
AVX-512
- Category:
Store
- Header:
immintrin.h
- Searchable:
AVX-512-Store-YMM
- Register:
YMM 256 bit
- Return Type:
void
- Param Types:
void* mem_addr, __m256i a
- Param ETypes:
UI32 mem_addr, UI32 a
void _mm256_store_epi32(void* mem_addr, __m256i a);
Intel Description
- Store 256-bits (composed of 8 packed 32-bit integers) from “a” into memory.
“mem_addr” must be aligned on a 32-byte boundary or a general-protection exception may be generated.
Intel Implementation Pseudo-Code
MEM[mem_addr+255:mem_addr] := a[255:0]
_mm256_mask_cvtepi32_storeu_epi8#
- Tech:
AVX-512
- Category:
Store
- Header:
immintrin.h
- Searchable:
AVX-512-Store-YMM
- Register:
YMM 256 bit
- Return Type:
void
- Param Types:
void* base_addr, __mmask8 k, __m256i a
- Param ETypes:
UI8 base_addr, MASK k, UI32 a
void _mm256_mask_cvtepi32_storeu_epi8(void* base_addr,
__mmask8 k,
__m256i a)
Intel Description
Convert packed 32-bit integers in “a” to packed 8-bit integers with truncation, and store the active results (those with their respective bit set in writemask “k”) to unaligned memory at “base_addr”.
Intel Implementation Pseudo-Code
FOR j := 0 to 7
i := 32*j
l := 8*j
IF k[j]
MEM[base_addr+l+7:base_addr+l] := Truncate8(a[i+31:i])
FI
ENDFOR
_mm256_mask_cvtepi32_storeu_epi16#
- Tech:
AVX-512
- Category:
Store
- Header:
immintrin.h
- Searchable:
AVX-512-Store-YMM
- Register:
YMM 256 bit
- Return Type:
void
- Param Types:
void* base_addr, __mmask8 k, __m256i a
- Param ETypes:
UI16 base_addr, MASK k, UI32 a
void _mm256_mask_cvtepi32_storeu_epi16(void* base_addr,
__mmask8 k,
__m256i a)
Intel Description
Convert packed 32-bit integers in “a” to packed 16-bit integers with truncation, and store the active results (those with their respective bit set in writemask “k”) to unaligned memory at “base_addr”.
Intel Implementation Pseudo-Code
FOR j := 0 to 7
i := 32*j
l := 16*j
IF k[j]
MEM[base_addr+l+15:base_addr+l] := Truncate16(a[i+31:i])
FI
ENDFOR
_mm256_mask_cvtepi64_storeu_epi8#
- Tech:
AVX-512
- Category:
Store
- Header:
immintrin.h
- Searchable:
AVX-512-Store-YMM
- Register:
YMM 256 bit
- Return Type:
void
- Param Types:
void* base_addr, __mmask8 k, __m256i a
- Param ETypes:
UI8 base_addr, MASK k, UI64 a
void _mm256_mask_cvtepi64_storeu_epi8(void* base_addr,
__mmask8 k,
__m256i a)
Intel Description
Convert packed 64-bit integers in “a” to packed 8-bit integers with truncation, and store the active results (those with their respective bit set in writemask “k”) to unaligned memory at “base_addr”.
Intel Implementation Pseudo-Code
FOR j := 0 to 3
i := 64*j
l := 8*j
IF k[j]
MEM[base_addr+l+7:base_addr+l] := Truncate8(a[i+63:i])
FI
ENDFOR
_mm256_mask_cvtepi64_storeu_epi32#
- Tech:
AVX-512
- Category:
Store
- Header:
immintrin.h
- Searchable:
AVX-512-Store-YMM
- Register:
YMM 256 bit
- Return Type:
void
- Param Types:
void* base_addr, __mmask8 k, __m256i a
- Param ETypes:
UI32 base_addr, MASK k, UI64 a
void _mm256_mask_cvtepi64_storeu_epi32(void* base_addr,
__mmask8 k,
__m256i a)
Intel Description
Convert packed 64-bit integers in “a” to packed 32-bit integers with truncation, and store the active results (those with their respective bit set in writemask “k”) to unaligned memory at “base_addr”.
Intel Implementation Pseudo-Code
FOR j := 0 to 3
i := 64*j
l := 32*j
IF k[j]
MEM[base_addr+l+31:base_addr+l] := Truncate32(a[i+63:i])
FI
ENDFOR
_mm256_mask_cvtepi64_storeu_epi16#
- Tech:
AVX-512
- Category:
Store
- Header:
immintrin.h
- Searchable:
AVX-512-Store-YMM
- Register:
YMM 256 bit
- Return Type:
void
- Param Types:
void* base_addr, __mmask8 k, __m256i a
- Param ETypes:
UI16 base_addr, MASK k, UI64 a
void _mm256_mask_cvtepi64_storeu_epi16(void* base_addr,
__mmask8 k,
__m256i a);
Intel Description
Convert packed 64-bit integers in “a” to packed 16-bit integers with truncation, and store the active results (those with their respective bit set in writemask “k”) to unaligned memory at “base_addr”.
Intel Implementation Pseudo-Code
FOR j := 0 to 3
i := 64*j
l := 16*j
IF k[j]
MEM[base_addr+l+15:base_addr+l] := Truncate16(a[i+63:i])
FI
ENDFOR
_mm256_mask_cvtsepi32_storeu_epi8#
- Tech:
AVX-512
- Category:
Store
- Header:
immintrin.h
- Searchable:
AVX-512-Store-YMM
- Register:
YMM 256 bit
- Return Type:
void
- Param Types:
void* base_addr, __mmask8 k, __m256i a
- Param ETypes:
SI8 base_addr, MASK k, SI32 a
void _mm256_mask_cvtsepi32_storeu_epi8(void* base_addr,
__mmask8 k,
__m256i a);
Intel Description
Convert packed signed 32-bit integers in “a” to packed 8-bit integers with signed saturation, and store the active results (those with their respective bit set in writemask “k”) to unaligned memory at “base_addr”.
Intel Implementation Pseudo-Code
FOR j := 0 to 7
i := 32*j
l := 8*j
IF k[j]
MEM[base_addr+l+7:base_addr+l] := Saturate8(a[i+31:i])
FI
ENDFOR
_mm256_mask_cvtsepi32_storeu_epi16#
- Tech:
AVX-512
- Category:
Store
- Header:
immintrin.h
- Searchable:
AVX-512-Store-YMM
- Register:
YMM 256 bit
- Return Type:
void
- Param Types:
void* base_addr, __mmask8 k, __m256i a
- Param ETypes:
SI16 base_addr, MASK k, SI32 a
void _mm256_mask_cvtsepi32_storeu_epi16(void* base_addr,
__mmask8 k,
__m256i a);
Intel Description
Convert packed signed 32-bit integers in “a” to packed 16-bit integers with signed saturation, and store the active results (those with their respective bit set in writemask “k”) to unaligned memory at “base_addr”.
Intel Implementation Pseudo-Code
FOR j := 0 to 7
i := 32*j
l := 16*j
IF k[j]
MEM[base_addr+l+15:base_addr+l] := Saturate16(a[i+31:i])
FI
ENDFOR
_mm256_mask_cvtsepi64_storeu_epi8#
- Tech:
AVX-512
- Category:
Store
- Header:
immintrin.h
- Searchable:
AVX-512-Store-YMM
- Register:
YMM 256 bit
- Return Type:
void
- Param Types:
void* base_addr, __mmask8 k, __m256i a
- Param ETypes:
SI8 base_addr, MASK k, SI64 a
void _mm256_mask_cvtsepi64_storeu_epi8(void* base_addr,
__mmask8 k,
__m256i a);
Intel Description
Convert packed signed 64-bit integers in “a” to packed 8-bit integers with signed saturation, and store the active results (those with their respective bit set in writemask “k”) to unaligned memory at “base_addr”.
Intel Implementation Pseudo-Code
FOR j := 0 to 3
i := 64*j
l := 8*j
IF k[j]
MEM[base_addr+l+7:base_addr+l] := Saturate8(a[i+63:i])
FI
ENDFOR
_mm256_mask_cvtsepi64_storeu_epi32#
- Tech:
AVX-512
- Category:
Store
- Header:
immintrin.h
- Searchable:
AVX-512-Store-YMM
- Register:
YMM 256 bit
- Return Type:
void
- Param Types:
void* base_addr, __mmask8 k, __m256i a
- Param ETypes:
SI32 base_addr, MASK k, SI64 a
void _mm256_mask_cvtsepi64_storeu_epi32(void* base_addr,
__mmask8 k,
__m256i a);
Intel Description
Convert packed signed 64-bit integers in “a” to packed 32-bit integers with signed saturation, and store the active results (those with their respective bit set in writemask “k”) to unaligned memory at “base_addr”.
Intel Implementation Pseudo-Code
FOR j := 0 to 3
i := 64*j
l := 32*j
IF k[j]
MEM[base_addr+l+31:base_addr+l] := Saturate32(a[i+63:i])
FI
ENDFOR
_mm256_mask_cvtsepi64_storeu_epi16#
- Tech:
AVX-512
- Category:
Store
- Header:
immintrin.h
- Searchable:
AVX-512-Store-YMM
- Register:
YMM 256 bit
- Return Type:
void
- Param Types:
void* base_addr, __mmask8 k, __m256i a
- Param ETypes:
SI16 base_addr, MASK k, SI64 a
void _mm256_mask_cvtsepi64_storeu_epi16(void* base_addr,
__mmask8 k,
__m256i a);
Intel Description
Convert packed signed 64-bit integers in “a” to packed 16-bit integers with signed saturation, and store the active results (those with their respective bit set in writemask “k”) to unaligned memory at “base_addr”.
Intel Implementation Pseudo-Code
FOR j := 0 to 3
i := 64*j
l := 16*j
IF k[j]
MEM[base_addr+l+15:base_addr+l] := Saturate16(a[i+63:i])
FI
ENDFOR
_mm256_mask_cvtusepi32_storeu_epi8#
- Tech:
AVX-512
- Category:
Store
- Header:
immintrin.h
- Searchable:
AVX-512-Store-YMM
- Register:
YMM 256 bit
- Return Type:
void
- Param Types:
void* base_addr, __mmask8 k, __m256i a
- Param ETypes:
UI8 base_addr, MASK k, UI32 a
void _mm256_mask_cvtusepi32_storeu_epi8(void* base_addr,
__mmask8 k,
__m256i a);
Intel Description
Convert packed unsigned 32-bit integers in “a” to packed unsigned 8-bit integers with unsigned saturation, and store the active results (those with their respective bit set in writemask “k”) to unaligned memory at “base_addr”.
Intel Implementation Pseudo-Code
FOR j := 0 to 7
i := 32*j
l := 8*j
IF k[j]
MEM[base_addr+l+7:base_addr+l] := SaturateU8(a[i+31:i])
FI
ENDFOR
_mm256_mask_cvtusepi32_storeu_epi16#
- Tech:
AVX-512
- Category:
Store
- Header:
immintrin.h
- Searchable:
AVX-512-Store-YMM
- Register:
YMM 256 bit
- Return Type:
void
- Param Types:
void* base_addr, __mmask8 k, __m256i a
- Param ETypes:
UI16 base_addr, MASK k, UI32 a
void _mm256_mask_cvtusepi32_storeu_epi16(void* base_addr,
__mmask8 k,
__m256i a);
Intel Description
Convert packed unsigned 32-bit integers in “a” to packed unsigned 16-bit integers with unsigned saturation, and store the active results (those with their respective bit set in writemask “k”) to unaligned memory at “base_addr”.
Intel Implementation Pseudo-Code
FOR j := 0 to 7
i := 32*j
l := 16*j
IF k[j]
MEM[base_addr+l+15:base_addr+l] := SaturateU16(a[i+31:i])
FI
ENDFOR
_mm256_mask_cvtusepi64_storeu_epi8#
- Tech:
AVX-512
- Category:
Store
- Header:
immintrin.h
- Searchable:
AVX-512-Store-YMM
- Register:
YMM 256 bit
- Return Type:
void
- Param Types:
void* base_addr, __mmask8 k, __m256i a
- Param ETypes:
UI8 base_addr, MASK k, UI64 a
void _mm256_mask_cvtusepi64_storeu_epi8(void* base_addr,
__mmask8 k,
__m256i a);
Intel Description
Convert packed unsigned 64-bit integers in “a” to packed unsigned 8-bit integers with unsigned saturation, and store the active results (those with their respective bit set in writemask “k”) to unaligned memory at “base_addr”.
Intel Implementation Pseudo-Code
FOR j := 0 to 3
i := 64*j
l := 8*j
IF k[j]
MEM[base_addr+l+7:base_addr+l] := SaturateU8(a[i+63:i])
FI
ENDFOR
_mm256_mask_cvtusepi64_storeu_epi32#
- Tech:
AVX-512
- Category:
Store
- Header:
immintrin.h
- Searchable:
AVX-512-Store-YMM
- Register:
YMM 256 bit
- Return Type:
void
- Param Types:
void* base_addr, __mmask8 k, __m256i a
- Param ETypes:
UI32 base_addr, MASK k, UI64 a
void _mm256_mask_cvtusepi64_storeu_epi32(void* base_addr,
__mmask8 k,
__m256i a);
Intel Description
Convert packed unsigned 64-bit integers in “a” to packed unsigned 32-bit integers with unsigned saturation, and store the active results (those with their respective bit set in writemask “k”) to unaligned memory at “base_addr”.
Intel Implementation Pseudo-Code
FOR j := 0 to 3
i := 64*j
l := 32*j
IF k[j]
MEM[base_addr+l+31:base_addr+l] := SaturateU32(a[i+63:i])
FI
ENDFOR
_mm256_mask_cvtusepi64_storeu_epi16#
- Tech:
AVX-512
- Category:
Store
- Header:
immintrin.h
- Searchable:
AVX-512-Store-YMM
- Register:
YMM 256 bit
- Return Type:
void
- Param Types:
void* base_addr, __mmask8 k, __m256i a
- Param ETypes:
UI16 base_addr, MASK k, UI64 a
void _mm256_mask_cvtusepi64_storeu_epi16(void* base_addr,
__mmask8 k,
__m256i a);
Intel Description
Convert packed unsigned 64-bit integers in “a” to packed unsigned 16-bit integers with unsigned saturation, and store the active results (those with their respective bit set in writemask “k”) to unaligned memory at “base_addr”.
Intel Implementation Pseudo-Code
FOR j := 0 to 3
i := 64*j
l := 16*j
IF k[j]
MEM[base_addr+l+15:base_addr+l] := SaturateU16(a[i+63:i])
FI
ENDFOR
_mm256_store_ph#
- Tech:
AVX-512
- Category:
Store
- Header:
immintrin.h
- Searchable:
AVX-512-Store-YMM
- Register:
YMM 256 bit
- Return Type:
void
- Param Types:
void * mem_addr, __m256h a
- Param ETypes:
FP16 mem_addr, FP16 a
void _mm256_store_ph(void * mem_addr, __m256h a);
Intel Description
- Store 256 bits (composed of 16 packed half-precision (16-bit) floating-point elements) from “a” into memory.
“mem_addr” must be aligned on a 32-byte boundary or a general-protection exception may be generated.
Intel Implementation Pseudo-Code
MEM[mem_addr+255:mem_addr] := a[255:0]
_mm256_storeu_ph#
- Tech:
AVX-512
- Category:
Store
- Header:
immintrin.h
- Searchable:
AVX-512-Store-YMM
- Register:
YMM 256 bit
- Return Type:
void
- Param Types:
void * mem_addr, __m256h a
- Param ETypes:
FP16 mem_addr, FP16 a
void _mm256_storeu_ph(void * mem_addr, __m256h a);
Intel Description
- Store 256 bits (composed of 16 packed half-precision (16-bit) floating-point elements) from “a” into memory.
“mem_addr” does not need to be aligned on any particular boundary.
Intel Implementation Pseudo-Code
MEM[mem_addr+255:mem_addr] := a[255:0]