AVX-512-Store-XMM#
_mm_mask_storeu_epi16#
- Tech:
AVX-512
- Category:
Store
- Header:
immintrin.h
- Searchable:
AVX-512-Store-XMM
- Register:
XMM 128 bit
- Return Type:
void
- Param Types:
void* mem_addr, __mmask8 k, __m128i a
- Param ETypes:
UI16 mem_addr, MASK k, UI16 a
void _mm_mask_storeu_epi16(void* mem_addr, __mmask8 k,
__m128i a)
Intel Description
- Store packed 16-bit integers from “a” into memory using writemask “k”.
“mem_addr” does not need to be aligned on any particular boundary.
Intel Implementation Pseudo-Code
FOR j := 0 to 7
i := j*16
IF k[j]
MEM[mem_addr+i+15:mem_addr+i] := a[i+15:i]
FI
ENDFOR
_mm_mask_storeu_epi8#
- Tech:
AVX-512
- Category:
Store
- Header:
immintrin.h
- Searchable:
AVX-512-Store-XMM
- Register:
XMM 128 bit
- Return Type:
void
- Param Types:
void* mem_addr, __mmask16 k, __m128i a
- Param ETypes:
UI8 mem_addr, MASK k, UI8 a
void _mm_mask_storeu_epi8(void* mem_addr, __mmask16 k,
__m128i a)
Intel Description
- Store packed 8-bit integers from “a” into memory using writemask “k”.
“mem_addr” does not need to be aligned on any particular boundary.
Intel Implementation Pseudo-Code
FOR j := 0 to 15
i := j*8
IF k[j]
MEM[mem_addr+i+7:mem_addr+i] := a[i+7:i]
FI
ENDFOR
_mm_storeu_epi16#
- Tech:
AVX-512
- Category:
Store
- Header:
immintrin.h
- Searchable:
AVX-512-Store-XMM
- Register:
XMM 128 bit
- Return Type:
void
- Param Types:
void* mem_addr, __m128i a
- Param ETypes:
UI16 mem_addr, UI16 a
void _mm_storeu_epi16(void* mem_addr, __m128i a);
Intel Description
- Store 128-bits (composed of 8 packed 16-bit integers) from “a” into memory.
“mem_addr” does not need to be aligned on any particular boundary.
Intel Implementation Pseudo-Code
MEM[mem_addr+127:mem_addr] := a[127:0]
_mm_storeu_epi8#
- Tech:
AVX-512
- Category:
Store
- Header:
immintrin.h
- Searchable:
AVX-512-Store-XMM
- Register:
XMM 128 bit
- Return Type:
void
- Param Types:
void* mem_addr, __m128i a
- Param ETypes:
UI8 mem_addr, UI8 a
void _mm_storeu_epi8(void* mem_addr, __m128i a);
Intel Description
- Store 128-bits (composed of 16 packed 8-bit integers) from “a” into memory.
“mem_addr” does not need to be aligned on any particular boundary.
Intel Implementation Pseudo-Code
MEM[mem_addr+127:mem_addr] := a[127:0]
_mm_mask_cvtsepi16_storeu_epi8#
- Tech:
AVX-512
- Category:
Store
- Header:
immintrin.h
- Searchable:
AVX-512-Store-XMM
- Register:
XMM 128 bit
- Return Type:
void
- Param Types:
void* base_addr, __mmask8 k, __m128i a
- Param ETypes:
SI8 base_addr, MASK k, SI16 a
void _mm_mask_cvtsepi16_storeu_epi8(void* base_addr,
__mmask8 k, __m128i a)
Intel Description
Convert packed signed 16-bit integers in “a” to packed 8-bit integers with signed saturation, and store the active results (those with their respective bit set in writemask “k”) to unaligned memory at “base_addr”.
Intel Implementation Pseudo-Code
FOR j := 0 to 7
i := 16*j
l := 8*j
IF k[j]
MEM[base_addr+l+7:base_addr+l] := Saturate8(a[i+15:i])
FI
ENDFOR
_mm_mask_cvtusepi16_storeu_epi8#
- Tech:
AVX-512
- Category:
Store
- Header:
immintrin.h
- Searchable:
AVX-512-Store-XMM
- Register:
XMM 128 bit
- Return Type:
void
- Param Types:
void* base_addr, __mmask8 k, __m128i a
- Param ETypes:
UI8 base_addr, MASK k, UI16 a
void _mm_mask_cvtusepi16_storeu_epi8(void* base_addr,
__mmask8 k, __m128i a)
Intel Description
Convert packed unsigned 16-bit integers in “a” to packed unsigned 8-bit integers with unsigned saturation, and store the active results (those with their respective bit set in writemask “k”) to unaligned memory at “base_addr”.
Intel Implementation Pseudo-Code
FOR j := 0 to 7
i := 16*j
l := 8*j
IF k[j]
MEM[base_addr+l+7:base_addr+l] := SaturateU8(a[i+15:i])
FI
ENDFOR
_mm_mask_cvtepi16_storeu_epi8#
- Tech:
AVX-512
- Category:
Store
- Header:
immintrin.h
- Searchable:
AVX-512-Store-XMM
- Register:
XMM 128 bit
- Return Type:
void
- Param Types:
void* base_addr, __mmask8 k, __m128i a
- Param ETypes:
UI8 base_addr, MASK k, UI16 a
void _mm_mask_cvtepi16_storeu_epi8(void* base_addr,
__mmask8 k, __m128i a)
Intel Description
Convert packed 16-bit integers in “a” to packed 8-bit integers with truncation, and store the active results (those with their respective bit set in writemask “k”) to unaligned memory at “base_addr”.
Intel Implementation Pseudo-Code
FOR j := 0 to 7
i := 16*j
l := 8*j
IF k[j]
MEM[base_addr+l+7:base_addr+l] := Truncate8(a[i+15:i])
FI
ENDFOR
_mm_mask_compressstoreu_pd#
- Tech:
AVX-512
- Category:
Store
- Header:
immintrin.h
- Searchable:
AVX-512-Store-XMM
- Register:
XMM 128 bit
- Return Type:
void
- Param Types:
void* base_addr, __mmask8 k, __m128d a
- Param ETypes:
FP64 base_addr, MASK k, FP64 a
void _mm_mask_compressstoreu_pd(void* base_addr, __mmask8 k,
__m128d a)
Intel Description
Contiguously store the active double-precision (64-bit) floating-point elements in “a” (those with their respective bit set in writemask “k”) to unaligned memory at “base_addr”.
Intel Implementation Pseudo-Code
size := 64
m := base_addr
FOR j := 0 to 1
i := j*64
IF k[j]
MEM[m+size-1:m] := a[i+63:i]
m := m + size
FI
ENDFOR
_mm_mask_compressstoreu_ps#
- Tech:
AVX-512
- Category:
Store
- Header:
immintrin.h
- Searchable:
AVX-512-Store-XMM
- Register:
XMM 128 bit
- Return Type:
void
- Param Types:
void* base_addr, __mmask8 k, __m128 a
- Param ETypes:
FP32 base_addr, MASK k, FP32 a
void _mm_mask_compressstoreu_ps(void* base_addr, __mmask8 k,
__m128 a)
Intel Description
Contiguously store the active single-precision (32-bit) floating-point elements in “a” (those with their respective bit set in writemask “k”) to unaligned memory at “base_addr”.
Intel Implementation Pseudo-Code
size := 32
m := base_addr
FOR j := 0 to 3
i := j*32
IF k[j]
MEM[m+size-1:m] := a[i+31:i]
m := m + size
FI
ENDFOR
_mm_mask_store_pd#
- Tech:
AVX-512
- Category:
Store
- Header:
immintrin.h
- Searchable:
AVX-512-Store-XMM
- Register:
XMM 128 bit
- Return Type:
void
- Param Types:
void* mem_addr, __mmask8 k, __m128d a
- Param ETypes:
FP64 mem_addr, MASK k, FP64 a
void _mm_mask_store_pd(void* mem_addr, __mmask8 k,
__m128d a)
Intel Description
- Store packed double-precision (64-bit) floating-point elements from “a” into memory using writemask “k”.
“mem_addr” must be aligned on a 16-byte boundary or a general-protection exception may be generated.
Intel Implementation Pseudo-Code
FOR j := 0 to 1
i := j*64
IF k[j]
MEM[mem_addr+i+63:mem_addr+i] := a[i+63:i]
FI
ENDFOR
_mm_mask_store_ps#
- Tech:
AVX-512
- Category:
Store
- Header:
immintrin.h
- Searchable:
AVX-512-Store-XMM
- Register:
XMM 128 bit
- Return Type:
void
- Param Types:
void* mem_addr, __mmask8 k, __m128 a
- Param ETypes:
FP32 mem_addr, MASK k, FP32 a
void _mm_mask_store_ps(void* mem_addr, __mmask8 k,
__m128 a)
Intel Description
- Store packed single-precision (32-bit) floating-point elements from “a” into memory using writemask “k”.
“mem_addr” must be aligned on a 16-byte boundary or a general-protection exception may be generated.
Intel Implementation Pseudo-Code
FOR j := 0 to 3
i := j*32
IF k[j]
MEM[mem_addr+i+31:mem_addr+i] := a[i+31:i]
FI
ENDFOR
_mm_mask_store_epi32#
- Tech:
AVX-512
- Category:
Store
- Header:
immintrin.h
- Searchable:
AVX-512-Store-XMM
- Register:
XMM 128 bit
- Return Type:
void
- Param Types:
void* mem_addr, __mmask8 k, __m128i a
- Param ETypes:
UI32 mem_addr, MASK k, UI32 a
void _mm_mask_store_epi32(void* mem_addr, __mmask8 k,
__m128i a)
Intel Description
- Store packed 32-bit integers from “a” into memory using writemask “k”.
“mem_addr” must be aligned on a 16-byte boundary or a general-protection exception may be generated.
Intel Implementation Pseudo-Code
FOR j := 0 to 3
i := j*32
IF k[j]
MEM[mem_addr+i+31:mem_addr+i] := a[i+31:i]
FI
ENDFOR
_mm_mask_store_epi64#
- Tech:
AVX-512
- Category:
Store
- Header:
immintrin.h
- Searchable:
AVX-512-Store-XMM
- Register:
XMM 128 bit
- Return Type:
void
- Param Types:
void* mem_addr, __mmask8 k, __m128i a
- Param ETypes:
UI64 mem_addr, MASK k, UI64 a
void _mm_mask_store_epi64(void* mem_addr, __mmask8 k,
__m128i a)
Intel Description
- Store packed 64-bit integers from “a” into memory using writemask “k”.
“mem_addr” must be aligned on a 16-byte boundary or a general-protection exception may be generated.
Intel Implementation Pseudo-Code
FOR j := 0 to 1
i := j*64
IF k[j]
MEM[mem_addr+i+63:mem_addr+i] := a[i+63:i]
FI
ENDFOR
_mm_mask_storeu_epi32#
- Tech:
AVX-512
- Category:
Store
- Header:
immintrin.h
- Searchable:
AVX-512-Store-XMM
- Register:
XMM 128 bit
- Return Type:
void
- Param Types:
void* mem_addr, __mmask8 k, __m128i a
- Param ETypes:
UI32 mem_addr, MASK k, UI32 a
void _mm_mask_storeu_epi32(void* mem_addr, __mmask8 k,
__m128i a)
Intel Description
- Store packed 32-bit integers from “a” into memory using writemask “k”.
“mem_addr” does not need to be aligned on any particular boundary.
Intel Implementation Pseudo-Code
FOR j := 0 to 3
i := j*32
IF k[j]
MEM[mem_addr+i+31:mem_addr+i] := a[i+31:i]
FI
ENDFOR
_mm_mask_storeu_epi64#
- Tech:
AVX-512
- Category:
Store
- Header:
immintrin.h
- Searchable:
AVX-512-Store-XMM
- Register:
XMM 128 bit
- Return Type:
void
- Param Types:
void* mem_addr, __mmask8 k, __m128i a
- Param ETypes:
UI64 mem_addr, MASK k, UI64 a
void _mm_mask_storeu_epi64(void* mem_addr, __mmask8 k,
__m128i a)
Intel Description
- Store packed 64-bit integers from “a” into memory using writemask “k”.
“mem_addr” does not need to be aligned on any particular boundary.
Intel Implementation Pseudo-Code
FOR j := 0 to 1
i := j*64
IF k[j]
MEM[mem_addr+i+63:mem_addr+i] := a[i+63:i]
FI
ENDFOR
_mm_mask_storeu_pd#
- Tech:
AVX-512
- Category:
Store
- Header:
immintrin.h
- Searchable:
AVX-512-Store-XMM
- Register:
XMM 128 bit
- Return Type:
void
- Param Types:
void* mem_addr, __mmask8 k, __m128d a
- Param ETypes:
FP64 mem_addr, MASK k, FP64 a
void _mm_mask_storeu_pd(void* mem_addr, __mmask8 k,
__m128d a)
Intel Description
- Store packed double-precision (64-bit) floating-point elements from “a” into memory using writemask “k”.
“mem_addr” does not need to be aligned on any particular boundary.
Intel Implementation Pseudo-Code
FOR j := 0 to 1
i := j*64
IF k[j]
MEM[mem_addr+i+63:mem_addr+i] := a[i+63:i]
FI
ENDFOR
_mm_mask_storeu_ps#
- Tech:
AVX-512
- Category:
Store
- Header:
immintrin.h
- Searchable:
AVX-512-Store-XMM
- Register:
XMM 128 bit
- Return Type:
void
- Param Types:
void* mem_addr, __mmask8 k, __m128 a
- Param ETypes:
FP32 mem_addr, MASK k, FP32 a
void _mm_mask_storeu_ps(void* mem_addr, __mmask8 k,
__m128 a)
Intel Description
- Store packed single-precision (32-bit) floating-point elements from “a” into memory using writemask “k”.
“mem_addr” does not need to be aligned on any particular boundary.
Intel Implementation Pseudo-Code
FOR j := 0 to 3
i := j*32
IF k[j]
MEM[mem_addr+i+31:mem_addr+i] := a[i+31:i]
FI
ENDFOR
_mm_mask_compressstoreu_epi32#
- Tech:
AVX-512
- Category:
Store
- Header:
immintrin.h
- Searchable:
AVX-512-Store-XMM
- Register:
XMM 128 bit
- Return Type:
void
- Param Types:
void* base_addr, __mmask8 k, __m128i a
- Param ETypes:
UI32 base_addr, MASK k, UI32 a
void _mm_mask_compressstoreu_epi32(void* base_addr,
__mmask8 k, __m128i a)
Intel Description
Contiguously store the active 32-bit integers in “a” (those with their respective bit set in writemask “k”) to unaligned memory at “base_addr”.
Intel Implementation Pseudo-Code
size := 32
m := base_addr
FOR j := 0 to 3
i := j*32
IF k[j]
MEM[m+size-1:m] := a[i+31:i]
m := m + size
FI
ENDFOR
_mm_mask_compressstoreu_epi64#
- Tech:
AVX-512
- Category:
Store
- Header:
immintrin.h
- Searchable:
AVX-512-Store-XMM
- Register:
XMM 128 bit
- Return Type:
void
- Param Types:
void* base_addr, __mmask8 k, __m128i a
- Param ETypes:
UI64 base_addr, MASK k, UI64 a
void _mm_mask_compressstoreu_epi64(void* base_addr,
__mmask8 k, __m128i a)
Intel Description
Contiguously store the active 64-bit integers in “a” (those with their respective bit set in writemask “k”) to unaligned memory at “base_addr”.
Intel Implementation Pseudo-Code
size := 64
m := base_addr
FOR j := 0 to 1
i := j*64
IF k[j]
MEM[m+size-1:m] := a[i+63:i]
m := m + size
FI
ENDFOR
_mm_i32scatter_epi32#
- Tech:
AVX-512
- Category:
Store
- Header:
immintrin.h
- Searchable:
AVX-512-Store-XMM
- Register:
XMM 128 bit
- Return Type:
void
- Param Types:
void* base_addr, __m128i vindex, __m128i a, const int scale
- Param ETypes:
UI32 base_addr, SI32 vindex, UI32 a, IMM scale
void _mm_i32scatter_epi32(void* base_addr, __m128i vindex,
__m128i a, const int scale)
Intel Description
Scatter 32-bit integers from “a” into memory using 32-bit indices. 32-bit elements are stored at addresses starting at “base_addr” and offset by each 32-bit element in “vindex” (each index is scaled by the factor in “scale”). “scale” should be 1, 2, 4 or 8.
Intel Implementation Pseudo-Code
FOR j := 0 to 3
i := j*32
m := j*32
addr := base_addr + SignExtend64(vindex[m+31:m]) * ZeroExtend64(scale) * 8
MEM[addr+31:addr] := a[i+31:i]
ENDFOR
_mm_mask_i32scatter_epi32#
- Tech:
AVX-512
- Category:
Store
- Header:
immintrin.h
- Searchable:
AVX-512-Store-XMM
- Register:
XMM 128 bit
- Return Type:
void
- Param Types:
void* base_addr, __mmask8 k, __m128i vindex, __m128i a, const int scale
- Param ETypes:
UI32 base_addr, MASK k, SI32 vindex, UI32 a, IMM scale
void _mm_mask_i32scatter_epi32(void* base_addr, __mmask8 k,
__m128i vindex, __m128i a,
const int scale)
Intel Description
Scatter 32-bit integers from “a” into memory using 32-bit indices. 32-bit elements are stored at addresses starting at “base_addr” and offset by each 32-bit element in “vindex” (each index is scaled by the factor in “scale”) subject to mask “k” (elements are not stored when the corresponding mask bit is not set). “scale” should be 1, 2, 4 or 8.
Intel Implementation Pseudo-Code
FOR j := 0 to 3
i := j*32
m := j*32
IF k[j]
addr := base_addr + SignExtend64(vindex[m+31:m]) * ZeroExtend64(scale) * 8
MEM[addr+31:addr] := a[i+31:i]
FI
ENDFOR
_mm_i32scatter_epi64#
- Tech:
AVX-512
- Category:
Store
- Header:
immintrin.h
- Searchable:
AVX-512-Store-XMM
- Register:
XMM 128 bit
- Return Type:
void
- Param Types:
void* base_addr, __m128i vindex, __m128i a, const int scale
- Param ETypes:
UI64 base_addr, SI32 vindex, UI64 a, IMM scale
void _mm_i32scatter_epi64(void* base_addr, __m128i vindex,
__m128i a, const int scale)
Intel Description
Scatter 64-bit integers from “a” into memory using 32-bit indices. 64-bit elements are stored at addresses starting at “base_addr” and offset by each 32-bit element in “vindex” (each index is scaled by the factor in “scale”). “scale” should be 1, 2, 4 or 8.
Intel Implementation Pseudo-Code
FOR j := 0 to 1
i := j*64
m := j*32
addr := base_addr + SignExtend64(vindex[m+31:m]) * ZeroExtend64(scale) * 8
MEM[addr+63:addr] := a[i+63:i]
ENDFOR
_mm_mask_i32scatter_epi64#
- Tech:
AVX-512
- Category:
Store
- Header:
immintrin.h
- Searchable:
AVX-512-Store-XMM
- Register:
XMM 128 bit
- Return Type:
void
- Param Types:
void* base_addr, __mmask8 k, __m128i vindex, __m128i a, const int scale
- Param ETypes:
UI64 base_addr, MASK k, SI32 vindex, UI64 a, IMM scale
void _mm_mask_i32scatter_epi64(void* base_addr, __mmask8 k,
__m128i vindex, __m128i a,
const int scale)
Intel Description
Scatter 64-bit integers from “a” into memory using 32-bit indices. 64-bit elements are stored at addresses starting at “base_addr” and offset by each 32-bit element in “vindex” (each index is scaled by the factor in “scale”) subject to mask “k” (elements are not stored when the corresponding mask bit is not set). “scale” should be 1, 2, 4 or 8.
Intel Implementation Pseudo-Code
FOR j := 0 to 1
i := j*64
m := j*32
IF k[j]
addr := base_addr + SignExtend64(vindex[m+31:m]) * ZeroExtend64(scale) * 8
MEM[addr+63:addr] := a[i+63:i]
FI
ENDFOR
_mm_i64scatter_epi32#
- Tech:
AVX-512
- Category:
Store
- Header:
immintrin.h
- Searchable:
AVX-512-Store-XMM
- Register:
XMM 128 bit
- Return Type:
void
- Param Types:
void* base_addr, __m128i vindex, __m128i a, const int scale
- Param ETypes:
UI32 base_addr, SI64 vindex, UI32 a, IMM scale
void _mm_i64scatter_epi32(void* base_addr, __m128i vindex,
__m128i a, const int scale)
Intel Description
Scatter 32-bit integers from “a” into memory using 64-bit indices. 32-bit elements are stored at addresses starting at “base_addr” and offset by each 64-bit element in “vindex” (each index is scaled by the factor in “scale”). “scale” should be 1, 2, 4 or 8.
Intel Implementation Pseudo-Code
FOR j := 0 to 1
i := j*32
m := j*64
addr := base_addr + vindex[m+63:m] * ZeroExtend64(scale) * 8
MEM[addr+31:addr] := a[i+31:i]
ENDFOR
_mm_mask_i64scatter_epi32#
- Tech:
AVX-512
- Category:
Store
- Header:
immintrin.h
- Searchable:
AVX-512-Store-XMM
- Register:
XMM 128 bit
- Return Type:
void
- Param Types:
void* base_addr, __mmask8 k, __m128i vindex, __m128i a, const int scale
- Param ETypes:
UI32 base_addr, MASK k, SI64 vindex, UI32 a, IMM scale
void _mm_mask_i64scatter_epi32(void* base_addr, __mmask8 k,
__m128i vindex, __m128i a,
const int scale)
Intel Description
Scatter 32-bit integers from “a” into memory using 64-bit indices. 32-bit elements are stored at addresses starting at “base_addr” and offset by each 64-bit element in “vindex” (each index is scaled by the factor in “scale”) subject to mask “k” (elements are not stored when the corresponding mask bit is not set). “scale” should be 1, 2, 4 or 8.
Intel Implementation Pseudo-Code
FOR j := 0 to 1
i := j*32
m := j*64
IF k[j]
addr := base_addr + vindex[m+63:m] * ZeroExtend64(scale) * 8
MEM[addr+31:addr] := a[i+31:i]
FI
ENDFOR
_mm_i64scatter_epi64#
- Tech:
AVX-512
- Category:
Store
- Header:
immintrin.h
- Searchable:
AVX-512-Store-XMM
- Register:
XMM 128 bit
- Return Type:
void
- Param Types:
void* base_addr, __m128i vindex, __m128i a, const int scale
- Param ETypes:
UI64 base_addr, SI64 vindex, UI64 a, IMM scale
void _mm_i64scatter_epi64(void* base_addr, __m128i vindex,
__m128i a, const int scale)
Intel Description
Scatter 64-bit integers from “a” into memory using 64-bit indices. 64-bit elements are stored at addresses starting at “base_addr” and offset by each 64-bit element in “vindex” (each index is scaled by the factor in “scale”). “scale” should be 1, 2, 4 or 8.
Intel Implementation Pseudo-Code
FOR j := 0 to 1
i := j*64
m := j*64
addr := base_addr + vindex[m+63:m] * ZeroExtend64(scale) * 8
MEM[addr+63:addr] := a[i+63:i]
ENDFOR
_mm_mask_i64scatter_epi64#
- Tech:
AVX-512
- Category:
Store
- Header:
immintrin.h
- Searchable:
AVX-512-Store-XMM
- Register:
XMM 128 bit
- Return Type:
void
- Param Types:
void* base_addr, __mmask8 k, __m128i vindex, __m128i a, const int scale
- Param ETypes:
UI64 base_addr, MASK k, SI64 vindex, UI64 a, IMM scale
void _mm_mask_i64scatter_epi64(void* base_addr, __mmask8 k,
__m128i vindex, __m128i a,
const int scale)
Intel Description
Scatter 64-bit integers from “a” into memory using 64-bit indices. 64-bit elements are stored at addresses starting at “base_addr” and offset by each 64-bit element in “vindex” (each index is scaled by the factor in “scale”) subject to mask “k” (elements are not stored when the corresponding mask bit is not set). “scale” should be 1, 2, 4 or 8.
Intel Implementation Pseudo-Code
FOR j := 0 to 1
i := j*64
m := j*64
IF k[j]
addr := base_addr + vindex[m+63:m] * ZeroExtend64(scale) * 8
MEM[addr+63:addr] := a[i+63:i]
FI
ENDFOR
_mm_i32scatter_pd#
- Tech:
AVX-512
- Category:
Store
- Header:
immintrin.h
- Searchable:
AVX-512-Store-XMM
- Register:
XMM 128 bit
- Return Type:
void
- Param Types:
void* base_addr, __m128i vindex, __m128d a, const int scale
- Param ETypes:
FP64 base_addr, SI32 vindex, FP64 a, IMM scale
void _mm_i32scatter_pd(void* base_addr, __m128i vindex,
__m128d a, const int scale)
Intel Description
Scatter double-precision (64-bit) floating-point elements from “a” into memory using 32-bit indices. 64-bit elements are stored at addresses starting at “base_addr” and offset by each 32-bit element in “vindex” (each index is scaled by the factor in “scale”). “scale” should be 1, 2, 4 or 8.
Intel Implementation Pseudo-Code
FOR j := 0 to 1
i := j*64
m := j*32
addr := base_addr + SignExtend64(vindex[m+31:m]) * ZeroExtend64(scale) * 8
MEM[addr+63:addr] := a[i+63:i]
ENDFOR
_mm_mask_i32scatter_pd#
- Tech:
AVX-512
- Category:
Store
- Header:
immintrin.h
- Searchable:
AVX-512-Store-XMM
- Register:
XMM 128 bit
- Return Type:
void
- Param Types:
void* base_addr, __mmask8 k, __m128i vindex, __m128d a, const int scale
- Param ETypes:
FP64 base_addr, MASK k, SI32 vindex, FP64 a, IMM scale
void _mm_mask_i32scatter_pd(void* base_addr, __mmask8 k,
__m128i vindex, __m128d a,
const int scale)
Intel Description
Scatter double-precision (64-bit) floating-point elements from “a” into memory using 32-bit indices. 64-bit elements are stored at addresses starting at “base_addr” and offset by each 32-bit element in “vindex” (each index is scaled by the factor in “scale”) subject to mask “k” (elements are not stored when the corresponding mask bit is not set). “scale” should be 1, 2, 4 or 8.
Intel Implementation Pseudo-Code
FOR j := 0 to 1
i := j*64
m := j*32
IF k[j]
addr := base_addr + SignExtend64(vindex[m+31:m]) * ZeroExtend64(scale) * 8
MEM[addr+63:addr] := a[i+63:i]
FI
ENDFOR
_mm_i32scatter_ps#
- Tech:
AVX-512
- Category:
Store
- Header:
immintrin.h
- Searchable:
AVX-512-Store-XMM
- Register:
XMM 128 bit
- Return Type:
void
- Param Types:
void* base_addr, __m128i vindex, __m128 a, const int scale
- Param ETypes:
FP32 base_addr, SI32 vindex, FP32 a, IMM scale
void _mm_i32scatter_ps(void* base_addr, __m128i vindex,
__m128 a, const int scale)
Intel Description
Scatter single-precision (32-bit) floating-point elements from “a” into memory using 32-bit indices. 32-bit elements are stored at addresses starting at “base_addr” and offset by each 32-bit element in “vindex” (each index is scaled by the factor in “scale”). “scale” should be 1, 2, 4 or 8.
Intel Implementation Pseudo-Code
FOR j := 0 to 3
i := j*32
m := j*32
addr := base_addr + SignExtend64(vindex[m+31:m]) * ZeroExtend64(scale) * 8
MEM[addr+31:addr] := a[i+31:i]
ENDFOR
_mm_mask_i32scatter_ps#
- Tech:
AVX-512
- Category:
Store
- Header:
immintrin.h
- Searchable:
AVX-512-Store-XMM
- Register:
XMM 128 bit
- Return Type:
void
- Param Types:
void* base_addr, __mmask8 k, __m128i vindex, __m128 a, const int scale
- Param ETypes:
FP32 base_addr, MASK k, SI32 vindex, FP32 a, IMM scale
void _mm_mask_i32scatter_ps(void* base_addr, __mmask8 k,
__m128i vindex, __m128 a,
const int scale)
Intel Description
Scatter single-precision (32-bit) floating-point elements from “a” into memory using 32-bit indices. 32-bit elements are stored at addresses starting at “base_addr” and offset by each 32-bit element in “vindex” (each index is scaled by the factor in “scale”) subject to mask “k” (elements are not stored when the corresponding mask bit is not set). “scale” should be 1, 2, 4 or 8.
Intel Implementation Pseudo-Code
FOR j := 0 to 3
i := j*32
m := j*32
IF k[j]
addr := base_addr + SignExtend64(vindex[m+31:m]) * ZeroExtend64(scale) * 8
MEM[addr+31:addr] := a[i+31:i]
FI
ENDFOR
_mm_i64scatter_pd#
- Tech:
AVX-512
- Category:
Store
- Header:
immintrin.h
- Searchable:
AVX-512-Store-XMM
- Register:
XMM 128 bit
- Return Type:
void
- Param Types:
void* base_addr, __m128i vindex, __m128d a, const int scale
- Param ETypes:
FP64 base_addr, SI64 vindex, FP64 a, IMM scale
void _mm_i64scatter_pd(void* base_addr, __m128i vindex,
__m128d a, const int scale)
Intel Description
Scatter double-precision (64-bit) floating-point elements from “a” into memory using 64-bit indices. 64-bit elements are stored at addresses starting at “base_addr” and offset by each 64-bit element in “vindex” (each index is scaled by the factor in “scale”). “scale” should be 1, 2, 4 or 8.
Intel Implementation Pseudo-Code
FOR j := 0 to 1
i := j*64
m := j*64
addr := base_addr + vindex[m+63:m] * ZeroExtend64(scale) * 8
MEM[addr+63:addr] := a[i+63:i]
ENDFOR
_mm_mask_i64scatter_pd#
- Tech:
AVX-512
- Category:
Store
- Header:
immintrin.h
- Searchable:
AVX-512-Store-XMM
- Register:
XMM 128 bit
- Return Type:
void
- Param Types:
void* base_addr, __mmask8 k, __m128i vindex, __m128d a, const int scale
- Param ETypes:
FP64 base_addr, MASK k, SI64 vindex, FP64 a, IMM scale
void _mm_mask_i64scatter_pd(void* base_addr, __mmask8 k,
__m128i vindex, __m128d a,
const int scale)
Intel Description
Scatter double-precision (64-bit) floating-point elements from “a” into memory using 64-bit indices. 64-bit elements are stored at addresses starting at “base_addr” and offset by each 64-bit element in “vindex” (each index is scaled by the factor in “scale”) subject to mask “k” (elements are not stored when the corresponding mask bit is not set). “scale” should be 1, 2, 4 or 8.
Intel Implementation Pseudo-Code
FOR j := 0 to 1
i := j*64
m := j*64
IF k[j]
addr := base_addr + vindex[m+63:m] * ZeroExtend64(scale) * 8
MEM[addr+63:addr] := a[i+63:i]
FI
ENDFOR
_mm_i64scatter_ps#
- Tech:
AVX-512
- Category:
Store
- Header:
immintrin.h
- Searchable:
AVX-512-Store-XMM
- Register:
XMM 128 bit
- Return Type:
void
- Param Types:
void* base_addr, __m128i vindex, __m128 a, const int scale
- Param ETypes:
FP32 base_addr, SI64 vindex, FP32 a, IMM scale
void _mm_i64scatter_ps(void* base_addr, __m128i vindex,
__m128 a, const int scale)
Intel Description
Scatter single-precision (32-bit) floating-point elements from “a” into memory using 64-bit indices. 32-bit elements are stored at addresses starting at “base_addr” and offset by each 64-bit element in “vindex” (each index is scaled by the factor in “scale”). “scale” should be 1, 2, 4 or 8.
Intel Implementation Pseudo-Code
FOR j := 0 to 1
i := j*32
m := j*64
addr := base_addr + vindex[m+63:m] * ZeroExtend64(scale) * 8
MEM[addr+31:addr] := a[i+31:i]
ENDFOR
_mm_mask_i64scatter_ps#
- Tech:
AVX-512
- Category:
Store
- Header:
immintrin.h
- Searchable:
AVX-512-Store-XMM
- Register:
XMM 128 bit
- Return Type:
void
- Param Types:
void* base_addr, __mmask8 k, __m128i vindex, __m128 a, const int scale
- Param ETypes:
FP32 base_addr, MASK k, SI64 vindex, FP32 a, IMM scale
void _mm_mask_i64scatter_ps(void* base_addr, __mmask8 k,
__m128i vindex, __m128 a,
const int scale)
Intel Description
Scatter single-precision (32-bit) floating-point elements from “a” into memory using 64-bit indices. 32-bit elements are stored at addresses starting at “base_addr” and offset by each 64-bit element in “vindex” (each index is scaled by the factor in “scale”) subject to mask “k” (elements are not stored when the corresponding mask bit is not set). “scale” should be 1, 2, 4 or 8.
Intel Implementation Pseudo-Code
FOR j := 0 to 1
i := j*32
m := j*64
IF k[j]
addr := base_addr + vindex[m+63:m] * ZeroExtend64(scale) * 8
MEM[addr+31:addr] := a[i+31:i]
FI
ENDFOR
_mm_storeu_epi64#
- Tech:
AVX-512
- Category:
Store
- Header:
immintrin.h
- Searchable:
AVX-512-Store-XMM
- Register:
XMM 128 bit
- Return Type:
void
- Param Types:
void* mem_addr, __m128i a
- Param ETypes:
UI64 mem_addr, UI64 a
void _mm_storeu_epi64(void* mem_addr, __m128i a);
Intel Description
- Store 128-bits (composed of 2 packed 64-bit integers) from “a” into memory.
“mem_addr” does not need to be aligned on any particular boundary.
Intel Implementation Pseudo-Code
MEM[mem_addr+127:mem_addr] := a[127:0]
_mm_storeu_epi32#
- Tech:
AVX-512
- Category:
Store
- Header:
immintrin.h
- Searchable:
AVX-512-Store-XMM
- Register:
XMM 128 bit
- Return Type:
void
- Param Types:
void* mem_addr, __m128i a
- Param ETypes:
UI32 mem_addr, UI32 a
void _mm_storeu_epi32(void* mem_addr, __m128i a);
Intel Description
- Store 128-bits (composed of 4 packed 32-bit integers) from “a” into memory.
“mem_addr” does not need to be aligned on any particular boundary.
Intel Implementation Pseudo-Code
MEM[mem_addr+127:mem_addr] := a[127:0]
_mm_store_epi64#
- Tech:
AVX-512
- Category:
Store
- Header:
immintrin.h
- Searchable:
AVX-512-Store-XMM
- Register:
XMM 128 bit
- Return Type:
void
- Param Types:
void* mem_addr, __m128i a
- Param ETypes:
UI64 mem_addr, UI64 a
void _mm_store_epi64(void* mem_addr, __m128i a);
Intel Description
- Store 128-bits (composed of 2 packed 64-bit integers) from “a” into memory.
“mem_addr” must be aligned on a 16-byte boundary or a general-protection exception may be generated.
Intel Implementation Pseudo-Code
MEM[mem_addr+127:mem_addr] := a[127:0]
_mm_store_epi32#
- Tech:
AVX-512
- Category:
Store
- Header:
immintrin.h
- Searchable:
AVX-512-Store-XMM
- Register:
XMM 128 bit
- Return Type:
void
- Param Types:
void* mem_addr, __m128i a
- Param ETypes:
UI32 mem_addr, UI32 a
void _mm_store_epi32(void* mem_addr, __m128i a);
Intel Description
- Store 128-bits (composed of 4 packed 32-bit integers) from “a” into memory.
“mem_addr” must be aligned on a 16-byte boundary or a general-protection exception may be generated.
Intel Implementation Pseudo-Code
MEM[mem_addr+127:mem_addr] := a[127:0]
_mm_mask_cvtepi32_storeu_epi8#
- Tech:
AVX-512
- Category:
Store
- Header:
immintrin.h
- Searchable:
AVX-512-Store-XMM
- Register:
XMM 128 bit
- Return Type:
void
- Param Types:
void* base_addr, __mmask8 k, __m128i a
- Param ETypes:
UI8 base_addr, MASK k, UI32 a
void _mm_mask_cvtepi32_storeu_epi8(void* base_addr,
__mmask8 k, __m128i a)
Intel Description
Convert packed 32-bit integers in “a” to packed 8-bit integers with truncation, and store the active results (those with their respective bit set in writemask “k”) to unaligned memory at “base_addr”.
Intel Implementation Pseudo-Code
FOR j := 0 to 3
i := 32*j
l := 8*j
IF k[j]
MEM[base_addr+l+7:base_addr+l] := Truncate8(a[i+31:i])
FI
ENDFOR
_mm_mask_cvtepi32_storeu_epi16#
- Tech:
AVX-512
- Category:
Store
- Header:
immintrin.h
- Searchable:
AVX-512-Store-XMM
- Register:
XMM 128 bit
- Return Type:
void
- Param Types:
void* base_addr, __mmask8 k, __m128i a
- Param ETypes:
UI16 base_addr, MASK k, UI32 a
void _mm_mask_cvtepi32_storeu_epi16(void* base_addr,
__mmask8 k, __m128i a)
Intel Description
Convert packed 32-bit integers in “a” to packed 16-bit integers with truncation, and store the active results (those with their respective bit set in writemask “k”) to unaligned memory at “base_addr”.
Intel Implementation Pseudo-Code
FOR j := 0 to 3
i := 32*j
l := 16*j
IF k[j]
MEM[base_addr+l+15:base_addr+l] := Truncate16(a[i+31:i])
FI
ENDFOR
_mm_mask_cvtepi64_storeu_epi8#
- Tech:
AVX-512
- Category:
Store
- Header:
immintrin.h
- Searchable:
AVX-512-Store-XMM
- Register:
XMM 128 bit
- Return Type:
void
- Param Types:
void* base_addr, __mmask8 k, __m128i a
- Param ETypes:
UI8 base_addr, MASK k, UI64 a
void _mm_mask_cvtepi64_storeu_epi8(void* base_addr,
__mmask8 k, __m128i a)
Intel Description
Convert packed 64-bit integers in “a” to packed 8-bit integers with truncation, and store the active results (those with their respective bit set in writemask “k”) to unaligned memory at “base_addr”.
Intel Implementation Pseudo-Code
FOR j := 0 to 1
i := 64*j
l := 8*j
IF k[j]
MEM[base_addr+l+7:base_addr+l] := Truncate8(a[i+63:i])
FI
ENDFOR
_mm_mask_cvtepi64_storeu_epi32#
- Tech:
AVX-512
- Category:
Store
- Header:
immintrin.h
- Searchable:
AVX-512-Store-XMM
- Register:
XMM 128 bit
- Return Type:
void
- Param Types:
void* base_addr, __mmask8 k, __m128i a
- Param ETypes:
UI32 base_addr, MASK k, UI64 a
void _mm_mask_cvtepi64_storeu_epi32(void* base_addr,
__mmask8 k, __m128i a)
Intel Description
Convert packed 64-bit integers in “a” to packed 32-bit integers with truncation, and store the active results (those with their respective bit set in writemask “k”) to unaligned memory at “base_addr”.
Intel Implementation Pseudo-Code
FOR j := 0 to 1
i := 64*j
l := 32*j
IF k[j]
MEM[base_addr+l+31:base_addr+l] := Truncate32(a[i+63:i])
FI
ENDFOR
_mm_mask_cvtepi64_storeu_epi16#
- Tech:
AVX-512
- Category:
Store
- Header:
immintrin.h
- Searchable:
AVX-512-Store-XMM
- Register:
XMM 128 bit
- Return Type:
void
- Param Types:
void* base_addr, __mmask8 k, __m128i a
- Param ETypes:
UI16 base_addr, MASK k, UI64 a
void _mm_mask_cvtepi64_storeu_epi16(void* base_addr,
__mmask8 k, __m128i a);
Intel Description
Convert packed 64-bit integers in “a” to packed 16-bit integers with truncation, and store the active results (those with their respective bit set in writemask “k”) to unaligned memory at “base_addr”.
Intel Implementation Pseudo-Code
FOR j := 0 to 1
i := 64*j
l := 16*j
IF k[j]
MEM[base_addr+l+15:base_addr+l] := Truncate16(a[i+63:i])
FI
ENDFOR
_mm_mask_cvtsepi32_storeu_epi8#
- Tech:
AVX-512
- Category:
Store
- Header:
immintrin.h
- Searchable:
AVX-512-Store-XMM
- Register:
XMM 128 bit
- Return Type:
void
- Param Types:
void* base_addr, __mmask8 k, __m128i a
- Param ETypes:
SI8 base_addr, MASK k, SI32 a
void _mm_mask_cvtsepi32_storeu_epi8(void* base_addr,
__mmask8 k, __m128i a);
Intel Description
Convert packed signed 32-bit integers in “a” to packed 8-bit integers with signed saturation, and store the active results (those with their respective bit set in writemask “k”) to unaligned memory at “base_addr”.
Intel Implementation Pseudo-Code
FOR j := 0 to 3
i := 32*j
l := 8*j
IF k[j]
MEM[base_addr+l+7:base_addr+l] := Saturate8(a[i+31:i])
FI
ENDFOR
_mm_mask_cvtsepi32_storeu_epi16#
- Tech:
AVX-512
- Category:
Store
- Header:
immintrin.h
- Searchable:
AVX-512-Store-XMM
- Register:
XMM 128 bit
- Return Type:
void
- Param Types:
void* base_addr, __mmask8 k, __m128i a
- Param ETypes:
SI16 base_addr, MASK k, SI32 a
void _mm_mask_cvtsepi32_storeu_epi16(void* base_addr,
__mmask8 k, __m128i a);
Intel Description
Convert packed signed 32-bit integers in “a” to packed 16-bit integers with signed saturation, and store the active results (those with their respective bit set in writemask “k”) to unaligned memory at “base_addr”.
Intel Implementation Pseudo-Code
FOR j := 0 to 3
i := 32*j
l := 16*j
IF k[j]
MEM[base_addr+l+15:base_addr+l] := Saturate16(a[i+31:i])
FI
ENDFOR
_mm_mask_cvtsepi64_storeu_epi8#
- Tech:
AVX-512
- Category:
Store
- Header:
immintrin.h
- Searchable:
AVX-512-Store-XMM
- Register:
XMM 128 bit
- Return Type:
void
- Param Types:
void* base_addr, __mmask8 k, __m128i a
- Param ETypes:
SI8 base_addr, MASK k, SI64 a
void _mm_mask_cvtsepi64_storeu_epi8(void* base_addr,
__mmask8 k, __m128i a);
Intel Description
Convert packed signed 64-bit integers in “a” to packed 8-bit integers with signed saturation, and store the active results (those with their respective bit set in writemask “k”) to unaligned memory at “base_addr”.
Intel Implementation Pseudo-Code
FOR j := 0 to 1
i := 64*j
l := 8*j
IF k[j]
MEM[base_addr+l+7:base_addr+l] := Saturate8(a[i+63:i])
FI
ENDFOR
_mm_mask_cvtsepi64_storeu_epi32#
- Tech:
AVX-512
- Category:
Store
- Header:
immintrin.h
- Searchable:
AVX-512-Store-XMM
- Register:
XMM 128 bit
- Return Type:
void
- Param Types:
void* base_addr, __mmask8 k, __m128i a
- Param ETypes:
SI32 base_addr, MASK k, SI64 a
void _mm_mask_cvtsepi64_storeu_epi32(void* base_addr,
__mmask8 k, __m128i a);
Intel Description
Convert packed signed 64-bit integers in “a” to packed 32-bit integers with signed saturation, and store the active results (those with their respective bit set in writemask “k”) to unaligned memory at “base_addr”.
Intel Implementation Pseudo-Code
FOR j := 0 to 1
i := 64*j
l := 32*j
IF k[j]
MEM[base_addr+l+31:base_addr+l] := Saturate32(a[i+63:i])
FI
ENDFOR
_mm_mask_cvtsepi64_storeu_epi16#
- Tech:
AVX-512
- Category:
Store
- Header:
immintrin.h
- Searchable:
AVX-512-Store-XMM
- Register:
XMM 128 bit
- Return Type:
void
- Param Types:
void* base_addr, __mmask8 k, __m128i a
- Param ETypes:
SI16 base_addr, MASK k, SI64 a
void _mm_mask_cvtsepi64_storeu_epi16(void* base_addr,
__mmask8 k, __m128i a);
Intel Description
Convert packed signed 64-bit integers in “a” to packed 16-bit integers with signed saturation, and store the active results (those with their respective bit set in writemask “k”) to unaligned memory at “base_addr”.
Intel Implementation Pseudo-Code
FOR j := 0 to 1
i := 64*j
l := 16*j
IF k[j]
MEM[base_addr+l+15:base_addr+l] := Saturate16(a[i+63:i])
FI
ENDFOR
_mm_mask_cvtusepi32_storeu_epi8#
- Tech:
AVX-512
- Category:
Store
- Header:
immintrin.h
- Searchable:
AVX-512-Store-XMM
- Register:
XMM 128 bit
- Return Type:
void
- Param Types:
void* base_addr, __mmask8 k, __m128i a
- Param ETypes:
UI8 base_addr, MASK k, UI32 a
void _mm_mask_cvtusepi32_storeu_epi8(void* base_addr,
__mmask8 k, __m128i a);
Intel Description
Convert packed unsigned 32-bit integers in “a” to packed unsigned 8-bit integers with unsigned saturation, and store the active results (those with their respective bit set in writemask “k”) to unaligned memory at “base_addr”.
Intel Implementation Pseudo-Code
FOR j := 0 to 3
i := 32*j
l := 8*j
IF k[j]
MEM[base_addr+l+7:base_addr+l] := SaturateU8(a[i+31:i])
FI
ENDFOR
_mm_mask_cvtusepi32_storeu_epi16#
- Tech:
AVX-512
- Category:
Store
- Header:
immintrin.h
- Searchable:
AVX-512-Store-XMM
- Register:
XMM 128 bit
- Return Type:
void
- Param Types:
void* base_addr, __mmask8 k, __m128i a
- Param ETypes:
UI16 base_addr, MASK k, UI32 a
void _mm_mask_cvtusepi32_storeu_epi16(void* base_addr,
__mmask8 k,
__m128i a);
Intel Description
Convert packed unsigned 32-bit integers in “a” to packed unsigned 16-bit integers with unsigned saturation, and store the active results (those with their respective bit set in writemask “k”) to unaligned memory at “base_addr”.
Intel Implementation Pseudo-Code
FOR j := 0 to 3
i := 32*j
l := 16*j
IF k[j]
MEM[base_addr+l+15:base_addr+l] := SaturateU16(a[i+31:i])
FI
ENDFOR
_mm_mask_cvtusepi64_storeu_epi8#
- Tech:
AVX-512
- Category:
Store
- Header:
immintrin.h
- Searchable:
AVX-512-Store-XMM
- Register:
XMM 128 bit
- Return Type:
void
- Param Types:
void* base_addr, __mmask8 k, __m128i a
- Param ETypes:
UI8 base_addr, MASK k, UI64 a
void _mm_mask_cvtusepi64_storeu_epi8(void* base_addr,
__mmask8 k, __m128i a);
Intel Description
Convert packed unsigned 64-bit integers in “a” to packed unsigned 8-bit integers with unsigned saturation, and store the active results (those with their respective bit set in writemask “k”) to unaligned memory at “base_addr”.
Intel Implementation Pseudo-Code
FOR j := 0 to 1
i := 64*j
l := 8*j
IF k[j]
MEM[base_addr+l+7:base_addr+l] := SaturateU8(a[i+63:i])
FI
ENDFOR
_mm_mask_cvtusepi64_storeu_epi32#
- Tech:
AVX-512
- Category:
Store
- Header:
immintrin.h
- Searchable:
AVX-512-Store-XMM
- Register:
XMM 128 bit
- Return Type:
void
- Param Types:
void* base_addr, __mmask8 k, __m128i a
- Param ETypes:
UI32 base_addr, MASK k, UI64 a
void _mm_mask_cvtusepi64_storeu_epi32(void* base_addr,
__mmask8 k,
__m128i a);
Intel Description
Convert packed unsigned 64-bit integers in “a” to packed unsigned 32-bit integers with unsigned saturation, and store the active results (those with their respective bit set in writemask “k”) to unaligned memory at “base_addr”.
Intel Implementation Pseudo-Code
FOR j := 0 to 1
i := 64*j
l := 32*j
IF k[j]
MEM[base_addr+l+31:base_addr+l] := SaturateU32(a[i+63:i])
FI
ENDFOR
_mm_mask_cvtusepi64_storeu_epi16#
- Tech:
AVX-512
- Category:
Store
- Header:
immintrin.h
- Searchable:
AVX-512-Store-XMM
- Register:
XMM 128 bit
- Return Type:
void
- Param Types:
void* base_addr, __mmask8 k, __m128i a
- Param ETypes:
UI16 base_addr, MASK k, UI64 a
void _mm_mask_cvtusepi64_storeu_epi16(void* base_addr,
__mmask8 k,
__m128i a);
Intel Description
Convert packed unsigned 64-bit integers in “a” to packed unsigned 16-bit integers with unsigned saturation, and store the active results (those with their respective bit set in writemask “k”) to unaligned memory at “base_addr”.
Intel Implementation Pseudo-Code
FOR j := 0 to 1
i := 64*j
l := 16*j
IF k[j]
MEM[base_addr+l+15:base_addr+l] := SaturateU16(a[i+63:i])
FI
ENDFOR
_mm_mask_store_sd#
- Tech:
AVX-512
- Category:
Store
- Header:
immintrin.h
- Searchable:
AVX-512-Store-XMM
- Register:
XMM 128 bit
- Return Type:
void
- Param Types:
double* mem_addr, __mmask8 k, __m128d a
- Param ETypes:
FP64 mem_addr, MASK k, FP64 a
void _mm_mask_store_sd(double* mem_addr, __mmask8 k,
__m128d a);
Intel Description
- Store the lower double-precision (64-bit) floating-point element from “a” into memory using writemask “k”.
“mem_addr” must be aligned on a 16-byte boundary or a general-protection exception may be generated.
Intel Implementation Pseudo-Code
IF k[0]
MEM[mem_addr+63:mem_addr] := a[63:0]
FI
_mm_mask_store_ss#
- Tech:
AVX-512
- Category:
Store
- Header:
immintrin.h
- Searchable:
AVX-512-Store-XMM
- Register:
XMM 128 bit
- Return Type:
void
- Param Types:
float* mem_addr, __mmask8 k, __m128 a
- Param ETypes:
FP32 mem_addr, MASK k, FP32 a
void _mm_mask_store_ss(float* mem_addr, __mmask8 k,
__m128 a);
Intel Description
- Store the lower single-precision (32-bit) floating-point element from “a” into memory using writemask “k”.
“mem_addr” must be aligned on a 16-byte boundary or a general-protection exception may be generated.
Intel Implementation Pseudo-Code
IF k[0]
MEM[mem_addr+31:mem_addr] := a[31:0]
FI
_mm_store_ph#
- Tech:
AVX-512
- Category:
Store
- Header:
immintrin.h
- Searchable:
AVX-512-Store-XMM
- Register:
XMM 128 bit
- Return Type:
void
- Param Types:
void * mem_addr, __m128h a
- Param ETypes:
FP16 mem_addr, FP16 a
void _mm_store_ph(void * mem_addr, __m128h a);
Intel Description
- Store 128-bits (composed of 8 packed half-precision (16-bit) floating-point elements) from “a” into memory.
“mem_addr” must be aligned on a 16-byte boundary or a general-protection exception may be generated.
Intel Implementation Pseudo-Code
MEM[mem_addr+127:mem_addr] := a[127:0]
_mm_storeu_ph#
- Tech:
AVX-512
- Category:
Store
- Header:
immintrin.h
- Searchable:
AVX-512-Store-XMM
- Register:
XMM 128 bit
- Return Type:
void
- Param Types:
void * mem_addr, __m128h a
- Param ETypes:
FP16 mem_addr, FP16 a
void _mm_storeu_ph(void * mem_addr, __m128h a);
Intel Description
- Store 128-bits (composed of 8 packed half-precision (16-bit) floating-point elements) from “a” into memory.
“mem_addr” does not need to be aligned on any particular boundary.
Intel Implementation Pseudo-Code
MEM[mem_addr+127:mem_addr] := a[127:0]
_mm_store_sh#
- Tech:
AVX-512
- Category:
Store
- Header:
immintrin.h
- Searchable:
AVX-512-Store-XMM
- Register:
XMM 128 bit
- Return Type:
void
- Param Types:
void * mem_addr, __m128h a
- Param ETypes:
FP16 mem_addr, FP16 a
void _mm_store_sh(void * mem_addr, __m128h a);
Intel Description
Store the lower half-precision (16-bit) floating-point element from “a” into memory.
Intel Implementation Pseudo-Code
MEM[mem_addr].fp16[0] := a.fp16[0]
_mm_mask_store_sh#
- Tech:
AVX-512
- Category:
Store
- Header:
immintrin.h
- Searchable:
AVX-512-Store-XMM
- Register:
XMM 128 bit
- Return Type:
void
- Param Types:
void * mem_addr, __mmask8 k, __m128h a
- Param ETypes:
FP16 mem_addr, MASK k, FP16 a
void _mm_mask_store_sh(void* mem_addr, __mmask8 k,
__m128h a);
Intel Description
Store the lower half-precision (16-bit) floating-point element from “a” into memory using writemask “k”.
Intel Implementation Pseudo-Code
IF k[0]
MEM[mem_addr].fp16[0] := a.fp16[0]
FI