AVX-512-Bit Manipulation-XMM#
_mm_lzcnt_epi32#
- Tech:
AVX-512
- Category:
Bit Manipulation
- Header:
immintrin.h
- Searchable:
AVX-512-Bit Manipulation-XMM
- Register:
XMM 128 bit
- Return Type:
__m128i
- Param Types:
__m128i a
- Param ETypes:
UI32 a
__m128i _mm_lzcnt_epi32(__m128i a);
Intel Description
Counts the number of leading zero bits in each packed 32-bit integer in “a”, and stores the results in “dst”.
Intel Implementation Pseudo-Code
FOR j := 0 to 3
i := j*32
tmp := 31
dst[i+31:i] := 0
DO WHILE (tmp >= 0 AND a[i+tmp] == 0)
tmp := tmp - 1
dst[i+31:i] := dst[i+31:i] + 1
OD
ENDFOR
dst[MAX:128] := 0
_mm_mask_lzcnt_epi32#
- Tech:
AVX-512
- Category:
Bit Manipulation
- Header:
immintrin.h
- Searchable:
AVX-512-Bit Manipulation-XMM
- Register:
XMM 128 bit
- Return Type:
__m128i
- Param Types:
__m128i src, __mmask8 k, __m128i a
- Param ETypes:
UI32 src, MASK k, UI32 a
__m128i _mm_mask_lzcnt_epi32(__m128i src, __mmask8 k,
__m128i a);
Intel Description
Counts the number of leading zero bits in each packed 32-bit integer in “a”, and stores the results in “dst” using writemask “k” (elements are copied from “src” when the corresponding mask bit is not set).
Intel Implementation Pseudo-Code
FOR j := 0 to 3
i := j*32
IF k[j]
tmp := 31
dst[i+31:i] := 0
DO WHILE (tmp >= 0 AND a[i+tmp] == 0)
tmp := tmp - 1
dst[i+31:i] := dst[i+31:i] + 1
OD
ELSE
dst[i+31:i] := src[i+31:i]
FI
ENDFOR
dst[MAX:128] := 0
_mm_maskz_lzcnt_epi32#
- Tech:
AVX-512
- Category:
Bit Manipulation
- Header:
immintrin.h
- Searchable:
AVX-512-Bit Manipulation-XMM
- Register:
XMM 128 bit
- Return Type:
__m128i
- Param Types:
__mmask8 k, __m128i a
- Param ETypes:
MASK k, UI32 a
__m128i _mm_maskz_lzcnt_epi32(__mmask8 k, __m128i a);
Intel Description
Counts the number of leading zero bits in each packed 32-bit integer in “a”, and stores the results in “dst” using zeromask “k” (elements are zeroed out when the corresponding mask bit is not set).
Intel Implementation Pseudo-Code
FOR j := 0 to 3
i := j*32
IF k[j]
tmp := 31
dst[i+31:i] := 0
DO WHILE (tmp >= 0 AND a[i+tmp] == 0)
tmp := tmp - 1
dst[i+31:i] := dst[i+31:i] + 1
OD
ELSE
dst[i+31:i] := 0
FI
ENDFOR
dst[MAX:128] := 0
_mm_lzcnt_epi64#
- Tech:
AVX-512
- Category:
Bit Manipulation
- Header:
immintrin.h
- Searchable:
AVX-512-Bit Manipulation-XMM
- Register:
XMM 128 bit
- Return Type:
__m128i
- Param Types:
__m128i a
- Param ETypes:
UI64 a
__m128i _mm_lzcnt_epi64(__m128i a);
Intel Description
Counts the number of leading zero bits in each packed 64-bit integer in “a”, and stores the results in “dst”.
Intel Implementation Pseudo-Code
FOR j := 0 to 1
i := j*64
tmp := 63
dst[i+63:i] := 0
DO WHILE (tmp >= 0 AND a[i+tmp] == 0)
tmp := tmp - 1
dst[i+63:i] := dst[i+63:i] + 1
OD
ENDFOR
dst[MAX:128] := 0
_mm_mask_lzcnt_epi64#
- Tech:
AVX-512
- Category:
Bit Manipulation
- Header:
immintrin.h
- Searchable:
AVX-512-Bit Manipulation-XMM
- Register:
XMM 128 bit
- Return Type:
__m128i
- Param Types:
__m128i src, __mmask8 k, __m128i a
- Param ETypes:
UI64 src, MASK k, UI64 a
__m128i _mm_mask_lzcnt_epi64(__m128i src, __mmask8 k,
__m128i a);
Intel Description
Counts the number of leading zero bits in each packed 64-bit integer in “a”, and stores the results in “dst” using writemask “k” (elements are copied from “src” when the corresponding mask bit is not set).
Intel Implementation Pseudo-Code
FOR j := 0 to 1
i := j*64
IF k[j]
tmp := 63
dst[i+63:i] := 0
DO WHILE (tmp >= 0 AND a[i+tmp] == 0)
tmp := tmp - 1
dst[i+63:i] := dst[i+63:i] + 1
OD
ELSE
dst[i+63:i] := src[i+63:i]
FI
ENDFOR
dst[MAX:128] := 0
_mm_maskz_lzcnt_epi64#
- Tech:
AVX-512
- Category:
Bit Manipulation
- Header:
immintrin.h
- Searchable:
AVX-512-Bit Manipulation-XMM
- Register:
XMM 128 bit
- Return Type:
__m128i
- Param Types:
__mmask8 k, __m128i a
- Param ETypes:
MASK k, UI64 a
__m128i _mm_maskz_lzcnt_epi64(__mmask8 k, __m128i a);
Intel Description
Counts the number of leading zero bits in each packed 64-bit integer in “a”, and stores the results in “dst” using zeromask “k” (elements are zeroed out when the corresponding mask bit is not set).
Intel Implementation Pseudo-Code
FOR j := 0 to 1
i := j*64
IF k[j]
tmp := 63
dst[i+63:i] := 0
DO WHILE (tmp >= 0 AND a[i+tmp] == 0)
tmp := tmp - 1
dst[i+63:i] := dst[i+63:i] + 1
OD
ELSE
dst[i+63:i] := 0
FI
ENDFOR
dst[MAX:128] := 0
_mm_maskz_popcnt_epi64#
- Tech:
AVX-512
- Category:
Bit Manipulation
- Header:
immintrin.h
- Searchable:
AVX-512-Bit Manipulation-XMM
- Register:
XMM 128 bit
- Return Type:
__m128i
- Param Types:
__mmask8 k, __m128i a
- Param ETypes:
MASK k, UI64 a
__m128i _mm_maskz_popcnt_epi64(__mmask8 k, __m128i a);
Intel Description
Count the number of logical 1 bits in packed 64-bit integers in “a”, and store the results in “dst” using zeromask “k” (elements are zeroed out when the corresponding mask bit is not set).
Intel Implementation Pseudo-Code
DEFINE POPCNT(a) {
count := 0
DO WHILE a > 0
count += a[0]
a >>= 1
OD
RETURN count
}
FOR j := 0 to 1
i := j*64
IF k[j]
dst[i+63:i] := POPCNT(a[i+63:i])
ELSE
dst[i+63:i] := 0
FI
ENDFOR
dst[MAX:128] := 0
_mm_mask_popcnt_epi64#
- Tech:
AVX-512
- Category:
Bit Manipulation
- Header:
immintrin.h
- Searchable:
AVX-512-Bit Manipulation-XMM
- Register:
XMM 128 bit
- Return Type:
__m128i
- Param Types:
__m128i src, __mmask8 k, __m128i a
- Param ETypes:
UI64 src, MASK k, UI64 a
__m128i _mm_mask_popcnt_epi64(__m128i src, __mmask8 k,
__m128i a);
Intel Description
Count the number of logical 1 bits in packed 64-bit integers in “a”, and store the results in “dst” using writemask “k” (elements are copied from “src” when the corresponding mask bit is not set).
Intel Implementation Pseudo-Code
DEFINE POPCNT(a) {
count := 0
DO WHILE a > 0
count += a[0]
a >>= 1
OD
RETURN count
}
FOR j := 0 to 1
i := j*64
IF k[j]
dst[i+63:i] := POPCNT(a[i+63:i])
ELSE
dst[i+63:i] := src[i+63:i]
FI
ENDFOR
dst[MAX:128] := 0
_mm_popcnt_epi64#
- Tech:
AVX-512
- Category:
Bit Manipulation
- Header:
immintrin.h
- Searchable:
AVX-512-Bit Manipulation-XMM
- Register:
XMM 128 bit
- Return Type:
__m128i
- Param Types:
__m128i a
- Param ETypes:
UI64 a
__m128i _mm_popcnt_epi64(__m128i a);
Intel Description
Count the number of logical 1 bits in packed 64-bit integers in “a”, and store the results in “dst”.
Intel Implementation Pseudo-Code
DEFINE POPCNT(a) {
count := 0
DO WHILE a > 0
count += a[0]
a >>= 1
OD
RETURN count
}
FOR j := 0 to 1
i := j*64
dst[i+63:i] := POPCNT(a[i+63:i])
ENDFOR
dst[MAX:128] := 0
_mm_popcnt_epi32#
- Tech:
AVX-512
- Category:
Bit Manipulation
- Header:
immintrin.h
- Searchable:
AVX-512-Bit Manipulation-XMM
- Register:
XMM 128 bit
- Return Type:
__m128i
- Param Types:
__m128i a
- Param ETypes:
UI32 a
__m128i _mm_popcnt_epi32(__m128i a);
Intel Description
Count the number of logical 1 bits in packed 32-bit integers in “a”, and store the results in “dst”.
Intel Implementation Pseudo-Code
DEFINE POPCNT(a) {
count := 0
DO WHILE a > 0
count += a[0]
a >>= 1
OD
RETURN count
}
FOR j := 0 to 3
i := j*32
dst[i+31:i] := POPCNT(a[i+31:i])
ENDFOR
dst[MAX:128] := 0
_mm_mask_popcnt_epi32#
- Tech:
AVX-512
- Category:
Bit Manipulation
- Header:
immintrin.h
- Searchable:
AVX-512-Bit Manipulation-XMM
- Register:
XMM 128 bit
- Return Type:
__m128i
- Param Types:
__m128i src, __mmask8 k, __m128i a
- Param ETypes:
UI32 src, MASK k, UI32 a
__m128i _mm_mask_popcnt_epi32(__m128i src, __mmask8 k,
__m128i a);
Intel Description
Count the number of logical 1 bits in packed 32-bit integers in “a”, and store the results in “dst” using writemask “k” (elements are copied from “src” when the corresponding mask bit is not set).
Intel Implementation Pseudo-Code
DEFINE POPCNT(a) {
count := 0
DO WHILE a > 0
count += a[0]
a >>= 1
OD
RETURN count
}
FOR j := 0 to 3
i := j*32
IF k[j]
dst[i+31:i] := POPCNT(a[i+31:i])
ELSE
dst[i+31:i] := src[i+31:i]
FI
ENDFOR
dst[MAX:128] := 0
_mm_maskz_popcnt_epi32#
- Tech:
AVX-512
- Category:
Bit Manipulation
- Header:
immintrin.h
- Searchable:
AVX-512-Bit Manipulation-XMM
- Register:
XMM 128 bit
- Return Type:
__m128i
- Param Types:
__mmask8 k, __m128i a
- Param ETypes:
MASK k, UI32 a
__m128i _mm_maskz_popcnt_epi32(__mmask8 k, __m128i a);
Intel Description
Count the number of logical 1 bits in packed 32-bit integers in “a”, and store the results in “dst” using zeromask “k” (elements are zeroed out when the corresponding mask bit is not set).
Intel Implementation Pseudo-Code
DEFINE POPCNT(a) {
count := 0
DO WHILE a > 0
count += a[0]
a >>= 1
OD
RETURN count
}
FOR j := 0 to 3
i := j*32
IF k[j]
dst[i+31:i] := POPCNT(a[i+31:i])
ELSE
dst[i+31:i] := 0
FI
ENDFOR
dst[MAX:128] := 0
_mm_mask_bitshuffle_epi64_mask#
- Tech:
AVX-512
- Category:
Bit Manipulation
- Header:
immintrin.h
- Searchable:
AVX-512-Bit Manipulation-XMM
- Register:
XMM 128 bit
- Return Type:
__mmask16
- Param Types:
__mmask16 k, __m128i b, __m128i c
- Param ETypes:
MASK k, UI64 b, UI64 c
__mmask16 _mm_mask_bitshuffle_epi64_mask(__mmask16 k,
__m128i b,
__m128i c);
Intel Description
Gather 64 bits from “b” using selection bits in “c”. For each 64-bit element in “b”, gather 8 bits from the 64-bit element in “b” at the 8 bit positions controlled by the 8 corresponding 8-bit elements of “c”, and store the result in the corresponding 8-bit element of “dst” using zeromask “k” (elements are zeroed out when the corresponding mask bit is not set).
Intel Implementation Pseudo-Code
FOR i := 0 to 1 //Qword
FOR j := 0 to 7 // Byte
IF k[i*8+j]
m := c.qword[i].byte[j] & 0x3F
dst[i*8+j] := b.qword[i].bit[m]
ELSE
dst[i*8+j] := 0
FI
ENDFOR
ENDFOR
dst[MAX:16] := 0
_mm_bitshuffle_epi64_mask#
- Tech:
AVX-512
- Category:
Bit Manipulation
- Header:
immintrin.h
- Searchable:
AVX-512-Bit Manipulation-XMM
- Register:
XMM 128 bit
- Return Type:
__mmask16
- Param Types:
__m128i b, __m128i c
- Param ETypes:
UI64 b, UI64 c
__mmask16 _mm_bitshuffle_epi64_mask(__m128i b, __m128i c);
Intel Description
Gather 64 bits from “b” using selection bits in “c”. For each 64-bit element in “b”, gather 8 bits from the 64-bit element in “b” at the 8 bit positions controlled by the 8 corresponding 8-bit elements of “c”, and store the result in the corresponding 8-bit element of “dst”.
Intel Implementation Pseudo-Code
FOR i := 0 to 1 //Qword
FOR j := 0 to 7 // Byte
m := c.qword[i].byte[j] & 0x3F
dst[i*8+j] := b.qword[i].bit[m]
ENDFOR
ENDFOR
dst[MAX:16] := 0
_mm_popcnt_epi16#
- Tech:
AVX-512
- Category:
Bit Manipulation
- Header:
immintrin.h
- Searchable:
AVX-512-Bit Manipulation-XMM
- Register:
XMM 128 bit
- Return Type:
__m128i
- Param Types:
__m128i a
- Param ETypes:
UI16 a
__m128i _mm_popcnt_epi16(__m128i a);
Intel Description
Count the number of logical 1 bits in packed 16-bit integers in “a”, and store the results in “dst”.
Intel Implementation Pseudo-Code
DEFINE POPCNT(a) {
count := 0
DO WHILE a > 0
count += a[0]
a >>= 1
OD
RETURN count
}
FOR j := 0 to 7
i := j*16
dst[i+15:i] := POPCNT(a[i+15:i])
ENDFOR
dst[MAX:128] := 0
_mm_mask_popcnt_epi16#
- Tech:
AVX-512
- Category:
Bit Manipulation
- Header:
immintrin.h
- Searchable:
AVX-512-Bit Manipulation-XMM
- Register:
XMM 128 bit
- Return Type:
__m128i
- Param Types:
__m128i src, __mmask8 k, __m128i a
- Param ETypes:
UI16 src, MASK k, UI16 a
__m128i _mm_mask_popcnt_epi16(__m128i src, __mmask8 k,
__m128i a);
Intel Description
Count the number of logical 1 bits in packed 16-bit integers in “a”, and store the results in “dst” using writemask “k” (elements are copied from “src” when the corresponding mask bit is not set).
Intel Implementation Pseudo-Code
DEFINE POPCNT(a) {
count := 0
DO WHILE a > 0
count += a[0]
a >>= 1
OD
RETURN count
}
FOR j := 0 to 7
i := j*16
IF k[j]
dst[i+15:i] := POPCNT(a[i+15:i])
ELSE
dst[i+15:i] := src[i+15:i]
FI
ENDFOR
dst[MAX:128] := 0
_mm_maskz_popcnt_epi16#
- Tech:
AVX-512
- Category:
Bit Manipulation
- Header:
immintrin.h
- Searchable:
AVX-512-Bit Manipulation-XMM
- Register:
XMM 128 bit
- Return Type:
__m128i
- Param Types:
__mmask8 k, __m128i a
- Param ETypes:
MASK k, UI16 a
__m128i _mm_maskz_popcnt_epi16(__mmask8 k, __m128i a);
Intel Description
Count the number of logical 1 bits in packed 16-bit integers in “a”, and store the results in “dst” using zeromask “k” (elements are zeroed out when the corresponding mask bit is not set).
Intel Implementation Pseudo-Code
DEFINE POPCNT(a) {
count := 0
DO WHILE a > 0
count += a[0]
a >>= 1
OD
RETURN count
}
FOR j := 0 to 7
i := j*16
IF k[j]
dst[i+15:i] := POPCNT(a[i+15:i])
ELSE
dst[i+15:i] := 0
FI
ENDFOR
dst[MAX:128] := 0
_mm_popcnt_epi8#
- Tech:
AVX-512
- Category:
Bit Manipulation
- Header:
immintrin.h
- Searchable:
AVX-512-Bit Manipulation-XMM
- Register:
XMM 128 bit
- Return Type:
__m128i
- Param Types:
__m128i a
- Param ETypes:
UI8 a
__m128i _mm_popcnt_epi8(__m128i a);
Intel Description
Count the number of logical 1 bits in packed 8-bit integers in “a”, and store the results in “dst”.
Intel Implementation Pseudo-Code
DEFINE POPCNT(a) {
count := 0
DO WHILE a > 0
count += a[0]
a >>= 1
OD
RETURN count
}
FOR j := 0 to 15
i := j*8
dst[i+7:i] := POPCNT(a[i+7:i])
ENDFOR
dst[MAX:128] := 0
_mm_mask_popcnt_epi8#
- Tech:
AVX-512
- Category:
Bit Manipulation
- Header:
immintrin.h
- Searchable:
AVX-512-Bit Manipulation-XMM
- Register:
XMM 128 bit
- Return Type:
__m128i
- Param Types:
__m128i src, __mmask16 k, __m128i a
- Param ETypes:
UI8 src, MASK k, UI8 a
__m128i _mm_mask_popcnt_epi8(__m128i src, __mmask16 k,
__m128i a);
Intel Description
Count the number of logical 1 bits in packed 8-bit integers in “a”, and store the results in “dst” using writemask “k” (elements are copied from “src” when the corresponding mask bit is not set).
Intel Implementation Pseudo-Code
DEFINE POPCNT(a) {
count := 0
DO WHILE a > 0
count += a[0]
a >>= 1
OD
RETURN count
}
FOR j := 0 to 15
i := j*8
IF k[j]
dst[i+7:i] := POPCNT(a[i+7:i])
ELSE
dst[i+7:i] := src[i+7:i]
FI
ENDFOR
dst[MAX:128] := 0
_mm_maskz_popcnt_epi8#
- Tech:
AVX-512
- Category:
Bit Manipulation
- Header:
immintrin.h
- Searchable:
AVX-512-Bit Manipulation-XMM
- Register:
XMM 128 bit
- Return Type:
__m128i
- Param Types:
__mmask16 k, __m128i a
- Param ETypes:
MASK k, UI8 a
__m128i _mm_maskz_popcnt_epi8(__mmask16 k, __m128i a);
Intel Description
Count the number of logical 1 bits in packed 8-bit integers in “a”, and store the results in “dst” using zeromask “k” (elements are zeroed out when the corresponding mask bit is not set).
Intel Implementation Pseudo-Code
DEFINE POPCNT(a) {
count := 0
DO WHILE a > 0
count += a[0]
a >>= 1
OD
RETURN count
}
FOR j := 0 to 15
i := j*8
IF k[j]
dst[i+7:i] := POPCNT(a[i+7:i])
ELSE
dst[i+7:i] := 0
FI
ENDFOR
dst[MAX:128] := 0
_mm_multishift_epi64_epi8#
- Tech:
AVX-512
- Category:
Bit Manipulation
- Header:
immintrin.h
- Searchable:
AVX-512-Bit Manipulation-XMM
- Register:
XMM 128 bit
- Return Type:
__m128i
- Param Types:
__m128i a, __m128i b
- Param ETypes:
UI8 a, UI8 b
__m128i _mm_multishift_epi64_epi8(__m128i a, __m128i b);
Intel Description
For each 64-bit element in “b”, select 8 unaligned bytes using a byte-granular shift control within the corresponding 64-bit element of “a”, and store the 8 assembled bytes to the corresponding 64-bit element of “dst”.
Intel Implementation Pseudo-Code
FOR i := 0 to 1
q := i * 64
FOR j := 0 to 7
tmp8 := 0
ctrl := a[q+j*8+7:q+j*8] & 63
FOR l := 0 to 7
tmp8[l] := b[q+((ctrl+l) & 63)]
ENDFOR
dst[q+j*8+7:q+j*8] := tmp8[7:0]
ENDFOR
ENDFOR
dst[MAX:128] := 0
_mm_mask_multishift_epi64_epi8#
- Tech:
AVX-512
- Category:
Bit Manipulation
- Header:
immintrin.h
- Searchable:
AVX-512-Bit Manipulation-XMM
- Register:
XMM 128 bit
- Return Type:
__m128i
- Param Types:
__m128i src, __mmask16 k, __m128i a, __m128i b
- Param ETypes:
UI8 src, MASK k, UI8 a, UI8 b
__m128i _mm_mask_multishift_epi64_epi8(__m128i src,
__mmask16 k,
__m128i a,
__m128i b);
Intel Description
For each 64-bit element in “b”, select 8 unaligned bytes using a byte-granular shift control within the corresponding 64-bit element of “a”, and store the 8 assembled bytes to the corresponding 64-bit element of “dst” using writemask “k” (elements are copied from “src” when the corresponding mask bit is not set).
Intel Implementation Pseudo-Code
FOR i := 0 to 1
q := i * 64
FOR j := 0 to 7
tmp8 := 0
ctrl := a[q+j*8+7:q+j*8] & 63
FOR l := 0 to 7
tmp8[l] := b[q+((ctrl+l) & 63)]
ENDFOR
IF k[i*8+j]
dst[q+j*8+7:q+j*8] := tmp8[7:0]
ELSE
dst[q+j*8+7:q+j*8] := src[q+j*8+7:q+j*8]
FI
ENDFOR
ENDFOR
dst[MAX:128] := 0
_mm_maskz_multishift_epi64_epi8#
- Tech:
AVX-512
- Category:
Bit Manipulation
- Header:
immintrin.h
- Searchable:
AVX-512-Bit Manipulation-XMM
- Register:
XMM 128 bit
- Return Type:
__m128i
- Param Types:
__mmask16 k, __m128i a, __m128i b
- Param ETypes:
MASK k, UI8 a, UI8 b
__m128i _mm_maskz_multishift_epi64_epi8(__mmask16 k,
__m128i a,
__m128i b);
Intel Description
For each 64-bit element in “b”, select 8 unaligned bytes using a byte-granular shift control within the corresponding 64-bit element of “a”, and store the 8 assembled bytes to the corresponding 64-bit element of “dst” using zeromask “k” (elements are zeroed out when the corresponding mask bit is not set).
Intel Implementation Pseudo-Code
FOR i := 0 to 1
q := i * 64
FOR j := 0 to 7
tmp8 := 0
ctrl := a[q+j*8+7:q+j*8] & 63
FOR l := 0 to 7
tmp8[l] := b[q+((ctrl+l) & 63)]
ENDFOR
IF k[i*8+j]
dst[q+j*8+7:q+j*8] := tmp8[7:0]
ELSE
dst[q+j*8+7:q+j*8] := 0
FI
ENDFOR
ENDFOR
dst[MAX:128] := 0