AVX-512-Special Math Functions-XMM#

_mm_reduce_max_epi16#

Tech:

AVX-512

Category:

Special Math Functions

Header:

immintrin.h

Searchable:

AVX-512-Special Math Functions-XMM

Register:

XMM 128 bit

Return Type:

short

Param Types:

__m128i a

Param ETypes:

SI16 a

short _mm_reduce_max_epi16(__m128i a);

Intel Description

Reduce the packed signed 16-bit integers in “a” by maximum. Returns the maximum of all elements in “a”.

Intel Implementation Pseudo-Code

DEFINE REDUCE_MAX(src, len) {
        IF len == 2
                RETURN (src[15:0] > src[31:16] ? src[15:0] : src[31:16])
        FI
        len := len / 2
        FOR j:= 0 to (len-1)
                i := j*16
                src[i+15:i] := (src[i+15:i] > src[i+16*len+15:i+16*len] ? src[i+15:i] : src[i+16*len+15:i+16*len])
        ENDFOR
        RETURN REDUCE_MAX(src[16*len-1:0], len)
}
dst[15:0] := REDUCE_MAX(a, 8)

_mm_mask_reduce_max_epi16#

Tech:

AVX-512

Category:

Special Math Functions

Header:

immintrin.h

Searchable:

AVX-512-Special Math Functions-XMM

Register:

XMM 128 bit

Return Type:

short

Param Types:

__mmask8 k, __m128i a

Param ETypes:

MASK k, SI16 a

short _mm_mask_reduce_max_epi16(__mmask8 k, __m128i a);

Intel Description

Reduce the packed signed 16-bit integers in “a” by maximum using mask “k”. Returns the maximum of all active elements in “a”.

Intel Implementation Pseudo-Code

DEFINE REDUCE_MAX(src, len) {
        IF len == 2
                RETURN (src[15:0] > src[31:16] ? src[15:0] : src[31:16])
        FI
        len := len / 2
        FOR j:= 0 to (len-1)
                i := j*16
                src[i+15:i] := (src[i+15:i] > src[i+16*len+15:i+16*len] ? src[i+15:i] : src[i+16*len+15:i+16*len])
        ENDFOR
        RETURN REDUCE_MAX(src[16*len-1:0], len)
}
tmp := a
FOR j := 0 to 7
        i := j*16
        IF k[j]
                tmp[i+15:i] := a[i+15:i]
        ELSE
                tmp[i+15:i] := Int16(-0x8000)
        FI
ENDFOR
dst[15:0] := REDUCE_MAX(tmp, 8)

_mm_reduce_max_epi8#

Tech:

AVX-512

Category:

Special Math Functions

Header:

immintrin.h

Searchable:

AVX-512-Special Math Functions-XMM

Register:

XMM 128 bit

Return Type:

char

Param Types:

__m128i a

Param ETypes:

SI8 a

char _mm_reduce_max_epi8(__m128i a);

Intel Description

Reduce the packed signed 8-bit integers in “a” by maximum. Returns the maximum of all elements in “a”.

Intel Implementation Pseudo-Code

DEFINE REDUCE_MAX(src, len) {
        IF len == 2
                RETURN (src[7:0] > src[15:8] ? src[7:0] : src[15:8])
        FI
        len := len / 2
        FOR j:= 0 to (len-1)
                i := j*8
                src[i+7:i] := (src[i+7:i] > src[i+8*len+7:i+8*len] ? src[i+7:i] : src[i+8*len+7:i+8*len])
        ENDFOR
        RETURN REDUCE_MAX(src[8*len-1:0], len)
}
dst[7:0] := REDUCE_MAX(a, 16)

_mm_mask_reduce_max_epi8#

Tech:

AVX-512

Category:

Special Math Functions

Header:

immintrin.h

Searchable:

AVX-512-Special Math Functions-XMM

Register:

XMM 128 bit

Return Type:

char

Param Types:

__mmask16 k, __m128i a

Param ETypes:

MASK k, SI8 a

char _mm_mask_reduce_max_epi8(__mmask16 k, __m128i a);

Intel Description

Reduce the packed signed 8-bit integers in “a” by maximum using mask “k”. Returns the maximum of all active elements in “a”.

Intel Implementation Pseudo-Code

DEFINE REDUCE_MAX(src, len) {
        IF len == 2
                RETURN (src[7:0] > src[15:8] ? src[7:0] : src[15:8])
        FI
        len := len / 2
        FOR j:= 0 to (len-1)
                i := j*8
                src[i+7:i] := (src[i+7:i] > src[i+8*len+7:i+8*len] ? src[i+7:i] : src[i+8*len+7:i+8*len])
        ENDFOR
        RETURN REDUCE_MAX(src[8*len-1:0], len)
}
tmp := a
FOR j := 0 to 15
        i := j*8
        IF k[j]
                tmp[i+7:i] := a[i+7:i]
        ELSE
                tmp[i+7:i] := Int8(-0x80)
        FI
ENDFOR
dst[7:0] := REDUCE_MAX(tmp, 16)

_mm_reduce_max_epu16#

Tech:

AVX-512

Category:

Special Math Functions

Header:

immintrin.h

Searchable:

AVX-512-Special Math Functions-XMM

Register:

XMM 128 bit

Return Type:

unsigned short

Param Types:

__m128i a

Param ETypes:

UI16 a

unsigned short _mm_reduce_max_epu16(__m128i a);

Intel Description

Reduce the packed unsigned 16-bit integers in “a” by maximum. Returns the maximum of all elements in “a”.

Intel Implementation Pseudo-Code

DEFINE REDUCE_MAX(src, len) {
        IF len == 2
                RETURN (src[15:0] > src[31:16] ? src[15:0] : src[31:16])
        FI
        len := len / 2
        FOR j:= 0 to (len-1)
                i := j*16
                src[i+15:i] := (src[i+15:i] > src[i+16*len+15:i+16*len] ? src[i+15:i] : src[i+16*len+15:i+16*len])
        ENDFOR
        RETURN REDUCE_MAX(src[16*len-1:0], len)
}
dst[15:0] := REDUCE_MAX(a, 8)

_mm_mask_reduce_max_epu16#

Tech:

AVX-512

Category:

Special Math Functions

Header:

immintrin.h

Searchable:

AVX-512-Special Math Functions-XMM

Register:

XMM 128 bit

Return Type:

unsigned short

Param Types:

__mmask8 k, __m128i a

Param ETypes:

MASK k, UI16 a

unsigned short _mm_mask_reduce_max_epu16(__mmask8 k, __m128i a);

Intel Description

Reduce the packed unsigned 16-bit integers in “a” by maximum using mask “k”. Returns the maximum of all active elements in “a”.

Intel Implementation Pseudo-Code

DEFINE REDUCE_MAX(src, len) {
        IF len == 2
                RETURN (src[15:0] > src[31:16] ? src[15:0] : src[31:16])
        FI
        len := len / 2
        FOR j:= 0 to (len-1)
                i := j*16
                src[i+15:i] := (src[i+15:i] > src[i+16*len+15:i+16*len] ? src[i+15:i] : src[i+16*len+15:i+16*len])
        ENDFOR
        RETURN REDUCE_MAX(src[16*len-1:0], len)
}
tmp := a
FOR j := 0 to 7
        i := j*16
        IF k[j]
                tmp[i+15:i] := a[i+15:i]
        ELSE
                tmp[i+15:i] := 0
        FI
ENDFOR
dst[15:0] := REDUCE_MAX(tmp, 8)

_mm_reduce_max_epu8#

Tech:

AVX-512

Category:

Special Math Functions

Header:

immintrin.h

Searchable:

AVX-512-Special Math Functions-XMM

Register:

XMM 128 bit

Return Type:

unsigned char

Param Types:

__m128i a

Param ETypes:

UI8 a

unsigned char _mm_reduce_max_epu8(__m128i a);

Intel Description

Reduce the packed unsigned 8-bit integers in “a” by maximum. Returns the maximum of all elements in “a”.

Intel Implementation Pseudo-Code

DEFINE REDUCE_MAX(src, len) {
        IF len == 2
                RETURN (src[7:0] > src[15:8] ? src[7:0] : src[15:8])
        FI
        len := len / 2
        FOR j:= 0 to (len-1)
                i := j*8
                src[i+7:i] := (src[i+7:i] > src[i+8*len+7:i+8*len] ? src[i+7:i] : src[i+8*len+7:i+8*len])
        ENDFOR
        RETURN REDUCE_MAX(src[8*len-1:0], len)
}
dst[7:0] := REDUCE_MAX(a, 16)

_mm_mask_reduce_max_epu8#

Tech:

AVX-512

Category:

Special Math Functions

Header:

immintrin.h

Searchable:

AVX-512-Special Math Functions-XMM

Register:

XMM 128 bit

Return Type:

unsigned char

Param Types:

__mmask16 k, __m128i a

Param ETypes:

MASK k, UI8 a

unsigned char _mm_mask_reduce_max_epu8(__mmask16 k, __m128i a);

Intel Description

Reduce the packed unsigned 8-bit integers in “a” by maximum using mask “k”. Returns the maximum of all active elements in “a”.

Intel Implementation Pseudo-Code

DEFINE REDUCE_MAX(src, len) {
        IF len == 2
                RETURN (src[7:0] > src[15:8] ? src[7:0] : src[15:8])
        FI
        len := len / 2
        FOR j:= 0 to (len-1)
                i := j*8
                src[i+7:i] := (src[i+7:i] > src[i+8*len+7:i+8*len] ? src[i+7:i] : src[i+8*len+7:i+8*len])
        ENDFOR
        RETURN REDUCE_MAX(src[8*len-1:0], len)
}
tmp := a
FOR j := 0 to 15
        i := j*8
        IF k[j]
                tmp[i+7:i] := a[i+7:i]
        ELSE
                tmp[i+7:i] := 0
        FI
ENDFOR
dst[7:0] := REDUCE_MAX(tmp, 16)

_mm_reduce_min_epi16#

Tech:

AVX-512

Category:

Special Math Functions

Header:

immintrin.h

Searchable:

AVX-512-Special Math Functions-XMM

Register:

XMM 128 bit

Return Type:

short

Param Types:

__m128i a

Param ETypes:

SI16 a

short _mm_reduce_min_epi16(__m128i a);

Intel Description

Reduce the packed signed 16-bit integers in “a” by minimum. Returns the minimum of all elements in “a”.

Intel Implementation Pseudo-Code

DEFINE REDUCE_MIN(src, len) {
        IF len == 2
                RETURN (src[15:0] < src[31:16] ? src[15:0] : src[31:16])
        FI
        len := len / 2
        FOR j:= 0 to (len-1)
                i := j*16
                src[i+15:i] := (src[i+15:i] < src[i+16*len+15:i+16*len] ? src[i+15:i] : src[i+16*len+15:i+16*len])
        ENDFOR
        RETURN REDUCE_MIN(src[16*len-1:0], len)
}
dst[15:0] := REDUCE_MIN(a, 8)

_mm_mask_reduce_min_epi16#

Tech:

AVX-512

Category:

Special Math Functions

Header:

immintrin.h

Searchable:

AVX-512-Special Math Functions-XMM

Register:

XMM 128 bit

Return Type:

short

Param Types:

__mmask8 k, __m128i a

Param ETypes:

MASK k, SI16 a

short _mm_mask_reduce_min_epi16(__mmask8 k, __m128i a);

Intel Description

Reduce the packed signed 16-bit integers in “a” by minimum using mask “k”. Returns the minimum of all active elements in “a”.

Intel Implementation Pseudo-Code

DEFINE REDUCE_MIN(src, len) {
        IF len == 2
                RETURN (src[15:0] < src[31:16] ? src[15:0] : src[31:16])
        FI
        len := len / 2
        FOR j:= 0 to (len-1)
                i := j*16
                src[i+15:i] := (src[i+15:i] < src[i+16*len+15:i+16*len] ? src[i+15:i] : src[i+16*len+15:i+16*len])
        ENDFOR
        RETURN REDUCE_MIN(src[16*len-1:0], len)
}
tmp := a
FOR j := 0 to 7
        i := j*16
        IF k[j]
                tmp[i+15:i] := a[i+15:i]
        ELSE
                tmp[i+15:i] := Int16(0x7FFF)
        FI
ENDFOR
dst[15:0] := REDUCE_MIN(tmp, 8)

_mm_reduce_min_epi8#

Tech:

AVX-512

Category:

Special Math Functions

Header:

immintrin.h

Searchable:

AVX-512-Special Math Functions-XMM

Register:

XMM 128 bit

Return Type:

char

Param Types:

__m128i a

Param ETypes:

SI8 a

char _mm_reduce_min_epi8(__m128i a);

Intel Description

Reduce the packed signed 8-bit integers in “a” by minimum. Returns the minimum of all elements in “a”.

Intel Implementation Pseudo-Code

DEFINE REDUCE_MIN(src, len) {
        IF len == 2
                RETURN (src[7:0] < src[15:8] ? src[7:0] : src[15:8])
        FI
        len := len / 2
        FOR j:= 0 to (len-1)
                i := j*8
                src[i+7:i] := (src[i+7:i] < src[i+8*len+7:i+8*len] ? src[i+7:i] : src[i+8*len+7:i+8*len])
        ENDFOR
        RETURN REDUCE_MIN(src[8*len-1:0], len)
}
dst[7:0] := REDUCE_MIN(a, 16)

_mm_mask_reduce_min_epi8#

Tech:

AVX-512

Category:

Special Math Functions

Header:

immintrin.h

Searchable:

AVX-512-Special Math Functions-XMM

Register:

XMM 128 bit

Return Type:

char

Param Types:

__mmask16 k, __m128i a

Param ETypes:

MASK k, SI8 a

char _mm_mask_reduce_min_epi8(__mmask16 k, __m128i a);

Intel Description

Reduce the packed signed 8-bit integers in “a” by minimum using mask “k”. Returns the minimum of all active elements in “a”.

Intel Implementation Pseudo-Code

DEFINE REDUCE_MIN(src, len) {
        IF len == 2
                RETURN (src[7:0] < src[15:8] ? src[7:0] : src[15:8])
        FI
        len := len / 2
        FOR j:= 0 to (len-1)
                i := j*8
                src[i+7:i] := (src[i+7:i] < src[i+8*len+7:i+8*len] ? src[i+7:i] : src[i+8*len+7:i+8*len])
        ENDFOR
        RETURN REDUCE_MIN(src[8*len-1:0], len)
}
tmp := a
FOR j := 0 to 15
        i := j*8
        IF k[j]
                tmp[i+7:i] := a[i+7:i]
        ELSE
                tmp[i+7:i] := Int8(0x7F)
        FI
ENDFOR
dst[7:0] := REDUCE_MIN(tmp, 16)

_mm_reduce_min_epu16#

Tech:

AVX-512

Category:

Special Math Functions

Header:

immintrin.h

Searchable:

AVX-512-Special Math Functions-XMM

Register:

XMM 128 bit

Return Type:

unsigned short

Param Types:

__m128i a

Param ETypes:

UI16 a

unsigned short _mm_reduce_min_epu16(__m128i a);

Intel Description

Reduce the packed unsigned 16-bit integers in “a” by minimum. Returns the minimum of all elements in “a”.

Intel Implementation Pseudo-Code

DEFINE REDUCE_MIN(src, len) {
        IF len == 2
                RETURN (src[15:0] < src[31:16] ? src[15:0] : src[31:16])
        FI
        len := len / 2
        FOR j:= 0 to (len-1)
                i := j*16
                src[i+15:i] := (src[i+15:i] < src[i+16*len+15:i+16*len] ? src[i+15:i] : src[i+16*len+15:i+16*len])
        ENDFOR
        RETURN REDUCE_MIN(src[16*len-1:0], len)
}
dst[15:0] := REDUCE_MIN(a, 8)

_mm_mask_reduce_min_epu16#

Tech:

AVX-512

Category:

Special Math Functions

Header:

immintrin.h

Searchable:

AVX-512-Special Math Functions-XMM

Register:

XMM 128 bit

Return Type:

unsigned short

Param Types:

__mmask8 k, __m128i a

Param ETypes:

MASK k, UI16 a

unsigned short _mm_mask_reduce_min_epu16(__mmask8 k, __m128i a);

Intel Description

Reduce the packed unsigned 16-bit integers in “a” by minimum using mask “k”. Returns the minimum of all active elements in “a”.

Intel Implementation Pseudo-Code

DEFINE REDUCE_MIN(src, len) {
        IF len == 2
                RETURN (src[15:0] < src[31:16] ? src[15:0] : src[31:16])
        FI
        len := len / 2
        FOR j:= 0 to (len-1)
                i := j*16
                src[i+15:i] := (src[i+15:i] < src[i+16*len+15:i+16*len] ? src[i+15:i] : src[i+16*len+15:i+16*len])
        ENDFOR
        RETURN REDUCE_MIN(src[16*len-1:0], len)
}
tmp := a
FOR j := 0 to 7
        i := j*16
        IF k[j]
                tmp[i+15:i] := a[i+15:i]
        ELSE
                tmp[i+15:i] := 0xFFFF
        FI
ENDFOR
dst[15:0] := REDUCE_MIN(tmp, 8)

_mm_reduce_min_epu8#

Tech:

AVX-512

Category:

Special Math Functions

Header:

immintrin.h

Searchable:

AVX-512-Special Math Functions-XMM

Register:

XMM 128 bit

Return Type:

unsigned char

Param Types:

__m128i a

Param ETypes:

UI8 a

unsigned char _mm_reduce_min_epu8(__m128i a);

Intel Description

Reduce the packed unsigned 8-bit integers in “a” by minimum. Returns the minimum of all elements in “a”.

Intel Implementation Pseudo-Code

DEFINE REDUCE_MIN(src, len) {
        IF len == 2
                RETURN (src[7:0] < src[15:8] ? src[7:0] : src[15:8])
        FI
        len := len / 2
        FOR j:= 0 to (len-1)
                i := j*8
                src[i+7:i] := (src[i+7:i] < src[i+8*len+7:i+8*len] ? src[i+7:i] : src[i+8*len+7:i+8*len])
        ENDFOR
        RETURN REDUCE_MIN(src[8*len-1:0], len)
}
dst[7:0] := REDUCE_MIN(a, 16)

_mm_mask_reduce_min_epu8#

Tech:

AVX-512

Category:

Special Math Functions

Header:

immintrin.h

Searchable:

AVX-512-Special Math Functions-XMM

Register:

XMM 128 bit

Return Type:

unsigned char

Param Types:

__mmask16 k, __m128i a

Param ETypes:

MASK k, UI8 a

unsigned char _mm_mask_reduce_min_epu8(__mmask16 k, __m128i a);

Intel Description

Reduce the packed unsigned 8-bit integers in “a” by minimum using mask “k”. Returns the minimum of all active elements in “a”.

Intel Implementation Pseudo-Code

DEFINE REDUCE_MIN(src, len) {
        IF len == 2
                RETURN (src[7:0] < src[15:8] ? src[7:0] : src[15:8])
        FI
        len := len / 2
        FOR j:= 0 to (len-1)
                i := j*8
                src[i+7:i] := (src[i+7:i] < src[i+8*len+7:i+8*len] ? src[i+7:i] : src[i+8*len+7:i+8*len])
        ENDFOR
        RETURN REDUCE_MIN(src[8*len-1:0], len)
}
tmp := a
FOR j := 0 to 15
        i := j*8
        IF k[j]
                tmp[i+7:i] := a[i+7:i]
        ELSE
                tmp[i+7:i] := 0xFF
        FI
ENDFOR
dst[7:0] := REDUCE_MIN(tmp, 16)

_mm_mask_max_round_sd#

Tech:

AVX-512

Category:

Special Math Functions

Header:

immintrin.h

Searchable:

AVX-512-Special Math Functions-XMM

Register:

XMM 128 bit

Return Type:

__m128d

Param Types:

__m128d src, __mmask8 k, __m128d a, __m128d b, int sae

Param ETypes:

FP64 src, MASK k, FP64 a, FP64 b, IMM sae

__m128d _mm_mask_max_round_sd(__m128d src, __mmask8 k,
                              __m128d a, __m128d b,
                              int sae);

Intel Description

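Compare the lower double-precision (64-bit) floating-point elements in “a” and “b”, store the maximum value in the lower element of “dst” using writemask “k” (the element is copied from “src” when mask bit 0 is not set), and copy the upper element from “a” to the upper element of “dst”. Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the “sae” parameter. Does not follow the IEEE Standard for Floating-Point Arithmetic (IEEE 754) maximum value when inputs are NaN or signed-zero values.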

Intel Implementation Pseudo-Code

IF k[0]
        dst[63:0] := MAX(a[63:0], b[63:0])
ELSE
        dst[63:0] := src[63:0]
FI
dst[127:64] := a[127:64]
dst[MAX:128] := 0

_mm_mask_max_sd#

Tech:

AVX-512

Category:

Special Math Functions

Header:

immintrin.h

Searchable:

AVX-512-Special Math Functions-XMM

Register:

XMM 128 bit

Return Type:

__m128d

Param Types:

__m128d src, __mmask8 k, __m128d a, __m128d b

Param ETypes:

FP64 src, MASK k, FP64 a, FP64 b

__m128d _mm_mask_max_sd(__m128d src, __mmask8 k, __m128d a,
                        __m128d b);

Intel Description

Compare the lower double-precision (64-bit) floating-point elements in “a” and “b”, store the maximum value in the lower element of “dst” using writemask “k” (the element is copied from “src” when mask bit 0 is not set), and copy the upper element from “a” to the upper element of “dst”.

Intel Implementation Pseudo-Code

IF k[0]
        dst[63:0] := MAX(a[63:0], b[63:0])
ELSE
        dst[63:0] := src[63:0]
FI
dst[127:64] := a[127:64]
dst[MAX:128] := 0

_mm_maskz_max_round_sd#

Tech:

AVX-512

Category:

Special Math Functions

Header:

immintrin.h

Searchable:

AVX-512-Special Math Functions-XMM

Register:

XMM 128 bit

Return Type:

__m128d

Param Types:

__mmask8 k, __m128d a, __m128d b, int sae

Param ETypes:

MASK k, FP64 a, FP64 b, IMM sae

__m128d _mm_maskz_max_round_sd(__mmask8 k, __m128d a,
                               __m128d b, int sae);

Intel Description

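Compare the lower double-precision (64-bit) floating-point elements in “a” and “b”, store the maximum value in the lower element of “dst” using zeromask “k” (the element is zeroed out when mask bit 0 is not set), and copy the upper element from “a” to the upper element of “dst”. Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the “sae” parameter. Does not follow the IEEE Standard for Floating-Point Arithmetic (IEEE 754) maximum value when inputs are NaN or signed-zero values.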

Intel Implementation Pseudo-Code

IF k[0]
        dst[63:0] := MAX(a[63:0], b[63:0])
ELSE
        dst[63:0] := 0
FI
dst[127:64] := a[127:64]
dst[MAX:128] := 0

_mm_maskz_max_sd#

Tech:

AVX-512

Category:

Special Math Functions

Header:

immintrin.h

Searchable:

AVX-512-Special Math Functions-XMM

Register:

XMM 128 bit

Return Type:

__m128d

Param Types:

__mmask8 k, __m128d a, __m128d b

Param ETypes:

MASK k, FP64 a, FP64 b

__m128d _mm_maskz_max_sd(__mmask8 k, __m128d a, __m128d b);

Intel Description

Compare the lower double-precision (64-bit) floating-point elements in “a” and “b”, store the maximum value in the lower element of “dst” using zeromask “k” (the element is zeroed out when mask bit 0 is not set), and copy the upper element from “a” to the upper element of “dst”.

Intel Implementation Pseudo-Code

IF k[0]
        dst[63:0] := MAX(a[63:0], b[63:0])
ELSE
        dst[63:0] := 0
FI
dst[127:64] := a[127:64]
dst[MAX:128] := 0

_mm_max_round_sd#

Tech:

AVX-512

Category:

Special Math Functions

Header:

immintrin.h

Searchable:

AVX-512-Special Math Functions-XMM

Register:

XMM 128 bit

Return Type:

__m128d

Param Types:

__m128d a, __m128d b, int sae

Param ETypes:

FP64 a, FP64 b, IMM sae

__m128d _mm_max_round_sd(__m128d a, __m128d b, int sae);

Intel Description

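Compare the lower double-precision (64-bit) floating-point elements in “a” and “b”, store the maximum value in the lower element of “dst”, and copy the upper element from “a” to the upper element of “dst”. Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the “sae” parameter. Does not follow the IEEE Standard for Floating-Point Arithmetic (IEEE 754) maximum value when inputs are NaN or signed-zero values.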

Intel Implementation Pseudo-Code

dst[63:0] := MAX(a[63:0], b[63:0])
dst[127:64] := a[127:64]
dst[MAX:128] := 0

_mm_mask_max_round_ss#

Tech:

AVX-512

Category:

Special Math Functions

Header:

immintrin.h

Searchable:

AVX-512-Special Math Functions-XMM

Register:

XMM 128 bit

Return Type:

__m128

Param Types:

__m128 src, __mmask8 k, __m128 a, __m128 b, int sae

Param ETypes:

FP32 src, MASK k, FP32 a, FP32 b, IMM sae

__m128 _mm_mask_max_round_ss(__m128 src, __mmask8 k,
                             __m128 a, __m128 b, int sae);

Intel Description

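Compare the lower single-precision (32-bit) floating-point elements in “a” and “b”, store the maximum value in the lower element of “dst” using writemask “k” (the element is copied from “src” when mask bit 0 is not set), and copy the upper 3 packed elements from “a” to the upper elements of “dst”. Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the “sae” parameter. Does not follow the IEEE Standard for Floating-Point Arithmetic (IEEE 754) maximum value when inputs are NaN or signed-zero values.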

Intel Implementation Pseudo-Code

IF k[0]
        dst[31:0] := MAX(a[31:0], b[31:0])
ELSE
        dst[31:0] := src[31:0]
FI
dst[127:32] := a[127:32]
dst[MAX:128] := 0

_mm_mask_max_ss#

Tech:

AVX-512

Category:

Special Math Functions

Header:

immintrin.h

Searchable:

AVX-512-Special Math Functions-XMM

Register:

XMM 128 bit

Return Type:

__m128

Param Types:

__m128 src, __mmask8 k, __m128 a, __m128 b

Param ETypes:

FP32 src, MASK k, FP32 a, FP32 b

__m128 _mm_mask_max_ss(__m128 src, __mmask8 k, __m128 a,
                       __m128 b);

Intel Description

Compare the lower single-precision (32-bit) floating-point elements in “a” and “b”, store the maximum value in the lower element of “dst” using writemask “k” (the element is copied from “src” when mask bit 0 is not set), and copy the upper 3 packed elements from “a” to the upper elements of “dst”.

Intel Implementation Pseudo-Code

IF k[0]
        dst[31:0] := MAX(a[31:0], b[31:0])
ELSE
        dst[31:0] := src[31:0]
FI
dst[127:32] := a[127:32]
dst[MAX:128] := 0

_mm_maskz_max_round_ss#

Tech:

AVX-512

Category:

Special Math Functions

Header:

immintrin.h

Searchable:

AVX-512-Special Math Functions-XMM

Register:

XMM 128 bit

Return Type:

__m128

Param Types:

__mmask8 k, __m128 a, __m128 b, int sae

Param ETypes:

MASK k, FP32 a, FP32 b, IMM sae

__m128 _mm_maskz_max_round_ss(__mmask8 k, __m128 a,
                              __m128 b, int sae);

Intel Description

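Compare the lower single-precision (32-bit) floating-point elements in “a” and “b”, store the maximum value in the lower element of “dst” using zeromask “k” (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from “a” to the upper elements of “dst”. Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the “sae” parameter. Does not follow the IEEE Standard for Floating-Point Arithmetic (IEEE 754) maximum value when inputs are NaN or signed-zero values.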

Intel Implementation Pseudo-Code

IF k[0]
        dst[31:0] := MAX(a[31:0], b[31:0])
ELSE
        dst[31:0] := 0
FI
dst[127:32] := a[127:32]
dst[MAX:128] := 0

_mm_maskz_max_ss#

Tech:

AVX-512

Category:

Special Math Functions

Header:

immintrin.h

Searchable:

AVX-512-Special Math Functions-XMM

Register:

XMM 128 bit

Return Type:

__m128

Param Types:

__mmask8 k, __m128 a, __m128 b

Param ETypes:

MASK k, FP32 a, FP32 b

__m128 _mm_maskz_max_ss(__mmask8 k, __m128 a, __m128 b);

Intel Description

Compare the lower single-precision (32-bit) floating-point elements in “a” and “b”, store the maximum value in the lower element of “dst” using zeromask “k” (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from “a” to the upper elements of “dst”.

Intel Implementation Pseudo-Code

IF k[0]
        dst[31:0] := MAX(a[31:0], b[31:0])
ELSE
        dst[31:0] := 0
FI
dst[127:32] := a[127:32]
dst[MAX:128] := 0

_mm_max_round_ss#

Tech:

AVX-512

Category:

Special Math Functions

Header:

immintrin.h

Searchable:

AVX-512-Special Math Functions-XMM

Register:

XMM 128 bit

Return Type:

__m128

Param Types:

__m128 a, __m128 b, int sae

Param ETypes:

FP32 a, FP32 b, IMM sae

__m128 _mm_max_round_ss(__m128 a, __m128 b, int sae);

Intel Description

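Compare the lower single-precision (32-bit) floating-point elements in “a” and “b”, store the maximum value in the lower element of “dst”, and copy the upper 3 packed elements from “a” to the upper elements of “dst”. Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the “sae” parameter. Does not follow the IEEE Standard for Floating-Point Arithmetic (IEEE 754) maximum value when inputs are NaN or signed-zero values.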

Intel Implementation Pseudo-Code

dst[31:0] := MAX(a[31:0], b[31:0])
dst[127:32] := a[127:32]
dst[MAX:128] := 0

_mm_mask_min_round_sd#

Tech:

AVX-512

Category:

Special Math Functions

Header:

immintrin.h

Searchable:

AVX-512-Special Math Functions-XMM

Register:

XMM 128 bit

Return Type:

__m128d

Param Types:

__m128d src, __mmask8 k, __m128d a, __m128d b, int sae

Param ETypes:

FP64 src, MASK k, FP64 a, FP64 b, IMM sae

__m128d _mm_mask_min_round_sd(__m128d src, __mmask8 k,
                              __m128d a, __m128d b,
                              int sae);

Intel Description

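Compare the lower double-precision (64-bit) floating-point elements in “a” and “b”, store the minimum value in the lower element of “dst” using writemask “k” (the element is copied from “src” when mask bit 0 is not set), and copy the upper element from “a” to the upper element of “dst”. Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the “sae” parameter. Does not follow the IEEE Standard for Floating-Point Arithmetic (IEEE 754) minimum value when inputs are NaN or signed-zero values.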

Intel Implementation Pseudo-Code

IF k[0]
        dst[63:0] := MIN(a[63:0], b[63:0])
ELSE
        dst[63:0] := src[63:0]
FI
dst[127:64] := a[127:64]
dst[MAX:128] := 0

_mm_mask_min_sd#

Tech:

AVX-512

Category:

Special Math Functions

Header:

immintrin.h

Searchable:

AVX-512-Special Math Functions-XMM

Register:

XMM 128 bit

Return Type:

__m128d

Param Types:

__m128d src, __mmask8 k, __m128d a, __m128d b

Param ETypes:

FP64 src, MASK k, FP64 a, FP64 b

__m128d _mm_mask_min_sd(__m128d src, __mmask8 k, __m128d a,
                        __m128d b);

Intel Description

Compare the lower double-precision (64-bit) floating-point elements in “a” and “b”, store the minimum value in the lower element of “dst” using writemask “k” (the element is copied from “src” when mask bit 0 is not set), and copy the upper element from “a” to the upper element of “dst”.

Intel Implementation Pseudo-Code

IF k[0]
        dst[63:0] := MIN(a[63:0], b[63:0])
ELSE
        dst[63:0] := src[63:0]
FI
dst[127:64] := a[127:64]
dst[MAX:128] := 0

_mm_maskz_min_round_sd#

Tech:

AVX-512

Category:

Special Math Functions

Header:

immintrin.h

Searchable:

AVX-512-Special Math Functions-XMM

Register:

XMM 128 bit

Return Type:

__m128d

Param Types:

__mmask8 k, __m128d a, __m128d b, int sae

Param ETypes:

MASK k, FP64 a, FP64 b, IMM sae

__m128d _mm_maskz_min_round_sd(__mmask8 k, __m128d a,
                               __m128d b, int sae);

Intel Description

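Compare the lower double-precision (64-bit) floating-point elements in “a” and “b”, store the minimum value in the lower element of “dst” using zeromask “k” (the element is zeroed out when mask bit 0 is not set), and copy the upper element from “a” to the upper element of “dst”. Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the “sae” parameter. Does not follow the IEEE Standard for Floating-Point Arithmetic (IEEE 754) minimum value when inputs are NaN or signed-zero values.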

Intel Implementation Pseudo-Code

IF k[0]
        dst[63:0] := MIN(a[63:0], b[63:0])
ELSE
        dst[63:0] := 0
FI
dst[127:64] := a[127:64]
dst[MAX:128] := 0

_mm_maskz_min_sd#

Tech:

AVX-512

Category:

Special Math Functions

Header:

immintrin.h

Searchable:

AVX-512-Special Math Functions-XMM

Register:

XMM 128 bit

Return Type:

__m128d

Param Types:

__mmask8 k, __m128d a, __m128d b

Param ETypes:

MASK k, FP64 a, FP64 b

__m128d _mm_maskz_min_sd(__mmask8 k, __m128d a, __m128d b);

Intel Description

Compare the lower double-precision (64-bit) floating-point elements in “a” and “b”, store the minimum value in the lower element of “dst” using zeromask “k” (the element is zeroed out when mask bit 0 is not set), and copy the upper element from “a” to the upper element of “dst”.

Intel Implementation Pseudo-Code

IF k[0]
        dst[63:0] := MIN(a[63:0], b[63:0])
ELSE
        dst[63:0] := 0
FI
dst[127:64] := a[127:64]
dst[MAX:128] := 0

_mm_min_round_sd#

Tech:

AVX-512

Category:

Special Math Functions

Header:

immintrin.h

Searchable:

AVX-512-Special Math Functions-XMM

Register:

XMM 128 bit

Return Type:

__m128d

Param Types:

__m128d a, __m128d b, int sae

Param ETypes:

FP64 a, FP64 b, IMM sae

__m128d _mm_min_round_sd(__m128d a, __m128d b, int sae);

Intel Description

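Compare the lower double-precision (64-bit) floating-point elements in “a” and “b”, store the minimum value in the lower element of “dst”, and copy the upper element from “a” to the upper element of “dst”. Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the “sae” parameter. Does not follow the IEEE Standard for Floating-Point Arithmetic (IEEE 754) minimum value when inputs are NaN or signed-zero values.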

Intel Implementation Pseudo-Code

dst[63:0] := MIN(a[63:0], b[63:0])
dst[127:64] := a[127:64]
dst[MAX:128] := 0

_mm_mask_min_round_ss#

Tech:

AVX-512

Category:

Special Math Functions

Header:

immintrin.h

Searchable:

AVX-512-Special Math Functions-XMM

Register:

XMM 128 bit

Return Type:

__m128

Param Types:

__m128 src, __mmask8 k, __m128 a, __m128 b, int sae

Param ETypes:

FP32 src, MASK k, FP32 a, FP32 b, IMM sae

__m128 _mm_mask_min_round_ss(__m128 src, __mmask8 k,
                             __m128 a, __m128 b, int sae);

Intel Description

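Compare the lower single-precision (32-bit) floating-point elements in “a” and “b”, store the minimum value in the lower element of “dst” using writemask “k” (the element is copied from “src” when mask bit 0 is not set), and copy the upper 3 packed elements from “a” to the upper elements of “dst”. Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the “sae” parameter. Does not follow the IEEE Standard for Floating-Point Arithmetic (IEEE 754) minimum value when inputs are NaN or signed-zero values.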

Intel Implementation Pseudo-Code

IF k[0]
        dst[31:0] := MIN(a[31:0], b[31:0])
ELSE
        dst[31:0] := src[31:0]
FI
dst[127:32] := a[127:32]
dst[MAX:128] := 0

_mm_mask_min_ss#

Tech:

AVX-512

Category:

Special Math Functions

Header:

immintrin.h

Searchable:

AVX-512-Special Math Functions-XMM

Register:

XMM 128 bit

Return Type:

__m128

Param Types:

__m128 src, __mmask8 k, __m128 a, __m128 b

Param ETypes:

FP32 src, MASK k, FP32 a, FP32 b

__m128 _mm_mask_min_ss(__m128 src, __mmask8 k, __m128 a,
                       __m128 b);

Intel Description

Compare the lower single-precision (32-bit) floating-point elements in “a” and “b”, store the minimum value in the lower element of “dst” using writemask “k” (the element is copied from “src” when mask bit 0 is not set), and copy the upper 3 packed elements from “a” to the upper elements of “dst”.

Intel Implementation Pseudo-Code

IF k[0]
        dst[31:0] := MIN(a[31:0], b[31:0])
ELSE
        dst[31:0] := src[31:0]
FI
dst[127:32] := a[127:32]
dst[MAX:128] := 0

_mm_maskz_min_round_ss#

Tech:

AVX-512

Category:

Special Math Functions

Header:

immintrin.h

Searchable:

AVX-512-Special Math Functions-XMM

Register:

XMM 128 bit

Return Type:

__m128

Param Types:

__mmask8 k, __m128 a, __m128 b, int sae

Param ETypes:

MASK k, FP32 a, FP32 b, IMM sae

__m128 _mm_maskz_min_round_ss(__mmask8 k, __m128 a,
                              __m128 b, int sae);

Intel Description

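Compare the lower single-precision (32-bit) floating-point elements in “a” and “b”, store the minimum value in the lower element of “dst” using zeromask “k” (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from “a” to the upper elements of “dst”. Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the “sae” parameter. Does not follow the IEEE Standard for Floating-Point Arithmetic (IEEE 754) minimum value when inputs are NaN or signed-zero values.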

Intel Implementation Pseudo-Code

IF k[0]
        dst[31:0] := MIN(a[31:0], b[31:0])
ELSE
        dst[31:0] := 0
FI
dst[127:32] := a[127:32]
dst[MAX:128] := 0

_mm_maskz_min_ss#

Tech:

AVX-512

Category:

Special Math Functions

Header:

immintrin.h

Searchable:

AVX-512-Special Math Functions-XMM

Register:

XMM 128 bit

Return Type:

__m128

Param Types:

__mmask8 k, __m128 a, __m128 b

Param ETypes:

MASK k, FP32 a, FP32 b

__m128 _mm_maskz_min_ss(__mmask8 k, __m128 a, __m128 b);

Intel Description

Compare the lower single-precision (32-bit) floating-point elements in “a” and “b”, store the minimum value in the lower element of “dst” using zeromask “k” (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from “a” to the upper elements of “dst”.

Intel Implementation Pseudo-Code

IF k[0]
        dst[31:0] := MIN(a[31:0], b[31:0])
ELSE
        dst[31:0] := 0
FI
dst[127:32] := a[127:32]
dst[MAX:128] := 0

_mm_min_round_ss#

Tech:

AVX-512

Category:

Special Math Functions

Header:

immintrin.h

Searchable:

AVX-512-Special Math Functions-XMM

Register:

XMM 128 bit

Return Type:

__m128

Param Types:

__m128 a, __m128 b, int sae

Param ETypes:

FP32 a, FP32 b, IMM sae

__m128 _mm_min_round_ss(__m128 a, __m128 b, int sae);

Intel Description

Compare the lower single-precision (32-bit) floating-point elements in “a” and “b”, store the minimum value in the lower element of “dst”, and copy the upper 3 packed elements from “a” to the upper elements of “dst”. [sae_note][min_float_note]

Intel Implementation Pseudo-Code

dst[31:0] := MIN(a[31:0], b[31:0])
dst[127:32] := a[127:32]
dst[MAX:128] := 0

_mm_max_ph#

Tech:

AVX-512

Category:

Special Math Functions

Header:

immintrin.h

Searchable:

AVX-512-Special Math Functions-XMM

Register:

XMM 128 bit

Return Type:

__m128h

Param Types:

__m128h a, __m128h b

Param ETypes:

FP16 a, FP16 b

__m128h _mm_max_ph(__m128h a, __m128h b);

Intel Description

Compare packed half-precision (16-bit) floating-point elements in “a” and “b”, and store packed maximum values in “dst”. [max_float_note]

Intel Implementation Pseudo-Code

FOR j := 0 to 7
        dst.fp16[j] := (a.fp16[j] > b.fp16[j] ? a.fp16[j] : b.fp16[j])
ENDFOR
dst[MAX:128] := 0

_mm_mask_max_ph#

Tech:

AVX-512

Category:

Special Math Functions

Header:

immintrin.h

Searchable:

AVX-512-Special Math Functions-XMM

Register:

XMM 128 bit

Return Type:

__m128h

Param Types:

__m128h src, __mmask8 k, __m128h a, __m128h b

Param ETypes:

FP16 src, MASK k, FP16 a, FP16 b

__m128h _mm_mask_max_ph(__m128h src, __mmask8 k, __m128h a,
                        __m128h b);

Intel Description

Compare packed half-precision (16-bit) floating-point elements in “a” and “b”, and store packed maximum values in “dst” using writemask “k” (elements are copied from “src” when the corresponding mask bit is not set). [max_float_note]

Intel Implementation Pseudo-Code

FOR j := 0 to 7
        IF k[j]
                dst.fp16[j] := (a.fp16[j] > b.fp16[j] ? a.fp16[j] : b.fp16[j])
        ELSE
                dst.fp16[j] := src.fp16[j]
        FI
ENDFOR
dst[MAX:128] := 0

_mm_maskz_max_ph#

Tech:

AVX-512

Category:

Special Math Functions

Header:

immintrin.h

Searchable:

AVX-512-Special Math Functions-XMM

Register:

XMM 128 bit

Return Type:

__m128h

Param Types:

__mmask8 k, __m128h a, __m128h b

Param ETypes:

MASK k, FP16 a, FP16 b

__m128h _mm_maskz_max_ph(__mmask8 k, __m128h a, __m128h b);

Intel Description

Compare packed half-precision (16-bit) floating-point elements in “a” and “b”, and store packed maximum values in “dst” using zeromask “k” (elements are zeroed out when the corresponding mask bit is not set). [max_float_note]

Intel Implementation Pseudo-Code

FOR j := 0 to 7
        IF k[j]
                dst.fp16[j] := (a.fp16[j] > b.fp16[j] ? a.fp16[j] : b.fp16[j])
        ELSE
                dst.fp16[j] := 0
        FI
ENDFOR
dst[MAX:128] := 0

_mm_max_sh#

Tech:

AVX-512

Category:

Special Math Functions

Header:

immintrin.h

Searchable:

AVX-512-Special Math Functions-XMM

Register:

XMM 128 bit

Return Type:

__m128h

Param Types:

__m128h a, __m128h b

Param ETypes:

FP16 a, FP16 b

__m128h _mm_max_sh(__m128h a, __m128h b);

Intel Description

Compare the lower half-precision (16-bit) floating-point elements in “a” and “b”, store the maximum value in the lower element of “dst”, and copy the upper 7 packed elements from “a” to the upper elements of “dst”. [max_float_note]

Intel Implementation Pseudo-Code

dst.fp16[0] := (a.fp16[0] > b.fp16[0] ? a.fp16[0] : b.fp16[0])
dst[127:16] := a[127:16]
dst[MAX:128] := 0

_mm_mask_max_sh#

Tech:

AVX-512

Category:

Special Math Functions

Header:

immintrin.h

Searchable:

AVX-512-Special Math Functions-XMM

Register:

XMM 128 bit

Return Type:

__m128h

Param Types:

__m128h src, __mmask8 k, __m128h a, __m128h b

Param ETypes:

FP16 src, MASK k, FP16 a, FP16 b

__m128h _mm_mask_max_sh(__m128h src, __mmask8 k, __m128h a,
                        __m128h b);

Intel Description

Compare the lower half-precision (16-bit) floating-point elements in “a” and “b”, store the maximum value in the lower element of “dst” using writemask “k” (the element is copied from “src” when mask bit 0 is not set), and copy the upper 7 packed elements from “a” to the upper elements of “dst”. [max_float_note]

Intel Implementation Pseudo-Code

IF k[0]
        dst.fp16[0] := (a.fp16[0] > b.fp16[0] ? a.fp16[0] : b.fp16[0])
ELSE
        dst.fp16[0] := src.fp16[0]
FI
dst[127:16] := a[127:16]
dst[MAX:128] := 0

_mm_maskz_max_sh#

Tech:

AVX-512

Category:

Special Math Functions

Header:

immintrin.h

Searchable:

AVX-512-Special Math Functions-XMM

Register:

XMM 128 bit

Return Type:

__m128h

Param Types:

__mmask8 k, __m128h a, __m128h b

Param ETypes:

MASK k, FP16 a, FP16 b

__m128h _mm_maskz_max_sh(__mmask8 k, __m128h a, __m128h b);

Intel Description

Compare the lower half-precision (16-bit) floating-point elements in “a” and “b”, store the maximum value in the lower element of “dst” using zeromask “k” (the element is zeroed out when mask bit 0 is not set), and copy the upper 7 packed elements from “a” to the upper elements of “dst”. [max_float_note]

Intel Implementation Pseudo-Code

IF k[0]
        dst.fp16[0] := (a.fp16[0] > b.fp16[0] ? a.fp16[0] : b.fp16[0])
ELSE
        dst.fp16[0] := 0
FI
dst[127:16] := a[127:16]
dst[MAX:128] := 0

_mm_max_round_sh#

Tech:

AVX-512

Category:

Special Math Functions

Header:

immintrin.h

Searchable:

AVX-512-Special Math Functions-XMM

Register:

XMM 128 bit

Return Type:

__m128h

Param Types:

__m128h a, __m128h b, int sae

Param ETypes:

FP16 a, FP16 b, IMM sae

__m128h _mm_max_round_sh(__m128h a, __m128h b, int sae);

Intel Description

Compare the lower half-precision (16-bit) floating-point elements in “a” and “b”, store the maximum value in the lower element of “dst”, and copy the upper 7 packed elements from “a” to the upper elements of “dst”. [sae_note][max_float_note]

Intel Implementation Pseudo-Code

dst.fp16[0] := (a.fp16[0] > b.fp16[0] ? a.fp16[0] : b.fp16[0])
dst[127:16] := a[127:16]
dst[MAX:128] := 0

_mm_mask_max_round_sh#

Tech:

AVX-512

Category:

Special Math Functions

Header:

immintrin.h

Searchable:

AVX-512-Special Math Functions-XMM

Register:

XMM 128 bit

Return Type:

__m128h

Param Types:

__m128h src, __mmask8 k, __m128h a, __m128h b, int sae

Param ETypes:

FP16 src, MASK k, FP16 a, FP16 b, IMM sae

__m128h _mm_mask_max_round_sh(__m128h src, __mmask8 k,
                              __m128h a, __m128h b,
                              int sae);

Intel Description

Compare the lower half-precision (16-bit) floating-point elements in “a” and “b”, store the maximum value in the lower element of “dst” using writemask “k” (the element is copied from “src” when mask bit 0 is not set), and copy the upper 7 packed elements from “a” to the upper elements of “dst”. [sae_note][max_float_note]

Intel Implementation Pseudo-Code

IF k[0]
        dst.fp16[0] := (a.fp16[0] > b.fp16[0] ? a.fp16[0] : b.fp16[0])
ELSE
        dst.fp16[0] := src.fp16[0]
FI
dst[127:16] := a[127:16]
dst[MAX:128] := 0

_mm_maskz_max_round_sh#

Tech:

AVX-512

Category:

Special Math Functions

Header:

immintrin.h

Searchable:

AVX-512-Special Math Functions-XMM

Register:

XMM 128 bit

Return Type:

__m128h

Param Types:

__mmask8 k, __m128h a, __m128h b, int sae

Param ETypes:

MASK k, FP16 a, FP16 b, IMM sae

__m128h _mm_maskz_max_round_sh(__mmask8 k, __m128h a,
                               __m128h b, int sae);

Intel Description

Compare the lower half-precision (16-bit) floating-point elements in “a” and “b”, store the maximum value in the lower element of “dst” using zeromask “k” (the element is zeroed out when mask bit 0 is not set), and copy the upper 7 packed elements from “a” to the upper elements of “dst”. [sae_note][max_float_note]

Intel Implementation Pseudo-Code

IF k[0]
        dst.fp16[0] := (a.fp16[0] > b.fp16[0] ? a.fp16[0] : b.fp16[0])
ELSE
        dst.fp16[0] := 0
FI
dst[127:16] := a[127:16]
dst[MAX:128] := 0

_mm_min_ph#

Tech:

AVX-512

Category:

Special Math Functions

Header:

immintrin.h

Searchable:

AVX-512-Special Math Functions-XMM

Register:

XMM 128 bit

Return Type:

__m128h

Param Types:

__m128h a, __m128h b

Param ETypes:

FP16 a, FP16 b

__m128h _mm_min_ph(__m128h a, __m128h b);

Intel Description

Compare packed half-precision (16-bit) floating-point elements in “a” and “b”, and store packed minimum values in “dst”. [min_float_note]

Intel Implementation Pseudo-Code

FOR j := 0 to 7
        dst.fp16[j] := (a.fp16[j] < b.fp16[j] ? a.fp16[j] : b.fp16[j])
ENDFOR
dst[MAX:128] := 0

_mm_mask_min_ph#

Tech:

AVX-512

Category:

Special Math Functions

Header:

immintrin.h

Searchable:

AVX-512-Special Math Functions-XMM

Register:

XMM 128 bit

Return Type:

__m128h

Param Types:

__m128h src, __mmask8 k, __m128h a, __m128h b

Param ETypes:

FP16 src, MASK k, FP16 a, FP16 b

__m128h _mm_mask_min_ph(__m128h src, __mmask8 k, __m128h a,
                        __m128h b);

Intel Description

Compare packed half-precision (16-bit) floating-point elements in “a” and “b”, and store packed minimum values in “dst” using writemask “k” (elements are copied from “src” when the corresponding mask bit is not set). [min_float_note]

Intel Implementation Pseudo-Code

FOR j := 0 to 7
        IF k[j]
                dst.fp16[j] := (a.fp16[j] < b.fp16[j] ? a.fp16[j] : b.fp16[j])
        ELSE
                dst.fp16[j] := src.fp16[j]
        FI
ENDFOR
dst[MAX:128] := 0

_mm_maskz_min_ph#

Tech:

AVX-512

Category:

Special Math Functions

Header:

immintrin.h

Searchable:

AVX-512-Special Math Functions-XMM

Register:

XMM 128 bit

Return Type:

__m128h

Param Types:

__mmask8 k, __m128h a, __m128h b

Param ETypes:

MASK k, FP16 a, FP16 b

__m128h _mm_maskz_min_ph(__mmask8 k, __m128h a, __m128h b);

Intel Description

Compare packed half-precision (16-bit) floating-point elements in “a” and “b”, and store packed minimum values in “dst” using zeromask “k” (elements are zeroed out when the corresponding mask bit is not set). [min_float_note]

Intel Implementation Pseudo-Code

FOR j := 0 to 7
        IF k[j]
                dst.fp16[j] := (a.fp16[j] < b.fp16[j] ? a.fp16[j] : b.fp16[j])
        ELSE
                dst.fp16[j] := 0
        FI
ENDFOR
dst[MAX:128] := 0

_mm_min_sh#

Tech:

AVX-512

Category:

Special Math Functions

Header:

immintrin.h

Searchable:

AVX-512-Special Math Functions-XMM

Register:

XMM 128 bit

Return Type:

__m128h

Param Types:

__m128h a, __m128h b

Param ETypes:

FP16 a, FP16 b

__m128h _mm_min_sh(__m128h a, __m128h b);

Intel Description

Compare the lower half-precision (16-bit) floating-point elements in “a” and “b”, store the minimum value in the lower element of “dst”, and copy the upper 7 packed elements from “a” to the upper elements of “dst”. [min_float_note]

Intel Implementation Pseudo-Code

dst.fp16[0] := (a.fp16[0] < b.fp16[0] ? a.fp16[0] : b.fp16[0])
dst[127:16] := a[127:16]
dst[MAX:128] := 0

_mm_mask_min_sh#

Tech:

AVX-512

Category:

Special Math Functions

Header:

immintrin.h

Searchable:

AVX-512-Special Math Functions-XMM

Register:

XMM 128 bit

Return Type:

__m128h

Param Types:

__m128h src, __mmask8 k, __m128h a, __m128h b

Param ETypes:

FP16 src, MASK k, FP16 a, FP16 b

__m128h _mm_mask_min_sh(__m128h src, __mmask8 k, __m128h a,
                        __m128h b);

Intel Description

Compare the lower half-precision (16-bit) floating-point elements in “a” and “b”, store the minimum value in the lower element of “dst” using writemask “k” (the element is copied from “src” when mask bit 0 is not set), and copy the upper 7 packed elements from “a” to the upper elements of “dst”. [min_float_note]

Intel Implementation Pseudo-Code

IF k[0]
        dst.fp16[0] := (a.fp16[0] < b.fp16[0] ? a.fp16[0] : b.fp16[0])
ELSE
        dst.fp16[0] := src.fp16[0]
FI
dst[127:16] := a[127:16]
dst[MAX:128] := 0

_mm_maskz_min_sh#

Tech:

AVX-512

Category:

Special Math Functions

Header:

immintrin.h

Searchable:

AVX-512-Special Math Functions-XMM

Register:

XMM 128 bit

Return Type:

__m128h

Param Types:

__mmask8 k, __m128h a, __m128h b

Param ETypes:

MASK k, FP16 a, FP16 b

__m128h _mm_maskz_min_sh(__mmask8 k, __m128h a, __m128h b);

Intel Description

Compare the lower half-precision (16-bit) floating-point elements in “a” and “b”, store the minimum value in the lower element of “dst” using zeromask “k” (the element is zeroed out when mask bit 0 is not set), and copy the upper 7 packed elements from “a” to the upper elements of “dst”. [min_float_note]

Intel Implementation Pseudo-Code

IF k[0]
        dst.fp16[0] := (a.fp16[0] < b.fp16[0] ? a.fp16[0] : b.fp16[0])
ELSE
        dst.fp16[0] := 0
FI
dst[127:16] := a[127:16]
dst[MAX:128] := 0

_mm_min_round_sh#

Tech:

AVX-512

Category:

Special Math Functions

Header:

immintrin.h

Searchable:

AVX-512-Special Math Functions-XMM

Register:

XMM 128 bit

Return Type:

__m128h

Param Types:

__m128h a, __m128h b, int sae

Param ETypes:

FP16 a, FP16 b, IMM sae

__m128h _mm_min_round_sh(__m128h a, __m128h b, int sae);

Intel Description

Compare the lower half-precision (16-bit) floating-point elements in “a” and “b”, store the minimum value in the lower element of “dst”, and copy the upper 7 packed elements from “a” to the upper elements of “dst”. [sae_note][min_float_note]

Intel Implementation Pseudo-Code

dst.fp16[0] := (a.fp16[0] < b.fp16[0] ? a.fp16[0] : b.fp16[0])
dst[127:16] := a[127:16]
dst[MAX:128] := 0

_mm_mask_min_round_sh#

Tech:

AVX-512

Category:

Special Math Functions

Header:

immintrin.h

Searchable:

AVX-512-Special Math Functions-XMM

Register:

XMM 128 bit

Return Type:

__m128h

Param Types:

__m128h src, __mmask8 k, __m128h a, __m128h b, int sae

Param ETypes:

FP16 src, MASK k, FP16 a, FP16 b, IMM sae

__m128h _mm_mask_min_round_sh(__m128h src, __mmask8 k,
                              __m128h a, __m128h b,
                              int sae);

Intel Description

Compare the lower half-precision (16-bit) floating-point elements in “a” and “b”, store the minimum value in the lower element of “dst” using writemask “k” (the element is copied from “src” when mask bit 0 is not set), and copy the upper 7 packed elements from “a” to the upper elements of “dst”. [sae_note][min_float_note]

Intel Implementation Pseudo-Code

IF k[0]
        dst.fp16[0] := (a.fp16[0] < b.fp16[0] ? a.fp16[0] : b.fp16[0])
ELSE
        dst.fp16[0] := src.fp16[0]
FI
dst[127:16] := a[127:16]
dst[MAX:128] := 0

_mm_maskz_min_round_sh#

Tech:

AVX-512

Category:

Special Math Functions

Header:

immintrin.h

Searchable:

AVX-512-Special Math Functions-XMM

Register:

XMM 128 bit

Return Type:

__m128h

Param Types:

__mmask8 k, __m128h a, __m128h b, int sae

Param ETypes:

MASK k, FP16 a, FP16 b, IMM sae

__m128h _mm_maskz_min_round_sh(__mmask8 k, __m128h a,
                               __m128h b, int sae);

Intel Description

Compare the lower half-precision (16-bit) floating-point elements in “a” and “b”, store the minimum value in the lower element of “dst” using zeromask “k” (the element is zeroed out when mask bit 0 is not set), and copy the upper 7 packed elements from “a” to the upper elements of “dst”. [sae_note][min_float_note]

Intel Implementation Pseudo-Code

IF k[0]
        dst.fp16[0] := (a.fp16[0] < b.fp16[0] ? a.fp16[0] : b.fp16[0])
ELSE
        dst.fp16[0] := 0
FI
dst[127:16] := a[127:16]
dst[MAX:128] := 0

_mm_reduce_sh#

Tech:

AVX-512

Category:

Special Math Functions

Header:

immintrin.h

Searchable:

AVX-512-Special Math Functions-XMM

Register:

XMM 128 bit

Return Type:

__m128h

Param Types:

__m128h a, __m128h b, int imm8

Param ETypes:

FP16 a, FP16 b, IMM imm8

__m128h _mm_reduce_sh(__m128h a, __m128h b, int imm8);

Intel Description

Extract the reduced argument of the lower half-precision (16-bit) floating-point element in “b” by the number of bits specified by “imm8”, store the result in the lower element of “dst”, and copy the upper 7 packed elements from “a” to the upper elements of “dst”. [round_imm_note]

Intel Implementation Pseudo-Code

DEFINE ReduceArgumentFP16(src[15:0], imm8[7:0]) {
        m[15:0] := FP16(imm8[7:4]) // number of fraction bits after the binary point to be preserved
        tmp[15:0] := POW(2.0, FP16(-m)) * ROUND(POW(2.0, FP16(m)) * src[15:0], imm8[3:0])
        tmp[15:0] := src[15:0] - tmp[15:0]
        IF IsInf(tmp[15:0])
                tmp[15:0] := FP16(0.0)
        FI
        RETURN tmp[15:0]
}
dst.fp16[0] := ReduceArgumentFP16(b.fp16[0], imm8)
dst[127:16] := a[127:16]
dst[MAX:128] := 0

_mm_reduce_round_sh#

Tech:

AVX-512

Category:

Special Math Functions

Header:

immintrin.h

Searchable:

AVX-512-Special Math Functions-XMM

Register:

XMM 128 bit

Return Type:

__m128h

Param Types:

__m128h a, __m128h b, int imm8, const int sae

Param ETypes:

FP16 a, FP16 b, IMM imm8, IMM sae

__m128h _mm_reduce_round_sh(__m128h a, __m128h b, int imm8,
                            const int sae);

Intel Description

Extract the reduced argument of the lower half-precision (16-bit) floating-point element in “b” by the number of bits specified by “imm8”, store the result in the lower element of “dst”, and copy the upper 7 packed elements from “a” to the upper elements of “dst”. [round_imm_note][sae_note]

Intel Implementation Pseudo-Code

DEFINE ReduceArgumentFP16(src[15:0], imm8[7:0]) {
        m[15:0] := FP16(imm8[7:4]) // number of fraction bits after the binary point to be preserved
        tmp[15:0] := POW(2.0, FP16(-m)) * ROUND(POW(2.0, FP16(m)) * src[15:0], imm8[3:0])
        tmp[15:0] := src[15:0] - tmp[15:0]
        IF IsInf(tmp[15:0])
                tmp[15:0] := FP16(0.0)
        FI
        RETURN tmp[15:0]
}
dst.fp16[0] := ReduceArgumentFP16(b.fp16[0], imm8)
dst[127:16] := a[127:16]
dst[MAX:128] := 0

_mm_mask_reduce_sh#

Tech:

AVX-512

Category:

Special Math Functions

Header:

immintrin.h

Searchable:

AVX-512-Special Math Functions-XMM

Register:

XMM 128 bit

Return Type:

__m128h

Param Types:

__m128h src, __mmask8 k, __m128h a, __m128h b, int imm8

Param ETypes:

FP16 src, MASK k, FP16 a, FP16 b, IMM imm8

__m128h _mm_mask_reduce_sh(__m128h src, __mmask8 k,
                           __m128h a, __m128h b, int imm8);

Intel Description

Extract the reduced argument of the lower half-precision (16-bit) floating-point element in “b” by the number of bits specified by “imm8”, store the result in the lower element of “dst” using writemask “k” (the element is copied from “src” when mask bit 0 is not set), and copy the upper 7 packed elements from “a” to the upper elements of “dst”. [round_imm_note]

Intel Implementation Pseudo-Code

DEFINE ReduceArgumentFP16(src[15:0], imm8[7:0]) {
        m[15:0] := FP16(imm8[7:4]) // number of fraction bits after the binary point to be preserved
        tmp[15:0] := POW(2.0, FP16(-m)) * ROUND(POW(2.0, FP16(m)) * src[15:0], imm8[3:0])
        tmp[15:0] := src[15:0] - tmp[15:0]
        IF IsInf(tmp[15:0])
                tmp[15:0] := FP16(0.0)
        FI
        RETURN tmp[15:0]
}
IF k[0]
        dst.fp16[0] := ReduceArgumentFP16(b.fp16[0], imm8)
ELSE
        dst.fp16[0] := src.fp16[0]
FI
dst[127:16] := a[127:16]
dst[MAX:128] := 0

_mm_mask_reduce_round_sh#

Tech:

AVX-512

Category:

Special Math Functions

Header:

immintrin.h

Searchable:

AVX-512-Special Math Functions-XMM

Register:

XMM 128 bit

Return Type:

__m128h

Param Types:

__m128h src, __mmask8 k, __m128h a, __m128h b, int imm8, const int sae

Param ETypes:

FP16 src, MASK k, FP16 a, FP16 b, IMM imm8, IMM sae

__m128h _mm_mask_reduce_round_sh(__m128h src, __mmask8 k,
                                 __m128h a, __m128h b,
                                 int imm8, const int sae);

Intel Description

Extract the reduced argument of the lower half-precision (16-bit) floating-point element in “b” by the number of bits specified by “imm8”, store the result in the lower element of “dst” using writemask “k” (the element is copied from “src” when mask bit 0 is not set), and copy the upper 7 packed elements from “a” to the upper elements of “dst”. [round_imm_note][sae_note]

Intel Implementation Pseudo-Code

DEFINE ReduceArgumentFP16(src[15:0], imm8[7:0]) {
        m[15:0] := FP16(imm8[7:4]) // number of fraction bits after the binary point to be preserved
        tmp[15:0] := POW(2.0, FP16(-m)) * ROUND(POW(2.0, FP16(m)) * src[15:0], imm8[3:0])
        tmp[15:0] := src[15:0] - tmp[15:0]
        IF IsInf(tmp[15:0])
                tmp[15:0] := FP16(0.0)
        FI
        RETURN tmp[15:0]
}
IF k[0]
        dst.fp16[0] := ReduceArgumentFP16(b.fp16[0], imm8)
ELSE
        dst.fp16[0] := src.fp16[0]
FI
dst[127:16] := a[127:16]
dst[MAX:128] := 0

_mm_maskz_reduce_sh#

Tech:

AVX-512

Category:

Special Math Functions

Header:

immintrin.h

Searchable:

AVX-512-Special Math Functions-XMM

Register:

XMM 128 bit

Return Type:

__m128h

Param Types:

__mmask8 k, __m128h a, __m128h b, int imm8

Param ETypes:

MASK k, FP16 a, FP16 b, IMM imm8

__m128h _mm_maskz_reduce_sh(__mmask8 k, __m128h a,
                            __m128h b, int imm8);

Intel Description

Extract the reduced argument of the lower half-precision (16-bit) floating-point element in “b” by the number of bits specified by “imm8”, store the result in the lower element of “dst” using zeromask “k” (the element is zeroed out when mask bit 0 is not set), and copy the upper 7 packed elements from “a” to the upper elements of “dst”. [round_imm_note]

Intel Implementation Pseudo-Code

DEFINE ReduceArgumentFP16(src[15:0], imm8[7:0]) {
        m[15:0] := FP16(imm8[7:4]) // number of fraction bits after the binary point to be preserved
        tmp[15:0] := POW(2.0, FP16(-m)) * ROUND(POW(2.0, FP16(m)) * src[15:0], imm8[3:0])
        tmp[15:0] := src[15:0] - tmp[15:0]
        IF IsInf(tmp[15:0])
                tmp[15:0] := FP16(0.0)
        FI
        RETURN tmp[15:0]
}
IF k[0]
        dst.fp16[0] := ReduceArgumentFP16(b.fp16[0], imm8)
ELSE
        dst.fp16[0] := 0
FI
dst[127:16] := a[127:16]
dst[MAX:128] := 0

_mm_maskz_reduce_round_sh#

Tech:

AVX-512

Category:

Special Math Functions

Header:

immintrin.h

Searchable:

AVX-512-Special Math Functions-XMM

Register:

XMM 128 bit

Return Type:

__m128h

Param Types:

__mmask8 k, __m128h a, __m128h b, int imm8, const int sae

Param ETypes:

MASK k, FP16 a, FP16 b, IMM imm8, IMM sae

__m128h _mm_maskz_reduce_round_sh(__mmask8 k, __m128h a,
                                  __m128h b, int imm8,
                                  const int sae);

Intel Description

Extract the reduced argument of the lower half-precision (16-bit) floating-point element in “b” by the number of bits specified by “imm8”, store the result in the lower element of “dst” using zeromask “k” (the element is zeroed out when mask bit 0 is not set), and copy the upper 7 packed elements from “a” to the upper elements of “dst”. [round_imm_note][sae_note]

Intel Implementation Pseudo-Code

DEFINE ReduceArgumentFP16(src[15:0], imm8[7:0]) {
        m[15:0] := FP16(imm8[7:4]) // number of fraction bits after the binary point to be preserved
        tmp[15:0] := POW(2.0, FP16(-m)) * ROUND(POW(2.0, FP16(m)) * src[15:0], imm8[3:0])
        tmp[15:0] := src[15:0] - tmp[15:0]
        IF IsInf(tmp[15:0])
                tmp[15:0] := FP16(0.0)
        FI
        RETURN tmp[15:0]
}
IF k[0]
        dst.fp16[0] := ReduceArgumentFP16(b.fp16[0], imm8)
ELSE
        dst.fp16[0] := 0
FI
dst[127:16] := a[127:16]
dst[MAX:128] := 0