AVX-512-Special Math Functions-YMM#

_mm256_reduce_max_epi16#

Tech:

AVX-512

Category:

Special Math Functions

Header:

immintrin.h

Searchable:

AVX-512-Special Math Functions-YMM

Register:

YMM 256 bit

Return Type:

short

Param Types:

__m256i a

Param ETypes:

SI16 a

short _mm256_reduce_max_epi16(__m256i a);

Intel Description

Reduce the packed signed 16-bit integers in “a” by maximum. Returns the maximum of all active elements in “a”.

Intel Implementation Pseudo-Code

DEFINE REDUCE_MAX(src, len) {
        IF len == 2
                RETURN (src[15:0] > src[31:16] ? src[15:0] : src[31:16])
        FI
        len := len / 2
        FOR j:= 0 to (len-1)
                i := j*16
                src[i+15:i] := (src[i+15:i] > src[i+16*len+15:i+16*len] ? src[i+15:i] : src[i+16*len+15:i+16*len])
        ENDFOR
        RETURN REDUCE_MAX(src[16*len-1:0], len)
}
dst[15:0] := REDUCE_MAX(a, 16)

_mm256_mask_reduce_max_epi16#

Tech:

AVX-512

Category:

Special Math Functions

Header:

immintrin.h

Searchable:

AVX-512-Special Math Functions-YMM

Register:

YMM 256 bit

Return Type:

short

Param Types:

__mmask16 k, __m256i a

Param ETypes:

MASK k, SI16 a

short _mm256_mask_reduce_max_epi16(__mmask16 k, __m256i a);

Intel Description

Reduce the packed signed 16-bit integers in “a” by maximum using mask “k”. Returns the maximum of all active elements in “a”.

Intel Implementation Pseudo-Code

DEFINE REDUCE_MAX(src, len) {
        IF len == 2
                RETURN (src[15:0] > src[31:16] ? src[15:0] : src[31:16])
        FI
        len := len / 2
        FOR j:= 0 to (len-1)
                i := j*16
                src[i+15:i] := (src[i+15:i] > src[i+16*len+15:i+16*len] ? src[i+15:i] : src[i+16*len+15:i+16*len])
        ENDFOR
        RETURN REDUCE_MAX(src[16*len-1:0], len)
}
tmp := a
FOR j := 0 to 15
        i := j*16
        IF k[j]
                tmp[i+15:i] := a[i+15:i]
        ELSE
                tmp[i+15:i] := Int16(-0x8000)
        FI
ENDFOR
dst[15:0] := REDUCE_MAX(tmp, 16)

_mm256_reduce_max_epi8#

Tech:

AVX-512

Category:

Special Math Functions

Header:

immintrin.h

Searchable:

AVX-512-Special Math Functions-YMM

Register:

YMM 256 bit

Return Type:

char

Param Types:

__m256i a

Param ETypes:

SI8 a

char _mm256_reduce_max_epi8(__m256i a);

Intel Description

Reduce the packed signed 8-bit integers in “a” by maximum. Returns the maximum of all active elements in “a”.

Intel Implementation Pseudo-Code

DEFINE REDUCE_MAX(src, len) {
        IF len == 2
                RETURN (src[7:0] > src[15:8] ? src[7:0] : src[15:8])
        FI
        len := len / 2
        FOR j:= 0 to (len-1)
                i := j*8
                src[i+7:i] := (src[i+7:i] > src[i+8*len+7:i+8*len] ? src[i+7:i] : src[i+8*len+7:i+8*len])
        ENDFOR
        RETURN REDUCE_MAX(src[8*len-1:0], len)
}
dst[7:0] := REDUCE_MAX(a, 32)

_mm256_mask_reduce_max_epi8#

Tech:

AVX-512

Category:

Special Math Functions

Header:

immintrin.h

Searchable:

AVX-512-Special Math Functions-YMM

Register:

YMM 256 bit

Return Type:

char

Param Types:

__mmask32 k, __m256i a

Param ETypes:

MASK k, SI8 a

char _mm256_mask_reduce_max_epi8(__mmask32 k, __m256i a);

Intel Description

Reduce the packed signed 8-bit integers in “a” by maximum using mask “k”. Returns the maximum of all active elements in “a”.

Intel Implementation Pseudo-Code

DEFINE REDUCE_MAX(src, len) {
        IF len == 2
                RETURN (src[7:0] > src[15:8] ? src[7:0] : src[15:8])
        FI
        len := len / 2
        FOR j:= 0 to (len-1)
                i := j*8
                src[i+7:i] := (src[i+7:i] > src[i+8*len+7:i+8*len] ? src[i+7:i] : src[i+8*len+7:i+8*len])
        ENDFOR
        RETURN REDUCE_MAX(src[8*len-1:0], len)
}
tmp := a
FOR j := 0 to 31
        i := j*8
        IF k[j]
                tmp[i+7:i] := a[i+7:i]
        ELSE
                tmp[i+7:i] := Int8(-0x80)
        FI
ENDFOR
dst[7:0] := REDUCE_MAX(tmp, 32)

_mm256_reduce_max_epu16#

Tech:

AVX-512

Category:

Special Math Functions

Header:

immintrin.h

Searchable:

AVX-512-Special Math Functions-YMM

Register:

YMM 256 bit

Return Type:

unsigned short

Param Types:

__m256i a

Param ETypes:

UI16 a

unsigned short _mm256_reduce_max_epu16(__m256i a);

Intel Description

Reduce the packed unsigned 16-bit integers in “a” by maximum. Returns the maximum of all active elements in “a”.

Intel Implementation Pseudo-Code

DEFINE REDUCE_MAX(src, len) {
        IF len == 2
                RETURN (src[15:0] > src[31:16] ? src[15:0] : src[31:16])
        FI
        len := len / 2
        FOR j:= 0 to (len-1)
                i := j*16
                src[i+15:i] := (src[i+15:i] > src[i+16*len+15:i+16*len] ? src[i+15:i] : src[i+16*len+15:i+16*len])
        ENDFOR
        RETURN REDUCE_MAX(src[16*len-1:0], len)
}
dst[15:0] := REDUCE_MAX(a, 16)

_mm256_mask_reduce_max_epu16#

Tech:

AVX-512

Category:

Special Math Functions

Header:

immintrin.h

Searchable:

AVX-512-Special Math Functions-YMM

Register:

YMM 256 bit

Return Type:

unsigned short

Param Types:

__mmask16 k, __m256i a

Param ETypes:

MASK k, UI16 a

unsigned short _mm256_mask_reduce_max_epu16(__mmask16 k, __m256i a);

Intel Description

Reduce the packed unsigned 16-bit integers in “a” by maximum using mask “k”. Returns the maximum of all active elements in “a”.

Intel Implementation Pseudo-Code

DEFINE REDUCE_MAX(src, len) {
        IF len == 2
                RETURN (src[15:0] > src[31:16] ? src[15:0] : src[31:16])
        FI
        len := len / 2
        FOR j:= 0 to (len-1)
                i := j*16
                src[i+15:i] := (src[i+15:i] > src[i+16*len+15:i+16*len] ? src[i+15:i] : src[i+16*len+15:i+16*len])
        ENDFOR
        RETURN REDUCE_MAX(src[16*len-1:0], len)
}
tmp := a
FOR j := 0 to 15
        i := j*16
        IF k[j]
                tmp[i+15:i] := a[i+15:i]
        ELSE
                tmp[i+15:i] := 0
        FI
ENDFOR
dst[15:0] := REDUCE_MAX(tmp, 16)

_mm256_reduce_max_epu8#

Tech:

AVX-512

Category:

Special Math Functions

Header:

immintrin.h

Searchable:

AVX-512-Special Math Functions-YMM

Register:

YMM 256 bit

Return Type:

unsigned char

Param Types:

__m256i a

Param ETypes:

UI8 a

unsigned char _mm256_reduce_max_epu8(__m256i a);

Intel Description

Reduce the packed unsigned 8-bit integers in “a” by maximum. Returns the maximum of all active elements in “a”.

Intel Implementation Pseudo-Code

DEFINE REDUCE_MAX(src, len) {
        IF len == 2
                RETURN (src[7:0] > src[15:8] ? src[7:0] : src[15:8])
        FI
        len := len / 2
        FOR j:= 0 to (len-1)
                i := j*8
                src[i+7:i] := (src[i+7:i] > src[i+8*len+7:i+8*len] ? src[i+7:i] : src[i+8*len+7:i+8*len])
        ENDFOR
        RETURN REDUCE_MAX(src[8*len-1:0], len)
}
dst[7:0] := REDUCE_MAX(a, 32)

_mm256_mask_reduce_max_epu8#

Tech:

AVX-512

Category:

Special Math Functions

Header:

immintrin.h

Searchable:

AVX-512-Special Math Functions-YMM

Register:

YMM 256 bit

Return Type:

unsigned char

Param Types:

__mmask32 k, __m256i a

Param ETypes:

MASK k, UI8 a

unsigned char _mm256_mask_reduce_max_epu8(__mmask32 k, __m256i a);

Intel Description

Reduce the packed unsigned 8-bit integers in “a” by maximum using mask “k”. Returns the maximum of all active elements in “a”.

Intel Implementation Pseudo-Code

DEFINE REDUCE_MAX(src, len) {
        IF len == 2
                RETURN (src[7:0] > src[15:8] ? src[7:0] : src[15:8])
        FI
        len := len / 2
        FOR j:= 0 to (len-1)
                i := j*8
                src[i+7:i] := (src[i+7:i] > src[i+8*len+7:i+8*len] ? src[i+7:i] : src[i+8*len+7:i+8*len])
        ENDFOR
        RETURN REDUCE_MAX(src[8*len-1:0], len)
}
tmp := a
FOR j := 0 to 31
        i := j*8
        IF k[j]
                tmp[i+7:i] := a[i+7:i]
        ELSE
                tmp[i+7:i] := 0
        FI
ENDFOR
dst[7:0] := REDUCE_MAX(tmp, 32)

_mm256_reduce_min_epi16#

Tech:

AVX-512

Category:

Special Math Functions

Header:

immintrin.h

Searchable:

AVX-512-Special Math Functions-YMM

Register:

YMM 256 bit

Return Type:

short

Param Types:

__m256i a

Param ETypes:

SI16 a

short _mm256_reduce_min_epi16(__m256i a);

Intel Description

Reduce the packed signed 16-bit integers in “a” by minimum. Returns the minimum of all active elements in “a”.

Intel Implementation Pseudo-Code

DEFINE REDUCE_MIN(src, len) {
        IF len == 2
                RETURN (src[15:0] < src[31:16] ? src[15:0] : src[31:16])
        FI
        len := len / 2
        FOR j:= 0 to (len-1)
                i := j*16
                src[i+15:i] := (src[i+15:i] < src[i+16*len+15:i+16*len] ? src[i+15:i] : src[i+16*len+15:i+16*len])
        ENDFOR
        RETURN REDUCE_MIN(src[16*len-1:0], len)
}
dst[15:0] := REDUCE_MIN(a, 16)

_mm256_mask_reduce_min_epi16#

Tech:

AVX-512

Category:

Special Math Functions

Header:

immintrin.h

Searchable:

AVX-512-Special Math Functions-YMM

Register:

YMM 256 bit

Return Type:

short

Param Types:

__mmask16 k, __m256i a

Param ETypes:

MASK k, SI16 a

short _mm256_mask_reduce_min_epi16(__mmask16 k, __m256i a);

Intel Description

Reduce the packed signed 16-bit integers in “a” by minimum using mask “k”. Returns the minimum of all active elements in “a”.

Intel Implementation Pseudo-Code

DEFINE REDUCE_MIN(src, len) {
        IF len == 2
                RETURN (src[15:0] < src[31:16] ? src[15:0] : src[31:16])
        FI
        len := len / 2
        FOR j:= 0 to (len-1)
                i := j*16
                src[i+15:i] := (src[i+15:i] < src[i+16*len+15:i+16*len] ? src[i+15:i] : src[i+16*len+15:i+16*len])
        ENDFOR
        RETURN REDUCE_MIN(src[16*len-1:0], len)
}
tmp := a
FOR j := 0 to 15
        i := j*16
        IF k[j]
                tmp[i+15:i] := a[i+15:i]
        ELSE
                tmp[i+15:i] := Int16(0x7FFF)
        FI
ENDFOR
dst[15:0] := REDUCE_MIN(tmp, 16)

_mm256_reduce_min_epi8#

Tech:

AVX-512

Category:

Special Math Functions

Header:

immintrin.h

Searchable:

AVX-512-Special Math Functions-YMM

Register:

YMM 256 bit

Return Type:

char

Param Types:

__m256i a

Param ETypes:

SI8 a

char _mm256_reduce_min_epi8(__m256i a);

Intel Description

Reduce the packed signed 8-bit integers in “a” by minimum. Returns the minimum of all active elements in “a”.

Intel Implementation Pseudo-Code

DEFINE REDUCE_MIN(src, len) {
        IF len == 2
                RETURN (src[7:0] < src[15:8] ? src[7:0] : src[15:8])
        FI
        len := len / 2
        FOR j:= 0 to (len-1)
                i := j*8
                src[i+7:i] := (src[i+7:i] < src[i+8*len+7:i+8*len] ? src[i+7:i] : src[i+8*len+7:i+8*len])
        ENDFOR
        RETURN REDUCE_MIN(src[8*len-1:0], len)
}
dst[7:0] := REDUCE_MIN(a, 32)

_mm256_mask_reduce_min_epi8#

Tech:

AVX-512

Category:

Special Math Functions

Header:

immintrin.h

Searchable:

AVX-512-Special Math Functions-YMM

Register:

YMM 256 bit

Return Type:

char

Param Types:

__mmask32 k, __m256i a

Param ETypes:

MASK k, SI8 a

char _mm256_mask_reduce_min_epi8(__mmask32 k, __m256i a);

Intel Description

Reduce the packed signed 8-bit integers in “a” by minimum using mask “k”. Returns the minimum of all active elements in “a”.

Intel Implementation Pseudo-Code

DEFINE REDUCE_MIN(src, len) {
        IF len == 2
                RETURN (src[7:0] < src[15:8] ? src[7:0] : src[15:8])
        FI
        len := len / 2
        FOR j:= 0 to (len-1)
                i := j*8
                src[i+7:i] := (src[i+7:i] < src[i+8*len+7:i+8*len] ? src[i+7:i] : src[i+8*len+7:i+8*len])
        ENDFOR
        RETURN REDUCE_MIN(src[8*len-1:0], len)
}
tmp := a
FOR j := 0 to 31
        i := j*8
        IF k[j]
                tmp[i+7:i] := a[i+7:i]
        ELSE
                tmp[i+7:i] := Int8(0x7F)
        FI
ENDFOR
dst[7:0] := REDUCE_MIN(tmp, 32)

_mm256_reduce_min_epu16#

Tech:

AVX-512

Category:

Special Math Functions

Header:

immintrin.h

Searchable:

AVX-512-Special Math Functions-YMM

Register:

YMM 256 bit

Return Type:

unsigned short

Param Types:

__m256i a

Param ETypes:

UI16 a

unsigned short _mm256_reduce_min_epu16(__m256i a);

Intel Description

Reduce the packed unsigned 16-bit integers in “a” by minimum. Returns the minimum of all active elements in “a”.

Intel Implementation Pseudo-Code

DEFINE REDUCE_MIN(src, len) {
        IF len == 2
                RETURN (src[15:0] < src[31:16] ? src[15:0] : src[31:16])
        FI
        len := len / 2
        FOR j:= 0 to (len-1)
                i := j*16
                src[i+15:i] := (src[i+15:i] < src[i+16*len+15:i+16*len] ? src[i+15:i] : src[i+16*len+15:i+16*len])
        ENDFOR
        RETURN REDUCE_MIN(src[16*len-1:0], len)
}
dst[15:0] := REDUCE_MIN(a, 16)

_mm256_mask_reduce_min_epu16#

Tech:

AVX-512

Category:

Special Math Functions

Header:

immintrin.h

Searchable:

AVX-512-Special Math Functions-YMM

Register:

YMM 256 bit

Return Type:

unsigned short

Param Types:

__mmask16 k, __m256i a

Param ETypes:

MASK k, UI16 a

unsigned short _mm256_mask_reduce_min_epu16(__mmask16 k, __m256i a);

Intel Description

Reduce the packed unsigned 16-bit integers in “a” by minimum using mask “k”. Returns the minimum of all active elements in “a”.

Intel Implementation Pseudo-Code

DEFINE REDUCE_MIN(src, len) {
        IF len == 2
                RETURN (src[15:0] < src[31:16] ? src[15:0] : src[31:16])
        FI
        len := len / 2
        FOR j:= 0 to (len-1)
                i := j*16
                src[i+15:i] := (src[i+15:i] < src[i+16*len+15:i+16*len] ? src[i+15:i] : src[i+16*len+15:i+16*len])
        ENDFOR
        RETURN REDUCE_MIN(src[16*len-1:0], len)
}
tmp := a
FOR j := 0 to 15
        i := j*16
        IF k[j]
                tmp[i+15:i] := a[i+15:i]
        ELSE
                tmp[i+15:i] := 0xFFFF
        FI
ENDFOR
dst[15:0] := REDUCE_MIN(tmp, 16)

_mm256_reduce_min_epu8#

Tech:

AVX-512

Category:

Special Math Functions

Header:

immintrin.h

Searchable:

AVX-512-Special Math Functions-YMM

Register:

YMM 256 bit

Return Type:

unsigned char

Param Types:

__m256i a

Param ETypes:

UI8 a

unsigned char _mm256_reduce_min_epu8(__m256i a);

Intel Description

Reduce the packed unsigned 8-bit integers in “a” by minimum. Returns the minimum of all active elements in “a”.

Intel Implementation Pseudo-Code

DEFINE REDUCE_MIN(src, len) {
        IF len == 2
                RETURN (src[7:0] < src[15:8] ? src[7:0] : src[15:8])
        FI
        len := len / 2
        FOR j:= 0 to (len-1)
                i := j*8
                src[i+7:i] := (src[i+7:i] < src[i+8*len+7:i+8*len] ? src[i+7:i] : src[i+8*len+7:i+8*len])
        ENDFOR
        RETURN REDUCE_MIN(src[8*len-1:0], len)
}
dst[7:0] := REDUCE_MIN(a, 32)

_mm256_mask_reduce_min_epu8#

Tech:

AVX-512

Category:

Special Math Functions

Header:

immintrin.h

Searchable:

AVX-512-Special Math Functions-YMM

Register:

YMM 256 bit

Return Type:

unsigned char

Param Types:

__mmask32 k, __m256i a

Param ETypes:

MASK k, UI8 a

unsigned char _mm256_mask_reduce_min_epu8(__mmask32 k, __m256i a);

Intel Description

Reduce the packed unsigned 8-bit integers in “a” by minimum using mask “k”. Returns the minimum of all active elements in “a”.

Intel Implementation Pseudo-Code

DEFINE REDUCE_MIN(src, len) {
        IF len == 2
                RETURN (src[7:0] < src[15:8] ? src[7:0] : src[15:8])
        FI
        len := len / 2
        FOR j:= 0 to (len-1)
                i := j*8
                src[i+7:i] := (src[i+7:i] < src[i+8*len+7:i+8*len] ? src[i+7:i] : src[i+8*len+7:i+8*len])
        ENDFOR
        RETURN REDUCE_MIN(src[8*len-1:0], len)
}
tmp := a
FOR j := 0 to 31
        i := j*8
        IF k[j]
                tmp[i+7:i] := a[i+7:i]
        ELSE
                tmp[i+7:i] := 0xFF
        FI
ENDFOR
dst[7:0] := REDUCE_MIN(tmp, 32)

_mm256_max_ph#

Tech:

AVX-512

Category:

Special Math Functions

Header:

immintrin.h

Searchable:

AVX-512-Special Math Functions-YMM

Register:

YMM 256 bit

Return Type:

__m256h

Param Types:

__m256h a, __m256h b

Param ETypes:

FP16 a, FP16 b

__m256h _mm256_max_ph(__m256h a, __m256h b);

Intel Description

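Compare packed half-precision (16-bit) floating-point elements in “a” and “b”, and store packed maximum values in “dst”. “dst” does not follow the IEEE Standard for Floating-Point Arithmetic (IEEE 754) maximum value when inputs are NaN or signed-zero values.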

Intel Implementation Pseudo-Code

FOR j := 0 to 15
        dst.fp16[j] := (a.fp16[j] > b.fp16[j] ? a.fp16[j] : b.fp16[j])
ENDFOR
dst[MAX:256] := 0

_mm256_mask_max_ph#

Tech:

AVX-512

Category:

Special Math Functions

Header:

immintrin.h

Searchable:

AVX-512-Special Math Functions-YMM

Register:

YMM 256 bit

Return Type:

__m256h

Param Types:

__m256h src, __mmask16 k, __m256h a, __m256h b

Param ETypes:

FP16 src, MASK k, FP16 a, FP16 b

__m256h _mm256_mask_max_ph(__m256h src, __mmask16 k,
                           __m256h a, __m256h b);

Intel Description

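Compare packed half-precision (16-bit) floating-point elements in “a” and “b”, and store packed maximum values in “dst” using writemask “k” (elements are copied from “src” when the corresponding mask bit is not set). “dst” does not follow the IEEE Standard for Floating-Point Arithmetic (IEEE 754) maximum value when inputs are NaN or signed-zero values.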

Intel Implementation Pseudo-Code

FOR j := 0 to 15
        IF k[j]
                dst.fp16[j] := (a.fp16[j] > b.fp16[j] ? a.fp16[j] : b.fp16[j])
        ELSE
                dst.fp16[j] := src.fp16[j]
        FI
ENDFOR
dst[MAX:256] := 0

_mm256_maskz_max_ph#

Tech:

AVX-512

Category:

Special Math Functions

Header:

immintrin.h

Searchable:

AVX-512-Special Math Functions-YMM

Register:

YMM 256 bit

Return Type:

__m256h

Param Types:

__mmask16 k, __m256h a, __m256h b

Param ETypes:

MASK k, FP16 a, FP16 b

__m256h _mm256_maskz_max_ph(__mmask16 k, __m256h a,
                            __m256h b);

Intel Description

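Compare packed half-precision (16-bit) floating-point elements in “a” and “b”, and store packed maximum values in “dst” using zeromask “k” (elements are zeroed out when the corresponding mask bit is not set). “dst” does not follow the IEEE Standard for Floating-Point Arithmetic (IEEE 754) maximum value when inputs are NaN or signed-zero values.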

Intel Implementation Pseudo-Code

FOR j := 0 to 15
        IF k[j]
                dst.fp16[j] := (a.fp16[j] > b.fp16[j] ? a.fp16[j] : b.fp16[j])
        ELSE
                dst.fp16[j] := 0
        FI
ENDFOR
dst[MAX:256] := 0

_mm256_min_ph#

Tech:

AVX-512

Category:

Special Math Functions

Header:

immintrin.h

Searchable:

AVX-512-Special Math Functions-YMM

Register:

YMM 256 bit

Return Type:

__m256h

Param Types:

__m256h a, __m256h b

Param ETypes:

FP16 a, FP16 b

__m256h _mm256_min_ph(__m256h a, __m256h b);

Intel Description

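Compare packed half-precision (16-bit) floating-point elements in “a” and “b”, and store packed minimum values in “dst”. “dst” does not follow the IEEE Standard for Floating-Point Arithmetic (IEEE 754) minimum value when inputs are NaN or signed-zero values.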

Intel Implementation Pseudo-Code

FOR j := 0 to 15
        dst.fp16[j] := (a.fp16[j] < b.fp16[j] ? a.fp16[j] : b.fp16[j])
ENDFOR
dst[MAX:256] := 0

_mm256_mask_min_ph#

Tech:

AVX-512

Category:

Special Math Functions

Header:

immintrin.h

Searchable:

AVX-512-Special Math Functions-YMM

Register:

YMM 256 bit

Return Type:

__m256h

Param Types:

__m256h src, __mmask16 k, __m256h a, __m256h b

Param ETypes:

FP16 src, MASK k, FP16 a, FP16 b

__m256h _mm256_mask_min_ph(__m256h src, __mmask16 k,
                           __m256h a, __m256h b);

Intel Description

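Compare packed half-precision (16-bit) floating-point elements in “a” and “b”, and store packed minimum values in “dst” using writemask “k” (elements are copied from “src” when the corresponding mask bit is not set). “dst” does not follow the IEEE Standard for Floating-Point Arithmetic (IEEE 754) minimum value when inputs are NaN or signed-zero values.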

Intel Implementation Pseudo-Code

FOR j := 0 to 15
        IF k[j]
                dst.fp16[j] := (a.fp16[j] < b.fp16[j] ? a.fp16[j] : b.fp16[j])
        ELSE
                dst.fp16[j] := src.fp16[j]
        FI
ENDFOR
dst[MAX:256] := 0

_mm256_maskz_min_ph#

Tech:

AVX-512

Category:

Special Math Functions

Header:

immintrin.h

Searchable:

AVX-512-Special Math Functions-YMM

Register:

YMM 256 bit

Return Type:

__m256h

Param Types:

__mmask16 k, __m256h a, __m256h b

Param ETypes:

MASK k, FP16 a, FP16 b

__m256h _mm256_maskz_min_ph(__mmask16 k, __m256h a,
                            __m256h b);

Intel Description

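Compare packed half-precision (16-bit) floating-point elements in “a” and “b”, and store packed minimum values in “dst” using zeromask “k” (elements are zeroed out when the corresponding mask bit is not set). “dst” does not follow the IEEE Standard for Floating-Point Arithmetic (IEEE 754) minimum value when inputs are NaN or signed-zero values.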

Intel Implementation Pseudo-Code

FOR j := 0 to 15
        IF k[j]
                dst.fp16[j] := (a.fp16[j] < b.fp16[j] ? a.fp16[j] : b.fp16[j])
        ELSE
                dst.fp16[j] := 0
        FI
ENDFOR
dst[MAX:256] := 0