AVX_ALL-Cryptography-YMM#

_mm256_sha512msg1_epi64#

Tech:

AVX_ALL

Category:

Cryptography

Header:

immintrin.h

Searchable:

AVX_ALL-Cryptography-YMM

Register:

YMM 256 bit

Return Type:

__m256i

Param Types:

__m256i __A, __m128i __B

Param ETypes:

UI64 __A, UI64 __B

__m256i _mm256_sha512msg1_epi64(__m256i __A, __m128i __B);

Intel Description

This intrinsic is one of the two SHA512 message scheduling instructions. The intrinsic performs an intermediate calculation for the next four SHA512 message qwords. The calculated results are stored in “dst”.

Intel Implementation Pseudo-Code

DEFINE ROR64(qword, n) {
        count := n % 64
        dest := (qword >> count) | (qword << (64 - count))
        RETURN dest
}
DEFINE SHR64(qword, n) {
        RETURN qword >> n
}
DEFINE s0(qword) {
        RETURN ROR64(qword,1) ^ ROR64(qword, 8) ^ SHR64(qword, 7)
}
W.qword[4] := __B.qword[0]
W.qword[3] := __A.qword[3]
W.qword[2] := __A.qword[2]
W.qword[1] := __A.qword[1]
W.qword[0] := __A.qword[0]
dst.qword[3] := W.qword[3] + s0(W.qword[4])
dst.qword[2] := W.qword[2] + s0(W.qword[3])
dst.qword[1] := W.qword[1] + s0(W.qword[2])
dst.qword[0] := W.qword[0] + s0(W.qword[1])

_mm256_sha512msg2_epi64#

Tech:

AVX_ALL

Category:

Cryptography

Header:

immintrin.h

Searchable:

AVX_ALL-Cryptography-YMM

Register:

YMM 256 bit

Return Type:

__m256i

Param Types:

__m256i __A, __m256i __B

Param ETypes:

UI64 __A, UI64 __B

__m256i _mm256_sha512msg2_epi64(__m256i __A, __m256i __B);

Intel Description

This intrinsic is one of the two SHA512 message scheduling instructions. The intrinsic performs the final calculation for the next four SHA512 message qwords. The calculated results are stored in “dst”.

Intel Implementation Pseudo-Code

DEFINE ROR64(qword, n) {
        count := n % 64
        dest := (qword >> count) | (qword << (64 - count))
        RETURN dest
}
DEFINE SHR64(qword, n) {
        RETURN qword >> n
}
DEFINE s1(qword) {
        RETURN ROR64(qword,19) ^ ROR64(qword, 61) ^ SHR64(qword, 6)
}
W.qword[14] := __B.qword[2]
W.qword[15] := __B.qword[3]
W.qword[16] := __A.qword[0] + s1(W.qword[14])
W.qword[17] := __A.qword[1] + s1(W.qword[15])
W.qword[18] := __A.qword[2] + s1(W.qword[16])
W.qword[19] := __A.qword[3] + s1(W.qword[17])
dst.qword[3] := W.qword[19]
dst.qword[2] := W.qword[18]
dst.qword[1] := W.qword[17]
dst.qword[0] := W.qword[16]

_mm256_sha512rnds2_epi64#

Tech:

AVX_ALL

Category:

Cryptography

Header:

immintrin.h

Searchable:

AVX_ALL-Cryptography-YMM

Register:

YMM 256 bit

Return Type:

__m256i

Param Types:

__m256i __A, __m256i __B, __m128i __C

Param ETypes:

UI64 __A, UI64 __B, UI64 __C

__m256i _mm256_sha512rnds2_epi64(__m256i __A, __m256i __B,
                                 __m128i __C);

Intel Description

This intrinsic performs two rounds of SHA512 operation using an initial SHA512 state (C,D,G,H) from “__A”, an initial SHA512 state (A,B,E,F) from “__B”, and a pre-computed sum of the next two round message qwords and the corresponding round constants from “__C” (only the two lower qwords of the third operand). The updated SHA512 state (A,B,E,F) is written to “dst”, and “dst” can be used as the updated state (C,D,G,H) in later rounds.

Intel Implementation Pseudo-Code

DEFINE ROR64(qword, n) {
        count := n % 64
        dest := (qword >> count) | (qword << (64 - count))
        RETURN dest
}
DEFINE SHR64(qword, n) {
        RETURN qword >> n
}
DEFINE cap_sigma0(qword) {
        RETURN ROR64(qword, 28) ^ ROR64(qword, 34) ^ ROR64(qword, 39)
}
DEFINE cap_sigma1(qword) {
        RETURN ROR64(qword, 14) ^ ROR64(qword, 18) ^ ROR64(qword, 41)
}
DEFINE MAJ(a,b,c) {
        RETURN (a & b) ^ (a & c) ^ (b & c)
}
DEFINE CH(a,b,c) {
        RETURN (a & b) ^ (c & ~a)
}
A.qword[0] := __B.qword[3]
B.qword[0] := __B.qword[2]
C.qword[0] := __A.qword[3]
D.qword[0] := __A.qword[2]
E.qword[0] := __B.qword[1]
F.qword[0] := __B.qword[0]
G.qword[0] := __A.qword[1]
H.qword[0] := __A.qword[0]
WK.qword[0]:= __C.qword[0]
WK.qword[1]:= __C.qword[1]
FOR i := 0 to 1
        A.qword[i+1] := CH(E.qword[i], F.qword[i], G.qword[i]) + cap_sigma1(E.qword[i]) + WK.qword[i] + H.qword[i] + MAJ(A.qword[i], B.qword[i], C.qword[i]) + cap_sigma0(A.qword[i])
        B.qword[i+1] := A.qword[i]
        C.qword[i+1] := B.qword[i]
        D.qword[i+1] := C.qword[i]
        E.qword[i+1] := CH(E.qword[i], F.qword[i], G.qword[i]) + cap_sigma1(E.qword[i]) + WK.qword[i] + H.qword[i] + D.qword[i]
        F.qword[i+1] := E.qword[i]
        G.qword[i+1] := F.qword[i]
        H.qword[i+1] := G.qword[i]
ENDFOR
dst.qword[3] := A.qword[2]
dst.qword[2] := B.qword[2]
dst.qword[1] := E.qword[2]
dst.qword[0] := F.qword[2]

_mm256_sm4key4_epi32#

Tech:

AVX_ALL

Category:

Cryptography

Header:

immintrin.h

Searchable:

AVX_ALL-Cryptography-YMM

Register:

YMM 256 bit

Return Type:

__m256i

Param Types:

__m256i __A, __m256i __B

Param ETypes:

UI32 __A, UI32 __B

__m256i _mm256_sm4key4_epi32(__m256i __A, __m256i __B);

Intel Description

This intrinsic performs four rounds of SM4 key expansion. The intrinsic operates on independent 128-bit lanes. The calculated results are stored in “dst”.

Intel Implementation Pseudo-Code

BYTE sbox[256] = {
0xD6, 0x90, 0xE9, 0xFE, 0xCC, 0xE1, 0x3D, 0xB7, 0x16, 0xB6, 0x14, 0xC2, 0x28, 0xFB, 0x2C, 0x05,
0x2B, 0x67, 0x9A, 0x76, 0x2A, 0xBE, 0x04, 0xC3, 0xAA, 0x44, 0x13, 0x26, 0x49, 0x86, 0x06, 0x99,
0x9C, 0x42, 0x50, 0xF4, 0x91, 0xEF, 0x98, 0x7A, 0x33, 0x54, 0x0B, 0x43, 0xED, 0xCF, 0xAC, 0x62,
0xE4, 0xB3, 0x1C, 0xA9, 0xC9, 0x08, 0xE8, 0x95, 0x80, 0xDF, 0x94, 0xFA, 0x75, 0x8F, 0x3F, 0xA6,
0x47, 0x07, 0xA7, 0xFC, 0xF3, 0x73, 0x17, 0xBA, 0x83, 0x59, 0x3C, 0x19, 0xE6, 0x85, 0x4F, 0xA8,
0x68, 0x6B, 0x81, 0xB2, 0x71, 0x64, 0xDA, 0x8B, 0xF8, 0xEB, 0x0F, 0x4B, 0x70, 0x56, 0x9D, 0x35,
0x1E, 0x24, 0x0E, 0x5E, 0x63, 0x58, 0xD1, 0xA2, 0x25, 0x22, 0x7C, 0x3B, 0x01, 0x21, 0x78, 0x87,
0xD4, 0x00, 0x46, 0x57, 0x9F, 0xD3, 0x27, 0x52, 0x4C, 0x36, 0x02, 0xE7, 0xA0, 0xC4, 0xC8, 0x9E,
0xEA, 0xBF, 0x8A, 0xD2, 0x40, 0xC7, 0x38, 0xB5, 0xA3, 0xF7, 0xF2, 0xCE, 0xF9, 0x61, 0x15, 0xA1,
0xE0, 0xAE, 0x5D, 0xA4, 0x9B, 0x34, 0x1A, 0x55, 0xAD, 0x93, 0x32, 0x30, 0xF5, 0x8C, 0xB1, 0xE3,
0x1D, 0xF6, 0xE2, 0x2E, 0x82, 0x66, 0xCA, 0x60, 0xC0, 0x29, 0x23, 0xAB, 0x0D, 0x53, 0x4E, 0x6F,
0xD5, 0xDB, 0x37, 0x45, 0xDE, 0xFD, 0x8E, 0x2F, 0x03, 0xFF, 0x6A, 0x72, 0x6D, 0x6C, 0x5B, 0x51,
0x8D, 0x1B, 0xAF, 0x92, 0xBB, 0xDD, 0xBC, 0x7F, 0x11, 0xD9, 0x5C, 0x41, 0x1F, 0x10, 0x5A, 0xD8,
0x0A, 0xC1, 0x31, 0x88, 0xA5, 0xCD, 0x7B, 0xBD, 0x2D, 0x74, 0xD0, 0x12, 0xB8, 0xE5, 0xB4, 0xB0,
0x89, 0x69, 0x97, 0x4A, 0x0C, 0x96, 0x77, 0x7E, 0x65, 0xB9, 0xF1, 0x09, 0xC5, 0x6E, 0xC6, 0x84,
0x18, 0xF0, 0x7D, 0xEC, 0x3A, 0xDC, 0x4D, 0x20, 0x79, 0xEE, 0x5F, 0x3E, 0xD7, 0xCB, 0x39, 0x48
}
DEFINE ROL32(dword, n) {
        count := n % 32
        dest := (dword << count) | (dword >> (32-count))
        RETURN dest
}
DEFINE SBOX_BYTE(dword, i) {
        RETURN sbox[dword.byte[i]]
}
DEFINE lower_t(dword) {
        tmp.byte[0] := SBOX_BYTE(dword, 0)
        tmp.byte[1] := SBOX_BYTE(dword, 1)
        tmp.byte[2] := SBOX_BYTE(dword, 2)
        tmp.byte[3] := SBOX_BYTE(dword, 3)
        RETURN tmp
}
DEFINE L_KEY(dword) {
        RETURN dword ^ ROL32(dword, 13) ^ ROL32(dword, 23)
}
DEFINE T_KEY(dword) {
        RETURN L_KEY(lower_t(dword))
}
DEFINE F_KEY(X0, X1, X2, X3, round_key) {
        RETURN X0 ^ T_KEY(X1 ^ X2 ^ X3 ^ round_key)
}
FOR i:= 0 to 1
        P.dword[0] := __A.dword[4*i]
        P.dword[1] := __A.dword[4*i+1]
        P.dword[2] := __A.dword[4*i+2]
        P.dword[3] := __A.dword[4*i+3]
        C.dword[0] := F_KEY(P.dword[0], P.dword[1], P.dword[2], P.dword[3], __B.dword[4*i])
        C.dword[1] := F_KEY(P.dword[1], P.dword[2], P.dword[3], C.dword[0], __B.dword[4*i+1])
        C.dword[2] := F_KEY(P.dword[2], P.dword[3], C.dword[0], C.dword[1], __B.dword[4*i+2])
        C.dword[3] := F_KEY(P.dword[3], C.dword[0], C.dword[1], C.dword[2], __B.dword[4*i+3])
        dst.dword[4*i] := C.dword[0]
        dst.dword[4*i+1] := C.dword[1]
        dst.dword[4*i+2] := C.dword[2]
        dst.dword[4*i+3] := C.dword[3]
ENDFOR
dst[MAX:256] := 0

_mm256_sm4rnds4_epi32#

Tech:

AVX_ALL

Category:

Cryptography

Header:

immintrin.h

Searchable:

AVX_ALL-Cryptography-YMM

Register:

YMM 256 bit

Return Type:

__m256i

Param Types:

__m256i __A, __m256i __B

Param ETypes:

UI32 __A, UI32 __B

__m256i _mm256_sm4rnds4_epi32(__m256i __A, __m256i __B);

Intel Description

This intrinsic performs four rounds of SM4 encryption. The intrinsic operates on independent 128-bit lanes. The calculated results are stored in “dst”.

Intel Implementation Pseudo-Code

BYTE sbox[256] = {
0xD6, 0x90, 0xE9, 0xFE, 0xCC, 0xE1, 0x3D, 0xB7, 0x16, 0xB6, 0x14, 0xC2, 0x28, 0xFB, 0x2C, 0x05,
0x2B, 0x67, 0x9A, 0x76, 0x2A, 0xBE, 0x04, 0xC3, 0xAA, 0x44, 0x13, 0x26, 0x49, 0x86, 0x06, 0x99,
0x9C, 0x42, 0x50, 0xF4, 0x91, 0xEF, 0x98, 0x7A, 0x33, 0x54, 0x0B, 0x43, 0xED, 0xCF, 0xAC, 0x62,
0xE4, 0xB3, 0x1C, 0xA9, 0xC9, 0x08, 0xE8, 0x95, 0x80, 0xDF, 0x94, 0xFA, 0x75, 0x8F, 0x3F, 0xA6,
0x47, 0x07, 0xA7, 0xFC, 0xF3, 0x73, 0x17, 0xBA, 0x83, 0x59, 0x3C, 0x19, 0xE6, 0x85, 0x4F, 0xA8,
0x68, 0x6B, 0x81, 0xB2, 0x71, 0x64, 0xDA, 0x8B, 0xF8, 0xEB, 0x0F, 0x4B, 0x70, 0x56, 0x9D, 0x35,
0x1E, 0x24, 0x0E, 0x5E, 0x63, 0x58, 0xD1, 0xA2, 0x25, 0x22, 0x7C, 0x3B, 0x01, 0x21, 0x78, 0x87,
0xD4, 0x00, 0x46, 0x57, 0x9F, 0xD3, 0x27, 0x52, 0x4C, 0x36, 0x02, 0xE7, 0xA0, 0xC4, 0xC8, 0x9E,
0xEA, 0xBF, 0x8A, 0xD2, 0x40, 0xC7, 0x38, 0xB5, 0xA3, 0xF7, 0xF2, 0xCE, 0xF9, 0x61, 0x15, 0xA1,
0xE0, 0xAE, 0x5D, 0xA4, 0x9B, 0x34, 0x1A, 0x55, 0xAD, 0x93, 0x32, 0x30, 0xF5, 0x8C, 0xB1, 0xE3,
0x1D, 0xF6, 0xE2, 0x2E, 0x82, 0x66, 0xCA, 0x60, 0xC0, 0x29, 0x23, 0xAB, 0x0D, 0x53, 0x4E, 0x6F,
0xD5, 0xDB, 0x37, 0x45, 0xDE, 0xFD, 0x8E, 0x2F, 0x03, 0xFF, 0x6A, 0x72, 0x6D, 0x6C, 0x5B, 0x51,
0x8D, 0x1B, 0xAF, 0x92, 0xBB, 0xDD, 0xBC, 0x7F, 0x11, 0xD9, 0x5C, 0x41, 0x1F, 0x10, 0x5A, 0xD8,
0x0A, 0xC1, 0x31, 0x88, 0xA5, 0xCD, 0x7B, 0xBD, 0x2D, 0x74, 0xD0, 0x12, 0xB8, 0xE5, 0xB4, 0xB0,
0x89, 0x69, 0x97, 0x4A, 0x0C, 0x96, 0x77, 0x7E, 0x65, 0xB9, 0xF1, 0x09, 0xC5, 0x6E, 0xC6, 0x84,
0x18, 0xF0, 0x7D, 0xEC, 0x3A, 0xDC, 0x4D, 0x20, 0x79, 0xEE, 0x5F, 0x3E, 0xD7, 0xCB, 0x39, 0x48
}
DEFINE ROL32(dword, n) {
        count := n % 32
        dest := (dword << count) | (dword >> (32-count))
        RETURN dest
}
DEFINE SBOX_BYTE(dword, i) {
        RETURN sbox[dword.byte[i]]
}
DEFINE lower_t(dword) {
        tmp.byte[0] := SBOX_BYTE(dword, 0)
        tmp.byte[1] := SBOX_BYTE(dword, 1)
        tmp.byte[2] := SBOX_BYTE(dword, 2)
        tmp.byte[3] := SBOX_BYTE(dword, 3)
        RETURN tmp
}
DEFINE L_RND(dword) {
        tmp := dword
        tmp := tmp ^ ROL32(dword, 2)
        tmp := tmp ^ ROL32(dword, 10)
        tmp := tmp ^ ROL32(dword, 18)
        tmp := tmp ^ ROL32(dword, 24)
        RETURN tmp
}
DEFINE T_RND(dword) {
        RETURN L_RND(lower_t(dword))
}
DEFINE F_RND(X0, X1, X2, X3, round_key) {
        RETURN X0 ^ T_RND(X1 ^ X2 ^ X3 ^ round_key)
}
FOR i:= 0 to 1
        P.dword[0] := __A.dword[4*i]
        P.dword[1] := __A.dword[4*i+1]
        P.dword[2] := __A.dword[4*i+2]
        P.dword[3] := __A.dword[4*i+3]
        C.dword[0] := F_RND(P.dword[0], P.dword[1], P.dword[2], P.dword[3], __B.dword[4*i])
        C.dword[1] := F_RND(P.dword[1], P.dword[2], P.dword[3], C.dword[0], __B.dword[4*i+1])
        C.dword[2] := F_RND(P.dword[2], P.dword[3], C.dword[0], C.dword[1], __B.dword[4*i+2])
        C.dword[3] := F_RND(P.dword[3], C.dword[0], C.dword[1], C.dword[2], __B.dword[4*i+3])
        dst.dword[4*i] := C.dword[0]
        dst.dword[4*i+1] := C.dword[1]
        dst.dword[4*i+2] := C.dword[2]
        dst.dword[4*i+3] := C.dword[3]
ENDFOR
dst[MAX:256] := 0