RetroZilla/security/nss/lib/freebl/intel-aes-x86-masm.asm
roytam1 30d33aa8e8 cherry-picked mozilla NSS upstream changes (to rev f7a4c771997e, which is on par with 3.16.1 but without windows rand() changes):
9934c8faef29, 3c3b381c4865, 5a67f6beee9a, 1b1eb6d77728, a8b668fd72f7, bug962760, bug743700, bug857304, bug972653, bug972450, bug971358, bug903885, bug977073, bug976111, bug949939, bug947653, bug947572, bug903885, bug979106, bug966596, bug979004, bug979752, bug980848, bug938369, bug981170, bug668130, bug974693, bug975056, bug979132, bug370717, bug979070, bug985070, bug900067, bug977673, bug519255, bug989558, bug557299, bug987263, bug369802, a751a5146718, bug992343, bug952572, bug979703, bug994883, bug994869, bug993489, bug984608, bug977869, bug667371, bug672828, bug793347, bug977869
2018-07-14 21:22:29 +08:00

950 lines
20 KiB
MASM

; LICENSE:
; This submission to NSS is to be made available under the terms of the
; Mozilla Public License, v. 2.0. You can obtain one at
; http://mozilla.org/MPL/2.0/.
;###############################################################################
; Copyright(c) 2014, Intel Corp.
; Developers and authors:
; Shay Gueron and Vlad Krasnov
; Intel Corporation, Israel Development Centre, Haifa, Israel
; Please send feedback directly to crypto.feedback.alias@intel.com
; 32-bit flat-model MASM build, C (cdecl) calling convention:
; all arguments on the stack; eax/ecx/edx are caller-saved.
.MODEL FLAT, C
.XMM
.DATA
ALIGN 16
; PSHUFB masks used by the key-expansion routines to replicate/rotate the
; key word fed into the SubWord step, and the AES round-constant tables.
; (Lmask and Lmask256 hold the same pattern; Lmask192 differs.)
Lmask dd 0c0f0e0dh,0c0f0e0dh,0c0f0e0dh,0c0f0e0dh
Lmask192 dd 004070605h, 004070605h, 004070605h, 004070605h
Lmask256 dd 00c0f0e0dh, 00c0f0e0dh, 00c0f0e0dh, 00c0f0e0dh
Lcon1 dd 1,1,1,1        ; initial Rcon value (doubled each round via PSLLD)
Lcon2 dd 1bh,1bh,1bh,1bh ; Rcon after the 80h -> 1bh GF(2^8) reduction step
.CODE
; Register aliases shared by the bulk-cipher entry points below.
ctx textequ <ecx>
output textequ <edx>
input textequ <eax>
inputLen textequ <edi>
; aes_rnd i
; One AES encryption round applied to the 7-block pipeline xmm0..xmm6.
; Loads round key i from the expanded schedule at ctx; clobbers xmm7.
aes_rnd MACRO i
movdqu xmm7, [i*16 + ctx]
aesenc xmm0, xmm7
aesenc xmm1, xmm7
aesenc xmm2, xmm7
aesenc xmm3, xmm7
aesenc xmm4, xmm7
aesenc xmm5, xmm7
aesenc xmm6, xmm7
ENDM
; aes_last_rnd i
; Final AES encryption round (AESENCLAST: no MixColumns) for the 7-block
; pipeline xmm0..xmm6, using round key i; clobbers xmm7.
aes_last_rnd MACRO i
movdqu xmm7, [i*16 + ctx]
aesenclast xmm0, xmm7
aesenclast xmm1, xmm7
aesenclast xmm2, xmm7
aesenclast xmm3, xmm7
aesenclast xmm4, xmm7
aesenclast xmm5, xmm7
aesenclast xmm6, xmm7
ENDM
; aes_dec_rnd i
; One AES decryption round (Equivalent Inverse Cipher, so the schedule
; must hold AESIMC-transformed keys) over xmm0..xmm6; clobbers xmm7.
aes_dec_rnd MACRO i
movdqu xmm7, [i*16 + ctx]
aesdec xmm0, xmm7
aesdec xmm1, xmm7
aesdec xmm2, xmm7
aesdec xmm3, xmm7
aesdec xmm4, xmm7
aesdec xmm5, xmm7
aesdec xmm6, xmm7
ENDM
; aes_dec_last_rnd i
; Final AES decryption round (AESDECLAST) over xmm0..xmm6 using round
; key i; clobbers xmm7.
aes_dec_last_rnd MACRO i
movdqu xmm7, [i*16 + ctx]
aesdeclast xmm0, xmm7
aesdeclast xmm1, xmm7
aesdeclast xmm2, xmm7
aesdeclast xmm3, xmm7
aesdeclast xmm4, xmm7
aesdeclast xmm5, xmm7
aesdeclast xmm6, xmm7
ENDM
; gen_aes_ecb_func enc, rnds
; Expands to the body of one intel_aes_{en,de}crypt_ecb_{128,192,256}
; entry point (cdecl).  Stack arguments, in order: cx (AESContext*),
; output, outputLen, maxOutputLen, input, inputLen; outputLen and
; maxOutputLen are not referenced here.
;   enc  : 1 selects the AESENC path, otherwise the AESDEC path
;   rnds : AES round count (10/12/14)
; Main loop processes 7 blocks per pass to keep the AES unit pipelined;
; a tail loop handles remaining whole blocks.  Any trailing partial
; block (< 16 bytes) is ignored.  Returns 0 in eax.
gen_aes_ecb_func MACRO enc, rnds
LOCAL loop7
LOCAL loop1
LOCAL bail
push inputLen ; edi is callee-saved under cdecl; preserve it
mov ctx, [esp + 2*4 + 0*4]
mov output, [esp + 2*4 + 1*4]
mov input, [esp + 2*4 + 4*4]
mov inputLen, [esp + 2*4 + 5*4]
; Skip the AESContext header to reach the expanded key schedule.
; NOTE(review): offset 44 must match freebl's AESContext layout - confirm.
lea ctx, [44+ctx]
loop7:
cmp inputLen, 7*16
jb loop1
; Load 7 blocks and whiten them all with round key 0.
movdqu xmm0, [0*16 + input]
movdqu xmm1, [1*16 + input]
movdqu xmm2, [2*16 + input]
movdqu xmm3, [3*16 + input]
movdqu xmm4, [4*16 + input]
movdqu xmm5, [5*16 + input]
movdqu xmm6, [6*16 + input]
movdqu xmm7, [0*16 + ctx]
pxor xmm0, xmm7
pxor xmm1, xmm7
pxor xmm2, xmm7
pxor xmm3, xmm7
pxor xmm4, xmm7
pxor xmm5, xmm7
pxor xmm6, xmm7
; Bind the direction-specific round macros/instructions at assembly time.
IF enc eq 1
rnd textequ <aes_rnd>
lastrnd textequ <aes_last_rnd>
aesinst textequ <aesenc>
aeslastinst textequ <aesenclast>
ELSE
rnd textequ <aes_dec_rnd>
lastrnd textequ <aes_dec_last_rnd>
aesinst textequ <aesdec>
aeslastinst textequ <aesdeclast>
ENDIF
; Rounds 1..rnds-1 over all 7 blocks, then the final round (unrolled
; at assembly time by the WHILE directive).
i = 1
WHILE i LT rnds
rnd i
i = i+1
ENDM
lastrnd rnds
movdqu [0*16 + output], xmm0
movdqu [1*16 + output], xmm1
movdqu [2*16 + output], xmm2
movdqu [3*16 + output], xmm3
movdqu [4*16 + output], xmm4
movdqu [5*16 + output], xmm5
movdqu [6*16 + output], xmm6
lea input, [7*16 + input]
lea output, [7*16 + output]
sub inputLen, 7*16
jmp loop7
loop1:
; Tail: one block at a time until fewer than 16 bytes remain.
cmp inputLen, 1*16
jb bail
movdqu xmm0, [input]
movdqu xmm7, [0*16 + ctx]
pxor xmm0, xmm7
i = 1
WHILE i LT rnds
movdqu xmm7, [i*16 + ctx]
aesinst xmm0, xmm7
i = i+1
ENDM
movdqu xmm7, [rnds*16 + ctx]
aeslastinst xmm0, xmm7
movdqu [output], xmm0
lea input, [1*16 + input]
lea output, [1*16 + output]
sub inputLen, 1*16
jmp loop1
bail:
xor eax, eax ; return 0 (SECSuccess)
pop inputLen
ret
ENDM
ALIGN 16
; ECB entry points: each PROC is a single expansion of gen_aes_ecb_func
; with the direction flag and the round count for its key size.
intel_aes_encrypt_ecb_128 PROC
gen_aes_ecb_func 1, 10
intel_aes_encrypt_ecb_128 ENDP
ALIGN 16
intel_aes_encrypt_ecb_192 PROC
gen_aes_ecb_func 1, 12
intel_aes_encrypt_ecb_192 ENDP
ALIGN 16
intel_aes_encrypt_ecb_256 PROC
gen_aes_ecb_func 1, 14
intel_aes_encrypt_ecb_256 ENDP
ALIGN 16
intel_aes_decrypt_ecb_128 PROC
gen_aes_ecb_func 0, 10
intel_aes_decrypt_ecb_128 ENDP
ALIGN 16
intel_aes_decrypt_ecb_192 PROC
gen_aes_ecb_func 0, 12
intel_aes_decrypt_ecb_192 ENDP
ALIGN 16
intel_aes_decrypt_ecb_256 PROC
gen_aes_ecb_func 0, 14
intel_aes_decrypt_ecb_256 ENDP
; Register aliases for the key-expansion routines below.
KEY textequ <ecx>
KS textequ <edx>
ITR textequ <eax>
ALIGN 16
;-----------------------------------------------------------------------
; intel_aes_encrypt_init_128(key, keySchedule)
; Expands a 16-byte AES key into the 11 round keys stored at KS.
; The key-schedule core SubWord(RotWord(w)) is computed with
; PSHUFB (Lmask rotates/replicates the high word) followed by
; AESENCLAST, whose SubBytes step does the S-box lookups and whose
; "round key" operand xmm0 supplies the Rcon constant.
; Clobbers eax, ecx, edx, xmm0-xmm4.
;-----------------------------------------------------------------------
intel_aes_encrypt_init_128 PROC
mov KEY, [esp + 1*4 + 0*4]
mov KS, [esp + 1*4 + 1*4]
movdqu xmm1, [KEY]
movdqu [KS], xmm1 ; round key 0 = raw key
movdqa xmm2, xmm1
lea ITR, Lcon1
movdqa xmm0, [ITR] ; Rcon = 1, doubled each iteration
lea ITR, Lmask
movdqa xmm4, [ITR]
mov ITR, 8 ; rounds 1..8 in the loop (Rcon 01h..80h)
Lenc_128_ks_loop:
lea KS, [16 + KS]
; dec sets ZF for the jne below; the intervening SSE instructions do
; not modify EFLAGS, so the flags survive to the branch.
dec ITR
pshufb xmm2, xmm4
aesenclast xmm2, xmm0 ; SubWord(RotWord(w)) ^ Rcon
pslld xmm0, 1 ; next Rcon
; Propagate the previous round key: w[i] ^= w[i-1] across the lanes
; via three shift-and-XOR steps, then fold in the core value.
movdqa xmm3, xmm1
pslldq xmm3, 4
pxor xmm1, xmm3
pslldq xmm3, 4
pxor xmm1, xmm3
pslldq xmm3, 4
pxor xmm1, xmm3
pxor xmm1, xmm2
movdqu [KS], xmm1
movdqa xmm2, xmm1
jne Lenc_128_ks_loop
; Rounds 9 and 10: Rcon wraps in GF(2^8), so reload 1bh (then shift
; to 36h) instead of continuing the plain doubling.
lea ITR, Lcon2
movdqa xmm0, [ITR]
pshufb xmm2, xmm4
aesenclast xmm2, xmm0
pslld xmm0, 1
movdqa xmm3, xmm1
pslldq xmm3, 4
pxor xmm1, xmm3
pslldq xmm3, 4
pxor xmm1, xmm3
pslldq xmm3, 4
pxor xmm1, xmm3
pxor xmm1, xmm2
movdqu [16 + KS], xmm1 ; round key 9
movdqa xmm2, xmm1
pshufb xmm2, xmm4
aesenclast xmm2, xmm0
movdqa xmm3, xmm1
pslldq xmm3, 4
pxor xmm1, xmm3
pslldq xmm3, 4
pxor xmm1, xmm3
pslldq xmm3, 4
pxor xmm1, xmm3
pxor xmm1, xmm2
movdqu [32 + KS], xmm1 ; round key 10
movdqa xmm2, xmm1
ret
intel_aes_encrypt_init_128 ENDP
ALIGN 16
;-----------------------------------------------------------------------
; intel_aes_decrypt_init_128(key, keySchedule)
; Builds the AES-128 decryption schedule for the Equivalent Inverse
; Cipher: expand the encryption schedule, swap round keys 0 and 10,
; AESIMC-transform and swap the symmetric pairs 1..4/9..6, and
; AESIMC-transform the middle key (5) in place.
;-----------------------------------------------------------------------
intel_aes_decrypt_init_128 PROC
mov KEY, [esp + 1*4 + 0*4]
mov KS, [esp + 1*4 + 1*4]
; The pushes both pass the cdecl arguments and preserve KEY/KS across
; the call (ecx/edx are caller-saved); the pops read them back.
push KS
push KEY
call intel_aes_encrypt_init_128
pop KEY
pop KS
; Swap the outermost round keys (no InvMixColumns on first/last).
movdqu xmm0, [0*16 + KS]
movdqu xmm1, [10*16 + KS]
movdqu [10*16 + KS], xmm0
movdqu [0*16 + KS], xmm1
i = 1
WHILE i LT 5
movdqu xmm0, [i*16 + KS]
movdqu xmm1, [(10-i)*16 + KS]
aesimc xmm0, xmm0
aesimc xmm1, xmm1
movdqu [(10-i)*16 + KS], xmm0
movdqu [i*16 + KS], xmm1
i = i+1
ENDM
movdqu xmm0, [5*16 + KS]
aesimc xmm0, xmm0
movdqu [5*16 + KS], xmm0
ret
intel_aes_decrypt_init_128 ENDP
ALIGN 16
;-----------------------------------------------------------------------
; intel_aes_encrypt_init_192(key, keySchedule)
; Expands a 24-byte AES key into the AES-192 round-key schedule at KS.
; State: xmm1 = key words w0..w3, xmm3 (low qword) = w4..w5.  Each of
; the 4 loop iterations advances the 6-word schedule by two generations
; (12 words) and packs them into the 4-word round-key layout with
; SHUFPD, storing 48 bytes per pass; the final store flushes the last
; pending half-block held in xmm5.  The SubWord core is computed with
; PSHUFB (Lmask192) + AESENCLAST, Rcon in xmm0 doubled via PSLLD.
; Clobbers eax, ecx, edx, xmm0-xmm7.
;-----------------------------------------------------------------------
intel_aes_encrypt_init_192 PROC
mov KEY, [esp + 1*4 + 0*4]
mov KS, [esp + 1*4 + 1*4]
pxor xmm3, xmm3
movdqu xmm1, [KEY]
; Load key bytes 16..23 into the low qword of xmm3 without reading
; past the end of the 24-byte key buffer.
pinsrd xmm3, DWORD PTR [16 + KEY], 0
pinsrd xmm3, DWORD PTR [20 + KEY], 1
movdqu [KS], xmm1 ; round key 0
movdqa xmm5, xmm3
lea ITR, Lcon1
movdqu xmm0, [ITR]
lea ITR, Lmask192
movdqu xmm4, [ITR]
mov ITR, 4 ; 4 iterations x 2 generations = 8 Rcon uses (01h..80h)
Lenc_192_ks_loop:
; --- first generation: core on w5, propagate through w0..w5 ---
movdqa xmm2, xmm3
pshufb xmm2, xmm4
aesenclast xmm2, xmm0
pslld xmm0, 1
movdqa xmm6, xmm1
movdqa xmm7, xmm3
pslldq xmm6, 4
pslldq xmm7, 4
pxor xmm1, xmm6
pxor xmm3, xmm7
pslldq xmm6, 4
pxor xmm1, xmm6
pslldq xmm6, 4
pxor xmm1, xmm6
pxor xmm1, xmm2
pshufd xmm2, xmm1, 0ffh ; broadcast new w3 into the w4/w5 update
pxor xmm3, xmm2
; Pack the 6 new words plus the pending half into 16-byte stores.
movdqa xmm6, xmm1
shufpd xmm5, xmm1, 00h
shufpd xmm6, xmm3, 01h
movdqu [16 + KS], xmm5
movdqu [32 + KS], xmm6
; --- second generation: same update, next Rcon ---
movdqa xmm2, xmm3
pshufb xmm2, xmm4
aesenclast xmm2, xmm0
pslld xmm0, 1
movdqa xmm6, xmm1
movdqa xmm7, xmm3
pslldq xmm6, 4
pslldq xmm7, 4
pxor xmm1, xmm6
pxor xmm3, xmm7
pslldq xmm6, 4
pxor xmm1, xmm6
pslldq xmm6, 4
pxor xmm1, xmm6
pxor xmm1, xmm2
pshufd xmm2, xmm1, 0ffh
pxor xmm3, xmm2
movdqu [48 + KS], xmm1
movdqa xmm5, xmm3 ; carry the half-block into the next iteration
lea KS, [48 + KS]
dec ITR
jnz Lenc_192_ks_loop
movdqu [16 + KS], xmm5 ; flush the final pending half-block
ret
intel_aes_encrypt_init_192 ENDP
ALIGN 16
;-----------------------------------------------------------------------
; intel_aes_decrypt_init_192(key, keySchedule)
; AES-192 decryption schedule (Equivalent Inverse Cipher): expand the
; encryption schedule, swap round keys 0 and 12, AESIMC-transform and
; swap pairs 1..5/11..7, and AESIMC the middle key (6) in place.
;-----------------------------------------------------------------------
intel_aes_decrypt_init_192 PROC
mov KEY, [esp + 1*4 + 0*4]
mov KS, [esp + 1*4 + 1*4]
; Pushes pass the cdecl args and preserve KEY/KS across the call.
push KS
push KEY
call intel_aes_encrypt_init_192
pop KEY
pop KS
movdqu xmm0, [0*16 + KS]
movdqu xmm1, [12*16 + KS]
movdqu [12*16 + KS], xmm0
movdqu [0*16 + KS], xmm1
i = 1
WHILE i LT 6
movdqu xmm0, [i*16 + KS]
movdqu xmm1, [(12-i)*16 + KS]
aesimc xmm0, xmm0
aesimc xmm1, xmm1
movdqu [(12-i)*16 + KS], xmm0
movdqu [i*16 + KS], xmm1
i = i+1
ENDM
movdqu xmm0, [6*16 + KS]
aesimc xmm0, xmm0
movdqu [6*16 + KS], xmm0
ret
intel_aes_decrypt_init_192 ENDP
ALIGN 16
;-----------------------------------------------------------------------
; intel_aes_encrypt_init_256(key, keySchedule)
; Expands a 32-byte AES key into the 15 round keys at KS.  Round keys
; 0/1 are the raw key halves (xmm1/xmm3).  Each loop iteration emits
; two keys: the even key uses the full core SubWord(RotWord) ^ Rcon
; (PSHUFB Lmask256 + AESENCLAST with Rcon in xmm0); the odd key uses
; SubWord only, done as AESENCLAST against an all-zero "round key"
; (xmm6) on the broadcast high word.  A final even-key step after the
; loop produces round key 14.  Clobbers eax, ecx, edx, xmm0-xmm6.
;-----------------------------------------------------------------------
intel_aes_encrypt_init_256 PROC
mov KEY, [esp + 1*4 + 0*4]
mov KS, [esp + 1*4 + 1*4]
movdqu xmm1, [16*0 + KEY]
movdqu xmm3, [16*1 + KEY]
movdqu [16*0 + KS], xmm1 ; round key 0
movdqu [16*1 + KS], xmm3 ; round key 1
lea ITR, Lcon1
movdqu xmm0, [ITR]
lea ITR, Lmask256
movdqu xmm5, [ITR]
pxor xmm6, xmm6 ; zero constant: SubWord-only AESENCLAST operand
mov ITR, 6 ; 6 iterations x 2 keys, plus one final even key
Lenc_256_ks_loop:
; Even round key: full core on the high word of xmm3.
movdqa xmm2, xmm3
pshufb xmm2, xmm5
aesenclast xmm2, xmm0
pslld xmm0, 1 ; next Rcon
movdqa xmm4, xmm1
pslldq xmm4, 4
pxor xmm1, xmm4
pslldq xmm4, 4
pxor xmm1, xmm4
pslldq xmm4, 4
pxor xmm1, xmm4
pxor xmm1, xmm2
movdqu [16*2 + KS], xmm1
; Odd round key: SubWord (no rotate, no Rcon) on the new high word.
pshufd xmm2, xmm1, 0ffh
aesenclast xmm2, xmm6
movdqa xmm4, xmm3
pslldq xmm4, 4
pxor xmm3, xmm4
pslldq xmm4, 4
pxor xmm3, xmm4
pslldq xmm4, 4
pxor xmm3, xmm4
pxor xmm3, xmm2
movdqu [16*3 + KS], xmm3
lea KS, [32 + KS]
dec ITR
jnz Lenc_256_ks_loop
; Final even key (round 14).
movdqa xmm2, xmm3
pshufb xmm2, xmm5
aesenclast xmm2, xmm0
movdqa xmm4, xmm1
pslldq xmm4, 4
pxor xmm1, xmm4
pslldq xmm4, 4
pxor xmm1, xmm4
pslldq xmm4, 4
pxor xmm1, xmm4
pxor xmm1, xmm2
movdqu [16*2 + KS], xmm1
ret
intel_aes_encrypt_init_256 ENDP
ALIGN 16
;-----------------------------------------------------------------------
; intel_aes_decrypt_init_256(key, keySchedule)
; AES-256 decryption schedule (Equivalent Inverse Cipher): expand the
; encryption schedule, swap round keys 0 and 14, AESIMC-transform and
; swap pairs 1..6/13..8, and AESIMC the middle key (7) in place.
;-----------------------------------------------------------------------
intel_aes_decrypt_init_256 PROC
mov KEY, [esp + 1*4 + 0*4]
mov KS, [esp + 1*4 + 1*4]
; Pushes pass the cdecl args and preserve KEY/KS across the call.
push KS
push KEY
call intel_aes_encrypt_init_256
pop KEY
pop KS
movdqu xmm0, [0*16 + KS]
movdqu xmm1, [14*16 + KS]
movdqu [14*16 + KS], xmm0
movdqu [0*16 + KS], xmm1
i = 1
WHILE i LT 7
movdqu xmm0, [i*16 + KS]
movdqu xmm1, [(14-i)*16 + KS]
aesimc xmm0, xmm0
aesimc xmm1, xmm1
movdqu [(14-i)*16 + KS], xmm0
movdqu [i*16 + KS], xmm1
i = i+1
ENDM
movdqu xmm0, [7*16 + KS]
aesimc xmm0, xmm0
movdqu [7*16 + KS], xmm0
ret
intel_aes_decrypt_init_256 ENDP
; gen_aes_cbc_enc_func rnds
; Expands to the body of intel_aes_encrypt_cbc_{128,192,256} (cdecl).
; CBC encryption is inherently serial (each block depends on the
; previous ciphertext), so this runs one block at a time, caching
; round keys 0..4 in xmm2..xmm6 to cut memory traffic.
; xmm0 carries the chaining value (IV, then previous ciphertext);
; it is loaded from and stored back to [-32+ctx].
; NOTE(review): ctx was advanced by 44, so -32 addresses offset 12 of
; the AESContext - presumably its iv field; confirm against freebl.
; A trailing partial block (< 16 bytes) is ignored.  Returns 0 in eax.
gen_aes_cbc_enc_func MACRO rnds
LOCAL loop1
LOCAL bail
push inputLen ; edi is callee-saved
mov ctx, [esp + 2*4 + 0*4]
mov output, [esp + 2*4 + 1*4]
mov input, [esp + 2*4 + 4*4]
mov inputLen, [esp + 2*4 + 5*4]
lea ctx, [44+ctx] ; point at the expanded key schedule
movdqu xmm0, [-32+ctx] ; chaining value (IV)
movdqu xmm2, [0*16 + ctx] ; cache round keys 0..4 in registers
movdqu xmm3, [1*16 + ctx]
movdqu xmm4, [2*16 + ctx]
movdqu xmm5, [3*16 + ctx]
movdqu xmm6, [4*16 + ctx]
loop1:
cmp inputLen, 1*16
jb bail
movdqu xmm1, [input]
; Combine plaintext ^ round-key-0 first, then fold in the chaining
; value: xmm0 = IV ^ pt ^ k0 (XOR is associative/commutative).
pxor xmm1, xmm2
pxor xmm0, xmm1
aesenc xmm0, xmm3
aesenc xmm0, xmm4
aesenc xmm0, xmm5
aesenc xmm0, xmm6
; Remaining rounds come from memory.
i = 5
WHILE i LT rnds
movdqu xmm7, [i*16 + ctx]
aesenc xmm0, xmm7
i = i+1
ENDM
movdqu xmm7, [rnds*16 + ctx]
aesenclast xmm0, xmm7
movdqu [output], xmm0 ; ciphertext is also the next chaining value
lea input, [1*16 + input]
lea output, [1*16 + output]
sub inputLen, 1*16
jmp loop1
bail:
movdqu [-32+ctx], xmm0 ; persist the chaining value for the next call
xor eax, eax ; return 0 (SECSuccess)
pop inputLen
ret
ENDM
; gen_aes_cbc_dec_func rnds
; Expands to the body of intel_aes_decrypt_cbc_{128,192,256} (cdecl).
; CBC decryption parallelizes (each plaintext = D(ct[i]) ^ ct[i-1]),
; so the main loop decrypts 7 blocks at once, then XORs each result
; with the preceding ciphertext block (the IV for block 0).  The
; chaining value lives at [-32+ctx] (see the CBC-encrypt note on that
; offset).  A trailing partial block (< 16 bytes) is ignored.
; Returns 0 in eax.
gen_aes_cbc_dec_func MACRO rnds
LOCAL loop7
LOCAL loop1
LOCAL dec1
LOCAL bail
push inputLen ; edi is callee-saved
mov ctx, [esp + 2*4 + 0*4]
mov output, [esp + 2*4 + 1*4]
mov input, [esp + 2*4 + 4*4]
mov inputLen, [esp + 2*4 + 5*4]
lea ctx, [44+ctx] ; point at the expanded key schedule
loop7:
cmp inputLen, 7*16
jb dec1
; Load 7 ciphertext blocks and whiten with round key 0.
movdqu xmm0, [0*16 + input]
movdqu xmm1, [1*16 + input]
movdqu xmm2, [2*16 + input]
movdqu xmm3, [3*16 + input]
movdqu xmm4, [4*16 + input]
movdqu xmm5, [5*16 + input]
movdqu xmm6, [6*16 + input]
movdqu xmm7, [0*16 + ctx]
pxor xmm0, xmm7
pxor xmm1, xmm7
pxor xmm2, xmm7
pxor xmm3, xmm7
pxor xmm4, xmm7
pxor xmm5, xmm7
pxor xmm6, xmm7
i = 1
WHILE i LT rnds
aes_dec_rnd i
i = i+1
ENDM
aes_dec_last_rnd rnds
; Undo the chaining: block 0 ^= IV, block i ^= ciphertext[i-1]
; (re-read from the untouched input buffer).
movdqu xmm7, [-32 + ctx]
pxor xmm0, xmm7
movdqu xmm7, [0*16 + input]
pxor xmm1, xmm7
movdqu xmm7, [1*16 + input]
pxor xmm2, xmm7
movdqu xmm7, [2*16 + input]
pxor xmm3, xmm7
movdqu xmm7, [3*16 + input]
pxor xmm4, xmm7
movdqu xmm7, [4*16 + input]
pxor xmm5, xmm7
movdqu xmm7, [5*16 + input]
pxor xmm6, xmm7
movdqu xmm7, [6*16 + input] ; last ciphertext = next IV
movdqu [0*16 + output], xmm0
movdqu [1*16 + output], xmm1
movdqu [2*16 + output], xmm2
movdqu [3*16 + output], xmm3
movdqu [4*16 + output], xmm4
movdqu [5*16 + output], xmm5
movdqu [6*16 + output], xmm6
movdqu [-32 + ctx], xmm7
lea input, [7*16 + input]
lea output, [7*16 + output]
sub inputLen, 7*16
jmp loop7
dec1:
; Single-block tail: xmm3 holds the chaining value, xmm4 saves the
; current ciphertext before it is decrypted in place.
movdqu xmm3, [-32 + ctx]
loop1:
cmp inputLen, 1*16
jb bail
movdqu xmm0, [input]
movdqa xmm4, xmm0
movdqu xmm7, [0*16 + ctx]
pxor xmm0, xmm7
i = 1
WHILE i LT rnds
movdqu xmm7, [i*16 + ctx]
aesdec xmm0, xmm7
i = i+1
ENDM
movdqu xmm7, [rnds*16 + ctx]
aesdeclast xmm0, xmm7
pxor xmm3, xmm0 ; plaintext = D(ct) ^ previous ct (or IV)
movdqu [output], xmm3
movdqa xmm3, xmm4 ; this ciphertext chains into the next block
lea input, [1*16 + input]
lea output, [1*16 + output]
sub inputLen, 1*16
jmp loop1
bail:
movdqu [-32 + ctx], xmm3 ; persist the chaining value
xor eax, eax ; return 0 (SECSuccess)
pop inputLen
ret
ENDM
ALIGN 16
; CBC entry points: each PROC is a single expansion of the matching
; CBC macro with the round count for its key size.
intel_aes_encrypt_cbc_128 PROC
gen_aes_cbc_enc_func 10
intel_aes_encrypt_cbc_128 ENDP
ALIGN 16
intel_aes_encrypt_cbc_192 PROC
gen_aes_cbc_enc_func 12
intel_aes_encrypt_cbc_192 ENDP
ALIGN 16
intel_aes_encrypt_cbc_256 PROC
gen_aes_cbc_enc_func 14
intel_aes_encrypt_cbc_256 ENDP
ALIGN 16
intel_aes_decrypt_cbc_128 PROC
gen_aes_cbc_dec_func 10
intel_aes_decrypt_cbc_128 ENDP
ALIGN 16
intel_aes_decrypt_cbc_192 PROC
gen_aes_cbc_dec_func 12
intel_aes_decrypt_cbc_192 ENDP
ALIGN 16
intel_aes_decrypt_cbc_256 PROC
gen_aes_cbc_dec_func 14
intel_aes_decrypt_cbc_256 ENDP
; Extra register aliases for the CTR path; both are callee-saved and
; are pushed/popped inside gen_aes_ctr_func.
ctrCtx textequ <esi>
CTR textequ <ebx>
; gen_aes_ctr_func rnds
; Expands to the body of intel_aes_encrypt_ctr_{128,192,256} (cdecl).
; ctrCtx layout used here: [4] = AESContext pointer, [8] = 16-byte
; counter block whose last dword is the big-endian counter.
; Strategy: keep 7 counter blocks, pre-XORed with round key 0, in a
; 16-byte-aligned stack scratch area.  Only the last dword of each
; slot changes per counter step, so each update is a 32-bit store of
; bswap(counter) ^ keyschedule-dword-3 (the blocks are stored already
; whitened).  The main loop encrypts 7 blocks while interleaving the
; counter updates for the NEXT batch into the first 7 rounds.
; NOTE(review): only the low 32 bits of the counter are incremented;
; no carry propagates into the upper words - confirm callers bound
; inputLen accordingly.  Trailing partial block (< 16 bytes) ignored.
; Returns 0 in eax.
gen_aes_ctr_func MACRO rnds
LOCAL loop7
LOCAL loop1
LOCAL enc1
LOCAL bail
push inputLen ; preserve callee-saved edi/esi/ebx/ebp
push ctrCtx
push CTR
push ebp
mov ctrCtx, [esp + 4*5 + 0*4]
mov output, [esp + 4*5 + 1*4]
mov input, [esp + 4*5 + 4*4]
mov inputLen, [esp + 4*5 + 5*4]
mov ctx, [4+ctrCtx] ; AESContext*
lea ctx, [44+ctx] ; expanded key schedule
; Carve out an aligned 7-block scratch area; ebp keeps the original
; esp so the stack args remain reachable at bail.
mov ebp, esp
sub esp, 7*16
and esp, -16
movdqu xmm0, [8+ctrCtx] ; current counter block
mov ctrCtx, [ctrCtx + 8 + 3*4] ; big-endian counter dword
bswap ctrCtx ; host order for arithmetic
; Pre-whiten with round key 0 and replicate into all 7 slots.
movdqu xmm1, [ctx + 0*16]
pxor xmm0, xmm1
movdqa [esp + 0*16], xmm0
movdqa [esp + 1*16], xmm0
movdqa [esp + 2*16], xmm0
movdqa [esp + 3*16], xmm0
movdqa [esp + 4*16], xmm0
movdqa [esp + 5*16], xmm0
movdqa [esp + 6*16], xmm0
; Patch slots 1..6 with counter+1..counter+6: store the byte-swapped
; value XORed with dword 3 of round key 0 (slots are pre-whitened).
inc ctrCtx
mov CTR, ctrCtx
bswap CTR
xor CTR, [ctx + 3*4]
mov [esp + 1*16 + 3*4], CTR
inc ctrCtx
mov CTR, ctrCtx
bswap CTR
xor CTR, [ctx + 3*4]
mov [esp + 2*16 + 3*4], CTR
inc ctrCtx
mov CTR, ctrCtx
bswap CTR
xor CTR, [ctx + 3*4]
mov [esp + 3*16 + 3*4], CTR
inc ctrCtx
mov CTR, ctrCtx
bswap CTR
xor CTR, [ctx + 3*4]
mov [esp + 4*16 + 3*4], CTR
inc ctrCtx
mov CTR, ctrCtx
bswap CTR
xor CTR, [ctx + 3*4]
mov [esp + 5*16 + 3*4], CTR
inc ctrCtx
mov CTR, ctrCtx
bswap CTR
xor CTR, [ctx + 3*4]
mov [esp + 6*16 + 3*4], CTR
loop7:
cmp inputLen, 7*16
jb loop1
; Pick up the 7 prepared (pre-whitened) counter blocks.
movdqu xmm0, [0*16 + esp]
movdqu xmm1, [1*16 + esp]
movdqu xmm2, [2*16 + esp]
movdqu xmm3, [3*16 + esp]
movdqu xmm4, [4*16 + esp]
movdqu xmm5, [5*16 + esp]
movdqu xmm6, [6*16 + esp]
; Rounds 1..7, interleaving one counter update per round so the 7
; scratch slots hold the next batch by the time this one finishes.
i = 1
WHILE i LE 7
aes_rnd i
inc ctrCtx
mov CTR, ctrCtx
bswap CTR
xor CTR, [ctx + 3*4]
mov [esp + (i-1)*16 + 3*4], CTR
i = i+1
ENDM
; Remaining middle rounds, then the final round.
WHILE i LT rnds
aes_rnd i
i = i+1
ENDM
aes_last_rnd rnds
; XOR the keystream with the input and store.
movdqu xmm7, [0*16 + input]
pxor xmm0, xmm7
movdqu xmm7, [1*16 + input]
pxor xmm1, xmm7
movdqu xmm7, [2*16 + input]
pxor xmm2, xmm7
movdqu xmm7, [3*16 + input]
pxor xmm3, xmm7
movdqu xmm7, [4*16 + input]
pxor xmm4, xmm7
movdqu xmm7, [5*16 + input]
pxor xmm5, xmm7
movdqu xmm7, [6*16 + input]
pxor xmm6, xmm7
movdqu [0*16 + output], xmm0
movdqu [1*16 + output], xmm1
movdqu [2*16 + output], xmm2
movdqu [3*16 + output], xmm3
movdqu [4*16 + output], xmm4
movdqu [5*16 + output], xmm5
movdqu [6*16 + output], xmm6
lea input, [7*16 + input]
lea output, [7*16 + output]
sub inputLen, 7*16
jmp loop7
loop1:
; Tail: consume one prepared slot per block (at most 6 iterations,
; since inputLen < 7*16 here); esp advances past used slots.
cmp inputLen, 1*16
jb bail
movdqu xmm0, [esp] ; already whitened with round key 0
add esp, 16
i = 1
WHILE i LT rnds
movdqu xmm7, [i*16 + ctx]
aesenc xmm0, xmm7
i = i+1
ENDM
movdqu xmm7, [rnds*16 + ctx]
aesenclast xmm0, xmm7
movdqu xmm7, [input]
pxor xmm0, xmm7
movdqu [output], xmm0
lea input, [1*16 + input]
lea output, [1*16 + output]
sub inputLen, 1*16
jmp loop1
bail:
; Save the next counter block back to the context: take the next
; unused pre-whitened slot and XOR off round key 0 to recover it.
mov ctrCtx, [ebp + 4*5 + 0*4]
movdqu xmm0, [esp]
movdqu xmm1, [ctx + 0*16]
pxor xmm0, xmm1
movdqu [8+ctrCtx], xmm0
xor eax, eax ; return 0 (SECSuccess)
mov esp, ebp ; release the scratch area
pop ebp
pop CTR
pop ctrCtx
pop inputLen
ret
ENDM
ALIGN 16
; CTR entry points: each PROC is a single expansion of gen_aes_ctr_func
; with the round count for its key size.
intel_aes_encrypt_ctr_128 PROC
gen_aes_ctr_func 10
intel_aes_encrypt_ctr_128 ENDP
ALIGN 16
intel_aes_encrypt_ctr_192 PROC
gen_aes_ctr_func 12
intel_aes_encrypt_ctr_192 ENDP
ALIGN 16
intel_aes_encrypt_ctr_256 PROC
gen_aes_ctr_func 14
intel_aes_encrypt_ctr_256 ENDP
END