mirror of
https://github.com/rn10950/RetroZilla.git
synced 2024-11-11 02:10:17 +01:00
950 lines
20 KiB
NASM
950 lines
20 KiB
NASM
|
; LICENSE:
|
||
|
; This submission to NSS is to be made available under the terms of the
|
||
|
; Mozilla Public License, v. 2.0. You can obtain one at
; http://mozilla.org/MPL/2.0/.
|
||
|
;###############################################################################
|
||
|
; Copyright(c) 2014, Intel Corp.
|
||
|
; Developers and authors:
|
||
|
; Shay Gueron and Vlad Krasnov
|
||
|
; Intel Corporation, Israel Development Centre, Haifa, Israel
|
||
|
; Please send feedback directly to crypto.feedback.alias@intel.com
|
||
|
|
||
|
|
||
|
; Assembler setup: flat memory model with C language type, XMM (SSE-family)
; instruction encodings enabled.
        .MODEL  FLAT, C
        .XMM

        .DATA
        ALIGN   16

; Each constant below is one 16-byte vector made of a repeated dword.
;   Lmask, Lmask192, Lmask256 : PSHUFB control masks loaded by the 128-,
;                               192- and 256-bit key-schedule routines.
;   Lcon1, Lcon2              : vectors those routines pass to AESENCLAST
;                               as its second (round-key) operand.
Lmask       dd  4 DUP (0c0f0e0dh)       ; each lane <- source bytes 13,14,15,12
Lmask192    dd  4 DUP (004070605h)      ; each lane <- source bytes 5,6,7,4
Lmask256    dd  4 DUP (00c0f0e0dh)      ; same selector value as Lmask
Lcon1       dd  4 DUP (1)
Lcon2       dd  4 DUP (1bh)
|
||
|
|
||
|
; Code segment starts here.  The four text equates name the registers used
; by the ECB / CBC / CTR macros that follow.
        .CODE

ctx         TEXTEQU <ecx>       ; base address for round-key loads
output      TEXTEQU <edx>       ; destination pointer
input       TEXTEQU <eax>       ; source pointer
inputLen    TEXTEQU <edi>       ; bytes still to process
|
||
|
|
||
|
|
||
|
; aes_rnd i
;   One AESENC round over the seven blocks held in xmm0..xmm6.
;   The round key is loaded (unaligned) from [i*16 + ctx]; xmm7 is overwritten.
aes_rnd MACRO i
        movdqu  xmm7, [i*16 + ctx]
        FOR     blk, <xmm0,xmm1,xmm2,xmm3,xmm4,xmm5,xmm6>
        aesenc  blk, xmm7
        ENDM
ENDM
|
||
|
|
||
|
; aes_last_rnd i
;   Final encryption round (AESENCLAST) over the seven blocks in xmm0..xmm6.
;   The round key is loaded (unaligned) from [i*16 + ctx]; xmm7 is overwritten.
aes_last_rnd MACRO i
        movdqu  xmm7, [i*16 + ctx]
        FOR     blk, <xmm0,xmm1,xmm2,xmm3,xmm4,xmm5,xmm6>
        aesenclast blk, xmm7
        ENDM
ENDM
|
||
|
|
||
|
; aes_dec_rnd i
;   One AESDEC round over the seven blocks held in xmm0..xmm6.
;   The round key is loaded (unaligned) from [i*16 + ctx]; xmm7 is overwritten.
aes_dec_rnd MACRO i
        movdqu  xmm7, [i*16 + ctx]
        FOR     blk, <xmm0,xmm1,xmm2,xmm3,xmm4,xmm5,xmm6>
        aesdec  blk, xmm7
        ENDM
ENDM
|
||
|
|
||
|
; aes_dec_last_rnd i
;   Final decryption round (AESDECLAST) over the seven blocks in xmm0..xmm6.
;   The round key is loaded (unaligned) from [i*16 + ctx]; xmm7 is overwritten.
aes_dec_last_rnd MACRO i
        movdqu  xmm7, [i*16 + ctx]
        FOR     blk, <xmm0,xmm1,xmm2,xmm3,xmm4,xmm5,xmm6>
        aesdeclast blk, xmm7
        ENDM
ENDM
|
||
|
|
||
|
|
||
|
; gen_aes_ecb_func enc, rnds
;   Emits the complete body of an ECB routine (placed between PROC and ENDP).
;     enc  : 1 selects AESENC/AESENCLAST; any other value selects
;            AESDEC/AESDECLAST.
;     rnds : index of the final round key.  Rounds 1..rnds-1 use the regular
;            round instruction, round rnds uses the last-round form.
;   Stack arguments 0, 1, 4 and 5 are read into ctx, output, input and
;   inputLen.  Whole 16-byte blocks are handled seven at a time, then one at
;   a time; a tail shorter than 16 bytes is not touched.
;   On return eax = 0.  edi is saved and restored; ecx, edx and xmm0-xmm7
;   are overwritten.
gen_aes_ecb_func MACRO enc, rnds
        LOCAL   seven_blocks, one_block, finished

        ; Pick the instruction family at assembly time (emits no code).
        IF enc EQ 1
rnd         TEXTEQU <aes_rnd>
lastrnd     TEXTEQU <aes_last_rnd>
aesinst     TEXTEQU <aesenc>
aeslastinst TEXTEQU <aesenclast>
        ELSE
rnd         TEXTEQU <aes_dec_rnd>
lastrnd     TEXTEQU <aes_dec_last_rnd>
aesinst     TEXTEQU <aesdec>
aeslastinst TEXTEQU <aesdeclast>
        ENDIF

        push    inputLen

        ; 8 bytes (saved edi + return address) sit below the first argument.
        mov     ctx,      [esp + 8]             ; argument 0
        mov     output,   [esp + 12]            ; argument 1
        mov     input,    [esp + 24]            ; argument 4
        mov     inputLen, [esp + 28]            ; argument 5

        ; Round keys are addressed relative to ctx + 44.
        ; NOTE(review): presumably the offset of the expanded key inside the
        ; caller's context structure - confirm against its C definition.
        lea     ctx, [ctx + 44]

seven_blocks:
        cmp     inputLen, 112                   ; at least 7 blocks left?
        jb      one_block

        movdqu  xmm0, [input]
        movdqu  xmm1, [input + 16]
        movdqu  xmm2, [input + 32]
        movdqu  xmm3, [input + 48]
        movdqu  xmm4, [input + 64]
        movdqu  xmm5, [input + 80]
        movdqu  xmm6, [input + 96]

        ; Round 0: XOR every block with round key 0.
        movdqu  xmm7, [ctx]
        FOR     blk, <xmm0,xmm1,xmm2,xmm3,xmm4,xmm5,xmm6>
        pxor    blk, xmm7
        ENDM

        i = 1
        WHILE i LT rnds
        rnd     i
        i = i + 1
        ENDM
        lastrnd rnds

        movdqu  [output],      xmm0
        movdqu  [output + 16], xmm1
        movdqu  [output + 32], xmm2
        movdqu  [output + 48], xmm3
        movdqu  [output + 64], xmm4
        movdqu  [output + 80], xmm5
        movdqu  [output + 96], xmm6

        lea     input,  [input + 112]
        lea     output, [output + 112]
        sub     inputLen, 112
        jmp     seven_blocks

one_block:
        cmp     inputLen, 16                    ; a whole block left?
        jb      finished

        movdqu  xmm0, [input]
        movdqu  xmm7, [ctx]
        pxor    xmm0, xmm7

        i = 1
        WHILE i LT rnds
        movdqu  xmm7, [i*16 + ctx]
        aesinst xmm0, xmm7
        i = i + 1
        ENDM
        movdqu  xmm7, [rnds*16 + ctx]
        aeslastinst xmm0, xmm7

        movdqu  [output], xmm0

        lea     input,  [input + 16]
        lea     output, [output + 16]
        sub     inputLen, 16
        jmp     one_block

finished:
        xor     eax, eax                        ; return value 0
        pop     inputLen
        ret

ENDM
|
||
|
|
||
|
; intel_aes_encrypt_ecb_128: ECB body expanded with enc = 1, rnds = 10.
        ALIGN   16
intel_aes_encrypt_ecb_128 PROC
        gen_aes_ecb_func 1, 10
intel_aes_encrypt_ecb_128 ENDP
|
||
|
|
||
|
; intel_aes_encrypt_ecb_192: ECB body expanded with enc = 1, rnds = 12.
        ALIGN   16
intel_aes_encrypt_ecb_192 PROC
        gen_aes_ecb_func 1, 12
intel_aes_encrypt_ecb_192 ENDP
|
||
|
|
||
|
; intel_aes_encrypt_ecb_256: ECB body expanded with enc = 1, rnds = 14.
        ALIGN   16
intel_aes_encrypt_ecb_256 PROC
        gen_aes_ecb_func 1, 14
intel_aes_encrypt_ecb_256 ENDP
|
||
|
|
||
|
; intel_aes_decrypt_ecb_128: ECB body expanded with enc = 0, rnds = 10.
        ALIGN   16
intel_aes_decrypt_ecb_128 PROC
        gen_aes_ecb_func 0, 10
intel_aes_decrypt_ecb_128 ENDP
|
||
|
|
||
|
; intel_aes_decrypt_ecb_192: ECB body expanded with enc = 0, rnds = 12.
        ALIGN   16
intel_aes_decrypt_ecb_192 PROC
        gen_aes_ecb_func 0, 12
intel_aes_decrypt_ecb_192 ENDP
|
||
|
|
||
|
; intel_aes_decrypt_ecb_256: ECB body expanded with enc = 0, rnds = 14.
        ALIGN   16
intel_aes_decrypt_ecb_256 PROC
        gen_aes_ecb_func 0, 14
intel_aes_decrypt_ecb_256 ENDP
|
||
|
|
||
|
|
||
|
; Register names used by the key-schedule routines.  They alias the same
; registers as ctx (ecx), output (edx) and input (eax).
KEY         TEXTEQU <ecx>       ; pointer to the user key
KS          TEXTEQU <edx>       ; pointer into the key schedule being written
ITR         TEXTEQU <eax>       ; scratch address register / loop counter
|
||
|
|
||
|
; ks128_fold (helper for intel_aes_encrypt_init_128)
;   xmm1 <- xmm1 XOR (xmm1 << 4 bytes) XOR (xmm1 << 8 bytes)
;                XOR (xmm1 << 12 bytes), shifts taken on the value xmm1 had
;   on entry.  xmm3 is used as scratch.
ks128_fold MACRO
        movdqa  xmm3, xmm1
        REPT    3
        pslldq  xmm3, 4
        pxor    xmm1, xmm3
        ENDM
ENDM

; intel_aes_encrypt_init_128
;   Stack argument 0 = KEY (16 bytes are read), argument 1 = KS.
;   Writes eleven 16-byte round keys to KS[0..175]: the key itself followed
;   by ten derived keys.  Overwrites eax, ecx, edx and xmm0-xmm4; no return
;   value is set up.
        ALIGN   16
intel_aes_encrypt_init_128 PROC

        mov     KEY, [esp + 4]
        mov     KS,  [esp + 8]

        movdqu  xmm1, [KEY]                     ; round key 0 = user key
        movdqu  [KS], xmm1
        movdqa  xmm2, xmm1

        lea     ITR, Lcon1
        movdqa  xmm0, [ITR]                     ; xmm0 = AESENCLAST key operand
        lea     ITR, Lmask
        movdqa  xmm4, [ITR]                     ; xmm4 = PSHUFB control mask

        mov     ITR, 8                          ; eight derived keys in the loop

ks128_next_key:
        lea     KS, [KS + 16]                   ; lea leaves the flags alone

        pshufb      xmm2, xmm4
        aesenclast  xmm2, xmm0
        pslld       xmm0, 1                     ; double the constant for next time
        ks128_fold
        pxor    xmm1, xmm2
        movdqu  [KS], xmm1
        movdqa  xmm2, xmm1

        dec     ITR
        jnz     ks128_next_key

        ; Last two keys use the constant from Lcon2 (1bh, then 1bh shifted
        ; left once).  KS still points at the eighth derived key.
        lea     ITR, Lcon2
        movdqa  xmm0, [ITR]

        pshufb      xmm2, xmm4
        aesenclast  xmm2, xmm0
        pslld       xmm0, 1
        ks128_fold
        pxor    xmm1, xmm2
        movdqu  [KS + 16], xmm1
        movdqa  xmm2, xmm1

        pshufb      xmm2, xmm4
        aesenclast  xmm2, xmm0
        ks128_fold
        pxor    xmm1, xmm2
        movdqu  [KS + 32], xmm1
        movdqa  xmm2, xmm1

        ret
intel_aes_encrypt_init_128 ENDP
|
||
|
|
||
|
|
||
|
; intel_aes_decrypt_init_128
;   Stack argument 0 = KEY, argument 1 = KS.
;   Builds the encryption schedule with intel_aes_encrypt_init_128, then
;   reverses the order of the eleven round keys in place and applies AESIMC
;   to keys 1..9 (keys 0 and 10 are only swapped).
;   Overwrites eax, ecx, edx and the xmm registers used by the callee.
        ALIGN   16
intel_aes_decrypt_init_128 PROC

        mov     KEY, [esp + 4]
        mov     KS,  [esp + 8]

        ; Pass both arguments on; the pops below drop them again and reload
        ; KEY and KS, which the callee overwrites.
        push    KS
        push    KEY
        call    intel_aes_encrypt_init_128
        pop     KEY
        pop     KS

        ; Exchange the first and last round keys unchanged.
        movdqu  xmm0, [KS]
        movdqu  xmm1, [KS + 10*16]
        movdqu  [KS + 10*16], xmm0
        movdqu  [KS], xmm1

        ; Exchange the pairs (1,9) .. (4,6), transforming both with AESIMC.
        lo_idx = 1
        WHILE lo_idx LT 5
        movdqu  xmm0, [lo_idx*16 + KS]
        movdqu  xmm1, [(10 - lo_idx)*16 + KS]
        aesimc  xmm0, xmm0
        aesimc  xmm1, xmm1
        movdqu  [(10 - lo_idx)*16 + KS], xmm0
        movdqu  [lo_idx*16 + KS], xmm1
        lo_idx = lo_idx + 1
        ENDM

        ; The middle key stays in place and is only transformed.
        movdqu  xmm0, [KS + 5*16]
        aesimc  xmm0, xmm0
        movdqu  [KS + 5*16], xmm0

        ret
intel_aes_decrypt_init_128 ENDP
|
||
|
|
||
|
|
||
|
; ks192_half (helper for intel_aes_encrypt_init_192)
;   The instruction sequence shared by both halves of the loop body.
;   Reads xmm1, xmm3, the mask in xmm4 and the constant in xmm0; leaves the
;   updated values in xmm1 and xmm3, shifts every dword of xmm0 left by one
;   bit, and uses xmm2, xmm6 and xmm7 as scratch.
ks192_half MACRO
        movdqa      xmm2, xmm3
        pshufb      xmm2, xmm4
        aesenclast  xmm2, xmm0
        pslld       xmm0, 1

        movdqa  xmm6, xmm1
        movdqa  xmm7, xmm3
        pslldq  xmm6, 4
        pslldq  xmm7, 4
        pxor    xmm1, xmm6
        pxor    xmm3, xmm7
        pslldq  xmm6, 4
        pxor    xmm1, xmm6
        pslldq  xmm6, 4
        pxor    xmm1, xmm6
        pxor    xmm1, xmm2
        pshufd  xmm2, xmm1, 0ffh                ; broadcast top dword of xmm1
        pxor    xmm3, xmm2
ENDM

; intel_aes_encrypt_init_192
;   Stack argument 0 = KEY (24 bytes are read), argument 1 = KS.
;   Stores the first 16 key bytes at KS[0], then runs four loop passes that
;   each write 48 bytes, followed by one more 16-byte store.  The last store
;   lands at byte offset 208 from the original KS, so 224 bytes are written
;   in total.  Overwrites eax, ecx, edx and xmm0-xmm7.
        ALIGN   16
intel_aes_encrypt_init_192 PROC

        mov     KEY, [esp + 4]
        mov     KS,  [esp + 8]

        pxor    xmm3, xmm3
        movdqu  xmm1, [KEY]                         ; key bytes 0..15
        pinsrd  xmm3, DWORD PTR [KEY + 16], 0       ; key bytes 16..19
        pinsrd  xmm3, DWORD PTR [KEY + 20], 1       ; key bytes 20..23

        movdqu  [KS], xmm1
        movdqa  xmm5, xmm3

        lea     ITR, Lcon1
        movdqu  xmm0, [ITR]
        lea     ITR, Lmask192
        movdqu  xmm4, [ITR]

        mov     ITR, 4

ks192_next_pass:
        ks192_half

        ; Assemble two 16-byte outputs from 8-byte halves:
        ;   xmm5 = low(xmm5) : low(xmm1),  xmm6 = high(xmm1) : low(xmm3)
        movdqa  xmm6, xmm1
        shufpd  xmm5, xmm1, 00h
        shufpd  xmm6, xmm3, 01h

        movdqu  [KS + 16], xmm5
        movdqu  [KS + 32], xmm6

        ks192_half

        movdqu  [KS + 48], xmm1
        movdqa  xmm5, xmm3

        lea     KS, [KS + 48]

        dec     ITR
        jnz     ks192_next_pass

        movdqu  [KS + 16], xmm5
        ret
intel_aes_encrypt_init_192 ENDP
|
||
|
|
||
|
; intel_aes_decrypt_init_192
;   Stack argument 0 = KEY, argument 1 = KS.
;   Builds the encryption schedule with intel_aes_encrypt_init_192, then
;   reverses the order of the thirteen round keys in place and applies
;   AESIMC to keys 1..11 (keys 0 and 12 are only swapped).
;   Overwrites eax, ecx, edx and the xmm registers used by the callee.
        ALIGN   16
intel_aes_decrypt_init_192 PROC

        mov     KEY, [esp + 4]
        mov     KS,  [esp + 8]

        ; Pass both arguments on; the pops below drop them again and reload
        ; KEY and KS, which the callee overwrites.
        push    KS
        push    KEY
        call    intel_aes_encrypt_init_192
        pop     KEY
        pop     KS

        ; Exchange the first and last round keys unchanged.
        movdqu  xmm0, [KS]
        movdqu  xmm1, [KS + 12*16]
        movdqu  [KS + 12*16], xmm0
        movdqu  [KS], xmm1

        ; Exchange the pairs (1,11) .. (5,7), transforming both with AESIMC.
        lo_idx = 1
        WHILE lo_idx LT 6
        movdqu  xmm0, [lo_idx*16 + KS]
        movdqu  xmm1, [(12 - lo_idx)*16 + KS]
        aesimc  xmm0, xmm0
        aesimc  xmm1, xmm1
        movdqu  [(12 - lo_idx)*16 + KS], xmm0
        movdqu  [lo_idx*16 + KS], xmm1
        lo_idx = lo_idx + 1
        ENDM

        ; The middle key stays in place and is only transformed.
        movdqu  xmm0, [KS + 6*16]
        aesimc  xmm0, xmm0
        movdqu  [KS + 6*16], xmm0

        ret
intel_aes_decrypt_init_192 ENDP
|
||
|
|
||
|
; ks256_fold dst (helper for intel_aes_encrypt_init_256)
;   dst <- dst XOR (dst << 4 bytes) XOR (dst << 8 bytes) XOR (dst << 12 bytes),
;   shifts taken on the value dst had on entry.  xmm4 is used as scratch.
ks256_fold MACRO dst
        movdqa  xmm4, dst
        REPT    3
        pslldq  xmm4, 4
        pxor    dst, xmm4
        ENDM
ENDM

; intel_aes_encrypt_init_256
;   Stack argument 0 = KEY (32 bytes are read), argument 1 = KS.
;   Writes fifteen 16-byte round keys to KS[0..239]: the two key halves,
;   twelve keys from six loop passes and one final key.
;   Overwrites eax, ecx, edx and xmm0-xmm6.
        ALIGN   16
intel_aes_encrypt_init_256 PROC

        mov     KEY, [esp + 4]
        mov     KS,  [esp + 8]

        movdqu  xmm1, [KEY]
        movdqu  xmm3, [KEY + 16]

        movdqu  [KS], xmm1                      ; round key 0
        movdqu  [KS + 16], xmm3                 ; round key 1

        lea     ITR, Lcon1
        movdqu  xmm0, [ITR]
        lea     ITR, Lmask256
        movdqu  xmm5, [ITR]

        pxor    xmm6, xmm6                      ; all-zero AESENCLAST operand

        mov     ITR, 6

ks256_next_pair:
        ; First key of the pair: derived from xmm1, seeded by xmm3.
        movdqa      xmm2, xmm3
        pshufb      xmm2, xmm5
        aesenclast  xmm2, xmm0
        pslld       xmm0, 1
        ks256_fold  xmm1
        pxor    xmm1, xmm2
        movdqu  [KS + 32], xmm1

        ; Second key of the pair: derived from xmm3, seeded by the top
        ; dword of the key just produced.
        pshufd      xmm2, xmm1, 0ffh
        aesenclast  xmm2, xmm6
        ks256_fold  xmm3
        pxor    xmm3, xmm2
        movdqu  [KS + 48], xmm3

        lea     KS, [KS + 32]
        dec     ITR
        jnz     ks256_next_pair

        ; One more key of the first kind completes the schedule.
        movdqa      xmm2, xmm3
        pshufb      xmm2, xmm5
        aesenclast  xmm2, xmm0
        ks256_fold  xmm1
        pxor    xmm1, xmm2
        movdqu  [KS + 32], xmm1

        ret
intel_aes_encrypt_init_256 ENDP
|
||
|
|
||
|
; intel_aes_decrypt_init_256
;   Stack argument 0 = KEY, argument 1 = KS.
;   Builds the encryption schedule with intel_aes_encrypt_init_256, then
;   reverses the order of the fifteen round keys in place and applies
;   AESIMC to keys 1..13 (keys 0 and 14 are only swapped).
;   Overwrites eax, ecx, edx and the xmm registers used by the callee.
        ALIGN   16
intel_aes_decrypt_init_256 PROC

        mov     KEY, [esp + 4]
        mov     KS,  [esp + 8]

        ; Pass both arguments on; the pops below drop them again and reload
        ; KEY and KS, which the callee overwrites.
        push    KS
        push    KEY
        call    intel_aes_encrypt_init_256
        pop     KEY
        pop     KS

        ; Exchange the first and last round keys unchanged.
        movdqu  xmm0, [KS]
        movdqu  xmm1, [KS + 14*16]
        movdqu  [KS + 14*16], xmm0
        movdqu  [KS], xmm1

        ; Exchange the pairs (1,13) .. (6,8), transforming both with AESIMC.
        lo_idx = 1
        WHILE lo_idx LT 7
        movdqu  xmm0, [lo_idx*16 + KS]
        movdqu  xmm1, [(14 - lo_idx)*16 + KS]
        aesimc  xmm0, xmm0
        aesimc  xmm1, xmm1
        movdqu  [(14 - lo_idx)*16 + KS], xmm0
        movdqu  [lo_idx*16 + KS], xmm1
        lo_idx = lo_idx + 1
        ENDM

        ; The middle key stays in place and is only transformed.
        movdqu  xmm0, [KS + 7*16]
        aesimc  xmm0, xmm0
        movdqu  [KS + 7*16], xmm0

        ret
intel_aes_decrypt_init_256 ENDP
|
||
|
|
||
|
|
||
|
|
||
|
; gen_aes_cbc_enc_func rnds
;   Emits the complete body of a CBC encryption routine.
;     rnds : index of the final round key.
;   Stack arguments 0, 1, 4 and 5 are read into ctx, output, input and
;   inputLen.  The chaining value is the 16 bytes at [ctx + 44 - 32]; it is
;   loaded once, updated for every block and written back on exit.
;   Round keys 0..4 are kept in xmm2..xmm6; the rest are reloaded per block.
;   Blocks are processed one at a time; a tail shorter than 16 bytes is not
;   touched.  On return eax = 0.  edi is saved and restored; ecx, edx and
;   xmm0-xmm7 are overwritten.
gen_aes_cbc_enc_func MACRO rnds
        LOCAL   next_block, finished

        push    inputLen

        ; 8 bytes (saved edi + return address) sit below the first argument.
        mov     ctx,      [esp + 8]             ; argument 0
        mov     output,   [esp + 12]            ; argument 1
        mov     input,    [esp + 24]            ; argument 4
        mov     inputLen, [esp + 28]            ; argument 5

        lea     ctx, [ctx + 44]                 ; base for round-key loads

        movdqu  xmm0, [ctx - 32]                ; current chaining value

        movdqu  xmm2, [ctx]                     ; round key 0
        movdqu  xmm3, [ctx + 16]                ; round key 1
        movdqu  xmm4, [ctx + 32]                ; round key 2
        movdqu  xmm5, [ctx + 48]                ; round key 3
        movdqu  xmm6, [ctx + 64]                ; round key 4

next_block:
        cmp     inputLen, 16                    ; a whole block left?
        jb      finished

        ; xmm0 = previous ciphertext XOR plaintext XOR round key 0
        movdqu  xmm1, [input]
        pxor    xmm1, xmm2
        pxor    xmm0, xmm1

        ; Rounds 1..4 with the cached keys.
        FOR     rk, <xmm3,xmm4,xmm5,xmm6>
        aesenc  xmm0, rk
        ENDM

        ; Remaining rounds with keys fetched from memory.
        i = 5
        WHILE i LT rnds
        movdqu  xmm7, [i*16 + ctx]
        aesenc  xmm0, xmm7
        i = i + 1
        ENDM
        movdqu  xmm7, [rnds*16 + ctx]
        aesenclast xmm0, xmm7

        movdqu  [output], xmm0

        lea     input,  [input + 16]
        lea     output, [output + 16]
        sub     inputLen, 16
        jmp     next_block

finished:
        movdqu  [ctx - 32], xmm0                ; save chaining value

        xor     eax, eax                        ; return value 0
        pop     inputLen
        ret

ENDM
|
||
|
|
||
|
; gen_aes_cbc_dec_func rnds
;   Emits the complete body of a CBC decryption routine.
;     rnds : index of the final round key.
;   Stack arguments 0, 1, 4 and 5 are read into ctx, output, input and
;   inputLen.  The chaining value is the 16 bytes at [ctx + 44 - 32]; it is
;   replaced by the last ciphertext block that was processed.
;   Whole blocks are handled seven at a time, then one at a time; a tail
;   shorter than 16 bytes is not touched.  Every ciphertext block is read
;   before the matching output block is stored.
;   On return eax = 0.  edi is saved and restored; ecx, edx and xmm0-xmm7
;   are overwritten.
gen_aes_cbc_dec_func MACRO rnds
        LOCAL   seven_blocks, tail_setup, one_block, finished

        push    inputLen

        ; 8 bytes (saved edi + return address) sit below the first argument.
        mov     ctx,      [esp + 8]             ; argument 0
        mov     output,   [esp + 12]            ; argument 1
        mov     input,    [esp + 24]            ; argument 4
        mov     inputLen, [esp + 28]            ; argument 5

        lea     ctx, [ctx + 44]                 ; base for round-key loads

seven_blocks:
        cmp     inputLen, 112                   ; at least 7 blocks left?
        jb      tail_setup

        movdqu  xmm0, [input]
        movdqu  xmm1, [input + 16]
        movdqu  xmm2, [input + 32]
        movdqu  xmm3, [input + 48]
        movdqu  xmm4, [input + 64]
        movdqu  xmm5, [input + 80]
        movdqu  xmm6, [input + 96]

        ; Round 0: XOR every block with round key 0.
        movdqu  xmm7, [ctx]
        FOR     blk, <xmm0,xmm1,xmm2,xmm3,xmm4,xmm5,xmm6>
        pxor    blk, xmm7
        ENDM

        i = 1
        WHILE i LT rnds
        aes_dec_rnd i
        i = i + 1
        ENDM
        aes_dec_last_rnd rnds

        ; Undo the chaining: block 0 with the saved chaining value, block k
        ; with ciphertext block k-1 (re-read from the input).
        movdqu  xmm7, [ctx - 32]
        pxor    xmm0, xmm7
        movdqu  xmm7, [input]
        pxor    xmm1, xmm7
        movdqu  xmm7, [input + 16]
        pxor    xmm2, xmm7
        movdqu  xmm7, [input + 32]
        pxor    xmm3, xmm7
        movdqu  xmm7, [input + 48]
        pxor    xmm4, xmm7
        movdqu  xmm7, [input + 64]
        pxor    xmm5, xmm7
        movdqu  xmm7, [input + 80]
        pxor    xmm6, xmm7
        movdqu  xmm7, [input + 96]              ; becomes the next chaining value

        movdqu  [output],      xmm0
        movdqu  [output + 16], xmm1
        movdqu  [output + 32], xmm2
        movdqu  [output + 48], xmm3
        movdqu  [output + 64], xmm4
        movdqu  [output + 80], xmm5
        movdqu  [output + 96], xmm6
        movdqu  [ctx - 32], xmm7

        lea     input,  [input + 112]
        lea     output, [output + 112]
        sub     inputLen, 112
        jmp     seven_blocks

tail_setup:
        movdqu  xmm3, [ctx - 32]                ; xmm3 = chaining value

one_block:
        cmp     inputLen, 16                    ; a whole block left?
        jb      finished

        movdqu  xmm0, [input]
        movdqa  xmm4, xmm0                      ; keep the ciphertext block
        movdqu  xmm7, [ctx]
        pxor    xmm0, xmm7

        i = 1
        WHILE i LT rnds
        movdqu  xmm7, [i*16 + ctx]
        aesdec  xmm0, xmm7
        i = i + 1
        ENDM
        movdqu  xmm7, [rnds*16 + ctx]
        aesdeclast xmm0, xmm7
        pxor    xmm3, xmm0                      ; plaintext

        movdqu  [output], xmm3
        movdqa  xmm3, xmm4                      ; chaining value for next block

        lea     input,  [input + 16]
        lea     output, [output + 16]
        sub     inputLen, 16
        jmp     one_block

finished:
        movdqu  [ctx - 32], xmm3                ; save chaining value
        xor     eax, eax                        ; return value 0
        pop     inputLen
        ret
ENDM
|
||
|
|
||
|
; intel_aes_encrypt_cbc_128: CBC encryption body expanded with rnds = 10.
        ALIGN   16
intel_aes_encrypt_cbc_128 PROC
        gen_aes_cbc_enc_func 10
intel_aes_encrypt_cbc_128 ENDP
|
||
|
|
||
|
; intel_aes_encrypt_cbc_192: CBC encryption body expanded with rnds = 12.
        ALIGN   16
intel_aes_encrypt_cbc_192 PROC
        gen_aes_cbc_enc_func 12
intel_aes_encrypt_cbc_192 ENDP
|
||
|
|
||
|
; intel_aes_encrypt_cbc_256: CBC encryption body expanded with rnds = 14.
        ALIGN   16
intel_aes_encrypt_cbc_256 PROC
        gen_aes_cbc_enc_func 14
intel_aes_encrypt_cbc_256 ENDP
|
||
|
|
||
|
; intel_aes_decrypt_cbc_128: CBC decryption body expanded with rnds = 10.
        ALIGN   16
intel_aes_decrypt_cbc_128 PROC
        gen_aes_cbc_dec_func 10
intel_aes_decrypt_cbc_128 ENDP
|
||
|
|
||
|
; intel_aes_decrypt_cbc_192: CBC decryption body expanded with rnds = 12.
        ALIGN   16
intel_aes_decrypt_cbc_192 PROC
        gen_aes_cbc_dec_func 12
intel_aes_decrypt_cbc_192 ENDP
|
||
|
|
||
|
; intel_aes_decrypt_cbc_256: CBC decryption body expanded with rnds = 14.
        ALIGN   16
intel_aes_decrypt_cbc_256 PROC
        gen_aes_cbc_dec_func 14
intel_aes_decrypt_cbc_256 ENDP
|
||
|
|
||
|
|
||
|
|
||
|
; Extra register names used only by gen_aes_ctr_func.
ctrCtx      TEXTEQU <esi>       ; counter-context pointer, later the running counter value
CTR         TEXTEQU <ebx>       ; scratch for the byte-swapped, masked counter dword
|
||
|
|
||
|
; gen_aes_ctr_func rnds
;   Emits the complete body of a CTR-mode routine.
;     rnds : index of the final round key.
;   Stack arguments 0, 1, 4 and 5 are read into ctrCtx, output, input and
;   inputLen.  The dword at [ctrCtx + 4] is taken as the cipher context
;   pointer and the 16 bytes at [ctrCtx + 8] as the counter block.
;   Only the last dword of the counter block is incremented (as a big-endian
;   32-bit value); the other twelve bytes are carried through unchanged.
;   Seven counter blocks, already XORed with round key 0, are staged in a
;   16-byte aligned stack area so that round 0 costs nothing in the loops.
;   Whole blocks are handled seven at a time, then one at a time; a tail
;   shorter than 16 bytes is not touched.  The next unused counter block is
;   written back to [ctrCtx + 8] on exit.
;   On return eax = 0.  edi, esi, ebx and ebp are saved and restored; ecx,
;   edx and xmm0-xmm7 are overwritten.
gen_aes_ctr_func MACRO rnds
        LOCAL   seven_blocks, one_block, finished

        push    inputLen
        push    ctrCtx
        push    CTR
        push    ebp

        ; 20 bytes (four saved registers + return address) sit below the
        ; first argument.
        mov     ctrCtx,   [esp + 20]            ; argument 0
        mov     output,   [esp + 24]            ; argument 1
        mov     input,    [esp + 36]            ; argument 4
        mov     inputLen, [esp + 40]            ; argument 5

        mov     ctx, [ctrCtx + 4]
        lea     ctx, [ctx + 44]                 ; base for round-key loads

        ; Reserve 7*16 bytes and align esp down to 16; ebp remembers the
        ; original stack pointer for the epilogue.
        mov     ebp, esp
        sub     esp, 112
        and     esp, -16

        movdqu  xmm0, [ctrCtx + 8]              ; counter block
        mov     ctrCtx, [ctrCtx + 20]           ; its last dword ...
        bswap   ctrCtx                          ; ... as a native integer
        movdqu  xmm1, [ctx]                     ; round key 0

        pxor    xmm0, xmm1

        ; Fill all seven slots with (counter block XOR round key 0).
        i = 0
        WHILE i LT 7
        movdqa  [esp + i*16], xmm0
        i = i + 1
        ENDM

        ; Slots 1..6 get the next six counter values in their last dword,
        ; again XORed with the last dword of round key 0.
        i = 1
        WHILE i LT 7
        inc     ctrCtx
        mov     CTR, ctrCtx
        bswap   CTR
        xor     CTR, [ctx + 12]
        mov     [esp + i*16 + 12], CTR
        i = i + 1
        ENDM

seven_blocks:
        cmp     inputLen, 112                   ; at least 7 blocks left?
        jb      one_block

        movdqu  xmm0, [esp]
        movdqu  xmm1, [esp + 16]
        movdqu  xmm2, [esp + 32]
        movdqu  xmm3, [esp + 48]
        movdqu  xmm4, [esp + 64]
        movdqu  xmm5, [esp + 80]
        movdqu  xmm6, [esp + 96]

        ; Rounds 1..7, each followed by refreshing one stack slot with the
        ; next counter value for the following pass.
        i = 1
        WHILE i LE 7
        aes_rnd i

        inc     ctrCtx
        mov     CTR, ctrCtx
        bswap   CTR
        xor     CTR, [ctx + 12]
        mov     [esp + (i-1)*16 + 12], CTR

        i = i + 1
        ENDM
        ; Remaining rounds; i continues from 8.
        WHILE i LT rnds
        aes_rnd i
        i = i + 1
        ENDM
        aes_last_rnd rnds

        ; XOR the seven keystream blocks with the input.
        movdqu  xmm7, [input]
        pxor    xmm0, xmm7
        movdqu  xmm7, [input + 16]
        pxor    xmm1, xmm7
        movdqu  xmm7, [input + 32]
        pxor    xmm2, xmm7
        movdqu  xmm7, [input + 48]
        pxor    xmm3, xmm7
        movdqu  xmm7, [input + 64]
        pxor    xmm4, xmm7
        movdqu  xmm7, [input + 80]
        pxor    xmm5, xmm7
        movdqu  xmm7, [input + 96]
        pxor    xmm6, xmm7

        movdqu  [output],      xmm0
        movdqu  [output + 16], xmm1
        movdqu  [output + 32], xmm2
        movdqu  [output + 48], xmm3
        movdqu  [output + 64], xmm4
        movdqu  [output + 80], xmm5
        movdqu  [output + 96], xmm6

        lea     input,  [input + 112]
        lea     output, [output + 112]
        sub     inputLen, 112
        jmp     seven_blocks

one_block:
        cmp     inputLen, 16                    ; a whole block left?
        jb      finished

        ; Consume the next staged counter block by advancing esp past it.
        movdqu  xmm0, [esp]
        add     esp, 16

        i = 1
        WHILE i LT rnds
        movdqu  xmm7, [i*16 + ctx]
        aesenc  xmm0, xmm7
        i = i + 1
        ENDM
        movdqu  xmm7, [rnds*16 + ctx]
        aesenclast xmm0, xmm7

        movdqu  xmm7, [input]
        pxor    xmm0, xmm7
        movdqu  [output], xmm0

        lea     input,  [input + 16]
        lea     output, [output + 16]
        sub     inputLen, 16
        jmp     one_block

finished:
        ; [esp] holds the first unused counter block, still XORed with round
        ; key 0; undo the XOR and store it back in the counter context.
        mov     ctrCtx, [ebp + 20]
        movdqu  xmm0, [esp]
        movdqu  xmm1, [ctx]
        pxor    xmm0, xmm1
        movdqu  [ctrCtx + 8], xmm0

        xor     eax, eax                        ; return value 0
        mov     esp, ebp
        pop     ebp
        pop     CTR
        pop     ctrCtx
        pop     inputLen
        ret
ENDM
|
||
|
|
||
|
|
||
|
; intel_aes_encrypt_ctr_128: CTR body expanded with rnds = 10.
        ALIGN   16
intel_aes_encrypt_ctr_128 PROC
        gen_aes_ctr_func 10
intel_aes_encrypt_ctr_128 ENDP
|
||
|
|
||
|
; intel_aes_encrypt_ctr_192: CTR body expanded with rnds = 12.
        ALIGN   16
intel_aes_encrypt_ctr_192 PROC
        gen_aes_ctr_func 12
intel_aes_encrypt_ctr_192 ENDP
|
||
|
|
||
|
; intel_aes_encrypt_ctr_256: CTR body expanded with rnds = 14.
        ALIGN   16
intel_aes_encrypt_ctr_256 PROC
        gen_aes_ctr_func 14
intel_aes_encrypt_ctr_256 ENDP
|
||
|
|
||
|
|
||
|
END
|