Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(2)

Unified Diff: nss/lib/freebl/intel-aes-x64-masm.asm

Issue 1155223003: Uprev NSS from 3.18.0 RTM to 3.19.0 RTM (Closed) Base URL: http://src.chromium.org/svn/trunk/deps/third_party/nss
Patch Set: *cough* Created 5 years, 7 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View side-by-side diff with in-line comments
Download patch
« no previous file with comments | « nss/lib/ckfw/builtins/nssckbi.h ('k') | nss/lib/freebl/intel-gcm.h » ('j') | no next file with comments »
Expand Comments ('e') | Collapse Comments ('c') | Show/Hide Comments ('s')
Index: nss/lib/freebl/intel-aes-x64-masm.asm
diff --git a/nss/lib/freebl/intel-aes-x86-masm.asm b/nss/lib/freebl/intel-aes-x64-masm.asm
similarity index 62%
copy from nss/lib/freebl/intel-aes-x86-masm.asm
copy to nss/lib/freebl/intel-aes-x64-masm.asm
index 7d805e7660f15d20f89911424dc83dbb7d906dca..ef5c76ba28370882583003116b9aeeb3505e256d 100644
--- a/nss/lib/freebl/intel-aes-x86-masm.asm
+++ b/nss/lib/freebl/intel-aes-x64-masm.asm
@@ -10,9 +10,6 @@
; Please send feedback directly to crypto.feedback.alias@intel.com
-.MODEL FLAT, C
-.XMM
-
.DATA
ALIGN 16
Lmask dd 0c0f0e0dh,0c0f0e0dh,0c0f0e0dh,0c0f0e0dh
@@ -23,74 +20,81 @@ Lcon2 dd 1bh,1bh,1bh,1bh
.CODE
-ctx textequ <ecx>
-output textequ <edx>
-input textequ <eax>
-inputLen textequ <edi>
+ctx textequ <rcx>
+output textequ <rdx>
+input textequ <r8>
+inputLen textequ <r9d>
aes_rnd MACRO i
- movdqu xmm7, [i*16 + ctx]
- aesenc xmm0, xmm7
- aesenc xmm1, xmm7
- aesenc xmm2, xmm7
- aesenc xmm3, xmm7
- aesenc xmm4, xmm7
- aesenc xmm5, xmm7
- aesenc xmm6, xmm7
+ movdqu xmm8, [i*16 + ctx]
+ aesenc xmm0, xmm8
+ aesenc xmm1, xmm8
+ aesenc xmm2, xmm8
+ aesenc xmm3, xmm8
+ aesenc xmm4, xmm8
+ aesenc xmm5, xmm8
+ aesenc xmm6, xmm8
+ aesenc xmm7, xmm8
ENDM
aes_last_rnd MACRO i
- movdqu xmm7, [i*16 + ctx]
- aesenclast xmm0, xmm7
- aesenclast xmm1, xmm7
- aesenclast xmm2, xmm7
- aesenclast xmm3, xmm7
- aesenclast xmm4, xmm7
- aesenclast xmm5, xmm7
- aesenclast xmm6, xmm7
+ movdqu xmm8, [i*16 + ctx]
+ aesenclast xmm0, xmm8
+ aesenclast xmm1, xmm8
+ aesenclast xmm2, xmm8
+ aesenclast xmm3, xmm8
+ aesenclast xmm4, xmm8
+ aesenclast xmm5, xmm8
+ aesenclast xmm6, xmm8
+ aesenclast xmm7, xmm8
ENDM
aes_dec_rnd MACRO i
- movdqu xmm7, [i*16 + ctx]
- aesdec xmm0, xmm7
- aesdec xmm1, xmm7
- aesdec xmm2, xmm7
- aesdec xmm3, xmm7
- aesdec xmm4, xmm7
- aesdec xmm5, xmm7
- aesdec xmm6, xmm7
+ movdqu xmm8, [i*16 + ctx]
+ aesdec xmm0, xmm8
+ aesdec xmm1, xmm8
+ aesdec xmm2, xmm8
+ aesdec xmm3, xmm8
+ aesdec xmm4, xmm8
+ aesdec xmm5, xmm8
+ aesdec xmm6, xmm8
+ aesdec xmm7, xmm8
ENDM
aes_dec_last_rnd MACRO i
- movdqu xmm7, [i*16 + ctx]
- aesdeclast xmm0, xmm7
- aesdeclast xmm1, xmm7
- aesdeclast xmm2, xmm7
- aesdeclast xmm3, xmm7
- aesdeclast xmm4, xmm7
- aesdeclast xmm5, xmm7
- aesdeclast xmm6, xmm7
+ movdqu xmm8, [i*16 + ctx]
+ aesdeclast xmm0, xmm8
+ aesdeclast xmm1, xmm8
+ aesdeclast xmm2, xmm8
+ aesdeclast xmm3, xmm8
+ aesdeclast xmm4, xmm8
+ aesdeclast xmm5, xmm8
+ aesdeclast xmm6, xmm8
+ aesdeclast xmm7, xmm8
ENDM
gen_aes_ecb_func MACRO enc, rnds
-LOCAL loop7
+LOCAL loop8
LOCAL loop1
LOCAL bail
- push inputLen
+ xor inputLen, inputLen
+ mov input, [rsp + 1*8 + 8*4]
+ mov inputLen, [rsp + 1*8 + 8*5]
+
+ sub rsp, 3*16
- mov ctx, [esp + 2*4 + 0*4]
- mov output, [esp + 2*4 + 1*4]
- mov input, [esp + 2*4 + 4*4]
- mov inputLen, [esp + 2*4 + 5*4]
+ movdqu [rsp + 0*16], xmm6
+ movdqu [rsp + 1*16], xmm7
+ movdqu [rsp + 2*16], xmm8
- lea ctx, [44+ctx]
+ lea ctx, [48+ctx]
-loop7:
- cmp inputLen, 7*16
+loop8:
+ cmp inputLen, 8*16
jb loop1
movdqu xmm0, [0*16 + input]
@@ -100,15 +104,17 @@ loop7:
movdqu xmm4, [4*16 + input]
movdqu xmm5, [5*16 + input]
movdqu xmm6, [6*16 + input]
-
- movdqu xmm7, [0*16 + ctx]
- pxor xmm0, xmm7
- pxor xmm1, xmm7
- pxor xmm2, xmm7
- pxor xmm3, xmm7
- pxor xmm4, xmm7
- pxor xmm5, xmm7
- pxor xmm6, xmm7
+ movdqu xmm7, [7*16 + input]
+
+ movdqu xmm8, [0*16 + ctx]
+ pxor xmm0, xmm8
+ pxor xmm1, xmm8
+ pxor xmm2, xmm8
+ pxor xmm3, xmm8
+ pxor xmm4, xmm8
+ pxor xmm5, xmm8
+ pxor xmm6, xmm8
+ pxor xmm7, xmm8
IF enc eq 1
rnd textequ <aes_rnd>
@@ -136,11 +142,12 @@ ENDIF
movdqu [4*16 + output], xmm4
movdqu [5*16 + output], xmm5
movdqu [6*16 + output], xmm6
+ movdqu [7*16 + output], xmm7
- lea input, [7*16 + input]
- lea output, [7*16 + output]
- sub inputLen, 7*16
- jmp loop7
+ lea input, [8*16 + input]
+ lea output, [8*16 + output]
+ sub inputLen, 8*16
+ jmp loop8
loop1:
cmp inputLen, 1*16
@@ -167,54 +174,46 @@ loop1:
jmp loop1
bail:
- xor eax, eax
- pop inputLen
- ret
+ xor rax, rax
+ movdqu xmm6, [rsp + 0*16]
+ movdqu xmm7, [rsp + 1*16]
+ movdqu xmm8, [rsp + 2*16]
+ add rsp, 3*16
+ ret
ENDM
-ALIGN 16
intel_aes_encrypt_ecb_128 PROC
gen_aes_ecb_func 1, 10
intel_aes_encrypt_ecb_128 ENDP
-ALIGN 16
intel_aes_encrypt_ecb_192 PROC
gen_aes_ecb_func 1, 12
intel_aes_encrypt_ecb_192 ENDP
-ALIGN 16
intel_aes_encrypt_ecb_256 PROC
gen_aes_ecb_func 1, 14
intel_aes_encrypt_ecb_256 ENDP
-ALIGN 16
intel_aes_decrypt_ecb_128 PROC
gen_aes_ecb_func 0, 10
intel_aes_decrypt_ecb_128 ENDP
-ALIGN 16
intel_aes_decrypt_ecb_192 PROC
gen_aes_ecb_func 0, 12
intel_aes_decrypt_ecb_192 ENDP
-ALIGN 16
intel_aes_decrypt_ecb_256 PROC
gen_aes_ecb_func 0, 14
intel_aes_decrypt_ecb_256 ENDP
-KEY textequ <ecx>
-KS textequ <edx>
-ITR textequ <eax>
+KEY textequ <rcx>
+KS textequ <rdx>
+ITR textequ <r8>
-ALIGN 16
intel_aes_encrypt_init_128 PROC
- mov KEY, [esp + 1*4 + 0*4]
- mov KS, [esp + 1*4 + 1*4]
-
-
movdqu xmm1, [KEY]
movdqu [KS], xmm1
movdqa xmm2, xmm1
@@ -280,12 +279,8 @@ Lenc_128_ks_loop:
intel_aes_encrypt_init_128 ENDP
-ALIGN 16
intel_aes_decrypt_init_128 PROC
- mov KEY, [esp + 1*4 + 0*4]
- mov KS, [esp + 1*4 + 1*4]
-
push KS
push KEY
@@ -320,16 +315,15 @@ intel_aes_decrypt_init_128 PROC
intel_aes_decrypt_init_128 ENDP
-ALIGN 16
intel_aes_encrypt_init_192 PROC
- mov KEY, [esp + 1*4 + 0*4]
- mov KS, [esp + 1*4 + 1*4]
+ sub rsp, 16*2
+ movdqu [16*0 + rsp], xmm6
+ movdqu [16*1 + rsp], xmm7
- pxor xmm3, xmm3
movdqu xmm1, [KEY]
- pinsrd xmm3, DWORD PTR [16 + KEY], 0
- pinsrd xmm3, DWORD PTR [20 + KEY], 1
+ mov ITR, [16 + KEY]
+ movd xmm3, ITR
movdqu [KS], xmm1
movdqa xmm5, xmm3
@@ -396,14 +390,14 @@ Lenc_192_ks_loop:
jnz Lenc_192_ks_loop
movdqu [16 + KS], xmm5
-ret
+
+ movdqu xmm7, [16*1 + rsp]
+ movdqu xmm6, [16*0 + rsp]
+ add rsp, 16*2
+ ret
intel_aes_encrypt_init_192 ENDP
-ALIGN 16
intel_aes_decrypt_init_192 PROC
- mov KEY, [esp + 1*4 + 0*4]
- mov KS, [esp + 1*4 + 1*4]
-
push KS
push KEY
@@ -437,11 +431,12 @@ intel_aes_decrypt_init_192 PROC
ret
intel_aes_decrypt_init_192 ENDP
-ALIGN 16
+
intel_aes_encrypt_init_256 PROC
+ sub rsp, 16*2
+ movdqu [16*0 + rsp], xmm6
+ movdqu [16*1 + rsp], xmm7
- mov KEY, [esp + 1*4 + 0*4]
- mov KS, [esp + 1*4 + 1*4]
movdqu xmm1, [16*0 + KEY]
movdqu xmm3, [16*1 + KEY]
@@ -502,14 +497,15 @@ Lenc_256_ks_loop:
pxor xmm1, xmm2
movdqu [16*2 + KS], xmm1
+ movdqu xmm7, [16*1 + rsp]
+ movdqu xmm6, [16*0 + rsp]
+ add rsp, 16*2
ret
+
intel_aes_encrypt_init_256 ENDP
-ALIGN 16
-intel_aes_decrypt_init_256 PROC
- mov KEY, [esp + 1*4 + 0*4]
- mov KS, [esp + 1*4 + 1*4]
+intel_aes_decrypt_init_256 PROC
push KS
push KEY
@@ -550,14 +546,16 @@ gen_aes_cbc_enc_func MACRO rnds
LOCAL loop1
LOCAL bail
- push inputLen
+ mov input, [rsp + 1*8 + 8*4]
+ mov inputLen, [rsp + 1*8 + 8*5]
- mov ctx, [esp + 2*4 + 0*4]
- mov output, [esp + 2*4 + 1*4]
- mov input, [esp + 2*4 + 4*4]
- mov inputLen, [esp + 2*4 + 5*4]
+ sub rsp, 3*16
- lea ctx, [44+ctx]
+ movdqu [rsp + 0*16], xmm6
+ movdqu [rsp + 1*16], xmm7
+ movdqu [rsp + 2*16], xmm8
+
+ lea ctx, [48+ctx]
movdqu xmm0, [-32+ctx]
@@ -566,6 +564,7 @@ LOCAL bail
movdqu xmm4, [2*16 + ctx]
movdqu xmm5, [3*16 + ctx]
movdqu xmm6, [4*16 + ctx]
+ movdqu xmm7, [5*16 + ctx]
loop1:
cmp inputLen, 1*16
@@ -579,15 +578,16 @@ loop1:
aesenc xmm0, xmm4
aesenc xmm0, xmm5
aesenc xmm0, xmm6
+ aesenc xmm0, xmm7
- i = 5
+ i = 6
WHILE i LT rnds
- movdqu xmm7, [i*16 + ctx]
- aesenc xmm0, xmm7
+ movdqu xmm8, [i*16 + ctx]
+ aesenc xmm0, xmm8
i = i+1
ENDM
- movdqu xmm7, [rnds*16 + ctx]
- aesenclast xmm0, xmm7
+ movdqu xmm8, [rnds*16 + ctx]
+ aesenclast xmm0, xmm8
movdqu [output], xmm0
@@ -599,30 +599,36 @@ loop1:
bail:
movdqu [-32+ctx], xmm0
- xor eax, eax
- pop inputLen
+ xor rax, rax
+
+ movdqu xmm6, [rsp + 0*16]
+ movdqu xmm7, [rsp + 1*16]
+ movdqu xmm8, [rsp + 2*16]
+ add rsp, 3*16
ret
ENDM
gen_aes_cbc_dec_func MACRO rnds
-LOCAL loop7
+LOCAL loop8
LOCAL loop1
LOCAL dec1
LOCAL bail
- push inputLen
+ mov input, [rsp + 1*8 + 8*4]
+ mov inputLen, [rsp + 1*8 + 8*5]
- mov ctx, [esp + 2*4 + 0*4]
- mov output, [esp + 2*4 + 1*4]
- mov input, [esp + 2*4 + 4*4]
- mov inputLen, [esp + 2*4 + 5*4]
+ sub rsp, 3*16
- lea ctx, [44+ctx]
+ movdqu [rsp + 0*16], xmm6
+ movdqu [rsp + 1*16], xmm7
+ movdqu [rsp + 2*16], xmm8
-loop7:
- cmp inputLen, 7*16
+ lea ctx, [48+ctx]
+
+loop8:
+ cmp inputLen, 8*16
jb dec1
movdqu xmm0, [0*16 + input]
@@ -632,15 +638,17 @@ loop7:
movdqu xmm4, [4*16 + input]
movdqu xmm5, [5*16 + input]
movdqu xmm6, [6*16 + input]
-
- movdqu xmm7, [0*16 + ctx]
- pxor xmm0, xmm7
- pxor xmm1, xmm7
- pxor xmm2, xmm7
- pxor xmm3, xmm7
- pxor xmm4, xmm7
- pxor xmm5, xmm7
- pxor xmm6, xmm7
+ movdqu xmm7, [7*16 + input]
+
+ movdqu xmm8, [0*16 + ctx]
+ pxor xmm0, xmm8
+ pxor xmm1, xmm8
+ pxor xmm2, xmm8
+ pxor xmm3, xmm8
+ pxor xmm4, xmm8
+ pxor xmm5, xmm8
+ pxor xmm6, xmm8
+ pxor xmm7, xmm8
i = 1
WHILE i LT rnds
@@ -649,21 +657,23 @@ loop7:
ENDM
aes_dec_last_rnd rnds
- movdqu xmm7, [-32 + ctx]
- pxor xmm0, xmm7
- movdqu xmm7, [0*16 + input]
- pxor xmm1, xmm7
- movdqu xmm7, [1*16 + input]
- pxor xmm2, xmm7
- movdqu xmm7, [2*16 + input]
- pxor xmm3, xmm7
- movdqu xmm7, [3*16 + input]
- pxor xmm4, xmm7
- movdqu xmm7, [4*16 + input]
- pxor xmm5, xmm7
- movdqu xmm7, [5*16 + input]
- pxor xmm6, xmm7
- movdqu xmm7, [6*16 + input]
+ movdqu xmm8, [-32 + ctx]
+ pxor xmm0, xmm8
+ movdqu xmm8, [0*16 + input]
+ pxor xmm1, xmm8
+ movdqu xmm8, [1*16 + input]
+ pxor xmm2, xmm8
+ movdqu xmm8, [2*16 + input]
+ pxor xmm3, xmm8
+ movdqu xmm8, [3*16 + input]
+ pxor xmm4, xmm8
+ movdqu xmm8, [4*16 + input]
+ pxor xmm5, xmm8
+ movdqu xmm8, [5*16 + input]
+ pxor xmm6, xmm8
+ movdqu xmm8, [6*16 + input]
+ pxor xmm7, xmm8
+ movdqu xmm8, [7*16 + input]
movdqu [0*16 + output], xmm0
movdqu [1*16 + output], xmm1
@@ -672,12 +682,13 @@ loop7:
movdqu [4*16 + output], xmm4
movdqu [5*16 + output], xmm5
movdqu [6*16 + output], xmm6
- movdqu [-32 + ctx], xmm7
+ movdqu [7*16 + output], xmm7
+ movdqu [-32 + ctx], xmm8
- lea input, [7*16 + input]
- lea output, [7*16 + output]
- sub inputLen, 7*16
- jmp loop7
+ lea input, [8*16 + input]
+ lea output, [8*16 + output]
+ sub inputLen, 8*16
+ jmp loop8
dec1:
movdqu xmm3, [-32 + ctx]
@@ -711,143 +722,152 @@ loop1:
bail:
movdqu [-32 + ctx], xmm3
- xor eax, eax
- pop inputLen
+ xor rax, rax
+
+ movdqu xmm6, [rsp + 0*16]
+ movdqu xmm7, [rsp + 1*16]
+ movdqu xmm8, [rsp + 2*16]
+ add rsp, 3*16
ret
ENDM
-ALIGN 16
intel_aes_encrypt_cbc_128 PROC
gen_aes_cbc_enc_func 10
intel_aes_encrypt_cbc_128 ENDP
-ALIGN 16
intel_aes_encrypt_cbc_192 PROC
gen_aes_cbc_enc_func 12
intel_aes_encrypt_cbc_192 ENDP
-ALIGN 16
intel_aes_encrypt_cbc_256 PROC
gen_aes_cbc_enc_func 14
intel_aes_encrypt_cbc_256 ENDP
-ALIGN 16
intel_aes_decrypt_cbc_128 PROC
gen_aes_cbc_dec_func 10
intel_aes_decrypt_cbc_128 ENDP
-ALIGN 16
intel_aes_decrypt_cbc_192 PROC
gen_aes_cbc_dec_func 12
intel_aes_decrypt_cbc_192 ENDP
-ALIGN 16
intel_aes_decrypt_cbc_256 PROC
gen_aes_cbc_dec_func 14
intel_aes_decrypt_cbc_256 ENDP
-ctrCtx textequ <esi>
-CTR textequ <ebx>
+ctrCtx textequ <r10>
+CTR textequ <r11d>
+CTRSave textequ <eax>
gen_aes_ctr_func MACRO rnds
-LOCAL loop7
+LOCAL loop8
LOCAL loop1
LOCAL enc1
LOCAL bail
- push inputLen
- push ctrCtx
- push CTR
- push ebp
+ mov input, [rsp + 8*1 + 4*8]
+ mov inputLen, [rsp + 8*1 + 5*8]
+
+ mov ctrCtx, ctx
+ mov ctx, [8+ctrCtx]
+ lea ctx, [48+ctx]
- mov ctrCtx, [esp + 4*5 + 0*4]
- mov output, [esp + 4*5 + 1*4]
- mov input, [esp + 4*5 + 4*4]
- mov inputLen, [esp + 4*5 + 5*4]
+ sub rsp, 3*16
+ movdqu [rsp + 0*16], xmm6
+ movdqu [rsp + 1*16], xmm7
+ movdqu [rsp + 2*16], xmm8
- mov ctx, [4+ctrCtx]
- lea ctx, [44+ctx]
- mov ebp, esp
- sub esp, 7*16
- and esp, -16
+ push rbp
+ mov rbp, rsp
+ sub rsp, 8*16
+ and rsp, -16
- movdqu xmm0, [8+ctrCtx]
- mov ctrCtx, [ctrCtx + 8 + 3*4]
- bswap ctrCtx
+
+ movdqu xmm0, [16+ctrCtx]
+ mov CTRSave, DWORD PTR [ctrCtx + 16 + 3*4]
+ bswap CTRSave
movdqu xmm1, [ctx + 0*16]
pxor xmm0, xmm1
- movdqa [esp + 0*16], xmm0
- movdqa [esp + 1*16], xmm0
- movdqa [esp + 2*16], xmm0
- movdqa [esp + 3*16], xmm0
- movdqa [esp + 4*16], xmm0
- movdqa [esp + 5*16], xmm0
- movdqa [esp + 6*16], xmm0
+ movdqa [rsp + 0*16], xmm0
+ movdqa [rsp + 1*16], xmm0
+ movdqa [rsp + 2*16], xmm0
+ movdqa [rsp + 3*16], xmm0
+ movdqa [rsp + 4*16], xmm0
+ movdqa [rsp + 5*16], xmm0
+ movdqa [rsp + 6*16], xmm0
+ movdqa [rsp + 7*16], xmm0
+
+ inc CTRSave
+ mov CTR, CTRSave
+ bswap CTR
+ xor CTR, DWORD PTR [ctx + 3*4]
+ mov DWORD PTR [rsp + 1*16 + 3*4], CTR
- inc ctrCtx
- mov CTR, ctrCtx
+ inc CTRSave
+ mov CTR, CTRSave
bswap CTR
- xor CTR, [ctx + 3*4]
- mov [esp + 1*16 + 3*4], CTR
+ xor CTR, DWORD PTR [ctx + 3*4]
+ mov DWORD PTR [rsp + 2*16 + 3*4], CTR
- inc ctrCtx
- mov CTR, ctrCtx
+ inc CTRSave
+ mov CTR, CTRSave
bswap CTR
- xor CTR, [ctx + 3*4]
- mov [esp + 2*16 + 3*4], CTR
+ xor CTR, DWORD PTR [ctx + 3*4]
+ mov DWORD PTR [rsp + 3*16 + 3*4], CTR
- inc ctrCtx
- mov CTR, ctrCtx
+ inc CTRSave
+ mov CTR, CTRSave
bswap CTR
- xor CTR, [ctx + 3*4]
- mov [esp + 3*16 + 3*4], CTR
+ xor CTR, DWORD PTR [ctx + 3*4]
+ mov DWORD PTR [rsp + 4*16 + 3*4], CTR
- inc ctrCtx
- mov CTR, ctrCtx
+ inc CTRSave
+ mov CTR, CTRSave
bswap CTR
- xor CTR, [ctx + 3*4]
- mov [esp + 4*16 + 3*4], CTR
+ xor CTR, DWORD PTR [ctx + 3*4]
+ mov DWORD PTR [rsp + 5*16 + 3*4], CTR
- inc ctrCtx
- mov CTR, ctrCtx
+ inc CTRSave
+ mov CTR, CTRSave
bswap CTR
- xor CTR, [ctx + 3*4]
- mov [esp + 5*16 + 3*4], CTR
+ xor CTR, DWORD PTR [ctx + 3*4]
+ mov DWORD PTR [rsp + 6*16 + 3*4], CTR
- inc ctrCtx
- mov CTR, ctrCtx
+ inc CTRSave
+ mov CTR, CTRSave
bswap CTR
- xor CTR, [ctx + 3*4]
- mov [esp + 6*16 + 3*4], CTR
+ xor CTR, DWORD PTR [ctx + 3*4]
+ mov DWORD PTR [rsp + 7*16 + 3*4], CTR
-loop7:
- cmp inputLen, 7*16
+loop8:
+ cmp inputLen, 8*16
jb loop1
- movdqu xmm0, [0*16 + esp]
- movdqu xmm1, [1*16 + esp]
- movdqu xmm2, [2*16 + esp]
- movdqu xmm3, [3*16 + esp]
- movdqu xmm4, [4*16 + esp]
- movdqu xmm5, [5*16 + esp]
- movdqu xmm6, [6*16 + esp]
+ movdqu xmm0, [0*16 + rsp]
+ movdqu xmm1, [1*16 + rsp]
+ movdqu xmm2, [2*16 + rsp]
+ movdqu xmm3, [3*16 + rsp]
+ movdqu xmm4, [4*16 + rsp]
+ movdqu xmm5, [5*16 + rsp]
+ movdqu xmm6, [6*16 + rsp]
+ movdqu xmm7, [7*16 + rsp]
i = 1
- WHILE i LE 7
+ WHILE i LE 8
aes_rnd i
- inc ctrCtx
- mov CTR, ctrCtx
+ inc CTRSave
+ mov CTR, CTRSave
bswap CTR
- xor CTR, [ctx + 3*4]
- mov [esp + (i-1)*16 + 3*4], CTR
+ xor CTR, DWORD PTR [ctx + 3*4]
+ mov DWORD PTR [rsp + (i-1)*16 + 3*4], CTR
i = i+1
ENDM
@@ -857,20 +877,22 @@ loop7:
ENDM
aes_last_rnd rnds
- movdqu xmm7, [0*16 + input]
- pxor xmm0, xmm7
- movdqu xmm7, [1*16 + input]
- pxor xmm1, xmm7
- movdqu xmm7, [2*16 + input]
- pxor xmm2, xmm7
- movdqu xmm7, [3*16 + input]
- pxor xmm3, xmm7
- movdqu xmm7, [4*16 + input]
- pxor xmm4, xmm7
- movdqu xmm7, [5*16 + input]
- pxor xmm5, xmm7
- movdqu xmm7, [6*16 + input]
- pxor xmm6, xmm7
+ movdqu xmm8, [0*16 + input]
+ pxor xmm0, xmm8
+ movdqu xmm8, [1*16 + input]
+ pxor xmm1, xmm8
+ movdqu xmm8, [2*16 + input]
+ pxor xmm2, xmm8
+ movdqu xmm8, [3*16 + input]
+ pxor xmm3, xmm8
+ movdqu xmm8, [4*16 + input]
+ pxor xmm4, xmm8
+ movdqu xmm8, [5*16 + input]
+ pxor xmm5, xmm8
+ movdqu xmm8, [6*16 + input]
+ pxor xmm6, xmm8
+ movdqu xmm8, [7*16 + input]
+ pxor xmm7, xmm8
movdqu [0*16 + output], xmm0
movdqu [1*16 + output], xmm1
@@ -879,19 +901,20 @@ loop7:
movdqu [4*16 + output], xmm4
movdqu [5*16 + output], xmm5
movdqu [6*16 + output], xmm6
+ movdqu [7*16 + output], xmm7
- lea input, [7*16 + input]
- lea output, [7*16 + output]
- sub inputLen, 7*16
- jmp loop7
+ lea input, [8*16 + input]
+ lea output, [8*16 + output]
+ sub inputLen, 8*16
+ jmp loop8
loop1:
cmp inputLen, 1*16
jb bail
- movdqu xmm0, [esp]
- add esp, 16
+ movdqu xmm0, [rsp]
+ add rsp, 16
i = 1
WHILE i LT rnds
@@ -913,34 +936,33 @@ loop1:
bail:
- mov ctrCtx, [ebp + 4*5 + 0*4]
- movdqu xmm0, [esp]
+ movdqu xmm0, [rsp]
movdqu xmm1, [ctx + 0*16]
pxor xmm0, xmm1
- movdqu [8+ctrCtx], xmm0
+ movdqu [16+ctrCtx], xmm0
+
+
+ xor rax, rax
+ mov rsp, rbp
+ pop rbp
+ movdqu xmm6, [rsp + 0*16]
+ movdqu xmm7, [rsp + 1*16]
+ movdqu xmm8, [rsp + 2*16]
+ add rsp, 3*16
- xor eax, eax
- mov esp, ebp
- pop ebp
- pop CTR
- pop ctrCtx
- pop inputLen
ret
ENDM
-ALIGN 16
intel_aes_encrypt_ctr_128 PROC
gen_aes_ctr_func 10
intel_aes_encrypt_ctr_128 ENDP
-ALIGN 16
intel_aes_encrypt_ctr_192 PROC
gen_aes_ctr_func 12
intel_aes_encrypt_ctr_192 ENDP
-ALIGN 16
intel_aes_encrypt_ctr_256 PROC
gen_aes_ctr_func 14
intel_aes_encrypt_ctr_256 ENDP
« no previous file with comments | « nss/lib/ckfw/builtins/nssckbi.h ('k') | nss/lib/freebl/intel-gcm.h » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698