Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(302)

Side by Side Diff: openssl/crypto/md5/asm/md5-x86_64.pl

Issue 9254031: Upgrade chrome's OpenSSL to same version Android ships with. (Closed) Base URL: http://src.chromium.org/svn/trunk/deps/third_party/openssl/
Patch Set: '' Created 8 years, 11 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch | Annotate | Revision Log
« no previous file with comments | « openssl/crypto/md5/asm/md5-ia64.S ('k') | openssl/crypto/md5/md5.h » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
1 #!/usr/bin/perl -w 1 #!/usr/bin/perl -w
2 # 2 #
3 # MD5 optimized for AMD64. 3 # MD5 optimized for AMD64.
4 # 4 #
5 # Author: Marc Bevand <bevand_m (at) epita.fr> 5 # Author: Marc Bevand <bevand_m (at) epita.fr>
6 # Licence: I hereby disclaim the copyright on this code and place it 6 # Licence: I hereby disclaim the copyright on this code and place it
7 # in the public domain. 7 # in the public domain.
8 # 8 #
9 9
10 use strict; 10 use strict;
11 11
12 my $code; 12 my $code;
13 13
14 # round1_step() does: 14 # round1_step() does:
15 # dst = x + ((dst + F(x,y,z) + X[k] + T_i) <<< s) 15 # dst = x + ((dst + F(x,y,z) + X[k] + T_i) <<< s)
16 # %r10d = X[k_next] 16 # %r10d = X[k_next]
17 # %r11d = z' (copy of z for the next step) 17 # %r11d = z' (copy of z for the next step)
18 # Each round1_step() takes about 5.71 clocks (9 instructions, 1.58 IPC) 18 # Each round1_step() takes about 5.3 clocks (9 instructions, 1.7 IPC)
19 sub round1_step 19 sub round1_step
20 { 20 {
21 my ($pos, $dst, $x, $y, $z, $k_next, $T_i, $s) = @_; 21 my ($pos, $dst, $x, $y, $z, $k_next, $T_i, $s) = @_;
22 $T_i = unpack("l",pack("l", hex($T_i))); # convert to 32-bit signed decimal
23 $code .= " mov 0*4(%rsi), %r10d /* (NEXT STEP) X[0] */\n" if ($pos == -1); 22 $code .= " mov 0*4(%rsi), %r10d /* (NEXT STEP) X[0] */\n" if ($pos == -1);
24 $code .= " mov %edx, %r11d /* (NEXT STEP) z' = %edx */\n" if ($pos == -1); 23 $code .= " mov %edx, %r11d /* (NEXT STEP) z' = %edx */\n" if ($pos == -1);
25 $code .= <<EOF; 24 $code .= <<EOF;
26 xor $y, %r11d /* y ^ ... */ 25 xor $y, %r11d /* y ^ ... */
27 lea $T_i($dst,%r10d),$dst /* Const + dst + ... */ 26 lea $T_i($dst,%r10d),$dst /* Const + dst + ... */
28 and $x, %r11d /* x & ... */ 27 and $x, %r11d /* x & ... */
29 xor $z, %r11d /* z ^ ... */ 28 xor $z, %r11d /* z ^ ... */
30 mov $k_next*4(%rsi),%r10d /* (NEXT STEP) X[$k_next] */ 29 mov $k_next*4(%rsi),%r10d /* (NEXT STEP) X[$k_next] */
31 add %r11d, $dst /* dst += ... */ 30 add %r11d, $dst /* dst += ... */
32 rol \$$s, $dst /* dst <<< s */ 31 rol \$$s, $dst /* dst <<< s */
33 mov $y, %r11d /* (NEXT STEP) z' = $y */ 32 mov $y, %r11d /* (NEXT STEP) z' = $y */
34 add $x, $dst /* dst += x */ 33 add $x, $dst /* dst += x */
35 EOF 34 EOF
36 } 35 }
37 36
38 # round2_step() does: 37 # round2_step() does:
39 # dst = x + ((dst + G(x,y,z) + X[k] + T_i) <<< s) 38 # dst = x + ((dst + G(x,y,z) + X[k] + T_i) <<< s)
40 # %r10d = X[k_next] 39 # %r10d = X[k_next]
41 # %r11d = y' (copy of y for the next step) 40 # %r11d = z' (copy of z for the next step)
42 # Each round2_step() takes about 6.22 clocks (9 instructions, 1.45 IPC) 41 # %r12d = z' (copy of z for the next step)
42 # Each round2_step() takes about 5.4 clocks (11 instructions, 2.0 IPC)
43 sub round2_step 43 sub round2_step
44 { 44 {
45 my ($pos, $dst, $x, $y, $z, $k_next, $T_i, $s) = @_; 45 my ($pos, $dst, $x, $y, $z, $k_next, $T_i, $s) = @_;
46 $T_i = unpack("l",pack("l", hex($T_i))); # convert to 32-bit signed decimal
47 $code .= " mov 1*4(%rsi), %r10d /* (NEXT STEP) X[1] */\n" if ($pos == -1); 46 $code .= " mov 1*4(%rsi), %r10d /* (NEXT STEP) X[1] */\n" if ($pos == -1);
48 $code .= " mov» %ecx,» » %r11d» » /* (NEXT STEP) y' = %ecx */\n" if ($pos == -1); 47 $code .= " mov» %edx,» » %r11d» » /* (NEXT STEP) z' = %edx */\n" if ($pos == -1);
48 $code .= " mov» %edx,» » %r12d» » /* (NEXT STEP) z' = %edx */\n" if ($pos == -1);
49 $code .= <<EOF; 49 $code .= <<EOF;
50 » xor» $x,» » %r11d» » /* x ^ ... */ 50 » not» %r11d» » » » /* not z */
51 lea $T_i($dst,%r10d),$dst /* Const + dst + ... */ 51 lea $T_i($dst,%r10d),$dst /* Const + dst + ... */
52 » and» $z,» » %r11d» » /* z & ... */ 52 » and» $x,» » %r12d» » /* x & z */
53 » xor» $y,» » %r11d» » /* y ^ ... */ 53 » and» $y,» » %r11d» » /* y & (not z) */
54 mov $k_next*4(%rsi),%r10d /* (NEXT STEP) X[$k_next] */ 54 mov $k_next*4(%rsi),%r10d /* (NEXT STEP) X[$k_next] */
55 » add» %r11d,» » $dst» » /* dst += ... */ 55 » or» %r11d,» » %r12d» » /* (y & (not z)) | (x & z) */
56 » mov» $y,» » %r11d» » /* (NEXT STEP) z' = $y */
57 » add» %r12d,» » $dst» » /* dst += ... */
58 » mov» $y,» » %r12d» » /* (NEXT STEP) z' = $y */
56 rol \$$s, $dst /* dst <<< s */ 59 rol \$$s, $dst /* dst <<< s */
57 mov $x, %r11d /* (NEXT STEP) y' = $x */
58 add $x, $dst /* dst += x */ 60 add $x, $dst /* dst += x */
59 EOF 61 EOF
60 } 62 }
61 63
62 # round3_step() does: 64 # round3_step() does:
63 # dst = x + ((dst + H(x,y,z) + X[k] + T_i) <<< s) 65 # dst = x + ((dst + H(x,y,z) + X[k] + T_i) <<< s)
64 # %r10d = X[k_next] 66 # %r10d = X[k_next]
65 # %r11d = y' (copy of y for the next step) 67 # %r11d = y' (copy of y for the next step)
66 # Each round3_step() takes about 4.26 clocks (8 instructions, 1.88 IPC) 68 # Each round3_step() takes about 4.2 clocks (8 instructions, 1.9 IPC)
67 sub round3_step 69 sub round3_step
68 { 70 {
69 my ($pos, $dst, $x, $y, $z, $k_next, $T_i, $s) = @_; 71 my ($pos, $dst, $x, $y, $z, $k_next, $T_i, $s) = @_;
70 $T_i = unpack("l",pack("l", hex($T_i))); # convert to 32-bit signed decimal
71 $code .= " mov 5*4(%rsi), %r10d /* (NEXT STEP) X[5] */\n" if ($pos == -1); 72 $code .= " mov 5*4(%rsi), %r10d /* (NEXT STEP) X[5] */\n" if ($pos == -1);
72 $code .= " mov %ecx, %r11d /* (NEXT STEP) y' = %ecx */\n" if ($pos == -1); 73 $code .= " mov %ecx, %r11d /* (NEXT STEP) y' = %ecx */\n" if ($pos == -1);
73 $code .= <<EOF; 74 $code .= <<EOF;
74 lea $T_i($dst,%r10d),$dst /* Const + dst + ... */ 75 lea $T_i($dst,%r10d),$dst /* Const + dst + ... */
75 mov $k_next*4(%rsi),%r10d /* (NEXT STEP) X[$k_next] */ 76 mov $k_next*4(%rsi),%r10d /* (NEXT STEP) X[$k_next] */
76 xor $z, %r11d /* z ^ ... */ 77 xor $z, %r11d /* z ^ ... */
77 xor $x, %r11d /* x ^ ... */ 78 xor $x, %r11d /* x ^ ... */
78 add %r11d, $dst /* dst += ... */ 79 add %r11d, $dst /* dst += ... */
79 rol \$$s, $dst /* dst <<< s */ 80 rol \$$s, $dst /* dst <<< s */
80 mov $x, %r11d /* (NEXT STEP) y' = $x */ 81 mov $x, %r11d /* (NEXT STEP) y' = $x */
81 add $x, $dst /* dst += x */ 82 add $x, $dst /* dst += x */
82 EOF 83 EOF
83 } 84 }
84 85
85 # round4_step() does: 86 # round4_step() does:
86 # dst = x + ((dst + I(x,y,z) + X[k] + T_i) <<< s) 87 # dst = x + ((dst + I(x,y,z) + X[k] + T_i) <<< s)
87 # %r10d = X[k_next] 88 # %r10d = X[k_next]
88 # %r11d = not z' (copy of not z for the next step) 89 # %r11d = not z' (copy of not z for the next step)
89 # Each round4_step() takes about 5.27 clocks (9 instructions, 1.71 IPC) 90 # Each round4_step() takes about 5.2 clocks (9 instructions, 1.7 IPC)
90 sub round4_step 91 sub round4_step
91 { 92 {
92 my ($pos, $dst, $x, $y, $z, $k_next, $T_i, $s) = @_; 93 my ($pos, $dst, $x, $y, $z, $k_next, $T_i, $s) = @_;
93 $T_i = unpack("l",pack("l", hex($T_i))); # convert to 32-bit signed decimal
94 $code .= " mov 0*4(%rsi), %r10d /* (NEXT STEP) X[0] */\n" if ($pos == -1); 94 $code .= " mov 0*4(%rsi), %r10d /* (NEXT STEP) X[0] */\n" if ($pos == -1);
95 $code .= " mov \$0xffffffff, %r11d\n" if ($pos == -1); 95 $code .= " mov \$0xffffffff, %r11d\n" if ($pos == -1);
96 $code .= " xor %edx, %r11d /* (NEXT STEP) not z' = not %edx*/\n" 96 $code .= " xor %edx, %r11d /* (NEXT STEP) not z' = not %edx*/\n"
97 if ($pos == -1); 97 if ($pos == -1);
98 $code .= <<EOF; 98 $code .= <<EOF;
99 lea $T_i($dst,%r10d),$dst /* Const + dst + ... */ 99 lea $T_i($dst,%r10d),$dst /* Const + dst + ... */
100 or $x, %r11d /* x | ... */ 100 or $x, %r11d /* x | ... */
101 xor $y, %r11d /* y ^ ... */ 101 xor $y, %r11d /* y ^ ... */
102 add %r11d, $dst /* dst += ... */ 102 add %r11d, $dst /* dst += ... */
103 mov $k_next*4(%rsi),%r10d /* (NEXT STEP) X[$k_next] */ 103 mov $k_next*4(%rsi),%r10d /* (NEXT STEP) X[$k_next] */
104 mov \$0xffffffff, %r11d 104 mov \$0xffffffff, %r11d
105 rol \$$s, $dst /* dst <<< s */ 105 rol \$$s, $dst /* dst <<< s */
106 xor $y, %r11d /* (NEXT STEP) not z' = not $y */ 106 xor $y, %r11d /* (NEXT STEP) not z' = not $y */
107 add $x, $dst /* dst += x */ 107 add $x, $dst /* dst += x */
108 EOF 108 EOF
109 } 109 }
110 110
111 my $output = shift; 111 my $flavour = shift;
112 open STDOUT,"| $^X ../perlasm/x86_64-xlate.pl $output"; 112 my $output = shift;
113 if ($flavour =~ /\./) { $output = $flavour; undef $flavour; }
114
115 my $win64=0; $win64=1 if ($flavour =~ /[nm]asm|mingw64/ || $output =~ /\.asm$/);
116
117 $0 =~ m/(.*[\/\\])[^\/\\]+$/; my $dir=$1; my $xlate;
118 ( $xlate="${dir}x86_64-xlate.pl" and -f $xlate ) or
119 ( $xlate="${dir}../../perlasm/x86_64-xlate.pl" and -f $xlate) or
120 die "can't locate x86_64-xlate.pl";
121
122 no warnings qw(uninitialized);
123 open STDOUT,"| $^X $xlate $flavour $output";
113 124
114 $code .= <<EOF; 125 $code .= <<EOF;
115 .text 126 .text
116 .align 16 127 .align 16
117 128
118 .globl md5_block_asm_data_order 129 .globl md5_block_asm_data_order
119 .type md5_block_asm_data_order,\@function,3 130 .type md5_block_asm_data_order,\@function,3
120 md5_block_asm_data_order: 131 md5_block_asm_data_order:
121 push %rbp 132 push %rbp
122 push %rbx 133 push %rbx
134 push %r12
123 push %r14 135 push %r14
124 push %r15 136 push %r15
137 .Lprologue:
125 138
126 # rdi = arg #1 (ctx, MD5_CTX pointer) 139 # rdi = arg #1 (ctx, MD5_CTX pointer)
127 # rsi = arg #2 (ptr, data pointer) 140 # rsi = arg #2 (ptr, data pointer)
128 # rdx = arg #3 (nbr, number of 16-word blocks to process) 141 # rdx = arg #3 (nbr, number of 16-word blocks to process)
129 mov %rdi, %rbp # rbp = ctx 142 mov %rdi, %rbp # rbp = ctx
130 shl \$6, %rdx # rdx = nbr in bytes 143 shl \$6, %rdx # rdx = nbr in bytes
131 lea (%rsi,%rdx), %rdi # rdi = end 144 lea (%rsi,%rdx), %rdi # rdi = end
132 mov 0*4(%rbp), %eax # eax = ctx->A 145 mov 0*4(%rbp), %eax # eax = ctx->A
133 mov 1*4(%rbp), %ebx # ebx = ctx->B 146 mov 1*4(%rbp), %ebx # ebx = ctx->B
134 mov 2*4(%rbp), %ecx # ecx = ctx->C 147 mov 2*4(%rbp), %ecx # ecx = ctx->C
(...skipping 94 matching lines...) Expand 10 before | Expand all | Expand 10 after
229 cmp %rdi, %rsi # cmp end with ptr 242 cmp %rdi, %rsi # cmp end with ptr
230 jb .Lloop # jmp if ptr < end 243 jb .Lloop # jmp if ptr < end
231 # END of loop over 16-word blocks 244 # END of loop over 16-word blocks
232 245
233 .Lend: 246 .Lend:
234 mov %eax, 0*4(%rbp) # ctx->A = A 247 mov %eax, 0*4(%rbp) # ctx->A = A
235 mov %ebx, 1*4(%rbp) # ctx->B = B 248 mov %ebx, 1*4(%rbp) # ctx->B = B
236 mov %ecx, 2*4(%rbp) # ctx->C = C 249 mov %ecx, 2*4(%rbp) # ctx->C = C
237 mov %edx, 3*4(%rbp) # ctx->D = D 250 mov %edx, 3*4(%rbp) # ctx->D = D
238 251
239 » pop» %r15 252 » mov» (%rsp),%r15
240 » pop» %r14 253 » mov» 8(%rsp),%r14
241 » pop» %rbx 254 » mov» 16(%rsp),%r12
242 » pop» %rbp 255 » mov» 24(%rsp),%rbx
256 » mov» 32(%rsp),%rbp
257 » add» \$40,%rsp
258 .Lepilogue:
243 ret 259 ret
244 .size md5_block_asm_data_order,.-md5_block_asm_data_order 260 .size md5_block_asm_data_order,.-md5_block_asm_data_order
245 EOF 261 EOF
246 262
263 # EXCEPTION_DISPOSITION handler (EXCEPTION_RECORD *rec,ULONG64 frame,
264 # CONTEXT *context,DISPATCHER_CONTEXT *disp)
265 if ($win64) {
266 my $rec="%rcx";
267 my $frame="%rdx";
268 my $context="%r8";
269 my $disp="%r9";
270
271 $code.=<<___;
272 .extern __imp_RtlVirtualUnwind
273 .type se_handler,\@abi-omnipotent
274 .align 16
275 se_handler:
276 push %rsi
277 push %rdi
278 push %rbx
279 push %rbp
280 push %r12
281 push %r13
282 push %r14
283 push %r15
284 pushfq
285 sub \$64,%rsp
286
287 mov 120($context),%rax # pull context->Rax
288 mov 248($context),%rbx # pull context->Rip
289
290 lea .Lprologue(%rip),%r10
291 cmp %r10,%rbx # context->Rip<.Lprologue
292 jb .Lin_prologue
293
294 mov 152($context),%rax # pull context->Rsp
295
296 lea .Lepilogue(%rip),%r10
297 cmp %r10,%rbx # context->Rip>=.Lepilogue
298 jae .Lin_prologue
299
300 lea 40(%rax),%rax
301
302 mov -8(%rax),%rbp
303 mov -16(%rax),%rbx
304 mov -24(%rax),%r12
305 mov -32(%rax),%r14
306 mov -40(%rax),%r15
307 mov %rbx,144($context) # restore context->Rbx
308 mov %rbp,160($context) # restore context->Rbp
309 mov %r12,216($context) # restore context->R12
310 mov %r14,232($context) # restore context->R14
311 mov %r15,240($context) # restore context->R15
312
313 .Lin_prologue:
314 mov 8(%rax),%rdi
315 mov 16(%rax),%rsi
316 mov %rax,152($context) # restore context->Rsp
317 mov %rsi,168($context) # restore context->Rsi
318 mov %rdi,176($context) # restore context->Rdi
319
320 mov 40($disp),%rdi # disp->ContextRecord
321 mov $context,%rsi # context
322 mov \$154,%ecx # sizeof(CONTEXT)
323 .long 0xa548f3fc # cld; rep movsq
324
325 mov $disp,%rsi
326 xor %rcx,%rcx # arg1, UNW_FLAG_NHANDLER
327 mov 8(%rsi),%rdx # arg2, disp->ImageBase
328 mov 0(%rsi),%r8 # arg3, disp->ControlPc
329 mov 16(%rsi),%r9 # arg4, disp->FunctionEntry
330 mov 40(%rsi),%r10 # disp->ContextRecord
331 lea 56(%rsi),%r11 # &disp->HandlerData
332 lea 24(%rsi),%r12 # &disp->EstablisherFrame
333 mov %r10,32(%rsp) # arg5
334 mov %r11,40(%rsp) # arg6
335 mov %r12,48(%rsp) # arg7
336 mov %rcx,56(%rsp) # arg8, (NULL)
337 call *__imp_RtlVirtualUnwind(%rip)
338
339 mov \$1,%eax # ExceptionContinueSearch
340 add \$64,%rsp
341 popfq
342 pop %r15
343 pop %r14
344 pop %r13
345 pop %r12
346 pop %rbp
347 pop %rbx
348 pop %rdi
349 pop %rsi
350 ret
351 .size se_handler,.-se_handler
352
353 .section .pdata
354 .align 4
355 .rva .LSEH_begin_md5_block_asm_data_order
356 .rva .LSEH_end_md5_block_asm_data_order
357 .rva .LSEH_info_md5_block_asm_data_order
358
359 .section .xdata
360 .align 8
361 .LSEH_info_md5_block_asm_data_order:
362 .byte 9,0,0,0
363 .rva se_handler
364 ___
365 }
366
247 print $code; 367 print $code;
248 368
249 close STDOUT; 369 close STDOUT;
OLDNEW
« no previous file with comments | « openssl/crypto/md5/asm/md5-ia64.S ('k') | openssl/crypto/md5/md5.h » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698