| Index: openssl/crypto/bn/asm/x86_64-mont.pl
|
| ===================================================================
|
| --- openssl/crypto/bn/asm/x86_64-mont.pl (revision 105093)
|
| +++ openssl/crypto/bn/asm/x86_64-mont.pl (working copy)
|
| @@ -15,14 +15,18 @@
|
| # respectful 50%. It remains to be seen if loop unrolling and
|
| # dedicated squaring routine can provide further improvement...
|
|
|
| -$output=shift;
|
| +$flavour = shift; # first argument: perlasm flavour, or the output file if no flavour was given
|
| +$output = shift; # second argument: output file name
|
| +if ($flavour =~ /\./) { $output = $flavour; undef $flavour; } # a first argument containing '.' is taken as the output file
|
|
|
| +$win64=0; $win64=1 if ($flavour =~ /[nm]asm|mingw64/ || $output =~ /\.asm$/); # Win64 when flavour is nasm/masm/mingw64 or output ends in .asm
|
| +
|
| $0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1;
|
| ( $xlate="${dir}x86_64-xlate.pl" and -f $xlate ) or
|
| ( $xlate="${dir}../../perlasm/x86_64-xlate.pl" and -f $xlate) or
|
| die "can't locate x86_64-xlate.pl";
|
|
|
| -open STDOUT,"| $^X $xlate $output";
|
| +open STDOUT,"| $^X $xlate $flavour $output"; # pipe STDOUT through x86_64-xlate.pl, passing the flavour along
|
|
|
| # int bn_mul_mont(
|
| $rp="%rdi"; # BN_ULONG *rp,
|
| @@ -55,13 +59,14 @@
|
| push %r15
|
|
|
| mov ${num}d,${num}d
|
| - lea 2($num),%rax
|
| - mov %rsp,%rbp
|
| - neg %rax
|
| - lea (%rsp,%rax,8),%rsp # tp=alloca(8*(num+2))
|
| + lea 2($num),%r10 # %r10 = num+2
|
| + mov %rsp,%r11 # %r11 = %rsp before the tp allocation
|
| + neg %r10
|
| + lea (%rsp,%r10,8),%rsp # tp=alloca(8*(num+2))
|
| and \$-1024,%rsp # minimize TLB usage
|
|
|
| - mov %rbp,8(%rsp,$num,8) # tp[num+1]=%rsp
|
| + mov %r11,8(%rsp,$num,8) # tp[num+1]=%rsp
|
| +.Lprologue: # se_handler treats Rip below this label as still in the prologue
|
| mov %rdx,$bp # $bp reassigned, remember?
|
|
|
| mov ($n0),$n0 # pull n0[0] value
|
| @@ -197,18 +202,129 @@
|
| dec $j
|
| jge .Lcopy
|
|
|
| - mov 8(%rsp,$num,8),%rsp # restore %rsp
|
| + mov 8(%rsp,$num,8),%rsi # %rsi = saved stack pointer from tp[num+1]
|
| mov \$1,%rax
|
| + mov (%rsi),%r15 # reload the six saved registers through %rsi
|
| + mov 8(%rsi),%r14
|
| + mov 16(%rsi),%r13
|
| + mov 24(%rsi),%r12
|
| + mov 32(%rsi),%rbp
|
| + mov 40(%rsi),%rbx
|
| + lea 48(%rsi),%rsp # restore %rsp, stepping past the six saved registers
|
| +.Lepilogue: # se_handler handles Rip at or past this label like the prologue case
|
| + ret
|
| +.size bn_mul_mont,.-bn_mul_mont
|
| +.asciz "Montgomery Multiplication for x86_64, CRYPTOGAMS by <appro\@openssl.org>"
|
| +.align 16
|
| +___
|
| +
|
| +# EXCEPTION_DISPOSITION handler (EXCEPTION_RECORD *rec,ULONG64 frame,
|
| +# CONTEXT *context,DISPATCHER_CONTEXT *disp)
|
| +if ($win64) {
|
| +$rec="%rcx"; # 1st handler argument
|
| +$frame="%rdx"; # 2nd handler argument
|
| +$context="%r8"; # 3rd handler argument
|
| +$disp="%r9"; # 4th handler argument
|
| +
|
| +$code.=<<___;
|
| +.extern __imp_RtlVirtualUnwind
|
| +.type se_handler,\@abi-omnipotent
|
| +.align 16
|
| +se_handler:
|
| + push %rsi # preserve the non-volatile registers used as scratch below
|
| + push %rdi
|
| + push %rbx
|
| + push %rbp
|
| + push %r12
|
| + push %r13
|
| + push %r14
|
| + push %r15
|
| + pushfq # flags too: the copy below executes cld
|
| + sub \$64,%rsp # 32-byte shadow area + four stack arguments (arg5..arg8)
|
| +
|
| + mov 120($context),%rax # pull context->Rax
|
| + mov 248($context),%rbx # pull context->Rip
|
| +
|
| + lea .Lprologue(%rip),%r10
|
| + cmp %r10,%rbx # context->Rip<.Lprologue
|
| + jb .Lin_prologue
|
| +
|
| + mov 152($context),%rax # pull context->Rsp
|
| +
|
| + lea .Lepilogue(%rip),%r10
|
| + cmp %r10,%rbx # context->Rip>=.Lepilogue
|
| + jae .Lin_prologue
|
| +
|
| + mov 192($context),%r10 # pull $num (context->R9)
|
| + mov 8(%rax,%r10,8),%rax # pull saved stack pointer
|
| + lea 48(%rax),%rax # step past the six saved registers, as the epilogue does
|
| +
|
| + mov -8(%rax),%rbx # read the saved non-volatile registers off the frame
|
| + mov -16(%rax),%rbp
|
| + mov -24(%rax),%r12
|
| + mov -32(%rax),%r13
|
| + mov -40(%rax),%r14
|
| + mov -48(%rax),%r15
|
| + mov %rbx,144($context) # restore context->Rbx
|
| + mov %rbp,160($context) # restore context->Rbp
|
| + mov %r12,216($context) # restore context->R12
|
| + mov %r13,224($context) # restore context->R13
|
| + mov %r14,232($context) # restore context->R14
|
| + mov %r15,240($context) # restore context->R15
|
| +
|
| +.Lin_prologue:
|
| + mov 8(%rax),%rdi # NOTE(review): assumes the Win64 entry code stored %rdi/%rsi at 8/16 above this address - confirm in x86_64-xlate.pl
|
| + mov 16(%rax),%rsi
|
| + mov %rax,152($context) # restore context->Rsp
|
| + mov %rsi,168($context) # restore context->Rsi
|
| + mov %rdi,176($context) # restore context->Rdi
|
| +
|
| + mov 40($disp),%rdi # disp->ContextRecord
|
| + mov $context,%rsi # context
|
| + mov \$154,%ecx # sizeof(CONTEXT) in quadwords
|
| + .long 0xa548f3fc # cld; rep movsq
|
| +
|
| + mov $disp,%rsi # %rsi = disp, base for the field loads below
|
| + xor %rcx,%rcx # arg1, UNW_FLAG_NHANDLER
|
| + mov 8(%rsi),%rdx # arg2, disp->ImageBase
|
| + mov 0(%rsi),%r8 # arg3, disp->ControlPc
|
| + mov 16(%rsi),%r9 # arg4, disp->FunctionEntry
|
| + mov 40(%rsi),%r10 # disp->ContextRecord
|
| + lea 56(%rsi),%r11 # &disp->HandlerData
|
| + lea 24(%rsi),%r12 # &disp->EstablisherFrame
|
| + mov %r10,32(%rsp) # arg5
|
| + mov %r11,40(%rsp) # arg6
|
| + mov %r12,48(%rsp) # arg7
|
| + mov %rcx,56(%rsp) # arg8, (NULL)
|
| + call *__imp_RtlVirtualUnwind(%rip)
|
| +
|
| + mov \$1,%eax # ExceptionContinueSearch
|
| + add \$64,%rsp # release the argument area
|
| + popfq
|
| pop %r15
|
| pop %r14
|
| pop %r13
|
| pop %r12
|
| pop %rbp
|
| pop %rbx
|
| + pop %rdi
|
| + pop %rsi
|
| ret
|
| -.size bn_mul_mont,.-bn_mul_mont
|
| -.asciz "Montgomery Multiplication for x86_64, CRYPTOGAMS by <appro\@openssl.org>"
|
| +.size se_handler,.-se_handler
|
| +
|
| +.section .pdata # function table entry: begin, end, unwind info
|
| +.align 4
|
| + .rva .LSEH_begin_bn_mul_mont
|
| + .rva .LSEH_end_bn_mul_mont
|
| + .rva .LSEH_info_bn_mul_mont
|
| +
|
| +.section .xdata
|
| +.align 8
|
| +.LSEH_info_bn_mul_mont:
|
| + .byte 9,0,0,0 # UNWIND_INFO header: version 1, UNW_FLAG_EHANDLER, no prolog, no unwind codes
|
| + .rva se_handler # exception handler address
|
| ___
|
| +}
|
|
|
| print $code;
|
| close STDOUT;
|
|
|