| Index: openssl/crypto/sparccpuid.S
|
| ===================================================================
|
| --- openssl/crypto/sparccpuid.S (revision 105093)
|
| +++ openssl/crypto/sparccpuid.S (working copy)
|
| @@ -34,7 +34,8 @@
|
| nop
|
| call .PIC.zero.up
|
| mov .zero-(.-4),%o0
|
| - ldd [%o0],%f0
|
| + ld [%o0],%f0
|
| + ld [%o0],%f1
|
|
|
| subcc %g0,1,%o0
|
| ! Following is V9 "rd %ccr,%o0" instruction. However! V8
|
| @@ -166,6 +167,7 @@
|
|
|
| .global OPENSSL_atomic_add
|
| .type OPENSSL_atomic_add,#function
|
| +.align 32
|
| OPENSSL_atomic_add:
|
| #ifndef ABI64
|
| subcc %g0,1,%o2
|
| @@ -177,7 +179,7 @@
|
| ba .enter
|
| nop
|
| #ifdef __sun
|
| -! Note that you don't have to link with libthread to call thr_yield,
|
| +! Note that you do not have to link with libthread to call thr_yield,
|
| ! as libc provides a stub, which is overloaded the moment you link
|
| ! with *either* libpthread or libthread...
|
| #define YIELD_CPU thr_yield
|
| @@ -213,27 +215,188 @@
|
| sra %o0,%g0,%o0 ! we return signed int, remember?
|
| .size OPENSSL_atomic_add,.-OPENSSL_atomic_add
|
|
|
| -.global OPENSSL_rdtsc
|
| +.global _sparcv9_rdtick
|
| +.align 32
|
| +_sparcv9_rdtick:
|
| subcc %g0,1,%o0
|
| .word 0x91408000 !rd %ccr,%o0
|
| cmp %o0,0x99
|
| - bne .notsc
|
| + bne .notick
|
| xor %o0,%o0,%o0
|
| - save %sp,FRAME-16,%sp
|
| - mov 513,%o0 !SI_PLATFORM
|
| - add %sp,BIAS+16,%o1
|
| - call sysinfo
|
| - mov 256,%o2
|
| + .word 0x91410000 !rd %tick,%o0
|
| + retl
|
| + .word 0x93323020 !srlx %o0,32,%o1
|
| +.notick:
|
| + retl
|
| + xor %o1,%o1,%o1
|
| +.type _sparcv9_rdtick,#function
|
| +.size _sparcv9_rdtick,.-_sparcv9_rdtick
|
|
|
| - add %sp,BIAS-16,%o1
|
| - ld [%o1],%l0
|
| - ld [%o1+4],%l1
|
| - ld [%o1+8],%l2
|
| - mov %lo('SUNW'),%l3
|
| - ret
|
| - restore
|
| -.notsc:
|
| +.global _sparcv9_vis1_probe
|
| +.align 8
|
| +_sparcv9_vis1_probe:
|
| + .word 0x81b00d80 !fxor %f0,%f0,%f0
|
| + add %sp,BIAS+2,%o1
|
| retl
|
| + .word 0xc19a5a40 !ldda [%o1]ASI_FP16_P,%f0
|
| +.type _sparcv9_vis1_probe,#function
|
| +.size _sparcv9_vis1_probe,.-_sparcv9_vis1_probe
|
| +
|
| +! Probe and instrument VIS1 instruction. Output is number of cycles it
|
| +! takes to execute rdtick and a pair of VIS1 instructions. As the US-Tx VIS unit
|
| +! is slow (documented to be 6 cycles on T2) and the core is in-order
|
| +! single-issue, it should be possible to distinguish Tx reliably...
|
| +! Observed return values are:
|
| +!
|
| +! UltraSPARC IIe 7
|
| +! UltraSPARC III 7
|
| +! UltraSPARC T1 24
|
| +!
|
| +! Numbers for T2 and SPARC64 V-VII are more than welcome.
|
| +!
|
| +! It would be possible to detect specifically US-T1 by instrumenting
|
| +! fmul8ulx16, which is emulated on T1 and as such accounts for quite
|
| +! a lot of %tick-s, a couple of thousand on Linux...
|
| +.global _sparcv9_vis1_instrument
|
| +.align 8
|
| +_sparcv9_vis1_instrument:
|
| + .word 0x91410000 !rd %tick,%o0
|
| + .word 0x81b00d80 !fxor %f0,%f0,%f0
|
| + .word 0x85b08d82 !fxor %f2,%f2,%f2
|
| + .word 0x93410000 !rd %tick,%o1
|
| + .word 0x81b00d80 !fxor %f0,%f0,%f0
|
| + .word 0x85b08d82 !fxor %f2,%f2,%f2
|
| + .word 0x95410000 !rd %tick,%o2
|
| + .word 0x81b00d80 !fxor %f0,%f0,%f0
|
| + .word 0x85b08d82 !fxor %f2,%f2,%f2
|
| + .word 0x97410000 !rd %tick,%o3
|
| + .word 0x81b00d80 !fxor %f0,%f0,%f0
|
| + .word 0x85b08d82 !fxor %f2,%f2,%f2
|
| + .word 0x99410000 !rd %tick,%o4
|
| +
|
| + ! calculate intervals
|
| + sub %o1,%o0,%o0
|
| + sub %o2,%o1,%o1
|
| + sub %o3,%o2,%o2
|
| + sub %o4,%o3,%o3
|
| +
|
| + ! find minimum value
|
| + cmp %o0,%o1
|
| + .word 0x38680002 !bgu,a %xcc,.+8
|
| + mov %o1,%o0
|
| + cmp %o0,%o2
|
| + .word 0x38680002 !bgu,a %xcc,.+8
|
| + mov %o2,%o0
|
| + cmp %o0,%o3
|
| + .word 0x38680002 !bgu,a %xcc,.+8
|
| + mov %o3,%o0
|
| +
|
| + retl
|
| nop
|
| -.type OPENSSL_rdtsc,#function
|
| -.size OPENSSL_rdtsc,.-OPENSSL_atomic_add
|
| +.type _sparcv9_vis1_instrument,#function
|
| +.size _sparcv9_vis1_instrument,.-_sparcv9_vis1_instrument
|
| +
|
| +.global _sparcv9_vis2_probe
|
| +.align 8
|
| +_sparcv9_vis2_probe:
|
| + retl
|
| + .word 0x81b00980 !bshuffle %f0,%f0,%f0
|
| +.type _sparcv9_vis2_probe,#function
|
| +.size _sparcv9_vis2_probe,.-_sparcv9_vis2_probe
|
| +
|
| +.global _sparcv9_fmadd_probe
|
| +.align 8
|
| +_sparcv9_fmadd_probe:
|
| + .word 0x81b00d80 !fxor %f0,%f0,%f0
|
| + .word 0x85b08d82 !fxor %f2,%f2,%f2
|
| + retl
|
| + .word 0x81b80440 !fmaddd %f0,%f0,%f2,%f0
|
| +.type _sparcv9_fmadd_probe,#function
|
| +.size _sparcv9_fmadd_probe,.-_sparcv9_fmadd_probe
|
| +
|
| +.global OPENSSL_cleanse
|
| +.align 32
|
| +OPENSSL_cleanse:
|
| + cmp %o1,14
|
| + nop
|
| +#ifdef ABI64
|
| + bgu %xcc,.Lot
|
| +#else
|
| + bgu .Lot
|
| +#endif
|
| + cmp %o1,0
|
| + bne .Little
|
| + nop
|
| + retl
|
| + nop
|
| +
|
| +.Little:
|
| + stb %g0,[%o0]
|
| + subcc %o1,1,%o1
|
| + bnz .Little
|
| + add %o0,1,%o0
|
| + retl
|
| + nop
|
| +.align 32
|
| +.Lot:
|
| +#ifndef ABI64
|
| + subcc %g0,1,%g1
|
| + ! see above for explanation
|
| + .word 0x83408000 !rd %ccr,%g1
|
| + cmp %g1,0x99
|
| + bne .v8lot
|
| + nop
|
| +#endif
|
| +
|
| +.v9lot: andcc %o0,7,%g0
|
| + bz .v9aligned
|
| + nop
|
| + stb %g0,[%o0]
|
| + sub %o1,1,%o1
|
| + ba .v9lot
|
| + add %o0,1,%o0
|
| +.align 16,0x01000000
|
| +.v9aligned:
|
| + .word 0xc0720000 !stx %g0,[%o0]
|
| + sub %o1,8,%o1
|
| + andcc %o1,-8,%g0
|
| +#ifdef ABI64
|
| + .word 0x126ffffd !bnz %xcc,.v9aligned
|
| +#else
|
| + .word 0x124ffffd !bnz %icc,.v9aligned
|
| +#endif
|
| + add %o0,8,%o0
|
| +
|
| + cmp %o1,0
|
| + bne .Little
|
| + nop
|
| + retl
|
| + nop
|
| +#ifndef ABI64
|
| +.v8lot: andcc %o0,3,%g0
|
| + bz .v8aligned
|
| + nop
|
| + stb %g0,[%o0]
|
| + sub %o1,1,%o1
|
| + ba .v8lot
|
| + add %o0,1,%o0
|
| + nop
|
| +.v8aligned:
|
| + st %g0,[%o0]
|
| + sub %o1,4,%o1
|
| + andcc %o1,-4,%g0
|
| + bnz .v8aligned
|
| + add %o0,4,%o0
|
| +
|
| + cmp %o1,0
|
| + bne .Little
|
| + nop
|
| + retl
|
| + nop
|
| +#endif
|
| +.type OPENSSL_cleanse,#function
|
| +.size OPENSSL_cleanse,.-OPENSSL_cleanse
|
| +
|
| +.section ".init",#alloc,#execinstr
|
| + call OPENSSL_cpuid_setup
|
| + nop
|
|
|