OLD | NEW |
1 #include "../bn_lcl.h" | 1 #include "../bn_lcl.h" |
2 #ifdef __SUNPRO_C | 2 #if !(defined(__GNUC__) && __GNUC__>=2) |
3 # include "../bn_asm.c" /* kind of dirty hack for Sun Studio */ | 3 # include "../bn_asm.c" /* kind of dirty hack for Sun Studio */ |
4 #else | 4 #else |
5 /* | 5 /* |
6 * x86_64 BIGNUM accelerator version 0.1, December 2002. | 6 * x86_64 BIGNUM accelerator version 0.1, December 2002. |
7 * | 7 * |
8 * Implemented by Andy Polyakov <appro@fy.chalmers.se> for the OpenSSL | 8 * Implemented by Andy Polyakov <appro@fy.chalmers.se> for the OpenSSL |
9 * project. | 9 * project. |
10 * | 10 * |
11 * Rights for redistribution and usage in source and binary forms are | 11 * Rights for redistribution and usage in source and binary forms are |
12 * granted according to the OpenSSL license. Warranty of any kind is | 12 * granted according to the OpenSSL license. Warranty of any kind is |
(...skipping 35 matching lines...)
48 * sign verify sign/s verify/s | 48 * sign verify sign/s verify/s |
49 * dsa 512 bits 0.0002s 0.0003s 4442.2 3786.3 | 49 * dsa 512 bits 0.0002s 0.0003s 4442.2 3786.3 |
50 * dsa 1024 bits 0.0005s 0.0007s 1835.1 1497.4 | 50 * dsa 1024 bits 0.0005s 0.0007s 1835.1 1497.4 |
51 * dsa 2048 bits 0.0016s 0.0020s 620.4 504.6 | 51 * dsa 2048 bits 0.0016s 0.0020s 620.4 504.6 |
52 * | 52 * |
53 * For the reference. IA-32 assembler implementation performs | 53 * For the reference. IA-32 assembler implementation performs |
54 * very much like 64-bit code compiled with no-asm on the same | 54 * very much like 64-bit code compiled with no-asm on the same |
55 * machine. | 55 * machine. |
56 */ | 56 */ |
57 | 57 |
| 58 #ifdef _WIN64 |
| 59 #define BN_ULONG unsigned long long |
| 60 #else |
58 #define BN_ULONG unsigned long | 61 #define BN_ULONG unsigned long |
| 62 #endif |
59 | 63 |
60 #undef mul | 64 #undef mul |
61 #undef mul_add | 65 #undef mul_add |
62 #undef sqr | 66 #undef sqr |
63 | 67 |
64 /* | 68 /* |
65 * "m"(a), "+m"(r) is the way to favor DirectPath µ-code; | 69 * "m"(a), "+m"(r) is the way to favor DirectPath µ-code; |
66 * "g"(0) let the compiler to decide where does it | 70 * "g"(0) let the compiler to decide where does it |
67 * want to keep the value of zero; | 71 * want to keep the value of zero; |
68 */ | 72 */ |
(...skipping 111 matching lines...)
180 return ret; | 184 return ret; |
181 } | 185 } |
182 | 186 |
183 BN_ULONG bn_add_words (BN_ULONG *rp, const BN_ULONG *ap, const BN_ULONG *bp,int n) | 187 BN_ULONG bn_add_words (BN_ULONG *rp, const BN_ULONG *ap, const BN_ULONG *bp,int n) |
184 { BN_ULONG ret=0,i=0; | 188 { BN_ULONG ret=0,i=0; |
185 | 189 |
186 if (n <= 0) return 0; | 190 if (n <= 0) return 0; |
187 | 191 |
188 asm ( | 192 asm ( |
189 " subq %2,%2 \n" | 193 " subq %2,%2 \n" |
190 	".align 16			\n" | 194 	".p2align 4			\n" |
191 "1: movq (%4,%2,8),%0 \n" | 195 "1: movq (%4,%2,8),%0 \n" |
192 " adcq (%5,%2,8),%0 \n" | 196 " adcq (%5,%2,8),%0 \n" |
193 " movq %0,(%3,%2,8) \n" | 197 " movq %0,(%3,%2,8) \n" |
194 " leaq 1(%2),%2 \n" | 198 " leaq 1(%2),%2 \n" |
195 " loop 1b \n" | 199 " loop 1b \n" |
196 " sbbq %0,%0 \n" | 200 " sbbq %0,%0 \n" |
197 : "=&a"(ret),"+c"(n),"=&r"(i) | 201 : "=&a"(ret),"+c"(n),"=&r"(i) |
198 : "r"(rp),"r"(ap),"r"(bp) | 202 : "r"(rp),"r"(ap),"r"(bp) |
199 : "cc" | 203 : "cc" |
200 ); | 204 ); |
201 | 205 |
202 return ret&1; | 206 return ret&1; |
203 } | 207 } |
204 | 208 |
205 #ifndef SIMICS | 209 #ifndef SIMICS |
206 BN_ULONG bn_sub_words (BN_ULONG *rp, const BN_ULONG *ap, const BN_ULONG *bp,int n) | 210 BN_ULONG bn_sub_words (BN_ULONG *rp, const BN_ULONG *ap, const BN_ULONG *bp,int n) |
207 { BN_ULONG ret=0,i=0; | 211 { BN_ULONG ret=0,i=0; |
208 | 212 |
209 if (n <= 0) return 0; | 213 if (n <= 0) return 0; |
210 | 214 |
211 asm ( | 215 asm ( |
212 " subq %2,%2 \n" | 216 " subq %2,%2 \n" |
213 	".align 16			\n" | 217 	".p2align 4			\n" |
214 "1: movq (%4,%2,8),%0 \n" | 218 "1: movq (%4,%2,8),%0 \n" |
215 " sbbq (%5,%2,8),%0 \n" | 219 " sbbq (%5,%2,8),%0 \n" |
216 " movq %0,(%3,%2,8) \n" | 220 " movq %0,(%3,%2,8) \n" |
217 " leaq 1(%2),%2 \n" | 221 " leaq 1(%2),%2 \n" |
218 " loop 1b \n" | 222 " loop 1b \n" |
219 " sbbq %0,%0 \n" | 223 " sbbq %0,%0 \n" |
220 : "=&a"(ret),"+c"(n),"=&r"(i) | 224 : "=&a"(ret),"+c"(n),"=&r"(i) |
221 : "r"(rp),"r"(ap),"r"(bp) | 225 : "r"(rp),"r"(ap),"r"(bp) |
222 : "cc" | 226 : "cc" |
223 ); | 227 ); |
(...skipping 369 matching lines...)
593 r[4]=c2; | 597 r[4]=c2; |
594 c2=0; | 598 c2=0; |
595 sqr_add_c2(a,3,2,c3,c1,c2); | 599 sqr_add_c2(a,3,2,c3,c1,c2); |
596 r[5]=c3; | 600 r[5]=c3; |
597 c3=0; | 601 c3=0; |
598 sqr_add_c(a,3,c1,c2,c3); | 602 sqr_add_c(a,3,c1,c2,c3); |
599 r[6]=c1; | 603 r[6]=c1; |
600 r[7]=c2; | 604 r[7]=c2; |
601 } | 605 } |
602 #endif | 606 #endif |
OLD | NEW |