| OLD | NEW |
| 1 #include "../bn_lcl.h" | 1 #include "../bn_lcl.h" |
| 2 #ifdef __SUNPRO_C | 2 #if !(defined(__GNUC__) && __GNUC__>=2) |
| 3 # include "../bn_asm.c" /* kind of dirty hack for Sun Studio */ | 3 # include "../bn_asm.c" /* kind of dirty hack for Sun Studio */ |
| 4 #else | 4 #else |
| 5 /* | 5 /* |
| 6 * x86_64 BIGNUM accelerator version 0.1, December 2002. | 6 * x86_64 BIGNUM accelerator version 0.1, December 2002. |
| 7 * | 7 * |
| 8 * Implemented by Andy Polyakov <appro@fy.chalmers.se> for the OpenSSL | 8 * Implemented by Andy Polyakov <appro@fy.chalmers.se> for the OpenSSL |
| 9 * project. | 9 * project. |
| 10 * | 10 * |
| 11 * Rights for redistribution and usage in source and binary forms are | 11 * Rights for redistribution and usage in source and binary forms are |
| 12 * granted according to the OpenSSL license. Warranty of any kind is | 12 * granted according to the OpenSSL license. Warranty of any kind is |
| (...skipping 35 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 48 * sign verify sign/s verify/s | 48 * sign verify sign/s verify/s |
| 49 * dsa 512 bits 0.0002s 0.0003s 4442.2 3786.3 | 49 * dsa 512 bits 0.0002s 0.0003s 4442.2 3786.3 |
| 50 * dsa 1024 bits 0.0005s 0.0007s 1835.1 1497.4 | 50 * dsa 1024 bits 0.0005s 0.0007s 1835.1 1497.4 |
| 51 * dsa 2048 bits 0.0016s 0.0020s 620.4 504.6 | 51 * dsa 2048 bits 0.0016s 0.0020s 620.4 504.6 |
| 52 * | 52 * |
| 53 * For reference, the IA-32 assembler implementation performs | 53 * For reference, the IA-32 assembler implementation performs |
| 54 * very much like 64-bit code compiled with no-asm on the same | 54 * very much like 64-bit code compiled with no-asm on the same |
| 55 * machine. | 55 * machine. |
| 56 */ | 56 */ |
| 57 | 57 |
| 58 #ifdef _WIN64 |
| 59 #define BN_ULONG unsigned long long |
| 60 #else |
| 58 #define BN_ULONG unsigned long | 61 #define BN_ULONG unsigned long |
| 62 #endif |
| 59 | 63 |
| 60 #undef mul | 64 #undef mul |
| 61 #undef mul_add | 65 #undef mul_add |
| 62 #undef sqr | 66 #undef sqr |
| 63 | 67 |
| 64 /* | 68 /* |
| 65 * "m"(a), "+m"(r) is the way to favor DirectPath µ-code; | 69 * "m"(a), "+m"(r) is the way to favor DirectPath µ-code; |
| 66 * "g"(0) lets the compiler decide where it | 70 * "g"(0) lets the compiler decide where it |
| 67 * wants to keep the value of zero; | 71 * wants to keep the value of zero; |
| 68 */ | 72 */ |
| (...skipping 111 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 180 return ret; | 184 return ret; |
| 181 } | 185 } |
| 182 | 186 |
| 183 BN_ULONG bn_add_words (BN_ULONG *rp, const BN_ULONG *ap, const BN_ULONG *bp,int
n) | 187 BN_ULONG bn_add_words (BN_ULONG *rp, const BN_ULONG *ap, const BN_ULONG *bp,int
n) |
| 184 { BN_ULONG ret=0,i=0; | 188 { BN_ULONG ret=0,i=0; |
| 185 | 189 |
| 186 if (n <= 0) return 0; | 190 if (n <= 0) return 0; |
| 187 | 191 |
| 188 asm ( | 192 asm ( |
| 189 " subq %2,%2 \n" | 193 " subq %2,%2 \n" |
| 190 » ".align 16» » » \n" | 194 » ".p2align 4» » » \n" |
| 191 "1: movq (%4,%2,8),%0 \n" | 195 "1: movq (%4,%2,8),%0 \n" |
| 192 " adcq (%5,%2,8),%0 \n" | 196 " adcq (%5,%2,8),%0 \n" |
| 193 " movq %0,(%3,%2,8) \n" | 197 " movq %0,(%3,%2,8) \n" |
| 194 " leaq 1(%2),%2 \n" | 198 " leaq 1(%2),%2 \n" |
| 195 " loop 1b \n" | 199 " loop 1b \n" |
| 196 " sbbq %0,%0 \n" | 200 " sbbq %0,%0 \n" |
| 197 : "=&a"(ret),"+c"(n),"=&r"(i) | 201 : "=&a"(ret),"+c"(n),"=&r"(i) |
| 198 : "r"(rp),"r"(ap),"r"(bp) | 202 : "r"(rp),"r"(ap),"r"(bp) |
| 199 : "cc" | 203 : "cc" |
| 200 ); | 204 ); |
| 201 | 205 |
| 202 return ret&1; | 206 return ret&1; |
| 203 } | 207 } |
| 204 | 208 |
| 205 #ifndef SIMICS | 209 #ifndef SIMICS |
| 206 BN_ULONG bn_sub_words (BN_ULONG *rp, const BN_ULONG *ap, const BN_ULONG *bp,int
n) | 210 BN_ULONG bn_sub_words (BN_ULONG *rp, const BN_ULONG *ap, const BN_ULONG *bp,int
n) |
| 207 { BN_ULONG ret=0,i=0; | 211 { BN_ULONG ret=0,i=0; |
| 208 | 212 |
| 209 if (n <= 0) return 0; | 213 if (n <= 0) return 0; |
| 210 | 214 |
| 211 asm ( | 215 asm ( |
| 212 " subq %2,%2 \n" | 216 " subq %2,%2 \n" |
| 213 » ".align 16» » » \n" | 217 » ".p2align 4» » » \n" |
| 214 "1: movq (%4,%2,8),%0 \n" | 218 "1: movq (%4,%2,8),%0 \n" |
| 215 " sbbq (%5,%2,8),%0 \n" | 219 " sbbq (%5,%2,8),%0 \n" |
| 216 " movq %0,(%3,%2,8) \n" | 220 " movq %0,(%3,%2,8) \n" |
| 217 " leaq 1(%2),%2 \n" | 221 " leaq 1(%2),%2 \n" |
| 218 " loop 1b \n" | 222 " loop 1b \n" |
| 219 " sbbq %0,%0 \n" | 223 " sbbq %0,%0 \n" |
| 220 : "=&a"(ret),"+c"(n),"=&r"(i) | 224 : "=&a"(ret),"+c"(n),"=&r"(i) |
| 221 : "r"(rp),"r"(ap),"r"(bp) | 225 : "r"(rp),"r"(ap),"r"(bp) |
| 222 : "cc" | 226 : "cc" |
| 223 ); | 227 ); |
| (...skipping 369 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 593 r[4]=c2; | 597 r[4]=c2; |
| 594 c2=0; | 598 c2=0; |
| 595 sqr_add_c2(a,3,2,c3,c1,c2); | 599 sqr_add_c2(a,3,2,c3,c1,c2); |
| 596 r[5]=c3; | 600 r[5]=c3; |
| 597 c3=0; | 601 c3=0; |
| 598 sqr_add_c(a,3,c1,c2,c3); | 602 sqr_add_c(a,3,c1,c2,c3); |
| 599 r[6]=c1; | 603 r[6]=c1; |
| 600 r[7]=c2; | 604 r[7]=c2; |
| 601 } | 605 } |
| 602 #endif | 606 #endif |
| OLD | NEW |