OLD | NEW |
1 /* This Source Code Form is subject to the terms of the Mozilla Public | 1 /* This Source Code Form is subject to the terms of the Mozilla Public |
2 * License, v. 2.0. If a copy of the MPL was not distributed with this | 2 * License, v. 2.0. If a copy of the MPL was not distributed with this |
3 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ | 3 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ |
4 | 4 |
5 /* A 32-bit implementation of the NIST P-256 elliptic curve. */ | 5 /* A 32-bit implementation of the NIST P-256 elliptic curve. */ |
6 | 6 |
7 #include <string.h> | 7 #include <string.h> |
8 | 8 |
9 #include "prtypes.h" | 9 #include "prtypes.h" |
10 #include "mpi.h" | 10 #include "mpi.h" |
11 #include "mpi-priv.h" | 11 #include "mpi-priv.h" |
12 #include "ecp.h" | 12 #include "ecp.h" |
| 13 #include "secport.h" |
13 | 14 |
14 typedef PRUint8 u8; | 15 typedef PRUint8 u8; |
15 typedef PRUint32 u32; | 16 typedef PRUint32 u32; |
16 typedef PRInt32 s32; | |
17 typedef PRUint64 u64; | 17 typedef PRUint64 u64; |
18 | 18 |
19 /* Our field elements are represented as nine, unsigned 32-bit words. Freebl's | 19 /* Our field elements are represented as nine, unsigned 32-bit words. Freebl's |
20 * MPI library calls them digits, but here they are called limbs, which is | 20 * MPI library calls them digits, but here they are called limbs, which is |
21 * GMP's terminology. | 21 * GMP's terminology. |
22 * | 22 * |
23 * The value of an felem (field element) is: | 23 * The value of an felem (field element) is: |
24 * x[0] + (x[1] * 2**29) + (x[2] * 2**57) + ... + (x[8] * 2**228) | 24 * x[0] + (x[1] * 2**29) + (x[2] * 2**57) + ... + (x[8] * 2**228) |
25 * | 25 * |
26 * That is, each limb is alternately 29 or 28-bits wide in little-endian | 26 * That is, each limb is alternately 29 or 28-bits wide in little-endian |
(...skipping 127 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
154 0xbe73d6f, 0xaa88141, 0xd976c81, 0x7e7a9cc, 0x18beb771, 0xd773cbd, 0x13f5195
1, 0x9d0c177, 0x1c49a78, | 154 0xbe73d6f, 0xaa88141, 0xd976c81, 0x7e7a9cc, 0x18beb771, 0xd773cbd, 0x13f5195
1, 0x9d0c177, 0x1c49a78, |
155 }; | 155 }; |
156 | 156 |
157 /* Field element operations: | 157 /* Field element operations: |
158 */ | 158 */ |
159 | 159 |
/* NON_ZERO_TO_ALL_ONES returns:
 *   0xffffffff for 0 < x <= 2**31
 *   0 for x == 0 or x > 2**31.
 *
 * x must be a u32 or an equivalent type such as limb.
 *
 * This uses only unsigned arithmetic, so it is well-defined C (the previous
 * version right-shifted a signed value, which is implementation-defined):
 * (x - 1) has its top bit set exactly when x == 0 or x > 2**31, so the
 * shifted bit is 1 in those cases and subtracting 1 from it yields 0;
 * otherwise the shifted bit is 0 and subtracting 1 wraps to all ones.
 */
#define NON_ZERO_TO_ALL_ONES(x) ((((u32)(x) - 1) >> 31) - 1)
169 | 167 |
170 /* felem_reduce_carry adds a multiple of p in order to cancel |carry|, | 168 /* felem_reduce_carry adds a multiple of p in order to cancel |carry|, |
171 * which is a term at 2**257. | 169 * which is a term at 2**257. |
172 * | 170 * |
173 * On entry: carry < 2**3, inout[0,2,...] < 2**29, inout[1,3,...] < 2**28. | 171 * On entry: carry < 2**3, inout[0,2,...] < 2**29, inout[1,3,...] < 2**28. |
174 * On exit: inout[0,2,..] < 2**30, inout[1,3,...] < 2**29. | 172 * On exit: inout[0,2,..] < 2**30, inout[1,3,...] < 2**29. |
175 */ | 173 */ |
176 static void felem_reduce_carry(felem inout, limb carry) | 174 static void felem_reduce_carry(felem inout, limb carry) |
177 { | 175 { |
178 const u32 carry_mask = NON_ZERO_TO_ALL_ONES(carry); | 176 const u32 carry_mask = NON_ZERO_TO_ALL_ONES(carry); |
(...skipping 947 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
1126 memset(ny, 0, sizeof(felem)); | 1124 memset(ny, 0, sizeof(felem)); |
1127 memset(nz, 0, sizeof(felem)); | 1125 memset(nz, 0, sizeof(felem)); |
1128 | 1126 |
1129 /* The loop adds bits at positions 0, 64, 128 and 192, followed by | 1127 /* The loop adds bits at positions 0, 64, 128 and 192, followed by |
1130 * positions 32,96,160 and 224 and does this 32 times. | 1128 * positions 32,96,160 and 224 and does this 32 times. |
1131 */ | 1129 */ |
1132 for (i = 0; i < 32; i++) { | 1130 for (i = 0; i < 32; i++) { |
1133 if (i) { | 1131 if (i) { |
1134 point_double(nx, ny, nz, nx, ny, nz); | 1132 point_double(nx, ny, nz, nx, ny, nz); |
1135 } | 1133 } |
| 1134 table_offset = 0; |
1136 for (j = 0; j <= 32; j += 32) { | 1135 for (j = 0; j <= 32; j += 32) { |
1137 char bit0 = get_bit(scalar, 31 - i + j); | 1136 char bit0 = get_bit(scalar, 31 - i + j); |
1138 char bit1 = get_bit(scalar, 95 - i + j); | 1137 char bit1 = get_bit(scalar, 95 - i + j); |
1139 char bit2 = get_bit(scalar, 159 - i + j); | 1138 char bit2 = get_bit(scalar, 159 - i + j); |
1140 char bit3 = get_bit(scalar, 223 - i + j); | 1139 char bit3 = get_bit(scalar, 223 - i + j); |
1141 limb index = bit0 | (bit1 << 1) | (bit2 << 2) | (bit3 << 3); | 1140 limb index = bit0 | (bit1 << 1) | (bit2 << 2) | (bit3 << 3); |
1142 | 1141 |
1143 table_offset = ((((s32)j) << (32-6)) >> 31) & (30*NLIMBS); | |
1144 select_affine_point(px, py, kPrecomputed + table_offset, index); | 1142 select_affine_point(px, py, kPrecomputed + table_offset, index); |
| 1143 table_offset += 30 * NLIMBS; |
1145 | 1144 |
1146 /* Since scalar is less than the order of the group, we know that | 1145 /* Since scalar is less than the order of the group, we know that |
1147 * {nx,ny,nz} != {px,py,1}, unless both are zero, which we handle | 1146 * {nx,ny,nz} != {px,py,1}, unless both are zero, which we handle |
1148 * below. | 1147 * below. |
1149 */ | 1148 */ |
1150 point_add_mixed(tx, ty, tz, nx, ny, nz, px, py); | 1149 point_add_mixed(tx, ty, tz, nx, ny, nz, px, py); |
1151 /* The result of point_add_mixed is incorrect if {nx,ny,nz} is zero | 1150 /* The result of point_add_mixed is incorrect if {nx,ny,nz} is zero |
1152 * (a.k.a. the point at infinity). We handle that situation by | 1151 * (a.k.a. the point at infinity). We handle that situation by |
1153 * copying the point from the table. | 1152 * copying the point from the table. |
1154 */ | 1153 */ |
(...skipping 67 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
1222 } | 1221 } |
1223 | 1222 |
1224 index = scalar[31 - i / 2]; | 1223 index = scalar[31 - i / 2]; |
1225 if ((i & 1) == 1) { | 1224 if ((i & 1) == 1) { |
1226 index &= 15; | 1225 index &= 15; |
1227 } else { | 1226 } else { |
1228 index >>= 4; | 1227 index >>= 4; |
1229 } | 1228 } |
1230 | 1229 |
1231 /* See the comments in scalar_base_mult about handling infinities. */ | 1230 /* See the comments in scalar_base_mult about handling infinities. */ |
1232 » select_jacobian_point(px, py, pz, (limb *) precomp, index); | 1231 » select_jacobian_point(px, py, pz, precomp[0][0], index); |
1233 point_add(tx, ty, tz, nx, ny, nz, px, py, pz); | 1232 point_add(tx, ty, tz, nx, ny, nz, px, py, pz); |
1234 copy_conditional(nx, px, n_is_infinity_mask); | 1233 copy_conditional(nx, px, n_is_infinity_mask); |
1235 copy_conditional(ny, py, n_is_infinity_mask); | 1234 copy_conditional(ny, py, n_is_infinity_mask); |
1236 copy_conditional(nz, pz, n_is_infinity_mask); | 1235 copy_conditional(nz, pz, n_is_infinity_mask); |
1237 | 1236 |
1238 » p_is_noninfinite_mask = ((s32) ~ (index - 1)) >> 31; | 1237 » p_is_noninfinite_mask = NON_ZERO_TO_ALL_ONES(index); |
1239 mask = p_is_noninfinite_mask & ~n_is_infinity_mask; | 1238 mask = p_is_noninfinite_mask & ~n_is_infinity_mask; |
1240 copy_conditional(nx, tx, mask); | 1239 copy_conditional(nx, tx, mask); |
1241 copy_conditional(ny, ty, mask); | 1240 copy_conditional(ny, ty, mask); |
1242 copy_conditional(nz, tz, mask); | 1241 copy_conditional(nz, tz, mask); |
1243 n_is_infinity_mask &= ~p_is_noninfinite_mask; | 1242 n_is_infinity_mask &= ~p_is_noninfinite_mask; |
1244 } | 1243 } |
1245 } | 1244 } |
1246 | 1245 |
1247 /* Interface with Freebl: */ | 1246 /* Interface with Freebl: */ |
1248 | 1247 |
| 1248 /* BYTESWAP_MP_DIGIT_TO_LE swaps the bytes of a mp_digit to |
| 1249 * little-endian order. |
| 1250 */ |
1249 #ifdef IS_BIG_ENDIAN | 1251 #ifdef IS_BIG_ENDIAN |
1250 #error "This code needs a little-endian processor" | 1252 #ifdef __APPLE__ |
| 1253 #include <libkern/OSByteOrder.h> |
| 1254 #define BYTESWAP32(x) OSSwapInt32(x) |
| 1255 #define BYTESWAP64(x) OSSwapInt64(x) |
| 1256 #else |
| 1257 #define BYTESWAP32(x) \ |
| 1258 ((x) >> 24 | (x) >> 8 & 0xff00 | ((x) & 0xff00) << 8 | (x) << 24) |
| 1259 #define BYTESWAP64(x) \ |
| 1260 ((x) >> 56 | (x) >> 40 & 0xff00 | \ |
| 1261 (x) >> 24 & 0xff0000 | (x) >> 8 & 0xff000000 | \ |
| 1262 ((x) & 0xff000000) << 8 | ((x) & 0xff0000) << 24 | \ |
| 1263 ((x) & 0xff00) << 40 | (x) << 56) |
1251 #endif | 1264 #endif |
1252 | 1265 |
1253 static const u32 kRInvDigits[8] = { | 1266 #ifdef MP_USE_UINT_DIGIT |
| 1267 #define BYTESWAP_MP_DIGIT_TO_LE(x) BYTESWAP32(BYTESWAP32(x)) |
| 1268 #else |
| 1269 #define BYTESWAP_MP_DIGIT_TO_LE(x) BYTESWAP64(BYTESWAP64(x)) |
| 1270 #endif |
| 1271 #endif /* IS_BIG_ENDIAN */ |
| 1272 |
| 1273 #ifdef MP_USE_UINT_DIGIT |
| 1274 static const mp_digit kRInvDigits[8] = { |
1254 0x80000000, 1, 0xffffffff, 0, | 1275 0x80000000, 1, 0xffffffff, 0, |
1255 0x80000001, 0xfffffffe, 1, 0x7fffffff | 1276 0x80000001, 0xfffffffe, 1, 0x7fffffff |
1256 }; | 1277 }; |
| 1278 #else |
| 1279 static const mp_digit kRInvDigits[4] = { |
| 1280 PR_UINT64(0x180000000), 0xffffffff, |
| 1281 PR_UINT64(0xfffffffe80000001), PR_UINT64(0x7fffffff00000001) |
| 1282 }; |
| 1283 #endif |
1257 #define MP_DIGITS_IN_256_BITS (32/sizeof(mp_digit)) | 1284 #define MP_DIGITS_IN_256_BITS (32/sizeof(mp_digit)) |
1258 static const mp_int kRInv = { | 1285 static const mp_int kRInv = { |
1259 MP_ZPOS, | 1286 MP_ZPOS, |
1260 MP_DIGITS_IN_256_BITS, | 1287 MP_DIGITS_IN_256_BITS, |
1261 MP_DIGITS_IN_256_BITS, | 1288 MP_DIGITS_IN_256_BITS, |
1262 /* Because we are running on a little-endian processor, this cast works for | |
1263 * both 32 and 64-bit processors. | |
1264 */ | |
1265 (mp_digit*) kRInvDigits | 1289 (mp_digit*) kRInvDigits |
1266 }; | 1290 }; |
1267 | 1291 |
1268 static const limb kTwo28 = 0x10000000; | 1292 static const limb kTwo28 = 0x10000000; |
1269 static const limb kTwo29 = 0x20000000; | 1293 static const limb kTwo29 = 0x20000000; |
1270 | 1294 |
1271 /* to_montgomery sets out = R*in. */ | 1295 /* to_montgomery sets out = R*in. */ |
1272 static mp_err to_montgomery(felem out, const mp_int *in, const ECGroup *group) | 1296 static mp_err to_montgomery(felem out, const mp_int *in, const ECGroup *group) |
1273 { | 1297 { |
1274 /* There are no MPI functions for bitshift operations and we wish to shift | 1298 /* There are no MPI functions for bitshift operations and we wish to shift |
(...skipping 55 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
1330 CLEANUP: | 1354 CLEANUP: |
1331 mp_clear(&result); | 1355 mp_clear(&result); |
1332 mp_clear(&tmp); | 1356 mp_clear(&tmp); |
1333 return res; | 1357 return res; |
1334 } | 1358 } |
1335 | 1359 |
1336 /* scalar_from_mp_int sets out_scalar=n, where n < the group order. */ | 1360 /* scalar_from_mp_int sets out_scalar=n, where n < the group order. */ |
1337 static void scalar_from_mp_int(u8 out_scalar[32], const mp_int *n) | 1361 static void scalar_from_mp_int(u8 out_scalar[32], const mp_int *n) |
1338 { | 1362 { |
1339 /* We require that |n| is less than the order of the group and therefore it | 1363 /* We require that |n| is less than the order of the group and therefore it |
1340 * will fit into |scalar|. However, these is a timing side-channel here that | 1364 * will fit into |out_scalar|. However, these is a timing side-channel here |
1341 * we cannot avoid: if |n| is sufficiently small it may be one or more words | 1365 * that we cannot avoid: if |n| is sufficiently small it may be one or more |
1342 * too short and we'll copy less data. | 1366 * words too short and we'll copy less data. |
1343 */ | 1367 */ |
| 1368 PORT_Assert(MP_USED(n) * sizeof(mp_digit) <= 32); |
1344 memset(out_scalar, 0, 32); | 1369 memset(out_scalar, 0, 32); |
| 1370 #ifdef IS_LITTLE_ENDIAN |
1345 memcpy(out_scalar, MP_DIGITS(n), MP_USED(n) * sizeof(mp_digit)); | 1371 memcpy(out_scalar, MP_DIGITS(n), MP_USED(n) * sizeof(mp_digit)); |
| 1372 #else |
| 1373 { |
| 1374 mp_size i; |
| 1375 mp_digit swapped[MP_DIGITS_IN_256_BITS]; |
| 1376 for (i = 0; i < MP_USED(n); i++) { |
| 1377 swapped[i] = BYTESWAP_MP_DIGIT_TO_LE(MP_DIGIT(n, i)); |
| 1378 } |
| 1379 memcpy(out_scalar, swapped, MP_USED(n) * sizeof(mp_digit)); |
| 1380 } |
| 1381 #endif |
1346 } | 1382 } |
1347 | 1383 |
1348 /* ec_GFp_nistp256_base_point_mul sets {out_x,out_y} = nG, where n is < the | 1384 /* ec_GFp_nistp256_base_point_mul sets {out_x,out_y} = nG, where n is < the |
1349 * order of the group. | 1385 * order of the group. |
1350 */ | 1386 */ |
1351 static mp_err ec_GFp_nistp256_base_point_mul(const mp_int *n, | 1387 static mp_err ec_GFp_nistp256_base_point_mul(const mp_int *n, |
1352 mp_int *out_x, mp_int *out_y, | 1388 mp_int *out_x, mp_int *out_y, |
1353 const ECGroup *group) | 1389 const ECGroup *group) |
1354 { | 1390 { |
1355 u8 scalar[32]; | 1391 u8 scalar[32]; |
(...skipping 105 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
1461 /* Wire in fast point multiplication for named curves. */ | 1497 /* Wire in fast point multiplication for named curves. */ |
1462 mp_err ec_group_set_gfp256_32(ECGroup *group, ECCurveName name) | 1498 mp_err ec_group_set_gfp256_32(ECGroup *group, ECCurveName name) |
1463 { | 1499 { |
1464 if (name == ECCurve_NIST_P256) { | 1500 if (name == ECCurve_NIST_P256) { |
1465 group->base_point_mul = &ec_GFp_nistp256_base_point_mul; | 1501 group->base_point_mul = &ec_GFp_nistp256_base_point_mul; |
1466 group->point_mul = &ec_GFp_nistp256_point_mul; | 1502 group->point_mul = &ec_GFp_nistp256_point_mul; |
1467 group->points_mul = &ec_GFp_nistp256_points_mul_vartime; | 1503 group->points_mul = &ec_GFp_nistp256_points_mul_vartime; |
1468 } | 1504 } |
1469 return MP_OKAY; | 1505 return MP_OKAY; |
1470 } | 1506 } |
OLD | NEW |