Index: mozilla/security/nss/lib/freebl/ecl/ecp_256_32.c |
=================================================================== |
--- mozilla/security/nss/lib/freebl/ecl/ecp_256_32.c (revision 179928) |
+++ mozilla/security/nss/lib/freebl/ecl/ecp_256_32.c (working copy) |
@@ -10,10 +10,10 @@ |
#include "mpi.h" |
#include "mpi-priv.h" |
#include "ecp.h" |
+#include "secport.h" |
typedef PRUint8 u8; |
typedef PRUint32 u32; |
-typedef PRInt32 s32; |
typedef PRUint64 u64; |
/* Our field elements are represented as nine, unsigned 32-bit words. Freebl's |
@@ -161,11 +161,9 @@ |
* 0xffffffff for 0 < x <= 2**31 |
* 0 for x == 0 or x > 2**31. |
* |
- * This macro assumes that right-shifting a signed number shifts in the MSB on |
- * the left. This is not ensured by the C standard, but is true on the CPUs |
- * that we're targetting with this code (x86 and ARM). |
+ * x must be a u32 or an equivalent type such as limb. |
*/ |
-#define NON_ZERO_TO_ALL_ONES(x) (~((u32) (((s32) ((x)-1)) >> 31))) |
+#define NON_ZERO_TO_ALL_ONES(x) ((((u32)(x) - 1) >> 31) - 1) |
/* felem_reduce_carry adds a multiple of p in order to cancel |carry|, |
* which is a term at 2**257. |
@@ -1133,6 +1131,7 @@ |
if (i) { |
point_double(nx, ny, nz, nx, ny, nz); |
} |
+ table_offset = 0; |
for (j = 0; j <= 32; j += 32) { |
char bit0 = get_bit(scalar, 31 - i + j); |
char bit1 = get_bit(scalar, 95 - i + j); |
@@ -1140,8 +1139,8 @@ |
char bit3 = get_bit(scalar, 223 - i + j); |
limb index = bit0 | (bit1 << 1) | (bit2 << 2) | (bit3 << 3); |
- table_offset = ((((s32)j) << (32-6)) >> 31) & (30*NLIMBS); |
select_affine_point(px, py, kPrecomputed + table_offset, index); |
+ table_offset += 30 * NLIMBS; |
/* Since scalar is less than the order of the group, we know that |
* {nx,ny,nz} != {px,py,1}, unless both are zero, which we handle |
@@ -1229,13 +1228,13 @@ |
} |
/* See the comments in scalar_base_mult about handling infinities. */ |
- select_jacobian_point(px, py, pz, (limb *) precomp, index); |
+ select_jacobian_point(px, py, pz, precomp[0][0], index); |
point_add(tx, ty, tz, nx, ny, nz, px, py, pz); |
copy_conditional(nx, px, n_is_infinity_mask); |
copy_conditional(ny, py, n_is_infinity_mask); |
copy_conditional(nz, pz, n_is_infinity_mask); |
- p_is_noninfinite_mask = ((s32) ~ (index - 1)) >> 31; |
+ p_is_noninfinite_mask = NON_ZERO_TO_ALL_ONES(index); |
mask = p_is_noninfinite_mask & ~n_is_infinity_mask; |
copy_conditional(nx, tx, mask); |
copy_conditional(ny, ty, mask); |
@@ -1246,22 +1245,47 @@ |
/* Interface with Freebl: */ |
+/* BYTESWAP_MP_DIGIT_TO_LE swaps the bytes of a mp_digit to |
+ * little-endian order. |
+ */ |
#ifdef IS_BIG_ENDIAN |
-#error "This code needs a little-endian processor" |
+#ifdef __APPLE__ |
+#include <libkern/OSByteOrder.h> |
+#define BYTESWAP32(x) OSSwapInt32(x) |
+#define BYTESWAP64(x) OSSwapInt64(x) |
+#else |
+#define BYTESWAP32(x) \ |
+ ((x) >> 24 | (x) >> 8 & 0xff00 | ((x) & 0xff00) << 8 | (x) << 24) |
+#define BYTESWAP64(x) \ |
+ ((x) >> 56 | (x) >> 40 & 0xff00 | \ |
+ (x) >> 24 & 0xff0000 | (x) >> 8 & 0xff000000 | \ |
+ ((x) & 0xff000000) << 8 | ((x) & 0xff0000) << 24 | \ |
+ ((x) & 0xff00) << 40 | (x) << 56) |
#endif |
-static const u32 kRInvDigits[8] = { |
+#ifdef MP_USE_UINT_DIGIT |
+#define BYTESWAP_MP_DIGIT_TO_LE(x) BYTESWAP32(x) |
+#else |
+#define BYTESWAP_MP_DIGIT_TO_LE(x) BYTESWAP64(x) |
+#endif |
+#endif /* IS_BIG_ENDIAN */ |
+ |
+#ifdef MP_USE_UINT_DIGIT |
+static const mp_digit kRInvDigits[8] = { |
0x80000000, 1, 0xffffffff, 0, |
0x80000001, 0xfffffffe, 1, 0x7fffffff |
}; |
+#else |
+static const mp_digit kRInvDigits[4] = { |
+ PR_UINT64(0x180000000), 0xffffffff, |
+ PR_UINT64(0xfffffffe80000001), PR_UINT64(0x7fffffff00000001) |
+}; |
+#endif |
#define MP_DIGITS_IN_256_BITS (32/sizeof(mp_digit)) |
static const mp_int kRInv = { |
MP_ZPOS, |
MP_DIGITS_IN_256_BITS, |
MP_DIGITS_IN_256_BITS, |
- /* Because we are running on a little-endian processor, this cast works for |
- * both 32 and 64-bit processors. |
- */ |
(mp_digit*) kRInvDigits |
}; |
@@ -1337,12 +1361,24 @@ |
static void scalar_from_mp_int(u8 out_scalar[32], const mp_int *n) |
{ |
/* We require that |n| is less than the order of the group and therefore it |
- * will fit into |scalar|. However, these is a timing side-channel here that |
- * we cannot avoid: if |n| is sufficiently small it may be one or more words |
- * too short and we'll copy less data. |
+ * will fit into |out_scalar|. However, there is a timing side-channel here |
+ * that we cannot avoid: if |n| is sufficiently small it may be one or more |
+ * words too short and we'll copy less data. |
*/ |
+ PORT_Assert(MP_USED(n) * sizeof(mp_digit) <= 32); |
memset(out_scalar, 0, 32); |
+#ifdef IS_LITTLE_ENDIAN |
memcpy(out_scalar, MP_DIGITS(n), MP_USED(n) * sizeof(mp_digit)); |
+#else |
+ { |
+ mp_size i; |
+ mp_digit swapped[MP_DIGITS_IN_256_BITS]; |
+ for (i = 0; i < MP_USED(n); i++) { |
+ swapped[i] = BYTESWAP_MP_DIGIT_TO_LE(MP_DIGIT(n, i)); |
+ } |
+ memcpy(out_scalar, swapped, MP_USED(n) * sizeof(mp_digit)); |
+ } |
+#endif |
} |
/* ec_GFp_nistp256_base_point_mul sets {out_x,out_y} = nG, where n is < the |