X-Git-Url: https://www.bearssl.org/gitweb//home/git/?p=BearSSL;a=blobdiff_plain;f=src%2Fec%2Fec_p256_m15.c;h=8d68d1d21bdba719fde019c496204d615b9bd444;hp=99c7224b3108940dd67971468a0e63aab3199b9f;hb=001d094d140488def90cb3876d5c03f4d79b3e27;hpb=2f9c953af45299f8546df8984d5262e767a7d943

diff --git a/src/ec/ec_p256_m15.c b/src/ec/ec_p256_m15.c
index 99c7224..8d68d1d 100644
--- a/src/ec/ec_p256_m15.c
+++ b/src/ec/ec_p256_m15.c
@@ -1101,18 +1101,20 @@ mul_f256(uint32_t *d, const uint32_t *a, const uint32_t *b)
 	}
 
 	/*
-	 * Propagate carries. Since the operation above really is a
-	 * truncature, followed by the addition of nonnegative values,
-	 * the result will be positive. Moreover, the carry cannot
-	 * exceed 5 bits (we performed 20 additions with values smaller
-	 * than 256 bits).
+	 * Propagate carries. This is a signed propagation, and the
+	 * result may be negative. The loop above may enlarge values,
+	 * but not too much: worst case is the chain involving t[i - 3],
+	 * in which a value may be added to itself up to 7 times. Since
+	 * starting values are 13-bit each, all words fit on 20 bits
+	 * (21 to account for the sign bit).
 	 */
 	cc = norm13(t, t, 20);
 
 	/*
 	 * Perform modular reduction again for the bits beyond 256 (the carry
-	 * and the bits 256..259). This time, we can simply inject full
-	 * word values.
+	 * and the bits 256..259). Since the largest shift below is by 10
+	 * bits, and the values fit on 21 bits, values fit in 32-bit words,
+	 * thereby allowing injecting full word values.
 	 */
 	cc = (cc << 4) | (t[19] >> 9);
 	t[19] &= 0x01FF;
@@ -1120,6 +1122,22 @@ mul_f256(uint32_t *d, const uint32_t *a, const uint32_t *b)
 	t[14] -= cc << 10;
 	t[7] -= cc << 5;
 	t[0] += cc;
+
+	/*
+	 * If the carry is negative, then after carry propagation, we may
+	 * end up with a value which is negative, and we don't want that.
+	 * Thus, in that case, we add the modulus. Note that the subtraction
+	 * result, when the carry is negative, is always smaller than the
+	 * modulus, so the extra addition will not make the value exceed
+	 * twice the modulus.
+	 */
+	cc >>= 31;
+	t[0] -= cc;
+	t[7] += cc << 5;
+	t[14] += cc << 10;
+	t[17] -= cc << 3;
+	t[19] += cc << 9;
+
 	norm13(d, t, 20);
 }
 
@@ -1172,18 +1190,20 @@ square_f256(uint32_t *d, const uint32_t *a)
 	}
 
 	/*
-	 * Propagate carries. Since the operation above really is a
-	 * truncature, followed by the addition of nonnegative values,
-	 * the result will be positive. Moreover, the carry cannot
-	 * exceed 5 bits (we performed 20 additions with values smaller
-	 * than 256 bits).
+	 * Propagate carries. This is a signed propagation, and the
+	 * result may be negative. The loop above may enlarge values,
+	 * but not too much: worst case is the chain involving t[i - 3],
+	 * in which a value may be added to itself up to 7 times. Since
+	 * starting values are 13-bit each, all words fit on 20 bits
+	 * (21 to account for the sign bit).
 	 */
 	cc = norm13(t, t, 20);
 
 	/*
 	 * Perform modular reduction again for the bits beyond 256 (the carry
-	 * and the bits 256..259). This time, we can simply inject full
-	 * word values.
+	 * and the bits 256..259). Since the largest shift below is by 10
+	 * bits, and the values fit on 21 bits, values fit in 32-bit words,
+	 * thereby allowing injecting full word values.
 	 */
 	cc = (cc << 4) | (t[19] >> 9);
 	t[19] &= 0x01FF;
@@ -1191,6 +1211,22 @@ square_f256(uint32_t *d, const uint32_t *a)
 	t[14] -= cc << 10;
 	t[7] -= cc << 5;
 	t[0] += cc;
+
+	/*
+	 * If the carry is negative, then after carry propagation, we may
+	 * end up with a value which is negative, and we don't want that.
+	 * Thus, in that case, we add the modulus. Note that the subtraction
+	 * result, when the carry is negative, is always smaller than the
+	 * modulus, so the extra addition will not make the value exceed
+	 * twice the modulus.
+	 */
+	cc >>= 31;
+	t[0] -= cc;
+	t[7] += cc << 5;
+	t[14] += cc << 10;
+	t[17] -= cc << 3;
+	t[19] += cc << 9;
+
 	norm13(d, t, 20);
 }
 
@@ -1703,7 +1739,7 @@ p256_decode(p256_jacobian *P, const void *src, size_t len)
 	memcpy(P->y, ty, sizeof ty);
 	memset(P->z, 0, sizeof P->z);
 	P->z[0] = 1;
-	return NEQ(bad, 0) ^ 1;
+	return EQ(bad, 0);
 }
 
 /*
@@ -1987,6 +2023,14 @@ api_order(int curve, size_t *len)
 	return P256_N;
 }
 
+static size_t
+api_xoff(int curve, size_t *len)
+{
+	(void)curve;	/* curve is not consulted here */
+	*len = 32;	/* always 32; presumably X length in bytes -- TODO confirm */
+	return 1;	/* always 1; presumably X offset in a point -- TODO confirm */
+}
+
 static uint32_t
 api_mul(unsigned char *G, size_t Glen,
 	const unsigned char *x, size_t xlen, int curve)
@@ -2079,6 +2123,7 @@ const br_ec_impl br_ec_p256_m15 = {
 	(uint32_t)0x00800000,
 	&api_generator,
 	&api_order,
+	&api_xoff,
 	&api_mul,
 	&api_mulgen,
 	&api_muladd