X-Git-Url: https://www.bearssl.org/gitweb//home/git/?p=BearSSL;a=blobdiff_plain;f=src%2Fec%2Fec_p256_m31.c;h=b185937e160603ecfc5ef911e811b319d41d2c49;hp=0631a135f7d1a0a3b0e1d39e34c967fd884341d2;hb=b2ec2030e40acf5e9e4cd0f2669aacb27eadb540;hpb=bd3036844bd20b2b8d7bce7fee5ad010ce401915

diff --git a/src/ec/ec_p256_m31.c b/src/ec/ec_p256_m31.c
index 0631a13..b185937 100644
--- a/src/ec/ec_p256_m31.c
+++ b/src/ec/ec_p256_m31.c
@@ -394,7 +394,7 @@ mul_f256(uint32_t *d, const uint32_t *a, const uint32_t *b)
 	uint32_t t[18];
 	uint64_t s[18];
 	uint64_t cc, x;
-	uint32_t z;
+	uint32_t z, c;
 	int i;
 
 	mul9(t, a, b);
@@ -423,17 +423,17 @@ mul_f256(uint32_t *d, const uint32_t *a, const uint32_t *b)
 	}
 
 	for (i = 17; i >= 9; i --) {
-		uint64_t x;
-
-		x = s[i];
-		s[i - 1] += ARSHW(x, 2);
-		s[i - 2] += (x << 28) & 0x3FFFFFFF;
-		s[i - 2] -= ARSHW(x, 4);
-		s[i - 3] -= (x << 26) & 0x3FFFFFFF;
-		s[i - 5] -= ARSHW(x, 10);
-		s[i - 6] -= (x << 20) & 0x3FFFFFFF;
-		s[i - 8] += ARSHW(x, 16);
-		s[i - 9] += (x << 14) & 0x3FFFFFFF;
+		uint64_t y;
+
+		y = s[i];
+		s[i - 1] += ARSHW(y, 2);
+		s[i - 2] += (y << 28) & 0x3FFFFFFF;
+		s[i - 2] -= ARSHW(y, 4);
+		s[i - 3] -= (y << 26) & 0x3FFFFFFF;
+		s[i - 5] -= ARSHW(y, 10);
+		s[i - 6] -= (y << 20) & 0x3FFFFFFF;
+		s[i - 8] += ARSHW(y, 16);
+		s[i - 9] += (y << 14) & 0x3FFFFFFF;
 	}
 
 	/*
@@ -465,7 +465,15 @@ mul_f256(uint32_t *d, const uint32_t *a, const uint32_t *b)
 	d[8] &= 0xFFFF;
 
 	/*
-	 * Subtract cc*p.
+	 * One extra round of reduction, for cc*2^256, which means
+	 * adding cc*(2^224-2^192-2^96+1) to a 256-bit (nonnegative)
+	 * value. If cc is negative, then it may happen (rarely, but
+	 * not negligibly so) that the result would be negative. In
+	 * order to avoid that, if cc is negative, then we add the
+	 * modulus once. Note that if cc is negative, then propagating
+	 * that carry must yield a value lower than the modulus, so
+	 * adding the modulus once will keep the final result under
+	 * twice the modulus.
 	 */
 	z = (uint32_t)cc;
 	d[3] -= z << 6;
@@ -473,6 +481,12 @@ mul_f256(uint32_t *d, const uint32_t *a, const uint32_t *b)
 	d[7] -= ARSH(z, 18);
 	d[7] += (z << 14) & 0x3FFFFFFF;
 	d[8] += ARSH(z, 16);
+	c = z >> 31;
+	d[0] -= c;
+	d[3] += c << 6;
+	d[6] += c << 12;
+	d[7] -= c << 14;
+	d[8] += c << 16;
 
 	for (i = 0; i < 9; i ++) {
 		uint32_t w;
@@ -492,7 +506,7 @@ square_f256(uint32_t *d, const uint32_t *a)
 	uint32_t t[18];
 	uint64_t s[18];
 	uint64_t cc, x;
-	uint32_t z;
+	uint32_t z, c;
 	int i;
 
 	square9(t, a);
@@ -521,17 +535,17 @@ square_f256(uint32_t *d, const uint32_t *a)
 	}
 
 	for (i = 17; i >= 9; i --) {
-		uint64_t x;
-
-		x = s[i];
-		s[i - 1] += ARSHW(x, 2);
-		s[i - 2] += (x << 28) & 0x3FFFFFFF;
-		s[i - 2] -= ARSHW(x, 4);
-		s[i - 3] -= (x << 26) & 0x3FFFFFFF;
-		s[i - 5] -= ARSHW(x, 10);
-		s[i - 6] -= (x << 20) & 0x3FFFFFFF;
-		s[i - 8] += ARSHW(x, 16);
-		s[i - 9] += (x << 14) & 0x3FFFFFFF;
+		uint64_t y;
+
+		y = s[i];
+		s[i - 1] += ARSHW(y, 2);
+		s[i - 2] += (y << 28) & 0x3FFFFFFF;
+		s[i - 2] -= ARSHW(y, 4);
+		s[i - 3] -= (y << 26) & 0x3FFFFFFF;
+		s[i - 5] -= ARSHW(y, 10);
+		s[i - 6] -= (y << 20) & 0x3FFFFFFF;
+		s[i - 8] += ARSHW(y, 16);
+		s[i - 9] += (y << 14) & 0x3FFFFFFF;
 	}
 
 	/*
@@ -563,7 +577,15 @@ square_f256(uint32_t *d, const uint32_t *a)
 	d[8] &= 0xFFFF;
 
 	/*
-	 * Subtract cc*p.
+	 * One extra round of reduction, for cc*2^256, which means
+	 * adding cc*(2^224-2^192-2^96+1) to a 256-bit (nonnegative)
+	 * value. If cc is negative, then it may happen (rarely, but
+	 * not negligibly so) that the result would be negative. In
+	 * order to avoid that, if cc is negative, then we add the
+	 * modulus once. Note that if cc is negative, then propagating
+	 * that carry must yield a value lower than the modulus, so
+	 * adding the modulus once will keep the final result under
+	 * twice the modulus.
 	 */
 	z = (uint32_t)cc;
 	d[3] -= z << 6;
@@ -571,6 +593,12 @@ square_f256(uint32_t *d, const uint32_t *a)
 	d[7] -= ARSH(z, 18);
 	d[7] += (z << 14) & 0x3FFFFFFF;
 	d[8] += ARSH(z, 16);
+	c = z >> 31;
+	d[0] -= c;
+	d[3] += c << 6;
+	d[6] += c << 12;
+	d[7] -= c << 14;
+	d[8] += c << 16;
 
 	for (i = 0; i < 9; i ++) {
 		uint32_t w;
@@ -1061,7 +1089,7 @@ p256_decode(p256_jacobian *P, const void *src, size_t len)
 	memcpy(P->y, ty, sizeof ty);
 	memset(P->z, 0, sizeof P->z);
 	P->z[0] = 1;
-	return NEQ(bad, 0) ^ 1;
+	return EQ(bad, 0);
 }
 
 /*
@@ -1356,12 +1384,13 @@ api_mul(unsigned char *G, size_t Glen,
 	p256_jacobian P;
 
 	(void)curve;
+	if (Glen != 65) {
+		return 0;
+	}
 	r = p256_decode(&P, G, Glen);
 	p256_mul(&P, x, xlen);
-	if (Glen >= 65) {
-		p256_to_affine(&P);
-		p256_encode(G, &P);
-	}
+	p256_to_affine(&P);
+	p256_encode(G, &P);
 	return r;
 }
 
@@ -1376,16 +1405,6 @@ api_mulgen(unsigned char *R,
 	p256_to_affine(&P);
 	p256_encode(R, &P);
 	return 65;
-
-	/*
-	const unsigned char *G;
-	size_t Glen;
-
-	G = api_generator(curve, &Glen);
-	memcpy(R, G, Glen);
-	api_mul(R, Glen, x, xlen, curve);
-	return Glen;
-	*/
 }
 
 static uint32_t
@@ -1398,6 +1417,9 @@ api_muladd(unsigned char *A, const unsigned char *B, size_t len,
 	int i;
 
 	(void)curve;
+	if (len != 65) {
+		return 0;
+	}
 	r = p256_decode(&P, A, len);
 	p256_mul(&P, x, xlen);
 	if (B == NULL) {
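The `c = z >> 31;` sequence added to both mul_f256 and square_f256 implements the conditional modulus addition described in the new comment. For P-256, p = 2^256 - 2^224 + 2^192 + 2^96 - 1, and in this file's representation of nine 30-bit limbs a bit position k lands in limb k/30 at offset k%30, so each term of p maps to one shifted add or subtract. The sketch below restates that logic as a standalone helper; the name cond_add_p256 is hypothetical and the function is an illustration of the patch, not BearSSL code:

#include <stdint.h>

/*
 * Illustration only (hypothetical helper, not part of BearSSL):
 * add the P-256 modulus p = 2^256 - 2^224 + 2^192 + 2^96 - 1 to the
 * nine 30-bit limbs of d[] when the sign bit of the reduction carry
 * z is set. Limbs may temporarily leave the 30-bit range; the
 * caller's carry-propagation loop renormalizes them, as in the
 * patched functions.
 */
static void
cond_add_p256(uint32_t *d, uint32_t z)
{
	uint32_t c;

	c = z >> 31;       /* 1 if the carry was negative, else 0 */
	d[0] -= c;         /* -1     : bit   0 = limb 0, offset  0 */
	d[3] += c << 6;    /* +2^96  : bit  96 = limb 3, offset  6 */
	d[6] += c << 12;   /* +2^192 : bit 192 = limb 6, offset 12 */
	d[7] -= c << 14;   /* -2^224 : bit 224 = limb 7, offset 14 */
	d[8] += c << 16;   /* +2^256 : bit 256 = limb 8, offset 16 */
}

Since c is always 0 or 1 and no branch depends on it, the correction costs the same whether or not it fires, preserving the constant-time behavior of the surrounding code.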
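The change in p256_decode from `NEQ(bad, 0) ^ 1` to `EQ(bad, 0)` is a behavior-preserving simplification. A sketch of the two constant-time comparators, written here in the style of BearSSL's inner.h (treat the exact definitions as an assumption), shows why the two expressions compute the same value:

#include <stdint.h>

/*
 * Constant-time comparators in the style of BearSSL's inner.h
 * (assumed definitions, for illustration). For q = x ^ y, the
 * expression q | -q has its top bit set exactly when q != 0, so
 * NEQ() yields 1 for "different" and EQ() is its complement:
 * EQ(bad, 0) == NEQ(bad, 0) ^ 1, computed in one step.
 */
static uint32_t
NEQ(uint32_t x, uint32_t y)
{
	uint32_t q;

	q = x ^ y;
	return (q | -q) >> 31;
}

static uint32_t
EQ(uint32_t x, uint32_t y)
{
	uint32_t q;

	q = x ^ y;
	return ((q | -q) >> 31) ^ 1;
}

The length checks added to api_mul and api_muladd follow the same defensive spirit: an uncompressed P-256 point encodes as exactly 65 bytes (the 0x04 tag followed by two 32-byte coordinates), so any other length is now rejected up front instead of being partially processed.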