X-Git-Url: https://www.bearssl.org/gitweb//home/git/?p=BearSSL;a=blobdiff_plain;f=src%2Fec%2Fec_c25519_m15.c;h=deff55b330b16a4412fa1910b038e11aa9015a2f;hp=3cb98d51ef42a249a906d9930d3b2c24f3a7e6b1;hb=9721b3e7566693128a5923cc1f481216b7853466;hpb=3655193439d4e093bb80a2f0d9e02179d424de08 diff --git a/src/ec/ec_c25519_m15.c b/src/ec/ec_c25519_m15.c index 3cb98d5..deff55b 100644 --- a/src/ec/ec_c25519_m15.c +++ b/src/ec/ec_c25519_m15.c @@ -808,6 +808,7 @@ mul20(uint32_t *d, const uint32_t *a, const uint32_t *b) t[37] = MUL15(a[18], b[19]) + MUL15(a[19], b[18]); t[38] = MUL15(a[19], b[19]); + d[39] = norm13(d, t, 39); } @@ -1026,6 +1027,7 @@ square20(uint32_t *d, const uint32_t *a) + ((MUL15(a[17], a[19])) << 1); t[37] = ((MUL15(a[18], a[19])) << 1); t[38] = MUL15(a[19], a[19]); + d[39] = norm13(d, t, 39); } @@ -1060,24 +1062,21 @@ reduce_final_f255(uint32_t *d) return cc; } -/* - * Perform a multiplication of two integers modulo 2^255-19. - * Operands are arrays of 20 words, each containing 13 bits of data, in - * little-endian order. Input value may be up to 2^256-1; on output, value - * fits on 256 bits and is lower than twice the modulus. - */ static void -f255_mul(uint32_t *d, const uint32_t *a, const uint32_t *b) +f255_mulgen(uint32_t *d, const uint32_t *a, const uint32_t *b, int square) { uint32_t t[40], cc, w; - int i; /* * Compute raw multiplication. All result words fit in 13 bits * each; upper word (t[39]) must fit on 5 bits, since the product * of two 256-bit integers must fit on 512 bits. */ - mul20(t, a, b); + if (square) { + square20(t, a); + } else { + mul20(t, a, b); + } /* * Modular reduction: each high word is added where necessary. @@ -1088,61 +1087,80 @@ f255_mul(uint32_t *d, const uint32_t *a, const uint32_t *b) */ cc = MUL15(t[19] >> 8, 19); t[19] &= 0xFF; - for (i = 0; i < 20; i ++) { - w = t[i] + cc + MUL15(t[i + 20], 608); - t[i] = w & 0x1FFF; - cc = w >> 13; - } + +#define MM1(x) do { \ + w = t[x] + cc + MUL15(t[(x) + 20], 608); \ + t[x] = w & 0x1FFF; \ + cc = w >> 13; \ + } while (0) + + MM1( 0); + MM1( 1); + MM1( 2); + MM1( 3); + MM1( 4); + MM1( 5); + MM1( 6); + MM1( 7); + MM1( 8); + MM1( 9); + MM1(10); + MM1(11); + MM1(12); + MM1(13); + MM1(14); + MM1(15); + MM1(16); + MM1(17); + MM1(18); + MM1(19); + +#undef MM1 + cc = MUL15(w >> 8, 19); t[19] &= 0xFF; - for (i = 0; i < 20; i ++) { - w = t[i] + cc; - d[i] = w & 0x1FFF; - cc = w >> 13; - } + +#define MM2(x) do { \ + w = t[x] + cc; \ + d[x] = w & 0x1FFF; \ + cc = w >> 13; \ + } while (0) + + MM2( 0); + MM2( 1); + MM2( 2); + MM2( 3); + MM2( 4); + MM2( 5); + MM2( 6); + MM2( 7); + MM2( 8); + MM2( 9); + MM2(10); + MM2(11); + MM2(12); + MM2(13); + MM2(14); + MM2(15); + MM2(16); + MM2(17); + MM2(18); + MM2(19); + +#undef MM2 } /* - * Square an integer modulo 2^255-19. - * Operand is an array of 20 words, each containing 13 bits of data, in + * Perform a multiplication of two integers modulo 2^255-19. + * Operands are arrays of 20 words, each containing 13 bits of data, in * little-endian order. Input value may be up to 2^256-1; on output, value * fits on 256 bits and is lower than twice the modulus. + * + * f255_mul() is the general multiplication, f255_square() is specialised + * for squarings. */ -static void -f255_square(uint32_t *d, const uint32_t *a) -{ - uint32_t t[40], cc, w; - int i; - - /* - * Compute raw multiplication. All result words fit in 13 bits - * each; upper word (t[39]) must fit on 5 bits, since the product - * of two 256-bit integers must fit on 512 bits. - */ - square20(t, a); - - /* - * Modular reduction: each high word is added where necessary. - * Since the modulus is 2^255-19 and word 20 corresponds to - * offset 20*13 = 260, word 20+k must be added to word k with - * a factor of 19*2^5 = 608. The extra bits in word 19 are also - * added that way. - */ - cc = MUL15(t[19] >> 8, 19); - t[19] &= 0xFF; - for (i = 0; i < 20; i ++) { - w = t[i] + cc + MUL15(t[i + 20], 608); - t[i] = w & 0x1FFF; - cc = w >> 13; - } - cc = MUL15(w >> 8, 19); - t[19] &= 0xFF; - for (i = 0; i < 20; i ++) { - w = t[i] + cc; - d[i] = w & 0x1FFF; - cc = w >> 13; - } -} +#define f255_mul(d, a, b) f255_mulgen(d, a, b, 0) +#define f255_square(d, a) f255_mulgen(d, a, a, 1) /* * Add two values in F255. Partial reduction is performed (down to less @@ -1314,11 +1332,11 @@ api_mul(unsigned char *G, size_t Glen, memset(z3, 0, sizeof z3); z3[0] = 1; - memcpy(k, kb, kblen); - memset(k + kblen, 0, (sizeof k) - kblen); - k[0] &= 0xF8; - k[31] &= 0x7F; - k[31] |= 0x40; + memset(k, 0, (sizeof k) - kblen); + memcpy(k + (sizeof k) - kblen, kb, kblen); + k[31] &= 0xF8; + k[0] &= 0x7F; + k[0] |= 0x40; /* obsolete print_int("x1", x1); @@ -1328,7 +1346,7 @@ api_mul(unsigned char *G, size_t Glen, for (i = 254; i >= 0; i --) { uint32_t kt; - kt = (k[i >> 3] >> (i & 7)) & 1; + kt = (k[31 - (i >> 3)] >> (i & 7)) & 1; swap ^= kt; cswap(x2, x3, swap); cswap(z2, z3, swap);