X-Git-Url: https://www.bearssl.org/gitweb//home/git/?p=BearSSL;a=blobdiff_plain;f=src%2Fec%2Fec_c25519_i15.c;h=8fadcf48039b5c9bca1f04aeab1c1c3ad4817fab;hp=79560ae9f6197be721501c22670e197a86f887d6;hb=946f5bad7683aa79a9f4fcab760206e2aad8c555;hpb=2f9c953af45299f8546df8984d5262e767a7d943 diff --git a/src/ec/ec_c25519_i15.c b/src/ec/ec_c25519_i15.c index 79560ae..8fadcf4 100644 --- a/src/ec/ec_c25519_i15.c +++ b/src/ec/ec_c25519_i15.c @@ -46,6 +46,27 @@ static const uint16_t C255_R2[] = { 0x0000 }; +/* obsolete +#include +#include +static void +print_int_mont(const char *name, const uint16_t *x) +{ + uint16_t y[18]; + unsigned char tmp[32]; + size_t u; + + printf("%s = ", name); + memcpy(y, x, sizeof y); + br_i15_from_monty(y, C255_P, P0I); + br_i15_encode(tmp, sizeof tmp, y); + for (u = 0; u < sizeof tmp; u ++) { + printf("%02X", tmp[u]); + } + printf("\n"); +} +*/ + static const uint16_t C255_A24[] = { 0x0110, 0x45D3, 0x0046, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, @@ -61,10 +82,10 @@ static const unsigned char GEN[] = { }; static const unsigned char ORDER[] = { - 0x10, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x14, 0xDE, 0xF9, 0xDE, 0xA2, 0xF7, 0x9C, 0xD6, - 0x58, 0x12, 0x63, 0x1A, 0x5C, 0xF5, 0xD3, 0xED + 0x7F, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, + 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, + 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, + 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF }; static const unsigned char * @@ -83,6 +104,14 @@ api_order(int curve, size_t *len) return ORDER; } +static size_t +api_xoff(int curve, size_t *len) +{ + (void)curve; + *len = 32; + return 0; +} + static void cswap(uint16_t *a, uint16_t *b, uint32_t ctl) { @@ -150,8 +179,14 @@ static uint32_t api_mul(unsigned char *G, size_t Glen, const unsigned char *kb, size_t kblen, int curve) { +#define ILEN (18 * sizeof(uint16_t)) + + /* + * The a[] and b[] arrays have an extra word to allow for + * decoding without using br_i15_decode_reduce(). + */ uint16_t x1[18], x2[18], x3[18], z2[18], z3[18]; - uint16_t a[18], aa[18], b[18], bb[18]; + uint16_t a[19], aa[18], b[19], bb[18]; uint16_t c[18], d[18], e[18], da[18], cb[18]; unsigned char k[32]; uint32_t swap; @@ -176,34 +211,61 @@ api_mul(unsigned char *G, size_t Glen, */ byteswap(G); + /* + * Decode the point ('u' coordinate). This should be reduced + * modulo p, but we prefer to avoid the dependency on + * br_i15_decode_reduce(). Instead, we use br_i15_decode_mod() + * with a synthetic modulus of value 2^255 (this must work + * since G was truncated to 255 bits), then use a conditional + * subtraction. We use br_i15_decode_mod() and not + * br_i15_decode(), because the ec_prime_i15 implementation uses + * the former but not the latter. + * br_i15_decode_reduce(a, G, 32, C255_P); + */ + br_i15_zero(b, 0x111); + b[18] = 1; + br_i15_decode_mod(a, G, 32, b); + a[0] = 0x110; + br_i15_sub(a, C255_P, NOT(br_i15_sub(a, C255_P, 0))); + /* * Initialise variables x1, x2, z2, x3 and z3. We set all of them * into Montgomery representation. */ - br_i15_decode_reduce(a, G, 32, C255_P); br_i15_montymul(x1, a, C255_R2, C255_P, P0I); - memcpy(x3, x1, sizeof x1); + memcpy(x3, x1, ILEN); br_i15_zero(z2, C255_P[0]); - memcpy(x2, z2, sizeof z2); + memcpy(x2, z2, ILEN); x2[1] = 19; - memcpy(z3, x2, sizeof x2); + memcpy(z3, x2, ILEN); - memcpy(k, kb, kblen); - memset(k + kblen, 0, (sizeof k) - kblen); - k[0] &= 0xF8; - k[31] &= 0x7F; - k[31] |= 0x40; + memset(k, 0, (sizeof k) - kblen); + memcpy(k + (sizeof k) - kblen, kb, kblen); + k[31] &= 0xF8; + k[0] &= 0x7F; + k[0] |= 0x40; + + /* obsolete + print_int_mont("x1", x1); + */ swap = 0; for (i = 254; i >= 0; i --) { uint32_t kt; - kt = (k[i >> 3] >> (i & 7)) & 1; + kt = (k[31 - (i >> 3)] >> (i & 7)) & 1; swap ^= kt; cswap(x2, x3, swap); cswap(z2, z3, swap); swap = kt; + /* obsolete + print_int_mont("x2", x2); + print_int_mont("z2", z2); + print_int_mont("x3", x3); + print_int_mont("z3", z3); + */ + c255_add(a, x2, z2); c255_mul(aa, a, a); c255_sub(b, x2, z2); @@ -213,6 +275,19 @@ api_mul(unsigned char *G, size_t Glen, c255_sub(d, x3, z3); c255_mul(da, d, a); c255_mul(cb, c, b); + + /* obsolete + print_int_mont("a ", a); + print_int_mont("aa", aa); + print_int_mont("b ", b); + print_int_mont("bb", bb); + print_int_mont("e ", e); + print_int_mont("c ", c); + print_int_mont("d ", d); + print_int_mont("da", da); + print_int_mont("cb", cb); + */ + c255_add(x3, da, cb); c255_mul(x3, x3, x3); c255_sub(z3, da, cb); @@ -222,6 +297,13 @@ api_mul(unsigned char *G, size_t Glen, c255_mul(z2, C255_A24, e); c255_add(z2, z2, aa); c255_mul(z2, e, z2); + + /* obsolete + print_int_mont("x2", x2); + print_int_mont("z2", z2); + print_int_mont("x3", x3); + print_int_mont("z3", z3); + */ } cswap(x2, x3, swap); cswap(z2, z3, swap); @@ -231,12 +313,12 @@ api_mul(unsigned char *G, size_t Glen, * square-and-multiply algorithm; we mutualise most non-squarings * since the exponent contains almost only ones. */ - memcpy(a, z2, sizeof z2); + memcpy(a, z2, ILEN); for (i = 0; i < 15; i ++) { c255_mul(a, a, a); c255_mul(a, a, z2); } - memcpy(b, a, sizeof a); + memcpy(b, a, ILEN); for (i = 0; i < 14; i ++) { int j; @@ -251,11 +333,23 @@ api_mul(unsigned char *G, size_t Glen, c255_mul(b, z2, b); } } - c255_mul(x2, x2, b); - br_i15_from_monty(x2, C255_P, P0I); + c255_mul(b, x2, b); + + /* + * To avoid a dependency on br_i15_from_monty(), we use a + * Montgomery multiplication with 1. + * memcpy(x2, b, ILEN); + * br_i15_from_monty(x2, C255_P, P0I); + */ + br_i15_zero(a, C255_P[0]); + a[1] = 1; + br_i15_montymul(x2, a, b, C255_P, P0I); + br_i15_encode(G, 32, x2); byteswap(G); return 1; + +#undef ILEN } static size_t @@ -297,6 +391,7 @@ const br_ec_impl br_ec_c25519_i15 = { (uint32_t)0x20000000, &api_generator, &api_order, + &api_xoff, &api_mul, &api_mulgen, &api_muladd