X-Git-Url: https://www.bearssl.org/gitweb//home/git/?p=BearSSL;a=blobdiff_plain;f=src%2Fint%2Fi31_montmul.c;h=758f8f42a33b7946beb4cdd103c5ec013285a0ee;hp=80668086ff813e5a88ecf38aefab61c14bb95743;hb=001d094d140488def90cb3876d5c03f4d79b3e27;hpb=08eb07825be067729dff343de2d9a0c13252b415;ds=sidebyside diff --git a/src/int/i31_montmul.c b/src/int/i31_montmul.c index 8066808..758f8f4 100644 --- a/src/int/i31_montmul.c +++ b/src/int/i31_montmul.c @@ -29,16 +29,45 @@ void br_i31_montymul(uint32_t *d, const uint32_t *x, const uint32_t *y, const uint32_t *m, uint32_t m0i) { + /* + * Each outer loop iteration computes: + * d <- (d + xu*y + f*m) / 2^31 + * We have xu <= 2^31-1 and f <= 2^31-1. + * Thus, if d <= 2*m-1 on input, then: + * 2*m-1 + 2*(2^31-1)*m <= (2^32)*m-1 + * and the new d value is less than 2*m. + * + * We represent d over 31-bit words, with an extra word 'dh' + * which can thus be only 0 or 1. + */ size_t len, len4, u, v; - uint64_t dh; + uint32_t dh; len = (m[0] + 31) >> 5; len4 = len & ~(size_t)3; br_i31_zero(d, m[0]); dh = 0; for (u = 0; u < len; u ++) { + /* + * The carry for each operation fits on 32 bits: + * d[v+1] <= 2^31-1 + * xu*y[v+1] <= (2^31-1)*(2^31-1) + * f*m[v+1] <= (2^31-1)*(2^31-1) + * r <= 2^32-1 + * (2^31-1) + 2*(2^31-1)*(2^31-1) + (2^32-1) = 2^63 - 2^31 + * After division by 2^31, the new r is then at most 2^32-1 + * + * Using a 32-bit carry has performance benefits on 32-bit + * systems; however, on 64-bit architectures, we prefer to + * keep the carry (r) in a 64-bit register, thus avoiding some + * "clear high bits" operations. + */ uint32_t f, xu; - uint64_t r, zh; +#if BR_64 + uint64_t r; +#else + uint32_t r; +#endif xu = x[u + 1]; f = MUL31_lo((d[1] + MUL31_lo(x[u + 1], y[1])), m0i); @@ -73,9 +102,14 @@ br_i31_montymul(uint32_t *d, const uint32_t *x, const uint32_t *y, d[v] = (uint32_t)z & 0x7FFFFFFF; } - zh = dh + r; - d[len] = (uint32_t)zh & 0x7FFFFFFF; - dh = zh >> 31; + /* + * Since the new dh can only be 0 or 1, the addition of + * the old dh with the carry MUST fit on 32 bits, and + * thus can be done into dh itself. + */ + dh += r; + d[len] = dh & 0x7FFFFFFF; + dh >>= 31; } /*