X-Git-Url: https://www.bearssl.org/gitweb//home/git/?p=BearSSL;a=blobdiff_plain;f=src%2Frsa%2Frsa_i31_priv.c;h=b1e1244e29715837b32de0ffffdd804dbf9b2f9e;hp=efff6ae18ded6f469f6b66ca8cee34090c76ccbb;hb=fa0b7bbe90b4bf262d80c00b21bb37e9d1c75855;hpb=3210f38e0491b39aec1ef419cb4114e9483089fb diff --git a/src/rsa/rsa_i31_priv.c b/src/rsa/rsa_i31_priv.c index efff6ae..b1e1244 100644 --- a/src/rsa/rsa_i31_priv.c +++ b/src/rsa/rsa_i31_priv.c @@ -24,7 +24,8 @@ #include "inner.h" -#define U (1 + ((BR_MAX_RSA_FACTOR + 30) / 31)) +#define U (2 + ((BR_MAX_RSA_FACTOR + 30) / 31)) +#define TLEN (8 * U) /* see bearssl_rsa.h */ uint32_t @@ -32,30 +33,18 @@ br_rsa_i31_private(unsigned char *x, const br_rsa_private_key *sk) { const unsigned char *p, *q; size_t plen, qlen; - uint32_t tmp[6 * U]; - uint32_t *mp, *mq, *s1, *s2, *t1, *t2, *t3; + size_t fwlen; uint32_t p0i, q0i; - size_t xlen; - - /* - * All our temporary buffers are from the tmp[] array. - * - * The mp, mq, s1, s2, t1 and t2 buffers are large enough to - * contain a RSA factor. The t3 buffer can contain a complete - * RSA modulus. t3 shares its storage space with s2, s1 and t1, - * in that order (this is important, see below). - */ - mq = tmp; - mp = tmp + U; - t2 = tmp + 2 * U; - s2 = tmp + 3 * U; - s1 = tmp + 4 * U; - t1 = tmp + 5 * U; - t3 = s2; + size_t xlen, u; + uint32_t tmp[1 + TLEN]; + long z; + uint32_t *mp, *mq, *s1, *s2, *t1, *t2, *t3; + uint32_t r; /* - * Compute the actual lengths (in bytes) of p and q, and check - * that they fit within our stack buffers. + * Compute the actual lengths of p and q, in bytes. + * These lengths are not considered secret (we cannot really hide + * them anyway in constant-time code). */ p = sk->p; plen = sk->plen; @@ -69,36 +58,100 @@ br_rsa_i31_private(unsigned char *x, const br_rsa_private_key *sk) q ++; qlen --; } - if (plen > (BR_MAX_RSA_FACTOR >> 3) - || qlen > (BR_MAX_RSA_FACTOR >> 3)) - { + + /* + * Compute the maximum factor length, in words. + */ + z = (long)(plen > qlen ? plen : qlen) << 3; + fwlen = 1; + while (z > 0) { + z -= 31; + fwlen ++; + } + + /* + * Round up the word length to an even number. + */ + fwlen += (fwlen & 1); + + /* + * We need to fit at least 6 values in the stack buffer. + */ + if (6 * fwlen > TLEN) { return 0; } /* - * Decode p and q. + * Compute modulus length (in bytes). + */ + xlen = (sk->n_bitlen + 7) >> 3; + + /* + * Decode q. */ - br_i31_decode(mp, p, plen); + mq = tmp; br_i31_decode(mq, q, qlen); /* - * Compute signature length (in bytes). + * Decode p. */ - xlen = (sk->n_bitlen + 7) >> 3; + t1 = mq + fwlen; + br_i31_decode(t1, p, plen); /* - * Compute s1 = x^dp mod p. + * Compute the modulus (product of the two factors), to compare + * it with the source value. We use br_i31_mulacc(), since it's + * already used later on. */ - p0i = br_i31_ninv31(mp[1]); - br_i31_decode_reduce(s1, x, xlen, mp); - br_i31_modpow(s1, sk->dp, sk->dplen, mp, p0i, t1, t2); + t2 = mq + 2 * fwlen; + br_i31_zero(t2, mq[0]); + br_i31_mulacc(t2, mq, t1); + + /* + * We encode the modulus into bytes, to perform the comparison + * with bytes. We know that the product length, in bytes, is + * exactly xlen. + * The comparison actually computes the carry when subtracting + * the modulus from the source value; that carry must be 1 for + * a value in the correct range. We keep it in r, which is our + * accumulator for the error code. + */ + t3 = mq + 4 * fwlen; + br_i31_encode(t3, xlen, t2); + u = xlen; + r = 0; + while (u > 0) { + uint32_t wn, wx; + + u --; + wn = ((unsigned char *)t3)[u]; + wx = x[u]; + r = ((wx - (wn + r)) >> 8) & 1; + } + + /* + * Move the decoded p to another temporary buffer. + */ + mp = mq + 2 * fwlen; + memmove(mp, t1, fwlen * sizeof *t1); /* * Compute s2 = x^dq mod q. */ q0i = br_i31_ninv31(mq[1]); + s2 = mq + fwlen; br_i31_decode_reduce(s2, x, xlen, mq); - br_i31_modpow(s2, sk->dq, sk->dqlen, mq, q0i, t1, t2); + r &= br_i31_modpow_opt(s2, sk->dq, sk->dqlen, mq, q0i, + mq + 3 * fwlen, TLEN - 3 * fwlen); + + /* + * Compute s1 = x^dp mod p. + */ + p0i = br_i31_ninv31(mp[1]); + s1 = mq + 3 * fwlen; + br_i31_decode_reduce(s1, x, xlen, mp); + r &= br_i31_modpow_opt(s1, sk->dp, sk->dplen, mp, p0i, + mq + 4 * fwlen, TLEN - 4 * fwlen); /* * Compute: @@ -113,6 +166,8 @@ br_rsa_i31_private(unsigned char *x, const br_rsa_private_key *sk) * inverse of q modulo p), we also tolerate improperly large * values for this parameter. */ + t1 = mq + 4 * fwlen; + t2 = mq + 5 * fwlen; br_i31_reduce(t2, s2, mp); br_i31_add(s1, mp, br_i31_sub(s1, t2, 1)); br_i31_to_monty(s1, mp); @@ -125,11 +180,13 @@ br_rsa_i31_private(unsigned char *x, const br_rsa_private_key *sk) * All these operations are non-modular. * * We need mq, s2 and t2. We use the t3 buffer as destination. - * The buffers mp, s1 and t1 are no longer needed. Moreover, - * the first step is to copy s2 into the destination buffer t3. - * We thus arranged for t3 to actually share space with s2, and - * to be followed by the space formerly used by s1 and t1. + * The buffers mp, s1 and t1 are no longer needed, so we can + * reuse them for t3. Moreover, the first step of the computation + * is to copy s2 into t3, after which s2 is not needed. Right + * now, mq is in slot 0, s2 is in slot 1, and t2 is in slot 5. + * Therefore, we have ample room for t3 by simply using s2. */ + t3 = s2; br_i31_mulacc(t3, mq, t2); /* @@ -142,5 +199,5 @@ br_rsa_i31_private(unsigned char *x, const br_rsa_private_key *sk) * The only error conditions remaining at that point are invalid * values for p and q (even integers). */ - return p0i & q0i & 1; + return p0i & q0i & r; }