2 * Copyright (c) 2018 Thomas Pornin <pornin@bolet.org>
4 * Permission is hereby granted, free of charge, to any person obtaining
5 * a copy of this software and associated documentation files (the
6 * "Software"), to deal in the Software without restriction, including
7 * without limitation the rights to use, copy, modify, merge, publish,
8 * distribute, sublicense, and/or sell copies of the Software, and to
9 * permit persons to whom the Software is furnished to do so, subject to
10 * the following conditions:
12 * The above copyright notice and this permission notice shall be
13 * included in all copies or substantial portions of the Software.
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
16 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
17 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
18 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
19 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
20 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
21 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
27 /* see bearssl_rsa.h */
29 br_rsa_i15_compute_privexp(void *d
,
30 const br_rsa_private_key
*sk
, uint32_t e
)
33 * We want to invert e modulo phi = (p-1)(q-1). This first
34 * requires computing phi, which is easy since we have the factors
35 * p and q in the private key structure.
37 * Since p = 3 mod 4 and q = 3 mod 4, phi/4 is an odd integer.
38 * We could invert e modulo phi/4 then patch the result to
39 * modulo phi, but this would involve assembling three modulus-wide
40 * values (phi/4, 1 and e) and calling moddiv, that requires
41 * three more temporaries, for a total of six big integers, or
42 * slightly more than 3 kB of stack space for RSA-4096. This
43 * exceeds our stack requirements.
45 * Instead, we first use one step of the extended GCD:
47 * - We compute phi = k*e + r (Euclidean division of phi by e).
48 * If public exponent e is correct, then r != 0 (e must be
49 * invertible modulo phi). We also have k != 0 since we
50 * enforce non-ridiculously-small factors.
52 * - We find small u, v such that u*e - v*r = 1 (using a
53 * binary GCD; we can arrange for u < r and v < e, i.e. all
54 * values fit on 32 bits).
56 * - Solution is: d = u + v*k
57 * This last computation is exact: since u < r and v < e,
58 * the above implies d < r + e*((phi-r)/e) = phi
61 uint16_t tmp
[4 * ((BR_MAX_RSA_FACTOR
+ 14) / 15) + 12];
62 uint16_t *p
, *q
, *k
, *m
, *z
, *phi
;
63 const unsigned char *pbuf
, *qbuf
;
64 size_t plen
, qlen
, u
, len
, dlen
;
65 uint32_t r
, a
, b
, u0
, v0
, u1
, v1
, he
, hr
;
69 * Check that e is correct.
71 if (e
< 3 || (e
& 1) == 0) {
76 * Check lengths of p and q, and that they are both odd.
80 while (plen
> 0 && *pbuf
== 0) {
84 if (plen
< 5 || plen
> (BR_MAX_RSA_FACTOR
/ 8)
85 || (pbuf
[plen
- 1] & 1) != 1)
91 while (qlen
> 0 && *qbuf
== 0) {
95 if (qlen
< 5 || qlen
> (BR_MAX_RSA_FACTOR
/ 8)
96 || (qbuf
[qlen
- 1] & 1) != 1)
102 * Output length is that of the modulus.
104 dlen
= (sk
->n_bitlen
+ 7) >> 3;
110 br_i15_decode(p
, pbuf
, plen
);
111 plen
= (p
[0] + 15) >> 4;
113 br_i15_decode(q
, qbuf
, qlen
);
114 qlen
= (q
[0] + 15) >> 4;
117 * Compute phi = (p-1)*(q-1), then move it over p-1 and q-1 (that
118 * we do not need anymore). The mulacc function sets the announced
119 * bit length of t to be the sum of the announced bit lengths of
120 * p-1 and q-1, which is usually exact but may overshoot by one 1
121 * bit in some cases; we readjust it to its true length.
126 br_i15_zero(phi
, p
[0]);
127 br_i15_mulacc(phi
, p
, q
);
128 len
= (phi
[0] + 15) >> 4;
129 memmove(tmp
, phi
, (1 + len
) * sizeof *phi
);
131 phi
[0] = br_i15_bit_length(phi
+ 1, len
);
132 len
= (phi
[0] + 15) >> 4;
135 * Divide phi by public exponent e. The final remainder r must be
136 * non-zero (otherwise, the key is invalid). The quotient is k,
137 * which we write over phi, since we don't need phi after that.
140 for (u
= len
; u
>= 1; u
--) {
142 * Upon entry, r < e, and phi[u] < 2^15; hence,
143 * hi:lo < e*2^15. Thus, the produced word k[u]
144 * must be lower than 2^15, and the new remainder r
150 lo
= (r
<< 15) + phi
[u
];
151 phi
[u
] = br_divrem(hi
, lo
, e
, &r
);
159 * Compute u and v such that u*e - v*r = GCD(e,r). We use
160 * a binary GCD algorithm, with 6 extra integers a, b,
161 * u0, u1, v0 and v1. Initial values are:
162 * a = e u0 = 1 v0 = 0
163 * b = r u1 = r v1 = e-1
164 * The following invariants are maintained:
174 * At each iteration, we reduce either a or b by one bit, and
175 * adjust u0, u1, v0 and v1 to maintain the invariants:
176 * - if a is even, then a <- a/2
177 * - otherwise, if b is even, then b <- b/2
178 * - otherwise, if a > b, then a <- (a-b)/2
179 * - otherwise, if b > a, then b <- (b-a)/2
180 * Algorithm stops when a = b. At that point, the common value
181 * is the GCD of e and r; it must be 1 (otherwise, the private
182 * key or public exponent is not valid). The (u0,v0) or (u1,v1)
183 * pairs are the solution we are looking for.
185 * Since either a or b is reduced by at least 1 bit at each
186 * iteration, 62 iterations are enough to reach the end
189 * To maintain the invariants, we must compute the same operations
190 * on the u* and v* values that we do on a and b:
191 * - When a is divided by 2, u0 and v0 must be divided by 2.
192 * - When b is divided by 2, u1 and v1 must be divided by 2.
193 * - When b is subtracted from a, u1 and v1 are subtracted from
194 * u0 and v0, respectively.
195 * - When a is subtracted from b, u0 and v0 are subtracted from
196 * u1 and v1, respectively.
198 * However, we want to keep the u* and v* values in their proper
199 * ranges. The following remarks apply:
201 * - When a is divided by 2, then a is even. Therefore:
203 * * If r is odd, then u0 and v0 must have the same parity;
204 * if they are both odd, then adding r to u0 and e to v0
205 * makes them both even, and the division by 2 brings them
206 * back to the proper range.
208 * * If r is even, then u0 must be even; if v0 is odd, then
209 * adding r to u0 and e to v0 makes them both even, and the
210 * division by 2 brings them back to the proper range.
212 * Thus, all we need to do is to look at the parity of v0,
213 * and add (r,e) to (u0,v0) when v0 is odd. In order to avoid
214 * a 32-bit overflow, we can add ((r+1)/2,(e/2)+1) after the
215 * division (r+1 does not overflow since r < e; and (e/2)+1
216 * is equal to (e+1)/2 since e is odd).
218 * - When we subtract b from a, three cases may occur:
220 * * u1 <= u0 and v1 <= v0: just do the subtractions
222 * * u1 > u0 and v1 > v0: compute:
223 * (u0, v0) <- (u0 + r - u1, v0 + e - v1)
225 * * u1 <= u0 and v1 > v0: compute:
226 * (u0, v0) <- (u0 + r - u1, v0 + e - v1)
228 * The fourth case (u1 > u0 and v1 <= v0) is not possible
229 * because it would contradict "b < a" (which is the reason
230 * why we subtract b from a).
232 * The tricky case is the third one: from the equations, it
233 * seems that u0 may go out of range. However, the invariants
234 * and ranges of other values imply that, in that case, the
235 * new u0 does not actually exceed the range.
237 * We can thus handle the subtraction by adding (r,e) based
238 * solely on the comparison between v0 and v1.
248 for (i
= 0; i
< 62; i
++) {
249 uint32_t oa
, ob
, agtb
, bgta
;
250 uint32_t sab
, sba
, da
, db
;
253 oa
= a
& 1; /* 1 if a is odd */
254 ob
= b
& 1; /* 1 if b is odd */
255 agtb
= GT(a
, b
); /* 1 if a > b */
256 bgta
= GT(b
, a
); /* 1 if b > a */
258 sab
= oa
& ob
& agtb
; /* 1 if a <- a-b */
259 sba
= oa
& ob
& bgta
; /* 1 if b <- b-a */
261 /* a <- a-b, u0 <- u0-u1, v0 <- v0-v1 */
264 u0
-= (u1
- (r
& -ctl
)) & -sab
;
265 v0
-= (v1
- (e
& -ctl
)) & -sab
;
267 /* b <- b-a, u1 <- u1-u0 mod r, v1 <- v1-v0 mod e */
270 u1
-= (u0
- (r
& -ctl
)) & -sba
;
271 v1
-= (v0
- (e
& -ctl
)) & -sba
;
273 da
= NOT(oa
) | sab
; /* 1 if a <- a/2 */
274 db
= (oa
& NOT(ob
)) | sba
; /* 1 if b <- b/2 */
276 /* a <- a/2, u0 <- u0/2, v0 <- v0/2 */
278 a
^= (a
^ (a
>> 1)) & -da
;
279 u0
^= (u0
^ ((u0
>> 1) + (hr
& -ctl
))) & -da
;
280 v0
^= (v0
^ ((v0
>> 1) + (he
& -ctl
))) & -da
;
282 /* b <- b/2, u1 <- u1/2 mod r, v1 <- v1/2 mod e */
284 b
^= (b
^ (b
>> 1)) & -db
;
285 u1
^= (u1
^ ((u1
>> 1) + (hr
& -ctl
))) & -db
;
286 v1
^= (v1
^ ((v1
>> 1) + (he
& -ctl
))) & -db
;
290 * Check that the GCD is indeed 1. If not, then the key is invalid
291 * (and there's no harm in leaking that piece of information).
298 * Now we have u0*e - v0*r = 1. Let's compute the result as:
300 * We still have k in the tmp[] array, and its announced bit
301 * length is that of phi.
304 m
[0] = (2 << 4) + 2; /* bit length is 32 bits, encoded */
306 m
[2] = (v0
>> 15) & 0x7FFF;
309 br_i15_zero(z
, k
[0]);
311 z
[2] = (u0
>> 15) & 0x7FFF;
313 br_i15_mulacc(z
, k
, m
);
318 br_i15_encode(d
, dlen
, z
);