1 /*

2 * Copyright (c) 2018 Thomas Pornin <pornin@bolet.org>

3 *

4 * Permission is hereby granted, free of charge, to any person obtaining

5 * a copy of this software and associated documentation files (the

6 * "Software"), to deal in the Software without restriction, including

7 * without limitation the rights to use, copy, modify, merge, publish,

8 * distribute, sublicense, and/or sell copies of the Software, and to

9 * permit persons to whom the Software is furnished to do so, subject to

10 * the following conditions:

11 *

12 * The above copyright notice and this permission notice shall be

13 * included in all copies or substantial portions of the Software.

14 *

15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,

16 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF

17 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND

18 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS

19 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN

20 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN

21 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE

22 * SOFTWARE.

23 */

27 /* see bearssl_rsa.h */

28 size_t

31 {

32 /*

33 * We want to invert e modulo phi = (p-1)(q-1). This first

34 * requires computing phi, which is easy since we have the factors

35 * p and q in the private key structure.

36 *

37 * Since p = 3 mod 4 and q = 3 mod 4, phi/4 is an odd integer.

38 * We could invert e modulo phi/4 then patch the result to

39 * modulo phi, but this would involve assembling three modulus-wide

40 * values (phi/4, 1 and e) and calling moddiv, that requires

41 * three more temporaries, for a total of six big integers, or

42 * slightly more than 3 kB of stack space for RSA-4096. This

43 * exceeds our stack requirements.

44 *

45 * Instead, we first use one step of the extended GCD:

46 *

47 * - We compute phi = k*e + r (Euclidean division of phi by e).

48 * If public exponent e is correct, then r != 0 (e must be

49 * invertible modulo phi). We also have k != 0 since we

50 * enforce non-ridiculously-small factors.

51 *

52 * - We find small u, v such that u*e - v*r = 1 (using a

53 * binary GCD; we can arrange for u < r and v < e, i.e. all

54 * values fit on 32 bits).

55 *

56 * - Solution is: d = u + v*k

57 * This last computation is exact: since u < r and v < e,

58 * the above implies d < r + e*((phi-r)/e) = phi

59 */

68 /*

69 * Check that e is correct.

70 */

73 }

75 /*

76 * Check lengths of p and q, and that they are both odd.

77 */

81 pbuf ++;

82 plen --;

83 }

86 {

88 }

92 qbuf ++;

93 qlen --;

94 }

97 {

99 }

101 /*

102 * Output length is that of the modulus.

103 */

107 }

116 /*

117 * Compute phi = (p-1)*(q-1), then move it over p-1 and q-1 (that

118 * we do not need anymore). The mulacc function sets the announced

119 * bit length of t to be the sum of the announced bit lengths of

120 * p-1 and q-1, which is usually exact but may overshoot by one 1

121 * bit in some cases; we readjust it to its true length.

122 */

134 /*

135 * Divide phi by public exponent e. The final remainder r must be

136 * non-zero (otherwise, the key is invalid). The quotient is k,

137 * which we write over phi, since we don't need phi after that.

138 */

141 /*

142 * Upon entry, r < e, and phi[u] < 2^31; hence,

143 * hi:lo < e*2^31. Thus, the produced word k[u]

144 * must be lower than 2^31, and the new remainder r

145 * is lower than e.

146 */

152 }

155 }

158 /*

159 * Compute u and v such that u*e - v*r = GCD(e,r). We use

160 * a binary GCD algorithm, with 6 extra integers a, b,

161 * u0, u1, v0 and v1. Initial values are:

162 * a = e u0 = 1 v0 = 0

163 * b = r u1 = r v1 = e-1

164 * The following invariants are maintained:

165 * a = u0*e - v0*r

166 * b = u1*e - v1*r

167 * 0 < a <= e

168 * 0 < b <= r

169 * 0 <= u0 <= r

170 * 0 <= v0 <= e

171 * 0 <= u1 <= r

172 * 0 <= v1 <= e

173 *

174 * At each iteration, we reduce either a or b by one bit, and

175 * adjust u0, u1, v0 and v1 to maintain the invariants:

176 * - if a is even, then a <- a/2

177 * - otherwise, if b is even, then b <- b/2

178 * - otherwise, if a > b, then a <- (a-b)/2

179 * - otherwise, if b > a, then b <- (b-a)/2

180 * Algorithm stops when a = b. At that point, the common value

181 * is the GCD of e and r; it must be 1 (otherwise, the private

182 * key or public exponent is not valid). The (u0,v0) or (u1,v1)

183 * pairs are the solution we are looking for.

184 *

185 * Since either a or b is reduced by at least 1 bit at each

186 * iteration, 62 iterations are enough to reach the end

187 * condition.

188 *

189 * To maintain the invariants, we must compute the same operations

190 * on the u* and v* values that we do on a and b:

191 * - When a is divided by 2, u0 and v0 must be divided by 2.

192 * - When b is divided by 2, u1 and v1 must be divided by 2.

193 * - When b is subtracted from a, u1 and v1 are subtracted from

194 * u0 and v0, respectively.

195 * - When a is subtracted from b, u0 and v0 are subtracted from

196 * u1 and v1, respectively.

197 *

198 * However, we want to keep the u* and v* values in their proper

199 * ranges. The following remarks apply:

200 *

201 * - When a is divided by 2, then a is even. Therefore:

202 *

203 * * If r is odd, then u0 and v0 must have the same parity;

204 * if they are both odd, then adding r to u0 and e to v0

205 * makes them both even, and the division by 2 brings them

206 * back to the proper range.

207 *

208 * * If r is even, then u0 must be even; if v0 is odd, then

209 * adding r to u0 and e to v0 makes them both even, and the

210 * division by 2 brings them back to the proper range.

211 *

212 * Thus, all we need to do is to look at the parity of v0,

213 * and add (r,e) to (u0,v0) when v0 is odd. In order to avoid

214 * a 32-bit overflow, we can add ((r+1)/2,(e/2)+1) after the

215 * division (r+1 does not overflow since r < e; and (e/2)+1

216 * is equal to (e+1)/2 since e is odd).

217 *

218 * - When we subtract b from a, three cases may occur:

219 *

220 * * u1 <= u0 and v1 <= v0: just do the subtractions

221 *

222 * * u1 > u0 and v1 > v0: compute:

223 * (u0, v0) <- (u0 + r - u1, v0 + e - v1)

224 *

225 * * u1 <= u0 and v1 > v0: compute:

226 * (u0, v0) <- (u0 + r - u1, v0 + e - v1)

227 *

228 * The fourth case (u1 > u0 and v1 <= v0) is not possible

229 * because it would contradict "b < a" (which is the reason

230 * why we subtract b from a).

231 *

232 * The tricky case is the third one: from the equations, it

233 * seems that u0 may go out of range. However, the invariants

234 * and ranges of other values imply that, in that case, the

235 * new u0 does not actually exceed the range.

236 *

237 * We can thus handle the subtraction by adding (r,e) based

238 * solely on the comparison between v0 and v1.

239 */

261 /* a <- a-b, u0 <- u0-u1, v0 <- v0-v1 */

267 /* b <- b-a, u1 <- u1-u0 mod r, v1 <- v1-v0 mod e */

276 /* a <- a/2, u0 <- u0/2, v0 <- v0/2 */

282 /* b <- b/2, u1 <- u1/2 mod r, v1 <- v1/2 mod e */

287 }

289 /*

290 * Check that the GCD is indeed 1. If not, then the key is invalid

291 * (and there's no harm in leaking that piece of information).

292 */

295 }

297 /*

298 * Now we have u0*e - v0*r = 1. Let's compute the result as:

299 * d = u0 + v0*k

300 * We still have k in the tmp[] array, and its announced bit

301 * length is that of phi.

302 */

313 /*

314 * Encode the result.

315 */

318 }