2 * Copyright (c) 2017 Thomas Pornin <pornin@bolet.org>
4 * Permission is hereby granted, free of charge, to any person obtaining
5 * a copy of this software and associated documentation files (the
6 * "Software"), to deal in the Software without restriction, including
7 * without limitation the rights to use, copy, modify, merge, publish,
8 * distribute, sublicense, and/or sell copies of the Software, and to
9 * permit persons to whom the Software is furnished to do so, subject to
10 * the following conditions:
12 * The above copyright notice and this permission notice shall be
13 * included in all copies or substantial portions of the Software.
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
16 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
17 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
18 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
19 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
20 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
21 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
28 * This file contains the core "big integer" functions for the i15
29 * implementation, that represents integers as sequences of 15-bit
35 br_i15_iszero(const uint16_t *x
)
41 for (u
= (x
[0] + 15) >> 4; u
> 0; u
--) {
44 return ~(z
| -z
) >> 31;
49 br_i15_ninv15(uint16_t x
)
54 y
= MUL15(y
, 2 - MUL15(x
, y
));
55 y
= MUL15(y
, 2 - MUL15(x
, y
));
56 y
= MUL15(y
, 2 - MUL15(x
, y
));
57 return MUX(x
& 1, -y
, 0) & 0x7FFF;
62 br_i15_add(uint16_t *a
, const uint16_t *b
, uint32_t ctl
)
69 for (u
= 1; u
< m
; u
++) {
76 a
[u
] = MUX(ctl
, naw
& 0x7FFF, aw
);
83 br_i15_sub(uint16_t *a
, const uint16_t *b
, uint32_t ctl
)
90 for (u
= 1; u
< m
; u
++) {
97 a
[u
] = MUX(ctl
, naw
& 0x7FFF, aw
);
103 * Constant-time division. The divisor must not be larger than 16 bits,
104 * and the quotient must fit on 17 bits.
107 divrem16(uint32_t x
, uint32_t d
, uint32_t *r
)
114 for (i
= 16; i
>= 0; i
--) {
130 br_i15_muladd_small(uint16_t *x
, uint16_t z
, const uint16_t *m
)
133 * Constant-time: we accept to leak the exact bit length of the
136 unsigned m_bitlen
, mblr
;
138 uint32_t hi
, a0
, a
, b
, q
;
139 uint32_t cc
, tb
, over
, under
;
142 * Simple case: the modulus fits on one word.
148 if (m_bitlen
<= 15) {
151 divrem16(((uint32_t)x
[1] << 15) | z
, m
[1], &rem
);
155 mlen
= (m_bitlen
+ 15) >> 4;
156 mblr
= m_bitlen
& 15;
159 * Principle: we estimate the quotient (x*2^15+z)/m by
160 * doing a 30/15 division with the high words.
164 * a = (w*a0 + a1) * w^N + a2
173 * I.e. the two top words of a are a0:a1, the top word of b is
174 * b0, we ensured that b0 is "full" (high bit set), and a is
175 * such that the quotient q = a/b fits on one word (0 <= q < w).
177 * If a = b*q + r (with 0 <= r < q), then we can estimate q by
178 * using a division on the top words:
179 * a0*w + a1 = b0*u + v (with 0 <= v < b0)
180 * Then the following holds:
187 memmove(x
+ 2, x
+ 1, (mlen
- 1) * sizeof *x
);
189 a
= (a0
<< 15) + x
[mlen
];
192 a0
= (x
[mlen
] << (15 - mblr
)) | (x
[mlen
- 1] >> mblr
);
193 memmove(x
+ 2, x
+ 1, (mlen
- 1) * sizeof *x
);
195 a
= (a0
<< 15) | (((x
[mlen
] << (15 - mblr
))
196 | (x
[mlen
- 1] >> mblr
)) & 0x7FFF);
197 b
= (m
[mlen
] << (15 - mblr
)) | (m
[mlen
- 1] >> mblr
);
199 q
= divrem16(a
, b
, NULL
);
202 * We computed an estimate for q, but the real one may be q,
203 * q-1 or q-2; moreover, the division may have returned a value
204 * 8000 or even 8001 if the two high words were identical, and
205 * we want to avoid values beyond 7FFF. We thus adjust q so
206 * that the "true" multiplier will be q+1, q or q-1, and q is
207 * in the 0000..7FFF range.
209 q
= MUX(EQ(b
, a0
), 0x7FFF, q
- 1 + ((q
- 1) >> 31));
212 * We subtract q*m from x (x has an extra high word of value 'hi').
213 * Since q may be off by 1 (in either direction), we may have to
214 * add or subtract m afterwards.
216 * The 'tb' flag will be true (1) at the end of the loop if the
217 * result is greater than or equal to the modulus (not counting
218 * 'hi' or the carry).
222 for (u
= 1; u
<= mlen
; u
++) {
223 uint32_t mw
, zl
, xw
, nxw
;
226 zl
= MUL15(mw
, q
) + cc
;
234 tb
= MUX(EQ(nxw
, mw
), tb
, GT(nxw
, mw
));
238 * If we underestimated q, then either cc < hi (one extra bit
239 * beyond the top array word), or cc == hi and tb is true (no
240 * extra bit, but the result is not lower than the modulus).
242 * If we overestimated q, then cc > hi.
245 under
= ~over
& (tb
| LT(cc
, hi
));
246 br_i15_add(x
, m
, over
);
247 br_i15_sub(x
, m
, under
);
252 br_i15_montymul(uint16_t *d
, const uint16_t *x
, const uint16_t *y
,
253 const uint16_t *m
, uint16_t m0i
)
255 size_t len
, len4
, u
, v
;
258 len
= (m
[0] + 15) >> 4;
259 len4
= len
& ~(size_t)3;
260 br_i15_zero(d
, m
[0]);
262 for (u
= 0; u
< len
; u
++) {
263 uint32_t f
, xu
, r
, zh
;
266 f
= MUL15((d
[1] + MUL15(x
[u
+ 1], y
[1])) & 0x7FFF, m0i
)
270 for (v
= 0; v
< len4
; v
+= 4) {
273 z
= d
[v
+ 1] + MUL15(xu
, y
[v
+ 1])
274 + MUL15(f
, m
[v
+ 1]) + r
;
276 d
[v
+ 0] = z
& 0x7FFF;
277 z
= d
[v
+ 2] + MUL15(xu
, y
[v
+ 2])
278 + MUL15(f
, m
[v
+ 2]) + r
;
280 d
[v
+ 1] = z
& 0x7FFF;
281 z
= d
[v
+ 3] + MUL15(xu
, y
[v
+ 3])
282 + MUL15(f
, m
[v
+ 3]) + r
;
284 d
[v
+ 2] = z
& 0x7FFF;
285 z
= d
[v
+ 4] + MUL15(xu
, y
[v
+ 4])
286 + MUL15(f
, m
[v
+ 4]) + r
;
288 d
[v
+ 3] = z
& 0x7FFF;
290 for (; v
< len
; v
++) {
293 z
= d
[v
+ 1] + MUL15(xu
, y
[v
+ 1])
294 + MUL15(f
, m
[v
+ 1]) + r
;
296 d
[v
+ 0] = z
& 0x7FFF;
300 d
[len
] = zh
& 0x7FFF;
305 * Restore the bit length (it was overwritten in the loop above).
310 * d[] may be greater than m[], but it is still lower than twice
313 br_i15_sub(d
, m
, NEQ(dh
, 0) | NOT(br_i15_sub(d
, m
, 0)));
318 br_i15_to_monty(uint16_t *x
, const uint16_t *m
)
322 for (k
= (m
[0] + 15) >> 4; k
> 0; k
--) {
323 br_i15_muladd_small(x
, 0, m
);
329 br_i15_modpow(uint16_t *x
,
330 const unsigned char *e
, size_t elen
,
331 const uint16_t *m
, uint16_t m0i
, uint16_t *t1
, uint16_t *t2
)
336 mlen
= ((m
[0] + 31) >> 4) * sizeof m
[0];
338 br_i15_to_monty(t1
, m
);
339 br_i15_zero(x
, m
[0]);
341 for (k
= 0; k
< ((unsigned)elen
<< 3); k
++) {
344 ctl
= (e
[elen
- 1 - (k
>> 3)] >> (k
& 7)) & 1;
345 br_i15_montymul(t2
, x
, t1
, m
, m0i
);
346 CCOPY(ctl
, x
, t2
, mlen
);
347 br_i15_montymul(t2
, t1
, t1
, m
, m0i
);
348 memcpy(t1
, t2
, mlen
);
354 br_i15_encode(void *dst
, size_t len
, const uint16_t *x
)
361 xlen
= (x
[0] + 15) >> 4;
373 acc
+= (uint32_t)x
[u
++] << acc_len
;
377 buf
[len
] = (unsigned char)acc
;
385 br_i15_decode_mod(uint16_t *x
, const void *src
, size_t len
, const uint16_t *m
)
388 * Two-pass algorithm: in the first pass, we determine whether the
389 * value fits; in the second pass, we do the actual write.
391 * During the first pass, 'r' contains the comparison result so
393 * 0x00000000 value is equal to the modulus
394 * 0x00000001 value is greater than the modulus
395 * 0xFFFFFFFF value is lower than the modulus
397 * Since we iterate starting with the least significant bytes (at
398 * the end of src[]), each new comparison overrides the previous
399 * except when the comparison yields 0 (equal).
401 * During the second pass, 'r' is either 0xFFFFFFFF (value fits)
402 * or 0x00000000 (value does not fit).
404 * We must iterate over all bytes of the source, _and_ possibly
405 * some extra virutal bytes (with value 0) so as to cover the
406 * complete modulus as well. We also add 4 such extra bytes beyond
407 * the modulus length because it then guarantees that no accumulated
408 * partial word remains to be processed.
410 const unsigned char *buf
;
416 mlen
= (m
[0] + 15) >> 4;
423 for (pass
= 0; pass
< 2; pass
++) {
431 for (u
= 0; u
< tlen
; u
++) {
435 b
= buf
[len
- 1 - u
];
439 acc
|= (b
<< acc_len
);
444 xw
= acc
& (uint32_t)0x7FFF;
446 acc
= b
>> (8 - acc_len
);
453 cc
= (uint32_t)CMP(xw
, m
[v
]);
454 r
= MUX(EQ(cc
, 0), r
, cc
);
458 r
= MUX(EQ(xw
, 0), r
, 1);
466 * When we reach this point at the end of the first pass:
467 * r is either 0, 1 or -1; we want to set r to 0 if it
468 * is equal to 0 or 1, and leave it to -1 otherwise.
470 * When we reach this point at the end of the second pass:
471 * r is either 0 or -1; we want to leave that value
472 * untouched. This is a subcase of the previous.
479 return r
& (uint32_t)1;