2 * Copyright (c) 2017 Thomas Pornin <pornin@bolet.org>
4 * Permission is hereby granted, free of charge, to any person obtaining
5 * a copy of this software and associated documentation files (the
6 * "Software"), to deal in the Software without restriction, including
7 * without limitation the rights to use, copy, modify, merge, publish,
8 * distribute, sublicense, and/or sell copies of the Software, and to
9 * permit persons to whom the Software is furnished to do so, subject to
10 * the following conditions:
12 * The above copyright notice and this permission notice shall be
13 * included in all copies or substantial portions of the Software.
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
16 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
17 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
18 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
19 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
20 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
21 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
28 * Parameters for supported curves:
30 * - R^2 mod p (R = 2^(15k) for the smallest k such that R >= p)
31 * - b*R mod p (b is the second curve equation parameter)
34 static const uint16_t P256_P
[] = {
36 0x7FFF, 0x7FFF, 0x7FFF, 0x7FFF, 0x7FFF, 0x7FFF, 0x003F, 0x0000,
37 0x0000, 0x0000, 0x0000, 0x0000, 0x1000, 0x0000, 0x4000, 0x7FFF,
41 static const uint16_t P256_R2
[] = {
43 0x0000, 0x6000, 0x0000, 0x0000, 0x0000, 0x0000, 0x7FFC, 0x7FFF,
44 0x7FBF, 0x7FFF, 0x7FBF, 0x7FFF, 0x7FFF, 0x7FFF, 0x77FF, 0x7FFF,
48 static const uint16_t P256_B
[] = {
50 0x770C, 0x5EEF, 0x29C4, 0x3EC4, 0x6273, 0x0486, 0x4543, 0x3993,
51 0x3C01, 0x6B56, 0x212E, 0x57EE, 0x4882, 0x204B, 0x7483, 0x3C16,
55 static const uint16_t P384_P
[] = {
57 0x7FFF, 0x7FFF, 0x0003, 0x0000, 0x0000, 0x0000, 0x7FC0, 0x7FFF,
58 0x7EFF, 0x7FFF, 0x7FFF, 0x7FFF, 0x7FFF, 0x7FFF, 0x7FFF, 0x7FFF,
59 0x7FFF, 0x7FFF, 0x7FFF, 0x7FFF, 0x7FFF, 0x7FFF, 0x7FFF, 0x7FFF,
63 static const uint16_t P384_R2
[] = {
65 0x1000, 0x0000, 0x0000, 0x7FFF, 0x7FFF, 0x0001, 0x0000, 0x0010,
66 0x0000, 0x0000, 0x0000, 0x7F00, 0x7FFF, 0x01FF, 0x0000, 0x1000,
67 0x0000, 0x2000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
71 static const uint16_t P384_B
[] = {
73 0x7333, 0x2096, 0x70D1, 0x2310, 0x3020, 0x6197, 0x1464, 0x35BB,
74 0x70CA, 0x0117, 0x1920, 0x4136, 0x5FC8, 0x5713, 0x4938, 0x7DD2,
75 0x4DD2, 0x4A71, 0x0220, 0x683E, 0x2C87, 0x4DB1, 0x7BFF, 0x6C09,
79 static const uint16_t P521_P
[] = {
81 0x7FFF, 0x7FFF, 0x7FFF, 0x7FFF, 0x7FFF, 0x7FFF, 0x7FFF, 0x7FFF,
82 0x7FFF, 0x7FFF, 0x7FFF, 0x7FFF, 0x7FFF, 0x7FFF, 0x7FFF, 0x7FFF,
83 0x7FFF, 0x7FFF, 0x7FFF, 0x7FFF, 0x7FFF, 0x7FFF, 0x7FFF, 0x7FFF,
84 0x7FFF, 0x7FFF, 0x7FFF, 0x7FFF, 0x7FFF, 0x7FFF, 0x7FFF, 0x7FFF,
85 0x7FFF, 0x7FFF, 0x07FF
88 static const uint16_t P521_R2
[] = {
90 0x0100, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
91 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
92 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
93 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
94 0x0000, 0x0000, 0x0000
97 static const uint16_t P521_B
[] = {
99 0x7002, 0x6A07, 0x751A, 0x228F, 0x71EF, 0x5869, 0x20F4, 0x1EFC,
100 0x7357, 0x37E0, 0x4EEC, 0x605E, 0x1652, 0x26F6, 0x31FA, 0x4A8F,
101 0x6193, 0x3C2A, 0x3C42, 0x48C7, 0x3489, 0x6771, 0x4C57, 0x5CCD,
102 0x2725, 0x545B, 0x503B, 0x5B42, 0x21A0, 0x2534, 0x687E, 0x70E4,
103 0x1618, 0x27D7, 0x0465
114 static inline const curve_params
*
115 id_to_curve(int curve
)
117 static const curve_params pp
[] = {
118 { P256_P
, P256_B
, P256_R2
, 0x0001, 65 },
119 { P384_P
, P384_B
, P384_R2
, 0x0001, 97 },
120 { P521_P
, P521_B
, P521_R2
, 0x0001, 133 }
123 return &pp
[curve
- BR_EC_secp256r1
];
126 #define I15_LEN ((BR_MAX_EC_SIZE + 29) / 15)
129 * Type for a point in Jacobian coordinates:
130 * -- three values, x, y and z, in Montgomery representation
131 * -- affine coordinates are X = x / z^2 and Y = y / z^3
132 * -- for the point at infinity, z = 0
135 uint16_t c
[3][I15_LEN
];
139 * We use a custom interpreter that uses a dozen registers, and
140 * only six operations:
141 * MSET(d, a) copy a into d
142 * MADD(d, a) d = d+a (modular)
143 * MSUB(d, a) d = d-a (modular)
144 * MMUL(d, a, b) d = a*b (Montgomery multiplication)
145 * MINV(d, a, b) invert d modulo p; a and b are used as scratch registers
146 * MTZ(d) clear return value if d = 0
147 * Destination of MMUL (d) must be distinct from operands (a and b).
148 * There is no such constraint for MSUB and MADD.
150 * Registers include the operand coordinates, and temporaries.
152 #define MSET(d, a) (0x0000 + ((d) << 8) + ((a) << 4))
153 #define MADD(d, a) (0x1000 + ((d) << 8) + ((a) << 4))
154 #define MSUB(d, a) (0x2000 + ((d) << 8) + ((a) << 4))
155 #define MMUL(d, a, b) (0x3000 + ((d) << 8) + ((a) << 4) + (b))
156 #define MINV(d, a, b) (0x4000 + ((d) << 8) + ((a) << 4) + (b))
157 #define MTZ(d) (0x5000 + ((d) << 8))
161 * Registers for the input operands.
171 * Alternate names for the first input operand.
189 * Extra scratch registers available when there is no second operand (e.g.
190 * for "double" and "affine").
197 * Doubling formulas are:
200 * m = 3*(x + z^2)*(x - z^2)
202 * y' = m*(s - x') - 8*y^4
205 * If y = 0 (P has order 2) then this yields infinity (z' = 0), as it
206 * should. This case should not happen anyway, because our curves have
207 * prime order, and thus do not contain any point of order 2.
209 * If P is infinity (z = 0), then again the formulas yield infinity,
210 * which is correct. Thus, this code works for all points.
212 * Cost: 8 multiplications
214 static const uint16_t code_double
[] = {
216 * Compute z^2 (in t1).
221 * Compute x-z^2 (in t2) and then x+z^2 (in t1).
228 * Compute m = 3*(x+z^2)*(x-z^2) (in t1).
236 * Compute s = 4*x*y^2 (in t2) and 2*y^2 (in t3).
244 * Compute x' = m^2 - 2*s.
251 * Compute z' = 2*y*z.
258 * Compute y' = m*(s - x') - 8*y^4. Note that we already have
271 * Addtions formulas are:
279 * x3 = r^2 - h^3 - 2 * u1 * h^2
280 * y3 = r * (u1 * h^2 - x3) - s1 * h^3
283 * If both P1 and P2 are infinity, then z1 == 0 and z2 == 0, implying that
284 * z3 == 0, so the result is correct.
285 * If either of P1 or P2 is infinity, but not both, then z3 == 0, which is
287 * h == 0 only if u1 == u2; this happens in two cases:
288 * -- if s1 == s2 then P1 and/or P2 is infinity, or P1 == P2
289 * -- if s1 != s2 then P1 + P2 == infinity (but neither P1 or P2 is infinity)
291 * Thus, the following situations are not handled correctly:
292 * -- P1 = 0 and P2 != 0
293 * -- P1 != 0 and P2 = 0
295 * All other cases are properly computed. However, even in "incorrect"
296 * situations, the three coordinates still are properly formed field
299 * The returned flag is cleared if r == 0. This happens in the following
301 * -- Both points are on the same horizontal line (same Y coordinate).
302 * -- Both points are infinity.
303 * -- One point is infinity and the other is on line Y = 0.
304 * The third case cannot happen with our curves (there is no valid point
305 * on line Y = 0 since that would be a point of order 2). If the two
306 * source points are non-infinity, then remains only the case where the
307 * two points are on the same horizontal line.
309 * This allows us to detect the "P1 == P2" case, assuming that P1 != 0 and
311 * -- If the returned value is not the point at infinity, then it was properly
313 * -- Otherwise, if the returned flag is 1, then P1+P2 = 0, and the result
314 * is indeed the point at infinity.
315 * -- Otherwise (result is infinity, flag is 0), then P1 = P2 and we should
316 * use the 'double' code.
318 * Cost: 16 multiplications
320 static const uint16_t code_add
[] = {
322 * Compute u1 = x1*z2^2 (in t1) and s1 = y1*z2^3 (in t3).
330 * Compute u2 = x2*z1^2 (in t2) and s2 = y2*z1^3 (in t4).
338 * Compute h = u2 - u1 (in t2) and r = s2 - s1 (in t4).
344 * Report cases where r = 0 through the returned flag.
349 * Compute u1*h^2 (in t6) and h^3 (in t5).
356 * Compute x3 = r^2 - h^3 - 2*u1*h^2.
357 * t1 and t7 can be used as scratch registers.
365 * Compute y3 = r*(u1*h^2 - x3) - s1*h^3.
373 * Compute z3 = h*z1*z2.
382 * Check that the point is on the curve. This code snippet assumes the
383 * following conventions:
384 * -- Coordinates x and y have been freshly decoded in P1 (but not
385 * converted to Montgomery coordinates yet).
386 * -- P2x, P2y and P2z are set to, respectively, R^2, b*R and 1.
388 static const uint16_t code_check
[] = {
390 /* Convert x and y to Montgomery representation. */
396 /* Compute x^3 in t1. */
400 /* Subtract 3*x from t1. */
408 /* Compute y^2 in t2. */
411 /* Compare y^2 with x^3 - 3*x + b; they must match. */
415 /* Set z to 1 (in Montgomery representation). */
422 * Conversion back to affine coordinates. This code snippet assumes that
423 * the z coordinate of P2 is set to 1 (not in Montgomery representation).
425 static const uint16_t code_affine
[] = {
427 /* Save z*R in t1. */
430 /* Compute z^3 in t2. */
435 /* Invert to (1/z^3) in t2. */
442 /* Compute (1/z^2) in t3. */
453 run_code(jacobian
*P1
, const jacobian
*P2
,
454 const curve_params
*cc
, const uint16_t *code
)
457 uint16_t t
[13][I15_LEN
];
463 * Copy the two operands in the dedicated registers.
465 memcpy(t
[P1x
], P1
->c
, 3 * I15_LEN
* sizeof(uint16_t));
466 memcpy(t
[P2x
], P2
->c
, 3 * I15_LEN
* sizeof(uint16_t));
472 unsigned op
, d
, a
, b
;
478 d
= (op
>> 8) & 0x0F;
479 a
= (op
>> 4) & 0x0F;
485 unsigned char tp
[(BR_MAX_EC_SIZE
+ 7) >> 3];
488 memcpy(t
[d
], t
[a
], I15_LEN
* sizeof(uint16_t));
491 ctl
= br_i15_add(t
[d
], t
[a
], 1);
492 ctl
|= NOT(br_i15_sub(t
[d
], cc
->p
, 0));
493 br_i15_sub(t
[d
], cc
->p
, ctl
);
496 br_i15_add(t
[d
], cc
->p
, br_i15_sub(t
[d
], t
[a
], 1));
499 br_i15_montymul(t
[d
], t
[a
], t
[b
], cc
->p
, cc
->p0i
);
502 plen
= (cc
->p
[0] - (cc
->p
[0] >> 4) + 7) >> 3;
503 br_i15_encode(tp
, plen
, cc
->p
);
505 br_i15_modpow(t
[d
], tp
, plen
,
506 cc
->p
, cc
->p0i
, t
[a
], t
[b
]);
509 r
&= ~br_i15_iszero(t
[d
]);
517 memcpy(P1
->c
, t
[P1x
], 3 * I15_LEN
* sizeof(uint16_t));
522 set_one(uint16_t *x
, const uint16_t *p
)
526 plen
= (p
[0] + 31) >> 4;
527 memset(x
, 0, plen
* sizeof *x
);
533 point_zero(jacobian
*P
, const curve_params
*cc
)
535 memset(P
, 0, sizeof *P
);
536 P
->c
[0][0] = P
->c
[1][0] = P
->c
[2][0] = cc
->p
[0];
540 point_double(jacobian
*P
, const curve_params
*cc
)
542 run_code(P
, P
, cc
, code_double
);
545 static inline uint32_t
546 point_add(jacobian
*P1
, const jacobian
*P2
, const curve_params
*cc
)
548 return run_code(P1
, P2
, cc
, code_add
);
552 point_mul(jacobian
*P
, const unsigned char *x
, size_t xlen
,
553 const curve_params
*cc
)
556 * We do a simple double-and-add ladder with a 2-bit window
557 * to make only one add every two doublings. We thus first
558 * precompute 2P and 3P in some local buffers.
560 * We always perform two doublings and one addition; the
561 * addition is with P, 2P and 3P and is done in a temporary
564 * The addition code cannot handle cases where one of the
565 * operands is infinity, which is the case at the start of the
566 * ladder. We therefore need to maintain a flag that controls
570 jacobian P2
, P3
, Q
, T
, U
;
572 memcpy(&P2
, P
, sizeof P2
);
573 point_double(&P2
, cc
);
574 memcpy(&P3
, P
, sizeof P3
);
575 point_add(&P3
, &P2
, cc
);
579 while (xlen
-- > 0) {
582 for (k
= 6; k
>= 0; k
-= 2) {
586 point_double(&Q
, cc
);
587 point_double(&Q
, cc
);
588 memcpy(&T
, P
, sizeof T
);
589 memcpy(&U
, &Q
, sizeof U
);
590 bits
= (*x
>> k
) & (uint32_t)3;
592 CCOPY(EQ(bits
, 2), &T
, &P2
, sizeof T
);
593 CCOPY(EQ(bits
, 3), &T
, &P3
, sizeof T
);
594 point_add(&U
, &T
, cc
);
595 CCOPY(bnz
& qz
, &Q
, &T
, sizeof Q
);
596 CCOPY(bnz
& ~qz
, &Q
, &U
, sizeof Q
);
601 memcpy(P
, &Q
, sizeof Q
);
605 * Decode point into Jacobian coordinates. This function does not support
606 * the point at infinity. If the point is invalid then this returns 0, but
607 * the coordinates are still set to properly formed field elements.
610 point_decode(jacobian
*P
, const void *src
, size_t len
, const curve_params
*cc
)
613 * Points must use uncompressed format:
614 * -- first byte is 0x04;
615 * -- coordinates X and Y use unsigned big-endian, with the same
616 * length as the field modulus.
618 * We don't support hybrid format (uncompressed, but first byte
619 * has value 0x06 or 0x07, depending on the least significant bit
620 * of Y) because it is rather useless, and explicitly forbidden
621 * by PKIX (RFC 5480, section 2.2).
623 * We don't support compressed format either, because it is not
624 * much used in practice (there are or were patent-related
625 * concerns about point compression, which explains the lack of
626 * generalised support). Also, point compression support would
627 * need a bit more code.
629 const unsigned char *buf
;
636 plen
= (cc
->p
[0] - (cc
->p
[0] >> 4) + 7) >> 3;
637 if (len
!= 1 + (plen
<< 1)) {
640 r
= br_i15_decode_mod(P
->c
[0], buf
+ 1, plen
, cc
->p
);
641 r
&= br_i15_decode_mod(P
->c
[1], buf
+ 1 + plen
, plen
, cc
->p
);
646 r
&= EQ(buf
[0], 0x04);
648 r &= EQ(buf[0], 0x04) | (EQ(buf[0] & 0xFE, 0x06)
649 & ~(uint32_t)(buf[0] ^ buf[plen << 1]));
653 * Convert coordinates and check that the point is valid.
655 zlen
= ((cc
->p
[0] + 31) >> 4) * sizeof(uint16_t);
656 memcpy(Q
.c
[0], cc
->R2
, zlen
);
657 memcpy(Q
.c
[1], cc
->b
, zlen
);
658 set_one(Q
.c
[2], cc
->p
);
659 r
&= ~run_code(P
, &Q
, cc
, code_check
);
664 * Encode a point. This method assumes that the point is correct and is
665 * not the point at infinity. Encoded size is always 1+2*plen, where
666 * plen is the field modulus length, in bytes.
669 point_encode(void *dst
, const jacobian
*P
, const curve_params
*cc
)
676 plen
= (cc
->p
[0] - (cc
->p
[0] >> 4) + 7) >> 3;
678 memcpy(&Q
, P
, sizeof *P
);
679 set_one(T
.c
[2], cc
->p
);
680 run_code(&Q
, &T
, cc
, code_affine
);
681 br_i15_encode(buf
+ 1, plen
, Q
.c
[0]);
682 br_i15_encode(buf
+ 1 + plen
, plen
, Q
.c
[1]);
685 static const br_ec_curve_def
*
686 id_to_curve_def(int curve
)
689 case BR_EC_secp256r1
:
690 return &br_secp256r1
;
691 case BR_EC_secp384r1
:
692 return &br_secp384r1
;
693 case BR_EC_secp521r1
:
694 return &br_secp521r1
;
699 static const unsigned char *
700 api_generator(int curve
, size_t *len
)
702 const br_ec_curve_def
*cd
;
704 cd
= id_to_curve_def(curve
);
705 *len
= cd
->generator_len
;
706 return cd
->generator
;
709 static const unsigned char *
710 api_order(int curve
, size_t *len
)
712 const br_ec_curve_def
*cd
;
714 cd
= id_to_curve_def(curve
);
715 *len
= cd
->order_len
;
720 api_mul(unsigned char *G
, size_t Glen
,
721 const unsigned char *x
, size_t xlen
, int curve
)
724 const curve_params
*cc
;
727 cc
= id_to_curve(curve
);
728 r
= point_decode(&P
, G
, Glen
, cc
);
729 point_mul(&P
, x
, xlen
, cc
);
730 if (Glen
== cc
->point_len
) {
731 point_encode(G
, &P
, cc
);
737 api_mulgen(unsigned char *R
,
738 const unsigned char *x
, size_t xlen
, int curve
)
740 const unsigned char *G
;
743 G
= api_generator(curve
, &Glen
);
745 api_mul(R
, Glen
, x
, xlen
, curve
);
750 api_muladd(unsigned char *A
, const unsigned char *B
, size_t len
,
751 const unsigned char *x
, size_t xlen
,
752 const unsigned char *y
, size_t ylen
, int curve
)
755 const curve_params
*cc
;
759 * TODO: see about merging the two ladders. Right now, we do
760 * two independant point multiplications, which is a bit
761 * wasteful of CPU resources (but yields short code).
764 cc
= id_to_curve(curve
);
765 r
= point_decode(&P
, A
, len
, cc
);
769 B
= api_generator(curve
, &Glen
);
771 r
&= point_decode(&Q
, B
, len
, cc
);
772 point_mul(&P
, x
, xlen
, cc
);
773 point_mul(&Q
, y
, ylen
, cc
);
776 * We want to compute P+Q. Since the base points A and B are distinct
777 * from infinity, and the multipliers are non-zero and lower than the
778 * curve order, then we know that P and Q are non-infinity. This
779 * leaves two special situations to test for:
780 * -- If P = Q then we must use point_double().
781 * -- If P+Q = 0 then we must report an error.
783 t
= point_add(&P
, &Q
, cc
);
784 point_double(&Q
, cc
);
785 z
= br_i15_iszero(P
.c
[2]);
788 * If z is 1 then either P+Q = 0 (t = 1) or P = Q (t = 0). So we
789 * have the following:
791 * z = 0, t = 0 return P (normal addition)
792 * z = 0, t = 1 return P (normal addition)
793 * z = 1, t = 0 return Q (a 'double' case)
794 * z = 1, t = 1 report an error (P+Q = 0)
796 CCOPY(z
& ~t
, &P
, &Q
, sizeof Q
);
797 point_encode(A
, &P
, cc
);
803 /* see bearssl_ec.h */
804 const br_ec_impl br_ec_prime_i15
= {
805 (uint32_t)0x03800000,