_ Git - BearSSL/blob - ec/ec_prime_i15.c

   1 /*
   2  * Copyright (c) 2017 Thomas Pornin <pornin@bolet.org>
   3  *
   4  * Permission is hereby granted, free of charge, to any person obtaining
   5  * a copy of this software and associated documentation files (the
   6  * "Software"), to deal in the Software without restriction, including
   7  * without limitation the rights to use, copy, modify, merge, publish,
   8  * distribute, sublicense, and/or sell copies of the Software, and to
   9  * permit persons to whom the Software is furnished to do so, subject to
  10  * the following conditions:
  11  *
  12  * The above copyright notice and this permission notice shall be
  13  * included in all copies or substantial portions of the Software.
  14  *
  15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
  16  * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
  17  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
  18  * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
  19  * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
  20  * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
  21  * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
  22  * SOFTWARE.
  23  */
  24
  25 #include "inner.h"
  26
  27 /*
  28  * Parameters for supported curves:
  29  *   - field modulus p
  30  *   - R^2 mod p (R = 2^(15k) for the smallest k such that R >= p)
  31  *   - b*R mod p (b is the second curve equation parameter)
  32  */
  33
  34 static const uint16_t P256_P[] = {
  35         0x0111,
  36         0x7FFF, 0x7FFF, 0x7FFF, 0x7FFF, 0x7FFF, 0x7FFF, 0x003F, 0x0000,
  37         0x0000, 0x0000, 0x0000, 0x0000, 0x1000, 0x0000, 0x4000, 0x7FFF,
  38         0x7FFF, 0x0001
  39 };
  40
  41 static const uint16_t P256_R2[] = {
  42         0x0111,
  43         0x0000, 0x6000, 0x0000, 0x0000, 0x0000, 0x0000, 0x7FFC, 0x7FFF,
  44         0x7FBF, 0x7FFF, 0x7FBF, 0x7FFF, 0x7FFF, 0x7FFF, 0x77FF, 0x7FFF,
  45         0x4FFF, 0x0000
  46 };
  47
  48 static const uint16_t P256_B[] = {
  49         0x0111,
  50         0x770C, 0x5EEF, 0x29C4, 0x3EC4, 0x6273, 0x0486, 0x4543, 0x3993,
  51         0x3C01, 0x6B56, 0x212E, 0x57EE, 0x4882, 0x204B, 0x7483, 0x3C16,
  52         0x0187, 0x0000
  53 };
  54
  55 static const uint16_t P384_P[] = {
  56         0x0199,
  57         0x7FFF, 0x7FFF, 0x0003, 0x0000, 0x0000, 0x0000, 0x7FC0, 0x7FFF,
  58         0x7EFF, 0x7FFF, 0x7FFF, 0x7FFF, 0x7FFF, 0x7FFF, 0x7FFF, 0x7FFF,
  59         0x7FFF, 0x7FFF, 0x7FFF, 0x7FFF, 0x7FFF, 0x7FFF, 0x7FFF, 0x7FFF,
  60         0x7FFF, 0x01FF
  61 };
  62
  63 static const uint16_t P384_R2[] = {
  64         0x0199,
  65         0x1000, 0x0000, 0x0000, 0x7FFF, 0x7FFF, 0x0001, 0x0000, 0x0010,
  66         0x0000, 0x0000, 0x0000, 0x7F00, 0x7FFF, 0x01FF, 0x0000, 0x1000,
  67         0x0000, 0x2000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
  68         0x0000, 0x0000
  69 };
  70
  71 static const uint16_t P384_B[] = {
  72         0x0199,
  73         0x7333, 0x2096, 0x70D1, 0x2310, 0x3020, 0x6197, 0x1464, 0x35BB,
  74         0x70CA, 0x0117, 0x1920, 0x4136, 0x5FC8, 0x5713, 0x4938, 0x7DD2,
  75         0x4DD2, 0x4A71, 0x0220, 0x683E, 0x2C87, 0x4DB1, 0x7BFF, 0x6C09,
  76         0x0452, 0x0084
  77 };
  78
  79 static const uint16_t P521_P[] = {
  80         0x022B,
  81         0x7FFF, 0x7FFF, 0x7FFF, 0x7FFF, 0x7FFF, 0x7FFF, 0x7FFF, 0x7FFF,
  82         0x7FFF, 0x7FFF, 0x7FFF, 0x7FFF, 0x7FFF, 0x7FFF, 0x7FFF, 0x7FFF,
  83         0x7FFF, 0x7FFF, 0x7FFF, 0x7FFF, 0x7FFF, 0x7FFF, 0x7FFF, 0x7FFF,
  84         0x7FFF, 0x7FFF, 0x7FFF, 0x7FFF, 0x7FFF, 0x7FFF, 0x7FFF, 0x7FFF,
  85         0x7FFF, 0x7FFF, 0x07FF
  86 };
  87
  88 static const uint16_t P521_R2[] = {
  89         0x022B,
  90         0x0100, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
  91         0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
  92         0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
  93         0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
  94         0x0000, 0x0000, 0x0000
  95 };
  96
  97 static const uint16_t P521_B[] = {
  98         0x022B,
  99         0x7002, 0x6A07, 0x751A, 0x228F, 0x71EF, 0x5869, 0x20F4, 0x1EFC,
 100         0x7357, 0x37E0, 0x4EEC, 0x605E, 0x1652, 0x26F6, 0x31FA, 0x4A8F,
 101         0x6193, 0x3C2A, 0x3C42, 0x48C7, 0x3489, 0x6771, 0x4C57, 0x5CCD,
 102         0x2725, 0x545B, 0x503B, 0x5B42, 0x21A0, 0x2534, 0x687E, 0x70E4,
 103         0x1618, 0x27D7, 0x0465
 104 };
 105
 106 typedef struct {
 107         const uint16_t *p;
 108         const uint16_t *b;
 109         const uint16_t *R2;
 110         uint16_t p0i;
 111         size_t point_len;
 112 } curve_params;
 113
 114 static inline const curve_params *
 115 id_to_curve(int curve)
 116 {
 117         static const curve_params pp[] = {
 118                 { P256_P, P256_B, P256_R2, 0x0001,  65 },
 119                 { P384_P, P384_B, P384_R2, 0x0001,  97 },
 120                 { P521_P, P521_B, P521_R2, 0x0001, 133 }
 121         };
 122
 123         return &pp[curve - BR_EC_secp256r1];
 124 }
 125
 126 #define I15_LEN   ((BR_MAX_EC_SIZE + 29) / 15)
 127
 128 /*
 129  * Type for a point in Jacobian coordinates:
 130  * -- three values, x, y and z, in Montgomery representation
 131  * -- affine coordinates are X = x / z^2 and Y = y / z^3
 132  * -- for the point at infinity, z = 0
 133  */
 134 typedef struct {
 135         uint16_t c[3][I15_LEN];
 136 } jacobian;
 137
 138 /*
 139  * We use a custom interpreter that uses a dozen registers, and
 140  * only six operations:
 141  *    MSET(d, a)       copy a into d
 142  *    MADD(d, a)       d = d+a (modular)
 143  *    MSUB(d, a)       d = d-a (modular)
 144  *    MMUL(d, a, b)    d = a*b (Montgomery multiplication)
 145  *    MINV(d, a, b)    invert d modulo p; a and b are used as scratch registers
 146  *    MTZ(d)           clear return value if d = 0
 147  * Destination of MMUL (d) must be distinct from operands (a and b).
 148  * There is no such constraint for MSUB and MADD.
 149  *
 150  * Registers include the operand coordinates, and temporaries.
 151  */
 152 #define MSET(d, a)      (0x0000 + ((d) << 8) + ((a) << 4))
 153 #define MADD(d, a)      (0x1000 + ((d) << 8) + ((a) << 4))
 154 #define MSUB(d, a)      (0x2000 + ((d) << 8) + ((a) << 4))
 155 #define MMUL(d, a, b)   (0x3000 + ((d) << 8) + ((a) << 4) + (b))
 156 #define MINV(d, a, b)   (0x4000 + ((d) << 8) + ((a) << 4) + (b))
 157 #define MTZ(d)          (0x5000 + ((d) << 8))
 158 #define ENDCODE         0
 159
 160 /*
 161  * Registers for the input operands.
 162  */
 163 #define P1x    0
 164 #define P1y    1
 165 #define P1z    2
 166 #define P2x    3
 167 #define P2y    4
 168 #define P2z    5
 169
 170 /*
 171  * Alternate names for the first input operand.
 172  */
 173 #define Px     0
 174 #define Py     1
 175 #define Pz     2
 176
 177 /*
 178  * Temporaries.
 179  */
 180 #define t1     6
 181 #define t2     7
 182 #define t3     8
 183 #define t4     9
 184 #define t5    10
 185 #define t6    11
 186 #define t7    12
 187
 188 /*
 189  * Extra scratch registers available when there is no second operand (e.g.
 190  * for "double" and "affine").
 191  */
 192 #define t8     3
 193 #define t9     4
 194 #define t10    5
 195
 196 /*
 197  * Doubling formulas are:
 198  *
 199  *   s = 4*x*y^2
 200  *   m = 3*(x + z^2)*(x - z^2)
 201  *   x' = m^2 - 2*s
 202  *   y' = m*(s - x') - 8*y^4
 203  *   z' = 2*y*z
 204  *
 205  * If y = 0 (P has order 2) then this yields infinity (z' = 0), as it
 206  * should. This case should not happen anyway, because our curves have
 207  * prime order, and thus do not contain any point of order 2.
 208  *
 209  * If P is infinity (z = 0), then again the formulas yield infinity,
 210  * which is correct. Thus, this code works for all points.
 211  *
 212  * Cost: 8 multiplications
 213  */
 214 static const uint16_t code_double[] = {
 215         /*
 216          * Compute z^2 (in t1).
 217          */
 218         MMUL(t1, Pz, Pz),
 219
 220         /*
 221          * Compute x-z^2 (in t2) and then x+z^2 (in t1).
 222          */
 223         MSET(t2, Px),
 224         MSUB(t2, t1),
 225         MADD(t1, Px),
 226
 227         /*
 228          * Compute m = 3*(x+z^2)*(x-z^2) (in t1).
 229          */
 230         MMUL(t3, t1, t2),
 231         MSET(t1, t3),
 232         MADD(t1, t3),
 233         MADD(t1, t3),
 234
 235         /*
 236          * Compute s = 4*x*y^2 (in t2) and 2*y^2 (in t3).
 237          */
 238         MMUL(t3, Py, Py),
 239         MADD(t3, t3),
 240         MMUL(t2, Px, t3),
 241         MADD(t2, t2),
 242
 243         /*
 244          * Compute x' = m^2 - 2*s.
 245          */
 246         MMUL(Px, t1, t1),
 247         MSUB(Px, t2),
 248         MSUB(Px, t2),
 249
 250         /*
 251          * Compute z' = 2*y*z.
 252          */
 253         MMUL(t4, Py, Pz),
 254         MSET(Pz, t4),
 255         MADD(Pz, t4),
 256
 257         /*
 258          * Compute y' = m*(s - x') - 8*y^4. Note that we already have
 259          * 2*y^2 in t3.
 260          */
 261         MSUB(t2, Px),
 262         MMUL(Py, t1, t2),
 263         MMUL(t4, t3, t3),
 264         MSUB(Py, t4),
 265         MSUB(Py, t4),
 266
 267         ENDCODE
 268 };
 269
 270 /*
 271  * Addtions formulas are:
 272  *
 273  *   u1 = x1 * z2^2
 274  *   u2 = x2 * z1^2
 275  *   s1 = y1 * z2^3
 276  *   s2 = y2 * z1^3
 277  *   h = u2 - u1
 278  *   r = s2 - s1
 279  *   x3 = r^2 - h^3 - 2 * u1 * h^2
 280  *   y3 = r * (u1 * h^2 - x3) - s1 * h^3
 281  *   z3 = h * z1 * z2
 282  *
 283  * If both P1 and P2 are infinity, then z1 == 0 and z2 == 0, implying that
 284  * z3 == 0, so the result is correct.
 285  * If either of P1 or P2 is infinity, but not both, then z3 == 0, which is
 286  * not correct.
 287  * h == 0 only if u1 == u2; this happens in two cases:
 288  * -- if s1 == s2 then P1 and/or P2 is infinity, or P1 == P2
 289  * -- if s1 != s2 then P1 + P2 == infinity (but neither P1 or P2 is infinity)
 290  *
 291  * Thus, the following situations are not handled correctly:
 292  * -- P1 = 0 and P2 != 0
 293  * -- P1 != 0 and P2 = 0
 294  * -- P1 = P2
 295  * All other cases are properly computed. However, even in "incorrect"
 296  * situations, the three coordinates still are properly formed field
 297  * elements.
 298  *
 299  * The returned flag is cleared if r == 0. This happens in the following
 300  * cases:
 301  * -- Both points are on the same horizontal line (same Y coordinate).
 302  * -- Both points are infinity.
 303  * -- One point is infinity and the other is on line Y = 0.
 304  * The third case cannot happen with our curves (there is no valid point
 305  * on line Y = 0 since that would be a point of order 2). If the two
 306  * source points are non-infinity, then remains only the case where the
 307  * two points are on the same horizontal line.
 308  *
 309  * This allows us to detect the "P1 == P2" case, assuming that P1 != 0 and
 310  * P2 != 0:
 311  * -- If the returned value is not the point at infinity, then it was properly
 312  * computed.
 313  * -- Otherwise, if the returned flag is 1, then P1+P2 = 0, and the result
 314  * is indeed the point at infinity.
 315  * -- Otherwise (result is infinity, flag is 0), then P1 = P2 and we should
 316  * use the 'double' code.
 317  *
 318  * Cost: 16 multiplications
 319  */
 320 static const uint16_t code_add[] = {
 321         /*
 322          * Compute u1 = x1*z2^2 (in t1) and s1 = y1*z2^3 (in t3).
 323          */
 324         MMUL(t3, P2z, P2z),
 325         MMUL(t1, P1x, t3),
 326         MMUL(t4, P2z, t3),
 327         MMUL(t3, P1y, t4),
 328
 329         /*
 330          * Compute u2 = x2*z1^2 (in t2) and s2 = y2*z1^3 (in t4).
 331          */
 332         MMUL(t4, P1z, P1z),
 333         MMUL(t2, P2x, t4),
 334         MMUL(t5, P1z, t4),
 335         MMUL(t4, P2y, t5),
 336
 337         /*
 338          * Compute h = u2 - u1 (in t2) and r = s2 - s1 (in t4).
 339          */
 340         MSUB(t2, t1),
 341         MSUB(t4, t3),
 342
 343         /*
 344          * Report cases where r = 0 through the returned flag.
 345          */
 346         MTZ(t4),
 347
 348         /*
 349          * Compute u1*h^2 (in t6) and h^3 (in t5).
 350          */
 351         MMUL(t7, t2, t2),
 352         MMUL(t6, t1, t7),
 353         MMUL(t5, t7, t2),
 354
 355         /*
 356          * Compute x3 = r^2 - h^3 - 2*u1*h^2.
 357          * t1 and t7 can be used as scratch registers.
 358          */
 359         MMUL(P1x, t4, t4),
 360         MSUB(P1x, t5),
 361         MSUB(P1x, t6),
 362         MSUB(P1x, t6),
 363
 364         /*
 365          * Compute y3 = r*(u1*h^2 - x3) - s1*h^3.
 366          */
 367         MSUB(t6, P1x),
 368         MMUL(P1y, t4, t6),
 369         MMUL(t1, t5, t3),
 370         MSUB(P1y, t1),
 371
 372         /*
 373          * Compute z3 = h*z1*z2.
 374          */
 375         MMUL(t1, P1z, P2z),
 376         MMUL(P1z, t1, t2),
 377
 378         ENDCODE
 379 };
 380
 381 /*
 382  * Check that the point is on the curve. This code snippet assumes the
 383  * following conventions:
 384  * -- Coordinates x and y have been freshly decoded in P1 (but not
 385  * converted to Montgomery coordinates yet).
 386  * -- P2x, P2y and P2z are set to, respectively, R^2, b*R and 1.
 387  */
 388 static const uint16_t code_check[] = {
 389
 390         /* Convert x and y to Montgomery representation. */
 391         MMUL(t1, P1x, P2x),
 392         MMUL(t2, P1y, P2x),
 393         MSET(P1x, t1),
 394         MSET(P1y, t2),
 395
 396         /* Compute x^3 in t1. */
 397         MMUL(t2, P1x, P1x),
 398         MMUL(t1, P1x, t2),
 399
 400         /* Subtract 3*x from t1. */
 401         MSUB(t1, P1x),
 402         MSUB(t1, P1x),
 403         MSUB(t1, P1x),
 404
 405         /* Add b. */
 406         MADD(t1, P2y),
 407
 408         /* Compute y^2 in t2. */
 409         MMUL(t2, P1y, P1y),
 410
 411         /* Compare y^2 with x^3 - 3*x + b; they must match. */
 412         MSUB(t1, t2),
 413         MTZ(t1),
 414
 415         /* Set z to 1 (in Montgomery representation). */
 416         MMUL(P1z, P2x, P2z),
 417
 418         ENDCODE
 419 };
 420
 421 /*
 422  * Conversion back to affine coordinates. This code snippet assumes that
 423  * the z coordinate of P2 is set to 1 (not in Montgomery representation).
 424  */
 425 static const uint16_t code_affine[] = {
 426
 427         /* Save z*R in t1. */
 428         MSET(t1, P1z),
 429
 430         /* Compute z^3 in t2. */
 431         MMUL(t2, P1z, P1z),
 432         MMUL(t3, P1z, t2),
 433         MMUL(t2, t3, P2z),
 434
 435         /* Invert to (1/z^3) in t2. */
 436         MINV(t2, t3, t4),
 437
 438         /* Compute y. */
 439         MSET(t3, P1y),
 440         MMUL(P1y, t2, t3),
 441
 442         /* Compute (1/z^2) in t3. */
 443         MMUL(t3, t2, t1),
 444
 445         /* Compute x. */
 446         MSET(t2, P1x),
 447         MMUL(P1x, t2, t3),
 448
 449         ENDCODE
 450 };
 451
 452 static uint32_t
 453 run_code(jacobian *P1, const jacobian *P2,
 454         const curve_params *cc, const uint16_t *code)
 455 {
 456         uint32_t r;
 457         uint16_t t[13][I15_LEN];
 458         size_t u;
 459
 460         r = 1;
 461
 462         /*
 463          * Copy the two operands in the dedicated registers.
 464          */
 465         memcpy(t[P1x], P1->c, 3 * I15_LEN * sizeof(uint16_t));
 466         memcpy(t[P2x], P2->c, 3 * I15_LEN * sizeof(uint16_t));
 467
 468         /*
 469          * Run formulas.
 470          */
 471         for (u = 0;; u ++) {
 472                 unsigned op, d, a, b;
 473
 474                 op = code[u];
 475                 if (op == 0) {
 476                         break;
 477                 }
 478                 d = (op >> 8) & 0x0F;
 479                 a = (op >> 4) & 0x0F;
 480                 b = op & 0x0F;
 481                 op >>= 12;
 482                 switch (op) {
 483                         uint32_t ctl;
 484                         size_t plen;
 485                         unsigned char tp[(BR_MAX_EC_SIZE + 7) >> 3];
 486
 487                 case 0:
 488                         memcpy(t[d], t[a], I15_LEN * sizeof(uint16_t));
 489                         break;
 490                 case 1:
 491                         ctl = br_i15_add(t[d], t[a], 1);
 492                         ctl |= NOT(br_i15_sub(t[d], cc->p, 0));
 493                         br_i15_sub(t[d], cc->p, ctl);
 494                         break;
 495                 case 2:
 496                         br_i15_add(t[d], cc->p, br_i15_sub(t[d], t[a], 1));
 497                         break;
 498                 case 3:
 499                         br_i15_montymul(t[d], t[a], t[b], cc->p, cc->p0i);
 500                         break;
 501                 case 4:
 502                         plen = (cc->p[0] - (cc->p[0] >> 4) + 7) >> 3;
 503                         br_i15_encode(tp, plen, cc->p);
 504                         tp[plen - 1] -= 2;
 505                         br_i15_modpow(t[d], tp, plen,
 506                                 cc->p, cc->p0i, t[a], t[b]);
 507                         break;
 508                 default:
 509                         r &= ~br_i15_iszero(t[d]);
 510                         break;
 511                 }
 512         }
 513
 514         /*
 515          * Copy back result.
 516          */
 517         memcpy(P1->c, t[P1x], 3 * I15_LEN * sizeof(uint16_t));
 518         return r;
 519 }
 520
 521 static void
 522 set_one(uint16_t *x, const uint16_t *p)
 523 {
 524         size_t plen;
 525
 526         plen = (p[0] + 31) >> 4;
 527         memset(x, 0, plen * sizeof *x);
 528         x[0] = p[0];
 529         x[1] = 0x0001;
 530 }
 531
 532 static void
 533 point_zero(jacobian *P, const curve_params *cc)
 534 {
 535         memset(P, 0, sizeof *P);
 536         P->c[0][0] = P->c[1][0] = P->c[2][0] = cc->p[0];
 537 }
 538
 539 static inline void
 540 point_double(jacobian *P, const curve_params *cc)
 541 {
 542         run_code(P, P, cc, code_double);
 543 }
 544
 545 static inline uint32_t
 546 point_add(jacobian *P1, const jacobian *P2, const curve_params *cc)
 547 {
 548         return run_code(P1, P2, cc, code_add);
 549 }
 550
 551 static void
 552 point_mul(jacobian *P, const unsigned char *x, size_t xlen,
 553         const curve_params *cc)
 554 {
 555         /*
 556          * We do a simple double-and-add ladder with a 2-bit window
 557          * to make only one add every two doublings. We thus first
 558          * precompute 2P and 3P in some local buffers.
 559          *
 560          * We always perform two doublings and one addition; the
 561          * addition is with P, 2P and 3P and is done in a temporary
 562          * array.
 563          *
 564          * The addition code cannot handle cases where one of the
 565          * operands is infinity, which is the case at the start of the
 566          * ladder. We therefore need to maintain a flag that controls
 567          * this situation.
 568          */
 569         uint32_t qz;
 570         jacobian P2, P3, Q, T, U;
 571
 572         memcpy(&P2, P, sizeof P2);
 573         point_double(&P2, cc);
 574         memcpy(&P3, P, sizeof P3);
 575         point_add(&P3, &P2, cc);
 576
 577         point_zero(&Q, cc);
 578         qz = 1;
 579         while (xlen -- > 0) {
 580                 int k;
 581
 582                 for (k = 6; k >= 0; k -= 2) {
 583                         uint32_t bits;
 584                         uint32_t bnz;
 585
 586                         point_double(&Q, cc);
 587                         point_double(&Q, cc);
 588                         memcpy(&T, P, sizeof T);
 589                         memcpy(&U, &Q, sizeof U);
 590                         bits = (*x >> k) & (uint32_t)3;
 591                         bnz = NEQ(bits, 0);
 592                         CCOPY(EQ(bits, 2), &T, &P2, sizeof T);
 593                         CCOPY(EQ(bits, 3), &T, &P3, sizeof T);
 594                         point_add(&U, &T, cc);
 595                         CCOPY(bnz & qz, &Q, &T, sizeof Q);
 596                         CCOPY(bnz & ~qz, &Q, &U, sizeof Q);
 597                         qz &= ~bnz;
 598                 }
 599                 x ++;
 600         }
 601         memcpy(P, &Q, sizeof Q);
 602 }
 603
 604 /*
 605  * Decode point into Jacobian coordinates. This function does not support
 606  * the point at infinity. If the point is invalid then this returns 0, but
 607  * the coordinates are still set to properly formed field elements.
 608  */
 609 static uint32_t
 610 point_decode(jacobian *P, const void *src, size_t len, const curve_params *cc)
 611 {
 612         /*
 613          * Points must use uncompressed format:
 614          * -- first byte is 0x04;
 615          * -- coordinates X and Y use unsigned big-endian, with the same
 616          *    length as the field modulus.
 617          *
 618          * We don't support hybrid format (uncompressed, but first byte
 619          * has value 0x06 or 0x07, depending on the least significant bit
 620          * of Y) because it is rather useless, and explicitly forbidden
 621          * by PKIX (RFC 5480, section 2.2).
 622          *
 623          * We don't support compressed format either, because it is not
 624          * much used in practice (there are or were patent-related
 625          * concerns about point compression, which explains the lack of
 626          * generalised support). Also, point compression support would
 627          * need a bit more code.
 628          */
 629         const unsigned char *buf;
 630         size_t plen, zlen;
 631         uint32_t r;
 632         jacobian Q;
 633
 634         buf = src;
 635         point_zero(P, cc);
 636         plen = (cc->p[0] - (cc->p[0] >> 4) + 7) >> 3;
 637         if (len != 1 + (plen << 1)) {
 638                 return 0;
 639         }
 640         r = br_i15_decode_mod(P->c[0], buf + 1, plen, cc->p);
 641         r &= br_i15_decode_mod(P->c[1], buf + 1 + plen, plen, cc->p);
 642
 643         /*
 644          * Check first byte.
 645          */
 646         r &= EQ(buf[0], 0x04);
 647         /* obsolete
 648         r &= EQ(buf[0], 0x04) | (EQ(buf[0] & 0xFE, 0x06)
 649                 & ~(uint32_t)(buf[0] ^ buf[plen << 1]));
 650         */
 651
 652         /*
 653          * Convert coordinates and check that the point is valid.
 654          */
 655         zlen = ((cc->p[0] + 31) >> 4) * sizeof(uint16_t);
 656         memcpy(Q.c[0], cc->R2, zlen);
 657         memcpy(Q.c[1], cc->b, zlen);
 658         set_one(Q.c[2], cc->p);
 659         r &= ~run_code(P, &Q, cc, code_check);
 660         return r;
 661 }
 662
 663 /*
 664  * Encode a point. This method assumes that the point is correct and is
 665  * not the point at infinity. Encoded size is always 1+2*plen, where
 666  * plen is the field modulus length, in bytes.
 667  */
 668 static void
 669 point_encode(void *dst, const jacobian *P, const curve_params *cc)
 670 {
 671         unsigned char *buf;
 672         size_t plen;
 673         jacobian Q, T;
 674
 675         buf = dst;
 676         plen = (cc->p[0] - (cc->p[0] >> 4) + 7) >> 3;
 677         buf[0] = 0x04;
 678         memcpy(&Q, P, sizeof *P);
 679         set_one(T.c[2], cc->p);
 680         run_code(&Q, &T, cc, code_affine);
 681         br_i15_encode(buf + 1, plen, Q.c[0]);
 682         br_i15_encode(buf + 1 + plen, plen, Q.c[1]);
 683 }
 684
 685 static const br_ec_curve_def *
 686 id_to_curve_def(int curve)
 687 {
 688         switch (curve) {
 689         case BR_EC_secp256r1:
 690                 return &br_secp256r1;
 691         case BR_EC_secp384r1:
 692                 return &br_secp384r1;
 693         case BR_EC_secp521r1:
 694                 return &br_secp521r1;
 695         }
 696         return NULL;
 697 }
 698
 699 static const unsigned char *
 700 api_generator(int curve, size_t *len)
 701 {
 702         const br_ec_curve_def *cd;
 703
 704         cd = id_to_curve_def(curve);
 705         *len = cd->generator_len;
 706         return cd->generator;
 707 }
 708
 709 static const unsigned char *
 710 api_order(int curve, size_t *len)
 711 {
 712         const br_ec_curve_def *cd;
 713
 714         cd = id_to_curve_def(curve);
 715         *len = cd->order_len;
 716         return cd->order;
 717 }
 718
 719 static uint32_t
 720 api_mul(unsigned char *G, size_t Glen,
 721         const unsigned char *x, size_t xlen, int curve)
 722 {
 723         uint32_t r;
 724         const curve_params *cc;
 725         jacobian P;
 726
 727         cc = id_to_curve(curve);
 728         r = point_decode(&P, G, Glen, cc);
 729         point_mul(&P, x, xlen, cc);
 730         if (Glen == cc->point_len) {
 731                 point_encode(G, &P, cc);
 732         }
 733         return r;
 734 }
 735
 736 static size_t
 737 api_mulgen(unsigned char *R,
 738         const unsigned char *x, size_t xlen, int curve)
 739 {
 740         const unsigned char *G;
 741         size_t Glen;
 742
 743         G = api_generator(curve, &Glen);
 744         memcpy(R, G, Glen);
 745         api_mul(R, Glen, x, xlen, curve);
 746         return Glen;
 747 }
 748
 749 static uint32_t
 750 api_muladd(unsigned char *A, const unsigned char *B, size_t len,
 751         const unsigned char *x, size_t xlen,
 752         const unsigned char *y, size_t ylen, int curve)
 753 {
 754         uint32_t r, t, z;
 755         const curve_params *cc;
 756         jacobian P, Q;
 757
 758         /*
 759          * TODO: see about merging the two ladders. Right now, we do
 760          * two independant point multiplications, which is a bit
 761          * wasteful of CPU resources (but yields short code).
 762          */
 763
 764         cc = id_to_curve(curve);
 765         r = point_decode(&P, A, len, cc);
 766         if (B == NULL) {
 767                 size_t Glen;
 768
 769                 B = api_generator(curve, &Glen);
 770         }
 771         r &= point_decode(&Q, B, len, cc);
 772         point_mul(&P, x, xlen, cc);
 773         point_mul(&Q, y, ylen, cc);
 774
 775         /*
 776          * We want to compute P+Q. Since the base points A and B are distinct
 777          * from infinity, and the multipliers are non-zero and lower than the
 778          * curve order, then we know that P and Q are non-infinity. This
 779          * leaves two special situations to test for:
 780          * -- If P = Q then we must use point_double().
 781          * -- If P+Q = 0 then we must report an error.
 782          */
 783         t = point_add(&P, &Q, cc);
 784         point_double(&Q, cc);
 785         z = br_i15_iszero(P.c[2]);
 786
 787         /*
 788          * If z is 1 then either P+Q = 0 (t = 1) or P = Q (t = 0). So we
 789          * have the following:
 790          *
 791          *   z = 0, t = 0   return P (normal addition)
 792          *   z = 0, t = 1   return P (normal addition)
 793          *   z = 1, t = 0   return Q (a 'double' case)
 794          *   z = 1, t = 1   report an error (P+Q = 0)
 795          */
 796         CCOPY(z & ~t, &P, &Q, sizeof Q);
 797         point_encode(A, &P, cc);
 798         r &= ~(z & t);
 799
 800         return r;
 801 }
 802
 803 /* see bearssl_ec.h */
 804 const br_ec_impl br_ec_prime_i15 = {
 805         (uint32_t)0x03800000,
 806         &api_generator,
 807         &api_order,
 808         &api_mul,
 809         &api_mulgen,
 810         &api_muladd
 811 };