X-Git-Url: https://www.bearssl.org/gitweb//home/git/?p=BearSSL;a=blobdiff_plain;f=src%2Finner.h;h=e5c45b357a0496a19ef6c9e55bf50129fa534a99;hp=d2cc4c9f6ee02c7b5e9d99e9aa9a2c13fab552a9;hb=3f00688b9d9f483a6ca97e659eea104995ea15b7;hpb=b42bd5972f935ffc32019acac6f8a07ae08ae9c2 diff --git a/src/inner.h b/src/inner.h index d2cc4c9..e5c45b3 100644 --- a/src/inner.h +++ b/src/inner.h @@ -31,6 +31,15 @@ #include "config.h" #include "bearssl.h" +/* + * On MSVC, disable the warning about applying unary minus on an + * unsigned type: it is standard, we do it all the time, and for + * good reasons. + */ +#if _MSC_VER +#pragma warning( disable : 4146 ) +#endif + /* * Maximum size for a RSA modulus (in bits). Allocated stack buffers * depend on that size, so this value should be kept small. Currently, @@ -300,7 +309,7 @@ static inline void br_multihash_copyimpl(br_multihash_context *dst, const br_multihash_context *src) { - memcpy(dst->impl, src->impl, sizeof src->impl); + memcpy((void *)dst->impl, src->impl, sizeof src->impl); } /* ==================================================================== */ @@ -532,11 +541,30 @@ MAX(uint32_t x, uint32_t y) * (old) platforms where the default MUL31 is not. Unfortunately, it is * also substantially slower, and yields larger code, on more modern * platforms, which is why it is deactivated by default. + * + * MUL31_lo() must do some extra work because on some platforms, the + * _signed_ multiplication may return early if the top bits are 1. + * Simply truncating (casting) the output of MUL31() would not be + * sufficient, because the compiler may notice that we keep only the low + * word, and then replace automatically the unsigned multiplication with + * a signed multiplication opcode. */ #define MUL31(x, y) ((uint64_t)((x) | (uint32_t)0x80000000) \ * (uint64_t)((y) | (uint32_t)0x80000000) \ - ((uint64_t)(x) << 31) - ((uint64_t)(y) << 31) \ - ((uint64_t)1 << 62)) +static inline uint32_t +MUL31_lo(uint32_t x, uint32_t y) +{ + uint32_t xl, xh; + uint32_t yl, yh; + + xl = (x & 0xFFFF) | (uint32_t)0x80000000; + xh = (x >> 16) | (uint32_t)0x80000000; + yl = (y & 0xFFFF) | (uint32_t)0x80000000; + yh = (y >> 16) | (uint32_t)0x80000000; + return (xl * yl + ((xl * yh + xh * yl) << 16)) & (uint32_t)0x7FFFFFFF; +} #else @@ -544,11 +572,51 @@ MAX(uint32_t x, uint32_t y) * Multiply two 31-bit integers, with a 62-bit result. This default * implementation assumes that the basic multiplication operator * yields constant-time code. + * The MUL31_lo() macro returns only the low 31 bits of the product. */ -#define MUL31(x, y) ((uint64_t)(x) * (uint64_t)(y)) +#define MUL31(x, y) ((uint64_t)(x) * (uint64_t)(y)) +#define MUL31_lo(x, y) (((uint32_t)(x) * (uint32_t)(y)) & (uint32_t)0x7FFFFFFF) #endif +/* + * Multiply two words together; the sum of the lengths of the two + * operands must not exceed 31 (for instance, one operand may use 16 + * bits if the other fits on 15). If BR_CT_MUL15 is non-zero, then the + * macro will contain some extra operations that help in making the + * operation constant-time on some platforms, where the basic 32-bit + * multiplication is not constant-time. + */ +#if BR_CT_MUL15 +#define MUL15(x, y) (((uint32_t)(x) | (uint32_t)0x80000000) \ + * ((uint32_t)(y) | (uint32_t)0x80000000) \ + & (uint32_t)0x7FFFFFFF) +#else +#define MUL15(x, y) ((uint32_t)(x) * (uint32_t)(y)) +#endif + +/* + * Arithmetic right shift (sign bit is copied). What happens when + * right-shifting a negative value is _implementation-defined_, so it + * does not trigger undefined behaviour, but it is still up to each + * compiler to define (and document) what it does. Most/all compilers + * will do an arithmetic shift, the sign bit being used to fill the + * holes; this is a native operation on the underlying CPU, and it would + * make little sense for the compiler to do otherwise. GCC explicitly + * documents that it follows that convention. + * + * Still, if BR_NO_ARITH_SHIFT is defined (and non-zero), then an + * alternate version will be used, that does not rely on such + * implementation-defined behaviour. Unfortunately, it is also slower + * and yields bigger code, which is why it is deactivated by default. + */ +#if BR_NO_ARITH_SHIFT +#define ARSH(x, n) (((uint32_t)(x) >> (n)) \ + | ((-((uint32_t)(x) >> 31)) << (32 - (n)))) +#else +#define ARSH(x, n) ((*(int32_t *)&(x)) >> (n)) +#endif + /* * Constant-time division. The dividend hi:lo is divided by the * divisor d; the quotient is returned and the remainder is written @@ -1009,6 +1077,53 @@ void br_i31_mulacc(uint32_t *d, const uint32_t *a, const uint32_t *b); /* ==================================================================== */ +static inline void +br_i15_zero(uint16_t *x, uint16_t bit_len) +{ + *x ++ = bit_len; + memset(x, 0, ((bit_len + 15) >> 4) * sizeof *x); +} + +uint32_t br_i15_iszero(const uint16_t *x); + +uint16_t br_i15_ninv15(uint16_t x); + +uint32_t br_i15_add(uint16_t *a, const uint16_t *b, uint32_t ctl); + +uint32_t br_i15_sub(uint16_t *a, const uint16_t *b, uint32_t ctl); + +void br_i15_muladd_small(uint16_t *x, uint16_t z, const uint16_t *m); + +void br_i15_montymul(uint16_t *d, const uint16_t *x, const uint16_t *y, + const uint16_t *m, uint16_t m0i); + +void br_i15_to_monty(uint16_t *x, const uint16_t *m); + +void br_i15_modpow(uint16_t *x, const unsigned char *e, size_t elen, + const uint16_t *m, uint16_t m0i, uint16_t *t1, uint16_t *t2); + +void br_i15_encode(void *dst, size_t len, const uint16_t *x); + +uint32_t br_i15_decode_mod(uint16_t *x, + const void *src, size_t len, const uint16_t *m); + +void br_i15_rshift(uint16_t *x, int count); + +uint32_t br_i15_bit_length(uint16_t *x, size_t xlen); + +void br_i15_decode(uint16_t *x, const void *src, size_t len); + +void br_i15_from_monty(uint16_t *x, const uint16_t *m, uint16_t m0i); + +void br_i15_decode_reduce(uint16_t *x, + const void *src, size_t len, const uint16_t *m); + +void br_i15_reduce(uint16_t *x, const uint16_t *a, const uint16_t *m); + +void br_i15_mulacc(uint16_t *d, const uint16_t *a, const uint16_t *b); + +/* ==================================================================== */ + static inline size_t br_digest_size(const br_hash_class *digest_class) { @@ -1306,6 +1421,29 @@ unsigned br_aes_ct64_keysched(uint64_t *comp_skey, void br_aes_ct64_skey_expand(uint64_t *skey, unsigned num_rounds, const uint64_t *comp_skey); +/* ==================================================================== */ +/* + * RSA. + */ + +/* + * Apply proper PKCS#1 v1.5 padding (for signatures). 'hash_oid' is + * the encoded hash function OID, or NULL. + */ +uint32_t br_rsa_pkcs1_sig_pad(const unsigned char *hash_oid, + const unsigned char *hash, size_t hash_len, + uint32_t n_bitlen, unsigned char *x); + +/* + * Check PKCS#1 v1.5 padding (for signatures). 'hash_oid' is the encoded + * hash function OID, or NULL. The provided 'sig' value is _after_ the + * modular exponentiation, i.e. it should be the padded hash. On + * success, the hashed message is extracted. + */ +uint32_t br_rsa_pkcs1_sig_unpad(const unsigned char *sig, size_t sig_len, + const unsigned char *hash_oid, size_t hash_len, + unsigned char *hash_out); + /* ==================================================================== */ /* * Elliptic curves. @@ -1328,24 +1466,13 @@ extern const br_ec_curve_def br_secp384r1; extern const br_ec_curve_def br_secp521r1; /* - * Type for the parameters for a "prime curve": - * coordinates are in GF(p), with p prime - * curve equation is Y^2 = X^3 - 3*X + b - * b is in Montgomery representation - * curve order is n and is prime - * base point is G (encoded) and has order n + * For Curve25519, the advertised "order" really is 2^255-1, since the + * point multipliction function really works over arbitrary 255-bit + * scalars. This value is only meant as a hint for ECDH key generation; + * only ECDSA uses the exact curve order, and ECDSA is not used with + * that specific curve. */ -typedef struct { - const uint32_t *p; - const uint32_t *b; - const uint32_t p0i; -} br_ec_prime_i31_curve; - -extern const br_ec_prime_i31_curve br_ec_prime_i31_secp256r1; -extern const br_ec_prime_i31_curve br_ec_prime_i31_secp384r1; -extern const br_ec_prime_i31_curve br_ec_prime_i31_secp521r1; - -#define BR_EC_I31_LEN ((BR_MAX_EC_SIZE + 61) / 31) +extern const br_ec_curve_def br_curve25519; /* * Decode some bytes as an i31 integer, with truncation (corresponding @@ -1357,6 +1484,16 @@ extern const br_ec_prime_i31_curve br_ec_prime_i31_secp521r1; void br_ecdsa_i31_bits2int(uint32_t *x, const void *src, size_t len, uint32_t ebitlen); +/* + * Decode some bytes as an i15 integer, with truncation (corresponding + * to the 'bits2int' operation in RFC 6979). The target ENCODED bit + * length is provided as last parameter. The resulting value will have + * this declared bit length, and consists the big-endian unsigned decoding + * of exactly that many bits in the source (capped at the source length). + */ +void br_ecdsa_i15_bits2int(uint16_t *x, + const void *src, size_t len, uint32_t ebitlen); + /* ==================================================================== */ /* * SSL/TLS support functions.