From 28e4e120b84dacdf53963639f1a8a6fec2793662 Mon Sep 17 00:00:00 2001
From: Thomas Pornin <pornin@bolet.org>
Date: Wed, 4 Jan 2017 19:21:09 +0100
Subject: [PATCH] New "i15" implementation of big integers (faster, and
 constant-time, on ARM Cortex M0/M0+); imported into EC, ECDSA, RSA.

---
 Makefile                      |  53 ++-
 inc/bearssl_ec.h              |  75 ++++
 inc/bearssl_rsa.h             |  72 +++-
 src/ec/ec_prime_i15.c         | 792 ++++++++++++++++++++++++++++++++++
 src/ec/ec_prime_i31.c         |   2 +-
 src/ec/ecdsa_i15_bits.c       |  47 ++
 src/ec/ecdsa_i15_sign_asn1.c  |  45 ++
 src/ec/ecdsa_i15_sign_raw.c   | 184 ++++++++
 src/ec/ecdsa_i15_vrfy_asn1.c  |  48 +++
 src/ec/ecdsa_i15_vrfy_raw.c   | 166 +++++++
 src/inner.h                   | 108 ++++-
 src/int/i15_core.c            | 479 ++++++++++++++++++++
 src/int/i15_ext1.c            | 136 ++++++
 src/int/i15_ext2.c            | 173 ++++++++
 src/int/i31_fmont.c           |   2 +-
 src/int/i31_montmul.c         |   2 +-
 src/int/i31_muladd.c          |   2 +-
 src/rsa/rsa_i15_pkcs1_sign.c  |  37 ++
 src/rsa/rsa_i15_pkcs1_vrfy.c  |  43 ++
 src/rsa/rsa_i15_priv.c        | 146 +++++++
 src/rsa/rsa_i15_pub.c         |  78 ++++
 src/rsa/rsa_i31_pkcs1_sign.c  |  71 +--
 src/rsa/rsa_i31_pkcs1_vrfy.c  |  87 +---
 src/rsa/rsa_i32_pkcs1_sign.c  |  71 +--
 src/rsa/rsa_i32_pkcs1_vrfy.c  |  87 +---
 src/rsa/rsa_pkcs1_sig_pad.c   | 100 +++++
 src/rsa/rsa_pkcs1_sig_unpad.c | 121 ++++++
 test/test_crypto.c            |  54 ++-
 test/test_speed.c             |  38 +-
 29 files changed, 2993 insertions(+), 326 deletions(-)
 create mode 100644 src/ec/ec_prime_i15.c
 create mode 100644 src/ec/ecdsa_i15_bits.c
 create mode 100644 src/ec/ecdsa_i15_sign_asn1.c
 create mode 100644 src/ec/ecdsa_i15_sign_raw.c
 create mode 100644 src/ec/ecdsa_i15_vrfy_asn1.c
 create mode 100644 src/ec/ecdsa_i15_vrfy_raw.c
 create mode 100644 src/int/i15_core.c
 create mode 100644 src/int/i15_ext1.c
 create mode 100644 src/int/i15_ext2.c
 create mode 100644 src/rsa/rsa_i15_pkcs1_sign.c
 create mode 100644 src/rsa/rsa_i15_pkcs1_vrfy.c
 create mode 100644 src/rsa/rsa_i15_priv.c
 create mode 100644 src/rsa/rsa_i15_pub.c
 create mode 100644 src/rsa/rsa_pkcs1_sig_pad.c
 create mode 100644 src/rsa/rsa_pkcs1_sig_unpad.c

diff --git a/Makefile b/Makefile
index 8c63fb2..32c5d7b 100644
--- a/Makefile
+++ b/Makefile
@@ -47,17 +47,19 @@ TESTX509 = testx509
 TESTMATH = testmath
 
 OBJCODEC = $(BUILD)/ccopy.o $(BUILD)/dec16be.o $(BUILD)/dec16le.o $(BUILD)/dec32be.o $(BUILD)/dec32le.o $(BUILD)/dec64be.o $(BUILD)/dec64le.o $(BUILD)/enc16be.o $(BUILD)/enc16le.o $(BUILD)/enc32be.o $(BUILD)/enc32le.o $(BUILD)/enc64be.o $(BUILD)/enc64le.o $(BUILD)/pemdec.o
-OBJEC = $(BUILD)/ec_p256_i15.o $(BUILD)/ec_prime_i31.o $(BUILD)/ec_prime_i31_secp256r1.o $(BUILD)/ec_prime_i31_secp384r1.o $(BUILD)/ec_prime_i31_secp521r1.o $(BUILD)/ec_secp256r1.o $(BUILD)/ec_secp384r1.o $(BUILD)/ec_secp521r1.o $(BUILD)/ecdsa_atr.o $(BUILD)/ecdsa_i31_bits.o $(BUILD)/ecdsa_i31_sign_asn1.o $(BUILD)/ecdsa_i31_sign_raw.o $(BUILD)/ecdsa_i31_vrfy_asn1.o $(BUILD)/ecdsa_i31_vrfy_raw.o $(BUILD)/ecdsa_rta.o
+OBJEC = $(BUILD)/ec_p256_i15.o $(BUILD)/ec_prime_i15.o $(BUILD)/ec_prime_i31.o $(BUILD)/ec_secp256r1.o $(BUILD)/ec_secp384r1.o $(BUILD)/ec_secp521r1.o $(BUILD)/ecdsa_atr.o $(BUILD)/ecdsa_i15_bits.o $(BUILD)/ecdsa_i15_sign_asn1.o $(BUILD)/ecdsa_i15_sign_raw.o $(BUILD)/ecdsa_i15_vrfy_asn1.o $(BUILD)/ecdsa_i15_vrfy_raw.o $(BUILD)/ecdsa_i31_bits.o $(BUILD)/ecdsa_i31_sign_asn1.o $(BUILD)/ecdsa_i31_sign_raw.o $(BUILD)/ecdsa_i31_vrfy_asn1.o $(BUILD)/ecdsa_i31_vrfy_raw.o $(BUILD)/ecdsa_rta.o
+# $(BUILD)/ec_prime_i31_secp256r1.o $(BUILD)/ec_prime_i31_secp384r1.o $(BUILD)/ec_prime_i31_secp521r1.o
 OBJHASH = $(BUILD)/dig_oid.o $(BUILD)/dig_size.o $(BUILD)/ghash_ctmul.o $(BUILD)/ghash_ctmul32.o $(BUILD)/ghash_ctmul64.o $(BUILD)/md5.o $(BUILD)/md5sha1.o $(BUILD)/multihash.o $(BUILD)/sha1.o $(BUILD)/sha2big.o $(BUILD)/sha2small.o
+OBJINT15 = $(BUILD)/i15_core.o $(BUILD)/i15_ext1.o $(BUILD)/i15_ext2.o
 OBJINT31 = $(BUILD)/i31_add.o $(BUILD)/i31_bitlen.o $(BUILD)/i31_decmod.o $(BUILD)/i31_decode.o $(BUILD)/i31_decred.o $(BUILD)/i31_encode.o $(BUILD)/i31_fmont.o $(BUILD)/i31_iszero.o $(BUILD)/i31_modpow.o $(BUILD)/i31_montmul.o $(BUILD)/i31_mulacc.o $(BUILD)/i31_muladd.o $(BUILD)/i31_ninv31.o $(BUILD)/i31_reduce.o $(BUILD)/i31_rshift.o $(BUILD)/i31_sub.o $(BUILD)/i31_tmont.o
 OBJINT32 = $(BUILD)/i32_add.o $(BUILD)/i32_bitlen.o $(BUILD)/i32_decmod.o $(BUILD)/i32_decode.o $(BUILD)/i32_decred.o $(BUILD)/i32_div32.o $(BUILD)/i32_encode.o $(BUILD)/i32_fmont.o $(BUILD)/i32_iszero.o $(BUILD)/i32_modpow.o $(BUILD)/i32_montmul.o $(BUILD)/i32_mulacc.o $(BUILD)/i32_muladd.o $(BUILD)/i32_ninv32.o $(BUILD)/i32_reduce.o $(BUILD)/i32_sub.o $(BUILD)/i32_tmont.o
 OBJMAC = $(BUILD)/hmac.o $(BUILD)/hmac_ct.o
 OBJRAND = $(BUILD)/hmac_drbg.o
-OBJRSA = $(BUILD)/rsa_i31_pkcs1_sign.o $(BUILD)/rsa_i31_pkcs1_vrfy.o $(BUILD)/rsa_i31_priv.o $(BUILD)/rsa_i31_pub.o $(BUILD)/rsa_i32_pkcs1_sign.o $(BUILD)/rsa_i32_pkcs1_vrfy.o $(BUILD)/rsa_i32_priv.o $(BUILD)/rsa_i32_pub.o $(BUILD)/rsa_ssl_decrypt.o
+OBJRSA = $(BUILD)/rsa_i15_pkcs1_sign.o $(BUILD)/rsa_i15_pkcs1_vrfy.o $(BUILD)/rsa_i15_priv.o $(BUILD)/rsa_i15_pub.o $(BUILD)/rsa_i31_pkcs1_sign.o $(BUILD)/rsa_i31_pkcs1_vrfy.o $(BUILD)/rsa_i31_priv.o $(BUILD)/rsa_i31_pub.o $(BUILD)/rsa_i32_pkcs1_sign.o $(BUILD)/rsa_i32_pkcs1_vrfy.o $(BUILD)/rsa_i32_priv.o $(BUILD)/rsa_i32_pub.o $(BUILD)/rsa_pkcs1_sig_pad.o $(BUILD)/rsa_pkcs1_sig_unpad.o $(BUILD)/rsa_ssl_decrypt.o
 OBJSSL = $(BUILD)/prf.o $(BUILD)/prf_md5sha1.o $(BUILD)/prf_sha256.o $(BUILD)/prf_sha384.o $(BUILD)/ssl_ccert_single_ec.o $(BUILD)/ssl_ccert_single_rsa.o $(BUILD)/ssl_client.o $(BUILD)/ssl_client_full.o $(BUILD)/ssl_engine.o $(BUILD)/ssl_hashes.o $(BUILD)/ssl_hs_client.o $(BUILD)/ssl_hs_server.o $(BUILD)/ssl_io.o $(BUILD)/ssl_lru.o $(BUILD)/ssl_rec_cbc.o $(BUILD)/ssl_rec_chapol.o $(BUILD)/ssl_rec_gcm.o $(BUILD)/ssl_server.o $(BUILD)/ssl_server_mine2c.o $(BUILD)/ssl_server_mine2g.o $(BUILD)/ssl_server_minf2c.o $(BUILD)/ssl_server_minf2g.o $(BUILD)/ssl_server_minr2g.o $(BUILD)/ssl_server_minu2g.o $(BUILD)/ssl_server_minv2g.o $(BUILD)/ssl_server_full_ec.o $(BUILD)/ssl_server_full_rsa.o $(BUILD)/ssl_scert_single_ec.o $(BUILD)/ssl_scert_single_rsa.o
 OBJSYMCIPHER = $(BUILD)/aes_big_cbcdec.o $(BUILD)/aes_big_cbcenc.o $(BUILD)/aes_big_ctr.o $(BUILD)/aes_big_dec.o $(BUILD)/aes_big_enc.o $(BUILD)/aes_common.o $(BUILD)/aes_ct.o $(BUILD)/aes_ct64.o $(BUILD)/aes_ct64_cbcdec.o $(BUILD)/aes_ct64_cbcenc.o $(BUILD)/aes_ct64_ctr.o $(BUILD)/aes_ct64_dec.o $(BUILD)/aes_ct64_enc.o $(BUILD)/aes_ct_cbcdec.o $(BUILD)/aes_ct_cbcenc.o $(BUILD)/aes_ct_ctr.o $(BUILD)/aes_ct_dec.o $(BUILD)/aes_ct_enc.o $(BUILD)/aes_small_cbcdec.o $(BUILD)/aes_small_cbcenc.o $(BUILD)/aes_small_ctr.o $(BUILD)/aes_small_dec.o $(BUILD)/aes_small_enc.o $(BUILD)/chacha20_ct.o $(BUILD)/des_ct.o $(BUILD)/des_ct_cbcdec.o $(BUILD)/des_ct_cbcenc.o $(BUILD)/des_support.o $(BUILD)/des_tab.o $(BUILD)/des_tab_cbcdec.o $(BUILD)/des_tab_cbcenc.o $(BUILD)/poly1305_ctmul.o
 OBJX509 = $(BUILD)/skey_decoder.o $(BUILD)/x509_decoder.o $(BUILD)/x509_knownkey.o $(BUILD)/x509_minimal.o $(BUILD)/x509_minimal_full.o
-OBJ = $(OBJCODEC) $(OBJEC) $(OBJHASH) $(OBJINT31) $(OBJINT32) $(OBJMAC) $(OBJRAND) $(OBJRSA) $(OBJSSL) $(OBJSYMCIPHER) $(OBJX509)
+OBJ = $(OBJCODEC) $(OBJEC) $(OBJHASH) $(OBJINT15) $(OBJINT31) $(OBJINT32) $(OBJMAC) $(OBJRAND) $(OBJRSA) $(OBJSSL) $(OBJSYMCIPHER) $(OBJX509)
 OBJBRSSL = $(BUILD)/brssl.o $(BUILD)/certs.o $(BUILD)/chain.o $(BUILD)/client.o $(BUILD)/errors.o $(BUILD)/files.o $(BUILD)/keys.o $(BUILD)/names.o $(BUILD)/server.o $(BUILD)/skey.o $(BUILD)/sslio.o $(BUILD)/ta.o $(BUILD)/vector.o $(BUILD)/verify.o $(BUILD)/xmem.o
 OBJTESTCRYPTO = $(BUILD)/test_crypto.o
 OBJTESTSPEED = $(BUILD)/test_speed.o
@@ -163,6 +165,9 @@ $(BUILD)/ec_g_secp521r1.o: src/ec/ec_g_secp521r1.c $(HEADERS)
 $(BUILD)/ec_p256_i15.o: src/ec/ec_p256_i15.c $(HEADERS)
 	$(CC) $(CFLAGS) -c -o $(BUILD)/ec_p256_i15.o src/ec/ec_p256_i15.c
 
+$(BUILD)/ec_prime_i15.o: src/ec/ec_prime_i15.c $(HEADERS)
+	$(CC) $(CFLAGS) -c -o $(BUILD)/ec_prime_i15.o src/ec/ec_prime_i15.c
+
 $(BUILD)/ec_prime_i31.o: src/ec/ec_prime_i31.c $(HEADERS)
 	$(CC) $(CFLAGS) -c -o $(BUILD)/ec_prime_i31.o src/ec/ec_prime_i31.c
 
@@ -187,6 +192,21 @@ $(BUILD)/ec_secp521r1.o: src/ec/ec_secp521r1.c $(HEADERS)
 $(BUILD)/ecdsa_atr.o: src/ec/ecdsa_atr.c $(HEADERS)
 	$(CC) $(CFLAGS) -c -o $(BUILD)/ecdsa_atr.o src/ec/ecdsa_atr.c
 
+$(BUILD)/ecdsa_i15_bits.o: src/ec/ecdsa_i15_bits.c $(HEADERS)
+	$(CC) $(CFLAGS) -c -o $(BUILD)/ecdsa_i15_bits.o src/ec/ecdsa_i15_bits.c
+
+$(BUILD)/ecdsa_i15_sign_asn1.o: src/ec/ecdsa_i15_sign_asn1.c $(HEADERS)
+	$(CC) $(CFLAGS) -c -o $(BUILD)/ecdsa_i15_sign_asn1.o src/ec/ecdsa_i15_sign_asn1.c
+
+$(BUILD)/ecdsa_i15_sign_raw.o: src/ec/ecdsa_i15_sign_raw.c $(HEADERS)
+	$(CC) $(CFLAGS) -c -o $(BUILD)/ecdsa_i15_sign_raw.o src/ec/ecdsa_i15_sign_raw.c
+
+$(BUILD)/ecdsa_i15_vrfy_asn1.o: src/ec/ecdsa_i15_vrfy_asn1.c $(HEADERS)
+	$(CC) $(CFLAGS) -c -o $(BUILD)/ecdsa_i15_vrfy_asn1.o src/ec/ecdsa_i15_vrfy_asn1.c
+
+$(BUILD)/ecdsa_i15_vrfy_raw.o: src/ec/ecdsa_i15_vrfy_raw.c $(HEADERS)
+	$(CC) $(CFLAGS) -c -o $(BUILD)/ecdsa_i15_vrfy_raw.o src/ec/ecdsa_i15_vrfy_raw.c
+
 $(BUILD)/ecdsa_i31_bits.o: src/ec/ecdsa_i31_bits.c $(HEADERS)
 	$(CC) $(CFLAGS) -c -o $(BUILD)/ecdsa_i31_bits.o src/ec/ecdsa_i31_bits.c
 
@@ -238,6 +258,15 @@ $(BUILD)/sha2big.o: src/hash/sha2big.c $(HEADERS)
 $(BUILD)/sha2small.o: src/hash/sha2small.c $(HEADERS)
 	$(CC) $(CFLAGS) -c -o $(BUILD)/sha2small.o src/hash/sha2small.c
 
+$(BUILD)/i15_core.o: src/int/i15_core.c $(HEADERS)
+	$(CC) $(CFLAGS) -c -o $(BUILD)/i15_core.o src/int/i15_core.c
+
+$(BUILD)/i15_ext1.o: src/int/i15_ext1.c $(HEADERS)
+	$(CC) $(CFLAGS) -c -o $(BUILD)/i15_ext1.o src/int/i15_ext1.c
+
+$(BUILD)/i15_ext2.o: src/int/i15_ext2.c $(HEADERS)
+	$(CC) $(CFLAGS) -c -o $(BUILD)/i15_ext2.o src/int/i15_ext2.c
+
 $(BUILD)/i31_add.o: src/int/i31_add.c $(HEADERS)
 	$(CC) $(CFLAGS) -c -o $(BUILD)/i31_add.o src/int/i31_add.c
 
@@ -349,6 +378,18 @@ $(BUILD)/hmac_ct.o: src/mac/hmac_ct.c $(HEADERS)
 $(BUILD)/hmac_drbg.o: src/rand/hmac_drbg.c $(HEADERS)
 	$(CC) $(CFLAGS) -c -o $(BUILD)/hmac_drbg.o src/rand/hmac_drbg.c
 
+$(BUILD)/rsa_i15_pkcs1_sign.o: src/rsa/rsa_i15_pkcs1_sign.c $(HEADERS)
+	$(CC) $(CFLAGS) -c -o $(BUILD)/rsa_i15_pkcs1_sign.o src/rsa/rsa_i15_pkcs1_sign.c
+
+$(BUILD)/rsa_i15_pkcs1_vrfy.o: src/rsa/rsa_i15_pkcs1_vrfy.c $(HEADERS)
+	$(CC) $(CFLAGS) -c -o $(BUILD)/rsa_i15_pkcs1_vrfy.o src/rsa/rsa_i15_pkcs1_vrfy.c
+
+$(BUILD)/rsa_i15_priv.o: src/rsa/rsa_i15_priv.c $(HEADERS)
+	$(CC) $(CFLAGS) -c -o $(BUILD)/rsa_i15_priv.o src/rsa/rsa_i15_priv.c
+
+$(BUILD)/rsa_i15_pub.o: src/rsa/rsa_i15_pub.c $(HEADERS)
+	$(CC) $(CFLAGS) -c -o $(BUILD)/rsa_i15_pub.o src/rsa/rsa_i15_pub.c
+
 $(BUILD)/rsa_i31_pkcs1_sign.o: src/rsa/rsa_i31_pkcs1_sign.c $(HEADERS)
 	$(CC) $(CFLAGS) -c -o $(BUILD)/rsa_i31_pkcs1_sign.o src/rsa/rsa_i31_pkcs1_sign.c
 
@@ -373,6 +414,12 @@ $(BUILD)/rsa_i32_priv.o: src/rsa/rsa_i32_priv.c $(HEADERS)
 $(BUILD)/rsa_i32_pub.o: src/rsa/rsa_i32_pub.c $(HEADERS)
 	$(CC) $(CFLAGS) -c -o $(BUILD)/rsa_i32_pub.o src/rsa/rsa_i32_pub.c
 
+$(BUILD)/rsa_pkcs1_sig_pad.o: src/rsa/rsa_pkcs1_sig_pad.c $(HEADERS)
+	$(CC) $(CFLAGS) -c -o $(BUILD)/rsa_pkcs1_sig_pad.o src/rsa/rsa_pkcs1_sig_pad.c
+
+$(BUILD)/rsa_pkcs1_sig_unpad.o: src/rsa/rsa_pkcs1_sig_unpad.c $(HEADERS)
+	$(CC) $(CFLAGS) -c -o $(BUILD)/rsa_pkcs1_sig_unpad.o src/rsa/rsa_pkcs1_sig_unpad.c
+
 $(BUILD)/rsa_ssl_decrypt.o: src/rsa/rsa_ssl_decrypt.c $(HEADERS)
 	$(CC) $(CFLAGS) -c -o $(BUILD)/rsa_ssl_decrypt.o src/rsa/rsa_ssl_decrypt.c
 
diff --git a/inc/bearssl_ec.h b/inc/bearssl_ec.h
index 336c89c..69ad29e 100644
--- a/inc/bearssl_ec.h
+++ b/inc/bearssl_ec.h
@@ -369,6 +369,15 @@ typedef struct {
  */
 extern const br_ec_impl br_ec_prime_i31;
 
+/**
+ * \brief EC implementation "i15".
+ *
+ * This implementation internally uses generic code for modular integers,
+ * with a representation as sequences of 15-bit words. It supports secp256r1,
+ * secp384r1 and secp521r1 (aka NIST curves P-256, P-384 and P-521).
+ */
+extern const br_ec_impl br_ec_prime_i15;
+
 /**
  * \brief EC implementation "i15" for P-256.
  *
@@ -533,4 +542,70 @@ uint32_t br_ecdsa_i31_vrfy_raw(const br_ec_impl *impl,
 	const void *hash, size_t hash_len,
 	const br_ec_public_key *pk, const void *sig, size_t sig_len);
 
+/**
+ * \brief ECDSA signature generator, "i15" implementation, "asn1" format.
+ *
+ * \see br_ecdsa_sign()
+ *
+ * \param impl         EC implementation to use.
+ * \param hf           hash function used to process the data.
+ * \param hash_value   signed data (hashed).
+ * \param sk           EC private key.
+ * \param sig          destination buffer.
+ * \return  the signature length (in bytes), or 0 on error.
+ */
+size_t br_ecdsa_i15_sign_asn1(const br_ec_impl *impl,
+	const br_hash_class *hf, const void *hash_value,
+	const br_ec_private_key *sk, void *sig);
+
+/**
+ * \brief ECDSA signature generator, "i15" implementation, "raw" format.
+ *
+ * \see br_ecdsa_sign()
+ *
+ * \param impl         EC implementation to use.
+ * \param hf           hash function used to process the data.
+ * \param hash_value   signed data (hashed).
+ * \param sk           EC private key.
+ * \param sig          destination buffer.
+ * \return  the signature length (in bytes), or 0 on error.
+ */
+size_t br_ecdsa_i15_sign_raw(const br_ec_impl *impl,
+	const br_hash_class *hf, const void *hash_value,
+	const br_ec_private_key *sk, void *sig);
+
+/**
+ * \brief ECDSA signature verifier, "i15" implementation, "asn1" format.
+ *
+ * \see br_ecdsa_vrfy()
+ *
+ * \param impl       EC implementation to use.
+ * \param hash       signed data (hashed).
+ * \param hash_len   hash value length (in bytes).
+ * \param pk         EC public key.
+ * \param sig        signature.
+ * \param sig_len    signature length (in bytes).
+ * \return  1 on success, 0 on error.
+ */
+uint32_t br_ecdsa_i15_vrfy_asn1(const br_ec_impl *impl,
+	const void *hash, size_t hash_len,
+	const br_ec_public_key *pk, const void *sig, size_t sig_len);
+
+/**
+ * \brief ECDSA signature verifier, "i15" implementation, "raw" format.
+ *
+ * \see br_ecdsa_vrfy()
+ *
+ * \param impl       EC implementation to use.
+ * \param hash       signed data (hashed).
+ * \param hash_len   hash value length (in bytes).
+ * \param pk         EC public key.
+ * \param sig        signature.
+ * \param sig_len    signature length (in bytes).
+ * \return  1 on success, 0 on error.
+ */
+uint32_t br_ecdsa_i15_vrfy_raw(const br_ec_impl *impl,
+	const void *hash, size_t hash_len,
+	const br_ec_public_key *pk, const void *sig, size_t sig_len);
+
 #endif
diff --git a/inc/bearssl_rsa.h b/inc/bearssl_rsa.h
index f1bdf2b..b9362a3 100644
--- a/inc/bearssl_rsa.h
+++ b/inc/bearssl_rsa.h
@@ -121,7 +121,7 @@
  *
  * ## Implementations
  *
- * Two RSA implementations are included:
+ * Three RSA implementations are included:
  *
  *   - The **i32** implementation internally represents big integers
  *     as arrays of 32-bit integers. It is perfunctory and portable,
@@ -132,6 +132,12 @@
  *     faster than the i32 implementation (the reduced integer size makes
  *     carry propagation easier) for a similar code footprint, but uses
  *     very slightly larger stack buffers (about 4% bigger).
+ *
+ *   - The **i15** implementation uses 16-bit integers, each containing
+ *     15 bits worth of integer data. Multiplication results fit on
+ *     32 bits, so this won't use the "widening" multiplication routine
+ *     on ARM Cortex M0/M0+, for much better performance and constant-time
+ *     execution.
  */
 
 /**
@@ -445,6 +451,70 @@ uint32_t br_rsa_i31_pkcs1_sign(const unsigned char *hash_oid,
 	const unsigned char *hash, size_t hash_len,
 	const br_rsa_private_key *sk, unsigned char *x);
 
+/*
+ * RSA "i15" engine. Integers are represented as 15-bit integers, so
+ * the code uses only 32-bit multiplication (no 64-bit result), which
+ * is vastly faster (and constant-time) on the ARM Cortex M0/M0+.
+ */
+
+/**
+ * \brief RSA public key engine "i15".
+ *
+ * \see br_rsa_public
+ *
+ * \param x      operand to exponentiate.
+ * \param xlen   length of the operand (in bytes).
+ * \param pk     RSA public key.
+ * \return  1 on success, 0 on error.
+ */
+uint32_t br_rsa_i15_public(unsigned char *x, size_t xlen,
+	const br_rsa_public_key *pk);
+
+/**
+ * \brief RSA signature verification engine "i15".
+ *
+ * \see br_rsa_pkcs1_vrfy
+ *
+ * \param x          signature buffer.
+ * \param xlen       signature length (in bytes).
+ * \param hash_oid   encoded hash algorithm OID (or `NULL`).
+ * \param hash_len   expected hash value length (in bytes).
+ * \param pk         RSA public key.
+ * \param hash_out   output buffer for the hash value.
+ * \return  1 on success, 0 on error.
+ */
+uint32_t br_rsa_i15_pkcs1_vrfy(const unsigned char *x, size_t xlen,
+	const unsigned char *hash_oid, size_t hash_len,
+	const br_rsa_public_key *pk, unsigned char *hash_out);
+
+/**
+ * \brief RSA private key engine "i15".
+ *
+ * \see br_rsa_private
+ *
+ * \param x    operand to exponentiate.
+ * \param sk   RSA private key.
+ * \return  1 on success, 0 on error.
+ */
+uint32_t br_rsa_i15_private(unsigned char *x,
+	const br_rsa_private_key *sk);
+
+/**
+ * \brief RSA signature generation engine "i15".
+ *
+ * \see br_rsa_pkcs1_sign
+ *
+ * \param hash_oid   encoded hash algorithm OID (or `NULL`).
+ * \param hash       hash value.
+ * \param hash_len   hash value length (in bytes).
+ * \param sk         RSA private key.
+ * \param x          output buffer for the hash value.
+ * \return  1 on success, 0 on error.
+ */
+uint32_t br_rsa_i15_pkcs1_sign(const unsigned char *hash_oid,
+	const unsigned char *hash, size_t hash_len,
+	const br_rsa_private_key *sk, unsigned char *x);
+
 /**
  * \brief RSA decryption helper, for SSL/TLS.
  *
diff --git a/src/ec/ec_prime_i15.c b/src/ec/ec_prime_i15.c
new file mode 100644
index 0000000..04bdd5d
--- /dev/null
+++ b/src/ec/ec_prime_i15.c
@@ -0,0 +1,792 @@
+/*
+ * Copyright (c) 2017 Thomas Pornin <pornin@bolet.org>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining 
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be 
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "inner.h"
+
+/*
+ * Parameters for supported curves:
+ *   - field modulus p
+ *   - R^2 mod p (R = 2^(15k) for the smallest k such that R >= p)
+ *   - b*R mod p (b is the second curve equation parameter)
+ */
+
+static const uint16_t P256_P[] = {
+	0x0111,
+	0x7FFF, 0x7FFF, 0x7FFF, 0x7FFF, 0x7FFF, 0x7FFF, 0x003F, 0x0000,
+	0x0000, 0x0000, 0x0000, 0x0000, 0x1000, 0x0000, 0x4000, 0x7FFF,
+	0x7FFF, 0x0001
+};
+
+static const uint16_t P256_R2[] = {
+	0x0111,
+	0x0000, 0x6000, 0x0000, 0x0000, 0x0000, 0x0000, 0x7FFC, 0x7FFF,
+	0x7FBF, 0x7FFF, 0x7FBF, 0x7FFF, 0x7FFF, 0x7FFF, 0x77FF, 0x7FFF,
+	0x4FFF, 0x0000
+};
+
+static const uint16_t P256_B[] = {
+	0x0111,
+	0x770C, 0x5EEF, 0x29C4, 0x3EC4, 0x6273, 0x0486, 0x4543, 0x3993,
+	0x3C01, 0x6B56, 0x212E, 0x57EE, 0x4882, 0x204B, 0x7483, 0x3C16,
+	0x0187, 0x0000
+};
+
+static const uint16_t P384_P[] = {
+	0x0199,
+	0x7FFF, 0x7FFF, 0x0003, 0x0000, 0x0000, 0x0000, 0x7FC0, 0x7FFF,
+	0x7EFF, 0x7FFF, 0x7FFF, 0x7FFF, 0x7FFF, 0x7FFF, 0x7FFF, 0x7FFF,
+	0x7FFF, 0x7FFF, 0x7FFF, 0x7FFF, 0x7FFF, 0x7FFF, 0x7FFF, 0x7FFF,
+	0x7FFF, 0x01FF
+};
+
+static const uint16_t P384_R2[] = {
+	0x0199,
+	0x1000, 0x0000, 0x0000, 0x7FFF, 0x7FFF, 0x0001, 0x0000, 0x0010,
+	0x0000, 0x0000, 0x0000, 0x7F00, 0x7FFF, 0x01FF, 0x0000, 0x1000,
+	0x0000, 0x2000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+	0x0000, 0x0000
+};
+
+static const uint16_t P384_B[] = {
+	0x0199,
+	0x7333, 0x2096, 0x70D1, 0x2310, 0x3020, 0x6197, 0x1464, 0x35BB,
+	0x70CA, 0x0117, 0x1920, 0x4136, 0x5FC8, 0x5713, 0x4938, 0x7DD2,
+	0x4DD2, 0x4A71, 0x0220, 0x683E, 0x2C87, 0x4DB1, 0x7BFF, 0x6C09,
+	0x0452, 0x0084
+};
+
+static const uint16_t P521_P[] = {
+	0x022B,
+	0x7FFF, 0x7FFF, 0x7FFF, 0x7FFF, 0x7FFF, 0x7FFF, 0x7FFF, 0x7FFF,
+	0x7FFF, 0x7FFF, 0x7FFF, 0x7FFF, 0x7FFF, 0x7FFF, 0x7FFF, 0x7FFF,
+	0x7FFF, 0x7FFF, 0x7FFF, 0x7FFF, 0x7FFF, 0x7FFF, 0x7FFF, 0x7FFF,
+	0x7FFF, 0x7FFF, 0x7FFF, 0x7FFF, 0x7FFF, 0x7FFF, 0x7FFF, 0x7FFF,
+	0x7FFF, 0x7FFF, 0x07FF
+};
+
+static const uint16_t P521_R2[] = {
+	0x022B,
+	0x0100, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+	0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+	0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+	0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+	0x0000, 0x0000, 0x0000
+};
+
+static const uint16_t P521_B[] = {
+	0x022B,
+	0x7002, 0x6A07, 0x751A, 0x228F, 0x71EF, 0x5869, 0x20F4, 0x1EFC,
+	0x7357, 0x37E0, 0x4EEC, 0x605E, 0x1652, 0x26F6, 0x31FA, 0x4A8F,
+	0x6193, 0x3C2A, 0x3C42, 0x48C7, 0x3489, 0x6771, 0x4C57, 0x5CCD,
+	0x2725, 0x545B, 0x503B, 0x5B42, 0x21A0, 0x2534, 0x687E, 0x70E4,
+	0x1618, 0x27D7, 0x0465
+};
+
+typedef struct {
+	const uint16_t *p;
+	const uint16_t *b;
+	const uint16_t *R2;
+	uint16_t p0i;
+	size_t point_len;
+} curve_params;
+
+static inline const curve_params *
+id_to_curve(int curve)
+{
+	static const curve_params pp[] = {
+		{ P256_P, P256_B, P256_R2, 0x0001,  65 },
+		{ P384_P, P384_B, P384_R2, 0x0001,  97 },
+		{ P521_P, P521_B, P521_R2, 0x0001, 133 }
+	};
+
+	return &pp[curve - BR_EC_secp256r1];
+}
+
+#define I15_LEN   ((BR_MAX_EC_SIZE + 29) / 15)
+
+/*
+ * Type for a point in Jacobian coordinates:
+ * -- three values, x, y and z, in Montgomery representation
+ * -- affine coordinates are X = x / z^2 and Y = y / z^3
+ * -- for the point at infinity, z = 0
+ */
+typedef struct {
+	uint16_t c[3][I15_LEN];
+} jacobian;
+
+/*
+ * We use a custom interpreter that uses a dozen registers, and
+ * only six operations:
+ *    MSET(d, a)       copy a into d
+ *    MADD(d, a)       d = d+a (modular)
+ *    MSUB(d, a)       d = d-a (modular)
+ *    MMUL(d, a, b)    d = a*b (Montgomery multiplication)
+ *    MINV(d, a, b)    invert d modulo p; a and b are used as scratch registers
+ *    MTZ(d)           clear return value if d = 0
+ * Destination of MMUL (d) must be distinct from operands (a and b).
+ * There is no such constraint for MSUB and MADD.
+ *
+ * Registers include the operand coordinates, and temporaries.
+ */
+#define MSET(d, a)      (0x0000 + ((d) << 8) + ((a) << 4))
+#define MADD(d, a)      (0x1000 + ((d) << 8) + ((a) << 4))
+#define MSUB(d, a)      (0x2000 + ((d) << 8) + ((a) << 4))
+#define MMUL(d, a, b)   (0x3000 + ((d) << 8) + ((a) << 4) + (b))
+#define MINV(d, a, b)   (0x4000 + ((d) << 8) + ((a) << 4) + (b))
+#define MTZ(d)          (0x5000 + ((d) << 8))
+#define ENDCODE         0
+
+/*
+ * Registers for the input operands.
+ */
+#define P1x    0
+#define P1y    1
+#define P1z    2
+#define P2x    3
+#define P2y    4
+#define P2z    5
+
+/*
+ * Alternate names for the first input operand.
+ */
+#define Px     0
+#define Py     1
+#define Pz     2
+
+/*
+ * Temporaries.
+ */
+#define t1     6
+#define t2     7
+#define t3     8
+#define t4     9
+#define t5    10
+#define t6    11
+#define t7    12
+
+/*
+ * Extra scratch registers available when there is no second operand (e.g.
+ * for "double" and "affine").
+ */
+#define t8     3
+#define t9     4
+#define t10    5
+
+/*
+ * Doubling formulas are:
+ *
+ *   s = 4*x*y^2
+ *   m = 3*(x + z^2)*(x - z^2)
+ *   x' = m^2 - 2*s
+ *   y' = m*(s - x') - 8*y^4
+ *   z' = 2*y*z
+ *
+ * If y = 0 (P has order 2) then this yields infinity (z' = 0), as it
+ * should. This case should not happen anyway, because our curves have
+ * prime order, and thus do not contain any point of order 2.
+ *
+ * If P is infinity (z = 0), then again the formulas yield infinity,
+ * which is correct. Thus, this code works for all points.
+ *
+ * Cost: 8 multiplications
+ */
+static const uint16_t code_double[] = {
+	/*
+	 * Compute z^2 (in t1).
+	 */
+	MMUL(t1, Pz, Pz),
+
+	/*
+	 * Compute x-z^2 (in t2) and then x+z^2 (in t1).
+	 */
+	MSET(t2, Px),
+	MSUB(t2, t1),
+	MADD(t1, Px),
+
+	/*
+	 * Compute m = 3*(x+z^2)*(x-z^2) (in t1).
+	 */
+	MMUL(t3, t1, t2),
+	MSET(t1, t3),
+	MADD(t1, t3),
+	MADD(t1, t3),
+
+	/*
+	 * Compute s = 4*x*y^2 (in t2) and 2*y^2 (in t3).
+	 */
+	MMUL(t3, Py, Py),
+	MADD(t3, t3),
+	MMUL(t2, Px, t3),
+	MADD(t2, t2),
+
+	/*
+	 * Compute x' = m^2 - 2*s.
+	 */
+	MMUL(Px, t1, t1),
+	MSUB(Px, t2),
+	MSUB(Px, t2),
+
+	/*
+	 * Compute z' = 2*y*z.
+	 */
+	MMUL(t4, Py, Pz),
+	MSET(Pz, t4),
+	MADD(Pz, t4),
+
+	/*
+	 * Compute y' = m*(s - x') - 8*y^4. Note that we already have
+	 * 2*y^2 in t3.
+	 */
+	MSUB(t2, Px),
+	MMUL(Py, t1, t2),
+	MMUL(t4, t3, t3),
+	MSUB(Py, t4),
+	MSUB(Py, t4),
+
+	ENDCODE
+};
+
+/*
+ * Addtions formulas are:
+ *
+ *   u1 = x1 * z2^2
+ *   u2 = x2 * z1^2
+ *   s1 = y1 * z2^3
+ *   s2 = y2 * z1^3
+ *   h = u2 - u1
+ *   r = s2 - s1
+ *   x3 = r^2 - h^3 - 2 * u1 * h^2
+ *   y3 = r * (u1 * h^2 - x3) - s1 * h^3
+ *   z3 = h * z1 * z2
+ *
+ * If both P1 and P2 are infinity, then z1 == 0 and z2 == 0, implying that
+ * z3 == 0, so the result is correct.
+ * If either of P1 or P2 is infinity, but not both, then z3 == 0, which is
+ * not correct.
+ * h == 0 only if u1 == u2; this happens in two cases:
+ * -- if s1 == s2 then P1 and/or P2 is infinity, or P1 == P2
+ * -- if s1 != s2 then P1 + P2 == infinity (but neither P1 or P2 is infinity)
+ *
+ * Thus, the following situations are not handled correctly:
+ * -- P1 = 0 and P2 != 0
+ * -- P1 != 0 and P2 = 0
+ * -- P1 = P2
+ * All other cases are properly computed. However, even in "incorrect"
+ * situations, the three coordinates still are properly formed field
+ * elements.
+ *
+ * The returned flag is cleared if r == 0. This happens in the following
+ * cases:
+ * -- Both points are on the same horizontal line (same Y coordinate).
+ * -- Both points are infinity.
+ * -- One point is infinity and the other is on line Y = 0.
+ * The third case cannot happen with our curves (there is no valid point
+ * on line Y = 0 since that would be a point of order 2). If the two
+ * source points are non-infinity, then remains only the case where the
+ * two points are on the same horizontal line.
+ *
+ * This allows us to detect the "P1 == P2" case, assuming that P1 != 0 and
+ * P2 != 0:
+ * -- If the returned value is not the point at infinity, then it was properly
+ * computed.
+ * -- Otherwise, if the returned flag is 1, then P1+P2 = 0, and the result
+ * is indeed the point at infinity.
+ * -- Otherwise (result is infinity, flag is 0), then P1 = P2 and we should
+ * use the 'double' code.
+ *
+ * Cost: 16 multiplications
+ */
+static const uint16_t code_add[] = {
+	/*
+	 * Compute u1 = x1*z2^2 (in t1) and s1 = y1*z2^3 (in t3).
+	 */
+	MMUL(t3, P2z, P2z),
+	MMUL(t1, P1x, t3),
+	MMUL(t4, P2z, t3),
+	MMUL(t3, P1y, t4),
+
+	/*
+	 * Compute u2 = x2*z1^2 (in t2) and s2 = y2*z1^3 (in t4).
+	 */
+	MMUL(t4, P1z, P1z),
+	MMUL(t2, P2x, t4),
+	MMUL(t5, P1z, t4),
+	MMUL(t4, P2y, t5),
+
+	/*
+	 * Compute h = u2 - u1 (in t2) and r = s2 - s1 (in t4).
+	 */
+	MSUB(t2, t1),
+	MSUB(t4, t3),
+
+	/*
+	 * Report cases where r = 0 through the returned flag.
+	 */
+	MTZ(t4),
+
+	/*
+	 * Compute u1*h^2 (in t6) and h^3 (in t5).
+	 */
+	MMUL(t7, t2, t2),
+	MMUL(t6, t1, t7),
+	MMUL(t5, t7, t2),
+
+	/*
+	 * Compute x3 = r^2 - h^3 - 2*u1*h^2.
+	 * t1 and t7 can be used as scratch registers.
+	 */
+	MMUL(P1x, t4, t4),
+	MSUB(P1x, t5),
+	MSUB(P1x, t6),
+	MSUB(P1x, t6),
+
+	/*
+	 * Compute y3 = r*(u1*h^2 - x3) - s1*h^3.
+	 */
+	MSUB(t6, P1x),
+	MMUL(P1y, t4, t6),
+	MMUL(t1, t5, t3),
+	MSUB(P1y, t1),
+
+	/*
+	 * Compute z3 = h*z1*z2.
+	 */
+	MMUL(t1, P1z, P2z),
+	MMUL(P1z, t1, t2),
+
+	ENDCODE
+};
+
+/*
+ * Check that the point is on the curve. This code snippet assumes the
+ * following conventions:
+ * -- Coordinates x and y have been freshly decoded in P1 (but not
+ * converted to Montgomery coordinates yet).
+ * -- P2x, P2y and P2z are set to, respectively, R^2, b*R and 1.
+ */
+static const uint16_t code_check[] = {
+
+	/* Convert x and y to Montgomery representation. */
+	MMUL(t1, P1x, P2x),
+	MMUL(t2, P1y, P2x),
+	MSET(P1x, t1),
+	MSET(P1y, t2),
+
+	/* Compute x^3 in t1. */
+	MMUL(t2, P1x, P1x),
+	MMUL(t1, P1x, t2),
+
+	/* Subtract 3*x from t1. */
+	MSUB(t1, P1x),
+	MSUB(t1, P1x),
+	MSUB(t1, P1x),
+
+	/* Add b. */
+	MADD(t1, P2y),
+
+	/* Compute y^2 in t2. */
+	MMUL(t2, P1y, P1y),
+
+	/* Compare y^2 with x^3 - 3*x + b; they must match. */
+	MSUB(t1, t2),
+	MTZ(t1),
+
+	/* Set z to 1 (in Montgomery representation). */
+	MMUL(P1z, P2x, P2z),
+
+	ENDCODE
+};
+
+/*
+ * Conversion back to affine coordinates. This code snippet assumes that
+ * the z coordinate of P2 is set to 1 (not in Montgomery representation).
+ */
+static const uint16_t code_affine[] = {
+
+	/* Save z*R in t1. */
+	MSET(t1, P1z),
+
+	/* Compute z^3 in t2. */
+	MMUL(t2, P1z, P1z),
+	MMUL(t3, P1z, t2),
+	MMUL(t2, t3, P2z),
+
+	/* Invert to (1/z^3) in t2. */
+	MINV(t2, t3, t4),
+
+	/* Compute y. */
+	MSET(t3, P1y),
+	MMUL(P1y, t2, t3),
+
+	/* Compute (1/z^2) in t3. */
+	MMUL(t3, t2, t1),
+
+	/* Compute x. */
+	MSET(t2, P1x),
+	MMUL(P1x, t2, t3),
+
+	ENDCODE
+};
+
+static uint32_t
+run_code(jacobian *P1, const jacobian *P2,
+	const curve_params *cc, const uint16_t *code)
+{
+	uint32_t r;
+	uint16_t t[13][I15_LEN];
+	size_t u;
+
+	r = 1;
+
+	/*
+	 * Copy the two operands in the dedicated registers.
+	 */
+	memcpy(t[P1x], P1->c, 3 * I15_LEN * sizeof(uint16_t));
+	memcpy(t[P2x], P2->c, 3 * I15_LEN * sizeof(uint16_t));
+
+	/*
+	 * Run formulas.
+	 */
+	for (u = 0;; u ++) {
+		unsigned op, d, a, b;
+
+		op = code[u];
+		if (op == 0) {
+			break;
+		}
+		d = (op >> 8) & 0x0F;
+		a = (op >> 4) & 0x0F;
+		b = op & 0x0F;
+		op >>= 12;
+		switch (op) {
+			uint32_t ctl;
+			size_t plen;
+			unsigned char tp[(BR_MAX_EC_SIZE + 7) >> 3];
+
+		case 0:
+			memcpy(t[d], t[a], I15_LEN * sizeof(uint16_t));
+			break;
+		case 1:
+			ctl = br_i15_add(t[d], t[a], 1);
+			ctl |= NOT(br_i15_sub(t[d], cc->p, 0));
+			br_i15_sub(t[d], cc->p, ctl);
+			break;
+		case 2:
+			br_i15_add(t[d], cc->p, br_i15_sub(t[d], t[a], 1));
+			break;
+		case 3:
+			br_i15_montymul(t[d], t[a], t[b], cc->p, cc->p0i);
+			break;
+		case 4:
+			plen = (cc->p[0] - (cc->p[0] >> 4) + 7) >> 3;
+			br_i15_encode(tp, plen, cc->p);
+			tp[plen - 1] -= 2;
+			br_i15_modpow(t[d], tp, plen,
+				cc->p, cc->p0i, t[a], t[b]);
+			break;
+		default:
+			r &= ~br_i15_iszero(t[d]);
+			break;
+		}
+	}
+
+	/*
+	 * Copy back result.
+	 */
+	memcpy(P1->c, t[P1x], 3 * I15_LEN * sizeof(uint16_t));
+	return r;
+}
+
+static void
+set_one(uint16_t *x, const uint16_t *p)
+{
+	size_t plen;
+
+	plen = (p[0] + 31) >> 4;
+	memset(x, 0, plen * sizeof *x);
+	x[0] = p[0];
+	x[1] = 0x0001;
+}
+
+static void
+point_zero(jacobian *P, const curve_params *cc)
+{
+	memset(P, 0, sizeof *P);
+	P->c[0][0] = P->c[1][0] = P->c[2][0] = cc->p[0];
+}
+
+static inline void
+point_double(jacobian *P, const curve_params *cc)
+{
+	run_code(P, P, cc, code_double);
+}
+
+static inline uint32_t
+point_add(jacobian *P1, const jacobian *P2, const curve_params *cc)
+{
+	return run_code(P1, P2, cc, code_add);
+}
+
+static void
+point_mul(jacobian *P, const unsigned char *x, size_t xlen,
+	const curve_params *cc)
+{
+	/*
+	 * We do a simple double-and-add ladder with a 2-bit window
+	 * to make only one add every two doublings. We thus first
+	 * precompute 2P and 3P in some local buffers.
+	 *
+	 * We always perform two doublings and one addition; the
+	 * addition is with P, 2P and 3P and is done in a temporary
+	 * array.
+	 *
+	 * The addition code cannot handle cases where one of the
+	 * operands is infinity, which is the case at the start of the
+	 * ladder. We therefore need to maintain a flag that controls
+	 * this situation.
+	 */
+	uint32_t qz;
+	jacobian P2, P3, Q, T, U;
+
+	memcpy(&P2, P, sizeof P2);
+	point_double(&P2, cc);
+	memcpy(&P3, P, sizeof P3);
+	point_add(&P3, &P2, cc);
+
+	point_zero(&Q, cc);
+	qz = 1;
+	while (xlen -- > 0) {
+		int k;
+
+		for (k = 6; k >= 0; k -= 2) {
+			uint32_t bits;
+			uint32_t bnz;
+
+			point_double(&Q, cc);
+			point_double(&Q, cc);
+			memcpy(&T, P, sizeof T);
+			memcpy(&U, &Q, sizeof U);
+			bits = (*x >> k) & (uint32_t)3;
+			bnz = NEQ(bits, 0);
+			CCOPY(EQ(bits, 2), &T, &P2, sizeof T);
+			CCOPY(EQ(bits, 3), &T, &P3, sizeof T);
+			point_add(&U, &T, cc);
+			CCOPY(bnz & qz, &Q, &T, sizeof Q);
+			CCOPY(bnz & ~qz, &Q, &U, sizeof Q);
+			qz &= ~bnz;
+		}
+		x ++;
+	}
+	memcpy(P, &Q, sizeof Q);
+}
+
+/*
+ * Decode point into Jacobian coordinates. This function does not support
+ * the point at infinity. If the point is invalid then this returns 0, but
+ * the coordinates are still set to properly formed field elements.
+ */
+static uint32_t
+point_decode(jacobian *P, const void *src, size_t len, const curve_params *cc)
+{
+	/*
+	 * Points must use uncompressed format:
+	 * -- first byte is 0x04;
+	 * -- coordinates X and Y use unsigned big-endian, with the same
+	 *    length as the field modulus.
+	 *
+	 * We don't support hybrid format (uncompressed, but first byte
+	 * has value 0x06 or 0x07, depending on the least significant bit
+	 * of Y) because it is rather useless, and explicitly forbidden
+	 * by PKIX (RFC 5480, section 2.2).
+	 *
+	 * We don't support compressed format either, because it is not
+	 * much used in practice (there are or were patent-related
+	 * concerns about point compression, which explains the lack of
+	 * generalised support). Also, point compression support would
+	 * need a bit more code.
+	 */
+	const unsigned char *buf;
+	size_t plen, zlen;
+	uint32_t r;
+	jacobian Q;
+
+	buf = src;
+	point_zero(P, cc);
+	plen = (cc->p[0] - (cc->p[0] >> 4) + 7) >> 3;
+	if (len != 1 + (plen << 1)) {
+		return 0;
+	}
+	r = br_i15_decode_mod(P->c[0], buf + 1, plen, cc->p);
+	r &= br_i15_decode_mod(P->c[1], buf + 1 + plen, plen, cc->p);
+
+	/*
+	 * Check first byte.
+	 */
+	r &= EQ(buf[0], 0x04);
+	/* obsolete
+	r &= EQ(buf[0], 0x04) | (EQ(buf[0] & 0xFE, 0x06)
+		& ~(uint32_t)(buf[0] ^ buf[plen << 1]));
+	*/
+
+	/*
+	 * Convert coordinates and check that the point is valid.
+	 */
+	zlen = ((cc->p[0] + 31) >> 4) * sizeof(uint16_t);
+	memcpy(Q.c[0], cc->R2, zlen);
+	memcpy(Q.c[1], cc->b, zlen);
+	set_one(Q.c[2], cc->p);
+	r &= ~run_code(P, &Q, cc, code_check);
+	return r;
+}
+
+/*
+ * Encode a point. This method assumes that the point is correct and is
+ * not the point at infinity. Encoded size is always 1+2*plen, where
+ * plen is the field modulus length, in bytes.
+ */
+static void
+point_encode(void *dst, const jacobian *P, const curve_params *cc)
+{
+	unsigned char *buf;
+	size_t plen;
+	jacobian Q, T;
+
+	buf = dst;
+	plen = (cc->p[0] - (cc->p[0] >> 4) + 7) >> 3;
+	buf[0] = 0x04;
+	memcpy(&Q, P, sizeof *P);
+	set_one(T.c[2], cc->p);
+	run_code(&Q, &T, cc, code_affine);
+	br_i15_encode(buf + 1, plen, Q.c[0]);
+	br_i15_encode(buf + 1 + plen, plen, Q.c[1]);
+}
+
+static const br_ec_curve_def *
+id_to_curve_def(int curve)
+{
+	switch (curve) {
+	case BR_EC_secp256r1:
+		return &br_secp256r1;
+	case BR_EC_secp384r1:
+		return &br_secp384r1;
+	case BR_EC_secp521r1:
+		return &br_secp521r1;
+	}
+	return NULL;
+}
+
+static const unsigned char *
+api_generator(int curve, size_t *len)
+{
+	const br_ec_curve_def *cd;
+
+	cd = id_to_curve_def(curve);
+	*len = cd->generator_len;
+	return cd->generator;
+}
+
+static const unsigned char *
+api_order(int curve, size_t *len)
+{
+	const br_ec_curve_def *cd;
+
+	cd = id_to_curve_def(curve);
+	*len = cd->order_len;
+	return cd->order;
+}
+
+static uint32_t
+api_mul(unsigned char *G, size_t Glen,
+	const unsigned char *x, size_t xlen, int curve)
+{
+	uint32_t r;
+	const curve_params *cc;
+	jacobian P;
+
+	cc = id_to_curve(curve);
+	r = point_decode(&P, G, Glen, cc);
+	point_mul(&P, x, xlen, cc);
+	if (Glen == cc->point_len) {
+		point_encode(G, &P, cc);
+	}
+	return r;
+}
+
+static uint32_t
+api_muladd(unsigned char *A, const unsigned char *B, size_t len,
+	const unsigned char *x, size_t xlen,
+	const unsigned char *y, size_t ylen, int curve)
+{
+	uint32_t r, t, z;
+	const curve_params *cc;
+	jacobian P, Q;
+
+	/*
+	 * TODO: see about merging the two ladders. Right now, we do
+	 * two independant point multiplications, which is a bit
+	 * wasteful of CPU resources (but yields short code).
+	 */
+
+	cc = id_to_curve(curve);
+	r = point_decode(&P, A, len, cc);
+	r &= point_decode(&Q, B, len, cc);
+	point_mul(&P, x, xlen, cc);
+	point_mul(&Q, y, ylen, cc);
+
+	/*
+	 * We want to compute P+Q. Since the base points A and B are distinct
+	 * from infinity, and the multipliers are non-zero and lower than the
+	 * curve order, then we know that P and Q are non-infinity. This
+	 * leaves two special situations to test for:
+	 * -- If P = Q then we must use point_double().
+	 * -- If P+Q = 0 then we must report an error.
+	 */
+	t = point_add(&P, &Q, cc);
+	point_double(&Q, cc);
+	z = br_i15_iszero(P.c[2]);
+
+	/*
+	 * If z is 1 then either P+Q = 0 (t = 1) or P = Q (t = 0). So we
+	 * have the following:
+	 *
+	 *   z = 0, t = 0   return P (normal addition)
+	 *   z = 0, t = 1   return P (normal addition)
+	 *   z = 1, t = 0   return Q (a 'double' case)
+	 *   z = 1, t = 1   report an error (P+Q = 0)
+	 */
+	CCOPY(z & ~t, &P, &Q, sizeof Q);
+	point_encode(A, &P, cc);
+	r &= ~(z & t);
+
+	return r;
+}
+
+/* see bearssl_ec.h */
+const br_ec_impl br_ec_prime_i15 = {
+	(uint32_t)0x03800000,
+	&api_generator,
+	&api_order,
+	&api_mul,
+	&api_muladd
+};
diff --git a/src/ec/ec_prime_i31.c b/src/ec/ec_prime_i31.c
index 440641e..30c7ef3 100644
--- a/src/ec/ec_prime_i31.c
+++ b/src/ec/ec_prime_i31.c
@@ -135,7 +135,7 @@ typedef struct {
 
 /*
  * We use a custom interpreter that uses a dozen registers, and
- * only four operations:
+ * only six operations:
  *    MSET(d, a)       copy a into d
  *    MADD(d, a)       d = d+a (modular)
  *    MSUB(d, a)       d = d-a (modular)
diff --git a/src/ec/ecdsa_i15_bits.c b/src/ec/ecdsa_i15_bits.c
new file mode 100644
index 0000000..402d14a
--- /dev/null
+++ b/src/ec/ecdsa_i15_bits.c
@@ -0,0 +1,47 @@
+/*
+ * Copyright (c) 2017 Thomas Pornin <pornin@bolet.org>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining 
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be 
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "inner.h"
+
+/* see inner.h */
+void
+br_ecdsa_i15_bits2int(uint16_t *x,
+	const void *src, size_t len, uint32_t ebitlen)
+{
+	uint32_t bitlen, hbitlen;
+	int sc;
+
+	bitlen = ebitlen - (ebitlen >> 4);
+	hbitlen = (uint32_t)len << 3;
+	if (hbitlen > bitlen) {
+		len = (bitlen + 7) >> 3;
+		sc = (int)((hbitlen - bitlen) & 7);
+	} else {
+		sc = 0;
+	}
+	br_i15_zero(x, ebitlen);
+	br_i15_decode(x, src, len);
+	br_i15_rshift(x, sc);
+	x[0] = ebitlen;
+}
diff --git a/src/ec/ecdsa_i15_sign_asn1.c b/src/ec/ecdsa_i15_sign_asn1.c
new file mode 100644
index 0000000..ab4a283
--- /dev/null
+++ b/src/ec/ecdsa_i15_sign_asn1.c
@@ -0,0 +1,45 @@
+/*
+ * Copyright (c) 2017 Thomas Pornin <pornin@bolet.org>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining 
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be 
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "inner.h"
+
+#define ORDER_LEN   ((BR_MAX_EC_SIZE + 7) >> 3)
+
+/* see bearssl_ec.h */
+size_t
+br_ecdsa_i15_sign_asn1(const br_ec_impl *impl,
+	const br_hash_class *hf, const void *hash_value,
+	const br_ec_private_key *sk, void *sig)
+{
+	unsigned char rsig[(ORDER_LEN << 1) + 12];
+	size_t sig_len;
+
+	sig_len = br_ecdsa_i15_sign_raw(impl, hf, hash_value, sk, rsig);
+	if (sig_len == 0) {
+		return 0;
+	}
+	sig_len = br_ecdsa_raw_to_asn1(rsig, sig_len);
+	memcpy(sig, rsig, sig_len);
+	return sig_len;
+}
diff --git a/src/ec/ecdsa_i15_sign_raw.c b/src/ec/ecdsa_i15_sign_raw.c
new file mode 100644
index 0000000..87b2f33
--- /dev/null
+++ b/src/ec/ecdsa_i15_sign_raw.c
@@ -0,0 +1,184 @@
+/*
+ * Copyright (c) 2017 Thomas Pornin <pornin@bolet.org>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining 
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be 
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "inner.h"
+
+#define I15_LEN     ((BR_MAX_EC_SIZE + 29) / 15)
+#define POINT_LEN   (1 + (((BR_MAX_EC_SIZE + 7) >> 3) << 1))
+#define ORDER_LEN   ((BR_MAX_EC_SIZE + 7) >> 3)
+
+/* see bearssl_ec.h */
+size_t
+br_ecdsa_i15_sign_raw(const br_ec_impl *impl,
+	const br_hash_class *hf, const void *hash_value,
+	const br_ec_private_key *sk, void *sig)
+{
+	/*
+	 * IMPORTANT: this code is fit only for curves with a prime
+	 * order. This is needed so that modular reduction of the X
+	 * coordinate of a point can be done with a simple subtraction.
+	 * We also rely on the last byte of the curve order to be distinct
+	 * from 0 and 1.
+	 */
+	const br_ec_curve_def *cd;
+	uint16_t n[I15_LEN], r[I15_LEN], s[I15_LEN], x[I15_LEN];
+	uint16_t m[I15_LEN], k[I15_LEN], t1[I15_LEN], t2[I15_LEN];
+	unsigned char tt[ORDER_LEN << 1];
+	unsigned char eU[POINT_LEN];
+	size_t hash_len, nlen, ulen;
+	uint16_t n0i;
+	uint32_t ctl;
+	br_hmac_drbg_context drbg;
+
+	/*
+	 * If the curve is not supported, then exit with an error.
+	 */
+	if (((impl->supported_curves >> sk->curve) & 1) == 0) {
+		return 0;
+	}
+
+	/*
+	 * Get the curve parameters (generator and order).
+	 */
+	switch (sk->curve) {
+	case BR_EC_secp256r1:
+		cd = &br_secp256r1;
+		break;
+	case BR_EC_secp384r1:
+		cd = &br_secp384r1;
+		break;
+	case BR_EC_secp521r1:
+		cd = &br_secp521r1;
+		break;
+	default:
+		return 0;
+	}
+
+	/*
+	 * Get modulus.
+	 */
+	nlen = cd->order_len;
+	br_i15_decode(n, cd->order, nlen);
+	n0i = br_i15_ninv15(n[1]);
+
+	/*
+	 * Get private key as an i15 integer. This also checks that the
+	 * private key is well-defined (not zero, and less than the
+	 * curve order).
+	 */
+	if (!br_i15_decode_mod(x, sk->x, sk->xlen, n)) {
+		return 0;
+	}
+	if (br_i15_iszero(x)) {
+		return 0;
+	}
+
+	/*
+	 * Get hash length.
+	 */
+	hash_len = (hf->desc >> BR_HASHDESC_OUT_OFF) & BR_HASHDESC_OUT_MASK;
+
+	/*
+	 * Truncate and reduce the hash value modulo the curve order.
+	 */
+	br_ecdsa_i15_bits2int(m, hash_value, hash_len, n[0]);
+	br_i15_sub(m, n, br_i15_sub(m, n, 0) ^ 1);
+
+	/*
+	 * RFC 6979 generation of the "k" value.
+	 *
+	 * The process uses HMAC_DRBG (with the hash function used to
+	 * process the message that is to be signed). The seed is the
+	 * concatenation of the encodings of the private key and
+	 * the hash value (after truncation and modular reduction).
+	 */
+	br_i15_encode(tt, nlen, x);
+	br_i15_encode(tt + nlen, nlen, m);
+	br_hmac_drbg_init(&drbg, hf, tt, nlen << 1);
+	for (;;) {
+		br_hmac_drbg_generate(&drbg, tt, nlen);
+		br_ecdsa_i15_bits2int(k, tt, nlen, n[0]);
+		if (br_i15_iszero(k)) {
+			continue;
+		}
+		if (br_i15_sub(k, n, 0)) {
+			break;
+		}
+	}
+
+	/*
+	 * Compute k*G and extract the X coordinate, then reduce it
+	 * modulo the curve order. Since we support only curves with
+	 * prime order, that reduction is only a matter of computing
+	 * a subtraction.
+	 */
+	ulen = cd->generator_len;
+	memcpy(eU, cd->generator, ulen);
+	br_i15_encode(tt, nlen, k);
+	if (!impl->mul(eU, ulen, tt, nlen, sk->curve)) {
+		/*
+		 * Point multiplication may fail here only if the
+		 * EC implementation does not support the curve, or the
+		 * private key is incorrect (x is a multiple of the curve
+		 * order).
+		 */
+		return 0;
+	}
+	br_i15_zero(r, n[0]);
+	br_i15_decode(r, &eU[1], ulen >> 1);
+	r[0] = n[0];
+	br_i15_sub(r, n, br_i15_sub(r, n, 0) ^ 1);
+
+	/*
+	 * Compute 1/k in double-Montgomery representation. We do so by
+	 * first converting _from_ Montgomery representation (twice),
+	 * then using a modular exponentiation.
+	 */
+	br_i15_from_monty(k, n, n0i);
+	br_i15_from_monty(k, n, n0i);
+	memcpy(tt, cd->order, nlen);
+	tt[nlen - 1] -= 2;
+	br_i15_modpow(k, tt, nlen, n, n0i, t1, t2);
+
+	/*
+	 * Compute s = (m+xr)/k (mod n).
+	 * The k[] array contains R^2/k (double-Montgomery representation);
+	 * we thus can use direct Montgomery multiplications and conversions
+	 * from Montgomery, avoiding any call to br_i15_to_monty() (which
+	 * is slower).
+	 */
+	br_i15_from_monty(m, n, n0i);
+	br_i15_montymul(t1, x, r, n, n0i);
+	ctl = br_i15_add(t1, m, 1);
+	ctl |= br_i15_sub(t1, n, 0) ^ 1;
+	br_i15_sub(t1, n, ctl);
+	br_i15_montymul(s, t1, k, n, n0i);
+
+	/*
+	 * Encode r and s in the signature.
+	 */
+	br_i15_encode(sig, nlen, r);
+	br_i15_encode((unsigned char *)sig + nlen, nlen, s);
+	return nlen << 1;
+}
diff --git a/src/ec/ecdsa_i15_vrfy_asn1.c b/src/ec/ecdsa_i15_vrfy_asn1.c
new file mode 100644
index 0000000..f4bef99
--- /dev/null
+++ b/src/ec/ecdsa_i15_vrfy_asn1.c
@@ -0,0 +1,48 @@
+/*
+ * Copyright (c) 2017 Thomas Pornin <pornin@bolet.org>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining 
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be 
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "inner.h"
+
+#define FIELD_LEN   ((BR_MAX_EC_SIZE + 7) >> 3)
+
+/* see bearssl_ec.h */
+uint32_t
+br_ecdsa_i15_vrfy_asn1(const br_ec_impl *impl,
+	const void *hash, size_t hash_len,
+	const br_ec_public_key *pk,
+	const void *sig, size_t sig_len)
+{
+	/*
+	 * We use a double-sized buffer because a malformed ASN.1 signature
+	 * may trigger a size expansion when converting to "raw" format.
+	 */
+	unsigned char rsig[(FIELD_LEN << 2) + 24];
+
+	if (sig_len > ((sizeof rsig) >> 1)) {
+		return 0;
+	}
+	memcpy(rsig, sig, sig_len);
+	sig_len = br_ecdsa_asn1_to_raw(rsig, sig_len);
+	return br_ecdsa_i15_vrfy_raw(impl, hash, hash_len, pk, rsig, sig_len);
+}
diff --git a/src/ec/ecdsa_i15_vrfy_raw.c b/src/ec/ecdsa_i15_vrfy_raw.c
new file mode 100644
index 0000000..5a16680
--- /dev/null
+++ b/src/ec/ecdsa_i15_vrfy_raw.c
@@ -0,0 +1,166 @@
+/*
+ * Copyright (c) 2017 Thomas Pornin <pornin@bolet.org>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining 
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be 
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "inner.h"
+
+#define I15_LEN     ((BR_MAX_EC_SIZE + 29) / 15)
+#define POINT_LEN   (1 + (((BR_MAX_EC_SIZE + 7) >> 3) << 1))
+
+/* see bearssl_ec.h */
+uint32_t
+br_ecdsa_i15_vrfy_raw(const br_ec_impl *impl,
+	const void *hash, size_t hash_len,
+	const br_ec_public_key *pk,
+	const void *sig, size_t sig_len)
+{
+	/*
+	 * IMPORTANT: this code is fit only for curves with a prime
+	 * order. This is needed so that modular reduction of the X
+	 * coordinate of a point can be done with a simple subtraction.
+	 */
+	const br_ec_curve_def *cd;
+	uint16_t n[I15_LEN], r[I15_LEN], s[I15_LEN], t1[I15_LEN], t2[I15_LEN];
+	unsigned char tx[(BR_MAX_EC_SIZE + 7) >> 3];
+	unsigned char ty[(BR_MAX_EC_SIZE + 7) >> 3];
+	unsigned char eU[POINT_LEN];
+	size_t nlen, rlen, ulen;
+	uint16_t n0i;
+	uint32_t res;
+
+	/*
+	 * If the curve is not supported, then report an error.
+	 */
+	if (((impl->supported_curves >> pk->curve) & 1) == 0) {
+		return 0;
+	}
+
+	/*
+	 * Get the curve parameters (generator and order).
+	 */
+	switch (pk->curve) {
+	case BR_EC_secp256r1:
+		cd = &br_secp256r1;
+		break;
+	case BR_EC_secp384r1:
+		cd = &br_secp384r1;
+		break;
+	case BR_EC_secp521r1:
+		cd = &br_secp521r1;
+		break;
+	default:
+		return 0;
+	}
+
+	/*
+	 * Signature length must be even.
+	 */
+	if (sig_len & 1) {
+		return 0;
+	}
+	rlen = sig_len >> 1;
+
+	/*
+	 * Public key point must have the proper size for this curve.
+	 */
+	if (pk->qlen != cd->generator_len) {
+		return 0;
+	}
+
+	/*
+	 * Get modulus; then decode the r and s values. They must be
+	 * lower than the modulus, and s must not be null.
+	 */
+	nlen = cd->order_len;
+	br_i15_decode(n, cd->order, nlen);
+	n0i = br_i15_ninv15(n[1]);
+	if (!br_i15_decode_mod(r, sig, rlen, n)) {
+		return 0;
+	}
+	if (!br_i15_decode_mod(s, (const unsigned char *)sig + rlen, rlen, n)) {
+		return 0;
+	}
+	if (br_i15_iszero(s)) {
+		return 0;
+	}
+
+	/*
+	 * Invert s. We do that with a modular exponentiation; we use
+	 * the fact that for all the curves we support, the least
+	 * significant byte is not 0 or 1, so we can subtract 2 without
+	 * any carry to process.
+	 * We also want 1/s in Montgomery representation, which can be
+	 * done by converting _from_ Montgomery representation before
+	 * the inversion (because (1/s)*R = 1/(s/R)).
+	 */
+	br_i15_from_monty(s, n, n0i);
+	memcpy(tx, cd->order, nlen);
+	tx[nlen - 1] -= 2;
+	br_i15_modpow(s, tx, nlen, n, n0i, t1, t2);
+
+	/*
+	 * Truncate the hash to the modulus length (in bits) and reduce
+	 * it modulo the curve order. The modular reduction can be done
+	 * with a subtraction since the truncation already reduced the
+	 * value to the modulus bit length.
+	 */
+	br_ecdsa_i15_bits2int(t1, hash, hash_len, n[0]);
+	br_i15_sub(t1, n, br_i15_sub(t1, n, 0) ^ 1);
+
+	/*
+	 * Multiply the (truncated, reduced) hash value with 1/s, result in
+	 * t2, encoded in ty.
+	 */
+	br_i15_montymul(t2, t1, s, n, n0i);
+	br_i15_encode(ty, nlen, t2);
+
+	/*
+	 * Multiply r with 1/s, result in t1, encoded in tx.
+	 */
+	br_i15_montymul(t1, r, s, n, n0i);
+	br_i15_encode(tx, nlen, t1);
+
+	/*
+	 * Compute the point x*Q + y*G.
+	 */
+	ulen = cd->generator_len;
+	memcpy(eU, pk->q, ulen);
+	res = impl->muladd(eU, cd->generator, ulen,
+		tx, nlen, ty, nlen, cd->curve);
+
+	/*
+	 * Get the X coordinate, reduce modulo the curve order, and
+	 * compare with the 'r' value.
+	 *
+	 * The modular reduction can be done with subtractions because
+	 * we work with curves of prime order, so the curve order is
+	 * close to the field order (Hasse's theorem).
+	 */
+	br_i15_zero(t1, n[0]);
+	br_i15_decode(t1, &eU[1], ulen >> 1);
+	t1[0] = n[0];
+	br_i15_sub(t1, n, br_i15_sub(t1, n, 0) ^ 1);
+	res &= ~br_i15_sub(t1, r, 1);
+	res &= br_i15_iszero(t1);
+	return res;
+}
diff --git a/src/inner.h b/src/inner.h
index da64f37..8417b24 100644
--- a/src/inner.h
+++ b/src/inner.h
@@ -532,11 +532,30 @@ MAX(uint32_t x, uint32_t y)
  * (old) platforms where the default MUL31 is not. Unfortunately, it is
  * also substantially slower, and yields larger code, on more modern
  * platforms, which is why it is deactivated by default.
+ *
+ * MUL31_lo() must do some extra work because on some platforms, the
+ * _signed_ multiplication may return early if the top bits are 1.
+ * Simply truncating (casting) the output of MUL31() would not be
+ * sufficient, because the compiler may notice that we keep only the low
+ * word, and then replace automatically the unsigned multiplication with
+ * a signed multiplication opcode.
  */
 #define MUL31(x, y)   ((uint64_t)((x) | (uint32_t)0x80000000) \
                        * (uint64_t)((y) | (uint32_t)0x80000000) \
                        - ((uint64_t)(x) << 31) - ((uint64_t)(y) << 31) \
                        - ((uint64_t)1 << 62))
+static inline uint32_t
+MUL31_lo(uint32_t x, uint32_t y)
+{
+	uint32_t xl, xh;
+	uint32_t yl, yh;
+
+	xl = (x & 0xFFFF) | (uint32_t)0x80000000;
+	xh = (x >> 16) | (uint32_t)0x80000000;
+	yl = (y & 0xFFFF) | (uint32_t)0x80000000;
+	yh = (y >> 16) | (uint32_t)0x80000000;
+	return (xl * yl + ((xl * yh + xh * yl) << 16)) & (uint32_t)0x7FFFFFFF;
+}
 
 #else
 
@@ -544,8 +563,10 @@ MAX(uint32_t x, uint32_t y)
  * Multiply two 31-bit integers, with a 62-bit result. This default
  * implementation assumes that the basic multiplication operator
  * yields constant-time code.
+ * The MUL31_lo() macro returns only the low 31 bits of the product.
  */
-#define MUL31(x, y)   ((uint64_t)(x) * (uint64_t)(y))
+#define MUL31(x, y)     ((uint64_t)(x) * (uint64_t)(y))
+#define MUL31_lo(x, y)  (((uint32_t)(x) * (uint32_t)(y)) & (uint32_t)0x7FFFFFFF)
 
 #endif
 
@@ -558,7 +579,7 @@ MAX(uint32_t x, uint32_t y)
  */
 #if BR_CT_MUL15
 #define MUL15(x, y)   (((uint32_t)(x) | (uint32_t)0x80000000) \
-                       * ((uint32_t)(x) | (uint32_t)0x80000000) \
+                       * ((uint32_t)(y) | (uint32_t)0x80000000) \
 		       & (uint32_t)0x3FFFFFFF)
 #else
 #define MUL15(x, y)   ((uint32_t)(x) * (uint32_t)(y))
@@ -1046,6 +1067,53 @@ void br_i31_mulacc(uint32_t *d, const uint32_t *a, const uint32_t *b);
 
 /* ==================================================================== */
 
+static inline void
+br_i15_zero(uint16_t *x, uint16_t bit_len)
+{
+	*x ++ = bit_len;
+	memset(x, 0, ((bit_len + 15) >> 4) * sizeof *x);
+}
+
+uint32_t br_i15_iszero(const uint16_t *x);
+
+uint16_t br_i15_ninv15(uint16_t x);
+
+uint32_t br_i15_add(uint16_t *a, const uint16_t *b, uint32_t ctl);
+
+uint32_t br_i15_sub(uint16_t *a, const uint16_t *b, uint32_t ctl);
+
+void br_i15_muladd_small(uint16_t *x, uint16_t z, const uint16_t *m);
+
+void br_i15_montymul(uint16_t *d, const uint16_t *x, const uint16_t *y,
+	const uint16_t *m, uint16_t m0i);
+
+void br_i15_to_monty(uint16_t *x, const uint16_t *m);
+
+void br_i15_modpow(uint16_t *x, const unsigned char *e, size_t elen,
+	const uint16_t *m, uint16_t m0i, uint16_t *t1, uint16_t *t2);
+
+void br_i15_encode(void *dst, size_t len, const uint16_t *x);
+
+uint32_t br_i15_decode_mod(uint16_t *x,
+	const void *src, size_t len, const uint16_t *m);
+
+void br_i15_rshift(uint16_t *x, int count);
+
+uint32_t br_i15_bit_length(uint16_t *x, size_t xlen);
+
+void br_i15_decode(uint16_t *x, const void *src, size_t len);
+
+void br_i15_from_monty(uint16_t *x, const uint16_t *m, uint16_t m0i);
+
+void br_i15_decode_reduce(uint16_t *x,
+	const void *src, size_t len, const uint16_t *m);
+
+void br_i15_reduce(uint16_t *x, const uint16_t *a, const uint16_t *m);
+
+void br_i15_mulacc(uint16_t *d, const uint16_t *a, const uint16_t *b);
+
+/* ==================================================================== */
+
 static inline size_t
 br_digest_size(const br_hash_class *digest_class)
 {
@@ -1343,6 +1411,29 @@ unsigned br_aes_ct64_keysched(uint64_t *comp_skey,
 void br_aes_ct64_skey_expand(uint64_t *skey,
 	unsigned num_rounds, const uint64_t *comp_skey);
 
+/* ==================================================================== */
+/*
+ * RSA.
+ */
+
+/*
+ * Apply proper PKCS#1 v1.5 padding (for signatures). 'hash_oid' is
+ * the encoded hash function OID, or NULL.
+ */
+uint32_t br_rsa_pkcs1_sig_pad(const unsigned char *hash_oid,
+	const unsigned char *hash, size_t hash_len,
+	uint32_t n_bitlen, unsigned char *x);
+
+/*
+ * Check PKCS#1 v1.5 padding (for signatures). 'hash_oid' is the encoded
+ * hash function OID, or NULL. The provided 'sig' value is _after_ the
+ * modular exponentiation, i.e. it should be the padded hash. On
+ * success, the hashed message is extracted.
+ */
+uint32_t br_rsa_pkcs1_sig_unpad(const unsigned char *sig, size_t sig_len,
+	const unsigned char *hash_oid, size_t hash_len,
+	unsigned char *hash_out);
+
 /* ==================================================================== */
 /*
  * Elliptic curves.
@@ -1364,6 +1455,8 @@ extern const br_ec_curve_def br_secp256r1;
 extern const br_ec_curve_def br_secp384r1;
 extern const br_ec_curve_def br_secp521r1;
 
+#if 0
+/* obsolete */
 /*
  * Type for the parameters for a "prime curve":
  *   coordinates are in GF(p), with p prime
@@ -1383,6 +1476,7 @@ extern const br_ec_prime_i31_curve br_ec_prime_i31_secp384r1;
 extern const br_ec_prime_i31_curve br_ec_prime_i31_secp521r1;
 
 #define BR_EC_I31_LEN   ((BR_MAX_EC_SIZE + 61) / 31)
+#endif
 
 /*
  * Decode some bytes as an i31 integer, with truncation (corresponding
@@ -1394,6 +1488,16 @@ extern const br_ec_prime_i31_curve br_ec_prime_i31_secp521r1;
 void br_ecdsa_i31_bits2int(uint32_t *x,
 	const void *src, size_t len, uint32_t ebitlen);
 
+/*
+ * Decode some bytes as an i15 integer, with truncation (corresponding
+ * to the 'bits2int' operation in RFC 6979). The target ENCODED bit
+ * length is provided as last parameter. The resulting value will have
+ * this declared bit length, and consists the big-endian unsigned decoding
+ * of exactly that many bits in the source (capped at the source length).
+ */
+void br_ecdsa_i15_bits2int(uint16_t *x,
+	const void *src, size_t len, uint32_t ebitlen);
+
 /* ==================================================================== */
 /*
  * SSL/TLS support functions.
diff --git a/src/int/i15_core.c b/src/int/i15_core.c
new file mode 100644
index 0000000..5ae3b31
--- /dev/null
+++ b/src/int/i15_core.c
@@ -0,0 +1,479 @@
+/*
+ * Copyright (c) 2017 Thomas Pornin <pornin@bolet.org>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining 
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be 
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "inner.h"
+
+/*
+ * This file contains the core "big integer" functions for the i15
+ * implementation, that represents integers as sequences of 15-bit
+ * words.
+ */
+
+/* see inner.h */
+uint32_t
+br_i15_iszero(const uint16_t *x)
+{
+	uint32_t z;
+	size_t u;
+
+	z = 0;
+	for (u = (x[0] + 15) >> 4; u > 0; u --) {
+		z |= x[u];
+	}
+	return ~(z | -z) >> 31;
+}
+
+/* see inner.h */
+uint16_t
+br_i15_ninv15(uint16_t x)
+{
+	uint32_t y;
+
+	y = 2 - x;
+	y = MUL15(y, 2 - MUL15(x, y));
+	y = MUL15(y, 2 - MUL15(x, y));
+	y = MUL15(y, 2 - MUL15(x, y));
+	return MUX(x & 1, -y, 0) & 0x7FFF;
+}
+
+/* see inner.h */
+uint32_t
+br_i15_add(uint16_t *a, const uint16_t *b, uint32_t ctl)
+{
+	uint32_t cc;
+	size_t u, m;
+
+	cc = 0;
+	m = (a[0] + 31) >> 4;
+	for (u = 1; u < m; u ++) {
+		uint32_t aw, bw, naw;
+
+		aw = a[u];
+		bw = b[u];
+		naw = aw + bw + cc;
+		cc = naw >> 15;
+		a[u] = MUX(ctl, naw & 0x7FFF, aw);
+	}
+	return cc;
+}
+
+/* see inner.h */
+uint32_t
+br_i15_sub(uint16_t *a, const uint16_t *b, uint32_t ctl)
+{
+	uint32_t cc;
+	size_t u, m;
+
+	cc = 0;
+	m = (a[0] + 31) >> 4;
+	for (u = 1; u < m; u ++) {
+		uint32_t aw, bw, naw;
+
+		aw = a[u];
+		bw = b[u];
+		naw = aw - bw - cc;
+		cc = naw >> 31;
+		a[u] = MUX(ctl, naw & 0x7FFF, aw);
+	}
+	return cc;
+}
+
+/*
+ * Constant-time division. The divisor must not be larger than 16 bits,
+ * and the quotient must fit on 17 bits.
+ */
+static uint32_t
+divrem16(uint32_t x, uint32_t d, uint32_t *r)
+{
+	int i;
+	uint32_t q;
+
+	q = 0;
+	d <<= 16;
+	for (i = 16; i >= 0; i --) {
+		uint32_t ctl;
+
+		ctl = LE(d, x);
+		q |= ctl << i;
+		x -= (-ctl) & d;
+		d >>= 1;
+	}
+	if (r != NULL) {
+		*r = x;
+	}
+	return q;
+}
+
+/* see inner.h */
+void
+br_i15_muladd_small(uint16_t *x, uint16_t z, const uint16_t *m)
+{
+	/*
+	 * Constant-time: we accept to leak the exact bit length of the
+	 * modulus m.
+	 */
+	unsigned m_bitlen, mblr;
+	size_t u, mlen;
+	uint32_t hi, a0, a, b, q;
+	uint32_t cc, tb, over, under;
+
+	/*
+	 * Simple case: the modulus fits on one word.
+	 */
+	m_bitlen = m[0];
+	if (m_bitlen == 0) {
+		return;
+	}
+	if (m_bitlen <= 15) {
+		uint32_t rem;
+
+		divrem16(((uint32_t)x[1] << 15) | z, m[1], &rem);
+		x[1] = rem;
+		return;
+	}
+	mlen = (m_bitlen + 15) >> 4;
+	mblr = m_bitlen & 15;
+
+	/*
+	 * Principle: we estimate the quotient (x*2^15+z)/m by
+	 * doing a 30/15 division with the high words.
+	 *
+	 * Let:
+	 *   w = 2^15
+	 *   a = (w*a0 + a1) * w^N + a2
+	 *   b = b0 * w^N + b2
+	 * such that:
+	 *   0 <= a0 < w
+	 *   0 <= a1 < w
+	 *   0 <= a2 < w^N
+	 *   w/2 <= b0 < w
+	 *   0 <= b2 < w^N
+	 *   a < w*b
+	 * I.e. the two top words of a are a0:a1, the top word of b is
+	 * b0, we ensured that b0 is "full" (high bit set), and a is
+	 * such that the quotient q = a/b fits on one word (0 <= q < w).
+	 *
+	 * If a = b*q + r (with 0 <= r < q), then we can estimate q by
+	 * using a division on the top words:
+	 *   a0*w + a1 = b0*u + v (with 0 <= v < b0)
+	 * Then the following holds:
+	 *   0 <= u <= w
+	 *   u-2 <= q <= u
+	 */
+	hi = x[mlen];
+	if (mblr == 0) {
+		a0 = x[mlen];
+		memmove(x + 2, x + 1, (mlen - 1) * sizeof *x);
+		x[1] = z;
+		a = (a0 << 15) + x[mlen];
+		b = m[mlen];
+	} else {
+		a0 = (x[mlen] << (15 - mblr)) | (x[mlen - 1] >> mblr);
+		memmove(x + 2, x + 1, (mlen - 1) * sizeof *x);
+		x[1] = z;
+		a = (a0 << 15) | (((x[mlen] << (15 - mblr))
+			| (x[mlen - 1] >> mblr)) & 0x7FFF);
+		b = (m[mlen] << (15 - mblr)) | (m[mlen - 1] >> mblr);
+	}
+	q = divrem16(a, b, NULL);
+
+	/*
+	 * We computed an estimate for q, but the real one may be q,
+	 * q-1 or q-2; moreover, the division may have returned a value
+	 * 8000 or even 8001 if the two high words were identical, and
+	 * we want to avoid values beyond 7FFF. We thus adjust q so
+	 * that the "true" multiplier will be q+1, q or q-1, and q is
+	 * in the 0000..7FFF range.
+	 */
+	q = MUX(EQ(b, a0), 0x7FFF, q - 1 + ((q - 1) >> 31));
+
+	/*
+	 * We subtract q*m from x (x has an extra high word of value 'hi').
+	 * Since q may be off by 1 (in either direction), we may have to
+	 * add or subtract m afterwards.
+	 *
+	 * The 'tb' flag will be true (1) at the end of the loop if the
+	 * result is greater than or equal to the modulus (not counting
+	 * 'hi' or the carry).
+	 */
+	cc = 0;
+	tb = 1;
+	for (u = 1; u <= mlen; u ++) {
+		uint32_t mw, zl, xw, nxw;
+
+		mw = m[u];
+		zl = MUL15(mw, q) + cc;
+		cc = zl >> 15;
+		zl &= 0x7FFF;
+		xw = x[u];
+		nxw = xw - zl;
+		cc += nxw >> 31;
+		nxw &= 0x7FFF;
+		x[u] = nxw;
+		tb = MUX(EQ(nxw, mw), tb, GT(nxw, mw));
+	}
+
+	/*
+	 * If we underestimated q, then either cc < hi (one extra bit
+	 * beyond the top array word), or cc == hi and tb is true (no
+	 * extra bit, but the result is not lower than the modulus).
+	 *
+	 * If we overestimated q, then cc > hi.
+	 */
+	over = GT(cc, hi);
+	under = ~over & (tb | LT(cc, hi));
+	br_i15_add(x, m, over);
+	br_i15_sub(x, m, under);
+}
+
+/* see inner.h */
+void
+br_i15_montymul(uint16_t *d, const uint16_t *x, const uint16_t *y,
+	const uint16_t *m, uint16_t m0i)
+{
+	size_t len, len4, u, v;
+	uint32_t dh;
+
+	len = (m[0] + 15) >> 4;
+	len4 = len & ~(size_t)3;
+	br_i15_zero(d, m[0]);
+	dh = 0;
+	for (u = 0; u < len; u ++) {
+		uint32_t f, xu, r, zh;
+
+		xu = x[u + 1];
+		f = MUL15(d[1] + MUL15(x[u + 1], y[1]), m0i) & 0x7FFF;
+
+		r = 0;
+		for (v = 0; v < len4; v += 4) {
+			uint32_t z;
+
+			z = d[v + 1] + MUL15(xu, y[v + 1])
+				+ MUL15(f, m[v + 1]) + r;
+			r = z >> 15;
+			d[v + 0] = z & 0x7FFF;
+			z = d[v + 2] + MUL15(xu, y[v + 2])
+				+ MUL15(f, m[v + 2]) + r;
+			r = z >> 15;
+			d[v + 1] = z & 0x7FFF;
+			z = d[v + 3] + MUL15(xu, y[v + 3])
+				+ MUL15(f, m[v + 3]) + r;
+			r = z >> 15;
+			d[v + 2] = z & 0x7FFF;
+			z = d[v + 4] + MUL15(xu, y[v + 4])
+				+ MUL15(f, m[v + 4]) + r;
+			r = z >> 15;
+			d[v + 3] = z & 0x7FFF;
+		}
+		for (; v < len; v ++) {
+			uint32_t z;
+
+			z = d[v + 1] + MUL15(xu, y[v + 1])
+				+ MUL15(f, m[v + 1]) + r;
+			r = z >> 15;
+			d[v + 0] = z & 0x7FFF;
+		}
+
+		zh = dh + r;
+		d[len] = zh & 0x7FFF;
+		dh = zh >> 31;
+	}
+
+	/*
+	 * Restore the bit length (it was overwritten in the loop above).
+	 */
+	d[0] = m[0];
+
+	/*
+	 * d[] may be greater than m[], but it is still lower than twice
+	 * the modulus.
+	 */
+	br_i15_sub(d, m, NEQ(dh, 0) | NOT(br_i15_sub(d, m, 0)));
+}
+
+/* see inner.h */
+void
+br_i15_to_monty(uint16_t *x, const uint16_t *m)
+{
+	unsigned k;
+
+	for (k = (m[0] + 15) >> 4; k > 0; k --) {
+		br_i15_muladd_small(x, 0, m);
+	}
+}
+
+/* see inner.h */
+void
+br_i15_modpow(uint16_t *x,
+	const unsigned char *e, size_t elen,
+	const uint16_t *m, uint16_t m0i, uint16_t *t1, uint16_t *t2)
+{
+	size_t mlen;
+	unsigned k;
+
+	mlen = ((m[0] + 31) >> 4) * sizeof m[0];
+	memcpy(t1, x, mlen);
+	br_i15_to_monty(t1, m);
+	br_i15_zero(x, m[0]);
+	x[1] = 1;
+	for (k = 0; k < ((unsigned)elen << 3); k ++) {
+		uint32_t ctl;
+
+		ctl = (e[elen - 1 - (k >> 3)] >> (k & 7)) & 1;
+		br_i15_montymul(t2, x, t1, m, m0i);
+		CCOPY(ctl, x, t2, mlen);
+		br_i15_montymul(t2, t1, t1, m, m0i);
+		memcpy(t1, t2, mlen);
+	}
+}
+
+/* see inner.h */
+void
+br_i15_encode(void *dst, size_t len, const uint16_t *x)
+{
+	unsigned char *buf;
+	size_t u, xlen;
+	uint32_t acc;
+	int acc_len;
+
+	xlen = (x[0] + 15) >> 4;
+	if (xlen == 0) {
+		memset(dst, 0, len);
+		return;
+	}
+	u = 1;
+	acc = 0;
+	acc_len = 0;
+	buf = dst;
+	while (len -- > 0) {
+		if (acc_len < 8) {
+			if (u <= xlen) {
+				acc += (uint32_t)x[u ++] << acc_len;
+			}
+			acc_len += 15;
+		}
+		buf[len] = (unsigned char)acc;
+		acc >>= 8;
+		acc_len -= 8;
+	}
+}
+
+/* see inner.h */
+uint32_t
+br_i15_decode_mod(uint16_t *x, const void *src, size_t len, const uint16_t *m)
+{
+	/*
+	 * Two-pass algorithm: in the first pass, we determine whether the
+	 * value fits; in the second pass, we do the actual write.
+	 *
+	 * During the first pass, 'r' contains the comparison result so
+	 * far:
+	 *  0x00000000   value is equal to the modulus
+	 *  0x00000001   value is greater than the modulus
+	 *  0xFFFFFFFF   value is lower than the modulus
+	 *
+	 * Since we iterate starting with the least significant bytes (at
+	 * the end of src[]), each new comparison overrides the previous
+	 * except when the comparison yields 0 (equal).
+	 *
+	 * During the second pass, 'r' is either 0xFFFFFFFF (value fits)
+	 * or 0x00000000 (value does not fit).
+	 *
+	 * We must iterate over all bytes of the source, _and_ possibly
+	 * some extra virutal bytes (with value 0) so as to cover the
+	 * complete modulus as well. We also add 4 such extra bytes beyond
+	 * the modulus length because it then guarantees that no accumulated
+	 * partial word remains to be processed.
+	 */
+	const unsigned char *buf;
+	size_t mlen, tlen;
+	int pass;
+	uint32_t r;
+
+	buf = src;
+	mlen = (m[0] + 15) >> 4;
+	tlen = (mlen << 1);
+	if (tlen < len) {
+		tlen = len;
+	}
+	tlen += 4;
+	r = 0;
+	for (pass = 0; pass < 2; pass ++) {
+		size_t u, v;
+		uint32_t acc;
+		int acc_len;
+
+		v = 1;
+		acc = 0;
+		acc_len = 0;
+		for (u = 0; u < tlen; u ++) {
+			uint32_t b;
+
+			if (u < len) {
+				b = buf[len - 1 - u];
+			} else {
+				b = 0;
+			}
+			acc |= (b << acc_len);
+			acc_len += 8;
+			if (acc_len >= 15) {
+				uint32_t xw;
+
+				xw = acc & (uint32_t)0x7FFF;
+				acc_len -= 15;
+				acc = b >> (8 - acc_len);
+				if (v <= mlen) {
+					if (pass) {
+						x[v] = r & xw;
+					} else {
+						uint32_t cc;
+
+						cc = (uint32_t)CMP(xw, m[v]);
+						r = MUX(EQ(cc, 0), r, cc);
+					}
+				} else {
+					if (!pass) {
+						r = MUX(EQ(xw, 0), r, 1);
+					}
+				}
+				v ++;
+			}
+		}
+
+		/*
+		 * When we reach this point at the end of the first pass:
+		 * r is either 0, 1 or -1; we want to set r to 0 if it
+		 * is equal to 0 or 1, and leave it to -1 otherwise.
+		 *
+		 * When we reach this point at the end of the second pass:
+		 * r is either 0 or -1; we want to leave that value
+		 * untouched. This is a subcase of the previous.
+		 */
+		r >>= 1;
+		r |= (r << 1);
+	}
+
+	x[0] = m[0];
+	return r & (uint32_t)1;
+}
diff --git a/src/int/i15_ext1.c b/src/int/i15_ext1.c
new file mode 100644
index 0000000..a99ac1a
--- /dev/null
+++ b/src/int/i15_ext1.c
@@ -0,0 +1,136 @@
+/*
+ * Copyright (c) 2017 Thomas Pornin <pornin@bolet.org>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining 
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be 
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "inner.h"
+
+/*
+ * This file contains some additional functions for "i15" big integers.
+ * These functions are needed to support ECDSA.
+ */
+
+/* see inner.h */
+void
+br_i15_rshift(uint16_t *x, int count)
+{
+	size_t u, len;
+	unsigned r;
+
+	len = (x[0] + 15) >> 4;
+	if (len == 0) {
+		return;
+	}
+	r = x[1] >> count;
+	for (u = 2; u <= len; u ++) {
+		unsigned w;
+
+		w = x[u];
+		x[u - 1] = ((w << (15 - count)) | r) & 0x7FFF;
+		r = w >> count;
+	}
+	x[len] = r;
+}
+
+/* see inner.h */
+uint32_t
+br_i15_bit_length(uint16_t *x, size_t xlen)
+{
+	uint32_t tw, twk;
+
+	tw = 0;
+	twk = 0;
+	while (xlen -- > 0) {
+		uint32_t w, c;
+
+		c = EQ(tw, 0);
+		w = x[xlen];
+		tw = MUX(c, w, tw);
+		twk = MUX(c, (uint32_t)xlen, twk);
+	}
+	return (twk << 4) + BIT_LENGTH(tw);
+}
+
+/* see inner.h */
+void
+br_i15_decode(uint16_t *x, const void *src, size_t len)
+{
+	const unsigned char *buf;
+	size_t v;
+	uint32_t acc;
+	int acc_len;
+
+	buf = src;
+	v = 1;
+	acc = 0;
+	acc_len = 0;
+	while (len -- > 0) {
+		uint32_t b;
+
+		b = buf[len];
+		acc |= (b << acc_len);
+		acc_len += 8;
+		if (acc_len >= 15) {
+			x[v ++] = acc & 0x7FFF;
+			acc_len -= 15;
+			acc >>= 15;
+		}
+	}
+	if (acc_len != 0) {
+		x[v ++] = acc;
+	}
+	x[0] = br_i15_bit_length(x + 1, v - 1);
+}
+
+/* see inner.h */
+void
+br_i15_from_monty(uint16_t *x, const uint16_t *m, uint16_t m0i)
+{
+	size_t len, u, v;
+
+	len = (m[0] + 15) >> 4;
+	for (u = 0; u < len; u ++) {
+		uint32_t f, cc;
+
+		f = MUL15(x[1], m0i) & 0x7FFF;
+		cc = 0;
+		for (v = 0; v < len; v ++) {
+			uint32_t z;
+
+			z = (uint32_t)x[v + 1] + MUL15(f, m[v + 1]) + cc;
+			cc = z >> 15;
+			if (v != 0) {
+				x[v] = z & 0x7FFF;
+			}
+		}
+		x[len] = cc;
+	}
+
+	/*
+	 * We may have to do an extra subtraction, but only if the
+	 * value in x[] is indeed greater than or equal to that of m[],
+	 * which is why we must do two calls (first call computes the
+	 * carry, second call performs the subtraction only if the carry
+	 * is 0).
+	 */
+	br_i15_sub(x, m, NOT(br_i15_sub(x, m, 0)));
+}
diff --git a/src/int/i15_ext2.c b/src/int/i15_ext2.c
new file mode 100644
index 0000000..84fc2d6
--- /dev/null
+++ b/src/int/i15_ext2.c
@@ -0,0 +1,173 @@
+/*
+ * Copyright (c) 2017 Thomas Pornin <pornin@bolet.org>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining 
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be 
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "inner.h"
+
+/*
+ * This file contains some additional functions for "i15" big integers.
+ * These functions are needed to support RSA.
+ */
+
+/* see inner.h */
+void
+br_i15_decode_reduce(uint16_t *x,
+	const void *src, size_t len, const uint16_t *m)
+{
+	uint32_t m_ebitlen, m_rbitlen;
+	size_t mblen, k;
+	const unsigned char *buf;
+	uint32_t acc;
+	int acc_len;
+
+	/*
+	 * Get the encoded bit length.
+	 */
+	m_ebitlen = m[0];
+
+	/*
+	 * Special case for an invalid (null) modulus.
+	 */
+	if (m_ebitlen == 0) {
+		x[0] = 0;
+		return;
+	}
+
+	/*
+	 * Clear the destination.
+	 */
+	br_i15_zero(x, m_ebitlen);
+
+	/*
+	 * First decode directly as many bytes as possible. This requires
+	 * computing the actual bit length.
+	 */
+	m_rbitlen = m_ebitlen >> 4;
+	m_rbitlen = (m_ebitlen & 15) + (m_rbitlen << 4) - m_rbitlen;
+	mblen = (m_rbitlen + 7) >> 3;
+	k = mblen - 1;
+	if (k >= len) {
+		br_i15_decode(x, src, len);
+		x[0] = m_ebitlen;
+		return;
+	}
+	buf = src;
+	br_i15_decode(x, buf, k);
+	x[0] = m_ebitlen;
+
+	/*
+	 * Input remaining bytes, using 15-bit words.
+	 */
+	acc = 0;
+	acc_len = 0;
+	while (k < len) {
+		uint32_t v;
+
+		v = buf[k ++];
+		acc = (acc << 8) | v;
+		acc_len += 8;
+		if (acc_len >= 15) {
+			br_i15_muladd_small(x, acc >> (acc_len - 15), m);
+			acc_len -= 15;
+			acc &= ~((uint32_t)-1 << acc_len);
+		}
+	}
+
+	/*
+	 * We may have some bits accumulated. We then perform a shift to
+	 * be able to inject these bits as a full 15-bit word.
+	 */
+	if (acc_len != 0) {
+		acc = (acc | (x[1] << acc_len)) & 0x7FFF;
+		br_i15_rshift(x, 15 - acc_len);
+		br_i15_muladd_small(x, acc, m);
+	}
+}
+
+/* see inner.h */
+void
+br_i15_reduce(uint16_t *x, const uint16_t *a, const uint16_t *m)
+{
+	uint32_t m_bitlen, a_bitlen;
+	size_t mlen, alen, u;
+
+	m_bitlen = m[0];
+	mlen = (m_bitlen + 15) >> 4;
+
+	x[0] = m_bitlen;
+	if (m_bitlen == 0) {
+		return;
+	}
+
+	/*
+	 * If the source is shorter, then simply copy all words from a[]
+	 * and zero out the upper words.
+	 */
+	a_bitlen = a[0];
+	alen = (a_bitlen + 15) >> 4;
+	if (a_bitlen < m_bitlen) {
+		memcpy(x + 1, a + 1, alen * sizeof *a);
+		for (u = alen; u < mlen; u ++) {
+			x[u + 1] = 0;
+		}
+		return;
+	}
+
+	/*
+	 * The source length is at least equal to that of the modulus.
+	 * We must thus copy N-1 words, and input the remaining words
+	 * one by one.
+	 */
+	memcpy(x + 1, a + 2 + (alen - mlen), (mlen - 1) * sizeof *a);
+	x[mlen] = 0;
+	for (u = 1 + alen - mlen; u > 0; u --) {
+		br_i15_muladd_small(x, a[u], m);
+	}
+}
+
+/* see inner.h */
+void
+br_i15_mulacc(uint16_t *d, const uint16_t *a, const uint16_t *b)
+{
+	size_t alen, blen, u;
+
+	alen = (a[0] + 15) >> 4;
+	blen = (b[0] + 15) >> 4;
+	d[0] = a[0] + b[0];
+	for (u = 0; u < blen; u ++) {
+		uint32_t f;
+		size_t v;
+		uint32_t cc;
+
+		f = b[1 + u];
+		cc = 0;
+		for (v = 0; v < alen; v ++) {
+			uint32_t z;
+
+			z = (uint32_t)d[1 + u + v] + MUL15(f, a[1 + v]) + cc;
+			cc = z >> 15;
+			d[1 + u + v] = z & 0x7FFF;
+		}
+		d[1 + u + alen] = cc;
+	}
+}
diff --git a/src/int/i31_fmont.c b/src/int/i31_fmont.c
index 4e14361..c24b417 100644
--- a/src/int/i31_fmont.c
+++ b/src/int/i31_fmont.c
@@ -35,7 +35,7 @@ br_i31_from_monty(uint32_t *x, const uint32_t *m, uint32_t m0i)
 		uint32_t f;
 		uint64_t cc;
 
-		f = (x[1] * m0i) & 0x7FFFFFFF;
+		f = MUL31_lo(x[1], m0i);
 		cc = 0;
 		for (v = 0; v < len; v ++) {
 			uint64_t z;
diff --git a/src/int/i31_montmul.c b/src/int/i31_montmul.c
index 0857797..b953aa3 100644
--- a/src/int/i31_montmul.c
+++ b/src/int/i31_montmul.c
@@ -41,7 +41,7 @@ br_i31_montymul(uint32_t *d, const uint32_t *x, const uint32_t *y,
 		uint64_t r, zh;
 
 		xu = x[u + 1];
-		f = ((d[1] + x[u + 1] * y[1]) * m0i) & 0x7FFFFFFF;
+		f = MUL31_lo((d[1] + MUL31_lo(x[u + 1], y[1])), m0i);
 
 		r = 0;
 		for (v = 0; v < len4; v += 4) {
diff --git a/src/int/i31_muladd.c b/src/int/i31_muladd.c
index 6633d92..eecd9e2 100644
--- a/src/int/i31_muladd.c
+++ b/src/int/i31_muladd.c
@@ -75,7 +75,7 @@ br_i31_muladd_small(uint32_t *x, uint32_t z, const uint32_t *m)
 	 *
 	 * If a = b*q + r (with 0 <= r < q), we can estimate q by
 	 * doing an Euclidean division on the top words:
-	 *   a0*w+a1 = b0*u + v  (with 0 <= v < w)
+	 *   a0*w+a1 = b0*u + v  (with 0 <= v < b0)
 	 * Then the following holds:
 	 *   0 <= u <= w
 	 *   u-2 <= q <= u
diff --git a/src/rsa/rsa_i15_pkcs1_sign.c b/src/rsa/rsa_i15_pkcs1_sign.c
new file mode 100644
index 0000000..f519423
--- /dev/null
+++ b/src/rsa/rsa_i15_pkcs1_sign.c
@@ -0,0 +1,37 @@
+/*
+ * Copyright (c) 2017 Thomas Pornin <pornin@bolet.org>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining 
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be 
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "inner.h"
+
+/* see bearssl_rsa.h */
+uint32_t
+br_rsa_i15_pkcs1_sign(const unsigned char *hash_oid,
+	const unsigned char *hash, size_t hash_len,
+	const br_rsa_private_key *sk, unsigned char *x)
+{
+	if (!br_rsa_pkcs1_sig_pad(hash_oid, hash, hash_len, sk->n_bitlen, x)) {
+		return 0;
+	}
+	return br_rsa_i15_private(x, sk);
+}
diff --git a/src/rsa/rsa_i15_pkcs1_vrfy.c b/src/rsa/rsa_i15_pkcs1_vrfy.c
new file mode 100644
index 0000000..2c35184
--- /dev/null
+++ b/src/rsa/rsa_i15_pkcs1_vrfy.c
@@ -0,0 +1,43 @@
+/*
+ * Copyright (c) 2017 Thomas Pornin <pornin@bolet.org>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining 
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be 
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "inner.h"
+
+/* see bearssl_rsa.h */
+uint32_t
+br_rsa_i15_pkcs1_vrfy(const unsigned char *x, size_t xlen,
+	const unsigned char *hash_oid, size_t hash_len,
+	const br_rsa_public_key *pk, unsigned char *hash_out)
+{
+	unsigned char sig[BR_MAX_RSA_SIZE >> 3];
+
+	if (xlen > (sizeof sig)) {
+		return 0;
+	}
+	memcpy(sig, x, xlen);
+	if (!br_rsa_i15_public(sig, xlen, pk)) {
+		return 0;
+	}
+	return br_rsa_pkcs1_sig_unpad(sig, xlen, hash_oid, hash_len, hash_out);
+}
diff --git a/src/rsa/rsa_i15_priv.c b/src/rsa/rsa_i15_priv.c
new file mode 100644
index 0000000..6ecb198
--- /dev/null
+++ b/src/rsa/rsa_i15_priv.c
@@ -0,0 +1,146 @@
+/*
+ * Copyright (c) 2017 Thomas Pornin <pornin@bolet.org>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining 
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be 
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "inner.h"
+
+#define U   (1 + ((BR_MAX_RSA_FACTOR + 14) / 15))
+
+/* see bearssl_rsa.h */
+uint32_t
+br_rsa_i15_private(unsigned char *x, const br_rsa_private_key *sk)
+{
+	const unsigned char *p, *q;
+	size_t plen, qlen;
+	uint16_t tmp[6 * U];
+	uint16_t *mp, *mq, *s1, *s2, *t1, *t2, *t3;
+	uint16_t p0i, q0i;
+	size_t xlen;
+
+	/*
+	 * All our temporary buffers are from the tmp[] array.
+	 *
+	 * The mp, mq, s1, s2, t1 and t2 buffers are large enough to
+	 * contain a RSA factor. The t3 buffer can contain a complete
+	 * RSA modulus. t3 shares its storage space with s2, s1 and t1,
+	 * in that order (this is important, see below).
+	 */
+	mq = tmp;
+	mp = tmp + U;
+	t2 = tmp + 2 * U;
+	s2 = tmp + 3 * U;
+	s1 = tmp + 4 * U;
+	t1 = tmp + 5 * U;
+	t3 = s2;
+
+	/*
+	 * Compute the actual lengths (in bytes) of p and q, and check
+	 * that they fit within our stack buffers.
+	 */
+	p = sk->p;
+	plen = sk->plen;
+	while (plen > 0 && *p == 0) {
+		p ++;
+		plen --;
+	}
+	q = sk->q;
+	qlen = sk->qlen;
+	while (qlen > 0 && *q == 0) {
+		q ++;
+		qlen --;
+	}
+	if (plen > (BR_MAX_RSA_FACTOR >> 3)
+		|| qlen > (BR_MAX_RSA_FACTOR >> 3))
+	{
+		return 0;
+	}
+
+	/*
+	 * Decode p and q.
+	 */
+	br_i15_decode(mp, p, plen);
+	br_i15_decode(mq, q, qlen);
+
+	/*
+	 * Compute signature length (in bytes).
+	 */
+	xlen = (sk->n_bitlen + 7) >> 3;
+
+	/*
+	 * Compute s1 = x^dp mod p.
+	 */
+	p0i = br_i15_ninv15(mp[1]);
+	br_i15_decode_reduce(s1, x, xlen, mp);
+	br_i15_modpow(s1, sk->dp, sk->dplen, mp, p0i, t1, t2);
+
+	/*
+	 * Compute s2 = x^dq mod q.
+	 */
+	q0i = br_i15_ninv15(mq[1]);
+	br_i15_decode_reduce(s2, x, xlen, mq);
+	br_i15_modpow(s2, sk->dq, sk->dqlen, mq, q0i, t1, t2);
+
+	/*
+	 * Compute:
+	 *   h = (s1 - s2)*(1/q) mod p
+	 * s1 is an integer modulo p, but s2 is modulo q. PKCS#1 is
+	 * unclear about whether p may be lower than q (some existing,
+	 * widely deployed implementations of RSA don't tolerate p < q),
+	 * but we want to support that occurrence, so we need to use the
+	 * reduction function.
+	 *
+	 * Since we use br_i15_decode_reduce() for iq (purportedly, the
+	 * inverse of q modulo p), we also tolerate improperly large
+	 * values for this parameter.
+	 */
+	br_i15_reduce(t2, s2, mp);
+	br_i15_add(s1, mp, br_i15_sub(s1, t2, 1));
+	br_i15_to_monty(s1, mp);
+	br_i15_decode_reduce(t1, sk->iq, sk->iqlen, mp);
+	br_i15_montymul(t2, s1, t1, mp, p0i);
+
+	/*
+	 * h is now in t2. We compute the final result:
+	 *   s = s2 + q*h
+	 * All these operations are non-modular.
+	 *
+	 * We need mq, s2 and t2. We use the t3 buffer as destination.
+	 * The buffers mp, s1 and t1 are no longer needed. Moreover,
+	 * the first step is to copy s2 into the destination buffer t3.
+	 * We thus arranged for t3 to actually share space with s2, and
+	 * to be followed by the space formerly used by s1 and t1.
+	 */
+	br_i15_mulacc(t3, mq, t2);
+
+	/*
+	 * Encode the result. Since we already checked the value of xlen,
+	 * we can just use it right away.
+	 */
+	br_i15_encode(x, xlen, t3);
+
+	/*
+	 * The only error conditions remaining at that point are invalid
+	 * values for p and q (even integers).
+	 */
+	return p0i & q0i & 1;
+}
diff --git a/src/rsa/rsa_i15_pub.c b/src/rsa/rsa_i15_pub.c
new file mode 100644
index 0000000..37ebbd1
--- /dev/null
+++ b/src/rsa/rsa_i15_pub.c
@@ -0,0 +1,78 @@
+/*
+ * Copyright (c) 2017 Thomas Pornin <pornin@bolet.org>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining 
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be 
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "inner.h"
+
+/* see bearssl_rsa.h */
+uint32_t
+br_rsa_i15_public(unsigned char *x, size_t xlen,
+	const br_rsa_public_key *pk)
+{
+	const unsigned char *n;
+	size_t nlen;
+	uint16_t m[1 + ((BR_MAX_RSA_SIZE + 14) / 15)];
+	uint16_t a[1 + ((BR_MAX_RSA_SIZE + 14) / 15)];
+	uint16_t t1[1 + ((BR_MAX_RSA_SIZE + 14) / 15)];
+	uint16_t t2[1 + ((BR_MAX_RSA_SIZE + 14) / 15)];
+	uint16_t m0i;
+	uint32_t r;
+
+	/*
+	 * Get the actual length of the modulus, and see if it fits within
+	 * our stack buffer. We also check that the length of x[] is valid.
+	 */
+	n = pk->n;
+	nlen = pk->nlen;
+	while (nlen > 0 && *n == 0) {
+		n ++;
+		nlen --;
+	}
+	if (nlen == 0 || nlen > (BR_MAX_RSA_SIZE >> 3) || xlen != nlen) {
+		return 0;
+	}
+	br_i15_decode(m, n, nlen);
+	m0i = br_i15_ninv15(m[1]);
+
+	/*
+	 * Note: if m[] is even, then m0i == 0. Otherwise, m0i must be
+	 * an odd integer.
+	 */
+	r = m0i & 1;
+
+	/*
+	 * Decode x[] into a[]; we also check that its value is proper.
+	 */
+	r &= br_i15_decode_mod(a, x, xlen, m);
+
+	/*
+	 * Compute the modular exponentiation.
+	 */
+	br_i15_modpow(a, pk->e, pk->elen, m, m0i, t1, t2);
+
+	/*
+	 * Encode the result.
+	 */
+	br_i15_encode(x, xlen, a);
+	return r;
+}
diff --git a/src/rsa/rsa_i31_pkcs1_sign.c b/src/rsa/rsa_i31_pkcs1_sign.c
index 431a119..784d3c2 100644
--- a/src/rsa/rsa_i31_pkcs1_sign.c
+++ b/src/rsa/rsa_i31_pkcs1_sign.c
@@ -30,75 +30,8 @@ br_rsa_i31_pkcs1_sign(const unsigned char *hash_oid,
 	const unsigned char *hash, size_t hash_len,
 	const br_rsa_private_key *sk, unsigned char *x)
 {
-	size_t u, x3, xlen;
-
-	/*
-	 * Padded hash value has format:
-	 *  00 01 FF .. FF 00 30 x1 30 x2 06 x3 OID 05 00 04 x4 HASH
-	 *
-	 * with the following rules:
-	 *
-	 *  -- Total length is equal to the modulus length (unsigned
-	 *     encoding).
-	 *
-	 *  -- There must be at least eight bytes of value 0xFF.
-	 *
-	 *  -- x4 is equal to the hash length (hash_len).
-	 *
-	 *  -- x3 is equal to the encoded OID value length (hash_oid[0]).
-	 *
-	 *  -- x2 = x3 + 4.
-	 *
-	 *  -- x1 = x2 + x4 + 4 = x3 + x4 + 8.
-	 *
-	 * Note: the "05 00" is optional (signatures with and without
-	 * that sequence exist in practice), but notes in PKCS#1 seem to
-	 * indicate that the presence of that sequence (specifically,
-	 * an ASN.1 NULL value for the hash parameters) may be slightly
-	 * more "standard" than the opposite.
-	 */
-	xlen = (sk->n_bitlen + 7) >> 3;
-
-	if (hash_oid == NULL) {
-		if (xlen < hash_len + 11) {
-			return 0;
-		}
-		x[0] = 0x00;
-		x[1] = 0x01;
-		u = xlen - hash_len;
-		memset(x + 2, 0xFF, u - 3);
-		x[u - 1] = 0x00;
-	} else {
-		x3 = hash_oid[0];
-
-		/*
-		 * Check that there is enough room for all the elements,
-		 * including at least eight bytes of value 0xFF.
-		 */
-		if (xlen < (x3 + hash_len + 21)) {
-			return 0;
-		}
-		x[0] = 0x00;
-		x[1] = 0x01;
-		u = xlen - x3 - hash_len - 11;
-		memset(x + 2, 0xFF, u - 2);
-		x[u] = 0x00;
-		x[u + 1] = 0x30;
-		x[u + 2] = x3 + hash_len + 8;
-		x[u + 3] = 0x30;
-		x[u + 4] = x3 + 4;
-		x[u + 5] = 0x06;
-		memcpy(x + u + 6, hash_oid, x3 + 1);
-		u += x3 + 7;
-		x[u ++] = 0x05;
-		x[u ++] = 0x00;
-		x[u ++] = 0x04;
-		x[u ++] = hash_len;
+	if (!br_rsa_pkcs1_sig_pad(hash_oid, hash, hash_len, sk->n_bitlen, x)) {
+		return 0;
 	}
-	memcpy(x + u, hash, hash_len);
-
-	/*
-	 * Do the actual computation.
-	 */
 	return br_rsa_i31_private(x, sk);
 }
diff --git a/src/rsa/rsa_i31_pkcs1_vrfy.c b/src/rsa/rsa_i31_pkcs1_vrfy.c
index c3ffb62..e79a002 100644
--- a/src/rsa/rsa_i31_pkcs1_vrfy.c
+++ b/src/rsa/rsa_i31_pkcs1_vrfy.c
@@ -30,97 +30,14 @@ br_rsa_i31_pkcs1_vrfy(const unsigned char *x, size_t xlen,
 	const unsigned char *hash_oid, size_t hash_len,
 	const br_rsa_public_key *pk, unsigned char *hash_out)
 {
-	static const unsigned char pad1[] = {
-		0x00, 0x01, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF
-	};
-
 	unsigned char sig[BR_MAX_RSA_SIZE >> 3];
-	unsigned char pad2[43];
-	size_t u, x2, x3, pad_len, zlen;
 
-	if (xlen > (sizeof sig) || xlen < 11) {
+	if (xlen > (sizeof sig)) {
 		return 0;
 	}
 	memcpy(sig, x, xlen);
 	if (!br_rsa_i31_public(sig, xlen, pk)) {
 		return 0;
 	}
-
-	/*
-	 * Expected format:
-	 *  00 01 FF ... FF 00 30 x1 30 x2 06 x3 OID [ 05 00 ] 04 x4 HASH
-	 *
-	 * with the following rules:
-	 *
-	 *  -- Total length is that of the modulus and the signature
-	 *     (this was already verified by br_rsa_i31_public()).
-	 *
-	 *  -- There are at least eight bytes of value 0xFF.
-	 *
-	 *  -- x4 is equal to the hash length (hash_len).
-	 *
-	 *  -- x3 is equal to the encoded OID value length (so x3 is the
-	 *     first byte of hash_oid[]).
-	 *
-	 *  -- If the "05 00" is present, then x2 == x3 + 4; otherwise,
-	 *     x2 == x3 + 2.
-	 *
-	 *  -- x1 == x2 + x4 + 4.
-	 *
-	 * So the total length after the last "FF" is either x3 + x4 + 11
-	 * (with the "05 00") or x3 + x4 + 9 (without the "05 00").
-	 */
-
-	/*
-	 * Check the "00 01 FF .. FF 00" with at least eight 0xFF bytes.
-	 * The comparaison is valid because we made sure that the signature
-	 * is at least 11 bytes long.
-	 */
-	if (memcmp(sig, pad1, sizeof pad1) != 0) {
-		return 0;
-	}
-	for (u = sizeof pad1; u < xlen; u ++) {
-		if (sig[u] != 0xFF) {
-			break;
-		}
-	}
-
-	/*
-	 * Remaining length is xlen - u bytes (including the 00 just
-	 * after the last FF). This must be equal to one of the two
-	 * possible values (depending on whether the "05 00" sequence is
-	 * present or not).
-	 */
-	if (hash_oid == NULL) {
-		if (xlen - u != hash_len + 1 || sig[u] != 0x00) {
-			return 0;
-		}
-	} else {
-		x3 = hash_oid[0];
-		pad_len = x3 + 9;
-		memset(pad2, 0, pad_len);
-		zlen = xlen - u - hash_len;
-		if (zlen == pad_len) {
-			x2 = x3 + 2;
-		} else if (zlen == pad_len + 2) {
-			x2 = x3 + 4;
-			pad_len = zlen;
-			pad2[pad_len - 4] = 0x05;
-		} else {
-			return 0;
-		}
-		pad2[1] = 0x30;
-		pad2[2] = x2 + hash_len + 4;
-		pad2[3] = 0x30;
-		pad2[4] = x2;
-		pad2[5] = 0x06;
-		memcpy(pad2 + 6, hash_oid, x3 + 1);
-		pad2[pad_len - 2] = 0x04;
-		pad2[pad_len - 1] = hash_len;
-		if (memcmp(pad2, sig + u, pad_len) != 0) {
-			return 0;
-		}
-	}
-	memcpy(hash_out, sig + xlen - hash_len, hash_len);
-	return 1;
+	return br_rsa_pkcs1_sig_unpad(sig, xlen, hash_oid, hash_len, hash_out);
 }
diff --git a/src/rsa/rsa_i32_pkcs1_sign.c b/src/rsa/rsa_i32_pkcs1_sign.c
index c901bad..44b6e6d 100644
--- a/src/rsa/rsa_i32_pkcs1_sign.c
+++ b/src/rsa/rsa_i32_pkcs1_sign.c
@@ -30,75 +30,8 @@ br_rsa_i32_pkcs1_sign(const unsigned char *hash_oid,
 	const unsigned char *hash, size_t hash_len,
 	const br_rsa_private_key *sk, unsigned char *x)
 {
-	size_t u, x3, xlen;
-
-	/*
-	 * Padded hash value has format:
-	 *  00 01 FF .. FF 00 30 x1 30 x2 06 x3 OID 05 00 04 x4 HASH
-	 *
-	 * with the following rules:
-	 *
-	 *  -- Total length is equal to the modulus length (unsigned
-	 *     encoding).
-	 *
-	 *  -- There must be at least eight bytes of value 0xFF.
-	 *
-	 *  -- x4 is equal to the hash length (hash_len).
-	 *
-	 *  -- x3 is equal to the encoded OID value length (hash_oid[0]).
-	 *
-	 *  -- x2 = x3 + 4.
-	 *
-	 *  -- x1 = x2 + x4 + 4 = x3 + x4 + 8.
-	 *
-	 * Note: the "05 00" is optional (signatures with and without
-	 * that sequence exist in practice), but notes in PKCS#1 seem to
-	 * indicate that the presence of that sequence (specifically,
-	 * an ASN.1 NULL value for the hash parameters) may be slightly
-	 * more "standard" than the opposite.
-	 */
-	xlen = (sk->n_bitlen + 7) >> 3;
-
-	if (hash_oid == NULL) {
-		if (xlen < hash_len + 11) {
-			return 0;
-		}
-		x[0] = 0x00;
-		x[1] = 0x01;
-		u = xlen - hash_len;
-		memset(x + 2, 0xFF, u - 3);
-		x[u - 1] = 0x00;
-	} else {
-		x3 = hash_oid[0];
-
-		/*
-		 * Check that there is enough room for all the elements,
-		 * including at least eight bytes of value 0xFF.
-		 */
-		if (xlen < (x3 + hash_len + 21)) {
-			return 0;
-		}
-		x[0] = 0x00;
-		x[1] = 0x01;
-		u = xlen - x3 - hash_len - 11;
-		memset(x + 2, 0xFF, u - 2);
-		x[u] = 0x00;
-		x[u + 1] = 0x30;
-		x[u + 2] = x3 + hash_len + 8;
-		x[u + 3] = 0x30;
-		x[u + 4] = x3 + 4;
-		x[u + 5] = 0x06;
-		memcpy(x + u + 6, hash_oid, x3 + 1);
-		u += x3 + 7;
-		x[u ++] = 0x05;
-		x[u ++] = 0x00;
-		x[u ++] = 0x04;
-		x[u ++] = hash_len;
+	if (!br_rsa_pkcs1_sig_pad(hash_oid, hash, hash_len, sk->n_bitlen, x)) {
+		return 0;
 	}
-	memcpy(x + u, hash, hash_len);
-
-	/*
-	 * Do the actual computation.
-	 */
 	return br_rsa_i32_private(x, sk);
 }
diff --git a/src/rsa/rsa_i32_pkcs1_vrfy.c b/src/rsa/rsa_i32_pkcs1_vrfy.c
index cc20ba8..6ee7a19 100644
--- a/src/rsa/rsa_i32_pkcs1_vrfy.c
+++ b/src/rsa/rsa_i32_pkcs1_vrfy.c
@@ -30,97 +30,14 @@ br_rsa_i32_pkcs1_vrfy(const unsigned char *x, size_t xlen,
 	const unsigned char *hash_oid, size_t hash_len,
 	const br_rsa_public_key *pk, unsigned char *hash_out)
 {
-	static const unsigned char pad1[] = {
-		0x00, 0x01, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF
-	};
-
 	unsigned char sig[BR_MAX_RSA_SIZE >> 3];
-	unsigned char pad2[43];
-	size_t u, x2, x3, pad_len, zlen;
 
-	if (xlen > (sizeof sig) || xlen < 11) {
+	if (xlen > (sizeof sig)) {
 		return 0;
 	}
 	memcpy(sig, x, xlen);
 	if (!br_rsa_i32_public(sig, xlen, pk)) {
 		return 0;
 	}
-
-	/*
-	 * Expected format:
-	 *  00 01 FF ... FF 00 30 x1 30 x2 06 x3 OID [ 05 00 ] 04 x4 HASH
-	 *
-	 * with the following rules:
-	 *
-	 *  -- Total length is that of the modulus and the signature
-	 *     (this was already verified by br_rsa_i32_public()).
-	 *
-	 *  -- There are at least eight bytes of value 0xFF.
-	 *
-	 *  -- x4 is equal to the hash length (hash_len).
-	 *
-	 *  -- x3 is equal to the encoded OID value length (so x3 is the
-	 *     first byte of hash_oid[]).
-	 *
-	 *  -- If the "05 00" is present, then x2 == x3 + 4; otherwise,
-	 *     x2 == x3 + 2.
-	 *
-	 *  -- x1 == x2 + x4 + 4.
-	 *
-	 * So the total length after the last "FF" is either x3 + x4 + 11
-	 * (with the "05 00") or x3 + x4 + 9 (without the "05 00").
-	 */
-
-	/*
-	 * Check the "00 01 FF .. FF 00" with at least eight 0xFF bytes.
-	 * The comparaison is valid because we made sure that the signature
-	 * is at least 11 bytes long.
-	 */
-	if (memcmp(sig, pad1, sizeof pad1) != 0) {
-		return 0;
-	}
-	for (u = sizeof pad1; u < xlen; u ++) {
-		if (sig[u] != 0xFF) {
-			break;
-		}
-	}
-
-	/*
-	 * Remaining length is xlen - u bytes (including the 00 just
-	 * after the last FF). This must be equal to one of the two
-	 * possible values (depending on whether the "05 00" sequence is
-	 * present or not).
-	 */
-	if (hash_oid == NULL) {
-		if (xlen - u != hash_len + 1 || sig[u] != 0x00) {
-			return 0;
-		}
-	} else {
-		x3 = hash_oid[0];
-		pad_len = x3 + 9;
-		memset(pad2, 0, pad_len);
-		zlen = xlen - u - hash_len;
-		if (zlen == pad_len) {
-			x2 = x3 + 2;
-		} else if (zlen == pad_len + 2) {
-			x2 = x3 + 4;
-			pad_len = zlen;
-			pad2[pad_len - 4] = 0x05;
-		} else {
-			return 0;
-		}
-		pad2[1] = 0x30;
-		pad2[2] = x2 + hash_len + 4;
-		pad2[3] = 0x30;
-		pad2[4] = x2;
-		pad2[5] = 0x06;
-		memcpy(pad2 + 6, hash_oid, x3 + 1);
-		pad2[pad_len - 2] = 0x04;
-		pad2[pad_len - 1] = hash_len;
-		if (memcmp(pad2, sig + u, pad_len) != 0) {
-			return 0;
-		}
-	}
-	memcpy(hash_out, sig + xlen - hash_len, hash_len);
-	return 1;
+	return br_rsa_pkcs1_sig_unpad(sig, xlen, hash_oid, hash_len, hash_out);
 }
diff --git a/src/rsa/rsa_pkcs1_sig_pad.c b/src/rsa/rsa_pkcs1_sig_pad.c
new file mode 100644
index 0000000..06c3bd7
--- /dev/null
+++ b/src/rsa/rsa_pkcs1_sig_pad.c
@@ -0,0 +1,100 @@
+/*
+ * Copyright (c) 2017 Thomas Pornin <pornin@bolet.org>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining 
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be 
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "inner.h"
+
+/* see inner.h */
+uint32_t
+br_rsa_pkcs1_sig_pad(const unsigned char *hash_oid,
+	const unsigned char *hash, size_t hash_len,
+	uint32_t n_bitlen, unsigned char *x)
+{
+	size_t u, x3, xlen;
+
+	/*
+	 * Padded hash value has format:
+	 *  00 01 FF .. FF 00 30 x1 30 x2 06 x3 OID 05 00 04 x4 HASH
+	 *
+	 * with the following rules:
+	 *
+	 *  -- Total length is equal to the modulus length (unsigned
+	 *     encoding).
+	 *
+	 *  -- There must be at least eight bytes of value 0xFF.
+	 *
+	 *  -- x4 is equal to the hash length (hash_len).
+	 *
+	 *  -- x3 is equal to the encoded OID value length (hash_oid[0]).
+	 *
+	 *  -- x2 = x3 + 4.
+	 *
+	 *  -- x1 = x2 + x4 + 4 = x3 + x4 + 8.
+	 *
+	 * Note: the "05 00" is optional (signatures with and without
+	 * that sequence exist in practice), but notes in PKCS#1 seem to
+	 * indicate that the presence of that sequence (specifically,
+	 * an ASN.1 NULL value for the hash parameters) may be slightly
+	 * more "standard" than the opposite.
+	 */
+	xlen = (n_bitlen + 7) >> 3;
+
+	if (hash_oid == NULL) {
+		if (xlen < hash_len + 11) {
+			return 0;
+		}
+		x[0] = 0x00;
+		x[1] = 0x01;
+		u = xlen - hash_len;
+		memset(x + 2, 0xFF, u - 3);
+		x[u - 1] = 0x00;
+	} else {
+		x3 = hash_oid[0];
+
+		/*
+		 * Check that there is enough room for all the elements,
+		 * including at least eight bytes of value 0xFF.
+		 */
+		if (xlen < (x3 + hash_len + 21)) {
+			return 0;
+		}
+		x[0] = 0x00;
+		x[1] = 0x01;
+		u = xlen - x3 - hash_len - 11;
+		memset(x + 2, 0xFF, u - 2);
+		x[u] = 0x00;
+		x[u + 1] = 0x30;
+		x[u + 2] = x3 + hash_len + 8;
+		x[u + 3] = 0x30;
+		x[u + 4] = x3 + 4;
+		x[u + 5] = 0x06;
+		memcpy(x + u + 6, hash_oid, x3 + 1);
+		u += x3 + 7;
+		x[u ++] = 0x05;
+		x[u ++] = 0x00;
+		x[u ++] = 0x04;
+		x[u ++] = hash_len;
+	}
+	memcpy(x + u, hash, hash_len);
+	return 1;
+}
diff --git a/src/rsa/rsa_pkcs1_sig_unpad.c b/src/rsa/rsa_pkcs1_sig_unpad.c
new file mode 100644
index 0000000..d179945
--- /dev/null
+++ b/src/rsa/rsa_pkcs1_sig_unpad.c
@@ -0,0 +1,121 @@
+/*
+ * Copyright (c) 2017 Thomas Pornin <pornin@bolet.org>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining 
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be 
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "inner.h"
+
+/* see bearssl_rsa.h */
+uint32_t
+br_rsa_pkcs1_sig_unpad(const unsigned char *sig, size_t sig_len,
+	const unsigned char *hash_oid, size_t hash_len,
+	unsigned char *hash_out)
+{
+	static const unsigned char pad1[] = {
+		0x00, 0x01, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF
+	};
+
+	unsigned char pad2[43];
+	size_t u, x2, x3, pad_len, zlen;
+
+	if (sig_len < 11) {
+		return 0;
+	}
+
+	/*
+	 * Expected format:
+	 *  00 01 FF ... FF 00 30 x1 30 x2 06 x3 OID [ 05 00 ] 04 x4 HASH
+	 *
+	 * with the following rules:
+	 *
+	 *  -- Total length is that of the modulus and the signature
+	 *     (this was already verified by br_rsa_i31_public()).
+	 *
+	 *  -- There are at least eight bytes of value 0xFF.
+	 *
+	 *  -- x4 is equal to the hash length (hash_len).
+	 *
+	 *  -- x3 is equal to the encoded OID value length (so x3 is the
+	 *     first byte of hash_oid[]).
+	 *
+	 *  -- If the "05 00" is present, then x2 == x3 + 4; otherwise,
+	 *     x2 == x3 + 2.
+	 *
+	 *  -- x1 == x2 + x4 + 4.
+	 *
+	 * So the total length after the last "FF" is either x3 + x4 + 11
+	 * (with the "05 00") or x3 + x4 + 9 (without the "05 00").
+	 */
+
+	/*
+	 * Check the "00 01 FF .. FF 00" with at least eight 0xFF bytes.
+	 * The comparaison is valid because we made sure that the signature
+	 * is at least 11 bytes long.
+	 */
+	if (memcmp(sig, pad1, sizeof pad1) != 0) {
+		return 0;
+	}
+	for (u = sizeof pad1; u < sig_len; u ++) {
+		if (sig[u] != 0xFF) {
+			break;
+		}
+	}
+
+	/*
+	 * Remaining length is sig_len - u bytes (including the 00 just
+	 * after the last FF). This must be equal to one of the two
+	 * possible values (depending on whether the "05 00" sequence is
+	 * present or not).
+	 */
+	if (hash_oid == NULL) {
+		if (sig_len - u != hash_len + 1 || sig[u] != 0x00) {
+			return 0;
+		}
+	} else {
+		x3 = hash_oid[0];
+		pad_len = x3 + 9;
+		memset(pad2, 0, pad_len);
+		zlen = sig_len - u - hash_len;
+		if (zlen == pad_len) {
+			x2 = x3 + 2;
+		} else if (zlen == pad_len + 2) {
+			x2 = x3 + 4;
+			pad_len = zlen;
+			pad2[pad_len - 4] = 0x05;
+		} else {
+			return 0;
+		}
+		pad2[1] = 0x30;
+		pad2[2] = x2 + hash_len + 4;
+		pad2[3] = 0x30;
+		pad2[4] = x2;
+		pad2[5] = 0x06;
+		memcpy(pad2 + 6, hash_oid, x3 + 1);
+		pad2[pad_len - 2] = 0x04;
+		pad2[pad_len - 1] = hash_len;
+		if (memcmp(pad2, sig + u, pad_len) != 0) {
+			return 0;
+		}
+	}
+	memcpy(hash_out, sig + sig_len - hash_len, hash_len);
+	return 1;
+}
diff --git a/test/test_crypto.c b/test/test_crypto.c
index 10faa9c..bed4927 100644
--- a/test/test_crypto.c
+++ b/test/test_crypto.c
@@ -4367,6 +4367,14 @@ test_RSA_sign(const char *name, br_rsa_private fpriv,
 	fflush(stdout);
 }
 
+static void
+test_RSA_i15(void)
+{
+	test_RSA_core("RSA i15 core", &br_rsa_i15_public, &br_rsa_i15_private);
+	test_RSA_sign("RSA i15 sign", &br_rsa_i15_private,
+		&br_rsa_i15_pkcs1_sign, &br_rsa_i15_pkcs1_vrfy);
+}
+
 static void
 test_RSA_i31(void)
 {
@@ -4813,6 +4821,15 @@ test_EC_KAT(const char *name, const br_ec_impl *impl, uint32_t curve_mask)
 	fflush(stdout);
 }
 
+static void
+test_EC_prime_i15(void)
+{
+	test_EC_KAT("EC_prime_i15", &br_ec_prime_i15,
+		(uint32_t)1 << BR_EC_secp256r1
+		| (uint32_t)1 << BR_EC_secp384r1
+		| (uint32_t)1 << BR_EC_secp521r1);
+}
+
 static void
 test_EC_prime_i31(void)
 {
@@ -5201,7 +5218,8 @@ const ecdsa_kat_vector ECDSA_KAT[] = {
 };
 
 static void
-test_ECDSA_KAT(br_ecdsa_sign sign, br_ecdsa_vrfy vrfy, int asn1)
+test_ECDSA_KAT(const br_ec_impl *iec,
+	br_ecdsa_sign sign, br_ecdsa_vrfy vrfy, int asn1)
 {
 	size_t u;
 
@@ -5228,28 +5246,28 @@ test_ECDSA_KAT(br_ecdsa_sign sign, br_ecdsa_vrfy vrfy, int asn1)
 			sig_len = hextobin(sig, kv->sraw);
 		}
 
-		if (vrfy(&br_ec_prime_i31, hash, hash_len,
+		if (vrfy(iec, hash, hash_len,
 			kv->pub, sig, sig_len) != 1)
 		{
 			fprintf(stderr, "ECDSA KAT verify failed (1)\n");
 			exit(EXIT_FAILURE);
 		}
 		hash[0] ^= 0x80;
-		if (vrfy(&br_ec_prime_i31, hash, hash_len,
+		if (vrfy(iec, hash, hash_len,
 			kv->pub, sig, sig_len) != 0)
 		{
 			fprintf(stderr, "ECDSA KAT verify shoud have failed\n");
 			exit(EXIT_FAILURE);
 		}
 		hash[0] ^= 0x80;
-		if (vrfy(&br_ec_prime_i31, hash, hash_len,
+		if (vrfy(iec, hash, hash_len,
 			kv->pub, sig, sig_len) != 1)
 		{
 			fprintf(stderr, "ECDSA KAT verify failed (2)\n");
 			exit(EXIT_FAILURE);
 		}
 
-		sig2_len = sign(&br_ec_prime_i31, kv->hf, hash, kv->priv, sig2);
+		sig2_len = sign(iec, kv->hf, hash, kv->priv, sig2);
 		if (sig2_len == 0) {
 			fprintf(stderr, "ECDSA KAT sign failed\n");
 			exit(EXIT_FAILURE);
@@ -5271,10 +5289,29 @@ test_ECDSA_i31(void)
 	fflush(stdout);
 	printf("[raw]");
 	fflush(stdout);
-	test_ECDSA_KAT(&br_ecdsa_i31_sign_raw, &br_ecdsa_i31_vrfy_raw, 0);
+	test_ECDSA_KAT(&br_ec_prime_i31,
+		&br_ecdsa_i31_sign_raw, &br_ecdsa_i31_vrfy_raw, 0);
+	printf(" [asn1]");
+	fflush(stdout);
+	test_ECDSA_KAT(&br_ec_prime_i31,
+		&br_ecdsa_i31_sign_asn1, &br_ecdsa_i31_vrfy_asn1, 1);
+	printf(" done.\n");
+	fflush(stdout);
+}
+
+static void
+test_ECDSA_i15(void)
+{
+	printf("Test ECDSA/i15: ");
+	fflush(stdout);
+	printf("[raw]");
+	fflush(stdout);
+	test_ECDSA_KAT(&br_ec_prime_i15,
+		&br_ecdsa_i15_sign_raw, &br_ecdsa_i15_vrfy_raw, 0);
 	printf(" [asn1]");
 	fflush(stdout);
-	test_ECDSA_KAT(&br_ecdsa_i31_sign_asn1, &br_ecdsa_i31_vrfy_asn1, 1);
+	test_ECDSA_KAT(&br_ec_prime_i31,
+		&br_ecdsa_i15_sign_asn1, &br_ecdsa_i15_vrfy_asn1, 1);
 	printf(" done.\n");
 	fflush(stdout);
 }
@@ -5343,14 +5380,17 @@ static const struct {
 	STU(DES_ct),
 	STU(ChaCha20_ct),
 	STU(Poly1305_ctmul),
+	STU(RSA_i15),
 	STU(RSA_i31),
 	STU(RSA_i32),
 	STU(GHASH_ctmul),
 	STU(GHASH_ctmul32),
 	STU(GHASH_ctmul64),
+	STU(EC_prime_i15),
 	STU(EC_prime_i31),
 	STU(EC_p256_i15),
 	/* STU(EC_prime_i32), */
+	STU(ECDSA_i15),
 	STU(ECDSA_i31),
 	{ 0, 0 }
 };
diff --git a/test/test_speed.c b/test/test_speed.c
index fccb5eb..5e17714 100644
--- a/test/test_speed.c
+++ b/test/test_speed.c
@@ -557,6 +557,13 @@ test_speed_rsa_inner(char *name,
 	}
 }
 
+static void
+test_speed_rsa_i15(void)
+{
+	test_speed_rsa_inner("RSA i15",
+		&br_rsa_i15_public, &br_rsa_i15_private);
+}
+
 static void
 test_speed_rsa_i31(void)
 {
@@ -616,7 +623,16 @@ test_speed_ec_inner(const char *name,
 static void
 test_speed_ec_p256_i15(void)
 {
-	test_speed_ec_inner("EC i15 P-256", &br_ec_p256_i15, &br_secp256r1);
+	test_speed_ec_inner("EC i15/spec P-256",
+		&br_ec_p256_i15, &br_secp256r1);
+}
+
+static void
+test_speed_ec_prime_i15(void)
+{
+	test_speed_ec_inner("EC i15 P-256", &br_ec_prime_i15, &br_secp256r1);
+	test_speed_ec_inner("EC i15 P-384", &br_ec_prime_i15, &br_secp384r1);
+	test_speed_ec_inner("EC i15 P-521", &br_ec_prime_i15, &br_secp521r1);
 }
 
 static void
@@ -712,6 +728,23 @@ test_speed_ecdsa_inner(const char *name,
 	}
 }
 
+static void
+test_speed_ecdsa_i15(void)
+{
+	test_speed_ecdsa_inner("ECDSA i15 P-256",
+		&br_ec_prime_i15, &br_secp256r1,
+		&br_ecdsa_i15_sign_asn1,
+		&br_ecdsa_i15_vrfy_asn1);
+	test_speed_ecdsa_inner("ECDSA i15 P-384",
+		&br_ec_prime_i15, &br_secp384r1,
+		&br_ecdsa_i15_sign_asn1,
+		&br_ecdsa_i15_vrfy_asn1);
+	test_speed_ecdsa_inner("ECDSA i15 P-521",
+		&br_ec_prime_i15, &br_secp521r1,
+		&br_ecdsa_i15_sign_asn1,
+		&br_ecdsa_i15_vrfy_asn1);
+}
+
 static void
 test_speed_ecdsa_i31(void)
 {
@@ -1134,10 +1167,13 @@ static const struct {
 
 	STU(poly1305_ctmul),
 
+	STU(rsa_i15),
 	STU(rsa_i31),
 	STU(rsa_i32),
 	STU(ec_p256_i15),
+	STU(ec_prime_i15),
 	STU(ec_prime_i31),
+	STU(ecdsa_i15),
 	STU(ecdsa_i31),
 
 	STU(i31)
-- 
2.17.1