X-Git-Url: https://www.bearssl.org/gitweb//home/git/?p=BearSSL;a=blobdiff_plain;f=src%2Fkdf%2Fshake.c;fp=src%2Fkdf%2Fshake.c;h=80d7176dc707d91548ee24fb9f5fcc7449acc838;hp=0000000000000000000000000000000000000000;hb=966078b3373a273f8417ede42ecab32a414dc109;hpb=8ef7680081c61b486622f2d983c0d3d21e83caad diff --git a/src/kdf/shake.c b/src/kdf/shake.c new file mode 100644 index 0000000..80d7176 --- /dev/null +++ b/src/kdf/shake.c @@ -0,0 +1,590 @@ +/* + * Copyright (c) 2018 Thomas Pornin + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include "inner.h" + +/* + * Round constants. + */ +static const uint64_t RC[] = { + 0x0000000000000001, 0x0000000000008082, + 0x800000000000808A, 0x8000000080008000, + 0x000000000000808B, 0x0000000080000001, + 0x8000000080008081, 0x8000000000008009, + 0x000000000000008A, 0x0000000000000088, + 0x0000000080008009, 0x000000008000000A, + 0x000000008000808B, 0x800000000000008B, + 0x8000000000008089, 0x8000000000008003, + 0x8000000000008002, 0x8000000000000080, + 0x000000000000800A, 0x800000008000000A, + 0x8000000080008081, 0x8000000000008080, + 0x0000000080000001, 0x8000000080008008 +}; + +/* + * XOR a block of data into the provided state. This supports only + * blocks whose length is a multiple of 64 bits. + */ +static void +xor_block(uint64_t *A, const void *data, size_t rate) +{ + size_t u; + + for (u = 0; u < rate; u += 8) { + A[u >> 3] ^= br_dec64le((const unsigned char *)data + u); + } +} + +/* + * Process a block with the provided data. The data length must be a + * multiple of 8 (in bytes); normally, this is the "rate". + */ +static void +process_block(uint64_t *A) +{ + uint64_t t0, t1, t2, t3, t4; + uint64_t tt0, tt1, tt2, tt3; + uint64_t t, kt; + uint64_t c0, c1, c2, c3, c4, bnn; + int j; + + /* + * Compute the 24 rounds. This loop is partially unrolled (each + * iteration computes two rounds). + */ + for (j = 0; j < 24; j += 2) { + + tt0 = A[ 1] ^ A[ 6]; + tt1 = A[11] ^ A[16]; + tt0 ^= A[21] ^ tt1; + tt0 = (tt0 << 1) | (tt0 >> 63); + tt2 = A[ 4] ^ A[ 9]; + tt3 = A[14] ^ A[19]; + tt0 ^= A[24]; + tt2 ^= tt3; + t0 = tt0 ^ tt2; + + tt0 = A[ 2] ^ A[ 7]; + tt1 = A[12] ^ A[17]; + tt0 ^= A[22] ^ tt1; + tt0 = (tt0 << 1) | (tt0 >> 63); + tt2 = A[ 0] ^ A[ 5]; + tt3 = A[10] ^ A[15]; + tt0 ^= A[20]; + tt2 ^= tt3; + t1 = tt0 ^ tt2; + + tt0 = A[ 3] ^ A[ 8]; + tt1 = A[13] ^ A[18]; + tt0 ^= A[23] ^ tt1; + tt0 = (tt0 << 1) | (tt0 >> 63); + tt2 = A[ 1] ^ A[ 6]; + tt3 = A[11] ^ A[16]; + tt0 ^= A[21]; + tt2 ^= tt3; + t2 = tt0 ^ tt2; + + tt0 = A[ 4] ^ A[ 9]; + tt1 = A[14] ^ A[19]; + tt0 ^= A[24] ^ tt1; + tt0 = (tt0 << 1) | (tt0 >> 63); + tt2 = A[ 2] ^ A[ 7]; + tt3 = A[12] ^ A[17]; + tt0 ^= A[22]; + tt2 ^= tt3; + t3 = tt0 ^ tt2; + + tt0 = A[ 0] ^ A[ 5]; + tt1 = A[10] ^ A[15]; + tt0 ^= A[20] ^ tt1; + tt0 = (tt0 << 1) | (tt0 >> 63); + tt2 = A[ 3] ^ A[ 8]; + tt3 = A[13] ^ A[18]; + tt0 ^= A[23]; + tt2 ^= tt3; + t4 = tt0 ^ tt2; + + A[ 0] = A[ 0] ^ t0; + A[ 5] = A[ 5] ^ t0; + A[10] = A[10] ^ t0; + A[15] = A[15] ^ t0; + A[20] = A[20] ^ t0; + A[ 1] = A[ 1] ^ t1; + A[ 6] = A[ 6] ^ t1; + A[11] = A[11] ^ t1; + A[16] = A[16] ^ t1; + A[21] = A[21] ^ t1; + A[ 2] = A[ 2] ^ t2; + A[ 7] = A[ 7] ^ t2; + A[12] = A[12] ^ t2; + A[17] = A[17] ^ t2; + A[22] = A[22] ^ t2; + A[ 3] = A[ 3] ^ t3; + A[ 8] = A[ 8] ^ t3; + A[13] = A[13] ^ t3; + A[18] = A[18] ^ t3; + A[23] = A[23] ^ t3; + A[ 4] = A[ 4] ^ t4; + A[ 9] = A[ 9] ^ t4; + A[14] = A[14] ^ t4; + A[19] = A[19] ^ t4; + A[24] = A[24] ^ t4; + A[ 5] = (A[ 5] << 36) | (A[ 5] >> (64 - 36)); + A[10] = (A[10] << 3) | (A[10] >> (64 - 3)); + A[15] = (A[15] << 41) | (A[15] >> (64 - 41)); + A[20] = (A[20] << 18) | (A[20] >> (64 - 18)); + A[ 1] = (A[ 1] << 1) | (A[ 1] >> (64 - 1)); + A[ 6] = (A[ 6] << 44) | (A[ 6] >> (64 - 44)); + A[11] = (A[11] << 10) | (A[11] >> (64 - 10)); + A[16] = (A[16] << 45) | (A[16] >> (64 - 45)); + A[21] = (A[21] << 2) | (A[21] >> (64 - 2)); + A[ 2] = (A[ 2] << 62) | (A[ 2] >> (64 - 62)); + A[ 7] = (A[ 7] << 6) | (A[ 7] >> (64 - 6)); + A[12] = (A[12] << 43) | (A[12] >> (64 - 43)); + A[17] = (A[17] << 15) | (A[17] >> (64 - 15)); + A[22] = (A[22] << 61) | (A[22] >> (64 - 61)); + A[ 3] = (A[ 3] << 28) | (A[ 3] >> (64 - 28)); + A[ 8] = (A[ 8] << 55) | (A[ 8] >> (64 - 55)); + A[13] = (A[13] << 25) | (A[13] >> (64 - 25)); + A[18] = (A[18] << 21) | (A[18] >> (64 - 21)); + A[23] = (A[23] << 56) | (A[23] >> (64 - 56)); + A[ 4] = (A[ 4] << 27) | (A[ 4] >> (64 - 27)); + A[ 9] = (A[ 9] << 20) | (A[ 9] >> (64 - 20)); + A[14] = (A[14] << 39) | (A[14] >> (64 - 39)); + A[19] = (A[19] << 8) | (A[19] >> (64 - 8)); + A[24] = (A[24] << 14) | (A[24] >> (64 - 14)); + bnn = ~A[12]; + kt = A[ 6] | A[12]; + c0 = A[ 0] ^ kt; + kt = bnn | A[18]; + c1 = A[ 6] ^ kt; + kt = A[18] & A[24]; + c2 = A[12] ^ kt; + kt = A[24] | A[ 0]; + c3 = A[18] ^ kt; + kt = A[ 0] & A[ 6]; + c4 = A[24] ^ kt; + A[ 0] = c0; + A[ 6] = c1; + A[12] = c2; + A[18] = c3; + A[24] = c4; + bnn = ~A[22]; + kt = A[ 9] | A[10]; + c0 = A[ 3] ^ kt; + kt = A[10] & A[16]; + c1 = A[ 9] ^ kt; + kt = A[16] | bnn; + c2 = A[10] ^ kt; + kt = A[22] | A[ 3]; + c3 = A[16] ^ kt; + kt = A[ 3] & A[ 9]; + c4 = A[22] ^ kt; + A[ 3] = c0; + A[ 9] = c1; + A[10] = c2; + A[16] = c3; + A[22] = c4; + bnn = ~A[19]; + kt = A[ 7] | A[13]; + c0 = A[ 1] ^ kt; + kt = A[13] & A[19]; + c1 = A[ 7] ^ kt; + kt = bnn & A[20]; + c2 = A[13] ^ kt; + kt = A[20] | A[ 1]; + c3 = bnn ^ kt; + kt = A[ 1] & A[ 7]; + c4 = A[20] ^ kt; + A[ 1] = c0; + A[ 7] = c1; + A[13] = c2; + A[19] = c3; + A[20] = c4; + bnn = ~A[17]; + kt = A[ 5] & A[11]; + c0 = A[ 4] ^ kt; + kt = A[11] | A[17]; + c1 = A[ 5] ^ kt; + kt = bnn | A[23]; + c2 = A[11] ^ kt; + kt = A[23] & A[ 4]; + c3 = bnn ^ kt; + kt = A[ 4] | A[ 5]; + c4 = A[23] ^ kt; + A[ 4] = c0; + A[ 5] = c1; + A[11] = c2; + A[17] = c3; + A[23] = c4; + bnn = ~A[ 8]; + kt = bnn & A[14]; + c0 = A[ 2] ^ kt; + kt = A[14] | A[15]; + c1 = bnn ^ kt; + kt = A[15] & A[21]; + c2 = A[14] ^ kt; + kt = A[21] | A[ 2]; + c3 = A[15] ^ kt; + kt = A[ 2] & A[ 8]; + c4 = A[21] ^ kt; + A[ 2] = c0; + A[ 8] = c1; + A[14] = c2; + A[15] = c3; + A[21] = c4; + A[ 0] = A[ 0] ^ RC[j + 0]; + + tt0 = A[ 6] ^ A[ 9]; + tt1 = A[ 7] ^ A[ 5]; + tt0 ^= A[ 8] ^ tt1; + tt0 = (tt0 << 1) | (tt0 >> 63); + tt2 = A[24] ^ A[22]; + tt3 = A[20] ^ A[23]; + tt0 ^= A[21]; + tt2 ^= tt3; + t0 = tt0 ^ tt2; + + tt0 = A[12] ^ A[10]; + tt1 = A[13] ^ A[11]; + tt0 ^= A[14] ^ tt1; + tt0 = (tt0 << 1) | (tt0 >> 63); + tt2 = A[ 0] ^ A[ 3]; + tt3 = A[ 1] ^ A[ 4]; + tt0 ^= A[ 2]; + tt2 ^= tt3; + t1 = tt0 ^ tt2; + + tt0 = A[18] ^ A[16]; + tt1 = A[19] ^ A[17]; + tt0 ^= A[15] ^ tt1; + tt0 = (tt0 << 1) | (tt0 >> 63); + tt2 = A[ 6] ^ A[ 9]; + tt3 = A[ 7] ^ A[ 5]; + tt0 ^= A[ 8]; + tt2 ^= tt3; + t2 = tt0 ^ tt2; + + tt0 = A[24] ^ A[22]; + tt1 = A[20] ^ A[23]; + tt0 ^= A[21] ^ tt1; + tt0 = (tt0 << 1) | (tt0 >> 63); + tt2 = A[12] ^ A[10]; + tt3 = A[13] ^ A[11]; + tt0 ^= A[14]; + tt2 ^= tt3; + t3 = tt0 ^ tt2; + + tt0 = A[ 0] ^ A[ 3]; + tt1 = A[ 1] ^ A[ 4]; + tt0 ^= A[ 2] ^ tt1; + tt0 = (tt0 << 1) | (tt0 >> 63); + tt2 = A[18] ^ A[16]; + tt3 = A[19] ^ A[17]; + tt0 ^= A[15]; + tt2 ^= tt3; + t4 = tt0 ^ tt2; + + A[ 0] = A[ 0] ^ t0; + A[ 3] = A[ 3] ^ t0; + A[ 1] = A[ 1] ^ t0; + A[ 4] = A[ 4] ^ t0; + A[ 2] = A[ 2] ^ t0; + A[ 6] = A[ 6] ^ t1; + A[ 9] = A[ 9] ^ t1; + A[ 7] = A[ 7] ^ t1; + A[ 5] = A[ 5] ^ t1; + A[ 8] = A[ 8] ^ t1; + A[12] = A[12] ^ t2; + A[10] = A[10] ^ t2; + A[13] = A[13] ^ t2; + A[11] = A[11] ^ t2; + A[14] = A[14] ^ t2; + A[18] = A[18] ^ t3; + A[16] = A[16] ^ t3; + A[19] = A[19] ^ t3; + A[17] = A[17] ^ t3; + A[15] = A[15] ^ t3; + A[24] = A[24] ^ t4; + A[22] = A[22] ^ t4; + A[20] = A[20] ^ t4; + A[23] = A[23] ^ t4; + A[21] = A[21] ^ t4; + A[ 3] = (A[ 3] << 36) | (A[ 3] >> (64 - 36)); + A[ 1] = (A[ 1] << 3) | (A[ 1] >> (64 - 3)); + A[ 4] = (A[ 4] << 41) | (A[ 4] >> (64 - 41)); + A[ 2] = (A[ 2] << 18) | (A[ 2] >> (64 - 18)); + A[ 6] = (A[ 6] << 1) | (A[ 6] >> (64 - 1)); + A[ 9] = (A[ 9] << 44) | (A[ 9] >> (64 - 44)); + A[ 7] = (A[ 7] << 10) | (A[ 7] >> (64 - 10)); + A[ 5] = (A[ 5] << 45) | (A[ 5] >> (64 - 45)); + A[ 8] = (A[ 8] << 2) | (A[ 8] >> (64 - 2)); + A[12] = (A[12] << 62) | (A[12] >> (64 - 62)); + A[10] = (A[10] << 6) | (A[10] >> (64 - 6)); + A[13] = (A[13] << 43) | (A[13] >> (64 - 43)); + A[11] = (A[11] << 15) | (A[11] >> (64 - 15)); + A[14] = (A[14] << 61) | (A[14] >> (64 - 61)); + A[18] = (A[18] << 28) | (A[18] >> (64 - 28)); + A[16] = (A[16] << 55) | (A[16] >> (64 - 55)); + A[19] = (A[19] << 25) | (A[19] >> (64 - 25)); + A[17] = (A[17] << 21) | (A[17] >> (64 - 21)); + A[15] = (A[15] << 56) | (A[15] >> (64 - 56)); + A[24] = (A[24] << 27) | (A[24] >> (64 - 27)); + A[22] = (A[22] << 20) | (A[22] >> (64 - 20)); + A[20] = (A[20] << 39) | (A[20] >> (64 - 39)); + A[23] = (A[23] << 8) | (A[23] >> (64 - 8)); + A[21] = (A[21] << 14) | (A[21] >> (64 - 14)); + bnn = ~A[13]; + kt = A[ 9] | A[13]; + c0 = A[ 0] ^ kt; + kt = bnn | A[17]; + c1 = A[ 9] ^ kt; + kt = A[17] & A[21]; + c2 = A[13] ^ kt; + kt = A[21] | A[ 0]; + c3 = A[17] ^ kt; + kt = A[ 0] & A[ 9]; + c4 = A[21] ^ kt; + A[ 0] = c0; + A[ 9] = c1; + A[13] = c2; + A[17] = c3; + A[21] = c4; + bnn = ~A[14]; + kt = A[22] | A[ 1]; + c0 = A[18] ^ kt; + kt = A[ 1] & A[ 5]; + c1 = A[22] ^ kt; + kt = A[ 5] | bnn; + c2 = A[ 1] ^ kt; + kt = A[14] | A[18]; + c3 = A[ 5] ^ kt; + kt = A[18] & A[22]; + c4 = A[14] ^ kt; + A[18] = c0; + A[22] = c1; + A[ 1] = c2; + A[ 5] = c3; + A[14] = c4; + bnn = ~A[23]; + kt = A[10] | A[19]; + c0 = A[ 6] ^ kt; + kt = A[19] & A[23]; + c1 = A[10] ^ kt; + kt = bnn & A[ 2]; + c2 = A[19] ^ kt; + kt = A[ 2] | A[ 6]; + c3 = bnn ^ kt; + kt = A[ 6] & A[10]; + c4 = A[ 2] ^ kt; + A[ 6] = c0; + A[10] = c1; + A[19] = c2; + A[23] = c3; + A[ 2] = c4; + bnn = ~A[11]; + kt = A[ 3] & A[ 7]; + c0 = A[24] ^ kt; + kt = A[ 7] | A[11]; + c1 = A[ 3] ^ kt; + kt = bnn | A[15]; + c2 = A[ 7] ^ kt; + kt = A[15] & A[24]; + c3 = bnn ^ kt; + kt = A[24] | A[ 3]; + c4 = A[15] ^ kt; + A[24] = c0; + A[ 3] = c1; + A[ 7] = c2; + A[11] = c3; + A[15] = c4; + bnn = ~A[16]; + kt = bnn & A[20]; + c0 = A[12] ^ kt; + kt = A[20] | A[ 4]; + c1 = bnn ^ kt; + kt = A[ 4] & A[ 8]; + c2 = A[20] ^ kt; + kt = A[ 8] | A[12]; + c3 = A[ 4] ^ kt; + kt = A[12] & A[16]; + c4 = A[ 8] ^ kt; + A[12] = c0; + A[16] = c1; + A[20] = c2; + A[ 4] = c3; + A[ 8] = c4; + A[ 0] = A[ 0] ^ RC[j + 1]; + t = A[ 5]; + A[ 5] = A[18]; + A[18] = A[11]; + A[11] = A[10]; + A[10] = A[ 6]; + A[ 6] = A[22]; + A[22] = A[20]; + A[20] = A[12]; + A[12] = A[19]; + A[19] = A[15]; + A[15] = A[24]; + A[24] = A[ 8]; + A[ 8] = t; + t = A[ 1]; + A[ 1] = A[ 9]; + A[ 9] = A[14]; + A[14] = A[ 2]; + A[ 2] = A[13]; + A[13] = A[23]; + A[23] = A[ 4]; + A[ 4] = A[21]; + A[21] = A[16]; + A[16] = A[ 3]; + A[ 3] = A[17]; + A[17] = A[ 7]; + A[ 7] = t; + } +} + +/* see bearssl_kdf.h */ +void +br_shake_init(br_shake_context *sc, int security_level) +{ + sc->rate = 200 - (size_t)(security_level >> 2); + sc->dptr = 0; + memset(sc->A, 0, sizeof sc->A); + sc->A[ 1] = ~(uint64_t)0; + sc->A[ 2] = ~(uint64_t)0; + sc->A[ 8] = ~(uint64_t)0; + sc->A[12] = ~(uint64_t)0; + sc->A[17] = ~(uint64_t)0; + sc->A[20] = ~(uint64_t)0; +} + +/* see bearssl_kdf.h */ +void +br_shake_inject(br_shake_context *sc, const void *data, size_t len) +{ + const unsigned char *buf; + size_t rate, dptr; + + buf = data; + rate = sc->rate; + dptr = sc->dptr; + while (len > 0) { + size_t clen; + + clen = rate - dptr; + if (clen > len) { + clen = len; + } + memcpy(sc->dbuf + dptr, buf, clen); + dptr += clen; + buf += clen; + len -= clen; + if (dptr == rate) { + xor_block(sc->A, sc->dbuf, rate); + process_block(sc->A); + dptr = 0; + } + } + sc->dptr = dptr; +} + +/* see bearssl_kdf.h */ +void +br_shake_flip(br_shake_context *sc) +{ + /* + * We apply padding and pre-XOR the value into the state. We + * set dptr to the end of the buffer, so that first call to + * shake_extract() will process the block. + */ + if ((sc->dptr + 1) == sc->rate) { + sc->dbuf[sc->dptr ++] = 0x9F; + } else { + sc->dbuf[sc->dptr ++] = 0x1F; + memset(sc->dbuf + sc->dptr, 0x00, sc->rate - sc->dptr - 1); + sc->dbuf[sc->rate - 1] = 0x80; + sc->dptr = sc->rate; + } + xor_block(sc->A, sc->dbuf, sc->rate); +} + +/* see bearssl_kdf.h */ +void +br_shake_produce(br_shake_context *sc, void *out, size_t len) +{ + unsigned char *buf; + size_t dptr, rate; + + buf = out; + dptr = sc->dptr; + rate = sc->rate; + while (len > 0) { + size_t clen; + + if (dptr == rate) { + unsigned char *dbuf; + uint64_t *A; + + A = sc->A; + dbuf = sc->dbuf; + process_block(A); + br_enc64le(dbuf + 0, A[ 0]); + br_enc64le(dbuf + 8, ~A[ 1]); + br_enc64le(dbuf + 16, ~A[ 2]); + br_enc64le(dbuf + 24, A[ 3]); + br_enc64le(dbuf + 32, A[ 4]); + br_enc64le(dbuf + 40, A[ 5]); + br_enc64le(dbuf + 48, A[ 6]); + br_enc64le(dbuf + 56, A[ 7]); + br_enc64le(dbuf + 64, ~A[ 8]); + br_enc64le(dbuf + 72, A[ 9]); + br_enc64le(dbuf + 80, A[10]); + br_enc64le(dbuf + 88, A[11]); + br_enc64le(dbuf + 96, ~A[12]); + br_enc64le(dbuf + 104, A[13]); + br_enc64le(dbuf + 112, A[14]); + br_enc64le(dbuf + 120, A[15]); + br_enc64le(dbuf + 128, A[16]); + br_enc64le(dbuf + 136, ~A[17]); + br_enc64le(dbuf + 144, A[18]); + br_enc64le(dbuf + 152, A[19]); + br_enc64le(dbuf + 160, ~A[20]); + br_enc64le(dbuf + 168, A[21]); + br_enc64le(dbuf + 176, A[22]); + br_enc64le(dbuf + 184, A[23]); + br_enc64le(dbuf + 192, A[24]); + dptr = 0; + } + clen = rate - dptr; + if (clen > len) { + clen = len; + } + memcpy(buf, sc->dbuf + dptr, clen); + dptr += clen; + buf += clen; + len -= clen; + } + sc->dptr = dptr; +}