2 * Copyright (c) 2016 Thomas Pornin <pornin@bolet.org>
4 * Permission is hereby granted, free of charge, to any person obtaining
5 * a copy of this software and associated documentation files (the
6 * "Software"), to deal in the Software without restriction, including
7 * without limitation the rights to use, copy, modify, merge, publish,
8 * distribute, sublicense, and/or sell copies of the Software, and to
9 * permit persons to whom the Software is furnished to do so, subject to
10 * the following conditions:
12 * The above copyright notice and this permission notice shall be
13 * included in all copies or substantial portions of the Software.
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
16 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
17 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
18 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
19 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
20 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
21 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
/*
 * Constant-time AES S-box over eight bitsliced 64-bit words q[0..7].
 * Uses only boolean operations (no table lookups), so it leaks nothing
 * through cache timing.
 *
 * NOTE(review): this extract is garbled -- every surviving line carries
 * the original file's line number fused into the text, statements are
 * split across lines, and the actual Boyar-Peralta circuit (the long
 * run of XOR/AND statements computing s0..s7 from x0..x7, between the
 * "Top" and "Bottom" linear-transformation sections) is missing
 * entirely.  Only declarations and a few comments survive.  Recover the
 * complete original before making any code change here.
 */
29 br_aes_ct64_bitslice_Sbox(uint64_t *q
)
32 * This S-box implementation is a straightforward translation of
33 * the circuit described by Boyar and Peralta in "A new
34 * combinational logic minimization technique with applications
35 * to cryptology" (https://eprint.iacr.org/2009/191.pdf).
37 * Note that variables x* (input) and s* (output) are numbered
38 * in "reverse" order (x0 is the high bit, x7 is the low bit).
/* Inputs: the eight bitslice words (x0 = high bit, per comment above). */
41 uint64_t x0
, x1
, x2
, x3
, x4
, x5
, x6
, x7
;
/*
 * y*, z*, t*: intermediate wires of the circuit -- presumably the
 * top-linear outputs, shared nonlinear products, and bottom-linear
 * intermediates of the Boyar-Peralta network; confirm against the
 * complete original.
 */
42 uint64_t y1
, y2
, y3
, y4
, y5
, y6
, y7
, y8
, y9
;
43 uint64_t y10
, y11
, y12
, y13
, y14
, y15
, y16
, y17
, y18
, y19
;
45 uint64_t z0
, z1
, z2
, z3
, z4
, z5
, z6
, z7
, z8
, z9
;
46 uint64_t z10
, z11
, z12
, z13
, z14
, z15
, z16
, z17
;
47 uint64_t t0
, t1
, t2
, t3
, t4
, t5
, t6
, t7
, t8
, t9
;
48 uint64_t t10
, t11
, t12
, t13
, t14
, t15
, t16
, t17
, t18
, t19
;
49 uint64_t t20
, t21
, t22
, t23
, t24
, t25
, t26
, t27
, t28
, t29
;
50 uint64_t t30
, t31
, t32
, t33
, t34
, t35
, t36
, t37
, t38
, t39
;
51 uint64_t t40
, t41
, t42
, t43
, t44
, t45
, t46
, t47
, t48
, t49
;
52 uint64_t t50
, t51
, t52
, t53
, t54
, t55
, t56
, t57
, t58
, t59
;
53 uint64_t t60
, t61
, t62
, t63
, t64
, t65
, t66
, t67
;
/* Outputs: the S-box result words (s0 = high bit). */
54 uint64_t s0
, s1
, s2
, s3
, s4
, s5
, s6
, s7
;
/*
 * NOTE(review): the circuit statements that belong between these two
 * section markers are absent from this extract.
 */
66 * Top linear transformation.
161 * Bottom linear transformation.
/*
 * Orthogonalization: converts q[0..7] between byte-oriented and
 * bitsliced representations (an involution; calling it twice restores
 * the original layout -- see the double call around the S-box below).
 *
 * NOTE(review): garbled extract.  The SWAPN macro is missing its
 * opening temporaries (the 'a'/'b' locals it reads were declared on
 * stripped lines) and its "} while (0)" terminator, and the function
 * body -- the SWAP2/SWAP4/SWAP8 calls on pairs of q[] words -- is
 * missing entirely.  Recover the complete original before editing.
 */
206 br_aes_ct64_ortho(uint64_t *q
)
/* Masked swap of bit groups of width s between words x and y. */
208 #define SWAPN(cl, ch, s, x, y) do { \
212 (x) = (a & (uint64_t)cl) | ((b & (uint64_t)cl) << (s)); \
213 (y) = ((a & (uint64_t)ch) >> (s)) | (b & (uint64_t)ch); \
/* Specializations: swap 1-, 2- and 4-bit groups respectively. */
216 #define SWAP2(x, y) SWAPN(0x5555555555555555, 0xAAAAAAAAAAAAAAAA, 1, x, y)
217 #define SWAP4(x, y) SWAPN(0x3333333333333333, 0xCCCCCCCCCCCCCCCC, 2, x, y)
218 #define SWAP8(x, y) SWAPN(0x0F0F0F0F0F0F0F0F, 0xF0F0F0F0F0F0F0F0, 4, x, y)
/*
 * Interleave four 32-bit words (one AES block) from w[] into the two
 * 64-bit words *q0 and *q1 used by the bitsliced representation.
 *
 * NOTE(review): garbled extract.  The statements that load w[0..3]
 * into x0..x3 and the spreading shifts that belong between the two
 * masking passes below are missing; only the mask/combine lines
 * survive.  Recover the complete original before editing.
 */
238 br_aes_ct64_interleave_in(uint64_t *q0
, uint64_t *q1
, const uint32_t *w
)
240 uint64_t x0
, x1
, x2
, x3
;
/* First masking pass: keep the low 16 bits of each 32-bit half. */
250 x0
&= (uint64_t)0x0000FFFF0000FFFF;
251 x1
&= (uint64_t)0x0000FFFF0000FFFF;
252 x2
&= (uint64_t)0x0000FFFF0000FFFF;
253 x3
&= (uint64_t)0x0000FFFF0000FFFF;
/* Second masking pass: keep the low byte of each 16-bit group. */
258 x0
&= (uint64_t)0x00FF00FF00FF00FF;
259 x1
&= (uint64_t)0x00FF00FF00FF00FF;
260 x2
&= (uint64_t)0x00FF00FF00FF00FF;
261 x3
&= (uint64_t)0x00FF00FF00FF00FF;
/* Merge byte-spread words pairwise into the two output words. */
262 *q0
= x0
| (x2
<< 8);
263 *q1
= x1
| (x3
<< 8);
/*
 * Inverse of br_aes_ct64_interleave_in: extract four 32-bit words
 * (one AES block) into w[0..3] from the two 64-bit interleaved words
 * q0 and q1.
 *
 * NOTE(review): garbled extract.  The compacting shift/OR statements
 * that belong between the byte-extraction pass and the 16-bit masking
 * pass below are missing.  Recover the complete original before
 * editing.
 */
268 br_aes_ct64_interleave_out(uint32_t *w
, uint64_t q0
, uint64_t q1
)
270 uint64_t x0
, x1
, x2
, x3
;
/* Pull the even bytes (x0, x1) and odd bytes (x2, x3) apart again. */
272 x0
= q0
& (uint64_t)0x00FF00FF00FF00FF;
273 x1
= q1
& (uint64_t)0x00FF00FF00FF00FF;
274 x2
= (q0
>> 8) & (uint64_t)0x00FF00FF00FF00FF;
275 x3
= (q1
>> 8) & (uint64_t)0x00FF00FF00FF00FF;
/* Keep the low 16 bits of each 32-bit half before final folding. */
280 x0
&= (uint64_t)0x0000FFFF0000FFFF;
281 x1
&= (uint64_t)0x0000FFFF0000FFFF;
282 x2
&= (uint64_t)0x0000FFFF0000FFFF;
283 x3
&= (uint64_t)0x0000FFFF0000FFFF;
/* Fold each 64-bit word's two halves into one 32-bit output word. */
284 w
[0] = (uint32_t)x0
| (uint32_t)(x0
>> 16);
285 w
[1] = (uint32_t)x1
| (uint32_t)(x1
>> 16);
286 w
[2] = (uint32_t)x2
| (uint32_t)(x2
>> 16);
287 w
[3] = (uint32_t)x3
| (uint32_t)(x3
>> 16);
/*
 * AES key-schedule round constants (Rcon[0..9]); table is missing its
 * closing "};" in this extract.
 */
290 static const unsigned char Rcon
[] = {
291 0x01, 0x02, 0x04, 0x08, 0x10, 0x20, 0x40, 0x80, 0x1B, 0x36
/*
 * NOTE(review): the lines below are the tail of a helper (presumably
 * sub_word: apply the S-box to one 32-bit word for the key schedule --
 * confirm against the complete original).  Its header, the 'q' array
 * declaration, and the statement storing the input into q[0] were
 * stripped from this extract.  What survives: zero q, orthogonalize,
 * run the bitsliced S-box, orthogonalize back, return q[0].
 */
299 memset(q
, 0, sizeof q
);
301 br_aes_ct64_ortho(q
);
302 br_aes_ct64_bitslice_Sbox(q
);
303 br_aes_ct64_ortho(q
);
304 return (uint32_t)q
[0];
/*
 * AES key schedule: expand the raw key (key_len bytes) into the
 * compressed bitsliced subkeys comp_skey[].
 *
 * NOTE(review): garbled extract.  Missing from view: the declarations
 * of num_rounds, skey[], tmp and q[]; the key_len dispatch that sets
 * num_rounds; the interior of the expansion loop (the plain
 * "tmp = sub_word(tmp)" branch, the XOR into skey[i], and the j/k
 * bookkeeping); and the statements that store the masked q[] values
 * into comp_skey[].  Recover the complete original before editing.
 */
309 br_aes_ct64_keysched(uint64_t *comp_skey
, const void *key
, size_t key_len
)
312 int i
, j
, k
, nk
, nkf
;
/* nk = key length in 32-bit words; nkf = total subkey words needed. */
330 nk
= (int)(key_len
>> 2);
331 nkf
= (int)((num_rounds
+ 1) << 2);
/* Decode the raw key (little-endian 32-bit words) into skey[]. */
332 br_range_dec32le(skey
, (key_len
>> 2), key
);
333 tmp
= skey
[(key_len
>> 2) - 1];
/* Classic AES expansion: derive words nk..nkf-1 from earlier words. */
334 for (i
= nk
, j
= 0, k
= 0; i
< nkf
; i
++) {
/* RotWord then SubWord ^ Rcon at the start of each key-length group. */
336 tmp
= (tmp
<< 24) | (tmp
>> 8);
337 tmp
= sub_word(tmp
) ^ Rcon
[k
];
/* AES-256 only: extra SubWord at offset 4 within each group. */
338 } else if (nk
> 6 && j
== 4) {
/* Convert expanded subkeys to the compressed bitsliced form. */
349 for (i
= 0, j
= 0; i
< nkf
; i
+= 4, j
+= 2) {
352 br_aes_ct64_interleave_in(&q
[0], &q
[4], skey
+ i
);
359 br_aes_ct64_ortho(q
);
/*
 * Compress eight bitslice words into two by keeping one bit in four
 * from each of q[0..3] (resp. q[4..7]); the assignment targets
 * (comp_skey[j] / comp_skey[j+1], presumably) were stripped.
 */
361 (q
[0] & (uint64_t)0x1111111111111111)
362 | (q
[1] & (uint64_t)0x2222222222222222)
363 | (q
[2] & (uint64_t)0x4444444444444444)
364 | (q
[3] & (uint64_t)0x8888888888888888);
366 (q
[4] & (uint64_t)0x1111111111111111)
367 | (q
[5] & (uint64_t)0x2222222222222222)
368 | (q
[6] & (uint64_t)0x4444444444444444)
369 | (q
[7] & (uint64_t)0x8888888888888888);
/*
 * Expand compressed subkeys comp_skey[] back to the full bitsliced
 * subkeys skey[]: each compressed word yields four output words, one
 * per kept bit position.
 *
 * NOTE(review): garbled extract.  Missing from view: the declarations
 * of u, v and n; the ">>= 1 / >>= 2 / >>= 3" alignment shifts that
 * belong between the masking pass and the stores; and the closing
 * braces.  Recover the complete original before editing.
 */
376 br_aes_ct64_skey_expand(uint64_t *skey
,
377 unsigned num_rounds
, const uint64_t *comp_skey
)
/* Two compressed words per round key, rounds 0..num_rounds. */
381 n
= (num_rounds
+ 1) << 1;
382 for (u
= 0, v
= 0; u
< n
; u
++, v
+= 4) {
383 uint64_t x0
, x1
, x2
, x3
;
/* Isolate each of the four interleaved bit positions. */
385 x0
= x1
= x2
= x3
= comp_skey
[u
];
386 x0
&= (uint64_t)0x1111111111111111;
387 x1
&= (uint64_t)0x2222222222222222;
388 x2
&= (uint64_t)0x4444444444444444;
389 x3
&= (uint64_t)0x8888888888888888;
/*
 * (x << 4) - x replicates each kept bit into all four positions of
 * its nibble (multiply by 15), rebuilding a full bitslice word.
 */
393 skey
[v
+ 0] = (x0
<< 4) - x0
;
394 skey
[v
+ 1] = (x1
<< 4) - x1
;
395 skey
[v
+ 2] = (x2
<< 4) - x2
;
396 skey
[v
+ 3] = (x3
<< 4) - x3
;