/*
 * Copyright (c) 2017 Thomas Pornin <pornin@bolet.org>
 *
 * Permission is hereby granted, free of charge, to any person obtaining
 * a copy of this software and associated documentation files (the
 * "Software"), to deal in the Software without restriction, including
 * without limitation the rights to use, copy, modify, merge, publish,
 * distribute, sublicense, and/or sell copies of the Software, and to
 * permit persons to whom the Software is furnished to do so, subject to
 * the following conditions:
 *
 * The above copyright notice and this permission notice shall be
 * included in all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */
25 #define BR_POWER_ASM_MACROS 1
30 /* see bearssl_block.h */
32 br_aes_pwr8_cbcdec_init(br_aes_pwr8_cbcdec_keys
*ctx
,
33 const void *key
, size_t len
)
35 ctx
->vtable
= &br_aes_pwr8_cbcdec_vtable
;
36 ctx
->num_rounds
= br_aes_pwr8_keysched(ctx
->skey
.skni
, key
, len
);
/*
 * cbcdec_128: in-place CBC decryption kernel for a schedule of eleven
 * 16-byte subkeys (loaded below into v0..v10).
 *
 *   sk          subkey array, read at byte offsets 0, 16, ..., 160
 *   iv          IV pointer, passed to the asm as the [iv] operand
 *   buf         data buffer; four 16-byte words are loaded at offsets
 *               cc0..cc3, written back at the same offsets, and buf is
 *               then advanced by 64
 *   num_blocks  block count; the asm receives num_blocks >> 2, so only
 *               whole groups of four blocks are accounted for
 *
 * NOTE(review): callers presumably must pass a non-zero multiple of
 * four blocks -- confirm against the loop-counter setup.
 */
40 cbcdec_128(const unsigned char *sk
,
41 const unsigned char *iv
, unsigned char *buf
, size_t num_blocks
)
/* cc0..cc3 are the index registers used for the indexed loads/stores. */
43 long cc0
, cc1
, cc2
, cc3
;
/*
 * Permutation pattern loaded into v15 and applied through vperm to
 * byteswap each 32-bit word.
 * NOTE(review): presumably needed on little-endian targets only --
 * confirm the surrounding preprocessor guard.
 */
46 static const uint32_t idx2be
[] = {
47 0x03020100, 0x07060504, 0x0B0A0908, 0x0F0E0D0C
/*
 * Subkey load: cc0 is the running byte offset into sk and is bumped by
 * 16 after every load. lxvw4x takes VSX register numbers; 32..42 here
 * are the registers the comment below calls v0..v10.
 */
58 * Load subkeys into v0..v10
60 lxvw4x(32, %[cc0
], %[sk
])
61 addi(%[cc0
], %[cc0
], 16)
62 lxvw4x(33, %[cc0
], %[sk
])
63 addi(%[cc0
], %[cc0
], 16)
64 lxvw4x(34, %[cc0
], %[sk
])
65 addi(%[cc0
], %[cc0
], 16)
66 lxvw4x(35, %[cc0
], %[sk
])
67 addi(%[cc0
], %[cc0
], 16)
68 lxvw4x(36, %[cc0
], %[sk
])
69 addi(%[cc0
], %[cc0
], 16)
70 lxvw4x(37, %[cc0
], %[sk
])
71 addi(%[cc0
], %[cc0
], 16)
72 lxvw4x(38, %[cc0
], %[sk
])
73 addi(%[cc0
], %[cc0
], 16)
74 lxvw4x(39, %[cc0
], %[sk
])
75 addi(%[cc0
], %[cc0
], 16)
76 lxvw4x(40, %[cc0
], %[sk
])
77 addi(%[cc0
], %[cc0
], 16)
78 lxvw4x(41, %[cc0
], %[sk
])
79 addi(%[cc0
], %[cc0
], 16)
80 lxvw4x(42, %[cc0
], %[sk
])
85 * v15 = constant for byteswapping words
87 lxvw4x(47, 0, %[idx2be
])
/*
 * Four ciphertext blocks are fetched at buf+cc0 .. buf+cc3 into VSX
 * registers 48..51 (v16..v19) and byteswapped through v15.
 */
100 * Load next ciphertext words in v16..v19. Also save them
103 lxvw4x(48, %[cc0
], %[buf
])
104 lxvw4x(49, %[cc1
], %[buf
])
105 lxvw4x(50, %[cc2
], %[buf
])
106 lxvw4x(51, %[cc3
], %[buf
])
108 vperm(16, 16, 16, 15)
109 vperm(17, 17, 17, 15)
110 vperm(18, 18, 18, 15)
111 vperm(19, 19, 19, 15)
119 * Decrypt the blocks.
/* The final inverse round uses v0, i.e. the subkey loaded first from sk. */
161 vncipherlast(16, 16, 0)
162 vncipherlast(17, 17, 0)
163 vncipherlast(18, 18, 0)
164 vncipherlast(19, 19, 0)
167 * XOR decrypted blocks with IV / previous block.
/*
 * Results are swapped back to memory byte order and written over the
 * ciphertext they came from (same cc0..cc3 offsets).
 */
175 * Store back result (with byteswap)
178 vperm(16, 16, 16, 15)
179 vperm(17, 17, 17, 15)
180 vperm(18, 18, 18, 15)
181 vperm(19, 19, 19, 15)
183 stxvw4x(48, %[cc0
], %[buf
])
184 stxvw4x(49, %[cc1
], %[buf
])
185 stxvw4x(50, %[cc2
], %[buf
])
186 stxvw4x(51, %[cc3
], %[buf
])
189 * Fourth encrypted block is IV for next run.
/* Step to the next group of four blocks (4 * 16 bytes). */
193 addi(%[buf
], %[buf
], 64)
/* Output operands: the four index registers are read-write ("+b"). */
197 : [cc0
] "+b" (cc0
), [cc1
] "+b" (cc1
), [cc2
] "+b" (cc2
), [cc3
] "+b" (cc3
),
/* Input operands; note the count is given in groups of four blocks. */
199 : [sk
] "b" (sk
), [iv
] "b" (iv
), [num_blocks
] "b" (num_blocks
>> 2)
201 , [idx2be
] "b" (idx2be
)
/* Clobber list: vector registers declared as modified by the asm. */
203 : "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8", "v9",
204 "v10", "v11", "v12", "v13", "v14", "v15", "v16", "v17", "v18", "v19",
205 "v20", "v21", "v22", "v23", "v24", "v25", "v26", "v27", "v28", "v29",
/*
 * cbcdec_192: in-place CBC decryption kernel for a schedule of thirteen
 * 16-byte subkeys (loaded below into v0..v12).
 *
 *   sk          subkey array, read at byte offsets 0, 16, ..., 192
 *   iv          IV pointer, passed to the asm as the [iv] operand
 *   buf         data buffer; four 16-byte words are loaded at offsets
 *               cc0..cc3, written back at the same offsets, and buf is
 *               then advanced by 64
 *   num_blocks  block count; the asm receives num_blocks >> 2, so only
 *               whole groups of four blocks are accounted for
 *
 * NOTE(review): callers presumably must pass a non-zero multiple of
 * four blocks -- confirm against the loop-counter setup.
 */
211 cbcdec_192(const unsigned char *sk
,
212 const unsigned char *iv
, unsigned char *buf
, size_t num_blocks
)
/* cc0..cc3 are the index registers used for the indexed loads/stores. */
214 long cc0
, cc1
, cc2
, cc3
;
/*
 * Permutation pattern loaded into v15 and applied through vperm to
 * byteswap each 32-bit word.
 * NOTE(review): presumably needed on little-endian targets only --
 * confirm the surrounding preprocessor guard.
 */
217 static const uint32_t idx2be
[] = {
218 0x03020100, 0x07060504, 0x0B0A0908, 0x0F0E0D0C
/*
 * Subkey load: cc0 is the running byte offset into sk and is bumped by
 * 16 after every load. lxvw4x takes VSX register numbers; 32..44 here
 * are the registers the comment below calls v0..v12.
 */
229 * Load subkeys into v0..v12
231 lxvw4x(32, %[cc0
], %[sk
])
232 addi(%[cc0
], %[cc0
], 16)
233 lxvw4x(33, %[cc0
], %[sk
])
234 addi(%[cc0
], %[cc0
], 16)
235 lxvw4x(34, %[cc0
], %[sk
])
236 addi(%[cc0
], %[cc0
], 16)
237 lxvw4x(35, %[cc0
], %[sk
])
238 addi(%[cc0
], %[cc0
], 16)
239 lxvw4x(36, %[cc0
], %[sk
])
240 addi(%[cc0
], %[cc0
], 16)
241 lxvw4x(37, %[cc0
], %[sk
])
242 addi(%[cc0
], %[cc0
], 16)
243 lxvw4x(38, %[cc0
], %[sk
])
244 addi(%[cc0
], %[cc0
], 16)
245 lxvw4x(39, %[cc0
], %[sk
])
246 addi(%[cc0
], %[cc0
], 16)
247 lxvw4x(40, %[cc0
], %[sk
])
248 addi(%[cc0
], %[cc0
], 16)
249 lxvw4x(41, %[cc0
], %[sk
])
250 addi(%[cc0
], %[cc0
], 16)
251 lxvw4x(42, %[cc0
], %[sk
])
252 addi(%[cc0
], %[cc0
], 16)
253 lxvw4x(43, %[cc0
], %[sk
])
254 addi(%[cc0
], %[cc0
], 16)
255 lxvw4x(44, %[cc0
], %[sk
])
260 * v15 = constant for byteswapping words
262 lxvw4x(47, 0, %[idx2be
])
/*
 * v24 is byteswapped through v15 here.
 * NOTE(review): presumably v24 holds the IV at this point -- confirm
 * against the instruction that loads it.
 */
269 vperm(24, 24, 24, 15)
/*
 * Four ciphertext blocks are fetched at buf+cc0 .. buf+cc3 into VSX
 * registers 48..51 (v16..v19) and byteswapped through v15.
 */
275 * Load next ciphertext words in v16..v19. Also save them
278 lxvw4x(48, %[cc0
], %[buf
])
279 lxvw4x(49, %[cc1
], %[buf
])
280 lxvw4x(50, %[cc2
], %[buf
])
281 lxvw4x(51, %[cc3
], %[buf
])
283 vperm(16, 16, 16, 15)
284 vperm(17, 17, 17, 15)
285 vperm(18, 18, 18, 15)
286 vperm(19, 19, 19, 15)
294 * Decrypt the blocks.
/* The final inverse round uses v0, i.e. the subkey loaded first from sk. */
344 vncipherlast(16, 16, 0)
345 vncipherlast(17, 17, 0)
346 vncipherlast(18, 18, 0)
347 vncipherlast(19, 19, 0)
350 * XOR decrypted blocks with IV / previous block.
/*
 * Results are swapped back to memory byte order and written over the
 * ciphertext they came from (same cc0..cc3 offsets).
 */
358 * Store back result (with byteswap)
361 vperm(16, 16, 16, 15)
362 vperm(17, 17, 17, 15)
363 vperm(18, 18, 18, 15)
364 vperm(19, 19, 19, 15)
366 stxvw4x(48, %[cc0
], %[buf
])
367 stxvw4x(49, %[cc1
], %[buf
])
368 stxvw4x(50, %[cc2
], %[buf
])
369 stxvw4x(51, %[cc3
], %[buf
])
372 * Fourth encrypted block is IV for next run.
/* Step to the next group of four blocks (4 * 16 bytes). */
376 addi(%[buf
], %[buf
], 64)
/* Output operands: the four index registers are read-write ("+b"). */
380 : [cc0
] "+b" (cc0
), [cc1
] "+b" (cc1
), [cc2
] "+b" (cc2
), [cc3
] "+b" (cc3
),
/* Input operands; note the count is given in groups of four blocks. */
382 : [sk
] "b" (sk
), [iv
] "b" (iv
), [num_blocks
] "b" (num_blocks
>> 2)
384 , [idx2be
] "b" (idx2be
)
/* Clobber list: vector registers declared as modified by the asm. */
386 : "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8", "v9",
387 "v10", "v11", "v12", "v13", "v14", "v15", "v16", "v17", "v18", "v19",
388 "v20", "v21", "v22", "v23", "v24", "v25", "v26", "v27", "v28", "v29",
/*
 * cbcdec_256: in-place CBC decryption kernel for a schedule of fifteen
 * 16-byte subkeys (loaded below into v0..v14).
 *
 *   sk          subkey array, read at byte offsets 0, 16, ..., 224
 *   iv          IV pointer, passed to the asm as the [iv] operand
 *   buf         data buffer; four 16-byte words are loaded at offsets
 *               cc0..cc3, written back at the same offsets, and buf is
 *               then advanced by 64
 *   num_blocks  block count; the asm receives num_blocks >> 2, so only
 *               whole groups of four blocks are accounted for
 *
 * NOTE(review): callers presumably must pass a non-zero multiple of
 * four blocks -- confirm against the loop-counter setup.
 */
394 cbcdec_256(const unsigned char *sk
,
395 const unsigned char *iv
, unsigned char *buf
, size_t num_blocks
)
/* cc0..cc3 are the index registers used for the indexed loads/stores. */
397 long cc0
, cc1
, cc2
, cc3
;
/*
 * Permutation pattern loaded into v15 and applied through vperm to
 * byteswap each 32-bit word.
 * NOTE(review): presumably needed on little-endian targets only --
 * confirm the surrounding preprocessor guard.
 */
400 static const uint32_t idx2be
[] = {
401 0x03020100, 0x07060504, 0x0B0A0908, 0x0F0E0D0C
/*
 * Subkey load: cc0 is the running byte offset into sk and is bumped by
 * 16 after every load. lxvw4x takes VSX register numbers; 32..46 here
 * are the registers the comment below calls v0..v14.
 */
412 * Load subkeys into v0..v14
414 lxvw4x(32, %[cc0
], %[sk
])
415 addi(%[cc0
], %[cc0
], 16)
416 lxvw4x(33, %[cc0
], %[sk
])
417 addi(%[cc0
], %[cc0
], 16)
418 lxvw4x(34, %[cc0
], %[sk
])
419 addi(%[cc0
], %[cc0
], 16)
420 lxvw4x(35, %[cc0
], %[sk
])
421 addi(%[cc0
], %[cc0
], 16)
422 lxvw4x(36, %[cc0
], %[sk
])
423 addi(%[cc0
], %[cc0
], 16)
424 lxvw4x(37, %[cc0
], %[sk
])
425 addi(%[cc0
], %[cc0
], 16)
426 lxvw4x(38, %[cc0
], %[sk
])
427 addi(%[cc0
], %[cc0
], 16)
428 lxvw4x(39, %[cc0
], %[sk
])
429 addi(%[cc0
], %[cc0
], 16)
430 lxvw4x(40, %[cc0
], %[sk
])
431 addi(%[cc0
], %[cc0
], 16)
432 lxvw4x(41, %[cc0
], %[sk
])
433 addi(%[cc0
], %[cc0
], 16)
434 lxvw4x(42, %[cc0
], %[sk
])
435 addi(%[cc0
], %[cc0
], 16)
436 lxvw4x(43, %[cc0
], %[sk
])
437 addi(%[cc0
], %[cc0
], 16)
438 lxvw4x(44, %[cc0
], %[sk
])
439 addi(%[cc0
], %[cc0
], 16)
440 lxvw4x(45, %[cc0
], %[sk
])
441 addi(%[cc0
], %[cc0
], 16)
442 lxvw4x(46, %[cc0
], %[sk
])
447 * v15 = constant for byteswapping words
449 lxvw4x(47, 0, %[idx2be
])
/*
 * v24 is byteswapped through v15 here.
 * NOTE(review): presumably v24 holds the IV at this point -- confirm
 * against the instruction that loads it.
 */
456 vperm(24, 24, 24, 15)
/*
 * Four ciphertext blocks are fetched at buf+cc0 .. buf+cc3 into VSX
 * registers 48..51 (v16..v19) and byteswapped through v15.
 */
462 * Load next ciphertext words in v16..v19. Also save them
465 lxvw4x(48, %[cc0
], %[buf
])
466 lxvw4x(49, %[cc1
], %[buf
])
467 lxvw4x(50, %[cc2
], %[buf
])
468 lxvw4x(51, %[cc3
], %[buf
])
470 vperm(16, 16, 16, 15)
471 vperm(17, 17, 17, 15)
472 vperm(18, 18, 18, 15)
473 vperm(19, 19, 19, 15)
481 * Decrypt the blocks.
/* The final inverse round uses v0, i.e. the subkey loaded first from sk. */
539 vncipherlast(16, 16, 0)
540 vncipherlast(17, 17, 0)
541 vncipherlast(18, 18, 0)
542 vncipherlast(19, 19, 0)
545 * XOR decrypted blocks with IV / previous block.
/*
 * Results are swapped back to memory byte order and written over the
 * ciphertext they came from (same cc0..cc3 offsets).
 */
553 * Store back result (with byteswap)
556 vperm(16, 16, 16, 15)
557 vperm(17, 17, 17, 15)
558 vperm(18, 18, 18, 15)
559 vperm(19, 19, 19, 15)
561 stxvw4x(48, %[cc0
], %[buf
])
562 stxvw4x(49, %[cc1
], %[buf
])
563 stxvw4x(50, %[cc2
], %[buf
])
564 stxvw4x(51, %[cc3
], %[buf
])
567 * Fourth encrypted block is IV for next run.
/* Step to the next group of four blocks (4 * 16 bytes). */
571 addi(%[buf
], %[buf
], 64)
/* Output operands: the four index registers are read-write ("+b"). */
575 : [cc0
] "+b" (cc0
), [cc1
] "+b" (cc1
), [cc2
] "+b" (cc2
), [cc3
] "+b" (cc3
),
/* Input operands; note the count is given in groups of four blocks. */
577 : [sk
] "b" (sk
), [iv
] "b" (iv
), [num_blocks
] "b" (num_blocks
>> 2)
579 , [idx2be
] "b" (idx2be
)
/* Clobber list: vector registers declared as modified by the asm. */
581 : "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8", "v9",
582 "v10", "v11", "v12", "v13", "v14", "v15", "v16", "v17", "v18", "v19",
583 "v20", "v21", "v22", "v23", "v24", "v25", "v26", "v27", "v28", "v29",
588 /* see bearssl_block.h */
590 br_aes_pwr8_cbcdec_run(const br_aes_pwr8_cbcdec_keys
*ctx
,
591 void *iv
, void *data
, size_t len
)
593 unsigned char nextiv
[16];
600 memcpy(nextiv
, buf
+ len
- 16, 16);
603 unsigned char tmp
[16];
605 num_blocks
= (len
>> 4) & ~(size_t)3;
606 memcpy(tmp
, buf
+ (num_blocks
<< 4) - 16, 16);
607 switch (ctx
->num_rounds
) {
609 cbcdec_128(ctx
->skey
.skni
, iv
, buf
, num_blocks
);
612 cbcdec_192(ctx
->skey
.skni
, iv
, buf
, num_blocks
);
615 cbcdec_256(ctx
->skey
.skni
, iv
, buf
, num_blocks
);
618 buf
+= num_blocks
<< 4;
623 unsigned char tmp
[64];
625 memcpy(tmp
, buf
, len
);
626 memset(tmp
+ len
, 0, (sizeof tmp
) - len
);
627 switch (ctx
->num_rounds
) {
629 cbcdec_128(ctx
->skey
.skni
, iv
, tmp
, 4);
632 cbcdec_192(ctx
->skey
.skni
, iv
, tmp
, 4);
635 cbcdec_256(ctx
->skey
.skni
, iv
, tmp
, 4);
638 memcpy(buf
, tmp
, len
);
640 memcpy(iv
, nextiv
, 16);
643 /* see bearssl_block.h */
644 const br_block_cbcdec_class br_aes_pwr8_cbcdec_vtable
= {
645 sizeof(br_aes_pwr8_cbcdec_keys
),
648 (void (*)(const br_block_cbcdec_class
**, const void *, size_t))
649 &br_aes_pwr8_cbcdec_init
,
650 (void (*)(const br_block_cbcdec_class
*const *, void *, void *, size_t))
651 &br_aes_pwr8_cbcdec_run
654 /* see bearssl_block.h */
655 const br_block_cbcdec_class
*
656 br_aes_pwr8_cbcdec_get_vtable(void)
658 return br_aes_pwr8_supported() ? &br_aes_pwr8_cbcdec_vtable
: NULL
;
663 /* see bearssl_block.h */
664 const br_block_cbcdec_class
*
665 br_aes_pwr8_cbcdec_get_vtable(void)