X-Git-Url: https://www.bearssl.org/gitweb//home/git/?p=BearSSL;a=blobdiff_plain;f=src%2Fint%2Fi15_montmul.c;h=e98bc32c42ebb2e8c41d569c0bf80a4f42f62091;hp=8da938fb7704594dba9d9cb47920d7d68dcb4fc4;hb=a0054ad8212ecf0f31608fee60cafb32fbde789d;hpb=2f454aad577ae53798935cc32438a2d3f02ba31f diff --git a/src/int/i15_montmul.c b/src/int/i15_montmul.c index 8da938f..e98bc32 100644 --- a/src/int/i15_montmul.c +++ b/src/int/i15_montmul.c @@ -42,7 +42,99 @@ br_i15_montymul(uint16_t *d, const uint16_t *x, const uint16_t *y, xu = x[u + 1]; f = MUL15((d[1] + MUL15(x[u + 1], y[1])) & 0x7FFF, m0i) & 0x7FFF; +#if BR_ARMEL_CORTEXM_GCC + if (len4 != 0) { + uint16_t *limit; + limit = d + len4; + asm volatile ( +"\n\ + @ carry: r=r2 \n\ + @ multipliers: xu=r3 f=r4 \n\ + @ base registers: d+v=r5 y+v=r6 m+v=r7 \n\ + @ r8 contains 0x7FFF \n\ + @ r9 contains d+len4 \n\ + ldr r0, %[limit] \n\ + ldr r3, %[xu] \n\ + mov r9, r0 \n\ + ldr r4, %[f] \n\ + eor r2, r2 \n\ + ldr r5, %[d] \n\ + sub r1, r2, #1 \n\ + ldr r6, %[y] \n\ + lsr r1, r1, #17 \n\ + ldr r7, %[m] \n\ + mov r8, r1 \n\ +loop%=: \n\ + ldrh r0, [r6, #2] \n\ + ldrh r1, [r7, #2] \n\ + mul r0, r3 \n\ + mul r1, r4 \n\ + add r2, r0, r2 \n\ + ldrh r0, [r5, #2] \n\ + add r2, r1, r2 \n\ + mov r1, r8 \n\ + add r2, r0, r2 \n\ + and r1, r2 \n\ + lsr r2, r2, #15 \n\ + strh r1, [r5, #0] \n\ + \n\ + ldrh r0, [r6, #4] \n\ + ldrh r1, [r7, #4] \n\ + mul r0, r3 \n\ + mul r1, r4 \n\ + add r2, r0, r2 \n\ + ldrh r0, [r5, #4] \n\ + add r2, r1, r2 \n\ + mov r1, r8 \n\ + add r2, r0, r2 \n\ + and r1, r2 \n\ + lsr r2, r2, #15 \n\ + strh r1, [r5, #2] \n\ + \n\ + ldrh r0, [r6, #6] \n\ + ldrh r1, [r7, #6] \n\ + mul r0, r3 \n\ + mul r1, r4 \n\ + add r2, r0, r2 \n\ + ldrh r0, [r5, #6] \n\ + add r2, r1, r2 \n\ + mov r1, r8 \n\ + add r2, r0, r2 \n\ + and r1, r2 \n\ + lsr r2, r2, #15 \n\ + strh r1, [r5, #4] \n\ + \n\ + ldrh r0, [r6, #8] \n\ + ldrh r1, [r7, #8] \n\ + mul r0, r3 \n\ + mul r1, r4 \n\ + add r2, r0, r2 \n\ + ldrh r0, [r5, #8] \n\ + add r2, r1, r2 \n\ + mov r1, r8 \n\ + add r2, r0, r2 \n\ + and r1, r2 \n\ + lsr r2, r2, #15 \n\ + strh r1, [r5, #6] \n\ + \n\ + add r5, r5, #8 \n\ + add r6, r6, #8 \n\ + add r7, r7, #8 \n\ + cmp r5, r9 \n\ + bne loop%= \n\ + \n\ + str r2, %[carry] \n\ +" +: [carry] "=m" (r) +: [xu] "m" (xu), [f] "m" (f), [d] "m" (d), [y] "m" (y), + [m] "m" (m), [limit] "m" (limit) +: "r0", "r1", "r2", "r3", "r4", "r5", "r6", "r7", "r8", "r9" ); + } else { + r = 0; + } + v = len4; +#else r = 0; for (v = 0; v < len4; v += 4) { uint32_t z; @@ -64,6 +156,7 @@ br_i15_montymul(uint16_t *d, const uint16_t *x, const uint16_t *y, r = z >> 15; d[v + 3] = z & 0x7FFF; } +#endif for (; v < len; v ++) { uint32_t z;