Signed-off-by: Amitay Isaacs <amitay@ozlabs.org>
Signed-off-by: Martin Schwenke <martin@meltin.net>
---
 powerpc64/ecc-secp256r1-redc.asm | 144 +++++++++++++++++++++++++++++++
 1 file changed, 144 insertions(+)
 create mode 100644 powerpc64/ecc-secp256r1-redc.asm
diff --git a/powerpc64/ecc-secp256r1-redc.asm b/powerpc64/ecc-secp256r1-redc.asm new file mode 100644 index 00000000..59447567 --- /dev/null +++ b/powerpc64/ecc-secp256r1-redc.asm @@ -0,0 +1,144 @@ +C powerpc64/ecc-secp256r1-redc.asm + +ifelse(` + Copyright (C) 2021 Amitay Isaacs & Martin Schwenke, IBM Corporation + + Based on x86_64/ecc-secp256r1-redc.asm + + This file is part of GNU Nettle. + + GNU Nettle is free software: you can redistribute it and/or + modify it under the terms of either: + + * the GNU Lesser General Public License as published by the Free + Software Foundation; either version 3 of the License, or (at your + option) any later version. + + or + + * the GNU General Public License as published by the Free + Software Foundation; either version 2 of the License, or (at your + option) any later version. + + or both in parallel, as here. + + GNU Nettle is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. + + You should have received copies of the GNU General Public License and + the GNU Lesser General Public License along with this program. If + not, see http://www.gnu.org/licenses/. 
+')
+
+C Register usage (each name is an m4 alias for a general purpose register):
+
+define(`SP', `r1')	C base register for the U4-U7 save slots
+
+define(`RP', `r4')	C the four result limbs are stored through this pointer
+define(`XP', `r5')	C the eight input limbs are loaded through this pointer
+
+define(`F0', `r3')	C F0-F3: scratch limbs written by FOLD; r3 presumably arrives holding p, which is never read
+define(`F1', `r6')
+define(`F2', `r7')
+define(`F3', `r8')
+
+define(`U0', `r9')	C U0-U7: the input limbs, U0 from 0(XP) up to U7 from 56(XP)
+define(`U1', `r10')
+define(`U2', `r11')
+define(`U3', `r12')
+define(`U4', `r14')	C U4-U7 (r14-r17) are saved on entry and reloaded before blr
+define(`U5', `r15')
+define(`U6', `r16')
+define(`U7', `r17')
+
+	.file "ecc-secp256r1-redc.asm"
+
+C FOLD(x), sets (F3,F2,F1,F0) <-- [(x << 224) - (x << 192) - (x << 96)] >> 64
+define(`FOLD', `
+	sldi	F2, $1, 32	C F2 = x shifted left by 32
+	srdi	F3, $1, 32	C F3 = x shifted right by 32
+	li	F0, 0
+	li	F1, 0
+	subfc	F0, F2, F0	C F0 = 0 - F2, starts the borrow chain
+	subfe	F1, F3, F1	C F1 = 0 - F3 - borrow
+	subfe	F2, $1, F2	C F2 = F2 - x - borrow
+	addme	F3, F3		C F3 = F3 - borrow
+')
+
+	C void ecc_secp256r1_redc (const struct ecc_modulo *p, mp_limb_t *rp, mp_limb_t *xp)
+	.text			C Loads 8 limbs from xp, runs four FOLD rounds, adds a carry fix-up, stores 4 limbs to rp.
+define(`FUNC_ALIGN', `5')	C presumably consumed by PROLOGUE, which is defined outside this file
+PROLOGUE(_nettle_ecc_secp256r1_redc)	C NOTE(review): looks like word-wise Montgomery reduction (REDC) mod p - confirm against the C caller
+
+	std	U4,-32(SP)	C save r14-r17 below SP; NOTE(review): no frame is allocated - confirm the ABI permits stores below SP
+	std	U5,-24(SP)
+	std	U6,-16(SP)
+	std	U7,-8(SP)
+
+	ld	U0, 0(XP)	C load all eight input limbs before any arithmetic
+	ld	U1, 8(XP)
+	ld	U2, 16(XP)
+	ld	U3, 24(XP)
+	ld	U4, 32(XP)
+	ld	U5, 40(XP)
+	ld	U6, 48(XP)
+	ld	U7, 56(XP)
+
+	FOLD(U0)		C round 1: F3..F0 derived from U0
+	subfc	U1, F0, U1	C U1 -= F0, starts the borrow chain
+	subfe	U2, F1, U2	C U2 -= F1 + borrow
+	subfe	U3, F2, U3	C U3 -= F2 + borrow
+	subfe	U0, F3, U0	C U0 -= F3 + borrow; U0 is kept and added to U4 after the four rounds
+
+	FOLD(U1)		C round 2: same pattern, one limb higher, folding U1
+	subfc	U2, F0, U2
+	subfe	U3, F1, U3
+	subfe	U4, F2, U4
+	subfe	U1, F3, U1	C U1 is kept and added to U5 after the four rounds
+
+	FOLD(U2)		C round 3: folding U2
+	subfc	U3, F0, U3
+	subfe	U4, F1, U4
+	subfe	U5, F2, U5
+	subfe	U2, F3, U2	C U2 is kept and added to U6 after the four rounds
+
+	FOLD(U3)		C round 4: folding U3
+	subfc	U4, F0, U4
+	subfe	U5, F1, U5
+	subfe	U6, F2, U6
+	subfe	U3, F3, U3	C U3 is kept and added to U7 after the four rounds
+
+	addc	U0, U4, U0	C (U3,U2,U1,U0) += (U7,U6,U5,U4); the carry out is consumed by addze below
+	adde	U1, U5, U1
+	adde	U2, U6, U2
+	adde	U3, U7, U3
+
+	C If carry, we need to add in
+	C 2^256 - p = <0xfffffffe, 0xff..ff, 0xffffffff00000000, 1>
+	li	F0, 0
+	addze	F0, F0		C F0 = carry from the addition above, 0 or 1
+	neg	F2, F0		C F2 = 0 or all ones
+	sldi	F1, F2, 32	C F1 = 0 or 0xffffffff00000000
+	srdi	F3, F2, 32	C F3 = 0 or 0x00000000ffffffff
+	li	U7, -2		C U7 is no longer needed as a limb, reused as the mask 0xff..fe
+	and	F3, F3, U7	C F3 = 0 or 0xfffffffe
+
+	addc	U0, F0, U0	C add (F3,F2,F1,F0): all zero without carry, 2^256 - p with carry; no branch is used
+	adde	U1, F1, U1
+	adde	U2, F2, U2
+	adde	U3, F3, U3
+
+	std	U0, 0(RP)	C store the four result limbs, lowest offset first
+	std	U1, 8(RP)
+	std	U2, 16(RP)
+	std	U3, 24(RP)
+
+	ld	U4,-32(SP)	C reload r14-r17 from the slots written on entry
+	ld	U5,-24(SP)
+	ld	U6,-16(SP)
+	ld	U7,-8(SP)
+
+	blr
+EPILOGUE(_nettle_ecc_secp256r1_redc)