C arm/v6/aes-decrypt-internal.asm

ifelse(<
   rijndael-arm.S - ARM assembly implementation of AES cipher

   Copyright (C) 2013 Jussi Kivilinna <jussi.kivilinna@iki.fi>

   This file is part of Libgcrypt.

   Libgcrypt is free software; you can redistribute it and/or modify
   it under the terms of the GNU Lesser General Public License as
   published by the Free Software Foundation; either version 2.1 of
   the License, or (at your option) any later version.

   Libgcrypt is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   GNU Lesser General Public License for more details.

   You should have received a copy of the GNU Lesser General Public
   License along with this program; if not, see <http://www.gnu.org/licenses/>.
>)

	.text
	.file "aes-decrypt-internal.asm"
	.arch armv6
	.syntax unified
	.arm

C KEYSCHEDULE_REVERSED selects the subkey layout: yes (assumed here)
C means the key schedule is stored in the order it is used for
C decryption, no means it is kept in encryption order.
define(<KEYSCHEDULE_REVERSED>, <yes>)
define(<IF_KEYSCHEDULE_REVERSED>, <ifelse(KEYSCHEDULE_REVERSED,no,<$2>,<$1>)>)

C register macros
define(<PARAM_ROUNDS>, <r0>)
define(<PARAM_LENGTH>, <r3>)
define(<FRAME_ROUNDS>, <[sp, #0]>)
define(<FRAME_LENGTH>, <[sp, #4]>)
define(<FRAME_DST>, <[sp, #(48+0)]>)
define(<FRAME_SRC>, <[sp, #(48+4)]>)
define(<CTX>, <%r1>)
define(<RTAB>, <%r2>)
define(<RMASK>, <%ip>)
define(<RA>, <%r4>)
define(<RB>, <%r5>)
define(<RC>, <%r6>)
define(<RD>, <%r7>)
define(<RNA>, <%r8>)
define(<RNB>, <%r9>)
define(<RNC>, <%r10>)
define(<RND>, <%r11>)
define(<RT0>, <%r0>)
define(<RT1>, <%r3>)
define(<RT2>, <%lr>)

C helper macros
.macro ldr_unaligned_le rout rsrc offs rtmp
	ldrb \rout, [\rsrc, #((\offs) + 0)]
	ldrb \rtmp, [\rsrc, #((\offs) + 1)]
	orr \rout, \rout, \rtmp, lsl #8
	ldrb \rtmp, [\rsrc, #((\offs) + 2)]
	orr \rout, \rout, \rtmp, lsl #16
	ldrb \rtmp, [\rsrc, #((\offs) + 3)]
	orr \rout, \rout, \rtmp, lsl #24
.endm

.macro str_unaligned_le rin rdst offs rtmp0 rtmp1
	mov \rtmp0, \rin, lsr #8
	strb \rin, [\rdst, #((\offs) + 0)]
	mov \rtmp1, \rin, lsr #16
	strb \rtmp0, [\rdst, #((\offs) + 1)]
	mov \rtmp0, \rin, lsr #24
	strb \rtmp1, [\rdst, #((\offs) + 2)]
	strb \rtmp0, [\rdst, #((\offs) + 3)]
.endm

C ***********************************************************************
C ARM assembly implementation of the AES cipher
C ***********************************************************************

.macro preload_first_key round ra
	IF_KEYSCHEDULE_REVERSED(<
	ldr \ra, [CTX], #+4
	>,<
	ldr \ra, [CTX, #(((\round) * 16) + 0 * 4)]
	>)
.endm

.macro dummy round ra
.endm

.macro addroundkey ra rb rc rd rna rnb rnc rnd preload_key
	IF_KEYSCHEDULE_REVERSED(<
	ldm CTX!, {\rna, \rnb, \rnc, \rnd}
	>,<
	ldm CTX, {\rna, \rnb, \rnc, \rnd}
	>)
	eor \ra, \rna
	eor \rb, \rnb
	eor \rc, \rnc
	\preload_key 1, \rna
	eor \rd, \rnd
.endm

.macro addroundkey_dec round ra rb rc rd rna rnb rnc rnd
	IF_KEYSCHEDULE_REVERSED(<
	addroundkey \ra,\rb,\rc,\rd,\rna,\rnb,\rnc,\rnd,preload_first_key
	>,<
	ldr \rna, [CTX, #(((\round) * 16) + 0 * 4)]
	ldr \rnb, [CTX, #(((\round) * 16) + 1 * 4)]
	eor \ra, \rna
	ldr \rnc, [CTX, #(((\round) * 16) + 2 * 4)]
	eor \rb, \rnb
	ldr \rnd, [CTX, #(((\round) * 16) + 3 * 4)]
	eor \rc, \rnc
	preload_first_key (\round) - 1, \rna
	eor \rd, \rnd
	>)
.endm
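C do_decround computes one inner decryption round from a single
C 256-entry word table at RTAB; the ror #24/#16/#8 applied to the
C looked-up words take the place of the three rotated copies of the
C table.  RMASK holds 0xff << 2 at this point, so an instruction like
C "and RT0, RMASK, \ra, lsl#2" extracts one state byte and scales it
C to a word offset in a single operation.  The \rna..\rnd outputs
C already hold the subkey words when the lookups start, so the eor
C chain folds AddRoundKey into the table lookups, and \preload_key
C fetches the first word of the following subkey early so the load
C overlaps the arithmetic.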
.macro do_decround next_r ra rb rc rd rna rnb rnc rnd preload_key
	IF_KEYSCHEDULE_REVERSED(<
	ldr \rnb, [CTX], #+4
	>,<
	ldr \rnb, [CTX, #(((\next_r) * 16) + 1 * 4)]
	>)
	and RT0, RMASK, \ra, lsl#2
	IF_KEYSCHEDULE_REVERSED(<
	ldr \rnc, [CTX], #+4
	>,<
	ldr \rnc, [CTX, #(((\next_r) * 16) + 2 * 4)]
	>)
	and RT1, RMASK, \ra, lsr#(8 - 2)
	IF_KEYSCHEDULE_REVERSED(<
	ldr \rnd, [CTX], #+4
	>,<
	ldr \rnd, [CTX, #(((\next_r) * 16) + 3 * 4)]
	>)
	and RT2, RMASK, \ra, lsr#(16 - 2)
	ldr RT0, [RTAB, RT0]
	and \ra, RMASK, \ra, lsr#(24 - 2)
	ldr RT1, [RTAB, RT1]
	eor \rna, \rna, RT0
	ldr RT2, [RTAB, RT2]
	and RT0, RMASK, \rb, lsl#2
	ldr \ra, [RTAB, \ra]

	eor \rnb, \rnb, RT1, ror #24
	and RT1, RMASK, \rb, lsr#(8 - 2)
	eor \rnc, \rnc, RT2, ror #16
	and RT2, RMASK, \rb, lsr#(16 - 2)
	eor \rnd, \rnd, \ra, ror #8
	ldr RT0, [RTAB, RT0]
	and \rb, RMASK, \rb, lsr#(24 - 2)
	ldr RT1, [RTAB, RT1]
	eor \rnb, \rnb, RT0
	ldr RT2, [RTAB, RT2]
	and RT0, RMASK, \rc, lsl#2
	ldr \rb, [RTAB, \rb]

	eor \rnc, \rnc, RT1, ror #24
	and RT1, RMASK, \rc, lsr#(8 - 2)
	eor \rnd, \rnd, RT2, ror #16
	and RT2, RMASK, \rc, lsr#(16 - 2)
	eor \rna, \rna, \rb, ror #8
	ldr RT0, [RTAB, RT0]
	and \rc, RMASK, \rc, lsr#(24 - 2)
	ldr RT1, [RTAB, RT1]
	eor \rnc, \rnc, RT0
	ldr RT2, [RTAB, RT2]
	and RT0, RMASK, \rd, lsl#2
	ldr \rc, [RTAB, \rc]

	eor \rnd, \rnd, RT1, ror #24
	and RT1, RMASK, \rd, lsr#(8 - 2)
	eor \rna, \rna, RT2, ror #16
	and RT2, RMASK, \rd, lsr#(16 - 2)
	eor \rnb, \rnb, \rc, ror #8
	ldr RT0, [RTAB, RT0]
	and \rd, RMASK, \rd, lsr#(24 - 2)
	ldr RT1, [RTAB, RT1]
	eor \rnd, \rnd, RT0
	ldr RT2, [RTAB, RT2]
	eor \rna, \rna, RT1, ror #24
	ldr \rd, [RTAB, \rd]
	eor \rnb, \rnb, RT2, ror #16
	\preload_key (\next_r) - 1, \ra
	eor \rnc, \rnc, \rd, ror #8
.endm

.macro do_lastdecround ra rb rc rd rna rnb rnc rnd
	and RT0, RMASK, \ra
	and RT1, RMASK, \ra, lsr#8
	and RT2, RMASK, \ra, lsr#16
	ldrb \rna, [RTAB, RT0]
	mov \ra, \ra, lsr#24
	ldrb \rnb, [RTAB, RT1]
	and RT0, RMASK, \rb
	ldrb \rnc, [RTAB, RT2]
	mov \rnb, \rnb, ror #24
	ldrb \rnd, [RTAB, \ra]
	and RT1, RMASK, \rb, lsr#8
	mov \rnc, \rnc, ror #16
	and RT2, RMASK, \rb, lsr#16
	mov \rnd, \rnd, ror #8
	ldrb RT0, [RTAB, RT0]
	mov \rb, \rb, lsr#24
	ldrb RT1, [RTAB, RT1]
	orr \rnb, \rnb, RT0
	ldrb RT2, [RTAB, RT2]
	and RT0, RMASK, \rc
	ldrb \rb, [RTAB, \rb]
	orr \rnc, \rnc, RT1, ror #24
	and RT1, RMASK, \rc, lsr#8
	orr \rnd, \rnd, RT2, ror #16
	and RT2, RMASK, \rc, lsr#16
	orr \rna, \rna, \rb, ror #8
	ldrb RT0, [RTAB, RT0]
	mov \rc, \rc, lsr#24
	ldrb RT1, [RTAB, RT1]
	orr \rnc, \rnc, RT0
	ldrb RT2, [RTAB, RT2]
	and RT0, RMASK, \rd
	ldrb \rc, [RTAB, \rc]
	orr \rnd, \rnd, RT1, ror #24
	and RT1, RMASK, \rd, lsr#8
	orr \rna, \rna, RT2, ror #16
	ldrb RT0, [RTAB, RT0]
	and RT2, RMASK, \rd, lsr#16
	ldrb RT1, [RTAB, RT1]
	orr \rnb, \rnb, \rc, ror #8
	ldrb RT2, [RTAB, RT2]
	mov \rd, \rd, lsr#24
	ldrb \rd, [RTAB, \rd]
	orr \rnd, \rnd, RT0
	orr \rna, \rna, RT1, ror #24
	orr \rnb, \rnb, RT2, ror #16
	orr \rnc, \rnc, \rd, ror #8
.endm

.macro firstdecround round ra rb rc rd rna rnb rnc rnd
	addroundkey_dec ((\round) + 1), \ra, \rb, \rc, \rd, \rna, \rnb, \rnc, \rnd
	do_decround \round, \ra, \rb, \rc, \rd, \rna, \rnb, \rnc, \rnd, preload_first_key
.endm

.macro decround round ra rb rc rd rna rnb rnc rnd preload_key
	do_decround \round, \ra, \rb, \rc, \rd, \rna, \rnb, \rnc, \rnd, \preload_key
.endm

.macro set_last_round_rmask _ __
	mov RMASK, #0xff
.endm

.macro lastdecround round ra rb rc rd rna rnb rnc rnd
	sub RTAB, #AES_TABLE0
	do_lastdecround \ra, \rb, \rc, \rd, \rna, \rnb, \rnc, \rnd
	addroundkey \rna, \rnb, \rnc, \rnd, \ra, \rb, \rc, \rd, dummy
	add RTAB, #AES_TABLE0
.endm
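C The prologue below adds AES_TABLE0 (not defined in this file; it is
C expected to be provided by the surrounding m4 machinery) to the
C table pointer so the round macros index the word table directly.
C lastdecround subtracts it again so the final round's ldrb lookups
C hit the byte table, presumably the inverse S-box, at the start of
C the structure, and set_last_round_rmask narrows RMASK from
C 0xff << 2 back to plain 0xff since byte lookups need no scaling.
C
C With KEYSCHEDULE_REVERSED set to yes the subkeys are walked forward
C with post-increment loads and CTX is rewound by (rounds + 1) * 16
C bytes at the end of each block; with no the subkeys stay in
C encryption order and round r reads its subkey from CTX + r * 16,
C counting r down to 0.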
C _aes_decrypt(unsigned rounds, const uint32_t *keys,
C	       const struct aes_table *T,
C	       size_t length, uint8_t *dst,
C	       uint8_t *src)
C
C	r0	rounds
C	r1	ctx
C	r2	table
C	r3	length
C	[sp, #0]	dst
C	[sp, #4]	src

PROLOGUE(_nettle_aes_decrypt)
	.cfi_startproc
	teq PARAM_LENGTH, #0
	bxeq lr

	push {r0,r3,%r4-%r11, %ip, %lr}
	.cfi_adjust_cfa_offset 48
	.cfi_rel_offset r0, 0	C PARAM_ROUNDS
	.cfi_rel_offset r3, 4	C PARAM_LENGTH
	.cfi_rel_offset r4, 8
	.cfi_rel_offset r5, 12
	.cfi_rel_offset r6, 16
	.cfi_rel_offset r7, 20
	.cfi_rel_offset r8, 24
	.cfi_rel_offset r9, 28
	.cfi_rel_offset r10, 32
	.cfi_rel_offset r11, 36
	.cfi_rel_offset ip, 40
	.cfi_rel_offset lr, 44

	add RTAB, RTAB, #AES_TABLE0

	C read input block
.Lblock_loop:
	ldr RT0, FRAME_SRC

ifelse(V6,V6,<
	ldr RA, [RT0]
	ldr RB, [RT0, #4]
	ldr RC, [RT0, #8]
	ldr RD, [RT0, #12]
	IF_BE(<
	rev RA, RA
	rev RB, RB
	rev RC, RC
	rev RD, RD
	>)
>,<
	IF_LE(<
	C test if src is unaligned
	tst RT0, #3
	beq 1f
	>)

	C unaligned load
	ldr_unaligned_le RA, RT0, 0, RNA
	ldr_unaligned_le RB, RT0, 4, RNB
	ldr_unaligned_le RC, RT0, 8, RNA
	ldr_unaligned_le RD, RT0, 12, RNB
	IF_LE(<
	b 2f
	.ltorg
1:
	C aligned load
	ldm RT0, {RA, RB, RC, RD}
2:
	>)
>)
	add RT0, RT0, #16
	mov RMASK, #0xff
	str RT0, FRAME_SRC
	ldr RT1, FRAME_ROUNDS
	mov RMASK, RMASK, lsl#2;	C byte mask

	cmp RT1, #12
	bge .Ldec_256

	firstdecround 9, RA, RB, RC, RD, RNA, RNB, RNC, RND
.Ldec_tail:
	decround 8, RNA, RNB, RNC, RND, RA, RB, RC, RD, preload_first_key
	decround 7, RA, RB, RC, RD, RNA, RNB, RNC, RND, preload_first_key
	decround 6, RNA, RNB, RNC, RND, RA, RB, RC, RD, preload_first_key
	decround 5, RA, RB, RC, RD, RNA, RNB, RNC, RND, preload_first_key
	decround 4, RNA, RNB, RNC, RND, RA, RB, RC, RD, preload_first_key
	decround 3, RA, RB, RC, RD, RNA, RNB, RNC, RND, preload_first_key
	decround 2, RNA, RNB, RNC, RND, RA, RB, RC, RD, preload_first_key
	decround 1, RA, RB, RC, RD, RNA, RNB, RNC, RND, set_last_round_rmask
	lastdecround 0, RNA, RNB, RNC, RND, RA, RB, RC, RD

	IF_KEYSCHEDULE_REVERSED(<
	ldr RT1, FRAME_ROUNDS
	>)
	ldr RT0, FRAME_DST
	IF_KEYSCHEDULE_REVERSED(<
	add RT1, 1
	>)
	ldr RT2, FRAME_LENGTH
	IF_KEYSCHEDULE_REVERSED(<
	sub CTX, CTX, RT1, lsl#4
	>)

	C store output block
ifelse(V6,V6,<
	IF_BE(<
	rev RA, RA
	rev RB, RB
	rev RC, RC
	rev RD, RD
	>)
	str RA, [RT0]
	str RB, [RT0, #4]
	str RC, [RT0, #8]
	str RD, [RT0, #12]
>,<
	IF_LE(<
	C test if dst is unaligned
	tst RT0, #3
	beq 1f
	>)

	C unaligned store
	str_unaligned_le RA, RT0, 0, RNA, RNB
	str_unaligned_le RB, RT0, 4, RNA, RNB
	str_unaligned_le RC, RT0, 8, RNA, RNB
	str_unaligned_le RD, RT0, 12, RNA, RNB
	IF_LE(<
	b 2f
	.ltorg
1:
	C aligned store
	C write output block
	stm RT0, {RA, RB, RC, RD}
2:
	>)
>)
	add RT0, RT0, #16
	subs RT2, RT2, #16
	str RT0, FRAME_DST
	str RT2, FRAME_LENGTH

	bhi .Lblock_loop
	.cfi_remember_state

	pop {%r0,%r3, %r4-%r11,%ip,%pc}
	.cfi_restore_state
	.ltorg

.Ldec_256:
	beq .Ldec_192

	firstdecround 13, RA, RB, RC, RD, RNA, RNB, RNC, RND
	decround 12, RNA, RNB, RNC, RND, RA, RB, RC, RD, preload_first_key
	decround 11, RA, RB, RC, RD, RNA, RNB, RNC, RND, preload_first_key
	decround 10, RNA, RNB, RNC, RND, RA, RB, RC, RD, preload_first_key
	decround 9, RA, RB, RC, RD, RNA, RNB, RNC, RND, preload_first_key

	b .Ldec_tail
	.ltorg

.Ldec_192:
	firstdecround 11, RA, RB, RC, RD, RNA, RNB, RNC, RND
	decround 10, RNA, RNB, RNC, RND, RA, RB, RC, RD, preload_first_key
	decround 9, RA, RB, RC, RD, RNA, RNB, RNC, RND, preload_first_key

	b .Ldec_tail
	.cfi_endproc
EPILOGUE(_nettle_aes_decrypt)
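C A rough C-side calling sketch (not part of this file; the header
C name, context field, round-count macro and table symbol below are
C assumptions about nettle's internal declarations):
C
C   #include "aes-internal.h"
C
C   /* keys must already hold the inverted (decryption) key schedule,
C      e.g. as produced by aes128_invert_key().  */
C   _nettle_aes_decrypt(_AES128_ROUNDS, ctx->keys,
C                       &_nettle_aes_decrypt_table,
C                       length, dst, src);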