Skip to content

Commit 1984296

Browse files
Ard Biesheuvelherbertx
Ard Biesheuvel
authored andcommitted
crypto: arm64/aegis128 - implement plain NEON version
Provide a version of the core AES transform to the aegis128 SIMD code that does not rely on the special AES instructions, but uses plain NEON instructions instead. This allows the SIMD version of the aegis128 driver to be used on arm64 systems that do not implement those instructions (which are not mandatory in the architecture), such as the Raspberry Pi 3. Since GCC makes a mess of this when using the tbl/tbx intrinsics to perform the sbox substitution, preload the Sbox into v16..v31 in this case and use inline asm to emit the tbl/tbx instructions. Clang does not support this approach, nor does it require it, since it does a much better job at code generation, so there we use the intrinsics as usual. Cc: Nick Desaulniers <[email protected]> Signed-off-by: Ard Biesheuvel <[email protected]> Acked-by: Nick Desaulniers <[email protected]> Signed-off-by: Herbert Xu <[email protected]>
1 parent a439763 commit 1984296

File tree

3 files changed

+80
-2
lines changed

3 files changed

+80
-2
lines changed

crypto/Makefile

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -98,7 +98,14 @@ CFLAGS_aegis128-neon-inner.o += -mfpu=crypto-neon-fp-armv8
9898
aegis128-$(CONFIG_CRYPTO_AEGIS128_SIMD) += aegis128-neon.o aegis128-neon-inner.o
9999
endif
100100
ifeq ($(ARCH),arm64)
101-
CFLAGS_aegis128-neon-inner.o += -ffreestanding -mcpu=generic+crypto
101+
aegis128-cflags-y := -ffreestanding -mcpu=generic+crypto
102+
aegis128-cflags-$(CONFIG_CC_IS_GCC) += -ffixed-q16 -ffixed-q17 -ffixed-q18 \
103+
-ffixed-q19 -ffixed-q20 -ffixed-q21 \
104+
-ffixed-q22 -ffixed-q23 -ffixed-q24 \
105+
-ffixed-q25 -ffixed-q26 -ffixed-q27 \
106+
-ffixed-q28 -ffixed-q29 -ffixed-q30 \
107+
-ffixed-q31
108+
CFLAGS_aegis128-neon-inner.o += $(aegis128-cflags-y)
102109
CFLAGS_REMOVE_aegis128-neon-inner.o += -mgeneral-regs-only
103110
aegis128-$(CONFIG_CRYPTO_AEGIS128_SIMD) += aegis128-neon.o aegis128-neon-inner.o
104111
endif

crypto/aegis128-neon-inner.c

Lines changed: 65 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -17,13 +17,17 @@
1717

1818
#include <stddef.h>
1919

20+
extern int aegis128_have_aes_insn;
21+
2022
void *memcpy(void *dest, const void *src, size_t n);
2123
void *memset(void *s, int c, size_t n);
2224

2325
struct aegis128_state {
2426
uint8x16_t v[5];
2527
};
2628

29+
extern const uint8x16x4_t crypto_aes_sbox[];
30+
2731
static struct aegis128_state aegis128_load_state_neon(const void *state)
2832
{
2933
return (struct aegis128_state){ {
@@ -49,6 +53,46 @@ uint8x16_t aegis_aes_round(uint8x16_t w)
4953
{
5054
uint8x16_t z = {};
5155

56+
#ifdef CONFIG_ARM64
57+
if (!__builtin_expect(aegis128_have_aes_insn, 1)) {
58+
static const uint8x16_t shift_rows = {
59+
0x0, 0x5, 0xa, 0xf, 0x4, 0x9, 0xe, 0x3,
60+
0x8, 0xd, 0x2, 0x7, 0xc, 0x1, 0x6, 0xb,
61+
};
62+
static const uint8x16_t ror32by8 = {
63+
0x1, 0x2, 0x3, 0x0, 0x5, 0x6, 0x7, 0x4,
64+
0x9, 0xa, 0xb, 0x8, 0xd, 0xe, 0xf, 0xc,
65+
};
66+
uint8x16_t v;
67+
68+
// shift rows
69+
w = vqtbl1q_u8(w, shift_rows);
70+
71+
// sub bytes
72+
if (!IS_ENABLED(CONFIG_CC_IS_GCC)) {
73+
v = vqtbl4q_u8(crypto_aes_sbox[0], w);
74+
v = vqtbx4q_u8(v, crypto_aes_sbox[1], w - 0x40);
75+
v = vqtbx4q_u8(v, crypto_aes_sbox[2], w - 0x80);
76+
v = vqtbx4q_u8(v, crypto_aes_sbox[3], w - 0xc0);
77+
} else {
78+
asm("tbl %0.16b, {v16.16b-v19.16b}, %1.16b" : "=w"(v) : "w"(w));
79+
w -= 0x40;
80+
asm("tbx %0.16b, {v20.16b-v23.16b}, %1.16b" : "+w"(v) : "w"(w));
81+
w -= 0x40;
82+
asm("tbx %0.16b, {v24.16b-v27.16b}, %1.16b" : "+w"(v) : "w"(w));
83+
w -= 0x40;
84+
asm("tbx %0.16b, {v28.16b-v31.16b}, %1.16b" : "+w"(v) : "w"(w));
85+
}
86+
87+
// mix columns
88+
w = (v << 1) ^ (uint8x16_t)(((int8x16_t)v >> 7) & 0x1b);
89+
w ^= (uint8x16_t)vrev32q_u16((uint16x8_t)v);
90+
w ^= vqtbl1q_u8(v ^ w, ror32by8);
91+
92+
return w;
93+
}
94+
#endif
95+
5296
/*
5397
* We use inline asm here instead of the vaeseq_u8/vaesmcq_u8 intrinsics
5498
* to force the compiler to issue the aese/aesmc instructions in pairs.
@@ -73,10 +117,27 @@ struct aegis128_state aegis128_update_neon(struct aegis128_state st,
73117
return st;
74118
}
75119

120+
static inline __attribute__((always_inline))
121+
void preload_sbox(void)
122+
{
123+
if (!IS_ENABLED(CONFIG_ARM64) ||
124+
!IS_ENABLED(CONFIG_CC_IS_GCC) ||
125+
__builtin_expect(aegis128_have_aes_insn, 1))
126+
return;
127+
128+
asm("ld1 {v16.16b-v19.16b}, [%0], #64 \n\t"
129+
"ld1 {v20.16b-v23.16b}, [%0], #64 \n\t"
130+
"ld1 {v24.16b-v27.16b}, [%0], #64 \n\t"
131+
"ld1 {v28.16b-v31.16b}, [%0] \n\t"
132+
:: "r"(crypto_aes_sbox));
133+
}
134+
76135
void crypto_aegis128_update_neon(void *state, const void *msg)
77136
{
78137
struct aegis128_state st = aegis128_load_state_neon(state);
79138

139+
preload_sbox();
140+
80141
st = aegis128_update_neon(st, vld1q_u8(msg));
81142

82143
aegis128_save_state_neon(st, state);
@@ -88,6 +149,8 @@ void crypto_aegis128_encrypt_chunk_neon(void *state, void *dst, const void *src,
88149
struct aegis128_state st = aegis128_load_state_neon(state);
89150
uint8x16_t msg;
90151

152+
preload_sbox();
153+
91154
while (size >= AEGIS_BLOCK_SIZE) {
92155
uint8x16_t s = st.v[1] ^ (st.v[2] & st.v[3]) ^ st.v[4];
93156

@@ -120,6 +183,8 @@ void crypto_aegis128_decrypt_chunk_neon(void *state, void *dst, const void *src,
120183
struct aegis128_state st = aegis128_load_state_neon(state);
121184
uint8x16_t msg;
122185

186+
preload_sbox();
187+
123188
while (size >= AEGIS_BLOCK_SIZE) {
124189
msg = vld1q_u8(src) ^ st.v[1] ^ (st.v[2] & st.v[3]) ^ st.v[4];
125190
st = aegis128_update_neon(st, msg);

crypto/aegis128-neon.c

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -14,9 +14,15 @@ void crypto_aegis128_encrypt_chunk_neon(void *state, void *dst, const void *src,
1414
void crypto_aegis128_decrypt_chunk_neon(void *state, void *dst, const void *src,
1515
unsigned int size);
1616

17+
int aegis128_have_aes_insn __ro_after_init;
18+
1719
bool crypto_aegis128_have_simd(void)
1820
{
19-
return cpu_have_feature(cpu_feature(AES));
21+
if (cpu_have_feature(cpu_feature(AES))) {
22+
aegis128_have_aes_insn = 1;
23+
return true;
24+
}
25+
return IS_ENABLED(CONFIG_ARM64);
2026
}
2127

2228
void crypto_aegis128_update_simd(union aegis_block *state, const void *msg)

0 commit comments

Comments
 (0)