|
| 1 | +// Copyright 2024 The Go Authors. All rights reserved. |
| 2 | +// Use of this source code is governed by a BSD-style |
| 3 | +// license that can be found in the LICENSE file. |
| 4 | + |
| 5 | +// Ported from https://github.com/torvalds/linux/blob/1b294a1f35616977caddaddf3e9d28e576a1adbc/arch/mips/crypto/chacha-core.S |
| 6 | +// which is licensed under: |
| 7 | +// # ==================================================================== |
| 8 | +// # SPDX-License-Identifier: GPL-2.0 OR MIT |
| 9 | +// # |
| 10 | +// # Copyright (C) 2016-2018 René van Dorst <[email protected]>. All Rights Reserved. |
| 11 | +// # Copyright (C) 2015-2019 Jason A. Donenfeld <[email protected]>. All Rights Reserved. |
| 12 | +// # ==================================================================== |
| 13 | + |
| 14 | +//go:build gc && !purego |
| 15 | + |
| 16 | +#include "textflag.h" |
| 17 | + |
// X0-X15 name the registers that hold the sixteen 32-bit state words.
// FOR_STATE fills them in this order: four constants, eight key words,
// the counter, three nonce words.
| 18 | +#define X0 R1 |
| 19 | +#define X1 R2 |
| 20 | +#define X2 R3 |
| 21 | +#define X3 R4 |
| 22 | +#define X4 R5 |
| 23 | +#define X5 R6 |
| 24 | +#define X6 R7 |
| 25 | +#define X7 R8 |
| 26 | +#define X8 R9 |
| 27 | +#define X9 R10 |
| 28 | +#define X10 R11 |
| 29 | +#define X11 R12 |
| 30 | +#define X12 R13 |
| 31 | +#define X13 R14 |
| 32 | +#define X14 R15 |
| 33 | +#define X15 R16 |
| 34 | + |
// Registers that receive the xorKeyStream arguments. CTR first holds the
// counter pointer and is then overwritten with the counter value itself.
| 35 | +#define DST R17 |
| 36 | +#define SRC R18 |
| 37 | +#define SRC_LEN R19 |
| 38 | +#define KEY R20 |
| 39 | +#define NONCE R21 |
| 40 | +#define CTR R22 |
| 41 | + |
// LOOP_I counts the rounds still to run. TMP is a scratch register used by
// the macros below and by xorKeyStream when storing the counter back.
| 42 | +#define LOOP_I R24 |
| 43 | +#define TMP R25 |
| 44 | + |
// hasROTR is defined when either GOMIPS_r2 or GOMIPS_r5 is defined; it
// selects the single-instruction form of ROTL below.
| 45 | +#ifdef GOMIPS_r2 |
| 46 | +#define hasROTR |
| 47 | +#endif |
| 48 | +#ifdef GOMIPS_r5 |
| 49 | +#define hasROTR |
| 50 | +#endif |
| 51 | + |
// ROTL(S, R) rotates register R left by S bits in place.
// With hasROTR it is a rotate right by 32-S. Otherwise it is built from
// R<<S (kept in TMP) OR-ed with R>>(32-S), so that variant clobbers TMP.
| 52 | +#ifdef hasROTR |
| 53 | +#define ROTL(S, R) \ |
| 54 | + ROTR $(32-S), R |
| 55 | +#else |
| 56 | +#define ROTL(S, R) \ |
| 57 | + SLL $(S), R, TMP \ |
| 58 | + SRL $(32-S), R \ |
| 59 | + OR TMP, R |
| 60 | +#endif |
| 61 | + |
// AXR performs one add-xor-rotate step on four independent lanes at once:
//   A += K;  B += L;  C += M;  D += N
//   V ^= A;  W ^= B;  Y ^= C;  Z ^= D
//   V, W, Y and Z are then each rotated left by S bits (see ROTL).
// xorKeyStream issues eight AXR steps per loop iteration, which it counts
// as two rounds.
| 62 | +#define AXR(A, B, C, D, K, L, M, N, V, W, Y, Z, S) \ |
| 63 | + ADDU K, A \ |
| 64 | + ADDU L, B \ |
| 65 | + ADDU M, C \ |
| 66 | + ADDU N, D \ |
| 67 | + XOR A, V \ |
| 68 | + XOR B, W \ |
| 69 | + XOR C, Y \ |
| 70 | + XOR D, Z \ |
| 71 | + ROTL (S, V) \ |
| 72 | + ROTL (S, W) \ |
| 73 | + ROTL (S, Y) \ |
| 74 | + ROTL (S, Z) |
| 75 | + |
// FOR_STATE applies an operation to each state register X0-X15 together
// with the matching word of the initial state.
//   OP     is used where that word is an immediate or a register: the four
//          constants and the counter value in CTR.
//   OP_MEM is used where that word is read from memory: the eight key words
//          at 0..28(KEY) and the three nonce words at 0..8(NONCE).
| 76 | +#define FOR_STATE(OP, OP_MEM) \ |
| 77 | + OP ( $0x61707865, X0 ) \ // expa |
| 78 | + OP ( $0x3320646e, X1 ) \ // nd 3 |
| 79 | + OP ( $0x79622d32, X2 ) \ // 2-by |
| 80 | + OP ( $0x6b206574, X3 ) \ // te k |
| 81 | + OP_MEM ( 0(KEY), X4 ) \ |
| 82 | + OP_MEM ( 4(KEY), X5 ) \ |
| 83 | + OP_MEM ( 8(KEY), X6 ) \ |
| 84 | + OP_MEM ( 12(KEY), X7 ) \ |
| 85 | + OP_MEM ( 16(KEY), X8 ) \ |
| 86 | + OP_MEM ( 20(KEY), X9 ) \ |
| 87 | + OP_MEM ( 24(KEY), X10 ) \ |
| 88 | + OP_MEM ( 28(KEY), X11 ) \ |
| 89 | + OP ( CTR, X12 ) \ |
| 90 | + OP_MEM ( 0(NONCE), X13 ) \ |
| 91 | + OP_MEM ( 4(NONCE), X14 ) \ |
| 92 | + OP_MEM ( 8(NONCE), X15 ) |
| 93 | + |
// Helper operations passed to FOR_STATE.
// movw copies x into y; xorKeyStream passes it as both OP and OP_MEM to load
// the initial state.
| 94 | +#define movw(x, y) \ |
| 95 | + MOVW x, y |
| 96 | + |
// ADD adds an immediate or register V into REG.
| 97 | +#define ADD(V, REG) \ |
| 98 | + ADDU V, REG |
| 99 | + |
// ADD_MEM loads the word at ADDR into TMP and adds it into REG (clobbers TMP).
| 100 | +#define ADD_MEM(ADDR, REG) \ |
| 101 | + MOVW ADDR, TMP \ |
| 102 | + ADDU TMP, REG |
| 103 | + |
| 104 | +// XOR_STREAM_WORD works with unaligned memory; this is important because the streams might not be aligned. |
| 105 | +// Especially when used in TLS, the memory is often unaligned. |
// It loads the 32-bit word at byte offset 4*OFF from SRC with a MOVWL/MOVWR
// pair, XORs REG into it, and stores the result at the same offset from DST
// with another MOVWL/MOVWR pair. TMP is clobbered; REG is left unchanged.
// NOTE(review): pairing MOVWL with offset +3 and MOVWR with offset +0 looks
// like the little-endian convention - confirm this file is only built for
// little-endian targets.
| 106 | +#define XOR_STREAM_WORD( OFF, REG) \ |
| 107 | + MOVWL (4*OFF + 3)(SRC), TMP \ |
| 108 | + MOVWR (4*OFF)(SRC), TMP \ |
| 109 | + XOR REG, TMP \ |
| 110 | + MOVWL TMP, (4*OFF + 3)(DST) \ |
| 111 | + MOVWR TMP, (4*OFF)(DST) |
| 112 | + |
// xorKeyStream XORs src with the generated key stream and writes the result
// to dst, 64 bytes per pass of the outer loop, running 20 rounds per block.
// The counter word is loaded from *counter, incremented once per block, and
// stored back through the pointer before returning.
// NOTE(review): the outer loop always runs at least once and exits only when
// the remaining length reaches exactly zero, so this presumably relies on the
// caller passing a non-zero len(src) that is a multiple of 64, with dst at
// least as long - confirm against the Go caller.
| 113 | +// func xorKeyStream(dst, src []byte, key *[8]uint32, nonce *[3]uint32, counter *uint32) |
| 114 | +TEXT ·xorKeyStream(SB), NOSPLIT|NOFRAME, $0 |
| 115 | + MOVW dst+0(FP), DST |
| 116 | + MOVW src+12(FP), SRC |
| 117 | + MOVW src_len+16(FP), SRC_LEN |
| 118 | + MOVW key+24(FP), KEY |
| 119 | + MOVW nonce+28(FP), NONCE |
| 120 | + MOVW counter+32(FP), CTR |
| 121 | + |
| 122 | + // replace the counter pointer in CTR with the counter value it points to |
| 123 | + MOVW (CTR), CTR |
| 124 | + |
| 125 | +chacha: |
| 126 | + |
| 127 | + // load the initial state (constants, key, counter, nonce) into X0-X15 |
| 128 | + FOR_STATE ( movw, movw ) |
| 129 | + |
| 130 | + // set number of rounds; each pass through loop accounts for two of them |
| 131 | + MOVW $20, LOOP_I |
| 132 | + |
| 133 | +loop: |
| 134 | + AXR( X0,X1,X2,X3, X4,X5,X6,X7, X12,X13,X14,X15, 16) |
| 135 | + AXR( X8,X9,X10,X11, X12,X13,X14,X15, X4,X5,X6,X7, 12) |
| 136 | + AXR( X0,X1,X2,X3, X4,X5,X6,X7, X12,X13,X14,X15, 8) |
| 137 | + AXR( X8,X9,X10,X11, X12,X13,X14,X15, X4,X5,X6,X7, 7) |
| 138 | + AXR( X0,X1,X2,X3, X5,X6,X7,X4, X15,X12,X13,X14, 16) |
| 139 | + AXR( X10,X11,X8,X9, X15,X12,X13,X14, X5,X6,X7,X4, 12) |
| 140 | + AXR( X0,X1,X2,X3, X5,X6,X7,X4, X15,X12,X13,X14, 8) |
| 141 | + AXR( X10,X11,X8,X9, X15,X12,X13,X14, X5,X6,X7,X4, 7) |
| 142 | + |
| 143 | + ADDU $-2, LOOP_I |
| 144 | + BNE LOOP_I, loop |
| 145 | + |
| 146 | + // add the initial state back into X0-X15 to produce the key stream block |
| 147 | + FOR_STATE ( ADD, ADD_MEM ) |
| 148 | + |
| 149 | + // xor the 64-byte key stream block with the source, one word at a time, and write out the result |
| 150 | + XOR_STREAM_WORD (0, X0) |
| 151 | + XOR_STREAM_WORD (1, X1) |
| 152 | + XOR_STREAM_WORD (2, X2) |
| 153 | + XOR_STREAM_WORD (3, X3) |
| 154 | + XOR_STREAM_WORD (4, X4) |
| 155 | + XOR_STREAM_WORD (5, X5) |
| 156 | + XOR_STREAM_WORD (6, X6) |
| 157 | + XOR_STREAM_WORD (7, X7) |
| 158 | + XOR_STREAM_WORD (8, X8) |
| 159 | + XOR_STREAM_WORD (9, X9) |
| 160 | + XOR_STREAM_WORD (10, X10) |
| 161 | + XOR_STREAM_WORD (11, X11) |
| 162 | + XOR_STREAM_WORD (12, X12) |
| 163 | + XOR_STREAM_WORD (13, X13) |
| 164 | + XOR_STREAM_WORD (14, X14) |
| 165 | + XOR_STREAM_WORD (15, X15) |
| 166 | + |
| 167 | + // 64 bytes consumed: decrement the remaining length |
| 168 | + ADDU $-64, SRC_LEN, SRC_LEN |
| 169 | + |
| 170 | + // advance both pointers past the block just processed |
| 171 | + MOVW $64(DST), DST |
| 172 | + MOVW $64(SRC), SRC |
| 173 | + |
| 174 | + // the next block uses the next counter value |
| 175 | + ADDU $1, CTR |
| 176 | + |
| 177 | + // loop until the remaining length is exactly zero |
| 178 | + BNE SRC_LEN, chacha |
| 179 | + |
| 180 | + // reload the counter pointer (CTR now holds the value) and store the updated counter |
| 181 | + MOVW counter+32(FP), TMP |
| 182 | + MOVW CTR, (TMP) |
| 183 | + |
| 184 | + RET |
| 185 | + |
0 commit comments