/*-
 * Copyright (c) 2012 Shane Nievera Tablizo
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 */

#include <sys/param.h>
#ifdef _KERNEL
#include <crypto/aesni/aesni.h>
#endif

#ifndef _KERNEL
#include "aes_encdec.h"
#endif

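/*
 * Two views of a 128-bit XMM value (one AES block): dquad as two 64-bit
 * lanes, q_int as four 32-bit lanes for per-lane integer operations.
 */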
typedef uint64_t dquad __attribute__((__vector_size__(16)));
typedef int q_int __attribute__((__vector_size__(16)));

#define V4ZERO (q_int){ 0, 0, 0, 0 }

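/*
 * DEFASM() generates a one-instruction inline-asm wrapper named <mn>_asm.
 * SW_ROUNDS() re-pins "rounds" to a per-key-size constant before running f,
 * so the always_inline round loops below can unroll at compile time.
 */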
#define DEFASM(mn, out_t, in_t, in_spec) \
static inline out_t __attribute__((always_inline)) \
mn ## _asm(out_t __a, in_t __b) \
{ \
        __asm__(#mn " %1, %0" : "+&x" (__a) : in_spec (__b)); \
        return (__a); \
}

#define SW_ROUNDS(rounds, f) \
        switch (rounds) { \
        case AES128_ROUNDS: rounds = AES128_ROUNDS; f; break; \
        case AES192_ROUNDS: rounds = AES192_ROUNDS; f; break; \
        case AES256_ROUNDS: rounds = AES256_ROUNDS; f; break; \
        }

DEFASM(psrad, dquad, int, "I")
DEFASM(aesenc, dquad, dquad, "x")
DEFASM(aesdec, dquad, dquad, "x")
DEFASM(aesenclast, dquad, dquad, "x")
DEFASM(aesdeclast, dquad, dquad, "x")

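/*
 * movdqu is an unaligned 128-bit load, so callers may pass buffers
 * (e.g. the IV) that are not 16-byte aligned.
 */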
static inline dquad __attribute__((always_inline))
loadu_asm(const dquad *restrict p)
{
        dquad z;

        __asm__("movdqu %1, %0" : "=x" (z) : "m" (*p));

        return (z);
}

#define loadu(x) loadu_asm((x))
#define psrad(i, x) psrad_asm((x), (i))
#define aesenc aesenc_asm
#define aesdec aesdec_asm
#define aesdeclast aesdeclast_asm
#define aesenclast aesenclast_asm

static inline dquad __attribute__((always_inline))
movdqu(const uint8_t *restrict p)
{

        return (loadu((dquad*)p));
}

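/*
 * One block through the key schedule: initial whitening xor, rounds - 1
 * aesenc/aesdec rounds, and a final aesenclast/aesdeclast.  dir selects
 * encryption (nonzero) or decryption.
 */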
static inline dquad __attribute__((always_inline))
encdec(int dir, int rounds, const dquad *restrict sched, dquad t)
{

        t ^= *sched;

        for (; --rounds;)
                t = (dir ? aesenc : aesdec)(t, *++sched);

        return (dir ? aesenclast : aesdeclast)(t, *++sched);
}
#define CRYPT(x) encdec(dir, rounds, (dquad*)sched, (x))
#define DEC(x) encdec(0, rounds, (dquad*)sched, (x))

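/*
 * In-place CBC decryption, six blocks per pass.  Unlike encryption, CBC
 * decryption has no serial dependency between blocks, so six independent
 * aesdec chains are interleaved to hide instruction latency.  The n mod 6
 * tail is handled by peeling one block when the remainder is odd, then a
 * two- or four-block pass.
 */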
static inline void __attribute__((always_inline))
decrypt_cbc_X6(int rounds, const dquad *restrict sched, size_t n,
    dquad *restrict src, dquad t0)
{
        /* TODO: Pipeline this without explicit vars. */
        dquad t1, t2, t3, t4, t5, t6;
        dquad da, db, dc, dd, de, df;

        size_t m;

        if (0 < (m = n / 6)) {
                da = t1 = src[0]; db = t2 = src[1]; dc = t3 = src[2];
                dd = t4 = src[3]; de = t5 = src[4]; df = t6 = src[5];
                for (;;) {
                        da = DEC(da); db = DEC(db); dc = DEC(dc);
                        dd = DEC(dd); de = DEC(de); df = DEC(df);

                        *src = da ^ t0; *++src = db ^ t1; *++src = dc ^ t2;
                        *++src = dd ^ t3; *++src = de ^ t4; *++src = df ^ t5;

                        ++src;
                        t0 = t6;
                        if (--m == 0)
                                break;
                        da = t1 = src[0]; db = t2 = src[1]; dc = t3 = src[2];
                        dd = t4 = src[3]; de = t5 = src[4]; df = t6 = src[5];
                }
        }
        if (n % 2 == 1) {
                da = t1 = src[0];
                da = DEC(da);
                *src = da ^ t0;
                ++src; --n;
                t0 = t1;
        }
        switch (n %= 6) {
        case 2:
                da = t1 = src[0]; db = t2 = src[1];
                da = DEC(da); db = DEC(db);
                *src = da ^ t0; *++src = db ^ t1;
                ++src;
                t0 = t2;
                break;
        case 4:
                da = t1 = src[0]; db = t2 = src[1];
                dc = t3 = src[2]; dd = t4 = src[3];
                da = DEC(da); db = DEC(db); dc = DEC(dc); dd = DEC(dd);
                *src = da ^ t0; *++src = db ^ t1;
                *++src = dc ^ t2; *++src = dd ^ t3;
                break;
        }
}

/* #define GF128_GENERATOR 0x87 */

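/*
 * Multiply the 128-bit XTS tweak by x in GF(2^128).  The shuffle/psrad pair
 * broadcasts the sign bits of both lanes; masking with { 0x87, 1 } carries
 * bit 63 into the high lane and reduces a carry out of bit 127 by the
 * generator byte 0x87.
 */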
static inline dquad __attribute__((always_inline))
GF128_shl(dquad z)
{
        dquad a, c;

        c = (dquad){ 0x87 /* GF128 generator */, 1 };

        a = psrad(31, __builtin_shufflevector((q_int)z, V4ZERO, 3, 3, 1, 1));

        return ((z + z) ^ (a & c));
}

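/*
 * XEX helpers: init_tweaks() derives ts[1..n-1] from ts[0] by repeated
 * doubling, llel_crypt() ciphers n independent blocks with their tweaks
 * pre-xored in, and out_xor() applies the tweaks again on output.
 */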
static inline void
init_tweaks(dquad *restrict ts, int n)
{
        int i;

        for (i = 0; i < n - 1; ++i)
                ts[i + 1] = GF128_shl(ts[i]);
}

static inline void
out_xor(dquad *restrict ds, dquad *restrict ts, dquad *restrict *dst, int n)
{
        int i;

        for (i = 0; i < n; ++i, ++*dst)
                **dst = ds[i] ^ ts[i];
}

static inline void
llel_crypt(int dir, int rounds, const dquad *restrict sched, dquad *restrict ds,
    dquad *restrict ts, const dquad **src, int n)
{
        int i;

        for (i = 0; i < n; ++i, ++*src)
                ds[i] = CRYPT(**src ^ ts[i]);
}

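/*
 * XEX encryption/decryption, six blocks per pass, with the same odd/2/4
 * tail peeling as decrypt_cbc_X6().  t0 is the initial tweak; each pass
 * chains the next tweak from the last one it used.
 */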
static inline void __attribute__((always_inline))
crypt_xex_X6(int dir, int rounds, const dquad *restrict sched, size_t n,
    const dquad *src, dquad *dst, dquad t0)
{
        dquad d[6];
        dquad t[6];
        size_t m;

        t[0] = t0;

        if (0 < (m = n / 6)) {
                init_tweaks(t, 6);
                for (;;) {
                        llel_crypt(dir, rounds, sched, d, t, &src, 6);
                        out_xor(d, t, &dst, 6);
                        t[0] = GF128_shl(t[5]);
                        if (--m == 0)
                                break;
                        init_tweaks(t, 6);
                }
        }
        if (n % 2 == 1) {
                *dst++ = *t ^ CRYPT(*t ^ *src++);
                *t = GF128_shl(*t);
                --n;
        }
        switch (n %= 6) {
        case 2:
                n = 2;  /* n is 2 here; the store keeps it an explicit constant. */
                init_tweaks(t, n);
                llel_crypt(dir, rounds, sched, d, t, &src, n);
                out_xor(d, t, &dst, n);
                break;
        case 4:
                n = 4;
                init_tweaks(t, n);
                llel_crypt(dir, rounds, sched, d, t, &src, n);
                out_xor(d, t, &dst, n);
                break;
        }
}

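/*
 * External aesni_* entry points.  aesni_enc()/aesni_dec() handle a single
 * block; a non-NULL iv requests CBC chaining for that block.
 */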
void
aesni_enc(int rounds, const uint8_t *restrict sched, const uint8_t *src,
    uint8_t *dst, const uint8_t *restrict iv)
{

        *(dquad*)dst = encdec(1, rounds, (dquad*)sched, iv == NULL ?
            *(dquad*)src : *(dquad*)src ^ movdqu(iv));
}

void
aesni_dec(int rounds, const uint8_t *restrict sched, const uint8_t *src,
    uint8_t *dst, const uint8_t *restrict iv)
{

        *(dquad*)dst = encdec(0, rounds, (dquad*)sched, *(dquad*)src);
        *(dquad*)dst = (iv == NULL) ? *(dquad*)dst : *(dquad*)dst ^ movdqu(iv);
}

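/*
 * CBC encryption is inherently serial (each block depends on the previous
 * ciphertext), so a simple loop is used; decryption takes the interleaved
 * path above.
 */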
void
aesni_encrypt_cbc(int rounds, const void *restrict sched, size_t len,
    const uint8_t *src, uint8_t *dst, const uint8_t *restrict ivp)
{
        dquad iv;
        dquad *p, *q;

        len /= AES_BLOCK_LEN;
        iv = movdqu(ivp);
        p = (dquad*)src;
        q = (dquad*)dst;

#define ENCRYPT_CBC_LOOP \
        for (; len--; ++p, ++q) { \
                iv = encdec(1, rounds, (dquad*)sched, *p ^ iv); \
                *q = iv; \
        }
        SW_ROUNDS(rounds, ENCRYPT_CBC_LOOP)
}

void
aesni_decrypt_cbc(int rounds, const void *restrict sched, size_t len,
    const uint8_t *restrict dat, const uint8_t *restrict ivp)
{
        dquad iv;

        iv = movdqu(ivp);
        SW_ROUNDS(rounds, decrypt_cbc_X6(rounds, sched, len / AES_BLOCK_LEN,
            (dquad*)dat, iv))
}

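/*
 * XTS: the initial tweak is the IV encrypted under the tweak-key schedule
 * (xsched); the data blocks then go through the XEX path with the data-key
 * schedule.  Illustrative call for AES-128 (len should be a multiple of
 * AES_BLOCK_LEN):
 *
 *      aesni_encrypt_xts(AES128_ROUNDS, sched, xsched, len, src, dst, ivp);
 */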
#define CRYPT_XEX_X6(dir) (\
        crypt_xex_X6((dir), rounds, sched, len / AES_BLOCK_LEN, \
            (dquad*)src, (dquad*)dst, \
            encdec(1, rounds, xsched, movdqu(ivp))) \
)

void
aesni_encrypt_xts(int rounds, const void *restrict sched,
    const void *restrict xsched, size_t len, const uint8_t *src, uint8_t *dst,
    const uint8_t *restrict ivp)
{

        SW_ROUNDS(rounds, CRYPT_XEX_X6(1))
}

void
aesni_decrypt_xts(int rounds, const void *restrict sched,
    const void *restrict xsched, size_t len, const uint8_t *src, uint8_t *dst,
    const uint8_t *restrict ivp)
{

        SW_ROUNDS(rounds, CRYPT_XEX_X6(0))
}