View | Details | Raw Unified | Return to bug 170200 | Differences between
the previous attachment and this patch

Collapse All | Expand All

(-)conf/files.amd64 (-1 / +6 lines)
Lines 93-98 Link Here
93
	no-obj no-implicit-rule	before-depend				\
93
	no-obj no-implicit-rule	before-depend				\
94
	clean		"acpi_wakedata.h"
94
	clean		"acpi_wakedata.h"
95
#
95
#
96
aes_encdec.o			optional	aesni			\
97
	dependency	"$S/crypto/aesni/aes_encdec.c"			\
98
	compile-with	"clang ${CFLAGS:N-mno-sse} -msse -c ${.IMPSRC}"	\
99
	no-implicit-rule clean "aes_encdec.o"
100
#
96
amd64/amd64/amd64_mem.c		optional	mem
101
amd64/amd64/amd64_mem.c		optional	mem
97
#amd64/amd64/apic_vector.S	standard
102
#amd64/amd64/apic_vector.S	standard
98
amd64/amd64/atomic.c		standard
103
amd64/amd64/atomic.c		standard
Lines 131-137 Link Here
131
amd64/amd64/vm_machdep.c	standard
136
amd64/amd64/vm_machdep.c	standard
132
amd64/pci/pci_cfgreg.c		optional	pci
137
amd64/pci/pci_cfgreg.c		optional	pci
133
cddl/contrib/opensolaris/common/atomic/amd64/opensolaris_atomic.S	optional zfs compile-with "${ZFS_S}"
138
cddl/contrib/opensolaris/common/atomic/amd64/opensolaris_atomic.S	optional zfs compile-with "${ZFS_S}"
134
crypto/aesni/aesencdec_amd64.S	optional aesni
139
#crypto/aesni/aesencdec_amd64.S	optional aesni
135
crypto/aesni/aeskeys_amd64.S	optional aesni
140
crypto/aesni/aeskeys_amd64.S	optional aesni
136
crypto/aesni/aesni.c		optional aesni
141
crypto/aesni/aesni.c		optional aesni
137
crypto/aesni/aesni_wrap.c	optional aesni
142
crypto/aesni/aesni_wrap.c	optional aesni
(-)crypto/aesni/aesni.h (+9 lines)
Lines 94-99 Link Here
94
void aesni_decrypt_ecb(int rounds, const void *key_schedule, size_t len,
94
void aesni_decrypt_ecb(int rounds, const void *key_schedule, size_t len,
95
    const uint8_t from[AES_BLOCK_LEN], uint8_t to[AES_BLOCK_LEN]);
95
    const uint8_t from[AES_BLOCK_LEN], uint8_t to[AES_BLOCK_LEN]);
96
96
97
#ifdef __amd64__
98
void aesni_encrypt_xts(int rounds, const void *data_schedule,
99
    const void *tweak_schedule, size_t len, const uint8_t *from, uint8_t *to,
100
    const uint8_t iv[AES_BLOCK_LEN]);
101
void aesni_decrypt_xts(int rounds, const void *data_schedule,
102
    const void *tweak_schedule, size_t len, const uint8_t *from, uint8_t *to,
103
    const uint8_t iv[AES_BLOCK_LEN]);
104
#endif
105
97
int aesni_cipher_setup(struct aesni_session *ses,
106
int aesni_cipher_setup(struct aesni_session *ses,
98
    struct cryptoini *encini);
107
    struct cryptoini *encini);
99
int aesni_cipher_process(struct aesni_session *ses,
108
int aesni_cipher_process(struct aesni_session *ses,
(-)crypto/aesni/aes_encdec.c (+311 lines)
Line 0 Link Here
1
/*-
2
 * Copyright (c) 2012 Shane Nievera Tablizo
3
 * All rights reserved.
4
 *
5
 * Redistribution and use in source and binary forms, with or without
6
 * modification, are permitted provided that the following conditions
7
 * are met:
8
 * 1. Redistributions of source code must retain the above copyright
9
 *    notice, this list of conditions and the following disclaimer.
10
 * 2. Redistributions in binary form must reproduce the above copyright
11
 *    notice, this list of conditions and the following disclaimer in the
12
 *    documentation and/or other materials provided with the distribution.
13
 *
14
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
15
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
16
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
17
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
18
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
19
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
20
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
21
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
22
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
23
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
24
 * SUCH DAMAGE.
25
 *
26
 */
27
28
#include <sys/param.h>
29
#ifdef _KERNEL
30
#include <crypto/aesni/aesni.h>
31
#endif
32
33
#ifndef _KERNEL
34
#include "aes_encdec.h"
35
#endif
36
37
typedef uint64_t	dquad __attribute__((__vector_size__(16)));
38
typedef int		q_int __attribute__((__vector_size__(16)));
39
40
#define V4ZERO		(q_int){ 0, 0, 0, 0}
41
42
#define DEFASM(mn, out_t, in_t, in_spec)				\
43
	static inline out_t __attribute__((always_inline))		\
44
	mn ## _asm(out_t __a, in_t __b)					\
45
	{                                                               \
46
		__asm__(#mn " %1, %0" : "+&x" (__a) : in_spec (__b));	\
47
		return (__a);						\
48
	}
49
50
#define SW_ROUNDS(rounds, f)						\
51
	switch (rounds) {						\
52
		case AES128_ROUNDS: rounds = AES128_ROUNDS; f; break;	\
53
		case AES192_ROUNDS: rounds = AES192_ROUNDS; f; break;	\
54
		case AES256_ROUNDS: rounds = AES256_ROUNDS; f; break;	\
55
	}
56
57
DEFASM(psrad, dquad, int, "I")
58
DEFASM(aesenc, dquad, dquad, "x")
59
DEFASM(aesdec, dquad, dquad, "x")
60
DEFASM(aesenclast, dquad, dquad, "x")
61
DEFASM(aesdeclast, dquad, dquad, "x")
62
63
static inline dquad __attribute__((always_inline))
64
loadu_asm(const dquad *restrict p)
65
{
66
	dquad z;
67
68
	__asm__("movdqu	%1, %0" : "=x" (z) : "m" (*p));
69
70
	return (z);
71
}
72
73
#define loadu(x)		loadu_asm((x))
74
#define psrad(i,x)		psrad_asm((x), (i))
75
#define aesenc			aesenc_asm
76
#define aesdec			aesdec_asm
77
#define aesdeclast		aesdeclast_asm
78
#define aesenclast		aesenclast_asm
79
80
static inline dquad __attribute__((always_inline))
81
movdqu(const uint8_t *restrict p)
82
{
83
84
	return (loadu((dquad*)p));
85
}
86
87
static inline dquad __attribute((always_inline))
88
encdec(int dir, int rounds, const dquad *restrict sched, dquad t)
89
{
90
91
	t ^= *sched;
92
93
	for (; --rounds;)
94
		t = (dir ? aesenc : aesdec)(t, *++sched);
95
96
	return (dir ? aesenclast : aesdeclast)(t, *++sched);
97
}
98
#define CRYPT(x)	encdec(dir, rounds, (dquad*)sched, (x))
99
#define DEC(x)		encdec(0, rounds, (dquad*)sched, (x))
100
101
static inline void __attribute((always_inline))
102
decrypt_cbc_X6(int rounds, const dquad *restrict sched, size_t n,
103
    dquad *restrict src, dquad t0)
104
/* TODO:  Pipeline this without explicit vars. */
105
{
106
	dquad	 t1, t2, t3, t4, t5, t6;
107
	dquad	 da, db, dc, dd, de, df;
108
109
	size_t	 m;
110
	
111
	if (0 < (m = n / 6)) {
112
		da = t1 = src[0]; db = t2 = src[1]; dc = t3 = src[2];
113
		dd = t4 = src[3]; de = t5 = src[4]; df = t6 = src[5];
114
		for (;;) {
115
			da = DEC(da); db = DEC(db); dc = DEC(dc);
116
			dd = DEC(dd); de = DEC(de); df = DEC(df);
117
118
			*src = da ^ t0; *++src = db ^ t1; *++src = dc ^ t2;
119
			*++src = dd ^ t3; *++src = de ^ t4; *++src = df ^ t5;
120
121
			++src;
122
			t0 = t6;
123
			if (--m == 0)
124
				break;
125
			da = t1 = src[0]; db = t2 = src[1]; dc = t3 = src[2];
126
			dd = t4 = src[3]; de = t5 = src[4]; df = t6 = src[5];
127
		}
128
	}
129
	if (n % 2 == 1) {
130
		da = t1 = src[0];
131
		da = DEC(da);
132
		*src = da ^ t0;
133
		++src; --n;
134
		t0 = t1;
135
136
	}
137
	switch (n %= 6) {
138
		case 2:
139
			da = t1 = src[0]; db = t2 = src[1];
140
			da = DEC(da); db = DEC(db);
141
			*src = da ^ t0; *++src = db ^ t1;
142
			++src;
143
			t0 = t2;
144
			break;
145
		case 4:
146
			da = t1 = src[0]; db = t2 = src[1];
147
			dc = t3 = src[2]; dd = t4 = src[3];
148
			da = DEC(da); db = DEC(db); dc = DEC(dc); dd = DEC(dd);
149
			*src = da ^ t0; *++src = db ^ t1;
150
			*++src = dc ^ t2; *++src = dd ^ t3;
151
			break;
152
	}
153
}
154
155
/* #define GF128_GENERATOR	0x87 */
156
157
static inline dquad __attribute((always_inline))
158
GF128_shl(dquad z)
159
{
160
	dquad a, c;
161
162
	c = (dquad){ 0x87 /* GF128 generator */, 1 };
163
164
	a = psrad(31, __builtin_shufflevector((q_int)z, V4ZERO, 3, 3, 1, 1));
165
166
	return (z + z ^ a & c);
167
}
168
169
static inline void
170
init_tweaks(dquad *restrict ts, int n)
171
{
172
	int i;
173
174
	for (i = 0;  i < n - 1; ++i)
175
		ts[i+1] = GF128_shl(ts[i]);
176
}
177
178
static inline void
179
out_xor(dquad *restrict ds, dquad *restrict ts, dquad *restrict *dst, int n)
180
{
181
	int i;
182
183
	for (i = 0; i < n; ++i, ++*dst)
184
		**dst = ds[i] ^ ts[i];
185
}
186
187
static inline void
188
llel_crypt(int dir, int rounds, const dquad *restrict sched, dquad *restrict ds,
189
    dquad *restrict ts, const dquad **src, int n)
190
{
191
	int i;
192
193
	for (i = 0; i < n; ++i, ++*src)
194
		ds[i] = CRYPT( **src ^ ts[i]);
195
}
196
197
static inline void __attribute((always_inline))
198
crypt_xex_X6(int dir, int rounds, const dquad *restrict sched, size_t n,
199
    const dquad *src, dquad *dst, dquad t0)
200
{
201
	dquad	d[6];
202
	dquad	t[6];
203
	size_t	m;
204
205
	t[0] = t0;
206
207
	if (0 < (m = n / 6)) {
208
		init_tweaks(t, 6);
209
		for (;;) {
210
			llel_crypt(dir, rounds, sched, d, t, &src, 6);
211
			out_xor(d, t, &dst, 6);
212
			t[0] = GF128_shl(t[5]);
213
			if (--m == 0)
214
				break;
215
			init_tweaks(t, 6);
216
		}
217
218
	}
219
	if (n % 2 == 1) {
220
		*dst++ = *t ^ CRYPT(*t ^ *src++);
221
		*t = GF128_shl(*t);
222
		--n;
223
	}
224
	switch (n %= 6) {
225
	case 2:
226
		n = 2;
227
		init_tweaks(t, n);
228
		llel_crypt(dir, rounds, sched, d, t, &src, n);
229
		out_xor(d, t, &dst, n);
230
		break;
231
	case 4:
232
		n = 4;
233
		init_tweaks(t, n);
234
		llel_crypt(dir, rounds, sched, d, t, &src, n);
235
		out_xor(d, t, &dst, n);
236
		break;
237
	}
238
}
239
240
void
241
aesni_enc(int rounds, const uint8_t *restrict sched, const uint8_t *src,
242
    uint8_t *dst, const uint8_t *restrict iv)
243
{
244
245
	*(dquad*)dst = encdec(1, rounds, (dquad*)sched, iv == NULL ?
246
				*(dquad*)src : *(dquad*)src ^ movdqu(iv));
247
}
248
249
void
250
aesni_dec(int rounds, const uint8_t *restrict sched, const uint8_t *src,
251
    uint8_t *dst, const uint8_t *restrict iv)
252
{
253
254
	*(dquad*)dst = encdec(0, rounds, (dquad*)sched, *(dquad*)src);
255
	*(dquad*)dst = (iv == NULL) ? *(dquad*)dst : *(dquad*)dst ^ movdqu(iv);
256
}
257
258
void
259
aesni_encrypt_cbc(int rounds, const void *restrict sched, size_t len,
260
    const uint8_t *src, uint8_t *dst, const uint8_t *restrict ivp)
261
{
262
	dquad	 iv;
263
	dquad	*p, *q;
264
265
	len /= AES_BLOCK_LEN;
266
	iv = movdqu(ivp);
267
	p = (dquad*)src;
268
	q = (dquad*)dst;
269
270
#define	ENCRYPT_CBC_LOOP						\
271
	for (; len--; ++p, ++q) {					\
272
		iv = encdec(1, rounds, (dquad*)sched, *p ^ iv);		\
273
		*q = iv;						\
274
	}
275
	SW_ROUNDS(rounds, ENCRYPT_CBC_LOOP)
276
}
277
278
void
279
aesni_decrypt_cbc(int rounds, const void *restrict sched, size_t len,
280
    const uint8_t *restrict dat, const uint8_t *restrict ivp)
281
{
282
	dquad iv;
283
284
	iv = movdqu(ivp);
285
	SW_ROUNDS(rounds, decrypt_cbc_X6(rounds, sched, len / AES_BLOCK_LEN,
286
				(dquad*)dat, iv))
287
}
288
289
#define CRYPT_XEX_X6(dir)	(\
290
		crypt_xex_X6((dir), rounds, sched, len / AES_BLOCK_LEN,	\
291
			(dquad*)src, (dquad*)dst,			\
292
			encdec(1, rounds, xsched, movdqu(ivp)))		\
293
		)
294
void
295
aesni_encrypt_xts(int rounds, const void *restrict sched,
296
    const void *restrict xsched, size_t len, const uint8_t *src, uint8_t *dst,
297
    const uint8_t *restrict ivp)
298
{
299
300
	SW_ROUNDS(rounds, CRYPT_XEX_X6(1))
301
}
302
303
void
304
aesni_decrypt_xts(int rounds, const void *restrict sched,
305
    const void *restrict xsched, size_t len, const uint8_t *src, uint8_t *dst,
306
    const uint8_t *restrict ivp)
307
{
308
309
	SW_ROUNDS(rounds, CRYPT_XEX_X6(0))
310
}
311
(-)crypto/aesni/aesni_wrap.c (+4 lines)
Lines 37-42 Link Here
37
37
38
MALLOC_DECLARE(M_AESNI);
38
MALLOC_DECLARE(M_AESNI);
39
39
40
#ifndef __amd64__
40
void
41
void
41
aesni_encrypt_cbc(int rounds, const void *key_schedule, size_t len,
42
aesni_encrypt_cbc(int rounds, const void *key_schedule, size_t len,
42
    const uint8_t *from, uint8_t *to, const uint8_t iv[AES_BLOCK_LEN])
43
    const uint8_t *from, uint8_t *to, const uint8_t iv[AES_BLOCK_LEN])
Lines 53-58 Link Here
53
		to += AES_BLOCK_LEN;
54
		to += AES_BLOCK_LEN;
54
	}
55
	}
55
}
56
}
57
#endif
56
58
57
void
59
void
58
aesni_encrypt_ecb(int rounds, const void *key_schedule, size_t len,
60
aesni_encrypt_ecb(int rounds, const void *key_schedule, size_t len,
Lines 82-87 Link Here
82
	}
84
	}
83
}
85
}
84
86
87
#ifndef __amd64__
85
#define	AES_XTS_BLOCKSIZE	16
88
#define	AES_XTS_BLOCKSIZE	16
86
#define	AES_XTS_IVSIZE		8
89
#define	AES_XTS_IVSIZE		8
87
#define	AES_XTS_ALPHA		0x87	/* GF(2^128) generator polynomial */
90
#define	AES_XTS_ALPHA		0x87	/* GF(2^128) generator polynomial */
Lines 169-174 Link Here
169
	aesni_crypt_xts(rounds, data_schedule, tweak_schedule, len, from, to,
172
	aesni_crypt_xts(rounds, data_schedule, tweak_schedule, len, from, to,
170
	    iv, 0);
173
	    iv, 0);
171
}
174
}
175
#endif
172
176
173
static int
177
static int
174
aesni_cipher_setup_common(struct aesni_session *ses, const uint8_t *key,
178
aesni_cipher_setup_common(struct aesni_session *ses, const uint8_t *key,
(-)modules/aesni/Makefile (-1 / +13 lines)
Lines 4-10 Link Here
4
4
5
KMOD=	aesni
5
KMOD=	aesni
6
SRCS=	aesni.c aesni_wrap.c
6
SRCS=	aesni.c aesni_wrap.c
7
SRCS+=	aesencdec_${MACHINE_CPUARCH}.S aeskeys_${MACHINE_CPUARCH}.S
7
SRCS+=	aeskeys_${MACHINE_CPUARCH}.S
8
9
.if ${MACHINE_CPUARCH} == "i386"
10
SRCS+=	aesencdec_${MACHINE_CPUARCH}.S
11
.endif
12
13
.if ${MACHINE_CPUARCH} == "amd64"
14
AESNI_CFLAGS= $(CFLAGS:N-mno-sse) -msse
15
OBJS=	aes_encdec.o
16
aes_encdec.o:	aes_encdec.c
17
	clang $(AESNI_CFLAGS) -o $(.TARGET) -c $(.IMPSRC)
18
.endif
19
8
SRCS+=	device_if.h bus_if.h opt_bus.h cryptodev_if.h
20
SRCS+=	device_if.h bus_if.h opt_bus.h cryptodev_if.h
9
21
10
.include <bsd.kmod.mk>
22
.include <bsd.kmod.mk>
11
  3.917 sec, 2000000    aes crypts,      16 bytes,  8169394 byte/sec,    62.3 Mb/sec
23
  3.917 sec, 2000000    aes crypts,      16 bytes,  8169394 byte/sec,    62.3 Mb/sec
12
  3.957 sec, 2000000    xts crypts,      16 bytes,  8087413 byte/sec,    61.7 Mb/sec
24
  3.957 sec, 2000000    xts crypts,      16 bytes,  8087413 byte/sec,    61.7 Mb/sec
13
  3.925 sec, 2000000 aes192 crypts,      16 bytes,  8152586 byte/sec,    62.2 Mb/sec
25
  3.925 sec, 2000000 aes192 crypts,      16 bytes,  8152586 byte/sec,    62.2 Mb/sec
14
  3.929 sec, 2000000 aes256 crypts,      16 bytes,  8143737 byte/sec,    62.1 Mb/sec
26
  3.929 sec, 2000000 aes256 crypts,      16 bytes,  8143737 byte/sec,    62.1 Mb/sec
15
  3.988 sec, 2000000 xts256 crypts,      16 bytes,  8023624 byte/sec,    61.2 Mb/sec
27
  3.988 sec, 2000000 xts256 crypts,      16 bytes,  8023624 byte/sec,    61.2 Mb/sec
16
  5.309 sec, 2000000    aes crypts,     512 bytes, 192891714 byte/sec,  1471.6 Mb/sec
28
  5.309 sec, 2000000    aes crypts,     512 bytes, 192891714 byte/sec,  1471.6 Mb/sec
17
  4.804 sec, 2000000    xts crypts,     512 bytes, 213153973 byte/sec,  1626.2 Mb/sec
29
  4.804 sec, 2000000    xts crypts,     512 bytes, 213153973 byte/sec,  1626.2 Mb/sec
18
  5.515 sec, 2000000 aes192 crypts,     512 bytes, 185674185 byte/sec,  1416.6 Mb/sec
30
  5.515 sec, 2000000 aes192 crypts,     512 bytes, 185674185 byte/sec,  1416.6 Mb/sec
19
  5.734 sec, 2000000 aes256 crypts,     512 bytes, 178570712 byte/sec,  1362.4 Mb/sec
31
  5.734 sec, 2000000 aes256 crypts,     512 bytes, 178570712 byte/sec,  1362.4 Mb/sec
20
  4.881 sec, 2000000 xts256 crypts,     512 bytes, 209785511 byte/sec,  1600.5 Mb/sec
32
  4.881 sec, 2000000 xts256 crypts,     512 bytes, 209785511 byte/sec,  1600.5 Mb/sec
21
  1.505 sec,  200000    aes crypts,    4096 bytes, 544330149 byte/sec,  4152.9 Mb/sec
33
  1.505 sec,  200000    aes crypts,    4096 bytes, 544330149 byte/sec,  4152.9 Mb/sec
22
  0.990 sec,  200000    xts crypts,    4096 bytes, 827397858 byte/sec,  6312.5 Mb/sec
34
  0.990 sec,  200000    xts crypts,    4096 bytes, 827397858 byte/sec,  6312.5 Mb/sec
23
  1.662 sec,  200000 aes192 crypts,    4096 bytes, 492907535 byte/sec,  3760.6 Mb/sec
35
  1.662 sec,  200000 aes192 crypts,    4096 bytes, 492907535 byte/sec,  3760.6 Mb/sec
24
  1.816 sec,  200000 aes256 crypts,    4096 bytes, 451074247 byte/sec,  3441.4 Mb/sec
36
  1.816 sec,  200000 aes256 crypts,    4096 bytes, 451074247 byte/sec,  3441.4 Mb/sec
25
  1.081 sec,  200000 xts256 crypts,    4096 bytes, 757781786 byte/sec,  5781.4 Mb/sec
37
  1.081 sec,  200000 xts256 crypts,    4096 bytes, 757781786 byte/sec,  5781.4 Mb/sec

Return to bug 170200