FreeBSD Bugzilla – Attachment 149507 Details for
Bug 182610
[patch] arc4random(3): replace RC4 with ChaCha20, follow OpenBSD
Home
|
New
|
Browse
|
Search
|
[?]
|
Reports
|
Help
|
New Account
|
Log In
Remember
[x]
|
Forgot Password
Login:
[x]
[patch]
Updated patch
patch-arc4random-chacha.diff (text/plain), 27.17 KB, created by
David CARLIER
on 2014-11-17 10:19:41 UTC
(
hide
)
Description:
Updated patch
Filename:
MIME Type:
Creator:
David CARLIER
Created:
2014-11-17 10:19:41 UTC
Size:
27.17 KB
patch
obsolete
>diff --git a/lib/libc/gen/arc4random.c b/lib/libc/gen/arc4random.c >index 59c4f7f..33cfd1e 100644 >--- a/lib/libc/gen/arc4random.c >+++ b/lib/libc/gen/arc4random.c >@@ -3,6 +3,7 @@ > /* > * Copyright (c) 1996, David Mazieres <dm@uun.org> > * Copyright (c) 2008, Damien Miller <djm@openbsd.org> >+ * Copyright (c) 2013, Markus Friedl <markus@openbsd.org> > * > * Permission to use, copy, modify, and distribute this software for any > * purpose with or without fee is hereby granted, provided that the above >@@ -18,15 +19,7 @@ > */ > > /* >- * Arc4 random number generator for OpenBSD. >- * >- * This code is derived from section 17.1 of Applied Cryptography, >- * second edition, which describes a stream cipher allegedly >- * compatible with RSA Labs "RC4" cipher (the actual description of >- * which is a trade secret). The same algorithm is used as a stream >- * cipher called "arcfour" in Tatu Ylonen's ssh package. >- * >- * RC4 is a registered trademark of RSA Laboratories. >+ * ChaCha based random number generator for OpenBSD. > */ > > #include <sys/cdefs.h> >@@ -37,31 +30,33 @@ __FBSDID("$FreeBSD$"); > #include <limits.h> > #include <stdlib.h> > #include <unistd.h> >+#include <string.h> > #include <sys/types.h> > #include <sys/param.h> > #include <sys/sysctl.h> > #include <sys/time.h> >+#include <sys/mman.h> > #include <pthread.h> > > #include "libc_private.h" > #include "un-namespace.h" > >+#define KEYSTREAM_ONLY >+#include "chacha_private.h" >+ > #ifdef __GNUC__ > #define inline __inline > #else /* !__GNUC__ */ > #define inline > #endif /* !__GNUC__ */ > >-struct arc4_stream { >- u_int8_t i; >- u_int8_t j; >- u_int8_t s[256]; >-}; >- > static pthread_mutex_t arc4random_mtx = PTHREAD_MUTEX_INITIALIZER; > > #define RANDOMDEV "/dev/random" >-#define KEYSIZE 128 >+#define KEYSZ 32 >+#define IVSZ 8 >+#define BLOCKSZ 64 >+#define RSBUFSZ (16 * BLOCKSZ) > #define _ARC4_LOCK() \ > do { \ > if (__isthreaded) \ >@@ -74,47 +69,62 @@ static pthread_mutex_t arc4random_mtx = PTHREAD_MUTEX_INITIALIZER; > _pthread_mutex_unlock(&arc4random_mtx); \ > } while (0) > >-static int rs_initialized; >-static struct arc4_stream rs; >-static pid_t arc4_stir_pid; >-static int arc4_count; >+/* Marked INHERIT_ZERO, so zero'd out in fork children. */ >+static struct { >+ /* valid bytes at end of rs_buf */ >+ size_t rs_have; >+ /* bytes till reseed */ >+ size_t rs_count; >+} *rs; >+ >+/* Preserved in fork children */ >+static struct { >+ /* chacha context for random keystream */ >+ chacha_ctx ctx; >+ /* keystream blocks */ >+ u_char rs_buf[RSBUFSZ]; >+} *rsx; > > extern int __sysctl(int *name, u_int namelen, void *oldp, size_t *oldlenp, > void *newp, size_t newlen); > >-static inline u_int8_t arc4_getbyte(void); >-static void arc4_stir(void); >+static inline void _rs_rekey(u_char *dat, size_t datlen); > > static inline void >-arc4_init(void) >+_rs_init(u_char *buf, size_t n) > { >- int n; >+ if (n < (KEYSZ+ IVSZ)) >+ return; > >- for (n = 0; n < 256; n++) >- rs.s[n] = n; >- rs.i = 0; >- rs.j = 0; >-} >+ if (rs == NULL) { >+ if ((rs = mmap(NULL, sizeof(*rs), PROT_READ|PROT_WRITE, >+ MAP_ANON|MAP_PRIVATE, -1, 0)) == MAP_FAILED) >+ abort(); > >-static inline void >-arc4_addrandom(u_char *dat, int datlen) >-{ >- int n; >- u_int8_t si; >- >- rs.i--; >- for (n = 0; n < 256; n++) { >- rs.i = (rs.i + 1); >- si = rs.s[rs.i]; >- rs.j = (rs.j + si + dat[n % datlen]); >- rs.s[rs.i] = rs.s[rs.j]; >- rs.s[rs.j] = si; >+ if (minherit(rs, sizeof(*rs), INHERIT_ZERO) == -1) { >+ munmap(rs, sizeof(*rs)); >+ abort(); >+ } >+ >+ if ((rsx = mmap(NULL, sizeof(*rsx), PROT_READ|PROT_WRITE, >+ MAP_ANON|MAP_PRIVATE, -1, 0)) == MAP_FAILED) { >+ munmap(rs, sizeof(*rs)); >+ abort(); >+ } >+ >+ if (minherit(rsx, sizeof(*rsx), INHERIT_ZERO) == -1) { >+ munmap(rsx, sizeof(*rsx)); >+ munmap(rs, sizeof(*rs)); >+ abort(); >+ } > } >- rs.j = rs.i; >+ >+ chacha_keysetup(&rsx->ctx, buf, KEYSZ * 8, 0); >+ chacha_ivsetup(&rsx->ctx, buf + KEYSZ); > } > > static size_t >-arc4_sysctl(u_char *buf, size_t size) >+_rs_sysctl(u_char *buf, size_t size) > { > int mib[2]; > size_t len, done; >@@ -135,99 +145,139 @@ arc4_sysctl(u_char *buf, size_t size) > return (done); > } > >+static size_t >+arc4_sysctl(u_char *buf, size_t size) >+{ >+ return (_rs_sysctl(buf, size)); >+} >+ > static void >-arc4_stir(void) >+_rs_stir(void) > { >- int done, fd, i; > struct { > struct timeval tv; >- pid_t pid; >- u_char rnd[KEYSIZE]; >+ u_char rnd[KEYSZ + IVSZ]; > } rdat; >+ int done, fd; > >- if (!rs_initialized) { >- arc4_init(); >- rs_initialized = 1; >- } > done = 0; >- if (arc4_sysctl((u_char *)&rdat, KEYSIZE) == KEYSIZE) >+ if (_rs_sysctl((u_char *)&rdat, KEYSZ + IVSZ) == (KEYSZ + IVSZ)) > done = 1; >+ > if (!done) { > fd = _open(RANDOMDEV, O_RDONLY | O_CLOEXEC, 0); > if (fd >= 0) { >- if (_read(fd, &rdat, KEYSIZE) == KEYSIZE) >+ if (_read(fd, &rdat, (KEYSZ + IVSZ)) == (KEYSZ + IVSZ)) > done = 1; > (void)_close(fd); > } > } >+ > if (!done) { > (void)gettimeofday(&rdat.tv, NULL); >- rdat.pid = getpid(); > /* We'll just take whatever was on the stack too... */ > } > >- arc4_addrandom((u_char *)&rdat, KEYSIZE); >+ if (!rs) { >+ _rs_init((u_char *)&rdat, KEYSZ + IVSZ); >+ } else { >+ _rs_rekey((u_char *)&rdat, KEYSZ + IVSZ); >+ } > >- /* >- * Discard early keystream, as per recommendations in: >- * "(Not So) Random Shuffles of RC4" by Ilya Mironov. >- */ >- for (i = 0; i < 1024; i++) >- (void)arc4_getbyte(); >- arc4_count = 1600000; >+ memset((u_char *)&rdat, 0, sizeof(rdat)); >+ >+ /* invalidate rs_buf */ >+ rs->rs_have = 0; >+ memset(rsx->rs_buf, 0, RSBUFSZ); >+ >+ rs->rs_count = 1600000; > } > >-static void >-arc4_stir_if_needed(void) >+static inline void >+_rs_stir_if_needed(size_t len) > { >- pid_t pid = getpid(); >- >- if (arc4_count <= 0 || !rs_initialized || arc4_stir_pid != pid) { >- arc4_stir_pid = pid; >- arc4_stir(); >- } >+ if (!rs || rs->rs_count <= len) >+ _rs_stir(); >+ else >+ rs->rs_count -= len; > } > >-static inline u_int8_t >-arc4_getbyte(void) >+static inline void >+_rs_rekey(u_char *dat, size_t datlen) > { >- u_int8_t si, sj; >- >- rs.i = (rs.i + 1); >- si = rs.s[rs.i]; >- rs.j = (rs.j + si); >- sj = rs.s[rs.j]; >- rs.s[rs.i] = sj; >- rs.s[rs.j] = si; >- return (rs.s[(si + sj) & 0xff]); >+#ifndef KEYSTREAM_ONLY >+ memset(rsx->rs_buf, 0, RSBUFSZ); >+#endif >+ >+ /* fill rs_buf with the keystream */ >+ chacha_encrypt_bytes(&rsx->ctx, rsx->rs_buf, rsx->rs_buf, RSBUFSZ); >+ /* mix in optional user provided data */ >+ if (dat) { >+ size_t i, m; >+ >+ m = MIN(datlen, (KEYSZ + IVSZ)); >+ for (i = 0; i < m; i++) >+ rsx->rs_buf[i] ^= dat[i]; >+ } >+ /* immediatly reinit for backtracking resistance */ >+ _rs_init(rsx->rs_buf, (KEYSZ + IVSZ)); >+ memset(rsx->rs_buf, 0, (KEYSZ + IVSZ)); >+ rs->rs_have = (RSBUFSZ - KEYSZ - IVSZ); > } > >-static inline u_int32_t >-arc4_getword(void) >+static inline void >+_rs_random_buf(void *_buf, size_t n) > { >- u_int32_t val; >- val = arc4_getbyte() << 24; >- val |= arc4_getbyte() << 16; >- val |= arc4_getbyte() << 8; >- val |= arc4_getbyte(); >- return val; >+ u_char *buf = (u_char *)_buf; >+ u_char *keystream; >+ size_t m; >+ >+ _rs_stir_if_needed(n); >+ while (n > 0) { >+ if (rs->rs_have > 0) { >+ m = MIN(n, rs->rs_have); >+ keystream = (rsx->rs_buf + RSBUFSZ - rs->rs_have); >+ memcpy(buf, keystream, m); >+ memset(keystream, 0, m); >+ buf += m; >+ n -= m; >+ rs->rs_have -= m; >+ } >+ >+ if (rs->rs_have == 0) >+ _rs_rekey(NULL, 0); >+ } > } > >-void >-arc4random_stir(void) >+static inline void >+_rs_random_u32(u_int32_t *val) > { >- _ARC4_LOCK(); >- arc4_stir(); >- _ARC4_UNLOCK(); >+ u_char *keystream; >+ >+ _rs_stir_if_needed(sizeof(*val)); >+ if (rs->rs_have < sizeof(*val)) >+ _rs_rekey(NULL, 0); >+ keystream = (rsx->rs_buf + RSBUFSZ - rs->rs_have); >+ memcpy(val, keystream, sizeof(*val)); >+ memset(keystream, 0, sizeof(*val)); >+ rs->rs_have -= sizeof(*val); > } > > void > arc4random_addrandom(u_char *dat, int datlen) > { >+ int m; >+ > _ARC4_LOCK(); >- if (!rs_initialized) >- arc4_stir(); >- arc4_addrandom(dat, datlen); >+ if (!rs) >+ _rs_stir(); >+ >+ while (datlen > 0) { >+ m = MIN(datlen, (KEYSZ + IVSZ)); >+ _rs_rekey(dat, m); >+ dat += m; >+ datlen -= m; >+ } > _ARC4_UNLOCK(); > } > >@@ -235,10 +285,9 @@ u_int32_t > arc4random(void) > { > u_int32_t val; >+ > _ARC4_LOCK(); >- arc4_count -= 4; >- arc4_stir_if_needed(); >- val = arc4_getword(); >+ _rs_random_u32(&val); > _ARC4_UNLOCK(); > return val; > } >@@ -246,50 +295,44 @@ arc4random(void) > void > arc4random_buf(void *_buf, size_t n) > { >- u_char *buf = (u_char *)_buf; > _ARC4_LOCK(); >- arc4_stir_if_needed(); >- while (n--) { >- if (--arc4_count <= 0) >- arc4_stir(); >- buf[n] = arc4_getbyte(); >- } >+ _rs_random_buf(_buf, n); >+ _ARC4_UNLOCK(); >+} >+ >+void >+arc4random_stir(void) >+{ >+ _ARC4_LOCK(); >+ _rs_stir(); > _ARC4_UNLOCK(); > } > >-/* >- * Calculate a uniformly distributed random number less than upper_bound >- * avoiding "modulo bias". >- * >- * Uniformity is achieved by generating new random numbers until the one >- * returned is outside the range [0, 2**32 % upper_bound). This >- * guarantees the selected random number will be inside >- * [2**32 % upper_bound, 2**32) which maps back to [0, upper_bound) >- * after reduction modulo upper_bound. >- */ > u_int32_t > arc4random_uniform(u_int32_t upper_bound) > { > u_int32_t r, min; > > if (upper_bound < 2) >- return 0; >+ return (0); > > /* 2**32 % x == (2**32 - x) % x */ > min = -upper_bound % upper_bound; >+ > /* >- * This could theoretically loop forever but each retry has >+ * This could theorically loop forever but each retry has > * p > 0.5 (worst case, usually far better) of selecting a > * number inside the range we need, so it should rarely need > * to re-roll. > */ >+ > for (;;) { > r = arc4random(); > if (r >= min) > break; > } > >- return r % upper_bound; >+ return (r % upper_bound); > } > > #if 0 >diff --git a/lib/libc/gen/chacha_private.h b/lib/libc/gen/chacha_private.h >new file mode 100644 >index 0000000..0995fac >--- /dev/null >+++ b/lib/libc/gen/chacha_private.h >@@ -0,0 +1,233 @@ >+/* >+chacha-merged.c version 20080118 >+D.J. Bernstein >+Public domain. >+*/ >+ >+/* $OpenBSD: chacha_private.h,v 1.2 2013/10/04 07:02:27 djm Exp $ */ >+ >+typedef unsigned char u8; >+typedef unsigned int u32; >+ >+typedef struct >+{ >+ u32 input[16]; /* could be compressed */ >+} chacha_ctx; >+ >+#define U8C(v) (v##U) >+#define U32C(v) (v##U) >+ >+#define U8V(v) ((u8)(v) & U8C(0xFF)) >+#define U32V(v) ((u32)(v) & U32C(0xFFFFFFFF)) >+ >+#define ROTL32(v, n) \ >+ (U32V((v) << (n)) | ((v) >> (32 - (n)))) >+ >+#define U8TO32_LITTLE(p) \ >+ (((u32)((p)[0])) | \ >+ ((u32)((p)[1]) << 8) | \ >+ ((u32)((p)[2]) << 16) | \ >+ ((u32)((p)[3]) << 24)) >+ >+#define U32TO8_LITTLE(p, v) \ >+ do { \ >+ (p)[0] = U8V((v)); \ >+ (p)[1] = U8V((v) >> 8); \ >+ (p)[2] = U8V((v) >> 16); \ >+ (p)[3] = U8V((v) >> 24); \ >+ } while (0) >+ >+#define ROTATE(v, c) (ROTL32(v, c)) >+#define XOR(v, w) ((v) ^ (w)) >+#define PLUS(v, w) (U32V((v) + (w))) >+#define PLUSONE(v) (PLUS((v), 1)) >+ >+#define QUARTERROUND(a, b, c, d) \ >+ a = PLUS(a, b); d = ROTATE(XOR(d, a), 16); \ >+ c = PLUS(c, d); b = ROTATE(XOR(b, c), 12); \ >+ a = PLUS(a, b); d = ROTATE(XOR(d, a), 8); \ >+ c = PLUS(c, d); b = ROTATE(XOR(b, c), 7); >+ >+static const char sigma[16] = "expand 32-byte k"; >+static const char tau[16] = "expand 16-byte k"; >+ >+static void >+chacha_keysetup(chacha_ctx *x, const u8 *k, u32 kbits, u32 ivbits) >+{ >+ const char *constants; >+ >+ x->input[4] = U8TO32_LITTLE(k + 0); >+ x->input[5] = U8TO32_LITTLE(k + 4); >+ x->input[6] = U8TO32_LITTLE(k + 8); >+ x->input[7] = U8TO32_LITTLE(k + 12); >+ >+ if (kbits == 256) { /* recommended */ >+ k += 16; >+ constants = sigma; >+ } else { /* kbits == 128 */ >+ constants = tau; >+ } >+ >+ x->input[8] = U8TO32_LITTLE(k + 0); >+ x->input[9] = U8TO32_LITTLE(k + 4); >+ x->input[10] = U8TO32_LITTLE(k + 8); >+ x->input[11] = U8TO32_LITTLE(k + 12); >+ x->input[0] = U8TO32_LITTLE(constants + 0); >+ x->input[1] = U8TO32_LITTLE(constants + 4); >+ x->input[2] = U8TO32_LITTLE(constants + 8); >+ x->input[3] = U8TO32_LITTLE(constants + 12); >+} >+ >+static void >+chacha_ivsetup(chacha_ctx *x, const u8 *iv) >+{ >+ x->input[12] = 0; >+ x->input[13] = 0; >+ x->input[14] = U8TO32_LITTLE(iv + 0); >+ x->input[15] = U8TO32_LITTLE(iv + 4); >+} >+ >+static void >+chacha_encrypt_bytes(chacha_ctx *x, const u8 *m, u8 *c, u32 bytes) >+{ >+ u32 x0, x1, x2, x3, x4, x5, x6, x7, x8, x9, x10, x11, x12, x13, x14, x15; >+ u32 j0, j1, j2, j3, j4, j5, j6, j7, j8, j9, j10, j11, j12, j13, j14, j15; >+ u8 *ctarget = NULL; >+ u8 tmp[64]; >+ u_int i; >+ >+ if (!bytes) >+ return; >+ >+ j0 = x->input[0]; >+ j1 = x->input[1]; >+ j2 = x->input[2]; >+ j3 = x->input[3]; >+ j4 = x->input[4]; >+ j5 = x->input[5]; >+ j6 = x->input[6]; >+ j7 = x->input[7]; >+ j8 = x->input[8]; >+ j9 = x->input[9]; >+ j10 = x->input[10]; >+ j11 = x->input[11]; >+ j12 = x->input[12]; >+ j13 = x->input[13]; >+ j14 = x->input[14]; >+ j15 = x->input[15]; >+ >+ for (;;) { >+ if (bytes < 64) { >+ for (i = 0; i < bytes; ++i) >+ tmp[i] = m[i]; >+ m = tmp; >+ ctarget = c; >+ c = tmp; >+ } >+ >+ x0 = j0; >+ x1 = j1; >+ x2 = j2; >+ x3 = j3; >+ x4 = j4; >+ x5 = j5; >+ x6 = j6; >+ x7 = j7; >+ x8 = j8; >+ x9 = j9; >+ x10 = j10; >+ x11 = j11; >+ x12 = j12; >+ x13 = j13; >+ x14 = j14; >+ x15 = j15; >+ >+ for (i = 20; i > 0; i -= 2) { >+ QUARTERROUND(x0, x4, x8, x12) >+ QUARTERROUND(x1, x5, x9, x13) >+ QUARTERROUND(x2, x6, x10, x14) >+ QUARTERROUND(x3, x7, x11, x15) >+ QUARTERROUND(x0, x5, x10, x15) >+ QUARTERROUND(x1, x6, x11, x12) >+ QUARTERROUND(x2, x7, x8, x13) >+ QUARTERROUND(x3, x4, x9, x14) >+ } >+ >+ x0 = PLUS(x0, j0); >+ x1 = PLUS(x1, j1); >+ x2 = PLUS(x2, j2); >+ x3 = PLUS(x3, j3); >+ x4 = PLUS(x4, j4); >+ x5 = PLUS(x5, j5); >+ x6 = PLUS(x6, j6); >+ x7 = PLUS(x7, j7); >+ x8 = PLUS(x8, j8); >+ x9 = PLUS(x9, j9); >+ x10 = PLUS(x10, j10); >+ x11 = PLUS(x11, j11); >+ x12 = PLUS(x12, j12); >+ x13 = PLUS(x13, j13); >+ x14 = PLUS(x14, j14); >+ x15 = PLUS(x15, j15); >+ >+#ifndef KEYSTREAM_ONLY >+ x0 = XOR(x0, U8TO32_LITTLE(m + 0)); >+ x1 = XOR(x1, U8TO32_LITTLE(m + 4)); >+ x2 = XOR(x2, U8TO32_LITTLE(m + 8)); >+ x3 = XOR(x3, U8TO32_LITTLE(m + 12)); >+ x4 = XOR(x4, U8TO32_LITTLE(m + 16)); >+ x5 = XOR(x5, U8TO32_LITTLE(m + 20)); >+ x6 = XOR(x6, U8TO32_LITTLE(m + 24)); >+ x7 = XOR(x7, U8TO32_LITTLE(m + 28)); >+ x8 = XOR(x8, U8TO32_LITTLE(m + 32)); >+ x9 = XOR(x9, U8TO32_LITTLE(m + 36)); >+ x10 = XOR(x10, U8TO32_LITTLE(m + 40)); >+ x11 = XOR(x11, U8TO32_LITTLE(m + 44)); >+ x12 = XOR(x12, U8TO32_LITTLE(m + 48)); >+ x13 = XOR(x13, U8TO32_LITTLE(m + 52)); >+ x14 = XOR(x14, U8TO32_LITTLE(m + 56)); >+ x15 = XOR(x15, U8TO32_LITTLE(m + 60)); >+#endif >+ >+ j12 = PLUSONE(j12); >+ >+ if (!j12) { >+ j13 = PLUSONE(j13); >+ /* stopping at 2^70 bytes per nonce is user responsability */ >+ } >+ >+ U32TO8_LITTLE(c + 0, x0); >+ U32TO8_LITTLE(c + 4, x1); >+ U32TO8_LITTLE(c + 8, x2); >+ U32TO8_LITTLE(c + 12, x3); >+ U32TO8_LITTLE(c + 16, x4); >+ U32TO8_LITTLE(c + 20, x5); >+ U32TO8_LITTLE(c + 24, x6); >+ U32TO8_LITTLE(c + 28, x7); >+ U32TO8_LITTLE(c + 32, x8); >+ U32TO8_LITTLE(c + 36, x9); >+ U32TO8_LITTLE(c + 40, x10); >+ U32TO8_LITTLE(c + 44, x11); >+ U32TO8_LITTLE(c + 48, x12); >+ U32TO8_LITTLE(c + 52, x13); >+ U32TO8_LITTLE(c + 56, x14); >+ U32TO8_LITTLE(c + 60, x15); >+ >+ if (bytes <= 64) { >+ if (bytes < 64) { >+ for (i = 0; i < bytes; ++i) >+ ctarget[i] = c[i]; >+ } >+ >+ x->input[12] = j12; >+ x->input[13] = j13; >+ return; >+ } >+ >+ bytes -= 64; >+ c += 64; >+#ifndef KEYSTREAM_ONLY >+ m += 64; >+#endif >+ } >+} >diff --git a/lib/libc/sys/minherit.2 b/lib/libc/sys/minherit.2 >index dc85d09..6075506 100644 >--- a/lib/libc/sys/minherit.2 >+++ b/lib/libc/sys/minherit.2 >@@ -91,6 +91,9 @@ it will no longer be shared in the parent > after the parent forks and there is no way to get the previous > shared-backing-store mapping without unmapping and remapping the address > space in the parent. >+.It Dv INHERIT_ZERO >+This option guarantees that a fork has >+zero'd memory mapping. > .El > .Sh RETURN VALUES > .Rv -std minherit >diff --git a/sys/crypto/chacha_private.h b/sys/crypto/chacha_private.h >new file mode 100644 >index 0000000..0995fac >--- /dev/null >+++ b/sys/crypto/chacha_private.h >@@ -0,0 +1,233 @@ >+/* >+chacha-merged.c version 20080118 >+D.J. Bernstein >+Public domain. >+*/ >+ >+/* $OpenBSD: chacha_private.h,v 1.2 2013/10/04 07:02:27 djm Exp $ */ >+ >+typedef unsigned char u8; >+typedef unsigned int u32; >+ >+typedef struct >+{ >+ u32 input[16]; /* could be compressed */ >+} chacha_ctx; >+ >+#define U8C(v) (v##U) >+#define U32C(v) (v##U) >+ >+#define U8V(v) ((u8)(v) & U8C(0xFF)) >+#define U32V(v) ((u32)(v) & U32C(0xFFFFFFFF)) >+ >+#define ROTL32(v, n) \ >+ (U32V((v) << (n)) | ((v) >> (32 - (n)))) >+ >+#define U8TO32_LITTLE(p) \ >+ (((u32)((p)[0])) | \ >+ ((u32)((p)[1]) << 8) | \ >+ ((u32)((p)[2]) << 16) | \ >+ ((u32)((p)[3]) << 24)) >+ >+#define U32TO8_LITTLE(p, v) \ >+ do { \ >+ (p)[0] = U8V((v)); \ >+ (p)[1] = U8V((v) >> 8); \ >+ (p)[2] = U8V((v) >> 16); \ >+ (p)[3] = U8V((v) >> 24); \ >+ } while (0) >+ >+#define ROTATE(v, c) (ROTL32(v, c)) >+#define XOR(v, w) ((v) ^ (w)) >+#define PLUS(v, w) (U32V((v) + (w))) >+#define PLUSONE(v) (PLUS((v), 1)) >+ >+#define QUARTERROUND(a, b, c, d) \ >+ a = PLUS(a, b); d = ROTATE(XOR(d, a), 16); \ >+ c = PLUS(c, d); b = ROTATE(XOR(b, c), 12); \ >+ a = PLUS(a, b); d = ROTATE(XOR(d, a), 8); \ >+ c = PLUS(c, d); b = ROTATE(XOR(b, c), 7); >+ >+static const char sigma[16] = "expand 32-byte k"; >+static const char tau[16] = "expand 16-byte k"; >+ >+static void >+chacha_keysetup(chacha_ctx *x, const u8 *k, u32 kbits, u32 ivbits) >+{ >+ const char *constants; >+ >+ x->input[4] = U8TO32_LITTLE(k + 0); >+ x->input[5] = U8TO32_LITTLE(k + 4); >+ x->input[6] = U8TO32_LITTLE(k + 8); >+ x->input[7] = U8TO32_LITTLE(k + 12); >+ >+ if (kbits == 256) { /* recommended */ >+ k += 16; >+ constants = sigma; >+ } else { /* kbits == 128 */ >+ constants = tau; >+ } >+ >+ x->input[8] = U8TO32_LITTLE(k + 0); >+ x->input[9] = U8TO32_LITTLE(k + 4); >+ x->input[10] = U8TO32_LITTLE(k + 8); >+ x->input[11] = U8TO32_LITTLE(k + 12); >+ x->input[0] = U8TO32_LITTLE(constants + 0); >+ x->input[1] = U8TO32_LITTLE(constants + 4); >+ x->input[2] = U8TO32_LITTLE(constants + 8); >+ x->input[3] = U8TO32_LITTLE(constants + 12); >+} >+ >+static void >+chacha_ivsetup(chacha_ctx *x, const u8 *iv) >+{ >+ x->input[12] = 0; >+ x->input[13] = 0; >+ x->input[14] = U8TO32_LITTLE(iv + 0); >+ x->input[15] = U8TO32_LITTLE(iv + 4); >+} >+ >+static void >+chacha_encrypt_bytes(chacha_ctx *x, const u8 *m, u8 *c, u32 bytes) >+{ >+ u32 x0, x1, x2, x3, x4, x5, x6, x7, x8, x9, x10, x11, x12, x13, x14, x15; >+ u32 j0, j1, j2, j3, j4, j5, j6, j7, j8, j9, j10, j11, j12, j13, j14, j15; >+ u8 *ctarget = NULL; >+ u8 tmp[64]; >+ u_int i; >+ >+ if (!bytes) >+ return; >+ >+ j0 = x->input[0]; >+ j1 = x->input[1]; >+ j2 = x->input[2]; >+ j3 = x->input[3]; >+ j4 = x->input[4]; >+ j5 = x->input[5]; >+ j6 = x->input[6]; >+ j7 = x->input[7]; >+ j8 = x->input[8]; >+ j9 = x->input[9]; >+ j10 = x->input[10]; >+ j11 = x->input[11]; >+ j12 = x->input[12]; >+ j13 = x->input[13]; >+ j14 = x->input[14]; >+ j15 = x->input[15]; >+ >+ for (;;) { >+ if (bytes < 64) { >+ for (i = 0; i < bytes; ++i) >+ tmp[i] = m[i]; >+ m = tmp; >+ ctarget = c; >+ c = tmp; >+ } >+ >+ x0 = j0; >+ x1 = j1; >+ x2 = j2; >+ x3 = j3; >+ x4 = j4; >+ x5 = j5; >+ x6 = j6; >+ x7 = j7; >+ x8 = j8; >+ x9 = j9; >+ x10 = j10; >+ x11 = j11; >+ x12 = j12; >+ x13 = j13; >+ x14 = j14; >+ x15 = j15; >+ >+ for (i = 20; i > 0; i -= 2) { >+ QUARTERROUND(x0, x4, x8, x12) >+ QUARTERROUND(x1, x5, x9, x13) >+ QUARTERROUND(x2, x6, x10, x14) >+ QUARTERROUND(x3, x7, x11, x15) >+ QUARTERROUND(x0, x5, x10, x15) >+ QUARTERROUND(x1, x6, x11, x12) >+ QUARTERROUND(x2, x7, x8, x13) >+ QUARTERROUND(x3, x4, x9, x14) >+ } >+ >+ x0 = PLUS(x0, j0); >+ x1 = PLUS(x1, j1); >+ x2 = PLUS(x2, j2); >+ x3 = PLUS(x3, j3); >+ x4 = PLUS(x4, j4); >+ x5 = PLUS(x5, j5); >+ x6 = PLUS(x6, j6); >+ x7 = PLUS(x7, j7); >+ x8 = PLUS(x8, j8); >+ x9 = PLUS(x9, j9); >+ x10 = PLUS(x10, j10); >+ x11 = PLUS(x11, j11); >+ x12 = PLUS(x12, j12); >+ x13 = PLUS(x13, j13); >+ x14 = PLUS(x14, j14); >+ x15 = PLUS(x15, j15); >+ >+#ifndef KEYSTREAM_ONLY >+ x0 = XOR(x0, U8TO32_LITTLE(m + 0)); >+ x1 = XOR(x1, U8TO32_LITTLE(m + 4)); >+ x2 = XOR(x2, U8TO32_LITTLE(m + 8)); >+ x3 = XOR(x3, U8TO32_LITTLE(m + 12)); >+ x4 = XOR(x4, U8TO32_LITTLE(m + 16)); >+ x5 = XOR(x5, U8TO32_LITTLE(m + 20)); >+ x6 = XOR(x6, U8TO32_LITTLE(m + 24)); >+ x7 = XOR(x7, U8TO32_LITTLE(m + 28)); >+ x8 = XOR(x8, U8TO32_LITTLE(m + 32)); >+ x9 = XOR(x9, U8TO32_LITTLE(m + 36)); >+ x10 = XOR(x10, U8TO32_LITTLE(m + 40)); >+ x11 = XOR(x11, U8TO32_LITTLE(m + 44)); >+ x12 = XOR(x12, U8TO32_LITTLE(m + 48)); >+ x13 = XOR(x13, U8TO32_LITTLE(m + 52)); >+ x14 = XOR(x14, U8TO32_LITTLE(m + 56)); >+ x15 = XOR(x15, U8TO32_LITTLE(m + 60)); >+#endif >+ >+ j12 = PLUSONE(j12); >+ >+ if (!j12) { >+ j13 = PLUSONE(j13); >+ /* stopping at 2^70 bytes per nonce is user responsability */ >+ } >+ >+ U32TO8_LITTLE(c + 0, x0); >+ U32TO8_LITTLE(c + 4, x1); >+ U32TO8_LITTLE(c + 8, x2); >+ U32TO8_LITTLE(c + 12, x3); >+ U32TO8_LITTLE(c + 16, x4); >+ U32TO8_LITTLE(c + 20, x5); >+ U32TO8_LITTLE(c + 24, x6); >+ U32TO8_LITTLE(c + 28, x7); >+ U32TO8_LITTLE(c + 32, x8); >+ U32TO8_LITTLE(c + 36, x9); >+ U32TO8_LITTLE(c + 40, x10); >+ U32TO8_LITTLE(c + 44, x11); >+ U32TO8_LITTLE(c + 48, x12); >+ U32TO8_LITTLE(c + 52, x13); >+ U32TO8_LITTLE(c + 56, x14); >+ U32TO8_LITTLE(c + 60, x15); >+ >+ if (bytes <= 64) { >+ if (bytes < 64) { >+ for (i = 0; i < bytes; ++i) >+ ctarget[i] = c[i]; >+ } >+ >+ x->input[12] = j12; >+ x->input[13] = j13; >+ return; >+ } >+ >+ bytes -= 64; >+ c += 64; >+#ifndef KEYSTREAM_ONLY >+ m += 64; >+#endif >+ } >+} >diff --git a/sys/libkern/arc4random.c b/sys/libkern/arc4random.c >index 62ace2c..6b37704 100644 >--- a/sys/libkern/arc4random.c >+++ b/sys/libkern/arc4random.c >@@ -19,6 +19,10 @@ __FBSDID("$FreeBSD$"); > #include <sys/lock.h> > #include <sys/mutex.h> > #include <sys/time.h> >+#include <sys/systm.h> >+ >+#define KEYSTREAM_ONLY >+#include <crypto/chacha_private.h> > > #define ARC4_RESEED_BYTES 65536 > #define ARC4_RESEED_SECONDS 300 >@@ -26,65 +30,163 @@ __FBSDID("$FreeBSD$"); > > int arc4rand_iniseed_state = ARC4_ENTR_NONE; > >-static u_int8_t arc4_i, arc4_j; > static int arc4_numruns = 0; >-static u_int8_t arc4_sbox[256]; > static time_t arc4_t_reseed; > static struct mtx arc4_mtx; > >-static u_int8_t arc4_randbyte(void); >+#define KEYSZ 32 >+#define IVSZ 8 >+#define BLOCKSZ 64 >+#define RSBUFSZ (16*BLOCKSZ) >+ >+static int rs_initialized; >+static chacha_ctx rs; /* chacha context for random keystream */ >+/* keystream blocks */ >+static u_char rs_buf[RSBUFSZ]; >+static size_t rs_have; /* valid bytes at end of rs_buf */ >+static size_t rs_count; /* bytes till reseed */ >+ >+static __inline void _rs_rekey(u_char *dat, size_t datlen); >+static __inline void _rs_stir(int); >+ >+static __inline void >+_rs_init(u_char *buf, size_t n) >+{ >+ KASSERT(n >= (KEYSZ + IVSZ), ("_rs_init size too small")); >+ >+ chacha_keysetup(&rs, buf, (KEYSZ * 8), 0); >+ chacha_ivsetup(&rs, (buf + KEYSZ)); >+} >+ >+static void >+_rs_seed(u_char *buf, size_t n) >+{ >+ _rs_rekey(buf, n); >+ >+ /* reset rs_buf */ >+ rs_have = 0; >+ memset(rs_buf, 0, sizeof(rs_buf)); >+ >+ rs_count = 1600000; >+} > > static __inline void >-arc4_swap(u_int8_t *a, u_int8_t *b) >+_rs_stir_if_needed(size_t len) > { >- u_int8_t c; >+ if (!rs_initialized) { >+ _rs_init(rs_buf, (KEYSZ + IVSZ)); >+ rs_count = 1024 * 1024 * 1024; >+ rs_initialized = 1; >+ } else if (rs_count <= len) { >+ _rs_stir(0); >+ } else { >+ rs_count -= len; >+ } >+} > >- c = *a; >- *a = *b; >- *b = c; >-} >+static __inline void >+_rs_rekey(u_char *dat, size_t datlen) >+{ >+ size_t n, r; >+#ifndef KEYSTREAM_ONLY >+ memset(rs_buf, 0, RSBUFSZ); >+#endif >+ >+ chacha_encrypt_bytes(&rs, rs_buf, rs_buf, RSBUFSZ); >+ /* with user provided data, we fill a bit more */ >+ if (dat) { >+ r = MIN(datlen, (KEYSZ + IVSZ)); >+ for (n = 0; n < r; n++) >+ rs_buf[n] ^= dat[n]; >+ } >+ >+ /* backtracking resistance, we force the reinitialization */ >+ _rs_init(rs_buf, (KEYSZ + IVSZ)); >+ memset(rs_buf, 0, (KEYSZ + IVSZ)); >+ rs_have = (RSBUFSZ - KEYSZ - IVSZ); >+} >+ >+static __inline void >+_rs_random_buf(void *_buf, size_t n) >+{ >+ u_char *buf = (u_char *)_buf; >+ u_char *keystream; >+ size_t m; >+ >+ _rs_stir_if_needed(n); >+ while (n > 0) { >+ if (rs_have > 0) { >+ m = MIN(n, rs_have); >+ keystream = (rs_buf + RSBUFSZ - rs_have); >+ memcpy(buf, keystream, m); >+ memset(keystream, 0, m); >+ buf += m; >+ n -= m; >+ rs_have -= m; >+ } >+ >+ if (rs_have == 0) >+ _rs_rekey(NULL, 0); >+ } >+} >+ >+static __inline void >+_rs_random_u32(u_int32_t *val) >+{ >+ u_char *keystream; >+ >+ _rs_stir_if_needed(sizeof(*val)); >+ if (rs_have < sizeof(*val)) >+ _rs_rekey(NULL, 0); >+ keystream = (rs_buf + RSBUFSZ - rs_have); >+ memcpy(val, keystream, sizeof(*val)); >+ memset(keystream, 0, sizeof(*val)); >+ rs_have -= sizeof(*val); >+ return; >+} > > /* > * Stir our S-box. > */ > static void >-arc4_randomstir (void) >+_rs_stir(int lock) > { >- u_int8_t key[256]; >+ u_int8_t key[KEYSZ + IVSZ], *p; > int r, n; >- struct timeval tv_now; >+ struct timespec ts_now; > > /* > * XXX read_random() returns unsafe numbers if the entropy > * device is not loaded -- MarkM. > */ > r = read_random(key, ARC4_KEYBYTES); >- getmicrouptime(&tv_now); >- mtx_lock(&arc4_mtx); >+ nanotime(&ts_now); >+ >+ if (lock) >+ mtx_lock(&arc4_mtx); >+ > /* If r == 0 || -1, just use what was on the stack. */ > if (r > 0) { > for (n = r; n < sizeof(key); n++) > key[n] = key[n % r]; > } > >- for (n = 0; n < 256; n++) { >- arc4_j = (arc4_j + arc4_sbox[n] + key[n]) % 256; >- arc4_swap(&arc4_sbox[n], &arc4_sbox[arc4_j]); >- } >- arc4_i = arc4_j = 0; >+ /* >+ * Even if read_random does not provide some bytes >+ * we have at least the possibility to fill with some time value >+ */ >+ for (p = (u_int8_t *)&ts_now, n = 0; n < sizeof(ts_now); n++) >+ key[n] ^= p[n]; >+ >+ _rs_seed(key, sizeof(key)); > >- /* Reset for next reseed cycle. */ >- arc4_t_reseed = tv_now.tv_sec + ARC4_RESEED_SECONDS; >+ arc4_t_reseed = ts_now.tv_sec + ARC4_RESEED_SECONDS; > arc4_numruns = 0; > >- /* >- * Throw away the first N words of output, as suggested in the >- * paper "Weaknesses in the Key Scheduling Algorithm of RC4" >- * by Fluher, Mantin, and Shamir. (N = 256 in our case.) >- */ >- for (n = 0; n < 256*4; n++) >- arc4_randbyte(); >- mtx_unlock(&arc4_mtx); >+ if (lock) >+ mtx_unlock(&arc4_mtx); >+ >+ explicit_bzero(key, sizeof(key)); > } > > /* >@@ -93,12 +195,8 @@ arc4_randomstir (void) > static void > arc4_init(void) > { >- int n; >- > mtx_init(&arc4_mtx, "arc4_mtx", NULL, MTX_DEF); >- arc4_i = arc4_j = 0; >- for (n = 0; n < 256; n++) >- arc4_sbox[n] = (u_int8_t) n; >+ _rs_stir(1); > > arc4_t_reseed = 0; > } >@@ -106,43 +204,25 @@ arc4_init(void) > SYSINIT(arc4_init, SI_SUB_LOCK, SI_ORDER_ANY, arc4_init, NULL); > > /* >- * Generate a random byte. >- */ >-static u_int8_t >-arc4_randbyte(void) >-{ >- u_int8_t arc4_t; >- >- arc4_i = (arc4_i + 1) % 256; >- arc4_j = (arc4_j + arc4_sbox[arc4_i]) % 256; >- >- arc4_swap(&arc4_sbox[arc4_i], &arc4_sbox[arc4_j]); >- >- arc4_t = (arc4_sbox[arc4_i] + arc4_sbox[arc4_j]) % 256; >- return arc4_sbox[arc4_t]; >-} >- >-/* > * MPSAFE > */ > void > arc4rand(void *ptr, u_int len, int reseed) > { >- u_char *p; >- struct timeval tv; >+ struct timespec ts; > >- getmicrouptime(&tv); >+ nanotime(&ts); > if (atomic_cmpset_int(&arc4rand_iniseed_state, ARC4_ENTR_HAVE, >- ARC4_ENTR_SEED) || reseed || >- (arc4_numruns > ARC4_RESEED_BYTES) || >- (tv.tv_sec > arc4_t_reseed)) >- arc4_randomstir(); >+ ARC4_ENTR_SEED) || reseed || >+ (arc4_numruns > ARC4_RESEED_BYTES) || >+ (ts.tv_sec > arc4_t_reseed)) >+ _rs_stir(0); > > mtx_lock(&arc4_mtx); > arc4_numruns += len; >- p = ptr; >- while (len--) >- *p++ = arc4_randbyte(); >+ >+ _rs_random_buf(ptr, len); >+ > mtx_unlock(&arc4_mtx); > } > >@@ -150,7 +230,10 @@ uint32_t > arc4random(void) > { > uint32_t ret; >+ >+ mtx_lock(&arc4_mtx); >+ _rs_random_u32(&ret); >+ mtx_unlock(&arc4_mtx); > >- arc4rand(&ret, sizeof ret, 0); >- return ret; >+ return (ret); > }
You cannot view the attachment while viewing its details because your browser does not support IFRAMEs.
View the attachment on a separate page
.
View Attachment As Diff
View Attachment As Raw
Actions:
View
|
Diff
Attachments on
bug 182610
:
147975
|
147976
|
149068
|
149507
|
189567
|
196272
|
196337
|
196342