diff --git a/lib/libc/gen/arc4random.c b/lib/libc/gen/arc4random.c index 59c4f7f..33cfd1e 100644 --- a/lib/libc/gen/arc4random.c +++ b/lib/libc/gen/arc4random.c @@ -3,6 +3,7 @@ /* * Copyright (c) 1996, David Mazieres * Copyright (c) 2008, Damien Miller + * Copyright (c) 2013, Markus Friedl * * Permission to use, copy, modify, and distribute this software for any * purpose with or without fee is hereby granted, provided that the above @@ -18,15 +19,7 @@ */ /* - * Arc4 random number generator for OpenBSD. - * - * This code is derived from section 17.1 of Applied Cryptography, - * second edition, which describes a stream cipher allegedly - * compatible with RSA Labs "RC4" cipher (the actual description of - * which is a trade secret). The same algorithm is used as a stream - * cipher called "arcfour" in Tatu Ylonen's ssh package. - * - * RC4 is a registered trademark of RSA Laboratories. + * ChaCha based random number generator for OpenBSD. */ #include @@ -37,31 +30,33 @@ __FBSDID("$FreeBSD$"); #include #include #include +#include #include #include #include #include +#include #include #include "libc_private.h" #include "un-namespace.h" +#define KEYSTREAM_ONLY +#include "chacha_private.h" + #ifdef __GNUC__ #define inline __inline #else /* !__GNUC__ */ #define inline #endif /* !__GNUC__ */ -struct arc4_stream { - u_int8_t i; - u_int8_t j; - u_int8_t s[256]; -}; - static pthread_mutex_t arc4random_mtx = PTHREAD_MUTEX_INITIALIZER; #define RANDOMDEV "/dev/random" -#define KEYSIZE 128 +#define KEYSZ 32 +#define IVSZ 8 +#define BLOCKSZ 64 +#define RSBUFSZ (16 * BLOCKSZ) #define _ARC4_LOCK() \ do { \ if (__isthreaded) \ @@ -74,47 +69,62 @@ static pthread_mutex_t arc4random_mtx = PTHREAD_MUTEX_INITIALIZER; _pthread_mutex_unlock(&arc4random_mtx); \ } while (0) -static int rs_initialized; -static struct arc4_stream rs; -static pid_t arc4_stir_pid; -static int arc4_count; +/* Marked INHERIT_ZERO, so zero'd out in fork children. 
*/
+static struct {
+	/* valid bytes at end of rs_buf */
+	size_t rs_have;
+	/* bytes till reseed */
+	size_t rs_count;
+} *rs;
+
+/* Also marked INHERIT_ZERO: the keystream state is not preserved in fork children. */
+static struct {
+	/* chacha context for random keystream */
+	chacha_ctx ctx;
+	/* keystream blocks */
+	u_char rs_buf[RSBUFSZ];
+} *rsx;
 
 extern int __sysctl(int *name, u_int namelen, void *oldp, size_t *oldlenp,
     void *newp, size_t newlen);
 
-static inline u_int8_t arc4_getbyte(void);
-static void arc4_stir(void);
+static inline void _rs_rekey(u_char *dat, size_t datlen);
 
 static inline void
-arc4_init(void)
+_rs_init(u_char *buf, size_t n)
 {
-	int n;
+	if (n < (KEYSZ + IVSZ))
+		return;
 
-	for (n = 0; n < 256; n++)
-		rs.s[n] = n;
-	rs.i = 0;
-	rs.j = 0;
-}
+	if (rs == NULL) {
+		if ((rs = mmap(NULL, sizeof(*rs), PROT_READ|PROT_WRITE,
+		    MAP_ANON|MAP_PRIVATE, -1, 0)) == MAP_FAILED)
+			abort();
 
-static inline void
-arc4_addrandom(u_char *dat, int datlen)
-{
-	int n;
-	u_int8_t si;
-
-	rs.i--;
-	for (n = 0; n < 256; n++) {
-		rs.i = (rs.i + 1);
-		si = rs.s[rs.i];
-		rs.j = (rs.j + si + dat[n % datlen]);
-		rs.s[rs.i] = rs.s[rs.j];
-		rs.s[rs.j] = si;
+		if (minherit(rs, sizeof(*rs), INHERIT_ZERO) == -1) {
+			munmap(rs, sizeof(*rs));
+			abort();
+		}
+
+		if ((rsx = mmap(NULL, sizeof(*rsx), PROT_READ|PROT_WRITE,
+		    MAP_ANON|MAP_PRIVATE, -1, 0)) == MAP_FAILED) {
+			munmap(rs, sizeof(*rs));
+			abort();
+		}
+
+		if (minherit(rsx, sizeof(*rsx), INHERIT_ZERO) == -1) {
+			munmap(rsx, sizeof(*rsx));
+			munmap(rs, sizeof(*rs));
+			abort();
+		}
 	}
-	rs.j = rs.i;
+
+	chacha_keysetup(&rsx->ctx, buf, KEYSZ * 8, 0);
+	chacha_ivsetup(&rsx->ctx, buf + KEYSZ);
 }
 
 static size_t
-arc4_sysctl(u_char *buf, size_t size)
+_rs_sysctl(u_char *buf, size_t size)
 {
 	int mib[2];
 	size_t len, done;
@@ -135,99 +145,139 @@ arc4_sysctl(u_char *buf, size_t size)
 	return (done);
 }
 
+static size_t
+arc4_sysctl(u_char *buf, size_t size)
+{
+	return (_rs_sysctl(buf, size));
+}
+
 static void
-arc4_stir(void)
+_rs_stir(void)
 {
-	int done, fd, i;
 	struct {
 		struct timeval tv;
-		pid_t pid;
-		u_char rnd[KEYSIZE];
+		u_char rnd[KEYSZ + IVSZ];
 	} rdat;
+	int done, fd;
 
-	if (!rs_initialized) {
-		arc4_init();
-		rs_initialized = 1;
-	}
 	done = 0;
-	if (arc4_sysctl((u_char *)&rdat, KEYSIZE) == KEYSIZE)
+	if (_rs_sysctl((u_char *)&rdat, KEYSZ + IVSZ) == (KEYSZ + IVSZ))
 		done = 1;
+
 	if (!done) {
 		fd = _open(RANDOMDEV, O_RDONLY | O_CLOEXEC, 0);
 		if (fd >= 0) {
-			if (_read(fd, &rdat, KEYSIZE) == KEYSIZE)
+			if (_read(fd, &rdat, (KEYSZ + IVSZ)) == (KEYSZ + IVSZ))
 				done = 1;
 			(void)_close(fd);
 		}
 	}
+
 	if (!done) {
 		(void)gettimeofday(&rdat.tv, NULL);
-		rdat.pid = getpid();
 		/* We'll just take whatever was on the stack too... */
 	}
 
-	arc4_addrandom((u_char *)&rdat, KEYSIZE);
+	if (!rs) {
+		_rs_init((u_char *)&rdat, KEYSZ + IVSZ);
+	} else {
+		_rs_rekey((u_char *)&rdat, KEYSZ + IVSZ);
+	}
 
-	/*
-	 * Discard early keystream, as per recommendations in:
-	 * "(Not So) Random Shuffles of RC4" by Ilya Mironov.
-	 */
-	for (i = 0; i < 1024; i++)
-		(void)arc4_getbyte();
-	arc4_count = 1600000;
+	memset((u_char *)&rdat, 0, sizeof(rdat));
+
+	/* invalidate rs_buf */
+	rs->rs_have = 0;
+	memset(rsx->rs_buf, 0, RSBUFSZ);
+
+	rs->rs_count = 1600000;
 }
 
-static void
-arc4_stir_if_needed(void)
+static inline void
+_rs_stir_if_needed(size_t len)
 {
-	pid_t pid = getpid();
-
-	if (arc4_count <= 0 || !rs_initialized || arc4_stir_pid != pid) {
-		arc4_stir_pid = pid;
-		arc4_stir();
-	}
+	if (!rs || rs->rs_count <= len)
+		_rs_stir();
+	else
+		rs->rs_count -= len;
 }
 
-static inline u_int8_t
-arc4_getbyte(void)
+static inline void
+_rs_rekey(u_char *dat, size_t datlen)
 {
-	u_int8_t si, sj;
-
-	rs.i = (rs.i + 1);
-	si = rs.s[rs.i];
-	rs.j = (rs.j + si);
-	sj = rs.s[rs.j];
-	rs.s[rs.i] = sj;
-	rs.s[rs.j] = si;
-	return (rs.s[(si + sj) & 0xff]);
+#ifndef KEYSTREAM_ONLY
+	memset(rsx->rs_buf, 0, RSBUFSZ);
+#endif
+
+	/* fill rs_buf with the keystream */
+	chacha_encrypt_bytes(&rsx->ctx, rsx->rs_buf, rsx->rs_buf, RSBUFSZ);
+	/* mix in optional user-provided data */
+	if (dat) {
+		size_t i, m;
+
+		m = MIN(datlen, (KEYSZ + IVSZ));
+		for (i = 0; i < m; i++)
+			rsx->rs_buf[i] ^= dat[i];
+	}
+	/* immediately reinit for backtracking resistance */
+	_rs_init(rsx->rs_buf, (KEYSZ + IVSZ));
+	memset(rsx->rs_buf, 0, (KEYSZ + IVSZ));
+	rs->rs_have = (RSBUFSZ - KEYSZ - IVSZ);
 }
 
-static inline u_int32_t
-arc4_getword(void)
+static inline void
+_rs_random_buf(void *_buf, size_t n)
 {
-	u_int32_t val;
-	val = arc4_getbyte() << 24;
-	val |= arc4_getbyte() << 16;
-	val |= arc4_getbyte() << 8;
-	val |= arc4_getbyte();
-	return val;
+	u_char *buf = (u_char *)_buf;
+	u_char *keystream;
+	size_t m;
+
+	_rs_stir_if_needed(n);
+	while (n > 0) {
+		if (rs->rs_have > 0) {
+			m = MIN(n, rs->rs_have);
+			keystream = (rsx->rs_buf + RSBUFSZ - rs->rs_have);
+			memcpy(buf, keystream, m);
+			memset(keystream, 0, m);
+			buf += m;
+			n -= m;
+			rs->rs_have -= m;
+		}
+
+		if (rs->rs_have == 0)
+			_rs_rekey(NULL, 0);
+	}
 }
 
-void
-arc4random_stir(void)
+static inline void
+_rs_random_u32(u_int32_t *val)
 {
-	_ARC4_LOCK();
-	arc4_stir();
-	_ARC4_UNLOCK();
+	u_char *keystream;
+
+	_rs_stir_if_needed(sizeof(*val));
+	if (rs->rs_have < sizeof(*val))
+		_rs_rekey(NULL, 0);
+	keystream = (rsx->rs_buf + RSBUFSZ - rs->rs_have);
+	memcpy(val, keystream, sizeof(*val));
+	memset(keystream, 0, sizeof(*val));
+	rs->rs_have -= sizeof(*val);
 }
 
 void
 arc4random_addrandom(u_char *dat, int datlen)
 {
+	int m;
+
 	_ARC4_LOCK();
-	if (!rs_initialized)
-		arc4_stir();
-	arc4_addrandom(dat, datlen);
+	if (!rs)
+		_rs_stir();
+
+	while (datlen > 0) {
+		m = MIN(datlen, (KEYSZ + IVSZ));
+		_rs_rekey(dat, m);
+		dat += m;
+		datlen -= m;
+	}
 	_ARC4_UNLOCK();
 }
 
@@ -235,10 +285,9 @@ u_int32_t
 arc4random(void)
 {
 	u_int32_t val;
+
 	_ARC4_LOCK();
-	arc4_count -= 4;
-	arc4_stir_if_needed();
-	val = arc4_getword();
+	_rs_random_u32(&val);
 	_ARC4_UNLOCK();
 	return val;
 }
@@ -246,50 +295,44 @@ arc4random(void)
 void
 arc4random_buf(void *_buf, size_t n)
 {
-	u_char *buf = (u_char *)_buf;
 	_ARC4_LOCK();
-	arc4_stir_if_needed();
-	while (n--) {
-		if (--arc4_count <= 0)
-			arc4_stir();
-		buf[n] = arc4_getbyte();
-	}
+	_rs_random_buf(_buf, n);
+	_ARC4_UNLOCK();
+}
+
+void
+arc4random_stir(void)
+{
+	_ARC4_LOCK();
+	_rs_stir();
 	_ARC4_UNLOCK();
 }
 
 /*
  * Calculate a uniformly distributed random number less than upper_bound
  * avoiding "modulo bias".
  *
  * Uniformity is achieved by generating new random numbers until the one
  * returned is outside the range [0, 2**32 % upper_bound).  This
  * guarantees the selected random number will be inside
  * [2**32 % upper_bound, 2**32) which maps back to [0, upper_bound)
  * after reduction modulo upper_bound.
  */
 u_int32_t
 arc4random_uniform(u_int32_t upper_bound)
 {
 	u_int32_t r, min;
 
 	if (upper_bound < 2)
-		return 0;
+		return (0);
 
 	/* 2**32 % x == (2**32 - x) % x */
 	min = -upper_bound % upper_bound;
+
 	/*
 	 * This could theoretically loop forever but each retry has
 	 * p > 0.5 (worst case, usually far better) of selecting a
 	 * number inside the range we need, so it should rarely need
 	 * to re-roll.
 	 */
 	for (;;) {
 		r = arc4random();
 		if (r >= min)
 			break;
 	}
 
-	return r % upper_bound;
+	return (r % upper_bound);
 }
 
 #if 0
diff --git a/lib/libc/gen/chacha_private.h b/lib/libc/gen/chacha_private.h
new file mode 100644
index 0000000..0995fac
--- /dev/null
+++ b/lib/libc/gen/chacha_private.h
@@ -0,0 +1,233 @@
+/*
+chacha-merged.c version 20080118
+D.J. Bernstein
+Public domain.
+*/
+
+/* $OpenBSD: chacha_private.h,v 1.2 2013/10/04 07:02:27 djm Exp $ */
+
+typedef unsigned char u8;
+typedef unsigned int u32;
+
+typedef struct
+{
+	u32 input[16]; /* could be compressed */
+} chacha_ctx;
+
+#define U8C(v) (v##U)
+#define U32C(v) (v##U)
+
+#define U8V(v) ((u8)(v) & U8C(0xFF))
+#define U32V(v) ((u32)(v) & U32C(0xFFFFFFFF))
+
+#define ROTL32(v, n) \
+	(U32V((v) << (n)) | ((v) >> (32 - (n))))
+
+#define U8TO32_LITTLE(p) \
+	(((u32)((p)[0])) | \
+	 ((u32)((p)[1]) << 8) | \
+	 ((u32)((p)[2]) << 16) | \
+	 ((u32)((p)[3]) << 24))
+
+#define U32TO8_LITTLE(p, v) \
+	do { \
+		(p)[0] = U8V((v)); \
+		(p)[1] = U8V((v) >> 8); \
+		(p)[2] = U8V((v) >> 16); \
+		(p)[3] = U8V((v) >> 24); \
+	} while (0)
+
+#define ROTATE(v, c) (ROTL32(v, c))
+#define XOR(v, w) ((v) ^ (w))
+#define PLUS(v, w) (U32V((v) + (w)))
+#define PLUSONE(v) (PLUS((v), 1))
+
+#define QUARTERROUND(a, b, c, d) \
+	a = PLUS(a, b); d = ROTATE(XOR(d, a), 16); \
+	c = PLUS(c, d); b = ROTATE(XOR(b, c), 12); \
+	a = PLUS(a, b); d = ROTATE(XOR(d, a), 8); \
+	c = PLUS(c, d); b = ROTATE(XOR(b, c), 7);
+
+static const char sigma[16] = "expand 32-byte k";
+static const char tau[16] = "expand 16-byte k";
+
+static void
+chacha_keysetup(chacha_ctx *x, const u8 *k, u32 kbits, u32 ivbits)
+{
+	const char *constants;
+
+	x->input[4] = U8TO32_LITTLE(k + 0);
+	x->input[5] = U8TO32_LITTLE(k + 4);
+	x->input[6] = U8TO32_LITTLE(k + 8);
+	x->input[7] = U8TO32_LITTLE(k + 12);
+
+	if (kbits == 256) { /* recommended */
+		k += 16;
+		constants = sigma;
+	} else { /* kbits == 128 */
+		constants = tau;
+	}
+
+	x->input[8] = U8TO32_LITTLE(k + 0);
+	x->input[9] = U8TO32_LITTLE(k + 4);
+	x->input[10] = U8TO32_LITTLE(k + 8);
+	x->input[11] = U8TO32_LITTLE(k + 12);
+	x->input[0] = U8TO32_LITTLE(constants + 0);
+	x->input[1] = U8TO32_LITTLE(constants + 4);
+	x->input[2] = U8TO32_LITTLE(constants + 8);
+	x->input[3] = U8TO32_LITTLE(constants + 12);
+}
+
+static void
+chacha_ivsetup(chacha_ctx *x, const u8 *iv)
+{
+	x->input[12] = 0;
+	x->input[13] = 0;
+	x->input[14] = U8TO32_LITTLE(iv + 0);
+	x->input[15] = U8TO32_LITTLE(iv + 4);
+}
+
+static void
+chacha_encrypt_bytes(chacha_ctx *x, const u8 *m, u8 *c, u32 bytes)
+{
+	u32 x0, x1, x2, x3, x4, x5, x6, x7, x8, x9, x10, x11, x12, x13, x14, x15;
+	u32 j0, j1, j2, j3, j4, j5, j6, j7, j8, j9, j10, j11, j12, j13, j14, j15;
+	u8 *ctarget = NULL;
+	u8 tmp[64];
+	u_int i;
+
+	if (!bytes)
+		return;
+
+	j0 = x->input[0];
+	j1 = x->input[1];
+	j2 = x->input[2];
+	j3 = x->input[3];
+	j4 = x->input[4];
+	j5 = x->input[5];
+	j6 = x->input[6];
+	j7 = x->input[7];
+	j8 = x->input[8];
+	j9 = x->input[9];
+	j10 = x->input[10];
+	j11 = x->input[11];
+	j12 = x->input[12];
+	j13 = x->input[13];
+	j14 = x->input[14];
+	j15 = x->input[15];
+
+	for (;;) {
+		if (bytes < 64) {
+			for (i = 0; i < bytes; ++i)
+				tmp[i] = m[i];
+			m = tmp;
+			ctarget = c;
+			c = tmp;
+		}
+
+		x0 = j0;
+		x1 = j1;
+		x2 = j2;
+		x3 = j3;
+		x4 = j4;
+		x5 = j5;
+		x6 = j6;
+		x7 = j7;
+		x8 = j8;
+		x9 = j9;
+		x10 = j10;
+		x11 = j11;
+		x12 = j12;
+		x13 = j13;
+		x14 = j14;
+		x15 = j15;
+
+		for (i = 20; i > 0; i -= 2) {
+			QUARTERROUND(x0, x4, x8, x12)
+			QUARTERROUND(x1, x5, x9, x13)
+			QUARTERROUND(x2, x6, x10, x14)
+			QUARTERROUND(x3, x7, x11, x15)
+			QUARTERROUND(x0, x5, x10, x15)
+			QUARTERROUND(x1, x6, x11, x12)
+			QUARTERROUND(x2, x7, x8, x13)
+			QUARTERROUND(x3, x4, x9, x14)
+		}
+
+		x0 = PLUS(x0, j0);
+		x1 = PLUS(x1, j1);
+		x2 = PLUS(x2, j2);
+		x3 = PLUS(x3, j3);
+		x4 = PLUS(x4, j4);
+		x5 = PLUS(x5, j5);
+		x6 = PLUS(x6, j6);
+		x7 = PLUS(x7, j7);
+		x8 = PLUS(x8, j8);
+		x9 = PLUS(x9, j9);
+		x10 = PLUS(x10, j10);
+		x11 = PLUS(x11, j11);
+		x12 = PLUS(x12, j12);
+		x13 = PLUS(x13, j13);
+		x14 = PLUS(x14, j14);
+		x15 = PLUS(x15, j15);
+
+#ifndef KEYSTREAM_ONLY
+		x0 = XOR(x0, U8TO32_LITTLE(m + 0));
+		x1 = XOR(x1, U8TO32_LITTLE(m + 4));
+		x2 = XOR(x2, U8TO32_LITTLE(m + 8));
+		x3 = XOR(x3, U8TO32_LITTLE(m + 12));
+		x4 = XOR(x4, U8TO32_LITTLE(m + 16));
+		x5 = XOR(x5, U8TO32_LITTLE(m + 20));
+		x6 = XOR(x6, U8TO32_LITTLE(m + 24));
+		x7 = XOR(x7, U8TO32_LITTLE(m + 28));
+		x8 = XOR(x8, U8TO32_LITTLE(m + 32));
+		x9 = XOR(x9, U8TO32_LITTLE(m + 36));
+		x10 = XOR(x10, U8TO32_LITTLE(m + 40));
+		x11 = XOR(x11, U8TO32_LITTLE(m + 44));
+		x12 = XOR(x12, U8TO32_LITTLE(m + 48));
+		x13 = XOR(x13, U8TO32_LITTLE(m + 52));
+		x14 = XOR(x14, U8TO32_LITTLE(m + 56));
+		x15 = XOR(x15, U8TO32_LITTLE(m + 60));
+#endif
+
+		j12 = PLUSONE(j12);
+
+		if (!j12) {
+			j13 = PLUSONE(j13);
+			/* stopping at 2^70 bytes per nonce is the user's responsibility */
+		}
+
+		U32TO8_LITTLE(c + 0, x0);
+		U32TO8_LITTLE(c + 4, x1);
+		U32TO8_LITTLE(c + 8, x2);
+		U32TO8_LITTLE(c + 12, x3);
+		U32TO8_LITTLE(c + 16, x4);
+		U32TO8_LITTLE(c + 20, x5);
+		U32TO8_LITTLE(c + 24, x6);
+		U32TO8_LITTLE(c + 28, x7);
+		U32TO8_LITTLE(c + 32, x8);
+		U32TO8_LITTLE(c + 36, x9);
+		U32TO8_LITTLE(c + 40, x10);
+		U32TO8_LITTLE(c + 44, x11);
+		U32TO8_LITTLE(c + 48, x12);
+		U32TO8_LITTLE(c + 52, x13);
+		U32TO8_LITTLE(c + 56, x14);
+		U32TO8_LITTLE(c + 60, x15);
+
+		if (bytes <= 64) {
+			if (bytes < 64) {
+				for (i = 0; i < bytes; ++i)
+					ctarget[i] = c[i];
+			}
+
+			x->input[12] = j12;
+			x->input[13] = j13;
+			return;
+		}
+
+		bytes -= 64;
+		c += 64;
+#ifndef KEYSTREAM_ONLY
+		m += 64;
+#endif
+	}
+}
diff --git a/lib/libc/sys/minherit.2 b/lib/libc/sys/minherit.2
index dc85d09..6075506 100644
--- a/lib/libc/sys/minherit.2
+++ b/lib/libc/sys/minherit.2
@@ -91,6 +91,9 @@ it will no longer be shared in the parent after the parent forks
 and there is no way to get the previous shared-backing-store
 mapping without unmapping and remapping the address space in
 the parent.
+.It Dv INHERIT_ZERO
+This option guarantees that the mapping is replaced with
+zero-filled pages in the child process after a fork.
 .El
 .Sh RETURN VALUES
 .Rv -std minherit
diff --git a/sys/crypto/chacha_private.h b/sys/crypto/chacha_private.h
new file mode 100644
index 0000000..0995fac
--- /dev/null
+++ b/sys/crypto/chacha_private.h
@@ -0,0 +1,233 @@
+/*
+chacha-merged.c version 20080118
+D.J. Bernstein
+Public domain.
+*/ + +/* $OpenBSD: chacha_private.h,v 1.2 2013/10/04 07:02:27 djm Exp $ */ + +typedef unsigned char u8; +typedef unsigned int u32; + +typedef struct +{ + u32 input[16]; /* could be compressed */ +} chacha_ctx; + +#define U8C(v) (v##U) +#define U32C(v) (v##U) + +#define U8V(v) ((u8)(v) & U8C(0xFF)) +#define U32V(v) ((u32)(v) & U32C(0xFFFFFFFF)) + +#define ROTL32(v, n) \ + (U32V((v) << (n)) | ((v) >> (32 - (n)))) + +#define U8TO32_LITTLE(p) \ + (((u32)((p)[0])) | \ + ((u32)((p)[1]) << 8) | \ + ((u32)((p)[2]) << 16) | \ + ((u32)((p)[3]) << 24)) + +#define U32TO8_LITTLE(p, v) \ + do { \ + (p)[0] = U8V((v)); \ + (p)[1] = U8V((v) >> 8); \ + (p)[2] = U8V((v) >> 16); \ + (p)[3] = U8V((v) >> 24); \ + } while (0) + +#define ROTATE(v, c) (ROTL32(v, c)) +#define XOR(v, w) ((v) ^ (w)) +#define PLUS(v, w) (U32V((v) + (w))) +#define PLUSONE(v) (PLUS((v), 1)) + +#define QUARTERROUND(a, b, c, d) \ + a = PLUS(a, b); d = ROTATE(XOR(d, a), 16); \ + c = PLUS(c, d); b = ROTATE(XOR(b, c), 12); \ + a = PLUS(a, b); d = ROTATE(XOR(d, a), 8); \ + c = PLUS(c, d); b = ROTATE(XOR(b, c), 7); + +static const char sigma[16] = "expand 32-byte k"; +static const char tau[16] = "expand 16-byte k"; + +static void +chacha_keysetup(chacha_ctx *x, const u8 *k, u32 kbits, u32 ivbits) +{ + const char *constants; + + x->input[4] = U8TO32_LITTLE(k + 0); + x->input[5] = U8TO32_LITTLE(k + 4); + x->input[6] = U8TO32_LITTLE(k + 8); + x->input[7] = U8TO32_LITTLE(k + 12); + + if (kbits == 256) { /* recommended */ + k += 16; + constants = sigma; + } else { /* kbits == 128 */ + constants = tau; + } + + x->input[8] = U8TO32_LITTLE(k + 0); + x->input[9] = U8TO32_LITTLE(k + 4); + x->input[10] = U8TO32_LITTLE(k + 8); + x->input[11] = U8TO32_LITTLE(k + 12); + x->input[0] = U8TO32_LITTLE(constants + 0); + x->input[1] = U8TO32_LITTLE(constants + 4); + x->input[2] = U8TO32_LITTLE(constants + 8); + x->input[3] = U8TO32_LITTLE(constants + 12); +} + +static void +chacha_ivsetup(chacha_ctx *x, const u8 *iv) +{ + x->input[12] = 0; + x->input[13] = 0; + x->input[14] = U8TO32_LITTLE(iv + 0); + x->input[15] = U8TO32_LITTLE(iv + 4); +} + +static void +chacha_encrypt_bytes(chacha_ctx *x, const u8 *m, u8 *c, u32 bytes) +{ + u32 x0, x1, x2, x3, x4, x5, x6, x7, x8, x9, x10, x11, x12, x13, x14, x15; + u32 j0, j1, j2, j3, j4, j5, j6, j7, j8, j9, j10, j11, j12, j13, j14, j15; + u8 *ctarget = NULL; + u8 tmp[64]; + u_int i; + + if (!bytes) + return; + + j0 = x->input[0]; + j1 = x->input[1]; + j2 = x->input[2]; + j3 = x->input[3]; + j4 = x->input[4]; + j5 = x->input[5]; + j6 = x->input[6]; + j7 = x->input[7]; + j8 = x->input[8]; + j9 = x->input[9]; + j10 = x->input[10]; + j11 = x->input[11]; + j12 = x->input[12]; + j13 = x->input[13]; + j14 = x->input[14]; + j15 = x->input[15]; + + for (;;) { + if (bytes < 64) { + for (i = 0; i < bytes; ++i) + tmp[i] = m[i]; + m = tmp; + ctarget = c; + c = tmp; + } + + x0 = j0; + x1 = j1; + x2 = j2; + x3 = j3; + x4 = j4; + x5 = j5; + x6 = j6; + x7 = j7; + x8 = j8; + x9 = j9; + x10 = j10; + x11 = j11; + x12 = j12; + x13 = j13; + x14 = j14; + x15 = j15; + + for (i = 20; i > 0; i -= 2) { + QUARTERROUND(x0, x4, x8, x12) + QUARTERROUND(x1, x5, x9, x13) + QUARTERROUND(x2, x6, x10, x14) + QUARTERROUND(x3, x7, x11, x15) + QUARTERROUND(x0, x5, x10, x15) + QUARTERROUND(x1, x6, x11, x12) + QUARTERROUND(x2, x7, x8, x13) + QUARTERROUND(x3, x4, x9, x14) + } + + x0 = PLUS(x0, j0); + x1 = PLUS(x1, j1); + x2 = PLUS(x2, j2); + x3 = PLUS(x3, j3); + x4 = PLUS(x4, j4); + x5 = PLUS(x5, j5); + x6 = PLUS(x6, j6); + x7 = PLUS(x7, j7); + x8 = PLUS(x8, j8); + 
x9 = PLUS(x9, j9);
+		x10 = PLUS(x10, j10);
+		x11 = PLUS(x11, j11);
+		x12 = PLUS(x12, j12);
+		x13 = PLUS(x13, j13);
+		x14 = PLUS(x14, j14);
+		x15 = PLUS(x15, j15);
+
+#ifndef KEYSTREAM_ONLY
+		x0 = XOR(x0, U8TO32_LITTLE(m + 0));
+		x1 = XOR(x1, U8TO32_LITTLE(m + 4));
+		x2 = XOR(x2, U8TO32_LITTLE(m + 8));
+		x3 = XOR(x3, U8TO32_LITTLE(m + 12));
+		x4 = XOR(x4, U8TO32_LITTLE(m + 16));
+		x5 = XOR(x5, U8TO32_LITTLE(m + 20));
+		x6 = XOR(x6, U8TO32_LITTLE(m + 24));
+		x7 = XOR(x7, U8TO32_LITTLE(m + 28));
+		x8 = XOR(x8, U8TO32_LITTLE(m + 32));
+		x9 = XOR(x9, U8TO32_LITTLE(m + 36));
+		x10 = XOR(x10, U8TO32_LITTLE(m + 40));
+		x11 = XOR(x11, U8TO32_LITTLE(m + 44));
+		x12 = XOR(x12, U8TO32_LITTLE(m + 48));
+		x13 = XOR(x13, U8TO32_LITTLE(m + 52));
+		x14 = XOR(x14, U8TO32_LITTLE(m + 56));
+		x15 = XOR(x15, U8TO32_LITTLE(m + 60));
+#endif
+
+		j12 = PLUSONE(j12);
+
+		if (!j12) {
+			j13 = PLUSONE(j13);
+			/* stopping at 2^70 bytes per nonce is the user's responsibility */
+		}
+
+		U32TO8_LITTLE(c + 0, x0);
+		U32TO8_LITTLE(c + 4, x1);
+		U32TO8_LITTLE(c + 8, x2);
+		U32TO8_LITTLE(c + 12, x3);
+		U32TO8_LITTLE(c + 16, x4);
+		U32TO8_LITTLE(c + 20, x5);
+		U32TO8_LITTLE(c + 24, x6);
+		U32TO8_LITTLE(c + 28, x7);
+		U32TO8_LITTLE(c + 32, x8);
+		U32TO8_LITTLE(c + 36, x9);
+		U32TO8_LITTLE(c + 40, x10);
+		U32TO8_LITTLE(c + 44, x11);
+		U32TO8_LITTLE(c + 48, x12);
+		U32TO8_LITTLE(c + 52, x13);
+		U32TO8_LITTLE(c + 56, x14);
+		U32TO8_LITTLE(c + 60, x15);
+
+		if (bytes <= 64) {
+			if (bytes < 64) {
+				for (i = 0; i < bytes; ++i)
+					ctarget[i] = c[i];
+			}
+
+			x->input[12] = j12;
+			x->input[13] = j13;
+			return;
+		}
+
+		bytes -= 64;
+		c += 64;
+#ifndef KEYSTREAM_ONLY
+		m += 64;
+#endif
+	}
+}
diff --git a/sys/libkern/arc4random.c b/sys/libkern/arc4random.c
index 62ace2c..99909d2 100644
--- a/sys/libkern/arc4random.c
+++ b/sys/libkern/arc4random.c
@@ -19,6 +19,10 @@ __FBSDID("$FreeBSD$");
 #include
 #include
 #include
+#include
+
+#define KEYSTREAM_ONLY
+#include <crypto/chacha_private.h>
 
 #define ARC4_RESEED_BYTES 65536
 #define ARC4_RESEED_SECONDS 300
@@ -26,65 +30,166 @@
 
 int arc4rand_iniseed_state = ARC4_ENTR_NONE;
 
-static u_int8_t arc4_i, arc4_j;
 static int arc4_numruns = 0;
-static u_int8_t arc4_sbox[256];
 static time_t arc4_t_reseed;
 static struct mtx arc4_mtx;
 
+#define KEYSZ 32
+#define IVSZ 8
+#define BLOCKSZ 64
+#define RSBUFSZ (16 * BLOCKSZ)
+
+static int rs_initialized;
+static chacha_ctx rs;		/* chacha context for random keystream */
+/* keystream blocks */
+static u_char rs_buf[RSBUFSZ];
+static size_t rs_have;		/* valid bytes at end of rs_buf */
+static size_t rs_count;		/* bytes till reseed */
+
-static u_int8_t arc4_randbyte(void);
+static __inline void _rs_rekey(u_char *dat, size_t datlen);
+static __inline void _rs_stir(int);
+
+static __inline void
+_rs_init(u_char *buf, size_t n)
+{
+	KASSERT(n >= (KEYSZ + IVSZ), ("_rs_init size too small"));
+
+	chacha_keysetup(&rs, buf, (KEYSZ * 8), 0);
+	chacha_ivsetup(&rs, (buf + KEYSZ));
+}
+
+static void
+_rs_seed(u_char *buf, size_t n)
+{
+	_rs_rekey(buf, n);
+
+	/* reset rs_buf */
+	rs_have = 0;
+	memset(rs_buf, 0, sizeof(rs_buf));
+
+	rs_count = 1600000;
+}
+
+static __inline void
+_rs_stir_if_needed(size_t len)
+{
+	if (!rs_initialized) {
+		_rs_init(rs_buf, (KEYSZ + IVSZ));
+		rs_count = 1024 * 1024 * 1024;
+		rs_initialized = 1;
+	} else if (rs_count <= len) {
+		_rs_stir(0);
+	} else {
+		rs_count -= len;
+	}
+}
+
 static __inline void
-arc4_swap(u_int8_t *a, u_int8_t *b)
+_rs_rekey(u_char *dat, size_t datlen)
 {
-	u_int8_t c;
+	size_t n, r;
+#ifndef KEYSTREAM_ONLY
+	memset(rs_buf, 0, RSBUFSZ);
+#endif
+
+	/* fill rs_buf with the keystream */
+	chacha_encrypt_bytes(&rs, rs_buf, rs_buf, RSBUFSZ);
+	/* mix in optional user-provided data */
+	if (dat) {
+		r = MIN(datlen, (KEYSZ + IVSZ));
+		for (n = 0; n < r; n++)
+			rs_buf[n] ^= dat[n];
+	}
+
+	/* immediately reinit for backtracking resistance */
+	_rs_init(rs_buf, (KEYSZ + IVSZ));
+	memset(rs_buf, 0, (KEYSZ + IVSZ));
+	rs_have = (RSBUFSZ - KEYSZ - IVSZ);
+}
+
+static __inline void
+_rs_random_buf(void *_buf, size_t n)
+{
+	u_char *buf = (u_char *)_buf;
+	u_char *keystream;
+	size_t m;
+
+	_rs_stir_if_needed(n);
+	while (n > 0) {
+		if (rs_have > 0) {
+			m = MIN(n, rs_have);
+			keystream = (rs_buf + RSBUFSZ - rs_have);
+			memcpy(buf, keystream, m);
+			memset(keystream, 0, m);
+			buf += m;
+			n -= m;
+			rs_have -= m;
+		}
+
+		if (rs_have == 0)
+			_rs_rekey(NULL, 0);
+	}
+}
 
-	c = *a;
-	*a = *b;
-	*b = c;
-}
+static __inline void
+_rs_random_u32(u_int32_t *val)
+{
+	u_char *keystream;
+
+	_rs_stir_if_needed(sizeof(*val));
+	if (rs_have < sizeof(*val))
+		_rs_rekey(NULL, 0);
+	keystream = (rs_buf + RSBUFSZ - rs_have);
+	memcpy(val, keystream, sizeof(*val));
+	memset(keystream, 0, sizeof(*val));
+	rs_have -= sizeof(*val);
+}
 
 /*
- * Stir our S-box.
+ * Stir the ChaCha state.
  */
 static void
-arc4_randomstir (void)
+_rs_stir(int lock)
 {
-	u_int8_t key[256];
+	u_int8_t key[KEYSZ + IVSZ], *p;
 	int r, n;
-	struct timeval tv_now;
+	struct timespec ts_now;
 
 	/*
 	 * XXX read_random() returns unsafe numbers if the entropy
 	 * device is not loaded -- MarkM.
 	 */
 	r = read_random(key, ARC4_KEYBYTES);
-	getmicrouptime(&tv_now);
-	mtx_lock(&arc4_mtx);
+	nanotime(&ts_now);
+
+	if (lock)
+		mtx_lock(&arc4_mtx);
+
+	/* Make sure the cipher state is keyed before new input is mixed in. */
+	if (!rs_initialized) {
+		_rs_init(rs_buf, (KEYSZ + IVSZ));
+		rs_initialized = 1;
+	}
 
 	/* If r == 0 || -1, just use what was on the stack. */
 	if (r > 0) {
 		for (n = r; n < sizeof(key); n++)
 			key[n] = key[n % r];
 	}
 
-	for (n = 0; n < 256; n++) {
-		arc4_j = (arc4_j + arc4_sbox[n] + key[n]) % 256;
-		arc4_swap(&arc4_sbox[n], &arc4_sbox[arc4_j]);
-	}
-	arc4_i = arc4_j = 0;
+	/*
+	 * Even if read_random() provided no bytes, we can at least
+	 * mix a time value into the key.
+	 */
+	for (p = (u_int8_t *)&ts_now, n = 0; n < sizeof(ts_now); n++)
+		key[n] ^= p[n];
+
+	/*
+	 * _rs_seed() XORs the key into keystream generated from the
+	 * old state, so prior entropy is preserved rather than replaced.
+	 */
+	_rs_seed(key, sizeof(key));
 
-	/* Reset for next reseed cycle. */
-	arc4_t_reseed = tv_now.tv_sec + ARC4_RESEED_SECONDS;
+	arc4_t_reseed = ts_now.tv_sec + ARC4_RESEED_SECONDS;
 	arc4_numruns = 0;
 
-	/*
-	 * Throw away the first N words of output, as suggested in the
-	 * paper "Weaknesses in the Key Scheduling Algorithm of RC4"
-	 * by Fluher, Mantin, and Shamir. (N = 256 in our case.)
-	 */
-	for (n = 0; n < 256*4; n++)
-		arc4_randbyte();
-	mtx_unlock(&arc4_mtx);
+	if (lock)
+		mtx_unlock(&arc4_mtx);
+
+	explicit_bzero(key, sizeof(key));
 }
 
 /*
@@ -93,12 +198,8 @@ arc4_randomstir (void)
 static void
 arc4_init(void)
 {
-	int n;
-
 	mtx_init(&arc4_mtx, "arc4_mtx", NULL, MTX_DEF);
-	arc4_i = arc4_j = 0;
-	for (n = 0; n < 256; n++)
-		arc4_sbox[n] = (u_int8_t) n;
+	_rs_stir(1);
 	arc4_t_reseed = 0;
 }
 
@@ -106,43 +207,25 @@ arc4_init(void)
 
 SYSINIT(arc4_init, SI_SUB_LOCK, SI_ORDER_ANY, arc4_init, NULL);
 
 /*
- * Generate a random byte.
- */
-static u_int8_t
-arc4_randbyte(void)
-{
-	u_int8_t arc4_t;
-
-	arc4_i = (arc4_i + 1) % 256;
-	arc4_j = (arc4_j + arc4_sbox[arc4_i]) % 256;
-
-	arc4_swap(&arc4_sbox[arc4_i], &arc4_sbox[arc4_j]);
-
-	arc4_t = (arc4_sbox[arc4_i] + arc4_sbox[arc4_j]) % 256;
-	return arc4_sbox[arc4_t];
-}
-
-/*
  * MPSAFE
  */
 void
 arc4rand(void *ptr, u_int len, int reseed)
 {
-	u_char *p;
-	struct timeval tv;
+	struct timespec ts;
 
-	getmicrouptime(&tv);
+	nanotime(&ts);
 	if (atomic_cmpset_int(&arc4rand_iniseed_state, ARC4_ENTR_HAVE,
-	    ARC4_ENTR_SEED) || reseed ||
-	    (arc4_numruns > ARC4_RESEED_BYTES) ||
-	    (tv.tv_sec > arc4_t_reseed))
-		arc4_randomstir();
+	    ARC4_ENTR_SEED) || reseed ||
+	    (arc4_numruns > ARC4_RESEED_BYTES) ||
+	    (ts.tv_sec > arc4_t_reseed))
+		_rs_stir(1);
 
 	mtx_lock(&arc4_mtx);
 	arc4_numruns += len;
-	p = ptr;
-	while (len--)
-		*p++ = arc4_randbyte();
+
+	_rs_random_buf(ptr, len);
+
 	mtx_unlock(&arc4_mtx);
 }
 
@@ -150,7 +233,10 @@ uint32_t
 arc4random(void)
 {
 	uint32_t ret;
+
+	mtx_lock(&arc4_mtx);
+	_rs_random_u32(&ret);
+	mtx_unlock(&arc4_mtx);
 
-	arc4rand(&ret, sizeof ret, 0);
-	return ret;
+	return (ret);
 }
diff --git a/sys/sys/mman.h b/sys/sys/mman.h
index 53b656c..6d86d99 100644
--- a/sys/sys/mman.h
+++ b/sys/sys/mman.h
@@ -43,6 +43,7 @@
 #define INHERIT_SHARE 0
 #define INHERIT_COPY 1
 #define INHERIT_NONE 2
+#define INHERIT_ZERO 3
 #endif
 
 /*
diff --git a/sys/vm/vm.h b/sys/vm/vm.h
index d87495d..0eb7568 100644
--- a/sys/vm/vm.h
+++ b/sys/vm/vm.h
@@ -68,6 +68,7 @@ typedef char vm_inherit_t; /* inheritance codes */
 #define VM_INHERIT_SHARE ((vm_inherit_t) 0)
 #define VM_INHERIT_COPY ((vm_inherit_t) 1)
 #define VM_INHERIT_NONE ((vm_inherit_t) 2)
+#define VM_INHERIT_ZERO ((vm_inherit_t) 3)
 #define VM_INHERIT_DEFAULT VM_INHERIT_COPY
 
 typedef u_char vm_prot_t; /* protection codes */
diff --git a/sys/vm/vm_map.c b/sys/vm/vm_map.c
index cfd9760..3b1fcae 100644
--- a/sys/vm/vm_map.c
+++ b/sys/vm/vm_map.c
@@ -2258,6 +2258,7 @@ vm_map_inherit(vm_map_t map, vm_offset_t start, vm_offset_t end,
 	case VM_INHERIT_NONE:
 	case VM_INHERIT_COPY:
 	case VM_INHERIT_SHARE:
+	case VM_INHERIT_ZERO:
 		break;
 	default:
 		return (KERN_INVALID_ARGUMENT);
@@ -3390,6 +3391,7 @@ vmspace_fork(struct vmspace *vm1, vm_ooffset_t *fork_charge)
 			break;
 
 		case VM_INHERIT_COPY:
+		case VM_INHERIT_ZERO:
 			/*
 			 * Clone the entry and link into the map.
 			 */
@@ -3407,8 +3409,9 @@ vmspace_fork(struct vmspace *vm1, vm_ooffset_t *fork_charge)
 			vm_map_entry_link(new_map, new_map->header.prev,
 			    new_entry);
 			vmspace_map_entry_forked(vm1, vm2, new_entry);
-			vm_map_copy_entry(old_map, new_map, old_entry,
-			    new_entry, fork_charge);
+			if (old_entry->inheritance == VM_INHERIT_COPY)
+				vm_map_copy_entry(old_map, new_map, old_entry,
+				    new_entry, fork_charge);
 			break;
 		}
 		old_entry = old_entry->next;
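
Reviewer note (not part of the patch): a minimal userland sketch of the INHERIT_ZERO semantics the new arc4random state depends on. The test program below is hypothetical and assumes a system that implements the INHERIT_ZERO value added above; it uses only mmap(2), minherit(2), and fork(2). The child should read the region back as zeros while the parent's contents survive:

#include <sys/types.h>
#include <sys/mman.h>
#include <sys/wait.h>

#include <err.h>
#include <stdio.h>
#include <string.h>
#include <unistd.h>

int
main(void)
{
	char *p;
	pid_t pid;
	int status;

	p = mmap(NULL, 4096, PROT_READ | PROT_WRITE,
	    MAP_ANON | MAP_PRIVATE, -1, 0);
	if (p == MAP_FAILED)
		err(1, "mmap");
	if (minherit(p, 4096, INHERIT_ZERO) == -1)
		err(1, "minherit");

	memset(p, 0xa5, 4096);

	if ((pid = fork()) == -1)
		err(1, "fork");
	if (pid == 0) {
		/* Child: the INHERIT_ZERO region must read back as zeros. */
		_exit(p[0] == 0 ? 0 : 1);
	}
	if (waitpid(pid, &status, 0) == -1)
		err(1, "waitpid");

	/* Parent: contents are untouched. */
	printf("parent byte 0x%02x, child saw %s\n", (unsigned char)p[0],
	    WEXITSTATUS(status) == 0 ? "zeros" : "nonzero bytes");
	return (0);
}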
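A second note, on arc4random_uniform(): the expression "min = -upper_bound % upper_bound" computes 2**32 % upper_bound using only 32-bit arithmetic, because -upper_bound wraps to 2**32 - upper_bound and (2**32 - x) % x == 2**32 % x. A small self-check (hypothetical, not part of the patch; reject_bound is a local stand-in name) verifies this against 64-bit arithmetic:

#include <assert.h>
#include <stddef.h>
#include <stdint.h>

/*
 * In uint32_t arithmetic, -upper_bound wraps to 2**32 - upper_bound,
 * and (2**32 - x) % x == 2**32 % x.
 */
static uint32_t
reject_bound(uint32_t upper_bound)
{
	return (-upper_bound % upper_bound);
}

int
main(void)
{
	static const uint32_t samples[] =
	    { 2, 3, 10, 1000000, 0x80000000U, 0xfffffffeU };
	size_t i;

	for (i = 0; i < sizeof(samples) / sizeof(samples[0]); i++)
		assert(reject_bound(samples[i]) ==
		    (uint32_t)((1ULL << 32) % samples[i]));
	return (0);
}

Any r below this bound is rejected and re-rolled, so r % upper_bound is uniform over [0, upper_bound).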
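Finally, the rs_buf/rs_have bookkeeping shared by _rs_random_buf() in the libc and kernel versions can be modeled outside the patch as follows. This is a simplified sketch with local names (refill() stands in for chacha_encrypt_bytes() plus the rekey; BUFSZ and RESERVED mirror RSBUFSZ and KEYSZ + IVSZ): valid keystream always sits at the end of the buffer, the read cursor is buf + BUFSZ - have, and consumed bytes are zeroed immediately so they cannot be read back later.

#include <assert.h>
#include <stddef.h>
#include <string.h>

#define BUFSZ 1024	/* mirrors RSBUFSZ = 16 * 64 */
#define RESERVED 40	/* mirrors KEYSZ + IVSZ: consumed by the rekey itself */

static unsigned char buf[BUFSZ];
static size_t have;

static void
refill(void)
{
	memset(buf, 0xaa, BUFSZ);	/* pretend keystream */
	/* the first RESERVED bytes re-key the cipher, then are erased */
	memset(buf, 0, RESERVED);
	have = BUFSZ - RESERVED;	/* 984 usable bytes per refill */
}

static void
get_bytes(unsigned char *out, size_t n)
{
	size_t m;

	while (n > 0) {
		if (have == 0)
			refill();
		m = n < have ? n : have;
		memcpy(out, buf + BUFSZ - have, m);
		memset(buf + BUFSZ - have, 0, m);	/* backtracking resistance */
		out += m;
		n -= m;
		have -= m;
	}
}

int
main(void)
{
	unsigned char out[3000];

	get_bytes(out, sizeof(out));	/* spans multiple refills */
	assert(have <= BUFSZ - RESERVED);
	return (0);
}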