--- sys/param.h.savbuf 2017-05-31 08:27:56.183945000 -0400 +++ sys/param.h 2017-06-02 00:36:56.560864000 -0400 @@ -244,9 +244,7 @@ * Filesystems can of course request smaller chunks. Actual * backing memory uses a chunk size of a page (PAGE_SIZE). * The default value here can be overridden on a per-architecture - * basis by defining it in <machine/param.h>. This should - * probably be done to increase its value, when MAXBCACHEBUF is - * defined as a larger value in <machine/param.h>. + * basis by defining it in <machine/param.h>. * * If you make BKVASIZE too small you risk seriously fragmenting * the buffer KVM map which may slow things down a bit. If you @@ -267,6 +265,14 @@ #define BKVAMASK (BKVASIZE-1) /* + * This variable is tuned via vfs.maxbcachebuf and is set to the value of + * MAXBCACHEBUF by default. + */ +#ifdef _KERNEL +extern int maxbcachebuf; +#endif + +/* * MAXPATHLEN defines the longest permissible path length after expanding * symbolic links. It is used to allocate a temporary buffer from the buffer * pool in which to do the name expansion, hence should be a power of two, --- fs/nfs/nfsport.h.savbuf 2017-05-29 09:16:04.798728000 -0400 +++ fs/nfs/nfsport.h 2017-05-29 09:16:22.245082000 -0400 @@ -1028,7 +1028,7 @@ struct nfsreq { }; #ifndef NFS_MAXBSIZE -#define NFS_MAXBSIZE MAXBCACHEBUF +#define NFS_MAXBSIZE (maxbcachebuf) #endif /* --- fs/nfs/nfsproto.h.savbuf 2017-05-29 10:33:01.858512000 -0400 +++ fs/nfs/nfsproto.h 2017-05-29 10:35:18.036660000 -0400 @@ -56,8 +56,22 @@ #define NFS_MAXDGRAMDATA 16384 #define NFS_MAXPATHLEN 1024 #define NFS_MAXNAMLEN 255 +/* + * Calculating the maximum XDR overhead for an NFS RPC isn't easy. + * NFS_MAXPKTHDR is antiquated and assumes AUTH_SYS over UDP. + * NFS_MAXXDR should be sufficient for all NFS versions over TCP. + * It includes: + * - Maximum RPC message header. It can include 2 400byte authenticators plus + * a machine name of unlimited length, although it is usually relatively + * small. + * - XDR overheads for the NFSv4 compound.
This can include Owner and + * Owner_group strings, which are usually fairly small, but are allowed + * to be up to 1024 bytes each. + * 4096 is overkill, but should always be sufficient. + */ #define NFS_MAXPKTHDR 404 -#define NFS_MAXPACKET (NFS_SRVMAXIO + 2048) +#define NFS_MAXXDR 4096 +#define NFS_MAXPACKET (NFS_SRVMAXIO + NFS_MAXXDR) #define NFS_MINPACKET 20 #define NFS_FABLKSIZE 512 /* Size in bytes of a block wrt fa_blocks */ #define NFSV4_MINORVERSION 0 /* V4 Minor version */ --- fs/nfs/nfs_commonkrpc.c.savbuf 2017-05-29 09:16:43.958766000 -0400 +++ fs/nfs/nfs_commonkrpc.c 2017-05-31 09:11:03.622427000 -0400 @@ -161,7 +161,7 @@ newnfs_connect(struct nfsmount *nmp, str struct ucred *cred, NFSPROC_T *p, int callback_retry_mult) { int rcvreserve, sndreserve; - int pktscale; + int pktscale, pktscalesav; struct sockaddr *saddr; struct ucred *origcred; CLIENT *client; @@ -210,6 +210,7 @@ newnfs_connect(struct nfsmount *nmp, str pktscale = 2; if (pktscale > 64) pktscale = 64; + pktscalesav = pktscale; /* * soreserve() can fail if sb_max is too small, so shrink pktscale * and try again if there is an error. 
@@ -228,8 +229,12 @@ newnfs_connect(struct nfsmount *nmp, str goto out; } do { - if (error != 0 && pktscale > 2) + if (error != 0 && pktscale > 2) { + if (nmp != NULL && nrp->nr_sotype == SOCK_STREAM && + pktscale == pktscalesav) + printf("Consider increasing kern.ipc.maxsockbuf\n"); pktscale--; + } if (nrp->nr_sotype == SOCK_DGRAM) { if (nmp != NULL) { sndreserve = (NFS_MAXDGRAMDATA + NFS_MAXPKTHDR) * @@ -243,15 +248,19 @@ newnfs_connect(struct nfsmount *nmp, str if (nrp->nr_sotype != SOCK_STREAM) panic("nfscon sotype"); if (nmp != NULL) { - sndreserve = (NFS_MAXBSIZE + NFS_MAXPKTHDR + + sndreserve = (NFS_MAXBSIZE + NFS_MAXXDR + sizeof (u_int32_t)) * pktscale; - rcvreserve = (NFS_MAXBSIZE + NFS_MAXPKTHDR + + rcvreserve = (NFS_MAXBSIZE + NFS_MAXXDR + sizeof (u_int32_t)) * pktscale; } else { sndreserve = rcvreserve = 1024 * pktscale; } } error = soreserve(so, sndreserve, rcvreserve); + if (error != 0 && nmp != NULL && nrp->nr_sotype == SOCK_STREAM && + pktscale <= 2) + printf("Must increase kern.ipc.maxsockbuf or reduce" + " rsize, wsize\n"); } while (error != 0 && pktscale > 2); soclose(so); if (error) { --- fs/nfsclient/nfs_clrpcops.c.savbuf 2017-05-29 09:10:28.666042000 -0400 +++ fs/nfsclient/nfs_clrpcops.c 2017-05-29 09:20:22.950181000 -0400 @@ -4703,7 +4703,7 @@ nfsrpc_createsession(struct nfsmount *nm struct nfssockreq *nrp, uint32_t sequenceid, int mds, struct ucred *cred, NFSPROC_T *p) { - uint32_t crflags, *tl; + uint32_t crflags, maxval, *tl; struct nfsrv_descript nfsd; struct nfsrv_descript *nd = &nfsd; int error, irdcnt; @@ -4721,8 +4721,8 @@ nfsrpc_createsession(struct nfsmount *nm /* Fill in fore channel attributes. 
*/ NFSM_BUILD(tl, uint32_t *, 7 * NFSX_UNSIGNED); *tl++ = 0; /* Header pad size */ - *tl++ = txdr_unsigned(100000); /* Max request size */ - *tl++ = txdr_unsigned(100000); /* Max response size */ + *tl++ = txdr_unsigned(nmp->nm_wsize + NFS_MAXXDR);/* Max request size */ + *tl++ = txdr_unsigned(nmp->nm_rsize + NFS_MAXXDR);/* Max reply size */ *tl++ = txdr_unsigned(4096); /* Max response size cached */ *tl++ = txdr_unsigned(20); /* Max operations */ *tl++ = txdr_unsigned(64); /* Max slots */ @@ -4769,7 +4769,26 @@ nfsrpc_createsession(struct nfsmount *nm /* Get the fore channel slot count. */ NFSM_DISSECT(tl, uint32_t *, 7 * NFSX_UNSIGNED); - tl += 3; /* Skip the other counts. */ + tl++; /* Skip the header pad size. */ + + /* Make sure nm_wsize is small enough. */ + maxval = fxdr_unsigned(uint32_t, *tl++); + while (maxval < nmp->nm_wsize + NFS_MAXXDR) { + if (nmp->nm_wsize > 8096) + nmp->nm_wsize /= 2; + else + break; + } + + /* Make sure nm_rsize is small enough. */ + maxval = fxdr_unsigned(uint32_t, *tl++); + while (maxval < nmp->nm_rsize + NFS_MAXXDR) { + if (nmp->nm_rsize > 8096) + nmp->nm_rsize /= 2; + else + break; + } + sep->nfsess_maxcache = fxdr_unsigned(int, *tl++); tl++; sep->nfsess_foreslots = fxdr_unsigned(uint16_t, *tl++); --- kern/vfs_bio.c.savbuf 2017-05-29 09:22:28.311438000 -0400 +++ kern/vfs_bio.c 2017-06-02 00:18:15.269994000 -0400 @@ -131,6 +131,7 @@ static void bufkva_reclaim(vmem_t *, int static void bufkva_free(struct buf *); static int buf_import(void *, void **, int, int); static void buf_release(void *, void **, int); +static void maxbcachebuf_adjust(void); #if defined(COMPAT_FREEBSD4) || defined(COMPAT_FREEBSD5) || \ defined(COMPAT_FREEBSD6) || defined(COMPAT_FREEBSD7) @@ -245,6 +246,9 @@ SYSCTL_LONG(_vfs, OID_AUTO, barrierwrite SYSCTL_INT(_vfs, OID_AUTO, unmapped_buf_allowed, CTLFLAG_RD, &unmapped_buf_allowed, 0, "Permit the use of the unmapped i/o"); +int maxbcachebuf = MAXBCACHEBUF; +SYSCTL_INT(_vfs, OID_AUTO, maxbcachebuf, 
CTLFLAG_RDTUN, &maxbcachebuf, 0, + "Maximum size of a buffer cache block"); /* * This lock synchronizes access to bd_request. @@ -847,6 +851,27 @@ bd_wakeup(void) } /* + * Adjust the maxbcachebuf tunable. + */ +static void +maxbcachebuf_adjust(void) +{ + int i; + + /* + * maxbcachebuf must be a power of 2 >= MAXBSIZE. + */ + i = 2; + while (i <= maxbcachebuf / 2) /* i * 2 could overflow int */ + i *= 2; + maxbcachebuf = i; + if (maxbcachebuf < MAXBSIZE) + maxbcachebuf = MAXBSIZE; + if (maxbcachebuf != MAXBCACHEBUF) + printf("maxbcachebuf=%d\n", maxbcachebuf); +} + +/* * bd_speedup - speedup the buffer cache flushing code */ void @@ -893,6 +918,7 @@ kern_vfs_bio_buffer_alloc(caddr_t v, lon */ physmem_est = physmem_est * (PAGE_SIZE / 1024); + maxbcachebuf_adjust(); /* * The nominal buffer size (and minimum KVA allocation) is BKVASIZE. * For the first 64MB of ram nominally allocate sufficient buffers to @@ -1003,7 +1029,8 @@ bufinit(void) struct buf *bp; int i; - CTASSERT(MAXBCACHEBUF >= MAXBSIZE); + KASSERT(maxbcachebuf >= MAXBSIZE, + ("maxbcachebuf must be >= MAXBSIZE\n")); mtx_init(&bqlocks[QUEUE_DIRTY], "bufq dirty lock", NULL, MTX_DEF); mtx_init(&bqlocks[QUEUE_EMPTY], "bufq empty lock", NULL, MTX_DEF); for (i = QUEUE_CLEAN; i < QUEUE_CLEAN + CLEAN_QUEUES; i++) @@ -1050,7 +1077,7 @@ bufinit(void) * PAGE_SIZE. */ maxbufspace = (long)nbuf * BKVASIZE; - hibufspace = lmax(3 * maxbufspace / 4, maxbufspace - MAXBCACHEBUF * 10); + hibufspace = lmax(3 * maxbufspace / 4, maxbufspace - maxbcachebuf * 10); lobufspace = (hibufspace / 20) * 19; /* 95% */ bufspacethresh = lobufspace + (hibufspace - lobufspace) / 2; @@ -1062,9 +1089,9 @@ bufinit(void) * The lower 1 MiB limit is the historical upper limit for * hirunningspace.
*/ - hirunningspace = lmax(lmin(roundup(hibufspace / 64, MAXBCACHEBUF), + hirunningspace = lmax(lmin(roundup(hibufspace / 64, maxbcachebuf), 16 * 1024 * 1024), 1024 * 1024); - lorunningspace = roundup((hirunningspace * 2) / 3, MAXBCACHEBUF); + lorunningspace = roundup((hirunningspace * 2) / 3, maxbcachebuf); /* * Limit the amount of malloc memory since it is wired permanently into @@ -3484,9 +3511,9 @@ getblk(struct vnode *vp, daddr_t blkno, KASSERT((flags & (GB_UNMAPPED | GB_KVAALLOC)) != GB_KVAALLOC, ("GB_KVAALLOC only makes sense with GB_UNMAPPED")); ASSERT_VOP_LOCKED(vp, "getblk"); - if (size > MAXBCACHEBUF) - panic("getblk: size(%d) > MAXBCACHEBUF(%d)\n", size, - MAXBCACHEBUF); + if (size > maxbcachebuf) + panic("getblk: size(%d) > maxbcachebuf(%d)\n", size, + maxbcachebuf); if (!unmapped_buf_allowed) flags &= ~(GB_UNMAPPED | GB_KVAALLOC);