FreeBSD Bugzilla – Attachment 183164 Details for
Bug 219645
max NFS I/O size is not tunable
Home
|
New
|
Browse
|
Search
|
[?]
|
Reports
|
Help
|
New Account
|
Log In
Remember
[x]
|
Forgot Password
Login:
[x]
[patch]
make max NFS client I/O size tunable
maxiotune2.patch (text/plain), 9.57 KB, created by
Rick Macklem
on 2017-06-02 21:19:00 UTC
(
hide
)
Description:
make max NFS client I/O size tunable
Filename:
MIME Type:
Creator:
Rick Macklem
Created:
2017-06-02 21:19:00 UTC
Size:
9.57 KB
patch
obsolete
>--- sys/param.h.savbuf 2017-05-31 08:27:56.183945000 -0400 >+++ sys/param.h 2017-06-02 00:36:56.560864000 -0400 >@@ -244,9 +244,7 @@ > * Filesystems can of course request smaller chunks. Actual > * backing memory uses a chunk size of a page (PAGE_SIZE). > * The default value here can be overridden on a per-architecture >- * basis by defining it in <machine/param.h>. This should >- * probably be done to increase its value, when MAXBCACHEBUF is >- * defined as a larger value in <machine/param.h>. >+ * basis by defining it in <machine/param.h>. > * > * If you make BKVASIZE too small you risk seriously fragmenting > * the buffer KVM map which may slow things down a bit. If you >@@ -267,6 +265,14 @@ > #define BKVAMASK (BKVASIZE-1) > > /* >+ * This variable is tuned via vfs.maxbcachebuf and is set to the value of >+ * MAXBCACHEBUF by default. >+ */ >+#ifdef _KERNEL >+extern int maxbcachebuf; >+#endif >+ >+/* > * MAXPATHLEN defines the longest permissible path length after expanding > * symbolic links. It is used to allocate a temporary buffer from the buffer > * pool in which to do the name expansion, hence should be a power of two, >--- fs/nfs/nfsport.h.savbuf 2017-05-29 09:16:04.798728000 -0400 >+++ fs/nfs/nfsport.h 2017-05-29 09:16:22.245082000 -0400 >@@ -1028,7 +1028,7 @@ struct nfsreq { > }; > > #ifndef NFS_MAXBSIZE >-#define NFS_MAXBSIZE MAXBCACHEBUF >+#define NFS_MAXBSIZE (maxbcachebuf) > #endif > > /* >--- fs/nfs/nfsproto.h.savbuf 2017-05-29 10:33:01.858512000 -0400 >+++ fs/nfs/nfsproto.h 2017-05-29 10:35:18.036660000 -0400 >@@ -56,8 +56,22 @@ > #define NFS_MAXDGRAMDATA 16384 > #define NFS_MAXPATHLEN 1024 > #define NFS_MAXNAMLEN 255 >+/* >+ * Calculating the maximum XDR overhead for an NFS RPC isn't easy. >+ * NFS_MAXPKTHDR is antiquated and assumes AUTH_SYS over UDP. >+ * NFS_MAXXDR should be sufficient for all NFS versions over TCP. >+ * It includes: >+ * - Maximum RPC message header. 
It can include 2 400byte authenticators plus >+ * a machine name of unlimited length, although it is usually relatively >+ * small. >+ * - XDR overheads for the NFSv4 compound. This can include Owner and >+ * Owner_group strings, which are usually fairly small, but are allowed >+ * to be up to 1024 bytes each. >+ * 4096 is overkill, but should always be sufficient. >+ */ > #define NFS_MAXPKTHDR 404 >-#define NFS_MAXPACKET (NFS_SRVMAXIO + 2048) >+#define NFS_MAXXDR 4096 >+#define NFS_MAXPACKET (NFS_SRVMAXIO + NFS_MAXXDR) > #define NFS_MINPACKET 20 > #define NFS_FABLKSIZE 512 /* Size in bytes of a block wrt fa_blocks */ > #define NFSV4_MINORVERSION 0 /* V4 Minor version */ >--- fs/nfs/nfs_commonkrpc.c.savbuf 2017-05-29 09:16:43.958766000 -0400 >+++ fs/nfs/nfs_commonkrpc.c 2017-05-31 09:11:03.622427000 -0400 >@@ -161,7 +161,7 @@ newnfs_connect(struct nfsmount *nmp, str > struct ucred *cred, NFSPROC_T *p, int callback_retry_mult) > { > int rcvreserve, sndreserve; >- int pktscale; >+ int pktscale, pktscalesav; > struct sockaddr *saddr; > struct ucred *origcred; > CLIENT *client; >@@ -210,6 +210,7 @@ newnfs_connect(struct nfsmount *nmp, str > pktscale = 2; > if (pktscale > 64) > pktscale = 64; >+ pktscalesav = pktscale; > /* > * soreserve() can fail if sb_max is too small, so shrink pktscale > * and try again if there is an error. 
>@@ -228,8 +229,12 @@ newnfs_connect(struct nfsmount *nmp, str > goto out; > } > do { >- if (error != 0 && pktscale > 2) >+ if (error != 0 && pktscale > 2) { >+ if (nmp != NULL && nrp->nr_sotype == SOCK_STREAM && >+ pktscale == pktscalesav) >+ printf("Consider increasing kern.ipc.maxsockbuf\n"); > pktscale--; >+ } > if (nrp->nr_sotype == SOCK_DGRAM) { > if (nmp != NULL) { > sndreserve = (NFS_MAXDGRAMDATA + NFS_MAXPKTHDR) * >@@ -243,15 +248,19 @@ newnfs_connect(struct nfsmount *nmp, str > if (nrp->nr_sotype != SOCK_STREAM) > panic("nfscon sotype"); > if (nmp != NULL) { >- sndreserve = (NFS_MAXBSIZE + NFS_MAXPKTHDR + >+ sndreserve = (NFS_MAXBSIZE + NFS_MAXXDR + > sizeof (u_int32_t)) * pktscale; >- rcvreserve = (NFS_MAXBSIZE + NFS_MAXPKTHDR + >+ rcvreserve = (NFS_MAXBSIZE + NFS_MAXXDR + > sizeof (u_int32_t)) * pktscale; > } else { > sndreserve = rcvreserve = 1024 * pktscale; > } > } > error = soreserve(so, sndreserve, rcvreserve); >+ if (error != 0 && nmp != NULL && nrp->nr_sotype == SOCK_STREAM && >+ pktscale <= 2) >+ printf("Must increase kern.ipc.maxsockbuf or reduce" >+ " rsize, wsize\n"); > } while (error != 0 && pktscale > 2); > soclose(so); > if (error) { >--- fs/nfsclient/nfs_clrpcops.c.savbuf 2017-05-29 09:10:28.666042000 -0400 >+++ fs/nfsclient/nfs_clrpcops.c 2017-05-29 09:20:22.950181000 -0400 >@@ -4703,7 +4703,7 @@ nfsrpc_createsession(struct nfsmount *nm > struct nfssockreq *nrp, uint32_t sequenceid, int mds, struct ucred *cred, > NFSPROC_T *p) > { >- uint32_t crflags, *tl; >+ uint32_t crflags, maxval, *tl; > struct nfsrv_descript nfsd; > struct nfsrv_descript *nd = &nfsd; > int error, irdcnt; >@@ -4721,8 +4721,8 @@ nfsrpc_createsession(struct nfsmount *nm > /* Fill in fore channel attributes. 
*/ > NFSM_BUILD(tl, uint32_t *, 7 * NFSX_UNSIGNED); > *tl++ = 0; /* Header pad size */ >- *tl++ = txdr_unsigned(100000); /* Max request size */ >- *tl++ = txdr_unsigned(100000); /* Max response size */ >+ *tl++ = txdr_unsigned(nmp->nm_wsize + NFS_MAXXDR);/* Max request size */ >+ *tl++ = txdr_unsigned(nmp->nm_rsize + NFS_MAXXDR);/* Max reply size */ > *tl++ = txdr_unsigned(4096); /* Max response size cached */ > *tl++ = txdr_unsigned(20); /* Max operations */ > *tl++ = txdr_unsigned(64); /* Max slots */ >@@ -4769,7 +4769,26 @@ nfsrpc_createsession(struct nfsmount *nm > > /* Get the fore channel slot count. */ > NFSM_DISSECT(tl, uint32_t *, 7 * NFSX_UNSIGNED); >- tl += 3; /* Skip the other counts. */ >+ tl++; /* Skip the header pad size. */ >+ >+ /* Make sure nm_wsize is small enough. */ >+ maxval = fxdr_unsigned(uint32_t, *tl++); >+ while (maxval < nmp->nm_wsize + NFS_MAXXDR) { >+ if (nmp->nm_wsize > 8096) >+ nmp->nm_wsize /= 2; >+ else >+ break; >+ } >+ >+ /* Make sure nm_rsize is small enough. 
*/ >+ maxval = fxdr_unsigned(uint32_t, *tl++); >+ while (maxval < nmp->nm_rsize + NFS_MAXXDR) { >+ if (nmp->nm_rsize > 8096) >+ nmp->nm_rsize /= 2; >+ else >+ break; >+ } >+ > sep->nfsess_maxcache = fxdr_unsigned(int, *tl++); > tl++; > sep->nfsess_foreslots = fxdr_unsigned(uint16_t, *tl++); >--- kern/vfs_bio.c.savbuf 2017-05-29 09:22:28.311438000 -0400 >+++ kern/vfs_bio.c 2017-06-02 00:18:15.269994000 -0400 >@@ -131,6 +131,7 @@ static void bufkva_reclaim(vmem_t *, int > static void bufkva_free(struct buf *); > static int buf_import(void *, void **, int, int); > static void buf_release(void *, void **, int); >+static void maxbcachebuf_adjust(void); > > #if defined(COMPAT_FREEBSD4) || defined(COMPAT_FREEBSD5) || \ > defined(COMPAT_FREEBSD6) || defined(COMPAT_FREEBSD7) >@@ -245,6 +246,9 @@ SYSCTL_LONG(_vfs, OID_AUTO, barrierwrite > SYSCTL_INT(_vfs, OID_AUTO, unmapped_buf_allowed, CTLFLAG_RD, > &unmapped_buf_allowed, 0, > "Permit the use of the unmapped i/o"); >+int maxbcachebuf = MAXBCACHEBUF; >+SYSCTL_INT(_vfs, OID_AUTO, maxbcachebuf, CTLFLAG_RDTUN, &maxbcachebuf, 0, >+ "Maximum size of a buffer cache block"); > > /* > * This lock synchronizes access to bd_request. >@@ -847,6 +851,27 @@ bd_wakeup(void) > } > > /* >+ * Adjust the maxbcachebuf tunable. >+ */ >+static void >+maxbcachebuf_adjust(void) >+{ >+ int i; >+ >+ /* >+ * maxbcachebuf must be a power of 2 >= MAXBSIZE. >+ */ >+ i = 2; >+ while (i * 2 <= maxbcachebuf) >+ i *= 2; >+ maxbcachebuf = i; >+ if (maxbcachebuf < MAXBSIZE) >+ maxbcachebuf = MAXBSIZE; >+ if (maxbcachebuf != MAXBCACHEBUF) >+ printf("maxbcachebuf=%d\n", maxbcachebuf); >+} >+ >+/* > * bd_speedup - speedup the buffer cache flushing code > */ > void >@@ -893,6 +918,7 @@ kern_vfs_bio_buffer_alloc(caddr_t v, lon > */ > physmem_est = physmem_est * (PAGE_SIZE / 1024); > >+ maxbcachebuf_adjust(); > /* > * The nominal buffer size (and minimum KVA allocation) is BKVASIZE.
> * For the first 64MB of ram nominally allocate sufficient buffers to >@@ -1003,7 +1029,8 @@ bufinit(void) > struct buf *bp; > int i; > >- CTASSERT(MAXBCACHEBUF >= MAXBSIZE); >+ KASSERT(maxbcachebuf >= MAXBSIZE, >+ ("maxbcachebuf must be >= MAXBSIZE\n")); > mtx_init(&bqlocks[QUEUE_DIRTY], "bufq dirty lock", NULL, MTX_DEF); > mtx_init(&bqlocks[QUEUE_EMPTY], "bufq empty lock", NULL, MTX_DEF); > for (i = QUEUE_CLEAN; i < QUEUE_CLEAN + CLEAN_QUEUES; i++) >@@ -1050,7 +1077,7 @@ bufinit(void) > * PAGE_SIZE. > */ > maxbufspace = (long)nbuf * BKVASIZE; >- hibufspace = lmax(3 * maxbufspace / 4, maxbufspace - MAXBCACHEBUF * 10); >+ hibufspace = lmax(3 * maxbufspace / 4, maxbufspace - maxbcachebuf * 10); > lobufspace = (hibufspace / 20) * 19; /* 95% */ > bufspacethresh = lobufspace + (hibufspace - lobufspace) / 2; > >@@ -1062,9 +1089,9 @@ bufinit(void) > * The lower 1 MiB limit is the historical upper limit for > * hirunningspace. > */ >- hirunningspace = lmax(lmin(roundup(hibufspace / 64, MAXBCACHEBUF), >+ hirunningspace = lmax(lmin(roundup(hibufspace / 64, maxbcachebuf), > 16 * 1024 * 1024), 1024 * 1024); >- lorunningspace = roundup((hirunningspace * 2) / 3, MAXBCACHEBUF); >+ lorunningspace = roundup((hirunningspace * 2) / 3, maxbcachebuf); > > /* > * Limit the amount of malloc memory since it is wired permanently into >@@ -3484,9 +3511,9 @@ getblk(struct vnode *vp, daddr_t blkno, > KASSERT((flags & (GB_UNMAPPED | GB_KVAALLOC)) != GB_KVAALLOC, > ("GB_KVAALLOC only makes sense with GB_UNMAPPED")); > ASSERT_VOP_LOCKED(vp, "getblk"); >- if (size > MAXBCACHEBUF) >- panic("getblk: size(%d) > MAXBCACHEBUF(%d)\n", size, >- MAXBCACHEBUF); >+ if (size > maxbcachebuf) >+ panic("getblk: size(%d) > maxbcachebuf(%d)\n", size, >+ maxbcachebuf); > if (!unmapped_buf_allowed) > flags &= ~(GB_UNMAPPED | GB_KVAALLOC); >
You cannot view the attachment while viewing its details because your browser does not support IFRAMEs.
View the attachment on a separate page
.
View Attachment As Diff
View Attachment As Raw
Actions:
View
|
Diff
Attachments on
bug 219645
:
183045
|
183067
| 183164