FreeBSD Bugzilla – Attachment 183045 Details for
Bug 219645
max NFS I/O size is not tunable
Home
|
New
|
Browse
|
Search
|
[?]
|
Reports
|
Help
|
New Account
|
Log In
Remember
[x]
|
Forgot Password
Login:
[x]
[patch]
make max NFS client I/O size tunable
maxiotune.patch (text/plain), 11.38 KB, created by
Rick Macklem
on 2017-05-29 22:51:08 UTC
(
hide
)
Description:
make max NFS client I/O size tunable
Filename:
MIME Type:
Creator:
Rick Macklem
Created:
2017-05-29 22:51:08 UTC
Size:
11.38 KB
patch
obsolete
>--- fs/nfs/nfsport.h.savbuf 2017-05-29 09:16:04.798728000 -0400
>+++ fs/nfs/nfsport.h 2017-05-29 09:16:22.245082000 -0400
>@@ -1028,7 +1028,7 @@ struct nfsreq {
> };
> 
> #ifndef NFS_MAXBSIZE
>-#define NFS_MAXBSIZE MAXBCACHEBUF
>+#define NFS_MAXBSIZE (maxbcachebuf)
> #endif
> 
> /*
>--- fs/nfs/nfs_commonkrpc.c.savbuf 2017-05-29 09:16:43.958766000 -0400
>+++ fs/nfs/nfs_commonkrpc.c 2017-05-29 09:52:25.924553000 -0400
>@@ -96,6 +96,7 @@ extern int nfscl_ticks;
> extern void (*ncl_call_invalcaches)(struct vnode *);
> extern int nfs_numnfscbd;
> extern int nfscl_debuglevel;
>+extern int maxbcachebuf;
> 
> SVCPOOL *nfscbd_pool;
> static int nfsrv_gsscallbackson = 0;
>@@ -243,15 +244,17 @@ newnfs_connect(struct nfsmount *nmp, str
> if (nrp->nr_sotype != SOCK_STREAM)
> panic("nfscon sotype");
> if (nmp != NULL) {
>- sndreserve = (NFS_MAXBSIZE + NFS_MAXPKTHDR +
>+ sndreserve = (NFS_MAXBSIZE + NFS_MAXXDR +
> sizeof (u_int32_t)) * pktscale;
>- rcvreserve = (NFS_MAXBSIZE + NFS_MAXPKTHDR +
>+ rcvreserve = (NFS_MAXBSIZE + NFS_MAXXDR +
> sizeof (u_int32_t)) * pktscale;
> } else {
> sndreserve = rcvreserve = 1024 * pktscale;
> }
> }
> error = soreserve(so, sndreserve, rcvreserve);
>+ if (error != 0 && nmp != NULL && nrp->nr_sotype == SOCK_STREAM)
>+ printf("Consider increasing kern.ipc.maxsockbuf\n");
> } while (error != 0 && pktscale > 2);
> soclose(so);
> if (error) {
>--- fs/nfsclient/nfs_clrpcops.c.savbuf 2017-05-29 09:10:28.666042000 -0400
>+++ fs/nfsclient/nfs_clrpcops.c 2017-05-29 09:20:22.950181000 -0400
>@@ -4703,7 +4703,7 @@ nfsrpc_createsession(struct nfsmount *nm
> struct nfssockreq *nrp, uint32_t sequenceid, int mds, struct ucred *cred,
> NFSPROC_T *p)
> {
>- uint32_t crflags, *tl;
>+ uint32_t crflags, maxval, *tl;
> struct nfsrv_descript nfsd;
> struct nfsrv_descript *nd = &nfsd;
> int error, irdcnt;
>@@ -4721,8 +4721,8 @@ nfsrpc_createsession(struct nfsmount *nm
> /* Fill in fore channel attributes. */
> NFSM_BUILD(tl, uint32_t *, 7 * NFSX_UNSIGNED);
> *tl++ = 0; /* Header pad size */
>- *tl++ = txdr_unsigned(100000); /* Max request size */
>- *tl++ = txdr_unsigned(100000); /* Max response size */
>+ *tl++ = txdr_unsigned(nmp->nm_wsize + NFS_MAXXDR);/* Max request size */
>+ *tl++ = txdr_unsigned(nmp->nm_rsize + NFS_MAXXDR);/* Max reply size */
> *tl++ = txdr_unsigned(4096); /* Max response size cached */
> *tl++ = txdr_unsigned(20); /* Max operations */
> *tl++ = txdr_unsigned(64); /* Max slots */
>@@ -4769,7 +4769,26 @@ nfsrpc_createsession(struct nfsmount *nm
> 
> /* Get the fore channel slot count. */
> NFSM_DISSECT(tl, uint32_t *, 7 * NFSX_UNSIGNED);
>- tl += 3; /* Skip the other counts. */
>+ tl++; /* Skip the header pad size. */
>+
>+ /* Make sure nm_wsize is small enough. */
>+ maxval = fxdr_unsigned(uint32_t, *tl++);
>+ while (maxval < nmp->nm_wsize + NFS_MAXXDR) {
>+ if (nmp->nm_wsize > 8096)
>+ nmp->nm_wsize /= 2;
>+ else
>+ break;
>+ }
>+
>+ /* Make sure nm_rsize is small enough. */
>+ maxval = fxdr_unsigned(uint32_t, *tl++);
>+ while (maxval < nmp->nm_rsize + NFS_MAXXDR) {
>+ if (nmp->nm_rsize > 8096)
>+ nmp->nm_rsize /= 2;
>+ else
>+ break;
>+ }
>+
> sep->nfsess_maxcache = fxdr_unsigned(int, *tl++);
> tl++;
> sep->nfsess_foreslots = fxdr_unsigned(uint16_t, *tl++);
>--- fs/nfsclient/nfs_clvfsops.c.savbuf 2017-05-29 09:31:15.357098000 -0400
>+++ fs/nfsclient/nfs_clvfsops.c 2017-05-29 09:31:30.524470000 -0400
>@@ -83,6 +83,7 @@ extern int nfscl_debuglevel;
> extern enum nfsiod_state ncl_iodwant[NFS_MAXASYNCDAEMON];
> extern struct nfsmount *ncl_iodmount[NFS_MAXASYNCDAEMON];
> extern struct mtx ncl_iod_mutex;
>+extern int maxbcachebuf;
> NFSCLSTATEMUTEX;
> 
> MALLOC_DEFINE(M_NEWNFSREQ, "newnfsclient_req", "NFS request header");
>--- kern/vfs_bio.c.savbuf 2017-05-29 09:22:28.311438000 -0400
>+++ kern/vfs_bio.c 2017-05-29 09:42:43.801181000 -0400
>@@ -131,6 +131,7 @@ static void bufkva_reclaim(vmem_t *, int
> static void bufkva_free(struct buf *);
> static int buf_import(void *, void **, int, int);
> static void buf_release(void *, void **, int);
>+static void maxbcachebuf_adjust(void);
> 
> #if defined(COMPAT_FREEBSD4) || defined(COMPAT_FREEBSD5) || \
> defined(COMPAT_FREEBSD6) || defined(COMPAT_FREEBSD7)
>@@ -245,6 +246,10 @@ SYSCTL_LONG(_vfs, OID_AUTO, barrierwrite
> SYSCTL_INT(_vfs, OID_AUTO, unmapped_buf_allowed, CTLFLAG_RD,
> &unmapped_buf_allowed, 0,
> "Permit the use of the unmapped i/o");
>+static int bkvasize = BKVASIZE;
>+int maxbcachebuf = MAXBCACHEBUF;
>+SYSCTL_INT(_vfs, OID_AUTO, maxbcachebuf, CTLFLAG_RDTUN, &maxbcachebuf, 0,
>+ "Maximum size of a buffer cache block");
> 
> /*
> * This lock synchronizes access to bd_request.
>@@ -847,6 +852,33 @@ bd_wakeup(void)
> }
> 
> /*
>+ * Adjust the maxbcachbuf and bkvasize tunables.
>+ */
>+static void
>+maxbcachebuf_adjust(void)
>+{
>+ int i;
>+
>+ /*
>+ * maxbcachebuf must be a power of 2 >= MAXBSIZE.
>+ * If it has been tuned, set bkvasize to maxbcachebuf / 2.
>+ */
>+ i = 2;
>+ while (i * 2 <= maxbcachebuf)
>+ i *= 2;
>+ maxbcachebuf = i;
>+ if (maxbcachebuf < MAXBSIZE)
>+ maxbcachebuf = MAXBSIZE;
>+ if (maxbcachebuf != MAXBCACHEBUF && maxbcachebuf > MAXBSIZE) {
>+ bkvasize = maxbcachebuf / 2;
>+ if (bkvasize < BKVASIZE)
>+ bkvasize = BKVASIZE;
>+ }
>+ if (maxbcachebuf != MAXBCACHEBUF || bkvasize != BKVASIZE)
>+ printf("maxbcachebuf=%d bkvasize=%d\n", maxbcachebuf, bkvasize);
>+}
>+
>+/*
> * bd_speedup - speedup the buffer cache flushing code
> */
> void
>@@ -893,8 +925,9 @@ kern_vfs_bio_buffer_alloc(caddr_t v, lon
> */
> physmem_est = physmem_est * (PAGE_SIZE / 1024);
> 
>+ maxbcachebuf_adjust();
> /*
>- * The nominal buffer size (and minimum KVA allocation) is BKVASIZE.
>+ * The nominal buffer size (and minimum KVA allocation) is bkvasize.
> * For the first 64MB of ram nominally allocate sufficient buffers to
> * cover 1/4 of our ram. Beyond the first 64MB allocate additional
> * buffers to cover 1/10 of our ram over 64MB. When auto-sizing
>@@ -904,7 +937,7 @@ kern_vfs_bio_buffer_alloc(caddr_t v, lon
> * factor represents the 1/4 x ram conversion.
> */
> if (nbuf == 0) {
>- int factor = 4 * BKVASIZE / 1024;
>+ int factor = 4 * bkvasize / 1024;
> 
> nbuf = 50;
> if (physmem_est > 4096)
>@@ -914,14 +947,14 @@ kern_vfs_bio_buffer_alloc(caddr_t v, lon
> nbuf += min((physmem_est - 65536) * 2 / (factor * 5),
> 32 * 1024 * 1024 / (factor * 5));
> 
>- if (maxbcache && nbuf > maxbcache / BKVASIZE)
>- nbuf = maxbcache / BKVASIZE;
>+ if (maxbcache && nbuf > maxbcache / bkvasize)
>+ nbuf = maxbcache / bkvasize;
> tuned_nbuf = 1;
> } else
> tuned_nbuf = 0;
> 
> /* XXX Avoid unsigned long overflows later on with maxbufspace. */
>- maxbuf = (LONG_MAX / 3) / BKVASIZE;
>+ maxbuf = (LONG_MAX / 3) / bkvasize;
> if (nbuf > maxbuf) {
> if (!tuned_nbuf)
> printf("Warning: nbufs lowered from %d to %ld\n", nbuf,
>@@ -943,8 +976,8 @@ kern_vfs_bio_buffer_alloc(caddr_t v, lon
> * with ample KVA space.
> */
> if (bio_transient_maxcnt == 0 && unmapped_buf_allowed) {
>- maxbuf_sz = maxbcache != 0 ? maxbcache : maxbuf * BKVASIZE;
>- buf_sz = (long)nbuf * BKVASIZE;
>+ maxbuf_sz = maxbcache != 0 ? maxbcache : maxbuf * bkvasize;
>+ buf_sz = (long)nbuf * bkvasize;
> if (buf_sz < maxbuf_sz / TRANSIENT_DENOM *
> (TRANSIENT_DENOM - 1)) {
> /*
>@@ -973,7 +1006,7 @@ kern_vfs_bio_buffer_alloc(caddr_t v, lon
> if (bio_transient_maxcnt > 1024)
> bio_transient_maxcnt = 1024;
> if (tuned_nbuf)
>- nbuf = buf_sz / BKVASIZE;
>+ nbuf = buf_sz / bkvasize;
> }
> 
> /*
>@@ -1003,7 +1036,6 @@ bufinit(void)
> struct buf *bp;
> int i;
> 
>- CTASSERT(MAXBCACHEBUF >= MAXBSIZE);
> mtx_init(&bqlocks[QUEUE_DIRTY], "bufq dirty lock", NULL, MTX_DEF);
> mtx_init(&bqlocks[QUEUE_EMPTY], "bufq empty lock", NULL, MTX_DEF);
> for (i = QUEUE_CLEAN; i < QUEUE_CLEAN + CLEAN_QUEUES; i++)
>@@ -1044,13 +1076,13 @@ bufinit(void)
> * used by most other requests. The differential is required to
> * ensure that metadata deadlocks don't occur.
> *
>- * maxbufspace is based on BKVASIZE. Allocating buffers larger then
>+ * maxbufspace is based on bkvasize. Allocating buffers larger then
> * this may result in KVM fragmentation which is not handled optimally
> * by the system. XXX This is less true with vmem. We could use
> * PAGE_SIZE.
> */
>- maxbufspace = (long)nbuf * BKVASIZE;
>- hibufspace = lmax(3 * maxbufspace / 4, maxbufspace - MAXBCACHEBUF * 10);
>+ maxbufspace = (long)nbuf * bkvasize;
>+ hibufspace = lmax(3 * maxbufspace / 4, maxbufspace - maxbcachebuf * 10);
> lobufspace = (hibufspace / 20) * 19; /* 95% */
> bufspacethresh = lobufspace + (hibufspace - lobufspace) / 2;
>@@ -1062,9 +1094,9 @@ bufinit(void)
> * The lower 1 MiB limit is the historical upper limit for
> * hirunningspace.
> */
>- hirunningspace = lmax(lmin(roundup(hibufspace / 64, MAXBCACHEBUF),
>+ hirunningspace = lmax(lmin(roundup(hibufspace / 64, maxbcachebuf),
> 16 * 1024 * 1024), 1024 * 1024);
>- lorunningspace = roundup((hirunningspace * 2) / 3, MAXBCACHEBUF);
>+ lorunningspace = roundup((hirunningspace * 2) / 3, maxbcachebuf);
> 
> /*
> * Limit the amount of malloc memory since it is wired permanently into
>@@ -1086,9 +1118,9 @@ bufinit(void)
> * To support extreme low-memory systems, make sure hidirtybuffers
> * cannot eat up all available buffer space. This occurs when our
> * minimum cannot be met. We try to size hidirtybuffers to 3/4 our
>- * buffer space assuming BKVASIZE'd buffers.
>+ * buffer space assuming bkvasize'd buffers.
> */
>- while ((long)hidirtybuffers * BKVASIZE > 3 * hibufspace / 4) {
>+ while ((long)hidirtybuffers * bkvasize > 3 * hibufspace / 4) {
> hidirtybuffers >>= 1;
> }
> lodirtybuffers = hidirtybuffers / 2;
>@@ -2887,7 +2919,7 @@ getnewbuf_kva(struct buf *bp, int gbflag
> if ((gbflags & (GB_UNMAPPED | GB_KVAALLOC)) != GB_UNMAPPED) {
> /*
> * In order to keep fragmentation sane we only allocate kva
>- * in BKVASIZE chunks. XXX with vmem we can do page size.
>+ * in bkvasize chunks. XXX with vmem we can do page size.
> */
> maxsize = (maxsize + BKVAMASK) & ~BKVAMASK;
>@@ -3484,9 +3516,9 @@ getblk(struct vnode *vp, daddr_t blkno,
> KASSERT((flags & (GB_UNMAPPED | GB_KVAALLOC)) != GB_KVAALLOC,
> ("GB_KVAALLOC only makes sense with GB_UNMAPPED"));
> ASSERT_VOP_LOCKED(vp, "getblk");
>- if (size > MAXBCACHEBUF)
>- panic("getblk: size(%d) > MAXBCACHEBUF(%d)\n", size,
>- MAXBCACHEBUF);
>+ if (size > maxbcachebuf)
>+ panic("getblk: size(%d) > maxbcachebuf(%d)\n", size,
>+ maxbcachebuf);
> if (!unmapped_buf_allowed)
> flags &= ~(GB_UNMAPPED | GB_KVAALLOC);
>--- fs/nfs/nfsproto.h.savbuf 2017-05-29 10:33:01.858512000 -0400
>+++ fs/nfs/nfsproto.h 2017-05-19 13:23:21.991290000 -0400
>@@ -56,8 +56,22 @@
> #define NFS_MAXDGRAMDATA 16384
> #define NFS_MAXPATHLEN 1024
> #define NFS_MAXNAMLEN 255
>+/*
>+ * Calculating the maximum XDR overhead for an NFS RPC isn't easy.
>+ * NFS_MAXPKTHDR is antiquated and assume AUTH_SYS over UDP.
>+ * NFS_MAXXDR should be sufficient for all NFS versions over TCP.
>+ * It includes:
>+ * - Maximum RPC message header. It can include 2 400byte authenticators plus
>+ * a machine name of unlimited length, although it is usually relatively
>+ * small.
>+ * - XDR overheads for the NFSv4 compound. This can include Owner and
>+ * Owner_group strings, which are usually fairly small, but are allowed
>+ * to be up to 1024 bytes each.
>+ * 4096 is overkill, but should always be sufficient.
>+ */
> #define NFS_MAXPKTHDR 404
>-#define NFS_MAXPACKET (NFS_SRVMAXIO + 2048)
>+#define NFS_MAXXDR 4096
>+#define NFS_MAXPACKET (NFS_SRVMAXIO + NFS_MAXXDR)
> #define NFS_MINPACKET 20
> #define NFS_FABLKSIZE 512 /* Size in bytes of a block wrt fa_blocks */
> #define NFSV4_MINORVERSION 0 /* V4 Minor version */
You cannot view the attachment while viewing its details because your browser does not support IFRAMEs.
View the attachment on a separate page
.
View Attachment As Diff
View Attachment As Raw
Actions:
View
|
Diff
Attachments on
bug 219645
:
183045
|
183067
|
183164