From 919a9566c32be82c52bb12b748702198e3509fb8 Mon Sep 17 00:00:00 2001 From: Alan Somers Date: Wed, 9 Feb 2022 17:32:30 +0000 Subject: [PATCH] nfs: Optionally work around a bug in the VMware ESXi NFS 4.1 client The ESXi NFS 4.1 client, for reasons of its own, will send a DESTROY_SESSION/CREATE_SESSION if it loses connectivity to the server for more than a few seconds. But it sets the wrong value for csa_sequence. RFC 5661 section 18.36.4 specifies that the CREATE_SESSION's csa_sequence should be the same as the preceding EXCHANGE_ID result's eir_sequenceid. But ESX uses something else instead, leading to the server returning NFSERR_SEQMISORDERED and the client retrying, ad infinitum. This commit adds a vfs.nfsd.esx_seqid_hack sysctl. When set, if the client's nii_domain is set to "vmware.com" during EXCHANGE_ID, then the server will ignore the value of csa_sequence during a subsequent CREATE_SESSION. CEWIN-1927 PR: 261291 --- sys/fs/nfs/nfsrvstate.h | 1 + sys/fs/nfsserver/nfs_nfsdserv.c | 25 +++++++++++++++++++++++++ sys/fs/nfsserver/nfs_nfsdstate.c | 8 +++++++- 3 files changed, 33 insertions(+), 1 deletion(-) diff --git a/sys/fs/nfs/nfsrvstate.h b/sys/fs/nfs/nfsrvstate.h index 427d5b132281..7991214ce1ee 100644 --- a/sys/fs/nfs/nfsrvstate.h +++ b/sys/fs/nfs/nfsrvstate.h @@ -116,6 +116,7 @@ struct nfsclient { struct nfssockreq lc_req; /* Callback info */ u_int32_t lc_flags; /* LCL_ flag bits */ u_char lc_verf[NFSX_VERF]; /* client verifier */ + bool lc_is_vmware; /* VMWare.com NFS client? 
*/ u_char lc_id[1]; /* Malloc'd correct size */ }; diff --git a/sys/fs/nfsserver/nfs_nfsdserv.c b/sys/fs/nfsserver/nfs_nfsdserv.c index ab02df2c4a46..bfff670ba4b7 100644 --- a/sys/fs/nfsserver/nfs_nfsdserv.c +++ b/sys/fs/nfsserver/nfs_nfsdserv.c @@ -4362,6 +4362,31 @@ nfsrvd_exchangeid(struct nfsrv_descript *nd, __unused int isdgram, goto nfsmout; } + /* + * Check if the client implementation is VMware ESX by requiring an + * exact nii_domain of "vmware.com". XXX RFC 5661 section 18.35.3 says + * not to act on this, but we've got a bug to work around. + */ + clp->lc_is_vmware = false; + tl = nfsm_dissect(nd, sizeof(uint32_t)); + if (!tl) + goto no_implname; + uint32_t eia_client_impl_id = fxdr_unsigned(uint32_t, *tl); + if (eia_client_impl_id == 0) + goto no_implname; + tl = nfsm_dissect(nd, sizeof(uint32_t)); + if (!tl) + goto no_implname; + uint32_t nii_domain_len = fxdr_unsigned(uint32_t, *tl); + if (nii_domain_len != sizeof("vmware.com") - 1) + goto no_implname; + s = nfsm_dissect(nd, nii_domain_len * sizeof(char)); + if (!s) + goto no_implname; + if (strncmp(s, "vmware.com", nii_domain_len) == 0) + clp->lc_is_vmware = true; + +no_implname: /* * nfsrv_setclient() does the actual work of adding it to the * client list. If there is no error, the structure has been diff --git a/sys/fs/nfsserver/nfs_nfsdstate.c b/sys/fs/nfsserver/nfs_nfsdstate.c index 47592087be4a..15f345bf456e 100644 --- a/sys/fs/nfsserver/nfs_nfsdstate.c +++ b/sys/fs/nfsserver/nfs_nfsdstate.c @@ -115,6 +115,11 @@ SYSCTL_INT(_vfs_nfsd, OID_AUTO, flexlinuxhack, CTLFLAG_RW, &nfsrv_flexlinuxhack, 0, "For Linux clients, hack around Flex File Layout bug"); +int nfsrv_esx_seqid_hack = 0; +SYSCTL_INT(_vfs_nfsd, OID_AUTO, esx_seqid_hack, CTLFLAG_RW, + &nfsrv_esx_seqid_hack, 0, + "Disregard a CREATE_SESSION request's sequence id for VMWare ESXi clients."); + /* * Hash lists for nfs V4. 
*/ @@ -677,7 +682,8 @@ nfsrv_getclient(nfsquad_t clientid, int opflags, struct nfsclient **clpp, */ if (opflags & CLOPS_CONFIRM) { if ((nd->nd_flag & ND_NFSV41) != 0 && - clp->lc_confirm.lval[0] != confirm.lval[0]) + (!(nfsrv_esx_seqid_hack && clp->lc_is_vmware) && + (clp->lc_confirm.lval[0] != confirm.lval[0]))) error = NFSERR_SEQMISORDERED; else if ((nd->nd_flag & ND_NFSV41) == 0 && clp->lc_confirm.qval != confirm.qval) -- 2.35.1