From e911fc4678f46820382a93634eabcd2f03734253 Mon Sep 17 00:00:00 2001 From: yuri Date: Sat, 2 Apr 2016 14:46:59 -0700 Subject: [PATCH 1/4] fix dropped control messages on uipc sockets There is the case when sendmsg(2) silently loses packets for AF_LOCAL domain when large packets with control part in them are sent. * Problem Description There is the watermark limit on sockbuf determined by net.local.stream.sendspace, default is 8192 bytes (field sockbuf.sb_hiwat). When sendmsg(2) sends large enough data (8K+ that hits this 8192 limit) with control message, sosend_generic will be cutting the message data into separate mbufs based on 'sbspace' (derived from the above-mentioned sb_hiwat limit) with adjustment for control message size as it sees it. This way it tries to make sure this sb_hiwat limit is enforced. However, down on uipc level control message is being further modified in two ways: unp_internalize modifies it into some 'internal' form, also unp_addsockcred function adds another control message when LOCAL_CREDS are requested by client. Both functions only increase control message size beyond its original size (seen by sosend_generic). So that the first final mbuf sent (concatenation of control and data) will always be larger than 'sbspace' limit that sosend_generic was cutting data for. There is also the function sbappendcontrol_locked. It checks the 'sbspace' limit again, and discards the packet when sbspace llimit is exceeded. Its result code is essentially ignored in uipc_send. I believe, sbappendcontrol_locked shouldn't be checking space at all, since packets are expected to be properly sized to begin with. But this won't be the right fix, since sizes would be exceeding the sbspace limit anyway. sosend_default is one level up over uipc level, and it doesn't know what uipc will do with control message. Therefore it can't know what the real adjustment for control message is needed (to properly cut data). It wrongly takes the original control size and this makes the first packet too large and discarded by sbappendcontrol_locked. * Patch synopsys - Added the new function into struct pr_usrreqs: int (*pru_finalizecontrol)(struct socket *so, int flags, struct mbuf **pcontrol); It is called by sosend_generic to update control message to its final form. - Removed 'sbspace' check from sbappendcontrol_locked. The only context it is called from is uipc_send, and all packet sizes are already conforming. - Fixed few wrong error codes relevant to this situation. [This patch is from: https://bugs.freebsd.org/bugzilla/show_bug.cgi?id=181741 but updated by Chris Torek to fit in a more recent FreeBSD kernel.] --- sys/kern/uipc_sockbuf.c | 21 ++++---------- sys/kern/uipc_socket.c | 20 ++++++++++++- sys/kern/uipc_usrreq.c | 77 ++++++++++++++++++++++++++++++++++--------------- sys/sys/protosw.h | 2 ++ sys/sys/sockbuf.h | 4 +-- 5 files changed, 81 insertions(+), 43 deletions(-) diff --git a/sys/kern/uipc_sockbuf.c b/sys/kern/uipc_sockbuf.c index 0cdd43b..f22a3bf 100644 --- a/sys/kern/uipc_sockbuf.c +++ b/sys/kern/uipc_sockbuf.c @@ -876,23 +876,16 @@ sbappendaddr(struct sockbuf *sb, const struct sockaddr *asa, return (retval); } -int +void sbappendcontrol_locked(struct sockbuf *sb, struct mbuf *m0, struct mbuf *control) { - struct mbuf *m, *n, *mlast; - int space; + struct mbuf *m, *mlast; SOCKBUF_LOCK_ASSERT(sb); - if (control == 0) - panic("sbappendcontrol_locked"); - space = m_length(control, &n) + m_length(m0, NULL); - - if (space > sbspace(sb)) - return (0); m_clrprotoflags(m0); - n->m_next = m0; /* concatenate data to control */ + m_last(control)->m_next = m0; /* concatenate data to control */ SBLASTRECORDCHK(sb); @@ -906,18 +899,14 @@ sbappendcontrol_locked(struct sockbuf *sb, struct mbuf *m0, SBLASTMBUFCHK(sb); SBLASTRECORDCHK(sb); - return (1); } -int +void sbappendcontrol(struct sockbuf *sb, struct mbuf *m0, struct mbuf *control) { - int retval; - SOCKBUF_LOCK(sb); - retval = sbappendcontrol_locked(sb, m0, control); + sbappendcontrol_locked(sb, m0, control); SOCKBUF_UNLOCK(sb); - return (retval); } /* diff --git a/sys/kern/uipc_socket.c b/sys/kern/uipc_socket.c index d873217..7a7cb30 100644 --- a/sys/kern/uipc_socket.c +++ b/sys/kern/uipc_socket.c @@ -1061,6 +1061,11 @@ sosend_dgram(struct socket *so, struct sockaddr *addr, struct uio *uio, KASSERT(so->so_proto->pr_flags & PR_ATOMIC, ("sosend_dgram: !PR_ATOMIC")); + if (so->so_proto->pr_usrreqs->pru_finalizecontrol && + (error = (*so->so_proto->pr_usrreqs->pru_finalizecontrol)(so, + flags, &control, td))) + goto out; + if (uio != NULL) resid = uio->uio_resid; else @@ -1125,10 +1130,14 @@ sosend_dgram(struct socket *so, struct sockaddr *addr, struct uio *uio, * problem and need fixing. */ space = sbspace(&so->so_snd); + SOCKBUF_UNLOCK(&so->so_snd); if (flags & MSG_OOB) space += 1024; + if (clen > space) { + error = EMSGSIZE; + goto out; + } space -= clen; - SOCKBUF_UNLOCK(&so->so_snd); if (resid > space) { error = EMSGSIZE; goto out; @@ -1222,6 +1231,11 @@ sosend_generic(struct socket *so, struct sockaddr *addr, struct uio *uio, int clen = 0, error, dontroute; int atomic = sosendallatonce(so) || top; + if (so->so_proto->pr_usrreqs->pru_finalizecontrol && + (error = (*so->so_proto->pr_usrreqs->pru_finalizecontrol)(so, + flags, &control, td))) + goto out; + if (uio != NULL) resid = uio->uio_resid; else @@ -1314,6 +1328,10 @@ restart: goto restart; } SOCKBUF_UNLOCK(&so->so_snd); + if (clen > space) { + error = EMSGSIZE; + goto release; + } space -= clen; do { if (uio == NULL) { diff --git a/sys/kern/uipc_usrreq.c b/sys/kern/uipc_usrreq.c index 4e5aa88..0380fd8 100644 --- a/sys/kern/uipc_usrreq.c +++ b/sys/kern/uipc_usrreq.c @@ -290,7 +290,7 @@ static int unp_internalize(struct mbuf **, struct thread *); static void unp_internalize_fp(struct file *); static int unp_externalize(struct mbuf *, struct mbuf **, int); static int unp_externalize_fp(struct file *); -static struct mbuf *unp_addsockcred(struct thread *, struct mbuf *); +static int unp_addsockcred(struct mbuf **, struct thread *); static void unp_process_defers(void * __unused, int); /* @@ -789,6 +789,43 @@ uipc_peeraddr(struct socket *so, struct sockaddr **nam) } static int +uipc_finalizecontrol(struct socket *so, int flags, struct mbuf **pcontrol, + struct thread *td) +{ + struct unpcb *unp, *unp2; + int error = 0; + + unp = sotounpcb(so); + KASSERT(unp != NULL, ("uipc_finalizecontrol: unp == NULL")); + + unp2 = unp->unp_conn; + + if (*pcontrol != NULL && (error = unp_internalize(pcontrol, td))) + return (error); + + /* Lockless read, ignore when not connected. */ + if (unp2 && unp2->unp_flags & UNP_WANTCRED) { + switch (so->so_type) { + case SOCK_SEQPACKET: + case SOCK_STREAM: + /* Credentials are passed only once on streams */ + UNP_PCB_LOCK(unp2); + if (unp2->unp_flags & UNP_WANTCRED) { + unp2->unp_flags &= ~UNP_WANTCRED; + error = unp_addsockcred(pcontrol, td); + } + UNP_PCB_UNLOCK(unp2); + break; + case SOCK_DGRAM: + error = unp_addsockcred(pcontrol, td); + break; + } + } + + return (error); +} + +static int uipc_rcvd(struct socket *so, int flags) { struct unpcb *unp, *unp2; @@ -857,8 +894,6 @@ uipc_send(struct socket *so, int flags, struct mbuf *m, struct sockaddr *nam, error = EOPNOTSUPP; goto release; } - if (control != NULL && (error = unp_internalize(&control, td))) - goto release; if ((nam != NULL) || (flags & PRUS_EOF)) UNP_LINK_WLOCK(); else @@ -892,9 +927,6 @@ uipc_send(struct socket *so, int flags, struct mbuf *m, struct sockaddr *nam, error = ENOTCONN; break; } - /* Lockless read. */ - if (unp2->unp_flags & UNP_WANTCRED) - control = unp_addsockcred(td, control); UNP_PCB_LOCK(unp); if (unp->unp_addr != NULL) from = (struct sockaddr *)unp->unp_addr; @@ -962,14 +994,6 @@ uipc_send(struct socket *so, int flags, struct mbuf *m, struct sockaddr *nam, so2 = unp2->unp_socket; UNP_PCB_LOCK(unp2); SOCKBUF_LOCK(&so2->so_rcv); - if (unp2->unp_flags & UNP_WANTCRED) { - /* - * Credentials are passed only once on SOCK_STREAM - * and SOCK_SEQPACKET. - */ - unp2->unp_flags &= ~UNP_WANTCRED; - control = unp_addsockcred(td, control); - } /* * Send to paired receive port, and then reduce send buffer * hiwater marks to maintain backpressure. Wake up readers. @@ -977,9 +1001,9 @@ uipc_send(struct socket *so, int flags, struct mbuf *m, struct sockaddr *nam, switch (so->so_type) { case SOCK_STREAM: if (control != NULL) { - if (sbappendcontrol_locked(&so2->so_rcv, m, - control)) - control = NULL; + sbappendcontrol_locked(&so2->so_rcv, m, + control); + control = NULL; } else sbappend_locked(&so2->so_rcv, m, flags); break; @@ -1000,6 +1024,7 @@ uipc_send(struct socket *so, int flags, struct mbuf *m, struct sockaddr *nam, break; } } + m = NULL; mbcnt = so2->so_rcv.sb_mbcnt; sbcc = sbavail(&so2->so_rcv); @@ -1020,7 +1045,6 @@ uipc_send(struct socket *so, int flags, struct mbuf *m, struct sockaddr *nam, so->so_snd.sb_flags |= SB_STOP; SOCKBUF_UNLOCK(&so->so_snd); UNP_PCB_UNLOCK(unp2); - m = NULL; break; } @@ -1144,6 +1168,7 @@ static struct pr_usrreqs uipc_usrreqs_dgram = { .pru_disconnect = uipc_disconnect, .pru_listen = uipc_listen, .pru_peeraddr = uipc_peeraddr, + .pru_finalizecontrol = uipc_finalizecontrol, .pru_rcvd = uipc_rcvd, .pru_send = uipc_send, .pru_sense = uipc_sense, @@ -1166,6 +1191,7 @@ static struct pr_usrreqs uipc_usrreqs_seqpacket = { .pru_disconnect = uipc_disconnect, .pru_listen = uipc_listen, .pru_peeraddr = uipc_peeraddr, + .pru_finalizecontrol = uipc_finalizecontrol, .pru_rcvd = uipc_rcvd, .pru_send = uipc_send, .pru_sense = uipc_sense, @@ -1188,6 +1214,7 @@ static struct pr_usrreqs uipc_usrreqs_stream = { .pru_disconnect = uipc_disconnect, .pru_listen = uipc_listen, .pru_peeraddr = uipc_peeraddr, + .pru_finalizecontrol = uipc_finalizecontrol, .pru_rcvd = uipc_rcvd, .pru_send = uipc_send, .pru_ready = uipc_ready, @@ -1787,7 +1814,7 @@ unp_externalize(struct mbuf *control, struct mbuf **controlp, int flags) SCM_RIGHTS, SOL_SOCKET); if (*controlp == NULL) { FILEDESC_XUNLOCK(fdesc); - error = E2BIG; + error = ENOBUFS; unp_freerights(fdep, newfds); goto next; } @@ -1964,7 +1991,7 @@ unp_internalize(struct mbuf **controlp, struct thread *td) SCM_RIGHTS, SOL_SOCKET); if (*controlp == NULL) { FILEDESC_SUNLOCK(fdesc); - error = E2BIG; + error = ENOBUFS; goto out; } fdp = data; @@ -2028,9 +2055,10 @@ out: return (error); } -static struct mbuf * -unp_addsockcred(struct thread *td, struct mbuf *control) +static int +unp_addsockcred(struct mbuf **pcontrol, struct thread *td) { + struct mbuf *control = *pcontrol; struct mbuf *m, *n, *n_prev; struct sockcred *sc; const struct cmsghdr *cm; @@ -2040,7 +2068,7 @@ unp_addsockcred(struct thread *td, struct mbuf *control) ngroups = MIN(td->td_ucred->cr_ngroups, CMGROUP_MAX); m = sbcreatecontrol(NULL, SOCKCREDSIZE(ngroups), SCM_CREDS, SOL_SOCKET); if (m == NULL) - return (control); + return (ENOBUFS); sc = (struct sockcred *) CMSG_DATA(mtod(m, struct cmsghdr *)); sc->sc_uid = td->td_ucred->cr_ruid; @@ -2074,7 +2102,8 @@ unp_addsockcred(struct thread *td, struct mbuf *control) /* Prepend it to the head. */ m->m_next = control; - return (m); + *pcontrol = m; + return (0); } static struct unpcb * diff --git a/sys/sys/protosw.h b/sys/sys/protosw.h index 55db0e3..fc7f5ba 100644 --- a/sys/sys/protosw.h +++ b/sys/sys/protosw.h @@ -200,6 +200,8 @@ struct pr_usrreqs { int (*pru_listen)(struct socket *so, int backlog, struct thread *td); int (*pru_peeraddr)(struct socket *so, struct sockaddr **nam); + int (*pru_finalizecontrol)(struct socket *so, int flags, struct mbuf **pcontrol, + struct thread *td); int (*pru_rcvd)(struct socket *so, int flags); int (*pru_rcvoob)(struct socket *so, struct mbuf *m, int flags); int (*pru_send)(struct socket *so, int flags, struct mbuf *m, diff --git a/sys/sys/sockbuf.h b/sys/sys/sockbuf.h index c0148b4..a1dc6a0 100644 --- a/sys/sys/sockbuf.h +++ b/sys/sys/sockbuf.h @@ -146,9 +146,9 @@ int sbappendaddr_locked(struct sockbuf *sb, const struct sockaddr *asa, struct mbuf *m0, struct mbuf *control); int sbappendaddr_nospacecheck_locked(struct sockbuf *sb, const struct sockaddr *asa, struct mbuf *m0, struct mbuf *control); -int sbappendcontrol(struct sockbuf *sb, struct mbuf *m0, +void sbappendcontrol(struct sockbuf *sb, struct mbuf *m0, struct mbuf *control); -int sbappendcontrol_locked(struct sockbuf *sb, struct mbuf *m0, +void sbappendcontrol_locked(struct sockbuf *sb, struct mbuf *m0, struct mbuf *control); void sbappendrecord(struct sockbuf *sb, struct mbuf *m0); void sbappendrecord_locked(struct sockbuf *sb, struct mbuf *m0); -- 2.7.4 From 87de013811b182770419851af76d27a0260118bf Mon Sep 17 00:00:00 2001 From: Chris Torek Date: Sat, 2 Apr 2016 16:48:07 -0700 Subject: [PATCH 2/4] socket send: use correct control message length The new internalize function may (and actually does) replace a single control message mbuf with a chain, so we must use m_length() rather than just control->m_len. Add some comments to that effect. Also, while here, consolidate the send-space calculation (which adds an extra magic-number 1024 for out of band data) into a single inline, so that we have one magic constant instead of two. --- sys/kern/uipc_socket.c | 43 +++++++++++++++++++++++++++++++------------ sys/kern/uipc_usrreq.c | 10 ++++++++++ 2 files changed, 41 insertions(+), 12 deletions(-) diff --git a/sys/kern/uipc_socket.c b/sys/kern/uipc_socket.c index 7a7cb30..f8cc339 100644 --- a/sys/kern/uipc_socket.c +++ b/sys/kern/uipc_socket.c @@ -1049,6 +1049,23 @@ sodisconnect(struct socket *so) #define SBLOCKWAIT(f) (((f) & MSG_DONTWAIT) ? 0 : SBL_WAIT) +/* + * Like sbspace(&so->so_snd), but allow extra room for MSG_OOB. + * + * It's not clear whether the magic 1024 number is sensible, but + * at least it's now in only one place. + */ +static inline int +sosend_space(struct socket *so, int flags) +{ + long space; + + space = sbspace(&so->so_snd); + if (flags & MSG_OOB) + space += 1024; + return (space); +} + int sosend_dgram(struct socket *so, struct sockaddr *addr, struct uio *uio, struct mbuf *top, struct mbuf *control, int flags, struct thread *td) @@ -1086,8 +1103,17 @@ sosend_dgram(struct socket *so, struct sockaddr *addr, struct uio *uio, (flags & MSG_DONTROUTE) && (so->so_options & SO_DONTROUTE) == 0; if (td != NULL) td->td_ru.ru_msgsnd++; + /* + * NB: Original user supplied control message may have + * fit and we may have made it too big when we finalized + * it, which will have us return EMSGSIZE below. This + * seems rude, but it is at least functional: the user + * can try sending smaller control values (mainly, fewer + * fd's at a time, as those are the ones that expand to + * twice their size on I32LP64 systems). + */ if (control != NULL) - clen = control->m_len; + clen = m_length(control, NULL); SOCKBUF_LOCK(&so->so_snd); if (so->so_snd.sb_state & SBS_CANTSENDMORE) { @@ -1125,14 +1151,8 @@ sosend_dgram(struct socket *so, struct sockaddr *addr, struct uio *uio, } } - /* - * Do we need MSG_OOB support in SOCK_DGRAM? Signs here may be a - * problem and need fixing. - */ - space = sbspace(&so->so_snd); + space = sosend_space(so, flags); SOCKBUF_UNLOCK(&so->so_snd); - if (flags & MSG_OOB) - space += 1024; if (clen > space) { error = EMSGSIZE; goto out; @@ -1261,7 +1281,7 @@ sosend_generic(struct socket *so, struct sockaddr *addr, struct uio *uio, if (td != NULL) td->td_ru.ru_msgsnd++; if (control != NULL) - clen = control->m_len; + clen = m_length(control, NULL); error = sblock(&so->so_snd, SBLOCKWAIT(flags)); if (error) @@ -1305,9 +1325,8 @@ restart: goto release; } } - space = sbspace(&so->so_snd); - if (flags & MSG_OOB) - space += 1024; + space = sosend_space(so, flags); + /* NB: control msg is implicitly atomic */ if ((atomic && resid > so->so_snd.sb_hiwat) || clen > so->so_snd.sb_hiwat) { SOCKBUF_UNLOCK(&so->so_snd); diff --git a/sys/kern/uipc_usrreq.c b/sys/kern/uipc_usrreq.c index 0380fd8..7a4c426 100644 --- a/sys/kern/uipc_usrreq.c +++ b/sys/kern/uipc_usrreq.c @@ -1902,6 +1902,16 @@ unp_init(void) UNP_DEFERRED_LOCK_INIT(); } +/* + * Convert incoming control message from user-supplied format + * to internal form. + * + * Note that when we're called, *controlp is a single mbuf + * whose m_len is the length of the cmsg data structures + * that have not yet been internalized. On return, *controlp + * is an mbuf chain whose individual mbufs are internalized; + * this chain may have a different length. + */ static int unp_internalize(struct mbuf **controlp, struct thread *td) { -- 2.7.4 From 51f3e49565ccdeb2690b1192d114b71158b8ab3f Mon Sep 17 00:00:00 2001 From: Chris Torek Date: Sat, 2 Apr 2016 16:55:48 -0700 Subject: [PATCH 3/4] uipc_finalizecontrol: read-lock the unp link Alan Somers pointed out [1] that the socket linkage has a lock; we should grab it for form's sake, at least. While here, rewrite the code for clarity, and note a bug. [1] https://bugs.freebsd.org/bugzilla/show_bug.cgi?id=181741#c7 --- sys/kern/uipc_usrreq.c | 52 ++++++++++++++++++++++++++++++++------------------ 1 file changed, 33 insertions(+), 19 deletions(-) diff --git a/sys/kern/uipc_usrreq.c b/sys/kern/uipc_usrreq.c index 7a4c426..af58b20 100644 --- a/sys/kern/uipc_usrreq.c +++ b/sys/kern/uipc_usrreq.c @@ -794,34 +794,48 @@ uipc_finalizecontrol(struct socket *so, int flags, struct mbuf **pcontrol, { struct unpcb *unp, *unp2; int error = 0; + bool wantcred, oneshot; unp = sotounpcb(so); KASSERT(unp != NULL, ("uipc_finalizecontrol: unp == NULL")); - unp2 = unp->unp_conn; - if (*pcontrol != NULL && (error = unp_internalize(pcontrol, td))) return (error); - /* Lockless read, ignore when not connected. */ - if (unp2 && unp2->unp_flags & UNP_WANTCRED) { - switch (so->so_type) { - case SOCK_SEQPACKET: - case SOCK_STREAM: - /* Credentials are passed only once on streams */ - UNP_PCB_LOCK(unp2); - if (unp2->unp_flags & UNP_WANTCRED) { - unp2->unp_flags &= ~UNP_WANTCRED; - error = unp_addsockcred(pcontrol, td); - } - UNP_PCB_UNLOCK(unp2); - break; - case SOCK_DGRAM: - error = unp_addsockcred(pcontrol, td); - break; - } + UNP_LINK_RLOCK(); + unp2 = unp->unp_conn; + UNP_LINK_RUNLOCK(); + + /* + * If not connected, we're done now (might be auto-connect + * on send, leave everything to caller). Otherwise, handle + * one-shot credentials on stream and seqpacket sockets here. + * + * XXX If the send fails, we never get another chance. + * We could restore UNP_WANTCRED if the unp_addsockcred() + * call fails here but we can't handle the more likely + * entire-send-fails case. Deferring clearing the flag + * is not a great solution either. Perhaps best would be + * to have an additional UNP_CREDS_SENT_SUCCESSFULLY flag + * and check that here. For now, just leave it this way. + */ + if (unp2 == NULL) + return (0); + + oneshot = so->so_type == SOCK_SEQPACKET || + so->so_type == SOCK_STREAM; + if (oneshot) { + UNP_PCB_LOCK(unp2); + wantcred = (unp2->unp_flags & UNP_WANTCRED) != 0; + unp2->unp_flags &= ~UNP_WANTCRED; + UNP_PCB_UNLOCK(unp2); + } else { + wantcred = true; } + if (wantcred) + error = unp_addsockcred(pcontrol, td); + return (error); } -- 2.7.4 From 2ed704cd839142a0ad93e91c38c05818dc80ef0e Mon Sep 17 00:00:00 2001 From: Chris Torek Date: Sat, 2 Apr 2016 19:23:32 -0700 Subject: [PATCH 4/4] rewrite unp_internalize() The existing code was nearly unreadable and probably buggy if you sent an SCM_RIGHTS control message with an empty fd list and then any subsequent SCM_* message. This moves the various internalizers into separate functions, so that the control path, where we operate on the outer control-message data, is firewalled off from the transforms, where we turn the sender's SCM_* messages into the internal SCM_* forms that go across the unp link to the other socket. --- sys/kern/uipc_usrreq.c | 338 +++++++++++++++++++++++++++++++------------------ 1 file changed, 212 insertions(+), 126 deletions(-) diff --git a/sys/kern/uipc_usrreq.c b/sys/kern/uipc_usrreq.c index af58b20..64995c5 100644 --- a/sys/kern/uipc_usrreq.c +++ b/sys/kern/uipc_usrreq.c @@ -1917,6 +1917,40 @@ unp_init(void) } /* + * Arguments passed to internalizer/transformer (ix = internal xform). + * The transformation function may fill in a new ix_mbuf. + */ +struct internalize_transform_data { + socklen_t ix_odatalen; /* original data length in bytes */ + socklen_t ix_ndatalen; /* new data length, or 0 */ + void *ix_odata; /* original data */ + void *ix_ndata; /* new data area, or NULL */ + struct mbuf *ix_mbuf; /* mbuf for new data */ + struct thread *ix_td; /* calling thread */ +}; + +/* + * Internalizers. If you provide a nonzero size, we pre-allocate + * the ix_mbuf and you get a nonzero ndatasize and non-NULL ndata. + */ +struct unp_scm_internalize_op { + socklen_t size; /* predefined output size, or 0 */ + int (*func)(struct internalize_transform_data *); +}; + +static int unp_internalize_creds(struct internalize_transform_data *); +static int unp_internalize_fds(struct internalize_transform_data *); +static int unp_internalize_timestamp(struct internalize_transform_data *); +static int unp_internalize_bintime(struct internalize_transform_data *); + +static struct unp_scm_internalize_op unp_internalize_ops[] = { + [SCM_CREDS] = { sizeof(struct cmsgcred), unp_internalize_creds }, + [SCM_RIGHTS] = { 0, unp_internalize_fds }, + [SCM_TIMESTAMP] = { sizeof(struct timeval), unp_internalize_timestamp }, + [SCM_BINTIME] = { sizeof(struct bintime), unp_internalize_bintime }, +}; + +/* * Convert incoming control message from user-supplied format * to internal form. * @@ -1925,160 +1959,212 @@ unp_init(void) * that have not yet been internalized. On return, *controlp * is an mbuf chain whose individual mbufs are internalized; * this chain may have a different length. + * + * Caller will always m_freem(*controlp), even if we return error. */ static int unp_internalize(struct mbuf **controlp, struct thread *td) { - struct mbuf *control = *controlp; - struct proc *p = td->td_proc; - struct filedesc *fdesc = p->p_fd; - struct bintime *bt; - struct cmsghdr *cm = mtod(control, struct cmsghdr *); - struct cmsgcred *cmcred; - struct filedescent *fde, **fdep, *fdev; - struct file *fp; - struct timeval *tv; - int i, *fdp; - void *data; - socklen_t clen = control->m_len, datalen; - int error, oldfds; - u_int newlen; + struct unp_scm_internalize_op *op; + struct internalize_transform_data ix; + struct cmsghdr *cm; + struct mbuf *control, *m; + void *odata; + int cmtype, error; + socklen_t clen, size, odatalen; UNP_LINK_UNLOCK_ASSERT(); + ix.ix_td = td; /* never changes, just passed through */ + error = 0; + control = *controlp; *controlp = NULL; - while (cm != NULL) { - if (sizeof(*cm) > clen || cm->cmsg_level != SOL_SOCKET - || cm->cmsg_len > clen || cm->cmsg_len < sizeof(*cm)) { - error = EINVAL; - goto out; - } - data = CMSG_DATA(cm); - datalen = (caddr_t)cm + cm->cmsg_len - (caddr_t)data; + clen = control->m_len; + cm = mtod(control, struct cmsghdr *); - switch (cm->cmsg_type) { + while (error == 0) { /* - * Fill in credential information. + * Verify current control message, and set up type + * and old data pointer and size values. */ - case SCM_CREDS: - *controlp = sbcreatecontrol(NULL, sizeof(*cmcred), - SCM_CREDS, SOL_SOCKET); - if (*controlp == NULL) { - error = ENOBUFS; - goto out; - } - cmcred = (struct cmsgcred *) - CMSG_DATA(mtod(*controlp, struct cmsghdr *)); - cmcred->cmcred_pid = p->p_pid; - cmcred->cmcred_uid = td->td_ucred->cr_ruid; - cmcred->cmcred_gid = td->td_ucred->cr_rgid; - cmcred->cmcred_euid = td->td_ucred->cr_uid; - cmcred->cmcred_ngroups = MIN(td->td_ucred->cr_ngroups, - CMGROUP_MAX); - for (i = 0; i < cmcred->cmcred_ngroups; i++) - cmcred->cmcred_groups[i] = - td->td_ucred->cr_groups[i]; + if (clen < sizeof(*cm) || cm->cmsg_level != SOL_SOCKET || + cm->cmsg_len > clen || cm->cmsg_len < sizeof(*cm)) { + error = EINVAL; break; + } - case SCM_RIGHTS: - oldfds = datalen / sizeof (int); - if (oldfds == 0) - break; - /* - * Check that all the FDs passed in refer to legal - * files. If not, reject the entire operation. - */ - fdp = data; - FILEDESC_SLOCK(fdesc); - for (i = 0; i < oldfds; i++, fdp++) { - fp = fget_locked(fdesc, *fdp); - if (fp == NULL) { - FILEDESC_SUNLOCK(fdesc); - error = EBADF; - goto out; - } - if (!(fp->f_ops->fo_flags & DFLAG_PASSABLE)) { - FILEDESC_SUNLOCK(fdesc); - error = EOPNOTSUPP; - goto out; - } - - } - - /* - * Now replace the integer FDs with pointers to the - * file structure and capability rights. - */ - newlen = oldfds * sizeof(fdep[0]); - *controlp = sbcreatecontrol(NULL, newlen, - SCM_RIGHTS, SOL_SOCKET); - if (*controlp == NULL) { - FILEDESC_SUNLOCK(fdesc); - error = ENOBUFS; - goto out; - } - fdp = data; - fdep = (struct filedescent **) - CMSG_DATA(mtod(*controlp, struct cmsghdr *)); - fdev = malloc(sizeof(*fdev) * oldfds, M_FILECAPS, - M_WAITOK); - for (i = 0; i < oldfds; i++, fdev++, fdp++) { - fde = &fdesc->fd_ofiles[*fdp]; - fdep[i] = fdev; - fdep[i]->fde_file = fde->fde_file; - filecaps_copy(&fde->fde_caps, - &fdep[i]->fde_caps, true); - unp_internalize_fp(fdep[i]->fde_file); - } - FILEDESC_SUNLOCK(fdesc); + cmtype = cm->cmsg_type; + if (cmtype < 0 || cmtype >= nitems(unp_internalize_ops)) { + error = EINVAL; break; - - case SCM_TIMESTAMP: - *controlp = sbcreatecontrol(NULL, sizeof(*tv), - SCM_TIMESTAMP, SOL_SOCKET); - if (*controlp == NULL) { - error = ENOBUFS; - goto out; - } - tv = (struct timeval *) - CMSG_DATA(mtod(*controlp, struct cmsghdr *)); - microtime(tv); + } + op = &unp_internalize_ops[cmtype]; + if (op->func == NULL) { + error = EINVAL; break; + } - case SCM_BINTIME: - *controlp = sbcreatecontrol(NULL, sizeof(*bt), - SCM_BINTIME, SOL_SOCKET); - if (*controlp == NULL) { + odata = CMSG_DATA(cm); + odatalen = (caddr_t)cm + cm->cmsg_len - (caddr_t)odata; + + ix.ix_odata = odata; + ix.ix_odatalen = odatalen; + + /* + * If transform function gives us a fixed data + * size, allocate new mbuf here, else leave it + * to the function. + */ + if ((size = op->size) != 0) { + m = sbcreatecontrol(NULL, size, cmtype, SOL_SOCKET); + if (m == NULL) { error = ENOBUFS; - goto out; + break; } - bt = (struct bintime *) - CMSG_DATA(mtod(*controlp, struct cmsghdr *)); - bintime(bt); - break; - - default: - error = EINVAL; - goto out; + ix.ix_mbuf = m; + ix.ix_ndata = CMSG_DATA(mtod(m, struct cmsghdr *)); + ix.ix_ndatalen = size; + } else { + ix.ix_mbuf = NULL; + ix.ix_ndata = NULL; + ix.ix_ndatalen = 0; } - controlp = &(*controlp)->m_next; - if (CMSG_SPACE(datalen) < clen) { - clen -= CMSG_SPACE(datalen); - cm = (struct cmsghdr *) - ((caddr_t)cm + CMSG_SPACE(datalen)); - } else { - clen = 0; - cm = NULL; + /* + * Apply transform and append new mbuf (if any) to + * new control chain, even on error, so that it + * will get freed. + */ + error = (*op->func)(&ix); + if ((m = ix.ix_mbuf) != NULL) { + *controlp = m; + controlp = &m->m_next; } + + /* Advance to next message. */ + size = CMSG_SPACE(odatalen); + if (size >= clen) + break; + cm = (struct cmsghdr *)((caddr_t)cm + size); + clen -= size; } -out: m_freem(control); return (error); } +/* + * Internalize file descriptors ("rights"). + */ +static int +unp_internalize_fds(struct internalize_transform_data *ix) +{ + struct proc *p = ix->ix_td->td_proc; + struct filedesc *fdesc = p->p_fd; + struct filedescent *fde, **fdep, *fdev; + struct file *fp; + struct mbuf *m; + int i, *fdp; + int oldfds; + u_int newlen; + + KASSERT(ix->ix_ndatalen == 0, ("%s: datalen", __func__)); + + /* Round down: this is forgiving, if slightly wrong. */ + oldfds = ix->ix_odatalen / sizeof (int); + if (oldfds == 0) + return (0); + + /* + * Check that all the FDs passed in refer to legal + * files. If not, reject the entire operation. + */ + fdp = ix->ix_odata; + FILEDESC_SLOCK(fdesc); + for (i = 0; i < oldfds; i++, fdp++) { + fp = fget_locked(fdesc, *fdp); + if (fp == NULL) { + FILEDESC_SUNLOCK(fdesc); + return (EBADF); + } + if (!(fp->f_ops->fo_flags & DFLAG_PASSABLE)) { + FILEDESC_SUNLOCK(fdesc); + return (EOPNOTSUPP); + } + + } + + /* + * Now replace the integer FDs with pointers to the + * file structure and capability rights. + */ + newlen = oldfds * sizeof(fdep[0]); + m = sbcreatecontrol(NULL, newlen, SCM_RIGHTS, SOL_SOCKET); + if (m == NULL) { + FILEDESC_SUNLOCK(fdesc); + return (ENOBUFS); + } + + fdp = ix->ix_odata; + fdep = (struct filedescent **)CMSG_DATA(mtod(m, struct cmsghdr *)); + fdev = malloc(sizeof(*fdev) * oldfds, M_FILECAPS, M_WAITOK); + for (i = 0; i < oldfds; i++, fdev++, fdp++) { + fde = &fdesc->fd_ofiles[*fdp]; + fdep[i] = fdev; + fdep[i]->fde_file = fde->fde_file; + filecaps_copy(&fde->fde_caps, &fdep[i]->fde_caps, true); + unp_internalize_fp(fdep[i]->fde_file); + } + FILEDESC_SUNLOCK(fdesc); + + ix->ix_mbuf = m; + return (0); +} + +static int +unp_internalize_creds(struct internalize_transform_data *ix) +{ + struct cmsgcred *cmcred; + int i, ngroups; + struct thread *td = ix->ix_td; + struct ucred *cr = td->td_ucred; + + KASSERT(ix->ix_ndatalen == sizeof(*cmcred), ("%s: datalen", __func__)); + cmcred = ix->ix_ndata; + cmcred->cmcred_pid = td->td_proc->p_pid; + cmcred->cmcred_uid = cr->cr_ruid; + cmcred->cmcred_gid = cr->cr_rgid; + cmcred->cmcred_euid = cr->cr_uid; + ngroups = MIN(cr->cr_ngroups, CMGROUP_MAX); + cmcred->cmcred_ngroups = ngroups; + for (i = 0; i < ngroups; i++) + cmcred->cmcred_groups[i] = cr->cr_groups[i]; + return (0); +} + +static int +unp_internalize_timestamp(struct internalize_transform_data *ix) +{ + struct timeval *tv; + + KASSERT(ix->ix_ndatalen == sizeof(*tv), ("%s: datalen", __func__)); + tv = ix->ix_ndata; + microtime(tv); + return (0); +} + +static int +unp_internalize_bintime(struct internalize_transform_data *ix) +{ + struct bintime *bt; + + KASSERT(ix->ix_ndatalen == sizeof(*bt), ("%s: datalen", __func__)); + bt = ix->ix_ndata; + bintime(bt); + return (0); +} + static int unp_addsockcred(struct mbuf **pcontrol, struct thread *td) { -- 2.7.4