FreeBSD Bugzilla – Attachment 207916 Details for
Bug 240590
Linuxulator: EPOLLONESHOT is broken
Home
|
New
|
Browse
|
Search
|
[?]
|
Reports
|
Help
|
New Account
|
Log In
Remember
[x]
|
Forgot Password
Login:
[x]
[patch]
EPOLLHUP.patch
EPOLLHUP.patch (text/plain), 15.26 KB, created by
Vladimir Kondratyev
on 2019-09-28 12:39:18 UTC
(
hide
)
Description:
EPOLLHUP.patch
Filename:
MIME Type:
Creator:
Vladimir Kondratyev
Created:
2019-09-28 12:39:18 UTC
Size:
15.26 KB
patch
obsolete
>diff --git a/sys/compat/linux/linux_emul.c b/sys/compat/linux/linux_emul.c >index e88eb554aed1..39c2a62ed60e 100644 >--- a/sys/compat/linux/linux_emul.c >+++ b/sys/compat/linux/linux_emul.c >@@ -92,7 +92,6 @@ linux_proc_init(struct thread *td, struct thread *newtd, int flags) > { > struct linux_emuldata *em; > struct linux_pemuldata *pem; >- struct epoll_emuldata *emd; > struct proc *p; > > if (newtd != NULL) { >@@ -131,15 +130,9 @@ linux_proc_init(struct thread *td, struct thread *newtd, int flags) > em->child_clear_tid = NULL; > em->child_set_tid = NULL; > >- /* epoll should be destroyed in a case of exec. */ > pem = pem_find(p); > KASSERT(pem != NULL, ("proc_exit: proc emuldata not found.\n")); > pem->persona = 0; >- if (pem->epoll != NULL) { >- emd = pem->epoll; >- pem->epoll = NULL; >- free(emd, M_EPOLL); >- } > } > > } >@@ -148,7 +141,6 @@ void > linux_proc_exit(void *arg __unused, struct proc *p) > { > struct linux_pemuldata *pem; >- struct epoll_emuldata *emd; > struct thread *td = curthread; > > if (__predict_false(SV_CURPROC_ABI() != SV_ABI_LINUX)) >@@ -164,12 +156,6 @@ linux_proc_exit(void *arg __unused, struct proc *p) > > p->p_emuldata = NULL; > >- if (pem->epoll != NULL) { >- emd = pem->epoll; >- pem->epoll = NULL; >- free(emd, M_EPOLL); >- } >- > sx_destroy(&pem->pem_sx); > free(pem, M_LINUX); > } >@@ -214,7 +200,6 @@ int > linux_common_execve(struct thread *td, struct image_args *eargs) > { > struct linux_pemuldata *pem; >- struct epoll_emuldata *emd; > struct vmspace *oldvmspace; > struct linux_emuldata *em; > struct proc *p; >@@ -246,12 +231,6 @@ linux_common_execve(struct thread *td, struct image_args *eargs) > p->p_emuldata = NULL; > PROC_UNLOCK(p); > >- if (pem->epoll != NULL) { >- emd = pem->epoll; >- pem->epoll = NULL; >- free(emd, M_EPOLL); >- } >- > free(em, M_TEMP); > free(pem, M_LINUX); > } >diff --git a/sys/compat/linux/linux_emul.h b/sys/compat/linux/linux_emul.h >index daa92e15b502..247a6c2d10b5 100644 >--- 
a/sys/compat/linux/linux_emul.h >+++ b/sys/compat/linux/linux_emul.h >@@ -68,7 +68,6 @@ int linux_common_execve(struct thread *, struct image_args *); > struct linux_pemuldata { > uint32_t flags; /* process emuldata flags */ > struct sx pem_sx; /* lock for this struct */ >- void *epoll; /* epoll data */ > uint32_t persona; /* process execution domain */ > uint32_t ptrace_flags; /* used by ptrace(2) */ > }; >diff --git a/sys/compat/linux/linux_event.c b/sys/compat/linux/linux_event.c >index 7d26ee3e5051..4214727d4f98 100644 >--- a/sys/compat/linux/linux_event.c >+++ b/sys/compat/linux/linux_event.c >@@ -67,25 +67,8 @@ __FBSDID("$FreeBSD$"); > #include <compat/linux/linux_timer.h> > #include <compat/linux/linux_util.h> > >-/* >- * epoll defines 'struct epoll_event' with the field 'data' as 64 bits >- * on all architectures. But on 32 bit architectures BSD 'struct kevent' only >- * has 32 bit opaque pointer as 'udata' field. So we can't pass epoll supplied >- * data verbatuim. Therefore we allocate 64-bit memory block to pass >- * user supplied data for every file descriptor. 
>- */ >- > typedef uint64_t epoll_udata_t; > >-struct epoll_emuldata { >- uint32_t fdc; /* epoll udata max index */ >- epoll_udata_t udata[1]; /* epoll user data vector */ >-}; >- >-#define EPOLL_DEF_SZ 16 >-#define EPOLL_SIZE(fdn) \ >- (sizeof(struct epoll_emuldata)+(fdn) * sizeof(epoll_udata_t)) >- > struct epoll_event { > uint32_t events; > epoll_udata_t data; >@@ -97,15 +80,16 @@ __attribute__((packed)) > > #define LINUX_MAX_EVENTS (INT_MAX / sizeof(struct epoll_event)) > >-static void epoll_fd_install(struct thread *td, int fd, epoll_udata_t udata); > static int epoll_to_kevent(struct thread *td, struct file *epfp, > int fd, struct epoll_event *l_event, int *kev_flags, > struct kevent *kevent, int *nkevents); > static void kevent_to_epoll(struct kevent *kevent, struct epoll_event *l_event); > static int epoll_kev_copyout(void *arg, struct kevent *kevp, int count); > static int epoll_kev_copyin(void *arg, struct kevent *kevp, int count); >-static int epoll_delete_event(struct thread *td, struct file *epfp, >- int fd, int filter); >+static int epoll_register_kevent(struct thread *td, struct file *epfp, >+ int fd, int filter, unsigned int flags); >+static int epoll_fd_registered(struct thread *td, struct file *epfp, >+ struct file *fp, int fd); > static int epoll_delete_all_events(struct thread *td, struct file *epfp, > int fd); > >@@ -220,35 +204,6 @@ static void linux_timerfd_expire(void *); > static void linux_timerfd_curval(struct timerfd *, struct itimerspec *); > > >-static void >-epoll_fd_install(struct thread *td, int fd, epoll_udata_t udata) >-{ >- struct linux_pemuldata *pem; >- struct epoll_emuldata *emd; >- struct proc *p; >- >- p = td->td_proc; >- >- pem = pem_find(p); >- KASSERT(pem != NULL, ("epoll proc emuldata not found.\n")); >- >- LINUX_PEM_XLOCK(pem); >- if (pem->epoll == NULL) { >- emd = malloc(EPOLL_SIZE(fd), M_EPOLL, M_WAITOK); >- emd->fdc = fd; >- pem->epoll = emd; >- } else { >- emd = pem->epoll; >- if (fd > emd->fdc) { >- emd = 
realloc(emd, EPOLL_SIZE(fd), M_EPOLL, M_WAITOK); >- emd->fdc = fd; >- pem->epoll = emd; >- } >- } >- emd->udata[fd] = udata; >- LINUX_PEM_XUNLOCK(pem); >-} >- > static int > epoll_create_common(struct thread *td, int flags) > { >@@ -258,8 +213,6 @@ epoll_create_common(struct thread *td, int flags) > if (error != 0) > return (error); > >- epoll_fd_install(td, EPOLL_DEF_SZ, 0); >- > return (0); > } > >@@ -296,13 +249,14 @@ linux_epoll_create1(struct thread *td, struct linux_epoll_create1_args *args) > > /* Structure converting function from epoll to kevent. */ > static int >-epoll_to_kevent(struct thread *td, struct file *epfp, >+epoll_to_kevent(struct thread *td, struct file *fp, > int fd, struct epoll_event *l_event, int *kev_flags, > struct kevent *kevent, int *nkevents) > { > uint32_t levents = l_event->events; > struct linux_pemuldata *pem; > struct proc *p; >+ bool need_prot = false; > > /* flags related to how event is registered */ > if ((levents & LINUX_EPOLLONESHOT) != 0) >@@ -316,11 +270,46 @@ epoll_to_kevent(struct thread *td, struct file *epfp, > > /* flags related to what event is registered */ > if ((levents & LINUX_EPOLL_EVRD) != 0) { >- EV_SET(kevent++, fd, EVFILT_READ, *kev_flags, 0, 0, 0); >+ EV_SET(kevent, fd, EVFILT_READ, *kev_flags, 0, 0, 0); >+ kevent->ext[0] = l_event->data; >+ kevent->ext[1] = fp->f_type; >+ ++kevent; >+ ++(*nkevents); >+ } else if ((levents & LINUX_EPOLLRDHUP) != 0 && >+ fp->f_type == DTYPE_SOCKET) { >+ /* Set lowat arbitrary high to block data-flow events */ >+ EV_SET(kevent, fd, EVFILT_READ, *kev_flags, NOTE_LOWAT, >+ INT_MAX, 0); >+ kevent->ext[0] = l_event->data; >+ kevent->ext[1] = fp->f_type; >+ ++kevent; > ++(*nkevents); >+ need_prot = true; > } > if ((levents & LINUX_EPOLL_EVWR) != 0) { >- EV_SET(kevent++, fd, EVFILT_WRITE, *kev_flags, 0, 0, 0); >+ EV_SET(kevent, fd, EVFILT_WRITE, *kev_flags, 0, 0, 0); >+ kevent->ext[0] = l_event->data; >+ kevent->ext[1] = fp->f_type; >+ ++kevent; >+ ++(*nkevents); >+ } else if 
(fp->f_type == DTYPE_SOCKET) { >+ /* Always set EVFILT_WRITE to catch socket's EPOLLHUP events */ >+ EV_SET(kevent, fd, EVFILT_WRITE, *kev_flags, NOTE_LOWAT, >+ INT_MAX, 0); >+ kevent->ext[0] = l_event->data; >+ kevent->ext[1] = fp->f_type; >+ ++kevent; >+ ++(*nkevents); >+ if ((levents & LINUX_EPOLL_EVRD) == 0) >+ need_prot = true; >+ } >+ if (need_prot) { >+ /* >+ * Add protective event to prevent clobbering of fflags and >+ * data fields of kevents in epoll_fd_registered() routine. >+ */ >+ EV_SET(kevent, fd, EVFILT_EMPTY, EV_ADD | EV_DISABLE, 0, 0, 0); >+ ++kevent; > ++(*nkevents); > } > >@@ -329,7 +318,6 @@ epoll_to_kevent(struct thread *td, struct file *epfp, > > pem = pem_find(p); > KASSERT(pem != NULL, ("epoll proc emuldata not found.\n")); >- KASSERT(pem->epoll != NULL, ("epoll proc epolldata not found.\n")); > > LINUX_PEM_XLOCK(pem); > if ((pem->flags & LINUX_XUNSUP_EPOLL) == 0) { >@@ -354,20 +342,33 @@ static void > kevent_to_epoll(struct kevent *kevent, struct epoll_event *l_event) > { > >+ l_event->data = kevent->ext[0]; >+ > if ((kevent->flags & EV_ERROR) != 0) { > l_event->events = LINUX_EPOLLERR; > return; > } > >- /* XXX EPOLLPRI, EPOLLHUP */ >+ /* XXX EPOLLPRI */ > switch (kevent->filter) { > case EVFILT_READ: >- l_event->events = LINUX_EPOLLIN; > if ((kevent->flags & EV_EOF) != 0) >- l_event->events |= LINUX_EPOLLRDHUP; >+ l_event->events = (kevent->ext[1] == DTYPE_SOCKET) ? >+ LINUX_EPOLLRDHUP | LINUX_EPOLLIN : LINUX_EPOLLHUP; >+ else >+ l_event->events = LINUX_EPOLLIN; > break; > case EVFILT_WRITE: >- l_event->events = LINUX_EPOLLOUT; >+ /* >+ * XXX: socket's EPOLLHUP requires EV_EOF from both send and >+ * receive directions at the same time. As now only send is >+ * taken into account, it is possible to erroneously trigger >+ * EPOLLHUP with shutdown(fd, SHUT_WR) call.
>+ */ >+ if ((kevent->flags & EV_EOF) != 0) >+ l_event->events = LINUX_EPOLLHUP; >+ else >+ l_event->events = LINUX_EPOLLOUT; > break; > } > } >@@ -382,30 +383,15 @@ static int > epoll_kev_copyout(void *arg, struct kevent *kevp, int count) > { > struct epoll_copyout_args *args; >- struct linux_pemuldata *pem; >- struct epoll_emuldata *emd; > struct epoll_event *eep; >- int error, fd, i; >+ int error, i; > > args = (struct epoll_copyout_args*) arg; > eep = malloc(sizeof(*eep) * count, M_EPOLL, M_WAITOK | M_ZERO); > >- pem = pem_find(args->p); >- KASSERT(pem != NULL, ("epoll proc emuldata not found.\n")); >- LINUX_PEM_SLOCK(pem); >- emd = pem->epoll; >- KASSERT(emd != NULL, ("epoll proc epolldata not found.\n")); >- >- for (i = 0; i < count; i++) { >+ for (i = 0; i < count; i++) > kevent_to_epoll(&kevp[i], &eep[i]); > >- fd = kevp[i].ident; >- KASSERT(fd <= emd->fdc, ("epoll user data vector" >- " is too small.\n")); >- eep[i].data = emd->udata[fd]; >- } >- LINUX_PEM_SUNLOCK(pem); >- > error = copyout(eep, args->leventlist, count * sizeof(*eep)); > if (error == 0) { > args->leventlist += count; >@@ -445,7 +431,7 @@ linux_epoll_ctl(struct thread *td, struct linux_epoll_ctl_args *args) > { > struct file *epfp, *fp; > struct epoll_copyin_args ciargs; >- struct kevent kev[2]; >+ struct kevent kev[3]; > struct kevent_copyops k_ops = { &ciargs, > NULL, > epoll_kev_copyin}; >@@ -485,7 +471,7 @@ linux_epoll_ctl(struct thread *td, struct linux_epoll_ctl_args *args) > > if (args->op != LINUX_EPOLL_CTL_DEL) { > kev_flags = EV_ADD | EV_ENABLE; >- error = epoll_to_kevent(td, epfp, args->fd, &le, >+ error = epoll_to_kevent(td, fp, args->fd, &le, > &kev_flags, kev, &nchanges); > if (error != 0) > goto leave0; >@@ -499,19 +485,10 @@ linux_epoll_ctl(struct thread *td, struct linux_epoll_ctl_args *args) > break; > > case LINUX_EPOLL_CTL_ADD: >- /* >- * kqueue_register() return ENOENT if event does not exists >- * and the EV_ADD flag is not set. 
Reset EV_ENABLE flag to >- * avoid accidental activation of fired oneshot events. >- */ >- kev[0].flags &= ~(EV_ADD | EV_ENABLE); >- error = kqfd_register(args->epfd, &kev[0], td, M_WAITOK); >- if (error != ENOENT) { >+ if (epoll_fd_registered(td, epfp, fp, args->fd)) { > error = EEXIST; > goto leave0; > } >- error = 0; >- kev[0].flags |= (EV_ADD | EV_ENABLE); > break; > > case LINUX_EPOLL_CTL_DEL: >@@ -524,8 +501,6 @@ linux_epoll_ctl(struct thread *td, struct linux_epoll_ctl_args *args) > goto leave0; > } > >- epoll_fd_install(td, args->fd, le.data); >- > error = kern_kevent_fp(td, epfp, nchanges, 0, &k_ops, NULL); > > leave0: >@@ -562,13 +537,13 @@ linux_epoll_wait_common(struct thread *td, int epfd, struct epoll_event *events, > return (error); > if (epfp->f_type != DTYPE_KQUEUE) { > error = EINVAL; >- goto leave1; >+ goto leave; > } > if (uset != NULL) { > error = kern_sigprocmask(td, SIG_SETMASK, uset, > &omask, 0); > if (error != 0) >- goto leave1; >+ goto leave; > td->td_pflags |= TDP_OLDMASK; > /* > * Make sure that ast() is called on return to >@@ -586,11 +561,7 @@ linux_epoll_wait_common(struct thread *td, int epfd, struct epoll_event *events, > coargs.count = 0; > coargs.error = 0; > >- if (timeout != -1) { >- if (timeout < 0) { >- error = EINVAL; >- goto leave0; >- } >+ if (timeout >= 0) { > /* Convert from milliseconds to timespec. 
*/ > ts.tv_sec = timeout / 1000; > ts.tv_nsec = (timeout % 1000) * 1000000; >@@ -610,11 +581,10 @@ linux_epoll_wait_common(struct thread *td, int epfd, struct epoll_event *events, > if (error == 0) > td->td_retval[0] = coargs.count; > >-leave0: > if (uset != NULL) > error = kern_sigprocmask(td, SIG_SETMASK, &omask, > NULL, 0); >-leave1: >+leave: > fdrop(epfp, td); > return (error); > } >@@ -651,7 +621,8 @@ linux_epoll_pwait(struct thread *td, struct linux_epoll_pwait_args *args) > } > > static int >-epoll_delete_event(struct thread *td, struct file *epfp, int fd, int filter) >+epoll_register_kevent(struct thread *td, struct file *epfp, int fd, int filter, >+ unsigned int flags) > { > struct epoll_copyin_args ciargs; > struct kevent kev; >@@ -660,21 +631,45 @@ epoll_delete_event(struct thread *td, struct file *epfp, int fd, int filter) > epoll_kev_copyin}; > > ciargs.changelist = &kev; >- EV_SET(&kev, fd, filter, EV_DELETE | EV_DISABLE, 0, 0, 0); >+ EV_SET(&kev, fd, filter, flags, 0, 0, 0); > > return (kern_kevent_fp(td, epfp, 1, 0, &k_ops, NULL)); > } > >+static int >+epoll_fd_registered(struct thread *td, struct file *epfp, struct file *fp, >+ int fd) >+{ >+ /* >+ * Set empty filter flags to avoid accidental modification of already >+ * registered events. In the case of re-registration: >+ * 1. If event does not exist kevent() does nothing and returns ENOENT >+ * 2. If event does exist, its enabled/disabled state is preserved >+ * but fflags, data and udata fields are overwritten. >+ * p.2 means that we cannot store user's context pointer in udata. >+ * The order of checking is important for sockets and should be in line >+ * with epoll_to_kevent() routine.
>+ */ >+ if ((fp->f_type == DTYPE_SOCKET && >+ epoll_register_kevent(td, epfp, fd, EVFILT_EMPTY, 0) != ENOENT) || >+ epoll_register_kevent(td, epfp, fd, EVFILT_READ, 0) != ENOENT || >+ epoll_register_kevent(td, epfp, fd, EVFILT_WRITE, 0) != ENOENT) >+ return (1); >+ >+ return (0); >+} >+ > static int > epoll_delete_all_events(struct thread *td, struct file *epfp, int fd) > { >- int error1, error2; >+ int error1, error2, error3; > >- error1 = epoll_delete_event(td, epfp, fd, EVFILT_READ); >- error2 = epoll_delete_event(td, epfp, fd, EVFILT_WRITE); >+ error1 = epoll_register_kevent(td, epfp, fd, EVFILT_READ, EV_DELETE); >+ error2 = epoll_register_kevent(td, epfp, fd, EVFILT_WRITE, EV_DELETE); >+ error3 = epoll_register_kevent(td, epfp, fd, EVFILT_EMPTY, EV_DELETE); > > /* return 0 if at least one result positive */ >- return (error1 == 0 ? 0 : error2); >+ return (error1 == 0 ? 0 : (error2 == 0 ? 0 : error3)); > } > > static int >diff --git a/sys/compat/linux/linux_event.h b/sys/compat/linux/linux_event.h >index c483df58736b..e78b3320657d 100644 >--- a/sys/compat/linux/linux_event.h >+++ b/sys/compat/linux/linux_event.h >@@ -45,11 +45,11 @@ > #define LINUX_EPOLLONESHOT 1u<<30 > #define LINUX_EPOLLET 1u<<31 > >-#define LINUX_EPOLL_EVRD (LINUX_EPOLLIN|LINUX_EPOLLRDNORM \ >- |LINUX_EPOLLHUP|LINUX_EPOLLERR|LINUX_EPOLLPRI) >+#define LINUX_EPOLL_EVRD (LINUX_EPOLLIN|LINUX_EPOLLRDNORM) > #define LINUX_EPOLL_EVWR (LINUX_EPOLLOUT|LINUX_EPOLLWRNORM) > #define LINUX_EPOLL_EVSUP (LINUX_EPOLLET|LINUX_EPOLLONESHOT \ >- |LINUX_EPOLL_EVRD|LINUX_EPOLL_EVWR|LINUX_EPOLLRDHUP) >+ |LINUX_EPOLL_EVRD|LINUX_EPOLL_EVWR|LINUX_EPOLLRDHUP \ >+ |LINUX_EPOLLHUP|LINUX_EPOLLERR|LINUX_EPOLLPRI) > > #define LINUX_EPOLL_CTL_ADD 1 > #define LINUX_EPOLL_CTL_DEL 2
You cannot view the attachment while viewing its details because your browser does not support IFRAMEs.
View the attachment on a separate page
.
View Attachment As Diff
View Attachment As Raw
Actions:
View
|
Diff
Attachments on
bug 240590
:
207708
| 207916 |
207926