| Summary: | libc_r: accept(2) can't handle descriptor being closed/shutdown | ||
|---|---|---|---|
| Product: | Base System | Reporter: | Archie Cobbs <archie> |
| Component: | bin | Assignee: | Archie Cobbs <archie> |
| Status: | Closed FIXED | ||
| Severity: | Affects Only Me | ||
| Priority: | Normal | ||
| Version: | Unspecified | ||
| Hardware: | Any | ||
| OS: | Any | ||
Hmm.. guess I forgot to include the test program! Here it is.
============================== CUT HERE ========================
/* Set to "1" to abort on signal, or "0" for thread-based delayed abort */
#ifndef SIGNAL_TEST
#define SIGNAL_TEST 1
#endif
#ifdef __linux__
#define _XOPEN_SOURCE 600
#define _GNU_SOURCE 1
#define _BSD_SOURCE 1
#define _ISOC99_SOURCE 1
#endif
#include <sys/types.h>
#include <sys/socket.h>
#include <netinet/in.h>
#include <netinet/tcp.h>
#include <stdio.h>
#include <string.h>
#include <stdlib.h>
#include <signal.h>
#include <errno.h>
#include <unistd.h>
#include <time.h>
#include <err.h>
#if SIGNAL_TEST
static void signal_handler(int sig);
#else
#include <pthread.h>
static void *thread_entry(void *arg);
static pthread_t tid;
#endif
static int sock;
int
main(int ac, char **av)
{
struct sockaddr_in sin;
socklen_t slen;
/* Create socket */
if ((sock = socket(PF_INET, SOCK_STREAM, IPPROTO_TCP)) == -1)
err(1, "socket");
memset(&sin, 0, sizeof(sin));
#ifndef __linux__
sin.sin_len = sizeof(sin);
#endif
sin.sin_family = AF_INET;
if (bind(sock, (struct sockaddr *)&sin, sizeof(sin)) == -1)
err(1, "socket");
slen = sizeof(sin);
if (listen(sock, 10) == -1)
err(1, "listen");
#if SIGNAL_TEST
/* Register signal handler */
signal(SIGINT, signal_handler);
signal(SIGTERM, signal_handler);
#else
/* Spawn thread */
if ((errno = pthread_create(&tid, NULL, thread_entry, NULL)) != 0)
err(1, "pthread_create");
#endif
/* Listen for connections on socket */
printf("main: waiting for connection...\n");
if (accept(sock, (struct sockaddr *)&sin, &slen) == -1)
err(1, "main: accept");
printf("main: got connection?\n");
return (0);
}
#if SIGNAL_TEST
static void
signal_handler(int sig)
#else
static void *
thread_entry(void *arg)
#endif
{
#if SIGNAL_TEST
printf("%s: rec'd signal %d...\n", __FUNCTION__, sig);
#else
printf("%s: sleeping 2 seconds...\n", __FUNCTION__);
sleep(2);
#endif
srandom(getpid() ^ time(NULL));
if ((random() & 0x00400000) != 0) {
printf("%s: shutdown()'ing socket...\n", __FUNCTION__);
if (shutdown(sock, SHUT_RDWR) == -1)
err(1, "thread: shutdown");
} else {
printf("%s: close()'ing socket...\n", __FUNCTION__);
if (close(sock) == -1)
err(1, "thread: shutdown");
}
#if !SIGNAL_TEST
return (NULL);
#endif
}
I hacked up your program a bit and it looks like the kernel
isn't doing the right thing for accept() on a closed file
descriptor. If you take a look at lib/libc_r/uthread_accept.c,
you'll see that it loops:
while ((ret = __sys_accept(fd, name, namelen)) < 0) {
/* Check if the socket is to block: */
if ((_thread_fd_table[fd]->flags & O_NONBLOCK) == 0
&& (errno == EWOULDBLOCK || errno == EAGAIN)) {
/* Save the socket file descriptor: */
curthread->data.fd.fd = fd;
curthread->data.fd.fname = __FILE__;
curthread->data.fd.branch = __LINE__;
[...]
waiting for errno to be something other than EWOULDBLOCK or
EAGAIN. So I modified your test program to mimic what libc_r
was doing. The kernel seems to ignore the fact that the
file is closed and always returns EAGAIN. I would expect
EBADF or something.
--
Dan Eischen
/* Set to "1" to abort on signal, or "0" for thread-based delayed abort */
#ifndef SIGNAL_TEST
#define SIGNAL_TEST 1
#endif
#ifdef __linux__
#define _XOPEN_SOURCE 600
#define _GNU_SOURCE 1
#define _BSD_SOURCE 1
#define _ISOC99_SOURCE 1
#endif
#include <sys/types.h>
#define accept __sys_accept
#include <sys/socket.h>
#undef accept
#include <netinet/in.h>
#include <netinet/tcp.h>
#include <fcntl.h>
#define poll __sys_poll
#include <poll.h>
#undef poll
#include <stdio.h>
#include <string.h>
#include <stdlib.h>
#include <signal.h>
#include <errno.h>
#include <unistd.h>
#include <time.h>
#include <err.h>
#if SIGNAL_TEST
static void signal_handler(int sig);
#else
#include <pthread.h>
static void *thread_entry(void *arg);
static pthread_t tid;
#endif
static int sock;
int
main(int ac, char **av)
{
struct sockaddr_in sin;
socklen_t slen;
struct pollfd fds;
int flags;
int ret;
/* Create socket */
if ((sock = socket(PF_INET, SOCK_STREAM, IPPROTO_TCP)) == -1)
err(1, "socket");
memset(&sin, 0, sizeof(sin));
#ifndef __linux__
sin.sin_len = sizeof(sin);
#endif
sin.sin_family = AF_INET;
if (bind(sock, (struct sockaddr *)&sin, sizeof(sin)) == -1)
err(1, "socket");
slen = sizeof(sin);
if (listen(sock, 10) == -1)
err(1, "listen");
/* Set non-blocking. */
if ((flags = fcntl(sock, F_GETFL, 0) < 0))
err(1, "fcntl");
flags |= O_NONBLOCK;
if (fcntl(sock, F_SETFL, flags) < 0)
err(1, "fcntl");
#if SIGNAL_TEST
/* Register signal handler */
signal(SIGINT, signal_handler);
signal(SIGTERM, signal_handler);
#else
/* Spawn thread */
if ((errno = pthread_create(&tid, NULL, thread_entry, NULL)) != 0)
err(1, "pthread_create");
#endif
/* Listen for connections on socket */
printf("main: waiting for connection...\n");
while (((ret = __sys_accept(sock, (struct sockaddr *)&sin, &slen)) < 0)
&& (errno == EWOULDBLOCK || errno == EAGAIN)) {
printf("accept returned %d, errno %d\n", ret, errno);
fds.events = POLLRDNORM | POLLERR | POLLHUP;
fds.fd = sock;
__sys_poll(&fds, 1, 2000);
}
if (ret < 0)
err(1, "main: accept");
printf("main: got connection?\n");
return (0);
}
#if SIGNAL_TEST
static void
signal_handler(int sig)
#else
static void *
thread_entry(void *arg)
#endif
{
#if SIGNAL_TEST
printf("%s: rec'd signal %d...\n", __FUNCTION__, sig);
#else
printf("%s: sleeping 2 seconds...\n", __FUNCTION__);
sleep(2);
#endif
srandom(getpid() ^ time(NULL));
if ((random() & 0x00400000) != 0) {
printf("%s: shutdown()'ing socket...\n", __FUNCTION__);
if (shutdown(sock, SHUT_RDWR) == -1)
err(1, "thread: shutdown");
} else {
printf("%s: close()'ing socket...\n", __FUNCTION__);
if (close(sock) == -1)
err(1, "thread: shutdown");
}
#if !SIGNAL_TEST
return (NULL);
#endif
}
Daniel Eischen wrote: > I hacked up your program a bit and it looks like the kernel Hang on.. I want to make sure I fully understand what you're saying. If it's the kernel's fault, then libc_r should have nothing to do with it, right? So I wrote another test program (below) and compiled it *without* -pthread.. There are four cases: shutdown() vs. close() and FreeBSD vs. Linux. Here is what accept() returns in these four cases: shutdown() | close() ----------------+-------------- FreeBSD EAGAIN | EBADF Linux EINVAL | EBADF So there are two conclusions: 1. FreeBSD returns EAGAIN for a file descriptor that has been shutdown(). This is at best inconsistent with Linux, but is probably plain wrong because EAGAIN has a special meaning for O_NONBLOCK file descriptors. 2. The kernel is behaving properly in the close() case, which contradicts your claim that a kernel problem is responsible for the bug in the original test program in the close() case. -Archie __________________________________________________________________________ Archie Cobbs * Packet Design * http://www.packetdesign.com ============================= CUT HERE =============================== /* Compile as: cc -o xx -Wall xx.c */ #ifdef __linux__ #define _XOPEN_SOURCE 600 #define _GNU_SOURCE 1 #define _BSD_SOURCE 1 #define _ISOC99_SOURCE 1 #endif #include <sys/types.h> #include <sys/socket.h> #include <netinet/in.h> #include <netinet/tcp.h> #include <stdio.h> #include <string.h> #include <stdlib.h> #include <signal.h> #include <fcntl.h> #include <errno.h> #include <unistd.h> #include <time.h> #include <err.h> #define USE_SHUTDOWN 1 int main(int ac, char **av) { struct sockaddr_in sin; socklen_t slen; int sock; /* Create socket */ if ((sock = socket(PF_INET, SOCK_STREAM, IPPROTO_TCP)) == -1) err(1, "socket"); memset(&sin, 0, sizeof(sin)); #ifndef __linux__ sin.sin_len = sizeof(sin); #endif sin.sin_family = AF_INET; if (bind(sock, (struct sockaddr *)&sin, sizeof(sin)) == -1) err(1, "socket"); /* Set socket to non-blocking */ if (fcntl(sock, F_SETFL, O_NONBLOCK) == -1) err(1, "fcntl"); /* Listen for connections on socket */ if (listen(sock, 10) == -1) err(1, "listen"); slen = sizeof(sin); printf("created socket\n"); #if USE_SHUTDOWN /* Shutdown socket */ printf("calling shutdown()\n"); if (shutdown(sock, SHUT_RDWR) == -1) err(1, "shutdown"); #else /* Close socket */ printf("calling close()\n"); if (close(sock) == -1) err(1, "close"); #endif /* Try to accept on socket */ printf("calling accept()\n"); if (accept(sock, (struct sockaddr *)&sin, &slen) == -1) err(1, "accept"); printf("got connection?\n"); return (0); } The patch below fixes the problem of accept(2) returning EAGAIN on a O_NONBLOCK socket that has been shutdown(2). Now it returns ECONNABORTED, which is consistent with the normal blocking case. This makes the original test program work in the shutdown() case. However, it still fails in the close() case, and the core dump, although worked-around with this patch, remains unexplained. -Archie __________________________________________________________________________ Archie Cobbs * Packet Design * http://www.packetdesign.com Index: sys/kern/uipc_syscalls.c =================================================================== RCS file: /home/ncvs/src/sys/kern/uipc_syscalls.c,v retrieving revision 1.129 diff -u -r1.129 uipc_syscalls.c --- sys/kern/uipc_syscalls.c 15 Aug 2002 20:55:04 -0000 1.129 +++ sys/kern/uipc_syscalls.c 28 Aug 2002 17:48:28 -0000 @@ -268,14 +268,13 @@ error = EINVAL; goto done; } - if ((head->so_state & SS_NBIO) && TAILQ_EMPTY(&head->so_comp)) { - splx(s); - error = EWOULDBLOCK; - goto done; - } while (TAILQ_EMPTY(&head->so_comp) && head->so_error == 0) { if (head->so_state & SS_CANTRCVMORE) { head->so_error = ECONNABORTED; + break; + } + if ((head->so_state & SS_NBIO) != 0) { + head->so_error = EWOULDBLOCK; break; } error = tsleep(&head->so_timeo, PSOCK | PCATCH, Here's a patch that might fix libc_r in the close() case.
The original is at home, and I hacked this one up but can't
compile it. Please test.
Index: uthread_kern.c
===================================================================
RCS file: /home/ncvs/src/lib/libc_r/uthread/uthread_kern.c,v
retrieving revision 1.41
diff -u -r1.41 uthread_kern.c
--- uthread_kern.c 25 Aug 2002 13:06:29 -0000 1.41
+++ uthread_kern.c 28 Aug 2002 18:25:24 -0000
@@ -880,7 +880,8 @@
/* File descriptor read wait: */
case PS_FDR_WAIT:
if ((nfds < _thread_dtablesize) &&
- (_thread_pfd_table[nfds].revents & POLLRDNORM)) {
+ ((_thread_pfd_table[nfds].revents
+ & (POLLRDNORM | POLLHUP | POLLERR)) != 0)) {
PTHREAD_WAITQ_CLEARACTIVE();
PTHREAD_WORKQ_REMOVE(pthread);
PTHREAD_NEW_STATE(pthread,PS_RUNNING);
@@ -892,7 +893,8 @@
/* File descriptor write wait: */
case PS_FDW_WAIT:
if ((nfds < _thread_dtablesize) &&
- (_thread_pfd_table[nfds].revents & POLLWRNORM)) {
+ ((_thread_pfd_table[nfds].revents
+ & (POLLWRNORM | POLLHUP | POLLERR)) != 0)) {
PTHREAD_WAITQ_CLEARACTIVE();
PTHREAD_WORKQ_REMOVE(pthread);
PTHREAD_NEW_STATE(pthread,PS_RUNNING);
*** SUMMARY OF THIS BUG ***
There are three separate bugs going on here. The first bug is a
kernel bug. The second two bugs are libc_r bugs.
#1. Calling accept() on a socket that has been shutdown() and is
marked as non-blocking returns EAGAIN instead of ECONNABORTED
(which is what you get with blocking sockets). This bug is not
specific to libc_r.
--> This has been fixed in sys/kern/uipc_syscalls.c, rev. 1.130
and will be MFC'd soon.
#2. With libc_r, if one thread is blocked in an accept() call and
another thread close()'s the file descriptor, or the first thread
receives a signal and close()'s the file descriptor from within
the signal handler, then the first thread never wakes up. Same
thing will happen for lots of other calls besides accept().
--> Daniel's patch to uthread_kern.c fixes this problem *IF* you also
include the POLLNVAL flag along with POLLHUP and POLLERR. However,
see #3.
Hopefully this will be commited soon as well.
#3. With libc_r plus the fix for #2, if a program blocks in an
accept() (or other calls), receives a signal, and the signal handler
close()'s the file descriptor, then the process core dumps. The same
program not linked with -pthread behaves correctly (i.e, accept()
returns EBADF). Same thing happens if you use separate threads
instead of signals. E.g., see the original test program.
--> This is because in libc_r, when a file descriptor is closed,
_thread_fd_table[fd] is free()'d. When the first thread wakes
back up, it dereferences the NULL pointer. This could be fixed by
checking for NULL each time around the loop and returning EBADF
if so.
-Archie
__________________________________________________________________________
Archie Cobbs * Packet Design * http://www.packetdesign.com
Responsible Changed From-To: freebsd-bugs->archie Assign to me since I'm already working on it. State Changed From-To: open->closed All of the problems described in this bug have been fixed and MFC'd. |
The program below behaves differently when linked with "-pthread" vs. when linked with the normal libc. If you run the program, then hit control-C. This will cause the socket to be either close(2)'d or shutdown(2) (one is randomly chosen). When linked with libc, the accept() call immediately returns with an error. But when linked with libc_r, the accept() call never returns (in either the close() or shutdown() cases). FYI, on RedHat Linux 7.3, the program always exits even if linked with '-lpthread' for both the close() and shutdown() cases. The same libc_r behavior happens if you compile with the '-DSIGNAL_TEST=0' flag, which uses a separate thread to close the socket. I'd suspect that other blocking I/O calls besides accept(2) have this same problem. How-To-Repeat: Save the program below as 'xx.c' and compile with libc: $ cc -Wall -o xx xx.c Start the program, then press control-c. Notice that it always terminates immediately: $ ./xx main: waiting for connection... ^Csignal_handler: rec'd signal 2... signal_handler: shutdown()'ing socket... xx: main: accept: Software caused connection abort $ ./xx main: waiting for connection... ^Csignal_handler: rec'd signal 2... signal_handler: close()'ing socket... xx: main: accept: Bad file descriptor $ Now compile the program with the '-pthread' flag: $ cc -Wall -o xx xx.c -pthread -D_THREAD_SAFE Now notice that the program never terminates after (the first time) control-C is pressed: $ ./xx main: waiting for connection... ^Csignal_handler: rec'd signal 2... signal_handler: shutdown()'ing socket... Also, occasionally when running the '-pthread' version you'll get this core dump after two control-C presses: $ ./xx main: waiting for connection... ^Csignal_handler: rec'd signal 2... signal_handler: shutdown()'ing socket... ^Csignal_handler: rec'd signal 2... signal_handler: close()'ing socket... Segmentation fault(core dumped) Here's the stack trace and reason for the segfault: Core was generated by `xx'. Program terminated with signal 11, Segmentation fault. Reading symbols from /usr/lib/libc_r.so.4...done. Reading symbols from /usr/libexec/ld-elf.so.1...done. (gdb) where #0 0x280a64ba in _accept (fd=5, name=0xbfbff738, namelen=0xbfbff734) at /usr/src/lib/libc_r/uthread/uthread_accept.c:53 #1 0x8048866 in main () #2 0x80486d1 in _start () (gdb) list 48 /* Lock the file descriptor: */ 49 if ((ret = _FD_LOCK(fd, FD_RDWR, NULL)) == 0) { 50 /* Enter a loop to wait for a connection request: */ 51 while ((ret = _thread_sys_accept(fd, name, namelen)) < 0) { 52 /* Check if the socket is to block: */ 53 if ((_thread_fd_table[fd]->flags & O_NONBLOCK) == 0 && (errno == EWOULDBLOCK || errno == EAGAIN)) { 54 /* Save the socket file descriptor: */ 55 _thread_run->data.fd.fd = fd; 56 _thread_run->data.fd.fname = __FILE__; 57 _thread_run->data.fd.branch = __LINE__; (gdb) p _thread_fd_table $1 = (struct fd_table_entry **) 0x8058000 (gdb) p fd $2 = 5 (gdb) p _thread_fd_table[5] $3 = (struct fd_table_entry *) 0x0