Bug 35399 - poll(2) botches revents on dropped socket connections
Summary: poll(2) botches revents on dropped socket connections
Status: Closed Overcome By Events
Alias: None
Product: Base System
Classification: Unclassified
Component: kern (show other bugs)
Version: 4.3-RELEASE
Hardware: Any Any
: Normal Affects Only Me
Assignee: freebsd-bugs (Nobody)
URL:
Keywords:
Depends on:
Blocks:
 
Reported: 2002-02-28 09:20 UTC by Ronald F. Guilmette
Modified: 2016-06-13 15:17 UTC (History)
0 users

See Also:


Attachments

Note You need to log in before you can comment on or make changes to this bug.
Description Ronald F. Guilmette 2002-02-28 09:20:01 UTC
In an Ideal Universe, if a given stream socket is in a connected state,
connected to some peer elsewhere, and if the peer performs a normal
close of its end of the socket, and if the socket is being polled via
the poll(2) syetem call, then the call to poll(2) should return with
the `revents' value of the corresponding `struct pollfd' structure set
to just POLLHUP, to indicate that a hangup has occured on the socket.

Of course, we don't live in an Ideal Universe, and the poll(2) man page
suggests that the actual behavior of poll(2) in the context just described
may be somewhat different from the ideal:

     This implementation differs from the historical one in that a given file
     descriptor may not cause poll() to return with an error.  In cases where
     this would have happened in the historical implementation (e.g. trying to
     poll a revoke(2)ed descriptor), this implementation instead copies the
     events bitmask to the revents bitmask.  Attempting to perform I/O on this
     descriptor will then return an error.  This behaviour is believed to be
     more useful.

Unfortunately, the actual behavior of poll(2) in the context described
above does not match _either_ the ideal behavior _or_ the documented
behavior (as quoted above).

As execution of the example program below illustrates, a connected
socket descriptor which is being polled for input via a call to poll(2),
and which has its peer drop the peer's end of the connection, will
have the `revents' bit mask in the corresponding `struct pollfd'
structure set _only_ to POLLIN.

Ideally, in such cases, revents would get set _only_ to POLLHUP.

Acording to the documentation, revents should in practice get set to
the value of (POLLIN | POLLHUP | POLLERR) because that is the value
that was set in the original `events' bit mask for the relevant
`struct pollfd' structure at the time the call was made.

Either the ideal behavior (i.e. revents set only to POLLHUP) or the
documented behavior (i.e. revents set to a copy of events) would be
vastly preferable to the actual behavior, as illustrated by the sample
program below, because either the ideal behavior or the documented
behavior would allow detection of hangup/error conditions on the
socket without having to make yet another kernel call (e.g. read or
write) on the socket whose peer has dropped its end of the connection.

The need to make additional kernel calls in order to accurately diag-
nose something as simple and common as a hangup on a socket may lead
to serious inefficiencies in some programs.  Therefore, the current
behavior of poll(2) should be corrected and should be made consistant
either with the ideal behavior or, at the very least, with the documented
behavior.

(Note that recent Linux kernels already appear to produce the ideal
behavior in this sort of context.  But I hope that that fact won't
stop people from implementing the correct behavior in FreeBSD also. :-)

Fix: 

Hack the kernel and arrange things so that whenever a peer hangup occurs
for a connected stream socket, if the socket is being polled via a call
to poll(2) the revents field of the relevant `struct pollfd' structure
gets set either to the ideal value (i.e. just POLLHUP) or else, at the
very least, to the documented value (i.e. a copy of the events field).
How-To-Repeat: 
To repeat, just compile and execute the example program below.  If the
behavior of poll(2) were either consistant with the ideal or, at least,
consistant with the documented behavior, then this progrm should end by
printing "poll(2) indicates hangup" but instead it ends by printing
"poll(2) indicates input waiting", which is clearly incorrect and in-
accurate.

====================================================================
/* poll(2) error test #2 */

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <errno.h>
#include <unistd.h>
#include <fcntl.h>
#include <poll.h>
#include <sys/types.h>
#include <sys/socket.h>
#include <netinet/in.h>
#include <arpa/inet.h>
#include <netdb.h>

static struct protoent *tcp_proto;

static void
fatal (register char const *const fmt, register char const *const arg)
{
  fprintf (stderr, fmt, arg);
  putc ('\n', stderr);
  exit (1);
}

static void
do_poll (register int const fd)
{
  auto struct pollfd pfd;

  pfd.fd = fd;
  pfd.events = POLLIN | POLLHUP | POLLERR;
  pfd.revents = 0;

  if (poll (&pfd, 1, -1) == -1)
    fatal ("Error in poll: %s", strerror (errno));

  fprintf (stderr, "poll(2) did return\n");

  if (pfd.revents & POLLHUP)
    fatal ("poll(2) indicates hangup", NULL);

  if (pfd.revents & POLLIN)
    fatal ("poll(2) indicates input waiting", NULL);
}

static void
do_connect_and_poll (struct in_addr addr, unsigned short port)
{
  auto struct sockaddr_in sin;
  register int fd;

  if ((fd = socket (PF_INET, SOCK_STREAM, tcp_proto->p_proto)) == -1)
    fatal ("Error creating socket: %s", strerror (errno));

  memset (&sin, 0, sizeof sin);
  sin.sin_family = AF_INET;
  sin.sin_addr = addr;
  sin.sin_port = htons (port);

  if (connect (fd, (struct sockaddr *) &sin, sizeof sin) == -1)
    fatal ("Error in connect: %s", strerror (errno));

  fprintf (stderr, "Client connected to server\n");

  do_poll (fd);
}

static void
do_listen_sleep_and_die (struct in_addr addr, unsigned short port)
{
  auto int one = 1;
  auto struct sockaddr_in sin;
  register int listen_fd;
  register int client_fd;

  if ((listen_fd = socket (PF_INET, SOCK_STREAM, tcp_proto->p_proto)) == -1)
    fatal ("Error creating socket: %s", strerror (errno));

  memset (&sin, 0, sizeof sin);
  sin.sin_family = AF_INET;
  sin.sin_addr = addr;
  sin.sin_port = htons (port);

  if (bind (listen_fd, (struct sockaddr *) &sin, sizeof sin) == -1)
    fatal ("Error in bind: %s", strerror (errno));

  if (setsockopt (listen_fd, SOL_SOCKET, SO_REUSEADDR, &one, sizeof one) == -1)
    fatal ("Error in setsockopt: %s", strerror (errno));

  if (listen (listen_fd, 1) == -1)
    fatal ("Error in listen: %s", strerror (errno));

  if ((client_fd = listen (listen_fd, 1)) == -1)
    fatal ("Error in listen: %s", strerror (errno));

  fprintf (stderr, "Server accepted connection from client\n");

  sleep (1);
  close (client_fd);  /* Shut it down.  */
  fprintf (stderr, "Server closed connection\n");
}

int
main (void)
{
  static char const protocol_name[] = "tcp";
  auto struct in_addr addr;
  register int const port = 32767;
  register int pid;

  if ((tcp_proto = getprotobyname (protocol_name)) == NULL)
    fatal ("Cannot find number for protocol: %s", protocol_name);

  inet_aton ("127.0.0.1", &addr);

  if ((pid = fork ()) == -1)
    fatal ("Error in fork: %s", strerror (errno));

  if (pid)
    {
      sleep (1);  /* Give server time to start up. */
      do_connect_and_poll (addr, port);	/* Child procss does this. */
    }
  else
    do_listen_sleep_and_die (addr, port);  /* Parent process does this.  */

  return 0;
}
====================================================================
Comment 1 Baptiste Daroussin freebsd_committer freebsd_triage 2016-06-13 15:17:16 UTC
This has been fixed in the meantime