--- b/contrib/gdb/gdb/gdbthread.h +++ b/contrib/gdb/gdb/gdbthread.h @@ -75,6 +75,8 @@ struct thread_info struct private_thread_info *private; }; +extern int thread_list_empty (void); + /* Create an empty thread list, or empty the existing one. */ extern void init_thread_list (void); --- b/contrib/gdb/gdb/infrun.c +++ b/contrib/gdb/gdb/infrun.c @@ -384,9 +384,22 @@ follow_inferior_reset_breakpoints (void) insert_breakpoints (); } +void +clear_step_resume_breakpoint_thread (void) +{ + if (step_resume_breakpoint) + step_resume_breakpoint->thread = -1; +} + +void +clear_step_resume_breakpoint (void) +{ + step_resume_breakpoint = NULL; +} + /* EXECD_PATHNAME is assumed to be non-NULL. */ -static void +void follow_exec (int pid, char *execd_pathname) { int saved_pid = pid; @@ -1648,7 +1661,8 @@ handle_inferior_event (struct execution_control_state *ecs) /* This causes the eventpoints and symbol table to be reset. Must do this now, before trying to determine whether to stop. */ - follow_exec (PIDGET (inferior_ptid), pending_follow.execd_pathname); + target_follow_exec (PIDGET (inferior_ptid), + pending_follow.execd_pathname); xfree (pending_follow.execd_pathname); stop_pc = read_pc_pid (ecs->ptid); --- b/contrib/gdb/gdb/objfiles.c +++ b/contrib/gdb/gdb/objfiles.c @@ -482,11 +482,11 @@ free_all_objfiles (void) { struct objfile *objfile, *temp; + clear_symtab_users (); ALL_OBJFILES_SAFE (objfile, temp) { free_objfile (objfile); } - clear_symtab_users (); } /* Relocate OBJFILE to NEW_OFFSETS. There should be OBJFILE->NUM_SECTIONS --- b/contrib/gdb/gdb/target.c +++ b/contrib/gdb/gdb/target.c @@ -1307,6 +1307,52 @@ target_async_mask (int mask) } /* Look through the list of possible targets for a target that can + follow forks. */ + +int +target_follow_fork (int follow_child) +{ + struct target_ops *t; + + for (t = current_target.beneath; t != NULL; t = t->beneath) + { + if (t->to_follow_fork != NULL) + { + int retval = t->to_follow_fork (t, follow_child); + if (targetdebug) + fprintf_unfiltered (gdb_stdlog, "target_follow_fork (%d) = %d\n", + follow_child, retval); + return retval; + } + } + + /* Some target returned a fork event, but did not know how to follow it. */ + internal_error (__FILE__, __LINE__, + "could not find a target to follow fork"); +} + +void +target_follow_exec (int pid, char *execd_pathname) +{ + struct target_ops *t; + + for (t = current_target.beneath; t != NULL; t = t->beneath) + { + if (t->to_follow_exec != NULL) + { + t->to_follow_exec (pid, execd_pathname); + if (targetdebug) + fprintf_unfiltered (gdb_stdlog, "target_follow_exec (%d, %s)\n", + pid, execd_pathname); + return; + } + } + + /* If target does not specify a follow_exec handler, call the default. */ + follow_exec (pid, execd_pathname); +} + +/* Look through the list of possible targets for a target that can execute a run or attach command without any other data. This is used to locate the default process stratum. 
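The new target_follow_fork and target_follow_exec entry points above both use GDB's usual delegation idiom: walk the stack of strata starting at current_target.beneath and let the first target that implements the hook handle the event. The standalone sketch below models only that idiom; toy_target_ops, the toy_* names and the three-layer stack are invented for illustration and are not GDB's real data structures.

#include <stdio.h>
#include <stddef.h>

/* Hypothetical, stripped-down stand-in for GDB's target_ops stack.  */
struct toy_target_ops
{
  const char *name;
  struct toy_target_ops *beneath;	/* next stratum down */
  int (*to_follow_fork) (struct toy_target_ops *, int);
};

static int
native_follow_fork (struct toy_target_ops *ops, int follow_child)
{
  printf ("%s handles follow_fork (follow_child=%d)\n",
	  ops->name, follow_child);
  return 0;
}

/* Walk the stack; the first stratum that defines the hook wins.  */
static int
toy_target_follow_fork (struct toy_target_ops *top, int follow_child)
{
  struct toy_target_ops *t;

  for (t = top->beneath; t != NULL; t = t->beneath)
    if (t->to_follow_fork != NULL)
      return t->to_follow_fork (t, follow_child);
  return -1;	/* no stratum knows how to follow a fork */
}

int
main (void)
{
  struct toy_target_ops native = { "native", NULL, native_follow_fork };
  struct toy_target_ops threads = { "threads", &native, NULL };
  struct toy_target_ops top = { "current", &threads, NULL };

  return toy_target_follow_fork (&top, 1);
}

The real target.c code in the hunk above differs mainly in that it raises internal_error() when no stratum can follow a fork, and falls back to follow_exec() when no stratum provides to_follow_exec.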
@@ -2159,9 +2205,9 @@ debug_to_remove_vfork_catchpoint (int pid) } static int -debug_to_follow_fork (int follow_child) +debug_to_follow_fork (struct target_ops* ops, int follow_child) { - int retval = debug_target.to_follow_fork (follow_child); + int retval = debug_target.to_follow_fork (ops, follow_child); fprintf_unfiltered (gdb_stdlog, "target_follow_fork (%d) = %d\n", follow_child, retval); --- b/contrib/gdb/gdb/target.h +++ b/contrib/gdb/gdb/target.h @@ -362,7 +362,8 @@ struct target_ops int (*to_remove_fork_catchpoint) (int); int (*to_insert_vfork_catchpoint) (int); int (*to_remove_vfork_catchpoint) (int); - int (*to_follow_fork) (int); + int (*to_follow_fork) (struct target_ops*, int); + void (*to_follow_exec) (int, char*); int (*to_insert_exec_catchpoint) (int); int (*to_remove_exec_catchpoint) (int); int (*to_reported_exec_events_per_exec_call) (void); @@ -761,8 +762,7 @@ extern void target_load (char *arg, int from_tty); This function returns 1 if the inferior should not be resumed (i.e. there is another event pending). */ -#define target_follow_fork(follow_child) \ - (*current_target.to_follow_fork) (follow_child) +int target_follow_fork (int follow_child); /* On some targets, we can catch an inferior exec event when it occurs. These functions insert/remove an already-created @@ -1248,4 +1248,6 @@ extern void push_remote_target (char *name, int from_tty); /* Blank target vector entries are initialized to target_ignore. */ void target_ignore (void); +void target_follow_exec (int pid, char *execd_pathname); + #endif /* !defined (TARGET_H) */ --- b/contrib/gdb/gdb/thread.c +++ b/contrib/gdb/gdb/thread.c @@ -65,6 +65,12 @@ static void restore_current_thread (ptid_t); static void switch_to_thread (ptid_t ptid); static void prune_threads (void); +int +thread_list_empty () +{ + return thread_list == NULL; +} + void delete_step_resume_breakpoint (void *arg) { --- b/gnu/usr.bin/gdb/arch/amd64/Makefile +++ b/gnu/usr.bin/gdb/arch/amd64/Makefile @@ -2,7 +2,7 @@ GENSRCS+= xm.h .if !defined(GDB_CROSS_DEBUGGER) -LIBSRCS+= fbsd-proc.c fbsd-threads.c gcore.c +LIBSRCS+= fbsd-nat.c fbsd-proc.c fbsd-threads.c gcore.c LIBSRCS+= amd64-nat.c amd64bsd-nat.c amd64fbsd-nat.c .endif LIBSRCS+= solib.c solib-svr4.c --- b/gnu/usr.bin/gdb/arch/amd64/init.c +++ b/gnu/usr.bin/gdb/arch/amd64/init.c @@ -115,6 +115,7 @@ extern initialize_file_ftype _initialize_tui_out; extern initialize_file_ftype _initialize_tui_regs; extern initialize_file_ftype _initialize_tui_stack; extern initialize_file_ftype _initialize_tui_win; +extern initialize_file_ftype _initialize_fbsdnat; void initialize_all_files (void) { @@ -231,4 +232,5 @@ initialize_all_files (void) _initialize_tui_regs (); _initialize_tui_stack (); _initialize_tui_win (); + _initialize_fbsdnat (); } --- b/gnu/usr.bin/gdb/arch/arm/Makefile +++ b/gnu/usr.bin/gdb/arch/arm/Makefile @@ -1,7 +1,7 @@ # $FreeBSD$ GENSRCS+= xm.h -LIBSRCS+= armfbsd-nat.c +LIBSRCS+= armfbsd-nat.c fbsd-nat.c LIBSRCS+= arm-tdep.c armfbsd-tdep.c solib.c solib-svr4.c .if !defined(GDB_CROSS_DEBUGGER) LIBSRCS+= fbsd-threads.c --- b/gnu/usr.bin/gdb/arch/arm/init.c +++ b/gnu/usr.bin/gdb/arch/arm/init.c @@ -113,6 +113,7 @@ extern initialize_file_ftype _initialize_tui_out; extern initialize_file_ftype _initialize_tui_regs; extern initialize_file_ftype _initialize_tui_stack; extern initialize_file_ftype _initialize_tui_win; +extern initialize_file_ftype _initialize_fbsdnat; void initialize_all_files (void) { @@ -225,4 +226,5 @@ initialize_all_files (void) _initialize_tui_regs (); 
_initialize_tui_stack (); _initialize_tui_win (); + _initialize_fbsdnat (); } --- b/gnu/usr.bin/gdb/arch/i386/Makefile +++ b/gnu/usr.bin/gdb/arch/i386/Makefile @@ -2,7 +2,7 @@ GENSRCS+= xm.h .if !defined(GDB_CROSS_DEBUGGER) -LIBSRCS+= fbsd-proc.c fbsd-threads.c gcore.c +LIBSRCS+= fbsd-nat.c fbsd-proc.c fbsd-threads.c gcore.c LIBSRCS+= i386-nat.c i386bsd-nat.c i386fbsd-nat.c .endif LIBSRCS+= solib.c solib-svr4.c --- b/gnu/usr.bin/gdb/arch/i386/init.c +++ b/gnu/usr.bin/gdb/arch/i386/init.c @@ -116,6 +116,7 @@ extern initialize_file_ftype _initialize_tui_out; extern initialize_file_ftype _initialize_tui_regs; extern initialize_file_ftype _initialize_tui_stack; extern initialize_file_ftype _initialize_tui_win; +extern initialize_file_ftype _initialize_fbsdnat; void initialize_all_files (void) { @@ -233,4 +234,5 @@ initialize_all_files (void) _initialize_tui_regs (); _initialize_tui_stack (); _initialize_tui_win (); + _initialize_fbsdnat (); } --- b/gnu/usr.bin/gdb/arch/ia64/Makefile +++ b/gnu/usr.bin/gdb/arch/ia64/Makefile @@ -1,7 +1,7 @@ # $FreeBSD$ .if !defined(GDB_CROSS_DEBUGGER) -LIBSRCS+= fbsd-proc.c fbsd-threads.c gcore.c +LIBSRCS+= fbsd-nat.c fbsd-proc.c fbsd-threads.c gcore.c LIBSRCS+= ia64-fbsd-nat.c .endif LIBSRCS+= solib.c solib-svr4.c --- b/gnu/usr.bin/gdb/arch/ia64/init.c +++ b/gnu/usr.bin/gdb/arch/ia64/init.c @@ -113,6 +113,7 @@ extern initialize_file_ftype _initialize_tui_out; extern initialize_file_ftype _initialize_tui_regs; extern initialize_file_ftype _initialize_tui_stack; extern initialize_file_ftype _initialize_tui_win; +extern initialize_file_ftype _initialize_fbsdnat; void initialize_all_files (void) { @@ -227,4 +228,5 @@ initialize_all_files (void) _initialize_tui_regs (); _initialize_tui_stack (); _initialize_tui_win (); + _initialize_fbsdnat (); } --- b/gnu/usr.bin/gdb/arch/mips/Makefile +++ b/gnu/usr.bin/gdb/arch/mips/Makefile @@ -4,7 +4,7 @@ # XXX Should set DEFAULT_BFD_VEC based on target. 
# .if !defined(GDB_CROSS_DEBUGGER) -LIBSRCS+= mipsfbsd-nat.c fbsd-threads.c +LIBSRCS+= fbsd-nat.c mipsfbsd-nat.c fbsd-threads.c .endif LIBSRCS+= solib.c solib-svr4.c LIBSRCS+= mips-tdep.c mipsfbsd-tdep.c fbsd-proc.c --- b/gnu/usr.bin/gdb/arch/mips/init.c +++ b/gnu/usr.bin/gdb/arch/mips/init.c @@ -112,6 +112,7 @@ extern initialize_file_ftype _initialize_tui_out; extern initialize_file_ftype _initialize_tui_regs; extern initialize_file_ftype _initialize_tui_stack; extern initialize_file_ftype _initialize_tui_win; +extern initialize_file_ftype _initialize_fbsdnat; void initialize_all_files (void) { @@ -230,4 +231,5 @@ initialize_all_files (void) _initialize_tui_regs (); _initialize_tui_stack (); _initialize_tui_win (); + _initialize_fbsdnat (); } --- b/gnu/usr.bin/gdb/arch/powerpc/Makefile +++ b/gnu/usr.bin/gdb/arch/powerpc/Makefile @@ -1,7 +1,7 @@ # $FreeBSD$ .if !defined(GDB_CROSS_DEBUGGER) -LIBSRCS+= fbsd-proc.c fbsd-threads.c gcore.c +LIBSRCS+= fbsd-nat.c fbsd-proc.c fbsd-threads.c gcore.c LIBSRCS+= ppcfbsd-nat.c .endif LIBSRCS+= solib.c solib-svr4.c --- b/gnu/usr.bin/gdb/arch/powerpc/init.c +++ b/gnu/usr.bin/gdb/arch/powerpc/init.c @@ -113,6 +113,7 @@ extern initialize_file_ftype _initialize_tui_out; extern initialize_file_ftype _initialize_tui_regs; extern initialize_file_ftype _initialize_tui_stack; extern initialize_file_ftype _initialize_tui_win; +extern initialize_file_ftype _initialize_fbsdnat; void initialize_all_files (void) { @@ -227,4 +228,5 @@ initialize_all_files (void) _initialize_tui_regs (); _initialize_tui_stack (); _initialize_tui_win (); + _initialize_fbsdnat (); } --- b/gnu/usr.bin/gdb/arch/powerpc64/Makefile +++ b/gnu/usr.bin/gdb/arch/powerpc64/Makefile @@ -1,7 +1,7 @@ # $FreeBSD$ .if !defined(GDB_CROSS_DEBUGGER) -LIBSRCS+= fbsd-proc.c fbsd-threads.c gcore.c +LIBSRCS+= fbsd-nat.c fbsd-proc.c fbsd-threads.c gcore.c LIBSRCS+= ppcfbsd-nat.c .endif LIBSRCS+= solib.c solib-svr4.c --- b/gnu/usr.bin/gdb/arch/powerpc64/init.c +++ b/gnu/usr.bin/gdb/arch/powerpc64/init.c @@ -113,6 +113,7 @@ extern initialize_file_ftype _initialize_tui_out; extern initialize_file_ftype _initialize_tui_regs; extern initialize_file_ftype _initialize_tui_stack; extern initialize_file_ftype _initialize_tui_win; +extern initialize_file_ftype _initialize_fbsdnat; void initialize_all_files (void) { @@ -227,4 +228,5 @@ initialize_all_files (void) _initialize_tui_regs (); _initialize_tui_stack (); _initialize_tui_win (); + _initialize_fbsdnat (); } --- b/gnu/usr.bin/gdb/arch/sparc64/init.c +++ b/gnu/usr.bin/gdb/arch/sparc64/init.c @@ -114,6 +114,7 @@ extern initialize_file_ftype _initialize_tui_out; extern initialize_file_ftype _initialize_tui_regs; extern initialize_file_ftype _initialize_tui_stack; extern initialize_file_ftype _initialize_tui_win; +extern initialize_file_ftype _initialize_fbsdnat; void initialize_all_files (void) { @@ -229,4 +230,5 @@ initialize_all_files (void) _initialize_tui_regs (); _initialize_tui_stack (); _initialize_tui_win (); + _initialize_fbsdnat (); } --- /dev/null +++ b/gnu/usr.bin/gdb/libgdb/fbsd-nat.c @@ -0,0 +1,342 @@ +/* Native-dependent code for FreeBSD. + + Copyright (C) 2002, 2003, 2004 Free Software Foundation, Inc. + + This file is part of GDB. + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. 
+ + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin Street, Fifth Floor, + Boston, MA 02110-1301, USA. */ + +#include "defs.h" +#include "inferior.h" +#include "symfile.h" +#include "gdbcore.h" +#include "gdbthread.h" +#include "gdb_assert.h" +#include +#include +#include + +extern struct target_ops child_ops; +void clear_step_resume_breakpoint (void); +void clear_step_resume_breakpoint_thread (void); +void (*reactivate_threads) (char*) = NULL; +void (*disable_threads) (void) = NULL; + +static void (*mourn_inferior_beneath) (void); +static void (*detach_beneath) (char *args, int from_tty); +static ptid_t (*wait_beneath) (ptid_t ptid, + struct target_waitstatus *ourstatus); +int follow_event_pid = 0; + +/* Return a the name of file that can be opened to get the symbols for + the child process identified by PID. */ + +char * +fbsd_pid_to_exec_file (int pid) +{ + size_t len = MAXPATHLEN; + char *buf = xcalloc (len, sizeof (char)); + char *path; + +#ifdef KERN_PROC_PATHNAME + int mib[4]; + + mib[0] = CTL_KERN; + mib[1] = KERN_PROC; + mib[2] = KERN_PROC_PATHNAME; + mib[3] = pid; + if (sysctl (mib, 4, buf, &len, NULL, 0) == 0) + return buf; +#endif + + path = xstrprintf ("/proc/%d/file", pid); + if (readlink (path, buf, MAXPATHLEN) == -1) + { + xfree (buf); + buf = NULL; + } + + xfree (path); + return buf; +} + +/* Wait for the child specified by PTID to do something. Return the + process ID of the child, or MINUS_ONE_PTID in case of error; store + the status in *OURSTATUS. */ + +static ptid_t +inf_ptrace_wait (ptid_t ptid, struct target_waitstatus *ourstatus) +{ + pid_t pid; + int status, save_errno; + + do + { + set_sigint_trap (); + set_sigio_trap (); + do + { + pid = waitpid (PIDGET (ptid), &status, 0); + save_errno = errno; + } + while (pid == -1 && errno == EINTR); + + clear_sigio_trap (); + clear_sigint_trap (); + + if (pid == -1) + { + fprintf_unfiltered (gdb_stderr, + _("Child process unexpectedly missing: %s.\n"), + safe_strerror (save_errno)); + + /* Claim it exited with unknown signal. */ + ourstatus->kind = TARGET_WAITKIND_SIGNALLED; + ourstatus->value.sig = TARGET_SIGNAL_UNKNOWN; + return minus_one_ptid; + } + + /* Ignore terminated detached child processes. */ + if (!WIFSTOPPED (status) && pid != PIDGET (inferior_ptid)) + pid = -1; + } + while (pid == -1); + + store_waitstatus (ourstatus, status); + return pid_to_ptid (pid); +} + +static ptid_t +fbsd_wait (ptid_t ptid, struct target_waitstatus *ourstatus) +{ + long lwp; + struct ptrace_lwpinfo lwpinfo; + struct target_waitstatus stat; + ptid_t ret; + static ptid_t forking_child = {0,0,0}; + + ret = wait_beneath (ptid, ourstatus); + + if (PIDGET (ret) >= 0 && ourstatus->kind == TARGET_WAITKIND_STOPPED && + (ourstatus->value.sig == TARGET_SIGNAL_TRAP || + ourstatus->value.sig == TARGET_SIGNAL_STOP) && + (ptrace(PT_LWPINFO, PIDGET (ret), (caddr_t)&lwpinfo, + sizeof lwpinfo) == 0)) + { + if (lwpinfo.pl_flags & PL_FLAG_CHILD) + { + /* Leave the child in a stopped state until we get a fork event in + the parent. That's when we decide which process to follow. 
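fbsd_wait above classifies stops by asking the kernel for per-LWP state with ptrace(PT_LWPINFO) after fork tracing has been switched on with PT_FOLLOW_FORK. A minimal userland sketch of that kernel facility, independent of GDB, is shown below; it assumes a FreeBSD kernel that reports PL_FLAG_FORKED/PL_FLAG_CHILD and fills in pl_child_pid, and it simply detaches from both processes once the fork has been observed.

#include <sys/types.h>
#include <sys/ptrace.h>
#include <sys/wait.h>
#include <signal.h>
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>

int
main (void)
{
  struct ptrace_lwpinfo pl;
  pid_t pid, wpid;
  int events, status;

  pid = fork ();
  if (pid == 0)
    {
      /* Traced child: stop once, then fork a grandchild.  */
      ptrace (PT_TRACE_ME, 0, NULL, 0);
      raise (SIGSTOP);
      if (fork () == 0)
	_exit (0);
      _exit (0);
    }

  waitpid (pid, &status, 0);			/* initial SIGSTOP */
  ptrace (PT_FOLLOW_FORK, pid, NULL, 1);	/* enable fork events */
  ptrace (PT_CONTINUE, pid, (caddr_t) 1, 0);

  /* Expect one stop in the parent and one in the new child.  */
  for (events = 0; events < 2; )
    {
      wpid = waitpid (-1, &status, 0);
      if (wpid == -1)
	break;
      if (!WIFSTOPPED (status))
	continue;
      if (ptrace (PT_LWPINFO, wpid, (caddr_t) &pl, sizeof (pl)) != 0)
	break;
      if (pl.pl_flags & PL_FLAG_FORKED)
	printf ("parent %d forked child %d\n", wpid, (int) pl.pl_child_pid);
      if (pl.pl_flags & PL_FLAG_CHILD)
	printf ("child %d stopped after fork\n", wpid);
      if (pl.pl_flags & (PL_FLAG_FORKED | PL_FLAG_CHILD))
	events++;
      ptrace (PT_DETACH, wpid, (caddr_t) 1, 0);
    }
  return 0;
}

fbsd_wait performs the same classification, but instead of detaching it translates the flags into TARGET_WAITKIND_FORKED, TARGET_WAITKIND_IGNORE or TARGET_WAITKIND_EXECD for the core of GDB.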
*/ + ourstatus->kind = TARGET_WAITKIND_IGNORE; + forking_child = ret; + } + else if (lwpinfo.pl_flags & PL_FLAG_FORKED) + { + /* We'd better be in the middle of processing a fork() event. */ + gdb_assert (!ptid_equal (forking_child, null_ptid)); + ourstatus->kind = TARGET_WAITKIND_FORKED; + ourstatus->value.related_pid = lwpinfo.pl_child_pid; + forking_child = null_ptid; + } + else if (lwpinfo.pl_flags & PL_FLAG_EXEC && + PIDGET (ret) == follow_event_pid) + { + ourstatus->kind = TARGET_WAITKIND_EXECD; + ourstatus->value.execd_pathname = + xstrdup (fbsd_pid_to_exec_file (PIDGET (ret))); + } + } + + return ret; +} + +static void +fbsd_enable_event_reporting (int pid) +{ +#ifdef PT_FOLLOW_FORK + follow_event_pid = pid; + if (ptrace(PT_FOLLOW_FORK, pid, 0, 1) < 0) + error (_("Cannot follow fork on this target.")); +#endif +} + +static void +fbsd_post_attach (int pid) +{ + fbsd_enable_event_reporting (pid); +} + +static void +fbsd_post_startup_inferior (ptid_t ptid) +{ + fbsd_enable_event_reporting (PIDGET (ptid)); +} + +int +fbsd_follow_fork (struct target_ops *ops, int follow_child) +{ + ptid_t last_ptid, ret, child_ptid; + struct target_waitstatus last_status; + int parent_pid, child_pid; + struct target_waitstatus ourstatus; + + get_last_target_status (&last_ptid, &last_status); + parent_pid = PIDGET (last_ptid); + child_pid = last_status.value.related_pid; + + if (follow_child) + { + detach_breakpoints (child_pid); + remove_breakpoints (); + child_ptid = pid_to_ptid (child_pid); + + target_detach (NULL, 0); + inferior_ptid = child_ptid; + + /* Reinstall ourselves, since we might have been removed in + target_detach (which does other necessary cleanup). */ + push_target (ops); + + /* Need to restore some of the actions done by the threaded detach */ + if (reactivate_threads) + { + reactivate_threads (fbsd_pid_to_exec_file (child_pid)); + reactivate_threads = NULL; + } + + /* Reset breakpoints in the child as appropriate. */ + clear_step_resume_breakpoint_thread (); + follow_inferior_reset_breakpoints (); + + /* Enable fork/exec event reporting for the child. */ + fbsd_enable_event_reporting (child_pid); + } + else /* Follow parent */ + { + /* Before detaching from the child, remove all breakpoints from + it. (This won't actually modify the breakpoint list, but will + physically remove the breakpoints from the child.) */ + detach_breakpoints (child_pid); + ptrace (PT_DETACH, child_pid, (caddr_t) 1, 0); + } + + return 0; +} + +/* EXECD_PATHNAME is assumed to be non-NULL. */ + +static void +fbsd_follow_exec (int pid, char *execd_pathname) +{ + struct target_waitstatus status; + ptid_t ret = inferior_ptid; + + /* This is an exec event that we actually wish to pay attention to. + Refresh our symbol table to the newly exec'd program, remove any + momentary bp's, etc. + + If there are breakpoints, they aren't really inserted now, + since the exec() transformed our inferior into a fresh set + of instructions. + + We want to preserve symbolic breakpoints on the list, since + we have hopes that they can be reset after the new a.out's + symbol table is read. + + However, any "raw" breakpoints must be removed from the list + (e.g., the solib bp's), since their address is probably invalid + now. + + And, we DON'T want to call delete_breakpoints() here, since + that may write the bp's "shadow contents" (the instruction + value that was overwritten witha TRAP instruction). Since + we now have a new a.out, those shadow contents aren't valid. 
*/ + update_breakpoints_after_exec (); + + /* If there was one, it's gone now. We cannot truly step-to-next + statement through an exec(). */ + clear_step_resume_breakpoint (); + step_range_start = 0; + step_range_end = 0; + + /* What is this a.out's name? */ + printf_unfiltered (_("Executing new program: %s\n"), execd_pathname); + + /* We've followed the inferior through an exec. Therefore, the + inferior has essentially been killed & reborn. */ + + gdb_flush (gdb_stdout); + + /* Disable thread library */ + if (disable_threads) + { + disable_threads (); + disable_threads = NULL; + } + + generic_mourn_inferior (); + inferior_ptid = ret; + + /* That a.out is now the one to use. */ + exec_file_attach (execd_pathname, 0); + + /* And also is where symbols can be found. */ + symbol_file_add_main (execd_pathname, 0); + + /* Reset the shared library package. This ensures that we get + a shlib event when the child reaches "_start", at which point + the dld will have had a chance to initialize the child. */ +#if defined(SOLIB_RESTART) + SOLIB_RESTART (); +#endif +#ifdef SOLIB_CREATE_INFERIOR_HOOK + SOLIB_CREATE_INFERIOR_HOOK (PIDGET (inferior_ptid)); +#else + solib_create_inferior_hook (); +#endif + + /* Reinsert all breakpoints. (Those which were symbolic have + been reset to the proper address in the new a.out, thanks + to symbol_file_command...) */ + insert_breakpoints (); +} + +static void fbsd_mourn_inferior (void) +{ + follow_event_pid = 0; + mourn_inferior_beneath (); +} + +static void fbsd_detach (char *args, int from_tty) +{ + follow_event_pid = 0; + detach_beneath (args, from_tty); +} + +void +_initialize_fbsdnat (void) +{ + wait_beneath = inf_ptrace_wait; + detach_beneath = child_ops.to_detach; + mourn_inferior_beneath = child_ops.to_mourn_inferior; + child_ops.to_wait = fbsd_wait; + child_ops.to_detach = fbsd_detach; + child_ops.to_mourn_inferior = fbsd_mourn_inferior; + child_ops.to_post_attach = fbsd_post_attach; + child_ops.to_post_startup_inferior = fbsd_post_startup_inferior; + child_ops.to_follow_fork = fbsd_follow_fork; + child_ops.to_follow_exec = fbsd_follow_exec; +} --- b/gnu/usr.bin/gdb/libgdb/fbsd-threads.c +++ b/gnu/usr.bin/gdb/libgdb/fbsd-threads.c @@ -68,6 +68,9 @@ extern struct target_ops core_ops; /* Pointer to the next function on the objfile event chain. */ static void (*target_new_objfile_chain) (struct objfile *objfile); + +/* Non-zero while processing thread library re-activation after fork() */ +static int fbsd_forking; /* Non-zero if there is a thread module */ static int fbsd_thread_present; @@ -154,6 +157,10 @@ static int fbsd_thread_alive (ptid_t ptid); static void attach_thread (ptid_t ptid, const td_thrhandle_t *th_p, const td_thrinfo_t *ti_p, int verbose); static void fbsd_thread_detach (char *args, int from_tty); +extern void (*reactivate_threads) (char*); +extern void (*disable_threads) (void); +static void fbsd_thread_activate (void); +static void fbsd_thread_deactivate (void); /* Building process ids. */ @@ -405,15 +412,50 @@ disable_thread_event_reporting (void) td_death_bp_addr = 0; } +static void +fbsd_thread_reactivate_after_fork (char *pathname) +{ + fbsd_forking = 1; + + /* That a.out is now the one to use. */ + exec_file_attach (pathname, 0); + + /* And also is where symbols can be found. 
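Once _initialize_fbsdnat has hooked the child target, GDB's standard follow-fork machinery becomes usable on FreeBSD. A hypothetical session might look like the fragment below; the program names are invented and intermediate output is omitted, but the final message is the one printed by fbsd_follow_exec above.

(gdb) set follow-fork-mode child
(gdb) run ./forker
...
Executing new program: /bin/ls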
*/ + symbol_file_add_main (pathname, 0); + push_target (&fbsd_thread_ops); + +#ifdef SOLIB_CREATE_INFERIOR_HOOK + SOLIB_CREATE_INFERIOR_HOOK (PIDGET (inferior_ptid)); +#else + solib_create_inferior_hook (); +#endif + fbsd_forking = 0; +} + +static void +fbsd_thread_disable_after_exec (void) +{ + if (fbsd_thread_active) + fbsd_thread_deactivate (); + + unpush_target (&fbsd_thread_ops); +} + static void fbsd_thread_activate (void) { fbsd_thread_active = 1; + reactivate_threads = fbsd_thread_reactivate_after_fork; + disable_threads = fbsd_thread_disable_after_exec; init_thread_list(); if (fbsd_thread_core == 0) enable_thread_event_reporting (); - fbsd_thread_find_new_threads (); - get_current_thread (); + + if (!fbsd_forking) + { + fbsd_thread_find_new_threads (); + get_current_thread (); + } } static void @@ -626,7 +668,7 @@ fbsd_thread_resume (ptid_t ptid, int step, enum target_signal signo) } lwp = GET_LWP (work_ptid); - if (lwp == 0) + if (lwp == 0 && GET_THREAD (work_ptid) != 0) { /* check user thread */ ret = td_ta_map_id2thr_p (thread_agent, GET_THREAD(work_ptid), &th); @@ -790,6 +832,9 @@ fbsd_thread_wait (ptid_t ptid, struct target_waitstatus *ourstatus) ret = child_ops.to_wait (ptid, ourstatus); if (GET_PID(ret) >= 0 && ourstatus->kind == TARGET_WAITKIND_STOPPED) { + if (thread_list_empty ()) + fbsd_thread_find_new_threads (); + lwp = get_current_lwp (GET_PID(ret)); ret = thread_from_lwp (BUILD_LWP(lwp, GET_PID(ret)), &th, &ti); @@ -1065,6 +1110,9 @@ fbsd_thread_create_inferior (char *exec_file, char *allargs, char **env) static void fbsd_thread_post_startup_inferior (ptid_t ptid) { + if (child_ops.to_post_startup_inferior) + child_ops.to_post_startup_inferior (ptid); + if (fbsd_thread_present && !fbsd_thread_active) { /* The child process is now the actual multi-threaded --- b/share/man/man9/fpu_kern.9 +++ b/share/man/man9/fpu_kern.9 @@ -120,6 +120,16 @@ could be used from both kernel thread and syscall contexts. The .Fn fpu_kern_leave function correctly handles such contexts. +.It Dv FPU_KERN_NOCTX +Avoid nesting save area. +If the flag is specified, the +.Fa ctx +must be passed as +.Va NULL . +The flag should only be used for really short code blocks +which can be executed in a critical section. +It avoids the need to allocate the FPU context by the cost +of increased system latency. .El .El .Pp --- b/sys/amd64/amd64/fpu.c +++ b/sys/amd64/amd64/fpu.c @@ -348,7 +348,7 @@ fpuexit(struct thread *td) stop_emulating(); fpusave(curpcb->pcb_save); start_emulating(); - PCPU_SET(fpcurthread, 0); + PCPU_SET(fpcurthread, NULL); } critical_exit(); } @@ -603,6 +603,8 @@ fpudna(void) { critical_enter(); + KASSERT((curpcb->pcb_flags & PCB_FPUNOSAVE) == 0, + ("fpudna while in fpu_kern_enter(FPU_KERN_NOCTX)")); if (PCPU_GET(fpcurthread) == curthread) { printf("fpudna: fpcurthread == curthread %d times\n", ++err_count); @@ -636,7 +638,8 @@ fpudna(void) * fpu_initialstate, to ignite the XSAVEOPT * tracking engine. 
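The fpu_kern.9 text above spells out the contract for the new flag: ctx must be NULL, the work must be very short, and everything between enter and leave runs inside a critical section. A minimal kernel-side sketch of a caller follows, assuming amd64 and the flag added by this patch; short_fpu_block is a hypothetical helper and the actual SIMD work is elided.

#include <sys/param.h>
#include <sys/proc.h>
#include <machine/fpu.h>

/* Hypothetical helper: a very short FPU/SIMD computation.  */
static void
short_fpu_block(void)
{

	/*
	 * FPU_KERN_NOCTX: no save area is allocated, ctx must be NULL,
	 * and we stay in a critical section until fpu_kern_leave(), so
	 * no sleeping and no long-running loops in between.
	 */
	fpu_kern_enter(curthread, NULL, FPU_KERN_NOCTX);

	/* ... a handful of SSE/AVX instructions ... */

	fpu_kern_leave(curthread, NULL);
}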
*/ - bcopy(fpu_initialstate, curpcb->pcb_save, cpu_max_ext_state_size); + bcopy(fpu_initialstate, curpcb->pcb_save, + cpu_max_ext_state_size); fpurestore(curpcb->pcb_save); if (curpcb->pcb_initial_fpucw != __INITIAL_FPUCW__) fldcw(curpcb->pcb_initial_fpucw); @@ -934,11 +937,36 @@ fpu_kern_enter(struct thread *td, struct fpu_kern_ctx *ctx, u_int flags) { struct pcb *pcb; + KASSERT((flags & FPU_KERN_NOCTX) != 0 || ctx != NULL, + ("ctx is required when !FPU_KERN_NOCTX")); + pcb = td->td_pcb; + KASSERT((pcb->pcb_flags & PCB_FPUNOSAVE) == 0, + ("recursive fpu_kern_enter while in PCB_FPUNOSAVE state")); + if ((flags & FPU_KERN_NOCTX) != 0) { + critical_enter(); + stop_emulating(); + if (curthread == PCPU_GET(fpcurthread)) { + fpusave(curpcb->pcb_save); + PCPU_SET(fpcurthread, NULL); + } else { + KASSERT(PCPU_GET(fpcurthread) == NULL, + ("invalid fpcurthread")); + } + + /* + * This breaks XSAVEOPT tracker, but + * PCB_FPUNOSAVE state is supposed to never need to + * save FPU context at all. + */ + fpurestore(fpu_initialstate); + set_pcb_flags(pcb, PCB_KERNFPU | PCB_FPUNOSAVE | + PCB_FPUINITDONE); + return (0); + } if ((flags & FPU_KERN_KTHR) != 0 && is_fpu_kern_thread(0)) { ctx->flags = FPU_KERN_CTX_DUMMY; return (0); } - pcb = td->td_pcb; KASSERT(!PCB_USER_FPU(pcb) || pcb->pcb_save == get_pcb_user_save_pcb(pcb), ("mangled pcb_save")); ctx->flags = 0; @@ -957,15 +985,26 @@ fpu_kern_leave(struct thread *td, struct fpu_kern_ctx *ctx) { struct pcb *pcb; + pcb = td->td_pcb; + if ((pcb->pcb_flags & PCB_FPUNOSAVE) != 0) { + KASSERT(ctx == NULL, ("non-null ctx after FPU_KERN_NOCTX")); + KASSERT(PCPU_GET(fpcurthread) == NULL, + ("non-NULL fpcurthread for PCB_FPUNOSAVE")); + CRITICAL_ASSERT(td); + clear_pcb_flags(pcb, PCB_FPUNOSAVE | PCB_FPUINITDONE); + start_emulating(); + critical_exit(); + goto restore_flags; + } if (is_fpu_kern_thread(0) && (ctx->flags & FPU_KERN_CTX_DUMMY) != 0) return (0); KASSERT((ctx->flags & FPU_KERN_CTX_DUMMY) == 0, ("dummy ctx")); - pcb = td->td_pcb; critical_enter(); if (curthread == PCPU_GET(fpcurthread)) fpudrop(); critical_exit(); pcb->pcb_save = ctx->prev; +restore_flags: if (pcb->pcb_save == get_pcb_user_save_pcb(pcb)) { if ((pcb->pcb_flags & PCB_USERFPUINITDONE) != 0) { set_pcb_flags(pcb, PCB_FPUINITDONE); --- b/sys/amd64/amd64/initcpu.c +++ b/sys/amd64/amd64/initcpu.c @@ -88,6 +88,11 @@ static void init_amd(void) { + if (CPUID_TO_FAMILY(cpu_id) == 0x9) { + if ((cpu_feature2 & CPUID2_HV) == 0) + wrmsr(MSR_HWCR, rdmsr(MSR_HWCR) | (1 << 6)); + } + /* * Work around Erratum 721 for Family 10h and 12h processors. 
* These processors may incorrectly update the stack pointer --- b/sys/amd64/amd64/mp_machdep.c +++ b/sys/amd64/amd64/mp_machdep.c @@ -58,6 +58,12 @@ __FBSDID("$FreeBSD$"); #include #include +#include "opt_ddb.h" +#ifdef DDB +#include +#include +#endif + #include #include #include @@ -1415,6 +1421,13 @@ ipi_nmi_handler() cpustop_handler(); return (0); } + +#ifdef DDB +static int ddb_migrate_cpu = -1; +static int ddb_orig_cpu = -1; +static jmp_buf ddb_migrate_buf; +void db_command_loop(void); +#endif /* * Handle an IPI_STOP by saving our current context and spinning until we @@ -1429,6 +1442,9 @@ cpustop_handler(void) savectx(&stoppcbs[cpu]); +#ifdef DDB +migration_exited: +#endif /* Indicate that we are stopped */ CPU_SET_ATOMIC(cpu, &stopped_cpus); @@ -1436,6 +1452,21 @@ cpustop_handler(void) while (!CPU_ISSET(cpu, &started_cpus)) ia32_pause(); +#ifdef DDB + if (ddb_migrate_cpu == cpu) { + if (setjmp(ddb_migrate_buf)) { + db_printf("leaving cpu %d\n", cpu); + ddb_migrate_cpu = -1; + CPU_CLR_ATOMIC(cpu, &started_cpus); + CPU_SET_ATOMIC(ddb_orig_cpu, &started_cpus); + goto migration_exited; + } + db_printf("current cpu %d\n", cpu); + db_command_loop(); + panic("continued from migrated\n"); + } +#endif + CPU_CLR_ATOMIC(cpu, &started_cpus); CPU_CLR_ATOMIC(cpu, &stopped_cpus); @@ -1449,6 +1480,50 @@ cpustop_handler(void) } } +#ifdef DDB +DB_COMMAND(cpuret, db_cpuret) +{ + + if (ddb_migrate_cpu == -1) { + db_printf("not migrated\n"); + return; + } + longjmp(ddb_migrate_buf, 1); +} + +DB_COMMAND(cpu, db_cpu) +{ + int mcpu, currcpu; + + if (ddb_migrate_cpu != -1) { + db_printf("already migrated, return to orig cpu first\n"); + return; + } + if (!have_addr) { + db_printf("specify cpu to migrate\n"); + return; + } + mcpu = (int)addr; + if (mcpu < 0 || mcpu >= mp_ncpus) { + db_printf("cpu %d does not exist\n", mcpu); + return; + } + + ddb_migrate_cpu = mcpu; + currcpu = PCPU_GET(cpuid); + ddb_orig_cpu = cpu; + savectx(&stoppcbs[currcpu]); + CPU_CLR_ATOMIC(currcpu, &started_cpus); + CPU_SET_ATOMIC(currcpu, &stopped_cpus); + CPU_SET_ATOMIC(mcpu, &started_cpus); + while (!CPU_ISSET(currcpu, &started_cpus)) + ia32_pause(); + CPU_CLR_ATOMIC(currcpu, &started_cpus); + CPU_CLR_ATOMIC(currcpu, &stopped_cpus); + db_printf("current cpu %d\n", currcpu); +} +#endif + /* * Handle an IPI_SUSPEND by saving our current context and spinning until we * are resumed. 
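The new ddb commands let the operator run the DDB command loop on another stopped CPU and then return to the original one. Based only on the messages printed by the code above, a session could look roughly like this (prompts and interleaving are approximate):

db> cpu 2
current cpu 2
db> trace
...
db> cpuret
leaving cpu 2
current cpu 0
db>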
--- b/sys/amd64/include/fpu.h +++ b/sys/amd64/include/fpu.h @@ -85,6 +85,7 @@ void fpu_save_area_reset(struct savefpu *fsa); #define FPU_KERN_NORMAL 0x0000 #define FPU_KERN_NOWAIT 0x0001 #define FPU_KERN_KTHR 0x0002 +#define FPU_KERN_NOCTX 0x0004 #endif --- b/sys/amd64/include/pcb.h +++ b/sys/amd64/include/pcb.h @@ -79,6 +79,7 @@ struct pcb { #define PCB_FPUINITDONE 0x08 /* fpu state is initialized */ #define PCB_USERFPUINITDONE 0x10 /* fpu user state is initialized */ #define PCB_32BIT 0x40 /* process has 32 bit context (segs etc) */ +#define PCB_FPUNOSAVE 0x80 /* no save area for current FPU ctx */ uint16_t pcb_initial_fpucw; --- b/sys/dev/random/ivy.c +++ b/sys/dev/random/ivy.c @@ -58,7 +58,8 @@ static int random_ivy_read(void *, int); static struct random_hardware_source random_ivy = { .ident = "Hardware, Intel Secure Key RNG", .source = RANDOM_PURE_RDRAND, - .read = random_ivy_read + .read = random_ivy_read, + .entropy_cdev_name = "ivy", }; static inline int --- b/sys/dev/random/live_entropy_sources.c +++ b/sys/dev/random/live_entropy_sources.c @@ -28,6 +28,7 @@ #include __FBSDID("$FreeBSD$"); +#include #include #include #include @@ -38,6 +39,7 @@ __FBSDID("$FreeBSD$"); #include #include #include +#include #include #include @@ -57,6 +59,41 @@ static struct les_head sources = LIST_HEAD_INITIALIZER(sources); */ static struct sx les_lock; /* need a sleepable lock */ +static int +entropy_read(struct cdev *dev, struct uio *uio, int flags) +{ + uint8_t buf[HARVESTSIZE]; + struct random_hardware_source *rsource; + ssize_t resid; + int c, error; + + sx_slock(&les_lock); + rsource = dev->si_drv1; + if (rsource == NULL) { + error = ENXIO; + } else { + error = 0; + resid = uio->uio_resid; + while (uio->uio_resid > 0) { + c = rsource->read(buf, sizeof(buf)); + if (c > 0) + error = uiomove(buf, c, uio); + if (error != 0 || c == 0) + break; + } + if (resid != uio->uio_resid) + error = 0; + } + sx_sunlock(&les_lock); + return (error); +} + +static struct cdevsw entropy_cdevsw = { + .d_version = D_VERSION, + .d_read = entropy_read, + .d_name = "entropy", +}; + void live_entropy_source_register(struct random_hardware_source *rsource) { @@ -66,8 +103,13 @@ live_entropy_source_register(struct random_hardware_source *rsource) les = malloc(sizeof(struct live_entropy_sources), M_ENTROPY, M_WAITOK); les->rsource = rsource; + les->dev = make_dev_credf(MAKEDEV_ETERNAL_KLD | MAKEDEV_WAITOK | + MAKEDEV_CHECKNAME, &entropy_cdevsw, 0, NULL, UID_ROOT, GID_WHEEL, + 0400, "entropy/%s", rsource->entropy_cdev_name); sx_xlock(&les_lock); + if (les->dev != NULL) + les->dev->si_drv1 = rsource; LIST_INSERT_HEAD(&sources, les, entries); sx_xunlock(&les_lock); } @@ -76,18 +118,27 @@ void live_entropy_source_deregister(struct random_hardware_source *rsource) { struct live_entropy_sources *les = NULL; + struct cdev *dev; KASSERT(rsource != NULL, ("invalid input to %s", __func__)); + dev = NULL; sx_xlock(&les_lock); - LIST_FOREACH(les, &sources, entries) + LIST_FOREACH(les, &sources, entries) { if (les->rsource == rsource) { LIST_REMOVE(les, entries); break; } + } + if (les != NULL) { + dev = les->dev; + if (dev != NULL) + dev->si_drv1 = NULL; + } sx_xunlock(&les_lock); - if (les != NULL) - free(les, M_ENTROPY); + if (dev != NULL) + destroy_dev(dev); + free(les, M_ENTROPY); } static int --- b/sys/dev/random/live_entropy_sources.h +++ b/sys/dev/random/live_entropy_sources.h @@ -38,6 +38,7 @@ struct live_entropy_sources { LIST_ENTRY(live_entropy_sources) entries; /* list of providers */ struct random_hardware_source *rsource; /* 
associated random adaptor */ + struct cdev *dev; }; extern struct mtx live_mtx; --- b/sys/dev/random/nehemiah.c +++ b/sys/dev/random/nehemiah.c @@ -55,7 +55,8 @@ static int random_nehemiah_read(void *, int); static struct random_hardware_source random_nehemiah = { .ident = "Hardware, VIA Nehemiah Padlock RNG", .source = RANDOM_PURE_NEHEMIAH, - .read = random_nehemiah_read + .read = random_nehemiah_read, + .entropy_cdev_name = "nehemiah", }; /* TODO: now that the Davies-Meyer hash is gone and we only use --- b/sys/dev/random/randomdev.h +++ b/sys/dev/random/randomdev.h @@ -55,6 +55,7 @@ struct random_adaptor { struct random_hardware_source { const char *ident; + const char *entropy_cdev_name; enum esource source; random_read_func_t *read; }; --- b/sys/fs/nullfs/null_subr.c +++ b/sys/fs/nullfs/null_subr.c @@ -251,6 +251,7 @@ null_nodeget(mp, lowervp, vpp) vp->v_type = lowervp->v_type; vp->v_data = xp; vp->v_vnlock = lowervp->v_vnlock; + vp->v_vflag = lowervp->v_vflag & VV_ROOT; error = insmntque1(vp, mp, null_insmntque_dtr, xp); if (error != 0) return (error); --- b/sys/fs/tmpfs/tmpfs.h +++ b/sys/fs/tmpfs/tmpfs.h @@ -51,7 +51,8 @@ #include #include #include -#include +#include +#include MALLOC_DECLARE(M_TMPFSMNT); MALLOC_DECLARE(M_TMPFSNAME); --- b/sys/fs/tmpfs/tmpfs_subr.c +++ b/sys/fs/tmpfs/tmpfs_subr.c @@ -53,9 +53,10 @@ __FBSDID("$FreeBSD$"); #include #include #include -#include #include +#include #include +#include #include #include --- b/sys/fs/tmpfs/tmpfs_vnops.c +++ b/sys/fs/tmpfs/tmpfs_vnops.c @@ -45,19 +45,12 @@ __FBSDID("$FreeBSD$"); #include #include #include -#include #include #include #include #include #include -#include -#include -#include -#include -#include - #include #include --- b/sys/i386/i386/initcpu.c +++ b/sys/i386/i386/initcpu.c @@ -651,6 +651,32 @@ init_transmeta(void) } #endif +static void +init_amd(void) +{ + +#ifdef CPU_ATHLON_SSE_HACK + /* + * Sometimes the BIOS doesn't enable SSE instructions. + * According to AMD document 20734, the mobile Duron, the + * (mobile) Athlon 4 and the Athlon MP support SSE. These + * correspond to cpu_id 0x66X or 0x67X. + */ + if ((cpu_feature & CPUID_XMM) == 0 && ((cpu_id & ~0xf) == 0x660 || + (cpu_id & ~0xf) == 0x670 || (cpu_id & ~0xf) == 0x680)) { + u_int regs[4]; + + wrmsr(MSR_HWCR, rdmsr(MSR_HWCR) & ~0x08000); + do_cpuid(1, regs); + cpu_feature = regs[3]; + } +#endif + if (CPUID_TO_FAMILY(cpu_id) == 0x9) { + if ((cpu_feature2 & CPUID2_HV) == 0) + wrmsr(MSR_HWCR, rdmsr(MSR_HWCR) | (1 << 6)); + } +} + /* * Initialize CR4 (Control register 4) to enable SSE instructions. */ @@ -725,26 +751,9 @@ initializecpu(void) break; } break; -#ifdef CPU_ATHLON_SSE_HACK case CPU_VENDOR_AMD: - /* - * Sometimes the BIOS doesn't enable SSE instructions. - * According to AMD document 20734, the mobile - * Duron, the (mobile) Athlon 4 and the Athlon MP - * support SSE. These correspond to cpu_id 0x66X - * or 0x67X. - */ - if ((cpu_feature & CPUID_XMM) == 0 && - ((cpu_id & ~0xf) == 0x660 || - (cpu_id & ~0xf) == 0x670 || - (cpu_id & ~0xf) == 0x680)) { - u_int regs[4]; - wrmsr(MSR_HWCR, rdmsr(MSR_HWCR) & ~0x08000); - do_cpuid(1, regs); - cpu_feature = regs[3]; - } + init_amd(); break; -#endif case CPU_VENDOR_CENTAUR: init_via(); break; --- b/sys/i386/i386/pmap.c +++ b/sys/i386/i386/pmap.c @@ -3477,17 +3477,21 @@ pmap_enter(pmap_t pmap, vm_offset_t va, vm_prot_t access, vm_page_t m, PMAP_LOCK(pmap); sched_pin(); - /* - * In the case that a page table page is not - * resident, we are creating it here. 
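With the live_entropy_sources changes above, every registered hardware source also appears as a root-only (0400) character device named after its entropy_cdev_name, for example /dev/entropy/ivy or /dev/entropy/nehemiah. A small userland sketch of pulling raw bytes from one of them follows; the device path assumes the Ivy Bridge RDRAND source registered above is present on the machine.

#include <fcntl.h>
#include <stdio.h>
#include <unistd.h>

int
main(void)
{
	unsigned char buf[16];
	ssize_t i, n;
	int fd;

	fd = open("/dev/entropy/ivy", O_RDONLY);	/* root only */
	if (fd == -1) {
		perror("open");
		return (1);
	}
	n = read(fd, buf, sizeof(buf));
	if (n > 0) {
		for (i = 0; i < n; i++)
			printf("%02x", buf[i]);
		putchar('\n');
	}
	close(fd);
	return (0);
}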
- */ - if (va < VM_MAXUSER_ADDRESS) { + pde = pmap_pde(pmap, va); + if ((*pde & PG_PS) != 0) { + /* PG_V is asserted by pmap_demote_pde */ + pmap_demote_pde(pmap, pde, va); + if (va < VM_MAXUSER_ADDRESS) { + mpte = PHYS_TO_VM_PAGE(*pde & PG_FRAME); + mpte->wire_count++; + } + } else if (va < VM_MAXUSER_ADDRESS) { + /* + * In the case that a page table page is not resident, + * we are creating it here. + */ mpte = pmap_allocpte(pmap, va, M_WAITOK); } - - pde = pmap_pde(pmap, va); - if ((*pde & PG_PS) != 0) - panic("pmap_enter: attempted pmap_enter on 4MB page"); pte = pmap_pte_quick(pmap, va); /* --- b/sys/kern/kern_descrip.c +++ b/sys/kern/kern_descrip.c @@ -314,20 +314,25 @@ struct getdtablesize_args { int dummy; }; #endif -/* ARGSUSED */ + int sys_getdtablesize(struct thread *td, struct getdtablesize_args *uap) { - struct proc *p = td->td_proc; + struct proc *p; uint64_t lim; + int maxfd, res; + p = td->td_proc; PROC_LOCK(p); - td->td_retval[0] = - min((int)lim_cur(p, RLIMIT_NOFILE), maxfilesperproc); + res = lim_cur(p, RLIMIT_NOFILE); lim = racct_get_limit(td->td_proc, RACCT_NOFILE); PROC_UNLOCK(p); - if (lim < td->td_retval[0]) - td->td_retval[0] = lim; + maxfd = maxfilesperproc; + if (maxfd > res) + maxfd = res; + if (maxfd > lim) + maxfd = lim; + td->td_retval[0] = maxfd; return (0); } @@ -775,13 +780,8 @@ kern_fcntl(struct thread *td, int fd, int cmd, intptr_t arg) static int getmaxfd(struct proc *p) { - int maxfd; - - PROC_LOCK(p); - maxfd = min((int)lim_cur(p, RLIMIT_NOFILE), maxfilesperproc); - PROC_UNLOCK(p); - return (maxfd); + return (imin(lim_cur_unlocked(p, RLIMIT_NOFILE), maxfilesperproc)); } /* --- b/sys/kern/kern_proc.c +++ b/sys/kern/kern_proc.c @@ -141,6 +141,10 @@ uma_zone_t proc_zone; int kstack_pages = KSTACK_PAGES; SYSCTL_INT(_kern, OID_AUTO, kstack_pages, CTLFLAG_RD, &kstack_pages, 0, "Kernel stack size in pages"); +static int vmmap_skip_res_cnt = 1; +SYSCTL_INT(_kern, OID_AUTO, proc_vmmap_skip_resident_count, CTLFLAG_RW, + &vmmap_skip_res_cnt, 0, + "Skip calculation of the pages resident count in kern.proc.vmmap"); CTASSERT(sizeof(struct kinfo_proc) == KINFO_PROC_SIZE); #ifdef COMPAT_FREEBSD32 @@ -2136,15 +2140,19 @@ int kern_proc_vmmap_out(struct proc *p, struct sbuf *sb) { vm_map_entry_t entry, tmp_entry; - unsigned int last_timestamp; + struct vattr va; + vm_map_t map; + vm_page_t m; + vm_object_t obj, tobj, lobj; char *fullpath, *freepath; struct kinfo_vmentry *kve; - struct vattr va; struct ucred *cred; - int error; struct vnode *vp; struct vmspace *vm; - vm_map_t map; + vm_pindex_t pindex; + vm_offset_t addr, clp; + unsigned int last_timestamp; + int error; PROC_LOCK_ASSERT(p, MA_OWNED); @@ -2162,44 +2170,57 @@ kern_proc_vmmap_out(struct proc *p, struct sbuf *sb) vm_map_lock_read(map); for (entry = map->header.next; entry != &map->header; entry = entry->next) { - vm_object_t obj, tobj, lobj; - vm_offset_t addr; - vm_paddr_t locked_pa; - int mincoreinfo; - if (entry->eflags & MAP_ENTRY_IS_SUB_MAP) continue; bzero(kve, sizeof(*kve)); kve->kve_private_resident = 0; + kve->kve_resident = 0; obj = entry->object.vm_object; if (obj != NULL) { - VM_OBJECT_RLOCK(obj); + for (tobj = obj; tobj != NULL; + tobj = tobj->backing_object) { + VM_OBJECT_RLOCK(tobj); + lobj = tobj; + } if (obj->shadow_count == 1) kve->kve_private_resident = obj->resident_page_count; - } - kve->kve_resident = 0; - addr = entry->start; - while (addr < entry->end) { - locked_pa = 0; - mincoreinfo = pmap_mincore(map->pmap, addr, &locked_pa); - if (locked_pa != 0) - 
vm_page_unlock(PHYS_TO_VM_PAGE(locked_pa)); - if (mincoreinfo & MINCORE_INCORE) - kve->kve_resident++; - if (mincoreinfo & MINCORE_SUPER) - kve->kve_flags |= KVME_FLAG_SUPER; - addr += PAGE_SIZE; - } - - for (lobj = tobj = obj; tobj; tobj = tobj->backing_object) { - if (tobj != obj) - VM_OBJECT_RLOCK(tobj); - if (lobj != obj) - VM_OBJECT_RUNLOCK(lobj); - lobj = tobj; + if (vmmap_skip_res_cnt) + goto skip_resident_count; + for (addr = entry->start; addr < entry->end;) { + pindex = OFF_TO_IDX(entry->offset + addr - + entry->start); + for (tobj = obj;;) { + m = vm_page_lookup(tobj, pindex); + if (m != NULL) + break; + if (tobj->backing_object == NULL) + break; + pindex += OFF_TO_IDX( + tobj->backing_object_offset); + tobj = tobj->backing_object; + } + if (m == NULL) { + addr += PAGE_SIZE; + continue; + } + if (m->psind != 0) + kve->kve_flags |= KVME_FLAG_SUPER; + clp = addr + pagesizes[m->psind] <= entry->end ? + pagesizes[m->psind] : entry->end - addr; + kve->kve_resident += clp / PAGE_SIZE; + addr += pagesizes[m->psind]; + } +skip_resident_count: + for (tobj = obj; tobj != NULL; + tobj = tobj->backing_object) { + if (tobj != obj && tobj != lobj) + VM_OBJECT_RUNLOCK(tobj); + } + } else { + lobj = NULL; } kve->kve_start = entry->start; @@ -2229,7 +2250,7 @@ kern_proc_vmmap_out(struct proc *p, struct sbuf *sb) freepath = NULL; fullpath = ""; - if (lobj) { + if (lobj != NULL) { vp = NULL; switch (lobj->type) { case OBJT_DEFAULT: --- b/sys/kern/kern_resource.c +++ b/sys/kern/kern_resource.c @@ -1212,6 +1212,17 @@ lim_cur(struct proc *p, int which) return (rl.rlim_cur); } +rlim_t +lim_cur_unlocked(struct proc *p, int which) +{ + struct rlimit rl; + + PROC_LOCK(p); + lim_rlimit(p, which, &rl); + PROC_UNLOCK(p); + return (rl.rlim_cur); +} + /* * Return a copy of the entire rlimit structure for the system limit * specified by 'which' in the rlimit structure pointed to by 'rlp'. --- b/sys/kern/sys_pipe.c +++ b/sys/kern/sys_pipe.c @@ -1293,13 +1293,13 @@ pipe_write(fp, uio, active_cred, flags, td) } /* - * Don't return EPIPE if I/O was successful + * Don't return EPIPE if any byte was written. + * EINTR and other interrupts are handled by generic I/O layer. + * Do not pretend that I/O succeeded for obvious user error + * like EFAULT. */ - if ((wpipe->pipe_buffer.cnt == 0) && - (uio->uio_resid == 0) && - (error == EPIPE)) { + if (uio->uio_resid != orig_resid && error == EPIPE) error = 0; - } if (error == 0) vfs_timestamp(&wpipe->pipe_mtime); --- b/sys/kern/vfs_vnops.c +++ b/sys/kern/vfs_vnops.c @@ -2177,12 +2177,10 @@ vn_utimes_perm(struct vnode *vp, struct vattr *vap, struct ucred *cred, { int error; - error = VOP_ACCESSX(vp, VWRITE_ATTRIBUTES, cred, td); - /* - * From utimes(2): - * Grant permission if the caller is the owner of the file or - * the super-user. If the time pointer is null, then write + * Grant permission if the caller is the owner of the file, or + * the super-user, or has ACL_WRITE_ATTRIBUTES permission on + * on the file. If the time pointer is null, then write * permission on the file is also sufficient. * * From NFSv4.1, draft 21, 6.2.1.3.1, Discussion of Mask Attributes: @@ -2190,6 +2188,7 @@ vn_utimes_perm(struct vnode *vp, struct vattr *vap, struct ucred *cred, * will be allowed to set the times [..] to the current * server time. 
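The reworked resident-page accounting above is what ends up in each kinfo_vmentry returned by the kern.proc.vmmap sysctl, and the per-page walk can be skipped entirely through the new kern.proc_vmmap_skip_resident_count knob. A sketch of a consumer using libutil's kinfo_getvmmap(3) to print kve_resident and the superpage flag for each mapping of a given pid (compile with -lutil):

#include <sys/types.h>
#include <sys/user.h>
#include <libutil.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>

int
main(int argc, char **argv)
{
	struct kinfo_vmentry *kv;
	int i, cnt;

	if (argc != 2)
		return (1);
	kv = kinfo_getvmmap(atoi(argv[1]), &cnt);
	if (kv == NULL)
		return (1);
	for (i = 0; i < cnt; i++)
		printf("%#jx-%#jx resident %ju pages%s\n",
		    (uintmax_t)kv[i].kve_start, (uintmax_t)kv[i].kve_end,
		    (uintmax_t)kv[i].kve_resident,
		    (kv[i].kve_flags & KVME_FLAG_SUPER) ? " [super]" : "");
	free(kv);
	return (0);
}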
*/ + error = VOP_ACCESSX(vp, VWRITE_ATTRIBUTES, cred, td); if (error != 0 && (vap->va_vaflags & VA_UTIMES_NULL) != 0) error = VOP_ACCESS(vp, VWRITE, cred, td); return (error); --- b/sys/sys/resourcevar.h +++ b/sys/sys/resourcevar.h @@ -128,6 +128,7 @@ struct plimit *lim_alloc(void); void lim_copy(struct plimit *dst, struct plimit *src); rlim_t lim_cur(struct proc *p, int which); +rlim_t lim_cur_unlocked(struct proc *p, int which); void lim_fork(struct proc *p1, struct proc *p2); void lim_free(struct plimit *limp); struct plimit --- b/sys/vm/vm_fault.c +++ b/sys/vm/vm_fault.c @@ -1252,30 +1252,59 @@ vm_fault_copy_entry(vm_map_t dst_map, vm_map_t src_map, src_object = src_entry->object.vm_object; src_pindex = OFF_TO_IDX(src_entry->offset); + KASSERT(upgrade || dst_entry->object.vm_object == NULL, + ("vm_fault_copy_entry: vm_object not NULL")); if (upgrade && (dst_entry->eflags & MAP_ENTRY_NEEDS_COPY) == 0) { dst_object = src_object; vm_object_reference(dst_object); } else { /* - * Create the top-level object for the destination entry. (Doesn't - * actually shadow anything - we copy the pages directly.) + * Create the top-level object for the destination + * entry. (Doesn't actually shadow anything - we copy + * the pages directly.) */ - dst_object = vm_object_allocate(OBJT_DEFAULT, - OFF_TO_IDX(dst_entry->end - dst_entry->start)); + vm_object_shadow(&dst_entry->object.vm_object, + &dst_entry->offset, OFF_TO_IDX(dst_entry->end - + dst_entry->start)); + dst_object = dst_entry->object.vm_object; #if VM_NRESERVLEVEL > 0 - dst_object->flags |= OBJ_COLORED; - dst_object->pg_color = atop(dst_entry->start); + if (dst_object != src_object) { + dst_object->flags |= OBJ_COLORED; + dst_object->pg_color = atop(dst_entry->start); + } #endif + + /* + * If not an upgrade, then enter the mappings in the + * pmap as read and/or execute accesses. Otherwise, + * enter them as write accesses. + * + * A writeable large page mapping is only created if + * all of the constituent small page mappings are + * modified. Marking PTEs as modified on inception + * allows promotion to happen without taking + * potentially large number of soft faults. + */ + access &= ~VM_PROT_WRITE; } + /* + * dst_entry->offset is either left unchanged in the upgrade + * case, or vm_object_shadow takes care of recalculating the + * offset depending on creation of the new object. + */ - VM_OBJECT_WLOCK(dst_object); - KASSERT(upgrade || dst_entry->object.vm_object == NULL, - ("vm_fault_copy_entry: vm_object not NULL")); - if (src_object != dst_object) { - dst_entry->object.vm_object = dst_object; - dst_entry->offset = 0; - dst_object->charge = dst_entry->end - dst_entry->start; + /* + * This can only happen for upgrade case, due to src_object + * reference bump above, and it means that all pages are + * private already. + */ + if (dst_object == src_object && + (src_entry->protection & VM_PROT_WRITE) == 0) { + KASSERT(upgrade, ("XXX")); + goto uncow; } + + VM_OBJECT_WLOCK(dst_object); if (fork_charge != NULL) { KASSERT(dst_entry->cred == NULL, ("vm_fault_copy_entry: leaked swp charge")); @@ -1290,19 +1319,6 @@ vm_fault_copy_entry(vm_map_t dst_map, vm_map_t src_map, } /* - * If not an upgrade, then enter the mappings in the pmap as - * read and/or execute accesses. Otherwise, enter them as - * write accesses. - * - * A writeable large page mapping is only created if all of - * the constituent small page mappings are modified. 
Marking - * PTEs as modified on inception allows promotion to happen - * without taking potentially large number of soft faults. - */ - if (!upgrade) - access &= ~VM_PROT_WRITE; - - /* * Loop through all of the virtual pages within the entry's * range, copying each page from the source object to the * destination object. Since the source is wired, those pages @@ -1408,6 +1424,7 @@ again: } VM_OBJECT_WUNLOCK(dst_object); if (upgrade) { +uncow: dst_entry->eflags &= ~(MAP_ENTRY_COW | MAP_ENTRY_NEEDS_COPY); vm_object_deallocate(src_object); } --- b/sys/vm/vm_object.c +++ b/sys/vm/vm_object.c @@ -2096,17 +2096,19 @@ boolean_t vm_object_coalesce(vm_object_t prev_object, vm_ooffset_t prev_offset, vm_size_t prev_size, vm_size_t next_size, boolean_t reserved) { - vm_pindex_t next_pindex; + vm_object_t shadow_object; + vm_page_t m; + vm_pindex_t next_pindex, pi; + boolean_t ret; if (prev_object == NULL) return (TRUE); + ret = FALSE; VM_OBJECT_WLOCK(prev_object); if ((prev_object->type != OBJT_DEFAULT && prev_object->type != OBJT_SWAP) || - (prev_object->flags & OBJ_TMPFS) != 0) { - VM_OBJECT_WUNLOCK(prev_object); - return (FALSE); - } + (prev_object->flags & OBJ_TMPFS) != 0) + goto out; /* * Try to collapse the object first @@ -2114,24 +2116,61 @@ vm_object_coalesce(vm_object_t prev_object, vm_ooffset_t prev_offset, vm_object_collapse(prev_object); /* - * Can't coalesce if: . more than one reference . paged out . shadows - * another object . has a copy elsewhere (any of which mean that the - * pages not mapped to prev_entry may be in use anyway) + * Can't coalesce if shadows another object, which means that + * the pages not mapped to prev_entry may be in use anyway. */ - if (prev_object->backing_object != NULL) { - VM_OBJECT_WUNLOCK(prev_object); - return (FALSE); - } + if (prev_object->backing_object != NULL) + goto out; prev_size >>= PAGE_SHIFT; next_size >>= PAGE_SHIFT; next_pindex = OFF_TO_IDX(prev_offset) + prev_size; - if ((prev_object->ref_count > 1) && - (prev_object->size != next_pindex)) { - VM_OBJECT_WUNLOCK(prev_object); - return (FALSE); + /* + * If object has more than one reference or is larger than the + * end of the previous mapping, still allow coalescing map + * entries for the case when this is due to other mappings of + * the object into the current address space. + */ + if (prev_object->ref_count > 1 && prev_object->size != next_pindex) { + /* + * Only one mapping allowed, otherwise coalesce could + * result in the contradictory content in the regions. + */ + if ((prev_object->flags & OBJ_ONEMAPPING) == 0) + goto out; + + /* No pages in the region, either resident ... */ + m = vm_page_find_least(prev_object, next_pindex); + if (m != NULL && m->pindex < next_pindex + next_size) + goto out; + /* ... or swapped out. */ + if (prev_object->type == OBJT_SWAP) { + for (pi = next_pindex; pi < next_pindex + next_size; + pi++) { + if (vm_pager_has_page(prev_object, pi, NULL, + NULL)) + goto out; + } + } + + /* + * Region must be not shadowed, otherwise the + * instantiated page in the our (backing) object could + * leak to the shadow. + */ + LIST_FOREACH(shadow_object, &prev_object->shadow_head, + shadow_list) { + KASSERT(shadow_object->backing_object == prev_object, + ("corrupted shadow")); + if (shadow_object->backing_object_offset < + next_pindex + next_size && + shadow_object->backing_object_offset + + shadow_object->size > next_pindex) + goto out; + } } + ret = TRUE; /* * Account for the charge. 
@@ -2183,8 +2222,9 @@ vm_object_coalesce(vm_object_t prev_object, vm_ooffset_t prev_offset, if (next_pindex + next_size > prev_object->size) prev_object->size = next_pindex + next_size; +out: VM_OBJECT_WUNLOCK(prev_object); - return (TRUE); + return (ret); } void --- b/sys/vm/vm_page.h +++ b/sys/vm/vm_page.h @@ -227,6 +227,7 @@ struct vm_domain { long vmd_segs; /* bitmask of the segments */ boolean_t vmd_oom; int vmd_pass; /* local pagedaemon pass */ + int vmd_oom_seq; struct vm_page vmd_marker; /* marker for pagedaemon private use */ }; --- b/sys/vm/vm_pageout.c +++ b/sys/vm/vm_pageout.c @@ -117,7 +117,8 @@ __FBSDID("$FreeBSD$"); static void vm_pageout(void); static int vm_pageout_clean(vm_page_t); static void vm_pageout_scan(struct vm_domain *vmd, int pass); -static void vm_pageout_mightbe_oom(struct vm_domain *vmd, int pass); +static void vm_pageout_mightbe_oom(struct vm_domain *vmd, int page_shortage, + int starting_page_shortage); struct proc *pageproc; @@ -147,6 +148,7 @@ int vm_pages_needed; /* Event on which pageout daemon sleeps */ int vm_pageout_deficit; /* Estimated number of pages deficit */ int vm_pageout_pages_needed; /* flag saying that the pageout daemon needs pages */ int vm_pageout_wakeup_thresh; +static int vm_pageout_oom_seq = 24; #if !defined(NO_SWAPPING) static int vm_pageout_req_swapout; /* XXX */ @@ -206,6 +208,10 @@ static int pageout_lock_miss; SYSCTL_INT(_vm, OID_AUTO, pageout_lock_miss, CTLFLAG_RD, &pageout_lock_miss, 0, "vget() lock misses during pageout"); +SYSCTL_INT(_vm, OID_AUTO, pageout_oom_seq, + CTLFLAG_RW, &vm_pageout_oom_seq, 0, + "side-to-side calls to oom detector to start OOM"); + #define VM_PAGEOUT_PAGE_COUNT 16 int vm_pageout_page_count = VM_PAGEOUT_PAGE_COUNT; @@ -910,7 +916,8 @@ vm_pageout_scan(struct vm_domain *vmd, int pass) vm_page_t m, next; struct vm_pagequeue *pq; vm_object_t object; - int act_delta, addl_page_shortage, deficit, maxscan, page_shortage; + int act_delta, addl_page_shortage, deficit, maxscan; + int page_shortage, starting_page_shortage; int vnodes_skipped = 0; int maxlaunder; int lockmode; @@ -951,6 +958,7 @@ vm_pageout_scan(struct vm_domain *vmd, int pass) page_shortage = vm_paging_target() + deficit; } else page_shortage = deficit = 0; + starting_page_shortage = page_shortage; /* * maxlaunder limits the number of dirty pages we flush per scan. @@ -1309,6 +1317,15 @@ relock_queues: vm_pagequeue_unlock(pq); /* + * If we are critically low on one of RAM or swap and low on + * the other, kill the largest process. However, we avoid + * doing this on the first pass in order to give ourselves a + * chance to flush out dirty vnode-backed pages and to allow + * active pages to be moved to the inactive queue and reclaimed. + */ + vm_pageout_mightbe_oom(vmd, page_shortage, starting_page_shortage); + + /* * Compute the number of pages we want to try to move from the * active queue to the inactive queue. */ @@ -1431,15 +1448,6 @@ relock_queues: vm_req_vmdaemon(VM_SWAP_NORMAL); #endif } - - /* - * If we are critically low on one of RAM or swap and low on - * the other, kill the largest process. However, we avoid - * doing this on the first pass in order to give ourselves a - * chance to flush out dirty vnode-backed pages and to allow - * active pages to be moved to the inactive queue and reclaimed. - */ - vm_pageout_mightbe_oom(vmd, pass); } static int vm_pageout_oom_vote; @@ -1450,18 +1458,36 @@ static int vm_pageout_oom_vote; * failed to reach free target is premature. 
 */
 static void
-vm_pageout_mightbe_oom(struct vm_domain *vmd, int pass)
+vm_pageout_mightbe_oom(struct vm_domain *vmd, int page_shortage,
+    int starting_page_shortage)
 {
 	int old_vote;
 
-	if (pass <= 1 || !((swap_pager_avail < 64 && vm_page_count_min()) ||
-	    (swap_pager_full && vm_paging_target() > 0))) {
+	if (starting_page_shortage <= 0 || starting_page_shortage !=
+	    page_shortage) {
+#if 0
+		if (vmd->vmd_oom_seq != 0)
+			printf("CLR oom_seq %d ps %d sps %d\n", vmd->vmd_oom_seq, page_shortage, starting_page_shortage);
+#endif
+		vmd->vmd_oom_seq = 0;
+	} else
+		vmd->vmd_oom_seq++;
+	if (vmd->vmd_oom_seq < vm_pageout_oom_seq) {
 		if (vmd->vmd_oom) {
 			vmd->vmd_oom = FALSE;
 			atomic_subtract_int(&vm_pageout_oom_vote, 1);
 		}
 		return;
 	}
+#if 0
+printf("OOM oom_seq %d ps %d sps %d\n", vmd->vmd_oom_seq, page_shortage, starting_page_shortage);
+#endif
+
+	/*
+	 * Do not follow the call sequence until OOM condition is
+	 * cleared.
+	 */
+	vmd->vmd_oom_seq = 0;
 	if (vmd->vmd_oom)
 		return;
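The vm.pageout_oom_seq sysctl introduced above (default 24) sets how many consecutive scans with an unrelieved page shortage must pass before this function votes for an OOM kill, so raising it makes the OOM decision more patient on systems that reclaim memory slowly. A small sketch of reading and raising the knob with sysctlbyname (setting requires root; equivalent to running sysctl vm.pageout_oom_seq=120):

#include <sys/types.h>
#include <sys/sysctl.h>
#include <stdio.h>

int
main(void)
{
	size_t len;
	int newval = 120, oldval;

	len = sizeof(oldval);
	if (sysctlbyname("vm.pageout_oom_seq", &oldval, &len,
	    &newval, sizeof(newval)) != 0) {
		perror("sysctlbyname");
		return (1);
	}
	printf("vm.pageout_oom_seq: %d -> %d\n", oldval, newval);
	return (0);
}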