Bug 29915

Summary: kernel panics on interaction with mlock and mmap
Product: Base System
Reporter: Joe DiMartino <joe>
Component: kern
Assignee: Bruce M Simpson <bms>
Status: Closed FIXED    
Severity: Affects Only Me    
Priority: Normal    
Version: 4.1-RELEASE   
Hardware: Any   
OS: Any   

Description Joe DiMartino 2001-08-21 01:10:01 UTC
There appears to be a bad interaction between mmap'ed kernel addresses
and mlock() that causes a VM panic upon exit.

panic: vm_page_unwire: invalid wire count: 0

Program received signal SIGTRAP, Trace/breakpoint trap.
Debugger (msg=0xc023165b "panic") at /usr/src/sys/i386/i386/db_interface.c:332
332                 cndebugoff(); 
(kgdb) bt           
#0  Debugger (msg=0xc023165b "panic") at /usr/src/sys/i386/i386/db_interface.c:332
#1  0xc016601a in panic (
    fmt=0xc0241600 "vm_page_unwire: invalid wire count: %d\n")
    at /usr/src/sys/kern/kern_shutdown.c:552
#2  0xc01e61df in vm_page_unwire (m=0xc04335c0, activate=1)
    at /usr/src/sys/vm/vm_page.c:1219
#3  0xc01de65c in vm_fault_unwire (map=0xd1669fc0, start=672096256,
    end=672100352) at /usr/src/sys/vm/vm_fault.c:981
#4  0xc01e11b0 in vm_map_entry_unwire (map=0xd1669fc0, entry=0xd1be1840)
    at /usr/src/sys/vm/vm_map.c:1713
#5  0xc01e12d2 in vm_map_delete (map=0xd1669fc0, start=0, end=3217031168)
    at /usr/src/sys/vm/vm_map.c:1803
#6  0xc01e1409 in vm_map_remove (map=0xd1669fc0, start=0, end=3217031168)
    at /usr/src/sys/vm/vm_map.c:1859
#7  0xc015e421 in exit1 (p=0xd1ba6300, rv=0) at /usr/src/sys/kern/kern_exit.c:216
#8  0xc015e1f9 in exit (p=0xd1ba6300, uap=0xd1c08f80)
    at /usr/src/sys/kern/kern_exit.c:103
#9  0xc0219a22 in syscall2 (frame={tf_fs = -1078001617, tf_es = -1078001617,
      tf_ds = -1078001617, tf_edi = 0, tf_esi = -1, tf_ebp = -1077937156, 
      tf_isp = -775909420, tf_ebx = 672019876, tf_edx = 672019488, 
      tf_ecx = -1077937224, tf_eax = 1, tf_trapno = 8, tf_err = 2, 
      tf_eip = 671729148, tf_cs = 31, tf_eflags = 647, tf_esp = -1077937200,
      tf_ss = 47}) at /usr/src/sys/i386/i386/trap.c:1253
#10 0xc020b115 in Xint0x80_syscall ()

How-To-Repeat: The following program will display this behaviour every time it is run.
It does an nlist to extract the kernel virtual address for "boottime",
mmap()'s that into the process address space and then mlock()'s the
entire address space (including the mmap region).  It panics on exit
(see stack trace in Full Description).  Of course, you have to have
read permission on /dev/kmem.

[I know I'm not supposed to submit code as plain text, but there was
nowhere in the problem report form to submit a tarball.]

--------- Begin source ----------
#include <stdio.h>
#include <unistd.h>
#include <stdlib.h>
#include <sys/time.h>
#include <sys/sysctl.h>
#include <sys/ticksinfo.h>
#include <fcntl.h>

#include <sys/types.h>
#include <sys/mman.h>
#include <sys/ipc.h>

#include <paths.h>
#include <nlist.h>
#include <errno.h>

/* Shorthand for struct timeval; used for the boottime samples in this file. */
typedef struct timeval  timeval_t;

/* File-local helpers; both are defined after main(). */
static void             mlock_as(void);
static timeval_t        getboottime(void);


/*
 * Symbol list passed to nlist(3): the single symbol "boottime", followed
 * by an entry with an empty name that ends the list.
 */
struct nlist    timeaddr_nl[] = {
        { "boottime" },
        { "" },
};

/*
 * int
 * main(int argc, char *argv[])
 *      Sample the kernel boottime, lock the address space with mlock_as(),
 *      sample boottime again and print the difference between the two
 *      samples.  Always exits with status 0.
 *
 *      NOTE(review): the value sampled is the kernel's "boottime" symbol,
 *      which presumably does not advance while the program runs, so the
 *      printed difference is likely 0 -- the program exists to reproduce
 *      the panic described in the report, not to time anything.
 */
int
main(int argc, char *argv[])
{
        timeval_t       time1, time2;
        long            secs, usecs;

        /* The program takes no arguments. */
        (void)argc;
        (void)argv;

        time1 = getboottime();

        /*
         * lock all existing pages of user virtual space into memory.
         */
        mlock_as();

        time2 = getboottime();

        /*
         * Normalize the difference so the microsecond part is never
         * negative: borrow one second when time2's tv_usec is smaller.
         */
        secs = (long)(time2.tv_sec - time1.tv_sec);
        usecs = (long)(time2.tv_usec - time1.tv_usec);
        if (usecs < 0) {
                secs--;
                usecs += 1000000L;
        }

        /* tv_usec counts microseconds, so it needs six zero-padded digits. */
        printf("It took %ld.%06ld seconds to lock pages.\n", secs, usecs);

        exit(0);
}

/*
 * void
 * mlock_as(void)
 *      Lock all pages presently in existence in the address space of the
 *      calling process into memory, such that we are immune to page-faults.
 *
 *      Walks the address space one page at a time starting at address 0.
 *      Each page is first probed with mincore(); an EINVAL from that probe
 *      is taken as the end of user virtual space and stops the walk.
 *      Every other page is handed to mlock(), whose result is ignored.
 */
static void
mlock_as(void)
{
        char    *addr;          /* char *, so pointer arithmetic is valid C */
        size_t  pagesize;
        char    pagestate;      /* mincore() result byte; value is not used */

        pagesize = getpagesize();
        addr = NULL;
        do {
                if (mincore(addr, pagesize, &pagestate) == -1 &&
                    errno == EINVAL) {
                        /* We reached the end of user virtual space */
                        break;
                }
                /*
                 * Best effort: the return value is deliberately dropped,
                 * presumably because unmapped pages are expected to fail.
                 */
                (void)mlock(addr, pagesize);
                addr += pagesize;
        } while (addr != NULL);                 /* safety check for addr wrap */
}

static volatile timeval_t *kboottime = 0;

static timeval_t
getboottime(void)
{
        timeval_t       now;

        if (kboottime == 0) {
                int     nl;
                int     kfd;
                off_t   kbootaddr;

                printf("bootfile is: '%s'\n", getbootfile());
                if ((nl = nlist(getbootfile(), timeaddr_nl)) == -1) {
                        (void) perror("nlist failed");
                        exit(errno);
                }
                kbootaddr = (off_t)timeaddr_nl[0].n_value;
                printf("boottime addr is: 0x%qx\n", kbootaddr);

                /*
                 * Map the kernel boottime into this processes address
                 * space to avoid the syscall overhead for repeated use.
                 */
                kfd = open("/dev/kmem", O_RDONLY);
                kboottime = (timeval_t *)mmap((void *)NULL, sizeof(*kboottime),
                                                PROT_READ, MAP_NOCORE,
                                                kfd, kbootaddr);
                (void) close(kfd);
                if (kboottime == MAP_FAILED) {
                        (void) perror("Cannot map kernel boottime");
                        fflush(stderr);
                        exit(errno);
                }
        }

        now = *kboottime;
        return(now);
}
--------- End source ----------
Comment 1 Bruce M Simpson freebsd_committer freebsd_triage 2003-10-11 08:51:39 UTC
Responsible Changed
From-To: freebsd-bugs->bms

May be related to other problems within VM, will attempt to reproduce.
Comment 2 Bruce M Simpson freebsd_committer freebsd_triage 2003-11-25 15:25:01 UTC
State Changed
From-To: open->analyzed

Waiting on feedback from alc@. I believe it's to do with the fact that the 
underlying system pages are effectively *always* wired yet the wirecount is 
*always* 0 for such pages. We probably need to check for VM_ALLOC_NOOBJ 
in vm_page_wire(9).
Comment 3 Alan Cox freebsd_committer freebsd_triage 2004-05-22 05:58:21 UTC
State Changed
From-To: analyzed->patched

Patch applied to HEAD.
Comment 4 Bruce M Simpson freebsd_committer freebsd_triage 2004-11-02 06:57:08 UTC
State Changed
From-To: patched->closed

RELENG_5 has now been branched as -STABLE