| Summary: | 3.3-RELEASE crashing often | ||
|---|---|---|---|
| Product: | Base System | Reporter: | dgilbert <dgilbert> |
| Component: | kern | Assignee: | freebsd-bugs (Nobody) <bugs> |
| Status: | Closed FIXED | ||
| Severity: | Affects Only Me | ||
| Priority: | Normal | ||
| Version: | 3.3-RELEASE | ||
| Hardware: | Any | ||
| OS: | Any | ||
More news on this one. We moved to new hardware, and now I'm getting multiple crashes with the following signature: The common things here seem to be the vfork() --> fork1() --> fdcopy() Fatal trap 12: page fault while in kernel mode fault virtual address = 0xd fault code = supervisor write, page not present instruction pointer = 0x8:0xc01435d7 stack pointer = 0x10:0xcc348f08 frame pointer = 0x10:0xcc348f2c code segment = base rx0, limit 0xfffff, type 0x1b = DPL 0, pres 1, def32 1, gran 1 processor eflags = interrupt enabled, resume, IOPL = 0 current process = 38090 (httpd_1.3.9) interrupt mask = trap number = 12 panic: page fault (kgdb) bt #0 boot (howto=256) at ../../kern/kern_shutdown.c:285 #1 0xc014aad1 in panic (fmt=0xc02385ba "page fault") at ../../kern/kern_shutdown.c:446 #2 0xc0209712 in trap_fatal (frame=0xcc348ecc, eva=13) at ../../i386/i386/trap.c:942 #3 0xc02093cb in trap_pfault (frame=0xcc348ecc, usermode=0, eva=13) at ../../i386/i386/trap.c:835 #4 0xc0208ffe in trap (frame={tf_es = 16, tf_ds = 16, tf_edi = -1044124544, tf_esi = -1037650432, tf_ebp = -868970708, tf_isp = -868970764, tf_ebx = 1769, tf_edx = -1052106376, tf_ecx = 0, tf_eax = 1, tf_trapno = 12, tf_err = 2, tf_eip = -1072417321, tf_cs = 8, tf_eflags = 66050, tf_esp = -1052187648, tf_ss = -1052089344}) at ../../i386/i386/trap.c:437 #5 0xc01435d7 in fdcopy (p=0xcc3079c0) at ../../kern/kern_descrip.c:951 #6 0xc014587b in fork1 (p1=0xcc3079c0, flags=-2147483596) at ../../kern/kern_fork.c:379 #7 0xc014533b in vfork (p=0xcc3079c0, uap=0xcc348f94) at ../../kern/kern_fork.c:109 #8 0xc020995b in syscall (frame={tf_es = 39, tf_ds = 39, tf_edi = 226087504, tf_esi = 226086800, tf_ebp = -1077949752, tf_isp = -868970524, tf_ebx = 673171048, tf_edx = 226066448, tf_ecx = 672877149, tf_eax = 66, tf_trapno = 7, tf_err = 2, tf_eip = 672936705, tf_cs = 31, tf_eflags = 514, tf_esp = -1077949796, tf_ss = 39}) at ../../i386/i386/trap.c:1100 #9 0xc01feedc in Xint0x80_syscall () This is very worrysome. What we seem to have here is memory
corruption of some sort, I think (but years of sysadm has dulled by
gdb usage)...
From a simlar, but hour later crash dump:
(kgdb) bt
#0 boot (howto=256) at ../../kern/kern_shutdown.c:285
#1 0xc014aad1 in panic (fmt=0xc02385ba "page fault")
at ../../kern/kern_shutdown.c:446
#2 0xc0209712 in trap_fatal (frame=0xcc332ecc, eva=43)
at ../../i386/i386/trap.c:942
#3 0xc02093cb in trap_pfault (frame=0xcc332ecc, usermode=0, eva=43)
at ../../i386/i386/trap.c:835
#4 0xc0208ffe in trap (frame={tf_es = 16, tf_ds = 16, tf_edi = -1051776000,
tf_esi = -1051100288, tf_ebp = -869060820, tf_isp = -869060876,
tf_ebx = 1863, tf_edx = -1050021888, tf_ecx = 0, tf_eax = 31,
tf_trapno = 12, tf_err = 2, tf_eip = -1072417321, tf_cs = 8,
tf_eflags = 66050, tf_esp = -1051614208, tf_ss = -1050004480})
at ../../i386/i386/trap.c:437
#5 0xc01435d7 in fdcopy (p=0xcc304f40) at ../../kern/kern_descrip.c:951
#6 0xc014587b in fork1 (p1=0xcc304f40, flags=-2147483596)
at ../../kern/kern_fork.c:379
#7 0xc014533b in vfork (p=0xcc304f40, uap=0xcc332f94)
at ../../kern/kern_fork.c:109
#8 0xc020995b in syscall (frame={tf_es = 39, tf_ds = 39, tf_edi = 226316880,
tf_esi = 226066288, tf_ebp = -1077949592, tf_isp = -869060636,
tf_ebx = 673171048, tf_edx = 226250768, tf_ecx = 672877149, tf_eax = 66,
tf_trapno = 12, tf_err = 2, tf_eip = 672936705, tf_cs = 31,
tf_eflags = 518, tf_esp = -1077949636, tf_ss = 39})
at ../../i386/i386/trap.c:1100
#9 0xc01feedc in Xint0x80_syscall ()
(kgdb) frame 5
#5 0xc01435d7 in fdcopy (p=0xcc304f40) at ../../kern/kern_descrip.c:951
951 (*fpp)->f_count++;
(kgdb) p fpp
$1 = (struct file **) 0x0
(kgdb) l
946 bcopy(fdp->fd_ofiles, newfdp->fd_ofiles, i * sizeof(struct file **));
947 bcopy(fdp->fd_ofileflags, newfdp->fd_ofileflags, i * sizeof(char));
948 fpp = newfdp->fd_ofiles;
949 for (i = newfdp->fd_lastfile; i-- >= 0; fpp++)
950 if (*fpp != NULL)
951 (*fpp)->f_count++;
952 return (newfdp);
953 }
954
955 /*
(kgdb) p newfdp
$2 = (struct filedesc *) 0xc1597b80
(kgdb) p *newfdp
$3 = {fd_ofiles = 0xc169d000, fd_ofileflags = 0xc16a3400 "",
fd_cdir = 0xcc3b5bc0, fd_rdir = 0xcc1cfe00, fd_nfiles = 6400,
fd_lastfile = 3912, fd_freefile = 6, fd_cmask = 18, fd_refcnt = 1}
now... this implies that fpp somehow gets set to NULL, but:
(kgdb) p **(newfdp->fd_ofiles)
$7 = {f_list = {le_next = 0xc1696fc0, le_prev = 0xc1567b00}, f_flag = 1,
f_type = 1, f_count = 41, f_msgcount = 0, f_cred = 0xc0e27200,
f_ops = 0xc024a294, f_seqcount = 1, f_nextread = 0, f_offset = 0,
f_data = 0xcc1cde80 ""}
(kgdb) p **(newfdp->fd_ofiles+1)
$8 = {f_list = {le_next = 0xc13958c0, le_prev = 0xc1419f40}, f_flag = 2,
f_type = 1, f_count = 41, f_msgcount = 0, f_cred = 0xc0e27200,
f_ops = 0xc024a294, f_seqcount = 1, f_nextread = 0, f_offset = 0,
f_data = 0xcc1cde80 ""}
(kgdb) p **(newfdp->fd_ofiles+2)
$9 = {f_list = {le_next = 0xc13c60c0, le_prev = 0xc139b900}, f_flag = 10,
f_type = 1, f_count = 82, f_msgcount = 0, f_cred = 0xc0e27200,
f_ops = 0xc024a294, f_seqcount = 1, f_nextread = 0, f_offset = 26016907,
f_data = 0xcc27b2c0 ""}
Then I thought about it... and fpp itself should not become NULL in
this case... it's being incremented from some valid pointer value.
Ideas?
Dave.
--
============================================================================
|David Gilbert, Velocet Communications. | Two things can only be |
|Mail: dgilbert@velocet.net | equal if and only if they |
|http://www.velocet.net/~dgilbert | are precisely opposite. |
=========================================================GLO================
As shown below, the problem has been resolved. Close the bug. -- [ Joseph Mallett <jmallett@xMach.org> ] [ http://srcsys.org ] [ xMach Core Team xMach: Proactively Unbloated Microkernel BSD ] [ FreeBSD, NetBSD, & xMach User; (Obj)C(++) Coder ] [ http://xMach.org ] On Thu, 24 May 2001, David Gilbert wrote: > Oi... I'm sure that the issue got solved somehow ... 'cause that was a > core server (if I'm not mistaken, it's the server that xmach.org is > on). That machine is running some concocktion of 3.4-STABLE (IIRC) > now... not 3.3 as the PR intones. It may have been the whole tl0 > driver debacle... in which our solution was to swap out the card for > dc0 drivers. > > I don't think I can be more helpful because I don't have the resources > to attempt repeating the problem. > > Dave. > State Changed From-To: open->closed Closed at originator's request. |
The machine is running a busy webserver with many virtual hosts. The virtual size of the httpd process may be an issue --- they average about 90M virtual, but often don't touch all that memory... as then they'd be huge. I'm including several tracebacks. I have kernel droppings for these, if helpful panic: rlist_free: free end overlaps already freed area (kgdb) bt #0 boot (howto=256) at ../../kern/kern_shutdown.c:285 #1 0xc014838c in at_shutdown ( function=0xc01e9737 <__set_sysinit_set_sym_logdev_sys_init+75>, arg=0x7, queue=0) at ../../kern/kern_shutdown.c:446 #2 0xc0151da5 in rlist_free (rlh=0xc0236c58, start=0, end=7) at ../../kern/subr_rlist.c:155 #3 0xc01aa887 in swap_pager_freeswapspace (object=0xc962a6c8, from=0, to=7) at ../../vm/swap_pager.c:422 #4 0xc01aaabd in swap_pager_free_swap (object=0xc962a6c8) at ../../vm/swap_pager.c:518 #5 0xc01aaf56 in swap_pager_dealloc (object=0xc962a6c8) at ../../vm/swap_pager.c:771 #6 0xc01b6cba in vm_pager_deallocate (object=0xc962a6c8) at ../../vm/vm_pager.c:246 #7 0xc01b297a in vm_object_terminate (object=0xc962a6c8) at ../../vm/vm_object.c:467 #8 0xc01b285c in vm_object_deallocate (object=0xc962a6c8) at ../../vm/vm_object.c:386 #9 0xc01afc07 in vm_map_entry_delete (map=0xc9623940, entry=0xc962f6e0) at ../../vm/vm_map.c:1735 #10 0xc01afde8 in vm_map_delete (map=0xc9623940, start=0, end=3217022976) at ../../vm/vm_map.c:1849 #11 0xc01afe6c in vm_map_remove (map=0xc9623940, start=0, end=3217022976) at ../../vm/vm_map.c:1874 #12 0xc014249b in exit1 (p=0xc960bc80, rv=0) at ../../kern/kern_exit.c:218 #13 0xc01422a0 in exit1 (p=0xc960bc80, rv=-916185196) at ../../kern/kern_exit.c:104 #14 0xc01d03e7 in syscall (frame={tf_es = 39, tf_ds = 39, tf_edi = 0, tf_esi = -1, tf_ebp = -1077945368, tf_isp = -916185116, tf_ebx = 671987304, tf_edx = 0, tf_ecx = 134540128, tf_eax = 1, tf_trapno = 0, tf_err = 2, tf_eip = 671732356, tf_cs = 31, tf_eflags = 582, tf_esp = -1077945388, tf_ss = 39}) at ../../i386/i386/trap.c:1100 #15 0xc01c64bc in Xint0x80_syscall () panic: rlist_free: free start overlaps already freed area #0 boot (howto=256) at ../../kern/kern_shutdown.c:285 #1 0xc014838c in at_shutdown ( function=0xc01e9768 <__set_sysinit_set_sym_logdev_sys_init+124>, arg=0x7, queue=0) at ../../kern/kern_shutdown.c:446 #2 0xc0151dbe in rlist_free (rlh=0xc0236c58, start=0, end=7) at ../../kern/subr_rlist.c:159 #3 0xc01aa887 in swap_pager_freeswapspace (object=0xc95b80f8, from=0, to=7) at ../../vm/swap_pager.c:422 #4 0xc01aa968 in swap_pager_freespace (object=0xc95b80f8, start=39, size=19352) at ../../vm/swap_pager.c:445 #5 0xc01afdc9 in vm_map_delete (map=0xc95954c0, start=134832128, end=214097920) at ../../vm/vm_map.c:1833 #6 0xc01afe6c in vm_map_remove (map=0xc95954c0, start=134832128, end=214097920) at ../../vm/vm_map.c:1874 #7 0xc01b7503 in obreak (p=0xc957d020, uap=0xc9615f94) at ../../vm/vm_unix.c:107 #8 0xc01d03e7 in syscall (frame={tf_es = 39, tf_ds = 39, tf_edi = 134832128, tf_esi = 134549824, tf_ebp = -1077946648, tf_isp = -916365340, tf_ebx = 671987304, tf_edx = 671987284, tf_ecx = 671987280, tf_eax = 17, tf_trapno = 0, tf_err = 2, tf_eip = 671951256, tf_cs = 31, tf_eflags = 647, tf_esp = -1077946684, tf_ss = 39}) at ../../i386/i386/trap.c:1100 #9 0xc01c64bc in Xint0x80_syscall () #10 0x280d2402 in ?? () #11 0x804c1a8 in ?? () #12 0x804b087 in ?? () #13 0x804a6c1 in ?? () #14 0x80490f5 in ?? () panic: rlist_free: free end overlaps already freed area (kgdb) bt #0 boot (howto=256) at ../../kern/kern_shutdown.c:285 #1 0xc014838c in at_shutdown ( function=0xc01e9737 <__set_sysinit_set_sym_logdev_sys_init+75>, arg=0x9, queue=2) at ../../kern/kern_shutdown.c:446 #2 0xc0151da5 in rlist_free (rlh=0xc0236c58, start=2, end=9) at ../../kern/subr_rlist.c:155 #3 0xc01aa887 in swap_pager_freeswapspace (object=0xc95cf174, from=2, to=9) at ../../vm/swap_pager.c:422 #4 0xc01aa968 in swap_pager_freespace (object=0xc95cf174, start=37, size=15946) at ../../vm/swap_pager.c:445 #5 0xc01afdc9 in vm_map_delete (map=0xc9476d80, start=134823936, end=200138752) at ../../vm/vm_map.c:1833 #6 0xc01afe6c in vm_map_remove (map=0xc9476d80, start=134823936, end=200138752) at ../../vm/vm_map.c:1874 #7 0xc01b7503 in obreak (p=0xc94691e0, uap=0xc9530f94) at ../../vm/vm_unix.c:107 #8 0xc01d03e7 in syscall (frame={tf_es = 39, tf_ds = 39, tf_edi = 134823936, tf_esi = 134549792, tf_ebp = -1077946648, tf_isp = -917303324, tf_ebx = 671987304, tf_edx = 671987284, tf_ecx = 671987280, tf_eax = 17, tf_trapno = 0, tf_err = 2, tf_eip = 671951256, tf_cs = 31, tf_eflags = 647, tf_esp = -1077946684, tf_ss = 39}) at ../../i386/i386/trap.c:1100 #9 0xc01c64bc in Xint0x80_syscall () #10 0x280d2402 in ?? () #11 0x804c002 in ?? () #12 0x804b087 in ?? () #13 0x804a6c1 in ?? () #14 0x80490f5 in ?? () panic: rlist_free: free start overlaps already freed area (kgdb) bt #0 boot (howto=256) at ../../kern/kern_shutdown.c:285 #1 0xc014838c in at_shutdown ( function=0xc01e9768 <__set_sysinit_set_sym_logdev_sys_init+124>, arg=0x7, queue=0) at ../../kern/kern_shutdown.c:446 #2 0xc0151dbe in rlist_free (rlh=0xc0236c58, start=0, end=7) at ../../kern/subr_rlist.c:159 #3 0xc01aa887 in swap_pager_freeswapspace (object=0xc9630c1c, from=0, to=7) at ../../vm/swap_pager.c:422 #4 0xc01aa968 in swap_pager_freespace (object=0xc9630c1c, start=39, size=18755) at ../../vm/swap_pager.c:445 #5 0xc01afdc9 in vm_map_delete (map=0xc9466180, start=134832128, end=211652608) at ../../vm/vm_map.c:1833 #6 0xc01afe6c in vm_map_remove (map=0xc9466180, start=134832128, end=211652608) at ../../vm/vm_map.c:1874 #7 0xc01b7503 in obreak (p=0xc9470780, uap=0xc9492f94) at ../../vm/vm_unix.c:107 #8 0xc01d03e7 in syscall (frame={tf_es = 39, tf_ds = 39, tf_edi = 134832128, tf_esi = 134549824, tf_ebp = -1077946648, tf_isp = -917950492, tf_ebx = 671987304, tf_edx = 671987284, tf_ecx = 671987280, tf_eax = 17, tf_trapno = 0, tf_err = 2, tf_eip = 671951256, tf_cs = 31, tf_eflags = 647, tf_esp = -1077946684, tf_ss = 39}) at ../../i386/i386/trap.c:1100 #9 0xc01c64bc in Xint0x80_syscall () #10 0x280d2402 in ?? () #11 0x804c002 in ?? () #12 0x804b087 in ?? () #13 0x804a6c1 in ?? () #14 0x80490f5 in ?? () Fix: So far, (will update this is not true) a 3.2 kernel installed on this machine appears to not crash. How-To-Repeat: I can definately reproduce this at will, but I don't have the leaway with the customers to experiment too much to determine the exact conditions. I expect that load is an issue as my other 3.3 machines don't exhibit these symptoms