Bug 48560

Summary: Panic in if_vlan.c on CURRENT
Product: Base System Reporter: Tilman Linneweh <tilman>
Component: kern Assignee: Andre Oppermann <andre>
Status: Closed FIXED    
Severity: Affects Only Me CC: arved
Priority: Normal    
Version: 5.0-CURRENT   
Hardware: Any   
OS: Any   

Description Tilman Linneweh 2003-02-22 13:50:09 UTC
	I can reproducibly panic my CURRENT systems with vlan devices.

/usr/src/sys/vm/uma_core.c:1330: could sleep with "inp" locked from
/usr/src/sys/netinet/udp_usrreq.c:982
/usr/src/sys/vm/uma_core.c:1330: could sleep with "udp" locked from
/usr/src/sys/netinet/udp_usrreq.c:976

#7  0xc0336cd8 in calltrap () at {standard input}:96
#8  0xc0248939 in witness_sleep (check_only=1, lock=0x0,
    file=0xc039d924 "/usr/src/sys/vm/uma_core.c", line=1330)
    at /usr/src/sys/kern/subr_witness.c:962
#9  0xc031815e in uma_zalloc_arg (zone=0xc083aa20, udata=0x0, flags=0)
    at /usr/src/sys/vm/uma_core.c:1330
#10 0xc0222f27 in malloc (size=0, type=0xc03e2ec0, flags=0)
    at /usr/src/sys/kern/kern_malloc.c:185
---Type <return> to continue, or q <return> to quit---
#11 0xc028dd37 in vlan_setmulti (ifp=0xc1970000)
    at /usr/src/sys/net/if_vlan.c:171
#12 0xc028e8fb in vlan_ioctl (ifp=0xc1970000, cmd=0, data=0x0)
    at /usr/src/sys/net/if_vlan.c:771
#13 0xc0284401 in if_delmulti (ifp=0xc1970000, sa=0xc1c95080)
    at /usr/src/sys/net/if.c:1916
#14 0xc0295cc8 in in_delmulti (inm=0xc18879a0)
    at /usr/src/sys/netinet/in.c:888

(kgdb) fr 13
#13 0xc0284401 in if_delmulti (ifp=0xc1970000, sa=0xc1c95080)
    at /usr/src/sys/net/if.c:1916
1916            ifp->if_ioctl(ifp, SIOCDELMULTI, 0);
(kgdb) list 1916
1911                    return 0;
1912            }
1913
1914            s = splimp();
1915            TAILQ_REMOVE(&ifp->if_multiaddrs, ifma, ifma_link);
1916            ifp->if_ioctl(ifp, SIOCDELMULTI, 0);
1917            splx(s);
1918            free(ifma->ifma_addr, M_IFMADDR);
1919            free(sa, M_IFMADDR);
1920            free(ifma, M_IFMADDR);

(kgdb) fr 12
#12 0xc028e8fb in vlan_ioctl (ifp=0xc1970000, cmd=0, data=0x0)
    at /usr/src/sys/net/if_vlan.c:771
771                     error = vlan_setmulti(ifp);
(kgdb) list 771
766                             error = vlan_set_promisc(ifp);
767                     break;
768
769             case SIOCADDMULTI:
770             case SIOCDELMULTI:
771                     error = vlan_setmulti(ifp);
772                     break;
773             default:
774                     error = EINVAL;
775             }

(kgdb) fr 11
#11 0xc028dd37 in vlan_setmulti (ifp=0xc1970000)
    at /usr/src/sys/net/if_vlan.c:171
171                     mc = malloc(sizeof(struct vlan_mc_entry), M_VLAN, 0);
(kgdb) list 171
166
167             /* Now program new ones. */
168             TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) {
169                     if (ifma->ifma_addr->sa_family != AF_LINK)
170                             continue;
171                     mc = malloc(sizeof(struct vlan_mc_entry), M_VLAN, 0); 
172                     bcopy(LLADDR((struct sockaddr_dl *)ifma->ifma_addr), 
173                         (char *)&mc->mc_addr, ETHER_ADDR_LEN); 
174                    SLIST_INSERT_HEAD(&sc->vlan_mc_listhead, mc, mc_entries); 
175           bcopy(LLADDR((struct sockaddr_dl *)ifma->ifma_addr),

(kgdb) fr 10
#10 0xc0222f27 in malloc (size=0, type=0xc03e2ec0, flags=0)
    at /usr/src/sys/kern/kern_malloc.c:185
185                     va = uma_zalloc(zone, flags);
(kgdb) list 185
180                     indx = kmemsize[size >> KMEM_ZSHIFT];
181                     zone = kmemzones[indx].kz_zone;
182     #ifdef MALLOC_PROFILE
183                     krequests[size >> KMEM_ZSHIFT]++;
184     #endif
185                     va = uma_zalloc(zone, flags);
186                     mtx_lock(&ksp->ks_mtx);
187                     if (va == NULL)
188                             goto out;
189

(kgdb) fr 9
#9  0xc031815e in uma_zalloc_arg (zone=0xc083aa20, udata=0x0, flags=0)
    at /usr/src/sys/vm/uma_core.c:1330
1330                    WITNESS_SLEEP(1, NULL);
(kgdb) list 1330
1325    #endif
1326
1327            if (!(flags & M_NOWAIT)) {
1328                    KASSERT(curthread->td_intr_nesting_level == 0,
1329                       ("malloc without M_NOWAIT in interrupt context"));
1330                    WITNESS_SLEEP(1, NULL);
1331            }
1332
1333    zalloc_restart:
1334            cpu = PCPU_GET(cpuid);

Fix: 

Jeffrey Hsu suggested changing M_WAITOK in line 171 to M_NOWAIT.

This results in the following backtrace:

Fatal trap 12: page fault while in kernel mode
fault virtual address   = 0xdeadc0de

#9  0xc0336d18 in calltrap () at {standard input}:96
#10 0xc028dcf7 in vlan_setmulti (ifp=0xc1970000)
    at /usr/src/sys/net/if_vlan.c:160
---Type <return> to continue, or q <return> to quit---
#11 0xc028e93b in vlan_ioctl (ifp=0xc1970000, cmd=0, data=0x0)
    at /usr/src/sys/net/if_vlan.c:777
#12 0xc0284401 in if_delmulti (ifp=0xc1970000, sa=0xc1c97140)
    at /usr/src/sys/net/if.c:1916
#13 0xc0295d08 in in_delmulti (inm=0xc18874c0)
    at /usr/src/sys/netinet/in.c:888
#14 0xc029ca41 in ip_freemoptions (imo=0xc1c85c80)
    at /usr/src/sys/netinet/ip_output.c:2126

(kgdb) fr 12
#12 0xc0284401 in if_delmulti (ifp=0xc1970000, sa=0xc1c97140)
    at /usr/src/sys/net/if.c:1916
1916            ifp->if_ioctl(ifp, SIOCDELMULTI, 0);
(kgdb) fr 11
#11 0xc028e93b in vlan_ioctl (ifp=0xc1970000, cmd=0, data=0x0)
    at /usr/src/sys/net/if_vlan.c:777
777                     error = vlan_setmulti(ifp);
(kgdb) fr 10
#10 0xc028dcf7 in vlan_setmulti (ifp=0xc1970000)
    at /usr/src/sys/net/if_vlan.c:160
160                     error = if_delmulti(ifp_p, (struct sockaddr *)&sdl);
(kgdb) list 160
155
156             /* First, remove any existing filter entries. */
157             while(SLIST_FIRST(&sc->vlan_mc_listhead) != NULL) {
158                     mc = SLIST_FIRST(&sc->vlan_mc_listhead);
159                     bcopy((char *)&mc->mc_addr, LLADDR(&sdl), ETHER_ADDR_LEN);
160                     error = if_delmulti(ifp_p, (struct sockaddr *)&sdl);
161                     if (error)
162                             return(error);
163                     SLIST_REMOVE_HEAD(&sc->vlan_mc_listhead, mc_entries);
164                     free(mc, M_VLAN);


This may be because of a race condition.
How-To-Repeat: 	Create some vlan devices on a CURRENT system.
	Type:
	# routed; killall routed
Comment 1 Tilman Linneweh 2003-05-21 09:05:41 UTC
FYI: This panic is still reproducible on 5.1-BETA

# routed; killall routed
malloc() of "16" with the following non-sleepable locks held:
exclusive sleep mutex inp r = 0 (0xc1a58b60) locked @ /usr/src/sys/netinet/udp_usrreq.c:983
exclusive sleep mutex udp r = 0 (0xc045328c) locked @ /usr/src/sys/netinet/udp_usrreq.c:977
Debugger("witness_warn")
Stopped at      Debugger+0x45:  xchgl   %ebx,in_Debugger.0

db> trace
Debugger(c037c890) at Debugger+0x45
witness_warn(5,0,c03b97bc,c0393588) at witness_warn+0x179
uma_zalloc_arg(c083aa20,0,2) at uma_zalloc_arg+0x3c
malloc(c,c03fef00,2,c18b0400,0) at malloc+0xb6
vlan_setmulti(c198c000,0,c03fe524,0,c039e8f9) at vlan_setmulti+0xd7
vlan_ioctl(c198c000,80206932,0) at vlan_ioctl+0x29b
if_delmulti(c198c000,c1990ee0) at if_delmulti+0x121
in_delmulti(c18d7b80) at in_delmulti+0x68
ip_freemoptions(c1acb180,c1a58ab0,c1b34d00,0,cd46ab74) at ip_freemoptions+0x21
in_pcbdetach(c1a58ab0,c1897258,cd46ab8c,c0253bdb,c1b34d00) at in_pcbdetach+0x80
udp_detach(c1b34d00) at udp_detach+0x65
soclose(c1b34d00,cd46abf0,c020a66a,c1897258,c1d3d260) at soclose+0x11b
soo_close(c1897258,c1d3d260) at soo_close+0x26
fdrop_locked(c1897258,c1d3d260,c0429a34,0,c039d7dd) at fdrop_locked+0x12a
fdrop(c1897258,c1d3d260,c02401d1,246,246) at fdrop+0x24
closef(c1897258,c1d3d260) at closef+0xa7
fdfree(c1d3d260,c1ded124,c1ded068,0,c039dc2e) at fdfree+0x85
exit1(c1d3d260,8f00,c0428140,0,c039dc2e) at exit1+0x3a8
sys_exit(c1d3d260,cd46ad14,1,2,292) at sys_exit+0x27
syscall(2f,2f,2f,bfbffbdc,0) at syscall+0x1ed
Xint0x80_syscall() at Xint0x80_syscall+0x1d
Comment 2 Tilman Linneweh 2003-08-16 15:49:03 UTC
This is still reproducible with

# uname -a
FreeBSD polly.arved.de 5.1-CURRENT FreeBSD 5.1-CURRENT #1: Sat Aug 16
10:11:52 CEST 2003    
tilman@sauna.arved.de:/usr/obj/usr/source/CURRENT/sys/POLLY  i386

malloc() of "16" with the following non-sleepable locks held:
exclusive sleep mutex inp r = 0 (0xc19deb60) locked @ 
/usr/source/CURRENT/sys/netinet/udp_usrreq.c:983
exclusive sleep mutex udp r = 0 (0xc046c44c) locked @ 
/usr/source/CURRENT/sys/netinet/udp_usrreq.c:977
Debugger("witness_warn")
Stopped at      Debugger+0x45:  xchgl   %ebx,in_Debugger.0
db> show locks
exclusive sleep mutex inp r = 0 (0xc19deb60) locked @ 
/usr/source/CURRENT/sys/netinet/udp_usrreq.c:983
exclusive sleep mutex udp r = 0 (0xc046c44c) locked @ 
/usr/source/CURRENT/sys/netinet/udp_usrreq.c:977
exclusive sleep mutex Giant r = 1 (0xc0441020) locked @ 
/usr/source/CURRENT/sys/kern/kern_descrip.c:1996

Debugger(c0394028) at Debugger+0x45
witness_warn(5,0,c03d248a,c03aaf6f) at witness_warn+0x179
uma_zalloc_arg(c083a7e0,0,2) at uma_zalloc_arg+0x84
malloc(c,c0417f60,2,c1895000,0) at malloc+0xb6
vlan_setmulti(c196c000,0,c04176a4,0,c03b69a2) at vlan_setmulti+0xd7
vlan_ioctl(c196c000,80206932,0) at vlan_ioctl+0x2a9
if_delmulti(c196c000,c189e4c0) at if_delmulti+0x121
in_delmulti(c18c9d20) at in_delmulti+0x68
ip_freemoptions(c1ce9b80,c19deab0,c1cee900,0,cd48eb1c) at ip_freemoptions+0x21
in_pcbdetach(c19deab0,c19a4bf4,cd48eb34,c026527b,c1cee900) at 
in_pcbdetach+0x80
udp_detach(c1cee900) at udp_detach+0x65
soclose(c1cee900,cd48eb98,c021a1c7,c19a4bf4,c1a5e720) at soclose+0x11b
soo_close(c19a4bf4,c1a5e720) at soo_close+0x26
fdrop_locked(c19a4bf4,c1a5e720,c0b731bc,0,c03b5718) at fdrop_locked+0x107
fdrop(c19a4bf4,c1a5e720,3,c1a5e720,cd48ebf4) at fdrop+0x24
closef(c19a4bf4,c1a5e720) at closef+0x1da
fdfree(c1a5e720,c1a61c80,c1a61bc4,0,c03b5bed) at fdfree+0x2d9
exit1(c1a5e720,8f00,c0441020,0,c03b5bed) at exit1+0x3fb
sys_exit(c1a5e720,cd48ed14,1,2,292) at sys_exit+0x27
syscall(2f,2f,2f,bfbffbdc,0) at syscall+0x1ed
Xint0x80_syscall() at Xint0x80_syscall+0x1d
--- syscall (1, FreeBSD ELF32, sys_exit), eip = 0x80571f7, esp = 0xbfbff9ec, 
ebp = 0xbfbffa08 ---
Comment 3 Andre Oppermann freebsd_committer freebsd_triage 2003-12-27 16:41:28 UTC
Responsible Changed
From-To: freebsd-bugs->andre

Take over.
Comment 4 Andre Oppermann freebsd_committer freebsd_triage 2003-12-30 12:22:51 UTC
State Changed
From-To: open->feedback

Locking violations are supposed to be fixed.  Asked the originator
whether the problem persists.
Comment 5 Andre Oppermann freebsd_committer freebsd_triage 2003-12-30 12:22:57 UTC
Tilman,

could you please check again with 5.2RC2 or -CURRENT as we have fixed
many locking conditions in the code since then?

I have done some vlan settings after the locking changes and I didn't
have any of the problems.

-- 
Andre
Comment 6 Tilman Keskinoz freebsd_committer freebsd_triage 2004-02-11 13:33:34 UTC
State Changed
From-To: feedback->open

Sorry for the long delay. 

Yes, this is still reproducible.

# routed; killall routed 
malloc() of "16" with the following non-sleepable locks held: 
exclusive sleep mutex inp (rawinp) r = 0 (0xc214d090) locked @ /usr/source/CURR6 
exclusive sleep mutex rip r = 0 (0xc07b306c) locked @ /usr/source/CURRENT/src/s9 
Debugger("witness_warn") 
Stopped at      Debugger+0x45:  xchgl   %ebx,in_Debugger.0 
db> trace 
Debugger(c06d221e) at Debugger+0x45 
witness_warn(5,0,c071c81b,c06f269d) at witness_warn+0x17d 
uma_zalloc_arg(c1045900,0,2) at uma_zalloc_arg+0x85 
malloc(c,c2145140,2,c1ef5800,0) at malloc+0xb7 
vlan_setmulti(c1f40400,0,c0766364,0,c06fe990) at vlan_setmulti+0xdb 
vlan_ioctl(c1f40400,80206932,0) at vlan_ioctl+0x3a7 
if_delmulti(c1f40400,c1f1d540) at if_delmulti+0x121 
in_delmulti(c2018000) at in_delmulti+0x6a 
ip_freemoptions(c2048780,c2078780,c2078780,0,cd55aaf4) at ip_freemoptions+0x21 
in_pcbdetach(c214d000,c214d000,cd55ab1c,c05e13db,c2078780) at in_pcbdetach+0x74 
rip_pcbdetach(c2078780,c214d000,c214d090,0,c070b4a6) at rip_pcbdetach+0x4c 
rip_detach(c2078780) at rip_detach+0x67 
soclose(c2078780,cd55ab98,c054e33f,c1fdb8c4,c1fbb2a0) at soclose+0x104 
soo_close(c1fdb8c4,c1fbb2a0) at soo_close+0x26 
fdrop_locked(c1fdb8c4,c1fbb2a0,c128e5f8,0,c06fd5bf) at fdrop_locked+0x117 
fdrop(c1fdb8c4,c1fbb2a0,3,c1fbb2a0,cd55abf4) at fdrop+0x24 
closef(c1fdb8c4,c1fbb2a0) at closef+0x1db 
fdfree(c1fbb2a0,c2122ed8,c2122e2c,0,c06fdb27) at fdfree+0x2eb 
exit1(c1fbb2a0,8f00,c07877e0,0,c06fdb27) at exit1+0x402 
sys_exit(c1fbb2a0,cd55ad14,1,8,292) at sys_exit+0x27 
syscall(2f,2f,2f,bfbfecdc,bfbfeb3c) at syscall+0x217 
Xint0x80_syscall() at Xint0x80_syscall+0x1d8 


Unfortunately I was not able to get a dump, because of ata suckage 
causing a debugger panic. 

If you need more information, please ask; serial console access may be possible.

polly# uname -a 
FreeBSD polly.arved.de 5.2-CURRENT FreeBSD 5.2-CURRENT #1: Wed Feb 11 13:54:42 6
Comment 7 Bruce M Simpson freebsd_committer freebsd_triage 2004-07-04 19:36:18 UTC
State Changed
From-To: open->analyzed

I've committed a change to -CURRENT which may fix this issue.
Comment 8 Tilman Keskinoz freebsd_committer freebsd_triage 2004-07-22 10:03:11 UTC
State Changed
From-To: analyzed->closed

Thank you very much Bruce. Your fix works for me.