Hi, I have a VPN client running which has automatic restarts activated. That means the TUN device is regularly opened and closed. Every time the TUN device closes, an mbuf header of 256 bytes is leaked. After some weeks of uptime the system stops working. I've added some additional code into the kernel to trace this, and the backtrace of one of those allocations what are not freed after 60 seconds, are as follows: X=184 KDB: stack backtrace: db_trace_self_wrapper(c0a38777,c0a678ac,c06c87f0,c7bd3750,a,c7bd3798,5,ffffffff,0,0,553b80,c7bd3818,c2a47440,c7bd3778,c06c824e, c7bd3798,c7bd378c,c06c82fb,c0a678ac,c7bd3798) at db_trace_self_wrapper+0x26/frame 0xc7bd3730 kdb_backtrace(c0a678ac,b8,20,1,c29525e0,...) at kdb_backtrace+0x2b/frame 0xc7bd378c uma_zalloc_arg(c13e45a0,c7bd3860,1,c29525e0,c0aff0e4,...) at uma_zalloc_arg+0x706/frame 0xc7bd37dc mld_v2_enqueue_group_record(0,0,2,20,c2a12a60,...) at mld_v2_enqueue_group_record+0x909/frame 0xc7bd38a8 mld_change_state(c302e200,0,0,0,2,...) at mld_change_state+0x509/frame 0xc7bd3904 in6_mc_leave_locked(c302e200,0,c2895000,c7bd394c,c0676d4a,...) at in6_mc_leave_locked+0x2d/frame 0xc7bd3928 in6_mc_leave(c302e200,0,4,c7bd39ec,c084a1fa,...) at in6_mc_leave+0x37/frame 0xc7bd394c in6_leavegroup(c2e163d0,c3250e90,10,4,0,...) at in6_leavegroup+0x20/frame 0xc7bd3960 in6_purgeaddr(c3250e00,0,0,c3274238,c3274200,...) at in6_purgeaddr+0xea/frame 0xc7bd39ec if_purgeaddrs(c2a54800,2,4,c7bd3aa8,0,...) at if_purgeaddrs+0x10a/frame 0xc7bd3a58 tunclose(c3281800,7,2000,c2afc2f0,c7bd3abc,...) at tunclose+0x15f/frame 0xc7bd3a80 devfs_close(c7bd3af4,c7bd3af4,c338cd50,7,c7bd3b18,...) at devfs_close+0x17f/frame 0xc7bd3ac4 VOP_CLOSE_APV(c0a99ce0,c7bd3af4,c0a40f74,141,c0ad08a0,...) at VOP_CLOSE_APV+0x4a/frame 0xc7bd3adc vn_close(c338cd50,7,c2956180,c2afc2f0,0,...) at vn_close+0x99/frame 0xc7bd3b18 vn_closefile(c314b460,c2afc2f0,c314b460,0,c2afc2f0,...) at vn_closefile+0x53/frame 0xc7bd3b74 devfs_close_f(c314b460,c2afc2f0,3000000,0,1,...) 
at devfs_close_f+0x34/frame 0xc7bd3b90 _fdrop(c314b460,c2afc2f0,0,c7bd3c00,2,0,0,c2b7d2d8,4,2,c7bd3c1c,c09b22e9,c2957760,288df000,2,0,c7bd3c10,c06462a4,1f,c314b460) a t _fdrop+0x2d/frame 0xc7bd3bac closef(c314b460,c2afc2f0,0,c7bd3c38,c09b1d76,...) at closef+0x5b/frame 0xc7bd3c10 kern_close(c2afc2f0,6,c7bd3c98,c09bb3b2,c0aff1c0,...) at kern_close+0x18d/frame 0xc7bd3c48 syscall(c7bd3d08) at syscall+0x535/frame 0xc7bd3cfc Xint0x80_syscall() at Xint0x80_syscall+0x21/frame 0xc7bd3cfc --- syscall (6, FreeBSD ELF32, sys_close), eip = 0x283c2393, esp = 0xbfbfe37c, ebp = 0xbfbfe388 --- X=176 KDB: stack backtrace: db_trace_self_wrapper(c0a38777,c0a678ac,c06c87f0,d26fca30,a,d26fca78,5,ffffffff,0,0,d26fca70,c06d3f9d,c2e0ebc0,d26fca58,c06c824 e,d26fca78,d26fca6c,c06c82fb,c0a678ac,d26fca78) at db_trace_self_wrapper+0x26/frame 0xd26fca10 kdb_backtrace(c0a678ac,b0,20,2,c2edc6d4,...) at kdb_backtrace+0x2b/frame 0xd26fca6c uma_zalloc_arg(c13e45a0,d26fcadc,2,9d0001,0,...) at uma_zalloc_arg+0x706/frame 0xd26fcabc m_getm2(0,a1,2,1,2,...) at m_getm2+0xc1/frame 0xd26fcaf0 m_uiotombuf(d26fcbb0,2,800,64,2,...) at m_uiotombuf+0x80/frame 0xd26fcb24 sosend_generic(c32579c0,0,d26fcbb0,0,0,...) at sosend_generic+0x2be/frame 0xd26fcb80 kern_sendit(c326d000,4,d26fcc24,0,0,...) at kern_sendit+0x185/frame 0xd26fcbe0 sendit(0,0,0,d26fcc40,1,...) at sendit+0xda/frame 0xd26fcc18 sys_sendto(c326d000,d26fcccc,c0aff0e4,c09bb3b2,c0aff1c0,...) at sys_sendto+0x48/frame 0xd26fcc48 syscall(d26fcd08) at syscall+0x535/frame 0xd26fccfc Xint0x80_syscall() at Xint0x80_syscall+0x21/frame 0xd26fccfc --- syscall (133, FreeBSD ELF32, sys_sendto), eip = 0x283a148b, esp = 0xbfbfd39c, ebp = 0xbfbfd3c8 --- X=177 KDB: stack backtrace: db_trace_self_wrapper(c0a38777,c0a678ac,c06c87f0,d26fc9d4,a,d26fca1c,5,ffffffff,0,0,c0aff1c0,c326d000,c326d000,d26fc9fc,c06c824 e,d26fca1c,d26fca10,c06c82fb,c0a678ac,d26fca1c) at db_trace_self_wrapper+0x26/frame 0xd26fc9b4 kdb_backtrace(c0a678ac,b1,20,1,c0695933,...) 
at kdb_backtrace+0x2b/frame 0xd26fca10 uma_zalloc_arg(c13e45a0,d26fca78,1,c2b06d00,0,...) at uma_zalloc_arg+0x706/frame 0xd26fca60 sbappendaddr_locked_internal(0,0,0,c2edc680,4,...) at sbappendaddr_locked_internal+0x49/frame 0xd26fca8c sbappendaddr_locked(c2edc6d4,c0a3dec0,c2b06d00,0,c2b06d9c,...) at sbappendaddr_locked+0x6c/frame 0xd26fcaac uipc_send(c32579c0,0,c2b06d00,0,0,...) at uipc_send+0x763/frame 0xd26fcb24 sosend_generic(c32579c0,0,d26fcbb0,c2b06d00,0,...) at sosend_generic+0x385/frame 0xd26fcb80 kern_sendit(c326d000,4,d26fcc24,0,0,...) at kern_sendit+0x185/frame 0xd26fcbe0 sendit(0,0,0,d26fcc40,1,...) at sendit+0xda/frame 0xd26fcc18 sys_sendto(c326d000,d26fcccc,c0aff0e4,c09bb3b2,c0aff1c0,...) at sys_sendto+0x48/frame 0xd26fcc48 syscall(d26fcd08) at syscall+0x535/frame 0xd26fccfc Xint0x80_syscall() at Xint0x80_syscall+0x21/frame 0xd26fccfc --- syscall (133, FreeBSD ELF32, sys_sendto), eip = 0x283a148b, esp = 0xbfbfd39c, ebp = 0xbfbfd3c8 --- X=178 KDB: stack backtrace: db_trace_self_wrapper(c0a38777,c0a678ac,c06c87f0,d26fc9d4,a,d26fca1c,5,ffffffff,0,0,c0aff1c0,c326d000,c326d000,d26fc9fc,c06c824 e,d26fca1c,d26fca10,c06c82fb,c0a678ac,d26fca1c) at db_trace_self_wrapper+0x26/frame 0xd26fc9b4 kdb_backtrace(c0a678ac,b2,20,1,c0695933,...) at kdb_backtrace+0x2b/frame 0xd26fca10 uma_zalloc_arg(c13e45a0,d26fca78,1,c2b03700,0,...) at uma_zalloc_arg+0x706/frame 0xd26fca60 sbappendaddr_locked_internal(0,0,0,c2edc680,4,...) at sbappendaddr_locked_internal+0x49/frame 0xd26fca8c sbappendaddr_locked(c2edc6d4,c0a3dec0,c2b03700,0,c2b0379c,...) at sbappendaddr_locked+0x6c/frame 0xd26fcaac uipc_send(c32579c0,0,c2b03700,0,0,...) at uipc_send+0x763/frame 0xd26fcb24 sosend_generic(c32579c0,0,d26fcbb0,c2b03700,0,...) at sosend_generic+0x385/frame 0xd26fcb80 kern_sendit(c326d000,4,d26fcc24,0,0,...) at kern_sendit+0x185/frame 0xd26fcbe0 sendit(0,0,0,d26fcc40,1,...) at sendit+0xda/frame 0xd26fcc18 sys_sendto(c326d000,d26fcccc,d26fcc98,c09bb3b2,c0aff1c0,...) 
at sys_sendto+0x48/frame 0xd26fcc48 syscall(d26fcd08) at syscall+0x535/frame 0xd26fccfc Xint0x80_syscall() at Xint0x80_syscall+0x21/frame 0xd26fccfc --- syscall (133, FreeBSD ELF32, sys_sendto), eip = 0x283a148b, esp = 0xbfbfd58c, ebp = 0xbfbfd5b8 --- --HPS
Hi, Here are some more mbuf allocations, after TUN close, which my test program did not mark as stuck: KDB: stack backtrace: db_trace_self_wrapper(c0a38777,c0a678ac,c06c87f0,c7bd3838,a,c7bd3880,5,ffffffff,0,0,c326fd18,c13eea14,c13eea10,c7bd3860,c06c824 e,c7bd3880,c7bd3874,c06c82fb,c0a678ac,c7bd3880) at db_trace_self_wrapper+0x26/frame 0xc7bd3818 kdb_backtrace(c0a678ac,b6,20,1,c0ab8680,...) at kdb_backtrace+0x2b/frame 0xc7bd3874 uma_zalloc_arg(c13e45a0,c7bd38ec,1,c0b0a194,c7bd3914,...) at uma_zalloc_arg+0x706/frame 0xc7bd38c4 rt_msg1(c7bd3914,30,c2a54800,0,0,...) at rt_msg1+0x59/frame 0xc7bd3900 rtsock_addrmsg(2,c3250e00,0,c2eee9b4,0,...) at rtsock_addrmsg+0x73/frame 0xc7bd3950 rtinit(c3250e00,2,0,c7bd3aa8,0,...) at rtinit+0x1a4/frame 0xc7bd3a58 tunclose(c3281800,7,2000,c2afc2f0,c7bd3abc,...) at tunclose+0x280/frame 0xc7bd3a80 devfs_close(c7bd3af4,c7bd3af4,c338cd50,7,c7bd3b18,...) at devfs_close+0x17f/frame 0xc7bd3ac4 VOP_CLOSE_APV(c0a99ce0,c7bd3af4,c0a40f74,141,c0ad08a0,...) at VOP_CLOSE_APV+0x4a/frame 0xc7bd3adc vn_close(c338cd50,7,c2956180,c2afc2f0,0,...) at vn_close+0x99/frame 0xc7bd3b18 vn_closefile(c314b460,c2afc2f0,c314b460,0,c2afc2f0,...) at vn_closefile+0x53/frame 0xc7bd3b74 devfs_close_f(c314b460,c2afc2f0,3000000,0,1,...) at devfs_close_f+0x34/frame 0xc7bd3b90 _fdrop(c314b460,c2afc2f0,0,c7bd3c00,2,0,0,c2b7d2d8,4,2,c7bd3c1c,c09b22e9,c2957760,288df000,2,0,c7bd3c10,c06462a4,1f,c314b460) at _fdrop+0x2d/frame 0xc7bd3bac closef(c314b460,c2afc2f0,0,c7bd3c38,c09b1d76,...) at closef+0x5b/frame 0xc7bd3c10 kern_close(c2afc2f0,6,c7bd3c98,c09bb3b2,c0aff1c0,...) 
at kern_close+0x18d/frame 0xc7bd3c48 syscall(c7bd3d08) at syscall+0x535/frame 0xc7bd3cfc Xint0x80_syscall() at Xint0x80_syscall+0x21/frame 0xc7bd3cfc --- syscall (6, FreeBSD ELF32, sys_close), eip = 0x283c2393, esp = 0xbfbfe37c, ebp = 0xbfbfe388 --- KDB: stack backtrace: db_trace_self_wrapper(c0a38777,c0a678ac,c06c87f0,c7bd380c,a,c7bd3854,5,ffffffff,0,0,c0ab35e0,c7bd3840,c0676dca,c7bd3834,c06c824 e,c7bd3854,c7bd3848,c06c82fb,c0a678ac,c7bd3854) at db_trace_self_wrapper+0x26/frame 0xc7bd37ec kdb_backtrace(c0a678ac,b6,20,1,c7bd38c0,...) at kdb_backtrace+0x2b/frame 0xc7bd3848 uma_zalloc_arg(c13e45a0,c7bd38c0,1,936,c7bd38e4,...) at uma_zalloc_arg+0x706/frame 0xc7bd3898 rt_msg1(c7bd38e4,30,0,c32eba00,c32eba1c,...) at rt_msg1+0x59/frame 0xc7bd38d4 rtsock_routemsg(2,c2a54800,0,c2eee9b4,0,...) at rtsock_routemsg+0x4f/frame 0xc7bd3920 rt_newaddrmsg_fib(2,c3250e00,0,c2eee9b4,0,...) at rt_newaddrmsg_fib+0x4e/frame 0xc7bd3950 rtinit(c3250e00,2,0,c7bd3aa8,0,...) at rtinit+0x1a4/frame 0xc7bd3a58 tunclose(c3281800,7,2000,c2afc2f0,c7bd3abc,...) at tunclose+0x280/frame 0xc7bd3a80 devfs_close(c7bd3af4,c7bd3af4,c338cd50,7,c7bd3b18,...) at devfs_close+0x17f/frame 0xc7bd3ac4 VOP_CLOSE_APV(c0a99ce0,c7bd3af4,c0a40f74,141,c0ad08a0,...) at VOP_CLOSE_APV+0x4a/frame 0xc7bd3adc vn_close(c338cd50,7,c2956180,c2afc2f0,0,...) at vn_close+0x99/frame 0xc7bd3b18 vn_closefile(c314b460,c2afc2f0,c314b460,0,c2afc2f0,...) at vn_closefile+0x53/frame 0xc7bd3b74 devfs_close_f(c314b460,c2afc2f0,3000000,0,1,...) at devfs_close_f+0x34/frame 0xc7bd3b90 _fdrop(c314b460,c2afc2f0,0,c7bd3c00,2,0,0,c2b7d2d8,4,2,c7bd3c1c,c09b22e9,c2957760,288df000,2,0,c7bd3c10,c06462a4,1f,c314b460) a t _fdrop+0x2d/frame 0xc7bd3bac closef(c314b460,c2afc2f0,0,c7bd3c38,c09b1d76,...) at closef+0x5b/frame 0xc7bd3c10 kern_close(c2afc2f0,6,c7bd3c98,c09bb3b2,c0aff1c0,...) 
at kern_close+0x18d/frame 0xc7bd3c48 syscall(c7bd3d08) at syscall+0x535/frame 0xc7bd3cfc Xint0x80_syscall() at Xint0x80_syscall+0x21/frame 0xc7bd3cfc --- syscall (6, FreeBSD ELF32, sys_close), eip = 0x283c2393, esp = 0xbfbfe37c, ebp = 0xbfbfe388 ---
Hi, Hans, If I understand correctly, you are able to track which mbufs were allocated and not freed within some period. Is it possible to modify your patch to print the contents of these mbufs? I mean something like this: struct ip *ip; struct ip6_hdr *ip6; if (m->m_len > sizeof(struct ip)) { ip = mtod(m, struct ip*); printf("IP version: %u\n", ip->ip_v); switch(ip->ip_v) { case IPVERSION: /* print ip_src, ip_dst, ip_p */ break; case (IPV6_VERSION >> 4): ip6 = mtod(m, struct ip6_hdr *); /* print ip6_src, ip6_dst, ip6_nxt */ break; } }
Yes, can you send me working example code with everything you want to print? I'll add it to the kernel, taking a "struct mbuf *" as input. --HPS
Created attachment 148695 [details] kernel function for printing mbuf info
the function is untested, so be careful :)
Testing right now. You will have the result soonish ...
Hi, Here are the candidates for lost mbufs: Stuck MBUF[2] TID=100082 LEN=0 mbuf 0xc3265e00: len 20, flags 0x00000002 mbuf 0xc3265e00: ip_v 0 Stuck MBUF[3] TID=100087 LEN=0 mbuf 0xc2b04e00: len 20, flags 0x00000002 mbuf 0xc2b04e00: ip_v 0 Stuck MBUF[5] TID=100087 LEN=0 mbuf 0xc2b02900: len 20, flags 0x00000002 mbuf 0xc2b02900: ip_v 0 Stuck MBUF[22] TID=100082 LEN=0 mbuf 0xc2b05100: len 20, flags 0x00000002 mbuf 0xc2b05100: ip_v 0 Stuck MBUF[23] TID=100082 LEN=0 mbuf 0xc2b07600: len 20, flags 0x00000002 mbuf 0xc2b07600: ip_v 0 Stuck MBUF[89] TID=100087 LEN=0 mbuf 0xc325da00: len 20, flags 0x00000002 mbuf 0xc325da00: ip_v 0 Note, this is 9-stable, so the M_PRINT_FLAGS was not there, and I used 0x%08x instead. --HPS
Comment on attachment 148695 [details] kernel function for printing mbuf info > printf("mbuf %p: len %u, flags %b\n", m, m->m_len, > m->m_flags, M_FLAG_PRINTF); Nothing specific :( It seems the last thing that could be interesting here is the following: if (m->m_flags & M_PKTHDR) printf(" hdrlen %u\n", m, m->m_pkthdr.len) > if (m->m_len < sizeof(struct ip)) > return;
Hi, Here is how to reproduce: FreeBSD-9-stable: 1) install openvpn from ports 2) generate an openvpn key 3) start two instances of openvpn like this (no need for a client!) /usr/local/sbin/openvpn --keepalive 120 240 --float --lport 543 --dev tun3 \ --ifconfig 10.1.2.6 10.1.2.7 \ --secret xxxx.key --daemon testlink /usr/local/sbin/openvpn --proto tcp_server --keepalive 120 240 --float --lport 544 --dev tun4 \ --ifconfig 10.1.2.8 10.1.2.9 \ --secret xxxx.key --daemon testlink2 4) watch vmstat -z | grep -E "LIMIT|mbuf:" After some minutes, the number of mbufs in use simply starts growing. Maybe that's simpler for you to reproduce? --HPS
Created attachment 148778 [details] Proposed patch Hi, Hans, can you try this patch? My investigations led me to the following conclusions. The leak isn't specific to the tun(4) device; it can be reproduced with any device where MLD works. The backtrace to the allocation that will not be freed is uma_zalloc_arg mld_v2_enqueue_group_record+0x678 mld_change_state+0x3b9 in6_mc_join_locked+0x346 in6_mc_join+0x94 in6_joingroup+0x58 in6_update_ifa+0xd2c in6_ifattach+0x506 ifioctl+0x8e0 kern_ioctl+0x3cd sys_ioctl+0x13c
Hi, I see no more buffer leakages currently. I'll let you know if I find more. Don't forget to MFC to 9- and 10- stable. Might be possible to get it in before 10.1-R too ... --HPS
Created attachment 148792 [details] Cleanup altq before destroy Hi, While at it, I suggest you add the attached patch to the commit as well. What do you think? --HPS
A commit references this bug: Author: ae Date: Thu Oct 30 10:59:58 UTC 2014 New revision: 273855 URL: https://svnweb.freebsd.org/changeset/base/273855 Log: Fix mbuf leak in IPv6 multicast code. When multicast capable interface goes away, it leaves multicast groups, this leads to generate MLD reports, but MLD code does deferred send and MLD reports are queued in the in6_multi's in6m_scq ifq. The problem is that in6_multi structures are freed when interface leaves multicast groups and thread that does deferred send will not take these queued packets. PR: 194577 MFC after: 1 week Sponsored by: Yandex LLC Changes: head/sys/netinet6/in6_mcast.c
Patched in head/.
A commit references this bug: Author: ae Date: Thu Nov 6 09:16:53 UTC 2014 New revision: 274168 URL: https://svnweb.freebsd.org/changeset/base/274168 Log: MFC r273855: Fix mbuf leak in IPv6 multicast code. When multicast capable interface goes away, it leaves multicast groups, this leads to generate MLD reports, but MLD code does deferred send and MLD reports are queued in the in6_multi's in6m_scq ifq. The problem is that in6_multi structures are freed when interface leaves multicast groups and thread that does deferred send will not take these queued packets. PR: 194577 MFC r273857: Move ifq drain into in6m_purge(). Suggested by: bms Sponsored by: Yandex LLC Changes: _U stable/10/ stable/10/sys/netinet6/in6_mcast.c
A commit references this bug: Author: ae Date: Thu Nov 6 16:31:49 UTC 2014 New revision: 274181 URL: https://svnweb.freebsd.org/changeset/base/274181 Log: MFC r273855: Fix mbuf leak in IPv6 multicast code. When multicast capable interface goes away, it leaves multicast groups, this leads to generate MLD reports, but MLD code does deferred send and MLD reports are queued in the in6_multi's in6m_scq ifq. The problem is that in6_multi structures are freed when interface leaves multicast groups and thread that does deferred send will not take these queued packets. PR: 194577 MFC r273857: Move ifq drain into in6m_purge(). Suggested by: bms Sponsored by: Yandex LLC Changes: _U stable/9/sys/ stable/9/sys/netinet6/in6_mcast.c