Bug 220140 - net/mpd5 - on pine64 - system crash - Fatal data abort
Summary: net/mpd5 - on pine64 - system crash - Fatal data abort
Status: Closed FIXED
Alias: None
Product: Ports & Packages
Classification: Unclassified
Component: Individual Port(s) (show other bugs)
Version: Latest
Hardware: arm64 Any
Importance: --- Affects Only Me
Assignee: Eugene Grosbein
URL:
Keywords:
Depends on:
Blocks:
 
Reported: 2017-06-19 16:29 UTC by hlh
Modified: 2018-02-05 16:58 UTC (History)
3 users (show)

See Also:
eugen: maintainer-feedback-


Attachments

Note You need to log in before you can comment on or make changes to this bug.
Description hlh 2017-06-19 16:29:24 UTC
on a pine64+ 2GB
FreeBSD norquay.restart.bel 12.0-CURRENT FreeBSD 12.0-CURRENT #0 r319859M: Sat Jun 17 16:11:45 CEST 2017     root@norquay.restart.bel:/usr/obj/usr/src/sys/NORQUAY  arm64

When I start mpd5 to establish an internet connection, the system crashes:

[root@norquay ~]# service mpd5 forcestart
Starting mpd5.
[root@norquay ~]# WARNING: attempt to domain_add(netgraph) after domainfinalize()
Fatal data abort:
  x0: fffffd004bc70b88
  x1:                4
  x2: fffffd0002c7c9c0
  x3:              915
  x4:                0
  x5:                0
  x6: ffffffffffffffff
  x7:               40
  x8:                2
  x9:                0
 x10: fffffd0002c7c9c0
 x11:                0
 x12:                1
 x13:                0
 x14:                1
 x15: ffff0000007681f8
 x16: ffff000052b4c7b0
 x17: ffff000000271704
 x18: ffff00009b2a4680
 x19:                0
 x20: fffffd004bc70b88
 x21: fffffd004bc70b00
 x22: fffffd004bc70b68
 x23: ffff00009b2a46d0
 x24: fffffd004bc70b00
 x25: fffffd003cd3de80
 x26:                1
 x27: fffffd004bce06c8
 x28:                0
 x29: ffff00009b2a46b0
  sp: ffff00009b2a4680
  lr: ffff000052b3e484
 elr: ffff000052b3e490
spsr:         60000345
 far:                0
 esr:         96000007
[ thread pid 1512 tid 101427 ]
Stopped at      ng_snd_item+0x384:      ldrb    w9, [x9]
db> bt
Tracing pid 1512 tid 101427 td 0xfffffd0002c7c9c0
db_trace_self() at db_stack_trace+0xec
         pc = 0xffff0000005a2700  lr = 0xffff0000000851e8
         sp = 0xffff00009b2a4020  fp = 0xffff00009b2a4050

db_stack_trace() at db_command+0x23c
         pc = 0xffff0000000851e8  lr = 0xffff000000084e68
         sp = 0xffff00009b2a4060  fp = 0xffff00009b2a4140

db_command() at db_command_loop+0x60
         pc = 0xffff000000084e68  lr = 0xffff000000084c10
         sp = 0xffff00009b2a4150  fp = 0xffff00009b2a4170

db_command_loop() at db_trap+0xf4
         pc = 0xffff000000084c10  lr = 0xffff000000087c14
         sp = 0xffff00009b2a4180  fp = 0xffff00009b2a43a0

db_trap() at kdb_trap+0x180
         pc = 0xffff000000087c14  lr = 0xffff0000002d2110
         sp = 0xffff00009b2a43b0  fp = 0xffff00009b2a4410
        
kdb_trap() at data_abort+0x1a0
         pc = 0xffff0000002d2110  lr = 0xffff0000005b8020
         sp = 0xffff00009b2a4420  fp = 0xffff00009b2a44d0

data_abort() at handle_el1h_sync+0x74
         pc = 0xffff0000005b8020  lr = 0xffff0000005a4074
         sp = 0xffff00009b2a44e0  fp = 0xffff00009b2a45f0

handle_el1h_sync() at ng_snd_item+0x374
         pc = 0xffff0000005a4074  lr = 0xffff000052b3e480
         sp = 0xffff00009b2a4600  fp = 0xffff00009b2a46b0

ng_snd_item() at ngc_send+0x1bc
         pc = 0xffff000052b3e480  lr = 0xffff000052abbcc0
         sp = 0xffff00009b2a46c0  fp = 0xffff00009b2a4740

ngc_send() at sosend_generic+0x454
         pc = 0xffff000052abbcc0  lr = 0xffff00000031a19c
         sp = 0xffff00009b2a4750  fp = 0xffff00009b2a47f0
        
sosend_generic() at kern_sendit+0x264
         pc = 0xffff00000031a19c  lr = 0xffff00000032098c
         sp = 0xffff00009b2a4800  fp = 0xffff00009b2a4890

kern_sendit() at sendit+0x1a4
         pc = 0xffff00000032098c  lr = 0xffff000000320ce0
         sp = 0xffff00009b2a48a0  fp = 0xffff00009b2a48e0

sendit() at sys_sendto+0x50
         pc = 0xffff000000320ce0  lr = 0xffff000000320b2c
         sp = 0xffff00009b2a48f0  fp = 0xffff00009b2a4930

sys_sendto() at do_el0_sync+0xa90
         pc = 0xffff000000320b2c  lr = 0xffff0000005b8b30
         sp = 0xffff00009b2a4940  fp = 0xffff00009b2a4a70

do_el0_sync() at handle_el0_sync+0x74
         pc = 0xffff0000005b8b30  lr = 0xffff0000005a41f4
         sp = 0xffff00009b2a4a80  fp = 0xffff00009b2a4b90
        
handle_el0_sync() at 0x405e4e28
         pc = 0xffff0000005a41f4  lr = 0x00000000405e4e28
         sp = 0xffff00009b2a4ba0  fp = 0x0000ffffffffe760

db> 

With the mpd.conf:

startup:
	# enable TCP-Wrapper (hosts_access(5)) to block unfriendly clients
	set global enable tcp-wrapper
	set user hlh guru admin
	# configure the console
	set console self 0.0.0.0 5005
	set console open
	# configure the web interface
	set web self 0.0.0.0 5006
	set web open

default:
	load scarlet

scarlet:
	create bundle static B0
	set bundle enable compression

# Configure IP addressing -- we get both endpoint assignments from peer.
	set ipcp ranges 0.0.0.0/0 0.0.0.0/0
	set ipcp yes vjcomp

# Configure the interface: default route, idle timeout.
	set iface route default
	set iface enable tcpmssfix
	set iface disable on-demand
	set iface idle 0
	set iface up-script /usr/local/etc/mpd5/adsl.linkup
	set iface down-script /usr/local/etc/mpd5/adsl.linkdown
	
# Configure the (only) link. We expect to be authenticated by peer.
	create link static L0 pppoe
	set link action bundle B0
	set auth authname "xxxxxxxx@SCARLET"
	set link max-redial 0
	set link mtu 1492
	set link keep-alive 10 60
	set link no acfcomp protocomp
	set link disable chap pap
	set link accept chap

	set pppoe iface awg0
#	set pppoe service "SCARLET"

	open

Henri
Comment 1 Takanori Watanabe freebsd_committer 2017-06-27 16:07:13 UTC
https://svnweb.freebsd.org/changeset/base/320403 may fix this. 
Update your system and retry.
Comment 2 Eugene Grosbein freebsd_committer 2017-06-27 17:49:37 UTC
mav@ is not the maintainer of this port anymore. Also, the problem seems to be ARM-specific.
Comment 3 hlh 2017-06-27 18:50:35 UTC
I upgraded my kernel:

FreeBSD norquay.restart.bel 12.0-CURRENT FreeBSD 12.0-CURRENT #0 r320403M: Tue Jun 27 19:59:40 CEST 2017     root@norquay.restart.bel:/usr/obj/usr/src/sys/NORQUAY  arm64

I reinstalled net/mpd5

But I get the same error:

[ thread pid 1277 tid 101379 ]
Stopped at      ng_snd_item+0x384:      ldrb    w9, [x9]

Henri
Comment 4 Eugene Grosbein freebsd_committer 2017-06-27 19:30:50 UTC
(In reply to hlh from comment #3)

Maybe you should try a patch against ng_iface(4) from https://bugs.freebsd.org/bugzilla/show_bug.cgi?id=220076
Comment 5 Eugene Grosbein freebsd_committer 2017-06-27 19:32:53 UTC
(In reply to hlh from comment #3)

Also, it won't hurt to try another set of kernel patches I created while trying to run a busy mpd5 server on amd64; they are not platform-specific:

https://bugs.freebsd.org/bugzilla/show_bug.cgi?id=220078
Comment 6 Takanori Watanabe freebsd_committer 2017-06-28 04:27:36 UTC
(In reply to hlh from comment #3)
Are you sure you reinstalled the kernel modules?
Comment 7 hlh 2017-06-28 07:48:39 UTC
Shame on me!

You are right. I updated the kernel and its modules on the SD card
(used to boot the kernel), but I'm running with root on ZFS and I
forgot to update the /boot/kernel directory on the ZFS root, which
is used when I run mpd5 ...

I will test with all the patches.
Comment 8 Takanori Watanabe freebsd_committer 2017-06-28 10:04:01 UTC
(In reply to hlh from comment #7)
I'm glad to hear this. The fix is embedded in each module,
so module replacement is mandatory. I'll close this PR.
OK?
Comment 9 hlh 2017-06-28 10:32:10 UTC
You are right. The problem is solved.

Thank you for your help!

For the record, I have these patches:

[root@norquay sys]# svn diff
Index: arm/allwinner/if_awg.c
===================================================================
--- arm/allwinner/if_awg.c	(revision 320403)
+++ arm/allwinner/if_awg.c	(working copy)
@@ -92,7 +92,7 @@
 #define	TX_SKIP(n, o)		(((n) + (o)) & (TX_DESC_COUNT - 1))
 #define	RX_NEXT(n)		(((n) + 1) & (RX_DESC_COUNT - 1))
 
-#define	TX_MAX_SEGS		10
+#define	TX_MAX_SEGS		20
 
 #define	SOFT_RST_RETRY		1000
 #define	MII_BUSY_RETRY		1000
@@ -419,14 +419,18 @@
 	    sc->tx.buf_map[index].map, m, segs, &nsegs, BUS_DMA_NOWAIT);
 	if (error == EFBIG) {
 		m = m_collapse(m, M_NOWAIT, TX_MAX_SEGS);
-		if (m == NULL)
+		if (m == NULL) {
+			device_printf(sc->miibus, "awg_setup_txbuf: m_collapse failed\n");
 			return (0);
+		}
 		*mp = m;
 		error = bus_dmamap_load_mbuf_sg(sc->tx.buf_tag,
 		    sc->tx.buf_map[index].map, m, segs, &nsegs, BUS_DMA_NOWAIT);
 	}
-	if (error != 0)
+	if (error != 0) {
+		device_printf(sc->miibus, "awg_setup_txbuf: bus_dmamap_load_mbuf_sg failed\n");
 		return (0);
+	}
 
 	bus_dmamap_sync(sc->tx.buf_tag, sc->tx.buf_map[index].map,
 	    BUS_DMASYNC_PREWRITE);
Index: arm/arm/gic.h
===================================================================
--- arm/arm/gic.h	(revision 320403)
+++ arm/arm/gic.h	(working copy)
@@ -39,7 +39,9 @@
 #ifndef _ARM_GIC_H_
 #define _ARM_GIC_H_
 
+/*--- RestartSoft ---
 #define GIC_DEBUG_SPURIOUS
+  --- RestartSoft ---*/
 
 #define	GIC_FIRST_SGI		 0	/* Irqs 0-15 are SGIs/IPIs. */
 #define	GIC_LAST_SGI		15
Index: conf/kern.pre.mk
===================================================================
--- conf/kern.pre.mk	(revision 320403)
+++ conf/kern.pre.mk	(working copy)
@@ -90,7 +90,7 @@
 .if defined(CFLAGS_ARCH_PARAMS)
 CFLAGS.gcc+=${CFLAGS_ARCH_PARAMS}
 .endif
-WERROR?= -Werror
+#-hlh-WERROR?= -Werror
 
 # XXX LOCORE means "don't declare C stuff" not "for locore.s".
 ASM_CFLAGS= -x assembler-with-cpp -DLOCORE ${CFLAGS} ${ASM_CFLAGS.${.IMPSRC:T}} 
Index: net/if_stf.c
===================================================================
--- net/if_stf.c	(revision 320403)
+++ net/if_stf.c	(working copy)
@@ -378,6 +378,7 @@
 static int
 stf_getsrcifa6(struct ifnet *ifp, struct in6_addr *addr, struct in6_addr *mask)
 {
+	struct rm_priotracker in_ifa_tracker;
 	struct ifaddr *ia;
 	struct in_ifaddr *ia4;
 	struct in6_ifaddr *ia6;
@@ -393,9 +394,11 @@
 			continue;
 
 		bcopy(GET_V4(&sin6->sin6_addr), &in, sizeof(in));
+		IN_IFADDR_RLOCK(&in_ifa_tracker);
 		LIST_FOREACH(ia4, INADDR_HASH(in.s_addr), ia_hash)
 			if (ia4->ia_addr.sin_addr.s_addr == in.s_addr)
 				break;
+		IN_IFADDR_RUNLOCK(&in_ifa_tracker);
 		if (ia4 == NULL)
 			continue;
 
Index: netgraph/ng_iface.c
===================================================================
--- netgraph/ng_iface.c	(revision 320403)
+++ netgraph/ng_iface.c	(working copy)
@@ -64,6 +64,7 @@
 #include <sys/errno.h>
 #include <sys/proc.h>
 #include <sys/random.h>
+#include <sys/rmlock.h>
 #include <sys/sockio.h>
 #include <sys/socket.h>
 #include <sys/syslog.h>
@@ -112,9 +113,15 @@
 	int	unit;			/* Interface unit number */
 	node_p	node;			/* Our netgraph node */
 	hook_p	hooks[NUM_FAMILIES];	/* Hook for each address family */
+	struct rmlock	lock;		/* Protect private data changes */
 };
 typedef struct ng_iface_private *priv_p;
 
+#define	PRIV_RLOCK(priv, t)	rm_rlock(&priv->lock, t)
+#define	PRIV_RUNLOCK(priv, t)	rm_runlock(&priv->lock, t)
+#define	PRIV_WLOCK(priv)	rm_wlock(&priv->lock)
+#define	PRIV_WUNLOCK(priv)	rm_wunlock(&priv->lock)
+
 /* Interface methods */
 static void	ng_iface_start(struct ifnet *ifp);
 static int	ng_iface_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data);
@@ -431,6 +438,7 @@
 static int
 ng_iface_send(struct ifnet *ifp, struct mbuf *m, sa_family_t sa)
 {
+	struct rm_priotracker priv_tracker;
 	const priv_p priv = (priv_p) ifp->if_softc;
 	const iffam_p iffam = get_iffam_from_af(sa);
 	int error;
@@ -448,7 +456,9 @@
 
 	/* Send packet. If hook is not connected, mbuf will get freed. */
 	NG_OUTBOUND_THREAD_REF();
+	PRIV_RLOCK(priv, &priv_tracker);
 	NG_SEND_DATA_ONLY(error, *get_hook_from_iffam(priv, iffam), m);
+	PRIV_RUNLOCK(priv, &priv_tracker);
 	NG_OUTBOUND_THREAD_UNREF();
 
 	/* Update stats. */
@@ -516,6 +526,8 @@
 		return (ENOMEM);
 	}
 
+	rm_init(&priv->lock, "ng_iface private rmlock");
+
 	/* Link them together */
 	ifp->if_softc = priv;
 	priv->ifp = ifp;
@@ -562,16 +574,21 @@
 ng_iface_newhook(node_p node, hook_p hook, const char *name)
 {
 	const iffam_p iffam = get_iffam_from_name(name);
+	const priv_p priv = NG_NODE_PRIVATE(node);
 	hook_p *hookptr;
 
 	if (iffam == NULL)
 		return (EPFNOSUPPORT);
-	hookptr = get_hook_from_iffam(NG_NODE_PRIVATE(node), iffam);
-	if (*hookptr != NULL)
+	PRIV_WLOCK(priv);
+	hookptr = get_hook_from_iffam(priv, iffam);
+	if (*hookptr != NULL) {
+		PRIV_WUNLOCK(priv);
 		return (EISCONN);
+	}
 	*hookptr = hook;
 	NG_HOOK_HI_STACK(hook);
 	NG_HOOK_SET_TO_INBOUND(hook);
+	PRIV_WUNLOCK(priv);
 	return (0);
 }
 
@@ -730,6 +747,7 @@
 	CURVNET_RESTORE();
 	priv->ifp = NULL;
 	free_unr(V_ng_iface_unit, priv->unit);
+	rm_destroy(&priv->lock);
 	free(priv, M_NETGRAPH_IFACE);
 	NG_NODE_SET_PRIVATE(node, NULL);
 	NG_NODE_UNREF(node);
@@ -748,7 +766,9 @@
 
 	if (iffam == NULL)
 		panic("%s", __func__);
+	PRIV_WLOCK(priv);
 	*get_hook_from_iffam(priv, iffam) = NULL;
+	PRIV_WUNLOCK(priv);
 	return (0);
 }
 
Index: netinet/in_mcast.c
===================================================================
--- netinet/in_mcast.c	(revision 320403)
+++ netinet/in_mcast.c	(working copy)
@@ -1339,6 +1339,7 @@
 inp_block_unblock_source(struct inpcb *inp, struct sockopt *sopt)
 {
 	struct group_source_req		 gsr;
+	struct rm_priotracker		 in_ifa_tracker;
 	sockunion_t			*gsa, *ssa;
 	struct ifnet			*ifp;
 	struct in_mfilter		*imf;
@@ -1376,9 +1377,11 @@
 		ssa->sin.sin_len = sizeof(struct sockaddr_in);
 		ssa->sin.sin_addr = mreqs.imr_sourceaddr;
 
-		if (!in_nullhost(mreqs.imr_interface))
+		if (!in_nullhost(mreqs.imr_interface)) {
+			IN_IFADDR_RLOCK(&in_ifa_tracker);
 			INADDR_TO_IFP(mreqs.imr_interface, ifp);
-
+			IN_IFADDR_RUNLOCK(&in_ifa_tracker);
+		}
 		if (sopt->sopt_name == IP_BLOCK_SOURCE)
 			doblock = 1;
 
@@ -1874,7 +1877,6 @@
  *
  * Returns NULL if no ifp could be found.
  *
- * SMPng: TODO: Acquire the appropriate locks for INADDR_TO_IFP.
  * FUTURE: Implement IPv4 source-address selection.
  */
 static struct ifnet *
@@ -1892,7 +1894,9 @@
 
 	ifp = NULL;
 	if (!in_nullhost(ina)) {
+		IN_IFADDR_RLOCK(&in_ifa_tracker);
 		INADDR_TO_IFP(ina, ifp);
+		IN_IFADDR_RUNLOCK(&in_ifa_tracker);
 	} else {
 		fibnum = inp ? inp->inp_inc.inc_fibnum : 0;
 		if (fib4_lookup_nh_basic(fibnum, gsin->sin_addr, 0, 0, &nh4)==0)
@@ -2224,6 +2228,7 @@
 {
 	struct group_source_req		 gsr;
 	struct ip_mreq_source		 mreqs;
+	struct rm_priotracker		 in_ifa_tracker;
 	sockunion_t			*gsa, *ssa;
 	struct ifnet			*ifp;
 	struct in_mfilter		*imf;
@@ -2282,9 +2287,11 @@
 		 * XXX NOTE WELL: The RFC 3678 API is preferred because
 		 * using an IPv4 address as a key is racy.
 		 */
-		if (!in_nullhost(mreqs.imr_interface))
+		if (!in_nullhost(mreqs.imr_interface)) {
+			IN_IFADDR_RLOCK(&in_ifa_tracker);
 			INADDR_TO_IFP(mreqs.imr_interface, ifp);
-
+			IN_IFADDR_RUNLOCK(&in_ifa_tracker);
+		}
 		CTR3(KTR_IGMPV3, "%s: imr_interface = 0x%08x, ifp = %p",
 		    __func__, ntohl(mreqs.imr_interface.s_addr), ifp);
 
@@ -2444,6 +2451,7 @@
 static int
 inp_set_multicast_if(struct inpcb *inp, struct sockopt *sopt)
 {
+	struct rm_priotracker	 in_ifa_tracker;
 	struct in_addr		 addr;
 	struct ip_mreqn		 mreqn;
 	struct ifnet		*ifp;
@@ -2482,7 +2490,9 @@
 		if (in_nullhost(addr)) {
 			ifp = NULL;
 		} else {
+			IN_IFADDR_RLOCK(&in_ifa_tracker);
 			INADDR_TO_IFP(addr, ifp);
+			IN_IFADDR_RUNLOCK(&in_ifa_tracker);
 			if (ifp == NULL)
 				return (EADDRNOTAVAIL);
 		}
Index: netinet/ip_input.c
===================================================================
--- netinet/ip_input.c	(revision 320403)
+++ netinet/ip_input.c	(working copy)
@@ -446,6 +446,7 @@
 void
 ip_input(struct mbuf *m)
 {
+	struct rm_priotracker in_ifa_tracker;
 	struct ip *ip = NULL;
 	struct in_ifaddr *ia = NULL;
 	struct ifaddr *ifa;
@@ -677,7 +678,7 @@
 	/*
 	 * Check for exact addresses in the hash bucket.
 	 */
-	/* IN_IFADDR_RLOCK(); */
+	IN_IFADDR_RLOCK(&in_ifa_tracker);
 	LIST_FOREACH(ia, INADDR_HASH(ip->ip_dst.s_addr), ia_hash) {
 		/*
 		 * If the address matches, verify that the packet
@@ -689,11 +690,11 @@
 			counter_u64_add(ia->ia_ifa.ifa_ipackets, 1);
 			counter_u64_add(ia->ia_ifa.ifa_ibytes,
 			    m->m_pkthdr.len);
-			/* IN_IFADDR_RUNLOCK(); */
+			IN_IFADDR_RUNLOCK(&in_ifa_tracker);
 			goto ours;
 		}
 	}
-	/* IN_IFADDR_RUNLOCK(); */
+	IN_IFADDR_RUNLOCK(&in_ifa_tracker);
 
 	/*
 	 * Check for broadcast addresses.
[root@norquay sys]#
Comment 10 Eugene Grosbein freebsd_committer 2018-02-05 16:42:08 UTC
Reassign this to me for uncommitted patches I produced for netgraph/netinet.