Bug 164901 - [regression] [patch] [lagg] igb/lagg poor traffic distribution
Summary: [regression] [patch] [lagg] igb/lagg poor traffic distribution
Status: Closed FIXED
Alias: None
Product: Base System
Classification: Unclassified
Component: kern (show other bugs)
Version: 8.2-STABLE
Hardware: Any Any
: Normal Affects Only Me
Assignee: Andrew Thompson
URL:
Keywords:
Depends on:
Blocks:
 
Reported: 2012-02-08 09:00 UTC by Eugene Grosbein
Modified: 2015-12-31 18:05 UTC (History)
0 users

See Also:


Attachments
file.diff (2.34 KB, patch)
2012-02-08 09:00 UTC, Eugene Grosbein
no flags Details | Diff

Note You need to log in before you can comment on or make changes to this bug.
Description Eugene Grosbein 2012-02-08 09:00:20 UTC
	Suppose, we have a router (BRAS) using two lagg(4) interfaces in LACP mode.

	Two-port lagg0 has IP address and its ports carry untagged IPoE frames.
	lagg1 has no IP address and has two ports (82576-based igb0 and igb1)
	that carry 1000 dot-q vlans with PPPoE frames only.

	In RELENG_7, lagg(4) evenly distributes traffic going from lagg1 to lagg0.
	Since 8.0-RELEASE all this traffic goes out through one	of lagg0's ports only.

	82576-based NICs and igb(4) support Microsoft Receive-Side Scaling (RSS),
	see http://download.intel.com/design/network/datashts/82576_Datasheet.pdf
	
	RSS states that queue number for non-IP frames (PPPoE/GRE/etc.)
	is not computed with hash. So, all these frames get same (zero)
	queue number and igb(4) assigns tag M_FLOWID=0 to mbufs.

	Since 8.0-RELEASE, lagg(4) skips its own hash computation for mbuts
	having M_FLOWID tag attached. Hence, it directs all such traffic
	to its first port only in this setup.

Fix: The following patch fixes the regression by introducing new sysctls
	that disable usage of M_FLOWID per lagg interface:

net.link.lagg.0.use_flowid
net.link.lagg.1.use_flowid

	Default value is 1 that corresponds to current behaviour of lagg(4).
	To fix our issue, we set net.link.lagg.0.use_flowid=0
	that restores pre-8 behaviour for lagg0 only, so it ignores misleading
	M_FLOWID assigned to mbufs by lagg1's ports.
How-To-Repeat: 	
	See above.
Comment 1 Mark Linimon freebsd_committer freebsd_triage 2012-02-17 04:26:12 UTC
Responsible Changed
From-To: freebsd-bugs->freebsd-net

Over to maintainer(s).
Comment 2 dfilter service freebsd_committer freebsd_triage 2012-02-22 22:01:45 UTC
Author: thompsa
Date: Wed Feb 22 22:01:30 2012
New Revision: 232008
URL: http://svn.freebsd.org/changeset/base/232008

Log:
  Using the flowid in the mbuf assumes the network card is giving a good hash for
  the traffic flow, this may not be the case giving poor traffic distribution.
  Add a sysctl which allows us to fall back to our own flow hash code.
  
  PR:		kern/164901
  Submitted by:	Eugene Grosbein
  MFC after:	1 week

Modified:
  head/sys/net/ieee8023ad_lacp.c
  head/sys/net/if_lagg.c
  head/sys/net/if_lagg.h

Modified: head/sys/net/ieee8023ad_lacp.c
==============================================================================
--- head/sys/net/ieee8023ad_lacp.c	Wed Feb 22 21:47:50 2012	(r232007)
+++ head/sys/net/ieee8023ad_lacp.c	Wed Feb 22 22:01:30 2012	(r232008)
@@ -812,7 +812,7 @@ lacp_select_tx_port(struct lagg_softc *s
 		return (NULL);
 	}
 
-	if (m->m_flags & M_FLOWID)
+	if (sc->use_flowid && (m->m_flags & M_FLOWID))
 		hash = m->m_pkthdr.flowid;
 	else
 		hash = lagg_hashmbuf(m, lsc->lsc_hashkey);

Modified: head/sys/net/if_lagg.c
==============================================================================
--- head/sys/net/if_lagg.c	Wed Feb 22 21:47:50 2012	(r232007)
+++ head/sys/net/if_lagg.c	Wed Feb 22 22:01:30 2012	(r232008)
@@ -262,6 +262,8 @@ lagg_clone_create(struct if_clone *ifc, 
 	struct ifnet *ifp;
 	int i, error = 0;
 	static const u_char eaddr[6];	/* 00:00:00:00:00:00 */
+	struct sysctl_oid *oid;
+	char num[14];			/* sufficient for 32 bits */
 
 	sc = malloc(sizeof(*sc), M_DEVBUF, M_WAITOK|M_ZERO);
 	ifp = sc->sc_ifp = if_alloc(IFT_ETHER);
@@ -270,6 +272,15 @@ lagg_clone_create(struct if_clone *ifc, 
 		return (ENOSPC);
 	}
 
+	sysctl_ctx_init(&sc->ctx);
+	snprintf(num, sizeof(num), "%u", unit);
+	sc->use_flowid = 1;
+	oid = SYSCTL_ADD_NODE(&sc->ctx, &SYSCTL_NODE_CHILDREN(_net_link, lagg),
+		OID_AUTO, num, CTLFLAG_RD, NULL, "");
+	SYSCTL_ADD_INT(&sc->ctx, SYSCTL_CHILDREN(oid), OID_AUTO,
+		"use_flowid", CTLTYPE_INT|CTLFLAG_RW, &sc->use_flowid, sc->use_flowid,
+		"Use flow id for load sharing");
+
 	sc->sc_proto = LAGG_PROTO_NONE;
 	for (i = 0; lagg_protos[i].ti_proto != LAGG_PROTO_NONE; i++) {
 		if (lagg_protos[i].ti_proto == LAGG_PROTO_DEFAULT) {
@@ -349,6 +360,7 @@ lagg_clone_destroy(struct ifnet *ifp)
 
 	LAGG_WUNLOCK(sc);
 
+	sysctl_ctx_free(&sc->ctx);
 	ifmedia_removeall(&sc->sc_media);
 	ether_ifdetach(ifp);
 	if_free(ifp);
@@ -1676,7 +1688,7 @@ lagg_lb_start(struct lagg_softc *sc, str
 	struct lagg_port *lp = NULL;
 	uint32_t p = 0;
 
-	if (m->m_flags & M_FLOWID)
+	if (sc->use_flowid && (m->m_flags & M_FLOWID))
 		p = m->m_pkthdr.flowid;
 	else
 		p = lagg_hashmbuf(m, lb->lb_key);

Modified: head/sys/net/if_lagg.h
==============================================================================
--- head/sys/net/if_lagg.h	Wed Feb 22 21:47:50 2012	(r232007)
+++ head/sys/net/if_lagg.h	Wed Feb 22 22:01:30 2012	(r232008)
@@ -21,6 +21,8 @@
 #ifndef _NET_LAGG_H
 #define _NET_LAGG_H
 
+#include <sys/sysctl.h>
+
 /*
  * Global definitions
  */
@@ -202,6 +204,8 @@ struct lagg_softc {
 	eventhandler_tag vlan_attach;
 	eventhandler_tag vlan_detach;
 #endif
+	struct sysctl_ctx_list		ctx;		/* sysctl variables */
+	int				use_flowid;	/* use M_FLOWID */
 };
 
 struct lagg_port {
_______________________________________________
svn-src-all@freebsd.org mailing list
http://lists.freebsd.org/mailman/listinfo/svn-src-all
To unsubscribe, send any mail to "svn-src-all-unsubscribe@freebsd.org"
Comment 3 Andrew Thompson freebsd_committer freebsd_triage 2012-02-22 22:37:24 UTC
State Changed
From-To: open->patched

Committed to head. 


Comment 4 Andrew Thompson freebsd_committer freebsd_triage 2012-02-22 22:37:24 UTC
Responsible Changed
From-To: freebsd-net->thompsa

Committed to head.
Comment 5 dfilter service freebsd_committer freebsd_triage 2012-02-29 00:53:12 UTC
Author: thompsa
Date: Wed Feb 29 00:52:56 2012
New Revision: 232279
URL: http://svn.freebsd.org/changeset/base/232279

Log:
  MFC r232008,232010,232080,232089
  
   Using the flowid in the mbuf assumes the network card is giving a good hash for
   the traffic flow, this may not be the case giving poor traffic distribution.
   Add a sysctl which allows us to fall back to our own flow hash code.
  
  PR:		kern/164901

Modified:
  stable/9/share/man/man4/lagg.4
  stable/9/sys/net/ieee8023ad_lacp.c
  stable/9/sys/net/if_lagg.c
  stable/9/sys/net/if_lagg.h
Directory Properties:
  stable/9/share/man/man4/   (props changed)
  stable/9/sys/   (props changed)
  stable/9/sys/amd64/include/xen/   (props changed)
  stable/9/sys/boot/   (props changed)
  stable/9/sys/boot/i386/efi/   (props changed)
  stable/9/sys/boot/ia64/efi/   (props changed)
  stable/9/sys/boot/ia64/ski/   (props changed)
  stable/9/sys/boot/powerpc/boot1.chrp/   (props changed)
  stable/9/sys/boot/powerpc/ofw/   (props changed)
  stable/9/sys/cddl/contrib/opensolaris/   (props changed)
  stable/9/sys/conf/   (props changed)
  stable/9/sys/contrib/dev/acpica/   (props changed)
  stable/9/sys/contrib/octeon-sdk/   (props changed)
  stable/9/sys/contrib/pf/   (props changed)
  stable/9/sys/contrib/x86emu/   (props changed)
  stable/9/sys/i386/conf/XENHVM   (props changed)

Modified: stable/9/share/man/man4/lagg.4
==============================================================================
--- stable/9/share/man/man4/lagg.4	Wed Feb 29 00:30:18 2012	(r232278)
+++ stable/9/share/man/man4/lagg.4	Wed Feb 29 00:52:56 2012	(r232279)
@@ -16,7 +16,7 @@
 .\"
 .\" $FreeBSD$
 .\"
-.Dd October 18, 2010
+.Dd February 23, 2012
 .Dt LAGG 4
 .Os
 .Sh NAME
@@ -133,6 +133,21 @@ variable in
 .Pp
 The MTU of the first interface to be added is used as the lagg MTU.
 All additional interfaces are required to have exactly the same value.
+.Pp
+The
+.Ic loadbalance
+and
+.Ic lacp
+modes will use the RSS hash from the network card if available to avoid
+computing one, this may give poor traffic distribution if the hash is invalid
+or uses less of the protocol header information.
+Local hash computation can be forced per interface by setting the
+.Va net.link.lagg.X.use_flowid
+.Xr sysctl 8
+variable to zero where X is the interface number.
+The default for new interfaces is set via the
+.Va net.link.lagg.default_use_flowid
+.Xr sysctl 8 .
 .Sh EXAMPLES
 Create a 802.3ad link aggregation using LACP with two
 .Xr bge 4

Modified: stable/9/sys/net/ieee8023ad_lacp.c
==============================================================================
--- stable/9/sys/net/ieee8023ad_lacp.c	Wed Feb 29 00:30:18 2012	(r232278)
+++ stable/9/sys/net/ieee8023ad_lacp.c	Wed Feb 29 00:52:56 2012	(r232279)
@@ -812,7 +812,7 @@ lacp_select_tx_port(struct lagg_softc *s
 		return (NULL);
 	}
 
-	if (m->m_flags & M_FLOWID)
+	if (sc->use_flowid && (m->m_flags & M_FLOWID))
 		hash = m->m_pkthdr.flowid;
 	else
 		hash = lagg_hashmbuf(m, lsc->lsc_hashkey);

Modified: stable/9/sys/net/if_lagg.c
==============================================================================
--- stable/9/sys/net/if_lagg.c	Wed Feb 29 00:30:18 2012	(r232278)
+++ stable/9/sys/net/if_lagg.c	Wed Feb 29 00:52:56 2012	(r232279)
@@ -171,6 +171,11 @@ static int lagg_failover_rx_all = 0; /* 
 SYSCTL_INT(_net_link_lagg, OID_AUTO, failover_rx_all, CTLFLAG_RW,
     &lagg_failover_rx_all, 0,
     "Accept input from any interface in a failover lagg");
+static int def_use_flowid = 1; /* Default value for using M_FLOWID */
+TUNABLE_INT("net.link.lagg.default_use_flowid", &def_use_flowid);
+SYSCTL_INT(_net_link_lagg, OID_AUTO, default_use_flowid, CTLFLAG_RW,
+    &def_use_flowid, 0,
+    "Default setting for using flow id for load sharing");
 
 static int
 lagg_modevent(module_t mod, int type, void *data)
@@ -261,6 +266,8 @@ lagg_clone_create(struct if_clone *ifc, 
 	struct ifnet *ifp;
 	int i, error = 0;
 	static const u_char eaddr[6];	/* 00:00:00:00:00:00 */
+	struct sysctl_oid *oid;
+	char num[14];			/* sufficient for 32 bits */
 
 	sc = malloc(sizeof(*sc), M_DEVBUF, M_WAITOK|M_ZERO);
 	ifp = sc->sc_ifp = if_alloc(IFT_ETHER);
@@ -269,6 +276,15 @@ lagg_clone_create(struct if_clone *ifc, 
 		return (ENOSPC);
 	}
 
+	sysctl_ctx_init(&sc->ctx);
+	snprintf(num, sizeof(num), "%u", unit);
+	sc->use_flowid = def_use_flowid;
+	oid = SYSCTL_ADD_NODE(&sc->ctx, &SYSCTL_NODE_CHILDREN(_net_link, lagg),
+		OID_AUTO, num, CTLFLAG_RD, NULL, "");
+	SYSCTL_ADD_INT(&sc->ctx, SYSCTL_CHILDREN(oid), OID_AUTO,
+		"use_flowid", CTLTYPE_INT|CTLFLAG_RW, &sc->use_flowid, sc->use_flowid,
+		"Use flow id for load sharing");
+
 	sc->sc_proto = LAGG_PROTO_NONE;
 	for (i = 0; lagg_protos[i].ti_proto != LAGG_PROTO_NONE; i++) {
 		if (lagg_protos[i].ti_proto == LAGG_PROTO_DEFAULT) {
@@ -349,6 +365,7 @@ lagg_clone_destroy(struct ifnet *ifp)
 
 	LAGG_WUNLOCK(sc);
 
+	sysctl_ctx_free(&sc->ctx);
 	ifmedia_removeall(&sc->sc_media);
 	ether_ifdetach(ifp);
 	if_free_type(ifp, IFT_ETHER);
@@ -1676,7 +1693,7 @@ lagg_lb_start(struct lagg_softc *sc, str
 	struct lagg_port *lp = NULL;
 	uint32_t p = 0;
 
-	if (m->m_flags & M_FLOWID)
+	if (sc->use_flowid && (m->m_flags & M_FLOWID))
 		p = m->m_pkthdr.flowid;
 	else
 		p = lagg_hashmbuf(m, lb->lb_key);

Modified: stable/9/sys/net/if_lagg.h
==============================================================================
--- stable/9/sys/net/if_lagg.h	Wed Feb 29 00:30:18 2012	(r232278)
+++ stable/9/sys/net/if_lagg.h	Wed Feb 29 00:52:56 2012	(r232279)
@@ -21,6 +21,8 @@
 #ifndef _NET_LAGG_H
 #define _NET_LAGG_H
 
+#include <sys/sysctl.h>
+
 /*
  * Global definitions
  */
@@ -202,6 +204,8 @@ struct lagg_softc {
 	eventhandler_tag vlan_attach;
 	eventhandler_tag vlan_detach;
 #endif
+	struct sysctl_ctx_list		ctx;		/* sysctl variables */
+	int				use_flowid;	/* use M_FLOWID */
 };
 
 struct lagg_port {
_______________________________________________
svn-src-all@freebsd.org mailing list
http://lists.freebsd.org/mailman/listinfo/svn-src-all
To unsubscribe, send any mail to "svn-src-all-unsubscribe@freebsd.org"
Comment 6 dfilter service freebsd_committer freebsd_triage 2012-02-29 20:22:59 UTC
Author: thompsa
Date: Wed Feb 29 20:22:45 2012
New Revision: 232314
URL: http://svn.freebsd.org/changeset/base/232314

Log:
  MFC r232008,232010,232080,232089
  
   Using the flowid in the mbuf assumes the network card is giving a good hash for
   the traffic flow, this may not be the case giving poor traffic distribution.
   Add a sysctl which allows us to fall back to our own flow hash code.
  
  PR:		kern/164901
  Approved by:	re (bz)

Modified:
  stable/8/share/man/man4/lagg.4
  stable/8/sys/net/ieee8023ad_lacp.c
  stable/8/sys/net/if_lagg.c
  stable/8/sys/net/if_lagg.h
Directory Properties:
  stable/8/share/man/man4/   (props changed)
  stable/8/sys/   (props changed)
  stable/8/sys/amd64/include/xen/   (props changed)
  stable/8/sys/boot/   (props changed)
  stable/8/sys/cddl/contrib/opensolaris/   (props changed)
  stable/8/sys/contrib/dev/acpica/   (props changed)
  stable/8/sys/contrib/pf/   (props changed)
  stable/8/sys/dev/e1000/   (props changed)
  stable/8/sys/i386/conf/XENHVM   (props changed)

Modified: stable/8/share/man/man4/lagg.4
==============================================================================
--- stable/8/share/man/man4/lagg.4	Wed Feb 29 20:13:53 2012	(r232313)
+++ stable/8/share/man/man4/lagg.4	Wed Feb 29 20:22:45 2012	(r232314)
@@ -16,7 +16,7 @@
 .\"
 .\" $FreeBSD$
 .\"
-.Dd October 18, 2010
+.Dd February 23, 2012
 .Dt LAGG 4
 .Os
 .Sh NAME
@@ -133,6 +133,21 @@ variable in
 .Pp
 The MTU of the first interface to be added is used as the lagg MTU.
 All additional interfaces are required to have exactly the same value.
+.Pp
+The
+.Ic loadbalance
+and
+.Ic lacp
+modes will use the RSS hash from the network card if available to avoid
+computing one, this may give poor traffic distribution if the hash is invalid
+or uses less of the protocol header information.
+Local hash computation can be forced per interface by setting the
+.Va net.link.lagg.X.use_flowid
+.Xr sysctl 8
+variable to zero where X is the interface number.
+The default for new interfaces is set via the
+.Va net.link.lagg.default_use_flowid
+.Xr sysctl 8 .
 .Sh EXAMPLES
 Create a 802.3ad link aggregation using LACP with two
 .Xr bge 4

Modified: stable/8/sys/net/ieee8023ad_lacp.c
==============================================================================
--- stable/8/sys/net/ieee8023ad_lacp.c	Wed Feb 29 20:13:53 2012	(r232313)
+++ stable/8/sys/net/ieee8023ad_lacp.c	Wed Feb 29 20:22:45 2012	(r232314)
@@ -812,7 +812,7 @@ lacp_select_tx_port(struct lagg_softc *s
 		return (NULL);
 	}
 
-	if (m->m_flags & M_FLOWID)
+	if (sc->use_flowid && (m->m_flags & M_FLOWID))
 		hash = m->m_pkthdr.flowid;
 	else
 		hash = lagg_hashmbuf(m, lsc->lsc_hashkey);

Modified: stable/8/sys/net/if_lagg.c
==============================================================================
--- stable/8/sys/net/if_lagg.c	Wed Feb 29 20:13:53 2012	(r232313)
+++ stable/8/sys/net/if_lagg.c	Wed Feb 29 20:22:45 2012	(r232314)
@@ -167,6 +167,11 @@ static int lagg_failover_rx_all = 0; /* 
 SYSCTL_INT(_net_link_lagg, OID_AUTO, failover_rx_all, CTLFLAG_RW,
     &lagg_failover_rx_all, 0,
     "Accept input from any interface in a failover lagg");
+static int def_use_flowid = 1; /* Default value for using M_FLOWID */
+TUNABLE_INT("net.link.lagg.default_use_flowid", &def_use_flowid);
+SYSCTL_INT(_net_link_lagg, OID_AUTO, default_use_flowid, CTLFLAG_RW,
+    &def_use_flowid, 0,
+    "Default setting for using flow id for load sharing");
 
 static int
 lagg_modevent(module_t mod, int type, void *data)
@@ -257,6 +262,8 @@ lagg_clone_create(struct if_clone *ifc, 
 	struct ifnet *ifp;
 	int i, error = 0;
 	static const u_char eaddr[6];	/* 00:00:00:00:00:00 */
+	struct sysctl_oid *oid;
+	char num[14];			/* sufficient for 32 bits */
 
 	sc = malloc(sizeof(*sc), M_DEVBUF, M_WAITOK|M_ZERO);
 	ifp = sc->sc_ifp = if_alloc(IFT_ETHER);
@@ -265,6 +272,15 @@ lagg_clone_create(struct if_clone *ifc, 
 		return (ENOSPC);
 	}
 
+	sysctl_ctx_init(&sc->ctx);
+	snprintf(num, sizeof(num), "%u", unit);
+	sc->use_flowid = def_use_flowid;
+	oid = SYSCTL_ADD_NODE(&sc->ctx, &SYSCTL_NODE_CHILDREN(_net_link, lagg),
+		OID_AUTO, num, CTLFLAG_RD, NULL, "");
+	SYSCTL_ADD_INT(&sc->ctx, SYSCTL_CHILDREN(oid), OID_AUTO,
+		"use_flowid", CTLTYPE_INT|CTLFLAG_RW, &sc->use_flowid, sc->use_flowid,
+		"Use flow id for load sharing");
+
 	sc->sc_proto = LAGG_PROTO_NONE;
 	for (i = 0; lagg_protos[i].ti_proto != LAGG_PROTO_NONE; i++) {
 		if (lagg_protos[i].ti_proto == LAGG_PROTO_DEFAULT) {
@@ -344,6 +360,7 @@ lagg_clone_destroy(struct ifnet *ifp)
 
 	LAGG_WUNLOCK(sc);
 
+	sysctl_ctx_free(&sc->ctx);
 	ifmedia_removeall(&sc->sc_media);
 	ether_ifdetach(ifp);
 	if_free_type(ifp, IFT_ETHER);
@@ -1668,7 +1685,7 @@ lagg_lb_start(struct lagg_softc *sc, str
 	struct lagg_port *lp = NULL;
 	uint32_t p = 0;
 
-	if (m->m_flags & M_FLOWID)
+	if (sc->use_flowid && (m->m_flags & M_FLOWID))
 		p = m->m_pkthdr.flowid;
 	else
 		p = lagg_hashmbuf(m, lb->lb_key);

Modified: stable/8/sys/net/if_lagg.h
==============================================================================
--- stable/8/sys/net/if_lagg.h	Wed Feb 29 20:13:53 2012	(r232313)
+++ stable/8/sys/net/if_lagg.h	Wed Feb 29 20:22:45 2012	(r232314)
@@ -21,6 +21,8 @@
 #ifndef _NET_LAGG_H
 #define _NET_LAGG_H
 
+#include <sys/sysctl.h>
+
 /*
  * Global definitions
  */
@@ -202,6 +204,8 @@ struct lagg_softc {
 	eventhandler_tag vlan_attach;
 	eventhandler_tag vlan_detach;
 #endif
+	struct sysctl_ctx_list		ctx;		/* sysctl variables */
+	int				use_flowid;	/* use M_FLOWID */
 };
 
 struct lagg_port {
_______________________________________________
svn-src-all@freebsd.org mailing list
http://lists.freebsd.org/mailman/listinfo/svn-src-all
To unsubscribe, send any mail to "svn-src-all-unsubscribe@freebsd.org"
Comment 7 Andrew Thompson freebsd_committer freebsd_triage 2012-02-29 20:23:00 UTC
State Changed
From-To: patched->closed

MFC complete, thanks for the pr.
Comment 8 commit-hook freebsd_committer freebsd_triage 2015-12-31 18:05:17 UTC
A commit references this bug:

Author: crees
Date: Thu Dec 31 18:04:31 UTC 2015
New revision: 404956
URL: https://svnweb.freebsd.org/changeset/ports/404956

Log:
  Fix bashism

  PR:		ports/164901
  Submitted by:	Martin Waschbuesch

Changes:
  head/mail/automx/Makefile
  head/mail/automx/files/patch-src-automx-test