Suppose, we have a router (BRAS) using two lagg(4) interfaces in LACP mode. Two-port lagg0 has IP address and its ports carry untagged IPoE frames. lagg1 has no IP address and has two ports (82576-based igb0 and igb1) that carry 1000 dot-q vlans with PPPoE frames only. In RELENG_7, lagg(4) evenly distributes traffic going from lagg1 to lagg0. Since 8.0-RELEASE all this traffic goes out through one of lagg0's ports only. 82576-based NICs and igb(4) support Microsoft Receive-Side Scaling (RSS), see http://download.intel.com/design/network/datashts/82576_Datasheet.pdf RSS states that queue number for non-IP frames (PPPoE/GRE/etc.) is not computed with hash. So, all these frames get same (zero) queue number and igb(4) assigns tag M_FLOWID=0 to mbufs. Since 8.0-RELEASE, lagg(4) skips its own hash computation for mbuts having M_FLOWID tag attached. Hence, it directs all such traffic to its first port only in this setup. Fix: The following patch fixes the regression by introducing new sysctls that disable usage of M_FLOWID per lagg interface: net.link.lagg.0.use_flowid net.link.lagg.1.use_flowid Default value is 1 that corresponds to current behaviour of lagg(4). To fix our issue, we set net.link.lagg.0.use_flowid=0 that restores pre-8 behaviour for lagg0 only, so it ignores misleading M_FLOWID assigned to mbufs by lagg1's ports. How-To-Repeat: See above.
Responsible Changed From-To: freebsd-bugs->freebsd-net Over to maintainer(s).
Author: thompsa Date: Wed Feb 22 22:01:30 2012 New Revision: 232008 URL: http://svn.freebsd.org/changeset/base/232008 Log: Using the flowid in the mbuf assumes the network card is giving a good hash for the traffic flow, this may not be the case giving poor traffic distribution. Add a sysctl which allows us to fall back to our own flow hash code. PR: kern/164901 Submitted by: Eugene Grosbein MFC after: 1 week Modified: head/sys/net/ieee8023ad_lacp.c head/sys/net/if_lagg.c head/sys/net/if_lagg.h Modified: head/sys/net/ieee8023ad_lacp.c ============================================================================== --- head/sys/net/ieee8023ad_lacp.c Wed Feb 22 21:47:50 2012 (r232007) +++ head/sys/net/ieee8023ad_lacp.c Wed Feb 22 22:01:30 2012 (r232008) @@ -812,7 +812,7 @@ lacp_select_tx_port(struct lagg_softc *s return (NULL); } - if (m->m_flags & M_FLOWID) + if (sc->use_flowid && (m->m_flags & M_FLOWID)) hash = m->m_pkthdr.flowid; else hash = lagg_hashmbuf(m, lsc->lsc_hashkey); Modified: head/sys/net/if_lagg.c ============================================================================== --- head/sys/net/if_lagg.c Wed Feb 22 21:47:50 2012 (r232007) +++ head/sys/net/if_lagg.c Wed Feb 22 22:01:30 2012 (r232008) @@ -262,6 +262,8 @@ lagg_clone_create(struct if_clone *ifc, struct ifnet *ifp; int i, error = 0; static const u_char eaddr[6]; /* 00:00:00:00:00:00 */ + struct sysctl_oid *oid; + char num[14]; /* sufficient for 32 bits */ sc = malloc(sizeof(*sc), M_DEVBUF, M_WAITOK|M_ZERO); ifp = sc->sc_ifp = if_alloc(IFT_ETHER); @@ -270,6 +272,15 @@ lagg_clone_create(struct if_clone *ifc, return (ENOSPC); } + sysctl_ctx_init(&sc->ctx); + snprintf(num, sizeof(num), "%u", unit); + sc->use_flowid = 1; + oid = SYSCTL_ADD_NODE(&sc->ctx, &SYSCTL_NODE_CHILDREN(_net_link, lagg), + OID_AUTO, num, CTLFLAG_RD, NULL, ""); + SYSCTL_ADD_INT(&sc->ctx, SYSCTL_CHILDREN(oid), OID_AUTO, + "use_flowid", CTLTYPE_INT|CTLFLAG_RW, &sc->use_flowid, sc->use_flowid, + "Use flow id for load sharing"); + sc->sc_proto = LAGG_PROTO_NONE; for (i = 0; lagg_protos[i].ti_proto != LAGG_PROTO_NONE; i++) { if (lagg_protos[i].ti_proto == LAGG_PROTO_DEFAULT) { @@ -349,6 +360,7 @@ lagg_clone_destroy(struct ifnet *ifp) LAGG_WUNLOCK(sc); + sysctl_ctx_free(&sc->ctx); ifmedia_removeall(&sc->sc_media); ether_ifdetach(ifp); if_free(ifp); @@ -1676,7 +1688,7 @@ lagg_lb_start(struct lagg_softc *sc, str struct lagg_port *lp = NULL; uint32_t p = 0; - if (m->m_flags & M_FLOWID) + if (sc->use_flowid && (m->m_flags & M_FLOWID)) p = m->m_pkthdr.flowid; else p = lagg_hashmbuf(m, lb->lb_key); Modified: head/sys/net/if_lagg.h ============================================================================== --- head/sys/net/if_lagg.h Wed Feb 22 21:47:50 2012 (r232007) +++ head/sys/net/if_lagg.h Wed Feb 22 22:01:30 2012 (r232008) @@ -21,6 +21,8 @@ #ifndef _NET_LAGG_H #define _NET_LAGG_H +#include <sys/sysctl.h> + /* * Global definitions */ @@ -202,6 +204,8 @@ struct lagg_softc { eventhandler_tag vlan_attach; eventhandler_tag vlan_detach; #endif + struct sysctl_ctx_list ctx; /* sysctl variables */ + int use_flowid; /* use M_FLOWID */ }; struct lagg_port { _______________________________________________ svn-src-all@freebsd.org mailing list http://lists.freebsd.org/mailman/listinfo/svn-src-all To unsubscribe, send any mail to "svn-src-all-unsubscribe@freebsd.org"
State Changed From-To: open->patched Committed to head.
Responsible Changed From-To: freebsd-net->thompsa Committed to head.
Author: thompsa Date: Wed Feb 29 00:52:56 2012 New Revision: 232279 URL: http://svn.freebsd.org/changeset/base/232279 Log: MFC r232008,232010,232080,232089 Using the flowid in the mbuf assumes the network card is giving a good hash for the traffic flow, this may not be the case giving poor traffic distribution. Add a sysctl which allows us to fall back to our own flow hash code. PR: kern/164901 Modified: stable/9/share/man/man4/lagg.4 stable/9/sys/net/ieee8023ad_lacp.c stable/9/sys/net/if_lagg.c stable/9/sys/net/if_lagg.h Directory Properties: stable/9/share/man/man4/ (props changed) stable/9/sys/ (props changed) stable/9/sys/amd64/include/xen/ (props changed) stable/9/sys/boot/ (props changed) stable/9/sys/boot/i386/efi/ (props changed) stable/9/sys/boot/ia64/efi/ (props changed) stable/9/sys/boot/ia64/ski/ (props changed) stable/9/sys/boot/powerpc/boot1.chrp/ (props changed) stable/9/sys/boot/powerpc/ofw/ (props changed) stable/9/sys/cddl/contrib/opensolaris/ (props changed) stable/9/sys/conf/ (props changed) stable/9/sys/contrib/dev/acpica/ (props changed) stable/9/sys/contrib/octeon-sdk/ (props changed) stable/9/sys/contrib/pf/ (props changed) stable/9/sys/contrib/x86emu/ (props changed) stable/9/sys/i386/conf/XENHVM (props changed) Modified: stable/9/share/man/man4/lagg.4 ============================================================================== --- stable/9/share/man/man4/lagg.4 Wed Feb 29 00:30:18 2012 (r232278) +++ stable/9/share/man/man4/lagg.4 Wed Feb 29 00:52:56 2012 (r232279) @@ -16,7 +16,7 @@ .\" .\" $FreeBSD$ .\" -.Dd October 18, 2010 +.Dd February 23, 2012 .Dt LAGG 4 .Os .Sh NAME @@ -133,6 +133,21 @@ variable in .Pp The MTU of the first interface to be added is used as the lagg MTU. All additional interfaces are required to have exactly the same value. +.Pp +The +.Ic loadbalance +and +.Ic lacp +modes will use the RSS hash from the network card if available to avoid +computing one, this may give poor traffic distribution if the hash is invalid +or uses less of the protocol header information. +Local hash computation can be forced per interface by setting the +.Va net.link.lagg.X.use_flowid +.Xr sysctl 8 +variable to zero where X is the interface number. +The default for new interfaces is set via the +.Va net.link.lagg.default_use_flowid +.Xr sysctl 8 . .Sh EXAMPLES Create a 802.3ad link aggregation using LACP with two .Xr bge 4 Modified: stable/9/sys/net/ieee8023ad_lacp.c ============================================================================== --- stable/9/sys/net/ieee8023ad_lacp.c Wed Feb 29 00:30:18 2012 (r232278) +++ stable/9/sys/net/ieee8023ad_lacp.c Wed Feb 29 00:52:56 2012 (r232279) @@ -812,7 +812,7 @@ lacp_select_tx_port(struct lagg_softc *s return (NULL); } - if (m->m_flags & M_FLOWID) + if (sc->use_flowid && (m->m_flags & M_FLOWID)) hash = m->m_pkthdr.flowid; else hash = lagg_hashmbuf(m, lsc->lsc_hashkey); Modified: stable/9/sys/net/if_lagg.c ============================================================================== --- stable/9/sys/net/if_lagg.c Wed Feb 29 00:30:18 2012 (r232278) +++ stable/9/sys/net/if_lagg.c Wed Feb 29 00:52:56 2012 (r232279) @@ -171,6 +171,11 @@ static int lagg_failover_rx_all = 0; /* SYSCTL_INT(_net_link_lagg, OID_AUTO, failover_rx_all, CTLFLAG_RW, &lagg_failover_rx_all, 0, "Accept input from any interface in a failover lagg"); +static int def_use_flowid = 1; /* Default value for using M_FLOWID */ +TUNABLE_INT("net.link.lagg.default_use_flowid", &def_use_flowid); +SYSCTL_INT(_net_link_lagg, OID_AUTO, default_use_flowid, CTLFLAG_RW, + &def_use_flowid, 0, + "Default setting for using flow id for load sharing"); static int lagg_modevent(module_t mod, int type, void *data) @@ -261,6 +266,8 @@ lagg_clone_create(struct if_clone *ifc, struct ifnet *ifp; int i, error = 0; static const u_char eaddr[6]; /* 00:00:00:00:00:00 */ + struct sysctl_oid *oid; + char num[14]; /* sufficient for 32 bits */ sc = malloc(sizeof(*sc), M_DEVBUF, M_WAITOK|M_ZERO); ifp = sc->sc_ifp = if_alloc(IFT_ETHER); @@ -269,6 +276,15 @@ lagg_clone_create(struct if_clone *ifc, return (ENOSPC); } + sysctl_ctx_init(&sc->ctx); + snprintf(num, sizeof(num), "%u", unit); + sc->use_flowid = def_use_flowid; + oid = SYSCTL_ADD_NODE(&sc->ctx, &SYSCTL_NODE_CHILDREN(_net_link, lagg), + OID_AUTO, num, CTLFLAG_RD, NULL, ""); + SYSCTL_ADD_INT(&sc->ctx, SYSCTL_CHILDREN(oid), OID_AUTO, + "use_flowid", CTLTYPE_INT|CTLFLAG_RW, &sc->use_flowid, sc->use_flowid, + "Use flow id for load sharing"); + sc->sc_proto = LAGG_PROTO_NONE; for (i = 0; lagg_protos[i].ti_proto != LAGG_PROTO_NONE; i++) { if (lagg_protos[i].ti_proto == LAGG_PROTO_DEFAULT) { @@ -349,6 +365,7 @@ lagg_clone_destroy(struct ifnet *ifp) LAGG_WUNLOCK(sc); + sysctl_ctx_free(&sc->ctx); ifmedia_removeall(&sc->sc_media); ether_ifdetach(ifp); if_free_type(ifp, IFT_ETHER); @@ -1676,7 +1693,7 @@ lagg_lb_start(struct lagg_softc *sc, str struct lagg_port *lp = NULL; uint32_t p = 0; - if (m->m_flags & M_FLOWID) + if (sc->use_flowid && (m->m_flags & M_FLOWID)) p = m->m_pkthdr.flowid; else p = lagg_hashmbuf(m, lb->lb_key); Modified: stable/9/sys/net/if_lagg.h ============================================================================== --- stable/9/sys/net/if_lagg.h Wed Feb 29 00:30:18 2012 (r232278) +++ stable/9/sys/net/if_lagg.h Wed Feb 29 00:52:56 2012 (r232279) @@ -21,6 +21,8 @@ #ifndef _NET_LAGG_H #define _NET_LAGG_H +#include <sys/sysctl.h> + /* * Global definitions */ @@ -202,6 +204,8 @@ struct lagg_softc { eventhandler_tag vlan_attach; eventhandler_tag vlan_detach; #endif + struct sysctl_ctx_list ctx; /* sysctl variables */ + int use_flowid; /* use M_FLOWID */ }; struct lagg_port { _______________________________________________ svn-src-all@freebsd.org mailing list http://lists.freebsd.org/mailman/listinfo/svn-src-all To unsubscribe, send any mail to "svn-src-all-unsubscribe@freebsd.org"
Author: thompsa Date: Wed Feb 29 20:22:45 2012 New Revision: 232314 URL: http://svn.freebsd.org/changeset/base/232314 Log: MFC r232008,232010,232080,232089 Using the flowid in the mbuf assumes the network card is giving a good hash for the traffic flow, this may not be the case giving poor traffic distribution. Add a sysctl which allows us to fall back to our own flow hash code. PR: kern/164901 Approved by: re (bz) Modified: stable/8/share/man/man4/lagg.4 stable/8/sys/net/ieee8023ad_lacp.c stable/8/sys/net/if_lagg.c stable/8/sys/net/if_lagg.h Directory Properties: stable/8/share/man/man4/ (props changed) stable/8/sys/ (props changed) stable/8/sys/amd64/include/xen/ (props changed) stable/8/sys/boot/ (props changed) stable/8/sys/cddl/contrib/opensolaris/ (props changed) stable/8/sys/contrib/dev/acpica/ (props changed) stable/8/sys/contrib/pf/ (props changed) stable/8/sys/dev/e1000/ (props changed) stable/8/sys/i386/conf/XENHVM (props changed) Modified: stable/8/share/man/man4/lagg.4 ============================================================================== --- stable/8/share/man/man4/lagg.4 Wed Feb 29 20:13:53 2012 (r232313) +++ stable/8/share/man/man4/lagg.4 Wed Feb 29 20:22:45 2012 (r232314) @@ -16,7 +16,7 @@ .\" .\" $FreeBSD$ .\" -.Dd October 18, 2010 +.Dd February 23, 2012 .Dt LAGG 4 .Os .Sh NAME @@ -133,6 +133,21 @@ variable in .Pp The MTU of the first interface to be added is used as the lagg MTU. All additional interfaces are required to have exactly the same value. +.Pp +The +.Ic loadbalance +and +.Ic lacp +modes will use the RSS hash from the network card if available to avoid +computing one, this may give poor traffic distribution if the hash is invalid +or uses less of the protocol header information. +Local hash computation can be forced per interface by setting the +.Va net.link.lagg.X.use_flowid +.Xr sysctl 8 +variable to zero where X is the interface number. +The default for new interfaces is set via the +.Va net.link.lagg.default_use_flowid +.Xr sysctl 8 . .Sh EXAMPLES Create a 802.3ad link aggregation using LACP with two .Xr bge 4 Modified: stable/8/sys/net/ieee8023ad_lacp.c ============================================================================== --- stable/8/sys/net/ieee8023ad_lacp.c Wed Feb 29 20:13:53 2012 (r232313) +++ stable/8/sys/net/ieee8023ad_lacp.c Wed Feb 29 20:22:45 2012 (r232314) @@ -812,7 +812,7 @@ lacp_select_tx_port(struct lagg_softc *s return (NULL); } - if (m->m_flags & M_FLOWID) + if (sc->use_flowid && (m->m_flags & M_FLOWID)) hash = m->m_pkthdr.flowid; else hash = lagg_hashmbuf(m, lsc->lsc_hashkey); Modified: stable/8/sys/net/if_lagg.c ============================================================================== --- stable/8/sys/net/if_lagg.c Wed Feb 29 20:13:53 2012 (r232313) +++ stable/8/sys/net/if_lagg.c Wed Feb 29 20:22:45 2012 (r232314) @@ -167,6 +167,11 @@ static int lagg_failover_rx_all = 0; /* SYSCTL_INT(_net_link_lagg, OID_AUTO, failover_rx_all, CTLFLAG_RW, &lagg_failover_rx_all, 0, "Accept input from any interface in a failover lagg"); +static int def_use_flowid = 1; /* Default value for using M_FLOWID */ +TUNABLE_INT("net.link.lagg.default_use_flowid", &def_use_flowid); +SYSCTL_INT(_net_link_lagg, OID_AUTO, default_use_flowid, CTLFLAG_RW, + &def_use_flowid, 0, + "Default setting for using flow id for load sharing"); static int lagg_modevent(module_t mod, int type, void *data) @@ -257,6 +262,8 @@ lagg_clone_create(struct if_clone *ifc, struct ifnet *ifp; int i, error = 0; static const u_char eaddr[6]; /* 00:00:00:00:00:00 */ + struct sysctl_oid *oid; + char num[14]; /* sufficient for 32 bits */ sc = malloc(sizeof(*sc), M_DEVBUF, M_WAITOK|M_ZERO); ifp = sc->sc_ifp = if_alloc(IFT_ETHER); @@ -265,6 +272,15 @@ lagg_clone_create(struct if_clone *ifc, return (ENOSPC); } + sysctl_ctx_init(&sc->ctx); + snprintf(num, sizeof(num), "%u", unit); + sc->use_flowid = def_use_flowid; + oid = SYSCTL_ADD_NODE(&sc->ctx, &SYSCTL_NODE_CHILDREN(_net_link, lagg), + OID_AUTO, num, CTLFLAG_RD, NULL, ""); + SYSCTL_ADD_INT(&sc->ctx, SYSCTL_CHILDREN(oid), OID_AUTO, + "use_flowid", CTLTYPE_INT|CTLFLAG_RW, &sc->use_flowid, sc->use_flowid, + "Use flow id for load sharing"); + sc->sc_proto = LAGG_PROTO_NONE; for (i = 0; lagg_protos[i].ti_proto != LAGG_PROTO_NONE; i++) { if (lagg_protos[i].ti_proto == LAGG_PROTO_DEFAULT) { @@ -344,6 +360,7 @@ lagg_clone_destroy(struct ifnet *ifp) LAGG_WUNLOCK(sc); + sysctl_ctx_free(&sc->ctx); ifmedia_removeall(&sc->sc_media); ether_ifdetach(ifp); if_free_type(ifp, IFT_ETHER); @@ -1668,7 +1685,7 @@ lagg_lb_start(struct lagg_softc *sc, str struct lagg_port *lp = NULL; uint32_t p = 0; - if (m->m_flags & M_FLOWID) + if (sc->use_flowid && (m->m_flags & M_FLOWID)) p = m->m_pkthdr.flowid; else p = lagg_hashmbuf(m, lb->lb_key); Modified: stable/8/sys/net/if_lagg.h ============================================================================== --- stable/8/sys/net/if_lagg.h Wed Feb 29 20:13:53 2012 (r232313) +++ stable/8/sys/net/if_lagg.h Wed Feb 29 20:22:45 2012 (r232314) @@ -21,6 +21,8 @@ #ifndef _NET_LAGG_H #define _NET_LAGG_H +#include <sys/sysctl.h> + /* * Global definitions */ @@ -202,6 +204,8 @@ struct lagg_softc { eventhandler_tag vlan_attach; eventhandler_tag vlan_detach; #endif + struct sysctl_ctx_list ctx; /* sysctl variables */ + int use_flowid; /* use M_FLOWID */ }; struct lagg_port { _______________________________________________ svn-src-all@freebsd.org mailing list http://lists.freebsd.org/mailman/listinfo/svn-src-all To unsubscribe, send any mail to "svn-src-all-unsubscribe@freebsd.org"
State Changed From-To: patched->closed MFC complete, thanks for the pr.
A commit references this bug: Author: crees Date: Thu Dec 31 18:04:31 UTC 2015 New revision: 404956 URL: https://svnweb.freebsd.org/changeset/ports/404956 Log: Fix bashism PR: ports/164901 Submitted by: Martin Waschbuesch Changes: head/mail/automx/Makefile head/mail/automx/files/patch-src-automx-test