FreeBSD Bugzilla – Attachment 167137 Details for Bug 207297
[Hyper-V] FreeBSD 10.2 on Hyper-V loses network under heavy load due to OACTIVE
[patch] Patch to fix OACTIVE issue

Description:  Patch to fix OACTIVE issue
Filename:     patch_for_network_loss_by_oactive.patch
MIME Type:    text/plain
Creator:      Hongjiang
Created:      2016-02-18 05:57:35 UTC
Size:         46.94 KB
Flags:        patch, obsolete
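The patch addresses a TX wedge: under heavy load the hn(4) driver could set IFF_DRV_OACTIVE (when it ran out of TX resources or hv_nv_on_send() failed) with no reliable path that ever cleared the flag, so the interface stopped transmitting. The diff below reworks the TX path around a preallocated, busdma-backed descriptor ring and introduces netvsc_channel_rollup()/netvsc_recv_rollup() callbacks so that send completions clear OACTIVE and restart the queue. It also adds LRO and ifmedia support, loader tunables (dev.hn.trust_hosttcp, dev.hn.tx_chimney_size, dev.hn.tso_maxlen), and a pair of storvsc fixes.

Before reading the full diff, a minimal userland model of the stall-and-recovery logic may help. This is only a sketch under simplifying assumptions (no locking, no real if_snd queue); the names mirror the driver's, but the code is not the patch itself:

/*
 * Userland model of the recovery path this patch adds; a sketch only,
 * not driver code.  "oactive" stands in for IFF_DRV_OACTIVE, "txeof"
 * for sc->hn_txeof.
 */
#include <stdatomic.h>
#include <stdbool.h>
#include <stdio.h>

static atomic_bool oactive;     /* models IFF_DRV_OACTIVE */
static atomic_bool txeof;       /* models sc->hn_txeof */

/* Models netvsc_xmit_completion(): the host acked a send. */
static void
xmit_completion(void)
{
    atomic_store(&txeof, true);
}

/*
 * Models netvsc_channel_rollup(): once any send completion has been
 * seen, clear OACTIVE and restart transmission.  The old driver had
 * no equivalent, so OACTIVE could stick forever.
 */
static void
channel_rollup(void)
{
    if (!atomic_exchange(&txeof, false))
        return;                 /* no completion yet: stay blocked */
    atomic_store(&oactive, false);
    printf("OACTIVE cleared, TX restarted\n");
}

int
main(void)
{
    atomic_store(&oactive, true);   /* send path ran out of resources */
    xmit_completion();              /* a completion arrives on the channel */
    channel_rollup();               /* channel callback unwedges TX */
    return (0);
}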
Index: sys/dev/hyperv/netvsc/hv_net_vsc.c
===================================================================
--- sys/dev/hyperv/netvsc/hv_net_vsc.c (revision 294703)
+++ sys/dev/hyperv/netvsc/hv_net_vsc.c (working copy)
@@ -641,6 +641,12 @@
 /* sema_wait(&NetVscChannel->channel_init_sema); */

 /* Post the big receive buffer to NetVSP */
+ if (net_dev->nvsp_version <= NVSP_PROTOCOL_VERSION_2)
+ net_dev->rx_buf_size = NETVSC_RECEIVE_BUFFER_SIZE_LEGACY;
+ else
+ net_dev->rx_buf_size = NETVSC_RECEIVE_BUFFER_SIZE;
+ net_dev->send_buf_size = NETVSC_SEND_BUFFER_SIZE;
+
 ret = hv_nv_init_rx_buffer_with_net_vsp(device);
 if (ret == 0)
 ret = hv_nv_init_send_buffer_with_net_vsp(device);
@@ -675,10 +681,7 @@
 goto cleanup;

 /* Initialize the NetVSC channel extension */
- net_dev->rx_buf_size = NETVSC_RECEIVE_BUFFER_SIZE;

- net_dev->send_buf_size = NETVSC_SEND_BUFFER_SIZE;
-
 sema_init(&net_dev->channel_init_sema, 0, "netdev_sema");

 /*
@@ -918,6 +921,7 @@
 */
 hv_nv_on_receive_completion(device, vm_xfer_page_pkt->d.transaction_id,
 status);
+ hv_rf_receive_rollup(net_dev);
 }

 /*
@@ -1023,4 +1027,6 @@

 if (bufferlen > NETVSC_PACKET_SIZE)
 free(buffer, M_NETVSC);
+
+ hv_rf_channel_rollup(net_dev);
 }
Index: sys/dev/hyperv/netvsc/hv_net_vsc.h
===================================================================
--- sys/dev/hyperv/netvsc/hv_net_vsc.h (revision 294703)
+++ sys/dev/hyperv/netvsc/hv_net_vsc.h (working copy)
@@ -38,12 +38,22 @@
 #ifndef __HV_NET_VSC_H__
 #define __HV_NET_VSC_H__

-#include <sys/types.h>
 #include <sys/param.h>
 #include <sys/lock.h>
 #include <sys/malloc.h>
+#include <sys/queue.h>
 #include <sys/sx.h>

+#include <machine/bus.h>
+#include <sys/bus.h>
+#include <sys/bus_dma.h>
+
+#include <netinet/in.h>
+#include <netinet/tcp_lro.h>
+
+#include <net/if.h>
+#include <net/if_media.h>
+
 #include <dev/hyperv/include/hyperv.h>

 MALLOC_DECLARE(M_NETVSC);
@@ -851,7 +861,7 @@
 #define NETVSC_SEND_BUFFER_SIZE (1024*1024*15) /* 15M */
 #define NETVSC_SEND_BUFFER_ID 0xface

-
+#define NETVSC_RECEIVE_BUFFER_SIZE_LEGACY (1024*1024*15) /* 15MB */
 #define NETVSC_RECEIVE_BUFFER_SIZE (1024*1024*16) /* 16MB */

 #define NETVSC_RECEIVE_BUFFER_ID 0xcafe
@@ -978,6 +988,9 @@
 hv_bool_uint8_t link_state;
 } netvsc_device_info;

+struct hn_txdesc;
+SLIST_HEAD(hn_txdesc_list, hn_txdesc);
+
 /*
 * Device-specific softc structure
 */
@@ -984,6 +997,7 @@
 typedef struct hn_softc {
 struct ifnet *hn_ifp;
 struct arpcom arpcom;
+ struct ifmedia hn_media;
 device_t hn_dev;
 uint8_t hn_unit;
 int hn_carrier;
@@ -994,6 +1008,35 @@
 int temp_unusable;
 struct hv_device *hn_dev_obj;
 netvsc_dev *net_dev;
+
+ int hn_txdesc_cnt;
+ struct hn_txdesc *hn_txdesc;
+ bus_dma_tag_t hn_tx_data_dtag;
+ bus_dma_tag_t hn_tx_rndis_dtag;
+ int hn_tx_chimney_size;
+ int hn_tx_chimney_max;
+
+ struct mtx hn_txlist_spin;
+ struct hn_txdesc_list hn_txlist;
+ int hn_txdesc_avail;
+ int hn_txeof;
+
+ struct lro_ctrl hn_lro;
+ int hn_lro_hiwat;
+
+ /* Trust tcp segments verification on host side */
+ int hn_trust_hosttcp;
+
+ u_long hn_csum_ip;
+ u_long hn_csum_tcp;
+ u_long hn_csum_trusted;
+ u_long hn_lro_tried;
+ u_long hn_small_pkts;
+ u_long hn_no_txdescs;
+ u_long hn_send_failed;
+ u_long hn_txdma_failed;
+ u_long hn_tx_collapsed;
+ u_long hn_tx_chimney;
 } hn_softc_t;


Index: sys/dev/hyperv/netvsc/hv_netvsc_drv_freebsd.c
===================================================================
--- sys/dev/hyperv/netvsc/hv_netvsc_drv_freebsd.c (revision 294703)
+++ sys/dev/hyperv/netvsc/hv_netvsc_drv_freebsd.c (working copy)
@@ -69,6 +69,7 @@
 #include <sys/queue.h>
 #include <sys/lock.h>
 #include <sys/sx.h>
+#include <sys/sysctl.h>

 #include <net/if.h>
 #include <net/if_arp.h>
@@ -128,14 +129,58 @@
 #define HV_NV_SC_PTR_OFFSET_IN_BUF 0
 #define HV_NV_PACKET_OFFSET_IN_BUF 16

+/* YYY should get it from the underlying channel */
+#define HN_TX_DESC_CNT 512

+#define HN_RNDIS_MSG_LEN \
+ (sizeof(rndis_msg) + \
+ RNDIS_VLAN_PPI_SIZE + \
+ RNDIS_TSO_PPI_SIZE + \
+ RNDIS_CSUM_PPI_SIZE)
+#define HN_RNDIS_MSG_BOUNDARY PAGE_SIZE
+#define HN_RNDIS_MSG_ALIGN CACHE_LINE_SIZE
+
+#define HN_TX_DATA_BOUNDARY PAGE_SIZE
+#define HN_TX_DATA_MAXSIZE IP_MAXPACKET
+#define HN_TX_DATA_SEGSIZE PAGE_SIZE
+#define HN_TX_DATA_SEGCNT_MAX \
+ (NETVSC_PACKET_MAXPAGE - HV_RF_NUM_TX_RESERVED_PAGE_BUFS)
+
+struct hn_txdesc {
+ SLIST_ENTRY(hn_txdesc) link;
+ struct mbuf *m;
+ struct hn_softc *sc;
+ int refs;
+ uint32_t flags; /* HN_TXD_FLAG_ */
+ netvsc_packet netvsc_pkt; /* XXX to be removed */
+
+ bus_dmamap_t data_dmap;
+
+ bus_addr_t rndis_msg_paddr;
+ rndis_msg *rndis_msg;
+ bus_dmamap_t rndis_msg_dmap;
+};
+
+#define HN_TXD_FLAG_ONLIST 0x1
+#define HN_TXD_FLAG_DMAMAP 0x2
+
 /*
- * Data types
+ * A unified flag for all outbound check sum flags is useful,
+ * and it helps avoiding unnecessary check sum calculation in
+ * network forwarding scenario.
 */
+#define HV_CSUM_FOR_OUTBOUND \
+ (CSUM_IP|CSUM_IP_UDP|CSUM_IP_TCP|CSUM_IP_SCTP|CSUM_IP_TSO|CSUM_IP_ISCSI| \
+ CSUM_IP6_UDP|CSUM_IP6_TCP|CSUM_IP6_SCTP|CSUM_IP6_TSO|CSUM_IP6_ISCSI)

-struct hv_netvsc_driver_context {
- uint32_t drv_inited;
-};
+/* XXX move to netinet/tcp_lro.h */
+#define HN_LRO_HIWAT_MAX 65535
+#define HN_LRO_HIWAT_DEF HN_LRO_HIWAT_MAX
+/* YYY 2*MTU is a bit rough, but should be good enough. */
+#define HN_LRO_HIWAT_MTULIM(ifp) (2 * (ifp)->if_mtu)
+#define HN_LRO_HIWAT_ISVALID(sc, hiwat) \
+ ((hiwat) >= HN_LRO_HIWAT_MTULIM((sc)->hn_ifp) || \
+ (hiwat) <= HN_LRO_HIWAT_MAX)

 /*
 * Be aware that this sleepable mutex will exhibit WITNESS errors when
@@ -159,10 +204,20 @@

 int hv_promisc_mode = 0; /* normal mode by default */

-/* The one and only one */
-static struct hv_netvsc_driver_context g_netvsc_drv;
+/* Trust tcp segements verification on host side. */
+static int hn_trust_hosttcp = 0;
+TUNABLE_INT("dev.hn.trust_hosttcp", &hn_trust_hosttcp);

+#if __FreeBSD_version >= 1100045
+/* Limit TSO burst size */
+static int hn_tso_maxlen = 0;
+TUNABLE_INT("dev.hn.tso_maxlen", &hn_tso_maxlen);
+#endif

+/* Limit chimney send size */
+static int hn_tx_chimney_size = 0;
+TUNABLE_INT("dev.hn.tx_chimney_size", &hn_tx_chimney_size);
+
 /*
 * Forward declarations
 */
@@ -170,9 +225,27 @@
 static void hn_ifinit_locked(hn_softc_t *sc);
 static void hn_ifinit(void *xsc);
 static int hn_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data);
-static int hn_start_locked(struct ifnet *ifp);
+static void hn_start_locked(struct ifnet *ifp);
 static void hn_start(struct ifnet *ifp);
+static int hn_ifmedia_upd(struct ifnet *ifp);
+static void hn_ifmedia_sts(struct ifnet *ifp, struct ifmediareq *ifmr);
+#ifdef HN_LRO_HIWAT
+static int hn_lro_hiwat_sysctl(SYSCTL_HANDLER_ARGS);
+#endif
+static int hn_tx_chimney_size_sysctl(SYSCTL_HANDLER_ARGS);
+static int hn_check_iplen(const struct mbuf *, int);
+static int hn_create_tx_ring(struct hn_softc *sc);
+static void hn_destroy_tx_ring(struct hn_softc *sc);

+static __inline void
+hn_set_lro_hiwat(struct hn_softc *sc, int hiwat)
+{
+ sc->hn_lro_hiwat = hiwat;
+#ifdef HN_LRO_HIWAT
+ sc->hn_lro.lro_hiwat = sc->hn_lro_hiwat;
+#endif
+}
+
 /*
 * NetVsc get message transport protocol type
 */
@@ -229,35 +302,27 @@
 return (ret_val);
 }

-/*
- * NetVsc driver initialization
- * Note: Filter init is no longer required
- */
 static int
-netvsc_drv_init(void)
+hn_ifmedia_upd(struct ifnet *ifp __unused)
 {
- return (0);
+
+ return EOPNOTSUPP;
 }

-/*
- * NetVsc global initialization entry point
- */
 static void
-netvsc_init(void)
+hn_ifmedia_sts(struct ifnet *ifp, struct ifmediareq *ifmr)
 {
- if (bootverbose)
- printf("Netvsc initializing... ");
+ struct hn_softc *sc = ifp->if_softc;

- /*
- * XXXKYS: cleanup initialization
- */
- if (!cold && !g_netvsc_drv.drv_inited) {
- g_netvsc_drv.drv_inited = 1;
- netvsc_drv_init();
- if (bootverbose)
- printf("done!\n");
- } else if (bootverbose)
- printf("Already initialized!\n");
+ ifmr->ifm_status = IFM_AVALID;
+ ifmr->ifm_active = IFM_ETHER;
+
+ if (!sc->hn_carrier) {
+ ifmr->ifm_active |= IFM_NONE;
+ return;
+ }
+ ifmr->ifm_status |= IFM_ACTIVE;
+ ifmr->ifm_active |= IFM_10G_T | IFM_FDX;
 }

 /* {F8615163-DF3E-46c5-913F-F2D2F965ED0E} */
@@ -300,11 +365,14 @@
 netvsc_device_info device_info;
 hn_softc_t *sc;
 int unit = device_get_unit(dev);
- struct ifnet *ifp;
- int ret;
+ struct ifnet *ifp = NULL;
+ struct sysctl_oid_list *child;
+ struct sysctl_ctx_list *ctx;
+ int error;
+#if __FreeBSD_version >= 1100045
+ int tso_maxlen;
+#endif

- netvsc_init();
-
 sc = device_get_softc(dev);
 if (sc == NULL) {
 return (ENOMEM);
@@ -313,7 +381,13 @@
 bzero(sc, sizeof(hn_softc_t));
 sc->hn_unit = unit;
 sc->hn_dev = dev;
+ sc->hn_lro_hiwat = HN_LRO_HIWAT_DEF;
+ sc->hn_trust_hosttcp = hn_trust_hosttcp;

+ error = hn_create_tx_ring(sc);
+ if (error)
+ goto failed;
+
 NV_LOCK_INIT(sc, "NetVSCLock");

 sc->hn_dev_obj = device_ctx;
@@ -335,14 +409,22 @@
 ifp->if_snd.ifq_drv_maxlen = 511;
 IFQ_SET_READY(&ifp->if_snd);

+ ifmedia_init(&sc->hn_media, 0, hn_ifmedia_upd, hn_ifmedia_sts);
+ ifmedia_add(&sc->hn_media, IFM_ETHER | IFM_AUTO, 0, NULL);
+ ifmedia_set(&sc->hn_media, IFM_ETHER | IFM_AUTO);
+ /* XXX ifmedia_set really should do this for us */
+ sc->hn_media.ifm_media = sc->hn_media.ifm_cur->ifm_media;
+
 /*
 * Tell upper layers that we support full VLAN capability.
 */
 ifp->if_data.ifi_hdrlen = sizeof(struct ether_vlan_header);
 ifp->if_capabilities |=
- IFCAP_VLAN_HWTAGGING | IFCAP_VLAN_MTU | IFCAP_HWCSUM | IFCAP_TSO;
+ IFCAP_VLAN_HWTAGGING | IFCAP_VLAN_MTU | IFCAP_HWCSUM | IFCAP_TSO |
+ IFCAP_LRO;
 ifp->if_capenable |=
- IFCAP_VLAN_HWTAGGING | IFCAP_VLAN_MTU | IFCAP_HWCSUM | IFCAP_TSO;
+ IFCAP_VLAN_HWTAGGING | IFCAP_VLAN_MTU | IFCAP_HWCSUM | IFCAP_TSO |
+ IFCAP_LRO;
 /*
 * Only enable UDP checksum offloading when it is on 2012R2 or
 * later.  UDP checksum offloading doesn't work on earlier
@@ -353,19 +435,126 @@
 else
 ifp->if_hwassist = CSUM_TCP | CSUM_TSO;

- ret = hv_rf_on_device_add(device_ctx, &device_info);
- if (ret != 0) {
- if_free(ifp);
+ error = hv_rf_on_device_add(device_ctx, &device_info);
+ if (error)
+ goto failed;

- return (ret);
- }
 if (device_info.link_state == 0) {
 sc->hn_carrier = 1;
 }

+#if defined(INET) || defined(INET6)
+ tcp_lro_init(&sc->hn_lro);
+ /* Driver private LRO settings */
+ sc->hn_lro.ifp = ifp;
+#ifdef HN_LRO_HIWAT
+ sc->hn_lro.lro_hiwat = sc->hn_lro_hiwat;
+#endif
+#endif /* INET || INET6 */
+
+#if __FreeBSD_version >= 1100045
+ tso_maxlen = hn_tso_maxlen;
+ if (tso_maxlen <= 0 || tso_maxlen > IP_MAXPACKET)
+ tso_maxlen = IP_MAXPACKET;
+
+ ifp->if_hw_tsomaxsegcount = HN_TX_DATA_SEGCNT_MAX;
+ ifp->if_hw_tsomaxsegsize = PAGE_SIZE;
+ ifp->if_hw_tsomax = tso_maxlen -
+ (ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN);
+#endif
+
 ether_ifattach(ifp, device_info.mac_addr);

+#if __FreeBSD_version >= 1100045
+ if_printf(ifp, "TSO: %u/%u/%u\n", ifp->if_hw_tsomax,
+ ifp->if_hw_tsomaxsegcount, ifp->if_hw_tsomaxsegsize);
+#endif
+
+ sc->hn_tx_chimney_max = sc->net_dev->send_section_size;
+ sc->hn_tx_chimney_size = sc->hn_tx_chimney_max;
+ if (hn_tx_chimney_size > 0 &&
+ hn_tx_chimney_size < sc->hn_tx_chimney_max)
+ sc->hn_tx_chimney_size = hn_tx_chimney_size;
+
+ ctx = device_get_sysctl_ctx(dev);
+ child = SYSCTL_CHILDREN(device_get_sysctl_tree(dev));
+
+ SYSCTL_ADD_INT(ctx, child, OID_AUTO, "lro_queued",
+ CTLFLAG_RW, &sc->hn_lro.lro_queued, 0, "LRO queued");
+ SYSCTL_ADD_INT(ctx, child, OID_AUTO, "lro_flushed",
+ CTLFLAG_RW, &sc->hn_lro.lro_flushed, 0, "LRO flushed");
+ SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "lro_tried",
+ CTLFLAG_RW, &sc->hn_lro_tried, "# of LRO tries");
+#ifdef HN_LRO_HIWAT
+ SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "lro_hiwat",
+ CTLTYPE_INT | CTLFLAG_RW, sc, 0, hn_lro_hiwat_sysctl,
+ "I", "LRO high watermark");
+#endif
+ SYSCTL_ADD_INT(ctx, child, OID_AUTO, "trust_hosttcp",
+ CTLFLAG_RW, &sc->hn_trust_hosttcp, 0,
+ "Trust tcp segement verification on host side, "
+ "when csum info is missing");
+ SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "csum_ip",
+ CTLFLAG_RW, &sc->hn_csum_ip, "RXCSUM IP");
+ SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "csum_tcp",
+ CTLFLAG_RW, &sc->hn_csum_tcp, "RXCSUM TCP");
+ SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "csum_trusted",
+ CTLFLAG_RW, &sc->hn_csum_trusted,
+ "# of TCP segements that we trust host's csum verification");
+ SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "small_pkts",
+ CTLFLAG_RW, &sc->hn_small_pkts, "# of small packets received");
+ SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "no_txdescs",
+ CTLFLAG_RW, &sc->hn_no_txdescs, "# of times short of TX descs");
+ SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "send_failed",
+ CTLFLAG_RW, &sc->hn_send_failed, "# of hyper-v sending failure");
+ SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "txdma_failed",
+ CTLFLAG_RW, &sc->hn_txdma_failed, "# of TX DMA failure");
+ SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "tx_collapsed",
+ CTLFLAG_RW, &sc->hn_tx_collapsed, "# of TX mbuf collapsed");
+ SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "tx_chimney",
+ CTLFLAG_RW, &sc->hn_tx_chimney, "# of chimney send");
+ SYSCTL_ADD_INT(ctx, child, OID_AUTO, "txdesc_cnt",
+ CTLFLAG_RD, &sc->hn_txdesc_cnt, 0, "# of total TX descs");
+ SYSCTL_ADD_INT(ctx, child, OID_AUTO, "txdesc_avail",
+ CTLFLAG_RD, &sc->hn_txdesc_avail, 0, "# of available TX descs");
+ SYSCTL_ADD_INT(ctx, child, OID_AUTO, "tx_chimney_max",
+ CTLFLAG_RD, &sc->hn_tx_chimney_max, 0,
+ "Chimney send packet size upper boundary");
+ SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "tx_chimney_size",
+ CTLTYPE_INT | CTLFLAG_RW, sc, 0, hn_tx_chimney_size_sysctl,
+ "I", "Chimney send packet size limit");
+
+ if (unit == 0) {
+ struct sysctl_ctx_list *dc_ctx;
+ struct sysctl_oid_list *dc_child;
+ devclass_t dc;
+
+ /*
+ * Add sysctl nodes for devclass
+ */
+ dc = device_get_devclass(dev);
+ dc_ctx = devclass_get_sysctl_ctx(dc);
+ dc_child = SYSCTL_CHILDREN(devclass_get_sysctl_tree(dc));
+
+ SYSCTL_ADD_INT(dc_ctx, dc_child, OID_AUTO, "trust_hosttcp",
+ CTLFLAG_RD, &hn_trust_hosttcp, 0,
+ "Trust tcp segement verification on host side, "
+ "when csum info is missing (global setting)");
+ SYSCTL_ADD_INT(dc_ctx, dc_child, OID_AUTO, "tx_chimney_size",
+ CTLFLAG_RD, &hn_tx_chimney_size, 0,
+ "Chimney send packet size limit");
+#if __FreeBSD_version >= 1100045
+ SYSCTL_ADD_INT(dc_ctx, dc_child, OID_AUTO, "tso_maxlen",
+ CTLFLAG_RD, &hn_tso_maxlen, 0, "TSO burst limit");
+#endif
+ }
+
 return (0);
+failed:
+ hn_destroy_tx_ring(sc);
+ if (ifp != NULL)
+ if_free(ifp);
+ return (error);
 }

 /*
@@ -374,6 +563,7 @@
 static int
 netvsc_detach(device_t dev)
 {
+ struct hn_softc *sc = device_get_softc(dev);
 struct hv_device *hv_device = vmbus_get_devctx(dev);

 if (bootverbose)
@@ -392,6 +582,12 @@

 hv_rf_on_device_remove(hv_device, HV_RF_NV_DESTROY_CHANNEL);

+ ifmedia_removeall(&sc->hn_media);
+#if defined(INET) || defined(INET6)
+ tcp_lro_free(&sc->hn_lro);
+#endif
+ hn_destroy_tx_ring(sc);
+
 return (0);
 }

@@ -404,6 +600,112 @@
 return (0);
 }

+static __inline int
+hn_txdesc_dmamap_load(struct hn_softc *sc, struct hn_txdesc *txd,
+ struct mbuf **m_head, bus_dma_segment_t *segs, int *nsegs)
+{
+ struct mbuf *m = *m_head;
+ int error;
+
+ error = bus_dmamap_load_mbuf_sg(sc->hn_tx_data_dtag, txd->data_dmap,
+ m, segs, nsegs, BUS_DMA_NOWAIT);
+ if (error == EFBIG) {
+ struct mbuf *m_new;
+
+ m_new = m_collapse(m, M_NOWAIT, HN_TX_DATA_SEGCNT_MAX);
+ if (m_new == NULL)
+ return ENOBUFS;
+ else
+ *m_head = m = m_new;
+ sc->hn_tx_collapsed++;
+
+ error = bus_dmamap_load_mbuf_sg(sc->hn_tx_data_dtag,
+ txd->data_dmap, m, segs, nsegs, BUS_DMA_NOWAIT);
+ }
+ if (!error) {
+ bus_dmamap_sync(sc->hn_tx_data_dtag, txd->data_dmap,
+ BUS_DMASYNC_PREWRITE);
+ txd->flags |= HN_TXD_FLAG_DMAMAP;
+ }
+ return error;
+}
+
+static __inline void
+hn_txdesc_dmamap_unload(struct hn_softc *sc, struct hn_txdesc *txd)
+{
+
+ if (txd->flags & HN_TXD_FLAG_DMAMAP) {
+ bus_dmamap_sync(sc->hn_tx_data_dtag,
+ txd->data_dmap, BUS_DMASYNC_POSTWRITE);
+ bus_dmamap_unload(sc->hn_tx_data_dtag,
+ txd->data_dmap);
+ txd->flags &= ~HN_TXD_FLAG_DMAMAP;
+ }
+}
+
+static __inline int
+hn_txdesc_put(struct hn_softc *sc, struct hn_txdesc *txd)
+{
+
+ KASSERT((txd->flags & HN_TXD_FLAG_ONLIST) == 0,
+ ("put an onlist txd %#x", txd->flags));
+
+ KASSERT(txd->refs > 0, ("invalid txd refs %d", txd->refs));
+ if (atomic_fetchadd_int(&txd->refs, -1) != 1)
+ return 0;
+
+ hn_txdesc_dmamap_unload(sc, txd);
+ if (txd->m != NULL) {
+ m_freem(txd->m);
+ txd->m = NULL;
+ }
+
+ txd->flags |= HN_TXD_FLAG_ONLIST;
+
+ mtx_lock_spin(&sc->hn_txlist_spin);
+ KASSERT(sc->hn_txdesc_avail >= 0 &&
+ sc->hn_txdesc_avail < sc->hn_txdesc_cnt,
+ ("txdesc_put: invalid txd avail %d", sc->hn_txdesc_avail));
+ sc->hn_txdesc_avail++;
+ SLIST_INSERT_HEAD(&sc->hn_txlist, txd, link);
+ mtx_unlock_spin(&sc->hn_txlist_spin);
+
+ return 1;
+}
+
+static __inline struct hn_txdesc *
+hn_txdesc_get(struct hn_softc *sc)
+{
+ struct hn_txdesc *txd;
+
+ mtx_lock_spin(&sc->hn_txlist_spin);
+ txd = SLIST_FIRST(&sc->hn_txlist);
+ if (txd != NULL) {
+ KASSERT(sc->hn_txdesc_avail > 0,
+ ("txdesc_get: invalid txd avail %d", sc->hn_txdesc_avail));
+ sc->hn_txdesc_avail--;
+ SLIST_REMOVE_HEAD(&sc->hn_txlist, link);
+ }
+ mtx_unlock_spin(&sc->hn_txlist_spin);
+
+ if (txd != NULL) {
+ KASSERT(txd->m == NULL && txd->refs == 0 &&
+ (txd->flags & HN_TXD_FLAG_ONLIST), ("invalid txd"));
+ txd->flags &= ~HN_TXD_FLAG_ONLIST;
+ txd->refs = 1;
+ }
+ return txd;
+}
+
+static __inline void
+hn_txdesc_hold(struct hn_txdesc *txd)
+{
+
+ /* 0->1 transition will never work */
+ KASSERT(txd->refs > 0, ("invalid refs %d", txd->refs));
+ atomic_add_int(&txd->refs, 1);
+}
+
 /*
 * Send completion processing
 *
@@ -414,34 +716,46 @@
 void
 netvsc_xmit_completion(void *context)
 {
- netvsc_packet *packet = (netvsc_packet *)context;
- struct mbuf *mb;
- uint8_t *buf;
+ netvsc_packet *packet = context;
+ struct hn_txdesc *txd;
+ struct hn_softc *sc;

- mb = (struct mbuf *)(uintptr_t)packet->compl.send.send_completion_tid;
- buf = ((uint8_t *)packet) - HV_NV_PACKET_OFFSET_IN_BUF;
+ txd = (struct hn_txdesc *)(uintptr_t)
+ packet->compl.send.send_completion_tid;

- free(buf, M_NETVSC);
+ sc = txd->sc;
+ sc->hn_txeof = 1;
+ hn_txdesc_put(sc, txd);
+}

- if (mb != NULL) {
- m_freem(mb);
- }
+void
+netvsc_channel_rollup(struct hv_device *device_ctx)
+{
+ struct hn_softc *sc = device_get_softc(device_ctx->device);
+ struct ifnet *ifp;
+
+ if (!sc->hn_txeof)
+ return;
+
+ sc->hn_txeof = 0;
+ ifp = sc->hn_ifp;
+ NV_LOCK(sc);
+ ifp->if_drv_flags &= ~IFF_DRV_OACTIVE;
+ hn_start_locked(ifp);
+ NV_UNLOCK(sc);
 }

 /*
 * Start a transmit of one or more packets
 */
-static int
+static void
 hn_start_locked(struct ifnet *ifp)
 {
 hn_softc_t *sc = ifp->if_softc;
 struct hv_device *device_ctx = vmbus_get_devctx(sc->hn_dev);
 netvsc_dev *net_dev = sc->net_dev;
- device_t dev = device_ctx->device;
- uint8_t *buf;
 netvsc_packet *packet;
 struct mbuf *m_head, *m;
- struct mbuf *mc_head = NULL;
 struct ether_vlan_header *eh;
 rndis_msg *rndis_mesg;
 rndis_packet *rndis_pkt;
@@ -450,84 +764,40 @@
 rndis_tcp_ip_csum_info *csum_info;
 rndis_tcp_tso_info *tso_info;
 int ether_len;
- int i;
- int num_frags;
- int len;
- int retries = 0;
- int ret = 0;
 uint32_t rndis_msg_size = 0;
 uint32_t trans_proto_type;
 uint32_t send_buf_section_idx =
 NVSP_1_CHIMNEY_SEND_INVALID_SECTION_INDEX;

- while (!IFQ_DRV_IS_EMPTY(&sc->hn_ifp->if_snd)) {
- IFQ_DRV_DEQUEUE(&sc->hn_ifp->if_snd, m_head);
- if (m_head == NULL) {
- break;
- }
+ if ((ifp->if_drv_flags & (IFF_DRV_RUNNING | IFF_DRV_OACTIVE)) !=
+ IFF_DRV_RUNNING)
+ return;

- len = 0;
- num_frags = 0;
+ while (!IFQ_DRV_IS_EMPTY(&ifp->if_snd)) {
+ bus_dma_segment_t segs[HN_TX_DATA_SEGCNT_MAX];
+ int error, nsegs, i, send_failed = 0;
+ struct hn_txdesc *txd;

- /* Walk the mbuf list computing total length and num frags */
- for (m = m_head; m != NULL; m = m->m_next) {
- if (m->m_len != 0) {
- num_frags++;
- len += m->m_len;
- }
- }
+ IFQ_DRV_DEQUEUE(&ifp->if_snd, m_head);
+ if (m_head == NULL)
+ break;

- /*
- * Reserve the number of pages requested.  Currently,
- * one page is reserved for the message in the RNDIS
- * filter packet
- */
- num_frags += HV_RF_NUM_TX_RESERVED_PAGE_BUFS;
-
- /* If exceeds # page_buffers in netvsc_packet */
- if (num_frags > NETVSC_PACKET_MAXPAGE) {
- device_printf(dev, "exceed max page buffers,%d,%d\n",
- num_frags, NETVSC_PACKET_MAXPAGE);
- m_freem(m_head);
- if_inc_counter(ifp, IFCOUNTER_OERRORS, 1);
- return (EINVAL);
+ txd = hn_txdesc_get(sc);
+ if (txd == NULL) {
+ sc->hn_no_txdescs++;
+ IF_PREPEND(&ifp->if_snd, m_head);
+ ifp->if_drv_flags |= IFF_DRV_OACTIVE;
+ break;
 }

- /*
- * Allocate a buffer with space for a netvsc packet plus a
- * number of reserved areas. First comes a (currently 16
- * bytes, currently unused) reserved data area. Second is
- * the netvsc_packet. Third is an area reserved for an
- * rndis_filter_packet struct. Fourth (optional) is a
- * rndis_per_packet_info struct.
- * Changed malloc to M_NOWAIT to avoid sleep under spin lock.
- * No longer reserving extra space for page buffers, as they
- * are already part of the netvsc_packet.
- */
- buf = malloc(HV_NV_PACKET_OFFSET_IN_BUF +
- sizeof(netvsc_packet) +
- sizeof(rndis_msg) +
- RNDIS_VLAN_PPI_SIZE +
- RNDIS_TSO_PPI_SIZE +
- RNDIS_CSUM_PPI_SIZE,
- M_NETVSC, M_ZERO | M_NOWAIT);
- if (buf == NULL) {
- device_printf(dev, "hn:malloc packet failed\n");
- m_freem(m_head);
- if_inc_counter(ifp, IFCOUNTER_OERRORS, 1);
- return (ENOMEM);
- }
+ packet = &txd->netvsc_pkt;
+ /* XXX not necessary */
+ memset(packet, 0, sizeof(*packet));

- packet = (netvsc_packet *)(buf + HV_NV_PACKET_OFFSET_IN_BUF);
- *(vm_offset_t *)buf = HV_NV_SC_PTR_OFFSET_IN_BUF;
-
 packet->is_data_pkt = TRUE;

- /* Set up the rndis header */
- packet->page_buf_count = num_frags;
-
 /* Initialize it from the mbuf */
- packet->tot_data_buf_len = len;
+ packet->tot_data_buf_len = m_head->m_pkthdr.len;

 /*
 * extension points to the area reserved for the
@@ -535,8 +805,9 @@
 * the netvsc_packet (and rppi struct, if present;
 * length is updated later).
 */
- packet->rndis_mesg = packet + 1;
- rndis_mesg = (rndis_msg *)packet->rndis_mesg;
+ rndis_mesg = txd->rndis_msg;
+ /* XXX not necessary */
+ memset(rndis_mesg, 0, HN_RNDIS_MSG_LEN);
 rndis_mesg->ndis_msg_type = REMOTE_NDIS_PACKET_MSG;

 rndis_pkt = &rndis_mesg->msg.packet;
@@ -555,8 +826,6 @@
 * set up some additional fields so the Hyper-V infrastructure will stuff the VLAN tag
 * into the frame.
 */
- packet->vlan_tci = m_head->m_pkthdr.ether_vtag;
-
 rndis_msg_size += RNDIS_VLAN_PPI_SIZE;

 rppi = hv_set_rppi_data(rndis_mesg, RNDIS_VLAN_PPI_SIZE,
@@ -567,10 +836,11 @@
 rppi->per_packet_info_offset);
 /* FreeBSD does not support CFI or priority */
 rppi_vlan_info->u1.s1.vlan_id =
- packet->vlan_tci & 0xfff;
+ m_head->m_pkthdr.ether_vtag & 0xfff;
 }

- if (0 == m_head->m_pkthdr.csum_flags) {
+ /* Only check the flags for outbount and ignore the ones for inbount */
+ if (0 == (m_head->m_pkthdr.csum_flags & HV_CSUM_FOR_OUTBOUND)) {
 goto pre_send;
 }

@@ -668,7 +938,7 @@
 packet->tot_data_buf_len = rndis_mesg->msg_len;

 /* send packet with send buffer */
- if (packet->tot_data_buf_len < net_dev->send_section_size) {
+ if (packet->tot_data_buf_len < sc->hn_tx_chimney_size) {
 send_buf_section_idx =
 hv_nv_get_next_send_section(net_dev);
 if (send_buf_section_idx !=
@@ -693,15 +963,36 @@
 packet->send_buf_section_size =
 packet->tot_data_buf_len;
 packet->page_buf_count = 0;
+ sc->hn_tx_chimney++;
 goto do_send;
 }
 }

+ error = hn_txdesc_dmamap_load(sc, txd, &m_head, segs, &nsegs);
+ if (error) {
+ int freed;
+
+ /*
+ * This mbuf is not linked w/ the txd yet, so free
+ * it now.
+ */
+ m_freem(m_head);
+ freed = hn_txdesc_put(sc, txd);
+ KASSERT(freed != 0,
+ ("fail to free txd upon txdma error"));
+
+ sc->hn_txdma_failed++;
+ if_inc_counter(ifp, IFCOUNTER_OERRORS, 1);
+ continue;
+ }
+
+ packet->page_buf_count = nsegs +
+ HV_RF_NUM_TX_RESERVED_PAGE_BUFS;
+
 /* send packet with page buffer */
- packet->page_buffers[0].pfn =
- atop(hv_get_phys_addr(rndis_mesg));
+ packet->page_buffers[0].pfn = atop(txd->rndis_msg_paddr);
 packet->page_buffers[0].offset =
- (unsigned long)rndis_mesg & PAGE_MASK;
+ txd->rndis_msg_paddr & PAGE_MASK;
 packet->page_buffers[0].length = rndis_msg_size;

 /*
@@ -708,18 +999,13 @@
 * Fill the page buffers with mbuf info starting at index
 * HV_RF_NUM_TX_RESERVED_PAGE_BUFS.
 */
- i = HV_RF_NUM_TX_RESERVED_PAGE_BUFS;
- for (m = m_head; m != NULL; m = m->m_next) {
- if (m->m_len) {
- vm_offset_t paddr =
- vtophys(mtod(m, vm_offset_t));
- packet->page_buffers[i].pfn =
- paddr >> PAGE_SHIFT;
- packet->page_buffers[i].offset =
- paddr & (PAGE_SIZE - 1);
- packet->page_buffers[i].length = m->m_len;
- i++;
- }
+ for (i = 0; i < nsegs; ++i) {
+ hv_vmbus_page_buffer *pb = &packet->page_buffers[
+ i + HV_RF_NUM_TX_RESERVED_PAGE_BUFS];
+
+ pb->pfn = atop(segs[i].ds_addr);
+ pb->offset = segs[i].ds_addr & PAGE_MASK;
+ pb->length = segs[i].ds_len;
 }

 packet->send_buf_section_idx =
@@ -727,63 +1013,64 @@
 packet->send_buf_section_size = 0;

 do_send:
+ txd->m = m_head;

+ /* Set the completion routine */
+ packet->compl.send.on_send_completion = netvsc_xmit_completion;
+ packet->compl.send.send_completion_context = packet;
+ packet->compl.send.send_completion_tid =
+ (uint64_t)(uintptr_t)txd;
+again:
 /*
- * If bpf, copy the mbuf chain. This is less expensive than
- * it appears; the mbuf clusters are not copied, only their
- * reference counts are incremented.
- * Needed to avoid a race condition where the completion
- * callback is invoked, freeing the mbuf chain, before the
- * bpf_mtap code has a chance to run.
+ * Make sure that txd is not freed before ETHER_BPF_MTAP.
 */
- if (ifp->if_bpf) {
- mc_head = m_copypacket(m_head, M_DONTWAIT);
+ hn_txdesc_hold(txd);
+ error = hv_nv_on_send(device_ctx, packet);
+ if (!error) {
+ ETHER_BPF_MTAP(ifp, m_head);
+ if_inc_counter(ifp, IFCOUNTER_OPACKETS, 1);
 }
-retry_send:
- /* Set the completion routine */
- packet->compl.send.on_send_completion = netvsc_xmit_completion;
- packet->compl.send.send_completion_context = packet;
- packet->compl.send.send_completion_tid = (uint64_t)(uintptr_t)m_head;
+ hn_txdesc_put(sc, txd);

- /* Removed critical_enter(), does not appear necessary */
- ret = hv_nv_on_send(device_ctx, packet);
- if (ret == 0) {
- ifp->if_opackets++;
- /* if bpf && mc_head, call bpf_mtap code */
- if (mc_head) {
- ETHER_BPF_MTAP(ifp, mc_head);
- }
- } else {
- retries++;
- if (retries < 4) {
- goto retry_send;
- }
+ if (__predict_false(error)) {
+ int freed;

- IF_PREPEND(&ifp->if_snd, m_head);
- ifp->if_drv_flags |= IFF_DRV_OACTIVE;
-
 /*
- * Null the mbuf pointer so the completion function
- * does not free the mbuf chain. We just pushed the
- * mbuf chain back on the if_snd queue.
+ * This should "really rarely" happen.
+ *
+ * XXX Too many RX to be acked or too many sideband
+ * commands to run? Ask netvsc_channel_rollup()
+ * to kick start later.
 */
- packet->compl.send.send_completion_tid = 0;
+ sc->hn_txeof = 1;
+ if (!send_failed) {
+ sc->hn_send_failed++;
+ send_failed = 1;
+ /*
+ * Try sending again after set hn_txeof;
+ * in case that we missed the last
+ * netvsc_channel_rollup().
+ */
+ goto again;
+ }
+ if_printf(ifp, "send failed\n");

 /*
- * Release the resources since we will not get any
- * send completion
+ * This mbuf will be prepended, don't free it
+ * in hn_txdesc_put(); only unload it from the
+ * DMA map in hn_txdesc_put(), if it was loaded.
 */
- netvsc_xmit_completion(packet);
- if_inc_counter(ifp, IFCOUNTER_OERRORS, 1);
- }
+ txd->m = NULL;
+ freed = hn_txdesc_put(sc, txd);
+ KASSERT(freed != 0,
+ ("fail to free txd upon send error"));

- /* if bpf && mc_head, free the mbuf chain copy */
- if (mc_head) {
- m_freem(mc_head);
+ sc->hn_send_failed++;
+ IF_PREPEND(&ifp->if_snd, m_head);
+ ifp->if_drv_flags |= IFF_DRV_OACTIVE;
+ break;
 }
 }
-
- return (ret);
 }

 /*
@@ -877,7 +1164,7 @@
 struct mbuf *m_new;
 struct ifnet *ifp;
 device_t dev = device_ctx->device;
- int size;
+ int size, do_lro = 0;

 if (sc == NULL) {
 return (0); /* TODO: KYS how can this be! */
@@ -896,40 +1183,44 @@
 */
 if (packet->tot_data_buf_len > (ifp->if_mtu + ETHER_HDR_LEN)) {
 return (0);
- }
+ } else if (packet->tot_data_buf_len <= MHLEN) {
+ m_new = m_gethdr(M_NOWAIT, MT_DATA);
+ if (m_new == NULL)
+ return (0);
+ memcpy(mtod(m_new, void *), packet->data,
+ packet->tot_data_buf_len);
+ m_new->m_pkthdr.len = m_new->m_len = packet->tot_data_buf_len;
+ sc->hn_small_pkts++;
+ } else {
+ /*
+ * Get an mbuf with a cluster. For packets 2K or less,
+ * get a standard 2K cluster. For anything larger, get a
+ * 4K cluster. Any buffers larger than 4K can cause problems
+ * if looped around to the Hyper-V TX channel, so avoid them.
+ */
+ size = MCLBYTES;
+ if (packet->tot_data_buf_len > MCLBYTES) {
+ /* 4096 */
+ size = MJUMPAGESIZE;
+ }

- /*
- * Get an mbuf with a cluster. For packets 2K or less,
- * get a standard 2K cluster. For anything larger, get a
- * 4K cluster. Any buffers larger than 4K can cause problems
- * if looped around to the Hyper-V TX channel, so avoid them.
- */
- size = MCLBYTES;
+ m_new = m_getjcl(M_NOWAIT, MT_DATA, M_PKTHDR, size);
+ if (m_new == NULL) {
+ device_printf(dev, "alloc mbuf failed.\n");
+ return (0);
+ }

- if (packet->tot_data_buf_len > MCLBYTES) {
- /* 4096 */
- size = MJUMPAGESIZE;
+ hv_m_append(m_new, packet->tot_data_buf_len, packet->data);
 }
-
- m_new = m_getjcl(M_DONTWAIT, MT_DATA, M_PKTHDR, size);
-
- if (m_new == NULL) {
- device_printf(dev, "alloc mbuf failed.\n");
- return (0);
- }
-
- hv_m_append(m_new, packet->tot_data_buf_len,
- packet->data);
-
 m_new->m_pkthdr.rcvif = ifp;

 /* receive side checksum offload */
- m_new->m_pkthdr.csum_flags = 0;
 if (NULL != csum_info) {
 /* IP csum offload */
 if (csum_info->receive.ip_csum_succeeded) {
 m_new->m_pkthdr.csum_flags |=
 (CSUM_IP_CHECKED | CSUM_IP_VALID);
+ sc->hn_csum_ip++;
 }

 /* TCP csum offload */
@@ -937,9 +1228,50 @@
 m_new->m_pkthdr.csum_flags |=
 (CSUM_DATA_VALID | CSUM_PSEUDO_HDR);
 m_new->m_pkthdr.csum_data = 0xffff;
+ sc->hn_csum_tcp++;
 }
+
+ if (csum_info->receive.ip_csum_succeeded &&
+ csum_info->receive.tcp_csum_succeeded)
+ do_lro = 1;
+ } else {
+ const struct ether_header *eh;
+ uint16_t etype;
+ int hoff;
+
+ hoff = sizeof(*eh);
+ if (m_new->m_len < hoff)
+ goto skip;
+ eh = mtod(m_new, struct ether_header *);
+ etype = ntohs(eh->ether_type);
+ if (etype == ETHERTYPE_VLAN) {
+ const struct ether_vlan_header *evl;
+
+ hoff = sizeof(*evl);
+ if (m_new->m_len < hoff)
+ goto skip;
+ evl = mtod(m_new, struct ether_vlan_header *);
+ etype = ntohs(evl->evl_proto);
+ }
+
+ if (etype == ETHERTYPE_IP) {
+ int pr;
+
+ pr = hn_check_iplen(m_new, hoff);
+ if (pr == IPPROTO_TCP) {
+ if (sc->hn_trust_hosttcp) {
+ sc->hn_csum_trusted++;
+ m_new->m_pkthdr.csum_flags |=
+ (CSUM_IP_CHECKED | CSUM_IP_VALID |
+ CSUM_DATA_VALID | CSUM_PSEUDO_HDR);
+ m_new->m_pkthdr.csum_data = 0xffff;
+ }
+ /* Rely on SW csum verification though... */
+ do_lro = 1;
+ }
+ }
 }
-
+skip:
 if ((packet->vlan_tci != 0) &&
 (ifp->if_capenable & IFCAP_VLAN_HWTAGGING) != 0) {
 m_new->m_pkthdr.ether_vtag = packet->vlan_tci;
@@ -953,6 +1285,20 @@

 ifp->if_ipackets++;

+ if ((ifp->if_capenable & IFCAP_LRO) && do_lro) {
+#if defined(INET) || defined(INET6)
+ struct lro_ctrl *lro = &sc->hn_lro;
+
+ if (lro->lro_cnt) {
+ sc->hn_lro_tried++;
+ if (tcp_lro_rx(lro, m_new, 0) == 0) {
+ /* DONE! */
+ return 0;
+ }
+ }
+#endif
+ }
+
 /* We're not holding the lock here, so don't release it */
 (*ifp->if_input)(ifp, m_new);

@@ -959,6 +1305,21 @@
 return (0);
 }

+void
+netvsc_recv_rollup(struct hv_device *device_ctx)
+{
+#if defined(INET) || defined(INET6)
+ hn_softc_t *sc = device_get_softc(device_ctx->device);
+ struct lro_ctrl *lro = &sc->hn_lro;
+ struct lro_entry *queued;
+
+ while ((queued = SLIST_FIRST(&lro->lro_active)) != NULL) {
+ SLIST_REMOVE_HEAD(&lro->lro_active, next);
+ tcp_lro_flush(lro, queued);
+ }
+#endif
+}
+
 /*
 * Rules for using sc->temp_unusable:
 * 1. sc->temp_unusable can only be read or written while holding NV_LOCK()
@@ -1014,7 +1375,13 @@

 /* Obtain and record requested MTU */
 ifp->if_mtu = ifr->ifr_mtu;
-
+ /*
+ * Make sure that LRO high watermark is still valid,
+ * after MTU change (the 2*MTU limit).
+ */
+ if (!HN_LRO_HIWAT_ISVALID(sc, sc->hn_lro_hiwat))
+ hn_set_lro_hiwat(sc, HN_LRO_HIWAT_MTULIM(ifp));
+
 do {
 NV_LOCK(sc);
 if (!sc->temp_unusable) {
@@ -1052,6 +1419,9 @@
 break;
 }

+ sc->hn_tx_chimney_max = sc->net_dev->send_section_size;
+ if (sc->hn_tx_chimney_size > sc->hn_tx_chimney_max)
+ sc->hn_tx_chimney_size = sc->hn_tx_chimney_max;
 hn_ifinit_locked(sc);

 NV_LOCK(sc);
@@ -1139,6 +1509,8 @@
 ifp->if_capenable |= IFCAP_RXCSUM;
 }
 }
+ if (mask & IFCAP_LRO)
+ ifp->if_capenable ^= IFCAP_LRO;

 if (mask & IFCAP_TSO4) {
 ifp->if_capenable ^= IFCAP_TSO4;
@@ -1163,10 +1535,11 @@
 error = 0;
 }
 #endif
- /* FALLTHROUGH */
+ error = EINVAL;
+ break;
 case SIOCSIFMEDIA:
 case SIOCGIFMEDIA:
- error = EINVAL;
+ error = ifmedia_ioctl(ifp, ifr, &sc->hn_media, cmd);
 break;
 default:
 error = ether_ioctl(ifp, cmd, data);
@@ -1284,6 +1657,265 @@
 }
 #endif

+#ifdef HN_LRO_HIWAT
+static int
+hn_lro_hiwat_sysctl(SYSCTL_HANDLER_ARGS)
+{
+ struct hn_softc *sc = arg1;
+ int hiwat, error;
+
+ hiwat = sc->hn_lro_hiwat;
+ error = sysctl_handle_int(oidp, &hiwat, 0, req);
+ if (error || req->newptr == NULL)
+ return error;
+
+ if (!HN_LRO_HIWAT_ISVALID(sc, hiwat))
+ return EINVAL;
+
+ if (sc->hn_lro_hiwat != hiwat)
+ hn_set_lro_hiwat(sc, hiwat);
+ return 0;
+}
+#endif /* HN_LRO_HIWAT */
+
+static int
+hn_tx_chimney_size_sysctl(SYSCTL_HANDLER_ARGS)
+{
+ struct hn_softc *sc = arg1;
+ int chimney_size, error;
+
+ chimney_size = sc->hn_tx_chimney_size;
+ error = sysctl_handle_int(oidp, &chimney_size, 0, req);
+ if (error || req->newptr == NULL)
+ return error;
+
+ if (chimney_size > sc->hn_tx_chimney_max || chimney_size <= 0)
+ return EINVAL;
+
+ if (sc->hn_tx_chimney_size != chimney_size)
+ sc->hn_tx_chimney_size = chimney_size;
+ return 0;
+}
+
+static int
+hn_check_iplen(const struct mbuf *m, int hoff)
+{
+ const struct ip *ip;
+ int len, iphlen, iplen;
+ const struct tcphdr *th;
+ int thoff; /* TCP data offset */
+
+ len = hoff + sizeof(struct ip);
+
+ /* The packet must be at least the size of an IP header. */
+ if (m->m_pkthdr.len < len)
+ return IPPROTO_DONE;
+
+ /* The fixed IP header must reside completely in the first mbuf. */
+ if (m->m_len < len)
+ return IPPROTO_DONE;
+
+ ip = mtodo(m, hoff);
+
+ /* Bound check the packet's stated IP header length. */
+ iphlen = ip->ip_hl << 2;
+ if (iphlen < sizeof(struct ip)) /* minimum header length */
+ return IPPROTO_DONE;
+
+ /* The full IP header must reside completely in the one mbuf. */
+ if (m->m_len < hoff + iphlen)
+ return IPPROTO_DONE;
+
+ iplen = ntohs(ip->ip_len);
+
+ /*
+ * Check that the amount of data in the buffers is as
+ * at least much as the IP header would have us expect.
+ */
+ if (m->m_pkthdr.len < hoff + iplen)
+ return IPPROTO_DONE;
+
+ /*
+ * Ignore IP fragments.
+ */
+ if (ntohs(ip->ip_off) & (IP_OFFMASK | IP_MF))
+ return IPPROTO_DONE;
+
+ /*
+ * The TCP/IP or UDP/IP header must be entirely contained within
+ * the first fragment of a packet.
+ */
+ switch (ip->ip_p) {
+ case IPPROTO_TCP:
+ if (iplen < iphlen + sizeof(struct tcphdr))
+ return IPPROTO_DONE;
+ if (m->m_len < hoff + iphlen + sizeof(struct tcphdr))
+ return IPPROTO_DONE;
+ th = (const struct tcphdr *)((const uint8_t *)ip + iphlen);
+ thoff = th->th_off << 2;
+ if (thoff < sizeof(struct tcphdr) || thoff + iphlen > iplen)
+ return IPPROTO_DONE;
+ if (m->m_len < hoff + iphlen + thoff)
+ return IPPROTO_DONE;
+ break;
+ case IPPROTO_UDP:
+ if (iplen < iphlen + sizeof(struct udphdr))
+ return IPPROTO_DONE;
+ if (m->m_len < hoff + iphlen + sizeof(struct udphdr))
+ return IPPROTO_DONE;
+ break;
+ default:
+ if (iplen < iphlen)
+ return IPPROTO_DONE;
+ break;
+ }
+ return ip->ip_p;
+}
+
+static void
+hn_dma_map_paddr(void *arg, bus_dma_segment_t *segs, int nseg, int error)
+{
+ bus_addr_t *paddr = arg;
+
+ if (error)
+ return;
+
+ KASSERT(nseg == 1, ("too many segments %d!", nseg));
+ *paddr = segs->ds_addr;
+}
+
+static int
+hn_create_tx_ring(struct hn_softc *sc)
+{
+ bus_dma_tag_t parent_dtag;
+ int error, i;
+
+ sc->hn_txdesc_cnt = HN_TX_DESC_CNT;
+ sc->hn_txdesc = malloc(sizeof(struct hn_txdesc) * sc->hn_txdesc_cnt,
+ M_NETVSC, M_WAITOK | M_ZERO);
+ SLIST_INIT(&sc->hn_txlist);
+ mtx_init(&sc->hn_txlist_spin, "hn txlist", NULL, MTX_SPIN);
+
+ parent_dtag = bus_get_dma_tag(sc->hn_dev);
+
+ /* DMA tag for RNDIS messages. */
+ error = bus_dma_tag_create(parent_dtag, /* parent */
+ HN_RNDIS_MSG_ALIGN, /* alignment */
+ HN_RNDIS_MSG_BOUNDARY, /* boundary */
+ BUS_SPACE_MAXADDR, /* lowaddr */
+ BUS_SPACE_MAXADDR, /* highaddr */
+ NULL, NULL, /* filter, filterarg */
+ HN_RNDIS_MSG_LEN, /* maxsize */
+ 1, /* nsegments */
+ HN_RNDIS_MSG_LEN, /* maxsegsize */
+ 0, /* flags */
+ NULL, /* lockfunc */
+ NULL, /* lockfuncarg */
+ &sc->hn_tx_rndis_dtag);
+ if (error) {
+ device_printf(sc->hn_dev, "failed to create rndis dmatag\n");
+ return error;
+ }
+
+ /* DMA tag for data. */
+ error = bus_dma_tag_create(parent_dtag, /* parent */
+ 1, /* alignment */
+ HN_TX_DATA_BOUNDARY, /* boundary */
+ BUS_SPACE_MAXADDR, /* lowaddr */
+ BUS_SPACE_MAXADDR, /* highaddr */
+ NULL, NULL, /* filter, filterarg */
+ HN_TX_DATA_MAXSIZE, /* maxsize */
+ HN_TX_DATA_SEGCNT_MAX, /* nsegments */
+ HN_TX_DATA_SEGSIZE, /* maxsegsize */
+ 0, /* flags */
+ NULL, /* lockfunc */
+ NULL, /* lockfuncarg */
+ &sc->hn_tx_data_dtag);
+ if (error) {
+ device_printf(sc->hn_dev, "failed to create data dmatag\n");
+ return error;
+ }
+
+ for (i = 0; i < sc->hn_txdesc_cnt; ++i) {
+ struct hn_txdesc *txd = &sc->hn_txdesc[i];
+
+ txd->sc = sc;
+
+ /*
+ * Allocate and load RNDIS messages.
+ */
+ error = bus_dmamem_alloc(sc->hn_tx_rndis_dtag,
+ (void **)&txd->rndis_msg,
+ BUS_DMA_WAITOK | BUS_DMA_COHERENT,
+ &txd->rndis_msg_dmap);
+ if (error) {
+ device_printf(sc->hn_dev,
+ "failed to allocate rndis_msg, %d\n", i);
+ return error;
+ }
+
+ error = bus_dmamap_load(sc->hn_tx_rndis_dtag,
+ txd->rndis_msg_dmap,
+ txd->rndis_msg, HN_RNDIS_MSG_LEN,
+ hn_dma_map_paddr, &txd->rndis_msg_paddr,
+ BUS_DMA_NOWAIT);
+ if (error) {
+ device_printf(sc->hn_dev,
+ "failed to load rndis_msg, %d\n", i);
+ bus_dmamem_free(sc->hn_tx_rndis_dtag,
+ txd->rndis_msg, txd->rndis_msg_dmap);
+ return error;
+ }
+
+ /* DMA map for TX data. */
+ error = bus_dmamap_create(sc->hn_tx_data_dtag, 0,
+ &txd->data_dmap);
+ if (error) {
+ device_printf(sc->hn_dev,
+ "failed to allocate tx data dmamap\n");
+ bus_dmamap_unload(sc->hn_tx_rndis_dtag,
+ txd->rndis_msg_dmap);
+ bus_dmamem_free(sc->hn_tx_rndis_dtag,
+ txd->rndis_msg, txd->rndis_msg_dmap);
+ return error;
+ }
+
+ /* All set, put it to list */
+ txd->flags |= HN_TXD_FLAG_ONLIST;
+ SLIST_INSERT_HEAD(&sc->hn_txlist, txd, link);
+ }
+ sc->hn_txdesc_avail = sc->hn_txdesc_cnt;
+
+ return 0;
+}
+
+static void
+hn_destroy_tx_ring(struct hn_softc *sc)
+{
+ struct hn_txdesc *txd;
+
+ while ((txd = SLIST_FIRST(&sc->hn_txlist)) != NULL) {
+ KASSERT(txd->m == NULL, ("still has mbuf installed"));
+ KASSERT((txd->flags & HN_TXD_FLAG_DMAMAP) == 0,
+ ("still dma mapped"));
+ SLIST_REMOVE_HEAD(&sc->hn_txlist, link);
+
+ bus_dmamap_unload(sc->hn_tx_rndis_dtag,
+ txd->rndis_msg_dmap);
+ bus_dmamem_free(sc->hn_tx_rndis_dtag,
+ txd->rndis_msg, txd->rndis_msg_dmap);
+
+ bus_dmamap_destroy(sc->hn_tx_data_dtag, txd->data_dmap);
+ }
+
+ if (sc->hn_tx_data_dtag != NULL)
+ bus_dma_tag_destroy(sc->hn_tx_data_dtag);
+ if (sc->hn_tx_rndis_dtag != NULL)
+ bus_dma_tag_destroy(sc->hn_tx_rndis_dtag);
+ free(sc->hn_txdesc, M_NETVSC);
+ mtx_destroy(&sc->hn_txlist_spin);
+}
+
 static device_method_t netvsc_methods[] = {
 /* Device interface */
 DEVMETHOD(device_probe, netvsc_probe),
@@ -1305,6 +1937,3 @@
 DRIVER_MODULE(hn, vmbus, netvsc_driver, netvsc_devclass, 0, 0);
 MODULE_VERSION(hn, 1);
 MODULE_DEPEND(hn, vmbus, 1, 1, 1);
-SYSINIT(netvsc_initx, SI_SUB_KTHREAD_IDLE, SI_ORDER_MIDDLE + 1, netvsc_init,
- NULL);
-
Index: sys/dev/hyperv/netvsc/hv_rndis.h
===================================================================
--- sys/dev/hyperv/netvsc/hv_rndis.h (revision 294703)
+++ sys/dev/hyperv/netvsc/hv_rndis.h (working copy)
@@ -1049,6 +1049,8 @@
 int netvsc_recv(struct hv_device *device_ctx,
 netvsc_packet *packet,
 rndis_tcp_ip_csum_info *csum_info);
+void netvsc_recv_rollup(struct hv_device *device_ctx);
+void netvsc_channel_rollup(struct hv_device *device_ctx);

 void* hv_set_rppi_data(rndis_msg *rndis_mesg,
 uint32_t rppi_size,
Index: sys/dev/hyperv/netvsc/hv_rndis_filter.c
===================================================================
--- sys/dev/hyperv/netvsc/hv_rndis_filter.c (revision 294703)
+++ sys/dev/hyperv/netvsc/hv_rndis_filter.c (working copy)
@@ -963,3 +963,32 @@
 request->halt_complete_flag = 1;
 }

+/*
+ * RNDIS filter when "all" reception is done
+ */
+void
+hv_rf_receive_rollup(netvsc_dev *net_dev)
+{
+ rndis_device *rndis_dev;
+
+ rndis_dev = (rndis_device *)net_dev->extension;
+ netvsc_recv_rollup(rndis_dev->net_dev->dev);
+}
+
+void
+hv_rf_channel_rollup(netvsc_dev *net_dev)
+{
+ rndis_device *rndis_dev;
+
+ rndis_dev = (rndis_device *)net_dev->extension;
+
+ /*
+ * This could be called pretty early, so we need
+ * to make sure everything has been setup.
+ */
+ if (rndis_dev == NULL ||
+ rndis_dev->net_dev == NULL ||
+ rndis_dev->net_dev->dev == NULL)
+ return;
+ netvsc_channel_rollup(rndis_dev->net_dev->dev);
+}
Index: sys/dev/hyperv/netvsc/hv_rndis_filter.h
===================================================================
--- sys/dev/hyperv/netvsc/hv_rndis_filter.h (revision 294703)
+++ sys/dev/hyperv/netvsc/hv_rndis_filter.h (working copy)
@@ -98,6 +98,8 @@

 int hv_rf_on_receive(netvsc_dev *net_dev,
 struct hv_device *device, netvsc_packet *pkt);
+void hv_rf_receive_rollup(netvsc_dev *net_dev);
+void hv_rf_channel_rollup(netvsc_dev *net_dev);
 int hv_rf_on_device_add(struct hv_device *device, void *additl_info);
 int hv_rf_on_device_remove(struct hv_device *device, boolean_t destroy_channel);
 int hv_rf_on_open(struct hv_device *device);
Index: sys/dev/hyperv/storvsc/hv_storvsc_drv_freebsd.c
===================================================================
--- sys/dev/hyperv/storvsc/hv_storvsc_drv_freebsd.c (revision 294703)
+++ sys/dev/hyperv/storvsc/hv_storvsc_drv_freebsd.c (working copy)
@@ -1525,12 +1525,12 @@
 {
 struct hv_sgl_node *sgl_node = NULL;

- sgl_node = LIST_FIRST(&g_hv_sgl_page_pool.in_use_sgl_list);
- LIST_REMOVE(sgl_node, link);
- if (NULL == sgl_node) {
+ if (LIST_EMPTY(&g_hv_sgl_page_pool.in_use_sgl_list)) {
 printf("storvsc error: not enough in use sgl\n");
 return;
 }
+ sgl_node = LIST_FIRST(&g_hv_sgl_page_pool.in_use_sgl_list);
+ LIST_REMOVE(sgl_node, link);
 sgl_node->sgl_data = sgl;
 LIST_INSERT_HEAD(&g_hv_sgl_page_pool.free_sgl_list, sgl_node, link);
 }
@@ -1556,12 +1556,12 @@
 struct hv_sgl_node *sgl_node = NULL;

 /* get struct sglist from free_sgl_list */
- sgl_node = LIST_FIRST(&g_hv_sgl_page_pool.free_sgl_list);
- LIST_REMOVE(sgl_node, link);
- if (NULL == sgl_node) {
+ if (LIST_EMPTY(&g_hv_sgl_page_pool.free_sgl_list)) {
 printf("storvsc error: not enough free sgl\n");
 return NULL;
 }
+ sgl_node = LIST_FIRST(&g_hv_sgl_page_pool.free_sgl_list);
+ LIST_REMOVE(sgl_node, link);
 bounce_sgl = sgl_node->sgl_data;
 LIST_INSERT_HEAD(&g_hv_sgl_page_pool.in_use_sgl_list, sgl_node, link);

@@ -1924,6 +1924,64 @@
 }

 /**
+ * Modified based on scsi_print_inquiry which is responsible to
+ * print the detail information for scsi_inquiry_data
+ * return 1 if it is valid, 0 otherwise.
+ */
+static inline int
+is_scsi_valid(const struct scsi_inquiry_data *inq_data)
+{
+ u_int8_t type;
+ char vendor[16], product[48], revision[16];
+ /**
+ * Check device type and qualifier
+ */
+ if (!(SID_QUAL_IS_VENDOR_UNIQUE(inq_data) ||
+ SID_QUAL(inq_data) == SID_QUAL_LU_CONNECTED)) {
+ return (0);
+ }
+
+ type = SID_TYPE(inq_data);
+ switch (type) {
+ case T_DIRECT:
+ case T_SEQUENTIAL:
+ case T_PRINTER:
+ case T_PROCESSOR:
+ case T_WORM:
+ case T_CDROM:
+ case T_SCANNER:
+ case T_OPTICAL:
+ case T_CHANGER:
+ case T_COMM:
+ case T_STORARRAY:
+ case T_ENCLOSURE:
+ case T_RBC:
+ case T_OCRW:
+ case T_OSD:
+ case T_ADC:
+ break;
+ case T_NODEVICE:
+ return (0);
+ default:
+ return (0);
+ }
+ /**
+ * Check vendor, product, and revision
+ */
+ cam_strvis(vendor, inq_data->vendor, sizeof(inq_data->vendor),
+ sizeof(vendor));
+ cam_strvis(product, inq_data->product, sizeof(inq_data->product),
+ sizeof(product));
+ cam_strvis(revision, inq_data->revision, sizeof(inq_data->revision),
+ sizeof(revision));
+ if (strlen(vendor) == 0 ||
+ strlen(product) == 0 ||
+ strlen(revision) == 0) {
+ return (0);
+ }
+ return (1);
+}
+/**
 * @brief completion function before returning to CAM
 *
 * I/O process has been completed and the result needs
@@ -1992,12 +2050,33 @@

 ccb->ccb_h.status &= ~CAM_SIM_QUEUED;
 ccb->ccb_h.status &= ~CAM_STATUS_MASK;
+ /**
+ * check whether the SCSI device is valid for INQUIRY cmd.
+ * windows 10 and windows 2016 sends wrong information
+ * to VM for unknown reason. That is why there is is_scsi_valid
+ * check here.
+ */
+ const struct scsi_generic *cmd;
+ cmd = (const struct scsi_generic *)((ccb->ccb_h.flags & CAM_CDB_POINTER) ?
+ csio->cdb_io.cdb_ptr : csio->cdb_io.cdb_bytes);
+
 if (vm_srb->scsi_status == SCSI_STATUS_OK) {
- ccb->ccb_h.status |= CAM_REQ_CMP;
- } else {
+ if (cmd->opcode == INQUIRY &&
+ is_scsi_valid((struct scsi_inquiry_data *)csio->data_ptr) == 0) {
+ ccb->ccb_h.status |= CAM_DEV_NOT_THERE;
+ if (bootverbose) {
+ mtx_lock(&sc->hs_lock);
+ xpt_print(ccb->ccb_h.path,
+ "storvsc uninstalled device\n");
+ mtx_unlock(&sc->hs_lock);
+ }
+ } else {
+ ccb->ccb_h.status |= CAM_REQ_CMP;
+ }
+ } else {
 mtx_lock(&sc->hs_lock);
 xpt_print(ccb->ccb_h.path,
- "srovsc scsi_status = %d\n",
+ "storvsc scsi_status = %d\n",
 vm_srb->scsi_status);
 mtx_unlock(&sc->hs_lock);
 ccb->ccb_h.status |= CAM_SCSI_STATUS_ERROR;
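One detail of the TX rework above that is easy to miss: each hn_txdesc is reference-counted so the send path and the send-completion callback can race safely. The start path takes an extra hold across hv_nv_on_send() and ETHER_BPF_MTAP(), which is what lets the patch drop the old m_copypacket() BPF workaround. A minimal userland model of that hold/put discipline follows; this is a sketch under simplifying assumptions (descriptor recycling reduced to a printf, no free list):

/*
 * Userland model of the hn_txdesc reference counting; not driver code.
 */
#include <stdatomic.h>
#include <stdio.h>

struct txdesc {
    atomic_int refs;            /* models hn_txdesc.refs */
};

/* Models hn_txdesc_hold(): pin the descriptor. */
static void
txd_hold(struct txdesc *txd)
{
    atomic_fetch_add(&txd->refs, 1);
}

/* Models hn_txdesc_put(): only the last reference recycles it. */
static int
txd_put(struct txdesc *txd)
{
    if (atomic_fetch_sub(&txd->refs, 1) != 1)
        return 0;
    printf("descriptor (and its mbuf) recycled\n");
    return 1;
}

int
main(void)
{
    struct txdesc txd = { 1 };  /* reference owned by the start path */

    txd_hold(&txd);             /* extra hold across send + BPF tap */
    txd_put(&txd);              /* completion fired early: not freed yet */
    /* ... safe to tap the mbuf for BPF here ... */
    txd_put(&txd);              /* start path drops its ref: recycled */
    return 0;
}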
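The storvsc portion of the diff also fixes an ordering bug in the bounce-buffer sgl pool: the old code called LIST_FIRST() and LIST_REMOVE() before checking for an empty list, so pool exhaustion dereferenced NULL instead of reaching the error message. A small runnable sketch of the corrected check-first pattern, with a simplified node type in place of struct hv_sgl_node:

/*
 * Userland sketch of the storvsc sgl-pool fix: test for emptiness
 * before touching the list head.
 */
#include <sys/queue.h>
#include <stddef.h>
#include <stdio.h>

struct sgl_node {
    LIST_ENTRY(sgl_node) link;
};

static LIST_HEAD(, sgl_node) free_list = LIST_HEAD_INITIALIZER(free_list);

static struct sgl_node *
get_free_node(void)
{
    struct sgl_node *n;

    if (LIST_EMPTY(&free_list)) {       /* check before removing */
        printf("storvsc error: not enough free sgl\n");
        return NULL;
    }
    n = LIST_FIRST(&free_list);
    LIST_REMOVE(n, link);
    return n;
}

int
main(void)
{
    struct sgl_node node;

    LIST_INSERT_HEAD(&free_list, &node, link);
    get_free_node();    /* succeeds */
    get_free_node();    /* pool empty: reports instead of crashing */
    return 0;
}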