--- sys/dev/virtio/network/if_vtnetvar.h.ori	2020-01-08 19:57:18.670256000 -0800
+++ sys/dev/virtio/network/if_vtnetvar.h	2020-01-08 19:58:47.614309220 -0800
@@ -82,6 +82,9 @@
 	struct taskqueue	*vtnrx_tq;
 	struct task		 vtnrx_intrtask;
 	struct lro_ctrl		 vtnrx_lro;
+#ifdef DEV_NETMAP
+	struct virtio_net_hdr_mrg_rxbuf vtnrx_shrhdr;
+#endif /* DEV_NETMAP */
 	char			 vtnrx_name[16];
 } __aligned(CACHE_LINE_SIZE);
 
@@ -118,6 +121,9 @@
 #ifndef VTNET_LEGACY_TX
 	struct task		 vtntx_defrtask;
 #endif
+#ifdef DEV_NETMAP
+	struct virtio_net_hdr_mrg_rxbuf vtntx_shrhdr;
+#endif /* DEV_NETMAP */
 	char			 vtntx_name[16];
 } __aligned(CACHE_LINE_SIZE);

--- sys/dev/netmap/if_vtnet_netmap.h.ori	2020-01-11 19:20:40.235007000 -0800
+++ sys/dev/netmap/if_vtnet_netmap.h	2020-01-13 09:22:09.075604156 -0800
@@ -52,7 +52,50 @@
 	    na->tx_rings[idx]->nr_mode == NKR_NETMAP_ON);
 }
 
+/* Free all the unused buffers in the RX and TX virtqueues.
+ * This function is called when entering and exiting netmap mode:
+ * - buffers queued by the virtio driver return an mbuf pointer
+ *   (a vtnet_tx_header on the TX side) and need to be freed;
+ * - buffers queued by netmap return the rxq/txq pointer itself
+ *   and need no further work.
+ */
 static void
+vtnet_netmap_free_bufs(struct vtnet_softc *sc)
+{
+	int i, nmb = 0, n = 0, last;
+
+	for (i = 0; i < sc->vtnet_max_vq_pairs; i++) {
+		struct vtnet_rxq *rxq = &sc->vtnet_rxqs[i];
+		struct vtnet_txq *txq = &sc->vtnet_txqs[i];
+		struct vtnet_tx_header *txhdr;
+		struct virtqueue *vq;
+		struct mbuf *m;
+
+		last = 0;
+		vq = rxq->vtnrx_vq;
+		while ((m = virtqueue_drain(vq, &last)) != NULL) {
+			n++;
+			if (m != (void *)rxq)
+				m_freem(m);
+			else
+				nmb++;
+		}
+
+		last = 0;
+		vq = txq->vtntx_vq;
+		while ((txhdr = virtqueue_drain(vq, &last)) != NULL) {
+			n++;
+			if (txhdr != (void *)txq) {
+				m_freem(txhdr->vth_mbuf);
+				uma_zfree(vtnet_tx_header_zone, txhdr);
+			} else
+				nmb++;
+		}
+	}
+	D("freed %d mbufs, %d netmap bufs on %d queues",
+	    n - nmb, nmb, i);
+}
+
+static void
 vtnet_free_used(struct virtqueue *vq, int netmap_bufs, enum txrx t, int idx)
 {
 	void *cookie;
@@ -89,7 +132,6 @@
 {
 	struct ifnet *ifp = na->ifp;
 	struct vtnet_softc *sc = ifp->if_softc;
-	int success;
 	enum txrx t;
 	int i;
 
@@ -98,15 +140,22 @@
 	vtnet_drain_taskqueues(sc);
 
 	VTNET_CORE_LOCK(sc);
-	/* We need nm_netmap_on() to return true when called by
-	 * vtnet_init_locked() below. */
-	if (state)
+	/* Enable or disable flags and callbacks in na and ifp. */
+	if (state) {
 		nm_set_native_flags(na);
-
+	} else {
+		nm_clear_native_flags(na);
+	}
 	/* We need to trigger a device reset in order to unexpose guest buffers
 	 * published to the host. */
 	ifp->if_drv_flags &= ~(IFF_DRV_RUNNING | IFF_DRV_OACTIVE);
+
+	/* Drain the queues, so that the netmap and native datapaths do not
+	 * interfere with each other.
+	 */
+//	vtnet_netmap_free_bufs(sc);
 	/* Get pending used buffers. The way they are freed depends on whether
 	 * they are netmap buffer or they are mbufs. We can tell apart the two
 	 * cases by looking at kring->nr_mode, before this is possibly updated
@@ -128,9 +177,7 @@
 		    kring->nr_mode == NKR_NETMAP_ON, NR_RX, i);
 		VTNET_RXQ_UNLOCK(rxq);
 	}
-	vtnet_init_locked(sc);
-	success = (ifp->if_drv_flags & IFF_DRV_RUNNING) ? 0 : ENXIO;
-
+	vtnet_init_locked(sc); /* also enables interrupts */
 	if (state) {
 		for_rx_tx(t) {
 			/* Hardware rings. */
@@ -173,8 +220,7 @@
 	}
 	VTNET_CORE_UNLOCK(sc);
-
-	return success;
+	return (ifp->if_drv_flags & IFF_DRV_RUNNING) ? 0 : ENXIO;
 }
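Both vtnet_netmap_free_bufs() and vtnet_free_used() tell netmap buffers apart
from mbufs purely by the cookie that was passed to virtqueue_enqueue(): the
queue pointer itself for netmap buffers, an mbuf or vtnet_tx_header otherwise.
A minimal sketch of that discrimination for one RX queue, assuming
virtqueue_drain() returns the enqueue cookie and NULL on an empty queue (as in
sys/dev/virtio/virtqueue.c); example_drain_rxq() is a hypothetical name, not
part of this patch:

	static void
	example_drain_rxq(struct vtnet_rxq *rxq)
	{
		struct virtqueue *vq = rxq->vtnrx_vq;
		void *cookie;
		int last = 0;

		while ((cookie = virtqueue_drain(vq, &last)) != NULL) {
			if (cookie == (void *)rxq)
				continue;	/* netmap buffer: nothing to free */
			m_freem(cookie);	/* mbuf queued by the native driver */
		}
	}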
@@ -187,6 +233,7 @@
 	struct netmap_ring *ring = kring->ring;
 	u_int ring_nr = kring->ring_id;
 	u_int nm_i;	/* index into the netmap ring */
+//	u_int nic_i;	/* index into the NIC ring */
 	u_int const lim = kring->nkr_num_slots - 1;
 	u_int const head = kring->rhead;
 
@@ -206,6 +253,7 @@
 	if (nm_i != head) {	/* we have new packets to send */
 		struct sglist *sg = txq->vtntx_sg;
+//		nic_i = netmap_idx_k2n(kring, nm_i);
 
 		for (; nm_i != head; nm_i = nm_next(nm_i, lim)) {
 			/* we use an empty header here */
 			struct netmap_slot *slot = &ring->slot[nm_i];
@@ -221,49 +269,81 @@
 			 * and kick the hypervisor (if necessary).
 			 */
 			sglist_reset(sg); // cheap
+			// if vtnet_hdr_size > 0 ...
 			err = sglist_append(sg, &txq->vtntx_shrhdr,
 			    sc->vtnet_hdr_size);
+			// XXX later, support multi-segment packets
+//			err = sglist_append_phys(sg, paddr, len);
 			err |= sglist_append_phys(sg, paddr, len);
 			KASSERT(err == 0, ("%s: cannot append to sglist %d",
 			    __func__, err));
 			err = virtqueue_enqueue(vq, /*cookie=*/txq, sg,
 			    /*readable=*/sg->sg_nseg, /*writeable=*/0);
 			if (unlikely(err)) {
 				if (err != ENOSPC)
 					nm_prerr("virtqueue_enqueue(%s) failed: %d\n",
 					    kring->name, err);
 				break;
 			}
+//			nic_i = nm_next(nic_i, lim);
 		}
+
+		/* No more free TX slots? Ask the hypervisor for notifications,
+		 * possibly only when a considerable amount of work has been
+		 * done.
+		 */
+		ND(3, "sent %d packets, hwcur %d", n, nm_i);
+//		virtqueue_disable_intr(vq); // ??
 		virtqueue_notify(vq);
 
 		/* Update hwcur depending on where we stopped. */
 		kring->nr_hwcur = nm_i; /* note we might break early */
+	} else {
+		if (ring->head != ring->tail)
+			ND(5, "pure notify ? head %d tail %d nused %d %d",
+			    ring->head, ring->tail, virtqueue_nused(vq),
+			    (virtqueue_dump(vq), 1));
+		virtqueue_notify(vq);
+		if (interrupts) {
+			virtqueue_enable_intr(vq); // like postpone with 0
+		}
 	}
 
 	/* Free used slots. We only consider our own used buffers, recognized
 	 * by the token we passed to virtqueue_enqueue.
 	 */
 	n = 0;
 	for (;;) {
-		void *token = virtqueue_dequeue(vq, NULL);
-		if (token == NULL)
+		struct vtnet_tx_header *txhdr = virtqueue_dequeue(vq, NULL);
+		if (txhdr == NULL)
 			break;
-		if (unlikely(token != (void *)txq))
-			nm_prerr("BUG: TX token mismatch\n");
-		else
+		if (likely(txhdr == (void *)txq)) {
 			n++;
+			if (virtqueue_nused(vq) < 32) { // XXX slow release
+				break;
+			}
+		} else { /* leftover from a previous transmission */
+			nm_prerr("BUG: TX token mismatch\n");
+			m_freem(txhdr->vth_mbuf);
+			uma_zfree(vtnet_tx_header_zone, txhdr);
+		}
 	}
-	if (n > 0) {
+	if (n) {
 		kring->nr_hwtail += n;
 		if (kring->nr_hwtail > lim)
 			kring->nr_hwtail -= lim + 1;
 	}
-
-	if (interrupts && virtqueue_nfree(vq) < 32)
-		virtqueue_postpone_intr(vq, VQ_POSTPONE_LONG);
+	if (nm_i != kring->nr_hwtail /* && vtnet_txq_below_threshold(txq) == 0 */) {
+		ND(3, "disable intr, hwcur %d", nm_i);
+		virtqueue_disable_intr(vq);
+	} else if (interrupts && virtqueue_nfree(vq) < 32) {
+		ND(3, "enable intr, hwcur %d", nm_i);
+		virtqueue_postpone_intr(vq, VQ_POSTPONE_SHORT);
+	}
 
 	return 0;
 }
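Each netmap TX slot above is published to the host as a two-segment sglist:
the shared all-zero virtio-net header first, then the netmap buffer by
physical address. Netmap mode never requests checksum or TSO offloads, so a
single zeroed header (vtntx_shrhdr) can be shared by every packet instead of
allocating one per transmission. A self-contained sketch of that layout using
the stock sglist/virtqueue KPIs; example_tx_enqueue() is a hypothetical
helper, not part of this patch:

	static int
	example_tx_enqueue(struct vtnet_softc *sc, struct vtnet_txq *txq,
	    vm_paddr_t paddr, size_t len)
	{
		struct sglist *sg = txq->vtntx_sg;
		int err;

		sglist_reset(sg);
		/* Segment 0: shared virtio-net header, zeroed at setup time. */
		err = sglist_append(sg, &txq->vtntx_shrhdr, sc->vtnet_hdr_size);
		/* Segment 1: the (wired) netmap buffer, by physical address. */
		err |= sglist_append_phys(sg, paddr, len);
		if (err != 0)
			return (err);
		/* The txq pointer doubles as the cookie checked at dequeue. */
		return (virtqueue_enqueue(txq->vtntx_vq, txq, sg,
		    /*readable=*/sg->sg_nseg, /*writeable=*/0));
	}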
@@ -289,7 +369,7 @@
 		struct netmap_slot *slot = &ring->slot[nm_i];
 		uint64_t paddr;
 		void *addr = PNMB(na, slot, &paddr);
-		int err;
+		int err = 0;
 
 		if (addr == NETMAP_BUF_BASE(na)) { /* bad buf */
 			if (netmap_ring_reinit(kring))
@@ -297,22 +377,24 @@
 		}
 
 		slot->flags &= ~NS_BUF_CHANGED;
-		sglist_reset(&sg);
+		sglist_reset(&sg); // cheap
 		err = sglist_append(&sg, &rxq->vtnrx_shrhdr,
 		    sc->vtnet_hdr_size);
+//		err = sglist_append_phys(&sg, paddr, NETMAP_BUF_SIZE(na));
 		err |= sglist_append_phys(&sg, paddr, NETMAP_BUF_SIZE(na));
 		KASSERT(err == 0, ("%s: cannot append to sglist %d",
 		    __func__, err));
 		/* writable for the host */
 		err = virtqueue_enqueue(vq, /*cookie=*/rxq, &sg,
 		    /*readable=*/0, /*writeable=*/sg.sg_nseg);
 		if (unlikely(err)) {
 			if (err != ENOSPC)
 				nm_prerr("virtqueue_enqueue(%s) failed: %d\n",
 				    kring->name, err);
 			break;
 		}
+
 		nm_i = nm_next(nm_i, lim);
 	}
-
 	return nm_i;
 }
@@ -357,8 +439,7 @@
 	u_int nm_i;	/* index into the netmap ring */
 	u_int const lim = kring->nkr_num_slots - 1;
 	u_int const head = kring->rhead;
-	int force_update = (flags & NAF_FORCE_READ) ||
-	    (kring->nr_kflags & NKR_PENDINTR);
+	int force_update = (flags & NAF_FORCE_READ) || (kring->nr_kflags & NKR_PENDINTR);
 	int interrupts = !(kring->nr_kflags & NKR_NOINTR);
 
 	/* device-specific */
@@ -366,16 +447,22 @@
 	struct vtnet_rxq *rxq = &sc->vtnet_rxqs[ring_nr];
 	struct virtqueue *vq = rxq->vtnrx_vq;
 
+	/* XXX netif_carrier_ok ? */
+
+//	if (head > lim)
+//		return netmap_ring_reinit(kring);
+
+	rmb();
+
 	/*
 	 * First part: import newly received packets.
 	 * Only accept our own buffers (matching the token). We should only get
-	 * matching buffers. We may need to stop early to avoid hwtail to overrun
-	 * hwcur.
+	 * matching buffers, because of vtnet_netmap_free_bufs() and
+	 * vtnet_netmap_init_rx_buffers(). We may need to stop early to avoid
+	 * hwtail overrunning hwcur.
 	 */
 	if (netmap_no_pendintr || force_update) {
 		uint32_t hwtail_lim = nm_prev(kring->nr_hwcur, lim);
 		void *token;
 
 		vtnet_rxq_disable_intr(rxq);
@@ -408,24 +495,29 @@
 		kring->nr_hwtail = nm_i;
 		kring->nr_kflags &= ~NKR_PENDINTR;
 	}
-	ND("[B] h %d c %d hwcur %d hwtail %d", ring->head, ring->cur,
-	    kring->nr_hwcur, kring->nr_hwtail);
+	ND("[B] h %d c %d hwcur %d hwtail %d",
+	    ring->head, ring->cur, kring->nr_hwcur, kring->nr_hwtail);
 
 	/*
 	 * Second part: skip past packets that userspace has released.
 	 */
 	nm_i = kring->nr_hwcur; /* netmap ring index */
 	if (nm_i != head) {
-		int nm_j = vtnet_netmap_kring_refill(kring, nm_i, head);
-		if (nm_j < 0)
-			return nm_j;
-		kring->nr_hwcur = nm_j;
+		int err = vtnet_netmap_kring_refill(kring, nm_i, head);
+		if (err < 0)
+			return 1;
+		kring->nr_hwcur = err;
 		virtqueue_notify(vq);
+		/* After draining the queue we may need an interrupt
+		 * from the hypervisor.
+		 */
+		if (interrupts) {
+			vtnet_rxq_enable_intr(rxq);
+		}
 	}
-
-	ND("[C] h %d c %d t %d hwcur %d hwtail %d", ring->head, ring->cur,
-	    ring->tail, kring->nr_hwcur, kring->nr_hwtail);
+	ND("[C] h %d c %d t %d hwcur %d hwtail %d",
+	    ring->head, ring->cur, ring->tail,
+	    kring->nr_hwcur, kring->nr_hwtail);
 
 	return 0;
 }
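The import loop above stops at hwtail_lim = nm_prev(kring->nr_hwcur, lim) so
that hwtail can never catch up with hwcur, and the refill path likewise leaves
one virtqueue slot unused. Both rules come from netmap's circular index
arithmetic on a ring of lim + 1 slots; a small sketch, with the example_*
names being hypothetical:

	/* Equivalent to netmap's nm_next()/nm_prev() macros. */
	static inline u_int
	example_next(u_int i, u_int lim)
	{
		return (i == lim) ? 0 : i + 1;
	}

	/* Slots in the half-open interval [cur, tail): how many received
	 * buffers rxsync may publish before tail would collide with cur. */
	static inline u_int
	example_slots_avail(u_int cur, u_int tail, u_int lim)
	{
		return (tail >= cur) ? (tail - cur) : (tail + lim + 1 - cur);
	}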
@@ -452,7 +544,42 @@
 	}
 }
 
+/* Point the RX virtqueue buffers at netmap buffers. */
 static int
+vtnet_netmap_init_rx_buffers(struct vtnet_softc *sc)
+{
+	struct ifnet *ifp = sc->vtnet_ifp;
+	struct netmap_adapter *na = NA(ifp);
+	unsigned int r;
+
+	if (!nm_native_on(na))
+		return 0;
+
+	for (r = 0; r < na->num_rx_rings; r++) {
+		struct netmap_kring *kring = na->rx_rings[r];
+		struct vtnet_rxq *rxq = &sc->vtnet_rxqs[r];
+		struct virtqueue *vq = rxq->vtnrx_vq;
+		struct netmap_slot *slot;
+		int err = 0;
+
+		slot = netmap_reset(na, NR_RX, r, 0);
+		if (!slot) {
+			D("strange, null netmap ring %d", r);
+			return 0;
+		}
+		/* Add up to na->num_rx_desc - 1 buffers to this RX virtqueue.
+		 * It's important to leave one virtqueue slot free, otherwise
+		 * we can run into ring->cur/ring->tail wraparounds.
+		 */
+		err = vtnet_netmap_kring_refill(kring, 0, na->num_rx_desc - 1);
+		if (err < 0)
+			return 0;
+		virtqueue_notify(vq);
+	}
+
+	return 1;
+}
+
+static int
 vtnet_netmap_tx_slots(struct vtnet_softc *sc)
 {
 	int div;
 
@@ -521,20 +648,23 @@
 	na.ifp = sc->vtnet_ifp;
 	na.na_flags = 0;
+//	na.num_tx_desc = 1024; // sc->vtnet_rx_nmbufs;
 	na.num_tx_desc = vtnet_netmap_tx_slots(sc);
+//	na.num_rx_desc = 1024; // sc->vtnet_rx_nmbufs;
 	na.num_rx_desc = vtnet_netmap_rx_slots(sc);
-	na.num_tx_rings = na.num_rx_rings = sc->vtnet_max_vq_pairs;
 	na.rx_buf_maxsize = 0;
 	na.nm_register = vtnet_netmap_reg;
 	na.nm_txsync = vtnet_netmap_txsync;
 	na.nm_rxsync = vtnet_netmap_rxsync;
 	na.nm_intr = vtnet_netmap_intr;
+	na.num_tx_rings = na.num_rx_rings = sc->vtnet_max_vq_pairs;
 	na.nm_config = vtnet_netmap_config;
+	D("max rings %d", sc->vtnet_max_vq_pairs);
 
-	netmap_attach(&na);
-	nm_prinf("vtnet attached txq=%d, txd=%d rxq=%d, rxd=%d\n",
-	    na.num_tx_rings, na.num_tx_desc, na.num_tx_rings, na.num_rx_desc);
+
+	netmap_attach(&na);
 }
 /* end of file */
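na.num_tx_desc and na.num_rx_desc are deliberately not the raw virtqueue
sizes: each netmap packet costs either one virtqueue slot (when indirect
descriptors were negotiated, the whole 2-element sglist fits in a single
slot) or two slots (header segment plus buffer segment). The int div local
in vtnet_netmap_tx_slots() suggests exactly this division; a sketch of the
sizing rule under that assumption, with example_tx_slots() as a hypothetical
name and virtqueue_size() assumed to report the descriptor count:

	static int
	example_tx_slots(struct vtnet_softc *sc)
	{
		/* One slot per packet with indirect descriptors, two otherwise
		 * (the sglist has two segments: shared header + buffer). */
		int div = (sc->vtnet_flags & VTNET_FLAG_INDIRECT) ? 1 : 2;

		return (virtqueue_size(sc->vtnet_txqs[0].vtntx_vq) / div);
	}

With a 256-descriptor TX virtqueue this would expose 256 netmap slots when
indirect descriptors are available and 128 otherwise.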