--- sys/conf/files.ori 2020-03-19 20:19:56.038941000 -0700 +++ sys/conf/files 2020-03-19 20:48:01.371238100 -0700 @@ -3246,6 +3246,9 @@ dev/virtio/virtio_bus_if.m optional virtio dev/virtio/virtio_if.m optional virtio dev/virtio/pci/virtio_pci.c optional virtio_pci +dev/virtio/pci/virtio_pci_if.m optional virtio_pci +dev/virtio/pci/virtio_pci_legacy.c optional virtio_pci +dev/virtio/pci/virtio_pci_modern.c optional virtio_pci dev/virtio/mmio/virtio_mmio.c optional virtio_mmio dev/virtio/mmio/virtio_mmio_if.m optional virtio_mmio dev/virtio/network/if_vtnet.c optional vtnet diff -urN sys/modules/virtio.ori/pci/Makefile sys/modules/virtio/pci/Makefile --- sys/modules/virtio.ori/pci/Makefile 2020-03-19 20:20:31.883916000 -0700 +++ sys/modules/virtio/pci/Makefile 2020-03-19 23:17:47.063607000 -0700 @@ -27,6 +27,8 @@ KMOD= virtio_pci SRCS= virtio_pci.c +SRCS+= virtio_pci_legacy.c virtio_pci_modern.c +SRCS+= virtio_pci_if.c virtio_pci_if.h SRCS+= virtio_bus_if.h virtio_if.h SRCS+= bus_if.h device_if.h pci_if.h diff -urN sys/dev/virtio.ori/balloon/virtio_balloon.c sys/dev/virtio/balloon/virtio_balloon.c --- sys/dev/virtio.ori/balloon/virtio_balloon.c 2020-03-19 20:20:23.685975000 -0700 +++ sys/dev/virtio/balloon/virtio_balloon.c 2020-03-19 23:17:51.680301000 -0700 @@ -1,4 +1,6 @@ /*- + * SPDX-License-Identifier: BSD-2-Clause-FreeBSD + * * Copyright (c) 2011, Bryan Venteicher * All rights reserved. * @@ -78,6 +80,7 @@ static struct virtio_feature_desc vtballoon_feature_desc[] = { { VIRTIO_BALLOON_F_MUST_TELL_HOST, "MustTellHost" }, { VIRTIO_BALLOON_F_STATS_VQ, "StatsVq" }, + { VIRTIO_BALLOON_F_DEFLATE_ON_OOM, "DeflateOnOOM" }, { 0, NULL } }; @@ -87,7 +90,8 @@ static int vtballoon_detach(device_t); static int vtballoon_config_change(device_t); -static void vtballoon_negotiate_features(struct vtballoon_softc *); +static int vtballoon_negotiate_features(struct vtballoon_softc *); +static int vtballoon_setup_features(struct vtballoon_softc *); static int vtballoon_alloc_virtqueues(struct vtballoon_softc *); static void vtballoon_vq_intr(void *); @@ -107,10 +111,13 @@ static int vtballoon_sleep(struct vtballoon_softc *); static void vtballoon_thread(void *); -static void vtballoon_add_sysctl(struct vtballoon_softc *); +static void vtballoon_setup_sysctl(struct vtballoon_softc *); +#define vtballoon_modern(_sc) \ + (((_sc)->vtballoon_features & VIRTIO_F_VERSION_1) != 0) + /* Features desired/implemented by this driver. */ -#define VTBALLOON_FEATURES 0 +#define VTBALLOON_FEATURES VIRTIO_BALLOON_F_MUST_TELL_HOST /* Timeout between retries when the balloon needs inflating. 
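A note on the balloon hunk above: converting vtballoon_negotiate_features() to return int and pairing it with a new vtballoon_setup_features() is the two-step pattern this patch repeats in every driver that follows. A minimal sketch of the shape, assuming virtio_finalize_features() performs the VirtIO 1.0 FEATURES_OK handshake and returns an error if the device rejects the negotiated set; the vtfoo names are hypothetical:

    static int
    vtfoo_negotiate_features(struct vtfoo_softc *sc)
    {
            device_t dev = sc->vtfoo_dev;

            /* Offer our feature set; keep whatever the device accepts. */
            sc->vtfoo_features = virtio_negotiate_features(dev,
                VTFOO_FEATURES);

            /* Modern (V1) devices require FEATURES_OK before use. */
            return (virtio_finalize_features(dev));
    }

Attach proceeds only when this returns 0, which is why each driver's attach below gains a "cannot setup features" bail-out.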
*/ #define VTBALLOON_LOWMEM_TIMEOUT hz @@ -151,8 +158,10 @@ }; static devclass_t vtballoon_devclass; -DRIVER_MODULE(virtio_balloon, virtio_pci, vtballoon_driver, +DRIVER_MODULE(virtio_balloon, vtpcil, vtballoon_driver, vtballoon_devclass, 0, 0); +DRIVER_MODULE(virtio_balloon, vtpcim, vtballoon_driver, + vtballoon_devclass, 0, 0); MODULE_VERSION(virtio_balloon, 1); MODULE_DEPEND(virtio_balloon, virtio, 1, 1, 1); @@ -176,14 +185,18 @@ sc = device_get_softc(dev); sc->vtballoon_dev = dev; + virtio_set_feature_desc(dev, vtballoon_feature_desc); VTBALLOON_LOCK_INIT(sc, device_get_nameunit(dev)); TAILQ_INIT(&sc->vtballoon_pages); - vtballoon_add_sysctl(sc); + vtballoon_setup_sysctl(sc); - virtio_set_feature_desc(dev, vtballoon_feature_desc); - vtballoon_negotiate_features(sc); + error = vtballoon_setup_features(sc); + if (error) { + device_printf(dev, "cannot setup features\n"); + goto fail; + } sc->vtballoon_page_frames = malloc(VTBALLOON_PAGES_PER_REQUEST * sizeof(uint32_t), M_DEVBUF, M_NOWAIT | M_ZERO); @@ -269,18 +282,32 @@ return (1); } -static void +static int vtballoon_negotiate_features(struct vtballoon_softc *sc) { device_t dev; uint64_t features; dev = sc->vtballoon_dev; - features = virtio_negotiate_features(dev, VTBALLOON_FEATURES); - sc->vtballoon_features = features; + features = VTBALLOON_FEATURES; + + sc->vtballoon_features = virtio_negotiate_features(dev, features); + return (virtio_finalize_features(dev)); } static int +vtballoon_setup_features(struct vtballoon_softc *sc) +{ + int error; + + error = vtballoon_negotiate_features(sc); + if (error) + return (error); + + return (0); +} + +static int vtballoon_alloc_virtqueues(struct vtballoon_softc *sc) { device_t dev; @@ -438,7 +465,8 @@ { vm_page_t m; - m = vm_page_alloc(NULL, 0, VM_ALLOC_NORMAL | VM_ALLOC_NOOBJ); + m = vm_page_alloc(NULL, 0, + VM_ALLOC_NORMAL | VM_ALLOC_NOOBJ | VM_ALLOC_NODUMP); if (m != NULL) sc->vtballoon_current_npages++; @@ -461,16 +489,23 @@ desired = virtio_read_dev_config_4(sc->vtballoon_dev, offsetof(struct virtio_balloon_config, num_pages)); - return (le32toh(desired)); + if (vtballoon_modern(sc)) + return (desired); + else + return (le32toh(desired)); } static void vtballoon_update_size(struct vtballoon_softc *sc) { + uint32_t npages; + npages = sc->vtballoon_current_npages; + if (!vtballoon_modern(sc)) + npages = htole32(npages); + virtio_write_dev_config_4(sc->vtballoon_dev, - offsetof(struct virtio_balloon_config, actual), - htole32(sc->vtballoon_current_npages)); + offsetof(struct virtio_balloon_config, actual), npages); } static int @@ -542,7 +577,7 @@ } static void -vtballoon_add_sysctl(struct vtballoon_softc *sc) +vtballoon_setup_sysctl(struct vtballoon_softc *sc) { device_t dev; struct sysctl_ctx_list *ctx; diff -urN sys/dev/virtio.ori/balloon/virtio_balloon.h sys/dev/virtio/balloon/virtio_balloon.h --- sys/dev/virtio.ori/balloon/virtio_balloon.h 2020-03-19 20:20:23.686287000 -0700 +++ sys/dev/virtio/balloon/virtio_balloon.h 2020-03-19 23:17:51.680465000 -0700 @@ -1,4 +1,6 @@ /*- + * SPDX-License-Identifier: BSD-3-Clause + * * This header is BSD licensed so anyone can use the definitions to implement * compatible drivers/servers. * @@ -34,6 +36,7 @@ /* Feature bits. */ #define VIRTIO_BALLOON_F_MUST_TELL_HOST 0x1 /* Tell before reclaiming pages */ #define VIRTIO_BALLOON_F_STATS_VQ 0x2 /* Memory stats virtqueue */ +#define VIRTIO_BALLOON_F_DEFLATE_ON_OOM 0x4 /* Deflate balloon on OOM */ /* Size of a PFN in the balloon interface. 
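Earlier in this hunk, vtballoon_desired_size() skips le32toh() when vtballoon_modern() is true. The balloon is effectively an endianness exception on the legacy transport: hosts write its num_pages/actual config fields little-endian even though legacy VirtIO config space is otherwise guest-native, while the modern transport is assumed to hand back host-order values already. A standalone sketch of that rule:

    #include <stdint.h>
    #include <sys/endian.h>         /* le32toh() on FreeBSD */

    /* Convert the balloon's num_pages/actual fields to host order. */
    static uint32_t
    balloon_cfg_to_host(int modern, uint32_t raw)
    {
            return (modern ? raw : le32toh(raw));
    }

vtballoon_update_size() applies the mirror image, converting with htole32() only on the legacy path before writing "actual" back.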
*/ #define VIRTIO_BALLOON_PFN_SHIFT 12 @@ -52,8 +55,33 @@ #define VIRTIO_BALLOON_S_MINFLT 3 /* Number of minor faults */ #define VIRTIO_BALLOON_S_MEMFREE 4 /* Total amount of free memory */ #define VIRTIO_BALLOON_S_MEMTOT 5 /* Total amount of memory */ -#define VIRTIO_BALLOON_S_NR 6 +#define VIRTIO_BALLOON_S_AVAIL 6 /* Available memory as in /proc */ +#define VIRTIO_BALLOON_S_CACHES 7 /* Disk caches */ +#define VIRTIO_BALLOON_S_NR 8 +/* + * Memory statistics structure. + * Driver fills an array of these structures and passes to device. + * + * NOTE: fields are laid out in a way that would make compiler add padding + * between and after fields, so we have to use compiler-specific attributes to + * pack it, to disable this padding. This also often causes compiler to + * generate suboptimal code. + * + * We maintain this statistics structure format for backwards compatibility, + * but don't follow this example. + * + * If implementing a similar structure, do something like the below instead: + * struct virtio_balloon_stat { + * __virtio16 tag; + * __u8 reserved[6]; + * __virtio64 val; + * }; + * + * In other words, add explicit reserved fields to align field and + * structure boundaries at field size, avoiding compiler padding + * without the packed attribute. + */ struct virtio_balloon_stat { uint16_t tag; uint64_t val; diff -urN sys/dev/virtio.ori/block/virtio_blk.c sys/dev/virtio/block/virtio_blk.c --- sys/dev/virtio.ori/block/virtio_blk.c 2020-03-19 20:20:23.689042000 -0700 +++ sys/dev/virtio/block/virtio_blk.c 2020-03-19 23:17:51.682328000 -0700 @@ -1,4 +1,6 @@ /*- + * SPDX-License-Identifier: BSD-2-Clause-FreeBSD + * * Copyright (c) 2011, Bryan Venteicher * All rights reserved. * @@ -74,11 +76,10 @@ uint64_t vtblk_features; uint32_t vtblk_flags; #define VTBLK_FLAG_INDIRECT 0x0001 -#define VTBLK_FLAG_READONLY 0x0002 -#define VTBLK_FLAG_DETACH 0x0004 -#define VTBLK_FLAG_SUSPEND 0x0008 -#define VTBLK_FLAG_BARRIER 0x0010 -#define VTBLK_FLAG_WC_CONFIG 0x0020 +#define VTBLK_FLAG_DETACH 0x0002 +#define VTBLK_FLAG_SUSPEND 0x0004 +#define VTBLK_FLAG_BARRIER 0x0008 +#define VTBLK_FLAG_WCE_CONFIG 0x0010 struct virtqueue *vtblk_vq; struct sglist *vtblk_sglist; @@ -107,9 +108,10 @@ { VIRTIO_BLK_F_RO, "ReadOnly" }, { VIRTIO_BLK_F_BLK_SIZE, "BlockSize" }, { VIRTIO_BLK_F_SCSI, "SCSICmds" }, - { VIRTIO_BLK_F_WCE, "WriteCache" }, + { VIRTIO_BLK_F_FLUSH, "FlushCmd" }, { VIRTIO_BLK_F_TOPOLOGY, "Topology" }, { VIRTIO_BLK_F_CONFIG_WCE, "ConfigWCE" }, + { VIRTIO_BLK_F_MQ, "Multiqueue" }, { 0, NULL } }; @@ -131,8 +133,8 @@ static int vtblk_dump(void *, void *, vm_offset_t, off_t, size_t); static void vtblk_strategy(struct bio *); -static void vtblk_negotiate_features(struct vtblk_softc *); -static void vtblk_setup_features(struct vtblk_softc *); +static int vtblk_negotiate_features(struct vtblk_softc *); +static int vtblk_setup_features(struct vtblk_softc *); static int vtblk_maximum_segments(struct vtblk_softc *, struct virtio_blk_config *); static int vtblk_alloc_virtqueue(struct vtblk_softc *); @@ -191,6 +193,14 @@ static void vtblk_setup_sysctl(struct vtblk_softc *); static int vtblk_tunable_int(struct vtblk_softc *, const char *, int); +#define vtblk_modern(_sc) (((_sc)->vtblk_features & VIRTIO_F_VERSION_1) != 0) +#define vtblk_htog16(_sc, _val) virtio_htog16(vtblk_modern(_sc), _val) +#define vtblk_htog32(_sc, _val) virtio_htog32(vtblk_modern(_sc), _val) +#define vtblk_htog64(_sc, _val) virtio_htog64(vtblk_modern(_sc), _val) +#define vtblk_gtoh16(_sc, _val) virtio_gtoh16(vtblk_modern(_sc), _val) +#define 
vtblk_gtoh32(_sc, _val) virtio_gtoh32(vtblk_modern(_sc), _val) +#define vtblk_gtoh64(_sc, _val) virtio_gtoh64(vtblk_modern(_sc), _val) + /* Tunables. */ static int vtblk_no_ident = 0; TUNABLE_INT("hw.vtblk.no_ident", &vtblk_no_ident); @@ -198,18 +208,20 @@ TUNABLE_INT("hw.vtblk.writecache_mode", &vtblk_writecache_mode); /* Features desired/implemented by this driver. */ -#define VTBLK_FEATURES \ - (VIRTIO_BLK_F_BARRIER | \ - VIRTIO_BLK_F_SIZE_MAX | \ +#define VTBLK_COMMON_FEATURES \ + (VIRTIO_BLK_F_SIZE_MAX | \ VIRTIO_BLK_F_SEG_MAX | \ VIRTIO_BLK_F_GEOMETRY | \ VIRTIO_BLK_F_RO | \ VIRTIO_BLK_F_BLK_SIZE | \ - VIRTIO_BLK_F_WCE | \ + VIRTIO_BLK_F_FLUSH | \ VIRTIO_BLK_F_TOPOLOGY | \ VIRTIO_BLK_F_CONFIG_WCE | \ VIRTIO_RING_F_INDIRECT_DESC) +#define VTBLK_MODERN_FEATURES (VTBLK_COMMON_FEATURES) +#define VTBLK_LEGACY_FEATURES (VIRTIO_BLK_F_BARRIER | VTBLK_COMMON_FEATURES) + #define VTBLK_MTX(_sc) &(_sc)->vtblk_mtx #define VTBLK_LOCK_INIT(_sc, _name) \ mtx_init(VTBLK_MTX((_sc)), (_name), \ @@ -254,8 +266,10 @@ DRIVER_MODULE(virtio_blk, virtio_mmio, vtblk_driver, vtblk_devclass, vtblk_modevent, 0); -DRIVER_MODULE(virtio_blk, virtio_pci, vtblk_driver, vtblk_devclass, +DRIVER_MODULE(virtio_blk, vtpcil, vtblk_driver, vtblk_devclass, vtblk_modevent, 0); +DRIVER_MODULE(virtio_blk, vtpcim, vtblk_driver, vtblk_devclass, + vtblk_modevent, 0); MODULE_VERSION(virtio_blk, 1); MODULE_DEPEND(virtio_blk, virtio, 1, 1, 1); @@ -299,10 +313,10 @@ struct virtio_blk_config blkcfg; int error; - virtio_set_feature_desc(dev, vtblk_feature_desc); - sc = device_get_softc(dev); sc->vtblk_dev = dev; + virtio_set_feature_desc(dev, vtblk_feature_desc); + VTBLK_LOCK_INIT(sc, device_get_nameunit(dev)); bioq_init(&sc->vtblk_bioq); TAILQ_INIT(&sc->vtblk_dump_queue); @@ -310,8 +324,13 @@ TAILQ_INIT(&sc->vtblk_req_ready); vtblk_setup_sysctl(sc); - vtblk_setup_features(sc); + error = vtblk_setup_features(sc); + if (error) { + device_printf(dev, "cannot setup features\n"); + goto fail; + } + vtblk_read_config(sc, &blkcfg); /* @@ -539,16 +558,6 @@ return; } - /* - * Fail any write if RO. Unfortunately, there does not seem to - * be a better way to report our readonly'ness to GEOM above. - */ - if (sc->vtblk_flags & VTBLK_FLAG_READONLY && - (bp->bio_cmd == BIO_WRITE || bp->bio_cmd == BIO_FLUSH)) { - vtblk_bio_done(sc, bp, EROFS); - return; - } - VTBLK_LOCK(sc); if (sc->vtblk_flags & VTBLK_FLAG_DETACH) { @@ -563,35 +572,40 @@ VTBLK_UNLOCK(sc); } -static void +static int vtblk_negotiate_features(struct vtblk_softc *sc) { device_t dev; uint64_t features; dev = sc->vtblk_dev; - features = VTBLK_FEATURES; + features = virtio_bus_is_modern(dev) ? VTBLK_MODERN_FEATURES : + VTBLK_LEGACY_FEATURES; sc->vtblk_features = virtio_negotiate_features(dev, features); + return (virtio_finalize_features(dev)); } -static void +static int vtblk_setup_features(struct vtblk_softc *sc) { device_t dev; + int error; dev = sc->vtblk_dev; - vtblk_negotiate_features(sc); + error = vtblk_negotiate_features(sc); + if (error) + return (error); if (virtio_with_feature(dev, VIRTIO_RING_F_INDIRECT_DESC)) sc->vtblk_flags |= VTBLK_FLAG_INDIRECT; - if (virtio_with_feature(dev, VIRTIO_BLK_F_RO)) - sc->vtblk_flags |= VTBLK_FLAG_READONLY; - if (virtio_with_feature(dev, VIRTIO_BLK_F_BARRIER)) - sc->vtblk_flags |= VTBLK_FLAG_BARRIER; if (virtio_with_feature(dev, VIRTIO_BLK_F_CONFIG_WCE)) - sc->vtblk_flags |= VTBLK_FLAG_WC_CONFIG; + sc->vtblk_flags |= VTBLK_FLAG_WCE_CONFIG; + if (virtio_with_feature(dev, VIRTIO_BLK_F_BARRIER)) + sc->vtblk_flags |= VTBLK_FLAG_BARRIER; /* Legacy. 
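The vtblk_gtoh*/vtblk_htog* macros defined above wrap virtio_gtoh*/virtio_htog* helpers introduced alongside the modern transport. Their presumed shape (a sketch, not the verified implementation) is a conditional byte swap: modern devices are defined to be little-endian, while legacy devices use guest-native order, so the legacy arm is a no-op:

    #include <stdbool.h>
    #include <stdint.h>
    #include <sys/endian.h>

    /* Guest CPU order -> device ("guest to host") order. */
    static inline uint32_t
    virtio_gtoh32_sketch(bool modern, uint32_t val)
    {
            return (modern ? htole32(val) : val);
    }

    /* Device order -> guest CPU order. */
    static inline uint32_t
    virtio_htog32_sketch(bool modern, uint32_t val)
    {
            return (modern ? le32toh(val) : val);
    }

On little-endian machines both arms compile to the identity, which is why this conversion work is invisible on amd64 but matters for big-endian guests.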
*/ + + return (0); } static int @@ -670,15 +684,19 @@ dp->d_name = VTBLK_DISK_NAME; dp->d_unit = device_get_unit(dev); dp->d_drv1 = sc; - dp->d_flags = DISKFLAG_CANFLUSHCACHE | DISKFLAG_UNMAPPED_BIO | - DISKFLAG_DIRECT_COMPLETION; + dp->d_flags = DISKFLAG_UNMAPPED_BIO | DISKFLAG_DIRECT_COMPLETION; dp->d_hba_vendor = virtio_get_vendor(dev); dp->d_hba_device = virtio_get_device(dev); dp->d_hba_subvendor = virtio_get_subvendor(dev); dp->d_hba_subdevice = virtio_get_subdevice(dev); - if ((sc->vtblk_flags & VTBLK_FLAG_READONLY) == 0) + if (virtio_with_feature(dev, VIRTIO_BLK_F_RO)) + dp->d_flags |= DISKFLAG_WRITE_PROTECT; + else { + if (virtio_with_feature(dev, VIRTIO_BLK_F_FLUSH)) + dp->d_flags |= DISKFLAG_CANFLUSHCACHE; dp->d_dump = vtblk_dump; + } /* Capacity is always in 512-byte units. */ dp->d_mediasize = blkcfg->capacity * 512; @@ -862,26 +880,27 @@ bp = bioq_takefirst(bioq); req->vbr_bp = bp; req->vbr_ack = -1; - req->vbr_hdr.ioprio = 1; + req->vbr_hdr.ioprio = vtblk_gtoh32(sc, 1); switch (bp->bio_cmd) { case BIO_FLUSH: - req->vbr_hdr.type = VIRTIO_BLK_T_FLUSH; + req->vbr_hdr.type = vtblk_gtoh32(sc, VIRTIO_BLK_T_FLUSH); + req->vbr_hdr.sector = 0; break; case BIO_READ: - req->vbr_hdr.type = VIRTIO_BLK_T_IN; - req->vbr_hdr.sector = bp->bio_offset / 512; + req->vbr_hdr.type = vtblk_gtoh32(sc, VIRTIO_BLK_T_IN); + req->vbr_hdr.sector = vtblk_gtoh64(sc, bp->bio_offset / 512); break; case BIO_WRITE: - req->vbr_hdr.type = VIRTIO_BLK_T_OUT; - req->vbr_hdr.sector = bp->bio_offset / 512; + req->vbr_hdr.type = vtblk_gtoh32(sc, VIRTIO_BLK_T_OUT); + req->vbr_hdr.sector = vtblk_gtoh64(sc, bp->bio_offset / 512); break; default: panic("%s: bio with unhandled cmd: %d", __func__, bp->bio_cmd); } if (bp->bio_flags & BIO_ORDERED) - req->vbr_hdr.type |= VIRTIO_BLK_T_BARRIER; + req->vbr_hdr.type |= vtblk_gtoh32(sc, VIRTIO_BLK_T_BARRIER); return (req); } @@ -912,7 +931,8 @@ if (!virtqueue_empty(vq)) return (EBUSY); ordered = 1; - req->vbr_hdr.type &= ~VIRTIO_BLK_T_BARRIER; + req->vbr_hdr.type &= vtblk_gtoh32(sc, + ~VIRTIO_BLK_T_BARRIER); } } @@ -1016,15 +1036,16 @@ static void vtblk_drain(struct vtblk_softc *sc) { - struct bio_queue queue; struct bio_queue_head *bioq; struct vtblk_request *req; struct bio *bp; bioq = &sc->vtblk_bioq; - TAILQ_INIT(&queue); if (sc->vtblk_vq != NULL) { + struct bio_queue queue; + + TAILQ_INIT(&queue); vtblk_queue_completed(sc, &queue); vtblk_done_completed(sc, &queue); @@ -1115,10 +1136,22 @@ /* Read the configuration if the feature was negotiated. 
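The VTBLK_GET_CONFIG() uses that follow now read the geometry and topology sub-structs field by field instead of in one access each. On the modern transport, config space is little-endian and conversion happens per access width, so every field has to be read at its natural size for the byte swap to come out right. The macro itself, presumably unchanged from the stock driver (shown as a sketch), is a feature-gated read:

    #define VTBLK_GET_CONFIG(_dev, _feature, _field, _cfg)          \
            if (virtio_with_feature(_dev, _feature)) {              \
                    virtio_read_device_config(_dev,                 \
                        offsetof(struct virtio_blk_config, _field), \
                        &(_cfg)->_field, sizeof((_cfg)->_field));   \
            }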
*/ VTBLK_GET_CONFIG(dev, VIRTIO_BLK_F_SIZE_MAX, size_max, blkcfg); VTBLK_GET_CONFIG(dev, VIRTIO_BLK_F_SEG_MAX, seg_max, blkcfg); - VTBLK_GET_CONFIG(dev, VIRTIO_BLK_F_GEOMETRY, geometry, blkcfg); + VTBLK_GET_CONFIG(dev, VIRTIO_BLK_F_GEOMETRY, + geometry.cylinders, blkcfg); + VTBLK_GET_CONFIG(dev, VIRTIO_BLK_F_GEOMETRY, + geometry.heads, blkcfg); + VTBLK_GET_CONFIG(dev, VIRTIO_BLK_F_GEOMETRY, + geometry.sectors, blkcfg); VTBLK_GET_CONFIG(dev, VIRTIO_BLK_F_BLK_SIZE, blk_size, blkcfg); - VTBLK_GET_CONFIG(dev, VIRTIO_BLK_F_TOPOLOGY, topology, blkcfg); - VTBLK_GET_CONFIG(dev, VIRTIO_BLK_F_CONFIG_WCE, writeback, blkcfg); + VTBLK_GET_CONFIG(dev, VIRTIO_BLK_F_TOPOLOGY, + topology.physical_block_exp, blkcfg); + VTBLK_GET_CONFIG(dev, VIRTIO_BLK_F_TOPOLOGY, + topology.alignment_offset, blkcfg); + VTBLK_GET_CONFIG(dev, VIRTIO_BLK_F_TOPOLOGY, + topology.min_io_size, blkcfg); + VTBLK_GET_CONFIG(dev, VIRTIO_BLK_F_TOPOLOGY, + topology.opt_io_size, blkcfg); + VTBLK_GET_CONFIG(dev, VIRTIO_BLK_F_CONFIG_WCE, wce, blkcfg); } #undef VTBLK_GET_CONFIG @@ -1142,8 +1175,8 @@ return; req->vbr_ack = -1; - req->vbr_hdr.type = VIRTIO_BLK_T_GET_ID; - req->vbr_hdr.ioprio = 1; + req->vbr_hdr.type = vtblk_gtoh32(sc, VIRTIO_BLK_T_GET_ID); + req->vbr_hdr.ioprio = vtblk_gtoh32(sc, 1); req->vbr_hdr.sector = 0; req->vbr_bp = &buf; @@ -1274,9 +1307,9 @@ req = &sc->vtblk_dump_request; req->vbr_ack = -1; - req->vbr_hdr.type = VIRTIO_BLK_T_OUT; - req->vbr_hdr.ioprio = 1; - req->vbr_hdr.sector = offset / 512; + req->vbr_hdr.type = vtblk_gtoh32(sc, VIRTIO_BLK_T_OUT); + req->vbr_hdr.ioprio = vtblk_gtoh32(sc, 1); + req->vbr_hdr.sector = vtblk_gtoh64(sc, offset / 512); req->vbr_bp = &buf; g_reset_bio(&buf); @@ -1296,8 +1329,8 @@ req = &sc->vtblk_dump_request; req->vbr_ack = -1; - req->vbr_hdr.type = VIRTIO_BLK_T_FLUSH; - req->vbr_hdr.ioprio = 1; + req->vbr_hdr.type = vtblk_gtoh32(sc, VIRTIO_BLK_T_FLUSH); + req->vbr_hdr.ioprio = vtblk_gtoh32(sc, 1); req->vbr_hdr.sector = 0; req->vbr_bp = &buf; @@ -1325,7 +1358,7 @@ /* Set either writeback (1) or writethrough (0) mode. */ virtio_write_dev_config_1(sc->vtblk_dev, - offsetof(struct virtio_blk_config, writeback), wc); + offsetof(struct virtio_blk_config, wce), wc); } static int @@ -1334,15 +1367,15 @@ { int wc; - if (sc->vtblk_flags & VTBLK_FLAG_WC_CONFIG) { + if (sc->vtblk_flags & VTBLK_FLAG_WCE_CONFIG) { wc = vtblk_tunable_int(sc, "writecache_mode", vtblk_writecache_mode); if (wc >= 0 && wc < VTBLK_CACHE_MAX) vtblk_set_write_cache(sc, wc); else - wc = blkcfg->writeback; + wc = blkcfg->wce; } else - wc = virtio_with_feature(sc->vtblk_dev, VIRTIO_BLK_F_WCE); + wc = virtio_with_feature(sc->vtblk_dev, VIRTIO_BLK_F_FLUSH); return (wc); } @@ -1359,7 +1392,7 @@ error = sysctl_handle_int(oidp, &wc, 0, req); if (error || req->newptr == NULL) return (error); - if ((sc->vtblk_flags & VTBLK_FLAG_WC_CONFIG) == 0) + if ((sc->vtblk_flags & VTBLK_FLAG_WCE_CONFIG) == 0) return (EPERM); if (wc < 0 || wc >= VTBLK_CACHE_MAX) return (EINVAL); diff -urN sys/dev/virtio.ori/block/virtio_blk.h sys/dev/virtio/block/virtio_blk.h --- sys/dev/virtio.ori/block/virtio_blk.h 2020-03-19 20:20:23.688780000 -0700 +++ sys/dev/virtio/block/virtio_blk.h 2020-03-19 23:17:51.681970000 -0700 @@ -1,4 +1,6 @@ /*- + * SPDX-License-Identifier: BSD-3-Clause + * * This header is BSD licensed so anyone can use the definitions to implement * compatible drivers/servers. * @@ -32,17 +34,22 @@ #define _VIRTIO_BLK_H /* Feature bits */ -#define VIRTIO_BLK_F_BARRIER 0x0001 /* Does host support barriers? 
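One constant in the driver hunks above is worth spelling out: virtio-blk sector numbers are always 512-byte units regardless of the advertised logical block size, which is why vtblk_request_bio() and the dump path both divide byte offsets by 512 before the guest-to-host conversion. A compact sketch:

    #include <stdint.h>
    #include <sys/endian.h>

    /* Byte offset -> virtio-blk sector, in device byte order. */
    static uint64_t
    blk_sector(int modern, uint64_t byte_offset)
    {
            uint64_t sector = byte_offset / 512;    /* fixed 512B units */

            return (modern ? htole64(sector) : sector);
    }

The same rule explains the "Capacity is always in 512-byte units" comment where d_mediasize is computed.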
*/ #define VIRTIO_BLK_F_SIZE_MAX 0x0002 /* Indicates maximum segment size */ #define VIRTIO_BLK_F_SEG_MAX 0x0004 /* Indicates maximum # of segments */ #define VIRTIO_BLK_F_GEOMETRY 0x0010 /* Legacy geometry available */ #define VIRTIO_BLK_F_RO 0x0020 /* Disk is read-only */ #define VIRTIO_BLK_F_BLK_SIZE 0x0040 /* Block size of disk is available*/ -#define VIRTIO_BLK_F_SCSI 0x0080 /* Supports scsi command passthru */ -#define VIRTIO_BLK_F_WCE 0x0200 /* Writeback mode enabled after reset */ +#define VIRTIO_BLK_F_FLUSH 0x0200 /* Flush command supported */ #define VIRTIO_BLK_F_TOPOLOGY 0x0400 /* Topology information is available */ #define VIRTIO_BLK_F_CONFIG_WCE 0x0800 /* Writeback mode available in config */ +#define VIRTIO_BLK_F_MQ 0x1000 /* Support more than one vq */ +/* Legacy feature bits */ +#define VIRTIO_BLK_F_BARRIER 0x0001 /* Does host support barriers? */ +#define VIRTIO_BLK_F_SCSI 0x0080 /* Supports scsi command passthru */ + +/* Old (deprecated) name for VIRTIO_BLK_F_FLUSH. */ +#define VIRTIO_BLK_F_WCE VIRTIO_BLK_F_FLUSH #define VIRTIO_BLK_ID_BYTES 20 /* ID string length */ struct virtio_blk_config { @@ -64,15 +71,23 @@ /* Topology of the device (if VIRTIO_BLK_F_TOPOLOGY) */ struct virtio_blk_topology { + /* exponent for physical block per logical block. */ uint8_t physical_block_exp; + /* alignment offset in logical blocks. */ uint8_t alignment_offset; + /* minimum I/O size without performance penalty in logical + * blocks. */ uint16_t min_io_size; + /* optimal sustained I/O size in logical blocks. */ uint32_t opt_io_size; } topology; /* Writeback mode (if VIRTIO_BLK_F_CONFIG_WCE) */ - uint8_t writeback; + uint8_t wce; + uint8_t unused; + /* Number of vqs, only available when VIRTIO_BLK_F_MQ is set */ + uint16_t num_queues; } __packed; /* @@ -105,7 +120,11 @@ /* ID string length */ #define VIRTIO_BLK_ID_BYTES 20 -/* This is the first element of the read scatter-gather list. */ +/* + * This comes first in the read scatter-gather list. + * For legacy virtio, if VIRTIO_F_ANY_LAYOUT is not negotiated, + * this is the first element of the read scatter-gather list. + */ struct virtio_blk_outhdr { /* VIRTIO_BLK_T* */ uint32_t type; diff -urN sys/dev/virtio.ori/console/virtio_console.c sys/dev/virtio/console/virtio_console.c --- sys/dev/virtio.ori/console/virtio_console.c 2020-03-19 20:20:23.692571000 -0700 +++ sys/dev/virtio/console/virtio_console.c 2020-03-19 23:17:51.745620000 -0700 @@ -30,6 +30,7 @@ __FBSDID("$FreeBSD: releng/11.3/sys/dev/virtio/console/virtio_console.c 298955 2016-05-03 03:41:25Z pfg $"); #include +#include #include #include #include @@ -58,14 +59,19 @@ #define VTCON_MAX_PORTS 32 #define VTCON_TTY_PREFIX "V" +#define VTCON_TTY_ALIAS_PREFIX "vtcon" #define VTCON_BULK_BUFSZ 128 +#define VTCON_CTRL_BUFSZ 128 /* - * The buffer cannot cross more than one page boundary due to the + * The buffers cannot cross more than one page boundary due to the * size of the sglist segment array used. 
*/ CTASSERT(VTCON_BULK_BUFSZ <= PAGE_SIZE); +CTASSERT(VTCON_CTRL_BUFSZ <= PAGE_SIZE); +CTASSERT(sizeof(struct virtio_console_config) <= VTCON_CTRL_BUFSZ); + struct vtcon_softc; struct vtcon_softc_port; @@ -80,6 +86,7 @@ int vtcport_flags; #define VTCON_PORT_FLAG_GONE 0x01 #define VTCON_PORT_FLAG_CONSOLE 0x02 +#define VTCON_PORT_FLAG_ALIAS 0x04 #if defined(KDB) int vtcport_alt_break_state; @@ -151,8 +158,8 @@ static int vtcon_detach(device_t); static int vtcon_config_change(device_t); -static void vtcon_setup_features(struct vtcon_softc *); -static void vtcon_negotiate_features(struct vtcon_softc *); +static int vtcon_setup_features(struct vtcon_softc *); +static int vtcon_negotiate_features(struct vtcon_softc *); static int vtcon_alloc_scports(struct vtcon_softc *); static int vtcon_alloc_virtqueues(struct vtcon_softc *); static void vtcon_read_config(struct vtcon_softc *, @@ -176,8 +183,10 @@ static void vtcon_ctrl_port_remove_event(struct vtcon_softc *, int); static void vtcon_ctrl_port_console_event(struct vtcon_softc *, int); static void vtcon_ctrl_port_open_event(struct vtcon_softc *, int); +static void vtcon_ctrl_port_name_event(struct vtcon_softc *, int, + const char *, size_t); static void vtcon_ctrl_process_event(struct vtcon_softc *, - struct virtio_console_control *); + struct virtio_console_control *, void *, size_t); static void vtcon_ctrl_task_cb(void *, int); static void vtcon_ctrl_event_intr(void *); static void vtcon_ctrl_poll(struct vtcon_softc *, @@ -191,6 +200,8 @@ static int vtcon_port_populate(struct vtcon_port *); static void vtcon_port_destroy(struct vtcon_port *); static int vtcon_port_create(struct vtcon_softc *, int); +static void vtcon_port_dev_alias(struct vtcon_port *, const char *, + size_t); static void vtcon_port_drain_bufs(struct virtqueue *); static void vtcon_port_drain(struct vtcon_port *); static void vtcon_port_teardown(struct vtcon_port *); @@ -216,6 +227,14 @@ static void vtcon_enable_interrupts(struct vtcon_softc *); static void vtcon_disable_interrupts(struct vtcon_softc *); +#define vtcon_modern(_sc) (((_sc)->vtcon_features & VIRTIO_F_VERSION_1) != 0) +#define vtcon_htog16(_sc, _val) virtio_htog16(vtcon_modern(_sc), _val) +#define vtcon_htog32(_sc, _val) virtio_htog32(vtcon_modern(_sc), _val) +#define vtcon_htog64(_sc, _val) virtio_htog64(vtcon_modern(_sc), _val) +#define vtcon_gtoh16(_sc, _val) virtio_gtoh16(vtcon_modern(_sc), _val) +#define vtcon_gtoh32(_sc, _val) virtio_gtoh32(vtcon_modern(_sc), _val) +#define vtcon_gtoh64(_sc, _val) virtio_gtoh64(vtcon_modern(_sc), _val) + static int vtcon_pending_free; static struct ttydevsw vtcon_tty_class = { @@ -245,8 +264,10 @@ }; static devclass_t vtcon_devclass; -DRIVER_MODULE(virtio_console, virtio_pci, vtcon_driver, vtcon_devclass, +DRIVER_MODULE(virtio_console, vtpcil, vtcon_driver, vtcon_devclass, vtcon_modevent, 0); +DRIVER_MODULE(virtio_console, vtpcim, vtcon_driver, vtcon_devclass, + vtcon_modevent, 0); MODULE_VERSION(virtio_console, 1); MODULE_DEPEND(virtio_console, virtio, 1, 1, 1); @@ -312,12 +333,16 @@ sc = device_get_softc(dev); sc->vtcon_dev = dev; + virtio_set_feature_desc(dev, vtcon_feature_desc); mtx_init(&sc->vtcon_mtx, "vtconmtx", NULL, MTX_DEF); mtx_init(&sc->vtcon_ctrl_tx_mtx, "vtconctrlmtx", NULL, MTX_DEF); - virtio_set_feature_desc(dev, vtcon_feature_desc); - vtcon_setup_features(sc); + error = vtcon_setup_features(sc); + if (error) { + device_printf(dev, "cannot setup features\n"); + goto fail; + } vtcon_read_config(sc, &concfg); vtcon_determine_max_ports(sc, &concfg); @@ -409,7 
+434,7 @@ return (0); } -static void +static int vtcon_negotiate_features(struct vtcon_softc *sc) { device_t dev; @@ -419,21 +444,27 @@ features = VTCON_FEATURES; sc->vtcon_features = virtio_negotiate_features(dev, features); + return (virtio_finalize_features(dev)); } -static void +static int vtcon_setup_features(struct vtcon_softc *sc) { device_t dev; + int error; dev = sc->vtcon_dev; - vtcon_negotiate_features(sc); + error = vtcon_negotiate_features(sc); + if (error) + return (error); if (virtio_with_feature(dev, VIRTIO_CONSOLE_F_SIZE)) sc->vtcon_flags |= VTCON_FLAG_SIZE; if (virtio_with_feature(dev, VIRTIO_CONSOLE_F_MULTIPORT)) sc->vtcon_flags |= VTCON_FLAG_MULTIPORT; + + return (0); } #define VTCON_GET_CONFIG(_dev, _feature, _field, _cfg) \ @@ -597,8 +628,7 @@ vq = sc->vtcon_ctrl_rxvq; sglist_init(&sg, 2, segs); - error = sglist_append(&sg, control, - sizeof(struct virtio_console_control)); + error = sglist_append(&sg, control, VTCON_CTRL_BUFSZ); KASSERT(error == 0, ("%s: error %d adding control to sglist", __func__, error)); @@ -611,8 +641,7 @@ struct virtio_console_control *control; int error; - control = malloc(sizeof(struct virtio_console_control), M_DEVBUF, - M_ZERO | M_NOWAIT); + control = malloc(VTCON_CTRL_BUFSZ, M_DEVBUF, M_ZERO | M_NOWAIT); if (control == NULL) return (ENOMEM); @@ -629,7 +658,7 @@ { int error; - bzero(control, sizeof(struct virtio_console_control)); + bzero(control, VTCON_CTRL_BUFSZ); error = vtcon_ctrl_event_enqueue(sc, control); KASSERT(error == 0, @@ -796,21 +825,62 @@ } static void +vtcon_ctrl_port_name_event(struct vtcon_softc *sc, int id, const char *name, + size_t len) +{ + device_t dev; + struct vtcon_softc_port *scport; + struct vtcon_port *port; + + dev = sc->vtcon_dev; + scport = &sc->vtcon_ports[id]; + + /* + * The VirtIO specification says the NUL terminator is not included in + * the length, but QEMU includes it. Adjust the length if needed. 
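For the control-queue changes above: the receive buffers grew from sizeof(struct virtio_console_control) to VTCON_CTRL_BUFSZ because PORT_NAME events append a variable-length payload after the fixed header. A sketch of the layout and of how the dequeue path further below splits it, with len as returned by virtqueue_dequeue():

    /*
     * [ struct virtio_console_control | payload (port name, ...) ]
     *   ^ control                       ^ (void *)&control[1]
     */
    struct virtio_console_control *control;
    void *data = NULL;
    size_t data_len = 0;
    uint32_t len;

    control = virtqueue_dequeue(vq, &len);
    if (control != NULL && len > sizeof(*control)) {
            data = (void *)&control[1];     /* payload follows header */
            data_len = len - sizeof(*control);
    }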
+ */ + if (name == NULL || len == 0) + return; + if (name[len - 1] == '\0') { + len--; + if (len == 0) + return; + } + + VTCON_LOCK(sc); + port = scport->vcsp_port; + if (port == NULL) { + VTCON_UNLOCK(sc); + device_printf(dev, "%s: name port %d, but does not exist\n", + __func__, id); + return; + } + + VTCON_PORT_LOCK(port); + VTCON_UNLOCK(sc); + vtcon_port_dev_alias(port, name, len); + VTCON_PORT_UNLOCK(port); +} + +static void vtcon_ctrl_process_event(struct vtcon_softc *sc, - struct virtio_console_control *control) + struct virtio_console_control *control, void *data, size_t data_len) { device_t dev; - int id; + uint32_t id; + uint16_t event; dev = sc->vtcon_dev; - id = control->id; + id = vtcon_htog32(sc, control->id); + event = vtcon_htog16(sc, control->event); - if (id < 0 || id >= sc->vtcon_max_ports) { - device_printf(dev, "%s: invalid port ID %d\n", __func__, id); + if (id >= sc->vtcon_max_ports) { + device_printf(dev, "%s: event %d invalid port ID %d\n", + __func__, event, id); return; } - switch (control->event) { + switch (event) { case VIRTIO_CONSOLE_PORT_ADD: vtcon_ctrl_port_add_event(sc, id); break; @@ -831,6 +901,7 @@ break; case VIRTIO_CONSOLE_PORT_NAME: + vtcon_ctrl_port_name_event(sc, id, (const char *)data, data_len); break; } } @@ -841,7 +912,10 @@ struct vtcon_softc *sc; struct virtqueue *vq; struct virtio_console_control *control; + void *data; + size_t data_len; int detached; + uint32_t len; sc = xsc; vq = sc->vtcon_ctrl_rxvq; @@ -849,12 +923,20 @@ VTCON_LOCK(sc); while ((detached = (sc->vtcon_flags & VTCON_FLAG_DETACHED)) == 0) { - control = virtqueue_dequeue(vq, NULL); + control = virtqueue_dequeue(vq, &len); if (control == NULL) break; + if (len > sizeof(struct virtio_console_control)) { + data = (void *) &control[1]; + data_len = len - sizeof(struct virtio_console_control); + } else { + data = NULL; + data_len = 0; + } + VTCON_UNLOCK(sc); - vtcon_ctrl_process_event(sc, control); + vtcon_ctrl_process_event(sc, control, data, data_len); VTCON_LOCK(sc); vtcon_ctrl_event_requeue(sc, control); } @@ -926,9 +1008,9 @@ if ((sc->vtcon_flags & VTCON_FLAG_MULTIPORT) == 0) return; - control.id = portid; - control.event = event; - control.value = value; + control.id = vtcon_gtoh32(sc, portid); + control.event = vtcon_gtoh16(sc, event); + control.value = vtcon_gtoh16(sc, value); vtcon_ctrl_poll(sc, &control); } @@ -1090,6 +1172,40 @@ device_get_unit(dev), id); return (0); +} + +static void +vtcon_port_dev_alias(struct vtcon_port *port, const char *name, size_t len) +{ + struct vtcon_softc *sc; + struct cdev *pdev; + struct tty *tp; + int i, error; + + sc = port->vtcport_sc; + tp = port->vtcport_tty; + + if (port->vtcport_flags & VTCON_PORT_FLAG_ALIAS) + return; + + /* Port name is UTF-8, but we can only handle ASCII. */ + for (i = 0; i < len; i++) { + if (!isascii(name[i])) + return; + } + + /* + * Port name may not conform to the devfs requirements so we cannot use + * tty_makealias() because the MAKEDEV_CHECKNAME flag must be specified. 
+ */ + error = make_dev_alias_p(MAKEDEV_NOWAIT | MAKEDEV_CHECKNAME, &pdev, + tp->t_dev, "%s/%*s", VTCON_TTY_ALIAS_PREFIX, (int)len, name); + if (error) { + device_printf(sc->vtcon_dev, + "%s: cannot make dev alias (%s/%*s) error %d\n", __func__, + VTCON_TTY_ALIAS_PREFIX, (int)len, name, error); + } else + port->vtcport_flags |= VTCON_PORT_FLAG_ALIAS; } static void diff -urN sys/dev/virtio.ori/mmio/virtio_mmio.c sys/dev/virtio/mmio/virtio_mmio.c --- sys/dev/virtio.ori/mmio/virtio_mmio.c 2020-03-19 20:20:23.691737000 -0700 +++ sys/dev/virtio/mmio/virtio_mmio.c 2020-03-19 23:17:51.745051000 -0700 @@ -426,6 +426,10 @@ case VIRTIO_IVAR_VENDOR: *result = vtmmio_read_config_4(sc, VIRTIO_MMIO_VENDOR_ID); break; + case VIRTIO_IVAR_SUBVENDOR: + case VIRTIO_IVAR_MODERN: + *result = 0; + break; default: return (ENOENT); } @@ -512,7 +516,8 @@ if (sc->vtmmio_vqs == NULL) return (ENOMEM); - vtmmio_write_config_4(sc, VIRTIO_MMIO_GUEST_PAGE_SIZE, 1 << PAGE_SHIFT); + vtmmio_write_config_4(sc, VIRTIO_MMIO_GUEST_PAGE_SIZE, + (1 << PAGE_SHIFT)); for (idx = 0; idx < nvqs; idx++) { vqx = &sc->vtmmio_vqs[idx]; @@ -537,10 +542,10 @@ VIRTIO_MMIO_VRING_ALIGN); #if 0 device_printf(dev, "virtqueue paddr 0x%08lx\n", - (uint64_t)virtqueue_paddr(vq)); + (uint64_t)virtqueue_paddr(vq)); #endif vtmmio_write_config_4(sc, VIRTIO_MMIO_QUEUE_PFN, - virtqueue_paddr(vq) >> PAGE_SHIFT); + virtqueue_paddr(vq) >> PAGE_SHIFT); vqx->vtv_vq = *info->vqai_vq = vq; vqx->vtv_no_intr = info->vqai_intr == NULL; @@ -592,6 +597,9 @@ vtmmio_negotiate_features(dev, features); + vtmmio_write_config_4(sc, VIRTIO_MMIO_GUEST_PAGE_SIZE, + (1 << PAGE_SHIFT)); + for (idx = 0; idx < sc->vtmmio_nvqs; idx++) { error = vtmmio_reinit_virtqueue(sc, idx); if (error) @@ -766,6 +774,13 @@ if (error) return (error); + vtmmio_write_config_4(sc, VIRTIO_MMIO_QUEUE_NUM, size); + vtmmio_write_config_4(sc, VIRTIO_MMIO_QUEUE_ALIGN, + VIRTIO_MMIO_VRING_ALIGN); +#if 0 + device_printf(sc->dev, "virtqueue paddr 0x%08lx\n", + (uint64_t)virtqueue_paddr(vq)); +#endif vtmmio_write_config_4(sc, VIRTIO_MMIO_QUEUE_PFN, virtqueue_paddr(vq) >> PAGE_SHIFT); diff -urN sys/dev/virtio.ori/network/if_vtnet.c sys/dev/virtio/network/if_vtnet.c --- sys/dev/virtio.ori/network/if_vtnet.c 2020-03-19 20:20:23.687857000 -0700 +++ sys/dev/virtio/network/if_vtnet.c 2020-03-19 23:17:51.681486000 -0700 @@ -1,4 +1,6 @@ /*- + * SPDX-License-Identifier: BSD-2-Clause-FreeBSD + * * Copyright (c) 2011, Bryan Venteicher * All rights reserved. 
* @@ -67,22 +69,28 @@ #include #include #include +#include #include #include #include #include +#include + #include #include #include #include - #include "virtio_if.h" #include "opt_inet.h" #include "opt_inet6.h" +#if defined(INET) || defined(INET6) +#include +#endif + static int vtnet_modevent(module_t, int, void *); static int vtnet_probe(device_t); @@ -94,8 +102,8 @@ static int vtnet_attach_completed(device_t); static int vtnet_config_change(device_t); -static void vtnet_negotiate_features(struct vtnet_softc *); -static void vtnet_setup_features(struct vtnet_softc *); +static int vtnet_negotiate_features(struct vtnet_softc *); +static int vtnet_setup_features(struct vtnet_softc *); static int vtnet_init_rxq(struct vtnet_softc *, int); static int vtnet_init_txq(struct vtnet_softc *, int); static int vtnet_alloc_rxtx_queues(struct vtnet_softc *); @@ -103,8 +111,12 @@ static int vtnet_alloc_rx_filters(struct vtnet_softc *); static void vtnet_free_rx_filters(struct vtnet_softc *); static int vtnet_alloc_virtqueues(struct vtnet_softc *); +static int vtnet_alloc_interface(struct vtnet_softc *); static int vtnet_setup_interface(struct vtnet_softc *); -static int vtnet_change_mtu(struct vtnet_softc *, int); +static int vtnet_ioctl_mtu(struct vtnet_softc *, int); +static int vtnet_ioctl_ifflags(struct vtnet_softc *); +static int vtnet_ioctl_multi(struct vtnet_softc *); +static int vtnet_ioctl_ifcap(struct vtnet_softc *, struct ifreq *); static int vtnet_ioctl(struct ifnet *, u_long, caddr_t); static uint64_t vtnet_get_counter(struct ifnet *, ift_counter); @@ -112,11 +124,15 @@ static void vtnet_rxq_free_mbufs(struct vtnet_rxq *); static struct mbuf * vtnet_rx_alloc_buf(struct vtnet_softc *, int , struct mbuf **); -static int vtnet_rxq_replace_lro_nomgr_buf(struct vtnet_rxq *, +static int vtnet_rxq_replace_lro_nomrg_buf(struct vtnet_rxq *, struct mbuf *, int); static int vtnet_rxq_replace_buf(struct vtnet_rxq *, struct mbuf *, int); static int vtnet_rxq_enqueue_buf(struct vtnet_rxq *, struct mbuf *); static int vtnet_rxq_new_buf(struct vtnet_rxq *); +static int vtnet_rxq_csum_needs_csum(struct vtnet_rxq *, struct mbuf *, + uint16_t, int, struct virtio_net_hdr *); +static int vtnet_rxq_csum_data_valid(struct vtnet_rxq *, struct mbuf *, + uint16_t, int, struct virtio_net_hdr *); static int vtnet_rxq_csum(struct vtnet_rxq *, struct mbuf *, struct virtio_net_hdr *); static void vtnet_rxq_discard_merged_bufs(struct vtnet_rxq *, int); @@ -128,6 +144,7 @@ static void vtnet_rx_vq_intr(void *); static void vtnet_rxq_tq_intr(void *, int); +static int vtnet_txq_intr_threshold(struct vtnet_txq *); static int vtnet_txq_below_threshold(struct vtnet_txq *); static int vtnet_txq_notify(struct vtnet_txq *); static void vtnet_txq_free_mbufs(struct vtnet_txq *); @@ -140,7 +157,7 @@ struct virtio_net_hdr *); static int vtnet_txq_enqueue_buf(struct vtnet_txq *, struct mbuf **, struct vtnet_tx_header *); -static int vtnet_txq_encap(struct vtnet_txq *, struct mbuf **); +static int vtnet_txq_encap(struct vtnet_txq *, struct mbuf **, int); #ifdef VTNET_LEGACY_TX static void vtnet_start_locked(struct vtnet_txq *, struct ifnet *); static void vtnet_start(struct ifnet *); @@ -177,6 +194,7 @@ static int vtnet_init_tx_queues(struct vtnet_softc *); static int vtnet_init_rxtx_queues(struct vtnet_softc *); static void vtnet_set_active_vq_pairs(struct vtnet_softc *); +static void vtnet_update_rx_offloads(struct vtnet_softc *); static int vtnet_reinit(struct vtnet_softc *); static void vtnet_init_locked(struct vtnet_softc *); 
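Two of the declarations added above, vtnet_rxq_csum_needs_csum() and vtnet_rxq_csum_data_valid(), split the old receive-checksum path along the two meanings a virtio-net header can carry: VIRTIO_NET_HDR_F_NEEDS_CSUM means the host left a partial checksum for the guest to finish (optionally computed in software via the new fixup_needs_csum tunable), while VIRTIO_NET_HDR_F_DATA_VALID means the host already verified it. A simplified sketch of the dispatch, using standard mbuf flags:

    static void
    rx_csum_dispatch(struct mbuf *m, const struct virtio_net_hdr *hdr)
    {
            if (hdr->flags & VIRTIO_NET_HDR_F_NEEDS_CSUM) {
                    /* Partial checksum: finish it in software, or pass
                     * the csum_start/csum_offset hints up the stack. */
            } else if (hdr->flags & VIRTIO_NET_HDR_F_DATA_VALID) {
                    /* Host verified the checksum; mark the packet good. */
                    m->m_pkthdr.csum_flags |=
                        CSUM_DATA_VALID | CSUM_PSEUDO_HDR;
                    m->m_pkthdr.csum_data = 0xffff;
            }
    }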
static void vtnet_init(void *); @@ -185,11 +203,11 @@ static void vtnet_exec_ctrl_cmd(struct vtnet_softc *, void *, struct sglist *, int, int); static int vtnet_ctrl_mac_cmd(struct vtnet_softc *, uint8_t *); +static int vtnet_ctrl_guest_offloads(struct vtnet_softc *, uint64_t); static int vtnet_ctrl_mq_cmd(struct vtnet_softc *, uint16_t); -static int vtnet_ctrl_rx_cmd(struct vtnet_softc *, int, int); +static int vtnet_ctrl_rx_cmd(struct vtnet_softc *, uint8_t, int); static int vtnet_set_promisc(struct vtnet_softc *, int); static int vtnet_set_allmulti(struct vtnet_softc *, int); -static void vtnet_attach_disable_promisc(struct vtnet_softc *); static void vtnet_rx_filter(struct vtnet_softc *); static void vtnet_rx_filter_mac(struct vtnet_softc *); static int vtnet_exec_vlan_filter(struct vtnet_softc *, int, uint16_t); @@ -198,21 +216,23 @@ static void vtnet_register_vlan(void *, struct ifnet *, uint16_t); static void vtnet_unregister_vlan(void *, struct ifnet *, uint16_t); +static void vtnet_update_speed_duplex(struct vtnet_softc *); static int vtnet_is_link_up(struct vtnet_softc *); static void vtnet_update_link_status(struct vtnet_softc *); static int vtnet_ifmedia_upd(struct ifnet *); static void vtnet_ifmedia_sts(struct ifnet *, struct ifmediareq *); -static void vtnet_get_hwaddr(struct vtnet_softc *); -static void vtnet_set_hwaddr(struct vtnet_softc *); +static void vtnet_get_macaddr(struct vtnet_softc *); +static void vtnet_set_macaddr(struct vtnet_softc *); +static void vtnet_attached_set_macaddr(struct vtnet_softc *); static void vtnet_vlan_tag_remove(struct mbuf *); static void vtnet_set_rx_process_limit(struct vtnet_softc *); -static void vtnet_set_tx_intr_threshold(struct vtnet_softc *); static void vtnet_setup_rxq_sysctl(struct sysctl_ctx_list *, struct sysctl_oid_list *, struct vtnet_rxq *); static void vtnet_setup_txq_sysctl(struct sysctl_ctx_list *, struct sysctl_oid_list *, struct vtnet_txq *); static void vtnet_setup_queue_sysctl(struct vtnet_softc *); +static void vtnet_load_tunables(struct vtnet_softc *); static void vtnet_setup_sysctl(struct vtnet_softc *); static int vtnet_rxq_enable_intr(struct vtnet_rxq *); @@ -229,57 +249,84 @@ static int vtnet_tunable_int(struct vtnet_softc *, const char *, int); /* Tunables. 
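A note on the tunable rewrite that follows: CTLFLAG_RDTUN marks a sysctl as read-only and backed by a loader tunable of the same name, so the paired TUNABLE_INT() declarations being deleted were redundant. The consolidated pattern, with a hypothetical knob for illustration:

    static int vtnet_example_knob = 0;      /* hypothetical knob */
    SYSCTL_INT(_hw_vtnet, OID_AUTO, example_knob, CTLFLAG_RDTUN,
        &vtnet_example_knob, 0,
        "Read-only sysctl, seeded from loader.conf at boot");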
*/ -static SYSCTL_NODE(_hw, OID_AUTO, vtnet, CTLFLAG_RD, 0, "VNET driver parameters"); +#define vtnet_htog16(_sc, _val) virtio_htog16(vtnet_modern(_sc), _val) +#define vtnet_htog32(_sc, _val) virtio_htog32(vtnet_modern(_sc), _val) +#define vtnet_htog64(_sc, _val) virtio_htog64(vtnet_modern(_sc), _val) +#define vtnet_gtoh16(_sc, _val) virtio_gtoh16(vtnet_modern(_sc), _val) +#define vtnet_gtoh32(_sc, _val) virtio_gtoh32(vtnet_modern(_sc), _val) +#define vtnet_gtoh64(_sc, _val) virtio_gtoh64(vtnet_modern(_sc), _val) + +static SYSCTL_NODE(_hw, OID_AUTO, vtnet, CTLFLAG_RD, 0, "VirtIO Net driver"); + static int vtnet_csum_disable = 0; -TUNABLE_INT("hw.vtnet.csum_disable", &vtnet_csum_disable); SYSCTL_INT(_hw_vtnet, OID_AUTO, csum_disable, CTLFLAG_RDTUN, &vtnet_csum_disable, 0, "Disables receive and send checksum offload"); + +static int vtnet_fixup_needs_csum = 0; +SYSCTL_INT(_hw_vtnet, OID_AUTO, fixup_needs_csum, CTLFLAG_RDTUN, + &vtnet_fixup_needs_csum, 0, + "Calculate valid checksum for NEEDS_CSUM packets"); + static int vtnet_tso_disable = 0; -TUNABLE_INT("hw.vtnet.tso_disable", &vtnet_tso_disable); -SYSCTL_INT(_hw_vtnet, OID_AUTO, tso_disable, CTLFLAG_RDTUN, &vtnet_tso_disable, - 0, "Disables TCP Segmentation Offload"); +SYSCTL_INT(_hw_vtnet, OID_AUTO, tso_disable, CTLFLAG_RDTUN, + &vtnet_tso_disable, 0, "Disables TSO"); + static int vtnet_lro_disable = 0; -TUNABLE_INT("hw.vtnet.lro_disable", &vtnet_lro_disable); -SYSCTL_INT(_hw_vtnet, OID_AUTO, lro_disable, CTLFLAG_RDTUN, &vtnet_lro_disable, - 0, "Disables TCP Large Receive Offload"); +SYSCTL_INT(_hw_vtnet, OID_AUTO, lro_disable, CTLFLAG_RDTUN, + &vtnet_lro_disable, 0, "Disables hardware LRO"); + static int vtnet_mq_disable = 0; -TUNABLE_INT("hw.vtnet.mq_disable", &vtnet_mq_disable); -SYSCTL_INT(_hw_vtnet, OID_AUTO, mq_disable, CTLFLAG_RDTUN, &vtnet_mq_disable, - 0, "Disables Multi Queue support"); +SYSCTL_INT(_hw_vtnet, OID_AUTO, mq_disable, CTLFLAG_RDTUN, + &vtnet_mq_disable, 0, "Disables multiqueue support"); + static int vtnet_mq_max_pairs = VTNET_MAX_QUEUE_PAIRS; -TUNABLE_INT("hw.vtnet.mq_max_pairs", &vtnet_mq_max_pairs); SYSCTL_INT(_hw_vtnet, OID_AUTO, mq_max_pairs, CTLFLAG_RDTUN, - &vtnet_mq_max_pairs, 0, "Sets the maximum number of Multi Queue pairs"); -static int vtnet_rx_process_limit = 512; -TUNABLE_INT("hw.vtnet.rx_process_limit", &vtnet_rx_process_limit); + &vtnet_mq_max_pairs, 0, "Maximum number of multiqueue pairs"); + +static int vtnet_tso_maxlen = IP_MAXPACKET; +SYSCTL_INT(_hw_vtnet, OID_AUTO, tso_maxlen, CTLFLAG_RDTUN, + &vtnet_tso_maxlen, 0, "TSO burst limit"); + +static int vtnet_rx_process_limit = 1024; SYSCTL_INT(_hw_vtnet, OID_AUTO, rx_process_limit, CTLFLAG_RDTUN, &vtnet_rx_process_limit, 0, - "Limits the number RX segments processed in a single pass"); + "Number of RX segments processed in one pass"); +static int vtnet_lro_entry_count = 128; +SYSCTL_INT(_hw_vtnet, OID_AUTO, lro_entry_count, CTLFLAG_RDTUN, + &vtnet_lro_entry_count, 0, "Software LRO entry count"); + +/* Enable sorted LRO, and the depth of the mbuf queue. 
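The lro_entry_count knob above and the lro_mbufq_depth knob that follows parameterize the software-LRO fallback wired up later in vtnet_init_rxq() via tcp_lro_init_args(). A nonzero depth is assumed to select tcp_lro's sorted-mbuf mode, where received mbufs are queued, sorted by flow, and flushed in batches; a sketch of the receive-side usage under that assumption, using the stock tcp_lro KPI and lro_ctrl layout:

    struct lro_ctrl *lro = &rxq->vtnrx_lro;

    if (lro->lro_mbuf_max != 0)             /* sorted mode enabled */
            tcp_lro_queue_mbuf(lro, m);
    else if (tcp_lro_rx(lro, m, 0) != 0)
            (*ifp->if_input)(ifp, m);       /* LRO refused the packet */

    /* ... then once per receive pass: */
    tcp_lro_flush_all(lro);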
*/ +static int vtnet_lro_mbufq_depth = 0; +SYSCTL_UINT(_hw_vtnet, OID_AUTO, lro_mbufq_depth, CTLFLAG_RDTUN, + &vtnet_lro_mbufq_depth, 0, "Depth of software LRO mbuf queue"); + static uma_zone_t vtnet_tx_header_zone; static struct virtio_feature_desc vtnet_feature_desc[] = { - { VIRTIO_NET_F_CSUM, "TxChecksum" }, - { VIRTIO_NET_F_GUEST_CSUM, "RxChecksum" }, - { VIRTIO_NET_F_MAC, "MacAddress" }, - { VIRTIO_NET_F_GSO, "TxAllGSO" }, - { VIRTIO_NET_F_GUEST_TSO4, "RxTSOv4" }, - { VIRTIO_NET_F_GUEST_TSO6, "RxTSOv6" }, - { VIRTIO_NET_F_GUEST_ECN, "RxECN" }, - { VIRTIO_NET_F_GUEST_UFO, "RxUFO" }, - { VIRTIO_NET_F_HOST_TSO4, "TxTSOv4" }, - { VIRTIO_NET_F_HOST_TSO6, "TxTSOv6" }, - { VIRTIO_NET_F_HOST_ECN, "TxTSOECN" }, - { VIRTIO_NET_F_HOST_UFO, "TxUFO" }, - { VIRTIO_NET_F_MRG_RXBUF, "MrgRxBuf" }, - { VIRTIO_NET_F_STATUS, "Status" }, - { VIRTIO_NET_F_CTRL_VQ, "ControlVq" }, - { VIRTIO_NET_F_CTRL_RX, "RxMode" }, - { VIRTIO_NET_F_CTRL_VLAN, "VLanFilter" }, - { VIRTIO_NET_F_CTRL_RX_EXTRA, "RxModeExtra" }, - { VIRTIO_NET_F_GUEST_ANNOUNCE, "GuestAnnounce" }, - { VIRTIO_NET_F_MQ, "Multiqueue" }, - { VIRTIO_NET_F_CTRL_MAC_ADDR, "SetMacAddress" }, + { VIRTIO_NET_F_CSUM, "TxChecksum" }, + { VIRTIO_NET_F_GUEST_CSUM, "RxChecksum" }, + { VIRTIO_NET_F_CTRL_GUEST_OFFLOADS, "CtrlRxOffloads" }, + { VIRTIO_NET_F_MAC, "MAC" }, + { VIRTIO_NET_F_GSO, "TxGSO" }, + { VIRTIO_NET_F_GUEST_TSO4, "RxLROv4" }, + { VIRTIO_NET_F_GUEST_TSO6, "RxLROv6" }, + { VIRTIO_NET_F_GUEST_ECN, "RxLROECN" }, + { VIRTIO_NET_F_GUEST_UFO, "RxUFO" }, + { VIRTIO_NET_F_HOST_TSO4, "TxTSOv4" }, + { VIRTIO_NET_F_HOST_TSO6, "TxTSOv6" }, + { VIRTIO_NET_F_HOST_ECN, "TxTSOECN" }, + { VIRTIO_NET_F_HOST_UFO, "TxUFO" }, + { VIRTIO_NET_F_MRG_RXBUF, "MrgRxBuf" }, + { VIRTIO_NET_F_STATUS, "Status" }, + { VIRTIO_NET_F_CTRL_VQ, "CtrlVq" }, + { VIRTIO_NET_F_CTRL_RX, "CtrlRxMode" }, + { VIRTIO_NET_F_CTRL_VLAN, "CtrlVLANFilter" }, + { VIRTIO_NET_F_CTRL_RX_EXTRA, "CtrlRxModeExtra" }, + { VIRTIO_NET_F_GUEST_ANNOUNCE, "GuestAnnounce" }, + { VIRTIO_NET_F_MQ, "Multiqueue" }, + { VIRTIO_NET_F_CTRL_MAC_ADDR, "CtrlMacAddr" }, + { VIRTIO_NET_F_SPEED_DUPLEX, "SpeedDuplex" }, { 0, NULL } }; @@ -305,16 +352,16 @@ #endif /* DEV_NETMAP */ static driver_t vtnet_driver = { - "vtnet", - vtnet_methods, - sizeof(struct vtnet_softc) + .name = "vtnet", + .methods = vtnet_methods, + .size = sizeof(struct vtnet_softc) }; static devclass_t vtnet_devclass; DRIVER_MODULE(vtnet, virtio_mmio, vtnet_driver, vtnet_devclass, vtnet_modevent, 0); -DRIVER_MODULE(vtnet, virtio_pci, vtnet_driver, vtnet_devclass, - vtnet_modevent, 0); +DRIVER_MODULE(vtnet, vtpcil, vtnet_driver, vtnet_devclass, vtnet_modevent, 0); +DRIVER_MODULE(vtnet, vtpcim, vtnet_driver, vtnet_devclass, vtnet_modevent, 0); MODULE_VERSION(vtnet, 1); MODULE_DEPEND(vtnet, virtio, 1, 1, 1); #ifdef DEV_NETMAP @@ -361,7 +408,7 @@ if (virtio_get_device_type(dev) != VIRTIO_ID_NETWORK) return (ENXIO); - device_set_desc(dev, "VirtIO Networking Adapter"); + device_set_desc(dev, "VirtIO Network Adapter"); return (BUS_PROBE_DEFAULT); } @@ -380,10 +427,22 @@ VTNET_CORE_LOCK_INIT(sc); callout_init_mtx(&sc->vtnet_tick_ch, VTNET_CORE_MTX(sc), 0); + vtnet_load_tunables(sc); + error = vtnet_alloc_interface(sc); + if (error) { + device_printf(dev, "cannot allocate interface\n"); + goto fail; + } + vtnet_setup_sysctl(sc); - vtnet_setup_features(sc); + error = vtnet_setup_features(sc); + if (error) { + device_printf(dev, "cannot setup features\n"); + goto fail; + } + error = vtnet_alloc_rx_filters(sc); if (error) { device_printf(dev, "cannot allocate Rx 
filters\n"); @@ -410,7 +469,7 @@ error = virtio_setup_intr(dev, INTR_TYPE_NET); if (error) { - device_printf(dev, "cannot setup virtqueue interrupts\n"); + device_printf(dev, "cannot setup interrupts\n"); /* BMV: This will crash if during boot! */ ether_ifdetach(sc->vtnet_ifp); goto fail; @@ -518,7 +577,6 @@ static int vtnet_shutdown(device_t dev) { - /* * Suspend already does all of what we need to * do here; we just never expect to be resumed. @@ -529,9 +587,14 @@ static int vtnet_attach_completed(device_t dev) { + struct vtnet_softc *sc; - vtnet_attach_disable_promisc(device_get_softc(dev)); + sc = device_get_softc(dev); + VTNET_CORE_LOCK(sc); + vtnet_attached_set_macaddr(sc); + VTNET_CORE_UNLOCK(sc); + return (0); } @@ -551,37 +614,67 @@ return (0); } -static void +static int vtnet_negotiate_features(struct vtnet_softc *sc) { device_t dev; - uint64_t mask, features; + uint64_t features, negotiated_features; + int no_csum; dev = sc->vtnet_dev; - mask = 0; + features = virtio_bus_is_modern(dev) ? VTNET_MODERN_FEATURES : + VTNET_LEGACY_FEATURES; /* * TSO and LRO are only available when their corresponding checksum * offload feature is also negotiated. */ - if (vtnet_tunable_int(sc, "csum_disable", vtnet_csum_disable)) { - mask |= VIRTIO_NET_F_CSUM | VIRTIO_NET_F_GUEST_CSUM; - mask |= VTNET_TSO_FEATURES | VTNET_LRO_FEATURES; - } - if (vtnet_tunable_int(sc, "tso_disable", vtnet_tso_disable)) - mask |= VTNET_TSO_FEATURES; - if (vtnet_tunable_int(sc, "lro_disable", vtnet_lro_disable)) - mask |= VTNET_LRO_FEATURES; + no_csum = vtnet_tunable_int(sc, "csum_disable", vtnet_csum_disable); + if (no_csum) + features &= ~(VIRTIO_NET_F_CSUM | VIRTIO_NET_F_GUEST_CSUM); + if (no_csum || vtnet_tunable_int(sc, "tso_disable", vtnet_tso_disable)) + features &= ~VTNET_TSO_FEATURES; + if (no_csum || vtnet_tunable_int(sc, "lro_disable", vtnet_lro_disable)) + features &= ~VTNET_LRO_FEATURES; + #ifndef VTNET_LEGACY_TX if (vtnet_tunable_int(sc, "mq_disable", vtnet_mq_disable)) - mask |= VIRTIO_NET_F_MQ; + features &= ~VIRTIO_NET_F_MQ; #else - mask |= VIRTIO_NET_F_MQ; + features &= ~VIRTIO_NET_F_MQ; #endif - features = VTNET_FEATURES & ~mask; - sc->vtnet_features = virtio_negotiate_features(dev, features); + negotiated_features = virtio_negotiate_features(dev, features); + if (virtio_with_feature(dev, VIRTIO_NET_F_MTU)) { + uint16_t mtu; + + mtu = virtio_read_dev_config_2(dev, + offsetof(struct virtio_net_config, mtu)); + if (mtu < VTNET_MIN_MTU /* || mtu > VTNET_MAX_MTU */) { + device_printf(dev, "Invalid MTU value: %d. " + "MTU feature disabled.\n", mtu); + features &= ~VIRTIO_NET_F_MTU; + negotiated_features = + virtio_negotiate_features(dev, features); + } + } + + if (virtio_with_feature(dev, VIRTIO_NET_F_MQ)) { + uint16_t npairs; + + npairs = virtio_read_dev_config_2(dev, + offsetof(struct virtio_net_config, max_virtqueue_pairs)); + if (npairs < VIRTIO_NET_CTRL_MQ_VQ_PAIRS_MIN || + npairs > VIRTIO_NET_CTRL_MQ_VQ_PAIRS_MAX) { + device_printf(dev, "Invalid max_virtqueue_pairs value: " + "%d. 
Multiqueue feature disabled.\n", npairs); + features &= ~VIRTIO_NET_F_MQ; + negotiated_features = + virtio_negotiate_features(dev, features); + } + } + if (virtio_with_feature(dev, VTNET_LRO_FEATURES) && virtio_with_feature(dev, VIRTIO_NET_F_MRG_RXBUF) == 0) { /* @@ -595,26 +688,35 @@ */ if (!virtio_with_feature(dev, VIRTIO_RING_F_INDIRECT_DESC)) { device_printf(dev, - "LRO disabled due to both mergeable buffers and " - "indirect descriptors not negotiated\n"); - + "Host LRO disabled since both mergeable buffers " + "and indirect descriptors were not negotiated\n"); features &= ~VTNET_LRO_FEATURES; - sc->vtnet_features = + negotiated_features = virtio_negotiate_features(dev, features); } else sc->vtnet_flags |= VTNET_FLAG_LRO_NOMRG; } + + sc->vtnet_features = negotiated_features; + sc->vtnet_negotiated_features = negotiated_features; + + return (virtio_finalize_features(dev)); } -static void +static int vtnet_setup_features(struct vtnet_softc *sc) { device_t dev; + int error; dev = sc->vtnet_dev; - vtnet_negotiate_features(sc); + error = vtnet_negotiate_features(sc); + if (error) + return (error); + if (virtio_with_feature(dev, VIRTIO_F_VERSION_1)) + sc->vtnet_flags |= VTNET_FLAG_MODERN; if (virtio_with_feature(dev, VIRTIO_RING_F_INDIRECT_DESC)) sc->vtnet_flags |= VTNET_FLAG_INDIRECT; if (virtio_with_feature(dev, VIRTIO_RING_F_EVENT_IDX)) @@ -625,26 +727,46 @@ sc->vtnet_flags |= VTNET_FLAG_MAC; } + if (virtio_with_feature(dev, VIRTIO_NET_F_MTU)) { + sc->vtnet_max_mtu = virtio_read_dev_config_2(dev, + offsetof(struct virtio_net_config, mtu)); + } else + sc->vtnet_max_mtu = VTNET_MAX_MTU; + if (virtio_with_feature(dev, VIRTIO_NET_F_MRG_RXBUF)) { sc->vtnet_flags |= VTNET_FLAG_MRG_RXBUFS; sc->vtnet_hdr_size = sizeof(struct virtio_net_hdr_mrg_rxbuf); + } else if (vtnet_modern(sc)) { + /* This is identical to the mergeable header. */ + sc->vtnet_hdr_size = sizeof(struct virtio_net_hdr_v1); } else sc->vtnet_hdr_size = sizeof(struct virtio_net_hdr); - if (sc->vtnet_flags & VTNET_FLAG_MRG_RXBUFS) - sc->vtnet_rx_nsegs = VTNET_MRG_RX_SEGS; + if (vtnet_modern(sc) || sc->vtnet_flags & VTNET_FLAG_MRG_RXBUFS) + sc->vtnet_rx_nsegs = VTNET_RX_SEGS_HDR_INLINE; else if (sc->vtnet_flags & VTNET_FLAG_LRO_NOMRG) - sc->vtnet_rx_nsegs = VTNET_MAX_RX_SEGS; + sc->vtnet_rx_nsegs = VTNET_RX_SEGS_LRO_NOMRG; else - sc->vtnet_rx_nsegs = VTNET_MIN_RX_SEGS; + sc->vtnet_rx_nsegs = VTNET_RX_SEGS_HDR_SEPARATE; + /* + * Favor "hardware" LRO if negotiated, but support software LRO as + * a fallback; there is usually little benefit (or worse) with both. 
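The header-size logic earlier in this hunk leans on a layout fact worth pinning down: the modern virtio_net_hdr_v1 is the 10-byte legacy header with the 16-bit num_buffers field folded in, making it byte-identical to virtio_net_hdr_mrg_rxbuf, which is why one inline-header receive path can serve both. A compile-time check expressing that assumption:

    /* Both are 12 bytes: flags, gso_type, hdr_len, gso_size,
     * csum_start, csum_offset, then a trailing num_buffers. */
    CTASSERT(sizeof(struct virtio_net_hdr_v1) ==
        sizeof(struct virtio_net_hdr_mrg_rxbuf));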
+ */ + if (virtio_with_feature(dev, VIRTIO_NET_F_GUEST_TSO4) == 0 && + virtio_with_feature(dev, VIRTIO_NET_F_GUEST_TSO6) == 0) + sc->vtnet_flags |= VTNET_FLAG_SW_LRO; + if (virtio_with_feature(dev, VIRTIO_NET_F_GSO) || virtio_with_feature(dev, VIRTIO_NET_F_HOST_TSO4) || virtio_with_feature(dev, VIRTIO_NET_F_HOST_TSO6)) - sc->vtnet_tx_nsegs = VTNET_MAX_TX_SEGS; + sc->vtnet_tx_nsegs = VTNET_TX_SEGS_MAX; else - sc->vtnet_tx_nsegs = VTNET_MIN_TX_SEGS; + sc->vtnet_tx_nsegs = VTNET_TX_SEGS_MIN; + sc->vtnet_req_vq_pairs = 1; + sc->vtnet_max_vq_pairs = 1; + if (virtio_with_feature(dev, VIRTIO_NET_F_CTRL_VQ)) { sc->vtnet_flags |= VTNET_FLAG_CTRL_VQ; @@ -654,35 +776,37 @@ sc->vtnet_flags |= VTNET_FLAG_VLAN_FILTER; if (virtio_with_feature(dev, VIRTIO_NET_F_CTRL_MAC_ADDR)) sc->vtnet_flags |= VTNET_FLAG_CTRL_MAC; + + if (virtio_with_feature(dev, VIRTIO_NET_F_MQ)) { + sc->vtnet_max_vq_pairs = virtio_read_dev_config_2(dev, + offsetof(struct virtio_net_config, + max_virtqueue_pairs)); + } } - if (virtio_with_feature(dev, VIRTIO_NET_F_MQ) && - sc->vtnet_flags & VTNET_FLAG_CTRL_VQ) { - sc->vtnet_max_vq_pairs = virtio_read_dev_config_2(dev, - offsetof(struct virtio_net_config, max_virtqueue_pairs)); - } else - sc->vtnet_max_vq_pairs = 1; - if (sc->vtnet_max_vq_pairs > 1) { + int req; + /* - * Limit the maximum number of queue pairs to the lower of - * the number of CPUs and the configured maximum. - * The actual number of queues that get used may be less. + * Limit the maximum number of requested queue pairs to the + * number of CPUs and the configured maximum. */ - int max; - - max = vtnet_tunable_int(sc, "mq_max_pairs", vtnet_mq_max_pairs); - if (max > VIRTIO_NET_CTRL_MQ_VQ_PAIRS_MIN) { - if (max > mp_ncpus) - max = mp_ncpus; - if (max > VIRTIO_NET_CTRL_MQ_VQ_PAIRS_MAX) - max = VIRTIO_NET_CTRL_MQ_VQ_PAIRS_MAX; - if (max > 1) { - sc->vtnet_requested_vq_pairs = max; - sc->vtnet_flags |= VTNET_FLAG_MULTIQ; - } + req = vtnet_tunable_int(sc, "mq_max_pairs", vtnet_mq_max_pairs); + if (req < 0) + req = 1; + if (req == 0) + req = mp_ncpus; + if (req > sc->vtnet_max_vq_pairs) + req = sc->vtnet_max_vq_pairs; + if (req > mp_ncpus) + req = mp_ncpus; + if (req > 1) { + sc->vtnet_req_vq_pairs = req; + sc->vtnet_flags |= VTNET_FLAG_MQ; } } + + return (0); } static int @@ -703,6 +827,14 @@ if (rxq->vtnrx_sg == NULL) return (ENOMEM); +#if defined(INET) || defined(INET6) + if (vtnet_software_lro(sc)) { + if (tcp_lro_init_args(&rxq->vtnrx_lro, sc->vtnet_ifp, + sc->vtnet_lro_entry_count, sc->vtnet_lro_mbufq_depth) != 0) + return (ENOMEM); + } +#endif + TASK_INIT(&rxq->vtnrx_intrtask, 0, vtnet_rxq_tq_intr, rxq); rxq->vtnrx_tq = taskqueue_create(rxq->vtnrx_name, M_NOWAIT, taskqueue_thread_enqueue, &rxq->vtnrx_tq); @@ -768,6 +900,7 @@ return (error); } + vtnet_set_rx_process_limit(sc); vtnet_setup_queue_sysctl(sc); return (0); @@ -780,6 +913,10 @@ rxq->vtnrx_sc = NULL; rxq->vtnrx_id = -1; +#if defined(INET) || defined(INET6) + tcp_lro_free(&rxq->vtnrx_lro); +#endif + if (rxq->vtnrx_sg != NULL) { sglist_free(rxq->vtnrx_sg); rxq->vtnrx_sg = NULL; @@ -888,28 +1025,39 @@ if (info == NULL) return (ENOMEM); - for (i = 0, idx = 0; i < sc->vtnet_max_vq_pairs; i++, idx+=2) { + for (i = 0, idx = 0; i < sc->vtnet_req_vq_pairs; i++, idx += 2) { rxq = &sc->vtnet_rxqs[i]; VQ_ALLOC_INFO_INIT(&info[idx], sc->vtnet_rx_nsegs, vtnet_rx_vq_intr, rxq, &rxq->vtnrx_vq, - "%s-%d rx", device_get_nameunit(dev), rxq->vtnrx_id); + "%s-rx%d", device_get_nameunit(dev), rxq->vtnrx_id); txq = &sc->vtnet_txqs[i]; VQ_ALLOC_INFO_INIT(&info[idx+1], 
sc->vtnet_tx_nsegs, vtnet_tx_vq_intr, txq, &txq->vtntx_vq, - "%s-%d tx", device_get_nameunit(dev), txq->vtntx_id); + "%s-tx%d", device_get_nameunit(dev), txq->vtntx_id); } + /* These queues will not be used so allocate the minimum resources. */ + for (/**/; i < sc->vtnet_max_vq_pairs; i++, idx += 2) { + rxq = &sc->vtnet_rxqs[i]; + VQ_ALLOC_INFO_INIT(&info[idx], 0, NULL, rxq, &rxq->vtnrx_vq, + "%s-rx%d", device_get_nameunit(dev), rxq->vtnrx_id); + + txq = &sc->vtnet_txqs[i]; + VQ_ALLOC_INFO_INIT(&info[idx+1], 0, NULL, txq, &txq->vtntx_vq, + "%s-tx%d", device_get_nameunit(dev), txq->vtntx_id); + } + if (sc->vtnet_flags & VTNET_FLAG_CTRL_VQ) { VQ_ALLOC_INFO_INIT(&info[idx], 0, NULL, NULL, &sc->vtnet_ctrl_vq, "%s ctrl", device_get_nameunit(dev)); } /* - * Enable interrupt binding if this is multiqueue. This only matters - * when per-vq MSIX is available. + * TODO: Enable interrupt binding if this is multiqueue. This will + * only matter when per-virtqueue MSIX is available. */ - if (sc->vtnet_flags & VTNET_FLAG_MULTIQ) + if (sc->vtnet_flags & VTNET_FLAG_MQ) flags |= 0; error = virtio_alloc_virtqueues(dev, flags, nvqs, info); @@ -919,23 +1067,35 @@ } static int -vtnet_setup_interface(struct vtnet_softc *sc) +vtnet_alloc_interface(struct vtnet_softc *sc) { device_t dev; struct ifnet *ifp; dev = sc->vtnet_dev; - ifp = sc->vtnet_ifp = if_alloc(IFT_ETHER); - if (ifp == NULL) { - device_printf(dev, "cannot allocate ifnet structure\n"); - return (ENOSPC); - } + ifp = if_alloc(IFT_ETHER); + if (ifp == NULL) + return (ENOMEM); - if_initname(ifp, device_get_name(dev), device_get_unit(dev)); - ifp->if_baudrate = IF_Gbps(10); /* Approx. */ + sc->vtnet_ifp = ifp; ifp->if_softc = sc; + if_initname(ifp, device_get_name(dev), device_get_unit(dev)); + + return (0); +} + +static int +vtnet_setup_interface(struct vtnet_softc *sc) +{ + device_t dev; + struct ifnet *ifp; + + dev = sc->vtnet_dev; + ifp = sc->vtnet_ifp; + ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST; + ifp->if_baudrate = IF_Gbps(10); ifp->if_init = vtnet_init; ifp->if_ioctl = vtnet_ioctl; ifp->if_get_counter = vtnet_get_counter; @@ -950,51 +1110,58 @@ IFQ_SET_READY(&ifp->if_snd); #endif - ifmedia_init(&sc->vtnet_media, IFM_IMASK, vtnet_ifmedia_upd, - vtnet_ifmedia_sts); - ifmedia_add(&sc->vtnet_media, VTNET_MEDIATYPE, 0, NULL); - ifmedia_set(&sc->vtnet_media, VTNET_MEDIATYPE); + vtnet_get_macaddr(sc); - /* Read (or generate) the MAC address for the adapter. */ - vtnet_get_hwaddr(sc); - - ether_ifattach(ifp, sc->vtnet_hwaddr); - if (virtio_with_feature(dev, VIRTIO_NET_F_STATUS)) ifp->if_capabilities |= IFCAP_LINKSTATE; - /* Tell the upper layer(s) we support long frames. 
*/ - ifp->if_hdrlen = sizeof(struct ether_vlan_header); - ifp->if_capabilities |= IFCAP_JUMBO_MTU | IFCAP_VLAN_MTU; + ifmedia_init(&sc->vtnet_media, 0, vtnet_ifmedia_upd, vtnet_ifmedia_sts); + ifmedia_add(&sc->vtnet_media, IFM_ETHER | IFM_AUTO, 0, NULL); + ifmedia_set(&sc->vtnet_media, IFM_ETHER | IFM_AUTO); if (virtio_with_feature(dev, VIRTIO_NET_F_CSUM)) { + int gso; + ifp->if_capabilities |= IFCAP_TXCSUM | IFCAP_TXCSUM_IPV6; - if (virtio_with_feature(dev, VIRTIO_NET_F_GSO)) { - ifp->if_capabilities |= IFCAP_TSO4 | IFCAP_TSO6; + gso = virtio_with_feature(dev, VIRTIO_NET_F_GSO); + if (gso || virtio_with_feature(dev, VIRTIO_NET_F_HOST_TSO4)) + ifp->if_capabilities |= IFCAP_TSO4; + if (gso || virtio_with_feature(dev, VIRTIO_NET_F_HOST_TSO6)) + ifp->if_capabilities |= IFCAP_TSO6; + if (gso || virtio_with_feature(dev, VIRTIO_NET_F_HOST_ECN)) sc->vtnet_flags |= VTNET_FLAG_TSO_ECN; - } else { - if (virtio_with_feature(dev, VIRTIO_NET_F_HOST_TSO4)) - ifp->if_capabilities |= IFCAP_TSO4; - if (virtio_with_feature(dev, VIRTIO_NET_F_HOST_TSO6)) - ifp->if_capabilities |= IFCAP_TSO6; - if (virtio_with_feature(dev, VIRTIO_NET_F_HOST_ECN)) - sc->vtnet_flags |= VTNET_FLAG_TSO_ECN; - } - if (ifp->if_capabilities & IFCAP_TSO) + if (ifp->if_capabilities & (IFCAP_TSO4 | IFCAP_TSO6)) { + int tso_maxlen; + ifp->if_capabilities |= IFCAP_VLAN_HWTSO; + + tso_maxlen = vtnet_tunable_int(sc, "tso_maxlen", + vtnet_tso_maxlen); + ifp->if_hw_tsomax = tso_maxlen - + (ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN); + ifp->if_hw_tsomaxsegcount = sc->vtnet_tx_nsegs - 1; + ifp->if_hw_tsomaxsegsize = PAGE_SIZE; + } } if (virtio_with_feature(dev, VIRTIO_NET_F_GUEST_CSUM)) { - ifp->if_capabilities |= IFCAP_RXCSUM | IFCAP_RXCSUM_IPV6; + ifp->if_capabilities |= IFCAP_RXCSUM; +#ifdef notyet + /* BMV: Rx checksums not distinguished between IPv4 and IPv6. */ + ifp->if_capabilities |= IFCAP_RXCSUM_IPV6; +#endif - if (virtio_with_feature(dev, VIRTIO_NET_F_GUEST_TSO4) || - virtio_with_feature(dev, VIRTIO_NET_F_GUEST_TSO6)) - ifp->if_capabilities |= IFCAP_LRO; + if (vtnet_tunable_int(sc, "fixup_needs_csum", + vtnet_fixup_needs_csum) != 0) + sc->vtnet_flags |= VTNET_FLAG_FIXUP_NEEDS_CSUM; + + /* Support either "hardware" or software LRO. */ + ifp->if_capabilities |= IFCAP_LRO; } - if (ifp->if_capabilities & IFCAP_HWCSUM) { + if (ifp->if_capabilities & (IFCAP_HWCSUM | IFCAP_HWCSUM_IPV6)) { /* * VirtIO does not support VLAN tagging, but we can fake * it by inserting and removing the 802.1Q header during @@ -1005,11 +1172,14 @@ IFCAP_VLAN_HWTAGGING | IFCAP_VLAN_HWCSUM; } - ifp->if_capenable = ifp->if_capabilities; + if (sc->vtnet_max_mtu >= ETHERMTU_JUMBO) + ifp->if_capabilities |= IFCAP_JUMBO_MTU; + ifp->if_capabilities |= IFCAP_VLAN_MTU; /* * Capabilities after here are not enabled by default. */ + ifp->if_capenable = ifp->if_capabilities; if (sc->vtnet_flags & VTNET_FLAG_VLAN_FILTER) { ifp->if_capabilities |= IFCAP_VLAN_HWFILTER; @@ -1020,57 +1190,214 @@ vtnet_unregister_vlan, sc, EVENTHANDLER_PRI_FIRST); } - vtnet_set_rx_process_limit(sc); - vtnet_set_tx_intr_threshold(sc); + ether_ifattach(ifp, sc->vtnet_hwaddr); + /* Tell the upper layer(s) we support long frames. 
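The TSO block above derives the interface limits from the tso_maxlen tunable and the negotiated transmit segment count: one scatter-gather entry is reserved for the virtio-net header, and room is left for the Ethernet plus VLAN encapsulation. A small sketch of the same arithmetic (the struct and constants are stand-ins, not driver types):

    #include <stdio.h>

    #define PAGE_SIZE            4096
    #define ETHER_HDR_LEN        14
    #define ETHER_VLAN_ENCAP_LEN 4

    /* Hypothetical mirror of the ifnet TSO fields set above. */
    struct tso_limits {
        int hw_tsomax;          /* largest TSO payload handed down */
        int hw_tsomaxsegcount;  /* sg entries left after the header */
        int hw_tsomaxsegsize;   /* per-segment size accepted */
    };

    static struct tso_limits
    compute_tso_limits(int tso_maxlen, int tx_nsegs)
    {
        struct tso_limits l;

        /* Reserve room for the Ethernet and optional VLAN headers. */
        l.hw_tsomax = tso_maxlen - (ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN);
        /* One descriptor is consumed by the virtio-net header. */
        l.hw_tsomaxsegcount = tx_nsegs - 1;
        l.hw_tsomaxsegsize = PAGE_SIZE;
        return (l);
    }

    int
    main(void)
    {
        struct tso_limits l = compute_tso_limits(65535, 64);
        printf("tsomax=%d segs=%d segsize=%d\n",
            l.hw_tsomax, l.hw_tsomaxsegcount, l.hw_tsomaxsegsize);
        return (0);
    }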
*/ + ifp->if_hdrlen = sizeof(struct ether_vlan_header); + return (0); } static int -vtnet_change_mtu(struct vtnet_softc *sc, int new_mtu) +vtnet_rx_cluster_size(struct vtnet_softc *sc, int mtu) { + int framesz; + + if (sc->vtnet_flags & VTNET_FLAG_MRG_RXBUFS) + return (MJUMPAGESIZE); + else if (sc->vtnet_flags & VTNET_FLAG_LRO_NOMRG) + return (MCLBYTES); + + /* + * Try to scale the receive mbuf cluster size from the MTU. Without + * the GUEST_TSO[46] features, the VirtIO specification says the + * driver must only be able to receive ~1500 byte frames. But if + * jumbo frames can be transmitted then try to receive jumbo. + * + * BMV: Not quite true when F_MTU is negotiated! + */ + if (vtnet_modern(sc)) { + MPASS(sc->vtnet_hdr_size == sizeof(struct virtio_net_hdr_v1)); + framesz = sizeof(struct virtio_net_hdr_v1); + } else + framesz = sizeof(struct vtnet_rx_header); + framesz += sizeof(struct ether_vlan_header) + mtu; + + if (framesz <= MCLBYTES) + return (MCLBYTES); + else if (framesz <= MJUMPAGESIZE) + return (MJUMPAGESIZE); + else if (framesz <= MJUM9BYTES) + return (MJUM9BYTES); + + /* Sane default; avoid 16KB clusters. */ + return (MCLBYTES); +} + +static int +vtnet_ioctl_mtu(struct vtnet_softc *sc, int mtu) +{ struct ifnet *ifp; - int frame_size, clsize; + int clustersz; ifp = sc->vtnet_ifp; + VTNET_CORE_LOCK_ASSERT(sc); - if (new_mtu < ETHERMIN || new_mtu > VTNET_MAX_MTU) + if (ifp->if_mtu == mtu) + return (0); + else if (mtu < ETHERMIN || mtu > sc->vtnet_max_mtu) return (EINVAL); - frame_size = sc->vtnet_hdr_size + sizeof(struct ether_vlan_header) + - new_mtu; + ifp->if_mtu = mtu; + clustersz = vtnet_rx_cluster_size(sc, mtu); - /* - * Based on the new MTU (and hence frame size) determine which - * cluster size is most appropriate for the receive queues. - */ - if (frame_size <= MCLBYTES) { - clsize = MCLBYTES; - } else if ((sc->vtnet_flags & VTNET_FLAG_MRG_RXBUFS) == 0) { - /* Avoid going past 9K jumbos. 
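vtnet_rx_cluster_size() above scales the receive cluster from the frame size: header plus Ethernet/VLAN encapsulation plus MTU, rounded up to the next cluster zone, with 16KB clusters deliberately avoided. A compilable sketch under the same thresholds (the sizes mirror common amd64 values and are illustrative only):

    #include <stdio.h>

    #define MCLBYTES      2048
    #define MJUMPAGESIZE  4096
    #define MJUM9BYTES    (9 * 1024)

    /* Header size + encapsulation + MTU -> smallest cluster that fits. */
    static int
    rx_cluster_size(int hdr_size, int mtu)
    {
        int framesz = hdr_size + 18 /* ether + VLAN tag */ + mtu;

        if (framesz <= MCLBYTES)
            return (MCLBYTES);
        else if (framesz <= MJUMPAGESIZE)
            return (MJUMPAGESIZE);
        else if (framesz <= MJUM9BYTES)
            return (MJUM9BYTES);
        return (MCLBYTES);  /* sane default; avoid 16KB clusters */
    }

    int
    main(void)
    {
        /* 12 stands in for the modern 12-byte virtio-net header. */
        printf("%d %d %d\n", rx_cluster_size(12, 1500),
            rx_cluster_size(12, 4000), rx_cluster_size(12, 9000));
        return (0);
    }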
*/ - if (frame_size > MJUM9BYTES) - return (EINVAL); - clsize = MJUM9BYTES; - } else - clsize = MJUMPAGESIZE; + if (clustersz != sc->vtnet_rx_clustersz && + ifp->if_drv_flags & IFF_DRV_RUNNING) { + ifp->if_drv_flags &= ~IFF_DRV_RUNNING; + vtnet_init_locked(sc); + } - ifp->if_mtu = new_mtu; - sc->vtnet_rx_new_clsize = clsize; + return (0); +} - if (ifp->if_drv_flags & IFF_DRV_RUNNING) { - ifp->if_drv_flags &= ~IFF_DRV_RUNNING; +static int +vtnet_ioctl_ifflags(struct vtnet_softc *sc) +{ + struct ifnet *ifp; + int drv_running; + + ifp = sc->vtnet_ifp; + drv_running = (ifp->if_drv_flags & IFF_DRV_RUNNING) != 0; + + VTNET_CORE_LOCK_ASSERT(sc); + + if ((ifp->if_flags & IFF_UP) == 0) { + if (drv_running) + vtnet_stop(sc); + goto out; + } + + if (!drv_running) { vtnet_init_locked(sc); + goto out; } + if ((ifp->if_flags ^ sc->vtnet_if_flags) & + (IFF_PROMISC | IFF_ALLMULTI)) { + if ((sc->vtnet_flags & VTNET_FLAG_CTRL_RX) == 0) + return (ENOTSUP); + vtnet_rx_filter(sc); + } + +out: + sc->vtnet_if_flags = ifp->if_flags; return (0); } static int +vtnet_ioctl_multi(struct vtnet_softc *sc) +{ + struct ifnet *ifp; + + ifp = sc->vtnet_ifp; + + VTNET_CORE_LOCK_ASSERT(sc); + + if (sc->vtnet_flags & VTNET_FLAG_CTRL_RX && + ifp->if_drv_flags & IFF_DRV_RUNNING) + vtnet_rx_filter_mac(sc); + + return (0); +} + +static int +vtnet_ioctl_ifcap(struct vtnet_softc *sc, struct ifreq *ifr) +{ + struct ifnet *ifp; + int mask, reinit, update; + + ifp = sc->vtnet_ifp; + mask = (ifr->ifr_reqcap & ifp->if_capabilities) ^ ifp->if_capenable; + reinit = update = 0; + + VTNET_CORE_LOCK_ASSERT(sc); + + if (mask & IFCAP_TXCSUM) + ifp->if_capenable ^= IFCAP_TXCSUM; + if (mask & IFCAP_TXCSUM_IPV6) + ifp->if_capenable ^= IFCAP_TXCSUM_IPV6; + if (mask & IFCAP_TSO4) + ifp->if_capenable ^= IFCAP_TSO4; + if (mask & IFCAP_TSO6) + ifp->if_capenable ^= IFCAP_TSO6; + + if (mask & (IFCAP_RXCSUM | IFCAP_RXCSUM_IPV6 | IFCAP_LRO)) { + /* + * These Rx features require the negotiated features to + * be updated. Avoid a full reinit if possible. + */ + if (sc->vtnet_features & VIRTIO_NET_F_CTRL_GUEST_OFFLOADS) + update = 1; + else + reinit = 1; + + /* BMV: Avoid needless renegotiation for just software LRO. */ + if ((mask & (IFCAP_RXCSUM | IFCAP_RXCSUM_IPV6 | IFCAP_LRO)) == + IFCAP_LRO && vtnet_software_lro(sc)) + reinit = update = 0; + + if (mask & IFCAP_RXCSUM) + ifp->if_capenable ^= IFCAP_RXCSUM; + if (mask & IFCAP_RXCSUM_IPV6) + ifp->if_capenable ^= IFCAP_RXCSUM_IPV6; + if (mask & IFCAP_LRO) + ifp->if_capenable ^= IFCAP_LRO; + + /* + * VirtIO does not distinguish between IPv4 and IPv6 checksums + * so treat them as a pair. Guest TSO (LRO) requires receive + * checksums. + */ + if (ifp->if_capenable & (IFCAP_RXCSUM | IFCAP_RXCSUM_IPV6)) { + ifp->if_capenable |= IFCAP_RXCSUM; +#ifdef notyet + ifp->if_capenable |= IFCAP_RXCSUM_IPV6; +#endif + } else + ifp->if_capenable &= + ~(IFCAP_RXCSUM | IFCAP_RXCSUM_IPV6 | IFCAP_LRO); + } + + if (mask & IFCAP_VLAN_HWFILTER) { + /* These Rx features require renegotiation. 
*/ + reinit = 1; + + if (mask & IFCAP_VLAN_HWFILTER) + ifp->if_capenable ^= IFCAP_VLAN_HWFILTER; + } + + if (mask & IFCAP_VLAN_HWTSO) + ifp->if_capenable ^= IFCAP_VLAN_HWTSO; + if (mask & IFCAP_VLAN_HWTAGGING) + ifp->if_capenable ^= IFCAP_VLAN_HWTAGGING; + + if (ifp->if_drv_flags & IFF_DRV_RUNNING) { + if (reinit) { + ifp->if_drv_flags &= ~IFF_DRV_RUNNING; + vtnet_init_locked(sc); + } else if (update) + vtnet_update_rx_offloads(sc); + } + + return (0); +} + +static int vtnet_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data) { struct vtnet_softc *sc; struct ifreq *ifr; - int reinit, mask, error; + int error; sc = ifp->if_softc; ifr = (struct ifreq *) data; @@ -1078,45 +1405,21 @@ switch (cmd) { case SIOCSIFMTU: - if (ifp->if_mtu != ifr->ifr_mtu) { - VTNET_CORE_LOCK(sc); - error = vtnet_change_mtu(sc, ifr->ifr_mtu); - VTNET_CORE_UNLOCK(sc); - } + VTNET_CORE_LOCK(sc); + error = vtnet_ioctl_mtu(sc, ifr->ifr_mtu); + VTNET_CORE_UNLOCK(sc); break; case SIOCSIFFLAGS: VTNET_CORE_LOCK(sc); - if ((ifp->if_flags & IFF_UP) == 0) { - if (ifp->if_drv_flags & IFF_DRV_RUNNING) - vtnet_stop(sc); - } else if (ifp->if_drv_flags & IFF_DRV_RUNNING) { - if ((ifp->if_flags ^ sc->vtnet_if_flags) & - (IFF_PROMISC | IFF_ALLMULTI)) { - if (sc->vtnet_flags & VTNET_FLAG_CTRL_RX) - vtnet_rx_filter(sc); - else { - ifp->if_flags |= IFF_PROMISC; - if ((ifp->if_flags ^ sc->vtnet_if_flags) - & IFF_ALLMULTI) - error = ENOTSUP; - } - } - } else - vtnet_init_locked(sc); - - if (error == 0) - sc->vtnet_if_flags = ifp->if_flags; + error = vtnet_ioctl_ifflags(sc); VTNET_CORE_UNLOCK(sc); break; case SIOCADDMULTI: case SIOCDELMULTI: - if ((sc->vtnet_flags & VTNET_FLAG_CTRL_RX) == 0) - break; VTNET_CORE_LOCK(sc); - if (ifp->if_drv_flags & IFF_DRV_RUNNING) - vtnet_rx_filter_mac(sc); + error = vtnet_ioctl_multi(sc); VTNET_CORE_UNLOCK(sc); break; @@ -1127,46 +1430,9 @@ case SIOCSIFCAP: VTNET_CORE_LOCK(sc); - mask = ifr->ifr_reqcap ^ ifp->if_capenable; - - if (mask & IFCAP_TXCSUM) - ifp->if_capenable ^= IFCAP_TXCSUM; - if (mask & IFCAP_TXCSUM_IPV6) - ifp->if_capenable ^= IFCAP_TXCSUM_IPV6; - if (mask & IFCAP_TSO4) - ifp->if_capenable ^= IFCAP_TSO4; - if (mask & IFCAP_TSO6) - ifp->if_capenable ^= IFCAP_TSO6; - - if (mask & (IFCAP_RXCSUM | IFCAP_RXCSUM_IPV6 | IFCAP_LRO | - IFCAP_VLAN_HWFILTER)) { - /* These Rx features require us to renegotiate. 
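vtnet_ioctl_ifcap() above uses the usual FreeBSD capability-toggle idiom with one refinement: the requested bits are masked with if_capabilities before the XOR, so capabilities the driver never advertised cannot be toggled on. A toy demonstration of just the mask computation (capability values are made up for the example):

    #include <stdio.h>

    #define IFCAP_TXCSUM 0x0001
    #define IFCAP_RXCSUM 0x0002
    #define IFCAP_LRO    0x0004

    int
    main(void)
    {
        int capabilities = IFCAP_TXCSUM | IFCAP_RXCSUM | IFCAP_LRO;
        int capenable = IFCAP_TXCSUM | IFCAP_RXCSUM;
        int reqcap = IFCAP_TXCSUM | IFCAP_LRO; /* drop RXCSUM, add LRO */

        /* Only supported capabilities may change state. */
        int mask = (reqcap & capabilities) ^ capenable;

        if (mask & IFCAP_RXCSUM)
            capenable ^= IFCAP_RXCSUM;
        if (mask & IFCAP_LRO)
            capenable ^= IFCAP_LRO;

        printf("capenable=%#x\n", capenable); /* TXCSUM | LRO */
        return (0);
    }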
*/ - reinit = 1; - - if (mask & IFCAP_RXCSUM) - ifp->if_capenable ^= IFCAP_RXCSUM; - if (mask & IFCAP_RXCSUM_IPV6) - ifp->if_capenable ^= IFCAP_RXCSUM_IPV6; - if (mask & IFCAP_LRO) - ifp->if_capenable ^= IFCAP_LRO; - if (mask & IFCAP_VLAN_HWFILTER) - ifp->if_capenable ^= IFCAP_VLAN_HWFILTER; - } else - reinit = 0; - - if (mask & IFCAP_VLAN_HWTSO) - ifp->if_capenable ^= IFCAP_VLAN_HWTSO; - if (mask & IFCAP_VLAN_HWTAGGING) - ifp->if_capenable ^= IFCAP_VLAN_HWTAGGING; - - if (reinit && (ifp->if_drv_flags & IFF_DRV_RUNNING)) { - ifp->if_drv_flags &= ~IFF_DRV_RUNNING; - vtnet_init_locked(sc); - } - + error = vtnet_ioctl_ifcap(sc, ifr); VTNET_CORE_UNLOCK(sc); VLAN_CAPABILITIES(ifp); - break; default: @@ -1185,12 +1451,6 @@ struct virtqueue *vq; int nbufs, error; -#ifdef DEV_NETMAP - error = vtnet_netmap_rxq_populate(rxq); - if (error >= 0) - return (error); -#endif /* DEV_NETMAP */ - vq = rxq->vtnrx_vq; error = ENOSPC; @@ -1220,20 +1480,12 @@ struct virtqueue *vq; struct mbuf *m; int last; -#ifdef DEV_NETMAP - int netmap_bufs = vtnet_netmap_queue_on(rxq->vtnrx_sc, NR_RX, - rxq->vtnrx_id); -#else /* !DEV_NETMAP */ - int netmap_bufs = 0; -#endif /* !DEV_NETMAP */ vq = rxq->vtnrx_vq; last = 0; - while ((m = virtqueue_drain(vq, &last)) != NULL) { - if (!netmap_bufs) - m_freem(m); - } + while ((m = virtqueue_drain(vq, &last)) != NULL) + m_freem(m); KASSERT(virtqueue_empty(vq), ("%s: mbufs remaining in rx queue %p", __func__, rxq)); @@ -1243,57 +1495,49 @@ vtnet_rx_alloc_buf(struct vtnet_softc *sc, int nbufs, struct mbuf **m_tailp) { struct mbuf *m_head, *m_tail, *m; - int i, clsize; + int i, size; - clsize = sc->vtnet_rx_clsize; + m_head = NULL; + size = sc->vtnet_rx_clustersz; KASSERT(nbufs == 1 || sc->vtnet_flags & VTNET_FLAG_LRO_NOMRG, - ("%s: chained mbuf %d request without LRO_NOMRG", __func__, nbufs)); + ("%s: mbuf %d chain requested without LRO_NOMRG", __func__, nbufs)); - m_head = m_getjcl(M_NOWAIT, MT_DATA, M_PKTHDR, clsize); - if (m_head == NULL) - goto fail; + for (i = 0; i < nbufs; i++) { + m = m_getjcl(M_NOWAIT, MT_DATA, i == 0 ? M_PKTHDR : 0, size); + if (m == NULL) { + sc->vtnet_stats.mbuf_alloc_failed++; + m_freem(m_head); + return (NULL); + } - m_head->m_len = clsize; - m_tail = m_head; - - /* Allocate the rest of the chain. */ - for (i = 1; i < nbufs; i++) { - m = m_getjcl(M_NOWAIT, MT_DATA, 0, clsize); - if (m == NULL) - goto fail; - - m->m_len = clsize; - m_tail->m_next = m; - m_tail = m; + m->m_len = size; + if (m_head != NULL) { + m_tail->m_next = m; + m_tail = m; + } else + m_head = m_tail = m; } if (m_tailp != NULL) *m_tailp = m_tail; return (m_head); - -fail: - sc->vtnet_stats.mbuf_alloc_failed++; - m_freem(m_head); - - return (NULL); } /* * Slow path for when LRO without mergeable buffers is negotiated. */ static int -vtnet_rxq_replace_lro_nomgr_buf(struct vtnet_rxq *rxq, struct mbuf *m0, +vtnet_rxq_replace_lro_nomrg_buf(struct vtnet_rxq *rxq, struct mbuf *m0, int len0) { struct vtnet_softc *sc; - struct mbuf *m, *m_prev; - struct mbuf *m_new, *m_tail; - int len, clsize, nreplace, error; + struct mbuf *m, *m_prev, *m_new, *m_tail; + int len, clustersz, nreplace, error; sc = rxq->vtnrx_sc; - clsize = sc->vtnet_rx_clsize; + clustersz = sc->vtnet_rx_clustersz; m_prev = NULL; m_tail = NULL; @@ -1303,25 +1547,23 @@ len = len0; /* - * Since these mbuf chains are so large, we avoid allocating an - * entire replacement chain if possible. When the received frame - * did not consume the entire chain, the unused mbufs are moved - * to the replacement chain. 
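The rewritten vtnet_rx_alloc_buf() above collapses the old head-then-rest allocation into a single loop: the first buffer carries the packet header and the whole partial chain is freed on any failure. A userspace sketch of the same chain-building pattern, with a toy buffer type standing in for mbufs:

    #include <stdio.h>
    #include <stdlib.h>

    /* Toy stand-in for an mbuf; just enough to show the chain building. */
    struct buf {
        struct buf *next;
        int len;
    };

    static struct buf *
    alloc_chain(int nbufs, int size, struct buf **tailp)
    {
        struct buf *head = NULL, *tail = NULL;

        for (int i = 0; i < nbufs; i++) {
            struct buf *b = calloc(1, sizeof(*b));
            if (b == NULL) {
                /* On failure, free the partial chain as the driver does. */
                while (head != NULL) {
                    struct buf *n = head->next;
                    free(head);
                    head = n;
                }
                return (NULL);
            }
            b->len = size;
            if (head != NULL) {
                tail->next = b;
                tail = b;
            } else
                head = tail = b;
        }
        if (tailp != NULL)
            *tailp = tail;
        return (head);
    }

    int
    main(void)
    {
        struct buf *tail = NULL;
        struct buf *head = alloc_chain(3, 2048, &tail);

        if (head != NULL)
            printf("chain ok, tail len %d\n", tail->len);
        return (0);
    }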
+	 * Since these mbuf chains are so large, avoid allocating a complete
+	 * replacement when the received frame did not consume the entire
+	 * chain. Unused mbufs are moved to the tail of the replacement mbuf.
	 */
	while (len > 0) {
-		/*
-		 * Something is seriously wrong if we received a frame
-		 * larger than the chain. Drop it.
-		 */
		if (m == NULL) {
			sc->vtnet_stats.rx_frame_too_large++;
			return (EMSGSIZE);
		}

-		/* We always allocate the same cluster size. */
-		KASSERT(m->m_len == clsize,
-		    ("%s: mbuf size %d is not the cluster size %d",
-		    __func__, m->m_len, clsize));
+		/*
+		 * Every mbuf should have the expected cluster size since that
+		 * is also used to allocate the replacements.
+		 */
+		KASSERT(m->m_len == clustersz,
+		    ("%s: mbuf size %d not expected cluster size %d", __func__,
+		    m->m_len, clustersz));

		m->m_len = MIN(m->m_len, len);
		len -= m->m_len;
@@ -1331,19 +1573,19 @@
		nreplace++;
	}

-	KASSERT(nreplace <= sc->vtnet_rx_nmbufs,
-	    ("%s: too many replacement mbufs %d max %d", __func__, nreplace,
-	    sc->vtnet_rx_nmbufs));
+	KASSERT(nreplace > 0 && nreplace <= sc->vtnet_rx_nmbufs,
+	    ("%s: invalid replacement mbuf count %d max %d", __func__,
+	    nreplace, sc->vtnet_rx_nmbufs));

	m_new = vtnet_rx_alloc_buf(sc, nreplace, &m_tail);
	if (m_new == NULL) {
-		m_prev->m_len = clsize;
+		m_prev->m_len = clustersz;
		return (ENOBUFS);
	}

	/*
-	 * Move any unused mbufs from the received chain onto the end
-	 * of the new chain.
+	 * Move any unused mbufs from the received mbuf chain onto the
+	 * end of the replacement chain.
	 */
	if (m_prev->m_next != NULL) {
		m_tail->m_next = m_prev->m_next;
		m_prev->m_next = NULL;
	}

	error = vtnet_rxq_enqueue_buf(rxq, m_new);
	if (error) {
		/*
-		 * BAD! We could not enqueue the replacement mbuf chain. We
-		 * must restore the m0 chain to the original state if it was
-		 * modified so we can subsequently discard it.
+		 * The replacement is supposed to be a copy of the one
+		 * dequeued so this is a very unexpected error.
		 *
-		 * NOTE: The replacement is suppose to be an identical copy
-		 * to the one just dequeued so this is an unexpected error.
+		 * Restore the m0 chain to the original state if it was
+		 * modified so we can then discard it.
		 */
-		sc->vtnet_stats.rx_enq_replacement_failed++;
-
		if (m_tail->m_next != NULL) {
			m_prev->m_next = m_tail->m_next;
			m_tail->m_next = NULL;
		}
-
-		m_prev->m_len = clsize;
+		m_prev->m_len = clustersz;
+		sc->vtnet_stats.rx_enq_replacement_failed++;
		m_freem(m_new);
	}

@@ -1383,31 +1622,23 @@
	sc = rxq->vtnrx_sc;

-	KASSERT(sc->vtnet_flags & VTNET_FLAG_LRO_NOMRG || m->m_next == NULL,
-	    ("%s: chained mbuf without LRO_NOMRG", __func__));
+	if (sc->vtnet_flags & VTNET_FLAG_LRO_NOMRG)
+		return (vtnet_rxq_replace_lro_nomrg_buf(rxq, m, len));

-	if (m->m_next == NULL) {
-		/* Fast-path for the common case of just one mbuf. */
-		if (m->m_len < len)
-			return (EINVAL);
+	MPASS(m->m_next == NULL);
+	if (m->m_len < len)
+		return (EMSGSIZE);

-		m_new = vtnet_rx_alloc_buf(sc, 1, NULL);
-		if (m_new == NULL)
-			return (ENOBUFS);
+	m_new = vtnet_rx_alloc_buf(sc, 1, NULL);
+	if (m_new == NULL)
+		return (ENOBUFS);

-		error = vtnet_rxq_enqueue_buf(rxq, m_new);
-		if (error) {
-			/*
-			 * The new mbuf is suppose to be an identical
-			 * copy of the one just dequeued so this is an
-			 * unexpected error. 
- */ - m_freem(m_new); - sc->vtnet_stats.rx_enq_replacement_failed++; - } else - m->m_len = len; + error = vtnet_rxq_enqueue_buf(rxq, m_new); + if (error) { + sc->vtnet_stats.rx_enq_replacement_failed++; + m_freem(m_new); } else - error = vtnet_rxq_replace_lro_nomgr_buf(rxq, m, len); + m->m_len = len; return (error); } @@ -1417,39 +1648,43 @@ { struct vtnet_softc *sc; struct sglist *sg; - struct vtnet_rx_header *rxhdr; - uint8_t *mdata; - int offset, error; + int header_inlined, error; sc = rxq->vtnrx_sc; sg = rxq->vtnrx_sg; - mdata = mtod(m, uint8_t *); + KASSERT(m->m_next == NULL || sc->vtnet_flags & VTNET_FLAG_LRO_NOMRG, + ("%s: mbuf chain without LRO_NOMRG", __func__)); VTNET_RXQ_LOCK_ASSERT(rxq); - KASSERT(sc->vtnet_flags & VTNET_FLAG_LRO_NOMRG || m->m_next == NULL, - ("%s: chained mbuf without LRO_NOMRG", __func__)); - KASSERT(m->m_len == sc->vtnet_rx_clsize, - ("%s: unexpected cluster size %d/%d", __func__, m->m_len, - sc->vtnet_rx_clsize)); sglist_reset(sg); - if ((sc->vtnet_flags & VTNET_FLAG_MRG_RXBUFS) == 0) { + header_inlined = vtnet_modern(sc) || + (sc->vtnet_flags & VTNET_FLAG_MRG_RXBUFS) != 0; /* TODO: ANY_LAYOUT */ + + if (header_inlined) + error = sglist_append_mbuf(sg, m); + else { + struct vtnet_rx_header *rxhdr = + mtod(m, struct vtnet_rx_header *); MPASS(sc->vtnet_hdr_size == sizeof(struct virtio_net_hdr)); - rxhdr = (struct vtnet_rx_header *) mdata; - sglist_append(sg, &rxhdr->vrh_hdr, sc->vtnet_hdr_size); - offset = sizeof(struct vtnet_rx_header); - } else - offset = 0; - sglist_append(sg, mdata + offset, m->m_len - offset); - if (m->m_next != NULL) { - error = sglist_append_mbuf(sg, m->m_next); - MPASS(error == 0); + /* Append the header and remaining mbuf data. */ + error = sglist_append(sg, &rxhdr->vrh_hdr, sc->vtnet_hdr_size); + if (error) + return (error); + error = sglist_append(sg, &rxhdr[1], + m->m_len - sizeof(struct vtnet_rx_header)); + if (error) + return (error); + + if (m->m_next != NULL) + error = sglist_append_mbuf(sg, m->m_next); } - error = virtqueue_enqueue(rxq->vtnrx_vq, m, sg, 0, sg->sg_nseg); + if (error) + return (error); - return (error); + return (virtqueue_enqueue(rxq->vtnrx_vq, m, sg, 0, sg->sg_nseg)); } static int @@ -1472,54 +1707,73 @@ return (error); } -/* - * Use the checksum offset in the VirtIO header to set the - * correct CSUM_* flags. - */ static int -vtnet_rxq_csum_by_offset(struct vtnet_rxq *rxq, struct mbuf *m, - uint16_t eth_type, int ip_start, struct virtio_net_hdr *hdr) +vtnet_rxq_csum_needs_csum(struct vtnet_rxq *rxq, struct mbuf *m, uint16_t etype, + int hoff, struct virtio_net_hdr *hdr) { struct vtnet_softc *sc; -#if defined(INET) || defined(INET6) - int offset = hdr->csum_start + hdr->csum_offset; -#endif + int error; sc = rxq->vtnrx_sc; - /* Only do a basic sanity check on the offset. */ - switch (eth_type) { -#if defined(INET) - case ETHERTYPE_IP: - if (__predict_false(offset < ip_start + sizeof(struct ip))) - return (1); - break; -#endif -#if defined(INET6) - case ETHERTYPE_IPV6: - if (__predict_false(offset < ip_start + sizeof(struct ip6_hdr))) - return (1); - break; -#endif - default: - sc->vtnet_stats.rx_csum_bad_ethtype++; - return (1); + /* + * NEEDS_CSUM corresponds to Linux's CHECKSUM_PARTIAL, but FreeBSD does + * not have an analogous CSUM flag. The checksum has been validated, + * but is incomplete (TCP/UDP pseudo header). + * + * The packet is likely from another VM on the same host that itself + * performed checksum offloading so Tx/Rx is basically a memcpy and + * the checksum has little value. 
+	 *
+	 * Default to receiving the packet as-is for performance reasons, but
+	 * this can cause issues if the packet is to be forwarded because it
+	 * does not contain a valid checksum. This patch may be helpful:
+	 * https://reviews.freebsd.org/D6611. In the meantime, have the driver
+	 * compute the checksum if requested.
+	 *
+	 * BMV: Need to add a CSUM_PARTIAL flag?
+	 */
+	if ((sc->vtnet_flags & VTNET_FLAG_FIXUP_NEEDS_CSUM) == 0) {
+		error = vtnet_rxq_csum_data_valid(rxq, m, etype, hoff, hdr);
+		return (error);
+	}

	/*
-	 * Use the offset to determine the appropriate CSUM_* flags. This is
-	 * a bit dirty, but we can get by with it since the checksum offsets
-	 * happen to be different. We assume the host host does not do IPv4
-	 * header checksum offloading.
+	 * Compute the checksum in the driver so the packet will contain a
+	 * valid checksum. The checksum is at csum_offset from csum_start.
	 */
-	switch (hdr->csum_offset) {
-	case offsetof(struct udphdr, uh_sum):
-	case offsetof(struct tcphdr, th_sum):
+	switch (etype) {
+#if defined(INET) || defined(INET6)
+	case ETHERTYPE_IP:
+	case ETHERTYPE_IPV6: {
+		int csum_off, csum_end;
+		uint16_t csum;
+
+		csum_off = hdr->csum_start + hdr->csum_offset;
+		csum_end = csum_off + sizeof(uint16_t);
+
+		/* Assume checksum will be in the first mbuf. */
+		if (m->m_len < csum_end || m->m_pkthdr.len < csum_end)
+			return (1);
+
+		/*
+		 * Like in_delayed_cksum()/in6_delayed_cksum(), compute the
+		 * checksum and write it at the specified offset. We could
+		 * try to verify the packet: csum_start should probably
+		 * correspond to the start of the TCP/UDP header.
+		 *
+		 * BMV: Need to properly handle UDP with zero checksum. Is
+		 * the IPv4 header checksum implicitly validated?
+		 */
+		csum = in_cksum_skip(m, m->m_pkthdr.len, hdr->csum_start);
+		*(uint16_t *)(mtodo(m, csum_off)) = csum;
		m->m_pkthdr.csum_flags |= CSUM_DATA_VALID | CSUM_PSEUDO_HDR;
		m->m_pkthdr.csum_data = 0xFFFF;
		break;
+	}
+#endif
	default:
-		sc->vtnet_stats.rx_csum_bad_offset++;
+		sc->vtnet_stats.rx_csum_bad_ethtype++;
		return (1);
	}

@@ -1527,64 +1781,55 @@
 }

 static int
-vtnet_rxq_csum_by_parse(struct vtnet_rxq *rxq, struct mbuf *m,
-    uint16_t eth_type, int ip_start, struct virtio_net_hdr *hdr)
+vtnet_rxq_csum_data_valid(struct vtnet_rxq *rxq, struct mbuf *m,
+    uint16_t etype, int hoff, struct virtio_net_hdr *hdr)
 {
	struct vtnet_softc *sc;
-	int offset, proto;
+	int protocol;

	sc = rxq->vtnrx_sc;

-	switch (eth_type) {
+	switch (etype) {
#if defined(INET)
-	case ETHERTYPE_IP: {
-		struct ip *ip;
-		if (__predict_false(m->m_len < ip_start + sizeof(struct ip)))
-			return (1);
-		ip = (struct ip *)(m->m_data + ip_start);
-		proto = ip->ip_p;
-		offset = ip_start + (ip->ip_hl << 2);
+	case ETHERTYPE_IP:
+		if (__predict_false(m->m_len < hoff + sizeof(struct ip)))
+			protocol = IPPROTO_DONE;
+		else {
+			struct ip *ip = (struct ip *)(m->m_data + hoff);
+			protocol = ip->ip_p;
+		}
		break;
-	}
#endif
#if defined(INET6)
	case ETHERTYPE_IPV6:
-		if (__predict_false(m->m_len < ip_start +
-		    sizeof(struct ip6_hdr)))
-			return (1);
-		offset = ip6_lasthdr(m, ip_start, IPPROTO_IPV6, &proto);
-		if (__predict_false(offset < 0))
-			return (1);
+		if (__predict_false(m->m_len < hoff + sizeof(struct ip6_hdr))
+		    || ip6_lasthdr(m, hoff, IPPROTO_IPV6, &protocol) < 0)
+			protocol = IPPROTO_DONE;
		break;
#endif
	default:
-		sc->vtnet_stats.rx_csum_bad_ethtype++;
-		return (1);
+		protocol = IPPROTO_DONE;
+		break;
	}

-	switch (proto) {
+	switch (protocol) {
	case IPPROTO_TCP:
-		if (__predict_false(m->m_len < offset + sizeof(struct tcphdr)))
-			return (1);
-		
m->m_pkthdr.csum_flags |= CSUM_DATA_VALID | CSUM_PSEUDO_HDR; - m->m_pkthdr.csum_data = 0xFFFF; - break; case IPPROTO_UDP: - if (__predict_false(m->m_len < offset + sizeof(struct udphdr))) - return (1); m->m_pkthdr.csum_flags |= CSUM_DATA_VALID | CSUM_PSEUDO_HDR; m->m_pkthdr.csum_data = 0xFFFF; break; default: /* - * For the remaining protocols, FreeBSD does not support - * checksum offloading, so the checksum will be recomputed. + * FreeBSD does not support checksum offloading of this + * protocol. Let the stack re-verify the checksum later + * if the protocol is supported. */ #if 0 - if_printf(sc->vtnet_ifp, "cksum offload of unsupported " - "protocol eth_type=%#x proto=%d csum_start=%d " - "csum_offset=%d\n", __func__, eth_type, proto, - hdr->csum_start, hdr->csum_offset); + if_printf(sc->vtnet_ifp, + "%s: checksum offload of unsupported protocol " + "etype=%#x protocol=%d csum_start=%d csum_offset=%d\n", + __func__, etype, protocol, hdr->csum_start, + hdr->csum_offset); #endif break; } @@ -1592,41 +1837,29 @@ return (0); } -/* - * Set the appropriate CSUM_* flags. Unfortunately, the information - * provided is not directly useful to us. The VirtIO header gives the - * offset of the checksum, which is all Linux needs, but this is not - * how FreeBSD does things. We are forced to peek inside the packet - * a bit. - * - * It would be nice if VirtIO gave us the L4 protocol or if FreeBSD - * could accept the offsets and let the stack figure it out. - */ static int vtnet_rxq_csum(struct vtnet_rxq *rxq, struct mbuf *m, struct virtio_net_hdr *hdr) { - struct ether_header *eh; - struct ether_vlan_header *evh; - uint16_t eth_type; - int offset, error; + const struct ether_header *eh; + int hoff; + uint16_t etype; - eh = mtod(m, struct ether_header *); - eth_type = ntohs(eh->ether_type); - if (eth_type == ETHERTYPE_VLAN) { - /* BMV: We should handle nested VLAN tags too. */ - evh = mtod(m, struct ether_vlan_header *); - eth_type = ntohs(evh->evl_proto); - offset = sizeof(struct ether_vlan_header); + eh = mtod(m, const struct ether_header *); + etype = ntohs(eh->ether_type); + if (etype == ETHERTYPE_VLAN) { + /* TODO BMV: Handle QinQ. 
*/ + const struct ether_vlan_header *evh = + mtod(m, const struct ether_vlan_header *); + etype = ntohs(evh->evl_proto); + hoff = sizeof(struct ether_vlan_header); } else - offset = sizeof(struct ether_header); + hoff = sizeof(struct ether_header); if (hdr->flags & VIRTIO_NET_HDR_F_NEEDS_CSUM) - error = vtnet_rxq_csum_by_offset(rxq, m, eth_type, offset, hdr); - else - error = vtnet_rxq_csum_by_parse(rxq, m, eth_type, offset, hdr); - - return (error); + return (vtnet_rxq_csum_needs_csum(rxq, m, etype, hoff, hdr)); + else /* VIRTIO_NET_HDR_F_DATA_VALID */ + return (vtnet_rxq_csum_data_valid(rxq, m, etype, hoff, hdr)); } static void @@ -1661,14 +1894,16 @@ { struct vtnet_softc *sc; struct virtqueue *vq; - struct mbuf *m, *m_tail; - int len; + struct mbuf *m_tail; sc = rxq->vtnrx_sc; vq = rxq->vtnrx_vq; m_tail = m_head; while (--nbufs > 0) { + struct mbuf *m; + int len; + m = virtqueue_dequeue(vq, &len); if (m == NULL) { rxq->vtnrx_stats.vrxs_ierrors++; @@ -1703,19 +1938,35 @@ return (1); } +#if defined(INET) || defined(INET6) +static int +vtnet_lro_rx(struct vtnet_rxq *rxq, struct mbuf *m) +{ + struct lro_ctrl *lro; + + lro = &rxq->vtnrx_lro; + + if (lro->lro_mbuf_max != 0) { + tcp_lro_queue_mbuf(lro, m); + return (0); + } + + return (tcp_lro_rx(lro, m, 0)); +} +#endif + static void vtnet_rxq_input(struct vtnet_rxq *rxq, struct mbuf *m, struct virtio_net_hdr *hdr) { struct vtnet_softc *sc; struct ifnet *ifp; - struct ether_header *eh; sc = rxq->vtnrx_sc; ifp = sc->vtnet_ifp; if (ifp->if_capenable & IFCAP_VLAN_HWTAGGING) { - eh = mtod(m, struct ether_header *); + struct ether_header *eh = mtod(m, struct ether_header *); if (eh->ether_type == htons(ETHERTYPE_VLAN)) { vtnet_vlan_tag_remove(m); /* @@ -1730,25 +1981,37 @@ m->m_pkthdr.flowid = rxq->vtnrx_id; M_HASHTYPE_SET(m, M_HASHTYPE_OPAQUE); - /* - * BMV: FreeBSD does not have the UNNECESSARY and PARTIAL checksum - * distinction that Linux does. Need to reevaluate if performing - * offloading for the NEEDS_CSUM case is really appropriate. 
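When the FIXUP_NEEDS_CSUM workaround is enabled, vtnet_rxq_csum_needs_csum() above completes the checksum itself: it sums the packet from csum_start to the end (the field already holds the pseudo-header partial) and stores the result at csum_start + csum_offset, which is what in_cksum_skip(9) does for the driver. A self-contained sketch of that fold-and-store step over a flat buffer (the offsets are illustrative, and a real frame spans an mbuf chain rather than an array):

    #include <stdint.h>
    #include <stdio.h>

    /* RFC 1071-style fold over buf[skip..len), standing in for what
     * in_cksum_skip() computes across the mbuf chain. */
    static uint16_t
    cksum_skip(const uint8_t *buf, size_t len, size_t skip)
    {
        uint32_t sum = 0;
        size_t i;

        for (i = skip; i + 1 < len; i += 2)
            sum += (uint32_t)buf[i] << 8 | buf[i + 1];
        if ((len - skip) & 1)
            sum += (uint32_t)buf[len - 1] << 8;
        while (sum >> 16)
            sum = (sum & 0xffff) + (sum >> 16);
        return ((uint16_t)~sum);
    }

    int
    main(void)
    {
        uint8_t pkt[64] = { 0 };    /* pretend frame, zero filled */
        size_t csum_start = 14;     /* where the host began summing */
        size_t csum_offset = 16;    /* e.g. the TCP checksum field */
        uint16_t csum = cksum_skip(pkt, sizeof(pkt), csum_start);

        /* Store in network byte order at csum_start + csum_offset. */
        pkt[csum_start + csum_offset] = csum >> 8;
        pkt[csum_start + csum_offset + 1] = csum & 0xff;
        printf("csum=0x%04x\n", csum);
        return (0);
    }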
- */ - if (hdr->flags & (VIRTIO_NET_HDR_F_NEEDS_CSUM | - VIRTIO_NET_HDR_F_DATA_VALID)) { + if (hdr->flags & + (VIRTIO_NET_HDR_F_NEEDS_CSUM | VIRTIO_NET_HDR_F_DATA_VALID)) { if (vtnet_rxq_csum(rxq, m, hdr) == 0) rxq->vtnrx_stats.vrxs_csum++; else rxq->vtnrx_stats.vrxs_csum_failed++; } + if (hdr->gso_size != 0) { + switch (hdr->gso_type & ~VIRTIO_NET_HDR_GSO_ECN) { + case VIRTIO_NET_HDR_GSO_TCPV4: + case VIRTIO_NET_HDR_GSO_TCPV6: +// not available in 11.x mbuf +// m->m_pkthdr.lro_nsegs = +// howmany(m->m_pkthdr.len, hdr->gso_size); + rxq->vtnrx_stats.vrxs_host_lro++; + break; + } + } + rxq->vtnrx_stats.vrxs_ipackets++; rxq->vtnrx_stats.vrxs_ibytes += m->m_pkthdr.len; - VTNET_RXQ_UNLOCK(rxq); +#if defined(INET) || defined(INET6) + if (vtnet_software_lro(sc) && ifp->if_capenable & IFCAP_LRO) { + if (vtnet_lro_rx(rxq, m) == 0) + return; + } +#endif + (*ifp->if_input)(ifp, m); - VTNET_RXQ_LOCK(rxq); } static int @@ -1758,20 +2021,25 @@ struct vtnet_softc *sc; struct ifnet *ifp; struct virtqueue *vq; - struct mbuf *m; - struct virtio_net_hdr_mrg_rxbuf *mhdr; - int len, deq, nbufs, adjsz, count; + int deq, count; sc = rxq->vtnrx_sc; vq = rxq->vtnrx_vq; ifp = sc->vtnet_ifp; - hdr = &lhdr; deq = 0; count = sc->vtnet_rx_process_limit; VTNET_RXQ_LOCK_ASSERT(rxq); +#ifdef DEV_NETMAP + if (netmap_rx_irq(ifp, 0, &deq)) + return (0); +#endif + while (count-- > 0) { + struct mbuf *m; + int len, nbufs, adjsz; + m = virtqueue_dequeue(vq, &len); if (m == NULL) break; @@ -1783,18 +2051,22 @@ continue; } - if ((sc->vtnet_flags & VTNET_FLAG_MRG_RXBUFS) == 0) { + if (sc->vtnet_flags & VTNET_FLAG_MRG_RXBUFS) { + struct virtio_net_hdr_mrg_rxbuf *mhdr = + mtod(m, struct virtio_net_hdr_mrg_rxbuf *); + nbufs = vtnet_htog16(sc, mhdr->num_buffers); + adjsz = sizeof(struct virtio_net_hdr_mrg_rxbuf); + } else if (vtnet_modern(sc)) { + nbufs = 1; /* num_buffers is always 1 */ + adjsz = sizeof(struct virtio_net_hdr_v1); + } else { nbufs = 1; adjsz = sizeof(struct vtnet_rx_header); /* - * Account for our pad inserted between the header - * and the actual start of the frame. + * Account for our gap between the header and start of + * data to keep the segments separated. */ len += VTNET_RX_HEADER_PAD; - } else { - mhdr = mtod(m, struct virtio_net_hdr_mrg_rxbuf *); - nbufs = mhdr->num_buffers; - adjsz = sizeof(struct virtio_net_hdr_mrg_rxbuf); } if (vtnet_rxq_replace_buf(rxq, m, len) != 0) { @@ -1816,26 +2088,29 @@ } /* - * Save copy of header before we strip it. For both mergeable - * and non-mergeable, the header is at the beginning of the - * mbuf data. We no longer need num_buffers, so always use a - * regular header. - * - * BMV: Is this memcpy() expensive? We know the mbuf data is - * still valid even after the m_adj(). + * Save an endian swapped version of the header prior to it + * being stripped. The header is always at the start of the + * mbuf data. num_buffers was already saved (and not needed) + * so use the standard header. */ - memcpy(hdr, mtod(m, void *), sizeof(struct virtio_net_hdr)); + hdr = mtod(m, struct virtio_net_hdr *); + lhdr.flags = hdr->flags; + lhdr.gso_type = hdr->gso_type; + lhdr.hdr_len = vtnet_htog16(sc, hdr->hdr_len); + lhdr.gso_size = vtnet_htog16(sc, hdr->gso_size); + lhdr.csum_start = vtnet_htog16(sc, hdr->csum_start); + lhdr.csum_offset = vtnet_htog16(sc, hdr->csum_offset); m_adj(m, adjsz); - vtnet_rxq_input(rxq, m, hdr); - - /* Must recheck after dropping the Rx lock. 
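The Rx completion loop above now distinguishes three header layouts: mergeable buffers carry num_buffers in the header, modern V1 devices inline a fixed header with num_buffers always 1, and legacy devices use the short header plus the driver's alignment pad. A sketch of just that accounting (the header sizes below are illustrative stand-ins, not the driver's sizeof values):

    #include <stdio.h>

    #define HDR_MRG 12  /* ~sizeof(struct virtio_net_hdr_mrg_rxbuf) */
    #define HDR_V1  12  /* ~sizeof(struct virtio_net_hdr_v1) */
    #define HDR_LEG 10  /* ~sizeof(struct virtio_net_hdr) */
    #define RX_PAD   4  /* legacy pad between header and frame */

    /* How many descriptors make up the frame, and how much to strip
     * from the front of the first buffer. */
    static void
    rx_header_accounting(int mrg_rxbufs, int modern, int num_buffers,
        int *nbufs, int *adjsz)
    {
        if (mrg_rxbufs) {
            *nbufs = num_buffers;  /* host reports the chain length */
            *adjsz = HDR_MRG;
        } else if (modern) {
            *nbufs = 1;            /* num_buffers is always 1 */
            *adjsz = HDR_V1;
        } else {
            *nbufs = 1;
            *adjsz = HDR_LEG + RX_PAD; /* header plus alignment pad */
        }
    }

    int
    main(void)
    {
        int nbufs, adjsz;

        rx_header_accounting(1, 0, 3, &nbufs, &adjsz);
        printf("mrg: nbufs=%d adjsz=%d\n", nbufs, adjsz);
        rx_header_accounting(0, 1, 1, &nbufs, &adjsz);
        printf("modern: nbufs=%d adjsz=%d\n", nbufs, adjsz);
        return (0);
    }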
*/ - if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) - break; + vtnet_rxq_input(rxq, m, &lhdr); } - if (deq > 0) + if (deq > 0) { +#if defined(INET) || defined(INET6) + tcp_lro_flush_all(&rxq->vtnrx_lro); +#endif virtqueue_notify(vq); + } return (count > 0 ? 0 : EAGAIN); } @@ -1864,11 +2139,6 @@ return; } -#ifdef DEV_NETMAP - if (netmap_rx_irq(ifp, rxq->vtnrx_id, &more) != NM_IRQ_PASS) - return; -#endif /* DEV_NETMAP */ - VTNET_RXQ_LOCK(rxq); again: @@ -1888,8 +2158,8 @@ if (tries++ < VTNET_INTR_DISABLE_RETRIES) goto again; - VTNET_RXQ_UNLOCK(rxq); rxq->vtnrx_stats.vrxs_rescheduled++; + VTNET_RXQ_UNLOCK(rxq); taskqueue_enqueue(rxq->vtnrx_tq, &rxq->vtnrx_intrtask); } else VTNET_RXQ_UNLOCK(rxq); @@ -1919,22 +2189,49 @@ if (!more) vtnet_rxq_disable_intr(rxq); rxq->vtnrx_stats.vrxs_rescheduled++; + VTNET_RXQ_UNLOCK(rxq); taskqueue_enqueue(rxq->vtnrx_tq, &rxq->vtnrx_intrtask); - } + } else + VTNET_RXQ_UNLOCK(rxq); +} - VTNET_RXQ_UNLOCK(rxq); +static int +vtnet_txq_intr_threshold(struct vtnet_txq *txq) +{ + struct vtnet_softc *sc; + int threshold; + + sc = txq->vtntx_sc; + + /* + * The Tx interrupt is disabled until the queue free count falls + * below our threshold. Completed frames are drained from the Tx + * virtqueue before transmitting new frames and in the watchdog + * callout, so the frequency of Tx interrupts is greatly reduced, + * at the cost of not freeing mbufs as quickly as they otherwise + * would be. + */ + threshold = virtqueue_size(txq->vtntx_vq) / 4; + + /* + * Without indirect descriptors, leave enough room for the most + * segments we handle. + */ + if ((sc->vtnet_flags & VTNET_FLAG_INDIRECT) == 0 && + threshold < sc->vtnet_tx_nsegs) + threshold = sc->vtnet_tx_nsegs; + + return (threshold); } static int vtnet_txq_below_threshold(struct vtnet_txq *txq) { - struct vtnet_softc *sc; struct virtqueue *vq; - sc = txq->vtntx_sc; vq = txq->vtntx_vq; - return (virtqueue_nfree(vq) <= sc->vtnet_tx_intr_thresh); + return (virtqueue_nfree(vq) <= txq->vtntx_intr_threshold); } static int @@ -1969,21 +2266,13 @@ struct virtqueue *vq; struct vtnet_tx_header *txhdr; int last; -#ifdef DEV_NETMAP - int netmap_bufs = vtnet_netmap_queue_on(txq->vtntx_sc, NR_TX, - txq->vtntx_id); -#else /* !DEV_NETMAP */ - int netmap_bufs = 0; -#endif /* !DEV_NETMAP */ vq = txq->vtntx_vq; last = 0; while ((txhdr = virtqueue_drain(vq, &last)) != NULL) { - if (!netmap_bufs) { - m_freem(txhdr->vth_mbuf); - uma_zfree(vtnet_tx_header_zone, txhdr); - } + m_freem(txhdr->vth_mbuf); + uma_zfree(vtnet_tx_header_zone, txhdr); } KASSERT(virtqueue_empty(vq), @@ -1991,12 +2280,11 @@ } /* - * BMV: Much of this can go away once we finally have offsets in - * the mbuf packet header. Bug andre@. + * BMV: This can go away once we finally have offsets in the mbuf header. 
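vtnet_txq_intr_threshold() above fixes the Tx interrupt threshold at a quarter of the virtqueue, floored at the worst-case segment count when indirect descriptors are unavailable. The same policy as a standalone function:

    #include <stdio.h>

    /* A quarter of the ring, but never less than the largest request
     * when indirect descriptors were not negotiated. */
    static int
    tx_intr_threshold(int vq_size, int tx_nsegs, int has_indirect)
    {
        int threshold = vq_size / 4;

        if (!has_indirect && threshold < tx_nsegs)
            threshold = tx_nsegs;
        return (threshold);
    }

    int
    main(void)
    {
        printf("%d\n", tx_intr_threshold(256, 64, 1)); /* 64 */
        printf("%d\n", tx_intr_threshold(128, 64, 0)); /* 64, floored */
        return (0);
    }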
 */
 static int
-vtnet_txq_offload_ctx(struct vtnet_txq *txq, struct mbuf *m,
-    int *etype, int *proto, int *start)
+vtnet_txq_offload_ctx(struct vtnet_txq *txq, struct mbuf *m, int *etype,
+    int *proto, int *start)
 {
	struct vtnet_softc *sc;
	struct ether_vlan_header *evh;
@@ -2040,7 +2328,7 @@
		break;
#endif
	default:
-		sc->vtnet_stats.tx_csum_bad_ethtype++;
+		sc->vtnet_stats.tx_csum_unknown_ethtype++;
		return (EINVAL);
	}

@@ -2048,7 +2336,7 @@
 }

 static int
-vtnet_txq_offload_tso(struct vtnet_txq *txq, struct mbuf *m, int eth_type,
+vtnet_txq_offload_tso(struct vtnet_txq *txq, struct mbuf *m, int flags,
     int offset, struct virtio_net_hdr *hdr)
 {
	static struct timeval lastecn;
@@ -2064,16 +2352,17 @@
	} else
		tcp = (struct tcphdr *)(m->m_data + offset);

-	hdr->hdr_len = offset + (tcp->th_off << 2);
-	hdr->gso_size = m->m_pkthdr.tso_segsz;
-	hdr->gso_type = eth_type == ETHERTYPE_IP ? VIRTIO_NET_HDR_GSO_TCPV4 :
-	    VIRTIO_NET_HDR_GSO_TCPV6;
+	hdr->hdr_len = vtnet_gtoh16(sc, offset + (tcp->th_off << 2));
+	hdr->gso_size = vtnet_gtoh16(sc, m->m_pkthdr.tso_segsz);
+	hdr->gso_type = (flags & CSUM_IP_TSO) ?
+	    VIRTIO_NET_HDR_GSO_TCPV4 : VIRTIO_NET_HDR_GSO_TCPV6;

-	if (tcp->th_flags & TH_CWR) {
+	if (__predict_false(tcp->th_flags & TH_CWR)) {
		/*
-		 * Drop if VIRTIO_NET_F_HOST_ECN was not negotiated. In FreeBSD,
-		 * ECN support is not on a per-interface basis, but globally via
-		 * the net.inet.tcp.ecn.enable sysctl knob. The default is off.
+		 * Drop if VIRTIO_NET_F_HOST_ECN was not negotiated. In
+		 * FreeBSD, ECN support is not on a per-interface basis,
+		 * but globally via the net.inet.tcp.ecn.enable sysctl
+		 * knob. The default is off.
		 */
		if ((sc->vtnet_flags & VTNET_FLAG_TSO_ECN) == 0) {
			if (ppsratecheck(&lastecn, &curecn, 1))
@@ -2103,30 +2392,36 @@
	if (error)
		goto drop;

-	if ((etype == ETHERTYPE_IP && flags & VTNET_CSUM_OFFLOAD) ||
-	    (etype == ETHERTYPE_IPV6 && flags & VTNET_CSUM_OFFLOAD_IPV6)) {
-		/*
-		 * We could compare the IP protocol vs the CSUM_ flag too,
-		 * but that really should not be necessary.
-		 */
+	if (flags & (VTNET_CSUM_OFFLOAD | VTNET_CSUM_OFFLOAD_IPV6)) {
+		/* Sanity check the parsed mbuf matches the offload flags. */
+		if (__predict_false((flags & VTNET_CSUM_OFFLOAD &&
+		    etype != ETHERTYPE_IP) || (flags & VTNET_CSUM_OFFLOAD_IPV6
+		    && etype != ETHERTYPE_IPV6))) {
+			sc->vtnet_stats.tx_csum_proto_mismatch++;
+			goto drop;
+		}
+
		hdr->flags |= VIRTIO_NET_HDR_F_NEEDS_CSUM;
-		hdr->csum_start = csum_start;
-		hdr->csum_offset = m->m_pkthdr.csum_data;
+		hdr->csum_start = vtnet_gtoh16(sc, csum_start);
+		hdr->csum_offset = vtnet_gtoh16(sc, m->m_pkthdr.csum_data);
		txq->vtntx_stats.vtxs_csum++;
	}

-	if (flags & CSUM_TSO) {
+	if (flags & (CSUM_IP_TSO | CSUM_IP6_TSO)) {
+		/*
+		 * Sanity check the parsed mbuf IP protocol is TCP, and
+		 * VirtIO TSO requires the checksum offloading above.
+		 */
		if (__predict_false(proto != IPPROTO_TCP)) {
-			/* Likely failed to correctly parse the mbuf. 
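vtnet_txq_offload_tso() above fills the TSO fields of the virtio-net header: hdr_len covers everything through the TCP header, gso_size is the MSS, and gso_type is chosen from the checksum flags. A sketch of that assembly with a local mirror of the header (the vtnet_gtoh16() byte swap needed for legacy big-endian guests is omitted here; the GSO constants follow the VirtIO spec):

    #include <stdint.h>
    #include <stdio.h>

    #define GSO_TCPV4 1  /* VIRTIO_NET_HDR_GSO_TCPV4 */
    #define GSO_TCPV6 4  /* VIRTIO_NET_HDR_GSO_TCPV6 */

    /* Local mirror of the TSO-relevant virtio-net header fields. */
    struct net_hdr {
        uint16_t hdr_len;   /* bytes up to and including the TCP header */
        uint16_t gso_size;  /* MSS used for segmentation */
        uint8_t  gso_type;
    };

    static void
    fill_tso_hdr(struct net_hdr *hdr, int l4_offset, int tcp_hlen,
        int mss, int is_ipv4)
    {
        hdr->hdr_len = (uint16_t)(l4_offset + tcp_hlen);
        hdr->gso_size = (uint16_t)mss;
        hdr->gso_type = is_ipv4 ? GSO_TCPV4 : GSO_TCPV6;
    }

    int
    main(void)
    {
        struct net_hdr hdr;

        fill_tso_hdr(&hdr, 14 + 20, 20, 1448, 1);
        printf("hdr_len=%u gso_size=%u type=%u\n",
            hdr.hdr_len, hdr.gso_size, hdr.gso_type);
        return (0);
    }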
 */
			sc->vtnet_stats.tx_tso_not_tcp++;
			goto drop;
+		} else if (__predict_false((hdr->flags &
+		    VIRTIO_NET_HDR_F_NEEDS_CSUM) == 0)) {
+			sc->vtnet_stats.tx_tso_without_csum++;
+			goto drop;
		}

-		KASSERT(hdr->flags & VIRTIO_NET_HDR_F_NEEDS_CSUM,
-		    ("%s: mbuf %p TSO without checksum offload %#x",
-		    __func__, m, flags));
-
-		error = vtnet_txq_offload_tso(txq, m, etype, csum_start, hdr);
+		error = vtnet_txq_offload_tso(txq, m, flags, csum_start, hdr);
		if (error)
			goto drop;
	}
@@ -2155,8 +2450,11 @@
	sglist_reset(sg);
	error = sglist_append(sg, &txhdr->vth_uhdr, sc->vtnet_hdr_size);
-	KASSERT(error == 0 && sg->sg_nseg == 1,
-	    ("%s: error %d adding header to sglist", __func__, error));
+	if (error != 0 || sg->sg_nseg != 1) {
+		KASSERT(0, ("%s: cannot add header to sglist error %d nseg %d",
+		    __func__, error, sg->sg_nseg));
+		goto fail;
+	}

	error = sglist_append_mbuf(sg, m);
	if (error) {
@@ -2186,7 +2484,7 @@
 }

 static int
-vtnet_txq_encap(struct vtnet_txq *txq, struct mbuf **m_head)
+vtnet_txq_encap(struct vtnet_txq *txq, struct mbuf **m_head, int flags)
 {
	struct vtnet_tx_header *txhdr;
	struct virtio_net_hdr *hdr;
@@ -2196,7 +2494,7 @@
	m = *m_head;
	M_ASSERTPKTHDR(m);

-	txhdr = uma_zalloc(vtnet_tx_header_zone, M_NOWAIT | M_ZERO);
+	txhdr = uma_zalloc(vtnet_tx_header_zone, flags | M_ZERO);
	if (txhdr == NULL) {
		m_freem(m);
		*m_head = NULL;
@@ -2204,9 +2502,9 @@
	}

	/*
-	 * Always use the non-mergeable header, regardless if the feature
-	 * was negotiated. For transmit, num_buffers is always zero. The
-	 * vtnet_hdr_size is used to enqueue the correct header size.
+	 * Always use the non-mergeable header, regardless of whether mergeable
+	 * headers were negotiated, because for transmit num_buffers is always
+	 * zero. The vtnet_hdr_size is used to enqueue the right header size
+	 * segment.
	 */
	hdr = &txhdr->vth_uhdr.hdr;

@@ -2228,11 +2526,9 @@
	}

	error = vtnet_txq_enqueue_buf(txq, m_head, txhdr);
-	if (error == 0)
-		return (0);
-
 fail:
-	uma_zfree(vtnet_tx_header_zone, txhdr);
+	if (error)
+		uma_zfree(vtnet_tx_header_zone, txhdr);

	return (error);
 }
@@ -2270,7 +2566,7 @@
		if (m0 == NULL)
			break;

-		if (vtnet_txq_encap(txq, &m0) != 0) {
+		if (vtnet_txq_encap(txq, &m0, M_NOWAIT) != 0) {
			if (m0 != NULL)
				IFQ_DRV_PREPEND(&ifp->if_snd, m0);
			break;
@@ -2347,7 +2643,7 @@
			break;
		}

-		if (vtnet_txq_encap(txq, &m) != 0) {
+		if (vtnet_txq_encap(txq, &m, M_NOWAIT) != 0) {
			if (m != NULL)
				drbr_putback(ifp, br, m);
			else
@@ -2381,7 +2677,6 @@
	sc = ifp->if_softc;
	npairs = sc->vtnet_act_vq_pairs;

-	/* check if flowid is set */
	if (M_HASHTYPE_GET(m) != M_HASHTYPE_NONE)
		i = m->m_pkthdr.flowid % npairs;
	else
@@ -2471,6 +2766,13 @@
	deq = 0;
	VTNET_TXQ_LOCK_ASSERT(txq);

+#ifdef DEV_NETMAP
+	if (netmap_tx_irq(txq->vtntx_sc->vtnet_ifp, txq->vtntx_id)) {
+		virtqueue_disable_intr(vq); // XXX luigi
+		return (0); // XXX or 1 ?
+	}
+#endif
+
	while ((txhdr = virtqueue_dequeue(vq, NULL)) != NULL) {
		m = txhdr->vth_mbuf;
		deq++;
@@ -2512,11 +2814,6 @@
		return;
	}

-#ifdef DEV_NETMAP
-	if (netmap_tx_irq(ifp, txq->vtntx_id) != NM_IRQ_PASS)
-		return;
-#endif /* DEV_NETMAP */
-
	VTNET_TXQ_LOCK(txq);

	if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) {
@@ -2703,7 +3000,7 @@
	 * Most drivers just ignore the return value - it only fails
	 * with ENOMEM so an error is not likely. 
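Each Rx queue above gets its own taskqueue with a single PI_NET thread, created at attach time and started per requested pair. A condensed kernel-style sketch of that pattern using the same taskqueue(9) calls as the driver (not a standalone program; the struct and names are mine):

    /* Kernel-style fragment (FreeBSD taskqueue(9)); illustrative names. */
    #include <sys/param.h>
    #include <sys/systm.h>
    #include <sys/malloc.h>
    #include <sys/priority.h>
    #include <sys/bus.h>
    #include <sys/taskqueue.h>

    struct myq {
        struct task       myq_intrtask;
        struct taskqueue *myq_tq;
    };

    static void
    myq_tq_intr(void *xq, int pending)
    {
        /* Deferred per-queue processing would go here. */
    }

    static int
    myq_start_taskqueue(struct myq *q, device_t dev, int id)
    {
        TASK_INIT(&q->myq_intrtask, 0, myq_tq_intr, q);
        q->myq_tq = taskqueue_create("myq", M_NOWAIT,
            taskqueue_thread_enqueue, &q->myq_tq);
        if (q->myq_tq == NULL)
            return (ENOMEM);

        /* As the comment above notes, this only fails with ENOMEM. */
        return (taskqueue_start_threads(&q->myq_tq, 1, PI_NET,
            "%s myq %d", device_get_nameunit(dev), id));
    }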
 */
-	for (i = 0; i < sc->vtnet_max_vq_pairs; i++) {
+	for (i = 0; i < sc->vtnet_req_vq_pairs; i++) {
		rxq = &sc->vtnet_rxqs[i];
		error = taskqueue_start_threads(&rxq->vtnrx_tq, 1, PI_NET,
		    "%s rxq %d", device_get_nameunit(dev), rxq->vtnrx_id);
@@ -2733,7 +3030,7 @@
		rxq = &sc->vtnet_rxqs[i];
		if (rxq->vtnrx_tq != NULL) {
			taskqueue_free(rxq->vtnrx_tq);
			rxq->vtnrx_tq = NULL;
		}

		txq = &sc->vtnet_txqs[i];
@@ -2773,7 +3070,12 @@
	struct vtnet_txq *txq;
	int i;

-	for (i = 0; i < sc->vtnet_act_vq_pairs; i++) {
+#ifdef DEV_NETMAP
+	if (nm_native_on(NA(sc->vtnet_ifp)))
+		return;
+#endif
+
+	for (i = 0; i < sc->vtnet_max_vq_pairs; i++) {
		rxq = &sc->vtnet_rxqs[i];
		vtnet_rxq_free_mbufs(rxq);

@@ -2789,11 +3091,13 @@
	struct vtnet_txq *txq;
	int i;

+	VTNET_CORE_LOCK_ASSERT(sc);
+
	/*
	 * Lock and unlock the per-queue mutex so we known the stop
	 * state is visible. Doing only the active queues should be
	 * sufficient, but it does not cost much extra to do all the
-	 * queues. Note we hold the core mutex here too.
+	 * queues.
	 */
	for (i = 0; i < sc->vtnet_max_vq_pairs; i++) {
		rxq = &sc->vtnet_rxqs[i];
@@ -2832,8 +3136,8 @@
	virtio_stop(dev);
	vtnet_stop_rendezvous(sc);

-	/* Free any mbufs left in the virtqueues. */
	vtnet_drain_rxtx_queues(sc);
+	sc->vtnet_act_vq_pairs = 1;
 }

 static int
@@ -2842,51 +3146,37 @@
	device_t dev;
	struct ifnet *ifp;
	uint64_t features;
-	int mask, error;
+	int error;

	dev = sc->vtnet_dev;
	ifp = sc->vtnet_ifp;
-	features = sc->vtnet_features;
+	features = sc->vtnet_negotiated_features;

-	mask = 0;
-#if defined(INET)
-	mask |= IFCAP_RXCSUM;
-#endif
-#if defined (INET6)
-	mask |= IFCAP_RXCSUM_IPV6;
-#endif
-
	/*
	 * Re-negotiate with the host, removing any disabled receive
	 * features. Transmit features are disabled only on our side
	 * via if_capenable and if_hwassist.
	 */
-	if (ifp->if_capabilities & mask) {
-		/*
-		 * We require both IPv4 and IPv6 offloading to be enabled
-		 * in order to negotiated it: VirtIO does not distinguish
-		 * between the two.
-		 */
-		if ((ifp->if_capenable & mask) != mask)
-			features &= ~VIRTIO_NET_F_GUEST_CSUM;
-	}
+	if ((ifp->if_capenable & (IFCAP_RXCSUM | IFCAP_RXCSUM_IPV6)) == 0)
+		features &= ~(VIRTIO_NET_F_GUEST_CSUM | VTNET_LRO_FEATURES);

-	if (ifp->if_capabilities & IFCAP_LRO) {
-		if ((ifp->if_capenable & IFCAP_LRO) == 0)
-			features &= ~VTNET_LRO_FEATURES;
-	}
+	if ((ifp->if_capenable & IFCAP_LRO) == 0)
+		features &= ~VTNET_LRO_FEATURES;

-	if (ifp->if_capabilities & IFCAP_VLAN_HWFILTER) {
-		if ((ifp->if_capenable & IFCAP_VLAN_HWFILTER) == 0)
-			features &= ~VIRTIO_NET_F_CTRL_VLAN;
-	}
+	if ((ifp->if_capenable & IFCAP_VLAN_HWFILTER) == 0)
+		features &= ~VIRTIO_NET_F_CTRL_VLAN;

	error = virtio_reinit(dev, features);
-	if (error)
+	if (error) {
		device_printf(dev, "virtio reinit error %d\n", error);
+		return (error);
+	}

-	return (error);
+	sc->vtnet_features = features;
+	virtio_reinit_complete(dev);
+
+	return (0);
 }

 static void
@@ -2897,9 +3187,7 @@
	ifp = sc->vtnet_ifp;

	if (sc->vtnet_flags & VTNET_FLAG_CTRL_RX) {
-		/* Restore promiscuous and all-multicast modes. */
		vtnet_rx_filter(sc);
-		/* Restore filtered MAC addresses. */
		vtnet_rx_filter_mac(sc);
	}

@@ -2911,32 +3199,30 @@
 vtnet_init_rx_queues(struct vtnet_softc *sc)
 {
	device_t dev;
+	struct ifnet *ifp;
	struct vtnet_rxq *rxq;
-	int i, clsize, error;
+	int i, clustersz, error;

	dev = sc->vtnet_dev;
+	ifp = sc->vtnet_ifp;

-	/*
-	 * Use the new cluster size if one has been set (via a MTU
-	 * change). Otherwise, use the standard 2K clusters. 
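The virtio reinit path above re-negotiates from the originally negotiated feature set, stripping receive features the administrator disabled: no Rx checksum implies no guest checksum and no LRO, and so on. A runnable sketch of the pruning step (the bit positions follow the VirtIO spec but are used here for illustration only):

    #include <stdint.h>
    #include <stdio.h>

    #define F_GUEST_CSUM (1ULL << 1)
    #define F_GUEST_TSO4 (1ULL << 7)
    #define F_GUEST_TSO6 (1ULL << 8)
    #define F_CTRL_VLAN  (1ULL << 19)
    #define LRO_FEATURES (F_GUEST_TSO4 | F_GUEST_TSO6)

    #define CAP_RXCSUM 0x1
    #define CAP_LRO    0x2
    #define CAP_VLANF  0x4

    static uint64_t
    prune_features(uint64_t features, unsigned capenable)
    {
        /* No Rx checksum also rules out LRO, which depends on it. */
        if ((capenable & CAP_RXCSUM) == 0)
            features &= ~(F_GUEST_CSUM | LRO_FEATURES);
        if ((capenable & CAP_LRO) == 0)
            features &= ~LRO_FEATURES;
        if ((capenable & CAP_VLANF) == 0)
            features &= ~F_CTRL_VLAN;
        return (features);
    }

    int
    main(void)
    {
        uint64_t f = F_GUEST_CSUM | LRO_FEATURES | F_CTRL_VLAN;

        /* Only Rx checksum enabled: LRO and VLAN filtering drop out. */
        printf("0x%llx\n", (unsigned long long)prune_features(f, CAP_RXCSUM));
        return (0);
    }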
- * - * BMV: It might make sense to use page sized clusters as - * the default (depending on the features negotiated). - */ - if (sc->vtnet_rx_new_clsize != 0) { - clsize = sc->vtnet_rx_new_clsize; - sc->vtnet_rx_new_clsize = 0; + clustersz = vtnet_rx_cluster_size(sc, ifp->if_mtu); + sc->vtnet_rx_clustersz = clustersz; + + if (sc->vtnet_flags & VTNET_FLAG_LRO_NOMRG) { + sc->vtnet_rx_nmbufs = howmany(sizeof(struct vtnet_rx_header) + + VTNET_MAX_RX_SIZE, clustersz); + KASSERT(sc->vtnet_rx_nmbufs < sc->vtnet_rx_nsegs, + ("%s: too many rx mbufs %d for %d segments", __func__, + sc->vtnet_rx_nmbufs, sc->vtnet_rx_nsegs)); } else - clsize = MCLBYTES; + sc->vtnet_rx_nmbufs = 1; - sc->vtnet_rx_clsize = clsize; - sc->vtnet_rx_nmbufs = VTNET_NEEDED_RX_MBUFS(sc, clsize); +#ifdef DEV_NETMAP + if (vtnet_netmap_init_rx_buffers(sc)) + return (0); +#endif - KASSERT(sc->vtnet_flags & VTNET_FLAG_MRG_RXBUFS || - sc->vtnet_rx_nmbufs < sc->vtnet_rx_nsegs, - ("%s: too many rx mbufs %d for %d segments", __func__, - sc->vtnet_rx_nmbufs, sc->vtnet_rx_nsegs)); - for (i = 0; i < sc->vtnet_act_vq_pairs; i++) { rxq = &sc->vtnet_rxqs[i]; @@ -2946,8 +3232,7 @@ VTNET_RXQ_UNLOCK(rxq); if (error) { - device_printf(dev, - "cannot allocate mbufs for Rx queue %d\n", i); + device_printf(dev, "cannot populate Rx queue %d\n", i); return (error); } } @@ -2964,6 +3249,7 @@ for (i = 0; i < sc->vtnet_act_vq_pairs; i++) { txq = &sc->vtnet_txqs[i]; txq->vtntx_watchdog = 0; + txq->vtntx_intr_threshold = vtnet_txq_intr_threshold(txq); } return (0); @@ -2993,36 +3279,85 @@ dev = sc->vtnet_dev; - if ((sc->vtnet_flags & VTNET_FLAG_MULTIQ) == 0) { + if ((sc->vtnet_flags & VTNET_FLAG_MQ) == 0) { sc->vtnet_act_vq_pairs = 1; return; } - npairs = sc->vtnet_requested_vq_pairs; + npairs = sc->vtnet_req_vq_pairs; if (vtnet_ctrl_mq_cmd(sc, npairs) != 0) { - device_printf(dev, - "cannot set active queue pairs to %d\n", npairs); + device_printf(dev, "cannot set active queue pairs to %d, " + "falling back to 1 queue pair\n", npairs); npairs = 1; } sc->vtnet_act_vq_pairs = npairs; } +static void +vtnet_update_rx_offloads(struct vtnet_softc *sc) +{ + struct ifnet *ifp; + uint64_t features; + int error; + + ifp = sc->vtnet_ifp; + features = sc->vtnet_features; + + VTNET_CORE_LOCK_ASSERT(sc); + + if (ifp->if_capabilities & (IFCAP_RXCSUM | IFCAP_RXCSUM_IPV6)) { + if (ifp->if_capenable & (IFCAP_RXCSUM | IFCAP_RXCSUM_IPV6)) + features |= VIRTIO_NET_F_GUEST_CSUM; + else + features &= ~VIRTIO_NET_F_GUEST_CSUM; + } + + if (ifp->if_capabilities & IFCAP_LRO && !vtnet_software_lro(sc)) { + if (ifp->if_capenable & IFCAP_LRO) + features |= VTNET_LRO_FEATURES; + else + features &= ~VTNET_LRO_FEATURES; + } + + error = vtnet_ctrl_guest_offloads(sc, + features & (VIRTIO_NET_F_GUEST_CSUM | VIRTIO_NET_F_GUEST_TSO4 | + VIRTIO_NET_F_GUEST_TSO6 | VIRTIO_NET_F_GUEST_ECN | + VIRTIO_NET_F_GUEST_UFO)); + if (error) { + device_printf(sc->vtnet_dev, + "%s: cannot update Rx features\n", __func__); + if (ifp->if_drv_flags & IFF_DRV_RUNNING) { + ifp->if_drv_flags &= ~IFF_DRV_RUNNING; + vtnet_init_locked(sc); + } + } else + sc->vtnet_features = features; +} + static int vtnet_reinit(struct vtnet_softc *sc) { + device_t dev; struct ifnet *ifp; int error; + dev = sc->vtnet_dev; ifp = sc->vtnet_ifp; - /* Use the current MAC address. 
*/ bcopy(IF_LLADDR(ifp), sc->vtnet_hwaddr, ETHER_ADDR_LEN); - vtnet_set_hwaddr(sc); + error = vtnet_virtio_reinit(sc); + if (error) + return (error); + + vtnet_set_macaddr(sc); vtnet_set_active_vq_pairs(sc); + if (sc->vtnet_flags & VTNET_FLAG_CTRL_VQ) + vtnet_init_rx_filters(sc); + ifp->if_hwassist = 0; if (ifp->if_capenable & IFCAP_TXCSUM) ifp->if_hwassist |= VTNET_CSUM_OFFLOAD; @@ -3033,16 +3368,10 @@ if (ifp->if_capenable & IFCAP_TSO6) ifp->if_hwassist |= CSUM_IP6_TSO; - if (sc->vtnet_flags & VTNET_FLAG_CTRL_VQ) - vtnet_init_rx_filters(sc); - error = vtnet_init_rxtx_queues(sc); if (error) return (error); - vtnet_enable_interrupts(sc); - ifp->if_drv_flags |= IFF_DRV_RUNNING; - return (0); } @@ -3062,22 +3391,15 @@ vtnet_stop(sc); - /* Reinitialize with the host. */ - if (vtnet_virtio_reinit(sc) != 0) - goto fail; + if (vtnet_reinit(sc) != 0) { + vtnet_stop(sc); + return; + } - if (vtnet_reinit(sc) != 0) - goto fail; - - virtio_reinit_complete(dev); - + ifp->if_drv_flags |= IFF_DRV_RUNNING; vtnet_update_link_status(sc); + vtnet_enable_interrupts(sc); callout_reset(&sc->vtnet_tick_ch, hz, vtnet_tick, sc); - - return; - -fail: - vtnet_stop(sc); } static void @@ -3087,6 +3409,13 @@ sc = xsc; +#ifdef DEV_NETMAP + if (!NA(sc->vtnet_ifp)) { + D("try to attach again"); + vtnet_netmap_attach(sc); + } +#endif + VTNET_CORE_LOCK(sc); vtnet_init_locked(sc); VTNET_CORE_UNLOCK(sc); @@ -3095,16 +3424,13 @@ static void vtnet_free_ctrl_vq(struct vtnet_softc *sc) { - struct virtqueue *vq; - vq = sc->vtnet_ctrl_vq; - /* * The control virtqueue is only polled and therefore it should * already be empty. */ - KASSERT(virtqueue_empty(vq), - ("%s: ctrl vq %p not empty", __func__, vq)); + KASSERT(virtqueue_empty(sc->vtnet_ctrl_vq), + ("%s: ctrl vq %p not empty", __func__, sc->vtnet_ctrl_vq)); } static void @@ -3115,47 +3441,88 @@ vq = sc->vtnet_ctrl_vq; + MPASS(sc->vtnet_flags & VTNET_FLAG_CTRL_VQ); VTNET_CORE_LOCK_ASSERT(sc); - KASSERT(sc->vtnet_flags & VTNET_FLAG_CTRL_VQ, - ("%s: CTRL_VQ feature not negotiated", __func__)); if (!virtqueue_empty(vq)) return; - if (virtqueue_enqueue(vq, cookie, sg, readable, writable) != 0) - return; /* - * Poll for the response, but the command is likely already - * done when we return from the notify. + * Poll for the response, but the command is likely completed before + * returning from the notify. 
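vtnet_exec_ctrl_cmd() above is the driver's only polled path: the readable header and payload segments plus the writable ack are enqueued together, the host is notified, and the driver spins in virtqueue_poll() until the device writes the ack. A condensed sketch of that sequence using the same virtqueue calls seen in the driver (kernel-style fragment, not a standalone program):

    /* Kernel-style fragment; same virtqueue(9)/sglist calls as above. */
    static int
    ctrl_cmd_poll(struct virtqueue *vq, void *cookie, struct sglist *sg,
        int readable, int writable)
    {
        int error;

        /* Readable segments (header, payload) precede the writable ack. */
        error = virtqueue_enqueue(vq, cookie, sg, readable, writable);
        if (error == 0) {
            virtqueue_notify(vq);
            /* Busy-wait: the control virtqueue is never interrupt driven. */
            virtqueue_poll(vq, NULL);
        }
        return (error);
    }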
*/ - virtqueue_notify(vq); - virtqueue_poll(vq, NULL); + if (virtqueue_enqueue(vq, cookie, sg, readable, writable) == 0) { + virtqueue_notify(vq); + virtqueue_poll(vq, NULL); + } } static int vtnet_ctrl_mac_cmd(struct vtnet_softc *sc, uint8_t *hwaddr) { - struct virtio_net_ctrl_hdr hdr __aligned(2); struct sglist_seg segs[3]; struct sglist sg; - uint8_t ack; + struct { + struct virtio_net_ctrl_hdr hdr __aligned(2); + uint8_t pad1; + uint8_t addr[ETHER_ADDR_LEN] __aligned(8); + uint8_t pad2; + uint8_t ack; + } s; int error; - hdr.class = VIRTIO_NET_CTRL_MAC; - hdr.cmd = VIRTIO_NET_CTRL_MAC_ADDR_SET; - ack = VIRTIO_NET_ERR; + error = 0; + MPASS(sc->vtnet_flags & VTNET_FLAG_CTRL_MAC); - sglist_init(&sg, 3, segs); + s.hdr.class = VIRTIO_NET_CTRL_MAC; + s.hdr.cmd = VIRTIO_NET_CTRL_MAC_ADDR_SET; + bcopy(hwaddr, &s.addr[0], ETHER_ADDR_LEN); + s.ack = VIRTIO_NET_ERR; + + sglist_init(&sg, nitems(segs), segs); + error |= sglist_append(&sg, &s.hdr, sizeof(struct virtio_net_ctrl_hdr)); + error |= sglist_append(&sg, &s.addr[0], ETHER_ADDR_LEN); + error |= sglist_append(&sg, &s.ack, sizeof(uint8_t)); + MPASS(error == 0 && sg.sg_nseg == nitems(segs)); + + if (error == 0) + vtnet_exec_ctrl_cmd(sc, &s.ack, &sg, sg.sg_nseg - 1, 1); + + return (s.ack == VIRTIO_NET_OK ? 0 : EIO); +} + +static int +vtnet_ctrl_guest_offloads(struct vtnet_softc *sc, uint64_t offloads) +{ + struct sglist_seg segs[3]; + struct sglist sg; + struct { + struct virtio_net_ctrl_hdr hdr __aligned(2); + uint8_t pad1; + uint64_t offloads __aligned(8); + uint8_t pad2; + uint8_t ack; + } s; + int error; + error = 0; - error |= sglist_append(&sg, &hdr, sizeof(struct virtio_net_ctrl_hdr)); - error |= sglist_append(&sg, hwaddr, ETHER_ADDR_LEN); - error |= sglist_append(&sg, &ack, sizeof(uint8_t)); - KASSERT(error == 0 && sg.sg_nseg == 3, - ("%s: error %d adding set MAC msg to sglist", __func__, error)); + MPASS(sc->vtnet_features & VIRTIO_NET_F_CTRL_GUEST_OFFLOADS); - vtnet_exec_ctrl_cmd(sc, &ack, &sg, sg.sg_nseg - 1, 1); + s.hdr.class = VIRTIO_NET_CTRL_GUEST_OFFLOADS; + s.hdr.cmd = VIRTIO_NET_CTRL_GUEST_OFFLOADS_SET; + s.offloads = vtnet_gtoh64(sc, offloads); + s.ack = VIRTIO_NET_ERR; - return (ack == VIRTIO_NET_OK ? 0 : EIO); + sglist_init(&sg, nitems(segs), segs); + error |= sglist_append(&sg, &s.hdr, sizeof(struct virtio_net_ctrl_hdr)); + error |= sglist_append(&sg, &s.offloads, sizeof(uint64_t)); + error |= sglist_append(&sg, &s.ack, sizeof(uint8_t)); + MPASS(error == 0 && sg.sg_nseg == nitems(segs)); + + if (error == 0) + vtnet_exec_ctrl_cmd(sc, &s.ack, &sg, sg.sg_nseg - 1, 1); + + return (s.ack == VIRTIO_NET_OK ? 
0 : EIO); } static int @@ -3164,63 +3531,64 @@ struct sglist_seg segs[3]; struct sglist sg; struct { - struct virtio_net_ctrl_hdr hdr; + struct virtio_net_ctrl_hdr hdr __aligned(2); uint8_t pad1; - struct virtio_net_ctrl_mq mq; + struct virtio_net_ctrl_mq mq __aligned(2); uint8_t pad2; uint8_t ack; - } s __aligned(2); + } s; int error; + error = 0; + MPASS(sc->vtnet_flags & VTNET_FLAG_MQ); + s.hdr.class = VIRTIO_NET_CTRL_MQ; s.hdr.cmd = VIRTIO_NET_CTRL_MQ_VQ_PAIRS_SET; - s.mq.virtqueue_pairs = npairs; + s.mq.virtqueue_pairs = vtnet_gtoh16(sc, npairs); s.ack = VIRTIO_NET_ERR; - sglist_init(&sg, 3, segs); - error = 0; + sglist_init(&sg, nitems(segs), segs); error |= sglist_append(&sg, &s.hdr, sizeof(struct virtio_net_ctrl_hdr)); error |= sglist_append(&sg, &s.mq, sizeof(struct virtio_net_ctrl_mq)); error |= sglist_append(&sg, &s.ack, sizeof(uint8_t)); - KASSERT(error == 0 && sg.sg_nseg == 3, - ("%s: error %d adding MQ message to sglist", __func__, error)); + MPASS(error == 0 && sg.sg_nseg == nitems(segs)); - vtnet_exec_ctrl_cmd(sc, &s.ack, &sg, sg.sg_nseg - 1, 1); + if (error == 0) + vtnet_exec_ctrl_cmd(sc, &s.ack, &sg, sg.sg_nseg - 1, 1); return (s.ack == VIRTIO_NET_OK ? 0 : EIO); } static int -vtnet_ctrl_rx_cmd(struct vtnet_softc *sc, int cmd, int on) +vtnet_ctrl_rx_cmd(struct vtnet_softc *sc, uint8_t cmd, int on) { struct sglist_seg segs[3]; struct sglist sg; struct { - struct virtio_net_ctrl_hdr hdr; + struct virtio_net_ctrl_hdr hdr __aligned(2); uint8_t pad1; uint8_t onoff; uint8_t pad2; uint8_t ack; - } s __aligned(2); + } s; int error; - KASSERT(sc->vtnet_flags & VTNET_FLAG_CTRL_RX, - ("%s: CTRL_RX feature not negotiated", __func__)); + error = 0; + MPASS(sc->vtnet_flags & VTNET_FLAG_CTRL_RX); s.hdr.class = VIRTIO_NET_CTRL_RX; s.hdr.cmd = cmd; s.onoff = !!on; s.ack = VIRTIO_NET_ERR; - sglist_init(&sg, 3, segs); - error = 0; + sglist_init(&sg, nitems(segs), segs); error |= sglist_append(&sg, &s.hdr, sizeof(struct virtio_net_ctrl_hdr)); error |= sglist_append(&sg, &s.onoff, sizeof(uint8_t)); error |= sglist_append(&sg, &s.ack, sizeof(uint8_t)); - KASSERT(error == 0 && sg.sg_nseg == 3, - ("%s: error %d adding Rx message to sglist", __func__, error)); + MPASS(error == 0 && sg.sg_nseg == nitems(segs)); - vtnet_exec_ctrl_cmd(sc, &s.ack, &sg, sg.sg_nseg - 1, 1); + if (error == 0) + vtnet_exec_ctrl_cmd(sc, &s.ack, &sg, sg.sg_nseg - 1, 1); return (s.ack == VIRTIO_NET_OK ? 0 : EIO); } @@ -3228,40 +3596,16 @@ static int vtnet_set_promisc(struct vtnet_softc *sc, int on) { - return (vtnet_ctrl_rx_cmd(sc, VIRTIO_NET_CTRL_RX_PROMISC, on)); } static int vtnet_set_allmulti(struct vtnet_softc *sc, int on) { - return (vtnet_ctrl_rx_cmd(sc, VIRTIO_NET_CTRL_RX_ALLMULTI, on)); } -/* - * The device defaults to promiscuous mode for backwards compatibility. - * Turn it off at attach time if possible. 
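The control commands above now build their header, payload, and ack in one on-stack struct, so every sglist segment points into storage that stays valid while the command is polled; the ack is seeded with VIRTIO_NET_ERR so a device that never writes it reads back as failure. A compilable illustration of such a layout (field values and offsets are for demonstration only):

    #include <stddef.h>
    #include <stdint.h>
    #include <stdio.h>

    #define VIRTIO_NET_ERR 1

    /* Illustrative mirror of the on-stack command block pattern. */
    struct ctrl_cmd {
        struct {
            uint8_t class_;
            uint8_t cmd;
        } hdr __attribute__((aligned(2)));
        uint8_t pad1;
        uint8_t onoff;  /* command payload */
        uint8_t pad2;
        uint8_t ack;    /* written by the device */
    };

    int
    main(void)
    {
        struct ctrl_cmd c = { .onoff = 1, .ack = VIRTIO_NET_ERR };

        printf("hdr@%zu onoff@%zu ack@%zu size=%zu ack=%d\n",
            offsetof(struct ctrl_cmd, hdr), offsetof(struct ctrl_cmd, onoff),
            offsetof(struct ctrl_cmd, ack), sizeof(struct ctrl_cmd), c.ack);
        return (0);
    }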
- */ static void -vtnet_attach_disable_promisc(struct vtnet_softc *sc) -{ - struct ifnet *ifp; - - ifp = sc->vtnet_ifp; - - VTNET_CORE_LOCK(sc); - if ((sc->vtnet_flags & VTNET_FLAG_CTRL_RX) == 0) { - ifp->if_flags |= IFF_PROMISC; - } else if (vtnet_set_promisc(sc, 0) != 0) { - ifp->if_flags |= IFF_PROMISC; - device_printf(sc->vtnet_dev, - "cannot disable default promiscuous mode\n"); - } - VTNET_CORE_UNLOCK(sc); -} - -static void vtnet_rx_filter(struct vtnet_softc *sc) { device_t dev; @@ -3272,13 +3616,15 @@ VTNET_CORE_LOCK_ASSERT(sc); - if (vtnet_set_promisc(sc, ifp->if_flags & IFF_PROMISC) != 0) + if (vtnet_set_promisc(sc, ifp->if_flags & IFF_PROMISC) != 0) { device_printf(dev, "cannot %s promiscuous mode\n", ifp->if_flags & IFF_PROMISC ? "enable" : "disable"); + } - if (vtnet_set_allmulti(sc, ifp->if_flags & IFF_ALLMULTI) != 0) + if (vtnet_set_allmulti(sc, ifp->if_flags & IFF_ALLMULTI) != 0) { device_printf(dev, "cannot %s all-multicast mode\n", ifp->if_flags & IFF_ALLMULTI ? "enable" : "disable"); + } } static void @@ -3296,14 +3642,15 @@ ifp = sc->vtnet_ifp; filter = sc->vtnet_mac_filter; + ucnt = 0; mcnt = 0; promisc = 0; allmulti = 0; + error = 0; + MPASS(sc->vtnet_flags & VTNET_FLAG_CTRL_RX); VTNET_CORE_LOCK_ASSERT(sc); - KASSERT(sc->vtnet_flags & VTNET_FLAG_CTRL_RX, - ("%s: CTRL_RX feature not negotiated", __func__)); /* Unicast MAC addresses: */ if_addr_rlock(ifp); @@ -3324,14 +3671,6 @@ } if_addr_runlock(ifp); - if (promisc != 0) { - filter->vmf_unicast.nentries = 0; - if_printf(ifp, "more than %d MAC addresses assigned, " - "falling back to promiscuous mode\n", - VTNET_MAX_MAC_ENTRIES); - } else - filter->vmf_unicast.nentries = ucnt; - /* Multicast MAC addresses: */ if_maddr_rlock(ifp); TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) { @@ -3348,34 +3687,40 @@ } if_maddr_runlock(ifp); + if (promisc != 0) { + if_printf(ifp, "cannot filter more than %d MAC addresses, " + "falling back to promiscuous mode\n", + VTNET_MAX_MAC_ENTRIES); + ucnt = 0; + } if (allmulti != 0) { - filter->vmf_multicast.nentries = 0; - if_printf(ifp, "more than %d multicast MAC addresses " - "assigned, falling back to all-multicast mode\n", + if_printf(ifp, "cannot filter more than %d multicast MAC " + "addresses, falling back to all-multicast mode\n", VTNET_MAX_MAC_ENTRIES); - } else - filter->vmf_multicast.nentries = mcnt; + mcnt = 0; + } if (promisc != 0 && allmulti != 0) goto out; + filter->vmf_unicast.nentries = vtnet_gtoh32(sc, ucnt); + filter->vmf_multicast.nentries = vtnet_gtoh32(sc, mcnt); + hdr.class = VIRTIO_NET_CTRL_MAC; hdr.cmd = VIRTIO_NET_CTRL_MAC_TABLE_SET; ack = VIRTIO_NET_ERR; - sglist_init(&sg, 4, segs); - error = 0; + sglist_init(&sg, nitems(segs), segs); error |= sglist_append(&sg, &hdr, sizeof(struct virtio_net_ctrl_hdr)); error |= sglist_append(&sg, &filter->vmf_unicast, - sizeof(uint32_t) + filter->vmf_unicast.nentries * ETHER_ADDR_LEN); + sizeof(uint32_t) + ucnt * ETHER_ADDR_LEN); error |= sglist_append(&sg, &filter->vmf_multicast, - sizeof(uint32_t) + filter->vmf_multicast.nentries * ETHER_ADDR_LEN); + sizeof(uint32_t) + mcnt * ETHER_ADDR_LEN); error |= sglist_append(&sg, &ack, sizeof(uint8_t)); - KASSERT(error == 0 && sg.sg_nseg == 4, - ("%s: error %d adding MAC filter msg to sglist", __func__, error)); + MPASS(error == 0 && sg.sg_nseg == nitems(segs)); - vtnet_exec_ctrl_cmd(sc, &ack, &sg, sg.sg_nseg - 1, 1); - + if (error == 0) + vtnet_exec_ctrl_cmd(sc, &ack, &sg, sg.sg_nseg - 1, 1); if (ack != VIRTIO_NET_OK) if_printf(ifp, "error setting host MAC filter table\n"); @@ 
-3392,28 +3737,30 @@ struct sglist_seg segs[3]; struct sglist sg; struct { - struct virtio_net_ctrl_hdr hdr; + struct virtio_net_ctrl_hdr hdr __aligned(2); uint8_t pad1; - uint16_t tag; + uint16_t tag __aligned(2); uint8_t pad2; uint8_t ack; - } s __aligned(2); + } s; int error; + error = 0; + MPASS(sc->vtnet_flags & VTNET_FLAG_VLAN_FILTER); + s.hdr.class = VIRTIO_NET_CTRL_VLAN; s.hdr.cmd = add ? VIRTIO_NET_CTRL_VLAN_ADD : VIRTIO_NET_CTRL_VLAN_DEL; - s.tag = tag; + s.tag = vtnet_gtoh16(sc, tag); s.ack = VIRTIO_NET_ERR; - sglist_init(&sg, 3, segs); - error = 0; + sglist_init(&sg, nitems(segs), segs); error |= sglist_append(&sg, &s.hdr, sizeof(struct virtio_net_ctrl_hdr)); error |= sglist_append(&sg, &s.tag, sizeof(uint16_t)); error |= sglist_append(&sg, &s.ack, sizeof(uint8_t)); - KASSERT(error == 0 && sg.sg_nseg == 3, - ("%s: error %d adding VLAN message to sglist", __func__, error)); + MPASS(error == 0 && sg.sg_nseg == nitems(segs)); - vtnet_exec_ctrl_cmd(sc, &s.ack, &sg, sg.sg_nseg - 1, 1); + if (error == 0) + vtnet_exec_ctrl_cmd(sc, &s.ack, &sg, sg.sg_nseg - 1, 1); return (s.ack == VIRTIO_NET_OK ? 0 : EIO); } @@ -3421,13 +3768,12 @@ static void vtnet_rx_filter_vlan(struct vtnet_softc *sc) { + int i, bit; uint32_t w; uint16_t tag; - int i, bit; + MPASS(sc->vtnet_flags & VTNET_FLAG_VLAN_FILTER); VTNET_CORE_LOCK_ASSERT(sc); - KASSERT(sc->vtnet_flags & VTNET_FLAG_VLAN_FILTER, - ("%s: VLAN_FILTER feature not negotiated", __func__)); /* Enable the filter for each configured VLAN. */ for (i = 0; i < VTNET_VLAN_FILTER_NWORDS; i++) { @@ -3466,6 +3812,7 @@ sc->vtnet_vlan_filter[idx] &= ~(1 << bit); if (ifp->if_capenable & IFCAP_VLAN_HWFILTER && + ifp->if_drv_flags & IFF_DRV_RUNNING && vtnet_exec_vlan_filter(sc, add, tag) != 0) { device_printf(sc->vtnet_dev, "cannot %s VLAN %d %s the host filter table\n", @@ -3495,21 +3842,34 @@ vtnet_update_vlan_filter(arg, 0, tag); } +static void +vtnet_update_speed_duplex(struct vtnet_softc *sc) +{ + struct ifnet *ifp; + uint32_t speed; + + ifp = sc->vtnet_ifp; + + if ((sc->vtnet_features & VIRTIO_NET_F_SPEED_DUPLEX) == 0) + return; + + /* BMV: Ignore duplex. */ + speed = virtio_read_dev_config_4(sc->vtnet_dev, + offsetof(struct virtio_net_config, speed)); + if (speed != -1) + ifp->if_baudrate = IF_Mbps(speed); +} + static int vtnet_is_link_up(struct vtnet_softc *sc) { - device_t dev; - struct ifnet *ifp; uint16_t status; - dev = sc->vtnet_dev; - ifp = sc->vtnet_ifp; + if ((sc->vtnet_features & VIRTIO_NET_F_STATUS) == 0) + return (1); - if ((ifp->if_capabilities & IFCAP_LINKSTATE) == 0) - status = VIRTIO_NET_S_LINK_UP; - else - status = virtio_read_dev_config_2(dev, - offsetof(struct virtio_net_config, status)); + status = virtio_read_dev_config_2(sc->vtnet_dev, + offsetof(struct virtio_net_config, status)); return ((status & VIRTIO_NET_S_LINK_UP) != 0); } @@ -3521,12 +3881,12 @@ int link; ifp = sc->vtnet_ifp; - VTNET_CORE_LOCK_ASSERT(sc); link = vtnet_is_link_up(sc); /* Notify if the link status has changed. 
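For reference, the config fields read by vtnet_is_link_up() and vtnet_update_speed_duplex() sit at fixed byte offsets given the extended virtio_net_config defined later in this patch; a compile-time sanity check, assuming that __packed layout:

/* mac[6] @ 0, status @ 6, max_virtqueue_pairs @ 8, mtu @ 10,
 * speed @ 12, duplex @ 16. */
CTASSERT(offsetof(struct virtio_net_config, status) == 6);
CTASSERT(offsetof(struct virtio_net_config, speed) == 12);
CTASSERT(offsetof(struct virtio_net_config, duplex) == 16);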
*/ if (link != 0 && sc->vtnet_link_active == 0) { + vtnet_update_speed_duplex(sc); sc->vtnet_link_active = 1; if_link_state_change(ifp, LINK_STATE_UP); } else if (link == 0 && sc->vtnet_link_active != 0) { @@ -3538,16 +3898,7 @@ static int vtnet_ifmedia_upd(struct ifnet *ifp) { - struct vtnet_softc *sc; - struct ifmedia *ifm; - - sc = ifp->if_softc; - ifm = &sc->vtnet_media; - - if (IFM_TYPE(ifm->ifm_media) != IFM_ETHER) - return (EINVAL); - - return (0); + return (EOPNOTSUPP); } static void @@ -3563,25 +3914,45 @@ VTNET_CORE_LOCK(sc); if (vtnet_is_link_up(sc) != 0) { ifmr->ifm_status |= IFM_ACTIVE; - ifmr->ifm_active |= VTNET_MEDIATYPE; + ifmr->ifm_active |= IFM_10G_T | IFM_FDX; } else ifmr->ifm_active |= IFM_NONE; VTNET_CORE_UNLOCK(sc); } static void -vtnet_set_hwaddr(struct vtnet_softc *sc) +vtnet_get_macaddr(struct vtnet_softc *sc) { + + if (sc->vtnet_flags & VTNET_FLAG_MAC) { + virtio_read_device_config_array(sc->vtnet_dev, + offsetof(struct virtio_net_config, mac), + &sc->vtnet_hwaddr[0], sizeof(uint8_t), ETHER_ADDR_LEN); + } else { + /* Generate a random locally administered unicast address. */ + sc->vtnet_hwaddr[0] = 0xB2; + arc4rand(&sc->vtnet_hwaddr[1], ETHER_ADDR_LEN - 1, 0); + } +} + +static void +vtnet_set_macaddr(struct vtnet_softc *sc) +{ device_t dev; - int i; + int error; dev = sc->vtnet_dev; if (sc->vtnet_flags & VTNET_FLAG_CTRL_MAC) { - if (vtnet_ctrl_mac_cmd(sc, sc->vtnet_hwaddr) != 0) + error = vtnet_ctrl_mac_cmd(sc, sc->vtnet_hwaddr); + if (error) device_printf(dev, "unable to set MAC address\n"); - } else if (sc->vtnet_flags & VTNET_FLAG_MAC) { - for (i = 0; i < ETHER_ADDR_LEN; i++) { + return; + } + + /* MAC in config is read-only in modern VirtIO. */ + if (!vtnet_modern(sc) && sc->vtnet_flags & VTNET_FLAG_MAC) { + for (int i = 0; i < ETHER_ADDR_LEN; i++) { virtio_write_dev_config_1(dev, offsetof(struct virtio_net_config, mac) + i, sc->vtnet_hwaddr[i]); @@ -3590,31 +3961,12 @@ } static void -vtnet_get_hwaddr(struct vtnet_softc *sc) +vtnet_attached_set_macaddr(struct vtnet_softc *sc) { - device_t dev; - int i; - dev = sc->vtnet_dev; - - if ((sc->vtnet_flags & VTNET_FLAG_MAC) == 0) { - /* - * Generate a random locally administered unicast address. - * - * It would be nice to generate the same MAC address across - * reboots, but it seems all the hosts currently available - * support the MAC feature, so this isn't too important. - */ - sc->vtnet_hwaddr[0] = 0xB2; - arc4rand(&sc->vtnet_hwaddr[1], ETHER_ADDR_LEN - 1, 0); - vtnet_set_hwaddr(sc); - return; - } - - for (i = 0; i < ETHER_ADDR_LEN; i++) { - sc->vtnet_hwaddr[i] = virtio_read_dev_config_1(dev, - offsetof(struct virtio_net_config, mac) + i); - } + /* Assign MAC address if it was generated. */ + if ((sc->vtnet_flags & VTNET_FLAG_MAC) == 0) + vtnet_set_macaddr(sc); } static void @@ -3645,36 +3997,6 @@ } static void -vtnet_set_tx_intr_threshold(struct vtnet_softc *sc) -{ - int size, thresh; - - size = virtqueue_size(sc->vtnet_txqs[0].vtntx_vq); - - /* - * The Tx interrupt is disabled until the queue free count falls - * below our threshold. Completed frames are drained from the Tx - * virtqueue before transmitting new frames and in the watchdog - * callout, so the frequency of Tx interrupts is greatly reduced, - * at the cost of not freeing mbufs as quickly as they otherwise - * would be. - * - * N.B. We assume all the Tx queues are the same size. - */ - thresh = size / 4; - - /* - * Without indirect descriptors, leave enough room for the most - * segments we handle. 
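The threshold computation removed below is not dropped outright: if_vtnetvar.h later in this patch gains a per-queue vtntx_intr_threshold field, so the equivalent logic presumably moves next to the transmit queue setup. A sketch of the per-queue form, under that assumption:

txq->vtntx_intr_threshold = virtqueue_size(txq->vtntx_vq) / 4;
if ((sc->vtnet_flags & VTNET_FLAG_INDIRECT) == 0 &&
    txq->vtntx_intr_threshold < sc->vtnet_tx_nsegs)
	txq->vtntx_intr_threshold = sc->vtnet_tx_nsegs;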
- */ - if ((sc->vtnet_flags & VTNET_FLAG_INDIRECT) == 0 && - thresh < sc->vtnet_tx_nsegs) - thresh = sc->vtnet_tx_nsegs; - - sc->vtnet_tx_intr_thresh = thresh; -} - -static void vtnet_setup_rxq_sysctl(struct sysctl_ctx_list *ctx, struct sysctl_oid_list *child, struct vtnet_rxq *rxq) { @@ -3702,6 +4024,8 @@ &stats->vrxs_csum, "Receive checksum offloaded"); SYSCTL_ADD_UQUAD(ctx, list, OID_AUTO, "csum_failed", CTLFLAG_RD, &stats->vrxs_csum_failed, "Receive checksum offload failed"); + SYSCTL_ADD_UQUAD(ctx, list, OID_AUTO, "host_lro", CTLFLAG_RD, + &stats->vrxs_host_lro, "Receive host segmentation offloaded"); SYSCTL_ADD_UQUAD(ctx, list, OID_AUTO, "rescheduled", CTLFLAG_RD, &stats->vrxs_rescheduled, "Receive interrupt handler rescheduled"); @@ -3732,7 +4056,7 @@ SYSCTL_ADD_UQUAD(ctx, list, OID_AUTO, "csum", CTLFLAG_RD, &stats->vtxs_csum, "Transmit checksum offloaded"); SYSCTL_ADD_UQUAD(ctx, list, OID_AUTO, "tso", CTLFLAG_RD, - &stats->vtxs_tso, "Transmit segmentation offloaded"); + &stats->vtxs_tso, "Transmit TCP segmentation offloaded"); SYSCTL_ADD_UQUAD(ctx, list, OID_AUTO, "rescheduled", CTLFLAG_RD, &stats->vtxs_rescheduled, "Transmit interrupt handler rescheduled"); @@ -3752,7 +4076,7 @@ tree = device_get_sysctl_tree(dev); child = SYSCTL_CHILDREN(tree); - for (i = 0; i < sc->vtnet_max_vq_pairs; i++) { + for (i = 0; i < sc->vtnet_req_vq_pairs; i++) { vtnet_setup_rxq_sysctl(ctx, child, &sc->vtnet_rxqs[i]); vtnet_setup_txq_sysctl(ctx, child, &sc->vtnet_txqs[i]); } @@ -3812,16 +4136,20 @@ CTLFLAG_RD, &stats->rx_task_rescheduled, "Times the receive interrupt task rescheduled itself"); - SYSCTL_ADD_UQUAD(ctx, child, OID_AUTO, "tx_csum_bad_ethtype", - CTLFLAG_RD, &stats->tx_csum_bad_ethtype, + SYSCTL_ADD_UQUAD(ctx, child, OID_AUTO, "tx_csum_unknown_ethtype", + CTLFLAG_RD, &stats->tx_csum_unknown_ethtype, "Aborted transmit of checksum offloaded buffer with unknown " "Ethernet type"); - SYSCTL_ADD_UQUAD(ctx, child, OID_AUTO, "tx_tso_bad_ethtype", - CTLFLAG_RD, &stats->tx_tso_bad_ethtype, - "Aborted transmit of TSO buffer with unknown Ethernet type"); + SYSCTL_ADD_UQUAD(ctx, child, OID_AUTO, "tx_csum_proto_mismatch", + CTLFLAG_RD, &stats->tx_csum_proto_mismatch, + "Aborted transmit of checksum offloaded buffer because of mismatched " + "protocols"); SYSCTL_ADD_UQUAD(ctx, child, OID_AUTO, "tx_tso_not_tcp", CTLFLAG_RD, &stats->tx_tso_not_tcp, "Aborted transmit of TSO buffer with non TCP protocol"); + SYSCTL_ADD_UQUAD(ctx, child, OID_AUTO, "tx_tso_without_csum", + CTLFLAG_RD, &stats->tx_tso_without_csum, + "Aborted transmit of TSO buffer without TCP checksum offload"); SYSCTL_ADD_UQUAD(ctx, child, OID_AUTO, "tx_defragged", CTLFLAG_RD, &stats->tx_defragged, "Transmit mbufs defragged"); @@ -3854,10 +4182,10 @@ SYSCTL_ADD_INT(ctx, child, OID_AUTO, "max_vq_pairs", CTLFLAG_RD, &sc->vtnet_max_vq_pairs, 0, - "Maximum number of supported virtqueue pairs"); - SYSCTL_ADD_INT(ctx, child, OID_AUTO, "requested_vq_pairs", - CTLFLAG_RD, &sc->vtnet_requested_vq_pairs, 0, - "Requested number of virtqueue pairs"); + "Number of maximum supported virtqueue pairs"); + SYSCTL_ADD_INT(ctx, child, OID_AUTO, "req_vq_pairs", + CTLFLAG_RD, &sc->vtnet_req_vq_pairs, 0, + "Number of requested virtqueue pairs"); SYSCTL_ADD_INT(ctx, child, OID_AUTO, "act_vq_pairs", CTLFLAG_RD, &sc->vtnet_act_vq_pairs, 0, "Number of active virtqueue pairs"); @@ -3865,6 +4193,19 @@ vtnet_setup_stat_sysctl(ctx, child, sc); } +static void +vtnet_load_tunables(struct vtnet_softc *sc) +{ + + sc->vtnet_lro_entry_count = vtnet_tunable_int(sc, +
"lro_entry_count", vtnet_lro_entry_count); + if (sc->vtnet_lro_entry_count < TCP_LRO_ENTRIES) + sc->vtnet_lro_entry_count = TCP_LRO_ENTRIES; + + sc->vtnet_lro_mbufq_depth = vtnet_tunable_int(sc, + "lro_mbufq_depeth", vtnet_lro_mbufq_depth); +} + static int vtnet_rxq_enable_intr(struct vtnet_rxq *rxq) { @@ -3906,10 +4247,14 @@ static void vtnet_enable_rx_interrupts(struct vtnet_softc *sc) { + struct vtnet_rxq *rxq; int i; - for (i = 0; i < sc->vtnet_act_vq_pairs; i++) - vtnet_rxq_enable_intr(&sc->vtnet_rxqs[i]); + for (i = 0; i < sc->vtnet_act_vq_pairs; i++) { + rxq = &sc->vtnet_rxqs[i]; + if (vtnet_rxq_enable_intr(rxq) != 0) + taskqueue_enqueue(rxq->vtnrx_tq, &rxq->vtnrx_intrtask); + } } static void @@ -3934,7 +4279,7 @@ { int i; - for (i = 0; i < sc->vtnet_act_vq_pairs; i++) + for (i = 0; i < sc->vtnet_max_vq_pairs; i++) vtnet_rxq_disable_intr(&sc->vtnet_rxqs[i]); } @@ -3943,7 +4288,7 @@ { int i; - for (i = 0; i < sc->vtnet_act_vq_pairs; i++) + for (i = 0; i < sc->vtnet_max_vq_pairs; i++) vtnet_txq_disable_intr(&sc->vtnet_txqs[i]); } diff -urN sys/dev/virtio.ori/network/if_vtnetvar.h sys/dev/virtio/network/if_vtnetvar.h --- sys/dev/virtio.ori/network/if_vtnetvar.h 2020-03-19 20:20:23.687482000 -0700 +++ sys/dev/virtio/network/if_vtnetvar.h 2020-03-19 23:17:51.681012000 -0700 @@ -1,4 +1,6 @@ /*- + * SPDX-License-Identifier: BSD-2-Clause-FreeBSD + * * Copyright (c) 2011, Bryan Venteicher * All rights reserved. * @@ -41,9 +43,10 @@ uint64_t rx_csum_bad_ipproto; uint64_t rx_csum_bad_offset; uint64_t rx_csum_bad_proto; - uint64_t tx_csum_bad_ethtype; - uint64_t tx_tso_bad_ethtype; + uint64_t tx_csum_unknown_ethtype; + uint64_t tx_csum_proto_mismatch; uint64_t tx_tso_not_tcp; + uint64_t tx_tso_without_csum; uint64_t tx_defragged; uint64_t tx_defrag_failed; @@ -65,6 +68,7 @@ uint64_t vrxs_ierrors; /* if_ierrors */ uint64_t vrxs_csum; uint64_t vrxs_csum_failed; + uint64_t vrxs_host_lro; uint64_t vrxs_rescheduled; }; @@ -77,6 +81,7 @@ struct vtnet_rxq_stats vtnrx_stats; struct taskqueue *vtnrx_tq; struct task vtnrx_intrtask; + struct lro_ctrl vtnrx_lro; #ifdef DEV_NETMAP struct virtio_net_hdr_mrg_rxbuf vtnrx_shrhdr; #endif /* DEV_NETMAP */ @@ -109,6 +114,7 @@ #endif int vtntx_id; int vtntx_watchdog; + int vtntx_intr_threshold; struct vtnet_txq_stats vtntx_stats; struct taskqueue *vtntx_tq; struct task vtntx_intrtask; @@ -134,9 +140,10 @@ struct ifnet *vtnet_ifp; struct vtnet_rxq *vtnet_rxqs; struct vtnet_txq *vtnet_txqs; + uint64_t vtnet_features; uint32_t vtnet_flags; -#define VTNET_FLAG_SUSPENDED 0x0001 +#define VTNET_FLAG_MODERN 0x0001 #define VTNET_FLAG_MAC 0x0002 #define VTNET_FLAG_CTRL_VQ 0x0004 #define VTNET_FLAG_CTRL_RX 0x0008 @@ -145,29 +152,33 @@ #define VTNET_FLAG_TSO_ECN 0x0040 #define VTNET_FLAG_MRG_RXBUFS 0x0080 #define VTNET_FLAG_LRO_NOMRG 0x0100 -#define VTNET_FLAG_MULTIQ 0x0200 +#define VTNET_FLAG_MQ 0x0200 #define VTNET_FLAG_INDIRECT 0x0400 #define VTNET_FLAG_EVENT_IDX 0x0800 +#define VTNET_FLAG_SUSPENDED 0x1000 +#define VTNET_FLAG_FIXUP_NEEDS_CSUM 0x2000 +#define VTNET_FLAG_SW_LRO 0x4000 - int vtnet_link_active; int vtnet_hdr_size; - int vtnet_rx_process_limit; - int vtnet_rx_nsegs; int vtnet_rx_nmbufs; - int vtnet_rx_clsize; - int vtnet_rx_new_clsize; - int vtnet_tx_intr_thresh; - int vtnet_tx_nsegs; - int vtnet_if_flags; + int vtnet_rx_clustersz; + int vtnet_rx_nsegs; + int vtnet_rx_process_limit; + int vtnet_link_active; int vtnet_act_vq_pairs; + int vtnet_req_vq_pairs; int vtnet_max_vq_pairs; - int vtnet_requested_vq_pairs; + int vtnet_tx_nsegs; + int vtnet_if_flags; + int 
vtnet_max_mtu; + int vtnet_lro_entry_count; + int vtnet_lro_mbufq_depth; struct virtqueue *vtnet_ctrl_vq; struct vtnet_mac_filter *vtnet_mac_filter; uint32_t *vtnet_vlan_filter; - uint64_t vtnet_features; + uint64_t vtnet_negotiated_features; struct vtnet_statistics vtnet_stats; struct callout vtnet_tick_ch; struct ifmedia vtnet_media; @@ -179,10 +190,22 @@ char vtnet_hwaddr[ETHER_ADDR_LEN]; }; +static bool +vtnet_modern(struct vtnet_softc *sc) +{ + return ((sc->vtnet_flags & VTNET_FLAG_MODERN) != 0); +} + +static bool +vtnet_software_lro(struct vtnet_softc *sc) +{ + return ((sc->vtnet_flags & VTNET_FLAG_SW_LRO) != 0); +} + /* * Maximum number of queue pairs we will autoconfigure to. */ -#define VTNET_MAX_QUEUE_PAIRS 8 +#define VTNET_MAX_QUEUE_PAIRS 32 /* * Additional completed entries can appear in a virtqueue before we can @@ -200,25 +223,24 @@ #define VTNET_NOTIFY_RETRIES 4 /* - * Fake the media type. The host does not provide us with any real media - * information. - */ -#define VTNET_MEDIATYPE (IFM_ETHER | IFM_10G_T | IFM_FDX) - -/* * Number of words to allocate for the VLAN shadow table. There is one * bit for each VLAN. */ #define VTNET_VLAN_FILTER_NWORDS (4096 / 32) +/* We depend on these being the same size (and same layout). */ +CTASSERT(sizeof(struct virtio_net_hdr_mrg_rxbuf) == + sizeof(struct virtio_net_hdr_v1)); + /* - * When mergeable buffers are not negotiated, the vtnet_rx_header structure - * below is placed at the beginning of the mbuf data. Use 4 bytes of pad to - * both keep the VirtIO header and the data non-contiguous and to keep the - * frame's payload 4 byte aligned. + * In legacy VirtIO when mergeable buffers are not negotiated, this structure + * is placed at the beginning of the mbuf data. Use 4 bytes of pad to keep + * both the VirtIO header and the data non-contiguous and the frame's payload + * 4 byte aligned. Note this padding would not be necessary if the + * VIRTIO_F_ANY_LAYOUT feature was negotiated (but we don't support that yet). * - * When mergeable buffers are negotiated, the host puts the VirtIO header in - * the beginning of the first mbuf's data. + * In modern VirtIO or when mergeable buffers are negotiated, the host puts + * the VirtIO header in the beginning of the first mbuf's data. */ #define VTNET_RX_HEADER_PAD 4 struct vtnet_rx_header { @@ -234,6 +256,7 @@ union { struct virtio_net_hdr hdr; struct virtio_net_hdr_mrg_rxbuf mhdr; + struct virtio_net_hdr_v1 v1hdr; } vth_uhdr; struct mbuf *vth_mbuf; @@ -248,6 +271,11 @@ */ #define VTNET_MAX_MAC_ENTRIES 128 +/* + * The driver version of struct virtio_net_ctrl_mac but with our predefined + * number of MAC addresses allocated. This structure is shared with the host, + * so nentries field is in the correct VirtIO endianness. + */ struct vtnet_mac_table { uint32_t nentries; uint8_t macs[VTNET_MAX_MAC_ENTRIES][ETHER_ADDR_LEN]; @@ -273,15 +301,16 @@ (VTNET_CSUM_OFFLOAD | VTNET_CSUM_OFFLOAD_IPV6 | CSUM_TSO) /* Features desired/implemented by this driver. 
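The CTASSERT earlier in this header (virtio_net_hdr_mrg_rxbuf and virtio_net_hdr_v1 being the same size and layout) is what lets the driver use a single header size for both cases: with a modern device or with mergeable buffers, the flattened 12-byte layout applies. A sketch of how the feature setup presumably selects vtnet_hdr_size, under that assumption (the attach code itself is outside this excerpt):

if (vtnet_modern(sc) || sc->vtnet_flags & VTNET_FLAG_MRG_RXBUFS)
	sc->vtnet_hdr_size = sizeof(struct virtio_net_hdr_mrg_rxbuf);
else
	sc->vtnet_hdr_size = sizeof(struct virtio_net_hdr);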
*/ -#define VTNET_FEATURES \ +#define VTNET_COMMON_FEATURES \ (VIRTIO_NET_F_MAC | \ VIRTIO_NET_F_STATUS | \ + VIRTIO_NET_F_CTRL_GUEST_OFFLOADS | \ + VIRTIO_NET_F_MTU | \ VIRTIO_NET_F_CTRL_VQ | \ VIRTIO_NET_F_CTRL_RX | \ VIRTIO_NET_F_CTRL_MAC_ADDR | \ VIRTIO_NET_F_CTRL_VLAN | \ VIRTIO_NET_F_CSUM | \ - VIRTIO_NET_F_GSO | \ VIRTIO_NET_F_HOST_TSO4 | \ VIRTIO_NET_F_HOST_TSO6 | \ VIRTIO_NET_F_HOST_ECN | \ @@ -291,9 +320,13 @@ VIRTIO_NET_F_GUEST_ECN | \ VIRTIO_NET_F_MRG_RXBUF | \ VIRTIO_NET_F_MQ | \ + VIRTIO_NET_F_SPEED_DUPLEX | \ VIRTIO_RING_F_EVENT_IDX | \ VIRTIO_RING_F_INDIRECT_DESC) +#define VTNET_MODERN_FEATURES (VTNET_COMMON_FEATURES) +#define VTNET_LEGACY_FEATURES (VTNET_COMMON_FEATURES | VIRTIO_NET_F_GSO) + /* * The VIRTIO_NET_F_HOST_TSO[46] features permit us to send the host * frames larger than 1514 bytes. @@ -303,48 +336,38 @@ /* * The VIRTIO_NET_F_GUEST_TSO[46] features permit the host to send us - * frames larger than 1514 bytes. We do not yet support software LRO - * via tcp_lro_rx(). + * frames larger than 1514 bytes. */ #define VTNET_LRO_FEATURES (VIRTIO_NET_F_GUEST_TSO4 | \ VIRTIO_NET_F_GUEST_TSO6 | VIRTIO_NET_F_GUEST_ECN) +#define VTNET_MIN_MTU 68 #define VTNET_MAX_MTU 65536 #define VTNET_MAX_RX_SIZE 65550 /* - * Used to preallocate the Vq indirect descriptors. The first segment - * is reserved for the header, except for mergeable buffers since the - * header is placed inline with the data. + * Used to preallocate the VQ indirect descriptors. Modern and mergeable + * buffers do not require one segment for the VirtIO header since it is + * placed inline at the beginning of the receive buffer. */ -#define VTNET_MRG_RX_SEGS 1 -#define VTNET_MIN_RX_SEGS 2 -#define VTNET_MAX_RX_SEGS 34 -#define VTNET_MIN_TX_SEGS 4 -#define VTNET_MAX_TX_SEGS 64 +#define VTNET_RX_SEGS_HDR_INLINE 1 +#define VTNET_RX_SEGS_HDR_SEPARATE 2 +#define VTNET_RX_SEGS_LRO_NOMRG 34 +#define VTNET_TX_SEGS_MIN 32 +#define VTNET_TX_SEGS_MAX 64 /* * Assert we can receive and transmit the maximum with regular * size clusters. */ -CTASSERT(((VTNET_MAX_RX_SEGS - 1) * MCLBYTES) >= VTNET_MAX_RX_SIZE); -CTASSERT(((VTNET_MAX_TX_SEGS - 1) * MCLBYTES) >= VTNET_MAX_MTU); +CTASSERT(((VTNET_RX_SEGS_LRO_NOMRG - 1) * MCLBYTES) >= VTNET_MAX_RX_SIZE); +CTASSERT(((VTNET_TX_SEGS_MAX - 1) * MCLBYTES) >= VTNET_MAX_MTU); /* * Number of slots in the Tx bufrings. This value matches most other * multiqueue drivers. */ #define VTNET_DEFAULT_BUFRING_SIZE 4096 - -/* - * Determine how many mbufs are in each receive buffer. For LRO without - * mergeable buffers, we must allocate an mbuf chain large enough to - * hold both the vtnet_rx_header and the maximum receivable data. - */ -#define VTNET_NEEDED_RX_MBUFS(_sc, _clsize) \ - ((_sc)->vtnet_flags & VTNET_FLAG_LRO_NOMRG) == 0 ? 1 : \ - howmany(sizeof(struct vtnet_rx_header) + VTNET_MAX_RX_SIZE, \ - (_clsize)) #define VTNET_CORE_MTX(_sc) &(_sc)->vtnet_mtx #define VTNET_CORE_LOCK(_sc) mtx_lock(VTNET_CORE_MTX((_sc))) diff -urN sys/dev/virtio.ori/network/virtio_net.h sys/dev/virtio/network/virtio_net.h --- sys/dev/virtio.ori/network/virtio_net.h 2020-03-19 20:20:23.688174000 -0700 +++ sys/dev/virtio/network/virtio_net.h 2020-03-19 23:17:51.681662000 -0700 @@ -1,4 +1,6 @@ /*- + * SPDX-License-Identifier: BSD-3-Clause + * * This header is BSD licensed so anyone can use the definitions to implement * compatible drivers/servers.
* @@ -32,29 +34,33 @@ #define _VIRTIO_NET_H /* The feature bitmap for virtio net */ -#define VIRTIO_NET_F_CSUM 0x00001 /* Host handles pkts w/ partial csum */ -#define VIRTIO_NET_F_GUEST_CSUM 0x00002 /* Guest handles pkts w/ partial csum*/ -#define VIRTIO_NET_F_MAC 0x00020 /* Host has given MAC address. */ -#define VIRTIO_NET_F_GSO 0x00040 /* Host handles pkts w/ any GSO type */ -#define VIRTIO_NET_F_GUEST_TSO4 0x00080 /* Guest can handle TSOv4 in. */ -#define VIRTIO_NET_F_GUEST_TSO6 0x00100 /* Guest can handle TSOv6 in. */ -#define VIRTIO_NET_F_GUEST_ECN 0x00200 /* Guest can handle TSO[6] w/ ECN in.*/ -#define VIRTIO_NET_F_GUEST_UFO 0x00400 /* Guest can handle UFO in. */ -#define VIRTIO_NET_F_HOST_TSO4 0x00800 /* Host can handle TSOv4 in. */ -#define VIRTIO_NET_F_HOST_TSO6 0x01000 /* Host can handle TSOv6 in. */ -#define VIRTIO_NET_F_HOST_ECN 0x02000 /* Host can handle TSO[6] w/ ECN in. */ -#define VIRTIO_NET_F_HOST_UFO 0x04000 /* Host can handle UFO in. */ -#define VIRTIO_NET_F_MRG_RXBUF 0x08000 /* Host can merge receive buffers. */ -#define VIRTIO_NET_F_STATUS 0x10000 /* virtio_net_config.status available*/ -#define VIRTIO_NET_F_CTRL_VQ 0x20000 /* Control channel available */ -#define VIRTIO_NET_F_CTRL_RX 0x40000 /* Control channel RX mode support */ -#define VIRTIO_NET_F_CTRL_VLAN 0x80000 /* Control channel VLAN filtering */ -#define VIRTIO_NET_F_CTRL_RX_EXTRA 0x100000 /* Extra RX mode control support */ -#define VIRTIO_NET_F_GUEST_ANNOUNCE 0x200000 /* Announce device on network */ -#define VIRTIO_NET_F_MQ 0x400000 /* Device supports RFS */ -#define VIRTIO_NET_F_CTRL_MAC_ADDR 0x800000 /* Set MAC address */ +#define VIRTIO_NET_F_CSUM 0x000001 /* Host handles pkts w/ partial csum */ +#define VIRTIO_NET_F_GUEST_CSUM 0x000002 /* Guest handles pkts w/ partial csum*/ +#define VIRTIO_NET_F_CTRL_GUEST_OFFLOADS 0x000004 /* Dynamic offload configuration. */ +#define VIRTIO_NET_F_MTU 0x000008 /* Initial MTU advice */ +#define VIRTIO_NET_F_MAC 0x000020 /* Host has given MAC address. */ +#define VIRTIO_NET_F_GSO 0x000040 /* Host handles pkts w/ any GSO type */ +#define VIRTIO_NET_F_GUEST_TSO4 0x000080 /* Guest can handle TSOv4 in. */ +#define VIRTIO_NET_F_GUEST_TSO6 0x000100 /* Guest can handle TSOv6 in. */ +#define VIRTIO_NET_F_GUEST_ECN 0x000200 /* Guest can handle TSO[6] w/ ECN in. */ +#define VIRTIO_NET_F_GUEST_UFO 0x000400 /* Guest can handle UFO in. */ +#define VIRTIO_NET_F_HOST_TSO4 0x000800 /* Host can handle TSOv4 in. */ +#define VIRTIO_NET_F_HOST_TSO6 0x001000 /* Host can handle TSOv6 in. */ +#define VIRTIO_NET_F_HOST_ECN 0x002000 /* Host can handle TSO[6] w/ ECN in. */ +#define VIRTIO_NET_F_HOST_UFO 0x004000 /* Host can handle UFO in. */ +#define VIRTIO_NET_F_MRG_RXBUF 0x008000 /* Host can merge receive buffers. 
*/ +#define VIRTIO_NET_F_STATUS 0x010000 /* virtio_net_config.status available*/ +#define VIRTIO_NET_F_CTRL_VQ 0x020000 /* Control channel available */ +#define VIRTIO_NET_F_CTRL_RX 0x040000 /* Control channel RX mode support */ +#define VIRTIO_NET_F_CTRL_VLAN 0x080000 /* Control channel VLAN filtering */ +#define VIRTIO_NET_F_CTRL_RX_EXTRA 0x100000 /* Extra RX mode control support */ +#define VIRTIO_NET_F_GUEST_ANNOUNCE 0x200000 /* Announce device on network */ +#define VIRTIO_NET_F_MQ 0x400000 /* Device supports Receive Flow Steering */ +#define VIRTIO_NET_F_CTRL_MAC_ADDR 0x800000 /* Set MAC address */ +#define VIRTIO_NET_F_SPEED_DUPLEX (1ULL << 63) /* Device set linkspeed and duplex */ #define VIRTIO_NET_S_LINK_UP 1 /* Link is up */ +#define VIRTIO_NET_S_ANNOUNCE 2 /* Announcement is needed */ struct virtio_net_config { /* The config defining mac address (if VIRTIO_NET_F_MAC) */ @@ -66,16 +72,32 @@ * Legal values are between 1 and 0x8000. */ uint16_t max_virtqueue_pairs; + /* Default maximum transmit unit advice */ + uint16_t mtu; + /* + * speed, in units of 1Mb. All values 0 to INT_MAX are legal. + * Any other value stands for unknown. + */ + uint32_t speed; + /* + * 0x00 - half duplex + * 0x01 - full duplex + * Any other value stands for unknown. + */ + uint8_t duplex; } __packed; /* - * This is the first element of the scatter-gather list. If you don't + * This header comes first in the scatter-gather list. If you don't * specify GSO or CSUM features, you can simply ignore the header. + * + * This is bitwise-equivalent to the legacy struct virtio_net_hdr_mrg_rxbuf, + * only flattened. */ -struct virtio_net_hdr { -#define VIRTIO_NET_HDR_F_NEEDS_CSUM 1 /* Use csum_start,csum_offset*/ +struct virtio_net_hdr_v1 { +#define VIRTIO_NET_HDR_F_NEEDS_CSUM 1 /* Use csum_start, csum_offset */ #define VIRTIO_NET_HDR_F_DATA_VALID 2 /* Csum is valid */ - uint8_t flags; + uint8_t flags; #define VIRTIO_NET_HDR_GSO_NONE 0 /* Not a GSO frame */ #define VIRTIO_NET_HDR_GSO_TCPV4 1 /* GSO frame, IPv4 TCP (TSO) */ #define VIRTIO_NET_HDR_GSO_UDP 3 /* GSO frame, IPv4 UDP (UFO) */ @@ -86,9 +108,27 @@ uint16_t gso_size; /* Bytes to append to hdr_len per frame */ uint16_t csum_start; /* Position to start checksumming from */ uint16_t csum_offset; /* Offset after that to place checksum */ + uint16_t num_buffers; /* Number of merged rx buffers */ }; /* + * This header comes first in the scatter-gather list. + * For legacy virtio, if VIRTIO_F_ANY_LAYOUT is not negotiated, it must + * be the first element of the scatter-gather list. If you don't + * specify GSO or CSUM features, you can simply ignore the header. + */ +struct virtio_net_hdr { + /* See VIRTIO_NET_HDR_F_* */ + uint8_t flags; + /* See VIRTIO_NET_HDR_GSO_* */ + uint8_t gso_type; + uint16_t hdr_len; /* Ethernet + IP + tcp/udp hdrs */ + uint16_t gso_size; /* Bytes to append to hdr_len per frame */ + uint16_t csum_start; /* Position to start checksumming from */ + uint16_t csum_offset; /* Offset after that to place checksum */ +}; + +/* * This is the version of the header to use when the MRG_RXBUF * feature has been negotiated. */ @@ -198,5 +238,20 @@ #define VIRTIO_NET_CTRL_MQ_VQ_PAIRS_SET 0 #define VIRTIO_NET_CTRL_MQ_VQ_PAIRS_MIN 1 #define VIRTIO_NET_CTRL_MQ_VQ_PAIRS_MAX 0x8000 + +/* + * Control network offloads + * + * Reconfigures the network offloads that Guest can handle. + * + * Available with the VIRTIO_NET_F_CTRL_GUEST_OFFLOADS feature bit. + * + * Command data format matches the feature bit mask exactly. 
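Since the payload is just the feature mask, a control command to reconfigure guest offloads follows the same pattern as the other control-queue commands in if_vtnet.c. A hypothetical sketch (vtnet_gtoh64() and the offloads value here are assumptions for illustration, not code from this patch):

struct {
	struct virtio_net_ctrl_hdr hdr __aligned(2);
	uint64_t offloads __aligned(8);
	uint8_t pad;
	uint8_t ack;
} s;

s.hdr.class = VIRTIO_NET_CTRL_GUEST_OFFLOADS;
s.hdr.cmd = VIRTIO_NET_CTRL_GUEST_OFFLOADS_SET;
s.offloads = vtnet_gtoh64(sc, offloads);	/* e.g. VIRTIO_NET_F_GUEST_CSUM */
s.ack = VIRTIO_NET_ERR;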
+ * + * See VIRTIO_NET_F_GUEST_* for the list of offloads + * that can be enabled/disabled. + */ +#define VIRTIO_NET_CTRL_GUEST_OFFLOADS 5 +#define VIRTIO_NET_CTRL_GUEST_OFFLOADS_SET 0 #endif /* _VIRTIO_NET_H */ diff -urN sys/dev/virtio.ori/pci/virtio_pci.c sys/dev/virtio/pci/virtio_pci.c --- sys/dev/virtio.ori/pci/virtio_pci.c 2020-03-19 20:20:23.683505000 -0700 +++ sys/dev/virtio/pci/virtio_pci.c 2020-03-19 23:17:51.677484000 -0700 @@ -1,5 +1,7 @@ /*- - * Copyright (c) 2011, Bryan Venteicher + * SPDX-License-Identifier: BSD-2-Clause-FreeBSD + * + * Copyright (c) 2017, Bryan Venteicher * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -33,6 +35,8 @@ #include #include #include +#include +#include #include #include @@ -47,355 +51,242 @@ #include #include #include +#include -#include "virtio_bus_if.h" +#include "virtio_pci_if.h" #include "virtio_if.h" -struct vtpci_interrupt { - struct resource *vti_irq; - int vti_rid; - void *vti_handler; -}; - -struct vtpci_virtqueue { - struct virtqueue *vtv_vq; - int vtv_no_intr; -}; - -struct vtpci_softc { - device_t vtpci_dev; - struct resource *vtpci_res; - struct resource *vtpci_msix_res; - uint64_t vtpci_features; - uint32_t vtpci_flags; -#define VTPCI_FLAG_NO_MSI 0x0001 -#define VTPCI_FLAG_NO_MSIX 0x0002 -#define VTPCI_FLAG_LEGACY 0x1000 -#define VTPCI_FLAG_MSI 0x2000 -#define VTPCI_FLAG_MSIX 0x4000 -#define VTPCI_FLAG_SHARED_MSIX 0x8000 -#define VTPCI_FLAG_ITYPE_MASK 0xF000 - - /* This "bus" will only ever have one child. */ - device_t vtpci_child_dev; - struct virtio_feature_desc *vtpci_child_feat_desc; - - int vtpci_nvqs; - struct vtpci_virtqueue *vtpci_vqs; - - /* - * Ideally, each virtqueue that the driver provides a callback for will - * receive its own MSIX vector. If there are not sufficient vectors - * available, then attempt to have all the VQs share one vector. For - * MSIX, the configuration changed notifications must be on their own - * vector. - * - * If MSIX is not available, we will attempt to have the whole device - * share one MSI vector, and then, finally, one legacy interrupt. 
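The softc fields deleted here do not disappear; they move into the struct vtpci_common that the rewritten helpers below operate on, and which each transport driver embeds in its own softc. A sketch of the intended arrangement, assuming the shared definitions live in a common header such as virtio_pci_var.h (names here are illustrative; the header itself is not part of this excerpt):

struct vtpci_legacy_softc {
	device_t		 vtpcil_dev;
	struct vtpci_common	 vtpcil_common;	/* state shared with vtpcim */
	struct resource		*vtpcil_res;	/* legacy I/O port BAR */
};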
- */ - struct vtpci_interrupt vtpci_device_interrupt; - struct vtpci_interrupt *vtpci_msix_vq_interrupts; - int vtpci_nmsix_resources; -}; - -static int vtpci_probe(device_t); -static int vtpci_attach(device_t); -static int vtpci_detach(device_t); -static int vtpci_suspend(device_t); -static int vtpci_resume(device_t); -static int vtpci_shutdown(device_t); -static void vtpci_driver_added(device_t, driver_t *); -static void vtpci_child_detached(device_t, device_t); -static int vtpci_read_ivar(device_t, device_t, int, uintptr_t *); -static int vtpci_write_ivar(device_t, device_t, int, uintptr_t); - -static uint64_t vtpci_negotiate_features(device_t, uint64_t); -static int vtpci_with_feature(device_t, uint64_t); -static int vtpci_alloc_virtqueues(device_t, int, int, - struct vq_alloc_info *); -static int vtpci_setup_intr(device_t, enum intr_type); -static void vtpci_stop(device_t); -static int vtpci_reinit(device_t, uint64_t); -static void vtpci_reinit_complete(device_t); -static void vtpci_notify_virtqueue(device_t, uint16_t); -static uint8_t vtpci_get_status(device_t); -static void vtpci_set_status(device_t, uint8_t); -static void vtpci_read_dev_config(device_t, bus_size_t, void *, int); -static void vtpci_write_dev_config(device_t, bus_size_t, void *, int); - -static void vtpci_describe_features(struct vtpci_softc *, const char *, +static void vtpci_describe_features(struct vtpci_common *, const char *, uint64_t); -static void vtpci_probe_and_attach_child(struct vtpci_softc *); - -static int vtpci_alloc_msix(struct vtpci_softc *, int); -static int vtpci_alloc_msi(struct vtpci_softc *); -static int vtpci_alloc_intr_msix_pervq(struct vtpci_softc *); -static int vtpci_alloc_intr_msix_shared(struct vtpci_softc *); -static int vtpci_alloc_intr_msi(struct vtpci_softc *); -static int vtpci_alloc_intr_legacy(struct vtpci_softc *); -static int vtpci_alloc_interrupt(struct vtpci_softc *, int, int, +static int vtpci_alloc_msix(struct vtpci_common *, int); +static int vtpci_alloc_msi(struct vtpci_common *); +static int vtpci_alloc_intr_msix_pervq(struct vtpci_common *); +static int vtpci_alloc_intr_msix_shared(struct vtpci_common *); +static int vtpci_alloc_intr_msi(struct vtpci_common *); +static int vtpci_alloc_intr_intx(struct vtpci_common *); +static int vtpci_alloc_interrupt(struct vtpci_common *, int, int, struct vtpci_interrupt *); -static int vtpci_alloc_intr_resources(struct vtpci_softc *); +static void vtpci_free_interrupt(struct vtpci_common *, + struct vtpci_interrupt *); -static int vtpci_setup_legacy_interrupt(struct vtpci_softc *, +static void vtpci_free_interrupts(struct vtpci_common *); +static void vtpci_free_virtqueues(struct vtpci_common *); +static void vtpci_cleanup_setup_intr_attempt(struct vtpci_common *); +static int vtpci_alloc_intr_resources(struct vtpci_common *); +static int vtpci_setup_intx_interrupt(struct vtpci_common *, enum intr_type); -static int vtpci_setup_pervq_msix_interrupts(struct vtpci_softc *, +static int vtpci_setup_pervq_msix_interrupts(struct vtpci_common *, enum intr_type); -static int vtpci_setup_msix_interrupts(struct vtpci_softc *, +static int vtpci_set_host_msix_vectors(struct vtpci_common *); +static int vtpci_setup_msix_interrupts(struct vtpci_common *, enum intr_type); -static int vtpci_setup_interrupts(struct vtpci_softc *, enum intr_type); - -static int vtpci_register_msix_vector(struct vtpci_softc *, int, - struct vtpci_interrupt *); -static int vtpci_set_host_msix_vectors(struct vtpci_softc *); -static int vtpci_reinit_virtqueue(struct 
vtpci_softc *, int); - -static void vtpci_free_interrupt(struct vtpci_softc *, - struct vtpci_interrupt *); -static void vtpci_free_interrupts(struct vtpci_softc *); -static void vtpci_free_virtqueues(struct vtpci_softc *); -static void vtpci_release_child_resources(struct vtpci_softc *); -static void vtpci_cleanup_setup_intr_attempt(struct vtpci_softc *); -static void vtpci_reset(struct vtpci_softc *); - -static void vtpci_select_virtqueue(struct vtpci_softc *, int); - -static void vtpci_legacy_intr(void *); +static int vtpci_setup_intrs(struct vtpci_common *, enum intr_type); +static int vtpci_reinit_virtqueue(struct vtpci_common *, int); +static void vtpci_intx_intr(void *); static int vtpci_vq_shared_intr_filter(void *); static void vtpci_vq_shared_intr(void *); static int vtpci_vq_intr_filter(void *); static void vtpci_vq_intr(void *); static void vtpci_config_intr(void *); -#define vtpci_setup_msi_interrupt vtpci_setup_legacy_interrupt +static void vtpci_setup_sysctl(struct vtpci_common *); -#define VIRTIO_PCI_CONFIG(_sc) \ - VIRTIO_PCI_CONFIG_OFF((((_sc)->vtpci_flags & VTPCI_FLAG_MSIX)) != 0) +#define vtpci_setup_msi_interrupt vtpci_setup_intx_interrupt /* - * I/O port read/write wrappers. + * This module contains two drivers: + * - virtio_pci_legacy (vtpcil) for pre-V1 support + * - virtio_pci_modern (vtpcim) for V1 support */ -#define vtpci_read_config_1(sc, o) bus_read_1((sc)->vtpci_res, (o)) -#define vtpci_read_config_2(sc, o) bus_read_2((sc)->vtpci_res, (o)) -#define vtpci_read_config_4(sc, o) bus_read_4((sc)->vtpci_res, (o)) -#define vtpci_write_config_1(sc, o, v) bus_write_1((sc)->vtpci_res, (o), (v)) -#define vtpci_write_config_2(sc, o, v) bus_write_2((sc)->vtpci_res, (o), (v)) -#define vtpci_write_config_4(sc, o, v) bus_write_4((sc)->vtpci_res, (o), (v)) - -/* Tunables. */ -static int vtpci_disable_msix = 0; -TUNABLE_INT("hw.virtio.pci.disable_msix", &vtpci_disable_msix); - -static device_method_t vtpci_methods[] = { - /* Device interface. */ - DEVMETHOD(device_probe, vtpci_probe), - DEVMETHOD(device_attach, vtpci_attach), - DEVMETHOD(device_detach, vtpci_detach), - DEVMETHOD(device_suspend, vtpci_suspend), - DEVMETHOD(device_resume, vtpci_resume), - DEVMETHOD(device_shutdown, vtpci_shutdown), - - /* Bus interface. */ - DEVMETHOD(bus_driver_added, vtpci_driver_added), - DEVMETHOD(bus_child_detached, vtpci_child_detached), - DEVMETHOD(bus_read_ivar, vtpci_read_ivar), - DEVMETHOD(bus_write_ivar, vtpci_write_ivar), - - /* VirtIO bus interface. 
*/ - DEVMETHOD(virtio_bus_negotiate_features, vtpci_negotiate_features), - DEVMETHOD(virtio_bus_with_feature, vtpci_with_feature), - DEVMETHOD(virtio_bus_alloc_virtqueues, vtpci_alloc_virtqueues), - DEVMETHOD(virtio_bus_setup_intr, vtpci_setup_intr), - DEVMETHOD(virtio_bus_stop, vtpci_stop), - DEVMETHOD(virtio_bus_reinit, vtpci_reinit), - DEVMETHOD(virtio_bus_reinit_complete, vtpci_reinit_complete), - DEVMETHOD(virtio_bus_notify_vq, vtpci_notify_virtqueue), - DEVMETHOD(virtio_bus_read_device_config, vtpci_read_dev_config), - DEVMETHOD(virtio_bus_write_device_config, vtpci_write_dev_config), - - DEVMETHOD_END -}; - -static driver_t vtpci_driver = { - "virtio_pci", - vtpci_methods, - sizeof(struct vtpci_softc) -}; - -devclass_t vtpci_devclass; - -DRIVER_MODULE(virtio_pci, pci, vtpci_driver, vtpci_devclass, 0, 0); MODULE_VERSION(virtio_pci, 1); MODULE_DEPEND(virtio_pci, pci, 1, 1, 1); MODULE_DEPEND(virtio_pci, virtio, 1, 1, 1); -static int -vtpci_probe(device_t dev) +int vtpci_disable_msix = 0; +TUNABLE_INT("hw.virtio.pci.disable_msix", &vtpci_disable_msix); + +static uint8_t +vtpci_read_isr(struct vtpci_common *cn) { - char desc[36]; - const char *name; + return (VIRTIO_PCI_READ_ISR(cn->vtpci_dev)); +} - if (pci_get_vendor(dev) != VIRTIO_PCI_VENDORID) - return (ENXIO); +static uint16_t +vtpci_get_vq_size(struct vtpci_common *cn, int idx) +{ + return (VIRTIO_PCI_GET_VQ_SIZE(cn->vtpci_dev, idx)); +} - if (pci_get_device(dev) < VIRTIO_PCI_DEVICEID_MIN || - pci_get_device(dev) > VIRTIO_PCI_DEVICEID_MAX) - return (ENXIO); +static bus_size_t +vtpci_get_vq_notify_off(struct vtpci_common *cn, int idx) +{ + return (VIRTIO_PCI_GET_VQ_NOTIFY_OFF(cn->vtpci_dev, idx)); +} - if (pci_get_revid(dev) != VIRTIO_PCI_ABI_VERSION) - return (ENXIO); +static void +vtpci_set_vq(struct vtpci_common *cn, struct virtqueue *vq) +{ + VIRTIO_PCI_SET_VQ(cn->vtpci_dev, vq); +} - name = virtio_device_name(pci_get_subdevice(dev)); - if (name == NULL) - name = "Unknown"; +static void +vtpci_disable_vq(struct vtpci_common *cn, int idx) +{ + VIRTIO_PCI_DISABLE_VQ(cn->vtpci_dev, idx); +} - snprintf(desc, sizeof(desc), "VirtIO PCI %s adapter", name); - device_set_desc_copy(dev, desc); - - return (BUS_PROBE_DEFAULT); +static int +vtpci_register_cfg_msix(struct vtpci_common *cn, struct vtpci_interrupt *intr) +{ + return (VIRTIO_PCI_REGISTER_CFG_MSIX(cn->vtpci_dev, intr)); } static int -vtpci_attach(device_t dev) +vtpci_register_vq_msix(struct vtpci_common *cn, int idx, + struct vtpci_interrupt *intr) { - struct vtpci_softc *sc; - device_t child; - int rid; + return (VIRTIO_PCI_REGISTER_VQ_MSIX(cn->vtpci_dev, idx, intr)); +} - sc = device_get_softc(dev); - sc->vtpci_dev = dev; +void +vtpci_init(struct vtpci_common *cn, device_t dev, bool modern) +{ + cn->vtpci_dev = dev; + pci_enable_busmaster(dev); - rid = PCIR_BAR(0); - sc->vtpci_res = bus_alloc_resource_any(dev, SYS_RES_IOPORT, &rid, - RF_ACTIVE); - if (sc->vtpci_res == NULL) { - device_printf(dev, "cannot map I/O space\n"); - return (ENXIO); - } - + if (modern) + cn->vtpci_flags |= VTPCI_FLAG_MODERN; if (pci_find_cap(dev, PCIY_MSI, NULL) != 0) - sc->vtpci_flags |= VTPCI_FLAG_NO_MSI; + cn->vtpci_flags |= VTPCI_FLAG_NO_MSI; + if (pci_find_cap(dev, PCIY_MSIX, NULL) != 0) + cn->vtpci_flags |= VTPCI_FLAG_NO_MSIX; - if (pci_find_cap(dev, PCIY_MSIX, NULL) == 0) { - rid = PCIR_BAR(1); - sc->vtpci_msix_res = bus_alloc_resource_any(dev, - SYS_RES_MEMORY, &rid, RF_ACTIVE); - } + vtpci_setup_sysctl(cn); +} - if (sc->vtpci_msix_res == NULL) - sc->vtpci_flags |= VTPCI_FLAG_NO_MSIX; +int 
+vtpci_add_child(struct vtpci_common *cn) +{ + device_t dev, child; - vtpci_reset(sc); + dev = cn->vtpci_dev; - /* Tell the host we've noticed this device. */ - vtpci_set_status(dev, VIRTIO_CONFIG_STATUS_ACK); - - if ((child = device_add_child(dev, NULL, -1)) == NULL) { + child = device_add_child(dev, NULL, -1); + if (child == NULL) { device_printf(dev, "cannot create child device\n"); - vtpci_set_status(dev, VIRTIO_CONFIG_STATUS_FAILED); - vtpci_detach(dev); return (ENOMEM); } - sc->vtpci_child_dev = child; - vtpci_probe_and_attach_child(sc); + cn->vtpci_child_dev = child; return (0); } -static int -vtpci_detach(device_t dev) +int +vtpci_delete_child(struct vtpci_common *cn) { - struct vtpci_softc *sc; - device_t child; + device_t dev, child; int error; - sc = device_get_softc(dev); + dev = cn->vtpci_dev; - if ((child = sc->vtpci_child_dev) != NULL) { + child = cn->vtpci_child_dev; + if (child != NULL) { error = device_delete_child(dev, child); if (error) return (error); - sc->vtpci_child_dev = NULL; + cn->vtpci_child_dev = NULL; } - vtpci_reset(sc); - - if (sc->vtpci_msix_res != NULL) { - bus_release_resource(dev, SYS_RES_MEMORY, PCIR_BAR(1), - sc->vtpci_msix_res); - sc->vtpci_msix_res = NULL; - } - - if (sc->vtpci_res != NULL) { - bus_release_resource(dev, SYS_RES_IOPORT, PCIR_BAR(0), - sc->vtpci_res); - sc->vtpci_res = NULL; - } - return (0); } -static int -vtpci_suspend(device_t dev) +void +vtpci_child_detached(struct vtpci_common *cn) { - return (bus_generic_suspend(dev)); -} + vtpci_release_child_resources(cn); -static int -vtpci_resume(device_t dev) -{ - - return (bus_generic_resume(dev)); + cn->vtpci_child_feat_desc = NULL; + cn->vtpci_host_features = 0; + cn->vtpci_features = 0; } -static int -vtpci_shutdown(device_t dev) +int +vtpci_reinit(struct vtpci_common *cn) { + int idx, error; - (void) bus_generic_shutdown(dev); - /* Forcibly stop the host device. */ - vtpci_stop(dev); + for (idx = 0; idx < cn->vtpci_nvqs; idx++) { + error = vtpci_reinit_virtqueue(cn, idx); + if (error) + return (error); + } + if (vtpci_is_msix_enabled(cn)) { + error = vtpci_set_host_msix_vectors(cn); + if (error) + return (error); + } + return (0); } static void -vtpci_driver_added(device_t dev, driver_t *driver) +vtpci_describe_features(struct vtpci_common *cn, const char *msg, + uint64_t features) { - struct vtpci_softc *sc; + device_t dev, child; - sc = device_get_softc(dev); + dev = cn->vtpci_dev; + child = cn->vtpci_child_dev; - vtpci_probe_and_attach_child(sc); + if (device_is_attached(child) || bootverbose == 0) + return; + + virtio_describe(dev, msg, features, cn->vtpci_child_feat_desc); } -static void -vtpci_child_detached(device_t dev, device_t child) +uint64_t +vtpci_negotiate_features(struct vtpci_common *cn, + uint64_t child_features, uint64_t host_features) { - struct vtpci_softc *sc; + uint64_t features; - sc = device_get_softc(dev); + cn->vtpci_host_features = host_features; + vtpci_describe_features(cn, "host", host_features); - vtpci_reset(sc); - vtpci_release_child_resources(sc); + /* + * Limit negotiated features to what the driver, virtqueue, and + * host all support. 
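A worked example of the intersection computed below, with hypothetical feature words:

/*
 * host_features  = CSUM | MRG_RXBUF | EVENT_IDX
 * child_features = CSUM | CTRL_VQ | EVENT_IDX
 * host & child   = CSUM | EVENT_IDX
 *
 * virtio_filter_transport_features() then presumably strips any
 * transport-level bits (EVENT_IDX, INDIRECT_DESC, ...) that the
 * platform virtqueue code cannot honor before the result is acked.
 */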
+ */ + features = host_features & child_features; + features = virtio_filter_transport_features(features); + + cn->vtpci_features = features; + vtpci_describe_features(cn, "negotiated", features); + + return (features); } -static int -vtpci_read_ivar(device_t dev, device_t child, int index, uintptr_t *result) +int +vtpci_with_feature(struct vtpci_common *cn, uint64_t feature) { - struct vtpci_softc *sc; + return ((cn->vtpci_features & feature) != 0); +} - sc = device_get_softc(dev); +int +vtpci_read_ivar(struct vtpci_common *cn, int index, uintptr_t *result) +{ + device_t dev; + int error; - if (sc->vtpci_child_dev != child) - return (ENOENT); + dev = cn->vtpci_dev; + error = 0; switch (index) { - case VIRTIO_IVAR_DEVTYPE: case VIRTIO_IVAR_SUBDEVICE: *result = pci_get_subdevice(dev); break; @@ -408,100 +299,74 @@ case VIRTIO_IVAR_SUBVENDOR: *result = pci_get_subdevice(dev); break; + case VIRTIO_IVAR_MODERN: + *result = vtpci_is_modern(cn); + break; default: - return (ENOENT); + error = ENOENT; } - return (0); + return (error); } -static int -vtpci_write_ivar(device_t dev, device_t child, int index, uintptr_t value) +int +vtpci_write_ivar(struct vtpci_common *cn, int index, uintptr_t value) { - struct vtpci_softc *sc; + int error; - sc = device_get_softc(dev); + error = 0; - if (sc->vtpci_child_dev != child) - return (ENOENT); - switch (index) { case VIRTIO_IVAR_FEATURE_DESC: - sc->vtpci_child_feat_desc = (void *) value; + cn->vtpci_child_feat_desc = (void *) value; break; default: - return (ENOENT); + error = ENOENT; } - return (0); + return (error); } -static uint64_t -vtpci_negotiate_features(device_t dev, uint64_t child_features) +int +vtpci_alloc_virtqueues(struct vtpci_common *cn, int flags, int nvqs, + struct vq_alloc_info *vq_info) { - struct vtpci_softc *sc; - uint64_t host_features, features; + device_t dev; + int idx, align, error; - sc = device_get_softc(dev); + dev = cn->vtpci_dev; - host_features = vtpci_read_config_4(sc, VIRTIO_PCI_HOST_FEATURES); - vtpci_describe_features(sc, "host", host_features); - /* - * Limit negotiated features to what the driver, virtqueue, and - * host all support. + * This is VIRTIO_PCI_VRING_ALIGN from legacy VirtIO. In modern VirtIO, + * the tables do not have to be allocated contiguously, but we do so + * anyways. 
*/ - features = host_features & child_features; - features = virtqueue_filter_features(features); - sc->vtpci_features = features; + align = 4096; - vtpci_describe_features(sc, "negotiated", features); - vtpci_write_config_4(sc, VIRTIO_PCI_GUEST_FEATURES, features); - - return (features); -} - -static int -vtpci_with_feature(device_t dev, uint64_t feature) -{ - struct vtpci_softc *sc; - - sc = device_get_softc(dev); - - return ((sc->vtpci_features & feature) != 0); -} - -static int -vtpci_alloc_virtqueues(device_t dev, int flags, int nvqs, - struct vq_alloc_info *vq_info) -{ - struct vtpci_softc *sc; - struct virtqueue *vq; - struct vtpci_virtqueue *vqx; - struct vq_alloc_info *info; - int idx, error; - uint16_t size; - - sc = device_get_softc(dev); - - if (sc->vtpci_nvqs != 0) + if (cn->vtpci_nvqs != 0) return (EALREADY); if (nvqs <= 0) return (EINVAL); - sc->vtpci_vqs = malloc(nvqs * sizeof(struct vtpci_virtqueue), + cn->vtpci_vqs = malloc(nvqs * sizeof(struct vtpci_virtqueue), M_DEVBUF, M_NOWAIT | M_ZERO); - if (sc->vtpci_vqs == NULL) + if (cn->vtpci_vqs == NULL) return (ENOMEM); for (idx = 0; idx < nvqs; idx++) { - vqx = &sc->vtpci_vqs[idx]; + struct vtpci_virtqueue *vqx; + struct vq_alloc_info *info; + struct virtqueue *vq; + bus_size_t notify_offset; + uint16_t size; + + vqx = &cn->vtpci_vqs[idx]; info = &vq_info[idx]; - vtpci_select_virtqueue(sc, idx); - size = vtpci_read_config_2(sc, VIRTIO_PCI_QUEUE_NUM); + size = vtpci_get_vq_size(cn, idx); + notify_offset = vtpci_get_vq_notify_off(cn, idx); - error = virtqueue_alloc(dev, idx, size, VIRTIO_PCI_VRING_ALIGN, + error = virtqueue_alloc(dev, idx, size, notify_offset, align, 0xFFFFFFFFUL, info, &vq); if (error) { device_printf(dev, @@ -509,270 +374,27 @@ break; } - vtpci_write_config_4(sc, VIRTIO_PCI_QUEUE_PFN, - virtqueue_paddr(vq) >> VIRTIO_PCI_QUEUE_ADDR_SHIFT); + vtpci_set_vq(cn, vq); vqx->vtv_vq = *info->vqai_vq = vq; vqx->vtv_no_intr = info->vqai_intr == NULL; - sc->vtpci_nvqs++; + cn->vtpci_nvqs++; } if (error) - vtpci_free_virtqueues(sc); + vtpci_free_virtqueues(cn); return (error); } static int -vtpci_setup_intr(device_t dev, enum intr_type type) +vtpci_alloc_msix(struct vtpci_common *cn, int nvectors) { - struct vtpci_softc *sc; - int attempt, error; - - sc = device_get_softc(dev); - - for (attempt = 0; attempt < 5; attempt++) { - /* - * Start with the most desirable interrupt configuration and - * fallback towards less desirable ones. 
- */ - switch (attempt) { - case 0: - error = vtpci_alloc_intr_msix_pervq(sc); - break; - case 1: - error = vtpci_alloc_intr_msix_shared(sc); - break; - case 2: - error = vtpci_alloc_intr_msi(sc); - break; - case 3: - error = vtpci_alloc_intr_legacy(sc); - break; - default: - device_printf(dev, - "exhausted all interrupt allocation attempts\n"); - return (ENXIO); - } - - if (error == 0 && vtpci_setup_interrupts(sc, type) == 0) - break; - - vtpci_cleanup_setup_intr_attempt(sc); - } - - if (bootverbose) { - if (sc->vtpci_flags & VTPCI_FLAG_LEGACY) - device_printf(dev, "using legacy interrupt\n"); - else if (sc->vtpci_flags & VTPCI_FLAG_MSI) - device_printf(dev, "using MSI interrupt\n"); - else if (sc->vtpci_flags & VTPCI_FLAG_SHARED_MSIX) - device_printf(dev, "using shared MSIX interrupts\n"); - else - device_printf(dev, "using per VQ MSIX interrupts\n"); - } - - return (0); -} - -static void -vtpci_stop(device_t dev) -{ - - vtpci_reset(device_get_softc(dev)); -} - -static int -vtpci_reinit(device_t dev, uint64_t features) -{ - struct vtpci_softc *sc; - int idx, error; - - sc = device_get_softc(dev); - - /* - * Redrive the device initialization. This is a bit of an abuse of - * the specification, but VirtualBox, QEMU/KVM, and BHyVe seem to - * play nice. - * - * We do not allow the host device to change from what was originally - * negotiated beyond what the guest driver changed. MSIX state should - * not change, number of virtqueues and their size remain the same, etc. - * This will need to be rethought when we want to support migration. - */ - - if (vtpci_get_status(dev) != VIRTIO_CONFIG_STATUS_RESET) - vtpci_stop(dev); - - /* - * Quickly drive the status through ACK and DRIVER. The device - * does not become usable again until vtpci_reinit_complete(). 
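For reference, the status ladder this code walks, per the VirtIO specification; modern devices add one rung:

/*
 * RESET(0) -> ACK -> DRIVER -> DRIVER_OK                 (legacy)
 * RESET(0) -> ACK -> DRIVER -> FEATURES_OK -> DRIVER_OK  (V1)
 *
 * FAILED may be set at any point; a V1 device refuses DRIVER_OK
 * unless it accepted FEATURES_OK after feature negotiation.
 */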
- */ - vtpci_set_status(dev, VIRTIO_CONFIG_STATUS_ACK); - vtpci_set_status(dev, VIRTIO_CONFIG_STATUS_DRIVER); - - vtpci_negotiate_features(dev, features); - - for (idx = 0; idx < sc->vtpci_nvqs; idx++) { - error = vtpci_reinit_virtqueue(sc, idx); - if (error) - return (error); - } - - if (sc->vtpci_flags & VTPCI_FLAG_MSIX) { - error = vtpci_set_host_msix_vectors(sc); - if (error) - return (error); - } - - return (0); -} - -static void -vtpci_reinit_complete(device_t dev) -{ - - vtpci_set_status(dev, VIRTIO_CONFIG_STATUS_DRIVER_OK); -} - -static void -vtpci_notify_virtqueue(device_t dev, uint16_t queue) -{ - struct vtpci_softc *sc; - - sc = device_get_softc(dev); - - vtpci_write_config_2(sc, VIRTIO_PCI_QUEUE_NOTIFY, queue); -} - -static uint8_t -vtpci_get_status(device_t dev) -{ - struct vtpci_softc *sc; - - sc = device_get_softc(dev); - - return (vtpci_read_config_1(sc, VIRTIO_PCI_STATUS)); -} - -static void -vtpci_set_status(device_t dev, uint8_t status) -{ - struct vtpci_softc *sc; - - sc = device_get_softc(dev); - - if (status != VIRTIO_CONFIG_STATUS_RESET) - status |= vtpci_get_status(dev); - - vtpci_write_config_1(sc, VIRTIO_PCI_STATUS, status); -} - -static void -vtpci_read_dev_config(device_t dev, bus_size_t offset, - void *dst, int length) -{ - struct vtpci_softc *sc; - bus_size_t off; - uint8_t *d; - int size; - - sc = device_get_softc(dev); - off = VIRTIO_PCI_CONFIG(sc) + offset; - - for (d = dst; length > 0; d += size, off += size, length -= size) { - if (length >= 4) { - size = 4; - *(uint32_t *)d = vtpci_read_config_4(sc, off); - } else if (length >= 2) { - size = 2; - *(uint16_t *)d = vtpci_read_config_2(sc, off); - } else { - size = 1; - *d = vtpci_read_config_1(sc, off); - } - } -} - -static void -vtpci_write_dev_config(device_t dev, bus_size_t offset, - void *src, int length) -{ - struct vtpci_softc *sc; - bus_size_t off; - uint8_t *s; - int size; - - sc = device_get_softc(dev); - off = VIRTIO_PCI_CONFIG(sc) + offset; - - for (s = src; length > 0; s += size, off += size, length -= size) { - if (length >= 4) { - size = 4; - vtpci_write_config_4(sc, off, *(uint32_t *)s); - } else if (length >= 2) { - size = 2; - vtpci_write_config_2(sc, off, *(uint16_t *)s); - } else { - size = 1; - vtpci_write_config_1(sc, off, *s); - } - } -} - -static void -vtpci_describe_features(struct vtpci_softc *sc, const char *msg, - uint64_t features) -{ - device_t dev, child; - - dev = sc->vtpci_dev; - child = sc->vtpci_child_dev; - - if (device_is_attached(child) || bootverbose == 0) - return; - - virtio_describe(dev, msg, features, sc->vtpci_child_feat_desc); -} - -static void -vtpci_probe_and_attach_child(struct vtpci_softc *sc) -{ - device_t dev, child; - - dev = sc->vtpci_dev; - child = sc->vtpci_child_dev; - - if (child == NULL) - return; - - if (device_get_state(child) != DS_NOTPRESENT) - return; - - if (device_probe(child) != 0) - return; - - vtpci_set_status(dev, VIRTIO_CONFIG_STATUS_DRIVER); - if (device_attach(child) != 0) { - vtpci_set_status(dev, VIRTIO_CONFIG_STATUS_FAILED); - vtpci_reset(sc); - vtpci_release_child_resources(sc); - /* Reset status for future attempt. */ - vtpci_set_status(dev, VIRTIO_CONFIG_STATUS_ACK); - } else { - vtpci_set_status(dev, VIRTIO_CONFIG_STATUS_DRIVER_OK); - VIRTIO_ATTACH_COMPLETED(child); - } -} - -static int -vtpci_alloc_msix(struct vtpci_softc *sc, int nvectors) -{ device_t dev; int nmsix, cnt, required; - dev = sc->vtpci_dev; + dev = cn->vtpci_dev; /* Allocate an additional vector for the config changes. 
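A quick check of the accounting that follows, with assumed numbers:

/*
 * Example: a device with three interrupt-driven virtqueues asks for
 * nvectors = 3, so required = 4: one message for configuration
 * changes plus one per virtqueue.
 */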
*/ required = nvectors + 1; @@ -783,7 +405,7 @@ cnt = required; if (pci_alloc_msix(dev, &cnt) == 0 && cnt >= required) { - sc->vtpci_nmsix_resources = required; + cn->vtpci_nmsix_resources = required; return (0); } @@ -793,12 +415,12 @@ } static int -vtpci_alloc_msi(struct vtpci_softc *sc) +vtpci_alloc_msi(struct vtpci_common *cn) { device_t dev; int nmsi, cnt, required; - dev = sc->vtpci_dev; + dev = cn->vtpci_dev; required = 1; nmsi = pci_msi_count(dev); @@ -815,80 +437,78 @@ } static int -vtpci_alloc_intr_msix_pervq(struct vtpci_softc *sc) +vtpci_alloc_intr_msix_pervq(struct vtpci_common *cn) { int i, nvectors, error; - if (vtpci_disable_msix != 0 || - sc->vtpci_flags & VTPCI_FLAG_NO_MSIX) + if (vtpci_disable_msix != 0 || cn->vtpci_flags & VTPCI_FLAG_NO_MSIX) return (ENOTSUP); - for (nvectors = 0, i = 0; i < sc->vtpci_nvqs; i++) { - if (sc->vtpci_vqs[i].vtv_no_intr == 0) + for (nvectors = 0, i = 0; i < cn->vtpci_nvqs; i++) { + if (cn->vtpci_vqs[i].vtv_no_intr == 0) nvectors++; } - error = vtpci_alloc_msix(sc, nvectors); + error = vtpci_alloc_msix(cn, nvectors); if (error) return (error); - sc->vtpci_flags |= VTPCI_FLAG_MSIX; + cn->vtpci_flags |= VTPCI_FLAG_MSIX; return (0); } static int -vtpci_alloc_intr_msix_shared(struct vtpci_softc *sc) +vtpci_alloc_intr_msix_shared(struct vtpci_common *cn) { int error; - if (vtpci_disable_msix != 0 || - sc->vtpci_flags & VTPCI_FLAG_NO_MSIX) + if (vtpci_disable_msix != 0 || cn->vtpci_flags & VTPCI_FLAG_NO_MSIX) return (ENOTSUP); - error = vtpci_alloc_msix(sc, 1); + error = vtpci_alloc_msix(cn, 1); if (error) return (error); - sc->vtpci_flags |= VTPCI_FLAG_MSIX | VTPCI_FLAG_SHARED_MSIX; + cn->vtpci_flags |= VTPCI_FLAG_MSIX | VTPCI_FLAG_SHARED_MSIX; return (0); } static int -vtpci_alloc_intr_msi(struct vtpci_softc *sc) +vtpci_alloc_intr_msi(struct vtpci_common *cn) { int error; /* Only BHyVe supports MSI. 
*/ - if (sc->vtpci_flags & VTPCI_FLAG_NO_MSI) + if (cn->vtpci_flags & VTPCI_FLAG_NO_MSI) return (ENOTSUP); - error = vtpci_alloc_msi(sc); + error = vtpci_alloc_msi(cn); if (error) return (error); - sc->vtpci_flags |= VTPCI_FLAG_MSI; + cn->vtpci_flags |= VTPCI_FLAG_MSI; return (0); } static int -vtpci_alloc_intr_legacy(struct vtpci_softc *sc) +vtpci_alloc_intr_intx(struct vtpci_common *cn) { - sc->vtpci_flags |= VTPCI_FLAG_LEGACY; + cn->vtpci_flags |= VTPCI_FLAG_INTX; return (0); } static int -vtpci_alloc_interrupt(struct vtpci_softc *sc, int rid, int flags, +vtpci_alloc_interrupt(struct vtpci_common *cn, int rid, int flags, struct vtpci_interrupt *intr) { struct resource *irq; - irq = bus_alloc_resource_any(sc->vtpci_dev, SYS_RES_IRQ, &rid, flags); + irq = bus_alloc_resource_any(cn->vtpci_dev, SYS_RES_IRQ, &rid, flags); if (irq == NULL) return (ENXIO); @@ -898,40 +518,136 @@ return (0); } +static void +vtpci_free_interrupt(struct vtpci_common *cn, struct vtpci_interrupt *intr) +{ + device_t dev; + + dev = cn->vtpci_dev; + + if (intr->vti_handler != NULL) { + bus_teardown_intr(dev, intr->vti_irq, intr->vti_handler); + intr->vti_handler = NULL; + } + + if (intr->vti_irq != NULL) { + bus_release_resource(dev, SYS_RES_IRQ, intr->vti_rid, + intr->vti_irq); + intr->vti_irq = NULL; + intr->vti_rid = -1; + } +} + +static void +vtpci_free_interrupts(struct vtpci_common *cn) +{ + struct vtpci_interrupt *intr; + int i, nvq_intrs; + + vtpci_free_interrupt(cn, &cn->vtpci_device_interrupt); + + if (cn->vtpci_nmsix_resources != 0) { + nvq_intrs = cn->vtpci_nmsix_resources - 1; + cn->vtpci_nmsix_resources = 0; + + if ((intr = cn->vtpci_msix_vq_interrupts) != NULL) { + for (i = 0; i < nvq_intrs; i++, intr++) + vtpci_free_interrupt(cn, intr); + + free(cn->vtpci_msix_vq_interrupts, M_DEVBUF); + cn->vtpci_msix_vq_interrupts = NULL; + } + } + + if (cn->vtpci_flags & (VTPCI_FLAG_MSI | VTPCI_FLAG_MSIX)) + pci_release_msi(cn->vtpci_dev); + + cn->vtpci_flags &= ~VTPCI_FLAG_ITYPE_MASK; +} + +static void +vtpci_free_virtqueues(struct vtpci_common *cn) +{ + struct vtpci_virtqueue *vqx; + int idx; + + for (idx = 0; idx < cn->vtpci_nvqs; idx++) { + vtpci_disable_vq(cn, idx); + + vqx = &cn->vtpci_vqs[idx]; + virtqueue_free(vqx->vtv_vq); + vqx->vtv_vq = NULL; + } + + free(cn->vtpci_vqs, M_DEVBUF); + cn->vtpci_vqs = NULL; + cn->vtpci_nvqs = 0; +} + +void +vtpci_release_child_resources(struct vtpci_common *cn) +{ + + vtpci_free_interrupts(cn); + vtpci_free_virtqueues(cn); +} + +static void +vtpci_cleanup_setup_intr_attempt(struct vtpci_common *cn) +{ + int idx; + + if (cn->vtpci_flags & VTPCI_FLAG_MSIX) { + vtpci_register_cfg_msix(cn, NULL); + + for (idx = 0; idx < cn->vtpci_nvqs; idx++) + vtpci_register_vq_msix(cn, idx, NULL); + } + + vtpci_free_interrupts(cn); +} + static int -vtpci_alloc_intr_resources(struct vtpci_softc *sc) +vtpci_alloc_intr_resources(struct vtpci_common *cn) { struct vtpci_interrupt *intr; int i, rid, flags, nvq_intrs, error; - rid = 0; flags = RF_ACTIVE; - if (sc->vtpci_flags & VTPCI_FLAG_LEGACY) + if (cn->vtpci_flags & VTPCI_FLAG_INTX) { + rid = 0; flags |= RF_SHAREABLE; - else + } else rid = 1; /* - * For legacy and MSI interrupts, this single resource handles all - * interrupts. For MSIX, this resource is used for the configuration - * changed interrupt. + * When using INTX or MSI interrupts, this resource handles all + * interrupts. When using MSIX, this resource handles just the + * configuration changed interrupt. 
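The resulting IRQ resource-ID layout, summarized; the rid-to-vector mapping is what the MSI-X registration paths below rely on:

/*
 * INTX: rid 0, shareable; one handler for config + all VQs.
 * MSI:  rid 1; one handler for config + all VQs.
 * MSIX: rid 1 = config vector, rids 2..N+1 = virtqueue vectors,
 *       with host vector = rid - 1 (VIRTIO_MSI_NO_VECTOR if none).
 */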
*/ - intr = &sc->vtpci_device_interrupt; - error = vtpci_alloc_interrupt(sc, rid, flags, intr); - if (error || sc->vtpci_flags & (VTPCI_FLAG_LEGACY | VTPCI_FLAG_MSI)) + intr = &cn->vtpci_device_interrupt; + + error = vtpci_alloc_interrupt(cn, rid, flags, intr); + if (error || cn->vtpci_flags & (VTPCI_FLAG_INTX | VTPCI_FLAG_MSI)) return (error); - /* Subtract one for the configuration changed interrupt. */ - nvq_intrs = sc->vtpci_nmsix_resources - 1; + /* + * Now allocate the interrupts for the virtqueues. This may be one + * for all the virtqueues, or one for each virtqueue. Subtract one + * below for the configuration changed interrupt. + */ + nvq_intrs = cn->vtpci_nmsix_resources - 1; - intr = sc->vtpci_msix_vq_interrupts = malloc(nvq_intrs * + cn->vtpci_msix_vq_interrupts = malloc(nvq_intrs * sizeof(struct vtpci_interrupt), M_DEVBUF, M_NOWAIT | M_ZERO); - if (sc->vtpci_msix_vq_interrupts == NULL) + if (cn->vtpci_msix_vq_interrupts == NULL) return (ENOMEM); + intr = cn->vtpci_msix_vq_interrupts; + for (i = 0, rid++; i < nvq_intrs; i++, rid++, intr++) { - error = vtpci_alloc_interrupt(sc, rid, flags, intr); + error = vtpci_alloc_interrupt(cn, rid, flags, intr); if (error) return (error); } @@ -940,34 +656,35 @@ } static int -vtpci_setup_legacy_interrupt(struct vtpci_softc *sc, enum intr_type type) +vtpci_setup_intx_interrupt(struct vtpci_common *cn, enum intr_type type) { struct vtpci_interrupt *intr; int error; - intr = &sc->vtpci_device_interrupt; - error = bus_setup_intr(sc->vtpci_dev, intr->vti_irq, type, NULL, - vtpci_legacy_intr, sc, &intr->vti_handler); + intr = &cn->vtpci_device_interrupt; + error = bus_setup_intr(cn->vtpci_dev, intr->vti_irq, type, NULL, + vtpci_intx_intr, cn, &intr->vti_handler); + return (error); } static int -vtpci_setup_pervq_msix_interrupts(struct vtpci_softc *sc, enum intr_type type) +vtpci_setup_pervq_msix_interrupts(struct vtpci_common *cn, enum intr_type type) { struct vtpci_virtqueue *vqx; struct vtpci_interrupt *intr; int i, error; - intr = sc->vtpci_msix_vq_interrupts; + intr = cn->vtpci_msix_vq_interrupts; - for (i = 0; i < sc->vtpci_nvqs; i++) { - vqx = &sc->vtpci_vqs[i]; + for (i = 0; i < cn->vtpci_nvqs; i++) { + vqx = &cn->vtpci_vqs[i]; if (vqx->vtv_no_intr) continue; - error = bus_setup_intr(sc->vtpci_dev, intr->vti_irq, type, + error = bus_setup_intr(cn->vtpci_dev, intr->vti_irq, type, vtpci_vq_intr_filter, vtpci_vq_intr, vqx->vtv_vq, &intr->vti_handler); if (error) @@ -980,106 +697,24 @@ } static int -vtpci_setup_msix_interrupts(struct vtpci_softc *sc, enum intr_type type) +vtpci_set_host_msix_vectors(struct vtpci_common *cn) { - device_t dev; - struct vtpci_interrupt *intr; - int error; - - dev = sc->vtpci_dev; - intr = &sc->vtpci_device_interrupt; - - error = bus_setup_intr(dev, intr->vti_irq, type, NULL, - vtpci_config_intr, sc, &intr->vti_handler); - if (error) - return (error); - - if (sc->vtpci_flags & VTPCI_FLAG_SHARED_MSIX) { - intr = sc->vtpci_msix_vq_interrupts; - error = bus_setup_intr(dev, intr->vti_irq, type, - vtpci_vq_shared_intr_filter, vtpci_vq_shared_intr, sc, - &intr->vti_handler); - } else - error = vtpci_setup_pervq_msix_interrupts(sc, type); - - return (error ?
error : vtpci_set_host_msix_vectors(sc)); -} - -static int -vtpci_setup_interrupts(struct vtpci_softc *sc, enum intr_type type) -{ - int error; - - type |= INTR_MPSAFE; - KASSERT(sc->vtpci_flags & VTPCI_FLAG_ITYPE_MASK, - ("%s: no interrupt type selected %#x", __func__, sc->vtpci_flags)); - - error = vtpci_alloc_intr_resources(sc); - if (error) - return (error); - - if (sc->vtpci_flags & VTPCI_FLAG_LEGACY) - error = vtpci_setup_legacy_interrupt(sc, type); - else if (sc->vtpci_flags & VTPCI_FLAG_MSI) - error = vtpci_setup_msi_interrupt(sc, type); - else - error = vtpci_setup_msix_interrupts(sc, type); - - return (error); -} - -static int -vtpci_register_msix_vector(struct vtpci_softc *sc, int offset, - struct vtpci_interrupt *intr) -{ - device_t dev; - uint16_t vector; - - dev = sc->vtpci_dev; - - if (intr != NULL) { - /* Map from guest rid to host vector. */ - vector = intr->vti_rid - 1; - } else - vector = VIRTIO_MSI_NO_VECTOR; - - vtpci_write_config_2(sc, offset, vector); - - /* Read vector to determine if the host had sufficient resources. */ - if (vtpci_read_config_2(sc, offset) != vector) { - device_printf(dev, - "insufficient host resources for MSIX interrupts\n"); - return (ENODEV); - } - - return (0); -} - -static int -vtpci_set_host_msix_vectors(struct vtpci_softc *sc) -{ struct vtpci_interrupt *intr, *tintr; - int idx, offset, error; + int idx, error; - intr = &sc->vtpci_device_interrupt; - offset = VIRTIO_MSI_CONFIG_VECTOR; - - error = vtpci_register_msix_vector(sc, offset, intr); + intr = &cn->vtpci_device_interrupt; + error = vtpci_register_cfg_msix(cn, intr); if (error) return (error); - intr = sc->vtpci_msix_vq_interrupts; - offset = VIRTIO_MSI_QUEUE_VECTOR; - - for (idx = 0; idx < sc->vtpci_nvqs; idx++) { - vtpci_select_virtqueue(sc, idx); - - if (sc->vtpci_vqs[idx].vtv_no_intr) + intr = cn->vtpci_msix_vq_interrupts; + for (idx = 0; idx < cn->vtpci_nvqs; idx++) { + if (cn->vtpci_vqs[idx].vtv_no_intr) tintr = NULL; else tintr = intr; - error = vtpci_register_msix_vector(sc, offset, tintr); + error = vtpci_register_vq_msix(cn, idx, tintr); if (error) break; @@ -1087,8 +722,8 @@ * For shared MSIX, all the virtqueues share the first * interrupt. */ - if (!sc->vtpci_vqs[idx].vtv_no_intr && - (sc->vtpci_flags & VTPCI_FLAG_SHARED_MSIX) == 0) + if (!cn->vtpci_vqs[idx].vtv_no_intr && + (cn->vtpci_flags & VTPCI_FLAG_SHARED_MSIX) == 0) intr++; } @@ -1096,164 +731,141 @@ } static int -vtpci_reinit_virtqueue(struct vtpci_softc *sc, int idx) +vtpci_setup_msix_interrupts(struct vtpci_common *cn, enum intr_type type) { - struct vtpci_virtqueue *vqx; - struct virtqueue *vq; + struct vtpci_interrupt *intr; int error; - uint16_t size; - vqx = &sc->vtpci_vqs[idx]; - vq = vqx->vtv_vq; + intr = &cn->vtpci_device_interrupt; - KASSERT(vq != NULL, ("%s: vq %d not allocated", __func__, idx)); - - vtpci_select_virtqueue(sc, idx); - size = vtpci_read_config_2(sc, VIRTIO_PCI_QUEUE_NUM); - - error = virtqueue_reinit(vq, size); + error = bus_setup_intr(cn->vtpci_dev, intr->vti_irq, type, NULL, + vtpci_config_intr, cn, &intr->vti_handler); if (error) return (error); - vtpci_write_config_4(sc, VIRTIO_PCI_QUEUE_PFN, - virtqueue_paddr(vq) >> VIRTIO_PCI_QUEUE_ADDR_SHIFT); + if (cn->vtpci_flags & VTPCI_FLAG_SHARED_MSIX) { + intr = &cn->vtpci_msix_vq_interrupts[0]; - return (0); + error = bus_setup_intr(cn->vtpci_dev, intr->vti_irq, type, + vtpci_vq_shared_intr_filter, vtpci_vq_shared_intr, cn, + &intr->vti_handler); + } else + error = vtpci_setup_pervq_msix_interrupts(cn, type); + + return (error ? 
error : vtpci_set_host_msix_vectors(cn)); } -static void -vtpci_free_interrupt(struct vtpci_softc *sc, struct vtpci_interrupt *intr) +static int +vtpci_setup_intrs(struct vtpci_common *cn, enum intr_type type) { - device_t dev; + int error; - dev = sc->vtpci_dev; + type |= INTR_MPSAFE; + KASSERT(cn->vtpci_flags & VTPCI_FLAG_ITYPE_MASK, + ("%s: no interrupt type selected %#x", __func__, cn->vtpci_flags)); - if (intr->vti_handler != NULL) { - bus_teardown_intr(dev, intr->vti_irq, intr->vti_handler); - intr->vti_handler = NULL; - } + error = vtpci_alloc_intr_resources(cn); + if (error) + return (error); - if (intr->vti_irq != NULL) { - bus_release_resource(dev, SYS_RES_IRQ, intr->vti_rid, - intr->vti_irq); - intr->vti_irq = NULL; - intr->vti_rid = -1; - } + if (cn->vtpci_flags & VTPCI_FLAG_INTX) + error = vtpci_setup_intx_interrupt(cn, type); + else if (cn->vtpci_flags & VTPCI_FLAG_MSI) + error = vtpci_setup_msi_interrupt(cn, type); + else + error = vtpci_setup_msix_interrupts(cn, type); + + return (error); } -static void -vtpci_free_interrupts(struct vtpci_softc *sc) +int +vtpci_setup_interrupts(struct vtpci_common *cn, enum intr_type type) { - struct vtpci_interrupt *intr; - int i, nvq_intrs; + device_t dev; + int attempt, error; - vtpci_free_interrupt(sc, &sc->vtpci_device_interrupt); + dev = cn->vtpci_dev; - if (sc->vtpci_nmsix_resources != 0) { - nvq_intrs = sc->vtpci_nmsix_resources - 1; - sc->vtpci_nmsix_resources = 0; + for (attempt = 0; attempt < 5; attempt++) { + /* + * Start with the most desirable interrupt configuration and + * fallback towards less desirable ones. + */ + switch (attempt) { + case 0: + error = vtpci_alloc_intr_msix_pervq(cn); + break; + case 1: + error = vtpci_alloc_intr_msix_shared(cn); + break; + case 2: + error = vtpci_alloc_intr_msi(cn); + break; + case 3: + error = vtpci_alloc_intr_intx(cn); + break; + default: + device_printf(dev, + "exhausted all interrupt allocation attempts\n"); + return (ENXIO); + } - intr = sc->vtpci_msix_vq_interrupts; - if (intr != NULL) { - for (i = 0; i < nvq_intrs; i++, intr++) - vtpci_free_interrupt(sc, intr); + if (error == 0 && vtpci_setup_intrs(cn, type) == 0) + break; - free(sc->vtpci_msix_vq_interrupts, M_DEVBUF); - sc->vtpci_msix_vq_interrupts = NULL; - } + vtpci_cleanup_setup_intr_attempt(cn); } - if (sc->vtpci_flags & (VTPCI_FLAG_MSI | VTPCI_FLAG_MSIX)) - pci_release_msi(sc->vtpci_dev); + if (bootverbose) { + if (cn->vtpci_flags & VTPCI_FLAG_INTX) + device_printf(dev, "using legacy interrupt\n"); + else if (cn->vtpci_flags & VTPCI_FLAG_MSI) + device_printf(dev, "using MSI interrupt\n"); + else if (cn->vtpci_flags & VTPCI_FLAG_SHARED_MSIX) + device_printf(dev, "using shared MSIX interrupts\n"); + else + device_printf(dev, "using per VQ MSIX interrupts\n"); + } - sc->vtpci_flags &= ~VTPCI_FLAG_ITYPE_MASK; + return (0); } -static void -vtpci_free_virtqueues(struct vtpci_softc *sc) +static int +vtpci_reinit_virtqueue(struct vtpci_common *cn, int idx) { struct vtpci_virtqueue *vqx; - int idx; + struct virtqueue *vq; + int error; - for (idx = 0; idx < sc->vtpci_nvqs; idx++) { - vqx = &sc->vtpci_vqs[idx]; + vqx = &cn->vtpci_vqs[idx]; + vq = vqx->vtv_vq; - vtpci_select_virtqueue(sc, idx); - vtpci_write_config_4(sc, VIRTIO_PCI_QUEUE_PFN, 0); + KASSERT(vq != NULL, ("%s: vq %d not allocated", __func__, idx)); - virtqueue_free(vqx->vtv_vq); - vqx->vtv_vq = NULL; - } + error = virtqueue_reinit(vq, vtpci_get_vq_size(cn, idx)); + if (error == 0) + vtpci_set_vq(cn, vq); - free(sc->vtpci_vqs, M_DEVBUF); - sc->vtpci_vqs = NULL; - 
sc->vtpci_nvqs = 0; + return (error); } static void -vtpci_release_child_resources(struct vtpci_softc *sc) +vtpci_intx_intr(void *xcn) { - - vtpci_free_interrupts(sc); - vtpci_free_virtqueues(sc); -} - -static void -vtpci_cleanup_setup_intr_attempt(struct vtpci_softc *sc) -{ - int idx; - - if (sc->vtpci_flags & VTPCI_FLAG_MSIX) { - vtpci_write_config_2(sc, VIRTIO_MSI_CONFIG_VECTOR, - VIRTIO_MSI_NO_VECTOR); - - for (idx = 0; idx < sc->vtpci_nvqs; idx++) { - vtpci_select_virtqueue(sc, idx); - vtpci_write_config_2(sc, VIRTIO_MSI_QUEUE_VECTOR, - VIRTIO_MSI_NO_VECTOR); - } - } - - vtpci_free_interrupts(sc); -} - -static void -vtpci_reset(struct vtpci_softc *sc) -{ - - /* - * Setting the status to RESET sets the host device to - * the original, uninitialized state. - */ - vtpci_set_status(sc->vtpci_dev, VIRTIO_CONFIG_STATUS_RESET); -} - -static void -vtpci_select_virtqueue(struct vtpci_softc *sc, int idx) -{ - - vtpci_write_config_2(sc, VIRTIO_PCI_QUEUE_SEL, idx); -} - -static void -vtpci_legacy_intr(void *xsc) -{ - struct vtpci_softc *sc; + struct vtpci_common *cn; struct vtpci_virtqueue *vqx; int i; uint8_t isr; - sc = xsc; - vqx = &sc->vtpci_vqs[0]; + cn = xcn; + isr = vtpci_read_isr(cn); - /* Reading the ISR also clears it. */ - isr = vtpci_read_config_1(sc, VIRTIO_PCI_ISR); - if (isr & VIRTIO_PCI_ISR_CONFIG) - vtpci_config_intr(sc); + vtpci_config_intr(cn); if (isr & VIRTIO_PCI_ISR_INTR) { - for (i = 0; i < sc->vtpci_nvqs; i++, vqx++) { + vqx = &cn->vtpci_vqs[0]; + for (i = 0; i < cn->vtpci_nvqs; i++, vqx++) { if (vqx->vtv_no_intr == 0) virtqueue_intr(vqx->vtv_vq); } @@ -1261,17 +873,17 @@ } static int -vtpci_vq_shared_intr_filter(void *xsc) +vtpci_vq_shared_intr_filter(void *xcn) { - struct vtpci_softc *sc; + struct vtpci_common *cn; struct vtpci_virtqueue *vqx; int i, rc; + cn = xcn; + vqx = &cn->vtpci_vqs[0]; rc = 0; - sc = xsc; - vqx = &sc->vtpci_vqs[0]; - for (i = 0; i < sc->vtpci_nvqs; i++, vqx++) { + for (i = 0; i < cn->vtpci_nvqs; i++, vqx++) { if (vqx->vtv_no_intr == 0) rc |= virtqueue_intr_filter(vqx->vtv_vq); } @@ -1280,16 +892,16 @@ } static void -vtpci_vq_shared_intr(void *xsc) +vtpci_vq_shared_intr(void *xcn) { - struct vtpci_softc *sc; + struct vtpci_common *cn; struct vtpci_virtqueue *vqx; int i; - sc = xsc; - vqx = &sc->vtpci_vqs[0]; + cn = xcn; + vqx = &cn->vtpci_vqs[0]; - for (i = 0; i < sc->vtpci_nvqs; i++, vqx++) { + for (i = 0; i < cn->vtpci_nvqs; i++, vqx++) { if (vqx->vtv_no_intr == 0) virtqueue_intr(vqx->vtv_vq); } @@ -1317,14 +929,75 @@ } static void -vtpci_config_intr(void *xsc) +vtpci_config_intr(void *xcn) { - struct vtpci_softc *sc; + struct vtpci_common *cn; device_t child; - sc = xsc; - child = sc->vtpci_child_dev; + cn = xcn; + child = cn->vtpci_child_dev; if (child != NULL) VIRTIO_CONFIG_CHANGE(child); +} + +static int +vtpci_feature_sysctl(struct sysctl_req *req, struct vtpci_common *cn, + uint64_t features) +{ + struct sbuf *sb; + int error; + + sb = sbuf_new_for_sysctl(NULL, NULL, 256, req); + if (sb == NULL) + return (ENOMEM); + + error = virtio_describe_sbuf(sb, features, cn->vtpci_child_feat_desc); + sbuf_delete(sb); + + return (error); +} + +static int +vtpci_host_features_sysctl(SYSCTL_HANDLER_ARGS) +{ + struct vtpci_common *cn; + + cn = arg1; + + return (vtpci_feature_sysctl(req, cn, cn->vtpci_host_features)); +} + +static int +vtpci_negotiated_features_sysctl(SYSCTL_HANDLER_ARGS) +{ + struct vtpci_common *cn; + + cn = arg1; + + return (vtpci_feature_sysctl(req, cn, cn->vtpci_features)); +} + +static void +vtpci_setup_sysctl(struct vtpci_common 
*cn) +{ + device_t dev; + struct sysctl_ctx_list *ctx; + struct sysctl_oid *tree; + struct sysctl_oid_list *child; + + dev = cn->vtpci_dev; + ctx = device_get_sysctl_ctx(dev); + tree = device_get_sysctl_tree(dev); + child = SYSCTL_CHILDREN(tree); + + SYSCTL_ADD_INT(ctx, child, OID_AUTO, "nvqs", + CTLFLAG_RD, &cn->vtpci_nvqs, 0, "Number of virtqueues"); + + SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "host_features", + CTLTYPE_STRING | CTLFLAG_RD | CTLFLAG_MPSAFE, cn, 0, + vtpci_host_features_sysctl, "A", "Features supported by the host"); + SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "negotiated_features", + CTLTYPE_STRING | CTLFLAG_RD | CTLFLAG_MPSAFE, cn, 0, + vtpci_negotiated_features_sysctl, "A", "Features negotiated"); } diff -urN sys/dev/virtio.ori/pci/virtio_pci.h sys/dev/virtio/pci/virtio_pci.h --- sys/dev/virtio.ori/pci/virtio_pci.h 2020-03-19 20:20:23.683795000 -0700 +++ sys/dev/virtio/pci/virtio_pci.h 2020-03-19 23:17:51.677654000 -0700 @@ -1,86 +1,132 @@ /*- - * Copyright IBM Corp. 2007 + * SPDX-License-Identifier: BSD-2-Clause-FreeBSD * - * Authors: - * Anthony Liguori + * Copyright (c) 2017, Bryan Venteicher + * All rights reserved. * - * This header is BSD licensed so anyone can use the definitions to implement - * compatible drivers/servers. - * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. + * notice unmodified, this list of conditions, and the following + * disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. - * 3. Neither the name of IBM nor the names of its contributors - * may be used to endorse or promote products derived from this software - * without specific prior written permission. - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED - * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR - * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL IBM OR CONTRIBUTORS BE LIABLE - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS - * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
+ * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF + * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * * $FreeBSD: releng/11.3/sys/dev/virtio/pci/virtio_pci.h 331722 2018-03-29 02:50:57Z eadler $ */ #ifndef _VIRTIO_PCI_H #define _VIRTIO_PCI_H -/* VirtIO PCI vendor/device ID. */ -#define VIRTIO_PCI_VENDORID 0x1AF4 -#define VIRTIO_PCI_DEVICEID_MIN 0x1000 -#define VIRTIO_PCI_DEVICEID_MAX 0x103F +struct vtpci_interrupt { + struct resource *vti_irq; + int vti_rid; + void *vti_handler; +}; -/* VirtIO ABI version, this must match exactly. */ -#define VIRTIO_PCI_ABI_VERSION 0 +struct vtpci_virtqueue { + struct virtqueue *vtv_vq; + int vtv_no_intr; + int vtv_notify_offset; +}; -/* - * VirtIO Header, located in BAR 0. - */ -#define VIRTIO_PCI_HOST_FEATURES 0 /* host's supported features (32bit, RO)*/ -#define VIRTIO_PCI_GUEST_FEATURES 4 /* guest's supported features (32, RW) */ -#define VIRTIO_PCI_QUEUE_PFN 8 /* physical address of VQ (32, RW) */ -#define VIRTIO_PCI_QUEUE_NUM 12 /* number of ring entries (16, RO) */ -#define VIRTIO_PCI_QUEUE_SEL 14 /* current VQ selection (16, RW) */ -#define VIRTIO_PCI_QUEUE_NOTIFY 16 /* notify host regarding VQ (16, RW) */ -#define VIRTIO_PCI_STATUS 18 /* device status register (8, RW) */ -#define VIRTIO_PCI_ISR 19 /* interrupt status register, reading - * also clears the register (8, RO) */ -/* Only if MSIX is enabled: */ -#define VIRTIO_MSI_CONFIG_VECTOR 20 /* configuration change vector (16, RW) */ -#define VIRTIO_MSI_QUEUE_VECTOR 22 /* vector for selected VQ notifications - (16, RW) */ +struct vtpci_common { + device_t vtpci_dev; + uint64_t vtpci_host_features; + uint64_t vtpci_features; + struct vtpci_virtqueue *vtpci_vqs; + int vtpci_nvqs; -/* The bit of the ISR which indicates a device has an interrupt. */ -#define VIRTIO_PCI_ISR_INTR 0x1 -/* The bit of the ISR which indicates a device configuration change. */ -#define VIRTIO_PCI_ISR_CONFIG 0x2 -/* Vector value used to disable MSI for queue. */ -#define VIRTIO_MSI_NO_VECTOR 0xFFFF + uint32_t vtpci_flags; +#define VTPCI_FLAG_NO_MSI 0x0001 +#define VTPCI_FLAG_NO_MSIX 0x0002 +#define VTPCI_FLAG_MODERN 0x0004 +#define VTPCI_FLAG_INTX 0x1000 +#define VTPCI_FLAG_MSI 0x2000 +#define VTPCI_FLAG_MSIX 0x4000 +#define VTPCI_FLAG_SHARED_MSIX 0x8000 +#define VTPCI_FLAG_ITYPE_MASK 0xF000 -/* - * The remaining space is defined by each driver as the per-driver - * configuration space. - */ -#define VIRTIO_PCI_CONFIG_OFF(msix_enabled) ((msix_enabled) ? 24 : 20) + /* The VirtIO PCI "bus" will only ever have one child. */ + device_t vtpci_child_dev; + struct virtio_feature_desc *vtpci_child_feat_desc; -/* - * How many bits to shift physical queue address written to QUEUE_PFN. - * 12 is historical, and due to x86 page size. - */ -#define VIRTIO_PCI_QUEUE_ADDR_SHIFT 12 + /* + * Ideally, each virtqueue that the driver provides a callback for will + * receive its own MSIX vector. If there are not sufficient vectors + * available, then attempt to have all the VQs share one vector. For + * MSIX, the configuration changed notifications must be on their own + * vector. 
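+	 *
+	 * For example, a device with a receive and a transmit virtqueue
+	 * would ideally use three MSIX vectors: one per virtqueue plus
+	 * one for the configuration changed notifications.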
+ * + * If MSIX is not available, attempt to have the whole device share + * one MSI vector, and then, finally, one intx interrupt. + */ + struct vtpci_interrupt vtpci_device_interrupt; + struct vtpci_interrupt *vtpci_msix_vq_interrupts; + int vtpci_nmsix_resources; +}; -/* The alignment to use between consumer and producer parts of vring. */ -#define VIRTIO_PCI_VRING_ALIGN 4096 +extern int vtpci_disable_msix; + +static inline device_t +vtpci_child_device(struct vtpci_common *cn) +{ + return (cn->vtpci_child_dev); +} + +static inline bool +vtpci_is_msix_available(struct vtpci_common *cn) +{ + return ((cn->vtpci_flags & VTPCI_FLAG_NO_MSIX) == 0); +} + +static inline bool +vtpci_is_msix_enabled(struct vtpci_common *cn) +{ + return ((cn->vtpci_flags & VTPCI_FLAG_MSIX) != 0); +} + +static inline bool +vtpci_is_modern(struct vtpci_common *cn) +{ + return ((cn->vtpci_flags & VTPCI_FLAG_MODERN) != 0); +} + +static inline int +vtpci_virtqueue_count(struct vtpci_common *cn) +{ + return (cn->vtpci_nvqs); +} + +void vtpci_init(struct vtpci_common *cn, device_t dev, bool modern); +int vtpci_add_child(struct vtpci_common *cn); +int vtpci_delete_child(struct vtpci_common *cn); +void vtpci_child_detached(struct vtpci_common *cn); +int vtpci_reinit(struct vtpci_common *cn); + +uint64_t vtpci_negotiate_features(struct vtpci_common *cn, + uint64_t child_features, uint64_t host_features); +int vtpci_with_feature(struct vtpci_common *cn, uint64_t feature); + +int vtpci_read_ivar(struct vtpci_common *cn, int index, uintptr_t *result); +int vtpci_write_ivar(struct vtpci_common *cn, int index, uintptr_t value); + +int vtpci_alloc_virtqueues(struct vtpci_common *cn, int flags, int nvqs, + struct vq_alloc_info *vq_info); +int vtpci_setup_interrupts(struct vtpci_common *cn, enum intr_type type); +void vtpci_release_child_resources(struct vtpci_common *cn); #endif /* _VIRTIO_PCI_H */ diff -urN sys/dev/virtio.ori/pci/virtio_pci_if.m sys/dev/virtio/pci/virtio_pci_if.m --- sys/dev/virtio.ori/pci/virtio_pci_if.m 1969-12-31 16:00:00.000000000 -0800 +++ sys/dev/virtio/pci/virtio_pci_if.m 2020-03-19 23:17:51.677856000 -0700 @@ -0,0 +1,71 @@ +#- +# Copyright (c) 2017, Bryan Venteicher +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions +# are met: +# 1. Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# 2. Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in the +# documentation and/or other materials provided with the distribution. +# +# THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND +# ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +# ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE +# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS +# OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) +# HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT +# LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY +# OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF +# SUCH DAMAGE. 
+# +# $FreeBSD$ + +#include +#include + +INTERFACE virtio_pci; + +HEADER { +struct virtqueue; +struct vtpci_interrupt; +}; + +METHOD uint8_t read_isr { + device_t dev; +}; + +METHOD uint16_t get_vq_size { + device_t dev; + int idx; +}; + +METHOD bus_size_t get_vq_notify_off { + device_t dev; + int idx; +}; + +METHOD void set_vq { + device_t dev; + struct virtqueue *vq; +}; + +METHOD void disable_vq { + device_t dev; + int idx; +}; + +METHOD int register_cfg_msix { + device_t dev; + struct vtpci_interrupt *intr; +}; + +METHOD int register_vq_msix { + device_t dev; + int idx; + struct vtpci_interrupt *intr; +}; diff -urN sys/dev/virtio.ori/pci/virtio_pci_legacy.c sys/dev/virtio/pci/virtio_pci_legacy.c --- sys/dev/virtio.ori/pci/virtio_pci_legacy.c 1969-12-31 16:00:00.000000000 -0800 +++ sys/dev/virtio/pci/virtio_pci_legacy.c 2020-03-19 23:17:51.678068000 -0700 @@ -0,0 +1,714 @@ +/*- + * SPDX-License-Identifier: BSD-2-Clause-FreeBSD + * + * Copyright (c) 2011, Bryan Venteicher + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice unmodified, this list of conditions, and the following + * disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. + * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF + * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +/* Driver for the legacy VirtIO PCI interface. 
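+ * All registers live at fixed offsets in BAR 0 and are accessed with
+ * I/O port reads and writes (see virtio_pci_legacy_var.h).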
*/ + +#include +__FBSDID("$FreeBSD$"); + +#include +#include +#include +#include +#include + +#include +#include +#include +#include + +#include +#include + +#include +#include +#include +#include + +#include "virtio_bus_if.h" +#include "virtio_pci_if.h" +#include "virtio_if.h" + +struct vtpci_legacy_softc { + device_t vtpci_dev; + struct vtpci_common vtpci_common; + struct resource *vtpci_res; + struct resource *vtpci_msix_res; +}; + +static int vtpci_legacy_probe(device_t); +static int vtpci_legacy_attach(device_t); +static int vtpci_legacy_detach(device_t); +static int vtpci_legacy_suspend(device_t); +static int vtpci_legacy_resume(device_t); +static int vtpci_legacy_shutdown(device_t); + +static void vtpci_legacy_driver_added(device_t, driver_t *); +static void vtpci_legacy_child_detached(device_t, device_t); +static int vtpci_legacy_read_ivar(device_t, device_t, int, uintptr_t *); +static int vtpci_legacy_write_ivar(device_t, device_t, int, uintptr_t); + +static uint8_t vtpci_legacy_read_isr(device_t); +static uint16_t vtpci_legacy_get_vq_size(device_t, int); +static bus_size_t vtpci_legacy_get_vq_notify_off(device_t, int); +static void vtpci_legacy_set_vq(device_t, struct virtqueue *); +static void vtpci_legacy_disable_vq(device_t, int); +static int vtpci_legacy_register_cfg_msix(device_t, + struct vtpci_interrupt *); +static int vtpci_legacy_register_vq_msix(device_t, int idx, + struct vtpci_interrupt *); + +static uint64_t vtpci_legacy_negotiate_features(device_t, uint64_t); +static int vtpci_legacy_with_feature(device_t, uint64_t); +static int vtpci_legacy_alloc_virtqueues(device_t, int, int, + struct vq_alloc_info *); +static int vtpci_legacy_setup_interrupts(device_t, enum intr_type); +static void vtpci_legacy_stop(device_t); +static int vtpci_legacy_reinit(device_t, uint64_t); +static void vtpci_legacy_reinit_complete(device_t); +static void vtpci_legacy_notify_vq(device_t, uint16_t, bus_size_t); +static void vtpci_legacy_read_dev_config(device_t, bus_size_t, void *, int); +static void vtpci_legacy_write_dev_config(device_t, bus_size_t, void *, int); + +static int vtpci_legacy_alloc_resources(struct vtpci_legacy_softc *); +static void vtpci_legacy_free_resources(struct vtpci_legacy_softc *); + +static void vtpci_legacy_probe_and_attach_child(struct vtpci_legacy_softc *); + +static uint8_t vtpci_legacy_get_status(struct vtpci_legacy_softc *); +static void vtpci_legacy_set_status(struct vtpci_legacy_softc *, uint8_t); +static void vtpci_legacy_select_virtqueue(struct vtpci_legacy_softc *, int); +static void vtpci_legacy_reset(struct vtpci_legacy_softc *); + +#define VIRTIO_PCI_LEGACY_CONFIG(_sc) \ + VIRTIO_PCI_CONFIG_OFF(vtpci_is_msix_enabled(&(_sc)->vtpci_common)) + +/* + * I/O port read/write wrappers. + */ +#define vtpci_legacy_read_config_1(sc, o) bus_read_1((sc)->vtpci_res, (o)) +#define vtpci_legacy_read_config_2(sc, o) bus_read_2((sc)->vtpci_res, (o)) +#define vtpci_legacy_read_config_4(sc, o) bus_read_4((sc)->vtpci_res, (o)) +#define vtpci_legacy_write_config_1(sc, o, v) \ + bus_write_1((sc)->vtpci_res, (o), (v)) +#define vtpci_legacy_write_config_2(sc, o, v) \ + bus_write_2((sc)->vtpci_res, (o), (v)) +#define vtpci_legacy_write_config_4(sc, o, v) \ + bus_write_4((sc)->vtpci_res, (o), (v)) + +static device_method_t vtpci_legacy_methods[] = { + /* Device interface. 
*/ + DEVMETHOD(device_probe, vtpci_legacy_probe), + DEVMETHOD(device_attach, vtpci_legacy_attach), + DEVMETHOD(device_detach, vtpci_legacy_detach), + DEVMETHOD(device_suspend, vtpci_legacy_suspend), + DEVMETHOD(device_resume, vtpci_legacy_resume), + DEVMETHOD(device_shutdown, vtpci_legacy_shutdown), + + /* Bus interface. */ + DEVMETHOD(bus_driver_added, vtpci_legacy_driver_added), + DEVMETHOD(bus_child_detached, vtpci_legacy_child_detached), + DEVMETHOD(bus_read_ivar, vtpci_legacy_read_ivar), + DEVMETHOD(bus_write_ivar, vtpci_legacy_write_ivar), + + /* VirtIO PCI interface. */ + DEVMETHOD(virtio_pci_read_isr, vtpci_legacy_read_isr), + DEVMETHOD(virtio_pci_get_vq_size, vtpci_legacy_get_vq_size), + DEVMETHOD(virtio_pci_get_vq_notify_off, vtpci_legacy_get_vq_notify_off), + DEVMETHOD(virtio_pci_set_vq, vtpci_legacy_set_vq), + DEVMETHOD(virtio_pci_disable_vq, vtpci_legacy_disable_vq), + DEVMETHOD(virtio_pci_register_cfg_msix, vtpci_legacy_register_cfg_msix), + DEVMETHOD(virtio_pci_register_vq_msix, vtpci_legacy_register_vq_msix), + + /* VirtIO bus interface. */ + DEVMETHOD(virtio_bus_negotiate_features, vtpci_legacy_negotiate_features), + DEVMETHOD(virtio_bus_with_feature, vtpci_legacy_with_feature), + DEVMETHOD(virtio_bus_alloc_virtqueues, vtpci_legacy_alloc_virtqueues), + DEVMETHOD(virtio_bus_setup_intr, vtpci_legacy_setup_interrupts), + DEVMETHOD(virtio_bus_stop, vtpci_legacy_stop), + DEVMETHOD(virtio_bus_reinit, vtpci_legacy_reinit), + DEVMETHOD(virtio_bus_reinit_complete, vtpci_legacy_reinit_complete), + DEVMETHOD(virtio_bus_notify_vq, vtpci_legacy_notify_vq), + DEVMETHOD(virtio_bus_read_device_config, vtpci_legacy_read_dev_config), + DEVMETHOD(virtio_bus_write_device_config, vtpci_legacy_write_dev_config), + + DEVMETHOD_END +}; + +static driver_t vtpci_legacy_driver = { + .name = "vtpcil", + .methods = vtpci_legacy_methods, + .size = sizeof(struct vtpci_legacy_softc) +}; + +devclass_t vtpci_legacy_devclass; + +DRIVER_MODULE(vtpcil, pci, vtpci_legacy_driver, vtpci_legacy_devclass, 0, 0); + +static int +vtpci_legacy_probe(device_t dev) +{ + char desc[64]; + const char *name; + + if (pci_get_vendor(dev) != VIRTIO_PCI_VENDORID) + return (ENXIO); + + if (pci_get_device(dev) < VIRTIO_PCI_DEVICEID_MIN || + pci_get_device(dev) > VIRTIO_PCI_DEVICEID_LEGACY_MAX) + return (ENXIO); + + if (pci_get_revid(dev) != VIRTIO_PCI_ABI_VERSION) + return (ENXIO); + + name = virtio_device_name(pci_get_subdevice(dev)); + if (name == NULL) + name = "Unknown"; + + snprintf(desc, sizeof(desc), "VirtIO PCI (legacy) %s adapter", name); + device_set_desc_copy(dev, desc); + + /* Prefer transitional modern VirtIO PCI. */ + return (BUS_PROBE_LOW_PRIORITY); +} + +static int +vtpci_legacy_attach(device_t dev) +{ + struct vtpci_legacy_softc *sc; + int error; + + sc = device_get_softc(dev); + sc->vtpci_dev = dev; + vtpci_init(&sc->vtpci_common, dev, false); + + error = vtpci_legacy_alloc_resources(sc); + if (error) { + device_printf(dev, "cannot map I/O space\n"); + return (error); + } + + vtpci_legacy_reset(sc); + + /* Tell the host we've noticed this device. 
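+	 * ACK is the first step of the status handshake; the bus advances
+	 * it through DRIVER and, on a successful child attach, DRIVER_OK
+	 * (see vtpci_legacy_probe_and_attach_child).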
*/ + vtpci_legacy_set_status(sc, VIRTIO_CONFIG_STATUS_ACK); + + error = vtpci_add_child(&sc->vtpci_common); + if (error) + goto fail; + + vtpci_legacy_probe_and_attach_child(sc); + + return (0); + +fail: + vtpci_legacy_set_status(sc, VIRTIO_CONFIG_STATUS_FAILED); + vtpci_legacy_detach(dev); + + return (error); +} + +static int +vtpci_legacy_detach(device_t dev) +{ + struct vtpci_legacy_softc *sc; + int error; + + sc = device_get_softc(dev); + + error = vtpci_delete_child(&sc->vtpci_common); + if (error) + return (error); + + vtpci_legacy_reset(sc); + vtpci_legacy_free_resources(sc); + + return (0); +} + +static int +vtpci_legacy_suspend(device_t dev) +{ + return (bus_generic_suspend(dev)); +} + +static int +vtpci_legacy_resume(device_t dev) +{ + return (bus_generic_resume(dev)); +} + +static int +vtpci_legacy_shutdown(device_t dev) +{ + (void) bus_generic_shutdown(dev); + /* Forcibly stop the host device. */ + vtpci_legacy_stop(dev); + + return (0); +} + +static void +vtpci_legacy_driver_added(device_t dev, driver_t *driver) +{ + vtpci_legacy_probe_and_attach_child(device_get_softc(dev)); +} + +static void +vtpci_legacy_child_detached(device_t dev, device_t child) +{ + struct vtpci_legacy_softc *sc; + + sc = device_get_softc(dev); + + vtpci_legacy_reset(sc); + vtpci_child_detached(&sc->vtpci_common); + + /* After the reset, retell the host we've noticed this device. */ + vtpci_legacy_set_status(sc, VIRTIO_CONFIG_STATUS_ACK); +} + +static int +vtpci_legacy_read_ivar(device_t dev, device_t child, int index, + uintptr_t *result) +{ + struct vtpci_legacy_softc *sc; + struct vtpci_common *cn; + + sc = device_get_softc(dev); + cn = &sc->vtpci_common; + + if (vtpci_child_device(cn) != child) + return (ENOENT); + + switch (index) { + case VIRTIO_IVAR_DEVTYPE: + *result = pci_get_subdevice(dev); + break; + default: + return (vtpci_read_ivar(cn, index, result)); + } + + return (0); +} + +static int +vtpci_legacy_write_ivar(device_t dev, device_t child, int index, uintptr_t value) +{ + struct vtpci_legacy_softc *sc; + struct vtpci_common *cn; + + sc = device_get_softc(dev); + cn = &sc->vtpci_common; + + if (vtpci_child_device(cn) != child) + return (ENOENT); + + switch (index) { + default: + return (vtpci_write_ivar(cn, index, value)); + } + + return (0); +} + +static uint64_t +vtpci_legacy_negotiate_features(device_t dev, uint64_t child_features) +{ + struct vtpci_legacy_softc *sc; + uint64_t host_features, features; + + sc = device_get_softc(dev); + host_features = vtpci_legacy_read_config_4(sc, VIRTIO_PCI_HOST_FEATURES); + + features = vtpci_negotiate_features(&sc->vtpci_common, + child_features, host_features); + vtpci_legacy_write_config_4(sc, VIRTIO_PCI_GUEST_FEATURES, features); + + return (features); +} + +static int +vtpci_legacy_with_feature(device_t dev, uint64_t feature) +{ + struct vtpci_legacy_softc *sc; + + sc = device_get_softc(dev); + + return (vtpci_with_feature(&sc->vtpci_common, feature)); +} + +static int +vtpci_legacy_alloc_virtqueues(device_t dev, int flags, int nvqs, + struct vq_alloc_info *vq_info) +{ + struct vtpci_legacy_softc *sc; + struct vtpci_common *cn; + + sc = device_get_softc(dev); + cn = &sc->vtpci_common; + + return (vtpci_alloc_virtqueues(cn, flags, nvqs, vq_info)); +} + +static int +vtpci_legacy_setup_interrupts(device_t dev, enum intr_type type) +{ + struct vtpci_legacy_softc *sc; + + sc = device_get_softc(dev); + + return (vtpci_setup_interrupts(&sc->vtpci_common, type)); +} + +static void +vtpci_legacy_stop(device_t dev) +{ + 
vtpci_legacy_reset(device_get_softc(dev)); +} + +static int +vtpci_legacy_reinit(device_t dev, uint64_t features) +{ + struct vtpci_legacy_softc *sc; + struct vtpci_common *cn; + int error; + + sc = device_get_softc(dev); + cn = &sc->vtpci_common; + + /* + * Redrive the device initialization. This is a bit of an abuse of + * the specification, but VirtualBox, QEMU/KVM, and BHyVe seem to + * play nice. + * + * We do not allow the host device to change from what was originally + * negotiated beyond what the guest driver changed. MSIX state should + * not change, number of virtqueues and their size remain the same, etc. + * This will need to be rethought when we want to support migration. + */ + + if (vtpci_legacy_get_status(sc) != VIRTIO_CONFIG_STATUS_RESET) + vtpci_legacy_stop(dev); + + /* + * Quickly drive the status through ACK and DRIVER. The device does + * not become usable again until DRIVER_OK in reinit complete. + */ + vtpci_legacy_set_status(sc, VIRTIO_CONFIG_STATUS_ACK); + vtpci_legacy_set_status(sc, VIRTIO_CONFIG_STATUS_DRIVER); + + vtpci_legacy_negotiate_features(dev, features); + + error = vtpci_reinit(cn); + if (error) + return (error); + + return (0); +} + +static void +vtpci_legacy_reinit_complete(device_t dev) +{ + struct vtpci_legacy_softc *sc; + + sc = device_get_softc(dev); + + vtpci_legacy_set_status(sc, VIRTIO_CONFIG_STATUS_DRIVER_OK); +} + +static void +vtpci_legacy_notify_vq(device_t dev, uint16_t queue, bus_size_t offset) +{ + struct vtpci_legacy_softc *sc; + + sc = device_get_softc(dev); + MPASS(offset == VIRTIO_PCI_QUEUE_NOTIFY); + + vtpci_legacy_write_config_2(sc, offset, queue); +} + +static uint8_t +vtpci_legacy_get_status(struct vtpci_legacy_softc *sc) +{ + return (vtpci_legacy_read_config_1(sc, VIRTIO_PCI_STATUS)); +} + +static void +vtpci_legacy_set_status(struct vtpci_legacy_softc *sc, uint8_t status) +{ + if (status != VIRTIO_CONFIG_STATUS_RESET) + status |= vtpci_legacy_get_status(sc); + + vtpci_legacy_write_config_1(sc, VIRTIO_PCI_STATUS, status); +} + +static void +vtpci_legacy_read_dev_config(device_t dev, bus_size_t offset, + void *dst, int length) +{ + struct vtpci_legacy_softc *sc; + bus_size_t off; + uint8_t *d; + int size; + + sc = device_get_softc(dev); + off = VIRTIO_PCI_LEGACY_CONFIG(sc) + offset; + + for (d = dst; length > 0; d += size, off += size, length -= size) { + if (length >= 4) { + size = 4; + *(uint32_t *)d = vtpci_legacy_read_config_4(sc, off); + } else if (length >= 2) { + size = 2; + *(uint16_t *)d = vtpci_legacy_read_config_2(sc, off); + } else { + size = 1; + *d = vtpci_legacy_read_config_1(sc, off); + } + } +} + +static void +vtpci_legacy_write_dev_config(device_t dev, bus_size_t offset, + void *src, int length) +{ + struct vtpci_legacy_softc *sc; + bus_size_t off; + uint8_t *s; + int size; + + sc = device_get_softc(dev); + off = VIRTIO_PCI_LEGACY_CONFIG(sc) + offset; + + for (s = src; length > 0; s += size, off += size, length -= size) { + if (length >= 4) { + size = 4; + vtpci_legacy_write_config_4(sc, off, *(uint32_t *)s); + } else if (length >= 2) { + size = 2; + vtpci_legacy_write_config_2(sc, off, *(uint16_t *)s); + } else { + size = 1; + vtpci_legacy_write_config_1(sc, off, *s); + } + } +} + +static int +vtpci_legacy_alloc_resources(struct vtpci_legacy_softc *sc) +{ + device_t dev; + int rid; + + dev = sc->vtpci_dev; + + rid = PCIR_BAR(0); + if ((sc->vtpci_res = bus_alloc_resource_any(dev, SYS_RES_IOPORT, + &rid, RF_ACTIVE)) == NULL) + return (ENXIO); + + if (vtpci_is_msix_available(&sc->vtpci_common)) { + rid = 
PCIR_BAR(1); + if ((sc->vtpci_msix_res = bus_alloc_resource_any(dev, + SYS_RES_MEMORY, &rid, RF_ACTIVE)) == NULL) + return (ENXIO); + } + + return (0); +} + +static void +vtpci_legacy_free_resources(struct vtpci_legacy_softc *sc) +{ + device_t dev; + + dev = sc->vtpci_dev; + + if (sc->vtpci_msix_res != NULL) { + bus_release_resource(dev, SYS_RES_MEMORY, PCIR_BAR(1), + sc->vtpci_msix_res); + sc->vtpci_msix_res = NULL; + } + + if (sc->vtpci_res != NULL) { + bus_release_resource(dev, SYS_RES_IOPORT, PCIR_BAR(0), + sc->vtpci_res); + sc->vtpci_res = NULL; + } +} + +static void +vtpci_legacy_probe_and_attach_child(struct vtpci_legacy_softc *sc) +{ + device_t dev, child; + + dev = sc->vtpci_dev; + child = vtpci_child_device(&sc->vtpci_common); + + if (child == NULL || device_get_state(child) != DS_NOTPRESENT) + return; + + if (device_probe(child) != 0) + return; + + vtpci_legacy_set_status(sc, VIRTIO_CONFIG_STATUS_DRIVER); + + if (device_attach(child) != 0) { + vtpci_legacy_set_status(sc, VIRTIO_CONFIG_STATUS_FAILED); + /* Reset status for future attempt. */ + vtpci_legacy_child_detached(dev, child); + } else { + vtpci_legacy_set_status(sc, VIRTIO_CONFIG_STATUS_DRIVER_OK); + VIRTIO_ATTACH_COMPLETED(child); + } +} + +static int +vtpci_legacy_register_msix(struct vtpci_legacy_softc *sc, int offset, + struct vtpci_interrupt *intr) +{ + device_t dev; + uint16_t vector; + + dev = sc->vtpci_dev; + + if (intr != NULL) { + /* Map from guest rid to host vector. */ + vector = intr->vti_rid - 1; + } else + vector = VIRTIO_MSI_NO_VECTOR; + + vtpci_legacy_write_config_2(sc, offset, vector); + return (vtpci_legacy_read_config_2(sc, offset) == vector ? 0 : ENODEV); +} + +static int +vtpci_legacy_register_cfg_msix(device_t dev, struct vtpci_interrupt *intr) +{ + struct vtpci_legacy_softc *sc; + int error; + + sc = device_get_softc(dev); + + error = vtpci_legacy_register_msix(sc, VIRTIO_MSI_CONFIG_VECTOR, intr); + if (error) { + device_printf(dev, + "unable to register config MSIX interrupt\n"); + return (error); + } + + return (0); +} + +static int +vtpci_legacy_register_vq_msix(device_t dev, int idx, + struct vtpci_interrupt *intr) +{ + struct vtpci_legacy_softc *sc; + int error; + + sc = device_get_softc(dev); + + vtpci_legacy_select_virtqueue(sc, idx); + error = vtpci_legacy_register_msix(sc, VIRTIO_MSI_QUEUE_VECTOR, intr); + if (error) { + device_printf(dev, + "unable to register virtqueue MSIX interrupt\n"); + return (error); + } + + return (0); +} + +static void +vtpci_legacy_reset(struct vtpci_legacy_softc *sc) +{ + /* + * Setting the status to RESET sets the host device to the + * original, uninitialized state. 
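+	 * The follow-up status read below is presumably a flush so the
+	 * reset write reaches the device before we continue; the legacy
+	 * interface has no explicit reset-complete indication.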
+ */ + vtpci_legacy_set_status(sc, VIRTIO_CONFIG_STATUS_RESET); + (void) vtpci_legacy_get_status(sc); +} + +static void +vtpci_legacy_select_virtqueue(struct vtpci_legacy_softc *sc, int idx) +{ + vtpci_legacy_write_config_2(sc, VIRTIO_PCI_QUEUE_SEL, idx); +} + +static uint8_t +vtpci_legacy_read_isr(device_t dev) +{ + struct vtpci_legacy_softc *sc; + + sc = device_get_softc(dev); + + return (vtpci_legacy_read_config_1(sc, VIRTIO_PCI_ISR)); +} + +static uint16_t +vtpci_legacy_get_vq_size(device_t dev, int idx) +{ + struct vtpci_legacy_softc *sc; + + sc = device_get_softc(dev); + + vtpci_legacy_select_virtqueue(sc, idx); + return (vtpci_legacy_read_config_2(sc, VIRTIO_PCI_QUEUE_NUM)); +} + +static bus_size_t +vtpci_legacy_get_vq_notify_off(device_t dev, int idx) +{ + return (VIRTIO_PCI_QUEUE_NOTIFY); +} + +static void +vtpci_legacy_set_vq(device_t dev, struct virtqueue *vq) +{ + struct vtpci_legacy_softc *sc; + + sc = device_get_softc(dev); + + vtpci_legacy_select_virtqueue(sc, virtqueue_index(vq)); + vtpci_legacy_write_config_4(sc, VIRTIO_PCI_QUEUE_PFN, + virtqueue_paddr(vq) >> VIRTIO_PCI_QUEUE_ADDR_SHIFT); +} + +static void +vtpci_legacy_disable_vq(device_t dev, int idx) +{ + struct vtpci_legacy_softc *sc; + + sc = device_get_softc(dev); + + vtpci_legacy_select_virtqueue(sc, idx); + vtpci_legacy_write_config_4(sc, VIRTIO_PCI_QUEUE_PFN, 0); +} diff -urN sys/dev/virtio.ori/pci/virtio_pci_legacy_var.h sys/dev/virtio/pci/virtio_pci_legacy_var.h --- sys/dev/virtio.ori/pci/virtio_pci_legacy_var.h 1969-12-31 16:00:00.000000000 -0800 +++ sys/dev/virtio/pci/virtio_pci_legacy_var.h 2020-03-19 23:17:51.678364000 -0700 @@ -0,0 +1,78 @@ +/*- + * SPDX-License-Identifier: BSD-3-Clause + * + * Copyright IBM Corp. 2007 + * + * Authors: + * Anthony Liguori + * + * This header is BSD licensed so anyone can use the definitions to implement + * compatible drivers/servers. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the name of IBM nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED + * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL IBM OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * $FreeBSD$ + */ + +#ifndef _VIRTIO_PCI_LEGACY_VAR_H +#define _VIRTIO_PCI_LEGACY_VAR_H + +#include + +/* VirtIO ABI version, this must match exactly. 
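+ * vtpci_legacy_probe() refuses to attach when the PCI revision ID
+ * does not equal VIRTIO_PCI_ABI_VERSION.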
*/ +#define VIRTIO_PCI_ABI_VERSION 0 + +/* + * VirtIO Header, located in BAR 0. + */ +#define VIRTIO_PCI_HOST_FEATURES 0 /* host's supported features (32bit, RO)*/ +#define VIRTIO_PCI_GUEST_FEATURES 4 /* guest's supported features (32, RW) */ +#define VIRTIO_PCI_QUEUE_PFN 8 /* physical address of VQ (32, RW) */ +#define VIRTIO_PCI_QUEUE_NUM 12 /* number of ring entries (16, RO) */ +#define VIRTIO_PCI_QUEUE_SEL 14 /* current VQ selection (16, RW) */ +#define VIRTIO_PCI_QUEUE_NOTIFY 16 /* notify host regarding VQ (16, RW) */ +#define VIRTIO_PCI_STATUS 18 /* device status register (8, RW) */ +#define VIRTIO_PCI_ISR 19 /* interrupt status register, reading + * also clears the register (8, RO) */ +/* Only if MSIX is enabled: */ +#define VIRTIO_MSI_CONFIG_VECTOR 20 /* configuration change vector (16, RW) */ +#define VIRTIO_MSI_QUEUE_VECTOR 22 /* vector for selected VQ notifications + (16, RW) */ + +/* + * The remaining space is defined by each driver as the per-driver + * configuration space. + */ +#define VIRTIO_PCI_CONFIG_OFF(msix_enabled) ((msix_enabled) ? 24 : 20) + +/* + * How many bits to shift physical queue address written to QUEUE_PFN. + * 12 is historical, and due to x86 page size. + */ +#define VIRTIO_PCI_QUEUE_ADDR_SHIFT 12 + +/* The alignment to use between consumer and producer parts of vring. */ +#define VIRTIO_PCI_VRING_ALIGN 4096 + +#endif /* _VIRTIO_PCI_LEGACY_VAR_H */ diff -urN sys/dev/virtio.ori/pci/virtio_pci_modern.c sys/dev/virtio/pci/virtio_pci_modern.c --- sys/dev/virtio.ori/pci/virtio_pci_modern.c 1969-12-31 16:00:00.000000000 -0800 +++ sys/dev/virtio/pci/virtio_pci_modern.c 2020-03-19 23:17:51.678673000 -0700 @@ -0,0 +1,1443 @@ +/*- + * SPDX-License-Identifier: BSD-2-Clause-FreeBSD + * + * Copyright (c) 2017, Bryan Venteicher + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice unmodified, this list of conditions, and the following + * disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. + * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF + * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +/* Driver for the modern VirtIO PCI interface. 
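+ * Unlike the legacy transport, the register windows (common, notify,
+ * ISR, and device-specific configuration) are located through PCI
+ * vendor capabilities and may be placed in any memory or I/O BAR
+ * (see vtpci_modern_map_configs).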
*/ + +#include +__FBSDID("$FreeBSD$"); + +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include + +#include +#include + +#include +#include +#include +#include + +#include "virtio_bus_if.h" +#include "virtio_pci_if.h" +#include "virtio_if.h" + +struct vtpci_modern_resource_map { + struct resource_map vtrm_map; + int vtrm_cap_offset; + int vtrm_bar; + int vtrm_offset; + int vtrm_length; + int vtrm_type; /* SYS_RES_{MEMORY, IOPORT} */ +}; + +struct vtpci_modern_bar_resource { + struct resource *vtbr_res; + int vtbr_type; +}; + +struct vtpci_modern_softc { + device_t vtpci_dev; + struct vtpci_common vtpci_common; + uint32_t vtpci_notify_offset_multiplier; + uint16_t vtpci_devid; + int vtpci_msix_bar; + struct resource *vtpci_msix_res; + + struct vtpci_modern_resource_map vtpci_common_res_map; + struct vtpci_modern_resource_map vtpci_notify_res_map; + struct vtpci_modern_resource_map vtpci_isr_res_map; + struct vtpci_modern_resource_map vtpci_device_res_map; + +#define VTPCI_MODERN_MAX_BARS 6 + struct vtpci_modern_bar_resource vtpci_bar_res[VTPCI_MODERN_MAX_BARS]; +}; + +static int vtpci_modern_probe(device_t); +static int vtpci_modern_attach(device_t); +static int vtpci_modern_detach(device_t); +static int vtpci_modern_suspend(device_t); +static int vtpci_modern_resume(device_t); +static int vtpci_modern_shutdown(device_t); + +static void vtpci_modern_driver_added(device_t, driver_t *); +static void vtpci_modern_child_detached(device_t, device_t); +static int vtpci_modern_read_ivar(device_t, device_t, int, uintptr_t *); +static int vtpci_modern_write_ivar(device_t, device_t, int, uintptr_t); + +static uint8_t vtpci_modern_read_isr(device_t); +static uint16_t vtpci_modern_get_vq_size(device_t, int); +static bus_size_t vtpci_modern_get_vq_notify_off(device_t, int); +static void vtpci_modern_set_vq(device_t, struct virtqueue *); +static void vtpci_modern_disable_vq(device_t, int); +static int vtpci_modern_register_msix(struct vtpci_modern_softc *, int, + struct vtpci_interrupt *); +static int vtpci_modern_register_cfg_msix(device_t, + struct vtpci_interrupt *); +static int vtpci_modern_register_vq_msix(device_t, int idx, + struct vtpci_interrupt *); + +static uint64_t vtpci_modern_negotiate_features(device_t, uint64_t); +static int vtpci_modern_finalize_features(device_t); +static int vtpci_modern_with_feature(device_t, uint64_t); +static int vtpci_modern_alloc_virtqueues(device_t, int, int, + struct vq_alloc_info *); +static int vtpci_modern_setup_interrupts(device_t, enum intr_type); +static void vtpci_modern_stop(device_t); +static int vtpci_modern_reinit(device_t, uint64_t); +static void vtpci_modern_reinit_complete(device_t); +static void vtpci_modern_notify_vq(device_t, uint16_t, bus_size_t); +static int vtpci_modern_config_generation(device_t); +static void vtpci_modern_read_dev_config(device_t, bus_size_t, void *, int); +static void vtpci_modern_write_dev_config(device_t, bus_size_t, void *, int); + +static int vtpci_modern_probe_configs(device_t); +static int vtpci_modern_find_cap(device_t, uint8_t, int *); +static int vtpci_modern_map_configs(struct vtpci_modern_softc *); +static void vtpci_modern_unmap_configs(struct vtpci_modern_softc *); +static int vtpci_modern_find_cap_resource(struct vtpci_modern_softc *, + uint8_t, int, int, struct vtpci_modern_resource_map *); +static int vtpci_modern_bar_type(struct vtpci_modern_softc *, int); +static struct resource *vtpci_modern_get_bar_resource( + struct vtpci_modern_softc *, int, int); +static 
struct resource *vtpci_modern_alloc_bar_resource( + struct vtpci_modern_softc *, int, int); +static void vtpci_modern_free_bar_resources(struct vtpci_modern_softc *); +static int vtpci_modern_alloc_resource_map(struct vtpci_modern_softc *, + struct vtpci_modern_resource_map *); +static void vtpci_modern_free_resource_map(struct vtpci_modern_softc *, + struct vtpci_modern_resource_map *); +static void vtpci_modern_alloc_msix_resource(struct vtpci_modern_softc *); +static void vtpci_modern_free_msix_resource(struct vtpci_modern_softc *); + +static void vtpci_modern_probe_and_attach_child(struct vtpci_modern_softc *); + +static uint64_t vtpci_modern_read_features(struct vtpci_modern_softc *); +static void vtpci_modern_write_features(struct vtpci_modern_softc *, + uint64_t); +static void vtpci_modern_select_virtqueue(struct vtpci_modern_softc *, int); +static uint8_t vtpci_modern_get_status(struct vtpci_modern_softc *); +static void vtpci_modern_set_status(struct vtpci_modern_softc *, uint8_t); +static void vtpci_modern_reset(struct vtpci_modern_softc *); +static void vtpci_modern_enable_virtqueues(struct vtpci_modern_softc *); + +static uint8_t vtpci_modern_read_common_1(struct vtpci_modern_softc *, + bus_size_t); +static uint16_t vtpci_modern_read_common_2(struct vtpci_modern_softc *, + bus_size_t); +static uint32_t vtpci_modern_read_common_4(struct vtpci_modern_softc *, + bus_size_t); +static void vtpci_modern_write_common_1(struct vtpci_modern_softc *, + bus_size_t, uint8_t); +static void vtpci_modern_write_common_2(struct vtpci_modern_softc *, + bus_size_t, uint16_t); +static void vtpci_modern_write_common_4(struct vtpci_modern_softc *, + bus_size_t, uint32_t); +static void vtpci_modern_write_common_8(struct vtpci_modern_softc *, + bus_size_t, uint64_t); +static void vtpci_modern_write_notify_2(struct vtpci_modern_softc *, + bus_size_t, uint16_t); +static uint8_t vtpci_modern_read_isr_1(struct vtpci_modern_softc *, + bus_size_t); +static uint8_t vtpci_modern_read_device_1(struct vtpci_modern_softc *, + bus_size_t); +static uint16_t vtpci_modern_read_device_2(struct vtpci_modern_softc *, + bus_size_t); +static uint32_t vtpci_modern_read_device_4(struct vtpci_modern_softc *, + bus_size_t); +static uint64_t vtpci_modern_read_device_8(struct vtpci_modern_softc *, + bus_size_t); +static void vtpci_modern_write_device_1(struct vtpci_modern_softc *, + bus_size_t, uint8_t); +static void vtpci_modern_write_device_2(struct vtpci_modern_softc *, + bus_size_t, uint16_t); +static void vtpci_modern_write_device_4(struct vtpci_modern_softc *, + bus_size_t, uint32_t); +static void vtpci_modern_write_device_8(struct vtpci_modern_softc *, + bus_size_t, uint64_t); + +/* Tunables. */ +static int vtpci_modern_transitional = 0; +TUNABLE_INT("hw.virtio.pci.transitional", &vtpci_modern_transitional); + +static device_method_t vtpci_modern_methods[] = { + /* Device interface. */ + DEVMETHOD(device_probe, vtpci_modern_probe), + DEVMETHOD(device_attach, vtpci_modern_attach), + DEVMETHOD(device_detach, vtpci_modern_detach), + DEVMETHOD(device_suspend, vtpci_modern_suspend), + DEVMETHOD(device_resume, vtpci_modern_resume), + DEVMETHOD(device_shutdown, vtpci_modern_shutdown), + + /* Bus interface. */ + DEVMETHOD(bus_driver_added, vtpci_modern_driver_added), + DEVMETHOD(bus_child_detached, vtpci_modern_child_detached), + DEVMETHOD(bus_read_ivar, vtpci_modern_read_ivar), + DEVMETHOD(bus_write_ivar, vtpci_modern_write_ivar), + + /* VirtIO PCI interface. 
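+	 * These are the transport-specific hooks that the shared
+	 * vtpci_common code dispatches through virtio_pci_if.m.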
*/ + DEVMETHOD(virtio_pci_read_isr, vtpci_modern_read_isr), + DEVMETHOD(virtio_pci_get_vq_size, vtpci_modern_get_vq_size), + DEVMETHOD(virtio_pci_get_vq_notify_off, vtpci_modern_get_vq_notify_off), + DEVMETHOD(virtio_pci_set_vq, vtpci_modern_set_vq), + DEVMETHOD(virtio_pci_disable_vq, vtpci_modern_disable_vq), + DEVMETHOD(virtio_pci_register_cfg_msix, vtpci_modern_register_cfg_msix), + DEVMETHOD(virtio_pci_register_vq_msix, vtpci_modern_register_vq_msix), + + /* VirtIO bus interface. */ + DEVMETHOD(virtio_bus_negotiate_features, vtpci_modern_negotiate_features), + DEVMETHOD(virtio_bus_finalize_features, vtpci_modern_finalize_features), + DEVMETHOD(virtio_bus_with_feature, vtpci_modern_with_feature), + DEVMETHOD(virtio_bus_alloc_virtqueues, vtpci_modern_alloc_virtqueues), + DEVMETHOD(virtio_bus_setup_intr, vtpci_modern_setup_interrupts), + DEVMETHOD(virtio_bus_stop, vtpci_modern_stop), + DEVMETHOD(virtio_bus_reinit, vtpci_modern_reinit), + DEVMETHOD(virtio_bus_reinit_complete, vtpci_modern_reinit_complete), + DEVMETHOD(virtio_bus_notify_vq, vtpci_modern_notify_vq), + DEVMETHOD(virtio_bus_config_generation, vtpci_modern_config_generation), + DEVMETHOD(virtio_bus_read_device_config, vtpci_modern_read_dev_config), + DEVMETHOD(virtio_bus_write_device_config, vtpci_modern_write_dev_config), + + DEVMETHOD_END +}; + +static driver_t vtpci_modern_driver = { + .name = "vtpcim", + .methods = vtpci_modern_methods, + .size = sizeof(struct vtpci_modern_softc) +}; + +devclass_t vtpci_modern_devclass; + +DRIVER_MODULE(vtpcim, pci, vtpci_modern_driver, vtpci_modern_devclass, 0, 0); + +static int +vtpci_modern_probe(device_t dev) +{ + char desc[64]; + const char *name; + uint16_t devid; + + if (pci_get_vendor(dev) != VIRTIO_PCI_VENDORID) + return (ENXIO); + + if (pci_get_device(dev) < VIRTIO_PCI_DEVICEID_MIN || + pci_get_device(dev) > VIRTIO_PCI_DEVICEID_MODERN_MAX) + return (ENXIO); + + if (pci_get_device(dev) < VIRTIO_PCI_DEVICEID_MODERN_MIN) { + if (!vtpci_modern_transitional) + return (ENXIO); + devid = pci_get_subdevice(dev); + } else + devid = pci_get_device(dev) - VIRTIO_PCI_DEVICEID_MODERN_MIN; + + if (vtpci_modern_probe_configs(dev) != 0) + return (ENXIO); + + name = virtio_device_name(devid); + if (name == NULL) + name = "Unknown"; + + snprintf(desc, sizeof(desc), "VirtIO PCI (modern) %s adapter", name); + device_set_desc_copy(dev, desc); + + return (BUS_PROBE_DEFAULT); +} + +static int +vtpci_modern_attach(device_t dev) +{ + struct vtpci_modern_softc *sc; + int error; + + sc = device_get_softc(dev); + sc->vtpci_dev = dev; + vtpci_init(&sc->vtpci_common, dev, true); + + if (pci_get_device(dev) < VIRTIO_PCI_DEVICEID_MODERN_MIN) + sc->vtpci_devid = pci_get_subdevice(dev); + else + sc->vtpci_devid = pci_get_device(dev) - + VIRTIO_PCI_DEVICEID_MODERN_MIN; + + error = vtpci_modern_map_configs(sc); + if (error) { + device_printf(dev, "cannot map configs\n"); + vtpci_modern_unmap_configs(sc); + return (error); + } + + vtpci_modern_reset(sc); + + /* Tell the host we've noticed this device. 
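+	 * As with the legacy transport, ACK is only the first step; the
+	 * modern handshake also requires FEATURES_OK (see
+	 * vtpci_modern_finalize_features) before DRIVER_OK.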
*/ + vtpci_modern_set_status(sc, VIRTIO_CONFIG_STATUS_ACK); + + error = vtpci_add_child(&sc->vtpci_common); + if (error) + goto fail; + + vtpci_modern_probe_and_attach_child(sc); + + return (0); + +fail: + vtpci_modern_set_status(sc, VIRTIO_CONFIG_STATUS_FAILED); + vtpci_modern_detach(dev); + + return (error); +} + +static int +vtpci_modern_detach(device_t dev) +{ + struct vtpci_modern_softc *sc; + int error; + + sc = device_get_softc(dev); + + error = vtpci_delete_child(&sc->vtpci_common); + if (error) + return (error); + + vtpci_modern_reset(sc); + vtpci_modern_unmap_configs(sc); + + return (0); +} + +static int +vtpci_modern_suspend(device_t dev) +{ + return (bus_generic_suspend(dev)); +} + +static int +vtpci_modern_resume(device_t dev) +{ + return (bus_generic_resume(dev)); +} + +static int +vtpci_modern_shutdown(device_t dev) +{ + (void) bus_generic_shutdown(dev); + /* Forcibly stop the host device. */ + vtpci_modern_stop(dev); + + return (0); +} + +static void +vtpci_modern_driver_added(device_t dev, driver_t *driver) +{ + vtpci_modern_probe_and_attach_child(device_get_softc(dev)); +} + +static void +vtpci_modern_child_detached(device_t dev, device_t child) +{ + struct vtpci_modern_softc *sc; + + sc = device_get_softc(dev); + + vtpci_modern_reset(sc); + vtpci_child_detached(&sc->vtpci_common); + + /* After the reset, retell the host we've noticed this device. */ + vtpci_modern_set_status(sc, VIRTIO_CONFIG_STATUS_ACK); +} + +static int +vtpci_modern_read_ivar(device_t dev, device_t child, int index, + uintptr_t *result) +{ + struct vtpci_modern_softc *sc; + struct vtpci_common *cn; + + sc = device_get_softc(dev); + cn = &sc->vtpci_common; + + if (vtpci_child_device(cn) != child) + return (ENOENT); + + switch (index) { + case VIRTIO_IVAR_DEVTYPE: + *result = sc->vtpci_devid; + break; + default: + return (vtpci_read_ivar(cn, index, result)); + } + + return (0); +} + +static int +vtpci_modern_write_ivar(device_t dev, device_t child, int index, + uintptr_t value) +{ + struct vtpci_modern_softc *sc; + struct vtpci_common *cn; + + sc = device_get_softc(dev); + cn = &sc->vtpci_common; + + if (vtpci_child_device(cn) != child) + return (ENOENT); + + switch (index) { + default: + return (vtpci_write_ivar(cn, index, value)); + } + + return (0); +} + +static uint64_t +vtpci_modern_negotiate_features(device_t dev, uint64_t child_features) +{ + struct vtpci_modern_softc *sc; + uint64_t host_features, features; + + sc = device_get_softc(dev); + host_features = vtpci_modern_read_features(sc); + + /* + * Since the driver was added as a child of the modern PCI bus, + * always add the V1 flag. + */ + child_features |= VIRTIO_F_VERSION_1; + + features = vtpci_negotiate_features(&sc->vtpci_common, + child_features, host_features); + vtpci_modern_write_features(sc, features); + + return (features); +} + +static int +vtpci_modern_finalize_features(device_t dev) +{ + struct vtpci_modern_softc *sc; + uint8_t status; + + sc = device_get_softc(dev); + + /* + * Must re-read the status after setting it to verify the negotiated + * features were accepted by the device. 
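+	 * A device that cannot support the written feature subset leaves
+	 * FEATURES_OK clear, in which case we fail with ENOTSUP below.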
+ */
+ vtpci_modern_set_status(sc, VIRTIO_CONFIG_S_FEATURES_OK);
+
+ status = vtpci_modern_get_status(sc);
+ if ((status & VIRTIO_CONFIG_S_FEATURES_OK) == 0) {
+ device_printf(dev, "desired features were not accepted\n");
+ return (ENOTSUP);
+ }
+
+ return (0);
+}
+
+static int
+vtpci_modern_with_feature(device_t dev, uint64_t feature)
+{
+ struct vtpci_modern_softc *sc;
+
+ sc = device_get_softc(dev);
+
+ return (vtpci_with_feature(&sc->vtpci_common, feature));
+}
+
+static uint64_t
+vtpci_modern_read_features(struct vtpci_modern_softc *sc)
+{
+ uint32_t features0, features1;
+
+ vtpci_modern_write_common_4(sc, VIRTIO_PCI_COMMON_DFSELECT, 0);
+ features0 = vtpci_modern_read_common_4(sc, VIRTIO_PCI_COMMON_DF);
+ vtpci_modern_write_common_4(sc, VIRTIO_PCI_COMMON_DFSELECT, 1);
+ features1 = vtpci_modern_read_common_4(sc, VIRTIO_PCI_COMMON_DF);
+
+ return (((uint64_t) features1 << 32) | features0);
+}
+
+static void
+vtpci_modern_write_features(struct vtpci_modern_softc *sc, uint64_t features)
+{
+ uint32_t features0, features1;
+
+ features0 = features;
+ features1 = features >> 32;
+
+ vtpci_modern_write_common_4(sc, VIRTIO_PCI_COMMON_GFSELECT, 0);
+ vtpci_modern_write_common_4(sc, VIRTIO_PCI_COMMON_GF, features0);
+ vtpci_modern_write_common_4(sc, VIRTIO_PCI_COMMON_GFSELECT, 1);
+ vtpci_modern_write_common_4(sc, VIRTIO_PCI_COMMON_GF, features1);
+}
+
+static int
+vtpci_modern_alloc_virtqueues(device_t dev, int flags, int nvqs,
+ struct vq_alloc_info *vq_info)
+{
+ struct vtpci_modern_softc *sc;
+ struct vtpci_common *cn;
+ uint16_t max_nvqs;
+
+ sc = device_get_softc(dev);
+ cn = &sc->vtpci_common;
+
+ max_nvqs = vtpci_modern_read_common_2(sc, VIRTIO_PCI_COMMON_NUMQ);
+ if (nvqs > max_nvqs) {
+ device_printf(sc->vtpci_dev, "requested virtqueue count %d "
+ "exceeds max %d\n", nvqs, max_nvqs);
+ return (E2BIG);
+ }
+
+ return (vtpci_alloc_virtqueues(cn, flags, nvqs, vq_info));
+}
+
+static int
+vtpci_modern_setup_interrupts(device_t dev, enum intr_type type)
+{
+ struct vtpci_modern_softc *sc;
+ int error;
+
+ sc = device_get_softc(dev);
+
+ error = vtpci_setup_interrupts(&sc->vtpci_common, type);
+ if (error == 0)
+ vtpci_modern_enable_virtqueues(sc);
+
+ return (error);
+}
+
+static void
+vtpci_modern_stop(device_t dev)
+{
+ vtpci_modern_reset(device_get_softc(dev));
+}
+
+static int
+vtpci_modern_reinit(device_t dev, uint64_t features)
+{
+ struct vtpci_modern_softc *sc;
+ struct vtpci_common *cn;
+ int error;
+
+ sc = device_get_softc(dev);
+ cn = &sc->vtpci_common;
+
+ /*
+ * Redrive the device initialization. This is a bit of an abuse of
+ * the specification, but VirtualBox, QEMU/KVM, and bhyve seem to
+ * play nice.
+ *
+ * We do not allow the host device to change from what was originally
+ * negotiated beyond what the guest driver changed. MSIX state should
+ * not change, number of virtqueues and their size remain the same, etc.
+ * This will need to be rethought when we want to support migration.
+ */
+
+ if (vtpci_modern_get_status(sc) != VIRTIO_CONFIG_STATUS_RESET)
+ vtpci_modern_stop(dev);
+
+ /*
+ * Quickly drive the status through ACK and DRIVER. The device does
+ * not become usable again until DRIVER_OK in reinit complete.
+ */
+ vtpci_modern_set_status(sc, VIRTIO_CONFIG_STATUS_ACK);
+ vtpci_modern_set_status(sc, VIRTIO_CONFIG_STATUS_DRIVER);
+
+ /*
+ * TODO: Check that features are not added beyond what was
+ * originally negotiated. 
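+ *
+ * One possible (untested) shape for such a check, assuming the feature
+ * set accepted at attach time is saved in the common softc (the
+ * vtpci_features field name is an assumption, not shown in this patch):
+ *
+ *	if ((features & ~cn->vtpci_features) != 0)
+ *		return (EINVAL);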
+ */ + vtpci_modern_negotiate_features(dev, features); + error = vtpci_modern_finalize_features(dev); + if (error) { + device_printf(dev, "cannot finalize features during reinit\n"); + return (error); + } + + error = vtpci_reinit(cn); + if (error) + return (error); + + return (0); +} + +static void +vtpci_modern_reinit_complete(device_t dev) +{ + struct vtpci_modern_softc *sc; + + sc = device_get_softc(dev); + + vtpci_modern_enable_virtqueues(sc); + vtpci_modern_set_status(sc, VIRTIO_CONFIG_STATUS_DRIVER_OK); +} + +static void +vtpci_modern_notify_vq(device_t dev, uint16_t queue, bus_size_t offset) +{ + struct vtpci_modern_softc *sc; + + sc = device_get_softc(dev); + + vtpci_modern_write_notify_2(sc, offset, queue); +} + +static uint8_t +vtpci_modern_get_status(struct vtpci_modern_softc *sc) +{ + return (vtpci_modern_read_common_1(sc, VIRTIO_PCI_COMMON_STATUS)); +} + +static void +vtpci_modern_set_status(struct vtpci_modern_softc *sc, uint8_t status) +{ + if (status != VIRTIO_CONFIG_STATUS_RESET) + status |= vtpci_modern_get_status(sc); + + vtpci_modern_write_common_1(sc, VIRTIO_PCI_COMMON_STATUS, status); +} + +static int +vtpci_modern_config_generation(device_t dev) +{ + struct vtpci_modern_softc *sc; + uint8_t gen; + + sc = device_get_softc(dev); + gen = vtpci_modern_read_common_1(sc, VIRTIO_PCI_COMMON_CFGGENERATION); + + return (gen); +} + +static void +vtpci_modern_read_dev_config(device_t dev, bus_size_t offset, void *dst, + int length) +{ + struct vtpci_modern_softc *sc; + + sc = device_get_softc(dev); + + if (sc->vtpci_device_res_map.vtrm_map.r_size == 0) { + panic("%s: attempt to read dev config but not present", + __func__); + } + + switch (length) { + case 1: + *(uint8_t *) dst = vtpci_modern_read_device_1(sc, offset); + break; + case 2: + *(uint16_t *) dst = virtio_htog16(true, + vtpci_modern_read_device_2(sc, offset)); + break; + case 4: + *(uint32_t *) dst = virtio_htog32(true, + vtpci_modern_read_device_4(sc, offset)); + break; + case 8: + *(uint64_t *) dst = virtio_htog64(true, + vtpci_modern_read_device_8(sc, offset)); + break; + default: + panic("%s: device %s invalid device read length %d offset %d", + __func__, device_get_nameunit(dev), length, (int) offset); + } +} + +static void +vtpci_modern_write_dev_config(device_t dev, bus_size_t offset, void *src, + int length) +{ + struct vtpci_modern_softc *sc; + + sc = device_get_softc(dev); + + if (sc->vtpci_device_res_map.vtrm_map.r_size == 0) { + panic("%s: attempt to write dev config but not present", + __func__); + } + + switch (length) { + case 1: + vtpci_modern_write_device_1(sc, offset, *(uint8_t *) src); + break; + case 2: { + uint16_t val = virtio_gtoh16(true, *(uint16_t *) src); + vtpci_modern_write_device_2(sc, offset, val); + break; + } + case 4: { + uint32_t val = virtio_gtoh32(true, *(uint32_t *) src); + vtpci_modern_write_device_4(sc, offset, val); + break; + } + case 8: { + uint64_t val = virtio_gtoh64(true, *(uint64_t *) src); + vtpci_modern_write_device_8(sc, offset, val); + break; + } + default: + panic("%s: device %s invalid device write length %d offset %d", + __func__, device_get_nameunit(dev), length, (int) offset); + } +} + +static int +vtpci_modern_probe_configs(device_t dev) +{ + int error; + + /* + * These config capabilities must be present. The DEVICE_CFG + * capability is only present if the device requires it. 
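+ * Returning ENXIO when one is missing fails this probe cleanly, so a
+ * transitional device exposing only the legacy interface can still be
+ * claimed by the vtpcil driver. The PCI_CFG window
+ * (VIRTIO_PCI_CAP_PCI_CFG) is not used by this driver at all.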
+ */ + + error = vtpci_modern_find_cap(dev, VIRTIO_PCI_CAP_COMMON_CFG, NULL); + if (error) { + device_printf(dev, "cannot find COMMON_CFG capability\n"); + return (error); + } + + error = vtpci_modern_find_cap(dev, VIRTIO_PCI_CAP_NOTIFY_CFG, NULL); + if (error) { + device_printf(dev, "cannot find NOTIFY_CFG capability\n"); + return (error); + } + + error = vtpci_modern_find_cap(dev, VIRTIO_PCI_CAP_ISR_CFG, NULL); + if (error) { + device_printf(dev, "cannot find ISR_CFG capability\n"); + return (error); + } + + return (0); +} + +static int +vtpci_modern_find_cap(device_t dev, uint8_t cfg_type, int *cap_offset) +{ + uint32_t type, bar; + int capreg, error; + + for (error = pci_find_cap(dev, PCIY_VENDOR, &capreg); + error == 0; + error = pci_find_next_cap(dev, PCIY_VENDOR, capreg, &capreg)) { + + type = pci_read_config(dev, capreg + + offsetof(struct virtio_pci_cap, cfg_type), 1); + bar = pci_read_config(dev, capreg + + offsetof(struct virtio_pci_cap, bar), 1); + + /* Must ignore reserved BARs. */ + if (bar >= VTPCI_MODERN_MAX_BARS) + continue; + + if (type == cfg_type) { + if (cap_offset != NULL) + *cap_offset = capreg; + break; + } + } + + return (error); +} + +static int +vtpci_modern_map_common_config(struct vtpci_modern_softc *sc) +{ + device_t dev; + int error; + + dev = sc->vtpci_dev; + + error = vtpci_modern_find_cap_resource(sc, VIRTIO_PCI_CAP_COMMON_CFG, + sizeof(struct virtio_pci_common_cfg), 4, &sc->vtpci_common_res_map); + if (error) { + device_printf(dev, "cannot find cap COMMON_CFG resource\n"); + return (error); + } + + error = vtpci_modern_alloc_resource_map(sc, &sc->vtpci_common_res_map); + if (error) { + device_printf(dev, "cannot alloc resource for COMMON_CFG\n"); + return (error); + } + + return (0); +} + +static int +vtpci_modern_map_notify_config(struct vtpci_modern_softc *sc) +{ + device_t dev; + int cap_offset, error; + + dev = sc->vtpci_dev; + + error = vtpci_modern_find_cap_resource(sc, VIRTIO_PCI_CAP_NOTIFY_CFG, + -1, 2, &sc->vtpci_notify_res_map); + if (error) { + device_printf(dev, "cannot find cap NOTIFY_CFG resource\n"); + return (error); + } + + cap_offset = sc->vtpci_notify_res_map.vtrm_cap_offset; + + sc->vtpci_notify_offset_multiplier = pci_read_config(dev, cap_offset + + offsetof(struct virtio_pci_notify_cap, notify_off_multiplier), 4); + + error = vtpci_modern_alloc_resource_map(sc, &sc->vtpci_notify_res_map); + if (error) { + device_printf(dev, "cannot alloc resource for NOTIFY_CFG\n"); + return (error); + } + + return (0); +} + +static int +vtpci_modern_map_isr_config(struct vtpci_modern_softc *sc) +{ + device_t dev; + int error; + + dev = sc->vtpci_dev; + + error = vtpci_modern_find_cap_resource(sc, VIRTIO_PCI_CAP_ISR_CFG, + sizeof(uint8_t), 1, &sc->vtpci_isr_res_map); + if (error) { + device_printf(dev, "cannot find cap ISR_CFG resource\n"); + return (error); + } + + error = vtpci_modern_alloc_resource_map(sc, &sc->vtpci_isr_res_map); + if (error) { + device_printf(dev, "cannot alloc resource for ISR_CFG\n"); + return (error); + } + + return (0); +} + +static int +vtpci_modern_map_device_config(struct vtpci_modern_softc *sc) +{ + device_t dev; + int error; + + dev = sc->vtpci_dev; + + error = vtpci_modern_find_cap_resource(sc, VIRTIO_PCI_CAP_DEVICE_CFG, + -1, 4, &sc->vtpci_device_res_map); + if (error == ENOENT) { + /* Device configuration is optional depending on device. 
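+ * For example, an entropy (virtio_random) device has no
+ * device-specific config area, while network, block, and SCSI devices
+ * do; this is why the read/write dev_config methods above check
+ * vtrm_map.r_size before touching the region.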
*/ + return (0); + } else if (error) { + device_printf(dev, "cannot find cap DEVICE_CFG resource\n"); + return (error); + } + + error = vtpci_modern_alloc_resource_map(sc, &sc->vtpci_device_res_map); + if (error) { + device_printf(dev, "cannot alloc resource for DEVICE_CFG\n"); + return (error); + } + + return (error); +} + +static int +vtpci_modern_map_configs(struct vtpci_modern_softc *sc) +{ + int error; + + error = vtpci_modern_map_common_config(sc); + if (error) + return (error); + + error = vtpci_modern_map_notify_config(sc); + if (error) + return (error); + + error = vtpci_modern_map_isr_config(sc); + if (error) + return (error); + + error = vtpci_modern_map_device_config(sc); + if (error) + return (error); + + vtpci_modern_alloc_msix_resource(sc); + + return (0); +} + +static void +vtpci_modern_unmap_configs(struct vtpci_modern_softc *sc) +{ + + vtpci_modern_free_resource_map(sc, &sc->vtpci_common_res_map); + vtpci_modern_free_resource_map(sc, &sc->vtpci_notify_res_map); + vtpci_modern_free_resource_map(sc, &sc->vtpci_isr_res_map); + vtpci_modern_free_resource_map(sc, &sc->vtpci_device_res_map); + + vtpci_modern_free_bar_resources(sc); + vtpci_modern_free_msix_resource(sc); + + sc->vtpci_notify_offset_multiplier = 0; +} + +static int +vtpci_modern_find_cap_resource(struct vtpci_modern_softc *sc, uint8_t cfg_type, + int min_size, int alignment, struct vtpci_modern_resource_map *res) +{ + device_t dev; + int cap_offset, offset, length, error; + uint8_t bar, cap_length; + + dev = sc->vtpci_dev; + + error = vtpci_modern_find_cap(dev, cfg_type, &cap_offset); + if (error) + return (error); + + cap_length = pci_read_config(dev, + cap_offset + offsetof(struct virtio_pci_cap, cap_len), 1); + + if (cap_length < sizeof(struct virtio_pci_cap)) { + device_printf(dev, "cap %u length %d less than expected\n", + cfg_type, cap_length); + return (ENXIO); + } + + bar = pci_read_config(dev, + cap_offset + offsetof(struct virtio_pci_cap, bar), 1); + offset = pci_read_config(dev, + cap_offset + offsetof(struct virtio_pci_cap, offset), 4); + length = pci_read_config(dev, + cap_offset + offsetof(struct virtio_pci_cap, length), 4); + + if (min_size != -1 && length < min_size) { + device_printf(dev, "cap %u struct length %d less than min %d\n", + cfg_type, length, min_size); + return (ENXIO); + } + + if (offset % alignment) { + device_printf(dev, "cap %u struct offset %d not aligned to %d\n", + cfg_type, offset, alignment); + return (ENXIO); + } + + /* BMV: TODO Can we determine the size of the BAR here? */ + + res->vtrm_cap_offset = cap_offset; + res->vtrm_bar = bar; + res->vtrm_offset = offset; + res->vtrm_length = length; + res->vtrm_type = vtpci_modern_bar_type(sc, bar); + + return (0); +} + +static int +vtpci_modern_bar_type(struct vtpci_modern_softc *sc, int bar) +{ + uint32_t val; + + /* + * The BAR described by a config capability may be either an IOPORT or + * MEM, but we must know the type when calling bus_alloc_resource(). 
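+ * PCI_BAR_IO() keys off the low bit of the raw BAR register, which the
+ * PCI spec defines as 1 for I/O space and 0 for memory space, so a
+ * single config read is enough to choose between SYS_RES_IOPORT and
+ * SYS_RES_MEMORY.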
+ */ + val = pci_read_config(sc->vtpci_dev, PCIR_BAR(bar), 4); + if (PCI_BAR_IO(val)) + return (SYS_RES_IOPORT); + else + return (SYS_RES_MEMORY); +} + +static struct resource * +vtpci_modern_get_bar_resource(struct vtpci_modern_softc *sc, int bar, int type) +{ + struct resource *res; + + MPASS(bar >= 0 && bar < VTPCI_MODERN_MAX_BARS); + res = sc->vtpci_bar_res[bar].vtbr_res; + MPASS(res == NULL || sc->vtpci_bar_res[bar].vtbr_type == type); + + return (res); +} + +static struct resource * +vtpci_modern_alloc_bar_resource(struct vtpci_modern_softc *sc, int bar, + int type) +{ + struct resource *res; + int rid; + + MPASS(bar >= 0 && bar < VTPCI_MODERN_MAX_BARS); + MPASS(type == SYS_RES_MEMORY || type == SYS_RES_IOPORT); + + res = sc->vtpci_bar_res[bar].vtbr_res; + if (res != NULL) { + MPASS(sc->vtpci_bar_res[bar].vtbr_type == type); + return (res); + } + + rid = PCIR_BAR(bar); + res = bus_alloc_resource_any(sc->vtpci_dev, type, &rid, + RF_ACTIVE | RF_UNMAPPED); + if (res != NULL) { + sc->vtpci_bar_res[bar].vtbr_res = res; + sc->vtpci_bar_res[bar].vtbr_type = type; + } + + return (res); +} + +static void +vtpci_modern_free_bar_resources(struct vtpci_modern_softc *sc) +{ + device_t dev; + struct resource *res; + int bar, rid, type; + + dev = sc->vtpci_dev; + + for (bar = 0; bar < VTPCI_MODERN_MAX_BARS; bar++) { + res = sc->vtpci_bar_res[bar].vtbr_res; + type = sc->vtpci_bar_res[bar].vtbr_type; + + if (res != NULL) { + rid = PCIR_BAR(bar); + bus_release_resource(dev, type, rid, res); + sc->vtpci_bar_res[bar].vtbr_res = NULL; + sc->vtpci_bar_res[bar].vtbr_type = 0; + } + } +} + +static int +vtpci_modern_alloc_resource_map(struct vtpci_modern_softc *sc, + struct vtpci_modern_resource_map *map) +{ + struct resource_map_request req; + struct resource *res; + int type; + + type = map->vtrm_type; + + res = vtpci_modern_alloc_bar_resource(sc, map->vtrm_bar, type); + if (res == NULL) + return (ENXIO); + + resource_init_map_request(&req); + req.offset = map->vtrm_offset; + req.length = map->vtrm_length; + + return (bus_map_resource(sc->vtpci_dev, type, res, &req, + &map->vtrm_map)); +} + +static void +vtpci_modern_free_resource_map(struct vtpci_modern_softc *sc, + struct vtpci_modern_resource_map *map) +{ + struct resource *res; + int type; + + type = map->vtrm_type; + res = vtpci_modern_get_bar_resource(sc, map->vtrm_bar, type); + + if (res != NULL && map->vtrm_map.r_size != 0) { + bus_unmap_resource(sc->vtpci_dev, type, res, &map->vtrm_map); + bzero(map, sizeof(struct vtpci_modern_resource_map)); + } +} + +static void +vtpci_modern_alloc_msix_resource(struct vtpci_modern_softc *sc) +{ + device_t dev; + int bar; + + dev = sc->vtpci_dev; + + if (!vtpci_is_msix_available(&sc->vtpci_common) || + (bar = pci_msix_table_bar(dev)) == -1) + return; + + sc->vtpci_msix_bar = bar; + if ((sc->vtpci_msix_res = bus_alloc_resource_any(dev, SYS_RES_MEMORY, + &bar, RF_ACTIVE)) == NULL) + device_printf(dev, "Unable to map MSIX table\n"); +} + +static void +vtpci_modern_free_msix_resource(struct vtpci_modern_softc *sc) +{ + device_t dev; + + dev = sc->vtpci_dev; + + if (sc->vtpci_msix_res != NULL) { + bus_release_resource(dev, SYS_RES_MEMORY, sc->vtpci_msix_bar, + sc->vtpci_msix_res); + sc->vtpci_msix_bar = 0; + sc->vtpci_msix_res = NULL; + } +} + +static void +vtpci_modern_probe_and_attach_child(struct vtpci_modern_softc *sc) +{ + device_t dev, child; + + dev = sc->vtpci_dev; + child = vtpci_child_device(&sc->vtpci_common); + + if (child == NULL || device_get_state(child) != DS_NOTPRESENT) + return; + + if 
(device_probe(child) != 0) + return; + + vtpci_modern_set_status(sc, VIRTIO_CONFIG_STATUS_DRIVER); + + if (device_attach(child) != 0) { + vtpci_modern_set_status(sc, VIRTIO_CONFIG_STATUS_FAILED); + /* Reset state for later attempt. */ + vtpci_modern_child_detached(dev, child); + } else { + vtpci_modern_set_status(sc, VIRTIO_CONFIG_STATUS_DRIVER_OK); + VIRTIO_ATTACH_COMPLETED(child); + } +} + +static int +vtpci_modern_register_msix(struct vtpci_modern_softc *sc, int offset, + struct vtpci_interrupt *intr) +{ + uint16_t vector; + + if (intr != NULL) { + /* Map from guest rid to host vector. */ + vector = intr->vti_rid - 1; + } else + vector = VIRTIO_MSI_NO_VECTOR; + + vtpci_modern_write_common_2(sc, offset, vector); + return (vtpci_modern_read_common_2(sc, offset) == vector ? 0 : ENODEV); +} + +static int +vtpci_modern_register_cfg_msix(device_t dev, struct vtpci_interrupt *intr) +{ + struct vtpci_modern_softc *sc; + int error; + + sc = device_get_softc(dev); + + error = vtpci_modern_register_msix(sc, VIRTIO_PCI_COMMON_MSIX, intr); + if (error) { + device_printf(dev, + "unable to register config MSIX interrupt\n"); + return (error); + } + + return (0); +} + +static int +vtpci_modern_register_vq_msix(device_t dev, int idx, + struct vtpci_interrupt *intr) +{ + struct vtpci_modern_softc *sc; + int error; + + sc = device_get_softc(dev); + + vtpci_modern_select_virtqueue(sc, idx); + error = vtpci_modern_register_msix(sc, VIRTIO_PCI_COMMON_Q_MSIX, intr); + if (error) { + device_printf(dev, + "unable to register virtqueue MSIX interrupt\n"); + return (error); + } + + return (0); +} + +static void +vtpci_modern_reset(struct vtpci_modern_softc *sc) +{ + /* + * Setting the status to RESET sets the host device to the + * original, uninitialized state. Must poll the status until + * the reset is complete. + */ + vtpci_modern_set_status(sc, VIRTIO_CONFIG_STATUS_RESET); + + while (vtpci_modern_get_status(sc) != VIRTIO_CONFIG_STATUS_RESET) + cpu_spinwait(); +} + +static void +vtpci_modern_select_virtqueue(struct vtpci_modern_softc *sc, int idx) +{ + vtpci_modern_write_common_2(sc, VIRTIO_PCI_COMMON_Q_SELECT, idx); +} + +static uint8_t +vtpci_modern_read_isr(device_t dev) +{ + return (vtpci_modern_read_isr_1(device_get_softc(dev), 0)); +} + +static uint16_t +vtpci_modern_get_vq_size(device_t dev, int idx) +{ + struct vtpci_modern_softc *sc; + + sc = device_get_softc(dev); + + vtpci_modern_select_virtqueue(sc, idx); + return (vtpci_modern_read_common_2(sc, VIRTIO_PCI_COMMON_Q_SIZE)); +} + +static bus_size_t +vtpci_modern_get_vq_notify_off(device_t dev, int idx) +{ + struct vtpci_modern_softc *sc; + uint16_t q_notify_off; + + sc = device_get_softc(dev); + + vtpci_modern_select_virtqueue(sc, idx); + q_notify_off = vtpci_modern_read_common_2(sc, VIRTIO_PCI_COMMON_Q_NOFF); + + return (q_notify_off * sc->vtpci_notify_offset_multiplier); +} + +static void +vtpci_modern_set_vq(device_t dev, struct virtqueue *vq) +{ + struct vtpci_modern_softc *sc; + + sc = device_get_softc(dev); + + vtpci_modern_select_virtqueue(sc, virtqueue_index(vq)); + + /* BMV: Currently we never adjust the device's proposed VQ size. 
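+ * The size written back below is whatever virtqueue_alloc() was given,
+ * which this transport takes verbatim from the Q_SIZE register in
+ * vtpci_modern_get_vq_size(). A transport that wanted a smaller ring
+ * could clamp before allocating, roughly (MY_MAX_RING being a made-up
+ * driver limit, not part of this patch):
+ *
+ *	size = MIN(vtpci_modern_get_vq_size(dev, idx), MY_MAX_RING);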
*/ + vtpci_modern_write_common_2(sc, + VIRTIO_PCI_COMMON_Q_SIZE, virtqueue_size(vq)); + + vtpci_modern_write_common_8(sc, + VIRTIO_PCI_COMMON_Q_DESCLO, virtqueue_desc_paddr(vq)); + vtpci_modern_write_common_8(sc, + VIRTIO_PCI_COMMON_Q_AVAILLO, virtqueue_avail_paddr(vq)); + vtpci_modern_write_common_8(sc, + VIRTIO_PCI_COMMON_Q_USEDLO, virtqueue_used_paddr(vq)); +} + +static void +vtpci_modern_disable_vq(device_t dev, int idx) +{ + struct vtpci_modern_softc *sc; + + sc = device_get_softc(dev); + + vtpci_modern_select_virtqueue(sc, idx); + vtpci_modern_write_common_2(sc, VIRTIO_PCI_COMMON_Q_ENABLE, 0); + vtpci_modern_write_common_8(sc, VIRTIO_PCI_COMMON_Q_DESCLO, 0ULL); + vtpci_modern_write_common_8(sc, VIRTIO_PCI_COMMON_Q_AVAILLO, 0ULL); + vtpci_modern_write_common_8(sc, VIRTIO_PCI_COMMON_Q_USEDLO, 0ULL); +} + +static void +vtpci_modern_enable_virtqueues(struct vtpci_modern_softc *sc) +{ + int idx; + + for (idx = 0; idx < sc->vtpci_common.vtpci_nvqs; idx++) { + vtpci_modern_select_virtqueue(sc, idx); + vtpci_modern_write_common_2(sc, VIRTIO_PCI_COMMON_Q_ENABLE, 1); + } +} + +static uint8_t +vtpci_modern_read_common_1(struct vtpci_modern_softc *sc, bus_size_t off) +{ + return (bus_read_1(&sc->vtpci_common_res_map.vtrm_map, off)); +} + +static uint16_t +vtpci_modern_read_common_2(struct vtpci_modern_softc *sc, bus_size_t off) +{ + return (bus_read_2(&sc->vtpci_common_res_map.vtrm_map, off)); +} + +static uint32_t +vtpci_modern_read_common_4(struct vtpci_modern_softc *sc, bus_size_t off) +{ + return (bus_read_4(&sc->vtpci_common_res_map.vtrm_map, off)); +} + +static void +vtpci_modern_write_common_1(struct vtpci_modern_softc *sc, bus_size_t off, + uint8_t val) +{ + bus_write_1(&sc->vtpci_common_res_map.vtrm_map, off, val); +} + +static void +vtpci_modern_write_common_2(struct vtpci_modern_softc *sc, bus_size_t off, + uint16_t val) +{ + bus_write_2(&sc->vtpci_common_res_map.vtrm_map, off, val); +} + +static void +vtpci_modern_write_common_4(struct vtpci_modern_softc *sc, bus_size_t off, + uint32_t val) +{ + bus_write_4(&sc->vtpci_common_res_map.vtrm_map, off, val); +} + +static void +vtpci_modern_write_common_8(struct vtpci_modern_softc *sc, bus_size_t off, + uint64_t val) +{ + uint32_t val0, val1; + + val0 = (uint32_t) val; + val1 = val >> 32; + + vtpci_modern_write_common_4(sc, off, val0); + vtpci_modern_write_common_4(sc, off + 4, val1); +} + +static void +vtpci_modern_write_notify_2(struct vtpci_modern_softc *sc, bus_size_t off, + uint16_t val) +{ + bus_write_2(&sc->vtpci_notify_res_map.vtrm_map, off, val); +} + +static uint8_t +vtpci_modern_read_isr_1(struct vtpci_modern_softc *sc, bus_size_t off) +{ + return (bus_read_1(&sc->vtpci_isr_res_map.vtrm_map, off)); +} + +static uint8_t +vtpci_modern_read_device_1(struct vtpci_modern_softc *sc, bus_size_t off) +{ + return (bus_read_1(&sc->vtpci_device_res_map.vtrm_map, off)); +} + +static uint16_t +vtpci_modern_read_device_2(struct vtpci_modern_softc *sc, bus_size_t off) +{ + return (bus_read_2(&sc->vtpci_device_res_map.vtrm_map, off)); +} + +static uint32_t +vtpci_modern_read_device_4(struct vtpci_modern_softc *sc, bus_size_t off) +{ + return (bus_read_4(&sc->vtpci_device_res_map.vtrm_map, off)); +} + +static uint64_t +vtpci_modern_read_device_8(struct vtpci_modern_softc *sc, bus_size_t off) +{ + device_t dev; + int gen; + uint32_t val0, val1; + + dev = sc->vtpci_dev; + + /* + * Treat the 64-bit field as two 32-bit fields. Use the generation + * to ensure a consistent read. 
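+ * Without the generation check, the device could update the config
+ * space between the two 4-byte reads and we would splice together
+ * halves of two different values. virtio_read_device_config_array(),
+ * added later in this patch, applies the same pattern to multi-field
+ * reads.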
+ */ + do { + gen = vtpci_modern_config_generation(dev); + val0 = vtpci_modern_read_device_4(sc, off); + val1 = vtpci_modern_read_device_4(sc, off + 4); + } while (gen != vtpci_modern_config_generation(dev)); + + return (((uint64_t) val1 << 32) | val0); +} + +static void +vtpci_modern_write_device_1(struct vtpci_modern_softc *sc, bus_size_t off, + uint8_t val) +{ + bus_write_1(&sc->vtpci_device_res_map.vtrm_map, off, val); +} + +static void +vtpci_modern_write_device_2(struct vtpci_modern_softc *sc, bus_size_t off, + uint16_t val) +{ + bus_write_2(&sc->vtpci_device_res_map.vtrm_map, off, val); +} + +static void +vtpci_modern_write_device_4(struct vtpci_modern_softc *sc, bus_size_t off, + uint32_t val) +{ + bus_write_4(&sc->vtpci_device_res_map.vtrm_map, off, val); +} + +static void +vtpci_modern_write_device_8(struct vtpci_modern_softc *sc, bus_size_t off, + uint64_t val) +{ + uint32_t val0, val1; + + val0 = (uint32_t) val; + val1 = val >> 32; + + vtpci_modern_write_device_4(sc, off, val0); + vtpci_modern_write_device_4(sc, off + 4, val1); +} diff -urN sys/dev/virtio.ori/pci/virtio_pci_modern_var.h sys/dev/virtio/pci/virtio_pci_modern_var.h --- sys/dev/virtio.ori/pci/virtio_pci_modern_var.h 1969-12-31 16:00:00.000000000 -0800 +++ sys/dev/virtio/pci/virtio_pci_modern_var.h 2020-03-19 23:17:51.678848000 -0700 @@ -0,0 +1,135 @@ +/* + * SPDX-License-Identifier: BSD-3-Clause + * + * Copyright IBM Corp. 2007 + * + * Authors: + * Anthony Liguori + * + * This header is BSD licensed so anyone can use the definitions to implement + * compatible drivers/servers. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the name of IBM nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED + * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL IBM OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * $FreeBSD$ + */ + +#ifndef _VIRTIO_PCI_MODERN_VAR_H +#define _VIRTIO_PCI_MODERN_VAR_H + +#include + +/* IDs for different capabilities. Must all exist. 
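+ *
+ * "All" is from the device's side of the contract; this driver only
+ * insists on COMMON_CFG, NOTIFY_CFG, and ISR_CFG (see
+ * vtpci_modern_probe_configs()), treats DEVICE_CFG as optional, and
+ * does not use PCI_CFG.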
*/ +/* Common configuration */ +#define VIRTIO_PCI_CAP_COMMON_CFG 1 +/* Notifications */ +#define VIRTIO_PCI_CAP_NOTIFY_CFG 2 +/* ISR access */ +#define VIRTIO_PCI_CAP_ISR_CFG 3 +/* Device specific configuration */ +#define VIRTIO_PCI_CAP_DEVICE_CFG 4 +/* PCI configuration access */ +#define VIRTIO_PCI_CAP_PCI_CFG 5 + +/* This is the PCI capability header: */ +struct virtio_pci_cap { + uint8_t cap_vndr; /* Generic PCI field: PCI_CAP_ID_VNDR */ + uint8_t cap_next; /* Generic PCI field: next ptr. */ + uint8_t cap_len; /* Generic PCI field: capability length */ + uint8_t cfg_type; /* Identifies the structure. */ + uint8_t bar; /* Where to find it. */ + uint8_t padding[3]; /* Pad to full dword. */ + uint32_t offset; /* Offset within bar. */ + uint32_t length; /* Length of the structure, in bytes. */ +}; + +struct virtio_pci_notify_cap { + struct virtio_pci_cap cap; + uint32_t notify_off_multiplier; /* Multiplier for queue_notify_off. */ +}; + +/* Fields in VIRTIO_PCI_CAP_COMMON_CFG: */ +struct virtio_pci_common_cfg { + /* About the whole device. */ + uint32_t device_feature_select; /* read-write */ + uint32_t device_feature; /* read-only */ + uint32_t guest_feature_select; /* read-write */ + uint32_t guest_feature; /* read-write */ + uint16_t msix_config; /* read-write */ + uint16_t num_queues; /* read-only */ + uint8_t device_status; /* read-write */ + uint8_t config_generation; /* read-only */ + + /* About a specific virtqueue. */ + uint16_t queue_select; /* read-write */ + uint16_t queue_size; /* read-write, power of 2. */ + uint16_t queue_msix_vector; /* read-write */ + uint16_t queue_enable; /* read-write */ + uint16_t queue_notify_off; /* read-only */ + uint32_t queue_desc_lo; /* read-write */ + uint32_t queue_desc_hi; /* read-write */ + uint32_t queue_avail_lo; /* read-write */ + uint32_t queue_avail_hi; /* read-write */ + uint32_t queue_used_lo; /* read-write */ + uint32_t queue_used_hi; /* read-write */ +}; + +/* Fields in VIRTIO_PCI_CAP_PCI_CFG: */ +struct virtio_pci_cfg_cap { + struct virtio_pci_cap cap; + uint8_t pci_cfg_data[4]; /* Data for BAR access. */ +}; + +/* Macro versions of offsets for the Old Timers! */ +#define VIRTIO_PCI_CAP_VNDR 0 +#define VIRTIO_PCI_CAP_NEXT 1 +#define VIRTIO_PCI_CAP_LEN 2 +#define VIRTIO_PCI_CAP_CFG_TYPE 3 +#define VIRTIO_PCI_CAP_BAR 4 +#define VIRTIO_PCI_CAP_OFFSET 8 +#define VIRTIO_PCI_CAP_LENGTH 12 + +#define VIRTIO_PCI_NOTIFY_CAP_MULT 16 + +#define VIRTIO_PCI_COMMON_DFSELECT 0 +#define VIRTIO_PCI_COMMON_DF 4 +#define VIRTIO_PCI_COMMON_GFSELECT 8 +#define VIRTIO_PCI_COMMON_GF 12 +#define VIRTIO_PCI_COMMON_MSIX 16 +#define VIRTIO_PCI_COMMON_NUMQ 18 +#define VIRTIO_PCI_COMMON_STATUS 20 +#define VIRTIO_PCI_COMMON_CFGGENERATION 21 +#define VIRTIO_PCI_COMMON_Q_SELECT 22 +#define VIRTIO_PCI_COMMON_Q_SIZE 24 +#define VIRTIO_PCI_COMMON_Q_MSIX 26 +#define VIRTIO_PCI_COMMON_Q_ENABLE 28 +#define VIRTIO_PCI_COMMON_Q_NOFF 30 +#define VIRTIO_PCI_COMMON_Q_DESCLO 32 +#define VIRTIO_PCI_COMMON_Q_DESCHI 36 +#define VIRTIO_PCI_COMMON_Q_AVAILLO 40 +#define VIRTIO_PCI_COMMON_Q_AVAILHI 44 +#define VIRTIO_PCI_COMMON_Q_USEDLO 48 +#define VIRTIO_PCI_COMMON_Q_USEDHI 52 + +#endif /* _VIRTIO_PCI_MODERN_VAR_H */ diff -urN sys/dev/virtio.ori/pci/virtio_pci_var.h sys/dev/virtio/pci/virtio_pci_var.h --- sys/dev/virtio.ori/pci/virtio_pci_var.h 1969-12-31 16:00:00.000000000 -0800 +++ sys/dev/virtio/pci/virtio_pci_var.h 2020-03-19 23:17:51.679042000 -0700 @@ -0,0 +1,55 @@ +/*- + * SPDX-License-Identifier: BSD-3-Clause + * + * Copyright IBM Corp. 
2007 + * + * Authors: + * Anthony Liguori + * + * This header is BSD licensed so anyone can use the definitions to implement + * compatible drivers/servers. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the name of IBM nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED + * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL IBM OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * $FreeBSD$ + */ + +#ifndef _VIRTIO_PCI_VAR_H +#define _VIRTIO_PCI_VAR_H + +/* VirtIO PCI vendor/device ID. */ +#define VIRTIO_PCI_VENDORID 0x1AF4 +#define VIRTIO_PCI_DEVICEID_MIN 0x1000 +#define VIRTIO_PCI_DEVICEID_LEGACY_MAX 0x103F +#define VIRTIO_PCI_DEVICEID_MODERN_MIN 0x1040 +#define VIRTIO_PCI_DEVICEID_MODERN_MAX 0x107F + +/* The bit of the ISR which indicates a device has an interrupt. */ +#define VIRTIO_PCI_ISR_INTR 0x1 +/* The bit of the ISR which indicates a device configuration change. */ +#define VIRTIO_PCI_ISR_CONFIG 0x2 +/* Vector value used to disable MSI for queue. */ +#define VIRTIO_MSI_NO_VECTOR 0xFFFF + +#endif /* _VIRTIO_PCI_VAR_H */ diff -urN sys/dev/virtio.ori/random/virtio_random.c sys/dev/virtio/random/virtio_random.c --- sys/dev/virtio.ori/random/virtio_random.c 2020-03-19 20:20:23.684710000 -0700 +++ sys/dev/virtio/random/virtio_random.c 2020-03-19 23:17:51.679677000 -0700 @@ -1,4 +1,6 @@ /*- + * SPDX-License-Identifier: BSD-2-Clause-FreeBSD + * * Copyright (c) 2013, Bryan Venteicher * All rights reserved. 
* @@ -56,7 +58,8 @@ static int vtrnd_attach(device_t); static int vtrnd_detach(device_t); -static void vtrnd_negotiate_features(struct vtrnd_softc *); +static int vtrnd_negotiate_features(struct vtrnd_softc *); +static int vtrnd_setup_features(struct vtrnd_softc *); static int vtrnd_alloc_virtqueue(struct vtrnd_softc *); static void vtrnd_harvest(struct vtrnd_softc *); static void vtrnd_timer(void *); @@ -83,8 +86,10 @@ }; static devclass_t vtrnd_devclass; -DRIVER_MODULE(virtio_random, virtio_pci, vtrnd_driver, vtrnd_devclass, +DRIVER_MODULE(virtio_random, vtpcil, vtrnd_driver, vtrnd_devclass, vtrnd_modevent, 0); +DRIVER_MODULE(virtio_random, vtpcim, vtrnd_driver, vtrnd_devclass, + vtrnd_modevent, 0); MODULE_VERSION(virtio_random, 1); MODULE_DEPEND(virtio_random, virtio, 1, 1, 1); @@ -128,11 +133,15 @@ sc = device_get_softc(dev); sc->vtrnd_dev = dev; + virtio_set_feature_desc(dev, vtrnd_feature_desc); callout_init(&sc->vtrnd_callout, 1); - virtio_set_feature_desc(dev, vtrnd_feature_desc); - vtrnd_negotiate_features(sc); + error = vtrnd_setup_features(sc); + if (error) { + device_printf(dev, "cannot setup features\n"); + goto fail; + } error = vtrnd_alloc_virtqueue(sc); if (error) { @@ -161,7 +170,7 @@ return (0); } -static void +static int vtrnd_negotiate_features(struct vtrnd_softc *sc) { device_t dev; @@ -171,6 +180,19 @@ features = VTRND_FEATURES; sc->vtrnd_features = virtio_negotiate_features(dev, features); + return (virtio_finalize_features(dev)); +} + +static int +vtrnd_setup_features(struct vtrnd_softc *sc) +{ + int error; + + error = vtrnd_negotiate_features(sc); + if (error) + return (error); + + return (0); } static int diff -urN sys/dev/virtio.ori/scsi/virtio_scsi.c sys/dev/virtio/scsi/virtio_scsi.c --- sys/dev/virtio.ori/scsi/virtio_scsi.c 2020-03-19 20:20:23.690227000 -0700 +++ sys/dev/virtio/scsi/virtio_scsi.c 2020-03-19 23:17:51.683188000 -0700 @@ -1,4 +1,6 @@ /*- + * SPDX-License-Identifier: BSD-2-Clause-FreeBSD + * * Copyright (c) 2012, Bryan Venteicher * All rights reserved. 
* @@ -74,11 +76,13 @@ static int vtscsi_suspend(device_t); static int vtscsi_resume(device_t); -static void vtscsi_negotiate_features(struct vtscsi_softc *); +static int vtscsi_negotiate_features(struct vtscsi_softc *); +static int vtscsi_setup_features(struct vtscsi_softc *); static void vtscsi_read_config(struct vtscsi_softc *, struct virtio_scsi_config *); static int vtscsi_maximum_segments(struct vtscsi_softc *, int); static int vtscsi_alloc_virtqueues(struct vtscsi_softc *); +static void vtscsi_check_sizes(struct vtscsi_softc *); static void vtscsi_write_device_config(struct vtscsi_softc *); static int vtscsi_reinit(struct vtscsi_softc *); @@ -132,10 +136,10 @@ static void vtscsi_get_request_lun(uint8_t [], target_id_t *, lun_id_t *); static void vtscsi_set_request_lun(struct ccb_hdr *, uint8_t []); -static void vtscsi_init_scsi_cmd_req(struct ccb_scsiio *, - struct virtio_scsi_cmd_req *); -static void vtscsi_init_ctrl_tmf_req(struct ccb_hdr *, uint32_t, - uintptr_t, struct virtio_scsi_ctrl_tmf_req *); +static void vtscsi_init_scsi_cmd_req(struct vtscsi_softc *, + struct ccb_scsiio *, struct virtio_scsi_cmd_req *); +static void vtscsi_init_ctrl_tmf_req(struct vtscsi_softc *, struct ccb_hdr *, + uint32_t, uintptr_t, struct virtio_scsi_ctrl_tmf_req *); static void vtscsi_freeze_simq(struct vtscsi_softc *, int); static int vtscsi_thaw_simq(struct vtscsi_softc *, int); @@ -181,11 +185,19 @@ static void vtscsi_enable_vqs_intr(struct vtscsi_softc *); static void vtscsi_get_tunables(struct vtscsi_softc *); -static void vtscsi_add_sysctl(struct vtscsi_softc *); +static void vtscsi_setup_sysctl(struct vtscsi_softc *); static void vtscsi_printf_req(struct vtscsi_request *, const char *, const char *, ...); +#define vtscsi_modern(_sc) (((_sc)->vtscsi_features & VIRTIO_F_VERSION_1) != 0) +#define vtscsi_htog16(_sc, _val) virtio_htog16(vtscsi_modern(_sc), _val) +#define vtscsi_htog32(_sc, _val) virtio_htog32(vtscsi_modern(_sc), _val) +#define vtscsi_htog64(_sc, _val) virtio_htog64(vtscsi_modern(_sc), _val) +#define vtscsi_gtoh16(_sc, _val) virtio_gtoh16(vtscsi_modern(_sc), _val) +#define vtscsi_gtoh32(_sc, _val) virtio_gtoh32(vtscsi_modern(_sc), _val) +#define vtscsi_gtoh64(_sc, _val) virtio_gtoh64(vtscsi_modern(_sc), _val) + /* Global tunables. 
*/ /* * The current QEMU VirtIO SCSI implementation does not cancel in-flight @@ -203,6 +215,8 @@ static struct virtio_feature_desc vtscsi_feature_desc[] = { { VIRTIO_SCSI_F_INOUT, "InOut" }, { VIRTIO_SCSI_F_HOTPLUG, "Hotplug" }, + { VIRTIO_SCSI_F_CHANGE, "ChangeEvent" }, + { VIRTIO_SCSI_F_T10_PI, "T10PI" }, { 0, NULL } }; @@ -225,8 +239,10 @@ }; static devclass_t vtscsi_devclass; -DRIVER_MODULE(virtio_scsi, virtio_pci, vtscsi_driver, vtscsi_devclass, +DRIVER_MODULE(virtio_scsi, vtpcil, vtscsi_driver, vtscsi_devclass, vtscsi_modevent, 0); +DRIVER_MODULE(virtio_scsi, vtpcim, vtscsi_driver, vtscsi_devclass, + vtscsi_modevent, 0); MODULE_VERSION(virtio_scsi, 1); MODULE_DEPEND(virtio_scsi, virtio, 1, 1, 1); MODULE_DEPEND(virtio_scsi, cam, 1, 1, 1); @@ -272,23 +288,20 @@ sc = device_get_softc(dev); sc->vtscsi_dev = dev; + virtio_set_feature_desc(dev, vtscsi_feature_desc); VTSCSI_LOCK_INIT(sc, device_get_nameunit(dev)); TAILQ_INIT(&sc->vtscsi_req_free); vtscsi_get_tunables(sc); - vtscsi_add_sysctl(sc); + vtscsi_setup_sysctl(sc); - virtio_set_feature_desc(dev, vtscsi_feature_desc); - vtscsi_negotiate_features(sc); + error = vtscsi_setup_features(sc); + if (error) { + device_printf(dev, "cannot setup features\n"); + goto fail; + } - if (virtio_with_feature(dev, VIRTIO_RING_F_INDIRECT_DESC)) - sc->vtscsi_flags |= VTSCSI_FLAG_INDIRECT; - if (virtio_with_feature(dev, VIRTIO_SCSI_F_INOUT)) - sc->vtscsi_flags |= VTSCSI_FLAG_BIDIRECTIONAL; - if (virtio_with_feature(dev, VIRTIO_SCSI_F_HOTPLUG)) - sc->vtscsi_flags |= VTSCSI_FLAG_HOTPLUG; - vtscsi_read_config(sc, &scsicfg); sc->vtscsi_max_channel = scsicfg.max_channel; @@ -312,6 +325,8 @@ goto fail; } + vtscsi_check_sizes(sc); + error = vtscsi_init_event_vq(sc); if (error) { device_printf(dev, "cannot populate the eventvq\n"); @@ -398,17 +413,41 @@ return (0); } -static void +static int vtscsi_negotiate_features(struct vtscsi_softc *sc) { device_t dev; uint64_t features; dev = sc->vtscsi_dev; - features = virtio_negotiate_features(dev, VTSCSI_FEATURES); - sc->vtscsi_features = features; + features = VTSCSI_FEATURES; + + sc->vtscsi_features = virtio_negotiate_features(dev, features); + return (virtio_finalize_features(dev)); } +static int +vtscsi_setup_features(struct vtscsi_softc *sc) +{ + device_t dev; + int error; + + dev = sc->vtscsi_dev; + + error = vtscsi_negotiate_features(sc); + if (error) + return (error); + + if (virtio_with_feature(dev, VIRTIO_RING_F_INDIRECT_DESC)) + sc->vtscsi_flags |= VTSCSI_FLAG_INDIRECT; + if (virtio_with_feature(dev, VIRTIO_SCSI_F_INOUT)) + sc->vtscsi_flags |= VTSCSI_FLAG_BIDIRECTIONAL; + if (virtio_with_feature(dev, VIRTIO_SCSI_F_HOTPLUG)) + sc->vtscsi_flags |= VTSCSI_FLAG_HOTPLUG; + + return (0); +} + #define VTSCSI_GET_CONFIG(_dev, _field, _cfg) \ virtio_read_device_config(_dev, \ offsetof(struct virtio_scsi_config, _field), \ @@ -479,6 +518,26 @@ } static void +vtscsi_check_sizes(struct vtscsi_softc *sc) +{ + int rqsize; + + if ((sc->vtscsi_flags & VTSCSI_FLAG_INDIRECT) == 0) { + /* + * Ensure the assertions in virtqueue_enqueue(), + * even if the hypervisor reports a bad seg_max. 
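+ * Without indirect descriptors, every segment of a request occupies
+ * one descriptor in the ring, so a seg_max larger than the ring size
+ * could never be enqueued and would instead trip the descriptor
+ * accounting assertions on the first sufficiently large I/O.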
+ */
+ rqsize = virtqueue_size(sc->vtscsi_request_vq);
+ if (sc->vtscsi_max_nsegs > rqsize) {
+ device_printf(sc->vtscsi_dev,
+ "clamping seg_max (%d %d)\n", sc->vtscsi_max_nsegs,
+ rqsize);
+ sc->vtscsi_max_nsegs = rqsize;
+ }
+ }
+}
+
+static void
 vtscsi_write_device_config(struct vtscsi_softc *sc)
 {
@@ -506,8 +565,8 @@
 	error = virtio_reinit(dev, sc->vtscsi_features);
 	if (error == 0) {
 		vtscsi_write_device_config(sc);
-		vtscsi_reinit_event_vq(sc);
 		virtio_reinit_complete(dev);
+		vtscsi_reinit_event_vq(sc);
 		vtscsi_enable_vqs_intr(sc);
 	}
@@ -1061,7 +1120,7 @@
 	cmd_req = &req->vsr_cmd_req;
 	cmd_resp = &req->vsr_cmd_resp;
-	vtscsi_init_scsi_cmd_req(csio, cmd_req);
+	vtscsi_init_scsi_cmd_req(sc, csio, cmd_req);
 	error = vtscsi_fill_scsi_cmd_sglist(sc, req, &readable, &writable);
 	if (error)
@@ -1181,7 +1240,7 @@
 	tmf_req = &req->vsr_tmf_req;
 	tmf_resp = &req->vsr_tmf_resp;
-	vtscsi_init_ctrl_tmf_req(to_ccbh, VIRTIO_SCSI_T_TMF_ABORT_TASK,
+	vtscsi_init_ctrl_tmf_req(sc, to_ccbh, VIRTIO_SCSI_T_TMF_ABORT_TASK,
 	    (uintptr_t) to_ccbh, tmf_req);
 	sglist_reset(sg);
@@ -1289,26 +1348,29 @@
 vtscsi_complete_scsi_cmd_response(struct vtscsi_softc *sc,
     struct ccb_scsiio *csio, struct virtio_scsi_cmd_resp *cmd_resp)
 {
+ uint32_t resp_sense_length;
 	cam_status status;
 	csio->scsi_status = cmd_resp->status;
- csio->resid = cmd_resp->resid;
+ csio->resid = vtscsi_htog32(sc, cmd_resp->resid);
 	if (csio->scsi_status == SCSI_STATUS_OK)
 		status = CAM_REQ_CMP;
 	else
 		status = CAM_SCSI_STATUS_ERROR;
- if (cmd_resp->sense_len > 0) {
+ resp_sense_length = vtscsi_htog32(sc, cmd_resp->sense_len);
+
+ if (resp_sense_length > 0) {
 		status |= CAM_AUTOSNS_VALID;
- if (cmd_resp->sense_len < csio->sense_len)
- csio->sense_resid = csio->sense_len -
- cmd_resp->sense_len;
+ if (resp_sense_length < csio->sense_len)
+ csio->sense_resid = csio->sense_len - resp_sense_length;
 	else
 		csio->sense_resid = 0;
- memcpy(&csio->sense_data, cmd_resp->sense,
+ bzero(&csio->sense_data, sizeof(csio->sense_data));
+ memcpy(&csio->sense_data, cmd_resp->sense,
 	    csio->sense_len - csio->sense_resid);
 	}
@@ -1469,7 +1531,7 @@
 	if (abort_req->vsr_flags & VTSCSI_REQ_FLAG_TIMEOUT_SET)
 		callout_stop(&abort_req->vsr_callout);
- vtscsi_init_ctrl_tmf_req(ccbh, VIRTIO_SCSI_T_TMF_ABORT_TASK,
+ vtscsi_init_ctrl_tmf_req(sc, ccbh, VIRTIO_SCSI_T_TMF_ABORT_TASK,
 	    (uintptr_t) abort_ccbh, tmf_req);
 	sglist_reset(sg);
@@ -1538,7 +1600,7 @@
 	else
 		subtype = VIRTIO_SCSI_T_TMF_LOGICAL_UNIT_RESET;
- vtscsi_init_ctrl_tmf_req(ccbh, subtype, 0, tmf_req);
+ vtscsi_init_ctrl_tmf_req(sc, ccbh, subtype, 0, tmf_req);
 	sglist_reset(sg);
 	sglist_append(sg, tmf_req, sizeof(struct virtio_scsi_ctrl_tmf_req));
@@ -1575,7 +1637,7 @@
 	}
 static void
-vtscsi_init_scsi_cmd_req(struct ccb_scsiio *csio,
+vtscsi_init_scsi_cmd_req(struct vtscsi_softc *sc, struct ccb_scsiio *csio,
 	struct virtio_scsi_cmd_req *cmd_req)
 {
 	uint8_t attr;
@@ -1596,7 +1658,7 @@
 	}
 	vtscsi_set_request_lun(&csio->ccb_h, cmd_req->lun);
- cmd_req->tag = (uintptr_t) csio;
+ cmd_req->tag = vtscsi_gtoh64(sc, (uintptr_t) csio);
 	cmd_req->task_attr = attr;
 	memcpy(cmd_req->cdb,
@@ -1606,15 +1668,15 @@
 	}
 static void
-vtscsi_init_ctrl_tmf_req(struct ccb_hdr *ccbh, uint32_t subtype,
- uintptr_t tag, struct virtio_scsi_ctrl_tmf_req *tmf_req)
+vtscsi_init_ctrl_tmf_req(struct vtscsi_softc *sc, struct ccb_hdr *ccbh,
+ uint32_t subtype, uintptr_t tag, struct virtio_scsi_ctrl_tmf_req *tmf_req)
 {
 	vtscsi_set_request_lun(ccbh, tmf_req->lun);
- tmf_req->type = VIRTIO_SCSI_T_TMF;
- tmf_req->subtype = subtype;
- tmf_req->tag = tag;
+ tmf_req->type = vtscsi_gtoh32(sc, 
VIRTIO_SCSI_T_TMF); + tmf_req->subtype = vtscsi_gtoh32(sc, subtype); + tmf_req->tag = vtscsi_gtoh64(sc, tag); } static void @@ -2248,7 +2310,7 @@ } static void -vtscsi_add_sysctl(struct vtscsi_softc *sc) +vtscsi_setup_sysctl(struct vtscsi_softc *sc) { device_t dev; struct vtscsi_statistics *stats; diff -urN sys/dev/virtio.ori/scsi/virtio_scsi.h sys/dev/virtio/scsi/virtio_scsi.h --- sys/dev/virtio.ori/scsi/virtio_scsi.h 2020-03-19 20:20:23.690461000 -0700 +++ sys/dev/virtio/scsi/virtio_scsi.h 2020-03-19 23:17:51.683311000 -0700 @@ -1,4 +1,6 @@ /*- + * SPDX-License-Identifier: BSD-2-Clause-FreeBSD + * * This header is BSD licensed so anyone can use the definitions to implement * compatible drivers/servers. * @@ -29,13 +31,7 @@ #ifndef _VIRTIO_SCSI_H #define _VIRTIO_SCSI_H -/* Feature bits */ -#define VIRTIO_SCSI_F_INOUT 0x0001 /* Single request can contain both - * read and write buffers */ -#define VIRTIO_SCSI_F_HOTPLUG 0x0002 /* Host should enable hot plug/unplug - * of new LUNs and targets. - */ - +/* Default values of the CDB and sense data size configuration fields */ #define VIRTIO_SCSI_CDB_SIZE 32 #define VIRTIO_SCSI_SENSE_SIZE 96 @@ -44,11 +40,23 @@ uint8_t lun[8]; /* Logical Unit Number */ uint64_t tag; /* Command identifier */ uint8_t task_attr; /* Task attribute */ - uint8_t prio; + uint8_t prio; /* SAM command priority field */ uint8_t crn; uint8_t cdb[VIRTIO_SCSI_CDB_SIZE]; } __packed; +/* SCSI command request, followed by protection information */ +struct virtio_scsi_cmd_req_pi { + uint8_t lun[8]; /* Logical Unit Number */ + uint64_t tag; /* Command identifier */ + uint8_t task_attr; /* Task attribute */ + uint8_t prio; /* SAM command priority field */ + uint8_t crn; + uint32_t pi_bytesout; /* DataOUT PI Number of bytes */ + uint32_t pi_bytesin; /* DataIN PI Number of bytes */ + uint8_t cdb[VIRTIO_SCSI_CDB_SIZE]; +} __packed; + /* Response, followed by sense data and data-in */ struct virtio_scsi_cmd_resp { uint32_t sense_len; /* Sense data length */ @@ -102,6 +110,22 @@ uint32_t max_lun; } __packed; +/* Feature bits */ +#define VIRTIO_SCSI_F_INOUT 0x0001 /* Single request can contain both + * read and write buffers. + */ +#define VIRTIO_SCSI_F_HOTPLUG 0x0002 /* Host should enable hot plug/unplug + * of new LUNs and targets. + */ +#define VIRTIO_SCSI_F_CHANGE 0x0004 /* Host will report changes to LUN + * parameters via a + * VIRTIO_SCSI_T_PARAM_CHANGE event. + */ +#define VIRTIO_SCSI_F_T10_PI 0x0008 /* Extended fields for T10 protection + * information (DIF/DIX) are included + * in the SCSI request header. + */ + /* Response codes */ #define VIRTIO_SCSI_S_OK 0 #define VIRTIO_SCSI_S_FUNCTION_COMPLETE 0 @@ -138,6 +162,7 @@ #define VIRTIO_SCSI_T_NO_EVENT 0 #define VIRTIO_SCSI_T_TRANSPORT_RESET 1 #define VIRTIO_SCSI_T_ASYNC_NOTIFY 2 +#define VIRTIO_SCSI_T_PARAM_CHANGE 3 /* Reasons of transport reset event */ #define VIRTIO_SCSI_EVT_RESET_HARD 0 diff -urN sys/dev/virtio.ori/scsi/virtio_scsivar.h sys/dev/virtio/scsi/virtio_scsivar.h --- sys/dev/virtio.ori/scsi/virtio_scsivar.h 2020-03-19 20:20:23.689928000 -0700 +++ sys/dev/virtio/scsi/virtio_scsivar.h 2020-03-19 23:17:51.682896000 -0700 @@ -1,4 +1,6 @@ /*- + * SPDX-License-Identifier: BSD-2-Clause-FreeBSD + * * Copyright (c) 2012, Bryan Venteicher * All rights reserved. 
* diff -urN sys/dev/virtio.ori/virtio.c sys/dev/virtio/virtio.c --- sys/dev/virtio.ori/virtio.c 2020-03-19 20:20:23.689201000 -0700 +++ sys/dev/virtio/virtio.c 2020-03-19 23:17:51.682447000 -0700 @@ -1,4 +1,6 @@ /*- + * SPDX-License-Identifier: BSD-2-Clause-FreeBSD + * * Copyright (c) 2011, Bryan Venteicher * All rights reserved. * @@ -73,10 +75,13 @@ /* Device independent features. */ static struct virtio_feature_desc virtio_common_feature_desc[] = { - { VIRTIO_F_NOTIFY_ON_EMPTY, "NotifyOnEmpty" }, - { VIRTIO_RING_F_INDIRECT_DESC, "RingIndirect" }, - { VIRTIO_RING_F_EVENT_IDX, "EventIdx" }, - { VIRTIO_F_BAD_FEATURE, "BadFeature" }, + { VIRTIO_F_NOTIFY_ON_EMPTY, "NotifyOnEmpty" }, /* Legacy */ + { VIRTIO_F_ANY_LAYOUT, "AnyLayout" }, /* Legacy */ + { VIRTIO_RING_F_INDIRECT_DESC, "RingIndirectDesc" }, + { VIRTIO_RING_F_EVENT_IDX, "RingEventIdx" }, + { VIRTIO_F_BAD_FEATURE, "BadFeature" }, /* Legacy */ + { VIRTIO_F_VERSION_1, "Version1" }, + { VIRTIO_F_IOMMU_PLATFORM, "IOMMUPlatform" }, { 0, NULL } }; @@ -114,24 +119,16 @@ return (NULL); } -void -virtio_describe(device_t dev, const char *msg, - uint64_t features, struct virtio_feature_desc *desc) +int +virtio_describe_sbuf(struct sbuf *sb, uint64_t features, + struct virtio_feature_desc *desc) { - struct sbuf sb; - uint64_t val; - char *buf; const char *name; + uint64_t val; int n; - if ((buf = malloc(512, M_TEMP, M_NOWAIT)) == NULL) { - device_printf(dev, "%s features: %#jx\n", msg, (uintmax_t) features); - return; - } + sbuf_printf(sb, "%#jx", (uintmax_t) features); - sbuf_new(&sb, buf, 512, SBUF_FIXEDLEN); - sbuf_printf(&sb, "%s features: %#jx", msg, (uintmax_t) features); - for (n = 0, val = 1ULL << 63; val != 0; val >>= 1) { /* * BAD_FEATURE is used to detect broken Linux clients @@ -141,32 +138,95 @@ continue; if (n++ == 0) - sbuf_cat(&sb, " <"); + sbuf_cat(sb, " <"); else - sbuf_cat(&sb, ","); + sbuf_cat(sb, ","); name = virtio_feature_name(val, desc); if (name == NULL) - sbuf_printf(&sb, "%#jx", (uintmax_t) val); + sbuf_printf(sb, "%#jx", (uintmax_t) val); else - sbuf_cat(&sb, name); + sbuf_cat(sb, name); } if (n > 0) - sbuf_cat(&sb, ">"); + sbuf_cat(sb, ">"); -#if __FreeBSD_version < 900020 - sbuf_finish(&sb); - if (sbuf_overflowed(&sb) == 0) -#else - if (sbuf_finish(&sb) == 0) -#endif + return (sbuf_finish(sb)); +} + +void +virtio_describe(device_t dev, const char *msg, uint64_t features, + struct virtio_feature_desc *desc) +{ + struct sbuf sb; + char *buf; + int error; + + if ((buf = malloc(1024, M_TEMP, M_NOWAIT)) == NULL) { + error = ENOMEM; + goto out; + } + + sbuf_new(&sb, buf, 1024, SBUF_FIXEDLEN); + sbuf_printf(&sb, "%s features: ", msg); + + error = virtio_describe_sbuf(&sb, features, desc); + if (error == 0) device_printf(dev, "%s\n", sbuf_data(&sb)); sbuf_delete(&sb); free(buf, M_TEMP); + +out: + if (error != 0) { + device_printf(dev, "%s features: %#jx\n", msg, + (uintmax_t) features); + } } +uint64_t +virtio_filter_transport_features(uint64_t features) +{ + uint64_t transport, mask; + + transport = (1ULL << + (VIRTIO_TRANSPORT_F_END - VIRTIO_TRANSPORT_F_START)) - 1; + transport <<= VIRTIO_TRANSPORT_F_START; + + mask = -1ULL & ~transport; + mask |= VIRTIO_RING_F_INDIRECT_DESC; + mask |= VIRTIO_RING_F_EVENT_IDX; + mask |= VIRTIO_F_VERSION_1; + + return (features & mask); +} + +int +virtio_bus_is_modern(device_t dev) +{ + uintptr_t modern; + + virtio_read_ivar(dev, VIRTIO_IVAR_MODERN, &modern); + return (modern != 0); +} + +void +virtio_read_device_config_array(device_t dev, bus_size_t offset, void *dst, + int size, int 
count) +{ + int i, gen; + + do { + gen = virtio_config_generation(dev); + + for (i = 0; i < count; i++) { + virtio_read_device_config(dev, offset + i * size, + (uint8_t *) dst + i * size, size); + } + } while (gen != virtio_config_generation(dev)); +} + /* * VirtIO bus method wrappers. */ @@ -192,6 +252,13 @@ return (VIRTIO_BUS_NEGOTIATE_FEATURES(device_get_parent(dev), child_features)); +} + +int +virtio_finalize_features(device_t dev) +{ + + return (VIRTIO_BUS_FINALIZE_FEATURES(device_get_parent(dev))); } int diff -urN sys/dev/virtio.ori/virtio.h sys/dev/virtio/virtio.h --- sys/dev/virtio.ori/virtio.h 2020-03-19 20:20:23.691978000 -0700 +++ sys/dev/virtio/virtio.h 2020-03-19 23:17:51.745201000 -0700 @@ -29,9 +29,11 @@ #ifndef _VIRTIO_H_ #define _VIRTIO_H_ +#include #include #include +struct sbuf; struct vq_alloc_info; /* @@ -55,7 +57,7 @@ #define VIRTIO_IVAR_DEVICE 4 #define VIRTIO_IVAR_SUBVENDOR 5 #define VIRTIO_IVAR_SUBDEVICE 6 - +#define VIRTIO_IVAR_MODERN 7 struct virtio_feature_desc { uint64_t vfd_val; const char *vfd_str; @@ -63,14 +65,20 @@ const char *virtio_device_name(uint16_t devid); void virtio_describe(device_t dev, const char *msg, - uint64_t features, struct virtio_feature_desc *feature_desc); - + uint64_t features, struct virtio_feature_desc *desc); +int virtio_describe_sbuf(struct sbuf *sb, uint64_t features, + struct virtio_feature_desc *desc); +uint64_t virtio_filter_transport_features(uint64_t features); +int virtio_bus_is_modern(device_t dev); +void virtio_read_device_config_array(device_t dev, bus_size_t offset, + void *dst, int size, int count); /* * VirtIO Bus Methods. */ void virtio_read_ivar(device_t dev, int ivar, uintptr_t *val); void virtio_write_ivar(device_t dev, int ivar, uintptr_t val); uint64_t virtio_negotiate_features(device_t dev, uint64_t child_features); +int virtio_finalize_features(device_t dev); int virtio_alloc_virtqueues(device_t dev, int flags, int nvqs, struct vq_alloc_info *info); int virtio_setup_intr(device_t dev, enum intr_type type); @@ -128,7 +136,7 @@ VIRTIO_READ_IVAR(device, VIRTIO_IVAR_DEVICE); VIRTIO_READ_IVAR(subvendor, VIRTIO_IVAR_SUBVENDOR); VIRTIO_READ_IVAR(subdevice, VIRTIO_IVAR_SUBDEVICE); - +VIRTIO_READ_IVAR(modern, VIRTIO_IVAR_MODERN); #undef VIRTIO_READ_IVAR #define VIRTIO_WRITE_IVAR(name, ivar) \ diff -urN sys/dev/virtio.ori/virtio_bus_if.m sys/dev/virtio/virtio_bus_if.m --- sys/dev/virtio.ori/virtio_bus_if.m 2020-03-19 20:20:23.689353000 -0700 +++ sys/dev/virtio/virtio_bus_if.m 2020-03-19 23:17:51.682551000 -0700 @@ -36,6 +36,12 @@ CODE { static int + virtio_bus_default_finalize_features(device_t dev) + { + return (0); + } + + static int virtio_bus_default_config_generation(device_t dev) { return (0); @@ -47,6 +53,10 @@ uint64_t child_features; }; +METHOD int finalize_features { + device_t dev; +} DEFAULT virtio_bus_default_finalize_features; + METHOD int with_feature { device_t dev; uint64_t feature; @@ -80,6 +90,7 @@ METHOD void notify_vq { device_t dev; uint16_t queue; + bus_size_t offset; }; METHOD int config_generation { diff -urN sys/dev/virtio.ori/virtio_config.h sys/dev/virtio/virtio_config.h --- sys/dev/virtio.ori/virtio_config.h 2020-03-19 20:20:23.686952000 -0700 +++ sys/dev/virtio/virtio_config.h 2020-03-19 23:17:51.680636000 -0700 @@ -33,35 +33,58 @@ /* Status byte for guest to report progress. */ #define VIRTIO_CONFIG_STATUS_RESET 0x00 +/* We have seen device and processed generic fields. 
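+ *
+ * A modern (V1) device is driven through these bits in order: ACK,
+ * then DRIVER, then FEATURES_OK, then DRIVER_OK, with FAILED written
+ * if initialization is abandoned; legacy devices skip FEATURES_OK.
+ * See vtpci_modern_attach() and vtpci_modern_finalize_features()
+ * earlier in this patch for the sequence.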
*/ #define VIRTIO_CONFIG_STATUS_ACK 0x01 -#define VIRTIO_CONFIG_STATUS_DRIVER 0x03 +/* We have found a driver for the device. */ +#define VIRTIO_CONFIG_STATUS_DRIVER 0x02 +/* Driver has used its parts of the config, and is happy. */ #define VIRTIO_CONFIG_STATUS_DRIVER_OK 0x04 +/* Driver has finished configuring features (modern only). */ +#define VIRTIO_CONFIG_S_FEATURES_OK 0x08 +/* Device entered invalid state, driver must reset it. */ +#define VIRTIO_CONFIG_S_NEEDS_RESET 0x40 +/* We've given up on this device. */ #define VIRTIO_CONFIG_STATUS_FAILED 0x80 /* * Generate interrupt when the virtqueue ring is * completely used, even if we've suppressed them. */ -#define VIRTIO_F_NOTIFY_ON_EMPTY (1 << 24) +#define VIRTIO_F_NOTIFY_ON_EMPTY (1UL << 24) +/* Can the device handle any descriptor layout? */ +#define VIRTIO_F_ANY_LAYOUT (1UL << 27) + /* Support for indirect buffer descriptors. */ -#define VIRTIO_RING_F_INDIRECT_DESC (1 << 28) +#define VIRTIO_RING_F_INDIRECT_DESC (1UL << 28) /* Support to suppress interrupt until specific index is reached. */ -#define VIRTIO_RING_F_EVENT_IDX (1 << 29) +#define VIRTIO_RING_F_EVENT_IDX (1UL << 29) /* * The guest should never negotiate this feature; it * is used to detect faulty drivers. */ -#define VIRTIO_F_BAD_FEATURE (1 << 30) +#define VIRTIO_F_BAD_FEATURE (1UL << 30) +/* v1.0 compliant. */ +#define VIRTIO_F_VERSION_1 (1ULL << 32) + /* - * Some VirtIO feature bits (currently bits 28 through 31) are + * If clear - device has the IOMMU bypass quirk feature. + * If set - use platform tools to detect the IOMMU. + * + * Note the reverse polarity (compared to most other features), + * this is for compatibility with legacy systems. + */ +#define VIRTIO_F_IOMMU_PLATFORM (1ULL << 33) + +/* + * Some VirtIO feature bits (currently bits 28 through 34) are * reserved for the transport being used (eg. virtio_ring), the * rest are per-device feature bits. */ #define VIRTIO_TRANSPORT_F_START 28 -#define VIRTIO_TRANSPORT_F_END 32 +#define VIRTIO_TRANSPORT_F_END 34 #endif /* _VIRTIO_CONFIG_H_ */ diff -urN sys/dev/virtio.ori/virtio_endian.h sys/dev/virtio/virtio_endian.h --- sys/dev/virtio.ori/virtio_endian.h 1969-12-31 16:00:00.000000000 -0800 +++ sys/dev/virtio/virtio_endian.h 2020-03-19 23:17:51.745941000 -0700 @@ -0,0 +1,106 @@ +/*- + * SPDX-License-Identifier: BSD-2-Clause + * + * Copyright (c) 2017, Bryan Venteicher + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice unmodified, this list of conditions, and the following + * disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
diff -urN sys/dev/virtio.ori/virtio_endian.h sys/dev/virtio/virtio_endian.h
--- sys/dev/virtio.ori/virtio_endian.h	1969-12-31 16:00:00.000000000 -0800
+++ sys/dev/virtio/virtio_endian.h	2020-03-19 23:17:51.745941000 -0700
@@ -0,0 +1,106 @@
+/*-
+ * SPDX-License-Identifier: BSD-2-Clause
+ *
+ * Copyright (c) 2017, Bryan Venteicher
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice unmodified, this list of conditions, and the following
+ *    disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+ * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+ * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
+ * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * $FreeBSD$
+ */
+
+#ifndef _VIRTIO_ENDIAN_H_
+#define _VIRTIO_ENDIAN_H_
+
+#include <sys/endian.h>
+
+/*
+ * VirtIO V1 (modern) uses little endian, while legacy VirtIO uses the guest's
+ * native endian. These functions convert to and from the Guest's (driver's)
+ * and the Host's (device's) endianness when needed.
+ */
+
+static inline bool
+virtio_swap_endian(bool modern)
+{
+#if _BYTE_ORDER == _LITTLE_ENDIAN
+	return (false);
+#else
+	return (modern);
+#endif
+}
+
+static inline uint16_t
+virtio_htog16(bool modern, uint16_t val)
+{
+	if (virtio_swap_endian(modern))
+		return (le16toh(val));
+	else
+		return (val);
+}
+
+static inline uint16_t
+virtio_gtoh16(bool modern, uint16_t val)
+{
+	if (virtio_swap_endian(modern))
+		return (htole16(val));
+	else
+		return (val);
+}
+
+static inline uint32_t
+virtio_htog32(bool modern, uint32_t val)
+{
+	if (virtio_swap_endian(modern))
+		return (le32toh(val));
+	else
+		return (val);
+}
+
+static inline uint32_t
+virtio_gtoh32(bool modern, uint32_t val)
+{
+	if (virtio_swap_endian(modern))
+		return (htole32(val));
+	else
+		return (val);
+}
+
+static inline uint64_t
+virtio_htog64(bool modern, uint64_t val)
+{
+	if (virtio_swap_endian(modern))
+		return (le64toh(val));
+	else
+		return (val);
+}
+
+static inline uint64_t
+virtio_gtoh64(bool modern, uint64_t val)
+{
+	if (virtio_swap_endian(modern))
+		return (htole64(val));
+	else
+		return (val);
+}
+
+#endif /* _VIRTIO_ENDIAN_H_ */
diff -urN sys/dev/virtio.ori/virtio_ring.h sys/dev/virtio/virtio_ring.h
--- sys/dev/virtio.ori/virtio_ring.h	2020-03-19 20:20:23.685048000 -0700
+++ sys/dev/virtio/virtio_ring.h	2020-03-19 23:17:51.679824000 -0700
@@ -90,6 +90,13 @@
 	struct vring_used *used;
 };
 
+/* Alignment requirements for vring elements.
+ * When using pre-virtio 1.0 layout, these fall out naturally.
+ */
+#define VRING_AVAIL_ALIGN_SIZE	2
+#define VRING_USED_ALIGN_SIZE	4
+#define VRING_DESC_ALIGN_SIZE	16
+
 /* The standard layout for the ring is a continuous chunk of memory which
  * looks like this. We assume num is a power of 2.
  *
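These helpers make the modern/legacy byte-order split explicit at each access site: the caller says whether the queue was negotiated as modern, and a swap only happens on big-endian guests, since modern devices are always little endian. A minimal illustration (the field and flag names are illustrative):

    #include <dev/virtio/virtio_endian.h>

    /*
     * Store a 16-bit value into a device-readable ring field: a no-op on
     * little-endian machines, a byte swap on big-endian machines only
     * when the queue is modern (V1).
     */
    static inline void
    example_store_idx(volatile uint16_t *field, bool modern, uint16_t val)
    {
    	*field = virtio_gtoh16(modern, val);
    }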
diff -urN sys/dev/virtio.ori/virtqueue.c sys/dev/virtio/virtqueue.c
--- sys/dev/virtio.ori/virtqueue.c	2020-03-19 20:20:23.682707000 -0700
+++ sys/dev/virtio/virtqueue.c	2020-03-19 23:17:51.677057000 -0700
@@ -1,4 +1,6 @@
 /*-
+ * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
+ *
  * Copyright (c) 2011, Bryan Venteicher
  * All rights reserved.
  *
@@ -55,18 +57,15 @@
 
 struct virtqueue {
 	device_t		 vq_dev;
-	char			 vq_name[VIRTQUEUE_MAX_NAME_SZ];
 	uint16_t		 vq_queue_index;
 	uint16_t		 vq_nentries;
 	uint32_t		 vq_flags;
-#define	VIRTQUEUE_FLAG_INDIRECT	 0x0001
-#define	VIRTQUEUE_FLAG_EVENT_IDX 0x0002
-
-	int			 vq_alignment;
-	int			 vq_ring_size;
-	void			*vq_ring_mem;
+#define	VIRTQUEUE_FLAG_MODERN	 0x0001
+#define	VIRTQUEUE_FLAG_INDIRECT	 0x0002
+#define	VIRTQUEUE_FLAG_EVENT_IDX 0x0004
+
 	int			 vq_max_indirect_size;
-	int			 vq_indirect_mem_size;
+	bus_size_t		 vq_notify_offset;
 	virtqueue_intr_t	*vq_intrhand;
 	void			*vq_intrhand_arg;
@@ -85,6 +84,12 @@
 	 */
 	uint16_t		 vq_used_cons_idx;
 
+	void			*vq_ring_mem;
+	int			 vq_indirect_mem_size;
+	int			 vq_alignment;
+	int			 vq_ring_size;
+	char			 vq_name[VIRTQUEUE_MAX_NAME_SZ];
+
 	struct vq_desc_extra {
 		void		  *cookie;
 		struct vring_desc *indirect;
@@ -132,6 +137,14 @@
 static void	vq_ring_notify_host(struct virtqueue *);
 static void	vq_ring_free_chain(struct virtqueue *, uint16_t);
 
+#define vq_modern(_vq)		(((_vq)->vq_flags & VIRTQUEUE_FLAG_MODERN) != 0)
+#define vq_htog16(_vq, _val)	virtio_htog16(vq_modern(_vq), _val)
+#define vq_htog32(_vq, _val)	virtio_htog32(vq_modern(_vq), _val)
+#define vq_htog64(_vq, _val)	virtio_htog64(vq_modern(_vq), _val)
+#define vq_gtoh16(_vq, _val)	virtio_gtoh16(vq_modern(_vq), _val)
+#define vq_gtoh32(_vq, _val)	virtio_gtoh32(vq_modern(_vq), _val)
+#define vq_gtoh64(_vq, _val)	virtio_gtoh64(vq_modern(_vq), _val)
+
 uint64_t
 virtqueue_filter_features(uint64_t features)
 {
@@ -145,8 +158,9 @@
 }
 
 int
-virtqueue_alloc(device_t dev, uint16_t queue, uint16_t size, int align,
-    vm_paddr_t highaddr, struct vq_alloc_info *info, struct virtqueue **vqp)
+virtqueue_alloc(device_t dev, uint16_t queue, uint16_t size,
+    bus_size_t notify_offset, int align, vm_paddr_t highaddr,
+    struct vq_alloc_info *info, struct virtqueue **vqp)
 {
 	struct virtqueue *vq;
 	int error;
@@ -182,12 +196,15 @@
 	vq->vq_dev = dev;
 	strlcpy(vq->vq_name, info->vqai_name, sizeof(vq->vq_name));
 	vq->vq_queue_index = queue;
+	vq->vq_notify_offset = notify_offset;
 	vq->vq_alignment = align;
 	vq->vq_nentries = size;
 	vq->vq_free_cnt = size;
 	vq->vq_intrhand = info->vqai_intr;
 	vq->vq_intrhand_arg = info->vqai_intr_arg;
 
+	if (VIRTIO_BUS_WITH_FEATURE(dev, VIRTIO_F_VERSION_1) != 0)
+		vq->vq_flags |= VIRTQUEUE_FLAG_MODERN;
 	if (VIRTIO_BUS_WITH_FEATURE(dev, VIRTIO_RING_F_EVENT_IDX) != 0)
 		vq->vq_flags |= VIRTQUEUE_FLAG_EVENT_IDX;
@@ -292,8 +309,8 @@
 	bzero(indirect, vq->vq_indirect_mem_size);
 
 	for (i = 0; i < vq->vq_max_indirect_size - 1; i++)
-		indirect[i].next = i + 1;
-	indirect[i].next = VQ_RING_DESC_CHAIN_END;
+		indirect[i].next = vq_gtoh16(vq, i + 1);
+	indirect[i].next = vq_gtoh16(vq, VQ_RING_DESC_CHAIN_END);
 }
 
 int
@@ -391,6 +408,7 @@
 uint16_t
 virtqueue_index(struct virtqueue *vq)
 {
+
 	return (vq->vq_queue_index);
 }
@@ -439,7 +457,7 @@
 {
 	uint16_t used_idx, nused;
 
-	used_idx = vq->vq_ring.used->idx;
+	used_idx = vq_htog16(vq, vq->vq_ring.used->idx);
 	nused = (uint16_t)(used_idx - vq->vq_used_cons_idx);
 	VQASSERT(vq, nused <= vq->vq_nentries, "used more than available");
@@ -451,7 +469,7 @@
 virtqueue_intr_filter(struct virtqueue *vq)
 {
 
-	if (vq->vq_used_cons_idx == vq->vq_ring.used->idx)
+	if (vq->vq_used_cons_idx == vq_htog16(vq, vq->vq_ring.used->idx))
 		return (0);
 
 	virtqueue_disable_intr(vq);
@@ -478,7 +496,7 @@
 {
 	uint16_t ndesc, avail_idx;
 
-	avail_idx = vq->vq_ring.avail->idx;
+	avail_idx = vq_htog16(vq, vq->vq_ring.avail->idx);
 	ndesc = (uint16_t)(avail_idx - vq->vq_used_cons_idx);
 
 	switch (hint) {
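With the widened virtqueue_alloc() signature above, every transport passes the queue's doorbell offset at allocation time and it is echoed back on each notify. A sketch of an updated caller; idx, size, align, and how notify_offset is derived are transport-specific, and VQ_ALLOC_INFO_INIT usage here is a plausible pattern rather than code from this patch:

    struct vq_alloc_info info;
    struct virtqueue *vq;
    int error;

    VQ_ALLOC_INFO_INIT(&info, 0, vtfoo_vq_intr, sc, &vq, "%s request",
        device_get_nameunit(dev));

    /*
     * Legacy PCI has a single doorbell register, so a constant offset
     * suffices; modern PCI computes a per-queue offset (see the notify
     * multiplier sketch further below).
     */
    error = virtqueue_alloc(dev, idx, size, notify_offset, align,
        BUS_SPACE_MAXADDR, &info, &vq);
    if (error)
    	return (error);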
@@ -503,10 +521,12 @@
 {
 
 	if (vq->vq_flags & VIRTQUEUE_FLAG_EVENT_IDX) {
-		vring_used_event(&vq->vq_ring) = vq->vq_used_cons_idx -
-		    vq->vq_nentries - 1;
-	} else
-		vq->vq_ring.avail->flags |= VRING_AVAIL_F_NO_INTERRUPT;
+		vring_used_event(&vq->vq_ring) = vq_gtoh16(vq,
+		    vq->vq_used_cons_idx - vq->vq_nentries - 1);
+		return;
+	}
+
+	vq->vq_ring.avail->flags |= vq_gtoh16(vq, VRING_AVAIL_F_NO_INTERRUPT);
 }
 
 int
@@ -569,16 +589,16 @@
 	void *cookie;
 	uint16_t used_idx, desc_idx;
 
-	if (vq->vq_used_cons_idx == vq->vq_ring.used->idx)
+	if (vq->vq_used_cons_idx == vq_htog16(vq, vq->vq_ring.used->idx))
 		return (NULL);
 
 	used_idx = vq->vq_used_cons_idx++ & (vq->vq_nentries - 1);
 	uep = &vq->vq_ring.used->ring[used_idx];
 
 	rmb();
-	desc_idx = (uint16_t) uep->id;
+	desc_idx = (uint16_t) vq_htog32(vq, uep->id);
 	if (len != NULL)
-		*len = uep->len;
+		*len = vq_htog32(vq, uep->len);
 
 	vq_ring_free_chain(vq, desc_idx);
@@ -636,13 +656,13 @@
 	printf("VQ: %s - size=%d; free=%d; used=%d; queued=%d; "
 	    "desc_head_idx=%d; avail.idx=%d; used_cons_idx=%d; "
 	    "used.idx=%d; used_event_idx=%d; avail.flags=0x%x; used.flags=0x%x\n",
-	    vq->vq_name, vq->vq_nentries, vq->vq_free_cnt,
-	    virtqueue_nused(vq), vq->vq_queued_cnt, vq->vq_desc_head_idx,
-	    vq->vq_ring.avail->idx, vq->vq_used_cons_idx,
-	    vq->vq_ring.used->idx,
-	    vring_used_event(&vq->vq_ring),
-	    vq->vq_ring.avail->flags,
-	    vq->vq_ring.used->flags);
+	    vq->vq_name, vq->vq_nentries, vq->vq_free_cnt, virtqueue_nused(vq),
+	    vq->vq_queued_cnt, vq->vq_desc_head_idx,
+	    vq_htog16(vq, vq->vq_ring.avail->idx), vq->vq_used_cons_idx,
+	    vq_htog16(vq, vq->vq_ring.used->idx),
+	    vq_htog16(vq, vring_used_event(&vq->vq_ring)),
+	    vq_htog16(vq, vq->vq_ring.avail->flags),
+	    vq_htog16(vq, vq->vq_ring.used->flags));
 }
@@ -659,14 +679,14 @@
 	vring_init(vr, size, ring_mem, vq->vq_alignment);
 
 	for (i = 0; i < size - 1; i++)
-		vr->desc[i].next = i + 1;
-	vr->desc[i].next = VQ_RING_DESC_CHAIN_END;
+		vr->desc[i].next = vq_gtoh16(vq, i + 1);
+	vr->desc[i].next = vq_gtoh16(vq, VQ_RING_DESC_CHAIN_END);
 }
 
 static void
 vq_ring_update_avail(struct virtqueue *vq, uint16_t desc_idx)
 {
-	uint16_t avail_idx;
+	uint16_t avail_idx, avail_ring_idx;
 
 	/*
@@ -675,11 +695,12 @@
 	 * currently running on another CPU, we can keep it processing the new
 	 * descriptor.
 	 */
-	avail_idx = vq->vq_ring.avail->idx & (vq->vq_nentries - 1);
-	vq->vq_ring.avail->ring[avail_idx] = desc_idx;
+	avail_idx = vq_htog16(vq, vq->vq_ring.avail->idx);
+	avail_ring_idx = avail_idx & (vq->vq_nentries - 1);
+	vq->vq_ring.avail->ring[avail_ring_idx] = vq_gtoh16(vq, desc_idx);
 
 	wmb();
-	vq->vq_ring.avail->idx++;
+	vq->vq_ring.avail->idx = vq_gtoh16(vq, avail_idx + 1);
 
 	/* Keep pending count until virtqueue_notify(). */
 	vq->vq_queued_cnt++;
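Because the byte-order conversions now live inside virtqueue_dequeue() and the other ring accessors, consumers are unchanged: the id and len fields arrive in host order whether the ring is legacy or modern. A sketch of the usual completion loop (vtfoo_process() is illustrative):

    void *cookie;
    uint32_t len;

    while ((cookie = virtqueue_dequeue(vq, &len)) != NULL) {
    	/* len is already in host byte order for legacy and modern rings. */
    	vtfoo_process(sc, cookie, len);
    }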
@@ -698,19 +719,19 @@
 	for (i = 0, idx = head_idx, seg = sg->sg_segs;
 	     i < needed;
-	     i++, idx = dp->next, seg++) {
+	     i++, idx = vq_htog16(vq, dp->next), seg++) {
 		VQASSERT(vq, idx != VQ_RING_DESC_CHAIN_END,
 		    "premature end of free desc chain");
 
 		dp = &desc[idx];
-		dp->addr = seg->ss_paddr;
-		dp->len = seg->ss_len;
+		dp->addr = vq_gtoh64(vq, seg->ss_paddr);
+		dp->len = vq_gtoh32(vq, seg->ss_len);
 		dp->flags = 0;
 
 		if (i < needed - 1)
-			dp->flags |= VRING_DESC_F_NEXT;
+			dp->flags |= vq_gtoh16(vq, VRING_DESC_F_NEXT);
 		if (i >= readable)
-			dp->flags |= VRING_DESC_F_WRITE;
+			dp->flags |= vq_gtoh16(vq, VRING_DESC_F_WRITE);
 	}
 
 	return (idx);
@@ -755,14 +776,14 @@
 	dxp->cookie = cookie;
 	dxp->ndescs = 1;
 
-	dp->addr = dxp->indirect_paddr;
-	dp->len = needed * sizeof(struct vring_desc);
-	dp->flags = VRING_DESC_F_INDIRECT;
+	dp->addr = vq_gtoh64(vq, dxp->indirect_paddr);
+	dp->len = vq_gtoh32(vq, needed * sizeof(struct vring_desc));
+	dp->flags = vq_gtoh16(vq, VRING_DESC_F_INDIRECT);
 
 	vq_ring_enqueue_segments(vq, dxp->indirect, 0,
 	    sg, readable, writable);
 
-	vq->vq_desc_head_idx = dp->next;
+	vq->vq_desc_head_idx = vq_htog16(vq, dp->next);
 	vq->vq_free_cnt--;
 	if (vq->vq_free_cnt == 0)
 		VQ_RING_ASSERT_CHAIN_TERM(vq);
@@ -780,10 +801,13 @@
 	 * Enable interrupts, making sure we get the latest index of
 	 * what's already been consumed.
 	 */
-	if (vq->vq_flags & VIRTQUEUE_FLAG_EVENT_IDX)
-		vring_used_event(&vq->vq_ring) = vq->vq_used_cons_idx + ndesc;
-	else
-		vq->vq_ring.avail->flags &= ~VRING_AVAIL_F_NO_INTERRUPT;
+	if (vq->vq_flags & VIRTQUEUE_FLAG_EVENT_IDX) {
+		vring_used_event(&vq->vq_ring) =
+		    vq_gtoh16(vq, vq->vq_used_cons_idx + ndesc);
+	} else {
+		vq->vq_ring.avail->flags &=
+		    vq_gtoh16(vq, ~VRING_AVAIL_F_NO_INTERRUPT);
+	}
 
 	mb();
@@ -801,24 +825,26 @@
 static int
 vq_ring_must_notify_host(struct virtqueue *vq)
 {
-	uint16_t new_idx, prev_idx, event_idx;
+	uint16_t new_idx, prev_idx, event_idx, flags;
 
 	if (vq->vq_flags & VIRTQUEUE_FLAG_EVENT_IDX) {
-		new_idx = vq->vq_ring.avail->idx;
+		new_idx = vq_htog16(vq, vq->vq_ring.avail->idx);
 		prev_idx = new_idx - vq->vq_queued_cnt;
-		event_idx = vring_avail_event(&vq->vq_ring);
+		event_idx = vq_htog16(vq, vring_avail_event(&vq->vq_ring));
 
 		return (vring_need_event(event_idx, new_idx, prev_idx) != 0);
 	}
 
-	return ((vq->vq_ring.used->flags & VRING_USED_F_NO_NOTIFY) == 0);
+	flags = vq->vq_ring.used->flags;
+	return ((flags & vq_gtoh16(vq, VRING_USED_F_NO_NOTIFY)) == 0);
 }
 
 static void
 vq_ring_notify_host(struct virtqueue *vq)
 {
 
-	VIRTIO_BUS_NOTIFY_VQ(vq->vq_dev, vq->vq_queue_index);
+	VIRTIO_BUS_NOTIFY_VQ(vq->vq_dev, vq->vq_queue_index,
+	    vq->vq_notify_offset);
 }
 
 static void
@@ -837,10 +863,11 @@
 	vq->vq_free_cnt += dxp->ndescs;
 	dxp->ndescs--;
 
-	if ((dp->flags & VRING_DESC_F_INDIRECT) == 0) {
-		while (dp->flags & VRING_DESC_F_NEXT) {
-			VQ_RING_ASSERT_VALID_IDX(vq, dp->next);
-			dp = &vq->vq_ring.desc[dp->next];
+	if ((dp->flags & vq_gtoh16(vq, VRING_DESC_F_INDIRECT)) == 0) {
+		while (dp->flags & vq_gtoh16(vq, VRING_DESC_F_NEXT)) {
+			uint16_t next_idx = vq_htog16(vq, dp->next);
+			VQ_RING_ASSERT_VALID_IDX(vq, next_idx);
+			dp = &vq->vq_ring.desc[next_idx];
 			dxp->ndescs--;
 		}
 	}
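The extra offset argument to VIRTIO_BUS_NOTIFY_VQ above exists so that the modern PCI transport can ring per-queue doorbells. Under the VirtIO 1.0 spec, each queue's doorbell lies within the notify capability region at queue_notify_off * notify_off_multiplier; a sketch of that computation (the function name is illustrative, the formula follows the spec rather than the vtpcim code, which is not shown here):

    /* Per-queue doorbell offset within the notify region (VirtIO 1.0). */
    static bus_size_t
    vtfoo_modern_notify_offset(uint16_t queue_notify_off,
        uint32_t notify_off_multiplier)
    {

    	return ((bus_size_t)queue_notify_off * notify_off_multiplier);
    }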
@@ -853,6 +880,6 @@
 	 * newly freed chain. If the virtqueue was completely used, then
 	 * head would be VQ_RING_DESC_CHAIN_END (ASSERTed above).
 	 */
-	dp->next = vq->vq_desc_head_idx;
+	dp->next = vq_gtoh16(vq, vq->vq_desc_head_idx);
 	vq->vq_desc_head_idx = desc_idx;
 }
diff -urN sys/dev/virtio.ori/virtqueue.h sys/dev/virtio/virtqueue.h
--- sys/dev/virtio.ori/virtqueue.h	2020-03-19 20:20:23.684120000 -0700
+++ sys/dev/virtio/virtqueue.h	2020-03-19 23:17:51.679211000 -0700
@@ -1,4 +1,6 @@
 /*-
+ * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
+ *
  * Copyright (c) 2011, Bryan Venteicher
  * All rights reserved.
  *
@@ -68,8 +70,8 @@
 uint64_t virtqueue_filter_features(uint64_t features);
 
 int	 virtqueue_alloc(device_t dev, uint16_t queue, uint16_t size,
-	     int align, vm_paddr_t highaddr, struct vq_alloc_info *info,
-	     struct virtqueue **vqp);
+	     bus_size_t notify_offset, int align, vm_paddr_t highaddr,
+	     struct vq_alloc_info *info, struct virtqueue **vqp);
 void	*virtqueue_drain(struct virtqueue *vq, int *last);
 void	 virtqueue_free(struct virtqueue *vq);
 int	 virtqueue_reinit(struct virtqueue *vq, uint16_t size);
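Finally, the VRING_*_ALIGN_SIZE constants introduced in virtio_ring.h give a modern transport explicit rules to validate when the three ring areas are placed separately rather than in the legacy contiguous layout. A minimal sketch of such a check, assuming the function itself is illustrative and not part of this patch:

    #include <dev/virtio/virtio_ring.h>

    /* True when split-ring areas satisfy the 1.0 alignment requirements. */
    static bool
    vring_layout_ok(vm_paddr_t desc, vm_paddr_t avail, vm_paddr_t used)
    {

    	return ((desc & (VRING_DESC_ALIGN_SIZE - 1)) == 0 &&
    	    (avail & (VRING_AVAIL_ALIGN_SIZE - 1)) == 0 &&
    	    (used & (VRING_USED_ALIGN_SIZE - 1)) == 0);
    }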