FreeBSD Bugzilla – Attachment 212559 Details for Bug 236922
Virtio fails as QEMU-KVM guest with Q35 chipset on Ubuntu 18.04.2 LTS
Attachment 212559: [patch] Proper FreeBSD 12.x patch for VirtIO

Filename:  virtio_12.patch
MIME Type: text/plain
Creator:   Tommy P
Created:   2020-03-20 20:45:04 UTC
Size:      297.61 KB
Flags:     patch, obsolete
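In broad strokes, the patch back-ports the modern (VirtIO V1) PCI transport to FreeBSD 12.x: it builds new virtio_pci_legacy.c and virtio_pci_modern.c transports behind a shared virtio_pci_if.m interface, attaches each device driver to both transports (the vtpcil and vtpcim parents in the DRIVER_MODULE() changes below), lets feature negotiation fail cleanly via virtio_finalize_features(), and converts guest-visible fields with byte-order macros such as vtblk_gtoh32(). As a reading aid, here is a minimal sketch, not part of the attachment, of what those helpers do, assuming the virtio_htog*/virtio_gtoh* convention from the corresponding FreeBSD head work:

/*
 * Sketch only: modern (VIRTIO_F_VERSION_1) devices use little-endian
 * fields, while legacy devices use guest-native byte order, so a swap
 * is only ever needed for a modern device on a big-endian guest.
 */
#include <sys/types.h>
#include <sys/endian.h>	/* le32toh()/htole32() on FreeBSD */

static inline bool
virtio_swap_endian(bool modern)
{
#if _BYTE_ORDER == _LITTLE_ENDIAN
	return (false);		/* Guest order already matches the device. */
#else
	return (modern);	/* Swap only for V1 devices. */
#endif
}

static inline uint32_t
virtio_htog32(bool modern, uint32_t val)	/* used when reading device-written fields */
{
	return (virtio_swap_endian(modern) ? le32toh(val) : val);
}

static inline uint32_t
virtio_gtoh32(bool modern, uint32_t val)	/* used when writing fields the device reads */
{
	return (virtio_swap_endian(modern) ? htole32(val) : val);
}

The 16- and 64-bit variants follow the same pattern, and each driver wraps them with its own modern test, e.g. vtblk_modern(sc).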
>--- sys/conf/files.ori 2020-03-18 20:15:51.221312000 -0700 >+++ sys/conf/files 2020-03-19 16:43:17.504455000 -0700 >@@ -3478,6 +3478,9 @@ > dev/virtio/virtio_bus_if.m optional virtio > dev/virtio/virtio_if.m optional virtio > dev/virtio/pci/virtio_pci.c optional virtio_pci >+dev/virtio/pci/virtio_pci_if.m optional virtio_pci >+dev/virtio/pci/virtio_pci_legacy.c optional virtio_pci >+dev/virtio/pci/virtio_pci_modern.c optional virtio_pci > dev/virtio/mmio/virtio_mmio.c optional virtio_mmio fdt > dev/virtio/mmio/virtio_mmio_if.m optional virtio_mmio fdt > dev/virtio/network/if_vtnet.c optional vtnet >diff -urN sys/modules/virtio.ori/pci/Makefile sys/modules/virtio/pci/Makefile >--- sys/modules/virtio.ori/pci/Makefile 2020-03-18 20:16:38.954687000 -0700 >+++ sys/modules/virtio/pci/Makefile 2020-03-19 20:36:09.105528000 -0700 >@@ -27,6 +27,8 @@ > > KMOD= virtio_pci > SRCS= virtio_pci.c >+SRCS+= virtio_pci_legacy.c virtio_pci_modern.c >+SRCS+= virtio_pci_if.c virtio_pci_if.h > SRCS+= virtio_bus_if.h virtio_if.h > SRCS+= bus_if.h device_if.h pci_if.h > >diff -urN sys/dev/virtio.ori/balloon/virtio_balloon.c sys/dev/virtio/balloon/virtio_balloon.c >--- sys/dev/virtio.ori/balloon/virtio_balloon.c 2020-03-18 20:16:31.706945000 -0700 >+++ sys/dev/virtio/balloon/virtio_balloon.c 2020-03-19 16:44:27.329580000 -0700 >@@ -80,6 +80,7 @@ > static struct virtio_feature_desc vtballoon_feature_desc[] = { > { VIRTIO_BALLOON_F_MUST_TELL_HOST, "MustTellHost" }, > { VIRTIO_BALLOON_F_STATS_VQ, "StatsVq" }, >+ { VIRTIO_BALLOON_F_DEFLATE_ON_OOM, "DeflateOnOOM" }, > > { 0, NULL } > }; >@@ -89,7 +90,8 @@ > static int vtballoon_detach(device_t); > static int vtballoon_config_change(device_t); > >-static void vtballoon_negotiate_features(struct vtballoon_softc *); >+static int vtballoon_negotiate_features(struct vtballoon_softc *); >+static int vtballoon_setup_features(struct vtballoon_softc *); > static int vtballoon_alloc_virtqueues(struct vtballoon_softc *); > > static void vtballoon_vq_intr(void *); >@@ -109,10 +111,13 @@ > > static int vtballoon_sleep(struct vtballoon_softc *); > static void vtballoon_thread(void *); >-static void vtballoon_add_sysctl(struct vtballoon_softc *); >+static void vtballoon_setup_sysctl(struct vtballoon_softc *); > >+#define vtballoon_modern(_sc) \ >+ (((_sc)->vtballoon_features & VIRTIO_F_VERSION_1) != 0) >+ > /* Features desired/implemented by this driver. */ >-#define VTBALLOON_FEATURES 0 >+#define VTBALLOON_FEATURES VIRTIO_BALLOON_F_MUST_TELL_HOST > > /* Timeout between retries when the balloon needs inflating. 
*/ > #define VTBALLOON_LOWMEM_TIMEOUT hz >@@ -153,8 +158,10 @@ > }; > static devclass_t vtballoon_devclass; > >-DRIVER_MODULE(virtio_balloon, virtio_pci, vtballoon_driver, >+DRIVER_MODULE(virtio_balloon, vtpcil, vtballoon_driver, > vtballoon_devclass, 0, 0); >+DRIVER_MODULE(virtio_balloon, vtpcim, vtballoon_driver, >+ vtballoon_devclass, 0, 0); > MODULE_VERSION(virtio_balloon, 1); > MODULE_DEPEND(virtio_balloon, virtio, 1, 1, 1); > >@@ -178,14 +185,18 @@ > > sc = device_get_softc(dev); > sc->vtballoon_dev = dev; >+ virtio_set_feature_desc(dev, vtballoon_feature_desc); > > VTBALLOON_LOCK_INIT(sc, device_get_nameunit(dev)); > TAILQ_INIT(&sc->vtballoon_pages); > >- vtballoon_add_sysctl(sc); >+ vtballoon_setup_sysctl(sc); > >- virtio_set_feature_desc(dev, vtballoon_feature_desc); >- vtballoon_negotiate_features(sc); >+ error = vtballoon_setup_features(sc); >+ if (error) { >+ device_printf(dev, "cannot setup features\n"); >+ goto fail; >+ } > > sc->vtballoon_page_frames = malloc(VTBALLOON_PAGES_PER_REQUEST * > sizeof(uint32_t), M_DEVBUF, M_NOWAIT | M_ZERO); >@@ -271,18 +282,32 @@ > return (1); > } > >-static void >+static int > vtballoon_negotiate_features(struct vtballoon_softc *sc) > { > device_t dev; > uint64_t features; > > dev = sc->vtballoon_dev; >- features = virtio_negotiate_features(dev, VTBALLOON_FEATURES); >- sc->vtballoon_features = features; >+ features = VTBALLOON_FEATURES; >+ >+ sc->vtballoon_features = virtio_negotiate_features(dev, features); >+ return (virtio_finalize_features(dev)); > } > > static int >+vtballoon_setup_features(struct vtballoon_softc *sc) >+{ >+ int error; >+ >+ error = vtballoon_negotiate_features(sc); >+ if (error) >+ return (error); >+ >+ return (0); >+} >+ >+static int > vtballoon_alloc_virtqueues(struct vtballoon_softc *sc) > { > device_t dev; >@@ -440,7 +465,8 @@ > { > vm_page_t m; > >- m = vm_page_alloc(NULL, 0, VM_ALLOC_NORMAL | VM_ALLOC_NOOBJ); >+ m = vm_page_alloc(NULL, 0, >+ VM_ALLOC_NORMAL | VM_ALLOC_NOOBJ | VM_ALLOC_NODUMP); > if (m != NULL) > sc->vtballoon_current_npages++; > >@@ -463,16 +489,24 @@ > desired = virtio_read_dev_config_4(sc->vtballoon_dev, > offsetof(struct virtio_balloon_config, num_pages)); > >- return (le32toh(desired)); >+ if (vtballoon_modern(sc)) >+ return (desired); >+ else >+ return (le32toh(desired)); > } > > static void > vtballoon_update_size(struct vtballoon_softc *sc) > { >+ uint32_t npages; > >+ npages = sc->vtballoon_current_npages; >+ if (!vtballoon_modern(sc)) >+ npages = htole32(npages); >+ > virtio_write_dev_config_4(sc->vtballoon_dev, >- offsetof(struct virtio_balloon_config, actual), >- htole32(sc->vtballoon_current_npages)); >+ offsetof(struct virtio_balloon_config, actual), npages); >+ > } > > static int >@@ -544,7 +578,7 @@ > } > > static void >-vtballoon_add_sysctl(struct vtballoon_softc *sc) >+vtballoon_setup_sysctl(struct vtballoon_softc *sc) > { > device_t dev; > struct sysctl_ctx_list *ctx; >diff -urN sys/dev/virtio.ori/balloon/virtio_balloon.h sys/dev/virtio/balloon/virtio_balloon.h >--- sys/dev/virtio.ori/balloon/virtio_balloon.h 2020-03-18 20:16:31.706802000 -0700 >+++ sys/dev/virtio/balloon/virtio_balloon.h 2020-03-19 16:44:27.329319000 -0700 >@@ -36,6 +36,7 @@ > /* Feature bits. */ > #define VIRTIO_BALLOON_F_MUST_TELL_HOST 0x1 /* Tell before reclaiming pages */ > #define VIRTIO_BALLOON_F_STATS_VQ 0x2 /* Memory stats virtqueue */ >+#define VIRTIO_BALLOON_F_DEFLATE_ON_OOM 0x4 /* Deflate balloon on OOM */ > > /* Size of a PFN in the balloon interface. 
*/ > #define VIRTIO_BALLOON_PFN_SHIFT 12 >@@ -54,8 +55,33 @@ > #define VIRTIO_BALLOON_S_MINFLT 3 /* Number of minor faults */ > #define VIRTIO_BALLOON_S_MEMFREE 4 /* Total amount of free memory */ > #define VIRTIO_BALLOON_S_MEMTOT 5 /* Total amount of memory */ >-#define VIRTIO_BALLOON_S_NR 6 >+#define VIRTIO_BALLOON_S_AVAIL 6 /* Available memory as in /proc */ >+#define VIRTIO_BALLOON_S_CACHES 7 /* Disk caches */ >+#define VIRTIO_BALLOON_S_NR 8 > >+/* >+ * Memory statistics structure. >+ * Driver fills an array of these structures and passes to device. >+ * >+ * NOTE: fields are laid out in a way that would make compiler add padding >+ * between and after fields, so we have to use compiler-specific attributes to >+ * pack it, to disable this padding. This also often causes compiler to >+ * generate suboptimal code. >+ * >+ * We maintain this statistics structure format for backwards compatibility, >+ * but don't follow this example. >+ * >+ * If implementing a similar structure, do something like the below instead: >+ * struct virtio_balloon_stat { >+ * __virtio16 tag; >+ * __u8 reserved[6]; >+ * __virtio64 val; >+ * }; >+ * >+ * In other words, add explicit reserved fields to align field and >+ * structure boundaries at field size, avoiding compiler padding >+ * without the packed attribute. >+ */ > struct virtio_balloon_stat { > uint16_t tag; > uint64_t val; >diff -urN sys/dev/virtio.ori/block/virtio_blk.c sys/dev/virtio/block/virtio_blk.c >--- sys/dev/virtio.ori/block/virtio_blk.c 2020-03-18 20:16:31.708312000 -0700 >+++ sys/dev/virtio/block/virtio_blk.c 2020-03-19 16:44:27.331375000 -0700 >@@ -76,11 +76,10 @@ > uint64_t vtblk_features; > uint32_t vtblk_flags; > #define VTBLK_FLAG_INDIRECT 0x0001 >-#define VTBLK_FLAG_READONLY 0x0002 >-#define VTBLK_FLAG_DETACH 0x0004 >-#define VTBLK_FLAG_SUSPEND 0x0008 >-#define VTBLK_FLAG_BARRIER 0x0010 >-#define VTBLK_FLAG_WC_CONFIG 0x0020 >+#define VTBLK_FLAG_DETACH 0x0002 >+#define VTBLK_FLAG_SUSPEND 0x0004 >+#define VTBLK_FLAG_BARRIER 0x0008 >+#define VTBLK_FLAG_WCE_CONFIG 0x0010 > > struct virtqueue *vtblk_vq; > struct sglist *vtblk_sglist; >@@ -109,9 +108,10 @@ > { VIRTIO_BLK_F_RO, "ReadOnly" }, > { VIRTIO_BLK_F_BLK_SIZE, "BlockSize" }, > { VIRTIO_BLK_F_SCSI, "SCSICmds" }, >- { VIRTIO_BLK_F_WCE, "WriteCache" }, >+ { VIRTIO_BLK_F_FLUSH, "FlushCmd" }, > { VIRTIO_BLK_F_TOPOLOGY, "Topology" }, > { VIRTIO_BLK_F_CONFIG_WCE, "ConfigWCE" }, >+ { VIRTIO_BLK_F_MQ, "Multiqueue" }, > > { 0, NULL } > }; >@@ -133,8 +133,8 @@ > static int vtblk_dump(void *, void *, vm_offset_t, off_t, size_t); > static void vtblk_strategy(struct bio *); > >-static void vtblk_negotiate_features(struct vtblk_softc *); >-static void vtblk_setup_features(struct vtblk_softc *); >+static int vtblk_negotiate_features(struct vtblk_softc *); >+static int vtblk_setup_features(struct vtblk_softc *); > static int vtblk_maximum_segments(struct vtblk_softc *, > struct virtio_blk_config *); > static int vtblk_alloc_virtqueue(struct vtblk_softc *); >@@ -193,6 +193,14 @@ > static void vtblk_setup_sysctl(struct vtblk_softc *); > static int vtblk_tunable_int(struct vtblk_softc *, const char *, int); > >+#define vtblk_modern(_sc) (((_sc)->vtblk_features & VIRTIO_F_VERSION_1) != 0) >+#define vtblk_htog16(_sc, _val) virtio_htog16(vtblk_modern(_sc), _val) >+#define vtblk_htog32(_sc, _val) virtio_htog32(vtblk_modern(_sc), _val) >+#define vtblk_htog64(_sc, _val) virtio_htog64(vtblk_modern(_sc), _val) >+#define vtblk_gtoh16(_sc, _val) virtio_gtoh16(vtblk_modern(_sc), _val) >+#define vtblk_gtoh32(_sc, 
_val) virtio_gtoh32(vtblk_modern(_sc), _val) >+#define vtblk_gtoh64(_sc, _val) virtio_gtoh64(vtblk_modern(_sc), _val) >+ > /* Tunables. */ > static int vtblk_no_ident = 0; > TUNABLE_INT("hw.vtblk.no_ident", &vtblk_no_ident); >@@ -200,18 +208,20 @@ > TUNABLE_INT("hw.vtblk.writecache_mode", &vtblk_writecache_mode); > > /* Features desired/implemented by this driver. */ >-#define VTBLK_FEATURES \ >- (VIRTIO_BLK_F_BARRIER | \ >- VIRTIO_BLK_F_SIZE_MAX | \ >+#define VTBLK_COMMON_FEATURES \ >+ (VIRTIO_BLK_F_SIZE_MAX | \ > VIRTIO_BLK_F_SEG_MAX | \ > VIRTIO_BLK_F_GEOMETRY | \ > VIRTIO_BLK_F_RO | \ > VIRTIO_BLK_F_BLK_SIZE | \ >- VIRTIO_BLK_F_WCE | \ >+ VIRTIO_BLK_F_FLUSH | \ > VIRTIO_BLK_F_TOPOLOGY | \ > VIRTIO_BLK_F_CONFIG_WCE | \ > VIRTIO_RING_F_INDIRECT_DESC) > >+#define VTBLK_MODERN_FEATURES (VTBLK_COMMON_FEATURES) >+#define VTBLK_LEGACY_FEATURES (VIRTIO_BLK_F_BARRIER | VTBLK_COMMON_FEATURES) >+ > #define VTBLK_MTX(_sc) &(_sc)->vtblk_mtx > #define VTBLK_LOCK_INIT(_sc, _name) \ > mtx_init(VTBLK_MTX((_sc)), (_name), \ >@@ -256,8 +266,10 @@ > > DRIVER_MODULE(virtio_blk, virtio_mmio, vtblk_driver, vtblk_devclass, > vtblk_modevent, 0); >-DRIVER_MODULE(virtio_blk, virtio_pci, vtblk_driver, vtblk_devclass, >+DRIVER_MODULE(virtio_blk, vtpcil, vtblk_driver, vtblk_devclass, > vtblk_modevent, 0); >+DRIVER_MODULE(virtio_blk, vtpcim, vtblk_driver, vtblk_devclass, >+ vtblk_modevent, 0); > MODULE_VERSION(virtio_blk, 1); > MODULE_DEPEND(virtio_blk, virtio, 1, 1, 1); > >@@ -301,10 +313,10 @@ > struct virtio_blk_config blkcfg; > int error; > >- virtio_set_feature_desc(dev, vtblk_feature_desc); >- > sc = device_get_softc(dev); > sc->vtblk_dev = dev; >+ virtio_set_feature_desc(dev, vtblk_feature_desc); >+ > VTBLK_LOCK_INIT(sc, device_get_nameunit(dev)); > bioq_init(&sc->vtblk_bioq); > TAILQ_INIT(&sc->vtblk_dump_queue); >@@ -312,8 +324,13 @@ > TAILQ_INIT(&sc->vtblk_req_ready); > > vtblk_setup_sysctl(sc); >- vtblk_setup_features(sc); > >+ error = vtblk_setup_features(sc); >+ if (error) { >+ device_printf(dev, "cannot setup features\n"); >+ goto fail; >+ } >+ > vtblk_read_config(sc, &blkcfg); > > /* >@@ -541,16 +558,6 @@ > return; > } > >- /* >- * Fail any write if RO. Unfortunately, there does not seem to >- * be a better way to report our readonly'ness to GEOM above. >- */ >- if (sc->vtblk_flags & VTBLK_FLAG_READONLY && >- (bp->bio_cmd == BIO_WRITE || bp->bio_cmd == BIO_FLUSH)) { >- vtblk_bio_done(sc, bp, EROFS); >- return; >- } >- > VTBLK_LOCK(sc); > > if (sc->vtblk_flags & VTBLK_FLAG_DETACH) { >@@ -565,35 +572,40 @@ > VTBLK_UNLOCK(sc); > } > >-static void >+static int > vtblk_negotiate_features(struct vtblk_softc *sc) > { > device_t dev; > uint64_t features; > > dev = sc->vtblk_dev; >- features = VTBLK_FEATURES; >+ features = virtio_bus_is_modern(dev) ? 
VTBLK_MODERN_FEATURES : >+ VTBLK_LEGACY_FEATURES; > > sc->vtblk_features = virtio_negotiate_features(dev, features); >+ return (virtio_finalize_features(dev)); > } > >-static void >+static int > vtblk_setup_features(struct vtblk_softc *sc) > { > device_t dev; >- >+ int error; >+ > dev = sc->vtblk_dev; > >- vtblk_negotiate_features(sc); >+ error = vtblk_negotiate_features(sc); >+ if (error) >+ return (error); > > if (virtio_with_feature(dev, VIRTIO_RING_F_INDIRECT_DESC)) > sc->vtblk_flags |= VTBLK_FLAG_INDIRECT; >- if (virtio_with_feature(dev, VIRTIO_BLK_F_RO)) >- sc->vtblk_flags |= VTBLK_FLAG_READONLY; >- if (virtio_with_feature(dev, VIRTIO_BLK_F_BARRIER)) >- sc->vtblk_flags |= VTBLK_FLAG_BARRIER; > if (virtio_with_feature(dev, VIRTIO_BLK_F_CONFIG_WCE)) >- sc->vtblk_flags |= VTBLK_FLAG_WC_CONFIG; >+ sc->vtblk_flags |= VTBLK_FLAG_WCE_CONFIG; >+ if (virtio_with_feature(dev, VIRTIO_BLK_F_BARRIER)) >+ sc->vtblk_flags |= VTBLK_FLAG_BARRIER; /* Legacy. */ >+ >+ return (0); > } > > static int >@@ -672,15 +684,19 @@ > dp->d_name = VTBLK_DISK_NAME; > dp->d_unit = device_get_unit(dev); > dp->d_drv1 = sc; >- dp->d_flags = DISKFLAG_CANFLUSHCACHE | DISKFLAG_UNMAPPED_BIO | >- DISKFLAG_DIRECT_COMPLETION; >+ dp->d_flags = DISKFLAG_UNMAPPED_BIO | DISKFLAG_DIRECT_COMPLETION; > dp->d_hba_vendor = virtio_get_vendor(dev); > dp->d_hba_device = virtio_get_device(dev); > dp->d_hba_subvendor = virtio_get_subvendor(dev); > dp->d_hba_subdevice = virtio_get_subdevice(dev); > >- if ((sc->vtblk_flags & VTBLK_FLAG_READONLY) == 0) >+ if (virtio_with_feature(dev, VIRTIO_BLK_F_RO)) >+ dp->d_flags |= DISKFLAG_WRITE_PROTECT; >+ else { >+ if (virtio_with_feature(dev, VIRTIO_BLK_F_FLUSH)) >+ dp->d_flags |= DISKFLAG_CANFLUSHCACHE; > dp->d_dump = vtblk_dump; >+ } > > /* Capacity is always in 512-byte units. 
*/ > dp->d_mediasize = blkcfg->capacity * 512; >@@ -864,26 +880,27 @@ > bp = bioq_takefirst(bioq); > req->vbr_bp = bp; > req->vbr_ack = -1; >- req->vbr_hdr.ioprio = 1; >+ req->vbr_hdr.ioprio = vtblk_gtoh32(sc, 1); > > switch (bp->bio_cmd) { > case BIO_FLUSH: >- req->vbr_hdr.type = VIRTIO_BLK_T_FLUSH; >+ req->vbr_hdr.type = vtblk_gtoh32(sc, VIRTIO_BLK_T_FLUSH); >+ req->vbr_hdr.sector = 0; > break; > case BIO_READ: >- req->vbr_hdr.type = VIRTIO_BLK_T_IN; >- req->vbr_hdr.sector = bp->bio_offset / 512; >+ req->vbr_hdr.type = vtblk_gtoh32(sc, VIRTIO_BLK_T_IN); >+ req->vbr_hdr.sector = vtblk_gtoh64(sc, bp->bio_offset / 512); > break; > case BIO_WRITE: >- req->vbr_hdr.type = VIRTIO_BLK_T_OUT; >- req->vbr_hdr.sector = bp->bio_offset / 512; >+ req->vbr_hdr.type = vtblk_gtoh32(sc, VIRTIO_BLK_T_OUT); >+ req->vbr_hdr.sector = vtblk_gtoh64(sc, bp->bio_offset / 512); > break; > default: > panic("%s: bio with unhandled cmd: %d", __func__, bp->bio_cmd); > } > > if (bp->bio_flags & BIO_ORDERED) >- req->vbr_hdr.type |= VIRTIO_BLK_T_BARRIER; >+ req->vbr_hdr.type |= vtblk_gtoh32(sc, VIRTIO_BLK_T_BARRIER); > > return (req); > } >@@ -914,7 +931,8 @@ > if (!virtqueue_empty(vq)) > return (EBUSY); > ordered = 1; >- req->vbr_hdr.type &= ~VIRTIO_BLK_T_BARRIER; >+ req->vbr_hdr.type &= vtblk_gtoh32(sc, >+ ~VIRTIO_BLK_T_BARRIER); > } > } > >@@ -1018,15 +1036,16 @@ > static void > vtblk_drain(struct vtblk_softc *sc) > { >- struct bio_queue queue; > struct bio_queue_head *bioq; > struct vtblk_request *req; > struct bio *bp; > > bioq = &sc->vtblk_bioq; >- TAILQ_INIT(&queue); > > if (sc->vtblk_vq != NULL) { >+ struct bio_queue queue; >+ >+ TAILQ_INIT(&queue); > vtblk_queue_completed(sc, &queue); > vtblk_done_completed(sc, &queue); > >@@ -1117,10 +1136,22 @@ > /* Read the configuration if the feature was negotiated. 
*/ > VTBLK_GET_CONFIG(dev, VIRTIO_BLK_F_SIZE_MAX, size_max, blkcfg); > VTBLK_GET_CONFIG(dev, VIRTIO_BLK_F_SEG_MAX, seg_max, blkcfg); >- VTBLK_GET_CONFIG(dev, VIRTIO_BLK_F_GEOMETRY, geometry, blkcfg); >+ VTBLK_GET_CONFIG(dev, VIRTIO_BLK_F_GEOMETRY, >+ geometry.cylinders, blkcfg); >+ VTBLK_GET_CONFIG(dev, VIRTIO_BLK_F_GEOMETRY, >+ geometry.heads, blkcfg); >+ VTBLK_GET_CONFIG(dev, VIRTIO_BLK_F_GEOMETRY, >+ geometry.sectors, blkcfg); > VTBLK_GET_CONFIG(dev, VIRTIO_BLK_F_BLK_SIZE, blk_size, blkcfg); >- VTBLK_GET_CONFIG(dev, VIRTIO_BLK_F_TOPOLOGY, topology, blkcfg); >- VTBLK_GET_CONFIG(dev, VIRTIO_BLK_F_CONFIG_WCE, writeback, blkcfg); >+ VTBLK_GET_CONFIG(dev, VIRTIO_BLK_F_TOPOLOGY, >+ topology.physical_block_exp, blkcfg); >+ VTBLK_GET_CONFIG(dev, VIRTIO_BLK_F_TOPOLOGY, >+ topology.alignment_offset, blkcfg); >+ VTBLK_GET_CONFIG(dev, VIRTIO_BLK_F_TOPOLOGY, >+ topology.min_io_size, blkcfg); >+ VTBLK_GET_CONFIG(dev, VIRTIO_BLK_F_TOPOLOGY, >+ topology.opt_io_size, blkcfg); >+ VTBLK_GET_CONFIG(dev, VIRTIO_BLK_F_CONFIG_WCE, wce, blkcfg); > } > > #undef VTBLK_GET_CONFIG >@@ -1144,8 +1175,8 @@ > return; > > req->vbr_ack = -1; >- req->vbr_hdr.type = VIRTIO_BLK_T_GET_ID; >- req->vbr_hdr.ioprio = 1; >+ req->vbr_hdr.type = vtblk_gtoh32(sc, VIRTIO_BLK_T_GET_ID); >+ req->vbr_hdr.ioprio = vtblk_gtoh32(sc, 1); > req->vbr_hdr.sector = 0; > > req->vbr_bp = &buf; >@@ -1276,9 +1307,9 @@ > > req = &sc->vtblk_dump_request; > req->vbr_ack = -1; >- req->vbr_hdr.type = VIRTIO_BLK_T_OUT; >- req->vbr_hdr.ioprio = 1; >- req->vbr_hdr.sector = offset / 512; >+ req->vbr_hdr.type = vtblk_gtoh32(sc, VIRTIO_BLK_T_OUT); >+ req->vbr_hdr.ioprio = vtblk_gtoh32(sc, 1); >+ req->vbr_hdr.sector = vtblk_gtoh64(sc, offset / 512); > > req->vbr_bp = &buf; > g_reset_bio(&buf); >@@ -1298,8 +1329,8 @@ > > req = &sc->vtblk_dump_request; > req->vbr_ack = -1; >- req->vbr_hdr.type = VIRTIO_BLK_T_FLUSH; >- req->vbr_hdr.ioprio = 1; >+ req->vbr_hdr.type = vtblk_gtoh32(sc, VIRTIO_BLK_T_FLUSH); >+ req->vbr_hdr.ioprio = vtblk_gtoh32(sc, 1); > req->vbr_hdr.sector = 0; > > req->vbr_bp = &buf; >@@ -1327,7 +1358,7 @@ > > /* Set either writeback (1) or writethrough (0) mode. */ > virtio_write_dev_config_1(sc->vtblk_dev, >- offsetof(struct virtio_blk_config, writeback), wc); >+ offsetof(struct virtio_blk_config, wce), wc); > } > > static int >@@ -1336,15 +1367,15 @@ > { > int wc; > >- if (sc->vtblk_flags & VTBLK_FLAG_WC_CONFIG) { >+ if (sc->vtblk_flags & VTBLK_FLAG_WCE_CONFIG) { > wc = vtblk_tunable_int(sc, "writecache_mode", > vtblk_writecache_mode); > if (wc >= 0 && wc < VTBLK_CACHE_MAX) > vtblk_set_write_cache(sc, wc); > else >- wc = blkcfg->writeback; >+ wc = blkcfg->wce; > } else >- wc = virtio_with_feature(sc->vtblk_dev, VIRTIO_BLK_F_WCE); >+ wc = virtio_with_feature(sc->vtblk_dev, VIRTIO_BLK_F_FLUSH); > > return (wc); > } >@@ -1361,7 +1392,7 @@ > error = sysctl_handle_int(oidp, &wc, 0, req); > if (error || req->newptr == NULL) > return (error); >- if ((sc->vtblk_flags & VTBLK_FLAG_WC_CONFIG) == 0) >+ if ((sc->vtblk_flags & VTBLK_FLAG_WCE_CONFIG) == 0) > return (EPERM); > if (wc < 0 || wc >= VTBLK_CACHE_MAX) > return (EINVAL); >diff -urN sys/dev/virtio.ori/block/virtio_blk.h sys/dev/virtio/block/virtio_blk.h >--- sys/dev/virtio.ori/block/virtio_blk.h 2020-03-18 20:16:31.708466000 -0700 >+++ sys/dev/virtio/block/virtio_blk.h 2020-03-19 16:44:27.331882000 -0700 >@@ -34,17 +34,22 @@ > #define _VIRTIO_BLK_H > > /* Feature bits */ >-#define VIRTIO_BLK_F_BARRIER 0x0001 /* Does host support barriers? 
*/ > #define VIRTIO_BLK_F_SIZE_MAX 0x0002 /* Indicates maximum segment size */ > #define VIRTIO_BLK_F_SEG_MAX 0x0004 /* Indicates maximum # of segments */ > #define VIRTIO_BLK_F_GEOMETRY 0x0010 /* Legacy geometry available */ > #define VIRTIO_BLK_F_RO 0x0020 /* Disk is read-only */ > #define VIRTIO_BLK_F_BLK_SIZE 0x0040 /* Block size of disk is available*/ >-#define VIRTIO_BLK_F_SCSI 0x0080 /* Supports scsi command passthru */ >-#define VIRTIO_BLK_F_WCE 0x0200 /* Writeback mode enabled after reset */ >+#define VIRTIO_BLK_F_FLUSH 0x0200 /* Flush command supported */ > #define VIRTIO_BLK_F_TOPOLOGY 0x0400 /* Topology information is available */ > #define VIRTIO_BLK_F_CONFIG_WCE 0x0800 /* Writeback mode available in config */ >+#define VIRTIO_BLK_F_MQ 0x1000 /* Support more than one vq */ > >+/* Legacy feature bits */ >+#define VIRTIO_BLK_F_BARRIER 0x0001 /* Does host support barriers? */ >+#define VIRTIO_BLK_F_SCSI 0x0080 /* Supports scsi command passthru */ >+ >+/* Old (deprecated) name for VIRTIO_BLK_F_FLUSH. */ >+#define VIRTIO_BLK_F_WCE VIRTIO_BLK_F_FLUSH > #define VIRTIO_BLK_ID_BYTES 20 /* ID string length */ > > struct virtio_blk_config { >@@ -66,15 +71,23 @@ > > /* Topology of the device (if VIRTIO_BLK_F_TOPOLOGY) */ > struct virtio_blk_topology { >+ /* exponent for physical block per logical block. */ > uint8_t physical_block_exp; >+ /* alignment offset in logical blocks. */ > uint8_t alignment_offset; >+ /* minimum I/O size without performance penalty in logical >+ * blocks. */ > uint16_t min_io_size; >+ /* optimal sustained I/O size in logical blocks. */ > uint32_t opt_io_size; > } topology; > > /* Writeback mode (if VIRTIO_BLK_F_CONFIG_WCE) */ >- uint8_t writeback; >+ uint8_t wce; >+ uint8_t unused; > >+ /* Number of vqs, only available when VIRTIO_BLK_F_MQ is set */ >+ uint16_t num_queues; > } __packed; > > /* >@@ -107,7 +120,11 @@ > /* ID string length */ > #define VIRTIO_BLK_ID_BYTES 20 > >-/* This is the first element of the read scatter-gather list. */ >+/* >+ * This comes first in the read scatter-gather list. >+ * For legacy virtio, if VIRTIO_F_ANY_LAYOUT is not negotiated, >+ * this is the first element of the read scatter-gather list. 
>+ */ > struct virtio_blk_outhdr { > /* VIRTIO_BLK_T* */ > uint32_t type; >diff -urN sys/dev/virtio.ori/console/virtio_console.c sys/dev/virtio/console/virtio_console.c >--- sys/dev/virtio.ori/console/virtio_console.c 2020-03-18 20:16:31.705237000 -0700 >+++ sys/dev/virtio/console/virtio_console.c 2020-03-19 16:44:27.325790000 -0700 >@@ -158,8 +158,8 @@ > static int vtcon_detach(device_t); > static int vtcon_config_change(device_t); > >-static void vtcon_setup_features(struct vtcon_softc *); >-static void vtcon_negotiate_features(struct vtcon_softc *); >+static int vtcon_setup_features(struct vtcon_softc *); >+static int vtcon_negotiate_features(struct vtcon_softc *); > static int vtcon_alloc_scports(struct vtcon_softc *); > static int vtcon_alloc_virtqueues(struct vtcon_softc *); > static void vtcon_read_config(struct vtcon_softc *, >@@ -227,6 +227,14 @@ > static void vtcon_enable_interrupts(struct vtcon_softc *); > static void vtcon_disable_interrupts(struct vtcon_softc *); > >+#define vtcon_modern(_sc) (((_sc)->vtcon_features & VIRTIO_F_VERSION_1) != 0) >+#define vtcon_htog16(_sc, _val) virtio_htog16(vtcon_modern(_sc), _val) >+#define vtcon_htog32(_sc, _val) virtio_htog32(vtcon_modern(_sc), _val) >+#define vtcon_htog64(_sc, _val) virtio_htog64(vtcon_modern(_sc), _val) >+#define vtcon_gtoh16(_sc, _val) virtio_gtoh16(vtcon_modern(_sc), _val) >+#define vtcon_gtoh32(_sc, _val) virtio_gtoh32(vtcon_modern(_sc), _val) >+#define vtcon_gtoh64(_sc, _val) virtio_gtoh64(vtcon_modern(_sc), _val) >+ > static int vtcon_pending_free; > > static struct ttydevsw vtcon_tty_class = { >@@ -256,8 +264,10 @@ > }; > static devclass_t vtcon_devclass; > >-DRIVER_MODULE(virtio_console, virtio_pci, vtcon_driver, vtcon_devclass, >+DRIVER_MODULE(virtio_console, vtpcil, vtcon_driver, vtcon_devclass, > vtcon_modevent, 0); >+DRIVER_MODULE(virtio_console, vtpcim, vtcon_driver, vtcon_devclass, >+ vtcon_modevent, 0); > MODULE_VERSION(virtio_console, 1); > MODULE_DEPEND(virtio_console, virtio, 1, 1, 1); > >@@ -323,12 +333,16 @@ > > sc = device_get_softc(dev); > sc->vtcon_dev = dev; >+ virtio_set_feature_desc(dev, vtcon_feature_desc); > > mtx_init(&sc->vtcon_mtx, "vtconmtx", NULL, MTX_DEF); > mtx_init(&sc->vtcon_ctrl_tx_mtx, "vtconctrlmtx", NULL, MTX_DEF); > >- virtio_set_feature_desc(dev, vtcon_feature_desc); >- vtcon_setup_features(sc); >+ error = vtcon_setup_features(sc); >+ if (error) { >+ device_printf(dev, "cannot setup features\n"); >+ goto fail; >+ } > > vtcon_read_config(sc, &concfg); > vtcon_determine_max_ports(sc, &concfg); >@@ -420,7 +434,7 @@ > return (0); > } > >-static void >+static int > vtcon_negotiate_features(struct vtcon_softc *sc) > { > device_t dev; >@@ -430,21 +444,27 @@ > features = VTCON_FEATURES; > > sc->vtcon_features = virtio_negotiate_features(dev, features); >+ return (virtio_finalize_features(dev)); > } > >-static void >+static int > vtcon_setup_features(struct vtcon_softc *sc) > { > device_t dev; >+ int error; > > dev = sc->vtcon_dev; > >- vtcon_negotiate_features(sc); >+ error = vtcon_negotiate_features(sc); >+ if (error) >+ return (error); > > if (virtio_with_feature(dev, VIRTIO_CONSOLE_F_SIZE)) > sc->vtcon_flags |= VTCON_FLAG_SIZE; > if (virtio_with_feature(dev, VIRTIO_CONSOLE_F_MULTIPORT)) > sc->vtcon_flags |= VTCON_FLAG_MULTIPORT; >+ >+ return (0); > } > > #define VTCON_GET_CONFIG(_dev, _feature, _field, _cfg) \ >@@ -847,17 +867,20 @@ > struct virtio_console_control *control, void *data, size_t data_len) > { > device_t dev; >- int id; >+ uint32_t id; >+ uint16_t event; > > dev = 
sc->vtcon_dev; >- id = control->id; >+ id = vtcon_htog32(sc, control->id); >+ event = vtcon_htog16(sc, control->event); > >- if (id < 0 || id >= sc->vtcon_max_ports) { >- device_printf(dev, "%s: invalid port ID %d\n", __func__, id); >+ if (id >= sc->vtcon_max_ports) { >+ device_printf(dev, "%s: event %d invalid port ID %d\n", >+ __func__, event, id); > return; > } > >- switch (control->event) { >+ switch (event) { > case VIRTIO_CONSOLE_PORT_ADD: > vtcon_ctrl_port_add_event(sc, id); > break; >@@ -985,9 +1008,9 @@ > if ((sc->vtcon_flags & VTCON_FLAG_MULTIPORT) == 0) > return; > >- control.id = portid; >- control.event = event; >- control.value = value; >+ control.id = vtcon_gtoh32(sc, portid); >+ control.event = vtcon_gtoh16(sc, event); >+ control.value = vtcon_gtoh16(sc, value); > > vtcon_ctrl_poll(sc, &control); > } >diff -urN sys/dev/virtio.ori/mmio/virtio_mmio.c sys/dev/virtio/mmio/virtio_mmio.c >--- sys/dev/virtio.ori/mmio/virtio_mmio.c 2020-03-18 20:16:31.704642000 -0700 >+++ sys/dev/virtio/mmio/virtio_mmio.c 2020-03-19 16:44:27.325013000 -0700 >@@ -426,6 +426,10 @@ > case VIRTIO_IVAR_VENDOR: > *result = vtmmio_read_config_4(sc, VIRTIO_MMIO_VENDOR_ID); > break; >+ case VIRTIO_IVAR_SUBVENDOR: >+ case VIRTIO_IVAR_MODERN: >+ *result = 0; >+ break; > default: > return (ENOENT); > } >diff -urN sys/dev/virtio.ori/network/if_vtnet.c sys/dev/virtio/network/if_vtnet.c >--- sys/dev/virtio.ori/network/if_vtnet.c 2020-03-18 20:16:31.708021000 -0700 >+++ sys/dev/virtio/network/if_vtnet.c 2020-03-19 16:44:27.330836000 -0700 >@@ -69,6 +69,7 @@ > #include <netinet6/ip6_var.h> > #include <netinet/udp.h> > #include <netinet/tcp.h> >+#include <netinet/tcp_lro.h> > #include <netinet/netdump/netdump.h> > > #include <machine/bus.h> >@@ -85,6 +86,10 @@ > #include "opt_inet.h" > #include "opt_inet6.h" > >+#if defined(INET) || defined(INET6) >+#include <machine/in_cksum.h> >+#endif >+ > static int vtnet_modevent(module_t, int, void *); > > static int vtnet_probe(device_t); >@@ -96,8 +101,8 @@ > static int vtnet_attach_completed(device_t); > static int vtnet_config_change(device_t); > >-static void vtnet_negotiate_features(struct vtnet_softc *); >-static void vtnet_setup_features(struct vtnet_softc *); >+static int vtnet_negotiate_features(struct vtnet_softc *); >+static int vtnet_setup_features(struct vtnet_softc *); > static int vtnet_init_rxq(struct vtnet_softc *, int); > static int vtnet_init_txq(struct vtnet_softc *, int); > static int vtnet_alloc_rxtx_queues(struct vtnet_softc *); >@@ -105,8 +110,12 @@ > static int vtnet_alloc_rx_filters(struct vtnet_softc *); > static void vtnet_free_rx_filters(struct vtnet_softc *); > static int vtnet_alloc_virtqueues(struct vtnet_softc *); >+static int vtnet_alloc_interface(struct vtnet_softc *); > static int vtnet_setup_interface(struct vtnet_softc *); >-static int vtnet_change_mtu(struct vtnet_softc *, int); >+static int vtnet_ioctl_mtu(struct vtnet_softc *, int); >+static int vtnet_ioctl_ifflags(struct vtnet_softc *); >+static int vtnet_ioctl_multi(struct vtnet_softc *); >+static int vtnet_ioctl_ifcap(struct vtnet_softc *, struct ifreq *); > static int vtnet_ioctl(struct ifnet *, u_long, caddr_t); > static uint64_t vtnet_get_counter(struct ifnet *, ift_counter); > >@@ -114,11 +123,15 @@ > static void vtnet_rxq_free_mbufs(struct vtnet_rxq *); > static struct mbuf * > vtnet_rx_alloc_buf(struct vtnet_softc *, int , struct mbuf **); >-static int vtnet_rxq_replace_lro_nomgr_buf(struct vtnet_rxq *, >+static int vtnet_rxq_replace_lro_nomrg_buf(struct vtnet_rxq *, > struct 
mbuf *, int); > static int vtnet_rxq_replace_buf(struct vtnet_rxq *, struct mbuf *, int); > static int vtnet_rxq_enqueue_buf(struct vtnet_rxq *, struct mbuf *); > static int vtnet_rxq_new_buf(struct vtnet_rxq *); >+static int vtnet_rxq_csum_needs_csum(struct vtnet_rxq *, struct mbuf *, >+ uint16_t, int, struct virtio_net_hdr *); >+static int vtnet_rxq_csum_data_valid(struct vtnet_rxq *, struct mbuf *, >+ uint16_t, int, struct virtio_net_hdr *); > static int vtnet_rxq_csum(struct vtnet_rxq *, struct mbuf *, > struct virtio_net_hdr *); > static void vtnet_rxq_discard_merged_bufs(struct vtnet_rxq *, int); >@@ -130,6 +143,7 @@ > static void vtnet_rx_vq_intr(void *); > static void vtnet_rxq_tq_intr(void *, int); > >+static int vtnet_txq_intr_threshold(struct vtnet_txq *); > static int vtnet_txq_below_threshold(struct vtnet_txq *); > static int vtnet_txq_notify(struct vtnet_txq *); > static void vtnet_txq_free_mbufs(struct vtnet_txq *); >@@ -179,6 +193,7 @@ > static int vtnet_init_tx_queues(struct vtnet_softc *); > static int vtnet_init_rxtx_queues(struct vtnet_softc *); > static void vtnet_set_active_vq_pairs(struct vtnet_softc *); >+static void vtnet_update_rx_offloads(struct vtnet_softc *); > static int vtnet_reinit(struct vtnet_softc *); > static void vtnet_init_locked(struct vtnet_softc *); > static void vtnet_init(void *); >@@ -187,11 +202,11 @@ > static void vtnet_exec_ctrl_cmd(struct vtnet_softc *, void *, > struct sglist *, int, int); > static int vtnet_ctrl_mac_cmd(struct vtnet_softc *, uint8_t *); >+static int vtnet_ctrl_guest_offloads(struct vtnet_softc *, uint64_t); > static int vtnet_ctrl_mq_cmd(struct vtnet_softc *, uint16_t); >-static int vtnet_ctrl_rx_cmd(struct vtnet_softc *, int, int); >+static int vtnet_ctrl_rx_cmd(struct vtnet_softc *, uint8_t, int); > static int vtnet_set_promisc(struct vtnet_softc *, int); > static int vtnet_set_allmulti(struct vtnet_softc *, int); >-static void vtnet_attach_disable_promisc(struct vtnet_softc *); > static void vtnet_rx_filter(struct vtnet_softc *); > static void vtnet_rx_filter_mac(struct vtnet_softc *); > static int vtnet_exec_vlan_filter(struct vtnet_softc *, int, uint16_t); >@@ -200,21 +215,23 @@ > static void vtnet_register_vlan(void *, struct ifnet *, uint16_t); > static void vtnet_unregister_vlan(void *, struct ifnet *, uint16_t); > >+static void vtnet_update_speed_duplex(struct vtnet_softc *); > static int vtnet_is_link_up(struct vtnet_softc *); > static void vtnet_update_link_status(struct vtnet_softc *); > static int vtnet_ifmedia_upd(struct ifnet *); > static void vtnet_ifmedia_sts(struct ifnet *, struct ifmediareq *); >-static void vtnet_get_hwaddr(struct vtnet_softc *); >-static void vtnet_set_hwaddr(struct vtnet_softc *); >+static void vtnet_get_macaddr(struct vtnet_softc *); >+static void vtnet_set_macaddr(struct vtnet_softc *); >+static void vtnet_attached_set_macaddr(struct vtnet_softc *); > static void vtnet_vlan_tag_remove(struct mbuf *); > static void vtnet_set_rx_process_limit(struct vtnet_softc *); >-static void vtnet_set_tx_intr_threshold(struct vtnet_softc *); > > static void vtnet_setup_rxq_sysctl(struct sysctl_ctx_list *, > struct sysctl_oid_list *, struct vtnet_rxq *); > static void vtnet_setup_txq_sysctl(struct sysctl_ctx_list *, > struct sysctl_oid_list *, struct vtnet_txq *); > static void vtnet_setup_queue_sysctl(struct vtnet_softc *); >+static void vtnet_load_tunables(struct vtnet_softc *); > static void vtnet_setup_sysctl(struct vtnet_softc *); > > static int vtnet_rxq_enable_intr(struct vtnet_rxq *); >@@ 
-232,58 +249,84 @@ > > NETDUMP_DEFINE(vtnet); > >-/* Tunables. */ >-static SYSCTL_NODE(_hw, OID_AUTO, vtnet, CTLFLAG_RD, 0, "VNET driver parameters"); >+#define vtnet_htog16(_sc, _val) virtio_htog16(vtnet_modern(_sc), _val) >+#define vtnet_htog32(_sc, _val) virtio_htog32(vtnet_modern(_sc), _val) >+#define vtnet_htog64(_sc, _val) virtio_htog64(vtnet_modern(_sc), _val) >+#define vtnet_gtoh16(_sc, _val) virtio_gtoh16(vtnet_modern(_sc), _val) >+#define vtnet_gtoh32(_sc, _val) virtio_gtoh32(vtnet_modern(_sc), _val) >+#define vtnet_gtoh64(_sc, _val) virtio_gtoh64(vtnet_modern(_sc), _val) >+ >+static SYSCTL_NODE(_hw, OID_AUTO, vtnet, CTLFLAG_RD, 0, "VirtIO Net driver"); >+ > static int vtnet_csum_disable = 0; >-TUNABLE_INT("hw.vtnet.csum_disable", &vtnet_csum_disable); > SYSCTL_INT(_hw_vtnet, OID_AUTO, csum_disable, CTLFLAG_RDTUN, > &vtnet_csum_disable, 0, "Disables receive and send checksum offload"); >+ >+static int vtnet_fixup_needs_csum = 0; >+SYSCTL_INT(_hw_vtnet, OID_AUTO, fixup_needs_csum, CTLFLAG_RDTUN, >+ &vtnet_fixup_needs_csum, 0, >+ "Calculate valid checksum for NEEDS_CSUM packets"); >+ > static int vtnet_tso_disable = 0; >-TUNABLE_INT("hw.vtnet.tso_disable", &vtnet_tso_disable); >-SYSCTL_INT(_hw_vtnet, OID_AUTO, tso_disable, CTLFLAG_RDTUN, &vtnet_tso_disable, >- 0, "Disables TCP Segmentation Offload"); >+SYSCTL_INT(_hw_vtnet, OID_AUTO, tso_disable, CTLFLAG_RDTUN, >+ &vtnet_tso_disable, 0, "Disables TSO"); >+ > static int vtnet_lro_disable = 0; >-TUNABLE_INT("hw.vtnet.lro_disable", &vtnet_lro_disable); >-SYSCTL_INT(_hw_vtnet, OID_AUTO, lro_disable, CTLFLAG_RDTUN, &vtnet_lro_disable, >- 0, "Disables TCP Large Receive Offload"); >+SYSCTL_INT(_hw_vtnet, OID_AUTO, lro_disable, CTLFLAG_RDTUN, >+ &vtnet_lro_disable, 0, "Disables hardware LRO"); >+ > static int vtnet_mq_disable = 0; >-TUNABLE_INT("hw.vtnet.mq_disable", &vtnet_mq_disable); >-SYSCTL_INT(_hw_vtnet, OID_AUTO, mq_disable, CTLFLAG_RDTUN, &vtnet_mq_disable, >- 0, "Disables Multi Queue support"); >+SYSCTL_INT(_hw_vtnet, OID_AUTO, mq_disable, CTLFLAG_RDTUN, >+ &vtnet_mq_disable, 0, "Disables multiqueue support"); >+ > static int vtnet_mq_max_pairs = VTNET_MAX_QUEUE_PAIRS; >-TUNABLE_INT("hw.vtnet.mq_max_pairs", &vtnet_mq_max_pairs); > SYSCTL_INT(_hw_vtnet, OID_AUTO, mq_max_pairs, CTLFLAG_RDTUN, >- &vtnet_mq_max_pairs, 0, "Sets the maximum number of Multi Queue pairs"); >-static int vtnet_rx_process_limit = 512; >-TUNABLE_INT("hw.vtnet.rx_process_limit", &vtnet_rx_process_limit); >+ &vtnet_mq_max_pairs, 0, "Maximum number of multiqueue pairs"); >+ >+static int vtnet_tso_maxlen = IP_MAXPACKET; >+SYSCTL_INT(_hw_vtnet, OID_AUTO, tso_maxlen, CTLFLAG_RDTUN, >+ &vtnet_tso_maxlen, 0, "TSO burst limit"); >+ >+static int vtnet_rx_process_limit = 1024; > SYSCTL_INT(_hw_vtnet, OID_AUTO, rx_process_limit, CTLFLAG_RDTUN, > &vtnet_rx_process_limit, 0, >- "Limits the number RX segments processed in a single pass"); >+ "Number of RX segments processed in one pass"); > >+static int vtnet_lro_entry_count = 128; >+SYSCTL_INT(_hw_vtnet, OID_AUTO, lro_entry_count, CTLFLAG_RDTUN, >+ &vtnet_lro_entry_count, 0, "Software LRO entry count"); >+ >+/* Enable sorted LRO, and the depth of the mbuf queue. 
*/ >+static int vtnet_lro_mbufq_depth = 0; >+SYSCTL_UINT(_hw_vtnet, OID_AUTO, lro_mbufq_depth, CTLFLAG_RDTUN, >+ &vtnet_lro_mbufq_depth, 0, "Depth of software LRO mbuf queue"); >+ > static uma_zone_t vtnet_tx_header_zone; > > static struct virtio_feature_desc vtnet_feature_desc[] = { >- { VIRTIO_NET_F_CSUM, "TxChecksum" }, >- { VIRTIO_NET_F_GUEST_CSUM, "RxChecksum" }, >- { VIRTIO_NET_F_MAC, "MacAddress" }, >- { VIRTIO_NET_F_GSO, "TxAllGSO" }, >- { VIRTIO_NET_F_GUEST_TSO4, "RxTSOv4" }, >- { VIRTIO_NET_F_GUEST_TSO6, "RxTSOv6" }, >- { VIRTIO_NET_F_GUEST_ECN, "RxECN" }, >- { VIRTIO_NET_F_GUEST_UFO, "RxUFO" }, >- { VIRTIO_NET_F_HOST_TSO4, "TxTSOv4" }, >- { VIRTIO_NET_F_HOST_TSO6, "TxTSOv6" }, >- { VIRTIO_NET_F_HOST_ECN, "TxTSOECN" }, >- { VIRTIO_NET_F_HOST_UFO, "TxUFO" }, >- { VIRTIO_NET_F_MRG_RXBUF, "MrgRxBuf" }, >- { VIRTIO_NET_F_STATUS, "Status" }, >- { VIRTIO_NET_F_CTRL_VQ, "ControlVq" }, >- { VIRTIO_NET_F_CTRL_RX, "RxMode" }, >- { VIRTIO_NET_F_CTRL_VLAN, "VLanFilter" }, >- { VIRTIO_NET_F_CTRL_RX_EXTRA, "RxModeExtra" }, >- { VIRTIO_NET_F_GUEST_ANNOUNCE, "GuestAnnounce" }, >- { VIRTIO_NET_F_MQ, "Multiqueue" }, >- { VIRTIO_NET_F_CTRL_MAC_ADDR, "SetMacAddress" }, >+ { VIRTIO_NET_F_CSUM, "TxChecksum" }, >+ { VIRTIO_NET_F_GUEST_CSUM, "RxChecksum" }, >+ { VIRTIO_NET_F_CTRL_GUEST_OFFLOADS, "CtrlRxOffloads" }, >+ { VIRTIO_NET_F_MAC, "MAC" }, >+ { VIRTIO_NET_F_GSO, "TxGSO" }, >+ { VIRTIO_NET_F_GUEST_TSO4, "RxLROv4" }, >+ { VIRTIO_NET_F_GUEST_TSO6, "RxLROv6" }, >+ { VIRTIO_NET_F_GUEST_ECN, "RxLROECN" }, >+ { VIRTIO_NET_F_GUEST_UFO, "RxUFO" }, >+ { VIRTIO_NET_F_HOST_TSO4, "TxTSOv4" }, >+ { VIRTIO_NET_F_HOST_TSO6, "TxTSOv6" }, >+ { VIRTIO_NET_F_HOST_ECN, "TxTSOECN" }, >+ { VIRTIO_NET_F_HOST_UFO, "TxUFO" }, >+ { VIRTIO_NET_F_MRG_RXBUF, "MrgRxBuf" }, >+ { VIRTIO_NET_F_STATUS, "Status" }, >+ { VIRTIO_NET_F_CTRL_VQ, "CtrlVq" }, >+ { VIRTIO_NET_F_CTRL_RX, "CtrlRxMode" }, >+ { VIRTIO_NET_F_CTRL_VLAN, "CtrlVLANFilter" }, >+ { VIRTIO_NET_F_CTRL_RX_EXTRA, "CtrlRxModeExtra" }, >+ { VIRTIO_NET_F_GUEST_ANNOUNCE, "GuestAnnounce" }, >+ { VIRTIO_NET_F_MQ, "Multiqueue" }, >+ { VIRTIO_NET_F_CTRL_MAC_ADDR, "CtrlMacAddr" }, >+ { VIRTIO_NET_F_SPEED_DUPLEX, "SpeedDuplex" }, > > { 0, NULL } > }; >@@ -306,24 +349,24 @@ > > #ifdef DEV_NETMAP > #include <dev/netmap/if_vtnet_netmap.h> >-#endif /* DEV_NETMAP */ >+#endif > > static driver_t vtnet_driver = { >- "vtnet", >- vtnet_methods, >- sizeof(struct vtnet_softc) >+ .name = "vtnet", >+ .methods = vtnet_methods, >+ .size = sizeof(struct vtnet_softc) > }; > static devclass_t vtnet_devclass; > > DRIVER_MODULE(vtnet, virtio_mmio, vtnet_driver, vtnet_devclass, > vtnet_modevent, 0); >-DRIVER_MODULE(vtnet, virtio_pci, vtnet_driver, vtnet_devclass, >- vtnet_modevent, 0); >+DRIVER_MODULE(vtnet, vtpcil, vtnet_driver, vtnet_devclass, vtnet_modevent, 0); >+DRIVER_MODULE(vtnet, vtpcim, vtnet_driver, vtnet_devclass, vtnet_modevent, 0); > MODULE_VERSION(vtnet, 1); > MODULE_DEPEND(vtnet, virtio, 1, 1, 1); > #ifdef DEV_NETMAP > MODULE_DEPEND(vtnet, netmap, 1, 1, 1); >-#endif /* DEV_NETMAP */ >+#endif > > static int > vtnet_modevent(module_t mod, int type, void *unused) >@@ -365,7 +408,7 @@ > if (virtio_get_device_type(dev) != VIRTIO_ID_NETWORK) > return (ENXIO); > >- device_set_desc(dev, "VirtIO Networking Adapter"); >+ device_set_desc(dev, "VirtIO Network Adapter"); > > return (BUS_PROBE_DEFAULT); > } >@@ -378,16 +421,26 @@ > > sc = device_get_softc(dev); > sc->vtnet_dev = dev; >- >- /* Register our feature descriptions. 
*/ > virtio_set_feature_desc(dev, vtnet_feature_desc); > > VTNET_CORE_LOCK_INIT(sc); > callout_init_mtx(&sc->vtnet_tick_ch, VTNET_CORE_MTX(sc), 0); >+ vtnet_load_tunables(sc); > >+ error = vtnet_alloc_interface(sc); >+ if (error) { >+ device_printf(dev, "cannot allocate interface\n"); >+ goto fail; >+ } >+ > vtnet_setup_sysctl(sc); >- vtnet_setup_features(sc); > >+ error = vtnet_setup_features(sc); >+ if (error) { >+ device_printf(dev, "cannot setup features\n"); >+ goto fail; >+ } >+ > error = vtnet_alloc_rx_filters(sc); > if (error) { > device_printf(dev, "cannot allocate Rx filters\n"); >@@ -414,16 +467,14 @@ > > error = virtio_setup_intr(dev, INTR_TYPE_NET); > if (error) { >- device_printf(dev, "cannot setup virtqueue interrupts\n"); >- /* BMV: This will crash if during boot! */ >+ device_printf(dev, "cannot setup interrupts\n"); > ether_ifdetach(sc->vtnet_ifp); > goto fail; > } > > #ifdef DEV_NETMAP > vtnet_netmap_attach(sc); >-#endif /* DEV_NETMAP */ >- >+#endif > vtnet_start_taskqueues(sc); > > fail: >@@ -455,7 +506,7 @@ > > #ifdef DEV_NETMAP > netmap_detach(ifp); >-#endif /* DEV_NETMAP */ >+#endif > > vtnet_free_taskqueues(sc); > >@@ -522,7 +573,6 @@ > static int > vtnet_shutdown(device_t dev) > { >- > /* > * Suspend already does all of what we need to > * do here; we just never expect to be resumed. >@@ -533,9 +583,14 @@ > static int > vtnet_attach_completed(device_t dev) > { >+ struct vtnet_softc *sc; > >- vtnet_attach_disable_promisc(device_get_softc(dev)); >+ sc = device_get_softc(dev); > >+ VTNET_CORE_LOCK(sc); >+ vtnet_attached_set_macaddr(sc); >+ VTNET_CORE_UNLOCK(sc); >+ > return (0); > } > >@@ -555,37 +610,67 @@ > return (0); > } > >-static void >+static int > vtnet_negotiate_features(struct vtnet_softc *sc) > { > device_t dev; >- uint64_t mask, features; >+ uint64_t features, negotiated_features; >+ int no_csum; > > dev = sc->vtnet_dev; >- mask = 0; >+ features = virtio_bus_is_modern(dev) ? VTNET_MODERN_FEATURES : >+ VTNET_LEGACY_FEATURES; > > /* > * TSO and LRO are only available when their corresponding checksum > * offload feature is also negotiated. > */ >- if (vtnet_tunable_int(sc, "csum_disable", vtnet_csum_disable)) { >- mask |= VIRTIO_NET_F_CSUM | VIRTIO_NET_F_GUEST_CSUM; >- mask |= VTNET_TSO_FEATURES | VTNET_LRO_FEATURES; >- } >- if (vtnet_tunable_int(sc, "tso_disable", vtnet_tso_disable)) >- mask |= VTNET_TSO_FEATURES; >- if (vtnet_tunable_int(sc, "lro_disable", vtnet_lro_disable)) >- mask |= VTNET_LRO_FEATURES; >+ no_csum = vtnet_tunable_int(sc, "csum_disable", vtnet_csum_disable); >+ if (no_csum) >+ features &= ~(VIRTIO_NET_F_CSUM | VIRTIO_NET_F_GUEST_CSUM); >+ if (no_csum || vtnet_tunable_int(sc, "tso_disable", vtnet_tso_disable)) >+ features &= ~VTNET_TSO_FEATURES; >+ if (no_csum || vtnet_tunable_int(sc, "lro_disable", vtnet_lro_disable)) >+ features &= ~VTNET_LRO_FEATURES; >+ > #ifndef VTNET_LEGACY_TX > if (vtnet_tunable_int(sc, "mq_disable", vtnet_mq_disable)) >- mask |= VIRTIO_NET_F_MQ; >+ features &= ~VIRTIO_NET_F_MQ; > #else >- mask |= VIRTIO_NET_F_MQ; >+ features &= ~VIRTIO_NET_F_MQ; > #endif > >- features = VTNET_FEATURES & ~mask; >- sc->vtnet_features = virtio_negotiate_features(dev, features); >+ negotiated_features = virtio_negotiate_features(dev, features); > >+ if (virtio_with_feature(dev, VIRTIO_NET_F_MTU)) { >+ uint16_t mtu; >+ >+ mtu = virtio_read_dev_config_2(dev, >+ offsetof(struct virtio_net_config, mtu)); >+ if (mtu < VTNET_MIN_MTU /* || mtu > VTNET_MAX_MTU */) { >+ device_printf(dev, "Invalid MTU value: %d. 
" >+ "MTU feature disabled.\n", mtu); >+ features &= ~VIRTIO_NET_F_MTU; >+ negotiated_features = >+ virtio_negotiate_features(dev, features); >+ } >+ } >+ >+ if (virtio_with_feature(dev, VIRTIO_NET_F_MQ)) { >+ uint16_t npairs; >+ >+ npairs = virtio_read_dev_config_2(dev, >+ offsetof(struct virtio_net_config, max_virtqueue_pairs)); >+ if (npairs < VIRTIO_NET_CTRL_MQ_VQ_PAIRS_MIN || >+ npairs > VIRTIO_NET_CTRL_MQ_VQ_PAIRS_MAX) { >+ device_printf(dev, "Invalid max_virtqueue_pairs value: " >+ "%d. Multiqueue feature disabled.\n", npairs); >+ features &= ~VIRTIO_NET_F_MQ; >+ negotiated_features = >+ virtio_negotiate_features(dev, features); >+ } >+ } >+ > if (virtio_with_feature(dev, VTNET_LRO_FEATURES) && > virtio_with_feature(dev, VIRTIO_NET_F_MRG_RXBUF) == 0) { > /* >@@ -599,26 +684,35 @@ > */ > if (!virtio_with_feature(dev, VIRTIO_RING_F_INDIRECT_DESC)) { > device_printf(dev, >- "LRO disabled due to both mergeable buffers and " >- "indirect descriptors not negotiated\n"); >- >+ "Host LRO disabled since both mergeable buffers " >+ "and indirect descriptors were not negotiated\n"); > features &= ~VTNET_LRO_FEATURES; >- sc->vtnet_features = >+ negotiated_features = > virtio_negotiate_features(dev, features); > } else > sc->vtnet_flags |= VTNET_FLAG_LRO_NOMRG; > } >+ >+ sc->vtnet_features = negotiated_features; >+ sc->vtnet_negotiated_features = negotiated_features; >+ >+ return (virtio_finalize_features(dev)); > } > >-static void >+static int > vtnet_setup_features(struct vtnet_softc *sc) > { > device_t dev; >+ int error; > > dev = sc->vtnet_dev; > >- vtnet_negotiate_features(sc); >+ error = vtnet_negotiate_features(sc); >+ if (error) >+ return (error); > >+ if (virtio_with_feature(dev, VIRTIO_F_VERSION_1)) >+ sc->vtnet_flags |= VTNET_FLAG_MODERN; > if (virtio_with_feature(dev, VIRTIO_RING_F_INDIRECT_DESC)) > sc->vtnet_flags |= VTNET_FLAG_INDIRECT; > if (virtio_with_feature(dev, VIRTIO_RING_F_EVENT_IDX)) >@@ -629,26 +723,46 @@ > sc->vtnet_flags |= VTNET_FLAG_MAC; > } > >+ if (virtio_with_feature(dev, VIRTIO_NET_F_MTU)) { >+ sc->vtnet_max_mtu = virtio_read_dev_config_2(dev, >+ offsetof(struct virtio_net_config, mtu)); >+ } else >+ sc->vtnet_max_mtu = VTNET_MAX_MTU; >+ > if (virtio_with_feature(dev, VIRTIO_NET_F_MRG_RXBUF)) { > sc->vtnet_flags |= VTNET_FLAG_MRG_RXBUFS; > sc->vtnet_hdr_size = sizeof(struct virtio_net_hdr_mrg_rxbuf); >+ } else if (vtnet_modern(sc)) { >+ /* This is identical to the mergeable header. */ >+ sc->vtnet_hdr_size = sizeof(struct virtio_net_hdr_v1); > } else > sc->vtnet_hdr_size = sizeof(struct virtio_net_hdr); > >- if (sc->vtnet_flags & VTNET_FLAG_MRG_RXBUFS) >- sc->vtnet_rx_nsegs = VTNET_MRG_RX_SEGS; >+ if (vtnet_modern(sc) || sc->vtnet_flags & VTNET_FLAG_MRG_RXBUFS) >+ sc->vtnet_rx_nsegs = VTNET_RX_SEGS_HDR_INLINE; > else if (sc->vtnet_flags & VTNET_FLAG_LRO_NOMRG) >- sc->vtnet_rx_nsegs = VTNET_MAX_RX_SEGS; >+ sc->vtnet_rx_nsegs = VTNET_RX_SEGS_LRO_NOMRG; > else >- sc->vtnet_rx_nsegs = VTNET_MIN_RX_SEGS; >+ sc->vtnet_rx_nsegs = VTNET_RX_SEGS_HDR_SEPARATE; > >+ /* >+ * Favor "hardware" LRO if negotiated, but support software LRO as >+ * a fallback; there is usually little benefit (or worse) with both. 
>+ */ >+ if (virtio_with_feature(dev, VIRTIO_NET_F_GUEST_TSO4) == 0 && >+ virtio_with_feature(dev, VIRTIO_NET_F_GUEST_TSO6) == 0) >+ sc->vtnet_flags |= VTNET_FLAG_SW_LRO; >+ > if (virtio_with_feature(dev, VIRTIO_NET_F_GSO) || > virtio_with_feature(dev, VIRTIO_NET_F_HOST_TSO4) || > virtio_with_feature(dev, VIRTIO_NET_F_HOST_TSO6)) >- sc->vtnet_tx_nsegs = VTNET_MAX_TX_SEGS; >+ sc->vtnet_tx_nsegs = VTNET_TX_SEGS_MAX; > else >- sc->vtnet_tx_nsegs = VTNET_MIN_TX_SEGS; >+ sc->vtnet_tx_nsegs = VTNET_TX_SEGS_MIN; > >+ sc->vtnet_req_vq_pairs = 1; >+ sc->vtnet_max_vq_pairs = 1; >+ > if (virtio_with_feature(dev, VIRTIO_NET_F_CTRL_VQ)) { > sc->vtnet_flags |= VTNET_FLAG_CTRL_VQ; > >@@ -658,35 +772,37 @@ > sc->vtnet_flags |= VTNET_FLAG_VLAN_FILTER; > if (virtio_with_feature(dev, VIRTIO_NET_F_CTRL_MAC_ADDR)) > sc->vtnet_flags |= VTNET_FLAG_CTRL_MAC; >+ >+ if (virtio_with_feature(dev, VIRTIO_NET_F_MQ)) { >+ sc->vtnet_max_vq_pairs = virtio_read_dev_config_2(dev, >+ offsetof(struct virtio_net_config, >+ max_virtqueue_pairs)); >+ } > } > >- if (virtio_with_feature(dev, VIRTIO_NET_F_MQ) && >- sc->vtnet_flags & VTNET_FLAG_CTRL_VQ) { >- sc->vtnet_max_vq_pairs = virtio_read_dev_config_2(dev, >- offsetof(struct virtio_net_config, max_virtqueue_pairs)); >- } else >- sc->vtnet_max_vq_pairs = 1; >- > if (sc->vtnet_max_vq_pairs > 1) { >+ int req; >+ > /* >- * Limit the maximum number of queue pairs to the lower of >- * the number of CPUs and the configured maximum. >- * The actual number of queues that get used may be less. >+ * Limit the maximum number of requested queue pairs to the >+ * number of CPUs and the configured maximum. > */ >- int max; >- >- max = vtnet_tunable_int(sc, "mq_max_pairs", vtnet_mq_max_pairs); >- if (max > VIRTIO_NET_CTRL_MQ_VQ_PAIRS_MIN) { >- if (max > mp_ncpus) >- max = mp_ncpus; >- if (max > VIRTIO_NET_CTRL_MQ_VQ_PAIRS_MAX) >- max = VIRTIO_NET_CTRL_MQ_VQ_PAIRS_MAX; >- if (max > 1) { >- sc->vtnet_requested_vq_pairs = max; >- sc->vtnet_flags |= VTNET_FLAG_MULTIQ; >- } >+ req = vtnet_tunable_int(sc, "mq_max_pairs", vtnet_mq_max_pairs); >+ if (req < 0) >+ req = 1; >+ if (req == 0) >+ req = mp_ncpus; >+ if (req > sc->vtnet_max_vq_pairs) >+ req = sc->vtnet_max_vq_pairs; >+ if (req > mp_ncpus) >+ req = mp_ncpus; >+ if (req > 1) { >+ sc->vtnet_req_vq_pairs = req; >+ sc->vtnet_flags |= VTNET_FLAG_MQ; > } > } >+ >+ return (0); > } > > static int >@@ -707,6 +823,14 @@ > if (rxq->vtnrx_sg == NULL) > return (ENOMEM); > >+#if defined(INET) || defined(INET6) >+ if (vtnet_software_lro(sc)) { >+ if (tcp_lro_init_args(&rxq->vtnrx_lro, sc->vtnet_ifp, >+ sc->vtnet_lro_entry_count, sc->vtnet_lro_mbufq_depth) != 0) >+ return (ENOMEM); >+ } >+#endif >+ > TASK_INIT(&rxq->vtnrx_intrtask, 0, vtnet_rxq_tq_intr, rxq); > rxq->vtnrx_tq = taskqueue_create(rxq->vtnrx_name, M_NOWAIT, > taskqueue_thread_enqueue, &rxq->vtnrx_tq); >@@ -772,6 +896,7 @@ > return (error); > } > >+ vtnet_set_rx_process_limit(sc); > vtnet_setup_queue_sysctl(sc); > > return (0); >@@ -784,6 +909,10 @@ > rxq->vtnrx_sc = NULL; > rxq->vtnrx_id = -1; > >+#if defined(INET) || defined(INET6) >+ tcp_lro_free(&rxq->vtnrx_lro); >+#endif >+ > if (rxq->vtnrx_sg != NULL) { > sglist_free(rxq->vtnrx_sg); > rxq->vtnrx_sg = NULL; >@@ -892,28 +1021,39 @@ > if (info == NULL) > return (ENOMEM); > >- for (i = 0, idx = 0; i < sc->vtnet_max_vq_pairs; i++, idx+=2) { >+ for (i = 0, idx = 0; i < sc->vtnet_req_vq_pairs; i++, idx += 2) { > rxq = &sc->vtnet_rxqs[i]; > VQ_ALLOC_INFO_INIT(&info[idx], sc->vtnet_rx_nsegs, > vtnet_rx_vq_intr, rxq, &rxq->vtnrx_vq, >- "%s-%d rx", 
device_get_nameunit(dev), rxq->vtnrx_id); >+ "%s-rx%d", device_get_nameunit(dev), rxq->vtnrx_id); > > txq = &sc->vtnet_txqs[i]; > VQ_ALLOC_INFO_INIT(&info[idx+1], sc->vtnet_tx_nsegs, > vtnet_tx_vq_intr, txq, &txq->vtntx_vq, >- "%s-%d tx", device_get_nameunit(dev), txq->vtntx_id); >+ "%s-tx%d", device_get_nameunit(dev), txq->vtntx_id); > } > >+ /* These queues will not be used so allocate the minimum resources. */ >+ for (/**/; i < sc->vtnet_max_vq_pairs; i++, idx += 2) { >+ rxq = &sc->vtnet_rxqs[i]; >+ VQ_ALLOC_INFO_INIT(&info[idx], 0, NULL, rxq, &rxq->vtnrx_vq, >+ "%s-rx%d", device_get_nameunit(dev), rxq->vtnrx_id); >+ >+ txq = &sc->vtnet_txqs[i]; >+ VQ_ALLOC_INFO_INIT(&info[idx+1], 0, NULL, txq, &txq->vtntx_vq, >+ "%s-tx%d", device_get_nameunit(dev), txq->vtntx_id); >+ } >+ > if (sc->vtnet_flags & VTNET_FLAG_CTRL_VQ) { > VQ_ALLOC_INFO_INIT(&info[idx], 0, NULL, NULL, > &sc->vtnet_ctrl_vq, "%s ctrl", device_get_nameunit(dev)); > } > > /* >- * Enable interrupt binding if this is multiqueue. This only matters >- * when per-vq MSIX is available. >+ * TODO: Enable interrupt binding if this is multiqueue. This will >+ * only matter when per-virtqueue MSIX is available. > */ >- if (sc->vtnet_flags & VTNET_FLAG_MULTIQ) >+ if (sc->vtnet_flags & VTNET_FLAG_MQ) > flags |= 0; > > error = virtio_alloc_virtqueues(dev, flags, nvqs, info); >@@ -923,23 +1063,35 @@ > } > > static int >-vtnet_setup_interface(struct vtnet_softc *sc) >+vtnet_alloc_interface(struct vtnet_softc *sc) > { > device_t dev; > struct ifnet *ifp; > > dev = sc->vtnet_dev; > >- ifp = sc->vtnet_ifp = if_alloc(IFT_ETHER); >- if (ifp == NULL) { >- device_printf(dev, "cannot allocate ifnet structure\n"); >- return (ENOSPC); >- } >+ ifp = if_alloc(IFT_ETHER); >+ if (ifp == NULL) >+ return (ENOMEM); > >- if_initname(ifp, device_get_name(dev), device_get_unit(dev)); >- ifp->if_baudrate = IF_Gbps(10); /* Approx. */ >+ sc->vtnet_ifp = ifp; > ifp->if_softc = sc; >+ if_initname(ifp, device_get_name(dev), device_get_unit(dev)); >+ >+ return (0); >+} >+ >+static int >+vtnet_setup_interface(struct vtnet_softc *sc) >+{ >+ device_t dev; >+ struct ifnet *ifp; >+ >+ dev = sc->vtnet_dev; >+ ifp = sc->vtnet_ifp; >+ > ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST; >+ ifp->if_baudrate = IF_Gbps(10); > ifp->if_init = vtnet_init; > ifp->if_ioctl = vtnet_ioctl; > ifp->if_get_counter = vtnet_get_counter; >@@ -954,51 +1106,58 @@ > IFQ_SET_READY(&ifp->if_snd); > #endif > >- ifmedia_init(&sc->vtnet_media, IFM_IMASK, vtnet_ifmedia_upd, >- vtnet_ifmedia_sts); >- ifmedia_add(&sc->vtnet_media, VTNET_MEDIATYPE, 0, NULL); >- ifmedia_set(&sc->vtnet_media, VTNET_MEDIATYPE); >+ vtnet_get_macaddr(sc); > >- /* Read (or generate) the MAC address for the adapter. */ >- vtnet_get_hwaddr(sc); >- >- ether_ifattach(ifp, sc->vtnet_hwaddr); >- > if (virtio_with_feature(dev, VIRTIO_NET_F_STATUS)) > ifp->if_capabilities |= IFCAP_LINKSTATE; > >- /* Tell the upper layer(s) we support long frames. 
*/ >- ifp->if_hdrlen = sizeof(struct ether_vlan_header); >- ifp->if_capabilities |= IFCAP_JUMBO_MTU | IFCAP_VLAN_MTU; >+ ifmedia_init(&sc->vtnet_media, 0, vtnet_ifmedia_upd, vtnet_ifmedia_sts); >+ ifmedia_add(&sc->vtnet_media, IFM_ETHER | IFM_AUTO, 0, NULL); >+ ifmedia_set(&sc->vtnet_media, IFM_ETHER | IFM_AUTO); > > if (virtio_with_feature(dev, VIRTIO_NET_F_CSUM)) { >+ int gso; >+ > ifp->if_capabilities |= IFCAP_TXCSUM | IFCAP_TXCSUM_IPV6; > >- if (virtio_with_feature(dev, VIRTIO_NET_F_GSO)) { >- ifp->if_capabilities |= IFCAP_TSO4 | IFCAP_TSO6; >+ gso = virtio_with_feature(dev, VIRTIO_NET_F_GSO); >+ if (gso || virtio_with_feature(dev, VIRTIO_NET_F_HOST_TSO4)) >+ ifp->if_capabilities |= IFCAP_TSO4; >+ if (gso || virtio_with_feature(dev, VIRTIO_NET_F_HOST_TSO6)) >+ ifp->if_capabilities |= IFCAP_TSO6; >+ if (gso || virtio_with_feature(dev, VIRTIO_NET_F_HOST_ECN)) > sc->vtnet_flags |= VTNET_FLAG_TSO_ECN; >- } else { >- if (virtio_with_feature(dev, VIRTIO_NET_F_HOST_TSO4)) >- ifp->if_capabilities |= IFCAP_TSO4; >- if (virtio_with_feature(dev, VIRTIO_NET_F_HOST_TSO6)) >- ifp->if_capabilities |= IFCAP_TSO6; >- if (virtio_with_feature(dev, VIRTIO_NET_F_HOST_ECN)) >- sc->vtnet_flags |= VTNET_FLAG_TSO_ECN; >- } > >- if (ifp->if_capabilities & IFCAP_TSO) >+ if (ifp->if_capabilities & (IFCAP_TSO4 | IFCAP_TSO6)) { >+ int tso_maxlen; >+ > ifp->if_capabilities |= IFCAP_VLAN_HWTSO; >+ >+ tso_maxlen = vtnet_tunable_int(sc, "tso_maxlen", >+ vtnet_tso_maxlen); >+ ifp->if_hw_tsomax = tso_maxlen - >+ (ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN); >+ ifp->if_hw_tsomaxsegcount = sc->vtnet_tx_nsegs - 1; >+ ifp->if_hw_tsomaxsegsize = PAGE_SIZE; >+ } > } > > if (virtio_with_feature(dev, VIRTIO_NET_F_GUEST_CSUM)) { >- ifp->if_capabilities |= IFCAP_RXCSUM | IFCAP_RXCSUM_IPV6; >+ ifp->if_capabilities |= IFCAP_RXCSUM; >+#ifdef notyet >+ /* BMV: Rx checksums not distinguished between IPv4 and IPv6. */ >+ ifp->if_capabilities |= IFCAP_RXCSUM_IPV6; >+#endif > >- if (virtio_with_feature(dev, VIRTIO_NET_F_GUEST_TSO4) || >- virtio_with_feature(dev, VIRTIO_NET_F_GUEST_TSO6)) >- ifp->if_capabilities |= IFCAP_LRO; >+ if (vtnet_tunable_int(sc, "fixup_needs_csum", >+ vtnet_fixup_needs_csum) != 0) >+ sc->vtnet_flags |= VTNET_FLAG_FIXUP_NEEDS_CSUM; >+ >+ /* Support either "hardware" or software LRO. */ >+ ifp->if_capabilities |= IFCAP_LRO; > } > >- if (ifp->if_capabilities & IFCAP_HWCSUM) { >+ if (ifp->if_capabilities & (IFCAP_HWCSUM | IFCAP_HWCSUM_IPV6)) { > /* > * VirtIO does not support VLAN tagging, but we can fake > * it by inserting and removing the 802.1Q header during >@@ -1009,11 +1168,14 @@ > IFCAP_VLAN_HWTAGGING | IFCAP_VLAN_HWCSUM; > } > >- ifp->if_capenable = ifp->if_capabilities; >+ if (sc->vtnet_max_mtu >= ETHERMTU_JUMBO) >+ ifp->if_capabilities |= IFCAP_JUMBO_MTU; >+ ifp->if_capabilities |= IFCAP_VLAN_MTU; > > /* > * Capabilities after here are not enabled by default. > */ >+ ifp->if_capenable = ifp->if_capabilities; > > if (sc->vtnet_flags & VTNET_FLAG_VLAN_FILTER) { > ifp->if_capabilities |= IFCAP_VLAN_HWFILTER; >@@ -1024,59 +1186,216 @@ > vtnet_unregister_vlan, sc, EVENTHANDLER_PRI_FIRST); > } > >- vtnet_set_rx_process_limit(sc); >- vtnet_set_tx_intr_threshold(sc); >+ ether_ifattach(ifp, sc->vtnet_hwaddr); > >+ /* Tell the upper layer(s) we support long frames. 
*/ >+ ifp->if_hdrlen = sizeof(struct ether_vlan_header); >+ > NETDUMP_SET(ifp, vtnet); > > return (0); > } > > static int >-vtnet_change_mtu(struct vtnet_softc *sc, int new_mtu) >+vtnet_rx_cluster_size(struct vtnet_softc *sc, int mtu) > { >+ int framesz; >+ >+ if (sc->vtnet_flags & VTNET_FLAG_MRG_RXBUFS) >+ return (MJUMPAGESIZE); >+ else if (sc->vtnet_flags & VTNET_FLAG_LRO_NOMRG) >+ return (MCLBYTES); >+ >+ /* >+ * Try to scale the receive mbuf cluster size from the MTU. Without >+ * the GUEST_TSO[46] features, the VirtIO specification says the >+ * driver must only be able to receive ~1500 byte frames. But if >+ * jumbo frames can be transmitted then try to receive jumbo. >+ * >+ * BMV: Not quite true when F_MTU is negotiated! >+ */ >+ if (vtnet_modern(sc)) { >+ MPASS(sc->vtnet_hdr_size == sizeof(struct virtio_net_hdr_v1)); >+ framesz = sizeof(struct virtio_net_hdr_v1); >+ } else >+ framesz = sizeof(struct vtnet_rx_header); >+ framesz += sizeof(struct ether_vlan_header) + mtu; >+ >+ if (framesz <= MCLBYTES) >+ return (MCLBYTES); >+ else if (framesz <= MJUMPAGESIZE) >+ return (MJUMPAGESIZE); >+ else if (framesz <= MJUM9BYTES) >+ return (MJUM9BYTES); >+ >+ /* Sane default; avoid 16KB clusters. */ >+ return (MCLBYTES); >+} >+ >+static int >+vtnet_ioctl_mtu(struct vtnet_softc *sc, int mtu) >+{ > struct ifnet *ifp; >- int frame_size, clsize; >+ int clustersz; > > ifp = sc->vtnet_ifp; >+ VTNET_CORE_LOCK_ASSERT(sc); > >- if (new_mtu < ETHERMIN || new_mtu > VTNET_MAX_MTU) >+ if (ifp->if_mtu == mtu) >+ return (0); >+ else if (mtu < ETHERMIN || mtu > sc->vtnet_max_mtu) > return (EINVAL); > >- frame_size = sc->vtnet_hdr_size + sizeof(struct ether_vlan_header) + >- new_mtu; >+ ifp->if_mtu = mtu; >+ clustersz = vtnet_rx_cluster_size(sc, mtu); > >- /* >- * Based on the new MTU (and hence frame size) determine which >- * cluster size is most appropriate for the receive queues. >- */ >- if (frame_size <= MCLBYTES) { >- clsize = MCLBYTES; >- } else if ((sc->vtnet_flags & VTNET_FLAG_MRG_RXBUFS) == 0) { >- /* Avoid going past 9K jumbos. 
*/ >- if (frame_size > MJUM9BYTES) >- return (EINVAL); >- clsize = MJUM9BYTES; >- } else >- clsize = MJUMPAGESIZE; >+ if (clustersz != sc->vtnet_rx_clustersz && >+ ifp->if_drv_flags & IFF_DRV_RUNNING) { >+ ifp->if_drv_flags &= ~IFF_DRV_RUNNING; >+ vtnet_init_locked(sc); >+ } > >- ifp->if_mtu = new_mtu; >- sc->vtnet_rx_new_clsize = clsize; >+ return (0); >+} > >- if (ifp->if_drv_flags & IFF_DRV_RUNNING) { >- ifp->if_drv_flags &= ~IFF_DRV_RUNNING; >+static int >+vtnet_ioctl_ifflags(struct vtnet_softc *sc) >+{ >+ struct ifnet *ifp; >+ int drv_running; >+ >+ ifp = sc->vtnet_ifp; >+ drv_running = (ifp->if_drv_flags & IFF_DRV_RUNNING) != 0; >+ >+ VTNET_CORE_LOCK_ASSERT(sc); >+ >+ if ((ifp->if_flags & IFF_UP) == 0) { >+ if (drv_running) >+ vtnet_stop(sc); >+ goto out; >+ } >+ >+ if (!drv_running) { > vtnet_init_locked(sc); >+ goto out; > } > >+ if ((ifp->if_flags ^ sc->vtnet_if_flags) & >+ (IFF_PROMISC | IFF_ALLMULTI)) { >+ if ((sc->vtnet_flags & VTNET_FLAG_CTRL_RX) == 0) >+ return (ENOTSUP); >+ vtnet_rx_filter(sc); >+ } >+ >+out: >+ sc->vtnet_if_flags = ifp->if_flags; > return (0); > } > > static int >+vtnet_ioctl_multi(struct vtnet_softc *sc) >+{ >+ struct ifnet *ifp; >+ >+ ifp = sc->vtnet_ifp; >+ >+ VTNET_CORE_LOCK_ASSERT(sc); >+ >+ if (sc->vtnet_flags & VTNET_FLAG_CTRL_RX && >+ ifp->if_drv_flags & IFF_DRV_RUNNING) >+ vtnet_rx_filter_mac(sc); >+ >+ return (0); >+} >+ >+static int >+vtnet_ioctl_ifcap(struct vtnet_softc *sc, struct ifreq *ifr) >+{ >+ struct ifnet *ifp; >+ int mask, reinit, update; >+ >+ ifp = sc->vtnet_ifp; >+ mask = (ifr->ifr_reqcap & ifp->if_capabilities) ^ ifp->if_capenable; >+ reinit = update = 0; >+ >+ VTNET_CORE_LOCK_ASSERT(sc); >+ >+ if (mask & IFCAP_TXCSUM) >+ ifp->if_capenable ^= IFCAP_TXCSUM; >+ if (mask & IFCAP_TXCSUM_IPV6) >+ ifp->if_capenable ^= IFCAP_TXCSUM_IPV6; >+ if (mask & IFCAP_TSO4) >+ ifp->if_capenable ^= IFCAP_TSO4; >+ if (mask & IFCAP_TSO6) >+ ifp->if_capenable ^= IFCAP_TSO6; >+ >+ if (mask & (IFCAP_RXCSUM | IFCAP_RXCSUM_IPV6 | IFCAP_LRO)) { >+ /* >+ * These Rx features require the negotiated features to >+ * be updated. Avoid a full reinit if possible. >+ */ >+ if (sc->vtnet_features & VIRTIO_NET_F_CTRL_GUEST_OFFLOADS) >+ update = 1; >+ else >+ reinit = 1; >+ >+ /* BMV: Avoid needless renegotiation for just software LRO. */ >+ if ((mask & (IFCAP_RXCSUM | IFCAP_RXCSUM_IPV6 | IFCAP_LRO)) == >+ IFCAP_LRO && vtnet_software_lro(sc)) >+ reinit = update = 0; >+ >+ if (mask & IFCAP_RXCSUM) >+ ifp->if_capenable ^= IFCAP_RXCSUM; >+ if (mask & IFCAP_RXCSUM_IPV6) >+ ifp->if_capenable ^= IFCAP_RXCSUM_IPV6; >+ if (mask & IFCAP_LRO) >+ ifp->if_capenable ^= IFCAP_LRO; >+ >+ /* >+ * VirtIO does not distinguish between IPv4 and IPv6 checksums >+ * so treat them as a pair. Guest TSO (LRO) requires receive >+ * checksums. >+ */ >+ if (ifp->if_capenable & (IFCAP_RXCSUM | IFCAP_RXCSUM_IPV6)) { >+ ifp->if_capenable |= IFCAP_RXCSUM; >+#ifdef notyet >+ ifp->if_capenable |= IFCAP_RXCSUM_IPV6; >+#endif >+ } else >+ ifp->if_capenable &= >+ ~(IFCAP_RXCSUM | IFCAP_RXCSUM_IPV6 | IFCAP_LRO); >+ } >+ >+ if (mask & IFCAP_VLAN_HWFILTER) { >+ /* These Rx features require renegotiation. 
*/ >+ reinit = 1; >+ >+ if (mask & IFCAP_VLAN_HWFILTER) >+ ifp->if_capenable ^= IFCAP_VLAN_HWFILTER; >+ } >+ >+ if (mask & IFCAP_VLAN_HWTSO) >+ ifp->if_capenable ^= IFCAP_VLAN_HWTSO; >+ if (mask & IFCAP_VLAN_HWTAGGING) >+ ifp->if_capenable ^= IFCAP_VLAN_HWTAGGING; >+ >+ if (ifp->if_drv_flags & IFF_DRV_RUNNING) { >+ if (reinit) { >+ ifp->if_drv_flags &= ~IFF_DRV_RUNNING; >+ vtnet_init_locked(sc); >+ } else if (update) >+ vtnet_update_rx_offloads(sc); >+ } >+ >+ return (0); >+} >+ >+static int > vtnet_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data) > { > struct vtnet_softc *sc; > struct ifreq *ifr; >- int reinit, mask, error; >+ int error; > > sc = ifp->if_softc; > ifr = (struct ifreq *) data; >@@ -1084,45 +1403,21 @@ > > switch (cmd) { > case SIOCSIFMTU: >- if (ifp->if_mtu != ifr->ifr_mtu) { >- VTNET_CORE_LOCK(sc); >- error = vtnet_change_mtu(sc, ifr->ifr_mtu); >- VTNET_CORE_UNLOCK(sc); >- } >+ VTNET_CORE_LOCK(sc); >+ error = vtnet_ioctl_mtu(sc, ifr->ifr_mtu); >+ VTNET_CORE_UNLOCK(sc); > break; > > case SIOCSIFFLAGS: > VTNET_CORE_LOCK(sc); >- if ((ifp->if_flags & IFF_UP) == 0) { >- if (ifp->if_drv_flags & IFF_DRV_RUNNING) >- vtnet_stop(sc); >- } else if (ifp->if_drv_flags & IFF_DRV_RUNNING) { >- if ((ifp->if_flags ^ sc->vtnet_if_flags) & >- (IFF_PROMISC | IFF_ALLMULTI)) { >- if (sc->vtnet_flags & VTNET_FLAG_CTRL_RX) >- vtnet_rx_filter(sc); >- else { >- ifp->if_flags |= IFF_PROMISC; >- if ((ifp->if_flags ^ sc->vtnet_if_flags) >- & IFF_ALLMULTI) >- error = ENOTSUP; >- } >- } >- } else >- vtnet_init_locked(sc); >- >- if (error == 0) >- sc->vtnet_if_flags = ifp->if_flags; >+ error = vtnet_ioctl_ifflags(sc); > VTNET_CORE_UNLOCK(sc); > break; > > case SIOCADDMULTI: > case SIOCDELMULTI: >- if ((sc->vtnet_flags & VTNET_FLAG_CTRL_RX) == 0) >- break; > VTNET_CORE_LOCK(sc); >- if (ifp->if_drv_flags & IFF_DRV_RUNNING) >- vtnet_rx_filter_mac(sc); >+ error = vtnet_ioctl_multi(sc); > VTNET_CORE_UNLOCK(sc); > break; > >@@ -1133,46 +1428,9 @@ > > case SIOCSIFCAP: > VTNET_CORE_LOCK(sc); >- mask = ifr->ifr_reqcap ^ ifp->if_capenable; >- >- if (mask & IFCAP_TXCSUM) >- ifp->if_capenable ^= IFCAP_TXCSUM; >- if (mask & IFCAP_TXCSUM_IPV6) >- ifp->if_capenable ^= IFCAP_TXCSUM_IPV6; >- if (mask & IFCAP_TSO4) >- ifp->if_capenable ^= IFCAP_TSO4; >- if (mask & IFCAP_TSO6) >- ifp->if_capenable ^= IFCAP_TSO6; >- >- if (mask & (IFCAP_RXCSUM | IFCAP_RXCSUM_IPV6 | IFCAP_LRO | >- IFCAP_VLAN_HWFILTER)) { >- /* These Rx features require us to renegotiate. 
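
The first line of vtnet_ioctl_ifcap() above packs the whole request validation into one expression. Restated as a sketch (names are illustrative):

    #include <stdint.h>

    /*
     * Bits that the caller asked to change AND that the driver supports:
     * masking with capabilities first means unsupported requests never
     * even show up as a toggle.
     */
    static uint32_t
    capability_toggles(uint32_t reqcap, uint32_t capabilities,
        uint32_t capenable)
    {
        return ((reqcap & capabilities) ^ capenable);
    }
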
*/ >- reinit = 1; >- >- if (mask & IFCAP_RXCSUM) >- ifp->if_capenable ^= IFCAP_RXCSUM; >- if (mask & IFCAP_RXCSUM_IPV6) >- ifp->if_capenable ^= IFCAP_RXCSUM_IPV6; >- if (mask & IFCAP_LRO) >- ifp->if_capenable ^= IFCAP_LRO; >- if (mask & IFCAP_VLAN_HWFILTER) >- ifp->if_capenable ^= IFCAP_VLAN_HWFILTER; >- } else >- reinit = 0; >- >- if (mask & IFCAP_VLAN_HWTSO) >- ifp->if_capenable ^= IFCAP_VLAN_HWTSO; >- if (mask & IFCAP_VLAN_HWTAGGING) >- ifp->if_capenable ^= IFCAP_VLAN_HWTAGGING; >- >- if (reinit && (ifp->if_drv_flags & IFF_DRV_RUNNING)) { >- ifp->if_drv_flags &= ~IFF_DRV_RUNNING; >- vtnet_init_locked(sc); >- } >- >+ error = vtnet_ioctl_ifcap(sc, ifr); > VTNET_CORE_UNLOCK(sc); > VLAN_CAPABILITIES(ifp); >- > break; > > default: >@@ -1191,12 +1449,6 @@ > struct virtqueue *vq; > int nbufs, error; > >-#ifdef DEV_NETMAP >- error = vtnet_netmap_rxq_populate(rxq); >- if (error >= 0) >- return (error); >-#endif /* DEV_NETMAP */ >- > vq = rxq->vtnrx_vq; > error = ENOSPC; > >@@ -1226,20 +1478,12 @@ > struct virtqueue *vq; > struct mbuf *m; > int last; >-#ifdef DEV_NETMAP >- int netmap_bufs = vtnet_netmap_queue_on(rxq->vtnrx_sc, NR_RX, >- rxq->vtnrx_id); >-#else /* !DEV_NETMAP */ >- int netmap_bufs = 0; >-#endif /* !DEV_NETMAP */ > > vq = rxq->vtnrx_vq; > last = 0; > >- while ((m = virtqueue_drain(vq, &last)) != NULL) { >- if (!netmap_bufs) >- m_freem(m); >- } >+ while ((m = virtqueue_drain(vq, &last)) != NULL) >+ m_freem(m); > > KASSERT(virtqueue_empty(vq), > ("%s: mbufs remaining in rx queue %p", __func__, rxq)); >@@ -1249,57 +1493,49 @@ > vtnet_rx_alloc_buf(struct vtnet_softc *sc, int nbufs, struct mbuf **m_tailp) > { > struct mbuf *m_head, *m_tail, *m; >- int i, clsize; >+ int i, size; > >- clsize = sc->vtnet_rx_clsize; >+ m_head = NULL; >+ size = sc->vtnet_rx_clustersz; > > KASSERT(nbufs == 1 || sc->vtnet_flags & VTNET_FLAG_LRO_NOMRG, >- ("%s: chained mbuf %d request without LRO_NOMRG", __func__, nbufs)); >+ ("%s: mbuf %d chain requested without LRO_NOMRG", __func__, nbufs)); > >- m_head = m_getjcl(M_NOWAIT, MT_DATA, M_PKTHDR, clsize); >- if (m_head == NULL) >- goto fail; >+ for (i = 0; i < nbufs; i++) { >+ m = m_getjcl(M_NOWAIT, MT_DATA, i == 0 ? M_PKTHDR : 0, size); >+ if (m == NULL) { >+ sc->vtnet_stats.mbuf_alloc_failed++; >+ m_freem(m_head); >+ return (NULL); >+ } > >- m_head->m_len = clsize; >- m_tail = m_head; >- >- /* Allocate the rest of the chain. */ >- for (i = 1; i < nbufs; i++) { >- m = m_getjcl(M_NOWAIT, MT_DATA, 0, clsize); >- if (m == NULL) >- goto fail; >- >- m->m_len = clsize; >- m_tail->m_next = m; >- m_tail = m; >+ m->m_len = size; >+ if (m_head != NULL) { >+ m_tail->m_next = m; >+ m_tail = m; >+ } else >+ m_head = m_tail = m; > } > > if (m_tailp != NULL) > *m_tailp = m_tail; > > return (m_head); >- >-fail: >- sc->vtnet_stats.mbuf_alloc_failed++; >- m_freem(m_head); >- >- return (NULL); > } > > /* > * Slow path for when LRO without mergeable buffers is negotiated. 
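
The rewritten vtnet_rx_alloc_buf() above collapses the old head/tail special cases into one loop with a single failure path. Its pattern, reduced to a standalone sketch (struct and function names are hypothetical; free(NULL) is a no-op just as m_freem(NULL) is):

    #include <stdlib.h>

    struct link { struct link *next; };

    /* Build an n-link chain, freeing the partial chain on any failure. */
    static struct link *
    alloc_chain(int n)
    {
        struct link *head = NULL, *tail = NULL;

        for (int i = 0; i < n; i++) {
            struct link *l = calloc(1, sizeof(*l));
            if (l == NULL) {
                while (head != NULL) {      /* unwind partial chain */
                    struct link *next = head->next;
                    free(head);
                    head = next;
                }
                return (NULL);
            }
            if (head != NULL) {
                tail->next = l;
                tail = l;
            } else
                head = tail = l;
        }
        return (head);
    }
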
> */
> static int
>-vtnet_rxq_replace_lro_nomgr_buf(struct vtnet_rxq *rxq, struct mbuf *m0,
>+vtnet_rxq_replace_lro_nomrg_buf(struct vtnet_rxq *rxq, struct mbuf *m0,
> int len0)
> {
> struct vtnet_softc *sc;
>- struct mbuf *m, *m_prev;
>- struct mbuf *m_new, *m_tail;
>- int len, clsize, nreplace, error;
>+ struct mbuf *m, *m_prev, *m_new, *m_tail;
>+ int len, clustersz, nreplace, error;
>
> sc = rxq->vtnrx_sc;
>- clsize = sc->vtnet_rx_clsize;
>+ clustersz = sc->vtnet_rx_clustersz;
>
> m_prev = NULL;
> m_tail = NULL;
>@@ -1309,25 +1545,23 @@
> len = len0;
>
> /*
>- * Since these mbuf chains are so large, we avoid allocating an
>- * entire replacement chain if possible. When the received frame
>- * did not consume the entire chain, the unused mbufs are moved
>- * to the replacement chain.
>+ * Since these mbuf chains are so large, avoid allocating a complete
>+ * replacement when the received frame did not consume the entire
>+ * chain. Unused mbufs are moved to the tail of the replacement mbuf.
> */
> while (len > 0) {
>- /*
>- * Something is seriously wrong if we received a frame
>- * larger than the chain. Drop it.
>- */
> if (m == NULL) {
> sc->vtnet_stats.rx_frame_too_large++;
> return (EMSGSIZE);
> }
>
>- /* We always allocate the same cluster size. */
>- KASSERT(m->m_len == clsize,
>- ("%s: mbuf size %d is not the cluster size %d",
>- __func__, m->m_len, clsize));
>+ /*
>+ * Every mbuf should have the expected cluster size since that
>+ * is also used to allocate the replacements.
>+ */
>+ KASSERT(m->m_len == clustersz,
>+ ("%s: mbuf size %d not expected cluster size %d", __func__,
>+ m->m_len, clustersz));
>
> m->m_len = MIN(m->m_len, len);
> len -= m->m_len;
>@@ -1337,19 +1571,19 @@
> nreplace++;
> }
>
>- KASSERT(nreplace <= sc->vtnet_rx_nmbufs,
>- ("%s: too many replacement mbufs %d max %d", __func__, nreplace,
>- sc->vtnet_rx_nmbufs));
>+ KASSERT(nreplace > 0 && nreplace <= sc->vtnet_rx_nmbufs,
>+ ("%s: invalid replacement mbuf count %d max %d", __func__,
>+ nreplace, sc->vtnet_rx_nmbufs));
>
> m_new = vtnet_rx_alloc_buf(sc, nreplace, &m_tail);
> if (m_new == NULL) {
>- m_prev->m_len = clsize;
>+ m_prev->m_len = clustersz;
> return (ENOBUFS);
> }
>
> /*
>- * Move any unused mbufs from the received chain onto the end
>- * of the new chain.
>+ * Move any unused mbufs from the received mbuf chain onto the
>+ * end of the replacement chain.
> */
> if (m_prev->m_next != NULL) {
> m_tail->m_next = m_prev->m_next;
>@@ -1359,21 +1593,18 @@
> error = vtnet_rxq_enqueue_buf(rxq, m_new);
> if (error) {
> /*
>- * BAD! We could not enqueue the replacement mbuf chain. We
>- * must restore the m0 chain to the original state if it was
>- * modified so we can subsequently discard it.
>+ * The replacement is supposed to be a copy of the one
>+ * dequeued, so this is a very unexpected error.
> *
>- * NOTE: The replacement is suppose to be an identical copy
>- * to the one just dequeued so this is an unexpected error.
>+ * Restore the m0 chain to the original state if it was
>+ * modified so we can then discard it. 

> */ >- sc->vtnet_stats.rx_enq_replacement_failed++; >- > if (m_tail->m_next != NULL) { > m_prev->m_next = m_tail->m_next; > m_tail->m_next = NULL; > } >- >- m_prev->m_len = clsize; >+ m_prev->m_len = clustersz; >+ sc->vtnet_stats.rx_enq_replacement_failed++; > m_freem(m_new); > } > >@@ -1389,31 +1620,23 @@ > > sc = rxq->vtnrx_sc; > >- KASSERT(sc->vtnet_flags & VTNET_FLAG_LRO_NOMRG || m->m_next == NULL, >- ("%s: chained mbuf without LRO_NOMRG", __func__)); >+ if (sc->vtnet_flags & VTNET_FLAG_LRO_NOMRG) >+ return (vtnet_rxq_replace_lro_nomrg_buf(rxq, m, len)); > >- if (m->m_next == NULL) { >- /* Fast-path for the common case of just one mbuf. */ >- if (m->m_len < len) >- return (EINVAL); >+ MPASS(m->m_next == NULL); >+ if (m->m_len < len) >+ return (EMSGSIZE); > >- m_new = vtnet_rx_alloc_buf(sc, 1, NULL); >- if (m_new == NULL) >- return (ENOBUFS); >+ m_new = vtnet_rx_alloc_buf(sc, 1, NULL); >+ if (m_new == NULL) >+ return (ENOBUFS); > >- error = vtnet_rxq_enqueue_buf(rxq, m_new); >- if (error) { >- /* >- * The new mbuf is suppose to be an identical >- * copy of the one just dequeued so this is an >- * unexpected error. >- */ >- m_freem(m_new); >- sc->vtnet_stats.rx_enq_replacement_failed++; >- } else >- m->m_len = len; >+ error = vtnet_rxq_enqueue_buf(rxq, m_new); >+ if (error) { >+ sc->vtnet_stats.rx_enq_replacement_failed++; >+ m_freem(m_new); > } else >- error = vtnet_rxq_replace_lro_nomgr_buf(rxq, m, len); >+ m->m_len = len; > > return (error); > } >@@ -1423,39 +1646,43 @@ > { > struct vtnet_softc *sc; > struct sglist *sg; >- struct vtnet_rx_header *rxhdr; >- uint8_t *mdata; >- int offset, error; >+ int header_inlined, error; > > sc = rxq->vtnrx_sc; > sg = rxq->vtnrx_sg; >- mdata = mtod(m, uint8_t *); > >+ KASSERT(m->m_next == NULL || sc->vtnet_flags & VTNET_FLAG_LRO_NOMRG, >+ ("%s: mbuf chain without LRO_NOMRG", __func__)); > VTNET_RXQ_LOCK_ASSERT(rxq); >- KASSERT(sc->vtnet_flags & VTNET_FLAG_LRO_NOMRG || m->m_next == NULL, >- ("%s: chained mbuf without LRO_NOMRG", __func__)); >- KASSERT(m->m_len == sc->vtnet_rx_clsize, >- ("%s: unexpected cluster size %d/%d", __func__, m->m_len, >- sc->vtnet_rx_clsize)); > > sglist_reset(sg); >- if ((sc->vtnet_flags & VTNET_FLAG_MRG_RXBUFS) == 0) { >+ header_inlined = vtnet_modern(sc) || >+ (sc->vtnet_flags & VTNET_FLAG_MRG_RXBUFS) != 0; /* TODO: ANY_LAYOUT */ >+ >+ if (header_inlined) >+ error = sglist_append_mbuf(sg, m); >+ else { >+ struct vtnet_rx_header *rxhdr = >+ mtod(m, struct vtnet_rx_header *); > MPASS(sc->vtnet_hdr_size == sizeof(struct virtio_net_hdr)); >- rxhdr = (struct vtnet_rx_header *) mdata; >- sglist_append(sg, &rxhdr->vrh_hdr, sc->vtnet_hdr_size); >- offset = sizeof(struct vtnet_rx_header); >- } else >- offset = 0; > >- sglist_append(sg, mdata + offset, m->m_len - offset); >- if (m->m_next != NULL) { >- error = sglist_append_mbuf(sg, m->m_next); >- MPASS(error == 0); >+ /* Append the header and remaining mbuf data. 
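
The header_inlined test above distinguishes two receive layouts. For reference, a sketch of the two header shapes the driver deals with, assuming the field widths from the VirtIO specification (struct names here are hypothetical mirrors, not the driver's):

    #include <stdint.h>

    struct vhdr_legacy {            /* 10 bytes; kept in a padded
                                     * vtnet_rx_header ahead of the frame */
        uint8_t  flags;
        uint8_t  gso_type;
        uint16_t hdr_len;
        uint16_t gso_size;
        uint16_t csum_start;
        uint16_t csum_offset;
    };

    struct vhdr_v1 {                /* 12 bytes; modern or MRG_RXBUFS
                                     * devices, inlined with the data */
        struct vhdr_legacy common;
        uint16_t num_buffers;
    };

With the inlined layout the whole mbuf can be appended in one sglist call; the legacy layout needs the header and data appended as separate segments, which is exactly the branch above.
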
*/
>+ error = sglist_append(sg, &rxhdr->vrh_hdr, sc->vtnet_hdr_size);
>+ if (error)
>+ return (error);
>+ error = sglist_append(sg, &rxhdr[1],
>+ m->m_len - sizeof(struct vtnet_rx_header));
>+ if (error)
>+ return (error);
>+
>+ if (m->m_next != NULL)
>+ error = sglist_append_mbuf(sg, m->m_next);
> }
>
>- error = virtqueue_enqueue(rxq->vtnrx_vq, m, sg, 0, sg->sg_nseg);
>+ if (error)
>+ return (error);
>
>- return (error);
>+ return (virtqueue_enqueue(rxq->vtnrx_vq, m, sg, 0, sg->sg_nseg));
> }
>
> static int
>@@ -1478,54 +1705,73 @@
> return (error);
> }
>
>-/*
>- * Use the checksum offset in the VirtIO header to set the
>- * correct CSUM_* flags.
>- */
> static int
>-vtnet_rxq_csum_by_offset(struct vtnet_rxq *rxq, struct mbuf *m,
>- uint16_t eth_type, int ip_start, struct virtio_net_hdr *hdr)
>+vtnet_rxq_csum_needs_csum(struct vtnet_rxq *rxq, struct mbuf *m, uint16_t etype,
>+ int hoff, struct virtio_net_hdr *hdr)
> {
> struct vtnet_softc *sc;
>-#if defined(INET) || defined(INET6)
>- int offset = hdr->csum_start + hdr->csum_offset;
>-#endif
>+ int error;
>
> sc = rxq->vtnrx_sc;
>
>- /* Only do a basic sanity check on the offset. */
>- switch (eth_type) {
>-#if defined(INET)
>- case ETHERTYPE_IP:
>- if (__predict_false(offset < ip_start + sizeof(struct ip)))
>- return (1);
>- break;
>-#endif
>-#if defined(INET6)
>- case ETHERTYPE_IPV6:
>- if (__predict_false(offset < ip_start + sizeof(struct ip6_hdr)))
>- return (1);
>- break;
>-#endif
>- default:
>- sc->vtnet_stats.rx_csum_bad_ethtype++;
>- return (1);
>+ /*
>+ * NEEDS_CSUM corresponds to Linux's CHECKSUM_PARTIAL, but FreeBSD does
>+ * not have an analogous CSUM flag. The checksum has been validated,
>+ * but is incomplete (TCP/UDP pseudo header).
>+ *
>+ * The packet is likely from another VM on the same host that itself
>+ * performed checksum offloading so Tx/Rx is basically a memcpy and
>+ * the checksum has little value.
>+ *
>+ * Default to receiving the packet as-is for performance reasons, but
>+ * this can cause issues if the packet is to be forwarded because it
>+ * does not contain a valid checksum. This patch may be helpful:
>+ * https://reviews.freebsd.org/D6611. In the meantime, have the driver
>+ * compute the checksum if requested.
>+ *
>+ * BMV: Need to add a CSUM_PARTIAL flag?
>+ */
>+ if ((sc->vtnet_flags & VTNET_FLAG_FIXUP_NEEDS_CSUM) == 0) {
>+ error = vtnet_rxq_csum_data_valid(rxq, m, etype, hoff, hdr);
>+ return (error);
> }
>
> /*
>- * Use the offset to determine the appropriate CSUM_* flags. This is
>- * a bit dirty, but we can get by with it since the checksum offsets
>- * happen to be different. We assume the host host does not do IPv4
>- * header checksum offloading.
>+ * Compute the checksum in the driver so the packet will contain a
>+ * valid checksum. The checksum is at csum_offset from csum_start.
> */
>- switch (hdr->csum_offset) {
>- case offsetof(struct udphdr, uh_sum):
>- case offsetof(struct tcphdr, th_sum):
>+ switch (etype) {
>+#if defined(INET) || defined(INET6)
>+ case ETHERTYPE_IP:
>+ case ETHERTYPE_IPV6: {
>+ int csum_off, csum_end;
>+ uint16_t csum;
>+
>+ csum_off = hdr->csum_start + hdr->csum_offset;
>+ csum_end = csum_off + sizeof(uint16_t);
>+
>+ /* Assume checksum will be in the first mbuf. */
>+ if (m->m_len < csum_end || m->m_pkthdr.len < csum_end)
>+ return (1);
>+
>+ /*
>+ * Like in_delayed_cksum()/in6_delayed_cksum(), compute the
>+ * checksum and write it at the specified offset. 
We could >+ * try to verify the packet: csum_start should probably >+ * correspond to the start of the TCP/UDP header. >+ * >+ * BMV: Need to properly handle UDP with zero checksum. Is >+ * the IPv4 header checksum implicitly validated? >+ */ >+ csum = in_cksum_skip(m, m->m_pkthdr.len, hdr->csum_start); >+ *(uint16_t *)(mtodo(m, csum_off)) = csum; > m->m_pkthdr.csum_flags |= CSUM_DATA_VALID | CSUM_PSEUDO_HDR; > m->m_pkthdr.csum_data = 0xFFFF; > break; >+ } >+#endif > default: >- sc->vtnet_stats.rx_csum_bad_offset++; >+ sc->vtnet_stats.rx_csum_bad_ethtype++; > return (1); > } > >@@ -1533,64 +1779,55 @@ > } > > static int >-vtnet_rxq_csum_by_parse(struct vtnet_rxq *rxq, struct mbuf *m, >- uint16_t eth_type, int ip_start, struct virtio_net_hdr *hdr) >+vtnet_rxq_csum_data_valid(struct vtnet_rxq *rxq, struct mbuf *m, >+ uint16_t etype, int hoff, struct virtio_net_hdr *hdr) > { > struct vtnet_softc *sc; >- int offset, proto; >+ int protocol; > > sc = rxq->vtnrx_sc; > >- switch (eth_type) { >+ switch (etype) { > #if defined(INET) >- case ETHERTYPE_IP: { >- struct ip *ip; >- if (__predict_false(m->m_len < ip_start + sizeof(struct ip))) >- return (1); >- ip = (struct ip *)(m->m_data + ip_start); >- proto = ip->ip_p; >- offset = ip_start + (ip->ip_hl << 2); >+ case ETHERTYPE_IP: >+ if (__predict_false(m->m_len < hoff + sizeof(struct ip))) >+ protocol = IPPROTO_DONE; >+ else { >+ struct ip *ip = (struct ip *)(m->m_data + hoff); >+ protocol = ip->ip_p; >+ } > break; >- } > #endif > #if defined(INET6) > case ETHERTYPE_IPV6: >- if (__predict_false(m->m_len < ip_start + >- sizeof(struct ip6_hdr))) >- return (1); >- offset = ip6_lasthdr(m, ip_start, IPPROTO_IPV6, &proto); >- if (__predict_false(offset < 0)) >- return (1); >+ if (__predict_false(m->m_len < hoff + sizeof(struct ip6_hdr)) >+ || ip6_lasthdr(m, hoff, IPPROTO_IPV6, &protocol) < 0) >+ protocol = IPPROTO_DONE; > break; > #endif > default: >- sc->vtnet_stats.rx_csum_bad_ethtype++; >- return (1); >+ protocol = IPPROTO_DONE; >+ break; > } > >- switch (proto) { >+ switch (protocol) { > case IPPROTO_TCP: >- if (__predict_false(m->m_len < offset + sizeof(struct tcphdr))) >- return (1); >- m->m_pkthdr.csum_flags |= CSUM_DATA_VALID | CSUM_PSEUDO_HDR; >- m->m_pkthdr.csum_data = 0xFFFF; >- break; > case IPPROTO_UDP: >- if (__predict_false(m->m_len < offset + sizeof(struct udphdr))) >- return (1); > m->m_pkthdr.csum_flags |= CSUM_DATA_VALID | CSUM_PSEUDO_HDR; > m->m_pkthdr.csum_data = 0xFFFF; > break; > default: > /* >- * For the remaining protocols, FreeBSD does not support >- * checksum offloading, so the checksum will be recomputed. >+ * FreeBSD does not support checksum offloading of this >+ * protocol. Let the stack re-verify the checksum later >+ * if the protocol is supported. > */ > #if 0 >- if_printf(sc->vtnet_ifp, "cksum offload of unsupported " >- "protocol eth_type=%#x proto=%d csum_start=%d " >- "csum_offset=%d\n", __func__, eth_type, proto, >- hdr->csum_start, hdr->csum_offset); >+ if_printf(sc->vtnet_ifp, >+ "%s: checksum offload of unsupported protocol " >+ "etype=%#x protocol=%d csum_start=%d csum_offset=%d\n", >+ __func__, etype, protocol, hdr->csum_start, >+ hdr->csum_offset); > #endif > break; > } >@@ -1598,41 +1835,29 @@ > return (0); > } > >-/* >- * Set the appropriate CSUM_* flags. Unfortunately, the information >- * provided is not directly useful to us. The VirtIO header gives the >- * offset of the checksum, which is all Linux needs, but this is not >- * how FreeBSD does things. We are forced to peek inside the packet >- * a bit. 
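
For context, the arithmetic behind the in_cksum_skip() fixup above, as a userland sketch over a flat buffer. This assumes the VirtIO NEEDS_CSUM convention that the checksum field is pre-seeded with the pseudo-header sum, and that the caller has already bounds-checked the offsets (as the csum_end test above does); the function name is illustrative:

    #include <stddef.h>
    #include <stdint.h>

    static void
    fixup_needs_csum(uint8_t *pkt, size_t len, size_t start, size_t off)
    {
        uint32_t sum = 0;
        size_t i;

        /* Sum 16-bit big-endian words from csum_start to the end. */
        for (i = start; i + 1 < len; i += 2)
            sum += (uint32_t)pkt[i] << 8 | pkt[i + 1];
        if (i < len)                    /* odd trailing byte */
            sum += (uint32_t)pkt[i] << 8;
        while (sum > 0xffff)            /* fold the carries */
            sum = (sum & 0xffff) + (sum >> 16);
        sum = ~sum & 0xffff;

        /* Store in network byte order at csum_start + csum_offset. */
        pkt[start + off] = sum >> 8;
        pkt[start + off + 1] = sum & 0xff;
    }
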
>- * >- * It would be nice if VirtIO gave us the L4 protocol or if FreeBSD >- * could accept the offsets and let the stack figure it out. >- */ > static int > vtnet_rxq_csum(struct vtnet_rxq *rxq, struct mbuf *m, > struct virtio_net_hdr *hdr) > { >- struct ether_header *eh; >- struct ether_vlan_header *evh; >- uint16_t eth_type; >- int offset, error; >+ const struct ether_header *eh; >+ int hoff; >+ uint16_t etype; > >- eh = mtod(m, struct ether_header *); >- eth_type = ntohs(eh->ether_type); >- if (eth_type == ETHERTYPE_VLAN) { >- /* BMV: We should handle nested VLAN tags too. */ >- evh = mtod(m, struct ether_vlan_header *); >- eth_type = ntohs(evh->evl_proto); >- offset = sizeof(struct ether_vlan_header); >+ eh = mtod(m, const struct ether_header *); >+ etype = ntohs(eh->ether_type); >+ if (etype == ETHERTYPE_VLAN) { >+ /* TODO BMV: Handle QinQ. */ >+ const struct ether_vlan_header *evh = >+ mtod(m, const struct ether_vlan_header *); >+ etype = ntohs(evh->evl_proto); >+ hoff = sizeof(struct ether_vlan_header); > } else >- offset = sizeof(struct ether_header); >+ hoff = sizeof(struct ether_header); > > if (hdr->flags & VIRTIO_NET_HDR_F_NEEDS_CSUM) >- error = vtnet_rxq_csum_by_offset(rxq, m, eth_type, offset, hdr); >- else >- error = vtnet_rxq_csum_by_parse(rxq, m, eth_type, offset, hdr); >- >- return (error); >+ return (vtnet_rxq_csum_needs_csum(rxq, m, etype, hoff, hdr)); >+ else /* VIRTIO_NET_HDR_F_DATA_VALID */ >+ return (vtnet_rxq_csum_data_valid(rxq, m, etype, hoff, hdr)); > } > > static void >@@ -1667,14 +1892,16 @@ > { > struct vtnet_softc *sc; > struct virtqueue *vq; >- struct mbuf *m, *m_tail; >- int len; >+ struct mbuf *m_tail; > > sc = rxq->vtnrx_sc; > vq = rxq->vtnrx_vq; > m_tail = m_head; > > while (--nbufs > 0) { >+ struct mbuf *m; >+ int len; >+ > m = virtqueue_dequeue(vq, &len); > if (m == NULL) { > rxq->vtnrx_stats.vrxs_ierrors++; >@@ -1709,19 +1936,35 @@ > return (1); > } > >+#if defined(INET) || defined(INET6) >+static int >+vtnet_lro_rx(struct vtnet_rxq *rxq, struct mbuf *m) >+{ >+ struct lro_ctrl *lro; >+ >+ lro = &rxq->vtnrx_lro; >+ >+ if (lro->lro_mbuf_max != 0) { >+ tcp_lro_queue_mbuf(lro, m); >+ return (0); >+ } >+ >+ return (tcp_lro_rx(lro, m, 0)); >+} >+#endif >+ > static void > vtnet_rxq_input(struct vtnet_rxq *rxq, struct mbuf *m, > struct virtio_net_hdr *hdr) > { > struct vtnet_softc *sc; > struct ifnet *ifp; >- struct ether_header *eh; > > sc = rxq->vtnrx_sc; > ifp = sc->vtnet_ifp; > > if (ifp->if_capenable & IFCAP_VLAN_HWTAGGING) { >- eh = mtod(m, struct ether_header *); >+ struct ether_header *eh = mtod(m, struct ether_header *); > if (eh->ether_type == htons(ETHERTYPE_VLAN)) { > vtnet_vlan_tag_remove(m); > /* >@@ -1736,25 +1979,36 @@ > m->m_pkthdr.flowid = rxq->vtnrx_id; > M_HASHTYPE_SET(m, M_HASHTYPE_OPAQUE); > >- /* >- * BMV: FreeBSD does not have the UNNECESSARY and PARTIAL checksum >- * distinction that Linux does. Need to reevaluate if performing >- * offloading for the NEEDS_CSUM case is really appropriate. 
>- */ >- if (hdr->flags & (VIRTIO_NET_HDR_F_NEEDS_CSUM | >- VIRTIO_NET_HDR_F_DATA_VALID)) { >+ if (hdr->flags & >+ (VIRTIO_NET_HDR_F_NEEDS_CSUM | VIRTIO_NET_HDR_F_DATA_VALID)) { > if (vtnet_rxq_csum(rxq, m, hdr) == 0) > rxq->vtnrx_stats.vrxs_csum++; > else > rxq->vtnrx_stats.vrxs_csum_failed++; > } > >+ if (hdr->gso_size != 0) { >+ switch (hdr->gso_type & ~VIRTIO_NET_HDR_GSO_ECN) { >+ case VIRTIO_NET_HDR_GSO_TCPV4: >+ case VIRTIO_NET_HDR_GSO_TCPV6: >+ m->m_pkthdr.lro_nsegs = >+ howmany(m->m_pkthdr.len, hdr->gso_size); >+ rxq->vtnrx_stats.vrxs_host_lro++; >+ break; >+ } >+ } >+ > rxq->vtnrx_stats.vrxs_ipackets++; > rxq->vtnrx_stats.vrxs_ibytes += m->m_pkthdr.len; > >- VTNET_RXQ_UNLOCK(rxq); >+#if defined(INET) || defined(INET6) >+ if (vtnet_software_lro(sc) && ifp->if_capenable & IFCAP_LRO) { >+ if (vtnet_lro_rx(rxq, m) == 0) >+ return; >+ } >+#endif >+ > (*ifp->if_input)(ifp, m); >- VTNET_RXQ_LOCK(rxq); > } > > static int >@@ -1764,20 +2018,25 @@ > struct vtnet_softc *sc; > struct ifnet *ifp; > struct virtqueue *vq; >- struct mbuf *m; >- struct virtio_net_hdr_mrg_rxbuf *mhdr; >- int len, deq, nbufs, adjsz, count; >+ int deq, count; > > sc = rxq->vtnrx_sc; > vq = rxq->vtnrx_vq; > ifp = sc->vtnet_ifp; >- hdr = &lhdr; > deq = 0; > count = sc->vtnet_rx_process_limit; > > VTNET_RXQ_LOCK_ASSERT(rxq); > >+#ifdef DEV_NETMAP >+ if (netmap_rx_irq(ifp, 0, &deq)) >+ return (0); >+#endif >+ > while (count-- > 0) { >+ struct mbuf *m; >+ int len, nbufs, adjsz; >+ > m = virtqueue_dequeue(vq, &len); > if (m == NULL) > break; >@@ -1789,18 +2048,22 @@ > continue; > } > >- if ((sc->vtnet_flags & VTNET_FLAG_MRG_RXBUFS) == 0) { >+ if (sc->vtnet_flags & VTNET_FLAG_MRG_RXBUFS) { >+ struct virtio_net_hdr_mrg_rxbuf *mhdr = >+ mtod(m, struct virtio_net_hdr_mrg_rxbuf *); >+ nbufs = vtnet_htog16(sc, mhdr->num_buffers); >+ adjsz = sizeof(struct virtio_net_hdr_mrg_rxbuf); >+ } else if (vtnet_modern(sc)) { >+ nbufs = 1; /* num_buffers is always 1 */ >+ adjsz = sizeof(struct virtio_net_hdr_v1); >+ } else { > nbufs = 1; > adjsz = sizeof(struct vtnet_rx_header); > /* >- * Account for our pad inserted between the header >- * and the actual start of the frame. >+ * Account for our gap between the header and start of >+ * data to keep the segments separated. > */ > len += VTNET_RX_HEADER_PAD; >- } else { >- mhdr = mtod(m, struct virtio_net_hdr_mrg_rxbuf *); >- nbufs = mhdr->num_buffers; >- adjsz = sizeof(struct virtio_net_hdr_mrg_rxbuf); > } > > if (vtnet_rxq_replace_buf(rxq, m, len) != 0) { >@@ -1822,26 +2085,29 @@ > } > > /* >- * Save copy of header before we strip it. For both mergeable >- * and non-mergeable, the header is at the beginning of the >- * mbuf data. We no longer need num_buffers, so always use a >- * regular header. >- * >- * BMV: Is this memcpy() expensive? We know the mbuf data is >- * still valid even after the m_adj(). >+ * Save an endian swapped version of the header prior to it >+ * being stripped. The header is always at the start of the >+ * mbuf data. num_buffers was already saved (and not needed) >+ * so use the standard header. 
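
The endian-swapped header copy described in the comment above (and performed just below with vtnet_htog16()) exists because legacy devices use the guest's native byte order while modern VERSION_1 devices are always little-endian. A portable sketch of the idea (helper names here are hypothetical):

    #include <stdbool.h>
    #include <stdint.h>

    /* Decode a little-endian 16-bit value regardless of host order. */
    static uint16_t
    le16_decode(uint16_t wire)
    {
        const uint8_t *p = (const uint8_t *)&wire;

        return ((uint16_t)p[0] | (uint16_t)p[1] << 8);
    }

    static uint16_t
    guest_to_host16(bool modern, uint16_t wire)
    {
        return (modern ? le16_decode(wire) : wire); /* legacy: native */
    }
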
> */ >- memcpy(hdr, mtod(m, void *), sizeof(struct virtio_net_hdr)); >+ hdr = mtod(m, struct virtio_net_hdr *); >+ lhdr.flags = hdr->flags; >+ lhdr.gso_type = hdr->gso_type; >+ lhdr.hdr_len = vtnet_htog16(sc, hdr->hdr_len); >+ lhdr.gso_size = vtnet_htog16(sc, hdr->gso_size); >+ lhdr.csum_start = vtnet_htog16(sc, hdr->csum_start); >+ lhdr.csum_offset = vtnet_htog16(sc, hdr->csum_offset); > m_adj(m, adjsz); > >- vtnet_rxq_input(rxq, m, hdr); >- >- /* Must recheck after dropping the Rx lock. */ >- if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) >- break; >+ vtnet_rxq_input(rxq, m, &lhdr); > } > >- if (deq > 0) >+ if (deq > 0) { >+#if defined(INET) || defined(INET6) >+ tcp_lro_flush_all(&rxq->vtnrx_lro); >+#endif > virtqueue_notify(vq); >+ } > > return (count > 0 ? 0 : EAGAIN); > } >@@ -1870,11 +2136,6 @@ > return; > } > >-#ifdef DEV_NETMAP >- if (netmap_rx_irq(ifp, rxq->vtnrx_id, &more) != NM_IRQ_PASS) >- return; >-#endif /* DEV_NETMAP */ >- > VTNET_RXQ_LOCK(rxq); > > again: >@@ -1894,8 +2155,8 @@ > if (tries++ < VTNET_INTR_DISABLE_RETRIES) > goto again; > >- VTNET_RXQ_UNLOCK(rxq); > rxq->vtnrx_stats.vrxs_rescheduled++; >+ VTNET_RXQ_UNLOCK(rxq); > taskqueue_enqueue(rxq->vtnrx_tq, &rxq->vtnrx_intrtask); > } else > VTNET_RXQ_UNLOCK(rxq); >@@ -1925,22 +2186,49 @@ > if (!more) > vtnet_rxq_disable_intr(rxq); > rxq->vtnrx_stats.vrxs_rescheduled++; >+ VTNET_RXQ_UNLOCK(rxq); > taskqueue_enqueue(rxq->vtnrx_tq, &rxq->vtnrx_intrtask); >- } >+ } else >+ VTNET_RXQ_UNLOCK(rxq); >+} > >- VTNET_RXQ_UNLOCK(rxq); >+static int >+vtnet_txq_intr_threshold(struct vtnet_txq *txq) >+{ >+ struct vtnet_softc *sc; >+ int threshold; >+ >+ sc = txq->vtntx_sc; >+ >+ /* >+ * The Tx interrupt is disabled until the queue free count falls >+ * below our threshold. Completed frames are drained from the Tx >+ * virtqueue before transmitting new frames and in the watchdog >+ * callout, so the frequency of Tx interrupts is greatly reduced, >+ * at the cost of not freeing mbufs as quickly as they otherwise >+ * would be. >+ */ >+ threshold = virtqueue_size(txq->vtntx_vq) / 4; >+ >+ /* >+ * Without indirect descriptors, leave enough room for the most >+ * segments we handle. >+ */ >+ if ((sc->vtnet_flags & VTNET_FLAG_INDIRECT) == 0 && >+ threshold < sc->vtnet_tx_nsegs) >+ threshold = sc->vtnet_tx_nsegs; >+ >+ return (threshold); > } > > static int > vtnet_txq_below_threshold(struct vtnet_txq *txq) > { >- struct vtnet_softc *sc; > struct virtqueue *vq; > >- sc = txq->vtntx_sc; > vq = txq->vtntx_vq; > >- return (virtqueue_nfree(vq) <= sc->vtnet_tx_intr_thresh); >+ return (virtqueue_nfree(vq) <= txq->vtntx_intr_threshold); > } > > static int >@@ -1975,21 +2263,13 @@ > struct virtqueue *vq; > struct vtnet_tx_header *txhdr; > int last; >-#ifdef DEV_NETMAP >- int netmap_bufs = vtnet_netmap_queue_on(txq->vtntx_sc, NR_TX, >- txq->vtntx_id); >-#else /* !DEV_NETMAP */ >- int netmap_bufs = 0; >-#endif /* !DEV_NETMAP */ > > vq = txq->vtntx_vq; > last = 0; > > while ((txhdr = virtqueue_drain(vq, &last)) != NULL) { >- if (!netmap_bufs) { >- m_freem(txhdr->vth_mbuf); >- uma_zfree(vtnet_tx_header_zone, txhdr); >- } >+ m_freem(txhdr->vth_mbuf); >+ uma_zfree(vtnet_tx_header_zone, txhdr); > } > > KASSERT(virtqueue_empty(vq), >@@ -1997,12 +2277,11 @@ > } > > /* >- * BMV: Much of this can go away once we finally have offsets in >- * the mbuf packet header. Bug andre@. >+ * BMV: This can go away once we finally have offsets in the mbuf header. 
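
The per-queue threshold introduced in vtnet_txq_intr_threshold() above is plain arithmetic; a standalone restatement (illustrative names, not the driver's):

    /*
     * A quarter of the virtqueue, but never less than the maximum
     * segment count when indirect descriptors are unavailable; e.g. a
     * 256-entry queue yields a threshold of 64 free slots.
     */
    static int
    tx_intr_threshold(int vq_size, int tx_nsegs, int has_indirect)
    {
        int threshold = vq_size / 4;

        if (!has_indirect && threshold < tx_nsegs)
            threshold = tx_nsegs;
        return (threshold);
    }

Computing this per transmit queue (rather than once from txq 0, as the removed vtnet_set_tx_intr_threshold() did) drops the assumption that all Tx queues are the same size.
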
*/
> static int
>-vtnet_txq_offload_ctx(struct vtnet_txq *txq, struct mbuf *m,
>- int *etype, int *proto, int *start)
>+vtnet_txq_offload_ctx(struct vtnet_txq *txq, struct mbuf *m, int *etype,
>+ int *proto, int *start)
> {
> struct vtnet_softc *sc;
> struct ether_vlan_header *evh;
>@@ -2046,7 +2325,7 @@
> break;
> #endif
> default:
>- sc->vtnet_stats.tx_csum_bad_ethtype++;
>+ sc->vtnet_stats.tx_csum_unknown_ethtype++;
> return (EINVAL);
> }
>
>@@ -2054,7 +2333,7 @@
> }
>
> static int
>-vtnet_txq_offload_tso(struct vtnet_txq *txq, struct mbuf *m, int eth_type,
>+vtnet_txq_offload_tso(struct vtnet_txq *txq, struct mbuf *m, int flags,
> int offset, struct virtio_net_hdr *hdr)
> {
> static struct timeval lastecn;
>@@ -2070,16 +2349,17 @@
> } else
> tcp = (struct tcphdr *)(m->m_data + offset);
>
>- hdr->hdr_len = offset + (tcp->th_off << 2);
>- hdr->gso_size = m->m_pkthdr.tso_segsz;
>- hdr->gso_type = eth_type == ETHERTYPE_IP ? VIRTIO_NET_HDR_GSO_TCPV4 :
>- VIRTIO_NET_HDR_GSO_TCPV6;
>+ hdr->hdr_len = vtnet_gtoh16(sc, offset + (tcp->th_off << 2));
>+ hdr->gso_size = vtnet_gtoh16(sc, m->m_pkthdr.tso_segsz);
>+ hdr->gso_type = (flags & CSUM_IP_TSO) ?
>+ VIRTIO_NET_HDR_GSO_TCPV4 : VIRTIO_NET_HDR_GSO_TCPV6;
>
>- if (tcp->th_flags & TH_CWR) {
>+ if (__predict_false(tcp->th_flags & TH_CWR)) {
> /*
>- * Drop if VIRTIO_NET_F_HOST_ECN was not negotiated. In FreeBSD,
>- * ECN support is not on a per-interface basis, but globally via
>- * the net.inet.tcp.ecn.enable sysctl knob. The default is off.
>+ * Drop if VIRTIO_NET_F_HOST_ECN was not negotiated. In
>+ * FreeBSD, ECN support is not on a per-interface basis,
>+ * but globally via the net.inet.tcp.ecn.enable sysctl
>+ * knob. The default is off.
> */
> if ((sc->vtnet_flags & VTNET_FLAG_TSO_ECN) == 0) {
> if (ppsratecheck(&lastecn, &curecn, 1))
>@@ -2109,30 +2389,36 @@
> if (error)
> goto drop;
>
>- if ((etype == ETHERTYPE_IP && flags & VTNET_CSUM_OFFLOAD) ||
>- (etype == ETHERTYPE_IPV6 && flags & VTNET_CSUM_OFFLOAD_IPV6)) {
>- /*
>- * We could compare the IP protocol vs the CSUM_ flag too,
>- * but that really should not be necessary.
>- */
>+ if (flags & (VTNET_CSUM_OFFLOAD | VTNET_CSUM_OFFLOAD_IPV6)) {
>+ /* Sanity check the parsed mbuf matches the offload flags. */
>+ if (__predict_false((flags & VTNET_CSUM_OFFLOAD &&
>+ etype != ETHERTYPE_IP) || (flags & VTNET_CSUM_OFFLOAD_IPV6
>+ && etype != ETHERTYPE_IPV6))) {
>+ sc->vtnet_stats.tx_csum_proto_mismatch++;
>+ goto drop;
>+ }
>+
> hdr->flags |= VIRTIO_NET_HDR_F_NEEDS_CSUM;
>- hdr->csum_start = csum_start;
>- hdr->csum_offset = m->m_pkthdr.csum_data;
>+ hdr->csum_start = vtnet_gtoh16(sc, csum_start);
>+ hdr->csum_offset = vtnet_gtoh16(sc, m->m_pkthdr.csum_data);
> txq->vtntx_stats.vtxs_csum++;
> }
>
>- if (flags & CSUM_TSO) {
>+ if (flags & (CSUM_IP_TSO | CSUM_IP6_TSO)) {
>+ /*
>+ * Sanity check that the parsed mbuf IP protocol is TCP, since
>+ * VirtIO TSO requires the checksum offloading above.
>+ */
> if (__predict_false(proto != IPPROTO_TCP)) {
>- /* Likely failed to correctly parse the mbuf. 
*/
> sc->vtnet_stats.tx_tso_not_tcp++;
> goto drop;
>+ } else if (__predict_false((hdr->flags &
>+ VIRTIO_NET_HDR_F_NEEDS_CSUM) == 0)) {
>+ sc->vtnet_stats.tx_tso_without_csum++;
>+ goto drop;
> }
>
>- KASSERT(hdr->flags & VIRTIO_NET_HDR_F_NEEDS_CSUM,
>- ("%s: mbuf %p TSO without checksum offload %#x",
>- __func__, m, flags));
>-
>- error = vtnet_txq_offload_tso(txq, m, etype, csum_start, hdr);
>+ error = vtnet_txq_offload_tso(txq, m, flags, csum_start, hdr);
> if (error)
> goto drop;
> }
>@@ -2161,8 +2447,11 @@
>
> sglist_reset(sg);
> error = sglist_append(sg, &txhdr->vth_uhdr, sc->vtnet_hdr_size);
>- KASSERT(error == 0 && sg->sg_nseg == 1,
>- ("%s: error %d adding header to sglist", __func__, error));
>+ if (error != 0 || sg->sg_nseg != 1) {
>+ KASSERT(0, ("%s: cannot add header to sglist error %d nseg %d",
>+ __func__, error, sg->sg_nseg));
>+ goto fail;
>+ }
>
> error = sglist_append_mbuf(sg, m);
> if (error) {
>@@ -2210,9 +2499,9 @@
> }
>
> /*
>- * Always use the non-mergeable header, regardless if the feature
>- * was negotiated. For transmit, num_buffers is always zero. The
>- * vtnet_hdr_size is used to enqueue the correct header size.
>+ * Always use the non-mergeable header, regardless of whether mergeable
>+ * headers were negotiated, because for transmit num_buffers is always
>+ * zero. The vtnet_hdr_size is used to enqueue the right header size segment.
> */
> hdr = &txhdr->vth_uhdr.hdr;
>
>@@ -2234,11 +2523,9 @@
> }
>
> error = vtnet_txq_enqueue_buf(txq, m_head, txhdr);
>- if (error == 0)
>- return (0);
>-
> fail:
>- uma_zfree(vtnet_tx_header_zone, txhdr);
>+ if (error)
>+ uma_zfree(vtnet_tx_header_zone, txhdr);
>
> return (error);
> }
>@@ -2387,7 +2674,6 @@
> sc = ifp->if_softc;
> npairs = sc->vtnet_act_vq_pairs;
>
>- /* check if flowid is set */
> if (M_HASHTYPE_GET(m) != M_HASHTYPE_NONE)
> i = m->m_pkthdr.flowid % npairs;
> else
>@@ -2477,6 +2763,13 @@
> deq = 0;
> VTNET_TXQ_LOCK_ASSERT(txq);
>
>+#ifdef DEV_NETMAP
>+ if (netmap_tx_irq(txq->vtntx_sc->vtnet_ifp, txq->vtntx_id)) {
>+ virtqueue_disable_intr(vq); // XXX luigi
>+ return (0); // XXX or 1 ?
>+ }
>+#endif
>+
> while ((txhdr = virtqueue_dequeue(vq, NULL)) != NULL) {
> m = txhdr->vth_mbuf;
> deq++;
>@@ -2518,11 +2811,6 @@
> return;
> }
>
>-#ifdef DEV_NETMAP
>- if (netmap_tx_irq(ifp, txq->vtntx_id) != NM_IRQ_PASS)
>- return;
>-#endif /* DEV_NETMAP */
>-
> VTNET_TXQ_LOCK(txq);
>
> if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) {
>@@ -2709,7 +2997,7 @@
> * Most drivers just ignore the return value - it only fails
> * with ENOMEM so an error is not likely.
> */
>- for (i = 0; i < sc->vtnet_max_vq_pairs; i++) {
>+ for (i = 0; i < sc->vtnet_req_vq_pairs; i++) {
> rxq = &sc->vtnet_rxqs[i];
> error = taskqueue_start_threads(&rxq->vtnrx_tq, 1, PI_NET,
> "%s rxq %d", device_get_nameunit(dev), rxq->vtnrx_id);
>@@ -2739,7 +3027,7 @@
> rxq = &sc->vtnet_rxqs[i];
> if (rxq->vtnrx_tq != NULL) {
> taskqueue_free(rxq->vtnrx_tq);
>- rxq->vtnrx_tq = NULL;
>+ rxq->vtnrx_tq = NULL;
> }
>
> txq = &sc->vtnet_txqs[i];
>@@ -2779,7 +3067,12 @@
> struct vtnet_txq *txq;
> int i;
>
>- for (i = 0; i < sc->vtnet_act_vq_pairs; i++) {
>+#ifdef DEV_NETMAP
>+ if (nm_native_on(NA(sc->vtnet_ifp)))
>+ return;
>+#endif
>+
>+ for (i = 0; i < sc->vtnet_max_vq_pairs; i++) {
> rxq = &sc->vtnet_rxqs[i];
> vtnet_rxq_free_mbufs(rxq);
>
>@@ -2795,11 +3088,13 @@
> struct vtnet_txq *txq;
> int i;
>
>+ VTNET_CORE_LOCK_ASSERT(sc);
>+
> /*
> * Lock and unlock the per-queue mutex so we know the stop 
Doing only the active queues should be > * sufficient, but it does not cost much extra to do all the >- * queues. Note we hold the core mutex here too. >+ * queues. > */ > for (i = 0; i < sc->vtnet_max_vq_pairs; i++) { > rxq = &sc->vtnet_rxqs[i]; >@@ -2838,8 +3133,8 @@ > virtio_stop(dev); > vtnet_stop_rendezvous(sc); > >- /* Free any mbufs left in the virtqueues. */ > vtnet_drain_rxtx_queues(sc); >+ sc->vtnet_act_vq_pairs = 1; > } > > static int >@@ -2848,51 +3143,37 @@ > device_t dev; > struct ifnet *ifp; > uint64_t features; >- int mask, error; >+ int error; > > dev = sc->vtnet_dev; > ifp = sc->vtnet_ifp; >- features = sc->vtnet_features; >+ features = sc->vtnet_negotiated_features; > >- mask = 0; >-#if defined(INET) >- mask |= IFCAP_RXCSUM; >-#endif >-#if defined (INET6) >- mask |= IFCAP_RXCSUM_IPV6; >-#endif >- > /* > * Re-negotiate with the host, removing any disabled receive > * features. Transmit features are disabled only on our side > * via if_capenable and if_hwassist. > */ > >- if (ifp->if_capabilities & mask) { >- /* >- * We require both IPv4 and IPv6 offloading to be enabled >- * in order to negotiated it: VirtIO does not distinguish >- * between the two. >- */ >- if ((ifp->if_capenable & mask) != mask) >- features &= ~VIRTIO_NET_F_GUEST_CSUM; >- } >+ if ((ifp->if_capenable & (IFCAP_RXCSUM | IFCAP_RXCSUM_IPV6)) == 0) >+ features &= ~(VIRTIO_NET_F_GUEST_CSUM | VTNET_LRO_FEATURES); > >- if (ifp->if_capabilities & IFCAP_LRO) { >- if ((ifp->if_capenable & IFCAP_LRO) == 0) >- features &= ~VTNET_LRO_FEATURES; >- } >+ if ((ifp->if_capenable & IFCAP_LRO) == 0) >+ features &= ~VTNET_LRO_FEATURES; > >- if (ifp->if_capabilities & IFCAP_VLAN_HWFILTER) { >- if ((ifp->if_capenable & IFCAP_VLAN_HWFILTER) == 0) >- features &= ~VIRTIO_NET_F_CTRL_VLAN; >- } >+ if ((ifp->if_capenable & IFCAP_VLAN_HWFILTER) == 0) >+ features &= ~VIRTIO_NET_F_CTRL_VLAN; > > error = virtio_reinit(dev, features); >- if (error) >+ if (error) { > device_printf(dev, "virtio reinit error %d\n", error); >+ return (error); >+ } > >- return (error); >+ sc->vtnet_features = features; >+ virtio_reinit_complete(dev); >+ >+ return (0); > } > > static void >@@ -2903,9 +3184,7 @@ > ifp = sc->vtnet_ifp; > > if (sc->vtnet_flags & VTNET_FLAG_CTRL_RX) { >- /* Restore promiscuous and all-multicast modes. */ > vtnet_rx_filter(sc); >- /* Restore filtered MAC addresses. */ > vtnet_rx_filter_mac(sc); > } > >@@ -2917,32 +3196,30 @@ > vtnet_init_rx_queues(struct vtnet_softc *sc) > { > device_t dev; >+ struct ifnet *ifp; > struct vtnet_rxq *rxq; >- int i, clsize, error; >+ int i, clustersz, error; > > dev = sc->vtnet_dev; >+ ifp = sc->vtnet_ifp; > >- /* >- * Use the new cluster size if one has been set (via a MTU >- * change). Otherwise, use the standard 2K clusters. >- * >- * BMV: It might make sense to use page sized clusters as >- * the default (depending on the features negotiated). 
>- */ >- if (sc->vtnet_rx_new_clsize != 0) { >- clsize = sc->vtnet_rx_new_clsize; >- sc->vtnet_rx_new_clsize = 0; >+ clustersz = vtnet_rx_cluster_size(sc, ifp->if_mtu); >+ sc->vtnet_rx_clustersz = clustersz; >+ >+ if (sc->vtnet_flags & VTNET_FLAG_LRO_NOMRG) { >+ sc->vtnet_rx_nmbufs = howmany(sizeof(struct vtnet_rx_header) + >+ VTNET_MAX_RX_SIZE, clustersz); >+ KASSERT(sc->vtnet_rx_nmbufs < sc->vtnet_rx_nsegs, >+ ("%s: too many rx mbufs %d for %d segments", __func__, >+ sc->vtnet_rx_nmbufs, sc->vtnet_rx_nsegs)); > } else >- clsize = MCLBYTES; >+ sc->vtnet_rx_nmbufs = 1; > >- sc->vtnet_rx_clsize = clsize; >- sc->vtnet_rx_nmbufs = VTNET_NEEDED_RX_MBUFS(sc, clsize); >+#ifdef DEV_NETMAP >+ if (vtnet_netmap_init_rx_buffers(sc)) >+ return (0); >+#endif > >- KASSERT(sc->vtnet_flags & VTNET_FLAG_MRG_RXBUFS || >- sc->vtnet_rx_nmbufs < sc->vtnet_rx_nsegs, >- ("%s: too many rx mbufs %d for %d segments", __func__, >- sc->vtnet_rx_nmbufs, sc->vtnet_rx_nsegs)); >- > for (i = 0; i < sc->vtnet_act_vq_pairs; i++) { > rxq = &sc->vtnet_rxqs[i]; > >@@ -2952,8 +3229,7 @@ > VTNET_RXQ_UNLOCK(rxq); > > if (error) { >- device_printf(dev, >- "cannot allocate mbufs for Rx queue %d\n", i); >+ device_printf(dev, "cannot populate Rx queue %d\n", i); > return (error); > } > } >@@ -2970,6 +3246,7 @@ > for (i = 0; i < sc->vtnet_act_vq_pairs; i++) { > txq = &sc->vtnet_txqs[i]; > txq->vtntx_watchdog = 0; >+ txq->vtntx_intr_threshold = vtnet_txq_intr_threshold(txq); > } > > return (0); >@@ -2999,36 +3276,85 @@ > > dev = sc->vtnet_dev; > >- if ((sc->vtnet_flags & VTNET_FLAG_MULTIQ) == 0) { >+ if ((sc->vtnet_flags & VTNET_FLAG_MQ) == 0) { > sc->vtnet_act_vq_pairs = 1; > return; > } > >- npairs = sc->vtnet_requested_vq_pairs; >+ npairs = sc->vtnet_req_vq_pairs; > > if (vtnet_ctrl_mq_cmd(sc, npairs) != 0) { >- device_printf(dev, >- "cannot set active queue pairs to %d\n", npairs); >+ device_printf(dev, "cannot set active queue pairs to %d, " >+ "falling back to 1 queue pair\n", npairs); > npairs = 1; > } > > sc->vtnet_act_vq_pairs = npairs; > } > >+static void >+vtnet_update_rx_offloads(struct vtnet_softc *sc) >+{ >+ struct ifnet *ifp; >+ uint64_t features; >+ int error; >+ >+ ifp = sc->vtnet_ifp; >+ features = sc->vtnet_features; >+ >+ VTNET_CORE_LOCK_ASSERT(sc); >+ >+ if (ifp->if_capabilities & (IFCAP_RXCSUM | IFCAP_RXCSUM_IPV6)) { >+ if (ifp->if_capenable & (IFCAP_RXCSUM | IFCAP_RXCSUM_IPV6)) >+ features |= VIRTIO_NET_F_GUEST_CSUM; >+ else >+ features &= ~VIRTIO_NET_F_GUEST_CSUM; >+ } >+ >+ if (ifp->if_capabilities & IFCAP_LRO && !vtnet_software_lro(sc)) { >+ if (ifp->if_capenable & IFCAP_LRO) >+ features |= VTNET_LRO_FEATURES; >+ else >+ features &= ~VTNET_LRO_FEATURES; >+ } >+ >+ error = vtnet_ctrl_guest_offloads(sc, >+ features & (VIRTIO_NET_F_GUEST_CSUM | VIRTIO_NET_F_GUEST_TSO4 | >+ VIRTIO_NET_F_GUEST_TSO6 | VIRTIO_NET_F_GUEST_ECN | >+ VIRTIO_NET_F_GUEST_UFO)); >+ if (error) { >+ device_printf(sc->vtnet_dev, >+ "%s: cannot update Rx features\n", __func__); >+ if (ifp->if_drv_flags & IFF_DRV_RUNNING) { >+ ifp->if_drv_flags &= ~IFF_DRV_RUNNING; >+ vtnet_init_locked(sc); >+ } >+ } else >+ sc->vtnet_features = features; >+} >+ > static int > vtnet_reinit(struct vtnet_softc *sc) > { >+ device_t dev; > struct ifnet *ifp; > int error; > >+ dev = sc->vtnet_dev; > ifp = sc->vtnet_ifp; > >- /* Use the current MAC address. 
*/ > bcopy(IF_LLADDR(ifp), sc->vtnet_hwaddr, ETHER_ADDR_LEN); >- vtnet_set_hwaddr(sc); > >+ error = vtnet_virtio_reinit(sc); >+ if (error) >+ return (error); >+ >+ vtnet_set_macaddr(sc); > vtnet_set_active_vq_pairs(sc); > >+ if (sc->vtnet_flags & VTNET_FLAG_CTRL_VQ) >+ vtnet_init_rx_filters(sc); >+ > ifp->if_hwassist = 0; > if (ifp->if_capenable & IFCAP_TXCSUM) > ifp->if_hwassist |= VTNET_CSUM_OFFLOAD; >@@ -3039,16 +3365,10 @@ > if (ifp->if_capenable & IFCAP_TSO6) > ifp->if_hwassist |= CSUM_IP6_TSO; > >- if (sc->vtnet_flags & VTNET_FLAG_CTRL_VQ) >- vtnet_init_rx_filters(sc); >- > error = vtnet_init_rxtx_queues(sc); > if (error) > return (error); > >- vtnet_enable_interrupts(sc); >- ifp->if_drv_flags |= IFF_DRV_RUNNING; >- > return (0); > } > >@@ -3068,22 +3388,15 @@ > > vtnet_stop(sc); > >- /* Reinitialize with the host. */ >- if (vtnet_virtio_reinit(sc) != 0) >- goto fail; >+ if (vtnet_reinit(sc) != 0) { >+ vtnet_stop(sc); >+ return; >+ } > >- if (vtnet_reinit(sc) != 0) >- goto fail; >- >- virtio_reinit_complete(dev); >- >+ ifp->if_drv_flags |= IFF_DRV_RUNNING; > vtnet_update_link_status(sc); >+ vtnet_enable_interrupts(sc); > callout_reset(&sc->vtnet_tick_ch, hz, vtnet_tick, sc); >- >- return; >- >-fail: >- vtnet_stop(sc); > } > > static void >@@ -3093,6 +3406,13 @@ > > sc = xsc; > >+#ifdef DEV_NETMAP >+ if (!NA(sc->vtnet_ifp)) { >+ D("try to attach again"); >+ vtnet_netmap_attach(sc); >+ } >+#endif >+ > VTNET_CORE_LOCK(sc); > vtnet_init_locked(sc); > VTNET_CORE_UNLOCK(sc); >@@ -3101,16 +3421,13 @@ > static void > vtnet_free_ctrl_vq(struct vtnet_softc *sc) > { >- struct virtqueue *vq; > >- vq = sc->vtnet_ctrl_vq; >- > /* > * The control virtqueue is only polled and therefore it should > * already be empty. > */ >- KASSERT(virtqueue_empty(vq), >- ("%s: ctrl vq %p not empty", __func__, vq)); >+ KASSERT(virtqueue_empty(sc->vtnet_ctrl_vq), >+ ("%s: ctrl vq %p not empty", __func__, sc->vtnet_ctrl_vq)); > } > > static void >@@ -3121,47 +3438,88 @@ > > vq = sc->vtnet_ctrl_vq; > >+ MPASS(sc->vtnet_flags & VTNET_FLAG_CTRL_VQ); > VTNET_CORE_LOCK_ASSERT(sc); >- KASSERT(sc->vtnet_flags & VTNET_FLAG_CTRL_VQ, >- ("%s: CTRL_VQ feature not negotiated", __func__)); > > if (!virtqueue_empty(vq)) > return; >- if (virtqueue_enqueue(vq, cookie, sg, readable, writable) != 0) >- return; > > /* >- * Poll for the response, but the command is likely already >- * done when we return from the notify. >+ * Poll for the response, but the command is likely completed before >+ * returning from the notify. 
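
Every control-queue command built below follows the same wire layout, which is why each call site passes "sg.sg_nseg - 1, 1" to vtnet_exec_ctrl_cmd(): all segments except the trailing ack byte are device-readable. A sketch of that convention (the struct here is a hypothetical mirror of the real 2-byte header):

    #include <stdint.h>

    struct ctrl_hdr {
        uint8_t class_;     /* e.g. VIRTIO_NET_CTRL_MAC */
        uint8_t cmd;        /* e.g. VIRTIO_NET_CTRL_MAC_ADDR_SET */
    };

    /*
     * seg 0: ctrl_hdr                  -> device-readable
     * seg 1..n-2: command payload     -> device-readable
     * seg n-1: uint8_t ack            <- device-writable
     */
    static void
    ctrl_split(int nseg, int *readable, int *writable)
    {
        *readable = nseg - 1;   /* header + payload */
        *writable = 1;          /* ack, written by the device */
    }
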
> */ >- virtqueue_notify(vq); >- virtqueue_poll(vq, NULL); >+ if (virtqueue_enqueue(vq, cookie, sg, readable, writable) == 0) { >+ virtqueue_notify(vq); >+ virtqueue_poll(vq, NULL); >+ } > } > > static int > vtnet_ctrl_mac_cmd(struct vtnet_softc *sc, uint8_t *hwaddr) > { >- struct virtio_net_ctrl_hdr hdr __aligned(2); > struct sglist_seg segs[3]; > struct sglist sg; >- uint8_t ack; >+ struct { >+ struct virtio_net_ctrl_hdr hdr __aligned(2); >+ uint8_t pad1; >+ uint8_t addr[ETHER_ADDR_LEN] __aligned(8); >+ uint8_t pad2; >+ uint8_t ack; >+ } s; > int error; > >- hdr.class = VIRTIO_NET_CTRL_MAC; >- hdr.cmd = VIRTIO_NET_CTRL_MAC_ADDR_SET; >- ack = VIRTIO_NET_ERR; >+ error = 0; >+ MPASS(sc->vtnet_flags & VTNET_FLAG_CTRL_MAC); > >- sglist_init(&sg, 3, segs); >+ s.hdr.class = VIRTIO_NET_CTRL_MAC; >+ s.hdr.cmd = VIRTIO_NET_CTRL_MAC_ADDR_SET; >+ bcopy(hwaddr, &s.addr[0], ETHER_ADDR_LEN); >+ s.ack = VIRTIO_NET_ERR; >+ >+ sglist_init(&sg, nitems(segs), segs); >+ error |= sglist_append(&sg, &s.hdr, sizeof(struct virtio_net_ctrl_hdr)); >+ error |= sglist_append(&sg, &s.addr[0], ETHER_ADDR_LEN); >+ error |= sglist_append(&sg, &s.ack, sizeof(uint8_t)); >+ MPASS(error == 0 && sg.sg_nseg == nitems(segs)); >+ >+ if (error == 0) >+ vtnet_exec_ctrl_cmd(sc, &s.ack, &sg, sg.sg_nseg - 1, 1); >+ >+ return (s.ack == VIRTIO_NET_OK ? 0 : EIO); >+} >+ >+static int >+vtnet_ctrl_guest_offloads(struct vtnet_softc *sc, uint64_t offloads) >+{ >+ struct sglist_seg segs[3]; >+ struct sglist sg; >+ struct { >+ struct virtio_net_ctrl_hdr hdr __aligned(2); >+ uint8_t pad1; >+ uint64_t offloads __aligned(8); >+ uint8_t pad2; >+ uint8_t ack; >+ } s; >+ int error; >+ > error = 0; >- error |= sglist_append(&sg, &hdr, sizeof(struct virtio_net_ctrl_hdr)); >- error |= sglist_append(&sg, hwaddr, ETHER_ADDR_LEN); >- error |= sglist_append(&sg, &ack, sizeof(uint8_t)); >- KASSERT(error == 0 && sg.sg_nseg == 3, >- ("%s: error %d adding set MAC msg to sglist", __func__, error)); >+ MPASS(sc->vtnet_features & VIRTIO_NET_F_CTRL_GUEST_OFFLOADS); > >- vtnet_exec_ctrl_cmd(sc, &ack, &sg, sg.sg_nseg - 1, 1); >+ s.hdr.class = VIRTIO_NET_CTRL_GUEST_OFFLOADS; >+ s.hdr.cmd = VIRTIO_NET_CTRL_GUEST_OFFLOADS_SET; >+ s.offloads = vtnet_gtoh64(sc, offloads); >+ s.ack = VIRTIO_NET_ERR; > >- return (ack == VIRTIO_NET_OK ? 0 : EIO); >+ sglist_init(&sg, nitems(segs), segs); >+ error |= sglist_append(&sg, &s.hdr, sizeof(struct virtio_net_ctrl_hdr)); >+ error |= sglist_append(&sg, &s.offloads, sizeof(uint64_t)); >+ error |= sglist_append(&sg, &s.ack, sizeof(uint8_t)); >+ MPASS(error == 0 && sg.sg_nseg == nitems(segs)); >+ >+ if (error == 0) >+ vtnet_exec_ctrl_cmd(sc, &s.ack, &sg, sg.sg_nseg - 1, 1); >+ >+ return (s.ack == VIRTIO_NET_OK ? 
0 : EIO); > } > > static int >@@ -3170,63 +3528,64 @@ > struct sglist_seg segs[3]; > struct sglist sg; > struct { >- struct virtio_net_ctrl_hdr hdr; >+ struct virtio_net_ctrl_hdr hdr __aligned(2); > uint8_t pad1; >- struct virtio_net_ctrl_mq mq; >+ struct virtio_net_ctrl_mq mq __aligned(2); > uint8_t pad2; > uint8_t ack; >- } s __aligned(2); >+ } s; > int error; > >+ error = 0; >+ MPASS(sc->vtnet_flags & VTNET_FLAG_MQ); >+ > s.hdr.class = VIRTIO_NET_CTRL_MQ; > s.hdr.cmd = VIRTIO_NET_CTRL_MQ_VQ_PAIRS_SET; >- s.mq.virtqueue_pairs = npairs; >+ s.mq.virtqueue_pairs = vtnet_gtoh16(sc, npairs); > s.ack = VIRTIO_NET_ERR; > >- sglist_init(&sg, 3, segs); >- error = 0; >+ sglist_init(&sg, nitems(segs), segs); > error |= sglist_append(&sg, &s.hdr, sizeof(struct virtio_net_ctrl_hdr)); > error |= sglist_append(&sg, &s.mq, sizeof(struct virtio_net_ctrl_mq)); > error |= sglist_append(&sg, &s.ack, sizeof(uint8_t)); >- KASSERT(error == 0 && sg.sg_nseg == 3, >- ("%s: error %d adding MQ message to sglist", __func__, error)); >+ MPASS(error == 0 && sg.sg_nseg == nitems(segs)); > >- vtnet_exec_ctrl_cmd(sc, &s.ack, &sg, sg.sg_nseg - 1, 1); >+ if (error == 0) >+ vtnet_exec_ctrl_cmd(sc, &s.ack, &sg, sg.sg_nseg - 1, 1); > > return (s.ack == VIRTIO_NET_OK ? 0 : EIO); > } > > static int >-vtnet_ctrl_rx_cmd(struct vtnet_softc *sc, int cmd, int on) >+vtnet_ctrl_rx_cmd(struct vtnet_softc *sc, uint8_t cmd, int on) > { > struct sglist_seg segs[3]; > struct sglist sg; > struct { >- struct virtio_net_ctrl_hdr hdr; >+ struct virtio_net_ctrl_hdr hdr __aligned(2); > uint8_t pad1; > uint8_t onoff; > uint8_t pad2; > uint8_t ack; >- } s __aligned(2); >+ } s; > int error; > >- KASSERT(sc->vtnet_flags & VTNET_FLAG_CTRL_RX, >- ("%s: CTRL_RX feature not negotiated", __func__)); >+ error = 0; >+ MPASS(sc->vtnet_flags & VTNET_FLAG_CTRL_RX); > > s.hdr.class = VIRTIO_NET_CTRL_RX; > s.hdr.cmd = cmd; > s.onoff = !!on; > s.ack = VIRTIO_NET_ERR; > >- sglist_init(&sg, 3, segs); >- error = 0; >+ sglist_init(&sg, nitems(segs), segs); > error |= sglist_append(&sg, &s.hdr, sizeof(struct virtio_net_ctrl_hdr)); > error |= sglist_append(&sg, &s.onoff, sizeof(uint8_t)); > error |= sglist_append(&sg, &s.ack, sizeof(uint8_t)); >- KASSERT(error == 0 && sg.sg_nseg == 3, >- ("%s: error %d adding Rx message to sglist", __func__, error)); >+ MPASS(error == 0 && sg.sg_nseg == nitems(segs)); > >- vtnet_exec_ctrl_cmd(sc, &s.ack, &sg, sg.sg_nseg - 1, 1); >+ if (error == 0) >+ vtnet_exec_ctrl_cmd(sc, &s.ack, &sg, sg.sg_nseg - 1, 1); > > return (s.ack == VIRTIO_NET_OK ? 0 : EIO); > } >@@ -3234,40 +3593,16 @@ > static int > vtnet_set_promisc(struct vtnet_softc *sc, int on) > { >- > return (vtnet_ctrl_rx_cmd(sc, VIRTIO_NET_CTRL_RX_PROMISC, on)); > } > > static int > vtnet_set_allmulti(struct vtnet_softc *sc, int on) > { >- > return (vtnet_ctrl_rx_cmd(sc, VIRTIO_NET_CTRL_RX_ALLMULTI, on)); > } > >-/* >- * The device defaults to promiscuous mode for backwards compatibility. >- * Turn it off at attach time if possible. 
>- */ > static void >-vtnet_attach_disable_promisc(struct vtnet_softc *sc) >-{ >- struct ifnet *ifp; >- >- ifp = sc->vtnet_ifp; >- >- VTNET_CORE_LOCK(sc); >- if ((sc->vtnet_flags & VTNET_FLAG_CTRL_RX) == 0) { >- ifp->if_flags |= IFF_PROMISC; >- } else if (vtnet_set_promisc(sc, 0) != 0) { >- ifp->if_flags |= IFF_PROMISC; >- device_printf(sc->vtnet_dev, >- "cannot disable default promiscuous mode\n"); >- } >- VTNET_CORE_UNLOCK(sc); >-} >- >-static void > vtnet_rx_filter(struct vtnet_softc *sc) > { > device_t dev; >@@ -3278,13 +3613,15 @@ > > VTNET_CORE_LOCK_ASSERT(sc); > >- if (vtnet_set_promisc(sc, ifp->if_flags & IFF_PROMISC) != 0) >+ if (vtnet_set_promisc(sc, ifp->if_flags & IFF_PROMISC) != 0) { > device_printf(dev, "cannot %s promiscuous mode\n", > ifp->if_flags & IFF_PROMISC ? "enable" : "disable"); >+ } > >- if (vtnet_set_allmulti(sc, ifp->if_flags & IFF_ALLMULTI) != 0) >+ if (vtnet_set_allmulti(sc, ifp->if_flags & IFF_ALLMULTI) != 0) { > device_printf(dev, "cannot %s all-multicast mode\n", > ifp->if_flags & IFF_ALLMULTI ? "enable" : "disable"); >+ } > } > > static void >@@ -3302,14 +3639,15 @@ > > ifp = sc->vtnet_ifp; > filter = sc->vtnet_mac_filter; >+ > ucnt = 0; > mcnt = 0; > promisc = 0; > allmulti = 0; >+ error = 0; > >+ MPASS(sc->vtnet_flags & VTNET_FLAG_CTRL_RX); > VTNET_CORE_LOCK_ASSERT(sc); >- KASSERT(sc->vtnet_flags & VTNET_FLAG_CTRL_RX, >- ("%s: CTRL_RX feature not negotiated", __func__)); > > /* Unicast MAC addresses: */ > if_addr_rlock(ifp); >@@ -3330,14 +3668,6 @@ > } > if_addr_runlock(ifp); > >- if (promisc != 0) { >- filter->vmf_unicast.nentries = 0; >- if_printf(ifp, "more than %d MAC addresses assigned, " >- "falling back to promiscuous mode\n", >- VTNET_MAX_MAC_ENTRIES); >- } else >- filter->vmf_unicast.nentries = ucnt; >- > /* Multicast MAC addresses: */ > if_maddr_rlock(ifp); > CK_STAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) { >@@ -3354,34 +3684,40 @@ > } > if_maddr_runlock(ifp); > >+ if (promisc != 0) { >+ if_printf(ifp, "cannot filter more than %d MAC addresses, " >+ "falling back to promiscuous mode\n", >+ VTNET_MAX_MAC_ENTRIES); >+ ucnt = 0; >+ } > if (allmulti != 0) { >- filter->vmf_multicast.nentries = 0; >- if_printf(ifp, "more than %d multicast MAC addresses " >- "assigned, falling back to all-multicast mode\n", >+ if_printf(ifp, "cannot filter more than %d multicast MAC " >+ "addresses, falling back to all-multicast mode\n", > VTNET_MAX_MAC_ENTRIES); >- } else >- filter->vmf_multicast.nentries = mcnt; >+ mcnt = 0; >+ } > > if (promisc != 0 && allmulti != 0) > goto out; > >+ filter->vmf_unicast.nentries = vtnet_gtoh32(sc, ucnt); >+ filter->vmf_multicast.nentries = vtnet_gtoh32(sc, mcnt); >+ > hdr.class = VIRTIO_NET_CTRL_MAC; > hdr.cmd = VIRTIO_NET_CTRL_MAC_TABLE_SET; > ack = VIRTIO_NET_ERR; > >- sglist_init(&sg, 4, segs); >- error = 0; >+ sglist_init(&sg, nitems(segs), segs); > error |= sglist_append(&sg, &hdr, sizeof(struct virtio_net_ctrl_hdr)); > error |= sglist_append(&sg, &filter->vmf_unicast, >- sizeof(uint32_t) + filter->vmf_unicast.nentries * ETHER_ADDR_LEN); >+ sizeof(uint32_t) + ucnt * ETHER_ADDR_LEN); > error |= sglist_append(&sg, &filter->vmf_multicast, >- sizeof(uint32_t) + filter->vmf_multicast.nentries * ETHER_ADDR_LEN); >+ sizeof(uint32_t) + mcnt * ETHER_ADDR_LEN); > error |= sglist_append(&sg, &ack, sizeof(uint8_t)); >- KASSERT(error == 0 && sg.sg_nseg == 4, >- ("%s: error %d adding MAC filter msg to sglist", __func__, error)); >+ MPASS(error == 0 && sg.sg_nseg == nitems(segs)); > >- vtnet_exec_ctrl_cmd(sc, &ack, &sg, sg.sg_nseg - 
1, 1); >- >+ if (error == 0) >+ vtnet_exec_ctrl_cmd(sc, &ack, &sg, sg.sg_nseg - 1, 1); > if (ack != VIRTIO_NET_OK) > if_printf(ifp, "error setting host MAC filter table\n"); > >@@ -3398,28 +3734,30 @@ > struct sglist_seg segs[3]; > struct sglist sg; > struct { >- struct virtio_net_ctrl_hdr hdr; >+ struct virtio_net_ctrl_hdr hdr __aligned(2); > uint8_t pad1; >- uint16_t tag; >+ uint16_t tag __aligned(2); > uint8_t pad2; > uint8_t ack; >- } s __aligned(2); >+ } s; > int error; > >+ error = 0; >+ MPASS(sc->vtnet_flags & VTNET_FLAG_VLAN_FILTER); >+ > s.hdr.class = VIRTIO_NET_CTRL_VLAN; > s.hdr.cmd = add ? VIRTIO_NET_CTRL_VLAN_ADD : VIRTIO_NET_CTRL_VLAN_DEL; >- s.tag = tag; >+ s.tag = vtnet_gtoh16(sc, tag); > s.ack = VIRTIO_NET_ERR; > >- sglist_init(&sg, 3, segs); >- error = 0; >+ sglist_init(&sg, nitems(segs), segs); > error |= sglist_append(&sg, &s.hdr, sizeof(struct virtio_net_ctrl_hdr)); > error |= sglist_append(&sg, &s.tag, sizeof(uint16_t)); > error |= sglist_append(&sg, &s.ack, sizeof(uint8_t)); >- KASSERT(error == 0 && sg.sg_nseg == 3, >- ("%s: error %d adding VLAN message to sglist", __func__, error)); >+ MPASS(error == 0 && sg.sg_nseg == nitems(segs)); > >- vtnet_exec_ctrl_cmd(sc, &s.ack, &sg, sg.sg_nseg - 1, 1); >+ if (error == 0) >+ vtnet_exec_ctrl_cmd(sc, &s.ack, &sg, sg.sg_nseg - 1, 1); > > return (s.ack == VIRTIO_NET_OK ? 0 : EIO); > } >@@ -3427,13 +3765,12 @@ > static void > vtnet_rx_filter_vlan(struct vtnet_softc *sc) > { >+ int i, bit; > uint32_t w; > uint16_t tag; >- int i, bit; > >+ MPASS(sc->vtnet_flags & VTNET_FLAG_VLAN_FILTER); > VTNET_CORE_LOCK_ASSERT(sc); >- KASSERT(sc->vtnet_flags & VTNET_FLAG_VLAN_FILTER, >- ("%s: VLAN_FILTER feature not negotiated", __func__)); > > /* Enable the filter for each configured VLAN. */ > for (i = 0; i < VTNET_VLAN_FILTER_NWORDS; i++) { >@@ -3502,21 +3839,34 @@ > vtnet_update_vlan_filter(arg, 0, tag); > } > >+static void >+vtnet_update_speed_duplex(struct vtnet_softc *sc) >+{ >+ struct ifnet *ifp; >+ uint32_t speed; >+ >+ ifp = sc->vtnet_ifp; >+ >+ if ((sc->vtnet_features & VIRTIO_NET_F_SPEED_DUPLEX) == 0) >+ return; >+ >+ /* BMV: Ignore duplex. */ >+ speed = virtio_read_dev_config_4(sc->vtnet_dev, >+ offsetof(struct virtio_net_config, speed)); >+ if (speed != -1) >+ ifp->if_baudrate = IF_Mbps(speed); >+} >+ > static int > vtnet_is_link_up(struct vtnet_softc *sc) > { >- device_t dev; >- struct ifnet *ifp; > uint16_t status; > >- dev = sc->vtnet_dev; >- ifp = sc->vtnet_ifp; >+ if ((sc->vtnet_features & VIRTIO_NET_F_STATUS) == 0) >+ return (1); > >- if ((ifp->if_capabilities & IFCAP_LINKSTATE) == 0) >- status = VIRTIO_NET_S_LINK_UP; >- else >- status = virtio_read_dev_config_2(dev, >- offsetof(struct virtio_net_config, status)); >+ status = virtio_read_dev_config_2(sc->vtnet_dev, >+ offsetof(struct virtio_net_config, status)); > > return ((status & VIRTIO_NET_S_LINK_UP) != 0); > } >@@ -3528,12 +3878,12 @@ > int link; > > ifp = sc->vtnet_ifp; >- > VTNET_CORE_LOCK_ASSERT(sc); > link = vtnet_is_link_up(sc); > > /* Notify if the link status has changed. 
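
The VLAN filter walked above is a 4096-bit bitmap, one bit per VLAN ID, stored as 32-bit words (matching VTNET_VLAN_FILTER_NWORDS = 4096 / 32). A sketch of the set/test math (function names are illustrative):

    #include <stdint.h>

    #define VLAN_NWORDS (4096 / 32)

    static void
    vlan_filter_set(uint32_t filter[VLAN_NWORDS], uint16_t tag)
    {
        filter[tag >> 5] |= 1u << (tag & 31);
    }

    static int
    vlan_filter_test(const uint32_t filter[VLAN_NWORDS], uint16_t tag)
    {
        return ((filter[tag >> 5] & (1u << (tag & 31))) != 0);
    }
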
*/ > if (link != 0 && sc->vtnet_link_active == 0) { >+ vtnet_update_speed_duplex(sc); > sc->vtnet_link_active = 1; > if_link_state_change(ifp, LINK_STATE_UP); > } else if (link == 0 && sc->vtnet_link_active != 0) { >@@ -3545,16 +3895,7 @@ > static int > vtnet_ifmedia_upd(struct ifnet *ifp) > { >- struct vtnet_softc *sc; >- struct ifmedia *ifm; >- >- sc = ifp->if_softc; >- ifm = &sc->vtnet_media; >- >- if (IFM_TYPE(ifm->ifm_media) != IFM_ETHER) >- return (EINVAL); >- >- return (0); >+ return (EOPNOTSUPP); > } > > static void >@@ -3570,25 +3911,45 @@ > VTNET_CORE_LOCK(sc); > if (vtnet_is_link_up(sc) != 0) { > ifmr->ifm_status |= IFM_ACTIVE; >- ifmr->ifm_active |= VTNET_MEDIATYPE; >+ ifmr->ifm_active |= IFM_10G_T | IFM_FDX; > } else > ifmr->ifm_active |= IFM_NONE; > VTNET_CORE_UNLOCK(sc); > } > > static void >-vtnet_set_hwaddr(struct vtnet_softc *sc) >+vtnet_get_macaddr(struct vtnet_softc *sc) > { >+ >+ if (sc->vtnet_flags & VTNET_FLAG_MAC) { >+ virtio_read_device_config_array(sc->vtnet_dev, >+ offsetof(struct virtio_net_config, mac), >+ &sc->vtnet_hwaddr[0], sizeof(uint8_t), ETHER_ADDR_LEN); >+ } else { >+ /* Generate a random locally administered unicast address. */ >+ sc->vtnet_hwaddr[0] = 0xB2; >+ arc4rand(&sc->vtnet_hwaddr[1], ETHER_ADDR_LEN - 1, 0); >+ } >+} >+ >+static void >+vtnet_set_macaddr(struct vtnet_softc *sc) >+{ > device_t dev; >- int i; >+ int error; > > dev = sc->vtnet_dev; > > if (sc->vtnet_flags & VTNET_FLAG_CTRL_MAC) { >- if (vtnet_ctrl_mac_cmd(sc, sc->vtnet_hwaddr) != 0) >+ error = vtnet_ctrl_mac_cmd(sc, sc->vtnet_hwaddr); >+ if (error) > device_printf(dev, "unable to set MAC address\n"); >- } else if (sc->vtnet_flags & VTNET_FLAG_MAC) { >- for (i = 0; i < ETHER_ADDR_LEN; i++) { >+ return; >+ } >+ >+ /* MAC in config is read-only in modern VirtIO. */ >+ if (!vtnet_modern(sc) && sc->vtnet_flags & VTNET_FLAG_MAC) { >+ for (int i = 0; i < ETHER_ADDR_LEN; i++) { > virtio_write_dev_config_1(dev, > offsetof(struct virtio_net_config, mac) + i, > sc->vtnet_hwaddr[i]); >@@ -3597,31 +3958,12 @@ > } > > static void >-vtnet_get_hwaddr(struct vtnet_softc *sc) >+vtnet_attached_set_macaddr(struct vtnet_softc *sc) > { >- device_t dev; >- int i; > >- dev = sc->vtnet_dev; >- >- if ((sc->vtnet_flags & VTNET_FLAG_MAC) == 0) { >- /* >- * Generate a random locally administered unicast address. >- * >- * It would be nice to generate the same MAC address across >- * reboots, but it seems all the hosts currently available >- * support the MAC feature, so this isn't too important. >- */ >- sc->vtnet_hwaddr[0] = 0xB2; >- arc4rand(&sc->vtnet_hwaddr[1], ETHER_ADDR_LEN - 1, 0); >- vtnet_set_hwaddr(sc); >- return; >- } >- >- for (i = 0; i < ETHER_ADDR_LEN; i++) { >- sc->vtnet_hwaddr[i] = virtio_read_dev_config_1(dev, >- offsetof(struct virtio_net_config, mac) + i); >- } >+ /* Assign MAC address if it was generated. */ >+ if ((sc->vtnet_flags & VTNET_FLAG_MAC) == 0) >+ vtnet_set_macaddr(sc); > } > > static void >@@ -3652,36 +3994,6 @@ > } > > static void >-vtnet_set_tx_intr_threshold(struct vtnet_softc *sc) >-{ >- int size, thresh; >- >- size = virtqueue_size(sc->vtnet_txqs[0].vtntx_vq); >- >- /* >- * The Tx interrupt is disabled until the queue free count falls >- * below our threshold. Completed frames are drained from the Tx >- * virtqueue before transmitting new frames and in the watchdog >- * callout, so the frequency of Tx interrupts is greatly reduced, >- * at the cost of not freeing mbufs as quickly as they otherwise >- * would be. >- * >- * N.B. We assume all the Tx queues are the same size. 
>- */ >- thresh = size / 4; >- >- /* >- * Without indirect descriptors, leave enough room for the most >- * segments we handle. >- */ >- if ((sc->vtnet_flags & VTNET_FLAG_INDIRECT) == 0 && >- thresh < sc->vtnet_tx_nsegs) >- thresh = sc->vtnet_tx_nsegs; >- >- sc->vtnet_tx_intr_thresh = thresh; >-} >- >-static void > vtnet_setup_rxq_sysctl(struct sysctl_ctx_list *ctx, > struct sysctl_oid_list *child, struct vtnet_rxq *rxq) > { >@@ -3709,6 +4021,8 @@ > &stats->vrxs_csum, "Receive checksum offloaded"); > SYSCTL_ADD_UQUAD(ctx, list, OID_AUTO, "csum_failed", CTLFLAG_RD, > &stats->vrxs_csum_failed, "Receive checksum offload failed"); >+ SYSCTL_ADD_UQUAD(ctx, list, OID_AUTO, "host_lro", CTLFLAG_RD, >+ &stats->vrxs_host_lro, "Receive host segmentation offloaded"); > SYSCTL_ADD_UQUAD(ctx, list, OID_AUTO, "rescheduled", CTLFLAG_RD, > &stats->vrxs_rescheduled, > "Receive interrupt handler rescheduled"); >@@ -3739,7 +4053,7 @@ > SYSCTL_ADD_UQUAD(ctx, list, OID_AUTO, "csum", CTLFLAG_RD, > &stats->vtxs_csum, "Transmit checksum offloaded"); > SYSCTL_ADD_UQUAD(ctx, list, OID_AUTO, "tso", CTLFLAG_RD, >- &stats->vtxs_tso, "Transmit segmentation offloaded"); >+ &stats->vtxs_tso, "Transmit TCP segmentation offloaded"); > SYSCTL_ADD_UQUAD(ctx, list, OID_AUTO, "rescheduled", CTLFLAG_RD, > &stats->vtxs_rescheduled, > "Transmit interrupt handler rescheduled"); >@@ -3759,7 +4073,7 @@ > tree = device_get_sysctl_tree(dev); > child = SYSCTL_CHILDREN(tree); > >- for (i = 0; i < sc->vtnet_max_vq_pairs; i++) { >+ for (i = 0; i < sc->vtnet_req_vq_pairs; i++) { > vtnet_setup_rxq_sysctl(ctx, child, &sc->vtnet_rxqs[i]); > vtnet_setup_txq_sysctl(ctx, child, &sc->vtnet_txqs[i]); > } >@@ -3819,16 +4133,20 @@ > CTLFLAG_RD, &stats->rx_task_rescheduled, > "Times the receive interrupt task rescheduled itself"); > >- SYSCTL_ADD_UQUAD(ctx, child, OID_AUTO, "tx_csum_bad_ethtype", >- CTLFLAG_RD, &stats->tx_csum_bad_ethtype, >+ SYSCTL_ADD_UQUAD(ctx, child, OID_AUTO, "tx_csum_unknown_ethtype", >+ CTLFLAG_RD, &stats->tx_csum_unknown_ethtype, > "Aborted transmit of checksum offloaded buffer with unknown " > "Ethernet type"); >- SYSCTL_ADD_UQUAD(ctx, child, OID_AUTO, "tx_tso_bad_ethtype", >- CTLFLAG_RD, &stats->tx_tso_bad_ethtype, >- "Aborted transmit of TSO buffer with unknown Ethernet type"); >+ SYSCTL_ADD_UQUAD(ctx, child, OID_AUTO, "tx_csum_proto_mismatch", >+ CTLFLAG_RD, &stats->tx_csum_proto_mismatch, >+ "Aborted transmit of checksum offloaded buffer because mismatched " >+ "protocols"); > SYSCTL_ADD_UQUAD(ctx, child, OID_AUTO, "tx_tso_not_tcp", > CTLFLAG_RD, &stats->tx_tso_not_tcp, > "Aborted transmit of TSO buffer with non TCP protocol"); >+ SYSCTL_ADD_UQUAD(ctx, child, OID_AUTO, "tx_tso_without_csum", >+ CTLFLAG_RD, &stats->tx_tso_without_csum, >+ "Aborted transmit of TSO buffer without TCP checksum offload"); > SYSCTL_ADD_UQUAD(ctx, child, OID_AUTO, "tx_defragged", > CTLFLAG_RD, &stats->tx_defragged, > "Transmit mbufs defragged"); >@@ -3861,10 +4179,10 @@ > > SYSCTL_ADD_INT(ctx, child, OID_AUTO, "max_vq_pairs", > CTLFLAG_RD, &sc->vtnet_max_vq_pairs, 0, >- "Maximum number of supported virtqueue pairs"); >- SYSCTL_ADD_INT(ctx, child, OID_AUTO, "requested_vq_pairs", >- CTLFLAG_RD, &sc->vtnet_requested_vq_pairs, 0, >- "Requested number of virtqueue pairs"); >+ "Number of maximum supported virtqueue pairs"); >+ SYSCTL_ADD_INT(ctx, child, OID_AUTO, "req_vq_pairs", >+ CTLFLAG_RD, &sc->vtnet_req_vq_pairs, 0, >+ "Number of requested virtqueue pairs"); > SYSCTL_ADD_INT(ctx, child, OID_AUTO, "act_vq_pairs", > CTLFLAG_RD, 
&sc->vtnet_act_vq_pairs, 0, > "Number of active virtqueue pairs"); >@@ -3872,6 +4190,19 @@ > vtnet_setup_stat_sysctl(ctx, child, sc); > } > >+static void >+vtnet_load_tunables(struct vtnet_softc *sc) >+{ >+ >+ sc->vtnet_lro_entry_count = vtnet_tunable_int(sc, >+ "lro_entry_count", vtnet_lro_entry_count); >+ if (sc->vtnet_lro_entry_count < TCP_LRO_ENTRIES) >+ sc->vtnet_lro_entry_count = TCP_LRO_ENTRIES; >+ >+ sc->vtnet_lro_mbufq_depth = vtnet_tunable_int(sc, >+ "lro_mbufq_depth", vtnet_lro_mbufq_depth); >+} >+ > static int > vtnet_rxq_enable_intr(struct vtnet_rxq *rxq) > { >@@ -3913,10 +4244,14 @@ > static void > vtnet_enable_rx_interrupts(struct vtnet_softc *sc) > { >+ struct vtnet_rxq *rxq; > int i; > >- for (i = 0; i < sc->vtnet_act_vq_pairs; i++) >- vtnet_rxq_enable_intr(&sc->vtnet_rxqs[i]); >+ for (i = 0; i < sc->vtnet_act_vq_pairs; i++) { >+ rxq = &sc->vtnet_rxqs[i]; >+ if (vtnet_rxq_enable_intr(rxq) != 0) >+ taskqueue_enqueue(rxq->vtnrx_tq, &rxq->vtnrx_intrtask); >+ } > } > > static void >@@ -3941,7 +4276,7 @@ > { > int i; > >- for (i = 0; i < sc->vtnet_act_vq_pairs; i++) >+ for (i = 0; i < sc->vtnet_max_vq_pairs; i++) > vtnet_rxq_disable_intr(&sc->vtnet_rxqs[i]); > } > >@@ -3950,7 +4285,7 @@ > { > int i; > >- for (i = 0; i < sc->vtnet_act_vq_pairs; i++) >+ for (i = 0; i < sc->vtnet_max_vq_pairs; i++) > vtnet_txq_disable_intr(&sc->vtnet_txqs[i]); > } > >@@ -3983,9 +4318,9 @@ > sc = if_getsoftc(ifp); > > VTNET_CORE_LOCK(sc); >- *nrxr = sc->vtnet_max_vq_pairs; >+ *nrxr = sc->vtnet_req_vq_pairs; > *ncl = NETDUMP_MAX_IN_FLIGHT; >- *clsize = sc->vtnet_rx_clsize; >+ *clsize = sc->vtnet_rx_clustersz; > VTNET_CORE_UNLOCK(sc); > > /* >@@ -4034,7 +4369,7 @@ > return (EBUSY); > > (void)vtnet_txq_eof(&sc->vtnet_txqs[0]); >- for (i = 0; i < sc->vtnet_max_vq_pairs; i++) >+ for (i = 0; i < sc->vtnet_act_vq_pairs; i++) > (void)vtnet_rxq_eof(&sc->vtnet_rxqs[i]); > return (0); > } >diff -urN sys/dev/virtio.ori/network/if_vtnetvar.h sys/dev/virtio/network/if_vtnetvar.h >--- sys/dev/virtio.ori/network/if_vtnetvar.h 2020-03-18 20:16:31.707569000 -0700 >+++ sys/dev/virtio/network/if_vtnetvar.h 2020-03-19 16:44:27.330455000 -0700 >@@ -43,9 +43,10 @@ > uint64_t rx_csum_bad_ipproto; > uint64_t rx_csum_bad_offset; > uint64_t rx_csum_bad_proto; >- uint64_t tx_csum_bad_ethtype; >- uint64_t tx_tso_bad_ethtype; >+ uint64_t tx_csum_unknown_ethtype; >+ uint64_t tx_csum_proto_mismatch; > uint64_t tx_tso_not_tcp; >+ uint64_t tx_tso_without_csum; > uint64_t tx_defragged; > uint64_t tx_defrag_failed; > >@@ -67,6 +68,7 @@ > uint64_t vrxs_ierrors; /* if_ierrors */ > uint64_t vrxs_csum; > uint64_t vrxs_csum_failed; >+ uint64_t vrxs_host_lro; > uint64_t vrxs_rescheduled; > }; > >@@ -79,6 +81,7 @@ > struct vtnet_rxq_stats vtnrx_stats; > struct taskqueue *vtnrx_tq; > struct task vtnrx_intrtask; >+ struct lro_ctrl vtnrx_lro; > #ifdef DEV_NETMAP > struct virtio_net_hdr_mrg_rxbuf vtnrx_shrhdr; > #endif /* DEV_NETMAP */ >@@ -111,6 +114,7 @@ > #endif > int vtntx_id; > int vtntx_watchdog; >+ int vtntx_intr_threshold; > struct vtnet_txq_stats vtntx_stats; > struct taskqueue *vtntx_tq; > struct task vtntx_intrtask; >@@ -136,9 +140,10 @@ > struct ifnet *vtnet_ifp; > struct vtnet_rxq *vtnet_rxqs; > struct vtnet_txq *vtnet_txqs; >+ uint64_t vtnet_features; > > uint32_t vtnet_flags; >-#define VTNET_FLAG_SUSPENDED 0x0001 >+#define VTNET_FLAG_MODERN 0x0001 > #define VTNET_FLAG_MAC 0x0002 > #define VTNET_FLAG_CTRL_VQ 0x0004 > #define VTNET_FLAG_CTRL_RX 0x0008 >@@ -147,29 +152,33 @@ > #define VTNET_FLAG_TSO_ECN 0x0040 > #define 
VTNET_FLAG_MRG_RXBUFS 0x0080 > #define VTNET_FLAG_LRO_NOMRG 0x0100 >-#define VTNET_FLAG_MULTIQ 0x0200 >+#define VTNET_FLAG_MQ 0x0200 > #define VTNET_FLAG_INDIRECT 0x0400 > #define VTNET_FLAG_EVENT_IDX 0x0800 >+#define VTNET_FLAG_SUSPENDED 0x1000 >+#define VTNET_FLAG_FIXUP_NEEDS_CSUM 0x2000 >+#define VTNET_FLAG_SW_LRO 0x4000 > >- int vtnet_link_active; > int vtnet_hdr_size; >- int vtnet_rx_process_limit; >- int vtnet_rx_nsegs; > int vtnet_rx_nmbufs; >- int vtnet_rx_clsize; >- int vtnet_rx_new_clsize; >- int vtnet_tx_intr_thresh; >- int vtnet_tx_nsegs; >- int vtnet_if_flags; >+ int vtnet_rx_clustersz; >+ int vtnet_rx_nsegs; >+ int vtnet_rx_process_limit; >+ int vtnet_link_active; > int vtnet_act_vq_pairs; >+ int vtnet_req_vq_pairs; > int vtnet_max_vq_pairs; >- int vtnet_requested_vq_pairs; >+ int vtnet_tx_nsegs; >+ int vtnet_if_flags; >+ int vtnet_max_mtu; >+ int vtnet_lro_entry_count; >+ int vtnet_lro_mbufq_depth; > > struct virtqueue *vtnet_ctrl_vq; > struct vtnet_mac_filter *vtnet_mac_filter; > uint32_t *vtnet_vlan_filter; > >- uint64_t vtnet_features; >+ uint64_t vtnet_negotiated_features; > struct vtnet_statistics vtnet_stats; > struct callout vtnet_tick_ch; > struct ifmedia vtnet_media; >@@ -181,10 +190,22 @@ > char vtnet_hwaddr[ETHER_ADDR_LEN]; > }; > >+static bool >+vtnet_modern(struct vtnet_softc *sc) >+{ >+ return ((sc->vtnet_flags & VTNET_FLAG_MODERN) != 0); >+} >+ >+static bool >+vtnet_software_lro(struct vtnet_softc *sc) >+{ >+ return ((sc->vtnet_flags & VTNET_FLAG_SW_LRO) != 0); >+} >+ > /* > * Maximum number of queue pairs we will autoconfigure to. > */ >-#define VTNET_MAX_QUEUE_PAIRS 8 >+#define VTNET_MAX_QUEUE_PAIRS 32 > > /* > * Additional completed entries can appear in a virtqueue before we can >@@ -202,25 +223,24 @@ > #define VTNET_NOTIFY_RETRIES 4 > > /* >- * Fake the media type. The host does not provide us with any real media >- * information. >- */ >-#define VTNET_MEDIATYPE (IFM_ETHER | IFM_10G_T | IFM_FDX) >- >-/* > * Number of words to allocate for the VLAN shadow table. There is one > * bit for each VLAN. > */ > #define VTNET_VLAN_FILTER_NWORDS (4096 / 32) > >+/* We depend on these being the same size (and same layout). */ >+CTASSERT(sizeof(struct virtio_net_hdr_mrg_rxbuf) == >+ sizeof(struct virtio_net_hdr_v1)); >+ > /* >- * When mergeable buffers are not negotiated, the vtnet_rx_header structure >- * below is placed at the beginning of the mbuf data. Use 4 bytes of pad to >- * both keep the VirtIO header and the data non-contiguous and to keep the >- * frame's payload 4 byte aligned. >+ * In legacy VirtIO when mergeable buffers are not negotiated, this structure >+ * is placed at the beginning of the mbuf data. Use 4 bytes of pad to keep >+ * both the VirtIO header and the data non-contiguous and the frame's payload >+ * 4 byte aligned. Note this padding would not be necessary if the >+ * VIRTIO_F_ANY_LAYOUT feature was negotiated (but we don't support that yet). > * >- * When mergeable buffers are negotiated, the host puts the VirtIO header in >- * the beginning of the first mbuf's data. >+ * In modern VirtIO or when mergeable buffers are negotiated, the host puts >+ * the VirtIO header in the beginning of the first mbuf's data. 
> */ > #define VTNET_RX_HEADER_PAD 4 > struct vtnet_rx_header { >@@ -236,6 +256,7 @@ > union { > struct virtio_net_hdr hdr; > struct virtio_net_hdr_mrg_rxbuf mhdr; >+ struct virtio_net_hdr_v1 v1hdr; > } vth_uhdr; > > struct mbuf *vth_mbuf; >@@ -250,6 +271,11 @@ > */ > #define VTNET_MAX_MAC_ENTRIES 128 > >+/* >+ * The driver version of struct virtio_net_ctrl_mac but with our predefined >+ * number of MAC addresses allocated. This structure is shared with the host, >+ * so the nentries field is in the correct VirtIO endianness. >+ */ > struct vtnet_mac_table { > uint32_t nentries; > uint8_t macs[VTNET_MAX_MAC_ENTRIES][ETHER_ADDR_LEN]; >@@ -275,15 +301,16 @@ > (VTNET_CSUM_OFFLOAD | VTNET_CSUM_OFFLOAD_IPV6 | CSUM_TSO) > > /* Features desired/implemented by this driver. */ >-#define VTNET_FEATURES \ >+#define VTNET_COMMON_FEATURES \ > (VIRTIO_NET_F_MAC | \ > VIRTIO_NET_F_STATUS | \ >+ VIRTIO_NET_F_CTRL_GUEST_OFFLOADS | \ >+ VIRTIO_NET_F_MTU | \ > VIRTIO_NET_F_CTRL_VQ | \ > VIRTIO_NET_F_CTRL_RX | \ > VIRTIO_NET_F_CTRL_MAC_ADDR | \ > VIRTIO_NET_F_CTRL_VLAN | \ > VIRTIO_NET_F_CSUM | \ >- VIRTIO_NET_F_GSO | \ > VIRTIO_NET_F_HOST_TSO4 | \ > VIRTIO_NET_F_HOST_TSO6 | \ > VIRTIO_NET_F_HOST_ECN | \ >@@ -293,9 +320,13 @@ > VIRTIO_NET_F_GUEST_ECN | \ > VIRTIO_NET_F_MRG_RXBUF | \ > VIRTIO_NET_F_MQ | \ >+ VIRTIO_NET_F_SPEED_DUPLEX | \ > VIRTIO_RING_F_EVENT_IDX | \ > VIRTIO_RING_F_INDIRECT_DESC) > >+#define VTNET_MODERN_FEATURES (VTNET_COMMON_FEATURES) >+#define VTNET_LEGACY_FEATURES (VTNET_COMMON_FEATURES | VIRTIO_NET_F_GSO) >+ > /* > * The VIRTIO_NET_F_HOST_TSO[46] features permit us to send the host > * frames larger than 1514 bytes. >@@ -305,48 +336,38 @@ > > /* > * The VIRTIO_NET_F_GUEST_TSO[46] features permit the host to send us >- * frames larger than 1514 bytes. We do not yet support software LRO >- * via tcp_lro_rx(). >+ * frames larger than 1514 bytes. > */ > #define VTNET_LRO_FEATURES (VIRTIO_NET_F_GUEST_TSO4 | \ > VIRTIO_NET_F_GUEST_TSO6 | VIRTIO_NET_F_GUEST_ECN) > >+#define VTNET_MIN_MTU 68 > #define VTNET_MAX_MTU 65536 > #define VTNET_MAX_RX_SIZE 65550 > > /* >- * Used to preallocate the Vq indirect descriptors. The first segment >- * is reserved for the header, except for mergeable buffers since the >- * header is placed inline with the data. >+ * Used to preallocate the VQ indirect descriptors. Modern and mergeable >+ * buffers do not require one segment for the VirtIO header since it is >+ * placed inline at the beginning of the receive buffer. > */ >-#define VTNET_MRG_RX_SEGS 1 >-#define VTNET_MIN_RX_SEGS 2 >-#define VTNET_MAX_RX_SEGS 34 >-#define VTNET_MIN_TX_SEGS 32 >-#define VTNET_MAX_TX_SEGS 64 >+#define VTNET_RX_SEGS_HDR_INLINE 1 >+#define VTNET_RX_SEGS_HDR_SEPARATE 2 >+#define VTNET_RX_SEGS_LRO_NOMRG 34 >+#define VTNET_TX_SEGS_MIN 32 >+#define VTNET_TX_SEGS_MAX 64 > > /* > * Assert we can receive and transmit the maximum with regular > * size clusters. > */ >-CTASSERT(((VTNET_MAX_RX_SEGS - 1) * MCLBYTES) >= VTNET_MAX_RX_SIZE); >-CTASSERT(((VTNET_MAX_TX_SEGS - 1) * MCLBYTES) >= VTNET_MAX_MTU); >+CTASSERT(((VTNET_RX_SEGS_LRO_NOMRG - 1) * MCLBYTES) >= VTNET_MAX_RX_SIZE); >+CTASSERT(((VTNET_TX_SEGS_MAX - 1) * MCLBYTES) >= VTNET_MAX_MTU); > > /* > * Number of slots in the Tx bufrings. This value matches most other > * multiqueue drivers. > */ > #define VTNET_DEFAULT_BUFRING_SIZE 4096 >- >-/* >- * Determine how many mbufs are in each receive buffer. 
For LRO without >- * mergeable buffers, we must allocate an mbuf chain large enough to >- * hold both the vtnet_rx_header and the maximum receivable data. >- */ >-#define VTNET_NEEDED_RX_MBUFS(_sc, _clsize) \ >- ((_sc)->vtnet_flags & VTNET_FLAG_LRO_NOMRG) == 0 ? 1 : \ >- howmany(sizeof(struct vtnet_rx_header) + VTNET_MAX_RX_SIZE, \ >- (_clsize)) > > #define VTNET_CORE_MTX(_sc) &(_sc)->vtnet_mtx > #define VTNET_CORE_LOCK(_sc) mtx_lock(VTNET_CORE_MTX((_sc))) >diff -urN sys/dev/virtio.ori/network/virtio_net.h sys/dev/virtio/network/virtio_net.h >--- sys/dev/virtio.ori/network/virtio_net.h 2020-03-18 20:16:31.707426000 -0700 >+++ sys/dev/virtio/network/virtio_net.h 2020-03-19 16:44:27.330176000 -0700 >@@ -34,29 +34,33 @@ > #define _VIRTIO_NET_H > > /* The feature bitmap for virtio net */ >-#define VIRTIO_NET_F_CSUM 0x00001 /* Host handles pkts w/ partial csum */ >-#define VIRTIO_NET_F_GUEST_CSUM 0x00002 /* Guest handles pkts w/ partial csum*/ >-#define VIRTIO_NET_F_MAC 0x00020 /* Host has given MAC address. */ >-#define VIRTIO_NET_F_GSO 0x00040 /* Host handles pkts w/ any GSO type */ >-#define VIRTIO_NET_F_GUEST_TSO4 0x00080 /* Guest can handle TSOv4 in. */ >-#define VIRTIO_NET_F_GUEST_TSO6 0x00100 /* Guest can handle TSOv6 in. */ >-#define VIRTIO_NET_F_GUEST_ECN 0x00200 /* Guest can handle TSO[6] w/ ECN in.*/ >-#define VIRTIO_NET_F_GUEST_UFO 0x00400 /* Guest can handle UFO in. */ >-#define VIRTIO_NET_F_HOST_TSO4 0x00800 /* Host can handle TSOv4 in. */ >-#define VIRTIO_NET_F_HOST_TSO6 0x01000 /* Host can handle TSOv6 in. */ >-#define VIRTIO_NET_F_HOST_ECN 0x02000 /* Host can handle TSO[6] w/ ECN in. */ >-#define VIRTIO_NET_F_HOST_UFO 0x04000 /* Host can handle UFO in. */ >-#define VIRTIO_NET_F_MRG_RXBUF 0x08000 /* Host can merge receive buffers. */ >-#define VIRTIO_NET_F_STATUS 0x10000 /* virtio_net_config.status available*/ >-#define VIRTIO_NET_F_CTRL_VQ 0x20000 /* Control channel available */ >-#define VIRTIO_NET_F_CTRL_RX 0x40000 /* Control channel RX mode support */ >-#define VIRTIO_NET_F_CTRL_VLAN 0x80000 /* Control channel VLAN filtering */ >-#define VIRTIO_NET_F_CTRL_RX_EXTRA 0x100000 /* Extra RX mode control support */ >-#define VIRTIO_NET_F_GUEST_ANNOUNCE 0x200000 /* Announce device on network */ >-#define VIRTIO_NET_F_MQ 0x400000 /* Device supports RFS */ >-#define VIRTIO_NET_F_CTRL_MAC_ADDR 0x800000 /* Set MAC address */ >+#define VIRTIO_NET_F_CSUM 0x000001 /* Host handles pkts w/ partial csum */ >+#define VIRTIO_NET_F_GUEST_CSUM 0x000002 /* Guest handles pkts w/ partial csum*/ >+#define VIRTIO_NET_F_CTRL_GUEST_OFFLOADS 0x000004 /* Dynamic offload configuration. */ >+#define VIRTIO_NET_F_MTU 0x000008 /* Initial MTU advice */ >+#define VIRTIO_NET_F_MAC 0x000020 /* Host has given MAC address. */ >+#define VIRTIO_NET_F_GSO 0x000040 /* Host handles pkts w/ any GSO type */ >+#define VIRTIO_NET_F_GUEST_TSO4 0x000080 /* Guest can handle TSOv4 in. */ >+#define VIRTIO_NET_F_GUEST_TSO6 0x000100 /* Guest can handle TSOv6 in. */ >+#define VIRTIO_NET_F_GUEST_ECN 0x000200 /* Guest can handle TSO[6] w/ ECN in. */ >+#define VIRTIO_NET_F_GUEST_UFO 0x000400 /* Guest can handle UFO in. */ >+#define VIRTIO_NET_F_HOST_TSO4 0x000800 /* Host can handle TSOv4 in. */ >+#define VIRTIO_NET_F_HOST_TSO6 0x001000 /* Host can handle TSOv6 in. */ >+#define VIRTIO_NET_F_HOST_ECN 0x002000 /* Host can handle TSO[6] w/ ECN in. */ >+#define VIRTIO_NET_F_HOST_UFO 0x004000 /* Host can handle UFO in. */ >+#define VIRTIO_NET_F_MRG_RXBUF 0x008000 /* Host can merge receive buffers. 
*/ >+#define VIRTIO_NET_F_STATUS 0x010000 /* virtio_net_config.status available*/ >+#define VIRTIO_NET_F_CTRL_VQ 0x020000 /* Control channel available */ >+#define VIRTIO_NET_F_CTRL_RX 0x040000 /* Control channel RX mode support */ >+#define VIRTIO_NET_F_CTRL_VLAN 0x080000 /* Control channel VLAN filtering */ >+#define VIRTIO_NET_F_CTRL_RX_EXTRA 0x100000 /* Extra RX mode control support */ >+#define VIRTIO_NET_F_GUEST_ANNOUNCE 0x200000 /* Announce device on network */ >+#define VIRTIO_NET_F_MQ 0x400000 /* Device supports Receive Flow Steering */ >+#define VIRTIO_NET_F_CTRL_MAC_ADDR 0x800000 /* Set MAC address */ >+#define VIRTIO_NET_F_SPEED_DUPLEX (1ULL << 63) /* Device set linkspeed and duplex */ > > #define VIRTIO_NET_S_LINK_UP 1 /* Link is up */ >+#define VIRTIO_NET_S_ANNOUNCE 2 /* Announcement is needed */ > > struct virtio_net_config { > /* The config defining mac address (if VIRTIO_NET_F_MAC) */ >@@ -68,16 +72,32 @@ > * Legal values are between 1 and 0x8000. > */ > uint16_t max_virtqueue_pairs; >+ /* Default maximum transmit unit advice */ >+ uint16_t mtu; >+ /* >+ * speed, in units of 1Mb. All values 0 to INT_MAX are legal. >+ * Any other value stands for unknown. >+ */ >+ uint32_t speed; >+ /* >+ * 0x00 - half duplex >+ * 0x01 - full duplex >+ * Any other value stands for unknown. >+ */ >+ uint8_t duplex; > } __packed; > > /* >- * This is the first element of the scatter-gather list. If you don't >+ * This header comes first in the scatter-gather list. If you don't > * specify GSO or CSUM features, you can simply ignore the header. >+ * >+ * This is bitwise-equivalent to the legacy struct virtio_net_hdr_mrg_rxbuf, >+ * only flattened. > */ >-struct virtio_net_hdr { >-#define VIRTIO_NET_HDR_F_NEEDS_CSUM 1 /* Use csum_start,csum_offset*/ >+struct virtio_net_hdr_v1 { >+#define VIRTIO_NET_HDR_F_NEEDS_CSUM 1 /* Use csum_start, csum_offset */ > #define VIRTIO_NET_HDR_F_DATA_VALID 2 /* Csum is valid */ >- uint8_t flags; >+ uint8_t flags; > #define VIRTIO_NET_HDR_GSO_NONE 0 /* Not a GSO frame */ > #define VIRTIO_NET_HDR_GSO_TCPV4 1 /* GSO frame, IPv4 TCP (TSO) */ > #define VIRTIO_NET_HDR_GSO_UDP 3 /* GSO frame, IPv4 UDP (UFO) */ >@@ -88,9 +108,27 @@ > uint16_t gso_size; /* Bytes to append to hdr_len per frame */ > uint16_t csum_start; /* Position to start checksumming from */ > uint16_t csum_offset; /* Offset after that to place checksum */ >+ uint16_t num_buffers; /* Number of merged rx buffers */ > }; > > /* >+ * This header comes first in the scatter-gather list. >+ * For legacy virtio, if VIRTIO_F_ANY_LAYOUT is not negotiated, it must >+ * be the first element of the scatter-gather list. If you don't >+ * specify GSO or CSUM features, you can simply ignore the header. >+ */ >+struct virtio_net_hdr { >+ /* See VIRTIO_NET_HDR_F_* */ >+ uint8_t flags; >+ /* See VIRTIO_NET_HDR_GSO_* */ >+ uint8_t gso_type; >+ uint16_t hdr_len; /* Ethernet + IP + tcp/udp hdrs */ >+ uint16_t gso_size; /* Bytes to append to hdr_len per frame */ >+ uint16_t csum_start; /* Position to start checksumming from */ >+ uint16_t csum_offset; /* Offset after that to place checksum */ >+}; >+ >+/* > * This is the version of the header to use when the MRG_RXBUF > * feature has been negotiated. > */ >@@ -200,5 +238,20 @@ > #define VIRTIO_NET_CTRL_MQ_VQ_PAIRS_SET 0 > #define VIRTIO_NET_CTRL_MQ_VQ_PAIRS_MIN 1 > #define VIRTIO_NET_CTRL_MQ_VQ_PAIRS_MAX 0x8000 >+ >+/* >+ * Control network offloads >+ * >+ * Reconfigures the network offloads that Guest can handle. 
>+ * >+ * Available with the VIRTIO_NET_F_CTRL_GUEST_OFFLOADS feature bit. >+ * >+ * Command data format matches the feature bit mask exactly. >+ * >+ * See VIRTIO_NET_F_GUEST_* for the list of offloads >+ * that can be enabled/disabled. >+ */ >+#define VIRTIO_NET_CTRL_GUEST_OFFLOADS 5 >+#define VIRTIO_NET_CTRL_GUEST_OFFLOADS_SET 0 > > #endif /* _VIRTIO_NET_H */ >diff -urN sys/dev/virtio.ori/pci/virtio_pci.c sys/dev/virtio/pci/virtio_pci.c >--- sys/dev/virtio.ori/pci/virtio_pci.c 2020-03-18 20:16:31.705657000 -0700 >+++ sys/dev/virtio/pci/virtio_pci.c 2020-03-19 16:44:27.326653000 -0700 >@@ -1,7 +1,7 @@ > /*- > * SPDX-License-Identifier: BSD-2-Clause-FreeBSD > * >- * Copyright (c) 2011, Bryan Venteicher <bryanv@FreeBSD.org> >+ * Copyright (c) 2017, Bryan Venteicher <bryanv@FreeBSD.org> > * All rights reserved. > * > * Redistribution and use in source and binary forms, with or without >@@ -35,6 +35,8 @@ > #include <sys/systm.h> > #include <sys/bus.h> > #include <sys/kernel.h> >+#include <sys/sbuf.h> >+#include <sys/sysctl.h> > #include <sys/module.h> > #include <sys/malloc.h> > >@@ -49,355 +51,241 @@ > #include <dev/virtio/virtio.h> > #include <dev/virtio/virtqueue.h> > #include <dev/virtio/pci/virtio_pci.h> >+#include <dev/virtio/pci/virtio_pci_var.h> > >-#include "virtio_bus_if.h" >+#include "virtio_pci_if.h" > #include "virtio_if.h" > >-struct vtpci_interrupt { >- struct resource *vti_irq; >- int vti_rid; >- void *vti_handler; >-}; >- >-struct vtpci_virtqueue { >- struct virtqueue *vtv_vq; >- int vtv_no_intr; >-}; >- >-struct vtpci_softc { >- device_t vtpci_dev; >- struct resource *vtpci_res; >- struct resource *vtpci_msix_res; >- uint64_t vtpci_features; >- uint32_t vtpci_flags; >-#define VTPCI_FLAG_NO_MSI 0x0001 >-#define VTPCI_FLAG_NO_MSIX 0x0002 >-#define VTPCI_FLAG_LEGACY 0x1000 >-#define VTPCI_FLAG_MSI 0x2000 >-#define VTPCI_FLAG_MSIX 0x4000 >-#define VTPCI_FLAG_SHARED_MSIX 0x8000 >-#define VTPCI_FLAG_ITYPE_MASK 0xF000 >- >- /* This "bus" will only ever have one child. */ >- device_t vtpci_child_dev; >- struct virtio_feature_desc *vtpci_child_feat_desc; >- >- int vtpci_nvqs; >- struct vtpci_virtqueue *vtpci_vqs; >- >- /* >- * Ideally, each virtqueue that the driver provides a callback for will >- * receive its own MSIX vector. If there are not sufficient vectors >- * available, then attempt to have all the VQs share one vector. For >- * MSIX, the configuration changed notifications must be on their own >- * vector. >- * >- * If MSIX is not available, we will attempt to have the whole device >- * share one MSI vector, and then, finally, one legacy interrupt. 
>- */ >- struct vtpci_interrupt vtpci_device_interrupt; >- struct vtpci_interrupt *vtpci_msix_vq_interrupts; >- int vtpci_nmsix_resources; >-}; >- >-static int vtpci_probe(device_t); >-static int vtpci_attach(device_t); >-static int vtpci_detach(device_t); >-static int vtpci_suspend(device_t); >-static int vtpci_resume(device_t); >-static int vtpci_shutdown(device_t); >-static void vtpci_driver_added(device_t, driver_t *); >-static void vtpci_child_detached(device_t, device_t); >-static int vtpci_read_ivar(device_t, device_t, int, uintptr_t *); >-static int vtpci_write_ivar(device_t, device_t, int, uintptr_t); >- >-static uint64_t vtpci_negotiate_features(device_t, uint64_t); >-static int vtpci_with_feature(device_t, uint64_t); >-static int vtpci_alloc_virtqueues(device_t, int, int, >- struct vq_alloc_info *); >-static int vtpci_setup_intr(device_t, enum intr_type); >-static void vtpci_stop(device_t); >-static int vtpci_reinit(device_t, uint64_t); >-static void vtpci_reinit_complete(device_t); >-static void vtpci_notify_virtqueue(device_t, uint16_t); >-static uint8_t vtpci_get_status(device_t); >-static void vtpci_set_status(device_t, uint8_t); >-static void vtpci_read_dev_config(device_t, bus_size_t, void *, int); >-static void vtpci_write_dev_config(device_t, bus_size_t, void *, int); >- >-static void vtpci_describe_features(struct vtpci_softc *, const char *, >+static void vtpci_describe_features(struct vtpci_common *, const char *, > uint64_t); >-static void vtpci_probe_and_attach_child(struct vtpci_softc *); >- >-static int vtpci_alloc_msix(struct vtpci_softc *, int); >-static int vtpci_alloc_msi(struct vtpci_softc *); >-static int vtpci_alloc_intr_msix_pervq(struct vtpci_softc *); >-static int vtpci_alloc_intr_msix_shared(struct vtpci_softc *); >-static int vtpci_alloc_intr_msi(struct vtpci_softc *); >-static int vtpci_alloc_intr_legacy(struct vtpci_softc *); >-static int vtpci_alloc_interrupt(struct vtpci_softc *, int, int, >+static int vtpci_alloc_msix(struct vtpci_common *, int); >+static int vtpci_alloc_msi(struct vtpci_common *); >+static int vtpci_alloc_intr_msix_pervq(struct vtpci_common *); >+static int vtpci_alloc_intr_msix_shared(struct vtpci_common *); >+static int vtpci_alloc_intr_msi(struct vtpci_common *); >+static int vtpci_alloc_intr_intx(struct vtpci_common *); >+static int vtpci_alloc_interrupt(struct vtpci_common *, int, int, > struct vtpci_interrupt *); >-static int vtpci_alloc_intr_resources(struct vtpci_softc *); >+static void vtpci_free_interrupt(struct vtpci_common *, >+ struct vtpci_interrupt *); > >-static int vtpci_setup_legacy_interrupt(struct vtpci_softc *, >+static void vtpci_free_interrupts(struct vtpci_common *); >+static void vtpci_free_virtqueues(struct vtpci_common *); >+static void vtpci_cleanup_setup_intr_attempt(struct vtpci_common *); >+static int vtpci_alloc_intr_resources(struct vtpci_common *); >+static int vtpci_setup_intx_interrupt(struct vtpci_common *, > enum intr_type); >-static int vtpci_setup_pervq_msix_interrupts(struct vtpci_softc *, >+static int vtpci_setup_pervq_msix_interrupts(struct vtpci_common *, > enum intr_type); >-static int vtpci_setup_msix_interrupts(struct vtpci_softc *, >+static int vtpci_set_host_msix_vectors(struct vtpci_common *); >+static int vtpci_setup_msix_interrupts(struct vtpci_common *, > enum intr_type); >-static int vtpci_setup_interrupts(struct vtpci_softc *, enum intr_type); >- >-static int vtpci_register_msix_vector(struct vtpci_softc *, int, >- struct vtpci_interrupt *); >-static int 
vtpci_set_host_msix_vectors(struct vtpci_softc *); >-static int vtpci_reinit_virtqueue(struct vtpci_softc *, int); >- >-static void vtpci_free_interrupt(struct vtpci_softc *, >- struct vtpci_interrupt *); >-static void vtpci_free_interrupts(struct vtpci_softc *); >-static void vtpci_free_virtqueues(struct vtpci_softc *); >-static void vtpci_release_child_resources(struct vtpci_softc *); >-static void vtpci_cleanup_setup_intr_attempt(struct vtpci_softc *); >-static void vtpci_reset(struct vtpci_softc *); >- >-static void vtpci_select_virtqueue(struct vtpci_softc *, int); >- >-static void vtpci_legacy_intr(void *); >+static int vtpci_setup_intrs(struct vtpci_common *, enum intr_type); >+static int vtpci_reinit_virtqueue(struct vtpci_common *, int); >+static void vtpci_intx_intr(void *); > static int vtpci_vq_shared_intr_filter(void *); > static void vtpci_vq_shared_intr(void *); > static int vtpci_vq_intr_filter(void *); > static void vtpci_vq_intr(void *); > static void vtpci_config_intr(void *); >+static void vtpci_setup_sysctl(struct vtpci_common *); > >-#define vtpci_setup_msi_interrupt vtpci_setup_legacy_interrupt >+#define vtpci_setup_msi_interrupt vtpci_setup_intx_interrupt > >-#define VIRTIO_PCI_CONFIG(_sc) \ >- VIRTIO_PCI_CONFIG_OFF((((_sc)->vtpci_flags & VTPCI_FLAG_MSIX)) != 0) >- > /* >- * I/O port read/write wrappers. >+ * This module contains two drivers: >+ * - virtio_pci_legacy (vtpcil) for pre-V1 support >+ * - virtio_pci_modern (vtpcim) for V1 support > */ >-#define vtpci_read_config_1(sc, o) bus_read_1((sc)->vtpci_res, (o)) >-#define vtpci_read_config_2(sc, o) bus_read_2((sc)->vtpci_res, (o)) >-#define vtpci_read_config_4(sc, o) bus_read_4((sc)->vtpci_res, (o)) >-#define vtpci_write_config_1(sc, o, v) bus_write_1((sc)->vtpci_res, (o), (v)) >-#define vtpci_write_config_2(sc, o, v) bus_write_2((sc)->vtpci_res, (o), (v)) >-#define vtpci_write_config_4(sc, o, v) bus_write_4((sc)->vtpci_res, (o), (v)) >- >-/* Tunables. */ >-static int vtpci_disable_msix = 0; >-TUNABLE_INT("hw.virtio.pci.disable_msix", &vtpci_disable_msix); >- >-static device_method_t vtpci_methods[] = { >- /* Device interface. */ >- DEVMETHOD(device_probe, vtpci_probe), >- DEVMETHOD(device_attach, vtpci_attach), >- DEVMETHOD(device_detach, vtpci_detach), >- DEVMETHOD(device_suspend, vtpci_suspend), >- DEVMETHOD(device_resume, vtpci_resume), >- DEVMETHOD(device_shutdown, vtpci_shutdown), >- >- /* Bus interface. */ >- DEVMETHOD(bus_driver_added, vtpci_driver_added), >- DEVMETHOD(bus_child_detached, vtpci_child_detached), >- DEVMETHOD(bus_read_ivar, vtpci_read_ivar), >- DEVMETHOD(bus_write_ivar, vtpci_write_ivar), >- >- /* VirtIO bus interface. 
*/ >- DEVMETHOD(virtio_bus_negotiate_features, vtpci_negotiate_features), >- DEVMETHOD(virtio_bus_with_feature, vtpci_with_feature), >- DEVMETHOD(virtio_bus_alloc_virtqueues, vtpci_alloc_virtqueues), >- DEVMETHOD(virtio_bus_setup_intr, vtpci_setup_intr), >- DEVMETHOD(virtio_bus_stop, vtpci_stop), >- DEVMETHOD(virtio_bus_reinit, vtpci_reinit), >- DEVMETHOD(virtio_bus_reinit_complete, vtpci_reinit_complete), >- DEVMETHOD(virtio_bus_notify_vq, vtpci_notify_virtqueue), >- DEVMETHOD(virtio_bus_read_device_config, vtpci_read_dev_config), >- DEVMETHOD(virtio_bus_write_device_config, vtpci_write_dev_config), >- >- DEVMETHOD_END >-}; >- >-static driver_t vtpci_driver = { >- "virtio_pci", >- vtpci_methods, >- sizeof(struct vtpci_softc) >-}; >- >-devclass_t vtpci_devclass; >- >-DRIVER_MODULE(virtio_pci, pci, vtpci_driver, vtpci_devclass, 0, 0); > MODULE_VERSION(virtio_pci, 1); > MODULE_DEPEND(virtio_pci, pci, 1, 1, 1); > MODULE_DEPEND(virtio_pci, virtio, 1, 1, 1); > >-static int >-vtpci_probe(device_t dev) >+int vtpci_disable_msix = 0; >+TUNABLE_INT("hw.virtio.pci.disable_msix", &vtpci_disable_msix); >+ >+static uint8_t >+vtpci_read_isr(struct vtpci_common *cn) > { >- char desc[36]; >- const char *name; >+ return (VIRTIO_PCI_READ_ISR(cn->vtpci_dev)); >+} > >- if (pci_get_vendor(dev) != VIRTIO_PCI_VENDORID) >- return (ENXIO); >+static uint16_t >+vtpci_get_vq_size(struct vtpci_common *cn, int idx) >+{ >+ return (VIRTIO_PCI_GET_VQ_SIZE(cn->vtpci_dev, idx)); >+} > >- if (pci_get_device(dev) < VIRTIO_PCI_DEVICEID_MIN || >- pci_get_device(dev) > VIRTIO_PCI_DEVICEID_MAX) >- return (ENXIO); >+static bus_size_t >+vtpci_get_vq_notify_off(struct vtpci_common *cn, int idx) >+{ >+ return (VIRTIO_PCI_GET_VQ_NOTIFY_OFF(cn->vtpci_dev, idx)); >+} > >- if (pci_get_revid(dev) != VIRTIO_PCI_ABI_VERSION) >- return (ENXIO); >+static void >+vtpci_set_vq(struct vtpci_common *cn, struct virtqueue *vq) >+{ >+ VIRTIO_PCI_SET_VQ(cn->vtpci_dev, vq); >+} > >- name = virtio_device_name(pci_get_subdevice(dev)); >- if (name == NULL) >- name = "Unknown"; >+static void >+vtpci_disable_vq(struct vtpci_common *cn, int idx) >+{ >+ VIRTIO_PCI_DISABLE_VQ(cn->vtpci_dev, idx); >+} > >- snprintf(desc, sizeof(desc), "VirtIO PCI %s adapter", name); >- device_set_desc_copy(dev, desc); >- >- return (BUS_PROBE_DEFAULT); >+static int >+vtpci_register_cfg_msix(struct vtpci_common *cn, struct vtpci_interrupt *intr) >+{ >+ return (VIRTIO_PCI_REGISTER_CFG_MSIX(cn->vtpci_dev, intr)); > } > > static int >-vtpci_attach(device_t dev) >+vtpci_register_vq_msix(struct vtpci_common *cn, int idx, >+ struct vtpci_interrupt *intr) > { >- struct vtpci_softc *sc; >- device_t child; >- int rid; >+ return (VIRTIO_PCI_REGISTER_VQ_MSIX(cn->vtpci_dev, idx, intr)); >+} > >- sc = device_get_softc(dev); >- sc->vtpci_dev = dev; >+void >+vtpci_init(struct vtpci_common *cn, device_t dev, bool modern) >+{ > >+ cn->vtpci_dev = dev; >+ > pci_enable_busmaster(dev); > >- rid = PCIR_BAR(0); >- sc->vtpci_res = bus_alloc_resource_any(dev, SYS_RES_IOPORT, &rid, >- RF_ACTIVE); >- if (sc->vtpci_res == NULL) { >- device_printf(dev, "cannot map I/O space\n"); >- return (ENXIO); >- } >- >+ if (modern) >+ cn->vtpci_flags |= VTPCI_FLAG_MODERN; > if (pci_find_cap(dev, PCIY_MSI, NULL) != 0) >- sc->vtpci_flags |= VTPCI_FLAG_NO_MSI; >+ cn->vtpci_flags |= VTPCI_FLAG_NO_MSI; >+ if (pci_find_cap(dev, PCIY_MSIX, NULL) != 0) >+ cn->vtpci_flags |= VTPCI_FLAG_NO_MSIX; > >- if (pci_find_cap(dev, PCIY_MSIX, NULL) == 0) { >- rid = PCIR_BAR(1); >- sc->vtpci_msix_res = bus_alloc_resource_any(dev, >- 
SYS_RES_MEMORY, &rid, RF_ACTIVE); >- } >+ vtpci_setup_sysctl(cn); >+} > >- if (sc->vtpci_msix_res == NULL) >- sc->vtpci_flags |= VTPCI_FLAG_NO_MSIX; >+int >+vtpci_add_child(struct vtpci_common *cn) >+{ >+ device_t dev, child; > >- vtpci_reset(sc); >+ dev = cn->vtpci_dev; > >- /* Tell the host we've noticed this device. */ >- vtpci_set_status(dev, VIRTIO_CONFIG_STATUS_ACK); >- >- if ((child = device_add_child(dev, NULL, -1)) == NULL) { >+ child = device_add_child(dev, NULL, -1); >+ if (child == NULL) { > device_printf(dev, "cannot create child device\n"); >- vtpci_set_status(dev, VIRTIO_CONFIG_STATUS_FAILED); >- vtpci_detach(dev); > return (ENOMEM); > } > >- sc->vtpci_child_dev = child; >- vtpci_probe_and_attach_child(sc); >+ cn->vtpci_child_dev = child; > > return (0); > } > >-static int >-vtpci_detach(device_t dev) >+int >+vtpci_delete_child(struct vtpci_common *cn) > { >- struct vtpci_softc *sc; >- device_t child; >+ device_t dev, child; > int error; > >- sc = device_get_softc(dev); >+ dev = cn->vtpci_dev; > >- if ((child = sc->vtpci_child_dev) != NULL) { >+ child = cn->vtpci_child_dev; >+ if (child != NULL) { > error = device_delete_child(dev, child); > if (error) > return (error); >- sc->vtpci_child_dev = NULL; >+ cn->vtpci_child_dev = NULL; > } > >- vtpci_reset(sc); >- >- if (sc->vtpci_msix_res != NULL) { >- bus_release_resource(dev, SYS_RES_MEMORY, PCIR_BAR(1), >- sc->vtpci_msix_res); >- sc->vtpci_msix_res = NULL; >- } >- >- if (sc->vtpci_res != NULL) { >- bus_release_resource(dev, SYS_RES_IOPORT, PCIR_BAR(0), >- sc->vtpci_res); >- sc->vtpci_res = NULL; >- } >- > return (0); > } > >-static int >-vtpci_suspend(device_t dev) >+void >+vtpci_child_detached(struct vtpci_common *cn) > { > >- return (bus_generic_suspend(dev)); >-} >+ vtpci_release_child_resources(cn); > >-static int >-vtpci_resume(device_t dev) >-{ >- >- return (bus_generic_resume(dev)); >+ cn->vtpci_child_feat_desc = NULL; >+ cn->vtpci_host_features = 0; >+ cn->vtpci_features = 0; > } > >-static int >-vtpci_shutdown(device_t dev) >+int >+vtpci_reinit(struct vtpci_common *cn) > { >+ int idx, error; > >- (void) bus_generic_shutdown(dev); >- /* Forcibly stop the host device. */ >- vtpci_stop(dev); >+ for (idx = 0; idx < cn->vtpci_nvqs; idx++) { >+ error = vtpci_reinit_virtqueue(cn, idx); >+ if (error) >+ return (error); >+ } > >+ if (vtpci_is_msix_enabled(cn)) { >+ error = vtpci_set_host_msix_vectors(cn); >+ if (error) >+ return (error); >+ } >+ > return (0); > } > > static void >-vtpci_driver_added(device_t dev, driver_t *driver) >+vtpci_describe_features(struct vtpci_common *cn, const char *msg, >+ uint64_t features) > { >- struct vtpci_softc *sc; >+ device_t dev, child; > >- sc = device_get_softc(dev); >+ dev = cn->vtpci_dev; >+ child = cn->vtpci_child_dev; > >- vtpci_probe_and_attach_child(sc); >+ if (device_is_attached(child) || bootverbose == 0) >+ return; >+ >+ virtio_describe(dev, msg, features, cn->vtpci_child_feat_desc); > } > >-static void >-vtpci_child_detached(device_t dev, device_t child) >+uint64_t >+vtpci_negotiate_features(struct vtpci_common *cn, >+ uint64_t child_features, uint64_t host_features) > { >- struct vtpci_softc *sc; >+ uint64_t features; > >- sc = device_get_softc(dev); >+ cn->vtpci_host_features = host_features; >+ vtpci_describe_features(cn, "host", host_features); > >- vtpci_reset(sc); >- vtpci_release_child_resources(sc); >+ /* >+ * Limit negotiated features to what the driver, virtqueue, and >+ * host all support. 
> >+ */ >+ features = host_features & child_features; >+ features = virtio_filter_transport_features(features); >+ >+ cn->vtpci_features = features; >+ vtpci_describe_features(cn, "negotiated", features); >+ >+ return (features); > } > >-static int >-vtpci_read_ivar(device_t dev, device_t child, int index, uintptr_t *result) >+int >+vtpci_with_feature(struct vtpci_common *cn, uint64_t feature) > { >- struct vtpci_softc *sc; >+ return ((cn->vtpci_features & feature) != 0); >+} > >- sc = device_get_softc(dev); >+int >+vtpci_read_ivar(struct vtpci_common *cn, int index, uintptr_t *result) >+{ >+ device_t dev; >+ int error; > >- if (sc->vtpci_child_dev != child) >- return (ENOENT); >+ dev = cn->vtpci_dev; >+ error = 0; > > switch (index) { >- case VIRTIO_IVAR_DEVTYPE: > case VIRTIO_IVAR_SUBDEVICE: > *result = pci_get_subdevice(dev); > break; >@@ -410,100 +298,74 @@ > case VIRTIO_IVAR_SUBVENDOR: > *result = pci_get_subdevice(dev); > break; >+ case VIRTIO_IVAR_MODERN: >+ *result = vtpci_is_modern(cn); >+ break; > default: >- return (ENOENT); >+ error = ENOENT; > } > >- return (0); >+ return (error); > } > >-static int >-vtpci_write_ivar(device_t dev, device_t child, int index, uintptr_t value) >+int >+vtpci_write_ivar(struct vtpci_common *cn, int index, uintptr_t value) > { >- struct vtpci_softc *sc; >+ int error; > >- sc = device_get_softc(dev); >+ error = 0; > >- if (sc->vtpci_child_dev != child) >- return (ENOENT); >- > switch (index) { > case VIRTIO_IVAR_FEATURE_DESC: >- sc->vtpci_child_feat_desc = (void *) value; >+ cn->vtpci_child_feat_desc = (void *) value; > break; > default: >- return (ENOENT); >+ error = ENOENT; > } > >- return (0); >+ return (error); > } > >-static uint64_t >-vtpci_negotiate_features(device_t dev, uint64_t child_features) >+int >+vtpci_alloc_virtqueues(struct vtpci_common *cn, int flags, int nvqs, >+ struct vq_alloc_info *vq_info) > { >- struct vtpci_softc *sc; >- uint64_t host_features, features; >+ device_t dev; >+ int idx, align, error; > >- sc = device_get_softc(dev); >+ dev = cn->vtpci_dev; > >- host_features = vtpci_read_config_4(sc, VIRTIO_PCI_HOST_FEATURES); >- vtpci_describe_features(sc, "host", host_features); >- > /* >- * Limit negotiated features to what the driver, virtqueue, and >- * host all support. >+ * This is VIRTIO_PCI_VRING_ALIGN from legacy VirtIO. In modern VirtIO, >+ * the tables do not have to be allocated contiguously, but we do so >+ * anyway. 
> */ >- features = host_features & child_features; >- features = virtqueue_filter_features(features); >- sc->vtpci_features = features; >+ align = 4096; > >- vtpci_describe_features(sc, "negotiated", features); >- vtpci_write_config_4(sc, VIRTIO_PCI_GUEST_FEATURES, features); >- >- return (features); >-} >- >-static int >-vtpci_with_feature(device_t dev, uint64_t feature) >-{ >- struct vtpci_softc *sc; >- >- sc = device_get_softc(dev); >- >- return ((sc->vtpci_features & feature) != 0); >-} >- >-static int >-vtpci_alloc_virtqueues(device_t dev, int flags, int nvqs, >- struct vq_alloc_info *vq_info) >-{ >- struct vtpci_softc *sc; >- struct virtqueue *vq; >- struct vtpci_virtqueue *vqx; >- struct vq_alloc_info *info; >- int idx, error; >- uint16_t size; >- >- sc = device_get_softc(dev); >- >- if (sc->vtpci_nvqs != 0) >+ if (cn->vtpci_nvqs != 0) > return (EALREADY); > if (nvqs <= 0) > return (EINVAL); > >- sc->vtpci_vqs = malloc(nvqs * sizeof(struct vtpci_virtqueue), >+ cn->vtpci_vqs = malloc(nvqs * sizeof(struct vtpci_virtqueue), > M_DEVBUF, M_NOWAIT | M_ZERO); >- if (sc->vtpci_vqs == NULL) >+ if (cn->vtpci_vqs == NULL) > return (ENOMEM); > > for (idx = 0; idx < nvqs; idx++) { >- vqx = &sc->vtpci_vqs[idx]; >+ struct vtpci_virtqueue *vqx; >+ struct vq_alloc_info *info; >+ struct virtqueue *vq; >+ bus_size_t notify_offset; >+ uint16_t size; >+ >+ vqx = &cn->vtpci_vqs[idx]; > info = &vq_info[idx]; > >- vtpci_select_virtqueue(sc, idx); >- size = vtpci_read_config_2(sc, VIRTIO_PCI_QUEUE_NUM); >+ size = vtpci_get_vq_size(cn, idx); >+ notify_offset = vtpci_get_vq_notify_off(cn, idx); > >- error = virtqueue_alloc(dev, idx, size, VIRTIO_PCI_VRING_ALIGN, >+ error = virtqueue_alloc(dev, idx, size, notify_offset, align, > 0xFFFFFFFFUL, info, &vq); > if (error) { > device_printf(dev, >@@ -511,270 +373,27 @@ > break; > } > >- vtpci_write_config_4(sc, VIRTIO_PCI_QUEUE_PFN, >- virtqueue_paddr(vq) >> VIRTIO_PCI_QUEUE_ADDR_SHIFT); >+ vtpci_set_vq(cn, vq); > > vqx->vtv_vq = *info->vqai_vq = vq; > vqx->vtv_no_intr = info->vqai_intr == NULL; > >- sc->vtpci_nvqs++; >+ cn->vtpci_nvqs++; > } > > if (error) >- vtpci_free_virtqueues(sc); >+ vtpci_free_virtqueues(cn); > > return (error); > } > > static int >-vtpci_setup_intr(device_t dev, enum intr_type type) >+vtpci_alloc_msix(struct vtpci_common *cn, int nvectors) > { >- struct vtpci_softc *sc; >- int attempt, error; >- >- sc = device_get_softc(dev); >- >- for (attempt = 0; attempt < 5; attempt++) { >- /* >- * Start with the most desirable interrupt configuration and >- * fallback towards less desirable ones. 
>- */ >- switch (attempt) { >- case 0: >- error = vtpci_alloc_intr_msix_pervq(sc); >- break; >- case 1: >- error = vtpci_alloc_intr_msix_shared(sc); >- break; >- case 2: >- error = vtpci_alloc_intr_msi(sc); >- break; >- case 3: >- error = vtpci_alloc_intr_legacy(sc); >- break; >- default: >- device_printf(dev, >- "exhausted all interrupt allocation attempts\n"); >- return (ENXIO); >- } >- >- if (error == 0 && vtpci_setup_interrupts(sc, type) == 0) >- break; >- >- vtpci_cleanup_setup_intr_attempt(sc); >- } >- >- if (bootverbose) { >- if (sc->vtpci_flags & VTPCI_FLAG_LEGACY) >- device_printf(dev, "using legacy interrupt\n"); >- else if (sc->vtpci_flags & VTPCI_FLAG_MSI) >- device_printf(dev, "using MSI interrupt\n"); >- else if (sc->vtpci_flags & VTPCI_FLAG_SHARED_MSIX) >- device_printf(dev, "using shared MSIX interrupts\n"); >- else >- device_printf(dev, "using per VQ MSIX interrupts\n"); >- } >- >- return (0); >-} >- >-static void >-vtpci_stop(device_t dev) >-{ >- >- vtpci_reset(device_get_softc(dev)); >-} >- >-static int >-vtpci_reinit(device_t dev, uint64_t features) >-{ >- struct vtpci_softc *sc; >- int idx, error; >- >- sc = device_get_softc(dev); >- >- /* >- * Redrive the device initialization. This is a bit of an abuse of >- * the specification, but VirtualBox, QEMU/KVM, and BHyVe seem to >- * play nice. >- * >- * We do not allow the host device to change from what was originally >- * negotiated beyond what the guest driver changed. MSIX state should >- * not change, number of virtqueues and their size remain the same, etc. >- * This will need to be rethought when we want to support migration. >- */ >- >- if (vtpci_get_status(dev) != VIRTIO_CONFIG_STATUS_RESET) >- vtpci_stop(dev); >- >- /* >- * Quickly drive the status through ACK and DRIVER. The device >- * does not become usable again until vtpci_reinit_complete(). 
>- */ >- vtpci_set_status(dev, VIRTIO_CONFIG_STATUS_ACK); >- vtpci_set_status(dev, VIRTIO_CONFIG_STATUS_DRIVER); >- >- vtpci_negotiate_features(dev, features); >- >- for (idx = 0; idx < sc->vtpci_nvqs; idx++) { >- error = vtpci_reinit_virtqueue(sc, idx); >- if (error) >- return (error); >- } >- >- if (sc->vtpci_flags & VTPCI_FLAG_MSIX) { >- error = vtpci_set_host_msix_vectors(sc); >- if (error) >- return (error); >- } >- >- return (0); >-} >- >-static void >-vtpci_reinit_complete(device_t dev) >-{ >- >- vtpci_set_status(dev, VIRTIO_CONFIG_STATUS_DRIVER_OK); >-} >- >-static void >-vtpci_notify_virtqueue(device_t dev, uint16_t queue) >-{ >- struct vtpci_softc *sc; >- >- sc = device_get_softc(dev); >- >- vtpci_write_config_2(sc, VIRTIO_PCI_QUEUE_NOTIFY, queue); >-} >- >-static uint8_t >-vtpci_get_status(device_t dev) >-{ >- struct vtpci_softc *sc; >- >- sc = device_get_softc(dev); >- >- return (vtpci_read_config_1(sc, VIRTIO_PCI_STATUS)); >-} >- >-static void >-vtpci_set_status(device_t dev, uint8_t status) >-{ >- struct vtpci_softc *sc; >- >- sc = device_get_softc(dev); >- >- if (status != VIRTIO_CONFIG_STATUS_RESET) >- status |= vtpci_get_status(dev); >- >- vtpci_write_config_1(sc, VIRTIO_PCI_STATUS, status); >-} >- >-static void >-vtpci_read_dev_config(device_t dev, bus_size_t offset, >- void *dst, int length) >-{ >- struct vtpci_softc *sc; >- bus_size_t off; >- uint8_t *d; >- int size; >- >- sc = device_get_softc(dev); >- off = VIRTIO_PCI_CONFIG(sc) + offset; >- >- for (d = dst; length > 0; d += size, off += size, length -= size) { >- if (length >= 4) { >- size = 4; >- *(uint32_t *)d = vtpci_read_config_4(sc, off); >- } else if (length >= 2) { >- size = 2; >- *(uint16_t *)d = vtpci_read_config_2(sc, off); >- } else { >- size = 1; >- *d = vtpci_read_config_1(sc, off); >- } >- } >-} >- >-static void >-vtpci_write_dev_config(device_t dev, bus_size_t offset, >- void *src, int length) >-{ >- struct vtpci_softc *sc; >- bus_size_t off; >- uint8_t *s; >- int size; >- >- sc = device_get_softc(dev); >- off = VIRTIO_PCI_CONFIG(sc) + offset; >- >- for (s = src; length > 0; s += size, off += size, length -= size) { >- if (length >= 4) { >- size = 4; >- vtpci_write_config_4(sc, off, *(uint32_t *)s); >- } else if (length >= 2) { >- size = 2; >- vtpci_write_config_2(sc, off, *(uint16_t *)s); >- } else { >- size = 1; >- vtpci_write_config_1(sc, off, *s); >- } >- } >-} >- >-static void >-vtpci_describe_features(struct vtpci_softc *sc, const char *msg, >- uint64_t features) >-{ >- device_t dev, child; >- >- dev = sc->vtpci_dev; >- child = sc->vtpci_child_dev; >- >- if (device_is_attached(child) || bootverbose == 0) >- return; >- >- virtio_describe(dev, msg, features, sc->vtpci_child_feat_desc); >-} >- >-static void >-vtpci_probe_and_attach_child(struct vtpci_softc *sc) >-{ >- device_t dev, child; >- >- dev = sc->vtpci_dev; >- child = sc->vtpci_child_dev; >- >- if (child == NULL) >- return; >- >- if (device_get_state(child) != DS_NOTPRESENT) >- return; >- >- if (device_probe(child) != 0) >- return; >- >- vtpci_set_status(dev, VIRTIO_CONFIG_STATUS_DRIVER); >- if (device_attach(child) != 0) { >- vtpci_set_status(dev, VIRTIO_CONFIG_STATUS_FAILED); >- vtpci_reset(sc); >- vtpci_release_child_resources(sc); >- /* Reset status for future attempt. 
*/ >- vtpci_set_status(dev, VIRTIO_CONFIG_STATUS_ACK); >- } else { >- vtpci_set_status(dev, VIRTIO_CONFIG_STATUS_DRIVER_OK); >- VIRTIO_ATTACH_COMPLETED(child); >- } >-} >- >-static int >-vtpci_alloc_msix(struct vtpci_softc *sc, int nvectors) >-{ > device_t dev; > int nmsix, cnt, required; > >- dev = sc->vtpci_dev; >+ dev = cn->vtpci_dev; > > /* Allocate an additional vector for the config changes. */ > required = nvectors + 1; >@@ -785,7 +404,7 @@ > > cnt = required; > if (pci_alloc_msix(dev, &cnt) == 0 && cnt >= required) { >- sc->vtpci_nmsix_resources = required; >+ cn->vtpci_nmsix_resources = required; > return (0); > } > >@@ -795,12 +414,12 @@ > } > > static int >-vtpci_alloc_msi(struct vtpci_softc *sc) >+vtpci_alloc_msi(struct vtpci_common *cn) > { > device_t dev; > int nmsi, cnt, required; > >- dev = sc->vtpci_dev; >+ dev = cn->vtpci_dev; > required = 1; > > nmsi = pci_msi_count(dev); >@@ -817,80 +436,78 @@ > } > > static int >-vtpci_alloc_intr_msix_pervq(struct vtpci_softc *sc) >+vtpci_alloc_intr_msix_pervq(struct vtpci_common *cn) > { > int i, nvectors, error; > >- if (vtpci_disable_msix != 0 || >- sc->vtpci_flags & VTPCI_FLAG_NO_MSIX) >+ if (vtpci_disable_msix != 0 || cn->vtpci_flags & VTPCI_FLAG_NO_MSIX) > return (ENOTSUP); > >- for (nvectors = 0, i = 0; i < sc->vtpci_nvqs; i++) { >- if (sc->vtpci_vqs[i].vtv_no_intr == 0) >+ for (nvectors = 0, i = 0; i < cn->vtpci_nvqs; i++) { >+ if (cn->vtpci_vqs[i].vtv_no_intr == 0) > nvectors++; > } > >- error = vtpci_alloc_msix(sc, nvectors); >+ error = vtpci_alloc_msix(cn, nvectors); > if (error) > return (error); > >- sc->vtpci_flags |= VTPCI_FLAG_MSIX; >+ cn->vtpci_flags |= VTPCI_FLAG_MSIX; > > return (0); > } > > static int >-vtpci_alloc_intr_msix_shared(struct vtpci_softc *sc) >+vtpci_alloc_intr_msix_shared(struct vtpci_common *cn) > { > int error; > >- if (vtpci_disable_msix != 0 || >- sc->vtpci_flags & VTPCI_FLAG_NO_MSIX) >+ if (vtpci_disable_msix != 0 || cn->vtpci_flags & VTPCI_FLAG_NO_MSIX) > return (ENOTSUP); > >- error = vtpci_alloc_msix(sc, 1); >+ error = vtpci_alloc_msix(cn, 1); > if (error) > return (error); > >- sc->vtpci_flags |= VTPCI_FLAG_MSIX | VTPCI_FLAG_SHARED_MSIX; >+ cn->vtpci_flags |= VTPCI_FLAG_MSIX | VTPCI_FLAG_SHARED_MSIX; > > return (0); > } > > static int >-vtpci_alloc_intr_msi(struct vtpci_softc *sc) >+vtpci_alloc_intr_msi(struct vtpci_common *cn) > { > int error; > > /* Only BHyVe supports MSI. 
*/ >- if (sc->vtpci_flags & VTPCI_FLAG_NO_MSI) >+ if (cn->vtpci_flags & VTPCI_FLAG_NO_MSI) > return (ENOTSUP); > >- error = vtpci_alloc_msi(sc); >+ error = vtpci_alloc_msi(cn); > if (error) > return (error); > >- sc->vtpci_flags |= VTPCI_FLAG_MSI; >+ cn->vtpci_flags |= VTPCI_FLAG_MSI; > > return (0); > } > > static int >-vtpci_alloc_intr_legacy(struct vtpci_softc *sc) >+vtpci_alloc_intr_intx(struct vtpci_common *cn) > { > >- sc->vtpci_flags |= VTPCI_FLAG_LEGACY; >+ cn->vtpci_flags |= VTPCI_FLAG_INTX; > > return (0); > } > > static int >-vtpci_alloc_interrupt(struct vtpci_softc *sc, int rid, int flags, >+vtpci_alloc_interrupt(struct vtpci_common *cn, int rid, int flags, > struct vtpci_interrupt *intr) > { > struct resource *irq; > >- irq = bus_alloc_resource_any(sc->vtpci_dev, SYS_RES_IRQ, &rid, flags); >+ irq = bus_alloc_resource_any(cn->vtpci_dev, SYS_RES_IRQ, &rid, flags); > if (irq == NULL) > return (ENXIO); > >@@ -900,40 +517,136 @@ > return (0); > } > >+static void >+vtpci_free_interrupt(struct vtpci_common *cn, struct vtpci_interrupt *intr) >+{ >+ device_t dev; >+ >+ dev = cn->vtpci_dev; >+ >+ if (intr->vti_handler != NULL) { >+ bus_teardown_intr(dev, intr->vti_irq, intr->vti_handler); >+ intr->vti_handler = NULL; >+ } >+ >+ if (intr->vti_irq != NULL) { >+ bus_release_resource(dev, SYS_RES_IRQ, intr->vti_rid, >+ intr->vti_irq); >+ intr->vti_irq = NULL; >+ intr->vti_rid = -1; >+ } >+} >+ >+static void >+vtpci_free_interrupts(struct vtpci_common *cn) >+{ >+ struct vtpci_interrupt *intr; >+ int i, nvq_intrs; >+ >+ vtpci_free_interrupt(cn, &cn->vtpci_device_interrupt); >+ >+ if (cn->vtpci_nmsix_resources != 0) { >+ nvq_intrs = cn->vtpci_nmsix_resources - 1; >+ cn->vtpci_nmsix_resources = 0; >+ >+ if ((intr = cn->vtpci_msix_vq_interrupts) != NULL) { >+ for (i = 0; i < nvq_intrs; i++, intr++) >+ vtpci_free_interrupt(cn, intr); >+ >+ free(cn->vtpci_msix_vq_interrupts, M_DEVBUF); >+ cn->vtpci_msix_vq_interrupts = NULL; >+ } >+ } >+ >+ if (cn->vtpci_flags & (VTPCI_FLAG_MSI | VTPCI_FLAG_MSIX)) >+ pci_release_msi(cn->vtpci_dev); >+ >+ cn->vtpci_flags &= ~VTPCI_FLAG_ITYPE_MASK; >+} >+ >+static void >+vtpci_free_virtqueues(struct vtpci_common *cn) >+{ >+ struct vtpci_virtqueue *vqx; >+ int idx; >+ >+ for (idx = 0; idx < cn->vtpci_nvqs; idx++) { >+ vtpci_disable_vq(cn, idx); >+ >+ vqx = &cn->vtpci_vqs[idx]; >+ virtqueue_free(vqx->vtv_vq); >+ vqx->vtv_vq = NULL; >+ } >+ >+ free(cn->vtpci_vqs, M_DEVBUF); >+ cn->vtpci_vqs = NULL; >+ cn->vtpci_nvqs = 0; >+} >+ >+void >+vtpci_release_child_resources(struct vtpci_common *cn) >+{ >+ >+ vtpci_free_interrupts(cn); >+ vtpci_free_virtqueues(cn); >+} >+ >+static void >+vtpci_cleanup_setup_intr_attempt(struct vtpci_common *cn) >+{ >+ int idx; >+ >+ if (cn->vtpci_flags & VTPCI_FLAG_MSIX) { >+ vtpci_register_cfg_msix(cn, NULL); >+ >+ for (idx = 0; idx < cn->vtpci_nvqs; idx++) >+ vtpci_register_vq_msix(cn, idx, NULL); >+ } >+ >+ vtpci_free_interrupts(cn); >+} >+ > static int >-vtpci_alloc_intr_resources(struct vtpci_softc *sc) >+vtpci_alloc_intr_resources(struct vtpci_common *cn) > { > struct vtpci_interrupt *intr; > int i, rid, flags, nvq_intrs, error; > >- rid = 0; > flags = RF_ACTIVE; > >- if (sc->vtpci_flags & VTPCI_FLAG_LEGACY) >+ if (cn->vtpci_flags & VTPCI_FLAG_INTX) { >+ rid = 0; > flags |= RF_SHAREABLE; >- else >+ } else > rid = 1; > > /* >- * For legacy and MSI interrupts, this single resource handles all >- * interrupts. For MSIX, this resource is used for the configuration >- * changed interrupt. 
>+ * When using INTX or MSI interrupts, this resource handles all >+ * interrupts. When using MSIX, this resource handles just the >+ * configuration changed interrupt. > */ >- intr = &sc->vtpci_device_interrupt; >- error = vtpci_alloc_interrupt(sc, rid, flags, intr); >- if (error || sc->vtpci_flags & (VTPCI_FLAG_LEGACY | VTPCI_FLAG_MSI)) >+ intr = &cn->vtpci_device_interrupt; >+ >+ error = vtpci_alloc_interrupt(cn, rid, flags, intr); >+ if (error || cn->vtpci_flags & (VTPCI_FLAG_INTX | VTPCI_FLAG_MSI)) > return (error); > >- /* Subtract one for the configuration changed interrupt. */ >- nvq_intrs = sc->vtpci_nmsix_resources - 1; >+ /* >+ * Now allocate the interrupts for the virtqueues. This may be one >+ * for all the virtqueues, or one for each virtqueue. Subtract one >+ * below because of the configuration changed interrupt. >+ */ >+ nvq_intrs = cn->vtpci_nmsix_resources - 1; > >- intr = sc->vtpci_msix_vq_interrupts = malloc(nvq_intrs * >+ cn->vtpci_msix_vq_interrupts = malloc(nvq_intrs * > sizeof(struct vtpci_interrupt), M_DEVBUF, M_NOWAIT | M_ZERO); >- if (sc->vtpci_msix_vq_interrupts == NULL) >+ if (cn->vtpci_msix_vq_interrupts == NULL) > return (ENOMEM); > >+ intr = cn->vtpci_msix_vq_interrupts; >+ > for (i = 0, rid++; i < nvq_intrs; i++, rid++, intr++) { >- error = vtpci_alloc_interrupt(sc, rid, flags, intr); >+ error = vtpci_alloc_interrupt(cn, rid, flags, intr); > if (error) > return (error); > } >@@ -942,34 +655,35 @@ > static int >-vtpci_setup_legacy_interrupt(struct vtpci_softc *sc, enum intr_type type) >+vtpci_setup_intx_interrupt(struct vtpci_common *cn, enum intr_type type) > { > struct vtpci_interrupt *intr; > int error; > >- intr = &sc->vtpci_device_interrupt; >- error = bus_setup_intr(sc->vtpci_dev, intr->vti_irq, type, NULL, >- vtpci_legacy_intr, sc, &intr->vti_handler); >+ intr = &cn->vtpci_device_interrupt; > >+ error = bus_setup_intr(cn->vtpci_dev, intr->vti_irq, type, NULL, >+ vtpci_intx_intr, cn, &intr->vti_handler); >+ > return (error); > } > > static int >-vtpci_setup_pervq_msix_interrupts(struct vtpci_softc *sc, enum intr_type type) >+vtpci_setup_pervq_msix_interrupts(struct vtpci_common *cn, enum intr_type type) > { > struct vtpci_virtqueue *vqx; > struct vtpci_interrupt *intr; > int i, error; > >- intr = sc->vtpci_msix_vq_interrupts; >+ intr = cn->vtpci_msix_vq_interrupts; > >- for (i = 0; i < sc->vtpci_nvqs; i++) { >- vqx = &sc->vtpci_vqs[i]; >+ for (i = 0; i < cn->vtpci_nvqs; i++) { >+ vqx = &cn->vtpci_vqs[i]; > > if (vqx->vtv_no_intr) > continue; > >- error = bus_setup_intr(sc->vtpci_dev, intr->vti_irq, type, >+ error = bus_setup_intr(cn->vtpci_dev, intr->vti_irq, type, > vtpci_vq_intr_filter, vtpci_vq_intr, vqx->vtv_vq, > &intr->vti_handler); > if (error) >@@ -982,106 +696,24 @@ > static int >-vtpci_setup_msix_interrupts(struct vtpci_softc *sc, enum intr_type type) >+vtpci_set_host_msix_vectors(struct vtpci_common *cn) > { >- device_t dev; >- struct vtpci_interrupt *intr; >- int error; >- >- dev = sc->vtpci_dev; >- intr = &sc->vtpci_device_interrupt; >- >- error = bus_setup_intr(dev, intr->vti_irq, type, NULL, >- vtpci_config_intr, sc, &intr->vti_handler); >- if (error) >- return (error); >- >- if (sc->vtpci_flags & VTPCI_FLAG_SHARED_MSIX) { >- intr = sc->vtpci_msix_vq_interrupts; >- error = bus_setup_intr(dev, intr->vti_irq, type, >- vtpci_vq_shared_intr_filter, vtpci_vq_shared_intr, sc, >- &intr->vti_handler); >- } else >- error = vtpci_setup_pervq_msix_interrupts(sc, type); >- >- return (error ? 
error : vtpci_set_host_msix_vectors(sc)); >-} >- >-static int >-vtpci_setup_interrupts(struct vtpci_softc *sc, enum intr_type type) >-{ >- int error; >- >- type |= INTR_MPSAFE; >- KASSERT(sc->vtpci_flags & VTPCI_FLAG_ITYPE_MASK, >- ("%s: no interrupt type selected %#x", __func__, sc->vtpci_flags)); >- >- error = vtpci_alloc_intr_resources(sc); >- if (error) >- return (error); >- >- if (sc->vtpci_flags & VTPCI_FLAG_LEGACY) >- error = vtpci_setup_legacy_interrupt(sc, type); >- else if (sc->vtpci_flags & VTPCI_FLAG_MSI) >- error = vtpci_setup_msi_interrupt(sc, type); >- else >- error = vtpci_setup_msix_interrupts(sc, type); >- >- return (error); >-} >- >-static int >-vtpci_register_msix_vector(struct vtpci_softc *sc, int offset, >- struct vtpci_interrupt *intr) >-{ >- device_t dev; >- uint16_t vector; >- >- dev = sc->vtpci_dev; >- >- if (intr != NULL) { >- /* Map from guest rid to host vector. */ >- vector = intr->vti_rid - 1; >- } else >- vector = VIRTIO_MSI_NO_VECTOR; >- >- vtpci_write_config_2(sc, offset, vector); >- >- /* Read vector to determine if the host had sufficient resources. */ >- if (vtpci_read_config_2(sc, offset) != vector) { >- device_printf(dev, >- "insufficient host resources for MSIX interrupts\n"); >- return (ENODEV); >- } >- >- return (0); >-} >- >-static int >-vtpci_set_host_msix_vectors(struct vtpci_softc *sc) >-{ > struct vtpci_interrupt *intr, *tintr; >- int idx, offset, error; >+ int idx, error; > >- intr = &sc->vtpci_device_interrupt; >- offset = VIRTIO_MSI_CONFIG_VECTOR; >- >- error = vtpci_register_msix_vector(sc, offset, intr); >+ intr = &cn->vtpci_device_interrupt; >+ error = vtpci_register_cfg_msix(cn, intr); > if (error) > return (error); > >- intr = sc->vtpci_msix_vq_interrupts; >- offset = VIRTIO_MSI_QUEUE_VECTOR; >- >- for (idx = 0; idx < sc->vtpci_nvqs; idx++) { >- vtpci_select_virtqueue(sc, idx); >- >- if (sc->vtpci_vqs[idx].vtv_no_intr) >+ intr = cn->vtpci_msix_vq_interrupts; >+ for (idx = 0; idx < cn->vtpci_nvqs; idx++) { >+ if (cn->vtpci_vqs[idx].vtv_no_intr) > tintr = NULL; > else > tintr = intr; > >- error = vtpci_register_msix_vector(sc, offset, tintr); >+ error = vtpci_register_vq_msix(cn, idx, tintr); > if (error) > break; > >@@ -1089,8 +721,8 @@ > * For shared MSIX, all the virtqueues share the first > * interrupt. 
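(Illustrative note, not part of the patch.) As the comment above says, in shared-MSIX mode every virtqueue with a callback stays on the first queue vector, which is why the loop only advances intr in per-VQ mode; queues that declared vtv_no_intr get VIRTIO_MSI_NO_VECTOR. A minimal user-space sketch of that vector-assignment rule, with invented names (fake_vq; MSI_NO_VECTOR mirrors VIRTIO_MSI_NO_VECTOR):

#include <stdbool.h>
#include <stdio.h>

#define MSI_NO_VECTOR	0xFFFF	/* mirrors VIRTIO_MSI_NO_VECTOR */

struct fake_vq {
	bool no_intr;			/* mirrors vtv_no_intr */
};

int
main(void)
{
	struct fake_vq vqs[] = { { false }, { false }, { true } };
	bool shared = true;		/* as if VTPCI_FLAG_SHARED_MSIX */
	unsigned vector = 1;		/* vector 0 is the config interrupt */

	for (int i = 0; i < 3; i++) {
		if (vqs[i].no_intr) {
			printf("vq %d -> 0x%04x (no vector)\n", i,
			    MSI_NO_VECTOR);
			continue;
		}
		printf("vq %d -> vector %u\n", i, vector);
		if (!shared)
			vector++;	/* per-VQ mode advances the vector */
	}
	return (0);
}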
> */ >- if (!sc->vtpci_vqs[idx].vtv_no_intr && >- (sc->vtpci_flags & VTPCI_FLAG_SHARED_MSIX) == 0) >+ if (!cn->vtpci_vqs[idx].vtv_no_intr && >+ (cn->vtpci_flags & VTPCI_FLAG_SHARED_MSIX) == 0) > intr++; > } > >@@ -1098,164 +730,141 @@ > } > > static int >-vtpci_reinit_virtqueue(struct vtpci_softc *sc, int idx) >+vtpci_setup_msix_interrupts(struct vtpci_common *cn, enum intr_type type) > { >- struct vtpci_virtqueue *vqx; >- struct virtqueue *vq; >+ struct vtpci_interrupt *intr; > int error; >- uint16_t size; > >- vqx = &sc->vtpci_vqs[idx]; >- vq = vqx->vtv_vq; >+ intr = &cn->vtpci_device_interrupt; > >- KASSERT(vq != NULL, ("%s: vq %d not allocated", __func__, idx)); >- >- vtpci_select_virtqueue(sc, idx); >- size = vtpci_read_config_2(sc, VIRTIO_PCI_QUEUE_NUM); >- >- error = virtqueue_reinit(vq, size); >+ error = bus_setup_intr(cn->vtpci_dev, intr->vti_irq, type, NULL, >+ vtpci_config_intr, cn, &intr->vti_handler); > if (error) > return (error); > >- vtpci_write_config_4(sc, VIRTIO_PCI_QUEUE_PFN, >- virtqueue_paddr(vq) >> VIRTIO_PCI_QUEUE_ADDR_SHIFT); >+ if (cn->vtpci_flags & VTPCI_FLAG_SHARED_MSIX) { >+ intr = &cn->vtpci_msix_vq_interrupts[0]; > >- return (0); >+ error = bus_setup_intr(cn->vtpci_dev, intr->vti_irq, type, >+ vtpci_vq_shared_intr_filter, vtpci_vq_shared_intr, cn, >+ &intr->vti_handler); >+ } else >+ error = vtpci_setup_pervq_msix_interrupts(cn, type); >+ >+ return (error ? error : vtpci_set_host_msix_vectors(cn)); > } > >-static void >-vtpci_free_interrupt(struct vtpci_softc *sc, struct vtpci_interrupt *intr) >+static int >+vtpci_setup_intrs(struct vtpci_common *cn, enum intr_type type) > { >- device_t dev; >+ int error; > >- dev = sc->vtpci_dev; >+ type |= INTR_MPSAFE; >+ KASSERT(cn->vtpci_flags & VTPCI_FLAG_ITYPE_MASK, >+ ("%s: no interrupt type selected %#x", __func__, cn->vtpci_flags)); > >- if (intr->vti_handler != NULL) { >- bus_teardown_intr(dev, intr->vti_irq, intr->vti_handler); >- intr->vti_handler = NULL; >- } >+ error = vtpci_alloc_intr_resources(cn); >+ if (error) >+ return (error); > >- if (intr->vti_irq != NULL) { >- bus_release_resource(dev, SYS_RES_IRQ, intr->vti_rid, >- intr->vti_irq); >- intr->vti_irq = NULL; >- intr->vti_rid = -1; >- } >+ if (cn->vtpci_flags & VTPCI_FLAG_INTX) >+ error = vtpci_setup_intx_interrupt(cn, type); >+ else if (cn->vtpci_flags & VTPCI_FLAG_MSI) >+ error = vtpci_setup_msi_interrupt(cn, type); >+ else >+ error = vtpci_setup_msix_interrupts(cn, type); >+ >+ return (error); > } > >-static void >-vtpci_free_interrupts(struct vtpci_softc *sc) >+int >+vtpci_setup_interrupts(struct vtpci_common *cn, enum intr_type type) > { >- struct vtpci_interrupt *intr; >- int i, nvq_intrs; >+ device_t dev; >+ int attempt, error; > >- vtpci_free_interrupt(sc, &sc->vtpci_device_interrupt); >+ dev = cn->vtpci_dev; > >- if (sc->vtpci_nmsix_resources != 0) { >- nvq_intrs = sc->vtpci_nmsix_resources - 1; >- sc->vtpci_nmsix_resources = 0; >+ for (attempt = 0; attempt < 5; attempt++) { >+ /* >+ * Start with the most desirable interrupt configuration and >+ * fallback towards less desirable ones. 
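(Illustrative note, not part of the patch.) The switch below implements exactly this ladder: per-VQ MSIX, then shared MSIX, then MSI, then INTX, then give up. A condensed, runnable sketch of the try-in-order pattern, where the try_* stubs stand in for the vtpci_alloc_intr_* helpers; the real loop additionally tears down a partially configured attempt (vtpci_cleanup_setup_intr_attempt) before falling back:

#include <errno.h>
#include <stdio.h>

/* Stand-ins for the vtpci_alloc_intr_* helpers; pretend per-VQ fails. */
static int try_msix_pervq(void) { return (ENOSPC); }
static int try_msix_shared(void) { return (0); }
static int try_msi(void) { return (0); }
static int try_intx(void) { return (0); }

int
main(void)
{
	int (*attempts[])(void) = {
		try_msix_pervq, try_msix_shared, try_msi, try_intx,
	};
	const char *names[] = { "per-VQ MSIX", "shared MSIX", "MSI", "INTX" };

	for (int i = 0; i < 4; i++) {
		if (attempts[i]() == 0) {
			printf("using %s\n", names[i]);
			return (0);
		}
	}
	fprintf(stderr, "exhausted all interrupt allocation attempts\n");
	return (1);
}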
>+ */ >+ switch (attempt) { >+ case 0: >+ error = vtpci_alloc_intr_msix_pervq(cn); >+ break; >+ case 1: >+ error = vtpci_alloc_intr_msix_shared(cn); >+ break; >+ case 2: >+ error = vtpci_alloc_intr_msi(cn); >+ break; >+ case 3: >+ error = vtpci_alloc_intr_intx(cn); >+ break; >+ default: >+ device_printf(dev, >+ "exhausted all interrupt allocation attempts\n"); >+ return (ENXIO); >+ } > >- intr = sc->vtpci_msix_vq_interrupts; >- if (intr != NULL) { >- for (i = 0; i < nvq_intrs; i++, intr++) >- vtpci_free_interrupt(sc, intr); >+ if (error == 0 && vtpci_setup_intrs(cn, type) == 0) >+ break; > >- free(sc->vtpci_msix_vq_interrupts, M_DEVBUF); >- sc->vtpci_msix_vq_interrupts = NULL; >- } >+ vtpci_cleanup_setup_intr_attempt(cn); > } > >- if (sc->vtpci_flags & (VTPCI_FLAG_MSI | VTPCI_FLAG_MSIX)) >- pci_release_msi(sc->vtpci_dev); >+ if (bootverbose) { >+ if (cn->vtpci_flags & VTPCI_FLAG_INTX) >+ device_printf(dev, "using legacy interrupt\n"); >+ else if (cn->vtpci_flags & VTPCI_FLAG_MSI) >+ device_printf(dev, "using MSI interrupt\n"); >+ else if (cn->vtpci_flags & VTPCI_FLAG_SHARED_MSIX) >+ device_printf(dev, "using shared MSIX interrupts\n"); >+ else >+ device_printf(dev, "using per VQ MSIX interrupts\n"); >+ } > >- sc->vtpci_flags &= ~VTPCI_FLAG_ITYPE_MASK; >+ return (0); > } > >-static void >-vtpci_free_virtqueues(struct vtpci_softc *sc) >+static int >+vtpci_reinit_virtqueue(struct vtpci_common *cn, int idx) > { > struct vtpci_virtqueue *vqx; >- int idx; >+ struct virtqueue *vq; >+ int error; > >- for (idx = 0; idx < sc->vtpci_nvqs; idx++) { >- vqx = &sc->vtpci_vqs[idx]; >+ vqx = &cn->vtpci_vqs[idx]; >+ vq = vqx->vtv_vq; > >- vtpci_select_virtqueue(sc, idx); >- vtpci_write_config_4(sc, VIRTIO_PCI_QUEUE_PFN, 0); >+ KASSERT(vq != NULL, ("%s: vq %d not allocated", __func__, idx)); > >- virtqueue_free(vqx->vtv_vq); >- vqx->vtv_vq = NULL; >- } >+ error = virtqueue_reinit(vq, vtpci_get_vq_size(cn, idx)); >+ if (error == 0) >+ vtpci_set_vq(cn, vq); > >- free(sc->vtpci_vqs, M_DEVBUF); >- sc->vtpci_vqs = NULL; >- sc->vtpci_nvqs = 0; >+ return (error); > } > > static void >-vtpci_release_child_resources(struct vtpci_softc *sc) >+vtpci_intx_intr(void *xcn) > { >- >- vtpci_free_interrupts(sc); >- vtpci_free_virtqueues(sc); >-} >- >-static void >-vtpci_cleanup_setup_intr_attempt(struct vtpci_softc *sc) >-{ >- int idx; >- >- if (sc->vtpci_flags & VTPCI_FLAG_MSIX) { >- vtpci_write_config_2(sc, VIRTIO_MSI_CONFIG_VECTOR, >- VIRTIO_MSI_NO_VECTOR); >- >- for (idx = 0; idx < sc->vtpci_nvqs; idx++) { >- vtpci_select_virtqueue(sc, idx); >- vtpci_write_config_2(sc, VIRTIO_MSI_QUEUE_VECTOR, >- VIRTIO_MSI_NO_VECTOR); >- } >- } >- >- vtpci_free_interrupts(sc); >-} >- >-static void >-vtpci_reset(struct vtpci_softc *sc) >-{ >- >- /* >- * Setting the status to RESET sets the host device to >- * the original, uninitialized state. >- */ >- vtpci_set_status(sc->vtpci_dev, VIRTIO_CONFIG_STATUS_RESET); >-} >- >-static void >-vtpci_select_virtqueue(struct vtpci_softc *sc, int idx) >-{ >- >- vtpci_write_config_2(sc, VIRTIO_PCI_QUEUE_SEL, idx); >-} >- >-static void >-vtpci_legacy_intr(void *xsc) >-{ >- struct vtpci_softc *sc; >+ struct vtpci_common *cn; > struct vtpci_virtqueue *vqx; > int i; > uint8_t isr; > >- sc = xsc; >- vqx = &sc->vtpci_vqs[0]; >+ cn = xcn; >+ isr = vtpci_read_isr(cn); > >- /* Reading the ISR also clears it. 
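(Illustrative note, not part of the patch.) Because reading the legacy ISR register acknowledges and clears it, vtpci_intx_intr() above reads it exactly once and then dispatches on the returned bits. A tiny runnable sketch of that dispatch; ISR_CONFIG and ISR_INTR mirror VIRTIO_PCI_ISR_CONFIG and VIRTIO_PCI_ISR_INTR:

#include <stdint.h>
#include <stdio.h>

#define ISR_INTR	0x1	/* some virtqueue needs service */
#define ISR_CONFIG	0x2	/* device configuration changed */

int
main(void)
{
	/*
	 * Pretend a single read returned both bits. On the device the read
	 * itself clears the register, so it must not be read twice.
	 */
	uint8_t isr = ISR_INTR | ISR_CONFIG;

	if (isr & ISR_CONFIG)
		printf("deliver config-change to the child driver\n");
	if (isr & ISR_INTR)
		printf("walk every virtqueue with a callback\n");
	return (0);
}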
*/ >- isr = vtpci_read_config_1(sc, VIRTIO_PCI_ISR); >- > if (isr & VIRTIO_PCI_ISR_CONFIG) >- vtpci_config_intr(sc); >+ vtpci_config_intr(cn); > > if (isr & VIRTIO_PCI_ISR_INTR) { >- for (i = 0; i < sc->vtpci_nvqs; i++, vqx++) { >+ vqx = &cn->vtpci_vqs[0]; >+ for (i = 0; i < cn->vtpci_nvqs; i++, vqx++) { > if (vqx->vtv_no_intr == 0) > virtqueue_intr(vqx->vtv_vq); > } >@@ -1263,17 +872,17 @@ > } > > static int >-vtpci_vq_shared_intr_filter(void *xsc) >+vtpci_vq_shared_intr_filter(void *xcn) > { >- struct vtpci_softc *sc; >+ struct vtpci_common *cn; > struct vtpci_virtqueue *vqx; > int i, rc; > >+ cn = xcn; >+ vqx = &cn->vtpci_vqs[0]; > rc = 0; >- sc = xsc; >- vqx = &sc->vtpci_vqs[0]; > >- for (i = 0; i < sc->vtpci_nvqs; i++, vqx++) { >+ for (i = 0; i < cn->vtpci_nvqs; i++, vqx++) { > if (vqx->vtv_no_intr == 0) > rc |= virtqueue_intr_filter(vqx->vtv_vq); > } >@@ -1282,16 +891,16 @@ > } > > static void >-vtpci_vq_shared_intr(void *xsc) >+vtpci_vq_shared_intr(void *xcn) > { >- struct vtpci_softc *sc; >+ struct vtpci_common *cn; > struct vtpci_virtqueue *vqx; > int i; > >- sc = xsc; >- vqx = &sc->vtpci_vqs[0]; >+ cn = xcn; >+ vqx = &cn->vtpci_vqs[0]; > >- for (i = 0; i < sc->vtpci_nvqs; i++, vqx++) { >+ for (i = 0; i < cn->vtpci_nvqs; i++, vqx++) { > if (vqx->vtv_no_intr == 0) > virtqueue_intr(vqx->vtv_vq); > } >@@ -1319,14 +928,75 @@ > } > > static void >-vtpci_config_intr(void *xsc) >+vtpci_config_intr(void *xcn) > { >- struct vtpci_softc *sc; >+ struct vtpci_common *cn; > device_t child; > >- sc = xsc; >- child = sc->vtpci_child_dev; >+ cn = xcn; >+ child = cn->vtpci_child_dev; > > if (child != NULL) > VIRTIO_CONFIG_CHANGE(child); >+} >+ >+static int >+vtpci_feature_sysctl(struct sysctl_req *req, struct vtpci_common *cn, >+ uint64_t features) >+{ >+ struct sbuf *sb; >+ int error; >+ >+ sb = sbuf_new_for_sysctl(NULL, NULL, 256, req); >+ if (sb == NULL) >+ return (ENOMEM); >+ >+ error = virtio_describe_sbuf(sb, features, cn->vtpci_child_feat_desc); >+ sbuf_delete(sb); >+ >+ return (error); >+} >+ >+static int >+vtpci_host_features_sysctl(SYSCTL_HANDLER_ARGS) >+{ >+ struct vtpci_common *cn; >+ >+ cn = arg1; >+ >+ return (vtpci_feature_sysctl(req, cn, cn->vtpci_host_features)); >+} >+ >+static int >+vtpci_negotiated_features_sysctl(SYSCTL_HANDLER_ARGS) >+{ >+ struct vtpci_common *cn; >+ >+ cn = arg1; >+ >+ return (vtpci_feature_sysctl(req, cn, cn->vtpci_features)); >+} >+ >+static void >+vtpci_setup_sysctl(struct vtpci_common *cn) >+{ >+ device_t dev; >+ struct sysctl_ctx_list *ctx; >+ struct sysctl_oid *tree; >+ struct sysctl_oid_list *child; >+ >+ dev = cn->vtpci_dev; >+ ctx = device_get_sysctl_ctx(dev); >+ tree = device_get_sysctl_tree(dev); >+ child = SYSCTL_CHILDREN(tree); >+ >+ SYSCTL_ADD_INT(ctx, child, OID_AUTO, "nvqs", >+ CTLFLAG_RD, &cn->vtpci_nvqs, 0, "Number of virtqueues"); >+ >+ SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "host_features", >+ CTLTYPE_STRING | CTLFLAG_RD | CTLFLAG_MPSAFE, cn, 0, >+ vtpci_host_features_sysctl, "A", "Features supported by the host"); >+ SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "negotiated_features", >+ CTLTYPE_STRING | CTLFLAG_RD | CTLFLAG_MPSAFE, cn, 0, >+ vtpci_negotiated_features_sysctl, "A", "Features negotiated"); > } >diff -urN sys/dev/virtio.ori/pci/virtio_pci.h sys/dev/virtio/pci/virtio_pci.h >--- sys/dev/virtio.ori/pci/virtio_pci.h 2020-03-18 20:16:31.705815000 -0700 >+++ sys/dev/virtio/pci/virtio_pci.h 2020-03-19 16:44:27.326838000 -0700 >@@ -1,88 +1,132 @@ > /*- >- * SPDX-License-Identifier: BSD-3-Clause >+ * SPDX-License-Identifier: 
BSD-2-Clause-FreeBSD > * >- * Copyright IBM Corp. 2007 >+ * Copyright (c) 2017, Bryan Venteicher <bryanv@FreeBSD.org> >+ * All rights reserved. > * >- * Authors: >- * Anthony Liguori <aliguori@us.ibm.com> >- * >- * This header is BSD licensed so anyone can use the definitions to implement >- * compatible drivers/servers. >- * > * Redistribution and use in source and binary forms, with or without > * modification, are permitted provided that the following conditions > * are met: > * 1. Redistributions of source code must retain the above copyright >- * notice, this list of conditions and the following disclaimer. >+ * notice unmodified, this list of conditions, and the following >+ * disclaimer. > * 2. Redistributions in binary form must reproduce the above copyright > * notice, this list of conditions and the following disclaimer in the > * documentation and/or other materials provided with the distribution. >- * 3. Neither the name of IBM nor the names of its contributors >- * may be used to endorse or promote products derived from this software >- * without specific prior written permission. >- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS >- * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED >- * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR >- * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL IBM OR CONTRIBUTORS BE LIABLE >- * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL >- * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS >- * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) >- * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT >- * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY >- * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF >- * SUCH DAMAGE. > * >+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR >+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES >+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. >+ * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, >+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT >+ * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, >+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY >+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT >+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF >+ * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. >+ * > * $FreeBSD: releng/12.1/sys/dev/virtio/pci/virtio_pci.h 326022 2017-11-20 19:36:21Z pfg $ > */ > > #ifndef _VIRTIO_PCI_H > #define _VIRTIO_PCI_H > >-/* VirtIO PCI vendor/device ID. */ >-#define VIRTIO_PCI_VENDORID 0x1AF4 >-#define VIRTIO_PCI_DEVICEID_MIN 0x1000 >-#define VIRTIO_PCI_DEVICEID_MAX 0x103F >+struct vtpci_interrupt { >+ struct resource *vti_irq; >+ int vti_rid; >+ void *vti_handler; >+}; > >-/* VirtIO ABI version, this must match exactly. */ >-#define VIRTIO_PCI_ABI_VERSION 0 >+struct vtpci_virtqueue { >+ struct virtqueue *vtv_vq; >+ int vtv_no_intr; >+ int vtv_notify_offset; >+}; > >-/* >- * VirtIO Header, located in BAR 0. 
>- */ >-#define VIRTIO_PCI_HOST_FEATURES 0 /* host's supported features (32bit, RO)*/ >-#define VIRTIO_PCI_GUEST_FEATURES 4 /* guest's supported features (32, RW) */ >-#define VIRTIO_PCI_QUEUE_PFN 8 /* physical address of VQ (32, RW) */ >-#define VIRTIO_PCI_QUEUE_NUM 12 /* number of ring entries (16, RO) */ >-#define VIRTIO_PCI_QUEUE_SEL 14 /* current VQ selection (16, RW) */ >-#define VIRTIO_PCI_QUEUE_NOTIFY 16 /* notify host regarding VQ (16, RW) */ >-#define VIRTIO_PCI_STATUS 18 /* device status register (8, RW) */ >-#define VIRTIO_PCI_ISR 19 /* interrupt status register, reading >- * also clears the register (8, RO) */ >-/* Only if MSIX is enabled: */ >-#define VIRTIO_MSI_CONFIG_VECTOR 20 /* configuration change vector (16, RW) */ >-#define VIRTIO_MSI_QUEUE_VECTOR 22 /* vector for selected VQ notifications >- (16, RW) */ >+struct vtpci_common { >+ device_t vtpci_dev; >+ uint64_t vtpci_host_features; >+ uint64_t vtpci_features; >+ struct vtpci_virtqueue *vtpci_vqs; >+ int vtpci_nvqs; > >-/* The bit of the ISR which indicates a device has an interrupt. */ >-#define VIRTIO_PCI_ISR_INTR 0x1 >-/* The bit of the ISR which indicates a device configuration change. */ >-#define VIRTIO_PCI_ISR_CONFIG 0x2 >-/* Vector value used to disable MSI for queue. */ >-#define VIRTIO_MSI_NO_VECTOR 0xFFFF >+ uint32_t vtpci_flags; >+#define VTPCI_FLAG_NO_MSI 0x0001 >+#define VTPCI_FLAG_NO_MSIX 0x0002 >+#define VTPCI_FLAG_MODERN 0x0004 >+#define VTPCI_FLAG_INTX 0x1000 >+#define VTPCI_FLAG_MSI 0x2000 >+#define VTPCI_FLAG_MSIX 0x4000 >+#define VTPCI_FLAG_SHARED_MSIX 0x8000 >+#define VTPCI_FLAG_ITYPE_MASK 0xF000 > >-/* >- * The remaining space is defined by each driver as the per-driver >- * configuration space. >- */ >-#define VIRTIO_PCI_CONFIG_OFF(msix_enabled) ((msix_enabled) ? 24 : 20) >+ /* The VirtIO PCI "bus" will only ever have one child. */ >+ device_t vtpci_child_dev; >+ struct virtio_feature_desc *vtpci_child_feat_desc; > >-/* >- * How many bits to shift physical queue address written to QUEUE_PFN. >- * 12 is historical, and due to x86 page size. >- */ >-#define VIRTIO_PCI_QUEUE_ADDR_SHIFT 12 >+ /* >+ * Ideally, each virtqueue that the driver provides a callback for will >+ * receive its own MSIX vector. If there are not sufficient vectors >+ * available, then attempt to have all the VQs share one vector. For >+ * MSIX, the configuration changed notifications must be on their own >+ * vector. >+ * >+ * If MSIX is not available, attempt to have the whole device share >+ * one MSI vector, and then, finally, one intx interrupt. >+ */ >+ struct vtpci_interrupt vtpci_device_interrupt; >+ struct vtpci_interrupt *vtpci_msix_vq_interrupts; >+ int vtpci_nmsix_resources; >+}; > >-/* The alignment to use between consumer and producer parts of vring. 
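(Illustrative note, not part of the patch.) These legacy register definitions are re-added by virtio_pci_legacy_var.h later in this patch. Since QUEUE_PFN takes a page frame number rather than a byte address, ring memory must be page aligned; a quick runnable example of the shift, where QUEUE_ADDR_SHIFT mirrors VIRTIO_PCI_QUEUE_ADDR_SHIFT:

#include <inttypes.h>
#include <stdint.h>
#include <stdio.h>

#define QUEUE_ADDR_SHIFT	12	/* mirrors VIRTIO_PCI_QUEUE_ADDR_SHIFT */

int
main(void)
{
	uint64_t paddr = 0x12345000ULL;		/* 4096-byte aligned ring */
	uint32_t pfn = (uint32_t)(paddr >> QUEUE_ADDR_SHIFT);

	printf("paddr 0x%" PRIx64 " -> QUEUE_PFN 0x%" PRIx32 "\n", paddr, pfn);
	return (0);
}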
*/ >-#define VIRTIO_PCI_VRING_ALIGN 4096 >+extern int vtpci_disable_msix; >+ >+static inline device_t >+vtpci_child_device(struct vtpci_common *cn) >+{ >+ return (cn->vtpci_child_dev); >+} >+ >+static inline bool >+vtpci_is_msix_available(struct vtpci_common *cn) >+{ >+ return ((cn->vtpci_flags & VTPCI_FLAG_NO_MSIX) == 0); >+} >+ >+static inline bool >+vtpci_is_msix_enabled(struct vtpci_common *cn) >+{ >+ return ((cn->vtpci_flags & VTPCI_FLAG_MSIX) != 0); >+} >+ >+static inline bool >+vtpci_is_modern(struct vtpci_common *cn) >+{ >+ return ((cn->vtpci_flags & VTPCI_FLAG_MODERN) != 0); >+} >+ >+static inline int >+vtpci_virtqueue_count(struct vtpci_common *cn) >+{ >+ return (cn->vtpci_nvqs); >+} >+ >+void vtpci_init(struct vtpci_common *cn, device_t dev, bool modern); >+int vtpci_add_child(struct vtpci_common *cn); >+int vtpci_delete_child(struct vtpci_common *cn); >+void vtpci_child_detached(struct vtpci_common *cn); >+int vtpci_reinit(struct vtpci_common *cn); >+ >+uint64_t vtpci_negotiate_features(struct vtpci_common *cn, >+ uint64_t child_features, uint64_t host_features); >+int vtpci_with_feature(struct vtpci_common *cn, uint64_t feature); >+ >+int vtpci_read_ivar(struct vtpci_common *cn, int index, uintptr_t *result); >+int vtpci_write_ivar(struct vtpci_common *cn, int index, uintptr_t value); >+ >+int vtpci_alloc_virtqueues(struct vtpci_common *cn, int flags, int nvqs, >+ struct vq_alloc_info *vq_info); >+int vtpci_setup_interrupts(struct vtpci_common *cn, enum intr_type type); >+void vtpci_release_child_resources(struct vtpci_common *cn); > > #endif /* _VIRTIO_PCI_H */ >diff -urN sys/dev/virtio.ori/pci/virtio_pci_if.m sys/dev/virtio/pci/virtio_pci_if.m >--- sys/dev/virtio.ori/pci/virtio_pci_if.m 1969-12-31 16:00:00.000000000 -0800 >+++ sys/dev/virtio/pci/virtio_pci_if.m 2020-03-19 16:44:27.327182000 -0700 >@@ -0,0 +1,71 @@ >+#- >+# Copyright (c) 2017, Bryan Venteicher <bryanv@FreeBSD.org> >+# All rights reserved. >+# >+# Redistribution and use in source and binary forms, with or without >+# modification, are permitted provided that the following conditions >+# are met: >+# 1. Redistributions of source code must retain the above copyright >+# notice, this list of conditions and the following disclaimer. >+# 2. Redistributions in binary form must reproduce the above copyright >+# notice, this list of conditions and the following disclaimer in the >+# documentation and/or other materials provided with the distribution. >+# >+# THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND >+# ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE >+# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE >+# ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE >+# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL >+# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS >+# OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) >+# HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT >+# LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY >+# OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF >+# SUCH DAMAGE. 
>+# >+# $FreeBSD$ >+ >+#include <sys/bus.h> >+#include <machine/bus.h> >+ >+INTERFACE virtio_pci; >+ >+HEADER { >+struct virtqueue; >+struct vtpci_interrupt; >+}; >+ >+METHOD uint8_t read_isr { >+ device_t dev; >+}; >+ >+METHOD uint16_t get_vq_size { >+ device_t dev; >+ int idx; >+}; >+ >+METHOD bus_size_t get_vq_notify_off { >+ device_t dev; >+ int idx; >+}; >+ >+METHOD void set_vq { >+ device_t dev; >+ struct virtqueue *vq; >+}; >+ >+METHOD void disable_vq { >+ device_t dev; >+ int idx; >+}; >+ >+METHOD int register_cfg_msix { >+ device_t dev; >+ struct vtpci_interrupt *intr; >+}; >+ >+METHOD int register_vq_msix { >+ device_t dev; >+ int idx; >+ struct vtpci_interrupt *intr; >+}; >diff -urN sys/dev/virtio.ori/pci/virtio_pci_legacy.c sys/dev/virtio/pci/virtio_pci_legacy.c >--- sys/dev/virtio.ori/pci/virtio_pci_legacy.c 1969-12-31 16:00:00.000000000 -0800 >+++ sys/dev/virtio/pci/virtio_pci_legacy.c 2020-03-19 16:44:27.327480000 -0700 >@@ -0,0 +1,714 @@ >+/*- >+ * SPDX-License-Identifier: BSD-2-Clause-FreeBSD >+ * >+ * Copyright (c) 2011, Bryan Venteicher <bryanv@FreeBSD.org> >+ * All rights reserved. >+ * >+ * Redistribution and use in source and binary forms, with or without >+ * modification, are permitted provided that the following conditions >+ * are met: >+ * 1. Redistributions of source code must retain the above copyright >+ * notice unmodified, this list of conditions, and the following >+ * disclaimer. >+ * 2. Redistributions in binary form must reproduce the above copyright >+ * notice, this list of conditions and the following disclaimer in the >+ * documentation and/or other materials provided with the distribution. >+ * >+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR >+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES >+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. >+ * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, >+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT >+ * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, >+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY >+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT >+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF >+ * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. >+ */ >+ >+/* Driver for the legacy VirtIO PCI interface. 
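(Illustrative note, not part of the patch.) The virtio_pci_if.m interface above is compiled into virtio_pci_if.c and virtio_pci_if.h at build time, which is why those files were added to sys/conf/files and the module Makefile earlier in this patch. A hedged sketch of the resulting call pattern, assuming the usual makeobjops(8) naming; the generated wrapper names are not spelled out anywhere in the patch itself:

/* Kernel-context sketch only; assumes standard kobj code generation. */
#include <sys/param.h>
#include <sys/bus.h>

#include "virtio_pci_if.h"	/* generated from virtio_pci_if.m */

static uint8_t
example_read_isr(device_t dev)
{
	/*
	 * Dispatches to vtpci_legacy_read_isr() or vtpci_modern_read_isr(),
	 * whichever DEVMETHOD table the transport registered.
	 */
	return (VIRTIO_PCI_READ_ISR(dev));
}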
*/ >+ >+#include <sys/cdefs.h> >+__FBSDID("$FreeBSD$"); >+ >+#include <sys/param.h> >+#include <sys/systm.h> >+#include <sys/bus.h> >+#include <sys/kernel.h> >+#include <sys/module.h> >+ >+#include <machine/bus.h> >+#include <machine/resource.h> >+#include <sys/bus.h> >+#include <sys/rman.h> >+ >+#include <dev/pci/pcivar.h> >+#include <dev/pci/pcireg.h> >+ >+#include <dev/virtio/virtio.h> >+#include <dev/virtio/virtqueue.h> >+#include <dev/virtio/pci/virtio_pci.h> >+#include <dev/virtio/pci/virtio_pci_legacy_var.h> >+ >+#include "virtio_bus_if.h" >+#include "virtio_pci_if.h" >+#include "virtio_if.h" >+ >+struct vtpci_legacy_softc { >+ device_t vtpci_dev; >+ struct vtpci_common vtpci_common; >+ struct resource *vtpci_res; >+ struct resource *vtpci_msix_res; >+}; >+ >+static int vtpci_legacy_probe(device_t); >+static int vtpci_legacy_attach(device_t); >+static int vtpci_legacy_detach(device_t); >+static int vtpci_legacy_suspend(device_t); >+static int vtpci_legacy_resume(device_t); >+static int vtpci_legacy_shutdown(device_t); >+ >+static void vtpci_legacy_driver_added(device_t, driver_t *); >+static void vtpci_legacy_child_detached(device_t, device_t); >+static int vtpci_legacy_read_ivar(device_t, device_t, int, uintptr_t *); >+static int vtpci_legacy_write_ivar(device_t, device_t, int, uintptr_t); >+ >+static uint8_t vtpci_legacy_read_isr(device_t); >+static uint16_t vtpci_legacy_get_vq_size(device_t, int); >+static bus_size_t vtpci_legacy_get_vq_notify_off(device_t, int); >+static void vtpci_legacy_set_vq(device_t, struct virtqueue *); >+static void vtpci_legacy_disable_vq(device_t, int); >+static int vtpci_legacy_register_cfg_msix(device_t, >+ struct vtpci_interrupt *); >+static int vtpci_legacy_register_vq_msix(device_t, int idx, >+ struct vtpci_interrupt *); >+ >+static uint64_t vtpci_legacy_negotiate_features(device_t, uint64_t); >+static int vtpci_legacy_with_feature(device_t, uint64_t); >+static int vtpci_legacy_alloc_virtqueues(device_t, int, int, >+ struct vq_alloc_info *); >+static int vtpci_legacy_setup_interrupts(device_t, enum intr_type); >+static void vtpci_legacy_stop(device_t); >+static int vtpci_legacy_reinit(device_t, uint64_t); >+static void vtpci_legacy_reinit_complete(device_t); >+static void vtpci_legacy_notify_vq(device_t, uint16_t, bus_size_t); >+static void vtpci_legacy_read_dev_config(device_t, bus_size_t, void *, int); >+static void vtpci_legacy_write_dev_config(device_t, bus_size_t, void *, int); >+ >+static int vtpci_legacy_alloc_resources(struct vtpci_legacy_softc *); >+static void vtpci_legacy_free_resources(struct vtpci_legacy_softc *); >+ >+static void vtpci_legacy_probe_and_attach_child(struct vtpci_legacy_softc *); >+ >+static uint8_t vtpci_legacy_get_status(struct vtpci_legacy_softc *); >+static void vtpci_legacy_set_status(struct vtpci_legacy_softc *, uint8_t); >+static void vtpci_legacy_select_virtqueue(struct vtpci_legacy_softc *, int); >+static void vtpci_legacy_reset(struct vtpci_legacy_softc *); >+ >+#define VIRTIO_PCI_LEGACY_CONFIG(_sc) \ >+ VIRTIO_PCI_CONFIG_OFF(vtpci_is_msix_enabled(&(_sc)->vtpci_common)) >+ >+/* >+ * I/O port read/write wrappers. 
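(Illustrative note, not part of the patch.) The VIRTIO_PCI_LEGACY_CONFIG() macro just above resolves to where the per-device config region starts, which depends on whether the MSI-X vector words occupy offsets 20-23. A runnable restatement of that arithmetic; CONFIG_OFF copies VIRTIO_PCI_CONFIG_OFF from virtio_pci_legacy_var.h:

#include <stdio.h>

#define CONFIG_OFF(msix_enabled)	((msix_enabled) ? 24 : 20)

int
main(void)
{
	printf("device config at BAR0 offset %d without MSI-X\n",
	    CONFIG_OFF(0));
	printf("device config at BAR0 offset %d with MSI-X\n",
	    CONFIG_OFF(1));
	return (0);
}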
>+ */ >+#define vtpci_legacy_read_config_1(sc, o) bus_read_1((sc)->vtpci_res, (o)) >+#define vtpci_legacy_read_config_2(sc, o) bus_read_2((sc)->vtpci_res, (o)) >+#define vtpci_legacy_read_config_4(sc, o) bus_read_4((sc)->vtpci_res, (o)) >+#define vtpci_legacy_write_config_1(sc, o, v) \ >+ bus_write_1((sc)->vtpci_res, (o), (v)) >+#define vtpci_legacy_write_config_2(sc, o, v) \ >+ bus_write_2((sc)->vtpci_res, (o), (v)) >+#define vtpci_legacy_write_config_4(sc, o, v) \ >+ bus_write_4((sc)->vtpci_res, (o), (v)) >+ >+static device_method_t vtpci_legacy_methods[] = { >+ /* Device interface. */ >+ DEVMETHOD(device_probe, vtpci_legacy_probe), >+ DEVMETHOD(device_attach, vtpci_legacy_attach), >+ DEVMETHOD(device_detach, vtpci_legacy_detach), >+ DEVMETHOD(device_suspend, vtpci_legacy_suspend), >+ DEVMETHOD(device_resume, vtpci_legacy_resume), >+ DEVMETHOD(device_shutdown, vtpci_legacy_shutdown), >+ >+ /* Bus interface. */ >+ DEVMETHOD(bus_driver_added, vtpci_legacy_driver_added), >+ DEVMETHOD(bus_child_detached, vtpci_legacy_child_detached), >+ DEVMETHOD(bus_read_ivar, vtpci_legacy_read_ivar), >+ DEVMETHOD(bus_write_ivar, vtpci_legacy_write_ivar), >+ >+ /* VirtIO PCI interface. */ >+ DEVMETHOD(virtio_pci_read_isr, vtpci_legacy_read_isr), >+ DEVMETHOD(virtio_pci_get_vq_size, vtpci_legacy_get_vq_size), >+ DEVMETHOD(virtio_pci_get_vq_notify_off, vtpci_legacy_get_vq_notify_off), >+ DEVMETHOD(virtio_pci_set_vq, vtpci_legacy_set_vq), >+ DEVMETHOD(virtio_pci_disable_vq, vtpci_legacy_disable_vq), >+ DEVMETHOD(virtio_pci_register_cfg_msix, vtpci_legacy_register_cfg_msix), >+ DEVMETHOD(virtio_pci_register_vq_msix, vtpci_legacy_register_vq_msix), >+ >+ /* VirtIO bus interface. */ >+ DEVMETHOD(virtio_bus_negotiate_features, vtpci_legacy_negotiate_features), >+ DEVMETHOD(virtio_bus_with_feature, vtpci_legacy_with_feature), >+ DEVMETHOD(virtio_bus_alloc_virtqueues, vtpci_legacy_alloc_virtqueues), >+ DEVMETHOD(virtio_bus_setup_intr, vtpci_legacy_setup_interrupts), >+ DEVMETHOD(virtio_bus_stop, vtpci_legacy_stop), >+ DEVMETHOD(virtio_bus_reinit, vtpci_legacy_reinit), >+ DEVMETHOD(virtio_bus_reinit_complete, vtpci_legacy_reinit_complete), >+ DEVMETHOD(virtio_bus_notify_vq, vtpci_legacy_notify_vq), >+ DEVMETHOD(virtio_bus_read_device_config, vtpci_legacy_read_dev_config), >+ DEVMETHOD(virtio_bus_write_device_config, vtpci_legacy_write_dev_config), >+ >+ DEVMETHOD_END >+}; >+ >+static driver_t vtpci_legacy_driver = { >+ .name = "vtpcil", >+ .methods = vtpci_legacy_methods, >+ .size = sizeof(struct vtpci_legacy_softc) >+}; >+ >+devclass_t vtpci_legacy_devclass; >+ >+DRIVER_MODULE(vtpcil, pci, vtpci_legacy_driver, vtpci_legacy_devclass, 0, 0); >+ >+static int >+vtpci_legacy_probe(device_t dev) >+{ >+ char desc[64]; >+ const char *name; >+ >+ if (pci_get_vendor(dev) != VIRTIO_PCI_VENDORID) >+ return (ENXIO); >+ >+ if (pci_get_device(dev) < VIRTIO_PCI_DEVICEID_MIN || >+ pci_get_device(dev) > VIRTIO_PCI_DEVICEID_LEGACY_MAX) >+ return (ENXIO); >+ >+ if (pci_get_revid(dev) != VIRTIO_PCI_ABI_VERSION) >+ return (ENXIO); >+ >+ name = virtio_device_name(pci_get_subdevice(dev)); >+ if (name == NULL) >+ name = "Unknown"; >+ >+ snprintf(desc, sizeof(desc), "VirtIO PCI (legacy) %s adapter", name); >+ device_set_desc_copy(dev, desc); >+ >+ /* Prefer transitional modern VirtIO PCI. 
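(Illustrative note, not part of the patch.) The probe below returns BUS_PROBE_LOW_PRIORITY so that, when the hw.virtio.pci.transitional tunable lets the modern driver also claim transitional devices, vtpcim's BUS_PROBE_DEFAULT outranks vtpcil on the same device. A toy illustration of that arbitration; the numeric values are an assumption based on <sys/bus.h>, not taken from this patch:

#include <stdio.h>

#define PROBE_DEFAULT		(-20)	/* assumed BUS_PROBE_DEFAULT */
#define PROBE_LOW_PRIORITY	(-40)	/* assumed BUS_PROBE_LOW_PRIORITY */

int
main(void)
{
	int modern = PROBE_DEFAULT;		/* vtpcim on a transitional device */
	int legacy = PROBE_LOW_PRIORITY;	/* vtpcil on the same device */

	/* newbus attaches the driver whose probe returned the higher value. */
	printf("%s wins the device\n", modern > legacy ? "vtpcim" : "vtpcil");
	return (0);
}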
*/ >+ return (BUS_PROBE_LOW_PRIORITY); >+} >+ >+static int >+vtpci_legacy_attach(device_t dev) >+{ >+ struct vtpci_legacy_softc *sc; >+ int error; >+ >+ sc = device_get_softc(dev); >+ sc->vtpci_dev = dev; >+ vtpci_init(&sc->vtpci_common, dev, false); >+ >+ error = vtpci_legacy_alloc_resources(sc); >+ if (error) { >+ device_printf(dev, "cannot map I/O space\n"); >+ return (error); >+ } >+ >+ vtpci_legacy_reset(sc); >+ >+ /* Tell the host we've noticed this device. */ >+ vtpci_legacy_set_status(sc, VIRTIO_CONFIG_STATUS_ACK); >+ >+ error = vtpci_add_child(&sc->vtpci_common); >+ if (error) >+ goto fail; >+ >+ vtpci_legacy_probe_and_attach_child(sc); >+ >+ return (0); >+ >+fail: >+ vtpci_legacy_set_status(sc, VIRTIO_CONFIG_STATUS_FAILED); >+ vtpci_legacy_detach(dev); >+ >+ return (error); >+} >+ >+static int >+vtpci_legacy_detach(device_t dev) >+{ >+ struct vtpci_legacy_softc *sc; >+ int error; >+ >+ sc = device_get_softc(dev); >+ >+ error = vtpci_delete_child(&sc->vtpci_common); >+ if (error) >+ return (error); >+ >+ vtpci_legacy_reset(sc); >+ vtpci_legacy_free_resources(sc); >+ >+ return (0); >+} >+ >+static int >+vtpci_legacy_suspend(device_t dev) >+{ >+ return (bus_generic_suspend(dev)); >+} >+ >+static int >+vtpci_legacy_resume(device_t dev) >+{ >+ return (bus_generic_resume(dev)); >+} >+ >+static int >+vtpci_legacy_shutdown(device_t dev) >+{ >+ (void) bus_generic_shutdown(dev); >+ /* Forcibly stop the host device. */ >+ vtpci_legacy_stop(dev); >+ >+ return (0); >+} >+ >+static void >+vtpci_legacy_driver_added(device_t dev, driver_t *driver) >+{ >+ vtpci_legacy_probe_and_attach_child(device_get_softc(dev)); >+} >+ >+static void >+vtpci_legacy_child_detached(device_t dev, device_t child) >+{ >+ struct vtpci_legacy_softc *sc; >+ >+ sc = device_get_softc(dev); >+ >+ vtpci_legacy_reset(sc); >+ vtpci_child_detached(&sc->vtpci_common); >+ >+ /* After the reset, retell the host we've noticed this device. 
*/ >+ vtpci_legacy_set_status(sc, VIRTIO_CONFIG_STATUS_ACK); >+} >+ >+static int >+vtpci_legacy_read_ivar(device_t dev, device_t child, int index, >+ uintptr_t *result) >+{ >+ struct vtpci_legacy_softc *sc; >+ struct vtpci_common *cn; >+ >+ sc = device_get_softc(dev); >+ cn = &sc->vtpci_common; >+ >+ if (vtpci_child_device(cn) != child) >+ return (ENOENT); >+ >+ switch (index) { >+ case VIRTIO_IVAR_DEVTYPE: >+ *result = pci_get_subdevice(dev); >+ break; >+ default: >+ return (vtpci_read_ivar(cn, index, result)); >+ } >+ >+ return (0); >+} >+ >+static int >+vtpci_legacy_write_ivar(device_t dev, device_t child, int index, uintptr_t value) >+{ >+ struct vtpci_legacy_softc *sc; >+ struct vtpci_common *cn; >+ >+ sc = device_get_softc(dev); >+ cn = &sc->vtpci_common; >+ >+ if (vtpci_child_device(cn) != child) >+ return (ENOENT); >+ >+ switch (index) { >+ default: >+ return (vtpci_write_ivar(cn, index, value)); >+ } >+ >+ return (0); >+} >+ >+static uint64_t >+vtpci_legacy_negotiate_features(device_t dev, uint64_t child_features) >+{ >+ struct vtpci_legacy_softc *sc; >+ uint64_t host_features, features; >+ >+ sc = device_get_softc(dev); >+ host_features = vtpci_legacy_read_config_4(sc, VIRTIO_PCI_HOST_FEATURES); >+ >+ features = vtpci_negotiate_features(&sc->vtpci_common, >+ child_features, host_features); >+ vtpci_legacy_write_config_4(sc, VIRTIO_PCI_GUEST_FEATURES, features); >+ >+ return (features); >+} >+ >+static int >+vtpci_legacy_with_feature(device_t dev, uint64_t feature) >+{ >+ struct vtpci_legacy_softc *sc; >+ >+ sc = device_get_softc(dev); >+ >+ return (vtpci_with_feature(&sc->vtpci_common, feature)); >+} >+ >+static int >+vtpci_legacy_alloc_virtqueues(device_t dev, int flags, int nvqs, >+ struct vq_alloc_info *vq_info) >+{ >+ struct vtpci_legacy_softc *sc; >+ struct vtpci_common *cn; >+ >+ sc = device_get_softc(dev); >+ cn = &sc->vtpci_common; >+ >+ return (vtpci_alloc_virtqueues(cn, flags, nvqs, vq_info)); >+} >+ >+static int >+vtpci_legacy_setup_interrupts(device_t dev, enum intr_type type) >+{ >+ struct vtpci_legacy_softc *sc; >+ >+ sc = device_get_softc(dev); >+ >+ return (vtpci_setup_interrupts(&sc->vtpci_common, type)); >+} >+ >+static void >+vtpci_legacy_stop(device_t dev) >+{ >+ vtpci_legacy_reset(device_get_softc(dev)); >+} >+ >+static int >+vtpci_legacy_reinit(device_t dev, uint64_t features) >+{ >+ struct vtpci_legacy_softc *sc; >+ struct vtpci_common *cn; >+ int error; >+ >+ sc = device_get_softc(dev); >+ cn = &sc->vtpci_common; >+ >+ /* >+ * Redrive the device initialization. This is a bit of an abuse of >+ * the specification, but VirtualBox, QEMU/KVM, and BHyVe seem to >+ * play nice. >+ * >+ * We do not allow the host device to change from what was originally >+ * negotiated beyond what the guest driver changed. MSIX state should >+ * not change, number of virtqueues and their size remain the same, etc. >+ * This will need to be rethought when we want to support migration. >+ */ >+ >+ if (vtpci_legacy_get_status(sc) != VIRTIO_CONFIG_STATUS_RESET) >+ vtpci_legacy_stop(dev); >+ >+ /* >+ * Quickly drive the status through ACK and DRIVER. The device does >+ * not become usable again until DRIVER_OK in reinit complete. 
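(Illustrative note, not part of the patch.) The two writes that follow step the device status from ACK to DRIVER, and reinit_complete later adds DRIVER_OK; vtpci_legacy_set_status() below ORs each new bit into the current value unless the write is RESET. A runnable sketch of that accumulation, with ST_* restating the standard VirtIO status bits (an assumption, since the constants are defined outside this hunk):

#include <stdint.h>
#include <stdio.h>

#define ST_RESET	0x00
#define ST_ACK		0x01
#define ST_DRIVER	0x02
#define ST_DRIVER_OK	0x04

static uint8_t reg;	/* stands in for the device status register */

static void
set_status(uint8_t status)
{
	/* Mirrors vtpci_legacy_set_status(): bits accumulate, RESET clears. */
	if (status != ST_RESET)
		status |= reg;
	reg = status;
}

int
main(void)
{
	set_status(ST_ACK);
	set_status(ST_DRIVER);
	/* ... feature negotiation and virtqueue setup happen here ... */
	set_status(ST_DRIVER_OK);
	printf("status now 0x%02x\n", reg);	/* prints 0x07 */
	set_status(ST_RESET);
	printf("status now 0x%02x\n", reg);	/* prints 0x00 */
	return (0);
}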
>+ */ >+ vtpci_legacy_set_status(sc, VIRTIO_CONFIG_STATUS_ACK); >+ vtpci_legacy_set_status(sc, VIRTIO_CONFIG_STATUS_DRIVER); >+ >+ vtpci_legacy_negotiate_features(dev, features); >+ >+ error = vtpci_reinit(cn); >+ if (error) >+ return (error); >+ >+ return (0); >+} >+ >+static void >+vtpci_legacy_reinit_complete(device_t dev) >+{ >+ struct vtpci_legacy_softc *sc; >+ >+ sc = device_get_softc(dev); >+ >+ vtpci_legacy_set_status(sc, VIRTIO_CONFIG_STATUS_DRIVER_OK); >+} >+ >+static void >+vtpci_legacy_notify_vq(device_t dev, uint16_t queue, bus_size_t offset) >+{ >+ struct vtpci_legacy_softc *sc; >+ >+ sc = device_get_softc(dev); >+ MPASS(offset == VIRTIO_PCI_QUEUE_NOTIFY); >+ >+ vtpci_legacy_write_config_2(sc, offset, queue); >+} >+ >+static uint8_t >+vtpci_legacy_get_status(struct vtpci_legacy_softc *sc) >+{ >+ return (vtpci_legacy_read_config_1(sc, VIRTIO_PCI_STATUS)); >+} >+ >+static void >+vtpci_legacy_set_status(struct vtpci_legacy_softc *sc, uint8_t status) >+{ >+ if (status != VIRTIO_CONFIG_STATUS_RESET) >+ status |= vtpci_legacy_get_status(sc); >+ >+ vtpci_legacy_write_config_1(sc, VIRTIO_PCI_STATUS, status); >+} >+ >+static void >+vtpci_legacy_read_dev_config(device_t dev, bus_size_t offset, >+ void *dst, int length) >+{ >+ struct vtpci_legacy_softc *sc; >+ bus_size_t off; >+ uint8_t *d; >+ int size; >+ >+ sc = device_get_softc(dev); >+ off = VIRTIO_PCI_LEGACY_CONFIG(sc) + offset; >+ >+ for (d = dst; length > 0; d += size, off += size, length -= size) { >+ if (length >= 4) { >+ size = 4; >+ *(uint32_t *)d = vtpci_legacy_read_config_4(sc, off); >+ } else if (length >= 2) { >+ size = 2; >+ *(uint16_t *)d = vtpci_legacy_read_config_2(sc, off); >+ } else { >+ size = 1; >+ *d = vtpci_legacy_read_config_1(sc, off); >+ } >+ } >+} >+ >+static void >+vtpci_legacy_write_dev_config(device_t dev, bus_size_t offset, >+ void *src, int length) >+{ >+ struct vtpci_legacy_softc *sc; >+ bus_size_t off; >+ uint8_t *s; >+ int size; >+ >+ sc = device_get_softc(dev); >+ off = VIRTIO_PCI_LEGACY_CONFIG(sc) + offset; >+ >+ for (s = src; length > 0; s += size, off += size, length -= size) { >+ if (length >= 4) { >+ size = 4; >+ vtpci_legacy_write_config_4(sc, off, *(uint32_t *)s); >+ } else if (length >= 2) { >+ size = 2; >+ vtpci_legacy_write_config_2(sc, off, *(uint16_t *)s); >+ } else { >+ size = 1; >+ vtpci_legacy_write_config_1(sc, off, *s); >+ } >+ } >+} >+ >+static int >+vtpci_legacy_alloc_resources(struct vtpci_legacy_softc *sc) >+{ >+ device_t dev; >+ int rid; >+ >+ dev = sc->vtpci_dev; >+ >+ rid = PCIR_BAR(0); >+ if ((sc->vtpci_res = bus_alloc_resource_any(dev, SYS_RES_IOPORT, >+ &rid, RF_ACTIVE)) == NULL) >+ return (ENXIO); >+ >+ if (vtpci_is_msix_available(&sc->vtpci_common)) { >+ rid = PCIR_BAR(1); >+ if ((sc->vtpci_msix_res = bus_alloc_resource_any(dev, >+ SYS_RES_MEMORY, &rid, RF_ACTIVE)) == NULL) >+ return (ENXIO); >+ } >+ >+ return (0); >+} >+ >+static void >+vtpci_legacy_free_resources(struct vtpci_legacy_softc *sc) >+{ >+ device_t dev; >+ >+ dev = sc->vtpci_dev; >+ >+ if (sc->vtpci_msix_res != NULL) { >+ bus_release_resource(dev, SYS_RES_MEMORY, PCIR_BAR(1), >+ sc->vtpci_msix_res); >+ sc->vtpci_msix_res = NULL; >+ } >+ >+ if (sc->vtpci_res != NULL) { >+ bus_release_resource(dev, SYS_RES_IOPORT, PCIR_BAR(0), >+ sc->vtpci_res); >+ sc->vtpci_res = NULL; >+ } >+} >+ >+static void >+vtpci_legacy_probe_and_attach_child(struct vtpci_legacy_softc *sc) >+{ >+ device_t dev, child; >+ >+ dev = sc->vtpci_dev; >+ child = vtpci_child_device(&sc->vtpci_common); >+ >+ if (child == NULL || 
device_get_state(child) != DS_NOTPRESENT) >+ return; >+ >+ if (device_probe(child) != 0) >+ return; >+ >+ vtpci_legacy_set_status(sc, VIRTIO_CONFIG_STATUS_DRIVER); >+ >+ if (device_attach(child) != 0) { >+ vtpci_legacy_set_status(sc, VIRTIO_CONFIG_STATUS_FAILED); >+ /* Reset status for future attempt. */ >+ vtpci_legacy_child_detached(dev, child); >+ } else { >+ vtpci_legacy_set_status(sc, VIRTIO_CONFIG_STATUS_DRIVER_OK); >+ VIRTIO_ATTACH_COMPLETED(child); >+ } >+} >+ >+static int >+vtpci_legacy_register_msix(struct vtpci_legacy_softc *sc, int offset, >+ struct vtpci_interrupt *intr) >+{ >+ device_t dev; >+ uint16_t vector; >+ >+ dev = sc->vtpci_dev; >+ >+ if (intr != NULL) { >+ /* Map from guest rid to host vector. */ >+ vector = intr->vti_rid - 1; >+ } else >+ vector = VIRTIO_MSI_NO_VECTOR; >+ >+ vtpci_legacy_write_config_2(sc, offset, vector); >+ return (vtpci_legacy_read_config_2(sc, offset) == vector ? 0 : ENODEV); >+} >+ >+static int >+vtpci_legacy_register_cfg_msix(device_t dev, struct vtpci_interrupt *intr) >+{ >+ struct vtpci_legacy_softc *sc; >+ int error; >+ >+ sc = device_get_softc(dev); >+ >+ error = vtpci_legacy_register_msix(sc, VIRTIO_MSI_CONFIG_VECTOR, intr); >+ if (error) { >+ device_printf(dev, >+ "unable to register config MSIX interrupt\n"); >+ return (error); >+ } >+ >+ return (0); >+} >+ >+static int >+vtpci_legacy_register_vq_msix(device_t dev, int idx, >+ struct vtpci_interrupt *intr) >+{ >+ struct vtpci_legacy_softc *sc; >+ int error; >+ >+ sc = device_get_softc(dev); >+ >+ vtpci_legacy_select_virtqueue(sc, idx); >+ error = vtpci_legacy_register_msix(sc, VIRTIO_MSI_QUEUE_VECTOR, intr); >+ if (error) { >+ device_printf(dev, >+ "unable to register virtqueue MSIX interrupt\n"); >+ return (error); >+ } >+ >+ return (0); >+} >+ >+static void >+vtpci_legacy_reset(struct vtpci_legacy_softc *sc) >+{ >+ /* >+ * Setting the status to RESET sets the host device to the >+ * original, uninitialized state. 
>+ */ >+ vtpci_legacy_set_status(sc, VIRTIO_CONFIG_STATUS_RESET); >+ (void) vtpci_legacy_get_status(sc); >+} >+ >+static void >+vtpci_legacy_select_virtqueue(struct vtpci_legacy_softc *sc, int idx) >+{ >+ vtpci_legacy_write_config_2(sc, VIRTIO_PCI_QUEUE_SEL, idx); >+} >+ >+static uint8_t >+vtpci_legacy_read_isr(device_t dev) >+{ >+ struct vtpci_legacy_softc *sc; >+ >+ sc = device_get_softc(dev); >+ >+ return (vtpci_legacy_read_config_1(sc, VIRTIO_PCI_ISR)); >+} >+ >+static uint16_t >+vtpci_legacy_get_vq_size(device_t dev, int idx) >+{ >+ struct vtpci_legacy_softc *sc; >+ >+ sc = device_get_softc(dev); >+ >+ vtpci_legacy_select_virtqueue(sc, idx); >+ return (vtpci_legacy_read_config_2(sc, VIRTIO_PCI_QUEUE_NUM)); >+} >+ >+static bus_size_t >+vtpci_legacy_get_vq_notify_off(device_t dev, int idx) >+{ >+ return (VIRTIO_PCI_QUEUE_NOTIFY); >+} >+ >+static void >+vtpci_legacy_set_vq(device_t dev, struct virtqueue *vq) >+{ >+ struct vtpci_legacy_softc *sc; >+ >+ sc = device_get_softc(dev); >+ >+ vtpci_legacy_select_virtqueue(sc, virtqueue_index(vq)); >+ vtpci_legacy_write_config_4(sc, VIRTIO_PCI_QUEUE_PFN, >+ virtqueue_paddr(vq) >> VIRTIO_PCI_QUEUE_ADDR_SHIFT); >+} >+ >+static void >+vtpci_legacy_disable_vq(device_t dev, int idx) >+{ >+ struct vtpci_legacy_softc *sc; >+ >+ sc = device_get_softc(dev); >+ >+ vtpci_legacy_select_virtqueue(sc, idx); >+ vtpci_legacy_write_config_4(sc, VIRTIO_PCI_QUEUE_PFN, 0); >+} >diff -urN sys/dev/virtio.ori/pci/virtio_pci_legacy_var.h sys/dev/virtio/pci/virtio_pci_legacy_var.h >--- sys/dev/virtio.ori/pci/virtio_pci_legacy_var.h 1969-12-31 16:00:00.000000000 -0800 >+++ sys/dev/virtio/pci/virtio_pci_legacy_var.h 2020-03-19 16:44:27.327641000 -0700 >@@ -0,0 +1,78 @@ >+/*- >+ * SPDX-License-Identifier: BSD-3-Clause >+ * >+ * Copyright IBM Corp. 2007 >+ * >+ * Authors: >+ * Anthony Liguori <aliguori@us.ibm.com> >+ * >+ * This header is BSD licensed so anyone can use the definitions to implement >+ * compatible drivers/servers. >+ * >+ * Redistribution and use in source and binary forms, with or without >+ * modification, are permitted provided that the following conditions >+ * are met: >+ * 1. Redistributions of source code must retain the above copyright >+ * notice, this list of conditions and the following disclaimer. >+ * 2. Redistributions in binary form must reproduce the above copyright >+ * notice, this list of conditions and the following disclaimer in the >+ * documentation and/or other materials provided with the distribution. >+ * 3. Neither the name of IBM nor the names of its contributors >+ * may be used to endorse or promote products derived from this software >+ * without specific prior written permission. >+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS >+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED >+ * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR >+ * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL IBM OR CONTRIBUTORS BE LIABLE >+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL >+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS >+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) >+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT >+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY >+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF >+ * SUCH DAMAGE. 
>+ * >+ * $FreeBSD$ >+ */ >+ >+#ifndef _VIRTIO_PCI_LEGACY_VAR_H >+#define _VIRTIO_PCI_LEGACY_VAR_H >+ >+#include <dev/virtio/pci/virtio_pci_var.h> >+ >+/* VirtIO ABI version, this must match exactly. */ >+#define VIRTIO_PCI_ABI_VERSION 0 >+ >+/* >+ * VirtIO Header, located in BAR 0. >+ */ >+#define VIRTIO_PCI_HOST_FEATURES 0 /* host's supported features (32bit, RO)*/ >+#define VIRTIO_PCI_GUEST_FEATURES 4 /* guest's supported features (32, RW) */ >+#define VIRTIO_PCI_QUEUE_PFN 8 /* physical address of VQ (32, RW) */ >+#define VIRTIO_PCI_QUEUE_NUM 12 /* number of ring entries (16, RO) */ >+#define VIRTIO_PCI_QUEUE_SEL 14 /* current VQ selection (16, RW) */ >+#define VIRTIO_PCI_QUEUE_NOTIFY 16 /* notify host regarding VQ (16, RW) */ >+#define VIRTIO_PCI_STATUS 18 /* device status register (8, RW) */ >+#define VIRTIO_PCI_ISR 19 /* interrupt status register, reading >+ * also clears the register (8, RO) */ >+/* Only if MSIX is enabled: */ >+#define VIRTIO_MSI_CONFIG_VECTOR 20 /* configuration change vector (16, RW) */ >+#define VIRTIO_MSI_QUEUE_VECTOR 22 /* vector for selected VQ notifications >+ (16, RW) */ >+ >+/* >+ * The remaining space is defined by each driver as the per-driver >+ * configuration space. >+ */ >+#define VIRTIO_PCI_CONFIG_OFF(msix_enabled) ((msix_enabled) ? 24 : 20) >+ >+/* >+ * How many bits to shift physical queue address written to QUEUE_PFN. >+ * 12 is historical, and due to x86 page size. >+ */ >+#define VIRTIO_PCI_QUEUE_ADDR_SHIFT 12 >+ >+/* The alignment to use between consumer and producer parts of vring. */ >+#define VIRTIO_PCI_VRING_ALIGN 4096 >+ >+#endif /* _VIRTIO_PCI_LEGACY_VAR_H */ >diff -urN sys/dev/virtio.ori/pci/virtio_pci_modern.c sys/dev/virtio/pci/virtio_pci_modern.c >--- sys/dev/virtio.ori/pci/virtio_pci_modern.c 1969-12-31 16:00:00.000000000 -0800 >+++ sys/dev/virtio/pci/virtio_pci_modern.c 2020-03-19 16:44:27.327910000 -0700 >@@ -0,0 +1,1443 @@ >+/*- >+ * SPDX-License-Identifier: BSD-2-Clause-FreeBSD >+ * >+ * Copyright (c) 2017, Bryan Venteicher <bryanv@FreeBSD.org> >+ * All rights reserved. >+ * >+ * Redistribution and use in source and binary forms, with or without >+ * modification, are permitted provided that the following conditions >+ * are met: >+ * 1. Redistributions of source code must retain the above copyright >+ * notice unmodified, this list of conditions, and the following >+ * disclaimer. >+ * 2. Redistributions in binary form must reproduce the above copyright >+ * notice, this list of conditions and the following disclaimer in the >+ * documentation and/or other materials provided with the distribution. >+ * >+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR >+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES >+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. >+ * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, >+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT >+ * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, >+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY >+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT >+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF >+ * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. >+ */ >+ >+/* Driver for the modern VirtIO PCI interface. 
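(Illustrative note, not part of the patch.) Unlike the fixed BAR 0 layout above, the modern transport that follows locates its common, notify, ISR, and device windows through PCI vendor capabilities, which the map_configs code walks. A sketch of one such capability record; the field layout comes from the VirtIO 1.0 specification, not from this patch:

#include <stdint.h>

/* VirtIO 1.0 vendor capability as it appears in PCI config space. */
struct virtio_pci_cap {
	uint8_t		cap_vndr;	/* PCI vendor-specific capability ID */
	uint8_t		cap_next;	/* offset of the next capability */
	uint8_t		cap_len;	/* length of this capability */
	uint8_t		cfg_type;	/* 1=common 2=notify 3=isr 4=device */
	uint8_t		bar;		/* BAR containing the window */
	uint8_t		padding[3];
	uint32_t	offset;		/* window offset within that BAR */
	uint32_t	length;		/* window length in bytes */
};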
*/ >+ >+#include <sys/cdefs.h> >+__FBSDID("$FreeBSD$"); >+ >+#include <sys/param.h> >+#include <sys/systm.h> >+#include <sys/bus.h> >+#include <sys/kernel.h> >+#include <sys/module.h> >+ >+#include <machine/bus.h> >+#include <machine/cpu.h> >+#include <machine/resource.h> >+#include <sys/bus.h> >+#include <sys/rman.h> >+ >+#include <dev/pci/pcivar.h> >+#include <dev/pci/pcireg.h> >+ >+#include <dev/virtio/virtio.h> >+#include <dev/virtio/virtqueue.h> >+#include <dev/virtio/pci/virtio_pci.h> >+#include <dev/virtio/pci/virtio_pci_modern_var.h> >+ >+#include "virtio_bus_if.h" >+#include "virtio_pci_if.h" >+#include "virtio_if.h" >+ >+struct vtpci_modern_resource_map { >+ struct resource_map vtrm_map; >+ int vtrm_cap_offset; >+ int vtrm_bar; >+ int vtrm_offset; >+ int vtrm_length; >+ int vtrm_type; /* SYS_RES_{MEMORY, IOPORT} */ >+}; >+ >+struct vtpci_modern_bar_resource { >+ struct resource *vtbr_res; >+ int vtbr_type; >+}; >+ >+struct vtpci_modern_softc { >+ device_t vtpci_dev; >+ struct vtpci_common vtpci_common; >+ uint32_t vtpci_notify_offset_multiplier; >+ uint16_t vtpci_devid; >+ int vtpci_msix_bar; >+ struct resource *vtpci_msix_res; >+ >+ struct vtpci_modern_resource_map vtpci_common_res_map; >+ struct vtpci_modern_resource_map vtpci_notify_res_map; >+ struct vtpci_modern_resource_map vtpci_isr_res_map; >+ struct vtpci_modern_resource_map vtpci_device_res_map; >+ >+#define VTPCI_MODERN_MAX_BARS 6 >+ struct vtpci_modern_bar_resource vtpci_bar_res[VTPCI_MODERN_MAX_BARS]; >+}; >+ >+static int vtpci_modern_probe(device_t); >+static int vtpci_modern_attach(device_t); >+static int vtpci_modern_detach(device_t); >+static int vtpci_modern_suspend(device_t); >+static int vtpci_modern_resume(device_t); >+static int vtpci_modern_shutdown(device_t); >+ >+static void vtpci_modern_driver_added(device_t, driver_t *); >+static void vtpci_modern_child_detached(device_t, device_t); >+static int vtpci_modern_read_ivar(device_t, device_t, int, uintptr_t *); >+static int vtpci_modern_write_ivar(device_t, device_t, int, uintptr_t); >+ >+static uint8_t vtpci_modern_read_isr(device_t); >+static uint16_t vtpci_modern_get_vq_size(device_t, int); >+static bus_size_t vtpci_modern_get_vq_notify_off(device_t, int); >+static void vtpci_modern_set_vq(device_t, struct virtqueue *); >+static void vtpci_modern_disable_vq(device_t, int); >+static int vtpci_modern_register_msix(struct vtpci_modern_softc *, int, >+ struct vtpci_interrupt *); >+static int vtpci_modern_register_cfg_msix(device_t, >+ struct vtpci_interrupt *); >+static int vtpci_modern_register_vq_msix(device_t, int idx, >+ struct vtpci_interrupt *); >+ >+static uint64_t vtpci_modern_negotiate_features(device_t, uint64_t); >+static int vtpci_modern_finalize_features(device_t); >+static int vtpci_modern_with_feature(device_t, uint64_t); >+static int vtpci_modern_alloc_virtqueues(device_t, int, int, >+ struct vq_alloc_info *); >+static int vtpci_modern_setup_interrupts(device_t, enum intr_type); >+static void vtpci_modern_stop(device_t); >+static int vtpci_modern_reinit(device_t, uint64_t); >+static void vtpci_modern_reinit_complete(device_t); >+static void vtpci_modern_notify_vq(device_t, uint16_t, bus_size_t); >+static int vtpci_modern_config_generation(device_t); >+static void vtpci_modern_read_dev_config(device_t, bus_size_t, void *, int); >+static void vtpci_modern_write_dev_config(device_t, bus_size_t, void *, int); >+ >+static int vtpci_modern_probe_configs(device_t); >+static int vtpci_modern_find_cap(device_t, uint8_t, int *); >+static int 
vtpci_modern_map_configs(struct vtpci_modern_softc *); >+static void vtpci_modern_unmap_configs(struct vtpci_modern_softc *); >+static int vtpci_modern_find_cap_resource(struct vtpci_modern_softc *, >+ uint8_t, int, int, struct vtpci_modern_resource_map *); >+static int vtpci_modern_bar_type(struct vtpci_modern_softc *, int); >+static struct resource *vtpci_modern_get_bar_resource( >+ struct vtpci_modern_softc *, int, int); >+static struct resource *vtpci_modern_alloc_bar_resource( >+ struct vtpci_modern_softc *, int, int); >+static void vtpci_modern_free_bar_resources(struct vtpci_modern_softc *); >+static int vtpci_modern_alloc_resource_map(struct vtpci_modern_softc *, >+ struct vtpci_modern_resource_map *); >+static void vtpci_modern_free_resource_map(struct vtpci_modern_softc *, >+ struct vtpci_modern_resource_map *); >+static void vtpci_modern_alloc_msix_resource(struct vtpci_modern_softc *); >+static void vtpci_modern_free_msix_resource(struct vtpci_modern_softc *); >+ >+static void vtpci_modern_probe_and_attach_child(struct vtpci_modern_softc *); >+ >+static uint64_t vtpci_modern_read_features(struct vtpci_modern_softc *); >+static void vtpci_modern_write_features(struct vtpci_modern_softc *, >+ uint64_t); >+static void vtpci_modern_select_virtqueue(struct vtpci_modern_softc *, int); >+static uint8_t vtpci_modern_get_status(struct vtpci_modern_softc *); >+static void vtpci_modern_set_status(struct vtpci_modern_softc *, uint8_t); >+static void vtpci_modern_reset(struct vtpci_modern_softc *); >+static void vtpci_modern_enable_virtqueues(struct vtpci_modern_softc *); >+ >+static uint8_t vtpci_modern_read_common_1(struct vtpci_modern_softc *, >+ bus_size_t); >+static uint16_t vtpci_modern_read_common_2(struct vtpci_modern_softc *, >+ bus_size_t); >+static uint32_t vtpci_modern_read_common_4(struct vtpci_modern_softc *, >+ bus_size_t); >+static void vtpci_modern_write_common_1(struct vtpci_modern_softc *, >+ bus_size_t, uint8_t); >+static void vtpci_modern_write_common_2(struct vtpci_modern_softc *, >+ bus_size_t, uint16_t); >+static void vtpci_modern_write_common_4(struct vtpci_modern_softc *, >+ bus_size_t, uint32_t); >+static void vtpci_modern_write_common_8(struct vtpci_modern_softc *, >+ bus_size_t, uint64_t); >+static void vtpci_modern_write_notify_2(struct vtpci_modern_softc *, >+ bus_size_t, uint16_t); >+static uint8_t vtpci_modern_read_isr_1(struct vtpci_modern_softc *, >+ bus_size_t); >+static uint8_t vtpci_modern_read_device_1(struct vtpci_modern_softc *, >+ bus_size_t); >+static uint16_t vtpci_modern_read_device_2(struct vtpci_modern_softc *, >+ bus_size_t); >+static uint32_t vtpci_modern_read_device_4(struct vtpci_modern_softc *, >+ bus_size_t); >+static uint64_t vtpci_modern_read_device_8(struct vtpci_modern_softc *, >+ bus_size_t); >+static void vtpci_modern_write_device_1(struct vtpci_modern_softc *, >+ bus_size_t, uint8_t); >+static void vtpci_modern_write_device_2(struct vtpci_modern_softc *, >+ bus_size_t, uint16_t); >+static void vtpci_modern_write_device_4(struct vtpci_modern_softc *, >+ bus_size_t, uint32_t); >+static void vtpci_modern_write_device_8(struct vtpci_modern_softc *, >+ bus_size_t, uint64_t); >+ >+/* Tunables. */ >+static int vtpci_modern_transitional = 0; >+TUNABLE_INT("hw.virtio.pci.transitional", &vtpci_modern_transitional); >+ >+static device_method_t vtpci_modern_methods[] = { >+ /* Device interface. 
*/ >+ DEVMETHOD(device_probe, vtpci_modern_probe), >+ DEVMETHOD(device_attach, vtpci_modern_attach), >+ DEVMETHOD(device_detach, vtpci_modern_detach), >+ DEVMETHOD(device_suspend, vtpci_modern_suspend), >+ DEVMETHOD(device_resume, vtpci_modern_resume), >+ DEVMETHOD(device_shutdown, vtpci_modern_shutdown), >+ >+ /* Bus interface. */ >+ DEVMETHOD(bus_driver_added, vtpci_modern_driver_added), >+ DEVMETHOD(bus_child_detached, vtpci_modern_child_detached), >+ DEVMETHOD(bus_read_ivar, vtpci_modern_read_ivar), >+ DEVMETHOD(bus_write_ivar, vtpci_modern_write_ivar), >+ >+ /* VirtIO PCI interface. */ >+ DEVMETHOD(virtio_pci_read_isr, vtpci_modern_read_isr), >+ DEVMETHOD(virtio_pci_get_vq_size, vtpci_modern_get_vq_size), >+ DEVMETHOD(virtio_pci_get_vq_notify_off, vtpci_modern_get_vq_notify_off), >+ DEVMETHOD(virtio_pci_set_vq, vtpci_modern_set_vq), >+ DEVMETHOD(virtio_pci_disable_vq, vtpci_modern_disable_vq), >+ DEVMETHOD(virtio_pci_register_cfg_msix, vtpci_modern_register_cfg_msix), >+ DEVMETHOD(virtio_pci_register_vq_msix, vtpci_modern_register_vq_msix), >+ >+ /* VirtIO bus interface. */ >+ DEVMETHOD(virtio_bus_negotiate_features, vtpci_modern_negotiate_features), >+ DEVMETHOD(virtio_bus_finalize_features, vtpci_modern_finalize_features), >+ DEVMETHOD(virtio_bus_with_feature, vtpci_modern_with_feature), >+ DEVMETHOD(virtio_bus_alloc_virtqueues, vtpci_modern_alloc_virtqueues), >+ DEVMETHOD(virtio_bus_setup_intr, vtpci_modern_setup_interrupts), >+ DEVMETHOD(virtio_bus_stop, vtpci_modern_stop), >+ DEVMETHOD(virtio_bus_reinit, vtpci_modern_reinit), >+ DEVMETHOD(virtio_bus_reinit_complete, vtpci_modern_reinit_complete), >+ DEVMETHOD(virtio_bus_notify_vq, vtpci_modern_notify_vq), >+ DEVMETHOD(virtio_bus_config_generation, vtpci_modern_config_generation), >+ DEVMETHOD(virtio_bus_read_device_config, vtpci_modern_read_dev_config), >+ DEVMETHOD(virtio_bus_write_device_config, vtpci_modern_write_dev_config), >+ >+ DEVMETHOD_END >+}; >+ >+static driver_t vtpci_modern_driver = { >+ .name = "vtpcim", >+ .methods = vtpci_modern_methods, >+ .size = sizeof(struct vtpci_modern_softc) >+}; >+ >+devclass_t vtpci_modern_devclass; >+ >+DRIVER_MODULE(vtpcim, pci, vtpci_modern_driver, vtpci_modern_devclass, 0, 0); >+ >+static int >+vtpci_modern_probe(device_t dev) >+{ >+ char desc[64]; >+ const char *name; >+ uint16_t devid; >+ >+ if (pci_get_vendor(dev) != VIRTIO_PCI_VENDORID) >+ return (ENXIO); >+ >+ if (pci_get_device(dev) < VIRTIO_PCI_DEVICEID_MIN || >+ pci_get_device(dev) > VIRTIO_PCI_DEVICEID_MODERN_MAX) >+ return (ENXIO); >+ >+ if (pci_get_device(dev) < VIRTIO_PCI_DEVICEID_MODERN_MIN) { >+ if (!vtpci_modern_transitional) >+ return (ENXIO); >+ devid = pci_get_subdevice(dev); >+ } else >+ devid = pci_get_device(dev) - VIRTIO_PCI_DEVICEID_MODERN_MIN; >+ >+ if (vtpci_modern_probe_configs(dev) != 0) >+ return (ENXIO); >+ >+ name = virtio_device_name(devid); >+ if (name == NULL) >+ name = "Unknown"; >+ >+ snprintf(desc, sizeof(desc), "VirtIO PCI (modern) %s adapter", name); >+ device_set_desc_copy(dev, desc); >+ >+ return (BUS_PROBE_DEFAULT); >+} >+ >+static int >+vtpci_modern_attach(device_t dev) >+{ >+ struct vtpci_modern_softc *sc; >+ int error; >+ >+ sc = device_get_softc(dev); >+ sc->vtpci_dev = dev; >+ vtpci_init(&sc->vtpci_common, dev, true); >+ >+ if (pci_get_device(dev) < VIRTIO_PCI_DEVICEID_MODERN_MIN) >+ sc->vtpci_devid = pci_get_subdevice(dev); >+ else >+ sc->vtpci_devid = pci_get_device(dev) - >+ VIRTIO_PCI_DEVICEID_MODERN_MIN; >+ >+ error = vtpci_modern_map_configs(sc); >+ if (error) { >+ device_printf(dev, 
"cannot map configs\n"); >+ vtpci_modern_unmap_configs(sc); >+ return (error); >+ } >+ >+ vtpci_modern_reset(sc); >+ >+ /* Tell the host we've noticed this device. */ >+ vtpci_modern_set_status(sc, VIRTIO_CONFIG_STATUS_ACK); >+ >+ error = vtpci_add_child(&sc->vtpci_common); >+ if (error) >+ goto fail; >+ >+ vtpci_modern_probe_and_attach_child(sc); >+ >+ return (0); >+ >+fail: >+ vtpci_modern_set_status(sc, VIRTIO_CONFIG_STATUS_FAILED); >+ vtpci_modern_detach(dev); >+ >+ return (error); >+} >+ >+static int >+vtpci_modern_detach(device_t dev) >+{ >+ struct vtpci_modern_softc *sc; >+ int error; >+ >+ sc = device_get_softc(dev); >+ >+ error = vtpci_delete_child(&sc->vtpci_common); >+ if (error) >+ return (error); >+ >+ vtpci_modern_reset(sc); >+ vtpci_modern_unmap_configs(sc); >+ >+ return (0); >+} >+ >+static int >+vtpci_modern_suspend(device_t dev) >+{ >+ return (bus_generic_suspend(dev)); >+} >+ >+static int >+vtpci_modern_resume(device_t dev) >+{ >+ return (bus_generic_resume(dev)); >+} >+ >+static int >+vtpci_modern_shutdown(device_t dev) >+{ >+ (void) bus_generic_shutdown(dev); >+ /* Forcibly stop the host device. */ >+ vtpci_modern_stop(dev); >+ >+ return (0); >+} >+ >+static void >+vtpci_modern_driver_added(device_t dev, driver_t *driver) >+{ >+ vtpci_modern_probe_and_attach_child(device_get_softc(dev)); >+} >+ >+static void >+vtpci_modern_child_detached(device_t dev, device_t child) >+{ >+ struct vtpci_modern_softc *sc; >+ >+ sc = device_get_softc(dev); >+ >+ vtpci_modern_reset(sc); >+ vtpci_child_detached(&sc->vtpci_common); >+ >+ /* After the reset, retell the host we've noticed this device. */ >+ vtpci_modern_set_status(sc, VIRTIO_CONFIG_STATUS_ACK); >+} >+ >+static int >+vtpci_modern_read_ivar(device_t dev, device_t child, int index, >+ uintptr_t *result) >+{ >+ struct vtpci_modern_softc *sc; >+ struct vtpci_common *cn; >+ >+ sc = device_get_softc(dev); >+ cn = &sc->vtpci_common; >+ >+ if (vtpci_child_device(cn) != child) >+ return (ENOENT); >+ >+ switch (index) { >+ case VIRTIO_IVAR_DEVTYPE: >+ *result = sc->vtpci_devid; >+ break; >+ default: >+ return (vtpci_read_ivar(cn, index, result)); >+ } >+ >+ return (0); >+} >+ >+static int >+vtpci_modern_write_ivar(device_t dev, device_t child, int index, >+ uintptr_t value) >+{ >+ struct vtpci_modern_softc *sc; >+ struct vtpci_common *cn; >+ >+ sc = device_get_softc(dev); >+ cn = &sc->vtpci_common; >+ >+ if (vtpci_child_device(cn) != child) >+ return (ENOENT); >+ >+ switch (index) { >+ default: >+ return (vtpci_write_ivar(cn, index, value)); >+ } >+ >+ return (0); >+} >+ >+static uint64_t >+vtpci_modern_negotiate_features(device_t dev, uint64_t child_features) >+{ >+ struct vtpci_modern_softc *sc; >+ uint64_t host_features, features; >+ >+ sc = device_get_softc(dev); >+ host_features = vtpci_modern_read_features(sc); >+ >+ /* >+ * Since the driver was added as a child of the modern PCI bus, >+ * always add the V1 flag. >+ */ >+ child_features |= VIRTIO_F_VERSION_1; >+ >+ features = vtpci_negotiate_features(&sc->vtpci_common, >+ child_features, host_features); >+ vtpci_modern_write_features(sc, features); >+ >+ return (features); >+} >+ >+static int >+vtpci_modern_finalize_features(device_t dev) >+{ >+ struct vtpci_modern_softc *sc; >+ uint8_t status; >+ >+ sc = device_get_softc(dev); >+ >+ /* >+ * Must re-read the status after setting it to verify the negotiated >+ * features were accepted by the device. 
>+ */
>+ vtpci_modern_set_status(sc, VIRTIO_CONFIG_S_FEATURES_OK);
>+
>+ status = vtpci_modern_get_status(sc);
>+ if ((status & VIRTIO_CONFIG_S_FEATURES_OK) == 0) {
>+ device_printf(dev, "desired features were not accepted\n");
>+ return (ENOTSUP);
>+ }
>+
>+ return (0);
>+}
>+
>+static int
>+vtpci_modern_with_feature(device_t dev, uint64_t feature)
>+{
>+ struct vtpci_modern_softc *sc;
>+
>+ sc = device_get_softc(dev);
>+
>+ return (vtpci_with_feature(&sc->vtpci_common, feature));
>+}
>+
>+static uint64_t
>+vtpci_modern_read_features(struct vtpci_modern_softc *sc)
>+{
>+ uint32_t features0, features1;
>+
>+ vtpci_modern_write_common_4(sc, VIRTIO_PCI_COMMON_DFSELECT, 0);
>+ features0 = vtpci_modern_read_common_4(sc, VIRTIO_PCI_COMMON_DF);
>+ vtpci_modern_write_common_4(sc, VIRTIO_PCI_COMMON_DFSELECT, 1);
>+ features1 = vtpci_modern_read_common_4(sc, VIRTIO_PCI_COMMON_DF);
>+
>+ return (((uint64_t) features1 << 32) | features0);
>+}
>+
>+static void
>+vtpci_modern_write_features(struct vtpci_modern_softc *sc, uint64_t features)
>+{
>+ uint32_t features0, features1;
>+
>+ features0 = features;
>+ features1 = features >> 32;
>+
>+ vtpci_modern_write_common_4(sc, VIRTIO_PCI_COMMON_GFSELECT, 0);
>+ vtpci_modern_write_common_4(sc, VIRTIO_PCI_COMMON_GF, features0);
>+ vtpci_modern_write_common_4(sc, VIRTIO_PCI_COMMON_GFSELECT, 1);
>+ vtpci_modern_write_common_4(sc, VIRTIO_PCI_COMMON_GF, features1);
>+}
>+
>+static int
>+vtpci_modern_alloc_virtqueues(device_t dev, int flags, int nvqs,
>+ struct vq_alloc_info *vq_info)
>+{
>+ struct vtpci_modern_softc *sc;
>+ struct vtpci_common *cn;
>+ uint16_t max_nvqs;
>+
>+ sc = device_get_softc(dev);
>+ cn = &sc->vtpci_common;
>+
>+ max_nvqs = vtpci_modern_read_common_2(sc, VIRTIO_PCI_COMMON_NUMQ);
>+ if (nvqs > max_nvqs) {
>+ device_printf(sc->vtpci_dev, "requested virtqueue count %d "
>+ "exceeds max %d\n", nvqs, max_nvqs);
>+ return (E2BIG);
>+ }
>+
>+ return (vtpci_alloc_virtqueues(cn, flags, nvqs, vq_info));
>+}
>+
>+static int
>+vtpci_modern_setup_interrupts(device_t dev, enum intr_type type)
>+{
>+ struct vtpci_modern_softc *sc;
>+ int error;
>+
>+ sc = device_get_softc(dev);
>+
>+ error = vtpci_setup_interrupts(&sc->vtpci_common, type);
>+ if (error == 0)
>+ vtpci_modern_enable_virtqueues(sc);
>+
>+ return (error);
>+}
>+
>+static void
>+vtpci_modern_stop(device_t dev)
>+{
>+ vtpci_modern_reset(device_get_softc(dev));
>+}
>+
>+static int
>+vtpci_modern_reinit(device_t dev, uint64_t features)
>+{
>+ struct vtpci_modern_softc *sc;
>+ struct vtpci_common *cn;
>+ int error;
>+
>+ sc = device_get_softc(dev);
>+ cn = &sc->vtpci_common;
>+
>+ /*
>+ * Redrive the device initialization. This is a bit of an abuse of
>+ * the specification, but VirtualBox, QEMU/KVM, and bhyve seem to
>+ * play nice.
>+ *
>+ * We do not allow the host device to change from what was originally
>+ * negotiated beyond what the guest driver changed. MSI-X state should
>+ * not change, and the number of virtqueues and their sizes must remain
>+ * the same. This will need to be rethought when we want to support
>+ * migration.
>+ */
>+
>+ if (vtpci_modern_get_status(sc) != VIRTIO_CONFIG_STATUS_RESET)
>+ vtpci_modern_stop(dev);
>+
>+ /*
>+ * Quickly drive the status through ACK and DRIVER. The device does
>+ * not become usable again until DRIVER_OK is set in reinit complete.
>+ */
>+ vtpci_modern_set_status(sc, VIRTIO_CONFIG_STATUS_ACK);
>+ vtpci_modern_set_status(sc, VIRTIO_CONFIG_STATUS_DRIVER);
>+
>+ /*
>+ * TODO: Check that no new features were added beyond what was
>+ * originally negotiated.
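>+ * Accepting features the device did not offer at attach time could
>+ * silently change driver behavior across a reinit.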
>+ */ >+ vtpci_modern_negotiate_features(dev, features); >+ error = vtpci_modern_finalize_features(dev); >+ if (error) { >+ device_printf(dev, "cannot finalize features during reinit\n"); >+ return (error); >+ } >+ >+ error = vtpci_reinit(cn); >+ if (error) >+ return (error); >+ >+ return (0); >+} >+ >+static void >+vtpci_modern_reinit_complete(device_t dev) >+{ >+ struct vtpci_modern_softc *sc; >+ >+ sc = device_get_softc(dev); >+ >+ vtpci_modern_enable_virtqueues(sc); >+ vtpci_modern_set_status(sc, VIRTIO_CONFIG_STATUS_DRIVER_OK); >+} >+ >+static void >+vtpci_modern_notify_vq(device_t dev, uint16_t queue, bus_size_t offset) >+{ >+ struct vtpci_modern_softc *sc; >+ >+ sc = device_get_softc(dev); >+ >+ vtpci_modern_write_notify_2(sc, offset, queue); >+} >+ >+static uint8_t >+vtpci_modern_get_status(struct vtpci_modern_softc *sc) >+{ >+ return (vtpci_modern_read_common_1(sc, VIRTIO_PCI_COMMON_STATUS)); >+} >+ >+static void >+vtpci_modern_set_status(struct vtpci_modern_softc *sc, uint8_t status) >+{ >+ if (status != VIRTIO_CONFIG_STATUS_RESET) >+ status |= vtpci_modern_get_status(sc); >+ >+ vtpci_modern_write_common_1(sc, VIRTIO_PCI_COMMON_STATUS, status); >+} >+ >+static int >+vtpci_modern_config_generation(device_t dev) >+{ >+ struct vtpci_modern_softc *sc; >+ uint8_t gen; >+ >+ sc = device_get_softc(dev); >+ gen = vtpci_modern_read_common_1(sc, VIRTIO_PCI_COMMON_CFGGENERATION); >+ >+ return (gen); >+} >+ >+static void >+vtpci_modern_read_dev_config(device_t dev, bus_size_t offset, void *dst, >+ int length) >+{ >+ struct vtpci_modern_softc *sc; >+ >+ sc = device_get_softc(dev); >+ >+ if (sc->vtpci_device_res_map.vtrm_map.r_size == 0) { >+ panic("%s: attempt to read dev config but not present", >+ __func__); >+ } >+ >+ switch (length) { >+ case 1: >+ *(uint8_t *) dst = vtpci_modern_read_device_1(sc, offset); >+ break; >+ case 2: >+ *(uint16_t *) dst = virtio_htog16(true, >+ vtpci_modern_read_device_2(sc, offset)); >+ break; >+ case 4: >+ *(uint32_t *) dst = virtio_htog32(true, >+ vtpci_modern_read_device_4(sc, offset)); >+ break; >+ case 8: >+ *(uint64_t *) dst = virtio_htog64(true, >+ vtpci_modern_read_device_8(sc, offset)); >+ break; >+ default: >+ panic("%s: device %s invalid device read length %d offset %d", >+ __func__, device_get_nameunit(dev), length, (int) offset); >+ } >+} >+ >+static void >+vtpci_modern_write_dev_config(device_t dev, bus_size_t offset, void *src, >+ int length) >+{ >+ struct vtpci_modern_softc *sc; >+ >+ sc = device_get_softc(dev); >+ >+ if (sc->vtpci_device_res_map.vtrm_map.r_size == 0) { >+ panic("%s: attempt to write dev config but not present", >+ __func__); >+ } >+ >+ switch (length) { >+ case 1: >+ vtpci_modern_write_device_1(sc, offset, *(uint8_t *) src); >+ break; >+ case 2: { >+ uint16_t val = virtio_gtoh16(true, *(uint16_t *) src); >+ vtpci_modern_write_device_2(sc, offset, val); >+ break; >+ } >+ case 4: { >+ uint32_t val = virtio_gtoh32(true, *(uint32_t *) src); >+ vtpci_modern_write_device_4(sc, offset, val); >+ break; >+ } >+ case 8: { >+ uint64_t val = virtio_gtoh64(true, *(uint64_t *) src); >+ vtpci_modern_write_device_8(sc, offset, val); >+ break; >+ } >+ default: >+ panic("%s: device %s invalid device write length %d offset %d", >+ __func__, device_get_nameunit(dev), length, (int) offset); >+ } >+} >+ >+static int >+vtpci_modern_probe_configs(device_t dev) >+{ >+ int error; >+ >+ /* >+ * These config capabilities must be present. The DEVICE_CFG >+ * capability is only present if the device requires it. 
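>+ * Per the VirtIO 1.0 spec, COMMON_CFG, NOTIFY_CFG, and ISR_CFG are
>+ * all mandatory for a modern device, so any miss fails the probe.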
>+ */ >+ >+ error = vtpci_modern_find_cap(dev, VIRTIO_PCI_CAP_COMMON_CFG, NULL); >+ if (error) { >+ device_printf(dev, "cannot find COMMON_CFG capability\n"); >+ return (error); >+ } >+ >+ error = vtpci_modern_find_cap(dev, VIRTIO_PCI_CAP_NOTIFY_CFG, NULL); >+ if (error) { >+ device_printf(dev, "cannot find NOTIFY_CFG capability\n"); >+ return (error); >+ } >+ >+ error = vtpci_modern_find_cap(dev, VIRTIO_PCI_CAP_ISR_CFG, NULL); >+ if (error) { >+ device_printf(dev, "cannot find ISR_CFG capability\n"); >+ return (error); >+ } >+ >+ return (0); >+} >+ >+static int >+vtpci_modern_find_cap(device_t dev, uint8_t cfg_type, int *cap_offset) >+{ >+ uint32_t type, bar; >+ int capreg, error; >+ >+ for (error = pci_find_cap(dev, PCIY_VENDOR, &capreg); >+ error == 0; >+ error = pci_find_next_cap(dev, PCIY_VENDOR, capreg, &capreg)) { >+ >+ type = pci_read_config(dev, capreg + >+ offsetof(struct virtio_pci_cap, cfg_type), 1); >+ bar = pci_read_config(dev, capreg + >+ offsetof(struct virtio_pci_cap, bar), 1); >+ >+ /* Must ignore reserved BARs. */ >+ if (bar >= VTPCI_MODERN_MAX_BARS) >+ continue; >+ >+ if (type == cfg_type) { >+ if (cap_offset != NULL) >+ *cap_offset = capreg; >+ break; >+ } >+ } >+ >+ return (error); >+} >+ >+static int >+vtpci_modern_map_common_config(struct vtpci_modern_softc *sc) >+{ >+ device_t dev; >+ int error; >+ >+ dev = sc->vtpci_dev; >+ >+ error = vtpci_modern_find_cap_resource(sc, VIRTIO_PCI_CAP_COMMON_CFG, >+ sizeof(struct virtio_pci_common_cfg), 4, &sc->vtpci_common_res_map); >+ if (error) { >+ device_printf(dev, "cannot find cap COMMON_CFG resource\n"); >+ return (error); >+ } >+ >+ error = vtpci_modern_alloc_resource_map(sc, &sc->vtpci_common_res_map); >+ if (error) { >+ device_printf(dev, "cannot alloc resource for COMMON_CFG\n"); >+ return (error); >+ } >+ >+ return (0); >+} >+ >+static int >+vtpci_modern_map_notify_config(struct vtpci_modern_softc *sc) >+{ >+ device_t dev; >+ int cap_offset, error; >+ >+ dev = sc->vtpci_dev; >+ >+ error = vtpci_modern_find_cap_resource(sc, VIRTIO_PCI_CAP_NOTIFY_CFG, >+ -1, 2, &sc->vtpci_notify_res_map); >+ if (error) { >+ device_printf(dev, "cannot find cap NOTIFY_CFG resource\n"); >+ return (error); >+ } >+ >+ cap_offset = sc->vtpci_notify_res_map.vtrm_cap_offset; >+ >+ sc->vtpci_notify_offset_multiplier = pci_read_config(dev, cap_offset + >+ offsetof(struct virtio_pci_notify_cap, notify_off_multiplier), 4); >+ >+ error = vtpci_modern_alloc_resource_map(sc, &sc->vtpci_notify_res_map); >+ if (error) { >+ device_printf(dev, "cannot alloc resource for NOTIFY_CFG\n"); >+ return (error); >+ } >+ >+ return (0); >+} >+ >+static int >+vtpci_modern_map_isr_config(struct vtpci_modern_softc *sc) >+{ >+ device_t dev; >+ int error; >+ >+ dev = sc->vtpci_dev; >+ >+ error = vtpci_modern_find_cap_resource(sc, VIRTIO_PCI_CAP_ISR_CFG, >+ sizeof(uint8_t), 1, &sc->vtpci_isr_res_map); >+ if (error) { >+ device_printf(dev, "cannot find cap ISR_CFG resource\n"); >+ return (error); >+ } >+ >+ error = vtpci_modern_alloc_resource_map(sc, &sc->vtpci_isr_res_map); >+ if (error) { >+ device_printf(dev, "cannot alloc resource for ISR_CFG\n"); >+ return (error); >+ } >+ >+ return (0); >+} >+ >+static int >+vtpci_modern_map_device_config(struct vtpci_modern_softc *sc) >+{ >+ device_t dev; >+ int error; >+ >+ dev = sc->vtpci_dev; >+ >+ error = vtpci_modern_find_cap_resource(sc, VIRTIO_PCI_CAP_DEVICE_CFG, >+ -1, 4, &sc->vtpci_device_res_map); >+ if (error == ENOENT) { >+ /* Device configuration is optional depending on device. 
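>+ * The entropy device (virtio-rng), for example, defines no
>+ * device-specific config region, while network and block devices do.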
*/ >+ return (0); >+ } else if (error) { >+ device_printf(dev, "cannot find cap DEVICE_CFG resource\n"); >+ return (error); >+ } >+ >+ error = vtpci_modern_alloc_resource_map(sc, &sc->vtpci_device_res_map); >+ if (error) { >+ device_printf(dev, "cannot alloc resource for DEVICE_CFG\n"); >+ return (error); >+ } >+ >+ return (error); >+} >+ >+static int >+vtpci_modern_map_configs(struct vtpci_modern_softc *sc) >+{ >+ int error; >+ >+ error = vtpci_modern_map_common_config(sc); >+ if (error) >+ return (error); >+ >+ error = vtpci_modern_map_notify_config(sc); >+ if (error) >+ return (error); >+ >+ error = vtpci_modern_map_isr_config(sc); >+ if (error) >+ return (error); >+ >+ error = vtpci_modern_map_device_config(sc); >+ if (error) >+ return (error); >+ >+ vtpci_modern_alloc_msix_resource(sc); >+ >+ return (0); >+} >+ >+static void >+vtpci_modern_unmap_configs(struct vtpci_modern_softc *sc) >+{ >+ >+ vtpci_modern_free_resource_map(sc, &sc->vtpci_common_res_map); >+ vtpci_modern_free_resource_map(sc, &sc->vtpci_notify_res_map); >+ vtpci_modern_free_resource_map(sc, &sc->vtpci_isr_res_map); >+ vtpci_modern_free_resource_map(sc, &sc->vtpci_device_res_map); >+ >+ vtpci_modern_free_bar_resources(sc); >+ vtpci_modern_free_msix_resource(sc); >+ >+ sc->vtpci_notify_offset_multiplier = 0; >+} >+ >+static int >+vtpci_modern_find_cap_resource(struct vtpci_modern_softc *sc, uint8_t cfg_type, >+ int min_size, int alignment, struct vtpci_modern_resource_map *res) >+{ >+ device_t dev; >+ int cap_offset, offset, length, error; >+ uint8_t bar, cap_length; >+ >+ dev = sc->vtpci_dev; >+ >+ error = vtpci_modern_find_cap(dev, cfg_type, &cap_offset); >+ if (error) >+ return (error); >+ >+ cap_length = pci_read_config(dev, >+ cap_offset + offsetof(struct virtio_pci_cap, cap_len), 1); >+ >+ if (cap_length < sizeof(struct virtio_pci_cap)) { >+ device_printf(dev, "cap %u length %d less than expected\n", >+ cfg_type, cap_length); >+ return (ENXIO); >+ } >+ >+ bar = pci_read_config(dev, >+ cap_offset + offsetof(struct virtio_pci_cap, bar), 1); >+ offset = pci_read_config(dev, >+ cap_offset + offsetof(struct virtio_pci_cap, offset), 4); >+ length = pci_read_config(dev, >+ cap_offset + offsetof(struct virtio_pci_cap, length), 4); >+ >+ if (min_size != -1 && length < min_size) { >+ device_printf(dev, "cap %u struct length %d less than min %d\n", >+ cfg_type, length, min_size); >+ return (ENXIO); >+ } >+ >+ if (offset % alignment) { >+ device_printf(dev, "cap %u struct offset %d not aligned to %d\n", >+ cfg_type, offset, alignment); >+ return (ENXIO); >+ } >+ >+ /* BMV: TODO Can we determine the size of the BAR here? */ >+ >+ res->vtrm_cap_offset = cap_offset; >+ res->vtrm_bar = bar; >+ res->vtrm_offset = offset; >+ res->vtrm_length = length; >+ res->vtrm_type = vtpci_modern_bar_type(sc, bar); >+ >+ return (0); >+} >+ >+static int >+vtpci_modern_bar_type(struct vtpci_modern_softc *sc, int bar) >+{ >+ uint32_t val; >+ >+ /* >+ * The BAR described by a config capability may be either an IOPORT or >+ * MEM, but we must know the type when calling bus_alloc_resource(). 
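>+ * Bit 0 of the BAR register distinguishes I/O port space (1) from
>+ * memory space (0), which is what PCI_BAR_IO() tests below.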
>+ */ >+ val = pci_read_config(sc->vtpci_dev, PCIR_BAR(bar), 4); >+ if (PCI_BAR_IO(val)) >+ return (SYS_RES_IOPORT); >+ else >+ return (SYS_RES_MEMORY); >+} >+ >+static struct resource * >+vtpci_modern_get_bar_resource(struct vtpci_modern_softc *sc, int bar, int type) >+{ >+ struct resource *res; >+ >+ MPASS(bar >= 0 && bar < VTPCI_MODERN_MAX_BARS); >+ res = sc->vtpci_bar_res[bar].vtbr_res; >+ MPASS(res == NULL || sc->vtpci_bar_res[bar].vtbr_type == type); >+ >+ return (res); >+} >+ >+static struct resource * >+vtpci_modern_alloc_bar_resource(struct vtpci_modern_softc *sc, int bar, >+ int type) >+{ >+ struct resource *res; >+ int rid; >+ >+ MPASS(bar >= 0 && bar < VTPCI_MODERN_MAX_BARS); >+ MPASS(type == SYS_RES_MEMORY || type == SYS_RES_IOPORT); >+ >+ res = sc->vtpci_bar_res[bar].vtbr_res; >+ if (res != NULL) { >+ MPASS(sc->vtpci_bar_res[bar].vtbr_type == type); >+ return (res); >+ } >+ >+ rid = PCIR_BAR(bar); >+ res = bus_alloc_resource_any(sc->vtpci_dev, type, &rid, >+ RF_ACTIVE | RF_UNMAPPED); >+ if (res != NULL) { >+ sc->vtpci_bar_res[bar].vtbr_res = res; >+ sc->vtpci_bar_res[bar].vtbr_type = type; >+ } >+ >+ return (res); >+} >+ >+static void >+vtpci_modern_free_bar_resources(struct vtpci_modern_softc *sc) >+{ >+ device_t dev; >+ struct resource *res; >+ int bar, rid, type; >+ >+ dev = sc->vtpci_dev; >+ >+ for (bar = 0; bar < VTPCI_MODERN_MAX_BARS; bar++) { >+ res = sc->vtpci_bar_res[bar].vtbr_res; >+ type = sc->vtpci_bar_res[bar].vtbr_type; >+ >+ if (res != NULL) { >+ rid = PCIR_BAR(bar); >+ bus_release_resource(dev, type, rid, res); >+ sc->vtpci_bar_res[bar].vtbr_res = NULL; >+ sc->vtpci_bar_res[bar].vtbr_type = 0; >+ } >+ } >+} >+ >+static int >+vtpci_modern_alloc_resource_map(struct vtpci_modern_softc *sc, >+ struct vtpci_modern_resource_map *map) >+{ >+ struct resource_map_request req; >+ struct resource *res; >+ int type; >+ >+ type = map->vtrm_type; >+ >+ res = vtpci_modern_alloc_bar_resource(sc, map->vtrm_bar, type); >+ if (res == NULL) >+ return (ENXIO); >+ >+ resource_init_map_request(&req); >+ req.offset = map->vtrm_offset; >+ req.length = map->vtrm_length; >+ >+ return (bus_map_resource(sc->vtpci_dev, type, res, &req, >+ &map->vtrm_map)); >+} >+ >+static void >+vtpci_modern_free_resource_map(struct vtpci_modern_softc *sc, >+ struct vtpci_modern_resource_map *map) >+{ >+ struct resource *res; >+ int type; >+ >+ type = map->vtrm_type; >+ res = vtpci_modern_get_bar_resource(sc, map->vtrm_bar, type); >+ >+ if (res != NULL && map->vtrm_map.r_size != 0) { >+ bus_unmap_resource(sc->vtpci_dev, type, res, &map->vtrm_map); >+ bzero(map, sizeof(struct vtpci_modern_resource_map)); >+ } >+} >+ >+static void >+vtpci_modern_alloc_msix_resource(struct vtpci_modern_softc *sc) >+{ >+ device_t dev; >+ int bar; >+ >+ dev = sc->vtpci_dev; >+ >+ if (!vtpci_is_msix_available(&sc->vtpci_common) || >+ (bar = pci_msix_table_bar(dev)) == -1) >+ return; >+ >+ sc->vtpci_msix_bar = bar; >+ if ((sc->vtpci_msix_res = bus_alloc_resource_any(dev, SYS_RES_MEMORY, >+ &bar, RF_ACTIVE)) == NULL) >+ device_printf(dev, "Unable to map MSIX table\n"); >+} >+ >+static void >+vtpci_modern_free_msix_resource(struct vtpci_modern_softc *sc) >+{ >+ device_t dev; >+ >+ dev = sc->vtpci_dev; >+ >+ if (sc->vtpci_msix_res != NULL) { >+ bus_release_resource(dev, SYS_RES_MEMORY, sc->vtpci_msix_bar, >+ sc->vtpci_msix_res); >+ sc->vtpci_msix_bar = 0; >+ sc->vtpci_msix_res = NULL; >+ } >+} >+ >+static void >+vtpci_modern_probe_and_attach_child(struct vtpci_modern_softc *sc) >+{ >+ device_t dev, child; >+ >+ dev = sc->vtpci_dev; 
>+ child = vtpci_child_device(&sc->vtpci_common); >+ >+ if (child == NULL || device_get_state(child) != DS_NOTPRESENT) >+ return; >+ >+ if (device_probe(child) != 0) >+ return; >+ >+ vtpci_modern_set_status(sc, VIRTIO_CONFIG_STATUS_DRIVER); >+ >+ if (device_attach(child) != 0) { >+ vtpci_modern_set_status(sc, VIRTIO_CONFIG_STATUS_FAILED); >+ /* Reset state for later attempt. */ >+ vtpci_modern_child_detached(dev, child); >+ } else { >+ vtpci_modern_set_status(sc, VIRTIO_CONFIG_STATUS_DRIVER_OK); >+ VIRTIO_ATTACH_COMPLETED(child); >+ } >+} >+ >+static int >+vtpci_modern_register_msix(struct vtpci_modern_softc *sc, int offset, >+ struct vtpci_interrupt *intr) >+{ >+ uint16_t vector; >+ >+ if (intr != NULL) { >+ /* Map from guest rid to host vector. */ >+ vector = intr->vti_rid - 1; >+ } else >+ vector = VIRTIO_MSI_NO_VECTOR; >+ >+ vtpci_modern_write_common_2(sc, offset, vector); >+ return (vtpci_modern_read_common_2(sc, offset) == vector ? 0 : ENODEV); >+} >+ >+static int >+vtpci_modern_register_cfg_msix(device_t dev, struct vtpci_interrupt *intr) >+{ >+ struct vtpci_modern_softc *sc; >+ int error; >+ >+ sc = device_get_softc(dev); >+ >+ error = vtpci_modern_register_msix(sc, VIRTIO_PCI_COMMON_MSIX, intr); >+ if (error) { >+ device_printf(dev, >+ "unable to register config MSIX interrupt\n"); >+ return (error); >+ } >+ >+ return (0); >+} >+ >+static int >+vtpci_modern_register_vq_msix(device_t dev, int idx, >+ struct vtpci_interrupt *intr) >+{ >+ struct vtpci_modern_softc *sc; >+ int error; >+ >+ sc = device_get_softc(dev); >+ >+ vtpci_modern_select_virtqueue(sc, idx); >+ error = vtpci_modern_register_msix(sc, VIRTIO_PCI_COMMON_Q_MSIX, intr); >+ if (error) { >+ device_printf(dev, >+ "unable to register virtqueue MSIX interrupt\n"); >+ return (error); >+ } >+ >+ return (0); >+} >+ >+static void >+vtpci_modern_reset(struct vtpci_modern_softc *sc) >+{ >+ /* >+ * Setting the status to RESET sets the host device to the >+ * original, uninitialized state. Must poll the status until >+ * the reset is complete. >+ */ >+ vtpci_modern_set_status(sc, VIRTIO_CONFIG_STATUS_RESET); >+ >+ while (vtpci_modern_get_status(sc) != VIRTIO_CONFIG_STATUS_RESET) >+ cpu_spinwait(); >+} >+ >+static void >+vtpci_modern_select_virtqueue(struct vtpci_modern_softc *sc, int idx) >+{ >+ vtpci_modern_write_common_2(sc, VIRTIO_PCI_COMMON_Q_SELECT, idx); >+} >+ >+static uint8_t >+vtpci_modern_read_isr(device_t dev) >+{ >+ return (vtpci_modern_read_isr_1(device_get_softc(dev), 0)); >+} >+ >+static uint16_t >+vtpci_modern_get_vq_size(device_t dev, int idx) >+{ >+ struct vtpci_modern_softc *sc; >+ >+ sc = device_get_softc(dev); >+ >+ vtpci_modern_select_virtqueue(sc, idx); >+ return (vtpci_modern_read_common_2(sc, VIRTIO_PCI_COMMON_Q_SIZE)); >+} >+ >+static bus_size_t >+vtpci_modern_get_vq_notify_off(device_t dev, int idx) >+{ >+ struct vtpci_modern_softc *sc; >+ uint16_t q_notify_off; >+ >+ sc = device_get_softc(dev); >+ >+ vtpci_modern_select_virtqueue(sc, idx); >+ q_notify_off = vtpci_modern_read_common_2(sc, VIRTIO_PCI_COMMON_Q_NOFF); >+ >+ return (q_notify_off * sc->vtpci_notify_offset_multiplier); >+} >+ >+static void >+vtpci_modern_set_vq(device_t dev, struct virtqueue *vq) >+{ >+ struct vtpci_modern_softc *sc; >+ >+ sc = device_get_softc(dev); >+ >+ vtpci_modern_select_virtqueue(sc, virtqueue_index(vq)); >+ >+ /* BMV: Currently we never adjust the device's proposed VQ size. 
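>+ * The value written back to Q_SIZE is the size the device itself
>+ * reported, so this write does not shrink or grow the ring.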
*/ >+ vtpci_modern_write_common_2(sc, >+ VIRTIO_PCI_COMMON_Q_SIZE, virtqueue_size(vq)); >+ >+ vtpci_modern_write_common_8(sc, >+ VIRTIO_PCI_COMMON_Q_DESCLO, virtqueue_desc_paddr(vq)); >+ vtpci_modern_write_common_8(sc, >+ VIRTIO_PCI_COMMON_Q_AVAILLO, virtqueue_avail_paddr(vq)); >+ vtpci_modern_write_common_8(sc, >+ VIRTIO_PCI_COMMON_Q_USEDLO, virtqueue_used_paddr(vq)); >+} >+ >+static void >+vtpci_modern_disable_vq(device_t dev, int idx) >+{ >+ struct vtpci_modern_softc *sc; >+ >+ sc = device_get_softc(dev); >+ >+ vtpci_modern_select_virtqueue(sc, idx); >+ vtpci_modern_write_common_2(sc, VIRTIO_PCI_COMMON_Q_ENABLE, 0); >+ vtpci_modern_write_common_8(sc, VIRTIO_PCI_COMMON_Q_DESCLO, 0ULL); >+ vtpci_modern_write_common_8(sc, VIRTIO_PCI_COMMON_Q_AVAILLO, 0ULL); >+ vtpci_modern_write_common_8(sc, VIRTIO_PCI_COMMON_Q_USEDLO, 0ULL); >+} >+ >+static void >+vtpci_modern_enable_virtqueues(struct vtpci_modern_softc *sc) >+{ >+ int idx; >+ >+ for (idx = 0; idx < sc->vtpci_common.vtpci_nvqs; idx++) { >+ vtpci_modern_select_virtqueue(sc, idx); >+ vtpci_modern_write_common_2(sc, VIRTIO_PCI_COMMON_Q_ENABLE, 1); >+ } >+} >+ >+static uint8_t >+vtpci_modern_read_common_1(struct vtpci_modern_softc *sc, bus_size_t off) >+{ >+ return (bus_read_1(&sc->vtpci_common_res_map.vtrm_map, off)); >+} >+ >+static uint16_t >+vtpci_modern_read_common_2(struct vtpci_modern_softc *sc, bus_size_t off) >+{ >+ return (bus_read_2(&sc->vtpci_common_res_map.vtrm_map, off)); >+} >+ >+static uint32_t >+vtpci_modern_read_common_4(struct vtpci_modern_softc *sc, bus_size_t off) >+{ >+ return (bus_read_4(&sc->vtpci_common_res_map.vtrm_map, off)); >+} >+ >+static void >+vtpci_modern_write_common_1(struct vtpci_modern_softc *sc, bus_size_t off, >+ uint8_t val) >+{ >+ bus_write_1(&sc->vtpci_common_res_map.vtrm_map, off, val); >+} >+ >+static void >+vtpci_modern_write_common_2(struct vtpci_modern_softc *sc, bus_size_t off, >+ uint16_t val) >+{ >+ bus_write_2(&sc->vtpci_common_res_map.vtrm_map, off, val); >+} >+ >+static void >+vtpci_modern_write_common_4(struct vtpci_modern_softc *sc, bus_size_t off, >+ uint32_t val) >+{ >+ bus_write_4(&sc->vtpci_common_res_map.vtrm_map, off, val); >+} >+ >+static void >+vtpci_modern_write_common_8(struct vtpci_modern_softc *sc, bus_size_t off, >+ uint64_t val) >+{ >+ uint32_t val0, val1; >+ >+ val0 = (uint32_t) val; >+ val1 = val >> 32; >+ >+ vtpci_modern_write_common_4(sc, off, val0); >+ vtpci_modern_write_common_4(sc, off + 4, val1); >+} >+ >+static void >+vtpci_modern_write_notify_2(struct vtpci_modern_softc *sc, bus_size_t off, >+ uint16_t val) >+{ >+ bus_write_2(&sc->vtpci_notify_res_map.vtrm_map, off, val); >+} >+ >+static uint8_t >+vtpci_modern_read_isr_1(struct vtpci_modern_softc *sc, bus_size_t off) >+{ >+ return (bus_read_1(&sc->vtpci_isr_res_map.vtrm_map, off)); >+} >+ >+static uint8_t >+vtpci_modern_read_device_1(struct vtpci_modern_softc *sc, bus_size_t off) >+{ >+ return (bus_read_1(&sc->vtpci_device_res_map.vtrm_map, off)); >+} >+ >+static uint16_t >+vtpci_modern_read_device_2(struct vtpci_modern_softc *sc, bus_size_t off) >+{ >+ return (bus_read_2(&sc->vtpci_device_res_map.vtrm_map, off)); >+} >+ >+static uint32_t >+vtpci_modern_read_device_4(struct vtpci_modern_softc *sc, bus_size_t off) >+{ >+ return (bus_read_4(&sc->vtpci_device_res_map.vtrm_map, off)); >+} >+ >+static uint64_t >+vtpci_modern_read_device_8(struct vtpci_modern_softc *sc, bus_size_t off) >+{ >+ device_t dev; >+ int gen; >+ uint32_t val0, val1; >+ >+ dev = sc->vtpci_dev; >+ >+ /* >+ * Treat the 64-bit field as two 32-bit 
fields. Use the generation >+ * to ensure a consistent read. >+ */ >+ do { >+ gen = vtpci_modern_config_generation(dev); >+ val0 = vtpci_modern_read_device_4(sc, off); >+ val1 = vtpci_modern_read_device_4(sc, off + 4); >+ } while (gen != vtpci_modern_config_generation(dev)); >+ >+ return (((uint64_t) val1 << 32) | val0); >+} >+ >+static void >+vtpci_modern_write_device_1(struct vtpci_modern_softc *sc, bus_size_t off, >+ uint8_t val) >+{ >+ bus_write_1(&sc->vtpci_device_res_map.vtrm_map, off, val); >+} >+ >+static void >+vtpci_modern_write_device_2(struct vtpci_modern_softc *sc, bus_size_t off, >+ uint16_t val) >+{ >+ bus_write_2(&sc->vtpci_device_res_map.vtrm_map, off, val); >+} >+ >+static void >+vtpci_modern_write_device_4(struct vtpci_modern_softc *sc, bus_size_t off, >+ uint32_t val) >+{ >+ bus_write_4(&sc->vtpci_device_res_map.vtrm_map, off, val); >+} >+ >+static void >+vtpci_modern_write_device_8(struct vtpci_modern_softc *sc, bus_size_t off, >+ uint64_t val) >+{ >+ uint32_t val0, val1; >+ >+ val0 = (uint32_t) val; >+ val1 = val >> 32; >+ >+ vtpci_modern_write_device_4(sc, off, val0); >+ vtpci_modern_write_device_4(sc, off + 4, val1); >+} >diff -urN sys/dev/virtio.ori/pci/virtio_pci_modern_var.h sys/dev/virtio/pci/virtio_pci_modern_var.h >--- sys/dev/virtio.ori/pci/virtio_pci_modern_var.h 1969-12-31 16:00:00.000000000 -0800 >+++ sys/dev/virtio/pci/virtio_pci_modern_var.h 2020-03-19 16:44:27.328094000 -0700 >@@ -0,0 +1,135 @@ >+/* >+ * SPDX-License-Identifier: BSD-3-Clause >+ * >+ * Copyright IBM Corp. 2007 >+ * >+ * Authors: >+ * Anthony Liguori <aliguori@us.ibm.com> >+ * >+ * This header is BSD licensed so anyone can use the definitions to implement >+ * compatible drivers/servers. >+ * >+ * Redistribution and use in source and binary forms, with or without >+ * modification, are permitted provided that the following conditions >+ * are met: >+ * 1. Redistributions of source code must retain the above copyright >+ * notice, this list of conditions and the following disclaimer. >+ * 2. Redistributions in binary form must reproduce the above copyright >+ * notice, this list of conditions and the following disclaimer in the >+ * documentation and/or other materials provided with the distribution. >+ * 3. Neither the name of IBM nor the names of its contributors >+ * may be used to endorse or promote products derived from this software >+ * without specific prior written permission. >+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS >+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED >+ * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR >+ * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL IBM OR CONTRIBUTORS BE LIABLE >+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL >+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS >+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) >+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT >+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY >+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF >+ * SUCH DAMAGE. >+ * >+ * $FreeBSD$ >+ */ >+ >+#ifndef _VIRTIO_PCI_MODERN_VAR_H >+#define _VIRTIO_PCI_MODERN_VAR_H >+ >+#include <dev/virtio/pci/virtio_pci_var.h> >+ >+/* IDs for different capabilities. Must all exist. 
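>+ * Each value appears in the cfg_type field of the struct
>+ * virtio_pci_cap vendor capability that describes it.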
*/ >+/* Common configuration */ >+#define VIRTIO_PCI_CAP_COMMON_CFG 1 >+/* Notifications */ >+#define VIRTIO_PCI_CAP_NOTIFY_CFG 2 >+/* ISR access */ >+#define VIRTIO_PCI_CAP_ISR_CFG 3 >+/* Device specific configuration */ >+#define VIRTIO_PCI_CAP_DEVICE_CFG 4 >+/* PCI configuration access */ >+#define VIRTIO_PCI_CAP_PCI_CFG 5 >+ >+/* This is the PCI capability header: */ >+struct virtio_pci_cap { >+ uint8_t cap_vndr; /* Generic PCI field: PCI_CAP_ID_VNDR */ >+ uint8_t cap_next; /* Generic PCI field: next ptr. */ >+ uint8_t cap_len; /* Generic PCI field: capability length */ >+ uint8_t cfg_type; /* Identifies the structure. */ >+ uint8_t bar; /* Where to find it. */ >+ uint8_t padding[3]; /* Pad to full dword. */ >+ uint32_t offset; /* Offset within bar. */ >+ uint32_t length; /* Length of the structure, in bytes. */ >+}; >+ >+struct virtio_pci_notify_cap { >+ struct virtio_pci_cap cap; >+ uint32_t notify_off_multiplier; /* Multiplier for queue_notify_off. */ >+}; >+ >+/* Fields in VIRTIO_PCI_CAP_COMMON_CFG: */ >+struct virtio_pci_common_cfg { >+ /* About the whole device. */ >+ uint32_t device_feature_select; /* read-write */ >+ uint32_t device_feature; /* read-only */ >+ uint32_t guest_feature_select; /* read-write */ >+ uint32_t guest_feature; /* read-write */ >+ uint16_t msix_config; /* read-write */ >+ uint16_t num_queues; /* read-only */ >+ uint8_t device_status; /* read-write */ >+ uint8_t config_generation; /* read-only */ >+ >+ /* About a specific virtqueue. */ >+ uint16_t queue_select; /* read-write */ >+ uint16_t queue_size; /* read-write, power of 2. */ >+ uint16_t queue_msix_vector; /* read-write */ >+ uint16_t queue_enable; /* read-write */ >+ uint16_t queue_notify_off; /* read-only */ >+ uint32_t queue_desc_lo; /* read-write */ >+ uint32_t queue_desc_hi; /* read-write */ >+ uint32_t queue_avail_lo; /* read-write */ >+ uint32_t queue_avail_hi; /* read-write */ >+ uint32_t queue_used_lo; /* read-write */ >+ uint32_t queue_used_hi; /* read-write */ >+}; >+ >+/* Fields in VIRTIO_PCI_CAP_PCI_CFG: */ >+struct virtio_pci_cfg_cap { >+ struct virtio_pci_cap cap; >+ uint8_t pci_cfg_data[4]; /* Data for BAR access. */ >+}; >+ >+/* Macro versions of offsets for the Old Timers! 
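>+ * Each macro is the byte offset of the corresponding field in the
>+ * structs above, for use with raw bus_space reads and writes.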
*/ >+#define VIRTIO_PCI_CAP_VNDR 0 >+#define VIRTIO_PCI_CAP_NEXT 1 >+#define VIRTIO_PCI_CAP_LEN 2 >+#define VIRTIO_PCI_CAP_CFG_TYPE 3 >+#define VIRTIO_PCI_CAP_BAR 4 >+#define VIRTIO_PCI_CAP_OFFSET 8 >+#define VIRTIO_PCI_CAP_LENGTH 12 >+ >+#define VIRTIO_PCI_NOTIFY_CAP_MULT 16 >+ >+#define VIRTIO_PCI_COMMON_DFSELECT 0 >+#define VIRTIO_PCI_COMMON_DF 4 >+#define VIRTIO_PCI_COMMON_GFSELECT 8 >+#define VIRTIO_PCI_COMMON_GF 12 >+#define VIRTIO_PCI_COMMON_MSIX 16 >+#define VIRTIO_PCI_COMMON_NUMQ 18 >+#define VIRTIO_PCI_COMMON_STATUS 20 >+#define VIRTIO_PCI_COMMON_CFGGENERATION 21 >+#define VIRTIO_PCI_COMMON_Q_SELECT 22 >+#define VIRTIO_PCI_COMMON_Q_SIZE 24 >+#define VIRTIO_PCI_COMMON_Q_MSIX 26 >+#define VIRTIO_PCI_COMMON_Q_ENABLE 28 >+#define VIRTIO_PCI_COMMON_Q_NOFF 30 >+#define VIRTIO_PCI_COMMON_Q_DESCLO 32 >+#define VIRTIO_PCI_COMMON_Q_DESCHI 36 >+#define VIRTIO_PCI_COMMON_Q_AVAILLO 40 >+#define VIRTIO_PCI_COMMON_Q_AVAILHI 44 >+#define VIRTIO_PCI_COMMON_Q_USEDLO 48 >+#define VIRTIO_PCI_COMMON_Q_USEDHI 52 >+ >+#endif /* _VIRTIO_PCI_MODERN_VAR_H */ >diff -urN sys/dev/virtio.ori/pci/virtio_pci_var.h sys/dev/virtio/pci/virtio_pci_var.h >--- sys/dev/virtio.ori/pci/virtio_pci_var.h 1969-12-31 16:00:00.000000000 -0800 >+++ sys/dev/virtio/pci/virtio_pci_var.h 2020-03-19 16:44:27.327016000 -0700 >@@ -0,0 +1,55 @@ >+/*- >+ * SPDX-License-Identifier: BSD-3-Clause >+ * >+ * Copyright IBM Corp. 2007 >+ * >+ * Authors: >+ * Anthony Liguori <aliguori@us.ibm.com> >+ * >+ * This header is BSD licensed so anyone can use the definitions to implement >+ * compatible drivers/servers. >+ * >+ * Redistribution and use in source and binary forms, with or without >+ * modification, are permitted provided that the following conditions >+ * are met: >+ * 1. Redistributions of source code must retain the above copyright >+ * notice, this list of conditions and the following disclaimer. >+ * 2. Redistributions in binary form must reproduce the above copyright >+ * notice, this list of conditions and the following disclaimer in the >+ * documentation and/or other materials provided with the distribution. >+ * 3. Neither the name of IBM nor the names of its contributors >+ * may be used to endorse or promote products derived from this software >+ * without specific prior written permission. >+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS >+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED >+ * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR >+ * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL IBM OR CONTRIBUTORS BE LIABLE >+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL >+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS >+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) >+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT >+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY >+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF >+ * SUCH DAMAGE. >+ * >+ * $FreeBSD$ >+ */ >+ >+#ifndef _VIRTIO_PCI_VAR_H >+#define _VIRTIO_PCI_VAR_H >+ >+/* VirtIO PCI vendor/device ID. */ >+#define VIRTIO_PCI_VENDORID 0x1AF4 >+#define VIRTIO_PCI_DEVICEID_MIN 0x1000 >+#define VIRTIO_PCI_DEVICEID_LEGACY_MAX 0x103F >+#define VIRTIO_PCI_DEVICEID_MODERN_MIN 0x1040 >+#define VIRTIO_PCI_DEVICEID_MODERN_MAX 0x107F >+ >+/* The bit of the ISR which indicates a device has an interrupt. 
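>+ * Note that reading the ISR register also acknowledges (clears) it.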
*/
>+#define VIRTIO_PCI_ISR_INTR 0x1
>+/* The bit of the ISR which indicates a device configuration change. */
>+#define VIRTIO_PCI_ISR_CONFIG 0x2
>+/* Vector value used to disable MSI for queue. */
>+#define VIRTIO_MSI_NO_VECTOR 0xFFFF
>+
>+#endif /* _VIRTIO_PCI_VAR_H */
>diff -urN sys/dev/virtio.ori/random/virtio_random.c sys/dev/virtio/random/virtio_random.c
>--- sys/dev/virtio.ori/random/virtio_random.c 2020-03-18 20:16:31.706223000 -0700
>+++ sys/dev/virtio/random/virtio_random.c 2020-03-19 16:44:27.328560000 -0700
>@@ -58,7 +58,8 @@
> static int vtrnd_attach(device_t);
> static int vtrnd_detach(device_t);
>
>-static void vtrnd_negotiate_features(struct vtrnd_softc *);
>+static int vtrnd_negotiate_features(struct vtrnd_softc *);
>+static int vtrnd_setup_features(struct vtrnd_softc *);
> static int vtrnd_alloc_virtqueue(struct vtrnd_softc *);
> static void vtrnd_harvest(struct vtrnd_softc *);
> static void vtrnd_timer(void *);
>@@ -85,8 +86,10 @@
> };
> static devclass_t vtrnd_devclass;
>
>-DRIVER_MODULE(virtio_random, virtio_pci, vtrnd_driver, vtrnd_devclass,
>+DRIVER_MODULE(virtio_random, vtpcil, vtrnd_driver, vtrnd_devclass,
> vtrnd_modevent, 0);
>+DRIVER_MODULE(virtio_random, vtpcim, vtrnd_driver, vtrnd_devclass,
>+ vtrnd_modevent, 0);
> MODULE_VERSION(virtio_random, 1);
> MODULE_DEPEND(virtio_random, virtio, 1, 1, 1);
>
>@@ -130,12 +133,14 @@
>
> sc = device_get_softc(dev);
> sc->vtrnd_dev = dev;
>-
>+ virtio_set_feature_desc(dev, vtrnd_feature_desc);
> callout_init(&sc->vtrnd_callout, 1);
>
>- virtio_set_feature_desc(dev, vtrnd_feature_desc);
>- vtrnd_negotiate_features(sc);
>-
>+ error = vtrnd_setup_features(sc);
>+ if (error) {
>+ device_printf(dev, "cannot setup features\n");
>+ goto fail;
>+ }
> error = vtrnd_alloc_virtqueue(sc);
> if (error) {
> device_printf(dev, "cannot allocate virtqueue\n");
>@@ -163,7 +168,7 @@
> return (0);
> }
>
>-static void
>+static int
> vtrnd_negotiate_features(struct vtrnd_softc *sc)
> {
> device_t dev;
>@@ -173,9 +178,22 @@
> features = VTRND_FEATURES;
>
> sc->vtrnd_features = virtio_negotiate_features(dev, features);
>+ return (virtio_finalize_features(dev));
> }
>
> static int
>+vtrnd_setup_features(struct vtrnd_softc *sc)
>+{
>+ int error;
>+
>+ error = vtrnd_negotiate_features(sc);
>+ if (error)
>+ return (error);
>+
>+ return (0);
>+}
>+
>+static int
> vtrnd_alloc_virtqueue(struct vtrnd_softc *sc)
> {
> device_t dev;
>diff -urN sys/dev/virtio.ori/scsi/virtio_scsi.c sys/dev/virtio/scsi/virtio_scsi.c
>--- sys/dev/virtio.ori/scsi/virtio_scsi.c 2020-03-18 20:16:31.703939000 -0700
>+++ sys/dev/virtio/scsi/virtio_scsi.c 2020-03-19 16:44:27.324250000 -0700
>@@ -76,7 +76,8 @@
> static int vtscsi_suspend(device_t);
> static int vtscsi_resume(device_t);
>
>-static void vtscsi_negotiate_features(struct vtscsi_softc *);
>+static int vtscsi_negotiate_features(struct vtscsi_softc *);
>+static int vtscsi_setup_features(struct vtscsi_softc *);
> static void vtscsi_read_config(struct vtscsi_softc *,
> struct virtio_scsi_config *);
> static int vtscsi_maximum_segments(struct vtscsi_softc *, int);
>@@ -135,10 +136,10 @@
>
> static void vtscsi_get_request_lun(uint8_t [], target_id_t *, lun_id_t *);
> static void vtscsi_set_request_lun(struct ccb_hdr *, uint8_t []);
>-static void vtscsi_init_scsi_cmd_req(struct
ccb_scsiio *, >- struct virtio_scsi_cmd_req *); >-static void vtscsi_init_ctrl_tmf_req(struct ccb_hdr *, uint32_t, >- uintptr_t, struct virtio_scsi_ctrl_tmf_req *); >+static void vtscsi_init_scsi_cmd_req(struct vtscsi_softc *, >+ struct ccb_scsiio *, struct virtio_scsi_cmd_req *); >+static void vtscsi_init_ctrl_tmf_req(struct vtscsi_softc *, struct ccb_hdr *, >+ uint32_t, uintptr_t, struct virtio_scsi_ctrl_tmf_req *); > > static void vtscsi_freeze_simq(struct vtscsi_softc *, int); > static int vtscsi_thaw_simq(struct vtscsi_softc *, int); >@@ -184,11 +185,19 @@ > static void vtscsi_enable_vqs_intr(struct vtscsi_softc *); > > static void vtscsi_get_tunables(struct vtscsi_softc *); >-static void vtscsi_add_sysctl(struct vtscsi_softc *); >+static void vtscsi_setup_sysctl(struct vtscsi_softc *); > > static void vtscsi_printf_req(struct vtscsi_request *, const char *, > const char *, ...); > >+#define vtscsi_modern(_sc) (((_sc)->vtscsi_features & VIRTIO_F_VERSION_1) != 0) >+#define vtscsi_htog16(_sc, _val) virtio_htog16(vtscsi_modern(_sc), _val) >+#define vtscsi_htog32(_sc, _val) virtio_htog32(vtscsi_modern(_sc), _val) >+#define vtscsi_htog64(_sc, _val) virtio_htog64(vtscsi_modern(_sc), _val) >+#define vtscsi_gtoh16(_sc, _val) virtio_gtoh16(vtscsi_modern(_sc), _val) >+#define vtscsi_gtoh32(_sc, _val) virtio_gtoh32(vtscsi_modern(_sc), _val) >+#define vtscsi_gtoh64(_sc, _val) virtio_gtoh64(vtscsi_modern(_sc), _val) >+ > /* Global tunables. */ > /* > * The current QEMU VirtIO SCSI implementation does not cancel in-flight >@@ -206,6 +215,8 @@ > static struct virtio_feature_desc vtscsi_feature_desc[] = { > { VIRTIO_SCSI_F_INOUT, "InOut" }, > { VIRTIO_SCSI_F_HOTPLUG, "Hotplug" }, >+ { VIRTIO_SCSI_F_CHANGE, "ChangeEvent" }, >+ { VIRTIO_SCSI_F_T10_PI, "T10PI" }, > > { 0, NULL } > }; >@@ -228,8 +239,10 @@ > }; > static devclass_t vtscsi_devclass; > >-DRIVER_MODULE(virtio_scsi, virtio_pci, vtscsi_driver, vtscsi_devclass, >+DRIVER_MODULE(virtio_scsi, vtpcil, vtscsi_driver, vtscsi_devclass, > vtscsi_modevent, 0); >+DRIVER_MODULE(virtio_scsi, vtpcim, vtscsi_driver, vtscsi_devclass, >+ vtscsi_modevent, 0); > MODULE_VERSION(virtio_scsi, 1); > MODULE_DEPEND(virtio_scsi, virtio, 1, 1, 1); > MODULE_DEPEND(virtio_scsi, cam, 1, 1, 1); >@@ -275,23 +288,20 @@ > > sc = device_get_softc(dev); > sc->vtscsi_dev = dev; >+ virtio_set_feature_desc(dev, vtscsi_feature_desc); > > VTSCSI_LOCK_INIT(sc, device_get_nameunit(dev)); > TAILQ_INIT(&sc->vtscsi_req_free); > > vtscsi_get_tunables(sc); >- vtscsi_add_sysctl(sc); >+ vtscsi_setup_sysctl(sc); > >- virtio_set_feature_desc(dev, vtscsi_feature_desc); >- vtscsi_negotiate_features(sc); >+ error = vtscsi_setup_features(sc); >+ if (error) { >+ device_printf(dev, "cannot setup features\n"); >+ goto fail; >+ } > >- if (virtio_with_feature(dev, VIRTIO_RING_F_INDIRECT_DESC)) >- sc->vtscsi_flags |= VTSCSI_FLAG_INDIRECT; >- if (virtio_with_feature(dev, VIRTIO_SCSI_F_INOUT)) >- sc->vtscsi_flags |= VTSCSI_FLAG_BIDIRECTIONAL; >- if (virtio_with_feature(dev, VIRTIO_SCSI_F_HOTPLUG)) >- sc->vtscsi_flags |= VTSCSI_FLAG_HOTPLUG; >- > vtscsi_read_config(sc, &scsicfg); > > sc->vtscsi_max_channel = scsicfg.max_channel; >@@ -403,17 +413,41 @@ > return (0); > } > >-static void >+static int > vtscsi_negotiate_features(struct vtscsi_softc *sc) > { > device_t dev; > uint64_t features; > > dev = sc->vtscsi_dev; >- features = virtio_negotiate_features(dev, VTSCSI_FEATURES); >- sc->vtscsi_features = features; >+ features = VTSCSI_FEATURES; >+ >+ sc->vtscsi_features = virtio_negotiate_features(dev, 
features);
>+ return (virtio_finalize_features(dev));
> }
>
>+static int
>+vtscsi_setup_features(struct vtscsi_softc *sc)
>+{
>+ device_t dev;
>+ int error;
>+
>+ dev = sc->vtscsi_dev;
>+
>+ error = vtscsi_negotiate_features(sc);
>+ if (error)
>+ return (error);
>+
>+ if (virtio_with_feature(dev, VIRTIO_RING_F_INDIRECT_DESC))
>+ sc->vtscsi_flags |= VTSCSI_FLAG_INDIRECT;
>+ if (virtio_with_feature(dev, VIRTIO_SCSI_F_INOUT))
>+ sc->vtscsi_flags |= VTSCSI_FLAG_BIDIRECTIONAL;
>+ if (virtio_with_feature(dev, VIRTIO_SCSI_F_HOTPLUG))
>+ sc->vtscsi_flags |= VTSCSI_FLAG_HOTPLUG;
>+
>+ return (0);
>+}
>+
> #define VTSCSI_GET_CONFIG(_dev, _field, _cfg) \
> virtio_read_device_config(_dev, \
> offsetof(struct virtio_scsi_config, _field), \
>@@ -531,8 +565,8 @@
> error = virtio_reinit(dev, sc->vtscsi_features);
> if (error == 0) {
> vtscsi_write_device_config(sc);
>- vtscsi_reinit_event_vq(sc);
> virtio_reinit_complete(dev);
>+ vtscsi_reinit_event_vq(sc);
>
> vtscsi_enable_vqs_intr(sc);
> }
>@@ -940,7 +974,7 @@
>
> cpi->max_target = sc->vtscsi_max_target;
> cpi->max_lun = sc->vtscsi_max_lun;
>- cpi->initiator_id = VTSCSI_INITIATOR_ID;
>+ cpi->initiator_id = cpi->max_target + 1;
>
> strlcpy(cpi->sim_vid, "FreeBSD", SIM_IDLEN);
> strlcpy(cpi->hba_vid, "VirtIO", HBA_IDLEN);
>@@ -1086,7 +1120,7 @@
> cmd_req = &req->vsr_cmd_req;
> cmd_resp = &req->vsr_cmd_resp;
>
>- vtscsi_init_scsi_cmd_req(csio, cmd_req);
>+ vtscsi_init_scsi_cmd_req(sc, csio, cmd_req);
>
> error = vtscsi_fill_scsi_cmd_sglist(sc, req, &readable, &writable);
> if (error)
>@@ -1206,7 +1240,7 @@
> tmf_req = &req->vsr_tmf_req;
> tmf_resp = &req->vsr_tmf_resp;
>
>- vtscsi_init_ctrl_tmf_req(to_ccbh, VIRTIO_SCSI_T_TMF_ABORT_TASK,
>+ vtscsi_init_ctrl_tmf_req(sc, to_ccbh, VIRTIO_SCSI_T_TMF_ABORT_TASK,
> (uintptr_t) to_ccbh, tmf_req);
>
> sglist_reset(sg);
>@@ -1314,26 +1348,29 @@
> vtscsi_complete_scsi_cmd_response(struct vtscsi_softc *sc,
> struct ccb_scsiio *csio, struct virtio_scsi_cmd_resp *cmd_resp)
> {
>+ uint32_t resp_sense_length;
> cam_status status;
>
> csio->scsi_status = cmd_resp->status;
>- csio->resid = cmd_resp->resid;
>+ csio->resid = vtscsi_htog32(sc, cmd_resp->resid);
>
> if (csio->scsi_status == SCSI_STATUS_OK)
> status = CAM_REQ_CMP;
> else
> status = CAM_SCSI_STATUS_ERROR;
>
>- if (cmd_resp->sense_len > 0) {
>+ resp_sense_length = vtscsi_htog32(sc, cmd_resp->sense_len);
>+
>+ if (resp_sense_length > 0) {
> status |= CAM_AUTOSNS_VALID;
>
>- if (cmd_resp->sense_len < csio->sense_len)
>- csio->sense_resid = csio->sense_len -
>- cmd_resp->sense_len;
>+ if (resp_sense_length < csio->sense_len)
>+ csio->sense_resid = csio->sense_len - resp_sense_length;
> else
> csio->sense_resid = 0;
>
>- memcpy(&csio->sense_data, cmd_resp->sense,
>+ bzero(&csio->sense_data, sizeof(csio->sense_data));
>+ memcpy(&csio->sense_data, cmd_resp->sense,
> csio->sense_len - csio->sense_resid);
> }
>
>@@ -1494,7 +1531,7 @@
> if (abort_req->vsr_flags & VTSCSI_REQ_FLAG_TIMEOUT_SET)
> callout_stop(&abort_req->vsr_callout);
>
>- vtscsi_init_ctrl_tmf_req(ccbh, VIRTIO_SCSI_T_TMF_ABORT_TASK,
>+ vtscsi_init_ctrl_tmf_req(sc, ccbh, VIRTIO_SCSI_T_TMF_ABORT_TASK,
> (uintptr_t) abort_ccbh, tmf_req);
>
> sglist_reset(sg);
>@@ -1563,7 +1600,7 @@
> else
> subtype = VIRTIO_SCSI_T_TMF_LOGICAL_UNIT_RESET;
>
>- vtscsi_init_ctrl_tmf_req(ccbh, subtype, 0, tmf_req);
>+ vtscsi_init_ctrl_tmf_req(sc, ccbh, subtype, 0, tmf_req);
>
> sglist_reset(sg);
> sglist_append(sg, tmf_req, sizeof(struct virtio_scsi_ctrl_tmf_req));
>@@ -1600,7 +1637,7 @@
> }
>
> static void
>-vtscsi_init_scsi_cmd_req(struct ccb_scsiio *csio, >+vtscsi_init_scsi_cmd_req(struct vtscsi_softc *sc, struct ccb_scsiio *csio, > struct virtio_scsi_cmd_req *cmd_req) > { > uint8_t attr; >@@ -1621,7 +1658,7 @@ > } > > vtscsi_set_request_lun(&csio->ccb_h, cmd_req->lun); >- cmd_req->tag = (uintptr_t) csio; >+ cmd_req->tag = vtscsi_gtoh64(sc, (uintptr_t) csio); > cmd_req->task_attr = attr; > > memcpy(cmd_req->cdb, >@@ -1631,15 +1668,15 @@ > } > > static void >-vtscsi_init_ctrl_tmf_req(struct ccb_hdr *ccbh, uint32_t subtype, >- uintptr_t tag, struct virtio_scsi_ctrl_tmf_req *tmf_req) >+vtscsi_init_ctrl_tmf_req(struct vtscsi_softc *sc, struct ccb_hdr *ccbh, >+ uint32_t subtype, uintptr_t tag, struct virtio_scsi_ctrl_tmf_req *tmf_req) > { > > vtscsi_set_request_lun(ccbh, tmf_req->lun); > >- tmf_req->type = VIRTIO_SCSI_T_TMF; >- tmf_req->subtype = subtype; >- tmf_req->tag = tag; >+ tmf_req->type = vtscsi_gtoh32(sc, VIRTIO_SCSI_T_TMF); >+ tmf_req->subtype = vtscsi_gtoh32(sc, subtype); >+ tmf_req->tag = vtscsi_gtoh64(sc, tag); > } > > static void >@@ -2273,7 +2310,7 @@ > } > > static void >-vtscsi_add_sysctl(struct vtscsi_softc *sc) >+vtscsi_setup_sysctl(struct vtscsi_softc *sc) > { > device_t dev; > struct vtscsi_statistics *stats; >diff -urN sys/dev/virtio.ori/scsi/virtio_scsi.h sys/dev/virtio/scsi/virtio_scsi.h >--- sys/dev/virtio.ori/scsi/virtio_scsi.h 2020-03-18 20:16:31.703634000 -0700 >+++ sys/dev/virtio/scsi/virtio_scsi.h 2020-03-19 16:44:27.292360000 -0700 >@@ -31,13 +31,7 @@ > #ifndef _VIRTIO_SCSI_H > #define _VIRTIO_SCSI_H > >-/* Feature bits */ >-#define VIRTIO_SCSI_F_INOUT 0x0001 /* Single request can contain both >- * read and write buffers */ >-#define VIRTIO_SCSI_F_HOTPLUG 0x0002 /* Host should enable hot plug/unplug >- * of new LUNs and targets. >- */ >- >+/* Default values of the CDB and sense data size configuration fields */ > #define VIRTIO_SCSI_CDB_SIZE 32 > #define VIRTIO_SCSI_SENSE_SIZE 96 > >@@ -46,11 +40,23 @@ > uint8_t lun[8]; /* Logical Unit Number */ > uint64_t tag; /* Command identifier */ > uint8_t task_attr; /* Task attribute */ >- uint8_t prio; >+ uint8_t prio; /* SAM command priority field */ > uint8_t crn; > uint8_t cdb[VIRTIO_SCSI_CDB_SIZE]; > } __packed; > >+/* SCSI command request, followed by protection information */ >+struct virtio_scsi_cmd_req_pi { >+ uint8_t lun[8]; /* Logical Unit Number */ >+ uint64_t tag; /* Command identifier */ >+ uint8_t task_attr; /* Task attribute */ >+ uint8_t prio; /* SAM command priority field */ >+ uint8_t crn; >+ uint32_t pi_bytesout; /* DataOUT PI Number of bytes */ >+ uint32_t pi_bytesin; /* DataIN PI Number of bytes */ >+ uint8_t cdb[VIRTIO_SCSI_CDB_SIZE]; >+} __packed; >+ > /* Response, followed by sense data and data-in */ > struct virtio_scsi_cmd_resp { > uint32_t sense_len; /* Sense data length */ >@@ -104,6 +110,22 @@ > uint32_t max_lun; > } __packed; > >+/* Feature bits */ >+#define VIRTIO_SCSI_F_INOUT 0x0001 /* Single request can contain both >+ * read and write buffers. >+ */ >+#define VIRTIO_SCSI_F_HOTPLUG 0x0002 /* Host should enable hot plug/unplug >+ * of new LUNs and targets. >+ */ >+#define VIRTIO_SCSI_F_CHANGE 0x0004 /* Host will report changes to LUN >+ * parameters via a >+ * VIRTIO_SCSI_T_PARAM_CHANGE event. >+ */ >+#define VIRTIO_SCSI_F_T10_PI 0x0008 /* Extended fields for T10 protection >+ * information (DIF/DIX) are included >+ * in the SCSI request header. 
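>+ * When negotiated, requests use the
>+ * struct virtio_scsi_cmd_req_pi
>+ * header defined above.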
>+ */ >+ > /* Response codes */ > #define VIRTIO_SCSI_S_OK 0 > #define VIRTIO_SCSI_S_FUNCTION_COMPLETE 0 >@@ -140,6 +162,7 @@ > #define VIRTIO_SCSI_T_NO_EVENT 0 > #define VIRTIO_SCSI_T_TRANSPORT_RESET 1 > #define VIRTIO_SCSI_T_ASYNC_NOTIFY 2 >+#define VIRTIO_SCSI_T_PARAM_CHANGE 3 > > /* Reasons of transport reset event */ > #define VIRTIO_SCSI_EVT_RESET_HARD 0 >diff -urN sys/dev/virtio.ori/scsi/virtio_scsivar.h sys/dev/virtio/scsi/virtio_scsivar.h >--- sys/dev/virtio.ori/scsi/virtio_scsivar.h 2020-03-18 20:16:31.703781000 -0700 >+++ sys/dev/virtio/scsi/virtio_scsivar.h 2020-03-19 16:44:27.320652000 -0700 >@@ -205,11 +205,6 @@ > #define VTSCSI_RESERVED_REQUESTS 10 > > /* >- * Specification doesn't say, use traditional SCSI default. >- */ >-#define VTSCSI_INITIATOR_ID 7 >- >-/* > * How to wait (or not) for request completion. > */ > #define VTSCSI_EXECUTE_ASYNC 0 >diff -urN sys/dev/virtio.ori/virtio.c sys/dev/virtio/virtio.c >--- sys/dev/virtio.ori/virtio.c 2020-03-18 20:16:31.708613000 -0700 >+++ sys/dev/virtio/virtio.c 2020-03-19 16:44:27.340598000 -0700 >@@ -75,10 +75,13 @@ > > /* Device independent features. */ > static struct virtio_feature_desc virtio_common_feature_desc[] = { >- { VIRTIO_F_NOTIFY_ON_EMPTY, "NotifyOnEmpty" }, >- { VIRTIO_RING_F_INDIRECT_DESC, "RingIndirect" }, >- { VIRTIO_RING_F_EVENT_IDX, "EventIdx" }, >- { VIRTIO_F_BAD_FEATURE, "BadFeature" }, >+ { VIRTIO_F_NOTIFY_ON_EMPTY, "NotifyOnEmpty" }, /* Legacy */ >+ { VIRTIO_F_ANY_LAYOUT, "AnyLayout" }, /* Legacy */ >+ { VIRTIO_RING_F_INDIRECT_DESC, "RingIndirectDesc" }, >+ { VIRTIO_RING_F_EVENT_IDX, "RingEventIdx" }, >+ { VIRTIO_F_BAD_FEATURE, "BadFeature" }, /* Legacy */ >+ { VIRTIO_F_VERSION_1, "Version1" }, >+ { VIRTIO_F_IOMMU_PLATFORM, "IOMMUPlatform" }, > > { 0, NULL } > }; >@@ -116,24 +119,16 @@ > return (NULL); > } > >-void >-virtio_describe(device_t dev, const char *msg, >- uint64_t features, struct virtio_feature_desc *desc) >+int >+virtio_describe_sbuf(struct sbuf *sb, uint64_t features, >+ struct virtio_feature_desc *desc) > { >- struct sbuf sb; > uint64_t val; >- char *buf; > const char *name; > int n; > >- if ((buf = malloc(512, M_TEMP, M_NOWAIT)) == NULL) { >- device_printf(dev, "%s features: %#jx\n", msg, (uintmax_t) features); >- return; >- } >+ sbuf_printf(sb, "%#jx", (uintmax_t) features); > >- sbuf_new(&sb, buf, 512, SBUF_FIXEDLEN); >- sbuf_printf(&sb, "%s features: %#jx", msg, (uintmax_t) features); >- > for (n = 0, val = 1ULL << 63; val != 0; val >>= 1) { > /* > * BAD_FEATURE is used to detect broken Linux clients >@@ -143,32 +138,95 @@ > continue; > > if (n++ == 0) >- sbuf_cat(&sb, " <"); >+ sbuf_cat(sb, " <"); > else >- sbuf_cat(&sb, ","); >+ sbuf_cat(sb, ","); > > name = virtio_feature_name(val, desc); > if (name == NULL) >- sbuf_printf(&sb, "%#jx", (uintmax_t) val); >+ sbuf_printf(sb, "%#jx", (uintmax_t) val); > else >- sbuf_cat(&sb, name); >+ sbuf_cat(sb, name); > } > > if (n > 0) >- sbuf_cat(&sb, ">"); >+ sbuf_cat(sb, ">"); > >-#if __FreeBSD_version < 900020 >- sbuf_finish(&sb); >- if (sbuf_overflowed(&sb) == 0) >-#else >- if (sbuf_finish(&sb) == 0) >-#endif >+ return (sbuf_finish(sb)); >+} >+ >+void >+virtio_describe(device_t dev, const char *msg, uint64_t features, >+ struct virtio_feature_desc *desc) >+{ >+ struct sbuf sb; >+ char *buf; >+ int error; >+ >+ if ((buf = malloc(1024, M_TEMP, M_NOWAIT)) == NULL) { >+ error = ENOMEM; >+ goto out; >+ } >+ >+ sbuf_new(&sb, buf, 1024, SBUF_FIXEDLEN); >+ sbuf_printf(&sb, "%s features: ", msg); >+ >+ error = virtio_describe_sbuf(&sb, features, desc); >+ 
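>+ /* A failed describe (e.g. sbuf overflow) falls through to the
>+ * raw feature-mask printf at the out label below. */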
if (error == 0) > device_printf(dev, "%s\n", sbuf_data(&sb)); > > sbuf_delete(&sb); > free(buf, M_TEMP); >+ >+out: >+ if (error != 0) { >+ device_printf(dev, "%s features: %#jx\n", msg, >+ (uintmax_t) features); >+ } > } > >+uint64_t >+virtio_filter_transport_features(uint64_t features) >+{ >+ uint64_t transport, mask; >+ >+ transport = (1ULL << >+ (VIRTIO_TRANSPORT_F_END - VIRTIO_TRANSPORT_F_START)) - 1; >+ transport <<= VIRTIO_TRANSPORT_F_START; >+ >+ mask = -1ULL & ~transport; >+ mask |= VIRTIO_RING_F_INDIRECT_DESC; >+ mask |= VIRTIO_RING_F_EVENT_IDX; >+ mask |= VIRTIO_F_VERSION_1; >+ >+ return (features & mask); >+} >+ >+int >+virtio_bus_is_modern(device_t dev) >+{ >+ uintptr_t modern; >+ >+ virtio_read_ivar(dev, VIRTIO_IVAR_MODERN, &modern); >+ return (modern != 0); >+} >+ >+void >+virtio_read_device_config_array(device_t dev, bus_size_t offset, void *dst, >+ int size, int count) >+{ >+ int i, gen; >+ >+ do { >+ gen = virtio_config_generation(dev); >+ >+ for (i = 0; i < count; i++) { >+ virtio_read_device_config(dev, offset + i * size, >+ (uint8_t *) dst + i * size, size); >+ } >+ } while (gen != virtio_config_generation(dev)); >+} >+ > /* > * VirtIO bus method wrappers. > */ >@@ -194,6 +252,13 @@ > > return (VIRTIO_BUS_NEGOTIATE_FEATURES(device_get_parent(dev), > child_features)); >+} >+ >+int >+virtio_finalize_features(device_t dev) >+{ >+ >+ return (VIRTIO_BUS_FINALIZE_FEATURES(device_get_parent(dev))); > } > > int >diff -urN sys/dev/virtio.ori/virtio.h sys/dev/virtio/virtio.h >--- sys/dev/virtio.ori/virtio.h 2020-03-18 20:16:31.704784000 -0700 >+++ sys/dev/virtio/virtio.h 2020-03-19 16:44:27.325172000 -0700 >@@ -31,9 +31,11 @@ > #ifndef _VIRTIO_H_ > #define _VIRTIO_H_ > >+#include <dev/virtio/virtio_endian.h> > #include <dev/virtio/virtio_ids.h> > #include <dev/virtio/virtio_config.h> > >+struct sbuf; > struct vq_alloc_info; > > /* >@@ -57,6 +59,7 @@ > #define VIRTIO_IVAR_DEVICE 4 > #define VIRTIO_IVAR_SUBVENDOR 5 > #define VIRTIO_IVAR_SUBDEVICE 6 >+#define VIRTIO_IVAR_MODERN 7 > > struct virtio_feature_desc { > uint64_t vfd_val; >@@ -65,7 +68,13 @@ > > const char *virtio_device_name(uint16_t devid); > void virtio_describe(device_t dev, const char *msg, >- uint64_t features, struct virtio_feature_desc *feature_desc); >+ uint64_t features, struct virtio_feature_desc *desc); >+int virtio_describe_sbuf(struct sbuf *sb, uint64_t features, >+ struct virtio_feature_desc *desc); >+uint64_t virtio_filter_transport_features(uint64_t features); >+int virtio_bus_is_modern(device_t dev); >+void virtio_read_device_config_array(device_t dev, bus_size_t offset, >+ void *dst, int size, int count); > > /* > * VirtIO Bus Methods. 
>@@ -73,6 +82,7 @@ > void virtio_read_ivar(device_t dev, int ivar, uintptr_t *val); > void virtio_write_ivar(device_t dev, int ivar, uintptr_t val); > uint64_t virtio_negotiate_features(device_t dev, uint64_t child_features); >+int virtio_finalize_features(device_t dev); > int virtio_alloc_virtqueues(device_t dev, int flags, int nvqs, > struct vq_alloc_info *info); > int virtio_setup_intr(device_t dev, enum intr_type type); >@@ -130,6 +140,7 @@ > VIRTIO_READ_IVAR(device, VIRTIO_IVAR_DEVICE); > VIRTIO_READ_IVAR(subvendor, VIRTIO_IVAR_SUBVENDOR); > VIRTIO_READ_IVAR(subdevice, VIRTIO_IVAR_SUBDEVICE); >+VIRTIO_READ_IVAR(modern, VIRTIO_IVAR_MODERN); > > #undef VIRTIO_READ_IVAR > >diff -urN sys/dev/virtio.ori/virtio_bus_if.m sys/dev/virtio/virtio_bus_if.m >--- sys/dev/virtio.ori/virtio_bus_if.m 2020-03-18 20:16:31.702972000 -0700 >+++ sys/dev/virtio/virtio_bus_if.m 2020-03-19 16:44:27.291959000 -0700 >@@ -36,6 +36,12 @@ > > CODE { > static int >+ virtio_bus_default_finalize_features(device_t dev) >+ { >+ return (0); >+ } >+ >+ static int > virtio_bus_default_config_generation(device_t dev) > { > return (0); >@@ -47,6 +53,10 @@ > uint64_t child_features; > }; > >+METHOD int finalize_features { >+ device_t dev; >+} DEFAULT virtio_bus_default_finalize_features; >+ > METHOD int with_feature { > device_t dev; > uint64_t feature; >@@ -80,6 +90,7 @@ > METHOD void notify_vq { > device_t dev; > uint16_t queue; >+ bus_size_t offset; > }; > > METHOD int config_generation { >diff -urN sys/dev/virtio.ori/virtio_endian.h sys/dev/virtio/virtio_endian.h >--- sys/dev/virtio.ori/virtio_endian.h 1969-12-31 16:00:00.000000000 -0800 >+++ sys/dev/virtio/virtio_endian.h 2020-03-19 16:44:27.340761000 -0700 >@@ -0,0 +1,106 @@ >+/*- >+ * SPDX-License-Identifier: BSD-2-Clause >+ * >+ * Copyright (c) 2017, Bryan Venteicher <bryanv@FreeBSD.org> >+ * All rights reserved. >+ * >+ * Redistribution and use in source and binary forms, with or without >+ * modification, are permitted provided that the following conditions >+ * are met: >+ * 1. Redistributions of source code must retain the above copyright >+ * notice unmodified, this list of conditions, and the following >+ * disclaimer. >+ * 2. Redistributions in binary form must reproduce the above copyright >+ * notice, this list of conditions and the following disclaimer in the >+ * documentation and/or other materials provided with the distribution. >+ * >+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR >+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES >+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. >+ * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, >+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT >+ * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, >+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY >+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT >+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF >+ * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. >+ * >+ * $FreeBSD$ >+ */ >+ >+#ifndef _VIRTIO_ENDIAN_H_ >+#define _VIRTIO_ENDIAN_H_ >+ >+#include <sys/endian.h> >+ >+/* >+ * VirtIO V1 (modern) uses little endian, while legacy VirtIO uses the guest's >+ * native endian. These functions convert to and from the Guest's (driver's) >+ * and the Host's (device's) endianness when needed. 
>+ */ >+ >+static inline bool >+virtio_swap_endian(bool modern) >+{ >+#if _BYTE_ORDER == _LITTLE_ENDIAN >+ return (false); >+#else >+ return (modern); >+#endif >+} >+ >+static inline uint16_t >+virtio_htog16(bool modern, uint16_t val) >+{ >+ if (virtio_swap_endian(modern)) >+ return (le16toh(val)); >+ else >+ return (val); >+} >+ >+static inline uint16_t >+virtio_gtoh16(bool modern, uint16_t val) >+{ >+ if (virtio_swap_endian(modern)) >+ return (htole16(val)); >+ else >+ return (val); >+} >+ >+static inline uint32_t >+virtio_htog32(bool modern, uint32_t val) >+{ >+ if (virtio_swap_endian(modern)) >+ return (le32toh(val)); >+ else >+ return (val); >+} >+ >+static inline uint32_t >+virtio_gtoh32(bool modern, uint32_t val) >+{ >+ if (virtio_swap_endian(modern)) >+ return (htole32(val)); >+ else >+ return (val); >+} >+ >+static inline uint64_t >+virtio_htog64(bool modern, uint64_t val) >+{ >+ if (virtio_swap_endian(modern)) >+ return (le64toh(val)); >+ else >+ return (val); >+} >+ >+static inline uint64_t >+virtio_gtoh64(bool modern, uint64_t val) >+{ >+ if (virtio_swap_endian(modern)) >+ return (htole64(val)); >+ else >+ return (val); >+} >+ >+#endif /* _VIRTIO_ENDIAN_H_ */ >diff -urN sys/dev/virtio.ori/virtqueue.c sys/dev/virtio/virtqueue.c >--- sys/dev/virtio.ori/virtqueue.c 2020-03-18 20:16:31.705390000 -0700 >+++ sys/dev/virtio/virtqueue.c 2020-03-19 16:44:27.326058000 -0700 >@@ -57,18 +57,15 @@ > > struct virtqueue { > device_t vq_dev; >- char vq_name[VIRTQUEUE_MAX_NAME_SZ]; > uint16_t vq_queue_index; > uint16_t vq_nentries; > uint32_t vq_flags; >-#define VIRTQUEUE_FLAG_INDIRECT 0x0001 >-#define VIRTQUEUE_FLAG_EVENT_IDX 0x0002 >+#define VIRTQUEUE_FLAG_MODERN 0x0001 >+#define VIRTQUEUE_FLAG_INDIRECT 0x0002 >+#define VIRTQUEUE_FLAG_EVENT_IDX 0x0004 > >- int vq_alignment; >- int vq_ring_size; >- void *vq_ring_mem; > int vq_max_indirect_size; >- int vq_indirect_mem_size; >+ bus_size_t vq_notify_offset; > virtqueue_intr_t *vq_intrhand; > void *vq_intrhand_arg; > >@@ -87,6 +84,12 @@ > */ > uint16_t vq_used_cons_idx; > >+ void *vq_ring_mem; >+ int vq_indirect_mem_size; >+ int vq_alignment; >+ int vq_ring_size; >+ char vq_name[VIRTQUEUE_MAX_NAME_SZ]; >+ > struct vq_desc_extra { > void *cookie; > struct vring_desc *indirect; >@@ -134,6 +137,14 @@ > static void vq_ring_notify_host(struct virtqueue *); > static void vq_ring_free_chain(struct virtqueue *, uint16_t); > >+#define vq_modern(_vq) (((_vq)->vq_flags & VIRTQUEUE_FLAG_MODERN) != 0) >+#define vq_htog16(_vq, _val) virtio_htog16(vq_modern(_vq), _val) >+#define vq_htog32(_vq, _val) virtio_htog32(vq_modern(_vq), _val) >+#define vq_htog64(_vq, _val) virtio_htog64(vq_modern(_vq), _val) >+#define vq_gtoh16(_vq, _val) virtio_gtoh16(vq_modern(_vq), _val) >+#define vq_gtoh32(_vq, _val) virtio_gtoh32(vq_modern(_vq), _val) >+#define vq_gtoh64(_vq, _val) virtio_gtoh64(vq_modern(_vq), _val) >+ > uint64_t > virtqueue_filter_features(uint64_t features) > { >@@ -147,8 +158,9 @@ > } > > int >-virtqueue_alloc(device_t dev, uint16_t queue, uint16_t size, int align, >- vm_paddr_t highaddr, struct vq_alloc_info *info, struct virtqueue **vqp) >+virtqueue_alloc(device_t dev, uint16_t queue, uint16_t size, >+ bus_size_t notify_offset, int align, vm_paddr_t highaddr, >+ struct vq_alloc_info *info, struct virtqueue **vqp) > { > struct virtqueue *vq; > int error; >@@ -184,12 +196,15 @@ > vq->vq_dev = dev; > strlcpy(vq->vq_name, info->vqai_name, sizeof(vq->vq_name)); > vq->vq_queue_index = queue; >+ vq->vq_notify_offset = notify_offset; > vq->vq_alignment = align; > 
vq->vq_nentries = size; > vq->vq_free_cnt = size; > vq->vq_intrhand = info->vqai_intr; > vq->vq_intrhand_arg = info->vqai_intr_arg; > >+ if (VIRTIO_BUS_WITH_FEATURE(dev, VIRTIO_F_VERSION_1) != 0) >+ vq->vq_flags |= VIRTQUEUE_FLAG_MODERN; > if (VIRTIO_BUS_WITH_FEATURE(dev, VIRTIO_RING_F_EVENT_IDX) != 0) > vq->vq_flags |= VIRTQUEUE_FLAG_EVENT_IDX; > >@@ -294,8 +309,8 @@ > bzero(indirect, vq->vq_indirect_mem_size); > > for (i = 0; i < vq->vq_max_indirect_size - 1; i++) >- indirect[i].next = i + 1; >- indirect[i].next = VQ_RING_DESC_CHAIN_END; >+ indirect[i].next = vq_gtoh16(vq, i + 1); >+ indirect[i].next = vq_gtoh16(vq, VQ_RING_DESC_CHAIN_END); > } > > int >@@ -441,7 +456,7 @@ > { > uint16_t used_idx, nused; > >- used_idx = vq->vq_ring.used->idx; >+ used_idx = vq_htog16(vq, vq->vq_ring.used->idx); > > nused = (uint16_t)(used_idx - vq->vq_used_cons_idx); > VQASSERT(vq, nused <= vq->vq_nentries, "used more than available"); >@@ -453,7 +468,7 @@ > virtqueue_intr_filter(struct virtqueue *vq) > { > >- if (vq->vq_used_cons_idx == vq->vq_ring.used->idx) >+ if (vq->vq_used_cons_idx == vq_htog16(vq, vq->vq_ring.used->idx)) > return (0); > > virtqueue_disable_intr(vq); >@@ -480,7 +495,7 @@ > { > uint16_t ndesc, avail_idx; > >- avail_idx = vq->vq_ring.avail->idx; >+ avail_idx = vq_htog16(vq, vq->vq_ring.avail->idx); > ndesc = (uint16_t)(avail_idx - vq->vq_used_cons_idx); > > switch (hint) { >@@ -505,10 +520,12 @@ > { > > if (vq->vq_flags & VIRTQUEUE_FLAG_EVENT_IDX) { >- vring_used_event(&vq->vq_ring) = vq->vq_used_cons_idx - >- vq->vq_nentries - 1; >- } else >- vq->vq_ring.avail->flags |= VRING_AVAIL_F_NO_INTERRUPT; >+ vring_used_event(&vq->vq_ring) = vq_gtoh16(vq, >+ vq->vq_used_cons_idx - vq->vq_nentries - 1); >+ return; >+ } >+ >+ vq->vq_ring.avail->flags |= vq_gtoh16(vq, VRING_AVAIL_F_NO_INTERRUPT); > } > > int >@@ -571,16 +588,16 @@ > void *cookie; > uint16_t used_idx, desc_idx; > >- if (vq->vq_used_cons_idx == vq->vq_ring.used->idx) >+ if (vq->vq_used_cons_idx == vq_htog16(vq, vq->vq_ring.used->idx)) > return (NULL); > > used_idx = vq->vq_used_cons_idx++ & (vq->vq_nentries - 1); > uep = &vq->vq_ring.used->ring[used_idx]; > > rmb(); >- desc_idx = (uint16_t) uep->id; >+ desc_idx = (uint16_t) vq_htog32(vq, uep->id); > if (len != NULL) >- *len = uep->len; >+ *len = vq_htog32(vq, uep->len); > > vq_ring_free_chain(vq, desc_idx); > >@@ -638,13 +655,13 @@ > printf("VQ: %s - size=%d; free=%d; used=%d; queued=%d; " > "desc_head_idx=%d; avail.idx=%d; used_cons_idx=%d; " > "used.idx=%d; used_event_idx=%d; avail.flags=0x%x; used.flags=0x%x\n", >- vq->vq_name, vq->vq_nentries, vq->vq_free_cnt, >- virtqueue_nused(vq), vq->vq_queued_cnt, vq->vq_desc_head_idx, >- vq->vq_ring.avail->idx, vq->vq_used_cons_idx, >- vq->vq_ring.used->idx, >- vring_used_event(&vq->vq_ring), >- vq->vq_ring.avail->flags, >- vq->vq_ring.used->flags); >+ vq->vq_name, vq->vq_nentries, vq->vq_free_cnt, virtqueue_nused(vq), >+ vq->vq_queued_cnt, vq->vq_desc_head_idx, >+ vq_htog16(vq, vq->vq_ring.avail->idx), vq->vq_used_cons_idx, >+ vq_htog16(vq, vq->vq_ring.used->idx), >+ vq_htog16(vq, vring_used_event(&vq->vq_ring)), >+ vq_htog16(vq, vq->vq_ring.avail->flags), >+ vq_htog16(vq, vq->vq_ring.used->flags)); > } > > static void >@@ -661,14 +678,14 @@ > vring_init(vr, size, ring_mem, vq->vq_alignment); > > for (i = 0; i < size - 1; i++) >- vr->desc[i].next = i + 1; >- vr->desc[i].next = VQ_RING_DESC_CHAIN_END; >+ vr->desc[i].next = vq_gtoh16(vq, i + 1); >+ vr->desc[i].next = vq_gtoh16(vq, VQ_RING_DESC_CHAIN_END); > } > > static void > 
vq_ring_update_avail(struct virtqueue *vq, uint16_t desc_idx) > { >- uint16_t avail_idx; >+ uint16_t avail_idx, avail_ring_idx; > > /* > * Place the head of the descriptor chain into the next slot and make >@@ -677,11 +694,11 @@ > * currently running on another CPU, we can keep it processing the new > * descriptor. > */ >- avail_idx = vq->vq_ring.avail->idx & (vq->vq_nentries - 1); >- vq->vq_ring.avail->ring[avail_idx] = desc_idx; >- >+ avail_idx = vq_htog16(vq, vq->vq_ring.avail->idx); >+ avail_ring_idx = avail_idx & (vq->vq_nentries - 1); >+ vq->vq_ring.avail->ring[avail_ring_idx] = vq_gtoh16(vq, desc_idx); > wmb(); >- vq->vq_ring.avail->idx++; >+ vq->vq_ring.avail->idx = vq_gtoh16(vq, avail_idx + 1); > > /* Keep pending count until virtqueue_notify(). */ > vq->vq_queued_cnt++; >@@ -700,19 +717,19 @@ > > for (i = 0, idx = head_idx, seg = sg->sg_segs; > i < needed; >- i++, idx = dp->next, seg++) { >+ i++, idx = vq_htog16(vq, dp->next), seg++) { > VQASSERT(vq, idx != VQ_RING_DESC_CHAIN_END, > "premature end of free desc chain"); > > dp = &desc[idx]; >- dp->addr = seg->ss_paddr; >- dp->len = seg->ss_len; >+ dp->addr = vq_gtoh64(vq, seg->ss_paddr); >+ dp->len = vq_gtoh32(vq, seg->ss_len); > dp->flags = 0; > > if (i < needed - 1) >- dp->flags |= VRING_DESC_F_NEXT; >+ dp->flags |= vq_gtoh16(vq, VRING_DESC_F_NEXT); > if (i >= readable) >- dp->flags |= VRING_DESC_F_WRITE; >+ dp->flags |= vq_gtoh16(vq, VRING_DESC_F_WRITE); > } > > return (idx); >@@ -757,14 +774,14 @@ > dxp->cookie = cookie; > dxp->ndescs = 1; > >- dp->addr = dxp->indirect_paddr; >- dp->len = needed * sizeof(struct vring_desc); >- dp->flags = VRING_DESC_F_INDIRECT; >+ dp->addr = vq_gtoh64(vq, dxp->indirect_paddr); >+ dp->len = vq_gtoh32(vq, needed * sizeof(struct vring_desc)); >+ dp->flags = vq_gtoh16(vq, VRING_DESC_F_INDIRECT); > > vq_ring_enqueue_segments(vq, dxp->indirect, 0, > sg, readable, writable); > >- vq->vq_desc_head_idx = dp->next; >+ vq->vq_desc_head_idx = vq_htog16(vq, dp->next); > vq->vq_free_cnt--; > if (vq->vq_free_cnt == 0) > VQ_RING_ASSERT_CHAIN_TERM(vq); >@@ -782,11 +799,13 @@ > * Enable interrupts, making sure we get the latest index of > * what's already been consumed. 
> */ >- if (vq->vq_flags & VIRTQUEUE_FLAG_EVENT_IDX) >- vring_used_event(&vq->vq_ring) = vq->vq_used_cons_idx + ndesc; >- else >- vq->vq_ring.avail->flags &= ~VRING_AVAIL_F_NO_INTERRUPT; >- >+ if (vq->vq_flags & VIRTQUEUE_FLAG_EVENT_IDX) { >+ vring_used_event(&vq->vq_ring) = >+ vq_gtoh16(vq, vq->vq_used_cons_idx + ndesc); >+ } else { >+ vq->vq_ring.avail->flags &= >+ vq_gtoh16(vq, ~VRING_AVAIL_F_NO_INTERRUPT); >+ } > mb(); > > /* >@@ -803,24 +822,26 @@ > static int > vq_ring_must_notify_host(struct virtqueue *vq) > { >- uint16_t new_idx, prev_idx, event_idx; >+ uint16_t new_idx, prev_idx, event_idx, flags; > > if (vq->vq_flags & VIRTQUEUE_FLAG_EVENT_IDX) { >- new_idx = vq->vq_ring.avail->idx; >+ new_idx = vq_htog16(vq, vq->vq_ring.avail->idx); > prev_idx = new_idx - vq->vq_queued_cnt; >- event_idx = vring_avail_event(&vq->vq_ring); >+ event_idx = vq_htog16(vq, vring_avail_event(&vq->vq_ring)); > > return (vring_need_event(event_idx, new_idx, prev_idx) != 0); > } > >- return ((vq->vq_ring.used->flags & VRING_USED_F_NO_NOTIFY) == 0); >+ flags = vq->vq_ring.used->flags; >+ return ((flags & vq_gtoh16(vq, VRING_USED_F_NO_NOTIFY)) == 0); > } > > static void > vq_ring_notify_host(struct virtqueue *vq) > { > >- VIRTIO_BUS_NOTIFY_VQ(vq->vq_dev, vq->vq_queue_index); >+ VIRTIO_BUS_NOTIFY_VQ(vq->vq_dev, vq->vq_queue_index, >+ vq->vq_notify_offset); > } > > static void >@@ -839,10 +860,11 @@ > vq->vq_free_cnt += dxp->ndescs; > dxp->ndescs--; > >- if ((dp->flags & VRING_DESC_F_INDIRECT) == 0) { >- while (dp->flags & VRING_DESC_F_NEXT) { >- VQ_RING_ASSERT_VALID_IDX(vq, dp->next); >- dp = &vq->vq_ring.desc[dp->next]; >+ if ((dp->flags & vq_gtoh16(vq, VRING_DESC_F_INDIRECT)) == 0) { >+ while (dp->flags & vq_gtoh16(vq, VRING_DESC_F_NEXT)) { >+ uint16_t next_idx = vq_htog16(vq, dp->next); >+ VQ_RING_ASSERT_VALID_IDX(vq, next_idx); >+ dp = &vq->vq_ring.desc[next_idx]; > dxp->ndescs--; > } > } >@@ -855,6 +877,6 @@ > * newly freed chain. If the virtqueue was completely used, then > * head would be VQ_RING_DESC_CHAIN_END (ASSERTed above). > */ >- dp->next = vq->vq_desc_head_idx; >+ dp->next = vq_gtoh16(vq, vq->vq_desc_head_idx); > vq->vq_desc_head_idx = desc_idx; > } >diff -urN sys/dev/virtio.ori/virtqueue.h sys/dev/virtio/virtqueue.h >--- sys/dev/virtio.ori/virtqueue.h 2020-03-18 20:16:31.705954000 -0700 >+++ sys/dev/virtio/virtqueue.h 2020-03-19 16:44:27.328253000 -0700 >@@ -70,8 +70,8 @@ > uint64_t virtqueue_filter_features(uint64_t features); > > int virtqueue_alloc(device_t dev, uint16_t queue, uint16_t size, >- int align, vm_paddr_t highaddr, struct vq_alloc_info *info, >- struct virtqueue **vqp); >+ bus_size_t notify_offset, int align, vm_paddr_t highaddr, >+ struct vq_alloc_info *info, struct virtqueue **vqp); > void *virtqueue_drain(struct virtqueue *vq, int *last); > void virtqueue_free(struct virtqueue *vq); > int virtqueue_reinit(struct virtqueue *vq, uint16_t size);
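Editor's note: the hunks above add several driver-facing helpers; the short sketches below are illustrations by the editor, not part of the patch. First, virtio_read_device_config_array() re-reads a config span whenever the device's config generation changes mid-read, so callers get a consistent snapshot of multi-byte fields. A minimal usage sketch, assuming a hypothetical 6-byte field at offset 0 (the example_ name and layout are not from the patch):

/*
 * Illustrative only. The generation loop inside
 * virtio_read_device_config_array() retries the whole read if the
 * device updated its config space while we were copying it out.
 * The offset-0, 6-byte layout here is hypothetical.
 */
static void
example_read_6byte_field(device_t dev, uint8_t buf[6])
{
	virtio_read_device_config_array(dev, 0 /* hypothetical offset */,
	    buf, sizeof(uint8_t), 6);
}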
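Second, a sketch of the negotiate-then-finalize sequence that the new FINALIZE_FEATURES bus method enables. This assumes (not shown in this part of the patch) that the modern PCI transport uses finalize to commit the negotiated set back to the device; legacy transports fall through to the default method, which simply returns 0:

/* Illustrative only: hypothetical attach-time fragment. */
static int
example_setup_features(device_t dev, uint64_t desired)
{
	uint64_t negotiated;

	/* Intersect the driver's desired features with the host's. */
	negotiated = virtio_negotiate_features(dev, desired);
	(void)negotiated;

	/*
	 * New in this patch: commit the negotiated features.
	 * The default bus method is a no-op returning 0.
	 */
	return (virtio_finalize_features(dev));
}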
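Finally, the virtio_endian.h helpers byte-swap only when the guest is big-endian and the device is modern (V1), because legacy VirtIO uses guest-native endianness. A round-trip sketch (illustrative; the example_ name is not from the patch):

#include <dev/virtio/virtio_endian.h>

/*
 * Illustrative only. On a little-endian guest both conversions are
 * no-ops; on a big-endian guest they swap iff 'modern' is true.
 */
static uint16_t
example_roundtrip16(bool modern, uint16_t guest_val)
{
	uint16_t wire;

	wire = virtio_gtoh16(modern, guest_val);  /* guest -> device */
	return (virtio_htog16(modern, wire));     /* device -> guest; == guest_val */
}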