FreeBSD Bugzilla – Attachment 207200 Details for
Bug 240340
MFC r351747 Implement nvme suspend / resume for pci attachment
Home
|
New
|
Browse
|
Search
|
[?]
|
Reports
|
Help
|
New Account
|
Log In
Remember
[x]
|
Forgot Password
Login:
[x]
[patch]
nvme resume patch
nvme-suspend-1.diff (text/plain), 8.40 KB, created by
Theron Tarigo
on 2019-09-05 01:40:48 UTC
(
hide
)
Description:
nvme resume patch
Filename:
MIME Type:
Creator:
Theron Tarigo
Created:
2019-09-05 01:40:48 UTC
Size:
8.40 KB
patch
obsolete
>Index: sys/dev/nvme/nvme.c >=================================================================== >--- sys/dev/nvme/nvme.c (revision 351834) >+++ sys/dev/nvme/nvme.c (working copy) >@@ -61,6 +61,8 @@ > static int nvme_probe(device_t); > static int nvme_attach(device_t); > static int nvme_detach(device_t); >+static int nvme_suspend(device_t); >+static int nvme_resume(device_t); > static int nvme_shutdown(device_t); > > static devclass_t nvme_devclass; >@@ -70,6 +72,8 @@ > DEVMETHOD(device_probe, nvme_probe), > DEVMETHOD(device_attach, nvme_attach), > DEVMETHOD(device_detach, nvme_detach), >+ DEVMETHOD(device_suspend, nvme_suspend), >+ DEVMETHOD(device_resume, nvme_resume), > DEVMETHOD(device_shutdown, nvme_shutdown), > { 0, 0 } > }; >@@ -277,9 +281,10 @@ > pci_enable_busmaster(dev); > > /* >- * Reset controller twice to ensure we do a transition from cc.en==1 >- * to cc.en==0. This is because we don't really know what status >- * the controller was left in when boot handed off to OS. >+ * Reset controller twice to ensure we do a transition from cc.en==1 to >+ * cc.en==0. This is because we don't really know what status the >+ * controller was left in when boot handed off to OS. Linux doesn't do >+ * this, however. If we adopt that policy, see also nvme_ctrlr_resume(). 
> */ > status = nvme_ctrlr_hw_reset(ctrlr); > if (status != 0) { >@@ -311,6 +316,23 @@ > return (0); > } > >+static int >+nvme_suspend(device_t dev) >+{ >+ struct nvme_controller *ctrlr; >+ >+ ctrlr = DEVICE2SOFTC(dev); >+ return (nvme_ctrlr_suspend(ctrlr)); >+} >+ >+static int nvme_resume(device_t dev) >+{ >+ struct nvme_controller *ctrlr; >+ >+ ctrlr = DEVICE2SOFTC(dev); >+ return (nvme_ctrlr_resume(ctrlr)); >+} >+ > static void > nvme_notify(struct nvme_consumer *cons, > struct nvme_controller *ctrlr) >Index: sys/dev/nvme/nvme_ctrlr.c >=================================================================== >--- sys/dev/nvme/nvme_ctrlr.c (revision 351834) >+++ sys/dev/nvme/nvme_ctrlr.c (working copy) >@@ -154,8 +154,8 @@ > > /* > * Our best estimate for the maximum number of I/Os that we should >- * noramlly have in flight at one time. This should be viewed as a hint, >- * not a hard limit and will need to be revisitted when the upper layers >+ * normally have in flight at one time. This should be viewed as a hint, >+ * not a hard limit and will need to be revisited when the upper layers > * of the storage system grows multi-queue support. 
> */ > ctrlr->max_hw_pend_io = num_trackers * ctrlr->num_io_queues * 3 / 4; >@@ -380,10 +380,10 @@ > return (nvme_ctrlr_wait_for_ready(ctrlr, 1)); > } > >-int >-nvme_ctrlr_hw_reset(struct nvme_controller *ctrlr) >+static void >+nvme_ctrlr_disable_qpairs(struct nvme_controller *ctrlr) > { >- int i, err; >+ int i; > > nvme_admin_qpair_disable(&ctrlr->adminq); > /* >@@ -395,7 +395,15 @@ > for (i = 0; i < ctrlr->num_io_queues; i++) > nvme_io_qpair_disable(&ctrlr->ioq[i]); > } >+} > >+int >+nvme_ctrlr_hw_reset(struct nvme_controller *ctrlr) >+{ >+ int err; >+ >+ nvme_ctrlr_disable_qpairs(ctrlr); >+ > DELAY(100*1000); > > err = nvme_ctrlr_disable(ctrlr); >@@ -521,7 +529,7 @@ > } > > static int >-nvme_ctrlr_destroy_qpairs(struct nvme_controller *ctrlr) >+nvme_ctrlr_delete_qpairs(struct nvme_controller *ctrlr) > { > struct nvme_completion_poll_status status; > struct nvme_qpair *qpair; >@@ -863,7 +871,7 @@ > } > > static void >-nvme_ctrlr_start(void *ctrlr_arg) >+nvme_ctrlr_start(void *ctrlr_arg, bool resetting) > { > struct nvme_controller *ctrlr = ctrlr_arg; > uint32_t old_num_io_queues; >@@ -876,7 +884,7 @@ > * the number of I/O queues supported, so cannot reset > * the adminq again here. > */ >- if (ctrlr->is_resetting) { >+ if (resetting) { > nvme_qpair_reset(&ctrlr->adminq); > } > >@@ -898,7 +906,7 @@ > * explicit specify how many queues it will use. This value should > * never change between resets, so panic if somehow that does happen. 
> */ >- if (ctrlr->is_resetting) { >+ if (resetting) { > old_num_io_queues = ctrlr->num_io_queues; > if (nvme_ctrlr_set_num_qpairs(ctrlr) != 0) { > nvme_ctrlr_fail(ctrlr); >@@ -938,7 +946,7 @@ > > if (nvme_ctrlr_set_num_qpairs(ctrlr) == 0 && > nvme_ctrlr_construct_io_qpairs(ctrlr) == 0) >- nvme_ctrlr_start(ctrlr); >+ nvme_ctrlr_start(ctrlr, false); > else > nvme_ctrlr_fail(ctrlr); > >@@ -967,7 +975,7 @@ > */ > pause("nvmereset", hz / 10); > if (status == 0) >- nvme_ctrlr_start(ctrlr); >+ nvme_ctrlr_start(ctrlr, true); > else > nvme_ctrlr_fail(ctrlr); > >@@ -990,7 +998,7 @@ > } > > /* >- * Poll the single-vector intertrupt case: num_io_queues will be 1 and >+ * Poll the single-vector interrupt case: num_io_queues will be 1 and > * there's only a single vector. While we're polling, we mask further > * interrupts in the controller. > */ >@@ -1084,7 +1092,7 @@ > if (is_user_buffer) { > /* > * Ensure the user buffer is wired for the duration of >- * this passthrough command. >+ * this pass-through command. > */ > PHOLD(curproc); > buf = getpbuf(NULL); >@@ -1103,7 +1111,7 @@ > } else > req = nvme_allocate_request_null(nvme_pt_done, pt); > >- /* Assume userspace already converted to little-endian */ >+ /* Assume user space already converted to little-endian */ > req->cmd.opc = pt->cmd.opc; > req->cmd.fuse = pt->cmd.fuse; > req->cmd.rsvd2 = pt->cmd.rsvd2; >@@ -1366,7 +1374,7 @@ > destroy_dev(ctrlr->cdev); > > if (!gone) >- nvme_ctrlr_destroy_qpairs(ctrlr); >+ nvme_ctrlr_delete_qpairs(ctrlr); > for (i = 0; i < ctrlr->num_io_queues; i++) > nvme_io_qpair_destroy(&ctrlr->ioq[i]); > free(ctrlr->ioq, M_NVME); >@@ -1468,3 +1476,87 @@ > > return (&ctrlr->cdata); > } >+ >+int >+nvme_ctrlr_suspend(struct nvme_controller *ctrlr) >+{ >+ int to = hz; >+ >+ /* >+ * Can't touch failed controllers, so it's already suspended. 
>+ */ >+ if (ctrlr->is_failed) >+ return (0); >+ >+ /* >+ * We don't want the reset taskqueue running, since it does similar >+ * things, so prevent it from running after we start. Wait for any reset >+ * that may have been started to complete. The reset process we follow >+ * will ensure that any new I/O will queue and be given to the hardware >+ * after we resume (though there should be none). >+ */ >+ while (atomic_cmpset_32(&ctrlr->is_resetting, 0, 1) == 0 && to-- > 0) >+ pause("nvmesusp", 1); >+ if (to <= 0) { >+ nvme_printf(ctrlr, >+ "Competing reset task didn't finish. Try again later.\n"); >+ return (EWOULDBLOCK); >+ } >+ >+ /* >+ * Per Section 7.6.2 of NVMe spec 1.4, to properly suspend, we need to >+ * delete the hardware I/O queues, and then shutdown. This properly >+ * flushes any metadata the drive may have stored so it can survive >+ * having its power removed and prevents the unsafe shutdown count from >+ * incrementing. Once we delete the qpairs, we have to disable them >+ * before shutting down. The delay is out of paranoia in >+ * nvme_ctrlr_hw_reset, and is repeated here (though we should have no >+ * pending I/O that the delay copes with). >+ */ >+ nvme_ctrlr_delete_qpairs(ctrlr); >+ nvme_ctrlr_disable_qpairs(ctrlr); >+ DELAY(100*1000); >+ nvme_ctrlr_shutdown(ctrlr); >+ >+ return (0); >+} >+ >+int >+nvme_ctrlr_resume(struct nvme_controller *ctrlr) >+{ >+ >+ /* >+ * Can't touch failed controllers, so nothing to do to resume. >+ */ >+ if (ctrlr->is_failed) >+ return (0); >+ >+ /* >+ * Have to reset the hardware twice, just like we do on attach. See >+ * nvme_attach() for why. >+ */ >+ if (nvme_ctrlr_hw_reset(ctrlr) != 0) >+ goto fail; >+ if (nvme_ctrlr_hw_reset(ctrlr) != 0) >+ goto fail; >+ >+ /* >+ * Now that we've reset the hardware, we can restart the controller. Any >+ * I/O that was pending is requeued. Any admin commands are aborted with >+ * an error. Once we've restarted, take the controller out of reset. 
>+ */ >+ nvme_ctrlr_start(ctrlr, true); >+ atomic_cmpset_32(&ctrlr->is_resetting, 1, 0); >+ >+ return (0); >+fail: >+ /* >+ * Since we can't bring the controller out of reset, announce and fail >+ * the controller. However, we have to return success for the resume >+ * itself, due to questionable APIs. >+ */ >+ nvme_printf(ctrlr, "Failed to reset on resume, failing.\n"); >+ nvme_ctrlr_fail(ctrlr); >+ atomic_cmpset_32(&ctrlr->is_resetting, 1, 0); >+ return (0); >+} >Index: sys/dev/nvme/nvme_private.h >=================================================================== >--- sys/dev/nvme/nvme_private.h (revision 351834) >+++ sys/dev/nvme/nvme_private.h (working copy) >@@ -529,4 +529,7 @@ > void nvme_ctrlr_intx_handler(void *arg); > void nvme_ctrlr_poll(struct nvme_controller *ctrlr); > >+int nvme_ctrlr_suspend(struct nvme_controller *ctrlr); >+int nvme_ctrlr_resume(struct nvme_controller *ctrlr); >+ > #endif /* __NVME_PRIVATE_H__ */
You cannot view the attachment while viewing its details because your browser does not support IFRAMEs.
View the attachment on a separate page
.
View Attachment As Diff
View Attachment As Raw
Actions:
View
|
Diff
Attachments on
bug 240340
: 207200