| Summary: | [vm] [patch] bus_dmamem_alloc() with BUS_DMA_NOWAIT can block | ||
|---|---|---|---|
| Product: | Base System | Reporter: | Peter Jeremy <PeterJeremy> |
| Component: | kern | Assignee: | Alan Cox <alc> |
| Status: | Closed FIXED | ||
| Severity: | Affects Only Me | CC: | PeterJeremy |
| Priority: | Normal | ||
| Version: | 5.3-RELEASE | ||
| Hardware: | Any | ||
| OS: | Any | ||
|
Description
Peter Jeremy
2005-02-28 07:20:09 UTC
I have recently confirmed the continued existence of this problem in 7-CURRENT from 5th November: Whilst copying a file to a USB memory stick, I received the panic below. Looking at the sources, there are references to BUS_DMA_NOWAIT in over 100 files so the USB code isn't alone - though it's not obvious how many of these paths allocate more than one page (and therefore trigger the bug). Nov 7 19:14:26 server kernel: panic: trying to sleep while sleeping is prohibited Nov 7 19:14:26 server kernel: KDB: stack backtrace: Nov 7 19:14:26 server kernel: kdb_backtrace(c06fac04,c0762460,c06fd889,d4494828,100) at kdb_backtrace+0x2e Nov 7 19:14:26 server kernel: panic(c06fd889,1,c06fd805,10c,2) at panic+0xb7 Nov 7 19:14:26 server kernel: sleepq_add(cbe8f0e0,c07adc60,c070d2f1,0,c053527a) at sleepq_add+0xb2 Nov 7 19:14:26 server kernel: msleep(cbe8f0e0,c07adc60,44,c070d2f1,0) at msleep+0x2df Nov 7 19:14:26 server kernel: bwait(cbe8f0e0,44,c070d2f1,509,0) at bwait+0x60 Nov 7 19:14:26 server kernel: swap_pager_putpages(c2215ce4,d4494970,1,1,d4494920) at swap_pager_putpages+0x47a Nov 7 19:14:26 server kernel: default_pager_putpages(c2215ce4,d4494970,1,1,d4494920) at default_pager_putpages+0x2e Nov 7 19:14:26 server kernel: vm_pageout_flush(d4494970,1,1,60,c19115a0) at vm_pageout_flush+0x16b Nov 7 19:14:26 server kernel: vm_contig_launder_page(c1911558,0,c070dd4b,1ec,ffffffff) at vm_contig_launder_page+0x229 Nov 7 19:14:26 server kernel: vm_page_alloc_contig(10,0,0,ffffffff,1) at vm_page_alloc_contig+0x24d Nov 7 19:14:26 server kernel: contigmalloc(10000,c073a940,1,0,ffffffff) at contigmalloc+0xb5 Nov 7 19:14:26 server kernel: bus_dmamem_alloc(c2879100,c2410c88,5,c2410c84,ffffffff) at bus_dmamem_alloc+0xd2 Nov 7 19:14:26 server kernel: usb_block_allocmem(0,10000,1,c1fe333c,d4494a9c) at usb_block_allocmem+0x180 Nov 7 19:14:26 server kernel: usb_allocmem(c1b14000,10000,0,c1fe333c,d4494ae8) at usb_allocmem+0x73 Nov 7 19:14:26 server kernel: uhci_allocm(c1b14000,c1fe333c,10000,10c,1) 
at uhci_allocm+0x27 Nov 7 19:14:26 server kernel: usbd_transfer(c1fe3300,c4849480,c26e6400,d1483000,10000) at usbd_transfer+0xaa Nov 7 19:14:26 server kernel: umass_setup_transfer(c26e6400,c4849480,d1483000,10000,0) at umass_setup_transfer+0x57 Nov 7 19:14:26 server kernel: umass_bbb_state(c1a52d00,c26e6400,0,c19566f0,d4494b7c) at umass_bbb_state+0x1ce Nov 7 19:14:26 server kernel: usb_transfer_complete(c1a52d00,c053552c,c0761ce0,1,c06f9a88) at usb_transfer_complete+0x1aa Nov 7 19:14:26 server kernel: uhci_idone(c1a52d70,c1a52d88,c07ad548) at uhci_idone+0x2ee -- Peter Jeremy I took a stab at the problem that NOWAIT is not being honored in
contigmalloc() by making the vm_contig_launder_page() honor
the flag.
Basically there are a few places where NOWAIT can sleep:
1) the page is busy - change, don't sleep, just return EWOULDBLOCK
2) the page is dirty and the object is the kernel object, because
vm_pageout_flush() will force a synchronous write - change: check for
the kernel object and return EWOULDBLOCK
3) the page is dirty and the object is not the kernel object - change
send the NOWAIT information to the flushing routines.
The flags argument is added to vm_page_alloc_contig(), so the header file and
the routine in sys/arm that calls vm_page_alloc_contig() need to be
updated.
I put a temporary printf() statement before the EWOULDBLOCK just to see
if we can trip the code and know that a panic was averted. Since the problem
generates panics only with a combination of fragmented memory and an
allocation at interrupt time, I have not triggered the problem to prove
this patch.
-- patch from -current should work with 6.0-RELEASE too --
*** arm/arm/vm_machdep.c.orig Thu Nov 17 15:10:52 2005
--- arm/arm/vm_machdep.c Thu Nov 17 15:11:09 2005
***************
*** 415,421 ****
if (alloc_curaddr < 0xf0000000) {/* XXX */
mtx_lock(&Giant);
page_array = vm_page_alloc_contig(0x100000 / PAGE_SIZE,
! 0, 0xffffffff, 0x100000, 0);
mtx_unlock(&Giant);
}
if (page_array) {
--- 415,421 ----
if (alloc_curaddr < 0xf0000000) {/* XXX */
mtx_lock(&Giant);
page_array = vm_page_alloc_contig(0x100000 / PAGE_SIZE,
! 0, 0xffffffff, 0x100000, 0, 0);
mtx_unlock(&Giant);
}
if (page_array) {
*** vm/vm_contig.c.orig Thu Nov 17 15:10:10 2005
--- vm/vm_contig.c Thu Nov 17 13:59:47 2005
***************
*** 86,92 ****
#include <vm/vm_extern.h>
static int
! vm_contig_launder_page(vm_page_t m)
{
vm_object_t object;
vm_page_t m_tmp;
--- 86,92 ----
#include <vm/vm_extern.h>
static int
! vm_contig_launder_page(vm_page_t m, int flags)
{
vm_object_t object;
vm_page_t m_tmp;
***************
*** 95,100 ****
--- 95,105 ----
object = m->object;
if (!VM_OBJECT_TRYLOCK(object))
return (EAGAIN);
+ if (flags & M_NOWAIT && (m->flags & PG_BUSY || m->busy)) {
+ VM_OBJECT_UNLOCK(object);
+ printf("vm_contig_launder_page: would sleep (busy)\n");
+ return (EWOULDBLOCK);
+ }
if (vm_page_sleep_if_busy(m, TRUE, "vpctw0")) {
VM_OBJECT_UNLOCK(object);
vm_page_lock_queues();
***************
*** 104,126 ****
if (m->dirty == 0 && m->hold_count == 0)
pmap_remove_all(m);
if (m->dirty) {
if (object->type == OBJT_VNODE) {
vm_page_unlock_queues();
vp = object->handle;
VM_OBJECT_UNLOCK(object);
vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, curthread);
VM_OBJECT_LOCK(object);
! vm_object_page_clean(object, 0, 0, OBJPC_SYNC);
VM_OBJECT_UNLOCK(object);
VOP_UNLOCK(vp, 0, curthread);
vm_page_lock_queues();
! return (0);
} else if (object->type == OBJT_SWAP ||
object->type == OBJT_DEFAULT) {
m_tmp = m;
! vm_pageout_flush(&m_tmp, 1, VM_PAGER_PUT_SYNC);
VM_OBJECT_UNLOCK(object);
! return (0);
}
} else if (m->hold_count == 0)
vm_page_cache(m);
--- 109,149 ----
if (m->dirty == 0 && m->hold_count == 0)
pmap_remove_all(m);
if (m->dirty) {
+ /* Both paths use vm_pageout_flush() which forces
+ * a syncronous putpage for the kernel_object.
+ */
+ if (flags & M_NOWAIT && object == kernel_object) {
+ VM_OBJECT_UNLOCK(object);
+ printf("vm_contig_launder_page: would sleep (kobject)\n");
+ return (EWOULDBLOCK);
+ }
if (object->type == OBJT_VNODE) {
vm_page_unlock_queues();
vp = object->handle;
VM_OBJECT_UNLOCK(object);
vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, curthread);
VM_OBJECT_LOCK(object);
! vm_object_page_clean(object, 0, 0,
! (flags & M_NOWAIT) ? 0 : OBJPC_SYNC);
VM_OBJECT_UNLOCK(object);
VOP_UNLOCK(vp, 0, curthread);
vm_page_lock_queues();
! if ((flags & M_NOWAIT) && m->dirty) {
! printf("vm_contig_launder_page: would sleep (dirty)\n");
! return (EWOULDBLOCK);
! } else
! return (0);
} else if (object->type == OBJT_SWAP ||
object->type == OBJT_DEFAULT) {
m_tmp = m;
! vm_pageout_flush(&m_tmp, 1,
! (flags & M_NOWAIT) ? 0 : VM_PAGER_PUT_SYNC);
VM_OBJECT_UNLOCK(object);
! if ((flags & M_NOWAIT) && m->dirty) {
! printf("vm_contig_launder_page: would sleep (dirty)\n");
! return (EWOULDBLOCK);
! } else
! return (0);
}
} else if (m->hold_count == 0)
vm_page_cache(m);
***************
*** 129,135 ****
}
static int
! vm_contig_launder(int queue)
{
vm_page_t m, next;
int error;
--- 152,158 ----
}
static int
! vm_contig_launder(int queue, int flags)
{
vm_page_t m, next;
int error;
***************
*** 143,149 ****
KASSERT(m->queue == queue,
("vm_contig_launder: page %p's queue is not %d", m, queue));
! error = vm_contig_launder_page(m);
if (error == 0)
return (TRUE);
if (error == EBUSY)
--- 166,172 ----
KASSERT(m->queue == queue,
("vm_contig_launder: page %p's queue is not %d", m, queue));
! error = vm_contig_launder_page(m, flags);
if (error == 0)
return (TRUE);
if (error == EBUSY)
***************
*** 224,235 ****
actmax = vm_page_queues[PQ_ACTIVE].lcnt;
again1:
if (inactl < inactmax &&
! vm_contig_launder(PQ_INACTIVE)) {
inactl++;
goto again1;
}
if (actl < actmax &&
! vm_contig_launder(PQ_ACTIVE)) {
actl++;
goto again1;
}
--- 247,258 ----
actmax = vm_page_queues[PQ_ACTIVE].lcnt;
again1:
if (inactl < inactmax &&
! vm_contig_launder(PQ_INACTIVE, flags)) {
inactl++;
goto again1;
}
if (actl < actmax &&
! vm_contig_launder(PQ_ACTIVE, flags)) {
actl++;
goto again1;
}
***************
*** 381,387 ****
vm_page_t
vm_page_alloc_contig(vm_pindex_t npages, vm_paddr_t low, vm_paddr_t high,
! vm_offset_t alignment, vm_offset_t boundary)
{
vm_object_t object;
vm_offset_t size;
--- 404,410 ----
vm_page_t
vm_page_alloc_contig(vm_pindex_t npages, vm_paddr_t low, vm_paddr_t high,
! vm_offset_t alignment, vm_offset_t boundary, int flags)
{
vm_object_t object;
vm_offset_t size;
***************
*** 459,465 ****
switch (m->queue) {
case PQ_ACTIVE:
case PQ_INACTIVE:
! if (vm_contig_launder_page(m) != 0)
goto cleanup_freed;
pqtype = m->queue - m->pc;
if (pqtype == PQ_FREE ||
--- 482,488 ----
switch (m->queue) {
case PQ_ACTIVE:
case PQ_INACTIVE:
! if (vm_contig_launder_page(m, flags) != 0)
goto cleanup_freed;
pqtype = m->queue - m->pc;
if (pqtype == PQ_FREE ||
***************
*** 570,576 ****
boundary, kernel_map);
} else {
pages = vm_page_alloc_contig(npgs, low, high,
! alignment, boundary);
if (pages == NULL) {
ret = NULL;
} else {
--- 593,599 ----
boundary, kernel_map);
} else {
pages = vm_page_alloc_contig(npgs, low, high,
! alignment, boundary, flags);
if (pages == NULL) {
ret = NULL;
} else {
*** vm/vm_page.h.orig Thu Nov 17 15:12:56 2005
--- vm/vm_page.h Thu Nov 17 15:13:12 2005
***************
*** 344,350 ****
void vm_page_activate (vm_page_t);
vm_page_t vm_page_alloc (vm_object_t, vm_pindex_t, int);
vm_page_t vm_page_alloc_contig (vm_pindex_t, vm_paddr_t, vm_paddr_t,
! vm_offset_t, vm_offset_t);
void vm_page_release_contig (vm_page_t, vm_pindex_t);
vm_page_t vm_page_grab (vm_object_t, vm_pindex_t, int);
void vm_page_cache (register vm_page_t);
--- 344,350 ----
void vm_page_activate (vm_page_t);
vm_page_t vm_page_alloc (vm_object_t, vm_pindex_t, int);
vm_page_t vm_page_alloc_contig (vm_pindex_t, vm_paddr_t, vm_paddr_t,
! vm_offset_t, vm_offset_t, int);
void vm_page_release_contig (vm_page_t, vm_pindex_t);
vm_page_t vm_page_grab (vm_object_t, vm_pindex_t, int);
void vm_page_cache (register vm_page_t);
On Thu, 2005-Nov-17 16:04:14 -0600, Mark Tinguely wrote: >I took a stab at the problem that NOWAIT is not being honored in >contigmalloc() by making the vm_contig_launder_page() honor >the flag. Thank you. Of course, this doesn't help drivers like bktr(4) that use contigmalloc(M_NOWAIT) and panic() if the memory isn't available. I've gone through your changes and suspect you may have missed a few possibilities that can sleep. >*** vm/vm_contig.c.orig Thu Nov 17 15:10:10 2005 >--- vm/vm_contig.c Thu Nov 17 13:59:47 2005 >*************** >*** 95,100 **** >--- 95,105 ---- > object = m->object; > if (!VM_OBJECT_TRYLOCK(object)) > return (EAGAIN); >+ if (flags & M_NOWAIT && (m->flags & PG_BUSY || m->busy)) { >+ VM_OBJECT_UNLOCK(object); >+ printf("vm_contig_launder_page: would sleep (busy)\n"); >+ return (EWOULDBLOCK); I suspect all your EWOULDBLOCK should be EAGAIN for consistency. >+ } > if (vm_page_sleep_if_busy(m, TRUE, "vpctw0")) { > VM_OBJECT_UNLOCK(object); > vm_page_lock_queues(); >*************** >*** 104,126 **** ... >--- 109,149 ---- > if (m->dirty == 0 && m->hold_count == 0) > pmap_remove_all(m); This can sleep because it seizes a mutex for each entry. > if (m->dirty) { >+ /* Both paths use vm_pageout_flush() which forces >+ * a syncronous putpage for the kernel_object. >+ */ >+ if (flags & M_NOWAIT && object == kernel_object) { >+ VM_OBJECT_UNLOCK(object); >+ printf("vm_contig_launder_page: would sleep (kobject)\n"); >+ return (EWOULDBLOCK); >+ } > if (object->type == OBJT_VNODE) { > vm_page_unlock_queues(); > vp = object->handle; > VM_OBJECT_UNLOCK(object); > vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, curthread); vn_lock(9) implies this can sleep unless the 2nd argument includes LK_NOWAIT. > VM_OBJECT_LOCK(object); This will sleep if something else has grabbed the object mutex. >! vm_object_page_clean(object, 0, 0, >! (flags & M_NOWAIT) ? 
0 : OBJPC_SYNC); > VM_OBJECT_UNLOCK(object); > VOP_UNLOCK(vp, 0, curthread); > vm_page_lock_queues(); This will sleep if something else is holding vm_page_queue_mtx. >! if ((flags & M_NOWAIT) && m->dirty) { >! printf("vm_contig_launder_page: would sleep (dirty)\n"); >! return (EWOULDBLOCK); >! } else >! return (0); > } else if (object->type == OBJT_SWAP || > object->type == OBJT_DEFAULT) { > m_tmp = m; >! vm_pageout_flush(&m_tmp, 1, >! (flags & M_NOWAIT) ? 0 : VM_PAGER_PUT_SYNC); > VM_OBJECT_UNLOCK(object); >! if ((flags & M_NOWAIT) && m->dirty) { >! printf("vm_contig_launder_page: would sleep (dirty)\n"); >! return (EWOULDBLOCK); >! } else >! return (0); > } > } else if (m->hold_count == 0) > vm_page_cache(m); >*************** -- Peter Jeremy On Thu, 2005-Nov-17 16:04:14 -0600, Mark Tinguely wrote:
>I took a stab at the problem that NOWAIT is not being honored in
>contigmalloc() by making the vm_contig_launder_page() honor
>the flag.
I've been running your code for about a month now and it doesn't seem
to help. Accessing a umass device after the system has been up for
more than a few days is still an instant panic:
bwait(cbe922d8,44,c070d9ac,509,0) at bwait+0x60
swap_pager_putpages(c3e6bce4,d56a696c,1,1,d56a6920) at swap_pager_putpages+0x47a
vm_pageout_flush(d56a696c,1,0,60,c06fa047) at vm_pageout_flush+0x16b
vm_contig_launder_page(c1906e48,1,c070e406,203,ffffffff) at vm_contig_launder_page+0x2f9
vm_page_alloc_contig(10,0,0,ffffffff,1) at vm_page_alloc_contig+0x254
contigmalloc(10000,c073b0a0,1,0,ffffffff) at contigmalloc+0xbc
bus_dmamem_alloc(c2138300,c2270488,5,c2270484,ffffffff) at bus_dmamem_alloc+0xd2
usb_block_allocmem(0,10000,1,c319c33c,d56a6a9c) at usb_block_allocmem+0x180
uhci_allocm(c1c3c000,c319c33c,10000,10c,6) at uhci_allocm+0x27
bwait(cbe90bc8,44,c0712862,509,0) at bwait+0x60
swap_pager_putpages(c3653210,d56a6874,1,1,d56a6820) at swap_pager_putpages+0x47a
vm_pageout_flush(d56a6874,1,0,60,c06feef8) at vm_pageout_flush+0x16b
vm_contig_launder_page(c19070d0,1,c07132bc,203,ffffffff) at vm_contig_launder_page+0x2f9
vm_page_alloc_contig(19,0,0,ffffffff,10) at vm_page_alloc_contig+0x254
contigmalloc(1827c,c0745e40,1,0,ffffffff) at contigmalloc+0xbc
bus_dmamem_alloc(c2f14800,d56a6978,5,d56a697c,ffffffff) at bus_dmamem_alloc+0xd2
usb_alloc_mem(18270,4,d56a6b10,10000,c0703afb) at usb_alloc_mem+0xb4
uhci_xfer_setup(c1bfc400,0,d56a6b08,d56a6b10,d56a6b24) at uhci_xfer_setup+0x40b
Interestingly, none of your "vm_contig_launder_page: would sleep"
printf's are triggered. Looking at the code, the problem is that
vm_pageout_flush() forces VM_PAGER_PUT_SYNC if the given object is
kernel_object. I don't understand the VM subsystem well enough to
know why this is done.
--
Peter Jeremy
Strange that the following added test in vm_contig_launder_page() did not
trigger in the kernel_object case:
if (m->dirty) {
+ /* Both paths use vm_pageout_flush() which forces
+ * a syncronous putpage for the kernel_object.
+ */
+ if (flags & M_NOWAIT && object == kernel_object) {
...
Anyway, I think it was concluded that when contigmalloc() is called from an
interrupt handler, we cannot launder the pages at all. It simplifies
the patch considerably. A couple things to consider:
1) is it wise for a driver to be requesting more than a page
at interrupt time? It is a recipe for failure on busy machines.
Wouldn't it be better for the driver to use some kind of UMA? This is
really a side issue and does not fix the error.
2) as mentioned before, many other callers of contigmalloc() that
set NOWAIT really expect laundering and waiting a brief amount
of time. Many of these callers do not check for the allocation
failure case. Most are allocating memory on device initialization
and are not in their interrupt handlers. These issues have to
be fixed or else we will actually be increasing the number of
panics.
--Mark Tinguely.
I stated in the -current mailing list that my original patch to
sys/vm/vm_contig.c was wrong. The following is closer to what should
be done.
--- vm_contig.c.orig Fri Mar 10 14:30:07 2006
+++ vm_contig.c Thu May 11 11:50:14 2006
@@ -86,7 +86,7 @@
#include <vm/vm_extern.h>
static int
-vm_contig_launder_page(vm_page_t m)
+vm_contig_launder_page(vm_page_t m, int flags)
{
vm_object_t object;
vm_page_t m_tmp;
@@ -96,6 +96,20 @@
object = m->object;
if (!VM_OBJECT_TRYLOCK(object))
return (EAGAIN);
+
+ if (flags & M_NOWAIT) { /* cannot sleep in interrupt mode */
+ if ((m->flags & PG_BUSY) || m->busy) {
+ VM_OBJECT_UNLOCK(object);
+ return (EBUSY);
+ } else {
+ vm_page_test_dirty(m);
+ if (m->dirty) {
+ VM_OBJECT_UNLOCK(object);
+ return (EAGAIN);
+ }
+ }
+ }
+
if (vm_page_sleep_if_busy(m, TRUE, "vpctw0")) {
VM_OBJECT_UNLOCK(object);
vm_page_lock_queues();
@@ -138,7 +152,7 @@
}
static int
-vm_contig_launder(int queue)
+vm_contig_launder(int queue, int flags)
{
vm_page_t m, next;
int error;
@@ -152,7 +166,7 @@
KASSERT(VM_PAGE_INQUEUE2(m, queue),
("vm_contig_launder: page %p's queue is not %d", m, queue));
- error = vm_contig_launder_page(m);
+ error = vm_contig_launder_page(m, flags);
if (error == 0)
return (TRUE);
if (error == EBUSY)
@@ -233,12 +247,12 @@
actmax = vm_page_queues[PQ_ACTIVE].lcnt;
again1:
if (inactl < inactmax &&
- vm_contig_launder(PQ_INACTIVE)) {
+ vm_contig_launder(PQ_INACTIVE, flags)) {
inactl++;
goto again1;
}
if (actl < actmax &&
- vm_contig_launder(PQ_ACTIVE)) {
+ vm_contig_launder(PQ_ACTIVE, flags)) {
actl++;
goto again1;
}
@@ -390,7 +404,7 @@
vm_page_t
vm_page_alloc_contig(vm_pindex_t npages, vm_paddr_t low, vm_paddr_t high,
- vm_offset_t alignment, vm_offset_t boundary)
+ vm_offset_t alignment, vm_offset_t boundary, int flags)
{
vm_object_t object;
vm_offset_t size;
@@ -483,7 +497,7 @@
pqtype != PQ_CACHE) {
if (m->queue == PQ_ACTIVE ||
m->queue == PQ_INACTIVE) {
- if (vm_contig_launder_page(m) != 0)
+ if (vm_contig_launder_page(m, flags) != 0)
goto cleanup_freed;
pqtype = m->queue - m->pc;
if (pqtype != PQ_FREE &&
@@ -590,7 +604,7 @@
boundary, kernel_map);
} else {
pages = vm_page_alloc_contig(npgs, low, high,
- alignment, boundary);
+ alignment, boundary, flags);
if (pages == NULL) {
ret = NULL;
} else {
State Changed From-To: open->patched This should be fixed in HEAD and RELENG_7. Responsible Changed From-To: freebsd-bugs->alc This should be fixed in HEAD and RELENG_7. State Changed From-To: patched->closed This bug has been fixed since FreeBSD 7.0-RELEASE. |