The syscall of amr_ioctl() used by megarc corrupts every system running FreeBSD 7.x or FreeBSD 8.x, if there is activity on the disks controlled by amr(4). This problem is well known (ports/128082, ports/137938) and did not exist in FreeBSD 6.x. A kernel with WITNESS and INVARIANTS most of the time stops quickly with panic after megarc is started on a busy sytem. In the following example one of two processes running bsdtar crashes (pid 4337), the megarc process running amr_ioctl() has pid 4343. I used serial console to enter some ddb commands: Kernel page fault with the following non-sleepable locks held: exclusive sleep mutex FFS2 dinode (UMA zone) r = 0 (0xc9ea9388) locked @ vm/uma_core.c:2013 KDB: stack backtrace: db_trace_self_wrapper(c09504cb,ea02a69c,246,ca672904,c9506ed0,...) at db_trace_self_wrapper+0x26 kdb_backtrace(7dd,3,ffffffff,c0b50704,ea02a700,...) at kdb_backtrace+0x2a _witness_debugger(c0952aac,ea02a714,4,1,0,...) at _witness_debugger+0x25 witness_warn(5,0,c097bdeb,ea02a770,ca1c0550,...) at witness_warn+0x1fe trap(ea02a79c) at trap+0x164 calltrap() at calltrap+0x6 --- trap 0xc, eip = 0xc084f8ce, esp = 0xea02a7dc, ebp = 0xea02a7f0 --- uma_dbg_alloc(c9ef3380,0,cb5ceb00,7dd,0,...) at uma_dbg_alloc+0x4e uma_zalloc_arg(c9ef3380,0,2,4000,0,...) at uma_zalloc_arg+0xea ffs_vgetf(ca0c8b40,46d5fe,80000,ea02a9e8,0,...) at ffs_vgetf+0x41c ffs_vget(ca0c8b40,46d5fe,80000,ea02a9e8,c0746def,...) at ffs_vget+0x2e ffs_valloc(cb39a968,41ed,c9969d00,ea02a9e8,c09dfba0,...) at ffs_valloc+0x4f1 ufs_mkdir(ea02abf8,c0980823,0,ea02ab3c,ea02aba4,...) at ufs_mkdir+0xb9 VOP_MKDIR_APV(c09c6320,ea02abf8,2,0,0,...) at VOP_MKDIR_APV+0xc5 kern_mkdirat(ca6f9b40,ffffff9c,88496100,0,1ed,...) at kern_mkdirat+0x1f7 kern_mkdir(ca6f9b40,88496100,0,1ed,ea02ac7c,...) at kern_mkdir+0x2e mkdir(ca6f9b40,ea02acec,ea02ad28,c0951e3d,0,...) at mkdir+0x29 syscallenter(ca6f9b40,ea02ace4,ea02ace4,2,c0a11610,...) at syscallenter+0x246 syscall(ea02ad28) at syscall+0x4f Xint0x80_syscall() at Xint0x80_syscall+0x21 --- syscall (136, FreeBSD ELF32, mkdir), eip = 0x8835d433, esp = 0xbfbfe88c, ebp = 0xbfbfe8b8 --- Fatal trap 12: page fault while in kernel mode cpuid = 0; apic id = 00 fault virtual address = 0x50 fault code = supervisor read, page not present instruction pointer = 0x20:0xc084f8ce stack pointer = 0x28:0xea02a7dc frame pointer = 0x28:0xea02a7f0 code segment = base rx0, limit 0xfffff, type 0x1b = DPL 0, pres 1, def32 1, gran 1 processor eflags = interrupt enabled, resume, IOPL = 0 current process = 4337 (bsdtar) [thread pid 4337 tid 100181 ] Stopped at uma_dbg_alloc+0x4e: divl 0x50(%esi),%eax db> show allpcpu Current CPU: 0 cpuid = 0 dynamic pcpu = 0x644b00 curthread = 0xca6f9b40: pid 4337 "bsdtar" curpcb = 0xea02ad80 fpcurthread = none idlethread = 0xc9547000: tid 100006 "idle: cpu0" APIC ID = 0 currentldt = 0x50 spin locks held: cpuid = 1 dynamic pcpu = 0x68eab00 curthread = 0xc95472d0: pid 11 "idle: cpu1" curpcb = 0xc72f1d80 fpcurthread = none idlethread = 0xc95472d0: tid 100005 "idle: cpu1" APIC ID = 1 currentldt = 0x50 spin locks held: cpuid = 2 dynamic pcpu = 0x68edb00 curthread = 0xc97c72d0: pid 21 "bufdaemon" curpcb = 0xe7c2dd80 fpcurthread = none idlethread = 0xc95475a0: tid 100004 "idle: cpu2" APIC ID = 6 currentldt = 0x50 spin locks held: exclusive spin mutex uart_hwmtx (uart_hwmtx) r = 0 (0xc97e5120) locked @ dev/uart/uart_cpu.h:92 cpuid = 3 dynamic pcpu = 0x68f0b00 curthread = 0xca0cab40: pid 4343 "megarc" curpcb = 0xe9f2fd80 fpcurthread = none idlethread = 0xc9547870: tid 100003 "idle: cpu3" APIC ID = 7 currentldt = 0x50 spin locks held: exclusive spin mutex smp rendezvous (smp rendezvous) r = 0 (0xc0a244f4) locked @ i386/i386/mp_machdep.c:1165 db> trace Tracing pid 4337 tid 100181 td 0xca6f9b40 uma_dbg_alloc(c9ef3380,0,cb5ceb00,7dd,0,...) at uma_dbg_alloc+0x4e uma_zalloc_arg(c9ef3380,0,2,4000,0,...) at uma_zalloc_arg+0xea ffs_vgetf(ca0c8b40,46d5fe,80000,ea02a9e8,0,...) at ffs_vgetf+0x41c ffs_vget(ca0c8b40,46d5fe,80000,ea02a9e8,c0746def,...) at ffs_vget+0x2e ffs_valloc(cb39a968,41ed,c9969d00,ea02a9e8,c09dfba0,...) at ffs_valloc+0x4f1 ufs_mkdir(ea02abf8,c0980823,0,ea02ab3c,ea02aba4,...) at ufs_mkdir+0xb9 VOP_MKDIR_APV(c09c6320,ea02abf8,2,0,0,...) at VOP_MKDIR_APV+0xc5 kern_mkdirat(ca6f9b40,ffffff9c,88496100,0,1ed,...) at kern_mkdirat+0x1f7 kern_mkdir(ca6f9b40,88496100,0,1ed,ea02ac7c,...) at kern_mkdir+0x2e mkdir(ca6f9b40,ea02acec,ea02ad28,c0951e3d,0,...) at mkdir+0x29 syscallenter(ca6f9b40,ea02ace4,ea02ace4,2,c0a11610,...) at syscallenter+0x246 syscall(ea02ad28) at syscall+0x4f Xint0x80_syscall() at Xint0x80_syscall+0x21 --- syscall (136, FreeBSD ELF32, mkdir), eip = 0x8835d433, esp = 0xbfbfe88c, ebp = 0xbfbfe8b8 --- Some other trace information: Tracing command megarc pid 4343 tid 100129 td 0xca0cab40 cpustop_handler(8,e9f2f854,c08c6cb6,c06f8e3b,c094e5fd,...) at cpustop_handler+0x34 ipi_nmi_handler(c06f8e3b,c094e5fd,c0944b18,3,ca0c0aa0,...) at ipi_nmi_handler+0x2f trap(e9f2f860) at trap+0x36 calltrap() at calltrap+0x6 --- trap 0x13, eip = 0xc08bc2c8, esp = 0xe9f2f8a0, ebp = 0xe9f2f8c0 --- smp_tlb_shootdown(e9f2f8e0,c08bf168,cb5d0000,c0bceea0,0,...) at smp_tlb_shootdown+0x98 smp_invlpg(cb5d0000,c0bceea0,0,bff2d740,e9f2f908,...) at smp_invlpg+0x1e pmap_invalidate_page(c0bceea0,cb5d0000,c097a99a,aab,cb5d0000,...) at pmap_invalidate_page+0x38 pmap_remove_pte(e9f2f928,0,c097a99a,aeb,c0bceea0,...) at pmap_remove_pte+0x84 pmap_remove(c0bceea0,cb5d0000,cb5d7000,0,c096ee82,...) at pmap_remove+0x17a vm_map_delete(c158f08c,cb5d0000,cb5d7000,cb557b18,7000,...) at vm_map_delete+0x1a5 vm_map_remove(c158f08c,cb5d0000,cb5d7000,e9f2f9f0,c084b4c6,...) at vm_map_remove+0x51 kmem_free(c158f08c,cb5d0000,7000,e9f2fa08,c084c558,...) at kmem_free+0x30 page_free(cb5d0000,7000,22,cb5d0000,e9f2fa2c,...) at page_free+0x46 uma_large_free(cb557b18,4,c094b615,cb5d0000,0,...) at uma_large_free+0x88 free(cb5d0000,c0992b90,c092a81c,3a3,c06f8edc,...) at free+0x10f amr_ioctl(c96dab00,c0304301,ca8b2380,1,ca0cab40,...) at amr_ioctl+0xb6b giant_ioctl(c96dab00,c0304301,ca8b2380,1,ca0cab40,...) at giant_ioctl+0x75 devfs_ioctl_f(ca0bece8,c0304301,ca8b2380,c9969d00,ca0cab40,...) at devfs_ioctl_f+0x10b kern_ioctl(ca0cab40,3,c0304301,ca8b2380,1f2fcec,...) at kern_ioctl+0x1fd ioctl(ca0cab40,e9f2fcec,e9f2fd28,c0951e3d,0,...) at ioctl+0x134 syscallenter(ca0cab40,e9f2fce4,e9f2fce4,0,c0a11610,...) at syscallenter+0x246 syscall(e9f2fd28) at syscall+0x4f Xint0x80_syscall() at Xint0x80_syscall+0x21 --- syscall (54, FreeBSD ELF32, ioctl), eip = 0x8079d2f, esp = 0xbfbfe91c, ebp = 0xbfbfe988 --- Tracing command bsdtar pid 4337 tid 100181 td 0xca6f9b40 uma_dbg_alloc(c9ef3380,0,cb5ceb00,7dd,0,...) at uma_dbg_alloc+0x4e uma_zalloc_arg(c9ef3380,0,2,4000,0,...) at uma_zalloc_arg+0xea ffs_vgetf(ca0c8b40,46d5fe,80000,ea02a9e8,0,...) at ffs_vgetf+0x41c ffs_vget(ca0c8b40,46d5fe,80000,ea02a9e8,c0746def,...) at ffs_vget+0x2e ffs_valloc(cb39a968,41ed,c9969d00,ea02a9e8,c09dfba0,...) at ffs_valloc+0x4f1 ufs_mkdir(ea02abf8,c0980823,0,ea02ab3c,ea02aba4,...) at ufs_mkdir+0xb9 VOP_MKDIR_APV(c09c6320,ea02abf8,2,0,0,...) at VOP_MKDIR_APV+0xc5 kern_mkdirat(ca6f9b40,ffffff9c,88496100,0,1ed,...) at kern_mkdirat+0x1f7 kern_mkdir(ca6f9b40,88496100,0,1ed,ea02ac7c,...) at kern_mkdir+0x2e mkdir(ca6f9b40,ea02acec,ea02ad28,c0951e3d,0,...) at mkdir+0x29 syscallenter(ca6f9b40,ea02ace4,ea02ace4,2,c0a11610,...) at syscallenter+0x246 syscall(ea02ad28) at syscall+0x4f Xint0x80_syscall() at Xint0x80_syscall+0x21 --- syscall (136, FreeBSD ELF32, mkdir), eip = 0x8835d433, esp = 0xbfbfe88c, ebp = 0xbfbfe8b8 --- Tracing command sh pid 4336 tid 100173 td 0xca6fb5a0 sched_switch(ca6fb5a0,0,104,191,6e571642,...) at sched_switch+0x389 mi_switch(104,0,c09515f9,1eb,5c,...) at mi_switch+0x200 sleepq_switch(ca6fb5a0,0,c09515f9,1a0,5c,...) at sleepq_switch+0x15f sleepq_catch_signals(c09515f9,160,0,100,100,...) at sleepq_catch_signals+0xb5 sleepq_wait_sig(ca6eaaa0,5c,c0953eca,100,0,...) at sleepq_wait_sig+0x17 _sleep(ca6eaaa0,ca6eab28,15c,c0953eca,0,...) at _sleep+0x35c kern_wait(ca6fb5a0,ffffffff,ea00ec44,2,0,...) at kern_wait+0xb76 wait4(ca6fb5a0,ea00ecec,ea00ed28,c0951e3d,0,...) at wait4+0x3b syscallenter(ca6fb5a0,ea00ece4,ea00ece4,0,c0a11610,...) at syscallenter+0x246 syscall(ea00ed28) at syscall+0x4f Xint0x80_syscall() at Xint0x80_syscall+0x21 --- syscall (7, FreeBSD ELF32, wait4), eip = 0x8815cfbb, esp = 0xbfbfe8fc, ebp = 0xbfbfe918 --- Tracing command bsdtar pid 4333 tid 100142 td 0xca1c5870 sched_switch(ca1c5870,0,104,191,f7f7cfd8,...) at sched_switch+0x389 mi_switch(104,0,c09515f9,1eb,50,...) at mi_switch+0x200 sleepq_switch(ca1c5870,0,c09515f9,260,50,...) at sleepq_switch+0x15f sleepq_wait(db7b01a0,50,c0958ef4,4,0,...) at sleepq_wait+0x63 __lockmgr_args(db7b01a0,81900,ca672904,c0958ef4,50,...) at __lockmgr_args+0xb1f getblk(ca672810,46b5460,0,4000,0,...) at getblk+0x167 breadn(ca672810,46b5460,0,4000,0,...) at breadn+0x44 bread(ca672810,46b5460,0,4000,0,...) at bread+0x4c ffs_vgetf(ca0c8b40,46d5ff,80000,e9f90918,0,...) at ffs_vgetf+0x3aa ffs_vget(ca0c8b40,46d5ff,80000,e9f90918,e9f90858,...) at ffs_vget+0x2e ffs_valloc(cb32a810,81a4,c9969d00,e9f90918,e9f908fc,...) at ffs_valloc+0x4f1 ufs_makeinode(e9f90ba4,c09c67e0,e9f90b00,e9f90a64,c08ee315,...) at ufs_makeinode+0xa3 ufs_create(e9f90b00,c09809e9,0,0,e9f90b78,...) at ufs_create+0x30 VOP_CREATE_APV(c09c6320,e9f90b00,2,c06f8e3b,0,...) at VOP_CREATE_APV+0xc5 vn_open_cred(e9f90b78,e9f90c2c,1a4,0,c9969d00,...) at vn_open_cred+0x1c3 vn_open(e9f90b78,e9f90c2c,1a4,ca6079a0,0,...) at vn_open+0x3b kern_openat(ca1c5870,ffffff9c,88496100,0,a02,...) at kern_openat+0x125 kern_open(ca1c5870,88496100,0,a01,1a4,...) at kern_open+0x35 open(ca1c5870,e9f90cec,e9f90d28,c0951e3d,0,...) at open+0x30 syscallenter(ca1c5870,e9f90ce4,e9f90ce4,0,c094b615,...) at syscallenter+0x246 syscall(e9f90d28) at syscall+0x4f Xint0x80_syscall() at Xint0x80_syscall+0x21 --- syscall (5, FreeBSD ELF32, open), eip = 0x8835d473, esp = 0xbfbfe89c, ebp = 0xbfbfe8c8 --- Tracing command bufdaemon pid 21 tid 100063 td 0xc97c72d0 cpustop_handler(4,e7c2d9e8,c08c6cb6,c,0,...) at cpustop_handler+0x34 ipi_nmi_handler(c,0,c9502340,c0b51db0,c9e02000,...) at ipi_nmi_handler+0x2f trap(e7c2d9f4) at trap+0x36 calltrap() at calltrap+0x6 --- trap 0x13, eip = 0xc05eba0f, esp = 0xe7c2da34, ebp = 0xe7c2da54 --- ns8250_bus_ipend(c97e5100,e7c2da7c,c06a4344,c97e5180,c97e5158,...) at ns8250_bus_ipend+0x5f uart_intr(c97e5100,111,2,c9e02000,c97c72d0,...) at uart_intr+0x33b intr_event_handle(c9542d80,e7c2daf0,0,c0b87dc4,0,...) at intr_event_handle+0x65 intr_execute_handlers(c95288d0,e7c2daf0,6,e7c2db4c,c08ad725,...) at intr_execute_handlers+0x49 lapic_handle_intr(30,e7c2daf0) at lapic_handle_intr+0x4c Xapic_isr1() at Xapic_isr1+0x35 --- interrupt, eip = 0xc06a4929, esp = 0xe7c2db30, ebp = 0xe7c2db4c --- _mtx_lock_sleep(c0b87dc4,c97c72d0,0,c09583d4,e3f,...) at _mtx_lock_sleep+0x99 _mtx_lock_flags(c0b87dc4,0,c09583d4,e3f,cb42f990,...) at _mtx_lock_flags+0xf7 vfs_busy_pages(db4ec8e8,1,df,1800,0,...) at vfs_busy_pages+0xab bufwrite(db4ec8e8,0,c09583d4,6c8,c095aeb2,...) at bufwrite+0xa7 vfs_bio_awrite(db4ec8e8,0,c09583d4,92b,0,...) at vfs_bio_awrite+0x318 flushbufqueues(ce,c094b615,c9e02088,0,0,...) at flushbufqueues+0x5b0 buf_do_flush(c9e02000,0,c09583d4,897,64,...) at buf_do_flush+0x19 buf_daemon(0,e7c2dd28,c0948386,345,c9e02000,...) at buf_daemon+0xba fork_exit(c07304b0,0,e7c2dd28) at fork_exit+0xb8 fork_trampoline() at fork_trampoline+0x8 --- trap 0, eip = 0, esp = 0xe7c2dd60, ebp = 0 --- db> ps pid ppid pgrp uid state wmesg wchan cmd 4343 4329 4321 0 R+ CPU 3 megarc 4337 4336 4321 0 R+ CPU 0 bsdtar 4336 4324 4321 0 S+ wait 0xca6eaaa0 sh 4333 4331 4321 0 S+ getblk 0xdb7b01a0 bsdtar 4331 4324 4321 0 S+ wait 0xca6ea000 sh 4329 4327 4321 0 S+ wait 0xca6ea2a8 sh 4327 4324 4321 0 S+ wait 0xca1bed48 sh 4324 4322 4321 0 S+ wait 0xca1c0000 sh 4322 1 4321 0 S+ wait 0xc9f462a8 sh 4310 4309 4310 0 S+ ttyin 0xca243a70 sh 4309 4305 4309 1003 S+ wait 0xc9ebfaa0 su 4305 4304 4305 1003 Ss+ wait 0xc9f18d48 bash 4304 4302 4302 1003 S select 0xca23ad64 sshd 4302 3695 4302 0 Ss sbwait 0xca451a60 sshd 4162 1 4162 0 Ss+ ttyin 0xc967ec70 getty 4161 1 4161 0 Ss+ ttyin 0xc97f7870 getty 4160 1 4160 0 Ss+ ttyin 0xc97f7a70 getty 4159 1 4159 0 Ss+ ttyin 0xc97f7c70 getty 4158 1 4158 0 Ss+ ttyin 0xc97f7e70 getty 4157 1 4157 0 Ss+ ttyin 0xc97f8070 getty 4156 1 4156 0 Ss+ ttyin 0xc97f8270 getty 4155 1 4155 0 Ss+ ttyin 0xc967e870 getty 4123 1 4123 0 Ss select 0xc9edb5e4 inetd 3953 1 3953 25 Ss pause 0xca1bfda0 sendmail 3949 1 3949 0 Ss select 0xc996b9e4 sendmail 3913 3893 26 0 S+ nanslp 0xc0a122e4 sleep 3893 1 26 0 S+ wait 0xc9ebf7f8 sh 3854 1 3854 0 Ss nanslp 0xc0a122e4 cron 3720 3709 26 0 S+ nanslp 0xc0a122e4 sleep 3709 1 26 0 S+ wait 0xca1bf2a8 sh 3701 1 3701 0 Ss select 0xca2394a4 sshd 3695 1 3695 0 Ss select 0xc9ee75e4 sshd 3688 1 26 0 S+ piperd 0xc9f42498 sh 3687 1 26 0 S+ pause 0xc9fed5a8 top 3682 3555 3555 88 S (threaded) mysqld 100166 S ucond 0xca19c640 mysqld 100165 S sigwait 0xe9fdebb0 mysqld 100164 S ucond 0xc9ee74c0 mysqld 100071 S select 0xca23aca4 initial thread 3555 1 3555 88 Ss wait 0xca0c27f8 sh 3542 1 3542 181 Ss select 0xc9edb9e4 nrpe2 3485 1 3485 0 Ss select 0xc9ee7d24 ntpd 3267 1 3267 53 Ss (threaded) named 100163 S kqread 0xc9e92c80 named 100162 S ucond 0xca50f080 named 100161 S ucond 0xca239b80 named 100160 S ucond 0xca239240 named 100159 S ucond 0xc9ee7700 named 100158 S ucond 0xca19b8c0 named 100126 S sigwait 0xe9f26bb0 named 3179 1 3179 0 Ss select 0xc9eda8e4 syslogd 3141 0 0 0 SL - 0xc0a10198 [accounting] 2930 1 2930 0 Ss select 0xca2391e4 devd 1452 1443 1443 64 S bpf 0xc9f7e900 pflogd 1443 1 1443 0 Ss sbwait 0xc9f37728 pflogd 325 0 0 0 SL mdwait 0xc9e19800 [md0] 249 1 249 0 Ss pause 0xc9fed058 adjkerntz 25 0 0 0 SL flowclea 0xc0b7e8c0 [flowcleaner] 24 0 0 0 SL sdflush 0xc0b874e0 [softdepflush] 23 0 0 0 SL vlruwt 0xc9652aa0 [vnlru] 22 0 0 0 SL syncer 0xc0b7e6b8 [syncer] 21 0 0 0 RL CPU 2 [bufdaemon] 20 0 0 0 SL pollid 0xc0a1183c [idlepoll] 19 0 0 0 SL pgzero 0xc0b881f4 [pagezero] 18 0 0 0 SL psleep 0xc0b87e1c [vmdaemon] 17 0 0 0 SL psleep 0xc0b87de4 [pagedaemon] 16 0 0 0 SL ipmireq 0xc97f396c [ipmi0: kcs] 9 0 0 0 SL ccb_scan 0xc09e11d4 [xpt_thrd] 8 0 0 0 SL pftm 0xc04b7900 [pfpurge] 7 0 0 0 SL - 0xc973403c [fdc0] 15 0 0 0 SL (threaded) usb 100047 D - 0xc97cbdac [usbus1] 100046 D - 0xc97cbd7c [usbus1] 100045 D - 0xc97cbd4c [usbus1] 100044 D - 0xc97cbd1c [usbus1] 100042 D - 0xc97b3dac [usbus0] 100041 D - 0xc97b3d7c [usbus0] 100040 D - 0xc97b3d4c [usbus0] 100039 D - 0xc97b3d1c [usbus0] 6 0 0 0 SL idle 0xc9741000 [aic_recovery1] 5 0 0 0 SL idle 0xc96f5000 [aic_recovery0] 14 0 0 0 SL - 0xc0a12144 [yarrow] 4 0 0 0 SL - 0xc0a0f9c4 [g_down] 3 0 0 0 SL - 0xc0a0f9c0 [g_up] 2 0 0 0 SL - 0xc0a0f9b8 [g_event] 13 0 0 0 SL (threaded) ng_queue 100016 D sleep 0xc09bdf00 [ng_queue3] 100015 D sleep 0xc09bdf00 [ng_queue2] 100014 D sleep 0xc09bdf00 [ng_queue1] 100013 D sleep 0xc09bdf00 [ng_queue0] 12 0 0 0 WL (threaded) intr 100054 I [swi0: uart uart] 100052 I [irq12: psm0] 100051 I [irq1: atkbd0] 100050 I [irq17: ichsmb0] 100049 I [irq15: ata1] 100048 I [irq14: ata0] 100043 I [irq19: uhci1] 100038 I [irq16: uhci0] 100037 I [irq24: amr0] 100033 I [irq49: ahd1] 100031 I [irq50: ahd0] 100030 I [irq9: acpi0] 100028 I [swi5: +] 100027 I [swi2: cambio] 100022 I [swi6: task queue] 100021 I [swi6: Giant taskq] 100012 I [swi1: netisr 0] 100011 I [swi4: clock] 100010 I [swi4: clock] 100009 I [swi4: clock] 100008 I [swi4: clock] 100007 I [swi3: vm] 11 0 0 0 RL (threaded) idle 100006 CanRun [idle: cpu0] 100005 Run CPU 1 [idle: cpu1] 100004 CanRun [idle: cpu2] 100003 CanRun [idle: cpu3] 1 0 1 0 SLs wait 0xc9545d48 [init] 10 0 0 0 SL audit_wo 0xc0b86e80 [audit] 0 0 0 0 SLs (threaded) kernel 100056 D - 0xc996c5c0 [dummynet] 100036 D - 0xc9795ac0 [em1 taskq] 100035 D - 0xc97830c0 [em0 taskq] 100029 D - 0xc96a7700 [thread taskq] 100026 D - 0xc96a78c0 [kqueue taskq] 100025 D - 0xc96a7900 [acpi_task_2] 100024 D - 0xc96a7900 [acpi_task_1] 100023 D - 0xc96a7900 [acpi_task_0] 100000 D sched 0xc0a0faa0 [swapper] db> show lock Giant class: sleep mutex name: Giant flags: {DEF, RECURSE} state: {OWNED} owner: 0xca0cab40 (tid 100129, pid 4343, "megarc") db> show lockedvnods Locked vnodes 0xcb32a810: tag ufs, type VDIR usecount 1, writecount 0, refcount 2 mountedhere 0 flags () v_object 0xcb54c440 ref 0 pages 0 lock type ufs: EXCL by thread 0xca1c5870 (pid 4333) #0 0xc069c7a5 at __lockmgr_args+0x595 #1 0xc08358b1 at ffs_lock+0xa1 #2 0xc08ed345 at VOP_LOCK1_APV+0xb5 #3 0xc0753828 at _vn_lock+0x78 #4 0xc0747c4b at vget+0xbb #5 0xc0733a6b at cache_lookup+0x67b #6 0xc0733c4d at vfs_cache_lookup+0xad #7 0xc08ee4c5 at VOP_LOOKUP_APV+0xe5 #8 0xc073af6d at lookup+0x62d #9 0xc073bd8f at namei+0x57f #10 0xc0752e30 at vn_open_cred+0x90 #11 0xc075331b at vn_open+0x3b #12 0xc0751ec5 at kern_openat+0x125 #13 0xc0752305 at kern_open+0x35 #14 0xc0752340 at open+0x30 #15 0xc06f2456 at syscallenter+0x246 #16 0xc08c6b9f at syscall+0x4f #17 0xc08ad3c1 at Xint0x80_syscall+0x21 ino 4640097, on dev amrd1s1d 0xcb39a968: tag ufs, type VDIR usecount 1, writecount 0, refcount 3 mountedhere 0 flags () v_object 0xcb54c550 ref 0 pages 0 lock type ufs: EXCL by thread 0xca6f9b40 (pid 4337) #0 0xc069c7a5 at __lockmgr_args+0x595 #1 0xc08358b1 at ffs_lock+0xa1 #2 0xc08ed345 at VOP_LOCK1_APV+0xb5 #3 0xc0753828 at _vn_lock+0x78 #4 0xc0747c4b at vget+0xbb #5 0xc0733a6b at cache_lookup+0x67b #6 0xc0733c4d at vfs_cache_lookup+0xad #7 0xc08ee4c5 at VOP_LOOKUP_APV+0xe5 #8 0xc073af6d at lookup+0x62d #9 0xc073bd8f at namei+0x57f #10 0xc074ac48 at kern_mkdirat+0x68 #11 0xc074ae7e at kern_mkdir+0x2e #12 0xc074aea9 at mkdir+0x29 #13 0xc06f2456 at syscallenter+0x246 #14 0xc08c6b9f at syscall+0x4f #15 0xc08ad3c1 at Xint0x80_syscall+0x21 ino 4640256, on dev amrd1s1d 0xcb5b86b8: tag ufs, type VREG usecount 0, writecount 0, refcount 2 mountedhere 0 flags () v_object 0xcb42f990 ref 0 pages 2 lock type ufs: EXCL by thread 0xc97c72d0 (pid 21) #0 0xc069ce0e at __lockmgr_args+0xbfe #1 0xc08358b1 at ffs_lock+0xa1 #2 0xc08ed345 at VOP_LOCK1_APV+0xb5 #3 0xc0753828 at _vn_lock+0x78 #4 0xc07300cb at flushbufqueues+0x56b #5 0xc07303c9 at buf_do_flush+0x19 #6 0xc073056a at buf_daemon+0xba #7 0xc068b748 at fork_exit+0xb8 #8 0xc08ad3d4 at fork_trampoline+0x8 ino 4642236, on dev amrd1s1d 0xcb5c4408: tag ufs, type VNON usecount 1, writecount 0, refcount 1 mountedhere 0 flags () lock type ufs: EXCL by thread 0xca6f9b40 (pid 4337) #0 0xc069ce0e at __lockmgr_args+0xbfe #1 0xc0830201 at ffs_vgetf+0x1e1 #2 0xc08305ae at ffs_vget+0x2e #3 0xc08113b1 at ffs_valloc+0x4f1 #4 0xc0844369 at ufs_mkdir+0xb9 #5 0xc08ed7c5 at VOP_MKDIR_APV+0xc5 #6 0xc074add7 at kern_mkdirat+0x1f7 #7 0xc074ae7e at kern_mkdir+0x2e #8 0xc074aea9 at mkdir+0x29 #9 0xc06f2456 at syscallenter+0x246 #10 0xc08c6b9f at syscall+0x4f #11 0xc08ad3c1 at Xint0x80_syscall+0x21 ino 4642302, on dev amrd1s1d 0xcb582810: tag ufs, type VNON usecount 1, writecount 0, refcount 1 mountedhere 0 flags () lock type ufs: EXCL by thread 0xca1c5870 (pid 4333) #0 0xc069ce0e at __lockmgr_args+0xbfe #1 0xc0830201 at ffs_vgetf+0x1e1 #2 0xc08305ae at ffs_vget+0x2e #3 0xc08113b1 at ffs_valloc+0x4f1 #4 0xc0841da3 at ufs_makeinode+0xa3 #5 0xc0842540 at ufs_create+0x30 #6 0xc08ee315 at VOP_CREATE_APV+0xc5 #7 0xc0752f63 at vn_open_cred+0x1c3 #8 0xc075331b at vn_open+0x3b #9 0xc0751ec5 at kern_openat+0x125 #10 0xc0752305 at kern_open+0x35 #11 0xc0752340 at open+0x30 #12 0xc06f2456 at syscallenter+0x246 #13 0xc08c6b9f at syscall+0x4f #14 0xc08ad3c1 at Xint0x80_syscall+0x21 ino 4642303, on dev amrd1s1d db> show lockchain thread 100181 (pid 4337, bsdtar) running on CPU 0 db> show locks exclusive sleep mutex FFS2 dinode (UMA zone) r = 0 (0xc9ea9388) locked @ vm/uma_core.c:2013 exclusive lockmgr bufwait (bufwait) r = 0 (0xdb7b01a0) locked @ kern/vfs_bio.c:2636 exclusive lockmgr ufs (ufs) r = 0 (0xcb5c4460) locked @ ufs/ffs/ffs_vfsops.c:1515 exclusive lockmgr ufs (ufs) r = 0 (0xcb39a9c0) locked @ kern/vfs_subr.c:2120 db> show alllocks Process 4343 (megarc) thread 0xca0cab40 (100129) exclusive sleep mutex pmap (pmap) r = 0 (0xc0bceea0) locked @ i386/i386/pmap.c:2795 exclusive sleep mutex vm page queue mutex (vm page queue mutex) r = 0 (0xc0b87dc4) locked @ i386/i386/pmap.c:2793 exclusive sleep mutex system map (system map) r = 0 (0xc158f0e8) locked @ vm/vm_map.c:2844 exclusive sleep mutex Giant (Giant) r = 0 (0xc0a11610) locked @ kern/kern_conf.c:427 Process 4337 (bsdtar) thread 0xca6f9b40 (100181) exclusive sleep mutex FFS2 dinode (UMA zone) r = 0 (0xc9ea9388) locked @ vm/uma_core.c:2013 exclusive lockmgr bufwait (bufwait) r = 0 (0xdb7b01a0) locked @ kern/vfs_bio.c:2636 exclusive lockmgr ufs (ufs) r = 0 (0xcb5c4460) locked @ ufs/ffs/ffs_vfsops.c:1515 exclusive lockmgr ufs (ufs) r = 0 (0xcb39a9c0) locked @ kern/vfs_subr.c:2120 Process 4333 (bsdtar) thread 0xca1c5870 (100142) exclusive lockmgr ufs (ufs) r = 0 (0xcb582868) locked @ ufs/ffs/ffs_vfsops.c:1515 exclusive lockmgr ufs (ufs) r = 0 (0xcb32a868) locked @ kern/vfs_subr.c:2120 Process 4302 (sshd) thread 0xca1c4870 (100147) exclusive sx so_rcv_sx (so_rcv_sx) r = 0 (0xca451a34) locked @ kern/uipc_sockbuf.c:148 Process 1443 (pflogd) thread 0xc9f50870 (100082) exclusive sx so_rcv_sx (so_rcv_sx) r = 0 (0xc9f376fc) locked @ kern/uipc_sockbuf.c:148 Process 21 (bufdaemon) thread 0xc97c72d0 (100063) exclusive sleep mutex vm object (standard object) r = 0 (0xcb42f990) locked @ kern/vfs_bio.c:3641 exclusive lockmgr ufs (ufs) r = 0 (0xcb5b8710) locked @ kern/vfs_bio.c:2346 exclusive lockmgr bufwait (bufwait) r = 0 (0xdb4ec948) locked @ kern/vfs_bio.c:2299 I can give more ddb output. A crash dump was not possible because "Fatal double fault". Fix: My dirty workaround replaces malloc/free in amr_ioctl() by a static buffer big enough for all megarc parameter. Of course only one megarc command is allowed to run at any time, but thats ok for me at the moment. --- /usr/src/sys/dev/amr/amr.c.orig 2010-12-21 18:09:25.000000000 +0100 +++ /usr/src/sys/dev/amr/amr.c 2011-03-17 10:15:09.000000000 +0100 @@ -164,6 +164,8 @@ /* * Debugging */ +#define MAX_AMR_IOCTL 25600 /* observed: 25412 */ +static char amr_ioctl_buf[MAX_AMR_IOCTL]; static void amr_describe_controller(struct amr_softc *sc); #ifdef AMR_DEBUG #if 0 @@ -843,12 +845,13 @@ /* handle inbound data buffer */ if (au_length != 0 && au_cmd[0] != 0x06) { - if ((dp = malloc(au_length, M_AMR, M_WAITOK|M_ZERO)) == NULL) { + if (au_length >= MAX_AMR_IOCTL) { error = ENOMEM; goto out; } + memset(&amr_ioctl_buf, '\0', MAX_AMR_IOCTL); + dp = amr_ioctl_buf; if ((error = copyin(au_buffer, dp, au_length)) != 0) { - free(dp, M_AMR); return (error); } debug(2, "copyin %ld bytes from %p -> %p", au_length, au_buffer, dp); @@ -929,8 +932,6 @@ if (ac != NULL) amr_releasecmd(ac); mtx_unlock(&sc->amr_list_lock); - if (dp != NULL) - free(dp, M_AMR); #ifndef LSI if (logical_drives_changed) How-To-Repeat: Run some disk I/O intensive jobs on a disk contolled by amr(4) and start megarc in a loop.
As John Baldwin pointed out to me, this problem is caused by overrunnig the memory buffer by a specific megarc command. Further analysis showed that indeed this happens. There are several commands leading megarc to acquire a buffer of 12868 bytes but in this case the controller always sends back 25412 bytes. That is the memory corruption. This problem can be patched in megarc, see ports/137938. But I have also seen seen sporadic situations like this: megarc wants a buffer of 36 bytes, but the controller sends 1k data back. Therefore I think it is the best to be on the safe side and allocate always a buffer big enough for all answers of the controller. The following patch now looks good for me: --- amr.c.orig 2010-02-11 19:34:06.000000000 +0100 +++ amr.c 2011-09-09 16:23:07.000000000 +0200 @@ -87,6 +87,7 @@ #include <dev/amr/amrvar.h> #define AMR_DEFINE_TABLES #include <dev/amr/amr_tables.h> +#define MAX_AMR_IOCTL 25600 /* observed: 25412 */ SYSCTL_NODE(_hw, OID_AUTO, amr, CTLFLAG_RD, 0, "AMR driver parameters"); @@ -843,7 +844,7 @@ /* handle inbound data buffer */ if (au_length != 0 && au_cmd[0] != 0x06) { - if ((dp = malloc(au_length, M_AMR, M_WAITOK|M_ZERO)) == NULL) { + if ((dp = malloc(MAX_AMR_IOCTL, M_AMR, M_WAITOK|M_ZERO)) == NULL) { error = ENOMEM; goto out; } -- Dr. Andreas Longwitz Data Service GmbH Beethovenstr. 2A 23617 Stockelsdorf Amtsgericht Lübeck, HRB 318 BS Geschäftsführer: Wilfried Paepcke, Dr. Andreas Longwitz, Josef Flatau
It is much better to modify the malloc in the patch in this way: malloc(max(au_length, MAX_AMR_IOCTL), M_AMR, M_WAITOK|M_ZERO)) Thanks to Adran Chadd for this hint. -- Dr. Andreas Longwitz Data Service GmbH Beethovenstr. 2A 23617 Stockelsdorf Amtsgericht Lübeck, HRB 318 BS Geschäftsführer: Wilfried Paepcke, Dr. Andreas Longwitz, Josef Flatau
Scott, what do you think of this? Do you know offhand if there is a known maximum reply size for a message from an amr(4) controller? (That is, a better value for MAX_AMR_IOCTL in the proposed patch) -- John Baldwin
Is the problem that the buffer in allocated to be too big, or too small? The bug report talks about a 36 byte buffer being requested from megarc, = but the firmware returning 1k instead. If this is the bug that I'm = thinking, then it was fixed in the linux ioctl path a few years ago. I = had hoped that megarc would never be compiled and run natively on = FreeBSD, so I didn't fix it in the native path. The fix is to always allocate a minimum of 4k and attach it to the = command sent to the card, but only copy back to user land the length = that the ioctl requests. This is what linux does. It's a bug in the = megaraid firmware, and it's a bug in megarc, and I had hoped that LSI = would be better than this. Note that the linux ioctl path in the driver = that I mentioned only does this as a special case when the length is 0. = It sounds like it needs to be done for all cases of min(au_lenth, 4096). I'm not sure why there's a discussion on max length; if I'm not = understanding the problem, please let me know.
Did a bit of history review. When an ioctl goes through the linux = driver, it goes through a pool allocator to get a scratch buffer for the = command. The pool sizes are PAGE_SIZE (4K) and power-of-2 multiples = above that, up to 64k. So when an application requests a buffer size of = 0-4k, a 4k buffer gets sent to the card. A ~25k buffer request actually = gets 32k sent to the card. The firmware seems to exploit this and = basically ignores the specified length, happily filling the provided = buffer with whatever power-of-2 data count it feels like. According to = an old conversation with Dell and LSI, the LSI engineers consider this = to be a necessary feature, not a bug. As I mentioned previously, I fixed this as a special case of len=3D=3D0 = in the linux ioctl emulation path. Based on the bug report here, the = fix needs to be expanded to rounding up to power-of-2 sizes in both the = native and emulated paths. Anyone who wants to tackle this is welcome to do so. The existing hack = also suffers from bug in the form of it copying the 4k buffer back to = user land, even though userland specified a length of 0. This is = probably a bad thing, and it's something that linux appears to handle = correctly.=
Ok, I've adapted that into a patch at http://www.freebsd.org/~jhb/patches/amr_buffer_len.patch Andreas, can you test that updated patch to verify it fixes your issue? -- John Baldwin
John, I did several tests with your patch in 8.2 and everything works fine, if I use the binary version of megarc with the patch included described in ports/137938. The original megarc sends amr_ioctl's with length 12868 (e.g. the first ioctl of the command "megarc -ctlrinfo -a0") and your patch calls the controller with real_length=16384, but the controller returns 25412 Bytes. This happens all the time on nearly every megarc command, I think this is a program error in megarc, he uses user_cmd=0xa104 with buffer length 12868, but the firmware of the controller replies with 25412 bytes. So we have memory corruption of 25412 - 16384 = 9026 bytes. The patch in ports/137938 changes the lenght field in megarc from 12868 to 25412 to avoid this problem. A line like if( len == 12868 ) len = 25412; would solve this problem in the driver. I did not find any other static problems of this type. Another story are dynamic problems. When the controller is very busy, I see sometimes 1KB bytes returned from the controller, when lenght is much lower. This problem is handled by your patch in all cases. Andreas Longwitz
On Thursday, April 19, 2012 4:12:50 pm Andreas Longwitz wrote: > John, > I did several tests with your patch in 8.2 and everything works fine, if > I use the binary version of megarc with the patch included described in > ports/137938. > > The original megarc sends amr_ioctl's with length 12868 (e.g. the first > ioctl of the command "megarc -ctlrinfo -a0") and your patch calls the > controller with real_length=16384, but the controller returns 25412 > Bytes. This happens all the time on nearly every megarc command, I think > this is a program error in megarc, he uses user_cmd=0xa104 with buffer > length 12868, but the firmware of the controller replies with 25412 > bytes. So we have memory corruption of 25412 - 16384 = 9026 bytes. The > patch in ports/137938 changes the lenght field in megarc from 12868 to > 25412 to avoid this problem. A line like > if( len == 12868 ) len = 25412; > would solve this problem in the driver. I did not find any other static > problems of this type. > > Another story are dynamic problems. When the controller is very busy, I > see sometimes 1KB bytes returned from the controller, when lenght is > much lower. This problem is handled by your patch in all cases. Hmm, given the above, I'm tempted to just force the buffer to always be at least 32k. Scott, what do you think about that? -- John Baldwin
On Thursday, April 19, 2012 4:12:50 pm Andreas Longwitz wrote: > John, > I did several tests with your patch in 8.2 and everything works fine, if > I use the binary version of megarc with the patch included described in > ports/137938. > > The original megarc sends amr_ioctl's with length 12868 (e.g. the first > ioctl of the command "megarc -ctlrinfo -a0") and your patch calls the > controller with real_length=16384, but the controller returns 25412 > Bytes. This happens all the time on nearly every megarc command, I think > this is a program error in megarc, he uses user_cmd=0xa104 with buffer > length 12868, but the firmware of the controller replies with 25412 > bytes. So we have memory corruption of 25412 - 16384 = 9026 bytes. The > patch in ports/137938 changes the lenght field in megarc from 12868 to > 25412 to avoid this problem. A line like > if( len == 12868 ) len = 25412; > would solve this problem in the driver. I did not find any other static > problems of this type. > > Another story are dynamic problems. When the controller is very busy, I > see sometimes 1KB bytes returned from the controller, when lenght is > much lower. This problem is handled by your patch in all cases. > > Andreas Longwitz Ah, ok. I think we should just make the minimum buffer size 32k which should workaround this. I've updated the patch at the same URL: http://www.freebsd.org/~jhb/patches/amr_buffer_len.patch -- John Baldwin
Worst firmware ever. Seriously. If this was open bad, the driver would be r= emoved and the hardware blacklisted as a security threat. Scott On Apr 20, 2012, at 6:13 AM, John Baldwin <jhb@freebsd.org> wrote: > On Thursday, April 19, 2012 4:12:50 pm Andreas Longwitz wrote: >> John, >> I did several tests with your patch in 8.2 and everything works fine, if >> I use the binary version of megarc with the patch included described in >> ports/137938. >>=20 >> The original megarc sends amr_ioctl's with length 12868 (e.g. the first >> ioctl of the command "megarc -ctlrinfo -a0") and your patch calls the >> controller with real_length=3D16384, but the controller returns 25412 >> Bytes. This happens all the time on nearly every megarc command, I think >> this is a program error in megarc, he uses user_cmd=3D0xa104 with buffer >> length 12868, but the firmware of the controller replies with 25412 >> bytes. So we have memory corruption of 25412 - 16384 =3D 9026 bytes. The >> patch in ports/137938 changes the lenght field in megarc from 12868 to >> 25412 to avoid this problem. A line like >> if( len =3D=3D 12868 ) len =3D 25412; >> would solve this problem in the driver. I did not find any other static >> problems of this type. >>=20 >> Another story are dynamic problems. When the controller is very busy, I >> see sometimes 1KB bytes returned from the controller, when lenght is >> much lower. This problem is handled by your patch in all cases. >>=20 >> Andreas Longwitz >=20 > Ah, ok. I think we should just make the minimum buffer size 32k which > should workaround this. I've updated the patch at the same URL: >=20 > http://www.freebsd.org/~jhb/patches/amr_buffer_len.patch >=20 > --=20 > John Baldwin
"openbsd". <i>When computers attack!<i> On Apr 20, 2012, at 7:30 AM, Scott Long <scott4long@yahoo.com> wrote: > Worst firmware ever. Seriously. If this was open bad, the driver would b= e removed and the hardware blacklisted as a security threat. >=20 > Scott >=20 > On Apr 20, 2012, at 6:13 AM, John Baldwin <jhb@freebsd.org> wrote: >=20 >> On Thursday, April 19, 2012 4:12:50 pm Andreas Longwitz wrote: >>> John, >>> I did several tests with your patch in 8.2 and everything works fine, if= >>> I use the binary version of megarc with the patch included described in >>> ports/137938. >>>=20 >>> The original megarc sends amr_ioctl's with length 12868 (e.g. the first >>> ioctl of the command "megarc -ctlrinfo -a0") and your patch calls the >>> controller with real_length=3D16384, but the controller returns 25412 >>> Bytes. This happens all the time on nearly every megarc command, I think= >>> this is a program error in megarc, he uses user_cmd=3D0xa104 with buffer= >>> length 12868, but the firmware of the controller replies with 25412 >>> bytes. So we have memory corruption of 25412 - 16384 =3D 9026 bytes. The= >>> patch in ports/137938 changes the lenght field in megarc from 12868 to >>> 25412 to avoid this problem. A line like >>> if( len =3D=3D 12868 ) len =3D 25412; >>> would solve this problem in the driver. I did not find any other static >>> problems of this type. >>>=20 >>> Another story are dynamic problems. When the controller is very busy, I >>> see sometimes 1KB bytes returned from the controller, when lenght is >>> much lower. This problem is handled by your patch in all cases. >>>=20 >>> Andreas Longwitz >>=20 >> Ah, ok. I think we should just make the minimum buffer size 32k which >> should workaround this. I've updated the patch at the same URL: >>=20 >> http://www.freebsd.org/~jhb/patches/amr_buffer_len.patch >>=20 >> --=20 >> John Baldwin
> Worst firmware ever. Seriously. Agree, but I run three dozen servers with amr controllers of four different types and all behave equal. Without source and/or docs I can not decide what is firmware and/or megarc error. I must live with this situation and I am happy, that this is the only corner in my FreeBSD world that is not "open". >> Ah, ok. I think we should just make the minimum buffer size 32k which >> should workaround this. I've updated the patch at the same URL: >> >> http://www.freebsd.org/~jhb/patches/amr_buffer_len.patch >> >> -- >> John Baldwin I have tested the updated patch, for me it is ok. Thanks you all for investigating in this problem. Andreas Longwitz
State Changed From-To: open->patched Fix committed to HEAD.
Responsible Changed From-To: freebsd-bugs->jhb Fix committed to HEAD.
Author: jhb Date: Fri Apr 20 20:27:31 2012 New Revision: 234501 URL: http://svn.freebsd.org/changeset/base/234501 Log: The amr(4) firmware contains a rather dubious "feature" where it assumes for small buffers (< 64k) that the OS driver is actually using a buffer rounded up to the next power of 2. It also assumes that the buffer is at least 4k in size. Furthermore, there is at least one known instance of megarc sending a request with a 12k buffer where the firmware writes out a 24k-ish reply. To workaround the data corruption triggered by this "feature", ensure that buffers for user commands use a minimum size of 32k, and that buffers between 32k and 64k use a 64k buffer. PR: kern/155658 Submitted by: Andreas Longwitz longwitz incore de Reviewed by: scottl MFC after: 1 week Modified: head/sys/dev/amr/amr.c Modified: head/sys/dev/amr/amr.c ============================================================================== --- head/sys/dev/amr/amr.c Fri Apr 20 16:56:14 2012 (r234500) +++ head/sys/dev/amr/amr.c Fri Apr 20 20:27:31 2012 (r234501) @@ -535,6 +535,31 @@ shutdown_out: amr_startup(sc); } +/* + * Bug-for-bug compatibility with Linux! + * Some apps will send commands with inlen and outlen set to 0, + * even though they expect data to be transfered to them from the + * card. Linux accidentally allows this by allocating a 4KB + * buffer for the transfer anyways, but it then throws it away + * without copying it back to the app. + * + * The amr(4) firmware relies on this feature. In fact, it assumes + * the buffer is always a power of 2 up to a max of 64k. There is + * also at least one case where it assumes a buffer less than 16k is + * greater than 16k. Force a minimum buffer size of 32k and round + * sizes between 32k and 64k up to 64k as a workaround. + */ +static unsigned long +amr_ioctl_buffer_length(unsigned long len) +{ + + if (len <= 32 * 1024) + return (32 * 1024); + if (len <= 64 * 1024) + return (64 * 1024); + return (len); +} + int amr_linux_ioctl_int(struct cdev *dev, u_long cmd, caddr_t addr, int32_t flag, struct thread *td) @@ -664,16 +689,7 @@ amr_linux_ioctl_int(struct cdev *dev, u_ error = ENOIOCTL; break; } else { - /* - * Bug-for-bug compatibility with Linux! - * Some apps will send commands with inlen and outlen set to 0, - * even though they expect data to be transfered to them from the - * card. Linux accidentally allows this by allocating a 4KB - * buffer for the transfer anyways, but it then throws it away - * without copying it back to the app. - */ - if (!len) - len = 4096; + len = amr_ioctl_buffer_length(imax(ali.inlen, ali.outlen)); dp = malloc(len, M_AMR, M_WAITOK | M_ZERO); @@ -703,7 +719,7 @@ amr_linux_ioctl_int(struct cdev *dev, u_ status = ac->ac_status; error = copyout(&status, &((struct amr_mailbox *)&((struct amr_linux_ioctl *)addr)->mbox[0])->mb_status, sizeof(status)); if (ali.outlen) { - error = copyout(dp, (void *)(uintptr_t)mb->mb_physaddr, len); + error = copyout(dp, (void *)(uintptr_t)mb->mb_physaddr, ali.outlen); if (error) break; } @@ -750,7 +766,7 @@ amr_ioctl(struct cdev *dev, u_long cmd, struct amr_command *ac; struct amr_mailbox_ioctl *mbi; void *dp, *au_buffer; - unsigned long au_length; + unsigned long au_length, real_length; unsigned char *au_cmd; int *au_statusp, au_direction; int error; @@ -842,8 +858,9 @@ amr_ioctl(struct cdev *dev, u_long cmd, } /* handle inbound data buffer */ + real_length = amr_ioctl_buffer_length(au_length); if (au_length != 0 && au_cmd[0] != 0x06) { - if ((dp = malloc(au_length, M_AMR, M_WAITOK|M_ZERO)) == NULL) { + if ((dp = malloc(real_length, M_AMR, M_WAITOK|M_ZERO)) == NULL) { error = ENOMEM; goto out; } @@ -902,7 +919,7 @@ amr_ioctl(struct cdev *dev, u_long cmd, /* build the command */ ac->ac_data = dp; - ac->ac_length = au_length; + ac->ac_length = real_length; ac->ac_flags |= AMR_CMD_DATAIN|AMR_CMD_DATAOUT; /* run the command */ _______________________________________________ svn-src-all@freebsd.org mailing list http://lists.freebsd.org/mailman/listinfo/svn-src-all To unsubscribe, send any mail to "svn-src-all-unsubscribe@freebsd.org"
State Changed From-To: patched->closed Fix merged to 8 and 9.
The last update of amr.c (SVN rev 236319) needs a correction, because the controller should see the length of the buffer of the userland program: @@ -919,7 +919,7 @@ /* build the command */ ac->ac_data = dp; - ac->ac_length = real_length; + ac->ac_length = au_length; ac->ac_flags |= AMR_CMD_DATAIN|AMR_CMD_DATAOUT; /* run the command */ Without this patch some busy controllers only return zeros on every amr_ioctl. To do what is described in the comment "Bug-for-bug compatibility .." we should allocate a buffer in the case of userland length 0 too: @ -859,11 +859,11 @@ /* handle inbound data buffer */ real_length = amr_ioctl_buffer_length(au_length); + if ((dp = malloc(real_length, M_AMR, M_WAITOK|M_ZERO)) == NULL) { + error = ENOMEM; + goto out; + } if (au_length != 0 && au_cmd[0] != 0x06) { - if ((dp = malloc(real_length, M_AMR, M_WAITOK|M_ZERO)) == NULL) { - error = ENOMEM; - goto out; - } if ((error = copyin(au_buffer, dp, au_length)) != 0) { free(dp, M_AMR); return (error); Andreas Longwitz
Author: jhb Date: Wed Sep 19 11:54:32 2012 New Revision: 240692 URL: http://svn.freebsd.org/changeset/base/240692 Log: As a followup to r234501, ensure that the native ioctl path always allocates a 4kb buffer if a request uses a buffer size of 0. (The Linux ioctl path already did this.) PR: kern/155658 Submitted by: Andreas Longwitz MFC after: 1 week Modified: head/sys/dev/amr/amr.c Modified: head/sys/dev/amr/amr.c ============================================================================== --- head/sys/dev/amr/amr.c Wed Sep 19 11:40:17 2012 (r240691) +++ head/sys/dev/amr/amr.c Wed Sep 19 11:54:32 2012 (r240692) @@ -846,11 +846,8 @@ amr_ioctl(struct cdev *dev, u_long cmd, /* handle inbound data buffer */ real_length = amr_ioctl_buffer_length(au_length); + dp = malloc(real_length, M_AMR, M_WAITOK|M_ZERO); if (au_length != 0 && au_cmd[0] != 0x06) { - if ((dp = malloc(real_length, M_AMR, M_WAITOK|M_ZERO)) == NULL) { - error = ENOMEM; - goto out; - } if ((error = copyin(au_buffer, dp, au_length)) != 0) { free(dp, M_AMR); return (error); @@ -920,8 +917,7 @@ amr_ioctl(struct cdev *dev, u_long cmd, error = copyout(dp, au_buffer, au_length); } debug(2, "copyout %ld bytes from %p -> %p", au_length, dp, au_buffer); - if (dp != NULL) - debug(2, "%p status 0x%x", dp, ac->ac_status); + debug(2, "%p status 0x%x", dp, ac->ac_status); *au_statusp = ac->ac_status; out: _______________________________________________ svn-src-all@freebsd.org mailing list http://lists.freebsd.org/mailman/listinfo/svn-src-all To unsubscribe, send any mail to "svn-src-all-unsubscribe@freebsd.org"
On Wednesday, September 19, 2012 6:13:33 am Andreas Longwitz wrote: > The last update of amr.c (SVN rev 236319) needs a correction, because > the controller should see the length of the buffer of the userland program: > > @@ -919,7 +919,7 @@ > > /* build the command */ > ac->ac_data = dp; > - ac->ac_length = real_length; > + ac->ac_length = au_length; > ac->ac_flags |= AMR_CMD_DATAIN|AMR_CMD_DATAOUT; > > /* run the command */ > > Without this patch some busy controllers only return zeros on every > amr_ioctl. Hmm. We pass in the real_length in the Linux ioctl path as well. If we want to fix that we should fix it in both places. Also, I don't see how this can work properly for a size of zero. bus_dma won't populate the S/G list in that case and you are back to the original buggy case for a size of 0. Are you sure that you need this patch and not just the fix for commands with a size of zero? > To do what is described in the comment "Bug-for-bug compatibility .." > we should allocate a buffer in the case of userland length 0 too: I've committed a slightly modified version of this (no need to check for NULL from a M_WAITOK malloc(), they always succeed). -- John Baldwin
Hi, >> The last update of amr.c (SVN rev 236319) needs a correction, because >> the controller should see the length of the buffer of the userland program: >> >> @@ -919,7 +919,7 @@ >> >> /* build the command */ >> ac->ac_data = dp; >> - ac->ac_length = real_length; >> + ac->ac_length = au_length; >> ac->ac_flags |= AMR_CMD_DATAIN|AMR_CMD_DATAOUT; >> >> /* run the command */ >> >> Without this patch some busy controllers only return zeros on every >> amr_ioctl. > > Hmm. We pass in the real_length in the Linux ioctl path as well. If we want > to fix that we should fix it in both places. Yes, thats true. I would prefer to set ac->ac_length always to the length the user program gives us, it was done in this way until 1.90.2.4. The real_length should be used only for the malloc to prevent the panic when the controller does not respect the given length. > Also, I don't see how this can > work properly for a size of zero. bus_dma won't populate the S/G list in that > case and you are back to the original buggy case for a size of 0. There are amr_ioctl() calls from megarc with au_length=0, but the controller sends back 148 bytes to the buffer dp, which was allocated in real_length. I have checked this with a special ioctl_hook > Are you sure that you need this patch and not just the fix for commands with a size of > zero? Yes. After introducing your patch on my servers I found that on some of them megarc and amrstat did not work correct anymore. Sometime for some hours the amr_ioctl did not give any output. I tried a simple amrstat (au_length=1024, therefore real_length=32K) every 5 minutes on 10 servers. The result was very confused. Some server ok, some never ok, some for four hours ok, then not, ... Going back to 1.90.2.4 - with an updated malloc( max(au_length, 25600)) - lets the problem disappear. After all Dr. Rasch pointed out to me, that the only difference for the interface to the controller of your patch is the value of ac->ac_length. After changing ac->ac_length from real_length (32K on every call) to au_length (for most calls of megarc only some bytes, 1K for amrstat) everything works fine now on all my servers for more than a week. I think the controller is overstrained with a lot of 32k requests. >> To do what is described in the comment "Bug-for-bug compatibility .." >> we should allocate a buffer in the case of userland length 0 too: > > I've committed a slightly modified version of this (no need to check for > NULL from a M_WAITOK malloc(), they always succeed). Thats ok for me, thanks! Andreas Longwitz
On Wednesday, September 19, 2012 12:25:00 pm Andreas Longwitz wrote: > Hi, > > >> The last update of amr.c (SVN rev 236319) needs a correction, because > >> the controller should see the length of the buffer of the userland program: > >> > >> @@ -919,7 +919,7 @@ > >> > >> /* build the command */ > >> ac->ac_data = dp; > >> - ac->ac_length = real_length; > >> + ac->ac_length = au_length; > >> ac->ac_flags |= AMR_CMD_DATAIN|AMR_CMD_DATAOUT; > >> > >> /* run the command */ > >> > >> Without this patch some busy controllers only return zeros on every > >> amr_ioctl. > > > > Hmm. We pass in the real_length in the Linux ioctl path as well. If we want > > to fix that we should fix it in both places. > Yes, thats true. I would prefer to set ac->ac_length always to the > length the user program gives us, it was done in this way until > 1.90.2.4. The real_length should be used only for the malloc to prevent > the panic when the controller does not respect the given length. Except that this can't really work. The controller wants a buffer somewhere that it can DMA to. What addresses do you expect it to use for its larger buffer if we don't give it an address? One option is to use contigmalloc() instead of malloc() to allocate a contiguous buffer perhaps if the adapter just assumes that the extra bytes are physically contiguous after the last S/G entry. However, that still won't work for the 0 length case. In the 0 length case the bus_dma code won't populate the S/G list at all, so the adapter will just scribble over random memory when it dumps something out. That takes us right back to the original bug this was trying to fix. > > Also, I don't see how this can > > work properly for a size of zero. bus_dma won't populate the S/G list in that > > case and you are back to the original buggy case for a size of 0. > > There are amr_ioctl() calls from megarc with au_length=0, but the > controller sends back 148 bytes to the buffer dp, which was allocated in > real_length. I have checked this with a special ioctl_hook Right, but with your change we won't actually send the address of that buffer to the adapter. > > Are you sure that you need this patch and not just the fix for commands with a size of > > zero? > > Yes. After introducing your patch on my servers I found that on some of > them megarc and amrstat did not work correct anymore. Sometime for some > hours the amr_ioctl did not give any output. I tried a simple amrstat > (au_length=1024, therefore real_length=32K) every 5 minutes on 10 > servers. The result was very confused. Some server ok, some never ok, > some for four hours ok, then not, ... Going back to 1.90.2.4 - with an > updated malloc( max(au_length, 25600)) - lets the problem disappear. > After all Dr. Rasch pointed out to me, that the only difference for the > interface to the controller of your patch is the value of ac->ac_length. > After changing ac->ac_length from real_length (32K on every call) to > au_length (for most calls of megarc only some bytes, 1K for amrstat) > everything works fine now on all my servers for more than a week. I > think the controller is overstrained with a lot of 32k requests. I think the correct fix is to adjust the logic that sets real_length, not to undo the previous change (which reverting to real_length will do). If you don't send the updated length to the adapter you have no control over where it writes its overflow data into memory. I do think it might be true that if it is trying to read in 32k that might be a problem, but I think your solution just brings back the old bug. I looked at the history of this PR, and I think we should just refine the limit logic to go back to rounding up to a power of 2, but handle your special case of a 12k buffer that gets a returned size larger than 16k by rounding requests between 8k and 16k up to 32k rather than 16k. Try this: Index: amr.c =================================================================== --- amr.c (revision 240692) +++ amr.c (working copy) @@ -533,13 +533,19 @@ shutdown_out: * The amr(4) firmware relies on this feature. In fact, it assumes * the buffer is always a power of 2 up to a max of 64k. There is * also at least one case where it assumes a buffer less than 16k is - * greater than 16k. Force a minimum buffer size of 32k and round - * sizes between 32k and 64k up to 64k as a workaround. + * greater than 16k. However, forcing all buffers to a size of 32k + * causes stalls in the firmware. Force each command smaller than + * 64k up to the next power of two except that commands between 8k + * and 16k are rounded up to 32k instead of 16k. */ static unsigned long amr_ioctl_buffer_length(unsigned long len) { + if (len <= 4 * 1024) + return (4 * 1024); + if (len <= 8 * 1024) + return (8 * 1024); if (len <= 32 * 1024) return (32 * 1024); if (len <= 64 * 1024) -- John Baldwin
>> I would prefer to set ac->ac_length always to the >> length the user program gives us, it was done in this way until >> 1.90.2.4. The real_length should be used only for the malloc to prevent >> the panic when the controller does not respect the given length. > > Except that this can't really work. The controller wants a buffer somewhere that > it can DMA to. What addresses do you expect it to use for its larger buffer if > we don't give it an address? One option is to use contigmalloc() instead of malloc() > to allocate a contiguous buffer perhaps if the adapter just assumes that the extra > bytes are physically contiguous after the last S/G entry. Ok, thanks for your explanation. So it seems to me, DMA did work for me with my first max(au_lenght, 25600) construct because this is in practice always equal to 25600 < MAXBSIZE (=64k). > However, that still won't work for the 0 length case. In the 0 length case the > bus_dma code won't populate the S/G list at all, so the adapter will just scribble > over random memory when it dumps something out. That takes us right back to the > original bug this was trying to fix. Ok, I now have understand better the DMA stuff in amr and I am wondering how this could worked for length 0 before. > I think the correct fix is to adjust the logic that sets real_length, not > to undo the previous change (which reverting to real_length will do). > > If you don't send the updated length to the adapter you have no control > over where it writes its overflow data into memory. I do think it might > be true that if it is trying to read in 32k that might be a problem, but > I think your solution just brings back the old bug. > > I looked at the history of this PR, and I think we should just refine the > limit logic to go back to rounding up to a power of 2, but handle your > special case of a 12k buffer that gets a returned size larger than 16k > by rounding requests between 8k and 16k up to 32k rather than 16k. Try > this: > > Index: amr.c > =================================================================== > --- amr.c (revision 240692) > +++ amr.c (working copy) > @@ -533,13 +533,19 @@ shutdown_out: > * The amr(4) firmware relies on this feature. In fact, it assumes > * the buffer is always a power of 2 up to a max of 64k. There is > * also at least one case where it assumes a buffer less than 16k is > - * greater than 16k. Force a minimum buffer size of 32k and round > - * sizes between 32k and 64k up to 64k as a workaround. > + * greater than 16k. However, forcing all buffers to a size of 32k > + * causes stalls in the firmware. Force each command smaller than > + * 64k up to the next power of two except that commands between 8k > + * and 16k are rounded up to 32k instead of 16k. > */ > static unsigned long > amr_ioctl_buffer_length(unsigned long len) > { > > + if (len <= 4 * 1024) > + return (4 * 1024); > + if (len <= 8 * 1024) > + return (8 * 1024); > if (len <= 32 * 1024) > return (32 * 1024); > if (len <= 64 * 1024) > I have introduced this patch on some servers and will report the result. -- Andreas Longwitz
> > Index: amr.c > =================================================================== > --- amr.c (revision 240692) > +++ amr.c (working copy) > @@ -533,13 +533,19 @@ shutdown_out: > * The amr(4) firmware relies on this feature. In fact, it assumes > * the buffer is always a power of 2 up to a max of 64k. There is > * also at least one case where it assumes a buffer less than 16k is > - * greater than 16k. Force a minimum buffer size of 32k and round > - * sizes between 32k and 64k up to 64k as a workaround. > + * greater than 16k. However, forcing all buffers to a size of 32k > + * causes stalls in the firmware. Force each command smaller than > + * 64k up to the next power of two except that commands between 8k > + * and 16k are rounded up to 32k instead of 16k. > */ > static unsigned long > amr_ioctl_buffer_length(unsigned long len) > { > > + if (len <= 4 * 1024) > + return (4 * 1024); > + if (len <= 8 * 1024) > + return (8 * 1024); > if (len <= 32 * 1024) > return (32 * 1024); > if (len <= 64 * 1024) > I have tested this patch for 10 days on 10 different server, no error occurred. Patch works ! Andreas Longwitz
Author: jhb Date: Fri Oct 5 15:52:31 2012 New Revision: 241228 URL: http://svn.freebsd.org/changeset/base/241228 Log: Further adjust the workaround in r234501. Rounding all small requests up to 32k swamped the controller causing firmware hangs. Instead, round requests smaller than 64k up to the next power of 2 as a general rule. To handle the one known special case of a command that accepts a 12k buffer returning a 24k-ish reply, round requests between 8k and 16k up to 32k rather than 16k. The result is that commands less than 8k should now be rounded up to a smaller size (either 4k or 8k) rather than 32k. PR: kern/155658 Tested by: Andreas Longwitz MFC after: 1 week Modified: head/sys/dev/amr/amr.c Modified: head/sys/dev/amr/amr.c ============================================================================== --- head/sys/dev/amr/amr.c Fri Oct 5 15:36:30 2012 (r241227) +++ head/sys/dev/amr/amr.c Fri Oct 5 15:52:31 2012 (r241228) @@ -533,13 +533,19 @@ shutdown_out: * The amr(4) firmware relies on this feature. In fact, it assumes * the buffer is always a power of 2 up to a max of 64k. There is * also at least one case where it assumes a buffer less than 16k is - * greater than 16k. Force a minimum buffer size of 32k and round - * sizes between 32k and 64k up to 64k as a workaround. + * greater than 16k. However, forcing all buffers to a size of 32k + * causes stalls in the firmware. Force each command smaller than + * 64k up to the next power of two except that commands between 8k + * and 16k are rounded up to 32k instead of 16k. */ static unsigned long amr_ioctl_buffer_length(unsigned long len) { + if (len <= 4 * 1024) + return (4 * 1024); + if (len <= 8 * 1024) + return (8 * 1024); if (len <= 32 * 1024) return (32 * 1024); if (len <= 64 * 1024) _______________________________________________ svn-src-all@freebsd.org mailing list http://lists.freebsd.org/mailman/listinfo/svn-src-all To unsubscribe, send any mail to "svn-src-all-unsubscribe@freebsd.org"