I run three high-bandwidth Tor exit relays on FreeBSD 13.1-RELEASE, in KVM VPSes at BuyVM.net and AlienData.com (and more at other non-KVM hosts), namely 4GB RAM, dedicated CPU core AMD Ryzen VPS services (sort of virtual dedicated servers). Using the vtnet Ethernet adapter, I frequently get page faults like this, especially if I consistently push ~250-300 Mbps of traffic for a few days. Full page fault attached. Fatal trap 12: page fault while in kernel mode cpuid = 0; apic id = 00 fault virtual address = 0x8 fault code = supervisor read data, page not present instruction pointer = 0x20:0xffffffff80cb8088 stack pointer = 0x0:0xfffffe00634be830 frame pointer = 0x0:0xfffffe00634be870 code segment = base rx0, limit 0xfffff, type 0x1b = DPL 0, pres 1, long 1, def32 0, gran 1 processor eflags = interrupt enabled, resume, IOPL = 0 current process = 12 (irq25: virtio_pci0) trap number = 12 panic: page fault cpuid = 0 time = 1672613133 KDB: stack backtrace: #0 0xffffffff80c694a5 at kdb_backtrace+0x65 #1 0xffffffff80c1bb5f at vpanic+0x17f #2 0xffffffff80c1b9d3 at panic+0x43 #3 0xffffffff810afdf5 at trap_fatal+0x385 #4 0xffffffff810afe4f at trap_pfault+0x4f #5 0xffffffff810875b8 at calltrap+0x8 #6 0xffffffff80cb8297 at sbdrop+0x37 #7 0xffffffff80dd8021 at tcp_do_segment+0x2df1 #8 0xffffffff80dd44f1 at tcp_input_with_port+0xb61 #9 0xffffffff80dd3929 at tcp6_input_with_port+0x69 #10 0xffffffff80dd4ceb at tcp6_input+0xb #11 0xffffffff80e16eae at ip6_input+0x95e #12 0xffffffff80d530c9 at netisr_dispatch_src+0xb9 #13 0xffffffff80d36ee8 at ether_demux+0x138 #14 0xffffffff80d38275 at ether_nh_input+0x355 #15 0xffffffff80d530c9 at netisr_dispatch_src+0xb9 #16 0xffffffff80d37319 at ether_input+0x69 #17 0xffffffff80a51fde at vtnet_rxq_eof+0x73e Uptime: 16h0m55s Dumping 660 out of 4062 MB:..3%..13%..22%..32%..42%..51%..61%..71%..83%..93% __curthread () at /usr/src/sys/amd64/include/pcpu_aux.h:55 55 __asm("movq %%gs:%P1,%0" : "=r" (td) : "n" (offsetof(struct pcpu, (kgdb) #0 __curthread 
() at /usr/src/sys/amd64/include/pcpu_aux.h:55 #1 doadump (textdump=<optimized out>) at /usr/src/sys/kern/kern_shutdown.c:399 #2 0xffffffff80c1b75c in kern_reboot (howto=260) at /usr/src/sys/kern/kern_shutdown.c:487 #3 0xffffffff80c1bbce in vpanic (fmt=0xffffffff811b4fb9 "%s", ap=<optimized out>) at /usr/src/sys/kern/kern_shutdown.c:920 #4 0xffffffff80c1b9d3 in panic (fmt=<unavailable>) at /usr/src/sys/kern/kern_shutdown.c:844 #5 0xffffffff810afdf5 in trap_fatal (frame=0xfffffe00634be770, eva=8) at /usr/src/sys/amd64/amd64/trap.c:944 #6 0xffffffff810afe4f in trap_pfault (frame=0xfffffe00634be770, usermode=false, signo=<optimized out>, ucode=<optimized out>) at /usr/src/sys/amd64/amd64/trap.c:763 #7 <signal handler called> #8 sbcut_internal (sb=sb@entry=0xfffff800acf4b9c0, len=2306, len@entry=2856) at /usr/src/sys/kern/uipc_sockbuf.c:1488 #9 0xffffffff80cb8297 in sbdrop (sb=sb@entry=0xfffff800acf4b9c0, len=2856) at /usr/src/sys/kern/uipc_sockbuf.c:1597 #10 0xffffffff80dd8021 in tcp_do_segment (m=<optimized out>, th=<optimized out>, so=0x1, tp=0xfffffe00a90d4950, drop_hdrlen=72, tlen=<optimized out>, iptos=0 '\000') at /usr/src/sys/netinet/tcp_input.c:1869 #11 0xffffffff80dd44f1 in tcp_input_with_port (mp=<optimized out>, mp@entry=0xfffffe00634beb38, offp=<optimized out>, offp@entry=0xfffffe00634beb30, proto=<optimized out>, port=0) at /usr/src/sys/netinet/tcp_input.c:1414 #12 0xffffffff80dd3929 in tcp6_input_with_port (mp=0xfffffe00634beb38, offp=0xfffffe00634beb30, proto=<optimized out>, port=port@entry=0) at /usr/src/sys/netinet/tcp_input.c:627 #13 0xffffffff80dd4ceb in tcp6_input (mp=0xfffff800acf4b9c0, offp=0xfffff800908a7800, proto=1665919160) at /usr/src/sys/netinet/tcp_input.c:634 #14 0xffffffff80e16eae in ip6_input (m=0x0) at /usr/src/sys/netinet6/ip6_input.c:929 #15 0xffffffff80d530c9 in netisr_dispatch_src (proto=6, source=source@entry=0, m=0xfffff8004001c100) at /usr/src/sys/net/netisr.c:1143 #16 0xffffffff80d5349f in netisr_dispatch (proto=2901719488, 
m=0xfffffe00634be8b8) at /usr/src/sys/net/netisr.c:1234 #17 0xffffffff80d36ee8 in ether_demux (ifp=ifp@entry=0xfffff8000360e000, m=0xfffff800908a7800) at /usr/src/sys/net/if_ethersubr.c:921 #18 0xffffffff80d38275 in ether_input_internal (ifp=0xfffff8000360e000, m=0xfffff800908a7800) at /usr/src/sys/net/if_ethersubr.c:707 #19 ether_nh_input (m=<optimized out>) at /usr/src/sys/net/if_ethersubr.c:737 #20 0xffffffff80d530c9 in netisr_dispatch_src (proto=proto@entry=5, source=source@entry=0, m=m@entry=0xfffff8004001c100) at /usr/src/sys/net/netisr.c:1143 #21 0xffffffff80d5349f in netisr_dispatch (proto=2901719488, proto@entry=5, m=0xfffffe00634be8b8, m@entry=0xfffff8004001c100) at /usr/src/sys/net/netisr.c:1234 #22 0xffffffff80d37319 in ether_input (ifp=<optimized out>, m=0xfffff8004001c100) at /usr/src/sys/net/if_ethersubr.c:828 #23 0xffffffff80a51fde in vtnet_rxq_input (rxq=0xfffff800037e1480, m=0xfffff8004001c100, hdr=<optimized out>) at /usr/src/sys/dev/virtio/network/if_vtnet.c:2043 #24 vtnet_rxq_eof (rxq=<optimized out>, rxq@entry=0xfffff800037e1480) at /usr/src/sys/dev/virtio/network/if_vtnet.c:2147 #25 0xffffffff80a51797 in vtnet_rx_vq_process (rxq=0xfffff800037e1480, tries=<optimized out>) at /usr/src/sys/dev/virtio/network/if_vtnet.c:2211 #26 0xffffffff80bdbcfa in intr_event_execute_handlers (ie=0xfffff800037c3e00, p=<optimized out>) at /usr/src/sys/kern/kern_intr.c:1168 #27 ithread_execute_handlers (ie=<optimized out>, p=<optimized out>) at /usr/src/sys/kern/kern_intr.c:1181 #28 ithread_loop (arg=arg@entry=0xfffff80003625840) at /usr/src/sys/kern/kern_intr.c:1269 #29 0xffffffff80bd8a9e in fork_exit ( callout=0xffffffff80bdbaa0 <ithread_loop>, arg=0xfffff80003625840, frame=0xfffffe00634bef40) at /usr/src/sys/kern/kern_fork.c:1093 #30 <signal handler called> #31 mi_startup () at /usr/src/sys/kern/init_main.c:322 Backtrace stopped: Cannot access memory at address 0x8 (kgdb) /etc/sysctl.conf is: kern.ipc.soacceptqueue=2048 net.inet.ip.portrange.first=1024 
net.inet.tcp.cc.algorithm=htcp net.inet.tcp.cc.htcp.adaptive_backoff=1 net.inet.tcp.cc.htcp.rtt_scaling=1 net.inet.tcp.recvbuf_max=4194304 net.inet.tcp.recvspace=65536 net.inet.tcp.sendbuf_inc=65536 net.inet.tcp.sendbuf_max=4194304 net.inet.tcp.sendspace=65536 net.inet.tcp.abc_l_var=44 net.inet.tcp.initcwnd_segments=44 net.inet.tcp.rfc6675_pipe=1 /boot/loader.conf is: cc_htcp_load="YES" Side notes: If I add "net.inet.tcp.tso=0" to sysctl.conf, I can get about a day or two of uptime (up from ~6-14 hours), but it eventually page faults, as has happened on my BuyVM VPS. Right now, I'm trying "-rxcsum -txcsum -tso -lro -txcsum6 -vlanhwtag -vlanhwtso" in the "ifconfig_vtnet0" line/config, to see if that improves anything. Core dump attached.
It seems the core dump wasn't attached (too big), so it's on GitHub Gist: https://gist.github.com/neelchauhan/e294e17aa73ea7ac7b08d70120c33395
Hi Neel, I'm doing some Tor tuning on my end and was curious if you are still seeing these issues or have gained any insight here.
I switched my Tor relays to openSUSE Linux, so unfortunately I can't comment on this.
I'm encountering this on powerpc64le as well, on -CURRENT. Kernel page fault with the following non-sleepable locks held: exclusive sleep mutex vtnet0-rx0 (vtnet0-rx0) r = 0 (0x8b0d180) locked @ /usr/src/sys/dev/virtio/network/if_vtnet.c:2189 stack backtrace: #0 0xc00000000091a148 at witness_debugger+0x98 #1 0xc00000000091b920 at witness_warn+0x4b0 #2 0xc000000000e4fd1c at trap_pfault+0x26c #3 0xc000000000e4f26c at trap+0x12c #4 0xc000000000e424ac at powerpc_interrupt+0x1cc fatal kernel trap: exception = 0x300 (data storage interrupt) virtual address = 0xdeadc0dedeadc0e8 dsisr = 0x40000000 srr0 = 0xc0000000006be608 (0x6be608) srr1 = 0x8000000000009033 current msr = 0x8000000000009033 lr = 0xc0000000006be5a0 (0x6be5a0) frame = 0xc00800006f53a240 curthread = 0xc00800006e4f7b40 pid = 12, comm = irq4611: ++ panic: data storage interrupt trap cpuid = 5 time = 1696974442 KDB: stack backtrace: 0xc00800006f539d30: at kdb_backtrace+0x60 0xc00800006f539e40: at vpanic+0x1b8 0xc00800006f539ef0: at panic+0x44 0xc00800006f539f20: at trap_fatal+0xc4 0xc00800006f539fa0: at trap_pfault+0x280 0xc00800006f53a050: at trap+0x12c 0xc00800006f53a180: at powerpc_interrupt+0x1cc 0xc00800006f53a210: kernel DSI read trap @ 0xdeadc0dedeadc0e8 by vtnet_rxq_eof+0x188: srr1=0x8000000000009033 r1=0xc00800006f53a4c0 cr=0x42400c00 xer=0 ctr=0xc00000000084cfa4 r2=0xc000000001735000 sr=0x40000000 frame=0xc00800006f53a240 0xc00800006f53a4c0: at vtnet_rxq_eof+0x11c 0xc00800006f53a600: at vtnet_rx_vq_process+0xf4 0xc00800006f53a660: at virtqueue_intr+0x2c 0xc00800006f53a690: at vtpci_intx_intr+0x11c 0xc00800006f53a6d0: at ithread_loop+0x3d8 0xc00800006f53a820: at fork_exit+0xc4 0xc00800006f53a8c0: at fork_trampoline+0x18 0xc00800006f53a8f0: at -0x4 KDB: enter: panic [ thread pid 12 tid 100550 ] Stopped at kdb_enter+0x70: ori r0, r0, 0x0 db>
(In reply to Neel Chauhan from comment #0) FWIW, this also happens on low-bandwidth Tor relays, but probably less frequently.
The trace from Shawn looks very different from the one from Neel. These are very likely two different bugs. Given that Neel can't reproduce this anymore after switching the panicking service to Linux, that the bug was reported for 13.1 (we have the 13.2 and 14 releases already), and that there is no core file to analyze, I'm planning to close this bug unless there are any objections.
Closing, as there is no way to reproduce the problem and it was reported for a legacy stable branch. Neel, feel free to reopen if you get any new info.