Bug 222987

Summary: ARM64 panic in iflib.c:2944
Product: Base System Reporter: Sean Bruno <sbruno>
Component: kern Assignee: freebsd-bugs (Nobody) <bugs>
Status: Closed FIXED    
Severity: Affects Some People CC: Andrew, emaste, shurd
Priority: ---    
Version: CURRENT   
Hardware: arm64   
OS: Any   

Description Sean Bruno freebsd_committer freebsd_triage 2017-10-13 18:02:04 UTC
timeout stopping cpus
panic: Assertion ifsd_m[next] == NULL failed at /usr/src/sys/net/iflib.c:2944
cpuid = 32
time = 1507910229
KDB: stack backtrace:
db_trace_self() at db_trace_self_wrapper+0x28
         pc = 0xffff0000005e72e8  lr = 0xffff000000086ad4
         sp = 0xffff000b97775ca0  fp = 0xffff000b97775eb0

db_trace_self_wrapper() at vpanic+0x184
         pc = 0xffff000000086ad4  lr = 0xffff00000031161c
         sp = 0xffff000b97775ec0  fp = 0xffff000b97775f40

vpanic() at kassert_panic+0x158
         pc = 0xffff00000031161c  lr = 0xffff000000311494
         sp = 0xffff000b97775f50  fp = 0xffff000b97776010

kassert_panic() at iflib_txq_drain+0xbb4
         pc = 0xffff000000311494  lr = 0xffff00000040f624
         sp = 0xffff000b97776020  fp = 0xffff000b97776190

iflib_txq_drain() at drain_ring_lockless+0xa4
         pc = 0xffff00000040f624  lr = 0xffff00000041260c
         sp = 0xffff000b977761a0  fp = 0xffff000b977761f0

drain_ring_lockless() at ifmp_ring_enqueue+0x218
         pc = 0xffff00000041260c  lr = 0xffff000000412390
         sp = 0xffff000b97776200  fp = 0xffff000b97776250

ifmp_ring_enqueue() at iflib_if_transmit+0xb0
         pc = 0xffff000000412390  lr = 0xffff0000004114e0
         sp = 0xffff000b97776260  fp = 0xffff000b97776290

iflib_if_transmit() at ether_output+0x6a4
         pc = 0xffff0000004114e0  lr = 0xffff0000003fcbd4
         sp = 0xffff000b977762a0  fp = 0xffff000b97776330

ether_output() at ip6_output+0x190c
         pc = 0xffff0000003fcbd4  lr = 0xffff0000004d3470
         sp = 0xffff000b97776340  fp = 0xffff000b977765b0

ip6_output() at tcp_output+0x1690
         pc = 0xffff0000004d3470  lr = 0xffff0000004a3c74
         sp = 0xffff000b977765c0  fp = 0xffff000b97776770

tcp_output() at tcp_usr_send+0x2bc
         pc = 0xffff0000004a3c74  lr = 0xffff0000004b1a8c
         sp = 0xffff000b97776780  fp = 0xffff000b977767e0

tcp_usr_send() at sosend_generic+0x3c0
         pc = 0xffff0000004b1a8c  lr = 0xffff00000039cf4c
         sp = 0xffff000b977767f0  fp = 0xffff000b97776890

sosend_generic() at soo_write+0x40
         pc = 0xffff00000039cf4c  lr = 0xffff00000037b9e4
         sp = 0xffff000b977768a0  fp = 0xffff000b977768b0

soo_write() at dofilewrite+0xb4
         pc = 0xffff00000037b9e4  lr = 0xffff0000003742d0
         sp = 0xffff000b977768c0  fp = 0xffff000b97776900

dofilewrite() at kern_writev+0x6c
         pc = 0xffff0000003742d0  lr = 0xffff000000373f14
         sp = 0xffff000b97776910  fp = 0xffff000b97776950

kern_writev() at sys_write+0x84
         pc = 0xffff000000373f14  lr = 0xffff000000373e98
         sp = 0xffff000b97776960  fp = 0xffff000b977769a0

sys_write() at do_el0_sync+0x890
         pc = 0xffff000000373e98  lr = 0xffff00000060172c
         sp = 0xffff000b977769b0  fp = 0xffff000b97776a70

do_el0_sync() at handle_el0_sync+0x74
         pc = 0xffff00000060172c  lr = 0xffff0000005e91f4
         sp = 0xffff000b97776a80  fp = 0xffff000b97776b90

handle_el0_sync() at 0x4291c0
         pc = 0xffff0000005e91f4  lr = 0x00000000004291c0
         sp = 0xffff000b97776ba0  fp = 0x0000ffffffff91b0
Comment 1 Sean Bruno freebsd_committer freebsd_triage 2017-10-13 18:03:04 UTC
I think I need to run a specific patch to get a valid crash dump here on thunderx1.  I wanted to start a ticket on this so that it doesn't get lost.
Comment 2 Antoine Brodin freebsd_committer freebsd_triage 2017-12-18 19:27:41 UTC
Another panic from yesterday:

KDB: reentering
KDB: stack backtrace:
db_trace_self() at db_trace_self_wrapper+40
         pc = 0xffff0000005e72e8  lr = 0xffff000000086ad4
         sp = 0xffff000b96c21810  fp = 0xffff000b96c21a20

db_trace_self_wrapper() at kdb_reenter+56
         pc = 0xffff000000086ad4  lr = 0xffff000000350a2c
         sp = 0xffff000b96c21a30  fp = 0xffff000b96c21a40

kdb_reenter() at db_term+260
         pc = 0xffff000000350a2c  lr = 0xffff000000085764
         sp = 0xffff000b96c21a50  fp = 0xffff000b96c21a60

db_term() at db_mult_expr+40
         pc = 0xffff000000085764  lr = 0xffff0000000853dc
         sp = 0xffff000b96c21a70  fp = 0xffff000b96c21ad0

db_mult_expr() at db_add_expr+40
         pc = 0xffff0000000853dc  lr = 0xffff0000000852d0
         sp = 0xffff000b96c21ae0  fp = 0xffff000b96c21b40
        
db_add_expr() at db_shift_expr+36
         pc = 0xffff0000000852d0  lr = 0xffff0000000851cc
         sp = 0xffff000b96c21b50  fp = 0xffff000b96c21ba0

db_shift_expr() at db_logical_relation_expr+40
         pc = 0xffff0000000851cc  lr = 0xffff000000085058
         sp = 0xffff000b96c21bb0  fp = 0xffff000b96c21c10

db_logical_relation_expr() at db_logical_and_expr+28
         pc = 0xffff000000085058  lr = 0xffff000000084f98
         sp = 0xffff000b96c21c20  fp = 0xffff000b96c21c50

db_logical_and_expr() at db_expression+28
         pc = 0xffff000000084f98  lr = 0xffff000000084eec
         sp = 0xffff000b96c21c60  fp = 0xffff000b96c21c90

db_expression() at db_stack_trace+44
         pc = 0xffff000000084eec  lr = 0xffff000000084044
         sp = 0xffff000b96c21ca0  fp = 0xffff000b96c21cd0
        
db_stack_trace() at db_command+548
         pc = 0xffff000000084044  lr = 0xffff000000083d90
         sp = 0xffff000b96c21ce0  fp = 0xffff000b96c21dc0

db_command() at db_command_loop+96
         pc = 0xffff000000083d90  lr = 0xffff000000083b50
         sp = 0xffff000b96c21dd0  fp = 0xffff000b96c21df0

db_command_loop() at db_trap+244
         pc = 0xffff000000083b50  lr = 0xffff000000086c3c
         sp = 0xffff000b96c21e00  fp = 0xffff000b96c22020

db_trap() at kdb_trap+400
         pc = 0xffff000000086c3c  lr = 0xffff000000350e18
         sp = 0xffff000b96c22030  fp = 0xffff000b96c22090

kdb_trap() at do_el1h_sync+144
         pc = 0xffff000000350e18  lr = 0xffff000000600adc
         sp = 0xffff000b96c220a0  fp = 0xffff000b96c220d0
        
do_el1h_sync() at handle_el1h_sync+116
         pc = 0xffff000000600adc  lr = 0xffff0000005e9074
         sp = 0xffff000b96c220e0  fp = 0xffff000b96c221f0

handle_el1h_sync() at kdb_enter+52
         pc = 0xffff0000005e9074  lr = 0xffff000000350520
         sp = 0xffff000b96c22200  fp = 0xffff000b96c22290

kdb_enter() at vpanic+416
         pc = 0xffff000000350520  lr = 0xffff000000311638
         sp = 0xffff000b96c222a0  fp = 0xffff000b96c22320

vpanic() at panic+68
         pc = 0xffff000000311638  lr = 0xffff0000003116a4
         sp = 0xffff000b96c22330  fp = 0xffff000b96c223b0

panic() at data_abort+592
         pc = 0xffff0000003116a4  lr = 0xffff000000600e98
         sp = 0xffff000b96c223c0  fp = 0xffff000b96c22470
        
data_abort() at do_el1h_sync+248
         pc = 0xffff000000600e98  lr = 0xffff000000600b44
         sp = 0xffff000b96c22480  fp = 0xffff000b96c224b0

do_el1h_sync() at handle_el1h_sync+116
         pc = 0xffff000000600b44  lr = 0xffff0000005e9074
         sp = 0xffff000b96c224c0  fp = 0xffff000b96c225d0

handle_el1h_sync() at _iflib_fl_refill+880
         pc = 0xffff0000005e9074  lr = 0xffff000000410844
         sp = 0xffff000b96c225e0  fp = 0xffff000b96c226d0

_iflib_fl_refill() at _iflib_fl_refill+880
         pc = 0xffff000000410844  lr = 0xffff000000410844
         sp = 0xffff000b96c226e0  fp = 0xffff000b96c227a0

_iflib_fl_refill() at iflib_init_locked+992
         pc = 0xffff000000410844  lr = 0xffff00000040c5d4
         sp = 0xffff000b96c227b0  fp = 0xffff000b96c22810
        
iflib_init_locked() at _task_fn_admin+460
         pc = 0xffff00000040c5d4  lr = 0xffff00000040b3f8
         sp = 0xffff000b96c22820  fp = 0xffff000b96c22860

_task_fn_admin() at gtaskqueue_run_locked+264
         pc = 0xffff00000040b3f8  lr = 0xffff00000034f14c
         sp = 0xffff000b96c22870  fp = 0xffff000b96c228e0

gtaskqueue_run_locked() at gtaskqueue_thread_loop+156
         pc = 0xffff00000034f14c  lr = 0xffff00000034eee4
         sp = 0xffff000b96c228f0  fp = 0xffff000b96c22910

gtaskqueue_thread_loop() at fork_exit+124
         pc = 0xffff00000034eee4  lr = 0xffff0000002d661c
         sp = 0xffff000b96c22920  fp = 0xffff000b96c22950

fork_exit() at fork_trampoline+16
         pc = 0xffff0000002d661c  lr = 0xffff0000006008bc
         sp = 0xffff000b96c22960  fp = 0x0000000000000000
Comment 3 Sean Bruno freebsd_committer freebsd_triage 2018-01-28 14:58:49 UTC
Panic this morning:

Tracing pid 0 tid 100236 td 0xfffffd000b986540
db_trace_self() at db_stack_trace+0xec
         pc = 0xffff0000005fb138  lr = 0xffff0000000898a8
         sp = 0xffff00013230c1a0  fp = 0xffff00013230c1d0

db_stack_trace() at db_command+0x224
         pc = 0xffff0000000898a8  lr = 0xffff000000089534
         sp = 0xffff00013230c1e0  fp = 0xffff00013230c2c0

db_command() at db_command_loop+0x60
         pc = 0xffff000000089534  lr = 0xffff0000000892f4
         sp = 0xffff00013230c2d0  fp = 0xffff00013230c2f0

db_command_loop() at db_trap+0xf4
         pc = 0xffff0000000892f4  lr = 0xffff00000008c3c8
         sp = 0xffff00013230c300  fp = 0xffff00013230c520

db_trap() at kdb_trap+0x190
         pc = 0xffff00000008c3c8  lr = 0xffff00000035ecb0
         sp = 0xffff00013230c530  fp = 0xffff00013230c590
        
kdb_trap() at do_el1h_sync+0xf0
         pc = 0xffff00000035ecb0  lr = 0xffff000000614af4
         sp = 0xffff00013230c5a0  fp = 0xffff00013230c5d0

do_el1h_sync() at handle_el1h_sync+0x74
         pc = 0xffff000000614af4  lr = 0xffff0000005fd074
         sp = 0xffff00013230c5e0  fp = 0xffff00013230c6f0

handle_el1h_sync() at kdb_enter+0x34
         pc = 0xffff0000005fd074  lr = 0xffff00000035e38c
         sp = 0xffff00013230c700  fp = 0xffff00013230c790

kdb_enter() at vpanic+0x1a0
         pc = 0xffff00000035e38c  lr = 0xffff00000031dee4
         sp = 0xffff00013230c7a0  fp = 0xffff00013230c820

vpanic() at panic+0x44
         pc = 0xffff00000031dee4  lr = 0xffff00000031df78
         sp = 0xffff00013230c830  fp = 0xffff00013230c8b0
        
panic() at deadlkres+0x2d8
         pc = 0xffff00000031df78  lr = 0xffff0000002be944
         sp = 0xffff00013230c8c0  fp = 0xffff00013230c910

deadlkres() at fork_exit+0x7c
         pc = 0xffff0000002be944  lr = 0xffff0000002e1800
         sp = 0xffff00013230c920  fp = 0xffff00013230c950

fork_exit() at fork_trampoline+0x10
         pc = 0xffff0000002e1800  lr = 0xffff000000614874
         sp = 0xffff00013230c960  fp = 0x0000000000000000
Comment 4 Sean Bruno freebsd_committer freebsd_triage 2018-01-28 14:59:41 UTC
(In reply to Sean Bruno from comment #3)
Actually, looking at the scrollback, it's this:

Jan 28 08:31:55 thunderx1 smartd[53576]: In the system's table of devices NO devices found to scan
panic: deadlkres: possible deadlock detected for 0xfffffd0d0af76a80, blocked for 901018 ticks

cpuid = 35
time = 1517129764
KDB: stack backtrace:
db_trace_self() at db_trace_self_wrapper+0x28
         pc = 0xffff0000005fb138  lr = 0xffff00000008c260
         sp = 0xffff00013230c580  fp = 0xffff00013230c790

db_trace_self_wrapper() at vpanic+0x184
         pc = 0xffff00000008c260  lr = 0xffff00000031dec8
         sp = 0xffff00013230c7a0  fp = 0xffff00013230c820

vpanic() at panic+0x44
         pc = 0xffff00000031dec8  lr = 0xffff00000031df78
         sp = 0xffff00013230c830  fp = 0xffff00013230c8b0

panic() at deadlkres+0x2d8
         pc = 0xffff00000031df78  lr = 0xffff0000002be944
         sp = 0xffff00013230c8c0  fp = 0xffff00013230c910

deadlkres() at fork_exit+0x7c
         pc = 0xffff0000002be944  lr = 0xffff0000002e1800
         sp = 0xffff00013230c920  fp = 0xffff00013230c950

fork_exit() at fork_trampoline+0x10
         pc = 0xffff0000002e1800  lr = 0xffff000000614874
         sp = 0xffff00013230c960  fp = 0x0000000000000000

KDB: enter: panic
[ thread pid 0 tid 100236 ]
Stopped at      0
Comment 5 Sean Bruno freebsd_committer freebsd_triage 2018-06-08 14:37:22 UTC
This seems to have been caused by a failing igb(4) NIC that died in thunderx1.  This device has been "recycled" correctly.