Backtrace: * thread #1, name = 'lldb', stop reason = signal SIGABRT * frame #0: 0x00000008055ea1fa libc.so.7`__sys_thr_kill at thr_kill.S:4 frame #1: 0x000000080555fc64 libc.so.7`__raise(s=6) at raise.c:52:10 frame #2: 0x0000000805613ad6 libc.so.7`abort at abort.c:79:8 frame #3: 0x0000000805488d69 libcxxrt.so.1`report_failure(err=<unavailable>, thrown_exception=0x000000080628f368) at exception.cc:719:5 frame #4: 0x000000080545f4ac libc++.so.1`std::__1::__throw_system_error(ev=11, what_arg="") at system_error.cpp:287:5 frame #5: 0x000000080545361d libc++.so.1`std::__1::mutex::lock(this=<unavailable>) at mutex.cpp:35:9 frame #6: 0x0000000002137a29 lldb`::PrintAsync() [inlined] lock_guard at __mutex_base:91:27 frame #7: 0x0000000002137a21 lldb`::PrintAsync() at Editline.cpp:1465 frame #8: 0x00000000020cee36 lldb`::PrintAsync() at IOHandler.cpp:127:14 frame #9: 0x00000000020f5e66 lldb`::~StreamAsynchronousIO() [inlined] Flush at StreamAsynchronousIO.cpp:28:16 frame #10: 0x00000000020f5e2e lldb`::~StreamAsynchronousIO() at StreamAsynchronousIO.cpp:23 frame #11: 0x00000000024a0e8c lldb`::PrintWarning() [inlined] __release_shared at memory:3341:9 frame #12: 0x00000000024a0e78 lldb`::PrintWarning() [inlined] __release_shared at memory:3383 frame #13: 0x00000000024a0e78 lldb`::PrintWarning() [inlined] ~shared_ptr at memory:4098 frame #14: 0x00000000024a0e68 lldb`::PrintWarning() at Process.cpp:5837 frame #15: 0x00000000024a1110 lldb`::PrintWarningUnsupportedLanguage() at Process.cpp:5864:5 frame #16: 0x00000000024d8f3f lldb`::FrameSelectedCallback() at Thread.cpp:335:19 frame #17: 0x00000000024d8cbb lldb`::GetSelectedFrame() at Thread.cpp:274:3 frame #18: 0x000000000247bad0 lldb`::SetTargetPtr() at ExecutionContext.cpp:527:56 frame #19: 0x0000000002160125 lldb`::HandleCompletion() [inlined] UpdateExecutionContext at CommandInterpreter.cpp:2685:19 frame #20: 0x00000000021600fe lldb`::HandleCompletion() at CommandInterpreter.cpp:1854 frame #21: 0x0000000002135a81 lldb`::TabCommand() at Editline.cpp:988:3 frame #22: 0x0000000002138b6c lldb`::__invoke() [inlined] operator() at Editline.cpp:1144:45 frame #23: 0x0000000002138b57 lldb`::__invoke() at Editline.cpp:1142 frame #24: 0x0000000805261741 libedit.so.8`el_wgets(el=0x00000008062e6600, nread=0x00007fffffffe0b4) at read.c:537:12 frame #25: 0x00000000021375c2 lldb`::GetLine() at Editline.cpp:1407:16 frame #26: 0x00000000020cfa56 lldb`::GetLine() at IOHandler.cpp:323:29 frame #27: 0x00000000020d08ae lldb`::Run() at IOHandler.cpp:554:11 frame #28: 0x00000000020b7f5f lldb`::RunIOHandlers() at Debugger.cpp:857:16 frame #29: 0x0000000002163869 lldb`::RunCommandInterpreter() at CommandInterpreter.cpp:2984:16 frame #30: 0x0000000001e2b21c lldb`::RunCommandInterpreter() at SBDebugger.cpp:1172:42 frame #31: 0x0000000001def2ef lldb`::MainLoop() at Driver.cpp:672:18 frame #32: 0x0000000001df0aa3 lldb`main at Driver.cpp:910:26 frame #33: 0x0000000001decaff lldb`_start(ap=<unavailable>, cleanup=<unavailable>) at crt1_c.c:75:7 frame #1: 0x000000080555fc64 libc.so.7`__raise(s=6) at raise.c:52:10 49 50 if (__sys_thr_self(&id) == -1) 51 return (-1); -> 52 return (__sys_thr_kill(id, s)); 53 } (lldb) up frame #2: 0x0000000805613ad6 libc.so.7`abort at abort.c:79:8 76 (void)__libc_sigaction(SIGABRT, &act, NULL); 77 sigdelset(&act.sa_mask, SIGABRT); 78 (void)__libc_sigprocmask(SIG_SETMASK, &act.sa_mask, NULL); -> 79 (void)raise(SIGABRT); 80 exit(1); 81 } (lldb) up frame #3: 0x0000000805488d69 libcxxrt.so.1`report_failure(err=<unavailable>, thrown_exception=0x000000080628f368) at exception.cc:719:5 716 #endif 717 case _URC_END_OF_STACK: 718 __cxa_begin_catch (&(thrown_exception->unwindHeader)); -> 719 std::terminate(); 720 fprintf(stderr, "Terminating due to uncaught exception %p", 721 static_cast<void*>(thrown_exception)); 722 thrown_exception = realExceptionFromException(thrown_exception); (lldb) up frame #4: 0x000000080545f4ac libc++.so.1`std::__1::__throw_system_error(ev=11, what_arg="") at system_error.cpp:287:5 284 __throw_system_error(int ev, const char* what_arg) 285 { 286 #ifndef _LIBCPP_NO_EXCEPTIONS -> 287 throw system_error(error_code(ev, system_category()), what_arg); 288 #else 289 (void)ev; 290 (void)what_arg; (lldb) up frame #5: 0x000000080545361d libc++.so.1`std::__1::mutex::lock(this=<unavailable>) at mutex.cpp:35:9 32 { 33 int ec = __libcpp_mutex_lock(&__m_); 34 if (ec) -> 35 __throw_system_error(ec, "mutex lock failed"); 36 } 37 38 bool (lldb) up frame #6: 0x0000000002137a29 lldb`::PrintAsync() [inlined] lock_guard at __mutex_base:91:27 88 89 _LIBCPP_NODISCARD_EXT _LIBCPP_INLINE_VISIBILITY 90 explicit lock_guard(mutex_type& __m) _LIBCPP_THREAD_SAFETY_ANNOTATION(acquire_capability(__m)) -> 91 : __m_(__m) {__m_.lock();} 92 93 _LIBCPP_NODISCARD_EXT _LIBCPP_INLINE_VISIBILITY 94 lock_guard(mutex_type& __m, adopt_lock_t) _LIBCPP_THREAD_SAFETY_ANNOTATION(requires_capability(__m))
Any idea how to reproduce this? It appears that mutex::lock() just calls pthread_mutex_lock() and it fails somehow: 00000000000b5380 <std::__1::mutex::lock()@@Base>: b5380: 55 push %rbp b5381: 48 89 e5 mov %rsp,%rbp b5384: e8 67 e7 00 00 callq c3af0 <pthread_mutex_lock@plt> b5389: 85 c0 test %eax,%eax b538b: 75 02 jne b538f <std::__1::mutex::lock()@@Base+0xf> b538d: 5d pop %rbp b538e: c3 retq b538f: 48 8d 35 e8 a0 f8 ff lea -0x75f18(%rip),%rsi # 3f47e <typeinfo name for std::bad_any_cast@@Base-0x8df> b5396: 89 c7 mov %eax,%edi b5398: e8 d3 d2 00 00 callq c2670 <std::__1::__throw_system_error(int, char const*)@plt> So in frame 5, can you print the value of 'ec' ? pthread_mutex_lock(3) says: ERRORS The pthread_mutex_lock() function will fail if: [EINVAL] The value specified by mutex is invalid. [EDEADLK] A deadlock would occur if the thread blocked waiting for mutex. [EOWNERDEAD] The argument mutex points to a robust mutex and the process containing the previous owning thread terminated while holding the mutex lock. The lock was granted to the caller and it is up to the new owner to make the state consistent. [ENOTRECOVERABLE] The state protected by the mutex is not recoverable. I wonder what the actual error is...
Was this reproducible or just occurred once? If you have a moment can you try upstream lldb built from source (https://lldb.llvm.org/resources/build.html)? A lot of work is going into lldb FreeBSD support upstream so we're well positioned to work on them if they still happen there.
It would take about a day for me to catch the offending program (more context at https://gitlab.isc.org/isc-projects/bind9/-/issues/2465 if you are interested) to enter the state that I need to debug, so it would take some time for me to try to reproduce the lldb issue. What I did was fairly basic, basically I have a runaway BIND instance, which after receiving a SIGTERM won't exit, and I was attaching to the process and trying to get a backtrace for all threads. Or in other words, I did: sudo lldb --attach-pid `pgrep named` `which named` and were going to do a "thread backtrace all", while doing that I might have pressed <tab> and lldb dumped core. === Should I build a lldb version with -g and -O0 just in case, by the way?
(In reply to Xin LI from comment #3) > Should I build a lldb version with -g and -O0 just in case, by the way? I think cmake setting `-DCMAKE_BUILD_TYPE=Debug` should do the right thing.
Looking at the stack trace, it seems the line editor was processing a tab keypress (while being in libedit via el_wgets), and then the autocompletion stuff got some error, which it tried to print via PrintAsync. That uses a non-recursive mutex 'm_output_mutex' in the EditLine class, which is also locked just before calling into el_wgets(). So I think it's getting a EDEADLK, as described in lib/libthr/thread/thr_mutex.c: 885 static int 886 mutex_self_lock(struct pthread_mutex *m, const struct timespec *abstime) 887 { 888 struct timespec ts1, ts2; 889 int ret; 890 891 switch (PMUTEX_TYPE(m->m_flags)) { 892 case PTHREAD_MUTEX_ERRORCHECK: 893 case PTHREAD_MUTEX_ADAPTIVE_NP:. 894 if (abstime) { ... 904 } else { 905 /* 906 * POSIX specifies that mutexes should return 907 * EDEADLK if a recursive lock is detected. 908 */ 909 ret = EDEADLK; 910 } I think this is an upstream bug. But it would be handy to have an easier reproduction scenario.
(In reply to Dimitry Andric from comment #1) The error was 11 (EDEADLK).
(In reply to Ed Maste from comment #2) Yes, this is reliably reproducible; the current known reproduction way would be to attach on named and type thread<tab>. I'll see if I can find a smaller test case, as I need about a day or so to have named to enter the state.
I was able to reliably reproduce the issue with the following test program: <--<--<--<--<--<--<--<--<--<--<--<--<--<-- #include <sys/cdefs.h> #include <string.h> #include <pthread.h> #include <unistd.h> #define WORKERS 9 void * worker(void *p) { for (;;) sleep(10); } int main(int argc, char **argv) { int i; pthread_t thr[WORKERS]; memset(&thr, 0, sizeof(thr)); for (i=0; i<WORKERS; i++) { pthread_create(&thr[i], NULL, worker, NULL); } for (i=0; i<WORKERS; i++) { pthread_join(thr[i], NULL); } /* NOTREACHED */ return (0); } -->-->-->-->-->-->-->-->-->-->-->-->-->--> Run the test program, then lldb attach to it, and type "thread<tab>" would crash lldb with the same crash.