The following program segfaults on amd64: #include <limits.h> #include <pthread.h> static void * fn(void *arg __unused) { return (NULL); } int main(void) { pthread_t t; pthread_attr_t attr; (void)pthread_attr_init(&attr); (void)pthread_attr_setstacksize(&attr, PTHREAD_STACK_MIN); (void)pthread_create(&t, &attr, fn, NULL); (void)pthread_join(t, NULL); return (0); } We end up crashing when the thread exits: (gdb) bt #0 symlook_obj1_gnu (req=0x7fffdfffd070, obj=0x80022a000) at /home/mark/src/freebsd-dev/libexec/rtld-elf/rtld.c:4463 #1 symlook_obj (req=0x7fffdfffd070, obj=0x80022a000) at /home/mark/src/freebsd-dev/libexec/rtld-elf/rtld.c:4272 #2 0x0000000800214357 in symlook_list (req=<optimized out>, dlp=<optimized out>, objlist=<optimized out>) at /home/mark/src/freebsd-dev/libexec/rtld-elf/rtld.c:4197 #3 symlook_global (req=0x7fffdfffd6d0, donelist=0x7fffdfffd1c0) at /home/mark/src/freebsd-dev/libexec/rtld-elf/rtld.c:4091 #4 0x000000080020dd3c in symlook_default (req=0x7fffdfffd6d0, refobj=0x80022a800) at /home/mark/src/freebsd-dev/libexec/rtld-elf/rtld.c:4148 #5 0x000000080020ea73 in do_dlsym (handle=0xfffffffffffffffe, name=0x80025049b "_Unwind_ForcedUnwind", retaddr=0x800263dbe <_pthread_exit_mask+158>, ve=0x0, flags=<error reading variable: Cannot access memory at address 0x2>) at /home/mark/src/freebsd-dev/libexec/rtld-elf/rtld.c:3503 #6 0x0000000800263dbe in thread_uw_init () at /home/mark/src/freebsd-dev/lib/libthr/thread/thr_exit.c:82 #7 _pthread_exit_mask (status=0x0, mask=0x0) at /home/mark/src/freebsd-dev/lib/libthr/thread/thr_exit.c:243 #8 0x0000000800263d1b in _pthread_exit (status=0x7fffdfffd070) at /home/mark/src/freebsd-dev/lib/libthr/thread/thr_exit.c:208 #9 0x000000080025675e in thread_start (curthread=0x80100a500) at /home/mark/src/freebsd-dev/lib/libthr/thread/thr_create.c:291 #10 0x0000000000000000 in ?? ()
PTHREAD_STACK_MIN is 2048 on amd64. I'm not sure it makes much sense for it to be less than the page size.
Still 2048 on all x86, probably still dysfunctional.
(In reply to Conrad Meyer from comment #2) Yes, this problem still exists on both i386 and amd64.
I'll have a go at making a patch shortly.
It seems to be defined here: paulf> grep -rI __MINSIGSTKSZ * | grep -v obj include/pthread.h:#define PTHREAD_STACK_MIN __MINSIGSTKSZ sys/arm64/include/_limits.h:#define __MINSIGSTKSZ (1024 * 4) sys/riscv/include/_limits.h:#define __MINSIGSTKSZ (1024 * 4) sys/sys/signal.h:#include <machine/_limits.h> /* __MINSIGSTKSZ */ sys/sys/signal.h:#define MINSIGSTKSZ __MINSIGSTKSZ /* minimum stack size */ sys/powerpc/include/_limits.h:#define __MINSIGSTKSZ (512 * 4) sys/x86/include/_limits.h:#define __MINSIGSTKSZ (512 * 4) sys/arm/include/_limits.h:#define __MINSIGSTKSZ (1024 * 4) sys/mips/include/_limits.h:#define __MINSIGSTKSZ (512 * 4) It is used in quite a lot of places, including sysconf(). So, what to do? __MINSIGSTKSZ is used for both signal stacks and pthread stacks. I guess changing sys/x86/include/_limits.h:#define __MINSIGSTKSZ (512 * 4) to sys/x86/include/_limits.h:#define __MINSIGSTKSZ (1024 * 4) would do the job on i386 and amd64.
I modified the test case to take the stack size as an argument and print "success" if the thread gets created and joins: #include <limits.h> #include <pthread.h> #include <stdio.h> #include <stdlib.h> static void * fn(void *arg __unused) { return (NULL); } int main(int argc, char** argv) { pthread_t t; pthread_attr_t attr; size_t size = (size_t)atoi(argv[1]); (void)pthread_attr_init(&attr); /*(void)pthread_attr_setstacksize(&attr, PTHREAD_STACK_MIN);*/ (void)pthread_attr_setstacksize(&attr, size); (void)pthread_create(&t, &attr, fn, NULL); (void)pthread_join(t, NULL); printf("success\n"); return (0); } Then I wrote a little Python script to do a binary search to find the minimum working stack size: #!/usr/local/bin/python3.8 import subprocess def binary_search(start, end): while (start <= end): mid = (start + end ) // 2 status = subprocess.run(['./pthread_stack', str(mid)], capture_output=True) if (status.stdout.decode().strip("\n") == "success"): end = mid - 1 else: start = mid + 1 return mid res = binary_search(1, 10000) print("res " + str(res)) For an i386 binary compiled with clang -g -o pthread_stack pthread_stack.c -pthread -m32, I get res 2788, and for an amd64 binary (as before but without -m32), I get res 3024.
Created attachment 229849 [details] patch for min stack size. Not sure if this is strictly necessary.