while debugging a userlevel program The user can hang the system, no other processes seem to work. Fix: none found How-To-Repeat: (sync all data first) extract this shar file, create the executable with "gmake" and run the script "hangme.sh" Send a signal <CTRL>+<C> to the debugger # This is a shell archive. Save it in a file, remove anything before # this line, and then unpack it by entering "sh file". Note, it may # create directories; files and directories will be owned by you and # have default permissions. # # This archive contains: # # hangme # hangme/hangme.c # hangme/hangme.sh # hangme/GNUmakefile # echo c - hangme mkdir -p hangme > /dev/null 2>&1 echo x - hangme/hangme.c sed 's/^X//' >hangme/hangme.c << 'END-of-hangme/hangme.c' X/****************************************************************** X* HANGME.C X******************************************************************/ X X/****************************************************************** X* X* This Programm should test a problem with locking X* X* Copyright (c) 1992,1993,1994,1995,1996,1997,1998,1999 X* by Dirk Meyer, All rights reserved. 
X* Im Grund 4, 34317 Habichtswald, Germany X* Email: dirk.meyer@dinoex.sub.org X* X******************************************************************/ X X#include <stdio.h> X#include <stdlib.h> X#include <string.h> X X#include <unistd.h> X#include <fcntl.h> X#include <signal.h> X#include <errno.h> X X/****************************************************************** X******************************************************************/ X Xtypedef int lockhandle_t; X Xconst char *const *Argv; X Xpid_t Session_gpid; X Xconst char *Logfile = "logfile"; X Xvolatile int Hang_up; X X/****************************************************************** X******************************************************************/ X Xvoid lock_handle( lockhandle_t handle, const char *datei ) X{ X int st; X X if ( datei == NULL ) X return; X X /* CONSTANTCONDITION */ X while ( 1 ) { X st = flock( handle, LOCK_EX | LOCK_NB ); X if ( st == 0 ) X break; X if ( errno == EWOULDBLOCK ) { X printf( "blocked by <%s>\n", datei ); X break; X }; X fprintf( stderr, X "lock <%s> returns %d: %s", datei, st, X strerror( errno ) ); X if ( errno == ENOLCK ) { X sleep( 20 ); X continue; X }; X return; X }; X /* CONSTANTCONDITION */ X while ( 1 ) { X st = flock( handle, LOCK_EX ); X if ( st == 0 ) X break; X fprintf( stderr, X "lock <%s> returns %d: %s", datei, st, X strerror( errno ) ); X if ( errno == ENOLCK ) { X sleep( 20 ); X continue; X }; X return; X } X} X X/****************************************************************** X******************************************************************/ X XFILE *file_open( const char *datei, const char *mode ) X{ X FILE *handle; X X if ( datei == NULL ) X return ( NULL ); X X handle = fopen( datei, mode ); X if ( handle == NULL ) X fprintf( stderr, X "Fatal: Datei '%s' nicht verfuegbar", datei ); X if ( mode[ 0 ] != 'r' ) X lock_handle( fileno( handle ), datei ); X return ( handle ); X} X Xvoid file_close( FILE *handle ) X{ X int status; X X status = fclose( 
handle ); X if ( status != 0 ) { X fprintf( stderr, X "Fatal: Fehler %d beim Schliessen der Datei", X status ); X } X} X X/****************************************************************** X******************************************************************/ X X#ifdef __FreeBSD__ X#undef SIG_DFL X#define SIG_DFL (void (*)(int))0 X#undef SIG_IGN X#define SIG_IGN (void (*)(int))1 X#endif X Xvoid my_appl_exit( void ); Xvoid my_appl_exit( void ) X{ X signal( SIGHUP, SIG_DFL ); X signal( SIGQUIT, SIG_DFL ); X} X Xvoid onquit( int test ); X/* ARGSUSED */ Xvoid onquit( int test ) X{ X fprintf( stderr, "QUIT-Signal\n" ); X signal( SIGQUIT, onquit ); X X signal( SIGHUP, SIG_IGN ); X signal( SIGTERM, SIG_IGN ); X if ( Session_gpid > 1 ) X (void)kill( -Session_gpid, SIGHUP ); X if ( Session_gpid > 1 ) X (void)kill( -Session_gpid, SIGTERM ); X my_appl_exit(); X execv( Argv[ 0 ], (char *const *)Argv ); X} X Xvoid onhangup( int test ); X/* ARGSUSED */ Xvoid onhangup( int test ) X{ X FILE *flog; X X fprintf( stderr, "HANGUP-Signal\n" ); X if ( Hang_up == 0 ) { X signal( SIGHUP, SIG_IGN ); X if ( Session_gpid > 1 ) { X (void)kill( -Session_gpid, SIGHUP ); X } X }; X Hang_up = 1; X signal( SIGHUP, onhangup ); X X /* deadlock */ X flog = file_open( Logfile, "ab" ); X file_close( flog ); X} X Xvoid my_appl_init( void ); Xvoid my_appl_init( void ) X{ X#ifdef __FreeBSD__ X int error; X#endif X X atexit( my_appl_exit ); X#ifdef __FreeBSD__ X error = setpgid( 0, getppid() ); X if ( error < 0 ) { X fprintf( stderr, X "!!! Fehler %d in " X "'setpgid( ppid=%ld )' :%s", X error, (long)getppid(), X strerror( errno ) ); X }; X Session_gpid = setsid(); X if ( Session_gpid < (pid_t)0 ) { X fprintf( stderr, X "!!! 
Fehler %d in " X "'setsid()' :%s", error, X strerror( errno ) ); X }; X#else X Session_gpid = 0; X#endif X signal( SIGHUP, onhangup ); X signal( SIGQUIT, onquit ); X} X Xvoid main( int argc, const char *const *argv ); Xvoid main( int argc, const char *const *argv ) X{ X FILE *flog; X X Hang_up = 0; X Argv = argv; X my_appl_init(); X flog = file_open( Logfile, "ab" ); X fputs( "main\n", flog ); X while ( Hang_up == 0 ) X ; X file_close( flog ); X exit( 0 ); X} X X/****************************************************************** X* END OF FILE HANGUP.C X******************************************************************/ END-of-hangme/hangme.c echo x - hangme/hangme.sh sed 's/^X//' >hangme/hangme.sh << 'END-of-hangme/hangme.sh' X#!/bin/sh X# X# (c) 1994-99, Dirk Meyer, Im Grund 4, 34317 Habichtswald X# Xnpid="$$" X./hangme & Xsleep 1 Xnpid=`expr ${npid} + 1` Xecho "${npid}" Xkillall -1 hangme Xsleep 1 Xgdb ./hangme "${npid}" << 'EOF' Xq XEOF Xgdb ./hangme "${npid}" << 'EOF' Xq XEOF Xkillall -9 hangme X# X# eof END-of-hangme/hangme.sh echo x - hangme/GNUmakefile sed 's/^X//' >hangme/GNUmakefile << 'END-of-hangme/GNUmakefile' X XCPPFLAGS+= -Wall XCFLAGS+= -g XLDLAGS+= -g X Xall: hangme X END-of-hangme/GNUmakefile exit
>>Description: > > while debugging a userlevel program > The user can hang the system, > no other processes seem to work. Try this fix. tsleep()'s return codes are poorly documented and were misinterpreted in lf_setlock(). tsleep() can return 0 if the process was restarted by a debugger, so `tsleep() != 0' is not the condition for the lock having been removed from the blocked list. Leaving the lock on the list corrupts the list. In your program, the corrupt list happens to be circular and this caused an endless loop in lf_wakelock() when the list is traversed. Bruce diff -c2 kern_lockf.c~ kern_lockf.c *** kern_lockf.c~ Sun May 9 20:42:39 1999 --- kern_lockf.c Fri Jun 18 05:37:00 1999 *************** *** 273,290 **** } #endif /* LOCKF_DEBUG */ ! if ((error = tsleep((caddr_t)lock, priority, lockstr, 0))) { ! /* ! * We may have been awakened by a signal (in ! * which case we must remove ourselves from the ! * blocked list) and/or by another process ! * releasing a lock (in which case we have already ! * been removed from the blocked list and our ! * lf_next field set to NOLOCKF). ! */ ! if (lock->lf_next) ! TAILQ_REMOVE(&lock->lf_next->lf_blkhd, lock, ! lf_block); ! free(lock, M_LOCKF); ! return (error); } } --- 273,292 ---- } #endif /* LOCKF_DEBUG */ ! error = tsleep((caddr_t)lock, priority, lockstr, 0); ! /* ! * We may have been awakened by a signal and/or by a ! * debugger continuing us (in which cases we must remove ! * ourselves from the blocked list) and/or by another ! * process releasing a lock (in which case we have ! * already been removed from the blocked list and our ! * lf_next field set to NOLOCKF). ! */ ! if (lock->lf_next) { ! TAILQ_REMOVE(&lock->lf_next->lf_blkhd, lock, lf_block); ! lock->lf_next = NOLOCKF; ! } ! if (error) { ! free(lock, M_LOCKF); ! return (error); } }
Bruce Evans wrote: > >>Description: > > > > while debugging a userlevel program > > The user can hang the system, > > no other processes seem to work. > > Try this fix. tsleep()'s return codes are poorly documented and were > misinterpreted in lf_setlock(). tsleep() can return 0 if the process > was restarted by a debugger, so `tsleep() != 0' is not the condition for > the lock having been removed from the blocked list. Leaving the lock > on the list corrupts the list. In your program, the corrupt list > happens to be circular and this caused an endless loop in lf_wakelock() > when the list is traversed. Fine, this fix is running here now. It solves the stated problem. Only when gdb tries to exit the second time, it keeps hanging and can be killed with <CTRL-C>; the debugged application will terminate too. But the system keeps running as intended. We could close this PR. In my opinion, the hanging gdb might be a gdb bug. kind regards Dirk -- Dirk Meyer, Im Grund 4, 34317 Habichtswald, Germany -- Tel. +49-5606-6512
>Fine, this fix is running here now. >It solves the stated problem. > >Only when gdb tries to exit the second time, >it keeps hanging and can be killed with <CTRL-C>; >the debugged application will terminate too. >But the system keeps running as intended. > >We could close this PR. >In my opinion, the hanging gdb might be a gdb bug. I think it is a bug in PT_ATTACH or PT_DETACH, since gdb hangs waiting for the "hangme" process after it (gdb) issues PT_ATTACH the second time. Bruce
Bruce Evans wrote: > tsleep()'s return codes are poorly documented and were > misinterpreted in lf_setlock(). tsleep() can return 0 if the process > was restarted by a debugger, I didn't realise that a process sleeping interruptibly can be stopped inside the tsleep call (is that true?). It looks dangerous to me. For example, interruptible nfs may sleep interruptibly, in particular in the vfs_bio code, with vnode locks held, etc. Stopping at such a point looks like a good opportunity to hang the machine... Dima
>> tsleep()'s return codes are poorly documented and were >> misinterpreted in lf_setlock(). tsleep() can return 0 if the process >> was restarted by a debugger, > >I didn't realise that a process sleeping interruptibly can be stopped >inside the tsleep call (is that true?). It looks dangerous to me. For I think it isn't true. >example, interruptible nfs may sleep interruptibly, in particular in >the vfs_bio code, with vnode locks held, etc. Stopping at such a point >looks like a good opportunity to hang the machine... PT_ATTACH is implemented using SIGSTOP, but the consequences shouldn't be any worse than for a manual kill -STOP. I think SIGSTOP of a stopped process is normally optimised away (so tsleep() doesn't return), but for ptrace() it is explicitly pessimised (so tsleep() returns 0). Bruce
Bruce Evans wrote: > >> tsleep()'s return codes are poorly documented and were > >> misinterpreted in lf_setlock(). tsleep() can return 0 if the process > >> was restarted by a debugger, > > > >I didn't realise that a process sleeping interruptibly can be stopped > >inside the tsleep call (is that true?). It looks dangerous to me. For > > I think it isn't true. No I've verified that both kill -STOP and gdb attach move the process to the stopped state without waking it up. > > >example, interruptible nfs may sleep interruptibly, in particular in > >the vfs_bio code, with vnode locks held, etc. Stopping at such a point > >looks like a good opportunity to hang the machine... > > PT_ATTACH is implemented using SIGSTOP, but the consequences shouldn't > be any worse than for a manual kill -STOP. I think SIGSTOP of a stopped > process is normally optimised away (so tsleep() doesn't return), but for > ptrace() it is explicitly pessimised (so tsleep() returns 0). I think, while psignal() makes the process runnable in most cases, the process then gets stuck in issignal() if stopped. I'm not saying that kill -STOP is less dangerous than ptrace(). Dima
Bruce, Would you mind taking a look at PR 12247 and seeing if it's still relevant? Best regards, Mike Barcroft
On Fri, 20 Jul 2001, Mike Barcroft wrote: > Bruce, > Would you mind taking a look at PR 12247 and seeing if it's still > relevant? I think the original problem is fixed. I don't know about the general signal handling details discussed in followups. I meant to look at them someday but haven't had time. Bruce
State Changed From-To: open->closed bde confirmed the originator's problem has been solved.