Bug 12247 - userlevel program let kernel hang
Summary: userlevel program let kernel hang
Status: Closed FIXED
Alias: None
Product: Base System
Classification: Unclassified
Component: kern (show other bugs)
Version: 3.2-RELEASE
Hardware: Any Any
Importance: Normal Affects Only Me
Assignee: freebsd-bugs (Nobody)
URL:
Keywords:
Depends on:
Blocks:
 
Reported: 1999-06-16 17:20 UTC by dirk.meyer
Modified: 2001-07-21 17:56 UTC (History)
0 users

See Also:


Attachments

Note You need to log in before you can comment on or make changes to this bug.
Description dirk.meyer 1999-06-16 17:20:01 UTC
	while debugging a userlevel program
	The user can hang the system,
	no other processes seem to work.

Fix: 

none found
How-To-Repeat: 
	(sync all data first)
	extract this shar file,
	create the executable with "gmake"
	and run the script "hangme.sh"
	Send a signal <CTRL>+<C> to the debugger

# This is a shell archive.  Save it in a file, remove anything before
# this line, and then unpack it by entering "sh file".  Note, it may
# create directories; files and directories will be owned by you and
# have default permissions.
#
# This archive contains:
#
#	hangme
#	hangme/hangme.c
#	hangme/hangme.sh
#	hangme/GNUmakefile
#
echo c - hangme
mkdir -p hangme > /dev/null 2>&1
echo x - hangme/hangme.c
sed 's/^X//' >hangme/hangme.c << 'END-of-hangme/hangme.c'
X/******************************************************************
X*	HANGME.C
X******************************************************************/
X
X/******************************************************************
X*
X*	This program tests a problem with file locking
X*
X*	Copyright (c) 1992,1993,1994,1995,1996,1997,1998,1999
X*	by Dirk Meyer, All rights reserved.
X*	Im Grund 4, 34317 Habichtswald, Germany
X*	Email: dirk.meyer@dinoex.sub.org
X*
X******************************************************************/
X
X#include <stdio.h>
X#include <stdlib.h>
X#include <string.h>
X
X#include <unistd.h>
X#include <fcntl.h>
X#include <signal.h>
X#include <errno.h>
X
X/******************************************************************
X******************************************************************/
X
Xtypedef int lockhandle_t;	/* descriptor type taken by lock_handle(), which passes it to flock() */
X
Xconst char	*const *Argv;	/* argv as received by main(); onquit() re-executes Argv[ 0 ] with it */
X
Xpid_t		Session_gpid;	/* setsid() result on FreeBSD, 0 elsewhere; handlers signal -Session_gpid when it is > 1 */
X
Xconst char	*Logfile = "logfile";	/* file that main() and onhangup() open and lock */
X
Xvolatile int	Hang_up;	/* set to 1 by onhangup(); main() spins while it is 0 */
X
X/******************************************************************
X*	lock_handle: take an exclusive flock() lock on `handle'.
X*
X*	handle	descriptor to lock
X*	datei	file name, used only in the messages; when it is
X*		NULL the function returns without doing anything
X*
X*	A non-blocking attempt ( LOCK_EX | LOCK_NB ) is made first.
X*	If it succeeds, or if it fails with EWOULDBLOCK (reported
X*	as "blocked by <name>" on stdout), control continues to a
X*	blocking flock( LOCK_EX ) call.  In both phases an ENOLCK
X*	failure is retried after sleep( 20 ); any other failure is
X*	reported on stderr and the function returns at once.
X*	Nothing is returned, so the caller cannot tell whether the
X*	lock is held.
X******************************************************************/
X
Xvoid lock_handle( lockhandle_t handle, const char *datei )
X{
X	int		st;
X
X	if ( datei == NULL )
X		return;
X
X	/* CONSTANTCONDITION */
X	while ( 1 ) {
X		st = flock( handle, LOCK_EX | LOCK_NB );
X		if ( st == 0 )
X			break;
X		if ( errno == EWOULDBLOCK ) {
X			printf( "blocked by <%s>\n", datei );
X			break;
X		};
X		fprintf( stderr,
X			"lock <%s> returns %d: %s", datei, st,
X			strerror( errno ) );
X		if ( errno == ENOLCK ) {
X			sleep( 20 );
X			continue;
X		};
X		return;
X	};
X	/* CONSTANTCONDITION */
X	while ( 1 ) {
X		st = flock( handle, LOCK_EX );
X		if ( st == 0 )
X			break;
X		fprintf( stderr,
X			"lock <%s> returns %d: %s", datei, st,
X			strerror( errno ) );
X		if ( errno == ENOLCK ) {
X			sleep( 20 );
X			continue;
X		};
X		return;
X	}
X}
X
X/******************************************************************
X******************************************************************/
X
XFILE *file_open( const char *datei, const char *mode )
X{
X	FILE *handle;
X
X	if ( datei == NULL )
X		return ( NULL );
X
X	handle = fopen( datei, mode );
X	if ( handle == NULL )
X		fprintf( stderr,
X			"Fatal: Datei '%s' nicht verfuegbar", datei );
X	if ( mode[ 0 ] != 'r' )
X		lock_handle( fileno( handle ), datei );
X	return ( handle );
X}
X
Xvoid file_close( FILE *handle )
X{
X	int		status;
X
X	status = fclose( handle );
X	if ( status != 0 ) {
X		fprintf( stderr,
X			"Fatal: Fehler %d beim Schliessen der Datei",
X			status );
X	}
X}
X
X/******************************************************************
X******************************************************************/
X
X#ifdef __FreeBSD__	/* FreeBSD only: replace the system definitions of SIG_DFL and SIG_IGN */
X#undef SIG_DFL
X#define SIG_DFL		(void (*)(int))0	/* value 0 cast to the handler type void (*)(int) */
X#undef SIG_IGN
X#define SIG_IGN		(void (*)(int))1	/* value 1 cast to the handler type void (*)(int) */
X#endif	/* NOTE(review): presumably works around the type of the system macros - confirm */
X
Xvoid my_appl_exit( void );	/* registered with atexit() in my_appl_init(); also called by onquit() */
Xvoid my_appl_exit( void )
X{
X	signal( SIGHUP, SIG_DFL );	/* SIGHUP back to SIG_DFL */
X	signal( SIGQUIT, SIG_DFL );	/* SIGQUIT back to SIG_DFL */
X}
X
Xvoid onquit( int test );	/* SIGQUIT handler, installed by my_appl_init(); `test' is unused */
X/* ARGSUSED */
Xvoid onquit( int test )
X{
X	fprintf( stderr, "QUIT-Signal\n" );
X	signal( SIGQUIT, onquit );	/* install this handler again */
X
X	signal( SIGHUP, SIG_IGN );	/* ignore the two signals that are sent to the group below */
X	signal( SIGTERM, SIG_IGN );
X	if ( Session_gpid > 1 )
X		(void)kill( -Session_gpid, SIGHUP );	/* negative pid: signal the process group */
X	if ( Session_gpid > 1 )
X		(void)kill( -Session_gpid, SIGTERM );
X	my_appl_exit();	/* SIGHUP and SIGQUIT back to SIG_DFL */
X	execv( Argv[ 0 ], (char *const *)Argv );	/* run the program again with its original argv; an execv() failure is not handled */
X}
X
Xvoid onhangup( int test );	/* SIGHUP handler, installed by my_appl_init(); `test' is unused */
X/* ARGSUSED */
Xvoid onhangup( int test )
X{
X	FILE *flog;
X
X	fprintf( stderr, "HANGUP-Signal\n" );
X	if ( Hang_up == 0 ) {	/* only while Hang_up is still 0, i.e. on the first SIGHUP */
X		signal( SIGHUP, SIG_IGN );	/* ignore SIGHUP while it is forwarded to the group */
X		if ( Session_gpid > 1 ) {
X			(void)kill( -Session_gpid, SIGHUP );
X		}
X	};
X	Hang_up = 1;	/* ends the wait loop in main() once this handler returns */
X	signal( SIGHUP, onhangup );	/* install this handler again */
X
X	/*
X	 * deadlock: when the signal arrives during the wait loop in
X	 * main(), main() has already opened and locked Logfile with the
X	 * same file_open() call and has not closed it yet.
X	 */
X	flog = file_open( Logfile, "ab" );
X	file_close( flog );
X}
X
Xvoid my_appl_init( void );
Xvoid my_appl_init( void )
X{
X#ifdef __FreeBSD__
X	int error;
X#endif
X
X	atexit( my_appl_exit );
X#ifdef __FreeBSD__
X	error = setpgid( 0, getppid() );
X	if ( error < 0 ) {
X		fprintf( stderr,
X				"!!! Fehler %d in "
X				"'setpgid( ppid=%ld )' :%s",
X				error, (long)getppid(),
X				strerror( errno ) );
X	};
X	Session_gpid = setsid();
X	if ( Session_gpid < (pid_t)0 ) {
X		fprintf( stderr,
X				"!!! Fehler %d in "
X				"'setsid()' :%s", error,
X				strerror( errno ) );
X	};
X#else
X	Session_gpid = 0;
X#endif
X	signal( SIGHUP, onhangup );
X	signal( SIGQUIT, onquit );
X}
X
Xvoid main( int argc, const char *const *argv );
Xvoid main( int argc, const char *const *argv )
X{
X	FILE *flog;
X
X	Hang_up = 0;
X	Argv = argv;
X	my_appl_init();
X	flog = file_open( Logfile, "ab" );
X	fputs( "main\n", flog );
X	while ( Hang_up == 0 )
X		;
X	file_close( flog );
X	exit( 0 );
X}
X
X/******************************************************************
X*	END OF FILE HANGME.C
X******************************************************************/
END-of-hangme/hangme.c
echo x - hangme/hangme.sh
sed 's/^X//' >hangme/hangme.sh << 'END-of-hangme/hangme.sh'
X#!/bin/sh
X#
X#	(c) 1994-99, Dirk Meyer, Im Grund 4, 34317 Habichtswald
X#
X#	Starts hangme in the background, sends it SIGHUP, runs gdb
X#	against its PID twice (quitting at once each time) and finally
X#	kills it.
X#
X./hangme &
X# $! is the PID of the background job just started.  Guessing it as
X# $$ + 1 is wrong as soon as any other process is created in between.
Xnpid="$!"
Xsleep 1
Xecho "${npid}"
Xkillall -1 hangme
Xsleep 1
Xgdb ./hangme "${npid}" << 'EOF'
Xq
XEOF
Xgdb ./hangme "${npid}" << 'EOF'
Xq
XEOF
Xkillall -9 hangme
X#
X# eof
END-of-hangme/hangme.sh
echo x - hangme/GNUmakefile
sed 's/^X//' >hangme/GNUmakefile << 'END-of-hangme/GNUmakefile'
X
X# No explicit rule: hangme is built from hangme.c by GNU make's
X# built-in rules, which read CPPFLAGS, CFLAGS and LDFLAGS.
XCPPFLAGS+= -Wall
XCFLAGS+= -g
XLDFLAGS+= -g
X
Xall: hangme
X
END-of-hangme/GNUmakefile
exit
Comment 1 Bruce Evans 1999-06-17 21:00:46 UTC
>>Description:
>
>	while debugging a userlevel program
>	The user can hang the system,
>	no other processes seem to work.

Try this fix.  tsleep()'s return codes are poorly documented and were
misinterpreted in lf_setlock().  tsleep() can return 0 if the process
was restarted by a debugger, so `tsleep() != 0' is not the condition for
the lock having been removed from the blocked list.  Leaving the lock
on the list corrupts the list.  In your program, the corrupt list
happens to be circular and this caused an endless loop in lf_wakelock()
when the list is traversed.

Bruce

diff -c2 kern_lockf.c~ kern_lockf.c
*** kern_lockf.c~	Sun May  9 20:42:39 1999
--- kern_lockf.c	Fri Jun 18 05:37:00 1999
***************
*** 273,290 ****
  		}
  #endif /* LOCKF_DEBUG */
! 		if ((error = tsleep((caddr_t)lock, priority, lockstr, 0))) {
!                         /*
! 			 * We may have been awakened by a signal (in
! 			 * which case we must remove ourselves from the
! 			 * blocked list) and/or by another process
! 			 * releasing a lock (in which case we have already
! 			 * been removed from the blocked list and our
! 			 * lf_next field set to NOLOCKF).
!                          */
! 			if (lock->lf_next)
! 				TAILQ_REMOVE(&lock->lf_next->lf_blkhd, lock,
! 					lf_block);
!                         free(lock, M_LOCKF);
!                         return (error);
  		}
  	}
--- 273,292 ----
  		}
  #endif /* LOCKF_DEBUG */
! 		error = tsleep((caddr_t)lock, priority, lockstr, 0);
! 		/*
! 		 * We may have been awakened by a signal and/or by a
! 		 * debugger continuing us (in which cases we must remove
! 		 * ourselves from the blocked list) and/or by another
! 		 * process releasing a lock (in which case we have
! 		 * already been removed from the blocked list and our
! 		 * lf_next field set to NOLOCKF).
! 		 */
! 		if (lock->lf_next) {
! 			TAILQ_REMOVE(&lock->lf_next->lf_blkhd, lock, lf_block);
! 			lock->lf_next = NOLOCKF;
! 		}
! 		if (error) {
! 			free(lock, M_LOCKF);
! 			return (error);
  		}
  	}
Comment 2 dirk.meyer 1999-06-18 05:44:57 UTC
Bruce Evans wrote:,

> >>Description:
> >
> >	while debugging a userlevel program
> >	The user can hang the system,
> >	no other processes seem to work.
> 
> Try this fix.  tsleep()'s return codes are poorly documented and were
> misinterpreted in lf_setlock().  tsleep() can return 0 if the process
> was restarted by a debugger, so `tsleep() != 0' is not the condition for
> the lock having been removed from the blocked list.  Leaving the lock
> on the list corrupts the list.  In your program, the corrupt list
> happens to be circular and this caused an endless loop in lf_wakelock()
> when the list is traversed.

Fine, this fix is running here now.
It solves the stated problem.

Only when gdb tries to exit the second time
does it keep hanging; it can be killed with <CTRL-C>,
and the debugged application then terminates too.
But the system keeps running as intended.

We could close this PR.
In my opinion, the hanging gdb might be a gdb bug.

kind regards Dirk

-- Dirk Meyer, Im Grund 4, 34317 Habichtswald, Germany
-- Tel. +49-5606-6512
Comment 3 Bruce Evans 1999-06-18 10:33:33 UTC
>Fine, this fix is running here now.
>It solves the stated problem.
>
>Only when gdb tries to exit the second time
>does it keep hanging; it can be killed with <CTRL-C>,
>and the debugged application then terminates too.
>But the system keeps running as intended.
>
>We could close this PR.
>In my opinion, the hanging gdb might be a gdb bug.

I think it is a bug in PT_ATTACH or PT_DETACH, since gdb hangs waiting
for the "hangme" process after it (gdb) issues PT_ATTACH the second time.

Bruce
Comment 4 Dmitrij Tejblum 1999-06-19 16:00:24 UTC
Bruce Evans wrote:
>  tsleep()'s return codes are poorly documented and were
>  misinterpreted in lf_setlock().  tsleep() can return 0 if the process
>  was restarted by a debugger, 

I didn't realise that a process sleeping interruptibly can be stopped
inside the tsleep call (is that true?). It looks dangerous to me. For
example, interruptible nfs may sleep interruptibly, in particular in
the vfs_bio code, with vnode locks held, etc. Stopping at such a point
looks like a good opportunity to hang the machine...

Dima
Comment 5 Bruce Evans 1999-06-19 23:45:53 UTC
>>  tsleep()'s return codes are poorly documented and were
>>  misinterpreted in lf_setlock().  tsleep() can return 0 if the process
>>  was restarted by a debugger, 
>
>I didn't realise that a process sleeping interruptible can be stopped 
>inside the tsleep call (is that true?). It looks dangerous to me. For 

I think it isn't true.

>example, interruptible nfs may sleep interruptibly, in particular in
>the vfs_bio code, with vnode locks held, etc. Stopping at such point 
>looks like a good opportunity to hang the machine...

PT_ATTACH is implemented using SIGSTOP, but the consequences shouldn't
be any worse than for a manual kill -STOP.  I think SIGSTOP of a stopped
process is normally optimised away (so tsleep() doesn't return), but for
ptrace() it is explicitly pessimised (so tsleep() returns 0).

Bruce
Comment 6 Dmitrij Tejblum 1999-06-20 20:37:44 UTC
Bruce Evans wrote:
> >>  tsleep()'s return codes are poorly documented and were
> >>  misinterpreted in lf_setlock().  tsleep() can return 0 if the process
> >>  was restarted by a debugger, 
> >
> >I didn't realise that a process sleeping interruptible can be stopped 
> >inside the tsleep call (is that true?). It looks dangerous to me. For 
> 
> I think it isn't true.

No I've verified that both kill -STOP and gdb attach move the process to
the stopped state without waking it up.

> 
> >example, interruptible nfs may sleep interruptibly, in particular in
> >the vfs_bio code, with vnode locks held, etc. Stopping at such point 
> >looks like a good opportunity to hang the machine...
> 
> PT_ATTACH is implemented using SIGSTOP, but the consequences shouldn't
> be any worse than for a manual kill -STOP.  I think SIGSTOP of a stopped
> process is normally optimised away (so tsleep() doesn't return), but for
> ptrace() it is explicitly pessimised (so tsleep() returns 0).

I think that, while psignal() makes the process runnable in most cases, the
process then gets stuck in issignal() if it is stopped. I'm not saying that
kill -STOP is less dangerous than ptrace().

Dima
Comment 7 Mike Barcroft freebsd_committer freebsd_triage 2001-07-21 00:09:00 UTC
Bruce,
Would you mind taking a look at PR 12247 and seeing if it's still
relevant?

Best regards,
Mike Barcroft
Comment 8 Bruce Evans 2001-07-21 13:16:55 UTC
On Fri, 20 Jul 2001, Mike Barcroft wrote:

> Bruce,
> Would you mind taking a look at PR 12247 and seeing if it's still
> relevant?

I think the original problem is fixed.  I don't know about the
general signal handling details discussed in followups.  I meant
to look at them someday but haven't had time.

Bruce
Comment 9 Mike Barcroft freebsd_committer freebsd_triage 2001-07-21 17:55:49 UTC
State Changed
From-To: open->closed


bde confirmed the originator's problem has been solved.