Bug 160662

Summary: [ufs] [hang] Snapshots cause a lockup on UFS with SU+J enabled
Product: Base System Reporter: Hans Ottevanger <hans>
Component: kernAssignee: Kirk McKusick <mckusick>
Status: Closed FIXED    
Severity: Affects Only Me    
Priority: Normal    
Version: 9.0-BETA2   
Hardware: Any   
OS: Any   

Description Hans Ottevanger 2011-09-11 16:50:07 UTC
On a UFS filesystem with SU+J enabled attempting to make a snapshot with mksnap_ffs causes the system to lockup completely after a while, needing a
reset to recover.

This is not the extreme slowdown due to the snapshot taking all available
disk bandwidth: the system becomes fully unresponsive, i.e. no reaction on
keyboard or mouse and e.g. remote ssh sessions just stop. However, the
system remains pingable.

If journalling is disabled by running tunefs -j disable <fs> (in single
user mode, if needed), making a snapshot will succeed again.

Two lock order reversal occurs in both cases, identical modulo the addresses.
These are the ones for an SU+J case:

lock order reversal:
 1st 0xc6347498 ufs (ufs) @ /usr/src/sys/ufs/ffs/ffs_snapshot.c:425
 2nd 0xdf326728 bufwait (bufwait) @ /usr/src/sys/kern/vfs_bio.c:2658
 3rd 0xc603baf8 ufs (ufs) @ /usr/src/sys/ufs/ffs/ffs_snapshot.c:546
KDB: stack backtrace:
db_trace_self_wrapper(c0efdd0c,616e735f,6f687370,3a632e74,a363435,...) at db_trace_self_wrapper+0x26
kdb_backtrace(c0a415fb,c0f016fc,c5965370,c5969198,c57a8404,...) at kdb_backtrace+0x2a
_witness_debugger(c0f016fc,c603baf8,c0ef0968,c5969198,c0f2c002,...) at _witness_debugger+0x25
witness_checkorder(c603baf8,9,c0f2c002,222,0,...) at witness_checkorder+0x839
__lockmgr_args(c603baf8,80100,c603bb18,0,0,...) at __lockmgr_args+0x824
ffs_lock(c57a852c,c11dd3c8,c5ee5390,80100,c603baa0,...) at ffs_lock+0x8a
VOP_LOCK1_APV(c1047760,c57a852c,c57a854c,c1057e00,c603baa0,...) at VOP_LOCK1_APV+0xb5
_vn_lock(c603baa0,80100,c0f2c002,222,c598de80,...) at _vn_lock+0x5e
ffs_snapshot(c5ec4798,c5aea300,c0f2f410,1a2,0,...) at ffs_snapshot+0x14fc
ffs_mount(c5ec4798,c5ca6000,ff,394,c5962450,...) at ffs_mount+0x1c13
vfs_donmount(c5ee52e0,11000,c5997d80,c5997d80,c5f0d588,...) at vfs_donmount+0x1219
nmount(c5ee52e0,c57a8cec,c57a8d28,c0efffda,0,...) at nmount+0x84
syscallenter(c5ee52e0,c57a8ce4,c57a8ce4,0,0,...) at syscallenter+0x263
syscall(c57a8d28) at syscall+0x34
Xint0x80_syscall() at Xint0x80_syscall+0x21
--- syscall (378, FreeBSD ELF32, nmount), eip = 0x280dc61b, esp = 0xbfbfe56c, ebp = 0xbfbfece8 ---
lock order reversal:
 1st 0xdf326728 bufwait (bufwait) @ /usr/src/sys/kern/vfs_bio.c:2658
 2nd 0xc5995a1c snaplk (snaplk) @ /usr/src/sys/ufs/ffs/ffs_snapshot.c:818
KDB: stack backtrace:
db_trace_self_wrapper(c0efdd0c,662f7366,735f7366,7370616e,2e746f68,...) at db_trace_self_wrapper+0x26
kdb_backtrace(c0a415fb,c0f016e3,c5965370,c5969540,c57a8404,...) at kdb_backtrace+0x2a
_witness_debugger(c0f016e3,c5995a1c,c0f2c064,c5969540,c0f2c002,...) at _witness_debugger+0x25
witness_checkorder(c5995a1c,9,c0f2c002,332,c63474b8,...) at witness_checkorder+0x839
__lockmgr_args(c5995a1c,80400,c63474b8,0,0,...) at __lockmgr_args+0x824
ffs_lock(c57a852c,df2f7f68,100000,80400,c6347440,...) at ffs_lock+0x8a
VOP_LOCK1_APV(c1047760,c57a852c,df2f7fc4,c1057e00,c6347440,...) at VOP_LOCK1_APV+0xb5
_vn_lock(c6347440,80400,c0f2c002,332,0,...) at _vn_lock+0x5e
ffs_snapshot(c5ec4798,c5aea300,c0f2f410,1a2,0,...) at ffs_snapshot+0x298e
ffs_mount(c5ec4798,c5ca6000,ff,394,c5962450,...) at ffs_mount+0x1c13
vfs_donmount(c5ee52e0,11000,c5997d80,c5997d80,c5f0d588,...) at vfs_donmount+0x1219
nmount(c5ee52e0,c57a8cec,c57a8d28,c0efffda,0,...) at nmount+0x84
syscallenter(c5ee52e0,c57a8ce4,c57a8ce4,0,0,...) at syscallenter+0x263
syscall(c57a8d28) at syscall+0x34
Xint0x80_syscall() at Xint0x80_syscall+0x21
--- syscall (378, FreeBSD ELF32, nmount), eip = 0x280dc61b, esp = 0xbfbfe56c, ebp = 0xbfbfece8

It is not clear if the LORs are related to the lockup.

This issue specifically occurs on i386 (2.4 GHz P4, 2 GByte RAM, 500 GByte PATA disk) running 9.0-BETA2 as distributed, but the problem is also 100% reproducible on amd64 running a more recent 9.0-BETA2.

How-To-Repeat: Attempt to make a snapshot of the /usr filesystem (32 GByte in my case), which is SU+J enabled by default. This can done by typing (as root):

cd /usr; mksnap_ffs /usr/.snap/testsnap

After a lot of disk activity for a few seconds the system locks up a described.
Comment 1 Mark Linimon freebsd_committer freebsd_triage 2011-09-18 03:31:13 UTC
Responsible Changed
From-To: freebsd-bugs->freebsd-fs

Over to maintainer(s).
Comment 2 Kirk McKusick freebsd_committer freebsd_triage 2011-09-18 12:08:54 UTC
Responsible Changed
From-To: freebsd-fs->mckusick

I will take responsibility for dealing with this bug.
Comment 3 dfilter service freebsd_committer freebsd_triage 2011-09-27 18:34:11 UTC
Author: mckusick
Date: Tue Sep 27 17:34:02 2011
New Revision: 225806
URL: http://svn.freebsd.org/changeset/base/225806

Log:
  This update eliminates the system hang reported in kern/160662 when
  taking a snapshot on a filesystem running with journaled soft updates.
  
  Reported by:     Hans Ottevanger
  Fix verified by: Hans Ottevanger
  PR:              kern/160662

Modified:
  head/sys/ufs/ffs/ffs_snapshot.c

Modified: head/sys/ufs/ffs/ffs_snapshot.c
==============================================================================
--- head/sys/ufs/ffs/ffs_snapshot.c	Tue Sep 27 17:11:31 2011	(r225805)
+++ head/sys/ufs/ffs/ffs_snapshot.c	Tue Sep 27 17:34:02 2011	(r225806)
@@ -203,7 +203,7 @@ ffs_snapshot(mp, snapfile)
 	ufs2_daddr_t numblks, blkno, *blkp, *snapblklist;
 	int error, cg, snaploc;
 	int i, size, len, loc;
-	int flag;
+	uint64_t flag;
 	struct timespec starttime = {0, 0}, endtime;
 	char saved_nice = 0;
 	long redo = 0, snaplistsize = 0;
_______________________________________________
svn-src-all@freebsd.org mailing list
http://lists.freebsd.org/mailman/listinfo/svn-src-all
To unsubscribe, send any mail to "svn-src-all-unsubscribe@freebsd.org"
Comment 4 dfilter service freebsd_committer freebsd_triage 2011-09-27 18:42:02 UTC
Author: mckusick
Date: Tue Sep 27 17:41:48 2011
New Revision: 225807
URL: http://svn.freebsd.org/changeset/base/225807

Log:
  This update eliminates a lock-order reversal warning discovered
  whle tracking down the system hang reported in kern/160662 and
  corrected in revision 225806. The LOR is not the cause of the system
  hang and indeed cannot cause an actual deadlock. However, it can
  be easily eliminated by defering the acquisition of a buflock until
  after all the vnode locks have been acquired.
  
  Reported by:     Hans Ottevanger
  PR:              kern/160662

Modified:
  head/sys/ufs/ffs/ffs_snapshot.c

Modified: head/sys/ufs/ffs/ffs_snapshot.c
==============================================================================
--- head/sys/ufs/ffs/ffs_snapshot.c	Tue Sep 27 17:34:02 2011	(r225806)
+++ head/sys/ufs/ffs/ffs_snapshot.c	Tue Sep 27 17:41:48 2011	(r225807)
@@ -212,7 +212,7 @@ ffs_snapshot(mp, snapfile)
 	struct fs *copy_fs = NULL, *fs;
 	struct thread *td = curthread;
 	struct inode *ip, *xp;
-	struct buf *bp, *nbp, *ibp, *sbp = NULL;
+	struct buf *bp, *nbp, *ibp;
 	struct nameidata nd;
 	struct mount *wrtmp;
 	struct vattr vat;
@@ -460,21 +460,14 @@ restart:
 	 * Grab a copy of the superblock and its summary information.
 	 * We delay writing it until the suspension is released below.
 	 */
-	error = bread(vp, lblkno(fs, fs->fs_sblockloc), fs->fs_bsize,
-	    KERNCRED, &sbp);
-	if (error) {
-		brelse(sbp);
-		sbp = NULL;
-		goto out1;
-	}
-	loc = blkoff(fs, fs->fs_sblockloc);
-	copy_fs = (struct fs *)(sbp->b_data + loc);
+	copy_fs = malloc((u_long)fs->fs_bsize, M_UFSMNT, M_WAITOK);
 	bcopy(fs, copy_fs, fs->fs_sbsize);
 	if ((fs->fs_flags & (FS_UNCLEAN | FS_NEEDSFSCK)) == 0)
 		copy_fs->fs_clean = 1;
 	size = fs->fs_bsize < SBLOCKSIZE ? fs->fs_bsize : SBLOCKSIZE;
 	if (fs->fs_sbsize < size)
-		bzero(&sbp->b_data[loc + fs->fs_sbsize], size - fs->fs_sbsize);
+		bzero(&((char *)copy_fs)[fs->fs_sbsize],
+		    size - fs->fs_sbsize);
 	size = blkroundup(fs, fs->fs_cssize);
 	if (fs->fs_contigsumsize > 0)
 		size += fs->fs_ncg * sizeof(int32_t);
@@ -490,8 +483,8 @@ restart:
 		    len, KERNCRED, &bp)) != 0) {
 			brelse(bp);
 			free(copy_fs->fs_csp, M_UFSMNT);
-			bawrite(sbp);
-			sbp = NULL;
+			free(copy_fs, M_UFSMNT);
+			copy_fs = NULL;
 			goto out1;
 		}
 		bcopy(bp->b_data, space, (u_int)len);
@@ -606,8 +599,8 @@ loop:
 		vdrop(xvp);
 		if (error) {
 			free(copy_fs->fs_csp, M_UFSMNT);
-			bawrite(sbp);
-			sbp = NULL;
+			free(copy_fs, M_UFSMNT);
+			copy_fs = NULL;
 			MNT_VNODE_FOREACH_ABORT(mp, mvp);
 			goto out1;
 		}
@@ -621,8 +614,8 @@ loop:
 		error = softdep_journal_lookup(mp, &xvp);
 		if (error) {
 			free(copy_fs->fs_csp, M_UFSMNT);
-			bawrite(sbp);
-			sbp = NULL;
+			free(copy_fs, M_UFSMNT);
+			copy_fs = NULL;
 			goto out1;
 		}
 		xp = VTOI(xvp);
@@ -688,8 +681,8 @@ loop:
 	VI_UNLOCK(devvp);
 	ASSERT_VOP_LOCKED(vp, "ffs_snapshot vp");
 out1:
-	KASSERT((sn != NULL && sbp != NULL && error == 0) ||
-		(sn == NULL && sbp == NULL && error != 0),
+	KASSERT((sn != NULL && copy_fs != NULL && error == 0) ||
+		(sn == NULL && copy_fs == NULL && error != 0),
 		("email phk@ and mckusick@"));
 	/*
 	 * Resume operation on filesystem.
@@ -703,7 +696,7 @@ out1:
 		    vp->v_mount->mnt_stat.f_mntonname, (long)endtime.tv_sec,
 		    endtime.tv_nsec / 1000000, redo, fs->fs_ncg);
 	}
-	if (sbp == NULL)
+	if (copy_fs == NULL)
 		goto out;
 	/*
 	 * Copy allocation information from all the snapshots in
@@ -793,6 +786,15 @@ out1:
 		space = (char *)space + fs->fs_bsize;
 		bawrite(nbp);
 	}
+	error = bread(vp, lblkno(fs, fs->fs_sblockloc), fs->fs_bsize,
+	    KERNCRED, &nbp);
+	if (error) {
+		brelse(nbp);
+	} else {
+		loc = blkoff(fs, fs->fs_sblockloc);
+		bcopy((char *)copy_fs, &nbp->b_data[loc], fs->fs_bsize);
+		bawrite(nbp);
+	}
 	/*
 	 * As this is the newest list, it is the most inclusive, so
 	 * should replace the previous list.
@@ -822,7 +824,8 @@ out1:
 		vrele(vp);		/* Drop extra reference */
 done:
 	free(copy_fs->fs_csp, M_UFSMNT);
-	bawrite(sbp);
+	free(copy_fs, M_UFSMNT);
+	copy_fs = NULL;
 out:
 	NDFREE(&nd, NDF_ONLY_PNBUF);
 	if (saved_nice > 0) {
_______________________________________________
svn-src-all@freebsd.org mailing list
http://lists.freebsd.org/mailman/listinfo/svn-src-all
To unsubscribe, send any mail to "svn-src-all-unsubscribe@freebsd.org"
Comment 5 dfilter service freebsd_committer freebsd_triage 2011-09-28 20:36:36 UTC
Author: mckusick
Date: Wed Sep 28 19:36:21 2011
New Revision: 225850
URL: http://svn.freebsd.org/changeset/base/225850

Log:
  MFC: r225806:
  This update eliminates the system hang reported in kern/160662 when
  taking a snapshot on a filesystem running with journaled soft updates.
  
  As journaled soft updates first appeared in 9.0, this will be the
  only MFC of this change.
  
  Approved by:     re (kib)
  Reported by:     Hans Ottevanger
  Fix verified by: Hans Ottevanger
  PR:              kern/160662

Modified:
  stable/9/sys/ufs/ffs/ffs_snapshot.c

Modified: stable/9/sys/ufs/ffs/ffs_snapshot.c
==============================================================================
--- stable/9/sys/ufs/ffs/ffs_snapshot.c	Wed Sep 28 19:01:15 2011	(r225849)
+++ stable/9/sys/ufs/ffs/ffs_snapshot.c	Wed Sep 28 19:36:21 2011	(r225850)
@@ -203,7 +203,7 @@ ffs_snapshot(mp, snapfile)
 	ufs2_daddr_t numblks, blkno, *blkp, *snapblklist;
 	int error, cg, snaploc;
 	int i, size, len, loc;
-	int flag;
+	uint64_t flag;
 	struct timespec starttime = {0, 0}, endtime;
 	char saved_nice = 0;
 	long redo = 0, snaplistsize = 0;
_______________________________________________
svn-src-all@freebsd.org mailing list
http://lists.freebsd.org/mailman/listinfo/svn-src-all
To unsubscribe, send any mail to "svn-src-all-unsubscribe@freebsd.org"
Comment 6 dfilter service freebsd_committer freebsd_triage 2011-09-28 20:38:56 UTC
Author: mckusick
Date: Wed Sep 28 19:38:47 2011
New Revision: 225851
URL: http://svn.freebsd.org/changeset/base/225851

Log:
  MFC r225807:
  This update eliminates a lock-order reversal warning discovered
  whle tracking down the system hang reported in kern/160662 and
  corrected in revision 225806 (MFC'ed as 225850). The LOR is not
  the cause of the system hang and indeed cannot cause an actual
  deadlock. However, it can be easily eliminated by defering the
  acquisition of a buflock until after all the vnode locks have
  been acquired.
  
  As journaled soft updates first appeared in 9.0, this will be the
  only MFC of this change.
  
  Approved by:     re (kib)
  Reported by:     Hans Ottevanger
  PR:              kern/160662

Modified:
  stable/9/sys/ufs/ffs/ffs_snapshot.c

Modified: stable/9/sys/ufs/ffs/ffs_snapshot.c
==============================================================================
--- stable/9/sys/ufs/ffs/ffs_snapshot.c	Wed Sep 28 19:36:21 2011	(r225850)
+++ stable/9/sys/ufs/ffs/ffs_snapshot.c	Wed Sep 28 19:38:47 2011	(r225851)
@@ -212,7 +212,7 @@ ffs_snapshot(mp, snapfile)
 	struct fs *copy_fs = NULL, *fs;
 	struct thread *td = curthread;
 	struct inode *ip, *xp;
-	struct buf *bp, *nbp, *ibp, *sbp = NULL;
+	struct buf *bp, *nbp, *ibp;
 	struct nameidata nd;
 	struct mount *wrtmp;
 	struct vattr vat;
@@ -460,21 +460,14 @@ restart:
 	 * Grab a copy of the superblock and its summary information.
 	 * We delay writing it until the suspension is released below.
 	 */
-	error = bread(vp, lblkno(fs, fs->fs_sblockloc), fs->fs_bsize,
-	    KERNCRED, &sbp);
-	if (error) {
-		brelse(sbp);
-		sbp = NULL;
-		goto out1;
-	}
-	loc = blkoff(fs, fs->fs_sblockloc);
-	copy_fs = (struct fs *)(sbp->b_data + loc);
+	copy_fs = malloc((u_long)fs->fs_bsize, M_UFSMNT, M_WAITOK);
 	bcopy(fs, copy_fs, fs->fs_sbsize);
 	if ((fs->fs_flags & (FS_UNCLEAN | FS_NEEDSFSCK)) == 0)
 		copy_fs->fs_clean = 1;
 	size = fs->fs_bsize < SBLOCKSIZE ? fs->fs_bsize : SBLOCKSIZE;
 	if (fs->fs_sbsize < size)
-		bzero(&sbp->b_data[loc + fs->fs_sbsize], size - fs->fs_sbsize);
+		bzero(&((char *)copy_fs)[fs->fs_sbsize],
+		    size - fs->fs_sbsize);
 	size = blkroundup(fs, fs->fs_cssize);
 	if (fs->fs_contigsumsize > 0)
 		size += fs->fs_ncg * sizeof(int32_t);
@@ -490,8 +483,8 @@ restart:
 		    len, KERNCRED, &bp)) != 0) {
 			brelse(bp);
 			free(copy_fs->fs_csp, M_UFSMNT);
-			bawrite(sbp);
-			sbp = NULL;
+			free(copy_fs, M_UFSMNT);
+			copy_fs = NULL;
 			goto out1;
 		}
 		bcopy(bp->b_data, space, (u_int)len);
@@ -606,8 +599,8 @@ loop:
 		vdrop(xvp);
 		if (error) {
 			free(copy_fs->fs_csp, M_UFSMNT);
-			bawrite(sbp);
-			sbp = NULL;
+			free(copy_fs, M_UFSMNT);
+			copy_fs = NULL;
 			MNT_VNODE_FOREACH_ABORT(mp, mvp);
 			goto out1;
 		}
@@ -621,8 +614,8 @@ loop:
 		error = softdep_journal_lookup(mp, &xvp);
 		if (error) {
 			free(copy_fs->fs_csp, M_UFSMNT);
-			bawrite(sbp);
-			sbp = NULL;
+			free(copy_fs, M_UFSMNT);
+			copy_fs = NULL;
 			goto out1;
 		}
 		xp = VTOI(xvp);
@@ -688,8 +681,8 @@ loop:
 	VI_UNLOCK(devvp);
 	ASSERT_VOP_LOCKED(vp, "ffs_snapshot vp");
 out1:
-	KASSERT((sn != NULL && sbp != NULL && error == 0) ||
-		(sn == NULL && sbp == NULL && error != 0),
+	KASSERT((sn != NULL && copy_fs != NULL && error == 0) ||
+		(sn == NULL && copy_fs == NULL && error != 0),
 		("email phk@ and mckusick@"));
 	/*
 	 * Resume operation on filesystem.
@@ -703,7 +696,7 @@ out1:
 		    vp->v_mount->mnt_stat.f_mntonname, (long)endtime.tv_sec,
 		    endtime.tv_nsec / 1000000, redo, fs->fs_ncg);
 	}
-	if (sbp == NULL)
+	if (copy_fs == NULL)
 		goto out;
 	/*
 	 * Copy allocation information from all the snapshots in
@@ -793,6 +786,15 @@ out1:
 		space = (char *)space + fs->fs_bsize;
 		bawrite(nbp);
 	}
+	error = bread(vp, lblkno(fs, fs->fs_sblockloc), fs->fs_bsize,
+	    KERNCRED, &nbp);
+	if (error) {
+		brelse(nbp);
+	} else {
+		loc = blkoff(fs, fs->fs_sblockloc);
+		bcopy((char *)copy_fs, &nbp->b_data[loc], fs->fs_bsize);
+		bawrite(nbp);
+	}
 	/*
 	 * As this is the newest list, it is the most inclusive, so
 	 * should replace the previous list.
@@ -822,7 +824,8 @@ out1:
 		vrele(vp);		/* Drop extra reference */
 done:
 	free(copy_fs->fs_csp, M_UFSMNT);
-	bawrite(sbp);
+	free(copy_fs, M_UFSMNT);
+	copy_fs = NULL;
 out:
 	NDFREE(&nd, NDF_ONLY_PNBUF);
 	if (saved_nice > 0) {
_______________________________________________
svn-src-all@freebsd.org mailing list
http://lists.freebsd.org/mailman/listinfo/svn-src-all
To unsubscribe, send any mail to "svn-src-all-unsubscribe@freebsd.org"
Comment 7 Kirk McKusick freebsd_committer freebsd_triage 2011-09-28 20:46:45 UTC
State Changed
From-To: open->closed

The bug has been resolved in head and and the fix MFC'ed to 9.