Summary: | [ufs] [hang] Snapshots cause a lockup on UFS with SU+J enabled | ||
---|---|---|---|
Product: | Base System | Reporter: | Hans Ottevanger <hans> |
Component: | kern | Assignee: | Kirk McKusick <mckusick> |
Status: | Closed FIXED | ||
Severity: | Affects Only Me | ||
Priority: | Normal | ||
Version: | 9.0-BETA2 | ||
Hardware: | Any | ||
OS: | Any |
Description
Hans Ottevanger
2011-09-11 16:50:07 UTC
Responsible Changed From-To: freebsd-bugs->freebsd-fs Over to maintainer(s). Responsible Changed From-To: freebsd-fs->mckusick I will take responsibility for dealing with this bug. Author: mckusick Date: Tue Sep 27 17:34:02 2011 New Revision: 225806 URL: http://svn.freebsd.org/changeset/base/225806 Log: This update eliminates the system hang reported in kern/160662 when taking a snapshot on a filesystem running with journaled soft updates. Reported by: Hans Ottevanger Fix verified by: Hans Ottevanger PR: kern/160662 Modified: head/sys/ufs/ffs/ffs_snapshot.c Modified: head/sys/ufs/ffs/ffs_snapshot.c ============================================================================== --- head/sys/ufs/ffs/ffs_snapshot.c Tue Sep 27 17:11:31 2011 (r225805) +++ head/sys/ufs/ffs/ffs_snapshot.c Tue Sep 27 17:34:02 2011 (r225806) @@ -203,7 +203,7 @@ ffs_snapshot(mp, snapfile) ufs2_daddr_t numblks, blkno, *blkp, *snapblklist; int error, cg, snaploc; int i, size, len, loc; - int flag; + uint64_t flag; struct timespec starttime = {0, 0}, endtime; char saved_nice = 0; long redo = 0, snaplistsize = 0; _______________________________________________ svn-src-all@freebsd.org mailing list http://lists.freebsd.org/mailman/listinfo/svn-src-all To unsubscribe, send any mail to "svn-src-all-unsubscribe@freebsd.org" Author: mckusick Date: Tue Sep 27 17:41:48 2011 New Revision: 225807 URL: http://svn.freebsd.org/changeset/base/225807 Log: This update eliminates a lock-order reversal warning discovered while tracking down the system hang reported in kern/160662 and corrected in revision 225806. The LOR is not the cause of the system hang and indeed cannot cause an actual deadlock. However, it can be easily eliminated by deferring the acquisition of a buflock until after all the vnode locks have been acquired. 
Reported by: Hans Ottevanger PR: kern/160662 Modified: head/sys/ufs/ffs/ffs_snapshot.c Modified: head/sys/ufs/ffs/ffs_snapshot.c ============================================================================== --- head/sys/ufs/ffs/ffs_snapshot.c Tue Sep 27 17:34:02 2011 (r225806) +++ head/sys/ufs/ffs/ffs_snapshot.c Tue Sep 27 17:41:48 2011 (r225807) @@ -212,7 +212,7 @@ ffs_snapshot(mp, snapfile) struct fs *copy_fs = NULL, *fs; struct thread *td = curthread; struct inode *ip, *xp; - struct buf *bp, *nbp, *ibp, *sbp = NULL; + struct buf *bp, *nbp, *ibp; struct nameidata nd; struct mount *wrtmp; struct vattr vat; @@ -460,21 +460,14 @@ restart: * Grab a copy of the superblock and its summary information. * We delay writing it until the suspension is released below. */ - error = bread(vp, lblkno(fs, fs->fs_sblockloc), fs->fs_bsize, - KERNCRED, &sbp); - if (error) { - brelse(sbp); - sbp = NULL; - goto out1; - } - loc = blkoff(fs, fs->fs_sblockloc); - copy_fs = (struct fs *)(sbp->b_data + loc); + copy_fs = malloc((u_long)fs->fs_bsize, M_UFSMNT, M_WAITOK); bcopy(fs, copy_fs, fs->fs_sbsize); if ((fs->fs_flags & (FS_UNCLEAN | FS_NEEDSFSCK)) == 0) copy_fs->fs_clean = 1; size = fs->fs_bsize < SBLOCKSIZE ? 
fs->fs_bsize : SBLOCKSIZE; if (fs->fs_sbsize < size) - bzero(&sbp->b_data[loc + fs->fs_sbsize], size - fs->fs_sbsize); + bzero(&((char *)copy_fs)[fs->fs_sbsize], + size - fs->fs_sbsize); size = blkroundup(fs, fs->fs_cssize); if (fs->fs_contigsumsize > 0) size += fs->fs_ncg * sizeof(int32_t); @@ -490,8 +483,8 @@ restart: len, KERNCRED, &bp)) != 0) { brelse(bp); free(copy_fs->fs_csp, M_UFSMNT); - bawrite(sbp); - sbp = NULL; + free(copy_fs, M_UFSMNT); + copy_fs = NULL; goto out1; } bcopy(bp->b_data, space, (u_int)len); @@ -606,8 +599,8 @@ loop: vdrop(xvp); if (error) { free(copy_fs->fs_csp, M_UFSMNT); - bawrite(sbp); - sbp = NULL; + free(copy_fs, M_UFSMNT); + copy_fs = NULL; MNT_VNODE_FOREACH_ABORT(mp, mvp); goto out1; } @@ -621,8 +614,8 @@ loop: error = softdep_journal_lookup(mp, &xvp); if (error) { free(copy_fs->fs_csp, M_UFSMNT); - bawrite(sbp); - sbp = NULL; + free(copy_fs, M_UFSMNT); + copy_fs = NULL; goto out1; } xp = VTOI(xvp); @@ -688,8 +681,8 @@ loop: VI_UNLOCK(devvp); ASSERT_VOP_LOCKED(vp, "ffs_snapshot vp"); out1: - KASSERT((sn != NULL && sbp != NULL && error == 0) || - (sn == NULL && sbp == NULL && error != 0), + KASSERT((sn != NULL && copy_fs != NULL && error == 0) || + (sn == NULL && copy_fs == NULL && error != 0), ("email phk@ and mckusick@")); /* * Resume operation on filesystem. @@ -703,7 +696,7 @@ out1: vp->v_mount->mnt_stat.f_mntonname, (long)endtime.tv_sec, endtime.tv_nsec / 1000000, redo, fs->fs_ncg); } - if (sbp == NULL) + if (copy_fs == NULL) goto out; /* * Copy allocation information from all the snapshots in @@ -793,6 +786,15 @@ out1: space = (char *)space + fs->fs_bsize; bawrite(nbp); } + error = bread(vp, lblkno(fs, fs->fs_sblockloc), fs->fs_bsize, + KERNCRED, &nbp); + if (error) { + brelse(nbp); + } else { + loc = blkoff(fs, fs->fs_sblockloc); + bcopy((char *)copy_fs, &nbp->b_data[loc], fs->fs_bsize); + bawrite(nbp); + } /* * As this is the newest list, it is the most inclusive, so * should replace the previous list. 
@@ -822,7 +824,8 @@ out1: vrele(vp); /* Drop extra reference */ done: free(copy_fs->fs_csp, M_UFSMNT); - bawrite(sbp); + free(copy_fs, M_UFSMNT); + copy_fs = NULL; out: NDFREE(&nd, NDF_ONLY_PNBUF); if (saved_nice > 0) { _______________________________________________ svn-src-all@freebsd.org mailing list http://lists.freebsd.org/mailman/listinfo/svn-src-all To unsubscribe, send any mail to "svn-src-all-unsubscribe@freebsd.org" Author: mckusick Date: Wed Sep 28 19:36:21 2011 New Revision: 225850 URL: http://svn.freebsd.org/changeset/base/225850 Log: MFC: r225806: This update eliminates the system hang reported in kern/160662 when taking a snapshot on a filesystem running with journaled soft updates. As journaled soft updates first appeared in 9.0, this will be the only MFC of this change. Approved by: re (kib) Reported by: Hans Ottevanger Fix verified by: Hans Ottevanger PR: kern/160662 Modified: stable/9/sys/ufs/ffs/ffs_snapshot.c Modified: stable/9/sys/ufs/ffs/ffs_snapshot.c ============================================================================== --- stable/9/sys/ufs/ffs/ffs_snapshot.c Wed Sep 28 19:01:15 2011 (r225849) +++ stable/9/sys/ufs/ffs/ffs_snapshot.c Wed Sep 28 19:36:21 2011 (r225850) @@ -203,7 +203,7 @@ ffs_snapshot(mp, snapfile) ufs2_daddr_t numblks, blkno, *blkp, *snapblklist; int error, cg, snaploc; int i, size, len, loc; - int flag; + uint64_t flag; struct timespec starttime = {0, 0}, endtime; char saved_nice = 0; long redo = 0, snaplistsize = 0; _______________________________________________ svn-src-all@freebsd.org mailing list http://lists.freebsd.org/mailman/listinfo/svn-src-all To unsubscribe, send any mail to "svn-src-all-unsubscribe@freebsd.org" Author: mckusick Date: Wed Sep 28 19:38:47 2011 New Revision: 225851 URL: http://svn.freebsd.org/changeset/base/225851 Log: MFC r225807: This update eliminates a lock-order reversal warning discovered while tracking down the system hang reported in kern/160662 and corrected in revision 225806 
(MFC'ed as 225850). The LOR is not the cause of the system hang and indeed cannot cause an actual deadlock. However, it can be easily eliminated by deferring the acquisition of a buflock until after all the vnode locks have been acquired. As journaled soft updates first appeared in 9.0, this will be the only MFC of this change. Approved by: re (kib) Reported by: Hans Ottevanger PR: kern/160662 Modified: stable/9/sys/ufs/ffs/ffs_snapshot.c Modified: stable/9/sys/ufs/ffs/ffs_snapshot.c ============================================================================== --- stable/9/sys/ufs/ffs/ffs_snapshot.c Wed Sep 28 19:36:21 2011 (r225850) +++ stable/9/sys/ufs/ffs/ffs_snapshot.c Wed Sep 28 19:38:47 2011 (r225851) @@ -212,7 +212,7 @@ ffs_snapshot(mp, snapfile) struct fs *copy_fs = NULL, *fs; struct thread *td = curthread; struct inode *ip, *xp; - struct buf *bp, *nbp, *ibp, *sbp = NULL; + struct buf *bp, *nbp, *ibp; struct nameidata nd; struct mount *wrtmp; struct vattr vat; @@ -460,21 +460,14 @@ restart: * Grab a copy of the superblock and its summary information. * We delay writing it until the suspension is released below. */ - error = bread(vp, lblkno(fs, fs->fs_sblockloc), fs->fs_bsize, - KERNCRED, &sbp); - if (error) { - brelse(sbp); - sbp = NULL; - goto out1; - } - loc = blkoff(fs, fs->fs_sblockloc); - copy_fs = (struct fs *)(sbp->b_data + loc); + copy_fs = malloc((u_long)fs->fs_bsize, M_UFSMNT, M_WAITOK); bcopy(fs, copy_fs, fs->fs_sbsize); if ((fs->fs_flags & (FS_UNCLEAN | FS_NEEDSFSCK)) == 0) copy_fs->fs_clean = 1; size = fs->fs_bsize < SBLOCKSIZE ? 
fs->fs_bsize : SBLOCKSIZE; if (fs->fs_sbsize < size) - bzero(&sbp->b_data[loc + fs->fs_sbsize], size - fs->fs_sbsize); + bzero(&((char *)copy_fs)[fs->fs_sbsize], + size - fs->fs_sbsize); size = blkroundup(fs, fs->fs_cssize); if (fs->fs_contigsumsize > 0) size += fs->fs_ncg * sizeof(int32_t); @@ -490,8 +483,8 @@ restart: len, KERNCRED, &bp)) != 0) { brelse(bp); free(copy_fs->fs_csp, M_UFSMNT); - bawrite(sbp); - sbp = NULL; + free(copy_fs, M_UFSMNT); + copy_fs = NULL; goto out1; } bcopy(bp->b_data, space, (u_int)len); @@ -606,8 +599,8 @@ loop: vdrop(xvp); if (error) { free(copy_fs->fs_csp, M_UFSMNT); - bawrite(sbp); - sbp = NULL; + free(copy_fs, M_UFSMNT); + copy_fs = NULL; MNT_VNODE_FOREACH_ABORT(mp, mvp); goto out1; } @@ -621,8 +614,8 @@ loop: error = softdep_journal_lookup(mp, &xvp); if (error) { free(copy_fs->fs_csp, M_UFSMNT); - bawrite(sbp); - sbp = NULL; + free(copy_fs, M_UFSMNT); + copy_fs = NULL; goto out1; } xp = VTOI(xvp); @@ -688,8 +681,8 @@ loop: VI_UNLOCK(devvp); ASSERT_VOP_LOCKED(vp, "ffs_snapshot vp"); out1: - KASSERT((sn != NULL && sbp != NULL && error == 0) || - (sn == NULL && sbp == NULL && error != 0), + KASSERT((sn != NULL && copy_fs != NULL && error == 0) || + (sn == NULL && copy_fs == NULL && error != 0), ("email phk@ and mckusick@")); /* * Resume operation on filesystem. @@ -703,7 +696,7 @@ out1: vp->v_mount->mnt_stat.f_mntonname, (long)endtime.tv_sec, endtime.tv_nsec / 1000000, redo, fs->fs_ncg); } - if (sbp == NULL) + if (copy_fs == NULL) goto out; /* * Copy allocation information from all the snapshots in @@ -793,6 +786,15 @@ out1: space = (char *)space + fs->fs_bsize; bawrite(nbp); } + error = bread(vp, lblkno(fs, fs->fs_sblockloc), fs->fs_bsize, + KERNCRED, &nbp); + if (error) { + brelse(nbp); + } else { + loc = blkoff(fs, fs->fs_sblockloc); + bcopy((char *)copy_fs, &nbp->b_data[loc], fs->fs_bsize); + bawrite(nbp); + } /* * As this is the newest list, it is the most inclusive, so * should replace the previous list. 
@@ -822,7 +824,8 @@ out1: vrele(vp); /* Drop extra reference */ done: free(copy_fs->fs_csp, M_UFSMNT); - bawrite(sbp); + free(copy_fs, M_UFSMNT); + copy_fs = NULL; out: NDFREE(&nd, NDF_ONLY_PNBUF); if (saved_nice > 0) { _______________________________________________ svn-src-all@freebsd.org mailing list http://lists.freebsd.org/mailman/listinfo/svn-src-all To unsubscribe, send any mail to "svn-src-all-unsubscribe@freebsd.org" State Changed From-To: open->closed The bug has been resolved in head and the fix MFC'ed to 9. |