Attempting to remount a filesystem fails to flush its outstanding buffers, and spews out errors like softdep_waitidle: Failed to flush worklist for 0xc3b60d0c Once this occurs the filesystem can't be re-mounted, can't be umount'd, and can't be umount -f'd. Any attempt to do so will elicit a repeat of the softdep_waitidle message. Reboot won't cleanly unmount it either. Post-reboot fscking turns up a lot of errors. Using umount instead of mount -u doesn't provoke it (or at least, does so MUCH less often; mount -u does it reliably). If filesystem is made without softupdates, it also doesn't occur. This has been going on for at least a year in -CURRENT, and so it's probably in RELENG_7 too. See previous reports like <http://lists.freebsd.org/pipermail/freebsd-current/2007-February/069178.html> How-To-Repeat: NOTE: Running this script will produce a mounted filesystem on /mnt that can't be gotten rid of without rebooting. No additional instability has ever been observed from this, but be warned. #!/bin/sh -ex # Make file rm -f /tmp/fs dd if=/dev/zero of=/tmp/fs bs=1m count=20 # Make vnode mdconfig -a -t vnode -f /tmp/fs -u 10 # Make the filesystem newfs -U /dev/md10 # Mount mount /dev/md10 /mnt # Make the big tree mtree -deU -f /etc/mtree/BSD.usr.dist -p /mnt/ >> /dev/null # Sync up sync ; sync ; sync # Now blow it away rm -rf /mnt/* # This works... #umount /mnt # But this doesn't mount -u -o ro /mnt # BOOM.
Responsible Changed From-To: freebsd-bugs->kib I have some intermediate patches, and intent to continue the work on it. It owuld be good to see whether the existing changes help in this case.
The patch below got some review by Tor Egge, who would prefer different solution. Meantime, you can try this. [From my softdep_insmntque git branch] diff --git a/sys/kern/vfs_subr.c b/sys/kern/vfs_subr.c index a65344b..d63bb75 100644 --- a/sys/kern/vfs_subr.c +++ b/sys/kern/vfs_subr.c @@ -1019,9 +1019,15 @@ insmntque1(struct vnode *vp, struct mount *mp, KASSERT(vp->v_mount == NULL, ("insmntque: vnode already on per mount vnode list")); VNASSERT(mp != NULL, vp, ("Don't call insmntque(foo, NULL)")); +#ifdef DEBUG_VFS_LOCKS + if (!VFS_NEEDSGIANT(mp)) + ASSERT_VOP_ELOCKED(vp, + "insmntque: mp-safe fs and non-locked vp"); +#endif MNT_ILOCK(mp); if ((mp->mnt_kern_flag & MNTK_NOINSMNTQ) != 0 && - mp->mnt_nvnodelistsize == 0) { + mp->mnt_nvnodelistsize == 0 && + VOP_ISLOCKED(vp, curthread) && !(vp->v_vflag & VV_FORCEINSMQ)) { MNT_IUNLOCK(mp); if (dtr != NULL) dtr(vp, dtr_arg); @@ -3133,9 +3139,13 @@ vfs_allocate_syncvnode(struct mount *mp) return (error); } vp->v_type = VNON; + vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); + vp->v_vflag |= VV_FORCEINSMQ; error = insmntque(vp, mp); if (error != 0) panic("vfs_allocate_syncvnode: insmntque failed"); + vp->v_vflag &= ~VV_FORCEINSMQ; + VOP_UNLOCK(vp, 0); /* * Place the vnode onto the syncer worklist. We attempt to * scatter them about on the list so that they will go off diff --git a/sys/sys/vnode.h b/sys/sys/vnode.h index 1bbe8b8..bc272f8 100644 --- a/sys/sys/vnode.h +++ b/sys/sys/vnode.h @@ -255,6 +255,7 @@ struct xvnode { #define VV_NOKNOTE 0x0200 /* don't activate knotes on this vnode */ #define VV_DELETED 0x0400 /* should be removed */ #define VV_MD 0x0800 /* vnode backs the md device */ +#define VV_FORCEINSMQ 0x1000 /* force the insmntque to succeed */ /* * Vnode attributes. A field value of VNOVAL represents a field whose value diff --git a/sys/ufs/ffs/ffs_extern.h b/sys/ufs/ffs/ffs_extern.h index a496542..3c8d323 100644 --- a/sys/ufs/ffs/ffs_extern.h +++ b/sys/ufs/ffs/ffs_extern.h @@ -87,6 +87,9 @@ int ffs_valloc(struct vnode *, int, struct ucred *, struct vnode **); int ffs_vfree(struct vnode *, ino_t, int); vfs_vget_t ffs_vget; +int ffs_vgetf(struct mount *, ino_t, int, struct vnode **, int); + +#define FFSV_FORCEINSMQ 0x0001 extern struct vop_vector ffs_vnodeops1; extern struct vop_vector ffs_fifoops1; diff --git a/sys/ufs/ffs/ffs_softdep.c b/sys/ufs/ffs/ffs_softdep.c index 64dcd0a..c82b9d6 100644 --- a/sys/ufs/ffs/ffs_softdep.c +++ b/sys/ufs/ffs/ffs_softdep.c @@ -919,8 +919,8 @@ process_worklist_item(mp, flags) wk->wk_state |= INPROGRESS; ump->softdep_on_worklist_inprogress++; FREE_LOCK(&lk); - ffs_vget(mp, WK_DIRREM(wk)->dm_oldinum, - LK_NOWAIT | LK_EXCLUSIVE, &vp); + ffs_vgetf(mp, WK_DIRREM(wk)->dm_oldinum, + LK_NOWAIT | LK_EXCLUSIVE, &vp, FFSV_FORCEINSMQ); ACQUIRE_LOCK(&lk); wk->wk_state &= ~INPROGRESS; ump->softdep_on_worklist_inprogress--; @@ -1074,8 +1074,11 @@ softdep_flushfiles(oldmnt, flags, td) int flags; struct thread *td; { - int error, count, loopcnt; + int error, count, loopcnt, retry_flush_count, retry; + loopcnt = 10; + retry_flush_count = 3; +retry_flush: error = 0; /* @@ -1084,7 +1087,7 @@ softdep_flushfiles(oldmnt, flags, td) * creates. In theory, this loop can happen at most twice, * but we give it a few extra just to be sure. */ - for (loopcnt = 10; loopcnt > 0; loopcnt--) { + for (; loopcnt > 0; loopcnt--) { /* * Do another flush in case any vnodes were brought in * as part of the cleanup operations. @@ -1107,6 +1110,24 @@ softdep_flushfiles(oldmnt, flags, td) } if (!error) error = softdep_waitidle(oldmnt); + if (!error) { + if (oldmnt->mnt_kern_flag & MNTK_UNMOUNT) { + retry = 0; + MNT_ILOCK(oldmnt); + KASSERT((oldmnt->mnt_kern_flag & MNTK_NOINSMNTQ) != 0, + ("softdep_flushfiles: !MNTK_NOINSMNTQ")); + if (oldmnt->mnt_nvnodelistsize > 0) { + if (--retry_flush_count > 0) { + retry = 1; + loopcnt = 3; + } else + error = EBUSY; + } + MNT_IUNLOCK(oldmnt); + if (retry) + goto retry_flush; + } + } return (error); } @@ -2770,8 +2791,9 @@ handle_workitem_freeblocks(freeblks, flags) */ if (freeblks->fb_chkcnt != blocksreleased && (fs->fs_flags & FS_UNCLEAN) != 0 && - ffs_vget(freeblks->fb_list.wk_mp, freeblks->fb_previousinum, - (flags & LK_NOWAIT) | LK_EXCLUSIVE, &vp) == 0) { + ffs_vgetf(freeblks->fb_list.wk_mp, freeblks->fb_previousinum, + (flags & LK_NOWAIT) | LK_EXCLUSIVE, &vp, FFSV_FORCEINSMQ) + == 0) { ip = VTOI(vp); DIP_SET(ip, i_blocks, DIP(ip, i_blocks) + \ freeblks->fb_chkcnt - blocksreleased); @@ -3558,8 +3580,8 @@ handle_workitem_remove(dirrem, xp) int error; if ((vp = xp) == NULL && - (error = ffs_vget(dirrem->dm_list.wk_mp, - dirrem->dm_oldinum, LK_EXCLUSIVE, &vp)) != 0) { + (error = ffs_vgetf(dirrem->dm_list.wk_mp, + dirrem->dm_oldinum, LK_EXCLUSIVE, &vp, FFSV_FORCEINSMQ)) != 0) { softdep_error("handle_workitem_remove: vget", error); return; } @@ -5074,9 +5096,11 @@ softdep_fsync(vp) * for details on possible races. */ FREE_LOCK(&lk); - if (ffs_vget(mp, parentino, LK_NOWAIT | LK_EXCLUSIVE, &pvp)) { + if (ffs_vgetf(mp, parentino, LK_NOWAIT | LK_EXCLUSIVE, &pvp, + FFSV_FORCEINSMQ)) { VOP_UNLOCK(vp, 0); - error = ffs_vget(mp, parentino, LK_EXCLUSIVE, &pvp); + error = ffs_vgetf(mp, parentino, LK_EXCLUSIVE, + &pvp, FFSV_FORCEINSMQ); vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); if (error != 0) return (error); @@ -5576,7 +5600,8 @@ flush_pagedep_deps(pvp, mp, diraddhdp) inum = dap->da_newinum; if (dap->da_state & MKDIR_BODY) { FREE_LOCK(&lk); - if ((error = ffs_vget(mp, inum, LK_EXCLUSIVE, &vp))) + if ((error = ffs_vgetf(mp, inum, LK_EXCLUSIVE, &vp, + FFSV_FORCEINSMQ))) break; if ((error=ffs_syncvnode(vp, MNT_NOWAIT)) || (error=ffs_syncvnode(vp, MNT_NOWAIT))) { @@ -5911,7 +5936,8 @@ clear_remove(td) if (vn_start_write(NULL, &mp, V_NOWAIT) != 0) continue; FREE_LOCK(&lk); - if ((error = ffs_vget(mp, ino, LK_EXCLUSIVE, &vp))) { + if ((error = ffs_vgetf(mp, ino, LK_EXCLUSIVE, &vp, + FFSV_FORCEINSMQ))) { softdep_error("clear_remove: vget", error); vn_finished_write(mp); ACQUIRE_LOCK(&lk); @@ -5982,7 +6008,8 @@ clear_inodedeps(td) if (vn_start_write(NULL, &mp, V_NOWAIT) != 0) continue; FREE_LOCK(&lk); - if ((error = ffs_vget(mp, ino, LK_EXCLUSIVE, &vp)) != 0) { + if ((error = ffs_vgetf(mp, ino, LK_EXCLUSIVE, &vp, + FFSV_FORCEINSMQ)) != 0) { softdep_error("clear_inodedeps: vget", error); vn_finished_write(mp); ACQUIRE_LOCK(&lk); diff --git a/sys/ufs/ffs/ffs_vfsops.c b/sys/ufs/ffs/ffs_vfsops.c index 09249ac..7ef86d1 100644 --- a/sys/ufs/ffs/ffs_vfsops.c +++ b/sys/ufs/ffs/ffs_vfsops.c @@ -1308,6 +1308,17 @@ ffs_vget(mp, ino, flags, vpp) int flags; struct vnode **vpp; { + return (ffs_vgetf(mp, ino, flags, vpp, 0)); +} + +int +ffs_vgetf(mp, ino, flags, vpp, ffs_flags) + struct mount *mp; + ino_t ino; + int flags; + struct vnode **vpp; + int ffs_flags; +{ struct fs *fs; struct inode *ip; struct ufsmount *ump; @@ -1382,12 +1393,15 @@ ffs_vget(mp, ino, flags, vpp) td = curthread; lockmgr(vp->v_vnlock, LK_EXCLUSIVE, NULL); + if (ffs_flags & FFSV_FORCEINSMQ) + vp->v_vflag |= VV_FORCEINSMQ; error = insmntque(vp, mp); if (error != 0) { uma_zfree(uma_inode, ip); *vpp = NULL; return (error); } + vp->v_vflag &= ~VV_FORCEINSMQ; error = vfs_hash_insert(vp, ino, flags, td, vpp, NULL, NULL); if (error || *vpp != NULL) return (error);
State Changed From-To: open->feedback Supposed fixes where in CURRENT for some time, and I MFCed them to RELENG_7 approximately a week ago. Please confirm that the problem is fixed.
On Mon, Feb 23, 2009 at 11:14:45AM +0000 I heard the voice of kib@FreeBSD.org, and lo! it spake thus: > > Supposed fixes where in CURRENT for some time, and I MFCed them to > RELENG_7 approximately a week ago. > > Please confirm that the problem is fixed. I've just tested on a Feb 18 RELENG_7 and a Mar 14 -CURRENT, and can't make it happen anymore in a few rounds of testing (previously it was pretty much guaranteed in a single try). Looks fixed, thanks! :) -- Matthew Fuller (MF4839) | fullermd@over-yonder.net Systems/Network Administrator | http://www.over-yonder.net/~fullermd/ On the Internet, nobody can hear you scream.
State Changed From-To: feedback->closed Closed after the submitter confirmation.