| Summary: | stacking mount_null causes an error: mount_null: Resource deadlock avoided | ||
|---|---|---|---|
| Product: | Base System | Reporter: | akr <akr> |
| Component: | kern | Assignee: | freebsd-bugs (Nobody) <bugs> |
| Status: | Closed FIXED | ||
| Severity: | Affects Only Me | ||
| Priority: | Normal | ||
| Version: | 4.1-RELEASE | ||
| Hardware: | Any | ||
| OS: | Any | ||
|
Description
akr
2000-08-16 04:50:00 UTC
On Wed, 16 Aug 2000 12:45:21 +0900, akr@m17n.org wrote: > If null file system is stacked by mount_null, following error is caused. > > mount_null: Resource deadlock avoided > > Then, umount cannot unmount the file system with same error and some > commands hangs forever. NULLFS is known to be broken. You might want to try out the following patch (unfortunately I forget who sent it, but I'll have a look) and report back. Ciao, Sheldon. Common subdirectories: ./nullfs.orig/CVS and ./nullfs/CVS diff -c ./nullfs.orig/null_vfsops.c ./nullfs/null_vfsops.c *** ./nullfs.orig/null_vfsops.c Fri Aug 4 16:38:49 2000 --- ./nullfs/null_vfsops.c Sun Aug 6 19:58:23 2000 *************** *** 57,66 **** static MALLOC_DEFINE(M_NULLFSMNT, "NULLFS mount", "NULLFS mount structure"); - static int nullfs_fhtovp __P((struct mount *mp, struct fid *fidp, - struct vnode **vpp)); static int nullfs_checkexp __P((struct mount *mp, struct sockaddr *nam, int *extflagsp, struct ucred **credanonp)); static int nullfs_mount __P((struct mount *mp, char *path, caddr_t data, struct nameidata *ndp, struct proc *p)); static int nullfs_quotactl __P((struct mount *mp, int cmd, uid_t uid, --- 57,69 ---- static MALLOC_DEFINE(M_NULLFSMNT, "NULLFS mount", "NULLFS mount structure"); static int nullfs_checkexp __P((struct mount *mp, struct sockaddr *nam, int *extflagsp, struct ucred **credanonp)); + static int nullfs_extattrctl __P((struct mount *mp, int cmd, + const char *attrname, caddr_t arg, + struct proc *p)); + static int nullfs_fhtovp __P((struct mount *mp, struct fid *fidp, + struct vnode **vpp)); static int nullfs_mount __P((struct mount *mp, char *path, caddr_t data, struct nameidata *ndp, struct proc *p)); static int nullfs_quotactl __P((struct mount *mp, int cmd, uid_t uid, *************** *** 372,378 **** struct proc *p; { /* ! * XXX - Assumes no data cached at null layer. */ return (0); } --- 375,383 ---- struct proc *p; { /* ! * XXX There are no buffers to flush, as we don't ! * have VOP_BMAP. 
But there are pages. Don't ! * know how to flush pages. */ return (0); } *************** *** 383,390 **** ino_t ino; struct vnode **vpp; { ! return VFS_VGET(MOUNTTONULLMOUNT(mp)->nullm_vfs, ino, vpp); } static int --- 388,399 ---- ino_t ino; struct vnode **vpp; { + int error; + error = VFS_VGET(MOUNTTONULLMOUNT(mp)->nullm_vfs, ino, vpp); + if (error) + return (error); ! return (null_node_create(mp, *vpp, vpp)); } static int *************** *** 393,400 **** struct fid *fidp; struct vnode **vpp; { ! return VFS_FHTOVP(MOUNTTONULLMOUNT(mp)->nullm_vfs, fidp, vpp); } static int --- 402,413 ---- struct fid *fidp; struct vnode **vpp; { + int error; + error = VFS_FHTOVP(MOUNTTONULLMOUNT(mp)->nullm_vfs, fidp, vpp); + if (error) + return (error); ! return (null_node_create(mp, *vpp, vpp)); } static int diff -c ./nullfs.orig/null_vnops.c ./nullfs/null_vnops.c *** ./nullfs.orig/null_vnops.c Fri Aug 4 16:38:49 2000 --- ./nullfs/null_vnops.c Sun Aug 6 20:05:18 2000 *************** *** 37,43 **** * * Ancestors: * @(#)lofs_vnops.c 1.2 (Berkeley) 6/18/92 - * $FreeBSD: src/sys/miscfs/nullfs/null_vnops.c,v 1.38 1999/12/11 16:12:56 eivind Exp $ * ...and... 
* @(#)null_vnodeops.c 1.20 92/07/07 UCLA Ficus project * --- 37,42 ---- *************** *** 183,188 **** --- 182,193 ---- #include <sys/namei.h> #include <sys/malloc.h> #include <sys/buf.h> + + #include <vm/vm.h> + #include <vm/vm_extern.h> + #include <vm/vm_object.h> + #include <vm/vnode_pager.h> + #include <miscfs/nullfs/null.h> static int null_bug_bypass = 0; /* for debugging: enables bypass printf'ing */ *************** *** 191,203 **** --- 196,341 ---- static int null_access __P((struct vop_access_args *ap)); static int null_getattr __P((struct vop_getattr_args *ap)); + static int null_getpages __P((struct vop_getpages_args *ap)); static int null_inactive __P((struct vop_inactive_args *ap)); static int null_lock __P((struct vop_lock_args *ap)); static int null_lookup __P((struct vop_lookup_args *ap)); + static int null_open __P((struct vop_open_args *)); static int null_print __P((struct vop_print_args *ap)); + static int null_putpages __P((struct vop_putpages_args *)); static int null_reclaim __P((struct vop_reclaim_args *ap)); + static int null_rename __P((struct vop_rename_args *)); static int null_setattr __P((struct vop_setattr_args *ap)); + static int null_strategy __P((struct vop_strategy_args *ap)); static int null_unlock __P((struct vop_unlock_args *ap)); + static int null_write __P((struct vop_write_args *ap)); + + /* + * We handle this to eliminate null FS to lower FS + * file moving. Don't know why we don't allow this, + * possibly we should. + */ + static int + null_rename(ap) + struct vop_rename_args /* { + struct vnode *a_fdvp; + struct vnode *a_fvp; + struct componentname *a_fcnp; + struct vnode *a_tdvp; + struct vnode *a_tvp; + struct componentname *a_tcnp; + } */ *ap; + { + struct vnode *tdvp = ap->a_tdvp; + struct vnode *fvp = ap->a_fvp; + struct vnode *fdvp = ap->a_fdvp; + struct vnode *tvp = ap->a_tvp; + + /* Check for cross-device rename. 
*/ + if ((fvp->v_mount != tdvp->v_mount) || + (tvp && (fvp->v_mount != tvp->v_mount))) { + if (tdvp == tvp) + vrele(tdvp); + else + vput(tdvp); + if (tvp) + vput(tvp); + vrele(fdvp); + vrele(fvp); + return (EXDEV); + } + + return (null_bypass((struct vop_generic_args *)ap)); + } + + /* + * We handle this to create vm_object for lowervp as + * vn_open does for us. + */ + int + null_open(ap) + struct vop_open_args *ap; + { + int error; + + /* Process operation */ + error = null_bypass((struct vop_generic_args *)ap); + if (error) + return (error); + + /* Create object, vfs_object_create() check itself if vnode + * already have one or if it can't have one */ + vfs_object_create (NULLVPTOLOWERVP(ap->a_vp), ap->a_p, ap->a_cred); + + return (error); + } + + /* + * Panic VOP_STRATEGY operation. Shouldn't happen, as we + * don't have VOP_BMAP. + */ + static int + null_strategy(ap) + struct vop_strategy_args *ap; + { + panic("null_strategy: NOT IMPLEMENTED!"); + + return (EOPNOTSUPP); + } + + /* + * We need to look for file size changes, to keep + * vnode_pager up with correct file size. + */ + static int + null_write(ap) + struct vop_write_args /* { + struct vnode *a_vp; + struct uio *a_uio; + int a_ioflag; + struct ucred *a_cred; + } */ *ap; + { + struct vnode *vp = ap->a_vp; + struct uio *uio = ap->a_uio; + off_t newsize; + off_t oldsize; + int error; + + /* Process write operation */ + error = null_bypass((struct vop_generic_args *)ap); + + /* Update vm_object size, if exist and file size changed */ + if (vp->v_object) { + oldsize = vp->v_object->un_pager.vnp.vnp_size; + newsize = uio->uio_offset; + if (newsize > oldsize) + vnode_pager_setsize(vp, newsize); + } + + return (error); + } + + /* + * We use generic (put|get)pages routines and no VOP_BMAP operation, + * this force vnode_pager to use VOP_READ and VOP_WRITE + * to access pages. 
+ */ + int + null_getpages(ap) + struct vop_getpages_args *ap; + { + return vnode_pager_generic_getpages(ap->a_vp, ap->a_m, ap->a_count, + ap->a_reqpage); + } + + int + null_putpages(ap) + struct vop_putpages_args *ap; + { + return vnode_pager_generic_putpages(ap->a_vp, ap->a_m, ap->a_count, + ap->a_sync, ap->a_rtvals); + } /* * This is the 10-Apr-92 bypass routine. *************** *** 389,394 **** --- 527,534 ---- /* * Setattr call. Disallow write attempts if the layer is mounted read-only. + * Also look for file size change, and do appropriate vm_object + * modification. */ int null_setattr(ap) *************** *** 402,407 **** --- 542,548 ---- { struct vnode *vp = ap->a_vp; struct vattr *vap = ap->a_vap; + int error; if ((vap->va_flags != VNOVAL || vap->va_uid != (uid_t)VNOVAL || vap->va_gid != (gid_t)VNOVAL || vap->va_atime.tv_sec != VNOVAL || *************** *** 430,436 **** return (EROFS); } } ! return (null_bypass((struct vop_generic_args *)ap)); } /* --- 571,590 ---- return (EROFS); } } ! ! error = null_bypass((struct vop_generic_args *)ap); ! if (error || vp->v_object == NULL || vap->va_size == VNOVAL) ! return (error); ! ! /* If they have changed file size, tell to vnode_pager. ! * XXX If they have shrink the file, do we need to ! * invalidate buffers with vtruncbuf()? I think ! * no. ! */ ! if (vap->va_size != vp->v_object->un_pager.vnp.vnp_size) ! vnode_pager_setsize(vp, vap->va_size); ! ! return (error); } /* *************** *** 449,457 **** --- 603,618 ---- if ((error = null_bypass((struct vop_generic_args *)ap)) != 0) return (error); + + /* Fake fsid, also done in vn_stat(), but... */ + ap->a_vap->va_fsid = ap->a_vp->v_mount->mnt_stat.f_fsid.val[0]; + return (0); } + /* + * Handle to disallow write access if mounted read-only. 
+ */ static int null_access(ap) struct vop_access_args /* { *************** *** 519,528 **** --- 680,700 ---- } */ *ap; { vop_nounlock(ap); + + /* Support unlocking of inactive vnodes */ + if (NULLVPTOLOWERVP(ap->a_vp) == NULLVP) + return (0); + ap->a_flags &= ~LK_INTERLOCK; return (null_bypass((struct vop_generic_args *)ap)); } + /* + * We have to vrele lowervp as soon as possible. Otherway + * we can never discover that someone has removed file + * on lower FS. This technique removes advantages of hash, + * but i don't know other way. + */ static int null_inactive(ap) struct vop_inactive_args /* { *************** *** 533,557 **** struct vnode *vp = ap->a_vp; struct null_node *xp = VTONULL(vp); struct vnode *lowervp = xp->null_lowervp; ! /* ! * Do nothing (and _don't_ bypass). ! * Wait to vrele lowervp until reclaim, ! * so that until then our null_node is in the ! * cache and reusable. ! * We still have to tell the lower layer the vnode ! * is now inactive though. ! * ! * NEEDSWORK: Someday, consider inactive'ing ! * the lowervp and then trying to reactivate it ! * with capabilities (v_id) ! * like they do in the name lookup cache code. ! * That's too much work for now. ! */ ! VOP_INACTIVE(lowervp, ap->a_p); VOP_UNLOCK(ap->a_vp, 0, ap->a_p); return (0); } static int null_reclaim(ap) struct vop_reclaim_args /* { --- 705,729 ---- struct vnode *vp = ap->a_vp; struct null_node *xp = VTONULL(vp); struct vnode *lowervp = xp->null_lowervp; ! ! /* Release lowervp totaly */ ! VOP_UNLOCK(lowervp, 0, ap->a_p); ! vrele (lowervp); ! ! /* Forget all we knew, remove hash entry */ ! xp->null_lowervp = NULLVP; ! LIST_REMOVE(xp, null_hash); ! ! /* Unlock our vnode */ VOP_UNLOCK(ap->a_vp, 0, ap->a_p); + return (0); } + /* + * We can free memory in null_inactive, but we do this + * here. 
(Possible to guard vp->v_data to point somewhere) + */ static int null_reclaim(ap) struct vop_reclaim_args /* { *************** *** 560,578 **** } */ *ap; { struct vnode *vp = ap->a_vp; - struct null_node *xp = VTONULL(vp); - struct vnode *lowervp = xp->null_lowervp; - /* - * Note: in vop_reclaim, vp->v_op == dead_vnodeop_p, - * so we can't call VOPs on ourself. - */ - /* After this assignment, this node will not be re-used. */ - xp->null_lowervp = NULLVP; - LIST_REMOVE(xp, null_hash); FREE(vp->v_data, M_TEMP); vp->v_data = NULL; ! vrele (lowervp); return (0); } --- 732,741 ---- } */ *ap; { struct vnode *vp = ap->a_vp; FREE(vp->v_data, M_TEMP); vp->v_data = NULL; ! return (0); } *************** *** 593,607 **** --- 756,778 ---- vop_t **null_vnodeop_p; static struct vnodeopv_entry_desc null_vnodeop_entries[] = { { &vop_default_desc, (vop_t *) null_bypass }, + { &vop_access_desc, (vop_t *) null_access }, + { &vop_bmap_desc, (vop_t *) vop_eopnotsupp }, { &vop_getattr_desc, (vop_t *) null_getattr }, + { &vop_getpages_desc, (vop_t *) null_getpages }, { &vop_inactive_desc, (vop_t *) null_inactive }, { &vop_lock_desc, (vop_t *) null_lock }, { &vop_lookup_desc, (vop_t *) null_lookup }, + { &vop_open_desc, (vop_t *) null_open }, { &vop_print_desc, (vop_t *) null_print }, + { &vop_putpages_desc, (vop_t *) null_putpages }, { &vop_reclaim_desc, (vop_t *) null_reclaim }, + { &vop_rename_desc, (vop_t *) null_rename }, { &vop_setattr_desc, (vop_t *) null_setattr }, + { &vop_strategy_desc, (vop_t *) null_strategy }, { &vop_unlock_desc, (vop_t *) null_unlock }, + { &vop_write_desc, (vop_t *) null_write }, { NULL, NULL } }; static struct vnodeopv_desc null_vnodeop_opv_desc = State Changed From-To: open->feedback I've sent the originator a patch, the origin of which I'm still investigating. In article <14634.966412918@axl.ops.uunet.co.za>, Sheldon Hearn <sheldonh@uunet.co.za> writes: > NULLFS is known to be broken. Sorry. I overlooked the note in mount_null(8). 
> You might want to try out the following > patch (unfortunately I forget who sent it, but I'll have a look) and > report back. Unfortunately the patch doesn't fix the problem. -- Tanaka Akira On 16 Aug 2000 19:56:20 +0900, Tanaka Akira wrote: > > You might want to try out the following > > patch (unfortunately I forget who sent it, but I'll have a look) and > > report back. > > Unfortunately the patch doesn't fix the problem. I've asked Ustimenko Semen <semen@iclub.nsu.ru>, the author of the patch, to take a look at your PR. If you are not yet familiar with kernel debugging and would like to help Ustimenko investigate this problem, now would be a good time to have a look at http://www.freebsd.org/handbook/kerneldebug.html . Ciao, Sheldon. > > Unfortunately the patch doesn't fix the problem.
>
> I've asked Ustimenko Semen <semen@iclub.nsu.ru>, the author of the
> patch, to take a look at your PR.
Here is yet another nullfs patch for -current. It lacks some
of the features in Ustimenko's patch (e.g. support for nfs exporting
nfs mounted file systems), but it avoids the aliasing problem.
Index: sbin/mount_null/mount_null.c
===================================================================
RCS file: /home/ncvs/src/sbin/mount_null/mount_null.c,v
retrieving revision 1.14
diff -u -r1.14 mount_null.c
--- sbin/mount_null/mount_null.c 2000/07/28 11:54:08 1.14
+++ sbin/mount_null/mount_null.c 2000/08/06 19:16:28
@@ -101,9 +101,11 @@
(void)checkpath(argv[0], target);
(void)checkpath(argv[1], source);
+#if 0
if (subdir(target, source) || subdir(source, target))
errx(EX_USAGE, "%s (%s) and %s are not distinct paths",
argv[0], target, argv[1]);
+#endif
args.target = target;
Index: sys/kern/vfs_subr.c
===================================================================
RCS file: /home/ncvs/src/sys/kern/vfs_subr.c,v
retrieving revision 1.267
diff -u -r1.267 vfs_subr.c
--- sys/kern/vfs_subr.c 2000/07/24 05:28:29 1.267
+++ sys/kern/vfs_subr.c 2000/08/06 19:16:46
@@ -711,7 +711,7 @@
*/
simple_lock(&vp->v_interlock);
object = vp->v_object;
- if (object != NULL) {
+ if (object != NULL && (void *) vp == object->handle) {
vm_object_page_remove(object, 0, 0,
(flags & V_SAVE) ? TRUE : FALSE);
}
@@ -841,6 +841,8 @@
int s;
KASSERT(bp->b_vp == NULL, ("bgetvp: not free"));
+ if (vp->v_tag == VT_NULL)
+ panic("bgetvp: null node");
vhold(vp);
bp->b_vp = vp;
@@ -1190,6 +1192,8 @@
}
vn_syncer_add_to_worklist(newvp, delay);
}
+ if (newvp->v_tag == VT_NULL)
+ panic("reassignbuf: null node dirty list");
bp->b_xflags |= BX_VNDIRTY;
tbp = TAILQ_FIRST(listheadp);
if (tbp == NULL ||
@@ -1243,6 +1247,8 @@
TAILQ_INSERT_AFTER(listheadp, tbp, bp, b_vnbufs);
}
} else {
+ if (newvp->v_tag == VT_NULL)
+ panic("reassignbuf: null node clean list");
bp->b_xflags |= BX_VNCLEAN;
TAILQ_INSERT_TAIL(&newvp->v_cleanblkhd, bp, b_vnbufs);
if ((newvp->v_flag & VONWORKLST) &&
@@ -1683,7 +1689,7 @@
vinvalbuf(vp, 0, NOCRED, p, 0, 0);
}
- if ((obj = vp->v_object) != NULL) {
+ if ((obj = vp->v_object) != NULL && (void *) vp == obj->handle) {
if (obj->ref_count == 0) {
/*
* vclean() may be called twice. The first time
@@ -2529,7 +2535,8 @@
simple_lock(&vp->v_interlock);
if (vp->v_object &&
- (vp->v_object->flags & OBJ_MIGHTBEDIRTY)) {
+ (void *) vp == vp->v_object->handle &&
+ (vp->v_object->flags & OBJ_MIGHTBEDIRTY)) {
if (!vget(vp,
LK_INTERLOCK | LK_EXCLUSIVE | LK_RETRY | LK_NOOBJ, curproc)) {
if (vp->v_object) {
@@ -2613,6 +2620,21 @@
s = splbio();
simple_lock(&vnode_free_list_slock);
+ if ((vp->v_flag & VDOOMED) != 0)
+ panic("vfree called on doomed node");
+ if ((vp->v_flag & VFREE) != 0)
+ panic("vfree called on free node");
+ if (vp->v_holdcnt != 0)
+ panic("vfree called with holdcnt=%d (>0)", vp->v_holdcnt);
+ if (vp->v_usecount != 0)
+ panic("vfree called with usecnt=%d (>0)", vp->v_usecount);
+ if (vp->v_object != NULL &&
+ vp->v_object->handle == (void *) vp &&
+ (vp->v_object->resident_page_count != 0 ||
+ vp->v_object->ref_count != 0))
+ panic("vfree called with object RPC: %d, RC: %d",
+ vp->v_object->resident_page_count,
+ vp->v_object->ref_count);
KASSERT((vp->v_flag & VFREE) == 0, ("vnode already free"));
if (vp->v_flag & VAGE) {
TAILQ_INSERT_HEAD(&vnode_free_list, vp, v_freelist);
Index: sys/miscfs/nullfs/null.h
===================================================================
RCS file: /home/ncvs/src/sys/miscfs/nullfs/null.h,v
retrieving revision 1.13
diff -u -r1.13 null.h
--- sys/miscfs/nullfs/null.h 2000/05/26 02:04:58 1.13
+++ sys/miscfs/nullfs/null.h 2000/08/06 19:16:47
@@ -55,7 +55,15 @@
LIST_ENTRY(null_node) null_hash; /* Hash list */
struct vnode *null_lowervp; /* VREFed once */
struct vnode *null_vnode; /* Back pointer */
+ int flags; /* flags (see below) */
+ pid_t lockholder; /* owner of exclusive lock */
};
+
+/* Null node flags */
+
+#define NN_LOCK 0x01
+#define NN_WANT 0x02
+#define NN_FSYNC_SKIPPED 0x04
extern int nullfs_init __P((struct vfsconf *vfsp));
extern int null_node_create __P((struct mount *mp, struct vnode *target, struct vnode **vpp));
Index: sys/miscfs/nullfs/null_subr.c
===================================================================
RCS file: /home/ncvs/src/sys/miscfs/nullfs/null_subr.c,v
retrieving revision 1.23
diff -u -r1.23 null_subr.c
--- sys/miscfs/nullfs/null_subr.c 2000/05/26 02:04:59 1.23
+++ sys/miscfs/nullfs/null_subr.c 2000/08/06 19:16:47
@@ -45,6 +45,8 @@
#include <sys/mount.h>
#include <sys/malloc.h>
#include <miscfs/nullfs/null.h>
+#include <vm/vm.h>
+#include <vm/vm_object.h>
#define LOG2_SIZEVNODE 7 /* log2(sizeof struct vnode) */
#define NNULLNODECACHE 16
@@ -94,6 +96,7 @@
struct null_node_hashhead *hd;
struct null_node *a;
struct vnode *vp;
+ int retry;
/*
* Find hash base, and then search the (two-way) linked
@@ -102,6 +105,7 @@
* reference count (but NOT the lower vnode's VREF counter).
*/
hd = NULL_NHASH(lowervp);
+ retry = 0;
loop:
for (a = hd->lh_first; a != 0; a = a->null_hash.le_next) {
if (a->null_lowervp == lowervp && NULLTOV(a)->v_mount == mp) {
@@ -112,6 +116,9 @@
* the lower node.
*/
if (vget(vp, 0, p)) {
+ retry++;
+ if (retry < 2)
+ goto loop;
printf ("null_node_find: vget failed.\n");
goto loop;
};
@@ -138,6 +145,7 @@
struct null_node *xp;
struct vnode *othervp, *vp;
int error;
+ int waslocked;
/*
* Do the MALLOC before the getnewvnode since doing so afterward
@@ -157,6 +165,8 @@
xp->null_vnode = vp;
vp->v_data = xp;
xp->null_lowervp = lowervp;
+ xp->flags = 0;
+ xp->lockholder = 0;
/*
* Before we insert our new node onto the hash chains,
* check to see if someone else has beaten us to it.
@@ -167,12 +177,41 @@
FREE(xp, M_TEMP);
vp->v_type = VBAD; /* node is discarded */
vp->v_usecount = 0; /* XXX */
+ if (VSHOULDFREE(vp))
+ vfree(vp);
*vpp = othervp;
return 0;
};
VREF(lowervp); /* Extra VREF will be vrele'd in null_node_create */
hd = NULL_NHASH(lowervp);
LIST_INSERT_HEAD(hd, xp, null_hash);
+ /*
+ * Create the lower VM object, if needed
+ */
+ if ((lowervp->v_type == VREG) &&
+ ((lowervp->v_object == NULL) ||
+ (lowervp->v_object->flags & OBJ_DEAD))) {
+ waslocked = VOP_ISLOCKED(lowervp, NULL); /* XXX: Wrong */
+ if (!waslocked) {
+ simple_lock(&lowervp->v_interlock);
+ vn_lock(lowervp,
+ LK_EXCLUSIVE | LK_INTERLOCK | LK_RETRY,
+ curproc);
+ }
+ vfs_object_create(lowervp, curproc, curproc->p_ucred);
+ if (!waslocked) {
+ simple_lock(&lowervp->v_interlock);
+ VOP_UNLOCK(lowervp, LK_INTERLOCK, curproc);
+ }
+
+ }
+ vp->v_object = lowervp->v_object; /* XXX: share object */
+ if (vp->v_object != NULL) {
+ if ((lowervp->v_flag & VOBJBUF) != 0)
+ vp->v_flag |= VOBJBUF;
+ if ((lowervp->v_flag & VTEXT) != 0)
+ vp->v_flag |= VTEXT;
+ }
return 0;
}
Index: sys/miscfs/nullfs/null_vfsops.c
===================================================================
RCS file: /home/ncvs/src/sys/miscfs/nullfs/null_vfsops.c,v
retrieving revision 1.37
diff -u -r1.37 null_vfsops.c
--- sys/miscfs/nullfs/null_vfsops.c 2000/07/28 11:54:09 1.37
+++ sys/miscfs/nullfs/null_vfsops.c 2000/08/06 19:16:47
@@ -93,7 +93,6 @@
struct vnode *nullm_rootvp;
struct null_mount *xmp;
u_int size;
- int isvnunlocked = 0;
#ifdef DEBUG
printf("nullfs_mount(mp = %p)\n", (void *)mp);
@@ -115,46 +114,23 @@
return (error);
/*
- * Unlock lower node to avoid deadlock.
- * (XXX) VOP_ISLOCKED is needed?
- */
- if ((mp->mnt_vnodecovered->v_op == null_vnodeop_p) &&
- VOP_ISLOCKED(mp->mnt_vnodecovered, NULL)) {
- VOP_UNLOCK(mp->mnt_vnodecovered, 0, p);
- isvnunlocked = 1;
- }
- /*
* Find lower node
*/
- NDINIT(ndp, LOOKUP, FOLLOW|WANTPARENT|LOCKLEAF,
- UIO_USERSPACE, args.target, p);
+ NDINIT(ndp, LOOKUP, FOLLOW | LOCKLEAF,
+ UIO_USERSPACE, args.target, p);
error = namei(ndp);
- /*
- * Re-lock vnode.
- */
- if (isvnunlocked && !VOP_ISLOCKED(mp->mnt_vnodecovered, NULL))
- vn_lock(mp->mnt_vnodecovered, LK_EXCLUSIVE | LK_RETRY, p);
-
if (error)
return (error);
NDFREE(ndp, NDF_ONLY_PNBUF);
- /*
- * Sanity check on lower vnode
- */
lowerrootvp = ndp->ni_vp;
- vrele(ndp->ni_dvp);
- ndp->ni_dvp = NULLVP;
-
/*
- * Check multi null mount to avoid `lock against myself' panic.
+ * Sanity check on lower vnode
*/
- if (lowerrootvp == VTONULL(mp->mnt_vnodecovered)->null_lowervp) {
-#ifdef DEBUG
- printf("nullfs_mount: multi null mount?\n");
-#endif
- return (EDEADLK);
+ if (VOP_ISLOCKED(mp->mnt_vnodecovered, NULL)) {
+ vput(lowerrootvp);
+ return EBUSY;
}
xmp = (struct null_mount *) malloc(sizeof(struct null_mount),
@@ -171,19 +147,18 @@
*/
error = null_node_create(mp, lowerrootvp, &vp);
/*
- * Unlock the node (either the lower or the alias)
- */
- VOP_UNLOCK(vp, 0, p);
- /*
* Make sure the node alias worked
*/
if (error) {
- vrele(lowerrootvp);
+ vput(lowerrootvp);
free(xmp, M_NULLFSMNT); /* XXX */
return (error);
}
-
/*
+ * Unlock the node (either the lower or the alias)
+ */
+ VOP_UNLOCK(vp, 0, p);
+ /*
* Keep a held reference to the root vnode.
* It is vrele'd in nullfs_unmount.
*/
@@ -269,7 +244,8 @@
/*
* And blow it away for future re-use
*/
- vgone(nullm_rootvp);
+ if (nullm_rootvp->v_mount == mp)
+ vgone(nullm_rootvp);
/*
* Finally, throw away the null_mount structure
*/
@@ -297,18 +273,7 @@
*/
vp = MOUNTTONULLMOUNT(mp)->nullm_rootvp;
VREF(vp);
- if (VOP_ISLOCKED(vp, NULL)) {
- /*
- * XXX
- * Should we check type of node?
- */
-#ifdef DEBUG
- printf("nullfs_root: multi null mount?\n");
-#endif
- vrele(vp);
- return (EDEADLK);
- } else
- vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p);
+ vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p);
*vpp = vp;
return 0;
}
Index: sys/miscfs/nullfs/null_vnops.c
===================================================================
RCS file: /home/ncvs/src/sys/miscfs/nullfs/null_vnops.c,v
retrieving revision 1.39
diff -u -r1.39 null_vnops.c
--- sys/miscfs/nullfs/null_vnops.c 2000/04/18 15:15:23 1.39
+++ sys/miscfs/nullfs/null_vnops.c 2000/08/06 19:16:51
@@ -182,7 +182,12 @@
#include <sys/mount.h>
#include <sys/namei.h>
#include <sys/malloc.h>
+#include <sys/bio.h>
+#include <sys/buf.h>
+#include <sys/proc.h>
#include <miscfs/nullfs/null.h>
+#include <vm/vm.h>
+#include <vm/vm_object.h>
static int null_bug_bypass = 0; /* for debugging: enables bypass printf'ing */
SYSCTL_INT(_debug, OID_AUTO, nullfs_bug_bypass, CTLFLAG_RW,
@@ -191,12 +196,15 @@
static int null_access __P((struct vop_access_args *ap));
static int null_getattr __P((struct vop_getattr_args *ap));
static int null_inactive __P((struct vop_inactive_args *ap));
+static int null_islocked __P((struct vop_islocked_args *ap));
static int null_lock __P((struct vop_lock_args *ap));
static int null_lookup __P((struct vop_lookup_args *ap));
static int null_print __P((struct vop_print_args *ap));
static int null_reclaim __P((struct vop_reclaim_args *ap));
static int null_setattr __P((struct vop_setattr_args *ap));
static int null_unlock __P((struct vop_unlock_args *ap));
+static int null_fsync __P((struct vop_fsync_args *ap));
+static int null_getwritemount __P((struct vop_getwritemount_args *ap));
/*
* This is the 10-Apr-92 bypass routine.
@@ -236,6 +244,7 @@
struct vnode ***vppp;
struct vnodeop_desc *descp = ap->a_desc;
int reles, i;
+ struct mount *old_mount;
if (null_bug_bypass)
printf ("null_bypass: %s\n", descp->vdesc_name);
@@ -288,6 +297,11 @@
*/
error = VCALL(*(vps_p[0]), descp->vdesc_offset, ap);
+ if (descp->vdesc_vp_offsets[0] != VDESC_NO_OFFSET &&
+ old_vps[0] != NULLVP)
+ old_mount = old_vps[0]->v_mount;
+ else
+ old_mount = NULL;
/*
* Maintain the illusion of call-by-value
* by restoring vnodes in the argument structure
@@ -323,7 +337,7 @@
vppp = VOPARG_OFFSETTO(struct vnode***,
descp->vdesc_vpp_offset,ap);
if (*vppp)
- error = null_node_create(old_vps[0]->v_mount, **vppp, *vppp);
+ error = null_node_create(old_mount, **vppp, *vppp);
}
out:
@@ -496,10 +510,31 @@
struct proc *a_p;
} */ *ap;
{
+ struct null_node *xp;
+ pid_t pid;
+ xp = VTONULL(ap->a_vp);
+ if ((xp->flags & NN_LOCK) != 0) {
+ pid = (ap->a_p == NULL) ? LK_KERNPROC : ap->a_p->p_pid;
+ if (xp->lockholder == pid) {
+ panic("null_lock: recursive lock");
+ } else {
+ if ((ap->a_flags & LK_NOWAIT) != 0)
+ return EBUSY;
+ while ((xp->flags & NN_LOCK) != 0) {
+ xp->flags |= NN_WANT;
+ tsleep(xp, PINOD, "nnlock", 0);
+ }
+ }
+ }
+
vop_nolock(ap);
- if ((ap->a_flags & LK_TYPE_MASK) == LK_DRAIN)
- return (0);
+ if ((ap->a_flags & LK_TYPE_MASK) == LK_DRAIN) {
+ pid = (ap->a_p == NULL) ? LK_KERNPROC : ap->a_p->p_pid;
+ xp->flags |= NN_LOCK;
+ xp->lockholder = pid;
+ return 0;
+ }
ap->a_flags &= ~LK_INTERLOCK;
return (null_bypass((struct vop_generic_args *)ap));
}
@@ -517,28 +552,58 @@
struct proc *a_p;
} */ *ap;
{
+ struct vnode *vp = ap->a_vp;
+ struct null_node *xp;
+ pid_t pid;
+
+ xp = VTONULL(vp);
+ if ((xp->flags & NN_LOCK) != 0) {
+ pid = (ap->a_p == NULL) ? LK_KERNPROC : ap->a_p->p_pid;
+ if (xp->lockholder == pid) {
+ xp->flags &= ~ (NN_LOCK | NN_FSYNC_SKIPPED);
+ xp->lockholder = 0;
+ if ((xp->flags & NN_WANT) != 0) {
+ xp->flags &= ~NN_WANT;
+ wakeup(xp);
+ }
+ return 0;
+ }
+ }
+
vop_nounlock(ap);
ap->a_flags &= ~LK_INTERLOCK;
return (null_bypass((struct vop_generic_args *)ap));
}
static int
+null_islocked(ap)
+ struct vop_islocked_args /* {
+ struct vnode *a_vp;
+ } */ *ap;
+{
+ struct vnode *vp;
+ struct null_node *xp;
+
+ vp = ap->a_vp;
+ xp = VTONULL(vp);
+ if (xp == NULL || (xp->flags & NN_LOCK) != 0)
+ return LK_EXCLUSIVE;
+
+ return (null_bypass((struct vop_generic_args *)ap));
+}
+
+static int
null_inactive(ap)
struct vop_inactive_args /* {
struct vnode *a_vp;
struct proc *a_p;
} */ *ap;
{
- struct vnode *vp = ap->a_vp;
- struct null_node *xp = VTONULL(vp);
- struct vnode *lowervp = xp->null_lowervp;
/*
* Do nothing (and _don't_ bypass).
* Wait to vrele lowervp until reclaim,
* so that until then our null_node is in the
* cache and reusable.
- * We still have to tell the lower layer the vnode
- * is now inactive though.
*
* NEEDSWORK: Someday, consider inactive'ing
* the lowervp and then trying to reactivate it
@@ -546,8 +611,8 @@
* like they do in the name lookup cache code.
* That's too much work for now.
*/
- VOP_INACTIVE(lowervp, ap->a_p);
VOP_UNLOCK(ap->a_vp, 0, ap->a_p);
+ vrecycle(ap->a_vp, (struct simplelock *) 0, ap->a_p);
return (0);
}
@@ -561,21 +626,85 @@
struct vnode *vp = ap->a_vp;
struct null_node *xp = VTONULL(vp);
struct vnode *lowervp = xp->null_lowervp;
+ vm_object_t object;
/*
* Note: in vop_reclaim, vp->v_op == dead_vnodeop_p,
* so we can't call VOPs on ourself.
*/
+
+ object = vp->v_object;
+ if (object != lowervp->v_object)
+ panic("null_reclaim: wrong object");
+ if (object != NULL) {
+ if ((void *) lowervp != object->handle)
+ panic("null_reclaim: wrong object handle");
+ vp->v_object = NULL;
+ vp->v_flag &= ~(VTEXT|VOBJBUF);
+ if (lowervp->v_usecount < 1 + object->ref_count)
+ panic("null_reclaim: lowervp->v_usecount too low");
+ }
/* After this assignment, this node will not be re-used. */
xp->null_lowervp = NULLVP;
LIST_REMOVE(xp, null_hash);
FREE(vp->v_data, M_TEMP);
vp->v_data = NULL;
+ if (lowervp->v_usecount < 1)
+ panic("null_reclaim: lowervp->v_usecount < 1");
vrele (lowervp);
return (0);
}
static int
+null_fsync(ap)
+ struct vop_fsync_args /* {
+ struct vnodeop_desc *a_desc;
+ struct vnode *a_vp;
+ struct ucred *a_cred;
+ int a_waitfor;
+ struct proc *a_p;
+ } */ *ap;
+{
+ struct vnode *vp;
+ struct null_node *xp;
+ pid_t pid;
+
+ vp = ap->a_vp;
+ xp = VTONULL(vp);
+
+ if (!TAILQ_EMPTY(&vp->v_dirtyblkhd))
+ panic("dirty buffers on null vnode");
+ if (!TAILQ_EMPTY(&vp->v_cleanblkhd))
+ panic("clean buffers on null vnode");
+
+ if ((xp->flags & NN_LOCK) != 0) {
+ pid = (ap->a_p == NULL) ? LK_KERNPROC : ap->a_p->p_pid;
+ if (xp->lockholder == pid)
+ if (/* vp->v_object != NULL &&
+ vp->v_object->handle != (void *) vp && */
+ (xp->flags & NN_FSYNC_SKIPPED) == 0) {
+ xp->flags |= NN_FSYNC_SKIPPED;
+ return 0;
+ }
+ }
+ return (null_bypass((struct vop_generic_args *)ap));
+}
+
+int
+null_getwritemount(ap)
+ struct vop_getwritemount_args /* {
+ struct vnode *a_vp;
+ struct mount **a_mpp;
+ } */ *ap;
+{
+ if (VTONULL(ap->a_vp) == NULL) {
+ return VOP_GETWRITEMOUNT(ap->a_vp->v_mount->mnt_vnodecovered, ap->a_mpp);
+ } else
+ return (null_bypass((struct vop_generic_args *) ap));
+}
+
+
+static int
null_print(ap)
struct vop_print_args /* {
struct vnode *a_vp;
@@ -595,12 +724,15 @@
{ &vop_access_desc, (vop_t *) null_access },
{ &vop_getattr_desc, (vop_t *) null_getattr },
{ &vop_inactive_desc, (vop_t *) null_inactive },
+ { &vop_islocked_desc, (vop_t *) null_islocked },
{ &vop_lock_desc, (vop_t *) null_lock },
{ &vop_lookup_desc, (vop_t *) null_lookup },
{ &vop_print_desc, (vop_t *) null_print },
{ &vop_reclaim_desc, (vop_t *) null_reclaim },
{ &vop_setattr_desc, (vop_t *) null_setattr },
{ &vop_unlock_desc, (vop_t *) null_unlock },
+ { &vop_fsync_desc, (vop_t *) null_fsync },
+ { &vop_getwritemount_desc, (vop_t *) null_getwritemount },
{ NULL, NULL }
};
static struct vnodeopv_desc null_vnodeop_opv_desc =
Index: sys/vm/vnode_pager.c
===================================================================
RCS file: /home/ncvs/src/sys/vm/vnode_pager.c,v
retrieving revision 1.124
diff -u -r1.124 vnode_pager.c
--- sys/vm/vnode_pager.c 2000/07/11 22:07:57 1.124
+++ sys/vm/vnode_pager.c 2000/08/06 19:17:02
@@ -154,7 +154,7 @@
vp->v_usecount++;
} else {
object->ref_count++;
- vp->v_usecount++;
+ ((struct vnode *) object->handle)->v_usecount++;
}
vp->v_flag &= ~VOLOCK;
- Tor Egge
Just a quick note to explain that the PR is still in feedback because Ustimenko has said that he'll try to come up with something that incorporates both his own and Tor's patch. Ciao, Sheldon. State Changed From-To: feedback->closed Automatic feedback timeout. If additional feedback that warrants the re-opening of this PR is available but not included in the audit trail, please include the feedback in a reply to this message (preserving the Subject line) and ask that the PR be re-opened. |