[pnfs] [PATCH 5/7] pnfs: start rework of layout locks

Fred Isaman iisaman at citi.umich.edu
Mon Jun 9 16:29:06 EDT 2008


Currently have:

nfsi->lo_lock protects:
    nfsi->current_layout
    nfsi->pnfs_layout_suspend
    nfsi->pnfs_layout_state
    lo->refcount
    lo->segs
and is incorrectly held over call to alloc_lseg

pnfs_spinlock protects:
    pnfs_modules_tbl
    nfsi->layoutcommit_ctx
    nfsi->pnfs_write_begin_pos ??? (sometimes)
    nfsi->pnfs_write_end_pos   ??? (sometimes)
and is incorrectly held over call to setup_layoutcommit

Goal is to have:

nfsi->lo_lock protects:
    nfsi->*
lo->lseg_lock protects:
    lo->segs
pnfs_spinlock protects:
    pnfs_modules_tbl

Start by changing lock grabbed by get_lock_current_layout to new
lo->lseg_lock, with minimal other changes.

Signed-off-by: Fred Isaman <iisaman at citi.umich.edu>
---
 fs/nfs/pnfs.c             |   55 +++++++++++++++++++++++++++++---------------
 include/linux/nfs4_pnfs.h |    3 +-
 2 files changed, 38 insertions(+), 20 deletions(-)

diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c
index 52f003d..ebb4454 100644
--- a/fs/nfs/pnfs.c
+++ b/fs/nfs/pnfs.c
@@ -161,16 +161,16 @@ pnfs_need_layoutcommit(struct nfs_inode *nfsi, struct nfs_open_context *ctx)
 {
 	dprintk("%s: current_layout=%p layoutcommit_ctx=%p ctx=%p\n", __func__,
 		nfsi->current_layout, nfsi->layoutcommit_ctx, ctx);
-	spin_lock(&pnfs_spinlock);
+	spin_lock(&nfsi->lo_lock);
 	if (nfsi->current_layout && !nfsi->layoutcommit_ctx) {
 		nfsi->layoutcommit_ctx = get_nfs_open_context(ctx);
 		nfsi->change_attr++;
-		spin_unlock(&pnfs_spinlock);
+		spin_unlock(&nfsi->lo_lock);
 		dprintk("%s: Set layoutcommit_ctx=%p\n", __func__,
 			nfsi->layoutcommit_ctx);
 		return;
 	}
-	spin_unlock(&pnfs_spinlock);
+	spin_unlock(&nfsi->lo_lock);
 }
 
 /* Update last_write_offset for layoutcommit.
@@ -183,6 +183,7 @@ pnfs_update_last_write(struct nfs_inode *nfsi, loff_t offset, size_t extent)
 {
 	loff_t end_pos, orig_offset = offset;
 
+	spin_lock(&nfsi->lo_lock);
 	if (orig_offset < nfsi->pnfs_write_begin_pos)
 		nfsi->pnfs_write_begin_pos = orig_offset;
 	end_pos = orig_offset + extent - 1; /* I'm being inclusive */
@@ -194,6 +195,7 @@ pnfs_update_last_write(struct nfs_inode *nfsi, loff_t offset, size_t extent)
 		(unsigned long) offset ,
 		(unsigned long) nfsi->pnfs_write_begin_pos,
 		(unsigned long) nfsi->pnfs_write_end_pos);
+	spin_unlock(&nfsi->lo_lock);
 }
 
 /* Unitialize a mountpoint in a layout driver */
@@ -306,7 +308,7 @@ pnfs_unregister_layoutdriver(struct pnfs_layoutdriver_type *ld_type)
  */
 #if defined(CONFIG_SMP)
 #define BUG_ON_UNLOCKED_LO(lo) \
-	BUG_ON(!spin_is_locked(&PNFS_NFS_INODE(lo)->lo_lock))
+	BUG_ON(!spin_is_locked(&lo->lseg_lock))
 #else /* CONFIG_SMP */
 #define BUG_ON_UNLOCKED_LO(lo) do {} while (0)
 #endif /* CONFIG_SMP */
@@ -323,9 +325,9 @@ get_lock_current_layout(struct nfs_inode *nfsi)
 	lo = nfsi->current_layout;
 	if (lo)
 		lo->refcount++;
-	else
-		spin_unlock(&nfsi->lo_lock);
-
+	spin_unlock(&nfsi->lo_lock);
+	if (lo)
+		spin_lock(&lo->lseg_lock);
 	return lo;
 }
 
@@ -339,6 +341,7 @@ put_unlock_current_layout(struct nfs_inode *nfsi,
 	int destroyed = 0;
 
 	BUG_ON_UNLOCKED_LO(lo);
+	spin_lock(&nfsi->lo_lock);
 	BUG_ON(lo->refcount <= 0);
 
 	if (--lo->refcount == 0 && list_empty(&lo->segs)) {
@@ -352,6 +355,7 @@ put_unlock_current_layout(struct nfs_inode *nfsi,
 		destroyed = 1;
 	}
 	spin_unlock(&nfsi->lo_lock);
+	spin_unlock(&lo->lseg_lock);
 
 	if (destroyed) {
 		struct nfs_client *clp;
@@ -368,7 +372,7 @@ pnfs_layout_release(struct pnfs_layout_type *lo)
 {
 	struct nfs_inode *nfsi = NFS_I(lo->inode);
 
-	spin_lock(&nfsi->lo_lock);
+	spin_lock(&lo->lseg_lock);
 	put_unlock_current_layout(nfsi, lo);
 }
 
@@ -632,7 +636,10 @@ _pnfs_return_layout(struct inode *ino, struct nfs4_pnfs_layout_segment *range,
 			goto out;
 		}
 		pnfs_free_layout(lo, &arg);
-		spin_unlock(&nfsi->lo_lock);
+		/* unlock w/o put rebalanced by eventual call to
+		 * pnfs_layout_release
+		 */
+		spin_unlock(&lo->lseg_lock);
 	}
 
 	status = return_layout(ino, &arg, type, lo);
@@ -725,7 +732,7 @@ pnfs_inject_layout(struct pnfs_layout_type *lo,
 	struct pnfs_layout_segment *lseg;
 
 	dprintk("%s Begin\n", __func__);
-	/* FIXME - BUG - this is called while holding nfsi->lo_lock spinlock */
+	/* FIXME - BUG - this is called while holding lo->lseg_lock spinlock */
 	lseg = PNFS_LD_IO_OPS(lo)->alloc_lseg(lo, lgr);
 	if (!lseg || IS_ERR(lseg)) {
 		if (!lseg)
@@ -757,6 +764,7 @@ alloc_init_layout(struct inode *ino, struct layoutdriver_io_operations *io_ops)
 		return NULL;
 	}
 
+	spin_lock_init(&lo->lseg_lock);
 	seqlock_init(&lo->seqlock);
 	memset(&lo->stateid, 0, NFS4_STATEID_SIZE);
 	lo->refcount = 1;
@@ -777,7 +785,7 @@ static int pnfs_wait_schedule(void *word)
 /*
  * get, possibly allocate, and lock current_layout
  *
- * Note: If successful, nfsi->lo_lock is taken and the caller
+ * Note: If successful, lo->lseg_lock is taken and the caller
  * must put and unlock current_layout by using put_unlock_current_layout()
  * when the returned layout is released.
  */
@@ -814,11 +822,12 @@ get_lock_alloc_layout(struct inode *ino,
 			struct nfs_client *clp = NFS_SERVER(ino)->nfs_client;
 
 			down_write(&clp->cl_sem);
-			/* must grab the layout lock */
 			spin_lock(&nfsi->lo_lock);
 			nfsi->current_layout = lo;
 			list_add_tail(&nfsi->lo_inodes, &clp->cl_lo_inodes);
+			spin_unlock(&nfsi->lo_lock);
 			up_write(&clp->cl_sem);
+			spin_lock(&lo->lseg_lock);
 		} else
 			lo = ERR_PTR(-ENOMEM);
 
@@ -939,6 +948,7 @@ pnfs_update_layout(struct inode *ino,
 	}
 
 	/* if get layout already failed once goto out */
+	spin_lock(&nfsi->lo_lock);
 	if (test_bit(NFS_INO_LAYOUT_FAILED, &nfsi->pnfs_layout_state)) {
 		if (unlikely(nfsi->pnfs_layout_suspend &&
 		    get_seconds() >= nfsi->pnfs_layout_suspend)) {
@@ -948,12 +958,15 @@ pnfs_update_layout(struct inode *ino,
 			nfsi->pnfs_layout_suspend = 0;
 		} else {
 			result = 1;
+			spin_unlock(&nfsi->lo_lock);
 			goto out_put;
 		}
 	}
-
 	spin_unlock(&nfsi->lo_lock);
 
+	/* Lose lock, but not reference, match this with pnfs_layout_release */
+	spin_unlock(&lo->lseg_lock);
+
 	result = get_layout(ino, ctx, &arg, lsegpp, lo);
 out:
 	dprintk("%s end (err:%d) state 0x%lx lseg %p\n",
@@ -978,7 +991,8 @@ pnfs_get_layout_done(struct pnfs_layout_type *lo,
 
 	dprintk("-->%s\n", __func__);
 
-	spin_lock(&nfsi->lo_lock);
+	/* Already have reference */
+	spin_lock(&lo->lseg_lock);
 
 	BUG_ON(nfsi->current_layout != lo);
 
@@ -1023,6 +1037,7 @@ pnfs_get_layout_done(struct pnfs_layout_type *lo,
 	}
 
 	/* Inject layout blob into I/O device driver */
+	/* BUG - can't hold any spinlocks here */
 	lseg = pnfs_inject_layout(lo, res, lgp->lsegpp != NULL);
 	if (IS_ERR(lseg)) {
 		lgp->status = PTR_ERR(lseg);
@@ -1042,10 +1057,12 @@ pnfs_get_layout_done(struct pnfs_layout_type *lo,
 get_out:
 	/* remember that get layout failed and don't try again */
 	if (lgp->status < 0) {
+		spin_lock(&nfsi->lo_lock);
 		set_bit(NFS_INO_LAYOUT_FAILED, &nfsi->pnfs_layout_state);
 		nfsi->pnfs_layout_suspend = suspend;
+		spin_unlock(&nfsi->lo_lock);
 	}
-	spin_unlock(&nfsi->lo_lock);
+	spin_unlock(&lo->lseg_lock);
 
 	/* Done processing layoutget. Set the layout stateid */
 	pnfs_set_layout_stateid(lo, &res->stateid);
@@ -1896,7 +1913,7 @@ pnfs_layoutcommit_setup(struct pnfs_layoutcommit_data *data, int sync)
 	 * until xdr time.
 	 */
 	/* BUG - a major purpose of this call is to set (and thus allocate)
-	 * data->args.new_layout, but pnfs_spinlock is currently held
+	 * data->args.new_layout, but nfsi->lo_lock is currently held
 	 */
 	if (nfss->pnfs_curr_ld->ld_io_ops->setup_layoutcommit) {
 		result = nfss->pnfs_curr_ld->ld_io_ops->setup_layoutcommit(
@@ -1932,7 +1949,7 @@ pnfs_layoutcommit_inode(struct inode *inode, int sync)
 	if (!data)
 		return -ENOMEM;
 
-	spin_lock(&pnfs_spinlock);
+	spin_lock(&nfsi->lo_lock);
 	if (!nfsi->layoutcommit_ctx) {
 		pnfs_layoutcommit_free(data);
 		goto out_unlock;
@@ -1955,7 +1972,7 @@ pnfs_layoutcommit_inode(struct inode *inode, int sync)
 	nfsi->layoutcommit_ctx = NULL;
 
 	/* release lock on pnfs layoutcommit attrs */
-	spin_unlock(&pnfs_spinlock);
+	spin_unlock(&nfsi->lo_lock);
 
 	/* Execute the layout commit synchronously */
 	if (sync) {
@@ -1968,7 +1985,7 @@ out:
 	dprintk("%s end (err:%d)\n", __func__, status);
 	return status;
 out_unlock:
-	spin_unlock(&pnfs_spinlock);
+	spin_unlock(&nfsi->lo_lock);
 	goto out;
 }
 
diff --git a/include/linux/nfs4_pnfs.h b/include/linux/nfs4_pnfs.h
index 2116898..4371174 100644
--- a/include/linux/nfs4_pnfs.h
+++ b/include/linux/nfs4_pnfs.h
@@ -40,7 +40,8 @@ struct pnfs_mount_type {
  * A reference is stored in the nfs_inode structure.
  */
 struct pnfs_layout_type {
-	int refcount;
+	int refcount;			/* Protected by nfsi->lo_lock */
+	spinlock_t lseg_lock;		/* Protects segments list */
 	struct list_head segs;		/* layout segments list */
 	int roc_iomode;			/* iomode to return on close, 0=none */
 	struct inode *inode;
-- 
1.5.3.3



More information about the pNFS mailing list