[pnfs] [PATCH 21/38] [v1] pnfs: client layout cache: {get_lock, put_unlock}_current_layout

Benny Halevy bhalevy at panasas.com
Fri Jan 4 05:52:44 EST 2008


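get_lock_current_layout takes nfsi->lo_lock and, if the inode has a current
layout, takes a reference count on that pnfs_layout_type and returns it locked.
If there is no current layout it drops the lock again and returns NULL.
put_unlock_current_layout reverses that: it drops the reference and unlocks.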

get_alloc_layout allocates the layout if it does not exist yet and, on success,
also returns the layout locked.

The higher-level callers (e.g. pnfs_update_layout) may (and should) release the
lock if they are going to block or run for an extended period of time.
The reference count on the layout guarantees that it will be kept around
in the meantime.
When done, the lock must be taken again via spin_lock(&nfsi->lo_lock),
and put_unlock_current_layout must be called to release the reference
and unlock the layout.

Signed-off-by: Benny Halevy <bhalevy at panasas.com>
---
 fs/nfs/inode.c         |    1 +
 fs/nfs/pnfs.c          |   80 +++++++++++++++++++++++++++++++++++++-----------
 include/linux/nfs_fs.h |    1 +
 3 files changed, 64 insertions(+), 18 deletions(-)

diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c
index c891492..20c6292 100644
--- a/fs/nfs/inode.c
+++ b/fs/nfs/inode.c
@@ -1184,6 +1184,7 @@ static inline void nfs4_init_once(struct nfs_inode *nfsi)
 #endif
 #ifdef CONFIG_PNFS
 	init_waitqueue_head(&nfsi->lo_waitq);
+	spin_lock_init(&nfsi->lo_lock);
 #endif /* CONFIG_PNFS */
 }
 
diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c
index 6757608..81d9800 100644
--- a/fs/nfs/pnfs.c
+++ b/fs/nfs/pnfs.c
@@ -304,6 +304,44 @@ pnfs_unregister_layoutdriver(struct pnfs_layoutdriver_type *ld_type)
 }
 
 /*
+ * pNFS client layout cache
+ */
+#define BUG_ON_UNLOCKED_LO(lo) \
+	BUG_ON(spin_trylock(&PNFS_NFS_INODE(lo)->lo_lock))
+
+/*
+ * get and lock nfsi->current_layout
+ */
+static inline struct pnfs_layout_type *
+get_lock_current_layout(struct nfs_inode *nfsi)
+{
+	struct pnfs_layout_type *lo;
+
+	spin_lock(&nfsi->lo_lock);
+	lo = nfsi->current_layout;
+	if (lo)
+		lo->refcount++;
+	else
+		spin_unlock(&nfsi->lo_lock);
+
+	return lo;
+}
+
+/*
+ * put and unlock nfsi->current_layout
+ */
+static inline void
+put_unlock_current_layout(struct nfs_inode *nfsi,
+			    struct pnfs_layout_type *lo)
+{
+	BUG_ON_UNLOCKED_LO(lo);
+	BUG_ON(lo->refcount <= 0);
+
+	--lo->refcount;
+	spin_unlock(&nfsi->lo_lock);
+}
+
+/*
 * Get layout from server.
 *    for now, assume that whole file layouts are requested.
 *    arg->offset: 0
@@ -343,18 +381,21 @@ get_layout(struct inode *ino,
 int
 pnfs_return_layout(struct inode *ino, struct nfs4_pnfs_layout_segment *range)
 {
+	struct pnfs_layout_type *lo;
 	struct nfs_inode *nfsi = NFS_I(ino);
 	struct nfs_server *server = NFS_SERVER(ino);
 	struct nfs4_pnfs_layoutreturn_arg arg;
 	int status;
 
-	dprintk("%s:Begin layout %p\n", __FUNCTION__, nfsi->current_layout);
+	lo = get_lock_current_layout(nfsi);
+	dprintk("%s:Begin layout %p\n", __FUNCTION__, lo);
 
-	if (nfsi->current_layout == NULL)
+	if (lo == NULL)
 		return 0;
 
 	server->pnfs_curr_ld->ld_io_ops->free_layout(
 		&nfsi->current_layout, &arg.lseg);
+	put_unlock_current_layout(nfsi, lo);
 
 	arg.reclaim = 0;
 	arg.layout_type = server->pnfs_curr_ld->id;
@@ -441,6 +482,7 @@ alloc_init_layout(struct inode *ino, struct layoutdriver_io_operations *io_ops)
 		return NULL;
 	}
 
+	lo->refcount = 1;
 	lo->roc_iomode = 0;
 	lo->inode = ino;
 	return lo;
@@ -455,7 +497,7 @@ static int pnfs_wait_schedule(void *word)
 }
 
 /*
- * get, possibly allocate current_layout
+ * get, possibly allocate, and lock current_layout
  */
 static struct pnfs_layout_type *
 get_alloc_layout(struct inode *ino,
@@ -467,7 +509,7 @@ get_alloc_layout(struct inode *ino,
 
 	dprintk("%s Begin\n", __FUNCTION__);
 
-	while ((lo = nfsi->current_layout) == NULL) {
+	while ((lo = get_lock_current_layout(nfsi)) == NULL) {
 		/* Compete against other threads on who's doing the allocation,
 		 * wait until bit is cleared if we lost this race.
 		 */
@@ -480,22 +522,22 @@ get_alloc_layout(struct inode *ino,
 		}
 
 		/* Was current_layout already allocated while we slept?
-		 * If not, allocate it.
+		 * If so, retry get_lock'ing it. Otherwise, allocate it.
 		 */
-		lo = nfsi->current_layout;
-		if (!lo)
-			lo = nfsi->current_layout =
-				alloc_init_layout(ino, io_ops);
+		if (nfsi->current_layout)
+			continue;
+
+		lo = alloc_init_layout(ino, io_ops);
+		if (lo) {
+			/* must grab the layout lock */
+			spin_lock(&nfsi->lo_lock);
+			nfsi->current_layout = lo;
+		} else
+			lo = ERR_PTR(-ENOMEM);
 
 		/* release the NFS_INO_LAYOUT_ALLOC bit and wake up waiters */
 		clear_bit_unlock(NFS_INO_LAYOUT_ALLOC, &nfsi->pnfs_layout_state);
 		wake_up_bit(&nfsi->pnfs_layout_state, NFS_INO_LAYOUT_ALLOC);
-
-		/* we're done here.
-		 * just check whether alloc_init_layout succeeded.
-		 */
-		if (!lo)
-			lo = ERR_PTR(-ENOMEM);
 		break;
 	}
 
@@ -505,7 +547,6 @@ get_alloc_layout(struct inode *ino,
 	else
 		dprintk("%s Return error %ld\n", __FUNCTION__, PTR_ERR(lo));
 #endif
-	return lo;
 }
 
 /* Update the file's layout for the given range and iomode.
@@ -631,6 +672,7 @@ out:
 
 	/* res.layout.buf kalloc'ed by the xdr decoder? */
 	kfree(res.layout.buf);
+	put_unlock_current_layout(nfsi, layout_new);
 ret:
 	dprintk("%s end (err:%d) state 0x%lx\n",
 		__FUNCTION__, result, nfsi->pnfs_layout_state);
@@ -721,9 +763,11 @@ pnfs_getboundary(struct inode *inode)
 		goto out;
 
 	nfsi = NFS_I(inode);
-	lo = nfsi->current_layout;
-	if (lo)
+	lo = get_lock_current_layout(nfsi);;
+	if (lo) {
 		stripe_size = policy_ops->get_stripesize(lo);
+		put_unlock_current_layout(nfsi, lo);
+	}
 out:
 	return stripe_size;
 }
diff --git a/include/linux/nfs_fs.h b/include/linux/nfs_fs.h
index 1a093b1..99dfe9c 100644
--- a/include/linux/nfs_fs.h
+++ b/include/linux/nfs_fs.h
@@ -181,6 +181,7 @@ struct nfs_inode {
 #define NFS_INO_LAYOUT_ALLOC	0x0002	/* get layout failed, stop trying */
 	time_t pnfs_layout_suspend;
 	wait_queue_head_t lo_waitq;
+	spinlock_t lo_lock;
 	struct pnfs_layout_type *current_layout;
 	/* use rpc_creds in this open_context to send LAYOUTCOMMIT to MDS */
 	struct nfs_open_context *layoutcommit_ctx;
-- 
1.5.3.3



More information about the pNFS mailing list