[pnfs] [PATCH] pnfs: sync layout get/return

Benny Halevy bhalevy at panasas.com
Thu Sep 20 17:20:46 EDT 2007


Synchronize layout get and layout return operations on
the pNFS client.

Use a per-server read/write semaphore to synchronize per-file
layout gets and returns against recall_{fsid,all}.
The recall_{fsid,all} loop takes the exclusive (down_write) lock,
while per-file layout_{get,return} operations take the shared
(down_read) lock.
This allows only one recall_{fsid,all} to be processed at a time per server.

Use a per-file mutex to synchronize layout get and layout return.
At the moment only one operation, either a layout get or a layout
return, can take place at a time on a given file.  In the future we
may allow either multiple concurrent layout gets or multiple
concurrent layout returns to take place.

Signed-off-by: Benny Halevy <bhalevy at panasas.com>
---
 fs/nfs/delegation.c       |    4 ++-
 fs/nfs/inode.c            |    5 +++-
 fs/nfs/pnfs.c             |   55 ++++++++++++++++++++++++++++++--------------
 fs/nfs/pnfs.h             |    1 +
 fs/nfs/super.c            |    1 +
 include/linux/nfs_fs.h    |    1 +
 include/linux/nfs_fs_sb.h |    2 +
 7 files changed, 49 insertions(+), 20 deletions(-)

diff --git a/fs/nfs/delegation.c b/fs/nfs/delegation.c
index 0b73e0a..8d159b2 100644
--- a/fs/nfs/delegation.c
+++ b/fs/nfs/delegation.c
@@ -495,6 +495,7 @@ static int recall_layout_thread(void *data)
 	args->rl.cbl_seg.offset = 0;
 	args->rl.cbl_seg.length = NFS4_LENGTH_EOF;
 
+	down_write(&server->pnfs_lo_rwsem);
 	down_read(&clp->cl_sem);
 	list_for_each_entry(server, &clp->cl_superblocks, nfs4_siblings) {
 		dprintk("%s: fsid 0x%llx-0x%llx 0x%llx-0x%llx\n",
@@ -524,7 +525,7 @@ static int recall_layout_thread(void *data)
 				spin_unlock(&inode_lock);
 
 				if (found) {
-					pnfs_return_layout(ino, &args->rl.cbl_seg);
+					pnfs_return_layout_locked(ino, &args->rl.cbl_seg);
 					iput(ino);
 				}
 			} while(found);
@@ -546,6 +547,7 @@ static int recall_layout_thread(void *data)
 			       __FUNCTION__, status);
 	}
 	up_read(&clp->cl_sem);
+	up_write(&server->pnfs_lo_rwsem);
 
 out:
 	module_put_and_exit(0);
diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c
index 207304c..b9e6714 100644
--- a/fs/nfs/inode.c
+++ b/fs/nfs/inode.c
@@ -1131,7 +1131,10 @@ static inline void nfs4_init_once(struct nfs_inode *nfsi)
 	nfsi->delegation = NULL;
 	nfsi->delegation_state = 0;
 	init_rwsem(&nfsi->rwsem);
-#endif
+#ifdef CONFIG_PNFS
+	mutex_init(&nfsi->lo_mutex);
+#endif /* CONFIG_PNFS */
+#endif /* CONFIG_NFS_V4 */
 }
 
 static void init_once(void * foo, kmem_cache_t * cachep, unsigned long flags)
diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c
index d9c03e7..4eac8e7 100644
--- a/fs/nfs/pnfs.c
+++ b/fs/nfs/pnfs.c
@@ -347,18 +347,13 @@ get_layout(struct inode* ino,
 }
 
 int
-pnfs_return_layout(struct inode* ino, struct nfs4_pnfs_layout_segment *range)
+pnfs_return_layout_locked(struct inode* ino, struct nfs4_pnfs_layout_segment *range)
 {
 	struct nfs_inode* nfsi = NFS_I(ino);
 	struct nfs_server* server = NFS_SERVER(ino);
 	struct nfs4_pnfs_layoutreturn_arg arg;
 	int status;
 
-	dprintk("%s:Begin layout %p\n", __FUNCTION__, nfsi->current_layout);
-
-	if (nfsi->current_layout == NULL)
-		return 0;
-
 	arg.reclaim = 0;
 	arg.layout_type = server->pnfs_curr_ld->id;
 	arg.return_type = RETURN_FILE;
@@ -373,17 +368,36 @@ pnfs_return_layout(struct inode* ino, struct nfs4_pnfs_layout_segment *range)
 
 	status = pnfs_return_layout_rpc(server, &arg);
 
-	if (nfsi->current_layout) {
-		if (status)
-			dprintk ("%s: pnfs_return_layout_rpc status=%d. "
-				 "removing layout anyway\n", __FUNCTION__,
-				 status);
-		else
-			dprintk ("%s: removing layout\n", __FUNCTION__);
+	if (status)
+		dprintk ("%s: pnfs_return_layout_rpc status=%d. "
+			 "removing layout anyway\n", __FUNCTION__,
+			 status);
+	else
+		dprintk ("%s: removing layout\n", __FUNCTION__);
+	server->pnfs_curr_ld->ld_io_ops->free_layout(&nfsi->current_layout, ino, &arg.lseg);
 
-		server->pnfs_curr_ld->ld_io_ops->free_layout(&nfsi->current_layout, ino, &arg.lseg);
+	return status;
+}
+
+int
+pnfs_return_layout(struct inode* ino, struct nfs4_pnfs_layout_segment *range)
+{
+	struct nfs_inode* nfsi = NFS_I(ino);
+	struct nfs_server* server = NFS_SERVER(ino);
+	int status = 0;
+
+	dprintk("%s:Begin layout %p\n", __FUNCTION__, nfsi->current_layout);
+
+	if (nfsi->current_layout == NULL)
+		return 0;
+
+	down_read(&server->pnfs_lo_rwsem);
+	mutex_lock(&nfsi->lo_mutex);
+	if (nfsi->current_layout)
+		status = pnfs_return_layout_locked(ino, range);
+	mutex_unlock(&nfsi->lo_mutex);
+	up_read(&server->pnfs_lo_rwsem);
 
-	}
 	dprintk("%s:Exit status %d\n", __FUNCTION__, status);
 	return status;
 }
@@ -515,6 +529,8 @@ virtual_update_layout(struct inode* ino,
 		goto out;
 	}
 
+	down_read(&nfss->pnfs_lo_rwsem);
+	mutex_lock(&nfsi->lo_mutex);
 	if ((result = get_layout(ino, ctx, &arg, &res))) {
 		printk("%s: ERROR retrieving layout %d\n", __FUNCTION__, result);
 
@@ -540,13 +556,13 @@ virtual_update_layout(struct inode* ino,
 			/* mark with NFS_INO_LAYOUT_FAILED */
 			break;
 		}
-		goto out;
+		goto out_unlock;
 	}
 
 	if (res.layout.len <= 0) {
 		printk("%s: ERROR!  Layout size is ZERO!\n",__FUNCTION__);
 		result =  -EIO;
-		goto out;
+		goto out_unlock;
 	}
 
 	/* Inject layout blob into I/O device driver */
@@ -556,7 +572,7 @@ virtual_update_layout(struct inode* ino,
 	if (layout_new == NULL) {
 		printk("%s: ERROR!  Could not inject layout (%d)\n",__FUNCTION__,result);
 		result =  -EIO;
-		goto out;
+		goto out_unlock;
 	}
 
 	if (res.return_on_close) {
@@ -568,6 +584,9 @@ virtual_update_layout(struct inode* ino,
 	nfsi->current_layout = layout_new;
 
 	result = 0;
+out_unlock:
+	mutex_unlock(&nfsi->lo_mutex);
+	up_read(&nfss->pnfs_lo_rwsem);
 out:
 
         /* remember that get layout failed and don't try again */
diff --git a/fs/nfs/pnfs.h b/fs/nfs/pnfs.h
index f9d2ef6..fa9dc19 100644
--- a/fs/nfs/pnfs.h
+++ b/fs/nfs/pnfs.h
@@ -17,6 +17,7 @@ int virtual_update_layout(struct inode* ino, struct nfs_open_context* ctx,
 	size_t count, loff_t pos, enum pnfs_iomode access_type);
 
 int pnfs_return_layout(struct inode* ino, struct nfs4_pnfs_layout_segment *range);
+int pnfs_return_layout_locked(struct inode* ino, struct nfs4_pnfs_layout_segment *range);
 int pnfs_return_layout_rpc(struct nfs_server *server, struct nfs4_pnfs_layoutreturn_arg *argp);
 void set_pnfs_layoutdriver(struct super_block *sb, u32 id);
 void unmount_pnfs_layoutdriver(struct super_block *sb);
diff --git a/fs/nfs/super.c b/fs/nfs/super.c
index 09f3d65..960688f 100644
--- a/fs/nfs/super.c
+++ b/fs/nfs/super.c
@@ -609,6 +609,7 @@ nfs_sb_init(struct super_block *sb, rpc_authflavor_t authflavor)
 					(server->ds_rsize + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT;
 		}
 	}
+	init_rwsem(&server->pnfs_lo_rwsem);
 #else /* CONFIG_PNFS */
 	server->pnfs_curr_ld = NULL;
 	server->pnfs_mountid = NULL;
diff --git a/include/linux/nfs_fs.h b/include/linux/nfs_fs.h
index 0885b5b..f22bcca 100644
--- a/include/linux/nfs_fs.h
+++ b/include/linux/nfs_fs.h
@@ -180,6 +180,7 @@ struct nfs_inode {
 	struct pnfs_layout_type* current_layout;
 	struct nfs_open_context* layoutcommit_ctx; /* use rpc_creds in this open_context
 												* to send LAYOUTCOMMIT to MDS */
+	struct mutex            lo_mutex;	/* for synchronizing layout get/return */
 	/* DH: These vars keep track of the maximum write range
 	 * so the values can be used for layoutcommit.
 	 */
diff --git a/include/linux/nfs_fs_sb.h b/include/linux/nfs_fs_sb.h
index 6e97206..63f84c0 100644
--- a/include/linux/nfs_fs_sb.h
+++ b/include/linux/nfs_fs_sb.h
@@ -59,6 +59,8 @@ struct nfs_server {
 	struct pnfs_mount_type        * pnfs_mountid;  /* Mount identifier for
 							  layout driver
 						       */
+	struct rw_semaphore		pnfs_lo_rwsem; /* to sync with fsid/all layout returns */
+
 #endif
 	/* Data server values will equal NFS server values if
 	 * no pNFS layout driver exists for the mountpoint
-- 
1.5.3.1



More information about the pNFS mailing list