[pnfs] [PATCH] pnfs: sync layout get/return
Benny Halevy
bhalevy at panasas.com
Thu Sep 20 17:20:46 EDT 2007
Synchronize layout get and layout return operations on
the pNFS client.
Use a per-server read/write semaphore to synchronize
per-file layout gets and returns against recall_{fsid,all}.
The recall_{fsid,all} loop takes an exclusive (down_write) lock,
while per-file layout_{get,return} operations take a shared (down_read) lock.
This allows only one recall_{fsid,all} to be processed at a time per server.
Use a per-file mutex to synchronize between layout get and return.
At the moment only one operation, either a layout get or a layout return,
can take place at a time per file. In the future we may allow
either multiple concurrent layout gets or multiple concurrent
layout returns to take place.
Signed-off-by: Benny Halevy <bhalevy at panasas.com>
---
fs/nfs/delegation.c | 4 ++-
fs/nfs/inode.c | 5 +++-
fs/nfs/pnfs.c | 55 ++++++++++++++++++++++++++++++--------------
fs/nfs/pnfs.h | 1 +
fs/nfs/super.c | 1 +
include/linux/nfs_fs.h | 1 +
include/linux/nfs_fs_sb.h | 2 +
7 files changed, 49 insertions(+), 20 deletions(-)
diff --git a/fs/nfs/delegation.c b/fs/nfs/delegation.c
index 0b73e0a..8d159b2 100644
--- a/fs/nfs/delegation.c
+++ b/fs/nfs/delegation.c
@@ -495,6 +495,7 @@ static int recall_layout_thread(void *data)
args->rl.cbl_seg.offset = 0;
args->rl.cbl_seg.length = NFS4_LENGTH_EOF;
+ down_write(&server->pnfs_lo_rwsem);
down_read(&clp->cl_sem);
list_for_each_entry(server, &clp->cl_superblocks, nfs4_siblings) {
dprintk("%s: fsid 0x%llx-0x%llx 0x%llx-0x%llx\n",
@@ -524,7 +525,7 @@ static int recall_layout_thread(void *data)
spin_unlock(&inode_lock);
if (found) {
- pnfs_return_layout(ino, &args->rl.cbl_seg);
+ pnfs_return_layout_locked(ino, &args->rl.cbl_seg);
iput(ino);
}
} while(found);
@@ -546,6 +547,7 @@ static int recall_layout_thread(void *data)
__FUNCTION__, status);
}
up_read(&clp->cl_sem);
+ up_write(&server->pnfs_lo_rwsem);
out:
module_put_and_exit(0);
diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c
index 207304c..b9e6714 100644
--- a/fs/nfs/inode.c
+++ b/fs/nfs/inode.c
@@ -1131,7 +1131,10 @@ static inline void nfs4_init_once(struct nfs_inode *nfsi)
nfsi->delegation = NULL;
nfsi->delegation_state = 0;
init_rwsem(&nfsi->rwsem);
-#endif
+#ifdef CONFIG_PNFS
+ mutex_init(&nfsi->lo_mutex);
+#endif /* CONFIG_PNFS */
+#endif /* CONFIG_NFS_V4 */
}
static void init_once(void * foo, kmem_cache_t * cachep, unsigned long flags)
diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c
index d9c03e7..4eac8e7 100644
--- a/fs/nfs/pnfs.c
+++ b/fs/nfs/pnfs.c
@@ -347,18 +347,13 @@ get_layout(struct inode* ino,
}
int
-pnfs_return_layout(struct inode* ino, struct nfs4_pnfs_layout_segment *range)
+pnfs_return_layout_locked(struct inode* ino, struct nfs4_pnfs_layout_segment *range)
{
struct nfs_inode* nfsi = NFS_I(ino);
struct nfs_server* server = NFS_SERVER(ino);
struct nfs4_pnfs_layoutreturn_arg arg;
int status;
- dprintk("%s:Begin layout %p\n", __FUNCTION__, nfsi->current_layout);
-
- if (nfsi->current_layout == NULL)
- return 0;
-
arg.reclaim = 0;
arg.layout_type = server->pnfs_curr_ld->id;
arg.return_type = RETURN_FILE;
@@ -373,17 +368,36 @@ pnfs_return_layout(struct inode* ino, struct nfs4_pnfs_layout_segment *range)
status = pnfs_return_layout_rpc(server, &arg);
- if (nfsi->current_layout) {
- if (status)
- dprintk ("%s: pnfs_return_layout_rpc status=%d. "
- "removing layout anyway\n", __FUNCTION__,
- status);
- else
- dprintk ("%s: removing layout\n", __FUNCTION__);
+ if (status)
+ dprintk ("%s: pnfs_return_layout_rpc status=%d. "
+ "removing layout anyway\n", __FUNCTION__,
+ status);
+ else
+ dprintk ("%s: removing layout\n", __FUNCTION__);
+ server->pnfs_curr_ld->ld_io_ops->free_layout(&nfsi->current_layout, ino, &arg.lseg);
- server->pnfs_curr_ld->ld_io_ops->free_layout(&nfsi->current_layout, ino, &arg.lseg);
+ return status;
+}
+
+int
+pnfs_return_layout(struct inode* ino, struct nfs4_pnfs_layout_segment *range)
+{
+ struct nfs_inode* nfsi = NFS_I(ino);
+ struct nfs_server* server = NFS_SERVER(ino);
+ int status = 0;
+
+ dprintk("%s:Begin layout %p\n", __FUNCTION__, nfsi->current_layout);
+
+ if (nfsi->current_layout == NULL)
+ return 0;
+
+ down_read(&server->pnfs_lo_rwsem);
+ mutex_lock(&nfsi->lo_mutex);
+ if (nfsi->current_layout)
+ status = pnfs_return_layout_locked(ino, range);
+ mutex_unlock(&nfsi->lo_mutex);
+ up_read(&server->pnfs_lo_rwsem);
- }
dprintk("%s:Exit status %d\n", __FUNCTION__, status);
return status;
}
@@ -515,6 +529,8 @@ virtual_update_layout(struct inode* ino,
goto out;
}
+ down_read(&nfss->pnfs_lo_rwsem);
+ mutex_lock(&nfsi->lo_mutex);
if ((result = get_layout(ino, ctx, &arg, &res))) {
printk("%s: ERROR retrieving layout %d\n", __FUNCTION__, result);
@@ -540,13 +556,13 @@ virtual_update_layout(struct inode* ino,
/* mark with NFS_INO_LAYOUT_FAILED */
break;
}
- goto out;
+ goto out_unlock;
}
if (res.layout.len <= 0) {
printk("%s: ERROR! Layout size is ZERO!\n",__FUNCTION__);
result = -EIO;
- goto out;
+ goto out_unlock;
}
/* Inject layout blob into I/O device driver */
@@ -556,7 +572,7 @@ virtual_update_layout(struct inode* ino,
if (layout_new == NULL) {
printk("%s: ERROR! Could not inject layout (%d)\n",__FUNCTION__,result);
result = -EIO;
- goto out;
+ goto out_unlock;
}
if (res.return_on_close) {
@@ -568,6 +584,9 @@ virtual_update_layout(struct inode* ino,
nfsi->current_layout = layout_new;
result = 0;
+out_unlock:
+ mutex_unlock(&nfsi->lo_mutex);
+ up_read(&nfss->pnfs_lo_rwsem);
out:
/* remember that get layout failed and don't try again */
diff --git a/fs/nfs/pnfs.h b/fs/nfs/pnfs.h
index f9d2ef6..fa9dc19 100644
--- a/fs/nfs/pnfs.h
+++ b/fs/nfs/pnfs.h
@@ -17,6 +17,7 @@ int virtual_update_layout(struct inode* ino, struct nfs_open_context* ctx,
size_t count, loff_t pos, enum pnfs_iomode access_type);
int pnfs_return_layout(struct inode* ino, struct nfs4_pnfs_layout_segment *range);
+int pnfs_return_layout_locked(struct inode* ino, struct nfs4_pnfs_layout_segment *range);
int pnfs_return_layout_rpc(struct nfs_server *server, struct nfs4_pnfs_layoutreturn_arg *argp);
void set_pnfs_layoutdriver(struct super_block *sb, u32 id);
void unmount_pnfs_layoutdriver(struct super_block *sb);
diff --git a/fs/nfs/super.c b/fs/nfs/super.c
index 09f3d65..960688f 100644
--- a/fs/nfs/super.c
+++ b/fs/nfs/super.c
@@ -609,6 +609,7 @@ nfs_sb_init(struct super_block *sb, rpc_authflavor_t authflavor)
(server->ds_rsize + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT;
}
}
+ init_rwsem(&server->pnfs_lo_rwsem);
#else /* CONFIG_PNFS */
server->pnfs_curr_ld = NULL;
server->pnfs_mountid = NULL;
diff --git a/include/linux/nfs_fs.h b/include/linux/nfs_fs.h
index 0885b5b..f22bcca 100644
--- a/include/linux/nfs_fs.h
+++ b/include/linux/nfs_fs.h
@@ -180,6 +180,7 @@ struct nfs_inode {
struct pnfs_layout_type* current_layout;
struct nfs_open_context* layoutcommit_ctx; /* use rpc_creds in this open_context
* to send LAYOUTCOMMIT to MDS */
+ struct mutex lo_mutex; /* for synchronizing layout get/return */
/* DH: These vars keep track of the maximum write range
* so the values can be used for layoutcommit.
*/
diff --git a/include/linux/nfs_fs_sb.h b/include/linux/nfs_fs_sb.h
index 6e97206..63f84c0 100644
--- a/include/linux/nfs_fs_sb.h
+++ b/include/linux/nfs_fs_sb.h
@@ -59,6 +59,8 @@ struct nfs_server {
struct pnfs_mount_type * pnfs_mountid; /* Mount identifier for
layout driver
*/
+ struct rw_semaphore pnfs_lo_rwsem; /* to sync with fsid/all layout returns */
+
#endif
/* Data server values will equal NFS server values if
* no pNFS layout driver exists for the mountpoint
--
1.5.3.1
More information about the pNFS
mailing list