[pnfs] CVS: nfsv4

Benny Halevy bhalevy at panasas.com
Wed Dec 27 08:26:40 EST 2006


Marc, I took a stab at it.  Here's what I propose (worked for me with
a simulated error on virtual_update_layout).
If there's no objection I'll just submit this to the cvs tree.
The gist of it is to separate transient from permanent errors
on layoutget and mark the nfs_inode accordingly.  For transient
errors, I added a timestamp on the nfs_inode so that when it passes
we clear the error and resume layout_get.  virtual_update_layout
returns a status greater than zero when normal nfsv4 I/O should
take place.  A status less than zero is still interpreted as a hard
error that should be returned to the caller.

Benny

Analyze and handle layoutget nfs errors.
Keep a timestamp on struct nfs_inode for transient
layoutget errors so as to resume layout_get after some delay
(currently approx. 1 second).

Signed-off-by: Benny Halevy <bhalevy at panasas.com>

diff -urp a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c
--- a/fs/nfs/nfs4xdr.c	2006-12-18 19:18:44.000000000 +0200
+++ b/fs/nfs/nfs4xdr.c	2006-12-26 14:05:02.000000000 +0200
@@ -5125,6 +5125,13 @@ static struct {
 						    * to be handled by a
 						    * middle-layer.
 						    */
+	{ NFSERR_BADIOMODE,		EINVAL	},
+	{ NFS4ERR_BADLAYOUT,		ENOENT	},
+	{ NFS4ERR_UNKNOWN_LAYOUTTYPE,	EINVAL	},
+	{ NFS4ERR_LAYOUTTRYLATER,	EAGAIN	},
+	{ NFS4ERR_LAYOUTUNAVAILABLE,	ENOTSUPP},
+	{ NFS4ERR_RECALLCONFLICT,	EAGAIN	},
+
 	{ -1,			EIO		}
 };
 
diff -urp a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c
--- a/fs/nfs/pnfs.c	2006-12-26 13:44:24.000000000 +0200
+++ b/fs/nfs/pnfs.c	2006-12-27 15:00:40.000000000 +0200
@@ -438,17 +438,43 @@ virtual_update_layout(struct inode* ino,
 	res.layout.buf = NULL;
 
         /* if get layout already failed once goto out */
-        if (nfsi->pnfs_layout_state & NFS_INO_LAYOUT_FAILED)
+	if (nfsi->pnfs_layout_state & NFS_INO_LAYOUT_FAILED) {
+		if (unlikely(nfsi->pnfs_layout_suspend &&
+		             get_seconds() >= nfsi->pnfs_layout_suspend)) {
+			dprintk("%s: layout_get resumed\n", __FUNCTION__);
+			nfsi->pnfs_layout_state &= NFS_INO_LAYOUT_FAILED;
+			nfsi->pnfs_layout_suspend = 0;
+		} else
+			result = 1;
 		goto out;
+	}
 
 	if ((result = get_layout(ino, ctx, count, pos, iomode, &arg, &res))) {
-		printk("\n%s: ERROR retrieving layout %d\n",__FUNCTION__,result);
-		result =  -EIO;
+		printk("%s: ERROR retrieving layout %d\n", __FUNCTION__, result);
+		
+		switch (result) {
+		case -ENOENT:	/* NFS4ERR_BADLAYOUT */
+			/* transient error, don't mark with NFS_INO_LAYOUT_FAILED */
+			result = 1;
+			break;
+
+		case -EAGAIN:	/* NFS4ERR_LAYOUTTRYLATER, NFS4ERR_RECALLCONFLICT, NFS4ERR_LOCKED */
+			nfsi->pnfs_layout_suspend = get_seconds() + 1;
+			dprintk("%s: layout_get suspended until %ld\n",
+			        __FUNCTION__, nfsi->pnfs_layout_suspend);
+			/* FALLTHROUGH */
+		case -EINVAL:	/* NFS4ERR_INVAL, NFSERR_BADIOMODE, NFS4ERR_UNKNOWN_LAYOUTTYPE */
+		case -ENOTSUPP:	/* NFS4ERR_LAYOUTUNAVAILABLE */
+		case -ETOOSMALL:/* NFS4ERR_TOOSMALL */
+		default:
+			/* mark with NFS_INO_LAYOUT_FAILED */
+			break;
+		}
 		goto out;
 	}
 
 	if (res.layout.len <= 0) {
-		printk("\n%s: ERROR!  Layout size is ZERO!\n",__FUNCTION__);
+		printk("%s: ERROR!  Layout size is ZERO!\n",__FUNCTION__);
 		result =  -EIO;
 		goto out;
 	}
@@ -474,8 +500,8 @@ virtual_update_layout(struct inode* ino,
 	result = 0;
 out:
 
-	/* remember that get layout failed and don't try again */
-	if (result)
+        /* remember that get layout failed and don't try again */
+	if (result < 0)
 		nfsi->pnfs_layout_state |= NFS_INO_LAYOUT_FAILED;
 
 	/* res.layout.buf kalloc'ed by the xdr decoder? */
@@ -723,7 +749,8 @@ pnfs_writeback_done(struct nfs_write_dat
 /*
  * Call the appropriate parallel I/O subsystem write function.
  * If no I/O device driver exists, or one does match the returned
- * fstype, then call regular NFS processing.
+ * fstype, then return a positive status for regular NFS processing.
+ *
  * TODO: Is wdata->how and wdata->args.stable always the same value?
  * TODO: It seems in NFS, the server may not do a stable write even
  * though it was requested (and vice-versa?).  To check, it looks
@@ -750,13 +777,15 @@ pnfs_writepages(struct nfs_write_data* w
 					    args->context,
 					    args->count,
 					    args->offset,
-					    IOMODE_RW)))
+					    IOMODE_RW))) {
+		status = 1;	/* retry with nfs I/O */
 		goto out;
+	}
 
 	if (!nfss->pnfs_curr_ld->ld_io_ops ||
 	    !nfss->pnfs_curr_ld->ld_io_ops->write_pagelist) {
 		printk("%s: ERROR, no layout driver write operation\n", __FUNCTION__);
-		status = -EIO;
+		status = 1;
 		goto out;
 	}
 
@@ -783,8 +812,10 @@ pnfs_writepages(struct nfs_write_data* w
 							       wdata);
 
 	if (status > 0) {
+		dprintk("%s: LD write_pagelist returned status %d > 0\n",__FUNCTION__, status);
 		pnfs_update_last_write(nfsi, args->offset, status);
 		pnfs_need_layoutcommit(nfsi, wdata->args.context);
+		status = 0;
 	}
 
 out:
@@ -816,7 +847,7 @@ pnfs_read_done(struct nfs_read_data* dat
 /*
  * Call the appropriate parallel I/O subsystem read function.
  * If no I/O device driver exists, or one does match the returned
- * fstype, then call regular NFS processing.
+ * fstype, then return a positive status for regular NFS processing.
  */
 int
 pnfs_readpages(struct nfs_read_data *rdata)
@@ -842,11 +873,13 @@ pnfs_readpages(struct nfs_read_data *rda
 	{
 		printk(KERN_WARNING "%s: ERROR %d from virtual_update_layout\n",
 			__FUNCTION__, status);
+		status = 1;
+		goto out;
 	}
 	if (!nfss->pnfs_curr_ld->ld_io_ops ||
 	    !nfss->pnfs_curr_ld->ld_io_ops->read_pagelist) {
 		printk("%s: ERROR, no layout driver read operation\n", __FUNCTION__);
-		status = -EIO;
+		status = 1;
 		goto out;
 	}
 
@@ -867,6 +900,11 @@ pnfs_readpages(struct nfs_read_data *rda
 							      (loff_t)args->offset,
 							      args->count,
 							      rdata);
+	if (status > 0) {
+		dprintk("%s: LD read_pagelist returned status %d > 0\n",__FUNCTION__, status);
+		status = 0;
+	}
+
  out:
 	dprintk("%s: End Status %d\n",__FUNCTION__, status);
 	return status;
@@ -883,7 +921,7 @@ int pnfs_try_to_read_data(struct nfs_rea
 		dprintk("%s Utilizing pNFS I/O\n",__FUNCTION__);
 		data->call_ops = call_ops;
 		data->pnfsflags |= PNFS_USE_DS;
-		if((status = pnfs_readpages(data)) < 0)
+		if((status = pnfs_readpages(data)))
 			return status;
 		return 0;
 	}
@@ -915,12 +953,12 @@ pnfs_file_read(struct file* filp,
 
 	/* Using NFS page cache with pNFS */
 	if (use_page_cache(inode))
-		return do_sync_read(filp, buf, count, pos);
+		goto fallback;
 
 	/* Small I/O Optimization */
 	if (below_threshold(inode, count, 0)) {
 		dfprintk(IO, "%s: Below Read threshold, using NFSv4 read\n",__FUNCTION__);
-		return do_sync_read(filp,buf,count,pos);
+		goto fallback;
 	}
 
 	/* Step 1: Retrieve and set layout if not allready cached*/
@@ -928,21 +966,25 @@ pnfs_file_read(struct file* filp,
 					    (struct nfs_open_context *)filp->private_data,
 					    count,
 					    *pos,
-					    IOMODE_READ)))
-		goto out;
+					    IOMODE_READ))) {
+		dfprintk(IO, "%s: Could not get layout result=%Zd, using NFSv4 read\n",__FUNCTION__, result);
+		goto fallback;
+	}
 
 	/* Step 2: Call I/O device driver's read function */
 	if (!nfss->pnfs_curr_ld->ld_io_ops &&
 	    nfss->pnfs_curr_ld->ld_io_ops->read) {
-		result = -EIO;
-		goto out;
+		dfprintk(IO, "%s: No LD read function, using NFSv4 read\n",__FUNCTION__);
+		goto fallback;
 	}
 
 	result = nfss->pnfs_curr_ld->ld_io_ops->read(nfsi->current_layout,
 						     filp, buf, count, pos);
-out:
 	dprintk("%s end (err:%Zd)\n",__FUNCTION__,result);
 	return result;
+
+fallback:
+	return do_sync_read(filp, buf, count, pos);
 }
 
 int pnfs_try_to_write_data(struct nfs_write_data *data,
@@ -958,7 +1000,7 @@ int pnfs_try_to_write_data(struct nfs_wr
 		data->call_ops = call_ops;
 		data->pnfsflags |= PNFS_USE_DS;
 		data->how = how; /* XXX do we really need this? */
-                if((status = pnfs_writepages(data, how)) <0)
+                if((status = pnfs_writepages(data, how)))
 			return status;
 		return 0;
 	}
@@ -993,12 +1035,12 @@ pnfs_file_write(struct file* filp,
 
 	/* Using NFS page cache with pNFS */
 	if (use_page_cache(inode))
-		return do_sync_write(filp,buf,count,pos);
+		goto fallback;
 
 	/* Small I/O Optimization */
 	if (below_threshold(inode, count, 1)) {
 		dfprintk(IO, "%s: Below write threshold, using NFSv4 write\n",__FUNCTION__);
-		return do_sync_write(filp,buf,count,pos);
+		goto fallback;
 	}
 
 	/* Need to adjust write param if this is an append, etc */
@@ -1012,14 +1054,16 @@ pnfs_file_write(struct file* filp,
 					    (struct nfs_open_context *)filp->private_data,
 					    count,
 					    *pos,
-					    IOMODE_RW)))
-		goto out;
+					    IOMODE_RW))) {
+		dfprintk(IO, "%s: Could not get layout result=%Zd, using NFSv4 write\n",__FUNCTION__, result);
+		goto fallback;
+	}
 
 	/* Step 2: Call I/O device driver's write function */
 	if (!nfss->pnfs_curr_ld->ld_io_ops &&
 	    nfss->pnfs_curr_ld->ld_io_ops->write) {
-		result = -EIO;
-		goto out;
+		dfprintk(IO, "%s: No LD write function, using NFSv4 write\n",__FUNCTION__);
+		goto fallback;
 	}
 
 	result = nfss->pnfs_curr_ld->ld_io_ops->write(nfsi->current_layout,
@@ -1033,9 +1077,11 @@ pnfs_file_write(struct file* filp,
 		pnfs_update_last_write(nfsi, pos_orig, result);
 		pnfs_need_layoutcommit(nfsi, (struct nfs_open_context *)filp->private_data);
 	}
-out:
 	dprintk("%s end (err:%Zd)\n",__FUNCTION__,result);
 	return result;
+
+fallback:
+	return do_sync_write(filp, buf, count, pos);
 }
 
 int pnfs_try_to_commit(struct nfs_write_data *data, struct list_head *head, int how)
@@ -1139,8 +1185,10 @@ pnfs_fsync(struct file *file, struct den
 					    (struct nfs_open_context *)file->private_data,
 					    0,
 					    0,
-					    IOMODE_RW)))
+					    IOMODE_RW))) {
+		result = -EIO;
 		goto out;
+	}
 	
 	dprintk("%s: Calling layout driver fsync\n",__FUNCTION__);
 	result = nfss->pnfs_curr_ld->ld_io_ops->fsync(nfsi->current_layout,
diff -urp a/fs/nfs/pnfs.c~ b/fs/nfs/pnfs.c~
--- a/fs/nfs/pnfs.c~	2006-12-25 08:35:20.000000000 +0200
+++ b/fs/nfs/pnfs.c~	2006-12-26 15:53:59.000000000 +0200
@@ -438,17 +438,43 @@ virtual_update_layout(struct inode* ino,
 	res.layout.buf = NULL;
 
         /* if get layout already failed once goto out */
-        if (nfsi->pnfs_layout_state & NFS_INO_LAYOUT_FAILED)
+	if (nfsi->pnfs_layout_state & NFS_INO_LAYOUT_FAILED) {
+		if (unlikely(nfsi->pnfs_layout_suspend &&
+		             get_seconds() >= nfsi->pnfs_layout_suspend)) {
+			dprintk("%s: layout_get resumed\n", __FUNCTION__);
+			nfsi->pnfs_layout_state &= NFS_INO_LAYOUT_FAILED;
+			nfsi->pnfs_layout_suspend = 0;
+		} else
+			result = 1;
 		goto out;
+	}
 
 	if ((result = get_layout(ino, ctx, count, pos, iomode, &arg, &res))) {
-		printk("\n%s: ERROR retrieving layout %d\n",__FUNCTION__,result);
-		result =  -EIO;
+		printk("%s: ERROR retrieving layout %d\n", __FUNCTION__, result);
+		
+		switch (result) {
+		case -ENOENT:	/* NFS4ERR_BADLAYOUT */
+			/* transient error, don't mark with NFS_INO_LAYOUT_FAILED */
+			result = 1;
+			break;
+
+		case -EAGAIN:	/* NFS4ERR_LAYOUTTRYLATER, NFS4ERR_RECALLCONFLICT, NFS4ERR_LOCKED */
+			nfsi->pnfs_layout_suspend = get_seconds() + 1;
+			dprintk("%s: layout_get suspended until %ld\n",
+			        __FUNCTION__, nfsi->pnfs_layout_suspend);
+			/* FALLTHROUGH */
+		case -EINVAL:	/* NFS4ERR_INVAL, NFSERR_BADIOMODE, NFS4ERR_UNKNOWN_LAYOUTTYPE */
+		case -ENOTSUPP:	/* NFS4ERR_LAYOUTUNAVAILABLE */
+		case -ETOOSMALL:/* NFS4ERR_TOOSMALL */
+		default:
+			/* mark with NFS_INO_LAYOUT_FAILED */
+			break;
+		}
 		goto out;
 	}
 
 	if (res.layout.len <= 0) {
-		printk("\n%s: ERROR!  Layout size is ZERO!\n",__FUNCTION__);
+		printk("%s: ERROR!  Layout size is ZERO!\n",__FUNCTION__);
 		result =  -EIO;
 		goto out;
 	}
@@ -459,7 +485,7 @@ virtual_update_layout(struct inode* ino,
 					res.layout.buf);
 	if (layout_new == NULL) {
 		printk("%s: ERROR!  Could not inject layout (%d)\n",__FUNCTION__,result);
-		result =  -EIO;
+		result = -EIO;
 		goto out;
 	}
 
@@ -475,7 +501,8 @@ virtual_update_layout(struct inode* ino,
 out:
 
         /* remember that get layout failed and don't try again */
-        nfsi->pnfs_layout_state |= NFS_INO_LAYOUT_FAILED;
+	if (result < 0)
+		nfsi->pnfs_layout_state |= NFS_INO_LAYOUT_FAILED;
 
 	/* res.layout.buf kalloc'ed by the xdr decoder? */
 	if (res.layout.buf)
@@ -722,7 +749,8 @@ pnfs_writeback_done(struct nfs_write_dat
 /*
  * Call the appropriate parallel I/O subsystem write function.
  * If no I/O device driver exists, or one does match the returned
- * fstype, then call regular NFS processing.
+ * fstype, then return a positive status for regular NFS processing.
+ *
  * TODO: Is wdata->how and wdata->args.stable always the same value?
  * TODO: It seems in NFS, the server may not do a stable write even
  * though it was requested (and vice-versa?).  To check, it looks
@@ -749,13 +777,15 @@ pnfs_writepages(struct nfs_write_data* w
 					    args->context,
 					    args->count,
 					    args->offset,
-					    IOMODE_RW)))
+					    IOMODE_RW))) {
+		status = 1;	/* retry with nfs I/O */
 		goto out;
+	}
 
 	if (!nfss->pnfs_curr_ld->ld_io_ops ||
 	    !nfss->pnfs_curr_ld->ld_io_ops->write_pagelist) {
 		printk("%s: ERROR, no layout driver write operation\n", __FUNCTION__);
-		status = -EIO;
+		status = 1;
 		goto out;
 	}
 
@@ -782,8 +812,10 @@ pnfs_writepages(struct nfs_write_data* w
 							       wdata);
 
 	if (status > 0) {
+		dprintk("%s: LD write_pagelist returned status %d > 0\n",__FUNCTION__, status);
 		pnfs_update_last_write(nfsi, args->offset, status);
 		pnfs_need_layoutcommit(nfsi, wdata->args.context);
+		status = 0;
 	}
 
 out:
@@ -815,7 +847,7 @@ pnfs_read_done(struct nfs_read_data* dat
 /*
  * Call the appropriate parallel I/O subsystem read function.
  * If no I/O device driver exists, or one does match the returned
- * fstype, then call regular NFS processing.
+ * fstype, then return a positive status for regular NFS processing.
  */
 int
 pnfs_readpages(struct nfs_read_data *rdata)
@@ -841,11 +873,13 @@ pnfs_readpages(struct nfs_read_data *rda
 	{
 		printk(KERN_WARNING "%s: ERROR %d from virtual_update_layout\n",
 			__FUNCTION__, status);
+		status = 1;
+		goto out;
 	}
 	if (!nfss->pnfs_curr_ld->ld_io_ops ||
 	    !nfss->pnfs_curr_ld->ld_io_ops->read_pagelist) {
 		printk("%s: ERROR, no layout driver read operation\n", __FUNCTION__);
-		status = -EIO;
+		status = 1;
 		goto out;
 	}
 
@@ -866,6 +900,11 @@ pnfs_readpages(struct nfs_read_data *rda
 							      (loff_t)args->offset,
 							      args->count,
 							      rdata);
+	if (status > 0) {
+		dprintk("%s: LD read_pagelist returned status %d > 0\n",__FUNCTION__, status);
+		status = 0;
+	}
+
  out:
 	dprintk("%s: End Status %d\n",__FUNCTION__, status);
 	return status;
@@ -882,7 +921,7 @@ int pnfs_try_to_read_data(struct nfs_rea
 		dprintk("%s Utilizing pNFS I/O\n",__FUNCTION__);
 		data->call_ops = call_ops;
 		data->pnfsflags |= PNFS_USE_DS;
-		if((status = pnfs_readpages(data)) < 0)
+		if((status = pnfs_readpages(data)))
 			return status;
 		return 0;
 	}
@@ -914,12 +953,12 @@ pnfs_file_read(struct file* filp,
 
 	/* Using NFS page cache with pNFS */
 	if (use_page_cache(inode))
-		return do_sync_read(filp, buf, count, pos);
+		goto fallback;
 
 	/* Small I/O Optimization */
 	if (below_threshold(inode, count, 0)) {
 		dfprintk(IO, "%s: Below Read threshold, using NFSv4 read\n",__FUNCTION__);
-		return do_sync_read(filp,buf,count,pos);
+		goto fallback;
 	}
 
 	/* Step 1: Retrieve and set layout if not allready cached*/
@@ -927,21 +966,25 @@ pnfs_file_read(struct file* filp,
 					    (struct nfs_open_context *)filp->private_data,
 					    count,
 					    *pos,
-					    IOMODE_READ)))
-		goto out;
+					    IOMODE_READ))) {
+		dfprintk(IO, "%s: Could not get layout result=%Zd, using NFSv4 read\n",__FUNCTION__, result);
+		goto fallback;
+	}
 
 	/* Step 2: Call I/O device driver's read function */
 	if (!nfss->pnfs_curr_ld->ld_io_ops &&
 	    nfss->pnfs_curr_ld->ld_io_ops->read) {
-		result = -EIO;
-		goto out;
+		dfprintk(IO, "%s: No LD read function, using NFSv4 read\n",__FUNCTION__);
+		goto fallback;
 	}
 
 	result = nfss->pnfs_curr_ld->ld_io_ops->read(nfsi->current_layout,
 						     filp, buf, count, pos);
-out:
 	dprintk("%s end (err:%Zd)\n",__FUNCTION__,result);
 	return result;
+
+fallback:
+	return do_sync_read(filp, buf, count, pos);
 }
 
 int pnfs_try_to_write_data(struct nfs_write_data *data,
@@ -957,7 +1000,7 @@ int pnfs_try_to_write_data(struct nfs_wr
 		data->call_ops = call_ops;
 		data->pnfsflags |= PNFS_USE_DS;
 		data->how = how; /* XXX do we really need this? */
-                if((status = pnfs_writepages(data, how)) <0)
+                if((status = pnfs_writepages(data, how)))
 			return status;
 		return 0;
 	}
@@ -992,12 +1035,12 @@ pnfs_file_write(struct file* filp,
 
 	/* Using NFS page cache with pNFS */
 	if (use_page_cache(inode))
-		return do_sync_write(filp,buf,count,pos);
+		goto fallback;
 
 	/* Small I/O Optimization */
 	if (below_threshold(inode, count, 1)) {
 		dfprintk(IO, "%s: Below write threshold, using NFSv4 write\n",__FUNCTION__);
-		return do_sync_write(filp,buf,count,pos);
+		goto fallback;
 	}
 
 	/* Need to adjust write param if this is an append, etc */
@@ -1011,14 +1054,16 @@ pnfs_file_write(struct file* filp,
 					    (struct nfs_open_context *)filp->private_data,
 					    count,
 					    *pos,
-					    IOMODE_RW)))
-		goto out;
+					    IOMODE_RW))) {
+		dfprintk(IO, "%s: Could not get layout result=%Zd, using NFSv4 write\n",__FUNCTION__, result);
+		goto fallback;
+	}
 
 	/* Step 2: Call I/O device driver's write function */
 	if (!nfss->pnfs_curr_ld->ld_io_ops &&
 	    nfss->pnfs_curr_ld->ld_io_ops->write) {
-		result = -EIO;
-		goto out;
+		dfprintk(IO, "%s: No LD write function, using NFSv4 write\n",__FUNCTION__);
+		goto fallback;
 	}
 
 	result = nfss->pnfs_curr_ld->ld_io_ops->write(nfsi->current_layout,
@@ -1032,9 +1077,11 @@ pnfs_file_write(struct file* filp,
 		pnfs_update_last_write(nfsi, pos_orig, result);
 		pnfs_need_layoutcommit(nfsi, (struct nfs_open_context *)filp->private_data);
 	}
-out:
 	dprintk("%s end (err:%Zd)\n",__FUNCTION__,result);
 	return result;
+
+fallback:
+	return do_sync_write(filp, buf, count, pos);
 }
 
 int pnfs_try_to_commit(struct nfs_write_data *data, struct list_head *head, int how)
@@ -1138,8 +1185,10 @@ pnfs_fsync(struct file *file, struct den
 					    (struct nfs_open_context *)file->private_data,
 					    0,
 					    0,
-					    IOMODE_RW)))
+					    IOMODE_RW))) {
+		result = -EIO;
 		goto out;
+	}
 	
 	dprintk("%s: Calling layout driver fsync\n",__FUNCTION__);
 	result = nfss->pnfs_curr_ld->ld_io_ops->fsync(nfsi->current_layout,
Only in b/fs/nfs: pnfs.o
Only in b/fs/nfs: .pnfs.o.cmd
Only in b/fs/nfs: proc.o
Only in b/fs/nfs: .proc.o.cmd
diff -urp a/fs/nfs/read.c b/fs/nfs/read.c
--- a/fs/nfs/read.c	2006-12-18 19:18:44.000000000 +0200
+++ b/fs/nfs/read.c	2006-12-26 14:51:36.000000000 +0200
@@ -825,22 +825,35 @@ int nfs_readpages(struct file *filp, str
 	if (!list_empty(&head)) {
 		int err;
 		#ifdef CONFIG_NFS_V4
-			struct nfs_page *nfs_page;
+			struct nfs_page *first_page, *last_page;
+			loff_t offset;
+			size_t count;
 
 			if (pnfs_enabled_sb(NFS_SERVER(inode))) {
-				nfs_page = nfs_list_entry(head.next);
-
-				if (virtual_update_layout(inode, desc.ctx, 
-							nfs_page->wb_bytes, 
-							nfs_page->wb_index << PAGE_CACHE_SHIFT, FMODE_READ)) {
-					printk(KERN_NOTICE "failed to get the layout!\n");
+				first_page = nfs_list_entry(head.next);
+				last_page = nfs_list_entry(head.prev);
+				offset = (first_page->wb_index << PAGE_CACHE_SHIFT) +
+				         first_page->wb_offset;
+				if (unlikely(first_page == last_page))
+					count = last_page->wb_bytes;
+				else
+					count = ((last_page->wb_index - first_page->wb_index) << PAGE_CACHE_SHIFT) -
+					        first_page->wb_offset + last_page->wb_bytes;
+
+				if ((err = virtual_update_layout(inode, desc.ctx, 
+				                                 count, offset,
+				                                 FMODE_READ)) != 0) {
+					printk(KERN_NOTICE "%s: failed to get the layout!"
+				               " %Zd@%llu error=%d\n",
+					       __FUNCTION__, count, (u64)offset, err);
+					err = 0;
 				}
 			}
 		#endif
 		err = nfs_pagein_list(&head, inode);
 		if (!ret)
 			nfs_add_stats(inode, NFSIOS_READPAGES, err);
-			ret = err;
+		ret = err;
 	}
 	put_nfs_open_context(desc.ctx);
 	return ret;
diff -urp a/fs/nfs/write.c b/fs/nfs/write.c
--- a/fs/nfs/write.c	2006-12-18 19:18:44.000000000 +0200
+++ b/fs/nfs/write.c	2006-12-26 15:21:14.000000000 +0200
@@ -1021,7 +1021,7 @@ static int nfs_flush_multi(struct inode 
 
 	nbytes = req->wb_bytes;
 	do {
-		size_t len = min(nbytes, wsize);
+		size_t len = min(nbytes, (size_t)wsize);
 
 		data = nfs_writedata_alloc(len);
 		if (!data)
@@ -1538,7 +1538,6 @@ nfs_commit_list(struct inode *inode, str
 {
 	struct nfs_write_data	*data = NULL;
 	struct nfs_page         *req;
-	unsigned int 		wpages;
 	int 			ret;
 
 	dprintk("%s Begin\n", __FUNCTION__);
@@ -1573,7 +1572,6 @@ nfs_commit_list(struct inode *inode, str
 static void nfs_commit_done_pages(struct nfs_write_data *data, int status, int ckverf)
 {
 	struct nfs_page	        *req;
-	int res = 0;
 
 	while (!list_empty(&data->pages)) {
 		req = nfs_list_entry(data->pages.next);
@@ -1729,9 +1727,13 @@ int nfs_sync_inode_wait(struct inode *in
 					
 					if (pnfs_enabled_sb(NFS_SERVER(inode))) {
 						nfs_page = nfs_list_entry(head.next);
-						if (virtual_update_layout(inode, nfs_page->wb_context, 
-							npages * PAGE_SIZE, idx_start * PAGE_SIZE, FMODE_WRITE))
-						printk(KERN_NOTICE "Failed to get layout!\n");
+						if ((ret = virtual_update_layout(inode, nfs_page->wb_context, 
+						    npages * PAGE_SIZE, idx_start * PAGE_SIZE, FMODE_WRITE)))
+						printk(KERN_NOTICE "%s: failed to get the layout!"
+						       " %Zd@%llu error=%d\n",
+						       __FUNCTION__, npages * idx_start * PAGE_SIZE,
+						       (u64)idx_start * PAGE_SIZE, ret);
+						ret = 0;
 					}
 				#endif
 				
diff -urp a/include/linux/nfs4.h b/include/linux/nfs4.h
--- a/include/linux/nfs4.h	2006-12-18 19:18:48.000000000 +0200
+++ b/include/linux/nfs4.h	2006-12-26 15:16:54.000000000 +0200
@@ -235,7 +235,23 @@ enum nfsstat4 {
 	NFS4ERR_DEADLOCK = 10045,
 	NFS4ERR_FILE_OPEN = 10046,
 	NFS4ERR_ADMIN_REVOKED = 10047,
-	NFS4ERR_CB_PATH_DOWN = 10048
+	NFS4ERR_CB_PATH_DOWN = 10048,
+	NFS4ERR_BADIOMODE = 10049,
+	NFS4ERR_BADLAYOUT = 10050,
+	NFS4ERR_BAD_SESSION_DIGEST = 10051,
+	NFS4ERR_BADSESSION = 10052,
+	NFS4ERR_BADSLOT = 10053,
+	NFS4ERR_COMPLETE_ALREADY = 10054,
+	NFS4ERR_CONN_NOT_BOUND_TO_SESSION = 10055,
+	NFS4ERR_DELEG_ALREADY_WANTED = 10056,
+	NFS4ERR_DIRDELEG_UNAVAIL = 10057,
+	NFS4ERR_LAYOUTTRYLATER = 10058,
+	NFS4ERR_LAYOUTUNAVAILABLE = 10059,
+	NFS4ERR_NOMATCHING_LAYOUT = 10060,
+	NFS4ERR_RECALLCONFLICT = 10061,
+	NFS4ERR_UNKNOWN_LAYOUTTYPE = 10062,
+	NFS4ERR_SEQ_MISORDERED = 10063,
+	NFS4ERR_SEQUENCE_POS = 10064,
 };
 
 /*
diff -urp a/include/linux/nfs_fs.h b/include/linux/nfs_fs.h
--- a/include/linux/nfs_fs.h	2006-12-25 08:35:29.000000000 +0200
+++ b/include/linux/nfs_fs.h	2006-12-26 14:13:31.000000000 +0200
@@ -183,6 +183,7 @@ struct nfs_inode {
 	/* pNFS layout information */
 	u32 pnfs_layout_state;
 #define NFS_INO_LAYOUT_FAILED	0x0001	/* get layout failed, stop trying */
+	time_t pnfs_layout_suspend;
 	struct pnfs_layout_type* current_layout;
 	struct nfs_open_context* layoutcommit_ctx; /* use rpc_creds in this open_context
 												* to send LAYOUTCOMMIT to MDS */


Marc Eshel wrote:
> pnfs-bounces at linux-nfs.org wrote on 12/26/2006 03:42:27 AM:
> 
>> Marc, there are a few problems with this.
>>
>> 1. "nfsi->pnfs_layout_state |= NFS_INO_LAYOUT_FAILED;" happens 
>> unconditionally,
>> in all cases, including success. (I will submit a patch fixing that 
> today)
> 
> Yes, I guess I had too much Eggnog.
>  
>> 2. I understand marking the client's nfs_file with such a flag when the 
> server
>> returns a permanent error such as NFS4ERR_LAYOUTUNAVAILABLE but in 
>> other cases,
>> when the error is transient we must have a mechanism to clear the state, 
> e.g.
>> put a timestamp on the file after which you'll start asking for layouts 
> again
>> (e.g. in the NFS4ERR_LAYOUTTRYLATER case).  In summary, the server's 
> error code
>> must be analyzed by the client to determine what to do.  IMO, we 
>> should consider
>> reverting what you did in this submit until we have finer grained 
>> error handling
>> for layoutget.
> 
> Yes, error recovery needs much more work. I was just trying to fix a 
> specific failure that I observed while the server refused to give layout 
> and the client kept asking for it on each IO. 
> 
>> 3. I noticed that when virtual_update_layout returns an error on the read 
> path
>> the error is returned to the application as EIO.  This is wrong as the 
> client
>> should fall back to doing NFSv4 I/O when it couldn't get a layout. 
>> I'm working
>> on a patch for that too.
> 
> I thought I already fixed the read path to do just that; maybe I missed a 
> place. The write path definitely needs more work.
> 
>> Benny
>>
>> Marc Eshel (IBM) wrote:
>>> CVSROOT:   /cvs
>>> Module name:   nfsv4
>>> Changes by:   eshel at citi.   2006/12/24 19:01:59
>>>
>>> Modified files:
>>>    cvs/pnfs/fs/nfs: inode.c pnfs.c 
>>>
>>> Log message:
>>> If for some reason the server does not return a layout the client 
>> keeps asking
>>> on each read. Remember that get layout failed and don't request it 
> again.
>>> Index: inode.c
>>> ===================================================================
>>> RCS file: /cvs/nfsv4/cvs/pnfs/fs/nfs/inode.c,v
>>> retrieving revision 1.17
>>> diff -u -r1.17 inode.c
>>> --- inode.c   18 Dec 2006 17:18:43 -0000   1.17
>>> +++ inode.c   24 Dec 2006 23:54:04 -0000
>>> @@ -1084,6 +1084,7 @@
>>> #endif
>>> #ifdef CONFIG_NFS_V4 /* XXX CONFIG_PNFS */
>>> nfsi->nfs4_acl = NULL;
>>> +   nfsi->pnfs_layout_state = 0;
>>> nfsi->current_layout = NULL;
>>> nfsi->layoutcommit_ctx = NULL;
>>> #endif /* CONFIG_NFS_V4 */
>>> Index: pnfs.c
>>> ===================================================================
>>> RCS file: /cvs/nfsv4/cvs/pnfs/fs/nfs/pnfs.c,v
>>> retrieving revision 1.50
>>> diff -u -r1.50 pnfs.c
>>> --- pnfs.c   14 Dec 2006 10:47:40 -0000   1.50
>>> +++ pnfs.c   24 Dec 2006 23:54:05 -0000
>>> @@ -436,6 +436,11 @@
>>> }
>>>
>>> res.layout.buf = NULL;
>>> +
>>> +        /* if get layout already failed once goto out */
>>> +        if (nfsi->pnfs_layout_state & NFS_INO_LAYOUT_FAILED)
>>> +      goto out;
>>> +
>>> if ((result = get_layout(ino, ctx, count, pos, iomode, &arg, &res))) {
>>> printk("\n%s: ERROR retrieving layout %d\n",__FUNCTION__,result);
>>> result =  -EIO;
>>> @@ -468,10 +473,15 @@
>>>
>>> result = 0;
>>> out:
>>> +
>>> +        /* remember that get layout failed and don't try again */
>>> +        nfsi->pnfs_layout_state |= NFS_INO_LAYOUT_FAILED;
>>> +
>>> /* res.layout.buf kalloc'ed by the xdr decoder? */
>>> if (res.layout.buf)
>>> kfree(res.layout.buf);
>>> -   dprintk("%s end (err:%d)\n",__FUNCTION__,result);
>>> +   dprintk("%s end (err:%d) state %d\n",
>>> +      __FUNCTION__,result,nfsi->pnfs_layout_state);
>>> return result;
>>> }
>>>
>>> _______________________________________________
>>> pNFS mailing list
>>> pNFS at linux-nfs.org
>>> http://linux-nfs.org/cgi-bin/mailman/listinfo/pnfs
>> _______________________________________________
>> pNFS mailing list
>> pNFS at linux-nfs.org
>> http://linux-nfs.org/cgi-bin/mailman/listinfo/pnfs
> 



More information about the pNFS mailing list