[pnfs] CVS: nfsv4
Benny Halevy Panasas
bhalevy at citi.umich.edu
Wed Jan 3 08:18:17 EST 2007
CVSROOT: /cvs
Module name: nfsv4
Changes by: bhalevy at citi. 2007/01/03 08:18:17
Modified files:
cvs/pnfs/fs/nfs: nfs4xdr.c pnfs.c read.c write.c
cvs/pnfs/include/linux: nfs4.h nfs_fs.h
Log message:
Analyze and handle layoutget NFS errors.
Keep a timestamp in struct nfs_inode for transient
layoutget errors, so as to resume layout_get after some delay
(currently approx. 1 second).
Signed-off-by: Benny Halevy <bhalevy at panasas.com>
Index: fs/nfs/nfs4xdr.c
===================================================================
RCS file: /cvs/nfsv4/cvs/pnfs/fs/nfs/nfs4xdr.c,v
retrieving revision 1.27
diff -d -u -p -r1.27 nfs4xdr.c
--- fs/nfs/nfs4xdr.c 18 Dec 2006 17:18:44 -0000 1.27
+++ fs/nfs/nfs4xdr.c 3 Jan 2007 13:17:05 -0000
@@ -5125,6 +5125,13 @@ static struct {
* to be handled by a
* middle-layer.
*/
+ { NFS4ERR_BADIOMODE, EINVAL },
+ { NFS4ERR_BADLAYOUT, ENOENT },
+ { NFS4ERR_UNKNOWN_LAYOUTTYPE, EINVAL },
+ { NFS4ERR_LAYOUTTRYLATER, EAGAIN },
+ { NFS4ERR_LAYOUTUNAVAILABLE, ENOTSUPP},
+ { NFS4ERR_RECALLCONFLICT, EAGAIN },
+
{ -1, EIO }
};
Index: fs/nfs/pnfs.c
===================================================================
RCS file: /cvs/nfsv4/cvs/pnfs/fs/nfs/pnfs.c,v
retrieving revision 1.52
diff -d -u -p -r1.52 pnfs.c
--- fs/nfs/pnfs.c 26 Dec 2006 11:46:38 -0000 1.52
+++ fs/nfs/pnfs.c 3 Jan 2007 13:17:06 -0000
@@ -438,17 +438,43 @@ virtual_update_layout(struct inode* ino,
res.layout.buf = NULL;
/* if get layout already failed once goto out */
- if (nfsi->pnfs_layout_state & NFS_INO_LAYOUT_FAILED)
+ if (nfsi->pnfs_layout_state & NFS_INO_LAYOUT_FAILED) {
+ if (unlikely(nfsi->pnfs_layout_suspend &&
+ get_seconds() >= nfsi->pnfs_layout_suspend)) {
+ dprintk("%s: layout_get resumed\n", __FUNCTION__);
+ nfsi->pnfs_layout_state &= NFS_INO_LAYOUT_FAILED;
+ nfsi->pnfs_layout_suspend = 0;
+ } else
+ result = 1;
goto out;
+ }
if ((result = get_layout(ino, ctx, count, pos, iomode, &arg, &res))) {
- printk("\n%s: ERROR retrieving layout %d\n",__FUNCTION__,result);
- result = -EIO;
+ printk("%s: ERROR retrieving layout %d\n", __FUNCTION__, result);
+
+ switch (result) {
+ case -ENOENT: /* NFS4ERR_BADLAYOUT */
+ /* transient error, don't mark with NFS_INO_LAYOUT_FAILED */
+ result = 1;
+ break;
+
+ case -EAGAIN: /* NFS4ERR_LAYOUTTRYLATER, NFS4ERR_RECALLCONFLICT, NFS4ERR_LOCKED */
+ nfsi->pnfs_layout_suspend = get_seconds() + 1;
+ dprintk("%s: layout_get suspended until %ld\n",
+ __FUNCTION__, nfsi->pnfs_layout_suspend);
+ /* FALLTHROUGH */
+ case -EINVAL: /* NFS4ERR_INVAL, NFSERR_BADIOMODE, NFS4ERR_UNKNOWN_LAYOUTTYPE */
+ case -ENOTSUPP: /* NFS4ERR_LAYOUTUNAVAILABLE */
+ case -ETOOSMALL:/* NFS4ERR_TOOSMALL */
+ default:
+ /* mark with NFS_INO_LAYOUT_FAILED */
+ break;
+ }
goto out;
}
if (res.layout.len <= 0) {
- printk("\n%s: ERROR! Layout size is ZERO!\n",__FUNCTION__);
+ printk("%s: ERROR! Layout size is ZERO!\n",__FUNCTION__);
result = -EIO;
goto out;
}
@@ -474,8 +500,8 @@ virtual_update_layout(struct inode* ino,
result = 0;
out:
- /* remember that get layout failed and don't try again */
- if (result)
+ /* remember that get layout failed and don't try again */
+ if (result < 0)
nfsi->pnfs_layout_state |= NFS_INO_LAYOUT_FAILED;
/* res.layout.buf kalloc'ed by the xdr decoder? */
@@ -723,7 +749,8 @@ pnfs_writeback_done(struct nfs_write_dat
/*
* Call the appropriate parallel I/O subsystem write function.
* If no I/O device driver exists, or one does match the returned
- * fstype, then call regular NFS processing.
+ * fstype, then return a positive status for regular NFS processing.
+ *
* TODO: Is wdata->how and wdata->args.stable always the same value?
* TODO: It seems in NFS, the server may not do a stable write even
* though it was requested (and vice-versa?). To check, it looks
@@ -750,13 +777,15 @@ pnfs_writepages(struct nfs_write_data* w
args->context,
args->count,
args->offset,
- IOMODE_RW)))
+ IOMODE_RW))) {
+ status = 1; /* retry with nfs I/O */
goto out;
+ }
if (!nfss->pnfs_curr_ld->ld_io_ops ||
!nfss->pnfs_curr_ld->ld_io_ops->write_pagelist) {
printk("%s: ERROR, no layout driver write operation\n", __FUNCTION__);
- status = -EIO;
+ status = 1;
goto out;
}
@@ -783,8 +812,10 @@ pnfs_writepages(struct nfs_write_data* w
wdata);
if (status > 0) {
+ dprintk("%s: LD write_pagelist returned status %d > 0\n",__FUNCTION__, status);
pnfs_update_last_write(nfsi, args->offset, status);
pnfs_need_layoutcommit(nfsi, wdata->args.context);
+ status = 0;
}
out:
@@ -816,7 +847,7 @@ pnfs_read_done(struct nfs_read_data* dat
/*
* Call the appropriate parallel I/O subsystem read function.
* If no I/O device driver exists, or one does match the returned
- * fstype, then call regular NFS processing.
+ * fstype, then return a positive status for regular NFS processing.
*/
int
pnfs_readpages(struct nfs_read_data *rdata)
@@ -842,11 +873,13 @@ pnfs_readpages(struct nfs_read_data *rda
{
printk(KERN_WARNING "%s: ERROR %d from virtual_update_layout\n",
__FUNCTION__, status);
+ status = 1;
+ goto out;
}
if (!nfss->pnfs_curr_ld->ld_io_ops ||
!nfss->pnfs_curr_ld->ld_io_ops->read_pagelist) {
printk("%s: ERROR, no layout driver read operation\n", __FUNCTION__);
- status = -EIO;
+ status = 1;
goto out;
}
@@ -867,6 +900,11 @@ pnfs_readpages(struct nfs_read_data *rda
(loff_t)args->offset,
args->count,
rdata);
+ if (status > 0) {
+ dprintk("%s: LD read_pagelist returned status %d > 0\n",__FUNCTION__, status);
+ status = 0;
+ }
+
out:
dprintk("%s: End Status %d\n",__FUNCTION__, status);
return status;
@@ -883,7 +921,7 @@ int pnfs_try_to_read_data(struct nfs_rea
dprintk("%s Utilizing pNFS I/O\n",__FUNCTION__);
data->call_ops = call_ops;
data->pnfsflags |= PNFS_USE_DS;
- if((status = pnfs_readpages(data)) < 0)
+ if((status = pnfs_readpages(data)))
return status;
return 0;
}
@@ -915,12 +953,12 @@ pnfs_file_read(struct file* filp,
/* Using NFS page cache with pNFS */
if (use_page_cache(inode))
- return do_sync_read(filp, buf, count, pos);
+ goto fallback;
/* Small I/O Optimization */
if (below_threshold(inode, count, 0)) {
dfprintk(IO, "%s: Below Read threshold, using NFSv4 read\n",__FUNCTION__);
- return do_sync_read(filp,buf,count,pos);
+ goto fallback;
}
/* Step 1: Retrieve and set layout if not allready cached*/
@@ -928,21 +966,25 @@ pnfs_file_read(struct file* filp,
(struct nfs_open_context *)filp->private_data,
count,
*pos,
- IOMODE_READ)))
- goto out;
+ IOMODE_READ))) {
+ dfprintk(IO, "%s: Could not get layout result=%Zd, using NFSv4 read\n",__FUNCTION__, result);
+ goto fallback;
+ }
/* Step 2: Call I/O device driver's read function */
if (!nfss->pnfs_curr_ld->ld_io_ops &&
nfss->pnfs_curr_ld->ld_io_ops->read) {
- result = -EIO;
- goto out;
+ dfprintk(IO, "%s: No LD read function, using NFSv4 read\n",__FUNCTION__);
+ goto fallback;
}
result = nfss->pnfs_curr_ld->ld_io_ops->read(nfsi->current_layout,
filp, buf, count, pos);
-out:
dprintk("%s end (err:%Zd)\n",__FUNCTION__,result);
return result;
+
+fallback:
+ return do_sync_read(filp, buf, count, pos);
}
int pnfs_try_to_write_data(struct nfs_write_data *data,
@@ -958,7 +1000,7 @@ int pnfs_try_to_write_data(struct nfs_wr
data->call_ops = call_ops;
data->pnfsflags |= PNFS_USE_DS;
data->how = how; /* XXX do we really need this? */
- if((status = pnfs_writepages(data, how)) <0)
+ if((status = pnfs_writepages(data, how)))
return status;
return 0;
}
@@ -993,12 +1035,12 @@ pnfs_file_write(struct file* filp,
/* Using NFS page cache with pNFS */
if (use_page_cache(inode))
- return do_sync_write(filp,buf,count,pos);
+ goto fallback;
/* Small I/O Optimization */
if (below_threshold(inode, count, 1)) {
dfprintk(IO, "%s: Below write threshold, using NFSv4 write\n",__FUNCTION__);
- return do_sync_write(filp,buf,count,pos);
+ goto fallback;
}
/* Need to adjust write param if this is an append, etc */
@@ -1012,14 +1054,16 @@ pnfs_file_write(struct file* filp,
(struct nfs_open_context *)filp->private_data,
count,
*pos,
- IOMODE_RW)))
- goto out;
+ IOMODE_RW))) {
+ dfprintk(IO, "%s: Could not get layout result=%Zd, using NFSv4 write\n",__FUNCTION__, result);
+ goto fallback;
+ }
/* Step 2: Call I/O device driver's write function */
if (!nfss->pnfs_curr_ld->ld_io_ops &&
nfss->pnfs_curr_ld->ld_io_ops->write) {
- result = -EIO;
- goto out;
+ dfprintk(IO, "%s: No LD write function, using NFSv4 write\n",__FUNCTION__);
+ goto fallback;
}
result = nfss->pnfs_curr_ld->ld_io_ops->write(nfsi->current_layout,
@@ -1033,9 +1077,11 @@ pnfs_file_write(struct file* filp,
pnfs_update_last_write(nfsi, pos_orig, result);
pnfs_need_layoutcommit(nfsi, (struct nfs_open_context *)filp->private_data);
}
-out:
dprintk("%s end (err:%Zd)\n",__FUNCTION__,result);
return result;
+
+fallback:
+ return do_sync_write(filp, buf, count, pos);
}
int pnfs_try_to_commit(struct nfs_write_data *data, struct list_head *head, int how)
@@ -1139,8 +1185,10 @@ pnfs_fsync(struct file *file, struct den
(struct nfs_open_context *)file->private_data,
0,
0,
- IOMODE_RW)))
+ IOMODE_RW))) {
+ result = -EIO;
goto out;
+ }
dprintk("%s: Calling layout driver fsync\n",__FUNCTION__);
result = nfss->pnfs_curr_ld->ld_io_ops->fsync(nfsi->current_layout,
Index: fs/nfs/read.c
===================================================================
RCS file: /cvs/nfsv4/cvs/pnfs/fs/nfs/read.c,v
retrieving revision 1.20
diff -d -u -p -r1.20 read.c
--- fs/nfs/read.c 18 Dec 2006 17:18:44 -0000 1.20
+++ fs/nfs/read.c 3 Jan 2007 13:17:07 -0000
@@ -825,22 +825,35 @@ int nfs_readpages(struct file *filp, str
if (!list_empty(&head)) {
int err;
#ifdef CONFIG_NFS_V4
- struct nfs_page *nfs_page;
+ struct nfs_page *first_page, *last_page;
+ loff_t offset;
+ size_t count;
if (pnfs_enabled_sb(NFS_SERVER(inode))) {
- nfs_page = nfs_list_entry(head.next);
+ first_page = nfs_list_entry(head.next);
+ last_page = nfs_list_entry(head.prev);
+ offset = (first_page->wb_index << PAGE_CACHE_SHIFT) +
+ first_page->wb_offset;
+ if (unlikely(first_page == last_page))
+ count = last_page->wb_bytes;
+ else
+ count = ((last_page->wb_index - first_page->wb_index) << PAGE_CACHE_SHIFT) -
+ first_page->wb_offset + last_page->wb_bytes;
- if (virtual_update_layout(inode, desc.ctx,
- nfs_page->wb_bytes,
- nfs_page->wb_index << PAGE_CACHE_SHIFT, FMODE_READ)) {
- printk(KERN_NOTICE "failed to get the layout!\n");
+ if ((err = virtual_update_layout(inode, desc.ctx,
+ count, offset,
+ FMODE_READ)) != 0) {
+ printk(KERN_NOTICE "%s: failed to get the layout!"
+ " %Zd@%llu error=%d\n",
+ __FUNCTION__, count, (u64)offset, err);
+ err = 0;
}
}
#endif
err = nfs_pagein_list(&head, inode);
if (!ret)
nfs_add_stats(inode, NFSIOS_READPAGES, err);
- ret = err;
+ ret = err;
}
put_nfs_open_context(desc.ctx);
return ret;
Index: fs/nfs/write.c
===================================================================
RCS file: /cvs/nfsv4/cvs/pnfs/fs/nfs/write.c,v
retrieving revision 1.32
diff -d -u -p -r1.32 write.c
--- fs/nfs/write.c 18 Dec 2006 17:18:44 -0000 1.32
+++ fs/nfs/write.c 3 Jan 2007 13:17:08 -0000
@@ -1021,7 +1021,7 @@ static int nfs_flush_multi(struct inode
nbytes = req->wb_bytes;
do {
- size_t len = min(nbytes, wsize);
+ size_t len = min(nbytes, (size_t)wsize);
data = nfs_writedata_alloc(len);
if (!data)
@@ -1538,7 +1538,6 @@ nfs_commit_list(struct inode *inode, str
{
struct nfs_write_data *data = NULL;
struct nfs_page *req;
- unsigned int wpages;
int ret;
dprintk("%s Begin\n", __FUNCTION__);
@@ -1573,7 +1572,6 @@ nfs_commit_list(struct inode *inode, str
static void nfs_commit_done_pages(struct nfs_write_data *data, int status, int ckverf)
{
struct nfs_page *req;
- int res = 0;
while (!list_empty(&data->pages)) {
req = nfs_list_entry(data->pages.next);
@@ -1729,9 +1727,13 @@ int nfs_sync_inode_wait(struct inode *in
if (pnfs_enabled_sb(NFS_SERVER(inode))) {
nfs_page = nfs_list_entry(head.next);
- if (virtual_update_layout(inode, nfs_page->wb_context,
- npages * PAGE_SIZE, idx_start * PAGE_SIZE, FMODE_WRITE))
- printk(KERN_NOTICE "Failed to get layout!\n");
+ if ((ret = virtual_update_layout(inode, nfs_page->wb_context,
+ npages * PAGE_SIZE, idx_start * PAGE_SIZE, FMODE_WRITE)))
+ printk(KERN_NOTICE "%s: failed to get the layout!"
+ " %Zd@%llu error=%d\n",
+ __FUNCTION__, npages * idx_start * PAGE_SIZE,
+ (u64)idx_start * PAGE_SIZE, ret);
+ ret = 0;
}
#endif
Index: include/linux//nfs4.h
===================================================================
RCS file: /cvs/nfsv4/cvs/pnfs/include/linux/nfs4.h,v
retrieving revision 1.12
diff -d -u -p -r1.12 nfs4.h
--- include/linux//nfs4.h 18 Dec 2006 17:18:48 -0000 1.12
+++ include/linux//nfs4.h 3 Jan 2007 13:17:09 -0000
@@ -235,7 +235,23 @@ enum nfsstat4 {
NFS4ERR_DEADLOCK = 10045,
NFS4ERR_FILE_OPEN = 10046,
NFS4ERR_ADMIN_REVOKED = 10047,
- NFS4ERR_CB_PATH_DOWN = 10048
+ NFS4ERR_CB_PATH_DOWN = 10048,
+ NFS4ERR_BADIOMODE = 10049,
+ NFS4ERR_BADLAYOUT = 10050,
+ NFS4ERR_BAD_SESSION_DIGEST = 10051,
+ NFS4ERR_BADSESSION = 10052,
+ NFS4ERR_BADSLOT = 10053,
+ NFS4ERR_COMPLETE_ALREADY = 10054,
+ NFS4ERR_CONN_NOT_BOUND_TO_SESSION = 10055,
+ NFS4ERR_DELEG_ALREADY_WANTED = 10056,
+ NFS4ERR_DIRDELEG_UNAVAIL = 10057,
+ NFS4ERR_LAYOUTTRYLATER = 10058,
+ NFS4ERR_LAYOUTUNAVAILABLE = 10059,
+ NFS4ERR_NOMATCHING_LAYOUT = 10060,
+ NFS4ERR_RECALLCONFLICT = 10061,
+ NFS4ERR_UNKNOWN_LAYOUTTYPE = 10062,
+ NFS4ERR_SEQ_MISORDERED = 10063,
+ NFS4ERR_SEQUENCE_POS = 10064,
};
/*
Index: include/linux//nfs_fs.h
===================================================================
RCS file: /cvs/nfsv4/cvs/pnfs/include/linux/nfs_fs.h,v
retrieving revision 1.18
diff -d -u -p -r1.18 nfs_fs.h
--- include/linux//nfs_fs.h 25 Dec 2006 00:00:09 -0000 1.18
+++ include/linux//nfs_fs.h 3 Jan 2007 13:17:10 -0000
@@ -183,6 +183,7 @@ struct nfs_inode {
/* pNFS layout information */
u32 pnfs_layout_state;
#define NFS_INO_LAYOUT_FAILED 0x0001 /* get layout failed, stop trying */
+ time_t pnfs_layout_suspend;
struct pnfs_layout_type* current_layout;
struct nfs_open_context* layoutcommit_ctx; /* use rpc_creds in this open_context
* to send LAYOUTCOMMIT to MDS */
More information about the pNFS
mailing list