[pnfs] CVS: nfsv4

Marc Eshel eshel at almaden.ibm.com
Tue Jan 2 14:06:51 EST 2007


Looks reasonable, check it in and I will test it with my server.
Marc. 

Benny Halevy <bhalevy at panasas.com> wrote on 12/27/2006 05:26:40 AM:

> Marc, I took a stab at it.  Here's what I propose (worked for me with
> a simulated error on virtual_update_layout).
> If there's no objection I'll just submit this to the cvs tree.
> The gist of it is to separate transient from permanent errors
> on layoutget and mark the nfs_inode accordingly.  For transient
> errors, I added a timestamp on the nfs_inode so that when it passes
> we clear the error and resume layout_get.  virtual_update_layout
> returns a status greater than zero when normal nfsv4 I/O should
> take place.  status less than zero is still interpreted as hard
> error that should be returned to the caller.
> 
> Benny
> 
> Analyze and handle layoutget nfs errors.
> Keep a timestamp on struct nfs_inode for transient
> layoutget errors so as to resume layout_get after some delay
> (currently approx. 1 second)
> 
> Signed-off-by: Benny Halevy <bhalevy at panasas.com>
> 
> diff -urp a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c
> --- a/fs/nfs/nfs4xdr.c   2006-12-18 19:18:44.000000000 +0200
> +++ b/fs/nfs/nfs4xdr.c   2006-12-26 14:05:02.000000000 +0200
> @@ -5125,6 +5125,13 @@ static struct {
>                        * to be handled by a
>                        * middle-layer.
>                        */
> +   { NFSERR_BADIOMODE,      EINVAL   },
> +   { NFS4ERR_BADLAYOUT,      ENOENT   },
> +   { NFS4ERR_UNKNOWN_LAYOUTTYPE,   EINVAL   },
> +   { NFS4ERR_LAYOUTTRYLATER,   EAGAIN   },
> +   { NFS4ERR_LAYOUTUNAVAILABLE,   ENOTSUPP},
> +   { NFS4ERR_RECALLCONFLICT,   EAGAIN   },
> +
>     { -1,         EIO      }
>  };
> 
> diff -urp a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c
> --- a/fs/nfs/pnfs.c   2006-12-26 13:44:24.000000000 +0200
> +++ b/fs/nfs/pnfs.c   2006-12-27 15:00:40.000000000 +0200
> @@ -438,17 +438,43 @@ virtual_update_layout(struct inode* ino,
>     res.layout.buf = NULL;
> 
>          /* if get layout already failed once goto out */
> -        if (nfsi->pnfs_layout_state & NFS_INO_LAYOUT_FAILED)
> +   if (nfsi->pnfs_layout_state & NFS_INO_LAYOUT_FAILED) {
> +      if (unlikely(nfsi->pnfs_layout_suspend &&
> +                   get_seconds() >= nfsi->pnfs_layout_suspend)) {
> +         dprintk("%s: layout_get resumed\n", __FUNCTION__);
> +         nfsi->pnfs_layout_state &= NFS_INO_LAYOUT_FAILED;
> +         nfsi->pnfs_layout_suspend = 0;
> +      } else
> +         result = 1;
>        goto out;
> +   }
> 
>     if ((result = get_layout(ino, ctx, count, pos, iomode, &arg, &res))) 
{
> -      printk("\n%s: ERROR retrieving layout %d\n",__FUNCTION__,result);
> -      result =  -EIO;
> +      printk("%s: ERROR retrieving layout %d\n", __FUNCTION__, result);
> + 
> +      switch (result) {
> +      case -ENOENT:   /* NFS4ERR_BADLAYOUT */
> +         /* transient error, don't mark with NFS_INO_LAYOUT_FAILED */
> +         result = 1;
> +         break;
> +
> +      case -EAGAIN:   /* NFS4ERR_LAYOUTTRYLATER, 
> NFS4ERR_RECALLCONFLICT, NFS4ERR_LOCKED */
> +         nfsi->pnfs_layout_suspend = get_seconds() + 1;
> +         dprintk("%s: layout_get suspended until %ld\n",
> +                 __FUNCTION__, nfsi->pnfs_layout_suspend);
> +         /* FALLTHROUGH */
> +      case -EINVAL:   /* NFS4ERR_INVAL, NFSERR_BADIOMODE, 
> NFS4ERR_UNKNOWN_LAYOUTTYPE */
> +      case -ENOTSUPP:   /* NFS4ERR_LAYOUTUNAVAILABLE */
> +      case -ETOOSMALL:/* NFS4ERR_TOOSMALL */
> +      default:
> +         /* mark with NFS_INO_LAYOUT_FAILED */
> +         break;
> +      }
>        goto out;
>     }
> 
>     if (res.layout.len <= 0) {
> -      printk("\n%s: ERROR!  Layout size is ZERO!\n",__FUNCTION__);
> +      printk("%s: ERROR!  Layout size is ZERO!\n",__FUNCTION__);
>        result =  -EIO;
>        goto out;
>     }
> @@ -474,8 +500,8 @@ virtual_update_layout(struct inode* ino,
>     result = 0;
>  out:
> 
> -   /* remember that get layout failed and don't try again */
> -   if (result)
> +        /* remember that get layout failed and don't try again */
> +   if (result < 0)
>        nfsi->pnfs_layout_state |= NFS_INO_LAYOUT_FAILED;
> 
>     /* res.layout.buf kalloc'ed by the xdr decoder? */
> @@ -723,7 +749,8 @@ pnfs_writeback_done(struct nfs_write_dat
>  /*
>   * Call the appropriate parallel I/O subsystem write function.
>   * If no I/O device driver exists, or one does match the returned
> - * fstype, then call regular NFS processing.
> + * fstype, then return a positive status for regular NFS processing.
> + *
>   * TODO: Is wdata->how and wdata->args.stable always the same value?
>   * TODO: It seems in NFS, the server may not do a stable write even
>   * though it was requested (and vice-versa?).  To check, it looks
> @@ -750,13 +777,15 @@ pnfs_writepages(struct nfs_write_data* w
>                     args->context,
>                     args->count,
>                     args->offset,
> -                   IOMODE_RW)))
> +                   IOMODE_RW))) {
> +      status = 1;   /* retry with nfs I/O */
>        goto out;
> +   }
> 
>     if (!nfss->pnfs_curr_ld->ld_io_ops ||
>         !nfss->pnfs_curr_ld->ld_io_ops->write_pagelist) {
>        printk("%s: ERROR, no layout driver write operation\n", 
__FUNCTION__);
> -      status = -EIO;
> +      status = 1;
>        goto out;
>     }
> 
> @@ -783,8 +812,10 @@ pnfs_writepages(struct nfs_write_data* w
>                              wdata);
> 
>     if (status > 0) {
> +      dprintk("%s: LD write_pagelist returned status %d > 0\n",
> __FUNCTION__, status);
>        pnfs_update_last_write(nfsi, args->offset, status);
>        pnfs_need_layoutcommit(nfsi, wdata->args.context);
> +      status = 0;
>     }
> 
>  out:
> @@ -816,7 +847,7 @@ pnfs_read_done(struct nfs_read_data* dat
>  /*
>   * Call the appropriate parallel I/O subsystem read function.
>   * If no I/O device driver exists, or one does match the returned
> - * fstype, then call regular NFS processing.
> + * fstype, then return a positive status for regular NFS processing.
>   */
>  int
>  pnfs_readpages(struct nfs_read_data *rdata)
> @@ -842,11 +873,13 @@ pnfs_readpages(struct nfs_read_data *rda
>     {
>        printk(KERN_WARNING "%s: ERROR %d from virtual_update_layout\n",
>           __FUNCTION__, status);
> +      status = 1;
> +      goto out;
>     }
>     if (!nfss->pnfs_curr_ld->ld_io_ops ||
>         !nfss->pnfs_curr_ld->ld_io_ops->read_pagelist) {
>        printk("%s: ERROR, no layout driver read operation\n", 
__FUNCTION__);
> -      status = -EIO;
> +      status = 1;
>        goto out;
>     }
> 
> @@ -867,6 +900,11 @@ pnfs_readpages(struct nfs_read_data *rda
>                             (loff_t)args->offset,
>                             args->count,
>                             rdata);
> +   if (status > 0) {
> +      dprintk("%s: LD read_pagelist returned status %d > 0\n",
> __FUNCTION__, status);
> +      status = 0;
> +   }
> +
>   out:
>     dprintk("%s: End Status %d\n",__FUNCTION__, status);
>     return status;
> @@ -883,7 +921,7 @@ int pnfs_try_to_read_data(struct nfs_rea
>        dprintk("%s Utilizing pNFS I/O\n",__FUNCTION__);
>        data->call_ops = call_ops;
>        data->pnfsflags |= PNFS_USE_DS;
> -      if((status = pnfs_readpages(data)) < 0)
> +      if((status = pnfs_readpages(data)))
>           return status;
>        return 0;
>     }
> @@ -915,12 +953,12 @@ pnfs_file_read(struct file* filp,
> 
>     /* Using NFS page cache with pNFS */
>     if (use_page_cache(inode))
> -      return do_sync_read(filp, buf, count, pos);
> +      goto fallback;
> 
>     /* Small I/O Optimization */
>     if (below_threshold(inode, count, 0)) {
>        dfprintk(IO, "%s: Below Read threshold, using NFSv4 read\n",
> __FUNCTION__);
> -      return do_sync_read(filp,buf,count,pos);
> +      goto fallback;
>     }
> 
>     /* Step 1: Retrieve and set layout if not allready cached*/
> @@ -928,21 +966,25 @@ pnfs_file_read(struct file* filp,
>                     (struct nfs_open_context *)filp->private_data,
>                     count,
>                     *pos,
> -                   IOMODE_READ)))
> -      goto out;
> +                   IOMODE_READ))) {
> +      dfprintk(IO, "%s: Could not get layout result=%Zd, using 
> NFSv4 read\n",__FUNCTION__, result);
> +      goto fallback;
> +   }
> 
>     /* Step 2: Call I/O device driver's read function */
>     if (!nfss->pnfs_curr_ld->ld_io_ops &&
>         nfss->pnfs_curr_ld->ld_io_ops->read) {
> -      result = -EIO;
> -      goto out;
> +      dfprintk(IO, "%s: No LD read function, using NFSv4 read\n",
> __FUNCTION__);
> +      goto fallback;
>     }
> 
>     result = nfss->pnfs_curr_ld->ld_io_ops->read(nfsi->current_layout,
>                         filp, buf, count, pos);
> -out:
>     dprintk("%s end (err:%Zd)\n",__FUNCTION__,result);
>     return result;
> +
> +fallback:
> +   return do_sync_read(filp, buf, count, pos);
>  }
> 
>  int pnfs_try_to_write_data(struct nfs_write_data *data,
> @@ -958,7 +1000,7 @@ int pnfs_try_to_write_data(struct nfs_wr
>        data->call_ops = call_ops;
>        data->pnfsflags |= PNFS_USE_DS;
>        data->how = how; /* XXX do we really need this? */
> -                if((status = pnfs_writepages(data, how)) <0)
> +                if((status = pnfs_writepages(data, how)))
>           return status;
>        return 0;
>     }
> @@ -993,12 +1035,12 @@ pnfs_file_write(struct file* filp,
> 
>     /* Using NFS page cache with pNFS */
>     if (use_page_cache(inode))
> -      return do_sync_write(filp,buf,count,pos);
> +      goto fallback;
> 
>     /* Small I/O Optimization */
>     if (below_threshold(inode, count, 1)) {
>        dfprintk(IO, "%s: Below write threshold, using NFSv4 
> write\n",__FUNCTION__);
> -      return do_sync_write(filp,buf,count,pos);
> +      goto fallback;
>     }
> 
>     /* Need to adjust write param if this is an append, etc */
> @@ -1012,14 +1054,16 @@ pnfs_file_write(struct file* filp,
>                     (struct nfs_open_context *)filp->private_data,
>                     count,
>                     *pos,
> -                   IOMODE_RW)))
> -      goto out;
> +                   IOMODE_RW))) {
> +      dfprintk(IO, "%s: Could not get layout result=%Zd, using 
> NFSv4 write\n",__FUNCTION__, result);
> +      goto fallback;
> +   }
> 
>     /* Step 2: Call I/O device driver's write function */
>     if (!nfss->pnfs_curr_ld->ld_io_ops &&
>         nfss->pnfs_curr_ld->ld_io_ops->write) {
> -      result = -EIO;
> -      goto out;
> +      dfprintk(IO, "%s: No LD write function, using NFSv4 write\n",
> __FUNCTION__);
> +      goto fallback;
>     }
> 
>     result = nfss->pnfs_curr_ld->ld_io_ops->write(nfsi->current_layout,
> @@ -1033,9 +1077,11 @@ pnfs_file_write(struct file* filp,
>        pnfs_update_last_write(nfsi, pos_orig, result);
>        pnfs_need_layoutcommit(nfsi, (struct nfs_open_context *)
> filp->private_data);
>     }
> -out:
>     dprintk("%s end (err:%Zd)\n",__FUNCTION__,result);
>     return result;
> +
> +fallback:
> +   return do_sync_write(filp, buf, count, pos);
>  }
> 
>  int pnfs_try_to_commit(struct nfs_write_data *data, struct 
> list_head *head, int how)
> @@ -1139,8 +1185,10 @@ pnfs_fsync(struct file *file, struct den
>                     (struct nfs_open_context *)file->private_data,
>                     0,
>                     0,
> -                   IOMODE_RW)))
> +                   IOMODE_RW))) {
> +      result = -EIO;
>        goto out;
> +   }
> 
>     dprintk("%s: Calling layout driver fsync\n",__FUNCTION__);
>     result = nfss->pnfs_curr_ld->ld_io_ops->fsync(nfsi->current_layout,
> diff -urp a/fs/nfs/pnfs.c~ b/fs/nfs/pnfs.c~
> --- a/fs/nfs/pnfs.c~   2006-12-25 08:35:20.000000000 +0200
> +++ b/fs/nfs/pnfs.c~   2006-12-26 15:53:59.000000000 +0200
> @@ -438,17 +438,43 @@ virtual_update_layout(struct inode* ino,
>     res.layout.buf = NULL;
> 
>          /* if get layout already failed once goto out */
> -        if (nfsi->pnfs_layout_state & NFS_INO_LAYOUT_FAILED)
> +   if (nfsi->pnfs_layout_state & NFS_INO_LAYOUT_FAILED) {
> +      if (unlikely(nfsi->pnfs_layout_suspend &&
> +                   get_seconds() >= nfsi->pnfs_layout_suspend)) {
> +         dprintk("%s: layout_get resumed\n", __FUNCTION__);
> +         nfsi->pnfs_layout_state &= NFS_INO_LAYOUT_FAILED;
> +         nfsi->pnfs_layout_suspend = 0;
> +      } else
> +         result = 1;
>        goto out;
> +   }
> 
>     if ((result = get_layout(ino, ctx, count, pos, iomode, &arg, &res))) 
{
> -      printk("\n%s: ERROR retrieving layout %d\n",__FUNCTION__,result);
> -      result =  -EIO;
> +      printk("%s: ERROR retrieving layout %d\n", __FUNCTION__, result);
> + 
> +      switch (result) {
> +      case -ENOENT:   /* NFS4ERR_BADLAYOUT */
> +         /* transient error, don't mark with NFS_INO_LAYOUT_FAILED */
> +         result = 1;
> +         break;
> +
> +      case -EAGAIN:   /* NFS4ERR_LAYOUTTRYLATER, 
> NFS4ERR_RECALLCONFLICT, NFS4ERR_LOCKED */
> +         nfsi->pnfs_layout_suspend = get_seconds() + 1;
> +         dprintk("%s: layout_get suspended until %ld\n",
> +                 __FUNCTION__, nfsi->pnfs_layout_suspend);
> +         /* FALLTHROUGH */
> +      case -EINVAL:   /* NFS4ERR_INVAL, NFSERR_BADIOMODE, 
> NFS4ERR_UNKNOWN_LAYOUTTYPE */
> +      case -ENOTSUPP:   /* NFS4ERR_LAYOUTUNAVAILABLE */
> +      case -ETOOSMALL:/* NFS4ERR_TOOSMALL */
> +      default:
> +         /* mark with NFS_INO_LAYOUT_FAILED */
> +         break;
> +      }
>        goto out;
>     }
> 
>     if (res.layout.len <= 0) {
> -      printk("\n%s: ERROR!  Layout size is ZERO!\n",__FUNCTION__);
> +      printk("%s: ERROR!  Layout size is ZERO!\n",__FUNCTION__);
>        result =  -EIO;
>        goto out;
>     }
> @@ -459,7 +485,7 @@ virtual_update_layout(struct inode* ino,
>                 res.layout.buf);
>     if (layout_new == NULL) {
>        printk("%s: ERROR!  Could not inject layout (%d)\n",
> __FUNCTION__,result);
> -      result =  -EIO;
> +      result = -EIO;
>        goto out;
>     }
> 
> @@ -475,7 +501,8 @@ virtual_update_layout(struct inode* ino,
>  out:
> 
>          /* remember that get layout failed and don't try again */
> -        nfsi->pnfs_layout_state |= NFS_INO_LAYOUT_FAILED;
> +   if (result < 0)
> +      nfsi->pnfs_layout_state |= NFS_INO_LAYOUT_FAILED;
> 
>     /* res.layout.buf kalloc'ed by the xdr decoder? */
>     if (res.layout.buf)
> @@ -722,7 +749,8 @@ pnfs_writeback_done(struct nfs_write_dat
>  /*
>   * Call the appropriate parallel I/O subsystem write function.
>   * If no I/O device driver exists, or one does match the returned
> - * fstype, then call regular NFS processing.
> + * fstype, then return a positive status for regular NFS processing.
> + *
>   * TODO: Is wdata->how and wdata->args.stable always the same value?
>   * TODO: It seems in NFS, the server may not do a stable write even
>   * though it was requested (and vice-versa?).  To check, it looks
> @@ -749,13 +777,15 @@ pnfs_writepages(struct nfs_write_data* w
>                     args->context,
>                     args->count,
>                     args->offset,
> -                   IOMODE_RW)))
> +                   IOMODE_RW))) {
> +      status = 1;   /* retry with nfs I/O */
>        goto out;
> +   }
> 
>     if (!nfss->pnfs_curr_ld->ld_io_ops ||
>         !nfss->pnfs_curr_ld->ld_io_ops->write_pagelist) {
>        printk("%s: ERROR, no layout driver write operation\n", 
__FUNCTION__);
> -      status = -EIO;
> +      status = 1;
>        goto out;
>     }
> 
> @@ -782,8 +812,10 @@ pnfs_writepages(struct nfs_write_data* w
>                              wdata);
> 
>     if (status > 0) {
> +      dprintk("%s: LD write_pagelist returned status %d > 0\n",
> __FUNCTION__, status);
>        pnfs_update_last_write(nfsi, args->offset, status);
>        pnfs_need_layoutcommit(nfsi, wdata->args.context);
> +      status = 0;
>     }
> 
>  out:
> @@ -815,7 +847,7 @@ pnfs_read_done(struct nfs_read_data* dat
>  /*
>   * Call the appropriate parallel I/O subsystem read function.
>   * If no I/O device driver exists, or one does match the returned
> - * fstype, then call regular NFS processing.
> + * fstype, then return a positive status for regular NFS processing.
>   */
>  int
>  pnfs_readpages(struct nfs_read_data *rdata)
> @@ -841,11 +873,13 @@ pnfs_readpages(struct nfs_read_data *rda
>     {
>        printk(KERN_WARNING "%s: ERROR %d from virtual_update_layout\n",
>           __FUNCTION__, status);
> +      status = 1;
> +      goto out;
>     }
>     if (!nfss->pnfs_curr_ld->ld_io_ops ||
>         !nfss->pnfs_curr_ld->ld_io_ops->read_pagelist) {
>        printk("%s: ERROR, no layout driver read operation\n", 
__FUNCTION__);
> -      status = -EIO;
> +      status = 1;
>        goto out;
>     }
> 
> @@ -866,6 +900,11 @@ pnfs_readpages(struct nfs_read_data *rda
>                             (loff_t)args->offset,
>                             args->count,
>                             rdata);
> +   if (status > 0) {
> +      dprintk("%s: LD read_pagelist returned status %d > 0\n",
> __FUNCTION__, status);
> +      status = 0;
> +   }
> +
>   out:
>     dprintk("%s: End Status %d\n",__FUNCTION__, status);
>     return status;
> @@ -882,7 +921,7 @@ int pnfs_try_to_read_data(struct nfs_rea
>        dprintk("%s Utilizing pNFS I/O\n",__FUNCTION__);
>        data->call_ops = call_ops;
>        data->pnfsflags |= PNFS_USE_DS;
> -      if((status = pnfs_readpages(data)) < 0)
> +      if((status = pnfs_readpages(data)))
>           return status;
>        return 0;
>     }
> @@ -914,12 +953,12 @@ pnfs_file_read(struct file* filp,
> 
>     /* Using NFS page cache with pNFS */
>     if (use_page_cache(inode))
> -      return do_sync_read(filp, buf, count, pos);
> +      goto fallback;
> 
>     /* Small I/O Optimization */
>     if (below_threshold(inode, count, 0)) {
>        dfprintk(IO, "%s: Below Read threshold, using NFSv4 read\n",
> __FUNCTION__);
> -      return do_sync_read(filp,buf,count,pos);
> +      goto fallback;
>     }
> 
>     /* Step 1: Retrieve and set layout if not allready cached*/
> @@ -927,21 +966,25 @@ pnfs_file_read(struct file* filp,
>                     (struct nfs_open_context *)filp->private_data,
>                     count,
>                     *pos,
> -                   IOMODE_READ)))
> -      goto out;
> +                   IOMODE_READ))) {
> +      dfprintk(IO, "%s: Could not get layout result=%Zd, using 
> NFSv4 read\n",__FUNCTION__, result);
> +      goto fallback;
> +   }
> 
>     /* Step 2: Call I/O device driver's read function */
>     if (!nfss->pnfs_curr_ld->ld_io_ops &&
>         nfss->pnfs_curr_ld->ld_io_ops->read) {
> -      result = -EIO;
> -      goto out;
> +      dfprintk(IO, "%s: No LD read function, using NFSv4 read\n",
> __FUNCTION__);
> +      goto fallback;
>     }
> 
>     result = nfss->pnfs_curr_ld->ld_io_ops->read(nfsi->current_layout,
>                         filp, buf, count, pos);
> -out:
>     dprintk("%s end (err:%Zd)\n",__FUNCTION__,result);
>     return result;
> +
> +fallback:
> +   return do_sync_read(filp, buf, count, pos);
>  }
> 
>  int pnfs_try_to_write_data(struct nfs_write_data *data,
> @@ -957,7 +1000,7 @@ int pnfs_try_to_write_data(struct nfs_wr
>        data->call_ops = call_ops;
>        data->pnfsflags |= PNFS_USE_DS;
>        data->how = how; /* XXX do we really need this? */
> -                if((status = pnfs_writepages(data, how)) <0)
> +                if((status = pnfs_writepages(data, how)))
>           return status;
>        return 0;
>     }
> @@ -992,12 +1035,12 @@ pnfs_file_write(struct file* filp,
> 
>     /* Using NFS page cache with pNFS */
>     if (use_page_cache(inode))
> -      return do_sync_write(filp,buf,count,pos);
> +      goto fallback;
> 
>     /* Small I/O Optimization */
>     if (below_threshold(inode, count, 1)) {
>        dfprintk(IO, "%s: Below write threshold, using NFSv4 
> write\n",__FUNCTION__);
> -      return do_sync_write(filp,buf,count,pos);
> +      goto fallback;
>     }
> 
>     /* Need to adjust write param if this is an append, etc */
> @@ -1011,14 +1054,16 @@ pnfs_file_write(struct file* filp,
>                     (struct nfs_open_context *)filp->private_data,
>                     count,
>                     *pos,
> -                   IOMODE_RW)))
> -      goto out;
> +                   IOMODE_RW))) {
> +      dfprintk(IO, "%s: Could not get layout result=%Zd, using 
> NFSv4 write\n",__FUNCTION__, result);
> +      goto fallback;
> +   }
> 
>     /* Step 2: Call I/O device driver's write function */
>     if (!nfss->pnfs_curr_ld->ld_io_ops &&
>         nfss->pnfs_curr_ld->ld_io_ops->write) {
> -      result = -EIO;
> -      goto out;
> +      dfprintk(IO, "%s: No LD write function, using NFSv4 write\n",
> __FUNCTION__);
> +      goto fallback;
>     }
> 
>     result = nfss->pnfs_curr_ld->ld_io_ops->write(nfsi->current_layout,
> @@ -1032,9 +1077,11 @@ pnfs_file_write(struct file* filp,
>        pnfs_update_last_write(nfsi, pos_orig, result);
>        pnfs_need_layoutcommit(nfsi, (struct nfs_open_context *)
> filp->private_data);
>     }
> -out:
>     dprintk("%s end (err:%Zd)\n",__FUNCTION__,result);
>     return result;
> +
> +fallback:
> +   return do_sync_write(filp, buf, count, pos);
>  }
> 
>  int pnfs_try_to_commit(struct nfs_write_data *data, struct 
> list_head *head, int how)
> @@ -1138,8 +1185,10 @@ pnfs_fsync(struct file *file, struct den
>                     (struct nfs_open_context *)file->private_data,
>                     0,
>                     0,
> -                   IOMODE_RW)))
> +                   IOMODE_RW))) {
> +      result = -EIO;
>        goto out;
> +   }
> 
>     dprintk("%s: Calling layout driver fsync\n",__FUNCTION__);
>     result = nfss->pnfs_curr_ld->ld_io_ops->fsync(nfsi->current_layout,
> Only in b/fs/nfs: pnfs.o
> Only in b/fs/nfs: .pnfs.o.cmd
> Only in b/fs/nfs: proc.o
> Only in b/fs/nfs: .proc.o.cmd
> diff -urp a/fs/nfs/read.c b/fs/nfs/read.c
> --- a/fs/nfs/read.c   2006-12-18 19:18:44.000000000 +0200
> +++ b/fs/nfs/read.c   2006-12-26 14:51:36.000000000 +0200
> @@ -825,22 +825,35 @@ int nfs_readpages(struct file *filp, str
>     if (!list_empty(&head)) {
>        int err;
>        #ifdef CONFIG_NFS_V4
> -         struct nfs_page *nfs_page;
> +         struct nfs_page *first_page, *last_page;
> +         loff_t offset;
> +         size_t count;
> 
>           if (pnfs_enabled_sb(NFS_SERVER(inode))) {
> -            nfs_page = nfs_list_entry(head.next);
> -
> -            if (virtual_update_layout(inode, desc.ctx, 
> -                     nfs_page->wb_bytes, 
> -                     nfs_page->wb_index << PAGE_CACHE_SHIFT, 
FMODE_READ)) {
> -               printk(KERN_NOTICE "failed to get the layout!\n");
> +            first_page = nfs_list_entry(head.next);
> +            last_page = nfs_list_entry(head.prev);
> +            offset = (first_page->wb_index << PAGE_CACHE_SHIFT) +
> +                     first_page->wb_offset;
> +            if (unlikely(first_page == last_page))
> +               count = last_page->wb_bytes;
> +            else
> +               count = ((last_page->wb_index - 
> first_page->wb_index) << PAGE_CACHE_SHIFT) -
> +                       first_page->wb_offset + last_page->wb_bytes;
> +
> +            if ((err = virtual_update_layout(inode, desc.ctx, 
> +                                             count, offset,
> +                                             FMODE_READ)) != 0) {
> +               printk(KERN_NOTICE "%s: failed to get the layout!"
> +                           " %Zd@%llu error=%d\n",
> +                      __FUNCTION__, count, (u64)offset, err);
> +               err = 0;
>              }
>           }
>        #endif
>        err = nfs_pagein_list(&head, inode);
>        if (!ret)
>           nfs_add_stats(inode, NFSIOS_READPAGES, err);
> -         ret = err;
> +      ret = err;
>     }
>     put_nfs_open_context(desc.ctx);
>     return ret;
> diff -urp a/fs/nfs/write.c b/fs/nfs/write.c
> --- a/fs/nfs/write.c   2006-12-18 19:18:44.000000000 +0200
> +++ b/fs/nfs/write.c   2006-12-26 15:21:14.000000000 +0200
> @@ -1021,7 +1021,7 @@ static int nfs_flush_multi(struct inode 
> 
>     nbytes = req->wb_bytes;
>     do {
> -      size_t len = min(nbytes, wsize);
> +      size_t len = min(nbytes, (size_t)wsize);
> 
>        data = nfs_writedata_alloc(len);
>        if (!data)
> @@ -1538,7 +1538,6 @@ nfs_commit_list(struct inode *inode, str
>  {
>     struct nfs_write_data   *data = NULL;
>     struct nfs_page         *req;
> -   unsigned int       wpages;
>     int          ret;
> 
>     dprintk("%s Begin\n", __FUNCTION__);
> @@ -1573,7 +1572,6 @@ nfs_commit_list(struct inode *inode, str
>  static void nfs_commit_done_pages(struct nfs_write_data *data, int 
> status, int ckverf)
>  {
>     struct nfs_page           *req;
> -   int res = 0;
> 
>     while (!list_empty(&data->pages)) {
>        req = nfs_list_entry(data->pages.next);
> @@ -1729,9 +1727,13 @@ int nfs_sync_inode_wait(struct inode *in
> 
>                 if (pnfs_enabled_sb(NFS_SERVER(inode))) {
>                    nfs_page = nfs_list_entry(head.next);
> -                  if (virtual_update_layout(inode, 
nfs_page->wb_context, 
> -                     npages * PAGE_SIZE, idx_start * PAGE_SIZE, 
FMODE_WRITE))
> -                  printk(KERN_NOTICE "Failed to get layout!\n");
> +                  if ((ret = virtual_update_layout(inode, 
> nfs_page->wb_context, 
> +                      npages * PAGE_SIZE, idx_start * PAGE_SIZE, 
> FMODE_WRITE)))
> +                  printk(KERN_NOTICE "%s: failed to get the layout!"
> +                         " %Zd@%llu error=%d\n",
> +                         __FUNCTION__, npages * idx_start * PAGE_SIZE,
> +                         (u64)idx_start * PAGE_SIZE, ret);
> +                  ret = 0;
>                 }
>              #endif
> 
> diff -urp a/include/linux/nfs4.h b/include/linux/nfs4.h
> --- a/include/linux/nfs4.h   2006-12-18 19:18:48.000000000 +0200
> +++ b/include/linux/nfs4.h   2006-12-26 15:16:54.000000000 +0200
> @@ -235,7 +235,23 @@ enum nfsstat4 {
>     NFS4ERR_DEADLOCK = 10045,
>     NFS4ERR_FILE_OPEN = 10046,
>     NFS4ERR_ADMIN_REVOKED = 10047,
> -   NFS4ERR_CB_PATH_DOWN = 10048
> +   NFS4ERR_CB_PATH_DOWN = 10048,
> +   NFS4ERR_BADIOMODE = 10049,
> +   NFS4ERR_BADLAYOUT = 10050,
> +   NFS4ERR_BAD_SESSION_DIGEST = 10051,
> +   NFS4ERR_BADSESSION = 10052,
> +   NFS4ERR_BADSLOT = 10053,
> +   NFS4ERR_COMPLETE_ALREADY = 10054,
> +   NFS4ERR_CONN_NOT_BOUND_TO_SESSION = 10055,
> +   NFS4ERR_DELEG_ALREADY_WANTED = 10056,
> +   NFS4ERR_DIRDELEG_UNAVAIL = 10057,
> +   NFS4ERR_LAYOUTTRYLATER = 10058,
> +   NFS4ERR_LAYOUTUNAVAILABLE = 10059,
> +   NFS4ERR_NOMATCHING_LAYOUT = 10060,
> +   NFS4ERR_RECALLCONFLICT = 10061,
> +   NFS4ERR_UNKNOWN_LAYOUTTYPE = 10062,
> +   NFS4ERR_SEQ_MISORDERED = 10063,
> +   NFS4ERR_SEQUENCE_POS = 10064,
>  };
> 
>  /*
> diff -urp a/include/linux/nfs_fs.h b/include/linux/nfs_fs.h
> --- a/include/linux/nfs_fs.h   2006-12-25 08:35:29.000000000 +0200
> +++ b/include/linux/nfs_fs.h   2006-12-26 14:13:31.000000000 +0200
> @@ -183,6 +183,7 @@ struct nfs_inode {
>     /* pNFS layout information */
>     u32 pnfs_layout_state;
>  #define NFS_INO_LAYOUT_FAILED   0x0001   /* get layout failed, 
stoptrying */
> +   time_t pnfs_layout_suspend;
>     struct pnfs_layout_type* current_layout;
>     struct nfs_open_context* layoutcommit_ctx; /* use rpc_creds in 
> this open_context
>                                      * to send LAYOUTCOMMIT to MDS */
> 
> 
> Marc Eshel wrote:
> > pnfs-bounces at linux-nfs.org wrote on 12/26/2006 03:42:27 AM:
> > 
> >> Marc, there are a few problems with this.
> >>
> >> 1. "nfsi->pnfs_layout_state |= NFS_INO_LAYOUT_FAILED;" happens 
> >> unconditionally,
> >> in all cases, including success. (I will submit a patch fixing that 
> > today)
> > 
> > Yes, I guess I had too much Eggnog.
> > 
> >> 2. I understand marking the client's nfs_file with such a flag when 
the 
> > server
> >> returns a permanent error such as NFS4ERR_LAYOUTUNAVAILABLE but in 
> >> other cases,
> >> when the error is transient we must have a mechanism to clear the 
state, 
> > e.g.
> >> put a timestamp on the file after which you'll start asking for 
layouts 
> > again
> >> (e.g. in the NFS4ERR_LAYOUTTRYLATER case).  In summary, the server's 
> > error code
> >> must be analyzed by the client to determine what to do.  IMO, we 
> >> should consider
> >> reverting what you did in this submit until we have finer grained 
> >> error handling
> >> for layoutget.
> > 
> > Yes, error recovery needs much more work. I was just trying to fix a 
> > specific failure that I observed while the server refused to give 
layout 
> > and the client kept asking for it on each IO. 
> > 
> >> 3. I noticed that when virtual_update_layout returns an error on 
the read 
> > path
> >> the error is returned to the application as EIO.  This is wrong as 
the 
> > client
> >> should fall back to doing NFSv4 I/O when it couldn't get a layout. 
> >> I'm working
> >> on a patch for that too.
> > 
> > I thought I already fixed the read path to do just that; maybe I missed 
a 
> > place. The write path definitely needs more work.
> > 
> >> Benny
> >>
> >> Marc Eshel (IBM) wrote:
> >>> CVSROOT:   /cvs
> >>> Module name:   nfsv4
> >>> Changes by:   eshel at citi.   2006/12/24 19:01:59
> >>>
> >>> Modified files:
> >>>    cvs/pnfs/fs/nfs: inode.c pnfs.c 
> >>>
> >>> Log message:
> >>> If for some reason the server does not return a layout the client 
> >> keeps asking
> >>> on each read. Remember that get layout failed and don't request it 
> > again.
> >>> Index: inode.c
> >>> ===================================================================
> >>> RCS file: /cvs/nfsv4/cvs/pnfs/fs/nfs/inode.c,v
> >>> retrieving revision 1.17
> >>> diff -u -r1.17 inode.c
> >>> --- inode.c   18 Dec 2006 17:18:43 -0000   1.17
> >>> +++ inode.c   24 Dec 2006 23:54:04 -0000
> >>> @@ -1084,6 +1084,7 @@
> >>> #endif
> >>> #ifdef CONFIG_NFS_V4 /* XXX CONFIG_PNFS */
> >>> nfsi->nfs4_acl = NULL;
> >>> +   nfsi->pnfs_layout_state = 0;
> >>> nfsi->current_layout = NULL;
> >>> nfsi->layoutcommit_ctx = NULL;
> >>> #endif /* CONFIG_NFS_V4 */
> >>> Index: pnfs.c
> >>> ===================================================================
> >>> RCS file: /cvs/nfsv4/cvs/pnfs/fs/nfs/pnfs.c,v
> >>> retrieving revision 1.50
> >>> diff -u -r1.50 pnfs.c
> >>> --- pnfs.c   14 Dec 2006 10:47:40 -0000   1.50
> >>> +++ pnfs.c   24 Dec 2006 23:54:05 -0000
> >>> @@ -436,6 +436,11 @@
> >>> }
> >>>
> >>> res.layout.buf = NULL;
> >>> +
> >>> +        /* if get layout already failed once goto out */
> >>> +        if (nfsi->pnfs_layout_state & NFS_INO_LAYOUT_FAILED)
> >>> +      goto out;
> >>> +
> >>> if ((result = get_layout(ino, ctx, count, pos, iomode, &arg, &res))) 
{
> >>> printk("\n%s: ERROR retrieving layout %d\n",__FUNCTION__,result);
> >>> result =  -EIO;
> >>> @@ -468,10 +473,15 @@
> >>>
> >>> result = 0;
> >>> out:
> >>> +
> >>> +        /* remember that get layout failed and don't try again */
> >>> +        nfsi->pnfs_layout_state |= NFS_INO_LAYOUT_FAILED;
> >>> +
> >>> /* res.layout.buf kalloc'ed by the xdr decoder? */
> >>> if (res.layout.buf)
> >>> kfree(res.layout.buf);
> >>> -   dprintk("%s end (err:%d)\n",__FUNCTION__,result);
> >>> +   dprintk("%s end (err:%d) state %d\n",
> >>> +      __FUNCTION__,result,nfsi->pnfs_layout_state);
> >>> return result;
> >>> }
> >>>
> >>> _______________________________________________
> >>> pNFS mailing list
> >>> pNFS at linux-nfs.org
> >>> http://linux-nfs.org/cgi-bin/mailman/listinfo/pnfs
> >> _______________________________________________
> >> pNFS mailing list
> >> pNFS at linux-nfs.org
> >> http://linux-nfs.org/cgi-bin/mailman/listinfo/pnfs
> > 
> 



More information about the pNFS mailing list