[pnfs] [PATCH 18/18] pnfs client flush_one io operation

William A. (Andy) Adamson andros at citi.umich.edu
Wed Jan 9 09:24:24 EST 2008


On Jan 9, 2008 4:59 AM, Benny Halevy <bhalevy at panasas.com> wrote:

> Andy, while you're at it I'd really appreciate if you could
> pass the patches through scripts/checkpatch.pl and fix the
> subject lines to use the "pnfs:" prefix rather than "pnfs client".
>

ok


>
> I already rebased this patchset in my personal copy of the tree
> (and fixed the issues I reported to the mailing list as well
> as a few code style issues that checkpatch complained about)
> on top of this patch that changes rpc_call_validate_args:
> http://linux-nfs.org/pipermail/pnfs/2008-January/002247.html
> and then I rebased the client-layout-cache on top of your patchset.


great!


>
>
> All this in preparation to pushing everything to the public tree, so the
> question is who goes in first...  I can push the combined work today and
> then you can submit more patches to complete the write path or you
> can send the revised patchset to be rebased again.
> What do you prefer?


Go ahead and push everything to the public tree - I'll submit more patches
to complete the write path.

-->Andy


>
> Thanks,
>
> Benny
>
> On Jan. 08, 2008, 21:06 +0200, "William A. (Andy) Adamson" <
> andros at citi.umich.edu> wrote:
> > This patch is incomplete - I did not free nor bump refcounts on the
> > wb_private (thanks fred).
> > I'll send an update.
> >
> > -->Andy
> >
> > On 1/7/08, andros at umich.edu <andros at umich.edu> wrote:
> >> From: Andy Adamson <andros at umich.edu>
> >>
> >> Add a flush_one io_ops entry point for pnfs.
> >> Filelayout uses the flush routine to set up data server I/O
> >> in the NFS page cache.
> >>
> >> Add kref to struct nfs4_pnfs_dserver which is passed in
> >> nfs_page->wb_private
> >> from filelayout_flush_one to the filelayout_write_pagelist
> >> and not looked up again.
> >>
> >> Signed-off-by: Andy Adamson <andros at umich.edu>
> >> ---
> >> fs/nfs/nfs4filelayout.c    |  226
> >> ++++++++++++++++++++++++++++++++++++++------
> >> fs/nfs/nfs4filelayout.h    |    3 +-
> >> fs/nfs/nfs4filelayoutdev.c |    2 +-
> >> fs/nfs/pnfs.c              |   17 ++++
> >> fs/nfs/pnfs.h              |    1 +
> >> fs/nfs/write.c             |    7 +-
> >> include/linux/nfs4_pnfs.h  |    2 +
> >> include/linux/nfs_page.h   |    1 +
> >> 8 files changed, 225 insertions(+), 34 deletions(-)
> >>
> >> diff --git a/fs/nfs/nfs4filelayout.c b/fs/nfs/nfs4filelayout.c
> >> index d8876a9..3a0a6fa 100644
> >> --- a/fs/nfs/nfs4filelayout.c
> >> +++ b/fs/nfs/nfs4filelayout.c
> >> @@ -76,10 +76,14 @@ extern void nfs_initiate_commit(struct
> nfs_write_data
> >> *, struct rpc_clnt *, int)
> >> extern void nfs_read_validate(struct rpc_task *task, void *calldata);
> >> extern void nfs_readdata_release(void *data);
> >> extern void nfs_write_validate(struct rpc_task *task, void *calldata);
> >> +extern int nfs_flush_one(struct inode *, struct list_head *, unsigned
> >> int, size_t, int);
> >>
> >> /* Callback operations to the pNFS client */
> >> struct pnfs_client_operations *pnfs_callback_ops;
> >>
> >> +/* Forward declaration */
> >> +ssize_t filelayout_get_stripesize(struct pnfs_layout_type*, struct
> >> inode*);
> >> +
> >> /* Initialize a mountpoint by retrieving the list of
> >>   * available devices for it.
> >>   * Return the pnfs_mount_type structure so the
> >> @@ -340,7 +344,167 @@ ssize_t filelayout_read_pagelist(
> >>         return status;
> >> }
> >>
> >> -/* Perform sync or async writes.
> >> +void
> >> +print_ds(struct nfs4_pnfs_ds *ds)
> >> +{
> >> +       dprintk("        ds->ds_ip_addr %x\n", htonl(ds->ds_ip_addr));
> >> +       dprintk("        ds->ds_port %hu\n", ntohs(ds->ds_port));
> >> +       dprintk("        ds->ds_clp %p\n", ds->ds_clp);
> >> +       dprintk("        ds->ds_count %d\n",
> atomic_read(&ds->ds_count));
> >> +}
> >> +
> >> +static struct nfs4_pnfs_dserver *
> >> +filelayout_create_dserver(void)
> >> +{
> >> +       struct nfs4_pnfs_dserver *local;
> >> +
> >> +       dprintk("--> %s\n", __func__);
> >> +       local = kzalloc(sizeof(*local), GFP_KERNEL);
> >> +       if (!local) {
> >> +               return NULL;
> >> +       }
> >> +       kref_init(&local->ref);
> >> +       return local;
> >> +}
> >> +
> >> +static void filelayout_free_dserver(struct kref *kref)
> >> +{
> >> +       struct nfs4_pnfs_dserver *dserver;
> >> +       dserver = container_of(kref, struct nfs4_pnfs_dserver, ref);
> >> +
> >> +       dprintk("--> %s dserver %p\n", __func__, dserver);
> >> +       kfree(dserver);
> >> +}
> >> +
> >> +static void filelayout_release_dserver(struct nfs4_pnfs_dserver
> *dserver)
> >> +{
> >> +       dprintk("--> %s dserver %p\n", __func__, dserver);
> >> +       kref_put(&dserver->ref, filelayout_free_dserver);
> >> +}
> >> +
> >> +static void filelayout_get_dserver(struct nfs4_pnfs_dserver *dserver)
> >> +{
> >> +       dprintk("--> %s\n", __func__);
> >> +       kref_get(&dserver->ref);
> >> +}
> >> +
> >> +/*
> >> +* feed nfs_flush_one with per data server pages.
> >> +*
> >> +* Assume stripesz >= PAGE_SIZE.
> >> +* TODO: If stripesz < PAGE_SIZE, use i/o through MDS
> >> +*
> >> +*/
> >> +int filelayout_flush_one(struct inode *inode, struct list_head *head,
> >> unsigned int npages, size_t count, int how)
> >> +{
> >> +       struct pnfs_layout_type *ltype = NFS_I(inode)->current_layout;
> >> +       struct nfs4_filelayout *nfslay = ltype->layoutid;
> >> +       struct nfs4_pnfs_dserver *dserver = NULL;
> >> +       struct nfs4_pnfs_ds *ds = NULL;  /* current stripe data server
> */
> >> +       struct nfs_page* req;
> >> +       loff_t dsoffset = 0;
> >> +       size_t stripesz, reqcount, dstotal = 0;
> >> +       struct list_head *dslist;
> >> +       int status = -ENOMEM, use_ds = 0, ndspages = 0;
> >> +
> >> +       dprintk("--> %s npages %d, count %Zd, ltype %p nfslay %p\n",
> >> __func__, npages, count, ltype, nfslay);
> >> +
> >> +       dslist = kmalloc(sizeof(*dslist), GFP_KERNEL);
> >> +       if (!dslist)
> >> +               return status;
> >> +       INIT_LIST_HEAD(dslist);
> >> +
> >> +       stripesz =
> filelayout_get_stripesize(NFS_I(inode)->current_layout,
> >> inode);
> >> +       dprintk("%s stripesize %Zd\n", __func__, stripesz);
> >> +       /* split up the list according to DS */
> >> +       while(!list_empty(head)) {
> >> +next_ds:
> >> +               req = nfs_list_entry(head->next);
> >> +
> >> +               if (use_ds)
> >> +                       goto use_ds;
> >> +               /* reset for new data server */
> >> +               dstotal = 0;
> >> +               ndspages = 0;
> >> +
> >> +               status = -ENOMEM;
> >> +               dserver = filelayout_create_dserver();
> >> +               if (!dserver) {
> >> +                       dprintk("%s failed to get dserver. status
> %d\n",
> >> +                                               __FUNCTION__, status);
> >> +                       goto out;
> >> +               }
> >> +
> >> +               /* get the data server that serves this stripe */
> >> +               status = nfs4_pnfs_dserver_get(inode, nfslay, dsoffset,
> >> +                               stripesz, dserver);
> >> +
> >> +               if (status != 0) {
> >> +                       dprintk("%s failed to get dataserver. status
> >> %d\n",
> >> +                                               __FUNCTION__, status);
> >> +                       status =  -EIO;
> >> +                       goto out;
> >> +               }
> >> +               /* just try the first multipath data server */
> >> +               ds = dserver->dev->ds_list[0];
> >> +
> >> +               use_ds = 1;
> >> +use_ds:
> >> +               filelayout_get_dserver(dserver);
> >> +
> >> +               reqcount = count < PAGE_SIZE? count: PAGE_SIZE;
> >> +               count -= reqcount;
> >> +               dstotal += reqcount;
> >> +
> >> +               req->wb_devip = ds->ds_ip_addr;
> >> +               req->wb_devport = ds->ds_port;
> >> +               req->wb_private = dserver;
> >> +
> >> +               /* move request to dslist */
> >> +               nfs_list_remove_request(req);
> >> +               nfs_list_add_request(req, dslist);
> >> +               ndspages++;
> >> +               npages--;
> >> +
> >> +               if (dstotal == stripesz)
> >> +                       dsoffset += dstotal;
> >> +
> >> +               if (count == 0 || npages == 0 || dstotal == stripesz) {
> >> +                       use_ds = 0;
> >> +                       goto send;
> >> +               }
> >> +       }
> >> +       if (!ds) {
> >> +               status = -EIO;
> >> +               goto out;
> >> +       }
> >> +
> >> +send:
> >> +       /* XXX should recover to send through MDS */
> >> +       dprintk("%s Send: ndspages %d dstotal %Zd list_empty(head) %d
> \n",
> >> +                       __func__, ndspages, dstotal, list_empty(head));
> >> +       status = nfs_flush_one(inode, dslist, ndspages, dstotal, how);
> >> +       if (status < 0)
> >> +               goto out;
> >> +
> >> +       /* XXX should be BUG_ON(!list_empty(dslist)); */
> >> +       if (!list_empty(head) && npages > 0) {
> >> +               if (!list_empty(dslist)) {
> >> +                       printk("%s ERROR! dslist NOT EMPTY\n",
> __func__);
> >> +                       status = -EIO;
> >> +                       goto out;
> >> +               }
> >> +               dprintk("%s next_ds\n", __func__);
> >> +               goto next_ds;
> >> +       }
> >> +
> >> +out:
> >> +       kfree(dslist);
> >> +       dprintk("<-- %s npages %d (should be zero!)\n", __func__,
> npages);
> >> +       return status;
> >> +}
> >> +
> >> +/* Perform async writes.
> >>   *
> >>   * TODO: See filelayout_read_pagelist.
> >>   */
> >> @@ -356,52 +520,51 @@ ssize_t filelayout_write_pagelist(
> >>         struct nfs_write_data *data)
> >> {
> >>         struct nfs4_filelayout *nfslay = (struct nfs4_filelayout
> >> *)layoutid->layoutid;
> >> -       struct nfs4_pnfs_dserver dserver;
> >> +       struct nfs4_pnfs_dserver *dserver = NULL;
> >>         struct nfs4_pnfs_ds *ds;
> >> -       struct nfs_page *req;
> >> +       struct nfs_page* req = NULL;
> >>         struct list_head *h;
> >> -       int status;
> >>
> >> -       /* Retrieve the correct rpc_client for the byte range */
> >> -       status = nfs4_pnfs_dserver_get(inode,
> >> -                                       nfslay,
> >> -                                       offset,
> >> -                                       count,
> >> -                                       &dserver);
> >> -       if (status) {
> >> -               dprintk("%s failed to get dataserver\n",
> >> -                                               __FUNCTION__);
> >> -               data->ds_nfs_client = NULL;
> >> -               return -EIO;
> >> -       } else {
> >> -               /* just try the first data server for the index.. */
> >> -               ds = dserver.dev->ds_list[0];
> >> -               data->pnfs_client = ds->ds_clp->cl_rpcclient;
> >> -               data->ds_nfs_client = ds->ds_clp;
> >> -               data->args.fh = dserver.fh;
> >> -       }
> >> -       dprintk("%s set wb_devip: wb_devport %x:%hu\n", __FUNCTION__,
> >> -                       htonl(ds->ds_ip_addr), ntohs(ds->ds_port));
> >> +       dprintk("--> %s nr_pages %d offset:count %Lu:%Zu\n", __func__,
> >> +                                               nr_pages, offset,
> count);
> >>
> >> +       /* Retrieve the correct rpc_client for the byte range */
> >>         list_for_each(h, &data->pages) {
> >>                 req = list_entry(h, struct nfs_page, wb_list);
> >> -               req->wb_devip = ds->ds_ip_addr;
> >> -               req->wb_devport = ds->ds_port;
> >> +               break;
> >>         }
> >> +       BUG_ON(!req);
> >>
> >> -       /* Now get the file offset on the dserver
> >> -        * Set the write offset to this offset, and
> >> -        * save the original offset in orig_offset
> >> -        * the offset will be reset in the call_ops->rpc_call_done()
> >> routine.
> >> +       dserver = (struct nfs4_pnfs_dserver *)req->wb_private;
> >> +       BUG_ON(!dserver);
> >> +
> >> +       /* use the first multipath data server */
> >> +       ds = dserver->dev->ds_list[0];
> >> +       dprintk("%s USE DS:\n", __func__);
> >> +       print_ds(ds);
> >> +
> >> +       data->pnfs_client = ds->ds_clp->cl_rpcclient;
> >> +       data->ds_nfs_client = ds->ds_clp;
> >> +       data->args.fh = dserver->fh;
> >> +
> >> +       dprintk("%s set wb_devip: wb_devport %x:%hu\n", __FUNCTION__,
> >> +       htonl(ds->ds_ip_addr),ntohs(ds->ds_port));
> >> +
> >> +       /* Get the file offset on the dserver. Set the write offset to
> >> +        * this offset and save the original offset.
> >>          */
> >>         data->args.offset = filelayout_get_dserver_offset(offset,
> nfslay);
> >>         data->orig_offset = offset;
> >>
> >> -       /* Perform an asynchronous write */
> >> +       /* Perform an asynchronous write. The offset will be reset in
> the
> >> +        * call_ops->rpc_call_done() routine
> >> +        */
> >>         BUG_ON(data->pnfsflags & PNFS_ISSYNC);
> >>         nfs_initiate_write(data, data->pnfs_client,
> >>                         &filelayout_write_call_ops, sync);
> >>
> >> +       filelayout_release_dserver(dserver);
> >> +
> >>         return 0;
> >> }
> >>
> >> @@ -686,6 +849,7 @@ struct layoutdriver_io_operations
> >> filelayout_io_operations = {
> >>         .commit                  = filelayout_commit,
> >>         .read_pagelist           = filelayout_read_pagelist,
> >>         .write_pagelist          = filelayout_write_pagelist,
> >> +       .flush_one               = filelayout_flush_one,
> >>         .set_layout              = filelayout_set_layout,
> >>         .alloc_layout            = filelayout_alloc_layout,
> >>         .free_layout             = filelayout_free_layout,
> >> diff --git a/fs/nfs/nfs4filelayout.h b/fs/nfs/nfs4filelayout.h
> >> index fd099c7..c3eb2a4 100644
> >> --- a/fs/nfs/nfs4filelayout.h
> >> +++ b/fs/nfs/nfs4filelayout.h
> >> @@ -12,6 +12,7 @@
> >> #ifndef FS_NFS_NFS4FILELAYOUT_H
> >> #define FS_NFS_NFS4FILELAYOUT_H
> >>
> >> +#include <linux/kref.h>
> >> #include <linux/nfs4_pnfs.h>
> >> #include <linux/nfs4_session.h>
> >> #include <linux/pnfs_xdr.h>
> >> @@ -77,7 +78,7 @@ struct nfs4_pnfs_devlist {
> >> struct nfs4_pnfs_dserver {
> >>         struct nfs_fh        *fh;
> >>         struct nfs4_pnfs_dev *dev;
> >> -       u32 dev_id;
> >> +       struct kref           ref;
> >> };
> >>
> >> struct nfs4_filelayout {
> >> diff --git a/fs/nfs/nfs4filelayoutdev.c b/fs/nfs/nfs4filelayoutdev.c
> >> index 9d38cc8..cf683a0 100644
> >> --- a/fs/nfs/nfs4filelayoutdev.c
> >> +++ b/fs/nfs/nfs4filelayoutdev.c
> >> @@ -253,7 +253,7 @@ out_put:
> >> }
> >>
> >> /* Assumes lock is held */
> >> -static int
> >> +int
> >> unhash_ds(struct nfs4_pnfs_ds *ds)
> >> {
> >>
> >> diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c
> >> index 3f5ee35..574c600 100644
> >> --- a/fs/nfs/pnfs.c
> >> +++ b/fs/nfs/pnfs.c
> >> @@ -63,6 +63,8 @@ extern int nfs4_pnfs_getdeviceinfo(struct inode
> *inode,
> >> u32 dev_id,
> >>                                    struct pnfs_device *res);
> >> extern void nfs_initiate_commit(struct nfs_write_data *data,
> >>                                 struct rpc_clnt *clnt, int how);
> >> +extern int nfs_flush_one(struct inode *inode, struct list_head *head,
> >> +                               unsigned int npages, size_t count, int
> >> how);
> >>
> >> struct pnfs_client_operations pnfs_ops;
> >>
> >> @@ -957,6 +959,21 @@ pnfs_writeback_done(struct nfs_write_data *data,
> >> ssize_t status)
> >>         data->call_ops->rpc_release(data);
> >> }
> >>
> >> +int
> >> +pnfs_flush_one(struct inode *inode, struct list_head *head, unsigned
> int
> >> npages, size_t count, int how)
> >> +{
> >> +       struct nfs_inode* nfsi = NFS_I(inode);
> >> +       struct nfs_server* nfss = NFS_SERVER(inode);
> >> +       struct layoutdriver_io_operations *io_ops;
> >> +
> >> +       if (nfsi->current_layout != NULL &&
> >> +           (nfss->pnfs_curr_ld->ld_io_ops->flush_one)) {
> >> +               io_ops = nfss->pnfs_curr_ld->ld_io_ops;
> >> +               return io_ops->flush_one(inode, head, npages, count,
> how);
> >> +       } else
> >> +               return nfs_flush_one(inode, head, npages, count, how);
> >> +}
> >> +
> >> /*
> >>   * Call the appropriate parallel I/O subsystem write function.
> >>   * If no I/O device driver exists, or one does match the returned
> >> diff --git a/fs/nfs/pnfs.h b/fs/nfs/pnfs.h
> >> index b314aff..88f88d5 100644
> >> --- a/fs/nfs/pnfs.h
> >> +++ b/fs/nfs/pnfs.h
> >> @@ -53,6 +53,7 @@ void pnfs_commit_done_norpc(struct rpc_task *, void
> *);
> >> void pnfs_pageio_init_read(struct nfs_pageio_descriptor *, struct inode
> *,
> >> struct nfs_open_context *, struct list_head *, size_t *);
> >> void pnfs_pageio_init_write(struct nfs_pageio_descriptor *, struct
> inode
> >> *);
> >> void pnfs_update_layout_commit(struct inode *, struct list_head *,
> >> pgoff_t, unsigned int);
> >> +int pnfs_flush_one(struct inode *, struct list_head *, unsigned int,
> >> size_t, int);
> >>
> >> #endif /* CONFIG_PNFS */
> >>
> >> diff --git a/fs/nfs/write.c b/fs/nfs/write.c
> >> index 4231915..45624d0 100644
> >> --- a/fs/nfs/write.c
> >> +++ b/fs/nfs/write.c
> >> @@ -934,7 +934,7 @@ out_bad:
> >>   * This is the case if nfs_updatepage detects a conflicting request
> >>   * that has been written but not committed.
> >>   */
> >> -static int nfs_flush_one(struct inode *inode, struct list_head *head,
> >> unsigned int npages, size_t count, int how)
> >> +int nfs_flush_one(struct inode *inode, struct list_head *head,
> unsigned
> >> int npages, size_t count, int how)
> >> {
> >>         struct nfs_page         *req;
> >>         struct page             **pages;
> >> @@ -986,7 +986,11 @@ static void nfs_pageio_init_write(struct
> >> nfs_pageio_descriptor *pgio,
> >>         if (wsize < PAGE_CACHE_SIZE)
> >>                 nfs_pageio_init(pgio, inode, nfs_flush_multi, wsize,
> >> ioflags);
> >>         else
> >> +#ifdef CONFIG_PNFS
> >> +               nfs_pageio_init(pgio, inode, pnfs_flush_one, wsize,
> >> ioflags);
> >> +#else
> >>                 nfs_pageio_init(pgio, inode, nfs_flush_one, wsize,
> >> ioflags);
> >> +#endif /* CONFIG_PNFS */
> >> }
> >>
> >> #ifdef CONFIG_PNFS
> >> @@ -1678,6 +1682,7 @@ EXPORT_SYMBOL(nfs_execute_write);
> >> EXPORT_SYMBOL(nfs_write_validate);
> >> EXPORT_SYMBOL(nfs_writedata_release);
> >> EXPORT_SYMBOL(nfs_flush_task_priority);
> >> +EXPORT_SYMBOL(nfs_flush_one);
> >> EXPORT_SYMBOL(nfs_commit_rpcsetup);
> >> EXPORT_SYMBOL(nfs_initiate_write);
> >> EXPORT_SYMBOL(nfs_initiate_commit);
> >> diff --git a/include/linux/nfs4_pnfs.h b/include/linux/nfs4_pnfs.h
> >> index c5dd321..83aefc5 100644
> >> --- a/include/linux/nfs4_pnfs.h
> >> +++ b/include/linux/nfs4_pnfs.h
> >> @@ -45,6 +45,8 @@ struct layoutdriver_io_operations {
> >>          */
> >>         ssize_t (*read_pagelist) (struct pnfs_layout_type *layoutid,
> >> struct inode *, struct page **pages, unsigned int pgbase, unsigned
> nr_pages,
> >> loff_t offset, size_t count, struct nfs_read_data *nfs_data);
> >>         ssize_t (*write_pagelist) (struct pnfs_layout_type *layoutid,
> >> struct inode *, struct page **pages, unsigned int pgbase, unsigned
> nr_pages,
> >> loff_t offset, size_t count, int sync, struct nfs_write_data
> *nfs_data);
> >> +       int (*flush_one) (struct inode *inode, struct list_head *head,
> >> unsigned int npages, size_t count, int how);
> >> +
> >>
> >>         /* Functions that do not use the pagecache.
> >>          * If use_pagecache == 0, then these functions must be
> >> implemented.
> >> diff --git a/include/linux/nfs_page.h b/include/linux/nfs_page.h
> >> index fe82716..cc5f1d0 100644
> >> --- a/include/linux/nfs_page.h
> >> +++ b/include/linux/nfs_page.h
> >> @@ -47,6 +47,7 @@ struct nfs_page {
> >> #ifdef CONFIG_PNFS
> >>         unsigned int            wb_devip;       /* pNFS data server IP
> >> addr */
> >>         unsigned int            wb_devport;     /* pNFS data server
> port
> >> */
> >> +       void                    *wb_private;
> >> #endif
> >> };
> >>
> >> --
> >> 1.5.0.2
> >>
> >>
> >
> >
> > ------------------------------------------------------------------------
> >
> > _______________________________________________
> > pNFS mailing list
> > pNFS at linux-nfs.org
> > http://linux-nfs.org/cgi-bin/mailman/listinfo/pnfs
>
>
-------------- next part --------------
An HTML attachment was scrubbed...
URL: http://linux-nfs.org/pipermail/pnfs/attachments/20080109/3f5cf94c/attachment-0001.htm 


More information about the pNFS mailing list