[pnfs] [PATCH 03/10] pnfs: file layout commit use wb_private
Benny Halevy
bhalevy at panasas.com
Thu Jan 17 05:27:38 EST 2008
On Jan. 17, 2008, 0:52 +0200, andros at umich.edu wrote:
> From: Andy Adamson <andros at umich.edu>
>
> Remove struct nfs_page wb_devip and wb_devport fields. Refactor
> filelayout_commit to use the struct nfs4_pnfs_dserver in the nfs_page
> wb_private field.
>
> Signed-off-by: Andy Adamson<andros at umich.edu>
> ---
> fs/nfs/nfs4filelayout.c | 133 +++++++++++++++++++++++++---------------------
> include/linux/nfs_page.h | 2 -
> 2 files changed, 73 insertions(+), 62 deletions(-)
>
> diff --git a/fs/nfs/nfs4filelayout.c b/fs/nfs/nfs4filelayout.c
> index c8e3be3..a642751 100644
> --- a/fs/nfs/nfs4filelayout.c
> +++ b/fs/nfs/nfs4filelayout.c
> @@ -460,8 +460,6 @@ skip_get:
> count -= reqcount;
> dstotal += reqcount;
>
> - req->wb_devip = ds->ds_ip_addr;
> - req->wb_devport = ds->ds_port;
> req->wb_ops = &filelayout_io_operations;
> req->wb_private = dserver;
>
> @@ -559,8 +557,8 @@ ssize_t filelayout_write_pagelist(
> data->ds_nfs_client = ds->ds_clp;
> data->args.fh = dserver->fh;
>
> - dprintk("%s set wb_devip: wb_devport %x:%hu\n", __FUNCTION__,
> - htonl(ds->ds_ip_addr), ntohs(ds->ds_port));
> + dprintk("%s using DS %x:%hu\n", __FUNCTION__,
> + htonl(ds->ds_ip_addr), ntohs(ds->ds_port));
>
> /* Get the file offset on the dserver. Set the write offset to
> * this offset and save the original offset.
> @@ -665,26 +663,62 @@ filelayout_free_lseg(struct pnfs_layout_type *layoutid,
> memset(fls, 0, sizeof(*fls));
> }
>
> -/* TODO: Technically we would need to execute a COMMIT op to each
> - * data server on which a page in 'pages' exists.
> - * Once we fix this, we will need to invoke the pnfs_commit_complete callback.
> +/*
> + * Do two nfs_pnfs_dserver pointers point to the same structure?
> + * Just compare the first multipath servers.
> + */
> +static int
> +filelayout_same_ds(struct nfs4_pnfs_dserver *one, struct nfs4_pnfs_dserver *two)
> +{
> + struct nfs4_pnfs_dev *d_one = one->dev, *d_two = two->dev;
> + struct nfs4_pnfs_ds *ds_one, *ds_two;
> +
> + ds_one = d_one->ds_list[0];
> + ds_two = d_two->ds_list[0];
> + return (d_one->stripe_index == d_two->stripe_index &&
> + d_one->num_ds == d_two->num_ds &&
> + ds_one->ds_ip_addr == ds_two->ds_ip_addr &&
> + ds_one->ds_port == ds_two->ds_port);
> +}
> +
> +/*
> + * Allocate a new nfs_write_data struct and initialize
> + */
> +static void
> +filelayout_clone_write_data(struct nfs_write_data *old,
> + struct nfs_write_data *new)
new seems to be an output argument, if so, shouldn't it
point to struct nfs_write_data *?
Or even better, why not just return it? :)
> +{
> + new = nfs_commit_alloc();
> + if (!new)
> + return;
> + new->inode = old->inode;
> + new->cred = old->cred;
> + new->args.offset = 0;
> + new->args.count = 0;
> + new->res.count = 0;
> + new->res.fattr = old->res.fattr;
> + new->res.verf = old->res.verf;
> + new->args.context = old->args.context;
> + new->call_ops = old->call_ops;
> + new->how = old->how;
> +}
> +
> +/*
> + * Execute a COMMIT op to the MDS or to each data server on which a page
> + * in 'pages' exists.
> + * Invoke the pnfs_commit_complete callback.
> */
> int
> filelayout_commit(struct pnfs_layout_type *layoutid, int sync,
> struct nfs_write_data *data)
> {
> - struct inode *ino = PNFS_INODE(layoutid);
> - struct nfs_write_data *dsdata = NULL;
> struct nfs4_filelayout *flo;
> struct nfs4_filelayout_segment *nfslay;
> - struct nfs4_pnfs_dev_item *dev;
> - struct nfs4_pnfs_dev *fdev;
> - struct nfs4_pnfs_dserver dserver;
> + struct nfs_write_data *dsdata = NULL;
> + struct nfs4_pnfs_dserver *dserver = NULL;
> struct nfs4_pnfs_ds *ds;
> - struct nfs_page *first;
> struct nfs_page *req;
> - struct list_head *pos, *tmp;
> - int i;
> + struct list_head *pos, *tmp, *head = &data->pages;
>
> flo = PNFS_LD_DATA(layoutid);
> nfslay = LSEG_LD_DATA(&flo->pnfs_lseg);
> @@ -697,70 +731,49 @@ filelayout_commit(struct pnfs_layout_type *layoutid, int sync,
> return 1;
> }
>
> - dev = nfs4_pnfs_device_item_get(ino, nfslay->dev_id);
> - fdev = &dev->stripe_devs[0];
> + /* COMMIT to each Data Server */
> + while (!list_empty(head)) {
>
> - /* Gather pages per DS - Isn't this already done in nfs_pageio_init?*/
> - for (i = 0; i < nfslay->num_fh; i++) {
> - /* just try the first data server for the index..*/
> - ds = fdev->ds_list[0];
> + /* dserver and dsdata must be NULL */
> + req = nfs_list_entry(head->next);
>
> - if (!dsdata) {
> - unsigned int pgcnt = 0;
> + dserver = (struct nfs4_pnfs_dserver *)req->wb_private;
> + /* XXX BUG_ON(!dserver) ?*/
> + if (!dserver)
> + goto out_bad;
>
> - list_for_each_safe(pos, tmp, &data->pages) {
> - req = nfs_list_entry(pos);
> - if (req->wb_devip == ds->ds_ip_addr &&
> - req->wb_devport == ds->ds_port)
> - pgcnt++;
> - }
> - dsdata = nfs_commit_alloc();
> - }
> + filelayout_clone_write_data(data, dsdata);
so passing &dsdata, filelayout_clone_write_data could have changed its
value, or if it changes to returning the clone then
dsdata = filelayout_clone_write_data(data);
seems to be the natural way of doing this...
> if (!dsdata)
> goto out_bad;
> - list_for_each_safe(pos, tmp, &data->pages) {
> +
> + /* Just try the first multipath data server */
> + ds = dserver->dev->ds_list[0];
> + dsdata->pnfs_client = ds->ds_clp->cl_rpcclient;
> + dsdata->ds_nfs_client = ds->ds_clp;
> + dsdata->args.fh = dserver->fh;
> +
> + /* Gather all pages going to the data server */
> + list_for_each_safe(pos, tmp, head) {
> req = nfs_list_entry(pos);
> - if (req->wb_devip == ds->ds_ip_addr &&
> - req->wb_devport == ds->ds_port) {
> + if (filelayout_same_ds(dserver, req->wb_private)) {
> nfs_list_remove_request(req);
> - nfs_list_add_request(req, &dsdata->pages);
> + nfs_list_add_request(req, head);
Hmm, doesn't this put req back onto head's list?
Benny
> }
> }
> - if (list_empty(&dsdata->pages)) {
> - if (list_empty(&data->pages)) {
> - dprintk("%s exit i %d devid %d\n",
> - __FUNCTION__, i, nfslay->dev_id);
> - nfs_commit_free(dsdata);
> - return 0;
> - } else
> - continue;
> - }
> - first = nfs_list_entry(dsdata->pages.next);
> -
> - dprintk("%s call nfs_initiate_commit i %d devid %d\n",
> - __FUNCTION__, i, nfslay->dev_id);
> -
> - dsdata->pnfs_client = ds->ds_clp->cl_rpcclient;
> - dsdata->ds_nfs_client = ds->ds_clp;
> - dserver.dev = fdev;
> -
> - /* TODO: Is the FH different from NFS_FH(data->inode)?
> - * (set in nfs_commit_rpcsetup)
> - */
> - dserver.fh = &nfslay->fh_array[i];
> - dsdata->args.fh = dserver.fh;
>
> + /* Send COMMIT to data server */
> nfs_initiate_commit(dsdata, dsdata->pnfs_client, sync);
>
> + /* reset for next data server */
> dsdata = NULL;
> - fdev++;
> + dserver = NULL;
> }
> -
> /* Release original commit data since it is not used */
> nfs_commit_free(data);
> return 0;
>
> out_bad:
> + /* XXX should we send COMMIT to MDS e.g. not free data and return 1 ? */
> nfs_commit_free(data);
> return -ENOMEM;
> }
> diff --git a/include/linux/nfs_page.h b/include/linux/nfs_page.h
> index 098b788..8da70b2 100644
> --- a/include/linux/nfs_page.h
> +++ b/include/linux/nfs_page.h
> @@ -45,8 +45,6 @@ struct nfs_page {
> unsigned long wb_flags;
> struct nfs_writeverf wb_verf; /* Commit cookie */
> #ifdef CONFIG_PNFS
> - unsigned int wb_devip; /* pNFS data server IP addr */
> - unsigned int wb_devport; /* pNFS data server port */
> void *wb_ops; /* pNFS io operations */
> void *wb_private;
> #endif
More information about the pNFS
mailing list