[pnfs] [PATCH 03/10] pnfs: file layout commit use wb_private

Benny Halevy bhalevy at panasas.com
Thu Jan 17 05:27:38 EST 2008


On Jan. 17, 2008, 0:52 +0200, andros at umich.edu wrote:
> From: Andy Adamson <andros at umich.edu>
> 
> Remove struct nfs_page wb_devip and wb_devport fields. Refactor
> filelayout_commit to use the struct nfs4_pnfs_dserver in the nfs_page
> wb_private field.
> 
> Signed-off-by: Andy Adamson<andros at umich.edu>
> ---
>  fs/nfs/nfs4filelayout.c  |  133 +++++++++++++++++++++++++---------------------
>  include/linux/nfs_page.h |    2 -
>  2 files changed, 73 insertions(+), 62 deletions(-)
> 
> diff --git a/fs/nfs/nfs4filelayout.c b/fs/nfs/nfs4filelayout.c
> index c8e3be3..a642751 100644
> --- a/fs/nfs/nfs4filelayout.c
> +++ b/fs/nfs/nfs4filelayout.c
> @@ -460,8 +460,6 @@ skip_get:
>  		count -= reqcount;
>  		dstotal += reqcount;
>  
> -		req->wb_devip = ds->ds_ip_addr;
> -		req->wb_devport = ds->ds_port;
>  		req->wb_ops = &filelayout_io_operations;
>  		req->wb_private = dserver;
>  
> @@ -559,8 +557,8 @@ ssize_t filelayout_write_pagelist(
>  	data->ds_nfs_client = ds->ds_clp;
>  	data->args.fh = dserver->fh;
>  
> -	dprintk("%s set wb_devip: wb_devport %x:%hu\n", __FUNCTION__,
> -	htonl(ds->ds_ip_addr), ntohs(ds->ds_port));
> +	dprintk("%s using DS %x:%hu\n", __FUNCTION__,
> +		htonl(ds->ds_ip_addr), ntohs(ds->ds_port));
>  
>  	/* Get the file offset on the dserver. Set the write offset to
>  	 * this offset and save the original offset.
> @@ -665,26 +663,62 @@ filelayout_free_lseg(struct pnfs_layout_type *layoutid,
>  	memset(fls, 0, sizeof(*fls));
>  }
>  
> -/* TODO: Technically we would need to execute a COMMIT op to each
> - * data server on which a page in 'pages' exists.
> - * Once we fix this, we will need to invoke the pnfs_commit_complete callback.
> +/*
> + * Do two nfs_pnfs_dserver pointers point to the same structure?
> + * Just compare the first multipath servers.
> + */
> +static int
> +filelayout_same_ds(struct nfs4_pnfs_dserver *one, struct nfs4_pnfs_dserver *two)
> +{
> +	struct nfs4_pnfs_dev *d_one = one->dev, *d_two = two->dev;
> +	struct nfs4_pnfs_ds *ds_one, *ds_two;
> +
> +	ds_one = d_one->ds_list[0];
> +	ds_two = d_two->ds_list[0];
> +	return (d_one->stripe_index == d_two->stripe_index &&
> +		d_one->num_ds == d_two->num_ds &&
> +		ds_one->ds_ip_addr == ds_two->ds_ip_addr &&
> +		ds_one->ds_port == ds_two->ds_port);
> +}
> +
> +/*
> + * Allocate a new nfs_write_data struct and initialize
> + */
> +static void
> +filelayout_clone_write_data(struct nfs_write_data *old,
> +			    struct nfs_write_data *new)

new seems to be an output argument, if so, shouldn't it
point to struct nfs_write_data *?
Or even better, why not just return it? :)

> +{
> +	new = nfs_commit_alloc();
> +	if (!new)
> +		return;
> +	new->inode       = old->inode;
> +	new->cred        = old->cred;
> +	new->args.offset = 0;
> +	new->args.count  = 0;
> +	new->res.count   = 0;
> +	new->res.fattr   = old->res.fattr;
> +	new->res.verf    = old->res.verf;
> +	new->args.context = old->args.context;
> +	new->call_ops = old->call_ops;
> +	new->how = old->how;
> +}
> +
> +/*
> + * Execute a COMMIT op to the MDS or to each data server on which a page
> + * in 'pages' exists.
> + * Invoke the pnfs_commit_complete callback.
>   */
>  int
>  filelayout_commit(struct pnfs_layout_type *layoutid, int sync,
>  		  struct nfs_write_data *data)
>  {
> -	struct inode *ino = PNFS_INODE(layoutid);
> -	struct nfs_write_data   *dsdata = NULL;
>  	struct nfs4_filelayout *flo;
>  	struct nfs4_filelayout_segment *nfslay;
> -	struct nfs4_pnfs_dev_item *dev;
> -	struct nfs4_pnfs_dev *fdev;
> -	struct nfs4_pnfs_dserver dserver;
> +	struct nfs_write_data   *dsdata = NULL;
> +	struct nfs4_pnfs_dserver *dserver = NULL;
>  	struct nfs4_pnfs_ds *ds;
> -	struct nfs_page *first;
>  	struct nfs_page *req;
> -	struct list_head *pos, *tmp;
> -	int i;
> +	struct list_head *pos, *tmp, *head = &data->pages;
>  
>  	flo = PNFS_LD_DATA(layoutid);
>  	nfslay = LSEG_LD_DATA(&flo->pnfs_lseg);
> @@ -697,70 +731,49 @@ filelayout_commit(struct pnfs_layout_type *layoutid, int sync,
>  		return 1;
>  	}
>  
> -	dev = nfs4_pnfs_device_item_get(ino, nfslay->dev_id);
> -	fdev = &dev->stripe_devs[0];
> +	/* COMMIT to each Data Server */
> +	while (!list_empty(head)) {
>  
> -	/* Gather pages per DS - Isn't this already done in nfs_pageio_init?*/
> -	for (i = 0; i < nfslay->num_fh; i++) {
> -		/* just try the first data server for the index..*/
> -		ds = fdev->ds_list[0];
> +		/* dserver and dsdata must be NULL */
> +		req = nfs_list_entry(head->next);
>  
> -		if (!dsdata) {
> -			unsigned int pgcnt = 0;
> +		dserver = (struct nfs4_pnfs_dserver *)req->wb_private;
> +		/* XXX BUG_ON(!dserver) ?*/
> +		if (!dserver)
> +			goto out_bad;
>  
> -			list_for_each_safe(pos, tmp, &data->pages) {
> -				req = nfs_list_entry(pos);
> -				if (req->wb_devip == ds->ds_ip_addr &&
> -				    req->wb_devport == ds->ds_port)
> -					pgcnt++;
> -			}
> -			dsdata = nfs_commit_alloc();
> -		}
> +		filelayout_clone_write_data(data, dsdata);

so passing &dsdata, filelayout_clone_write_data could have changed its
value, or if it changes to returning the clone then
		dsdata = filelayout_clone_write_data(data);
seems to be the natural way of doing this...

>  		if (!dsdata)
>  			goto out_bad;
> -		list_for_each_safe(pos, tmp, &data->pages) {
> +
> +		/* Just try the first multipath data server */
> +		ds = dserver->dev->ds_list[0];
> +		dsdata->pnfs_client = ds->ds_clp->cl_rpcclient;
> +		dsdata->ds_nfs_client = ds->ds_clp;
> +		dsdata->args.fh = dserver->fh;
> +
> +		/* Gather all pages going to the data server */
> +		list_for_each_safe(pos, tmp, head) {
>  			req = nfs_list_entry(pos);
> -			if (req->wb_devip == ds->ds_ip_addr &&
> -			    req->wb_devport == ds->ds_port) {
> +			if (filelayout_same_ds(dserver, req->wb_private)) {
>  				nfs_list_remove_request(req);
> -				nfs_list_add_request(req, &dsdata->pages);
> +				nfs_list_add_request(req, head);

Hmm, doesn't this put req back onto head's list?

Benny

>  			}
>  		}
> -		if (list_empty(&dsdata->pages)) {
> -			if (list_empty(&data->pages)) {
> -				dprintk("%s exit i %d devid %d\n",
> -					__FUNCTION__, i, nfslay->dev_id);
> -				nfs_commit_free(dsdata);
> -				return 0;
> -			} else
> -				continue;
> -		}
> -		first = nfs_list_entry(dsdata->pages.next);
> -
> -		dprintk("%s call nfs_initiate_commit i %d devid %d\n",
> -			__FUNCTION__, i, nfslay->dev_id);
> -
> -		dsdata->pnfs_client = ds->ds_clp->cl_rpcclient;
> -		dsdata->ds_nfs_client =  ds->ds_clp;
> -		dserver.dev = fdev;
> -
> -		/* TODO: Is the FH different from NFS_FH(data->inode)?
> -		 * (set in nfs_commit_rpcsetup)
> -		 */
> -		dserver.fh = &nfslay->fh_array[i];
> -		dsdata->args.fh = dserver.fh;
>  
> +		/* Send COMMIT to data server */
>  		nfs_initiate_commit(dsdata, dsdata->pnfs_client, sync);
>  
> +		/* reset for next data server */
>  		dsdata = NULL;
> -		fdev++;
> +		dserver = NULL;
>  	}
> -
>  	/* Release original commit data since it is not used */
>  	nfs_commit_free(data);
>  	return 0;
>  
>  out_bad:
> +	/* XXX should we send COMMIT to MDS e.g. not free data and return 1 ? */
>  	nfs_commit_free(data);
>  	return -ENOMEM;
>  }
> diff --git a/include/linux/nfs_page.h b/include/linux/nfs_page.h
> index 098b788..8da70b2 100644
> --- a/include/linux/nfs_page.h
> +++ b/include/linux/nfs_page.h
> @@ -45,8 +45,6 @@ struct nfs_page {
>  	unsigned long		wb_flags;
>  	struct nfs_writeverf	wb_verf;	/* Commit cookie */
>  #ifdef CONFIG_PNFS
> -	unsigned int		wb_devip;	/* pNFS data server IP addr */
> -	unsigned int		wb_devport;	/* pNFS data server port */
>  	void			*wb_ops;	/* pNFS io operations */
>  	void			*wb_private;
>  #endif



More information about the pNFS mailing list