[pnfs] [PATCH] update pNFS ops to draft 13

Benny Halevy bhalevy at panasas.com
Tue Sep 4 13:35:31 EDT 2007


Nitpick:
>  struct pnfs_devicelist {
> +	int                 layout_type;

This had better be unsigned, since the "experimental" layout type bit
is 1<<31 and I don't want it to get sign-extended if this is implicitly
converted into a 64-bit long.

Other than that the patch looks fine.

I will also compare it "visually" to draft-13.

Benny

On Tue, Sep 04 2007 at 19:23:04 +0300, Marc Eshel <eshel at almaden.ibm.com> wrote:
> From: Marc Eshel <eshel at almaden.ibm.com>
> 
> 
> ---
> 
>  fs/nfs/nfs4filelayout.c            |   25 ++++++----
>  fs/nfs/nfs4filelayout.h            |    1 
>  fs/nfs/nfs4filelayoutdev.c         |   23 +++++----
>  fs/nfs/nfs4xdr.c                   |   90 ++++++++++++++++++++----------------
>  fs/nfsd/nfs4filelayoutxdr.c        |   28 +++++++----
>  fs/nfsd/nfs4xdr.c                  |   16 ++++--
>  include/linux/nfs4.h               |   18 +++++++
>  include/linux/nfs4_pnfs.h          |    7 ---
>  include/linux/nfsd/nfs4layoutxdr.h |    2 -
>  9 files changed, 126 insertions(+), 84 deletions(-)
> 
> diff --git a/fs/nfs/nfs4filelayout.c b/fs/nfs/nfs4filelayout.c
> index 043583f..9a63870 100644
> --- a/fs/nfs/nfs4filelayout.c
> +++ b/fs/nfs/nfs4filelayout.c
> @@ -445,6 +445,7 @@ filelayout_set_layout(struct pnfs_layout
>  	struct nfs4_filelayout* fl = NULL;
>  	int i;
>  	uint32_t *p = (uint32_t*)layout;
> +	uint32_t nfl_util;
>  
>  	dprintk("%s set_layout_map Begin\n", __FUNCTION__);
>  
> @@ -454,30 +455,34 @@ filelayout_set_layout(struct pnfs_layout
>  	if (!fl)
>  		goto nfserr;
>  
> -	READ32(fl->stripe_type);
> -	READ32(fl->commit_through_mds);
> -	READ64(fl->stripe_unit);
> -	READ64(fl->file_size);
> -	READ32(fl->index_len);
>  	if (fl->index_len > 0) { //??? if>0 must build index list
>  		printk("filelayout_set_layout: XXX add loop for index list\n");
>  	}
>  	READ32(fl->num_devs);
>  
> -	dprintk("DEBUG: %s: type %d stripe_unit %lld file_size %lld devs %d\n",
> -				__func__, fl->stripe_type, fl->stripe_unit,
> -				fl->file_size, fl->num_devs);
> +	dprintk("DEBUG: %s: devs %d\n", __FUNCTION__, fl->num_devs);
>  
>  	for (i = 0; i < fl->num_devs; i++) {
>  		READ32(fl->devs[i].dev_id);
> +		READ32(nfl_util);
>  		READ32(fl->devs[i].dev_index);
> +		READ32(fl->index_len);
>  
> +		if (nfl_util & NFL4_UFLG_COMMIT_THRU_MDS)
> +			fl->commit_through_mds = 1;
> +		if (nfl_util & NFL4_UFLG_DENSE)
> +			fl->stripe_type = STRIPE_DENSE;
> +		else
> +			fl->stripe_type = STRIPE_SPARSE;
> +		fl->stripe_unit = nfl_util & ~NFL4_UFLG_MASK;
> +
> +	
>  		/* fh */
>  		memset(&fl->devs[i].fh, 0, sizeof(struct nfs_fh));
>  		READ32(fl->devs[i].fh.size);
>  		COPYMEM(fl->devs[i].fh.data, fl->devs[i].fh.size);
> -		dprintk("DEBUG: %s: dev %d len %d\n", __func__,
> -		fl->devs[i].dev_id,fl->devs[i].fh.size);
> +		dprintk("DEBUG: %s: dev %d len %d nfl_util 0x%X\n", __func__,
> +			fl->devs[i].dev_id,fl->devs[i].fh.size, nfl_util);
>  	}
>  
>  	return layoutid;
> diff --git a/fs/nfs/nfs4filelayout.h b/fs/nfs/nfs4filelayout.h
> index 357a915..1383a36 100644
> --- a/fs/nfs/nfs4filelayout.h
> +++ b/fs/nfs/nfs4filelayout.h
> @@ -73,7 +73,6 @@ struct nfs4_filelayout {
>  	u64 offset;
>  	u64 length;
>  	u32 iomode;
> -	u64 file_size;
>  	u32 stripe_type;
>  	u32 commit_through_mds;
>  	u64 stripe_unit;
> diff --git a/fs/nfs/nfs4filelayoutdev.c b/fs/nfs/nfs4filelayoutdev.c
> index fe1e067..4296007 100644
> --- a/fs/nfs/nfs4filelayoutdev.c
> +++ b/fs/nfs/nfs4filelayoutdev.c
> @@ -265,7 +265,7 @@ nfs4_pnfs_device_add(struct filelayout_m
>  static struct nfs4_pnfs_dev_item*
>  decode_device(struct pnfs_device* dev)
>  {
> -	int len;
> +	int index, i, j, len;
>  	int tmp[6];
>  	uint32_t *p = (uint32_t*)dev->dev_addr_buf;
>  	struct nfs4_pnfs_dev_item* file_dev;
> @@ -275,7 +275,6 @@ decode_device(struct pnfs_device* dev)
>  	{
>  		return NULL;
>  	}
> -
>  	/* Initialize dev */
>  	INIT_HLIST_NODE(&file_dev->hash_node);
>  	atomic_set(&file_dev->count, 0);
> @@ -283,19 +282,20 @@ decode_device(struct pnfs_device* dev)
>  	/* Device id */
>  	file_dev->dev_id = dev->dev_id;
>  
> -	/* Get the device type */
> -	READ32(dev->dev_type);
> -
> -	if (dev->dev_type != FILE_SIMPLE) {
> -		printk(KERN_NOTICE "Device type %d not supported!\n", dev->dev_type);
> -		return NULL;
> +	READ32(index);
> +	for (i = 0; i < index; i++) {  /* skip indices list */
> +		READ32(j);
>  	}
>  
> +	READ32(len);
> +	BUG_ON(len != 1);    /* 1 DS per device id */
> +
>  	/* Get the device count */
>  	READ32(dev->dev_count);
>  
>  	if (dev->dev_count > 1)
> -		printk(KERN_NOTICE "%s: Add loop for dev_count\n", __FUNCTION__);
> +		printk(KERN_NOTICE "%s: Add loop for multipath dev_count %d dev_id %d\n",
> +			__FUNCTION__, dev->dev_count, dev->dev_id);
>  
>  	/* Decode contents of device*/
>  
> @@ -303,8 +303,11 @@ decode_device(struct pnfs_device* dev)
>  
>  	/* check and skip r_netid */
>  	READ32(len);
> -	if (len != 3) /* "tcp" */
> +	if (len != 3) { /* "tcp" */
> +		printk("%s: ERROR: Device index %d dev_count %d len %d\n",
> +			__FUNCTION__, index, dev->dev_count, len);
>  		return NULL;
> +	}
>  	/* Read the bytes into a temporary buffer */
>  	/* TODO: should probably sanity check them */
>  	READ32(tmp[0]);
> diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c
> index 2e5a8a7..cfb391d 100644
> --- a/fs/nfs/nfs4xdr.c
> +++ b/fs/nfs/nfs4xdr.c
> @@ -4914,54 +4914,62 @@ #ifdef CONFIG_PNFS
>   */
>  static int decode_getdevicelist(struct xdr_stream *xdr, struct pnfs_devicelist *res)
>  {
> -        uint32_t *p;
> -        int status, i, cnt;
> -        uint32_t len = 0, total_len = 0;
> -        struct nfs_writeverf verftemp;
> +	uint32_t *p;
> +	int status, i, cnt;
> +	uint32_t len = 0, total_len = 0;
> +	struct nfs_writeverf verftemp;
>  
> -        status = decode_op_hdr(xdr, OP_GETDEVICELIST);
> -        if (status)
> -                return status;
> +	status = decode_op_hdr(xdr, OP_GETDEVICELIST);
> +	if (status)
> +		return status;
>  
> -        /* TODO: Skip cookie for now */
> -        READ_BUF(8);
> -        (*p) += 2;
> +	/* TODO: Skip cookie for now */
> +	READ_BUF(8);
> +	(*p) += 2;
>  
> -        /* Read verifier */
> -        READ_BUF(8);
> -        COPYMEM(verftemp.verifier, 8);
> +	/* Read verifier */
> +	READ_BUF(8);
> +	COPYMEM(verftemp.verifier, 8);
>  
> -        READ_BUF(4);
> -        READ32(res->num_devs);
> +	READ_BUF(4);
> +	READ32(res->num_devs);
>  
> -        for (i = 0,cnt=0; i < res->num_devs && cnt < NFS4_PNFS_DEV_MAXCOUNT; i++)
> -        {
> -                READ_BUF(4);
> -                READ32(res->devs[cnt].dev_id);	/* device id */
> -                READ_BUF(4);			/* skip layout type */
> -                READ_BUF(4);
> -                READ32(len);
> -                dprintk("%s: num_dev %d i %d cnt %d id %d len %d\n",
> -                        __FUNCTION__, res->num_devs, i, cnt,
> -                        res->devs[cnt].dev_id, len);
> -
> -                READ_BUF(len);
> -
> -                /* DH-TODO: Can I decode this inline?  Is the xdr_stream
> -                 * memory valid after the completion of this function?
> -                 */
> -/*              decode_opaque_inline(xdr, &len, &r_addr); */
> -                COPYMEM(&res->devs[cnt].dev_addr_buf, len);
> -                res->devs[cnt].dev_addr_len = len;
> +	for (i = 0,cnt=0; i < res->num_devs && cnt < NFS4_PNFS_DEV_MAXCOUNT; i++)
> +	{
> +		READ_BUF(4);
> +		READ32(res->devs[cnt].dev_id);	/* device id */
>  
> -                total_len += len;
> -                cnt++;
> -        }
> -        READ_BUF(4);
> -        READ32(res->eof);
> +		READ_BUF(4);
> +		READ32(len); /* 1 in list of device_addr */
> +		if (len > 1)
> +			printk(KERN_EMERG "%s: list of %d device addr\n",
> +				__FUNCTION__, len);
> +		READ_BUF(4);
> +		READ32(res->layout_type);
>  
> -        res->devs_len = total_len;
> -        return 0;
> +		READ_BUF(4);
> +		READ32(len);
> +		dprintk("%s: num_dev %d i %d cnt %d id %d len %d\n",
> +			__FUNCTION__, res->num_devs, i, cnt,
> +			res->devs[cnt].dev_id, len);
> +
> +		READ_BUF(len);
> +
> +		/* DH-TODO: Can I decode this inline?  Is the xdr_stream
> +		 * memory valid after the completion of this function?
> +		 */
> +		/* decode_opaque_inline(xdr, &len, &r_addr); */
> +		COPYMEM(&res->devs[cnt].dev_addr_buf, len);
> +		res->devs[cnt].dev_addr_len = len;
> +
> +		total_len += len;
> +		cnt++;
> +	}
> +	READ_BUF(4);
> +	READ32(res->eof);
> +
> +	res->devs_len = total_len;
> +	return 0;
>  }
>  
>  /* DH: decode device info arguments
> diff --git a/fs/nfsd/nfs4filelayoutxdr.c b/fs/nfsd/nfs4filelayoutxdr.c
> index 28badc6..a3d697f 100644
> --- a/fs/nfsd/nfs4filelayoutxdr.c
> +++ b/fs/nfsd/nfs4filelayoutxdr.c
> @@ -52,13 +52,17 @@ filelayout_encode_devaddr(u32 *p, u32 *e
>  	u32 *p_in = p;
>  
>  	fdev = (struct pnfs_filelayout_devaddr *)dev_addr;
> -        len = 4+XDR_QUADLEN(fdev->r_netid.len)+XDR_QUADLEN(fdev->r_addr.len);
> +        len = 6+XDR_QUADLEN(fdev->r_netid.len)+XDR_QUADLEN(fdev->r_addr.len);
>          len = len << 2;
>  	if (p + XDR_QUADLEN(len) > end)
>  		return -ENOMEM;
>  	WRITE32(len);
> -	WRITE32(fdev->r_dev_type);
> -	WRITE32(1);
> +
> +	WRITE32(1);  /* 1 in indices list */
> +	WRITE32(0);  /* index 0 */
> +	WRITE32(1);  /* 1 DS per device id */
> +	WRITE32(1);  /* 1 in list of multipath */
> +
>  	WRITE32(fdev->r_netid.len);
>  	WRITEMEM(fdev->r_netid.data,fdev->r_netid.len);
>  	WRITE32(fdev->r_addr.len);
> @@ -89,11 +93,13 @@ filelayout_encode_layoutlist_item(u32 *p
>  	int len;
>  	unsigned int fhlen = item->dev_fh.fh_size;
>  
> -	len = 12 + fhlen;
> +	len = 20 + fhlen;
>  	if (p + XDR_QUADLEN(len) > end)
>  		return -ENOMEM;
>  	WRITE32(item->dev_id);
> +	WRITE32(item->dev_util); /* nfl_util4 */
>  	WRITE32(item->dev_index);
> +	WRITE32(1); /* One for now can be an array of FHs */
>  	WRITE32(fhlen);
>  	WRITEMEM(&item->dev_fh.fh_base, fhlen);
>  	return len;
> @@ -107,19 +113,20 @@ filelayout_encode_layout(u32 *p, u32 *en
>  	struct nfsd4_pnfs_layoutlist *item;
>  	int i, full_len, len;
>  	u32 *totlen;
> +	u32 nfl_util;
>  
>  	flp = (struct nfsd4_pnfs_filelayout *)layout;
> -	len = 32;
> +	len = 4;
>  	if (p + XDR_QUADLEN(len + 4) > end)
>  		return -ENOMEM;
>  	full_len = len + 4;
>  	totlen = p; 	/* fill-in opaque layout length later*/
>  	p++;
> -	WRITE32(flp->lg_stripe_type);
> -	WRITE32(flp->lg_commit_through_mds);
> -	WRITE64(flp->lg_stripe_unit);
> -	WRITE64(flp->lg_file_size);
> -	WRITE32(flp->lg_indexlen);
> +	nfl_util = flp->lg_stripe_unit;
> +	if (flp->lg_commit_through_mds)
> +		nfl_util |= NFL4_UFLG_COMMIT_THRU_MDS;
> +	if (flp->lg_stripe_type)
> +		nfl_util |= NFL4_UFLG_DENSE;
>  
>  	if (flp->lg_indexlen > 0) {   //??? if>0 must build index list
>  		printk("filelayout_encode_layout: XXX add loop for index list\n");
> @@ -127,6 +134,7 @@ filelayout_encode_layout(u32 *p, u32 *en
>  	WRITE32(flp->lg_llistlen);
>  	for (i=0; i < flp->lg_llistlen; i++) {
>  		item = &flp->lg_llist[i];
> +		item->dev_util = nfl_util;
>  		len = filelayout_encode_layoutlist_item(p, end, item);
>  		if (len > 0) {
>  			p += XDR_QUADLEN(len);
> diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c
> index b9e0400..d3ce4a7 100644
> --- a/fs/nfsd/nfs4xdr.c
> +++ b/fs/nfsd/nfs4xdr.c
> @@ -2888,10 +2888,14 @@ nfsd4_encode_devlist_item(struct nfsd4_c
>  	int len;
>  	ENCODE_HEAD;
>  
> -	RESERVE_SPACE(8);
> +	RESERVE_SPACE(28);
>  	WRITE32(dlist->dev_id);
> -	WRITE32(lotype);
> +
> +	WRITE32(1); /* 1 in list of device_addr */
> +	WRITE32(lotype); /* layout type */
> +	
>  	ADJUST_ARGS();
> +	dprintk("%s: device id %d\n",__FUNCTION__, dlist->dev_id);
>  
>  	if (ex_ops->devaddr_encode == NULL && lotype == LAYOUT_NFSV4_FILES)
>  	{
> @@ -2941,7 +2945,7 @@ nfsd4_encode_getdevlist(struct nfsd4_com
>  		item = gdevl->gd_devlist;
>  		for (i = 0; i < gdevl->gd_devlist_len; i++) {
>  			dprintk("%s: i %d item %p\n",__FUNCTION__, i, item);
> -			len = nfsd4_encode_devlist_item (resp, item,
> +			len = nfsd4_encode_devlist_item(resp, item,
>  						gdevl->gd_ops, gdevl->gd_type);
>  			item++;
>  			if (len <= 0) {
> @@ -2969,12 +2973,14 @@ nfsd4_encode_getdevinfo(struct nfsd4_com
>  
>  	printk("%s: err %d\n",__FUNCTION__, nfserr);
>  	if (!nfserr) {
> -		RESERVE_SPACE(8);
> +		RESERVE_SPACE(28);
>  		WRITE32(gdev->gd_type);
> +		ADJUST_ARGS();
> +
>  		if (gdev->gd_ops->devaddr_encode == NULL &&
>  					gdev->gd_type == LAYOUT_NFSV4_FILES)
>  		{
> -			len = filelayout_encode_devaddr(p, resp->end,gdev->gd_devaddr);
> +			len = filelayout_encode_devaddr(p, resp->end, gdev->gd_devaddr);
>  			filelayout_free_devaddr(gdev->gd_devaddr);
>  		}
>  		else {
> diff --git a/include/linux/nfs4.h b/include/linux/nfs4.h
> index 4912cea..bdc55ed 100644
> --- a/include/linux/nfs4.h
> +++ b/include/linux/nfs4.h
> @@ -294,6 +294,11 @@ enum nfsstat4 {
>  	NFS4ERR_CONN_BINDING_NOT_ENFORCED = 10073,
>  	NFS4ERR_CLIENTID_BUSY = 10074,
>  	NFS4ERR_PNFS_IO_HOLE = 10075,
> +	NFS4ERR_SEQ_FALSE_RETRY	= 10076,
> +	NFS4ERR_BAD_HIGH_SLOT = 10077,
> +	NFS4ERR_DEADSESSION = 10078,
> +	NFS4ERR_ENCR_ALG_UNSUPP = 10079,
> +	NFS4ERR_PNFS_NO_LAYOUT = 10080
>  };
>  
>  /*
> @@ -496,6 +501,19 @@ enum pnfs_iomode {
>  	IOMODE_ANY = 3,
>  };
>  
> +#define NFL4_UFLG_MASK			0x0000003F
> +#define NFL4_UFLG_DENSE			0x00000001
> +#define NFL4_UFLG_COMMIT_THRU_MDS	0x00000002
> +#define NFL4_UFLG_STRIPE_UNIT_SIZE_MASK	0xFFFFFFC0
> +
> +/* Encoded in the loh_body field of type layouthint4 */
> +enum filelayout_hint_care4 {
> +	NFLH4_CARE_DENSE		= NFL4_UFLG_DENSE,
> +	NFLH4_CARE_COMMIT_THRU_MDS	= NFL4_UFLG_COMMIT_THRU_MDS,
> +	NFLH4_CARE_STRIPE_UNIT_SIZE	= 0x00000040,
> +	NFLH4_CARE_STRIPE_COUNT		= 0x00000080
> +};
> +
>  #endif /* CONFIG_PNFS */
>  
>  /* Create Session Flags */
> diff --git a/include/linux/nfs4_pnfs.h b/include/linux/nfs4_pnfs.h
> index 74d25fd..48c33fa 100644
> --- a/include/linux/nfs4_pnfs.h
> +++ b/include/linux/nfs4_pnfs.h
> @@ -122,13 +122,13 @@ struct pnfs_layoutdriver_type {
>  struct pnfs_device
>  {
>  	int           dev_id;
> -	int           dev_type;
>  	unsigned int  dev_count;
>  	unsigned int  dev_addr_len;
>  	char          dev_addr_buf[NFS4_PNFS_DEV_MAXSIZE];
>  };
>  
>  struct pnfs_devicelist {
> +	int                 layout_type;
>  	unsigned int        num_devs;
>  	unsigned int        eof;
>  	unsigned int        devs_len;
> @@ -170,9 +170,4 @@ void pnfs_unregister_layoutdriver(struct
>  #define NFS4_PNFS_MAX_LAYOUTS 4
>  #define NFS4_PNFS_PRIVATE_LAYOUT 0x80000000
>  
> -enum file_layout_device_type {
> -	FILE_SIMPLE  = 1,
> -	FILE_COMPLEX = 2
> -};
> -
>  #endif /* LINUX_NFS4_PNFS_H */
> diff --git a/include/linux/nfsd/nfs4layoutxdr.h b/include/linux/nfsd/nfs4layoutxdr.h
> index 25cde5f..2d0159a 100644
> --- a/include/linux/nfsd/nfs4layoutxdr.h
> +++ b/include/linux/nfsd/nfs4layoutxdr.h
> @@ -58,7 +58,6 @@ #define ADJUST_ARGS()           resp->p 
>  
>  /* the nfsd4_pnfs_devlist dev_addr for the file layout type */
>  struct pnfs_filelayout_devaddr {
> -	u32			r_dev_type;
>  	struct xdr_netobj	r_netid;
>  	struct xdr_netobj	r_addr;
>  };
> @@ -66,6 +65,7 @@ struct pnfs_filelayout_devaddr {
>  struct nfsd4_pnfs_layoutlist {
>  	u32				dev_id;
>  	u32                             dev_index;
> +	u32                             dev_util;
>  	struct knfsd_fh                 dev_fh;
>  };
>  
> _______________________________________________
> pNFS mailing list
> pNFS at linux-nfs.org
> http://linux-nfs.org/cgi-bin/mailman/listinfo/pnfs



More information about the pNFS mailing list