[pnfs] [PATCH 02/10] pnfs: Define 128 bit deviceid type for client

Benny Halevy bhalevy at panasas.com
Fri Feb 8 09:33:56 EST 2008


Dean Hildebrand wrote:
> Define 128 bit deviceid for common pNFS and 64bit maj/min
> for file layout
>
> Signed-off-by: Dean Hildebrand <dhildeb at us.ibm.com>
> ---
>  fs/nfs/nfs4filelayout.c    |    6 ++--
>  fs/nfs/nfs4filelayout.h    |   21 ++++++++------
>  fs/nfs/nfs4filelayoutdev.c |   62 ++++++++++++++++++++++++++++++++------------
>  fs/nfs/nfs4proc.c          |    2 +-
>  fs/nfs/nfs4xdr.c           |   26 ++++++++++--------
>  fs/nfs/pnfs.c              |    6 +++-
>  include/linux/nfs4_pnfs.h  |    6 +++-
>  include/linux/pnfs_xdr.h   |    6 +++-
>  8 files changed, 89 insertions(+), 46 deletions(-)
>
> diff --git a/fs/nfs/nfs4filelayout.c b/fs/nfs/nfs4filelayout.c
> index 7863ddf..a1ada9e 100644
> --- a/fs/nfs/nfs4filelayout.c
> +++ b/fs/nfs/nfs4filelayout.c
> @@ -607,7 +607,7 @@ filelayout_set_layout(struct pnfs_layout_type *layoutid,
>  
>  	dprintk("%s set_layout_map Begin\n", __FUNCTION__);
>  
> -	READ32(fl->dev_id);
> +	COPYMEM(&fl->dev_id, NFS4_PNFS_DEVICEID4_SIZE);
>  	READ32(nfl_util);
>  	if (nfl_util & NFL4_UFLG_COMMIT_THRU_MDS)
>  		fl->commit_through_mds = 1;
> @@ -620,8 +620,8 @@ filelayout_set_layout(struct pnfs_layout_type *layoutid,
>  	READ32(fl->first_stripe_index);
>  	READ32(fl->num_fh);
>  
> -	dprintk("DEBUG: %s: dev_id %u nfl_util 0x%X num_fh %u\n", __func__,
> -				fl->dev_id, nfl_util, fl->num_fh);
> +	dprintk("%s: nfl_util 0x%X num_fh %u dev_id %s\n",
> +		__func__, nfl_util, fl->num_fh, deviceid_fmt(&fl->dev_id));
>  
>  	for (i = 0; i < fl->num_fh; i++) {
>  		/* fh */
> diff --git a/fs/nfs/nfs4filelayout.h b/fs/nfs/nfs4filelayout.h
> index 5c567dd..4feee5f 100644
> --- a/fs/nfs/nfs4filelayout.h
> +++ b/fs/nfs/nfs4filelayout.h
> @@ -18,7 +18,8 @@
>  #include <linux/pnfs_xdr.h>
>  
>  #define NFS4_PNFS_DEV_HASH_BITS 5
> -#define NFS4_PNFS_DEV_HASH (1 << NFS4_PNFS_DEV_HASH_BITS)
> +#define NFS4_PNFS_DEV_HASH_SIZE (1 << NFS4_PNFS_DEV_HASH_BITS)
> +#define NFS4_PNFS_DEV_HASH_MASK (NFS4_PNFS_DEV_HASH_SIZE - 1)
>  
>  #define NFS4_PNFS_MAX_STRIPE_CNT 16
>  #define NFS4_PNFS_MAX_MULTI_DS   2
> @@ -28,8 +29,10 @@
>  			(NFS_SERVER(inode)->pnfs_mountid->mountid))
>  
>  struct nfs4_session *nfs41_alloc_session(void);
> -int _nfs4_proc_create_session(struct nfs_client *clp, struct nfs4_session *session,
> -				struct rpc_clnt *clnt);
> +int _nfs4_proc_create_session(struct nfs_client *clp,
> +			      struct nfs4_session *session,
> +			      struct rpc_clnt *clnt);
> +char *deviceid_fmt(pnfs_deviceid *dev_id);
>  
>  enum stripetype4 {
>  	STRIPE_SPARSE = 1,
> @@ -53,19 +56,19 @@ struct nfs4_pnfs_dev {
>  /* stripe_count is length of dev_list, bounded by NFS4_PNFS_MAX_STRIPE_CNT */
>  struct nfs4_pnfs_dev_item {
>  	struct hlist_node	hash_node;	/* nfs4_pnfs_dev_hlist dev_list */
> -	u32 			dev_id;
> +	pnfs_deviceid		dev_id;
>  	u32 			stripe_count;
>  	struct nfs4_pnfs_dev	*stripe_devs;
>  };
>  
>  struct nfs4_pnfs_dev_hlist {
>  	rwlock_t		dev_lock;
> -	struct hlist_head	dev_list[NFS4_PNFS_DEV_HASH];
> -	struct hlist_head	dev_dslist[NFS4_PNFS_DEV_HASH];
> +	struct hlist_head	dev_list[NFS4_PNFS_DEV_HASH_SIZE];
> +	struct hlist_head	dev_dslist[NFS4_PNFS_DEV_HASH_SIZE];
>  };
>  
>  struct nfs4_pnfs_devaddr {
> -	u32 dev_id;
> +	pnfs_deviceid dev_id;
>  	u32 ip;
>  	u16 port;
>  };
> @@ -86,7 +89,7 @@ struct nfs4_filelayout_segment {
>  	u32 commit_through_mds;
>  	u64 stripe_unit;
>  	u32 first_stripe_index;
> -	u32 dev_id;
> +	pnfs_deviceid dev_id;
>  	unsigned int num_fh;
>  	struct nfs_fh fh_array[NFS4_PNFS_MAX_STRIPE_CNT];
>  };
> @@ -114,7 +117,7 @@ int nfs4_pnfs_dserver_get(struct inode *inode,
>  int decode_and_add_devicelist(struct filelayout_mount_type *mt, struct pnfs_devicelist *devlist);
>  
>  struct nfs4_pnfs_dev_item *
> -nfs4_pnfs_device_item_get(struct inode *inode, u32 dev_id);
> +nfs4_pnfs_device_item_get(struct inode *inode, pnfs_deviceid *dev_id);
>  
>  
>  #define READ32(x)         (x) = ntohl(*p++)
> diff --git a/fs/nfs/nfs4filelayoutdev.c b/fs/nfs/nfs4filelayoutdev.c
> index 63d1c74..24e5173 100644
> --- a/fs/nfs/nfs4filelayoutdev.c
> +++ b/fs/nfs/nfs4filelayoutdev.c
> @@ -96,21 +96,46 @@ print_stripe_devs(struct nfs4_pnfs_dev_item *dev)
>  	}
>  }
>  
> +char*
>   
I think there should be a space before '*'... did you run checkpatch?
> +deviceid_fmt(pnfs_deviceid *dev_id)
> +{
> +	static char buf[NFS4_PNFS_DEVICEID4_SIZE];
>   
16 bytes are too little for showing the whole deviceid...

> +	sprintf(buf, "%x %x %x %x\n",
>   
why the newline? That's the caller's call...
> +		dev_id->data[0],
>   
> +		dev_id->data[1],
> +		dev_id->data[2],
> +		dev_id->data[3]);
>   
This prints just the first 4 bytes.
Assuming pnfs_deviceid is defined as 4 32-bit words as suggested below,
how about something like the following:

	static char buf[NFS4_PNFS_DEVICEID4_SIZE * 2 + 3 + 1];

	sprintf(buf, "%08x:%08x:%08x:%08x",
		ntohl(dev_id->data[0]),
		ntohl(dev_id->data[1]),
		ntohl(dev_id->data[2]),
		ntohl(dev_id->data[3]));


> +	return buf;
> +}
> +
> +unsigned long
> +_deviceid_hash(pnfs_deviceid *dev_id)
> +{
> +	unsigned char *cptr = (unsigned char *) dev_id->data;
>   
nit: don't need the space between the closing parenthesis to dev_id
> +	unsigned int nbytes = NFS4_PNFS_DEVICEID4_SIZE;
> +	u64 x = 0;
>   
need a line break here :)
> +	while (nbytes--) {
> +		x *= 37;
> +		x += *cptr++;
> +	}
>   
Any reason not to reuse an existing crypt/hash e.g. crc32 like we use
for the sessionid on the server side?  The per-byte hashing is not very 
efficient...

> +	return x & NFS4_PNFS_DEV_HASH_MASK;
> +}
> +
>  /* Assumes lock is held */
>  static inline struct nfs4_pnfs_dev_item *
> -_device_lookup(struct nfs4_pnfs_dev_hlist *hlist, u32 dev_id)
> +_device_lookup(struct nfs4_pnfs_dev_hlist *hlist, pnfs_deviceid *dev_id)
>  {
>  	unsigned long      hash;
>  	struct hlist_node *np;
>  
> -	dprintk("_device_lookup: dev_id=%u\n", dev_id);
> +	dprintk("_device_lookup: dev_id=%s\n", deviceid_fmt(dev_id));
>  
> -	hash = hash_long(dev_id, NFS4_PNFS_DEV_HASH_BITS);
> +	hash = _deviceid_hash(dev_id);
>  
>  	hlist_for_each(np, &hlist->dev_list[hash]) {
>  		struct nfs4_pnfs_dev_item *dev;
>  		dev = hlist_entry(np, struct nfs4_pnfs_dev_item, hash_node);
> -		if (dev->dev_id == dev_id)
> +		if (!memcmp(&dev->dev_id, dev_id, NFS4_PNFS_DEVICEID4_SIZE))
>  			return dev;
>  	}
>  	return NULL;
> @@ -146,10 +171,11 @@ _device_add(struct nfs4_pnfs_dev_hlist *hlist, struct nfs4_pnfs_dev_item *dev)
>  {
>  	unsigned long      hash;
>  
> -	dprintk("_device_add: dev_id=%u stripe_devs:\n", dev->dev_id);
> +	dprintk("_device_add: dev_id=%s\nstripe_devs:\n",
> +		deviceid_fmt(&dev->dev_id));
>  	print_stripe_devs(dev);
>  
> -	hash = hash_long(dev->dev_id, NFS4_PNFS_DEV_HASH_BITS);
> +	hash = _deviceid_hash(&dev->dev_id);
>  	hlist_add_head(&dev->hash_node, &hlist->dev_list[hash]);
>  }
>  
> @@ -290,7 +316,8 @@ device_destroy(struct nfs4_pnfs_dev_item *dev,
>  	if (!dev)
>  		return;
>  
> -	dprintk("device_destroy: did=%u dev_list: \n", dev->dev_id);
> +	dprintk("device_destroy: dev_id=%s\ndev_list:\n",
> +		deviceid_fmt(&dev->dev_id));
>  	print_stripe_devs(dev);
>  
>  	write_lock(&hlist->dev_lock);
> @@ -321,7 +348,7 @@ nfs4_pnfs_devlist_init(struct nfs4_pnfs_dev_hlist *hlist)
>  
>  	hlist->dev_lock = __RW_LOCK_UNLOCKED("pnfs_devlist_lock");
>  
> -	for (i = 0; i < NFS4_PNFS_DEV_HASH; i++) {
> +	for (i = 0; i < NFS4_PNFS_DEV_HASH_SIZE; i++) {
>  		INIT_HLIST_HEAD(&hlist->dev_list[i]);
>  		INIT_HLIST_HEAD(&hlist->dev_dslist[i]);
>  	}
> @@ -341,7 +368,7 @@ nfs4_pnfs_devlist_destroy(struct nfs4_pnfs_dev_hlist *hlist)
>  		return;
>  
>  	/* No lock held, as synchronization should occur at upper levels */
> -	for (i = 0; i < NFS4_PNFS_DEV_HASH; i++) {
> +	for (i = 0; i < NFS4_PNFS_DEV_HASH_SIZE; i++) {
>  		struct hlist_node *np, *next;
>  
>  		hlist_for_each_safe(np, next, &hlist->dev_list[i]) {
> @@ -368,7 +395,7 @@ nfs4_pnfs_device_add(struct filelayout_mount_type *mt,
>  
>  	/* Write lock, do lookup again, and then add device */
>  	write_lock(&hlist->dev_lock);
> -	tmp_dev = _device_lookup(hlist, dev->dev_id);
> +	tmp_dev = _device_lookup(hlist, &dev->dev_id);
>  	if (tmp_dev == NULL)
>  		_device_add(hlist, dev);
>  	write_unlock(&hlist->dev_lock);
> @@ -511,7 +538,7 @@ decode_device(struct filelayout_mount_type *mt, struct pnfs_device *dev)
>  	INIT_HLIST_NODE(&file_dev->hash_node);
>  
>  	/* Device id */
> -	file_dev->dev_id = dev->dev_id;
> +	memcpy(&file_dev->dev_id, &dev->dev_id, NFS4_PNFS_DEVICEID4_SIZE);
>  
>  	fdev = &file_dev->stripe_devs[0];
>  	for (i = 0; i < len; i++) {
> @@ -612,7 +639,7 @@ decode_and_add_devicelist(struct filelayout_mount_type *mt, struct pnfs_deviceli
>   * of available devices, and return it.
>   */
>  static struct nfs4_pnfs_dev_item *
> -get_device_info(struct inode *inode, u32 dev_id)
> +get_device_info(struct inode *inode, pnfs_deviceid *dev_id)
>  {
>  	struct filelayout_mount_type *mt = FILE_MT(inode);
>  	struct pnfs_device *pdev = NULL;
> @@ -623,7 +650,7 @@ get_device_info(struct inode *inode, u32 dev_id)
>  	if (pdev == NULL)
>  		return NULL;
>  
> -	pdev->dev_id = dev_id;
> +	memcpy(&pdev->dev_id, dev_id, NFS4_PNFS_DEVICEID4_SIZE);
>  
>  	rc = pnfs_callback_ops->nfs_getdeviceinfo(inode, dev_id, pdev);
>  	dprintk("%s getdevice info returns %d\n", __func__, rc);
> @@ -639,7 +666,7 @@ get_device_info(struct inode *inode, u32 dev_id)
>  }
>  
>  struct nfs4_pnfs_dev_item *
> -nfs4_pnfs_device_item_get(struct inode *inode, u32 dev_id)
> +nfs4_pnfs_device_item_get(struct inode *inode, pnfs_deviceid *dev_id)
>  {
>  	struct filelayout_mount_type *mt = FILE_MT(inode);
>  	struct nfs4_pnfs_dev_item *dev;
> @@ -673,7 +700,7 @@ nfs4_pnfs_dserver_get(struct inode *inode,
>  
>  	layout = LSEG_LD_DATA(&flo->pnfs_lseg);
>  
> -	di = nfs4_pnfs_device_item_get(inode, layout->dev_id);
> +	di = nfs4_pnfs_device_item_get(inode, &layout->dev_id);
>  	if (di == NULL)
>  		return 1;
>  
> @@ -704,8 +731,9 @@ nfs4_pnfs_dserver_get(struct inode *inode,
>  	else
>  		dserver->fh = &layout->fh_array[stripe_idx];
>  
> -	dprintk("%s: dev_id=%u, idx=%u, offset=%Lu, count=%Zu\n",
> -		__FUNCTION__, layout->dev_id, stripe_idx, offset, count);
> +	dprintk("%s: dev_id=%s idx=%u, offset=%Lu, count=%Zu\n",
> +		__func__, deviceid_fmt(&layout->dev_id),
> +		stripe_idx, offset, count);
>  
>  	return 0;
>  }
> diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
> index eaaaeed..a5ca2df 100644
> --- a/fs/nfs/nfs4proc.c
> +++ b/fs/nfs/nfs4proc.c
> @@ -5356,7 +5356,7 @@ int nfs4_pnfs_getdevicelist(struct nfs_fh *fh,
>  }
>  
>  int nfs4_pnfs_getdeviceinfo(struct inode *inode,
> -			    u32 dev_id,
> +			    pnfs_deviceid *dev_id,
>  			    struct pnfs_device *dev)
>  {
>  	struct nfs_server *server = NFS_SERVER(inode);
> diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c
> index b9e78ea..c5c7b5a 100644
> --- a/fs/nfs/nfs4xdr.c
> +++ b/fs/nfs/nfs4xdr.c
> @@ -272,10 +272,13 @@ static int nr_sequence_quads;
>  #if defined(CONFIG_PNFS)
>  #define encode_getdevicelist_maxsz (op_encode_hdr_maxsz + 4 +  \
>  				   (NFS4_VERIFIER_SIZE >> 2))
> -#define decode_getdevicelist_maxsz (op_decode_hdr_maxsz + 5 + 2 +      \
> -				   NFS4_PNFS_DEV_MAXNUM*NFS4_PNFS_DEV_MAXSIZE)
> -#define encode_getdeviceinfo_maxsz (op_encode_hdr_maxsz + 2)
> -#define decode_getdeviceinfo_maxsz (op_decode_hdr_maxsz + 3 + \
> +#define decode_getdevicelist_maxsz (op_decode_hdr_maxsz + 6 +		\
> +				    NFS4_PNFS_DEV_MAXNUM *		\
> +				    (NFS4_PNFS_DEVICEID4_SIZE + 2 +	\
> +				     NFS4_PNFS_DEV_MAXSIZE))
>   
Hmm, NFS4_PNFS_DEV_MAXSIZE above seems obsolete.
Doesn't getdevicelist return only deviceids and no device addresses anymore?

> +#define encode_getdeviceinfo_maxsz (op_encode_hdr_maxsz + 2 + \
> +				    NFS4_PNFS_DEVICEID4_SIZE)
> +#define decode_getdeviceinfo_maxsz (op_decode_hdr_maxsz + 2 + \
>  				   NFS4_PNFS_DEV_MAXSIZE)
>  #define encode_pnfs_layoutget_sz (op_encode_hdr_maxsz + 13)
>  #define decode_pnfs_layoutget_maxsz	(op_decode_hdr_maxsz + 8 + \
> @@ -1826,9 +1829,11 @@ static int encode_getdeviceinfo(struct xdr_stream *xdr,
>  				const struct nfs4_pnfs_getdeviceinfo_arg *args)
>  {
>  	uint32_t *p;
> -	RESERVE_SPACE(16);
> +
> +	RESERVE_SPACE(8 + NFS4_PNFS_DEVICEID4_SIZE);
>   
12 maybe (rather than 8)?
> +
>  	WRITE32(OP_GETDEVICEINFO);
> -	WRITE32(args->dev_id);
> +	WRITEMEM(args->dev_id->data, NFS4_PNFS_DEVICEID4_SIZE);
>  	WRITE32(args->layoutclass);
>  	WRITE32(NFS4_PNFS_DEV_MAXSIZE);
>  	return 0;
> @@ -5480,17 +5485,16 @@ static int decode_getdevicelist(struct xdr_stream *xdr, struct pnfs_devicelist *
>  	for (i = 0, cnt = 0;
>  	     i < res->num_devs && cnt < NFS4_PNFS_DEV_MAXNUM;
>  	     i++) {
> -		READ_BUF(4);
> -		READ32(res->devs[cnt].dev_id);	/* device id */
> +		READ_BUF(NFS4_PNFS_DEVICEID4_SIZE);
> +		COPYMEM(res->devs[cnt].dev_id.data, NFS4_PNFS_DEVICEID4_SIZE);
>  
>  		READ_BUF(4);
>  		READ32(res->layout_type);
>  
>  		READ_BUF(4);
>  		READ32(len);
> -		dprintk("%s: num_dev %d i %d cnt %d id %d len %d\n",
> -			__FUNCTION__, res->num_devs, i, cnt,
> -			res->devs[cnt].dev_id, len);
> +		dprintk("%s: num_dev %d i %d cnt %d len %d\n",
> +			__FUNCTION__, res->num_devs, i, cnt, len);
>  
>  		READ_BUF(len);
>  
> diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c
> index 033024f..f56ff89 100644
> --- a/fs/nfs/pnfs.c
> +++ b/fs/nfs/pnfs.c
> @@ -59,7 +59,7 @@
>  extern int nfs_fsync(struct file *file, struct dentry *dentry, int datasync);
>  extern int nfs4_pnfs_getdevicelist(struct nfs_fh *fh, struct nfs_server *server,
>  				   struct pnfs_devicelist *devlist);
> -extern int nfs4_pnfs_getdeviceinfo(struct inode *inode, u32 dev_id,
> +extern int nfs4_pnfs_getdeviceinfo(struct inode *inode, pnfs_deviceid *dev_id,
>  				   struct pnfs_device *res);
>  extern void nfs_initiate_commit(struct nfs_write_data *data,
>  				struct rpc_clnt *clnt, int how);
> @@ -1593,7 +1593,9 @@ pnfs_getdevicelist(struct super_block *sb, struct nfs_fh *fh,
>  /* Retrieve the device information for a device.
>   */
>  int
> -pnfs_getdeviceinfo(struct inode *inode, u32 dev_id, struct pnfs_device *dev)
> +pnfs_getdeviceinfo(struct inode *inode,
> +		   pnfs_deviceid *dev_id,
> +		   struct pnfs_device *dev)
>  {
>  	int rc;
>  
> diff --git a/include/linux/nfs4_pnfs.h b/include/linux/nfs4_pnfs.h
> index 8a720e5..c05477c 100644
> --- a/include/linux/nfs4_pnfs.h
> +++ b/include/linux/nfs4_pnfs.h
> @@ -181,7 +181,7 @@ struct layoutdriver_policy_operations {
>  };
>  
>  struct pnfs_device {
> -	int           dev_id;
> +	pnfs_deviceid dev_id;
>  	unsigned int  dev_count;
>  	unsigned int  dev_addr_len;
>  	char          dev_addr_buf[NFS4_PNFS_DEV_MAXSIZE];
> @@ -202,7 +202,9 @@ struct pnfs_devicelist {
>   */
>  struct pnfs_client_operations {
>  	int (*nfs_getdevicelist) (struct super_block *sb, struct nfs_fh *fh, struct pnfs_devicelist *devlist);
> -	int (*nfs_getdeviceinfo) (struct inode *inode, u32 dev_id, struct pnfs_device *dev);
> +	int (*nfs_getdeviceinfo) (struct inode *inode,
> +				  pnfs_deviceid *dev_id,
> +				  struct pnfs_device *dev);
>  
>  	/* Post read callback. */
>  	void (*nfs_readlist_complete) (struct nfs_read_data *nfs_data);
> diff --git a/include/linux/pnfs_xdr.h b/include/linux/pnfs_xdr.h
> index 3bc8226..a75f143 100644
> --- a/include/linux/pnfs_xdr.h
> +++ b/include/linux/pnfs_xdr.h
> @@ -18,6 +18,9 @@
>  
>  #define PNFS_LAYOUT_MAXSIZE 4096
>  #define PNFS_MAX_NUM_LAYOUT_TYPES 2
> +#define NFS4_PNFS_DEVICEID4_SIZE 16
> +
> +typedef struct { char data[NFS4_PNFS_DEVICEID4_SIZE]; } pnfs_deviceid;
>   
how about?

typedef struct { __be32 data[NFS4_PNFS_DEVICEID4_SIZE >> 2]; } pnfs_deviceid;

if you define it as an array of bytes, let's at least define them
as unsigned chars. And the print format above can print ntohl(*(__be32 *)&data[0]),
ntohl(*(__be32 *)&data[4]), etc.



>  
>  struct nfs4_pnfs_layout {
>  	__u32 len;
> @@ -122,7 +125,8 @@ struct nfs4_pnfs_getdevicelist_res {
>  struct nfs4_pnfs_getdeviceinfo_arg {
>  	const struct nfs_fh *fh;
>  	u32 layoutclass;
> -	u32 dev_id;
> +	pnfs_deviceid 			*dev_id;
>   

That should be a pnfs_deviceid rather than pnfs_deviceid *
> +	u32 				dev_notify_types;
>  	struct nfs41_sequence_args	seq_args;
>  };
>  
>   



More information about the pNFS mailing list