[pnfs] [PATCH 03/10] pnfsd: Use 128 bit deviceid on server
Benny Halevy
bhalevy at panasas.com
Fri Feb 8 10:37:07 EST 2008
Dean Hildebrand wrote:
> Signed-off-by: Dean Hildebrand <dhildeb at us.ibm.com>
> ---
> fs/nfsd/nfs4filelayoutxdr.c | 23 +++++++++++++++++++----
> fs/nfsd/nfs4pnfsds.c | 4 ++--
> fs/nfsd/nfs4proc.c | 6 +++---
> fs/nfsd/nfs4state.c | 6 +++---
> fs/nfsd/nfs4xdr.c | 31 ++++++++++++++++---------------
> include/linux/exportfs.h | 34 +++++++++++++++++++++++++++-------
> include/linux/nfsd/nfs4layoutxdr.h | 13 ++-----------
> include/linux/nfsd/nfsd4_pnfs.h | 3 ++-
> include/linux/nfsd/pnfsd.h | 2 +-
> include/linux/nfsd/state.h | 2 +-
> 10 files changed, 76 insertions(+), 48 deletions(-)
>
> diff --git a/fs/nfsd/nfs4filelayoutxdr.c b/fs/nfsd/nfs4filelayoutxdr.c
> index 183f23c..51e2825 100644
> --- a/fs/nfsd/nfs4filelayoutxdr.c
> +++ b/fs/nfsd/nfs4filelayoutxdr.c
> @@ -42,11 +42,25 @@
> #include <linux/nfs4.h>
> #include <linux/nfsd/state.h>
> #include <linux/nfsd/xdr4.h>
> +#include <linux/sunrpc/xdr.h>
> #include <linux/nfsd/nfs4layoutxdr.h>
> #include <linux/nfsd/nfsd4_pnfs.h>
>
> #define NFSDDBG_FACILITY NFSDDBG_PNFS
>
> +#define ENCODE_HEAD __be32 *p
> +
> +#define RESERVE_SPACE(nbytes) do { \
> + p = resp->p; \
> + BUG_ON(p + XDR_QUADLEN(nbytes) > resp->end); \
> +} while (0)
> +
> +#define WRITEMEM(ptr,nbytes) do { \
> + *(p + XDR_QUADLEN(nbytes) - 1) = 0; \
> + memcpy(p, ptr, nbytes); \
> + p += XDR_QUADLEN(nbytes); \
> +} while (0)
> +
> /* Encodes the nfsv4_1_file_layout_ds_addr4 structure from draft 13
> * on the response stream.
> * Use linux error codes (not nfs) since these values are being
> @@ -154,9 +168,10 @@ filelayout_encode_layout(struct pnfs_xdr_info *resp, void *layout)
>
> resp->bytes_written = 0; /* in case there is an error */
>
> - dprintk("%s: devid %u, fsi %u, numfh %u\n",
> + dprintk("%s: device_id %llx:%llx fsi %u, numfh %u\n",
> __func__,
> - flp->device_id,
> + flp->device_id.pnfs_fsid,
> + flp->device_id.pnfs_devid,
> flp->lg_first_stripe_index,
> flp->lg_fh_length);
>
> @@ -182,8 +197,8 @@ filelayout_encode_layout(struct pnfs_xdr_info *resp, void *layout)
> len += 4;
>
> /* encode device id */
> - WRITE32(flp->device_id);
> - len += 4;
> + WRITEMEM(&flp->device_id, sizeof(deviceid_t));
> + len += sizeof(deviceid_t);
>
> /* set and encode flags */
> nfl_util = flp->lg_stripe_unit;
> diff --git a/fs/nfsd/nfs4pnfsds.c b/fs/nfsd/nfs4pnfsds.c
> index b97a288..b585ef8 100644
> --- a/fs/nfsd/nfs4pnfsds.c
> +++ b/fs/nfsd/nfs4pnfsds.c
> @@ -201,7 +201,7 @@ alloc_init_mds_id(struct pnfs_get_state *gsp)
> INIT_LIST_HEAD(&mdp->di_hash);
> INIT_LIST_HEAD(&mdp->di_mdsclid);
> list_add(&mdp->di_hash, &mds_id_tbl);
> - mdp->di_mdsid = gsp->devid;
> + mdp->di_mdsid = gsp->dsid;
> mdp->di_mdsboot = 0;
> return mdp;
> }
> @@ -215,7 +215,7 @@ alloc_init_ds_clientid(struct pnfs_get_state *gsp)
>
> dprintk("pNFSD: %s\n", __func__);
>
> - mdp = find_pnfs_mds_id(gsp->devid);
> + mdp = find_pnfs_mds_id(gsp->dsid);
> if (!mdp)
> mdp = alloc_init_mds_id(gsp);
> if (!mdp)
> diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c
> index 88edf0c..8cc6a14 100644
> --- a/fs/nfsd/nfs4proc.c
> +++ b/fs/nfsd/nfs4proc.c
> @@ -1201,9 +1201,9 @@ nfsd4_getdevinfo(struct svc_rqst *rqstp,
> struct svc_fh *current_fh = &cstate->current_fh;
> int status;
>
> - dprintk("%s: type %u dev_id %u maxcnt %u\n",
> - __func__, gdp->gd_type, gdp->gd_devid,
> - gdp->gd_maxcount);
> + dprintk("%s: type %u dev_id %llx:%llx maxcnt %u\n",
> + __func__, gdp->gd_type, gdp->gd_devid.pnfs_fsid,
> + gdp->gd_devid.pnfs_devid, gdp->gd_maxcount);
>
> status = fh_verify(rqstp, current_fh, 0, MAY_NOP);
> if (status) {
> diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
> index 232e7aa..abcb640 100644
> --- a/fs/nfsd/nfs4state.c
> +++ b/fs/nfsd/nfs4state.c
> @@ -4288,7 +4288,7 @@ release_pnfs_ds_dev_list(struct nfs4_stateid *stp)
> }
>
> static int
> -nfs4_add_pnfs_ds_dev(struct nfs4_stateid *stp, u32 devid)
> +nfs4_add_pnfs_ds_dev(struct nfs4_stateid *stp, u32 dsid)
> {
> struct pnfs_ds_dev_entry *ddp;
>
> @@ -4298,7 +4298,7 @@ nfs4_add_pnfs_ds_dev(struct nfs4_stateid *stp, u32 devid)
>
> INIT_LIST_HEAD(&ddp->dd_dev_entry);
> list_add(&ddp->dd_dev_entry, &stp->st_pnfs_ds_id);
> - ddp->dd_devid = devid;
> + ddp->dd_dsid = dsid;
> return 0;
> }
>
> @@ -4754,7 +4754,7 @@ nfs4_pnfs_cb_get_state(struct super_block *sb, struct pnfs_get_state *arg)
> /* XXX ANDROS: marc removed nfs4_check_fh - how come? */
>
> /* arg->devid is the Data server id, set by the cluster fs */
> - status = nfs4_add_pnfs_ds_dev(stp, arg->devid);
> + status = nfs4_add_pnfs_ds_dev(stp, arg->dsid);
> if (status)
> goto out;
>
> diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c
> index 32b5a55..c1e9b45 100644
> --- a/fs/nfsd/nfs4xdr.c
> +++ b/fs/nfsd/nfs4xdr.c
> @@ -1250,20 +1250,14 @@ nfsd4_decode_getdevlist(struct nfsd4_compoundargs *argp,
> DECODE_TAIL;
> }
>
> -/* GETDEVICEINFO: minorversion1-13.txt
> -u32 pnfs_deviceid4 device_id;
> -u32 pnfs_layouttype4 layout_type;
> -u32 count4 maxcount;
> -
> -*/
> - static int
> +static int
> nfsd4_decode_getdevinfo(struct nfsd4_compoundargs *argp,
> struct nfsd4_pnfs_getdevinfo *gdev)
> {
> DECODE_HEAD;
>
> - READ_BUF(12);
> - READ32(gdev->gd_devid);
> + READ_BUF(8 + sizeof(deviceid_t));
> + COPYMEM(&gdev->gd_devid, sizeof(deviceid_t));
> READ32(gdev->gd_type);
> READ32(gdev->gd_maxcount);
>
> @@ -3222,8 +3216,12 @@ nfsd4_encode_devlist_iterator(struct nfsd4_compoundres *resp,
> goto out;
>
> /* Encode device id and layout type */
> - RESERVE_SPACE(8);
> - WRITE32(iter_arg.devid);
> + RESERVE_SPACE(4 + sizeof(deviceid_t));
> + /* TODO: Need to encode an identifier that uniquely
> + * identifies the export. (fsid)
> + */
> + WRITE64(0L); /* devid major */
> + WRITE64(iter_arg.devid); /* devid minor */
>
Isn't there a problem WRT endianess here?
you encoded/decoded the deviceid above using {WRITE,COPY}MEM
which retains the structure order, while here you're encoding the minor part
of the devid here in network order. Is iter_arg.devid in host order and
deviceid_t members already in network order?
It'd be easier if everything is put on the wire in canonical netorder so
instead
of WRITEMEM you could use WRITE64(pnfs_fsid); WRITE64(pnfs_devid)
> WRITE32(iter_arg.type);
> ADJUST_ARGS();
> *bytes_written += 8;
> @@ -3236,18 +3234,20 @@ nfsd4_encode_devlist_iterator(struct nfsd4_compoundres *resp,
>
> /* Set dev info arguments */
> info_arg.type = gdevl->gd_type;
> - info_arg.devid = iter_arg.devid;
> + info_arg.devid.di_opaque.major = 0;
> + info_arg.devid.di_opaque.minor = iter_arg.devid;
>
> /* set xdr info */
> info_arg.xdr.p = resp->p;
> info_arg.xdr.end = resp->end;
> info_arg.xdr.maxcount = maxcount;
>
> - dprintk("%s: pre get_device_info type %u, mxcnt %u,devid %u\n",
> + dprintk("%s: pre gdi type %u, mxcnt %u,devid %llx:%llx\n",
> __func__,
> info_arg.type,
> info_arg.xdr.maxcount,
> - info_arg.devid);
> + info_arg.devid.di_opaque.major,
> + info_arg.devid.di_opaque.minor);
> nfserr = sb->s_export_op->get_device_info(sb, &info_arg);
> dprintk("%s: post get_device_info err %d bytes_wr %u\n",
> __func__,
> @@ -3414,7 +3414,8 @@ nfsd4_encode_getdevinfo(struct nfsd4_compoundres *resp,
>
> /* Set layout type and id of device to encode */
> args.type = gdev->gd_type;
> - args.devid = gdev->gd_devid;
> + args.devid.pnfs_fsid = gdev->gd_devid.pnfs_fsid;
> + args.devid.pnfs_devid = gdev->gd_devid.pnfs_devid;
>
> /* Set xdr info so file system can encode device */
> args.xdr.p = resp->p;
> diff --git a/include/linux/exportfs.h b/include/linux/exportfs.h
> index 2f4c704..6048e7b 100644
> --- a/include/linux/exportfs.h
> +++ b/include/linux/exportfs.h
> @@ -37,6 +37,18 @@ enum fid_type {
>
> #if defined(CONFIG_PNFS)
>
> +typedef struct {
> + uint64_t major; /* fsid */
> + uint64_t minor; /* deviceid */
> +} deviceid_opaque_t;
> +
> +typedef struct {
> + deviceid_opaque_t di_opaque;
> +} deviceid_t;
>
The opaque typedef seems a bit extraneous...
Is there a use case for it?
If you just want to encapsulate it you could've defined it inline like this:
typedef union {
struct {
uint64_t major;
uint64_t minor;
} di_opaque;
unsigned char data[...SIZE];
} deviceid_t;
note that the members are defined as in host order assuming they will
be xdr xcoded using {WRITE,READ}64.
If you want to use {WRITE,COPY}MEM instead, these should be __be64.
> +
> +#define pnfs_fsid di_opaque.major
> +#define pnfs_devid di_opaque.minor
>
you defined 2 levels of syntactic indirection to get to this fields
(a macro, and then a structure member)
Who are you trying to hide this information from? :-)
It's stays in the family :)
I think it's be simpler iif the structure is defined in a straight forward
manner:
typedef struct {
uint64_t fsid; (or expid if it's not really a fsid but rather a
handle identifying the export)
uint64_t devid;
} pnfs_deviceid_t;
> +
> /* XDR stream arguments and results. Exported file system uses this
> * struct to encode information and return how many bytes were encoded.
> */
> @@ -57,22 +69,23 @@ typedef int (*pnfs_encodedev_t)(struct pnfs_xdr_info *xdr, void *device);
> /* Arguments for get_device_info */
> struct pnfs_devinfo_arg {
> u32 type;
> - u32 devid;
> + deviceid_t devid;
> struct pnfs_xdr_info xdr;
> pnfs_encodedev_t func;
> };
>
> /* Used by get_device_iter to retrieve all available devices.
> * Args:
> - * gld_type - layout type
> - * gld_cookie/verf - index and verifier of current list item
> - * gld_devid - output device id
> + * type - layout type
> + * cookie/verf - index and verifier of current list item
> + * export_id - Minor part of deviceid_t
> + * eof - end of file?
> */
> struct pnfs_deviter_arg {
> u32 type; /* request */
> u64 cookie; /* request/response */
> u64 verf; /* request/response */
> - u32 devid; /* response */
> + u64 devid; /* response */
> u32 eof; /* response */
> };
>
> @@ -86,8 +99,14 @@ struct nfsd4_layout_seg {
>
> /* Used by layout_get to encode layout (loc_body var in spec)
> * Args:
> - * xdr - xdr stream
> - * layout - pointer to layout to be encoded
> + * minlength - min number of accessible bytes given by layout
> + * func - per layout encoding function
> + * export_id - Major part of deviceid_t. File system uses this
> + * to build the deviceid returned in the layout.
> + * fh - fs can modify the file handle for use on data servers
> + * seg - layout info requested and layout info returned
> + * xdr - xdr info
> + * return_on_close - true if layout to be returned on file close
> * TODO: use common func with dev?
> */
> typedef int (*pnfs_encodelayout_t)(struct pnfs_xdr_info *xdr, void *layout);
> @@ -96,6 +115,7 @@ typedef int (*pnfs_encodelayout_t)(struct pnfs_xdr_info *xdr, void *layout);
> struct pnfs_layoutget_arg {
> u64 minlength; /* request */
> pnfs_encodelayout_t func; /* request */
> + u64 fsid; /* request */
> struct knfsd_fh *fh; /* request/response */
> struct nfsd4_layout_seg seg; /* request/response */
> struct pnfs_xdr_info xdr; /* request/response */
> diff --git a/include/linux/nfsd/nfs4layoutxdr.h b/include/linux/nfsd/nfs4layoutxdr.h
> index 97a7686..1590174 100644
> --- a/include/linux/nfsd/nfs4layoutxdr.h
> +++ b/include/linux/nfsd/nfs4layoutxdr.h
> @@ -39,25 +39,16 @@
> #if defined(CONFIG_PNFSD)
>
> /* Macros from fs/nfsd/nfs4xdr.c */
> -#define ENCODE_HEAD u32 *p
>
> #define WRITE32(n) *p++ = htonl(n)
> #define WRITE64(n) do { \
> *p++ = htonl((u32)((n) >> 32)); \
> *p++ = htonl((u32)(n)); \
> } while (0)
> -#define WRITEMEM(ptr,nbytes) do { \
> - *(p + XDR_QUADLEN(nbytes) - 1) = 0; \
> - memcpy(p, ptr, nbytes); \
> - p += XDR_QUADLEN(nbytes); \
> -} while (0)
>
> -#define RESERVE_SPACE(nbytes) do { \
> - p = resp->p; \
> - BUG_ON(p + XDR_QUADLEN(nbytes) > resp->end); \
> -} while (0)
> #define ADJUST_ARGS() resp->p = p
>
> +
> /* the nfsd4_pnfs_devlist dev_addr for the file layout type */
> struct pnfs_filelayout_devaddr {
> struct xdr_netobj r_netid;
> @@ -83,7 +74,7 @@ struct pnfs_filelayout_layout {
> u32 lg_commit_through_mds; /* response */
> u64 lg_stripe_unit; /* response */
> u32 lg_first_stripe_index; /* response */
> - u32 device_id; /* response */
> + deviceid_t device_id; /* response */
> u32 lg_fh_length; /* response */
> struct knfsd_fh *lg_fh_list; /* response */
> };
> diff --git a/include/linux/nfsd/nfsd4_pnfs.h b/include/linux/nfsd/nfsd4_pnfs.h
> index bc794e2..b24b4cf 100644
> --- a/include/linux/nfsd/nfsd4_pnfs.h
> +++ b/include/linux/nfsd/nfsd4_pnfs.h
> @@ -41,6 +41,7 @@
> #include <linux/nfs.h>
> #include <linux/nfs_xdr.h>
> #include <linux/exportfs.h>
> +#include <linux/nfsd/nfs4layoutxdr.h>
>
> /* pNFS structs */
>
> @@ -55,7 +56,7 @@ struct nfsd4_pnfs_getdevlist {
>
> struct nfsd4_pnfs_getdevinfo {
> u32 gd_type; /* request */
> - u32 gd_devid; /* request */
> + deviceid_t gd_devid; /* request */
> u32 gd_maxcount; /* request */
> struct svc_fh *gd_fhp; /* response */
> };
> diff --git a/include/linux/nfsd/pnfsd.h b/include/linux/nfsd/pnfsd.h
> index 8a03fed..2e63781 100644
> --- a/include/linux/nfsd/pnfsd.h
> +++ b/include/linux/nfsd/pnfsd.h
> @@ -42,7 +42,7 @@
>
> /* pNFS Metadata to Data server state communication*/
> struct pnfs_get_state {
> - u32 devid; /* request */
> + u32 dsid; /* request */
> unsigned long ino; /* request */
> stateid_t stid; /* request;response */
> clientid_t clid; /* response */
> diff --git a/include/linux/nfsd/state.h b/include/linux/nfsd/state.h
> index 67f5b08..8cde9ea 100644
> --- a/include/linux/nfsd/state.h
> +++ b/include/linux/nfsd/state.h
> @@ -389,7 +389,7 @@ struct nfs4_file {
>
> struct pnfs_ds_dev_entry {
> struct list_head dd_dev_entry; /* st_pnfs_ds_id entry */
> - u32 dd_devid;
> + u32 dd_dsid;
> };
> #endif /* CONFIG_PNFSD */
>
>
More information about the pNFS
mailing list