[PATCH] layout get changes for nfsd layout cache

Benny Halevy bhalevy at ns1.bhalevy.com
Mon May 7 11:19:34 EDT 2007


---
 fs/nfsd/nfs4proc.c              |   23 +++-
 fs/nfsd/nfs4state.c             |  349 ++++++++++++++++++++++-----------------
 include/linux/fs.h              |    2 +
 include/linux/nfsd/nfsd4_pnfs.h |    8 +-
 include/linux/nfsd/state.h      |   18 +--
 5 files changed, 228 insertions(+), 172 deletions(-)

diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c
index 038c5cf..8ae3291 100644
--- a/fs/nfsd/nfs4proc.c
+++ b/fs/nfsd/nfs4proc.c
@@ -812,11 +812,30 @@ nfsd4_layoutget( struct svc_rqst *rqstp, struct current_session *cses, struct sv
 		goto out;
 	}
 
+	status = nfserr_layoutunavailable;
+	if (!sb->s_export_op->layout_get) {
+		dprintk("pNFS %s: layout_get not implemented for layout type %d\n", __FUNCTION__, type);
+		goto out;
+	}
+
+	status = nfserr_inval;
+	if (lgp->lg_seg.iomode != IOMODE_READ &&
+	    lgp->lg_seg.iomode != IOMODE_RW &&
+	    lgp->lg_seg.iomode != IOMODE_ANY) {
+		dprintk("pNFS %s: invalid iomode %d\n", __FUNCTION__,
+		        lgp->lg_seg.iomode);
+		goto out;
+	}
+
+	status = nfserr_badiomode;
+	if (lgp->lg_seg.iomode == IOMODE_ANY) {
+		dprintk("pNFS %s: IOMODE_ANY is not allowed\n", __FUNCTION__);
+		goto out;
+	}
+
 	/* set the export ops for encoding the devaddr */
 	lgp->lg_ops = sb->s_export_op;
 
-        /* Set file handle and clientid*/
-	memcpy(&lgp->lg_fh, &current_fh->fh_handle, sizeof(struct knfsd_fh));
 	lgp->lg_seg.clientid  = *(u64 *)&cses->cs_sid.clientid;
 
         status = nfs4_pnfs_get_layout(sb, current_fh, lgp);
diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
index 7b8f8e2..cd5c212 100644
--- a/fs/nfsd/nfs4state.c
+++ b/fs/nfsd/nfs4state.c
@@ -88,10 +88,9 @@ static void nfs4_set_recdir(char *recdir);
  * Layout state - NFSv4.1 pNFS
  */
 static struct list_head layout_recall_lru;
-static int num_layouts;
 static kmem_cache_t *pnfs_layout_slab = NULL;
+static void destroy_layout(struct nfs4_layout *lp);
 static void release_pnfs_ds_dev_list(struct nfs4_stateid *stp);
-static void nfs4_put_layout(struct nfs4_layout *lp);
 #endif /* CONFIG_PNFS */
 
 
@@ -301,20 +300,6 @@ unhash_delegation(struct nfs4_delegation *dp)
 	nfs4_put_delegation(dp);
 }
 
-#if defined(CONFIG_PNFS)
-/* Called under the state lock. */
-static void
-unhash_layout(struct nfs4_layout *lp)
-{
-	list_del_init(&lp->lo_perfile);
-	list_del_init(&lp->lo_perclnt);
-	spin_lock(&recall_lock);
-	list_del_init(&lp->lo_recall_lru);
-	spin_unlock(&recall_lock);
-	nfs4_put_layout(lp);
-}
-#endif
-
 /*
  * SETCLIENTID state
  */
@@ -634,7 +619,7 @@ expire_client(struct nfs4_client *clp)
 	while (!list_empty(&reaplist)) {
 		lp = list_entry(reaplist.next, struct nfs4_layout, lo_recall_lru);
 		list_del_init(&lp->lo_recall_lru);
-		unhash_layout(lp);
+		destroy_layout(lp);
 	}
 
 #endif
@@ -1695,6 +1680,18 @@ find_file(struct inode *ino)
 	return NULL;
 }
 
+static struct nfs4_file *
+find_alloc_file(struct inode *ino, struct svc_fh *current_fh)
+{
+	struct nfs4_file *fp;
+
+	fp = find_file(ino);
+	if (fp)
+		return fp;
+
+	return alloc_init_file(ino, current_fh);
+}
+
 static int access_valid(u32 x)
 {
 	return (x > 0 && x < 4);
@@ -3868,70 +3865,62 @@ nfs4_reset_lease(time_t leasetime)
 }
 
 #if defined(CONFIG_PNFS)
-
-/* Called under the state lock. */
-static void
-pnfs_unhash_layout(struct nfs4_layout *lp)
+static inline struct nfs4_layout *
+alloc_layout(void)
 {
-	list_del_init(&lp->lo_perfile);
-	list_del_init(&lp->lo_perclnt);
+	return kmem_cache_alloc(pnfs_layout_slab, GFP_KERNEL);
 }
 
-static void
-free_nfs4_layout(struct kref *kref)
+static inline void
+free_layout(struct nfs4_layout *lp)
 {
-	struct nfs4_layout *lp;
-	struct nfs4_file *fp;
-	struct nfsd4_pnfs_layoutreturn lr;
-	struct inode *ino;
-
-	lp = container_of(kref, struct nfs4_layout, lo_ref);
-	pnfs_unhash_layout(lp);
-
-	fp = lp->lo_file;
-	ino = fp->fi_inode;
-
-	dprintk("pNFS %s: lp %p fp %p ino %p\n", __FUNCTION__, lp, fp, ino);
-	lr.lr_reclaim = 0;
-	lr.lr_seg.layout_type = lp->lo_layout_type;
-	lr.lr_seg.iomode = lp->lo_iomode;
-	lr.lr_return_type = RETURN_FILE;
-	lr.lr_seg.offset = lp->lo_offset;
-	lr.lr_seg.length = lp->lo_length;
-	lr.lr_flags = 0; /* last return */
+	kmem_cache_free(pnfs_layout_slab, lp);
+}
 
-//??? recall layout
+static struct nfs4_layout *
+alloc_init_layout(struct nfs4_layout *lp,
+                  struct nfs4_file *fp,
+                  struct nfs4_client *clp,
+                  struct svc_fh *current_fh,
+                  struct nfsd4_pnfs_layoutget *lg)
+{
+	dprintk("NFSD %s\n", __FUNCTION__);
+	if (!lp) {
+		lp = alloc_layout();
+		if (!lp)
+			return NULL;
+	}
 
-	if (ino->i_sb->s_export_op->layout_return)
-		ino->i_sb->s_export_op->layout_return(ino, &lr);
+	dprintk("pNFS %s: lp %p clp %p fp %p ino %p\n", __FUNCTION__,
+	        lp, clp, fp, fp->fi_inode);
 
-	kmem_cache_free(pnfs_layout_slab, lp);
-	put_nfs4_file(fp);
+	get_nfs4_client(clp);
+	get_nfs4_file(fp);
+	lp->lo_client = clp;
+	lp->lo_file = fp;
+	memcpy(&lp->lo_seg, &lg->lg_seg, sizeof(lp->lo_seg));
+	list_add_tail(&lp->lo_perclnt, &clp->cl_layouts);
+	list_add_tail(&lp->lo_perfile, &fp->fi_layouts);
+	dprintk("NFSD %s return %p\n", __FUNCTION__, lp);
+	return lp;
 }
 
 static void
-nfs4_put_layout(struct nfs4_layout *lp)
+destroy_layout(struct nfs4_layout *lp)
 {
-	dprintk("pNFS %s: lo_ref %d fi_ref %d\n", __FUNCTION__,
-				atomic_read(&lp->lo_ref.refcount),
-				atomic_read(&lp->lo_file->fi_ref.refcount));
-	kref_put(&lp->lo_ref, free_nfs4_layout);
-}
+	struct nfs4_client *clp;
+	struct nfs4_file *fp;
 
-static inline void
-get_nfs4_layout(struct nfs4_layout *lp)
-{
-	kref_get(&lp->lo_ref);
-	dprintk("pNFS %s: lo_ref %d fi_ref %d\n", __FUNCTION__,
-				atomic_read(&lp->lo_ref.refcount),
-				atomic_read(&lp->lo_file->fi_ref.refcount));
-}
+	list_del(&lp->lo_perclnt);
+	list_del(&lp->lo_perfile);
+	clp = lp->lo_client;
+	fp = lp->lo_file;
+	dprintk("pNFS %s: lp %p clp %p fp %p ino %p\n", __FUNCTION__,
+	        lp, clp, fp, fp->fi_inode);
 
-static void
-pnfs_hash_layoutget(struct nfs4_layout *lp)
-{
-	list_add(&lp->lo_perfile, &lp->lo_file->fi_layouts);
-	list_add(&lp->lo_perclnt, &lp->lo_client->cl_layouts);
+	kmem_cache_free(pnfs_layout_slab, lp);
+	put_nfs4_client(clp);
+	put_nfs4_file(fp);
 }
 
 static void
@@ -3940,43 +3929,6 @@ pnfs_hash_cb_layout(struct nfs4_layout *lp)
 	list_add(&lp->lo_recall_lru, &layout_recall_lru);
 }
 
-static struct nfs4_layout *
-pnfs_alloc_init_layout(struct nfs4_file *fp, struct nfs4_client *clp, struct svc_fh *current_fh, struct nfsd4_pnfs_layoutget *lg)
-{
-	struct nfs4_layout *lp;
-	struct nfs4_callback *cb = &clp->cl_callback;
-	struct inode *ino = current_fh->fh_dentry->d_inode;
-
-	dprintk("NFSD alloc_init_layout\n");
-	lp = kmem_cache_alloc(pnfs_layout_slab, GFP_KERNEL);
-	if (lp == NULL)
-		return lp;
-
-	kref_init(&lp->lo_ref);
-	INIT_LIST_HEAD(&lp->lo_perfile);
-	INIT_LIST_HEAD(&lp->lo_perclnt);
-	INIT_LIST_HEAD(&lp->lo_recall_lru);
-
-	lp->lo_file = fp;
-	lp->lo_time = 0;
-	memset(&lp->lo_cb_layout, 0, sizeof(struct nfs4_cb_layout));
-	lp->lo_client = clp;
-	lp->lo_sb = ino->i_sb;
-	lp->lo_ident = cb->cb_ident;
-	lp->lo_fhlen = current_fh->fh_handle.fh_size;
-	memcpy(lp->lo_fhval, &current_fh->fh_handle.fh_base,
-		        current_fh->fh_handle.fh_size);
-	lp->lo_layout_type = lg->lg_seg.layout_type;
-	lp->lo_iomode = lg->lg_seg.iomode;
-	lp->lo_offset = lg->lg_seg.offset;
-	lp->lo_length = lg->lg_seg.length;
-	num_layouts++;
-	pnfs_hash_layoutget(lp);
-	get_nfs4_file(lp->lo_file);
-	dprintk("NFSD alloc_init_layout exit\n");
-	return lp;
-}
-
 /*
  * get_state() and cb_get_state() are
  */
@@ -4008,6 +3960,89 @@ nfs4_add_pnfs_ds_dev(struct nfs4_stateid *stp, u32 devid)
 	return 0;
 }
 
+/*
+ * are two octet ranges overlapping?
+ * start1            last1
+ *   |-----------------|
+ *                start2            last2
+ *                  |----------------|
+ */
+static inline int
+lo_seg_overlapping(struct nfsd4_layout_seg *l1, struct nfsd4_layout_seg *l2)
+{
+	u64 start1 = l1->offset;
+	u64 last1 = last_byte_offset(start1, l1->length);
+	u64 start2 = l2->offset;
+	u64 last2 = last_byte_offset(start2, l2->length);
+
+	/* if last1 == start2 there's a single byte overlap */
+	return (last2 >= start1) && (last1 >= start2);
+}
+
+static inline int
+same_fsid(struct nfs4_fsid *fsid, struct svc_fh *current_fh)
+{
+	return fsid->major == current_fh->fh_export->ex_fsid;
+}
+
+/*
+ * are two octet ranges overlapping or adjacent?
+ */
+static inline int
+lo_seg_mergeable(struct nfsd4_layout_seg *l1, struct nfsd4_layout_seg *l2)
+{
+	u64 start1 = l1->offset;
+	u64 end1 = end_offset(start1, l1->length);
+	u64 start2 = l2->offset;
+	u64 end2 = end_offset(start2, l2->length);
+
+	/* if end1 == start2 the ranges are adjacent */
+	return (end2 >= start1) && (end1 >= start2);
+}
+
+static void
+extend_layout(struct nfsd4_layout_seg *lo, struct nfsd4_layout_seg *lg)
+{
+	u64 lo_start = lo->offset;
+	u64 lo_end = end_offset(lo_start, lo->length);
+	u64 lg_start = lg->offset;
+	u64 lg_end = end_offset(lg_start, lg->length);
+
+	/* lo already covers lg? */
+	if (lo_start <= lg_start && lg_end <= lo_end)
+		return;
+
+	/* extend start offset */
+	if (lo_start > lg_start)
+		lo_start = lg_start;
+
+	/* extend end offset */
+	if (lo_end < lg_end)
+		lo_end = lg_end;
+
+	lo->offset = lo_start;
+	lo->length = (lo_end == NFS4_LENGTH_EOF) ?
+			 lo_end : lo_end - lo_start;
+}
+
+static struct nfs4_layout *
+merge_layout(struct nfs4_file *fp, struct nfs4_client *clp,
+             struct nfsd4_pnfs_layoutget *lgp)
+{
+	struct nfs4_layout *lp;
+
+	list_for_each_entry (lp, &fp->fi_layouts, lo_perfile)
+		if (lp->lo_seg.layout_type == lgp->lg_seg.layout_type &&
+		    lp->lo_seg.clientid == lgp->lg_seg.clientid &&
+		    lp->lo_seg.iomode == lgp->lg_seg.iomode &&
+		    lo_seg_mergeable(&lp->lo_seg, &lgp->lg_seg)) {
+			extend_layout(&lp->lo_seg, &lgp->lg_seg);
+			return lp;
+		}
+
+	return NULL;
+}
+
 static struct nfs4_layout *
 find_layout(struct nfs4_file *fp, struct nfs4_client *clp)
 {
@@ -4024,40 +4059,41 @@ find_layout(struct nfs4_file *fp, struct nfs4_client *clp)
 int nfs4_pnfs_get_layout(struct super_block *sb, struct svc_fh *current_fh,
 				struct nfsd4_pnfs_layoutget *lgp)
 {
-	int status = -ENOENT;
+	int status = nfserr_layouttrylater;
 	struct inode *ino = current_fh->fh_dentry->d_inode;
+	int can_merge;
 	struct nfs4_file *fp;
 	struct nfs4_client *clp;
-	struct nfs4_layout *lp;
+	struct nfs4_layout *lp = NULL;
 	struct nfsd4_pnfs_layoutreturn lr;
 
 	dprintk("NFSD: nfs4_pnfs_get_layout\n");
 
-	fp = find_file(ino);
-	if (!fp) {
-		fp = alloc_init_file(ino, current_fh);
-		if (fp == NULL)
-			goto out;
-	}
+	nfs4_lock_state();
+	fp = find_alloc_file(ino, current_fh);
 	clp = find_confirmed_client((clientid_t *)&lgp->lg_seg.clientid);
-	dprintk("pNFS %s: clp %p \n", __FUNCTION__, clp);
-	if (!clp)
+	dprintk("pNFS %s: fp %p clp %p \n", __FUNCTION__, fp, clp);
+	if (!fp || !clp)
 	        goto out;
 
-	lgp->lg_flags = 0;
-	lp = find_layout(fp, clp);
-	if (lp)
-		lgp->lg_flags = 1; /* update layout */
-	if (sb->s_export_op->layout_get) {
-		status = sb->s_export_op->layout_get(current_fh->fh_dentry->d_inode,
-					(void *)lgp);
+	can_merge = sb->s_export_op->can_merge_layouts != NULL &&
+	            sb->s_export_op->can_merge_layouts(lgp->lg_seg.layout_type);
+
+	if (!can_merge || list_empty(&fp->fi_layouts)) {
+		lp = alloc_layout();
+		if (!lp)
+			goto out;
+	}
 
-		dprintk("pNFS %s: status %d type %d maxcount %d \n",
-			__FUNCTION__, status, lgp->lg_seg.layout_type, lgp->lg_mxcnt);
+	BUG_ON(!sb->s_export_op->layout_get);
+	status = sb->s_export_op->layout_get(current_fh->fh_dentry->d_inode,
+				(void *)lgp);
 
+	dprintk("pNFS %s: status %d type %d maxcount %d \n",
+		__FUNCTION__, status, lgp->lg_seg.layout_type, lgp->lg_mxcnt);
+
+	if (status) {
 		switch (status) {
-			case 0:
-				break;
 			case -ENOMEM:
 			case -EAGAIN:
 			case -EINTR:
@@ -4066,38 +4102,47 @@ int nfs4_pnfs_get_layout(struct super_block *sb, struct svc_fh *current_fh,
 			case -ENOENT:
 				status = nfserr_badlayout;
 				break;
+			case -E2BIG:
+				status = nfserr_toosmall;
+				break;
 			default:
 				status = nfserr_layoutunavailable;
 		}
-		if (status)
-			goto out;
-
-		if (!lp) {
-			lp = pnfs_alloc_init_layout(fp, clp, current_fh, lgp);
-		}
-		if (lp) {
-			dprintk("pNFS %s: lp %p\n", __FUNCTION__, lp);
-			goto out;
-		}
-		status = nfserr_layouttrylater;
+		goto out;
+	}
 
-		if (lgp->lg_ops->layout_encode == NULL &&
-				lgp->lg_seg.layout_type == LAYOUT_NFSV4_FILES)
-			filelayout_free_layout(lgp->lg_layout);
-		else
-			lgp->lg_ops->layout_free(lgp->lg_layout);
+	/* can the new layout be merged into an existing one? */
+	if (can_merge && merge_layout(fp, clp, lgp))
+		goto out;
 
-		lr.lr_reclaim = 0;
-		lr.lr_return_type = RETURN_FILE;
-		lr.lr_seg = lgp->lg_seg;
-		if (sb->s_export_op->layout_return)
-			sb->s_export_op->layout_return(ino, &lr);
+	lp = alloc_init_layout(lp, fp, clp, current_fh, lgp);
+	if (lp) {
+		lp = NULL;	/* so it won't get freed */
+		goto out;	/* success! */
 	}
-out:
-	if (fp)
-		put_nfs4_file(fp);
 
-	dprintk("pNFS %s: exit status %d \n", __FUNCTION__, status);
+	status = nfserr_layouttrylater;
+
+	/* free filesystem layout "cookie" */
+	if (lgp->lg_ops->layout_encode != NULL)
+		lgp->lg_ops->layout_free(lgp->lg_layout);
+	else if (lgp->lg_seg.layout_type == LAYOUT_NFSV4_FILES)
+		filelayout_free_layout(lgp->lg_layout);
+
+	/* simulate a layoutreturn for the newly granted layout */
+	memset(&lr, 0, sizeof(lr));
+	lr.lr_return_type = RETURN_FILE;
+	memcpy(&lr.lr_seg, &lgp->lg_seg, sizeof(lr.lr_seg));
+	lr.lr_flags = LR_FLAG_INTERN;
+	if (sb->s_export_op->layout_return)
+		sb->s_export_op->layout_return(ino, &lr);
+out:
+	if (lp)
+		free_layout(lp);
+	put_nfs4_file(fp);
+	put_nfs4_client(clp);
+	nfs4_unlock_state();
+	dprintk("pNFS %s: lp %p exit status %d\n", __FUNCTION__, lp, status);
 	return status;
 }
 
@@ -4127,7 +4172,7 @@ int nfs4_pnfs_return_layout(struct super_block *sb, struct svc_fh *current_fh,
 	dprintk("pNFS %s: lp %p\n", __FUNCTION__, lp);
 
 	if (lp) {
-		nfs4_put_layout(lp);
+		destroy_layout(lp);
 		status = 0;
 	}
 	put_nfs4_file(fp);
diff --git a/include/linux/fs.h b/include/linux/fs.h
index 2287c20..d72c974 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -1318,6 +1318,8 @@ struct export_operations {
 		 * Arg: layout - file system defined
 		 */
 	void (*layout_free)(void * layout);
+		/* can layout segments be merged for this layout type? */
+	int (*can_merge_layouts) (u32 layout_type);
 		/* pNFS: returns the opaque layout
 		 * Arg: buf - struct nfsd4_pnfs_layoutget
 		 */
diff --git a/include/linux/nfsd/nfsd4_pnfs.h b/include/linux/nfsd/nfsd4_pnfs.h
index f8a472e..c63c5a9 100644
--- a/include/linux/nfsd/nfsd4_pnfs.h
+++ b/include/linux/nfsd/nfsd4_pnfs.h
@@ -83,12 +83,8 @@ struct nfsd4_pnfs_layoutget {
 	u32				lg_signal;	/* request */
 	u64				lg_minlength;	/* request */
 	u32				lg_mxcnt;	/* request */
-	u32				lg_flags;	/* request */
 	struct export_operations	*lg_ops;
 
-        /* only for cluster fs file layout 'struct knfsd_fh' */
-	unsigned char                   lg_fh[NFS_MAXFHSIZE];
-
 	u32				lg_return_on_close; /* response */
 	void				*lg_layout;     /* response callback encoded */
 };
@@ -106,6 +102,10 @@ struct nfsd4_pnfs_layoutcommit {
 	u64                     lc_newsize;     /* response */
 };
 
+enum layoutreturn_flags {
+	LR_FLAG_INTERN = 1 << 0
+};
+
 struct nfsd4_pnfs_layoutreturn {
 	u32                             lr_return_type; /* request */
 	struct nfsd4_layout_seg		lr_seg;		/* request */
diff --git a/include/linux/nfsd/state.h b/include/linux/nfsd/state.h
index 3ae9221..db0ff4d 100644
--- a/include/linux/nfsd/state.h
+++ b/include/linux/nfsd/state.h
@@ -199,7 +199,7 @@ struct nfs4_client {
 	struct list_head	cl_openowners;
 	struct list_head	cl_delegations;
 #ifdef CONFIG_PNFS
-	struct list_head	cl_layouts;
+	struct list_head	cl_layouts;	/* outstanding layouts */
 #endif /* CONFIG_PNFS */
 	struct list_head	cl_sessions;
 	struct list_head        cl_lru;         /* tail queue */
@@ -226,6 +226,7 @@ struct nfs4_fsid {
 };
 
 #ifdef CONFIG_PNFS
+#include <linux/nfsd/nfsd4_pnfs.h>
 
 struct nfs4_cb_layout {
 	struct super_block	*cbl_sb;
@@ -242,27 +243,16 @@ struct nfs4_cb_layout {
 	u32			cbl_fhval[NFS4_FHSIZE];
 };
 
+/* outstanding layout */
 struct nfs4_layout {
-	struct kref		lo_ref;
 	struct list_head	lo_perfile;    /* hash by f_id */
 	struct list_head	lo_perclnt;    /* hash by clientid */
 	struct list_head	lo_recall_lru; /* when in recall */
 	struct nfs4_file        *lo_file;      /* backpointer */
 	struct nfs4_client      *lo_client;
-	time_t                  lo_time;       /* time recall started */
-	struct nfs4_cb_layout	lo_cb_layout;
+	struct nfsd4_layout_seg lo_seg;
 };
 
-#define lo_clienti      lo_cb_layout.cbl_client
-#define lo_sb           lo_cb_layout.cbl_sb
-#define lo_ident        lo_cb_layout.cbl_ident
-#define lo_fhlen        lo_cb_layout.cbl_fhlen
-#define lo_fhval        lo_cb_layout.cbl_fhval
-#define lo_layout_type  lo_cb_layout.cbl_layout_type
-#define lo_iomode       lo_cb_layout.cbl_iomode
-#define lo_offset       lo_cb_layout.cbl_offset
-#define lo_length       lo_cb_layout.cbl_length
-
 #endif /* CONFIG_PNFS */
 
 /* struct nfs4_client_reset
-- 
1.5.1


--------------060009080805080300090707--


More information about the pNFS mailing list