[pnfs] [PATCH 03/18] pnfs: initialize pnfs pageio for write

andros at umich.edu andros at umich.edu
Mon Jan 7 15:47:19 EST 2008


From: Andy Adamson <andros at umich.edu>

In 2.6.18.3, there are several write I/O paths through the client code.

In 2.6.24, there is only one write I/O path through the client code.  The
interface to the nfs page cache has been re-written. It is now shared between
the read and write code paths, and moved entirely to fs/nfs/pagelist.c.
The pageio is set up in the nfs address_space operations, nfs_readpages and
nfs_writepages. We need to determine whether we are performing pNFS or MDS I/O
in the pageio code so that we coalesce correctly.

The read path (nfs_readpages) can calculate the readahead length and use it
to determine the I/O threshold.

The write path (nfs_writepages) receives an address_mapping of all the
outstanding writes to the server (possibly via multiple opens).

Add some housekeeping to struct nfs_pageio_descriptor and some write path
code to the pg_test routine to set the pg_bsize to wsize (to MDS) or
ds_wsize (to pNFS) during page coalescing.

Signed-off-by: Andy Adamson <andros at umich.edu>
---
 fs/nfs/nfs4filelayout.c  |   15 +++++++++++++--
 fs/nfs/pnfs.c            |   44 ++++++++++++++++++++++++++++++--------------
 fs/nfs/pnfs.h            |    1 +
 fs/nfs/write.c           |    4 ++--
 include/linux/nfs_page.h |    2 ++
 5 files changed, 48 insertions(+), 18 deletions(-)

diff --git a/fs/nfs/nfs4filelayout.c b/fs/nfs/nfs4filelayout.c
index d23a0e4..cbb05d7 100644
--- a/fs/nfs/nfs4filelayout.c
+++ b/fs/nfs/nfs4filelayout.c
@@ -628,14 +628,25 @@ filelayout_gather_across_stripes(struct pnfs_mount_type *mountid)
 
 /*
  * filelayout_pg_test(). Called by nfs_can_coalesce_requests()
- * return 1 :  prev and req on same stripe.
- * return 0 :  pref and req on different stripe.
+ *
+ * For writes which come from the flush daemon, set the bsize on the fly.
+ * reads set the bsize in pnfs_pageio_init_read.
+ *
+ * return 1 :  coalesce page
+ * return 0 :  don't coalesce page
  */
 int
 filelayout_pg_test(struct nfs_pageio_descriptor *pgio, struct nfs_page *prev, struct nfs_page *req)
 {
 	u32 p_stripe, r_stripe;
 
+	if (!pgio->pg_iswrite)
+		goto boundary;
+
+	if (pgio->pg_bsize != NFS_SERVER(pgio->pg_inode)->ds_wsize && pgio->pg_count > pgio->pg_threshold)
+		pgio->pg_bsize = NFS_SERVER(pgio->pg_inode)->ds_wsize;
+
+boundary:
 	if (pgio->pg_boundary == 0)
 		return 1;
 	p_stripe = prev->wb_index << PAGE_CACHE_SHIFT;
diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c
index dd78258..0d312de 100644
--- a/fs/nfs/pnfs.c
+++ b/fs/nfs/pnfs.c
@@ -607,35 +607,41 @@ use_page_cache(struct inode *inode)
 		return 0;
 }
 
-/*
- * Ask the layout driver for the request size at which pNFS should be used
- * or standard NFSv4 I/O.  Writing directly to the NFSv4 server can
- * improve performance through its singularity and async behavior to
- * the underlying parallel file system.
- */
-static int
-below_threshold(struct inode *inode, size_t req_size, int iswrite)
+size_t
+pnfs_getthreshold(struct inode *inode, int iswrite)
 {
 	struct nfs_server *nfss = NFS_SERVER(inode);
 	struct nfs_inode *nfsi = NFS_I(inode);
-	ssize_t threshold = -1;
+	ssize_t threshold = 0;
 
 	if (!pnfs_enabled_sb(nfss) ||
 	    !nfss->pnfs_curr_ld->ld_policy_ops)
-		return 0;
+		goto out;
 
 	if (iswrite && nfss->pnfs_curr_ld->ld_policy_ops->get_write_threshold) {
 		threshold = nfss->pnfs_curr_ld->ld_policy_ops->get_write_threshold(nfsi->current_layout, inode);
-		dprintk("%s wthresh: %Zd\n", __FUNCTION__, threshold);
-		goto check;
+		goto out;
 	}
 
 	if (!iswrite && nfss->pnfs_curr_ld->ld_policy_ops->get_read_threshold) {
 		threshold = nfss->pnfs_curr_ld->ld_policy_ops->get_read_threshold(nfsi->current_layout, inode);
-		dprintk("%s rthresh: %Zd\n", __FUNCTION__, threshold);
 	}
+out:
+	return threshold;
+}
 
-check:
+/*
+ * Ask the layout driver for the request size at which pNFS should be used
+ * or standard NFSv4 I/O.  Writing directly to the NFSv4 server can
+ * improve performance through its singularity and async behavior to
+ * the underlying parallel file system.
+ */
+static int
+below_threshold(struct inode *inode, size_t req_size, int iswrite)
+{
+	ssize_t threshold;
+ 
+	threshold = pnfs_getthreshold(inode, iswrite);
 	if ((ssize_t)req_size <= threshold)
 		return 1;
 	else
@@ -699,6 +705,8 @@ pnfs_pageio_init_read(struct nfs_pageio_descriptor *pgio,
 	loff_t loff;
 	int status = 0;
 
+	pgio->pg_threshold = 0;
+	pgio->pg_iswrite = 0;
 	pgio->pg_boundary = 0;
 	pgio->pg_test = NULL;
 
@@ -723,6 +731,14 @@ pnfs_pageio_init_read(struct nfs_pageio_descriptor *pgio,
 	}
 }
 
+void
+pnfs_pageio_init_write(struct nfs_pageio_descriptor *pgio, struct inode * inode)
+{
+	pgio->pg_iswrite = 1;
+	pgio->pg_threshold = pnfs_getthreshold(inode, 1);
+	pgio->pg_boundary = pnfs_getboundary(inode);
+	pnfs_set_pg_test(inode, pgio);
+}
 
 /* This is utilized in the paging system to determine if
  * it should use the NFSv4 or pNFS read path.
diff --git a/fs/nfs/pnfs.h b/fs/nfs/pnfs.h
index 72fec10..d67787a 100644
--- a/fs/nfs/pnfs.h
+++ b/fs/nfs/pnfs.h
@@ -56,6 +56,7 @@ void pnfs_readpage_result_norpc(struct rpc_task *task, void *calldata);
 void pnfs_writeback_done_norpc(struct rpc_task *, void *);
 void pnfs_commit_done_norpc(struct rpc_task *, void *);
 void pnfs_pageio_init_read(struct nfs_pageio_descriptor *, struct inode *, struct nfs_open_context *, struct list_head *, size_t *);
+void pnfs_pageio_init_write(struct nfs_pageio_descriptor *, struct inode *);
 
 #endif /* CONFIG_PNFS */
 
diff --git a/fs/nfs/write.c b/fs/nfs/write.c
index 31f2745..c45eeeb 100644
--- a/fs/nfs/write.c
+++ b/fs/nfs/write.c
@@ -23,6 +23,7 @@
 #include <asm/uaccess.h>
 #if defined(CONFIG_PNFS)
 #include <linux/module.h>
+#include "pnfs.h"
 #endif
 
 #include "delegation.h"
@@ -975,8 +976,7 @@ static void nfs_pageio_init_write(struct nfs_pageio_descriptor *pgio,
 	int wsize = NFS_SERVER(inode)->wsize;
 
 #ifdef CONFIG_PNFS
-	pgio->pg_boundary = 0;
-	pgio->pg_test = NULL;
+	pnfs_pageio_init_write(pgio, inode);
 #endif /* CONFIG_PNFS */
 
 	if (wsize < PAGE_CACHE_SIZE)
diff --git a/include/linux/nfs_page.h b/include/linux/nfs_page.h
index 49a5de1..fe82716 100644
--- a/include/linux/nfs_page.h
+++ b/include/linux/nfs_page.h
@@ -62,6 +62,8 @@ struct nfs_pageio_descriptor {
 	int 			pg_ioflags;
 	int			pg_error;
 #ifdef CONFIG_PNFS
+	size_t			pg_threshold;
+	int			pg_iswrite;
 	int			pg_boundary;
 	int			(*pg_test)(struct nfs_pageio_descriptor *, struct nfs_page *, struct nfs_page *);
 #endif /* CONFIG_PNFS */
-- 
1.5.0.2



More information about the pNFS mailing list