[pnfs] [PATCH 03/18] pnfs: initialize pnfs pageio for write
andros at umich.edu
andros at umich.edu
Mon Jan 7 15:47:19 EST 2008
From: Andy Adamson <andros at umich.edu>
In 2.6.18.3, there are several write I/O paths through the client code.
In 2.6.24, there is only one write I/O path through the client code. The
interface to the nfs page cache has been re-written. It is now shared between
the read and write code paths, and moved entirely to fs/nfs/pagelist.c.
The pageio is set up in the nfs address_space operations, nfs_readpages and
nfs_writepages. We need to determine in the pageio code whether we are
performing pNFS or MDS I/O so that we coalesce correctly.
The read path (nfs_readpages) can calculate the readahead length and use it
to determine the I/O threshold.
The write path (nfs_writepages) receives an address_space mapping covering all
the outstanding writes to the server (possibly via multiple opens).
Add some housekeeping to struct nfs_pageio_descriptor and some write path
code to the pg_test routine to set the pg_bsize to wsize (to MDS) or
ds_wsize (to pNFS) during page coalescing.
Signed-off-by: Andy Adamson <andros at umich.edu>
---
fs/nfs/nfs4filelayout.c | 15 +++++++++++++--
fs/nfs/pnfs.c | 44 ++++++++++++++++++++++++++++++--------------
fs/nfs/pnfs.h | 1 +
fs/nfs/write.c | 4 ++--
include/linux/nfs_page.h | 2 ++
5 files changed, 48 insertions(+), 18 deletions(-)
diff --git a/fs/nfs/nfs4filelayout.c b/fs/nfs/nfs4filelayout.c
index d23a0e4..cbb05d7 100644
--- a/fs/nfs/nfs4filelayout.c
+++ b/fs/nfs/nfs4filelayout.c
@@ -628,14 +628,25 @@ filelayout_gather_across_stripes(struct pnfs_mount_type *mountid)
/*
* filelayout_pg_test(). Called by nfs_can_coalesce_requests()
- * return 1 : prev and req on same stripe.
- * return 0 : pref and req on different stripe.
+ *
+ * For writes which come from the flush daemon, set the bsize on the fly.
+ * reads set the bsize in pnfs_pageio_init_read.
+ *
+ * return 1 : coalesce page
+ * return 0 : don't coalesce page
*/
int
filelayout_pg_test(struct nfs_pageio_descriptor *pgio, struct nfs_page *prev, struct nfs_page *req)
{
u32 p_stripe, r_stripe;
+ if (!pgio->pg_iswrite)
+ goto boundary;
+
+ if (pgio->pg_bsize != NFS_SERVER(pgio->pg_inode)->ds_wsize && pgio->pg_count > pgio->pg_threshold)
+ pgio->pg_bsize = NFS_SERVER(pgio->pg_inode)->ds_wsize;
+
+boundary:
if (pgio->pg_boundary == 0)
return 1;
p_stripe = prev->wb_index << PAGE_CACHE_SHIFT;
diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c
index dd78258..0d312de 100644
--- a/fs/nfs/pnfs.c
+++ b/fs/nfs/pnfs.c
@@ -607,35 +607,41 @@ use_page_cache(struct inode *inode)
return 0;
}
-/*
- * Ask the layout driver for the request size at which pNFS should be used
- * or standard NFSv4 I/O. Writing directly to the NFSv4 server can
- * improve performance through its singularity and async behavior to
- * the underlying parallel file system.
- */
-static int
-below_threshold(struct inode *inode, size_t req_size, int iswrite)
+size_t
+pnfs_getthreshold(struct inode *inode, int iswrite)
{
struct nfs_server *nfss = NFS_SERVER(inode);
struct nfs_inode *nfsi = NFS_I(inode);
- ssize_t threshold = -1;
+ ssize_t threshold = 0;
if (!pnfs_enabled_sb(nfss) ||
!nfss->pnfs_curr_ld->ld_policy_ops)
- return 0;
+ goto out;
if (iswrite && nfss->pnfs_curr_ld->ld_policy_ops->get_write_threshold) {
threshold = nfss->pnfs_curr_ld->ld_policy_ops->get_write_threshold(nfsi->current_layout, inode);
- dprintk("%s wthresh: %Zd\n", __FUNCTION__, threshold);
- goto check;
+ goto out;
}
if (!iswrite && nfss->pnfs_curr_ld->ld_policy_ops->get_read_threshold) {
threshold = nfss->pnfs_curr_ld->ld_policy_ops->get_read_threshold(nfsi->current_layout, inode);
- dprintk("%s rthresh: %Zd\n", __FUNCTION__, threshold);
}
+out:
+ return threshold;
+}
-check:
+/*
+ * Ask the layout driver for the request size at which pNFS should be used
+ * or standard NFSv4 I/O. Writing directly to the NFSv4 server can
+ * improve performance through its singularity and async behavior to
+ * the underlying parallel file system.
+ */
+static int
+below_threshold(struct inode *inode, size_t req_size, int iswrite)
+{
+ ssize_t threshold;
+
+ threshold = pnfs_getthreshold(inode, iswrite);
if ((ssize_t)req_size <= threshold)
return 1;
else
@@ -699,6 +705,8 @@ pnfs_pageio_init_read(struct nfs_pageio_descriptor *pgio,
loff_t loff;
int status = 0;
+ pgio->pg_threshold = 0;
+ pgio->pg_iswrite = 0;
pgio->pg_boundary = 0;
pgio->pg_test = NULL;
@@ -723,6 +731,14 @@ pnfs_pageio_init_read(struct nfs_pageio_descriptor *pgio,
}
}
+void
+pnfs_pageio_init_write(struct nfs_pageio_descriptor *pgio, struct inode * inode)
+{
+ pgio->pg_iswrite = 1;
+ pgio->pg_threshold = pnfs_getthreshold(inode, 1);
+ pgio->pg_boundary = pnfs_getboundary(inode);
+ pnfs_set_pg_test(inode, pgio);
+}
/* This is utilized in the paging system to determine if
* it should use the NFSv4 or pNFS read path.
diff --git a/fs/nfs/pnfs.h b/fs/nfs/pnfs.h
index 72fec10..d67787a 100644
--- a/fs/nfs/pnfs.h
+++ b/fs/nfs/pnfs.h
@@ -56,6 +56,7 @@ void pnfs_readpage_result_norpc(struct rpc_task *task, void *calldata);
void pnfs_writeback_done_norpc(struct rpc_task *, void *);
void pnfs_commit_done_norpc(struct rpc_task *, void *);
void pnfs_pageio_init_read(struct nfs_pageio_descriptor *, struct inode *, struct nfs_open_context *, struct list_head *, size_t *);
+void pnfs_pageio_init_write(struct nfs_pageio_descriptor *, struct inode *);
#endif /* CONFIG_PNFS */
diff --git a/fs/nfs/write.c b/fs/nfs/write.c
index 31f2745..c45eeeb 100644
--- a/fs/nfs/write.c
+++ b/fs/nfs/write.c
@@ -23,6 +23,7 @@
#include <asm/uaccess.h>
#if defined(CONFIG_PNFS)
#include <linux/module.h>
+#include "pnfs.h"
#endif
#include "delegation.h"
@@ -975,8 +976,7 @@ static void nfs_pageio_init_write(struct nfs_pageio_descriptor *pgio,
int wsize = NFS_SERVER(inode)->wsize;
#ifdef CONFIG_PNFS
- pgio->pg_boundary = 0;
- pgio->pg_test = NULL;
+ pnfs_pageio_init_write(pgio, inode);
#endif /* CONFIG_PNFS */
if (wsize < PAGE_CACHE_SIZE)
diff --git a/include/linux/nfs_page.h b/include/linux/nfs_page.h
index 49a5de1..fe82716 100644
--- a/include/linux/nfs_page.h
+++ b/include/linux/nfs_page.h
@@ -62,6 +62,8 @@ struct nfs_pageio_descriptor {
int pg_ioflags;
int pg_error;
#ifdef CONFIG_PNFS
+ size_t pg_threshold;
+ int pg_iswrite;
int pg_boundary;
int (*pg_test)(struct nfs_pageio_descriptor *, struct nfs_page *, struct nfs_page *);
#endif /* CONFIG_PNFS */
--
1.5.0.2
More information about the pNFS
mailing list