[pnfs] [PATCH 01/18] pnfs: initialize pageio for read

andros at umich.edu andros at umich.edu
Mon Jan 7 15:47:17 EST 2008


From: Andy Adamson <andros at umich.edu>

The page cache can already have data in the readahead range. The nfs_readpages
pagelist will 'skip' these pages.  In this case using the number of pages in
the pagelist to calculate the layout range will be incorrect.

Use the first and last page in the pagelist to calculate the layout request
range.

The pageio coalesce code will create requests to fill the holes of data not
present in the page cache.

Alternatively, we could delay setting the rsize (and choosing between pNFS and
RPC to the MDS) until the coalesce code, and send requests below the threshold
to the MDS.

This code uses the whole readahead range for the threshold decision, as is
done in 2.6.18.3.

This patch calculates the readahead range, tries to get a layout, and if
successful, sets the coalesce size to ds_rsize.

Signed-off-by: Andy Adamson <andros at umich.edu>
---
 fs/nfs/pnfs.c |   84 +++++++++++++++++++++++++++++++-------------------------
 fs/nfs/pnfs.h |    2 +-
 fs/nfs/read.c |    3 +-
 3 files changed, 48 insertions(+), 41 deletions(-)

diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c
index d49f9ce..dd78258 100644
--- a/fs/nfs/pnfs.c
+++ b/fs/nfs/pnfs.c
@@ -643,76 +643,84 @@ check:
 }
 
 void
+readahead_range(struct inode * inode, struct list_head *pages, loff_t *offset, size_t *count)
+{
+	struct page *first, *last;
+	loff_t foff, i_size = i_size_read(inode);
+	pgoff_t end_index = (i_size - 1) >> PAGE_CACHE_SHIFT;
+	size_t range;
+
+
+	first = list_entry((pages)->prev, struct page, lru);
+	last = list_entry((pages)->next, struct page, lru);
+
+	foff = first->index << PAGE_CACHE_SHIFT;
+
+	range = (last->index - first->index) * PAGE_CACHE_SIZE;
+	if (last->index == end_index)
+		range += ((i_size - 1) & ~PAGE_CACHE_MASK) + 1;
+	else
+		range += PAGE_CACHE_SIZE;
+	dprintk("%s foff %lu, range %Zu\n", __func__,  (unsigned long)foff,
+                                        range);
+	*offset = foff;
+	*count = range;
+}
+
+void
 pnfs_set_pg_test(struct inode *inode, struct nfs_pageio_descriptor *pgio)
 {
 	struct pnfs_layout_type *laytype;
-	struct layoutdriver_policy_operations *policy_ops;
 	struct pnfs_layoutdriver_type *ld;
 
+	pgio->pg_test = NULL;
+
 	laytype = NFS_I(inode)->current_layout;
 	ld = NFS_SERVER(inode)->pnfs_curr_ld;
 	if (!pnfs_enabled_sb(NFS_SERVER(inode)) || !laytype)
 		return;
-	policy_ops = ld->ld_policy_ops;
 
-	if (policy_ops && policy_ops->pg_test) {
-		pgio->pg_test = policy_ops->pg_test;
-	} else {
-		pgio->pg_test = NULL;
-	}
+	if (ld->ld_policy_ops && ld->ld_policy_ops->pg_test)
+		pgio->pg_test = ld->ld_policy_ops->pg_test;
 }
 
 /*
  * rsize is already set by caller to MDS rsize.
  */
 void
-pnfs_set_ds_rsize(struct inode *inode,
+pnfs_pageio_init_read(struct nfs_pageio_descriptor *pgio,
+		  struct inode *inode,
 		  struct nfs_open_context *ctx,
 		  struct list_head *pages,
-		  unsigned long nr_pages,
-		  loff_t offset,
-		  size_t *rsize,
-		  struct nfs_pageio_descriptor *pgio)
+		  size_t *rsize)
 {
 	struct nfs_server *nfss = NFS_SERVER(inode);
-	loff_t end_offset, i_size;
-	size_t count;
+	size_t count = 0;
+	loff_t loff;
 	int status = 0;
 
-	dprintk("--> %s inode %p ctx %p pages %p nr_pages %lu offset %lu\n",
-		__func__, inode, ctx, pages, nr_pages,(unsigned long)offset);
-
 	pgio->pg_boundary = 0;
-	pgio->pg_test = 0;
+	pgio->pg_test = NULL;
 
 	if (!pnfs_enabled_sb(nfss))
 		return;
 
 	/* Calculate the total read-ahead count */
-	end_offset = (offset & PAGE_CACHE_MASK) + nr_pages * PAGE_CACHE_SIZE;
-	i_size = i_size_read(inode);
-	if (end_offset > i_size)
-		end_offset = i_size;
-	count = end_offset - offset;
+	readahead_range(inode, pages, &loff, &count);
 
-	dprintk("%s count %ld\n", __func__,(long int)count);
-
-
-	status = virtual_update_layout(inode, ctx, count,
-						offset, IOMODE_READ);
-	dprintk("%s *rsize %Zd virt update returned %d\n",
+	if (count > 0 && !below_threshold(inode, count, 0)) {
+		status = virtual_update_layout(inode, ctx, count,
+						loff, IOMODE_READ);
+		dprintk("%s *rsize %Zd virt update returned %d\n",
 					__func__, *rsize, status);
+		if (status != 0)
+			return;
 
-	if (status == 0 && count > 0 && !below_threshold(inode, count, 0))
 		*rsize = NFS_SERVER(inode)->ds_rsize;
-
-	/* boundary set => gather pages by stripe => need pg_test */
-	pgio->pg_boundary = pnfs_getboundary(inode);
-	if (pgio->pg_boundary)
-		pnfs_set_pg_test(inode, pgio);
-
-	dprintk("<-- %s pg_boundary %d, pg_test %p\n", __func__,
-			pgio->pg_boundary, pgio->pg_test);
+		pgio->pg_boundary = pnfs_getboundary(inode);
+		if (pgio->pg_boundary)
+			pnfs_set_pg_test(inode, pgio);
+	}
 }
 
 
diff --git a/fs/nfs/pnfs.h b/fs/nfs/pnfs.h
index bb7b091..72fec10 100644
--- a/fs/nfs/pnfs.h
+++ b/fs/nfs/pnfs.h
@@ -55,7 +55,7 @@ int pnfs_wpages(struct inode *);
 void pnfs_readpage_result_norpc(struct rpc_task *task, void *calldata);
 void pnfs_writeback_done_norpc(struct rpc_task *, void *);
 void pnfs_commit_done_norpc(struct rpc_task *, void *);
-void pnfs_set_ds_rsize(struct inode *, struct nfs_open_context *, struct list_head *, unsigned long, loff_t, size_t *, struct nfs_pageio_descriptor *);
+void pnfs_pageio_init_read(struct nfs_pageio_descriptor *, struct inode *, struct nfs_open_context *, struct list_head *, size_t *);
 
 #endif /* CONFIG_PNFS */
 
diff --git a/fs/nfs/read.c b/fs/nfs/read.c
index b92eb98..19aa8c8 100644
--- a/fs/nfs/read.c
+++ b/fs/nfs/read.c
@@ -650,8 +650,7 @@ int nfs_readpages(struct file *filp, struct address_space *mapping,
 	} else
 		desc.ctx = get_nfs_open_context(nfs_file_open_context(filp));
 #ifdef CONFIG_PNFS
-	pnfs_set_ds_rsize(inode, desc.ctx, pages, nr_pages, filp->f_pos,
-			  &rsize, &pgio);
+	pnfs_pageio_init_read(&pgio, inode, desc.ctx, pages, &rsize);
 #endif /* CONFIG_PNFS */
 	if (rsize < PAGE_CACHE_SIZE)
 		nfs_pageio_init(&pgio, inode, nfs_pagein_multi, rsize, 0);
-- 
1.5.0.2



More information about the pNFS mailing list