[pnfs] [PATCH 13/29] pnfs: read path set ds_size

Benny Halevy bhalevy at panasas.com
Wed Jan 2 09:42:38 EST 2008


Here's the full change that I propose

Benny

diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c
index 6e446d2..ccb019f 100644
--- a/fs/nfs/pnfs.c
+++ b/fs/nfs/pnfs.c
@@ -1000,17 +1000,19 @@ void
 pnfs_set_ds_rsize(struct inode *inode,
 		 struct nfs_open_context *ctx,
 		 struct list_head *pages,
+		  unsigned long nr_pages,
 		 loff_t offset,
 		 size_t *rsize,
 		 struct nfs_pageio_descriptor *pgio)
 {
 	struct nfs_server *nfss = NFS_SERVER(inode);
 	struct page *page;
-	size_t count = 0;
+	loff_t end_offset, i_size;
+	size_t count;
 	int status = 0;
 
-	dprintk("--> %s inode %p ctx %p pages %p offset %lu\n",
-		__func__, inode, ctx, pages, (unsigned long)offset);
+	dprintk("--> %s inode %p ctx %p pages %p nr_pages %lu offset %lu\n",
+		__func__, inode, ctx, pages, nr_pages,(unsigned long)offset);
 
 	pgio->pg_boundary = 0;
 	pgio->pg_test = 0;
@@ -1019,8 +1021,11 @@ pnfs_set_ds_rsize(struct inode *inode,
 		return;
 
 	/* Calculate the total read-ahead count */
-	list_for_each_entry(page, pages, lru)
-		count += pnfs_page_length(page, inode);
+	end_offset = (offset & PAGE_CACHE_MASK) + nr_pages * PAGE_CACHE_SIZE;
+	i_size = i_size_read(inode);
+	if (end_offset > i_size)
+		end_offset = i_size;
+	count = end_offset - offset;
 
 	dprintk("%s count %ld\n", __func__,(long int)count);
 
diff --git a/fs/nfs/pnfs.h b/fs/nfs/pnfs.h
index 310490c..2797cd3 100644
--- a/fs/nfs/pnfs.h
+++ b/fs/nfs/pnfs.h
@@ -44,27 +44,7 @@ int pnfs_try_to_commit(struct inode *, struct nfs_write_data *, struct list_head
 int pnfs_wsize(struct inode *, unsigned int, struct nfs_write_data *);
 int pnfs_rpages(struct inode *);
 int pnfs_wpages(struct inode *);
-void pnfs_set_ds_rsize(struct inode *, struct nfs_open_context *, struct list_head *, loff_t, size_t *, struct nfs_pageio_descriptor *);
-
-
-/*
- * Determine the number of bytes of data the page contains
- */
-static inline
-unsigned int pnfs_page_length(struct page *page, struct inode *inode)
-{
-	loff_t i_size = i_size_read(inode);
-
-	if (i_size > 0) {
-		pgoff_t end_index = (i_size - 1) >> PAGE_CACHE_SHIFT;
-
-		if (page->index < end_index)
-			return PAGE_CACHE_SIZE;
-		if (page->index == end_index)
-			return ((i_size - 1) & ~PAGE_CACHE_MASK) + 1;
-	}
-	return 0;
-}
+void pnfs_set_ds_rsize(struct inode *, struct nfs_open_context *, struct list_head *, unsigned long, loff_t, size_t *, struct nfs_pageio_descriptor *);
 
 #endif /* CONFIG_PNFS */
 
diff --git a/fs/nfs/read.c b/fs/nfs/read.c
index 689722e..e83b94c 100644
--- a/fs/nfs/read.c
+++ b/fs/nfs/read.c
@@ -643,7 +643,8 @@ int nfs_readpages(struct file *filp, struct address_space *mapping,
 	} else
 		desc.ctx = get_nfs_open_context(nfs_file_open_context(filp));
 #ifdef CONFIG_PNFS
-	pnfs_set_ds_rsize(inode, desc.ctx, pages, filp->f_pos, &rsize, &pgio);
+	pnfs_set_ds_rsize(inode, desc.ctx, pages, nr_pages, filp->f_pos,
+			  &rsize, &pgio);
 #endif /* CONFIG_PNFS */
 	if (rsize < PAGE_CACHE_SIZE)
 		nfs_pageio_init(&pgio, inode, nfs_pagein_multi, rsize, 0);

On Jan. 01, 2008, 18:48 +0200, Benny Halevy <bhalevy at panasas.com> wrote:
> On Dec. 28, 2007, 10:45 +0200, Benny Halevy <bhalevy at panasas.com> wrote:
>> From: Andy Adamson <andros at umich.edu>
>>
>> Use the pNFS ds_rsize for read pageio.
>>
>> When using the nfs page cache for pNFS I/O, it is necessary to setupe pages
>> using the rsize of the storage device instead of the rsize of the MDS.
>>
>> In 2.6.18.3, there are several read I/O paths through the client code and
>> the rsize was used in each of these paths to setup the nfs page cache. Since
>> several functions used the rsize, an rpc_ops->rsize function makes sense.
>>
>> In 2.6.24, there is only one read I/O path through the client code. The
>> interface to the nfs page cache has been re-written. It is now shared between
>> the read and write code paths, and moved entirely to fs/nfs/pagelist.c.
>>
>> As a result, there is now only one place to set the rsize.
>>
>> This patch removes the rpc_ops->rsize function pointer and
>> sets the rsize to the pNFS ds_size when pNFS is being used.
>>
>> pNFS is not being used if layoutget fails, or the count is below the
>> threshold.
>>
>> Signed-off-by: Andy Adamson<andros at umich.edu>
>> Signed-off-by: Benny Halevy <bhalevy at panasas.com>
>> ---
>>  fs/nfs/nfs4proc.c |    1 -
>>  fs/nfs/pnfs.c     |   48 ++++++++++++++++++++++++++++++++++++++----------
>>  fs/nfs/pnfs.h     |   23 ++++++++++++++++++++++-
>>  fs/nfs/read.c     |    7 +++++++
>>  4 files changed, 67 insertions(+), 12 deletions(-)
>>
>> diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
>> index 2294754..3613960 100644
>> --- a/fs/nfs/nfs4proc.c
>> +++ b/fs/nfs/nfs4proc.c
>> @@ -5312,7 +5312,6 @@ const struct nfs_rpc_ops pnfs_v41_clientops = {
>>  	.file_open      = nfs_open,
>>  	.file_release   = nfs_release,
>>  	.lock		= nfs4_proc_lock,
>> -	.rsize		= pnfs_rsize,
>>  	.wsize		= pnfs_wsize,
>>  	.rpages		= pnfs_rpages,
>>  	.wpages		= pnfs_wpages,
>> diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c
>> index 33de1e9..c269cf6 100644
>> --- a/fs/nfs/pnfs.c
>> +++ b/fs/nfs/pnfs.c
>> @@ -640,6 +640,44 @@ check:
>>  		return 0;
>>  }
>>  
>> +/*
>> + * rsize is already set by caller to MDS rsize.
>> + */
>> +void
>> +pnfs_set_ds_size(struct inode *inode,
>> +		 struct nfs_open_context *ctx,
>> +		 struct list_head *pages,
>> +		 loff_t offset,
>> +		 size_t *rsize)
>> +{
>> +	struct nfs_server *nfss = NFS_SERVER(inode);
>> +	struct page *page;
>> +	size_t count = 0;
>> +	int status = 0;
>> +
>> +	dprintk("--> %s inode %p ctx %p pages %p offset %lu\n",
>> +		__func__, inode, ctx, pages, (unsigned long)offset);
>> +
>> +	if (!pnfs_enabled_sb(nfss))
>> +		return;
>> +
>> +	/* Calculate the total read-ahead count */
>> +	list_for_each_entry(page, pages, lru)
>> +		count += pnfs_page_length(page, inode);
> 
> Andy, I'm not sure why we need to do all of this...
> I presume that the byte count on the last page may be the whole page
> and extend over the file read size and you want to get the layout
> only up to the file size, is that so?
> 
> Anyway, the pages are consecutive so why do you need to go over the 
> the list and not just rely on the number of pages, the offset, and
> the file size? (the current calculation also seems to have a bug
> since it doesn't take the offset into the first page into account...)
> 
> How about doing something like this:
> - pass nr_pages to pnfs_set_ds_rsize
> 
> 	loff_t i_size = i_size_read(inode);
> 	loff_t end_offset = (offset & PAGE_CACHE_MASK) + nr_pages * PAGE_CACHE_SIZE;
> 
> 	if (end_offset > i_size)
> 		end_offset = i_size;
> 	count = end_offset - offset;
> 
> - Benny
> _______________________________________________
> pNFS mailing list
> pNFS at linux-nfs.org
> http://linux-nfs.org/cgi-bin/mailman/listinfo/pnfs
> 



More information about the pNFS mailing list