[pnfs] [PATCH 19/28] pnfs: pnfs_write_begin

Fredric Isaman iisaman at citi.umich.edu
Thu Mar 13 14:37:53 EDT 2008



On Thu, 13 Mar 2008, Benny Halevy wrote:

> On Mar. 11, 2008, 21:32 +0200, Fred Isaman <iisaman at citi.umich.edu> wrote:
>> From: Fred <iisaman at citi.umich.edu>
>>
>> Add hooks in the nfs_write_begin path, giving a driver the potential
>> to read in page data around the data that is about to be copied to the page.
>>
>> Signed-off-by: Fred <iisaman at citi.umich.edu>
>> ---
>>  fs/nfs/file.c             |   20 ++++++++--
>>  fs/nfs/pnfs.c             |   88 +++++++++++++++++++++++++++++++++++++++++++++
>>  fs/nfs/pnfs.h             |   10 +++++
>>  fs/nfs/write.c            |    2 +-
>>  include/linux/nfs4_pnfs.h |   15 +++++++-
>>  5 files changed, 129 insertions(+), 6 deletions(-)
>>
>> diff --git a/fs/nfs/file.c b/fs/nfs/file.c
>> index 6f16772..e870982 100644
>> --- a/fs/nfs/file.c
>> +++ b/fs/nfs/file.c
>> @@ -352,10 +352,19 @@ static int nfs_write_begin(struct file *file, struct address_space *mapping,
>>  	*pagep = page;
>>
>>  	ret = nfs_flush_incompatible(file, page);
>> -	if (ret) {
>> -		unlock_page(page);
>> -		page_cache_release(page);
>> -	}
>> +	if (ret)
>> +		goto out_err;
>> +#ifdef CONFIG_PNFS
>> +	ret = pnfs_write_begin(file, page, pos, len, flags, fsdata);
>> +	if (ret)
>> +		goto out_err;
>> +#endif /* CONFIG_PNFS */
>
> Trond's preference is to define pnfs functions as static inline noops
> in the !defined(CONFIG_PNFS) case so we can avoid these annoying ifdefs
> like you did with pnfs_do_flush below.  So let's start doing it from now
> on...
>

OK


>> +	return 0;
>> +
>> + out_err:
>> +	unlock_page(page);
>> +	page_cache_release(page);
>> +	*pagep = NULL;
>
> Yup, that's a good thing to do regardless of pnfs.
>
>>  	return ret;
>>  }
>>
>> @@ -372,6 +381,9 @@ static int nfs_write_end(struct file *file, struct address_space *mapping,
>>
>>  	unlock_page(page);
>>  	page_cache_release(page);
>> +#ifdef CONFIG_PNFS
>> +	pnfs_free_fsdata(fsdata);
>
> This violates layering.
> The nfs layer need not know what the pnfs layer wants to do.
> Please change that to a new call to pnfs_write_end (that will just call
> pnfs_free_fsdata() at the moment)
>

OK

>> +#endif /* CONFIG_PNFS */
>>
>>  	return status < 0 ? status : copied;
>>  }
>> diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c
>> index 56aa1bf..a4db86e 100644
>> --- a/fs/nfs/pnfs.c
>> +++ b/fs/nfs/pnfs.c
>> @@ -809,6 +809,28 @@ pnfs_has_layout(struct pnfs_layout_type *lo,
>>  	return ret;
>>  }
>>
>> +static struct pnfs_layout_segment *
>> +pnfs_find_get_lseg(struct inode *inode,
>> +		   loff_t pos,
>> +		   size_t count,
>> +		   enum pnfs_iomode iomode)
>> +{
>> +	struct nfs_inode *nfsi = NFS_I(inode);
>> +	struct pnfs_layout_segment *lseg;
>> +	struct pnfs_layout_type *lo;
>> +	struct nfs4_pnfs_layout_segment range;
>> +
>> +	lo = get_lock_current_layout(nfsi);
>> +	if (!lo)
>> +		return NULL;
>> +	range.iomode = iomode;
>> +	range.offset = pos;
>> +	range.length = count;
>> +	lseg = pnfs_has_layout(lo, &range, 1);
>> +	put_unlock_current_layout(nfsi, lo);
>> +	return lseg;
>> +}
>> +
>>  /* Update the file's layout for the given range and iomode.
>>   * Layout is retreived from the server if needed.
>>   * If lsegpp is given, the appropriate layout segment is referenced and
>> @@ -1537,6 +1559,64 @@ int pnfs_try_to_read_data(struct nfs_read_data *data,
>>  	}
>>  }
>>
>> +/*
>> + * This gives the layout driver an opportunity to read in page "around"
>> + * the data to be written.  It returns 0 on success, otherwise an error code
>> + * which will either be passed up to user, or ignored if
>> + * some previous part of write succeeded.
>> + * Note the range [pos, pos+len-1] is entirely within the page.
>> + */
>> +/* flags = AOP_FLAG_UNINTERRUPTIBLE or 0 - ??? need this ??? */
>> +int pnfs_write_begin(struct file *filp, struct page *page,
>> +		     loff_t pos, unsigned len,
>> +		     unsigned flags, void **fsdata)
>> +{
>> +	struct inode *inode = filp->f_dentry->d_inode;
>> +	struct nfs_server *nfss = NFS_SERVER(inode);
>> +	struct pnfs_layout_segment *lseg;
>> +	struct pnfs_fsdata *pnfs_fsdata = NULL;
>> +	int status = 0;
>> +
>> +	if (!nfss->pnfs_curr_ld || !nfss->pnfs_curr_ld->ld_io_ops ||
>> +	    !nfss->pnfs_curr_ld->ld_io_ops->write_begin) {
>> +		/* If layout driver doesn't define, just do nothing */
>> +		goto out;
>> +	}
>
> let's move this check and similar ones to a static inline definition of
> this function in the header file and call this function (renamed to
> _pnfs_write_begin) only if the layout driver implements write_begin.
>

OK, and I'll modify all the other similar functions likewise.

>> +	lseg = pnfs_find_get_lseg(inode, pos, len, IOMODE_RW);
>
> what if lseg == NULL?
> wouldn't it be better to use pnfs_update_layout()?
>

No.  For most of these functions I specifically do not want to trigger a 
layoutget (though in this particular case it might not hurt).  I want to 
use an existing layout, or get back NULL.  If it is NULL, in this case I 
should just call "goto out".

>> +	status = nfss->pnfs_curr_ld->ld_io_ops->write_begin(lseg, page,
>> +							    pos, len,
>> +							    &pnfs_fsdata);
>> +	put_lseg(lseg);
>> +	if (pnfs_fsdata && !pnfs_fsdata->ld_io_ops)
>> +		pnfs_fsdata->ld_io_ops = nfss->pnfs_curr_ld->ld_io_ops;
>> + out:
>> +	*fsdata = pnfs_fsdata;
>> +	return status;
>> +}
>> +
>> +/* Given an nfs request, determine if it should be flushed before proceeding.
>> + * It should default to returning False, returning True only if there is a
>> + * specific reason to flush.
>> + */
>> +int pnfs_do_flush(struct nfs_page *req, void *fsdata)
>> +{
>> +	struct inode *inode = req->wb_context->path.dentry->d_inode;
>> +	struct nfs_server *nfss = NFS_SERVER(inode);
>> +	struct pnfs_layout_segment *lseg;
>> +	loff_t pos = (req->wb_index << PAGE_CACHE_SHIFT) + req->wb_offset;
>> +	int status = 0;
>> +
>> +	if (!nfss->pnfs_curr_ld || !nfss->pnfs_curr_ld->ld_io_ops ||
>> +	    !nfss->pnfs_curr_ld->ld_policy_ops->do_flush)
>> +		return 0;
>
> ditto
>
>> +	lseg = pnfs_find_get_lseg(inode, pos, req->wb_bytes, IOMODE_RW);
>
> ditto
>

In this case, that fact that lseg is NULL is useful information that must 
be passed in to the layout driver.


>> +	/* Note lseg may be NULL */
>> +	status = nfss->pnfs_curr_ld->ld_policy_ops->do_flush(lseg, req,
>> +							     fsdata);
>> +	put_lseg(lseg);
>> +	return status;
>> +}
>> +
>>  int pnfs_try_to_write_data(struct nfs_write_data *data,
>>  				const struct rpc_call_ops *call_ops, int how)
>>  {
>> @@ -1859,6 +1939,14 @@ void pnfs_free_request_data(struct nfs_page *req)
>>  		lo->free_request_data(req);
>>  }
>>
>> +void pnfs_free_fsdata(void *fsdata)
>> +{
>> +	struct pnfs_fsdata *data = (struct pnfs_fsdata *)fsdata;
>> +
>> +	if (data && data->ld_io_ops->free_fsdata)
>> +		data->ld_io_ops->free_fsdata(data);
>> +}
>> +
>>  /* Callback operations for layout drivers.
>>   */
>>  struct pnfs_client_operations pnfs_ops = {
>> diff --git a/fs/nfs/pnfs.h b/fs/nfs/pnfs.h
>> index 8e11489..3860e38 100644
>> --- a/fs/nfs/pnfs.h
>> +++ b/fs/nfs/pnfs.h
>> @@ -29,6 +29,9 @@ int pnfs_use_ds_io(struct list_head *, struct inode *, int);
>>
>>  int pnfs_use_write(struct inode *inode, ssize_t count);
>>  int pnfs_try_to_write_data(struct nfs_write_data *, const struct rpc_call_ops *, int);
>> +int pnfs_write_begin(struct file *filp, struct page *page, loff_t pos,
>> +		     unsigned len, unsigned flags, void **fsdata);
>> +int pnfs_do_flush(struct nfs_page *req, void *fsdata);
>>  int pnfs_try_to_read_data(struct nfs_read_data *data, const struct rpc_call_ops *call_ops);
>>  int pnfs_initialize(void);
>>  void pnfs_uninitialize(void);
>> @@ -46,8 +49,15 @@ void pnfs_pageio_init_write(struct nfs_pageio_descriptor *, struct inode *);
>>  void pnfs_update_layout_commit(struct inode *, struct list_head *, pgoff_t, unsigned int);
>>  int pnfs_flush_one(struct inode *, struct list_head *, unsigned int, size_t, int);
>>  void pnfs_free_request_data(struct nfs_page *req);
>> +void pnfs_free_fsdata(void *fsdata);
>>  ssize_t pnfs_file_write(struct file *, const char __user *, size_t, loff_t *);
>>
>> +#else /* CONFIG_PNFS */
>> +static inline int pnfs_do_flush(struct nfs_page *req, void *fsdata)
>> +{
>> +	return 0;
>> +}
>> +
>>  #endif /* CONFIG_PNFS */
>>
>>  #endif /* FS_NFS_PNFS_H */
>> diff --git a/fs/nfs/write.c b/fs/nfs/write.c
>> index 253cf57..9fb00fc 100644
>> --- a/fs/nfs/write.c
>> +++ b/fs/nfs/write.c
>> @@ -700,7 +700,7 @@ int nfs_flush_incompatible(struct file *file, struct page *page)
>>  		if (req == NULL)
>>  			return 0;
>>  		do_flush = req->wb_page != page || req->wb_context != ctx
>> -			|| !nfs_dirty_request(req);
>> +			|| !nfs_dirty_request(req) || pnfs_do_flush(req, NULL);
>>  		nfs_release_request(req);
>>  		if (!do_flush)
>>  			return 0;
>> diff --git a/include/linux/nfs4_pnfs.h b/include/linux/nfs4_pnfs.h
>> index 451afef..308a9fb 100644
>> --- a/include/linux/nfs4_pnfs.h
>> +++ b/include/linux/nfs4_pnfs.h
>> @@ -48,6 +48,12 @@ struct pnfs_layout_type {
>>  	u8 ld_data[];			/* layout driver private data */
>>  };
>>
>> +struct pnfs_fsdata {
>> +	struct layoutdriver_io_operations *ld_io_ops;
>> +	u8 ld_fsdata[];
>> +};
>> +
>> +
>>  static inline struct inode *
>>  PNFS_INODE(struct pnfs_layout_type *lo)
>>  {
>> @@ -132,8 +138,11 @@ struct layoutdriver_io_operations {
>>  			       unsigned nr_pages, loff_t offset, size_t count,
>>  			       int sync, struct nfs_write_data *nfs_data);
>>  	int (*flush_one) (struct pnfs_layout_segment *, struct list_head *head, unsigned int npages, size_t count, int how);
>> +	int (*write_begin) (struct pnfs_layout_segment *lseg, struct page *page,
>> +			    loff_t pos, unsigned count,
>> +			    struct pnfs_fsdata **fsdata);
>>  	void (*free_request_data) (struct nfs_page *);
>> -
>> +	void (*free_fsdata) (struct pnfs_fsdata *data);
>>
>>  	/* Consistency ops */
>>  	/* 2 problems:
>> @@ -172,6 +181,10 @@ struct layoutdriver_policy_operations {
>>  	/* test for nfs page cache coalescing */
>>  	int (*pg_test)(struct nfs_pageio_descriptor *, struct nfs_page *, struct nfs_page *);
>>
>> +	/* Test for pre-write request flushing */
>> +	int (*do_flush)(struct pnfs_layout_segment *lseg, struct nfs_page *req,
>> +			struct pnfs_fsdata *fsdata);
>> +
>>  	/* Retreive the block size of the file system.  If gather_across_stripes == 1,
>>  	 * then the file system will gather requests into the block size.
>>  	 * TODO: Where will the layout driver get this info?  It is hard coded in PVFS2.
>
>


More information about the pNFS mailing list