[pnfs] [PATCH 03/11] pnfs: pnfs_write_begin

Dean Hildebrand seattleplus at gmail.com
Fri Apr 11 16:58:32 EDT 2008



Fred Isaman wrote:
> From: Fred <iisaman at citi.umich.edu>
>
> Add hooks in the nfs_write_begin path, giving a driver the potential
> to read in page data around the data that is about to be copied to the page.
>
> Signed-off-by: Fred Isaman <iisaman at citi.umich.edu>
> Signed-off-by: Benny Halevy <bhalevy at panasas.com>
> ---
>  fs/nfs/file.c             |   16 +++++++--
>  fs/nfs/pnfs.c             |   74 +++++++++++++++++++++++++++++++++++++++++++++
>  fs/nfs/pnfs.h             |   53 ++++++++++++++++++++++++++++++++
>  fs/nfs/write.c            |    2 +-
>  include/linux/nfs4_pnfs.h |   14 ++++++++-
>  5 files changed, 153 insertions(+), 6 deletions(-)
>
> diff --git a/fs/nfs/file.c b/fs/nfs/file.c
> index aa1644e..f4c1d6b 100644
> --- a/fs/nfs/file.c
> +++ b/fs/nfs/file.c
> @@ -350,10 +350,17 @@ static int nfs_write_begin(struct file *file, struct address_space *mapping,
>  	*pagep = page;
>  
>  	ret = nfs_flush_incompatible(file, page);
> -	if (ret) {
> -		unlock_page(page);
> -		page_cache_release(page);
> -	}
> +	if (ret)
> +		goto out_err;
> +	ret = pnfs_write_begin(file, page, pos, len, flags, fsdata);
> +	if (ret)
> +		goto out_err;
> +	return 0;
> +
> + out_err:
> +	unlock_page(page);
> +	page_cache_release(page);
> +	*pagep = NULL;
>  	return ret;
>  }
>  
> @@ -370,6 +377,7 @@ static int nfs_write_end(struct file *file, struct address_space *mapping,
>  
>  	unlock_page(page);
>  	page_cache_release(page);
> +	pnfs_write_end_cleanup(fsdata);
>  
>  	if (status < 0)
>  		return status;
> diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c
> index ee2f10a..7be8bbe 100644
> --- a/fs/nfs/pnfs.c
> +++ b/fs/nfs/pnfs.c
> @@ -807,6 +807,28 @@ pnfs_has_layout(struct pnfs_layout_type *lo,
>  	return ret;
>  }
>  
> +static struct pnfs_layout_segment *
> +pnfs_find_get_lseg(struct inode *inode,
> +		   loff_t pos,
> +		   size_t count,
> +		   enum pnfs_iomode iomode)
> +{
> +	struct nfs_inode *nfsi = NFS_I(inode);
> +	struct pnfs_layout_segment *lseg;
> +	struct pnfs_layout_type *lo;
> +	struct nfs4_pnfs_layout_segment range;
> +
> +	lo = get_lock_current_layout(nfsi);
> +	if (!lo)
> +		return NULL;
> +	range.iomode = iomode;
> +	range.offset = pos;
> +	range.length = count;
> +	lseg = pnfs_has_layout(lo, &range, 1);
> +	put_unlock_current_layout(nfsi, lo);
> +	return lseg;
> +}
> +
>  /* Update the file's layout for the given range and iomode.
>   * Layout is retreived from the server if needed.
>   * If lsegpp is given, the appropriate layout segment is referenced and
> @@ -1544,6 +1566,53 @@ int _pnfs_try_to_read_data(struct nfs_read_data *data,
>  	}
>  }
>  
> +/*
> + * This gives the layout driver an opportunity to read in page data "around"
> + * the data to be written.  It returns 0 on success, otherwise an error code
> + * which will either be passed up to user, or ignored if
> + * some previous part of write succeeded.
> + * Note the range [pos, pos+len-1] is entirely within the page.
> + */
> +/* flags = AOP_FLAG_UNINTERRUPTIBLE or 0 - ??? need this ??? */
> +int _pnfs_write_begin(struct inode *inode, struct nfs_server *nfss,
> +		      struct page *page, loff_t pos, unsigned len,
> +		      unsigned flags, struct pnfs_fsdata **fsdata)
> +{
> +	struct pnfs_layout_segment *lseg;
> +	int status = 0;
> +
> +	*fsdata = kzalloc(sizeof(struct pnfs_fsdata), GFP_KERNEL);
> +	if (!*fsdata)
> +		return -ENOMEM;
> +	lseg = pnfs_find_get_lseg(inode, pos, len, IOMODE_RW);
> +	if (lseg) {
> +		status = nfss->pnfs_curr_ld->ld_io_ops->write_begin(lseg, page,
> +								    pos, len,
> +								    *fsdata);
> +		put_lseg(lseg);
> +	}
> +	return status;
> +}
> +
> +/* Given an nfs request, determine if it should be flushed before proceeding.
> + * It should default to returning False, returning True only if there is a
> + * specific reason to flush.
> + */
> +int _pnfs_do_flush(struct inode *inode, struct nfs_server *nfss,
> +		   struct nfs_page *req, struct pnfs_fsdata *fsdata)
> +{
> +	struct pnfs_layout_segment *lseg;
> +	loff_t pos = (req->wb_index << PAGE_CACHE_SHIFT) + req->wb_offset;
> +	int status = 0;
> +
> +	lseg = pnfs_find_get_lseg(inode, pos, req->wb_bytes, IOMODE_RW);
> +	/* Note that lseg==NULL may be useful info for do_flush */
> +	status = nfss->pnfs_curr_ld->ld_policy_ops->do_flush(lseg, req,
> +							     fsdata);
> +	put_lseg(lseg);
> +	return status;
> +}
> +
>  int _pnfs_try_to_write_data(struct nfs_write_data *data,
>  			    const struct rpc_call_ops *call_ops, int how)
>  {
> @@ -1864,6 +1933,11 @@ void pnfs_free_request_data(struct nfs_page *req)
>  		lo->free_request_data(req);
>  }
>  
> +void pnfs_free_fsdata(struct pnfs_fsdata *fsdata)
> +{
> +	kfree(fsdata);
> +}
> +
>  /* Callback operations for layout drivers.
>   */
>  struct pnfs_client_operations pnfs_ops = {
> diff --git a/fs/nfs/pnfs.h b/fs/nfs/pnfs.h
> index 3014f34..0c63fea 100644
> --- a/fs/nfs/pnfs.h
> +++ b/fs/nfs/pnfs.h
> @@ -55,10 +55,16 @@ void pnfs_pageio_init_write(struct nfs_pageio_descriptor *, struct inode *);
>  void pnfs_update_layout_commit(struct inode *, struct list_head *, pgoff_t, unsigned int);
>  int pnfs_flush_one(struct inode *, struct list_head *, unsigned int, size_t, int);
>  void pnfs_free_request_data(struct nfs_page *req);
> +void pnfs_free_fsdata(struct pnfs_fsdata *fsdata);
>  ssize_t pnfs_file_write(struct file *, const char __user *, size_t, loff_t *);
>  void pnfs_get_layout_done(struct pnfs_layout_type *,
>  			  struct nfs4_pnfs_layoutget *, int);
>  void pnfs_layout_release(struct pnfs_layout_type *);
> +int _pnfs_write_begin(struct inode *inode, struct nfs_server *nfss,
> +		      struct page *page, loff_t pos, unsigned len,
> +		      unsigned flags, struct pnfs_fsdata **fsdata);
> +int _pnfs_do_flush(struct inode *inode, struct nfs_server *nfss,
> +		   struct nfs_page *req, struct pnfs_fsdata *fsdata);
>  
>  #define PNFS_EXISTS_LDIO_OP(srv, opname) ((srv)->pnfs_curr_ld &&	\
>  				     (srv)->pnfs_curr_ld->ld_io_ops &&	\
> @@ -109,6 +115,37 @@ static inline int pnfs_try_to_commit(struct nfs_write_data *data)
>  	return 1;
>  }
>  
> +static inline int pnfs_write_begin(struct file *filp, struct page *page,
> +				   loff_t pos, unsigned len, unsigned flags,
> +				   void **fsdata)
> +{
> +	struct inode *inode = filp->f_dentry->d_inode;
> +	struct nfs_server *nfss = NFS_SERVER(inode);
> +	struct pnfs_fsdata *pnfs_fsdata = NULL;
> +	int status = 0;
> +
> +	if (PNFS_EXISTS_LDIO_OP(nfss, write_begin))
> +		status = _pnfs_write_begin(inode, nfss, page, pos, len, flags,
> +					   &pnfs_fsdata);
> +	*fsdata = pnfs_fsdata; /* fsdata should never be NULL */
> +	return status;
> +}
> +
> +static inline int pnfs_do_flush(struct nfs_page *req, void *fsdata)
> +{
> +	struct inode *inode = req->wb_context->path.dentry->d_inode;
> +	struct nfs_server *nfss = NFS_SERVER(inode);
> +	if (PNFS_EXISTS_LDPOLICY_OP(nfss, do_flush))
> +		return _pnfs_do_flush(inode, nfss, req, fsdata);
> +	else
> +		return 0;
> +}
> +
> +static inline void pnfs_write_end_cleanup(void *fsdata)
> +{
> +	pnfs_free_fsdata((struct pnfs_fsdata *) fsdata);
> +}
> +
>  #else  /* CONFIG_PNFS */
>  
>  static inline int pnfs_try_to_read_data(struct nfs_read_data *data,
> @@ -129,6 +166,22 @@ static inline int pnfs_try_to_commit(struct nfs_write_data *data)
>  	return 1;
>  }
>  
> +static inline int pnfs_do_flush(struct nfs_page *req, void *fsdata)
> +{
> +	return 0;
> +}
> +
> +static inline int pnfs_write_begin(struct file *filp, struct page *page,
> +				   loff_t pos, unsigned len, unsigned flags,
> +				   void **fsdata)
> +{
> +	return 0;
> +}
> +
> +static inline void pnfs_write_end_cleanup(void *fsdata)
> +{
> +}
> +
>  #endif /* CONFIG_PNFS */
>  
>  #endif /* FS_NFS_PNFS_H */
> diff --git a/fs/nfs/write.c b/fs/nfs/write.c
> index 015e035..5616f48 100644
> --- a/fs/nfs/write.c
> +++ b/fs/nfs/write.c
> @@ -699,7 +699,7 @@ int nfs_flush_incompatible(struct file *file, struct page *page)
>  		if (req == NULL)
>  			return 0;
>  		do_flush = req->wb_page != page || req->wb_context != ctx
> -			|| !nfs_dirty_request(req);
> +			|| !nfs_dirty_request(req) || pnfs_do_flush(req, NULL);
>  		nfs_release_request(req);
>  		if (!do_flush)
>  			return 0;
> diff --git a/include/linux/nfs4_pnfs.h b/include/linux/nfs4_pnfs.h
> index 654767a..9ed6819 100644
> --- a/include/linux/nfs4_pnfs.h
> +++ b/include/linux/nfs4_pnfs.h
> @@ -48,6 +48,11 @@ struct pnfs_layout_type {
>  	u8 ld_data[];			/* layout driver private data */
>  };
>  
> +struct pnfs_fsdata {
> +	int ok_to_use_pnfs;
> +};
> +
> +
>  static inline struct inode *
>  PNFS_INODE(struct pnfs_layout_type *lo)
>  {
> @@ -133,8 +138,11 @@ struct layoutdriver_io_operations {
>  			       unsigned nr_pages, loff_t offset, size_t count,
>  			       int sync, struct nfs_write_data *nfs_data);
>  	int (*flush_one) (struct pnfs_layout_segment *, struct list_head *head, unsigned int npages, size_t count, int how);
> +	int (*write_begin) (struct pnfs_layout_segment *lseg, struct page *page,
> +			    loff_t pos, unsigned count,
> +			    struct pnfs_fsdata *fsdata);
>  	void (*free_request_data) (struct nfs_page *);
> -
> +	void (*free_fsdata) (struct pnfs_fsdata *data);
>   
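Is this free_fsdata hook still needed? In this patch pnfs_free_fsdata()
simply calls kfree() on the fsdata and never invokes the driver's hook.
Dean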
>  
>  	/* Consistency ops */
>  	/* 2 problems:
> @@ -173,6 +181,10 @@ struct layoutdriver_policy_operations {
>  	/* test for nfs page cache coalescing */
>  	int (*pg_test)(struct nfs_pageio_descriptor *, struct nfs_page *, struct nfs_page *);
>  
> +	/* Test for pre-write request flushing */
> +	int (*do_flush)(struct pnfs_layout_segment *lseg, struct nfs_page *req,
> +			struct pnfs_fsdata *fsdata);
> +
>  	/* Retreive the block size of the file system.  If gather_across_stripes == 1,
>  	 * then the file system will gather requests into the block size.
>  	 * TODO: Where will the layout driver get this info?  It is hard coded in PVFS2.
>   


More information about the pNFS mailing list