[pnfs] [PATCH 19/28] pnfs: pnfs_write_begin
Benny Halevy
bhalevy at panasas.com
Thu Mar 13 10:08:42 EDT 2008
On Mar. 11, 2008, 21:32 +0200, Fred Isaman <iisaman at citi.umich.edu> wrote:
> From: Fred <iisaman at citi.umich.edu>
>
> Add hooks in the nfs_write_begin path, giving a driver the potential
> to read in page data around the data that is about to be copied to the page.
>
> Signed-off-by: Fred <iisaman at citi.umich.edu>
> ---
> fs/nfs/file.c | 20 ++++++++--
> fs/nfs/pnfs.c | 88 +++++++++++++++++++++++++++++++++++++++++++++
> fs/nfs/pnfs.h | 10 +++++
> fs/nfs/write.c | 2 +-
> include/linux/nfs4_pnfs.h | 15 +++++++-
> 5 files changed, 129 insertions(+), 6 deletions(-)
>
> diff --git a/fs/nfs/file.c b/fs/nfs/file.c
> index 6f16772..e870982 100644
> --- a/fs/nfs/file.c
> +++ b/fs/nfs/file.c
> @@ -352,10 +352,19 @@ static int nfs_write_begin(struct file *file, struct address_space *mapping,
> *pagep = page;
>
> ret = nfs_flush_incompatible(file, page);
> - if (ret) {
> - unlock_page(page);
> - page_cache_release(page);
> - }
> + if (ret)
> + goto out_err;
> +#ifdef CONFIG_PNFS
> + ret = pnfs_write_begin(file, page, pos, len, flags, fsdata);
> + if (ret)
> + goto out_err;
> +#endif /* CONFIG_PNFS */
Trond's preference is to define pnfs functions as static inline noops
in the !defined(CONFIG_PNFS) case so we can avoid these annoying ifdefs
like you did with pnfs_do_flush below. So let's start doing it from now
on...
> + return 0;
> +
> + out_err:
> + unlock_page(page);
> + page_cache_release(page);
> + *pagep = NULL;
Yup, that's a good thing to do regardless of pnfs.
> return ret;
> }
>
> @@ -372,6 +381,9 @@ static int nfs_write_end(struct file *file, struct address_space *mapping,
>
> unlock_page(page);
> page_cache_release(page);
> +#ifdef CONFIG_PNFS
> + pnfs_free_fsdata(fsdata);
This violates layering.
The nfs layer need not know what the pnfs layer wants to do.
Please change that to a new call to pnfs_write_end (that will just call
pnfs_free_fsdata() at the moment)
> +#endif /* CONFIG_PNFS */
>
> return status < 0 ? status : copied;
> }
> diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c
> index 56aa1bf..a4db86e 100644
> --- a/fs/nfs/pnfs.c
> +++ b/fs/nfs/pnfs.c
> @@ -809,6 +809,28 @@ pnfs_has_layout(struct pnfs_layout_type *lo,
> return ret;
> }
>
> +static struct pnfs_layout_segment *
> +pnfs_find_get_lseg(struct inode *inode,
> + loff_t pos,
> + size_t count,
> + enum pnfs_iomode iomode)
> +{
> + struct nfs_inode *nfsi = NFS_I(inode);
> + struct pnfs_layout_segment *lseg;
> + struct pnfs_layout_type *lo;
> + struct nfs4_pnfs_layout_segment range;
> +
> + lo = get_lock_current_layout(nfsi);
> + if (!lo)
> + return NULL;
> + range.iomode = iomode;
> + range.offset = pos;
> + range.length = count;
> + lseg = pnfs_has_layout(lo, &range, 1);
> + put_unlock_current_layout(nfsi, lo);
> + return lseg;
> +}
> +
> /* Update the file's layout for the given range and iomode.
> * Layout is retreived from the server if needed.
> * If lsegpp is given, the appropriate layout segment is referenced and
> @@ -1537,6 +1559,64 @@ int pnfs_try_to_read_data(struct nfs_read_data *data,
> }
> }
>
> +/*
> + * This gives the layout driver an opportunity to read in page "around"
> + * the data to be written. It returns 0 on success, otherwise an error code
> + * which will either be passed up to user, or ignored if
> + * some previous part of write succeeded.
> + * Note the range [pos, pos+len-1] is entirely within the page.
> + */
> +/* flags = AOP_FLAG_UNINTERRUPTIBLE or 0 - ??? need this ??? */
> +int pnfs_write_begin(struct file *filp, struct page *page,
> + loff_t pos, unsigned len,
> + unsigned flags, void **fsdata)
> +{
> + struct inode *inode = filp->f_dentry->d_inode;
> + struct nfs_server *nfss = NFS_SERVER(inode);
> + struct pnfs_layout_segment *lseg;
> + struct pnfs_fsdata *pnfs_fsdata = NULL;
> + int status = 0;
> +
> + if (!nfss->pnfs_curr_ld || !nfss->pnfs_curr_ld->ld_io_ops ||
> + !nfss->pnfs_curr_ld->ld_io_ops->write_begin) {
> + /* If layout driver doesn't define, just do nothing */
> + goto out;
> + }
let's move this check and similar ones to a static inline definition of
this function in the header file and call this function (renamed to
_pnfs_write_begin) only if the layout driver implements write_begin.
> + lseg = pnfs_find_get_lseg(inode, pos, len, IOMODE_RW);
what if lseg == NULL?
wouldn't it be better to use pnfs_update_layout()?
> + status = nfss->pnfs_curr_ld->ld_io_ops->write_begin(lseg, page,
> + pos, len,
> + &pnfs_fsdata);
> + put_lseg(lseg);
> + if (pnfs_fsdata && !pnfs_fsdata->ld_io_ops)
> + pnfs_fsdata->ld_io_ops = nfss->pnfs_curr_ld->ld_io_ops;
> + out:
> + *fsdata = pnfs_fsdata;
> + return status;
> +}
> +
> +/* Given an nfs request, determine if it should be flushed before proceeding.
> + * It should default to returning False, returning True only if there is a
> + * specific reason to flush.
> + */
> +int pnfs_do_flush(struct nfs_page *req, void *fsdata)
> +{
> + struct inode *inode = req->wb_context->path.dentry->d_inode;
> + struct nfs_server *nfss = NFS_SERVER(inode);
> + struct pnfs_layout_segment *lseg;
> + loff_t pos = (req->wb_index << PAGE_CACHE_SHIFT) + req->wb_offset;
> + int status = 0;
> +
> + if (!nfss->pnfs_curr_ld || !nfss->pnfs_curr_ld->ld_io_ops ||
> + !nfss->pnfs_curr_ld->ld_policy_ops->do_flush)
> + return 0;
ditto
> + lseg = pnfs_find_get_lseg(inode, pos, req->wb_bytes, IOMODE_RW);
ditto
> + /* Note lseg may be NULL */
> + status = nfss->pnfs_curr_ld->ld_policy_ops->do_flush(lseg, req,
> + fsdata);
> + put_lseg(lseg);
> + return status;
> +}
> +
> int pnfs_try_to_write_data(struct nfs_write_data *data,
> const struct rpc_call_ops *call_ops, int how)
> {
> @@ -1859,6 +1939,14 @@ void pnfs_free_request_data(struct nfs_page *req)
> lo->free_request_data(req);
> }
>
> +void pnfs_free_fsdata(void *fsdata)
> +{
> + struct pnfs_fsdata *data = (struct pnfs_fsdata *)fsdata;
> +
> + if (data && data->ld_io_ops->free_fsdata)
> + data->ld_io_ops->free_fsdata(data);
> +}
> +
> /* Callback operations for layout drivers.
> */
> struct pnfs_client_operations pnfs_ops = {
> diff --git a/fs/nfs/pnfs.h b/fs/nfs/pnfs.h
> index 8e11489..3860e38 100644
> --- a/fs/nfs/pnfs.h
> +++ b/fs/nfs/pnfs.h
> @@ -29,6 +29,9 @@ int pnfs_use_ds_io(struct list_head *, struct inode *, int);
>
> int pnfs_use_write(struct inode *inode, ssize_t count);
> int pnfs_try_to_write_data(struct nfs_write_data *, const struct rpc_call_ops *, int);
> +int pnfs_write_begin(struct file *filp, struct page *page, loff_t pos,
> + unsigned len, unsigned flags, void **fsdata);
> +int pnfs_do_flush(struct nfs_page *req, void *fsdata);
> int pnfs_try_to_read_data(struct nfs_read_data *data, const struct rpc_call_ops *call_ops);
> int pnfs_initialize(void);
> void pnfs_uninitialize(void);
> @@ -46,8 +49,15 @@ void pnfs_pageio_init_write(struct nfs_pageio_descriptor *, struct inode *);
> void pnfs_update_layout_commit(struct inode *, struct list_head *, pgoff_t, unsigned int);
> int pnfs_flush_one(struct inode *, struct list_head *, unsigned int, size_t, int);
> void pnfs_free_request_data(struct nfs_page *req);
> +void pnfs_free_fsdata(void *fsdata);
> ssize_t pnfs_file_write(struct file *, const char __user *, size_t, loff_t *);
>
> +#else /* CONFIG_PNFS */
> +static inline int pnfs_do_flush(struct nfs_page *req, void *fsdata)
> +{
> + return 0;
> +}
> +
> #endif /* CONFIG_PNFS */
>
> #endif /* FS_NFS_PNFS_H */
> diff --git a/fs/nfs/write.c b/fs/nfs/write.c
> index 253cf57..9fb00fc 100644
> --- a/fs/nfs/write.c
> +++ b/fs/nfs/write.c
> @@ -700,7 +700,7 @@ int nfs_flush_incompatible(struct file *file, struct page *page)
> if (req == NULL)
> return 0;
> do_flush = req->wb_page != page || req->wb_context != ctx
> - || !nfs_dirty_request(req);
> + || !nfs_dirty_request(req) || pnfs_do_flush(req, NULL);
> nfs_release_request(req);
> if (!do_flush)
> return 0;
> diff --git a/include/linux/nfs4_pnfs.h b/include/linux/nfs4_pnfs.h
> index 451afef..308a9fb 100644
> --- a/include/linux/nfs4_pnfs.h
> +++ b/include/linux/nfs4_pnfs.h
> @@ -48,6 +48,12 @@ struct pnfs_layout_type {
> u8 ld_data[]; /* layout driver private data */
> };
>
> +struct pnfs_fsdata {
> + struct layoutdriver_io_operations *ld_io_ops;
> + u8 ld_fsdata[];
> +};
> +
> +
> static inline struct inode *
> PNFS_INODE(struct pnfs_layout_type *lo)
> {
> @@ -132,8 +138,11 @@ struct layoutdriver_io_operations {
> unsigned nr_pages, loff_t offset, size_t count,
> int sync, struct nfs_write_data *nfs_data);
> int (*flush_one) (struct pnfs_layout_segment *, struct list_head *head, unsigned int npages, size_t count, int how);
> + int (*write_begin) (struct pnfs_layout_segment *lseg, struct page *page,
> + loff_t pos, unsigned count,
> + struct pnfs_fsdata **fsdata);
> void (*free_request_data) (struct nfs_page *);
> -
> + void (*free_fsdata) (struct pnfs_fsdata *data);
>
> /* Consistency ops */
> /* 2 problems:
> @@ -172,6 +181,10 @@ struct layoutdriver_policy_operations {
> /* test for nfs page cache coalescing */
> int (*pg_test)(struct nfs_pageio_descriptor *, struct nfs_page *, struct nfs_page *);
>
> + /* Test for pre-write request flushing */
> + int (*do_flush)(struct pnfs_layout_segment *lseg, struct nfs_page *req,
> + struct pnfs_fsdata *fsdata);
> +
> /* Retreive the block size of the file system. If gather_across_stripes == 1,
> * then the file system will gather requests into the block size.
> * TODO: Where will the layout driver get this info? It is hard coded in PVFS2.
More information about the pNFS
mailing list