[pnfs] [PATCH 03/11] pnfs: pnfs_write_begin
Fredric Isaman
iisaman at citi.umich.edu
Mon Apr 14 09:32:06 EDT 2008
On Fri, 11 Apr 2008, Dean Hildebrand wrote:
>
>
> Fred Isaman wrote:
>> From: Fred <iisaman at citi.umich.edu>
>>
>> Add hooks in the nfs_write_begin path, giving a driver the potential
>> to read in page data around the data that is about to be copied to the
>> page.
>>
>> Signed-off-by: Fred Isaman <iisaman at citi.umich.edu>
>> Signed-off-by: Benny Halevy <bhalevy at panasas.com>
>> ---
>> fs/nfs/file.c | 16 +++++++--
>> fs/nfs/pnfs.c | 74 +++++++++++++++++++++++++++++++++++++++++++++
>> fs/nfs/pnfs.h | 53 ++++++++++++++++++++++++++++++++
>> fs/nfs/write.c | 2 +-
>> include/linux/nfs4_pnfs.h | 14 ++++++++-
>> 5 files changed, 153 insertions(+), 6 deletions(-)
>>
>> diff --git a/fs/nfs/file.c b/fs/nfs/file.c
>> index aa1644e..f4c1d6b 100644
>> --- a/fs/nfs/file.c
>> +++ b/fs/nfs/file.c
>> @@ -350,10 +350,17 @@ static int nfs_write_begin(struct file *file, struct
>> address_space *mapping,
>> *pagep = page;
>> ret = nfs_flush_incompatible(file, page);
>> - if (ret) {
>> - unlock_page(page);
>> - page_cache_release(page);
>> - }
>> + if (ret)
>> + goto out_err;
>> + ret = pnfs_write_begin(file, page, pos, len, flags, fsdata);
>> + if (ret)
>> + goto out_err;
>> + return 0;
>> +
>> + out_err:
>> + unlock_page(page);
>> + page_cache_release(page);
>> + *pagep = NULL;
>> return ret;
>> }
>> @@ -370,6 +377,7 @@ static int nfs_write_end(struct file *file, struct
>> address_space *mapping,
>> unlock_page(page);
>> page_cache_release(page);
>> + pnfs_write_end_cleanup(fsdata);
>> if (status < 0)
>> return status;
>> diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c
>> index ee2f10a..7be8bbe 100644
>> --- a/fs/nfs/pnfs.c
>> +++ b/fs/nfs/pnfs.c
>> @@ -807,6 +807,28 @@ pnfs_has_layout(struct pnfs_layout_type *lo,
>> return ret;
>> }
>> +static struct pnfs_layout_segment *
>> +pnfs_find_get_lseg(struct inode *inode,
>> + loff_t pos,
>> + size_t count,
>> + enum pnfs_iomode iomode)
>> +{
>> + struct nfs_inode *nfsi = NFS_I(inode);
>> + struct pnfs_layout_segment *lseg;
>> + struct pnfs_layout_type *lo;
>> + struct nfs4_pnfs_layout_segment range;
>> +
>> + lo = get_lock_current_layout(nfsi);
>> + if (!lo)
>> + return NULL;
>> + range.iomode = iomode;
>> + range.offset = pos;
>> + range.length = count;
>> + lseg = pnfs_has_layout(lo, &range, 1);
>> + put_unlock_current_layout(nfsi, lo);
>> + return lseg;
>> +}
>> +
>> /* Update the file's layout for the given range and iomode.
>> * Layout is retreived from the server if needed.
>> * If lsegpp is given, the appropriate layout segment is referenced and
>> @@ -1544,6 +1566,53 @@ int _pnfs_try_to_read_data(struct nfs_read_data
>> *data,
>> }
>> }
>> +/*
>> + * This gives the layout driver an opportunity to read in page data "around"
>> + * the data to be written. It returns 0 on success, otherwise an error code
>> + * which will either be passed up to the user, or ignored if
>> + * some previous part of the write succeeded.
>> + * Note the range [pos, pos+len-1] is entirely within the page.
>> + */
>> +/* flags = AOP_FLAG_UNINTERRUPTIBLE or 0 - ??? need this ??? */
>> +int _pnfs_write_begin(struct inode *inode, struct nfs_server *nfss,
>> + struct page *page, loff_t pos, unsigned len,
>> + unsigned flags, struct pnfs_fsdata **fsdata)
>> +{
>> + struct pnfs_layout_segment *lseg;
>> + int status = 0;
>> +
>> + *fsdata = kzalloc(sizeof(struct pnfs_fsdata), GFP_KERNEL);
>> + if (!*fsdata)
>> + return -ENOMEM;
>> + lseg = pnfs_find_get_lseg(inode, pos, len, IOMODE_RW);
>> + if (lseg) {
>> + status = nfss->pnfs_curr_ld->ld_io_ops->write_begin(lseg,
>> page,
>> + pos, len,
>> + *fsdata);
>>
> If status != E_OK on this line, will fsdata get freed? (Is pnfs_free_fsdata
> guaranteed to be called?)
> Dean
Good point. No, this needs to be fixed.
Fred
>> + put_lseg(lseg);
>> + }
>> + return status;
>> +}
>> +
>> +/* Given an nfs request, determine if it should be flushed before proceeding.
>> + * It should default to returning False, returning True only if there is a
>> + * specific reason to flush.
>> + */
>> +int _pnfs_do_flush(struct inode *inode, struct nfs_server *nfss,
>> + struct nfs_page *req, struct pnfs_fsdata *fsdata)
>> +{
>> + struct pnfs_layout_segment *lseg;
>> + loff_t pos = (req->wb_index << PAGE_CACHE_SHIFT) + req->wb_offset;
>> + int status = 0;
>> +
>> + lseg = pnfs_find_get_lseg(inode, pos, req->wb_bytes, IOMODE_RW);
>> + /* Note that lseg==NULL may be useful info for do_flush */
>> + status = nfss->pnfs_curr_ld->ld_policy_ops->do_flush(lseg, req,
>> + fsdata);
>> + put_lseg(lseg);
>> + return status;
>> +}
>> +
>> int _pnfs_try_to_write_data(struct nfs_write_data *data,
>> const struct rpc_call_ops *call_ops, int how)
>> {
>> @@ -1864,6 +1933,11 @@ void pnfs_free_request_data(struct nfs_page *req)
>> lo->free_request_data(req);
>> }
>> +void pnfs_free_fsdata(struct pnfs_fsdata *fsdata)
>> +{
>> + kfree(fsdata);
>> +}
>> +
>> /* Callback operations for layout drivers.
>> */
>> struct pnfs_client_operations pnfs_ops = {
>> diff --git a/fs/nfs/pnfs.h b/fs/nfs/pnfs.h
>> index 3014f34..0c63fea 100644
>> --- a/fs/nfs/pnfs.h
>> +++ b/fs/nfs/pnfs.h
>> @@ -55,10 +55,16 @@ void pnfs_pageio_init_write(struct
>> nfs_pageio_descriptor *, struct inode *);
>> void pnfs_update_layout_commit(struct inode *, struct list_head *,
>> pgoff_t, unsigned int);
>> int pnfs_flush_one(struct inode *, struct list_head *, unsigned int,
>> size_t, int);
>> void pnfs_free_request_data(struct nfs_page *req);
>> +void pnfs_free_fsdata(struct pnfs_fsdata *fsdata);
>> ssize_t pnfs_file_write(struct file *, const char __user *, size_t, loff_t
>> *);
>> void pnfs_get_layout_done(struct pnfs_layout_type *,
>> struct nfs4_pnfs_layoutget *, int);
>> void pnfs_layout_release(struct pnfs_layout_type *);
>> +int _pnfs_write_begin(struct inode *inode, struct nfs_server *nfss,
>> + struct page *page, loff_t pos, unsigned len,
>> + unsigned flags, struct pnfs_fsdata **fsdata);
>> +int _pnfs_do_flush(struct inode *inode, struct nfs_server *nfss,
>> + struct nfs_page *req, struct pnfs_fsdata *fsdata);
>> #define PNFS_EXISTS_LDIO_OP(srv, opname) ((srv)->pnfs_curr_ld && \
>> (srv)->pnfs_curr_ld->ld_io_ops && \
>> @@ -109,6 +115,37 @@ static inline int pnfs_try_to_commit(struct
>> nfs_write_data *data)
>> return 1;
>> }
>> +static inline int pnfs_write_begin(struct file *filp, struct page *page,
>> + loff_t pos, unsigned len, unsigned flags,
>> + void **fsdata)
>> +{
>> + struct inode *inode = filp->f_dentry->d_inode;
>> + struct nfs_server *nfss = NFS_SERVER(inode);
>> + struct pnfs_fsdata *pnfs_fsdata = NULL;
>> + int status = 0;
>> +
>> + if (PNFS_EXISTS_LDIO_OP(nfss, write_begin))
>> + status = _pnfs_write_begin(inode, nfss, page, pos, len,
>> flags,
>> + &pnfs_fsdata);
>> + *fsdata = pnfs_fsdata; /* fsdata should never be NULL */
>> + return status;
>> +}
>> +
>> +static inline int pnfs_do_flush(struct nfs_page *req, void *fsdata)
>> +{
>> + struct inode *inode = req->wb_context->path.dentry->d_inode;
>> + struct nfs_server *nfss = NFS_SERVER(inode);
>> + if (PNFS_EXISTS_LDPOLICY_OP(nfss, do_flush))
>> + return _pnfs_do_flush(inode, nfss, req, fsdata);
>> + else
>> + return 0;
>> +}
>> +
>> +static inline void pnfs_write_end_cleanup(void *fsdata)
>> +{
>> + pnfs_free_fsdata((struct pnfs_fsdata *) fsdata);
>> +}
>> +
>> #else /* CONFIG_PNFS */
>> static inline int pnfs_try_to_read_data(struct nfs_read_data *data,
>> @@ -129,6 +166,22 @@ static inline int pnfs_try_to_commit(struct
>> nfs_write_data *data)
>> return 1;
>> }
>> +static inline int pnfs_do_flush(struct nfs_page *req, void *fsdata)
>> +{
>> + return 0;
>> +}
>> +
>> +static inline int pnfs_write_begin(struct file *filp, struct page *page,
>> + loff_t pos, unsigned len, unsigned flags,
>> + void **fsdata)
>> +{
>> + return 0;
>> +}
>> +
>> +static inline void pnfs_write_end_cleanup(void *fsdata)
>> +{
>> +}
>> +
>> #endif /* CONFIG_PNFS */
>> #endif /* FS_NFS_PNFS_H */
>> diff --git a/fs/nfs/write.c b/fs/nfs/write.c
>> index 015e035..5616f48 100644
>> --- a/fs/nfs/write.c
>> +++ b/fs/nfs/write.c
>> @@ -699,7 +699,7 @@ int nfs_flush_incompatible(struct file *file, struct
>> page *page)
>> if (req == NULL)
>> return 0;
>> do_flush = req->wb_page != page || req->wb_context != ctx
>> - || !nfs_dirty_request(req);
>> + || !nfs_dirty_request(req) || pnfs_do_flush(req,
>> NULL);
>> nfs_release_request(req);
>> if (!do_flush)
>> return 0;
>> diff --git a/include/linux/nfs4_pnfs.h b/include/linux/nfs4_pnfs.h
>> index 654767a..9ed6819 100644
>> --- a/include/linux/nfs4_pnfs.h
>> +++ b/include/linux/nfs4_pnfs.h
>> @@ -48,6 +48,11 @@ struct pnfs_layout_type {
>> u8 ld_data[]; /* layout driver private data */
>> };
>> +struct pnfs_fsdata {
>> + int ok_to_use_pnfs;
>> +};
>> +
>> +
>> static inline struct inode *
>> PNFS_INODE(struct pnfs_layout_type *lo)
>> {
>> @@ -133,8 +138,11 @@ struct layoutdriver_io_operations {
>> unsigned nr_pages, loff_t offset, size_t
>> count,
>> int sync, struct nfs_write_data *nfs_data);
>> int (*flush_one) (struct pnfs_layout_segment *, struct list_head
>> *head, unsigned int npages, size_t count, int how);
>> + int (*write_begin) (struct pnfs_layout_segment *lseg, struct page
>> *page,
>> + loff_t pos, unsigned count,
>> + struct pnfs_fsdata *fsdata);
>> void (*free_request_data) (struct nfs_page *);
>> -
>> + void (*free_fsdata) (struct pnfs_fsdata *data);
>> /* Consistency ops */
>> /* 2 problems:
>> @@ -173,6 +181,10 @@ struct layoutdriver_policy_operations {
>> /* test for nfs page cache coalescing */
>> int (*pg_test)(struct nfs_pageio_descriptor *, struct nfs_page *,
>> struct nfs_page *);
>> + /* Test for pre-write request flushing */
>> + int (*do_flush)(struct pnfs_layout_segment *lseg, struct nfs_page
>> *req,
>> + struct pnfs_fsdata *fsdata);
>> +
>> /* Retreive the block size of the file system. If
>> gather_across_stripes == 1,
>> * then the file system will gather requests into the block size.
>> * TODO: Where will the layout driver get this info? It is hard
>> coded in PVFS2.
>>
More information about the pNFS
mailing list