Subject: [PATCH] NFS: Direct write path allocates nfs_write_data on the stack

Reduce stack utilization in the NFS direct write path by using a
dynamically allocated nfs_write_data structure instead of allocating one
on the stack.  This reduces stack utilization of nfs_direct_write_seg
from over 900 bytes to less than 100 bytes.

Signed-off-by: Chuck Lever
Signed-off-by: Trond Myklebust
[review: also drop the stray ';' that terminated the verifier memcmp()
 test in nfs_direct_write_seg().  With it, "goto sync_retry" ran whenever
 the reply was not committed NFS_FILE_SYNC, regardless of whether the
 write verifier had changed.  The header names on the #include lines were
 lost in extraction; the include that moves from write.c to nfs_fs.h is
 restored as <linux/mempool.h> (mempool_t users move with it), the
 context #include lines are left as found.]
---
 fs/nfs/direct.c        |   77 ++++++++++++++++++++---------------------
 fs/nfs/write.c         |   39 +--------------------
 include/linux/nfs_fs.h |   39 +++++++++++++++++++++
 3 files changed, 80 insertions(+), 75 deletions(-)

diff -X /home/cel/src/linux/dont-diff -Naurp 11-nfs-short-direct-write/fs/nfs/direct.c 12-nfs-direct-write-alloc/fs/nfs/direct.c
--- 11-nfs-short-direct-write/fs/nfs/direct.c	2004-10-21 10:40:55.690101000 -0400
+++ 12-nfs-direct-write-alloc/fs/nfs/direct.c	2004-10-21 10:41:53.934023000 -0400
@@ -255,22 +255,21 @@ nfs_direct_write_seg(struct inode *inode
 	size_t request;
 	int curpage, need_commit, result, tot_bytes;
 	struct nfs_writeverf first_verf;
-	struct nfs_write_data wdata = {
-		.inode = inode,
-		.cred = ctx->cred,
-		.args = {
-			.fh = NFS_FH(inode),
-			.context = ctx,
-		},
-		.res = {
-			.fattr = &wdata.fattr,
-			.verf = &wdata.verf,
-		},
-	};
+	struct nfs_write_data *wdata;
 
-	wdata.args.stable = NFS_UNSTABLE;
+	wdata = nfs_writedata_alloc();
+	if (!wdata)
+		return -ENOMEM;
+
+	wdata->inode = inode;
+	wdata->cred = ctx->cred;
+	wdata->args.fh = NFS_FH(inode);
+	wdata->args.context = ctx;
+	wdata->args.stable = NFS_UNSTABLE;
 	if (IS_SYNC(inode) || NFS_PROTO(inode)->version == 2 || count <= wsize)
-		wdata.args.stable = NFS_FILE_SYNC;
+		wdata->args.stable = NFS_FILE_SYNC;
+	wdata->res.fattr = &wdata->fattr;
+	wdata->res.verf = &wdata->verf;
 
 	nfs_begin_data_update(inode);
 retry:
@@ -278,20 +277,20 @@ retry:
 	tot_bytes = 0;
 	curpage = 0;
 	request = count;
-	wdata.args.pgbase = user_addr & ~PAGE_MASK;
-	wdata.args.offset = file_offset;
-	do {
-		wdata.args.count = request;
-		if (wdata.args.count > wsize)
-			wdata.args.count = wsize;
-		wdata.args.pages = &pages[curpage];
+	wdata->args.pgbase = user_addr & ~PAGE_MASK;
+	wdata->args.offset = file_offset;
+	do {
+		wdata->args.count = request;
+		if (wdata->args.count > wsize)
+			wdata->args.count = wsize;
+		wdata->args.pages = &pages[curpage];
 
 		dprintk("NFS: direct write: c=%u o=%Ld ua=%lu, pb=%u, cp=%u\n",
-			wdata.args.count, (long long) wdata.args.offset,
-			user_addr + tot_bytes, wdata.args.pgbase, curpage);
+			wdata->args.count, (long long) wdata->args.offset,
+			user_addr + tot_bytes, wdata->args.pgbase, curpage);
 
 		lock_kernel();
-		result = NFS_PROTO(inode)->write(&wdata);
+		result = NFS_PROTO(inode)->write(wdata);
 		unlock_kernel();
 
 		if (result <= 0) {
@@ -301,11 +300,11 @@ retry:
 		}
 
 		if (tot_bytes == 0)
-			memcpy(&first_verf.verifier, &wdata.verf.verifier,
+			memcpy(&first_verf.verifier, &wdata->verf.verifier,
 						sizeof(first_verf.verifier));
-		if (wdata.verf.committed != NFS_FILE_SYNC) {
+		if (wdata->verf.committed != NFS_FILE_SYNC) {
 			need_commit = 1;
-			if (memcmp(&first_verf.verifier, &wdata.verf.verifier,
-					sizeof(first_verf.verifier)));
+			if (memcmp(&first_verf.verifier, &wdata->verf.verifier,
+					sizeof(first_verf.verifier)))
 				goto sync_retry;
 		}
@@ -313,13 +312,13 @@ retry:
 		tot_bytes += result;
 
 		/* in case of a short write: stop now, let the app recover */
-		if (result < wdata.args.count)
+		if (result < wdata->args.count)
 			break;
 
-		wdata.args.offset += result;
-		wdata.args.pgbase += result;
-		curpage += wdata.args.pgbase >> PAGE_SHIFT;
-		wdata.args.pgbase &= ~PAGE_MASK;
+		wdata->args.offset += result;
+		wdata->args.pgbase += result;
+		curpage += wdata->args.pgbase >> PAGE_SHIFT;
+		wdata->args.pgbase &= ~PAGE_MASK;
 		request -= result;
 	} while (request != 0);
 
@@ -327,15 +326,15 @@ retry:
 	 * Commit data written so far, even in the event of an error
 	 */
 	if (need_commit) {
-		wdata.args.count = tot_bytes;
-		wdata.args.offset = file_offset;
+		wdata->args.count = tot_bytes;
+		wdata->args.offset = file_offset;
 
 		lock_kernel();
-		result = NFS_PROTO(inode)->commit(&wdata);
+		result = NFS_PROTO(inode)->commit(wdata);
 		unlock_kernel();
 
 		if (result < 0 || memcmp(&first_verf.verifier,
-						&wdata.verf.verifier,
+						&wdata->verf.verifier,
 						sizeof(first_verf.verifier)) != 0)
 			goto sync_retry;
 	}
@@ -343,11 +342,11 @@ retry:
 
 out:
 	nfs_end_data_update_defer(inode);
-
+	nfs_writedata_free(wdata);
 	return result;
 
 sync_retry:
-	wdata.args.stable = NFS_FILE_SYNC;
+	wdata->args.stable = NFS_FILE_SYNC;
 	goto retry;
 }
 
diff -X /home/cel/src/linux/dont-diff -Naurp 11-nfs-short-direct-write/fs/nfs/write.c 12-nfs-direct-write-alloc/fs/nfs/write.c
--- 11-nfs-short-direct-write/fs/nfs/write.c	2004-10-21 10:38:14.244959000 -0400
+++ 12-nfs-direct-write-alloc/fs/nfs/write.c	2004-10-21 10:41:53.939014000 -0400
@@ -61,7 +61,6 @@
 #include
 #include
 #include
-#include <linux/mempool.h>
 
 #include "delegation.h"
 
@@ -83,49 +82,17 @@ static int nfs_wait_on_write_congestion(
 static int nfs_wait_on_requests(struct inode *, unsigned long, unsigned int);
 
 static kmem_cache_t *nfs_wdata_cachep;
-static mempool_t *nfs_wdata_mempool;
-static mempool_t *nfs_commit_mempool;
+mempool_t *nfs_wdata_mempool;
+mempool_t *nfs_commit_mempool;
 
 static DECLARE_WAIT_QUEUE_HEAD(nfs_write_congestion);
 
-static __inline__ struct nfs_write_data *nfs_writedata_alloc(void)
-{
-	struct nfs_write_data *p;
-	p = (struct nfs_write_data *)mempool_alloc(nfs_wdata_mempool, SLAB_NOFS);
-	if (p) {
-		memset(p, 0, sizeof(*p));
-		INIT_LIST_HEAD(&p->pages);
-	}
-	return p;
-}
-
-static __inline__ void nfs_writedata_free(struct nfs_write_data *p)
-{
-	mempool_free(p, nfs_wdata_mempool);
-}
-
-static void nfs_writedata_release(struct rpc_task *task)
+void nfs_writedata_release(struct rpc_task *task)
 {
 	struct nfs_write_data *wdata = (struct nfs_write_data *)task->tk_calldata;
 	nfs_writedata_free(wdata);
 }
 
-static __inline__ struct nfs_write_data *nfs_commit_alloc(void)
-{
-	struct nfs_write_data *p;
-	p = (struct nfs_write_data *)mempool_alloc(nfs_commit_mempool, SLAB_NOFS);
-	if (p) {
-		memset(p, 0, sizeof(*p));
-		INIT_LIST_HEAD(&p->pages);
-	}
-	return p;
-}
-
-static __inline__ void nfs_commit_free(struct nfs_write_data *p)
-{
-	mempool_free(p, nfs_commit_mempool);
-}
-
 /* Adjust the file length if we're writing beyond the end */
 static void nfs_grow_file(struct page *page, unsigned int offset, unsigned int count)
 {
diff -X /home/cel/src/linux/dont-diff -Naurp 11-nfs-short-direct-write/include/linux/nfs_fs.h 12-nfs-direct-write-alloc/include/linux/nfs_fs.h
--- 11-nfs-short-direct-write/include/linux/nfs_fs.h	2004-10-21 10:39:00.965285000 -0400
+++ 12-nfs-direct-write-alloc/include/linux/nfs_fs.h	2004-10-21 10:41:53.942011000 -0400
@@ -30,6 +30,7 @@
 #include
 #include
 #include
+#include <linux/mempool.h>
 
 /*
  * Enable debugging support for nfs client.
@@ -421,6 +422,44 @@ static inline int nfs_wb_page(struct ino
 	return nfs_wb_page_priority(inode, page, 0);
 }
 
+/*
+ * Allocate and free nfs_write_data structures
+ */
+extern mempool_t *nfs_wdata_mempool;
+extern mempool_t *nfs_commit_mempool;
+
+static inline struct nfs_write_data *nfs_writedata_alloc(void)
+{
+	struct nfs_write_data *p = mempool_alloc(nfs_wdata_mempool, SLAB_NOFS);
+	if (p) {
+		memset(p, 0, sizeof(*p));
+		INIT_LIST_HEAD(&p->pages);
+	}
+	return p;
+}
+
+static inline void nfs_writedata_free(struct nfs_write_data *p)
+{
+	mempool_free(p, nfs_wdata_mempool);
+}
+
+extern void nfs_writedata_release(struct rpc_task *task);
+
+static inline struct nfs_write_data *nfs_commit_alloc(void)
+{
+	struct nfs_write_data *p = mempool_alloc(nfs_commit_mempool, SLAB_NOFS);
+	if (p) {
+		memset(p, 0, sizeof(*p));
+		INIT_LIST_HEAD(&p->pages);
+	}
+	return p;
+}
+
+static inline void nfs_commit_free(struct nfs_write_data *p)
+{
+	mempool_free(p, nfs_commit_mempool);
+}
+
 /* Hack for future NFS swap support */
 #ifndef IS_SWAPFILE
 # define IS_SWAPFILE(inode)	(0)