Break the global nfs_wreq_lock into per-inode locks (a req_lock in struct nfs_inode). This helps prevent a heavy read and write workload on one mount point from interfering with workloads on other mount points. Note that there is still some serialization due to the big kernel lock. Patch against 2.6.7-rc3. fs/nfs/inode.c | 1 fs/nfs/pagelist.c | 16 ++++----- fs/nfs/write.c | 66 +++++++++++++++++++++------------------ include/linux/nfs_fs.h | 3 + include/linux/nfs_page.h | 2 - 5 files changed, 47 insertions(+), 41 deletions(-) Signed-off-by: Chuck Lever Signed-off-by: Trond Myklebust diff -X /home/cel/src/linux/dont-diff -Naurp 03-access_cache/fs/nfs/inode.c 04-nfsi-req_lock/fs/nfs/inode.c --- 03-access_cache/fs/nfs/inode.c 2004-06-08 17:32:49.149285000 -0400 +++ 04-nfsi-req_lock/fs/nfs/inode.c 2004-06-08 17:34:43.192398000 -0400 @@ -1738,6 +1738,7 @@ static void init_once(void * foo, kmem_c if ((flags & (SLAB_CTOR_VERIFY|SLAB_CTOR_CONSTRUCTOR)) == SLAB_CTOR_CONSTRUCTOR) { inode_init_once(&nfsi->vfs_inode); + nfsi->req_lock = SPIN_LOCK_UNLOCKED; INIT_LIST_HEAD(&nfsi->dirty); INIT_LIST_HEAD(&nfsi->commit); INIT_RADIX_TREE(&nfsi->nfs_page_tree, GFP_ATOMIC); diff -X /home/cel/src/linux/dont-diff -Naurp 03-access_cache/fs/nfs/pagelist.c 04-nfsi-req_lock/fs/nfs/pagelist.c --- 03-access_cache/fs/nfs/pagelist.c 2004-05-09 22:33:19.000000000 -0400 +++ 04-nfsi-req_lock/fs/nfs/pagelist.c 2004-06-08 17:34:43.196400000 -0400 @@ -21,11 +21,6 @@ #define NFS_PARANOIA 1 -/* - * Spinlock - */ -spinlock_t nfs_wreq_lock = SPIN_LOCK_UNLOCKED; - static kmem_cache_t *nfs_page_cachep; static inline struct nfs_page * @@ -137,12 +132,14 @@ void nfs_clear_request(struct nfs_page * void nfs_release_request(struct nfs_page *req) { - spin_lock(&nfs_wreq_lock); + struct nfs_inode *nfsi = NFS_I(req->wb_inode); + + spin_lock(&nfsi->req_lock); if (--req->wb_count) { - spin_unlock(&nfs_wreq_lock); + spin_unlock(&nfsi->req_lock); return; } - spin_unlock(&nfs_wreq_lock); + spin_unlock(&nfsi->req_lock); #ifdef NFS_PARANOIA BUG_ON
(!list_empty(&req->wb_list)); @@ -254,7 +251,8 @@ nfs_coalesce_requests(struct list_head * * If the number of requests is set to 0, the entire address_space * starting at index idx_start, is scanned. * The requests are *not* checked to ensure that they form a contiguous set. - * You must be holding the nfs_wreq_lock when calling this function + * + * Caller must hold the appropriate inode's req_lock. */ int nfs_scan_list(struct list_head *head, struct list_head *dst, diff -X /home/cel/src/linux/dont-diff -Naurp 03-access_cache/fs/nfs/write.c 04-nfsi-req_lock/fs/nfs/write.c --- 03-access_cache/fs/nfs/write.c 2004-06-08 17:21:53.163629000 -0400 +++ 04-nfsi-req_lock/fs/nfs/write.c 2004-06-08 17:34:43.203399000 -0400 @@ -374,6 +374,8 @@ out: /* * Insert a write request into an inode + * + * The inode's req_lock is held by the caller. */ static inline int nfs_inode_add_request(struct inode *inode, struct nfs_page *req) @@ -395,26 +397,28 @@ nfs_inode_add_request(struct inode *inod } /* - * Insert a write request into an inode + * Remove a write request from an inode + * + * A positive wb_count keeps req->wb_inode good while + * we're in here. 
*/ static void nfs_inode_remove_request(struct nfs_page *req) { - struct nfs_inode *nfsi; - struct inode *inode; + struct inode *inode = req->wb_inode; + struct nfs_inode *nfsi = NFS_I(inode); BUG_ON (!NFS_WBACK_BUSY(req)); - spin_lock(&nfs_wreq_lock); - inode = req->wb_inode; - nfsi = NFS_I(inode); + + spin_lock(&nfsi->req_lock); radix_tree_delete(&nfsi->nfs_page_tree, req->wb_index); nfsi->npages--; if (!nfsi->npages) { - spin_unlock(&nfs_wreq_lock); + spin_unlock(&nfsi->req_lock); nfs_end_data_update_defer(inode); iput(inode); } else - spin_unlock(&nfs_wreq_lock); + spin_unlock(&nfsi->req_lock); nfs_clear_request(req); nfs_release_request(req); } @@ -439,9 +443,9 @@ nfs_find_request(struct inode *inode, un { struct nfs_page *req; - spin_lock(&nfs_wreq_lock); + spin_lock(&NFS_I(inode)->req_lock); req = _nfs_find_request(inode, index); - spin_unlock(&nfs_wreq_lock); + spin_unlock(&NFS_I(inode)->req_lock); return req; } @@ -454,10 +458,10 @@ nfs_mark_request_dirty(struct nfs_page * struct inode *inode = req->wb_inode; struct nfs_inode *nfsi = NFS_I(inode); - spin_lock(&nfs_wreq_lock); + spin_lock(&nfsi->req_lock); nfs_list_add_request(req, &nfsi->dirty); nfsi->ndirty++; - spin_unlock(&nfs_wreq_lock); + spin_unlock(&nfsi->req_lock); inc_page_state(nr_dirty); mark_inode_dirty(inode); } @@ -482,10 +486,10 @@ nfs_mark_request_commit(struct nfs_page struct inode *inode = req->wb_inode; struct nfs_inode *nfsi = NFS_I(inode); - spin_lock(&nfs_wreq_lock); + spin_lock(&nfsi->req_lock); nfs_list_add_request(req, &nfsi->commit); nfsi->ncommit++; - spin_unlock(&nfs_wreq_lock); + spin_unlock(&nfsi->req_lock); inc_page_state(nr_unstable); mark_inode_dirty(inode); } @@ -510,7 +514,7 @@ nfs_wait_on_requests(struct inode *inode else idx_end = idx_start + npages - 1; - spin_lock(&nfs_wreq_lock); + spin_lock(&nfsi->req_lock); next = idx_start; while (radix_tree_gang_lookup(&nfsi->nfs_page_tree, (void **)&req, next, 1)) { if (req->wb_index > idx_end) @@ -521,15 +525,15 @@ 
nfs_wait_on_requests(struct inode *inode continue; req->wb_count++; - spin_unlock(&nfs_wreq_lock); + spin_unlock(&nfsi->req_lock); error = nfs_wait_on_request(req); nfs_release_request(req); if (error < 0) return error; - spin_lock(&nfs_wreq_lock); + spin_lock(&nfsi->req_lock); res++; } - spin_unlock(&nfs_wreq_lock); + spin_unlock(&nfsi->req_lock); return res; } @@ -624,7 +628,8 @@ static struct nfs_page * nfs_update_request(struct file* file, struct inode *inode, struct page *page, unsigned int offset, unsigned int bytes) { - struct nfs_server *server = NFS_SERVER(inode); + struct nfs_inode *nfsi = NFS_I(inode); + struct nfs_server *server = NFS_SERVER(inode); struct nfs_page *req, *new = NULL; unsigned long rqend, end; @@ -636,19 +641,19 @@ nfs_update_request(struct file* file, st /* Loop over all inode entries and see if we find * A request for the page we wish to update */ - spin_lock(&nfs_wreq_lock); + spin_lock(&nfsi->req_lock); req = _nfs_find_request(inode, page->index); if (req) { if (!nfs_lock_request_dontget(req)) { int error; - spin_unlock(&nfs_wreq_lock); + spin_unlock(&nfsi->req_lock); error = nfs_wait_on_request(req); nfs_release_request(req); if (error < 0) return ERR_PTR(error); continue; } - spin_unlock(&nfs_wreq_lock); + spin_unlock(&nfsi->req_lock); if (new) nfs_release_request(new); break; @@ -659,15 +664,15 @@ nfs_update_request(struct file* file, st nfs_lock_request_dontget(new); error = nfs_inode_add_request(inode, new); if (error) { - spin_unlock(&nfs_wreq_lock); + spin_unlock(&nfsi->req_lock); nfs_unlock_request(new); return ERR_PTR(error); } - spin_unlock(&nfs_wreq_lock); + spin_unlock(&nfsi->req_lock); nfs_mark_request_dirty(new); return new; } - spin_unlock(&nfs_wreq_lock); + spin_unlock(&nfsi->req_lock); new = nfs_create_request(file, inode, page, offset, bytes); if (IS_ERR(new)) @@ -1352,9 +1357,9 @@ int nfs_flush_inode(struct inode *inode, int res, error = 0; - spin_lock(&nfs_wreq_lock); + spin_lock(&NFS_I(inode)->req_lock); res = 
nfs_scan_dirty(inode, &head, idx_start, npages); - spin_unlock(&nfs_wreq_lock); + spin_unlock(&NFS_I(inode)->req_lock); if (res) error = nfs_flush_list(&head, NFS_SERVER(inode)->wpages, how); if (error < 0) @@ -1366,18 +1371,19 @@ int nfs_flush_inode(struct inode *inode, int nfs_commit_inode(struct inode *inode, unsigned long idx_start, unsigned int npages, int how) { + struct nfs_inode *nfsi = NFS_I(inode); LIST_HEAD(head); int res, error = 0; - spin_lock(&nfs_wreq_lock); + spin_lock(&nfsi->req_lock); res = nfs_scan_commit(inode, &head, idx_start, npages); if (res) { res += nfs_scan_commit(inode, &head, 0, 0); - spin_unlock(&nfs_wreq_lock); + spin_unlock(&nfsi->req_lock); error = nfs_commit_list(&head, how); } else - spin_unlock(&nfs_wreq_lock); + spin_unlock(&nfsi->req_lock); if (error < 0) return error; return res; diff -X /home/cel/src/linux/dont-diff -Naurp 03-access_cache/include/linux/nfs_fs.h 04-nfsi-req_lock/include/linux/nfs_fs.h --- 03-access_cache/include/linux/nfs_fs.h 2004-06-08 17:21:55.457631000 -0400 +++ 04-nfsi-req_lock/include/linux/nfs_fs.h 2004-06-08 17:34:43.208399000 -0400 @@ -30,6 +30,8 @@ #include #include +#include + /* * Enable debugging support for nfs client. * Requires RPC_DEBUG. @@ -148,6 +150,7 @@ struct nfs_inode { /* * This is the list of dirty unwritten pages. */ + spinlock_t req_lock; struct list_head dirty; struct list_head commit; struct radix_tree_root nfs_page_tree; diff -X /home/cel/src/linux/dont-diff -Naurp 03-access_cache/include/linux/nfs_page.h 04-nfsi-req_lock/include/linux/nfs_page.h --- 03-access_cache/include/linux/nfs_page.h 2004-05-09 22:32:00.000000000 -0400 +++ 04-nfsi-req_lock/include/linux/nfs_page.h 2004-06-08 17:34:43.212399000 -0400 @@ -65,8 +65,6 @@ extern int nfs_coalesce_requests(struct unsigned int); extern int nfs_wait_on_request(struct nfs_page *); -extern spinlock_t nfs_wreq_lock; - /* * Lock the page of an asynchronous request without incrementing the wb_count */