NFS: Replace nfs_page insertion sort with a radix sort Signed-off-by: Trond Myklebust --- fs/nfs/inode.c | 2 - fs/nfs/pagelist.c | 86 ++++++++++++++++++++++++++++++----------------- fs/nfs/write.c | 71 +++++++++++++++++--------------------- include/linux/nfs_fs.h | 4 +- include/linux/nfs_page.h | 18 ++++++++- 5 files changed, 107 insertions(+), 74 deletions(-) Index: linux-2.6.12-rc1/fs/nfs/pagelist.c =================================================================== --- linux-2.6.12-rc1.orig/fs/nfs/pagelist.c +++ linux-2.6.12-rc1/fs/nfs/pagelist.c @@ -177,36 +177,6 @@ nfs_release_request(struct nfs_page *req nfs_page_free(req); } -/** - * nfs_list_add_request - Insert a request into a sorted list - * @req: request - * @head: head of list into which to insert the request. - * - * Note that the wb_list is sorted by page index in order to facilitate - * coalescing of requests. - * We use an insertion sort that is optimized for the case of appended - * writes. - */ -void -nfs_list_add_request(struct nfs_page *req, struct list_head *head) -{ - struct list_head *pos; - -#ifdef NFS_PARANOIA - if (!list_empty(&req->wb_list)) { - printk(KERN_ERR "NFS: Add to list failed!\n"); - BUG(); - } -#endif - list_for_each_prev(pos, head) { - struct nfs_page *p = nfs_list_entry(pos); - if (p->wb_index < req->wb_index) - break; - } - list_add(&req->wb_list, pos); - req->wb_list_head = head; -} - static int nfs_wait_bit_interruptible(void *word) { int ret = 0; @@ -291,6 +261,62 @@ nfs_coalesce_requests(struct list_head * return npages; } +#define NFS_SCAN_MAXENTRIES 16 +/** + * nfs_scan_lock_dirty - Scan the radix tree for dirty requests + * @nfsi: NFS inode + * @dst: Destination list + * @idx_start: lower bound of page->index to scan + * @npages: idx_start + npages sets the upper bound to scan. + * + * Moves elements from one of the inode request lists. + * If the number of requests is set to 0, the entire address_space + * starting at index idx_start, is scanned. + * The requests are *not* checked to ensure that they form a contiguous set. + * You must be holding the inode's req_lock when calling this function + */ +int +nfs_scan_lock_dirty(struct nfs_inode *nfsi, struct list_head *dst, + unsigned long idx_start, unsigned int npages) +{ + struct nfs_page *pgvec[NFS_SCAN_MAXENTRIES]; + struct nfs_page *req; + unsigned long idx_end; + int found, i; + int res; + + res = 0; + if (npages == 0) + idx_end = ~0; + else + idx_end = idx_start + npages - 1; + + for (;;) { + found = radix_tree_gang_lookup_tag(&nfsi->nfs_page_tree, + (void **)&pgvec[0], idx_start, NFS_SCAN_MAXENTRIES, + NFS_PAGE_TAG_DIRTY); + if (found <= 0) + break; + for (i = 0; i < found; i++) { + req = pgvec[i]; + if (req->wb_index > idx_end) + goto out; + + idx_start = req->wb_index + 1; + + if (nfs_set_page_writeback_locked(req)) { + radix_tree_tag_clear(&nfsi->nfs_page_tree, + req->wb_index, NFS_PAGE_TAG_DIRTY); + nfs_list_remove_request(req); + nfs_list_add_request(req, dst); + res++; + } + } + } +out: + return res; +} + /** * nfs_scan_list - Scan a list for matching requests * @head: One of the NFS inode request lists Index: linux-2.6.12-rc1/fs/nfs/write.c =================================================================== --- linux-2.6.12-rc1.orig/fs/nfs/write.c +++ linux-2.6.12-rc1/fs/nfs/write.c @@ -352,7 +352,7 @@ int nfs_writepages(struct address_space if (err < 0) goto out; } - err = nfs_commit_inode(inode, 0, 0, wb_priority(wbc)); + err = nfs_commit_inode(inode, wb_priority(wbc)); if (err > 0) { wbc->nr_to_write -= err; err = 0; @@ -446,6 +446,8 @@ nfs_mark_request_dirty(struct nfs_page * struct nfs_inode *nfsi = NFS_I(inode); spin_lock(&nfsi->req_lock); + radix_tree_tag_set(&nfsi->nfs_page_tree, + req->wb_index, NFS_PAGE_TAG_DIRTY); nfs_list_add_request(req, &nfsi->dirty); nfsi->ndirty++; spin_unlock(&nfsi->req_lock); @@ -537,12 +539,15 @@ static int nfs_scan_dirty(struct inode *inode, struct list_head *dst, unsigned long idx_start, unsigned int npages) { struct nfs_inode *nfsi = NFS_I(inode); - int res; - res = nfs_scan_list(&nfsi->dirty, dst, idx_start, npages); - nfsi->ndirty -= res; - sub_page_state(nr_dirty,res); - if ((nfsi->ndirty == 0) != list_empty(&nfsi->dirty)) - printk(KERN_ERR "NFS: desynchronized value of nfs_i.ndirty.\n"); + int res = 0; + + if (nfsi->ndirty != 0) { + res = nfs_scan_lock_dirty(nfsi, dst, idx_start, npages); + nfsi->ndirty -= res; + sub_page_state(nr_dirty,res); + if ((nfsi->ndirty == 0) != list_empty(&nfsi->dirty)) + printk(KERN_ERR "NFS: desynchronized value of nfs_i.ndirty.\n"); + } return res; } @@ -561,11 +566,14 @@ static int nfs_scan_commit(struct inode *inode, struct list_head *dst, unsigned long idx_start, unsigned int npages) { struct nfs_inode *nfsi = NFS_I(inode); - int res; - res = nfs_scan_list(&nfsi->commit, dst, idx_start, npages); - nfsi->ncommit -= res; - if ((nfsi->ncommit == 0) != list_empty(&nfsi->commit)) - printk(KERN_ERR "NFS: desynchronized value of nfs_i.ncommit.\n"); + int res = 0; + + if (nfsi->ncommit != 0) { + res = nfs_scan_list(&nfsi->commit, dst, idx_start, npages); + nfsi->ncommit -= res; + if ((nfsi->ncommit == 0) != list_empty(&nfsi->commit)) + printk(KERN_ERR "NFS: desynchronized value of nfs_i.ncommit.\n"); + } return res; } #endif @@ -1209,36 +1217,24 @@ static void nfs_commit_rpcsetup(struct l struct nfs_write_data *data, int how) { struct rpc_task *task = &data->task; - struct nfs_page *first, *last; + struct nfs_page *first; struct inode *inode; - loff_t start, end, len; /* Set up the RPC argument and reply structs * NB: take care not to mess about with data->commit et al. */ list_splice_init(head, &data->pages); first = nfs_list_entry(data->pages.next); - last = nfs_list_entry(data->pages.prev); inode = first->wb_context->dentry->d_inode; - /* - * Determine the offset range of requests in the COMMIT call. - * We rely on the fact that data->pages is an ordered list... - */ - start = req_offset(first); - end = req_offset(last) + last->wb_bytes; - len = end - start; - /* If 'len' is not a 32-bit quantity, pass '0' in the COMMIT call */ - if (end >= i_size_read(inode) || len < 0 || len > (~((u32)0) >> 1)) - len = 0; - data->inode = inode; data->cred = first->wb_context->cred; data->args.fh = NFS_FH(data->inode); - data->args.offset = start; - data->args.count = len; - data->res.count = len; + /* Note: we always request a commit of the entire inode */ + data->args.offset = 0; + data->args.count = 0; + data->res.count = 0; data->res.fattr = &data->fattr; data->res.verf = &data->verf; @@ -1349,8 +1345,7 @@ static int nfs_flush_inode(struct inode } #if defined(CONFIG_NFS_V3) || defined(CONFIG_NFS_V4) -int nfs_commit_inode(struct inode *inode, unsigned long idx_start, - unsigned int npages, int how) +int nfs_commit_inode(struct inode *inode, int how) { struct nfs_inode *nfsi = NFS_I(inode); LIST_HEAD(head); @@ -1358,15 +1353,13 @@ int nfs_commit_inode(struct inode *inode error = 0; spin_lock(&nfsi->req_lock); - res = nfs_scan_commit(inode, &head, idx_start, npages); + res = nfs_scan_commit(inode, &head, 0, 0); + spin_unlock(&nfsi->req_lock); if (res) { - res += nfs_scan_commit(inode, &head, 0, 0); - spin_unlock(&nfsi->req_lock); error = nfs_commit_list(&head, how); - } else - spin_unlock(&nfsi->req_lock); - if (error < 0) - return error; + if (error < 0) + return error; + } return res; } #endif @@ -1388,7 +1381,7 @@ int nfs_sync_inode(struct inode *inode, error = nfs_flush_inode(inode, idx_start, npages, how); #if defined(CONFIG_NFS_V3) || defined(CONFIG_NFS_V4) if (error == 0) - error = nfs_commit_inode(inode, idx_start, npages, how); + error = nfs_commit_inode(inode, how); #endif } while (error > 0); return error; Index: linux-2.6.12-rc1/include/linux/nfs_page.h =================================================================== --- linux-2.6.12-rc1.orig/include/linux/nfs_page.h +++ linux-2.6.12-rc1/include/linux/nfs_page.h @@ -22,6 +22,7 @@ /* * Valid flags for the radix tree */ +#define NFS_PAGE_TAG_DIRTY 0 #define NFS_PAGE_TAG_WRITEBACK 1 /* @@ -31,6 +32,7 @@ #define PG_NEED_COMMIT 1 #define PG_NEED_RESCHED 2 +struct nfs_inode; struct nfs_page { struct list_head wb_list, /* Defines state of page: */ *wb_list_head; /* read/write/commit */ @@ -59,8 +61,8 @@ extern void nfs_clear_request(struct nfs extern void nfs_release_request(struct nfs_page *req); -extern void nfs_list_add_request(struct nfs_page *, struct list_head *); - +extern int nfs_scan_lock_dirty(struct nfs_inode *nfsi, struct list_head *dst, + unsigned long idx_start, unsigned int npages); extern int nfs_scan_list(struct list_head *, struct list_head *, unsigned long, unsigned int); extern int nfs_coalesce_requests(struct list_head *, struct list_head *, @@ -94,6 +96,18 @@ nfs_lock_request(struct nfs_page *req) return 1; } +/** + * nfs_list_add_request - Insert a request into a list + * @req: request + * @head: head of list into which to insert the request. + */ +static inline void +nfs_list_add_request(struct nfs_page *req, struct list_head *head) +{ + list_add_tail(&req->wb_list, head); + req->wb_list_head = head; +} + /** * nfs_list_remove_request - Remove a request from its wb_list Index: linux-2.6.12-rc1/include/linux/nfs_fs.h =================================================================== --- linux-2.6.12-rc1.orig/include/linux/nfs_fs.h +++ linux-2.6.12-rc1/include/linux/nfs_fs.h @@ -394,10 +394,10 @@ extern void nfs_commit_done(struct rpc_t */ extern int nfs_sync_inode(struct inode *, unsigned long, unsigned int, int); #if defined(CONFIG_NFS_V3) || defined(CONFIG_NFS_V4) -extern int nfs_commit_inode(struct inode *, unsigned long, unsigned int, int); +extern int nfs_commit_inode(struct inode *, int); #else static inline int -nfs_commit_inode(struct inode *inode, unsigned long idx_start, unsigned int npages, int how) +nfs_commit_inode(struct inode *inode, int how) { return 0; } Index: linux-2.6.12-rc1/fs/nfs/inode.c =================================================================== --- linux-2.6.12-rc1.orig/fs/nfs/inode.c +++ linux-2.6.12-rc1/fs/nfs/inode.c @@ -135,7 +135,7 @@ nfs_write_inode(struct inode *inode, int int flags = sync ? FLUSH_WAIT : 0; int ret; - ret = nfs_commit_inode(inode, 0, 0, flags); + ret = nfs_commit_inode(inode, flags); if (ret < 0) return ret; return 0;