From: Trond Myklebust Date: 1163538743 -0500 NFS: Fix asynchronous read error handling We must always call ->read_done() before we truncate the page data, or decide to flag an error. The reasons are that in NFSv2, ->read_done() is where the eof flag gets set. in NFSv3/v4 ->read_done() handles EJUKEBOX-type errors, and v4 state recovery. However, we need to mark the pages as uptodate before we deal with short read errors, since we may need to modify the nfs_read_data arguments. We therefore split the current nfs_readpage_result() into two parts: nfs_readpage_result(), which calls ->read_done() etc, and nfs_readpage_retry(), which subsequently handles short reads. Note: Removing the code that retries in case of a short read also fixes a bug in nfs_direct_read_result(), which used to return a corrupted number of bytes. Signed-off-by: Trond Myklebust --- fs/nfs/read.c | 140 ++++++++++++++++++++++++++++----------------------------- 1 files changed, 69 insertions(+), 71 deletions(-) diff --git a/fs/nfs/read.c b/fs/nfs/read.c index 8b58bbf..e879ee6 100644 --- a/fs/nfs/read.c +++ b/fs/nfs/read.c @@ -461,6 +461,55 @@ nfs_pagein_list(struct list_head *head, } /* + * This is the callback from RPC telling us whether a reply was + * received or some error occurred (timeout or socket shutdown). + */ +int nfs_readpage_result(struct rpc_task *task, struct nfs_read_data *data) +{ + int status; + + dprintk("%s: %4d, (status %d)\n", __FUNCTION__, task->tk_pid, + task->tk_status); + + status = NFS_PROTO(data->inode)->read_done(task, data); + if (status != 0) + return status; + + nfs_add_stats(data->inode, NFSIOS_SERVERREADBYTES, data->res.count); + + if (task->tk_status == -ESTALE) { + set_bit(NFS_INO_STALE, &NFS_FLAGS(data->inode)); + nfs_mark_for_revalidate(data->inode); + } + spin_lock(&data->inode->i_lock); + NFS_I(data->inode)->cache_validity |= NFS_INO_INVALID_ATIME; + spin_unlock(&data->inode->i_lock); + return 0; +} + +static int nfs_readpage_retry(struct rpc_task *task, struct nfs_read_data *data) +{ + struct nfs_readargs *argp = &data->args; + struct nfs_readres *resp = &data->res; + + if (resp->eof || resp->count == argp->count) + return 0; + + /* This is a short read! */ + nfs_inc_stats(data->inode, NFSIOS_SHORTREAD); + /* Has the server at least made some progress? */ + if (resp->count == 0) + return 0; + + /* Yes, so retry the read at the end of the data */ + argp->offset += resp->count; + argp->pgbase += resp->count; + argp->count -= resp->count; + rpc_restart_call(task); + return -EAGAIN; +} + +/* * Handle a read reply that fills part of a page. */ static void nfs_readpage_result_partial(struct rpc_task *task, void *calldata) @@ -469,12 +518,16 @@ static void nfs_readpage_result_partial( struct nfs_page *req = data->req; struct page *page = req->wb_page; - if (likely(task->tk_status >= 0)) - nfs_readpage_truncate_uninitialised_page(data); - else - SetPageError(page); if (nfs_readpage_result(task, data) != 0) return; + + if (likely(task->tk_status >= 0)) { + nfs_readpage_truncate_uninitialised_page(data); + if (nfs_readpage_retry(task, data) != 0) + return; + } + if (unlikely(task->tk_status < 0)) + SetPageError(page); if (atomic_dec_and_test(&req->wb_complete)) { if (!PageError(page)) SetPageUptodate(page); @@ -502,25 +555,13 @@ static void nfs_readpage_set_pages_uptod count += base; for (;count >= PAGE_CACHE_SIZE; count -= PAGE_CACHE_SIZE, pages++) SetPageUptodate(*pages); - if (count != 0) + if (count == 0) + return; + /* Was this a short read? */ + if (data->res.eof || data->res.count == data->args.count) SetPageUptodate(*pages); } -static void nfs_readpage_set_pages_error(struct nfs_read_data *data) -{ - unsigned int count = data->args.count; - unsigned int base = data->args.pgbase; - struct page **pages; - - pages = &data->args.pages[base >> PAGE_CACHE_SHIFT]; - base &= ~PAGE_CACHE_MASK; - count += base; - for (;count >= PAGE_CACHE_SIZE; count -= PAGE_CACHE_SIZE, pages++) - SetPageError(*pages); - if (count != 0) - SetPageError(*pages); -} - /* * This is the callback from RPC telling us whether a reply was * received or some error occurred (timeout or socket shutdown). @@ -529,19 +570,20 @@ static void nfs_readpage_result_full(str { struct nfs_read_data *data = calldata; + if (nfs_readpage_result(task, data) != 0) + return; /* - * Note: nfs_readpage_result may change the values of + * Note: nfs_readpage_retry may change the values of * data->args. In the multi-page case, we therefore need - * to ensure that we call the next nfs_readpage_set_page_uptodate() - * first in the multi-page case. + * to ensure that we call nfs_readpage_set_pages_uptodate() + * first. */ if (likely(task->tk_status >= 0)) { nfs_readpage_truncate_uninitialised_page(data); nfs_readpage_set_pages_uptodate(data); - } else - nfs_readpage_set_pages_error(data); - if (nfs_readpage_result(task, data) != 0) - return; + if (nfs_readpage_retry(task, data) != 0) + return; + } while (!list_empty(&data->pages)) { struct nfs_page *req = nfs_list_entry(data->pages.next); @@ -556,50 +598,6 @@ static const struct rpc_call_ops nfs_rea }; /* - * This is the callback from RPC telling us whether a reply was - * received or some error occurred (timeout or socket shutdown). - */ -int nfs_readpage_result(struct rpc_task *task, struct nfs_read_data *data) -{ - struct nfs_readargs *argp = &data->args; - struct nfs_readres *resp = &data->res; - int status; - - dprintk("NFS: %4d nfs_readpage_result, (status %d)\n", - task->tk_pid, task->tk_status); - - status = NFS_PROTO(data->inode)->read_done(task, data); - if (status != 0) - return status; - - nfs_add_stats(data->inode, NFSIOS_SERVERREADBYTES, resp->count); - - if (task->tk_status < 0) { - if (task->tk_status == -ESTALE) { - set_bit(NFS_INO_STALE, &NFS_FLAGS(data->inode)); - nfs_mark_for_revalidate(data->inode); - } - } else if (resp->count < argp->count && !resp->eof) { - /* This is a short read! */ - nfs_inc_stats(data->inode, NFSIOS_SHORTREAD); - /* Has the server at least made some progress? */ - if (resp->count != 0) { - /* Yes, so retry the read at the end of the data */ - argp->offset += resp->count; - argp->pgbase += resp->count; - argp->count -= resp->count; - rpc_restart_call(task); - return -EAGAIN; - } - task->tk_status = -EIO; - } - spin_lock(&data->inode->i_lock); - NFS_I(data->inode)->cache_validity |= NFS_INO_INVALID_ATIME; - spin_unlock(&data->inode->i_lock); - return 0; -} - -/* * Read a page over NFS. * We read the page synchronously in the following case: * - The error flag is set for this page. This happens only when a