All of the above --- fs/locks.c | 2 fs/nfs/Makefile | 1 fs/nfs/delegation.c | 2 fs/nfs/dir.c | 43 +++- fs/nfs/direct.c | 4 fs/nfs/file.c | 107 +++++++-- fs/nfs/inode.c | 12 + fs/nfs/namespace.c | 2 fs/nfs/nfs4proc.c | 62 ++--- fs/nfs/nfs4state.c | 2 fs/nfs/nfs4xdr.c | 27 ++ fs/nfs/read.c | 6 - fs/nfs/super.c | 24 +- fs/nfs/write.c | 233 ++++++++++---------- include/linux/jiffies.h | 4 include/linux/nfs_fs.h | 25 +- include/linux/nfs_page.h | 1 include/linux/nfs_xdr.h | 3 include/linux/sunrpc/xprt.h | 3 include/linux/writeback.h | 2 kernel/auditsc.c | 1 net/sunrpc/rpc_pipe.c | 7 - net/sunrpc/rpcb_clnt.c | 80 +++++-- net/sunrpc/xprtsock.c | 498 ++++++++++++++++++++++++++++++++++--------- 24 files changed, 783 insertions(+), 368 deletions(-) diff --git a/fs/locks.c b/fs/locks.c index 50857d2..9864bf3 100644 --- a/fs/locks.c +++ b/fs/locks.c @@ -819,7 +819,7 @@ static int __posix_lock_file(struct inode *inode, struct file_lock *request, str lock_kernel(); if (request->fl_type != F_UNLCK) { for_each_lock(inode, before) { - struct file_lock *fl = *before; + fl = *before; if (!IS_POSIX(fl)) continue; if (!posix_locks_conflict(request, fl)) diff --git a/fs/nfs/Makefile b/fs/nfs/Makefile index b55cb23..df0f41e 100644 --- a/fs/nfs/Makefile +++ b/fs/nfs/Makefile @@ -16,4 +16,3 @@ nfs-$(CONFIG_NFS_V4) += nfs4proc.o nfs4xdr.o nfs4state.o nfs4renewd.o \ nfs4namespace.o nfs-$(CONFIG_NFS_DIRECTIO) += direct.o nfs-$(CONFIG_SYSCTL) += sysctl.o -nfs-objs := $(nfs-y) diff --git a/fs/nfs/delegation.c b/fs/nfs/delegation.c index c55a761..7a1b6e8 100644 --- a/fs/nfs/delegation.c +++ b/fs/nfs/delegation.c @@ -52,7 +52,7 @@ static int nfs_delegation_claim_locks(struct nfs_open_context *ctx, struct nfs4_ for (fl = inode->i_flock; fl != 0; fl = fl->fl_next) { if (!(fl->fl_flags & (FL_POSIX|FL_FLOCK))) continue; - if ((struct nfs_open_context *)fl->fl_file->private_data != ctx) + if (nfs_file_open_context(fl->fl_file) != ctx) continue; status = nfs4_lock_delegation_recall(state, fl); if 
(status >= 0) diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c index ea97408..93445a7 100644 --- a/fs/nfs/dir.c +++ b/fs/nfs/dir.c @@ -407,7 +407,7 @@ int nfs_do_filldir(nfs_readdir_descriptor_t *desc, void *dirent, struct file *file = desc->file; struct nfs_entry *entry = desc->entry; struct dentry *dentry = NULL; - unsigned long fileid; + u64 fileid; int loop_count = 0, res; @@ -418,7 +418,7 @@ int nfs_do_filldir(nfs_readdir_descriptor_t *desc, void *dirent, unsigned d_type = DT_UNKNOWN; /* Note: entry->prev_cookie contains the cookie for * retrieving the current dirent on the server */ - fileid = nfs_fileid_to_ino_t(entry->ino); + fileid = entry->ino; /* Get a dentry if we have one */ if (dentry != NULL) @@ -428,7 +428,7 @@ int nfs_do_filldir(nfs_readdir_descriptor_t *desc, void *dirent, /* Use readdirplus info */ if (dentry != NULL && dentry->d_inode != NULL) { d_type = dt_type(dentry->d_inode); - fileid = dentry->d_inode->i_ino; + fileid = NFS_FILEID(dentry->d_inode); } res = filldir(dirent, entry->name, entry->len, @@ -558,7 +558,7 @@ static int nfs_readdir(struct file *filp, void *dirent, filldir_t filldir) memset(desc, 0, sizeof(*desc)); desc->file = filp; - desc->dir_cookie = &((struct nfs_open_context *)filp->private_data)->dir_cookie; + desc->dir_cookie = &nfs_file_open_context(filp)->dir_cookie; desc->decode = NFS_PROTO(inode)->decode_dirent; desc->plus = NFS_USE_READDIRPLUS(inode); @@ -623,7 +623,7 @@ static loff_t nfs_llseek_dir(struct file *filp, loff_t offset, int origin) } if (offset != filp->f_pos) { filp->f_pos = offset; - ((struct nfs_open_context *)filp->private_data)->dir_cookie = 0; + nfs_file_open_context(filp)->dir_cookie = 0; } out: mutex_unlock(&filp->f_path.dentry->d_inode->i_mutex); @@ -1348,9 +1348,9 @@ static int nfs_rmdir(struct inode *dir, struct dentry *dentry) static int nfs_sillyrename(struct inode *dir, struct dentry *dentry) { static unsigned int sillycounter; - const int i_inosize = sizeof(dir->i_ino)*2; + const int fileidsize = 
sizeof(NFS_FILEID(dentry->d_inode))*2; const int countersize = sizeof(sillycounter)*2; - const int slen = sizeof(".nfs") + i_inosize + countersize - 1; + const int slen = sizeof(".nfs")+fileidsize+countersize-1; char silly[slen+1]; struct qstr qsilly; struct dentry *sdentry; @@ -1368,8 +1368,9 @@ static int nfs_sillyrename(struct inode *dir, struct dentry *dentry) if (dentry->d_flags & DCACHE_NFSFS_RENAMED) goto out; - sprintf(silly, ".nfs%*.*lx", - i_inosize, i_inosize, dentry->d_inode->i_ino); + sprintf(silly, ".nfs%*.*Lx", + fileidsize, fileidsize, + (unsigned long long)NFS_FILEID(dentry->d_inode)); /* Return delegation in anticipation of the rename */ nfs_inode_return_delegation(dentry->d_inode); @@ -1840,7 +1841,7 @@ static struct nfs_access_entry *nfs_access_search_rbtree(struct inode *inode, st return NULL; } -int nfs_access_get_cached(struct inode *inode, struct rpc_cred *cred, struct nfs_access_entry *res) +static int nfs_access_get_cached(struct inode *inode, struct rpc_cred *cred, struct nfs_access_entry *res) { struct nfs_inode *nfsi = NFS_I(inode); struct nfs_access_entry *cache; @@ -1852,7 +1853,7 @@ int nfs_access_get_cached(struct inode *inode, struct rpc_cred *cred, struct nfs cache = nfs_access_search_rbtree(inode, cred); if (cache == NULL) goto out; - if (time_after(jiffies, cache->jiffies + NFS_ATTRTIMEO(inode))) + if (!time_in_range(jiffies, cache->jiffies, cache->jiffies + NFS_ATTRTIMEO(inode))) goto out_stale; res->jiffies = cache->jiffies; res->cred = cache->cred; @@ -1907,7 +1908,7 @@ found: nfs_access_free_entry(entry); } -void nfs_access_add_cache(struct inode *inode, struct nfs_access_entry *set) +static void nfs_access_add_cache(struct inode *inode, struct nfs_access_entry *set) { struct nfs_access_entry *cache = kmalloc(sizeof(*cache), GFP_KERNEL); if (cache == NULL) @@ -1955,6 +1956,24 @@ out: return -EACCES; } +static int nfs_open_permission_mask(int openflags) +{ + int mask = 0; + + if (openflags & FMODE_READ) + mask |= MAY_READ; + 
if (openflags & FMODE_WRITE) + mask |= MAY_WRITE; + if (openflags & FMODE_EXEC) + mask |= MAY_EXEC; + return mask; +} + +int nfs_may_open(struct inode *inode, struct rpc_cred *cred, int openflags) +{ + return nfs_do_access(inode, cred, nfs_open_permission_mask(openflags)); +} + int nfs_permission(struct inode *inode, int mask, struct nameidata *nd) { struct rpc_cred *cred; diff --git a/fs/nfs/direct.c b/fs/nfs/direct.c index fcf4d38..28c8e1b 100644 --- a/fs/nfs/direct.c +++ b/fs/nfs/direct.c @@ -368,7 +368,7 @@ static ssize_t nfs_direct_read(struct kiocb *iocb, unsigned long user_addr, size return -ENOMEM; dreq->inode = inode; - dreq->ctx = get_nfs_open_context((struct nfs_open_context *)iocb->ki_filp->private_data); + dreq->ctx = get_nfs_open_context(nfs_file_open_context(iocb->ki_filp)); if (!is_sync_kiocb(iocb)) dreq->iocb = iocb; @@ -718,7 +718,7 @@ static ssize_t nfs_direct_write(struct kiocb *iocb, unsigned long user_addr, siz sync = FLUSH_STABLE; dreq->inode = inode; - dreq->ctx = get_nfs_open_context((struct nfs_open_context *)iocb->ki_filp->private_data); + dreq->ctx = get_nfs_open_context(nfs_file_open_context(iocb->ki_filp)); if (!is_sync_kiocb(iocb)) dreq->iocb = iocb; diff --git a/fs/nfs/file.c b/fs/nfs/file.c index c87dc71..c664bb9 100644 --- a/fs/nfs/file.c +++ b/fs/nfs/file.c @@ -33,6 +33,7 @@ #include #include "delegation.h" +#include "internal.h" #include "iostat.h" #define NFSDBG_FACILITY NFSDBG_FILE @@ -55,6 +56,8 @@ static int nfs_lock(struct file *filp, int cmd, struct file_lock *fl); static int nfs_flock(struct file *filp, int cmd, struct file_lock *fl); static int nfs_setlease(struct file *file, long arg, struct file_lock **fl); +static struct vm_operations_struct nfs_file_vm_ops; + const struct file_operations nfs_file_operations = { .llseek = nfs_file_llseek, .read = do_sync_read, @@ -174,13 +177,38 @@ static loff_t nfs_file_llseek(struct file *filp, loff_t offset, int origin) } /* + * Helper for nfs_file_flush() and nfs_fsync() + * + * 
Notice that it clears the NFS_CONTEXT_ERROR_WRITE before synching to + * disk, but it retrieves and clears ctx->error after synching, despite + * the two being set at the same time in nfs_context_set_write_error(). + * This is because the former is used to notify the _next_ call to + * nfs_file_write() that a write error occurred, and hence cause it to + * fall back to doing a synchronous write. + */ +static int nfs_do_fsync(struct nfs_open_context *ctx, struct inode *inode) +{ + int have_error, status; + int ret = 0; + + have_error = test_and_clear_bit(NFS_CONTEXT_ERROR_WRITE, &ctx->flags); + status = nfs_wb_all(inode); + have_error |= test_bit(NFS_CONTEXT_ERROR_WRITE, &ctx->flags); + if (have_error) + ret = xchg(&ctx->error, 0); + if (!ret) + ret = status; + return ret; +} + +/* * Flush all dirty pages, and check for write errors. * */ static int nfs_file_flush(struct file *file, fl_owner_t id) { - struct nfs_open_context *ctx = (struct nfs_open_context *)file->private_data; + struct nfs_open_context *ctx = nfs_file_open_context(file); struct inode *inode = file->f_path.dentry->d_inode; int status; @@ -189,16 +217,11 @@ nfs_file_flush(struct file *file, fl_owner_t id) if ((file->f_mode & FMODE_WRITE) == 0) return 0; nfs_inc_stats(inode, NFSIOS_VFSFLUSH); - lock_kernel(); + /* Ensure that data+attribute caches are up to date after close() */ - status = nfs_wb_all(inode); - if (!status) { - status = ctx->error; - ctx->error = 0; - if (!status) - nfs_revalidate_inode(NFS_SERVER(inode), inode); - } - unlock_kernel(); + status = nfs_do_fsync(ctx, inode); + if (!status) + nfs_revalidate_inode(NFS_SERVER(inode), inode); return status; } @@ -257,8 +280,11 @@ nfs_file_mmap(struct file * file, struct vm_area_struct * vma) dentry->d_parent->d_name.name, dentry->d_name.name); status = nfs_revalidate_mapping(inode, file->f_mapping); - if (!status) - status = generic_file_mmap(file, vma); + if (!status) { + vma->vm_ops = &nfs_file_vm_ops; + vma->vm_flags |= VM_CAN_NONLINEAR; + 
file_accessed(file); + } return status; } @@ -270,21 +296,13 @@ nfs_file_mmap(struct file * file, struct vm_area_struct * vma) static int nfs_fsync(struct file *file, struct dentry *dentry, int datasync) { - struct nfs_open_context *ctx = (struct nfs_open_context *)file->private_data; + struct nfs_open_context *ctx = nfs_file_open_context(file); struct inode *inode = dentry->d_inode; - int status; dfprintk(VFS, "nfs: fsync(%s/%ld)\n", inode->i_sb->s_id, inode->i_ino); nfs_inc_stats(inode, NFSIOS_VFSFSYNC); - lock_kernel(); - status = nfs_wb_all(inode); - if (!status) { - status = ctx->error; - ctx->error = 0; - } - unlock_kernel(); - return status; + return nfs_do_fsync(ctx, inode); } /* @@ -316,7 +334,7 @@ static void nfs_invalidate_page(struct page *page, unsigned long offset) if (offset != 0) return; /* Cancel any unstarted writes on this page */ - nfs_wb_page_priority(page->mapping->host, page, FLUSH_INVALIDATE); + nfs_wb_page_cancel(page->mapping->host, page); } static int nfs_release_page(struct page *page, gfp_t gfp) @@ -333,7 +351,7 @@ static int nfs_launder_page(struct page *page) const struct address_space_operations nfs_file_aops = { .readpage = nfs_readpage, .readpages = nfs_readpages, - .set_page_dirty = nfs_set_page_dirty, + .set_page_dirty = __set_page_dirty_nobuffers, .writepage = nfs_writepage, .writepages = nfs_writepages, .prepare_write = nfs_prepare_write, @@ -346,6 +364,43 @@ const struct address_space_operations nfs_file_aops = { .launder_page = nfs_launder_page, }; +static int nfs_vm_page_mkwrite(struct vm_area_struct *vma, struct page *page) +{ + struct file *filp = vma->vm_file; + unsigned pagelen; + int ret = -EINVAL; + + lock_page(page); + if (page->mapping != vma->vm_file->f_path.dentry->d_inode->i_mapping) + goto out_unlock; + pagelen = nfs_page_length(page); + if (pagelen == 0) + goto out_unlock; + ret = nfs_prepare_write(filp, page, 0, pagelen); + if (!ret) + ret = nfs_commit_write(filp, page, 0, pagelen); +out_unlock: + 
unlock_page(page); + return ret; +} + +static struct vm_operations_struct nfs_file_vm_ops = { + .fault = filemap_fault, + .page_mkwrite = nfs_vm_page_mkwrite, +}; + +static int nfs_need_sync_write(struct file *filp, struct inode *inode) +{ + struct nfs_open_context *ctx; + + if (IS_SYNC(inode) || (filp->f_flags & O_SYNC)) + return 1; + ctx = nfs_file_open_context(filp); + if (test_bit(NFS_CONTEXT_ERROR_WRITE, &ctx->flags)) + return 1; + return 0; +} + static ssize_t nfs_file_write(struct kiocb *iocb, const struct iovec *iov, unsigned long nr_segs, loff_t pos) { @@ -382,8 +437,8 @@ static ssize_t nfs_file_write(struct kiocb *iocb, const struct iovec *iov, nfs_add_stats(inode, NFSIOS_NORMALWRITTENBYTES, count); result = generic_file_aio_write(iocb, iov, nr_segs, pos); /* Return error values for O_SYNC and IS_SYNC() */ - if (result >= 0 && (IS_SYNC(inode) || (iocb->ki_filp->f_flags & O_SYNC))) { - int err = nfs_fsync(iocb->ki_filp, dentry, 1); + if (result >= 0 && nfs_need_sync_write(iocb->ki_filp, inode)) { + int err = nfs_do_fsync(nfs_file_open_context(iocb->ki_filp), inode); if (err < 0) result = err; } diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c index 71a49c3..45633f9 100644 --- a/fs/nfs/inode.c +++ b/fs/nfs/inode.c @@ -431,7 +431,7 @@ int nfs_getattr(struct vfsmount *mnt, struct dentry *dentry, struct kstat *stat) /* Flush out writes to the server in order to update c/mtime */ if (S_ISREG(inode->i_mode)) - nfs_sync_mapping_range(inode->i_mapping, 0, 0, FLUSH_NOCOMMIT); + nfs_wb_nocommit(inode); /* * We may force a getattr if the user cares about atime. 
@@ -450,8 +450,10 @@ int nfs_getattr(struct vfsmount *mnt, struct dentry *dentry, struct kstat *stat) err = __nfs_revalidate_inode(NFS_SERVER(inode), inode); else err = nfs_revalidate_inode(NFS_SERVER(inode), inode); - if (!err) + if (!err) { generic_fillattr(inode, stat); + stat->ino = NFS_FILEID(inode); + } return err; } @@ -536,7 +538,7 @@ struct nfs_open_context *nfs_find_open_context(struct inode *inode, struct rpc_c static void nfs_file_clear_open_context(struct file *filp) { struct inode *inode = filp->f_path.dentry->d_inode; - struct nfs_open_context *ctx = (struct nfs_open_context *)filp->private_data; + struct nfs_open_context *ctx = nfs_file_open_context(filp); if (ctx) { filp->private_data = NULL; @@ -654,7 +656,7 @@ int nfs_attribute_timeout(struct inode *inode) if (nfs_have_delegation(inode, FMODE_READ)) return 0; - return time_after(jiffies, nfsi->read_cache_jiffies+nfsi->attrtimeo); + return !time_in_range(jiffies, nfsi->read_cache_jiffies, nfsi->read_cache_jiffies + nfsi->attrtimeo); } /** @@ -1053,7 +1055,7 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr) nfs_inc_stats(inode, NFSIOS_ATTRINVALIDATE); nfsi->attrtimeo = NFS_MINATTRTIMEO(inode); nfsi->attrtimeo_timestamp = now; - } else if (time_after(now, nfsi->attrtimeo_timestamp+nfsi->attrtimeo)) { + } else if (!time_in_range(now, nfsi->attrtimeo_timestamp, nfsi->attrtimeo_timestamp + nfsi->attrtimeo)) { if ((nfsi->attrtimeo <<= 1) > NFS_MAXATTRTIMEO(inode)) nfsi->attrtimeo = NFS_MAXATTRTIMEO(inode); nfsi->attrtimeo_timestamp = now; diff --git a/fs/nfs/namespace.c b/fs/nfs/namespace.c index aea76d0..acfc56f 100644 --- a/fs/nfs/namespace.c +++ b/fs/nfs/namespace.c @@ -176,7 +176,7 @@ static void nfs_expire_automounts(struct work_struct *work) void nfs_release_automount_timer(void) { if (list_empty(&nfs_automount_list)) - cancel_delayed_work_sync(&nfs_automount_task); + cancel_delayed_work(&nfs_automount_task); } /* diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 
62b3ae2..0e366a3 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -62,10 +62,8 @@ struct nfs4_opendata; static int _nfs4_proc_open(struct nfs4_opendata *data); static int nfs4_do_fsinfo(struct nfs_server *, struct nfs_fh *, struct nfs_fsinfo *); static int nfs4_async_handle_error(struct rpc_task *, const struct nfs_server *); -static int _nfs4_proc_access(struct inode *inode, struct nfs_access_entry *entry); static int nfs4_handle_exception(const struct nfs_server *server, int errorcode, struct nfs4_exception *exception); static int nfs4_wait_clnt_recover(struct rpc_clnt *clnt, struct nfs_client *clp); -static int _nfs4_do_access(struct inode *inode, struct rpc_cred *cred, int openflags); static int _nfs4_proc_lookup(struct inode *dir, const struct qstr *name, struct nfs_fh *fhandle, struct nfs_fattr *fattr); static int _nfs4_proc_getattr(struct nfs_server *server, struct nfs_fh *fhandle, struct nfs_fattr *fattr); @@ -177,7 +175,7 @@ static void nfs4_setup_readdir(u64 cookie, __be32 *verifier, struct dentry *dent *p++ = xdr_one; /* bitmap length */ *p++ = htonl(FATTR4_WORD0_FILEID); /* bitmap */ *p++ = htonl(8); /* attribute buffer length */ - p = xdr_encode_hyper(p, dentry->d_inode->i_ino); + p = xdr_encode_hyper(p, NFS_FILEID(dentry->d_inode)); } *p++ = xdr_one; /* next */ @@ -189,7 +187,7 @@ static void nfs4_setup_readdir(u64 cookie, __be32 *verifier, struct dentry *dent *p++ = xdr_one; /* bitmap length */ *p++ = htonl(FATTR4_WORD0_FILEID); /* bitmap */ *p++ = htonl(8); /* attribute buffer length */ - p = xdr_encode_hyper(p, dentry->d_parent->d_inode->i_ino); + p = xdr_encode_hyper(p, NFS_FILEID(dentry->d_parent->d_inode)); readdir->pgbase = (char *)p - (char *)start; readdir->count -= readdir->pgbase; @@ -454,7 +452,7 @@ static struct nfs4_state *nfs4_try_open_cached(struct nfs4_opendata *opendata) memcpy(stateid.data, delegation->stateid.data, sizeof(stateid.data)); rcu_read_unlock(); lock_kernel(); - ret = _nfs4_do_access(state->inode, 
state->owner->so_cred, open_mode); + ret = nfs_may_open(state->inode, state->owner->so_cred, open_mode); unlock_kernel(); if (ret != 0) goto out; @@ -646,7 +644,7 @@ static int _nfs4_do_open_reclaim(struct nfs_open_context *ctx, struct nfs4_state rcu_read_lock(); delegation = rcu_dereference(NFS_I(state->inode)->delegation); if (delegation != NULL && (delegation->flags & NFS_DELEGATION_NEED_RECLAIM) != 0) - delegation_type = delegation->flags; + delegation_type = delegation->type; rcu_read_unlock(); opendata->o_arg.u.delegation_type = delegation_type; status = nfs4_open_recover(opendata, state); @@ -948,36 +946,6 @@ static int _nfs4_proc_open(struct nfs4_opendata *data) return 0; } -static int _nfs4_do_access(struct inode *inode, struct rpc_cred *cred, int openflags) -{ - struct nfs_access_entry cache; - int mask = 0; - int status; - - if (openflags & FMODE_READ) - mask |= MAY_READ; - if (openflags & FMODE_WRITE) - mask |= MAY_WRITE; - if (openflags & FMODE_EXEC) - mask |= MAY_EXEC; - status = nfs_access_get_cached(inode, cred, &cache); - if (status == 0) - goto out; - - /* Be clever: ask server to check for all possible rights */ - cache.mask = MAY_EXEC | MAY_WRITE | MAY_READ; - cache.cred = cred; - cache.jiffies = jiffies; - status = _nfs4_proc_access(inode, &cache); - if (status != 0) - return status; - nfs_access_add_cache(inode, &cache); -out: - if ((cache.mask & mask) == mask) - return 0; - return -EACCES; -} - static int nfs4_recover_expired_lease(struct nfs_server *server) { struct nfs_client *clp = server->nfs_client; @@ -1381,7 +1349,7 @@ static int nfs4_intent_set_file(struct nameidata *nd, struct path *path, struct /* If the open_intent is for execute, we have an extra check to make */ if (nd->intent.open.flags & FMODE_EXEC) { - ret = _nfs4_do_access(state->inode, + ret = nfs_may_open(state->inode, state->owner->so_cred, nd->intent.open.flags); if (ret < 0) @@ -1390,7 +1358,7 @@ static int nfs4_intent_set_file(struct nameidata *nd, struct path *path, 
struct filp = lookup_instantiate_filp(nd, path->dentry, NULL); if (!IS_ERR(filp)) { struct nfs_open_context *ctx; - ctx = (struct nfs_open_context *)filp->private_data; + ctx = nfs_file_open_context(filp); ctx->state = state; return 0; } @@ -1434,7 +1402,7 @@ nfs4_atomic_open(struct inode *dir, struct dentry *dentry, struct nameidata *nd) } res = d_add_unique(dentry, igrab(state->inode)); if (res != NULL) - dentry = res; + path.dentry = res; nfs4_intent_set_file(nd, &path, state); return res; } @@ -1757,10 +1725,16 @@ static int nfs4_proc_lookup(struct inode *dir, struct qstr *name, struct nfs_fh static int _nfs4_proc_access(struct inode *inode, struct nfs_access_entry *entry) { + struct nfs_server *server = NFS_SERVER(inode); + struct nfs_fattr fattr; struct nfs4_accessargs args = { .fh = NFS_FH(inode), + .bitmask = server->attr_bitmask, + }; + struct nfs4_accessres res = { + .server = server, + .fattr = &fattr, }; - struct nfs4_accessres res = { 0 }; struct rpc_message msg = { .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_ACCESS], .rpc_argp = &args, @@ -1786,6 +1760,7 @@ static int _nfs4_proc_access(struct inode *inode, struct nfs_access_entry *entry if (mode & MAY_EXEC) args.access |= NFS4_ACCESS_EXECUTE; } + nfs_fattr_init(&fattr); status = rpc_call_sync(NFS_CLIENT(inode), &msg, 0); if (!status) { entry->mask = 0; @@ -1795,6 +1770,7 @@ static int _nfs4_proc_access(struct inode *inode, struct nfs_access_entry *entry entry->mask |= MAY_WRITE; if (res.access & (NFS4_ACCESS_LOOKUP|NFS4_ACCESS_EXECUTE)) entry->mask |= MAY_EXEC; + nfs_refresh_inode(inode, &fattr); } return status; } @@ -3303,7 +3279,7 @@ static int nfs4_proc_unlck(struct nfs4_state *state, int cmd, struct file_lock * status = -ENOMEM; if (seqid == NULL) goto out; - task = nfs4_do_unlck(request, request->fl_file->private_data, lsp, seqid); + task = nfs4_do_unlck(request, nfs_file_open_context(request->fl_file), lsp, seqid); status = PTR_ERR(task); if (IS_ERR(task)) goto out; @@ -3447,7 +3423,7 @@ static 
int _nfs4_do_setlk(struct nfs4_state *state, int cmd, struct file_lock *f int ret; dprintk("%s: begin!\n", __FUNCTION__); - data = nfs4_alloc_lockdata(fl, fl->fl_file->private_data, + data = nfs4_alloc_lockdata(fl, nfs_file_open_context(fl->fl_file), fl->fl_u.nfs4_fl.owner); if (data == NULL) return -ENOMEM; @@ -3573,7 +3549,7 @@ nfs4_proc_lock(struct file *filp, int cmd, struct file_lock *request) int status; /* verify open state */ - ctx = (struct nfs_open_context *)filp->private_data; + ctx = nfs_file_open_context(filp); state = ctx->state; if (request->fl_start < 0 || request->fl_end < 0) diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c index 3e4adf8..bfb3626 100644 --- a/fs/nfs/nfs4state.c +++ b/fs/nfs/nfs4state.c @@ -774,7 +774,7 @@ static int nfs4_reclaim_locks(struct nfs4_state_recovery_ops *ops, struct nfs4_s for (fl = inode->i_flock; fl != 0; fl = fl->fl_next) { if (!(fl->fl_flags & (FL_POSIX|FL_FLOCK))) continue; - if (((struct nfs_open_context *)fl->fl_file->private_data)->state != state) + if (nfs_file_open_context(fl->fl_file)->state != state) continue; status = ops->recover_lock(state, fl); if (status >= 0) diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c index badd73b..1fde1e7 100644 --- a/fs/nfs/nfs4xdr.c +++ b/fs/nfs/nfs4xdr.c @@ -376,10 +376,12 @@ static int nfs4_stat_to_errno(int); decode_locku_maxsz) #define NFS4_enc_access_sz (compound_encode_hdr_maxsz + \ encode_putfh_maxsz + \ - encode_access_maxsz) + encode_access_maxsz + \ + encode_getattr_maxsz) #define NFS4_dec_access_sz (compound_decode_hdr_maxsz + \ decode_putfh_maxsz + \ - decode_access_maxsz) + decode_access_maxsz + \ + decode_getattr_maxsz) #define NFS4_enc_getattr_sz (compound_encode_hdr_maxsz + \ encode_putfh_maxsz + \ encode_getattr_maxsz) @@ -1376,14 +1378,20 @@ static int nfs4_xdr_enc_access(struct rpc_rqst *req, __be32 *p, const struct nfs { struct xdr_stream xdr; struct compound_hdr hdr = { - .nops = 2, + .nops = 3, }; int status; xdr_init_encode(&xdr, &req->rq_snd_buf, 
p); encode_compound_hdr(&xdr, &hdr); - if ((status = encode_putfh(&xdr, args->fh)) == 0) - status = encode_access(&xdr, args->access); + status = encode_putfh(&xdr, args->fh); + if (status != 0) + goto out; + status = encode_access(&xdr, args->access); + if (status != 0) + goto out; + status = encode_getfattr(&xdr, args->bitmask); +out: return status; } @@ -3783,8 +3791,13 @@ static int nfs4_xdr_dec_access(struct rpc_rqst *rqstp, __be32 *p, struct nfs4_ac xdr_init_decode(&xdr, &rqstp->rq_rcv_buf, p); if ((status = decode_compound_hdr(&xdr, &hdr)) != 0) goto out; - if ((status = decode_putfh(&xdr)) == 0) - status = decode_access(&xdr, res); + status = decode_putfh(&xdr); + if (status != 0) + goto out; + status = decode_access(&xdr, res); + if (status != 0) + goto out; + decode_getfattr(&xdr, res->fattr, res->server); out: return status; } diff --git a/fs/nfs/read.c b/fs/nfs/read.c index 19e0563..d6e62d7 100644 --- a/fs/nfs/read.c +++ b/fs/nfs/read.c @@ -497,8 +497,7 @@ int nfs_readpage(struct file *file, struct page *page) if (ctx == NULL) goto out_unlock; } else - ctx = get_nfs_open_context((struct nfs_open_context *) - file->private_data); + ctx = get_nfs_open_context(nfs_file_open_context(file)); error = nfs_readpage_async(ctx, inode, page); @@ -576,8 +575,7 @@ int nfs_readpages(struct file *filp, struct address_space *mapping, if (desc.ctx == NULL) return -EBADF; } else - desc.ctx = get_nfs_open_context((struct nfs_open_context *) - filp->private_data); + desc.ctx = get_nfs_open_context(nfs_file_open_context(filp)); if (rsize < PAGE_CACHE_SIZE) nfs_pageio_init(&pgio, inode, nfs_pagein_multi, rsize, 0); else diff --git a/fs/nfs/super.c b/fs/nfs/super.c index b2a851c..14196fe 100644 --- a/fs/nfs/super.c +++ b/fs/nfs/super.c @@ -506,8 +506,8 @@ static int nfs_show_options(struct seq_file *m, struct vfsmount *mnt) nfs_show_mount_options(m, nfss, 0); - seq_puts(m, ",addr="); - seq_escape(m, nfss->nfs_client->cl_hostname, " \t\n\\"); + seq_printf(m, 
",addr="NIPQUAD_FMT, + NIPQUAD(nfss->nfs_client->cl_addr.sin_addr)); return 0; } @@ -911,13 +911,13 @@ static int nfs_parse_mount_options(char *raw, kfree(string); switch (token) { - case Opt_udp: + case Opt_xprt_udp: mnt->flags &= ~NFS_MOUNT_TCP; mnt->nfs_server.protocol = IPPROTO_UDP; mnt->timeo = 7; mnt->retrans = 5; break; - case Opt_tcp: + case Opt_xprt_tcp: mnt->flags |= NFS_MOUNT_TCP; mnt->nfs_server.protocol = IPPROTO_TCP; mnt->timeo = 600; @@ -936,10 +936,10 @@ static int nfs_parse_mount_options(char *raw, kfree(string); switch (token) { - case Opt_udp: + case Opt_xprt_udp: mnt->mount_server.protocol = IPPROTO_UDP; break; - case Opt_tcp: + case Opt_xprt_tcp: mnt->mount_server.protocol = IPPROTO_TCP; break; default: @@ -1153,20 +1153,20 @@ static int nfs_validate_mount_data(struct nfs_mount_data **options, c = strchr(dev_name, ':'); if (c == NULL) return -EINVAL; - len = c - dev_name - 1; + len = c - dev_name; if (len > sizeof(data->hostname)) - return -EINVAL; + return -ENAMETOOLONG; strncpy(data->hostname, dev_name, len); args.nfs_server.hostname = data->hostname; c++; if (strlen(c) > NFS_MAXPATHLEN) - return -EINVAL; + return -ENAMETOOLONG; args.nfs_server.export_path = c; status = nfs_try_mount(&args, mntfh); if (status) - return -EINVAL; + return status; /* * Translate to nfs_mount_data, which nfs_fill_super @@ -1668,7 +1668,7 @@ static int nfs4_validate_mount_data(struct nfs4_mount_data **options, /* while calculating len, pretend ':' is '\0' */ len = c - dev_name; if (len > NFS4_MAXNAMLEN) - return -EINVAL; + return -ENAMETOOLONG; *hostname = kzalloc(len, GFP_KERNEL); if (*hostname == NULL) return -ENOMEM; @@ -1677,7 +1677,7 @@ static int nfs4_validate_mount_data(struct nfs4_mount_data **options, c++; /* step over the ':' */ len = strlen(c); if (len > NFS4_MAXPATHLEN) - return -EINVAL; + return -ENAMETOOLONG; *mntpath = kzalloc(len + 1, GFP_KERNEL); if (*mntpath == NULL) return -ENOMEM; diff --git a/fs/nfs/write.c b/fs/nfs/write.c index 
ef97e0c..3e9e268 100644 --- a/fs/nfs/write.c +++ b/fs/nfs/write.c @@ -110,6 +110,13 @@ void nfs_writedata_release(void *wdata) nfs_writedata_free(wdata); } +static void nfs_context_set_write_error(struct nfs_open_context *ctx, int error) +{ + ctx->error = error; + smp_wmb(); + set_bit(NFS_CONTEXT_ERROR_WRITE, &ctx->flags); +} + static struct nfs_page *nfs_page_find_request_locked(struct page *page) { struct nfs_page *req = NULL; @@ -243,10 +250,7 @@ static void nfs_end_page_writeback(struct page *page) /* * Find an associated nfs write request, and prepare to flush it out - * Returns 1 if there was no write request, or if the request was - * already tagged by nfs_set_page_dirty.Returns 0 if the request - * was not tagged. - * May also return an error if the user signalled nfs_wait_on_request(). + * May return an error if the user signalled nfs_wait_on_request(). */ static int nfs_page_async_flush(struct nfs_pageio_descriptor *pgio, struct page *page) @@ -261,7 +265,7 @@ static int nfs_page_async_flush(struct nfs_pageio_descriptor *pgio, req = nfs_page_find_request_locked(page); if (req == NULL) { spin_unlock(&inode->i_lock); - return 1; + return 0; } if (nfs_lock_request_dontget(req)) break; @@ -282,7 +286,7 @@ static int nfs_page_async_flush(struct nfs_pageio_descriptor *pgio, spin_unlock(&inode->i_lock); nfs_unlock_request(req); nfs_pageio_complete(pgio); - return 1; + return 0; } if (nfs_set_page_writeback(page) != 0) { spin_unlock(&inode->i_lock); @@ -290,70 +294,56 @@ static int nfs_page_async_flush(struct nfs_pageio_descriptor *pgio, } radix_tree_tag_set(&nfsi->nfs_page_tree, req->wb_index, NFS_PAGE_TAG_LOCKED); - ret = test_bit(PG_NEED_FLUSH, &req->wb_flags); spin_unlock(&inode->i_lock); nfs_pageio_add_request(pgio, req); - return ret; + return 0; } -/* - * Write an mmapped page to the server. 
- */ -static int nfs_writepage_locked(struct page *page, struct writeback_control *wbc) +static int nfs_do_writepage(struct page *page, struct writeback_control *wbc, struct nfs_pageio_descriptor *pgio) { - struct nfs_pageio_descriptor mypgio, *pgio; - struct nfs_open_context *ctx; struct inode *inode = page->mapping->host; - unsigned offset; - int err; nfs_inc_stats(inode, NFSIOS_VFSWRITEPAGE); nfs_add_stats(inode, NFSIOS_WRITEPAGES, 1); - if (wbc->for_writepages) - pgio = wbc->fs_private; - else { - nfs_pageio_init_write(&mypgio, inode, wb_priority(wbc)); - pgio = &mypgio; - } - nfs_pageio_cond_complete(pgio, page->index); + return nfs_page_async_flush(pgio, page); +} - err = nfs_page_async_flush(pgio, page); - if (err <= 0) - goto out; - err = 0; - offset = nfs_page_length(page); - if (!offset) - goto out; - - nfs_pageio_cond_complete(pgio, page->index); +/* + * Write an mmapped page to the server. + */ +static int nfs_writepage_locked(struct page *page, struct writeback_control *wbc) +{ + struct nfs_pageio_descriptor pgio; + int err; - ctx = nfs_find_open_context(inode, NULL, FMODE_WRITE); - if (ctx == NULL) { - err = -EBADF; - goto out; - } - err = nfs_writepage_setup(ctx, page, 0, offset); - put_nfs_open_context(ctx); - if (err != 0) - goto out; - err = nfs_page_async_flush(pgio, page); - if (err > 0) - err = 0; -out: - if (!wbc->for_writepages) - nfs_pageio_complete(pgio); - return err; + nfs_pageio_init_write(&pgio, page->mapping->host, wb_priority(wbc)); + err = nfs_do_writepage(page, wbc, &pgio); + nfs_pageio_complete(&pgio); + if (err < 0) + return err; + if (pgio.pg_error < 0) + return pgio.pg_error; + return 0; } int nfs_writepage(struct page *page, struct writeback_control *wbc) { - int err; + int ret; - err = nfs_writepage_locked(page, wbc); + ret = nfs_writepage_locked(page, wbc); unlock_page(page); - return err; + return ret; +} + +static int nfs_writepages_callback(struct page *page, struct writeback_control *wbc, void *data) +{ + int ret; + + ret 
= nfs_do_writepage(page, wbc, data); + unlock_page(page); + return ret; } int nfs_writepages(struct address_space *mapping, struct writeback_control *wbc) @@ -365,12 +355,11 @@ int nfs_writepages(struct address_space *mapping, struct writeback_control *wbc) nfs_inc_stats(inode, NFSIOS_VFSWRITEPAGES); nfs_pageio_init_write(&pgio, inode, wb_priority(wbc)); - wbc->fs_private = &pgio; - err = generic_writepages(mapping, wbc); + err = write_cache_pages(mapping, wbc, nfs_writepages_callback, &pgio); nfs_pageio_complete(&pgio); - if (err) + if (err < 0) return err; - if (pgio.pg_error) + if (pgio.pg_error < 0) return pgio.pg_error; return 0; } @@ -395,8 +384,6 @@ static int nfs_inode_add_request(struct inode *inode, struct nfs_page *req) } SetPagePrivate(req->wb_page); set_page_private(req->wb_page, (unsigned long)req); - if (PageDirty(req->wb_page)) - set_bit(PG_NEED_FLUSH, &req->wb_flags); nfsi->npages++; kref_get(&req->wb_kref); return 0; @@ -416,8 +403,6 @@ static void nfs_inode_remove_request(struct nfs_page *req) set_page_private(req->wb_page, 0); ClearPagePrivate(req->wb_page); radix_tree_delete(&nfsi->nfs_page_tree, req->wb_index); - if (test_and_clear_bit(PG_NEED_FLUSH, &req->wb_flags)) - __set_page_dirty_nobuffers(req->wb_page); nfsi->npages--; if (!nfsi->npages) { spin_unlock(&inode->i_lock); @@ -682,7 +667,7 @@ static struct nfs_page * nfs_update_request(struct nfs_open_context* ctx, int nfs_flush_incompatible(struct file *file, struct page *page) { - struct nfs_open_context *ctx = (struct nfs_open_context *)file->private_data; + struct nfs_open_context *ctx = nfs_file_open_context(file); struct nfs_page *req; int do_flush, status; /* @@ -716,7 +701,7 @@ int nfs_flush_incompatible(struct file *file, struct page *page) int nfs_updatepage(struct file *file, struct page *page, unsigned int offset, unsigned int count) { - struct nfs_open_context *ctx = (struct nfs_open_context *)file->private_data; + struct nfs_open_context *ctx = nfs_file_open_context(file); 
struct inode *inode = page->mapping->host; int status = 0; @@ -967,7 +952,7 @@ static void nfs_writeback_done_partial(struct rpc_task *task, void *calldata) if (task->tk_status < 0) { nfs_set_pageerror(page); - req->wb_context->error = task->tk_status; + nfs_context_set_write_error(req->wb_context, task->tk_status); dprintk(", error = %d\n", task->tk_status); goto out; } @@ -1030,7 +1015,7 @@ static void nfs_writeback_done_full(struct rpc_task *task, void *calldata) if (task->tk_status < 0) { nfs_set_pageerror(page); - req->wb_context->error = task->tk_status; + nfs_context_set_write_error(req->wb_context, task->tk_status); dprintk(", error = %d\n", task->tk_status); goto remove_request; } @@ -1244,7 +1229,7 @@ static void nfs_commit_done(struct rpc_task *task, void *calldata) req->wb_bytes, (long long)req_offset(req)); if (task->tk_status < 0) { - req->wb_context->error = task->tk_status; + nfs_context_set_write_error(req->wb_context, task->tk_status); nfs_inode_remove_request(req); dprintk(", error = %d\n", task->tk_status); goto next; @@ -1347,52 +1332,95 @@ long nfs_sync_mapping_wait(struct address_space *mapping, struct writeback_contr return ret; } -/* - * flush the inode to disk. 
- */ -int nfs_wb_all(struct inode *inode) +static int __nfs_write_mapping(struct address_space *mapping, struct writeback_control *wbc, int how) +{ + int ret; + + ret = nfs_writepages(mapping, wbc); + if (ret < 0) + goto out; + ret = nfs_sync_mapping_wait(mapping, wbc, how); + if (ret < 0) + goto out; + return 0; +out: + __mark_inode_dirty(mapping->host, I_DIRTY_PAGES); + return ret; +} + +/* Two pass sync: first using WB_SYNC_NONE, then WB_SYNC_ALL */ +static int nfs_write_mapping(struct address_space *mapping, int how) { - struct address_space *mapping = inode->i_mapping; struct writeback_control wbc = { .bdi = mapping->backing_dev_info, - .sync_mode = WB_SYNC_ALL, + .sync_mode = WB_SYNC_NONE, .nr_to_write = LONG_MAX, .for_writepages = 1, .range_cyclic = 1, }; int ret; - ret = nfs_writepages(mapping, &wbc); + ret = __nfs_write_mapping(mapping, &wbc, how); if (ret < 0) - goto out; - ret = nfs_sync_mapping_wait(mapping, &wbc, 0); - if (ret >= 0) - return 0; -out: - __mark_inode_dirty(mapping->host, I_DIRTY_PAGES); - return ret; + return ret; + wbc.sync_mode = WB_SYNC_ALL; + return __nfs_write_mapping(mapping, &wbc, how); } -int nfs_sync_mapping_range(struct address_space *mapping, loff_t range_start, loff_t range_end, int how) +/* + * flush the inode to disk. 
+ */ +int nfs_wb_all(struct inode *inode) { + return nfs_write_mapping(inode->i_mapping, 0); +} + +int nfs_wb_nocommit(struct inode *inode) +{ + return nfs_write_mapping(inode->i_mapping, FLUSH_NOCOMMIT); +} + +int nfs_wb_page_cancel(struct inode *inode, struct page *page) +{ + struct nfs_page *req; + loff_t range_start = page_offset(page); + loff_t range_end = range_start + (loff_t)(PAGE_CACHE_SIZE - 1); struct writeback_control wbc = { - .bdi = mapping->backing_dev_info, + .bdi = page->mapping->backing_dev_info, .sync_mode = WB_SYNC_ALL, .nr_to_write = LONG_MAX, .range_start = range_start, .range_end = range_end, - .for_writepages = 1, }; - int ret; + int ret = 0; - ret = nfs_writepages(mapping, &wbc); - if (ret < 0) - goto out; - ret = nfs_sync_mapping_wait(mapping, &wbc, how); - if (ret >= 0) + BUG_ON(!PageLocked(page)); + for (;;) { + req = nfs_page_find_request(page); + if (req == NULL) + goto out; + if (test_bit(PG_NEED_COMMIT, &req->wb_flags)) { + nfs_release_request(req); + break; + } + if (nfs_lock_request_dontget(req)) { + nfs_inode_remove_request(req); + /* + * In case nfs_inode_remove_request has marked the + * page as being dirty + */ + cancel_dirty_page(page, PAGE_CACHE_SIZE); + nfs_unlock_request(req); + break; + } + ret = nfs_wait_on_request(req); + if (ret < 0) + goto out; + } + if (!PagePrivate(page)) return 0; + ret = nfs_sync_mapping_wait(page->mapping, &wbc, FLUSH_INVALIDATE); out: - __mark_inode_dirty(mapping->host, I_DIRTY_PAGES); return ret; } @@ -1433,35 +1461,6 @@ int nfs_wb_page(struct inode *inode, struct page* page) return nfs_wb_page_priority(inode, page, FLUSH_STABLE); } -int nfs_set_page_dirty(struct page *page) -{ - struct address_space *mapping = page->mapping; - struct inode *inode; - struct nfs_page *req; - int ret; - - if (!mapping) - goto out_raced; - inode = mapping->host; - if (!inode) - goto out_raced; - spin_lock(&inode->i_lock); - req = nfs_page_find_request_locked(page); - if (req != NULL) { - /* Mark any existing write 
requests for flushing */ - ret = !test_and_set_bit(PG_NEED_FLUSH, &req->wb_flags); - spin_unlock(&inode->i_lock); - nfs_release_request(req); - return ret; - } - ret = __set_page_dirty_nobuffers(page); - spin_unlock(&inode->i_lock); - return ret; -out_raced: - return !TestSetPageDirty(page); -} - - int __init nfs_init_writepagecache(void) { nfs_wdata_cachep = kmem_cache_create("nfs_write_data", diff --git a/include/linux/jiffies.h b/include/linux/jiffies.h index c080f61..f1c87ad 100644 --- a/include/linux/jiffies.h +++ b/include/linux/jiffies.h @@ -115,6 +115,10 @@ static inline u64 get_jiffies_64(void) ((long)(a) - (long)(b) >= 0)) #define time_before_eq(a,b) time_after_eq(b,a) +#define time_in_range(a,b,c) \ + (time_after_eq(a,b) && \ + time_before_eq(a,c)) + /* Same as above, but does so with platform independent 64bit types. * These must be used when utilizing jiffies_64 (i.e. return value of * get_jiffies_64() */ diff --git a/include/linux/nfs_fs.h b/include/linux/nfs_fs.h index 157dcb0..5b42fef 100644 --- a/include/linux/nfs_fs.h +++ b/include/linux/nfs_fs.h @@ -77,6 +77,9 @@ struct nfs_open_context { struct nfs4_state *state; fl_owner_t lockowner; int mode; + + unsigned long flags; +#define NFS_CONTEXT_ERROR_WRITE (0) int error; struct list_head list; @@ -289,9 +292,6 @@ extern int nfs_refresh_inode(struct inode *, struct nfs_fattr *); extern int nfs_post_op_update_inode(struct inode *inode, struct nfs_fattr *fattr); extern int nfs_getattr(struct vfsmount *, struct dentry *, struct kstat *); extern int nfs_permission(struct inode *, int, struct nameidata *); -extern int nfs_access_get_cached(struct inode *, struct rpc_cred *, struct nfs_access_entry *); -extern void nfs_access_add_cache(struct inode *, struct nfs_access_entry *); -extern void nfs_access_zap_cache(struct inode *inode); extern int nfs_open(struct inode *, struct file *); extern int nfs_release(struct inode *, struct file *); extern int nfs_attribute_timeout(struct inode *inode); @@ -328,14 
+328,15 @@ extern const struct inode_operations nfs3_file_inode_operations; extern const struct file_operations nfs_file_operations; extern const struct address_space_operations nfs_file_aops; -static inline struct rpc_cred *nfs_file_cred(struct file *file) +static inline struct nfs_open_context *nfs_file_open_context(struct file *filp) { - if (file != NULL) { - struct nfs_open_context *ctx; + return filp->private_data; +} - ctx = (struct nfs_open_context*)file->private_data; - return ctx->cred; - } +static inline struct rpc_cred *nfs_file_cred(struct file *file) +{ + if (file != NULL) + return nfs_file_open_context(file)->cred; return NULL; } @@ -378,6 +379,8 @@ extern const struct file_operations nfs_dir_operations; extern struct dentry_operations nfs_dentry_operations; extern int nfs_instantiate(struct dentry *dentry, struct nfs_fh *fh, struct nfs_fattr *fattr); +extern int nfs_may_open(struct inode *inode, struct rpc_cred *cred, int openflags); +extern void nfs_access_zap_cache(struct inode *inode); /* * linux/fs/nfs/symlink.c @@ -420,17 +423,17 @@ extern int nfs_flush_incompatible(struct file *file, struct page *page); extern int nfs_updatepage(struct file *, struct page *, unsigned int, unsigned int); extern int nfs_writeback_done(struct rpc_task *, struct nfs_write_data *); extern void nfs_writedata_release(void *); -extern int nfs_set_page_dirty(struct page *); /* * Try to write back everything synchronously (but check the * return value!) 
*/ extern long nfs_sync_mapping_wait(struct address_space *, struct writeback_control *, int); -extern int nfs_sync_mapping_range(struct address_space *, loff_t, loff_t, int); extern int nfs_wb_all(struct inode *inode); +extern int nfs_wb_nocommit(struct inode *inode); extern int nfs_wb_page(struct inode *inode, struct page* page); extern int nfs_wb_page_priority(struct inode *inode, struct page* page, int how); +extern int nfs_wb_page_cancel(struct inode *inode, struct page* page); #if defined(CONFIG_NFS_V3) || defined(CONFIG_NFS_V4) extern int nfs_commit_inode(struct inode *, int); extern struct nfs_write_data *nfs_commit_alloc(void); diff --git a/include/linux/nfs_page.h b/include/linux/nfs_page.h index 78e6079..30dbcc1 100644 --- a/include/linux/nfs_page.h +++ b/include/linux/nfs_page.h @@ -30,7 +30,6 @@ #define PG_BUSY 0 #define PG_NEED_COMMIT 1 #define PG_NEED_RESCHED 2 -#define PG_NEED_FLUSH 3 struct nfs_inode; struct nfs_page { diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h index cf74a4d..0303201 100644 --- a/include/linux/nfs_xdr.h +++ b/include/linux/nfs_xdr.h @@ -538,10 +538,13 @@ typedef u64 clientid4; struct nfs4_accessargs { const struct nfs_fh * fh; + const u32 * bitmask; u32 access; }; struct nfs4_accessres { + const struct nfs_server * server; + struct nfs_fattr * fattr; u32 supported; u32 access; }; diff --git a/include/linux/sunrpc/xprt.h b/include/linux/sunrpc/xprt.h index d11cedd..902a9c0 100644 --- a/include/linux/sunrpc/xprt.h +++ b/include/linux/sunrpc/xprt.h @@ -53,6 +53,9 @@ enum rpc_display_format_t { RPC_DISPLAY_PORT, RPC_DISPLAY_PROTO, RPC_DISPLAY_ALL, + RPC_DISPLAY_HEX_ADDR, + RPC_DISPLAY_HEX_PORT, + RPC_DISPLAY_UNIVERSAL_ADDR, RPC_DISPLAY_MAX, }; diff --git a/include/linux/writeback.h b/include/linux/writeback.h index 4ef4d22..835cc85 100644 --- a/include/linux/writeback.h +++ b/include/linux/writeback.h @@ -61,8 +61,6 @@ struct writeback_control { unsigned for_reclaim:1; /* Invoked from the page allocator */ unsigned 
for_writepages:1; /* This is a writepages() call */ unsigned range_cyclic:1; /* range_start is cyclic */ - - void *fs_private; /* For use by ->writepages() */ }; /* diff --git a/kernel/auditsc.c b/kernel/auditsc.c index 04f3ffb..0ae703c 100644 --- a/kernel/auditsc.c +++ b/kernel/auditsc.c @@ -1525,6 +1525,7 @@ add_names: context->names[idx].ino = (unsigned long)-1; } } +EXPORT_SYMBOL_GPL(__audit_inode_child); /** * auditsc_get_stamp - get local copies of audit_context values diff --git a/net/sunrpc/rpc_pipe.c b/net/sunrpc/rpc_pipe.c index 669e12a..ae83ac8 100644 --- a/net/sunrpc/rpc_pipe.c +++ b/net/sunrpc/rpc_pipe.c @@ -14,7 +14,7 @@ #include #include #include -#include +#include #include #include @@ -585,6 +585,7 @@ rpc_populate(struct dentry *parent, if (S_ISDIR(mode)) inc_nlink(dir); d_add(dentry, inode); + fsnotify_create(dir, dentry); } mutex_unlock(&dir->i_mutex); return 0; @@ -606,7 +607,7 @@ __rpc_mkdir(struct inode *dir, struct dentry *dentry) inode->i_ino = iunique(dir->i_sb, 100); d_instantiate(dentry, inode); inc_nlink(dir); - inode_dir_notify(dir, DN_CREATE); + fsnotify_mkdir(dir, dentry); return 0; out_err: printk(KERN_WARNING "%s: %s failed to allocate inode for dentry %s\n", @@ -748,7 +749,7 @@ rpc_mkpipe(struct dentry *parent, const char *name, void *private, struct rpc_pi rpci->flags = flags; rpci->ops = ops; rpci->nkern_readwriters = 1; - inode_dir_notify(dir, DN_CREATE); + fsnotify_create(dir, dentry); dget(dentry); out: mutex_unlock(&dir->i_mutex); diff --git a/net/sunrpc/rpcb_clnt.c b/net/sunrpc/rpcb_clnt.c index d1740db..0bb6709 100644 --- a/net/sunrpc/rpcb_clnt.c +++ b/net/sunrpc/rpcb_clnt.c @@ -16,6 +16,8 @@ #include #include +#include +#include #include #include @@ -137,10 +139,13 @@ struct rpcbind_args { static struct rpc_procinfo rpcb_procedures2[]; static struct rpc_procinfo rpcb_procedures3[]; -static struct rpcb_info { +struct rpcb_info { int rpc_vers; struct rpc_procinfo * rpc_proc; -} rpcb_next_version[]; +}; + +static struct 
rpcb_info rpcb_next_version[]; +static struct rpcb_info rpcb_next_version6[]; static void rpcb_getport_prepare(struct rpc_task *task, void *calldata) { @@ -190,7 +195,17 @@ static struct rpc_clnt *rpcb_create(char *hostname, struct sockaddr *srvaddr, RPC_CLNT_CREATE_INTR), }; - ((struct sockaddr_in *)srvaddr)->sin_port = htons(RPCBIND_PORT); + switch (srvaddr->sa_family) { + case AF_INET: + ((struct sockaddr_in *)srvaddr)->sin_port = htons(RPCBIND_PORT); + break; + case AF_INET6: + ((struct sockaddr_in6 *)srvaddr)->sin6_port = htons(RPCBIND_PORT); + break; + default: + return NULL; + } + if (!privileged) args.flags |= RPC_CLNT_CREATE_NONPRIVPORT; return rpc_create(&args); @@ -316,6 +331,7 @@ void rpcb_getport_async(struct rpc_task *task) struct rpc_task *child; struct sockaddr addr; int status; + struct rpcb_info *info; dprintk("RPC: %5u %s(%s, %u, %u, %d)\n", task->tk_pid, __FUNCTION__, @@ -343,18 +359,43 @@ void rpcb_getport_async(struct rpc_task *task) goto bailout_nofree; } - if (rpcb_next_version[xprt->bind_index].rpc_proc == NULL) { + rpc_peeraddr(clnt, (void *)&addr, sizeof(addr)); + + /* Don't ever use rpcbind v2 for AF_INET6 requests */ + switch (addr.sa_family) { + case AF_INET: + info = rpcb_next_version; + break; + case AF_INET6: + info = rpcb_next_version6; + break; + default: + status = -EAFNOSUPPORT; + dprintk("RPC: %5u %s: bad address family\n", + task->tk_pid, __FUNCTION__); + goto bailout_nofree; + } + if (info[xprt->bind_index].rpc_proc == NULL) { xprt->bind_index = 0; status = -EACCES; /* tell caller to try again later */ dprintk("RPC: %5u %s: no more getport versions available\n", task->tk_pid, __FUNCTION__); goto bailout_nofree; } - bind_version = rpcb_next_version[xprt->bind_index].rpc_vers; + bind_version = info[xprt->bind_index].rpc_vers; dprintk("RPC: %5u %s: trying rpcbind version %u\n", task->tk_pid, __FUNCTION__, bind_version); + rpcb_clnt = rpcb_create(clnt->cl_server, &addr, xprt->prot, + bind_version, 0); + if (IS_ERR(rpcb_clnt)) { + 
status = PTR_ERR(rpcb_clnt); + dprintk("RPC: %5u %s: rpcb_create failed, error %ld\n", + task->tk_pid, __FUNCTION__, PTR_ERR(rpcb_clnt)); + goto bailout_nofree; + } + map = kzalloc(sizeof(struct rpcbind_args), GFP_ATOMIC); if (!map) { status = -ENOMEM; @@ -369,26 +410,18 @@ void rpcb_getport_async(struct rpc_task *task) map->r_xprt = xprt_get(xprt); map->r_netid = (xprt->prot == IPPROTO_TCP) ? RPCB_NETID_TCP : RPCB_NETID_UDP; - memcpy(&map->r_addr, rpc_peeraddr2str(clnt, RPC_DISPLAY_ADDR), - sizeof(map->r_addr)); + memcpy(&map->r_addr, + rpc_peeraddr2str(rpcb_clnt, RPC_DISPLAY_UNIVERSAL_ADDR), + sizeof(map->r_addr)); map->r_owner = RPCB_OWNER_STRING; /* ignored for GETADDR */ - rpc_peeraddr(clnt, (void *)&addr, sizeof(addr)); - rpcb_clnt = rpcb_create(clnt->cl_server, &addr, xprt->prot, bind_version, 0); - if (IS_ERR(rpcb_clnt)) { - status = PTR_ERR(rpcb_clnt); - dprintk("RPC: %5u %s: rpcb_create failed, error %ld\n", - task->tk_pid, __FUNCTION__, PTR_ERR(rpcb_clnt)); - goto bailout; - } - child = rpc_run_task(rpcb_clnt, RPC_TASK_ASYNC, &rpcb_getport_ops, map); rpc_release_client(rpcb_clnt); if (IS_ERR(child)) { status = -EIO; dprintk("RPC: %5u %s: rpc_run_task failed\n", task->tk_pid, __FUNCTION__); - goto bailout_nofree; + goto bailout; } rpc_put_task(child); @@ -490,10 +523,11 @@ static int rpcb_decode_getaddr(struct rpc_rqst *req, __be32 *p, unsigned short *portp) { char *addr; - int addr_len, c, i, f, first, val; + u32 addr_len; + int c, i, f, first, val; *portp = 0; - addr_len = (unsigned int) ntohl(*p++); + addr_len = ntohl(*p++); if (addr_len > RPCB_MAXADDRLEN) /* sanity */ return -EINVAL; @@ -593,6 +627,14 @@ static struct rpcb_info rpcb_next_version[] = { { 0, NULL }, }; +static struct rpcb_info rpcb_next_version6[] = { +#ifdef CONFIG_SUNRPC_BIND34 + { 4, &rpcb_procedures4[RPCBPROC_GETVERSADDR] }, + { 3, &rpcb_procedures3[RPCBPROC_GETADDR] }, +#endif + { 0, NULL }, +}; + static struct rpc_version rpcb_version2 = { .number = 2, .nrprocs = RPCB_HIGHPROC_2, 
diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c index 4ae7eed..f2c00fb 100644 --- a/net/sunrpc/xprtsock.c +++ b/net/sunrpc/xprtsock.c @@ -13,6 +13,9 @@ * (C) 1999 Trond Myklebust * * IP socket transport implementation, (C) 2005 Chuck Lever + * + * IPv6 support contributed by Gilles Quillard, Bull Open Source, 2005. + * */ #include @@ -260,14 +263,29 @@ struct sock_xprt { #define TCP_RCV_COPY_XID (1UL << 2) #define TCP_RCV_COPY_DATA (1UL << 3) -static void xs_format_peer_addresses(struct rpc_xprt *xprt) +static inline struct sockaddr *xs_addr(struct rpc_xprt *xprt) +{ + return (struct sockaddr *) &xprt->addr; +} + +static inline struct sockaddr_in *xs_addr_in(struct rpc_xprt *xprt) +{ + return (struct sockaddr_in *) &xprt->addr; +} + +static inline struct sockaddr_in6 *xs_addr_in6(struct rpc_xprt *xprt) +{ + return (struct sockaddr_in6 *) &xprt->addr; +} + +static void xs_format_ipv4_peer_addresses(struct rpc_xprt *xprt) { - struct sockaddr_in *addr = (struct sockaddr_in *) &xprt->addr; + struct sockaddr_in *addr = xs_addr_in(xprt); char *buf; buf = kzalloc(20, GFP_KERNEL); if (buf) { - snprintf(buf, 20, "%u.%u.%u.%u", + snprintf(buf, 20, NIPQUAD_FMT, NIPQUAD(addr->sin_addr.s_addr)); } xprt->address_strings[RPC_DISPLAY_ADDR] = buf; @@ -279,26 +297,115 @@ static void xs_format_peer_addresses(struct rpc_xprt *xprt) } xprt->address_strings[RPC_DISPLAY_PORT] = buf; - if (xprt->prot == IPPROTO_UDP) - xprt->address_strings[RPC_DISPLAY_PROTO] = "udp"; - else - xprt->address_strings[RPC_DISPLAY_PROTO] = "tcp"; + buf = kzalloc(8, GFP_KERNEL); + if (buf) { + if (xprt->prot == IPPROTO_UDP) + snprintf(buf, 8, "udp"); + else + snprintf(buf, 8, "tcp"); + } + xprt->address_strings[RPC_DISPLAY_PROTO] = buf; buf = kzalloc(48, GFP_KERNEL); if (buf) { - snprintf(buf, 48, "addr=%u.%u.%u.%u port=%u proto=%s", + snprintf(buf, 48, "addr="NIPQUAD_FMT" port=%u proto=%s", NIPQUAD(addr->sin_addr.s_addr), ntohs(addr->sin_port), xprt->prot == IPPROTO_UDP ? 
"udp" : "tcp"); } xprt->address_strings[RPC_DISPLAY_ALL] = buf; + + buf = kzalloc(10, GFP_KERNEL); + if (buf) { + snprintf(buf, 10, "%02x%02x%02x%02x", + NIPQUAD(addr->sin_addr.s_addr)); + } + xprt->address_strings[RPC_DISPLAY_HEX_ADDR] = buf; + + buf = kzalloc(8, GFP_KERNEL); + if (buf) { + snprintf(buf, 8, "%4hx", + ntohs(addr->sin_port)); + } + xprt->address_strings[RPC_DISPLAY_HEX_PORT] = buf; + + buf = kzalloc(30, GFP_KERNEL); + if (buf) { + snprintf(buf, 30, NIPQUAD_FMT".%u.%u", + NIPQUAD(addr->sin_addr.s_addr), + ntohs(addr->sin_port) >> 8, + ntohs(addr->sin_port) & 0xff); + } + xprt->address_strings[RPC_DISPLAY_UNIVERSAL_ADDR] = buf; +} + +static void xs_format_ipv6_peer_addresses(struct rpc_xprt *xprt) +{ + struct sockaddr_in6 *addr = xs_addr_in6(xprt); + char *buf; + + buf = kzalloc(40, GFP_KERNEL); + if (buf) { + snprintf(buf, 40, NIP6_FMT, + NIP6(addr->sin6_addr)); + } + xprt->address_strings[RPC_DISPLAY_ADDR] = buf; + + buf = kzalloc(8, GFP_KERNEL); + if (buf) { + snprintf(buf, 8, "%u", + ntohs(addr->sin6_port)); + } + xprt->address_strings[RPC_DISPLAY_PORT] = buf; + + buf = kzalloc(8, GFP_KERNEL); + if (buf) { + if (xprt->prot == IPPROTO_UDP) + snprintf(buf, 8, "udp"); + else + snprintf(buf, 8, "tcp"); + } + xprt->address_strings[RPC_DISPLAY_PROTO] = buf; + + buf = kzalloc(64, GFP_KERNEL); + if (buf) { + snprintf(buf, 64, "addr="NIP6_FMT" port=%u proto=%s", + NIP6(addr->sin6_addr), + ntohs(addr->sin6_port), + xprt->prot == IPPROTO_UDP ? 
"udp" : "tcp"); + } + xprt->address_strings[RPC_DISPLAY_ALL] = buf; + + buf = kzalloc(36, GFP_KERNEL); + if (buf) { + snprintf(buf, 36, NIP6_SEQFMT, + NIP6(addr->sin6_addr)); + } + xprt->address_strings[RPC_DISPLAY_HEX_ADDR] = buf; + + buf = kzalloc(8, GFP_KERNEL); + if (buf) { + snprintf(buf, 8, "%4hx", + ntohs(addr->sin6_port)); + } + xprt->address_strings[RPC_DISPLAY_HEX_PORT] = buf; + + buf = kzalloc(50, GFP_KERNEL); + if (buf) { + snprintf(buf, 50, NIP6_FMT".%u.%u", + NIP6(addr->sin6_addr), + ntohs(addr->sin6_port) >> 8, + ntohs(addr->sin6_port) & 0xff); + } + xprt->address_strings[RPC_DISPLAY_UNIVERSAL_ADDR] = buf; } static void xs_free_peer_addresses(struct rpc_xprt *xprt) { - kfree(xprt->address_strings[RPC_DISPLAY_ADDR]); - kfree(xprt->address_strings[RPC_DISPLAY_PORT]); - kfree(xprt->address_strings[RPC_DISPLAY_ALL]); + int i; + + for (i = 0; i < RPC_DISPLAY_MAX; i++) + kfree(xprt->address_strings[i]); } #define XS_SENDMSG_FLAGS (MSG_DONTWAIT | MSG_NOSIGNAL) @@ -463,7 +570,7 @@ static int xs_udp_send_request(struct rpc_task *task) req->rq_xtime = jiffies; status = xs_sendpages(transport->sock, - (struct sockaddr *) &xprt->addr, + xs_addr(xprt), xprt->addrlen, xdr, req->rq_bytes_sent); @@ -523,7 +630,8 @@ static int xs_tcp_send_request(struct rpc_task *task) struct rpc_xprt *xprt = req->rq_xprt; struct sock_xprt *transport = container_of(xprt, struct sock_xprt, xprt); struct xdr_buf *xdr = &req->rq_snd_buf; - int status, retry = 0; + int status; + unsigned int retry = 0; xs_encode_tcp_record_marker(&req->rq_snd_buf); @@ -1139,14 +1247,23 @@ static unsigned short xs_get_random_port(void) */ static void xs_set_port(struct rpc_xprt *xprt, unsigned short port) { - struct sockaddr_in *sap = (struct sockaddr_in *) &xprt->addr; + struct sockaddr *addr = xs_addr(xprt); dprintk("RPC: setting port for xprt %p to %u\n", xprt, port); - sap->sin_port = htons(port); + switch (addr->sa_family) { + case AF_INET: + ((struct sockaddr_in *)addr)->sin_port = htons(port); + 
break; + case AF_INET6: + ((struct sockaddr_in6 *)addr)->sin6_port = htons(port); + break; + default: + BUG(); + } } -static int xs_bind(struct sock_xprt *transport, struct socket *sock) +static int xs_bind4(struct sock_xprt *transport, struct socket *sock) { struct sockaddr_in myaddr = { .sin_family = AF_INET, @@ -1174,8 +1291,42 @@ static int xs_bind(struct sock_xprt *transport, struct socket *sock) else port--; } while (err == -EADDRINUSE && port != transport->port); - dprintk("RPC: xs_bind "NIPQUAD_FMT":%u: %s (%d)\n", - NIPQUAD(myaddr.sin_addr), port, err ? "failed" : "ok", err); + dprintk("RPC: %s "NIPQUAD_FMT":%u: %s (%d)\n", + __FUNCTION__, NIPQUAD(myaddr.sin_addr), + port, err ? "failed" : "ok", err); + return err; +} + +static int xs_bind6(struct sock_xprt *transport, struct socket *sock) +{ + struct sockaddr_in6 myaddr = { + .sin6_family = AF_INET6, + }; + struct sockaddr_in6 *sa; + int err; + unsigned short port = transport->port; + + if (!transport->xprt.resvport) + port = 0; + sa = (struct sockaddr_in6 *)&transport->addr; + myaddr.sin6_addr = sa->sin6_addr; + do { + myaddr.sin6_port = htons(port); + err = kernel_bind(sock, (struct sockaddr *) &myaddr, + sizeof(myaddr)); + if (!transport->xprt.resvport) + break; + if (err == 0) { + transport->port = port; + break; + } + if (port <= xprt_min_resvport) + port = xprt_max_resvport; + else + port--; + } while (err == -EADDRINUSE && port != transport->port); + dprintk("RPC: xs_bind6 "NIP6_FMT":%u: %s (%d)\n", + NIP6(myaddr.sin6_addr), port, err ? 
"failed" : "ok", err); return err; } @@ -1183,38 +1334,69 @@ static int xs_bind(struct sock_xprt *transport, struct socket *sock) static struct lock_class_key xs_key[2]; static struct lock_class_key xs_slock_key[2]; -static inline void xs_reclassify_socket(struct socket *sock) +static inline void xs_reclassify_socket4(struct socket *sock) { struct sock *sk = sock->sk; + BUG_ON(sk->sk_lock.owner != NULL); - switch (sk->sk_family) { - case AF_INET: - sock_lock_init_class_and_name(sk, "slock-AF_INET-NFS", - &xs_slock_key[0], "sk_lock-AF_INET-NFS", &xs_key[0]); - break; + sock_lock_init_class_and_name(sk, "slock-AF_INET-RPC", + &xs_slock_key[0], "sk_lock-AF_INET-RPC", &xs_key[0]); +} - case AF_INET6: - sock_lock_init_class_and_name(sk, "slock-AF_INET6-NFS", - &xs_slock_key[1], "sk_lock-AF_INET6-NFS", &xs_key[1]); - break; +static inline void xs_reclassify_socket6(struct socket *sock) +{ + struct sock *sk = sock->sk; - default: - BUG(); - } + BUG_ON(sk->sk_lock.owner != NULL); + sock_lock_init_class_and_name(sk, "slock-AF_INET6-RPC", + &xs_slock_key[1], "sk_lock-AF_INET6-RPC", &xs_key[1]); } #else -static inline void xs_reclassify_socket(struct socket *sock) +static inline void xs_reclassify_socket4(struct socket *sock) +{ +} + +static inline void xs_reclassify_socket6(struct socket *sock) { } #endif +static void xs_udp_finish_connecting(struct rpc_xprt *xprt, struct socket *sock) +{ + struct sock_xprt *transport = container_of(xprt, struct sock_xprt, xprt); + + if (!transport->inet) { + struct sock *sk = sock->sk; + + write_lock_bh(&sk->sk_callback_lock); + + sk->sk_user_data = xprt; + transport->old_data_ready = sk->sk_data_ready; + transport->old_state_change = sk->sk_state_change; + transport->old_write_space = sk->sk_write_space; + sk->sk_data_ready = xs_udp_data_ready; + sk->sk_write_space = xs_udp_write_space; + sk->sk_no_check = UDP_CSUM_NORCV; + sk->sk_allocation = GFP_ATOMIC; + + xprt_set_connected(xprt); + + /* Reset to new socket */ + transport->sock = sock; 
+ transport->inet = sk; + + write_unlock_bh(&sk->sk_callback_lock); + } + xs_udp_do_set_buffer_size(xprt); +} + /** - * xs_udp_connect_worker - set up a UDP socket + * xs_udp_connect_worker4 - set up a UDP socket * @work: RPC transport to connect * * Invoked by a work queue tasklet. */ -static void xs_udp_connect_worker(struct work_struct *work) +static void xs_udp_connect_worker4(struct work_struct *work) { struct sock_xprt *transport = container_of(work, struct sock_xprt, connect_worker.work); @@ -1232,9 +1414,9 @@ static void xs_udp_connect_worker(struct work_struct *work) dprintk("RPC: can't create UDP transport socket (%d).\n", -err); goto out; } - xs_reclassify_socket(sock); + xs_reclassify_socket4(sock); - if (xs_bind(transport, sock)) { + if (xs_bind4(transport, sock)) { sock_release(sock); goto out; } @@ -1242,29 +1424,48 @@ static void xs_udp_connect_worker(struct work_struct *work) dprintk("RPC: worker connecting xprt %p to address: %s\n", xprt, xprt->address_strings[RPC_DISPLAY_ALL]); - if (!transport->inet) { - struct sock *sk = sock->sk; + xs_udp_finish_connecting(xprt, sock); + status = 0; +out: + xprt_wake_pending_tasks(xprt, status); + xprt_clear_connecting(xprt); +} - write_lock_bh(&sk->sk_callback_lock); +/** + * xs_udp_connect_worker6 - set up a UDP socket + * @work: RPC transport to connect + * + * Invoked by a work queue tasklet. 
+ */ +static void xs_udp_connect_worker6(struct work_struct *work) +{ + struct sock_xprt *transport = + container_of(work, struct sock_xprt, connect_worker.work); + struct rpc_xprt *xprt = &transport->xprt; + struct socket *sock = transport->sock; + int err, status = -EIO; - sk->sk_user_data = xprt; - transport->old_data_ready = sk->sk_data_ready; - transport->old_state_change = sk->sk_state_change; - transport->old_write_space = sk->sk_write_space; - sk->sk_data_ready = xs_udp_data_ready; - sk->sk_write_space = xs_udp_write_space; - sk->sk_no_check = UDP_CSUM_NORCV; - sk->sk_allocation = GFP_ATOMIC; + if (xprt->shutdown || !xprt_bound(xprt)) + goto out; - xprt_set_connected(xprt); + /* Start by resetting any existing state */ + xs_close(xprt); - /* Reset to new socket */ - transport->sock = sock; - transport->inet = sk; + if ((err = sock_create_kern(PF_INET6, SOCK_DGRAM, IPPROTO_UDP, &sock)) < 0) { + dprintk("RPC: can't create UDP transport socket (%d).\n", -err); + goto out; + } + xs_reclassify_socket6(sock); - write_unlock_bh(&sk->sk_callback_lock); + if (xs_bind6(transport, sock) < 0) { + sock_release(sock); + goto out; } - xs_udp_do_set_buffer_size(xprt); + + dprintk("RPC: worker connecting xprt %p to address: %s\n", + xprt, xprt->address_strings[RPC_DISPLAY_ALL]); + + xs_udp_finish_connecting(xprt, sock); status = 0; out: xprt_wake_pending_tasks(xprt, status); @@ -1295,13 +1496,52 @@ static void xs_tcp_reuse_connection(struct rpc_xprt *xprt) result); } +static int xs_tcp_finish_connecting(struct rpc_xprt *xprt, struct socket *sock) +{ + struct sock_xprt *transport = container_of(xprt, struct sock_xprt, xprt); + + if (!transport->inet) { + struct sock *sk = sock->sk; + + write_lock_bh(&sk->sk_callback_lock); + + sk->sk_user_data = xprt; + transport->old_data_ready = sk->sk_data_ready; + transport->old_state_change = sk->sk_state_change; + transport->old_write_space = sk->sk_write_space; + sk->sk_data_ready = xs_tcp_data_ready; + sk->sk_state_change = 
xs_tcp_state_change; + sk->sk_write_space = xs_tcp_write_space; + sk->sk_allocation = GFP_ATOMIC; + + /* socket options */ + sk->sk_userlocks |= SOCK_BINDPORT_LOCK; + sock_reset_flag(sk, SOCK_LINGER); + tcp_sk(sk)->linger2 = 0; + tcp_sk(sk)->nonagle |= TCP_NAGLE_OFF; + + xprt_clear_connected(xprt); + + /* Reset to new socket */ + transport->sock = sock; + transport->inet = sk; + + write_unlock_bh(&sk->sk_callback_lock); + } + + /* Tell the socket layer to start connecting... */ + xprt->stat.connect_count++; + xprt->stat.connect_start = jiffies; + return kernel_connect(sock, xs_addr(xprt), xprt->addrlen, O_NONBLOCK); +} + /** - * xs_tcp_connect_worker - connect a TCP socket to a remote endpoint + * xs_tcp_connect_worker4 - connect a TCP socket to a remote endpoint * @work: RPC transport to connect * * Invoked by a work queue tasklet. */ -static void xs_tcp_connect_worker(struct work_struct *work) +static void xs_tcp_connect_worker4(struct work_struct *work) { struct sock_xprt *transport = container_of(work, struct sock_xprt, connect_worker.work); @@ -1315,13 +1555,12 @@ static void xs_tcp_connect_worker(struct work_struct *work) if (!sock) { /* start from scratch */ if ((err = sock_create_kern(PF_INET, SOCK_STREAM, IPPROTO_TCP, &sock)) < 0) { - dprintk("RPC: can't create TCP transport " - "socket (%d).\n", -err); + dprintk("RPC: can't create TCP transport socket (%d).\n", -err); goto out; } - xs_reclassify_socket(sock); + xs_reclassify_socket4(sock); - if (xs_bind(transport, sock)) { + if (xs_bind4(transport, sock) < 0) { sock_release(sock); goto out; } @@ -1332,43 +1571,70 @@ static void xs_tcp_connect_worker(struct work_struct *work) dprintk("RPC: worker connecting xprt %p to address: %s\n", xprt, xprt->address_strings[RPC_DISPLAY_ALL]); - if (!transport->inet) { - struct sock *sk = sock->sk; - - write_lock_bh(&sk->sk_callback_lock); + status = xs_tcp_finish_connecting(xprt, sock); + dprintk("RPC: %p connect status %d connected %d sock state %d\n", + xprt, 
-status, xprt_connected(xprt), + sock->sk->sk_state); + if (status < 0) { + switch (status) { + case -EINPROGRESS: + case -EALREADY: + goto out_clear; + case -ECONNREFUSED: + case -ECONNRESET: + /* retry with existing socket, after a delay */ + break; + default: + /* get rid of existing socket, and retry */ + xs_close(xprt); + break; + } + } +out: + xprt_wake_pending_tasks(xprt, status); +out_clear: + xprt_clear_connecting(xprt); +} - sk->sk_user_data = xprt; - transport->old_data_ready = sk->sk_data_ready; - transport->old_state_change = sk->sk_state_change; - transport->old_write_space = sk->sk_write_space; - sk->sk_data_ready = xs_tcp_data_ready; - sk->sk_state_change = xs_tcp_state_change; - sk->sk_write_space = xs_tcp_write_space; - sk->sk_allocation = GFP_ATOMIC; +/** + * xs_tcp_connect_worker6 - connect a TCP socket to a remote endpoint + * @work: RPC transport to connect + * + * Invoked by a work queue tasklet. + */ +static void xs_tcp_connect_worker6(struct work_struct *work) +{ + struct sock_xprt *transport = + container_of(work, struct sock_xprt, connect_worker.work); + struct rpc_xprt *xprt = &transport->xprt; + struct socket *sock = transport->sock; + int err, status = -EIO; - /* socket options */ - sk->sk_userlocks |= SOCK_BINDPORT_LOCK; - sock_reset_flag(sk, SOCK_LINGER); - tcp_sk(sk)->linger2 = 0; - tcp_sk(sk)->nonagle |= TCP_NAGLE_OFF; + if (xprt->shutdown || !xprt_bound(xprt)) + goto out; - xprt_clear_connected(xprt); + if (!sock) { + /* start from scratch */ + if ((err = sock_create_kern(PF_INET6, SOCK_STREAM, IPPROTO_TCP, &sock)) < 0) { + dprintk("RPC: can't create TCP transport socket (%d).\n", -err); + goto out; + } + xs_reclassify_socket6(sock); - /* Reset to new socket */ - transport->sock = sock; - transport->inet = sk; + if (xs_bind6(transport, sock) < 0) { + sock_release(sock); + goto out; + } + } else + /* "close" the socket, preserving the local port */ + xs_tcp_reuse_connection(xprt); - write_unlock_bh(&sk->sk_callback_lock); - } + 
dprintk("RPC: worker connecting xprt %p to address: %s\n", + xprt, xprt->address_strings[RPC_DISPLAY_ALL]); - /* Tell the socket layer to start connecting... */ - xprt->stat.connect_count++; - xprt->stat.connect_start = jiffies; - status = kernel_connect(sock, (struct sockaddr *) &xprt->addr, - xprt->addrlen, O_NONBLOCK); + status = xs_tcp_finish_connecting(xprt, sock); dprintk("RPC: %p connect status %d connected %d sock state %d\n", - xprt, -status, xprt_connected(xprt), - sock->sk->sk_state); + xprt, -status, xprt_connected(xprt), sock->sk->sk_state); if (status < 0) { switch (status) { case -EINPROGRESS: @@ -1551,6 +1817,7 @@ static struct rpc_xprt *xs_setup_xprt(struct rpc_xprtsock_create *args, unsigned */ struct rpc_xprt *xs_setup_udp(struct rpc_xprtsock_create *args) { + struct sockaddr *addr = args->dstaddr; struct rpc_xprt *xprt; struct sock_xprt *transport; @@ -1559,15 +1826,11 @@ struct rpc_xprt *xs_setup_udp(struct rpc_xprtsock_create *args) return xprt; transport = container_of(xprt, struct sock_xprt, xprt); - if (ntohs(((struct sockaddr_in *)args->dstaddr)->sin_port) != 0) - xprt_set_bound(xprt); - xprt->prot = IPPROTO_UDP; xprt->tsh_size = 0; /* XXX: header size can vary due to auth type, IPv6, etc. 
*/ xprt->max_payload = (1U << 16) - (MAX_HEADER << 3); - INIT_DELAYED_WORK(&transport->connect_worker, xs_udp_connect_worker); xprt->bind_timeout = XS_BIND_TO; xprt->connect_timeout = XS_UDP_CONN_TO; xprt->reestablish_timeout = XS_UDP_REEST_TO; @@ -1580,7 +1843,28 @@ struct rpc_xprt *xs_setup_udp(struct rpc_xprtsock_create *args) else xprt_set_timeout(&xprt->timeout, 5, 5 * HZ); - xs_format_peer_addresses(xprt); + switch (addr->sa_family) { + case AF_INET: + if (((struct sockaddr_in *)addr)->sin_port != htons(0)) + xprt_set_bound(xprt); + + INIT_DELAYED_WORK(&transport->connect_worker, + xs_udp_connect_worker4); + xs_format_ipv4_peer_addresses(xprt); + break; + case AF_INET6: + if (((struct sockaddr_in6 *)addr)->sin6_port != htons(0)) + xprt_set_bound(xprt); + + INIT_DELAYED_WORK(&transport->connect_worker, + xs_udp_connect_worker6); + xs_format_ipv6_peer_addresses(xprt); + break; + default: + kfree(xprt); + return NULL; + } + dprintk("RPC: set up transport to address %s\n", xprt->address_strings[RPC_DISPLAY_ALL]); @@ -1594,6 +1878,7 @@ struct rpc_xprt *xs_setup_udp(struct rpc_xprtsock_create *args) */ struct rpc_xprt *xs_setup_tcp(struct rpc_xprtsock_create *args) { + struct sockaddr *addr = args->dstaddr; struct rpc_xprt *xprt; struct sock_xprt *transport; @@ -1602,14 +1887,10 @@ struct rpc_xprt *xs_setup_tcp(struct rpc_xprtsock_create *args) return xprt; transport = container_of(xprt, struct sock_xprt, xprt); - if (ntohs(((struct sockaddr_in *)args->dstaddr)->sin_port) != 0) - xprt_set_bound(xprt); - xprt->prot = IPPROTO_TCP; xprt->tsh_size = sizeof(rpc_fraghdr) / sizeof(u32); xprt->max_payload = RPC_MAX_FRAGMENT_SIZE; - INIT_DELAYED_WORK(&transport->connect_worker, xs_tcp_connect_worker); xprt->bind_timeout = XS_BIND_TO; xprt->connect_timeout = XS_TCP_CONN_TO; xprt->reestablish_timeout = XS_TCP_INIT_REEST_TO; @@ -1622,7 +1903,26 @@ struct rpc_xprt *xs_setup_tcp(struct rpc_xprtsock_create *args) else xprt_set_timeout(&xprt->timeout, 2, 60 * HZ); - 
xs_format_peer_addresses(xprt); + switch (addr->sa_family) { + case AF_INET: + if (((struct sockaddr_in *)addr)->sin_port != htons(0)) + xprt_set_bound(xprt); + + INIT_DELAYED_WORK(&transport->connect_worker, xs_tcp_connect_worker4); + xs_format_ipv4_peer_addresses(xprt); + break; + case AF_INET6: + if (((struct sockaddr_in6 *)addr)->sin6_port != htons(0)) + xprt_set_bound(xprt); + + INIT_DELAYED_WORK(&transport->connect_worker, xs_tcp_connect_worker6); + xs_format_ipv6_peer_addresses(xprt); + break; + default: + kfree(xprt); + return NULL; + } + dprintk("RPC: set up transport to address %s\n", xprt->address_strings[RPC_DISPLAY_ALL]);