NFSv4: Eliminate nfsv4 open race... Make NFSv4 return the fully initialized file pointer with the stateid that it created in the lookup w/intent. Signed-off-by: Trond Myklebust --- fs/nfs/dir.c | 29 ++++------ fs/nfs/nfs3proc.c | 2 fs/nfs/nfs4_fs.h | 4 - fs/nfs/nfs4proc.c | 129 +++++++++++++++++++----------------------------- fs/nfs/proc.c | 2 include/linux/nfs_xdr.h | 2 6 files changed, 71 insertions(+), 97 deletions(-) Index: linux-2.6.12-rc1/fs/nfs/nfs4proc.c =================================================================== --- linux-2.6.12-rc1.orig/fs/nfs/nfs4proc.c +++ linux-2.6.12-rc1/fs/nfs/nfs4proc.c @@ -47,6 +47,7 @@ #include #include #include +#include #include "nfs4_fs.h" #include "delegation.h" @@ -870,12 +871,40 @@ int nfs4_do_close(struct inode *inode, s return status; } -struct inode * +static void nfs4_intent_set_file(struct nameidata *nd, struct dentry *dentry, struct nfs4_state *state) +{ + struct file *filp; + int flags; + + flags = nd->intent.open.flags & ~(FMODE_READ|FMODE_WRITE); + switch (nd->intent.open.flags & (FMODE_READ|FMODE_WRITE)) { + case FMODE_READ|FMODE_WRITE: + flags |= O_RDWR; + break; + case FMODE_WRITE: + flags |= O_WRONLY; + break; + case FMODE_READ: + flags |= O_RDONLY; + } + + filp = dentry_open(dget(dentry), mntget(nd->mnt), flags); + if (!IS_ERR(filp)) { + struct nfs_open_context *ctx; + ctx = (struct nfs_open_context *)filp->private_data; + ctx->state = state; + nd->intent.open.file = filp; + } else + nfs4_close_state(state, nd->intent.open.flags); +} + +struct dentry * nfs4_atomic_open(struct inode *dir, struct dentry *dentry, struct nameidata *nd) { struct iattr attr; struct rpc_cred *cred; struct nfs4_state *state; + struct dentry *res; if (nd->flags & LOOKUP_CREATE) { attr.ia_mode = nd->intent.open.create_mode; @@ -889,16 +918,23 @@ nfs4_atomic_open(struct inode *dir, stru cred = rpcauth_lookupcred(NFS_SERVER(dir)->client->cl_auth, 0); if (IS_ERR(cred)) - return (struct inode *)cred; + return (struct dentry *)cred; state = nfs4_do_open(dir, dentry, nd->intent.open.flags, &attr, cred); put_rpccred(cred); - if (IS_ERR(state)) - return (struct inode *)state; - return state->inode; + if (IS_ERR(state)) { + if (PTR_ERR(state) == -ENOENT) + d_add(dentry, NULL); + return (struct dentry *)state; + } + res = d_add_unique(dentry, state->inode); + if (res != NULL) + dentry = res; + nfs4_intent_set_file(nd, dentry, state); + return res; } int -nfs4_open_revalidate(struct inode *dir, struct dentry *dentry, int openflags) +nfs4_open_revalidate(struct inode *dir, struct dentry *dentry, int openflags, struct nameidata *nd) { struct rpc_cred *cred; struct nfs4_state *state; @@ -911,18 +947,18 @@ nfs4_open_revalidate(struct inode *dir, if (IS_ERR(state)) state = nfs4_do_open(dir, dentry, openflags, NULL, cred); put_rpccred(cred); - if (state == ERR_PTR(-ENOENT) && dentry->d_inode == 0) + if (state == ERR_PTR(-ENOENT) && dentry->d_inode == NULL) return 1; if (IS_ERR(state)) return 0; inode = state->inode; + iput(inode); if (inode == dentry->d_inode) { - iput(inode); + nfs4_intent_set_file(nd, dentry, state); return 1; } d_drop(dentry); nfs4_close_state(state, openflags); - iput(inode); return 0; } @@ -1421,7 +1457,7 @@ static int nfs4_proc_commit(struct nfs_w static int nfs4_proc_create(struct inode *dir, struct dentry *dentry, struct iattr *sattr, - int flags) + int flags, struct nameidata *nd) { struct nfs4_state *state; struct rpc_cred *cred; @@ -1443,11 +1479,11 @@ nfs4_proc_create(struct inode *dir, stru struct nfs_fattr fattr; status = nfs4_do_setattr(NFS_SERVER(dir), &fattr, NFS_FH(state->inode), sattr, state); - if (status == 0) - goto out; - } else if (flags != 0) - goto out; - nfs4_close_state(state, flags); + } + if (status == 0 && nd != NULL && (nd->flags & LOOKUP_OPEN)) + nfs4_intent_set_file(nd, dentry, state); + else + nfs4_close_state(state, flags); out: return status; } @@ -2089,65 +2125,6 @@ nfs4_proc_renew(struct nfs4_client *clp) return 0; } -/* - * We will need to arrange for the VFS layer to provide an atomic open. - * Until then, this open method is prone to inefficiency and race conditions - * due to the lookup, potential create, and open VFS calls from sys_open() - * placed on the wire. - */ -static int -nfs4_proc_file_open(struct inode *inode, struct file *filp) -{ - struct dentry *dentry = filp->f_dentry; - struct nfs_open_context *ctx; - struct nfs4_state *state = NULL; - struct rpc_cred *cred; - int status = -ENOMEM; - - dprintk("nfs4_proc_file_open: starting on (%.*s/%.*s)\n", - (int)dentry->d_parent->d_name.len, - dentry->d_parent->d_name.name, - (int)dentry->d_name.len, dentry->d_name.name); - - - /* Find our open stateid */ - cred = rpcauth_lookupcred(NFS_SERVER(inode)->client->cl_auth, 0); - if (IS_ERR(cred)) - return PTR_ERR(cred); - ctx = alloc_nfs_open_context(dentry, cred); - put_rpccred(cred); - if (unlikely(ctx == NULL)) - return -ENOMEM; - status = -EIO; /* ERACE actually */ - state = nfs4_find_state(inode, cred, filp->f_mode); - if (unlikely(state == NULL)) - goto no_state; - ctx->state = state; - nfs4_close_state(state, filp->f_mode); - ctx->mode = filp->f_mode; - nfs_file_set_open_context(filp, ctx); - put_nfs_open_context(ctx); - if (filp->f_mode & FMODE_WRITE) - nfs_begin_data_update(inode); - return 0; -no_state: - printk(KERN_WARNING "NFS: v4 raced in function %s\n", __FUNCTION__); - put_nfs_open_context(ctx); - return status; -} - -/* - * Release our state - */ -static int -nfs4_proc_file_release(struct inode *inode, struct file *filp) -{ - if (filp->f_mode & FMODE_WRITE) - nfs_end_data_update(inode); - nfs_file_clear_open_context(filp); - return 0; -} - static inline int nfs4_server_supports_acls(struct nfs_server *server) { return (server->caps & NFS_CAP_ACLS) @@ -3062,8 +3039,8 @@ struct nfs_rpc_ops nfs_v4_clientops = { .read_setup = nfs4_proc_read_setup, .write_setup = nfs4_proc_write_setup, .commit_setup = nfs4_proc_commit_setup, - .file_open = nfs4_proc_file_open, - .file_release = nfs4_proc_file_release, + .file_open = nfs_open, + .file_release = nfs_release, .lock = nfs4_proc_lock, .clear_acl_cache = nfs4_zap_acl_attr, }; Index: linux-2.6.12-rc1/fs/nfs/nfs3proc.c =================================================================== --- linux-2.6.12-rc1.orig/fs/nfs/nfs3proc.c +++ linux-2.6.12-rc1/fs/nfs/nfs3proc.c @@ -298,7 +298,7 @@ static int nfs3_proc_commit(struct nfs_w */ static int nfs3_proc_create(struct inode *dir, struct dentry *dentry, struct iattr *sattr, - int flags) + int flags, struct nameidata *nd) { struct nfs_fh fhandle; struct nfs_fattr fattr; Index: linux-2.6.12-rc1/fs/nfs/proc.c =================================================================== --- linux-2.6.12-rc1.orig/fs/nfs/proc.c +++ linux-2.6.12-rc1/fs/nfs/proc.c @@ -214,7 +214,7 @@ static int nfs_proc_write(struct nfs_wri static int nfs_proc_create(struct inode *dir, struct dentry *dentry, struct iattr *sattr, - int flags) + int flags, struct nameidata *nd) { struct nfs_fh fhandle; struct nfs_fattr fattr; Index: linux-2.6.12-rc1/fs/nfs/dir.c =================================================================== --- linux-2.6.12-rc1.orig/fs/nfs/dir.c +++ linux-2.6.12-rc1/fs/nfs/dir.c @@ -891,7 +891,6 @@ static int is_atomic_open(struct inode * static struct dentry *nfs_atomic_lookup(struct inode *dir, struct dentry *dentry, struct nameidata *nd) { struct dentry *res = NULL; - struct inode *inode = NULL; int error; /* Check that we are indeed trying to open this file */ @@ -905,8 +904,10 @@ static struct dentry *nfs_atomic_lookup( dentry->d_op = NFS_PROTO(dir)->dentry_ops; /* Let vfs_create() deal with O_EXCL */ - if (nd->intent.open.flags & O_EXCL) - goto no_entry; + if (nd->intent.open.flags & O_EXCL) { + d_add(dentry, NULL); + goto out; + } /* Open the file on the server */ lock_kernel(); @@ -919,18 +920,18 @@ static struct dentry *nfs_atomic_lookup( if (nd->intent.open.flags & O_CREAT) { nfs_begin_data_update(dir); - inode = nfs4_atomic_open(dir, dentry, nd); + res = nfs4_atomic_open(dir, dentry, nd); nfs_end_data_update(dir); } else - inode = nfs4_atomic_open(dir, dentry, nd); + res = nfs4_atomic_open(dir, dentry, nd); unlock_kernel(); - if (IS_ERR(inode)) { - error = PTR_ERR(inode); + if (IS_ERR(res)) { + error = PTR_ERR(res); switch (error) { /* Make a negative dentry */ case -ENOENT: - inode = NULL; - break; + res = NULL; + goto out; /* This turned out not to be a regular file */ case -ELOOP: if (!(nd->intent.open.flags & O_NOFOLLOW)) @@ -938,13 +939,9 @@ static struct dentry *nfs_atomic_lookup( /* case -EISDIR: */ /* case -EINVAL: */ default: - res = ERR_PTR(error); goto out; } - } -no_entry: - res = d_add_unique(dentry, inode); - if (res != NULL) + } else if (res != NULL) dentry = res; nfs_renew_times(dentry); nfs_set_verifier(dentry, nfs_save_change_attribute(dir)); @@ -988,7 +985,7 @@ static int nfs_open_revalidate(struct de */ lock_kernel(); verifier = nfs_save_change_attribute(dir); - ret = nfs4_open_revalidate(dir, dentry, openflags); + ret = nfs4_open_revalidate(dir, dentry, openflags, nd); if (!ret) nfs_set_verifier(dentry, verifier); unlock_kernel(); @@ -1111,7 +1108,7 @@ static int nfs_create(struct inode *dir, lock_kernel(); nfs_begin_data_update(dir); - error = NFS_PROTO(dir)->create(dir, dentry, &attr, open_flags); + error = NFS_PROTO(dir)->create(dir, dentry, &attr, open_flags, nd); nfs_end_data_update(dir); if (error != 0) goto out_err; Index: linux-2.6.12-rc1/include/linux/nfs_xdr.h =================================================================== --- linux-2.6.12-rc1.orig/include/linux/nfs_xdr.h +++ linux-2.6.12-rc1/include/linux/nfs_xdr.h @@ -722,7 +722,7 @@ struct nfs_rpc_ops { int (*write) (struct nfs_write_data *); int (*commit) (struct nfs_write_data *); int (*create) (struct inode *, struct dentry *, - struct iattr *, int); + struct iattr *, int, struct nameidata *); int (*remove) (struct inode *, struct qstr *); int (*unlink_setup) (struct rpc_message *, struct dentry *, struct qstr *); Index: linux-2.6.12-rc1/fs/nfs/nfs4_fs.h =================================================================== --- linux-2.6.12-rc1.orig/fs/nfs/nfs4_fs.h +++ linux-2.6.12-rc1/fs/nfs/nfs4_fs.h @@ -192,8 +192,8 @@ extern int nfs4_proc_setclientid_confirm extern int nfs4_proc_async_renew(struct nfs4_client *); extern int nfs4_proc_renew(struct nfs4_client *); extern int nfs4_do_close(struct inode *inode, struct nfs4_state *state, mode_t mode); -extern struct inode *nfs4_atomic_open(struct inode *, struct dentry *, struct nameidata *); -extern int nfs4_open_revalidate(struct inode *, struct dentry *, int); +extern struct dentry *nfs4_atomic_open(struct inode *, struct dentry *, struct nameidata *); +extern int nfs4_open_revalidate(struct inode *, struct dentry *, int, struct nameidata *); extern struct nfs4_state_recovery_ops nfs4_reboot_recovery_ops; extern struct nfs4_state_recovery_ops nfs4_network_partition_recovery_ops;