diff -u --recursive --new-file linux-2.6.0-test9/fs/exec.c linux-2.6.0-25-lock/fs/exec.c --- linux-2.6.0-test9/fs/exec.c 2003-11-17 17:58:17.000000000 -0500 +++ linux-2.6.0-25-lock/fs/exec.c 2003-11-17 18:12:46.000000000 -0500 @@ -121,7 +121,7 @@ struct nameidata nd; int error; - nd.intent.open.flags = O_RDONLY; + nd.intent.open.flags = FMODE_READ; error = __user_walk(library, LOOKUP_FOLLOW|LOOKUP_OPEN, &nd); if (error) goto out; @@ -471,8 +471,12 @@ struct file *open_exec(const char *name) { struct nameidata nd; - int err = path_lookup(name, LOOKUP_FOLLOW, &nd); - struct file *file = ERR_PTR(err); + int err; + struct file *file; + + nd.intent.open.flags = FMODE_READ; + err = path_lookup(name, LOOKUP_FOLLOW|LOOKUP_OPEN, &nd); + file = ERR_PTR(err); if (!err) { struct inode *inode = nd.dentry->d_inode; diff -u --recursive --new-file linux-2.6.0-test9/fs/nfs/dir.c linux-2.6.0-25-lock/fs/nfs/dir.c --- linux-2.6.0-test9/fs/nfs/dir.c 2003-11-17 17:58:57.000000000 -0500 +++ linux-2.6.0-25-lock/fs/nfs/dir.c 2003-11-17 18:12:46.000000000 -0500 @@ -72,6 +72,26 @@ .setattr = nfs_setattr, }; +#ifdef CONFIG_NFS_V4 + +static struct dentry *nfs_atomic_lookup(struct inode *, struct dentry *, struct nameidata *); +struct inode_operations nfs4_dir_inode_operations = { + .create = nfs_create, + .lookup = nfs_atomic_lookup, + .link = nfs_link, + .unlink = nfs_unlink, + .symlink = nfs_symlink, + .mkdir = nfs_mkdir, + .rmdir = nfs_rmdir, + .mknod = nfs_mknod, + .rename = nfs_rename, + .permission = nfs_permission, + .getattr = nfs_getattr, + .setattr = nfs_setattr, +}; + +#endif /* CONFIG_NFS_V4 */ + /* * Open file */ @@ -670,7 +690,7 @@ goto out; error = -ENOMEM; - dentry->d_op = &nfs_dentry_operations; + dentry->d_op = NFS_PROTO(dir)->dentry_ops; lock_kernel(); @@ -702,6 +722,119 @@ return ERR_PTR(error); } +#ifdef CONFIG_NFS_V4 +static int nfs_open_revalidate(struct dentry *, struct nameidata *); + +struct dentry_operations nfs4_dentry_operations = { + .d_revalidate = nfs_open_revalidate, + .d_delete = nfs_dentry_delete, + .d_iput = nfs_dentry_iput, +}; + +static int is_atomic_open(struct inode *dir, struct nameidata *nd) +{ + if (!nd) + return 0; + /* Check that we are indeed trying to open this file */ + if ((nd->flags & LOOKUP_CONTINUE) || !(nd->flags & LOOKUP_OPEN)) + return 0; + /* NFS does not (yet) have a stateful open for directories */ + if (nd->flags & LOOKUP_DIRECTORY) + return 0; + /* Are we trying to write to a read only partition? */ + if (IS_RDONLY(dir) && (nd->intent.open.flags & (O_CREAT|O_TRUNC|FMODE_WRITE))) + return 0; + return 1; +} + +static struct dentry *nfs_atomic_lookup(struct inode *dir, struct dentry *dentry, struct nameidata *nd) +{ + struct inode *inode = NULL; + int error = 0; + + /* Check that we are indeed trying to open this file */ + if (!is_atomic_open(dir, nd)) + goto no_open; + + if (dentry->d_name.len > NFS_SERVER(dir)->namelen) { + error = -ENAMETOOLONG; + goto out; + } + dentry->d_op = NFS_PROTO(dir)->dentry_ops; + + /* Let vfs_create() deal with O_EXCL */ + if (nd->intent.open.flags & O_EXCL) + goto no_entry; + + /* Open the file on the server */ + lock_kernel(); + inode = nfs4_atomic_open(dir, dentry, nd); + unlock_kernel(); + if (IS_ERR(inode)) { + error = PTR_ERR(inode); + switch (error) { + /* Make a negative dentry */ + case -ENOENT: + inode = NULL; + break; + /* This turned out not to be a regular file */ + case -ELOOP: + if (!(nd->intent.open.flags & O_NOFOLLOW)) + goto no_open; + /* case -EISDIR: */ + /* case -EINVAL: */ + default: + goto out; + } + } +no_entry: + d_add(dentry, inode); + nfs_renew_times(dentry); +out: + BUG_ON(error > 0); + return ERR_PTR(error); +no_open: + return nfs_lookup(dir, dentry, nd); +} + +static int nfs_open_revalidate(struct dentry *dentry, struct nameidata *nd) +{ + struct dentry *parent = NULL; + struct inode *inode = dentry->d_inode; + int openflags, ret = 0; + + /* NFS only supports OPEN for regular files */ + if (inode && !S_ISREG(inode->i_mode)) + goto no_open; + parent = dget_parent(dentry); + if (!is_atomic_open(parent->d_inode, nd)) + goto no_open; + openflags = nd->intent.open.flags; + if (openflags & O_CREAT) { + /* If this is a negative dentry, just drop it */ + if (!inode) + goto out; + /* If this is exclusive open, just revalidate */ + if (openflags & O_EXCL) + goto no_open; + } + /* We can't create new files, or truncate existing ones here */ + openflags &= ~(O_CREAT|O_TRUNC); + + lock_kernel(); + ret = nfs4_open_revalidate(parent->d_inode, dentry, openflags); + unlock_kernel(); +out: + dput(parent); + if (!ret) + d_drop(dentry); + return ret; +no_open: + dput(parent); + return nfs_lookup_revalidate(dentry, nd); +} +#endif /* CONFIG_NFSV4 */ + static inline int find_dirent_name(nfs_readdir_descriptor_t *desc, struct page *page, struct dentry *dentry) { @@ -1281,13 +1414,6 @@ int mode = inode->i_mode; int res; - /* Are we checking permissions on anything other than lookup? */ - if (!(mask & MAY_EXEC)) { - /* We only need to check permissions on file open() and access() */ - if (!nd || !(nd->flags & (LOOKUP_OPEN|LOOKUP_ACCESS))) - return 0; - } - if (mask & MAY_WRITE) { /* * @@ -1307,6 +1433,18 @@ return -EACCES; } + /* Are we checking permissions on anything other than lookup? */ + if (!(mask & MAY_EXEC)) { + /* We only need to check permissions on file open() and access() */ + if (!nd) + return 0; + if (!(nd->flags & (LOOKUP_OPEN|LOOKUP_ACCESS))) + return 0; + /* NFSv4 has atomic_open... */ + if (NFS_PROTO(inode)->version > 3 && (nd->flags & LOOKUP_OPEN)) + return 0; + } + lock_kernel(); if (!NFS_PROTO(inode)->access) diff -u --recursive --new-file linux-2.6.0-test9/fs/nfs/file.c linux-2.6.0-25-lock/fs/nfs/file.c --- linux-2.6.0-test9/fs/nfs/file.c 2003-11-17 18:00:04.000000000 -0500 +++ linux-2.6.0-25-lock/fs/nfs/file.c 2003-11-17 21:49:01.000000000 -0500 @@ -26,7 +26,6 @@ #include #include #include -#include #include #include @@ -278,21 +277,17 @@ if (!inode) return -EINVAL; - /* This will be in a forthcoming patch. */ - if (NFS_PROTO(inode)->version == 4) { - printk(KERN_INFO "NFS: file locking over NFSv4 is not yet supported\n"); - return -EIO; - } - /* No mandatory locks over NFS */ if ((inode->i_mode & (S_ISGID | S_IXGRP)) == S_ISGID) return -ENOLCK; - /* Fake OK code if mounted without NLM support */ - if (NFS_SERVER(inode)->flags & NFS_MOUNT_NONLM) { - if (IS_GETLK(cmd)) - status = LOCK_USE_CLNT; - goto out_ok; + if (NFS_PROTO(inode)->version != 4) { + /* Fake OK code if mounted without NLM support */ + if (NFS_SERVER(inode)->flags & NFS_MOUNT_NONLM) { + if (IS_GETLK(cmd)) + status = LOCK_USE_CLNT; + goto out_ok; + } } /* @@ -302,7 +297,7 @@ * Not sure whether that would be unique, though, or whether * that would break in other places. */ - if (!fl->fl_owner || (fl->fl_flags & FL_POSIX) != FL_POSIX) + if (!fl->fl_owner || !(fl->fl_flags & FL_POSIX)) return -ENOLCK; /* @@ -322,7 +317,7 @@ return status; lock_kernel(); - status = nlmclnt_proc(inode, cmd, fl); + status = NFS_PROTO(inode)->lock(filp, cmd, fl); unlock_kernel(); if (status < 0) return status; diff -u --recursive --new-file linux-2.6.0-test9/fs/nfs/idmap.c linux-2.6.0-25-lock/fs/nfs/idmap.c --- linux-2.6.0-test9/fs/nfs/idmap.c 2003-11-17 17:57:22.000000000 -0500 +++ linux-2.6.0-25-lock/fs/nfs/idmap.c 2003-11-17 18:06:21.000000000 -0500 @@ -43,6 +43,7 @@ #include #include +#include #include #include @@ -145,7 +146,8 @@ struct idmap *idmap = server->idmap; struct idmap_msg *im; DECLARE_WAITQUEUE(wq, current); - int ret = -1, hashtype = IDMAP_HASH_TYPE_NAME, xnamelen = namelen; + int ret = -1, hashtype = IDMAP_HASH_TYPE_NAME; + u_int xnamelen = namelen; if (idmap == NULL) return (-1); @@ -168,15 +170,6 @@ memset(im, 0, sizeof(*im)); memcpy(im->im_name, name, namelen); - /* Make sure the string is NULL terminated */ - if (namelen != xnamelen) { - /* We cannot fit a NULL character */ - if (namelen == IDMAP_NAMESZ) { - ret = -1; - goto out; - } - im->im_name[namelen] = '\0'; - } im->im_type = type; im->im_conv = IDMAP_CONV_NAMETOID; diff -u --recursive --new-file linux-2.6.0-test9/fs/nfs/inode.c linux-2.6.0-25-lock/fs/nfs/inode.c --- linux-2.6.0-test9/fs/nfs/inode.c 2003-11-17 18:00:57.000000000 -0500 +++ linux-2.6.0-25-lock/fs/nfs/inode.c 2003-11-17 18:12:46.000000000 -0500 @@ -118,7 +118,7 @@ { int flags = sync ? FLUSH_WAIT : 0; - nfs_commit_file(inode, NULL, 0, 0, flags); + nfs_commit_file(inode, NULL, flags); } static void @@ -163,6 +163,8 @@ nfs_idmap_delete(server); #endif /* CONFIG_NFS_V4 */ + nfs4_renewd_prepare_shutdown(server); + if (server->client != NULL) rpc_shutdown_client(server->client); if (server->client_sys != NULL) @@ -301,7 +303,6 @@ server = NFS_SB(sb); sb->s_magic = NFS_SUPER_MAGIC; - sb->s_op = &nfs_sops; /* Did getting the root inode fail? */ if (nfs_get_root(&root_inode, authflavor, sb, &server->fh) < 0) @@ -310,7 +311,7 @@ if (!sb->s_root) goto out_no_root; - sb->s_root->d_op = &nfs_dentry_operations; + sb->s_root->d_op = server->rpc_ops->dentry_ops; /* Get some general file system info */ if (server->rpc_ops->fsinfo(server, &server->fh, &fsinfo) < 0) { @@ -493,10 +494,17 @@ server->client = nfs_create_client(server, data); if (server->client == NULL) goto out_fail; - data->pseudoflavor = RPC_AUTH_UNIX; /* RFC 2623, sec 2.3.2 */ - server->client_sys = nfs_create_client(server, data); - if (server->client_sys == NULL) - goto out_shutdown; + /* RFC 2623, sec 2.3.2 */ + if (authflavor != RPC_AUTH_UNIX) { + server->client_sys = rpc_clone_client(server->client); + if (server->client_sys == NULL) + goto out_shutdown; + if (!rpcauth_create(RPC_AUTH_UNIX, server->client_sys)) + goto out_shutdown; + } else { + atomic_inc(&server->client->cl_count); + server->client_sys = server->client; + } /* Fire up rpciod if not yet running */ if (rpciod_up() != 0) { @@ -504,6 +512,7 @@ goto out_shutdown; } + sb->s_op = &nfs_sops; err = nfs_sb_init(sb, authflavor); if (err != 0) goto out_noinit; @@ -736,7 +745,7 @@ inode->i_data.a_ops = &nfs_file_aops; inode->i_data.backing_dev_info = &NFS_SB(sb)->backing_dev_info; } else if (S_ISDIR(inode->i_mode)) { - inode->i_op = &nfs_dir_inode_operations; + inode->i_op = NFS_SB(sb)->rpc_ops->dir_inode_ops; inode->i_fop = &nfs_dir_operations; if (nfs_server_capable(inode, NFS_CAP_READDIRPLUS) && fattr->size <= NFS_LIMIT_READDIRPLUS) @@ -819,7 +828,12 @@ filemap_fdatawait(inode->i_mapping); if (error) goto out; + /* Optimize away unnecessary truncates */ + if ((attr->ia_valid & ATTR_SIZE) && i_size_read(inode) == attr->ia_size) + attr->ia_valid &= ~ATTR_SIZE; } + if (!attr->ia_valid) + goto out; error = NFS_PROTO(inode)->setattr(dentry, &fattr, attr); if (error) @@ -1265,6 +1279,8 @@ if (!server) return ERR_PTR(-ENOMEM); memset(server, 0, sizeof(struct nfs_server)); + /* Zero out the NFS state stuff */ + init_nfsv4_state(server); root = &server->fh; memcpy(root, &data->root, sizeof(*root)); @@ -1337,9 +1353,52 @@ #ifdef CONFIG_NFS_V4 +static void nfs4_clear_inode(struct inode *); + +static struct super_operations nfs4_sops = { + .alloc_inode = nfs_alloc_inode, + .destroy_inode = nfs_destroy_inode, + .write_inode = nfs_write_inode, + .delete_inode = nfs_delete_inode, + .put_super = nfs_put_super, + .statfs = nfs_statfs, + .clear_inode = nfs4_clear_inode, + .umount_begin = nfs_umount_begin, + .show_options = nfs_show_options, +}; + +/* + * Clean out any remaining NFSv4 state that might be left over due + * to open() calls that passed nfs_atomic_lookup, but failed to call + * nfs_open(). + */ +static void nfs4_clear_inode(struct inode *inode) +{ + struct nfs_inode *nfsi = NFS_I(inode); + + while (!list_empty(&nfsi->open_states)) { + struct nfs4_state *state; + + state = list_entry(nfsi->open_states.next, + struct nfs4_state, + inode_states); + dprintk("%s(%s/%Ld): found unclaimed NFSv4 state %p\n", + __FUNCTION__, + inode->i_sb->s_id, + (long long)NFS_FILEID(inode), + state); + list_del(&state->inode_states); + nfs4_put_open_state(state); + } + /* Now call standard NFS clear_inode() code */ + nfs_clear_inode(inode); +} + + static int nfs4_fill_super(struct super_block *sb, struct nfs4_mount_data *data, int silent) { struct nfs_server *server; + struct nfs4_client *clp = NULL; struct rpc_xprt *xprt = NULL; struct rpc_clnt *clnt = NULL; struct rpc_timeout timeparms; @@ -1389,13 +1448,13 @@ return -EINVAL; } - /* Now create transport and client */ - xprt = xprt_create_proto(proto, &server->addr, &timeparms); - if (xprt == NULL) { - printk(KERN_WARNING "NFS: cannot create RPC transport.\n"); + clp = nfs4_get_client(&server->addr.sin_addr); + if (!clp) { + printk(KERN_WARNING "NFS: failed to create NFS4 client.\n"); goto out_fail; } + /* Now create transport and client */ authflavour = RPC_AUTH_UNIX; if (data->auth_flavourlen != 0) { if (data->auth_flavourlen > 1) @@ -1405,41 +1464,78 @@ goto out_fail; } } - clnt = rpc_create_client(xprt, server->hostname, &nfs_program, - server->rpc_ops->version, authflavour); + + down_write(&clp->cl_sem); + if (clp->cl_rpcclient == NULL) { + xprt = xprt_create_proto(proto, &server->addr, &timeparms); + if (xprt == NULL) { + up_write(&clp->cl_sem); + printk(KERN_WARNING "NFS: cannot create RPC transport.\n"); + goto out_fail; + } + clnt = rpc_create_client(xprt, server->hostname, &nfs_program, + server->rpc_ops->version, authflavour); + if (clnt == NULL) { + up_write(&clp->cl_sem); + printk(KERN_WARNING "NFS: cannot create RPC client.\n"); + xprt_destroy(xprt); + goto out_fail; + } + clnt->cl_chatty = 1; + clp->cl_rpcclient = clnt; + clp->cl_cred = rpcauth_lookupcred(clnt->cl_auth, 0); + memcpy(clp->cl_ipaddr, server->ip_addr, sizeof(clp->cl_ipaddr)); + } + if (list_empty(&clp->cl_superblocks)) + clear_bit(NFS4CLNT_OK, &clp->cl_state); + list_add_tail(&server->nfs4_siblings, &clp->cl_superblocks); + clnt = rpc_clone_client(clp->cl_rpcclient); + server->nfs4_state = clp; + up_write(&clp->cl_sem); + clp = NULL; + if (clnt == NULL) { printk(KERN_WARNING "NFS: cannot create RPC client.\n"); - xprt_destroy(xprt); - goto out_fail; + goto out_remove_list; } clnt->cl_intr = (server->flags & NFS4_MOUNT_INTR) ? 1 : 0; clnt->cl_softrtry = (server->flags & NFS4_MOUNT_SOFT) ? 1 : 0; - clnt->cl_chatty = 1; server->client = clnt; + if (clnt->cl_auth->au_flavor != authflavour) { + if (rpcauth_create(authflavour, clnt) == NULL) { + printk(KERN_WARNING "NFS: couldn't create credcache!\n"); + goto out_shutdown; + } + } + /* Fire up rpciod if not yet running */ if (rpciod_up() != 0) { printk(KERN_WARNING "NFS: couldn't start rpciod!\n"); goto out_shutdown; } - if (create_nfsv4_state(server, data)) - goto out_shutdown; - if ((server->idmap = nfs_idmap_new(server)) == NULL) printk(KERN_WARNING "NFS: couldn't start IDmap\n"); + sb->s_op = &nfs4_sops; err = nfs_sb_init(sb, authflavour); if (err == 0) return 0; rpciod_down(); - destroy_nfsv4_state(server); if (server->idmap != NULL) nfs_idmap_delete(server); out_shutdown: rpc_shutdown_client(server->client); +out_remove_list: + down_write(&server->nfs4_state->cl_sem); + list_del_init(&server->nfs4_siblings); + up_write(&server->nfs4_state->cl_sem); + destroy_nfsv4_state(server); out_fail: + if (clp) + nfs4_put_client(clp); return err; } @@ -1496,6 +1592,8 @@ if (!server) return ERR_PTR(-ENOMEM); memset(server, 0, sizeof(struct nfs_server)); + /* Zero out the NFS state stuff */ + init_nfsv4_state(server); if (data->version != NFS4_MOUNT_VERSION) { printk("nfs warning: mount version %s than kernel\n", diff -u --recursive --new-file linux-2.6.0-test9/fs/nfs/nfs3proc.c linux-2.6.0-25-lock/fs/nfs/nfs3proc.c --- linux-2.6.0-test9/fs/nfs/nfs3proc.c 2003-11-17 17:58:58.000000000 -0500 +++ linux-2.6.0-25-lock/fs/nfs/nfs3proc.c 2003-11-17 18:13:36.000000000 -0500 @@ -15,6 +15,7 @@ #include #include #include +#include #include #define NFSDBG_FACILITY NFSDBG_PROC @@ -896,8 +897,16 @@ return 1; } +static int +nfs3_proc_lock(struct file *filp, int cmd, struct file_lock *fl) +{ + return nlmclnt_proc(filp->f_dentry->d_inode, cmd, fl); +} + struct nfs_rpc_ops nfs_v3_clientops = { .version = 3, /* protocol version */ + .dentry_ops = &nfs_dentry_operations, + .dir_inode_ops = &nfs_dir_inode_operations, .getroot = nfs3_proc_get_root, .getattr = nfs3_proc_getattr, .setattr = nfs3_proc_setattr, @@ -929,4 +938,5 @@ .file_release = nfs_release, .request_init = nfs3_request_init, .request_compatible = nfs3_request_compatible, + .lock = nfs3_proc_lock, }; diff -u --recursive --new-file linux-2.6.0-test9/fs/nfs/nfs4proc.c linux-2.6.0-25-lock/fs/nfs/nfs4proc.c --- linux-2.6.0-test9/fs/nfs/nfs4proc.c 2003-11-17 18:00:46.000000000 -0500 +++ linux-2.6.0-25-lock/fs/nfs/nfs4proc.c 2003-11-17 22:56:17.000000000 -0500 @@ -45,19 +45,21 @@ #include #include #include +#include #define NFSDBG_FACILITY NFSDBG_PROC +#define NFS4_POLL_RETRY_TIME (15*HZ) + #define GET_OP(cp,name) &cp->ops[cp->req_nops].u.name #define OPNUM(cp) cp->ops[cp->req_nops].opnum +static int nfs4_async_handle_error(struct rpc_task *, struct nfs_server *); extern u32 *nfs4_decode_dirent(u32 *p, struct nfs_entry *entry, int plus); extern struct rpc_procinfo nfs4_procedures[]; extern nfs4_stateid zero_stateid; -static spinlock_t renew_lock = SPIN_LOCK_UNLOCKED; - static void nfs4_setup_compound(struct nfs4_compound *cp, struct nfs4_op *ops, struct nfs_server *server, char *tag) @@ -179,44 +181,16 @@ | FATTR4_WORD1_SPACE_TOTAL }; -u32 nfs4_fsinfo_bitmap[2] = { - FATTR4_WORD0_MAXFILESIZE - | FATTR4_WORD0_MAXREAD - | FATTR4_WORD0_MAXWRITE - | FATTR4_WORD0_LEASE_TIME, - 0 -}; - u32 nfs4_pathconf_bitmap[2] = { FATTR4_WORD0_MAXLINK | FATTR4_WORD0_MAXNAME, 0 }; -/* mount bitmap: fattr bitmap + lease time */ -u32 nfs4_mount_bitmap[2] = { - FATTR4_WORD0_TYPE - | FATTR4_WORD0_CHANGE - | FATTR4_WORD0_SIZE - | FATTR4_WORD0_FSID - | FATTR4_WORD0_FILEID - | FATTR4_WORD0_LEASE_TIME, - FATTR4_WORD1_MODE - | FATTR4_WORD1_NUMLINKS - | FATTR4_WORD1_OWNER - | FATTR4_WORD1_OWNER_GROUP - | FATTR4_WORD1_RAWDEV - | FATTR4_WORD1_SPACE_USED - | FATTR4_WORD1_TIME_ACCESS - | FATTR4_WORD1_TIME_METADATA - | FATTR4_WORD1_TIME_MODIFY -}; - static inline void __nfs4_setup_getattr(struct nfs4_compound *cp, u32 *bitmap, struct nfs_fattr *fattr, struct nfs_fsstat *fsstat, - struct nfs_fsinfo *fsinfo, struct nfs_pathconf *pathconf) { struct nfs4_getattr *getattr = GET_OP(cp, getattr); @@ -224,7 +198,6 @@ getattr->gt_bmval = bitmap; getattr->gt_attrs = fattr; getattr->gt_fsstat = fsstat; - getattr->gt_fsinfo = fsinfo; getattr->gt_pathconf = pathconf; OPNUM(cp) = OP_GETATTR; @@ -236,16 +209,7 @@ struct nfs_fattr *fattr) { __nfs4_setup_getattr(cp, nfs4_fattr_bitmap, fattr, - NULL, NULL, NULL); -} - -static void -nfs4_setup_getrootattr(struct nfs4_compound *cp, - struct nfs_fattr *fattr, - struct nfs_fsinfo *fsinfo) -{ - __nfs4_setup_getattr(cp, nfs4_mount_bitmap, - fattr, NULL, fsinfo, NULL); + NULL, NULL); } static void @@ -253,15 +217,7 @@ struct nfs_fsstat *fsstat) { __nfs4_setup_getattr(cp, nfs4_statfs_bitmap, - NULL, fsstat, NULL, NULL); -} - -static void -nfs4_setup_fsinfo(struct nfs4_compound *cp, - struct nfs_fsinfo *fsinfo) -{ - __nfs4_setup_getattr(cp, nfs4_fsinfo_bitmap, - NULL, NULL, fsinfo, NULL); + NULL, fsstat, NULL); } static void @@ -269,7 +225,7 @@ struct nfs_pathconf *pathconf) { __nfs4_setup_getattr(cp, nfs4_pathconf_bitmap, - NULL, NULL, NULL, pathconf); + NULL, NULL, pathconf); } static void @@ -429,18 +385,6 @@ } static void -nfs4_setup_renew(struct nfs4_compound *cp) -{ - struct nfs4_client **client_state = GET_OP(cp, renew); - - *client_state = cp->server->nfs4_state; - - OPNUM(cp) = OP_RENEW; - cp->req_nops++; - cp->renew_index = cp->req_nops; -} - -static void nfs4_setup_restorefh(struct nfs4_compound *cp) { OPNUM(cp) = OP_RESTOREFH; @@ -455,47 +399,13 @@ } static void -nfs4_setup_setclientid(struct nfs4_compound *cp, u32 program, unsigned short port) -{ - struct nfs4_setclientid *setclientid = GET_OP(cp, setclientid); - struct nfs_server *server = cp->server; - struct timespec tv; - u32 *p; - - tv = CURRENT_TIME; - p = (u32 *)setclientid->sc_verifier.data; - *p++ = tv.tv_sec; - *p++ = tv.tv_nsec; - setclientid->sc_name = server->ip_addr; - sprintf(setclientid->sc_netid, "udp"); - sprintf(setclientid->sc_uaddr, "%s.%d.%d", server->ip_addr, port >> 8, port & 255); - setclientid->sc_prog = program; - setclientid->sc_cb_ident = 0; - setclientid->sc_state = server->nfs4_state; - - OPNUM(cp) = OP_SETCLIENTID; - cp->req_nops++; -} - -static void -nfs4_setup_setclientid_confirm(struct nfs4_compound *cp) -{ - struct nfs4_client **client_state = GET_OP(cp, setclientid_confirm); - - *client_state = cp->server->nfs4_state; - - OPNUM(cp) = OP_SETCLIENTID_CONFIRM; - cp->req_nops++; - cp->renew_index = cp->req_nops; -} - -static void renew_lease(struct nfs_server *server, unsigned long timestamp) { - spin_lock(&renew_lock); - if (time_before(server->last_renewal,timestamp)) - server->last_renewal = timestamp; - spin_unlock(&renew_lock); + struct nfs4_client *clp = server->nfs4_state; + spin_lock(&clp->cl_lock); + if (time_before(clp->cl_last_renewal,timestamp)) + clp->cl_last_renewal = timestamp; + spin_unlock(&clp->cl_lock); } static inline void @@ -552,6 +462,57 @@ } } +/* + * OPEN_RECLAIM: + * reclaim state on the server after a reboot. + * Assumes caller is holding the sp->so_sem + */ +int +nfs4_open_reclaim(struct nfs4_state_owner *sp, struct nfs4_state *state) +{ + struct inode *inode = state->inode; + struct nfs_server *server = NFS_SERVER(inode); + struct nfs_fattr fattr = { + .valid = 0, + }; + struct nfs4_change_info d_cinfo; + struct nfs4_getattr f_getattr = { + .gt_bmval = nfs4_fattr_bitmap, + .gt_attrs = &fattr, + }; + + struct nfs_open_reclaimargs o_arg = { + .fh = NFS_FH(inode), + .seqid = sp->so_seqid, + .id = sp->so_id, + .share_access = state->state & (FMODE_READ|FMODE_WRITE), + .clientid = server->nfs4_state->cl_clientid, + .claim = NFS4_OPEN_CLAIM_PREVIOUS, + .f_getattr = &f_getattr, + }; + struct nfs_openres o_res = { + .cinfo = &d_cinfo, + .f_getattr = &f_getattr, + .server = server, /* Grrr */ + }; + struct rpc_message msg = { + .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_OPEN_RECLAIM], + .rpc_argp = &o_arg, + .rpc_resp = &o_res, + .rpc_cred = sp->so_cred, + }; + int status; + + status = rpc_call_sync(server->client, &msg, 0); + nfs4_increment_seqid(status, sp); + /* Update the inode attributes */ + nfs_refresh_inode(inode, &fattr); + return status; +} + +/* + * Returns an nfs4_state + an referenced inode + */ struct nfs4_state * nfs4_do_open(struct inode *dir, struct qstr *name, int flags, struct iattr *sattr, struct rpc_cred *cred) { @@ -578,7 +539,6 @@ struct nfs_openargs o_arg = { .fh = NFS_FH(dir), .share_access = flags & (FMODE_READ|FMODE_WRITE), - .clientid = NFS_SERVER(dir)->nfs4_state->cl_clientid, .opentype = (flags & O_CREAT) ? NFS4_OPEN_CREATE : NFS4_OPEN_NOCREATE, .createmode = (flags & O_EXCL) ? NFS4_CREATE_EXCLUSIVE : NFS4_CREATE_UNCHECKED, .name = name, @@ -599,6 +559,7 @@ .rpc_cred = cred, }; +retry: status = -ENOMEM; if (!(sp = nfs4_get_state_owner(NFS_SERVER(dir), cred))) { dprintk("nfs4_do_open: nfs4_get_state_owner failed!\n"); @@ -615,12 +576,12 @@ down(&sp->so_sema); o_arg.seqid = sp->so_seqid; o_arg.id = sp->so_id; + o_arg.clientid = NFS_SERVER(dir)->nfs4_state->cl_clientid, status = rpc_call_sync(server->client, &msg, 0); - if (status) { - goto out_up; - } nfs4_increment_seqid(status, sp); + if (status) + goto out_up; process_cinfo(&d_cinfo, &d_attr); nfs_refresh_inode(dir, &d_attr); @@ -637,9 +598,7 @@ .fh = &o_res.fh, .seqid = sp->so_seqid, }; - struct nfs_open_confirmres oc_res = { - .status = 0, - }; + struct nfs_open_confirmres oc_res; struct rpc_message msg = { .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_OPEN_CONFIRM], .rpc_argp = &oc_arg, @@ -649,27 +608,37 @@ memcpy(&oc_arg.stateid, &o_res.stateid, sizeof(oc_arg.stateid)); status = rpc_call_sync(server->client, &msg, 0); + nfs4_increment_seqid(status, sp); if (status) goto out_up; - nfs4_increment_seqid(status, sp); memcpy(&state->stateid, &oc_res.stateid, sizeof(state->stateid)); } else memcpy(&state->stateid, &o_res.stateid, sizeof(state->stateid)); state->state |= flags & (FMODE_READ|FMODE_WRITE); - state->pid = current->pid; + if (flags & FMODE_READ) + state->nreaders++; + if (flags & FMODE_WRITE) + state->nwriters++; up(&sp->so_sema); nfs4_put_state_owner(sp); - iput(inode); return state; out_up: up(&sp->so_sema); nfs4_put_state_owner(sp); - if (state) + if (state) { nfs4_put_open_state(state); - if (inode) + state = NULL; + } + if (inode) { iput(inode); + inode = NULL; + } + status = nfs4_handle_error(server, status); + if (!status) + goto retry; + BUG_ON(status < -1000 || status > 0); out: return ERR_PTR(status); } @@ -698,15 +667,23 @@ .rpc_argp = &arg, .rpc_resp = &res, }; + int status; +retry: fattr->valid = 0; if (state) - memcpy(&arg.stateid, &state->stateid, sizeof(arg.stateid)); + nfs4_copy_stateid(&arg.stateid, state, 0); else memcpy(&arg.stateid, &zero_stateid, sizeof(arg.stateid)); - return(rpc_call_sync(server->client, &msg, 0)); + status = rpc_call_sync(server->client, &msg, 0); + if (status) { + status = nfs4_handle_error(server, status); + if (!status) + goto retry; + } + return status; } /* @@ -728,9 +705,7 @@ struct nfs_closeargs arg = { .fh = NFS_FH(inode), }; - struct nfs_closeres res = { - .status = 0, - }; + struct nfs_closeres res; struct rpc_message msg = { .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_CLOSE], .rpc_argp = &arg, @@ -746,82 +721,109 @@ * the state_owner. we keep this around to process errors */ nfs4_increment_seqid(status, sp); + if (!status) + memcpy(&state->stateid, &res.stateid, sizeof(state->stateid)); + + return status; +} + +int +nfs4_do_downgrade(struct inode *inode, struct nfs4_state *state, mode_t mode) +{ + struct nfs4_state_owner *sp = state->owner; + int status = 0; + struct nfs_closeargs arg = { + .fh = NFS_FH(inode), + .seqid = sp->so_seqid, + .share_access = mode, + }; + struct nfs_closeres res; + struct rpc_message msg = { + .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_OPEN_DOWNGRADE], + .rpc_argp = &arg, + .rpc_resp = &res, + }; + + memcpy(&arg.stateid, &state->stateid, sizeof(arg.stateid)); + status = rpc_call_sync(NFS_SERVER(inode)->client, &msg, 0); + nfs4_increment_seqid(status, sp); + if (!status) + memcpy(&state->stateid, &res.stateid, sizeof(state->stateid)); return status; } +struct inode * +nfs4_atomic_open(struct inode *dir, struct dentry *dentry, struct nameidata *nd) +{ + struct iattr attr; + struct rpc_cred *cred; + struct nfs4_state *state; + + if (nd->flags & LOOKUP_CREATE) { + attr.ia_mode = nd->intent.open.create_mode; + attr.ia_valid = ATTR_MODE; + if (!IS_POSIXACL(dir)) + attr.ia_mode &= ~current->fs->umask; + } else { + attr.ia_valid = 0; + BUG_ON(nd->intent.open.flags & O_CREAT); + } + + cred = rpcauth_lookupcred(NFS_SERVER(dir)->client->cl_auth, 0); + state = nfs4_do_open(dir, &dentry->d_name, nd->intent.open.flags, &attr, cred); + put_rpccred(cred); + if (IS_ERR(state)) + return (struct inode *)state; + return state->inode; +} + +int +nfs4_open_revalidate(struct inode *dir, struct dentry *dentry, int openflags) +{ + struct rpc_cred *cred; + struct nfs4_state *state; + struct inode *inode; + + cred = rpcauth_lookupcred(NFS_SERVER(dir)->client->cl_auth, 0); + state = nfs4_do_open(dir, &dentry->d_name, openflags, NULL, cred); + put_rpccred(cred); + if (IS_ERR(state)) + return 0; + inode = state->inode; + if (inode == dentry->d_inode) { + iput(inode); + return 1; + } + d_drop(dentry); + nfs4_close_state(state, openflags); + iput(inode); + return 0; +} + static int nfs4_proc_get_root(struct nfs_server *server, struct nfs_fh *fhandle, struct nfs_fattr *fattr) { - struct nfs4_client *clp; struct nfs4_compound compound; struct nfs4_op ops[4]; - struct nfs_fsinfo fsinfo; unsigned char * p; struct qstr q; int status; - clp = server->nfs4_state = nfs4_get_client(&server->addr.sin_addr); - if (!clp) - return -ENOMEM; - - down_write(&clp->cl_sem); - /* Has the clientid already been initialized? */ - if (clp->cl_state != NFS4CLNT_NEW) { - /* Yep, so just read the root attributes and the lease time. */ - fattr->valid = 0; - nfs4_setup_compound(&compound, ops, server, "getrootfh"); - nfs4_setup_putrootfh(&compound); - nfs4_setup_getrootattr(&compound, fattr, &fsinfo); - nfs4_setup_getfh(&compound, fhandle); - if ((status = nfs4_call_compound(&compound, NULL, 0))) - goto out_unlock; - goto no_setclientid; - } - - /* - * SETCLIENTID. - * Until delegations are imported, we don't bother setting the program - * number and port to anything meaningful. - */ - nfs4_setup_compound(&compound, ops, server, "setclientid"); - nfs4_setup_setclientid(&compound, 0, 0); - if ((status = nfs4_call_compound(&compound, NULL, 0))) - goto out_unlock; - - /* - * SETCLIENTID_CONFIRM, plus root filehandle. - * We also get the lease time here. - */ - fattr->valid = 0; - nfs4_setup_compound(&compound, ops, server, "setclientid_confirm"); - nfs4_setup_setclientid_confirm(&compound); - nfs4_setup_putrootfh(&compound); - nfs4_setup_getrootattr(&compound, fattr, &fsinfo); - nfs4_setup_getfh(&compound, fhandle); - if ((status = nfs4_call_compound(&compound, NULL, 0))) - goto out_unlock; - clp->cl_state = NFS4CLNT_OK; - -no_setclientid: - /* - * Now that we have instantiated the clientid and determined - * the lease time, we can initialize the renew daemon for this - * server. - * FIXME: we only need one renewd daemon per server. - */ - server->lease_time = fsinfo.lease_time * HZ; - if ((status = nfs4_init_renewd(server))) - goto out_unlock; - up_write(&clp->cl_sem); - /* * Now we do a separate LOOKUP for each component of the mount path. * The LOOKUPs are done separately so that we can conveniently * catch an ERR_WRONGSEC if it occurs along the way... */ p = server->mnt_path; + fattr->valid = 0; + nfs4_setup_compound(&compound, ops, server, "getrootfh"); + nfs4_setup_putrootfh(&compound); + nfs4_setup_getattr(&compound, fattr); + nfs4_setup_getfh(&compound, fhandle); + if ((status = nfs4_call_compound(&compound, NULL, 0))) + goto out; for (;;) { while (*p == '/') p++; @@ -847,10 +849,7 @@ } break; } - return status; -out_unlock: - up_write(&clp->cl_sem); - nfs4_put_client(clp); +out: return status; } @@ -892,28 +891,38 @@ struct inode * inode = dentry->d_inode; int size_change = sattr->ia_valid & ATTR_SIZE; struct nfs4_state *state = NULL; - int status; + int need_iput = 0; + int status; fattr->valid = 0; if (size_change) { struct rpc_cred *cred = rpcauth_lookupcred(NFS_SERVER(inode)->client->cl_auth, 0); - state = nfs4_do_open(dentry->d_parent->d_inode, + state = nfs4_find_state(inode, cred, FMODE_WRITE); + if (!state) { + state = nfs4_do_open(dentry->d_parent->d_inode, &dentry->d_name, FMODE_WRITE, NULL, cred); + need_iput = 1; + } put_rpccred(cred); if (IS_ERR(state)) return PTR_ERR(state); if (state->inode != inode) { - printk(KERN_WARNING "nfs: raced in setattr, returning -EIO\n"); - nfs4_put_open_state(state); - return -EIO; + printk(KERN_WARNING "nfs: raced in setattr (%p != %p), returning -EIO\n", inode, state->inode); + status = -EIO; + goto out; } } status = nfs4_do_setattr(NFS_SERVER(inode), fattr, NFS_FH(inode), sattr, state); - if (state) - nfs4_put_open_state(state); +out: + if (state) { + inode = state->inode; + nfs4_close_state(state, FMODE_WRITE); + if (need_iput) + iput(inode); + } return status; } @@ -1051,7 +1060,7 @@ if (filp) { struct nfs4_state *state; state = (struct nfs4_state *)filp->private_data; - memcpy(&rdata->args.stateid, &state->stateid, sizeof(rdata->args.stateid)); + nfs4_copy_stateid(&rdata->args.stateid, state, rdata->lockowner); msg.rpc_cred = state->owner->so_cred; } else { memcpy(&rdata->args.stateid, &zero_stateid, sizeof(rdata->args.stateid)); @@ -1093,7 +1102,7 @@ if (filp) { struct nfs4_state *state; state = (struct nfs4_state *)filp->private_data; - memcpy(&wdata->args.stateid, &state->stateid, sizeof(wdata->args.stateid)); + nfs4_copy_stateid(&wdata->args.stateid, state, wdata->lockowner); msg.rpc_cred = state->owner->so_cred; } else { memcpy(&wdata->args.stateid, &zero_stateid, sizeof(wdata->args.stateid)); @@ -1129,7 +1138,7 @@ if (filp) { struct nfs4_state *state; state = (struct nfs4_state *)filp->private_data; - memcpy(&cdata->args.stateid, &state->stateid, sizeof(cdata->args.stateid)); + nfs4_copy_stateid(&cdata->args.stateid, state, cdata->lockowner); msg.rpc_cred = state->owner->so_cred; } else { memcpy(&cdata->args.stateid, &zero_stateid, sizeof(cdata->args.stateid)); @@ -1169,18 +1178,18 @@ state = nfs4_do_open(dir, name, flags, sattr, cred); put_rpccred(cred); if (!IS_ERR(state)) { - inode = igrab(state->inode); + inode = state->inode; if (flags & O_EXCL) { struct nfs_fattr fattr; int status; status = nfs4_do_setattr(NFS_SERVER(dir), &fattr, NFS_FH(inode), sattr, state); if (status != 0) { + nfs4_close_state(state, flags); iput(inode); inode = ERR_PTR(status); } } - nfs4_put_open_state(state); } else inode = (struct inode *)state; return inode; @@ -1446,14 +1455,14 @@ nfs4_proc_fsinfo(struct nfs_server *server, struct nfs_fh *fhandle, struct nfs_fsinfo *fsinfo) { - struct nfs4_compound compound; - struct nfs4_op ops[2]; + struct rpc_message msg = { + .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_FSINFO], + .rpc_argp = fhandle, + .rpc_resp = fsinfo, + }; memset(fsinfo, 0, sizeof(*fsinfo)); - nfs4_setup_compound(&compound, ops, server, "statfs"); - nfs4_setup_putfh(&compound, fhandle); - nfs4_setup_fsinfo(&compound, fsinfo); - return nfs4_call_compound(&compound, NULL, 0); + return rpc_call_sync(server->client, &msg, 0); } static int @@ -1471,12 +1480,30 @@ } static void +nfs4_restart_read(struct rpc_task *task) +{ + struct nfs_read_data *data = (struct nfs_read_data *)task->tk_calldata; + struct nfs_page *req; + + rpc_restart_call(task); + req = nfs_list_entry(data->pages.next); + if (req->wb_state) + nfs4_copy_stateid(&data->args.stateid, req->wb_state, req->wb_lockowner); + else + memcpy(&data->args.stateid, &zero_stateid, sizeof(data->args.stateid)); +} + +static void nfs4_read_done(struct rpc_task *task) { struct nfs_read_data *data = (struct nfs_read_data *) task->tk_calldata; struct inode *inode = data->inode; struct nfs_fattr *fattr = data->res.fattr; + if (nfs4_async_handle_error(task, NFS_SERVER(inode)) == -EAGAIN) { + task->tk_action = nfs4_restart_read; + return; + } if (task->tk_status > 0) renew_lease(NFS_SERVER(inode), data->timestamp); /* Check cache consistency */ @@ -1512,8 +1539,9 @@ data->res.eof = 0; data->timestamp = jiffies; + data->lockowner = req->wb_lockowner; if (req->wb_state) - memcpy(&data->args.stateid, &req->wb_state->stateid, sizeof(data->args.stateid)); + nfs4_copy_stateid(&data->args.stateid, req->wb_state, req->wb_lockowner); else memcpy(&data->args.stateid, &zero_stateid, sizeof(data->args.stateid)); @@ -1545,11 +1573,29 @@ } static void +nfs4_restart_write(struct rpc_task *task) +{ + struct nfs_write_data *data = (struct nfs_write_data *)task->tk_calldata; + struct nfs_page *req; + + rpc_restart_call(task); + req = nfs_list_entry(data->pages.next); + if (req->wb_state) + nfs4_copy_stateid(&data->args.stateid, req->wb_state, req->wb_lockowner); + else + memcpy(&data->args.stateid, &zero_stateid, sizeof(data->args.stateid)); +} + +static void nfs4_write_done(struct rpc_task *task) { struct nfs_write_data *data = (struct nfs_write_data *) task->tk_calldata; struct inode *inode = data->inode; + if (nfs4_async_handle_error(task, NFS_SERVER(inode)) == -EAGAIN) { + task->tk_action = nfs4_restart_write; + return; + } if (task->tk_status >= 0) renew_lease(NFS_SERVER(inode), data->timestamp); nfs4_write_refresh_inode(inode, data->res.fattr); @@ -1591,8 +1637,9 @@ data->res.verf = &data->verf; data->timestamp = jiffies; + data->lockowner = req->wb_lockowner; if (req->wb_state) - memcpy(&data->args.stateid, &req->wb_state->stateid, sizeof(data->args.stateid)); + nfs4_copy_stateid(&data->args.stateid, req->wb_state, req->wb_lockowner); else memcpy(&data->args.stateid, &zero_stateid, sizeof(data->args.stateid)); @@ -1612,8 +1659,13 @@ nfs4_commit_done(struct rpc_task *task) { struct nfs_write_data *data = (struct nfs_write_data *) task->tk_calldata; + struct inode *inode = data->inode; - nfs4_write_refresh_inode(data->inode, data->res.fattr); + if (nfs4_async_handle_error(task, NFS_SERVER(inode)) == -EAGAIN) { + task->tk_action = nfs4_restart_write; + return; + } + nfs4_write_refresh_inode(inode, data->res.fattr); /* Call back common NFS writeback processing */ nfs_commit_done(task); } @@ -1651,55 +1703,58 @@ } /* - * nfs4_proc_renew(): This is not one of the nfs_rpc_ops; it is a special + * nfs4_proc_async_renew(): This is not one of the nfs_rpc_ops; it is a special * standalone procedure for queueing an asynchronous RENEW. */ -struct renew_desc { - struct rpc_task task; - struct nfs4_compound compound; - struct nfs4_op ops[1]; -}; - static void renew_done(struct rpc_task *task) { - struct nfs4_compound *cp = (struct nfs4_compound *) task->tk_msg.rpc_argp; - process_lease(cp); + struct nfs4_client *clp = (struct nfs4_client *)task->tk_msg.rpc_argp; + unsigned long timestamp = (unsigned long)task->tk_calldata; + + if (task->tk_status < 0) { + switch (task->tk_status) { + case -NFS4ERR_STALE_CLIENTID: + nfs4_schedule_state_recovery(clp); + return; + } + } + spin_lock(&clp->cl_lock); + if (time_before(clp->cl_last_renewal,timestamp)) + clp->cl_last_renewal = timestamp; + spin_unlock(&clp->cl_lock); } -static void -renew_release(struct rpc_task *task) +int +nfs4_proc_async_renew(struct nfs4_client *clp) { - kfree(task->tk_calldata); + struct rpc_message msg = { + .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_RENEW], + .rpc_argp = clp, + .rpc_cred = clp->cl_cred, + }; + + return rpc_call_async(clp->cl_rpcclient, &msg, RPC_TASK_SOFT, + renew_done, (void *)jiffies); } int -nfs4_proc_renew(struct nfs_server *server) +nfs4_proc_renew(struct nfs4_client *clp) { - struct renew_desc *rp; - struct rpc_task *task; - struct nfs4_compound *cp; struct rpc_message msg = { - .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_COMPOUND], + .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_RENEW], + .rpc_argp = clp, + .rpc_cred = clp->cl_cred, }; + unsigned long now = jiffies; + int status; - rp = (struct renew_desc *) kmalloc(sizeof(*rp), GFP_KERNEL); - if (!rp) - return -ENOMEM; - cp = &rp->compound; - task = &rp->task; - - nfs4_setup_compound(cp, rp->ops, server, "renew"); - nfs4_setup_renew(cp); - - msg.rpc_argp = cp; - msg.rpc_resp = cp; - rpc_init_task(task, server->client, renew_done, RPC_TASK_ASYNC); - rpc_call_setup(task, &msg, 0); - task->tk_calldata = rp; - task->tk_release = renew_release; - - return rpc_execute(task); + status = rpc_call_sync(clp->cl_rpcclient, &msg, 0); + spin_lock(&clp->cl_lock); + if (time_before(clp->cl_last_renewal,now)) + clp->cl_last_renewal = now; + spin_unlock(&clp->cl_lock); + return status; } /* @@ -1712,43 +1767,31 @@ nfs4_proc_file_open(struct inode *inode, struct file *filp) { struct dentry *dentry = filp->f_dentry; - struct inode *dir = dentry->d_parent->d_inode; - struct rpc_cred *cred; struct nfs4_state *state; - int flags = filp->f_flags; - int status = 0; + struct rpc_cred *cred; dprintk("nfs4_proc_file_open: starting on (%.*s/%.*s)\n", (int)dentry->d_parent->d_name.len, dentry->d_parent->d_name.name, (int)dentry->d_name.len, dentry->d_name.name); - if ((flags + 1) & O_ACCMODE) - flags++; - - lock_kernel(); -/* -* We have already opened the file "O_EXCL" in nfs4_proc_create!! -* This ugliness will go away with lookup-intent... -*/ + /* Find our open stateid */ cred = rpcauth_lookupcred(NFS_SERVER(inode)->client->cl_auth, 0); - state = nfs4_do_open(dir, &dentry->d_name, flags, NULL, cred); - if (IS_ERR(state)) { - status = PTR_ERR(state); - state = NULL; - } else if (filp->f_mode & FMODE_WRITE) - nfs_set_mmcred(inode, cred); - if (inode != filp->f_dentry->d_inode) { + state = nfs4_find_state(inode, cred, filp->f_mode); + put_rpccred(cred); + if (state == NULL) { printk(KERN_WARNING "NFS: v4 raced in function %s\n", __FUNCTION__); - status = -EIO; /* ERACE actually */ - nfs4_put_open_state(state); - state = NULL; + return -EIO; /* ERACE actually */ + } + nfs4_put_open_state(state); + if (filp->f_mode & FMODE_WRITE) { + lock_kernel(); + nfs_set_mmcred(inode, state->owner->so_cred); + unlock_kernel(); } filp->private_data = state; - put_rpccred(cred); - unlock_kernel(); - return status; + return 0; } /* @@ -1780,6 +1823,120 @@ state = (struct nfs4_state *)filp->private_data; req->wb_state = state; req->wb_cred = get_rpccred(state->owner->so_cred); + req->wb_lockowner = current->files; +} + +static int +nfs4_async_handle_error(struct rpc_task *task, struct nfs_server *server) +{ + struct nfs4_client *clp = server->nfs4_state; + + if (!clp) + return 0; + switch(task->tk_status) { + case -NFS4ERR_STALE_CLIENTID: + case -NFS4ERR_STALE_STATEID: + case -NFS4ERR_EXPIRED: + rpc_sleep_on(&clp->cl_rpcwaitq, task, NULL, NULL); + nfs4_schedule_state_recovery(clp); + task->tk_status = 0; + return -EAGAIN; + case -NFS4ERR_GRACE: + case -NFS4ERR_DELAY: + rpc_delay(task, NFS4_POLL_RETRY_TIME); + task->tk_status = 0; + return -EAGAIN; + case -NFS4ERR_OLD_STATEID: + task->tk_status = 0; + return -EAGAIN; + } + return 0; +} + +int +nfs4_wait_clnt_recover(struct rpc_clnt *clnt, struct nfs4_client *clp) +{ + DEFINE_WAIT(wait); + sigset_t oldset; + int interruptible, res; + + might_sleep(); + + rpc_clnt_sigmask(clnt, &oldset); + interruptible = TASK_UNINTERRUPTIBLE; + if (clnt->cl_intr) + interruptible = TASK_INTERRUPTIBLE; + do { + res = 0; + prepare_to_wait(&clp->cl_waitq, &wait, interruptible); + nfs4_schedule_state_recovery(clp); + if (test_bit(NFS4CLNT_OK, &clp->cl_state) && + !test_bit(NFS4CLNT_SETUP_STATE, &clp->cl_state)) + break; + if (clnt->cl_intr && signalled()) { + res = -ERESTARTSYS; + break; + } + schedule(); + } while(!test_bit(NFS4CLNT_OK, &clp->cl_state)); + finish_wait(&clp->cl_waitq, &wait); + rpc_clnt_sigunmask(clnt, &oldset); + return res; +} + +static int +nfs4_delay(struct rpc_clnt *clnt) +{ + sigset_t oldset; + int res = 0; + + might_sleep(); + + rpc_clnt_sigmask(clnt, &oldset); + if (clnt->cl_intr) { + set_current_state(TASK_INTERRUPTIBLE); + schedule_timeout(NFS4_POLL_RETRY_TIME); + if (signalled()) + res = -ERESTARTSYS; + } else { + set_current_state(TASK_UNINTERRUPTIBLE); + schedule_timeout(NFS4_POLL_RETRY_TIME); + } + rpc_clnt_sigunmask(clnt, &oldset); + return res; +} + +/* This is the error handling routine for processes that are allowed + * to sleep. + */ +int +nfs4_handle_error(struct nfs_server *server, int errorcode) +{ + struct nfs4_client *clp = server->nfs4_state; + int ret = errorcode; + + switch(errorcode) { + case -NFS4ERR_STALE_CLIENTID: + case -NFS4ERR_STALE_STATEID: + case -NFS4ERR_EXPIRED: + ret = nfs4_wait_clnt_recover(server->client, clp); + break; + case -NFS4ERR_GRACE: + case -NFS4ERR_DELAY: + ret = nfs4_delay(server->client); + break; + case -NFS4ERR_OLD_STATEID: + ret = 0; + break; + default: + if (errorcode <= -1000) { + printk(KERN_WARNING "%s could not handle NFSv4 error %d\n", + __FUNCTION__, -errorcode); + ret = -EIO; + } + } + /* We failed to handle the error */ + return ret; } @@ -1796,14 +1953,325 @@ state = (struct nfs4_state *)filp->private_data; if (req->wb_state != state) return 0; + if (req->wb_lockowner != current->files) + return 0; cred = state->owner->so_cred; if (req->wb_cred != cred) return 0; return 1; } +int +nfs4_proc_setclientid(struct nfs4_client *clp, + u32 program, unsigned short port) +{ + u32 *p; + struct nfs4_setclientid setclientid; + struct timespec tv; + struct rpc_message msg = { + .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_SETCLIENTID], + .rpc_argp = &setclientid, + .rpc_resp = clp, + .rpc_cred = clp->cl_cred, + }; + + tv = CURRENT_TIME; + p = (u32*)setclientid.sc_verifier.data; + *p++ = (u32)tv.tv_sec; + *p = (u32)tv.tv_nsec; + setclientid.sc_name = clp->cl_ipaddr; + sprintf(setclientid.sc_netid, "tcp"); + sprintf(setclientid.sc_uaddr, "%s.%d.%d", clp->cl_ipaddr, port >> 8, port & 255); + setclientid.sc_prog = htonl(program); + setclientid.sc_cb_ident = 0; + + return rpc_call_sync(clp->cl_rpcclient, &msg, 0); +} + +int +nfs4_proc_setclientid_confirm(struct nfs4_client *clp) +{ + struct nfs_fsinfo fsinfo; + struct rpc_message msg = { + .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_SETCLIENTID_CONFIRM], + .rpc_argp = clp, + .rpc_resp = &fsinfo, + .rpc_cred = clp->cl_cred, + }; + unsigned long now; + int status; + + now = jiffies; + status = rpc_call_sync(clp->cl_rpcclient, &msg, 0); + if (status == 0) { + spin_lock(&clp->cl_lock); + clp->cl_lease_time = fsinfo.lease_time * HZ; + clp->cl_last_renewal = now; + spin_unlock(&clp->cl_lock); + } + return status; +} + +#define NFS4_LOCK_MINTIMEOUT (1 * HZ) +#define NFS4_LOCK_MAXTIMEOUT (30 * HZ) + +/* + * sleep, with exponential backoff, and retry the LOCK operation. + */ +static unsigned long +nfs4_set_lock_task_retry(unsigned long timeout) +{ + current->state = TASK_INTERRUPTIBLE; + schedule_timeout(timeout); + timeout <<= 1; + if (timeout > NFS4_LOCK_MAXTIMEOUT) + return NFS4_LOCK_MAXTIMEOUT; + return timeout; +} + +static inline int +nfs4_lck_type(int cmd, struct file_lock *request) +{ + /* set lock type */ + switch (request->fl_type) { + case F_RDLCK: + return IS_SETLKW(cmd) ? NFS4_READW_LT : NFS4_READ_LT; + case F_WRLCK: + return IS_SETLKW(cmd) ? NFS4_WRITEW_LT : NFS4_WRITE_LT; + case F_UNLCK: + return NFS4_WRITE_LT; + } + BUG(); +} + +static inline uint64_t +nfs4_lck_length(struct file_lock *request) +{ + if (request->fl_end == OFFSET_MAX) + return ~(uint64_t)0; + return request->fl_end - request->fl_start + 1; +} + +int +nfs4_proc_getlk(struct nfs4_state *state, int cmd, struct file_lock *request) +{ + struct inode *inode = state->inode; + struct nfs_server *server = NFS_SERVER(inode); + struct nfs4_client *clp = server->nfs4_state; + struct nfs_lockargs arg = { + .fh = NFS_FH(inode), + .type = nfs4_lck_type(cmd, request), + .offset = request->fl_start, + .length = nfs4_lck_length(request), + }; + struct nfs_lockres res = { + .server = server, + }; + struct rpc_message msg = { + .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_LOCKT], + .rpc_argp = &arg, + .rpc_resp = &res, + .rpc_cred = state->owner->so_cred, + }; + struct nfs_lowner nlo; + struct nfs4_lock_state *lsp; + int status; + + nlo.clientid = clp->cl_clientid; + down(&state->lock_sema); + lsp = nfs4_find_lock_state(state, request->fl_owner); + if (lsp) + nlo.id = lsp->ls_id; + else { + spin_lock(&clp->cl_lock); + nlo.id = nfs4_alloc_lockowner_id(clp); + spin_unlock(&clp->cl_lock); + } + arg.u.lockt = &nlo; + status = rpc_call_sync(server->client, &msg, 0); + if (!status) { + request->fl_type = F_UNLCK; + } else if (status == -NFS4ERR_DENIED) { + int64_t len, start, end; + start = res.u.denied.offset; + len = res.u.denied.length; + end = start + len - 1; + if (end < 0 || len == 0) + request->fl_end = OFFSET_MAX; + else + request->fl_end = (loff_t)end; + request->fl_start = (loff_t)start; + request->fl_type = F_WRLCK; + if (res.u.denied.type & 1) + request->fl_type = F_RDLCK; + request->fl_pid = 0; + status = 0; + } + if (lsp) + nfs4_put_lock_state(lsp); + up(&state->lock_sema); + return status; +} + +int +nfs4_proc_unlck(struct nfs4_state *state, int cmd, struct file_lock *request) +{ + struct inode *inode = state->inode; + struct nfs_server *server = NFS_SERVER(inode); + struct nfs_lockargs arg = { + .fh = NFS_FH(inode), + .type = nfs4_lck_type(cmd, request), + .offset = request->fl_start, + .length = nfs4_lck_length(request), + }; + struct nfs_lockres res = { + .server = server, + }; + struct rpc_message msg = { + .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_LOCKU], + .rpc_argp = &arg, + .rpc_resp = &res, + .rpc_cred = state->owner->so_cred, + }; + struct nfs4_lock_state *lsp; + struct nfs_locku_opargs luargs; + int status = 0; + + down(&state->lock_sema); + lsp = nfs4_find_lock_state(state, request->fl_owner); + if (!lsp) + goto out; + luargs.seqid = lsp->ls_seqid; + memcpy(&luargs.stateid, &lsp->ls_stateid, sizeof(luargs.stateid)); + arg.u.locku = &luargs; + status = rpc_call_sync(server->client, &msg, 0); + nfs4_increment_lock_seqid(status, lsp); + + if (status == 0) { + memcpy(&lsp->ls_stateid, &res.u.stateid, + sizeof(lsp->ls_stateid)); + nfs4_notify_unlck(inode, request, lsp); + } + nfs4_put_lock_state(lsp); +out: + up(&state->lock_sema); + return status; +} + +static int +nfs4_proc_setlk(struct nfs4_state *state, int cmd, struct file_lock *request) +{ + struct inode *inode = state->inode; + struct nfs_server *server = NFS_SERVER(inode); + struct nfs4_lock_state *lsp; + struct nfs_lockargs arg = { + .fh = NFS_FH(inode), + .type = nfs4_lck_type(cmd, request), + .offset = request->fl_start, + .length = nfs4_lck_length(request), + }; + struct nfs_lockres res = { + .server = server, + }; + struct rpc_message msg = { + .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_LOCK], + .rpc_argp = &arg, + .rpc_resp = &res, + .rpc_cred = state->owner->so_cred, + }; + struct nfs_lock_opargs largs = { + .new_lock_owner = 0, + }; + int status; + + down(&state->lock_sema); + lsp = nfs4_find_lock_state(state, request->fl_owner); + if (lsp == NULL) { + struct nfs4_state_owner *owner = state->owner; + struct nfs_open_to_lock otl = { + .lock_owner.clientid = server->nfs4_state->cl_clientid, + }; + status = -ENOMEM; + lsp = nfs4_alloc_lock_state(state, request->fl_owner); + if (!lsp) + goto out; + otl.lock_seqid = lsp->ls_seqid; + otl.lock_owner.id = lsp->ls_id; + memcpy(&otl.open_stateid, &state->stateid, sizeof(otl.open_stateid)); + largs.u.open_lock = &otl; + largs.new_lock_owner = 1; + arg.u.lock = &largs; + down(&owner->so_sema); + otl.open_seqid = owner->so_seqid; + status = rpc_call_sync(server->client, &msg, 0); + /* increment open_owner seqid on success, and + * seqid mutating errors */ + nfs4_increment_seqid(status, owner); + up(&owner->so_sema); + } else { + struct nfs_exist_lock el = { + .seqid = lsp->ls_seqid, + }; + memcpy(&el.stateid, &lsp->ls_stateid, sizeof(el.stateid)); + largs.u.exist_lock = ⪙ + largs.new_lock_owner = 0; + arg.u.lock = &largs; + status = rpc_call_sync(server->client, &msg, 0); + } + /* increment seqid on success, and * seqid mutating errors*/ + nfs4_increment_lock_seqid(status, lsp); + /* save the returned stateid. */ + if (status == 0) { + memcpy(&lsp->ls_stateid, &res.u.stateid, sizeof(nfs4_stateid)); + nfs4_notify_setlk(inode, request, lsp); + } else if (status == -NFS4ERR_DENIED) + status = -EAGAIN; + nfs4_put_lock_state(lsp); +out: + up(&state->lock_sema); + return status; +} + +static int +nfs4_proc_lock(struct file *filp, int cmd, struct file_lock *request) +{ + struct nfs4_state *state; + unsigned long timeout = NFS4_LOCK_MINTIMEOUT; + int status; + + /* verify open state */ + state = (struct nfs4_state *)filp->private_data; + BUG_ON(!state); + + if (request->fl_start < 0 || request->fl_end < 0) + return -EINVAL; + + if (IS_GETLK(cmd)) + return nfs4_proc_getlk(state, F_GETLK, request); + + if (!(IS_SETLK(cmd) || IS_SETLKW(cmd))) + return -EINVAL; + + if (request->fl_type == F_UNLCK) + return nfs4_proc_unlck(state, cmd, request); + + do { + status = nfs4_proc_setlk(state, cmd, request); + if ((status != -EAGAIN) || IS_SETLK(cmd)) + break; + timeout = nfs4_set_lock_task_retry(timeout); + status = -ERESTARTSYS; + if (signalled()) + break; + } while(status < 0); + + return status; +} + struct nfs_rpc_ops nfs_v4_clientops = { .version = 4, /* protocol version */ + .dentry_ops = &nfs4_dentry_operations, + .dir_inode_ops = &nfs4_dir_inode_operations, .getroot = nfs4_proc_get_root, .getattr = nfs4_proc_getattr, .setattr = nfs4_proc_setattr, @@ -1835,6 +2303,7 @@ .file_release = nfs4_proc_file_release, .request_init = nfs4_request_init, .request_compatible = nfs4_request_compatible, + .lock = nfs4_proc_lock, }; /* diff -u --recursive --new-file linux-2.6.0-test9/fs/nfs/nfs4renewd.c linux-2.6.0-25-lock/fs/nfs/nfs4renewd.c --- linux-2.6.0-test9/fs/nfs/nfs4renewd.c 2003-11-17 18:00:52.000000000 -0500 +++ linux-2.6.0-25-lock/fs/nfs/nfs4renewd.c 2003-11-17 18:08:48.000000000 -0500 @@ -54,53 +54,91 @@ #include #include -static RPC_WAITQ(nfs4_renewd_queue, "nfs4_renewd_queue"); +#define NFSDBG_FACILITY NFSDBG_PROC -static void -renewd(struct rpc_task *task) +void +nfs4_renew_state(void *data) { - struct nfs_server *server = (struct nfs_server *)task->tk_calldata; - unsigned long lease = server->lease_time; - unsigned long last = server->last_renewal; - unsigned long timeout; - - if (!server->nfs4_state) - timeout = (2 * lease) / 3; - else if (jiffies < last + lease/3) - timeout = (2 * lease) / 3 + last - jiffies; - else { + struct nfs4_client *clp = (struct nfs4_client *)data; + long lease, timeout; + unsigned long last, now; + + down_read(&clp->cl_sem); + dprintk("%s: start\n", __FUNCTION__); + /* Are there any active superblocks? */ + if (list_empty(&clp->cl_superblocks)) + goto out; + spin_lock(&clp->cl_lock); + lease = clp->cl_lease_time; + last = clp->cl_last_renewal; + now = jiffies; + timeout = (2 * lease) / 3 + (long)last - (long)now; + /* Are we close to a lease timeout? */ + if (time_after(now, last + lease/3)) { + spin_unlock(&clp->cl_lock); /* Queue an asynchronous RENEW. */ - nfs4_proc_renew(server); + nfs4_proc_async_renew(clp); timeout = (2 * lease) / 3; - } - + spin_lock(&clp->cl_lock); + } else + dprintk("%s: failed to call renewd. Reason: lease not expired \n", + __FUNCTION__); if (timeout < 5 * HZ) /* safeguard */ timeout = 5 * HZ; - task->tk_timeout = timeout; - task->tk_action = renewd; - task->tk_exit = NULL; - rpc_sleep_on(&nfs4_renewd_queue, task, NULL, NULL); - return; + dprintk("%s: requeueing work. Lease period = %ld\n", + __FUNCTION__, (timeout + HZ - 1) / HZ); + cancel_delayed_work(&clp->cl_renewd); + schedule_delayed_work(&clp->cl_renewd, timeout); + spin_unlock(&clp->cl_lock); +out: + up_read(&clp->cl_sem); + dprintk("%s: done\n", __FUNCTION__); +} + +/* Must be called with clp->cl_sem locked for writes */ +void +nfs4_schedule_state_renewal(struct nfs4_client *clp) +{ + long timeout; + + spin_lock(&clp->cl_lock); + timeout = (2 * clp->cl_lease_time) / 3 + (long)clp->cl_last_renewal + - (long)jiffies; + if (timeout < 5 * HZ) + timeout = 5 * HZ; + dprintk("%s: requeueing work. Lease period = %ld\n", + __FUNCTION__, (timeout + HZ - 1) / HZ); + cancel_delayed_work(&clp->cl_renewd); + schedule_delayed_work(&clp->cl_renewd, timeout); + spin_unlock(&clp->cl_lock); } -int -nfs4_init_renewd(struct nfs_server *server) +void +nfs4_renewd_prepare_shutdown(struct nfs_server *server) { - struct rpc_task *task; - int status; + struct nfs4_client *clp = server->nfs4_state; - lock_kernel(); - status = -ENOMEM; - task = rpc_new_task(server->client, NULL, RPC_TASK_ASYNC); - if (!task) - goto out; - task->tk_calldata = server; - task->tk_action = renewd; - status = rpc_execute(task); + if (!clp) + return; + flush_scheduled_work(); + down_write(&clp->cl_sem); + if (!list_empty(&server->nfs4_siblings)) + list_del_init(&server->nfs4_siblings); + up_write(&clp->cl_sem); +} -out: - unlock_kernel(); - return status; +/* Must be called with clp->cl_sem locked for writes */ +void +nfs4_kill_renewd(struct nfs4_client *clp) +{ + down_read(&clp->cl_sem); + if (!list_empty(&clp->cl_superblocks)) { + up_read(&clp->cl_sem); + return; + } + cancel_delayed_work(&clp->cl_renewd); + up_read(&clp->cl_sem); + flush_scheduled_work(); } /* diff -u --recursive --new-file linux-2.6.0-test9/fs/nfs/nfs4state.c linux-2.6.0-25-lock/fs/nfs/nfs4state.c --- linux-2.6.0-test9/fs/nfs/nfs4state.c 2003-11-17 17:55:13.000000000 -0500 +++ linux-2.6.0-25-lock/fs/nfs/nfs4state.c 2003-11-17 23:04:22.000000000 -0500 @@ -41,6 +41,8 @@ #include #include #include +#include +#include #define OPENOWNER_POOL_SIZE 8 @@ -55,6 +57,29 @@ static LIST_HEAD(nfs4_clientid_list); +static void nfs4_recover_state(void *); +extern void nfs4_renew_state(void *); + +void +init_nfsv4_state(struct nfs_server *server) +{ + server->nfs4_state = NULL; + INIT_LIST_HEAD(&server->nfs4_siblings); +} + +void +destroy_nfsv4_state(struct nfs_server *server) +{ + if (server->mnt_path) { + kfree(server->mnt_path); + server->mnt_path = NULL; + } + if (server->nfs4_state) { + nfs4_put_client(server->nfs4_state); + server->nfs4_state = NULL; + } +} + /* * nfs4_get_client(): returns an empty client structure * nfs4_put_client(): drops reference to client structure @@ -75,7 +100,12 @@ INIT_LIST_HEAD(&clp->cl_unused); spin_lock_init(&clp->cl_lock); atomic_set(&clp->cl_count, 1); - clp->cl_state = NFS4CLNT_NEW; + INIT_WORK(&clp->cl_recoverd, nfs4_recover_state, clp); + INIT_WORK(&clp->cl_renewd, nfs4_renew_state, clp); + INIT_LIST_HEAD(&clp->cl_superblocks); + init_waitqueue_head(&clp->cl_waitq); + INIT_RPC_WAITQ(&clp->cl_rpcwaitq, "NFS4 client"); + clp->cl_state = 1 << NFS4CLNT_NEW; } return clp; } @@ -93,6 +123,10 @@ kfree(sp); } BUG_ON(!list_empty(&clp->cl_state_owners)); + if (clp->cl_cred) + put_rpccred(clp->cl_cred); + if (clp->cl_rpcclient) + rpc_shutdown_client(clp->cl_rpcclient); kfree(clp); } @@ -126,10 +160,14 @@ return; list_del(&clp->cl_servers); spin_unlock(&state_spinlock); + BUG_ON(!list_empty(&clp->cl_superblocks)); + wake_up_all(&clp->cl_waitq); + rpc_wake_up(&clp->cl_rpcwaitq); + nfs4_kill_renewd(clp); nfs4_free_client(clp); } -static inline u32 +u32 nfs4_alloc_lockowner_id(struct nfs4_client *clp) { return clp->cl_lockowner_id ++; @@ -145,11 +183,29 @@ atomic_inc(&sp->so_count); sp->so_cred = cred; list_move(&sp->so_list, &clp->cl_state_owners); + sp->so_generation = clp->cl_generation; clp->cl_nunused--; } return sp; } +static struct nfs4_state_owner * +nfs4_find_state_owner(struct nfs4_client *clp, struct rpc_cred *cred) +{ + struct nfs4_state_owner *sp, *res = NULL; + + list_for_each_entry(sp, &clp->cl_state_owners, so_list) { + if (sp->so_cred != cred) + continue; + atomic_inc(&sp->so_count); + /* Move to the head of the list */ + list_move(&sp->so_list, &clp->cl_state_owners); + res = sp; + break; + } + return res; +} + /* * nfs4_alloc_state_owner(): this is called on the OPEN or CREATE path to * create a new state_owner. @@ -170,6 +226,15 @@ return sp; } +static void +nfs4_unhash_state_owner(struct nfs4_state_owner *sp) +{ + struct nfs4_client *clp = sp->so_client; + spin_lock(&clp->cl_lock); + list_del_init(&sp->so_list); + spin_unlock(&clp->cl_lock); +} + struct nfs4_state_owner * nfs4_get_state_owner(struct nfs_server *server, struct rpc_cred *cred) { @@ -179,19 +244,25 @@ get_rpccred(cred); new = nfs4_alloc_state_owner(); spin_lock(&clp->cl_lock); - sp = nfs4_client_grab_unused(clp, cred); + sp = nfs4_find_state_owner(clp, cred); + if (sp == NULL) + sp = nfs4_client_grab_unused(clp, cred); if (sp == NULL && new != NULL) { list_add(&new->so_list, &clp->cl_state_owners); new->so_client = clp; new->so_id = nfs4_alloc_lockowner_id(clp); new->so_cred = cred; + new->so_generation = clp->cl_generation; sp = new; new = NULL; } spin_unlock(&clp->cl_lock); if (new) kfree(new); - if (!sp) + if (sp) { + if (!test_bit(NFS4CLNT_OK, &clp->cl_state)) + nfs4_wait_clnt_recover(server->client, clp); + } else put_rpccred(cred); return sp; } @@ -206,6 +277,8 @@ return; if (clp->cl_nunused >= OPENOWNER_POOL_SIZE) goto out_free; + if (list_empty(&sp->so_list)) + goto out_free; list_move(&sp->so_list, &clp->cl_unused); clp->cl_nunused++; spin_unlock(&clp->cl_lock); @@ -227,24 +300,32 @@ state = kmalloc(sizeof(*state), GFP_KERNEL); if (!state) return NULL; - state->pid = current->pid; state->state = 0; + state->flags = 0; memset(state->stateid.data, 0, sizeof(state->stateid.data)); atomic_set(&state->count, 1); + INIT_LIST_HEAD(&state->lock_states); + init_MUTEX(&state->lock_sema); + rwlock_init(&state->state_lock); return state; } static struct nfs4_state * -__nfs4_find_state_bypid(struct inode *inode, pid_t pid) +__nfs4_find_state(struct inode *inode, struct rpc_cred *cred, mode_t mode) { struct nfs_inode *nfsi = NFS_I(inode); struct nfs4_state *state; + mode &= (FMODE_READ|FMODE_WRITE); list_for_each_entry(state, &nfsi->open_states, inode_states) { - if (state->pid == pid) { - atomic_inc(&state->count); - return state; - } + if (state->owner->so_cred != cred) + continue; + if ((state->state & (FMODE_READ|FMODE_WRITE)) != mode) + continue; + /* Add the state to the head of the inode's list */ + list_move(&state->inode_states, &nfsi->open_states); + atomic_inc(&state->count); + return state; } return NULL; } @@ -257,6 +338,8 @@ list_for_each_entry(state, &nfsi->open_states, inode_states) { if (state->owner == owner) { + /* Add the state to the head of the inode's list */ + list_move(&state->inode_states, &nfsi->open_states); atomic_inc(&state->count); return state; } @@ -265,16 +348,12 @@ } struct nfs4_state * -nfs4_find_state_bypid(struct inode *inode, pid_t pid) +nfs4_find_state(struct inode *inode, struct rpc_cred *cred, mode_t mode) { - struct nfs_inode *nfsi = NFS_I(inode); struct nfs4_state *state; spin_lock(&inode->i_lock); - state = __nfs4_find_state_bypid(inode, pid); - /* Add the state to the tail of the inode's list */ - if (state) - list_move_tail(&state->inode_states, &nfsi->open_states); + state = __nfs4_find_state(inode, cred, mode); spin_unlock(&inode->i_lock); return state; } @@ -307,7 +386,6 @@ atomic_inc(&owner->so_count); list_add(&state->inode_states, &nfsi->open_states); state->inode = inode; - atomic_inc(&inode->i_count); spin_unlock(&inode->i_lock); } else { spin_unlock(&inode->i_lock); @@ -323,6 +401,7 @@ { struct inode *inode = state->inode; struct nfs4_state_owner *owner = state->owner; + int status = 0; if (!atomic_dec_and_lock(&state->count, &inode->i_lock)) return; @@ -330,14 +409,223 @@ spin_unlock(&inode->i_lock); down(&owner->so_sema); list_del(&state->open_states); - if (state->state != 0) - nfs4_do_close(inode, state); + if (state->state != 0) { + do { + status = nfs4_do_close(inode, state); + if (!status) + break; + up(&owner->so_sema); + status = nfs4_handle_error(NFS_SERVER(inode), status); + down(&owner->so_sema); + } while (!status); + } up(&owner->so_sema); - iput(inode); nfs4_free_open_state(state); nfs4_put_state_owner(owner); } +void +nfs4_close_state(struct nfs4_state *state, mode_t mode) +{ + struct inode *inode = state->inode; + struct nfs4_state_owner *owner = state->owner; + int newstate; + int status = 0; + + down(&owner->so_sema); + if (mode & FMODE_READ) + state->nreaders--; + if (mode & FMODE_WRITE) + state->nwriters--; + do { + newstate = 0; + if (state->nreaders) + newstate |= FMODE_READ; + if (state->nwriters) + newstate |= FMODE_WRITE; + if ((state->state & (FMODE_READ|FMODE_WRITE)) == newstate) + break; + if (newstate != 0) + status = nfs4_do_downgrade(inode, state, newstate); + else + status = nfs4_do_close(inode, state); + if (!status) { + state->state = newstate; + break; + } + up(&owner->so_sema); + status = nfs4_handle_error(NFS_SERVER(inode), status); + down(&owner->so_sema); + } while (!status); + up(&owner->so_sema); + nfs4_put_open_state(state); +} + +/* + * Search the state->lock_states for an existing lock_owner + * that is compatible with current->files + */ +static struct nfs4_lock_state * +__nfs4_find_lock_state(struct nfs4_state *state, fl_owner_t fl_owner) +{ + struct nfs4_lock_state *pos; + list_for_each_entry(pos, &state->lock_states, ls_locks) { + if (pos->ls_owner != fl_owner) + continue; + atomic_inc(&pos->ls_count); + return pos; + } + return NULL; +} + +struct nfs4_lock_state * +nfs4_find_lock_state(struct nfs4_state *state, fl_owner_t fl_owner) +{ + struct nfs4_lock_state *lsp; + read_lock(&state->state_lock); + lsp = __nfs4_find_lock_state(state, fl_owner); + read_unlock(&state->state_lock); + return lsp; +} + +/* + * Return a compatible lock_state. If no initialized lock_state structure + * exists, return an uninitialized one. + * + * The caller must be holding state->lock_sema + */ +struct nfs4_lock_state * +nfs4_alloc_lock_state(struct nfs4_state *state, fl_owner_t fl_owner) +{ + struct nfs4_lock_state *lsp; + struct nfs4_client *clp = state->owner->so_client; + + lsp = kmalloc(sizeof(*lsp), GFP_KERNEL); + if (lsp == NULL) + return NULL; + lsp->ls_seqid = 0; /* arbitrary */ + lsp->ls_id = -1; + memset(lsp->ls_stateid.data, 0, sizeof(lsp->ls_stateid.data)); + atomic_set(&lsp->ls_count, 1); + lsp->ls_owner = fl_owner; + lsp->ls_parent = state; + INIT_LIST_HEAD(&lsp->ls_locks); + spin_lock(&clp->cl_lock); + lsp->ls_id = nfs4_alloc_lockowner_id(clp); + spin_unlock(&clp->cl_lock); + return lsp; +} + +/* + * Byte-range lock aware utility to initialize the stateid of read/write + * requests. + */ +void +nfs4_copy_stateid(nfs4_stateid *dst, struct nfs4_state *state, fl_owner_t fl_owner) +{ + if (test_bit(LK_STATE_IN_USE, &state->flags)) { + struct nfs4_lock_state *lsp; + + lsp = nfs4_find_lock_state(state, fl_owner); + if (lsp) { + memcpy(dst, &lsp->ls_stateid, sizeof(*dst)); + nfs4_put_lock_state(lsp); + return; + } + } + memcpy(dst, &state->stateid, sizeof(*dst)); +} + +/* +* Called with state->lock_sema held. +*/ +void +nfs4_increment_lock_seqid(int status, struct nfs4_lock_state *lsp) +{ + if (status == NFS_OK || seqid_mutating_err(-status)) + lsp->ls_seqid++; +} + +/* +* Check to see if the request lock (type FL_UNLK) effects the fl lock. +* +* fl and request must have the same posix owner +* +* return: +* 0 -> fl not effected by request +* 1 -> fl consumed by request +*/ + +static int +nfs4_check_unlock(struct file_lock *fl, struct file_lock *request) +{ + if (fl->fl_start >= request->fl_start && fl->fl_end <= request->fl_end) + return 1; + return 0; +} + +/* + * Post an initialized lock_state on the state->lock_states list. + */ +void +nfs4_notify_setlk(struct inode *inode, struct file_lock *request, struct nfs4_lock_state *lsp) +{ + struct nfs4_state *state = lsp->ls_parent; + + if (!list_empty(&lsp->ls_locks)) + return; + write_lock(&state->state_lock); + list_add(&lsp->ls_locks, &state->lock_states); + set_bit(LK_STATE_IN_USE, &state->flags); + write_unlock(&state->state_lock); +} + +/* + * to decide to 'reap' lock state: + * 1) search i_flock for file_locks with fl.lock_state = to ls. + * 2) determine if unlock will consume found lock. + * if so, reap + * + * else, don't reap. + * + */ +void +nfs4_notify_unlck(struct inode *inode, struct file_lock *request, struct nfs4_lock_state *lsp) +{ + struct nfs4_state *state = lsp->ls_parent; + struct file_lock *fl; + + for (fl = inode->i_flock; fl != NULL; fl = fl->fl_next) { + if (!(fl->fl_flags & FL_POSIX)) + continue; + if (fl->fl_owner != lsp->ls_owner) + continue; + /* Exit if we find at least one lock which is not consumed */ + if (nfs4_check_unlock(fl,request) == 0) + return; + } + + write_lock(&state->state_lock); + list_del_init(&lsp->ls_locks); + if (list_empty(&state->lock_states)) + clear_bit(LK_STATE_IN_USE, &state->flags); + write_unlock(&state->state_lock); +} + +/* + * Release reference to lock_state, and free it if we see that + * it is no longer in use + */ +void +nfs4_put_lock_state(struct nfs4_lock_state *lsp) +{ + if (!atomic_dec_and_test(&lsp->ls_count)) + return; + if (!list_empty(&lsp->ls_locks)) + return; + kfree(lsp); +} + /* * Called with sp->so_sema held. * @@ -346,10 +634,172 @@ * see comments nfs_fs.h:seqid_mutating_error() */ void -nfs4_increment_seqid(u32 status, struct nfs4_state_owner *sp) +nfs4_increment_seqid(int status, struct nfs4_state_owner *sp) { - if (status == NFS_OK || seqid_mutating_err(status)) + if (status == NFS_OK || seqid_mutating_err(-status)) sp->so_seqid++; + /* If the server returns BAD_SEQID, unhash state_owner here */ + if (status == -NFS4ERR_BAD_SEQID) + nfs4_unhash_state_owner(sp); +} + +static int reclaimer(void *); +struct reclaimer_args { + struct nfs4_client *clp; + struct completion complete; +}; + +/* + * State recovery routine + */ +void +nfs4_recover_state(void *data) +{ + struct nfs4_client *clp = (struct nfs4_client *)data; + struct reclaimer_args args = { + .clp = clp, + }; + might_sleep(); + + init_completion(&args.complete); + + down_read(&clp->cl_sem); + if (test_and_set_bit(NFS4CLNT_SETUP_STATE, &clp->cl_state)) + goto out_failed; + if (kernel_thread(reclaimer, &args, CLONE_KERNEL) < 0) + goto out_failed_clear; + wait_for_completion(&args.complete); + return; +out_failed_clear: + smp_mb__before_clear_bit(); + clear_bit(NFS4CLNT_SETUP_STATE, &clp->cl_state); + smp_mb__after_clear_bit(); + wake_up_all(&clp->cl_waitq); + rpc_wake_up(&clp->cl_rpcwaitq); +out_failed: + up_read(&clp->cl_sem); +} + +/* + * Schedule a state recovery attempt + */ +void +nfs4_schedule_state_recovery(struct nfs4_client *clp) +{ + if (!clp) + return; + smp_mb__before_clear_bit(); + clear_bit(NFS4CLNT_OK, &clp->cl_state); + smp_mb__after_clear_bit(); + schedule_work(&clp->cl_recoverd); +} + +static int +nfs4_reclaim_open_state(struct nfs4_state_owner *sp) +{ + struct nfs4_state *state; + int status = 0; + + list_for_each_entry(state, &sp->so_states, open_states) { + status = nfs4_open_reclaim(sp, state); + if (status >= 0) + continue; + switch (status) { + default: + printk(KERN_ERR "%s: unhandled error %d. Zeroing state\n", + __FUNCTION__, status); + case -NFS4ERR_EXPIRED: + case -NFS4ERR_NO_GRACE: + case -NFS4ERR_RECLAIM_BAD: + case -NFS4ERR_RECLAIM_CONFLICT: + /* + * Open state on this file cannot be recovered + * All we can do is revert to using the zero stateid. + */ + memset(state->stateid.data, 0, + sizeof(state->stateid.data)); + /* Mark the file as being 'closed' */ + state->state = 0; + break; + case -NFS4ERR_STALE_CLIENTID: + goto out_err; + } + } + return 0; +out_err: + return status; +} + +static int +reclaimer(void *ptr) +{ + struct reclaimer_args *args = (struct reclaimer_args *)ptr; + struct nfs4_client *clp = args->clp; + struct nfs4_state_owner *sp; + int generation; + int status; + + daemonize("%u.%u.%u.%u-reclaim", NIPQUAD(clp->cl_addr)); + allow_signal(SIGKILL); + + complete(&args->complete); + + /* Are there any NFS mounts out there? */ + if (list_empty(&clp->cl_superblocks)) + goto out; + if (!test_bit(NFS4CLNT_NEW, &clp->cl_state)) { + status = nfs4_proc_renew(clp); + if (status == 0) { + set_bit(NFS4CLNT_OK, &clp->cl_state); + goto out; + } + } + status = nfs4_proc_setclientid(clp, 0, 0); + if (status) + goto out_error; + status = nfs4_proc_setclientid_confirm(clp); + if (status) + goto out_error; + generation = ++(clp->cl_generation); + clear_bit(NFS4CLNT_NEW, &clp->cl_state); + set_bit(NFS4CLNT_OK, &clp->cl_state); + up_read(&clp->cl_sem); + nfs4_schedule_state_renewal(clp); +restart_loop: + spin_lock(&clp->cl_lock); + list_for_each_entry(sp, &clp->cl_state_owners, so_list) { + if (sp->so_generation - generation <= 0) + continue; + atomic_inc(&sp->so_count); + spin_unlock(&clp->cl_lock); + down(&sp->so_sema); + if (sp->so_generation - generation < 0) { + smp_rmb(); + sp->so_generation = clp->cl_generation; + status = nfs4_reclaim_open_state(sp); + } + up(&sp->so_sema); + nfs4_put_state_owner(sp); + if (status < 0) { + if (status == -NFS4ERR_STALE_CLIENTID) + nfs4_schedule_state_recovery(clp); + goto out; + } + goto restart_loop; + } + spin_unlock(&clp->cl_lock); +out: + smp_mb__before_clear_bit(); + clear_bit(NFS4CLNT_SETUP_STATE, &clp->cl_state); + smp_mb__after_clear_bit(); + wake_up_all(&clp->cl_waitq); + rpc_wake_up(&clp->cl_rpcwaitq); + return 0; +out_error: + printk(KERN_WARNING "Error: state recovery failed on NFSv4 server %u.%u.%u.%u\n", + NIPQUAD(clp->cl_addr.s_addr)); + up_read(&clp->cl_sem); + goto out; } /* diff -u --recursive --new-file linux-2.6.0-test9/fs/nfs/nfs4xdr.c linux-2.6.0-25-lock/fs/nfs/nfs4xdr.c --- linux-2.6.0-test9/fs/nfs/nfs4xdr.c 2003-11-17 18:02:47.000000000 -0500 +++ linux-2.6.0-25-lock/fs/nfs/nfs4xdr.c 2003-11-17 21:27:05.000000000 -0500 @@ -57,7 +57,7 @@ /* Mapping from NFS error code to "errno" error code. */ #define errno_NFSERR_IO EIO -extern int nfs_stat_to_errno(int); +static int nfs_stat_to_errno(int); /* NFSv4 COMPOUND tags are only wanted for debugging purposes */ #ifdef DEBUG @@ -66,6 +66,10 @@ #define NFS4_MAXTAGLEN 0 #endif +/* lock,open owner id: + * we currently use size 1 (u32) out of (NFS4_OPAQUE_LIMIT >> 2) + */ +#define owner_id_maxsz 1 + 1 #define compound_encode_hdr_maxsz 3 + (NFS4_MAXTAGLEN >> 2) #define compound_decode_hdr_maxsz 2 + (NFS4_MAXTAGLEN >> 2) #define op_encode_hdr_maxsz 1 @@ -73,6 +77,8 @@ #define encode_putfh_maxsz op_encode_hdr_maxsz + 1 + \ (NFS4_FHSIZE >> 2) #define decode_putfh_maxsz op_decode_hdr_maxsz +#define encode_putrootfh_maxsz op_encode_hdr_maxsz +#define decode_putrootfh_maxsz op_decode_hdr_maxsz #define encode_getfh_maxsz op_encode_hdr_maxsz #define decode_getfh_maxsz op_decode_hdr_maxsz + 1 + \ (NFS4_FHSIZE >> 2) @@ -90,6 +96,25 @@ #define decode_pre_write_getattr_maxsz op_decode_hdr_maxsz + 5 #define encode_post_write_getattr_maxsz op_encode_hdr_maxsz + 2 #define decode_post_write_getattr_maxsz op_decode_hdr_maxsz + 13 +#define encode_fsinfo_maxsz op_encode_hdr_maxsz + 2 +#define decode_fsinfo_maxsz op_decode_hdr_maxsz + 11 +#define encode_renew_maxsz op_encode_hdr_maxsz + 3 +#define decode_renew_maxsz op_decode_hdr_maxsz +#define encode_setclientid_maxsz \ + op_encode_hdr_maxsz + \ + 4 /*server->ip_addr*/ + \ + 1 /*Netid*/ + \ + 6 /*uaddr*/ + \ + 6 + (NFS4_VERIFIER_SIZE >> 2) +#define decode_setclientid_maxsz \ + op_decode_hdr_maxsz + \ + 2 + \ + 1024 /* large value for CLID_INUSE */ +#define encode_setclientid_confirm_maxsz \ + op_encode_hdr_maxsz + \ + 3 + (NFS4_VERIFIER_SIZE >> 2) +#define decode_setclientid_confirm_maxsz \ + op_decode_hdr_maxsz #define NFS4_enc_compound_sz 1024 /* XXX: large enough? */ #define NFS4_dec_compound_sz 1024 /* XXX: large enough? */ @@ -145,6 +170,24 @@ #define NFS4_dec_open_confirm_sz compound_decode_hdr_maxsz + \ decode_putfh_maxsz + \ op_decode_hdr_maxsz + 4 +#define NFS4_enc_open_reclaim_sz compound_encode_hdr_maxsz + \ + encode_putfh_maxsz + \ + op_encode_hdr_maxsz + \ + 11 + \ + encode_getattr_maxsz +#define NFS4_dec_open_reclaim_sz compound_decode_hdr_maxsz + \ + decode_putfh_maxsz + \ + op_decode_hdr_maxsz + \ + 4 + 5 + 2 + 3 + \ + decode_getattr_maxsz +#define NFS4_enc_open_downgrade_sz \ + compound_encode_hdr_maxsz + \ + encode_putfh_maxsz + \ + op_encode_hdr_maxsz + 7 +#define NFS4_dec_open_downgrade_sz \ + compound_decode_hdr_maxsz + \ + decode_putfh_maxsz + \ + op_decode_hdr_maxsz + 4 #define NFS4_enc_close_sz compound_encode_hdr_maxsz + \ encode_putfh_maxsz + \ op_encode_hdr_maxsz + 5 @@ -159,6 +202,60 @@ #define NFS4_dec_setattr_sz compound_decode_hdr_maxsz + \ decode_putfh_maxsz + \ op_decode_hdr_maxsz + 3 +#define NFS4_enc_fsinfo_sz compound_encode_hdr_maxsz + \ + encode_putfh_maxsz + \ + encode_fsinfo_maxsz +#define NFS4_dec_fsinfo_sz compound_decode_hdr_maxsz + \ + decode_putfh_maxsz + \ + decode_fsinfo_maxsz +#define NFS4_enc_renew_sz compound_encode_hdr_maxsz + \ + encode_renew_maxsz +#define NFS4_dec_renew_sz compound_decode_hdr_maxsz + \ + decode_renew_maxsz +#define NFS4_enc_setclientid_sz compound_encode_hdr_maxsz + \ + encode_setclientid_maxsz +#define NFS4_dec_setclientid_sz compound_decode_hdr_maxsz + \ + decode_setclientid_maxsz +#define NFS4_enc_setclientid_confirm_sz \ + compound_encode_hdr_maxsz + \ + encode_setclientid_confirm_maxsz + \ + encode_putrootfh_maxsz + \ + encode_fsinfo_maxsz +#define NFS4_dec_setclientid_confirm_sz \ + compound_decode_hdr_maxsz + \ + decode_setclientid_confirm_maxsz + \ + decode_putrootfh_maxsz + \ + decode_fsinfo_maxsz +#define NFS4_enc_lock_sz compound_encode_hdr_maxsz + \ + encode_putfh_maxsz + \ + encode_getattr_maxsz + \ + op_encode_hdr_maxsz + \ + 1 + 1 + 2 + 2 + \ + 1 + 4 + 1 + 2 + \ + owner_id_maxsz +#define NFS4_dec_lock_sz compound_decode_hdr_maxsz + \ + decode_putfh_maxsz + \ + decode_getattr_maxsz + \ + op_decode_hdr_maxsz + \ + 2 + 2 + 1 + 2 + \ + owner_id_maxsz +#define NFS4_enc_lockt_sz compound_encode_hdr_maxsz + \ + encode_putfh_maxsz + \ + encode_getattr_maxsz + \ + op_encode_hdr_maxsz + \ + 1 + 2 + 2 + 2 + \ + owner_id_maxsz +#define NFS4_dec_lockt_sz NFS4_dec_lock_sz +#define NFS4_enc_locku_sz compound_encode_hdr_maxsz + \ + encode_putfh_maxsz + \ + encode_getattr_maxsz + \ + op_encode_hdr_maxsz + \ + 1 + 1 + 4 + 2 + 2 +#define NFS4_dec_locku_sz compound_decode_hdr_maxsz + \ + decode_putfh_maxsz + \ + decode_getattr_maxsz + \ + op_decode_hdr_maxsz + 4 + static struct { @@ -241,8 +338,8 @@ { char owner_name[256]; char owner_group[256]; - int owner_namelen = 0; - int owner_grouplen = 0; + int owner_namelen = sizeof(owner_name); + int owner_grouplen = sizeof(owner_group); uint32_t *p; uint32_t *q; int len; @@ -503,6 +600,15 @@ } static int +encode_fsinfo(struct xdr_stream *xdr) +{ + return encode_getattr_one(xdr, FATTR4_WORD0_MAXFILESIZE + | FATTR4_WORD0_MAXREAD + | FATTR4_WORD0_MAXWRITE + | FATTR4_WORD0_LEASE_TIME); +} + +static int encode_getfh(struct xdr_stream *xdr) { uint32_t *p; @@ -526,6 +632,80 @@ return 0; } +/* + * opcode,type,reclaim,offset,length,new_lock_owner = 32 + * open_seqid,open_stateid,lock_seqid,lock_owner.clientid, lock_owner.id = 40 + */ +static int +encode_lock(struct xdr_stream *xdr, struct nfs_lockargs *arg) +{ + uint32_t *p; + struct nfs_lock_opargs *opargs = arg->u.lock; + + RESERVE_SPACE(32); + WRITE32(OP_LOCK); + WRITE32(arg->type); + WRITE32(opargs->reclaim); + WRITE64(arg->offset); + WRITE64(arg->length); + WRITE32(opargs->new_lock_owner); + if (opargs->new_lock_owner){ + struct nfs_open_to_lock *ol = opargs->u.open_lock; + + RESERVE_SPACE(40); + WRITE32(ol->open_seqid); + WRITEMEM(&ol->open_stateid, sizeof(ol->open_stateid)); + WRITE32(ol->lock_seqid); + WRITE64(ol->lock_owner.clientid); + WRITE32(4); + WRITE32(ol->lock_owner.id); + } + else { + struct nfs_exist_lock *el = opargs->u.exist_lock; + + RESERVE_SPACE(20); + WRITEMEM(&el->stateid, sizeof(el->stateid)); + WRITE32(el->seqid); + } + + return 0; +} + +static int +encode_lockt(struct xdr_stream *xdr, struct nfs_lockargs *arg) +{ + uint32_t *p; + struct nfs_lowner *opargs = arg->u.lockt; + + RESERVE_SPACE(40); + WRITE32(OP_LOCKT); + WRITE32(arg->type); + WRITE64(arg->offset); + WRITE64(arg->length); + WRITE64(opargs->clientid); + WRITE32(4); + WRITE32(opargs->id); + + return 0; +} + +static int +encode_locku(struct xdr_stream *xdr, struct nfs_lockargs *arg) +{ + uint32_t *p; + struct nfs_locku_opargs *opargs = arg->u.locku; + + RESERVE_SPACE(44); + WRITE32(OP_LOCKU); + WRITE32(arg->type); + WRITE32(opargs->seqid); + WRITEMEM(&opargs->stateid, sizeof(opargs->stateid)); + WRITE64(arg->offset); + WRITE64(arg->length); + + return 0; +} + static int encode_lookup(struct xdr_stream *xdr, struct nfs4_lookup *lookup) { @@ -615,6 +795,57 @@ static int +encode_open_reclaim(struct xdr_stream *xdr, struct nfs_open_reclaimargs *arg) +{ + uint32_t *p; + + /* + * opcode 4, seqid 4, share_access 4, share_deny 4, clientid 8, ownerlen 4, + * owner 4, opentype 4, claim 4, delegation_type 4 = 44 + */ + RESERVE_SPACE(44); + WRITE32(OP_OPEN); + WRITE32(arg->seqid); + switch (arg->share_access) { + case FMODE_READ: + WRITE32(NFS4_SHARE_ACCESS_READ); + break; + case FMODE_WRITE: + WRITE32(NFS4_SHARE_ACCESS_WRITE); + break; + case FMODE_READ|FMODE_WRITE: + WRITE32(NFS4_SHARE_ACCESS_BOTH); + break; + default: + BUG(); + } + WRITE32(0); /* for linux, share_deny = 0 always */ + WRITE64(arg->clientid); + WRITE32(4); + WRITE32(arg->id); + WRITE32(NFS4_OPEN_NOCREATE); + WRITE32(NFS4_OPEN_CLAIM_PREVIOUS); + WRITE32(NFS4_OPEN_DELEGATE_NONE); + return 0; +} + +static int +encode_open_downgrade(struct xdr_stream *xdr, struct nfs_closeargs *arg) +{ + uint32_t *p; + + RESERVE_SPACE(16+sizeof(arg->stateid.data)); + WRITE32(OP_OPEN_DOWNGRADE); + WRITEMEM(arg->stateid.data, sizeof(arg->stateid.data)); + WRITE32(arg->seqid); + WRITE32(arg->share_access); + /* No deny modes */ + WRITE32(0); + + return 0; +} + +static int encode_putfh(struct xdr_stream *xdr, struct nfs_fh *fh) { int len = fh->size; @@ -891,21 +1122,12 @@ case OP_RENAME: status = encode_rename(xdr, &cp->ops[i].u.rename); break; - case OP_RENEW: - status = encode_renew(xdr, cp->ops[i].u.renew); - break; case OP_RESTOREFH: status = encode_restorefh(xdr); break; case OP_SAVEFH: status = encode_savefh(xdr); break; - case OP_SETCLIENTID: - status = encode_setclientid(xdr, &cp->ops[i].u.setclientid); - break; - case OP_SETCLIENTID_CONFIRM: - status = encode_setclientid_confirm(xdr, cp->ops[i].u.setclientid_confirm); - break; default: BUG(); } @@ -1015,6 +1237,119 @@ return status; } +/* + * Encode an OPEN request + */ +static int +nfs4_xdr_enc_open_reclaim(struct rpc_rqst *req, uint32_t *p, + struct nfs_open_reclaimargs *args) +{ + struct xdr_stream xdr; + struct compound_hdr hdr = { + .nops = 3, + }; + int status; + + xdr_init_encode(&xdr, &req->rq_snd_buf, p); + encode_compound_hdr(&xdr, &hdr); + status = encode_putfh(&xdr, args->fh); + if (status) + goto out; + status = encode_open_reclaim(&xdr, args); + if (status) + goto out; + status = encode_getattr(&xdr, args->f_getattr); +out: + return status; +} + +/* + * Encode an OPEN_DOWNGRADE request + */ +static int +nfs4_xdr_enc_open_downgrade(struct rpc_rqst *req, uint32_t *p, struct nfs_closeargs *args) +{ + struct xdr_stream xdr; + struct compound_hdr hdr = { + .nops = 2, + }; + int status; + + xdr_init_encode(&xdr, &req->rq_snd_buf, p); + encode_compound_hdr(&xdr, &hdr); + status = encode_putfh(&xdr, args->fh); + if (status) + goto out; + status = encode_open_downgrade(&xdr, args); +out: + return status; +} + +/* + * Encode a LOCK request + */ +static int +nfs4_xdr_enc_lock(struct rpc_rqst *req, uint32_t *p, struct nfs_lockargs *args) +{ + struct xdr_stream xdr; + struct compound_hdr hdr = { + .nops = 2, + }; + int status; + + xdr_init_encode(&xdr, &req->rq_snd_buf, p); + encode_compound_hdr(&xdr, &hdr); + status = encode_putfh(&xdr, args->fh); + if(status) + goto out; + status = encode_lock(&xdr, args); +out: + return status; +} + +/* + * Encode a LOCKT request + */ +static int +nfs4_xdr_enc_lockt(struct rpc_rqst *req, uint32_t *p, struct nfs_lockargs *args) +{ + struct xdr_stream xdr; + struct compound_hdr hdr = { + .nops = 2, + }; + int status; + + xdr_init_encode(&xdr, &req->rq_snd_buf, p); + encode_compound_hdr(&xdr, &hdr); + status = encode_putfh(&xdr, args->fh); + if(status) + goto out; + status = encode_lockt(&xdr, args); +out: + return status; +} + +/* + * Encode a LOCKU request + */ +static int +nfs4_xdr_enc_locku(struct rpc_rqst *req, uint32_t *p, struct nfs_lockargs *args) +{ + struct xdr_stream xdr; + struct compound_hdr hdr = { + .nops = 2, + }; + int status; + + xdr_init_encode(&xdr, &req->rq_snd_buf, p); + encode_compound_hdr(&xdr, &hdr); + status = encode_putfh(&xdr, args->fh); + if(status) + goto out; + status = encode_locku(&xdr, args); +out: + return status; +} /* * Encode a READ request @@ -1134,6 +1469,82 @@ } /* + * FSINFO request + */ +static int +nfs4_xdr_enc_fsinfo(struct rpc_rqst *req, uint32_t *p, void *fhandle) +{ + struct xdr_stream xdr; + struct compound_hdr hdr = { + .nops = 2, + }; + int status; + + xdr_init_encode(&xdr, &req->rq_snd_buf, p); + encode_compound_hdr(&xdr, &hdr); + status = encode_putfh(&xdr, fhandle); + if (!status) + status = encode_fsinfo(&xdr); + return status; +} + +/* + * a RENEW request + */ +static int +nfs4_xdr_enc_renew(struct rpc_rqst *req, uint32_t *p, struct nfs4_client *clp) +{ + struct xdr_stream xdr; + struct compound_hdr hdr = { + .nops = 1, + }; + + xdr_init_encode(&xdr, &req->rq_snd_buf, p); + encode_compound_hdr(&xdr, &hdr); + return encode_renew(&xdr, clp); +} + +/* + * a SETCLIENTID request + */ +static int +nfs4_xdr_enc_setclientid(struct rpc_rqst *req, uint32_t *p, + struct nfs4_setclientid *sc) +{ + struct xdr_stream xdr; + struct compound_hdr hdr = { + .nops = 1, + }; + + xdr_init_encode(&xdr, &req->rq_snd_buf, p); + encode_compound_hdr(&xdr, &hdr); + return encode_setclientid(&xdr, sc); +} + +/* + * a SETCLIENTID_CONFIRM request + */ +static int +nfs4_xdr_enc_setclientid_confirm(struct rpc_rqst *req, uint32_t *p, + struct nfs4_client *clp) +{ + struct xdr_stream xdr; + struct compound_hdr hdr = { + .nops = 3, + }; + int status; + + xdr_init_encode(&xdr, &req->rq_snd_buf, p); + encode_compound_hdr(&xdr, &hdr); + status = encode_setclientid_confirm(&xdr, clp); + if (!status) + status = encode_putrootfh(&xdr); + if (!status) + status = encode_fsinfo(&xdr); + return status; +} + +/* * START OF "GENERIC" DECODE ROUTINES. * These may look a little ugly since they are imported from a "generic" * set of XDR encode/decode routines which are intended to be shared by @@ -1295,7 +1706,6 @@ } extern uint32_t nfs4_fattr_bitmap[2]; -extern uint32_t nfs4_fsinfo_bitmap[2]; extern uint32_t nfs4_fsstat_bitmap[2]; extern uint32_t nfs4_pathconf_bitmap[2]; @@ -1305,7 +1715,6 @@ { struct nfs_fattr *nfp = getattr->gt_attrs; struct nfs_fsstat *fsstat = getattr->gt_fsstat; - struct nfs_fsinfo *fsinfo = getattr->gt_fsinfo; struct nfs_pathconf *pathconf = getattr->gt_pathconf; uint32_t attrlen, dummy32, bmlen, bmval0 = 0, @@ -1351,11 +1760,6 @@ nfp->nlink = 1; nfp->timestamp = jiffies; } - if (fsinfo) { - fsinfo->rtmult = fsinfo->wtmult = 512; /* ??? */ - fsinfo->lease_time = 60; - } - if (bmval0 & FATTR4_WORD0_TYPE) { READ_BUF(4); len += 4; @@ -1389,12 +1793,6 @@ (long long)nfp->fsid_u.nfs4.major, (long long)nfp->fsid_u.nfs4.minor); } - if (bmval0 & FATTR4_WORD0_LEASE_TIME) { - READ_BUF(4); - len += 4; - READ32(fsinfo->lease_time); - dprintk("read_attrs: lease_time=%d\n", fsinfo->lease_time); - } if (bmval0 & FATTR4_WORD0_FILEID) { READ_BUF(8); len += 8; @@ -1419,12 +1817,6 @@ READ64(fsstat->tfiles); dprintk("read_attrs: files_tot=0x%Lx\n", (long long) fsstat->tfiles); } - if (bmval0 & FATTR4_WORD0_MAXFILESIZE) { - READ_BUF(8); - len += 8; - READ64(fsinfo->maxfilesize); - dprintk("read_attrs: maxfilesize=0x%Lx\n", (long long) fsinfo->maxfilesize); - } if (bmval0 & FATTR4_WORD0_MAXLINK) { READ_BUF(4); len += 4; @@ -1437,20 +1829,6 @@ READ32(pathconf->max_namelen); dprintk("read_attrs: maxname=%d\n", pathconf->max_namelen); } - if (bmval0 & FATTR4_WORD0_MAXREAD) { - READ_BUF(8); - len += 8; - READ64(fsinfo->rtmax); - fsinfo->rtpref = fsinfo->dtpref = fsinfo->rtmax; - dprintk("read_attrs: maxread=%d\n", fsinfo->rtmax); - } - if (bmval0 & FATTR4_WORD0_MAXWRITE) { - READ_BUF(8); - len += 8; - READ64(fsinfo->wtmax); - fsinfo->wtpref = fsinfo->wtmax; - dprintk("read_attrs: maxwrite=%d\n", fsinfo->wtmax); - } if (bmval1 & FATTR4_WORD1_MODE) { READ_BUF(4); @@ -1476,7 +1854,7 @@ READ_BUF(dummy32); len += (XDR_QUADLEN(dummy32) << 2); if ((status = nfs_idmap_id(server, IDMAP_TYPE_USER, - (char *)p, len, &nfp->uid)) == -1) { + (char *)p, dummy32, &nfp->uid)) == -1) { dprintk("read_attrs: gss_get_num failed!\n"); /* goto out; */ nfp->uid = -2; @@ -1494,7 +1872,7 @@ READ_BUF(dummy32); len += (XDR_QUADLEN(dummy32) << 2); if ((status = nfs_idmap_id(server, IDMAP_TYPE_GROUP, - (char *)p, len, &nfp->gid)) == -1) { + (char *)p, dummy32, &nfp->gid)) == -1) { dprintk("read_attrs: gss_get_num failed!\n"); nfp->gid = -2; /* goto out; */ @@ -1695,6 +2073,74 @@ static int +decode_fsinfo(struct xdr_stream *xdr, struct nfs_fsinfo *fsinfo) +{ + uint32_t *p; + uint32_t len, attrlen, bmlen, bmval0 = 0, bmval1 = 0; + int status; + + status = decode_op_hdr(xdr, OP_GETATTR); + if (status) + return status; + READ_BUF(4); + READ32(bmlen); + if (bmlen < 1) + return -EIO; + READ_BUF(bmlen << 2); + READ32(bmval0); + if (bmval0 & ~(FATTR4_WORD0_MAXFILESIZE|FATTR4_WORD0_MAXREAD| + FATTR4_WORD0_MAXWRITE|FATTR4_WORD0_LEASE_TIME)) + goto out_bad_bitmap; + if (bmlen > 1) { + READ32(bmval1); + if (bmval1 != 0 || bmlen > 2) + goto out_bad_bitmap; + } + READ_BUF(4); + READ32(attrlen); + READ_BUF(attrlen); + fsinfo->rtmult = fsinfo->wtmult = 512; /* ??? */ + fsinfo->lease_time = 60; + len = attrlen; + + if (bmval0 & FATTR4_WORD0_LEASE_TIME) { + len -= 4; + READ32(fsinfo->lease_time); + dprintk("read_attrs: lease_time=%d\n", fsinfo->lease_time); + } + if (bmval0 & FATTR4_WORD0_MAXFILESIZE) { + len -= 8; + READ64(fsinfo->maxfilesize); + dprintk("read_attrs: maxfilesize=0x%Lx\n", (long long) fsinfo->maxfilesize); + } + if (bmval0 & FATTR4_WORD0_MAXREAD) { + len -= 8; + READ64(fsinfo->rtmax); + fsinfo->rtpref = fsinfo->dtpref = fsinfo->rtmax; + dprintk("read_attrs: maxread=%d\n", fsinfo->rtmax); + } + if (bmval0 & FATTR4_WORD0_MAXWRITE) { + len -= 8; + READ64(fsinfo->wtmax); + fsinfo->wtpref = fsinfo->wtmax; + dprintk("read_attrs: maxwrite=%d\n", fsinfo->wtmax); + } + if (len != 0) + goto out_bad_attrlen; + return 0; +out_bad_attrlen: + printk(KERN_NOTICE "%s: server attribute length %u does not match bitmap 0x%x/0x%x\n", + __FUNCTION__, (unsigned int)attrlen, + (unsigned int) bmval0, (unsigned int)bmval1); + return -EIO; +out_bad_bitmap: + printk(KERN_NOTICE "%s: server returned bad attribute bitmap 0x%x/0x%x\n", + __FUNCTION__, + (unsigned int)bmval0, (unsigned int)bmval1); + return -EIO; +} + +static int decode_getfh(struct xdr_stream *xdr, struct nfs4_getfh *getfh) { struct nfs_fh *fh = getfh->gf_fhandle; @@ -1729,6 +2175,66 @@ return decode_change_info(xdr, link->ln_cinfo); } +/* + * We create the owner, so we know a proper owner.id length is 4. + */ +static int +decode_lock_denied (struct xdr_stream *xdr, struct nfs_lock_denied *denied) +{ + uint32_t *p; + uint32_t namelen; + + READ_BUF(32); + READ64(denied->offset); + READ64(denied->length); + READ32(denied->type); + READ64(denied->owner.clientid); + READ32(namelen); + READ_BUF(namelen); + if (namelen == 4) + READ32(denied->owner.id); + return -NFS4ERR_DENIED; +} + +static int +decode_lock(struct xdr_stream *xdr, struct nfs_lockres *res) +{ + uint32_t *p; + int status; + + status = decode_op_hdr(xdr, OP_LOCK); + if (status == 0) { + READ_BUF(sizeof(nfs4_stateid)); + COPYMEM(&res->u.stateid, sizeof(res->u.stateid)); + } else if (status == -NFS4ERR_DENIED) + return decode_lock_denied(xdr, &res->u.denied); + return status; +} + +static int +decode_lockt(struct xdr_stream *xdr, struct nfs_lockres *res) +{ + int status; + status = decode_op_hdr(xdr, OP_LOCKT); + if (status == -NFS4ERR_DENIED) + return decode_lock_denied(xdr, &res->u.denied); + return status; +} + +static int +decode_locku(struct xdr_stream *xdr, struct nfs_lockres *res) +{ + uint32_t *p; + int status; + + status = decode_op_hdr(xdr, OP_LOCKU); + if (status == 0) { + READ_BUF(sizeof(nfs4_stateid)); + COPYMEM(&res->u.stateid, sizeof(res->u.stateid)); + } + return status; +} + static int decode_lookup(struct xdr_stream *xdr) { @@ -1769,15 +2275,29 @@ decode_open_confirm(struct xdr_stream *xdr, struct nfs_open_confirmres *res) { uint32_t *p; + int status; - res->status = decode_op_hdr(xdr, OP_OPEN_CONFIRM); - if (res->status) - return res->status; + status = decode_op_hdr(xdr, OP_OPEN_CONFIRM); + if (status) + return status; READ_BUF(sizeof(res->stateid.data)); COPYMEM(res->stateid.data, sizeof(res->stateid.data)); return 0; } +static int +decode_open_downgrade(struct xdr_stream *xdr, struct nfs_closeres *res) +{ + uint32_t *p; + int status; + + status = decode_op_hdr(xdr, OP_OPEN_DOWNGRADE); + if (status) + return status; + READ_BUF(sizeof(res->stateid.data)); + COPYMEM(res->stateid.data, sizeof(res->stateid.data)); + return 0; +} static int decode_putfh(struct xdr_stream *xdr) @@ -2011,7 +2531,7 @@ } static int -decode_setclientid(struct xdr_stream *xdr, struct nfs4_setclientid *setclientid) +decode_setclientid(struct xdr_stream *xdr, struct nfs4_client *clp) { uint32_t *p; uint32_t opnum; @@ -2027,9 +2547,9 @@ } READ32(nfserr); if (nfserr == NFS_OK) { - READ_BUF(8 + sizeof(setclientid->sc_state->cl_confirm.data)); - READ64(setclientid->sc_state->cl_clientid); - COPYMEM(setclientid->sc_state->cl_confirm.data, sizeof(setclientid->sc_state->cl_confirm.data)); + READ_BUF(8 + sizeof(clp->cl_confirm.data)); + READ64(clp->cl_clientid); + COPYMEM(clp->cl_confirm.data, sizeof(clp->cl_confirm.data)); } else if (nfserr == NFSERR_CLID_INUSE) { uint32_t len; @@ -2141,18 +2661,9 @@ case OP_RENAME: status = decode_rename(xdr, &op->u.rename); break; - case OP_RENEW: - status = decode_renew(xdr); - break; case OP_SAVEFH: status = decode_savefh(xdr); break; - case OP_SETCLIENTID: - status = decode_setclientid(xdr, &op->u.setclientid); - break; - case OP_SETCLIENTID_CONFIRM: - status = decode_setclientid_confirm(xdr); - break; default: BUG(); return -EIO; @@ -2163,6 +2674,29 @@ DECODE_TAIL; } + +/* + * Decode OPEN_DOWNGRADE response + */ +static int +nfs4_xdr_dec_open_downgrade(struct rpc_rqst *rqstp, uint32_t *p, struct nfs_closeres *res) +{ + struct xdr_stream xdr; + struct compound_hdr hdr; + int status; + + xdr_init_decode(&xdr, &rqstp->rq_rcv_buf, p); + status = decode_compound_hdr(&xdr, &hdr); + if (status) + goto out; + status = decode_putfh(&xdr); + if (status) + goto out; + status = decode_open_downgrade(&xdr, res); +out: + return status; +} + /* * END OF "GENERIC" DECODE ROUTINES. */ @@ -2275,6 +2809,31 @@ } /* + * Decode OPEN_RECLAIM response + */ +static int +nfs4_xdr_dec_open_reclaim(struct rpc_rqst *rqstp, uint32_t *p, struct nfs_openres *res) +{ + struct xdr_stream xdr; + struct compound_hdr hdr; + int status; + + xdr_init_decode(&xdr, &rqstp->rq_rcv_buf, p); + status = decode_compound_hdr(&xdr, &hdr); + if (status) + goto out; + status = decode_putfh(&xdr); + if (status) + goto out; + status = decode_open(&xdr, res); + if (status) + goto out; + status = decode_getattr(&xdr, res->f_getattr, res->server); +out: + return status; +} + +/* * Decode SETATTR response */ static int @@ -2299,6 +2858,71 @@ return status; } +/* + * Decode LOCK response + */ +static int +nfs4_xdr_dec_lock(struct rpc_rqst *rqstp, uint32_t *p, struct nfs_lockres *res) +{ + struct xdr_stream xdr; + struct compound_hdr hdr; + int status; + + xdr_init_decode(&xdr, &rqstp->rq_rcv_buf, p); + status = decode_compound_hdr(&xdr, &hdr); + if (status) + goto out; + status = decode_putfh(&xdr); + if (status) + goto out; + status = decode_lock(&xdr, res); +out: + return status; +} + +/* + * Decode LOCKT response + */ +static int +nfs4_xdr_dec_lockt(struct rpc_rqst *rqstp, uint32_t *p, struct nfs_lockres *res) +{ + struct xdr_stream xdr; + struct compound_hdr hdr; + int status; + + xdr_init_decode(&xdr, &rqstp->rq_rcv_buf, p); + status = decode_compound_hdr(&xdr, &hdr); + if (status) + goto out; + status = decode_putfh(&xdr); + if (status) + goto out; + status = decode_lockt(&xdr, res); +out: + return status; +} + +/* + * Decode LOCKU response + */ +static int +nfs4_xdr_dec_locku(struct rpc_rqst *rqstp, uint32_t *p, struct nfs_lockres *res) +{ + struct xdr_stream xdr; + struct compound_hdr hdr; + int status; + + xdr_init_decode(&xdr, &rqstp->rq_rcv_buf, p); + status = decode_compound_hdr(&xdr, &hdr); + if (status) + goto out; + status = decode_putfh(&xdr); + if (status) + goto out; + status = decode_locku(&xdr, res); +out: + return status; +} /* * Decode Read response @@ -2391,6 +3015,87 @@ return status; } +/* + * FSINFO request + */ +static int +nfs4_xdr_dec_fsinfo(struct rpc_rqst *req, uint32_t *p, struct nfs_fsinfo *fsinfo) +{ + struct xdr_stream xdr; + struct compound_hdr hdr; + int status; + + xdr_init_decode(&xdr, &req->rq_rcv_buf, p); + status = decode_compound_hdr(&xdr, &hdr); + if (!status) + status = decode_putfh(&xdr); + if (!status) + status = decode_fsinfo(&xdr, fsinfo); + if (!status) + status = -nfs_stat_to_errno(hdr.status); + return status; +} + +/* + * Decode RENEW response + */ +static int +nfs4_xdr_dec_renew(struct rpc_rqst *rqstp, uint32_t *p, void *dummy) +{ + struct xdr_stream xdr; + struct compound_hdr hdr; + int status; + + xdr_init_decode(&xdr, &rqstp->rq_rcv_buf, p); + status = decode_compound_hdr(&xdr, &hdr); + if (!status) + status = decode_renew(&xdr); + return status; +} + +/* + * a SETCLIENTID request + */ +static int +nfs4_xdr_dec_setclientid(struct rpc_rqst *req, uint32_t *p, + struct nfs4_client *clp) +{ + struct xdr_stream xdr; + struct compound_hdr hdr; + int status; + + xdr_init_decode(&xdr, &req->rq_rcv_buf, p); + status = decode_compound_hdr(&xdr, &hdr); + if (!status) + status = decode_setclientid(&xdr, clp); + if (!status) + status = -nfs_stat_to_errno(hdr.status); + return status; +} + +/* + * a SETCLIENTID_CONFIRM request + */ +static int +nfs4_xdr_dec_setclientid_confirm(struct rpc_rqst *req, uint32_t *p, struct nfs_fsinfo *fsinfo) +{ + struct xdr_stream xdr; + struct compound_hdr hdr; + int status; + + xdr_init_decode(&xdr, &req->rq_rcv_buf, p); + status = decode_compound_hdr(&xdr, &hdr); + if (!status) + status = decode_setclientid_confirm(&xdr); + if (!status) + status = decode_putrootfh(&xdr); + if (!status) + status = decode_fsinfo(&xdr, fsinfo); + if (!status) + status = -nfs_stat_to_errno(hdr.status); + return status; +} + uint32_t * nfs4_decode_dirent(uint32_t *p, struct nfs_entry *entry, int plus) { @@ -2426,6 +3131,67 @@ return p; } +/* + * We need to translate between nfs status return values and + * the local errno values which may not be the same. + */ +static struct { + int stat; + int errno; +} nfs_errtbl[] = { + { NFS4_OK, 0 }, + { NFS4ERR_PERM, EPERM }, + { NFS4ERR_NOENT, ENOENT }, + { NFS4ERR_IO, errno_NFSERR_IO }, + { NFS4ERR_NXIO, ENXIO }, + { NFS4ERR_ACCESS, EACCES }, + { NFS4ERR_EXIST, EEXIST }, + { NFS4ERR_XDEV, EXDEV }, + { NFS4ERR_NOTDIR, ENOTDIR }, + { NFS4ERR_ISDIR, EISDIR }, + { NFS4ERR_INVAL, EINVAL }, + { NFS4ERR_FBIG, EFBIG }, + { NFS4ERR_NOSPC, ENOSPC }, + { NFS4ERR_ROFS, EROFS }, + { NFS4ERR_MLINK, EMLINK }, + { NFS4ERR_NAMETOOLONG, ENAMETOOLONG }, + { NFS4ERR_NOTEMPTY, ENOTEMPTY }, + { NFS4ERR_DQUOT, EDQUOT }, + { NFS4ERR_STALE, ESTALE }, + { NFS4ERR_BADHANDLE, EBADHANDLE }, + { NFS4ERR_BAD_COOKIE, EBADCOOKIE }, + { NFS4ERR_NOTSUPP, ENOTSUPP }, + { NFS4ERR_TOOSMALL, ETOOSMALL }, + { NFS4ERR_SERVERFAULT, ESERVERFAULT }, + { NFS4ERR_BADTYPE, EBADTYPE }, + { NFS4ERR_LOCKED, EAGAIN }, + { NFS4ERR_RESOURCE, EREMOTEIO }, + { NFS4ERR_SYMLINK, ELOOP }, + { NFS4ERR_OP_ILLEGAL, EOPNOTSUPP }, + { NFS4ERR_DEADLOCK, EDEADLK }, + { -1, EIO } +}; + +/* + * Convert an NFS error code to a local one. + * This one is used jointly by NFSv2 and NFSv3. + */ +static int +nfs_stat_to_errno(int stat) +{ + int i; + for (i = 0; nfs_errtbl[i].stat != -1; i++) { + if (nfs_errtbl[i].stat == stat) + return nfs_errtbl[i].errno; + } + /* If we cannot translate the error, the recovery routines should + * handle it. + * Note: remaining NFSv4 error codes have values > 10000, so should + * not conflict with native Linux error codes. + */ + return stat; +} + #ifndef MAX # define MAX(a, b) (((a) > (b))? (a) : (b)) #endif @@ -2445,8 +3211,17 @@ PROC(COMMIT, enc_commit, dec_commit), PROC(OPEN, enc_open, dec_open), PROC(OPEN_CONFIRM, enc_open_confirm, dec_open_confirm), + PROC(OPEN_RECLAIM, enc_open_reclaim, dec_open_reclaim), + PROC(OPEN_DOWNGRADE, enc_open_downgrade, dec_open_downgrade), PROC(CLOSE, enc_close, dec_close), PROC(SETATTR, enc_setattr, dec_setattr), + PROC(FSINFO, enc_fsinfo, dec_fsinfo), + PROC(RENEW, enc_renew, dec_renew), + PROC(SETCLIENTID, enc_setclientid, dec_setclientid), + PROC(SETCLIENTID_CONFIRM, enc_setclientid_confirm, dec_setclientid_confirm), + PROC(LOCK, enc_lock, dec_lock), + PROC(LOCKT, enc_lockt, dec_lockt), + PROC(LOCKU, enc_locku, dec_locku), }; struct rpc_version nfs_version4 = { diff -u --recursive --new-file linux-2.6.0-test9/fs/nfs/proc.c linux-2.6.0-25-lock/fs/nfs/proc.c --- linux-2.6.0-test9/fs/nfs/proc.c 2003-11-17 18:02:03.000000000 -0500 +++ linux-2.6.0-25-lock/fs/nfs/proc.c 2003-11-17 18:13:36.000000000 -0500 @@ -42,6 +42,7 @@ #include #include #include +#include #include #define NFSDBG_FACILITY NFSDBG_PROC @@ -653,9 +654,17 @@ return 1; } +static int +nfs_proc_lock(struct file *filp, int cmd, struct file_lock *fl) +{ + return nlmclnt_proc(filp->f_dentry->d_inode, cmd, fl); +} + struct nfs_rpc_ops nfs_v2_clientops = { .version = 2, /* protocol version */ + .dentry_ops = &nfs_dentry_operations, + .dir_inode_ops = &nfs_dir_inode_operations, .getroot = nfs_proc_get_root, .getattr = nfs_proc_getattr, .setattr = nfs_proc_setattr, @@ -687,4 +696,5 @@ .file_release = nfs_release, .request_init = nfs_request_init, .request_compatible = nfs_request_compatible, + .lock = nfs_proc_lock, }; diff -u --recursive --new-file linux-2.6.0-test9/fs/nfs/write.c linux-2.6.0-25-lock/fs/nfs/write.c --- linux-2.6.0-test9/fs/nfs/write.c 2003-11-17 18:00:42.000000000 -0500 +++ linux-2.6.0-25-lock/fs/nfs/write.c 2003-11-17 18:04:45.000000000 -0500 @@ -228,8 +228,19 @@ unsigned long end_index; unsigned offset = PAGE_CACHE_SIZE; loff_t i_size = i_size_read(inode); + int inode_referenced = 0; int err; + /* + * Note: We need to ensure that we have a reference to the inode + * if we are to do asynchronous writes. If not, waiting + * in nfs_wait_on_request() may deadlock with clear_inode(). + * + * If igrab() fails here, then it is in any case safe to + * call nfs_wb_page(), since there will be no pending writes. + */ + if (igrab(inode) != 0) + inode_referenced = 1; end_index = i_size >> PAGE_CACHE_SHIFT; /* Ensure we've flushed out any previous writes */ @@ -247,7 +258,8 @@ goto out; do_it: lock_kernel(); - if (NFS_SERVER(inode)->wsize >= PAGE_CACHE_SIZE && !IS_SYNC(inode)) { + if (NFS_SERVER(inode)->wsize >= PAGE_CACHE_SIZE && !IS_SYNC(inode) && + inode_referenced) { err = nfs_writepage_async(NULL, inode, page, 0, offset); if (err >= 0) err = 0; @@ -259,6 +271,8 @@ unlock_kernel(); out: unlock_page(page); + if (inode_referenced) + iput(inode); return err; } @@ -280,7 +294,7 @@ if (is_sync && wbc->sync_mode == WB_SYNC_ALL) { err = nfs_wb_all(inode); } else - nfs_commit_file(inode, NULL, 0, 0, 0); + nfs_commit_file(inode, NULL, 0); out: return err; } @@ -1065,15 +1079,14 @@ } #if defined(CONFIG_NFS_V3) || defined(CONFIG_NFS_V4) -int nfs_commit_file(struct inode *inode, struct file *file, unsigned long idx_start, - unsigned int npages, int how) +int nfs_commit_file(struct inode *inode, struct file *file, int how) { LIST_HEAD(head); int res, error = 0; spin_lock(&nfs_wreq_lock); - res = nfs_scan_commit(inode, &head, file, idx_start, npages); + res = nfs_scan_commit(inode, &head, file, 0, 0); spin_unlock(&nfs_wreq_lock); if (res) error = nfs_commit_list(&head, how); @@ -1103,7 +1116,7 @@ error = nfs_flush_file(inode, file, idx_start, npages, how); #if defined(CONFIG_NFS_V3) || defined(CONFIG_NFS_V4) if (error == 0) - error = nfs_commit_file(inode, file, idx_start, npages, how); + error = nfs_commit_file(inode, file, how); #endif } while (error > 0); return error; diff -u --recursive --new-file linux-2.6.0-test9/include/linux/nfs4.h linux-2.6.0-25-lock/include/linux/nfs4.h --- linux-2.6.0-test9/include/linux/nfs4.h 2003-11-17 17:58:52.000000000 -0500 +++ linux-2.6.0-25-lock/include/linux/nfs4.h 2003-11-17 18:13:36.000000000 -0500 @@ -88,6 +88,76 @@ OP_WRITE = 38, }; +enum nfsstat4 { + NFS4_OK = 0, + NFS4ERR_PERM = 1, + NFS4ERR_NOENT = 2, + NFS4ERR_IO = 5, + NFS4ERR_NXIO = 6, + NFS4ERR_ACCESS = 13, + NFS4ERR_EXIST = 17, + NFS4ERR_XDEV = 18, + /* Unused/reserved 19 */ + NFS4ERR_NOTDIR = 20, + NFS4ERR_ISDIR = 21, + NFS4ERR_INVAL = 22, + NFS4ERR_FBIG = 27, + NFS4ERR_NOSPC = 28, + NFS4ERR_ROFS = 30, + NFS4ERR_MLINK = 31, + NFS4ERR_NAMETOOLONG = 63, + NFS4ERR_NOTEMPTY = 66, + NFS4ERR_DQUOT = 69, + NFS4ERR_STALE = 70, + NFS4ERR_BADHANDLE = 10001, + NFS4ERR_BAD_COOKIE = 10003, + NFS4ERR_NOTSUPP = 10004, + NFS4ERR_TOOSMALL = 10005, + NFS4ERR_SERVERFAULT = 10006, + NFS4ERR_BADTYPE = 10007, + NFS4ERR_DELAY = 10008, + NFS4ERR_SAME = 10009, + NFS4ERR_DENIED = 10010, + NFS4ERR_EXPIRED = 10011, + NFS4ERR_LOCKED = 10012, + NFS4ERR_GRACE = 10013, + NFS4ERR_FHEXPIRED = 10014, + NFS4ERR_SHARE_DENIED = 10015, + NFS4ERR_WRONGSEC = 10016, + NFS4ERR_CLID_INUSE = 10017, + NFS4ERR_RESOURCE = 10018, + NFS4ERR_MOVED = 10019, + NFS4ERR_NOFILEHANDLE = 10020, + NFS4ERR_MINOR_VERS_MISMATCH = 10021, + NFS4ERR_STALE_CLIENTID = 10022, + NFS4ERR_STALE_STATEID = 10023, + NFS4ERR_OLD_STATEID = 10024, + NFS4ERR_BAD_STATEID = 10025, + NFS4ERR_BAD_SEQID = 10026, + NFS4ERR_NOT_SAME = 10027, + NFS4ERR_LOCK_RANGE = 10028, + NFS4ERR_SYMLINK = 10029, + NFS4ERR_RESTOREFH = 10030, + NFS4ERR_LEASE_MOVED = 10031, + NFS4ERR_ATTRNOTSUPP = 10032, + NFS4ERR_NO_GRACE = 10033, + NFS4ERR_RECLAIM_BAD = 10034, + NFS4ERR_RECLAIM_CONFLICT = 10035, + NFS4ERR_BADXDR = 10036, + NFS4ERR_LOCKS_HELD = 10037, + NFS4ERR_OPENMODE = 10038, + NFS4ERR_BADOWNER = 10039, + NFS4ERR_BADCHAR = 10040, + NFS4ERR_BADNAME = 10041, + NFS4ERR_BAD_RANGE = 10042, + NFS4ERR_LOCK_NOTSUPP = 10043, + NFS4ERR_OP_ILLEGAL = 10044, + NFS4ERR_DEADLOCK = 10045, + NFS4ERR_FILE_OPEN = 10046, + NFS4ERR_ADMIN_REVOKED = 10047, + NFS4ERR_CB_PATH_DOWN = 10048 +}; + /* * Note: NF4BAD is not actually part of the protocol; it is just used * internally by nfsd. @@ -219,8 +289,17 @@ NFSPROC4_CLNT_COMMIT, NFSPROC4_CLNT_OPEN, NFSPROC4_CLNT_OPEN_CONFIRM, + NFSPROC4_CLNT_OPEN_RECLAIM, + NFSPROC4_CLNT_OPEN_DOWNGRADE, NFSPROC4_CLNT_CLOSE, NFSPROC4_CLNT_SETATTR, + NFSPROC4_CLNT_FSINFO, + NFSPROC4_CLNT_RENEW, + NFSPROC4_CLNT_SETCLIENTID, + NFSPROC4_CLNT_SETCLIENTID_CONFIRM, + NFSPROC4_CLNT_LOCK, + NFSPROC4_CLNT_LOCKT, + NFSPROC4_CLNT_LOCKU, }; #endif diff -u --recursive --new-file linux-2.6.0-test9/include/linux/nfs_fs.h linux-2.6.0-25-lock/include/linux/nfs_fs.h --- linux-2.6.0-test9/include/linux/nfs_fs.h 2003-11-17 17:58:08.000000000 -0500 +++ linux-2.6.0-25-lock/include/linux/nfs_fs.h 2003-11-17 22:48:29.000000000 -0500 @@ -28,6 +28,7 @@ #include #include #include +#include /* * Enable debugging support for nfs client. @@ -313,12 +314,11 @@ extern int nfs_flush_file(struct inode *, struct file *, unsigned long, unsigned int, int); extern int nfs_flush_list(struct list_head *, int, int); #if defined(CONFIG_NFS_V3) || defined(CONFIG_NFS_V4) -extern int nfs_commit_file(struct inode *, struct file *, unsigned long, unsigned int, int); +extern int nfs_commit_file(struct inode *, struct file *, int); extern int nfs_commit_list(struct list_head *, int); #else static inline int -nfs_commit_file(struct inode *inode, struct file *file, unsigned long offset, - unsigned int len, int flags) +nfs_commit_file(struct inode *inode, struct file *file, int flags) { return 0; } @@ -465,6 +465,7 @@ enum nfs4_client_state { NFS4CLNT_OK = 0, NFS4CLNT_NEW, + NFS4CLNT_SETUP_STATE, }; /* @@ -475,7 +476,8 @@ struct in_addr cl_addr; /* Server identifier */ u64 cl_clientid; /* constant */ nfs4_verifier cl_confirm; - enum nfs4_client_state cl_state; + unsigned long cl_state; + long cl_generation; u32 cl_lockowner_id; @@ -490,6 +492,24 @@ int cl_nunused; spinlock_t cl_lock; atomic_t cl_count; + + struct rpc_clnt * cl_rpcclient; + struct rpc_cred * cl_cred; + + struct list_head cl_superblocks; /* List of nfs_server structs */ + + unsigned long cl_lease_time; + unsigned long cl_last_renewal; + struct work_struct cl_renewd; + struct work_struct cl_recoverd; + + wait_queue_head_t cl_waitq; + struct rpc_wait_queue cl_rpcwaitq; + + /* Our own IP address, as a null-terminated string. + * This is used to generate the clientid, and the callback address. + */ + char cl_ipaddr[16]; }; /* @@ -509,6 +529,7 @@ u32 so_seqid; /* protected by so_sema */ unsigned int so_flags; /* protected by so_sema */ atomic_t so_count; + long so_generation; struct rpc_cred *so_cred; /* Associated cred */ struct list_head so_states; @@ -516,73 +537,105 @@ /* * struct nfs4_state maintains the client-side state for a given - * (state_owner,inode) tuple. + * (state_owner,inode) tuple (OPEN) or state_owner (LOCK). * + * OPEN: * In order to know when to OPEN_DOWNGRADE or CLOSE the state on the server, * we need to know how many files are open for reading or writing on a * given inode. This information too is stored here. + * + * LOCK: one nfs4_state (LOCK) to hold the lock stateid nfs4_state(OPEN) */ + +struct nfs4_lock_state { + struct list_head ls_locks; /* Other lock stateids */ + fl_owner_t ls_owner; /* POSIX lock owner */ + struct nfs4_state * ls_parent; /* Parent nfs4_state */ + u32 ls_seqid; + u32 ls_id; + nfs4_stateid ls_stateid; + atomic_t ls_count; +}; + +/* bits for nfs4_state->flags */ +enum { + LK_STATE_IN_USE, +}; + struct nfs4_state { struct list_head open_states; /* List of states for the same state_owner */ struct list_head inode_states; /* List of states for the same inode */ + struct list_head lock_states; /* List of subservient lock stateids */ struct nfs4_state_owner *owner; /* Pointer to the open owner */ struct inode *inode; /* Pointer to the inode */ - pid_t pid; /* Thread that called OPEN */ + + unsigned long flags; /* Do we hold any locks? */ + struct semaphore lock_sema; /* Serializes file locking operations */ + rwlock_t state_lock; /* Protects the lock_states list */ nfs4_stateid stateid; + unsigned int nreaders; + unsigned int nwriters; int state; /* State on the server (R,W, or RW) */ atomic_t count; }; +extern struct dentry_operations nfs4_dentry_operations; +extern struct inode_operations nfs4_dir_inode_operations; + /* nfs4proc.c */ -extern int nfs4_proc_renew(struct nfs_server *server); +extern int nfs4_proc_setclientid(struct nfs4_client *, u32, unsigned short); +extern int nfs4_proc_setclientid_confirm(struct nfs4_client *); +extern int nfs4_open_reclaim(struct nfs4_state_owner *, struct nfs4_state *); +extern int nfs4_proc_async_renew(struct nfs4_client *); +extern int nfs4_proc_renew(struct nfs4_client *); extern int nfs4_do_close(struct inode *, struct nfs4_state *); +int nfs4_do_downgrade(struct inode *inode, struct nfs4_state *state, mode_t mode); +extern int nfs4_wait_clnt_recover(struct rpc_clnt *, struct nfs4_client *); +extern struct inode *nfs4_atomic_open(struct inode *, struct dentry *, struct nameidata *); +extern int nfs4_open_revalidate(struct inode *, struct dentry *, int); /* nfs4renewd.c */ -extern int nfs4_init_renewd(struct nfs_server *server); +extern void nfs4_schedule_state_renewal(struct nfs4_client *); +extern void nfs4_renewd_prepare_shutdown(struct nfs_server *); +extern void nfs4_kill_renewd(struct nfs4_client *); /* nfs4state.c */ +extern void init_nfsv4_state(struct nfs_server *); +extern void destroy_nfsv4_state(struct nfs_server *); extern struct nfs4_client *nfs4_get_client(struct in_addr *); extern void nfs4_put_client(struct nfs4_client *clp); +extern u32 nfs4_alloc_lockowner_id(struct nfs4_client *); + extern struct nfs4_state_owner * nfs4_get_state_owner(struct nfs_server *, struct rpc_cred *); extern void nfs4_put_state_owner(struct nfs4_state_owner *); extern struct nfs4_state * nfs4_get_open_state(struct inode *, struct nfs4_state_owner *); extern void nfs4_put_open_state(struct nfs4_state *); -extern void nfs4_increment_seqid(u32 status, struct nfs4_state_owner *sp); - - - +extern void nfs4_close_state(struct nfs4_state *, mode_t); +extern struct nfs4_state *nfs4_find_state(struct inode *, struct rpc_cred *, mode_t mode); +extern void nfs4_increment_seqid(int status, struct nfs4_state_owner *sp); +extern int nfs4_handle_error(struct nfs_server *, int); +extern void nfs4_schedule_state_recovery(struct nfs4_client *); +extern struct nfs4_lock_state *nfs4_find_lock_state(struct nfs4_state *state, fl_owner_t); +extern struct nfs4_lock_state *nfs4_alloc_lock_state(struct nfs4_state *state, fl_owner_t); +extern void nfs4_put_lock_state(struct nfs4_lock_state *state); +extern void nfs4_increment_lock_seqid(int status, struct nfs4_lock_state *ls); +extern void nfs4_notify_setlk(struct inode *, struct file_lock *, struct nfs4_lock_state *); +extern void nfs4_notify_unlck(struct inode *, struct file_lock *, struct nfs4_lock_state *); +extern void nfs4_copy_stateid(nfs4_stateid *, struct nfs4_state *, fl_owner_t); struct nfs4_mount_data; -static inline int -create_nfsv4_state(struct nfs_server *server, struct nfs4_mount_data *data) -{ - server->nfs4_state = NULL; - return 0; -} - -static inline void -destroy_nfsv4_state(struct nfs_server *server) -{ - if (server->mnt_path) { - kfree(server->mnt_path); - server->mnt_path = NULL; - } - if (server->nfs4_state) { - nfs4_put_client(server->nfs4_state); - server->nfs4_state = NULL; - } -} #else -#define create_nfsv4_state(server, data) 0 +#define init_nfsv4_state(server) do { } while (0) #define destroy_nfsv4_state(server) do { } while (0) #define nfs4_put_state_owner(inode, owner) do { } while (0) #define nfs4_put_open_state(state) do { } while (0) +#define nfs4_renewd_prepare_shutdown(server) do { } while (0) #endif #endif /* __KERNEL__ */ diff -u --recursive --new-file linux-2.6.0-test9/include/linux/nfs_fs_sb.h linux-2.6.0-25-lock/include/linux/nfs_fs_sb.h --- linux-2.6.0-test9/include/linux/nfs_fs_sb.h 2003-11-17 18:03:03.000000000 -0500 +++ linux-2.6.0-25-lock/include/linux/nfs_fs_sb.h 2003-11-17 18:08:48.000000000 -0500 @@ -35,8 +35,9 @@ char ip_addr[16]; char * mnt_path; struct nfs4_client * nfs4_state; /* all NFSv4 state starts here */ - unsigned long lease_time; /* in jiffies */ - unsigned long last_renewal; /* in jiffies */ + struct list_head nfs4_siblings; /* List of other nfs_server structs + * that share the same clientid + */ void *idmap; #endif }; diff -u --recursive --new-file linux-2.6.0-test9/include/linux/nfs_page.h linux-2.6.0-25-lock/include/linux/nfs_page.h --- linux-2.6.0-test9/include/linux/nfs_page.h 2003-11-17 17:56:11.000000000 -0500 +++ linux-2.6.0-25-lock/include/linux/nfs_page.h 2003-11-17 22:43:54.000000000 -0500 @@ -26,6 +26,7 @@ struct list_head wb_list, /* Defines state of page: */ *wb_list_head; /* read/write/commit */ struct file *wb_file; + fl_owner_t wb_lockowner; struct inode *wb_inode; struct rpc_cred *wb_cred; struct nfs4_state *wb_state; diff -u --recursive --new-file linux-2.6.0-test9/include/linux/nfs_xdr.h linux-2.6.0-25-lock/include/linux/nfs_xdr.h --- linux-2.6.0-test9/include/linux/nfs_xdr.h 2003-11-17 18:01:21.000000000 -0500 +++ linux-2.6.0-25-lock/include/linux/nfs_xdr.h 2003-11-17 22:56:37.000000000 -0500 @@ -109,7 +109,6 @@ }; struct nfs_openres { - __u32 status; nfs4_stateid stateid; struct nfs_fh fh; struct nfs4_change_info * cinfo; @@ -129,24 +128,95 @@ }; struct nfs_open_confirmres { - __u32 status; nfs4_stateid stateid; }; /* + * Arguments to the open_reclaim call. + */ +struct nfs_open_reclaimargs { + struct nfs_fh * fh; + __u64 clientid; + __u32 seqid; + __u32 id; + __u32 share_access; + __u32 claim; + struct nfs4_getattr * f_getattr; +}; + +/* * Arguments to the close call. */ struct nfs_closeargs { struct nfs_fh * fh; nfs4_stateid stateid; __u32 seqid; + __u32 share_access; }; struct nfs_closeres { - __u32 status; nfs4_stateid stateid; }; +/* + * * Arguments to the lock,lockt, and locku call. + * */ +struct nfs_lowner { + __u64 clientid; + u32 id; +}; +struct nfs_open_to_lock { + __u32 open_seqid; + nfs4_stateid open_stateid; + __u32 lock_seqid; + struct nfs_lowner lock_owner; +}; + +struct nfs_exist_lock { + nfs4_stateid stateid; + __u32 seqid; +}; + +struct nfs_lock_opargs { + __u32 reclaim; + __u32 new_lock_owner; + union { + struct nfs_open_to_lock *open_lock; + struct nfs_exist_lock *exist_lock; + } u; +}; + +struct nfs_locku_opargs { + __u32 seqid; + nfs4_stateid stateid; +}; + +struct nfs_lockargs { + struct nfs_fh * fh; + __u32 type; + __u64 offset; + __u64 length; + union { + struct nfs_lock_opargs *lock; /* LOCK */ + struct nfs_lowner *lockt; /* LOCKT */ + struct nfs_locku_opargs *locku; /* LOCKU */ + } u; +}; + +struct nfs_lock_denied { + __u64 offset; + __u64 length; + __u32 type; + struct nfs_lowner owner; +}; + +struct nfs_lockres { + union { + nfs4_stateid stateid;/* LOCK success, LOCKU */ + struct nfs_lock_denied denied; /* LOCK failed, LOCKT success */ + } u; + struct nfs_server * server; +}; /* * Arguments to the read call. @@ -449,7 +519,6 @@ u32 * gt_bmval; /* request */ struct nfs_fattr * gt_attrs; /* response */ struct nfs_fsstat * gt_fsstat; /* response */ - struct nfs_fsinfo * gt_fsinfo; /* response */ struct nfs_pathconf * gt_pathconf; /* response */ }; @@ -556,8 +625,6 @@ struct nfs4_rename rename; struct nfs4_client * renew; struct nfs4_setattr setattr; - struct nfs4_setclientid setclientid; - struct nfs4_client * setclientid_confirm; } u; }; @@ -594,6 +661,7 @@ struct rpc_task task; struct inode *inode; struct rpc_cred *cred; + fl_owner_t lockowner; struct nfs_fattr fattr; /* fattr storage */ struct list_head pages; /* Coalesced read requests */ struct page *pagevec[NFS_READ_MAXIOV]; @@ -609,6 +677,7 @@ struct rpc_task task; struct inode *inode; struct rpc_cred *cred; + fl_owner_t lockowner; struct nfs_fattr fattr; struct nfs_writeverf verf; struct list_head pages; /* Coalesced requests we wish to flush */ @@ -627,6 +696,8 @@ */ struct nfs_rpc_ops { int version; /* Protocol version */ + struct dentry_operations *dentry_ops; + struct inode_operations *dir_inode_ops; int (*getroot) (struct nfs_server *, struct nfs_fh *, struct nfs_fattr *); @@ -673,6 +744,7 @@ int (*file_release) (struct inode *, struct file *); void (*request_init)(struct nfs_page *, struct file *); int (*request_compatible)(struct nfs_page *, struct file *, struct page *); + int (*lock)(struct file *, int, struct file_lock *); }; /* diff -u --recursive --new-file linux-2.6.0-test9/include/linux/sunrpc/auth.h linux-2.6.0-25-lock/include/linux/sunrpc/auth.h --- linux-2.6.0-test9/include/linux/sunrpc/auth.h 2003-11-17 17:59:14.000000000 -0500 +++ linux-2.6.0-25-lock/include/linux/sunrpc/auth.h 2003-11-17 18:07:52.000000000 -0500 @@ -73,6 +73,7 @@ * differ from the flavor in * au_ops->au_flavor in gss * case) */ + atomic_t au_count; /* Reference counter */ /* per-flavor data */ }; diff -u --recursive --new-file linux-2.6.0-test9/include/linux/sunrpc/clnt.h linux-2.6.0-25-lock/include/linux/sunrpc/clnt.h --- linux-2.6.0-test9/include/linux/sunrpc/clnt.h 2003-11-17 17:59:42.000000000 -0500 +++ linux-2.6.0-25-lock/include/linux/sunrpc/clnt.h 2003-11-17 18:07:52.000000000 -0500 @@ -26,6 +26,8 @@ __u32 pm_vers; __u32 pm_prot; __u16 pm_port; + unsigned char pm_binding : 1; /* doing a getport() */ + struct rpc_wait_queue pm_bindwait; /* waiting on getport() */ }; struct rpc_inode; @@ -34,6 +36,7 @@ * The high-level client handle */ struct rpc_clnt { + atomic_t cl_count; /* Number of clones */ atomic_t cl_users; /* number of references */ struct rpc_xprt * cl_xprt; /* transport */ struct rpc_procinfo * cl_procinfo; /* procedure info */ @@ -48,26 +51,27 @@ cl_intr : 1,/* interruptible */ cl_chatty : 1,/* be verbose */ cl_autobind : 1,/* use getport() */ - cl_binding : 1,/* doing a getport() */ cl_droppriv : 1,/* enable NFS suid hack */ cl_oneshot : 1,/* dispose after use */ cl_dead : 1;/* abandoned */ - struct rpc_rtt cl_rtt; /* RTO estimator data */ - - struct rpc_portmap cl_pmap; /* port mapping */ - struct rpc_wait_queue cl_bindwait; /* waiting on getport() */ + struct rpc_rtt * cl_rtt; /* RTO estimator data */ + struct rpc_portmap * cl_pmap; /* port mapping */ int cl_nodelen; /* nodename length */ char cl_nodename[UNX_MAXNODENAME]; char cl_pathname[30];/* Path in rpc_pipe_fs */ struct dentry * cl_dentry; /* inode */ + struct rpc_clnt * cl_parent; /* Points to parent of clones */ + struct rpc_rtt cl_rtt_default; + struct rpc_portmap cl_pmap_default; + char cl_inline_name[32]; }; #define cl_timeout cl_xprt->timeout -#define cl_prog cl_pmap.pm_prog -#define cl_vers cl_pmap.pm_vers -#define cl_port cl_pmap.pm_port -#define cl_prot cl_pmap.pm_prot +#define cl_prog cl_pmap->pm_prog +#define cl_vers cl_pmap->pm_vers +#define cl_port cl_pmap->pm_port +#define cl_prot cl_pmap->pm_prot /* * General RPC program info @@ -108,6 +112,7 @@ struct rpc_clnt *rpc_create_client(struct rpc_xprt *xprt, char *servname, struct rpc_program *info, u32 version, rpc_authflavor_t authflavor); +struct rpc_clnt *rpc_clone_client(struct rpc_clnt *); int rpc_shutdown_client(struct rpc_clnt *); int rpc_destroy_client(struct rpc_clnt *); void rpc_release_client(struct rpc_clnt *); diff -u --recursive --new-file linux-2.6.0-test9/include/linux/sunrpc/rpc_pipe_fs.h linux-2.6.0-25-lock/include/linux/sunrpc/rpc_pipe_fs.h --- linux-2.6.0-test9/include/linux/sunrpc/rpc_pipe_fs.h 2003-11-17 17:57:57.000000000 -0500 +++ linux-2.6.0-25-lock/include/linux/sunrpc/rpc_pipe_fs.h 2003-11-17 18:05:45.000000000 -0500 @@ -14,6 +14,7 @@ struct rpc_pipe_ops { ssize_t (*upcall)(struct file *, struct rpc_pipe_msg *, char __user *, size_t); ssize_t (*downcall)(struct file *, const char __user *, size_t); + void (*release_pipe)(struct inode *, int, int); void (*destroy_msg)(struct rpc_pipe_msg *); }; @@ -23,10 +24,12 @@ struct list_head pipe; int pipelen; int nreaders; + int nwriters; wait_queue_head_t waitq; #define RPC_PIPE_WAIT_FOR_OPEN 1 int flags; struct rpc_pipe_ops *ops; + struct work_struct queue_timeout; }; static inline struct rpc_inode * diff -u --recursive --new-file linux-2.6.0-test9/include/linux/sunrpc/sched.h linux-2.6.0-25-lock/include/linux/sunrpc/sched.h --- linux-2.6.0-test9/include/linux/sunrpc/sched.h 2003-11-17 18:00:12.000000000 -0500 +++ linux-2.6.0-25-lock/include/linux/sunrpc/sched.h 2003-11-17 18:11:38.000000000 -0500 @@ -110,6 +110,7 @@ #define RPC_TASK_ROOTCREDS 0x0040 /* force root creds */ #define RPC_TASK_DYNAMIC 0x0080 /* task was kmalloc'ed */ #define RPC_TASK_KILLED 0x0100 /* task was killed */ +#define RPC_TASK_SOFT 0x0200 /* Use soft timeouts */ #define RPC_IS_ASYNC(t) ((t)->tk_flags & RPC_TASK_ASYNC) #define RPC_IS_SETUID(t) ((t)->tk_flags & RPC_TASK_SETUID) @@ -119,6 +120,7 @@ #define RPC_ASSASSINATED(t) ((t)->tk_flags & RPC_TASK_KILLED) #define RPC_IS_ACTIVATED(t) ((t)->tk_active) #define RPC_DO_CALLBACK(t) ((t)->tk_callback != NULL) +#define RPC_IS_SOFT(t) ((t)->tk_flags & RPC_TASK_SOFT) #define RPC_TASK_SLEEPING 0 #define RPC_TASK_RUNNING 1 diff -u --recursive --new-file linux-2.6.0-test9/include/linux/sunrpc/xprt.h linux-2.6.0-25-lock/include/linux/sunrpc/xprt.h --- linux-2.6.0-test9/include/linux/sunrpc/xprt.h 2003-11-17 18:00:38.000000000 -0500 +++ linux-2.6.0-25-lock/include/linux/sunrpc/xprt.h 2003-11-17 18:12:12.000000000 -0500 @@ -162,6 +162,12 @@ tcp_offset; /* fragment offset */ unsigned long tcp_copied, /* copied to request */ tcp_flags; + /* + * Disconnection of idle sockets + */ + struct work_struct task_cleanup; + struct timer_list timer; + unsigned long last_used; /* * Send stuff @@ -201,6 +207,7 @@ void xprt_sock_setbufsize(struct rpc_xprt *); #define XPRT_CONNECT 0 +#define XPRT_LOCKED 1 #define xprt_connected(xp) (test_bit(XPRT_CONNECT, &(xp)->sockstate)) #define xprt_set_connected(xp) (set_bit(XPRT_CONNECT, &(xp)->sockstate)) diff -u --recursive --new-file linux-2.6.0-test9/net/sunrpc/auth.c linux-2.6.0-25-lock/net/sunrpc/auth.c --- linux-2.6.0-test9/net/sunrpc/auth.c 2003-11-17 17:59:12.000000000 -0500 +++ linux-2.6.0-25-lock/net/sunrpc/auth.c 2003-11-17 18:07:52.000000000 -0500 @@ -61,6 +61,7 @@ struct rpc_auth * rpcauth_create(rpc_authflavor_t pseudoflavor, struct rpc_clnt *clnt) { + struct rpc_auth *auth; struct rpc_authops *ops; u32 flavor = pseudoflavor_to_flavor(pseudoflavor); @@ -68,13 +69,21 @@ return NULL; if (!try_module_get(ops->owner)) return NULL; - clnt->cl_auth = ops->create(clnt, pseudoflavor); - return clnt->cl_auth; + auth = ops->create(clnt, pseudoflavor); + if (!auth) + return NULL; + atomic_set(&auth->au_count, 1); + if (clnt->cl_auth) + rpcauth_destroy(clnt->cl_auth); + clnt->cl_auth = auth; + return auth; } void rpcauth_destroy(struct rpc_auth *auth) { + if (!atomic_dec_and_test(&auth->au_count)) + return; auth->au_ops->destroy(auth); module_put(auth->au_ops->owner); kfree(auth); diff -u --recursive --new-file linux-2.6.0-test9/net/sunrpc/auth_gss/auth_gss.c linux-2.6.0-25-lock/net/sunrpc/auth_gss/auth_gss.c --- linux-2.6.0-test9/net/sunrpc/auth_gss/auth_gss.c 2003-11-17 17:58:45.000000000 -0500 +++ linux-2.6.0-25-lock/net/sunrpc/auth_gss/auth_gss.c 2003-11-17 18:07:28.000000000 -0500 @@ -49,6 +49,7 @@ #include #include #include +#include #include #include @@ -155,17 +156,17 @@ gss_put_ctx(old); } -static struct gss_cl_ctx * -gss_cred_get_uptodate_ctx(struct rpc_cred *cred) +static int +gss_cred_is_uptodate_ctx(struct rpc_cred *cred) { struct gss_cred *gss_cred = container_of(cred, struct gss_cred, gc_base); - struct gss_cl_ctx *ctx = NULL; + int res = 0; read_lock(&gss_ctx_lock); if ((cred->cr_flags & RPCAUTH_CRED_UPTODATE) && gss_cred->gc_ctx) - ctx = gss_get_ctx(gss_cred->gc_ctx); + res = 1; read_unlock(&gss_ctx_lock); - return ctx; + return res; } static inline int @@ -292,13 +293,9 @@ static void gss_release_msg(struct gss_upcall_msg *gss_msg) { - struct gss_auth *gss_auth = gss_msg->auth; - - if (!atomic_dec_and_lock(&gss_msg->count, &gss_auth->lock)) + if (!atomic_dec_and_test(&gss_msg->count)) return; - if (!list_empty(&gss_msg->list)) - list_del(&gss_msg->list); - spin_unlock(&gss_auth->lock); + BUG_ON(!list_empty(&gss_msg->list)); kfree(gss_msg); } @@ -315,24 +312,17 @@ return NULL; } -static struct gss_upcall_msg * -gss_find_upcall(struct gss_auth *gss_auth, uid_t uid) -{ - struct gss_upcall_msg *gss_msg; - - spin_lock(&gss_auth->lock); - gss_msg = __gss_find_upcall(gss_auth, uid); - spin_unlock(&gss_auth->lock); - return gss_msg; -} - static void __gss_unhash_msg(struct gss_upcall_msg *gss_msg) { if (list_empty(&gss_msg->list)) return; list_del_init(&gss_msg->list); - rpc_wake_up(&gss_msg->waitq); + if (gss_msg->msg.errno < 0) + rpc_wake_up_status(&gss_msg->waitq, gss_msg->msg.errno); + else + rpc_wake_up(&gss_msg->waitq); + atomic_dec(&gss_msg->count); } static void @@ -345,40 +335,27 @@ spin_unlock(&gss_auth->lock); } -static void -gss_release_callback(struct rpc_task *task) -{ - struct rpc_clnt *clnt = task->tk_client; - struct gss_auth *gss_auth = container_of(clnt->cl_auth, - struct gss_auth, rpc_auth); - struct gss_upcall_msg *gss_msg; - - gss_msg = gss_find_upcall(gss_auth, task->tk_msg.rpc_cred->cr_uid); - BUG_ON(!gss_msg); - atomic_dec(&gss_msg->count); - gss_release_msg(gss_msg); -} - static int -gss_upcall(struct rpc_clnt *clnt, struct rpc_task *task, uid_t uid) +gss_upcall(struct rpc_clnt *clnt, struct rpc_task *task, struct rpc_cred *cred) { struct gss_auth *gss_auth = container_of(clnt->cl_auth, struct gss_auth, rpc_auth); struct gss_upcall_msg *gss_msg, *gss_new = NULL; struct rpc_pipe_msg *msg; struct dentry *dentry = gss_auth->dentry; - int res; + uid_t uid = cred->cr_uid; + int res = 0; retry: + spin_lock(&gss_auth->lock); gss_msg = __gss_find_upcall(gss_auth, uid); if (gss_msg) goto out_sleep; if (gss_new == NULL) { spin_unlock(&gss_auth->lock); gss_new = kmalloc(sizeof(*gss_new), GFP_KERNEL); - if (gss_new) + if (!gss_new) return -ENOMEM; - spin_lock(&gss_auth->lock); goto retry; } gss_msg = gss_new; @@ -393,20 +370,34 @@ gss_new->auth = gss_auth; list_add(&gss_new->list, &gss_auth->upcalls); gss_new = NULL; - task->tk_timeout = 5 * HZ; - rpc_sleep_on(&gss_msg->waitq, task, gss_release_callback, NULL); - spin_unlock(&gss_auth->lock); - res = rpc_queue_upcall(dentry->d_inode, msg); - if (res) { - gss_unhash_msg(gss_msg); - gss_release_msg(gss_msg); + /* Has someone updated the credential behind our back? */ + if (!gss_cred_is_uptodate_ctx(cred)) { + /* No, so do upcall and sleep */ + task->tk_timeout = 0; + rpc_sleep_on(&gss_msg->waitq, task, NULL, NULL); + spin_unlock(&gss_auth->lock); + res = rpc_queue_upcall(dentry->d_inode, msg); + if (res) + gss_unhash_msg(gss_msg); + } else { + /* Yes, so cancel upcall */ + __gss_unhash_msg(gss_msg); + spin_unlock(&gss_auth->lock); } + gss_release_msg(gss_msg); return res; out_sleep: - rpc_sleep_on(&gss_msg->waitq, task, gss_release_callback, NULL); + /* Sleep forever */ + task->tk_timeout = 0; + rpc_sleep_on(&gss_msg->waitq, task, NULL, NULL); spin_unlock(&gss_auth->lock); if (gss_new) kfree(gss_new); + /* Note: we drop the reference here: we are automatically removed + * from the queue when we're woken up, and we should in any case + * have no further responsabilities w.r.t. the upcall. + */ + gss_release_msg(gss_msg); return 0; } @@ -491,14 +482,56 @@ return err; } +static void +gss_pipe_release(struct inode *inode, int nreaders, int nwriters) +{ + struct rpc_inode *rpci = RPC_I(inode); + struct rpc_clnt *clnt; + struct rpc_auth *auth; + struct gss_auth *gss_auth; + + if (nreaders != 0 || nwriters != 0) + return; + clnt = rpci->private; + atomic_inc(&clnt->cl_users); + auth = clnt->cl_auth; + gss_auth = container_of(auth, struct gss_auth, rpc_auth); + spin_lock(&gss_auth->lock); + while (!list_empty(&gss_auth->upcalls)) { + struct gss_upcall_msg *gss_msg; + + gss_msg = list_entry(gss_auth->upcalls.next, + struct gss_upcall_msg, list); + gss_msg->msg.errno = -EPIPE; + atomic_inc(&gss_msg->count); + __gss_unhash_msg(gss_msg); + spin_unlock(&gss_auth->lock); + gss_release_msg(gss_msg); + spin_lock(&gss_auth->lock); + } + spin_unlock(&gss_auth->lock); + rpc_release_client(clnt); +} + void gss_pipe_destroy_msg(struct rpc_pipe_msg *msg) { struct gss_upcall_msg *gss_msg = container_of(msg, struct gss_upcall_msg, msg); + static unsigned long ratelimit; - if (msg->errno < 0) + if (msg->errno < 0) { + atomic_inc(&gss_msg->count); gss_unhash_msg(gss_msg); - gss_release_msg(gss_msg); + if (msg->errno == -ETIMEDOUT || msg->errno == -EPIPE) { + unsigned long now = jiffies; + if (time_after(now, ratelimit)) { + printk(KERN_WARNING "RPC: AUTH_GSS upcall timed out.\n" + "Please check user daemon is running!\n"); + ratelimit = now + 15*HZ; + } + } + gss_release_msg(gss_msg); + } } /* @@ -691,6 +724,8 @@ goto out_put_ctx; } p = xdr_encode_netobj(p, &bufout); + gss_put_ctx(ctx); + kfree(bufout.data); return p; out_put_ctx: gss_put_ctx(ctx); @@ -704,20 +739,13 @@ gss_refresh(struct rpc_task *task) { struct rpc_clnt *clnt = task->tk_client; - struct gss_auth *gss_auth = container_of(clnt->cl_auth, - struct gss_auth, rpc_auth); struct rpc_xprt *xprt = task->tk_xprt; struct rpc_cred *cred = task->tk_msg.rpc_cred; - int err = 0; task->tk_timeout = xprt->timeout.to_current; - spin_lock(&gss_auth->lock); - if (gss_cred_get_uptodate_ctx(cred)) - goto out; - err = gss_upcall(clnt, task, cred->cr_uid); -out: - spin_unlock(&gss_auth->lock); - return err; + if (!gss_cred_is_uptodate_ctx(cred)) + return gss_upcall(clnt, task, cred); + return 0; } static u32 * @@ -735,13 +763,13 @@ flav = ntohl(*p++); if ((len = ntohl(*p++)) > RPC_MAX_AUTH_SIZE) { printk("RPC: giant verf size: %ld\n", (unsigned long) len); - return NULL; + goto out_bad; } dprintk("RPC: gss_validate: verifier flavor %d, len %d\n", flav, len); if (flav != RPC_AUTH_GSS) { printk("RPC: bad verf flavor: %ld\n", (unsigned long)flav); - return NULL; + goto out_bad; } seq = htonl(task->tk_gss_seqno); bufin.data = (u8 *) &seq; @@ -750,10 +778,14 @@ bufout.len = len; if (gss_verify_mic(ctx->gc_gss_ctx, &bufin, &bufout, &qop_state) != 0) - return NULL; + goto out_bad; task->tk_auth->au_rslack = XDR_QUADLEN(len) + 2; dprintk("RPC: GSS gss_validate: gss_verify_mic succeeded.\n"); + gss_put_ctx(ctx); return p + XDR_QUADLEN(len); +out_bad: + gss_put_ctx(ctx); + return NULL; } static struct rpc_authops authgss_ops = { @@ -779,6 +811,7 @@ .upcall = gss_pipe_upcall, .downcall = gss_pipe_downcall, .destroy_msg = gss_pipe_destroy_msg, + .release_pipe = gss_pipe_release, }; /* diff -u --recursive --new-file linux-2.6.0-test9/net/sunrpc/auth_gss/gss_krb5_crypto.c linux-2.6.0-25-lock/net/sunrpc/auth_gss/gss_krb5_crypto.c --- linux-2.6.0-test9/net/sunrpc/auth_gss/gss_krb5_crypto.c 2003-11-17 17:55:37.000000000 -0500 +++ linux-2.6.0-25-lock/net/sunrpc/auth_gss/gss_krb5_crypto.c 2003-11-17 18:06:34.000000000 -0500 @@ -71,14 +71,13 @@ if (iv) memcpy(local_iv, iv, crypto_tfm_alg_ivsize(tfm)); - crypto_cipher_set_iv(tfm, local_iv, crypto_tfm_alg_ivsize(tfm)); memcpy(out, in, length); sg[0].page = virt_to_page(out); sg[0].offset = offset_in_page(out); sg[0].length = length; - ret = crypto_cipher_encrypt(tfm, sg, sg, length); + ret = crypto_cipher_encrypt_iv(tfm, sg, sg, length, local_iv); out: dprintk("gss_k5encrypt returns %d\n",ret); @@ -110,14 +109,13 @@ } if (iv) memcpy(local_iv,iv, crypto_tfm_alg_ivsize(tfm)); - crypto_cipher_set_iv(tfm, local_iv, crypto_tfm_alg_blocksize(tfm)); memcpy(out, in, length); sg[0].page = virt_to_page(out); sg[0].offset = offset_in_page(out); sg[0].length = length; - ret = crypto_cipher_decrypt(tfm, sg, sg, length); + ret = crypto_cipher_decrypt_iv(tfm, sg, sg, length, local_iv); out: dprintk("gss_k5decrypt returns %d\n",ret); diff -u --recursive --new-file linux-2.6.0-test9/net/sunrpc/auth_gss/gss_mech_switch.c linux-2.6.0-25-lock/net/sunrpc/auth_gss/gss_mech_switch.c --- linux-2.6.0-test9/net/sunrpc/auth_gss/gss_mech_switch.c 2003-11-17 18:00:03.000000000 -0500 +++ linux-2.6.0-25-lock/net/sunrpc/auth_gss/gss_mech_switch.c 2003-11-17 18:07:16.000000000 -0500 @@ -70,6 +70,7 @@ } gm->gm_oid.len = mech_type->len; if (!(gm->gm_oid.data = kmalloc(mech_type->len, GFP_KERNEL))) { + kfree(gm); printk("Failed to allocate memory in gss_mech_register"); return -1; } diff -u --recursive --new-file linux-2.6.0-test9/net/sunrpc/auth_gss/gss_pseudoflavors.c linux-2.6.0-25-lock/net/sunrpc/auth_gss/gss_pseudoflavors.c --- linux-2.6.0-test9/net/sunrpc/auth_gss/gss_pseudoflavors.c 2003-11-17 17:59:01.000000000 -0500 +++ linux-2.6.0-25-lock/net/sunrpc/auth_gss/gss_pseudoflavors.c 2003-11-17 18:07:16.000000000 -0500 @@ -92,6 +92,7 @@ return 0; err_unlock: + kfree(triple); spin_unlock(®istered_triples_lock); err: return -1; diff -u --recursive --new-file linux-2.6.0-test9/net/sunrpc/clnt.c linux-2.6.0-25-lock/net/sunrpc/clnt.c --- linux-2.6.0-test9/net/sunrpc/clnt.c 2003-11-17 17:58:18.000000000 -0500 +++ linux-2.6.0-25-lock/net/sunrpc/clnt.c 2003-11-17 18:11:38.000000000 -0500 @@ -30,6 +30,7 @@ #include #include +#include #include #include @@ -101,6 +102,7 @@ { struct rpc_version *version; struct rpc_clnt *clnt = NULL; + int len; dprintk("RPC: creating %s client for %s (xprt %p)\n", program->name, servname, xprt); @@ -115,23 +117,37 @@ goto out_no_clnt; memset(clnt, 0, sizeof(*clnt)); atomic_set(&clnt->cl_users, 0); + atomic_set(&clnt->cl_count, 1); + clnt->cl_parent = clnt; + + clnt->cl_server = clnt->cl_inline_name; + len = strlen(servname) + 1; + if (len > sizeof(clnt->cl_inline_name)) { + char *buf = kmalloc(len, GFP_KERNEL); + if (buf != 0) + clnt->cl_server = buf; + else + len = sizeof(clnt->cl_inline_name); + } + strlcpy(clnt->cl_server, servname, len); clnt->cl_xprt = xprt; clnt->cl_procinfo = version->procs; clnt->cl_maxproc = version->nrprocs; - clnt->cl_server = servname; clnt->cl_protname = program->name; + clnt->cl_pmap = &clnt->cl_pmap_default; clnt->cl_port = xprt->addr.sin_port; clnt->cl_prog = program->number; clnt->cl_vers = version->number; clnt->cl_prot = xprt->prot; clnt->cl_stats = program->stats; - INIT_RPC_WAITQ(&clnt->cl_bindwait, "bindwait"); + INIT_RPC_WAITQ(&clnt->cl_pmap_default.pm_bindwait, "bindwait"); if (!clnt->cl_port) clnt->cl_autobind = 1; - rpc_init_rtt(&clnt->cl_rtt, xprt->timeout.to_initval); + clnt->cl_rtt = &clnt->cl_rtt_default; + rpc_init_rtt(&clnt->cl_rtt_default, xprt->timeout.to_initval); if (rpc_setup_pipedir(clnt, program->pipe_dir_name) < 0) goto out_no_path; @@ -156,12 +172,40 @@ out_no_auth: rpc_rmdir(clnt->cl_pathname); out_no_path: + if (clnt->cl_server != clnt->cl_inline_name) + kfree(clnt->cl_server); kfree(clnt); clnt = NULL; goto out; } /* + * This function clones the RPC client structure. It allows us to share the + * same transport while varying parameters such as the authentication + * flavour. + */ +struct rpc_clnt * +rpc_clone_client(struct rpc_clnt *clnt) +{ + struct rpc_clnt *new; + + new = (struct rpc_clnt *)kmalloc(sizeof(*new), GFP_KERNEL); + if (!new) + goto out_no_clnt; + memcpy(new, clnt, sizeof(*new)); + atomic_set(&new->cl_count, 1); + atomic_set(&new->cl_users, 0); + atomic_inc(&new->cl_parent->cl_count); + if (new->cl_auth) + atomic_inc(&new->cl_auth->au_count); +out: + return new; +out_no_clnt: + printk(KERN_INFO "RPC: out of memory in %s\n", __FUNCTION__); + goto out; +} + +/* * Properly shut down an RPC client, terminating all outstanding * requests. Note that we must be certain that cl_oneshot and * cl_dead are cleared, or else the client would be destroyed @@ -200,19 +244,29 @@ int rpc_destroy_client(struct rpc_clnt *clnt) { + if (!atomic_dec_and_test(&clnt->cl_count)) + return 1; + BUG_ON(atomic_read(&clnt->cl_users) != 0); + dprintk("RPC: destroying %s client for %s\n", clnt->cl_protname, clnt->cl_server); - if (clnt->cl_auth) { rpcauth_destroy(clnt->cl_auth); clnt->cl_auth = NULL; } + if (clnt->cl_parent != clnt) { + rpc_destroy_client(clnt->cl_parent); + goto out_free; + } if (clnt->cl_pathname[0]) rpc_rmdir(clnt->cl_pathname); if (clnt->cl_xprt) { xprt_destroy(clnt->cl_xprt); clnt->cl_xprt = NULL; } + if (clnt->cl_server != clnt->cl_inline_name) + kfree(clnt->cl_server); +out_free: kfree(clnt); return 0; } @@ -743,7 +797,7 @@ to->to_retries = clnt->cl_timeout.to_retries; dprintk("RPC: %4d call_timeout (major)\n", task->tk_pid); - if (clnt->cl_softrtry) { + if (RPC_IS_SOFT(task)) { if (clnt->cl_chatty) printk(KERN_NOTICE "%s: server %s not responding, timed out\n", clnt->cl_protname, clnt->cl_server); @@ -786,7 +840,7 @@ } if (task->tk_status < 12) { - if (!clnt->cl_softrtry) { + if (!RPC_IS_SOFT(task)) { task->tk_action = call_bind; clnt->cl_stats->rpcretrans++; goto out_retry; diff -u --recursive --new-file linux-2.6.0-test9/net/sunrpc/pmap_clnt.c linux-2.6.0-25-lock/net/sunrpc/pmap_clnt.c --- linux-2.6.0-test9/net/sunrpc/pmap_clnt.c 2003-11-17 17:59:04.000000000 -0500 +++ linux-2.6.0-25-lock/net/sunrpc/pmap_clnt.c 2003-11-17 18:07:52.000000000 -0500 @@ -41,7 +41,7 @@ void rpc_getport(struct rpc_task *task, struct rpc_clnt *clnt) { - struct rpc_portmap *map = &clnt->cl_pmap; + struct rpc_portmap *map = clnt->cl_pmap; struct sockaddr_in *sap = &clnt->cl_xprt->addr; struct rpc_message msg = { .rpc_proc = &pmap_procedures[PMAP_GETPORT], @@ -57,12 +57,12 @@ map->pm_prog, map->pm_vers, map->pm_prot); spin_lock(&pmap_lock); - if (clnt->cl_binding) { - rpc_sleep_on(&clnt->cl_bindwait, task, NULL, 0); + if (map->pm_binding) { + rpc_sleep_on(&map->pm_bindwait, task, NULL, 0); spin_unlock(&pmap_lock); return; } - clnt->cl_binding = 1; + map->pm_binding = 1; spin_unlock(&pmap_lock); task->tk_status = -EACCES; /* why set this? returns -EIO below */ @@ -85,8 +85,8 @@ bailout: spin_lock(&pmap_lock); - clnt->cl_binding = 0; - rpc_wake_up(&clnt->cl_bindwait); + map->pm_binding = 0; + rpc_wake_up(&map->pm_bindwait); spin_unlock(&pmap_lock); task->tk_status = -EIO; task->tk_action = NULL; @@ -129,6 +129,7 @@ pmap_getport_done(struct rpc_task *task) { struct rpc_clnt *clnt = task->tk_client; + struct rpc_portmap *map = clnt->cl_pmap; dprintk("RPC: %4d pmap_getport_done(status %d, port %d)\n", task->tk_pid, task->tk_status, clnt->cl_port); @@ -145,8 +146,8 @@ clnt->cl_xprt->addr.sin_port = clnt->cl_port; } spin_lock(&pmap_lock); - clnt->cl_binding = 0; - rpc_wake_up(&clnt->cl_bindwait); + map->pm_binding = 0; + rpc_wake_up(&map->pm_bindwait); spin_unlock(&pmap_lock); } diff -u --recursive --new-file linux-2.6.0-test9/net/sunrpc/rpc_pipe.c linux-2.6.0-25-lock/net/sunrpc/rpc_pipe.c --- linux-2.6.0-test9/net/sunrpc/rpc_pipe.c 2003-11-17 17:57:48.000000000 -0500 +++ linux-2.6.0-25-lock/net/sunrpc/rpc_pipe.c 2003-11-17 18:05:45.000000000 -0500 @@ -25,6 +25,7 @@ #include #include +#include #include static struct vfsmount *rpc_mount; @@ -35,6 +36,8 @@ static kmem_cache_t *rpc_inode_cachep; +#define RPC_UPCALL_TIMEOUT (30*HZ) + static void __rpc_purge_upcall(struct inode *inode, int err) { @@ -59,6 +62,18 @@ up(&inode->i_sem); } +static void +rpc_timeout_upcall_queue(void *data) +{ + struct rpc_inode *rpci = (struct rpc_inode *)data; + struct inode *inode = &rpci->vfs_inode; + + down(&inode->i_sem); + if (rpci->nreaders == 0 && !list_empty(&rpci->pipe)) + __rpc_purge_upcall(inode, -ETIMEDOUT); + up(&inode->i_sem); +} + int rpc_queue_upcall(struct inode *inode, struct rpc_pipe_msg *msg) { @@ -66,7 +81,13 @@ int res = 0; down(&inode->i_sem); - if (rpci->nreaders || (rpci->flags & RPC_PIPE_WAIT_FOR_OPEN)) { + if (rpci->nreaders) { + list_add_tail(&msg->list, &rpci->pipe); + rpci->pipelen += msg->len; + } else if (rpci->flags & RPC_PIPE_WAIT_FOR_OPEN) { + if (list_empty(&rpci->pipe)) + schedule_delayed_work(&rpci->queue_timeout, + RPC_UPCALL_TIMEOUT); list_add_tail(&msg->list, &rpci->pipe); rpci->pipelen += msg->len; } else @@ -80,6 +101,9 @@ rpc_inode_setowner(struct inode *inode, void *private) { struct rpc_inode *rpci = RPC_I(inode); + + cancel_delayed_work(&rpci->queue_timeout); + flush_scheduled_work(); down(&inode->i_sem); rpci->private = private; if (!private) @@ -113,6 +137,8 @@ if (rpci->private != NULL) { if (filp->f_mode & FMODE_READ) rpci->nreaders ++; + if (filp->f_mode & FMODE_WRITE) + rpci->nwriters ++; res = 0; } up(&inode->i_sem); @@ -131,10 +157,14 @@ rpci->ops->destroy_msg(msg); } down(&inode->i_sem); + if (filp->f_mode & FMODE_WRITE) + rpci->nwriters --; if (filp->f_mode & FMODE_READ) rpci->nreaders --; - if (!rpci->nreaders && !(rpci->flags & RPC_PIPE_WAIT_FOR_OPEN)) + if (!rpci->nreaders) __rpc_purge_upcall(inode, -EPIPE); + if (rpci->ops->release_pipe) + rpci->ops->release_pipe(inode, rpci->nreaders, rpci->nwriters); up(&inode->i_sem); return 0; } @@ -769,6 +799,7 @@ INIT_LIST_HEAD(&rpci->pipe); rpci->pipelen = 0; init_waitqueue_head(&rpci->waitq); + INIT_WORK(&rpci->queue_timeout, rpc_timeout_upcall_queue, rpci); rpci->ops = NULL; } } diff -u --recursive --new-file linux-2.6.0-test9/net/sunrpc/sched.c linux-2.6.0-25-lock/net/sunrpc/sched.c --- linux-2.6.0-test9/net/sunrpc/sched.c 2003-11-17 17:59:22.000000000 -0500 +++ linux-2.6.0-25-lock/net/sunrpc/sched.c 2003-11-17 18:11:38.000000000 -0500 @@ -731,8 +731,11 @@ list_add(&task->tk_task, &all_tasks); spin_unlock(&rpc_sched_lock); - if (clnt) + if (clnt) { atomic_inc(&clnt->cl_users); + if (clnt->cl_softrtry) + task->tk_flags |= RPC_TASK_SOFT; + } #ifdef RPC_DEBUG task->tk_magic = 0xf00baa; diff -u --recursive --new-file linux-2.6.0-test9/net/sunrpc/sunrpc_syms.c linux-2.6.0-25-lock/net/sunrpc/sunrpc_syms.c --- linux-2.6.0-test9/net/sunrpc/sunrpc_syms.c 2003-11-17 18:01:37.000000000 -0500 +++ linux-2.6.0-25-lock/net/sunrpc/sunrpc_syms.c 2003-11-17 18:07:52.000000000 -0500 @@ -21,6 +21,7 @@ #include #include #include +#include #include @@ -40,6 +41,7 @@ /* RPC client functions */ EXPORT_SYMBOL(rpc_create_client); +EXPORT_SYMBOL(rpc_clone_client); EXPORT_SYMBOL(rpc_destroy_client); EXPORT_SYMBOL(rpc_shutdown_client); EXPORT_SYMBOL(rpc_release_client); @@ -65,6 +67,7 @@ /* Client credential cache */ EXPORT_SYMBOL(rpcauth_register); EXPORT_SYMBOL(rpcauth_unregister); +EXPORT_SYMBOL(rpcauth_create); EXPORT_SYMBOL(rpcauth_lookupcred); EXPORT_SYMBOL(rpcauth_lookup_credcache); EXPORT_SYMBOL(rpcauth_free_credcache); diff -u --recursive --new-file linux-2.6.0-test9/net/sunrpc/xprt.c linux-2.6.0-25-lock/net/sunrpc/xprt.c --- linux-2.6.0-test9/net/sunrpc/xprt.c 2003-11-17 18:00:13.000000000 -0500 +++ linux-2.6.0-25-lock/net/sunrpc/xprt.c 2003-11-17 18:12:12.000000000 -0500 @@ -59,6 +59,7 @@ #include #include #include +#include #include #include @@ -75,6 +76,7 @@ #endif #define XPRT_MAX_BACKOFF (8) +#define XPRT_IDLE_TIMEOUT (5*60*HZ) /* * Local functions @@ -139,25 +141,33 @@ { struct rpc_rqst *req = task->tk_rqstp; - if (!xprt->snd_task) { - if (xprt->nocong || __xprt_get_cong(xprt, task)) { - xprt->snd_task = task; - if (req) { - req->rq_bytes_sent = 0; - req->rq_ntrans++; - } - } + if (test_and_set_bit(XPRT_LOCKED, &xprt->sockstate)) { + if (task == xprt->snd_task) + return 1; + if (task == NULL) + return 0; + goto out_sleep; } - if (xprt->snd_task != task) { - dprintk("RPC: %4d TCP write queue full\n", task->tk_pid); - task->tk_timeout = 0; - task->tk_status = -EAGAIN; - if (req && req->rq_ntrans) - rpc_sleep_on(&xprt->resend, task, NULL, NULL); - else - rpc_sleep_on(&xprt->sending, task, NULL, NULL); + if (xprt->nocong || __xprt_get_cong(xprt, task)) { + xprt->snd_task = task; + if (req) { + req->rq_bytes_sent = 0; + req->rq_ntrans++; + } + return 1; } - return xprt->snd_task == task; + smp_mb__before_clear_bit(); + clear_bit(XPRT_LOCKED, &xprt->sockstate); + smp_mb__after_clear_bit(); +out_sleep: + dprintk("RPC: %4d failed to lock socket %p\n", task->tk_pid, xprt); + task->tk_timeout = 0; + task->tk_status = -EAGAIN; + if (req && req->rq_ntrans) + rpc_sleep_on(&xprt->resend, task, NULL, NULL); + else + rpc_sleep_on(&xprt->sending, task, NULL, NULL); + return 0; } static inline int @@ -177,15 +187,15 @@ { struct rpc_task *task; - if (xprt->snd_task) + if (test_and_set_bit(XPRT_LOCKED, &xprt->sockstate)) return; + if (!xprt->nocong && RPCXPRT_CONGESTED(xprt)) + goto out_unlock; task = rpc_wake_up_next(&xprt->resend); if (!task) { - if (!xprt->nocong && RPCXPRT_CONGESTED(xprt)) - return; task = rpc_wake_up_next(&xprt->sending); if (!task) - return; + goto out_unlock; } if (xprt->nocong || __xprt_get_cong(xprt, task)) { struct rpc_rqst *req = task->tk_rqstp; @@ -194,7 +204,12 @@ req->rq_bytes_sent = 0; req->rq_ntrans++; } + return; } +out_unlock: + smp_mb__before_clear_bit(); + clear_bit(XPRT_LOCKED, &xprt->sockstate); + smp_mb__after_clear_bit(); } /* @@ -203,9 +218,13 @@ static void __xprt_release_write(struct rpc_xprt *xprt, struct rpc_task *task) { - if (xprt->snd_task == task) + if (xprt->snd_task == task) { xprt->snd_task = NULL; - __xprt_lock_write_next(xprt); + smp_mb__before_clear_bit(); + clear_bit(XPRT_LOCKED, &xprt->sockstate); + smp_mb__after_clear_bit(); + __xprt_lock_write_next(xprt); + } } static inline void @@ -393,6 +412,15 @@ sock_release(sock); } +static void +xprt_socket_autoclose(void *args) +{ + struct rpc_xprt *xprt = (struct rpc_xprt *)args; + + xprt_close(xprt); + xprt_release_write(xprt, NULL); +} + /* * Mark a transport as disconnected */ @@ -407,6 +435,27 @@ } /* + * Used to allow disconnection when we've been idle + */ +static void +xprt_init_autodisconnect(unsigned long data) +{ + struct rpc_xprt *xprt = (struct rpc_xprt *)data; + + spin_lock(&xprt->sock_lock); + if (!list_empty(&xprt->recv) || xprt->shutdown) + goto out_abort; + if (test_and_set_bit(XPRT_LOCKED, &xprt->sockstate)) + goto out_abort; + spin_unlock(&xprt->sock_lock); + /* Let keventd close the socket */ + schedule_work(&xprt->task_cleanup); + return; +out_abort: + spin_unlock(&xprt->sock_lock); +} + +/* * Attempt to connect a TCP socket. * */ @@ -488,7 +537,7 @@ case -ECONNREFUSED: case -ECONNRESET: case -ENOTCONN: - if (!task->tk_client->cl_softrtry) { + if (!RPC_IS_SOFT(task)) { rpc_delay(task, RPC_REESTABLISH_TIMEOUT); task->tk_status = -ENOTCONN; break; @@ -496,7 +545,7 @@ default: /* Report myriad other possible returns. If this file * system is soft mounted, just error out, like Solaris. */ - if (task->tk_client->cl_softrtry) { + if (RPC_IS_SOFT(task)) { printk(KERN_WARNING "RPC: error %d connecting to server %s, exiting\n", -status, task->tk_client->cl_server); @@ -530,7 +579,7 @@ } /* if soft mounted, just cause this RPC to fail */ - if (task->tk_client->cl_softrtry) + if (RPC_IS_SOFT(task)) task->tk_status = -EIO; switch (task->tk_status) { @@ -584,9 +633,9 @@ __xprt_put_cong(xprt, req); if (timer) { if (req->rq_ntrans == 1) - rpc_update_rtt(&clnt->cl_rtt, timer, + rpc_update_rtt(clnt->cl_rtt, timer, (long)jiffies - req->rq_xtime); - rpc_set_timeo(&clnt->cl_rtt, timer, req->rq_ntrans - 1); + rpc_set_timeo(clnt->cl_rtt, timer, req->rq_ntrans - 1); } } @@ -1224,8 +1273,8 @@ spin_lock_bh(&xprt->sock_lock); if (!xprt->nocong) { int timer = task->tk_msg.rpc_proc->p_timer; - task->tk_timeout = rpc_calc_rto(&clnt->cl_rtt, timer); - task->tk_timeout <<= rpc_ntimeo(&clnt->cl_rtt, timer); + task->tk_timeout = rpc_calc_rto(clnt->cl_rtt, timer); + task->tk_timeout <<= rpc_ntimeo(clnt->cl_rtt, timer); task->tk_timeout <<= clnt->cl_timeout.to_retries - req->rq_timeout.to_retries; if (task->tk_timeout > req->rq_timeout.to_maxval) @@ -1254,6 +1303,8 @@ spin_lock(&xprt->xprt_lock); do_xprt_reserve(task); spin_unlock(&xprt->xprt_lock); + if (task->tk_rqstp) + del_timer_sync(&xprt->timer); } } @@ -1333,6 +1384,9 @@ __xprt_put_cong(xprt, req); if (!list_empty(&req->rq_list)) list_del(&req->rq_list); + xprt->last_used = jiffies; + if (list_empty(&xprt->recv) && !xprt->shutdown) + mod_timer(&xprt->timer, xprt->last_used + XPRT_IDLE_TIMEOUT); spin_unlock_bh(&xprt->sock_lock); task->tk_rqstp = NULL; memset(req, 0, sizeof(*req)); /* mark unused */ @@ -1403,6 +1457,11 @@ init_waitqueue_head(&xprt->cong_wait); INIT_LIST_HEAD(&xprt->recv); + INIT_WORK(&xprt->task_cleanup, xprt_socket_autoclose, xprt); + init_timer(&xprt->timer); + xprt->timer.function = xprt_init_autodisconnect; + xprt->timer.data = (unsigned long) xprt; + xprt->last_used = jiffies; /* Set timeout parameters */ if (to) { @@ -1583,6 +1642,7 @@ rpc_wake_up(&xprt->backlog); if (waitqueue_active(&xprt->cong_wait)) wake_up(&xprt->cong_wait); + del_timer_sync(&xprt->timer); } /*