fs/exec.c | 10 fs/inode.c | 2 fs/nfs/dir.c | 319 ++++++++- fs/nfs/direct.c | 3 fs/nfs/file.c | 30 fs/nfs/idmap.c | 407 ++++++------ fs/nfs/inode.c | 557 ++++++++++------- fs/nfs/nfs3proc.c | 38 - fs/nfs/nfs4proc.c | 1034 +++++++++++++++++++++++--------- fs/nfs/nfs4renewd.c | 110 ++- fs/nfs/nfs4state.c | 516 +++++++++++++++ fs/nfs/nfs4xdr.c | 921 ++++++++++++++++++++++++++-- fs/nfs/proc.c | 30 fs/nfs/read.c | 2 fs/nfs/unlink.c | 3 fs/nfs/write.c | 39 - include/linux/fs.h | 2 include/linux/nfs4.h | 79 ++ include/linux/nfs_fs.h | 214 ++++-- include/linux/nfs_fs_sb.h | 6 include/linux/nfs_idmap.h | 21 include/linux/nfs_page.h | 1 include/linux/nfs_xdr.h | 84 ++ include/linux/sunrpc/auth.h | 7 include/linux/sunrpc/clnt.h | 23 include/linux/sunrpc/gss_api.h | 9 include/linux/sunrpc/gss_krb5.h | 22 include/linux/sunrpc/rpc_pipe_fs.h | 5 include/linux/sunrpc/sched.h | 6 include/linux/sunrpc/xdr.h | 4 include/linux/sunrpc/xprt.h | 8 net/sunrpc/auth.c | 42 + net/sunrpc/auth_gss/auth_gss.c | 389 ++++++++---- net/sunrpc/auth_gss/gss_krb5_crypto.c | 89 +- net/sunrpc/auth_gss/gss_krb5_mech.c | 30 net/sunrpc/auth_gss/gss_krb5_seal.c | 58 - net/sunrpc/auth_gss/gss_krb5_unseal.c | 156 ---- net/sunrpc/auth_gss/gss_mech_switch.c | 5 net/sunrpc/auth_gss/gss_pseudoflavors.c | 1 net/sunrpc/clnt.c | 72 +- net/sunrpc/pmap_clnt.c | 17 net/sunrpc/rpc_pipe.c | 86 ++ net/sunrpc/sched.c | 5 net/sunrpc/sunrpc_syms.c | 6 net/sunrpc/xdr.c | 157 ++++ net/sunrpc/xprt.c | 120 ++- 46 files changed, 4308 insertions(+), 1437 deletions(-) diff -u --recursive --new-file --show-c-function linux-2.6.1-rc3/fs/exec.c linux-2.6.1-37-attr/fs/exec.c --- linux-2.6.1-rc3/fs/exec.c 2004-01-08 17:37:52.000000000 -0500 +++ linux-2.6.1-37-attr/fs/exec.c 2004-01-08 17:55:28.000000000 -0500 @@ -121,7 +121,7 @@ asmlinkage long sys_uselib(const char __ struct nameidata nd; int error; - nd.intent.open.flags = O_RDONLY; + nd.intent.open.flags = FMODE_READ; error = __user_walk(library, LOOKUP_FOLLOW|LOOKUP_OPEN, &nd); if 
(error) goto out; @@ -471,8 +471,12 @@ static inline void free_arg_pages(struct struct file *open_exec(const char *name) { struct nameidata nd; - int err = path_lookup(name, LOOKUP_FOLLOW, &nd); - struct file *file = ERR_PTR(err); + int err; + struct file *file; + + nd.intent.open.flags = FMODE_READ; + err = path_lookup(name, LOOKUP_FOLLOW|LOOKUP_OPEN, &nd); + file = ERR_PTR(err); if (!err) { struct inode *inode = nd.dentry->d_inode; diff -u --recursive --new-file --show-c-function linux-2.6.1-rc3/fs/inode.c linux-2.6.1-37-attr/fs/inode.c --- linux-2.6.1-rc3/fs/inode.c 2004-01-08 17:46:19.000000000 -0500 +++ linux-2.6.1-37-attr/fs/inode.c 2004-01-08 18:08:14.000000000 -0500 @@ -1178,6 +1178,8 @@ void inode_update_time(struct inode *ino struct timespec now; int sync_it = 0; + if (IS_NOCMTIME(inode)) + return; if (IS_RDONLY(inode)) return; diff -u --recursive --new-file --show-c-function linux-2.6.1-rc3/fs/nfs/dir.c linux-2.6.1-37-attr/fs/nfs/dir.c --- linux-2.6.1-rc3/fs/nfs/dir.c 2004-01-08 17:39:15.000000000 -0500 +++ linux-2.6.1-37-attr/fs/nfs/dir.c 2004-01-08 18:08:14.000000000 -0500 @@ -72,6 +72,26 @@ struct inode_operations nfs_dir_inode_op .setattr = nfs_setattr, }; +#ifdef CONFIG_NFS_V4 + +static struct dentry *nfs_atomic_lookup(struct inode *, struct dentry *, struct nameidata *); +struct inode_operations nfs4_dir_inode_operations = { + .create = nfs_create, + .lookup = nfs_atomic_lookup, + .link = nfs_link, + .unlink = nfs_unlink, + .symlink = nfs_symlink, + .mkdir = nfs_mkdir, + .rmdir = nfs_rmdir, + .mknod = nfs_mknod, + .rename = nfs_rename, + .permission = nfs_permission, + .getattr = nfs_getattr, + .setattr = nfs_setattr, +}; + +#endif /* CONFIG_NFS_V4 */ + /* * Open file */ @@ -119,11 +139,13 @@ int nfs_readdir_filler(nfs_readdir_descr struct file *file = desc->file; struct inode *inode = file->f_dentry->d_inode; struct rpc_cred *cred = nfs_file_cred(file); + unsigned long timestamp; int error; dfprintk(VFS, "NFS: nfs_readdir_filler() reading cookie 
%Lu into page %lu.\n", (long long)desc->entry->cookie, page->index); again: + timestamp = jiffies; error = NFS_PROTO(inode)->readdir(file->f_dentry, cred, desc->entry->cookie, page, NFS_SERVER(inode)->dtsize, desc->plus); if (error < 0) { @@ -137,18 +159,21 @@ int nfs_readdir_filler(nfs_readdir_descr goto error; } SetPageUptodate(page); + NFS_FLAGS(inode) |= NFS_INO_INVALID_ATIME; /* Ensure consistent page alignment of the data. * Note: assumes we have exclusive access to this mapping either * throught inode->i_sem or some other mechanism. */ - if (page->index == 0) + if (page->index == 0) { invalidate_inode_pages(inode->i_mapping); + NFS_I(inode)->readdir_timestamp = timestamp; + } unlock_page(page); return 0; error: SetPageError(page); unlock_page(page); - invalidate_inode_pages(inode->i_mapping); + nfs_zap_caches(inode); desc->error = error; return -EIO; } @@ -361,6 +386,7 @@ int uncached_readdir(nfs_readdir_descrip page, NFS_SERVER(inode)->dtsize, desc->plus); + NFS_FLAGS(inode) |= NFS_INO_INVALID_ATIME; desc->page = page; desc->ptr = kmap(page); /* matching kunmap in nfs_do_filldir */ if (desc->error >= 0) { @@ -439,7 +465,15 @@ static int nfs_readdir(struct file *filp } res = 0; break; - } else if (res < 0) + } + if (res == -ETOOSMALL && desc->plus) { + NFS_FLAGS(inode) &= ~NFS_INO_ADVISE_RDPLUS; + nfs_zap_caches(inode); + desc->plus = 0; + desc->entry->eof = 0; + continue; + } + if (res < 0) break; res = nfs_do_filldir(desc, dirent, filldir); @@ -461,14 +495,19 @@ static int nfs_readdir(struct file *filp * In the case it has, we assume that the dentries are untrustworthy * and may need to be looked up again. 
*/ -static inline -int nfs_check_verifier(struct inode *dir, struct dentry *dentry) +static inline int nfs_check_verifier(struct inode *dir, struct dentry *dentry) { if (IS_ROOT(dentry)) return 1; - if (nfs_revalidate_inode(NFS_SERVER(dir), dir)) + if ((NFS_FLAGS(dir) & NFS_INO_INVALID_ATTR) != 0 + || nfs_attribute_timeout(dir)) return 0; - return time_after(dentry->d_time, NFS_MTIME_UPDATE(dir)); + return nfs_verify_change_attribute(dir, (unsigned long)dentry->d_fsdata); +} + +static inline void nfs_set_verifier(struct dentry * dentry, unsigned long verf) +{ + dentry->d_fsdata = (void *)verf; } /* @@ -508,9 +547,7 @@ int nfs_neg_need_reval(struct inode *dir /* Don't revalidate a negative dentry if we're creating a new file */ if ((ndflags & LOOKUP_CREATE) && !(ndflags & LOOKUP_CONTINUE)) return 0; - if (!nfs_check_verifier(dir, dentry)) - return 1; - return time_after(jiffies, dentry->d_time + NFS_ATTRTIMEO(dir)); + return !nfs_check_verifier(dir, dentry); } /* @@ -532,6 +569,7 @@ static int nfs_lookup_revalidate(struct int error; struct nfs_fh fhandle; struct nfs_fattr fattr; + unsigned long verifier; int isopen = 0; parent = dget_parent(dentry); @@ -554,6 +592,9 @@ static int nfs_lookup_revalidate(struct goto out_bad; } + /* Revalidate parent directory attribute cache */ + nfs_revalidate_inode(NFS_SERVER(dir), dir); + /* Force a full look up iff the parent directory has changed */ if (nfs_check_verifier(dir, dentry)) { if (nfs_lookup_verify_inode(inode, isopen)) @@ -561,6 +602,12 @@ static int nfs_lookup_revalidate(struct goto out_valid; } + /* + * Note: we're not holding inode->i_sem and so may be racing with + * operations that change the directory. We therefore save the + * change attribute *before* we do the RPC call. 
+ */ + verifier = nfs_save_change_attribute(dir); error = nfs_cached_lookup(dir, dentry, &fhandle, &fattr); if (!error) { if (memcmp(NFS_FH(inode), &fhandle, sizeof(struct nfs_fh))!= 0) @@ -583,6 +630,7 @@ static int nfs_lookup_revalidate(struct out_valid_renew: nfs_renew_times(dentry); + nfs_set_verifier(dentry, verifier); out_valid: unlock_kernel(); dput(parent); @@ -670,9 +718,11 @@ static struct dentry *nfs_lookup(struct goto out; error = -ENOMEM; - dentry->d_op = &nfs_dentry_operations; + dentry->d_op = NFS_PROTO(dir)->dentry_ops; lock_kernel(); + /* Revalidate parent directory attribute cache */ + nfs_revalidate_inode(NFS_SERVER(dir), dir); /* If we're doing an exclusive create, optimize away the lookup */ if (nfs_is_exclusive_create(dir, nd)) @@ -695,6 +745,7 @@ no_entry: error = 0; d_add(dentry, inode); nfs_renew_times(dentry); + nfs_set_verifier(dentry, nfs_save_change_attribute(dir)); out_unlock: unlock_kernel(); out: @@ -702,6 +753,139 @@ out: return ERR_PTR(error); } +#ifdef CONFIG_NFS_V4 +static int nfs_open_revalidate(struct dentry *, struct nameidata *); + +struct dentry_operations nfs4_dentry_operations = { + .d_revalidate = nfs_open_revalidate, + .d_delete = nfs_dentry_delete, + .d_iput = nfs_dentry_iput, +}; + +static int is_atomic_open(struct inode *dir, struct nameidata *nd) +{ + if (!nd) + return 0; + /* Check that we are indeed trying to open this file */ + if ((nd->flags & LOOKUP_CONTINUE) || !(nd->flags & LOOKUP_OPEN)) + return 0; + /* NFS does not (yet) have a stateful open for directories */ + if (nd->flags & LOOKUP_DIRECTORY) + return 0; + /* Are we trying to write to a read only partition? 
*/ + if (IS_RDONLY(dir) && (nd->intent.open.flags & (O_CREAT|O_TRUNC|FMODE_WRITE))) + return 0; + return 1; +} + +static struct dentry *nfs_atomic_lookup(struct inode *dir, struct dentry *dentry, struct nameidata *nd) +{ + struct inode *inode = NULL; + int error = 0; + + /* Check that we are indeed trying to open this file */ + if (!is_atomic_open(dir, nd)) + goto no_open; + + if (dentry->d_name.len > NFS_SERVER(dir)->namelen) { + error = -ENAMETOOLONG; + goto out; + } + dentry->d_op = NFS_PROTO(dir)->dentry_ops; + + /* Let vfs_create() deal with O_EXCL */ + if (nd->intent.open.flags & O_EXCL) + goto no_entry; + + /* Open the file on the server */ + lock_kernel(); + /* Revalidate parent directory attribute cache */ + nfs_revalidate_inode(NFS_SERVER(dir), dir); + + if (nd->intent.open.flags & O_CREAT) { + nfs_begin_data_update(dir); + inode = nfs4_atomic_open(dir, dentry, nd); + nfs_end_data_update(dir); + } else + inode = nfs4_atomic_open(dir, dentry, nd); + unlock_kernel(); + if (IS_ERR(inode)) { + error = PTR_ERR(inode); + switch (error) { + /* Make a negative dentry */ + case -ENOENT: + inode = NULL; + break; + /* This turned out not to be a regular file */ + case -ELOOP: + if (!(nd->intent.open.flags & O_NOFOLLOW)) + goto no_open; + /* case -EISDIR: */ + /* case -EINVAL: */ + default: + goto out; + } + } +no_entry: + d_add(dentry, inode); + nfs_renew_times(dentry); + nfs_set_verifier(dentry, nfs_save_change_attribute(dir)); +out: + BUG_ON(error > 0); + return ERR_PTR(error); +no_open: + return nfs_lookup(dir, dentry, nd); +} + +static int nfs_open_revalidate(struct dentry *dentry, struct nameidata *nd) +{ + struct dentry *parent = NULL; + struct inode *inode = dentry->d_inode; + struct inode *dir; + unsigned long verifier; + int openflags, ret = 0; + + /* NFS only supports OPEN for regular files */ + if (inode && !S_ISREG(inode->i_mode)) + goto no_open; + parent = dget_parent(dentry); + dir = parent->d_inode; + if (!is_atomic_open(dir, nd)) + goto no_open; + 
openflags = nd->intent.open.flags; + if (openflags & O_CREAT) { + /* If this is a negative dentry, just drop it */ + if (!inode) + goto out; + /* If this is exclusive open, just revalidate */ + if (openflags & O_EXCL) + goto no_open; + } + /* We can't create new files, or truncate existing ones here */ + openflags &= ~(O_CREAT|O_TRUNC); + + /* + * Note: we're not holding inode->i_sem and so may be racing with + * operations that change the directory. We therefore save the + * change attribute *before* we do the RPC call. + */ + lock_kernel(); + verifier = nfs_save_change_attribute(dir); + ret = nfs4_open_revalidate(dir, dentry, openflags); + if (!ret) + nfs_set_verifier(dentry, verifier); + unlock_kernel(); +out: + dput(parent); + if (!ret) + d_drop(dentry); + return ret; +no_open: + dput(parent); + return nfs_lookup_revalidate(dentry, nd); +} +#endif /* CONFIG_NFS_V4 */ + static inline int find_dirent_name(nfs_readdir_descriptor_t *desc, struct page *page, struct dentry *dentry) { @@ -736,15 +920,20 @@ int nfs_cached_lookup(struct inode *dir, struct nfs_server *server; struct nfs_entry entry; struct page *page; - unsigned long timestamp = NFS_MTIME_UPDATE(dir); + unsigned long timestamp; int res; if (!NFS_USE_READDIRPLUS(dir)) return -ENOENT; server = NFS_SERVER(dir); - if (server->flags & NFS_MOUNT_NOAC) + /* Don't use readdirplus unless the cache is stable */ + if ((server->flags & NFS_MOUNT_NOAC) != 0 + || nfs_caches_unstable(dir) + || nfs_attribute_timeout(dir)) + return -ENOENT; + if ((NFS_FLAGS(dir) & (NFS_INO_INVALID_ATTR|NFS_INO_INVALID_DATA)) != 0) return -ENOENT; - nfs_revalidate_inode(server, dir); + timestamp = NFS_I(dir)->readdir_timestamp; entry.fh = fh; entry.fattr = fattr; @@ -798,6 +987,7 @@ static int nfs_instantiate(struct dentry if (inode) { d_instantiate(dentry, inode); nfs_renew_times(dentry); + nfs_set_verifier(dentry, nfs_save_change_attribute(dentry->d_parent->d_inode)); error = 0; } return error; @@ -836,11 +1026,13 @@ static int 
nfs_create(struct inode *dir, * does not pass the create flags. */ lock_kernel(); - nfs_zap_caches(dir); + nfs_begin_data_update(dir); inode = NFS_PROTO(dir)->create(dir, &dentry->d_name, &attr, open_flags); + nfs_end_data_update(dir); if (!IS_ERR(inode)) { d_instantiate(dentry, inode); nfs_renew_times(dentry); + nfs_set_verifier(dentry, nfs_save_change_attribute(dir)); error = 0; } else { error = PTR_ERR(inode); @@ -871,9 +1063,10 @@ nfs_mknod(struct inode *dir, struct dent attr.ia_valid = ATTR_MODE; lock_kernel(); - nfs_zap_caches(dir); + nfs_begin_data_update(dir); error = NFS_PROTO(dir)->mknod(dir, &dentry->d_name, &attr, rdev, &fhandle, &fattr); + nfs_end_data_update(dir); if (!error) error = nfs_instantiate(dentry, &fhandle, &fattr); else @@ -908,9 +1101,10 @@ static int nfs_mkdir(struct inode *dir, */ d_drop(dentry); #endif - nfs_zap_caches(dir); + nfs_begin_data_update(dir); error = NFS_PROTO(dir)->mkdir(dir, &dentry->d_name, &attr, &fhandle, &fattr); + nfs_end_data_update(dir); if (!error) error = nfs_instantiate(dentry, &fhandle, &fattr); else @@ -927,10 +1121,12 @@ static int nfs_rmdir(struct inode *dir, dir->i_ino, dentry->d_name.name); lock_kernel(); - nfs_zap_caches(dir); + nfs_begin_data_update(dir); error = NFS_PROTO(dir)->rmdir(dir, &dentry->d_name); - if (!error) + /* Ensure the VFS deletes this inode */ + if (error == 0 && dentry->d_inode != NULL) dentry->d_inode->i_nlink = 0; + nfs_end_data_update(dir); unlock_kernel(); return error; @@ -986,12 +1182,21 @@ dentry->d_parent->d_name.name, dentry->d goto out; } while(sdentry->d_inode != NULL); /* need negative lookup */ - nfs_zap_caches(dir); qsilly.name = silly; qsilly.len = strlen(silly); - error = NFS_PROTO(dir)->rename(dir, &dentry->d_name, dir, &qsilly); + nfs_begin_data_update(dir); + if (dentry->d_inode) { + nfs_begin_data_update(dentry->d_inode); + error = NFS_PROTO(dir)->rename(dir, &dentry->d_name, + dir, &qsilly); + nfs_end_data_update(dentry->d_inode); + } else + error = 
NFS_PROTO(dir)->rename(dir, &dentry->d_name, + dir, &qsilly); + nfs_end_data_update(dir); if (!error) { nfs_renew_times(dentry); + nfs_set_verifier(dentry, nfs_save_change_attribute(dir)); d_move(dentry, sdentry); error = nfs_async_unlink(dentry); /* If we return 0 we don't unlink */ @@ -1023,14 +1228,17 @@ static int nfs_safe_remove(struct dentry goto out; } - nfs_zap_caches(dir); - if (inode) - NFS_CACHEINV(inode); - error = NFS_PROTO(dir)->remove(dir, &dentry->d_name); - if (error < 0) - goto out; - if (inode) - inode->i_nlink--; + nfs_begin_data_update(dir); + if (inode != NULL) { + nfs_begin_data_update(inode); + error = NFS_PROTO(dir)->remove(dir, &dentry->d_name); + /* The VFS may want to delete this inode */ + if (error == 0) + inode->i_nlink--; + nfs_end_data_update(inode); + } else + error = NFS_PROTO(dir)->remove(dir, &dentry->d_name); + nfs_end_data_update(dir); out: return error; } @@ -1065,9 +1273,10 @@ static int nfs_unlink(struct inode *dir, spin_unlock(&dentry->d_lock); spin_unlock(&dcache_lock); error = nfs_safe_remove(dentry); - if (!error) + if (!error) { nfs_renew_times(dentry); - else if (need_rehash) + nfs_set_verifier(dentry, nfs_save_change_attribute(dir)); + } else if (need_rehash) d_rehash(dentry); unlock_kernel(); return error; @@ -1114,9 +1323,10 @@ dentry->d_parent->d_name.name, dentry->d qsymname.len = strlen(symname); lock_kernel(); - nfs_zap_caches(dir); + nfs_begin_data_update(dir); error = NFS_PROTO(dir)->symlink(dir, &dentry->d_name, &qsymname, &attr, &sym_fh, &sym_attr); + nfs_end_data_update(dir); if (!error) { error = nfs_instantiate(dentry, &sym_fh, &sym_attr); } else { @@ -1148,9 +1358,12 @@ nfs_link(struct dentry *old_dentry, stru */ lock_kernel(); d_drop(dentry); - nfs_zap_caches(dir); - NFS_CACHEINV(inode); + + nfs_begin_data_update(dir); + nfs_begin_data_update(inode); error = NFS_PROTO(dir)->link(inode, dir, &dentry->d_name); + nfs_end_data_update(inode); + nfs_end_data_update(dir); unlock_kernel(); return error; } @@ 
-1255,16 +1468,23 @@ go_ahead: if (new_inode) d_delete(new_dentry); - nfs_zap_caches(new_dir); - nfs_zap_caches(old_dir); + nfs_begin_data_update(old_dir); + nfs_begin_data_update(new_dir); + nfs_begin_data_update(old_inode); error = NFS_PROTO(old_dir)->rename(old_dir, &old_dentry->d_name, new_dir, &new_dentry->d_name); + nfs_end_data_update(old_inode); + nfs_end_data_update(new_dir); + nfs_end_data_update(old_dir); out: if (rehash) d_rehash(rehash); - if (!error && !S_ISDIR(old_inode->i_mode)) - d_move(old_dentry, new_dentry); - nfs_renew_times(new_dentry); + if (!error) { + if (!S_ISDIR(old_inode->i_mode)) + d_move(old_dentry, new_dentry); + nfs_renew_times(new_dentry); + nfs_set_verifier(new_dentry, nfs_save_change_attribute(new_dir)); + } /* new dentry created? */ if (dentry) @@ -1281,13 +1501,8 @@ nfs_permission(struct inode *inode, int int mode = inode->i_mode; int res; - /* Are we checking permissions on anything other than lookup? */ - if (!(mask & MAY_EXEC)) { - /* We only need to check permissions on file open() and access() */ - if (!nd || !(nd->flags & (LOOKUP_OPEN|LOOKUP_ACCESS))) - return 0; - } - + if (mask == 0) + return 0; if (mask & MAY_WRITE) { /* * @@ -1306,6 +1521,15 @@ nfs_permission(struct inode *inode, int if (IS_IMMUTABLE(inode)) return -EACCES; } + /* Are we checking permissions on anything other than lookup/execute? */ + if ((mask & MAY_EXEC) == 0) { + /* We only need to check permissions on file open() and access() */ + if (!nd || !(nd->flags & (LOOKUP_OPEN|LOOKUP_ACCESS))) + return 0; + /* NFSv4 has atomic_open... 
*/ + if (NFS_PROTO(inode)->version > 3 && (nd->flags & LOOKUP_OPEN)) + return 0; + } lock_kernel(); @@ -1314,7 +1538,8 @@ nfs_permission(struct inode *inode, int cred = rpcauth_lookupcred(NFS_CLIENT(inode)->cl_auth, 0); if (cache->cred == cred - && time_before(jiffies, cache->jiffies + NFS_ATTRTIMEO(inode))) { + && time_before(jiffies, cache->jiffies + NFS_ATTRTIMEO(inode)) + && !(NFS_FLAGS(inode) & NFS_INO_INVALID_ATTR)) { if (!(res = cache->err)) { /* Is the mask a subset of an accepted mask? */ if ((cache->mask & mask) == mask) diff -u --recursive --new-file --show-c-function linux-2.6.1-rc3/fs/nfs/direct.c linux-2.6.1-37-attr/fs/nfs/direct.c --- linux-2.6.1-rc3/fs/nfs/direct.c 2004-01-08 17:44:30.000000000 -0500 +++ linux-2.6.1-37-attr/fs/nfs/direct.c 2004-01-08 18:08:14.000000000 -0500 @@ -269,6 +269,7 @@ nfs_direct_write_seg(struct inode *inode if (IS_SYNC(inode) || NFS_PROTO(inode)->version == 2 || count <= wsize) wdata.args.stable = NFS_FILE_SYNC; + nfs_begin_data_update(inode); retry: need_commit = 0; tot_bytes = 0; @@ -334,6 +335,8 @@ retry: VERF_SIZE) != 0) goto sync_retry; } + nfs_end_data_update(inode); + NFS_FLAGS(inode) |= NFS_INO_INVALID_DATA; return tot_bytes; diff -u --recursive --new-file --show-c-function linux-2.6.1-rc3/fs/nfs/file.c linux-2.6.1-37-attr/fs/nfs/file.c --- linux-2.6.1-rc3/fs/nfs/file.c 2004-01-08 17:41:32.000000000 -0500 +++ linux-2.6.1-37-attr/fs/nfs/file.c 2004-01-08 18:08:14.000000000 -0500 @@ -26,7 +26,6 @@ #include #include #include -#include #include #include @@ -105,11 +104,16 @@ nfs_file_flush(struct file *file) dfprintk(VFS, "nfs: flush(%s/%ld)\n", inode->i_sb->s_id, inode->i_ino); + if ((file->f_mode & FMODE_WRITE) == 0) + return 0; lock_kernel(); - status = nfs_wb_file(inode, file); + /* Ensure that data+attribute caches are up to date after close() */ + status = nfs_wb_all(inode); if (!status) { status = file->f_error; file->f_error = 0; + if (!status) + __nfs_revalidate_inode(NFS_SERVER(inode), inode); } 
unlock_kernel(); return status; @@ -278,21 +282,17 @@ nfs_lock(struct file *filp, int cmd, str if (!inode) return -EINVAL; - /* This will be in a forthcoming patch. */ - if (NFS_PROTO(inode)->version == 4) { - printk(KERN_INFO "NFS: file locking over NFSv4 is not yet supported\n"); - return -EIO; - } - /* No mandatory locks over NFS */ if ((inode->i_mode & (S_ISGID | S_IXGRP)) == S_ISGID) return -ENOLCK; - /* Fake OK code if mounted without NLM support */ - if (NFS_SERVER(inode)->flags & NFS_MOUNT_NONLM) { - if (IS_GETLK(cmd)) - status = LOCK_USE_CLNT; - goto out_ok; + if (NFS_PROTO(inode)->version != 4) { + /* Fake OK code if mounted without NLM support */ + if (NFS_SERVER(inode)->flags & NFS_MOUNT_NONLM) { + if (IS_GETLK(cmd)) + status = LOCK_USE_CLNT; + goto out_ok; + } } /* @@ -302,7 +302,7 @@ nfs_lock(struct file *filp, int cmd, str * Not sure whether that would be unique, though, or whether * that would break in other places. */ - if (!fl->fl_owner || (fl->fl_flags & FL_POSIX) != FL_POSIX) + if (!fl->fl_owner || !(fl->fl_flags & FL_POSIX)) return -ENOLCK; /* @@ -322,7 +322,7 @@ nfs_lock(struct file *filp, int cmd, str return status; lock_kernel(); - status = nlmclnt_proc(inode, cmd, fl); + status = NFS_PROTO(inode)->lock(filp, cmd, fl); unlock_kernel(); if (status < 0) return status; diff -u --recursive --new-file --show-c-function linux-2.6.1-rc3/fs/nfs/idmap.c linux-2.6.1-37-attr/fs/nfs/idmap.c --- linux-2.6.1-rc3/fs/nfs/idmap.c 2004-01-08 17:36:07.000000000 -0500 +++ linux-2.6.1-37-attr/fs/nfs/idmap.c 2004-01-08 18:07:25.000000000 -0500 @@ -43,6 +43,7 @@ #include #include +#include #include #include @@ -51,14 +52,16 @@ #include #define IDMAP_HASH_SZ 128 -#define IDMAP_HASH_TYPE_NAME 0x01 -#define IDMAP_HASH_TYPE_ID 0x02 -#define IDMAP_HASH_TYPE_INSERT 0x04 struct idmap_hashent { - uid_t ih_id; - char ih_name[IDMAP_NAMESZ]; - u_int32_t ih_namelen; + __u32 ih_id; + int ih_namelen; + char ih_name[IDMAP_NAMESZ]; +}; + +struct idmap_hashtable { + __u8 h_type; + 
struct idmap_hashent h_entries[IDMAP_HASH_SZ]; }; struct idmap { @@ -66,12 +69,10 @@ struct idmap { struct dentry *idmap_dentry; wait_queue_head_t idmap_wq; struct idmap_msg idmap_im; - struct nfs_server *idmap_server; - struct semaphore idmap_lock; - struct semaphore idmap_im_lock; - struct semaphore idmap_hash_lock; - struct idmap_hashent idmap_id_hash[IDMAP_HASH_SZ]; - struct idmap_hashent idmap_name_hash[IDMAP_HASH_SZ]; + struct semaphore idmap_lock; /* Serializes upcalls */ + struct semaphore idmap_im_lock; /* Protects the hashtable */ + struct idmap_hashtable idmap_user_hash; + struct idmap_hashtable idmap_group_hash; }; static ssize_t idmap_pipe_upcall(struct file *, struct rpc_pipe_msg *, char *, @@ -79,10 +80,7 @@ static ssize_t idmap_pipe_upcall(struc static ssize_t idmap_pipe_downcall(struct file *, const char *, size_t); void idmap_pipe_destroy_msg(struct rpc_pipe_msg *); -static int validate_ascii(char *, u_int32_t); - -static u_int32_t fnvhash32(void *, u_int32_t); -static int idmap_cache_lookup(struct idmap *, int, char *, u_int32_t *, uid_t *); +static unsigned int fnvhash32(const void *, size_t); static struct rpc_pipe_ops idmap_upcall_ops = { .upcall = idmap_pipe_upcall, @@ -90,95 +88,153 @@ static struct rpc_pipe_ops idmap_upcall_ .destroy_msg = idmap_pipe_destroy_msg, }; -void * -nfs_idmap_new(struct nfs_server *server) +void +nfs_idmap_new(struct nfs4_client *clp) { struct idmap *idmap; + if (clp->cl_idmap != NULL) + return; if ((idmap = kmalloc(sizeof(*idmap), GFP_KERNEL)) == NULL) - return (NULL); + return; memset(idmap, 0, sizeof(*idmap)); - idmap->idmap_server = server; - snprintf(idmap->idmap_path, sizeof(idmap->idmap_path), - "%s/idmap", idmap->idmap_server->client->cl_pathname); + "%s/idmap", clp->cl_rpcclient->cl_pathname); idmap->idmap_dentry = rpc_mkpipe(idmap->idmap_path, - idmap->idmap_server, &idmap_upcall_ops, 0); - if (IS_ERR(idmap->idmap_dentry)) - goto err_free; + idmap, &idmap_upcall_ops, 0); + if (IS_ERR(idmap->idmap_dentry)) 
{ + kfree(idmap); + return; + } init_MUTEX(&idmap->idmap_lock); init_MUTEX(&idmap->idmap_im_lock); - init_MUTEX(&idmap->idmap_hash_lock); init_waitqueue_head(&idmap->idmap_wq); + idmap->idmap_user_hash.h_type = IDMAP_TYPE_USER; + idmap->idmap_group_hash.h_type = IDMAP_TYPE_GROUP; - return (idmap); - - err_free: - kfree(idmap); - return (NULL); + clp->cl_idmap = idmap; } void -nfs_idmap_delete(struct nfs_server *server) +nfs_idmap_delete(struct nfs4_client *clp) { - struct idmap *idmap = server->idmap; + struct idmap *idmap = clp->cl_idmap; if (!idmap) return; rpc_unlink(idmap->idmap_path); - server->idmap = NULL; + clp->cl_idmap = NULL; kfree(idmap); } /* + * Helper routines for manipulating the hashtable + */ +static inline struct idmap_hashent * +idmap_name_hash(struct idmap_hashtable* h, const char *name, size_t len) +{ + return &h->h_entries[fnvhash32(name, len) % IDMAP_HASH_SZ]; +} + +static struct idmap_hashent * +idmap_lookup_name(struct idmap_hashtable *h, const char *name, size_t len) +{ + struct idmap_hashent *he = idmap_name_hash(h, name, len); + + if (he->ih_namelen != len || memcmp(he->ih_name, name, len) != 0) + return NULL; + return he; +} + +static inline struct idmap_hashent * +idmap_id_hash(struct idmap_hashtable* h, __u32 id) +{ + return &h->h_entries[fnvhash32(&id, sizeof(id)) % IDMAP_HASH_SZ]; +} + +static struct idmap_hashent * +idmap_lookup_id(struct idmap_hashtable *h, __u32 id) +{ + struct idmap_hashent *he = idmap_id_hash(h, id); + if (he->ih_id != id || he->ih_namelen == 0) + return NULL; + return he; +} + +/* + * Routines for allocating new entries in the hashtable. + * For now, we just have 1 entry per bucket, so it's all + * pretty trivial. 
+ */ +static inline struct idmap_hashent * +idmap_alloc_name(struct idmap_hashtable *h, char *name, unsigned len) +{ + return idmap_name_hash(h, name, len); +} + +static inline struct idmap_hashent * +idmap_alloc_id(struct idmap_hashtable *h, __u32 id) +{ + return idmap_id_hash(h, id); +} + +static void +idmap_update_entry(struct idmap_hashent *he, const char *name, + size_t namelen, __u32 id) +{ + he->ih_id = id; + memcpy(he->ih_name, name, namelen); + he->ih_name[namelen] = '\0'; + he->ih_namelen = namelen; +} + +/* * Name -> ID */ -int -nfs_idmap_id(struct nfs_server *server, u_int8_t type, char *name, - u_int namelen, uid_t *id) +static int +nfs_idmap_id(struct idmap *idmap, struct idmap_hashtable *h, + const char *name, size_t namelen, __u32 *id) { struct rpc_pipe_msg msg; - struct idmap *idmap = server->idmap; struct idmap_msg *im; + struct idmap_hashent *he; DECLARE_WAITQUEUE(wq, current); - int ret = -1, hashtype = IDMAP_HASH_TYPE_NAME, xnamelen = namelen; - - if (idmap == NULL) - return (-1); + int ret = -EIO; im = &idmap->idmap_im; - if (namelen > IDMAP_NAMESZ || namelen == 0) - return (-1); + /* + * String sanity checks + * Note that the userland daemon expects NUL terminated strings + */ + for (;;) { + if (namelen == 0) + return -EINVAL; + if (name[namelen-1] != '\0') + break; + namelen--; + } + if (namelen >= IDMAP_NAMESZ) + return -EINVAL; down(&idmap->idmap_lock); down(&idmap->idmap_im_lock); - if (name[xnamelen - 1] == '\0') - xnamelen--; - - if (idmap_cache_lookup(idmap, hashtype, name, &xnamelen, id) == 0) { + he = idmap_lookup_name(h, name, namelen); + if (he != NULL) { + *id = he->ih_id; ret = 0; goto out; } memset(im, 0, sizeof(*im)); memcpy(im->im_name, name, namelen); - /* Make sure the string is NULL terminated */ - if (namelen != xnamelen) { - /* We cannot fit a NULL character */ - if (namelen == IDMAP_NAMESZ) { - ret = -1; - goto out; - } - im->im_name[namelen] = '\0'; - } - im->im_type = type; + im->im_type = h->h_type; im->im_conv = 
IDMAP_CONV_NAMETOID; memset(&msg, 0, sizeof(msg)); @@ -198,16 +254,9 @@ nfs_idmap_id(struct nfs_server *server, remove_wait_queue(&idmap->idmap_wq, &wq); down(&idmap->idmap_im_lock); - /* - * XXX Race condition here, with testing for status. Go ahead - * and and do the cace lookup anyway. - */ if (im->im_status & IDMAP_STATUS_SUCCESS) { - ret = 0; *id = im->im_id; - - hashtype |= IDMAP_HASH_TYPE_INSERT; - ret = idmap_cache_lookup(idmap, hashtype, name, &xnamelen, id); + ret = 0; } out: @@ -220,35 +269,31 @@ nfs_idmap_id(struct nfs_server *server, /* * ID -> Name */ -int -nfs_idmap_name(struct nfs_server *server, u_int8_t type, uid_t id, - char *name, u_int *namelen) +static int +nfs_idmap_name(struct idmap *idmap, struct idmap_hashtable *h, + __u32 id, char *name) { struct rpc_pipe_msg msg; - struct idmap *idmap = server->idmap; struct idmap_msg *im; + struct idmap_hashent *he; DECLARE_WAITQUEUE(wq, current); - int ret = -1, hashtype = IDMAP_HASH_TYPE_ID; - u_int len; - - if (idmap == NULL) - return (-1); + int ret = -EIO; + unsigned int len; im = &idmap->idmap_im; - if (*namelen < IDMAP_NAMESZ || *namelen == 0) - return (-1); - down(&idmap->idmap_lock); down(&idmap->idmap_im_lock); - if (idmap_cache_lookup(idmap, hashtype, name, namelen, &id) == 0) { - ret = 0; + he = idmap_lookup_id(h, id); + if (he != 0) { + memcpy(name, he->ih_name, he->ih_namelen); + ret = he->ih_namelen; goto out; } memset(im, 0, sizeof(*im)); - im->im_type = type; + im->im_type = h->h_type; im->im_conv = IDMAP_CONV_IDTONAME; im->im_id = id; @@ -263,9 +308,6 @@ nfs_idmap_name(struct nfs_server *server goto out; } - /* - * XXX add timeouts here - */ set_current_state(TASK_UNINTERRUPTIBLE); up(&idmap->idmap_im_lock); schedule(); @@ -274,23 +316,20 @@ nfs_idmap_name(struct nfs_server *server down(&idmap->idmap_im_lock); if (im->im_status & IDMAP_STATUS_SUCCESS) { - if ((len = validate_ascii(im->im_name, IDMAP_NAMESZ)) == -1) + if ((len = strnlen(im->im_name, IDMAP_NAMESZ)) == 0) goto out; - ret 
= 0; memcpy(name, im->im_name, len); - *namelen = len; - - hashtype |= IDMAP_HASH_TYPE_INSERT; - ret = idmap_cache_lookup(idmap, hashtype, name, namelen, &id); + ret = len; } out: memset(im, 0, sizeof(*im)); up(&idmap->idmap_im_lock); up(&idmap->idmap_lock); - return (ret); + return ret; } +/* RPC pipefs upcall/downcall routines */ static ssize_t idmap_pipe_upcall(struct file *filp, struct rpc_pipe_msg *msg, char *dst, size_t buflen) @@ -317,10 +356,12 @@ static ssize_t idmap_pipe_downcall(struct file *filp, const char *src, size_t mlen) { struct rpc_inode *rpci = RPC_I(filp->f_dentry->d_inode); - struct nfs_server *server = rpci->private; - struct idmap *idmap = server->idmap; + struct idmap *idmap = (struct idmap *)rpci->private; struct idmap_msg im_in, *im = &idmap->idmap_im; - int match = 0, hashtype, badmsg = 0, namelen_in, namelen; + struct idmap_hashtable *h; + struct idmap_hashent *he = NULL; + int namelen_in; + int ret; if (mlen != sizeof(im_in)) return (-ENOSPC); @@ -330,39 +371,66 @@ idmap_pipe_downcall(struct file *filp, c down(&idmap->idmap_im_lock); - namelen_in = validate_ascii(im_in.im_name, IDMAP_NAMESZ); - namelen = validate_ascii(im->im_name, IDMAP_NAMESZ); + ret = mlen; + im->im_status = im_in.im_status; + /* If we got an error, terminate now, and wake up pending upcalls */ + if (!(im_in.im_status & IDMAP_STATUS_SUCCESS)) { + wake_up(&idmap->idmap_wq); + goto out; + } + + /* Sanity checking of strings */ + ret = -EINVAL; + namelen_in = strnlen(im_in.im_name, IDMAP_NAMESZ); + if (namelen_in == 0 || namelen_in == IDMAP_NAMESZ) + goto out; - badmsg = !(im_in.im_status & IDMAP_STATUS_SUCCESS) || namelen_in <= 0; + switch (im_in.im_type) { + case IDMAP_TYPE_USER: + h = &idmap->idmap_user_hash; + break; + case IDMAP_TYPE_GROUP: + h = &idmap->idmap_group_hash; + break; + default: + goto out; + } switch (im_in.im_conv) { case IDMAP_CONV_IDTONAME: - match = im->im_id == im_in.im_id; + /* Did we match the current upcall? 
*/ + if (im->im_conv == IDMAP_CONV_IDTONAME + && im->im_type == im_in.im_type + && im->im_id == im_in.im_id) { + /* Yes: copy string, including the terminating '\0' */ + memcpy(im->im_name, im_in.im_name, namelen_in); + im->im_name[namelen_in] = '\0'; + wake_up(&idmap->idmap_wq); + } + he = idmap_alloc_id(h, im_in.im_id); break; case IDMAP_CONV_NAMETOID: - match = namelen == namelen_in && - memcmp(im->im_name, im_in.im_name, namelen) == 0; + /* Did we match the current upcall? */ + if (im->im_conv == IDMAP_CONV_NAMETOID + && im->im_type == im_in.im_type + && strnlen(im->im_name, IDMAP_NAMESZ) == namelen_in + && memcmp(im->im_name, im_in.im_name, namelen_in) == 0) { + im->im_id = im_in.im_id; + wake_up(&idmap->idmap_wq); + } + he = idmap_alloc_name(h, im_in.im_name, namelen_in); break; default: - badmsg = 1; - break; - } - - match = match && im->im_type == im_in.im_type; - - if (match) { - memcpy(im, &im_in, sizeof(*im)); - wake_up(&idmap->idmap_wq); - } else if (!badmsg) { - hashtype = im_in.im_conv == IDMAP_CONV_IDTONAME ? 
- IDMAP_HASH_TYPE_ID : IDMAP_HASH_TYPE_NAME; - hashtype |= IDMAP_HASH_TYPE_INSERT; - idmap_cache_lookup(idmap, hashtype, im_in.im_name, &namelen_in, - &im_in.im_id); + goto out; } + /* If the entry is valid, also copy it to the cache */ + if (he != NULL) + idmap_update_entry(he, im_in.im_name, namelen_in, im_in.im_id); + ret = mlen; +out: up(&idmap->idmap_im_lock); - return (mlen); + return ret; } void @@ -379,108 +447,51 @@ idmap_pipe_destroy_msg(struct rpc_pipe_m up(&idmap->idmap_im_lock); } -static int -validate_ascii(char *string, u_int32_t len) -{ - int i; - - for (i = 0; i < len; i++) { - if (string[i] == '\0') - break; - - if (string[i] & 0x80) - return (-1); - } - - if (string[i] != '\0') - return (-1); - - return (i); -} - /* * Fowler/Noll/Vo hash * http://www.isthe.com/chongo/tech/comp/fnv/ */ -#define FNV_P_32 ((u_int32_t)0x01000193) /* 16777619 */ -#define FNV_1_32 ((u_int32_t)0x811c9dc5) /* 2166136261 */ +#define FNV_P_32 ((unsigned int)0x01000193) /* 16777619 */ +#define FNV_1_32 ((unsigned int)0x811c9dc5) /* 2166136261 */ -static u_int32_t -fnvhash32(void *buf, u_int32_t buflen) +static unsigned int fnvhash32(const void *buf, size_t buflen) { - u_char *p, *end = (u_char *)buf + buflen; - u_int32_t hash = FNV_1_32; + const unsigned char *p, *end = (const unsigned char *)buf + buflen; + unsigned int hash = FNV_1_32; for (p = buf; p < end; p++) { hash *= FNV_P_32; - hash ^= (u_int32_t)*p; + hash ^= (unsigned int)*p; } return (hash); } -/* - * ->ih_namelen == 0 indicates negative entry - */ -static int -idmap_cache_lookup(struct idmap *idmap, int type, char *name, u_int32_t *namelen, - uid_t *id) +int nfs_map_name_to_uid(struct nfs4_client *clp, const char *name, size_t namelen, __u32 *uid) { - u_int32_t hash; - struct idmap_hashent *he = NULL; - int insert = type & IDMAP_HASH_TYPE_INSERT; - int ret = -1; + struct idmap *idmap = clp->cl_idmap; - /* - * XXX technically, this is not needed, since we will always - * hold idmap_im_lock when altering the hash 
tables. but - * semantically that just hurts. - * - * XXX cache negative responses - */ - down(&idmap->idmap_hash_lock); + return nfs_idmap_id(idmap, &idmap->idmap_user_hash, name, namelen, uid); +} - if (*namelen > IDMAP_NAMESZ || *namelen == 0) - goto out; +int nfs_map_group_to_gid(struct nfs4_client *clp, const char *name, size_t namelen, __u32 *uid) +{ + struct idmap *idmap = clp->cl_idmap; - if (type & IDMAP_HASH_TYPE_NAME) { - hash = fnvhash32(name, *namelen) % IDMAP_HASH_SZ; - he = &idmap->idmap_name_hash[hash]; - - /* - * Testing he->ih_namelen == *namelen implicitly tests - * namelen != 0, and thus a non-negative entry. - */ - if (!insert && he->ih_namelen == *namelen && - memcmp(he->ih_name, name, *namelen) == 0) { - *id = he->ih_id; - ret = 0; - goto out; - } - } + return nfs_idmap_id(idmap, &idmap->idmap_group_hash, name, namelen, uid); +} - if (type & IDMAP_HASH_TYPE_ID) { - hash = fnvhash32(id, sizeof(*id)) % IDMAP_HASH_SZ; - he = &idmap->idmap_id_hash[hash]; - - if (!insert && *id == he->ih_id && he->ih_namelen != 0 && - *namelen >= he->ih_namelen) { - memcpy(name, he->ih_name, he->ih_namelen); - *namelen = he->ih_namelen; - ret = 0; - goto out; - } - } +int nfs_map_uid_to_name(struct nfs4_client *clp, __u32 uid, char *buf) +{ + struct idmap *idmap = clp->cl_idmap; - if (insert && he != NULL) { - he->ih_id = *id; - memcpy(he->ih_name, name, *namelen); - he->ih_namelen = *namelen; - ret = 0; - } + return nfs_idmap_name(idmap, &idmap->idmap_user_hash, uid, buf); +} +int nfs_map_gid_to_group(struct nfs4_client *clp, __u32 uid, char *buf) +{ + struct idmap *idmap = clp->cl_idmap; - out: - up(&idmap->idmap_hash_lock); - return (ret); + return nfs_idmap_name(idmap, &idmap->idmap_group_hash, uid, buf); } + diff -u --recursive --new-file --show-c-function linux-2.6.1-rc3/fs/nfs/inode.c linux-2.6.1-37-attr/fs/nfs/inode.c --- linux-2.6.1-rc3/fs/nfs/inode.c 2004-01-08 17:43:12.000000000 -0500 +++ linux-2.6.1-37-attr/fs/nfs/inode.c 2004-01-08 18:08:14.000000000 
-0500 @@ -53,8 +53,8 @@ */ #define NFS_MAX_READAHEAD RPC_MAXREQS -void nfs_zap_caches(struct inode *); static void nfs_invalidate_inode(struct inode *); +static int nfs_update_inode(struct inode *, struct nfs_fattr *, unsigned long); static struct inode *nfs_alloc_inode(struct super_block *sb); static void nfs_destroy_inode(struct inode *); @@ -151,6 +151,7 @@ nfs_clear_inode(struct inode *inode) cred = nfsi->cache_access.cred; if (cred) put_rpccred(cred); + BUG_ON(atomic_read(&nfsi->data_updates) != 0); } void @@ -158,10 +159,7 @@ nfs_put_super(struct super_block *sb) { struct nfs_server *server = NFS_SB(sb); -#ifdef CONFIG_NFS_V4 - if (server->idmap != NULL) - nfs_idmap_delete(server); -#endif /* CONFIG_NFS_V4 */ + nfs4_renewd_prepare_shutdown(server); if (server->client != NULL) rpc_shutdown_client(server->client); @@ -301,7 +299,6 @@ nfs_sb_init(struct super_block *sb, rpc_ server = NFS_SB(sb); sb->s_magic = NFS_SUPER_MAGIC; - sb->s_op = &nfs_sops; /* Did getting the root inode fail? 
*/ if (nfs_get_root(&root_inode, authflavor, sb, &server->fh) < 0) @@ -310,7 +307,7 @@ nfs_sb_init(struct super_block *sb, rpc_ if (!sb->s_root) goto out_no_root; - sb->s_root->d_op = &nfs_dentry_operations; + sb->s_root->d_op = server->rpc_ops->dentry_ops; /* Get some general file system info */ if (server->rpc_ops->fsinfo(server, &server->fh, &fsinfo) < 0) { @@ -493,10 +490,17 @@ nfs_fill_super(struct super_block *sb, s server->client = nfs_create_client(server, data); if (server->client == NULL) goto out_fail; - data->pseudoflavor = RPC_AUTH_UNIX; /* RFC 2623, sec 2.3.2 */ - server->client_sys = nfs_create_client(server, data); - if (server->client_sys == NULL) - goto out_shutdown; + /* RFC 2623, sec 2.3.2 */ + if (authflavor != RPC_AUTH_UNIX) { + server->client_sys = rpc_clone_client(server->client); + if (server->client_sys == NULL) + goto out_shutdown; + if (!rpcauth_create(RPC_AUTH_UNIX, server->client_sys)) + goto out_shutdown; + } else { + atomic_inc(&server->client->cl_count); + server->client_sys = server->client; + } /* Fire up rpciod if not yet running */ if (rpciod_up() != 0) { @@ -504,6 +508,7 @@ nfs_fill_super(struct super_block *sb, s goto out_shutdown; } + sb->s_op = &nfs_sops; err = nfs_sb_init(sb, authflavor); if (err != 0) goto out_noinit; @@ -623,13 +628,17 @@ static int nfs_show_options(struct seq_f void nfs_zap_caches(struct inode *inode) { + struct nfs_inode *nfsi = NFS_I(inode); + int mode = inode->i_mode; + NFS_ATTRTIMEO(inode) = NFS_MINATTRTIMEO(inode); NFS_ATTRTIMEO_UPDATE(inode) = jiffies; - invalidate_remote_inode(inode); - memset(NFS_COOKIEVERF(inode), 0, sizeof(NFS_COOKIEVERF(inode))); - NFS_CACHEINV(inode); + if (S_ISREG(mode) || S_ISDIR(mode) || S_ISLNK(mode)) + nfsi->flags |= NFS_INO_INVALID_ATTR|NFS_INO_INVALID_DATA; + else + nfsi->flags |= NFS_INO_INVALID_ATTR; } /* @@ -669,9 +678,6 @@ nfs_find_actor(struct inode *inode, void return 0; if (is_bad_inode(inode)) return 0; - /* Force an attribute cache update if inode->i_count == 
0 */ - if (!atomic_read(&inode->i_count)) - NFS_CACHEINV(inode); return 1; } @@ -725,7 +731,7 @@ nfs_fhget(struct super_block *sb, struct inode->i_ino = hash; /* We can't support update_atime(), since the server will reset it */ - inode->i_flags |= S_NOATIME; + inode->i_flags |= S_NOATIME|S_NOCMTIME; inode->i_mode = fattr->mode; /* Why so? Because we want revalidate for devices/FIFOs, and * that's precisely what we have in nfs_file_inode_operations. @@ -736,7 +742,7 @@ nfs_fhget(struct super_block *sb, struct inode->i_data.a_ops = &nfs_file_aops; inode->i_data.backing_dev_info = &NFS_SB(sb)->backing_dev_info; } else if (S_ISDIR(inode->i_mode)) { - inode->i_op = &nfs_dir_inode_operations; + inode->i_op = NFS_SB(sb)->rpc_ops->dir_inode_ops; inode->i_fop = &nfs_dir_operations; if (nfs_server_capable(inode, NFS_CAP_READDIRPLUS) && fattr->size <= NFS_LIMIT_READDIRPLUS) @@ -750,10 +756,6 @@ nfs_fhget(struct super_block *sb, struct inode->i_atime = fattr->atime; inode->i_mtime = fattr->mtime; inode->i_ctime = fattr->ctime; - nfsi->read_cache_ctime = fattr->ctime; - nfsi->read_cache_mtime = fattr->mtime; - nfsi->cache_mtime_jiffies = fattr->timestamp; - nfsi->read_cache_isize = fattr->size; if (fattr->valid & NFS_ATTR_FATTR_V4) nfsi->change_attr = fattr->change_attr; inode->i_size = nfs_size_to_loff_t(fattr->size); @@ -791,6 +793,8 @@ out_no_inode: goto out; } +#define NFS_VALID_ATTRS (ATTR_MODE|ATTR_UID|ATTR_GID|ATTR_SIZE|ATTR_ATIME|ATTR_ATIME_SET|ATTR_MTIME|ATTR_MTIME_SET) + int nfs_setattr(struct dentry *dentry, struct iattr *attr) { @@ -798,58 +802,50 @@ nfs_setattr(struct dentry *dentry, struc struct nfs_fattr fattr; int error; - lock_kernel(); - - /* - * Make sure the inode is up-to-date. 
- */ - error = nfs_revalidate_inode(NFS_SERVER(inode),inode); - if (error) { -#ifdef NFS_PARANOIA -printk("nfs_setattr: revalidate failed, error=%d\n", error); -#endif - goto out; + if (attr->ia_valid & ATTR_SIZE) { + if (!S_ISREG(inode->i_mode) || attr->ia_size == i_size_read(inode)) + attr->ia_valid &= ~ATTR_SIZE; } - if (!S_ISREG(inode->i_mode)) { - attr->ia_valid &= ~ATTR_SIZE; - } else { - filemap_fdatawrite(inode->i_mapping); - error = nfs_wb_all(inode); - filemap_fdatawait(inode->i_mapping); - if (error) - goto out; - } + /* Optimization: if the end result is no change, don't RPC */ + attr->ia_valid &= NFS_VALID_ATTRS; + if (attr->ia_valid == 0) + return 0; + lock_kernel(); + nfs_begin_data_update(inode); + /* Write all dirty data if we're changing file permissions or size */ + if ((attr->ia_valid & (ATTR_MODE|ATTR_UID|ATTR_GID|ATTR_SIZE)) != 0) { + if (filemap_fdatawrite(inode->i_mapping) == 0) + filemap_fdatawait(inode->i_mapping); + nfs_wb_all(inode); + } error = NFS_PROTO(inode)->setattr(dentry, &fattr, attr); - if (error) - goto out; - /* - * If we changed the size or mtime, update the inode - * now to avoid invalidating the page cache. - */ - if (attr->ia_valid & ATTR_SIZE) { - if (attr->ia_size != fattr.size) - printk("nfs_setattr: attr=%Ld, fattr=%Ld??\n", - (long long) attr->ia_size, (long long)fattr.size); - vmtruncate(inode, attr->ia_size); + if (error == 0) { + nfs_refresh_inode(inode, &fattr); + if ((attr->ia_valid & ATTR_MODE) != 0) { + int mode; + mode = inode->i_mode & ~S_IALLUGO; + mode |= attr->ia_mode & S_IALLUGO; + inode->i_mode = mode; + } + if ((attr->ia_valid & ATTR_UID) != 0) + inode->i_uid = attr->ia_uid; + if ((attr->ia_valid & ATTR_GID) != 0) + inode->i_gid = attr->ia_gid; + if ((attr->ia_valid & ATTR_SIZE) != 0) { + i_size_write(inode, attr->ia_size); + vmtruncate(inode, attr->ia_size); + } } - - /* - * If we changed the size or mtime, update the inode - * now to avoid invalidating the page cache. 
- */ - if (!(fattr.valid & NFS_ATTR_WCC)) { - struct nfs_inode *nfsi = NFS_I(inode); - fattr.pre_size = nfsi->read_cache_isize; - fattr.pre_mtime = nfsi->read_cache_mtime; - fattr.pre_ctime = nfsi->read_cache_ctime; - fattr.valid |= NFS_ATTR_WCC; - } - /* Force an attribute cache update */ - NFS_CACHEINV(inode); - error = nfs_refresh_inode(inode, &fattr); -out: + if ((attr->ia_valid & (ATTR_MODE|ATTR_UID|ATTR_GID)) != 0) { + struct rpc_cred **cred = &NFS_I(inode)->cache_access.cred; + if (*cred) { + put_rpccred(*cred); + *cred = NULL; + } + } + nfs_end_data_update(inode); unlock_kernel(); return error; } @@ -877,7 +873,19 @@ nfs_wait_on_inode(struct inode *inode, i int nfs_getattr(struct vfsmount *mnt, struct dentry *dentry, struct kstat *stat) { struct inode *inode = dentry->d_inode; - int err = nfs_revalidate_inode(NFS_SERVER(inode), inode); + struct nfs_inode *nfsi = NFS_I(inode); + int need_atime = nfsi->flags & NFS_INO_INVALID_ATIME; + int err; + + if (__IS_FLG(inode, MS_NOATIME)) + need_atime = 0; + else if (__IS_FLG(inode, MS_NODIRATIME) && S_ISDIR(inode->i_mode)) + need_atime = 0; + /* We may force a getattr if the user cares about atime */ + if (need_atime) + err = __nfs_revalidate_inode(NFS_SERVER(inode), inode); + else + err = nfs_revalidate_inode(NFS_SERVER(inode), inode); if (!err) generic_fillattr(inode, stat); return err; @@ -912,8 +920,10 @@ int nfs_open(struct inode *inode, struct auth = NFS_CLIENT(inode)->cl_auth; cred = rpcauth_lookupcred(auth, 0); filp->private_data = cred; - if (filp->f_mode & FMODE_WRITE) + if ((filp->f_mode & FMODE_WRITE) != 0) { nfs_set_mmcred(inode, cred); + nfs_begin_data_update(inode); + } return 0; } @@ -922,6 +932,8 @@ int nfs_release(struct inode *inode, str struct rpc_cred *cred; lock_kernel(); + if ((filp->f_mode & FMODE_WRITE) != 0) + nfs_end_data_update(inode); cred = nfs_file_cred(filp); if (cred) put_rpccred(cred); @@ -938,6 +950,9 @@ __nfs_revalidate_inode(struct nfs_server { int status = -ESTALE; struct 
nfs_fattr fattr; + struct nfs_inode *nfsi = NFS_I(inode); + unsigned long verifier; + unsigned int flags; dfprintk(PAGECACHE, "NFS: revalidating (%s/%Ld)\n", inode->i_sb->s_id, (long long)NFS_FILEID(inode)); @@ -947,23 +962,16 @@ __nfs_revalidate_inode(struct nfs_server goto out_nowait; if (NFS_STALE(inode) && inode != inode->i_sb->s_root->d_inode) goto out_nowait; - if (NFS_FAKE_ROOT(inode)) { - dfprintk(VFS, "NFS: not revalidating fake root\n"); - status = 0; - goto out_nowait; - } while (NFS_REVALIDATING(inode)) { status = nfs_wait_on_inode(inode, NFS_INO_REVALIDATING); if (status < 0) goto out_nowait; - if (time_before(jiffies,NFS_READTIME(inode)+NFS_ATTRTIMEO(inode))) { - status = NFS_STALE(inode) ? -ESTALE : 0; - goto out_nowait; - } } NFS_FLAGS(inode) |= NFS_INO_REVALIDATING; + /* Protect against RPC races by saving the change attribute */ + verifier = nfs_save_change_attribute(inode); status = NFS_PROTO(inode)->getattr(inode, &fattr); if (status) { dfprintk(PAGECACHE, "nfs_revalidate_inode: (%s/%Ld) getattr failed, error=%d\n", @@ -977,13 +985,34 @@ __nfs_revalidate_inode(struct nfs_server goto out; } - status = nfs_refresh_inode(inode, &fattr); + status = nfs_update_inode(inode, &fattr, verifier); if (status) { dfprintk(PAGECACHE, "nfs_revalidate_inode: (%s/%Ld) refresh failed, error=%d\n", inode->i_sb->s_id, (long long)NFS_FILEID(inode), status); goto out; } + flags = nfsi->flags; + /* + * We may need to keep the attributes marked as invalid if + * we raced with nfs_end_attr_update(). 
+ */ + if (verifier == nfsi->cache_change_attribute) + nfsi->flags &= ~(NFS_INO_INVALID_ATTR|NFS_INO_INVALID_ATIME); + /* Do the page cache invalidation */ + if (flags & NFS_INO_INVALID_DATA) { + if (S_ISREG(inode->i_mode)) { + if (filemap_fdatawrite(inode->i_mapping) == 0) + filemap_fdatawait(inode->i_mapping); + nfs_wb_all(inode); + } + nfsi->flags &= ~NFS_INO_INVALID_DATA; + invalidate_inode_pages2(inode->i_mapping); + memset(NFS_COOKIEVERF(inode), 0, sizeof(NFS_COOKIEVERF(inode))); + dfprintk(PAGECACHE, "NFS: (%s/%Ld) data cache invalidated\n", + inode->i_sb->s_id, + (long long)NFS_FILEID(inode)); + } dfprintk(PAGECACHE, "NFS: (%s/%Ld) revalidation complete\n", inode->i_sb->s_id, (long long)NFS_FILEID(inode)); @@ -991,41 +1020,104 @@ __nfs_revalidate_inode(struct nfs_server NFS_FLAGS(inode) &= ~NFS_INO_STALE; out: NFS_FLAGS(inode) &= ~NFS_INO_REVALIDATING; - wake_up(&NFS_I(inode)->nfs_i_wait); + wake_up(&nfsi->nfs_i_wait); out_nowait: unlock_kernel(); return status; } -/* - * nfs_fattr_obsolete - Test if attribute data is newer than cached data - * @inode: inode - * @fattr: attributes to test +/** + * nfs_begin_data_update + * @inode - pointer to inode + * Declare that a set of operations will update file data on the server + */ +void nfs_begin_data_update(struct inode *inode) +{ + atomic_inc(&NFS_I(inode)->data_updates); +} + +/** + * nfs_end_data_update + * @inode - pointer to inode + * Declare end of the operations that will update file data + */ +void nfs_end_data_update(struct inode *inode) +{ + struct nfs_inode *nfsi = NFS_I(inode); + + if (atomic_dec_and_test(&nfsi->data_updates)) { + nfsi->cache_change_attribute ++; + /* Mark the attribute cache for revalidation */ + nfsi->flags |= NFS_INO_INVALID_ATTR; + /* Directories and symlinks: invalidate page cache too */ + if (S_ISDIR(inode->i_mode) || S_ISLNK(inode->i_mode)) + nfsi->flags |= NFS_INO_INVALID_DATA; + } +} + +/** + * nfs_refresh_inode - verify consistency of the inode attribute cache + * @inode - 
pointer to inode + * @fattr - updated attributes * - * Avoid stuffing the attribute cache with obsolete information. - * We always accept updates if the attribute cache timed out, or if - * fattr->ctime is newer than our cached value. - * If fattr->ctime matches the cached value, we still accept the update - * if it increases the file size. + * Verifies the attribute cache. If we have just changed the attributes, + * so that fattr carries weak cache consistency data, then it may + * also update the ctime/mtime/change_attribute. */ -static inline -int nfs_fattr_obsolete(struct inode *inode, struct nfs_fattr *fattr) +int nfs_refresh_inode(struct inode *inode, struct nfs_fattr *fattr) { struct nfs_inode *nfsi = NFS_I(inode); - long cdif; + loff_t cur_size, new_isize; + int data_unstable; + + /* Are we in the process of updating data on the server? */ + data_unstable = nfs_caches_unstable(inode); + + if (fattr->valid & NFS_ATTR_FATTR_V4) { + if ((fattr->valid & NFS_ATTR_PRE_CHANGE) != 0 + && nfsi->change_attr == fattr->pre_change_attr) + nfsi->change_attr = fattr->change_attr; + if (!data_unstable && nfsi->change_attr != fattr->change_attr) + nfsi->flags |= NFS_INO_INVALID_ATTR; + } - if (time_after(jiffies, nfsi->read_cache_jiffies + nfsi->attrtimeo)) - goto out_valid; - cdif = fattr->ctime.tv_sec - nfsi->read_cache_ctime.tv_sec; - if (cdif == 0) - cdif = fattr->ctime.tv_nsec - nfsi->read_cache_ctime.tv_nsec; - if (cdif > 0) - goto out_valid; - /* Ugh... */ - if (cdif == 0 && fattr->size > nfsi->read_cache_isize) - goto out_valid; - return -1; - out_valid: + if ((fattr->valid & NFS_ATTR_FATTR) == 0) + return 0; + + /* Has the inode gone and changed behind our back? 
*/ + if (nfsi->fileid != fattr->fileid + || (inode->i_mode & S_IFMT) != (fattr->mode & S_IFMT)) + return -EIO; + + cur_size = i_size_read(inode); + new_isize = nfs_size_to_loff_t(fattr->size); + + /* If we have atomic WCC data, we may update some attributes */ + if ((fattr->valid & NFS_ATTR_WCC) != 0) { + if (timespec_equal(&inode->i_ctime, &fattr->pre_ctime)) + memcpy(&inode->i_ctime, &fattr->ctime, sizeof(inode->i_ctime)); + if (timespec_equal(&inode->i_mtime, &fattr->pre_mtime)) + memcpy(&inode->i_mtime, &fattr->mtime, sizeof(inode->i_mtime)); + } + + /* Verify a few of the more important attributes */ + if (!data_unstable) { + if (!timespec_equal(&inode->i_mtime, &fattr->mtime) + || cur_size != new_isize) + nfsi->flags |= NFS_INO_INVALID_ATTR; + } else if (S_ISREG(inode->i_mode) && new_isize > cur_size) + nfsi->flags |= NFS_INO_INVALID_ATTR; + + /* Have any file permissions changed? */ + if ((inode->i_mode & S_IALLUGO) != (fattr->mode & S_IALLUGO) + || inode->i_uid != fattr->uid + || inode->i_gid != fattr->gid) + nfsi->flags |= NFS_INO_INVALID_ATTR; + + if (!timespec_equal(&inode->i_atime, &fattr->atime)) + nfsi->flags |= NFS_INO_INVALID_ATIME; + + nfsi->read_cache_jiffies = fattr->timestamp; return 0; } @@ -1041,20 +1133,22 @@ int nfs_fattr_obsolete(struct inode *ino * * A very similar scenario holds for the dir cache. 
*/ -int -__nfs_refresh_inode(struct inode *inode, struct nfs_fattr *fattr) +static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr, unsigned long verifier) { struct nfs_inode *nfsi = NFS_I(inode); __u64 new_size; loff_t new_isize; - int invalid = 0; - int mtime_update = 0; + unsigned int invalid = 0; loff_t cur_isize; + int data_unstable; - dfprintk(VFS, "NFS: refresh_inode(%s/%ld ct=%d info=0x%x)\n", - inode->i_sb->s_id, inode->i_ino, + dfprintk(VFS, "NFS: %s(%s/%ld ct=%d info=0x%x)\n", + __FUNCTION__, inode->i_sb->s_id, inode->i_ino, atomic_read(&inode->i_count), fattr->valid); + if ((fattr->valid & NFS_ATTR_FATTR) == 0) + return 0; + /* First successful call after mount, fill real data. */ if (NFS_FAKE_ROOT(inode)) { dfprintk(VFS, "NFS: updating fake root\n"); @@ -1063,43 +1157,49 @@ __nfs_refresh_inode(struct inode *inode, } if (nfsi->fileid != fattr->fileid) { - printk(KERN_ERR "nfs_refresh_inode: inode number mismatch\n" + printk(KERN_ERR "%s: inode number mismatch\n" "expected (%s/0x%Lx), got (%s/0x%Lx)\n", + __FUNCTION__, inode->i_sb->s_id, (long long)nfsi->fileid, inode->i_sb->s_id, (long long)fattr->fileid); goto out_err; } - /* Throw out obsolete READDIRPLUS attributes */ - if (time_before(fattr->timestamp, NFS_READTIME(inode))) - return 0; /* * Make sure the inode's type hasn't changed. */ if ((inode->i_mode & S_IFMT) != (fattr->mode & S_IFMT)) goto out_changed; - new_size = fattr->size; - new_isize = nfs_size_to_loff_t(fattr->size); - - /* Avoid races */ - if (nfs_fattr_obsolete(inode, fattr)) - goto out_nochange; - /* * Update the read time so we don't revalidate too often. */ nfsi->read_cache_jiffies = fattr->timestamp; - /* - * Note: NFS_CACHE_ISIZE(inode) reflects the state of the cache. - * NOT inode->i_size!!! - */ - if (nfsi->read_cache_isize != new_size) { + /* Are we racing with known updates of the metadata on the server? */ + data_unstable = ! 
nfs_verify_change_attribute(inode, verifier); + + /* Check if the file size agrees */ + new_size = fattr->size; + new_isize = nfs_size_to_loff_t(fattr->size); + cur_isize = i_size_read(inode); + if (cur_isize != new_size) { #ifdef NFS_DEBUG_VERBOSE printk(KERN_DEBUG "NFS: isize change on %s/%ld\n", inode->i_sb->s_id, inode->i_ino); #endif - invalid = 1; + /* + * If we have pending writebacks, things can get + * messy. + */ + if (S_ISREG(inode->i_mode) && data_unstable) { + if (new_isize > cur_isize) { + i_size_write(inode, new_isize); + invalid |= NFS_INO_INVALID_ATTR|NFS_INO_INVALID_DATA; + } + } else { + i_size_write(inode, new_isize); + invalid |= NFS_INO_INVALID_ATTR|NFS_INO_INVALID_DATA; + } } /* @@ -1107,12 +1207,13 @@ __nfs_refresh_inode(struct inode *inode, * can change this value in VFS without requiring a * cache revalidation. */ - if (!timespec_equal(&nfsi->read_cache_mtime, &fattr->mtime)) { + if (!timespec_equal(&inode->i_mtime, &fattr->mtime)) { + memcpy(&inode->i_mtime, &fattr->mtime, sizeof(inode->i_mtime)); #ifdef NFS_DEBUG_VERBOSE printk(KERN_DEBUG "NFS: mtime change on %s/%ld\n", inode->i_sb->s_id, inode->i_ino); #endif - invalid = 1; - mtime_update = 1; + if (!data_unstable) + invalid |= NFS_INO_INVALID_ATTR|NFS_INO_INVALID_DATA; } if ((fattr->valid & NFS_ATTR_FATTR_V4) @@ -1121,47 +1222,15 @@ __nfs_refresh_inode(struct inode *inode, printk(KERN_DEBUG "NFS: change_attr change on %s/%ld\n", inode->i_sb->s_id, inode->i_ino); #endif - invalid = 1; - } - - /* Check Weak Cache Consistency data. - * If size and mtime match the pre-operation values, we can - * assume that any attribute changes were caused by our NFS - * operation, so there's no need to invalidate the caches. 
- */ - if ((fattr->valid & NFS_ATTR_PRE_CHANGE) - && nfsi->change_attr == fattr->pre_change_attr) { - invalid = 0; - } - else if ((fattr->valid & NFS_ATTR_WCC) - && nfsi->read_cache_isize == fattr->pre_size - && timespec_equal(&nfsi->read_cache_mtime, &fattr->pre_mtime)) { - invalid = 0; - } - - /* - * If we have pending writebacks, things can get - * messy. - */ - cur_isize = i_size_read(inode); - if (nfs_have_writebacks(inode) && new_isize < cur_isize) - new_isize = cur_isize; - - nfsi->read_cache_ctime = fattr->ctime; - inode->i_ctime = fattr->ctime; - inode->i_atime = fattr->atime; - - if (mtime_update) { - if (invalid) - nfsi->cache_mtime_jiffies = fattr->timestamp; - nfsi->read_cache_mtime = fattr->mtime; - inode->i_mtime = fattr->mtime; + nfsi->change_attr = fattr->change_attr; + if (!data_unstable) + invalid |= NFS_INO_INVALID_ATTR|NFS_INO_INVALID_DATA; } - nfsi->read_cache_isize = new_size; - i_size_write(inode, new_isize); + memcpy(&inode->i_ctime, &fattr->ctime, sizeof(inode->i_ctime)); + memcpy(&inode->i_atime, &fattr->atime, sizeof(inode->i_atime)); - if (inode->i_mode != fattr->mode || + if ((inode->i_mode & S_IALLUGO) != (fattr->mode & S_IALLUGO) || inode->i_uid != fattr->uid || inode->i_gid != fattr->gid) { struct rpc_cred **cred = &NFS_I(inode)->cache_access.cred; @@ -1169,11 +1238,9 @@ __nfs_refresh_inode(struct inode *inode, put_rpccred(*cred); *cred = NULL; } + invalid |= NFS_INO_INVALID_ATTR; } - if (fattr->valid & NFS_ATTR_FATTR_V4) - nfsi->change_attr = fattr->change_attr; - inode->i_mode = fattr->mode; inode->i_nlink = fattr->nlink; inode->i_uid = fattr->uid; @@ -1189,31 +1256,30 @@ __nfs_refresh_inode(struct inode *inode, inode->i_blocks = fattr->du.nfs2.blocks; inode->i_blksize = fattr->du.nfs2.blocksize; } - - /* Update attrtimeo value */ - if (invalid) { + + /* Update attrtimeo value if we're out of the unstable period */ + if (invalid & NFS_INO_INVALID_ATTR) { nfsi->attrtimeo = NFS_MINATTRTIMEO(inode); nfsi->attrtimeo_timestamp = 
jiffies; - invalidate_remote_inode(inode); - memset(NFS_COOKIEVERF(inode), 0, sizeof(NFS_COOKIEVERF(inode))); } else if (time_after(jiffies, nfsi->attrtimeo_timestamp+nfsi->attrtimeo)) { if ((nfsi->attrtimeo <<= 1) > NFS_MAXATTRTIMEO(inode)) nfsi->attrtimeo = NFS_MAXATTRTIMEO(inode); nfsi->attrtimeo_timestamp = jiffies; } + /* Don't invalidate the data if we were to blame */ + if (!(S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode) + || S_ISLNK(inode->i_mode))) + invalid &= ~NFS_INO_INVALID_DATA; + nfsi->flags |= invalid; return 0; - out_nochange: - if (!timespec_equal(&fattr->atime, &inode->i_atime)) - inode->i_atime = fattr->atime; - return 0; out_changed: /* * Big trouble! The inode has become a different object. */ #ifdef NFS_PARANOIA - printk(KERN_DEBUG "nfs_refresh_inode: inode %ld mode changed, %07o to %07o\n", - inode->i_ino, inode->i_mode, fattr->mode); + printk(KERN_DEBUG "%s: inode %ld mode changed, %07o to %07o\n", + __FUNCTION__, inode->i_ino, inode->i_mode, fattr->mode); #endif /* * No need to worry about unhashing the dentry, as the @@ -1265,6 +1331,8 @@ static struct super_block *nfs_get_sb(st if (!server) return ERR_PTR(-ENOMEM); memset(server, 0, sizeof(struct nfs_server)); + /* Zero out the NFS state stuff */ + init_nfsv4_state(server); root = &server->fh; memcpy(root, &data->root, sizeof(*root)); @@ -1337,9 +1405,52 @@ static struct file_system_type nfs_fs_ty #ifdef CONFIG_NFS_V4 +static void nfs4_clear_inode(struct inode *); + +static struct super_operations nfs4_sops = { + .alloc_inode = nfs_alloc_inode, + .destroy_inode = nfs_destroy_inode, + .write_inode = nfs_write_inode, + .delete_inode = nfs_delete_inode, + .put_super = nfs_put_super, + .statfs = nfs_statfs, + .clear_inode = nfs4_clear_inode, + .umount_begin = nfs_umount_begin, + .show_options = nfs_show_options, +}; + +/* + * Clean out any remaining NFSv4 state that might be left over due + * to open() calls that passed nfs_atomic_lookup, but failed to call + * nfs_open(). 
+ */ +static void nfs4_clear_inode(struct inode *inode) +{ + struct nfs_inode *nfsi = NFS_I(inode); + + while (!list_empty(&nfsi->open_states)) { + struct nfs4_state *state; + + state = list_entry(nfsi->open_states.next, + struct nfs4_state, + inode_states); + dprintk("%s(%s/%Ld): found unclaimed NFSv4 state %p\n", + __FUNCTION__, + inode->i_sb->s_id, + (long long)NFS_FILEID(inode), + state); + list_del(&state->inode_states); + nfs4_put_open_state(state); + } + /* Now call standard NFS clear_inode() code */ + nfs_clear_inode(inode); +} + + static int nfs4_fill_super(struct super_block *sb, struct nfs4_mount_data *data, int silent) { struct nfs_server *server; + struct nfs4_client *clp = NULL; struct rpc_xprt *xprt = NULL; struct rpc_clnt *clnt = NULL; struct rpc_timeout timeparms; @@ -1389,13 +1500,13 @@ static int nfs4_fill_super(struct super_ return -EINVAL; } - /* Now create transport and client */ - xprt = xprt_create_proto(proto, &server->addr, &timeparms); - if (xprt == NULL) { - printk(KERN_WARNING "NFS: cannot create RPC transport.\n"); + clp = nfs4_get_client(&server->addr.sin_addr); + if (!clp) { + printk(KERN_WARNING "NFS: failed to create NFS4 client.\n"); goto out_fail; } + /* Now create transport and client */ authflavour = RPC_AUTH_UNIX; if (data->auth_flavourlen != 0) { if (data->auth_flavourlen > 1) @@ -1405,41 +1516,78 @@ static int nfs4_fill_super(struct super_ goto out_fail; } } - clnt = rpc_create_client(xprt, server->hostname, &nfs_program, - server->rpc_ops->version, authflavour); + + down_write(&clp->cl_sem); + if (clp->cl_rpcclient == NULL) { + xprt = xprt_create_proto(proto, &server->addr, &timeparms); + if (xprt == NULL) { + up_write(&clp->cl_sem); + printk(KERN_WARNING "NFS: cannot create RPC transport.\n"); + goto out_fail; + } + clnt = rpc_create_client(xprt, server->hostname, &nfs_program, + server->rpc_ops->version, authflavour); + if (clnt == NULL) { + up_write(&clp->cl_sem); + printk(KERN_WARNING "NFS: cannot create RPC 
client.\n"); + xprt_destroy(xprt); + goto out_fail; + } + clnt->cl_chatty = 1; + clp->cl_rpcclient = clnt; + clp->cl_cred = rpcauth_lookupcred(clnt->cl_auth, 0); + memcpy(clp->cl_ipaddr, server->ip_addr, sizeof(clp->cl_ipaddr)); + nfs_idmap_new(clp); + } + if (list_empty(&clp->cl_superblocks)) + clear_bit(NFS4CLNT_OK, &clp->cl_state); + list_add_tail(&server->nfs4_siblings, &clp->cl_superblocks); + clnt = rpc_clone_client(clp->cl_rpcclient); + server->nfs4_state = clp; + up_write(&clp->cl_sem); + clp = NULL; + if (clnt == NULL) { printk(KERN_WARNING "NFS: cannot create RPC client.\n"); - xprt_destroy(xprt); - goto out_fail; + goto out_remove_list; + } + if (server->nfs4_state->cl_idmap == NULL) { + printk(KERN_WARNING "NFS: failed to create idmapper.\n"); + goto out_shutdown; } clnt->cl_intr = (server->flags & NFS4_MOUNT_INTR) ? 1 : 0; clnt->cl_softrtry = (server->flags & NFS4_MOUNT_SOFT) ? 1 : 0; - clnt->cl_chatty = 1; server->client = clnt; + if (clnt->cl_auth->au_flavor != authflavour) { + if (rpcauth_create(authflavour, clnt) == NULL) { + printk(KERN_WARNING "NFS: couldn't create credcache!\n"); + goto out_shutdown; + } + } + /* Fire up rpciod if not yet running */ if (rpciod_up() != 0) { printk(KERN_WARNING "NFS: couldn't start rpciod!\n"); goto out_shutdown; } - if (create_nfsv4_state(server, data)) - goto out_shutdown; - - if ((server->idmap = nfs_idmap_new(server)) == NULL) - printk(KERN_WARNING "NFS: couldn't start IDmap\n"); - + sb->s_op = &nfs4_sops; err = nfs_sb_init(sb, authflavour); if (err == 0) return 0; rpciod_down(); - destroy_nfsv4_state(server); - if (server->idmap != NULL) - nfs_idmap_delete(server); out_shutdown: rpc_shutdown_client(server->client); +out_remove_list: + down_write(&server->nfs4_state->cl_sem); + list_del_init(&server->nfs4_siblings); + up_write(&server->nfs4_state->cl_sem); + destroy_nfsv4_state(server); out_fail: + if (clp) + nfs4_put_client(clp); return err; } @@ -1496,6 +1644,8 @@ static struct super_block *nfs4_get_sb(s if 
(!server) return ERR_PTR(-ENOMEM); memset(server, 0, sizeof(struct nfs_server)); + /* Zero out the NFS state stuff */ + init_nfsv4_state(server); if (data->version != NFS4_MOUNT_VERSION) { printk("nfs warning: mount version %s than kernel\n", @@ -1616,6 +1766,7 @@ static void init_once(void * foo, kmem_c INIT_LIST_HEAD(&nfsi->dirty); INIT_LIST_HEAD(&nfsi->commit); INIT_RADIX_TREE(&nfsi->nfs_page_tree, GFP_ATOMIC); + atomic_set(&nfsi->data_updates, 0); nfsi->ndirty = 0; nfsi->ncommit = 0; nfsi->npages = 0; diff -u --recursive --new-file --show-c-function linux-2.6.1-rc3/fs/nfs/nfs3proc.c linux-2.6.1-37-attr/fs/nfs/nfs3proc.c --- linux-2.6.1-rc3/fs/nfs/nfs3proc.c 2004-01-08 17:39:18.000000000 -0500 +++ linux-2.6.1-37-attr/fs/nfs/nfs3proc.c 2004-01-08 18:08:14.000000000 -0500 @@ -15,6 +15,7 @@ #include #include #include +#include #include #define NFSDBG_FACILITY NFSDBG_PROC @@ -67,20 +68,6 @@ nfs3_async_handle_jukebox(struct rpc_tas return 1; } -static void -nfs3_write_refresh_inode(struct inode *inode, struct nfs_fattr *fattr) -{ - if (fattr->valid & NFS_ATTR_FATTR) { - if (!(fattr->valid & NFS_ATTR_WCC)) { - fattr->pre_size = NFS_CACHE_ISIZE(inode); - fattr->pre_mtime = NFS_CACHE_MTIME(inode); - fattr->pre_ctime = NFS_CACHE_CTIME(inode); - fattr->valid |= NFS_ATTR_WCC; - } - nfs_refresh_inode(inode, fattr); - } -} - static struct rpc_cred * nfs_cred(struct inode *inode, struct file *filp) { @@ -279,7 +266,7 @@ nfs3_proc_write(struct nfs_write_data *w msg.rpc_cred = nfs_cred(inode, filp); status = rpc_call_sync(NFS_CLIENT(inode), &msg, rpcflags); if (status >= 0) - nfs3_write_refresh_inode(inode, fattr); + nfs_refresh_inode(inode, fattr); dprintk("NFS reply write: %d\n", status); return status < 0? 
status : wdata->res.count; } @@ -302,7 +289,7 @@ nfs3_proc_commit(struct nfs_write_data * msg.rpc_cred = nfs_cred(inode, filp); status = rpc_call_sync(NFS_CLIENT(inode), &msg, 0); if (status >= 0) - nfs3_write_refresh_inode(inode, fattr); + nfs_refresh_inode(inode, fattr); dprintk("NFS reply commit: %d\n", status); return status; } @@ -776,12 +763,13 @@ nfs3_proc_read_setup(struct nfs_read_dat static void nfs3_write_done(struct rpc_task *task) { - struct nfs_write_data *data = (struct nfs_write_data *) task->tk_calldata; + struct nfs_write_data *data; if (nfs3_async_handle_jukebox(task)) return; + data = (struct nfs_write_data *)task->tk_calldata; if (task->tk_status >= 0) - nfs3_write_refresh_inode(data->inode, data->res.fattr); + nfs_refresh_inode(data->inode, data->res.fattr); nfs_writeback_done(task); } @@ -834,12 +822,13 @@ nfs3_proc_write_setup(struct nfs_write_d static void nfs3_commit_done(struct rpc_task *task) { - struct nfs_write_data *data = (struct nfs_write_data *) task->tk_calldata; + struct nfs_write_data *data; if (nfs3_async_handle_jukebox(task)) return; + data = (struct nfs_write_data *)task->tk_calldata; if (task->tk_status >= 0) - nfs3_write_refresh_inode(data->inode, data->res.fattr); + nfs_refresh_inode(data->inode, data->res.fattr); nfs_commit_done(task); } @@ -896,8 +885,16 @@ nfs3_request_compatible(struct nfs_page return 1; } +static int +nfs3_proc_lock(struct file *filp, int cmd, struct file_lock *fl) +{ + return nlmclnt_proc(filp->f_dentry->d_inode, cmd, fl); +} + struct nfs_rpc_ops nfs_v3_clientops = { .version = 3, /* protocol version */ + .dentry_ops = &nfs_dentry_operations, + .dir_inode_ops = &nfs_dir_inode_operations, .getroot = nfs3_proc_get_root, .getattr = nfs3_proc_getattr, .setattr = nfs3_proc_setattr, @@ -929,4 +926,5 @@ struct nfs_rpc_ops nfs_v3_clientops = { .file_release = nfs_release, .request_init = nfs3_request_init, .request_compatible = nfs3_request_compatible, + .lock = nfs3_proc_lock, }; diff -u --recursive 
--new-file --show-c-function linux-2.6.1-rc3/fs/nfs/nfs4proc.c linux-2.6.1-37-attr/fs/nfs/nfs4proc.c --- linux-2.6.1-rc3/fs/nfs/nfs4proc.c 2004-01-08 17:42:52.000000000 -0500 +++ linux-2.6.1-37-attr/fs/nfs/nfs4proc.c 2004-01-08 18:08:14.000000000 -0500 @@ -45,19 +45,21 @@ #include #include #include +#include #define NFSDBG_FACILITY NFSDBG_PROC +#define NFS4_POLL_RETRY_TIME (15*HZ) + #define GET_OP(cp,name) &cp->ops[cp->req_nops].u.name #define OPNUM(cp) cp->ops[cp->req_nops].opnum +static int nfs4_async_handle_error(struct rpc_task *, struct nfs_server *); extern u32 *nfs4_decode_dirent(u32 *p, struct nfs_entry *entry, int plus); extern struct rpc_procinfo nfs4_procedures[]; extern nfs4_stateid zero_stateid; -static spinlock_t renew_lock = SPIN_LOCK_UNLOCKED; - static void nfs4_setup_compound(struct nfs4_compound *cp, struct nfs4_op *ops, struct nfs_server *server, char *tag) @@ -179,44 +181,16 @@ u32 nfs4_statfs_bitmap[2] = { | FATTR4_WORD1_SPACE_TOTAL }; -u32 nfs4_fsinfo_bitmap[2] = { - FATTR4_WORD0_MAXFILESIZE - | FATTR4_WORD0_MAXREAD - | FATTR4_WORD0_MAXWRITE - | FATTR4_WORD0_LEASE_TIME, - 0 -}; - u32 nfs4_pathconf_bitmap[2] = { FATTR4_WORD0_MAXLINK | FATTR4_WORD0_MAXNAME, 0 }; -/* mount bitmap: fattr bitmap + lease time */ -u32 nfs4_mount_bitmap[2] = { - FATTR4_WORD0_TYPE - | FATTR4_WORD0_CHANGE - | FATTR4_WORD0_SIZE - | FATTR4_WORD0_FSID - | FATTR4_WORD0_FILEID - | FATTR4_WORD0_LEASE_TIME, - FATTR4_WORD1_MODE - | FATTR4_WORD1_NUMLINKS - | FATTR4_WORD1_OWNER - | FATTR4_WORD1_OWNER_GROUP - | FATTR4_WORD1_RAWDEV - | FATTR4_WORD1_SPACE_USED - | FATTR4_WORD1_TIME_ACCESS - | FATTR4_WORD1_TIME_METADATA - | FATTR4_WORD1_TIME_MODIFY -}; - static inline void __nfs4_setup_getattr(struct nfs4_compound *cp, u32 *bitmap, struct nfs_fattr *fattr, struct nfs_fsstat *fsstat, - struct nfs_fsinfo *fsinfo, struct nfs_pathconf *pathconf) { struct nfs4_getattr *getattr = GET_OP(cp, getattr); @@ -224,7 +198,6 @@ __nfs4_setup_getattr(struct nfs4_compoun getattr->gt_bmval = bitmap; 
getattr->gt_attrs = fattr; getattr->gt_fsstat = fsstat; - getattr->gt_fsinfo = fsinfo; getattr->gt_pathconf = pathconf; OPNUM(cp) = OP_GETATTR; @@ -236,16 +209,7 @@ nfs4_setup_getattr(struct nfs4_compound struct nfs_fattr *fattr) { __nfs4_setup_getattr(cp, nfs4_fattr_bitmap, fattr, - NULL, NULL, NULL); -} - -static void -nfs4_setup_getrootattr(struct nfs4_compound *cp, - struct nfs_fattr *fattr, - struct nfs_fsinfo *fsinfo) -{ - __nfs4_setup_getattr(cp, nfs4_mount_bitmap, - fattr, NULL, fsinfo, NULL); + NULL, NULL); } static void @@ -253,15 +217,7 @@ nfs4_setup_statfs(struct nfs4_compound * struct nfs_fsstat *fsstat) { __nfs4_setup_getattr(cp, nfs4_statfs_bitmap, - NULL, fsstat, NULL, NULL); -} - -static void -nfs4_setup_fsinfo(struct nfs4_compound *cp, - struct nfs_fsinfo *fsinfo) -{ - __nfs4_setup_getattr(cp, nfs4_fsinfo_bitmap, - NULL, NULL, fsinfo, NULL); + NULL, fsstat, NULL); } static void @@ -269,7 +225,7 @@ nfs4_setup_pathconf(struct nfs4_compound struct nfs_pathconf *pathconf) { __nfs4_setup_getattr(cp, nfs4_pathconf_bitmap, - NULL, NULL, NULL, pathconf); + NULL, NULL, pathconf); } static void @@ -429,18 +385,6 @@ nfs4_setup_rename(struct nfs4_compound * } static void -nfs4_setup_renew(struct nfs4_compound *cp) -{ - struct nfs4_client **client_state = GET_OP(cp, renew); - - *client_state = cp->server->nfs4_state; - - OPNUM(cp) = OP_RENEW; - cp->req_nops++; - cp->renew_index = cp->req_nops; -} - -static void nfs4_setup_restorefh(struct nfs4_compound *cp) { OPNUM(cp) = OP_RESTOREFH; @@ -455,47 +399,13 @@ nfs4_setup_savefh(struct nfs4_compound * } static void -nfs4_setup_setclientid(struct nfs4_compound *cp, u32 program, unsigned short port) -{ - struct nfs4_setclientid *setclientid = GET_OP(cp, setclientid); - struct nfs_server *server = cp->server; - struct timespec tv; - u32 *p; - - tv = CURRENT_TIME; - p = (u32 *)setclientid->sc_verifier.data; - *p++ = tv.tv_sec; - *p++ = tv.tv_nsec; - setclientid->sc_name = server->ip_addr; - 
sprintf(setclientid->sc_netid, "udp"); - sprintf(setclientid->sc_uaddr, "%s.%d.%d", server->ip_addr, port >> 8, port & 255); - setclientid->sc_prog = program; - setclientid->sc_cb_ident = 0; - setclientid->sc_state = server->nfs4_state; - - OPNUM(cp) = OP_SETCLIENTID; - cp->req_nops++; -} - -static void -nfs4_setup_setclientid_confirm(struct nfs4_compound *cp) -{ - struct nfs4_client **client_state = GET_OP(cp, setclientid_confirm); - - *client_state = cp->server->nfs4_state; - - OPNUM(cp) = OP_SETCLIENTID_CONFIRM; - cp->req_nops++; - cp->renew_index = cp->req_nops; -} - -static void renew_lease(struct nfs_server *server, unsigned long timestamp) { - spin_lock(&renew_lock); - if (time_before(server->last_renewal,timestamp)) - server->last_renewal = timestamp; - spin_unlock(&renew_lock); + struct nfs4_client *clp = server->nfs4_state; + spin_lock(&clp->cl_lock); + if (time_before(clp->cl_last_renewal,timestamp)) + clp->cl_last_renewal = timestamp; + spin_unlock(&clp->cl_lock); } static inline void @@ -552,6 +462,57 @@ process_cinfo(struct nfs4_change_info *i } } +/* + * OPEN_RECLAIM: + * reclaim state on the server after a reboot. 
+ * Assumes caller is holding the sp->so_sem + */ +int +nfs4_open_reclaim(struct nfs4_state_owner *sp, struct nfs4_state *state) +{ + struct inode *inode = state->inode; + struct nfs_server *server = NFS_SERVER(inode); + struct nfs_fattr fattr = { + .valid = 0, + }; + struct nfs4_change_info d_cinfo; + struct nfs4_getattr f_getattr = { + .gt_bmval = nfs4_fattr_bitmap, + .gt_attrs = &fattr, + }; + + struct nfs_open_reclaimargs o_arg = { + .fh = NFS_FH(inode), + .seqid = sp->so_seqid, + .id = sp->so_id, + .share_access = state->state, + .clientid = server->nfs4_state->cl_clientid, + .claim = NFS4_OPEN_CLAIM_PREVIOUS, + .f_getattr = &f_getattr, + }; + struct nfs_openres o_res = { + .cinfo = &d_cinfo, + .f_getattr = &f_getattr, + .server = server, /* Grrr */ + }; + struct rpc_message msg = { + .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_OPEN_RECLAIM], + .rpc_argp = &o_arg, + .rpc_resp = &o_res, + .rpc_cred = sp->so_cred, + }; + int status; + + status = rpc_call_sync(server->client, &msg, 0); + nfs4_increment_seqid(status, sp); + /* Update the inode attributes */ + nfs_refresh_inode(inode, &fattr); + return status; +} + +/* + * Returns an nfs4_state + an referenced inode + */ struct nfs4_state * nfs4_do_open(struct inode *dir, struct qstr *name, int flags, struct iattr *sattr, struct rpc_cred *cred) { @@ -578,7 +539,6 @@ nfs4_do_open(struct inode *dir, struct q struct nfs_openargs o_arg = { .fh = NFS_FH(dir), .share_access = flags & (FMODE_READ|FMODE_WRITE), - .clientid = NFS_SERVER(dir)->nfs4_state->cl_clientid, .opentype = (flags & O_CREAT) ? NFS4_OPEN_CREATE : NFS4_OPEN_NOCREATE, .createmode = (flags & O_EXCL) ? 
NFS4_CREATE_EXCLUSIVE : NFS4_CREATE_UNCHECKED, .name = name, @@ -599,6 +559,7 @@ nfs4_do_open(struct inode *dir, struct q .rpc_cred = cred, }; +retry: status = -ENOMEM; if (!(sp = nfs4_get_state_owner(NFS_SERVER(dir), cred))) { dprintk("nfs4_do_open: nfs4_get_state_owner failed!\n"); @@ -615,12 +576,12 @@ nfs4_do_open(struct inode *dir, struct q down(&sp->so_sema); o_arg.seqid = sp->so_seqid; o_arg.id = sp->so_id; + o_arg.clientid = NFS_SERVER(dir)->nfs4_state->cl_clientid, status = rpc_call_sync(server->client, &msg, 0); - if (status) { - goto out_up; - } nfs4_increment_seqid(status, sp); + if (status) + goto out_up; process_cinfo(&d_cinfo, &d_attr); nfs_refresh_inode(dir, &d_attr); @@ -637,9 +598,7 @@ nfs4_do_open(struct inode *dir, struct q .fh = &o_res.fh, .seqid = sp->so_seqid, }; - struct nfs_open_confirmres oc_res = { - .status = 0, - }; + struct nfs_open_confirmres oc_res; struct rpc_message msg = { .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_OPEN_CONFIRM], .rpc_argp = &oc_arg, @@ -649,27 +608,54 @@ nfs4_do_open(struct inode *dir, struct q memcpy(&oc_arg.stateid, &o_res.stateid, sizeof(oc_arg.stateid)); status = rpc_call_sync(server->client, &msg, 0); + nfs4_increment_seqid(status, sp); if (status) goto out_up; - nfs4_increment_seqid(status, sp); memcpy(&state->stateid, &oc_res.stateid, sizeof(state->stateid)); } else memcpy(&state->stateid, &o_res.stateid, sizeof(state->stateid)); + spin_lock(&inode->i_lock); + if (flags & FMODE_READ) + state->nreaders++; + if (flags & FMODE_WRITE) + state->nwriters++; state->state |= flags & (FMODE_READ|FMODE_WRITE); - state->pid = current->pid; + spin_unlock(&inode->i_lock); up(&sp->so_sema); nfs4_put_state_owner(sp); - iput(inode); return state; out_up: up(&sp->so_sema); nfs4_put_state_owner(sp); - if (state) + if (state) { nfs4_put_open_state(state); - if (inode) + state = NULL; + } + if (inode) { iput(inode); + inode = NULL; + } + /* NOTE: BAD_SEQID means the server and client disagree about the + * book-keeping w.r.t. 
state-changing operations + * (OPEN/CLOSE/LOCK/LOCKU...) + * It is actually a sign of a bug on the client or on the server. + * + * If we receive a BAD_SEQID error in the particular case of + * doing an OPEN, we assume that nfs4_increment_seqid() will + * have unhashed the old state_owner for us, and that we can + * therefore safely retry using a new one. We should still warn + * the user though... + */ + if (status == -NFS4ERR_BAD_SEQID) { + printk(KERN_WARNING "NFS: v4 server returned a bad sequence-id error!\n"); + goto retry; + } + status = nfs4_handle_error(server, status); + if (!status) + goto retry; + BUG_ON(status < -1000 || status > 0); out: return ERR_PTR(status); } @@ -698,15 +684,23 @@ nfs4_do_setattr(struct nfs_server *serve .rpc_argp = &arg, .rpc_resp = &res, }; + int status; +retry: fattr->valid = 0; if (state) - memcpy(&arg.stateid, &state->stateid, sizeof(arg.stateid)); + nfs4_copy_stateid(&arg.stateid, state, 0); else memcpy(&arg.stateid, &zero_stateid, sizeof(arg.stateid)); - return(rpc_call_sync(server->client, &msg, 0)); + status = rpc_call_sync(server->client, &msg, 0); + if (status) { + status = nfs4_handle_error(server, status); + if (!status) + goto retry; + } + return status; } /* @@ -728,9 +722,7 @@ nfs4_do_close(struct inode *inode, struc struct nfs_closeargs arg = { .fh = NFS_FH(inode), }; - struct nfs_closeres res = { - .status = 0, - }; + struct nfs_closeres res; struct rpc_message msg = { .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_CLOSE], .rpc_argp = &arg, @@ -746,82 +738,111 @@ nfs4_do_close(struct inode *inode, struc * the state_owner. 
we keep this around to process errors */ nfs4_increment_seqid(status, sp); + if (!status) + memcpy(&state->stateid, &res.stateid, sizeof(state->stateid)); + + return status; +} + +int +nfs4_do_downgrade(struct inode *inode, struct nfs4_state *state, mode_t mode) +{ + struct nfs4_state_owner *sp = state->owner; + int status = 0; + struct nfs_closeargs arg = { + .fh = NFS_FH(inode), + .seqid = sp->so_seqid, + .share_access = mode, + }; + struct nfs_closeres res; + struct rpc_message msg = { + .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_OPEN_DOWNGRADE], + .rpc_argp = &arg, + .rpc_resp = &res, + }; + + memcpy(&arg.stateid, &state->stateid, sizeof(arg.stateid)); + status = rpc_call_sync(NFS_SERVER(inode)->client, &msg, 0); + nfs4_increment_seqid(status, sp); + if (!status) + memcpy(&state->stateid, &res.stateid, sizeof(state->stateid)); return status; } +struct inode * +nfs4_atomic_open(struct inode *dir, struct dentry *dentry, struct nameidata *nd) +{ + struct iattr attr; + struct rpc_cred *cred; + struct nfs4_state *state; + + if (nd->flags & LOOKUP_CREATE) { + attr.ia_mode = nd->intent.open.create_mode; + attr.ia_valid = ATTR_MODE; + if (!IS_POSIXACL(dir)) + attr.ia_mode &= ~current->fs->umask; + } else { + attr.ia_valid = 0; + BUG_ON(nd->intent.open.flags & O_CREAT); + } + + cred = rpcauth_lookupcred(NFS_SERVER(dir)->client->cl_auth, 0); + state = nfs4_do_open(dir, &dentry->d_name, nd->intent.open.flags, &attr, cred); + put_rpccred(cred); + if (IS_ERR(state)) + return (struct inode *)state; + return state->inode; +} + +int +nfs4_open_revalidate(struct inode *dir, struct dentry *dentry, int openflags) +{ + struct rpc_cred *cred; + struct nfs4_state *state; + struct inode *inode; + + cred = rpcauth_lookupcred(NFS_SERVER(dir)->client->cl_auth, 0); + state = nfs4_do_open(dir, &dentry->d_name, openflags, NULL, cred); + put_rpccred(cred); + if (state == ERR_PTR(-ENOENT) && dentry->d_inode == 0) + return 1; + if (IS_ERR(state)) + return 0; + inode = state->inode; + if 
(inode == dentry->d_inode) { + iput(inode); + return 1; + } + d_drop(dentry); + nfs4_close_state(state, openflags); + iput(inode); + return 0; +} + static int nfs4_proc_get_root(struct nfs_server *server, struct nfs_fh *fhandle, struct nfs_fattr *fattr) { - struct nfs4_client *clp; struct nfs4_compound compound; struct nfs4_op ops[4]; - struct nfs_fsinfo fsinfo; unsigned char * p; struct qstr q; int status; - clp = server->nfs4_state = nfs4_get_client(&server->addr.sin_addr); - if (!clp) - return -ENOMEM; - - down_write(&clp->cl_sem); - /* Has the clientid already been initialized? */ - if (clp->cl_state != NFS4CLNT_NEW) { - /* Yep, so just read the root attributes and the lease time. */ - fattr->valid = 0; - nfs4_setup_compound(&compound, ops, server, "getrootfh"); - nfs4_setup_putrootfh(&compound); - nfs4_setup_getrootattr(&compound, fattr, &fsinfo); - nfs4_setup_getfh(&compound, fhandle); - if ((status = nfs4_call_compound(&compound, NULL, 0))) - goto out_unlock; - goto no_setclientid; - } - - /* - * SETCLIENTID. - * Until delegations are imported, we don't bother setting the program - * number and port to anything meaningful. - */ - nfs4_setup_compound(&compound, ops, server, "setclientid"); - nfs4_setup_setclientid(&compound, 0, 0); - if ((status = nfs4_call_compound(&compound, NULL, 0))) - goto out_unlock; - - /* - * SETCLIENTID_CONFIRM, plus root filehandle. - * We also get the lease time here. - */ - fattr->valid = 0; - nfs4_setup_compound(&compound, ops, server, "setclientid_confirm"); - nfs4_setup_setclientid_confirm(&compound); - nfs4_setup_putrootfh(&compound); - nfs4_setup_getrootattr(&compound, fattr, &fsinfo); - nfs4_setup_getfh(&compound, fhandle); - if ((status = nfs4_call_compound(&compound, NULL, 0))) - goto out_unlock; - clp->cl_state = NFS4CLNT_OK; - -no_setclientid: - /* - * Now that we have instantiated the clientid and determined - * the lease time, we can initialize the renew daemon for this - * server. 
- * FIXME: we only need one renewd daemon per server. - */ - server->lease_time = fsinfo.lease_time * HZ; - if ((status = nfs4_init_renewd(server))) - goto out_unlock; - up_write(&clp->cl_sem); - /* * Now we do a separate LOOKUP for each component of the mount path. * The LOOKUPs are done separately so that we can conveniently * catch an ERR_WRONGSEC if it occurs along the way... */ p = server->mnt_path; + fattr->valid = 0; + nfs4_setup_compound(&compound, ops, server, "getrootfh"); + nfs4_setup_putrootfh(&compound); + nfs4_setup_getattr(&compound, fattr); + nfs4_setup_getfh(&compound, fhandle); + if ((status = nfs4_call_compound(&compound, NULL, 0))) + goto out; for (;;) { while (*p == '/') p++; @@ -847,10 +868,7 @@ no_setclientid: } break; } - return status; -out_unlock: - up_write(&clp->cl_sem); - nfs4_put_client(clp); +out: return status; } @@ -892,28 +910,38 @@ nfs4_proc_setattr(struct dentry *dentry, struct inode * inode = dentry->d_inode; int size_change = sattr->ia_valid & ATTR_SIZE; struct nfs4_state *state = NULL; - int status; + int need_iput = 0; + int status; fattr->valid = 0; if (size_change) { struct rpc_cred *cred = rpcauth_lookupcred(NFS_SERVER(inode)->client->cl_auth, 0); - state = nfs4_do_open(dentry->d_parent->d_inode, + state = nfs4_find_state(inode, cred, FMODE_WRITE); + if (!state) { + state = nfs4_do_open(dentry->d_parent->d_inode, &dentry->d_name, FMODE_WRITE, NULL, cred); + need_iput = 1; + } put_rpccred(cred); if (IS_ERR(state)) return PTR_ERR(state); if (state->inode != inode) { - printk(KERN_WARNING "nfs: raced in setattr, returning -EIO\n"); - nfs4_put_open_state(state); - return -EIO; + printk(KERN_WARNING "nfs: raced in setattr (%p != %p), returning -EIO\n", inode, state->inode); + status = -EIO; + goto out; } } status = nfs4_do_setattr(NFS_SERVER(inode), fattr, NFS_FH(inode), sattr, state); - if (state) - nfs4_put_open_state(state); +out: + if (state) { + inode = state->inode; + nfs4_close_state(state, FMODE_WRITE); + if (need_iput) 
+ iput(inode); + } return status; } @@ -1051,7 +1079,7 @@ nfs4_proc_read(struct nfs_read_data *rda if (filp) { struct nfs4_state *state; state = (struct nfs4_state *)filp->private_data; - memcpy(&rdata->args.stateid, &state->stateid, sizeof(rdata->args.stateid)); + nfs4_copy_stateid(&rdata->args.stateid, state, rdata->lockowner); msg.rpc_cred = state->owner->so_cred; } else { memcpy(&rdata->args.stateid, &zero_stateid, sizeof(rdata->args.stateid)); @@ -1060,12 +1088,8 @@ nfs4_proc_read(struct nfs_read_data *rda fattr->valid = 0; status = rpc_call_sync(server->client, &msg, flags); - if (!status) { + if (!status) renew_lease(server, timestamp); - /* Check cache consistency */ - if (fattr->change_attr != NFS_CHANGE_ATTR(inode)) - nfs_zap_caches(inode); - } dprintk("NFS reply read: %d\n", status); return status; } @@ -1093,7 +1117,7 @@ nfs4_proc_write(struct nfs_write_data *w if (filp) { struct nfs4_state *state; state = (struct nfs4_state *)filp->private_data; - memcpy(&wdata->args.stateid, &state->stateid, sizeof(wdata->args.stateid)); + nfs4_copy_stateid(&wdata->args.stateid, state, wdata->lockowner); msg.rpc_cred = state->owner->so_cred; } else { memcpy(&wdata->args.stateid, &zero_stateid, sizeof(wdata->args.stateid)); @@ -1102,7 +1126,6 @@ nfs4_proc_write(struct nfs_write_data *w fattr->valid = 0; status = rpc_call_sync(server->client, &msg, rpcflags); - NFS_CACHEINV(inode); dprintk("NFS reply write: %d\n", status); return status; } @@ -1129,7 +1152,7 @@ nfs4_proc_commit(struct nfs_write_data * if (filp) { struct nfs4_state *state; state = (struct nfs4_state *)filp->private_data; - memcpy(&cdata->args.stateid, &state->stateid, sizeof(cdata->args.stateid)); + nfs4_copy_stateid(&cdata->args.stateid, state, cdata->lockowner); msg.rpc_cred = state->owner->so_cred; } else { memcpy(&cdata->args.stateid, &zero_stateid, sizeof(cdata->args.stateid)); @@ -1169,18 +1192,18 @@ nfs4_proc_create(struct inode *dir, stru state = nfs4_do_open(dir, name, flags, sattr, cred); 
put_rpccred(cred); if (!IS_ERR(state)) { - inode = igrab(state->inode); + inode = state->inode; if (flags & O_EXCL) { struct nfs_fattr fattr; int status; status = nfs4_do_setattr(NFS_SERVER(dir), &fattr, NFS_FH(inode), sattr, state); if (status != 0) { + nfs4_close_state(state, flags); iput(inode); inode = ERR_PTR(status); } } - nfs4_put_open_state(state); } else inode = (struct inode *)state; return inode; @@ -1446,14 +1469,14 @@ static int nfs4_proc_fsinfo(struct nfs_server *server, struct nfs_fh *fhandle, struct nfs_fsinfo *fsinfo) { - struct nfs4_compound compound; - struct nfs4_op ops[2]; + struct rpc_message msg = { + .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_FSINFO], + .rpc_argp = fhandle, + .rpc_resp = fsinfo, + }; memset(fsinfo, 0, sizeof(*fsinfo)); - nfs4_setup_compound(&compound, ops, server, "statfs"); - nfs4_setup_putfh(&compound, fhandle); - nfs4_setup_fsinfo(&compound, fsinfo); - return nfs4_call_compound(&compound, NULL, 0); + return rpc_call_sync(server->client, &msg, 0); } static int @@ -1471,19 +1494,31 @@ nfs4_proc_pathconf(struct nfs_server *se } static void +nfs4_restart_read(struct rpc_task *task) +{ + struct nfs_read_data *data = (struct nfs_read_data *)task->tk_calldata; + struct nfs_page *req; + + rpc_restart_call(task); + req = nfs_list_entry(data->pages.next); + if (req->wb_state) + nfs4_copy_stateid(&data->args.stateid, req->wb_state, req->wb_lockowner); + else + memcpy(&data->args.stateid, &zero_stateid, sizeof(data->args.stateid)); +} + +static void nfs4_read_done(struct rpc_task *task) { struct nfs_read_data *data = (struct nfs_read_data *) task->tk_calldata; struct inode *inode = data->inode; - struct nfs_fattr *fattr = data->res.fattr; + if (nfs4_async_handle_error(task, NFS_SERVER(inode)) == -EAGAIN) { + task->tk_action = nfs4_restart_read; + return; + } if (task->tk_status > 0) renew_lease(NFS_SERVER(inode), data->timestamp); - /* Check cache consistency */ - if (fattr->change_attr != NFS_CHANGE_ATTR(inode)) - 
nfs_zap_caches(inode); - if (fattr->bitmap[1] & FATTR4_WORD1_TIME_ACCESS) - inode->i_atime = fattr->atime; /* Call back common NFS readpage processing */ nfs_readpage_result(task); } @@ -1512,8 +1547,9 @@ nfs4_proc_read_setup(struct nfs_read_dat data->res.eof = 0; data->timestamp = jiffies; + data->lockowner = req->wb_lockowner; if (req->wb_state) - memcpy(&data->args.stateid, &req->wb_state->stateid, sizeof(data->args.stateid)); + nfs4_copy_stateid(&data->args.stateid, req->wb_state, req->wb_lockowner); else memcpy(&data->args.stateid, &zero_stateid, sizeof(data->args.stateid)); @@ -1530,18 +1566,17 @@ nfs4_proc_read_setup(struct nfs_read_dat } static void -nfs4_write_refresh_inode(struct inode *inode, struct nfs_fattr *fattr) +nfs4_restart_write(struct rpc_task *task) { - /* Check cache consistency */ - if (fattr->pre_change_attr != NFS_CHANGE_ATTR(inode)) - nfs_zap_caches(inode); - NFS_CHANGE_ATTR(inode) = fattr->change_attr; - if (fattr->bitmap[1] & FATTR4_WORD1_SPACE_USED) - inode->i_blocks = (fattr->du.nfs3.used + 511) >> 9; - if (fattr->bitmap[1] & FATTR4_WORD1_TIME_METADATA) - inode->i_ctime = fattr->ctime; - if (fattr->bitmap[1] & FATTR4_WORD1_TIME_MODIFY) - inode->i_mtime = fattr->mtime; + struct nfs_write_data *data = (struct nfs_write_data *)task->tk_calldata; + struct nfs_page *req; + + rpc_restart_call(task); + req = nfs_list_entry(data->pages.next); + if (req->wb_state) + nfs4_copy_stateid(&data->args.stateid, req->wb_state, req->wb_lockowner); + else + memcpy(&data->args.stateid, &zero_stateid, sizeof(data->args.stateid)); } static void @@ -1550,9 +1585,12 @@ nfs4_write_done(struct rpc_task *task) struct nfs_write_data *data = (struct nfs_write_data *) task->tk_calldata; struct inode *inode = data->inode; + if (nfs4_async_handle_error(task, NFS_SERVER(inode)) == -EAGAIN) { + task->tk_action = nfs4_restart_write; + return; + } if (task->tk_status >= 0) renew_lease(NFS_SERVER(inode), data->timestamp); - nfs4_write_refresh_inode(inode, 
data->res.fattr); /* Call back common NFS writeback processing */ nfs_writeback_done(task); } @@ -1591,8 +1629,9 @@ nfs4_proc_write_setup(struct nfs_write_d data->res.verf = &data->verf; data->timestamp = jiffies; + data->lockowner = req->wb_lockowner; if (req->wb_state) - memcpy(&data->args.stateid, &req->wb_state->stateid, sizeof(data->args.stateid)); + nfs4_copy_stateid(&data->args.stateid, req->wb_state, req->wb_lockowner); else memcpy(&data->args.stateid, &zero_stateid, sizeof(data->args.stateid)); @@ -1612,8 +1651,12 @@ static void nfs4_commit_done(struct rpc_task *task) { struct nfs_write_data *data = (struct nfs_write_data *) task->tk_calldata; + struct inode *inode = data->inode; - nfs4_write_refresh_inode(data->inode, data->res.fattr); + if (nfs4_async_handle_error(task, NFS_SERVER(inode)) == -EAGAIN) { + task->tk_action = nfs4_restart_write; + return; + } /* Call back common NFS writeback processing */ nfs_commit_done(task); } @@ -1651,55 +1694,58 @@ nfs4_proc_commit_setup(struct nfs_write_ } /* - * nfs4_proc_renew(): This is not one of the nfs_rpc_ops; it is a special + * nfs4_proc_async_renew(): This is not one of the nfs_rpc_ops; it is a special * standalone procedure for queueing an asynchronous RENEW. 
*/ -struct renew_desc { - struct rpc_task task; - struct nfs4_compound compound; - struct nfs4_op ops[1]; -}; - static void renew_done(struct rpc_task *task) { - struct nfs4_compound *cp = (struct nfs4_compound *) task->tk_msg.rpc_argp; - process_lease(cp); + struct nfs4_client *clp = (struct nfs4_client *)task->tk_msg.rpc_argp; + unsigned long timestamp = (unsigned long)task->tk_calldata; + + if (task->tk_status < 0) { + switch (task->tk_status) { + case -NFS4ERR_STALE_CLIENTID: + nfs4_schedule_state_recovery(clp); + return; + } + } + spin_lock(&clp->cl_lock); + if (time_before(clp->cl_last_renewal,timestamp)) + clp->cl_last_renewal = timestamp; + spin_unlock(&clp->cl_lock); } -static void -renew_release(struct rpc_task *task) +int +nfs4_proc_async_renew(struct nfs4_client *clp) { - kfree(task->tk_calldata); + struct rpc_message msg = { + .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_RENEW], + .rpc_argp = clp, + .rpc_cred = clp->cl_cred, + }; + + return rpc_call_async(clp->cl_rpcclient, &msg, RPC_TASK_SOFT, + renew_done, (void *)jiffies); } int -nfs4_proc_renew(struct nfs_server *server) +nfs4_proc_renew(struct nfs4_client *clp) { - struct renew_desc *rp; - struct rpc_task *task; - struct nfs4_compound *cp; struct rpc_message msg = { - .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_COMPOUND], + .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_RENEW], + .rpc_argp = clp, + .rpc_cred = clp->cl_cred, }; + unsigned long now = jiffies; + int status; - rp = (struct renew_desc *) kmalloc(sizeof(*rp), GFP_KERNEL); - if (!rp) - return -ENOMEM; - cp = &rp->compound; - task = &rp->task; - - nfs4_setup_compound(cp, rp->ops, server, "renew"); - nfs4_setup_renew(cp); - - msg.rpc_argp = cp; - msg.rpc_resp = cp; - rpc_init_task(task, server->client, renew_done, RPC_TASK_ASYNC); - rpc_call_setup(task, &msg, 0); - task->tk_calldata = rp; - task->tk_release = renew_release; - - return rpc_execute(task); + status = rpc_call_sync(clp->cl_rpcclient, &msg, 0); + spin_lock(&clp->cl_lock); + if 
(time_before(clp->cl_last_renewal,now)) + clp->cl_last_renewal = now; + spin_unlock(&clp->cl_lock); + return status; } /* @@ -1712,43 +1758,32 @@ static int nfs4_proc_file_open(struct inode *inode, struct file *filp) { struct dentry *dentry = filp->f_dentry; - struct inode *dir = dentry->d_parent->d_inode; - struct rpc_cred *cred; struct nfs4_state *state; - int flags = filp->f_flags; - int status = 0; + struct rpc_cred *cred; dprintk("nfs4_proc_file_open: starting on (%.*s/%.*s)\n", (int)dentry->d_parent->d_name.len, dentry->d_parent->d_name.name, (int)dentry->d_name.len, dentry->d_name.name); - if ((flags + 1) & O_ACCMODE) - flags++; - - lock_kernel(); -/* -* We have already opened the file "O_EXCL" in nfs4_proc_create!! -* This ugliness will go away with lookup-intent... -*/ + /* Find our open stateid */ cred = rpcauth_lookupcred(NFS_SERVER(inode)->client->cl_auth, 0); - state = nfs4_do_open(dir, &dentry->d_name, flags, NULL, cred); - if (IS_ERR(state)) { - status = PTR_ERR(state); - state = NULL; - } else if (filp->f_mode & FMODE_WRITE) - nfs_set_mmcred(inode, cred); - if (inode != filp->f_dentry->d_inode) { + state = nfs4_find_state(inode, cred, filp->f_mode); + put_rpccred(cred); + if (state == NULL) { printk(KERN_WARNING "NFS: v4 raced in function %s\n", __FUNCTION__); - status = -EIO; /* ERACE actually */ - nfs4_put_open_state(state); - state = NULL; + return -EIO; /* ERACE actually */ + } + nfs4_close_state(state, filp->f_mode); + if (filp->f_mode & FMODE_WRITE) { + lock_kernel(); + nfs_set_mmcred(inode, state->owner->so_cred); + nfs_begin_data_update(inode); + unlock_kernel(); } filp->private_data = state; - put_rpccred(cred); - unlock_kernel(); - return status; + return 0; } /* @@ -1760,7 +1795,12 @@ nfs4_proc_file_release(struct inode *ino struct nfs4_state *state = (struct nfs4_state *)filp->private_data; if (state) - nfs4_put_open_state(state); + nfs4_close_state(state, filp->f_mode); + if (filp->f_mode & FMODE_WRITE) { + lock_kernel(); + 
nfs_end_data_update(inode); + unlock_kernel(); + } return 0; } @@ -1780,6 +1820,120 @@ nfs4_request_init(struct nfs_page *req, state = (struct nfs4_state *)filp->private_data; req->wb_state = state; req->wb_cred = get_rpccred(state->owner->so_cred); + req->wb_lockowner = current->files; +} + +static int +nfs4_async_handle_error(struct rpc_task *task, struct nfs_server *server) +{ + struct nfs4_client *clp = server->nfs4_state; + + if (!clp) + return 0; + switch(task->tk_status) { + case -NFS4ERR_STALE_CLIENTID: + case -NFS4ERR_STALE_STATEID: + case -NFS4ERR_EXPIRED: + rpc_sleep_on(&clp->cl_rpcwaitq, task, NULL, NULL); + nfs4_schedule_state_recovery(clp); + task->tk_status = 0; + return -EAGAIN; + case -NFS4ERR_GRACE: + case -NFS4ERR_DELAY: + rpc_delay(task, NFS4_POLL_RETRY_TIME); + task->tk_status = 0; + return -EAGAIN; + case -NFS4ERR_OLD_STATEID: + task->tk_status = 0; + return -EAGAIN; + } + return 0; +} + +int +nfs4_wait_clnt_recover(struct rpc_clnt *clnt, struct nfs4_client *clp) +{ + DEFINE_WAIT(wait); + sigset_t oldset; + int interruptible, res; + + might_sleep(); + + rpc_clnt_sigmask(clnt, &oldset); + interruptible = TASK_UNINTERRUPTIBLE; + if (clnt->cl_intr) + interruptible = TASK_INTERRUPTIBLE; + do { + res = 0; + prepare_to_wait(&clp->cl_waitq, &wait, interruptible); + nfs4_schedule_state_recovery(clp); + if (test_bit(NFS4CLNT_OK, &clp->cl_state) && + !test_bit(NFS4CLNT_SETUP_STATE, &clp->cl_state)) + break; + if (clnt->cl_intr && signalled()) { + res = -ERESTARTSYS; + break; + } + schedule(); + } while(!test_bit(NFS4CLNT_OK, &clp->cl_state)); + finish_wait(&clp->cl_waitq, &wait); + rpc_clnt_sigunmask(clnt, &oldset); + return res; +} + +static int +nfs4_delay(struct rpc_clnt *clnt) +{ + sigset_t oldset; + int res = 0; + + might_sleep(); + + rpc_clnt_sigmask(clnt, &oldset); + if (clnt->cl_intr) { + set_current_state(TASK_INTERRUPTIBLE); + schedule_timeout(NFS4_POLL_RETRY_TIME); + if (signalled()) + res = -ERESTARTSYS; + } else { + 
set_current_state(TASK_UNINTERRUPTIBLE); + schedule_timeout(NFS4_POLL_RETRY_TIME); + } + rpc_clnt_sigunmask(clnt, &oldset); + return res; +} + +/* This is the error handling routine for processes that are allowed + * to sleep. + */ +int +nfs4_handle_error(struct nfs_server *server, int errorcode) +{ + struct nfs4_client *clp = server->nfs4_state; + int ret = errorcode; + + switch(errorcode) { + case -NFS4ERR_STALE_CLIENTID: + case -NFS4ERR_STALE_STATEID: + case -NFS4ERR_EXPIRED: + ret = nfs4_wait_clnt_recover(server->client, clp); + break; + case -NFS4ERR_GRACE: + case -NFS4ERR_DELAY: + ret = nfs4_delay(server->client); + break; + case -NFS4ERR_OLD_STATEID: + ret = 0; + break; + default: + if (errorcode <= -1000) { + printk(KERN_WARNING "%s could not handle NFSv4 error %d\n", + __FUNCTION__, -errorcode); + ret = -EIO; + } + } + /* We failed to handle the error */ + return ret; } @@ -1796,14 +1950,325 @@ nfs4_request_compatible(struct nfs_page state = (struct nfs4_state *)filp->private_data; if (req->wb_state != state) return 0; + if (req->wb_lockowner != current->files) + return 0; cred = state->owner->so_cred; if (req->wb_cred != cred) return 0; return 1; } +int +nfs4_proc_setclientid(struct nfs4_client *clp, + u32 program, unsigned short port) +{ + u32 *p; + struct nfs4_setclientid setclientid; + struct timespec tv; + struct rpc_message msg = { + .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_SETCLIENTID], + .rpc_argp = &setclientid, + .rpc_resp = clp, + .rpc_cred = clp->cl_cred, + }; + + tv = CURRENT_TIME; + p = (u32*)setclientid.sc_verifier.data; + *p++ = (u32)tv.tv_sec; + *p = (u32)tv.tv_nsec; + setclientid.sc_name = clp->cl_ipaddr; + sprintf(setclientid.sc_netid, "tcp"); + sprintf(setclientid.sc_uaddr, "%s.%d.%d", clp->cl_ipaddr, port >> 8, port & 255); + setclientid.sc_prog = htonl(program); + setclientid.sc_cb_ident = 0; + + return rpc_call_sync(clp->cl_rpcclient, &msg, 0); +} + +int +nfs4_proc_setclientid_confirm(struct nfs4_client *clp) +{ + struct nfs_fsinfo 
fsinfo; + struct rpc_message msg = { + .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_SETCLIENTID_CONFIRM], + .rpc_argp = clp, + .rpc_resp = &fsinfo, + .rpc_cred = clp->cl_cred, + }; + unsigned long now; + int status; + + now = jiffies; + status = rpc_call_sync(clp->cl_rpcclient, &msg, 0); + if (status == 0) { + spin_lock(&clp->cl_lock); + clp->cl_lease_time = fsinfo.lease_time * HZ; + clp->cl_last_renewal = now; + spin_unlock(&clp->cl_lock); + } + return status; +} + +#define NFS4_LOCK_MINTIMEOUT (1 * HZ) +#define NFS4_LOCK_MAXTIMEOUT (30 * HZ) + +/* + * sleep, with exponential backoff, and retry the LOCK operation. + */ +static unsigned long +nfs4_set_lock_task_retry(unsigned long timeout) +{ + current->state = TASK_INTERRUPTIBLE; + schedule_timeout(timeout); + timeout <<= 1; + if (timeout > NFS4_LOCK_MAXTIMEOUT) + return NFS4_LOCK_MAXTIMEOUT; + return timeout; +} + +static inline int +nfs4_lck_type(int cmd, struct file_lock *request) +{ + /* set lock type */ + switch (request->fl_type) { + case F_RDLCK: + return IS_SETLKW(cmd) ? NFS4_READW_LT : NFS4_READ_LT; + case F_WRLCK: + return IS_SETLKW(cmd) ? 
NFS4_WRITEW_LT : NFS4_WRITE_LT; + case F_UNLCK: + return NFS4_WRITE_LT; + } + BUG(); +} + +static inline uint64_t +nfs4_lck_length(struct file_lock *request) +{ + if (request->fl_end == OFFSET_MAX) + return ~(uint64_t)0; + return request->fl_end - request->fl_start + 1; +} + +int +nfs4_proc_getlk(struct nfs4_state *state, int cmd, struct file_lock *request) +{ + struct inode *inode = state->inode; + struct nfs_server *server = NFS_SERVER(inode); + struct nfs4_client *clp = server->nfs4_state; + struct nfs_lockargs arg = { + .fh = NFS_FH(inode), + .type = nfs4_lck_type(cmd, request), + .offset = request->fl_start, + .length = nfs4_lck_length(request), + }; + struct nfs_lockres res = { + .server = server, + }; + struct rpc_message msg = { + .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_LOCKT], + .rpc_argp = &arg, + .rpc_resp = &res, + .rpc_cred = state->owner->so_cred, + }; + struct nfs_lowner nlo; + struct nfs4_lock_state *lsp; + int status; + + nlo.clientid = clp->cl_clientid; + down(&state->lock_sema); + lsp = nfs4_find_lock_state(state, request->fl_owner); + if (lsp) + nlo.id = lsp->ls_id; + else { + spin_lock(&clp->cl_lock); + nlo.id = nfs4_alloc_lockowner_id(clp); + spin_unlock(&clp->cl_lock); + } + arg.u.lockt = &nlo; + status = rpc_call_sync(server->client, &msg, 0); + if (!status) { + request->fl_type = F_UNLCK; + } else if (status == -NFS4ERR_DENIED) { + int64_t len, start, end; + start = res.u.denied.offset; + len = res.u.denied.length; + end = start + len - 1; + if (end < 0 || len == 0) + request->fl_end = OFFSET_MAX; + else + request->fl_end = (loff_t)end; + request->fl_start = (loff_t)start; + request->fl_type = F_WRLCK; + if (res.u.denied.type & 1) + request->fl_type = F_RDLCK; + request->fl_pid = 0; + status = 0; + } + if (lsp) + nfs4_put_lock_state(lsp); + up(&state->lock_sema); + return status; +} + +int +nfs4_proc_unlck(struct nfs4_state *state, int cmd, struct file_lock *request) +{ + struct inode *inode = state->inode; + struct nfs_server *server = 
NFS_SERVER(inode); + struct nfs_lockargs arg = { + .fh = NFS_FH(inode), + .type = nfs4_lck_type(cmd, request), + .offset = request->fl_start, + .length = nfs4_lck_length(request), + }; + struct nfs_lockres res = { + .server = server, + }; + struct rpc_message msg = { + .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_LOCKU], + .rpc_argp = &arg, + .rpc_resp = &res, + .rpc_cred = state->owner->so_cred, + }; + struct nfs4_lock_state *lsp; + struct nfs_locku_opargs luargs; + int status = 0; + + down(&state->lock_sema); + lsp = nfs4_find_lock_state(state, request->fl_owner); + if (!lsp) + goto out; + luargs.seqid = lsp->ls_seqid; + memcpy(&luargs.stateid, &lsp->ls_stateid, sizeof(luargs.stateid)); + arg.u.locku = &luargs; + status = rpc_call_sync(server->client, &msg, 0); + nfs4_increment_lock_seqid(status, lsp); + + if (status == 0) { + memcpy(&lsp->ls_stateid, &res.u.stateid, + sizeof(lsp->ls_stateid)); + nfs4_notify_unlck(inode, request, lsp); + } + nfs4_put_lock_state(lsp); +out: + up(&state->lock_sema); + return status; +} + +static int +nfs4_proc_setlk(struct nfs4_state *state, int cmd, struct file_lock *request) +{ + struct inode *inode = state->inode; + struct nfs_server *server = NFS_SERVER(inode); + struct nfs4_lock_state *lsp; + struct nfs_lockargs arg = { + .fh = NFS_FH(inode), + .type = nfs4_lck_type(cmd, request), + .offset = request->fl_start, + .length = nfs4_lck_length(request), + }; + struct nfs_lockres res = { + .server = server, + }; + struct rpc_message msg = { + .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_LOCK], + .rpc_argp = &arg, + .rpc_resp = &res, + .rpc_cred = state->owner->so_cred, + }; + struct nfs_lock_opargs largs = { + .new_lock_owner = 0, + }; + int status; + + down(&state->lock_sema); + lsp = nfs4_find_lock_state(state, request->fl_owner); + if (lsp == NULL) { + struct nfs4_state_owner *owner = state->owner; + struct nfs_open_to_lock otl = { + .lock_owner.clientid = server->nfs4_state->cl_clientid, + }; + status = -ENOMEM; + lsp = 
nfs4_alloc_lock_state(state, request->fl_owner); + if (!lsp) + goto out; + otl.lock_seqid = lsp->ls_seqid; + otl.lock_owner.id = lsp->ls_id; + memcpy(&otl.open_stateid, &state->stateid, sizeof(otl.open_stateid)); + largs.u.open_lock = &otl; + largs.new_lock_owner = 1; + arg.u.lock = &largs; + down(&owner->so_sema); + otl.open_seqid = owner->so_seqid; + status = rpc_call_sync(server->client, &msg, 0); + /* increment open_owner seqid on success, and + * seqid mutating errors */ + nfs4_increment_seqid(status, owner); + up(&owner->so_sema); + } else { + struct nfs_exist_lock el = { + .seqid = lsp->ls_seqid, + }; + memcpy(&el.stateid, &lsp->ls_stateid, sizeof(el.stateid)); + largs.u.exist_lock = &el; + largs.new_lock_owner = 0; + arg.u.lock = &largs; + status = rpc_call_sync(server->client, &msg, 0); + } + /* increment seqid on success, and * seqid mutating errors*/ + nfs4_increment_lock_seqid(status, lsp); + /* save the returned stateid. */ + if (status == 0) { + memcpy(&lsp->ls_stateid, &res.u.stateid, sizeof(nfs4_stateid)); + nfs4_notify_setlk(inode, request, lsp); + } else if (status == -NFS4ERR_DENIED) + status = -EAGAIN; + nfs4_put_lock_state(lsp); +out: + up(&state->lock_sema); + return status; +} + +static int +nfs4_proc_lock(struct file *filp, int cmd, struct file_lock *request) +{ + struct nfs4_state *state; + unsigned long timeout = NFS4_LOCK_MINTIMEOUT; + int status; + + /* verify open state */ + state = (struct nfs4_state *)filp->private_data; + BUG_ON(!state); + + if (request->fl_start < 0 || request->fl_end < 0) + return -EINVAL; + + if (IS_GETLK(cmd)) + return nfs4_proc_getlk(state, F_GETLK, request); + + if (!(IS_SETLK(cmd) || IS_SETLKW(cmd))) + return -EINVAL; + + if (request->fl_type == F_UNLCK) + return nfs4_proc_unlck(state, cmd, request); + + do { + status = nfs4_proc_setlk(state, cmd, request); + if ((status != -EAGAIN) || IS_SETLK(cmd)) + break; + timeout = nfs4_set_lock_task_retry(timeout); + status = -ERESTARTSYS; + if (signalled()) + break; + } 
while(status < 0); + + return status; +} + struct nfs_rpc_ops nfs_v4_clientops = { .version = 4, /* protocol version */ + .dentry_ops = &nfs4_dentry_operations, + .dir_inode_ops = &nfs4_dir_inode_operations, .getroot = nfs4_proc_get_root, .getattr = nfs4_proc_getattr, .setattr = nfs4_proc_setattr, @@ -1835,6 +2300,7 @@ struct nfs_rpc_ops nfs_v4_clientops = { .file_release = nfs4_proc_file_release, .request_init = nfs4_request_init, .request_compatible = nfs4_request_compatible, + .lock = nfs4_proc_lock, }; /* diff -u --recursive --new-file --show-c-function linux-2.6.1-rc3/fs/nfs/nfs4renewd.c linux-2.6.1-37-attr/fs/nfs/nfs4renewd.c --- linux-2.6.1-rc3/fs/nfs/nfs4renewd.c 2004-01-08 17:43:03.000000000 -0500 +++ linux-2.6.1-37-attr/fs/nfs/nfs4renewd.c 2004-01-08 18:03:40.000000000 -0500 @@ -54,53 +54,91 @@ #include #include -static RPC_WAITQ(nfs4_renewd_queue, "nfs4_renewd_queue"); +#define NFSDBG_FACILITY NFSDBG_PROC -static void -renewd(struct rpc_task *task) +void +nfs4_renew_state(void *data) { - struct nfs_server *server = (struct nfs_server *)task->tk_calldata; - unsigned long lease = server->lease_time; - unsigned long last = server->last_renewal; - unsigned long timeout; - - if (!server->nfs4_state) - timeout = (2 * lease) / 3; - else if (jiffies < last + lease/3) - timeout = (2 * lease) / 3 + last - jiffies; - else { + struct nfs4_client *clp = (struct nfs4_client *)data; + long lease, timeout; + unsigned long last, now; + + down_read(&clp->cl_sem); + dprintk("%s: start\n", __FUNCTION__); + /* Are there any active superblocks? */ + if (list_empty(&clp->cl_superblocks)) + goto out; + spin_lock(&clp->cl_lock); + lease = clp->cl_lease_time; + last = clp->cl_last_renewal; + now = jiffies; + timeout = (2 * lease) / 3 + (long)last - (long)now; + /* Are we close to a lease timeout? */ + if (time_after(now, last + lease/3)) { + spin_unlock(&clp->cl_lock); /* Queue an asynchronous RENEW. 
*/ - nfs4_proc_renew(server); + nfs4_proc_async_renew(clp); timeout = (2 * lease) / 3; - } - + spin_lock(&clp->cl_lock); + } else + dprintk("%s: failed to call renewd. Reason: lease not expired \n", + __FUNCTION__); if (timeout < 5 * HZ) /* safeguard */ timeout = 5 * HZ; - task->tk_timeout = timeout; - task->tk_action = renewd; - task->tk_exit = NULL; - rpc_sleep_on(&nfs4_renewd_queue, task, NULL, NULL); - return; + dprintk("%s: requeueing work. Lease period = %ld\n", + __FUNCTION__, (timeout + HZ - 1) / HZ); + cancel_delayed_work(&clp->cl_renewd); + schedule_delayed_work(&clp->cl_renewd, timeout); + spin_unlock(&clp->cl_lock); +out: + up_read(&clp->cl_sem); + dprintk("%s: done\n", __FUNCTION__); +} + +/* Must be called with clp->cl_sem locked for writes */ +void +nfs4_schedule_state_renewal(struct nfs4_client *clp) +{ + long timeout; + + spin_lock(&clp->cl_lock); + timeout = (2 * clp->cl_lease_time) / 3 + (long)clp->cl_last_renewal + - (long)jiffies; + if (timeout < 5 * HZ) + timeout = 5 * HZ; + dprintk("%s: requeueing work. 
Lease period = %ld\n", + __FUNCTION__, (timeout + HZ - 1) / HZ); + cancel_delayed_work(&clp->cl_renewd); + schedule_delayed_work(&clp->cl_renewd, timeout); + spin_unlock(&clp->cl_lock); } -int -nfs4_init_renewd(struct nfs_server *server) +void +nfs4_renewd_prepare_shutdown(struct nfs_server *server) { - struct rpc_task *task; - int status; + struct nfs4_client *clp = server->nfs4_state; - lock_kernel(); - status = -ENOMEM; - task = rpc_new_task(server->client, NULL, RPC_TASK_ASYNC); - if (!task) - goto out; - task->tk_calldata = server; - task->tk_action = renewd; - status = rpc_execute(task); + if (!clp) + return; + flush_scheduled_work(); + down_write(&clp->cl_sem); + if (!list_empty(&server->nfs4_siblings)) + list_del_init(&server->nfs4_siblings); + up_write(&clp->cl_sem); +} -out: - unlock_kernel(); - return status; +/* Must be called with clp->cl_sem locked for writes */ +void +nfs4_kill_renewd(struct nfs4_client *clp) +{ + down_read(&clp->cl_sem); + if (!list_empty(&clp->cl_superblocks)) { + up_read(&clp->cl_sem); + return; + } + cancel_delayed_work(&clp->cl_renewd); + up_read(&clp->cl_sem); + flush_scheduled_work(); } /* diff -u --recursive --new-file --show-c-function linux-2.6.1-rc3/fs/nfs/nfs4state.c linux-2.6.1-37-attr/fs/nfs/nfs4state.c --- linux-2.6.1-rc3/fs/nfs/nfs4state.c 2004-01-08 17:33:16.000000000 -0500 +++ linux-2.6.1-37-attr/fs/nfs/nfs4state.c 2004-01-08 18:07:57.000000000 -0500 @@ -41,6 +41,9 @@ #include #include #include +#include +#include +#include #define OPENOWNER_POOL_SIZE 8 @@ -55,6 +58,29 @@ nfs4_stateid one_stateid = static LIST_HEAD(nfs4_clientid_list); +static void nfs4_recover_state(void *); +extern void nfs4_renew_state(void *); + +void +init_nfsv4_state(struct nfs_server *server) +{ + server->nfs4_state = NULL; + INIT_LIST_HEAD(&server->nfs4_siblings); +} + +void +destroy_nfsv4_state(struct nfs_server *server) +{ + if (server->mnt_path) { + kfree(server->mnt_path); + server->mnt_path = NULL; + } + if (server->nfs4_state) { + 
nfs4_put_client(server->nfs4_state); + server->nfs4_state = NULL; + } +} + /* * nfs4_get_client(): returns an empty client structure * nfs4_put_client(): drops reference to client structure @@ -75,7 +101,12 @@ nfs4_alloc_client(struct in_addr *addr) INIT_LIST_HEAD(&clp->cl_unused); spin_lock_init(&clp->cl_lock); atomic_set(&clp->cl_count, 1); - clp->cl_state = NFS4CLNT_NEW; + INIT_WORK(&clp->cl_recoverd, nfs4_recover_state, clp); + INIT_WORK(&clp->cl_renewd, nfs4_renew_state, clp); + INIT_LIST_HEAD(&clp->cl_superblocks); + init_waitqueue_head(&clp->cl_waitq); + INIT_RPC_WAITQ(&clp->cl_rpcwaitq, "NFS4 client"); + clp->cl_state = 1 << NFS4CLNT_NEW; } return clp; } @@ -93,6 +124,11 @@ nfs4_free_client(struct nfs4_client *clp kfree(sp); } BUG_ON(!list_empty(&clp->cl_state_owners)); + if (clp->cl_cred) + put_rpccred(clp->cl_cred); + nfs_idmap_delete(clp); + if (clp->cl_rpcclient) + rpc_shutdown_client(clp->cl_rpcclient); kfree(clp); } @@ -126,10 +162,14 @@ nfs4_put_client(struct nfs4_client *clp) return; list_del(&clp->cl_servers); spin_unlock(&state_spinlock); + BUG_ON(!list_empty(&clp->cl_superblocks)); + wake_up_all(&clp->cl_waitq); + rpc_wake_up(&clp->cl_rpcwaitq); + nfs4_kill_renewd(clp); nfs4_free_client(clp); } -static inline u32 +u32 nfs4_alloc_lockowner_id(struct nfs4_client *clp) { return clp->cl_lockowner_id ++; @@ -145,11 +185,29 @@ nfs4_client_grab_unused(struct nfs4_clie atomic_inc(&sp->so_count); sp->so_cred = cred; list_move(&sp->so_list, &clp->cl_state_owners); + sp->so_generation = clp->cl_generation; clp->cl_nunused--; } return sp; } +static struct nfs4_state_owner * +nfs4_find_state_owner(struct nfs4_client *clp, struct rpc_cred *cred) +{ + struct nfs4_state_owner *sp, *res = NULL; + + list_for_each_entry(sp, &clp->cl_state_owners, so_list) { + if (sp->so_cred != cred) + continue; + atomic_inc(&sp->so_count); + /* Move to the head of the list */ + list_move(&sp->so_list, &clp->cl_state_owners); + res = sp; + break; + } + return res; +} + /* * 
nfs4_alloc_state_owner(): this is called on the OPEN or CREATE path to * create a new state_owner. @@ -170,6 +228,15 @@ nfs4_alloc_state_owner(void) return sp; } +static void +nfs4_unhash_state_owner(struct nfs4_state_owner *sp) +{ + struct nfs4_client *clp = sp->so_client; + spin_lock(&clp->cl_lock); + list_del_init(&sp->so_list); + spin_unlock(&clp->cl_lock); +} + struct nfs4_state_owner * nfs4_get_state_owner(struct nfs_server *server, struct rpc_cred *cred) { @@ -179,19 +246,25 @@ nfs4_get_state_owner(struct nfs_server * get_rpccred(cred); new = nfs4_alloc_state_owner(); spin_lock(&clp->cl_lock); - sp = nfs4_client_grab_unused(clp, cred); + sp = nfs4_find_state_owner(clp, cred); + if (sp == NULL) + sp = nfs4_client_grab_unused(clp, cred); if (sp == NULL && new != NULL) { list_add(&new->so_list, &clp->cl_state_owners); new->so_client = clp; new->so_id = nfs4_alloc_lockowner_id(clp); new->so_cred = cred; + new->so_generation = clp->cl_generation; sp = new; new = NULL; } spin_unlock(&clp->cl_lock); if (new) kfree(new); - if (!sp) + if (sp) { + if (!test_bit(NFS4CLNT_OK, &clp->cl_state)) + nfs4_wait_clnt_recover(server->client, clp); + } else put_rpccred(cred); return sp; } @@ -206,6 +279,8 @@ nfs4_put_state_owner(struct nfs4_state_o return; if (clp->cl_nunused >= OPENOWNER_POOL_SIZE) goto out_free; + if (list_empty(&sp->so_list)) + goto out_free; list_move(&sp->so_list, &clp->cl_unused); clp->cl_nunused++; spin_unlock(&clp->cl_lock); @@ -227,24 +302,42 @@ nfs4_alloc_open_state(void) state = kmalloc(sizeof(*state), GFP_KERNEL); if (!state) return NULL; - state->pid = current->pid; state->state = 0; + state->nreaders = 0; + state->nwriters = 0; + state->flags = 0; memset(state->stateid.data, 0, sizeof(state->stateid.data)); atomic_set(&state->count, 1); + INIT_LIST_HEAD(&state->lock_states); + init_MUTEX(&state->lock_sema); + rwlock_init(&state->state_lock); return state; } static struct nfs4_state * -__nfs4_find_state_bypid(struct inode *inode, pid_t pid) 
+__nfs4_find_state(struct inode *inode, struct rpc_cred *cred, mode_t mode) { struct nfs_inode *nfsi = NFS_I(inode); struct nfs4_state *state; + mode &= (FMODE_READ|FMODE_WRITE); list_for_each_entry(state, &nfsi->open_states, inode_states) { - if (state->pid == pid) { - atomic_inc(&state->count); - return state; - } + if (state->owner->so_cred != cred) + continue; + if ((mode & FMODE_READ) != 0 && state->nreaders == 0) + continue; + if ((mode & FMODE_WRITE) != 0 && state->nwriters == 0) + continue; + if ((state->state & mode) != mode) + continue; + /* Add the state to the head of the inode's list */ + list_move(&state->inode_states, &nfsi->open_states); + atomic_inc(&state->count); + if (mode & FMODE_READ) + state->nreaders++; + if (mode & FMODE_WRITE) + state->nwriters++; + return state; } return NULL; } @@ -256,7 +349,12 @@ __nfs4_find_state_byowner(struct inode * struct nfs4_state *state; list_for_each_entry(state, &nfsi->open_states, inode_states) { + /* Is this in the process of being freed? 
*/ + if (state->nreaders == 0 && state->nwriters == 0) + continue; if (state->owner == owner) { + /* Add the state to the head of the inode's list */ + list_move(&state->inode_states, &nfsi->open_states); atomic_inc(&state->count); return state; } @@ -265,16 +363,12 @@ __nfs4_find_state_byowner(struct inode * } struct nfs4_state * -nfs4_find_state_bypid(struct inode *inode, pid_t pid) +nfs4_find_state(struct inode *inode, struct rpc_cred *cred, mode_t mode) { - struct nfs_inode *nfsi = NFS_I(inode); struct nfs4_state *state; spin_lock(&inode->i_lock); - state = __nfs4_find_state_bypid(inode, pid); - /* Add the state to the tail of the inode's list */ - if (state) - list_move_tail(&state->inode_states, &nfsi->open_states); + state = __nfs4_find_state(inode, cred, mode); spin_unlock(&inode->i_lock); return state; } @@ -307,7 +401,6 @@ nfs4_get_open_state(struct inode *inode, atomic_inc(&owner->so_count); list_add(&state->inode_states, &nfsi->open_states); state->inode = inode; - atomic_inc(&inode->i_count); spin_unlock(&inode->i_lock); } else { spin_unlock(&inode->i_lock); @@ -323,6 +416,7 @@ nfs4_put_open_state(struct nfs4_state *s { struct inode *inode = state->inode; struct nfs4_state_owner *owner = state->owner; + int status = 0; if (!atomic_dec_and_lock(&state->count, &inode->i_lock)) return; @@ -330,14 +424,230 @@ nfs4_put_open_state(struct nfs4_state *s spin_unlock(&inode->i_lock); down(&owner->so_sema); list_del(&state->open_states); - if (state->state != 0) - nfs4_do_close(inode, state); + if (state->state != 0) { + do { + status = nfs4_do_close(inode, state); + if (!status) + break; + up(&owner->so_sema); + status = nfs4_handle_error(NFS_SERVER(inode), status); + down(&owner->so_sema); + } while (!status); + } up(&owner->so_sema); - iput(inode); nfs4_free_open_state(state); nfs4_put_state_owner(owner); } +void +nfs4_close_state(struct nfs4_state *state, mode_t mode) +{ + struct inode *inode = state->inode; + struct nfs4_state_owner *owner = state->owner; + 
int newstate; + int status = 0; + + down(&owner->so_sema); + /* Protect against nfs4_find_state() */ + spin_lock(&inode->i_lock); + if (mode & FMODE_READ) + state->nreaders--; + if (mode & FMODE_WRITE) + state->nwriters--; + if (state->nwriters == 0 && state->nreaders == 0) + list_del_init(&state->inode_states); + spin_unlock(&inode->i_lock); + do { + newstate = 0; + if (state->state == 0) + break; + if (state->nreaders) + newstate |= FMODE_READ; + if (state->nwriters) + newstate |= FMODE_WRITE; + if (state->state == newstate) + break; + if (newstate != 0) + status = nfs4_do_downgrade(inode, state, newstate); + else + status = nfs4_do_close(inode, state); + if (!status) { + state->state = newstate; + break; + } + up(&owner->so_sema); + status = nfs4_handle_error(NFS_SERVER(inode), status); + down(&owner->so_sema); + } while (!status); + up(&owner->so_sema); + nfs4_put_open_state(state); +} + +/* + * Search the state->lock_states for an existing lock_owner + * that is compatible with current->files + */ +static struct nfs4_lock_state * +__nfs4_find_lock_state(struct nfs4_state *state, fl_owner_t fl_owner) +{ + struct nfs4_lock_state *pos; + list_for_each_entry(pos, &state->lock_states, ls_locks) { + if (pos->ls_owner != fl_owner) + continue; + atomic_inc(&pos->ls_count); + return pos; + } + return NULL; +} + +struct nfs4_lock_state * +nfs4_find_lock_state(struct nfs4_state *state, fl_owner_t fl_owner) +{ + struct nfs4_lock_state *lsp; + read_lock(&state->state_lock); + lsp = __nfs4_find_lock_state(state, fl_owner); + read_unlock(&state->state_lock); + return lsp; +} + +/* + * Return a compatible lock_state. If no initialized lock_state structure + * exists, return an uninitialized one. 
+ * + * The caller must be holding state->lock_sema + */ +struct nfs4_lock_state * +nfs4_alloc_lock_state(struct nfs4_state *state, fl_owner_t fl_owner) +{ + struct nfs4_lock_state *lsp; + struct nfs4_client *clp = state->owner->so_client; + + lsp = kmalloc(sizeof(*lsp), GFP_KERNEL); + if (lsp == NULL) + return NULL; + lsp->ls_seqid = 0; /* arbitrary */ + lsp->ls_id = -1; + memset(lsp->ls_stateid.data, 0, sizeof(lsp->ls_stateid.data)); + atomic_set(&lsp->ls_count, 1); + lsp->ls_owner = fl_owner; + lsp->ls_parent = state; + INIT_LIST_HEAD(&lsp->ls_locks); + spin_lock(&clp->cl_lock); + lsp->ls_id = nfs4_alloc_lockowner_id(clp); + spin_unlock(&clp->cl_lock); + return lsp; +} + +/* + * Byte-range lock aware utility to initialize the stateid of read/write + * requests. + */ +void +nfs4_copy_stateid(nfs4_stateid *dst, struct nfs4_state *state, fl_owner_t fl_owner) +{ + if (test_bit(LK_STATE_IN_USE, &state->flags)) { + struct nfs4_lock_state *lsp; + + lsp = nfs4_find_lock_state(state, fl_owner); + if (lsp) { + memcpy(dst, &lsp->ls_stateid, sizeof(*dst)); + nfs4_put_lock_state(lsp); + return; + } + } + memcpy(dst, &state->stateid, sizeof(*dst)); +} + +/* +* Called with state->lock_sema held. +*/ +void +nfs4_increment_lock_seqid(int status, struct nfs4_lock_state *lsp) +{ + if (status == NFS_OK || seqid_mutating_err(-status)) + lsp->ls_seqid++; +} + +/* +* Check to see if the request lock (type FL_UNLK) effects the fl lock. +* +* fl and request must have the same posix owner +* +* return: +* 0 -> fl not effected by request +* 1 -> fl consumed by request +*/ + +static int +nfs4_check_unlock(struct file_lock *fl, struct file_lock *request) +{ + if (fl->fl_start >= request->fl_start && fl->fl_end <= request->fl_end) + return 1; + return 0; +} + +/* + * Post an initialized lock_state on the state->lock_states list. 
+ */ +void +nfs4_notify_setlk(struct inode *inode, struct file_lock *request, struct nfs4_lock_state *lsp) +{ + struct nfs4_state *state = lsp->ls_parent; + + if (!list_empty(&lsp->ls_locks)) + return; + write_lock(&state->state_lock); + list_add(&lsp->ls_locks, &state->lock_states); + set_bit(LK_STATE_IN_USE, &state->flags); + write_unlock(&state->state_lock); +} + +/* + * to decide to 'reap' lock state: + * 1) search i_flock for file_locks with fl.lock_state = to ls. + * 2) determine if unlock will consume found lock. + * if so, reap + * + * else, don't reap. + * + */ +void +nfs4_notify_unlck(struct inode *inode, struct file_lock *request, struct nfs4_lock_state *lsp) +{ + struct nfs4_state *state = lsp->ls_parent; + struct file_lock *fl; + + for (fl = inode->i_flock; fl != NULL; fl = fl->fl_next) { + if (!(fl->fl_flags & FL_POSIX)) + continue; + if (fl->fl_owner != lsp->ls_owner) + continue; + /* Exit if we find at least one lock which is not consumed */ + if (nfs4_check_unlock(fl,request) == 0) + return; + } + + write_lock(&state->state_lock); + list_del_init(&lsp->ls_locks); + if (list_empty(&state->lock_states)) + clear_bit(LK_STATE_IN_USE, &state->flags); + write_unlock(&state->state_lock); +} + +/* + * Release reference to lock_state, and free it if we see that + * it is no longer in use + */ +void +nfs4_put_lock_state(struct nfs4_lock_state *lsp) +{ + if (!atomic_dec_and_test(&lsp->ls_count)) + return; + if (!list_empty(&lsp->ls_locks)) + return; + kfree(lsp); +} + /* * Called with sp->so_sema held. 
* @@ -346,10 +656,172 @@ nfs4_put_open_state(struct nfs4_state *s * see comments nfs_fs.h:seqid_mutating_error() */ void -nfs4_increment_seqid(u32 status, struct nfs4_state_owner *sp) +nfs4_increment_seqid(int status, struct nfs4_state_owner *sp) { - if (status == NFS_OK || seqid_mutating_err(status)) + if (status == NFS_OK || seqid_mutating_err(-status)) sp->so_seqid++; + /* If the server returns BAD_SEQID, unhash state_owner here */ + if (status == -NFS4ERR_BAD_SEQID) + nfs4_unhash_state_owner(sp); +} + +static int reclaimer(void *); +struct reclaimer_args { + struct nfs4_client *clp; + struct completion complete; +}; + +/* + * State recovery routine + */ +void +nfs4_recover_state(void *data) +{ + struct nfs4_client *clp = (struct nfs4_client *)data; + struct reclaimer_args args = { + .clp = clp, + }; + might_sleep(); + + init_completion(&args.complete); + + down_read(&clp->cl_sem); + if (test_and_set_bit(NFS4CLNT_SETUP_STATE, &clp->cl_state)) + goto out_failed; + if (kernel_thread(reclaimer, &args, CLONE_KERNEL) < 0) + goto out_failed_clear; + wait_for_completion(&args.complete); + return; +out_failed_clear: + smp_mb__before_clear_bit(); + clear_bit(NFS4CLNT_SETUP_STATE, &clp->cl_state); + smp_mb__after_clear_bit(); + wake_up_all(&clp->cl_waitq); + rpc_wake_up(&clp->cl_rpcwaitq); +out_failed: + up_read(&clp->cl_sem); +} + +/* + * Schedule a state recovery attempt + */ +void +nfs4_schedule_state_recovery(struct nfs4_client *clp) +{ + if (!clp) + return; + smp_mb__before_clear_bit(); + clear_bit(NFS4CLNT_OK, &clp->cl_state); + smp_mb__after_clear_bit(); + schedule_work(&clp->cl_recoverd); +} + +static int +nfs4_reclaim_open_state(struct nfs4_state_owner *sp) +{ + struct nfs4_state *state; + int status = 0; + + list_for_each_entry(state, &sp->so_states, open_states) { + status = nfs4_open_reclaim(sp, state); + if (status >= 0) + continue; + switch (status) { + default: + printk(KERN_ERR "%s: unhandled error %d. 
Zeroing state\n", + __FUNCTION__, status); + case -NFS4ERR_EXPIRED: + case -NFS4ERR_NO_GRACE: + case -NFS4ERR_RECLAIM_BAD: + case -NFS4ERR_RECLAIM_CONFLICT: + /* + * Open state on this file cannot be recovered + * All we can do is revert to using the zero stateid. + */ + memset(state->stateid.data, 0, + sizeof(state->stateid.data)); + /* Mark the file as being 'closed' */ + state->state = 0; + break; + case -NFS4ERR_STALE_CLIENTID: + goto out_err; + } + } + return 0; +out_err: + return status; +} + +static int +reclaimer(void *ptr) +{ + struct reclaimer_args *args = (struct reclaimer_args *)ptr; + struct nfs4_client *clp = args->clp; + struct nfs4_state_owner *sp; + int generation; + int status; + + daemonize("%u.%u.%u.%u-reclaim", NIPQUAD(clp->cl_addr)); + allow_signal(SIGKILL); + + complete(&args->complete); + + /* Are there any NFS mounts out there? */ + if (list_empty(&clp->cl_superblocks)) + goto out; + if (!test_bit(NFS4CLNT_NEW, &clp->cl_state)) { + status = nfs4_proc_renew(clp); + if (status == 0) { + set_bit(NFS4CLNT_OK, &clp->cl_state); + goto out; + } + } + status = nfs4_proc_setclientid(clp, 0, 0); + if (status) + goto out_error; + status = nfs4_proc_setclientid_confirm(clp); + if (status) + goto out_error; + generation = ++(clp->cl_generation); + clear_bit(NFS4CLNT_NEW, &clp->cl_state); + set_bit(NFS4CLNT_OK, &clp->cl_state); + up_read(&clp->cl_sem); + nfs4_schedule_state_renewal(clp); +restart_loop: + spin_lock(&clp->cl_lock); + list_for_each_entry(sp, &clp->cl_state_owners, so_list) { + if (sp->so_generation - generation <= 0) + continue; + atomic_inc(&sp->so_count); + spin_unlock(&clp->cl_lock); + down(&sp->so_sema); + if (sp->so_generation - generation < 0) { + smp_rmb(); + sp->so_generation = clp->cl_generation; + status = nfs4_reclaim_open_state(sp); + } + up(&sp->so_sema); + nfs4_put_state_owner(sp); + if (status < 0) { + if (status == -NFS4ERR_STALE_CLIENTID) + nfs4_schedule_state_recovery(clp); + goto out; + } + goto restart_loop; + } + 
spin_unlock(&clp->cl_lock); +out: + smp_mb__before_clear_bit(); + clear_bit(NFS4CLNT_SETUP_STATE, &clp->cl_state); + smp_mb__after_clear_bit(); + wake_up_all(&clp->cl_waitq); + rpc_wake_up(&clp->cl_rpcwaitq); + return 0; +out_error: + printk(KERN_WARNING "Error: state recovery failed on NFSv4 server %u.%u.%u.%u\n", + NIPQUAD(clp->cl_addr.s_addr)); + up_read(&clp->cl_sem); + goto out; } /* diff -u --recursive --new-file --show-c-function linux-2.6.1-rc3/fs/nfs/nfs4xdr.c linux-2.6.1-37-attr/fs/nfs/nfs4xdr.c --- linux-2.6.1-rc3/fs/nfs/nfs4xdr.c 2004-01-08 17:46:31.000000000 -0500 +++ linux-2.6.1-37-attr/fs/nfs/nfs4xdr.c 2004-01-08 18:07:57.000000000 -0500 @@ -57,7 +57,7 @@ /* Mapping from NFS error code to "errno" error code. */ #define errno_NFSERR_IO EIO -extern int nfs_stat_to_errno(int); +static int nfs_stat_to_errno(int); /* NFSv4 COMPOUND tags are only wanted for debugging purposes */ #ifdef DEBUG @@ -66,6 +66,10 @@ extern int nfs_stat_to_errno(int); #define NFS4_MAXTAGLEN 0 #endif +/* lock,open owner id: + * we currently use size 1 (u32) out of (NFS4_OPAQUE_LIMIT >> 2) + */ +#define owner_id_maxsz 1 + 1 #define compound_encode_hdr_maxsz 3 + (NFS4_MAXTAGLEN >> 2) #define compound_decode_hdr_maxsz 2 + (NFS4_MAXTAGLEN >> 2) #define op_encode_hdr_maxsz 1 @@ -73,6 +77,8 @@ extern int nfs_stat_to_errno(int); #define encode_putfh_maxsz op_encode_hdr_maxsz + 1 + \ (NFS4_FHSIZE >> 2) #define decode_putfh_maxsz op_decode_hdr_maxsz +#define encode_putrootfh_maxsz op_encode_hdr_maxsz +#define decode_putrootfh_maxsz op_decode_hdr_maxsz #define encode_getfh_maxsz op_encode_hdr_maxsz #define decode_getfh_maxsz op_decode_hdr_maxsz + 1 + \ (NFS4_FHSIZE >> 2) @@ -90,6 +96,25 @@ extern int nfs_stat_to_errno(int); #define decode_pre_write_getattr_maxsz op_decode_hdr_maxsz + 5 #define encode_post_write_getattr_maxsz op_encode_hdr_maxsz + 2 #define decode_post_write_getattr_maxsz op_decode_hdr_maxsz + 13 +#define encode_fsinfo_maxsz op_encode_hdr_maxsz + 2 +#define 
decode_fsinfo_maxsz op_decode_hdr_maxsz + 11 +#define encode_renew_maxsz op_encode_hdr_maxsz + 3 +#define decode_renew_maxsz op_decode_hdr_maxsz +#define encode_setclientid_maxsz \ + op_encode_hdr_maxsz + \ + 4 /*server->ip_addr*/ + \ + 1 /*Netid*/ + \ + 6 /*uaddr*/ + \ + 6 + (NFS4_VERIFIER_SIZE >> 2) +#define decode_setclientid_maxsz \ + op_decode_hdr_maxsz + \ + 2 + \ + 1024 /* large value for CLID_INUSE */ +#define encode_setclientid_confirm_maxsz \ + op_encode_hdr_maxsz + \ + 3 + (NFS4_VERIFIER_SIZE >> 2) +#define decode_setclientid_confirm_maxsz \ + op_decode_hdr_maxsz #define NFS4_enc_compound_sz 1024 /* XXX: large enough? */ #define NFS4_dec_compound_sz 1024 /* XXX: large enough? */ @@ -145,6 +170,24 @@ extern int nfs_stat_to_errno(int); #define NFS4_dec_open_confirm_sz compound_decode_hdr_maxsz + \ decode_putfh_maxsz + \ op_decode_hdr_maxsz + 4 +#define NFS4_enc_open_reclaim_sz compound_encode_hdr_maxsz + \ + encode_putfh_maxsz + \ + op_encode_hdr_maxsz + \ + 11 + \ + encode_getattr_maxsz +#define NFS4_dec_open_reclaim_sz compound_decode_hdr_maxsz + \ + decode_putfh_maxsz + \ + op_decode_hdr_maxsz + \ + 4 + 5 + 2 + 3 + \ + decode_getattr_maxsz +#define NFS4_enc_open_downgrade_sz \ + compound_encode_hdr_maxsz + \ + encode_putfh_maxsz + \ + op_encode_hdr_maxsz + 7 +#define NFS4_dec_open_downgrade_sz \ + compound_decode_hdr_maxsz + \ + decode_putfh_maxsz + \ + op_decode_hdr_maxsz + 4 #define NFS4_enc_close_sz compound_encode_hdr_maxsz + \ encode_putfh_maxsz + \ op_encode_hdr_maxsz + 5 @@ -159,6 +202,60 @@ extern int nfs_stat_to_errno(int); #define NFS4_dec_setattr_sz compound_decode_hdr_maxsz + \ decode_putfh_maxsz + \ op_decode_hdr_maxsz + 3 +#define NFS4_enc_fsinfo_sz compound_encode_hdr_maxsz + \ + encode_putfh_maxsz + \ + encode_fsinfo_maxsz +#define NFS4_dec_fsinfo_sz compound_decode_hdr_maxsz + \ + decode_putfh_maxsz + \ + decode_fsinfo_maxsz +#define NFS4_enc_renew_sz compound_encode_hdr_maxsz + \ + encode_renew_maxsz +#define NFS4_dec_renew_sz 
compound_decode_hdr_maxsz + \ + decode_renew_maxsz +#define NFS4_enc_setclientid_sz compound_encode_hdr_maxsz + \ + encode_setclientid_maxsz +#define NFS4_dec_setclientid_sz compound_decode_hdr_maxsz + \ + decode_setclientid_maxsz +#define NFS4_enc_setclientid_confirm_sz \ + compound_encode_hdr_maxsz + \ + encode_setclientid_confirm_maxsz + \ + encode_putrootfh_maxsz + \ + encode_fsinfo_maxsz +#define NFS4_dec_setclientid_confirm_sz \ + compound_decode_hdr_maxsz + \ + decode_setclientid_confirm_maxsz + \ + decode_putrootfh_maxsz + \ + decode_fsinfo_maxsz +#define NFS4_enc_lock_sz compound_encode_hdr_maxsz + \ + encode_putfh_maxsz + \ + encode_getattr_maxsz + \ + op_encode_hdr_maxsz + \ + 1 + 1 + 2 + 2 + \ + 1 + 4 + 1 + 2 + \ + owner_id_maxsz +#define NFS4_dec_lock_sz compound_decode_hdr_maxsz + \ + decode_putfh_maxsz + \ + decode_getattr_maxsz + \ + op_decode_hdr_maxsz + \ + 2 + 2 + 1 + 2 + \ + owner_id_maxsz +#define NFS4_enc_lockt_sz compound_encode_hdr_maxsz + \ + encode_putfh_maxsz + \ + encode_getattr_maxsz + \ + op_encode_hdr_maxsz + \ + 1 + 2 + 2 + 2 + \ + owner_id_maxsz +#define NFS4_dec_lockt_sz NFS4_dec_lock_sz +#define NFS4_enc_locku_sz compound_encode_hdr_maxsz + \ + encode_putfh_maxsz + \ + encode_getattr_maxsz + \ + op_encode_hdr_maxsz + \ + 1 + 1 + 4 + 2 + 2 +#define NFS4_dec_locku_sz compound_decode_hdr_maxsz + \ + decode_putfh_maxsz + \ + decode_getattr_maxsz + \ + op_decode_hdr_maxsz + 4 + static struct { @@ -239,8 +336,8 @@ static int encode_attrs(struct xdr_stream *xdr, struct iattr *iap, struct nfs_server *server) { - char owner_name[256]; - char owner_group[256]; + char owner_name[IDMAP_NAMESZ]; + char owner_group[IDMAP_NAMESZ]; int owner_namelen = 0; int owner_grouplen = 0; uint32_t *p; @@ -265,9 +362,8 @@ encode_attrs(struct xdr_stream *xdr, str if (iap->ia_valid & ATTR_MODE) len += 4; if (iap->ia_valid & ATTR_UID) { - status = nfs_idmap_name(server, IDMAP_TYPE_USER, - iap->ia_uid, owner_name, &owner_namelen); - if (status < 0) { + 
owner_namelen = nfs_map_uid_to_name(server->nfs4_state, iap->ia_uid, owner_name); + if (owner_namelen < 0) { printk(KERN_WARNING "nfs: couldn't resolve uid %d to string\n", iap->ia_uid); /* XXX */ @@ -278,9 +374,8 @@ encode_attrs(struct xdr_stream *xdr, str len += 4 + (XDR_QUADLEN(owner_namelen) << 2); } if (iap->ia_valid & ATTR_GID) { - status = nfs_idmap_name(server, IDMAP_TYPE_GROUP, - iap->ia_gid, owner_group, &owner_grouplen); - if (status < 0) { + owner_grouplen = nfs_map_gid_to_group(server->nfs4_state, iap->ia_gid, owner_group); + if (owner_grouplen < 0) { printk(KERN_WARNING "nfs4: couldn't resolve gid %d to string\n", iap->ia_gid); strcpy(owner_group, "nobody"); @@ -503,6 +598,15 @@ encode_post_write_getattr(struct xdr_str } static int +encode_fsinfo(struct xdr_stream *xdr) +{ + return encode_getattr_one(xdr, FATTR4_WORD0_MAXFILESIZE + | FATTR4_WORD0_MAXREAD + | FATTR4_WORD0_MAXWRITE + | FATTR4_WORD0_LEASE_TIME); +} + +static int encode_getfh(struct xdr_stream *xdr) { uint32_t *p; @@ -526,6 +630,80 @@ encode_link(struct xdr_stream *xdr, stru return 0; } +/* + * opcode,type,reclaim,offset,length,new_lock_owner = 32 + * open_seqid,open_stateid,lock_seqid,lock_owner.clientid, lock_owner.id = 40 + */ +static int +encode_lock(struct xdr_stream *xdr, struct nfs_lockargs *arg) +{ + uint32_t *p; + struct nfs_lock_opargs *opargs = arg->u.lock; + + RESERVE_SPACE(32); + WRITE32(OP_LOCK); + WRITE32(arg->type); + WRITE32(opargs->reclaim); + WRITE64(arg->offset); + WRITE64(arg->length); + WRITE32(opargs->new_lock_owner); + if (opargs->new_lock_owner){ + struct nfs_open_to_lock *ol = opargs->u.open_lock; + + RESERVE_SPACE(40); + WRITE32(ol->open_seqid); + WRITEMEM(&ol->open_stateid, sizeof(ol->open_stateid)); + WRITE32(ol->lock_seqid); + WRITE64(ol->lock_owner.clientid); + WRITE32(4); + WRITE32(ol->lock_owner.id); + } + else { + struct nfs_exist_lock *el = opargs->u.exist_lock; + + RESERVE_SPACE(20); + WRITEMEM(&el->stateid, sizeof(el->stateid)); + WRITE32(el->seqid); + 
} + + return 0; +} + +static int +encode_lockt(struct xdr_stream *xdr, struct nfs_lockargs *arg) +{ + uint32_t *p; + struct nfs_lowner *opargs = arg->u.lockt; + + RESERVE_SPACE(40); + WRITE32(OP_LOCKT); + WRITE32(arg->type); + WRITE64(arg->offset); + WRITE64(arg->length); + WRITE64(opargs->clientid); + WRITE32(4); + WRITE32(opargs->id); + + return 0; +} + +static int +encode_locku(struct xdr_stream *xdr, struct nfs_lockargs *arg) +{ + uint32_t *p; + struct nfs_locku_opargs *opargs = arg->u.locku; + + RESERVE_SPACE(44); + WRITE32(OP_LOCKU); + WRITE32(arg->type); + WRITE32(opargs->seqid); + WRITEMEM(&opargs->stateid, sizeof(opargs->stateid)); + WRITE64(arg->offset); + WRITE64(arg->length); + + return 0; +} + static int encode_lookup(struct xdr_stream *xdr, struct nfs4_lookup *lookup) { @@ -615,6 +793,57 @@ encode_open_confirm(struct xdr_stream *x static int +encode_open_reclaim(struct xdr_stream *xdr, struct nfs_open_reclaimargs *arg) +{ + uint32_t *p; + + /* + * opcode 4, seqid 4, share_access 4, share_deny 4, clientid 8, ownerlen 4, + * owner 4, opentype 4, claim 4, delegation_type 4 = 44 + */ + RESERVE_SPACE(44); + WRITE32(OP_OPEN); + WRITE32(arg->seqid); + switch (arg->share_access) { + case FMODE_READ: + WRITE32(NFS4_SHARE_ACCESS_READ); + break; + case FMODE_WRITE: + WRITE32(NFS4_SHARE_ACCESS_WRITE); + break; + case FMODE_READ|FMODE_WRITE: + WRITE32(NFS4_SHARE_ACCESS_BOTH); + break; + default: + BUG(); + } + WRITE32(0); /* for linux, share_deny = 0 always */ + WRITE64(arg->clientid); + WRITE32(4); + WRITE32(arg->id); + WRITE32(NFS4_OPEN_NOCREATE); + WRITE32(NFS4_OPEN_CLAIM_PREVIOUS); + WRITE32(NFS4_OPEN_DELEGATE_NONE); + return 0; +} + +static int +encode_open_downgrade(struct xdr_stream *xdr, struct nfs_closeargs *arg) +{ + uint32_t *p; + + RESERVE_SPACE(16+sizeof(arg->stateid.data)); + WRITE32(OP_OPEN_DOWNGRADE); + WRITEMEM(arg->stateid.data, sizeof(arg->stateid.data)); + WRITE32(arg->seqid); + WRITE32(arg->share_access); + /* No deny modes */ + WRITE32(0); + 
+ return 0; +} + +static int encode_putfh(struct xdr_stream *xdr, struct nfs_fh *fh) { int len = fh->size; @@ -891,21 +1120,12 @@ encode_compound(struct xdr_stream *xdr, case OP_RENAME: status = encode_rename(xdr, &cp->ops[i].u.rename); break; - case OP_RENEW: - status = encode_renew(xdr, cp->ops[i].u.renew); - break; case OP_RESTOREFH: status = encode_restorefh(xdr); break; case OP_SAVEFH: status = encode_savefh(xdr); break; - case OP_SETCLIENTID: - status = encode_setclientid(xdr, &cp->ops[i].u.setclientid); - break; - case OP_SETCLIENTID_CONFIRM: - status = encode_setclientid_confirm(xdr, cp->ops[i].u.setclientid_confirm); - break; default: BUG(); } @@ -1015,6 +1235,119 @@ out: return status; } +/* + * Encode an OPEN request + */ +static int +nfs4_xdr_enc_open_reclaim(struct rpc_rqst *req, uint32_t *p, + struct nfs_open_reclaimargs *args) +{ + struct xdr_stream xdr; + struct compound_hdr hdr = { + .nops = 3, + }; + int status; + + xdr_init_encode(&xdr, &req->rq_snd_buf, p); + encode_compound_hdr(&xdr, &hdr); + status = encode_putfh(&xdr, args->fh); + if (status) + goto out; + status = encode_open_reclaim(&xdr, args); + if (status) + goto out; + status = encode_getattr(&xdr, args->f_getattr); +out: + return status; +} + +/* + * Encode an OPEN_DOWNGRADE request + */ +static int +nfs4_xdr_enc_open_downgrade(struct rpc_rqst *req, uint32_t *p, struct nfs_closeargs *args) +{ + struct xdr_stream xdr; + struct compound_hdr hdr = { + .nops = 2, + }; + int status; + + xdr_init_encode(&xdr, &req->rq_snd_buf, p); + encode_compound_hdr(&xdr, &hdr); + status = encode_putfh(&xdr, args->fh); + if (status) + goto out; + status = encode_open_downgrade(&xdr, args); +out: + return status; +} + +/* + * Encode a LOCK request + */ +static int +nfs4_xdr_enc_lock(struct rpc_rqst *req, uint32_t *p, struct nfs_lockargs *args) +{ + struct xdr_stream xdr; + struct compound_hdr hdr = { + .nops = 2, + }; + int status; + + xdr_init_encode(&xdr, &req->rq_snd_buf, p); + encode_compound_hdr(&xdr, 
&hdr); + status = encode_putfh(&xdr, args->fh); + if(status) + goto out; + status = encode_lock(&xdr, args); +out: + return status; +} + +/* + * Encode a LOCKT request + */ +static int +nfs4_xdr_enc_lockt(struct rpc_rqst *req, uint32_t *p, struct nfs_lockargs *args) +{ + struct xdr_stream xdr; + struct compound_hdr hdr = { + .nops = 2, + }; + int status; + + xdr_init_encode(&xdr, &req->rq_snd_buf, p); + encode_compound_hdr(&xdr, &hdr); + status = encode_putfh(&xdr, args->fh); + if(status) + goto out; + status = encode_lockt(&xdr, args); +out: + return status; +} + +/* + * Encode a LOCKU request + */ +static int +nfs4_xdr_enc_locku(struct rpc_rqst *req, uint32_t *p, struct nfs_lockargs *args) +{ + struct xdr_stream xdr; + struct compound_hdr hdr = { + .nops = 2, + }; + int status; + + xdr_init_encode(&xdr, &req->rq_snd_buf, p); + encode_compound_hdr(&xdr, &hdr); + status = encode_putfh(&xdr, args->fh); + if(status) + goto out; + status = encode_locku(&xdr, args); +out: + return status; +} /* * Encode a READ request @@ -1134,6 +1467,82 @@ out: } /* + * FSINFO request + */ +static int +nfs4_xdr_enc_fsinfo(struct rpc_rqst *req, uint32_t *p, void *fhandle) +{ + struct xdr_stream xdr; + struct compound_hdr hdr = { + .nops = 2, + }; + int status; + + xdr_init_encode(&xdr, &req->rq_snd_buf, p); + encode_compound_hdr(&xdr, &hdr); + status = encode_putfh(&xdr, fhandle); + if (!status) + status = encode_fsinfo(&xdr); + return status; +} + +/* + * a RENEW request + */ +static int +nfs4_xdr_enc_renew(struct rpc_rqst *req, uint32_t *p, struct nfs4_client *clp) +{ + struct xdr_stream xdr; + struct compound_hdr hdr = { + .nops = 1, + }; + + xdr_init_encode(&xdr, &req->rq_snd_buf, p); + encode_compound_hdr(&xdr, &hdr); + return encode_renew(&xdr, clp); +} + +/* + * a SETCLIENTID request + */ +static int +nfs4_xdr_enc_setclientid(struct rpc_rqst *req, uint32_t *p, + struct nfs4_setclientid *sc) +{ + struct xdr_stream xdr; + struct compound_hdr hdr = { + .nops = 1, + }; + + 
xdr_init_encode(&xdr, &req->rq_snd_buf, p); + encode_compound_hdr(&xdr, &hdr); + return encode_setclientid(&xdr, sc); +} + +/* + * a SETCLIENTID_CONFIRM request + */ +static int +nfs4_xdr_enc_setclientid_confirm(struct rpc_rqst *req, uint32_t *p, + struct nfs4_client *clp) +{ + struct xdr_stream xdr; + struct compound_hdr hdr = { + .nops = 3, + }; + int status; + + xdr_init_encode(&xdr, &req->rq_snd_buf, p); + encode_compound_hdr(&xdr, &hdr); + status = encode_setclientid_confirm(&xdr, clp); + if (!status) + status = encode_putrootfh(&xdr); + if (!status) + status = encode_fsinfo(&xdr); + return status; +} + +/* * START OF "GENERIC" DECODE ROUTINES. * These may look a little ugly since they are imported from a "generic" * set of XDR encode/decode routines which are intended to be shared by @@ -1295,7 +1704,6 @@ decode_create(struct xdr_stream *xdr, st } extern uint32_t nfs4_fattr_bitmap[2]; -extern uint32_t nfs4_fsinfo_bitmap[2]; extern uint32_t nfs4_fsstat_bitmap[2]; extern uint32_t nfs4_pathconf_bitmap[2]; @@ -1305,7 +1713,6 @@ decode_getattr(struct xdr_stream *xdr, s { struct nfs_fattr *nfp = getattr->gt_attrs; struct nfs_fsstat *fsstat = getattr->gt_fsstat; - struct nfs_fsinfo *fsinfo = getattr->gt_fsinfo; struct nfs_pathconf *pathconf = getattr->gt_pathconf; uint32_t attrlen, dummy32, bmlen, bmval0 = 0, @@ -1351,11 +1758,6 @@ decode_getattr(struct xdr_stream *xdr, s nfp->nlink = 1; nfp->timestamp = jiffies; } - if (fsinfo) { - fsinfo->rtmult = fsinfo->wtmult = 512; /* ??? 
*/ - fsinfo->lease_time = 60; - } - if (bmval0 & FATTR4_WORD0_TYPE) { READ_BUF(4); len += 4; @@ -1389,12 +1791,6 @@ decode_getattr(struct xdr_stream *xdr, s (long long)nfp->fsid_u.nfs4.major, (long long)nfp->fsid_u.nfs4.minor); } - if (bmval0 & FATTR4_WORD0_LEASE_TIME) { - READ_BUF(4); - len += 4; - READ32(fsinfo->lease_time); - dprintk("read_attrs: lease_time=%d\n", fsinfo->lease_time); - } if (bmval0 & FATTR4_WORD0_FILEID) { READ_BUF(8); len += 8; @@ -1419,12 +1815,6 @@ decode_getattr(struct xdr_stream *xdr, s READ64(fsstat->tfiles); dprintk("read_attrs: files_tot=0x%Lx\n", (long long) fsstat->tfiles); } - if (bmval0 & FATTR4_WORD0_MAXFILESIZE) { - READ_BUF(8); - len += 8; - READ64(fsinfo->maxfilesize); - dprintk("read_attrs: maxfilesize=0x%Lx\n", (long long) fsinfo->maxfilesize); - } if (bmval0 & FATTR4_WORD0_MAXLINK) { READ_BUF(4); len += 4; @@ -1437,20 +1827,6 @@ decode_getattr(struct xdr_stream *xdr, s READ32(pathconf->max_namelen); dprintk("read_attrs: maxname=%d\n", pathconf->max_namelen); } - if (bmval0 & FATTR4_WORD0_MAXREAD) { - READ_BUF(8); - len += 8; - READ64(fsinfo->rtmax); - fsinfo->rtpref = fsinfo->dtpref = fsinfo->rtmax; - dprintk("read_attrs: maxread=%d\n", fsinfo->rtmax); - } - if (bmval0 & FATTR4_WORD0_MAXWRITE) { - READ_BUF(8); - len += 8; - READ64(fsinfo->wtmax); - fsinfo->wtpref = fsinfo->wtmax; - dprintk("read_attrs: maxwrite=%d\n", fsinfo->wtmax); - } if (bmval1 & FATTR4_WORD1_MODE) { READ_BUF(4); @@ -1475,10 +1851,9 @@ decode_getattr(struct xdr_stream *xdr, s } READ_BUF(dummy32); len += (XDR_QUADLEN(dummy32) << 2); - if ((status = nfs_idmap_id(server, IDMAP_TYPE_USER, - (char *)p, len, &nfp->uid)) == -1) { - dprintk("read_attrs: gss_get_num failed!\n"); - /* goto out; */ + if ((status = nfs_map_name_to_uid(server->nfs4_state, (char *)p, dummy32, + &nfp->uid)) < 0) { + dprintk("read_attrs: name-to-uid mapping failed!\n"); nfp->uid = -2; } dprintk("read_attrs: uid=%d\n", (int)nfp->uid); @@ -1493,11 +1868,10 @@ decode_getattr(struct 
xdr_stream *xdr, s } READ_BUF(dummy32); len += (XDR_QUADLEN(dummy32) << 2); - if ((status = nfs_idmap_id(server, IDMAP_TYPE_GROUP, - (char *)p, len, &nfp->gid)) == -1) { - dprintk("read_attrs: gss_get_num failed!\n"); + if ((status = nfs_map_group_to_gid(server->nfs4_state, (char *)p, dummy32, + &nfp->gid)) < 0) { + dprintk("read_attrs: group-to-gid mapping failed!\n"); nfp->gid = -2; - /* goto out; */ } dprintk("read_attrs: gid=%d\n", (int)nfp->gid); } @@ -1695,6 +2069,74 @@ out_bad_bitmap: static int +decode_fsinfo(struct xdr_stream *xdr, struct nfs_fsinfo *fsinfo) +{ + uint32_t *p; + uint32_t len, attrlen, bmlen, bmval0 = 0, bmval1 = 0; + int status; + + status = decode_op_hdr(xdr, OP_GETATTR); + if (status) + return status; + READ_BUF(4); + READ32(bmlen); + if (bmlen < 1) + return -EIO; + READ_BUF(bmlen << 2); + READ32(bmval0); + if (bmval0 & ~(FATTR4_WORD0_MAXFILESIZE|FATTR4_WORD0_MAXREAD| + FATTR4_WORD0_MAXWRITE|FATTR4_WORD0_LEASE_TIME)) + goto out_bad_bitmap; + if (bmlen > 1) { + READ32(bmval1); + if (bmval1 != 0 || bmlen > 2) + goto out_bad_bitmap; + } + READ_BUF(4); + READ32(attrlen); + READ_BUF(attrlen); + fsinfo->rtmult = fsinfo->wtmult = 512; /* ??? 
*/ + fsinfo->lease_time = 60; + len = attrlen; + + if (bmval0 & FATTR4_WORD0_LEASE_TIME) { + len -= 4; + READ32(fsinfo->lease_time); + dprintk("read_attrs: lease_time=%d\n", fsinfo->lease_time); + } + if (bmval0 & FATTR4_WORD0_MAXFILESIZE) { + len -= 8; + READ64(fsinfo->maxfilesize); + dprintk("read_attrs: maxfilesize=0x%Lx\n", (long long) fsinfo->maxfilesize); + } + if (bmval0 & FATTR4_WORD0_MAXREAD) { + len -= 8; + READ64(fsinfo->rtmax); + fsinfo->rtpref = fsinfo->dtpref = fsinfo->rtmax; + dprintk("read_attrs: maxread=%d\n", fsinfo->rtmax); + } + if (bmval0 & FATTR4_WORD0_MAXWRITE) { + len -= 8; + READ64(fsinfo->wtmax); + fsinfo->wtpref = fsinfo->wtmax; + dprintk("read_attrs: maxwrite=%d\n", fsinfo->wtmax); + } + if (len != 0) + goto out_bad_attrlen; + return 0; +out_bad_attrlen: + printk(KERN_NOTICE "%s: server attribute length %u does not match bitmap 0x%x/0x%x\n", + __FUNCTION__, (unsigned int)attrlen, + (unsigned int) bmval0, (unsigned int)bmval1); + return -EIO; +out_bad_bitmap: + printk(KERN_NOTICE "%s: server returned bad attribute bitmap 0x%x/0x%x\n", + __FUNCTION__, + (unsigned int)bmval0, (unsigned int)bmval1); + return -EIO; +} + +static int decode_getfh(struct xdr_stream *xdr, struct nfs4_getfh *getfh) { struct nfs_fh *fh = getfh->gf_fhandle; @@ -1729,6 +2171,66 @@ decode_link(struct xdr_stream *xdr, stru return decode_change_info(xdr, link->ln_cinfo); } +/* + * We create the owner, so we know a proper owner.id length is 4. 
+ */ +static int +decode_lock_denied (struct xdr_stream *xdr, struct nfs_lock_denied *denied) +{ + uint32_t *p; + uint32_t namelen; + + READ_BUF(32); + READ64(denied->offset); + READ64(denied->length); + READ32(denied->type); + READ64(denied->owner.clientid); + READ32(namelen); + READ_BUF(namelen); + if (namelen == 4) + READ32(denied->owner.id); + return -NFS4ERR_DENIED; +} + +static int +decode_lock(struct xdr_stream *xdr, struct nfs_lockres *res) +{ + uint32_t *p; + int status; + + status = decode_op_hdr(xdr, OP_LOCK); + if (status == 0) { + READ_BUF(sizeof(nfs4_stateid)); + COPYMEM(&res->u.stateid, sizeof(res->u.stateid)); + } else if (status == -NFS4ERR_DENIED) + return decode_lock_denied(xdr, &res->u.denied); + return status; +} + +static int +decode_lockt(struct xdr_stream *xdr, struct nfs_lockres *res) +{ + int status; + status = decode_op_hdr(xdr, OP_LOCKT); + if (status == -NFS4ERR_DENIED) + return decode_lock_denied(xdr, &res->u.denied); + return status; +} + +static int +decode_locku(struct xdr_stream *xdr, struct nfs_lockres *res) +{ + uint32_t *p; + int status; + + status = decode_op_hdr(xdr, OP_LOCKU); + if (status == 0) { + READ_BUF(sizeof(nfs4_stateid)); + COPYMEM(&res->u.stateid, sizeof(res->u.stateid)); + } + return status; +} + static int decode_lookup(struct xdr_stream *xdr) { @@ -1769,15 +2271,29 @@ static int decode_open_confirm(struct xdr_stream *xdr, struct nfs_open_confirmres *res) { uint32_t *p; + int status; - res->status = decode_op_hdr(xdr, OP_OPEN_CONFIRM); - if (res->status) - return res->status; + status = decode_op_hdr(xdr, OP_OPEN_CONFIRM); + if (status) + return status; READ_BUF(sizeof(res->stateid.data)); COPYMEM(res->stateid.data, sizeof(res->stateid.data)); return 0; } +static int +decode_open_downgrade(struct xdr_stream *xdr, struct nfs_closeres *res) +{ + uint32_t *p; + int status; + + status = decode_op_hdr(xdr, OP_OPEN_DOWNGRADE); + if (status) + return status; + READ_BUF(sizeof(res->stateid.data)); + 
COPYMEM(res->stateid.data, sizeof(res->stateid.data)); + return 0; +} static int decode_putfh(struct xdr_stream *xdr) @@ -2011,7 +2527,7 @@ decode_setattr(struct xdr_stream *xdr, s } static int -decode_setclientid(struct xdr_stream *xdr, struct nfs4_setclientid *setclientid) +decode_setclientid(struct xdr_stream *xdr, struct nfs4_client *clp) { uint32_t *p; uint32_t opnum; @@ -2027,9 +2543,9 @@ decode_setclientid(struct xdr_stream *xd } READ32(nfserr); if (nfserr == NFS_OK) { - READ_BUF(8 + sizeof(setclientid->sc_state->cl_confirm.data)); - READ64(setclientid->sc_state->cl_clientid); - COPYMEM(setclientid->sc_state->cl_confirm.data, sizeof(setclientid->sc_state->cl_confirm.data)); + READ_BUF(8 + sizeof(clp->cl_confirm.data)); + READ64(clp->cl_clientid); + COPYMEM(clp->cl_confirm.data, sizeof(clp->cl_confirm.data)); } else if (nfserr == NFSERR_CLID_INUSE) { uint32_t len; @@ -2141,18 +2657,9 @@ decode_compound(struct xdr_stream *xdr, case OP_RENAME: status = decode_rename(xdr, &op->u.rename); break; - case OP_RENEW: - status = decode_renew(xdr); - break; case OP_SAVEFH: status = decode_savefh(xdr); break; - case OP_SETCLIENTID: - status = decode_setclientid(xdr, &op->u.setclientid); - break; - case OP_SETCLIENTID_CONFIRM: - status = decode_setclientid_confirm(xdr); - break; default: BUG(); return -EIO; @@ -2163,6 +2670,29 @@ decode_compound(struct xdr_stream *xdr, DECODE_TAIL; } + +/* + * Decode OPEN_DOWNGRADE response + */ +static int +nfs4_xdr_dec_open_downgrade(struct rpc_rqst *rqstp, uint32_t *p, struct nfs_closeres *res) +{ + struct xdr_stream xdr; + struct compound_hdr hdr; + int status; + + xdr_init_decode(&xdr, &rqstp->rq_rcv_buf, p); + status = decode_compound_hdr(&xdr, &hdr); + if (status) + goto out; + status = decode_putfh(&xdr); + if (status) + goto out; + status = decode_open_downgrade(&xdr, res); +out: + return status; +} + /* * END OF "GENERIC" DECODE ROUTINES. 
*/ @@ -2275,6 +2805,31 @@ out: } /* + * Decode OPEN_RECLAIM response + */ +static int +nfs4_xdr_dec_open_reclaim(struct rpc_rqst *rqstp, uint32_t *p, struct nfs_openres *res) +{ + struct xdr_stream xdr; + struct compound_hdr hdr; + int status; + + xdr_init_decode(&xdr, &rqstp->rq_rcv_buf, p); + status = decode_compound_hdr(&xdr, &hdr); + if (status) + goto out; + status = decode_putfh(&xdr); + if (status) + goto out; + status = decode_open(&xdr, res); + if (status) + goto out; + status = decode_getattr(&xdr, res->f_getattr, res->server); +out: + return status; +} + +/* * Decode SETATTR response */ static int @@ -2299,6 +2854,71 @@ out: return status; } +/* + * Decode LOCK response + */ +static int +nfs4_xdr_dec_lock(struct rpc_rqst *rqstp, uint32_t *p, struct nfs_lockres *res) +{ + struct xdr_stream xdr; + struct compound_hdr hdr; + int status; + + xdr_init_decode(&xdr, &rqstp->rq_rcv_buf, p); + status = decode_compound_hdr(&xdr, &hdr); + if (status) + goto out; + status = decode_putfh(&xdr); + if (status) + goto out; + status = decode_lock(&xdr, res); +out: + return status; +} + +/* + * Decode LOCKT response + */ +static int +nfs4_xdr_dec_lockt(struct rpc_rqst *rqstp, uint32_t *p, struct nfs_lockres *res) +{ + struct xdr_stream xdr; + struct compound_hdr hdr; + int status; + + xdr_init_decode(&xdr, &rqstp->rq_rcv_buf, p); + status = decode_compound_hdr(&xdr, &hdr); + if (status) + goto out; + status = decode_putfh(&xdr); + if (status) + goto out; + status = decode_lockt(&xdr, res); +out: + return status; +} + +/* + * Decode LOCKU response + */ +static int +nfs4_xdr_dec_locku(struct rpc_rqst *rqstp, uint32_t *p, struct nfs_lockres *res) +{ + struct xdr_stream xdr; + struct compound_hdr hdr; + int status; + + xdr_init_decode(&xdr, &rqstp->rq_rcv_buf, p); + status = decode_compound_hdr(&xdr, &hdr); + if (status) + goto out; + status = decode_putfh(&xdr); + if (status) + goto out; + status = decode_locku(&xdr, res); +out: + return status; +} /* * Decode Read response 
@@ -2391,6 +3011,87 @@ out: return status; } +/* + * Decode FSINFO response + */ +static int +nfs4_xdr_dec_fsinfo(struct rpc_rqst *req, uint32_t *p, struct nfs_fsinfo *fsinfo) +{ + struct xdr_stream xdr; + struct compound_hdr hdr; + int status; + + xdr_init_decode(&xdr, &req->rq_rcv_buf, p); + status = decode_compound_hdr(&xdr, &hdr); + if (!status) + status = decode_putfh(&xdr); + if (!status) + status = decode_fsinfo(&xdr, fsinfo); + if (!status) + status = -nfs_stat_to_errno(hdr.status); + return status; +} + +/* + * Decode RENEW response + */ +static int +nfs4_xdr_dec_renew(struct rpc_rqst *rqstp, uint32_t *p, void *dummy) +{ + struct xdr_stream xdr; + struct compound_hdr hdr; + int status; + + xdr_init_decode(&xdr, &rqstp->rq_rcv_buf, p); + status = decode_compound_hdr(&xdr, &hdr); + if (!status) + status = decode_renew(&xdr); + return status; +} + +/* + * Decode SETCLIENTID response + */ +static int +nfs4_xdr_dec_setclientid(struct rpc_rqst *req, uint32_t *p, + struct nfs4_client *clp) +{ + struct xdr_stream xdr; + struct compound_hdr hdr; + int status; + + xdr_init_decode(&xdr, &req->rq_rcv_buf, p); + status = decode_compound_hdr(&xdr, &hdr); + if (!status) + status = decode_setclientid(&xdr, clp); + if (!status) + status = -nfs_stat_to_errno(hdr.status); + return status; +} + +/* + * Decode SETCLIENTID_CONFIRM response + */ +static int +nfs4_xdr_dec_setclientid_confirm(struct rpc_rqst *req, uint32_t *p, struct nfs_fsinfo *fsinfo) +{ + struct xdr_stream xdr; + struct compound_hdr hdr; + int status; + + xdr_init_decode(&xdr, &req->rq_rcv_buf, p); + status = decode_compound_hdr(&xdr, &hdr); + if (!status) + status = decode_setclientid_confirm(&xdr); + if (!status) + status = decode_putrootfh(&xdr); + if (!status) + status = decode_fsinfo(&xdr, fsinfo); + if (!status) + status = -nfs_stat_to_errno(hdr.status); + return status; +} + uint32_t * nfs4_decode_dirent(uint32_t *p, struct nfs_entry *entry, int plus) { @@ -2426,6 +3127,67 @@ nfs4_decode_dirent(uint32_t *p, struct n return 
p; } +/* + * We need to translate between nfs status return values and + * the local errno values which may not be the same. + */ +static struct { + int stat; + int errno; +} nfs_errtbl[] = { + { NFS4_OK, 0 }, + { NFS4ERR_PERM, EPERM }, + { NFS4ERR_NOENT, ENOENT }, + { NFS4ERR_IO, errno_NFSERR_IO }, + { NFS4ERR_NXIO, ENXIO }, + { NFS4ERR_ACCESS, EACCES }, + { NFS4ERR_EXIST, EEXIST }, + { NFS4ERR_XDEV, EXDEV }, + { NFS4ERR_NOTDIR, ENOTDIR }, + { NFS4ERR_ISDIR, EISDIR }, + { NFS4ERR_INVAL, EINVAL }, + { NFS4ERR_FBIG, EFBIG }, + { NFS4ERR_NOSPC, ENOSPC }, + { NFS4ERR_ROFS, EROFS }, + { NFS4ERR_MLINK, EMLINK }, + { NFS4ERR_NAMETOOLONG, ENAMETOOLONG }, + { NFS4ERR_NOTEMPTY, ENOTEMPTY }, + { NFS4ERR_DQUOT, EDQUOT }, + { NFS4ERR_STALE, ESTALE }, + { NFS4ERR_BADHANDLE, EBADHANDLE }, + { NFS4ERR_BAD_COOKIE, EBADCOOKIE }, + { NFS4ERR_NOTSUPP, ENOTSUPP }, + { NFS4ERR_TOOSMALL, ETOOSMALL }, + { NFS4ERR_SERVERFAULT, ESERVERFAULT }, + { NFS4ERR_BADTYPE, EBADTYPE }, + { NFS4ERR_LOCKED, EAGAIN }, + { NFS4ERR_RESOURCE, EREMOTEIO }, + { NFS4ERR_SYMLINK, ELOOP }, + { NFS4ERR_OP_ILLEGAL, EOPNOTSUPP }, + { NFS4ERR_DEADLOCK, EDEADLK }, + { -1, EIO } +}; + +/* + * Convert an NFSv4 status code to a local errno value. + * This file-local copy is used by the NFSv4 XDR routines only. + */ +static int +nfs_stat_to_errno(int stat) +{ + int i; + for (i = 0; nfs_errtbl[i].stat != -1; i++) { + if (nfs_errtbl[i].stat == stat) + return nfs_errtbl[i].errno; + } + /* If we cannot translate the error, the recovery routines should + * handle it. + * Note: remaining NFSv4 error codes have values > 10000, so should + * not conflict with native Linux error codes. + */ + return stat; +} + #ifndef MAX # define MAX(a, b) (((a) > (b))? 
(a) : (b)) #endif @@ -2445,8 +3207,17 @@ struct rpc_procinfo nfs4_procedures[] = PROC(COMMIT, enc_commit, dec_commit), PROC(OPEN, enc_open, dec_open), PROC(OPEN_CONFIRM, enc_open_confirm, dec_open_confirm), + PROC(OPEN_RECLAIM, enc_open_reclaim, dec_open_reclaim), + PROC(OPEN_DOWNGRADE, enc_open_downgrade, dec_open_downgrade), PROC(CLOSE, enc_close, dec_close), PROC(SETATTR, enc_setattr, dec_setattr), + PROC(FSINFO, enc_fsinfo, dec_fsinfo), + PROC(RENEW, enc_renew, dec_renew), + PROC(SETCLIENTID, enc_setclientid, dec_setclientid), + PROC(SETCLIENTID_CONFIRM, enc_setclientid_confirm, dec_setclientid_confirm), + PROC(LOCK, enc_lock, dec_lock), + PROC(LOCKT, enc_lockt, dec_lockt), + PROC(LOCKU, enc_locku, dec_locku), }; struct rpc_version nfs_version4 = { diff -u --recursive --new-file --show-c-function linux-2.6.1-rc3/fs/nfs/proc.c linux-2.6.1-37-attr/fs/nfs/proc.c --- linux-2.6.1-rc3/fs/nfs/proc.c 2004-01-08 17:45:11.000000000 -0500 +++ linux-2.6.1-37-attr/fs/nfs/proc.c 2004-01-08 18:08:14.000000000 -0500 @@ -42,24 +42,13 @@ #include #include #include +#include #include #define NFSDBG_FACILITY NFSDBG_PROC extern struct rpc_procinfo nfs_procedures[]; -static void -nfs_write_refresh_inode(struct inode *inode, struct nfs_fattr *fattr) -{ - if (!(fattr->valid & NFS_ATTR_WCC)) { - fattr->pre_size = NFS_CACHE_ISIZE(inode); - fattr->pre_mtime = NFS_CACHE_MTIME(inode); - fattr->pre_ctime = NFS_CACHE_CTIME(inode); - fattr->valid |= NFS_ATTR_WCC; - } - nfs_refresh_inode(inode, fattr); -} - static struct rpc_cred * nfs_cred(struct inode *inode, struct file *filp) { @@ -204,7 +193,7 @@ nfs_proc_write(struct nfs_write_data *wd msg.rpc_cred = nfs_cred(inode, filp); status = rpc_call_sync(NFS_CLIENT(inode), &msg, flags); if (status >= 0) { - nfs_write_refresh_inode(inode, fattr); + nfs_refresh_inode(inode, fattr); wdata->res.count = wdata->args.count; wdata->verf.committed = NFS_FILE_SYNC; } @@ -330,10 +319,8 @@ nfs_proc_unlink_done(struct dentry *dir, { struct rpc_message *msg = 
&task->tk_msg; - if (msg->rpc_argp) { - NFS_CACHEINV(dir->d_inode); + if (msg->rpc_argp) kfree(msg->rpc_argp); - } return 0; } @@ -583,7 +570,7 @@ nfs_write_done(struct rpc_task *task) struct nfs_write_data *data = (struct nfs_write_data *) task->tk_calldata; if (task->tk_status >= 0) - nfs_write_refresh_inode(data->inode, data->res.fattr); + nfs_refresh_inode(data->inode, data->res.fattr); nfs_writeback_done(task); } @@ -653,9 +640,17 @@ nfs_request_compatible(struct nfs_page * return 1; } +static int +nfs_proc_lock(struct file *filp, int cmd, struct file_lock *fl) +{ + return nlmclnt_proc(filp->f_dentry->d_inode, cmd, fl); +} + struct nfs_rpc_ops nfs_v2_clientops = { .version = 2, /* protocol version */ + .dentry_ops = &nfs_dentry_operations, + .dir_inode_ops = &nfs_dir_inode_operations, .getroot = nfs_proc_get_root, .getattr = nfs_proc_getattr, .setattr = nfs_proc_setattr, @@ -687,4 +682,5 @@ struct nfs_rpc_ops nfs_v2_clientops = { .file_release = nfs_release, .request_init = nfs_request_init, .request_compatible = nfs_request_compatible, + .lock = nfs_proc_lock, }; diff -u --recursive --new-file --show-c-function linux-2.6.1-rc3/fs/nfs/read.c linux-2.6.1-37-attr/fs/nfs/read.c --- linux-2.6.1-rc3/fs/nfs/read.c 2004-01-08 17:32:41.000000000 -0500 +++ linux-2.6.1-37-attr/fs/nfs/read.c 2004-01-08 18:08:14.000000000 -0500 @@ -124,6 +124,7 @@ nfs_readpage_sync(struct file *file, str if (result < rdata.args.count) /* NFSv2ism */ break; } while (count); + NFS_FLAGS(inode) |= NFS_INO_INVALID_ATIME; if (count) memclear_highpage_flush(page, rdata.args.pgbase, count); @@ -266,6 +267,7 @@ nfs_readpage_result(struct rpc_task *tas dprintk("NFS: %4d nfs_readpage_result, (status %d)\n", task->tk_pid, task->tk_status); + NFS_FLAGS(data->inode) |= NFS_INO_INVALID_ATIME; while (!list_empty(&data->pages)) { struct nfs_page *req = nfs_list_entry(data->pages.next); struct page *page = req->wb_page; diff -u --recursive --new-file --show-c-function linux-2.6.1-rc3/fs/nfs/unlink.c 
linux-2.6.1-37-attr/fs/nfs/unlink.c --- linux-2.6.1-rc3/fs/nfs/unlink.c 2004-01-08 17:45:22.000000000 -0500 +++ linux-2.6.1-37-attr/fs/nfs/unlink.c 2004-01-08 18:08:14.000000000 -0500 @@ -104,6 +104,7 @@ nfs_async_unlink_init(struct rpc_task *t status = NFS_PROTO(dir->d_inode)->unlink_setup(&msg, dir, &data->name); if (status < 0) goto out_err; + nfs_begin_data_update(dir->d_inode); rpc_call_setup(task, &msg, 0); return; out_err: @@ -126,7 +127,7 @@ nfs_async_unlink_done(struct rpc_task *t if (!dir) return; dir_i = dir->d_inode; - nfs_zap_caches(dir_i); + nfs_end_data_update(dir_i); if (NFS_PROTO(dir_i)->unlink_done(dir, task)) return; put_rpccred(data->cred); diff -u --recursive --new-file --show-c-function linux-2.6.1-rc3/fs/nfs/write.c linux-2.6.1-37-attr/fs/nfs/write.c --- linux-2.6.1-rc3/fs/nfs/write.c 2004-01-08 17:42:43.000000000 -0500 +++ linux-2.6.1-37-attr/fs/nfs/write.c 2004-01-08 18:08:14.000000000 -0500 @@ -157,6 +157,7 @@ nfs_writepage_sync(struct file *file, st (long long)NFS_FILEID(inode), count, (long long)(page_offset(page) + offset)); + nfs_begin_data_update(inode); do { if (count < wsize && !swapfile) wdata.args.count = count; @@ -185,6 +186,7 @@ nfs_writepage_sync(struct file *file, st if (wdata.args.offset > i_size_read(inode)) i_size_write(inode, wdata.args.offset); } while (count); + nfs_end_data_update(inode); if (PageError(page)) ClearPageError(page); @@ -204,6 +206,7 @@ nfs_writepage_async(struct file *file, s loff_t end; int status; + nfs_begin_data_update(inode); req = nfs_update_request(file, inode, page, offset, count); status = (IS_ERR(req)) ? 
PTR_ERR(req) : 0; if (status < 0) @@ -213,6 +216,7 @@ nfs_writepage_async(struct file *file, s end = ((loff_t)page->index<> PAGE_CACHE_SHIFT; /* Ensure we've flushed out any previous writes */ @@ -247,7 +262,8 @@ nfs_writepage(struct page *page, struct goto out; do_it: lock_kernel(); - if (NFS_SERVER(inode)->wsize >= PAGE_CACHE_SIZE && !IS_SYNC(inode)) { + if (NFS_SERVER(inode)->wsize >= PAGE_CACHE_SIZE && !IS_SYNC(inode) && + inode_referenced) { err = nfs_writepage_async(NULL, inode, page, 0, offset); if (err >= 0) err = 0; @@ -259,6 +275,8 @@ do_it: unlock_kernel(); out: unlock_page(page); + if (inode_referenced) + iput(inode); return err; } @@ -298,8 +316,10 @@ nfs_inode_add_request(struct inode *inod BUG_ON(error == -EEXIST); if (error) return error; - if (!nfsi->npages) + if (!nfsi->npages) { igrab(inode); + nfs_begin_data_update(inode); + } nfsi->npages++; req->wb_count++; return 0; @@ -322,6 +342,7 @@ nfs_inode_remove_request(struct nfs_page nfsi->npages--; if (!nfsi->npages) { spin_unlock(&nfs_wreq_lock); + nfs_end_data_update(inode); iput(inode); } else spin_unlock(&nfs_wreq_lock); @@ -682,6 +703,7 @@ nfs_updatepage(struct file *file, struct return status; } + nfs_begin_data_update(inode); /* * Try to find an NFS request corresponding to this page * and update it. @@ -715,6 +737,7 @@ nfs_updatepage(struct file *file, struct } else nfs_unlock_request(req); done: + nfs_end_data_update(inode); dprintk("NFS: nfs_updatepage returns %d (isize %Ld)\n", status, (long long)i_size_read(inode)); if (status < 0) @@ -877,10 +900,7 @@ nfs_writeback_done(struct rpc_task *task #endif /* - * Update attributes as result of writeback. - * FIXME: There is an inherent race with invalidate_inode_pages and - * writebacks since the page->count is kept > 1 for as long - * as the page has a write request pending. 
+ * Process the nfs_page list */ while (!list_empty(&data->pages)) { req = nfs_list_entry(data->pages.next); @@ -1074,9 +1094,12 @@ int nfs_commit_file(struct inode *inode, spin_lock(&nfs_wreq_lock); res = nfs_scan_commit(inode, &head, file, idx_start, npages); - spin_unlock(&nfs_wreq_lock); - if (res) + if (res) { + res += nfs_scan_commit(inode, &head, NULL, 0, 0); + spin_unlock(&nfs_wreq_lock); error = nfs_commit_list(&head, how); + } else + spin_unlock(&nfs_wreq_lock); if (error < 0) return error; return res; diff -u --recursive --new-file --show-c-function linux-2.6.1-rc3/include/linux/fs.h linux-2.6.1-37-attr/include/linux/fs.h --- linux-2.6.1-rc3/include/linux/fs.h 2004-01-08 17:36:25.000000000 -0500 +++ linux-2.6.1-37-attr/include/linux/fs.h 2004-01-08 18:08:14.000000000 -0500 @@ -137,6 +137,7 @@ extern int leases_enable, dir_notify_ena #define S_DEAD 32 /* removed, but still open directory */ #define S_NOQUOTA 64 /* Inode is not counted to quota */ #define S_DIRSYNC 128 /* Directory modifications are synchronous */ +#define S_NOCMTIME 256 /* Do not update file c/mtime */ /* * Note that nosuid etc flags are inode-specific: setting some file-system @@ -170,6 +171,7 @@ extern int leases_enable, dir_notify_ena #define IS_ONE_SECOND(inode) __IS_FLG(inode, MS_ONE_SECOND) #define IS_DEADDIR(inode) ((inode)->i_flags & S_DEAD) +#define IS_NOCMTIME(inode) ((inode)->i_flags & S_NOCMTIME) /* the read-only stuff doesn't really belong here, but any other place is probably as bad and I don't want to create yet another include file. 
*/ diff -u --recursive --new-file --show-c-function linux-2.6.1-rc3/include/linux/nfs4.h linux-2.6.1-37-attr/include/linux/nfs4.h --- linux-2.6.1-rc3/include/linux/nfs4.h 2004-01-08 17:39:07.000000000 -0500 +++ linux-2.6.1-37-attr/include/linux/nfs4.h 2004-01-08 18:07:57.000000000 -0500 @@ -88,6 +88,76 @@ enum nfs_opnum4 { OP_WRITE = 38, }; +enum nfsstat4 { + NFS4_OK = 0, + NFS4ERR_PERM = 1, + NFS4ERR_NOENT = 2, + NFS4ERR_IO = 5, + NFS4ERR_NXIO = 6, + NFS4ERR_ACCESS = 13, + NFS4ERR_EXIST = 17, + NFS4ERR_XDEV = 18, + /* Unused/reserved 19 */ + NFS4ERR_NOTDIR = 20, + NFS4ERR_ISDIR = 21, + NFS4ERR_INVAL = 22, + NFS4ERR_FBIG = 27, + NFS4ERR_NOSPC = 28, + NFS4ERR_ROFS = 30, + NFS4ERR_MLINK = 31, + NFS4ERR_NAMETOOLONG = 63, + NFS4ERR_NOTEMPTY = 66, + NFS4ERR_DQUOT = 69, + NFS4ERR_STALE = 70, + NFS4ERR_BADHANDLE = 10001, + NFS4ERR_BAD_COOKIE = 10003, + NFS4ERR_NOTSUPP = 10004, + NFS4ERR_TOOSMALL = 10005, + NFS4ERR_SERVERFAULT = 10006, + NFS4ERR_BADTYPE = 10007, + NFS4ERR_DELAY = 10008, + NFS4ERR_SAME = 10009, + NFS4ERR_DENIED = 10010, + NFS4ERR_EXPIRED = 10011, + NFS4ERR_LOCKED = 10012, + NFS4ERR_GRACE = 10013, + NFS4ERR_FHEXPIRED = 10014, + NFS4ERR_SHARE_DENIED = 10015, + NFS4ERR_WRONGSEC = 10016, + NFS4ERR_CLID_INUSE = 10017, + NFS4ERR_RESOURCE = 10018, + NFS4ERR_MOVED = 10019, + NFS4ERR_NOFILEHANDLE = 10020, + NFS4ERR_MINOR_VERS_MISMATCH = 10021, + NFS4ERR_STALE_CLIENTID = 10022, + NFS4ERR_STALE_STATEID = 10023, + NFS4ERR_OLD_STATEID = 10024, + NFS4ERR_BAD_STATEID = 10025, + NFS4ERR_BAD_SEQID = 10026, + NFS4ERR_NOT_SAME = 10027, + NFS4ERR_LOCK_RANGE = 10028, + NFS4ERR_SYMLINK = 10029, + NFS4ERR_RESTOREFH = 10030, + NFS4ERR_LEASE_MOVED = 10031, + NFS4ERR_ATTRNOTSUPP = 10032, + NFS4ERR_NO_GRACE = 10033, + NFS4ERR_RECLAIM_BAD = 10034, + NFS4ERR_RECLAIM_CONFLICT = 10035, + NFS4ERR_BADXDR = 10036, + NFS4ERR_LOCKS_HELD = 10037, + NFS4ERR_OPENMODE = 10038, + NFS4ERR_BADOWNER = 10039, + NFS4ERR_BADCHAR = 10040, + NFS4ERR_BADNAME = 10041, + NFS4ERR_BAD_RANGE = 10042, + 
NFS4ERR_LOCK_NOTSUPP = 10043, + NFS4ERR_OP_ILLEGAL = 10044, + NFS4ERR_DEADLOCK = 10045, + NFS4ERR_FILE_OPEN = 10046, + NFS4ERR_ADMIN_REVOKED = 10047, + NFS4ERR_CB_PATH_DOWN = 10048 +}; + /* * Note: NF4BAD is not actually part of the protocol; it is just used * internally by nfsd. @@ -219,8 +289,17 @@ enum { NFSPROC4_CLNT_COMMIT, NFSPROC4_CLNT_OPEN, NFSPROC4_CLNT_OPEN_CONFIRM, + NFSPROC4_CLNT_OPEN_RECLAIM, + NFSPROC4_CLNT_OPEN_DOWNGRADE, NFSPROC4_CLNT_CLOSE, NFSPROC4_CLNT_SETATTR, + NFSPROC4_CLNT_FSINFO, + NFSPROC4_CLNT_RENEW, + NFSPROC4_CLNT_SETCLIENTID, + NFSPROC4_CLNT_SETCLIENTID_CONFIRM, + NFSPROC4_CLNT_LOCK, + NFSPROC4_CLNT_LOCKT, + NFSPROC4_CLNT_LOCKU, }; #endif diff -u --recursive --new-file --show-c-function linux-2.6.1-rc3/include/linux/nfs_fs.h linux-2.6.1-37-attr/include/linux/nfs_fs.h --- linux-2.6.1-rc3/include/linux/nfs_fs.h 2004-01-08 17:37:38.000000000 -0500 +++ linux-2.6.1-37-attr/include/linux/nfs_fs.h 2004-01-08 18:08:14.000000000 -0500 @@ -28,6 +28,7 @@ #include #include #include +#include /* * Enable debugging support for nfs client. @@ -98,7 +99,7 @@ struct nfs_inode { /* * Various flags */ - unsigned short flags; + unsigned int flags; /* * read_cache_jiffies is when we started read-caching this inode, @@ -117,19 +118,22 @@ struct nfs_inode { * * mtime != read_cache_mtime */ + unsigned long readdir_timestamp; unsigned long read_cache_jiffies; - struct timespec read_cache_ctime; - struct timespec read_cache_mtime; - __u64 read_cache_isize; unsigned long attrtimeo; unsigned long attrtimeo_timestamp; __u64 change_attr; /* v4 only */ + /* "Generation counter" for the attribute cache. This is + * bumped whenever we update the metadata on the + * server. + */ + unsigned long cache_change_attribute; /* - * Timestamp that dates the change made to read_cache_mtime. - * This is of use for dentry revalidation + * Counter indicating the number of outstanding requests that + * will cause a file data update. 
*/ - unsigned long cache_mtime_jiffies; + atomic_t data_updates; struct nfs_access_cache cache_access; @@ -169,7 +173,9 @@ struct nfs_inode { #define NFS_INO_STALE 0x0001 /* possible stale inode */ #define NFS_INO_ADVISE_RDPLUS 0x0002 /* advise readdirplus */ #define NFS_INO_REVALIDATING 0x0004 /* revalidating attrs */ -#define NFS_INO_FLUSH 0x0008 /* inode is due for flushing */ +#define NFS_INO_INVALID_ATTR 0x0008 /* cached attrs are invalid */ +#define NFS_INO_INVALID_DATA 0x0010 /* cached data is invalid */ +#define NFS_INO_INVALID_ATIME 0x0020 /* cached atime is invalid */ #define NFS_INO_FAKE_ROOT 0x0080 /* root inode placeholder */ static inline struct nfs_inode *NFS_I(struct inode *inode) @@ -185,15 +191,7 @@ static inline struct nfs_inode *NFS_I(st #define NFS_ADDR(inode) (RPC_PEERADDR(NFS_CLIENT(inode))) #define NFS_COOKIEVERF(inode) (NFS_I(inode)->cookieverf) #define NFS_READTIME(inode) (NFS_I(inode)->read_cache_jiffies) -#define NFS_MTIME_UPDATE(inode) (NFS_I(inode)->cache_mtime_jiffies) -#define NFS_CACHE_CTIME(inode) (NFS_I(inode)->read_cache_ctime) -#define NFS_CACHE_MTIME(inode) (NFS_I(inode)->read_cache_mtime) -#define NFS_CACHE_ISIZE(inode) (NFS_I(inode)->read_cache_isize) #define NFS_CHANGE_ATTR(inode) (NFS_I(inode)->change_attr) -#define NFS_CACHEINV(inode) \ -do { \ - NFS_READTIME(inode) = jiffies - NFS_MAXATTRTIMEO(inode) - 1; \ -} while (0) #define NFS_ATTRTIMEO(inode) (NFS_I(inode)->attrtimeo) #define NFS_MINATTRTIMEO(inode) \ (S_ISDIR(inode->i_mode)? 
NFS_SERVER(inode)->acdirmin \ @@ -210,6 +208,17 @@ do { \ #define NFS_FILEID(inode) (NFS_I(inode)->fileid) +static inline int nfs_caches_unstable(struct inode *inode) +{ + return atomic_read(&NFS_I(inode)->data_updates) != 0; +} + +static inline void NFS_CACHEINV(struct inode *inode) +{ + if (!nfs_caches_unstable(inode)) + NFS_FLAGS(inode) |= NFS_INO_INVALID_ATTR; +} + static inline int nfs_server_capable(struct inode *inode, int cap) { return NFS_SERVER(inode)->caps & cap; @@ -226,13 +235,37 @@ loff_t page_offset(struct page *page) return ((loff_t)page->index) << PAGE_CACHE_SHIFT; } +/** + * nfs_save_change_attribute - Returns the inode attribute change cookie + * @inode - pointer to inode + * The "change attribute" is updated every time we finish an operation + * that will result in a metadata change on the server. + */ +static inline long nfs_save_change_attribute(struct inode *inode) +{ + return NFS_I(inode)->cache_change_attribute; +} + +/** + * nfs_verify_change_attribute - Detects NFS inode cache updates + * @inode - pointer to inode + * @chattr - previously saved change attribute + * Return "false" if metadata has been updated (or is in the process of + * being updated) since the change attribute was saved. 
+ */ +static inline int nfs_verify_change_attribute(struct inode *inode, unsigned long chattr) +{ + return !nfs_caches_unstable(inode) + && chattr == NFS_I(inode)->cache_change_attribute; +} + /* * linux/fs/nfs/inode.c */ extern void nfs_zap_caches(struct inode *); extern struct inode *nfs_fhget(struct super_block *, struct nfs_fh *, struct nfs_fattr *); -extern int __nfs_refresh_inode(struct inode *, struct nfs_fattr *); +extern int nfs_refresh_inode(struct inode *, struct nfs_fattr *); extern int nfs_getattr(struct vfsmount *, struct dentry *, struct kstat *); extern int nfs_permission(struct inode *, int, struct nameidata *); extern void nfs_set_mmcred(struct inode *, struct rpc_cred *); @@ -240,6 +273,10 @@ extern int nfs_open(struct inode *, stru extern int nfs_release(struct inode *, struct file *); extern int __nfs_revalidate_inode(struct nfs_server *, struct inode *); extern int nfs_setattr(struct dentry *, struct iattr *); +extern void nfs_begin_attr_update(struct inode *); +extern void nfs_end_attr_update(struct inode *); +extern void nfs_begin_data_update(struct inode *); +extern void nfs_end_data_update(struct inode *); /* * linux/fs/nfs/file.c @@ -383,20 +420,27 @@ extern int nfsroot_mount(struct sockadd /* * inline functions */ -static inline int -nfs_revalidate_inode(struct nfs_server *server, struct inode *inode) + +static inline int nfs_attribute_timeout(struct inode *inode) { - if (time_before(jiffies, NFS_READTIME(inode)+NFS_ATTRTIMEO(inode))) - return NFS_STALE(inode) ? 
-ESTALE : 0; - return __nfs_revalidate_inode(server, inode); + struct nfs_inode *nfsi = NFS_I(inode); + + return time_after(jiffies, nfsi->read_cache_jiffies+nfsi->attrtimeo); } -static inline int -nfs_refresh_inode(struct inode *inode, struct nfs_fattr *fattr) +/** + * nfs_revalidate_inode - Revalidate the inode attributes + * @server - pointer to nfs_server struct + * @inode - pointer to inode struct + * + * Updates inode attribute information by retrieving the data from the server. + */ +static inline int nfs_revalidate_inode(struct nfs_server *server, struct inode *inode) { - if ((fattr->valid & NFS_ATTR_FATTR) == 0) - return 0; - return __nfs_refresh_inode(inode,fattr); + if (!(NFS_FLAGS(inode) & (NFS_INO_INVALID_ATTR|NFS_INO_INVALID_DATA)) + && !nfs_attribute_timeout(inode)) + return NFS_STALE(inode) ? -ESTALE : 0; + return __nfs_revalidate_inode(server, inode); } static inline loff_t @@ -438,6 +482,8 @@ extern void * nfs_root_data(void); #ifdef CONFIG_NFS_V4 +struct idmap; + /* * In a seqid-mutating op, this macro controls which error return * values trigger incrementation of the seqid. 
@@ -465,6 +511,7 @@ extern void * nfs_root_data(void); enum nfs4_client_state { NFS4CLNT_OK = 0, NFS4CLNT_NEW, + NFS4CLNT_SETUP_STATE, }; /* @@ -475,7 +522,8 @@ struct nfs4_client { struct in_addr cl_addr; /* Server identifier */ u64 cl_clientid; /* constant */ nfs4_verifier cl_confirm; - enum nfs4_client_state cl_state; + unsigned long cl_state; + long cl_generation; u32 cl_lockowner_id; @@ -490,6 +538,27 @@ struct nfs4_client { int cl_nunused; spinlock_t cl_lock; atomic_t cl_count; + + struct rpc_clnt * cl_rpcclient; + struct rpc_cred * cl_cred; + + struct list_head cl_superblocks; /* List of nfs_server structs */ + + unsigned long cl_lease_time; + unsigned long cl_last_renewal; + struct work_struct cl_renewd; + struct work_struct cl_recoverd; + + wait_queue_head_t cl_waitq; + struct rpc_wait_queue cl_rpcwaitq; + + /* idmapper */ + struct idmap * cl_idmap; + + /* Our own IP address, as a null-terminated string. + * This is used to generate the clientid, and the callback address. + */ + char cl_ipaddr[16]; }; /* @@ -509,6 +578,7 @@ struct nfs4_state_owner { u32 so_seqid; /* protected by so_sema */ unsigned int so_flags; /* protected by so_sema */ atomic_t so_count; + long so_generation; struct rpc_cred *so_cred; /* Associated cred */ struct list_head so_states; @@ -516,73 +586,105 @@ struct nfs4_state_owner { /* * struct nfs4_state maintains the client-side state for a given - * (state_owner,inode) tuple. + * (state_owner,inode) tuple (OPEN) or state_owner (LOCK). * + * OPEN: * In order to know when to OPEN_DOWNGRADE or CLOSE the state on the server, * we need to know how many files are open for reading or writing on a * given inode. This information too is stored here. 
+ * + * LOCK: one nfs4_state (LOCK) to hold the lock stateid nfs4_state(OPEN) */ + +struct nfs4_lock_state { + struct list_head ls_locks; /* Other lock stateids */ + fl_owner_t ls_owner; /* POSIX lock owner */ + struct nfs4_state * ls_parent; /* Parent nfs4_state */ + u32 ls_seqid; + u32 ls_id; + nfs4_stateid ls_stateid; + atomic_t ls_count; +}; + +/* bits for nfs4_state->flags */ +enum { + LK_STATE_IN_USE, +}; + struct nfs4_state { struct list_head open_states; /* List of states for the same state_owner */ struct list_head inode_states; /* List of states for the same inode */ + struct list_head lock_states; /* List of subservient lock stateids */ struct nfs4_state_owner *owner; /* Pointer to the open owner */ struct inode *inode; /* Pointer to the inode */ - pid_t pid; /* Thread that called OPEN */ + + unsigned long flags; /* Do we hold any locks? */ + struct semaphore lock_sema; /* Serializes file locking operations */ + rwlock_t state_lock; /* Protects the lock_states list */ nfs4_stateid stateid; + unsigned int nreaders; + unsigned int nwriters; int state; /* State on the server (R,W, or RW) */ atomic_t count; }; +extern struct dentry_operations nfs4_dentry_operations; +extern struct inode_operations nfs4_dir_inode_operations; + /* nfs4proc.c */ -extern int nfs4_proc_renew(struct nfs_server *server); +extern int nfs4_proc_setclientid(struct nfs4_client *, u32, unsigned short); +extern int nfs4_proc_setclientid_confirm(struct nfs4_client *); +extern int nfs4_open_reclaim(struct nfs4_state_owner *, struct nfs4_state *); +extern int nfs4_proc_async_renew(struct nfs4_client *); +extern int nfs4_proc_renew(struct nfs4_client *); extern int nfs4_do_close(struct inode *, struct nfs4_state *); +int nfs4_do_downgrade(struct inode *inode, struct nfs4_state *state, mode_t mode); +extern int nfs4_wait_clnt_recover(struct rpc_clnt *, struct nfs4_client *); +extern struct inode *nfs4_atomic_open(struct inode *, struct dentry *, struct nameidata *); +extern int 
nfs4_open_revalidate(struct inode *, struct dentry *, int); /* nfs4renewd.c */ -extern int nfs4_init_renewd(struct nfs_server *server); +extern void nfs4_schedule_state_renewal(struct nfs4_client *); +extern void nfs4_renewd_prepare_shutdown(struct nfs_server *); +extern void nfs4_kill_renewd(struct nfs4_client *); /* nfs4state.c */ +extern void init_nfsv4_state(struct nfs_server *); +extern void destroy_nfsv4_state(struct nfs_server *); extern struct nfs4_client *nfs4_get_client(struct in_addr *); extern void nfs4_put_client(struct nfs4_client *clp); +extern u32 nfs4_alloc_lockowner_id(struct nfs4_client *); + extern struct nfs4_state_owner * nfs4_get_state_owner(struct nfs_server *, struct rpc_cred *); extern void nfs4_put_state_owner(struct nfs4_state_owner *); extern struct nfs4_state * nfs4_get_open_state(struct inode *, struct nfs4_state_owner *); extern void nfs4_put_open_state(struct nfs4_state *); -extern void nfs4_increment_seqid(u32 status, struct nfs4_state_owner *sp); - - - +extern void nfs4_close_state(struct nfs4_state *, mode_t); +extern struct nfs4_state *nfs4_find_state(struct inode *, struct rpc_cred *, mode_t mode); +extern void nfs4_increment_seqid(int status, struct nfs4_state_owner *sp); +extern int nfs4_handle_error(struct nfs_server *, int); +extern void nfs4_schedule_state_recovery(struct nfs4_client *); +extern struct nfs4_lock_state *nfs4_find_lock_state(struct nfs4_state *state, fl_owner_t); +extern struct nfs4_lock_state *nfs4_alloc_lock_state(struct nfs4_state *state, fl_owner_t); +extern void nfs4_put_lock_state(struct nfs4_lock_state *state); +extern void nfs4_increment_lock_seqid(int status, struct nfs4_lock_state *ls); +extern void nfs4_notify_setlk(struct inode *, struct file_lock *, struct nfs4_lock_state *); +extern void nfs4_notify_unlck(struct inode *, struct file_lock *, struct nfs4_lock_state *); +extern void nfs4_copy_stateid(nfs4_stateid *, struct nfs4_state *, fl_owner_t); struct nfs4_mount_data; -static inline int 
-create_nfsv4_state(struct nfs_server *server, struct nfs4_mount_data *data) -{ - server->nfs4_state = NULL; - return 0; -} - -static inline void -destroy_nfsv4_state(struct nfs_server *server) -{ - if (server->mnt_path) { - kfree(server->mnt_path); - server->mnt_path = NULL; - } - if (server->nfs4_state) { - nfs4_put_client(server->nfs4_state); - server->nfs4_state = NULL; - } -} #else -#define create_nfsv4_state(server, data) 0 +#define init_nfsv4_state(server) do { } while (0) #define destroy_nfsv4_state(server) do { } while (0) #define nfs4_put_state_owner(inode, owner) do { } while (0) #define nfs4_put_open_state(state) do { } while (0) +#define nfs4_renewd_prepare_shutdown(server) do { } while (0) #endif #endif /* __KERNEL__ */ diff -u --recursive --new-file --show-c-function linux-2.6.1-rc3/include/linux/nfs_fs_sb.h linux-2.6.1-37-attr/include/linux/nfs_fs_sb.h --- linux-2.6.1-rc3/include/linux/nfs_fs_sb.h 2004-01-08 17:47:05.000000000 -0500 +++ linux-2.6.1-37-attr/include/linux/nfs_fs_sb.h 2004-01-08 18:07:25.000000000 -0500 @@ -35,9 +35,9 @@ struct nfs_server { char ip_addr[16]; char * mnt_path; struct nfs4_client * nfs4_state; /* all NFSv4 state starts here */ - unsigned long lease_time; /* in jiffies */ - unsigned long last_renewal; /* in jiffies */ - void *idmap; + struct list_head nfs4_siblings; /* List of other nfs_server structs + * that share the same clientid + */ #endif }; diff -u --recursive --new-file --show-c-function linux-2.6.1-rc3/include/linux/nfs_idmap.h linux-2.6.1-37-attr/include/linux/nfs_idmap.h --- linux-2.6.1-rc3/include/linux/nfs_idmap.h 2004-01-08 17:37:37.000000000 -0500 +++ linux-2.6.1-37-attr/include/linux/nfs_idmap.h 2004-01-08 18:07:25.000000000 -0500 @@ -52,18 +52,21 @@ #define IDMAP_STATUS_SUCCESS 0x08 struct idmap_msg { - u_int8_t im_type; - u_int8_t im_conv; - char im_name[IDMAP_NAMESZ]; - u_int32_t im_id; - u_int8_t im_status; + __u8 im_type; + __u8 im_conv; + char im_name[IDMAP_NAMESZ]; + __u32 im_id; + __u8 im_status; 
}; #ifdef __KERNEL__ -void *nfs_idmap_new(struct nfs_server *); -void nfs_idmap_delete(struct nfs_server *); -int nfs_idmap_id(struct nfs_server *, u_int8_t, char *, u_int, uid_t *); -int nfs_idmap_name(struct nfs_server *, u_int8_t, uid_t, char *, u_int *); +void nfs_idmap_new(struct nfs4_client *); +void nfs_idmap_delete(struct nfs4_client *); + +int nfs_map_name_to_uid(struct nfs4_client *, const char *, size_t, __u32 *); +int nfs_map_group_to_gid(struct nfs4_client *, const char *, size_t, __u32 *); +int nfs_map_uid_to_name(struct nfs4_client *, __u32, char *); +int nfs_map_gid_to_group(struct nfs4_client *, __u32, char *); #endif /* __KERNEL__ */ #endif /* NFS_IDMAP_H */ diff -u --recursive --new-file --show-c-function linux-2.6.1-rc3/include/linux/nfs_page.h linux-2.6.1-37-attr/include/linux/nfs_page.h --- linux-2.6.1-rc3/include/linux/nfs_page.h 2004-01-08 17:34:25.000000000 -0500 +++ linux-2.6.1-37-attr/include/linux/nfs_page.h 2004-01-08 18:07:57.000000000 -0500 @@ -26,6 +26,7 @@ struct nfs_page { struct list_head wb_list, /* Defines state of page: */ *wb_list_head; /* read/write/commit */ struct file *wb_file; + fl_owner_t wb_lockowner; struct inode *wb_inode; struct rpc_cred *wb_cred; struct nfs4_state *wb_state; diff -u --recursive --new-file --show-c-function linux-2.6.1-rc3/include/linux/nfs_xdr.h linux-2.6.1-37-attr/include/linux/nfs_xdr.h --- linux-2.6.1-rc3/include/linux/nfs_xdr.h 2004-01-08 17:43:46.000000000 -0500 +++ linux-2.6.1-37-attr/include/linux/nfs_xdr.h 2004-01-08 18:07:57.000000000 -0500 @@ -109,7 +109,6 @@ struct nfs_openargs { }; struct nfs_openres { - __u32 status; nfs4_stateid stateid; struct nfs_fh fh; struct nfs4_change_info * cinfo; @@ -129,24 +128,95 @@ struct nfs_open_confirmargs { }; struct nfs_open_confirmres { - __u32 status; nfs4_stateid stateid; }; /* + * Arguments to the open_reclaim call. 
+ */ +struct nfs_open_reclaimargs { + struct nfs_fh * fh; + __u64 clientid; + __u32 seqid; + __u32 id; + __u32 share_access; + __u32 claim; + struct nfs4_getattr * f_getattr; +}; + +/* * Arguments to the close call. */ struct nfs_closeargs { struct nfs_fh * fh; nfs4_stateid stateid; __u32 seqid; + __u32 share_access; }; struct nfs_closeres { - __u32 status; nfs4_stateid stateid; }; +/* + * * Arguments to the lock,lockt, and locku call. + * */ +struct nfs_lowner { + __u64 clientid; + u32 id; +}; +struct nfs_open_to_lock { + __u32 open_seqid; + nfs4_stateid open_stateid; + __u32 lock_seqid; + struct nfs_lowner lock_owner; +}; + +struct nfs_exist_lock { + nfs4_stateid stateid; + __u32 seqid; +}; + +struct nfs_lock_opargs { + __u32 reclaim; + __u32 new_lock_owner; + union { + struct nfs_open_to_lock *open_lock; + struct nfs_exist_lock *exist_lock; + } u; +}; + +struct nfs_locku_opargs { + __u32 seqid; + nfs4_stateid stateid; +}; + +struct nfs_lockargs { + struct nfs_fh * fh; + __u32 type; + __u64 offset; + __u64 length; + union { + struct nfs_lock_opargs *lock; /* LOCK */ + struct nfs_lowner *lockt; /* LOCKT */ + struct nfs_locku_opargs *locku; /* LOCKU */ + } u; +}; + +struct nfs_lock_denied { + __u64 offset; + __u64 length; + __u32 type; + struct nfs_lowner owner; +}; + +struct nfs_lockres { + union { + nfs4_stateid stateid;/* LOCK success, LOCKU */ + struct nfs_lock_denied denied; /* LOCK failed, LOCKT success */ + } u; + struct nfs_server * server; +}; /* * Arguments to the read call. 
@@ -449,7 +519,6 @@ struct nfs4_getattr { u32 * gt_bmval; /* request */ struct nfs_fattr * gt_attrs; /* response */ struct nfs_fsstat * gt_fsstat; /* response */ - struct nfs_fsinfo * gt_fsinfo; /* response */ struct nfs_pathconf * gt_pathconf; /* response */ }; @@ -556,8 +625,6 @@ struct nfs4_op { struct nfs4_rename rename; struct nfs4_client * renew; struct nfs4_setattr setattr; - struct nfs4_setclientid setclientid; - struct nfs4_client * setclientid_confirm; } u; }; @@ -594,6 +661,7 @@ struct nfs_read_data { struct rpc_task task; struct inode *inode; struct rpc_cred *cred; + fl_owner_t lockowner; struct nfs_fattr fattr; /* fattr storage */ struct list_head pages; /* Coalesced read requests */ struct page *pagevec[NFS_READ_MAXIOV]; @@ -609,6 +677,7 @@ struct nfs_write_data { struct rpc_task task; struct inode *inode; struct rpc_cred *cred; + fl_owner_t lockowner; struct nfs_fattr fattr; struct nfs_writeverf verf; struct list_head pages; /* Coalesced requests we wish to flush */ @@ -627,6 +696,8 @@ struct nfs_page; */ struct nfs_rpc_ops { int version; /* Protocol version */ + struct dentry_operations *dentry_ops; + struct inode_operations *dir_inode_ops; int (*getroot) (struct nfs_server *, struct nfs_fh *, struct nfs_fattr *); @@ -673,6 +744,7 @@ struct nfs_rpc_ops { int (*file_release) (struct inode *, struct file *); void (*request_init)(struct nfs_page *, struct file *); int (*request_compatible)(struct nfs_page *, struct file *, struct page *); + int (*lock)(struct file *, int, struct file_lock *); }; /* diff -u --recursive --new-file --show-c-function linux-2.6.1-rc3/include/linux/sunrpc/auth.h linux-2.6.1-37-attr/include/linux/sunrpc/auth.h --- linux-2.6.1-rc3/include/linux/sunrpc/auth.h 2004-01-08 17:39:52.000000000 -0500 +++ linux-2.6.1-37-attr/include/linux/sunrpc/auth.h 2004-01-08 18:02:36.000000000 -0500 @@ -73,6 +73,7 @@ struct rpc_auth { * differ from the flavor in * au_ops->au_flavor in gss * case) */ + atomic_t au_count; /* Reference counter */ /* 
per-flavor data */ }; @@ -102,6 +103,10 @@ struct rpc_credops { u32 * (*crmarshal)(struct rpc_task *, u32 *, int); int (*crrefresh)(struct rpc_task *); u32 * (*crvalidate)(struct rpc_task *, u32 *); + int (*crwrap_req)(struct rpc_task *, kxdrproc_t, + void *, u32 *, void *); + int (*crunwrap_resp)(struct rpc_task *, kxdrproc_t, + void *, u32 *, void *); }; extern struct rpc_authops authunix_ops; @@ -124,6 +129,8 @@ void put_rpccred(struct rpc_cred *); void rpcauth_unbindcred(struct rpc_task *); u32 * rpcauth_marshcred(struct rpc_task *, u32 *); u32 * rpcauth_checkverf(struct rpc_task *, u32 *); +int rpcauth_wrap_req(struct rpc_task *task, kxdrproc_t encode, void *rqstp, u32 *data, void *obj); +int rpcauth_unwrap_resp(struct rpc_task *task, kxdrproc_t decode, void *rqstp, u32 *data, void *obj); int rpcauth_refreshcred(struct rpc_task *); void rpcauth_invalcred(struct rpc_task *); int rpcauth_uptodatecred(struct rpc_task *); diff -u --recursive --new-file --show-c-function linux-2.6.1-rc3/include/linux/sunrpc/clnt.h linux-2.6.1-37-attr/include/linux/sunrpc/clnt.h --- linux-2.6.1-rc3/include/linux/sunrpc/clnt.h 2004-01-08 17:40:49.000000000 -0500 +++ linux-2.6.1-37-attr/include/linux/sunrpc/clnt.h 2004-01-08 18:02:36.000000000 -0500 @@ -26,6 +26,8 @@ struct rpc_portmap { __u32 pm_vers; __u32 pm_prot; __u16 pm_port; + unsigned char pm_binding : 1; /* doing a getport() */ + struct rpc_wait_queue pm_bindwait; /* waiting on getport() */ }; struct rpc_inode; @@ -34,6 +36,7 @@ struct rpc_inode; * The high-level client handle */ struct rpc_clnt { + atomic_t cl_count; /* Number of clones */ atomic_t cl_users; /* number of references */ struct rpc_xprt * cl_xprt; /* transport */ struct rpc_procinfo * cl_procinfo; /* procedure info */ @@ -48,26 +51,27 @@ struct rpc_clnt { cl_intr : 1,/* interruptible */ cl_chatty : 1,/* be verbose */ cl_autobind : 1,/* use getport() */ - cl_binding : 1,/* doing a getport() */ cl_droppriv : 1,/* enable NFS suid hack */ cl_oneshot : 1,/* dispose 
after use */ cl_dead : 1;/* abandoned */ - struct rpc_rtt cl_rtt; /* RTO estimator data */ - - struct rpc_portmap cl_pmap; /* port mapping */ - struct rpc_wait_queue cl_bindwait; /* waiting on getport() */ + struct rpc_rtt * cl_rtt; /* RTO estimator data */ + struct rpc_portmap * cl_pmap; /* port mapping */ int cl_nodelen; /* nodename length */ char cl_nodename[UNX_MAXNODENAME]; char cl_pathname[30];/* Path in rpc_pipe_fs */ struct dentry * cl_dentry; /* inode */ + struct rpc_clnt * cl_parent; /* Points to parent of clones */ + struct rpc_rtt cl_rtt_default; + struct rpc_portmap cl_pmap_default; + char cl_inline_name[32]; }; #define cl_timeout cl_xprt->timeout -#define cl_prog cl_pmap.pm_prog -#define cl_vers cl_pmap.pm_vers -#define cl_port cl_pmap.pm_port -#define cl_prot cl_pmap.pm_prot +#define cl_prog cl_pmap->pm_prog +#define cl_vers cl_pmap->pm_vers +#define cl_port cl_pmap->pm_port +#define cl_prot cl_pmap->pm_prot /* * General RPC program info @@ -108,6 +112,7 @@ struct rpc_procinfo { struct rpc_clnt *rpc_create_client(struct rpc_xprt *xprt, char *servname, struct rpc_program *info, u32 version, rpc_authflavor_t authflavor); +struct rpc_clnt *rpc_clone_client(struct rpc_clnt *); int rpc_shutdown_client(struct rpc_clnt *); int rpc_destroy_client(struct rpc_clnt *); void rpc_release_client(struct rpc_clnt *); diff -u --recursive --new-file --show-c-function linux-2.6.1-rc3/include/linux/sunrpc/gss_api.h linux-2.6.1-37-attr/include/linux/sunrpc/gss_api.h --- linux-2.6.1-rc3/include/linux/sunrpc/gss_api.h 2004-01-08 17:45:12.000000000 -0500 +++ linux-2.6.1-37-attr/include/linux/sunrpc/gss_api.h 2004-01-08 18:01:01.000000000 -0500 @@ -16,6 +16,7 @@ #ifdef __KERNEL__ #include +#include /* The mechanism-independent gss-api context: */ struct gss_ctx { @@ -39,11 +40,11 @@ u32 gss_import_sec_context( u32 gss_get_mic( struct gss_ctx *ctx_id, u32 qop, - struct xdr_netobj *message, + struct xdr_buf *message, struct xdr_netobj *mic_token); u32 gss_verify_mic( struct 
gss_ctx *ctx_id, - struct xdr_netobj *message, + struct xdr_buf *message, struct xdr_netobj *mic_token, u32 *qstate); u32 gss_delete_sec_context( @@ -95,11 +96,11 @@ struct gss_api_ops { u32 (*gss_get_mic)( struct gss_ctx *ctx_id, u32 qop, - struct xdr_netobj *message, + struct xdr_buf *message, struct xdr_netobj *mic_token); u32 (*gss_verify_mic)( struct gss_ctx *ctx_id, - struct xdr_netobj *message, + struct xdr_buf *message, struct xdr_netobj *mic_token, u32 *qstate); void (*gss_delete_sec_context)( diff -u --recursive --new-file --show-c-function linux-2.6.1-rc3/include/linux/sunrpc/gss_krb5.h linux-2.6.1-37-attr/include/linux/sunrpc/gss_krb5.h --- linux-2.6.1-rc3/include/linux/sunrpc/gss_krb5.h 2004-01-08 17:39:59.000000000 -0500 +++ linux-2.6.1-37-attr/include/linux/sunrpc/gss_krb5.h 2004-01-08 18:01:01.000000000 -0500 @@ -50,7 +50,6 @@ struct krb5_ctx { struct crypto_tfm *seq; s32 endtime; u32 seq_send; - u32 seq_recv; struct xdr_netobj mech_used; }; @@ -73,7 +72,7 @@ enum seal_alg { SEAL_ALG_DES3KD = 0x0002 }; -#define RSA_MD5_CKSUM_LENGTH 16 +#define KRB5_CKSUM_LENGTH 8 #define CKSUMTYPE_CRC32 0x0001 #define CKSUMTYPE_RSA_MD4 0x0002 @@ -100,16 +99,6 @@ enum seal_alg { #define KG_EMPTY_CCACHE (39756044L) #define KG_NO_CTYPES (39756045L) -#define KV5M_PRINCIPAL (-1760647423L) -#define KV5M_KEYBLOCK (-1760647421L) -#define KV5M_CHECKSUM (-1760647420L) -#define KV5M_ADDRESS (-1760647390L) -#define KV5M_AUTHENTICATOR (-1760647410L) -#define KV5M_AUTH_CONTEXT (-1760647383L) -#define KV5M_AUTHDATA (-1760647414L) -#define KV5M_GSS_OID (-1760647372L) -#define KV5M_GSS_QUEUE (-1760647371L) - /* per Kerberos v5 protocol spec crypto types from the wire. * these get mapped to linux kernel crypto routines. 
*/ @@ -126,19 +115,18 @@ enum seal_alg { #define ENCTYPE_UNKNOWN 0x01ff s32 -krb5_make_checksum(s32 cksumtype, - struct xdr_netobj *input, +krb5_make_checksum(s32 cksumtype, char *header, struct xdr_buf *body, struct xdr_netobj *cksum); u32 krb5_make_token(struct krb5_ctx *context_handle, int qop_req, - struct xdr_netobj * input_message_buffer, - struct xdr_netobj * output_message_buffer, int toktype); + struct xdr_buf *input_message_buffer, + struct xdr_netobj *output_message_buffer, int toktype); u32 krb5_read_token(struct krb5_ctx *context_handle, struct xdr_netobj *input_token_buffer, - struct xdr_netobj *message_buffer, + struct xdr_buf *message_buffer, int *qop_state, int toktype); u32 diff -u --recursive --new-file --show-c-function linux-2.6.1-rc3/include/linux/sunrpc/rpc_pipe_fs.h linux-2.6.1-37-attr/include/linux/sunrpc/rpc_pipe_fs.h --- linux-2.6.1-rc3/include/linux/sunrpc/rpc_pipe_fs.h 2004-01-08 17:37:18.000000000 -0500 +++ linux-2.6.1-37-attr/include/linux/sunrpc/rpc_pipe_fs.h 2004-01-08 17:56:53.000000000 -0500 @@ -14,6 +14,7 @@ struct rpc_pipe_msg { struct rpc_pipe_ops { ssize_t (*upcall)(struct file *, struct rpc_pipe_msg *, char __user *, size_t); ssize_t (*downcall)(struct file *, const char __user *, size_t); + void (*release_pipe)(struct inode *); void (*destroy_msg)(struct rpc_pipe_msg *); }; @@ -21,12 +22,15 @@ struct rpc_inode { struct inode vfs_inode; void *private; struct list_head pipe; + struct list_head in_upcall; int pipelen; int nreaders; + int nwriters; wait_queue_head_t waitq; #define RPC_PIPE_WAIT_FOR_OPEN 1 int flags; struct rpc_pipe_ops *ops; + struct work_struct queue_timeout; }; static inline struct rpc_inode * @@ -35,7 +39,6 @@ RPC_I(struct inode *inode) return container_of(inode, struct rpc_inode, vfs_inode); } -extern void rpc_inode_setowner(struct inode *, void *); extern int rpc_queue_upcall(struct inode *, struct rpc_pipe_msg *); extern struct dentry *rpc_mkdir(char *, struct rpc_clnt *); diff -u --recursive --new-file 
--show-c-function linux-2.6.1-rc3/include/linux/sunrpc/sched.h linux-2.6.1-37-attr/include/linux/sunrpc/sched.h --- linux-2.6.1-rc3/include/linux/sunrpc/sched.h 2004-01-08 17:41:48.000000000 -0500 +++ linux-2.6.1-37-attr/include/linux/sunrpc/sched.h 2004-01-08 18:05:58.000000000 -0500 @@ -48,8 +48,6 @@ struct rpc_task { __u8 tk_garb_retry, tk_cred_retry, tk_suid_retry; - u32 tk_gss_seqno; /* rpcsec_gss sequence number - used on this request */ /* * timeout_fn to be executed by timer bottom half @@ -110,6 +108,7 @@ typedef void (*rpc_action)(struct rpc_ #define RPC_TASK_ROOTCREDS 0x0040 /* force root creds */ #define RPC_TASK_DYNAMIC 0x0080 /* task was kmalloc'ed */ #define RPC_TASK_KILLED 0x0100 /* task was killed */ +#define RPC_TASK_SOFT 0x0200 /* Use soft timeouts */ #define RPC_IS_ASYNC(t) ((t)->tk_flags & RPC_TASK_ASYNC) #define RPC_IS_SETUID(t) ((t)->tk_flags & RPC_TASK_SETUID) @@ -119,6 +118,7 @@ typedef void (*rpc_action)(struct rpc_ #define RPC_ASSASSINATED(t) ((t)->tk_flags & RPC_TASK_KILLED) #define RPC_IS_ACTIVATED(t) ((t)->tk_active) #define RPC_DO_CALLBACK(t) ((t)->tk_callback != NULL) +#define RPC_IS_SOFT(t) ((t)->tk_flags & RPC_TASK_SOFT) #define RPC_TASK_SLEEPING 0 #define RPC_TASK_RUNNING 1 @@ -205,7 +205,7 @@ rpc_exit(struct rpc_task *task, int stat static __inline__ char * rpc_qname(struct rpc_wait_queue *q) { - return q->name? q->name : "unknown"; + return ((q && q->name) ? 
q->name : "unknown"); } #endif diff -u --recursive --new-file --show-c-function linux-2.6.1-rc3/include/linux/sunrpc/xdr.h linux-2.6.1-37-attr/include/linux/sunrpc/xdr.h --- linux-2.6.1-rc3/include/linux/sunrpc/xdr.h 2004-01-08 17:46:42.000000000 -0500 +++ linux-2.6.1-37-attr/include/linux/sunrpc/xdr.h 2004-01-08 18:01:02.000000000 -0500 @@ -141,6 +141,10 @@ void xdr_shift_iovec(struct iovec *, int extern int xdr_kmap(struct iovec *, struct xdr_buf *, size_t); extern void xdr_kunmap(struct xdr_buf *, size_t); extern void xdr_shift_buf(struct xdr_buf *, size_t); +extern void _copy_from_pages(char *, struct page **, size_t, size_t); +extern void xdr_buf_from_iov(struct iovec *, struct xdr_buf *); +extern int xdr_buf_subsegment(struct xdr_buf *, struct xdr_buf *, int, int); +extern int xdr_buf_read_netobj(struct xdr_buf *, struct xdr_netobj *, int); /* * Helper structure for copying from an sk_buff. diff -u --recursive --new-file --show-c-function linux-2.6.1-rc3/include/linux/sunrpc/xprt.h linux-2.6.1-37-attr/include/linux/sunrpc/xprt.h --- linux-2.6.1-rc3/include/linux/sunrpc/xprt.h 2004-01-08 17:42:38.000000000 -0500 +++ linux-2.6.1-37-attr/include/linux/sunrpc/xprt.h 2004-01-08 18:06:26.000000000 -0500 @@ -95,6 +95,7 @@ struct rpc_rqst { struct rpc_rqst * rq_next; /* free list */ int rq_cong; /* has incremented xprt->cong */ int rq_received; /* receive completed */ + u32 rq_seqno; /* gss seq no. used on req. 
*/ struct list_head rq_list; @@ -162,6 +163,12 @@ struct rpc_xprt { tcp_offset; /* fragment offset */ unsigned long tcp_copied, /* copied to request */ tcp_flags; + /* + * Disconnection of idle sockets + */ + struct work_struct task_cleanup; + struct timer_list timer; + unsigned long last_used; /* * Send stuff @@ -201,6 +208,7 @@ int xprt_clear_backlog(struct rpc_xprt void xprt_sock_setbufsize(struct rpc_xprt *); #define XPRT_CONNECT 0 +#define XPRT_LOCKED 1 #define xprt_connected(xp) (test_bit(XPRT_CONNECT, &(xp)->sockstate)) #define xprt_set_connected(xp) (set_bit(XPRT_CONNECT, &(xp)->sockstate)) diff -u --recursive --new-file --show-c-function linux-2.6.1-rc3/net/sunrpc/auth.c linux-2.6.1-37-attr/net/sunrpc/auth.c --- linux-2.6.1-rc3/net/sunrpc/auth.c 2004-01-08 17:39:46.000000000 -0500 +++ linux-2.6.1-37-attr/net/sunrpc/auth.c 2004-01-08 18:02:36.000000000 -0500 @@ -61,6 +61,7 @@ rpcauth_unregister(struct rpc_authops *o struct rpc_auth * rpcauth_create(rpc_authflavor_t pseudoflavor, struct rpc_clnt *clnt) { + struct rpc_auth *auth; struct rpc_authops *ops; u32 flavor = pseudoflavor_to_flavor(pseudoflavor); @@ -68,13 +69,21 @@ rpcauth_create(rpc_authflavor_t pseudofl return NULL; if (!try_module_get(ops->owner)) return NULL; - clnt->cl_auth = ops->create(clnt, pseudoflavor); - return clnt->cl_auth; + auth = ops->create(clnt, pseudoflavor); + if (!auth) + return NULL; + atomic_set(&auth->au_count, 1); + if (clnt->cl_auth) + rpcauth_destroy(clnt->cl_auth); + clnt->cl_auth = auth; + return auth; } void rpcauth_destroy(struct rpc_auth *auth) { + if (!atomic_dec_and_test(&auth->au_count)) + return; auth->au_ops->destroy(auth); module_put(auth->au_ops->owner); kfree(auth); @@ -340,6 +349,35 @@ rpcauth_checkverf(struct rpc_task *task, } int +rpcauth_wrap_req(struct rpc_task *task, kxdrproc_t encode, void *rqstp, + u32 *data, void *obj) +{ + struct rpc_cred *cred = task->tk_msg.rpc_cred; + + dprintk("RPC: %4d using %s cred %p to wrap rpc data\n", + task->tk_pid, 
cred->cr_auth->au_ops->au_name, cred); + if (cred->cr_ops->crwrap_req) + return cred->cr_ops->crwrap_req(task, encode, rqstp, data, obj); + /* By default, we encode the arguments normally. */ + return encode(rqstp, data, obj); +} + +int +rpcauth_unwrap_resp(struct rpc_task *task, kxdrproc_t decode, void *rqstp, + u32 *data, void *obj) +{ + struct rpc_cred *cred = task->tk_msg.rpc_cred; + + dprintk("RPC: %4d using %s cred %p to unwrap rpc data\n", + task->tk_pid, cred->cr_auth->au_ops->au_name, cred); + if (cred->cr_ops->crunwrap_resp) + return cred->cr_ops->crunwrap_resp(task, decode, rqstp, + data, obj); + /* By default, we decode the arguments normally. */ + return decode(rqstp, data, obj); +} + +int rpcauth_refreshcred(struct rpc_task *task) { struct rpc_auth *auth = task->tk_auth; diff -u --recursive --new-file --show-c-function linux-2.6.1-rc3/net/sunrpc/auth_gss/auth_gss.c linux-2.6.1-37-attr/net/sunrpc/auth_gss/auth_gss.c --- linux-2.6.1-rc3/net/sunrpc/auth_gss/auth_gss.c 2004-01-08 17:38:53.000000000 -0500 +++ linux-2.6.1-37-attr/net/sunrpc/auth_gss/auth_gss.c 2004-01-08 18:01:35.000000000 -0500 @@ -49,7 +49,9 @@ #include #include #include +#include #include +#include #include static struct rpc_authops authgss_ops; @@ -64,7 +66,9 @@ static struct rpc_credops gss_credops; #define GSS_CRED_EXPIRE (60 * HZ) /* XXX: reasonable? 
*/ #define GSS_CRED_SLACK 1024 /* XXX: unused */ -#define GSS_VERF_SLACK 48 /* length of a krb5 verifier.*/ +/* length of a krb5 verifier (48), plus data added before arguments when + * using integrity (two 4-byte integers): */ +#define GSS_VERF_SLACK 56 /* XXX this define must match the gssd define * as it is passed to gssd to signal the use of @@ -155,17 +159,17 @@ gss_cred_set_ctx(struct rpc_cred *cred, gss_put_ctx(old); } -static struct gss_cl_ctx * -gss_cred_get_uptodate_ctx(struct rpc_cred *cred) +static int +gss_cred_is_uptodate_ctx(struct rpc_cred *cred) { struct gss_cred *gss_cred = container_of(cred, struct gss_cred, gc_base); - struct gss_cl_ctx *ctx = NULL; + int res = 0; read_lock(&gss_ctx_lock); if ((cred->cr_flags & RPCAUTH_CRED_UPTODATE) && gss_cred->gc_ctx) - ctx = gss_get_ctx(gss_cred->gc_ctx); + res = 1; read_unlock(&gss_ctx_lock); - return ctx; + return res; } static inline int @@ -292,13 +296,9 @@ struct gss_upcall_msg { static void gss_release_msg(struct gss_upcall_msg *gss_msg) { - struct gss_auth *gss_auth = gss_msg->auth; - - if (!atomic_dec_and_lock(&gss_msg->count, &gss_auth->lock)) + if (!atomic_dec_and_test(&gss_msg->count)) return; - if (!list_empty(&gss_msg->list)) - list_del(&gss_msg->list); - spin_unlock(&gss_auth->lock); + BUG_ON(!list_empty(&gss_msg->list)); kfree(gss_msg); } @@ -315,24 +315,17 @@ __gss_find_upcall(struct gss_auth *gss_a return NULL; } -static struct gss_upcall_msg * -gss_find_upcall(struct gss_auth *gss_auth, uid_t uid) -{ - struct gss_upcall_msg *gss_msg; - - spin_lock(&gss_auth->lock); - gss_msg = __gss_find_upcall(gss_auth, uid); - spin_unlock(&gss_auth->lock); - return gss_msg; -} - static void __gss_unhash_msg(struct gss_upcall_msg *gss_msg) { if (list_empty(&gss_msg->list)) return; list_del_init(&gss_msg->list); - rpc_wake_up(&gss_msg->waitq); + if (gss_msg->msg.errno < 0) + rpc_wake_up_status(&gss_msg->waitq, gss_msg->msg.errno); + else + rpc_wake_up(&gss_msg->waitq); + atomic_dec(&gss_msg->count); } 
static void @@ -345,40 +338,27 @@ gss_unhash_msg(struct gss_upcall_msg *gs spin_unlock(&gss_auth->lock); } -static void -gss_release_callback(struct rpc_task *task) -{ - struct rpc_clnt *clnt = task->tk_client; - struct gss_auth *gss_auth = container_of(clnt->cl_auth, - struct gss_auth, rpc_auth); - struct gss_upcall_msg *gss_msg; - - gss_msg = gss_find_upcall(gss_auth, task->tk_msg.rpc_cred->cr_uid); - BUG_ON(!gss_msg); - atomic_dec(&gss_msg->count); - gss_release_msg(gss_msg); -} - static int -gss_upcall(struct rpc_clnt *clnt, struct rpc_task *task, uid_t uid) +gss_upcall(struct rpc_clnt *clnt, struct rpc_task *task, struct rpc_cred *cred) { struct gss_auth *gss_auth = container_of(clnt->cl_auth, struct gss_auth, rpc_auth); struct gss_upcall_msg *gss_msg, *gss_new = NULL; struct rpc_pipe_msg *msg; struct dentry *dentry = gss_auth->dentry; - int res; + uid_t uid = cred->cr_uid; + int res = 0; retry: + spin_lock(&gss_auth->lock); gss_msg = __gss_find_upcall(gss_auth, uid); if (gss_msg) goto out_sleep; if (gss_new == NULL) { spin_unlock(&gss_auth->lock); gss_new = kmalloc(sizeof(*gss_new), GFP_KERNEL); - if (gss_new) + if (!gss_new) return -ENOMEM; - spin_lock(&gss_auth->lock); goto retry; } gss_msg = gss_new; @@ -393,20 +373,34 @@ retry: gss_new->auth = gss_auth; list_add(&gss_new->list, &gss_auth->upcalls); gss_new = NULL; - task->tk_timeout = 5 * HZ; - rpc_sleep_on(&gss_msg->waitq, task, gss_release_callback, NULL); - spin_unlock(&gss_auth->lock); - res = rpc_queue_upcall(dentry->d_inode, msg); - if (res) { - gss_unhash_msg(gss_msg); - gss_release_msg(gss_msg); + /* Has someone updated the credential behind our back? 
*/ + if (!gss_cred_is_uptodate_ctx(cred)) { + /* No, so do upcall and sleep */ + task->tk_timeout = 0; + rpc_sleep_on(&gss_msg->waitq, task, NULL, NULL); + spin_unlock(&gss_auth->lock); + res = rpc_queue_upcall(dentry->d_inode, msg); + if (res) + gss_unhash_msg(gss_msg); + } else { + /* Yes, so cancel upcall */ + __gss_unhash_msg(gss_msg); + spin_unlock(&gss_auth->lock); } + gss_release_msg(gss_msg); return res; out_sleep: - rpc_sleep_on(&gss_msg->waitq, task, gss_release_callback, NULL); + /* Sleep forever */ + task->tk_timeout = 0; + rpc_sleep_on(&gss_msg->waitq, task, NULL, NULL); spin_unlock(&gss_auth->lock); if (gss_new) kfree(gss_new); + /* Note: we drop the reference here: we are automatically removed + * from the queue when we're woken up, and we should in any case + * have no further responsabilities w.r.t. the upcall. + */ + gss_release_msg(gss_msg); return 0; } @@ -491,14 +485,52 @@ err: return err; } +static void +gss_pipe_release(struct inode *inode) +{ + struct rpc_inode *rpci = RPC_I(inode); + struct rpc_clnt *clnt; + struct rpc_auth *auth; + struct gss_auth *gss_auth; + + clnt = rpci->private; + auth = clnt->cl_auth; + gss_auth = container_of(auth, struct gss_auth, rpc_auth); + spin_lock(&gss_auth->lock); + while (!list_empty(&gss_auth->upcalls)) { + struct gss_upcall_msg *gss_msg; + + gss_msg = list_entry(gss_auth->upcalls.next, + struct gss_upcall_msg, list); + gss_msg->msg.errno = -EPIPE; + atomic_inc(&gss_msg->count); + __gss_unhash_msg(gss_msg); + spin_unlock(&gss_auth->lock); + gss_release_msg(gss_msg); + spin_lock(&gss_auth->lock); + } + spin_unlock(&gss_auth->lock); +} + void gss_pipe_destroy_msg(struct rpc_pipe_msg *msg) { struct gss_upcall_msg *gss_msg = container_of(msg, struct gss_upcall_msg, msg); + static unsigned long ratelimit; - if (msg->errno < 0) + if (msg->errno < 0) { + atomic_inc(&gss_msg->count); gss_unhash_msg(gss_msg); - gss_release_msg(gss_msg); + if (msg->errno == -ETIMEDOUT || msg->errno == -EPIPE) { + unsigned long now = 
jiffies; + if (time_after(now, ratelimit)) { + printk(KERN_WARNING "RPC: AUTH_GSS upcall timed out.\n" + "Please check user daemon is running!\n"); + ratelimit = now + 15*HZ; + } + } + gss_release_msg(gss_msg); + } } /* @@ -640,21 +672,14 @@ gss_marshal(struct rpc_task *task, u32 * struct gss_cl_ctx *ctx = gss_cred_get_ctx(cred); u32 *cred_len; struct rpc_rqst *req = task->tk_rqstp; - struct rpc_clnt *clnt = task->tk_client; - struct rpc_xprt *xprt = clnt->cl_xprt; - u32 *verfbase = req->rq_svec[0].iov_base; u32 maj_stat = 0; - struct xdr_netobj bufin,bufout; + struct xdr_netobj mic; + struct iovec iov; + struct xdr_buf verf_buf; u32 service; dprintk("RPC: gss_marshal\n"); - /* We compute the checksum for the verifier over the xdr-encoded bytes - * starting with the xid (which verfbase points to) and ending at - * the end of the credential. */ - if (xprt->stream) - verfbase++; /* See clnt.c:call_header() */ - *p++ = htonl(RPC_AUTH_GSS); cred_len = p++; @@ -665,32 +690,39 @@ gss_marshal(struct rpc_task *task, u32 * goto out_put_ctx; } spin_lock(&ctx->gc_seq_lock); - task->tk_gss_seqno = ctx->gc_seq++; + req->rq_seqno = ctx->gc_seq++; spin_unlock(&ctx->gc_seq_lock); *p++ = htonl((u32) RPC_GSS_VERSION); *p++ = htonl((u32) ctx->gc_proc); - *p++ = htonl((u32) task->tk_gss_seqno); + *p++ = htonl((u32) req->rq_seqno); *p++ = htonl((u32) service); p = xdr_encode_netobj(p, &ctx->gc_wire_ctx); *cred_len = htonl((p - (cred_len + 1)) << 2); - /* Marshal verifier. 
*/ - bufin.data = (u8 *)verfbase; - bufin.len = (p - verfbase) << 2; + /* We compute the checksum for the verifier over the xdr-encoded bytes + * starting with the xid and ending at the end of the credential: */ + iov.iov_base = req->rq_snd_buf.head[0].iov_base; + if (task->tk_client->cl_xprt->stream) + /* See clnt.c:call_header() */ + iov.iov_base += 4; + iov.iov_len = (u8 *)p - (u8 *)iov.iov_base; + xdr_buf_from_iov(&iov, &verf_buf); /* set verifier flavor*/ *p++ = htonl(RPC_AUTH_GSS); + mic.data = (u8 *)(p + 1); maj_stat = gss_get_mic(ctx->gc_gss_ctx, GSS_C_QOP_DEFAULT, - &bufin, &bufout); + &verf_buf, &mic); if(maj_stat != 0){ - printk("gss_marshal: gss_get_mic FAILED (%d)\n", - maj_stat); + printk("gss_marshal: gss_get_mic FAILED (%d)\n", maj_stat); goto out_put_ctx; } - p = xdr_encode_netobj(p, &bufout); + *p++ = htonl(mic.len); + p += XDR_QUADLEN(mic.len); + gss_put_ctx(ctx); return p; out_put_ctx: gss_put_ctx(ctx); @@ -704,58 +736,206 @@ static int gss_refresh(struct rpc_task *task) { struct rpc_clnt *clnt = task->tk_client; - struct gss_auth *gss_auth = container_of(clnt->cl_auth, - struct gss_auth, rpc_auth); struct rpc_xprt *xprt = task->tk_xprt; struct rpc_cred *cred = task->tk_msg.rpc_cred; - int err = 0; task->tk_timeout = xprt->timeout.to_current; - spin_lock(&gss_auth->lock); - if (gss_cred_get_uptodate_ctx(cred)) - goto out; - err = gss_upcall(clnt, task, cred->cr_uid); -out: - spin_unlock(&gss_auth->lock); - return err; + if (!gss_cred_is_uptodate_ctx(cred)) + return gss_upcall(clnt, task, cred); + return 0; } static u32 * gss_validate(struct rpc_task *task, u32 *p) { struct rpc_cred *cred = task->tk_msg.rpc_cred; + struct gss_cred *gss_cred = container_of(cred, struct gss_cred, + gc_base); struct gss_cl_ctx *ctx = gss_cred_get_ctx(cred); u32 seq, qop_state; - struct xdr_netobj bufin; - struct xdr_netobj bufout; + struct iovec iov; + struct xdr_buf verf_buf; + struct xdr_netobj mic; u32 flav,len; + u32 service; dprintk("RPC: gss_validate\n"); flav 
= ntohl(*p++); - if ((len = ntohl(*p++)) > RPC_MAX_AUTH_SIZE) { - printk("RPC: giant verf size: %ld\n", (unsigned long) len); - return NULL; - } - dprintk("RPC: gss_validate: verifier flavor %d, len %d\n", flav, len); - - if (flav != RPC_AUTH_GSS) { - printk("RPC: bad verf flavor: %ld\n", (unsigned long)flav); - return NULL; - } - seq = htonl(task->tk_gss_seqno); - bufin.data = (u8 *) &seq; - bufin.len = sizeof(seq); - bufout.data = (u8 *) p; - bufout.len = len; - - if (gss_verify_mic(ctx->gc_gss_ctx, &bufin, &bufout, &qop_state) != 0) - return NULL; - task->tk_auth->au_rslack = XDR_QUADLEN(len) + 2; - dprintk("RPC: GSS gss_validate: gss_verify_mic succeeded.\n"); + if ((len = ntohl(*p++)) > RPC_MAX_AUTH_SIZE) + goto out_bad; + if (flav != RPC_AUTH_GSS) + goto out_bad; + seq = htonl(task->tk_rqstp->rq_seqno); + iov.iov_base = &seq; + iov.iov_len = sizeof(seq); + xdr_buf_from_iov(&iov, &verf_buf); + mic.data = (u8 *)p; + mic.len = len; + + if (gss_verify_mic(ctx->gc_gss_ctx, &verf_buf, &mic, &qop_state)) + goto out_bad; + service = gss_pseudoflavor_to_service(gss_cred->gc_flavor); + switch (service) { + case RPC_GSS_SVC_NONE: + /* verifier data, flavor, length: */ + task->tk_auth->au_rslack = XDR_QUADLEN(len) + 2; + break; + case RPC_GSS_SVC_INTEGRITY: + /* verifier data, flavor, length, length, sequence number: */ + task->tk_auth->au_rslack = XDR_QUADLEN(len) + 4; + break; + default: + goto out_bad; + } + gss_put_ctx(ctx); return p + XDR_QUADLEN(len); +out_bad: + gss_put_ctx(ctx); + return NULL; } +static int +gss_wrap_req(struct rpc_task *task, + kxdrproc_t encode, void *rqstp, u32 *p, void *obj) +{ + struct rpc_rqst *req = (struct rpc_rqst *)rqstp; + struct xdr_buf *snd_buf = &req->rq_snd_buf; + struct rpc_cred *cred = task->tk_msg.rpc_cred; + struct gss_cred *gss_cred = container_of(cred, struct gss_cred, + gc_base); + struct gss_cl_ctx *ctx = gss_cred_get_ctx(cred); + u32 *integ_len = NULL; + int status = -EIO; + u32 maj_stat = 0; + struct xdr_buf integ_buf; + 
struct xdr_netobj mic; + u32 service; + u32 offset, *q; + struct iovec *iov; + + dprintk("RPC: gss_wrap_body\n"); + BUG_ON(!ctx); + if (ctx->gc_proc != RPC_GSS_PROC_DATA) { + /* The spec seems a little ambiguous here, but I think that not + * wrapping context destruction requests makes the most sense. + */ + status = encode(rqstp, p, obj); + goto out; + } + service = gss_pseudoflavor_to_service(gss_cred->gc_flavor); + switch (service) { + case RPC_GSS_SVC_NONE: + status = encode(rqstp, p, obj); + goto out; + case RPC_GSS_SVC_INTEGRITY: + + integ_len = p++; + offset = (u8 *)p - (u8 *)snd_buf->head[0].iov_base; + *p++ = htonl(req->rq_seqno); + + status = encode(rqstp, p, obj); + if (status) + goto out; + + if (xdr_buf_subsegment(snd_buf, &integ_buf, + offset, snd_buf->len - offset)) + goto out; + *integ_len = htonl(integ_buf.len); + + /* guess whether we're in the head or the tail: */ + if (snd_buf->page_len || snd_buf->tail[0].iov_len) + iov = snd_buf->tail; + else + iov = snd_buf->head; + p = iov->iov_base + iov->iov_len; + mic.data = (u8 *)(p + 1); + + maj_stat = gss_get_mic(ctx->gc_gss_ctx, + GSS_C_QOP_DEFAULT, &integ_buf, &mic); + status = -EIO; /* XXX? 
*/ + if (maj_stat) + goto out; + q = p; + *q++ = htonl(mic.len); + q += XDR_QUADLEN(mic.len); + + offset = (u8 *)q - (u8 *)p; + iov->iov_len += offset; + snd_buf->len += offset; + break; + case RPC_GSS_SVC_PRIVACY: + default: + goto out; + } + status = 0; +out: + gss_put_ctx(ctx); + dprintk("RPC: gss_wrap_req returning %d\n", status); + return status; +} + +static int +gss_unwrap_resp(struct rpc_task *task, + kxdrproc_t decode, void *rqstp, u32 *p, void *obj) +{ + struct rpc_rqst *req = (struct rpc_rqst *)rqstp; + struct xdr_buf *rcv_buf = &req->rq_rcv_buf; + struct rpc_cred *cred = task->tk_msg.rpc_cred; + struct gss_cred *gss_cred = container_of(cred, struct gss_cred, + gc_base); + struct gss_cl_ctx *ctx = gss_cred_get_ctx(cred); + struct xdr_buf integ_buf; + struct xdr_netobj mic; + int status = -EIO; + u32 maj_stat = 0; + u32 service; + u32 data_offset, mic_offset; + u32 integ_len; + + BUG_ON(!ctx); + + if (ctx->gc_proc != RPC_GSS_PROC_DATA) + goto out_decode; + service = gss_pseudoflavor_to_service(gss_cred->gc_flavor); + switch (service) { + case RPC_GSS_SVC_NONE: + goto out_decode; + case RPC_GSS_SVC_INTEGRITY: + integ_len = ntohl(*p++); + if (integ_len & 3) + goto out; + data_offset = (u8 *)p - (u8 *)rcv_buf->head[0].iov_base; + mic_offset = integ_len + data_offset; + if (mic_offset > rcv_buf->len) + goto out; + if (ntohl(*p++) != req->rq_seqno) + goto out; + + if (xdr_buf_subsegment(rcv_buf, &integ_buf, data_offset, + mic_offset - data_offset)) + goto out; + + if (xdr_buf_read_netobj(rcv_buf, &mic, mic_offset)) + goto out; + + maj_stat = gss_verify_mic(ctx->gc_gss_ctx, &integ_buf, + &mic, NULL); + if (maj_stat != GSS_S_COMPLETE) + goto out; + break; + case RPC_GSS_SVC_PRIVACY: + default: + goto out; + } +out_decode: + status = decode(rqstp, p, obj); +out: + gss_put_ctx(ctx); + dprintk("RPC: gss_unwrap_resp returning %d\n", status); + return status; +} + static struct rpc_authops authgss_ops = { .owner = THIS_MODULE, .au_flavor = RPC_AUTH_GSS, @@ -773,12 
+953,15 @@ static struct rpc_credops gss_credops = .crmarshal = gss_marshal, .crrefresh = gss_refresh, .crvalidate = gss_validate, + .crwrap_req = gss_wrap_req, + .crunwrap_resp = gss_unwrap_resp, }; static struct rpc_pipe_ops gss_upcall_ops = { .upcall = gss_pipe_upcall, .downcall = gss_pipe_downcall, .destroy_msg = gss_pipe_destroy_msg, + .release_pipe = gss_pipe_release, }; /* diff -u --recursive --new-file --show-c-function linux-2.6.1-rc3/net/sunrpc/auth_gss/gss_krb5_crypto.c linux-2.6.1-37-attr/net/sunrpc/auth_gss/gss_krb5_crypto.c --- linux-2.6.1-rc3/net/sunrpc/auth_gss/gss_krb5_crypto.c 2004-01-08 17:33:39.000000000 -0500 +++ linux-2.6.1-37-attr/net/sunrpc/auth_gss/gss_krb5_crypto.c 2004-01-08 18:01:02.000000000 -0500 @@ -39,6 +39,7 @@ #include #include #include +#include #include #ifdef RPC_DEBUG @@ -57,7 +58,7 @@ krb5_encrypt( struct scatterlist sg[1]; u8 local_iv[16] = {0}; - dprintk("RPC: gss_k5encrypt: TOP in %p out %p\nin data:\n", out, in); + dprintk("RPC: krb5_encrypt: input data:\n"); print_hexl((u32 *)in, length, 0); if (length % crypto_tfm_alg_blocksize(tfm) != 0) @@ -71,17 +72,18 @@ krb5_encrypt( if (iv) memcpy(local_iv, iv, crypto_tfm_alg_ivsize(tfm)); - crypto_cipher_set_iv(tfm, local_iv, crypto_tfm_alg_ivsize(tfm)); memcpy(out, in, length); sg[0].page = virt_to_page(out); sg[0].offset = offset_in_page(out); sg[0].length = length; - ret = crypto_cipher_encrypt(tfm, sg, sg, length); + ret = crypto_cipher_encrypt_iv(tfm, sg, sg, length, local_iv); + dprintk("RPC: krb5_encrypt: output data:\n"); + print_hexl((u32 *)out, length, 0); out: - dprintk("gss_k5encrypt returns %d\n",ret); + dprintk("krb5_encrypt returns %d\n",ret); return(ret); } @@ -97,8 +99,8 @@ krb5_decrypt( struct scatterlist sg[1]; u8 local_iv[16] = {0}; - dprintk("RPC: gss_k5decrypt: TOP in %p out %p\nin data:\n", in, out); - print_hexl((u32 *)in,length,0); + dprintk("RPC: krb5_decrypt: input data:\n"); + print_hexl((u32 *)in, length, 0); if (length % crypto_tfm_alg_blocksize(tfm) 
!= 0) goto out; @@ -110,28 +112,40 @@ krb5_decrypt( } if (iv) memcpy(local_iv,iv, crypto_tfm_alg_ivsize(tfm)); - crypto_cipher_set_iv(tfm, local_iv, crypto_tfm_alg_blocksize(tfm)); memcpy(out, in, length); sg[0].page = virt_to_page(out); sg[0].offset = offset_in_page(out); sg[0].length = length; - ret = crypto_cipher_decrypt(tfm, sg, sg, length); + ret = crypto_cipher_decrypt_iv(tfm, sg, sg, length, local_iv); + dprintk("RPC: krb5_decrypt: output_data:\n"); + print_hexl((u32 *)out, length, 0); out: dprintk("gss_k5decrypt returns %d\n",ret); return(ret); } +void +buf_to_sg(struct scatterlist *sg, char *ptr, int len) { + sg->page = virt_to_page(ptr); + sg->offset = offset_in_page(ptr); + sg->length = len; +} + +/* checksum the plaintext data and the first 8 bytes of the krb5 token header, + * as specified by the rfc: */ s32 -krb5_make_checksum(s32 cksumtype, struct xdr_netobj *input, +krb5_make_checksum(s32 cksumtype, char *header, struct xdr_buf *body, struct xdr_netobj *cksum) { - s32 ret = -EINVAL; - struct scatterlist sg[1]; - char *cksumname; - struct crypto_tfm *tfm; + char *cksumname; + struct crypto_tfm *tfm = NULL; /* XXX add to ctx? 
*/ + struct scatterlist sg[1]; + u32 code = GSS_S_FAILURE; + int len, thislen, offset; + int i; switch (cksumtype) { case CKSUMTYPE_RSA_MD5: @@ -145,24 +159,43 @@ krb5_make_checksum(s32 cksumtype, struct if (!(tfm = crypto_alloc_tfm(cksumname, 0))) goto out; cksum->len = crypto_tfm_alg_digestsize(tfm); - - if ((cksum->data = kmalloc(cksum->len, GFP_KERNEL)) == NULL) { - ret = -ENOMEM; - goto out_free_tfm; - } - sg[0].page = virt_to_page(input->data); - sg[0].offset = offset_in_page(input->data); - sg[0].length = input->len; + if ((cksum->data = kmalloc(cksum->len, GFP_KERNEL)) == NULL) + goto out; crypto_digest_init(tfm); + buf_to_sg(sg, header, 8); crypto_digest_update(tfm, sg, 1); - crypto_digest_final(tfm, cksum->data); - - ret = 0; + if (body->head[0].iov_len) { + buf_to_sg(sg, body->head[0].iov_base, body->head[0].iov_len); + crypto_digest_update(tfm, sg, 1); + } -out_free_tfm: - crypto_free_tfm(tfm); + len = body->page_len; + offset = body->page_base; + i = 0; + while (len) { + sg->page = body->pages[i]; + sg->offset = offset; + offset = 0; + if (PAGE_SIZE > len) + thislen = len; + else + thislen = PAGE_SIZE; + sg->length = thislen; + kmap(sg->page); /* XXX kmap_atomic? 
*/ + crypto_digest_update(tfm, sg, 1); + kunmap(sg->page); + len -= thislen; + i++; + } + if (body->tail[0].iov_len) { + buf_to_sg(sg, body->tail[0].iov_base, body->tail[0].iov_len); + crypto_digest_update(tfm, sg, 1); + } + crypto_digest_final(tfm, cksum->data); + code = 0; out: - dprintk("RPC: gss_k5cksum: returning %d\n", ret); - return (ret); + if (tfm) + crypto_free_tfm(tfm); + return code; } diff -u --recursive --new-file --show-c-function linux-2.6.1-rc3/net/sunrpc/auth_gss/gss_krb5_mech.c linux-2.6.1-37-attr/net/sunrpc/auth_gss/gss_krb5_mech.c --- linux-2.6.1-rc3/net/sunrpc/auth_gss/gss_krb5_mech.c 2004-01-08 17:42:14.000000000 -0500 +++ linux-2.6.1-37-attr/net/sunrpc/auth_gss/gss_krb5_mech.c 2004-01-08 18:01:02.000000000 -0500 @@ -98,7 +98,7 @@ get_key(char **p, char *end, struct cryp alg_mode = CRYPTO_TFM_MODE_CBC; break; default: - dprintk("RPC: get_key: unsupported algorithm %d", alg); + dprintk("RPC: get_key: unsupported algorithm %d\n", alg); goto out_err_free_key; } if (!(*res = crypto_alloc_tfm(alg_name, alg_mode))) @@ -168,7 +168,7 @@ out_err: return GSS_S_FAILURE; } -void +static void gss_delete_sec_context_kerberos(void *internal_ctx) { struct krb5_ctx *kctx = internal_ctx; @@ -181,16 +181,16 @@ gss_delete_sec_context_kerberos(void *in kfree(kctx); } -u32 +static u32 gss_verify_mic_kerberos(struct gss_ctx *ctx, - struct xdr_netobj *signbuf, - struct xdr_netobj *checksum, - u32 *qstate) { + struct xdr_buf *message, + struct xdr_netobj *mic_token, + u32 *qstate) { u32 maj_stat = 0; int qop_state; struct krb5_ctx *kctx = ctx->internal_ctx_id; - maj_stat = krb5_read_token(kctx, checksum, signbuf, &qop_state, + maj_stat = krb5_read_token(kctx, mic_token, message, &qop_state, KG_TOK_MIC_MSG); if (!maj_stat && qop_state) *qstate = qop_state; @@ -199,21 +199,15 @@ gss_verify_mic_kerberos(struct gss_ctx return maj_stat; } -u32 +static u32 gss_get_mic_kerberos(struct gss_ctx *ctx, u32 qop, - struct xdr_netobj *message_buffer, - struct xdr_netobj 
*message_token) { + struct xdr_buf *message, + struct xdr_netobj *mic_token) { u32 err = 0; struct krb5_ctx *kctx = ctx->internal_ctx_id; - if (!message_buffer->data) return GSS_S_FAILURE; - - dprintk("RPC: gss_get_mic_kerberos:" - " message_buffer->len %d\n",message_buffer->len); - - err = krb5_make_token(kctx, qop, message_buffer, - message_token, KG_TOK_MIC_MSG); + err = krb5_make_token(kctx, qop, message, mic_token, KG_TOK_MIC_MSG); dprintk("RPC: gss_get_mic_kerberos returning %d\n",err); @@ -237,12 +231,14 @@ static int __init init_kerberos_module(v printk("Failed to register kerberos gss mechanism!\n"); gm = gss_mech_get_by_OID(&gss_mech_krb5_oid); gss_register_triple(RPC_AUTH_GSS_KRB5 , gm, 0, RPC_GSS_SVC_NONE); + gss_register_triple(RPC_AUTH_GSS_KRB5I, gm, 0, RPC_GSS_SVC_INTEGRITY); gss_mech_put(gm); return 0; } static void __exit cleanup_kerberos_module(void) { + gss_unregister_triple(RPC_AUTH_GSS_KRB5I); gss_unregister_triple(RPC_AUTH_GSS_KRB5); } diff -u --recursive --new-file --show-c-function linux-2.6.1-rc3/net/sunrpc/auth_gss/gss_krb5_seal.c linux-2.6.1-37-attr/net/sunrpc/auth_gss/gss_krb5_seal.c --- linux-2.6.1-rc3/net/sunrpc/auth_gss/gss_krb5_seal.c 2004-01-08 17:32:43.000000000 -0500 +++ linux-2.6.1-37-attr/net/sunrpc/auth_gss/gss_krb5_seal.c 2004-01-08 18:01:02.000000000 -0500 @@ -63,14 +63,13 @@ #include #include #include +#include #include #ifdef RPC_DEBUG # define RPCDBG_FACILITY RPCDBG_AUTH #endif -#define CKSUM_SIZE 8 - static inline int gss_krb5_padding(int blocksize, int length) { /* Most of the code is block-size independent but in practice we @@ -79,32 +78,9 @@ gss_krb5_padding(int blocksize, int leng return 8 - (length & 7); } -/* checksum the plaintext data and the first 8 bytes of the krb5 token header, - * as specified by the rfc: */ -static u32 -compute_checksum(s32 checksum_type, char *header, char *body, int body_len, - struct xdr_netobj *md5cksum) { - char *data_ptr; - struct xdr_netobj plaind; - u32 code = GSS_S_FAILURE; - - if 
(!(data_ptr = kmalloc(8 + body_len, GFP_KERNEL))) - goto out; - memcpy(data_ptr, header, 8); - memcpy(data_ptr + 8, body, body_len); - plaind.len = 8 + body_len; - plaind.data = data_ptr; - code = krb5_make_checksum(checksum_type, &plaind, md5cksum); - kfree(data_ptr); - code = 0; - -out: - return code; -} - u32 krb5_make_token(struct krb5_ctx *ctx, int qop_req, - struct xdr_netobj * text, struct xdr_netobj * token, + struct xdr_buf *text, struct xdr_netobj *token, int toktype) { s32 checksum_type; @@ -113,7 +89,7 @@ krb5_make_token(struct krb5_ctx *ctx, in unsigned char *ptr, *krb5_hdr, *msg_start; s32 now; - dprintk("RPC: gss_krb5_seal"); + dprintk("RPC: gss_krb5_seal\n"); now = jiffies; @@ -144,8 +120,6 @@ krb5_make_token(struct krb5_ctx *ctx, in } token->len = g_token_size(&ctx->mech_used, 22 + tmsglen); - if ((token->data = kmalloc(token->len, GFP_KERNEL)) == NULL) - goto out_err; ptr = token->data; g_make_token_header(&ctx->mech_used, 22 + tmsglen, &ptr, toktype); @@ -160,24 +134,11 @@ krb5_make_token(struct krb5_ctx *ctx, in *(u16 *)(krb5_hdr + 4) = htons(ctx->sealalg); if (toktype == KG_TOK_WRAP_MSG) { - unsigned char pad = gss_krb5_padding(blocksize, text->len); - - get_random_bytes(msg_start, blocksize); /* "confounder" */ - memcpy(msg_start + blocksize, text->data, text->len); - - memset(msg_start + blocksize + text->len, pad, pad); - - if (compute_checksum(checksum_type, krb5_hdr, msg_start, - tmsglen, &md5cksum)) - goto out_err; - - if (krb5_encrypt(ctx->enc, NULL, msg_start, msg_start, - tmsglen)) - goto out_err; - + /* XXX removing support for now */ + goto out_err; } else { /* Sign only. 
*/ - if (compute_checksum(checksum_type, krb5_hdr, text->data, - text->len, &md5cksum)) + if (krb5_make_checksum(checksum_type, krb5_hdr, text, + &md5cksum)) goto out_err; } @@ -187,10 +148,11 @@ krb5_make_token(struct krb5_ctx *ctx, in md5cksum.data, md5cksum.len)) goto out_err; memcpy(krb5_hdr + 16, - md5cksum.data + md5cksum.len - CKSUM_SIZE, CKSUM_SIZE); + md5cksum.data + md5cksum.len - KRB5_CKSUM_LENGTH, + KRB5_CKSUM_LENGTH); dprintk("make_seal_token: cksum data: \n"); - print_hexl((u32 *) (krb5_hdr + 16), CKSUM_SIZE, 0); + print_hexl((u32 *) (krb5_hdr + 16), KRB5_CKSUM_LENGTH, 0); break; default: BUG(); diff -u --recursive --new-file --show-c-function linux-2.6.1-rc3/net/sunrpc/auth_gss/gss_krb5_unseal.c linux-2.6.1-37-attr/net/sunrpc/auth_gss/gss_krb5_unseal.c --- linux-2.6.1-rc3/net/sunrpc/auth_gss/gss_krb5_unseal.c 2004-01-08 17:43:22.000000000 -0500 +++ linux-2.6.1-37-attr/net/sunrpc/auth_gss/gss_krb5_unseal.c 2004-01-08 18:01:02.000000000 -0500 @@ -68,45 +68,42 @@ #endif -/* message_buffer is an input if MIC and an output if WRAP. */ +/* message_buffer is an input if toktype is MIC and an output if it is WRAP: + * If toktype is MIC: read_token is a mic token, and message_buffer is the + * data that the mic was supposedly taken over. + * If toktype is WRAP: read_token is a wrap token, and message_buffer is used + * to return the decrypted data. + */ +/* XXX will need to change prototype and/or just split into a separate function + * when we add privacy (because read_token will be in pages too). 
*/ u32 krb5_read_token(struct krb5_ctx *ctx, struct xdr_netobj *read_token, - struct xdr_netobj *message_buffer, + struct xdr_buf *message_buffer, int *qop_state, int toktype) { - s32 code; - int tmsglen = 0; - int conflen = 0; int signalg; int sealalg; - struct xdr_netobj token = {.len = 0, .data = NULL}; s32 checksum_type; - struct xdr_netobj cksum; struct xdr_netobj md5cksum = {.len = 0, .data = NULL}; - struct xdr_netobj plaind; - char *data_ptr; s32 now; - unsigned char *plain = NULL; - int cksum_len = 0; - int plainlen = 0; int direction; s32 seqnum; unsigned char *ptr = (unsigned char *)read_token->data; int bodysize; u32 ret = GSS_S_DEFECTIVE_TOKEN; - dprintk("RPC: krb5_read_token\n"); + dprintk("RPC: krb5_read_token\n"); - if (g_verify_token_header((struct xdr_netobj *) &ctx->mech_used, - &bodysize, &ptr, toktype, + if (g_verify_token_header(&ctx->mech_used, &bodysize, &ptr, toktype, read_token->len)) goto out; + /* XXX sanity-check bodysize?? */ if (toktype == KG_TOK_WRAP_MSG) { - message_buffer->len = 0; - message_buffer->data = NULL; + /* XXX gone */ + goto out; } /* get the sign and seal algorithms */ @@ -138,63 +135,6 @@ krb5_read_token(struct krb5_ctx *ctx, signalg != SGN_ALG_HMAC_SHA1_DES3_KD)) goto out; - /* starting with a single alg */ - switch (signalg) { - case SGN_ALG_DES_MAC_MD5: - cksum_len = 8; - break; - default: - goto out; - } - - if (toktype == KG_TOK_WRAP_MSG) - tmsglen = bodysize - (14 + cksum_len); - - /* get the token parameters */ - - /* decode the message, if WRAP */ - - if (toktype == KG_TOK_WRAP_MSG) { - dprintk("RPC: krb5_read_token KG_TOK_WRAP_MSG\n"); - - plain = kmalloc(tmsglen, GFP_KERNEL); - ret = GSS_S_FAILURE; - if (plain == NULL) - goto out; - - code = krb5_decrypt(ctx->enc, NULL, - ptr + 14 + cksum_len, plain, - tmsglen); - if (code) - goto out; - - plainlen = tmsglen; - - conflen = crypto_tfm_alg_blocksize(ctx->enc); - token.len = tmsglen - conflen - plain[tmsglen - 1]; - - if (token.len) { - token.data = 
kmalloc(token.len, GFP_KERNEL); - if (token.data == NULL) - goto out; - memcpy(token.data, plain + conflen, token.len); - } - - } else if (toktype == KG_TOK_MIC_MSG) { - dprintk("RPC: krb5_read_token KG_TOK_MIC_MSG\n"); - token = *message_buffer; - plain = token.data; - plainlen = token.len; - } else { - token.len = 0; - token.data = NULL; - plain = token.data; - plainlen = token.len; - } - - dprintk("RPC krb5_read_token: token.len %d plainlen %d\n", token.len, - plainlen); - /* compute the checksum of the message */ /* initialize the the cksum */ @@ -209,72 +149,28 @@ krb5_read_token(struct krb5_ctx *ctx, switch (signalg) { case SGN_ALG_DES_MAC_MD5: - dprintk("RPC krb5_read_token SGN_ALG_DES_MAC_MD5\n"); - /* compute the checksum of the message. - * 8 = bytes of token body to be checksummed according to spec - */ - - data_ptr = kmalloc(8 + plainlen, GFP_KERNEL); - ret = GSS_S_FAILURE; - if (!data_ptr) + ret = krb5_make_checksum(checksum_type, ptr - 2, + message_buffer, &md5cksum); + if (ret) goto out; - memcpy(data_ptr, ptr - 2, 8); - memcpy(data_ptr + 8, plain, plainlen); - - plaind.len = 8 + plainlen; - plaind.data = data_ptr; - - code = krb5_make_checksum(checksum_type, - &plaind, &md5cksum); - - kfree(data_ptr); - - if (code) + ret = krb5_encrypt(ctx->seq, NULL, md5cksum.data, + md5cksum.data, 16); + if (ret) goto out; - code = krb5_encrypt(ctx->seq, NULL, md5cksum.data, - md5cksum.data, 16); - if (code) + if (memcmp(md5cksum.data + 8, ptr + 14, 8)) { + ret = GSS_S_BAD_SIG; goto out; - - if (signalg == 0) - cksum.len = 8; - else - cksum.len = 16; - cksum.data = md5cksum.data + 16 - cksum.len; - - dprintk - ("RPC: krb5_read_token: memcmp digest cksum.len %d:\n", - cksum.len); - dprintk(" md5cksum.data\n"); - print_hexl((u32 *) md5cksum.data, 16, 0); - dprintk(" cksum.data:\n"); - print_hexl((u32 *) cksum.data, cksum.len, 0); - { - u32 *p; - - (u8 *) p = ptr + 14; - dprintk(" ptr+14:\n"); - print_hexl(p, cksum.len, 0); } - - code = memcmp(cksum.data, ptr + 14, 
cksum.len); break; default: ret = GSS_S_DEFECTIVE_TOKEN; goto out; } - ret = GSS_S_BAD_SIG; - if (code) - goto out; - /* it got through unscathed. Make sure the context is unexpired */ - if (toktype == KG_TOK_WRAP_MSG) - *message_buffer = token; - if (qop_state) *qop_state = GSS_C_QOP_DEFAULT; @@ -287,8 +183,8 @@ krb5_read_token(struct krb5_ctx *ctx, /* do sequencing checks */ ret = GSS_S_BAD_SIG; - if ((code = krb5_get_seq_num(ctx->seq, ptr + 14, ptr + 6, &direction, - &seqnum))) + if ((ret = krb5_get_seq_num(ctx->seq, ptr + 14, ptr + 6, &direction, + &seqnum))) goto out; if ((ctx->initiate && direction != 0xff) || @@ -298,9 +194,5 @@ krb5_read_token(struct krb5_ctx *ctx, ret = GSS_S_COMPLETE; out: if (md5cksum.data) kfree(md5cksum.data); - if (toktype == KG_TOK_WRAP_MSG) { - if (plain) kfree(plain); - if (ret && token.data) kfree(token.data); - } return ret; } diff -u --recursive --new-file --show-c-function linux-2.6.1-rc3/net/sunrpc/auth_gss/gss_mech_switch.c linux-2.6.1-37-attr/net/sunrpc/auth_gss/gss_mech_switch.c --- linux-2.6.1-rc3/net/sunrpc/auth_gss/gss_mech_switch.c 2004-01-08 17:41:30.000000000 -0500 +++ linux-2.6.1-37-attr/net/sunrpc/auth_gss/gss_mech_switch.c 2004-01-08 18:01:02.000000000 -0500 @@ -70,6 +70,7 @@ gss_mech_register(struct xdr_netobj * me } gm->gm_oid.len = mech_type->len; if (!(gm->gm_oid.data = kmalloc(mech_type->len, GFP_KERNEL))) { + kfree(gm); printk("Failed to allocate memory in gss_mech_register"); return -1; } @@ -195,7 +196,7 @@ gss_import_sec_context(struct xdr_netobj u32 gss_get_mic(struct gss_ctx *context_handle, u32 qop, - struct xdr_netobj *message, + struct xdr_buf *message, struct xdr_netobj *mic_token) { return context_handle->mech_type->gm_ops @@ -209,7 +210,7 @@ gss_get_mic(struct gss_ctx *context_hand u32 gss_verify_mic(struct gss_ctx *context_handle, - struct xdr_netobj *message, + struct xdr_buf *message, struct xdr_netobj *mic_token, u32 *qstate) { diff -u --recursive --new-file --show-c-function 
linux-2.6.1-rc3/net/sunrpc/auth_gss/gss_pseudoflavors.c linux-2.6.1-37-attr/net/sunrpc/auth_gss/gss_pseudoflavors.c --- linux-2.6.1-rc3/net/sunrpc/auth_gss/gss_pseudoflavors.c 2004-01-08 17:39:24.000000000 -0500 +++ linux-2.6.1-37-attr/net/sunrpc/auth_gss/gss_pseudoflavors.c 2004-01-08 17:59:13.000000000 -0500 @@ -92,6 +92,7 @@ gss_register_triple(u32 pseudoflavor, st return 0; err_unlock: + kfree(triple); spin_unlock(&registered_triples_lock); err: return -1; diff -u --recursive --new-file --show-c-function linux-2.6.1-rc3/net/sunrpc/clnt.c linux-2.6.1-37-attr/net/sunrpc/clnt.c --- linux-2.6.1-rc3/net/sunrpc/clnt.c 2004-01-08 17:37:56.000000000 -0500 +++ linux-2.6.1-37-attr/net/sunrpc/clnt.c 2004-01-08 18:05:58.000000000 -0500 @@ -30,6 +30,7 @@ #include #include +#include #include #include @@ -101,6 +102,7 @@ rpc_create_client(struct rpc_xprt *xprt, { struct rpc_version *version; struct rpc_clnt *clnt = NULL; + int len; dprintk("RPC: creating %s client for %s (xprt %p)\n", program->name, servname, xprt); @@ -115,23 +117,37 @@ rpc_create_client(struct rpc_xprt *xprt, goto out_no_clnt; memset(clnt, 0, sizeof(*clnt)); atomic_set(&clnt->cl_users, 0); + atomic_set(&clnt->cl_count, 1); + clnt->cl_parent = clnt; + + clnt->cl_server = clnt->cl_inline_name; + len = strlen(servname) + 1; + if (len > sizeof(clnt->cl_inline_name)) { + char *buf = kmalloc(len, GFP_KERNEL); + if (buf != 0) + clnt->cl_server = buf; + else + len = sizeof(clnt->cl_inline_name); + } + strlcpy(clnt->cl_server, servname, len); clnt->cl_xprt = xprt; clnt->cl_procinfo = version->procs; clnt->cl_maxproc = version->nrprocs; - clnt->cl_server = servname; clnt->cl_protname = program->name; + clnt->cl_pmap = &clnt->cl_pmap_default; clnt->cl_port = xprt->addr.sin_port; clnt->cl_prog = program->number; clnt->cl_vers = version->number; clnt->cl_prot = xprt->prot; clnt->cl_stats = program->stats; - INIT_RPC_WAITQ(&clnt->cl_bindwait, "bindwait"); + INIT_RPC_WAITQ(&clnt->cl_pmap_default.pm_bindwait, "bindwait"); if
(!clnt->cl_port) clnt->cl_autobind = 1; - rpc_init_rtt(&clnt->cl_rtt, xprt->timeout.to_initval); + clnt->cl_rtt = &clnt->cl_rtt_default; + rpc_init_rtt(&clnt->cl_rtt_default, xprt->timeout.to_initval); if (rpc_setup_pipedir(clnt, program->pipe_dir_name) < 0) goto out_no_path; @@ -156,12 +172,40 @@ out_no_clnt: out_no_auth: rpc_rmdir(clnt->cl_pathname); out_no_path: + if (clnt->cl_server != clnt->cl_inline_name) + kfree(clnt->cl_server); kfree(clnt); clnt = NULL; goto out; } /* + * This function clones the RPC client structure. It allows us to share the + * same transport while varying parameters such as the authentication + * flavour. + */ +struct rpc_clnt * +rpc_clone_client(struct rpc_clnt *clnt) +{ + struct rpc_clnt *new; + + new = (struct rpc_clnt *)kmalloc(sizeof(*new), GFP_KERNEL); + if (!new) + goto out_no_clnt; + memcpy(new, clnt, sizeof(*new)); + atomic_set(&new->cl_count, 1); + atomic_set(&new->cl_users, 0); + atomic_inc(&new->cl_parent->cl_count); + if (new->cl_auth) + atomic_inc(&new->cl_auth->au_count); +out: + return new; +out_no_clnt: + printk(KERN_INFO "RPC: out of memory in %s\n", __FUNCTION__); + goto out; +} + +/* * Properly shut down an RPC client, terminating all outstanding * requests. 
Note that we must be certain that cl_oneshot and * cl_dead are cleared, or else the client would be destroyed @@ -200,19 +244,29 @@ rpc_shutdown_client(struct rpc_clnt *cln int rpc_destroy_client(struct rpc_clnt *clnt) { + if (!atomic_dec_and_test(&clnt->cl_count)) + return 1; + BUG_ON(atomic_read(&clnt->cl_users) != 0); + dprintk("RPC: destroying %s client for %s\n", clnt->cl_protname, clnt->cl_server); - if (clnt->cl_auth) { rpcauth_destroy(clnt->cl_auth); clnt->cl_auth = NULL; } + if (clnt->cl_parent != clnt) { + rpc_destroy_client(clnt->cl_parent); + goto out_free; + } if (clnt->cl_pathname[0]) rpc_rmdir(clnt->cl_pathname); if (clnt->cl_xprt) { xprt_destroy(clnt->cl_xprt); clnt->cl_xprt = NULL; } + if (clnt->cl_server != clnt->cl_inline_name) + kfree(clnt->cl_server); +out_free: kfree(clnt); return 0; } @@ -567,7 +621,8 @@ call_encode(struct rpc_task *task) rpc_exit(task, -EIO); return; } - if (encode && (status = encode(req, p, task->tk_msg.rpc_argp)) < 0) { + if (encode && (status = rpcauth_wrap_req(task, encode, req, p, + task->tk_msg.rpc_argp)) < 0) { printk(KERN_WARNING "%s: can't encode arguments: %d\n", clnt->cl_protname, -status); rpc_exit(task, status); @@ -743,7 +798,7 @@ call_timeout(struct rpc_task *task) to->to_retries = clnt->cl_timeout.to_retries; dprintk("RPC: %4d call_timeout (major)\n", task->tk_pid); - if (clnt->cl_softrtry) { + if (RPC_IS_SOFT(task)) { if (clnt->cl_chatty) printk(KERN_NOTICE "%s: server %s not responding, timed out\n", clnt->cl_protname, clnt->cl_server); @@ -786,7 +841,7 @@ call_decode(struct rpc_task *task) } if (task->tk_status < 12) { - if (!clnt->cl_softrtry) { + if (!RPC_IS_SOFT(task)) { task->tk_action = call_bind; clnt->cl_stats->rpcretrans++; goto out_retry; @@ -826,7 +881,8 @@ call_decode(struct rpc_task *task) task->tk_action = NULL; if (decode) - task->tk_status = decode(req, p, task->tk_msg.rpc_resp); + task->tk_status = rpcauth_unwrap_resp(task, decode, req, p, + task->tk_msg.rpc_resp); dprintk("RPC: %4d 
call_decode result %d\n", task->tk_pid, task->tk_status); return; diff -u --recursive --new-file --show-c-function linux-2.6.1-rc3/net/sunrpc/pmap_clnt.c linux-2.6.1-37-attr/net/sunrpc/pmap_clnt.c --- linux-2.6.1-rc3/net/sunrpc/pmap_clnt.c 2004-01-08 17:39:31.000000000 -0500 +++ linux-2.6.1-37-attr/net/sunrpc/pmap_clnt.c 2004-01-08 18:02:36.000000000 -0500 @@ -41,7 +41,7 @@ static spinlock_t pmap_lock = SPIN_LOCK void rpc_getport(struct rpc_task *task, struct rpc_clnt *clnt) { - struct rpc_portmap *map = &clnt->cl_pmap; + struct rpc_portmap *map = clnt->cl_pmap; struct sockaddr_in *sap = &clnt->cl_xprt->addr; struct rpc_message msg = { .rpc_proc = &pmap_procedures[PMAP_GETPORT], @@ -57,12 +57,12 @@ rpc_getport(struct rpc_task *task, struc map->pm_prog, map->pm_vers, map->pm_prot); spin_lock(&pmap_lock); - if (clnt->cl_binding) { - rpc_sleep_on(&clnt->cl_bindwait, task, NULL, 0); + if (map->pm_binding) { + rpc_sleep_on(&map->pm_bindwait, task, NULL, 0); spin_unlock(&pmap_lock); return; } - clnt->cl_binding = 1; + map->pm_binding = 1; spin_unlock(&pmap_lock); task->tk_status = -EACCES; /* why set this? 
returns -EIO below */ @@ -85,8 +85,8 @@ rpc_getport(struct rpc_task *task, struc bailout: spin_lock(&pmap_lock); - clnt->cl_binding = 0; - rpc_wake_up(&clnt->cl_bindwait); + map->pm_binding = 0; + rpc_wake_up(&map->pm_bindwait); spin_unlock(&pmap_lock); task->tk_status = -EIO; task->tk_action = NULL; @@ -129,6 +129,7 @@ static void pmap_getport_done(struct rpc_task *task) { struct rpc_clnt *clnt = task->tk_client; + struct rpc_portmap *map = clnt->cl_pmap; dprintk("RPC: %4d pmap_getport_done(status %d, port %d)\n", task->tk_pid, task->tk_status, clnt->cl_port); @@ -145,8 +146,8 @@ pmap_getport_done(struct rpc_task *task) clnt->cl_xprt->addr.sin_port = clnt->cl_port; } spin_lock(&pmap_lock); - clnt->cl_binding = 0; - rpc_wake_up(&clnt->cl_bindwait); + map->pm_binding = 0; + rpc_wake_up(&map->pm_bindwait); spin_unlock(&pmap_lock); } diff -u --recursive --new-file --show-c-function linux-2.6.1-rc3/net/sunrpc/rpc_pipe.c linux-2.6.1-37-attr/net/sunrpc/rpc_pipe.c --- linux-2.6.1-rc3/net/sunrpc/rpc_pipe.c 2004-01-08 17:37:00.000000000 -0500 +++ linux-2.6.1-37-attr/net/sunrpc/rpc_pipe.c 2004-01-08 17:56:53.000000000 -0500 @@ -25,6 +25,7 @@ #include #include +#include #include static struct vfsmount *rpc_mount; @@ -35,6 +36,8 @@ static struct file_system_type rpc_pipe_ static kmem_cache_t *rpc_inode_cachep; +#define RPC_UPCALL_TIMEOUT (30*HZ) + static void __rpc_purge_upcall(struct inode *inode, int err) { @@ -47,15 +50,25 @@ __rpc_purge_upcall(struct inode *inode, msg->errno = err; rpci->ops->destroy_msg(msg); } + while (!list_empty(&rpci->in_upcall)) { + msg = list_entry(rpci->in_upcall.next, struct rpc_pipe_msg, list); /* dequeue from in_upcall, the list this loop tests, not from pipe */ + list_del_init(&msg->list); + msg->errno = err; + rpci->ops->destroy_msg(msg); + } rpci->pipelen = 0; wake_up(&rpci->waitq); } -void -rpc_purge_upcall(struct inode *inode, int err) +static void +rpc_timeout_upcall_queue(void *data) { + struct rpc_inode *rpci = (struct rpc_inode *)data; + struct inode *inode = &rpci->vfs_inode; + down(&inode->i_sem); -
__rpc_purge_upcall(inode, err); + if (rpci->nreaders == 0 && !list_empty(&rpci->pipe)) + __rpc_purge_upcall(inode, -ETIMEDOUT); up(&inode->i_sem); } @@ -66,7 +79,13 @@ rpc_queue_upcall(struct inode *inode, st int res = 0; down(&inode->i_sem); - if (rpci->nreaders || (rpci->flags & RPC_PIPE_WAIT_FOR_OPEN)) { + if (rpci->nreaders) { + list_add_tail(&msg->list, &rpci->pipe); + rpci->pipelen += msg->len; + } else if (rpci->flags & RPC_PIPE_WAIT_FOR_OPEN) { + if (list_empty(&rpci->pipe)) + schedule_delayed_work(&rpci->queue_timeout, + RPC_UPCALL_TIMEOUT); list_add_tail(&msg->list, &rpci->pipe); rpci->pipelen += msg->len; } else @@ -76,17 +95,31 @@ rpc_queue_upcall(struct inode *inode, st return res; } -void -rpc_inode_setowner(struct inode *inode, void *private) +static void +rpc_close_pipes(struct inode *inode) { struct rpc_inode *rpci = RPC_I(inode); + + cancel_delayed_work(&rpci->queue_timeout); + flush_scheduled_work(); down(&inode->i_sem); - rpci->private = private; - if (!private) + if (rpci->ops != NULL) { + rpci->nreaders = 0; __rpc_purge_upcall(inode, -EPIPE); + rpci->nwriters = 0; + if (rpci->ops->release_pipe) + rpci->ops->release_pipe(inode); + rpci->ops = NULL; + } up(&inode->i_sem); } +static inline void +rpc_inode_setowner(struct inode *inode, void *private) +{ + RPC_I(inode)->private = private; +} + static struct inode * rpc_alloc_inode(struct super_block *sb) { @@ -110,9 +143,11 @@ rpc_pipe_open(struct inode *inode, struc int res = -ENXIO; down(&inode->i_sem); - if (rpci->private != NULL) { + if (rpci->ops != NULL) { if (filp->f_mode & FMODE_READ) rpci->nreaders ++; + if (filp->f_mode & FMODE_WRITE) + rpci->nwriters ++; res = 0; } up(&inode->i_sem); @@ -125,16 +160,24 @@ rpc_pipe_release(struct inode *inode, st struct rpc_inode *rpci = RPC_I(filp->f_dentry->d_inode); struct rpc_pipe_msg *msg; + down(&inode->i_sem); + if (rpci->ops == NULL) + goto out; msg = (struct rpc_pipe_msg *)filp->private_data; if (msg != NULL) { msg->errno = -EPIPE; + 
list_del_init(&msg->list); rpci->ops->destroy_msg(msg); } - down(&inode->i_sem); + if (filp->f_mode & FMODE_WRITE) + rpci->nwriters --; if (filp->f_mode & FMODE_READ) rpci->nreaders --; - if (!rpci->nreaders && !(rpci->flags & RPC_PIPE_WAIT_FOR_OPEN)) + if (!rpci->nreaders) __rpc_purge_upcall(inode, -EPIPE); + if (rpci->ops->release_pipe) + rpci->ops->release_pipe(inode); +out: up(&inode->i_sem); return 0; } @@ -148,7 +191,7 @@ rpc_pipe_read(struct file *filp, char __ int res = 0; down(&inode->i_sem); - if (!rpci->private) { + if (rpci->ops == NULL) { res = -EPIPE; goto out_unlock; } @@ -158,7 +201,7 @@ rpc_pipe_read(struct file *filp, char __ msg = list_entry(rpci->pipe.next, struct rpc_pipe_msg, list); - list_del_init(&msg->list); + list_move(&msg->list, &rpci->in_upcall); rpci->pipelen -= msg->len; filp->private_data = msg; msg->copied = 0; @@ -170,6 +213,7 @@ rpc_pipe_read(struct file *filp, char __ res = rpci->ops->upcall(filp, msg, buf, len); if (res < 0 || msg->len == msg->copied) { filp->private_data = NULL; + list_del_init(&msg->list); rpci->ops->destroy_msg(msg); } out_unlock: @@ -186,7 +230,7 @@ rpc_pipe_write(struct file *filp, const down(&inode->i_sem); res = -EPIPE; - if (rpci->private != NULL) + if (rpci->ops != NULL) res = rpci->ops->downcall(filp, buf, len); up(&inode->i_sem); return res; @@ -202,7 +246,7 @@ rpc_pipe_poll(struct file *filp, struct poll_wait(filp, &rpci->waitq, wait); mask = POLLOUT | POLLWRNORM; - if (rpci->private == NULL) + if (rpci->ops == NULL) mask |= POLLERR | POLLHUP; if (!list_empty(&rpci->pipe)) mask |= POLLIN | POLLRDNORM; @@ -218,7 +262,7 @@ rpc_pipe_ioctl(struct inode *ino, struct switch (cmd) { case FIONREAD: - if (!rpci->private) + if (rpci->ops == NULL) return -EPIPE; len = rpci->pipelen; if (filp->private_data) { @@ -460,6 +504,7 @@ repeat: do { dentry = dvec[--n]; if (dentry->d_inode) { + rpc_close_pipes(dentry->d_inode); rpc_inode_setowner(dentry->d_inode, NULL); simple_unlink(dir, dentry); } @@ -539,7 +584,10 @@ 
__rpc_rmdir(struct inode *dir, struct de int error; shrink_dcache_parent(dentry); - rpc_inode_setowner(dentry->d_inode, NULL); + if (dentry->d_inode) { + rpc_close_pipes(dentry->d_inode); + rpc_inode_setowner(dentry->d_inode, NULL); + } if ((error = simple_rmdir(dir, dentry)) != 0) return error; if (!error) { @@ -691,6 +739,7 @@ rpc_unlink(char *path) } d_drop(dentry); if (dentry->d_inode) { + rpc_close_pipes(dentry->d_inode); rpc_inode_setowner(dentry->d_inode, NULL); error = simple_unlink(dir, dentry); } @@ -766,9 +815,12 @@ init_once(void * foo, kmem_cache_t * cac inode_init_once(&rpci->vfs_inode); rpci->private = NULL; rpci->nreaders = 0; + rpci->nwriters = 0; + INIT_LIST_HEAD(&rpci->in_upcall); INIT_LIST_HEAD(&rpci->pipe); rpci->pipelen = 0; init_waitqueue_head(&rpci->waitq); + INIT_WORK(&rpci->queue_timeout, rpc_timeout_upcall_queue, rpci); rpci->ops = NULL; } } diff -u --recursive --new-file --show-c-function linux-2.6.1-rc3/net/sunrpc/sched.c linux-2.6.1-37-attr/net/sunrpc/sched.c --- linux-2.6.1-rc3/net/sunrpc/sched.c 2004-01-08 17:40:08.000000000 -0500 +++ linux-2.6.1-37-attr/net/sunrpc/sched.c 2004-01-08 18:05:59.000000000 -0500 @@ -731,8 +731,11 @@ rpc_init_task(struct rpc_task *task, str list_add(&task->tk_task, &all_tasks); spin_unlock(&rpc_sched_lock); - if (clnt) + if (clnt) { atomic_inc(&clnt->cl_users); + if (clnt->cl_softrtry) + task->tk_flags |= RPC_TASK_SOFT; + } #ifdef RPC_DEBUG task->tk_magic = 0xf00baa; diff -u --recursive --new-file --show-c-function linux-2.6.1-rc3/net/sunrpc/sunrpc_syms.c linux-2.6.1-37-attr/net/sunrpc/sunrpc_syms.c --- linux-2.6.1-rc3/net/sunrpc/sunrpc_syms.c 2004-01-08 17:44:15.000000000 -0500 +++ linux-2.6.1-37-attr/net/sunrpc/sunrpc_syms.c 2004-01-08 18:02:36.000000000 -0500 @@ -21,6 +21,7 @@ #include #include #include +#include #include @@ -40,6 +41,7 @@ EXPORT_SYMBOL(rpc_release_task); /* RPC client functions */ EXPORT_SYMBOL(rpc_create_client); +EXPORT_SYMBOL(rpc_clone_client); EXPORT_SYMBOL(rpc_destroy_client); 
EXPORT_SYMBOL(rpc_shutdown_client); EXPORT_SYMBOL(rpc_release_client); @@ -65,6 +67,7 @@ EXPORT_SYMBOL(xprt_set_timeout); /* Client credential cache */ EXPORT_SYMBOL(rpcauth_register); EXPORT_SYMBOL(rpcauth_unregister); +EXPORT_SYMBOL(rpcauth_create); EXPORT_SYMBOL(rpcauth_lookupcred); EXPORT_SYMBOL(rpcauth_lookup_credcache); EXPORT_SYMBOL(rpcauth_free_credcache); @@ -125,6 +128,9 @@ EXPORT_SYMBOL(xdr_inline_pages); EXPORT_SYMBOL(xdr_shift_buf); EXPORT_SYMBOL(xdr_write_pages); EXPORT_SYMBOL(xdr_read_pages); +EXPORT_SYMBOL(xdr_buf_from_iov); +EXPORT_SYMBOL(xdr_buf_subsegment); +EXPORT_SYMBOL(xdr_buf_read_netobj); /* Debugging symbols */ #ifdef RPC_DEBUG diff -u --recursive --new-file --show-c-function linux-2.6.1-rc3/net/sunrpc/xdr.c linux-2.6.1-37-attr/net/sunrpc/xdr.c --- linux-2.6.1-rc3/net/sunrpc/xdr.c 2004-01-08 17:42:12.000000000 -0500 +++ linux-2.6.1-37-attr/net/sunrpc/xdr.c 2004-01-08 18:01:59.000000000 -0500 @@ -107,16 +107,23 @@ void xdr_encode_pages(struct xdr_buf *xdr, struct page **pages, unsigned int base, unsigned int len) { + struct iovec *tail = xdr->tail; + u32 *p; + xdr->pages = pages; xdr->page_base = base; xdr->page_len = len; + p = (u32 *)xdr->head[0].iov_base + XDR_QUADLEN(xdr->head[0].iov_len); + tail->iov_base = p; + tail->iov_len = 0; + if (len & 3) { - struct iovec *iov = xdr->tail; unsigned int pad = 4 - (len & 3); - iov->iov_base = (void *) "\0\0\0"; - iov->iov_len = pad; + *p = 0; + tail->iov_base = (char *)p + (len & 3); + tail->iov_len = pad; len += pad; } xdr->len += len; @@ -538,7 +545,7 @@ _copy_to_pages(struct page **pages, size * Copies data into an arbitrary memory location from an array of pages * The copy is assumed to be non-overlapping. 
*/ -static void +void _copy_from_pages(char *p, struct page **pages, size_t pgbase, size_t len) { struct page **pgfrom; @@ -731,3 +738,145 @@ xdr_read_pages(struct xdr_stream *xdr, u xdr->p = (uint32_t *)((char *)iov->iov_base + padding); xdr->end = (uint32_t *)((char *)iov->iov_base + iov->iov_len); } + +static struct iovec empty_iov = {.iov_base = NULL, .iov_len = 0}; + +void +xdr_buf_from_iov(struct iovec *iov, struct xdr_buf *buf) +{ + buf->head[0] = *iov; + buf->tail[0] = empty_iov; + buf->page_len = 0; + buf->len = iov->iov_len; +} + +/* Sets subiov to the intersection of iov with the buffer of length len + * starting base bytes after iov. Indicates empty intersection by setting + * length of subiov to zero. Decrements len by length of subiov, sets base + * to zero (or decrements it by length of iov if subiov is empty). */ +static void +iov_subsegment(struct iovec *iov, struct iovec *subiov, int *base, int *len) +{ + if (*base > iov->iov_len) { + subiov->iov_base = NULL; + subiov->iov_len = 0; + *base -= iov->iov_len; + } else { + subiov->iov_base = iov->iov_base + *base; + subiov->iov_len = min(*len, (int)iov->iov_len - *base); + *base = 0; + } + *len -= subiov->iov_len; +} + +/* Sets subbuf to the portion of buf of length len beginning base bytes + * from the start of buf. Returns -1 if base or length are out of bounds.
*/ +int +xdr_buf_subsegment(struct xdr_buf *buf, struct xdr_buf *subbuf, + int base, int len) +{ + int i; + + subbuf->len = len; + iov_subsegment(buf->head, subbuf->head, &base, &len); + + if (base < buf->page_len) { + i = (base + buf->page_base) >> PAGE_CACHE_SHIFT; + subbuf->pages = &buf->pages[i]; + subbuf->page_base = (base + buf->page_base) & ~PAGE_CACHE_MASK; + subbuf->page_len = min((int)buf->page_len - base, len); + len -= subbuf->page_len; + base = 0; + } else { + base -= buf->page_len; + subbuf->page_len = 0; + } + + iov_subsegment(buf->tail, subbuf->tail, &base, &len); + if (base || len) + return -1; + return 0; +} + +/* obj is assumed to point to allocated memory of size at least len: */ +static int +read_bytes_from_xdr_buf(struct xdr_buf *buf, int base, void *obj, int len) +{ + struct xdr_buf subbuf; + int this_len; + int status; + + status = xdr_buf_subsegment(buf, &subbuf, base, len); + if (status) + goto out; + this_len = min(len, (int)subbuf.head[0].iov_len); + memcpy(obj, subbuf.head[0].iov_base, this_len); + len -= this_len; + obj += this_len; + this_len = min(len, (int)subbuf.page_len); + if (this_len) + _copy_from_pages(obj, subbuf.pages, subbuf.page_base, this_len); + len -= this_len; + obj += this_len; + this_len = min(len, (int)subbuf.tail[0].iov_len); + memcpy(obj, subbuf.tail[0].iov_base, this_len); +out: + return status; +} + +static int +read_u32_from_xdr_buf(struct xdr_buf *buf, int base, u32 *obj) +{ + u32 raw; + int status; + + status = read_bytes_from_xdr_buf(buf, base, &raw, sizeof(*obj)); + if (status) + return status; + *obj = ntohl(raw); + return 0; +} + +/* If the netobj starting offset bytes from the start of xdr_buf is contained + * entirely in the head or the tail, set object to point to it; otherwise + * try to find space for it at the end of the tail, copy it there, and + * set obj to point to it. 
*/ +int +xdr_buf_read_netobj(struct xdr_buf *buf, struct xdr_netobj *obj, int offset) +{ + u32 tail_offset = buf->head[0].iov_len + buf->page_len; + u32 obj_end_offset; + + if (read_u32_from_xdr_buf(buf, offset, &obj->len)) + goto out; + obj_end_offset = offset + 4 + obj->len; + + if (obj_end_offset <= buf->head[0].iov_len) { + /* The obj is contained entirely in the head: */ + obj->data = buf->head[0].iov_base + offset + 4; + } else if (offset + 4 >= tail_offset) { + if (obj_end_offset - tail_offset + > buf->tail[0].iov_len) + goto out; + /* The obj is contained entirely in the tail: */ + obj->data = buf->tail[0].iov_base + + offset - tail_offset + 4; + } else { + /* use end of tail as storage for obj: + * (We don't copy to the beginning because then we'd have + * to worry about doing a potentially overlapping copy. + * This assumes the object is at most half the length of the + * tail.) */ + if (obj->len > buf->tail[0].iov_len) + goto out; + obj->data = buf->tail[0].iov_base + buf->tail[0].iov_len - + obj->len; + if (read_bytes_from_xdr_buf(buf, offset + 4, + obj->data, obj->len)) + goto out; + + } + return 0; +out: + return -1; +} diff -u --recursive --new-file --show-c-function linux-2.6.1-rc3/net/sunrpc/xprt.c linux-2.6.1-37-attr/net/sunrpc/xprt.c --- linux-2.6.1-rc3/net/sunrpc/xprt.c 2004-01-08 17:41:52.000000000 -0500 +++ linux-2.6.1-37-attr/net/sunrpc/xprt.c 2004-01-08 18:06:26.000000000 -0500 @@ -59,6 +59,7 @@ #include #include #include +#include #include #include @@ -75,6 +76,7 @@ #endif #define XPRT_MAX_BACKOFF (8) +#define XPRT_IDLE_TIMEOUT (5*60*HZ) /* * Local functions @@ -139,25 +141,33 @@ __xprt_lock_write(struct rpc_xprt *xprt, { struct rpc_rqst *req = task->tk_rqstp; - if (!xprt->snd_task) { - if (xprt->nocong || __xprt_get_cong(xprt, task)) { - xprt->snd_task = task; - if (req) { - req->rq_bytes_sent = 0; - req->rq_ntrans++; - } - } + if (test_and_set_bit(XPRT_LOCKED, &xprt->sockstate)) { + if (task == xprt->snd_task) + return 1; + if (task == 
NULL) + return 0; + goto out_sleep; } - if (xprt->snd_task != task) { - dprintk("RPC: %4d TCP write queue full\n", task->tk_pid); - task->tk_timeout = 0; - task->tk_status = -EAGAIN; - if (req && req->rq_ntrans) - rpc_sleep_on(&xprt->resend, task, NULL, NULL); - else - rpc_sleep_on(&xprt->sending, task, NULL, NULL); + if (xprt->nocong || __xprt_get_cong(xprt, task)) { + xprt->snd_task = task; + if (req) { + req->rq_bytes_sent = 0; + req->rq_ntrans++; + } + return 1; } - return xprt->snd_task == task; + smp_mb__before_clear_bit(); + clear_bit(XPRT_LOCKED, &xprt->sockstate); + smp_mb__after_clear_bit(); +out_sleep: + dprintk("RPC: %4d failed to lock socket %p\n", task->tk_pid, xprt); + task->tk_timeout = 0; + task->tk_status = -EAGAIN; + if (req && req->rq_ntrans) + rpc_sleep_on(&xprt->resend, task, NULL, NULL); + else + rpc_sleep_on(&xprt->sending, task, NULL, NULL); + return 0; } static inline int @@ -177,15 +187,15 @@ __xprt_lock_write_next(struct rpc_xprt * { struct rpc_task *task; - if (xprt->snd_task) + if (test_and_set_bit(XPRT_LOCKED, &xprt->sockstate)) return; + if (!xprt->nocong && RPCXPRT_CONGESTED(xprt)) + goto out_unlock; task = rpc_wake_up_next(&xprt->resend); if (!task) { - if (!xprt->nocong && RPCXPRT_CONGESTED(xprt)) - return; task = rpc_wake_up_next(&xprt->sending); if (!task) - return; + goto out_unlock; } if (xprt->nocong || __xprt_get_cong(xprt, task)) { struct rpc_rqst *req = task->tk_rqstp; @@ -194,7 +204,12 @@ __xprt_lock_write_next(struct rpc_xprt * req->rq_bytes_sent = 0; req->rq_ntrans++; } + return; } +out_unlock: + smp_mb__before_clear_bit(); + clear_bit(XPRT_LOCKED, &xprt->sockstate); + smp_mb__after_clear_bit(); } /* @@ -203,9 +218,13 @@ __xprt_lock_write_next(struct rpc_xprt * static void __xprt_release_write(struct rpc_xprt *xprt, struct rpc_task *task) { - if (xprt->snd_task == task) + if (xprt->snd_task == task) { xprt->snd_task = NULL; - __xprt_lock_write_next(xprt); + smp_mb__before_clear_bit(); + clear_bit(XPRT_LOCKED, 
&xprt->sockstate); + smp_mb__after_clear_bit(); + __xprt_lock_write_next(xprt); + } } static inline void @@ -393,6 +412,15 @@ xprt_close(struct rpc_xprt *xprt) sock_release(sock); } +static void +xprt_socket_autoclose(void *args) +{ + struct rpc_xprt *xprt = (struct rpc_xprt *)args; + + xprt_close(xprt); + xprt_release_write(xprt, NULL); +} + /* * Mark a transport as disconnected */ @@ -407,6 +435,27 @@ xprt_disconnect(struct rpc_xprt *xprt) } /* + * Used to allow disconnection when we've been idle + */ +static void +xprt_init_autodisconnect(unsigned long data) +{ + struct rpc_xprt *xprt = (struct rpc_xprt *)data; + + spin_lock(&xprt->sock_lock); + if (!list_empty(&xprt->recv) || xprt->shutdown) + goto out_abort; + if (test_and_set_bit(XPRT_LOCKED, &xprt->sockstate)) + goto out_abort; + spin_unlock(&xprt->sock_lock); + /* Let keventd close the socket */ + schedule_work(&xprt->task_cleanup); + return; +out_abort: + spin_unlock(&xprt->sock_lock); +} + +/* * Attempt to connect a TCP socket. * */ @@ -488,7 +537,7 @@ xprt_connect(struct rpc_task *task) case -ECONNREFUSED: case -ECONNRESET: case -ENOTCONN: - if (!task->tk_client->cl_softrtry) { + if (!RPC_IS_SOFT(task)) { rpc_delay(task, RPC_REESTABLISH_TIMEOUT); task->tk_status = -ENOTCONN; break; @@ -496,7 +545,7 @@ xprt_connect(struct rpc_task *task) default: /* Report myriad other possible returns. If this file * system is soft mounted, just error out, like Solaris. 
*/ - if (task->tk_client->cl_softrtry) { + if (RPC_IS_SOFT(task)) { printk(KERN_WARNING "RPC: error %d connecting to server %s, exiting\n", -status, task->tk_client->cl_server); @@ -530,7 +579,7 @@ xprt_connect_status(struct rpc_task *tas } /* if soft mounted, just cause this RPC to fail */ - if (task->tk_client->cl_softrtry) + if (RPC_IS_SOFT(task)) task->tk_status = -EIO; switch (task->tk_status) { @@ -584,9 +633,9 @@ xprt_complete_rqst(struct rpc_xprt *xprt __xprt_put_cong(xprt, req); if (timer) { if (req->rq_ntrans == 1) - rpc_update_rtt(&clnt->cl_rtt, timer, + rpc_update_rtt(clnt->cl_rtt, timer, (long)jiffies - req->rq_xtime); - rpc_set_timeo(&clnt->cl_rtt, timer, req->rq_ntrans - 1); + rpc_set_timeo(clnt->cl_rtt, timer, req->rq_ntrans - 1); } } @@ -1224,8 +1273,8 @@ xprt_transmit(struct rpc_task *task) spin_lock_bh(&xprt->sock_lock); if (!xprt->nocong) { int timer = task->tk_msg.rpc_proc->p_timer; - task->tk_timeout = rpc_calc_rto(&clnt->cl_rtt, timer); - task->tk_timeout <<= rpc_ntimeo(&clnt->cl_rtt, timer); + task->tk_timeout = rpc_calc_rto(clnt->cl_rtt, timer); + task->tk_timeout <<= rpc_ntimeo(clnt->cl_rtt, timer); task->tk_timeout <<= clnt->cl_timeout.to_retries - req->rq_timeout.to_retries; if (task->tk_timeout > req->rq_timeout.to_maxval) @@ -1254,6 +1303,8 @@ xprt_reserve(struct rpc_task *task) spin_lock(&xprt->xprt_lock); do_xprt_reserve(task); spin_unlock(&xprt->xprt_lock); + if (task->tk_rqstp) + del_timer_sync(&xprt->timer); } } @@ -1333,6 +1384,9 @@ xprt_release(struct rpc_task *task) __xprt_put_cong(xprt, req); if (!list_empty(&req->rq_list)) list_del(&req->rq_list); + xprt->last_used = jiffies; + if (list_empty(&xprt->recv) && !xprt->shutdown) + mod_timer(&xprt->timer, xprt->last_used + XPRT_IDLE_TIMEOUT); spin_unlock_bh(&xprt->sock_lock); task->tk_rqstp = NULL; memset(req, 0, sizeof(*req)); /* mark unused */ @@ -1403,6 +1457,11 @@ xprt_setup(int proto, struct sockaddr_in init_waitqueue_head(&xprt->cong_wait); INIT_LIST_HEAD(&xprt->recv); + 
INIT_WORK(&xprt->task_cleanup, xprt_socket_autoclose, xprt); + init_timer(&xprt->timer); + xprt->timer.function = xprt_init_autodisconnect; + xprt->timer.data = (unsigned long) xprt; + xprt->last_used = jiffies; /* Set timeout parameters */ if (to) { @@ -1583,6 +1642,7 @@ xprt_shutdown(struct rpc_xprt *xprt) rpc_wake_up(&xprt->backlog); if (waitqueue_active(&xprt->cong_wait)) wake_up(&xprt->cong_wait); + del_timer_sync(&xprt->timer); } /*