All of the above --- Index: linux-2.6.11/fs/Kconfig =================================================================== --- linux-2.6.11.orig/fs/Kconfig +++ linux-2.6.11/fs/Kconfig @@ -1515,6 +1515,20 @@ config RPCSEC_GSS_SPKM3 If unsure, say N. +config RPCSEC_GSS_KEYRING + bool "Secure RPC: keyring support (EXPERIMENTAL)" + depends on SUNRPC_GSS && KEYS && EXPERIMENTAL + help + Use the new RPCSEC_GSS upcall mechanism based on keyrings. + This allows individual threads, processes or groups of + processes to specify their own authentication tokens, + providing much the same functionality that AFS PAGs used to. + + Note: requires the new helper program /sbin/request-key, as + well as an updated rpc.gssd daemon in order to work. + + If unsure, say N. + config SMB_FS tristate "SMB file system support (to mount Windows shares etc.)" depends on INET Index: linux-2.6.11/fs/lockd/clntproc.c =================================================================== --- linux-2.6.11.orig/fs/lockd/clntproc.c +++ linux-2.6.11/fs/lockd/clntproc.c @@ -322,14 +322,13 @@ static int nlm_wait_on_grace(wait_queue_ /* * Generic NLM call */ -int +static int nlmclnt_call(struct nlm_rqst *req, u32 proc) { struct nlm_host *host = req->a_host; struct rpc_clnt *clnt; struct nlm_args *argp = &req->a_args; struct nlm_res *resp = &req->a_res; - struct file *filp = argp->lock.fl.fl_file; struct rpc_message msg = { .rpc_argp = argp, .rpc_resp = resp, @@ -339,9 +338,6 @@ nlmclnt_call(struct nlm_rqst *req, u32 p dprintk("lockd: call procedure %d on %s\n", (int)proc, host->h_name); - if (filp) - msg.rpc_cred = nfs_file_cred(filp); - do { if (host->h_reclaiming && !argp->reclaim) goto in_grace_period; @@ -428,14 +424,13 @@ nlmsvc_async_call(struct nlm_rqst *req, return status; } -int +static int nlmclnt_async_call(struct nlm_rqst *req, u32 proc, rpc_action callback) { struct nlm_host *host = req->a_host; struct rpc_clnt *clnt; struct nlm_args *argp = &req->a_args; struct nlm_res *resp = &req->a_res; - 
struct file *file = argp->lock.fl.fl_file; struct rpc_message msg = { .rpc_argp = argp, .rpc_resp = resp, @@ -450,11 +445,9 @@ nlmclnt_async_call(struct nlm_rqst *req, return -ENOLCK; msg.rpc_proc = &clnt->cl_procinfo[proc]; - /* bootstrap and kick off the async RPC call */ - if (file) - msg.rpc_cred = nfs_file_cred(file); /* Increment host refcount */ nlm_get_host(host); + /* bootstrap and kick off the async RPC call */ status = rpc_call_async(clnt, &msg, RPC_TASK_ASYNC, callback, req); if (status < 0) nlm_release_host(host); @@ -516,6 +509,24 @@ static void nlmclnt_locks_init_private(s fl->fl_ops = &nlmclnt_lock_ops; } +static void do_vfs_lock(struct file_lock *fl) +{ + int res = 0; + switch (fl->fl_flags & (FL_POSIX|FL_FLOCK)) { + case FL_POSIX: + res = posix_lock_file_wait(fl->fl_file, fl); + break; + case FL_FLOCK: + res = flock_lock_file_wait(fl->fl_file, fl); + break; + default: + BUG(); + } + if (res < 0) + printk(KERN_WARNING "%s: VFS is out of sync with lock manager!\n", + __FUNCTION__); +} + /* * LOCK: Try to create a lock * @@ -564,9 +575,7 @@ nlmclnt_lock(struct nlm_rqst *req, struc fl->fl_u.nfs_fl.state = host->h_state; fl->fl_u.nfs_fl.flags |= NFS_LCK_GRANTED; fl->fl_flags |= FL_SLEEP; - if (posix_lock_file_wait(fl->fl_file, fl) < 0) - printk(KERN_WARNING "%s: VFS is out of sync with lock manager!\n", - __FUNCTION__); + do_vfs_lock(fl); } status = nlm_stat_to_errno(resp->status); out: @@ -635,7 +644,7 @@ nlmclnt_unlock(struct nlm_rqst *req, str nlmclnt_unlock_callback); /* Hrmf... 
Do the unlock early since locks_remove_posix() * really expects us to free the lock synchronously */ - posix_lock_file(fl->fl_file, fl); + do_vfs_lock(fl); if (status < 0) { nlmclnt_release_lockargs(req); kfree(req); @@ -648,7 +657,7 @@ nlmclnt_unlock(struct nlm_rqst *req, str if (status < 0) return status; - posix_lock_file(fl->fl_file, fl); + do_vfs_lock(fl); if (resp->status == NLM_LCK_GRANTED) return 0; Index: linux-2.6.11/fs/lockd/host.c =================================================================== --- linux-2.6.11.orig/fs/lockd/host.c +++ linux-2.6.11/fs/lockd/host.c @@ -110,7 +110,6 @@ nlm_lookup_host(int server, struct socka host->h_addr.sin_port = 0; /* ouch! */ host->h_version = version; host->h_proto = proto; - host->h_authflavor = RPC_AUTH_UNIX; host->h_rpcclnt = NULL; init_MUTEX(&host->h_sema); host->h_nextrebind = jiffies + NLM_HOST_REBIND; @@ -191,8 +190,9 @@ nlm_bind_host(struct nlm_host *host) xprt_set_timeout(&xprt->timeout, 5, nlmsvc_timeout); + /* Existing NLM servers accept AUTH_UNIX only */ clnt = rpc_create_client(xprt, host->h_name, &nlm_program, - host->h_version, host->h_authflavor); + host->h_version, RPC_AUTH_UNIX); if (IS_ERR(clnt)) { xprt_destroy(xprt); goto forgetit; Index: linux-2.6.11/fs/locks.c =================================================================== --- linux-2.6.11.orig/fs/locks.c +++ linux-2.6.11/fs/locks.c @@ -1876,8 +1876,13 @@ void locks_remove_flock(struct file *fil return; if (filp->f_op && filp->f_op->flock) { - struct file_lock fl = { .fl_flags = FL_FLOCK, - .fl_type = F_UNLCK }; + struct file_lock fl = { + .fl_pid = current->tgid, + .fl_file = filp, + .fl_flags = FL_FLOCK, + .fl_type = F_UNLCK, + .fl_end = OFFSET_MAX, + }; filp->f_op->flock(filp, F_SETLKW, &fl); } Index: linux-2.6.11/fs/namei.c =================================================================== --- linux-2.6.11.orig/fs/namei.c +++ linux-2.6.11/fs/namei.c @@ -681,7 +681,7 @@ fail: * * We expect 'base' to be positive and a directory. 
*/ -int fastcall link_path_walk(const char * name, struct nameidata *nd) +static fastcall int __link_path_walk(const char * name, struct nameidata *nd) { struct path next; struct inode *inode; @@ -703,6 +703,7 @@ int fastcall link_path_walk(const char * struct qstr this; unsigned int c; + nd->flags |= LOOKUP_CONTINUE; err = exec_permission_lite(inode, nd); if (err == -EAGAIN) { err = permission(inode, MAY_EXEC, nd); @@ -755,7 +756,6 @@ int fastcall link_path_walk(const char * if (err < 0) break; } - nd->flags |= LOOKUP_CONTINUE; /* This does the actual lookups.. */ err = do_lookup(nd, &this, &next); if (err) @@ -881,6 +881,37 @@ return_err: return err; } +/* + * Wrapper to retry pathname resolution whenever the underlying + * file system returns an ESTALE. + * + * Retry the whole path once, forcing real lookup requests + * instead of relying on the dcache. + */ +int fastcall link_path_walk(const char *name, struct nameidata *nd) +{ + struct nameidata save = *nd; + int result; + + /* make sure the stuff we saved doesn't go away */ + dget(save.dentry); + mntget(save.mnt); + + result = __link_path_walk(name, nd); + if (result == -ESTALE) { + *nd = save; + dget(nd->dentry); + mntget(nd->mnt); + nd->flags |= LOOKUP_REVAL; + result = __link_path_walk(name, nd); + } + + dput(save.dentry); + mntput(save.mnt); + + return result; +} + int fastcall path_walk(const char * name, struct nameidata *nd) { current->total_link_count = 0; Index: linux-2.6.11/fs/nfs/Makefile =================================================================== --- linux-2.6.11.orig/fs/nfs/Makefile +++ linux-2.6.11/fs/nfs/Makefile @@ -5,7 +5,8 @@ obj-$(CONFIG_NFS_FS) += nfs.o nfs-y := dir.o file.o inode.o nfs2xdr.o pagelist.o \ - proc.o read.o symlink.o unlink.o write.o + proc.o read.o symlink.o unlink.o write.o \ + namespace.o nfs-$(CONFIG_ROOT_NFS) += nfsroot.o mount_clnt.o nfs-$(CONFIG_NFS_V3) += nfs3proc.o nfs3xdr.o nfs-$(CONFIG_NFS_V4) += nfs4proc.o nfs4xdr.o nfs4state.o nfs4renewd.o \ Index: 
linux-2.6.11/fs/nfs/callback.c =================================================================== --- linux-2.6.11.orig/fs/nfs/callback.c +++ linux-2.6.11/fs/nfs/callback.c @@ -14,6 +14,7 @@ #include #include #include +#include "nfs4_fs.h" #include "callback.h" #define NFSDBG_FACILITY NFSDBG_CALLBACK Index: linux-2.6.11/fs/nfs/callback_proc.c =================================================================== --- linux-2.6.11.orig/fs/nfs/callback_proc.c +++ linux-2.6.11/fs/nfs/callback_proc.c @@ -8,6 +8,7 @@ #include #include #include +#include "nfs4_fs.h" #include "callback.h" #include "delegation.h" Index: linux-2.6.11/fs/nfs/callback_xdr.c =================================================================== --- linux-2.6.11.orig/fs/nfs/callback_xdr.c +++ linux-2.6.11/fs/nfs/callback_xdr.c @@ -10,6 +10,7 @@ #include #include #include +#include "nfs4_fs.h" #include "callback.h" #define CB_OP_TAGLEN_MAXSZ (512) Index: linux-2.6.11/fs/nfs/delegation.c =================================================================== --- linux-2.6.11.orig/fs/nfs/delegation.c +++ linux-2.6.11/fs/nfs/delegation.c @@ -16,6 +16,7 @@ #include #include +#include "nfs4_fs.h" #include "delegation.h" static struct nfs_delegation *nfs_alloc_delegation(void) Index: linux-2.6.11/fs/nfs/dir.c =================================================================== --- linux-2.6.11.orig/fs/nfs/dir.c +++ linux-2.6.11/fs/nfs/dir.c @@ -32,6 +32,7 @@ #include #include +#include "nfs4_fs.h" #include "delegation.h" #define NFS_PARANOIA 1 @@ -90,6 +91,9 @@ struct inode_operations nfs4_dir_inode_o .permission = nfs_permission, .getattr = nfs_getattr, .setattr = nfs_setattr, + .getxattr = nfs4_getxattr, + .setxattr = nfs4_setxattr, + .listxattr = nfs4_listxattr, }; #endif /* CONFIG_NFS_V4 */ @@ -529,13 +533,24 @@ static inline void nfs_renew_times(struc } static inline -int nfs_lookup_verify_inode(struct inode *inode, int isopen) +int nfs_lookup_verify_inode(struct inode *inode, struct nameidata *nd) { struct 
nfs_server *server = NFS_SERVER(inode); - if (isopen && !(server->flags & NFS_MOUNT_NOCTO)) - return __nfs_revalidate_inode(server, inode); + if (nd != NULL) { + int ndflags = nd->flags; + /* VFS wants an on-the-wire revalidation */ + if (ndflags & LOOKUP_REVAL) + goto out_force; + /* This is an open(2) */ + if ((ndflags & LOOKUP_OPEN) && + !(ndflags & LOOKUP_CONTINUE) && + !(server->flags & NFS_MOUNT_NOCTO)) + goto out_force; + } return nfs_revalidate_inode(server, inode); +out_force: + return __nfs_revalidate_inode(server, inode); } /* @@ -579,16 +594,12 @@ static int nfs_lookup_revalidate(struct struct nfs_fh fhandle; struct nfs_fattr fattr; unsigned long verifier; - int isopen = 0; parent = dget_parent(dentry); lock_kernel(); dir = parent->d_inode; inode = dentry->d_inode; - if (nd && !(nd->flags & LOOKUP_CONTINUE) && (nd->flags & LOOKUP_OPEN)) - isopen = 1; - if (!inode) { if (nfs_neg_need_reval(dir, dentry, nd)) goto out_bad; @@ -602,11 +613,12 @@ static int nfs_lookup_revalidate(struct } /* Revalidate parent directory attribute cache */ - nfs_revalidate_inode(NFS_SERVER(dir), dir); + if (nfs_revalidate_inode(NFS_SERVER(dir), dir) < 0) + goto out_zap_parent; /* Force a full look up iff the parent directory has changed */ if (nfs_check_verifier(dir, dentry)) { - if (nfs_lookup_verify_inode(inode, isopen)) + if (nfs_lookup_verify_inode(inode, nd)) goto out_zap_parent; goto out_valid; } @@ -702,6 +714,17 @@ int nfs_is_exclusive_create(struct inode return (nd->intent.open.flags & O_EXCL) != 0; } +static inline int nfs_reval_fsid(struct inode *dir, + struct nfs_fh *fh, struct nfs_fattr *fattr) +{ + struct nfs_server *server = NFS_SERVER(dir); + + if (!nfs_fsid_equal(&server->fsid, &fattr->fsid)) + /* Revalidate fsid on root dir */ + return __nfs_revalidate_inode(server, dir->i_sb->s_root->d_inode); + return 0; +} + static struct dentry *nfs_lookup(struct inode *dir, struct dentry * dentry, struct nameidata *nd) { struct dentry *res; @@ -722,7 +745,9 @@ static 
struct dentry *nfs_lookup(struct lock_kernel(); /* Revalidate parent directory attribute cache */ - nfs_revalidate_inode(NFS_SERVER(dir), dir); + error = nfs_revalidate_inode(NFS_SERVER(dir), dir); + if (error < 0) + goto out_err; /* If we're doing an exclusive create, optimize away the lookup */ if (nfs_is_exclusive_create(dir, nd)) @@ -731,10 +756,11 @@ static struct dentry *nfs_lookup(struct error = NFS_PROTO(dir)->lookup(dir, &dentry->d_name, &fhandle, &fattr); if (error == -ENOENT) goto no_entry; - if (error < 0) { - res = ERR_PTR(error); - goto out_unlock; - } + if (error < 0) + goto out_err; + error = nfs_reval_fsid(dir, &fhandle, &fattr); + if (error < 0) + goto out_err; res = ERR_PTR(-EACCES); inode = nfs_fhget(dentry->d_sb, &fhandle, &fattr); if (!inode) @@ -749,6 +775,9 @@ out_unlock: unlock_kernel(); out: return res; +out_err: + res = ERR_PTR(error); + goto out_unlock; } #ifdef CONFIG_NFS_V4 @@ -780,6 +809,7 @@ static struct dentry *nfs_atomic_lookup( { struct dentry *res = NULL; struct inode *inode = NULL; + int error; /* Check that we are indeed trying to open this file */ if (!is_atomic_open(dir, nd)) @@ -798,7 +828,11 @@ static struct dentry *nfs_atomic_lookup( /* Open the file on the server */ lock_kernel(); /* Revalidate parent directory attribute cache */ - nfs_revalidate_inode(NFS_SERVER(dir), dir); + error = nfs_revalidate_inode(NFS_SERVER(dir), dir); + if (error < 0) { + res = ERR_PTR(error); + goto out; + } if (nd->intent.open.flags & O_CREAT) { nfs_begin_data_update(dir); @@ -808,7 +842,7 @@ static struct dentry *nfs_atomic_lookup( inode = nfs4_atomic_open(dir, dentry, nd); unlock_kernel(); if (IS_ERR(inode)) { - int error = PTR_ERR(inode); + error = PTR_ERR(inode); switch (error) { /* Make a negative dentry */ case -ENOENT: @@ -938,7 +972,7 @@ static struct dentry *nfs_readdir_lookup /* * Code common to create, mkdir, and mknod. 
*/ -static int nfs_instantiate(struct dentry *dentry, struct nfs_fh *fhandle, +int nfs_instantiate(struct dentry *dentry, struct nfs_fh *fhandle, struct nfs_fattr *fattr) { struct inode *inode; @@ -959,14 +993,12 @@ static int nfs_instantiate(struct dentry if (error < 0) goto out_err; } - inode = nfs_fhget(dentry->d_sb, fhandle, fattr); - if (inode) { - d_instantiate(dentry, inode); - nfs_renew_times(dentry); - nfs_set_verifier(dentry, nfs_save_change_attribute(dentry->d_parent->d_inode)); - return 0; - } error = -ENOMEM; + inode = nfs_fhget(dentry->d_sb, fhandle, fattr); + if (inode == NULL) + goto out_err; + d_instantiate(dentry, inode); + return 0; out_err: d_drop(dentry); return error; @@ -982,7 +1014,6 @@ static int nfs_create(struct inode *dir, struct nameidata *nd) { struct iattr attr; - struct inode *inode; int error; int open_flags = 0; @@ -997,18 +1028,17 @@ static int nfs_create(struct inode *dir, lock_kernel(); nfs_begin_data_update(dir); - inode = NFS_PROTO(dir)->create(dir, dentry, &attr, open_flags); + error = NFS_PROTO(dir)->create(dir, dentry, &attr, open_flags); nfs_end_data_update(dir); - if (!IS_ERR(inode)) { - d_instantiate(dentry, inode); - nfs_renew_times(dentry); - nfs_set_verifier(dentry, nfs_save_change_attribute(dir)); - error = 0; - } else { - error = PTR_ERR(inode); - d_drop(dentry); - } + if (error != 0) + goto out_err; + nfs_renew_times(dentry); + nfs_set_verifier(dentry, nfs_save_change_attribute(dir)); + unlock_kernel(); + return 0; +out_err: unlock_kernel(); + d_drop(dentry); return error; } @@ -1019,9 +1049,7 @@ static int nfs_mknod(struct inode *dir, struct dentry *dentry, int mode, dev_t rdev) { struct iattr attr; - struct nfs_fattr fattr; - struct nfs_fh fhandle; - int error; + int status; dfprintk(VFS, "NFS: mknod(%s/%ld, %s\n", dir->i_sb->s_id, dir->i_ino, dentry->d_name.name); @@ -1034,15 +1062,18 @@ nfs_mknod(struct inode *dir, struct dent lock_kernel(); nfs_begin_data_update(dir); - error = NFS_PROTO(dir)->mknod(dir, 
&dentry->d_name, &attr, rdev, - &fhandle, &fattr); + status = NFS_PROTO(dir)->mknod(dir, dentry, &attr, rdev); nfs_end_data_update(dir); - if (!error) - error = nfs_instantiate(dentry, &fhandle, &fattr); - else - d_drop(dentry); + if (status != 0) + goto out_err; + nfs_renew_times(dentry); + nfs_set_verifier(dentry, nfs_save_change_attribute(dir)); unlock_kernel(); - return error; + return 0; +out_err: + unlock_kernel(); + d_drop(dentry); + return status; } /* @@ -1051,8 +1082,6 @@ nfs_mknod(struct inode *dir, struct dent static int nfs_mkdir(struct inode *dir, struct dentry *dentry, int mode) { struct iattr attr; - struct nfs_fattr fattr; - struct nfs_fh fhandle; int error; dfprintk(VFS, "NFS: mkdir(%s/%ld, %s\n", dir->i_sb->s_id, @@ -1062,23 +1091,17 @@ static int nfs_mkdir(struct inode *dir, attr.ia_mode = mode | S_IFDIR; lock_kernel(); -#if 0 - /* - * Always drop the dentry, we can't always depend on - * the fattr returned by the server (AIX seems to be - * broken). We're better off doing another lookup than - * depending on potentially bogus information. 
- */ - d_drop(dentry); -#endif nfs_begin_data_update(dir); - error = NFS_PROTO(dir)->mkdir(dir, &dentry->d_name, &attr, &fhandle, - &fattr); + error = NFS_PROTO(dir)->mkdir(dir, dentry, &attr); nfs_end_data_update(dir); - if (!error) - error = nfs_instantiate(dentry, &fhandle, &fattr); - else - d_drop(dentry); + if (error != 0) + goto out_err; + nfs_renew_times(dentry); + nfs_set_verifier(dentry, nfs_save_change_attribute(dir)); + unlock_kernel(); + return 0; +out_err: + d_drop(dentry); unlock_kernel(); return error; } @@ -1107,7 +1130,7 @@ static int nfs_sillyrename(struct inode static unsigned int sillycounter; const int i_inosize = sizeof(dir->i_ino)*2; const int countersize = sizeof(sillycounter)*2; - const int slen = strlen(".nfs") + i_inosize + countersize; + const int slen = sizeof(".nfs") + i_inosize + countersize - 1; char silly[slen+1]; struct qstr qsilly; struct dentry *sdentry; @@ -1498,34 +1521,52 @@ out: int nfs_permission(struct inode *inode, int mask, struct nameidata *nd) { struct rpc_cred *cred; - int res; + int res = 0; if (mask == 0) - return 0; + goto out; + /* Is this sys_access() ? */ + if (nd != NULL && (nd->flags & LOOKUP_ACCESS)) + goto force_lookup; - /* Are we checking permissions on anything other than lookup/execute? */ - if ((mask & MAY_EXEC) == 0) { - /* We only need to check permissions on file open() and access() */ - if (!nd || !(nd->flags & (LOOKUP_OPEN|LOOKUP_ACCESS))) - return 0; - /* NFSv4 has atomic_open... */ - if (NFS_PROTO(inode)->version > 3 && (nd->flags & LOOKUP_OPEN)) - return 0; + switch (inode->i_mode & S_IFMT) { + case S_IFLNK: + goto out; + case S_IFREG: + /* NFSv4 has atomic_open... */ + if (nfs_server_capable(inode, NFS_CAP_ATOMIC_OPEN) + && nd != NULL + && (nd->flags & LOOKUP_OPEN)) + goto out; + break; + case S_IFDIR: + /* + * Optimize away all write operations, since the server + * will check permissions when we perform the op. 
+ */ + if ((mask & MAY_WRITE) && !(mask & MAY_READ)) + goto out; } +force_lookup: lock_kernel(); if (!NFS_PROTO(inode)->access) goto out_notsup; cred = rpcauth_lookupcred(NFS_CLIENT(inode)->cl_auth, 0); - res = nfs_do_access(inode, cred, mask); - put_rpccred(cred); + if (!IS_ERR(cred)) { + res = nfs_do_access(inode, cred, mask); + put_rpccred(cred); + } else + res = PTR_ERR(cred); unlock_kernel(); +out: return res; out_notsup: - nfs_revalidate_inode(NFS_SERVER(inode), inode); - res = generic_permission(inode, mask, NULL); + res = nfs_revalidate_inode(NFS_SERVER(inode), inode); + if (res == 0) + res = generic_permission(inode, mask, NULL); unlock_kernel(); return res; } Index: linux-2.6.11/fs/nfs/file.c =================================================================== --- linux-2.6.11.orig/fs/nfs/file.c +++ linux-2.6.11/fs/nfs/file.c @@ -31,6 +31,7 @@ #include #include +#include "nfs4_fs.h" #include "delegation.h" #define NFSDBG_FACILITY NFSDBG_FILE @@ -44,6 +45,8 @@ static ssize_t nfs_file_write(struct kio static int nfs_file_flush(struct file *); static int nfs_fsync(struct file *, struct dentry *dentry, int datasync); static int nfs_check_flags(int flags); +static int nfs_lock(struct file *filp, int cmd, struct file_lock *fl); +static int nfs_flock(struct file *filp, int cmd, struct file_lock *fl); struct file_operations nfs_file_operations = { .llseek = remote_llseek, @@ -57,6 +60,7 @@ struct file_operations nfs_file_operatio .release = nfs_file_release, .fsync = nfs_fsync, .lock = nfs_lock, + .flock = nfs_flock, .sendfile = nfs_file_sendfile, .check_flags = nfs_check_flags, }; @@ -67,6 +71,19 @@ struct inode_operations nfs_file_inode_o .setattr = nfs_setattr, }; +#ifdef CONFIG_NFS_V4 + +struct inode_operations nfs4_file_inode_operations = { + .permission = nfs_permission, + .getattr = nfs_getattr, + .setattr = nfs_setattr, + .getxattr = nfs4_getxattr, + .setxattr = nfs4_setxattr, + .listxattr = nfs4_listxattr, +}; + +#endif /* CONFIG_NFS_V4 */ + /* Hack for 
future NFS swap support */ #ifndef IS_SWAPFILE # define IS_SWAPFILE(inode) (0) @@ -312,6 +329,25 @@ static int do_getlk(struct file *filp, i return status; } +static int do_vfs_lock(struct file *file, struct file_lock *fl) +{ + int res = 0; + switch (fl->fl_flags & (FL_POSIX|FL_FLOCK)) { + case FL_POSIX: + res = posix_lock_file_wait(file, fl); + break; + case FL_FLOCK: + res = flock_lock_file_wait(file, fl); + break; + default: + BUG(); + } + if (res < 0) + printk(KERN_WARNING "%s: VFS is out of sync with lock manager!\n", + __FUNCTION__); + return res; +} + static int do_unlk(struct file *filp, int cmd, struct file_lock *fl) { struct inode *inode = filp->f_mapping->host; @@ -338,7 +374,7 @@ static int do_unlk(struct file *filp, in if (!(NFS_SERVER(inode)->flags & NFS_MOUNT_NONLM)) status = NFS_PROTO(inode)->lock(filp, cmd, fl); else - status = posix_lock_file_wait(filp, fl); + status = do_vfs_lock(filp, fl); unlock_kernel(); rpc_clnt_sigunmask(NFS_CLIENT(inode), &oldset); return status; @@ -377,9 +413,9 @@ static int do_setlk(struct file *filp, i * the process exits. 
*/ if (status == -EINTR || status == -ERESTARTSYS) - posix_lock_file_wait(filp, fl); + do_vfs_lock(filp, fl); } else - status = posix_lock_file_wait(filp, fl); + status = do_vfs_lock(filp, fl); unlock_kernel(); if (status < 0) goto out; @@ -401,8 +437,7 @@ out: /* * Lock a (portion of) a file */ -int -nfs_lock(struct file *filp, int cmd, struct file_lock *fl) +static int nfs_lock(struct file *filp, int cmd, struct file_lock *fl) { struct inode * inode = filp->f_mapping->host; @@ -418,6 +453,27 @@ nfs_lock(struct file *filp, int cmd, str if ((inode->i_mode & (S_ISGID | S_IXGRP)) == S_ISGID) return -ENOLCK; + if (IS_GETLK(cmd)) + return do_getlk(filp, cmd, fl); + if (fl->fl_type == F_UNLCK) + return do_unlk(filp, cmd, fl); + return do_setlk(filp, cmd, fl); +} + +/* + * Lock a (portion of) a file + */ +static int nfs_flock(struct file *filp, int cmd, struct file_lock *fl) +{ + struct inode * inode = filp->f_mapping->host; + + dprintk("NFS: nfs_flock(f=%s/%ld, t=%x, fl=%x)\n", + inode->i_sb->s_id, inode->i_ino, + fl->fl_type, fl->fl_flags); + + if (!inode) + return -EINVAL; + /* * No BSD flocks over NFS allowed. * Note: we could try to fake a POSIX lock request here by @@ -425,11 +481,14 @@ nfs_lock(struct file *filp, int cmd, str * Not sure whether that would be unique, though, or whether * that would break in other places. 
*/ - if (!fl->fl_owner || !(fl->fl_flags & FL_POSIX)) + if (!(fl->fl_flags & FL_FLOCK)) return -ENOLCK; - if (IS_GETLK(cmd)) - return do_getlk(filp, cmd, fl); + /* We're simulating flock() locks using posix locks on the server */ + fl->fl_owner = (fl_owner_t)filp; + fl->fl_start = 0; + fl->fl_end = OFFSET_MAX; + if (fl->fl_type == F_UNLCK) return do_unlk(filp, cmd, fl); return do_setlk(filp, cmd, fl); Index: linux-2.6.11/fs/nfs/idmap.c =================================================================== --- linux-2.6.11.orig/fs/nfs/idmap.c +++ linux-2.6.11/fs/nfs/idmap.c @@ -46,10 +46,10 @@ #include #include -#include #include #include +#include "nfs4_fs.h" #define IDMAP_HASH_SZ 128 Index: linux-2.6.11/fs/nfs/inode.c =================================================================== --- linux-2.6.11.orig/fs/nfs/inode.c +++ linux-2.6.11/fs/nfs/inode.c @@ -32,6 +32,7 @@ #include #include #include +#include #include #include #include @@ -39,6 +40,7 @@ #include #include +#include "nfs4_fs.h" #include "delegation.h" #define NFSDBG_FACILITY NFSDBG_VFS @@ -64,6 +66,8 @@ static void nfs_umount_begin(struct supe static int nfs_statfs(struct super_block *, struct kstatfs *); static int nfs_show_options(struct seq_file *, struct vfsmount *); +static struct rpc_program nfs_program; + static struct super_operations nfs_sops = { .alloc_inode = nfs_alloc_inode, .destroy_inode = nfs_destroy_inode, @@ -78,7 +82,7 @@ static struct super_operations nfs_sops /* * RPC cruft for NFS */ -struct rpc_stat nfs_rpcstat = { +static struct rpc_stat nfs_rpcstat = { .program = &nfs_program }; static struct rpc_version * nfs_version[] = { @@ -95,7 +99,7 @@ static struct rpc_version * nfs_version[ #endif }; -struct rpc_program nfs_program = { +static struct rpc_program nfs_program = { .name = "nfs", .number = NFS_PROGRAM, .nrvers = sizeof(nfs_version) / sizeof(nfs_version[0]), @@ -104,6 +108,60 @@ struct rpc_program nfs_program = { .pipe_dir_name = "/nfs", }; +#ifdef CONFIG_SYSCTL +/* Follow the 
established convention in NLM */ +#define CTL_UNNUMBERED -2 + +static ctl_table nfs_sysctls[] = { + { + .ctl_name = CTL_UNNUMBERED, + .procname = "nfs_mountpoint_timeout", + .data = &nfs_mountpoint_expiry_timeout, + .maxlen = sizeof(nfs_mountpoint_expiry_timeout), + .mode = 0644, + .proc_handler = &proc_dointvec_jiffies, + .strategy = &sysctl_jiffies, + }, + { .ctl_name = 0 } +}; + +static ctl_table nfs_sysctl_dir[] = { + { + .ctl_name = CTL_UNNUMBERED, + .procname = "nfs", + .mode = 0555, + .child = nfs_sysctls, + }, + { .ctl_name = 0 } +}; + +static ctl_table nfs_sysctl_root[] = { + { + .ctl_name = CTL_FS, + .procname = "fs", + .mode = 0555, + .child = nfs_sysctl_dir, + }, + { .ctl_name = 0 } +}; + +static struct ctl_table_header *nfs_sysctl_table; + +static inline int nfs_init_sysctl(void) +{ + nfs_sysctl_table = register_sysctl_table(nfs_sysctl_root, 0); + return nfs_sysctl_table != NULL ? 0 : -ENOMEM; +} + +static inline void nfs_destroy_sysctl(void) +{ + unregister_sysctl_table(nfs_sysctl_table); +} +#else +#define nfs_init_sysctl() (0) +#define nfs_destroy_sysctl() do { } while(0) +#endif /* CONFIG_SYSCTL */ + static inline unsigned long nfs_fattr_to_ino_t(struct nfs_fattr *fattr) { @@ -138,16 +196,13 @@ nfs_delete_inode(struct inode * inode) clear_inode(inode); } -/* - * For the moment, the only task for the NFS clear_inode method is to - * release the mmap credential - */ static void nfs_clear_inode(struct inode *inode) { struct nfs_inode *nfsi = NFS_I(inode); struct rpc_cred *cred; + nfs4_zap_acl_attr(inode); nfs_wb_all(inode); BUG_ON (!list_empty(&nfsi->open_files)); cred = nfsi->cache_access.cred; @@ -209,6 +264,14 @@ nfs_block_size(unsigned long bsize, unsi return nfs_block_bits(bsize, nrbitsp); } +static inline void +nfs_super_set_maxbytes(struct super_block *sb, __u64 maxfilesize) +{ + sb->s_maxbytes = (loff_t)maxfilesize; + if (sb->s_maxbytes > MAX_LFS_FILESIZE || sb->s_maxbytes <= 0) + sb->s_maxbytes = MAX_LFS_FILESIZE; +} + /* * Obtain the root 
inode of the file system. */ @@ -225,6 +288,7 @@ nfs_get_root(struct super_block *sb, str return ERR_PTR(error); } + server->fsid = fsinfo->fattr->fsid; rooti = nfs_fhget(sb, rootfh, fsinfo->fattr); if (!rooti) return ERR_PTR(-ENOMEM); @@ -247,6 +311,7 @@ nfs_sb_init(struct super_block *sb, rpc_ .fattr = &fattr, }; int no_root_error = 0; + unsigned long max_rpc_payload; /* We probably want something more informative here */ snprintf(sb->s_id, sizeof(sb->s_id), "%x:%x", MAJOR(sb->s_dev), MINOR(sb->s_dev)); @@ -283,6 +348,12 @@ nfs_sb_init(struct super_block *sb, rpc_ if (fsinfo.wtmax >= 512 && server->wsize > fsinfo.wtmax) server->wsize = nfs_block_size(fsinfo.wtmax, NULL); + max_rpc_payload = nfs_block_size(rpc_max_payload(server->client), NULL); + if (server->rsize > max_rpc_payload) + server->rsize = max_rpc_payload; + if (server->wsize > max_rpc_payload) + server->wsize = max_rpc_payload; + server->rpages = (server->rsize + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT; if (server->rpages > NFS_READ_MAXIOV) { server->rpages = NFS_READ_MAXIOV; @@ -313,9 +384,10 @@ nfs_sb_init(struct super_block *sb, rpc_ } server->backing_dev_info.ra_pages = server->rpages * NFS_MAX_READAHEAD; - sb->s_maxbytes = fsinfo.maxfilesize; - if (sb->s_maxbytes > MAX_LFS_FILESIZE) - sb->s_maxbytes = MAX_LFS_FILESIZE; + nfs_super_set_maxbytes(sb, fsinfo.maxfilesize); + + server->client->cl_intr = (server->flags & NFS_MOUNT_INTR) ? 1 : 0; + server->client->cl_softrtry = (server->flags & NFS_MOUNT_SOFT) ? 1 : 0; /* We're airborne Set socket buffersize */ rpc_setbufsize(server->client, server->wsize + 100, server->rsize + 100); @@ -364,9 +436,8 @@ nfs_create_client(struct nfs_server *ser goto out_fail; } - clnt->cl_intr = (server->flags & NFS_MOUNT_INTR) ? 1 : 0; - clnt->cl_softrtry = (server->flags & NFS_MOUNT_SOFT) ? 1 : 0; - clnt->cl_droppriv = (server->flags & NFS_MOUNT_BROKEN_SUID) ? 
1 : 0; + clnt->cl_intr = 1; + clnt->cl_softrtry = 1; clnt->cl_chatty = 1; return clnt; @@ -538,7 +609,6 @@ static int nfs_show_options(struct seq_f { NFS_MOUNT_NOCTO, ",nocto", "" }, { NFS_MOUNT_NOAC, ",noac", "" }, { NFS_MOUNT_NONLM, ",nolock", ",lock" }, - { NFS_MOUNT_BROKEN_SUID, ",broken_suid", "" }, { 0, NULL, NULL } }; struct proc_nfs_info *nfs_infop; @@ -679,7 +749,7 @@ nfs_fhget(struct super_block *sb, struct /* Why so? Because we want revalidate for devices/FIFOs, and * that's precisely what we have in nfs_file_inode_operations. */ - inode->i_op = &nfs_file_inode_operations; + inode->i_op = NFS_SB(sb)->rpc_ops->file_inode_ops; if (S_ISREG(inode->i_mode)) { inode->i_fop = &nfs_file_operations; inode->i_data.a_ops = &nfs_file_aops; @@ -690,6 +760,11 @@ nfs_fhget(struct super_block *sb, struct if (nfs_server_capable(inode, NFS_CAP_READDIRPLUS) && fattr->size <= NFS_LIMIT_READDIRPLUS) NFS_FLAGS(inode) |= NFS_INO_ADVISE_RDPLUS; + /* Deal with crossing mountpoints */ + if (!nfs_fsid_equal(&NFS_SB(sb)->fsid, &fattr->fsid)) { + inode->i_op = &nfs_mountpoint_inode_operations; + inode->i_fop = NULL; + } } else if (S_ISLNK(inode->i_mode)) inode->i_op = &nfs_symlink_inode_operations; else @@ -792,7 +867,7 @@ nfs_setattr(struct dentry *dentry, struc * Wait for the inode to get unlocked. * (Used for NFS_INO_LOCKED and NFS_INO_REVALIDATING). 
*/ -int +static int nfs_wait_on_inode(struct inode *inode, int flag) { struct rpc_clnt *clnt = NFS_CLIENT(inode); @@ -856,6 +931,12 @@ struct nfs_open_context *get_nfs_open_co void put_nfs_open_context(struct nfs_open_context *ctx) { if (atomic_dec_and_test(&ctx->count)) { + if (!list_empty(&ctx->list)) { + struct inode *inode = ctx->dentry->d_inode; + spin_lock(&inode->i_lock); + list_del(&ctx->list); + spin_unlock(&inode->i_lock); + } if (ctx->state != NULL) nfs4_close_state(ctx->state, ctx->mode); if (ctx->cred != NULL) @@ -904,7 +985,7 @@ void nfs_file_clear_open_context(struct if (ctx) { filp->private_data = NULL; spin_lock(&inode->i_lock); - list_del(&ctx->list); + list_move_tail(&ctx->list, &NFS_I(inode)->open_files); spin_unlock(&inode->i_lock); put_nfs_open_context(ctx); } @@ -918,8 +999,9 @@ int nfs_open(struct inode *inode, struct struct nfs_open_context *ctx; struct rpc_cred *cred; - if ((cred = rpcauth_lookupcred(NFS_CLIENT(inode)->cl_auth, 0)) == NULL) - return -ENOMEM; + cred = rpcauth_lookupcred(NFS_CLIENT(inode)->cl_auth, 0); + if (IS_ERR(cred)) + return PTR_ERR(cred); ctx = alloc_nfs_open_context(filp->f_dentry, cred); put_rpccred(cred); if (ctx == NULL) @@ -1197,6 +1279,7 @@ int nfs_refresh_inode(struct inode *inod */ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr, unsigned long verifier) { + struct nfs_server *server; struct nfs_inode *nfsi = NFS_I(inode); __u64 new_size; loff_t new_isize; @@ -1226,6 +1309,12 @@ static int nfs_update_inode(struct inode if ((inode->i_mode & S_IFMT) != (fattr->mode & S_IFMT)) goto out_changed; + server = NFS_SERVER(inode); + /* Update the fsid if and only if this is the root directory */ + if (inode == inode->i_sb->s_root->d_inode + && !nfs_fsid_equal(&server->fsid, &fattr->fsid)) + server->fsid = fattr->fsid; + /* * Update the read time so we don't revalidate too often. 
*/ @@ -1294,6 +1383,7 @@ static int nfs_update_inode inode->i_nlink = fattr->nlink; inode->i_uid = fattr->uid; inode->i_gid = fattr->gid; + nfs4_zap_acl_attr(inode); if (fattr->valid & (NFS_ATTR_FATTR_V3 | NFS_ATTR_FATTR_V4)) { /* @@ -1343,9 +1433,170 @@ static int nfs_update_inode } /* + * nfs_try_migrate_filehandle - Check if we can migrate the inode filehandle + * @inode - pointer to inode + * @fh - the filehandle resulting from lookup() + * @fattr - attributes associated with the new filehandle + * + * Do our very best to update existing inodes when the user wants to migrate + * this filesystem to a replica server. + * + * Note that here be HUGE dragons, with endless possibilities for causing + * trouble... + */ +int nfs_try_migrate_filehandle(struct inode *inode, struct nfs_fh *fh, struct nfs_fattr *fattr, uint32_t generation) +{ + struct nfs_inode *nfsi = NFS_I(inode); + + /* Argh! The basic file type has changed */ + if ((inode->i_mode & S_IFMT) != (fattr->mode & S_IFMT)) + goto out_bad; + /* Fileid + filehandle are the same. Good! */ + if (nfs_compare_fh(&nfsi->fh, fh) == 0 && nfsi->fileid == fattr->fileid) + goto out_good; + if (fattr->valid & NFS_ATTR_FATTR_V4) { + /* Do the NFSv4 change attributes match our cached value? */ + if (nfsi->change_attr != fattr->change_attr) + goto out_bad; + } else { + /* Does the ctime match? */ + if (!timespec_equal(&fattr->ctime, &inode->i_ctime)) + goto out_bad; + } + /* Does the file size match? */ + if (nfs_size_to_loff_t(fattr->size) != inode->i_size) + goto out_bad; + /* FIXME: Here lie the biggest dragons: + * Try considering all possible races w.r.t. iget5_locked() + */ + nfs_copy_fh(&nfsi->fh, fh); + if (nfsi->fileid != fattr->fileid) { + /* The very concept of migrating to a new hash bucket + * is so full of holes and races that it defies belief! 
+ */ + remove_inode_hash(inode); + nfsi->fileid = fattr->fileid; + inode->i_ino = nfs_fattr_to_ino_t(fattr); + insert_inode_hash(inode); + } +out_good: + inode->i_generation = generation; + return 0; +out_bad: + return -EIO; +} + +/* + * nfs_try_migrate_inode - Update an inode's filehandle after migration + * @inode - pointer to inode to migrate + * @dentry - pointer to dentry + */ +int nfs_try_migrate_inode(struct inode *inode, struct dentry *dentry) +{ + struct nfs_fh fh; + struct nfs_fattr fattr; + struct dentry *next, *next_parent; + uint32_t generation; + int status; + + if (dentry == NULL) { + status = -ENOENT; + dentry = d_find_alias(inode); + if (dentry == NULL) + goto out; + } else + dget(dentry); +repeat: + /* Has this inode already been revalidated? */ + status = 0; + generation = NFS_SERVER(inode)->generation; + if ((long)generation - (long)inode->i_generation <= 0) + goto out; + /* No. Search for a previously revalidated path element */ + next = dget(dentry); + next_parent = dget_parent(dentry); + while((long)generation - (long)next_parent->d_inode->i_generation > 0) { + BUG_ON(IS_ROOT(next_parent)); + dput(next); + next = next_parent; + next_parent = dget_parent(next); + } + status = NFS_PROTO(inode)->lookup(next_parent->d_inode, &next->d_name, + &fh, &fattr); + if (status == 0) + status = nfs_try_migrate_filehandle(next->d_inode, &fh, &fattr, generation); + switch (status) { + case -ESTALE: + if (IS_ROOT(next_parent)) + break; + case 0: + if (dentry->d_inode == inode) + break; + dput(next_parent); + dput(next); + goto repeat; + default: + d_drop(next); + } + dput(next_parent); + dput(next); +out: + dput(dentry); + dprintk("%s: returned error %d\n", __FUNCTION__, status); + return status; +} + +/* * File system information */ +/* + * nfs_path - reconstruct the path given an arbitrary dentry + * @base - arbitrary string to prepend to the path + * @dentry - pointer to dentry + * @buffer - result buffer + * @buflen - length of buffer + * + * Helper 
function for constructing the path from the + * root dentry to an arbitrary hashed dentry. + * + * This is mainly for use in figuring out the path on the + * server side when automounting on top of an existing partition. + */ +static char *nfs_path(const char *base, const struct dentry *dentry, + char *buffer, ssize_t buflen) +{ + char *end = buffer+buflen; + int namelen; + + *--end = '\0'; + buflen--; + spin_lock(&dcache_lock); + while (!IS_ROOT(dentry)) { + namelen = dentry->d_name.len; + buflen -= namelen + 1; + if (buflen < 0) + break; /* too long: leave the loop so dcache_lock is dropped; buflen stays negative, so the check after the base string returns -ENAMETOOLONG */ + end -= namelen; + memcpy(end, dentry->d_name.name, namelen); + *--end = '/'; + dentry = dentry->d_parent; + } + spin_unlock(&dcache_lock); + namelen = strlen(base); + /* Strip off excess slashes in base string */ + while (namelen > 0 && base[namelen - 1] == '/') + namelen--; + buflen -= namelen; + if (buflen < 0) + goto Elong; + end -= namelen; + memcpy(end, base, namelen); + return end; +Elong: + return ERR_PTR(-ENAMETOOLONG); +} + static int nfs_set_super(struct super_block *s, void *data) { s->s_fs_info = data; @@ -1466,6 +1717,7 @@ static void nfs_kill_super(struct super_ if (server->hostname != NULL) kfree(server->hostname); kfree(server); + nfs_release_automount_timer(); } static struct file_system_type nfs_fs_type = { @@ -1478,8 +1730,53 @@ static struct file_system_type nfs_fs_ty #ifdef CONFIG_NFS_V4 -static void nfs4_clear_inode(struct inode *); +#define XATTR_NAME_NFSV4_ACL "system.nfs4_acl" + +int +nfs4_setxattr(struct dentry *dentry, const char *key, const void *buf, + size_t buflen, int flags) +{ + struct inode *inode = dentry->d_inode; + + if (strcmp(key, XATTR_NAME_NFSV4_ACL) != 0) + return -EINVAL; + if (!S_ISREG(inode->i_mode) && + (!S_ISDIR(inode->i_mode) || inode->i_mode & S_ISVTX)) + return -EPERM; + + return nfs4_proc_set_acl(inode, buf, buflen); +} + +/* The getxattr man page suggests returning -ENODATA for unknown attributes, + * and that's what we'll do for e.g.
user attributes that haven't been set. + * But we'll follow ext2/ext3's lead by returning -EOPNOTSUPP for unsupported + * attributes in kernel-managed attribute namespaces. */ +ssize_t +nfs4_getxattr(struct dentry *dentry, const char *key, void *buf, + size_t buflen) +{ + struct inode *inode = dentry->d_inode; + + if (strcmp(key, XATTR_NAME_NFSV4_ACL) != 0) + return -EOPNOTSUPP; + + return nfs4_proc_get_acl(inode, buf, buflen); +} + +ssize_t +nfs4_listxattr(struct dentry *dentry, char *buf, size_t buflen) +{ + size_t len = strlen(XATTR_NAME_NFSV4_ACL) + 1; + + if (buf && buflen < len) + return -ERANGE; + if (buf) + memcpy(buf, XATTR_NAME_NFSV4_ACL, len); + return len; +} + +static void nfs4_clear_inode(struct inode *); static struct super_operations nfs4_sops = { .alloc_inode = nfs_alloc_inode, @@ -1542,6 +1839,7 @@ static int nfs4_fill_super(struct super_ if (data->wsize != 0) server->wsize = nfs_block_size(data->wsize, NULL); server->flags = data->flags & NFS_MOUNT_FLAGMASK; + server->caps = NFS_CAP_ATOMIC_OPEN; server->acregmin = data->acregmin*HZ; server->acregmax = data->acregmax*HZ; @@ -1609,9 +1907,17 @@ static int nfs4_fill_super(struct super_ err = PTR_ERR(clnt); goto out_fail; } + clnt->cl_intr = 1; + clnt->cl_softrtry = 1; clnt->cl_chatty = 1; clp->cl_rpcclient = clnt; clp->cl_cred = rpcauth_lookupcred(clnt->cl_auth, 0); + if (IS_ERR(clp->cl_cred)) { + up_write(&clp->cl_sem); + err = PTR_ERR(clp->cl_cred); + clp->cl_cred = NULL; + goto out_fail; + } memcpy(clp->cl_ipaddr, server->ip_addr, sizeof(clp->cl_ipaddr)); nfs_idmap_new(clp); } @@ -1634,8 +1940,6 @@ static int nfs4_fill_super(struct super_ return PTR_ERR(clnt); } - clnt->cl_intr = (server->flags & NFS4_MOUNT_INTR) ? 1 : 0; - clnt->cl_softrtry = (server->flags & NFS4_MOUNT_SOFT) ? 
1 : 0; server->client = clnt; if (server->nfs4_state->cl_idmap == NULL) { @@ -1805,6 +2109,7 @@ static void nfs4_kill_super(struct super if (server->hostname != NULL) kfree(server->hostname); kfree(server); + nfs_release_automount_timer(); } static struct file_system_type nfs4_fs_type = { @@ -1815,6 +2120,59 @@ static struct file_system_type nfs4_fs_t .fs_flags = FS_ODD_RENAME|FS_REVAL_DOT|FS_BINARY_MOUNTDATA, }; +/* Constructs the SERVER-side path */ +static inline char *nfs4_path(const struct dentry *dentry, char *buffer, ssize_t buflen) +{ + return nfs_path(NFS_SB(dentry->d_sb)->mnt_path, dentry, buffer, buflen); +} + +static inline char *nfs4_dup_path(const struct dentry *dentry) +{ + char *page = (char *) __get_free_page(GFP_USER); + char *path; + + path = page ? nfs4_path(dentry, page, PAGE_SIZE) : ERR_PTR(-ENOMEM); /* __get_free_page() can fail: never hand a NULL buffer to nfs4_path(); free_page() below ignores a zero address */ + if (!IS_ERR(path)) { + int len = PAGE_SIZE + page - path; + char *tmp = path; + + path = kmalloc(len, GFP_KERNEL); + if (path) + memcpy(path, tmp, len); + else + path = ERR_PTR(-ENOMEM); + } + free_page((unsigned long)page); + return path; +} + +static struct super_block *nfs4_clone_client(struct nfs_server *server, const struct dentry *dentry) +{ + struct nfs4_client *clp = server->nfs4_state; + struct super_block *sb; + + server->mnt_path = nfs4_dup_path(dentry); + if (IS_ERR(server->mnt_path)) { + sb = (struct super_block *)server->mnt_path; + goto err; + } + sb = sget(&nfs4_fs_type, nfs4_compare_super, nfs_set_super, server); + if (IS_ERR(sb) || sb->s_root) + goto free_path; + nfs4_server_capabilities(server, &server->fh); + + down_write(&clp->cl_sem); + atomic_inc(&clp->cl_count); + list_add_tail(&server->nfs4_siblings, &clp->cl_superblocks); + up_write(&clp->cl_sem); + return sb; +free_path: + kfree(server->mnt_path); +err: + server->mnt_path = NULL; + return sb; +} + #define nfs4_init_once(nfsi) \ do { \ INIT_LIST_HEAD(&(nfsi)->open_states); \ @@ -1825,12 +2183,157 @@ static struct file_system_type nfs4_fs_t #define register_nfs4fs()
register_filesystem(&nfs4_fs_type) #define unregister_nfs4fs() unregister_filesystem(&nfs4_fs_type) #else +#define nfs4_clone_client(a,b) ERR_PTR(-EINVAL) #define nfs4_init_once(nfsi) \ do { } while (0) #define register_nfs4fs() (0) #define unregister_nfs4fs() #endif +static inline struct super_block *nfs_clone_client(struct nfs_server *server) +{ + return sget(&nfs_fs_type, nfs_compare_super, nfs_set_super, server); +} + +struct nfs_clone_mount { + const struct super_block *sb; + const struct dentry *dentry; + struct nfs_fh *fh; + struct nfs_fattr *fattr; +}; + +static struct super_block *clone_nfs_sb(struct file_system_type *fs_type, + int flags, const char *dev_name, void *raw_data) +{ + struct nfs_clone_mount *data = raw_data; + struct nfs_server *server; + struct nfs_server *parent = NFS_SB(data->sb); + struct super_block *sb = ERR_PTR(-EINVAL); + void *err = ERR_PTR(-ENOMEM); + struct inode *root_inode; + struct nfs_fsinfo fsinfo; + int len; + + server = kmalloc(sizeof(struct nfs_server), GFP_KERNEL); + if (server == NULL) + goto out_err; + memcpy(server, parent, sizeof(*server)); + len = strlen(parent->hostname) + 1; + server->hostname = kmalloc(len, GFP_KERNEL); + if (server->hostname == NULL) + goto free_server; + memcpy(server->hostname, parent->hostname, len); + server->fsid = data->fattr->fsid; + nfs_copy_fh(&server->fh, data->fh); + if (rpciod_up() != 0) + goto free_hostname; + + switch (parent->rpc_ops->version) { + case 2: + case 3: + sb = nfs_clone_client(server); + break; + case 4: + sb = nfs4_clone_client(server, data->dentry); + } + if (IS_ERR((err = sb)) || sb->s_root) + goto kill_rpciod; + sb->s_op = data->sb->s_op; + sb->s_blocksize = data->sb->s_blocksize; + sb->s_blocksize_bits = data->sb->s_blocksize_bits; + sb->s_maxbytes = data->sb->s_maxbytes; + + server->client_sys = NULL; + server->client = rpc_clone_client(parent->client); + if (IS_ERR((err = server->client))) + goto out_deactivate; + if (parent->client_sys != NULL) { + 
server->client_sys = rpc_clone_client(parent->client_sys); + if (IS_ERR((err = server->client_sys))) + goto out_deactivate; + } + + root_inode = nfs_fhget(sb, data->fh, data->fattr); + if (!root_inode) + goto out_deactivate; + sb->s_root = d_alloc_root(root_inode); + if (!sb->s_root) + goto out_put_root; + fsinfo.fattr = data->fattr; + if (NFS_PROTO(root_inode)->fsinfo(server, data->fh, &fsinfo) == 0) + nfs_super_set_maxbytes(sb, fsinfo.maxfilesize); + sb->s_root->d_op = server->rpc_ops->dentry_ops; + sb->s_flags |= MS_ACTIVE; + return sb; +out_put_root: + iput(root_inode); +out_deactivate: + up_write(&sb->s_umount); + deactivate_super(sb); + return IS_ERR(err) ? (struct super_block *)err : ERR_PTR(-ENOMEM); /* nfs_fhget()/d_alloc_root() failures arrive here with err still holding a valid rpc_clnt pointer: never return that as a superblock */ +kill_rpciod: + rpciod_down(); +free_hostname: + kfree(server->hostname); +free_server: + kfree(server); +out_err: + return (struct super_block *)err; +} + +static struct file_system_type clone_nfs_fs_type = { + .owner = THIS_MODULE, + .name = "nfs", + .get_sb = clone_nfs_sb, + .kill_sb = nfs_kill_super, + .fs_flags = FS_ODD_RENAME|FS_REVAL_DOT|FS_BINARY_MOUNTDATA, +}; + +static inline char *nfs_devname(const struct vfsmount *mnt_parent, + const struct dentry *dentry, + char *buffer, ssize_t buflen) +{ + return nfs_path(mnt_parent->mnt_devname, dentry, buffer, buflen); +} + +/** + * nfs_do_submount - set up mountpoint when crossing a filesystem boundary + * @mnt_parent - mountpoint of parent directory + * @dentry - parent directory + * @fh - filehandle for new root dentry + * @fattr - attributes for new root inode + * + */ +struct vfsmount *nfs_do_submount(const struct vfsmount *mnt_parent, + const struct dentry *dentry, struct nfs_fh *fh, + struct nfs_fattr *fattr) +{ + struct nfs_clone_mount mountdata = { + .sb = mnt_parent->mnt_sb, + .dentry = dentry, + .fh = fh, + .fattr = fattr, + }; + struct vfsmount *mnt = ERR_PTR(-ENOMEM); + char *page = (char *) __get_free_page(GFP_USER); + char *devname; + + dprintk("%s: submounting on %s/%s\n", __FUNCTION__, + dentry->d_parent->d_name.name, +
dentry->d_name.name); + if (page == NULL) + goto out; + devname = nfs_devname(mnt_parent, dentry, page, PAGE_SIZE); + if (!IS_ERR(devname)) + mnt = vfs_kern_mount(&clone_nfs_fs_type, 0, devname, &mountdata); + else + mnt = (struct vfsmount *)devname; + free_page((unsigned long)page); +out: + dprintk("%s: done\n", __FUNCTION__); + return mnt; +} + extern int nfs_init_nfspagecache(void); extern void nfs_destroy_nfspagecache(void); extern int nfs_init_readpagecache(void); @@ -1851,6 +2354,9 @@ static struct inode *nfs_alloc_inode(str if (!nfsi) return NULL; nfsi->flags = 0; +#ifdef CONFIG_NFS_V4 + nfsi->nfs4_acl = NULL; +#endif /* CONFIG_NFS_V4 */ return &nfsi->vfs_inode; } @@ -1905,6 +2411,10 @@ static int __init init_nfs_fs(void) { int err; + err = nfs_init_sysctl(); + if (err) + goto out5; + err = nfs_init_nfspagecache(); if (err) goto out4; @@ -1952,6 +2462,8 @@ out2: out3: nfs_destroy_nfspagecache(); out4: + nfs_destroy_sysctl(); +out5: return err; } @@ -1967,6 +2479,7 @@ static void __exit exit_nfs_fs(void) #ifdef CONFIG_PROC_FS rpc_proc_unregister("nfs"); #endif + nfs_destroy_sysctl(); unregister_filesystem(&nfs_fs_type); unregister_nfs4fs(); } Index: linux-2.6.11/fs/nfs/mount_clnt.c =================================================================== --- linux-2.6.11.orig/fs/nfs/mount_clnt.c +++ linux-2.6.11/fs/nfs/mount_clnt.c @@ -31,7 +31,7 @@ static struct rpc_clnt * mnt_create(char *, struct sockaddr_in *, int, int); -struct rpc_program mnt_program; +static struct rpc_program mnt_program; struct mnt_fhstatus { unsigned int status; @@ -174,7 +174,7 @@ static struct rpc_version * mnt_version[ static struct rpc_stat mnt_stats; -struct rpc_program mnt_program = { +static struct rpc_program mnt_program = { .name = "mount", .number = NFS_MNT_PROGRAM, .nrvers = sizeof(mnt_version)/sizeof(mnt_version[0]), Index: linux-2.6.11/fs/nfs/namespace.c =================================================================== --- /dev/null +++ linux-2.6.11/fs/nfs/namespace.c @@ 
-0,0 +1,111 @@ +/* + * linux/fs/nfs/namespace.c + * + * Copyright (C) 2005 Trond Myklebust + * + * NFS namespace + */ + +#include + +#include +#include +#include +#include +#include +#include +#include + +#define NFSDBG_FACILITY NFSDBG_VFS + +static LIST_HEAD(nfs_automount_list); +static void nfs_expire_automounts(void *list); +static DECLARE_WORK(nfs_automount_task, nfs_expire_automounts, &nfs_automount_list); +int nfs_mountpoint_expiry_timeout = 500 * HZ; + +/* + * nfs_follow_mountpoint - handle crossing a mountpoint on the server + * @dentry - dentry of mountpoint + * @nd - nameidata info + * + * When we encounter a mountpoint on the server, we want to set up + * a mountpoint on the client too, to prevent inode numbers from + * colliding, and to allow "df" to work properly. + * On NFSv4, we also want to allow for the fact that different + * filesystems may be migrated to different servers in a failover + * situation, and that different filesystems may want to use + * different security flavours. 
+ */ +static int nfs_follow_mountpoint(struct dentry *dentry, struct nameidata *nd) +{ + struct vfsmount *mnt; + struct nfs_server *server = NFS_SERVER(dentry->d_inode); + struct dentry *parent; + struct nfs_fh fh; + struct nfs_fattr fattr; + int err; + + BUG_ON(IS_ROOT(dentry)); + dprintk("%s: enter\n", __FUNCTION__); + dput(nd->dentry); + nd->dentry = dget(dentry); + if (d_mountpoint(nd->dentry)) + goto out_follow; + /* Look it up again */ + parent = dget_parent(nd->dentry); + err = server->rpc_ops->lookup(parent->d_inode, &nd->dentry->d_name, &fh, &fattr); + dput(parent); + if (err != 0) + goto out_err; + mnt = nfs_do_submount(nd->mnt, nd->dentry, &fh, &fattr); + if (IS_ERR(mnt)) { + err = PTR_ERR(mnt); + goto out_err; + } + mntget(mnt); + err = do_add_mount(mnt, nd, nd->mnt->mnt_flags, &nfs_automount_list); + if (err < 0) { + mntput(mnt); + if (err == -EBUSY) + goto out_follow; + goto out_err; + } + mntput(nd->mnt); + dput(nd->dentry); + nd->mnt = mnt; + nd->dentry = dget(mnt->mnt_root); + schedule_delayed_work(&nfs_automount_task, nfs_mountpoint_expiry_timeout); +out: + dprintk("%s: done, returned %d\n", __FUNCTION__, err); + return err; +out_err: + path_release(nd); + goto out; +out_follow: + while(d_mountpoint(nd->dentry) && follow_down(&nd->mnt, &nd->dentry)) + ; + err = 0; + goto out; +} + +struct inode_operations nfs_mountpoint_inode_operations = { + .follow_link = nfs_follow_mountpoint, + .getattr = nfs_getattr, +}; + +static void nfs_expire_automounts(void *data) +{ + struct list_head *list = (struct list_head *)data; + + mark_mounts_for_expiry(list); + if (!list_empty(list)) + schedule_delayed_work(&nfs_automount_task, nfs_mountpoint_expiry_timeout); +} + +void nfs_release_automount_timer(void) +{ + if (list_empty(&nfs_automount_list)) { + cancel_delayed_work(&nfs_automount_task); + flush_scheduled_work(); + } +} Index: linux-2.6.11/fs/nfs/nfs2xdr.c =================================================================== --- 
linux-2.6.11.orig/fs/nfs/nfs2xdr.c +++ linux-2.6.11/fs/nfs/nfs2xdr.c @@ -131,7 +131,8 @@ xdr_decode_fattr(u32 *p, struct nfs_fatt fattr->du.nfs2.blocksize = ntohl(*p++); rdev = ntohl(*p++); fattr->du.nfs2.blocks = ntohl(*p++); - fattr->fsid_u.nfs3 = ntohl(*p++); + fattr->fsid.major = ntohl(*p++); + fattr->fsid.minor = 0; fattr->fileid = ntohl(*p++); p = xdr_decode_time(p, &fattr->atime); p = xdr_decode_time(p, &fattr->mtime); Index: linux-2.6.11/fs/nfs/nfs3proc.c =================================================================== --- linux-2.6.11.orig/fs/nfs/nfs3proc.c +++ linux-2.6.11/fs/nfs/nfs3proc.c @@ -295,7 +295,7 @@ static int nfs3_proc_commit(struct nfs_w * Create a regular file. * For now, we don't implement O_EXCL. */ -static struct inode * +static int nfs3_proc_create(struct inode *dir, struct dentry *dentry, struct iattr *sattr, int flags) { @@ -342,29 +342,19 @@ again: break; case NFS3_CREATE_UNCHECKED: - goto exit; + goto out; } goto again; } -exit: - dprintk("NFS reply create: %d\n", status); - + if (status == 0) + status = nfs_instantiate(dentry, &fhandle, &fattr); if (status != 0) goto out; - if (fhandle.size == 0 || !(fattr.valid & NFS_ATTR_FATTR)) { - status = nfs3_proc_lookup(dir, &dentry->d_name, &fhandle, &fattr); - if (status != 0) - goto out; - } /* When we created the file with exclusive semantics, make * sure we set the attributes afterwards. 
*/ if (arg.createmode == NFS3_CREATE_EXCLUSIVE) { - struct nfs3_sattrargs arg = { - .fh = &fhandle, - .sattr = sattr, - }; dprintk("NFS call setattr (post-create)\n"); if (!(sattr->ia_valid & ATTR_ATIME_SET)) @@ -375,20 +365,13 @@ exit: /* Note: we could use a guarded setattr here, but I'm * not sure this buys us anything (and I'd have * to revamp the NFSv3 XDR code) */ - fattr.valid = 0; - status = rpc_call(NFS_CLIENT(dir), NFS3PROC_SETATTR, - &arg, &fattr, 0); + status = nfs3_proc_setattr(dentry, &fattr, sattr); + nfs_refresh_inode(dentry->d_inode, &fattr); dprintk("NFS reply setattr (post-create): %d\n", status); } - if (status == 0) { - struct inode *inode; - inode = nfs_fhget(dir->i_sb, &fhandle, &fattr); - if (inode) - return inode; - status = -ENOMEM; - } out: - return ERR_PTR(status); + dprintk("NFS reply create: %d\n", status); + return status; } static int @@ -540,28 +523,30 @@ nfs3_proc_symlink(struct inode *dir, str } static int -nfs3_proc_mkdir(struct inode *dir, struct qstr *name, struct iattr *sattr, - struct nfs_fh *fhandle, struct nfs_fattr *fattr) +nfs3_proc_mkdir(struct inode *dir, struct dentry *dentry, struct iattr *sattr) { - struct nfs_fattr dir_attr; + struct nfs_fh fhandle; + struct nfs_fattr fattr, dir_attr; struct nfs3_mkdirargs arg = { .fh = NFS_FH(dir), - .name = name->name, - .len = name->len, + .name = dentry->d_name.name, + .len = dentry->d_name.len, .sattr = sattr }; struct nfs3_diropres res = { .dir_attr = &dir_attr, - .fh = fhandle, - .fattr = fattr + .fh = &fhandle, + .fattr = &fattr }; int status; - dprintk("NFS call mkdir %s\n", name->name); + dprintk("NFS call mkdir %s\n", dentry->d_name.name); dir_attr.valid = 0; - fattr->valid = 0; + fattr.valid = 0; status = rpc_call(NFS_CLIENT(dir), NFS3PROC_MKDIR, &arg, &res, 0); nfs_refresh_inode(dir, &dir_attr); + if (status == 0) + status = nfs_instantiate(dentry, &fhandle, &fattr); dprintk("NFS reply mkdir: %d\n", status); return status; } @@ -639,23 +624,24 @@ 
nfs3_proc_readdir(struct dentry *dentry, } static int -nfs3_proc_mknod(struct inode *dir, struct qstr *name, struct iattr *sattr, - dev_t rdev, struct nfs_fh *fh, struct nfs_fattr *fattr) +nfs3_proc_mknod(struct inode *dir, struct dentry *dentry, struct iattr *sattr, + dev_t rdev) { - struct nfs_fattr dir_attr; + struct nfs_fh fh; + struct nfs_fattr fattr, dir_attr; struct nfs3_mknodargs arg = { .fh = NFS_FH(dir), - .name = name->name, - .len = name->len, + .name = dentry->d_name.name, + .len = dentry->d_name.len, .sattr = sattr, .rdev = rdev }; struct nfs3_diropres res = { .dir_attr = &dir_attr, - .fh = fh, - .fattr = fattr + .fh = &fh, + .fattr = &fattr }; - int status; + int status; switch (sattr->ia_mode & S_IFMT) { case S_IFBLK: arg.type = NF3BLK; break; @@ -665,12 +651,14 @@ nfs3_proc_mknod(struct inode *dir, struc default: return -EINVAL; } - dprintk("NFS call mknod %s %u:%u\n", name->name, + dprintk("NFS call mknod %s %u:%u\n", dentry->d_name.name, MAJOR(rdev), MINOR(rdev)); dir_attr.valid = 0; - fattr->valid = 0; + fattr.valid = 0; status = rpc_call(NFS_CLIENT(dir), NFS3PROC_MKNOD, &arg, &res, 0); nfs_refresh_inode(dir, &dir_attr); + if (status == 0) + status = nfs_instantiate(dentry, &fh, &fattr); dprintk("NFS reply mknod: %d\n", status); return status; } @@ -838,6 +826,7 @@ struct nfs_rpc_ops nfs_v3_clientops = { .version = 3, /* protocol version */ .dentry_ops = &nfs_dentry_operations, .dir_inode_ops = &nfs_dir_inode_operations, + .file_inode_ops = &nfs_file_inode_operations, .getroot = nfs3_proc_get_root, .getattr = nfs3_proc_getattr, .setattr = nfs3_proc_setattr, Index: linux-2.6.11/fs/nfs/nfs3xdr.c =================================================================== --- linux-2.6.11.orig/fs/nfs/nfs3xdr.c +++ linux-2.6.11/fs/nfs/nfs3xdr.c @@ -160,7 +160,8 @@ xdr_decode_fattr(u32 *p, struct nfs_fatt if (MAJOR(fattr->rdev) != major || MINOR(fattr->rdev) != minor) fattr->rdev = 0; - p = xdr_decode_hyper(p, &fattr->fsid_u.nfs3); + p = xdr_decode_hyper(p, 
&fattr->fsid.major); + fattr->fsid.minor = 0; p = xdr_decode_hyper(p, &fattr->fileid); p = xdr_decode_time3(p, &fattr->atime); p = xdr_decode_time3(p, &fattr->mtime); Index: linux-2.6.11/fs/nfs/nfs4_fs.h =================================================================== --- /dev/null +++ linux-2.6.11/fs/nfs/nfs4_fs.h @@ -0,0 +1,264 @@ +/* + * linux/fs/nfs/nfs4_fs.h + * + * Copyright (C) 2005 Trond Myklebust + * + * NFSv4-specific filesystem definitions and declarations + */ + +#ifndef __LINUX_FS_NFS_NFS4_FS_H +#define __LINUX_FS_NFS_NFS4_FS_H + +#ifdef CONFIG_NFS_V4 + +struct idmap; + +/* + * In a seqid-mutating op, this macro controls which error return + * values trigger incrementation of the seqid. + * + * from rfc 3010: + * The client MUST monotonically increment the sequence number for the + * CLOSE, LOCK, LOCKU, OPEN, OPEN_CONFIRM, and OPEN_DOWNGRADE + * operations. This is true even in the event that the previous + * operation that used the sequence number received an error. The only + * exception to this rule is if the previous operation received one of + * the following errors: NFSERR_STALE_CLIENTID, NFSERR_STALE_STATEID, + * NFSERR_BAD_STATEID, NFSERR_BAD_SEQID, NFSERR_BADXDR, + * NFSERR_RESOURCE, NFSERR_NOFILEHANDLE. + * + */ +#define seqid_mutating_err(err) \ +(((err) != NFSERR_STALE_CLIENTID) && \ + ((err) != NFSERR_STALE_STATEID) && \ + ((err) != NFSERR_BAD_STATEID) && \ + ((err) != NFSERR_BAD_SEQID) && \ + ((err) != NFSERR_BAD_XDR) && \ + ((err) != NFSERR_RESOURCE) && \ + ((err) != NFSERR_NOFILEHANDLE)) + +enum nfs4_client_state { + NFS4CLNT_OK = 0, +}; + +/* + * The nfs4_client identifies our client state to the server. 
+ */ +struct nfs4_client { + struct list_head cl_servers; /* Global list of servers */ + struct in_addr cl_addr; /* Server identifier */ + u64 cl_clientid; /* constant */ + nfs4_verifier cl_confirm; + unsigned long cl_state; + + u32 cl_lockowner_id; + + /* + * The following rwsem ensures exclusive access to the server + * while we recover the state following a lease expiration. + */ + struct rw_semaphore cl_sem; + + struct list_head cl_delegations; + struct list_head cl_state_owners; + struct list_head cl_unused; + int cl_nunused; + spinlock_t cl_lock; + atomic_t cl_count; + + struct rpc_clnt * cl_rpcclient; + struct rpc_cred * cl_cred; + + struct list_head cl_superblocks; /* List of nfs_server structs */ + + unsigned long cl_lease_time; + unsigned long cl_last_renewal; + struct work_struct cl_renewd; + struct work_struct cl_recoverd; + + wait_queue_head_t cl_waitq; + struct rpc_wait_queue cl_rpcwaitq; + + /* used for the setclientid verifier */ + struct timespec cl_boot_time; + + /* idmapper */ + struct idmap * cl_idmap; + + /* Our own IP address, as a null-terminated string. + * This is used to generate the clientid, and the callback address. + */ + char cl_ipaddr[16]; + unsigned char cl_id_uniquifier; +}; + +/* + * NFS4 state_owners and lock_owners are simply labels for ordered + * sequences of RPC calls. Their sole purpose is to provide once-only + * semantics by allowing the server to identify replayed requests. + * + * The ->so_sema is held during all state_owner seqid-mutating operations: + * OPEN, OPEN_DOWNGRADE, and CLOSE. Its purpose is to properly serialize + * so_seqid. 
+ */ +struct nfs4_state_owner { + struct list_head so_list; /* per-clientid list of state_owners */ + struct nfs4_client *so_client; + u32 so_id; /* 32-bit identifier, unique */ + struct semaphore so_sema; + u32 so_seqid; /* protected by so_sema */ + atomic_t so_count; + + struct rpc_cred *so_cred; /* Associated cred */ + struct list_head so_states; + struct list_head so_delegations; +}; + +/* + * struct nfs4_state maintains the client-side state for a given + * (state_owner,inode) tuple (OPEN) or state_owner (LOCK). + * + * OPEN: + * In order to know when to OPEN_DOWNGRADE or CLOSE the state on the server, + * we need to know how many files are open for reading or writing on a + * given inode. This information too is stored here. + * + * LOCK: one nfs4_state (LOCK) to hold the lock stateid nfs4_state(OPEN) + */ + +struct nfs4_lock_state { + struct list_head ls_locks; /* Other lock stateids */ + fl_owner_t ls_owner; /* POSIX lock owner */ +#define NFS_LOCK_INITIALIZED 1 + int ls_flags; + u32 ls_seqid; + u32 ls_id; + nfs4_stateid ls_stateid; + atomic_t ls_count; +}; + +/* bits for nfs4_state->flags */ +enum { + LK_STATE_IN_USE, + NFS_DELEGATED_STATE, +}; + +struct nfs4_state { + struct list_head open_states; /* List of states for the same state_owner */ + struct list_head inode_states; /* List of states for the same inode */ + struct list_head lock_states; /* List of subservient lock stateids */ + + struct nfs4_state_owner *owner; /* Pointer to the open owner */ + struct inode *inode; /* Pointer to the inode */ + + unsigned long flags; /* Do we hold any locks? 
*/ + struct semaphore lock_sema; /* Serializes file locking operations */ + rwlock_t state_lock; /* Protects the lock_states list */ + + nfs4_stateid stateid; + + unsigned int nreaders; + unsigned int nwriters; + int state; /* State on the server (R,W, or RW) */ + atomic_t count; +}; + + +struct nfs4_exception { + long timeout; + int retry; +}; + +struct nfs4_state_recovery_ops { + int (*recover_open)(struct nfs4_state_owner *, struct nfs4_state *); + int (*recover_lock)(struct nfs4_state *, struct file_lock *); +}; + +extern struct dentry_operations nfs4_dentry_operations; +extern struct inode_operations nfs4_dir_inode_operations; +extern struct inode_operations nfs4_file_inode_operations; + +/* inode.c */ +extern ssize_t nfs4_getxattr(struct dentry *, const char *, void *, size_t); +extern int nfs4_setxattr(struct dentry *, const char *, const void *, size_t, int); +extern ssize_t nfs4_listxattr(struct dentry *, char *, size_t); + + +/* nfs4proc.c */ +extern int nfs4_map_errors(int err); +extern int nfs4_proc_setclientid(struct nfs4_client *, u32, unsigned short); +extern int nfs4_proc_setclientid_confirm(struct nfs4_client *); +extern int nfs4_proc_async_renew(struct nfs4_client *); +extern int nfs4_proc_renew(struct nfs4_client *); +extern int nfs4_do_close(struct inode *inode, struct nfs4_state *state, mode_t mode); +extern struct inode *nfs4_atomic_open(struct inode *, struct dentry *, struct nameidata *); +extern int nfs4_open_revalidate(struct inode *, struct dentry *, int); +extern int nfs4_server_capabilities(struct nfs_server *server, struct nfs_fh *fhandle); +extern ssize_t nfs4_proc_get_acl(struct inode *, void *buf, size_t buflen); +extern int nfs4_proc_set_acl(struct inode *, const void *buf, size_t buflen); +extern void nfs4_zap_acl_attr(struct inode *inode); +extern int nfs4_proc_fs_locations(struct inode *dir, struct dentry *dentry, + struct nfs_fs_locations *fs_locations, struct page *page); + +extern struct nfs4_state_recovery_ops 
nfs4_reboot_recovery_ops; +extern struct nfs4_state_recovery_ops nfs4_network_partition_recovery_ops; + +extern const u32 nfs4_fattr_bitmap[2]; +extern const u32 nfs4_statfs_bitmap[2]; +extern const u32 nfs4_pathconf_bitmap[2]; +extern const u32 nfs4_fsinfo_bitmap[2]; + +/* nfs4renewd.c */ +extern void nfs4_schedule_state_renewal(struct nfs4_client *); +extern void nfs4_renewd_prepare_shutdown(struct nfs_server *); +extern void nfs4_kill_renewd(struct nfs4_client *); +extern void nfs4_renew_state(void *); + +/* nfs4state.c */ +extern void init_nfsv4_state(struct nfs_server *); +extern void destroy_nfsv4_state(struct nfs_server *); +extern struct nfs4_client *nfs4_get_client(struct in_addr *); +extern void nfs4_put_client(struct nfs4_client *clp); +extern int nfs4_init_client(struct nfs4_client *clp); +extern struct nfs4_client *nfs4_find_client(struct in_addr *); +extern u32 nfs4_alloc_lockowner_id(struct nfs4_client *); + +extern struct nfs4_state_owner * nfs4_get_state_owner(struct nfs_server *, struct rpc_cred *); +extern void nfs4_put_state_owner(struct nfs4_state_owner *); +extern void nfs4_drop_state_owner(struct nfs4_state_owner *); +extern struct nfs4_state * nfs4_get_open_state(struct inode *, struct nfs4_state_owner *); +extern void nfs4_put_open_state(struct nfs4_state *); +extern void nfs4_close_state(struct nfs4_state *, mode_t); +extern struct nfs4_state *nfs4_find_state(struct inode *, struct rpc_cred *, mode_t mode); +extern void nfs4_increment_seqid(int status, struct nfs4_state_owner *sp); +extern void nfs4_schedule_state_recovery(struct nfs4_client *); +extern struct nfs4_lock_state *nfs4_find_lock_state(struct nfs4_state *state, fl_owner_t); +extern struct nfs4_lock_state *nfs4_get_lock_state(struct nfs4_state *state, fl_owner_t); +extern void nfs4_put_lock_state(struct nfs4_lock_state *state); +extern void nfs4_increment_lock_seqid(int status, struct nfs4_lock_state *ls); +extern void nfs4_notify_setlk(struct nfs4_state *, struct file_lock *, 
struct nfs4_lock_state *); +extern void nfs4_notify_unlck(struct nfs4_state *, struct file_lock *, struct nfs4_lock_state *); +extern void nfs4_copy_stateid(nfs4_stateid *, struct nfs4_state *, fl_owner_t); + +extern const nfs4_stateid zero_stateid; + +/* nfs4xdr.c */ +extern uint32_t *nfs4_decode_dirent(uint32_t *p, struct nfs_entry *entry, int plus); +extern struct rpc_procinfo nfs4_procedures[]; + +struct nfs4_mount_data; + +/* callback_xdr.c */ +extern struct svc_version nfs4_callback_version1; + +#else + +#define init_nfsv4_state(server) do { } while (0) +#define destroy_nfsv4_state(server) do { } while (0) +#define nfs4_put_state_owner(owner) do { } while (0) +#define nfs4_put_open_state(state) do { } while (0) +#define nfs4_close_state(a, b) do { } while (0) +#define nfs4_zap_acl_attr(inode) do { } while (0) + +#endif /* CONFIG_NFS_V4 */ +#endif /* __LINUX_FS_NFS_NFS4_FS_H */ Index: linux-2.6.11/fs/nfs/nfs4proc.c =================================================================== --- linux-2.6.11.orig/fs/nfs/nfs4proc.c +++ linux-2.6.11/fs/nfs/nfs4proc.c @@ -37,6 +37,7 @@ #include #include +#include #include #include #include @@ -47,6 +48,7 @@ #include #include +#include "nfs4_fs.h" #include "delegation.h" #define NFSDBG_FACILITY NFSDBG_PROC @@ -57,16 +59,15 @@ static int nfs4_do_fsinfo(struct nfs_server *, struct nfs_fh *, struct nfs_fsinfo *); static int nfs4_async_handle_error(struct rpc_task *, struct nfs_server *); static int _nfs4_proc_access(struct inode *inode, struct nfs_access_entry *entry); +static int nfs4_handle_exception(struct nfs_server *server, int errorcode, struct nfs4_exception *exception); extern u32 *nfs4_decode_dirent(u32 *p, struct nfs_entry *entry, int plus); extern struct rpc_procinfo nfs4_procedures[]; -extern nfs4_stateid zero_stateid; - /* Prevent leaks of NFSv4 errors into userland */ -static inline int nfs4_map_errors(int err) +int nfs4_map_errors(int err) { if (err < -1000) { - printk(KERN_WARNING "%s could not handle
NFSv4 error %d\n", + dprintk("%s could not handle NFSv4 error %d\n", __FUNCTION__, -err); return -EIO; } @@ -102,7 +103,7 @@ const u32 nfs4_statfs_bitmap[2] = { | FATTR4_WORD1_SPACE_TOTAL }; -u32 nfs4_pathconf_bitmap[2] = { +const u32 nfs4_pathconf_bitmap[2] = { FATTR4_WORD0_MAXLINK | FATTR4_WORD0_MAXNAME, 0 @@ -188,6 +189,23 @@ static void update_changeattr(struct ino nfsi->change_attr = cinfo->after; } +static void update_open_stateid(struct nfs4_state *state, nfs4_stateid *stateid, int open_flags) +{ + struct inode *inode = state->inode; + + open_flags &= (FMODE_READ|FMODE_WRITE); + /* Protect against nfs4_find_state() */ + spin_lock(&inode->i_lock); + state->state |= open_flags; + /* NB! List reordering - see the reclaim code for why. */ + if ((open_flags & FMODE_WRITE) && 0 == state->nwriters++) + list_move(&state->open_states, &state->owner->so_states); + if (open_flags & FMODE_READ) + state->nreaders++; + memcpy(&state->stateid, stateid, sizeof(state->stateid)); + spin_unlock(&inode->i_lock); +} + /* * OPEN_RECLAIM: * reclaim state on the server after a reboot. 
@@ -244,7 +262,7 @@ static int _nfs4_open_reclaim(struct nfs return status; } -int nfs4_open_reclaim(struct nfs4_state_owner *sp, struct nfs4_state *state) +static int nfs4_open_reclaim(struct nfs4_state_owner *sp, struct nfs4_state *state) { struct nfs_server *server = NFS_SERVER(state->inode); struct nfs4_exception exception = { }; @@ -332,7 +350,7 @@ int nfs4_open_delegation_recall(struct d return err; } -static int _nfs4_proc_open_confirm(struct rpc_clnt *clnt, const struct nfs_fh *fh, struct nfs4_state_owner *sp, nfs4_stateid *stateid) +static inline int _nfs4_proc_open_confirm(struct rpc_clnt *clnt, const struct nfs_fh *fh, struct nfs4_state_owner *sp, nfs4_stateid *stateid) { struct nfs_open_confirmargs arg = { .fh = fh, @@ -355,11 +373,49 @@ static int _nfs4_proc_open_confirm(struc return status; } -static int _nfs4_do_access(struct inode *inode, struct rpc_cred *cred, int mask) +static int _nfs4_proc_open(struct inode *dir, struct nfs4_state_owner *sp, struct nfs_openargs *o_arg, struct nfs_openres *o_res) +{ + struct nfs_server *server = NFS_SERVER(dir); + struct rpc_message msg = { + .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_OPEN], + .rpc_argp = o_arg, + .rpc_resp = o_res, + .rpc_cred = sp->so_cred, + }; + int status; + + /* Update sequence id. The caller must serialize! 
*/ + o_arg->seqid = sp->so_seqid; + o_arg->id = sp->so_id; + o_arg->clientid = sp->so_client->cl_clientid; + + status = rpc_call_sync(server->client, &msg, RPC_TASK_NOINTR); + nfs4_increment_seqid(status, sp); + if (status != 0) + goto out; + update_changeattr(dir, &o_res->cinfo); + if(o_res->rflags & NFS4_OPEN_RESULT_CONFIRM) { + status = _nfs4_proc_open_confirm(server->client, &o_res->fh, + sp, &o_res->stateid); + if (status != 0) + goto out; + } + if (!(o_res->f_attr->valid & NFS_ATTR_FATTR)) + status = server->rpc_ops->getattr(server, &o_res->fh, o_res->f_attr); +out: + return status; +} + +static int _nfs4_do_access(struct inode *inode, struct rpc_cred *cred, int openflags) { struct nfs_access_entry cache; + int mask = 0; int status; + if (openflags & FMODE_READ) + mask |= MAY_READ; + if (openflags & FMODE_WRITE) + mask |= MAY_WRITE; status = nfs_access_get_cached(inode, cred, &cache); if (status == 0) goto out; @@ -379,9 +435,97 @@ out: } /* + * OPEN_EXPIRED: + * reclaim state on the server after a network partition. 
+ * Assumes caller holds the appropriate lock + */ +static int _nfs4_open_expired(struct nfs4_state_owner *sp, struct nfs4_state *state, struct dentry *dentry) +{ + struct dentry *parent = dget_parent(dentry); + struct inode *dir = parent->d_inode; + struct inode *inode = state->inode; + struct nfs_server *server = NFS_SERVER(dir); + struct nfs_delegation *delegation = NFS_I(inode)->delegation; + struct nfs_fattr f_attr = { + .valid = 0, + }; + struct nfs_openargs o_arg = { + .fh = NFS_FH(dir), + .open_flags = state->state, + .name = &dentry->d_name, + .bitmask = server->attr_bitmask, + .claim = NFS4_OPEN_CLAIM_NULL, + }; + struct nfs_openres o_res = { + .f_attr = &f_attr, + .server = server, + }; + uint32_t generation; + int status = 0; + + if (delegation != NULL && !(delegation->flags & NFS_DELEGATION_NEED_RECLAIM)) { + status = _nfs4_do_access(inode, sp->so_cred, state->state); + if (status < 0) + goto out; + memcpy(&state->stateid, &delegation->stateid, sizeof(state->stateid)); + set_bit(NFS_DELEGATED_STATE, &state->flags); + goto out; + } + /* If we are in a failover situation, recover path first */ + status = nfs_try_migrate_inode(dir, parent); + if (status != 0) + goto out_nodeleg; + generation = server->generation; + status = _nfs4_proc_open(dir, sp, &o_arg, &o_res); + if (status != 0) + goto out_nodeleg; + status = nfs_try_migrate_filehandle(inode, &o_res.fh, o_res.f_attr, generation); + if (status != 0) + goto out_stale; + memcpy(&state->stateid, &o_res.stateid, sizeof(state->stateid)); + if (o_res.delegation_type != 0) { + if (delegation == NULL || !(delegation->flags & NFS_DELEGATION_NEED_RECLAIM)) /* delegation may be NULL here: nothing to reclaim, install the new one */ + nfs_inode_set_delegation(inode, sp->so_cred, &o_res); + else + nfs_inode_reclaim_delegation(inode, sp->so_cred, &o_res); + } +out_nodeleg: + clear_bit(NFS_DELEGATED_STATE, &state->flags); +out: + dput(parent); + return status; +out_stale: + /* Invalidate the state owner so we don't ever use it again */ + nfs4_drop_state_owner(sp); + d_drop(dentry); + /* Should we be trying to
close that stateid? */ + goto out_nodeleg; +} + +static int nfs4_open_expired(struct nfs4_state_owner *sp, struct nfs4_state *state) +{ + struct nfs_inode *nfsi = NFS_I(state->inode); + struct nfs_open_context *ctx; + int status; + + spin_lock(&state->inode->i_lock); + list_for_each_entry(ctx, &nfsi->open_files, list) { + if (ctx->state != state) + continue; + get_nfs_open_context(ctx); + spin_unlock(&state->inode->i_lock); + status = _nfs4_open_expired(sp, state, ctx->dentry); + put_nfs_open_context(ctx); + return status; + } + spin_unlock(&state->inode->i_lock); + return -ENOENT; +} + +/* * Returns an nfs4_state + an extra reference to the inode */ -int _nfs4_open_delegated(struct inode *inode, int flags, struct rpc_cred *cred, struct nfs4_state **res) +static int _nfs4_open_delegated(struct inode *inode, int flags, struct rpc_cred *cred, struct nfs4_state **res) { struct nfs_delegation *delegation; struct nfs_server *server = NFS_SERVER(inode); @@ -390,7 +534,6 @@ int _nfs4_open_delegated(struct inode *i struct nfs4_state_owner *sp = NULL; struct nfs4_state *state = NULL; int open_flags = flags & (FMODE_READ|FMODE_WRITE); - int mask = 0; int err; /* Protect against reboot recovery - NOTE ORDER! 
*/ @@ -424,20 +567,12 @@ int _nfs4_open_delegated(struct inode *i goto out_err; lock_kernel(); - err = _nfs4_do_access(inode, cred, mask); + err = _nfs4_do_access(inode, cred, open_flags); unlock_kernel(); if (err != 0) goto out_err; - spin_lock(&inode->i_lock); - memcpy(state->stateid.data, delegation->stateid.data, - sizeof(state->stateid.data)); - state->state |= open_flags; - if (open_flags & FMODE_READ) - state->nreaders++; - if (open_flags & FMODE_WRITE) - state->nwriters++; set_bit(NFS_DELEGATED_STATE, &state->flags); - spin_unlock(&inode->i_lock); + update_open_stateid(state, &delegation->stateid, open_flags); out_ok: up(&sp->so_sema); nfs4_put_state_owner(sp); @@ -500,12 +635,6 @@ static int _nfs4_do_open(struct inode *d .f_attr = &f_attr, .server = server, }; - struct rpc_message msg = { - .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_OPEN], - .rpc_argp = &o_arg, - .rpc_resp = &o_res, - .rpc_cred = cred, - }; /* Protect against reboot recovery conflicts */ down_read(&clp->cl_sem); @@ -522,26 +651,10 @@ static int _nfs4_do_open(struct inode *d o_arg.u.attrs = sattr; /* Serialization for the sequence id */ down(&sp->so_sema); - o_arg.seqid = sp->so_seqid; - o_arg.id = sp->so_id; - o_arg.clientid = clp->cl_clientid, - status = rpc_call_sync(server->client, &msg, RPC_TASK_NOINTR); - nfs4_increment_seqid(status, sp); - if (status) + status = _nfs4_proc_open(dir, sp, &o_arg, &o_res); + if (status != 0) goto out_err; - update_changeattr(dir, &o_res.cinfo); - if(o_res.rflags & NFS4_OPEN_RESULT_CONFIRM) { - status = _nfs4_proc_open_confirm(server->client, &o_res.fh, - sp, &o_res.stateid); - if (status != 0) - goto out_err; - } - if (!(f_attr.valid & NFS_ATTR_FATTR)) { - status = server->rpc_ops->getattr(server, &o_res.fh, &f_attr); - if (status < 0) - goto out_err; - } status = -ENOMEM; inode = nfs_fhget(dir->i_sb, &o_res.fh, &f_attr); @@ -550,14 +663,7 @@ static int _nfs4_do_open(struct inode *d state = nfs4_get_open_state(inode, sp); if (!state) goto out_err; - 
memcpy(&state->stateid, &o_res.stateid, sizeof(state->stateid)); - spin_lock(&inode->i_lock); - if (flags & FMODE_READ) - state->nreaders++; - if (flags & FMODE_WRITE) - state->nwriters++; - state->state |= flags & (FMODE_READ|FMODE_WRITE); - spin_unlock(&inode->i_lock); + update_open_stateid(state, &o_res.stateid, flags); if (o_res.delegation_type != 0) nfs_inode_set_delegation(inode, cred, &o_res); up(&sp->so_sema); @@ -581,7 +687,7 @@ out_err: } -struct nfs4_state *nfs4_do_open(struct inode *dir, struct dentry *dentry, int flags, struct iattr *sattr, struct rpc_cred *cred) +static struct nfs4_state *nfs4_do_open(struct inode *dir, struct dentry *dentry, int flags, struct iattr *sattr, struct rpc_cred *cred) { struct nfs4_exception exception = { }; struct nfs4_state *res; @@ -645,7 +751,7 @@ static int _nfs4_do_setattr(struct nfs_s return rpc_call_sync(server->client, &msg, 0); } -int nfs4_do_setattr(struct nfs_server *server, struct nfs_fattr *fattr, +static int nfs4_do_setattr(struct nfs_server *server, struct nfs_fattr *fattr, struct nfs_fh *fhandle, struct iattr *sattr, struct nfs4_state *state) { @@ -680,7 +786,6 @@ static void nfs4_close_done(struct rpc_t nfs4_increment_seqid(task->tk_status, sp); switch (task->tk_status) { case 0: - state->state = calldata->arg.open_flags; memcpy(&state->stateid, &calldata->res.stateid, sizeof(state->stateid)); break; @@ -695,6 +800,7 @@ static void nfs4_close_done(struct rpc_t return; } } + state->state = calldata->arg.open_flags; nfs4_put_open_state(state); up(&sp->so_sema); nfs4_put_state_owner(sp); @@ -774,6 +880,8 @@ nfs4_atomic_open(struct inode *dir, stru } cred = rpcauth_lookupcred(NFS_SERVER(dir)->client->cl_auth, 0); + if (IS_ERR(cred)) + return (struct inode *)cred; state = nfs4_do_open(dir, dentry, nd->intent.open.flags, &attr, cred); put_rpccred(cred); if (IS_ERR(state)) @@ -789,6 +897,8 @@ nfs4_open_revalidate(struct inode *dir, struct inode *inode; cred = rpcauth_lookupcred(NFS_SERVER(dir)->client->cl_auth, 
0); + if (IS_ERR(cred)) + return PTR_ERR(cred); state = nfs4_open_delegated(dentry->d_inode, openflags, cred); if (IS_ERR(state)) state = nfs4_do_open(dir, dentry, openflags, NULL, cred); @@ -833,7 +943,7 @@ static int _nfs4_server_capabilities(str return status; } -static int nfs4_server_capabilities(struct nfs_server *server, struct nfs_fh *fhandle) +int nfs4_server_capabilities(struct nfs_server *server, struct nfs_fh *fhandle) { struct nfs4_exception exception = { }; int err; @@ -1009,6 +1119,8 @@ nfs4_proc_setattr(struct dentry *dentry, if (size_change) { struct rpc_cred *cred = rpcauth_lookupcred(NFS_SERVER(inode)->client->cl_auth, 0); + if (IS_ERR(cred)) + return PTR_ERR(cred); state = nfs4_find_state(inode, cred, FMODE_WRITE); if (state == NULL) { state = nfs4_open_delegated(dentry->d_inode, @@ -1315,33 +1427,37 @@ static int nfs4_proc_commit(struct nfs_w * opens the file O_RDONLY. This will all be resolved with the VFS changes. */ -static struct inode * +static int nfs4_proc_create(struct inode *dir, struct dentry *dentry, struct iattr *sattr, int flags) { - struct inode *inode; - struct nfs4_state *state = NULL; + struct nfs4_state *state; struct rpc_cred *cred; + int status = 0; cred = rpcauth_lookupcred(NFS_SERVER(dir)->client->cl_auth, 0); + if (IS_ERR(cred)) { + status = PTR_ERR(cred); + goto out; + } state = nfs4_do_open(dir, dentry, flags, sattr, cred); put_rpccred(cred); - if (!IS_ERR(state)) { - inode = state->inode; - if (flags & O_EXCL) { - struct nfs_fattr fattr; - int status; - status = nfs4_do_setattr(NFS_SERVER(dir), &fattr, - NFS_FH(inode), sattr, state); - if (status != 0) { - nfs4_close_state(state, flags); - iput(inode); - inode = ERR_PTR(status); - } - } - } else - inode = (struct inode *)state; - return inode; + if (IS_ERR(state)) { + status = PTR_ERR(state); + goto out; + } + d_instantiate(dentry, state->inode); + if (flags & O_EXCL) { + struct nfs_fattr fattr; + status = nfs4_do_setattr(NFS_SERVER(dir), &fattr, + 
NFS_FH(state->inode), sattr, state); + if (status == 0) + goto out; + } else if (flags != 0) + goto out; + nfs4_close_state(state, flags); +out: + return status; } static int _nfs4_proc_remove(struct inode *dir, struct qstr *name) @@ -1539,23 +1655,24 @@ static int nfs4_proc_symlink(struct inod return err; } -static int _nfs4_proc_mkdir(struct inode *dir, struct qstr *name, - struct iattr *sattr, struct nfs_fh *fhandle, - struct nfs_fattr *fattr) +static int _nfs4_proc_mkdir(struct inode *dir, struct dentry *dentry, + struct iattr *sattr) { struct nfs_server *server = NFS_SERVER(dir); + struct nfs_fh fhandle; + struct nfs_fattr fattr; struct nfs4_create_arg arg = { .dir_fh = NFS_FH(dir), .server = server, - .name = name, + .name = &dentry->d_name, .attrs = sattr, .ftype = NF4DIR, .bitmask = server->attr_bitmask, }; struct nfs4_create_res res = { .server = server, - .fh = fhandle, - .fattr = fattr, + .fh = &fhandle, + .fattr = &fattr, }; struct rpc_message msg = { .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_CREATE], @@ -1564,24 +1681,24 @@ static int _nfs4_proc_mkdir(struct inode }; int status; - fattr->valid = 0; + fattr.valid = 0; status = rpc_call_sync(NFS_CLIENT(dir), &msg, 0); - if (!status) + if (!status) { update_changeattr(dir, &res.dir_cinfo); + status = nfs_instantiate(dentry, &fhandle, &fattr); + } return status; } -static int nfs4_proc_mkdir(struct inode *dir, struct qstr *name, - struct iattr *sattr, struct nfs_fh *fhandle, - struct nfs_fattr *fattr) +static int nfs4_proc_mkdir(struct inode *dir, struct dentry *dentry, + struct iattr *sattr) { struct nfs4_exception exception = { }; int err; do { err = nfs4_handle_exception(NFS_SERVER(dir), - _nfs4_proc_mkdir(dir, name, sattr, - fhandle, fattr), + _nfs4_proc_mkdir(dir, dentry, sattr), &exception); } while (exception.retry); return err; @@ -1596,6 +1713,7 @@ static int _nfs4_proc_readdir(struct den .pages = &page, .pgbase = 0, .count = count, + .bitmask = NFS_SERVER(dentry->d_inode)->attr_bitmask, }; 
struct nfs4_readdir_res res; struct rpc_message msg = { @@ -1630,22 +1748,23 @@ static int nfs4_proc_readdir(struct dent return err; } -static int _nfs4_proc_mknod(struct inode *dir, struct qstr *name, - struct iattr *sattr, dev_t rdev, struct nfs_fh *fh, - struct nfs_fattr *fattr) +static int _nfs4_proc_mknod(struct inode *dir, struct dentry *dentry, + struct iattr *sattr, dev_t rdev) { struct nfs_server *server = NFS_SERVER(dir); + struct nfs_fh fh; + struct nfs_fattr fattr; struct nfs4_create_arg arg = { .dir_fh = NFS_FH(dir), .server = server, - .name = name, + .name = &dentry->d_name, .attrs = sattr, .bitmask = server->attr_bitmask, }; struct nfs4_create_res res = { .server = server, - .fh = fh, - .fattr = fattr, + .fh = &fh, + .fattr = &fattr, }; struct rpc_message msg = { .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_CREATE], @@ -1655,7 +1774,7 @@ static int _nfs4_proc_mknod(struct inode int status; int mode = sattr->ia_mode; - fattr->valid = 0; + fattr.valid = 0; BUG_ON(!(sattr->ia_valid & ATTR_MODE)); BUG_ON(!S_ISFIFO(mode) && !S_ISBLK(mode) && !S_ISCHR(mode) && !S_ISSOCK(mode)); @@ -1675,21 +1794,21 @@ static int _nfs4_proc_mknod(struct inode arg.ftype = NF4SOCK; status = rpc_call_sync(NFS_CLIENT(dir), &msg, 0); - if (!status) + if (status == 0) { update_changeattr(dir, &res.dir_cinfo); + status = nfs_instantiate(dentry, &fh, &fattr); + } return status; } -static int nfs4_proc_mknod(struct inode *dir, struct qstr *name, - struct iattr *sattr, dev_t rdev, struct nfs_fh *fh, - struct nfs_fattr *fattr) +static int nfs4_proc_mknod(struct inode *dir, struct dentry *dentry, + struct iattr *sattr, dev_t rdev) { struct nfs4_exception exception = { }; int err; do { err = nfs4_handle_exception(NFS_SERVER(dir), - _nfs4_proc_mknod(dir, name, sattr, rdev, - fh, fattr), + _nfs4_proc_mknod(dir, dentry, sattr, rdev), &exception); } while (exception.retry); return err; @@ -2001,8 +2120,8 @@ nfs4_proc_file_open(struct inode *inode, /* Find our open stateid */ cred = 
rpcauth_lookupcred(NFS_SERVER(inode)->client->cl_auth, 0); - if (unlikely(cred == NULL)) - return -ENOMEM; + if (IS_ERR(cred)) + return PTR_ERR(cred); ctx = alloc_nfs_open_context(dentry, cred); put_rpccred(cred); if (unlikely(ctx == NULL)) @@ -2037,6 +2156,199 @@ nfs4_proc_file_release(struct inode *ino return 0; } +struct nfs4_cached_acl { + int cached; + size_t len; + char data[]; +}; + +static ssize_t +nfs4_read_cached_acl(struct inode *inode, char *buf, size_t buflen) +{ + struct nfs_inode *nfsi = NFS_I(inode); + struct nfs4_cached_acl *acl; + int ret = -ENOENT; + + spin_lock(&inode->i_lock); + acl = nfsi->nfs4_acl; + if (acl == NULL) + goto out; + if (buf == NULL) /* user is just asking for length */ + goto out_len; + if (acl->cached == 0) + goto out; + ret = -ERANGE; /* see getxattr(2) man page */ + if (acl->len > buflen) + goto out; + memcpy(buf, acl->data, acl->len); +out_len: + ret = acl->len; +out: + spin_unlock(&inode->i_lock); + return ret; +} + +static void +nfs4_set_cached_acl(struct inode *inode, struct nfs4_cached_acl *acl) +{ + struct nfs_inode *nfsi = NFS_I(inode); + + spin_lock(&inode->i_lock); + kfree(nfsi->nfs4_acl); + nfsi->nfs4_acl = acl; + spin_unlock(&inode->i_lock); +} + +static void +nfs4_write_cached_acl(struct inode *inode, const char *buf, size_t acl_len) +{ + struct nfs4_cached_acl *acl; + + if (buf && acl_len <= PAGE_SIZE) { + acl = kmalloc(sizeof(*acl) + acl_len, GFP_KERNEL); + if (acl == NULL) + goto out; + acl->cached = 1; + memcpy(acl->data, buf, acl_len); + } else { + acl = kmalloc(sizeof(*acl), GFP_KERNEL); + if (acl == NULL) + goto out; + acl->cached = 0; + } + acl->len = acl_len; +out: + nfs4_set_cached_acl(inode, acl); +} + +void +nfs4_zap_acl_attr(struct inode *inode) +{ + nfs4_set_cached_acl(inode, NULL); +} + +static int +nfs4_server_supports_acls(struct nfs_server *server) +{ + return (server->caps & NFS_CAP_ACLS) + && (server->acl_bitmask & ACL4_SUPPORT_ALLOW_ACL) + && (server->acl_bitmask & ACL4_SUPPORT_DENY_ACL); +} 
+ +/* Assuming that XATTR_SIZE_MAX is a multiple of PAGE_CACHE_SIZE, and that + * it's OK to put sizeof(void) * (XATTR_SIZE_MAX/PAGE_CACHE_SIZE) bytes on + * the stack. + */ +#define NFS4ACL_MAXPAGES (XATTR_SIZE_MAX >> PAGE_CACHE_SHIFT) + +static void buf_to_pages(const void *buf, size_t buflen, + struct page **pages, unsigned int *pgbase) +{ + const void *p = buf; + + *pgbase = offset_in_page(buf); + p -= *pgbase; + while (p < buf + buflen) { + *(pages++) = virt_to_page(p); + p += PAGE_CACHE_SIZE; + } +} + +static inline ssize_t +nfs4_get_acl_uncached(struct inode *inode, void *buf, size_t buflen) +{ + struct page *pages[NFS4ACL_MAXPAGES]; + struct nfs_getaclargs args = { + .fh = NFS_FH(inode), + .acl_pages = pages, + .acl_len = buflen, + }; + size_t resp_len = buflen; + void *resp_buf; + struct rpc_message msg = { + .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_GETACL], + .rpc_argp = &args, + .rpc_resp = &resp_len, + }; + struct page *localpage = NULL; + int ret; + + if (buflen < PAGE_SIZE) { + /* As long as we're doing a round trip to the server anyway, + * let's be prepared for a page of acl data. 
*/ + localpage = alloc_page(GFP_KERNEL); + if (localpage == NULL) + return -ENOMEM; + args.acl_pages[0] = localpage; + args.acl_pgbase = 0; + args.acl_len = PAGE_SIZE; + } else + buf_to_pages(buf, buflen, args.acl_pages, &args.acl_pgbase); + ret = rpc_call_sync(NFS_CLIENT(inode), &msg, 0); + if (ret) + goto out_free; + if (resp_len > args.acl_len) + resp_buf = NULL; + else if (localpage) { + resp_buf = page_address(localpage); + } else + resp_buf = buf; + nfs4_write_cached_acl(inode, resp_buf, resp_len); + ret = -ERANGE; + if (buflen && resp_len > buflen) + goto out_free; + if (localpage && buf != NULL) /* buf is NULL when the caller only asks for the ACL length */ + memcpy(buf, resp_buf, resp_len); + ret = resp_len; +out_free: + if (localpage) + __free_page(localpage); + return ret; +} + +ssize_t +nfs4_proc_get_acl(struct inode *inode, void *buf, size_t buflen) +{ + struct nfs_server *server = NFS_SERVER(inode); + int ret; + + if (!nfs4_server_supports_acls(server)) + return -EOPNOTSUPP; + ret = nfs_revalidate_inode(server, inode); + if (ret < 0) + return ret; + ret = nfs4_read_cached_acl(inode, buf, buflen); + if (ret != -ENOENT) + return ret; + return nfs4_get_acl_uncached(inode, buf, buflen); +} + +int +nfs4_proc_set_acl(struct inode *inode, const void *buf, size_t buflen) +{ + struct nfs_server *server = NFS_SERVER(inode); + struct page *pages[NFS4ACL_MAXPAGES]; + struct nfs_setaclargs arg = { + .fh = NFS_FH(inode), + .acl_pages = pages, + .acl_len = buflen, + }; + struct rpc_message msg = { + .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_SETACL], + .rpc_argp = &arg, + .rpc_resp = NULL, + }; + int ret; + + if (!nfs4_server_supports_acls(server)) + return -EOPNOTSUPP; + buf_to_pages(buf, buflen, arg.acl_pages, &arg.acl_pgbase); + ret = rpc_call_sync(NFS_SERVER(inode)->client, &msg, 0); + if (ret == 0) + nfs4_write_cached_acl(inode, buf, buflen); + return ret; +} + static int nfs4_async_handle_error(struct rpc_task *task, struct nfs_server *server) { @@ -2067,7 +2379,7 @@ nfs4_async_handle_error(struct rpc_task return 0; } -int
nfs4_wait_clnt_recover(struct rpc_clnt *clnt, struct nfs4_client *clp) +static int nfs4_wait_clnt_recover(struct rpc_clnt *clnt, struct nfs4_client *clp) { DEFINE_WAIT(wait); sigset_t oldset; @@ -2151,9 +2463,7 @@ int nfs4_handle_exception(struct nfs_ser int nfs4_proc_setclientid(struct nfs4_client *clp, u32 program, unsigned short port) { - static nfs4_verifier sc_verifier; - static int initialized; - + nfs4_verifier sc_verifier; struct nfs4_setclientid setclientid = { .sc_verifier = &sc_verifier, .sc_prog = program, @@ -2164,27 +2474,38 @@ int nfs4_proc_setclientid(struct nfs4_cl .rpc_resp = clp, .rpc_cred = clp->cl_cred, }; + u32 *p; + int loop = 0; + int status; - if (!initialized) { - struct timespec boot_time; - u32 *p; - - initialized = 1; - boot_time = CURRENT_TIME; - p = (u32*)sc_verifier.data; - *p++ = htonl((u32)boot_time.tv_sec); - *p = htonl((u32)boot_time.tv_nsec); - } - setclientid.sc_name_len = scnprintf(setclientid.sc_name, - sizeof(setclientid.sc_name), "%s/%u.%u.%u.%u", - clp->cl_ipaddr, NIPQUAD(clp->cl_addr.s_addr)); - setclientid.sc_netid_len = scnprintf(setclientid.sc_netid, - sizeof(setclientid.sc_netid), "tcp"); - setclientid.sc_uaddr_len = scnprintf(setclientid.sc_uaddr, - sizeof(setclientid.sc_uaddr), "%s.%d.%d", - clp->cl_ipaddr, port >> 8, port & 255); + p = (u32*)sc_verifier.data; + *p++ = htonl((u32)clp->cl_boot_time.tv_sec); + *p = htonl((u32)clp->cl_boot_time.tv_nsec); + + for(;;) { + setclientid.sc_name_len = scnprintf(setclientid.sc_name, + sizeof(setclientid.sc_name), "%s/%u.%u.%u.%u %s %u", + clp->cl_ipaddr, NIPQUAD(clp->cl_addr.s_addr), + clp->cl_cred->cr_ops->cr_name, + clp->cl_id_uniquifier); + setclientid.sc_netid_len = scnprintf(setclientid.sc_netid, + sizeof(setclientid.sc_netid), "tcp"); + setclientid.sc_uaddr_len = scnprintf(setclientid.sc_uaddr, + sizeof(setclientid.sc_uaddr), "%s.%d.%d", + clp->cl_ipaddr, port >> 8, port & 255); - return rpc_call_sync(clp->cl_rpcclient, &msg, 0); + status = 
rpc_call_sync(clp->cl_rpcclient, &msg, 0); + if (status != -NFS4ERR_CLID_INUSE) + break; + if (signalled()) + break; + if (loop++ & 1) + ssleep(clp->cl_lease_time + 1); + else + if (++clp->cl_id_uniquifier == 0) + break; + } + return status; } int @@ -2361,6 +2682,25 @@ static int nfs4_proc_getlk(struct nfs4_s return err; } +static int do_vfs_lock(struct file *file, struct file_lock *fl) +{ + int res = 0; + switch (fl->fl_flags & (FL_POSIX|FL_FLOCK)) { + case FL_POSIX: + res = posix_lock_file_wait(file, fl); + break; + case FL_FLOCK: + res = flock_lock_file_wait(file, fl); + break; + default: + BUG(); + } + if (res < 0) + printk(KERN_WARNING "%s: VFS is out of sync with lock manager!\n", + __FUNCTION__); + return res; +} + static int _nfs4_proc_unlck(struct nfs4_state *state, int cmd, struct file_lock *request) { struct inode *inode = state->inode; @@ -2408,7 +2748,7 @@ static int _nfs4_proc_unlck(struct nfs4_ out: up(&state->lock_sema); if (status == 0) - posix_lock_file(request->fl_file, request); + do_vfs_lock(request->fl_file, request); up_read(&clp->cl_sem); return status; } @@ -2500,11 +2840,16 @@ static int _nfs4_do_setlk(struct nfs4_st return status; } -int nfs4_lock_reclaim(struct nfs4_state *state, struct file_lock *request) +static int nfs4_lock_reclaim(struct nfs4_state *state, struct file_lock *request) { return _nfs4_do_setlk(state, F_SETLK, request, 1); } +static int nfs4_lock_expired(struct nfs4_state *state, struct file_lock *request) +{ + return _nfs4_do_setlk(state, F_SETLK, request, 0); +} + static int _nfs4_proc_setlk(struct nfs4_state *state, int cmd, struct file_lock *request) { struct nfs4_client *clp = state->owner->so_client; @@ -2517,7 +2862,7 @@ static int _nfs4_proc_setlk(struct nfs4_ if (status == 0) { /* Note: we always want to sleep here! 
*/ request->fl_flags |= FL_SLEEP; - if (posix_lock_file_wait(request->fl_file, request) < 0) + if (do_vfs_lock(request->fl_file, request) < 0) printk(KERN_WARNING "%s: VFS is out of sync with lock manager!\n", __FUNCTION__); } up_read(&clp->cl_sem); @@ -2574,10 +2919,50 @@ nfs4_proc_lock(struct file *filp, int cm return status; } +int nfs4_proc_fs_locations(struct inode *dir, struct dentry *dentry, + struct nfs_fs_locations *fs_locations, struct page *page) +{ + struct nfs_server *server = NFS_SERVER(dir); + u32 bitmask[2] = { + [0] = server->attr_bitmask[0] | FATTR4_WORD0_FS_LOCATIONS, + [1] = server->attr_bitmask[1], + }; + struct nfs4_fs_locations_arg args = { + .dir_fh = NFS_FH(dir), + .name = &dentry->d_name, + .page = page, + .bitmask = bitmask, + }; + struct rpc_message msg = { + .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_FS_LOCATIONS], + .rpc_argp = &args, + .rpc_resp = fs_locations, /* already a pointer to the result struct; do not pass the address of the parameter */ + }; + int status; + + dprintk("%s: start\n", __FUNCTION__); + fs_locations->fattr.valid = 0; + fs_locations->server = server; + status = rpc_call_sync(server->client, &msg, 0); + dprintk("%s: returned status = %d\n", __FUNCTION__, status); + return status; +} + +struct nfs4_state_recovery_ops nfs4_reboot_recovery_ops = { + .recover_open = nfs4_open_reclaim, + .recover_lock = nfs4_lock_reclaim, +}; + +struct nfs4_state_recovery_ops nfs4_network_partition_recovery_ops = { + .recover_open = nfs4_open_expired, + .recover_lock = nfs4_lock_expired, +}; + struct nfs_rpc_ops nfs_v4_clientops = { .version = 4, /* protocol version */ .dentry_ops = &nfs4_dentry_operations, .dir_inode_ops = &nfs4_dir_inode_operations, + .file_inode_ops = &nfs4_file_inode_operations, .getroot = nfs4_proc_get_root, .getattr = nfs4_proc_getattr, .setattr = nfs4_proc_setattr, Index: linux-2.6.11/fs/nfs/nfs4renewd.c =================================================================== --- linux-2.6.11.orig/fs/nfs/nfs4renewd.c +++ linux-2.6.11/fs/nfs/nfs4renewd.c @@ -53,6 +53,7 @@ #include #include #include
+#include "nfs4_fs.h" #define NFSDBG_FACILITY NFSDBG_PROC Index: linux-2.6.11/fs/nfs/nfs4state.c =================================================================== --- linux-2.6.11.orig/fs/nfs/nfs4state.c +++ linux-2.6.11/fs/nfs/nfs4state.c @@ -46,24 +46,18 @@ #include #include +#include "nfs4_fs.h" #include "callback.h" #include "delegation.h" #define OPENOWNER_POOL_SIZE 8 -static DEFINE_SPINLOCK(state_spinlock); - -nfs4_stateid zero_stateid; - -#if 0 -nfs4_stateid one_stateid = - { 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff }; -#endif +const nfs4_stateid zero_stateid; +static DEFINE_SPINLOCK(state_spinlock); static LIST_HEAD(nfs4_clientid_list); static void nfs4_recover_state(void *); -extern void nfs4_renew_state(void *); void init_nfsv4_state(struct nfs_server *server) @@ -116,6 +110,7 @@ nfs4_alloc_client(struct in_addr *addr) INIT_LIST_HEAD(&clp->cl_superblocks); init_waitqueue_head(&clp->cl_waitq); rpc_init_wait_queue(&clp->cl_rpcwaitq, "NFS4 client"); + clp->cl_boot_time = CURRENT_TIME; clp->cl_state = 1 << NFS4CLNT_OK; return clp; } @@ -205,7 +200,7 @@ nfs4_put_client(struct nfs4_client *clp) nfs4_free_client(clp); } -int nfs4_init_client(struct nfs4_client *clp) +static int __nfs4_init_client(struct nfs4_client *clp) { int status = nfs4_proc_setclientid(clp, NFS4_CALLBACK, nfs_callback_tcpport); if (status == 0) @@ -215,6 +210,11 @@ int nfs4_init_client(struct nfs4_client return status; } +int nfs4_init_client(struct nfs4_client *clp) +{ + return nfs4_map_errors(__nfs4_init_client(clp)); +} + u32 nfs4_alloc_lockowner_id(struct nfs4_client *clp) { @@ -274,8 +274,8 @@ nfs4_alloc_state_owner(void) return sp; } -static void -nfs4_unhash_state_owner(struct nfs4_state_owner *sp) +void +nfs4_drop_state_owner(struct nfs4_state_owner *sp) { struct nfs4_client *clp = sp->so_client; spin_lock(&clp->cl_lock); @@ -441,7 +441,9 @@ nfs4_get_open_state(struct inode *inode, if (state == NULL && new != NULL) { state = 
new; /* Caller *must* be holding owner->so_sem */ - list_add(&state->open_states, &owner->so_states); + /* Note: The reclaim code dictates that we add stateless + * and read-only stateids to the end of the list */ + list_add_tail(&state->open_states, &owner->so_states); state->owner = owner; atomic_inc(&owner->so_count); list_add(&state->inode_states, &nfsi->open_states); @@ -497,8 +499,12 @@ void nfs4_close_state(struct nfs4_state state->nreaders--; if (mode & FMODE_WRITE) state->nwriters--; - if (state->nwriters == 0 && state->nreaders == 0) - list_del_init(&state->inode_states); + if (state->nwriters == 0) { + if (state->nreaders == 0) + list_del_init(&state->inode_states); + /* See reclaim code */ + list_move_tail(&state->open_states, &owner->so_states); + } spin_unlock(&inode->i_lock); newstate = 0; if (state->state != 0) { @@ -708,7 +714,7 @@ void nfs4_increment_seqid(int status, st sp->so_seqid++; /* If the server returns BAD_SEQID, unhash state_owner here */ if (status == -NFS4ERR_BAD_SEQID) - nfs4_unhash_state_owner(sp); + nfs4_drop_state_owner(sp); } static int reclaimer(void *); @@ -753,7 +759,7 @@ nfs4_schedule_state_recovery(struct nfs4 schedule_work(&clp->cl_recoverd); } -static int nfs4_reclaim_locks(struct nfs4_state *state) +static int nfs4_reclaim_locks(struct nfs4_state_recovery_ops *ops, struct nfs4_state *state) { struct inode *inode = state->inode; struct file_lock *fl; @@ -764,7 +770,7 @@ static int nfs4_reclaim_locks(struct nfs continue; if (((struct nfs_open_context *)fl->fl_file->private_data)->state != state) continue; - status = nfs4_lock_reclaim(state, fl); + status = ops->recover_lock(state, fl); if (status >= 0) continue; switch (status) { @@ -786,20 +792,28 @@ out_err: return status; } -static int nfs4_reclaim_open_state(struct nfs4_state_owner *sp) +static int nfs4_reclaim_open_state(struct nfs4_state_recovery_ops *ops, struct nfs4_state_owner *sp) { struct nfs4_state *state; struct nfs4_lock_state *lock; int status = 0; + /* Note: 
we rely on the sp->so_states list being ordered + * so that we always reclaim open(O_RDWR) and/or open(O_WRITE) + * states first. + * This is needed to ensure that the server won't give us any + * read delegations that we have to return if, say, we are + * recovering after a network partition or a reboot from a + * server that doesn't support a grace period. + */ list_for_each_entry(state, &sp->so_states, open_states) { if (state->state == 0) continue; - status = nfs4_open_reclaim(sp, state); + status = ops->recover_open(sp, state); list_for_each_entry(lock, &state->lock_states, ls_locks) lock->ls_flags &= ~NFS_LOCK_INITIALIZED; if (status >= 0) { - status = nfs4_reclaim_locks(state); + status = nfs4_reclaim_locks(ops, state); if (status < 0) goto out_err; list_for_each_entry(lock, &state->lock_states, ls_locks) { @@ -813,8 +827,7 @@ static int nfs4_reclaim_open_state(struc default: printk(KERN_ERR "%s: unhandled error %d. Zeroing state\n", __FUNCTION__, status); - case -NFS4ERR_EXPIRED: - case -NFS4ERR_NO_GRACE: + case -ENOENT: case -NFS4ERR_RECLAIM_BAD: case -NFS4ERR_RECLAIM_CONFLICT: /* @@ -826,6 +839,8 @@ static int nfs4_reclaim_open_state(struc /* Mark the file as being 'closed' */ state->state = 0; break; + case -NFS4ERR_EXPIRED: + case -NFS4ERR_NO_GRACE: case -NFS4ERR_STALE_CLIENTID: goto out_err; } @@ -840,6 +855,7 @@ static int reclaimer(void *ptr) struct reclaimer_args *args = (struct reclaimer_args *)ptr; struct nfs4_client *clp = args->clp; struct nfs4_state_owner *sp; + struct nfs4_state_recovery_ops *ops; int status = 0; daemonize("%u.%u.%u.%u-reclaim", NIPQUAD(clp->cl_addr)); @@ -856,20 +872,34 @@ static int reclaimer(void *ptr) goto out; restart_loop: status = nfs4_proc_renew(clp); - if (status == 0 || status == -NFS4ERR_CB_PATH_DOWN) - goto out; - status = nfs4_init_client(clp); + switch (status) { + case 0: + case -NFS4ERR_CB_PATH_DOWN: + goto out; + case -NFS4ERR_STALE_CLIENTID: + case -NFS4ERR_LEASE_MOVED: + ops = &nfs4_reboot_recovery_ops; + 
break; + default: + ops = &nfs4_network_partition_recovery_ops; + }; + status = __nfs4_init_client(clp); if (status) goto out_error; - /* Mark all delagations for reclaim */ + /* Mark all delegations for reclaim */ nfs_delegation_mark_reclaim(clp); /* Note: list is protected by exclusive lock on cl->cl_sem */ list_for_each_entry(sp, &clp->cl_state_owners, so_list) { - status = nfs4_reclaim_open_state(sp); + status = nfs4_reclaim_open_state(ops, sp); if (status < 0) { + if (status == -NFS4ERR_NO_GRACE) { + ops = &nfs4_network_partition_recovery_ops; + status = nfs4_reclaim_open_state(ops, sp); + } if (status == -NFS4ERR_STALE_CLIENTID) goto restart_loop; - goto out_error; + if (status == -NFS4ERR_EXPIRED) + goto restart_loop; } } nfs_delegation_reap_unclaimed(clp); Index: linux-2.6.11/fs/nfs/nfs4xdr.c =================================================================== --- linux-2.6.11.orig/fs/nfs/nfs4xdr.c +++ linux-2.6.11/fs/nfs/nfs4xdr.c @@ -51,6 +51,7 @@ #include #include #include +#include "nfs4_fs.h" #define NFSDBG_FACILITY NFSDBG_XDR @@ -82,12 +83,16 @@ static int nfs_stat_to_errno(int); #define encode_getfh_maxsz (op_encode_hdr_maxsz) #define decode_getfh_maxsz (op_decode_hdr_maxsz + 1 + \ ((3+NFS4_FHSIZE) >> 2)) -#define encode_getattr_maxsz (op_encode_hdr_maxsz + 3) +#define nfs4_fattr_bitmap_maxsz 3 +#define encode_getattr_maxsz (op_encode_hdr_maxsz + nfs4_fattr_bitmap_maxsz) #define nfs4_name_maxsz (1 + ((3 + NFS4_MAXNAMLEN) >> 2)) #define nfs4_path_maxsz (1 + ((3 + NFS4_MAXPATHLEN) >> 2)) -#define nfs4_fattr_bitmap_maxsz (36 + 2 * nfs4_name_maxsz) -#define decode_getattr_maxsz (op_decode_hdr_maxsz + 3 + \ - nfs4_fattr_bitmap_maxsz) +/* This is based on getfattr, which uses the most attributes: */ +#define nfs4_fattr_value_maxsz (1 + (1 + 2 + 2 + 4 + 2 + 1 + 1 + 2 + 2 + \ + 3 + 3 + 3 + 2 * nfs4_name_maxsz)) +#define nfs4_fattr_maxsz (nfs4_fattr_bitmap_maxsz + \ + nfs4_fattr_value_maxsz) +#define decode_getattr_maxsz (op_decode_hdr_maxsz + 
nfs4_fattr_maxsz) #define encode_savefh_maxsz (op_encode_hdr_maxsz) #define decode_savefh_maxsz (op_decode_hdr_maxsz) #define encode_fsinfo_maxsz (op_encode_hdr_maxsz + 2) @@ -122,11 +127,11 @@ static int nfs_stat_to_errno(int); #define encode_symlink_maxsz (op_encode_hdr_maxsz + \ 1 + nfs4_name_maxsz + \ nfs4_path_maxsz + \ - nfs4_fattr_bitmap_maxsz) + nfs4_fattr_maxsz) #define decode_symlink_maxsz (op_decode_hdr_maxsz + 8) #define encode_create_maxsz (op_encode_hdr_maxsz + \ 2 + nfs4_name_maxsz + \ - nfs4_fattr_bitmap_maxsz) + nfs4_fattr_maxsz) #define decode_create_maxsz (op_decode_hdr_maxsz + 8) #define encode_delegreturn_maxsz (op_encode_hdr_maxsz + 4) #define decode_delegreturn_maxsz (op_decode_hdr_maxsz) @@ -205,7 +210,7 @@ static int nfs_stat_to_errno(int); #define NFS4_enc_setattr_sz (compound_encode_hdr_maxsz + \ encode_putfh_maxsz + \ op_encode_hdr_maxsz + 4 + \ - nfs4_fattr_bitmap_maxsz + \ + nfs4_fattr_maxsz + \ encode_getattr_maxsz) #define NFS4_dec_setattr_sz (compound_decode_hdr_maxsz + \ decode_putfh_maxsz + \ @@ -360,6 +365,29 @@ static int nfs_stat_to_errno(int); encode_delegreturn_maxsz) #define NFS4_dec_delegreturn_sz (compound_decode_hdr_maxsz + \ decode_delegreturn_maxsz) +#define NFS4_enc_getacl_sz (compound_encode_hdr_maxsz + \ + encode_putfh_maxsz + \ + encode_getattr_maxsz) +#define NFS4_dec_getacl_sz (compound_decode_hdr_maxsz + \ + decode_putfh_maxsz + \ + op_decode_hdr_maxsz + \ + nfs4_fattr_bitmap_maxsz + 1) +#define NFS4_enc_setacl_sz (compound_encode_hdr_maxsz + \ + encode_putfh_maxsz + \ + op_encode_hdr_maxsz + 4 + \ + nfs4_fattr_bitmap_maxsz + 1) +#define NFS4_dec_setacl_sz (compound_decode_hdr_maxsz + \ + decode_putfh_maxsz + \ + op_decode_hdr_maxsz + nfs4_fattr_bitmap_maxsz) +#define NFS4_enc_fs_locations_sz \ + (compound_encode_hdr_maxsz + \ + encode_putfh_maxsz + \ + encode_getattr_maxsz) +#define NFS4_dec_fs_locations_sz \ + (compound_decode_hdr_maxsz + \ + decode_putfh_maxsz + \ + op_decode_hdr_maxsz + \ + 
nfs4_fattr_bitmap_maxsz) static struct { unsigned int mode; @@ -459,7 +487,7 @@ static int encode_attrs(struct xdr_strea * In the worst-case, this would be * 12(bitmap) + 4(attrlen) + 8(size) + 4(mode) + 4(atime) + 4(mtime) * = 36 bytes, plus any contribution from variable-length fields - * such as owner/group/acl's. + * such as owner/group. */ len = 16; @@ -660,8 +688,6 @@ static int encode_getattr_two(struct xdr static int encode_getfattr(struct xdr_stream *xdr, const u32* bitmask) { - extern u32 nfs4_fattr_bitmap[]; - return encode_getattr_two(xdr, bitmask[0] & nfs4_fattr_bitmap[0], bitmask[1] & nfs4_fattr_bitmap[1]); @@ -669,8 +695,6 @@ static int encode_getfattr(struct xdr_st static int encode_fsinfo(struct xdr_stream *xdr, const u32* bitmask) { - extern u32 nfs4_fsinfo_bitmap[]; - return encode_getattr_two(xdr, bitmask[0] & nfs4_fsinfo_bitmap[0], bitmask[1] & nfs4_fsinfo_bitmap[1]); } @@ -969,7 +993,6 @@ static int encode_putrootfh(struct xdr_s static void encode_stateid(struct xdr_stream *xdr, const struct nfs_open_context *ctx) { - extern nfs4_stateid zero_stateid; nfs4_stateid stateid; uint32_t *p; @@ -1010,8 +1033,13 @@ static int encode_readdir(struct xdr_str WRITE32(readdir->count >> 1); /* We're not doing readdirplus */ WRITE32(readdir->count); WRITE32(2); - WRITE32(FATTR4_WORD0_FILEID); - WRITE32(0); + if (readdir->bitmask[1] & FATTR4_WORD1_MOUNTED_ON_FILEID) { + WRITE32(0); + WRITE32(FATTR4_WORD1_MOUNTED_ON_FILEID); + } else { + WRITE32(FATTR4_WORD0_FILEID); + WRITE32(0); + } /* set up reply kvec * toplevel_status + taglen + rescount + OP_PUTFH + status @@ -1084,6 +1112,25 @@ static int encode_renew(struct xdr_strea } static int +encode_setacl(struct xdr_stream *xdr, struct nfs_setaclargs *arg) +{ + uint32_t *p; + + RESERVE_SPACE(4+sizeof(zero_stateid.data)); + WRITE32(OP_SETATTR); + WRITEMEM(zero_stateid.data, sizeof(zero_stateid.data)); + RESERVE_SPACE(2*4); + WRITE32(1); + WRITE32(FATTR4_WORD0_ACL); + if (arg->acl_len % 4) + return -EINVAL; + 
RESERVE_SPACE(4); + WRITE32(arg->acl_len); + xdr_write_pages(xdr, arg->acl_pages, arg->acl_pgbase, arg->acl_len); + return 0; +} + +static int encode_savefh(struct xdr_stream *xdr) { uint32_t *p; @@ -1627,6 +1674,34 @@ out: } /* + * Encode a GETACL request + */ +static int +nfs4_xdr_enc_getacl(struct rpc_rqst *req, uint32_t *p, + struct nfs_getaclargs *args) +{ + struct xdr_stream xdr; + struct rpc_auth *auth = req->rq_task->tk_auth; + struct compound_hdr hdr = { + .nops = 2, + }; + int replen, status; + + xdr_init_encode(&xdr, &req->rq_snd_buf, p); + encode_compound_hdr(&xdr, &hdr); + status = encode_putfh(&xdr, args->fh); + if (status) + goto out; + status = encode_getattr_two(&xdr, FATTR4_WORD0_ACL, 0); + /* set up reply buffer: */ + replen = (RPC_REPHDRSIZE + auth->au_rslack + NFS4_dec_getacl_sz) << 2; + xdr_inline_pages(&req->rq_rcv_buf, replen, + args->acl_pages, args->acl_pgbase, args->acl_len); +out: + return status; +} + +/* * Encode a WRITE request */ static int nfs4_xdr_enc_write(struct rpc_rqst *req, uint32_t *p, struct nfs_writeargs *args) @@ -1692,7 +1767,6 @@ static int nfs4_xdr_enc_fsinfo(struct rp */ static int nfs4_xdr_enc_pathconf(struct rpc_rqst *req, uint32_t *p, const struct nfs4_pathconf_arg *args) { - extern u32 nfs4_pathconf_bitmap[2]; struct xdr_stream xdr; struct compound_hdr hdr = { .nops = 2, @@ -1713,7 +1787,6 @@ static int nfs4_xdr_enc_pathconf(struct */ static int nfs4_xdr_enc_statfs(struct rpc_rqst *req, uint32_t *p, const struct nfs4_statfs_arg *args) { - extern u32 nfs4_statfs_bitmap[]; struct xdr_stream xdr; struct compound_hdr hdr = { .nops = 2, @@ -1823,6 +1896,38 @@ static int nfs4_xdr_enc_delegreturn(stru } /* + * Encode FS_LOCATIONS request + */ +static int nfs4_xdr_enc_fs_locations(struct rpc_rqst *req, uint32_t *p, struct nfs4_fs_locations_arg *args) +{ + struct xdr_stream xdr; + struct compound_hdr hdr = { + .nops = 3, + }; + struct rpc_auth *auth = req->rq_task->tk_auth; + int replen; + int status; + + 
xdr_init_encode(&xdr, &req->rq_snd_buf, p); + encode_compound_hdr(&xdr, &hdr); + if ((status = encode_putfh(&xdr, args->dir_fh)) != 0) + goto out; + if ((status = encode_lookup(&xdr, args->name)) != 0) + goto out; + if ((status = encode_getfattr(&xdr, args->bitmask)) != 0) + goto out; + /* set up reply + * toplevel_status + taglen + rescount + OP_PUTFH + status + * + OP_LOOKUP + status + OP_GETATTR + status = 7 + */ + replen = (RPC_REPHDRSIZE + auth->au_rslack + 7) << 2; + xdr_inline_pages(&req->rq_rcv_buf, replen, &args->page, + 0, PAGE_SIZE); +out: + return status; +} + +/* * START OF "GENERIC" DECODE ROUTINES. * These may look a little ugly since they are imported from a "generic" * set of XDR encode/decode routines which are intended to be shared by @@ -1856,7 +1961,7 @@ static int nfs4_xdr_enc_delegreturn(stru } \ } while (0) -static int decode_opaque_inline(struct xdr_stream *xdr, uint32_t *len, char **string) +static int decode_opaque_inline(struct xdr_stream *xdr, unsigned int *len, char **string) { uint32_t *p; @@ -1907,7 +2012,7 @@ static int decode_op_hdr(struct xdr_stre static int decode_ace(struct xdr_stream *xdr, void *ace, struct nfs4_client *clp) { uint32_t *p; - uint32_t strlen; + unsigned int strlen; char *str; READ_BUF(12); @@ -2037,7 +2142,7 @@ static int decode_attr_symlink_support(s return 0; } -static int decode_attr_fsid(struct xdr_stream *xdr, uint32_t *bitmap, struct nfs4_fsid *fsid) +static int decode_attr_fsid(struct xdr_stream *xdr, uint32_t *bitmap, struct nfs_fsid *fsid) { uint32_t *p; @@ -2156,6 +2261,45 @@ static int decode_attr_files_total(struc return status; } +static int decode_attr_fs_locations(struct xdr_stream *xdr, uint32_t *bitmap, struct nfs_fs_locations *res) +{ + int n; + uint32_t *p; + int status = -EIO; + + if (unlikely(bitmap[0] & (FATTR4_WORD0_FS_LOCATIONS -1U))) + goto out; + status = 0; + if (unlikely(!(bitmap[0] & FATTR4_WORD0_FS_LOCATIONS))) + goto out; + status = decode_opaque_inline(xdr, &res->fs_pathlen, 
&res->fs_path); + if (unlikely(status != 0)) + goto out; + READ_BUF(4); + READ32(n); + if (n <= 0) + goto out_eio; + res->nlocations = 0; + while (n-- > 0) { + struct nfs_fs_location spare, *loc = &spare; /* once nlocations hits the cap, decode into 'spare' and drop it so the XDR stream stays in step */ + if (res->nlocations < NFS_FS_LOCATIONS_MAXENTRIES) + loc = &res->locations[res->nlocations]; + status = decode_opaque_inline(xdr, &loc->serverlen, &loc->server); + if (unlikely(status != 0)) + goto out_eio; + status = decode_opaque_inline(xdr, &loc->rootpathlen, &loc->rootpath); + if (unlikely(status != 0)) + goto out_eio; + res->nlocations += (loc != &spare); + } +out: + dprintk("%s: fs_locations done, error = %d\n", __FUNCTION__, status); + return status; +out_eio: + status = -EIO; + goto out; +} + static int decode_attr_maxfilesize(struct xdr_stream *xdr, uint32_t *bitmap, uint64_t *res) { uint32_t *p; @@ -2686,10 +2830,14 @@ static int decode_getfattr(struct xdr_st goto xdr_error; if ((status = decode_attr_size(xdr, bitmap, &fattr->size)) != 0) goto xdr_error; - if ((status = decode_attr_fsid(xdr, bitmap, &fattr->fsid_u.nfs4)) != 0) + if ((status = decode_attr_fsid(xdr, bitmap, &fattr->fsid)) != 0) goto xdr_error; if ((status = decode_attr_fileid(xdr, bitmap, &fattr->fileid)) != 0) goto xdr_error; + if ((status = decode_attr_fs_locations(xdr, bitmap, container_of(fattr, + struct nfs_fs_locations, + fattr))) != 0) + goto xdr_error; if ((status = decode_attr_mode(xdr, bitmap, &fattr->mode)) != 0) goto xdr_error; fattr->mode |= fmode; @@ -3122,6 +3270,46 @@ static int decode_renew(struct xdr_strea return decode_op_hdr(xdr, OP_RENEW); } +static int decode_getacl(struct xdr_stream *xdr, struct rpc_rqst *req, + size_t *acl_len) +{ + uint32_t *savep; + uint32_t attrlen, + bitmap[2] = {0}; + struct kvec *iov = req->rq_rcv_buf.head; + int status; + + if ((status = decode_op_hdr(xdr, OP_GETATTR)) != 0) + goto out; + if ((status = decode_attr_bitmap(xdr, bitmap)) != 0) + goto out; + if ((status = decode_attr_length(xdr, &attrlen, &savep)) != 0) + goto out; + + if (unlikely(bitmap[0] &
(FATTR4_WORD0_ACL - 1U))) + return -EIO; + if (likely(bitmap[0] & FATTR4_WORD0_ACL)) { + int hdrlen, recvd; + + /* We ignore &savep and don't do consistency checks on + * the attr length. Let userspace figure it out.... */ + hdrlen = (u8 *)xdr->p - (u8 *)iov->iov_base; + recvd = req->rq_rcv_buf.len - hdrlen; + if (attrlen > recvd) { + printk(KERN_WARNING "NFS: server cheating in getattr" + " acl reply: attrlen %u > recvd %u\n", + attrlen, recvd); + return -EINVAL; + } + if (attrlen <= *acl_len) + xdr_read_pages(xdr, attrlen); + *acl_len = attrlen; + } + +out: + return status; +} + static int decode_savefh(struct xdr_stream *xdr) { @@ -3175,7 +3363,7 @@ static int decode_setclientid(struct xdr READ_BUF(4); READ32(len); READ_BUF(len); - return -EEXIST; + return -NFSERR_CLID_INUSE; } else return -nfs_stat_to_errno(nfserr); @@ -3413,6 +3601,71 @@ out: } +/* + * Encode an SETACL request + */ +static int +nfs4_xdr_enc_setacl(struct rpc_rqst *req, uint32_t *p, struct nfs_setaclargs *args) +{ + struct xdr_stream xdr; + struct compound_hdr hdr = { + .nops = 2, + }; + int status; + + xdr_init_encode(&xdr, &req->rq_snd_buf, p); + encode_compound_hdr(&xdr, &hdr); + status = encode_putfh(&xdr, args->fh); + if (status) + goto out; + status = encode_setacl(&xdr, args); +out: + return status; +} +/* + * Decode SETACL response + */ +static int +nfs4_xdr_dec_setacl(struct rpc_rqst *rqstp, uint32_t *p, void *res) +{ + struct xdr_stream xdr; + struct compound_hdr hdr; + int status; + + xdr_init_decode(&xdr, &rqstp->rq_rcv_buf, p); + status = decode_compound_hdr(&xdr, &hdr); + if (status) + goto out; + status = decode_putfh(&xdr); + if (status) + goto out; + status = decode_setattr(&xdr, res); +out: + return status; +} + +/* + * Decode GETACL response + */ +static int +nfs4_xdr_dec_getacl(struct rpc_rqst *rqstp, uint32_t *p, size_t *acl_len) +{ + struct xdr_stream xdr; + struct compound_hdr hdr; + int status; + + xdr_init_decode(&xdr, &rqstp->rq_rcv_buf, p); + status = 
decode_compound_hdr(&xdr, &hdr); + if (status) + goto out; + status = decode_putfh(&xdr); + if (status) + goto out; + status = decode_getacl(&xdr, rqstp, acl_len); + +out: + return status; +} /* * Decode CLOSE response @@ -3855,9 +4108,32 @@ static int nfs4_xdr_dec_delegreturn(stru return status; } +/* + * FS_LOCATIONS request + */ +static int nfs4_xdr_dec_fs_locations(struct rpc_rqst *req, uint32_t *p, struct nfs_fs_locations *res) +{ + struct xdr_stream xdr; + struct compound_hdr hdr; + int status; + + xdr_init_decode(&xdr, &req->rq_rcv_buf, p); + status = decode_compound_hdr(&xdr, &hdr); + if (status != 0) + goto out; + if ((status = decode_putfh(&xdr)) != 0) + goto out; + if ((status = decode_lookup(&xdr)) != 0) + goto out; + xdr_enter_page(&xdr, PAGE_SIZE); + status = decode_getfattr(&xdr, &res->fattr, res->server); +out: + return status; +} + uint32_t *nfs4_decode_dirent(uint32_t *p, struct nfs_entry *entry, int plus) { - uint32_t bitmap[1] = {0}; + uint32_t bitmap[2] = {0}; uint32_t len; if (!*p++) { @@ -3881,13 +4157,18 @@ uint32_t *nfs4_decode_dirent(uint32_t *p entry->ino = 1; len = ntohl(*p++); /* bitmap length */ - if (len > 0) { - bitmap[0] = ntohl(*p); - p += len; + if (len-- > 0) { + bitmap[0] = ntohl(*p++); + if (len-- > 0) { + bitmap[1] = ntohl(*p++); + p += len; + } } len = XDR_QUADLEN(ntohl(*p++)); /* attribute buffer length */ if (len > 0) { - if (bitmap[0] == FATTR4_WORD0_FILEID) + if (bitmap[0] == 0 && bitmap[1] == FATTR4_WORD1_MOUNTED_ON_FILEID) + xdr_decode_hyper(p, &entry->ino); + else if (bitmap[0] == FATTR4_WORD0_FILEID) xdr_decode_hyper(p, &entry->ino); p += len; } @@ -4009,6 +4290,9 @@ struct rpc_procinfo nfs4_procedures[] = PROC(READDIR, enc_readdir, dec_readdir), PROC(SERVER_CAPS, enc_server_caps, dec_server_caps), PROC(DELEGRETURN, enc_delegreturn, dec_delegreturn), + PROC(GETACL, enc_getacl, dec_getacl), + PROC(SETACL, enc_setacl, dec_setacl), + PROC(FS_LOCATIONS, enc_fs_locations, dec_fs_locations), }; struct rpc_version 
nfs_version4 = { Index: linux-2.6.11/fs/nfs/nfsroot.c =================================================================== --- linux-2.6.11.orig/fs/nfs/nfsroot.c +++ linux-2.6.11/fs/nfs/nfsroot.c @@ -124,7 +124,6 @@ enum { Opt_soft, Opt_hard, Opt_intr, Opt_nointr, Opt_posix, Opt_noposix, Opt_cto, Opt_nocto, Opt_ac, Opt_noac, Opt_lock, Opt_nolock, Opt_v2, Opt_v3, Opt_udp, Opt_tcp, - Opt_broken_suid, /* Error token */ Opt_err }; @@ -159,7 +158,6 @@ static match_table_t __initdata tokens = {Opt_udp, "udp"}, {Opt_tcp, "proto=tcp"}, {Opt_tcp, "tcp"}, - {Opt_broken_suid, "broken_suid"}, {Opt_err, NULL} }; @@ -268,9 +266,6 @@ static int __init root_nfs_parse(char *n case Opt_tcp: nfs_data.flags |= NFS_MOUNT_TCP; break; - case Opt_broken_suid: - nfs_data.flags |= NFS_MOUNT_BROKEN_SUID; - break; default : return 0; } @@ -351,7 +346,7 @@ static void __init root_nfs_print(void) #endif -int __init root_nfs_init(void) +static int __init root_nfs_init(void) { #ifdef NFSROOT_DEBUG nfs_debug |= NFSDBG_ROOT; @@ -379,15 +374,15 @@ int __init root_nfs_init(void) * Parse NFS server and directory information passed on the kernel * command line. */ -int __init nfs_root_setup(char *line) +static int __init nfs_root_setup(char *line) { ROOT_DEV = Root_NFS; if (line[0] == '/' || line[0] == ',' || (line[0] >= '0' && line[0] <= '9')) { strlcpy(nfs_root_name, line, sizeof(nfs_root_name)); } else { - int n = strlen(line) + strlen(NFS_ROOT); + int n = strlen(line) + sizeof(NFS_ROOT) - 1; if (n >= sizeof(nfs_root_name)) - line[sizeof(nfs_root_name) - strlen(NFS_ROOT) - 1] = '\0'; + line[sizeof(nfs_root_name) - sizeof(NFS_ROOT) - 2] = '\0'; sprintf(nfs_root_name, NFS_ROOT, line); } root_server_addr = root_nfs_parse_addr(nfs_root_name); Index: linux-2.6.11/fs/nfs/proc.c =================================================================== --- linux-2.6.11.orig/fs/nfs/proc.c +++ linux-2.6.11/fs/nfs/proc.c @@ -212,7 +212,7 @@ static int nfs_proc_write(struct nfs_wri return status < 0? 
status : wdata->res.count; } -static struct inode * +static int nfs_proc_create(struct inode *dir, struct dentry *dentry, struct iattr *sattr, int flags) { @@ -233,37 +233,34 @@ nfs_proc_create(struct inode *dir, struc fattr.valid = 0; dprintk("NFS call create %s\n", dentry->d_name.name); status = rpc_call(NFS_CLIENT(dir), NFSPROC_CREATE, &arg, &res, 0); + if (status == 0) + status = nfs_instantiate(dentry, &fhandle, &fattr); dprintk("NFS reply create: %d\n", status); - if (status == 0) { - struct inode *inode; - inode = nfs_fhget(dir->i_sb, &fhandle, &fattr); - if (inode) - return inode; - status = -ENOMEM; - } - return ERR_PTR(status); + return status; } /* * In NFSv2, mknod is grafted onto the create call. */ static int -nfs_proc_mknod(struct inode *dir, struct qstr *name, struct iattr *sattr, - dev_t rdev, struct nfs_fh *fhandle, struct nfs_fattr *fattr) +nfs_proc_mknod(struct inode *dir, struct dentry *dentry, struct iattr *sattr, + dev_t rdev) { + struct nfs_fh fhandle; + struct nfs_fattr fattr; struct nfs_createargs arg = { .fh = NFS_FH(dir), - .name = name->name, - .len = name->len, + .name = dentry->d_name.name, + .len = dentry->d_name.len, .sattr = sattr }; struct nfs_diropok res = { - .fh = fhandle, - .fattr = fattr + .fh = &fhandle, + .fattr = &fattr }; - int status, mode; + int status, mode; - dprintk("NFS call mknod %s\n", name->name); + dprintk("NFS call mknod %s\n", dentry->d_name.name); mode = sattr->ia_mode; if (S_ISFIFO(mode)) { @@ -274,14 +271,16 @@ nfs_proc_mknod(struct inode *dir, struct sattr->ia_size = new_encode_dev(rdev);/* get out your barf bag */ } - fattr->valid = 0; + fattr.valid = 0; status = rpc_call(NFS_CLIENT(dir), NFSPROC_CREATE, &arg, &res, 0); if (status == -EINVAL && S_ISFIFO(mode)) { sattr->ia_mode = mode; - fattr->valid = 0; + fattr.valid = 0; status = rpc_call(NFS_CLIENT(dir), NFSPROC_CREATE, &arg, &res, 0); } + if (status == 0) + status = nfs_instantiate(dentry, &fhandle, &fattr); dprintk("NFS reply mknod: %d\n", status); 
return status; } @@ -398,24 +397,27 @@ nfs_proc_symlink(struct inode *dir, stru } static int -nfs_proc_mkdir(struct inode *dir, struct qstr *name, struct iattr *sattr, - struct nfs_fh *fhandle, struct nfs_fattr *fattr) +nfs_proc_mkdir(struct inode *dir, struct dentry *dentry, struct iattr *sattr) { + struct nfs_fh fhandle; + struct nfs_fattr fattr; struct nfs_createargs arg = { .fh = NFS_FH(dir), - .name = name->name, - .len = name->len, + .name = dentry->d_name.name, + .len = dentry->d_name.len, .sattr = sattr }; struct nfs_diropok res = { - .fh = fhandle, - .fattr = fattr + .fh = &fhandle, + .fattr = &fattr }; int status; - dprintk("NFS call mkdir %s\n", name->name); - fattr->valid = 0; + dprintk("NFS call mkdir %s\n", dentry->d_name.name); + fattr.valid = 0; status = rpc_call(NFS_CLIENT(dir), NFSPROC_MKDIR, &arg, &res, 0); + if (status == 0) + status = nfs_instantiate(dentry, &fhandle, &fattr); dprintk("NFS reply mkdir: %d\n", status); return status; } @@ -620,6 +622,7 @@ struct nfs_rpc_ops nfs_v2_clientops = { .version = 2, /* protocol version */ .dentry_ops = &nfs_dentry_operations, .dir_inode_ops = &nfs_dir_inode_operations, + .file_inode_ops = &nfs_file_inode_operations, .getroot = nfs_proc_get_root, .getattr = nfs_proc_getattr, .setattr = nfs_proc_setattr, Index: linux-2.6.11/fs/nfs/read.c =================================================================== --- linux-2.6.11.orig/fs/nfs/read.c +++ linux-2.6.11/fs/nfs/read.c @@ -370,7 +370,7 @@ out_bad: return -ENOMEM; } -int +static int nfs_pagein_list(struct list_head *head, int rpages) { LIST_HEAD(one_request); Index: linux-2.6.11/fs/nfs/unlink.c =================================================================== --- linux-2.6.11.orig/fs/nfs/unlink.c +++ linux-2.6.11/fs/nfs/unlink.c @@ -167,6 +167,11 @@ nfs_async_unlink(struct dentry *dentry) goto out; memset(data, 0, sizeof(*data)); + data->cred = rpcauth_lookupcred(clnt->cl_auth, 0); + if (IS_ERR(data->cred)) { + status = PTR_ERR(data->cred); + goto 
out_free; + } data->dir = dget(dir); data->dentry = dentry; @@ -183,12 +188,14 @@ nfs_async_unlink(struct dentry *dentry) spin_lock(&dentry->d_lock); dentry->d_flags |= DCACHE_NFSFS_RENAMED; spin_unlock(&dentry->d_lock); - data->cred = rpcauth_lookupcred(clnt->cl_auth, 0); rpc_sleep_on(&nfs_delete_queue, task, NULL, NULL); status = 0; out: return status; +out_free: + kfree(data); + return status; } /** Index: linux-2.6.11/fs/nfs/write.c =================================================================== --- linux-2.6.11.orig/fs/nfs/write.c +++ linux-2.6.11/fs/nfs/write.c @@ -80,14 +80,31 @@ static void nfs_writeback_done_partial(s static void nfs_writeback_done_full(struct nfs_write_data *, int); static int nfs_wait_on_write_congestion(struct address_space *, int); static int nfs_wait_on_requests(struct inode *, unsigned long, unsigned int); +static int nfs_flush_inode(struct inode *inode, unsigned long idx_start, + unsigned int npages, int how); static kmem_cache_t *nfs_wdata_cachep; mempool_t *nfs_wdata_mempool; -mempool_t *nfs_commit_mempool; +static mempool_t *nfs_commit_mempool; static DECLARE_WAIT_QUEUE_HEAD(nfs_write_congestion); -void nfs_writedata_release(struct rpc_task *task) +static inline struct nfs_write_data *nfs_commit_alloc(void) +{ + struct nfs_write_data *p = mempool_alloc(nfs_commit_mempool, SLAB_NOFS); + if (p) { + memset(p, 0, sizeof(*p)); + INIT_LIST_HEAD(&p->pages); + } + return p; +} + +static inline void nfs_commit_free(struct nfs_write_data *p) +{ + mempool_free(p, nfs_commit_mempool); +} + +static void nfs_writedata_release(struct rpc_task *task) { struct nfs_write_data *wdata = (struct nfs_write_data *)task->tk_calldata; nfs_writedata_free(wdata); @@ -990,7 +1007,7 @@ static int nfs_flush_one(struct list_hea return -ENOMEM; } -int +static int nfs_flush_list(struct list_head *head, int wpages, int how) { LIST_HEAD(one_request); @@ -1240,7 +1257,7 @@ static void nfs_commit_rpcsetup(struct l /* * Commit dirty pages */ -int +static int 
nfs_commit_list(struct list_head *head, int how) { struct nfs_write_data *data; @@ -1314,8 +1331,8 @@ nfs_commit_done(struct rpc_task *task) } #endif -int nfs_flush_inode(struct inode *inode, unsigned long idx_start, - unsigned int npages, int how) +static int nfs_flush_inode(struct inode *inode, unsigned long idx_start, + unsigned int npages, int how) { struct nfs_inode *nfsi = NFS_I(inode); LIST_HEAD(head); Index: linux-2.6.11/fs/nfsd/nfs4callback.c =================================================================== --- linux-2.6.11.orig/fs/nfsd/nfs4callback.c +++ linux-2.6.11/fs/nfsd/nfs4callback.c @@ -447,7 +447,10 @@ nfsd4_probe_callback(struct nfs4_client atomic_inc(&clp->cl_count); msg.rpc_cred = nfsd4_lookupcred(clp,0); - status = rpc_call_async(clnt, &msg, RPC_TASK_ASYNC, nfs4_cb_null, NULL); + if (IS_ERR(msg.rpc_cred)) + status = PTR_ERR(msg.rpc_cred); + else + status = rpc_call_async(clnt, &msg, 0, nfs4_cb_null, NULL); if (status != 0) { dprintk("NFSD: asynchronous NFSPROC4_CB_NULL failed!\n"); @@ -563,6 +566,10 @@ nfsd4_cb_recall(struct nfs4_delegation * msg.rpc_argp = cbr; msg.rpc_resp = cbr; msg.rpc_cred = nfsd4_lookupcred(clp,0); + if (IS_ERR(msg.rpc_cred)) { + status = PTR_ERR(msg.rpc_cred); + goto out_fail; + } cbr->cbr_trunc = 0; /* XXX need to implement truncate optimization */ cbr->cbr_dp = dp; Index: linux-2.6.11/fs/super.c =================================================================== --- linux-2.6.11.orig/fs/super.c +++ linux-2.6.11/fs/super.c @@ -794,17 +794,13 @@ struct super_block *get_sb_single(struct EXPORT_SYMBOL(get_sb_single); struct vfsmount * -do_kern_mount(const char *fstype, int flags, const char *name, void *data) +vfs_kern_mount(struct file_system_type *type, int flags, const char *name, void *data) { - struct file_system_type *type = get_fs_type(fstype); struct super_block *sb = ERR_PTR(-ENOMEM); struct vfsmount *mnt; int error; char *secdata = NULL; - if (!type) - return ERR_PTR(-ENODEV); - mnt = alloc_vfsmnt(name); if 
(!mnt) goto out; @@ -835,7 +831,6 @@ do_kern_mount(const char *fstype, int fl mnt->mnt_parent = mnt; mnt->mnt_namespace = current->namespace; up_write(&sb->s_umount); - put_filesystem(type); return mnt; out_sb: up_write(&sb->s_umount); @@ -846,10 +841,23 @@ out_free_secdata: out_mnt: free_vfsmnt(mnt); out: - put_filesystem(type); return (struct vfsmount *)sb; } +EXPORT_SYMBOL_GPL(vfs_kern_mount); + +struct vfsmount * +do_kern_mount(const char *fstype, int flags, const char *name, void *data) +{ + struct file_system_type *type = get_fs_type(fstype); + struct vfsmount *mnt; + if (!type) + return ERR_PTR(-ENODEV); + mnt = vfs_kern_mount(type, flags, name, data); + put_filesystem(type); + return mnt; +} + EXPORT_SYMBOL_GPL(do_kern_mount); struct vfsmount *kern_mount(struct file_system_type *type) Index: linux-2.6.11/include/linux/lockd/lockd.h =================================================================== --- linux-2.6.11.orig/include/linux/lockd/lockd.h +++ linux-2.6.11/include/linux/lockd/lockd.h @@ -42,7 +42,6 @@ struct nlm_host { struct rpc_clnt * h_rpcclnt; /* RPC client to talk to peer */ char h_name[20]; /* remote hostname */ u32 h_version; /* interface version */ - rpc_authflavor_t h_authflavor; /* RPC authentication type */ unsigned short h_proto; /* transport proto */ unsigned short h_reclaiming : 1, h_server : 1, /* server side, not client side */ @@ -143,8 +142,6 @@ extern unsigned long nlmsvc_timeout; * Lockd client functions */ struct nlm_rqst * nlmclnt_alloc_call(void); -int nlmclnt_call(struct nlm_rqst *, u32); -int nlmclnt_async_call(struct nlm_rqst *, u32, rpc_action); int nlmclnt_block(struct nlm_host *, struct file_lock *, u32 *); int nlmclnt_cancel(struct nlm_host *, struct file_lock *); u32 nlmclnt_grant(struct nlm_lock *); Index: linux-2.6.11/include/linux/mount.h =================================================================== --- linux-2.6.11.orig/include/linux/mount.h +++ linux-2.6.11/include/linux/mount.h @@ -68,6 +68,11 @@ extern 
struct vfsmount *alloc_vfsmnt(con extern struct vfsmount *do_kern_mount(const char *fstype, int flags, const char *name, void *data); +struct file_system_type; +extern struct vfsmount *vfs_kern_mount(struct file_system_type *type, + int flags, const char *name, + void *data); + struct nameidata; extern int do_add_mount(struct vfsmount *newmnt, struct nameidata *nd, Index: linux-2.6.11/include/linux/namei.h =================================================================== --- linux-2.6.11.orig/include/linux/namei.h +++ linux-2.6.11/include/linux/namei.h @@ -39,12 +39,14 @@ enum {LAST_NORM, LAST_ROOT, LAST_DOT, LA * - ending slashes ok even for nonexistent files * - internal "there are more path compnents" flag * - locked when lookup done with dcache_lock held + * - dentry cache is untrusted; force a real lookup */ #define LOOKUP_FOLLOW 1 #define LOOKUP_DIRECTORY 2 #define LOOKUP_CONTINUE 4 #define LOOKUP_PARENT 16 #define LOOKUP_NOALT 32 +#define LOOKUP_REVAL 64 /* * Intent data */ Index: linux-2.6.11/include/linux/nfs4.h =================================================================== --- linux-2.6.11.orig/include/linux/nfs4.h +++ linux-2.6.11/include/linux/nfs4.h @@ -382,6 +382,9 @@ enum { NFSPROC4_CLNT_READDIR, NFSPROC4_CLNT_SERVER_CAPS, NFSPROC4_CLNT_DELEGRETURN, + NFSPROC4_CLNT_GETACL, + NFSPROC4_CLNT_SETACL, + NFSPROC4_CLNT_FS_LOCATIONS, }; #endif Index: linux-2.6.11/include/linux/nfs_fs.h =================================================================== --- linux-2.6.11.orig/include/linux/nfs_fs.h +++ linux-2.6.11/include/linux/nfs_fs.h @@ -15,9 +15,6 @@ #include #include #include -#include - -#include #include #include @@ -28,8 +25,10 @@ #include #include #include + +#include + #include -#include #include /* @@ -44,13 +43,6 @@ #define NFS_DEF_FILE_IO_BUFFER_SIZE 4096 /* - * The upper limit on timeouts for the exponential backoff algorithm. 
- */ -#define NFS_WRITEBACK_DELAY (5*HZ) -#define NFS_WRITEBACK_LOCKDELAY (60*HZ) -#define NFS_COMMIT_DELAY (5*HZ) - -/* * superblock magic number for NFS */ #define NFS_SUPER_MAGIC 0x6969 @@ -60,9 +52,6 @@ */ #define NFS_RPC_SWAPFLAGS (RPC_TASK_SWAPPER|RPC_TASK_ROOTCREDS) -#define NFS_RW_SYNC 0x0001 /* O_SYNC handling */ -#define NFS_RW_SWAP 0x0002 /* This is a swap request */ - /* * When flushing a cluster of dirty pages, there can be different * strategies: @@ -183,13 +172,13 @@ struct nfs_inode { wait_queue_head_t nfs_i_wait; #ifdef CONFIG_NFS_V4 + struct nfs4_cached_acl *nfs4_acl; /* NFSv4 state */ struct list_head open_states; struct nfs_delegation *delegation; int delegation_state; struct rw_semaphore rwsem; #endif /* CONFIG_NFS_V4*/ - struct inode vfs_inode; }; @@ -306,6 +295,12 @@ extern void put_nfs_open_context(struct extern void nfs_file_set_open_context(struct file *filp, struct nfs_open_context *ctx); extern struct nfs_open_context *nfs_find_open_context(struct inode *inode, int mode); extern void nfs_file_clear_open_context(struct file *filp); +extern struct vfsmount *nfs_do_submount(const struct vfsmount *mnt_parent, + const struct dentry *dentry, + struct nfs_fh *fh, + struct nfs_fattr *fattr); +extern int nfs_try_migrate_inode(struct inode *dir, struct dentry *parent); +extern int nfs_try_migrate_filehandle(struct inode *inode, struct nfs_fh *fh, struct nfs_fattr *fattr, uint32_t generation); /* linux/net/ipv4/ipconfig.c: trims ip addr off front of name, too. 
*/ extern u32 root_nfs_parse_addr(char *name); /*__init*/ @@ -345,15 +340,19 @@ extern struct inode_operations nfs_dir_i extern struct file_operations nfs_dir_operations; extern struct dentry_operations nfs_dentry_operations; +extern int nfs_instantiate(struct dentry *dentry, struct nfs_fh *fh, struct nfs_fattr *fattr); + /* * linux/fs/nfs/symlink.c */ extern struct inode_operations nfs_symlink_inode_operations; /* - * linux/fs/nfs/locks.c + * linux/fs/nfs/namespace.c */ -extern int nfs_lock(struct file *, int, struct file_lock *); +extern struct inode_operations nfs_mountpoint_inode_operations; +extern int nfs_mountpoint_expiry_timeout; +extern void nfs_release_automount_timer(void); /* * linux/fs/nfs/unlink.c @@ -379,11 +378,8 @@ extern void nfs_commit_done(struct rpc_t * return value!) */ extern int nfs_sync_inode(struct inode *, unsigned long, unsigned int, int); -extern int nfs_flush_inode(struct inode *, unsigned long, unsigned int, int); -extern int nfs_flush_list(struct list_head *, int, int); #if defined(CONFIG_NFS_V3) || defined(CONFIG_NFS_V4) extern int nfs_commit_inode(struct inode *, unsigned long, unsigned int, int); -extern int nfs_commit_list(struct list_head *, int); #else static inline int nfs_commit_inode(struct inode *inode, unsigned long idx_start, unsigned int npages, int how) @@ -424,7 +420,6 @@ static inline int nfs_wb_page(struct ino * Allocate and free nfs_write_data structures */ extern mempool_t *nfs_wdata_mempool; -extern mempool_t *nfs_commit_mempool; static inline struct nfs_write_data *nfs_writedata_alloc(void) { @@ -441,35 +436,12 @@ static inline void nfs_writedata_free(st mempool_free(p, nfs_wdata_mempool); } -extern void nfs_writedata_release(struct rpc_task *task); - -static inline struct nfs_write_data *nfs_commit_alloc(void) -{ - struct nfs_write_data *p = mempool_alloc(nfs_commit_mempool, SLAB_NOFS); - if (p) { - memset(p, 0, sizeof(*p)); - INIT_LIST_HEAD(&p->pages); - } - return p; -} - -static inline void 
nfs_commit_free(struct nfs_write_data *p) -{ - mempool_free(p, nfs_commit_mempool); -} - -/* Hack for future NFS swap support */ -#ifndef IS_SWAPFILE -# define IS_SWAPFILE(inode) (0) -#endif - /* * linux/fs/nfs/read.c */ extern int nfs_readpage(struct file *, struct page *); extern int nfs_readpages(struct file *, struct address_space *, struct list_head *, unsigned); -extern int nfs_pagein_list(struct list_head *, int); extern void nfs_readpage_result(struct rpc_task *); /* @@ -540,220 +512,6 @@ extern void * nfs_root_data(void); #define NFS_JUKEBOX_RETRY_TIME (5 * HZ) -#ifdef CONFIG_NFS_V4 - -struct idmap; - -/* - * In a seqid-mutating op, this macro controls which error return - * values trigger incrementation of the seqid. - * - * from rfc 3010: - * The client MUST monotonically increment the sequence number for the - * CLOSE, LOCK, LOCKU, OPEN, OPEN_CONFIRM, and OPEN_DOWNGRADE - * operations. This is true even in the event that the previous - * operation that used the sequence number received an error. The only - * exception to this rule is if the previous operation received one of - * the following errors: NFSERR_STALE_CLIENTID, NFSERR_STALE_STATEID, - * NFSERR_BAD_STATEID, NFSERR_BAD_SEQID, NFSERR_BADXDR, - * NFSERR_RESOURCE, NFSERR_NOFILEHANDLE. - * - */ -#define seqid_mutating_err(err) \ -(((err) != NFSERR_STALE_CLIENTID) && \ - ((err) != NFSERR_STALE_STATEID) && \ - ((err) != NFSERR_BAD_STATEID) && \ - ((err) != NFSERR_BAD_SEQID) && \ - ((err) != NFSERR_BAD_XDR) && \ - ((err) != NFSERR_RESOURCE) && \ - ((err) != NFSERR_NOFILEHANDLE)) - -enum nfs4_client_state { - NFS4CLNT_OK = 0, -}; - -/* - * The nfs4_client identifies our client state to the server. 
- */ -struct nfs4_client { - struct list_head cl_servers; /* Global list of servers */ - struct in_addr cl_addr; /* Server identifier */ - u64 cl_clientid; /* constant */ - nfs4_verifier cl_confirm; - unsigned long cl_state; - - u32 cl_lockowner_id; - - /* - * The following rwsem ensures exclusive access to the server - * while we recover the state following a lease expiration. - */ - struct rw_semaphore cl_sem; - - struct list_head cl_delegations; - struct list_head cl_state_owners; - struct list_head cl_unused; - int cl_nunused; - spinlock_t cl_lock; - atomic_t cl_count; - - struct rpc_clnt * cl_rpcclient; - struct rpc_cred * cl_cred; - - struct list_head cl_superblocks; /* List of nfs_server structs */ - - unsigned long cl_lease_time; - unsigned long cl_last_renewal; - struct work_struct cl_renewd; - struct work_struct cl_recoverd; - - wait_queue_head_t cl_waitq; - struct rpc_wait_queue cl_rpcwaitq; - - /* idmapper */ - struct idmap * cl_idmap; - - /* Our own IP address, as a null-terminated string. - * This is used to generate the clientid, and the callback address. - */ - char cl_ipaddr[16]; -}; - -/* - * NFS4 state_owners and lock_owners are simply labels for ordered - * sequences of RPC calls. Their sole purpose is to provide once-only - * semantics by allowing the server to identify replayed requests. - * - * The ->so_sema is held during all state_owner seqid-mutating operations: - * OPEN, OPEN_DOWNGRADE, and CLOSE. Its purpose is to properly serialize - * so_seqid. 
- */ -struct nfs4_state_owner { - struct list_head so_list; /* per-clientid list of state_owners */ - struct nfs4_client *so_client; - u32 so_id; /* 32-bit identifier, unique */ - struct semaphore so_sema; - u32 so_seqid; /* protected by so_sema */ - atomic_t so_count; - - struct rpc_cred *so_cred; /* Associated cred */ - struct list_head so_states; - struct list_head so_delegations; -}; - -/* - * struct nfs4_state maintains the client-side state for a given - * (state_owner,inode) tuple (OPEN) or state_owner (LOCK). - * - * OPEN: - * In order to know when to OPEN_DOWNGRADE or CLOSE the state on the server, - * we need to know how many files are open for reading or writing on a - * given inode. This information too is stored here. - * - * LOCK: one nfs4_state (LOCK) to hold the lock stateid nfs4_state(OPEN) - */ - -struct nfs4_lock_state { - struct list_head ls_locks; /* Other lock stateids */ - fl_owner_t ls_owner; /* POSIX lock owner */ -#define NFS_LOCK_INITIALIZED 1 - int ls_flags; - u32 ls_seqid; - u32 ls_id; - nfs4_stateid ls_stateid; - atomic_t ls_count; -}; - -/* bits for nfs4_state->flags */ -enum { - LK_STATE_IN_USE, - NFS_DELEGATED_STATE, -}; - -struct nfs4_state { - struct list_head open_states; /* List of states for the same state_owner */ - struct list_head inode_states; /* List of states for the same inode */ - struct list_head lock_states; /* List of subservient lock stateids */ - - struct nfs4_state_owner *owner; /* Pointer to the open owner */ - struct inode *inode; /* Pointer to the inode */ - - unsigned long flags; /* Do we hold any locks? 
*/ - struct semaphore lock_sema; /* Serializes file locking operations */ - rwlock_t state_lock; /* Protects the lock_states list */ - - nfs4_stateid stateid; - - unsigned int nreaders; - unsigned int nwriters; - int state; /* State on the server (R,W, or RW) */ - atomic_t count; -}; - - -struct nfs4_exception { - long timeout; - int retry; -}; - -extern struct dentry_operations nfs4_dentry_operations; -extern struct inode_operations nfs4_dir_inode_operations; - -/* nfs4proc.c */ -extern int nfs4_proc_setclientid(struct nfs4_client *, u32, unsigned short); -extern int nfs4_proc_setclientid_confirm(struct nfs4_client *); -extern int nfs4_open_reclaim(struct nfs4_state_owner *, struct nfs4_state *); -extern int nfs4_proc_async_renew(struct nfs4_client *); -extern int nfs4_proc_renew(struct nfs4_client *); -extern int nfs4_do_close(struct inode *inode, struct nfs4_state *state, mode_t mode); -extern int nfs4_wait_clnt_recover(struct rpc_clnt *, struct nfs4_client *); -extern struct inode *nfs4_atomic_open(struct inode *, struct dentry *, struct nameidata *); -extern int nfs4_open_revalidate(struct inode *, struct dentry *, int); -extern int nfs4_handle_exception(struct nfs_server *, int, struct nfs4_exception *); -extern int nfs4_lock_reclaim(struct nfs4_state *state, struct file_lock *request); - -/* nfs4renewd.c */ -extern void nfs4_schedule_state_renewal(struct nfs4_client *); -extern void nfs4_renewd_prepare_shutdown(struct nfs_server *); -extern void nfs4_kill_renewd(struct nfs4_client *); - -/* nfs4state.c */ -extern void init_nfsv4_state(struct nfs_server *); -extern void destroy_nfsv4_state(struct nfs_server *); -extern struct nfs4_client *nfs4_get_client(struct in_addr *); -extern void nfs4_put_client(struct nfs4_client *clp); -extern int nfs4_init_client(struct nfs4_client *clp); -extern struct nfs4_client *nfs4_find_client(struct in_addr *); -extern u32 nfs4_alloc_lockowner_id(struct nfs4_client *); - -extern struct nfs4_state_owner * 
nfs4_get_state_owner(struct nfs_server *, struct rpc_cred *); -extern void nfs4_put_state_owner(struct nfs4_state_owner *); -extern struct nfs4_state * nfs4_get_open_state(struct inode *, struct nfs4_state_owner *); -extern void nfs4_put_open_state(struct nfs4_state *); -extern void nfs4_close_state(struct nfs4_state *, mode_t); -extern struct nfs4_state *nfs4_find_state(struct inode *, struct rpc_cred *, mode_t mode); -extern void nfs4_increment_seqid(int status, struct nfs4_state_owner *sp); -extern void nfs4_schedule_state_recovery(struct nfs4_client *); -extern struct nfs4_lock_state *nfs4_find_lock_state(struct nfs4_state *state, fl_owner_t); -extern struct nfs4_lock_state *nfs4_get_lock_state(struct nfs4_state *state, fl_owner_t); -extern void nfs4_put_lock_state(struct nfs4_lock_state *state); -extern void nfs4_increment_lock_seqid(int status, struct nfs4_lock_state *ls); -extern void nfs4_notify_setlk(struct nfs4_state *, struct file_lock *, struct nfs4_lock_state *); -extern void nfs4_notify_unlck(struct nfs4_state *, struct file_lock *, struct nfs4_lock_state *); -extern void nfs4_copy_stateid(nfs4_stateid *, struct nfs4_state *, fl_owner_t); - - - -struct nfs4_mount_data; -#else -#define init_nfsv4_state(server) do { } while (0) -#define destroy_nfsv4_state(server) do { } while (0) -#define nfs4_put_state_owner(inode, owner) do { } while (0) -#define nfs4_put_open_state(state) do { } while (0) -#define nfs4_close_state(a, b) do { } while (0) -#define nfs4_renewd_prepare_shutdown(server) do { } while (0) -#endif - #endif /* __KERNEL__ */ /* Index: linux-2.6.11/include/linux/nfs_fs_sb.h =================================================================== --- linux-2.6.11.orig/include/linux/nfs_fs_sb.h +++ linux-2.6.11/include/linux/nfs_fs_sb.h @@ -29,6 +29,8 @@ struct nfs_server { char * hostname; /* remote hostname */ struct nfs_fh fh; struct sockaddr_in addr; + struct nfs_fsid fsid; + uint32_t generation; #ifdef CONFIG_NFS_V4 /* Our own IP address, as a 
null-terminated string. * This is used to generate the clientid, and the callback address. @@ -53,5 +55,6 @@ struct nfs_server { #define NFS_CAP_HARDLINKS (1U << 1) #define NFS_CAP_SYMLINKS (1U << 2) #define NFS_CAP_ACLS (1U << 3) +#define NFS_CAP_ATOMIC_OPEN (1U << 4) #endif Index: linux-2.6.11/include/linux/nfs_page.h =================================================================== --- linux-2.6.11.orig/include/linux/nfs_page.h +++ linux-2.6.11/include/linux/nfs_page.h @@ -13,7 +13,6 @@ #include #include #include -#include #include #include Index: linux-2.6.11/include/linux/nfs_xdr.h =================================================================== --- linux-2.6.11.orig/include/linux/nfs_xdr.h +++ linux-2.6.11/include/linux/nfs_xdr.h @@ -3,11 +3,19 @@ #include -struct nfs4_fsid { - __u64 major; - __u64 minor; +struct nfs_fsid { + uint64_t major; + uint64_t minor; }; +/* + * Helper for checking equality between 2 fsids. + */ +static inline int nfs_fsid_equal(const struct nfs_fsid *a, const struct nfs_fsid *b) +{ + return a->major == b->major && a->minor == b->minor; +} + struct nfs_fattr { unsigned short valid; /* which fields are valid */ __u64 pre_size; /* pre_op_attr.size */ @@ -29,10 +37,7 @@ struct nfs_fattr { } nfs3; } du; dev_t rdev; - union { - __u64 nfs3; /* also nfs2 */ - struct nfs4_fsid nfs4; - } fsid_u; + struct nfs_fsid fsid; __u64 fileid; struct timespec atime; struct timespec mtime; @@ -326,6 +331,20 @@ struct nfs_setattrargs { const u32 * bitmask; }; +struct nfs_setaclargs { + struct nfs_fh * fh; + size_t acl_len; + unsigned int acl_pgbase; + struct page ** acl_pages; +}; + +struct nfs_getaclargs { + struct nfs_fh * fh; + size_t acl_len; + unsigned int acl_pgbase; + struct page ** acl_pages; +}; + struct nfs_setattrres { struct nfs_fattr * fattr; const struct nfs_server * server; @@ -563,6 +582,7 @@ struct nfs4_readdir_arg { u32 count; struct page ** pages; /* zero-copy data */ unsigned int pgbase; /* zero-copy data */ + const u32 * bitmask; 
}; struct nfs4_readdir_res { @@ -618,6 +638,30 @@ struct nfs4_server_caps_res { u32 has_symlinks; }; +struct nfs_fs_location { + unsigned int serverlen; + char * server; + unsigned int rootpathlen; + char * rootpath; +}; + +#define NFS_FS_LOCATIONS_MAXENTRIES 10 +struct nfs_fs_locations { + struct nfs_fattr fattr; + const struct nfs_server *server; + unsigned int fs_pathlen; + char * fs_path; + int nlocations; + struct nfs_fs_location locations[NFS_FS_LOCATIONS_MAXENTRIES]; +}; + +struct nfs4_fs_locations_arg { + const struct nfs_fh *dir_fh; + const struct qstr *name; + struct page *page; + const u32 *bitmask; +}; + #endif /* CONFIG_NFS_V4 */ struct nfs_page; @@ -666,6 +710,7 @@ struct nfs_rpc_ops { int version; /* Protocol version */ struct dentry_operations *dentry_ops; struct inode_operations *dir_inode_ops; + struct inode_operations *file_inode_ops; int (*getroot) (struct nfs_server *, struct nfs_fh *, struct nfs_fsinfo *); @@ -681,7 +726,7 @@ struct nfs_rpc_ops { int (*read) (struct nfs_read_data *); int (*write) (struct nfs_write_data *); int (*commit) (struct nfs_write_data *); - struct inode * (*create) (struct inode *, struct dentry *, + int (*create) (struct inode *, struct dentry *, struct iattr *, int); int (*remove) (struct inode *, struct qstr *); int (*unlink_setup) (struct rpc_message *, @@ -693,13 +738,12 @@ struct nfs_rpc_ops { int (*symlink) (struct inode *, struct qstr *, struct qstr *, struct iattr *, struct nfs_fh *, struct nfs_fattr *); - int (*mkdir) (struct inode *, struct qstr *, struct iattr *, - struct nfs_fh *, struct nfs_fattr *); + int (*mkdir) (struct inode *, struct dentry *, struct iattr *); int (*rmdir) (struct inode *, struct qstr *); int (*readdir) (struct dentry *, struct rpc_cred *, u64, struct page *, unsigned int, int); - int (*mknod) (struct inode *, struct qstr *, struct iattr *, - dev_t, struct nfs_fh *, struct nfs_fattr *); + int (*mknod) (struct inode *, struct dentry *, struct iattr *, + dev_t); int (*statfs) (struct 
nfs_server *, struct nfs_fh *, struct nfs_fsstat *); int (*fsinfo) (struct nfs_server *, struct nfs_fh *, @@ -731,7 +775,5 @@ extern struct nfs_rpc_ops nfs_v4_cliento extern struct rpc_version nfs_version2; extern struct rpc_version nfs_version3; extern struct rpc_version nfs_version4; -extern struct rpc_program nfs_program; -extern struct rpc_stat nfs_rpcstat; #endif Index: linux-2.6.11/include/linux/sunrpc/auth.h =================================================================== --- linux-2.6.11.orig/include/linux/sunrpc/auth.h +++ linux-2.6.11/include/linux/sunrpc/auth.h @@ -35,8 +35,7 @@ struct auth_cred { * Client user credentials */ struct rpc_cred { - struct list_head cr_hash; /* hash chain */ - struct rpc_auth * cr_auth; + struct hlist_node cr_hash; /* hash chain */ struct rpc_credops * cr_ops; unsigned long cr_expire; /* when to gc */ atomic_t cr_count; /* ref count */ @@ -59,10 +58,13 @@ struct rpc_cred { */ #define RPC_CREDCACHE_NR 8 #define RPC_CREDCACHE_MASK (RPC_CREDCACHE_NR - 1) +struct rpc_cred_cache { + struct hlist_head hashtable[RPC_CREDCACHE_NR]; + unsigned long nextgc; /* next garbage collection */ + unsigned long expire; /* cache expiry interval */ +}; + struct rpc_auth { - struct list_head au_credcache[RPC_CREDCACHE_NR]; - unsigned long au_expire; /* cache expiry interval */ - unsigned long au_nextgc; /* next garbage collection */ unsigned int au_cslack; /* call cred size estimate */ unsigned int au_rslack; /* reply verf size guess */ unsigned int au_flags; /* various flags */ @@ -73,6 +75,7 @@ struct rpc_auth { * case) */ atomic_t au_count; /* Reference counter */ + struct rpc_cred_cache * au_credcache; /* per-flavor data */ }; #define RPC_AUTH_PROC_CREDS 0x0010 /* process creds (including @@ -91,14 +94,16 @@ struct rpc_authops { struct rpc_auth * (*create)(struct rpc_clnt *, rpc_authflavor_t); void (*destroy)(struct rpc_auth *); + struct rpc_cred * (*lookup_cred)(struct rpc_auth *, struct auth_cred *, int); struct rpc_cred * 
(*crcreate)(struct rpc_auth*, struct auth_cred *, int); }; struct rpc_credops { + const char * cr_name; /* Name of the auth flavour */ void (*crdestroy)(struct rpc_cred *); int (*crmatch)(struct auth_cred *, struct rpc_cred *, int); - u32 * (*crmarshal)(struct rpc_task *, u32 *, int); + u32 * (*crmarshal)(struct rpc_task *, u32 *); int (*crrefresh)(struct rpc_task *); u32 * (*crvalidate)(struct rpc_task *, u32 *); int (*crwrap_req)(struct rpc_task *, kxdrproc_t, @@ -130,7 +135,7 @@ int rpcauth_unwrap_resp(struct rpc_tas int rpcauth_refreshcred(struct rpc_task *); void rpcauth_invalcred(struct rpc_task *); int rpcauth_uptodatecred(struct rpc_task *); -void rpcauth_init_credcache(struct rpc_auth *); +int rpcauth_init_credcache(struct rpc_auth *, unsigned long); void rpcauth_free_credcache(struct rpc_auth *); static inline Index: linux-2.6.11/include/linux/sunrpc/auth_gss.h =================================================================== --- linux-2.6.11.orig/include/linux/sunrpc/auth_gss.h +++ linux-2.6.11/include/linux/sunrpc/auth_gss.h @@ -68,18 +68,24 @@ struct rpc_gss_init_res { struct gss_cl_ctx { atomic_t count; - u32 gc_proc; + enum rpc_gss_proc gc_proc; u32 gc_seq; spinlock_t gc_seq_lock; struct gss_ctx *gc_gss_ctx; struct xdr_netobj gc_wire_ctx; u32 gc_win; + unsigned long gc_expiry; + char gc_principal[0]; }; +struct gss_upcall_msg; +struct key; struct gss_cred { struct rpc_cred gc_base; - u32 gc_flavor; + enum rpc_gss_svc gc_service; struct gss_cl_ctx *gc_ctx; + struct gss_upcall_msg *gc_upcall; + struct key *gc_key; }; #define gc_uid gc_base.cr_uid Index: linux-2.6.11/include/linux/sunrpc/clnt.h =================================================================== --- linux-2.6.11.orig/include/linux/sunrpc/clnt.h +++ linux-2.6.11/include/linux/sunrpc/clnt.h @@ -9,12 +9,9 @@ #ifndef _LINUX_SUNRPC_CLNT_H #define _LINUX_SUNRPC_CLNT_H -#include -#include -#include #include +#include #include -#include #include #include @@ -22,6 +19,7 @@ * This defines an RPC 
port mapping */ struct rpc_portmap { + struct rpc_portmap *pm_parent; __u32 pm_prog; __u32 pm_vers; __u32 pm_prot; @@ -51,7 +49,6 @@ struct rpc_clnt { cl_intr : 1,/* interruptible */ cl_chatty : 1,/* be verbose */ cl_autobind : 1,/* use getport() */ - cl_droppriv : 1,/* enable NFS suid hack */ cl_oneshot : 1,/* dispose after use */ cl_dead : 1;/* abandoned */ @@ -67,7 +64,6 @@ struct rpc_clnt { struct rpc_portmap cl_pmap_default; char cl_inline_name[32]; }; -#define cl_timeout cl_xprt->timeout #define cl_prog cl_pmap->pm_prog #define cl_vers cl_pmap->pm_vers #define cl_port cl_pmap->pm_port @@ -104,7 +100,6 @@ struct rpc_procinfo { unsigned int p_timer; /* Which RTT timer to use */ }; -#define RPC_CONGESTED(clnt) (RPCXPRT_CONGESTED((clnt)->cl_xprt)) #define RPC_PEERADDR(clnt) (&(clnt)->cl_xprt->addr) #ifdef __KERNEL__ @@ -116,6 +111,8 @@ struct rpc_clnt *rpc_clone_client(struct int rpc_shutdown_client(struct rpc_clnt *); int rpc_destroy_client(struct rpc_clnt *); void rpc_release_client(struct rpc_clnt *); +void rpc_change_program(struct rpc_clnt *, struct rpc_program *, + int); void rpc_getport(struct rpc_task *, struct rpc_clnt *); int rpc_register(u32, u32, int, unsigned short, int *); @@ -129,6 +126,11 @@ void rpc_restart_call(struct rpc_task * void rpc_clnt_sigmask(struct rpc_clnt *clnt, sigset_t *oldset); void rpc_clnt_sigunmask(struct rpc_clnt *clnt, sigset_t *oldset); void rpc_setbufsize(struct rpc_clnt *, unsigned int, unsigned int); +size_t rpc_max_payload(struct rpc_clnt *); + +struct rpc_xprt *rpc_client_get_xprt(struct rpc_clnt *clnt); +void rpc_client_set_xprt(struct rpc_clnt *, struct rpc_xprt *); +void rpc_put_xprt(struct rpc_xprt *xprt); static __inline__ int rpc_call(struct rpc_clnt *clnt, u32 proc, void *argp, void *resp, int flags) Index: linux-2.6.11/include/linux/sunrpc/gss_api.h =================================================================== --- linux-2.6.11.orig/include/linux/sunrpc/gss_api.h +++ 
linux-2.6.11/include/linux/sunrpc/gss_api.h @@ -33,8 +33,9 @@ struct gss_ctx { /* gss-api prototypes; note that these are somewhat simplified versions of * the prototypes specified in RFC 2744. */ -u32 gss_import_sec_context( - struct xdr_netobj *input_token, +int gss_import_sec_context( + const void* input_token, + size_t bufsize, struct gss_api_mech *mech, struct gss_ctx **ctx_id); u32 gss_get_mic( @@ -50,8 +51,6 @@ u32 gss_verify_mic( u32 gss_delete_sec_context( struct gss_ctx **ctx_id); -struct gss_api_mech * gss_mech_get_by_name(char *name); -struct gss_api_mech * gss_mech_get_by_pseudoflavor(u32 pseudoflavor); u32 gss_pseudoflavor_to_service(struct gss_api_mech *, u32 pseudoflavor); char *gss_service_to_auth_domain_name(struct gss_api_mech *, u32 service); @@ -80,8 +79,9 @@ struct gss_api_mech { /* and must provide the following operations: */ struct gss_api_ops { - u32 (*gss_import_sec_context)( - struct xdr_netobj *input_token, + int (*gss_import_sec_context)( + const void *input_token, + size_t bufsize, struct gss_ctx *ctx_id); u32 (*gss_get_mic)( struct gss_ctx *ctx_id, @@ -105,7 +105,7 @@ void gss_mech_unregister(struct gss_api_ struct gss_api_mech * gss_mech_get_by_OID(struct xdr_netobj *); /* Returns a reference to a mechanism, given a name like "krb5" etc. */ -struct gss_api_mech *gss_mech_get_by_name(char *); +struct gss_api_mech *gss_mech_get_by_name(const char *); /* Similar, but get by pseudoflavor. 
*/ struct gss_api_mech *gss_mech_get_by_pseudoflavor(u32); Index: linux-2.6.11/include/linux/sunrpc/sched.h =================================================================== --- linux-2.6.11.orig/include/linux/sunrpc/sched.h +++ linux-2.6.11/include/linux/sunrpc/sched.h @@ -44,6 +44,7 @@ struct rpc_task { #endif struct list_head tk_task; /* global list of tasks */ struct rpc_clnt * tk_client; /* RPC client */ + struct rpc_xprt * tk_xprt; /* RPC request */ struct rpc_rqst * tk_rqstp; /* RPC request */ int tk_status; /* result of last operation */ @@ -53,9 +54,8 @@ struct rpc_task { struct rpc_message tk_msg; /* RPC call info */ __u32 * tk_buffer; /* XDR buffer */ size_t tk_bufsize; - __u8 tk_garb_retry, - tk_cred_retry, - tk_suid_retry; + __u8 tk_garb_retry; + __u8 tk_cred_retry; unsigned long tk_cookie; /* Cookie for batching tasks */ @@ -95,7 +95,6 @@ struct rpc_task { #endif }; #define tk_auth tk_client->cl_auth -#define tk_xprt tk_client->cl_xprt /* support walking a list of tasks on a wait queue */ #define task_for_each(task, pos, head) \ @@ -118,9 +117,7 @@ typedef void (*rpc_action)(struct rpc_ */ #define RPC_TASK_ASYNC 0x0001 /* is an async task */ #define RPC_TASK_SWAPPER 0x0002 /* is swapping in/out */ -#define RPC_TASK_SETUID 0x0004 /* is setuid process */ #define RPC_TASK_CHILD 0x0008 /* is child of other task */ -#define RPC_CALL_REALUID 0x0010 /* try using real uid */ #define RPC_CALL_MAJORSEEN 0x0020 /* major timeout seen */ #define RPC_TASK_ROOTCREDS 0x0040 /* force root creds */ #define RPC_TASK_DYNAMIC 0x0080 /* task was kmalloc'ed */ @@ -129,7 +126,6 @@ typedef void (*rpc_action)(struct rpc_ #define RPC_TASK_NOINTR 0x0400 /* uninterruptible task */ #define RPC_IS_ASYNC(t) ((t)->tk_flags & RPC_TASK_ASYNC) -#define RPC_IS_SETUID(t) ((t)->tk_flags & RPC_TASK_SETUID) #define RPC_IS_CHILD(t) ((t)->tk_flags & RPC_TASK_CHILD) #define RPC_IS_SWAPPER(t) ((t)->tk_flags & RPC_TASK_SWAPPER) #define RPC_DO_ROOTOVERRIDE(t) ((t)->tk_flags & RPC_TASK_ROOTCREDS) 
Index: linux-2.6.11/include/linux/sunrpc/xdr.h =================================================================== --- linux-2.6.11.orig/include/linux/sunrpc/xdr.h +++ linux-2.6.11/include/linux/sunrpc/xdr.h @@ -146,7 +146,8 @@ extern void xdr_shift_buf(struct xdr_buf extern void xdr_buf_from_iov(struct kvec *, struct xdr_buf *); extern int xdr_buf_subsegment(struct xdr_buf *, struct xdr_buf *, int, int); extern int xdr_buf_read_netobj(struct xdr_buf *, struct xdr_netobj *, int); -extern int read_bytes_from_xdr_buf(struct xdr_buf *buf, int base, void *obj, int len); +extern int read_bytes_from_xdr_buf(struct xdr_buf *, int, void *, int); +extern int write_bytes_to_xdr_buf(struct xdr_buf *, int, void *, int); /* * Helper structure for copying from an sk_buff. @@ -160,7 +161,7 @@ typedef struct { typedef size_t (*skb_read_actor_t)(skb_reader_t *desc, void *to, size_t len); -extern void xdr_partial_copy_from_skb(struct xdr_buf *, unsigned int, +extern int xdr_partial_copy_from_skb(struct xdr_buf *, unsigned int, skb_reader_t *, skb_read_actor_t); struct socket; @@ -168,6 +169,22 @@ struct sockaddr; extern int xdr_sendpages(struct socket *, struct sockaddr *, int, struct xdr_buf *, unsigned int, int); +extern int xdr_encode_word(struct xdr_buf *, int, u32); +extern int xdr_decode_word(struct xdr_buf *, int, u32 *); + +struct xdr_array2_desc; +typedef int (*xdr_xcode_elem_t)(struct xdr_array2_desc *desc, void *elem); +struct xdr_array2_desc { + unsigned int elem_size; + unsigned int array_len; + xdr_xcode_elem_t xcode; +}; + +extern int xdr_decode_array2(struct xdr_buf *buf, unsigned int base, + struct xdr_array2_desc *desc); +extern int xdr_encode_array2(struct xdr_buf *buf, unsigned int base, + struct xdr_array2_desc *desc); + /* * Provide some simple tools for XDR buffer overflow-checking etc. 
*/ @@ -186,6 +203,7 @@ extern void xdr_write_pages(struct xdr_s extern void xdr_init_decode(struct xdr_stream *xdr, struct xdr_buf *buf, uint32_t *p); extern uint32_t *xdr_inline_decode(struct xdr_stream *xdr, size_t nbytes); extern void xdr_read_pages(struct xdr_stream *xdr, unsigned int len); +extern void xdr_enter_page(struct xdr_stream *xdr, unsigned int len); #endif /* __KERNEL__ */ Index: linux-2.6.11/include/linux/sunrpc/xprt.h =================================================================== --- linux-2.6.11.orig/include/linux/sunrpc/xprt.h +++ linux-2.6.11/include/linux/sunrpc/xprt.h @@ -9,7 +9,6 @@ #ifndef _LINUX_SUNRPC_XPRT_H #define _LINUX_SUNRPC_XPRT_H -#include #include #include #include @@ -127,6 +126,7 @@ struct rpc_rqst { #define XPRT_COPY_DATA (1 << 3) struct rpc_xprt { + atomic_t count; /* Reference counter */ struct socket * sock; /* BSD socket layer */ struct sock * inet; /* INET layer */ @@ -140,6 +140,9 @@ struct rpc_xprt { unsigned int rcvsize, /* socket receive buffer size */ sndsize; /* socket send buffer size */ + size_t max_payload; /* largest RPC payload size, + in bytes */ + struct rpc_wait_queue sending; /* requests waiting to send */ struct rpc_wait_queue resend; /* requests waiting to resend */ struct rpc_wait_queue pending; /* requests in flight */ Index: linux-2.6.11/net/sunrpc/auth.c =================================================================== --- linux-2.6.11.orig/net/sunrpc/auth.c +++ linux-2.6.11/net/sunrpc/auth.c @@ -67,12 +67,9 @@ rpcauth_create(rpc_authflavor_t pseudofl if (flavor >= RPC_AUTH_MAXFLAVOR || !(ops = auth_flavors[flavor])) return NULL; - if (!try_module_get(ops->owner)) - return NULL; auth = ops->create(clnt, pseudoflavor); if (!auth) return NULL; - atomic_set(&auth->au_count, 1); if (clnt->cl_auth) rpcauth_destroy(clnt->cl_auth); clnt->cl_auth = auth; @@ -85,8 +82,6 @@ rpcauth_destroy(struct rpc_auth *auth) if (!atomic_dec_and_test(&auth->au_count)) return; auth->au_ops->destroy(auth); - 
module_put(auth->au_ops->owner); - kfree(auth); } static DEFINE_SPINLOCK(rpc_credcache_lock); @@ -94,42 +89,35 @@ static DEFINE_SPINLOCK(rpc_credcache_loc /* * Initialize RPC credential cache */ -void -rpcauth_init_credcache(struct rpc_auth *auth) +int +rpcauth_init_credcache(struct rpc_auth *auth, unsigned long expire) { + struct rpc_cred_cache *new; int i; - for (i = 0; i < RPC_CREDCACHE_NR; i++) - INIT_LIST_HEAD(&auth->au_credcache[i]); - auth->au_nextgc = jiffies + (auth->au_expire >> 1); -} -/* - * Destroy an unreferenced credential - */ -static inline void -rpcauth_crdestroy(struct rpc_cred *cred) -{ -#ifdef RPC_DEBUG - BUG_ON(cred->cr_magic != RPCAUTH_CRED_MAGIC || - atomic_read(&cred->cr_count) || - !list_empty(&cred->cr_hash)); - cred->cr_magic = 0; -#endif - cred->cr_ops->crdestroy(cred); + new = (struct rpc_cred_cache *)kmalloc(sizeof(*new), GFP_KERNEL); + if (!new) + return -ENOMEM; + for (i = 0; i < RPC_CREDCACHE_NR; i++) + INIT_HLIST_HEAD(&new->hashtable[i]); + new->expire = expire; + new->nextgc = jiffies + (expire >> 1); + auth->au_credcache = new; + return 0; } /* * Destroy a list of credentials */ static inline -void rpcauth_destroy_credlist(struct list_head *head) +void rpcauth_destroy_credlist(struct hlist_head *head) { struct rpc_cred *cred; - while (!list_empty(head)) { - cred = list_entry(head->next, struct rpc_cred, cr_hash); - list_del_init(&cred->cr_hash); - rpcauth_crdestroy(cred); + while (!hlist_empty(head)) { + cred = hlist_entry(head->first, struct rpc_cred, cr_hash); + hlist_del_init(&cred->cr_hash); + put_rpccred(cred); } } @@ -140,56 +128,56 @@ void rpcauth_destroy_credlist(struct lis void rpcauth_free_credcache(struct rpc_auth *auth) { - LIST_HEAD(free); - struct list_head *pos, *next; + struct rpc_cred_cache *cache = auth->au_credcache; + HLIST_HEAD(free); + struct hlist_node *pos, *next; struct rpc_cred *cred; int i; spin_lock(&rpc_credcache_lock); for (i = 0; i < RPC_CREDCACHE_NR; i++) { - list_for_each_safe(pos, next, 
&auth->au_credcache[i]) { - cred = list_entry(pos, struct rpc_cred, cr_hash); - cred->cr_auth = NULL; - list_del_init(&cred->cr_hash); - if (atomic_read(&cred->cr_count) == 0) - list_add(&cred->cr_hash, &free); + hlist_for_each_safe(pos, next, &cache->hashtable[i]) { + cred = hlist_entry(pos, struct rpc_cred, cr_hash); + __hlist_del(&cred->cr_hash); + hlist_add_head(&cred->cr_hash, &free); } } spin_unlock(&rpc_credcache_lock); rpcauth_destroy_credlist(&free); } -static inline int -rpcauth_prune_expired(struct rpc_cred *cred, struct list_head *free) +static void +rpcauth_prune_expired(struct rpc_auth *auth, struct rpc_cred *cred, struct hlist_head *free) { - if (atomic_read(&cred->cr_count) != 0) - return 0; - if (time_before(jiffies, cred->cr_expire)) - return 0; - cred->cr_auth = NULL; - list_del(&cred->cr_hash); - list_add(&cred->cr_hash, free); - return 1; + if (atomic_read(&cred->cr_count) != 1) + return; + if (time_after(jiffies, cred->cr_expire + auth->au_credcache->expire)) + cred->cr_flags &= ~RPCAUTH_CRED_UPTODATE; + if (!(cred->cr_flags & RPCAUTH_CRED_UPTODATE)) { + __hlist_del(&cred->cr_hash); + hlist_add_head(&cred->cr_hash, free); + } } /* * Remove stale credentials. Avoid sleeping inside the loop. 
*/ static void -rpcauth_gc_credcache(struct rpc_auth *auth, struct list_head *free) +rpcauth_gc_credcache(struct rpc_auth *auth, struct hlist_head *free) { - struct list_head *pos, *next; + struct rpc_cred_cache *cache = auth->au_credcache; + struct hlist_node *pos, *next; struct rpc_cred *cred; int i; dprintk("RPC: gc'ing RPC credentials for auth %p\n", auth); for (i = 0; i < RPC_CREDCACHE_NR; i++) { - list_for_each_safe(pos, next, &auth->au_credcache[i]) { - cred = list_entry(pos, struct rpc_cred, cr_hash); - rpcauth_prune_expired(cred, free); + hlist_for_each_safe(pos, next, &cache->hashtable[i]) { + cred = hlist_entry(pos, struct rpc_cred, cr_hash); + rpcauth_prune_expired(auth, cred, free); } } - auth->au_nextgc = jiffies + auth->au_expire; + cache->nextgc = jiffies + cache->expire; } /* @@ -199,8 +187,9 @@ struct rpc_cred * rpcauth_lookup_credcache(struct rpc_auth *auth, struct auth_cred * acred, int taskflags) { - LIST_HEAD(free); - struct list_head *pos, *next; + struct rpc_cred_cache *cache = auth->au_credcache; + HLIST_HEAD(free); + struct hlist_node *pos, *next; struct rpc_cred *new = NULL, *cred = NULL; int nr = 0; @@ -209,28 +198,26 @@ rpcauth_lookup_credcache(struct rpc_auth nr = acred->uid & RPC_CREDCACHE_MASK; retry: spin_lock(&rpc_credcache_lock); - if (time_before(auth->au_nextgc, jiffies)) + if (time_before(cache->nextgc, jiffies)) rpcauth_gc_credcache(auth, &free); - list_for_each_safe(pos, next, &auth->au_credcache[nr]) { + hlist_for_each_safe(pos, next, &cache->hashtable[nr]) { struct rpc_cred *entry; - entry = list_entry(pos, struct rpc_cred, cr_hash); - if (rpcauth_prune_expired(entry, &free)) - continue; + entry = hlist_entry(pos, struct rpc_cred, cr_hash); if (entry->cr_ops->crmatch(acred, entry, taskflags)) { - list_del(&entry->cr_hash); + hlist_del(&entry->cr_hash); cred = entry; break; } + rpcauth_prune_expired(auth, entry, &free); } if (new) { if (cred) - list_add(&new->cr_hash, &free); + hlist_add_head(&new->cr_hash, &free); else cred 
= new; } if (cred) { - list_add(&cred->cr_hash, &auth->au_credcache[nr]); - cred->cr_auth = auth; + hlist_add_head(&cred->cr_hash, &cache->hashtable[nr]); get_rpccred(cred); } spin_unlock(&rpc_credcache_lock); @@ -239,12 +226,13 @@ retry: if (!cred) { new = auth->au_ops->crcreate(auth, acred, taskflags); - if (new) { + if (!IS_ERR(new)) { #ifdef RPC_DEBUG new->cr_magic = RPCAUTH_CRED_MAGIC; #endif goto retry; - } + } else + cred = new; } return (struct rpc_cred *) cred; @@ -253,18 +241,18 @@ retry: struct rpc_cred * rpcauth_lookupcred(struct rpc_auth *auth, int taskflags) { - struct auth_cred acred; + struct auth_cred acred = { + .uid = current->fsuid, + .gid = current->fsgid, + .group_info = current->group_info, + }; struct rpc_cred *ret; - get_group_info(current->group_info); - acred.uid = current->fsuid; - acred.gid = current->fsgid; - acred.group_info = current->group_info; - dprintk("RPC: looking up %s cred\n", auth->au_ops->au_name); - ret = rpcauth_lookup_credcache(auth, &acred, taskflags); - put_group_info(current->group_info); + get_group_info(acred.group_info); + ret = auth->au_ops->lookup_cred(auth, &acred, taskflags); + put_group_info(acred.group_info); return ret; } @@ -272,21 +260,22 @@ struct rpc_cred * rpcauth_bindcred(struct rpc_task *task) { struct rpc_auth *auth = task->tk_auth; - struct auth_cred acred; + struct auth_cred acred = { + .uid = current->fsuid, + .gid = current->fsgid, + .group_info = current->group_info, + }; struct rpc_cred *ret; - get_group_info(current->group_info); - acred.uid = current->fsuid; - acred.gid = current->fsgid; - acred.group_info = current->group_info; - dprintk("RPC: %4d looking up %s cred\n", task->tk_pid, task->tk_auth->au_ops->au_name); - task->tk_msg.rpc_cred = rpcauth_lookup_credcache(auth, &acred, task->tk_flags); - if (task->tk_msg.rpc_cred == 0) - task->tk_status = -ENOMEM; - ret = task->tk_msg.rpc_cred; - put_group_info(current->group_info); + get_group_info(acred.group_info); + ret = 
auth->au_ops->lookup_cred(auth, &acred, task->tk_flags); + if (!IS_ERR(ret)) + task->tk_msg.rpc_cred = ret; + else + task->tk_status = PTR_ERR(ret); + put_group_info(acred.group_info); return ret; } @@ -302,16 +291,10 @@ rpcauth_holdcred(struct rpc_task *task) void put_rpccred(struct rpc_cred *cred) { - if (!atomic_dec_and_lock(&cred->cr_count, &rpc_credcache_lock)) + cred->cr_expire = jiffies; + if (!atomic_dec_and_test(&cred->cr_count)) return; - - if (list_empty(&cred->cr_hash)) { - spin_unlock(&rpc_credcache_lock); - rpcauth_crdestroy(cred); - return; - } - cred->cr_expire = jiffies + cred->cr_auth->au_expire; - spin_unlock(&rpc_credcache_lock); + cred->cr_ops->crdestroy(cred); } void @@ -335,8 +318,7 @@ rpcauth_marshcred(struct rpc_task *task, dprintk("RPC: %4d marshaling %s cred %p\n", task->tk_pid, auth->au_ops->au_name, cred); - return cred->cr_ops->crmarshal(task, p, - task->tk_flags & RPC_CALL_REALUID); + return cred->cr_ops->crmarshal(task, p); } u32 * @@ -357,7 +339,7 @@ rpcauth_wrap_req(struct rpc_task *task, struct rpc_cred *cred = task->tk_msg.rpc_cred; dprintk("RPC: %4d using %s cred %p to wrap rpc data\n", - task->tk_pid, cred->cr_auth->au_ops->au_name, cred); + task->tk_pid, cred->cr_ops->cr_name, cred); if (cred->cr_ops->crwrap_req) return cred->cr_ops->crwrap_req(task, encode, rqstp, data, obj); /* By default, we encode the arguments normally. 
*/ @@ -371,7 +353,7 @@ rpcauth_unwrap_resp(struct rpc_task *tas struct rpc_cred *cred = task->tk_msg.rpc_cred; dprintk("RPC: %4d using %s cred %p to unwrap rpc data\n", - task->tk_pid, cred->cr_auth->au_ops->au_name, cred); + task->tk_pid, cred->cr_ops->cr_name, cred); if (cred->cr_ops->crunwrap_resp) return cred->cr_ops->crunwrap_resp(task, decode, rqstp, data, obj); @@ -384,11 +366,14 @@ rpcauth_refreshcred(struct rpc_task *tas { struct rpc_auth *auth = task->tk_auth; struct rpc_cred *cred = task->tk_msg.rpc_cred; + int err; dprintk("RPC: %4d refreshing %s cred %p\n", task->tk_pid, auth->au_ops->au_name, cred); - task->tk_status = cred->cr_ops->crrefresh(task); - return task->tk_status; + err = cred->cr_ops->crrefresh(task); + if (err < 0) + task->tk_status = err; + return err; } void Index: linux-2.6.11/net/sunrpc/auth_gss/auth_gss.c =================================================================== --- linux-2.6.11.orig/net/sunrpc/auth_gss/auth_gss.c +++ linux-2.6.11/net/sunrpc/auth_gss/auth_gss.c @@ -44,6 +44,7 @@ #include #include #include +#include #include #include #include @@ -55,6 +56,18 @@ #include #include +#ifdef CONFIG_RPCSEC_GSS_KEYRING + +#include + +#else + +#define gss_key_lookup_cred(auth) ERR_PTR(-ENOKEY) +#define gss_register_keytype() (0) +#define gss_unregister_keytype() do { } while(0) + +#endif + static struct rpc_authops authgss_ops; static struct rpc_credops gss_credops; @@ -83,12 +96,15 @@ static struct rpc_credops gss_credops; static DEFINE_RWLOCK(gss_ctx_lock); struct gss_auth { + spinlock_t lock; struct rpc_auth rpc_auth; struct gss_api_mech *mech; + enum rpc_gss_svc service; struct list_head upcalls; + struct rpc_clnt *client; struct dentry *dentry; char path[48]; - spinlock_t lock; + char key_name[256]; }; static void gss_destroy_ctx(struct gss_cl_ctx *); @@ -175,42 +191,34 @@ gss_cred_is_uptodate_ctx(struct rpc_cred return res; } -static inline int -simple_get_bytes(char **ptr, const char *end, void *res, int len) +static const 
void * +simple_get_bytes(const void *p, const void *end, void *res, size_t len) { - char *p, *q; - p = *ptr; - q = p + len; - if (q > end || q < p) - return -1; + const void *q = (const void *)((const char *)p + len); + if (unlikely(q > end || q < p)) + return ERR_PTR(-EFAULT); memcpy(res, p, len); - *ptr = q; - return 0; + return q; } -static inline int -simple_get_netobj(char **ptr, const char *end, struct xdr_netobj *res) +static inline const void * +simple_get_netobj(const void *p, const void *end, struct xdr_netobj *dest) { - char *p, *q; - p = *ptr; - if (simple_get_bytes(&p, end, &res->len, sizeof(res->len))) - return -1; - q = p + res->len; - if (q > end || q < p) - return -1; - res->data = p; - *ptr = q; - return 0; -} + const void *q; + unsigned int len; -static int -dup_netobj(struct xdr_netobj *source, struct xdr_netobj *dest) -{ - dest->len = source->len; - if (!(dest->data = kmalloc(dest->len, GFP_KERNEL))) - return -1; - memcpy(dest->data, source->data, dest->len); - return 0; + p = simple_get_bytes(p, end, &len, sizeof(len)); + if (IS_ERR(p)) + return p; + q = (const void *)((const char *)p + len); + if (unlikely(q > end || q < p)) + return ERR_PTR(-EFAULT); + dest->data = kmalloc(len, GFP_KERNEL); + if (unlikely(dest->data == NULL)) + return ERR_PTR(-ENOMEM); + dest->len = len; + memcpy(dest->data, p, len); + return q; } static struct gss_cl_ctx * @@ -226,74 +234,86 @@ gss_cred_get_ctx(struct rpc_cred *cred) return ctx; } -static int -gss_parse_init_downcall(struct gss_api_mech *gm, struct xdr_netobj *buf, - struct gss_cl_ctx **gc, uid_t *uid, int *gss_err) +static struct gss_cl_ctx * +gss_alloc_context(const char *principal) { - char *end = buf->data + buf->len; - char *p = buf->data; struct gss_cl_ctx *ctx; - struct xdr_netobj tmp_buf; - unsigned int timeout; - int err = -EIO; + size_t len = strlen(principal) + 1; - if (!(ctx = kmalloc(sizeof(*ctx), GFP_KERNEL))) { - err = -ENOMEM; - goto err; + ctx = kmalloc(sizeof(*ctx) + len, GFP_KERNEL); + if 
(ctx != NULL) { + memset(ctx, 0, sizeof(*ctx)); + ctx->gc_proc = RPC_GSS_PROC_DATA; + ctx->gc_seq = 1; /* NetApp 6.4R1 doesn't accept seq. no. 0 */ + spin_lock_init(&ctx->gc_seq_lock); + atomic_set(&ctx->count,1); + memcpy(ctx->gc_principal, principal, len); } - ctx->gc_proc = RPC_GSS_PROC_DATA; - ctx->gc_seq = 1; /* NetApp 6.4R1 doesn't accept seq. no. 0 */ - spin_lock_init(&ctx->gc_seq_lock); - atomic_set(&ctx->count,1); + return ctx; +} - if (simple_get_bytes(&p, end, uid, sizeof(*uid))) - goto err_free_ctx; - /* FIXME: discarded timeout for now */ - if (simple_get_bytes(&p, end, &timeout, sizeof(timeout))) - goto err_free_ctx; - *gss_err = 0; - if (simple_get_bytes(&p, end, &ctx->gc_win, sizeof(ctx->gc_win))) - goto err_free_ctx; +#define GSSD_MIN_TIMEOUT (60 * 60) +static const void * +gss_fill_context(const void *p, const void *end, struct gss_cl_ctx *ctx, struct gss_api_mech *gm) +{ + const void *q; + unsigned int seclen; + unsigned int timeout; + u32 window_size; + int ret; + + /* First unsigned int gives the lifetime (in seconds) of the cred */ + p = simple_get_bytes(p, end, &timeout, sizeof(timeout)); + if (IS_ERR(p)) + goto err; + if (timeout == 0) + timeout = GSSD_MIN_TIMEOUT; + ctx->gc_expiry = jiffies + (unsigned long)timeout * HZ * 3 / 4; + /* Sequence number window. 
Determines the maximum number of simultaneous requests */ + p = simple_get_bytes(p, end, &window_size, sizeof(window_size)); + if (IS_ERR(p)) + goto err; + ctx->gc_win = window_size; /* gssd signals an error by passing ctx->gc_win = 0: */ - if (!ctx->gc_win) { - /* in which case the next int is an error code: */ - if (simple_get_bytes(&p, end, gss_err, sizeof(*gss_err))) - goto err_free_ctx; - err = 0; - goto err_free_ctx; + if (ctx->gc_win == 0) { + /* in which case, p points to an error code which we ignore */ + p = ERR_PTR(-EACCES); + goto err; } - if (simple_get_netobj(&p, end, &tmp_buf)) - goto err_free_ctx; - if (dup_netobj(&tmp_buf, &ctx->gc_wire_ctx)) { - err = -ENOMEM; - goto err_free_ctx; + /* copy the opaque wire context */ + p = simple_get_netobj(p, end, &ctx->gc_wire_ctx); + if (IS_ERR(p)) + goto err; + /* import the opaque security context */ + p = simple_get_bytes(p, end, &seclen, sizeof(seclen)); + if (IS_ERR(p)) + goto err; + q = (const void *)((const char *)p + seclen); + if (unlikely(q > end || q < p)) { + p = ERR_PTR(-EFAULT); + goto err; } - if (simple_get_netobj(&p, end, &tmp_buf)) - goto err_free_wire_ctx; - if (p != end) - goto err_free_wire_ctx; - if (gss_import_sec_context(&tmp_buf, gm, &ctx->gc_gss_ctx)) - goto err_free_wire_ctx; - *gc = ctx; - return 0; -err_free_wire_ctx: - kfree(ctx->gc_wire_ctx.data); -err_free_ctx: - kfree(ctx); + ret = gss_import_sec_context(p, seclen, gm, &ctx->gc_gss_ctx); + if (ret < 0) { + p = ERR_PTR(ret); + goto err; + } + return q; err: - *gc = NULL; - dprintk("RPC: gss_parse_init_downcall returning %d\n", err); - return err; + dprintk("RPC: gss_fill_context returning %ld\n", -PTR_ERR(p)); + return p; } struct gss_upcall_msg { + atomic_t count; + uid_t uid; struct rpc_pipe_msg msg; struct list_head list; struct gss_auth *auth; - struct rpc_wait_queue waitq; - uid_t uid; - atomic_t count; + struct rpc_wait_queue rpc_waitqueue; + wait_queue_head_t waitqueue; + struct gss_cl_ctx *ctx; }; static void @@ -302,6 
+322,8 @@ gss_release_msg(struct gss_upcall_msg *g if (!atomic_dec_and_test(&gss_msg->count)) return; BUG_ON(!list_empty(&gss_msg->list)); + if (gss_msg->ctx != NULL) + gss_put_ctx(gss_msg->ctx); kfree(gss_msg); } @@ -320,16 +342,34 @@ __gss_find_upcall(struct gss_auth *gss_a return NULL; } +/* Try to add a upcall to the pipefs queue. + * If an upcall owned by our uid already exists, then we return a reference + * to that upcall instead of adding the new upcall. + */ +static inline struct gss_upcall_msg * +gss_add_msg(struct gss_auth *gss_auth, struct gss_upcall_msg *gss_msg) +{ + struct gss_upcall_msg *old; + + spin_lock(&gss_auth->lock); + old = __gss_find_upcall(gss_auth, gss_msg->uid); + if (old == NULL) { + atomic_inc(&gss_msg->count); + list_add(&gss_msg->list, &gss_auth->upcalls); + } else + gss_msg = old; + spin_unlock(&gss_auth->lock); + return gss_msg; +} + static void __gss_unhash_msg(struct gss_upcall_msg *gss_msg) { if (list_empty(&gss_msg->list)) return; list_del_init(&gss_msg->list); - if (gss_msg->msg.errno < 0) - rpc_wake_up_status(&gss_msg->waitq, gss_msg->msg.errno); - else - rpc_wake_up(&gss_msg->waitq); + rpc_wake_up_status(&gss_msg->rpc_waitqueue, gss_msg->msg.errno); + wake_up_all(&gss_msg->waitqueue); atomic_dec(&gss_msg->count); } @@ -343,76 +383,139 @@ gss_unhash_msg(struct gss_upcall_msg *gs spin_unlock(&gss_auth->lock); } -static int -gss_upcall(struct rpc_clnt *clnt, struct rpc_task *task, struct rpc_cred *cred) +static void +gss_upcall_callback(struct rpc_task *task) { - struct gss_auth *gss_auth = container_of(clnt->cl_auth, - struct gss_auth, rpc_auth); - struct gss_upcall_msg *gss_msg, *gss_new = NULL; - struct rpc_pipe_msg *msg; - struct dentry *dentry = gss_auth->dentry; - uid_t uid = cred->cr_uid; - int res = 0; + struct gss_cred *gss_cred = container_of(task->tk_msg.rpc_cred, + struct gss_cred, gc_base); + struct gss_upcall_msg *gss_msg = gss_cred->gc_upcall; + + BUG_ON(gss_msg == NULL); + if (gss_msg->ctx) + 
gss_cred_set_ctx(task->tk_msg.rpc_cred, gss_get_ctx(gss_msg->ctx)); + else + task->tk_status = gss_msg->msg.errno; + spin_lock(&gss_msg->auth->lock); + gss_cred->gc_upcall = NULL; + rpc_wake_up_status(&gss_msg->rpc_waitqueue, gss_msg->msg.errno); + spin_unlock(&gss_msg->auth->lock); + gss_release_msg(gss_msg); +} - dprintk("RPC: %4u gss_upcall for uid %u\n", task->tk_pid, uid); +static inline struct gss_upcall_msg * +gss_alloc_msg(struct gss_auth *gss_auth, uid_t uid) +{ + struct gss_upcall_msg *gss_msg; -retry: - spin_lock(&gss_auth->lock); - gss_msg = __gss_find_upcall(gss_auth, uid); - if (gss_msg) - goto out_sleep; - if (gss_new == NULL) { - spin_unlock(&gss_auth->lock); - gss_new = kmalloc(sizeof(*gss_new), GFP_KERNEL); - if (!gss_new) { - dprintk("RPC: %4u gss_upcall -ENOMEM\n", task->tk_pid); - return -ENOMEM; + gss_msg = kmalloc(sizeof(*gss_msg), GFP_KERNEL); + if (gss_msg != NULL) { + memset(gss_msg, 0, sizeof(*gss_msg)); + INIT_LIST_HEAD(&gss_msg->list); + rpc_init_wait_queue(&gss_msg->rpc_waitqueue, "RPCSEC_GSS upcall waitq"); + init_waitqueue_head(&gss_msg->waitqueue); + atomic_set(&gss_msg->count, 1); + gss_msg->msg.data = &gss_msg->uid; + gss_msg->msg.len = sizeof(gss_msg->uid); + gss_msg->uid = uid; + gss_msg->auth = gss_auth; + } + return gss_msg; +} + +static struct gss_upcall_msg * +gss_setup_upcall(struct rpc_clnt *clnt, struct gss_auth *gss_auth, struct rpc_cred *cred) +{ + struct gss_upcall_msg *gss_new, *gss_msg; + + gss_new = gss_alloc_msg(gss_auth, cred->cr_uid); + if (gss_new == NULL) + return ERR_PTR(-ENOMEM); + gss_msg = gss_add_msg(gss_auth, gss_new); + if (gss_msg == gss_new) { + int res = rpc_queue_upcall(gss_auth->dentry->d_inode, &gss_new->msg); + if (res) { + gss_unhash_msg(gss_new); + gss_msg = ERR_PTR(res); } - goto retry; + } else + gss_release_msg(gss_new); + return gss_msg; +} + +static inline int +gss_refresh_upcall(struct rpc_task *task) +{ + struct rpc_cred *cred = task->tk_msg.rpc_cred; + struct gss_auth *gss_auth = 
container_of(task->tk_client->cl_auth, + struct gss_auth, rpc_auth); + struct gss_cred *gss_cred = container_of(cred, + struct gss_cred, gc_base); + struct gss_upcall_msg *gss_msg; + int err = 0; + + dprintk("RPC: %4u gss_refresh_upcall for uid %u\n", task->tk_pid, cred->cr_uid); + gss_msg = gss_setup_upcall(task->tk_client, gss_auth, cred); + if (IS_ERR(gss_msg)) { + err = PTR_ERR(gss_msg); + goto out; } - gss_msg = gss_new; - memset(gss_new, 0, sizeof(*gss_new)); - INIT_LIST_HEAD(&gss_new->list); - rpc_init_wait_queue(&gss_new->waitq, "RPCSEC_GSS upcall waitq"); - atomic_set(&gss_new->count, 2); - msg = &gss_new->msg; - msg->data = &gss_new->uid; - msg->len = sizeof(gss_new->uid); - gss_new->uid = uid; - gss_new->auth = gss_auth; - list_add(&gss_new->list, &gss_auth->upcalls); - gss_new = NULL; - /* Has someone updated the credential behind our back? */ - if (!gss_cred_is_uptodate_ctx(cred)) { - /* No, so do upcall and sleep */ + spin_lock(&gss_auth->lock); + if (gss_cred->gc_upcall != NULL) + rpc_sleep_on(&gss_cred->gc_upcall->rpc_waitqueue, task, NULL, NULL); + else if (gss_msg->ctx == NULL && gss_msg->msg.errno >= 0) { task->tk_timeout = 0; - rpc_sleep_on(&gss_msg->waitq, task, NULL, NULL); - spin_unlock(&gss_auth->lock); - res = rpc_queue_upcall(dentry->d_inode, msg); - if (res) - gss_unhash_msg(gss_msg); - } else { - /* Yes, so cancel upcall */ - __gss_unhash_msg(gss_msg); + gss_cred->gc_upcall = gss_msg; + /* gss_upcall_callback will release the reference to gss_upcall_msg */ + atomic_inc(&gss_msg->count); + rpc_sleep_on(&gss_msg->rpc_waitqueue, task, gss_upcall_callback, NULL); + } else + err = gss_msg->msg.errno; + spin_unlock(&gss_auth->lock); + gss_release_msg(gss_msg); +out: + dprintk("RPC: %4u gss_refresh_upcall for uid %u result %d\n", task->tk_pid, + cred->cr_uid, err); + return err; +} + +static inline int +gss_create_upcall(struct gss_auth *gss_auth, struct gss_cred *gss_cred) +{ + struct rpc_cred *cred = &gss_cred->gc_base; + struct 
gss_upcall_msg *gss_msg; + DEFINE_WAIT(wait); + int err = 0; + + dprintk("RPC: gss_upcall for uid %u\n", cred->cr_uid); + gss_msg = gss_setup_upcall(gss_auth->client, gss_auth, cred); + if (IS_ERR(gss_msg)) { + err = PTR_ERR(gss_msg); + goto out; + } + for (;;) { + prepare_to_wait(&gss_msg->waitqueue, &wait, TASK_INTERRUPTIBLE); + spin_lock(&gss_auth->lock); + if (gss_msg->ctx != NULL || gss_msg->msg.errno < 0) { + spin_unlock(&gss_auth->lock); + break; + } spin_unlock(&gss_auth->lock); + if (signalled()) { + err = -ERESTARTSYS; + goto out_intr; + } + schedule(); } + if (gss_msg->ctx) + gss_cred_set_ctx(cred, gss_get_ctx(gss_msg->ctx)); + else + err = gss_msg->msg.errno; +out_intr: + finish_wait(&gss_msg->waitqueue, &wait); gss_release_msg(gss_msg); - dprintk("RPC: %4u gss_upcall for uid %u result %d\n", task->tk_pid, - uid, res); - return res; -out_sleep: - task->tk_timeout = 0; - rpc_sleep_on(&gss_msg->waitq, task, NULL, NULL); - spin_unlock(&gss_auth->lock); - dprintk("RPC: %4u gss_upcall sleeping\n", task->tk_pid); - if (gss_new) - kfree(gss_new); - /* Note: we drop the reference here: we are automatically removed - * from the queue when we're woken up, and we should in any case - * have no further responsabilities w.r.t. the upcall. 
- */ - gss_release_msg(gss_msg); - return 0; +out: + dprintk("RPC: gss_create_upcall for uid %u result %d\n", cred->cr_uid, err); + return err; } static ssize_t @@ -441,68 +544,77 @@ gss_pipe_upcall(struct file *filp, struc static ssize_t gss_pipe_downcall(struct file *filp, const char __user *src, size_t mlen) { - struct xdr_netobj obj = { - .len = mlen, - }; - struct inode *inode = filp->f_dentry->d_inode; - struct rpc_inode *rpci = RPC_I(inode); + const void *p, *end; + void *buf; struct rpc_clnt *clnt; - struct rpc_auth *auth; struct gss_auth *gss_auth; - struct gss_api_mech *mech; - struct auth_cred acred = { 0 }; struct rpc_cred *cred; struct gss_upcall_msg *gss_msg; - struct gss_cl_ctx *ctx = NULL; - ssize_t left; - int err; - int gss_err; + struct gss_cl_ctx *ctx; + char principal[32]; + uid_t uid; + int err = -EFBIG; if (mlen > MSG_BUF_MAXSIZE) - return -EFBIG; - obj.data = kmalloc(mlen, GFP_KERNEL); - if (!obj.data) - return -ENOMEM; - left = copy_from_user(obj.data, src, mlen); - if (left) { - err = -EFAULT; goto out; - } - clnt = rpci->private; - atomic_inc(&clnt->cl_users); - auth = clnt->cl_auth; - gss_auth = container_of(auth, struct gss_auth, rpc_auth); - mech = gss_auth->mech; - err = gss_parse_init_downcall(mech, &obj, &ctx, &acred.uid, &gss_err); - if (err) + err = -ENOMEM; + buf = kmalloc(mlen, GFP_KERNEL); + if (!buf) + goto out; + + clnt = RPC_I(filp->f_dentry->d_inode)->private; + err = -EFAULT; + if (copy_from_user(buf, src, mlen)) goto err; - cred = rpcauth_lookup_credcache(auth, &acred, 0); - if (!cred) + + end = (const void *)((char *)buf + mlen); + p = simple_get_bytes(buf, end, &uid, sizeof(uid)); + if (IS_ERR(p)) { + err = PTR_ERR(p); goto err; - if (gss_err) - cred->cr_flags &= ~RPCAUTH_CRED_UPTODATE; - else - gss_cred_set_ctx(cred, ctx); + } + + err = -ENOMEM; + snprintf(principal, sizeof(principal), "%u@%s", uid, clnt->cl_server); + ctx = gss_alloc_context(principal); + if (ctx == NULL) + goto err; + err = 0; + gss_auth = 
container_of(clnt->cl_auth, struct gss_auth, rpc_auth); + p = gss_fill_context(p, end, ctx, gss_auth->mech); + if (IS_ERR(p)) { + err = PTR_ERR(p); + if (err != -EACCES) + goto err_put_ctx; + } spin_lock(&gss_auth->lock); - gss_msg = __gss_find_upcall(gss_auth, acred.uid); + gss_msg = __gss_find_upcall(gss_auth, uid); if (gss_msg) { - if (gss_err) - gss_msg->msg.errno = -EACCES; + if (err == 0 && gss_msg->ctx == NULL) + gss_msg->ctx = gss_get_ctx(ctx); + gss_msg->msg.errno = err; __gss_unhash_msg(gss_msg); spin_unlock(&gss_auth->lock); gss_release_msg(gss_msg); - } else + } else { + struct auth_cred acred = { .uid = uid }; spin_unlock(&gss_auth->lock); - rpc_release_client(clnt); - kfree(obj.data); + cred = rpcauth_lookup_credcache(clnt->cl_auth, &acred, 0); + if (IS_ERR(cred)) { + err = PTR_ERR(cred); + goto err_put_ctx; + } + gss_cred_set_ctx(cred, gss_get_ctx(ctx)); + } + gss_put_ctx(ctx); + kfree(buf); dprintk("RPC: gss_pipe_downcall returning length %Zu\n", mlen); return mlen; +err_put_ctx: + gss_put_ctx(ctx); err: - if (ctx) - gss_destroy_ctx(ctx); - rpc_release_client(clnt); + kfree(buf); out: - kfree(obj.data); dprintk("RPC: gss_pipe_downcall returning %d\n", err); return err; } @@ -569,24 +681,39 @@ gss_create(struct rpc_clnt *clnt, rpc_au dprintk("RPC: creating GSS authenticator for client %p\n",clnt); + if (!try_module_get(THIS_MODULE)) + return NULL; if (!(gss_auth = kmalloc(sizeof(*gss_auth), GFP_KERNEL))) goto out_dec; + gss_auth->client = clnt; gss_auth->mech = gss_mech_get_by_pseudoflavor(flavor); if (!gss_auth->mech) { printk(KERN_WARNING "%s: Pseudoflavor %d not found!", __FUNCTION__, flavor); goto err_free; } + gss_auth->service = gss_pseudoflavor_to_service(gss_auth->mech, flavor); + /* FIXME: Will go away once privacy support is merged in */ + if (gss_auth->service == RPC_GSS_SVC_PRIVACY) + gss_auth->service = RPC_GSS_SVC_INTEGRITY; INIT_LIST_HEAD(&gss_auth->upcalls); spin_lock_init(&gss_auth->lock); auth = &gss_auth->rpc_auth; auth->au_cslack 
= GSS_CRED_SLACK >> 2; auth->au_rslack = GSS_VERF_SLACK >> 2; - auth->au_expire = GSS_CRED_EXPIRE; auth->au_ops = &authgss_ops; auth->au_flavor = flavor; + atomic_set(&auth->au_count, 1); - rpcauth_init_credcache(auth); + if (rpcauth_init_credcache(auth, GSS_CRED_EXPIRE) < 0) + goto err_put_mech; + + snprintf(gss_auth->key_name, sizeof(gss_auth->key_name), + "mechanism=\"%s\" service=\"%s%u\" host=\"%s\"", + gss_auth->mech->gm_pfs[gss_auth->service-RPC_GSS_SVC_NONE].name, + clnt->cl_protname, + clnt->cl_vers, + clnt->cl_server); snprintf(gss_auth->path, sizeof(gss_auth->path), "%s/%s", clnt->cl_pathname, @@ -601,6 +728,7 @@ err_put_mech: err_free: kfree(gss_auth); out_dec: + module_put(THIS_MODULE); return NULL; } @@ -617,6 +745,8 @@ gss_destroy(struct rpc_auth *auth) gss_mech_put(gss_auth->mech); rpcauth_free_credcache(auth); + kfree(gss_auth); + module_put(THIS_MODULE); } /* gss_destroy_cred (and gss_destroy_ctx) are used to clean up after failure @@ -630,60 +760,330 @@ gss_destroy_ctx(struct gss_cl_ctx *ctx) if (ctx->gc_gss_ctx) gss_delete_sec_context(&ctx->gc_gss_ctx); - if (ctx->gc_wire_ctx.len > 0) { - kfree(ctx->gc_wire_ctx.data); - ctx->gc_wire_ctx.len = 0; - } - + kfree(ctx->gc_wire_ctx.data); kfree(ctx); +} + +static inline struct gss_cred * +gss_alloc_cred(struct gss_auth *gss_auth) +{ + struct gss_cred *cred; + dprintk("RPC: gss_alloc_cred \n"); + + cred = kmalloc(sizeof(*cred), GFP_KERNEL); + if (cred != NULL) { + memset(cred, 0, sizeof(*cred)); + atomic_set(&cred->gc_count, 1); + cred->gc_base.cr_ops = &gss_credops; + cred->gc_service = gss_auth->service; + } + return cred; } static void gss_destroy_cred(struct rpc_cred *rc) { - struct gss_cred *cred = (struct gss_cred *)rc; + struct gss_cred *cred = container_of(rc, struct gss_cred, gc_base); dprintk("RPC: gss_destroy_cred \n"); if (cred->gc_ctx) gss_put_ctx(cred->gc_ctx); + if (cred->gc_key) + key_put(cred->gc_key); kfree(cred); } +#ifdef CONFIG_RPCSEC_GSS_KEYRING +static inline const void * 
+simple_skip_bytes(const void *p, const void *end, size_t len) +{ + const void *q = (const void *)((const char *)p + len); + if (unlikely(q > end || q < p)) + return ERR_PTR(-EFAULT); + return q; +} + + +static struct gss_cl_ctx * +gss_key_read_context(const void *p, const void *end) +{ + struct gss_cl_ctx *ctx; + struct gss_api_mech *mech; + size_t len, maxlen; + + /* First up should be the name of the mechanism */ + maxlen = end - p; + len = strnlen((const char *)p, maxlen); + if (len == maxlen) + return ERR_PTR(-EFAULT); + /* find the mechanism */ + mech = gss_mech_get_by_name((const char *)p); + if (mech == NULL) + return ERR_PTR(-EINVAL); + p = simple_skip_bytes(p, end, len + 1); + if (IS_ERR(p)) + goto err_put_mech; + /* Next we want the name of the principal */ + maxlen = end - p; + len = strnlen((const char *)p, maxlen); + if (len == maxlen) { + p = ERR_PTR(-EFAULT); + goto err_put_mech; + } + ctx = gss_alloc_context((const char *)p); + p = simple_skip_bytes(p, end, len + 1); + if (IS_ERR(p)) + goto err_free_ctx; + /* Now read in context */ + p = gss_fill_context(p, end, ctx, mech); + if (IS_ERR(p)) + goto err_free_ctx; + return ctx; +err_free_ctx: + kfree(ctx); +err_put_mech: + gss_mech_put(mech); + return (struct gss_cl_ctx *)p; +} + +static int +gss_key_instantiate(struct key *key, const void *p, size_t buflen) +{ + const void *end = (const void *)((const char *)p + buflen); + struct gss_cl_ctx *ctx; + + ctx = gss_key_read_context(p, end); + if (IS_ERR(ctx)) + goto err; + write_lock(&key->lock); + key->payload.data = ctx; + key->expiry = get_seconds() + (ctx->gc_expiry - jiffies)/HZ; + write_unlock(&key->lock); + return 0; +err: + return PTR_ERR(ctx); +} + +static int +gss_key_duplicate(struct key *key, const struct key *source) +{ + struct gss_cl_ctx *ctx = (struct gss_cl_ctx *)source->payload.data; + + if (ctx != NULL) { + gss_mech_get(ctx->gc_gss_ctx->mech_type); + write_lock(&key->lock); + key->payload.data = gss_get_ctx(ctx); + key->expiry = 
source->expiry; + write_unlock(&key->lock); + } + return 0; +} + +static int +gss_key_update(struct key *key, const void *p, size_t buflen) +{ + const void *end = (const void *)((const char *)p + buflen); + struct gss_cl_ctx *ctx, *old; + + ctx = gss_key_read_context(p, end); + if (IS_ERR(ctx)) + goto err; + write_lock(&key->lock); + old = (struct gss_cl_ctx *) key->payload.data; + key->payload.data = ctx; + key->expiry = get_seconds() + (ctx->gc_expiry - jiffies)/HZ; + write_unlock(&key->lock); + if (old) + gss_put_ctx(old); + return 0; +err: + return PTR_ERR(ctx); +} + +static int +gss_key_match(const struct key *key, const void *description) +{ + return key->description != NULL && + strcmp(key->description, description) == 0; +} + +static void +gss_key_destroy(struct key *key) +{ + struct gss_cl_ctx *ctx = (struct gss_cl_ctx *)key->payload.data; + if (ctx != NULL) { + struct gss_api_mech *mech = ctx->gc_gss_ctx->mech_type; + gss_put_ctx(ctx); + gss_mech_put(mech); + } +} + +static void +gss_key_describe(const struct key *key, struct seq_file *m) +{ + struct gss_cl_ctx *ctx = NULL; + + seq_puts(m, key->description); + + if (key->payload.data) + ctx = gss_get_ctx((struct gss_cl_ctx *)key->payload.data); + if (ctx != NULL) { + seq_printf(m, ": %s", ctx->gc_principal); + gss_put_ctx(ctx); + } else + seq_printf(m, ": "); +} + +static struct key_type key_type_rpcsec_context = { + .name = "rpcsec_gss context", + .def_datalen = sizeof(struct gss_cl_ctx) + sizeof(struct gss_ctx), + .instantiate = gss_key_instantiate, + .duplicate = gss_key_duplicate, + .update = gss_key_update, + .match = gss_key_match, + .destroy = gss_key_destroy, + .describe = gss_key_describe, +}; + +static struct key * +gss_request_key(struct gss_auth *gss_auth) +{ + struct key *key; + struct rpc_clnt *clnt = gss_auth->client; + char args[384]; + + snprintf(args, sizeof(args), "%s ip=\"%u.%u.%u.%u\" port=\"%u\" proto=\"%s\"", + gss_auth->key_name, + NIPQUAD(clnt->cl_xprt->addr.sin_addr.s_addr), + 
clnt->cl_port, + clnt->cl_prot == IPPROTO_TCP ? "tcp" : "udp"); + dprintk("%s: requesting key %s with args %s\n", __FUNCTION__, + gss_auth->key_name, args); + + key = request_key(&key_type_rpcsec_context, gss_auth->key_name, args); + if (IS_ERR(key)) + goto out_err; + dprintk("%s: returned success\n", __FUNCTION__); + return key; +out_err: + dprintk("%s: returned error %ld\n", __FUNCTION__, -PTR_ERR(key)); + return key; +} + + +static inline struct gss_cl_ctx * +gss_key_lookup_context(struct key *key) +{ + struct gss_cl_ctx *ctx = ERR_PTR(-ENOKEY); + + read_lock(&key->lock); + if (key->payload.data != NULL) + ctx = gss_get_ctx((struct gss_cl_ctx *)key->payload.data); + read_unlock(&key->lock); + return ctx; +} + +static inline struct rpc_cred * +gss_key_lookup_cred(struct rpc_auth *auth) +{ + struct gss_auth *gss_auth = container_of(auth, struct gss_auth, rpc_auth); + struct gss_cred *gss_cred; + struct gss_cl_ctx *ctx; + struct key *key; + void *err; + + err = key = gss_request_key(gss_auth); + if (IS_ERR(key)) + goto out_no_key; + err = ctx = gss_key_lookup_context(key); + if (IS_ERR(ctx)) + goto out_put_key; + gss_cred = gss_alloc_cred(gss_auth); + if (gss_cred == NULL) + goto out_no_cred; + gss_cred_set_ctx(&gss_cred->gc_base, ctx); + gss_cred->gc_key = key; + return &gss_cred->gc_base; +out_no_cred: + err = ERR_PTR(-ENOMEM); +out_put_key: + key_put(key); +out_no_key: + return (struct rpc_cred *)err; +} + +static inline int +gss_register_keytype(void) +{ + return register_key_type(&key_type_rpcsec_context); +} + +static inline void +gss_unregister_keytype(void) +{ + unregister_key_type(&key_type_rpcsec_context); +} +#endif + +/* + * Lookup RPCSEC_GSS cred for the current process + */ +static struct rpc_cred * +gss_lookup_cred(struct rpc_auth *auth, struct auth_cred *acred, int taskflags) +{ + struct rpc_cred *cred; + + /* Try to use the keyring upcall first */ + cred = gss_key_lookup_cred(auth); + if (!IS_ERR(cred)) + goto out; + switch (PTR_ERR(cred)) { + case 
-EKEYREVOKED: + case -EKEYEXPIRED: + /* Translate into EACCES */ + cred = ERR_PTR(-EACCES); + break; + case -ENOKEY: + cred = rpcauth_lookup_credcache(auth, acred, taskflags); + }; +out: + return cred; +} + static struct rpc_cred * gss_create_cred(struct rpc_auth *auth, struct auth_cred *acred, int taskflags) { + struct gss_auth *gss_auth = container_of(auth, struct gss_auth, rpc_auth); struct gss_cred *cred = NULL; + int err = -ENOMEM; dprintk("RPC: gss_create_cred for uid %d, flavor %d\n", acred->uid, auth->au_flavor); - if (!(cred = kmalloc(sizeof(*cred), GFP_KERNEL))) + cred = gss_alloc_cred(gss_auth); + if (cred == NULL) goto out_err; - - memset(cred, 0, sizeof(*cred)); - atomic_set(&cred->gc_count, 0); cred->gc_uid = acred->uid; - /* - * Note: in order to force a call to call_refresh(), we deliberately - * fail to flag the credential as RPCAUTH_CRED_UPTODATE. - */ - cred->gc_flags = 0; - cred->gc_base.cr_ops = &gss_credops; - cred->gc_flavor = auth->au_flavor; - - return (struct rpc_cred *) cred; - + err = gss_create_upcall(gss_auth, cred); + if (err < 0) + goto out_err; + return &cred->gc_base; out_err: - dprintk("RPC: gss_create_cred failed\n"); - if (cred) gss_destroy_cred((struct rpc_cred *)cred); - return NULL; + dprintk("RPC: gss_create_cred failed with error %d\n", err); + if (cred) gss_destroy_cred(&cred->gc_base); + return ERR_PTR(err); } static int gss_match(struct auth_cred *acred, struct rpc_cred *rc, int taskflags) { + struct gss_cred *gss_cred = container_of(rc, struct gss_cred, gc_base); + + /* Don't match with creds that have expired. */ + if (gss_cred->gc_ctx && time_after(jiffies, gss_cred->gc_ctx->gc_expiry)) + return 0; return (rc->cr_uid == acred->uid); } @@ -692,7 +1092,7 @@ gss_match(struct auth_cred *acred, struc * Maybe we should keep a cached credential for performance reasons. 
*/ static u32 * -gss_marshal(struct rpc_task *task, u32 *p, int ruid) +gss_marshal(struct rpc_task *task, u32 *p) { struct rpc_cred *cred = task->tk_msg.rpc_cred; struct gss_cred *gss_cred = container_of(cred, struct gss_cred, @@ -704,20 +1104,12 @@ gss_marshal(struct rpc_task *task, u32 * struct xdr_netobj mic; struct kvec iov; struct xdr_buf verf_buf; - u32 service; dprintk("RPC: %4u gss_marshal\n", task->tk_pid); *p++ = htonl(RPC_AUTH_GSS); cred_len = p++; - service = gss_pseudoflavor_to_service(ctx->gc_gss_ctx->mech_type, - gss_cred->gc_flavor); - if (service == 0) { - dprintk("RPC: %4u Bad pseudoflavor %d in gss_marshal\n", - task->tk_pid, gss_cred->gc_flavor); - goto out_put_ctx; - } spin_lock(&ctx->gc_seq_lock); req->rq_seqno = ctx->gc_seq++; spin_unlock(&ctx->gc_seq_lock); @@ -725,7 +1117,7 @@ gss_marshal(struct rpc_task *task, u32 * *p++ = htonl((u32) RPC_GSS_VERSION); *p++ = htonl((u32) ctx->gc_proc); *p++ = htonl((u32) req->rq_seqno); - *p++ = htonl((u32) service); + *p++ = htonl((u32) gss_cred->gc_service); p = xdr_encode_netobj(p, &ctx->gc_wire_ctx); *cred_len = htonl((p - (cred_len + 1)) << 2); @@ -765,11 +1157,9 @@ out_put_ctx: static int gss_refresh(struct rpc_task *task) { - struct rpc_clnt *clnt = task->tk_client; - struct rpc_cred *cred = task->tk_msg.rpc_cred; - if (!gss_cred_is_uptodate_ctx(cred)) - return gss_upcall(clnt, task, cred); + if (!gss_cred_is_uptodate_ctx(task->tk_msg.rpc_cred)) + return gss_refresh_upcall(task); return 0; } @@ -785,7 +1175,6 @@ gss_validate(struct rpc_task *task, u32 struct xdr_buf verf_buf; struct xdr_netobj mic; u32 flav,len; - u32 service; u32 maj_stat; dprintk("RPC: %4u gss_validate\n", task->tk_pid); @@ -807,9 +1196,7 @@ gss_validate(struct rpc_task *task, u32 cred->cr_flags &= ~RPCAUTH_CRED_UPTODATE; if (maj_stat) goto out_bad; - service = gss_pseudoflavor_to_service(ctx->gc_gss_ctx->mech_type, - gss_cred->gc_flavor); - switch (service) { + switch (gss_cred->gc_service) { case RPC_GSS_SVC_NONE: /* verifier 
data, flavor, length: */ task->tk_auth->au_rslack = XDR_QUADLEN(len) + 2; @@ -818,7 +1205,7 @@ gss_validate(struct rpc_task *task, u32 /* verifier data, flavor, length, length, sequence number: */ task->tk_auth->au_rslack = XDR_QUADLEN(len) + 4; break; - default: + case RPC_GSS_SVC_PRIVACY: goto out_bad; } gss_put_ctx(ctx); @@ -889,7 +1276,6 @@ gss_wrap_req(struct rpc_task *task, gc_base); struct gss_cl_ctx *ctx = gss_cred_get_ctx(cred); int status = -EIO; - u32 service; dprintk("RPC: %4u gss_wrap_req\n", task->tk_pid); if (ctx->gc_proc != RPC_GSS_PROC_DATA) { @@ -899,19 +1285,16 @@ gss_wrap_req(struct rpc_task *task, status = encode(rqstp, p, obj); goto out; } - service = gss_pseudoflavor_to_service(ctx->gc_gss_ctx->mech_type, - gss_cred->gc_flavor); - switch (service) { + switch (gss_cred->gc_service) { case RPC_GSS_SVC_NONE: status = encode(rqstp, p, obj); - goto out; + break; case RPC_GSS_SVC_INTEGRITY: status = gss_wrap_req_integ(cred, ctx, encode, rqstp, p, obj); - goto out; - case RPC_GSS_SVC_PRIVACY: - default: - goto out; + break; + case RPC_GSS_SVC_PRIVACY: + break; } out: gss_put_ctx(ctx); @@ -966,23 +1349,19 @@ gss_unwrap_resp(struct rpc_task *task, gc_base); struct gss_cl_ctx *ctx = gss_cred_get_ctx(cred); int status = -EIO; - u32 service; if (ctx->gc_proc != RPC_GSS_PROC_DATA) goto out_decode; - service = gss_pseudoflavor_to_service(ctx->gc_gss_ctx->mech_type, - gss_cred->gc_flavor); - switch (service) { + switch (gss_cred->gc_service) { case RPC_GSS_SVC_NONE: - goto out_decode; + break; case RPC_GSS_SVC_INTEGRITY: status = gss_unwrap_resp_integ(cred, ctx, rqstp, &p); if (status) goto out; break; - case RPC_GSS_SVC_PRIVACY: - default: - goto out; + case RPC_GSS_SVC_PRIVACY: + break; } out_decode: status = decode(rqstp, p, obj); @@ -1001,10 +1380,12 @@ static struct rpc_authops authgss_ops = #endif .create = gss_create, .destroy = gss_destroy, + .lookup_cred = gss_lookup_cred, .crcreate = gss_create_cred }; static struct rpc_credops gss_credops = { + 
.cr_name = "AUTH_GSS", .crdestroy = gss_destroy_cred, .crmatch = gss_match, .crmarshal = gss_marshal, @@ -1034,7 +1415,12 @@ static int __init init_rpcsec_gss(void) err = gss_svc_init(); if (err) goto out_unregister; + err = gss_register_keytype(); + if (err) + goto out_shutdown_svc; return 0; +out_shutdown_svc: + gss_svc_shutdown(); out_unregister: rpcauth_unregister(&authgss_ops); out: @@ -1043,6 +1429,7 @@ out: static void __exit exit_rpcsec_gss(void) { + gss_unregister_keytype(); gss_svc_shutdown(); rpcauth_unregister(&authgss_ops); } Index: linux-2.6.11/net/sunrpc/auth_gss/gss_krb5_mech.c =================================================================== --- linux-2.6.11.orig/net/sunrpc/auth_gss/gss_krb5_mech.c +++ linux-2.6.11/net/sunrpc/auth_gss/gss_krb5_mech.c @@ -48,46 +48,48 @@ # define RPCDBG_FACILITY RPCDBG_AUTH #endif -static inline int -get_bytes(char **ptr, const char *end, void *res, int len) +static const void * +simple_get_bytes(const void *p, const void *end, void *res, int len) { - char *p, *q; - p = *ptr; - q = p + len; - if (q > end || q < p) - return -1; + const void *q = (const void *)((const char *)p + len); + if (unlikely(q > end || q < p)) + return ERR_PTR(-EFAULT); memcpy(res, p, len); - *ptr = q; - return 0; + return q; } -static inline int -get_netobj(char **ptr, const char *end, struct xdr_netobj *res) +static const void * +simple_get_netobj(const void *p, const void *end, struct xdr_netobj *res) { - char *p, *q; - p = *ptr; - if (get_bytes(&p, end, &res->len, sizeof(res->len))) - return -1; - q = p + res->len; - if (q > end || q < p) - return -1; - if (!(res->data = kmalloc(res->len, GFP_KERNEL))) - return -1; - memcpy(res->data, p, res->len); - *ptr = q; - return 0; + const void *q; + unsigned int len; + + p = simple_get_bytes(p, end, &len, sizeof(len)); + if (IS_ERR(p)) + return p; + q = (const void *)((const char *)p + len); + if (unlikely(q > end || q < p)) + return ERR_PTR(-EFAULT); + res->data = kmalloc(len, GFP_KERNEL); + if 
(unlikely(res->data == NULL)) + return ERR_PTR(-ENOMEM); + memcpy(res->data, p, len); + res->len = len; + return q; } -static inline int -get_key(char **p, char *end, struct crypto_tfm **res) +static inline const void * +get_key(const void *p, const void *end, struct crypto_tfm **res) { struct xdr_netobj key; int alg, alg_mode; char *alg_name; - if (get_bytes(p, end, &alg, sizeof(alg))) + p = simple_get_bytes(p, end, &alg, sizeof(alg)); + if (IS_ERR(p)) goto out_err; - if ((get_netobj(p, end, &key))) + p = simple_get_netobj(p, end, &key); + if (IS_ERR(p)) goto out_err; switch (alg) { @@ -105,50 +107,63 @@ get_key(char **p, char *end, struct cryp goto out_err_free_tfm; kfree(key.data); - return 0; + return p; out_err_free_tfm: crypto_free_tfm(*res); out_err_free_key: kfree(key.data); + p = ERR_PTR(-EINVAL); out_err: - return -1; + return p; } -static u32 -gss_import_sec_context_kerberos(struct xdr_netobj *inbuf, +static int +gss_import_sec_context_kerberos(const void *p, + size_t len, struct gss_ctx *ctx_id) { - char *p = inbuf->data; - char *end = inbuf->data + inbuf->len; + const void *end = (const void *)((const char *)p + len); struct krb5_ctx *ctx; if (!(ctx = kmalloc(sizeof(*ctx), GFP_KERNEL))) goto out_err; memset(ctx, 0, sizeof(*ctx)); - if (get_bytes(&p, end, &ctx->initiate, sizeof(ctx->initiate))) + p = simple_get_bytes(p, end, &ctx->initiate, sizeof(ctx->initiate)); + if (IS_ERR(p)) goto out_err_free_ctx; - if (get_bytes(&p, end, &ctx->seed_init, sizeof(ctx->seed_init))) + p = simple_get_bytes(p, end, &ctx->seed_init, sizeof(ctx->seed_init)); + if (IS_ERR(p)) goto out_err_free_ctx; - if (get_bytes(&p, end, ctx->seed, sizeof(ctx->seed))) + p = simple_get_bytes(p, end, ctx->seed, sizeof(ctx->seed)); + if (IS_ERR(p)) goto out_err_free_ctx; - if (get_bytes(&p, end, &ctx->signalg, sizeof(ctx->signalg))) + p = simple_get_bytes(p, end, &ctx->signalg, sizeof(ctx->signalg)); + if (IS_ERR(p)) goto out_err_free_ctx; - if (get_bytes(&p, end, &ctx->sealalg, 
sizeof(ctx->sealalg))) + p = simple_get_bytes(p, end, &ctx->sealalg, sizeof(ctx->sealalg)); + if (IS_ERR(p)) goto out_err_free_ctx; - if (get_bytes(&p, end, &ctx->endtime, sizeof(ctx->endtime))) + p = simple_get_bytes(p, end, &ctx->endtime, sizeof(ctx->endtime)); + if (IS_ERR(p)) goto out_err_free_ctx; - if (get_bytes(&p, end, &ctx->seq_send, sizeof(ctx->seq_send))) + p = simple_get_bytes(p, end, &ctx->seq_send, sizeof(ctx->seq_send)); + if (IS_ERR(p)) goto out_err_free_ctx; - if (get_netobj(&p, end, &ctx->mech_used)) + p = simple_get_netobj(p, end, &ctx->mech_used); + if (IS_ERR(p)) goto out_err_free_ctx; - if (get_key(&p, end, &ctx->enc)) + p = get_key(p, end, &ctx->enc); + if (IS_ERR(p)) goto out_err_free_mech; - if (get_key(&p, end, &ctx->seq)) + p = get_key(p, end, &ctx->seq); + if (IS_ERR(p)) goto out_err_free_key1; - if (p != end) + if (p != end) { + p = ERR_PTR(-EFAULT); goto out_err_free_key2; + } ctx_id->internal_ctx_id = ctx; dprintk("RPC: Succesfully imported new context.\n"); @@ -163,7 +178,7 @@ out_err_free_mech: out_err_free_ctx: kfree(ctx); out_err: - return GSS_S_FAILURE; + return PTR_ERR(p); } static void Index: linux-2.6.11/net/sunrpc/auth_gss/gss_mech_switch.c =================================================================== --- linux-2.6.11.orig/net/sunrpc/auth_gss/gss_mech_switch.c +++ linux-2.6.11/net/sunrpc/auth_gss/gss_mech_switch.c @@ -143,7 +143,7 @@ gss_mech_get(struct gss_api_mech *gm) EXPORT_SYMBOL(gss_mech_get); struct gss_api_mech * -gss_mech_get_by_name(char *name) +gss_mech_get_by_name(const char *name) { struct gss_api_mech *pos, *gm = NULL; @@ -233,8 +233,8 @@ EXPORT_SYMBOL(gss_mech_put); /* The mech could probably be determined from the token instead, but it's just * as easy for now to pass it in. 
*/ -u32 -gss_import_sec_context(struct xdr_netobj *input_token, +int +gss_import_sec_context(const void *input_token, size_t bufsize, struct gss_api_mech *mech, struct gss_ctx **ctx_id) { @@ -244,7 +244,7 @@ gss_import_sec_context(struct xdr_netobj (*ctx_id)->mech_type = gss_mech_get(mech); return mech->gm_ops - ->gss_import_sec_context(input_token, *ctx_id); + ->gss_import_sec_context(input_token, bufsize, *ctx_id); } /* gss_get_mic: compute a mic over message and return mic_token. */ Index: linux-2.6.11/net/sunrpc/auth_gss/gss_spkm3_mech.c =================================================================== --- linux-2.6.11.orig/net/sunrpc/auth_gss/gss_spkm3_mech.c +++ linux-2.6.11/net/sunrpc/auth_gss/gss_spkm3_mech.c @@ -49,52 +49,51 @@ # define RPCDBG_FACILITY RPCDBG_AUTH #endif -static inline int -get_bytes(char **ptr, const char *end, void *res, int len) +static const void * +simple_get_bytes(const void *p, const void *end, void *res, int len) { - char *p, *q; - p = *ptr; - q = p + len; - if (q > end || q < p) - return -1; + const void *q = (const void *)((const char *)p + len); + if (unlikely(q > end || q < p)) + return ERR_PTR(-EFAULT); memcpy(res, p, len); - *ptr = q; - return 0; + return q; } -static inline int -get_netobj(char **ptr, const char *end, struct xdr_netobj *res) +static const void * +simple_get_netobj(const void *p, const void *end, struct xdr_netobj *res) { - char *p, *q; - p = *ptr; - if (get_bytes(&p, end, &res->len, sizeof(res->len))) - return -1; - q = p + res->len; - if(res->len == 0) - goto out_nocopy; - if (q > end || q < p) - return -1; - if (!(res->data = kmalloc(res->len, GFP_KERNEL))) - return -1; - memcpy(res->data, p, res->len); -out_nocopy: - *ptr = q; - return 0; + const void *q; + unsigned int len; + p = simple_get_bytes(p, end, &len, sizeof(len)); + if (IS_ERR(p)) + return p; + res->len = len; + if (len == 0) { + res->data = NULL; + return p; + } + q = (const void *)((const char *)p + len); + if (unlikely(q > end || q < p)) + 
return ERR_PTR(-EFAULT); + res->data = kmalloc(len, GFP_KERNEL); + if (unlikely(res->data == NULL)) + return ERR_PTR(-ENOMEM); + memcpy(res->data, p, len); + return q; } -static inline int -get_key(char **p, char *end, struct crypto_tfm **res, int *resalg) +static inline const void * +get_key(const void *p, const void *end, struct crypto_tfm **res, int *resalg) { - struct xdr_netobj key = { - .len = 0, - .data = NULL, - }; + struct xdr_netobj key = { 0 }; int alg_mode,setkey = 0; char *alg_name; - if (get_bytes(p, end, resalg, sizeof(int))) + p = simple_get_bytes(p, end, resalg, sizeof(*resalg)); + if (IS_ERR(p)) goto out_err; - if ((get_netobj(p, end, &key))) + p = simple_get_netobj(p, end, &key); + if (IS_ERR(p)) goto out_err; switch (*resalg) { @@ -111,10 +110,6 @@ get_key(char **p, char *end, struct cryp alg_mode = 0; setkey = 0; break; - case NID_cast5_cbc: - dprintk("RPC: SPKM3 get_key: case cast5_cbc, UNSUPPORTED \n"); - goto out_err; - break; default: dprintk("RPC: SPKM3 get_key: unsupported algorithm %d", *resalg); goto out_err_free_key; @@ -128,69 +123,81 @@ get_key(char **p, char *end, struct cryp if(key.len > 0) kfree(key.data); - return 0; + return p; out_err_free_tfm: crypto_free_tfm(*res); out_err_free_key: if(key.len > 0) kfree(key.data); + p = ERR_PTR(-EINVAL); out_err: - return -1; + return p; } -static u32 -gss_import_sec_context_spkm3(struct xdr_netobj *inbuf, +static int +gss_import_sec_context_spkm3(const void *p, size_t len, struct gss_ctx *ctx_id) { - char *p = inbuf->data; - char *end = inbuf->data + inbuf->len; + const void *end = (const void *)((const char *)p + len); struct spkm3_ctx *ctx; if (!(ctx = kmalloc(sizeof(*ctx), GFP_KERNEL))) goto out_err; memset(ctx, 0, sizeof(*ctx)); - if (get_netobj(&p, end, &ctx->ctx_id)) + p = simple_get_netobj(p, end, &ctx->ctx_id); + if (IS_ERR(p)) goto out_err_free_ctx; - if (get_bytes(&p, end, &ctx->qop, sizeof(ctx->qop))) + p = simple_get_bytes(p, end, &ctx->qop, sizeof(ctx->qop)); + if (IS_ERR(p)) 
goto out_err_free_ctx_id; - if (get_netobj(&p, end, &ctx->mech_used)) + p = simple_get_netobj(p, end, &ctx->mech_used); + if (IS_ERR(p)) goto out_err_free_mech; - if (get_bytes(&p, end, &ctx->ret_flags, sizeof(ctx->ret_flags))) + p = simple_get_bytes(p, end, &ctx->ret_flags, sizeof(ctx->ret_flags)); + if (IS_ERR(p)) goto out_err_free_mech; - if (get_bytes(&p, end, &ctx->req_flags, sizeof(ctx->req_flags))) + p = simple_get_bytes(p, end, &ctx->req_flags, sizeof(ctx->req_flags)); + if (IS_ERR(p)) goto out_err_free_mech; - if (get_netobj(&p, end, &ctx->share_key)) + p = simple_get_netobj(p, end, &ctx->share_key); + if (IS_ERR(p)) goto out_err_free_s_key; - if (get_key(&p, end, &ctx->derived_conf_key, &ctx->conf_alg)) { - dprintk("RPC: SPKM3 confidentiality key will be NULL\n"); - } - - if (get_key(&p, end, &ctx->derived_integ_key, &ctx->intg_alg)) { - dprintk("RPC: SPKM3 integrity key will be NULL\n"); - } - - if (get_bytes(&p, end, &ctx->owf_alg, sizeof(ctx->owf_alg))) + p = get_key(p, end, &ctx->derived_conf_key, &ctx->conf_alg); + if (IS_ERR(p)) goto out_err_free_s_key; - if (get_bytes(&p, end, &ctx->owf_alg, sizeof(ctx->owf_alg))) - goto out_err_free_s_key; + p = get_key(p, end, &ctx->derived_integ_key, &ctx->intg_alg); + if (IS_ERR(p)) + goto out_err_free_key1; + + p = simple_get_bytes(p, end, &ctx->keyestb_alg, sizeof(ctx->keyestb_alg)); + if (IS_ERR(p)) + goto out_err_free_key2; + + p = simple_get_bytes(p, end, &ctx->owf_alg, sizeof(ctx->owf_alg)); + if (IS_ERR(p)) + goto out_err_free_key2; if (p != end) - goto out_err_free_s_key; + goto out_err_free_key2; ctx_id->internal_ctx_id = ctx; dprintk("Succesfully imported new spkm context.\n"); return 0; +out_err_free_key2: + crypto_free_tfm(ctx->derived_integ_key); +out_err_free_key1: + crypto_free_tfm(ctx->derived_conf_key); out_err_free_s_key: kfree(ctx->share_key.data); out_err_free_mech: @@ -200,7 +207,7 @@ out_err_free_ctx_id: out_err_free_ctx: kfree(ctx); out_err: - return GSS_S_FAILURE; + return PTR_ERR(p); } 
static void Index: linux-2.6.11/net/sunrpc/auth_gss/svcauth_gss.c =================================================================== --- linux-2.6.11.orig/net/sunrpc/auth_gss/svcauth_gss.c +++ linux-2.6.11/net/sunrpc/auth_gss/svcauth_gss.c @@ -381,7 +381,6 @@ static int rsc_parse(struct cache_detail else { int N, i; struct gss_api_mech *gm; - struct xdr_netobj tmp_buf; /* gid */ if (get_int(&mesg, &rsci.cred.cr_gid)) @@ -420,9 +419,7 @@ static int rsc_parse(struct cache_detail gss_mech_put(gm); goto out; } - tmp_buf.len = len; - tmp_buf.data = buf; - if (gss_import_sec_context(&tmp_buf, gm, &rsci.mechctx)) { + if (gss_import_sec_context(buf, len, gm, &rsci.mechctx)) { gss_mech_put(gm); goto out; } Index: linux-2.6.11/net/sunrpc/auth_null.c =================================================================== --- linux-2.6.11.orig/net/sunrpc/auth_null.c +++ linux-2.6.11/net/sunrpc/auth_null.c @@ -18,48 +18,29 @@ # define RPCDBG_FACILITY RPCDBG_AUTH #endif -static struct rpc_credops null_credops; +static struct rpc_auth null_auth; +static struct rpc_cred null_cred; static struct rpc_auth * nul_create(struct rpc_clnt *clnt, rpc_authflavor_t flavor) { - struct rpc_auth *auth; - - dprintk("RPC: creating NULL authenticator for client %p\n", clnt); - if (!(auth = (struct rpc_auth *) kmalloc(sizeof(*auth),GFP_KERNEL))) - return NULL; - auth->au_cslack = 4; - auth->au_rslack = 2; - auth->au_ops = &authnull_ops; - auth->au_expire = 1800 * HZ; - rpcauth_init_credcache(auth); - - return (struct rpc_auth *) auth; + atomic_inc(&null_auth.au_count); + return &null_auth; } static void nul_destroy(struct rpc_auth *auth) { - dprintk("RPC: destroying NULL authenticator %p\n", auth); - rpcauth_free_credcache(auth); + atomic_dec(&null_auth.au_count); } /* - * Create NULL creds for current process + * Lookup NULL creds for current process */ static struct rpc_cred * -nul_create_cred(struct rpc_auth *auth, struct auth_cred *acred, int flags) +nul_lookup_cred(struct rpc_auth *auth, struct 
auth_cred *acred, int flags) { - struct rpc_cred *cred; - - if (!(cred = (struct rpc_cred *) kmalloc(sizeof(*cred),GFP_KERNEL))) - return NULL; - atomic_set(&cred->cr_count, 0); - cred->cr_flags = RPCAUTH_CRED_UPTODATE; - cred->cr_uid = acred->uid; - cred->cr_ops = &null_credops; - - return cred; + return get_rpccred(&null_cred); } /* @@ -68,7 +49,6 @@ nul_create_cred(struct rpc_auth *auth, s static void nul_destroy_cred(struct rpc_cred *cred) { - kfree(cred); } /* @@ -84,7 +64,7 @@ nul_match(struct auth_cred *acred, struc * Marshal credential. */ static u32 * -nul_marshal(struct rpc_task *task, u32 *p, int ruid) +nul_marshal(struct rpc_task *task, u32 *p) { *p++ = htonl(RPC_AUTH_NULL); *p++ = 0; @@ -125,7 +105,7 @@ nul_validate(struct rpc_task *task, u32 return p; } -struct rpc_authops authnull_ops = { +struct rpc_authops authnull_ops = { .owner = THIS_MODULE, .au_flavor = RPC_AUTH_NULL, #ifdef RPC_DEBUG @@ -133,14 +113,32 @@ struct rpc_authops authnull_ops = { #endif .create = nul_create, .destroy = nul_destroy, - .crcreate = nul_create_cred, + .lookup_cred = nul_lookup_cred, +}; + +static +struct rpc_auth null_auth = { + .au_cslack = 4, + .au_rslack = 2, + .au_ops = &authnull_ops, }; static struct rpc_credops null_credops = { + .cr_name = "AUTH_NULL", .crdestroy = nul_destroy_cred, .crmatch = nul_match, .crmarshal = nul_marshal, .crrefresh = nul_refresh, .crvalidate = nul_validate, }; + +static +struct rpc_cred null_cred = { + .cr_ops = &null_credops, + .cr_count = ATOMIC_INIT(1), + .cr_flags = RPCAUTH_CRED_UPTODATE, +#ifdef RPC_DEBUG + .cr_magic = RPCAUTH_CRED_MAGIC, +#endif +}; Index: linux-2.6.11/net/sunrpc/auth_unix.c =================================================================== --- linux-2.6.11.orig/net/sunrpc/auth_unix.c +++ linux-2.6.11/net/sunrpc/auth_unix.c @@ -19,8 +19,6 @@ struct unx_cred { struct rpc_cred uc_base; gid_t uc_gid; - uid_t uc_puid; /* process uid */ - gid_t uc_pgid; /* process gid */ gid_t uc_gids[NFS_NGROUPS]; }; #define uc_uid 
uc_base.cr_uid @@ -36,24 +34,17 @@ struct unx_cred { # define RPCDBG_FACILITY RPCDBG_AUTH #endif +static struct rpc_auth unix_auth; +static struct rpc_cred_cache unix_cred_cache; static struct rpc_credops unix_credops; static struct rpc_auth * unx_create(struct rpc_clnt *clnt, rpc_authflavor_t flavor) { - struct rpc_auth *auth; - dprintk("RPC: creating UNIX authenticator for client %p\n", clnt); - if (!(auth = (struct rpc_auth *) kmalloc(sizeof(*auth), GFP_KERNEL))) - return NULL; - auth->au_cslack = UNX_WRITESLACK; - auth->au_rslack = 2; /* assume AUTH_NULL verf */ - auth->au_expire = UNX_CRED_EXPIRE; - auth->au_ops = &authunix_ops; - - rpcauth_init_credcache(auth); - - return auth; + if (atomic_inc_return(&unix_auth.au_count) == 0) + unix_cred_cache.nextgc = jiffies + (unix_cred_cache.expire >> 1); + return &unix_auth; } static void @@ -63,6 +54,15 @@ unx_destroy(struct rpc_auth *auth) rpcauth_free_credcache(auth); } +/* + * Lookup AUTH_UNIX creds for current process + */ +static struct rpc_cred * +unx_lookup_cred(struct rpc_auth *auth, struct auth_cred *acred, int flags) +{ + return rpcauth_lookup_credcache(auth, acred, flags); +} + static struct rpc_cred * unx_create_cred(struct rpc_auth *auth, struct auth_cred *acred, int flags) { @@ -73,13 +73,13 @@ unx_create_cred(struct rpc_auth *auth, s acred->uid, acred->gid); if (!(cred = (struct unx_cred *) kmalloc(sizeof(*cred), GFP_KERNEL))) - return NULL; + return ERR_PTR(-ENOMEM); - atomic_set(&cred->uc_count, 0); + atomic_set(&cred->uc_count, 1); cred->uc_flags = RPCAUTH_CRED_UPTODATE; if (flags & RPC_TASK_ROOTCREDS) { - cred->uc_uid = cred->uc_puid = 0; - cred->uc_gid = cred->uc_pgid = 0; + cred->uc_uid = 0; + cred->uc_gid = 0; cred->uc_gids[0] = NOGROUP; } else { int groups = acred->group_info->ngroups; @@ -88,8 +88,6 @@ unx_create_cred(struct rpc_auth *auth, s cred->uc_uid = acred->uid; cred->uc_gid = acred->gid; - cred->uc_puid = current->uid; - cred->uc_pgid = current->gid; for (i = 0; i < groups; i++) 
cred->uc_gids[i] = GROUP_AT(acred->group_info, i); if (i < NFS_NGROUPS) @@ -121,9 +119,7 @@ unx_match(struct auth_cred *acred, struc int groups; if (cred->uc_uid != acred->uid - || cred->uc_gid != acred->gid - || cred->uc_puid != current->uid - || cred->uc_pgid != current->gid) + || cred->uc_gid != acred->gid) return 0; groups = acred->group_info->ngroups; @@ -134,8 +130,8 @@ unx_match(struct auth_cred *acred, struc return 0; return 1; } - return (cred->uc_uid == 0 && cred->uc_puid == 0 - && cred->uc_gid == 0 && cred->uc_pgid == 0 + return (cred->uc_uid == 0 + && cred->uc_gid == 0 && cred->uc_gids[0] == (gid_t) NOGROUP); } @@ -144,7 +140,7 @@ unx_match(struct auth_cred *acred, struc * Maybe we should keep a cached credential for performance reasons. */ static u32 * -unx_marshal(struct rpc_task *task, u32 *p, int ruid) +unx_marshal(struct rpc_task *task, u32 *p) { struct rpc_clnt *clnt = task->tk_client; struct unx_cred *cred = (struct unx_cred *) task->tk_msg.rpc_cred; @@ -160,14 +156,8 @@ unx_marshal(struct rpc_task *task, u32 * */ p = xdr_encode_array(p, clnt->cl_nodename, clnt->cl_nodelen); - /* Note: we don't use real uid if it involves raising privilege */ - if (ruid && cred->uc_puid != 0 && cred->uc_pgid != 0) { - *p++ = htonl((u32) cred->uc_puid); - *p++ = htonl((u32) cred->uc_pgid); - } else { - *p++ = htonl((u32) cred->uc_uid); - *p++ = htonl((u32) cred->uc_gid); - } + *p++ = htonl((u32) cred->uc_uid); + *p++ = htonl((u32) cred->uc_gid); hold = p++; for (i = 0; i < 16 && cred->uc_gids[i] != (gid_t) NOGROUP; i++) *p++ = htonl((u32) cred->uc_gids[i]); @@ -223,11 +213,27 @@ struct rpc_authops authunix_ops = { #endif .create = unx_create, .destroy = unx_destroy, + .lookup_cred = unx_lookup_cred, .crcreate = unx_create_cred, }; static +struct rpc_cred_cache unix_cred_cache = { + .expire = UNX_CRED_EXPIRE, +}; + +static +struct rpc_auth unix_auth = { + .au_cslack = UNX_WRITESLACK, + .au_rslack = 2, /* assume AUTH_NULL verf */ + .au_ops = &authunix_ops, + 
.au_count = ATOMIC_INIT(0), + .au_credcache = &unix_cred_cache, +}; + +static struct rpc_credops unix_credops = { + .cr_name = "AUTH_UNIX", .crdestroy = unx_destroy_cred, .crmatch = unx_match, .crmarshal = unx_marshal, Index: linux-2.6.11/net/sunrpc/clnt.c =================================================================== --- linux-2.6.11.orig/net/sunrpc/clnt.c +++ linux-2.6.11/net/sunrpc/clnt.c @@ -23,6 +23,7 @@ #include +#include #include #include #include @@ -139,6 +140,7 @@ rpc_create_client(struct rpc_xprt *xprt, clnt->cl_maxproc = version->nrprocs; clnt->cl_protname = program->name; clnt->cl_pmap = &clnt->cl_pmap_default; + clnt->cl_pmap->pm_parent = clnt->cl_pmap; clnt->cl_port = xprt->addr.sin_port; clnt->cl_prog = program->number; clnt->cl_vers = version->number; @@ -207,6 +209,9 @@ rpc_clone_client(struct rpc_clnt *clnt) rpc_init_rtt(&new->cl_rtt_default, clnt->cl_xprt->timeout.to_initval); if (new->cl_auth) atomic_inc(&new->cl_auth->au_count); + new->cl_pmap = &new->cl_pmap_default; + new->cl_pmap->pm_parent = clnt->cl_pmap->pm_parent; + rpc_init_wait_queue(&new->cl_pmap_default.pm_bindwait, "bindwait"); return new; out_no_clnt: printk(KERN_INFO "RPC: out of memory in %s\n", __FUNCTION__); @@ -269,7 +274,7 @@ rpc_destroy_client(struct rpc_clnt *clnt if (clnt->cl_pathname[0]) rpc_rmdir(clnt->cl_pathname); if (clnt->cl_xprt) { - xprt_destroy(clnt->cl_xprt); + rpc_put_xprt(clnt->cl_xprt); clnt->cl_xprt = NULL; } if (clnt->cl_server != clnt->cl_inline_name) @@ -296,6 +301,25 @@ rpc_release_client(struct rpc_clnt *clnt } /* + * Change the program of a (usually cloned) client + */ +void +rpc_change_program(struct rpc_clnt *clnt, struct rpc_program *program, + int vers) +{ + struct rpc_version *version; + + BUG_ON(vers >= program->nrvers || !program->version[vers]); + version = program->version[vers]; + clnt->cl_procinfo = version->procs; + clnt->cl_maxproc = version->nrprocs; + clnt->cl_protname = program->name; + clnt->cl_prog = program->number; + 
clnt->cl_vers = version->number; + clnt->cl_stats = program->stats; +} + +/* * Default callback for async RPC calls */ static void @@ -376,6 +400,41 @@ out: return status; } +/** + * rpc_client_get_xprt() - Get reference to the RPC transport struct + * @clnt - pointer to RPC client + */ +struct rpc_xprt *rpc_client_get_xprt(struct rpc_clnt *clnt) +{ + struct rpc_xprt *xprt; + + /* Synchronize w.r.t. rpc_client_set_xprt() */ + rcu_read_lock(); + xprt = clnt->cl_xprt; + atomic_inc(&xprt->count); + rcu_read_unlock(); + return xprt; +} + +/** + * rpc_client_set_xprt() - Change the transport struct pointer on an in-use RPC client + * @clnt - pointer to RPC client + * @xprt - new transport + * + * This function should be called VERY infrequently, and is designed + * to be called only in case of a failover mount. + */ +void rpc_client_set_xprt(struct rpc_clnt *clnt, struct rpc_xprt *xprt) +{ + struct rpc_xprt *old; + + old = xchg(&clnt->cl_xprt, xprt); + /* Wait for all reads of clnt->cl_xprt == old to complete */ + synchronize_kernel(); + rpc_put_xprt(old); +} +EXPORT_SYMBOL(rpc_client_set_xprt); + /* * New rpc_call implementation */ @@ -406,12 +465,11 @@ rpc_call_async(struct rpc_clnt *clnt, st rpc_call_setup(task, msg, 0); /* Set up the call info struct and execute the task */ - if (task->tk_status == 0) - status = rpc_execute(task); - else { - status = task->tk_status; + status = task->tk_status; + if (status == 0) + rpc_execute(task); + else rpc_release_task(task); - } out: rpc_clnt_sigunmask(clnt, &oldset); @@ -426,9 +484,9 @@ rpc_call_setup(struct rpc_task *task, st task->tk_msg = *msg; task->tk_flags |= flags; /* Bind the user cred */ - if (task->tk_msg.rpc_cred != NULL) { + if (task->tk_msg.rpc_cred != NULL) rpcauth_holdcred(task); - } else + else rpcauth_bindcred(task); if (task->tk_status == 0) @@ -440,7 +498,7 @@ rpc_call_setup(struct rpc_task *task, st void rpc_setbufsize(struct rpc_clnt *clnt, unsigned int sndsize, unsigned int rcvsize) { - struct rpc_xprt 
*xprt = clnt->cl_xprt; + struct rpc_xprt *xprt = rpc_client_get_xprt(clnt); xprt->sndsize = 0; if (sndsize) @@ -450,7 +508,22 @@ rpc_setbufsize(struct rpc_clnt *clnt, un xprt->rcvsize = rcvsize + RPC_SLACK_SPACE; if (xprt_connected(xprt)) xprt_sock_setbufsize(xprt); + rpc_put_xprt(xprt); +} + +/* + * Return size of largest payload RPC client can support, in bytes + * + * For stream transports, this is one RPC record fragment (see RFC + * 1831), as we don't support multi-record requests yet. For datagram + * transports, this is the size of an IP packet minus the IP, UDP, and + * RPC header sizes. + */ +size_t rpc_max_payload(struct rpc_clnt *clnt) +{ + return clnt->cl_xprt->max_payload; } +EXPORT_SYMBOL(rpc_max_payload); /* * Restart an (async) RPC call. Usually called from within the @@ -644,7 +717,7 @@ static void call_bind(struct rpc_task *task) { struct rpc_clnt *clnt = task->tk_client; - struct rpc_xprt *xprt = clnt->cl_xprt; + struct rpc_xprt *xprt = task->tk_xprt; dprintk("RPC: %4d call_bind xprt %p %s connected\n", task->tk_pid, xprt, (xprt_connected(xprt) ? "is" : "is not")); @@ -664,12 +737,10 @@ call_bind(struct rpc_task *task) static void call_connect(struct rpc_task *task) { - struct rpc_clnt *clnt = task->tk_client; - dprintk("RPC: %4d call_connect status %d\n", task->tk_pid, task->tk_status); - if (xprt_connected(clnt->cl_xprt)) { + if (xprt_connected(task->tk_xprt)) { task->tk_action = call_transmit; return; } @@ -871,21 +942,6 @@ call_decode(struct rpc_task *task) goto out_retry; } - /* - * The following is an NFS-specific hack to cater for setuid - * processes whose uid is mapped to nobody on the server. 
- */ - if (task->tk_client->cl_droppriv && - (ntohl(*p) == NFSERR_ACCES || ntohl(*p) == NFSERR_PERM)) { - if (RPC_IS_SETUID(task) && task->tk_suid_retry) { - dprintk("RPC: %4d retry squashed uid\n", task->tk_pid); - task->tk_flags ^= RPC_CALL_REALUID; - task->tk_action = call_bind; - task->tk_suid_retry--; - goto out_retry; - } - } - task->tk_action = NULL; if (decode) @@ -945,7 +1001,7 @@ static u32 * call_header(struct rpc_task *task) { struct rpc_clnt *clnt = task->tk_client; - struct rpc_xprt *xprt = clnt->cl_xprt; + struct rpc_xprt *xprt = task->tk_xprt; struct rpc_rqst *req = task->tk_rqstp; u32 *p = req->rq_svec[0].iov_base; @@ -987,10 +1043,11 @@ call_verify(struct rpc_task *task) case RPC_AUTH_ERROR: break; case RPC_MISMATCH: - printk(KERN_WARNING "%s: RPC call version mismatch!\n", __FUNCTION__); - goto out_eio; + dprintk("%s: RPC call version mismatch!\n", __FUNCTION__); + error = -ENOSYS; + goto out_err; default: - printk(KERN_WARNING "%s: RPC call rejected, unknown error: %x\n", __FUNCTION__, n); + dprintk("%s: RPC call rejected, unknown error: %x\n", __FUNCTION__, n); goto out_eio; } if (--len < 0) @@ -1041,23 +1098,26 @@ call_verify(struct rpc_task *task) case RPC_SUCCESS: return p; case RPC_PROG_UNAVAIL: - printk(KERN_WARNING "RPC: call_verify: program %u is unsupported by server %s\n", + dprintk("RPC: call_verify: program %u is unsupported by server %s\n", (unsigned int)task->tk_client->cl_prog, task->tk_client->cl_server); - goto out_eio; + error = -ENOSYS; + goto out_err; case RPC_PROG_MISMATCH: - printk(KERN_WARNING "RPC: call_verify: program %u, version %u unsupported by server %s\n", + dprintk("RPC: call_verify: program %u, version %u unsupported by server %s\n", (unsigned int)task->tk_client->cl_prog, (unsigned int)task->tk_client->cl_vers, task->tk_client->cl_server); - goto out_eio; + error = -ENOSYS; + goto out_err; case RPC_PROC_UNAVAIL: - printk(KERN_WARNING "RPC: call_verify: proc %p unsupported by program %u, version %u on server 
%s\n", + dprintk("RPC: call_verify: proc %p unsupported by program %u, version %u on server %s\n", task->tk_msg.rpc_proc, task->tk_client->cl_prog, task->tk_client->cl_vers, task->tk_client->cl_server); - goto out_eio; + error = -EOPNOTSUPP; + goto out_err; case RPC_GARBAGE_ARGS: dprintk("RPC: %4d %s: server saw garbage\n", task->tk_pid, __FUNCTION__); break; /* retry */ @@ -1070,7 +1130,7 @@ out_retry: task->tk_client->cl_stats->rpcgarbage++; if (task->tk_garb_retry) { task->tk_garb_retry--; - dprintk(KERN_WARNING "RPC %s: retrying %4d\n", __FUNCTION__, task->tk_pid); + dprintk("RPC %s: retrying %4d\n", __FUNCTION__, task->tk_pid); task->tk_action = call_bind; return NULL; } Index: linux-2.6.11/net/sunrpc/pmap_clnt.c =================================================================== --- linux-2.6.11.orig/net/sunrpc/pmap_clnt.c +++ linux-2.6.11/net/sunrpc/pmap_clnt.c @@ -3,9 +3,6 @@ * * Portmapper client. * - * FIXME: In a secure environment, we may want to use an authentication - * flavor other than AUTH_NULL. 
- * * Copyright (C) 1996, Olaf Kirch */ @@ -41,8 +38,8 @@ static DEFINE_SPINLOCK(pmap_lock); void rpc_getport(struct rpc_task *task, struct rpc_clnt *clnt) { - struct rpc_portmap *map = clnt->cl_pmap; - struct sockaddr_in *sap = &clnt->cl_xprt->addr; + struct rpc_portmap *map = clnt->cl_pmap->pm_parent; + struct sockaddr_in *sap = &task->tk_xprt->addr; struct rpc_message msg = { .rpc_proc = &pmap_procedures[PMAP_GETPORT], .rpc_argp = map, @@ -132,7 +129,7 @@ static void pmap_getport_done(struct rpc_task *task) { struct rpc_clnt *clnt = task->tk_client; - struct rpc_portmap *map = clnt->cl_pmap; + struct rpc_portmap *map = clnt->cl_pmap->pm_parent; dprintk("RPC: %4d pmap_getport_done(status %d, port %d)\n", task->tk_pid, task->tk_status, clnt->cl_port); @@ -212,7 +209,7 @@ pmap_create(char *hostname, struct socka /* printk("pmap: create clnt\n"); */ clnt = rpc_create_client(xprt, hostname, &pmap_program, RPC_PMAP_VERSION, - RPC_AUTH_NULL); + RPC_AUTH_UNIX); if (IS_ERR(clnt)) { xprt_destroy(xprt); } else { Index: linux-2.6.11/net/sunrpc/rpc_pipe.c =================================================================== --- linux-2.6.11.orig/net/sunrpc/rpc_pipe.c +++ linux-2.6.11/net/sunrpc/rpc_pipe.c @@ -291,14 +291,16 @@ static int rpc_show_info(struct seq_file *m, void *v) { struct rpc_clnt *clnt = m->private; + struct rpc_xprt *xprt = rpc_client_get_xprt(clnt); seq_printf(m, "RPC server: %s\n", clnt->cl_server); seq_printf(m, "service: %s (%d) version %d\n", clnt->cl_protname, clnt->cl_prog, clnt->cl_vers); seq_printf(m, "address: %u.%u.%u.%u\n", - NIPQUAD(clnt->cl_xprt->addr.sin_addr.s_addr)); + NIPQUAD(xprt->addr.sin_addr.s_addr)); seq_printf(m, "protocol: %s\n", - clnt->cl_xprt->prot == IPPROTO_UDP ? "udp" : "tcp"); + xprt->prot == IPPROTO_UDP ? 
"udp" : "tcp"); + rpc_put_xprt(xprt); return 0; } Index: linux-2.6.11/net/sunrpc/sched.c =================================================================== --- linux-2.6.11.orig/net/sunrpc/sched.c +++ linux-2.6.11/net/sunrpc/sched.c @@ -132,9 +132,11 @@ __rpc_add_timer(struct rpc_task *task, r * Delete any timer for the current task. Because we use del_timer_sync(), * this function should never be called while holding queue->lock. */ -static inline void +static void rpc_delete_timer(struct rpc_task *task) { + if (RPC_IS_QUEUED(task)) + return; if (test_and_clear_bit(RPC_TASK_HAS_TIMER, &task->tk_runstate)) { del_singleshot_timer_sync(&task->tk_timer); dprintk("RPC: %4d deleting timer\n", task->tk_pid); @@ -747,13 +749,10 @@ void rpc_init_task(struct rpc_task *task task->tk_client = clnt; task->tk_flags = flags; task->tk_exit = callback; - if (current->uid != current->fsuid || current->gid != current->fsgid) - task->tk_flags |= RPC_TASK_SETUID; /* Initialize retry counters */ task->tk_garb_retry = 2; task->tk_cred_retry = 2; - task->tk_suid_retry = 1; task->tk_priority = RPC_PRIORITY_NORMAL; task->tk_cookie = (unsigned long)current; @@ -769,6 +768,7 @@ void rpc_init_task(struct rpc_task *task task->tk_flags |= RPC_TASK_SOFT; if (!clnt->cl_intr) task->tk_flags |= RPC_TASK_NOINTR; + task->tk_xprt = rpc_client_get_xprt(clnt); } #ifdef RPC_DEBUG @@ -858,6 +858,7 @@ void rpc_release_task(struct rpc_task *t rpcauth_unbindcred(task); rpc_free(task); if (task->tk_client) { + rpc_put_xprt(task->tk_xprt); rpc_release_client(task->tk_client); task->tk_client = NULL; } Index: linux-2.6.11/net/sunrpc/sunrpc_syms.c =================================================================== --- linux-2.6.11.orig/net/sunrpc/sunrpc_syms.c +++ linux-2.6.11/net/sunrpc/sunrpc_syms.c @@ -42,6 +42,7 @@ EXPORT_SYMBOL(rpc_release_task); /* RPC client functions */ EXPORT_SYMBOL(rpc_create_client); EXPORT_SYMBOL(rpc_clone_client); +EXPORT_SYMBOL(rpc_change_program); 
EXPORT_SYMBOL(rpc_destroy_client); EXPORT_SYMBOL(rpc_shutdown_client); EXPORT_SYMBOL(rpc_release_client); @@ -128,6 +129,10 @@ EXPORT_SYMBOL(xdr_encode_netobj); EXPORT_SYMBOL(xdr_encode_pages); EXPORT_SYMBOL(xdr_inline_pages); EXPORT_SYMBOL(xdr_shift_buf); +EXPORT_SYMBOL(xdr_encode_word); +EXPORT_SYMBOL(xdr_decode_word); +EXPORT_SYMBOL(xdr_encode_array2); +EXPORT_SYMBOL(xdr_decode_array2); EXPORT_SYMBOL(xdr_buf_from_iov); EXPORT_SYMBOL(xdr_buf_subsegment); EXPORT_SYMBOL(xdr_buf_read_netobj); Index: linux-2.6.11/net/sunrpc/xdr.c =================================================================== --- linux-2.6.11.orig/net/sunrpc/xdr.c +++ linux-2.6.11/net/sunrpc/xdr.c @@ -176,7 +176,7 @@ xdr_inline_pages(struct xdr_buf *xdr, un xdr->buflen += len; } -void +int xdr_partial_copy_from_skb(struct xdr_buf *xdr, unsigned int base, skb_reader_t *desc, skb_read_actor_t copy_actor) @@ -190,7 +190,7 @@ xdr_partial_copy_from_skb(struct xdr_buf len -= base; ret = copy_actor(desc, (char *)xdr->head[0].iov_base + base, len); if (ret != len || !desc->count) - return; + return 0; base = 0; } else base -= len; @@ -210,6 +210,14 @@ xdr_partial_copy_from_skb(struct xdr_buf do { char *kaddr; + /* ACL likes to be lazy in allocating pages - ACLs + * are small by default but can get huge. 
*/ + if (unlikely(*ppage == NULL)) { + *ppage = alloc_page(GFP_ATOMIC); + if (unlikely(*ppage == NULL)) + return -ENOMEM; + } + len = PAGE_CACHE_SIZE; kaddr = kmap_atomic(*ppage, KM_SKB_SUNRPC_DATA); if (base) { @@ -226,13 +234,15 @@ xdr_partial_copy_from_skb(struct xdr_buf flush_dcache_page(*ppage); kunmap_atomic(kaddr, KM_SKB_SUNRPC_DATA); if (ret != len || !desc->count) - return; + return 0; ppage++; } while ((pglen -= len) != 0); copy_tail: len = xdr->tail[0].iov_len; if (base < len) copy_actor(desc, (char *)xdr->tail[0].iov_base + base, len - base); + + return 0; } @@ -736,8 +746,7 @@ EXPORT_SYMBOL(xdr_inline_decode); * * Moves data beyond the current pointer position from the XDR head[] buffer * into the page list. Any data that lies beyond current position + "len" - * bytes is moved into the XDR tail[]. The current pointer is then - * repositioned at the beginning of the XDR tail. + * bytes is moved into the XDR tail[]. */ void xdr_read_pages(struct xdr_stream *xdr, unsigned int len) { @@ -774,6 +783,31 @@ void xdr_read_pages(struct xdr_stream *x } EXPORT_SYMBOL(xdr_read_pages); +/** + * xdr_enter_page - decode data from the XDR page + * @xdr: pointer to xdr_stream struct + * @len: number of bytes of page data + * + * Moves data beyond the current pointer position from the XDR head[] buffer + * into the page list. Any data that lies beyond current position + "len" + * bytes is moved into the XDR tail[]. The current pointer is then + * repositioned at the beginning of the first XDR page. + */ +void xdr_enter_page(struct xdr_stream *xdr, unsigned int len) +{ + char * kaddr = page_address(xdr->buf->pages[0]); + xdr_read_pages(xdr, len); + /* + * Position current pointer at beginning of the first page, and + * set remaining message length.
+ */ + if (len > PAGE_CACHE_SIZE - xdr->buf->page_base) + len = PAGE_CACHE_SIZE - xdr->buf->page_base; + xdr->p = (uint32_t *)(kaddr + xdr->buf->page_base); + xdr->end = (uint32_t *)((char *)xdr->p + len); +} +EXPORT_SYMBOL(xdr_enter_page); + static struct kvec empty_iov = {.iov_base = NULL, .iov_len = 0}; void @@ -859,8 +893,34 @@ out: return status; } -static int -read_u32_from_xdr_buf(struct xdr_buf *buf, int base, u32 *obj) +/* obj is assumed to point to allocated memory of size at least len: */ +int +write_bytes_to_xdr_buf(struct xdr_buf *buf, int base, void *obj, int len) +{ + struct xdr_buf subbuf; + int this_len; + int status; + + status = xdr_buf_subsegment(buf, &subbuf, base, len); + if (status) + goto out; + this_len = min(len, (int)subbuf.head[0].iov_len); + memcpy(subbuf.head[0].iov_base, obj, this_len); + len -= this_len; + obj += this_len; + this_len = min(len, (int)subbuf.page_len); + if (this_len) + _copy_to_pages(subbuf.pages, subbuf.page_base, obj, this_len); + len -= this_len; + obj += this_len; + this_len = min(len, (int)subbuf.tail[0].iov_len); + memcpy(subbuf.tail[0].iov_base, obj, this_len); +out: + return status; +} + +int +xdr_decode_word(struct xdr_buf *buf, int base, u32 *obj) { u32 raw; int status; @@ -872,6 +932,14 @@ read_u32_from_xdr_buf(struct xdr_buf *bu return 0; } +int +xdr_encode_word(struct xdr_buf *buf, int base, u32 obj) +{ + u32 raw = htonl(obj); + + return write_bytes_to_xdr_buf(buf, base, &raw, sizeof(obj)); +} + /* If the netobj starting offset bytes from the start of xdr_buf is contained * entirely in the head or the tail, set object to point to it; otherwise * try to find space for it at the end of the tail, copy it there, and @@ -882,7 +950,7 @@ xdr_buf_read_netobj(struct xdr_buf *buf, u32 tail_offset = buf->head[0].iov_len + buf->page_len; u32 obj_end_offset; - if (read_u32_from_xdr_buf(buf, offset, &obj->len)) + if (xdr_decode_word(buf, offset, &obj->len)) goto out; obj_end_offset = offset + 4 + obj->len; @@ -915,3 
+983,219 @@ xdr_buf_read_netobj(struct xdr_buf *buf, out: return -1; } + +/* Returns 0 on success, or else a negative error code. */ +static int +xdr_xcode_array2(struct xdr_buf *buf, unsigned int base, + struct xdr_array2_desc *desc, int encode) +{ + char *elem = NULL, *c; + unsigned int copied = 0, todo, avail_here; + struct page **ppages = NULL; + int err; + + if (encode) { + if (xdr_encode_word(buf, base, desc->array_len) != 0) + return -EINVAL; + } else { + if (xdr_decode_word(buf, base, &desc->array_len) != 0 || + (unsigned long) base + 4 + desc->array_len * + desc->elem_size > buf->len) + return -EINVAL; + } + base += 4; + + if (!desc->xcode) + return 0; + + todo = desc->array_len * desc->elem_size; + + /* process head */ + if (todo && base < buf->head->iov_len) { + c = buf->head->iov_base + base; + avail_here = min_t(unsigned int, todo, + buf->head->iov_len - base); + todo -= avail_here; + + while (avail_here >= desc->elem_size) { + err = desc->xcode(desc, c); + if (err) + goto out; + c += desc->elem_size; + avail_here -= desc->elem_size; + } + if (avail_here) { + if (!elem) { + elem = kmalloc(desc->elem_size, GFP_KERNEL); + err = -ENOMEM; + if (!elem) + goto out; + } + if (encode) { + err = desc->xcode(desc, elem); + if (err) + goto out; + memcpy(c, elem, avail_here); + } else + memcpy(elem, c, avail_here); + copied = avail_here; + } + base = buf->head->iov_len; /* align to start of pages */ + } + + /* process pages array */ + base -= buf->head->iov_len; + if (todo && base < buf->page_len) { + unsigned int avail_page; + + avail_here = min(todo, buf->page_len - base); + todo -= avail_here; + + base += buf->page_base; + ppages = buf->pages + (base >> PAGE_CACHE_SHIFT); + base &= ~PAGE_CACHE_MASK; + avail_page = min_t(unsigned int, PAGE_CACHE_SIZE - base, + avail_here); + c = kmap(*ppages) + base; + + while (avail_here) { + avail_here -= avail_page; + if (copied || avail_page < desc->elem_size) { + unsigned int l = min(avail_page, + desc->elem_size - copied); 
+ if (!elem) { + elem = kmalloc(desc->elem_size, + GFP_KERNEL); + err = -ENOMEM; + if (!elem) + goto out; + } + if (encode) { + if (!copied) { + err = desc->xcode(desc, elem); + if (err) + goto out; + } + memcpy(c, elem + copied, l); + copied += l; + if (copied == desc->elem_size) + copied = 0; + } else { + memcpy(elem + copied, c, l); + copied += l; + if (copied == desc->elem_size) { + err = desc->xcode(desc, elem); + if (err) + goto out; + copied = 0; + } + } + avail_page -= l; + c += l; + } + while (avail_page >= desc->elem_size) { + err = desc->xcode(desc, c); + if (err) + goto out; + c += desc->elem_size; + avail_page -= desc->elem_size; + } + if (avail_page) { + unsigned int l = min(avail_page, + desc->elem_size - copied); + if (!elem) { + elem = kmalloc(desc->elem_size, + GFP_KERNEL); + err = -ENOMEM; + if (!elem) + goto out; + } + if (encode) { + if (!copied) { + err = desc->xcode(desc, elem); + if (err) + goto out; + } + memcpy(c, elem + copied, l); + copied += l; + if (copied == desc->elem_size) + copied = 0; + } else { + memcpy(elem + copied, c, l); + copied += l; + if (copied == desc->elem_size) { + err = desc->xcode(desc, elem); + if (err) + goto out; + copied = 0; + } + } + } + if (avail_here) { + kunmap(*ppages); + ppages++; + c = kmap(*ppages); + } + + avail_page = min(avail_here, + (unsigned int) PAGE_CACHE_SIZE); + } + base = buf->page_len; /* align to start of tail */ + } + + /* process tail */ + base -= buf->page_len; + if (todo) { + c = buf->tail->iov_base + base; + if (copied) { + unsigned int l = desc->elem_size - copied; + + if (encode) + memcpy(c, elem + copied, l); + else { + memcpy(elem + copied, c, l); + err = desc->xcode(desc, elem); + if (err) + goto out; + } + todo -= l; + c += l; + } + while (todo) { + err = desc->xcode(desc, c); + if (err) + goto out; + c += desc->elem_size; + todo -= desc->elem_size; + } + } + err = 0; + +out: + if (elem) + kfree(elem); + if (ppages) + kunmap(*ppages); + return err; +} + +int 
+xdr_decode_array2(struct xdr_buf *buf, unsigned int base, + struct xdr_array2_desc *desc) +{ + if (base >= buf->len) + return -EINVAL; + + return xdr_xcode_array2(buf, base, desc, 0); +} + +int +xdr_encode_array2(struct xdr_buf *buf, unsigned int base, + struct xdr_array2_desc *desc) +{ + if ((unsigned long) base + 4 + desc->array_len * desc->elem_size > + buf->head->iov_len + buf->page_len + buf->tail->iov_len) + return -EINVAL; + + return xdr_xcode_array2(buf, base, desc, 1); +} Index: linux-2.6.11/net/sunrpc/xprt.c =================================================================== --- linux-2.6.11.orig/net/sunrpc/xprt.c +++ linux-2.6.11/net/sunrpc/xprt.c @@ -725,7 +725,8 @@ csum_partial_copy_to_xdr(struct xdr_buf goto no_checksum; desc.csum = csum_partial(skb->data, desc.offset, skb->csum); - xdr_partial_copy_from_skb(xdr, 0, &desc, skb_read_and_csum_bits); + if (xdr_partial_copy_from_skb(xdr, 0, &desc, skb_read_and_csum_bits) < 0) + return -1; if (desc.offset != skb->len) { unsigned int csum2; csum2 = skb_checksum(skb, desc.offset, skb->len - desc.offset, 0); @@ -737,7 +738,8 @@ csum_partial_copy_to_xdr(struct xdr_buf return -1; return 0; no_checksum: - xdr_partial_copy_from_skb(xdr, 0, &desc, skb_read_bits); + if (xdr_partial_copy_from_skb(xdr, 0, &desc, skb_read_bits) < 0) + return -1; if (desc.count) return -1; return 0; @@ -907,6 +909,7 @@ tcp_read_request(struct rpc_xprt *xprt, struct rpc_rqst *req; struct xdr_buf *rcvbuf; size_t len; + int r; /* Find and lock the request corresponding to this xid */ spin_lock(&xprt->sock_lock); @@ -927,16 +930,30 @@ tcp_read_request(struct rpc_xprt *xprt, len = xprt->tcp_reclen - xprt->tcp_offset; memcpy(&my_desc, desc, sizeof(my_desc)); my_desc.count = len; - xdr_partial_copy_from_skb(rcvbuf, xprt->tcp_copied, + r = xdr_partial_copy_from_skb(rcvbuf, xprt->tcp_copied, &my_desc, tcp_copy_data); desc->count -= len; desc->offset += len; } else - xdr_partial_copy_from_skb(rcvbuf, xprt->tcp_copied, + r = 
xdr_partial_copy_from_skb(rcvbuf, xprt->tcp_copied, desc, tcp_copy_data); xprt->tcp_copied += len; xprt->tcp_offset += len; + if (r < 0) { + /* Error when copying to the receive buffer, + * usually because we weren't able to allocate + * additional buffer pages. All we can do now + * is turn off XPRT_COPY_DATA, so the request + * will not receive any additional updates, + * and time out. + * Any remaining data from this record will + * be discarded. + */ + xprt->tcp_flags &= ~XPRT_COPY_DATA; + goto out; + } + if (xprt->tcp_copied == req->rq_private_buf.buflen) xprt->tcp_flags &= ~XPRT_COPY_DATA; else if (xprt->tcp_offset == xprt->tcp_reclen) { @@ -949,6 +966,7 @@ tcp_read_request(struct rpc_xprt *xprt, req->rq_task->tk_pid); xprt_complete_rqst(xprt, req, xprt->tcp_copied); } +out: spin_unlock(&xprt->sock_lock); tcp_check_recm(xprt); } @@ -1445,6 +1463,7 @@ xprt_setup(int proto, struct sockaddr_in if ((xprt = kmalloc(sizeof(struct rpc_xprt), GFP_KERNEL)) == NULL) return ERR_PTR(-ENOMEM); memset(xprt, 0, sizeof(*xprt)); /* Nnnngh! */ + atomic_set(&xprt->count, 1); xprt->max_reqs = entries; slot_table_size = entries * sizeof(xprt->slot[0]); xprt->slot = kmalloc(slot_table_size, GFP_KERNEL); @@ -1460,8 +1479,11 @@ xprt_setup(int proto, struct sockaddr_in if (xprt->stream) { xprt->cwnd = RPC_MAXCWND(xprt); xprt->nocong = 1; - } else + xprt->max_payload = (1U << 31) - 1; + } else { xprt->cwnd = RPC_INITCWND; + xprt->max_payload = (1U << 16) - (MAX_HEADER << 3); + } spin_lock_init(&xprt->sock_lock); spin_lock_init(&xprt->xprt_lock); init_waitqueue_head(&xprt->cong_wait); @@ -1672,3 +1694,14 @@ xprt_destroy(struct rpc_xprt *xprt) return 0; } + +/** + * rpc_put_xprt() - Drop reference to the RPC transport struct + * @xprt: pointer to RPC transport + */ +void rpc_put_xprt(struct rpc_xprt *xprt) +{ + if (xprt != NULL && atomic_dec_and_test(&xprt->count)) + xprt_destroy(xprt); +} +