fs/locks.c | 51 ---- fs/nfs/Makefile | 3 fs/nfs/callback.c | 325 +++++++++++++++++++++++++++ fs/nfs/callback.h | 70 +++++ fs/nfs/callback_proc.c | 80 ++++++ fs/nfs/callback_xdr.c | 481 ++++++++++++++++++++++++++++++++++++++++ fs/nfs/delegation.c | 190 ++++++++++++++++ fs/nfs/delegation.h | 40 +++ fs/nfs/dir.c | 74 +++--- fs/nfs/direct.c | 47 ++- fs/nfs/file.c | 149 ++++++++---- fs/nfs/inode.c | 185 ++++++++++++--- fs/nfs/nfs3proc.c | 50 ---- fs/nfs/nfs4proc.c | 478 +++++++++++++++++++++++++--------------- fs/nfs/nfs4state.c | 108 ++++++--- fs/nfs/nfs4xdr.c | 398 +++++++++++++++++++++++---------- fs/nfs/pagelist.c | 47 ++- fs/nfs/proc.c | 46 --- fs/nfs/read.c | 62 +++-- fs/nfs/unlink.c | 3 fs/nfs/write.c | 111 ++++----- fs/open.c | 2 include/linux/fs.h | 2 include/linux/nfs4.h | 3 include/linux/nfs_fs.h | 104 +++++--- include/linux/nfs_page.h | 29 -- include/linux/nfs_xdr.h | 57 ++-- include/linux/sunrpc/sched.h | 55 ++-- include/linux/sunrpc/svc.h | 10 net/sunrpc/clnt.c | 4 net/sunrpc/sched.c | 510 ++++++++++++------------------------------- net/sunrpc/sunrpc_syms.c | 1 net/sunrpc/svc.c | 9 net/sunrpc/xprt.c | 2 34 files changed, 2614 insertions(+), 1172 deletions(-) diff -u --recursive --new-file --show-c-function linux-2.6.7-rc3/fs/locks.c linux-2.6.7-17-delegation_cache/fs/locks.c --- linux-2.6.7-rc3/fs/locks.c 2004-06-10 00:23:51.000000000 -0400 +++ linux-2.6.7-17-delegation_cache/fs/locks.c 2004-06-10 01:04:21.000000000 -0400 @@ -317,7 +317,7 @@ static int flock_to_posix_lock(struct fi if (l->l_len == 0) fl->fl_end = OFFSET_MAX; - fl->fl_owner = current->files; + fl->fl_owner = 0; fl->fl_pid = current->tgid; fl->fl_file = filp; fl->fl_flags = FL_POSIX; @@ -357,7 +357,7 @@ static int flock64_to_posix_lock(struct if (l->l_len == 0) fl->fl_end = OFFSET_MAX; - fl->fl_owner = current->files; + fl->fl_owner = 0; fl->fl_pid = current->tgid; fl->fl_file = filp; fl->fl_flags = FL_POSIX; @@ -920,7 +920,7 @@ int posix_lock_file(struct file *filp, s */ int 
locks_mandatory_locked(struct inode *inode) { - fl_owner_t owner = current->files; + unsigned int pid = current->tgid; struct file_lock *fl; /* @@ -930,7 +930,9 @@ int locks_mandatory_locked(struct inode for (fl = inode->i_flock; fl != NULL; fl = fl->fl_next) { if (!IS_POSIX(fl)) continue; - if (fl->fl_owner != owner) + if (fl->fl_owner != 0) + break; + if (fl->fl_pid != pid) break; } unlock_kernel(); @@ -958,7 +960,7 @@ int locks_mandatory_area(int read_write, int error; locks_init_lock(&fl); - fl.fl_owner = current->files; + fl.fl_owner = 0; fl.fl_pid = current->tgid; fl.fl_file = filp; fl.fl_flags = FL_POSIX | FL_ACCESS; @@ -1684,7 +1686,7 @@ void locks_remove_posix(struct file *fil lock_kernel(); while (*before != NULL) { struct file_lock *fl = *before; - if (IS_POSIX(fl) && (fl->fl_owner == owner)) { + if (IS_POSIX(fl) && posix_same_owner(fl, &lock)) { locks_delete_lock(before); continue; } @@ -1982,18 +1984,6 @@ int lock_may_write(struct inode *inode, EXPORT_SYMBOL(lock_may_write); -static inline void __steal_locks(struct file *file, fl_owner_t from) -{ - struct inode *inode = file->f_dentry->d_inode; - struct file_lock *fl = inode->i_flock; - - while (fl) { - if (fl->fl_file == file && fl->fl_owner == from) - fl->fl_owner = current->files; - fl = fl->fl_next; - } -} - /* When getting ready for executing a binary, we make sure that current * has a files_struct on its own. Before dropping the old files_struct, * we take over ownership of all locks for all file descriptors we own. 
@@ -2002,31 +1992,6 @@ static inline void __steal_locks(struct */ void steal_locks(fl_owner_t from) { - struct files_struct *files = current->files; - int i, j; - - if (from == files) - return; - - lock_kernel(); - j = 0; - for (;;) { - unsigned long set; - i = j * __NFDBITS; - if (i >= files->max_fdset || i >= files->max_fds) - break; - set = files->open_fds->fds_bits[j++]; - while (set) { - if (set & 1) { - struct file *file = files->fd[i]; - if (file) - __steal_locks(file, from); - } - i++; - set >>= 1; - } - } - unlock_kernel(); } EXPORT_SYMBOL(steal_locks); diff -u --recursive --new-file --show-c-function linux-2.6.7-rc3/fs/nfs/callback.c linux-2.6.7-17-delegation_cache/fs/nfs/callback.c --- linux-2.6.7-rc3/fs/nfs/callback.c 1969-12-31 19:00:00.000000000 -0500 +++ linux-2.6.7-17-delegation_cache/fs/nfs/callback.c 2004-06-10 01:04:59.000000000 -0400 @@ -0,0 +1,325 @@ +/* + * linux/fs/nfs/callback.c + * + * Copyright (C) 2004 Trond Myklebust + * + * NFSv4 callback handling + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include "callback.h" + +#define NFSDBG_FACILITY NFSDBG_CALLBACK + +struct nfs_callback_data { + unsigned int users; + struct svc_serv *serv; + pid_t pid; + struct completion started; + struct completion stopped; +}; + +static struct nfs_callback_data nfs_callback_info; +static DECLARE_MUTEX(nfs_callback_sema); +static struct svc_program nfs4_callback_program; + +unsigned short nfs_callback_tcpport; + +/* + * This is the callback kernel thread. + */ +static void nfs_callback_svc(struct svc_rqst *rqstp) +{ + struct svc_serv *serv = rqstp->rq_server; + int err; + + __module_get(THIS_MODULE); + lock_kernel(); + + nfs_callback_info.pid = current->pid; + daemonize("nfsv4-svc"); + /* Process request with signals blocked, but allow SIGKILL. 
*/ + allow_signal(SIGKILL); + + complete(&nfs_callback_info.started); + + while (nfs_callback_info.users != 0 || !signalled()) { + /* + * Listen for a request on the socket + */ + err = svc_recv(serv, rqstp, MAX_SCHEDULE_TIMEOUT); + if (err == -EAGAIN || err == -EINTR) + continue; + if (err < 0) { + printk(KERN_WARNING + "%s: terminating on error %d\n", + __FUNCTION__, -err); + break; + } + dprintk("%s: request from %u.%u.%u.%u\n", __FUNCTION__, + NIPQUAD(rqstp->rq_addr.sin_addr.s_addr)); + svc_process(serv, rqstp); + } + + nfs_callback_info.pid = 0; + complete(&nfs_callback_info.stopped); + unlock_kernel(); + module_put_and_exit(0); +} + +/* + * Bring up the server process if it is not already up. + */ +int nfs_callback_up(void) +{ + struct svc_serv *serv; + struct svc_sock *svsk; + int ret = 0; + + lock_kernel(); + down(&nfs_callback_sema); + if (nfs_callback_info.users++ || nfs_callback_info.pid != 0) + goto out; + init_completion(&nfs_callback_info.started); + init_completion(&nfs_callback_info.stopped); + serv = svc_create(&nfs4_callback_program, NFS4_CALLBACK_BUFSIZE); + ret = -ENOMEM; + if (!serv) + goto out_err; + /* FIXME: We don't want to register this socket with the portmapper */ + ret = svc_makesock(serv, IPPROTO_TCP, 0); + if (ret < 0) + goto out_destroy; + if (!list_empty(&serv->sv_permsocks)) { + svsk = list_entry(serv->sv_permsocks.next, + struct svc_sock, sk_list); + nfs_callback_tcpport = ntohs(inet_sk(svsk->sk_sk)->sport); + dprintk ("Callback port = 0x%x\n", nfs_callback_tcpport); + } else + BUG(); + ret = svc_create_thread(nfs_callback_svc, serv); + if (ret < 0) + goto out_destroy; + nfs_callback_info.serv = serv; + wait_for_completion(&nfs_callback_info.started); +out: + up(&nfs_callback_sema); + unlock_kernel(); + return ret; +out_destroy: + svc_destroy(serv); +out_err: + nfs_callback_info.users--; + goto out; +} + +/* + * Kill the server process if it is not already down.
+ */ +int nfs_callback_down(void) +{ + int ret = 0; + + lock_kernel(); + down(&nfs_callback_sema); + if (--nfs_callback_info.users || nfs_callback_info.pid == 0) + goto out; + kill_proc(nfs_callback_info.pid, SIGKILL, 1); + wait_for_completion(&nfs_callback_info.stopped); +out: + up(&nfs_callback_sema); + unlock_kernel(); + return ret; +} + +/* + * AUTH_NULL authentication + */ +static int nfs_callback_null_accept(struct svc_rqst *rqstp, u32 *authp) +{ + struct iovec *argv = &rqstp->rq_arg.head[0]; + struct iovec *resv = &rqstp->rq_res.head[0]; + + if (argv->iov_len < 3*4) + return SVC_GARBAGE; + + if (svc_getu32(argv) != 0) { + dprintk("svc: bad null cred\n"); + *authp = rpc_autherr_badcred; + return SVC_DENIED; + } + if (svc_getu32(argv) != RPC_AUTH_NULL || svc_getu32(argv) != 0) { + dprintk("svc: bad null verf\n"); + *authp = rpc_autherr_badverf; + return SVC_DENIED; + } + + /* Signal that mapping to nobody uid/gid is required */ + rqstp->rq_cred.cr_uid = (uid_t) -1; + rqstp->rq_cred.cr_gid = (gid_t) -1; + rqstp->rq_cred.cr_group_info = groups_alloc(0); + if (rqstp->rq_cred.cr_group_info == NULL) + return SVC_DROP; /* kmalloc failure - client must retry */ + + /* Put NULL verifier */ + svc_putu32(resv, RPC_AUTH_NULL); + svc_putu32(resv, 0); + dprintk("%s: success, returning %d!\n", __FUNCTION__, SVC_OK); + return SVC_OK; +} + +static int nfs_callback_null_release(struct svc_rqst *rqstp) +{ + if (rqstp->rq_cred.cr_group_info) + put_group_info(rqstp->rq_cred.cr_group_info); + rqstp->rq_cred.cr_group_info = NULL; + return 0; /* don't drop */ +} + +static struct auth_ops nfs_callback_auth_null = { + .name = "null", + .flavour = RPC_AUTH_NULL, + .accept = nfs_callback_null_accept, + .release = nfs_callback_null_release, +}; + +/* + * AUTH_SYS authentication + */ +static int nfs_callback_unix_accept(struct svc_rqst *rqstp, u32 *authp) +{ + struct iovec *argv = &rqstp->rq_arg.head[0]; + struct iovec *resv = &rqstp->rq_res.head[0]; + struct svc_cred *cred = 
&rqstp->rq_cred; + u32 slen, i; + int len = argv->iov_len; + + dprintk("%s: start\n", __FUNCTION__); + cred->cr_group_info = NULL; + rqstp->rq_client = NULL; + if ((len -= 3*4) < 0) + return SVC_GARBAGE; + + /* Get length, time stamp and machine name */ + svc_getu32(argv); + svc_getu32(argv); + slen = XDR_QUADLEN(ntohl(svc_getu32(argv))); + if (slen > 64 || (len -= (slen + 3)*4) < 0) + goto badcred; + argv->iov_base = (void*)((u32*)argv->iov_base + slen); + argv->iov_len -= slen*4; + + cred->cr_uid = ntohl(svc_getu32(argv)); + cred->cr_gid = ntohl(svc_getu32(argv)); + slen = ntohl(svc_getu32(argv)); + if (slen > 16 || (len -= (slen + 2)*4) < 0) + goto badcred; + cred->cr_group_info = groups_alloc(slen); + if (cred->cr_group_info == NULL) + return SVC_DROP; + for (i = 0; i < slen; i++) + GROUP_AT(cred->cr_group_info, i) = ntohl(svc_getu32(argv)); + + if (svc_getu32(argv) != RPC_AUTH_NULL || svc_getu32(argv) != 0) { + *authp = rpc_autherr_badverf; + return SVC_DENIED; + } + /* Put NULL verifier */ + svc_putu32(resv, RPC_AUTH_NULL); + svc_putu32(resv, 0); + dprintk("%s: success, returning %d!\n", __FUNCTION__, SVC_OK); + return SVC_OK; +badcred: + *authp = rpc_autherr_badcred; + return SVC_DENIED; +} + +static int nfs_callback_unix_release(struct svc_rqst *rqstp) +{ + if (rqstp->rq_cred.cr_group_info) + put_group_info(rqstp->rq_cred.cr_group_info); + rqstp->rq_cred.cr_group_info = NULL; + return 0; +} + +static struct auth_ops nfs_callback_auth_unix = { + .name = "unix", + .flavour = RPC_AUTH_UNIX, + .accept = nfs_callback_unix_accept, + .release = nfs_callback_unix_release, +}; + +/* + * Hook the authentication protocol + */ +static int nfs_callback_auth(struct svc_rqst *rqstp, u32 *authp) +{ + struct in_addr *addr = &rqstp->rq_addr.sin_addr; + struct nfs4_client *clp; + struct iovec *argv = &rqstp->rq_arg.head[0]; + int flavour; + int retval; + + /* Don't talk to strangers */ + clp = nfs4_find_client(addr); + if (clp == NULL) + return SVC_DROP; + dprintk("%s: 
%u.%u.%u.%u NFSv4 callback!\n", __FUNCTION__, NIPQUAD(addr->s_addr)); /* pass the address value, as nfs_callback_svc() does, not the pointer */ + nfs4_put_client(clp); + flavour = ntohl(svc_getu32(argv)); + switch(flavour) { + case RPC_AUTH_NULL: + if (rqstp->rq_proc != CB_NULL) { + *authp = rpc_autherr_tooweak; + retval = SVC_DENIED; + break; + } + rqstp->rq_authop = &nfs_callback_auth_null; + retval = nfs_callback_null_accept(rqstp, authp); + break; + case RPC_AUTH_UNIX: + /* Eat the authentication flavour */ + rqstp->rq_authop = &nfs_callback_auth_unix; + retval = nfs_callback_unix_accept(rqstp, authp); + break; + default: + /* FIXME: need to add RPCSEC_GSS upcalls */ +#if 0 + svc_ungetu32(argv); + retval = svc_authenticate(rqstp, authp); +#else + *authp = rpc_autherr_rejectedcred; + retval = SVC_DENIED; +#endif + } + dprintk("%s: flavour %d returning error %d\n", __FUNCTION__, flavour, retval); + return retval; +} + +/* + * Define NFS4 callback program + */ +extern struct svc_version nfs4_callback_version1; + +static struct svc_version *nfs4_callback_version[] = { + [1] = &nfs4_callback_version1, +}; + +static struct svc_stat nfs4_callback_stats; + +static struct svc_program nfs4_callback_program = { + .pg_prog = NFS4_CALLBACK, /* RPC service number */ + .pg_nvers = ARRAY_SIZE(nfs4_callback_version), /* Number of entries */ + .pg_vers = nfs4_callback_version, /* version table */ + .pg_name = "NFSv4 callback", /* service name */ + .pg_class = "nfs", /* authentication class */ + .pg_stats = &nfs4_callback_stats, + .pg_authenticate = nfs_callback_auth, +}; diff -u --recursive --new-file --show-c-function linux-2.6.7-rc3/fs/nfs/callback.h linux-2.6.7-17-delegation_cache/fs/nfs/callback.h --- linux-2.6.7-rc3/fs/nfs/callback.h 1969-12-31 19:00:00.000000000 -0500 +++ linux-2.6.7-17-delegation_cache/fs/nfs/callback.h 2004-06-10 01:04:59.000000000 -0400 @@ -0,0 +1,70 @@ +/* + * linux/fs/nfs/callback.h + * + * Copyright (C) 2004 Trond Myklebust + * + * NFSv4 callback definitions + */ +#ifndef __LINUX_FS_NFS_CALLBACK_H +#define
__LINUX_FS_NFS_CALLBACK_H + +#define NFS4_CALLBACK 0x40000000 +#define NFS4_CALLBACK_XDRSIZE 2048 +#define NFS4_CALLBACK_BUFSIZE (1024 + NFS4_CALLBACK_XDRSIZE) + +enum nfs4_callback_procnum { + CB_NULL = 0, + CB_COMPOUND = 1, +}; + +enum nfs4_callback_opnum { + OP_CB_GETATTR = 3, + OP_CB_RECALL = 4, + OP_CB_ILLEGAL = 10044, +}; + +struct cb_compound_hdr_arg { + int taglen; + const char *tag; + unsigned int callback_ident; + unsigned nops; +}; + +struct cb_compound_hdr_res { + uint32_t *status; + int taglen; + const char *tag; + uint32_t *nops; +}; + +struct cb_getattrargs { + struct sockaddr_in *addr; + struct nfs_fh fh; + uint32_t bitmap[2]; +}; + +struct cb_getattrres { + uint32_t status; + uint32_t bitmap[2]; + uint64_t size; + uint64_t change_attr; + struct timespec ctime; + struct timespec mtime; +}; + +struct cb_recallargs { + struct sockaddr_in *addr; + struct nfs_fh fh; + nfs4_stateid stateid; + uint32_t truncate; +}; + +extern unsigned nfs4_callback_getattr(struct cb_getattrargs *args, struct cb_getattrres *res); +extern unsigned nfs4_callback_recall(struct cb_recallargs *args, void *dummy); + +extern int nfs_callback_up(void); +extern int nfs_callback_down(void); + +extern unsigned short nfs_callback_tcpport; + +#endif /* __LINUX_FS_NFS_CALLBACK_H */ diff -u --recursive --new-file --show-c-function linux-2.6.7-rc3/fs/nfs/callback_proc.c linux-2.6.7-17-delegation_cache/fs/nfs/callback_proc.c --- linux-2.6.7-rc3/fs/nfs/callback_proc.c 1969-12-31 19:00:00.000000000 -0500 +++ linux-2.6.7-17-delegation_cache/fs/nfs/callback_proc.c 2004-06-10 01:04:59.000000000 -0400 @@ -0,0 +1,80 @@ +/* + * linux/fs/nfs/callback_proc.c + * + * Copyright (C) 2004 Trond Myklebust + * + * NFSv4 callback procedures + */ +#include +#include +#include +#include "callback.h" +#include "delegation.h" + +#define NFSDBG_FACILITY NFSDBG_CALLBACK + +unsigned nfs4_callback_getattr(struct cb_getattrargs *args, struct cb_getattrres *res) +{ + struct nfs4_client *clp; + struct inode *inode; + 
+ res->bitmap[0] = res->bitmap[1] = 0; + res->status = htonl(NFS4ERR_BADHANDLE); + clp = nfs4_find_client(&args->addr->sin_addr); + if (clp == NULL) + goto out; + inode = nfs_delegation_find_inode(clp, &args->fh); + if (inode == NULL) + goto out_putclient; + if ((NFS_I(inode)->delegation.type & FMODE_WRITE) == 0) + goto out_iput; + res->size = i_size_read(inode); + res->change_attr = NFS_CHANGE_ATTR(inode); + res->ctime = inode->i_ctime; + res->mtime = inode->i_mtime; + res->bitmap[0] = (FATTR4_WORD0_CHANGE|FATTR4_WORD0_SIZE) & + args->bitmap[0]; + res->bitmap[1] = (FATTR4_WORD1_TIME_METADATA|FATTR4_WORD1_TIME_MODIFY) & + args->bitmap[1]; + res->status = 0; +out_iput: + iput(inode); +out_putclient: + nfs4_put_client(clp); +out: + dprintk("%s: exit with status = %d\n", __FUNCTION__, ntohl(res->status)); + return res->status; +} + +unsigned nfs4_callback_recall(struct cb_recallargs *args, void *dummy) +{ + struct nfs4_client *clp; + struct inode *inode; + struct nfs_delegation *delegation; + unsigned status; + + status = htonl(NFS4ERR_BADHANDLE); + clp = nfs4_find_client(&args->addr->sin_addr); + if (clp == NULL) + goto out; + inode = nfs_delegation_find_inode(clp, &args->fh); + if (inode == NULL) + goto out_putclient; + status = htonl(NFS4ERR_BAD_STATEID); + delegation = &NFS_I(inode)->delegation; + if (delegation->type == 0) + goto out_iput; + if (memcmp(delegation->stateid.data, args->stateid.data, sizeof(delegation->stateid.data)) != 0) + goto out_iput; + /* Set up a helper thread to actually return the delegation */ + status = 0; + if (nfs_async_inode_return_delegation(clp, inode) < 0) + status = htonl(NFS4ERR_RESOURCE); +out_iput: + iput(inode); +out_putclient: + nfs4_put_client(clp); +out: + dprintk("%s: exit with status = %d\n", __FUNCTION__, ntohl(status)); + return status; +} diff -u --recursive --new-file --show-c-function linux-2.6.7-rc3/fs/nfs/callback_xdr.c linux-2.6.7-17-delegation_cache/fs/nfs/callback_xdr.c --- linux-2.6.7-rc3/fs/nfs/callback_xdr.c 
1969-12-31 19:00:00.000000000 -0500 +++ linux-2.6.7-17-delegation_cache/fs/nfs/callback_xdr.c 2004-06-10 01:04:59.000000000 -0400 @@ -0,0 +1,481 @@ +/* + * linux/fs/nfs/callback_xdr.c + * + * Copyright (C) 2004 Trond Myklebust + * + * NFSv4 callback encode/decode procedures + */ +#include +#include +#include +#include +#include +#include "callback.h" + +#define CB_OP_TAGLEN_MAXSZ (512) +#define CB_OP_HDR_RES_MAXSZ (2 + CB_OP_TAGLEN_MAXSZ) +#define CB_OP_GETATTR_BITMAP_MAXSZ (4) +#define CB_OP_GETATTR_RES_MAXSZ (CB_OP_HDR_RES_MAXSZ + \ + CB_OP_GETATTR_BITMAP_MAXSZ + \ + 2 + 2 + 3 + 3) +#define CB_OP_RECALL_RES_MAXSZ (CB_OP_HDR_RES_MAXSZ) + +#define NFSDBG_FACILITY NFSDBG_CALLBACK + +typedef unsigned (*callback_process_op_t)(void *, void *); +typedef unsigned (*callback_decode_arg_t)(struct svc_rqst *, struct xdr_stream *, void *); +typedef unsigned (*callback_encode_res_t)(struct svc_rqst *, struct xdr_stream *, void *); + + +struct callback_op { + callback_process_op_t process_op; + callback_decode_arg_t decode_args; + callback_encode_res_t encode_res; + long res_maxsize; +}; + +static struct callback_op callback_ops[]; + +static int nfs4_callback_null(struct svc_rqst *rqstp, void *argp, void *resp) +{ + return htonl(NFS4_OK); +} + +static int nfs4_decode_void(struct svc_rqst *rqstp, uint32_t *p, void *dummy) +{ + return xdr_argsize_check(rqstp, p); +} + +static int nfs4_encode_void(struct svc_rqst *rqstp, uint32_t *p, void *dummy) +{ + return xdr_ressize_check(rqstp, p); +} + +static uint32_t *read_buf(struct xdr_stream *xdr, int nbytes) +{ + uint32_t *p; + + p = xdr_inline_decode(xdr, nbytes); + if (unlikely(p == NULL)) + printk(KERN_WARNING "NFSv4 callback reply buffer overflowed!\n"); + return p; +} + +static unsigned decode_string(struct xdr_stream *xdr, unsigned int *len, const char **str) +{ + uint32_t *p; + + p = read_buf(xdr, 4); + if (unlikely(p == NULL)) + return htonl(NFS4ERR_RESOURCE); + *len = ntohl(*p); + + if (*len != 0) { + p = read_buf(xdr, *len); 
+ if (unlikely(p == NULL)) + return htonl(NFS4ERR_RESOURCE); + *str = (const char *)p; + } else + *str = NULL; + + return 0; +} + +static unsigned decode_fh(struct xdr_stream *xdr, struct nfs_fh *fh) +{ + uint32_t *p; + + p = read_buf(xdr, 4); + if (unlikely(p == NULL)) + return htonl(NFS4ERR_RESOURCE); + fh->size = ntohl(*p); + if (fh->size > NFS4_FHSIZE) + return htonl(NFS4ERR_BADHANDLE); + p = read_buf(xdr, fh->size); + if (unlikely(p == NULL)) + return htonl(NFS4ERR_RESOURCE); + memcpy(&fh->data[0], p, fh->size); + memset(&fh->data[fh->size], 0, sizeof(fh->data) - fh->size); + return 0; +} + +static unsigned decode_bitmap(struct xdr_stream *xdr, uint32_t *bitmap) +{ + uint32_t *p; + unsigned int attrlen; + + p = read_buf(xdr, 4); + if (unlikely(p == NULL)) + return htonl(NFS4ERR_RESOURCE); + attrlen = ntohl(*p); + p = read_buf(xdr, attrlen << 2); + if (unlikely(p == NULL)) + return htonl(NFS4ERR_RESOURCE); + if (likely(attrlen > 0)) + bitmap[0] = ntohl(*p++); + if (attrlen > 1) + bitmap[1] = ntohl(*p); + return 0; +} + +static unsigned decode_stateid(struct xdr_stream *xdr, nfs4_stateid *stateid) +{ + uint32_t *p; + + p = read_buf(xdr, 16); + if (unlikely(p == NULL)) + return htonl(NFS4ERR_RESOURCE); + memcpy(stateid->data, p, 16); + return 0; +} + +static unsigned decode_compound_hdr_arg(struct xdr_stream *xdr, struct cb_compound_hdr_arg *hdr) +{ + uint32_t *p; + unsigned int minor_version; + unsigned status; + + status = decode_string(xdr, &hdr->taglen, &hdr->tag); + if (unlikely(status != 0)) + return status; + /* We do not like overly long tags! */ + if (hdr->taglen > CB_OP_TAGLEN_MAXSZ-12 || hdr->taglen < 0) { + printk("NFSv4 CALLBACK %s: client sent tag of length %u\n", + __FUNCTION__, hdr->taglen); + return htonl(NFS4ERR_RESOURCE); + } + p = read_buf(xdr, 12); + if (unlikely(p == NULL)) + return htonl(NFS4ERR_RESOURCE); + minor_version = ntohl(*p++); + /* Check minor version is zero. 
*/ + if (minor_version != 0) { + printk(KERN_WARNING "%s: NFSv4 server callback with illegal minor version %u!\n", + __FUNCTION__, minor_version); + return htonl(NFS4ERR_MINOR_VERS_MISMATCH); + } + hdr->callback_ident = ntohl(*p++); + hdr->nops = ntohl(*p); + return 0; +} + +static unsigned decode_op_hdr(struct xdr_stream *xdr, unsigned int *op) +{ + uint32_t *p; + p = read_buf(xdr, 4); + if (unlikely(p == NULL)) + return htonl(NFS4ERR_RESOURCE); + *op = ntohl(*p); + return 0; +} + +static unsigned decode_getattr_args(struct svc_rqst *rqstp, struct xdr_stream *xdr, struct cb_getattrargs *args) +{ + unsigned status; + + status = decode_fh(xdr, &args->fh); + if (unlikely(status != 0)) + goto out; + args->addr = &rqstp->rq_addr; + status = decode_bitmap(xdr, args->bitmap); +out: + dprintk("%s: exit with status = %d\n", __FUNCTION__, ntohl(status)); /* status is in network byte order */ + return status; +} + +static unsigned decode_recall_args(struct svc_rqst *rqstp, struct xdr_stream *xdr, struct cb_recallargs *args) +{ + uint32_t *p; + unsigned status; + + args->addr = &rqstp->rq_addr; + status = decode_stateid(xdr, &args->stateid); + if (unlikely(status != 0)) + goto out; + p = read_buf(xdr, 4); + if (unlikely(p == NULL)) { + status = htonl(NFS4ERR_RESOURCE); + goto out; + } + args->truncate = ntohl(*p); + status = decode_fh(xdr, &args->fh); +out: + dprintk("%s: exit with status = %d\n", __FUNCTION__, ntohl(status)); /* status is in network byte order */ + return status; /* propagate decode failures so process_op() skips the recall */ +} + +static unsigned encode_string(struct xdr_stream *xdr, unsigned int len, const char *str) +{ + uint32_t *p; + + p = xdr_reserve_space(xdr, 4 + len); + if (unlikely(p == NULL)) + return htonl(NFS4ERR_RESOURCE); + xdr_encode_opaque(p, str, len); + return 0; +} + +#define CB_SUPPORTED_ATTR0 (FATTR4_WORD0_CHANGE|FATTR4_WORD0_SIZE) +#define CB_SUPPORTED_ATTR1 (FATTR4_WORD1_TIME_METADATA|FATTR4_WORD1_TIME_MODIFY) +static unsigned encode_attr_bitmap(struct xdr_stream *xdr, const uint32_t *bitmap, uint32_t **savep) +{ + uint32_t bm[2]; + uint32_t *p; + + bm[0] = htonl(bitmap[0] &
CB_SUPPORTED_ATTR0); + bm[1] = htonl(bitmap[1] & CB_SUPPORTED_ATTR1); + if (bm[1] != 0) { + p = xdr_reserve_space(xdr, 16); + if (unlikely(p == NULL)) + return htonl(NFS4ERR_RESOURCE); + *p++ = htonl(2); + *p++ = bm[0]; + *p++ = bm[1]; + } else if (bm[0] != 0) { + p = xdr_reserve_space(xdr, 12); + if (unlikely(p == NULL)) + return htonl(NFS4ERR_RESOURCE); + *p++ = htonl(1); + *p++ = bm[0]; + } else { + p = xdr_reserve_space(xdr, 8); + if (unlikely(p == NULL)) + return htonl(NFS4ERR_RESOURCE); + *p++ = htonl(0); + } + *savep = p; + return 0; +} + +static unsigned encode_attr_change(struct xdr_stream *xdr, const uint32_t *bitmap, uint64_t change) +{ + uint32_t *p; + + if (!(bitmap[0] & FATTR4_WORD0_CHANGE)) + return 0; + p = xdr_reserve_space(xdr, 8); + if (unlikely(p == 0)) + return htonl(NFS4ERR_RESOURCE); + p = xdr_encode_hyper(p, change); + return 0; +} + +static unsigned encode_attr_size(struct xdr_stream *xdr, const uint32_t *bitmap, uint64_t size) +{ + uint32_t *p; + + if (!(bitmap[0] & FATTR4_WORD0_SIZE)) + return 0; + p = xdr_reserve_space(xdr, 8); + if (unlikely(p == 0)) + return htonl(NFS4ERR_RESOURCE); + p = xdr_encode_hyper(p, size); + return 0; +} + +static unsigned encode_attr_time(struct xdr_stream *xdr, const struct timespec *time) +{ + uint32_t *p; + + p = xdr_reserve_space(xdr, 12); + if (unlikely(p == 0)) + return htonl(NFS4ERR_RESOURCE); + p = xdr_encode_hyper(p, time->tv_sec); + *p = htonl(time->tv_nsec); + return 0; +} + +static unsigned encode_attr_ctime(struct xdr_stream *xdr, const uint32_t *bitmap, const struct timespec *time) +{ + if (!(bitmap[1] & FATTR4_WORD1_TIME_METADATA)) + return 0; + return encode_attr_time(xdr,time); +} + +static unsigned encode_attr_mtime(struct xdr_stream *xdr, const uint32_t *bitmap, const struct timespec *time) +{ + if (!(bitmap[1] & FATTR4_WORD1_TIME_MODIFY)) + return 0; + return encode_attr_time(xdr,time); +} + +static unsigned encode_compound_hdr_res(struct xdr_stream *xdr, struct cb_compound_hdr_res *hdr) 
+{ + unsigned status; + + hdr->status = xdr_reserve_space(xdr, 4); + if (unlikely(hdr->status == NULL)) + return htonl(NFS4ERR_RESOURCE); + status = encode_string(xdr, hdr->taglen, hdr->tag); + if (unlikely(status != 0)) + return status; + hdr->nops = xdr_reserve_space(xdr, 4); + if (unlikely(hdr->nops == NULL)) + return htonl(NFS4ERR_RESOURCE); + return 0; +} + +static unsigned encode_op_hdr(struct xdr_stream *xdr, uint32_t op, uint32_t res) +{ + uint32_t *p; + + p = xdr_reserve_space(xdr, 8); + if (unlikely(p == NULL)) + return htonl(NFS4ERR_RESOURCE); + *p++ = htonl(op); + *p = htonl(res); + return 0; +} + +static unsigned encode_getattr_res(struct svc_rqst *rqstp, struct xdr_stream *xdr, const struct cb_getattrres *res) +{ + uint32_t *savep; + unsigned status = res->status; + + if (unlikely(status != 0)) + goto out; + status = encode_attr_bitmap(xdr, res->bitmap, &savep); + if (unlikely(status != 0)) + goto out; + status = encode_attr_change(xdr, res->bitmap, res->change_attr); + if (unlikely(status != 0)) + goto out; + status = encode_attr_size(xdr, res->bitmap, res->size); + if (unlikely(status != 0)) + goto out; + status = encode_attr_ctime(xdr, res->bitmap, &res->ctime); + if (unlikely(status != 0)) + goto out; + status = encode_attr_mtime(xdr, res->bitmap, &res->mtime); + *savep = htonl((unsigned int)((char *)xdr->p - (char *)(savep+1))); +out: + dprintk("%s: exit with status = %d\n", __FUNCTION__, status); + return status; +} + +static unsigned process_op(struct svc_rqst *rqstp, + struct xdr_stream *xdr_in, void *argp, + struct xdr_stream *xdr_out, void *resp) +{ + struct callback_op *op; + unsigned int op_nr; + unsigned int status = 0; + long maxlen; + unsigned res; + + dprintk("%s: start\n", __FUNCTION__); + status = decode_op_hdr(xdr_in, &op_nr); + if (unlikely(status != 0)) { + op_nr = OP_CB_ILLEGAL; + op = &callback_ops[0]; + } else if (unlikely(op_nr != OP_CB_GETATTR && op_nr != OP_CB_RECALL)) { + op_nr = OP_CB_ILLEGAL; + op = &callback_ops[0]; + 
status = htonl(NFS4ERR_OP_ILLEGAL); + } else + op = &callback_ops[op_nr]; + + maxlen = xdr_out->end - xdr_out->p; + if (maxlen > 0 && maxlen < PAGE_SIZE) { + if (likely(status == 0 && op->decode_args != NULL)) + status = op->decode_args(rqstp, xdr_in, argp); + if (likely(status == 0 && op->process_op != NULL)) + status = op->process_op(argp, resp); + } else + status = htonl(NFS4ERR_RESOURCE); + + res = encode_op_hdr(xdr_out, op_nr, status); + if (status == 0) + status = res; + if (op->encode_res != NULL && status == 0) + status = op->encode_res(rqstp, xdr_out, resp); + dprintk("%s: done, status = %d\n", __FUNCTION__, status); + return status; +} + +/* + * Decode, process and encode a COMPOUND + */ +static int nfs4_callback_compound(struct svc_rqst *rqstp, void *argp, void *resp) +{ + struct cb_compound_hdr_arg hdr_arg; + struct cb_compound_hdr_res hdr_res; + struct xdr_stream xdr_in, xdr_out; + uint32_t *p; + unsigned int status; + unsigned int nops = 1; + + dprintk("%s: start\n", __FUNCTION__); + + xdr_init_decode(&xdr_in, &rqstp->rq_arg, rqstp->rq_arg.head[0].iov_base); + + p = (uint32_t*)((char *)rqstp->rq_res.head[0].iov_base + rqstp->rq_res.head[0].iov_len); + rqstp->rq_res.head[0].iov_len = PAGE_SIZE; + xdr_init_encode(&xdr_out, &rqstp->rq_res, p); + + decode_compound_hdr_arg(&xdr_in, &hdr_arg); + hdr_res.taglen = hdr_arg.taglen; + hdr_res.tag = hdr_arg.tag; + encode_compound_hdr_res(&xdr_out, &hdr_res); + + for (;;) { + status = process_op(rqstp, &xdr_in, argp, &xdr_out, resp); + if (status != 0) + break; + if (nops == hdr_arg.nops) + break; + nops++; + } + *hdr_res.status = status; + *hdr_res.nops = htonl(nops); + dprintk("%s: done, status = %u\n", __FUNCTION__, status); + return rpc_success; +} + +/* + * Define NFS4 callback COMPOUND ops. 
+ */ +static struct callback_op callback_ops[] = { + [0] = { + .res_maxsize = CB_OP_HDR_RES_MAXSZ, + }, + [OP_CB_GETATTR] = { + .process_op = (callback_process_op_t)nfs4_callback_getattr, + .decode_args = (callback_decode_arg_t)decode_getattr_args, + .encode_res = (callback_encode_res_t)encode_getattr_res, + .res_maxsize = CB_OP_GETATTR_RES_MAXSZ, + }, + [OP_CB_RECALL] = { + .process_op = (callback_process_op_t)nfs4_callback_recall, + .decode_args = (callback_decode_arg_t)decode_recall_args, + .res_maxsize = CB_OP_RECALL_RES_MAXSZ, + } +}; + +/* + * Define NFS4 callback procedures + */ +static struct svc_procedure nfs4_callback_procedures1[] = { + [CB_NULL] = { + .pc_func = nfs4_callback_null, + .pc_decode = (kxdrproc_t)nfs4_decode_void, + .pc_encode = (kxdrproc_t)nfs4_encode_void, + .pc_xdrressize = 1, + }, + [CB_COMPOUND] = { + .pc_func = nfs4_callback_compound, + .pc_encode = (kxdrproc_t)nfs4_encode_void, + .pc_argsize = 256, + .pc_ressize = 256, + .pc_xdrressize = NFS4_CALLBACK_BUFSIZE, + } +}; + +struct svc_version nfs4_callback_version1 = { + .vs_vers = 1, + .vs_nproc = ARRAY_SIZE(nfs4_callback_procedures1), + .vs_proc = nfs4_callback_procedures1, + .vs_xdrsize = NFS4_CALLBACK_XDRSIZE, + .vs_dispatch = NULL, +}; + diff -u --recursive --new-file --show-c-function linux-2.6.7-rc3/fs/nfs/delegation.c linux-2.6.7-17-delegation_cache/fs/nfs/delegation.c --- linux-2.6.7-rc3/fs/nfs/delegation.c 1969-12-31 19:00:00.000000000 -0500 +++ linux-2.6.7-17-delegation_cache/fs/nfs/delegation.c 2004-06-10 01:05:28.000000000 -0400 @@ -0,0 +1,190 @@ +/* + * linux/fs/nfs/delegation.c + * + * Copyright (C) 2004 Trond Myklebust + * + * NFS file delegation management + * + */ +#include +#include +#include +#include +#include + +#include +#include +#include + +#include "delegation.h" + +/* + * Set up a delegation on an inode + */ +void nfs_inode_set_delegation(struct inode *inode, struct nfs_openres *res, long generation) +{ + struct nfs_delegation *delegation = 
&NFS_I(inode)->delegation; + struct nfs4_client *clp = NFS_SERVER(inode)->nfs4_state; + + spin_lock(&clp->cl_lock); + spin_lock(&inode->i_lock); + if (delegation->type <= res->delegation_type) { + memcpy(delegation->stateid.data, res->delegation.data, + sizeof(delegation->stateid.data)); + delegation->type = res->delegation_type; + delegation->maxsize = res->maxsize; + delegation->generation = generation; + if (list_empty(&delegation->list)) + list_add(&delegation->list, &clp->cl_delegations); + } + spin_unlock(&inode->i_lock); + spin_unlock(&clp->cl_lock); +} + +static void nfs_inode_reclaim_opens(struct inode *inode) +{ + struct nfs_inode *nfsi = NFS_I(inode); + struct nfs_open_context *pos; + struct nfs4_state *state; + +again: + spin_lock(&inode->i_lock); + list_for_each_entry(pos, &nfsi->open_files, open_files) { + state = pos->state; + if (state == NULL) + continue; + if (!test_bit(NFS_DELEGATED_STATE, &state->flags)) + continue; + get_nfs_open_context(pos); + spin_unlock(&inode->i_lock); + nfs4_open_delegation_recall(pos->dentry, state); + put_nfs_open_context(pos); + goto again; + } + spin_unlock(&inode->i_lock); +} + +/* + * Inform the world that we no longer possess a delegation + */ +int nfs_inode_clear_delegation(struct inode *inode) +{ + struct nfs_delegation *delegation = &NFS_I(inode)->delegation; + struct nfs4_client *clp = NFS_SERVER(inode)->nfs4_state; + int res = 0; + + if (!list_empty(&delegation->list)) { + spin_lock(&clp->cl_lock); + spin_lock(&inode->i_lock); + if (!list_empty(&delegation->list)) { + delegation->type = 0; + list_del_init(&delegation->list); + res = 1; + } + spin_unlock(&inode->i_lock); + spin_unlock(&clp->cl_lock); + } + return res; +} + +/* + * Basic procedure for returning a delegation to the server + */ +int nfs_inode_return_delegation(struct inode *inode) +{ + if (!nfs_inode_clear_delegation(inode)) + return 0; + nfs_wb_all(inode); + nfs_inode_reclaim_opens(inode); + nfs_wb_all(inode); /* In case we raced with open 
reclaim */ + __nfs_revalidate_inode(NFS_SERVER(inode), inode); + return nfs4_proc_delegreturn(inode); +} + +/* + * Return all delegations associated to a super block + */ +void nfs_return_all_delegations(struct super_block *sb) +{ + struct nfs4_client *clp = NFS_SB(sb)->nfs4_state; + struct nfs_inode *nfsi; + struct inode *inode; + +restart: + spin_lock(&clp->cl_lock); + list_for_each_entry(nfsi, &clp->cl_delegations, delegation.list) { + if (nfsi->vfs_inode.i_sb != sb) + continue; + inode = igrab(&nfsi->vfs_inode); + if (inode == NULL) + continue; + spin_unlock(&clp->cl_lock); + nfs_inode_return_delegation(inode); + iput(inode); + goto restart; + } + spin_unlock(&clp->cl_lock); +} + +struct recall_threadargs { + struct inode *inode; + struct nfs4_client *clp; + + struct completion started; +}; + +static int recall_thread(void *data) +{ + struct recall_threadargs *args = (struct recall_threadargs *)data; + struct inode *inode = igrab(args->inode); + struct nfs4_client *clp = args->clp; + + daemonize("nfsv4-delegreturn"); + atomic_inc(&clp->cl_count); + complete(&args->started); + nfs_inode_return_delegation(inode); + iput(inode); + nfs4_put_client(clp); + module_put_and_exit(0); +} + +/* + * Asynchronous delegation recall! 
+ */ +int nfs_async_inode_return_delegation(struct nfs4_client *clp, struct inode *inode) +{ + struct recall_threadargs data = { + .inode = inode, + .clp = clp, + }; + int status; + + init_completion(&data.started); + __module_get(THIS_MODULE); + status = kernel_thread(recall_thread, &data, CLONE_KERNEL); + if (status < 0) + goto out_module_put; + wait_for_completion(&data.started); + return 0; +out_module_put: + module_put(THIS_MODULE); + return status; +} + +/* + * Retrieve the inode associated with a delegation + */ +struct inode *nfs_delegation_find_inode(struct nfs4_client *clp, const struct nfs_fh *fhandle) +{ + struct nfs_inode *nfsi; + struct inode *res = NULL; + spin_lock(&clp->cl_lock); + list_for_each_entry(nfsi, &clp->cl_delegations, delegation.list) { + if (nfs_compare_fh(fhandle, &nfsi->fh) == 0) { + res = igrab(&nfsi->vfs_inode); + break; + } + } + spin_unlock(&clp->cl_lock); + return res; +} diff -u --recursive --new-file --show-c-function linux-2.6.7-rc3/fs/nfs/delegation.h linux-2.6.7-17-delegation_cache/fs/nfs/delegation.h --- linux-2.6.7-rc3/fs/nfs/delegation.h 1969-12-31 19:00:00.000000000 -0500 +++ linux-2.6.7-17-delegation_cache/fs/nfs/delegation.h 2004-06-10 01:05:34.000000000 -0400 @@ -0,0 +1,40 @@ +/* + * linux/fs/nfs/delegation.h + * + * Copyright (c) Trond Myklebust + * + * Definitions pertaining to NFS delegated files + */ +#ifndef FS_NFS_DELEGATION_H +#define FS_NFS_DELEGATION_H + +#if defined(CONFIG_NFS_V4) + +void nfs_inode_set_delegation(struct inode *inode, struct nfs_openres *res, long generation); +int nfs_inode_clear_delegation(struct inode *inode); +int nfs_inode_return_delegation(struct inode *inode); +int nfs_async_inode_return_delegation(struct nfs4_client *clp, struct inode *inode); + +struct inode *nfs_delegation_find_inode(struct nfs4_client *clp, const struct nfs_fh *fhandle); +void nfs_return_all_delegations(struct super_block *sb); +/* NFSv4 delegation-related procedures */ +int nfs4_proc_delegreturn(struct inode 
*inode); +int nfs4_open_delegation_recall(struct dentry *dentry, struct nfs4_state *state); + +static inline int nfs_have_delegation(struct inode *inode, int flags) +{ + struct nfs_delegation *delegation = &NFS_I(inode)->delegation; + + flags &= (FMODE_READ|FMODE_WRITE); + return !list_empty(&delegation->list) && + ((delegation->type & flags) == flags); +} +#else + +static inline int nfs_have_delegation(struct inode *inode, int flags) +{ + return 0; +} + +#endif /* defined(CONFIG_NFS_V4) */ +#endif /* !defined(FS_NFS_DELEGATION_H) */ diff -u --recursive --new-file --show-c-function linux-2.6.7-rc3/fs/nfs/dir.c linux-2.6.7-17-delegation_cache/fs/nfs/dir.c --- linux-2.6.7-rc3/fs/nfs/dir.c 2004-06-10 00:23:19.000000000 -0400 +++ linux-2.6.7-17-delegation_cache/fs/nfs/dir.c 2004-06-10 01:05:34.000000000 -0400 @@ -32,6 +32,8 @@ #include #include +#include "delegation.h" + #define NFS_PARANOIA 1 /* #define NFS_DEBUG_VERBOSE 1 */ @@ -887,6 +889,8 @@ out: return ret; no_open: dput(parent); + if (inode != NULL && nfs_have_delegation(inode, FMODE_READ)) + return 1; return nfs_lookup_revalidate(dentry, nd); } #endif /* CONFIG_NFSV4 */ @@ -1498,10 +1502,46 @@ out: return error; } +int nfs_check_access(struct inode *inode, int mask, struct rpc_cred *cred) +{ + struct nfs_access_cache *cache = &NFS_I(inode)->cache_access; + int res; + + lock_kernel(); + if (cache->cred == cred + && time_before(jiffies, cache->jiffies + NFS_ATTRTIMEO(inode)) + && !(NFS_FLAGS(inode) & NFS_INO_INVALID_ATTR)) { + if (!(res = cache->err)) { + /* Is the mask a subset of an accepted mask? */ + if ((cache->mask & mask) == mask) + goto out; + } else { + /* ...or is it a superset of a rejected mask? 
*/ + if ((cache->mask & mask) == cache->mask) + goto out; + } + } + + res = NFS_PROTO(inode)->access(inode, cred, mask); + if (!res || res == -EACCES) + goto add_cache; +out: + unlock_kernel(); + return res; +add_cache: + cache->jiffies = jiffies; + if (cache->cred) + put_rpccred(cache->cred); + cache->cred = get_rpccred(cred); /* cache pins its own reference; the caller still drops the one it looked up */ + cache->mask = mask; + cache->err = res; + unlock_kernel(); + return res; +} + int nfs_permission(struct inode *inode, int mask, struct nameidata *nd) { - struct nfs_access_cache *cache = &NFS_I(inode)->cache_access; struct rpc_cred *cred; int mode = inode->i_mode; int res; @@ -1536,46 +1576,16 @@ nfs_permission(struct inode *inode, int return 0; } - lock_kernel(); - if (!NFS_PROTO(inode)->access) goto out_notsup; cred = rpcauth_lookupcred(NFS_CLIENT(inode)->cl_auth, 0); - if (cache->cred == cred - && time_before(jiffies, cache->jiffies + NFS_ATTRTIMEO(inode)) - && !(NFS_FLAGS(inode) & NFS_INO_INVALID_ATTR)) { - if (!(res = cache->err)) { - /* Is the mask a subset of an accepted mask? */ - if ((cache->mask & mask) == mask) - goto out; - } else { - /* ...or is it a superset of a rejected mask? 
*/ - if ((cache->mask & mask) == cache->mask) - goto out; - } - } - - res = NFS_PROTO(inode)->access(inode, cred, mask); - if (!res || res == -EACCES) - goto add_cache; -out: + res = nfs_check_access(inode, mask, cred); put_rpccred(cred); - unlock_kernel(); return res; out_notsup: nfs_revalidate_inode(NFS_SERVER(inode), inode); res = vfs_permission(inode, mask); - unlock_kernel(); - return res; -add_cache: - cache->jiffies = jiffies; - if (cache->cred) - put_rpccred(cache->cred); - cache->cred = cred; - cache->mask = mask; - cache->err = res; - unlock_kernel(); return res; } diff -u --recursive --new-file --show-c-function linux-2.6.7-rc3/fs/nfs/direct.c linux-2.6.7-17-delegation_cache/fs/nfs/direct.c --- linux-2.6.7-rc3/fs/nfs/direct.c 2004-06-10 00:23:47.000000000 -0400 +++ linux-2.6.7-17-delegation_cache/fs/nfs/direct.c 2004-06-10 01:04:48.000000000 -0400 @@ -110,7 +110,7 @@ nfs_free_user_pages(struct page **pages, * nfs_direct_read_seg - Read in one iov segment. Generate separate * read RPCs for each "rsize" bytes. 
* @inode: target inode - * @file: target file (may be NULL) + * @ctx: target file open context * user_addr: starting address of this segment of user's buffer * count: size of this segment * file_offset: offset in file to begin the operation @@ -118,7 +118,7 @@ nfs_free_user_pages(struct page **pages, * nr_pages: size of pages array */ static int -nfs_direct_read_seg(struct inode *inode, struct file *file, +nfs_direct_read_seg(struct inode *inode, struct nfs_open_context *ctx, unsigned long user_addr, size_t count, loff_t file_offset, struct page **pages, int nr_pages) { @@ -127,9 +127,10 @@ nfs_direct_read_seg(struct inode *inode, int curpage = 0; struct nfs_read_data rdata = { .inode = inode, + .cred = ctx->cred, .args = { .fh = NFS_FH(inode), - .lockowner = current->files, + .context = ctx, }, .res = { .fattr = &rdata.fattr, @@ -151,7 +152,7 @@ nfs_direct_read_seg(struct inode *inode, user_addr + tot_bytes, rdata.args.pgbase, curpage); lock_kernel(); - result = NFS_PROTO(inode)->read(&rdata, file); + result = NFS_PROTO(inode)->read(&rdata); unlock_kernel(); if (result <= 0) { @@ -183,7 +184,7 @@ nfs_direct_read_seg(struct inode *inode, * nfs_direct_read - For each iov segment, map the user's buffer * then generate read RPCs. * @inode: target inode - * @file: target file (may be NULL) + * @ctx: target file open context * @iov: array of vectors that define I/O buffer * file_offset: offset in file to begin the operation * nr_segs: size of iovec array @@ -193,7 +194,7 @@ nfs_direct_read_seg(struct inode *inode, * server. 
*/ static ssize_t -nfs_direct_read(struct inode *inode, struct file *file, +nfs_direct_read(struct inode *inode, struct nfs_open_context *ctx, const struct iovec *iov, loff_t file_offset, unsigned long nr_segs) { @@ -216,7 +217,7 @@ nfs_direct_read(struct inode *inode, str return page_count; } - result = nfs_direct_read_seg(inode, file, user_addr, size, + result = nfs_direct_read_seg(inode, ctx, user_addr, size, file_offset, pages, page_count); nfs_free_user_pages(pages, page_count, 1); @@ -239,7 +240,7 @@ nfs_direct_read(struct inode *inode, str * nfs_direct_write_seg - Write out one iov segment. Generate separate * write RPCs for each "wsize" bytes, then commit. * @inode: target inode - * @file: target file (may be NULL) + * @ctx: target file open context * user_addr: starting address of this segment of user's buffer * count: size of this segment * file_offset: offset in file to begin the operation @@ -247,7 +248,7 @@ nfs_direct_read(struct inode *inode, str * nr_pages: size of pages array */ static int -nfs_direct_write_seg(struct inode *inode, struct file *file, +nfs_direct_write_seg(struct inode *inode, struct nfs_open_context *ctx, unsigned long user_addr, size_t count, loff_t file_offset, struct page **pages, int nr_pages) { @@ -257,9 +258,10 @@ nfs_direct_write_seg(struct inode *inode struct nfs_writeverf first_verf; struct nfs_write_data wdata = { .inode = inode, + .cred = ctx->cred, .args = { .fh = NFS_FH(inode), - .lockowner = current->files, + .context = ctx, }, .res = { .fattr = &wdata.fattr, @@ -290,7 +292,7 @@ retry: user_addr + tot_bytes, wdata.args.pgbase, curpage); lock_kernel(); - result = NFS_PROTO(inode)->write(&wdata, file); + result = NFS_PROTO(inode)->write(&wdata); unlock_kernel(); if (result <= 0) { @@ -325,7 +327,7 @@ retry: wdata.args.offset = file_offset; lock_kernel(); - result = NFS_PROTO(inode)->commit(&wdata, file); + result = NFS_PROTO(inode)->commit(&wdata); unlock_kernel(); if (result < 0 || memcmp(&first_verf.verifier, @@ -349,7 
+351,7 @@ sync_retry: * nfs_direct_write - For each iov segment, map the user's buffer * then generate write and commit RPCs. * @inode: target inode - * @file: target file (may be NULL) + * @ctx: target file open context * @iov: array of vectors that define I/O buffer * file_offset: offset in file to begin the operation * nr_segs: size of iovec array @@ -358,8 +360,7 @@ sync_retry: * that non-direct readers might access, so they will pick up these * writes immediately. */ -static ssize_t -nfs_direct_write(struct inode *inode, struct file *file, +static int nfs_direct_write(struct inode *inode, struct nfs_open_context *ctx, const struct iovec *iov, loff_t file_offset, unsigned long nr_segs) { @@ -382,7 +383,7 @@ nfs_direct_write(struct inode *inode, st return page_count; } - result = nfs_direct_write_seg(inode, file, user_addr, size, + result = nfs_direct_write_seg(inode, ctx, user_addr, size, file_offset, pages, page_count); nfs_free_user_pages(pages, page_count, 0); @@ -414,6 +415,7 @@ nfs_direct_IO(int rw, struct kiocb *iocb { ssize_t result = -EINVAL; struct file *file = iocb->ki_filp; + struct nfs_open_context *ctx; struct dentry *dentry = file->f_dentry; struct inode *inode = dentry->d_inode; @@ -423,19 +425,20 @@ nfs_direct_IO(int rw, struct kiocb *iocb if (!is_sync_kiocb(iocb)) return result; + ctx = (struct nfs_open_context *)file->private_data; switch (rw) { case READ: dprintk("NFS: direct_IO(read) (%s) off/no(%Lu/%lu)\n", dentry->d_name.name, file_offset, nr_segs); - result = nfs_direct_read(inode, file, iov, + result = nfs_direct_read(inode, ctx, iov, file_offset, nr_segs); break; case WRITE: dprintk("NFS: direct_IO(write) (%s) off/no(%Lu/%lu)\n", dentry->d_name.name, file_offset, nr_segs); - result = nfs_direct_write(inode, file, iov, + result = nfs_direct_write(inode, ctx, iov, file_offset, nr_segs); break; default: @@ -471,6 +474,8 @@ nfs_file_direct_read(struct kiocb *iocb, ssize_t retval = -EINVAL; loff_t *ppos = &iocb->ki_pos; struct file *file = 
iocb->ki_filp; + struct nfs_open_context *ctx = + (struct nfs_open_context *) file->private_data; struct dentry *dentry = file->f_dentry; struct address_space *mapping = file->f_mapping; struct inode *inode = mapping->host; @@ -502,7 +507,7 @@ nfs_file_direct_read(struct kiocb *iocb, goto out; } - retval = nfs_direct_read(inode, file, &iov, pos, 1); + retval = nfs_direct_read(inode, ctx, &iov, pos, 1); if (retval > 0) *ppos = pos + retval; @@ -542,6 +547,8 @@ nfs_file_direct_write(struct kiocb *iocb loff_t *ppos = &iocb->ki_pos; unsigned long limit = current->rlim[RLIMIT_FSIZE].rlim_cur; struct file *file = iocb->ki_filp; + struct nfs_open_context *ctx = + (struct nfs_open_context *) file->private_data; struct dentry *dentry = file->f_dentry; struct address_space *mapping = file->f_mapping; struct inode *inode = mapping->host; @@ -589,7 +596,7 @@ nfs_file_direct_write(struct kiocb *iocb goto out; } - retval = nfs_direct_write(inode, file, &iov, pos, 1); + retval = nfs_direct_write(inode, ctx, &iov, pos, 1); if (mapping->nrpages) invalidate_inode_pages2(mapping); if (retval > 0) diff -u --recursive --new-file --show-c-function linux-2.6.7-rc3/fs/nfs/file.c linux-2.6.7-17-delegation_cache/fs/nfs/file.c --- linux-2.6.7-rc3/fs/nfs/file.c 2004-06-10 00:23:22.000000000 -0400 +++ linux-2.6.7-17-delegation_cache/fs/nfs/file.c 2004-06-10 01:05:34.000000000 -0400 @@ -31,6 +31,8 @@ #include #include +#include "delegation.h" + #define NFSDBG_FACILITY NFSDBG_FILE static long nfs_file_fcntl(int fd, unsigned int cmd, @@ -127,6 +129,7 @@ nfs_file_release(struct inode *inode, st static int nfs_file_flush(struct file *file) { + struct nfs_open_context *ctx = (struct nfs_open_context *)file->private_data; struct inode *inode = file->f_dentry->d_inode; int status; @@ -134,12 +137,14 @@ nfs_file_flush(struct file *file) if ((file->f_mode & FMODE_WRITE) == 0) return 0; + if (nfs_have_delegation(inode, FMODE_WRITE)) + return 0; lock_kernel(); /* Ensure that data+attribute caches are up 
to date after close() */ status = nfs_wb_all(inode); if (!status) { - status = file->f_error; - file->f_error = 0; + status = ctx->error; + ctx->error = 0; if (!status) __nfs_revalidate_inode(NFS_SERVER(inode), inode); } @@ -211,6 +216,7 @@ nfs_file_mmap(struct file * file, struct static int nfs_fsync(struct file *file, struct dentry *dentry, int datasync) { + struct nfs_open_context *ctx = (struct nfs_open_context *)file->private_data; struct inode *inode = dentry->d_inode; int status; @@ -219,8 +225,8 @@ nfs_fsync(struct file *file, struct dent lock_kernel(); status = nfs_wb_all(inode); if (!status) { - status = file->f_error; - file->f_error = 0; + status = ctx->error; + ctx->error = 0; } unlock_kernel(); return status; @@ -302,6 +308,91 @@ out_swapfile: goto out; } +static int do_getlk(struct file *filp, int cmd, struct file_lock *fl) +{ + struct inode *inode = filp->f_mapping->host; + int status; + + lock_kernel(); + status = NFS_PROTO(inode)->lock(filp, cmd, fl); + unlock_kernel(); + return status; +} + +static int do_unlk(struct file *filp, int cmd, struct file_lock *fl) +{ + struct inode *inode = filp->f_mapping->host; + sigset_t oldset; + int status; + + rpc_clnt_sigmask(NFS_CLIENT(inode), &oldset); + /* + * Flush all pending writes before doing anything + * with locks.. + */ + filemap_fdatawrite(filp->f_mapping); + down(&inode->i_sem); + nfs_wb_all(inode); + up(&inode->i_sem); + filemap_fdatawait(filp->f_mapping); + + /* NOTE: special case + * If we're signalled while cleaning up locks on process exit, we + * still need to complete the unlock. + */ + lock_kernel(); + status = NFS_PROTO(inode)->lock(filp, cmd, fl); + unlock_kernel(); /* pair with lock_kernel() above so the BKL is not leaked on return */ + rpc_clnt_sigunmask(NFS_CLIENT(inode), &oldset); + return status; +} + +static int do_setlk(struct file *filp, int cmd, struct file_lock *fl) +{ + struct inode *inode = filp->f_mapping->host; + int status; + + /* + * Flush all pending writes before doing anything + * with locks.. 
+ */ + status = filemap_fdatawrite(filp->f_mapping); + if (status == 0) { + down(&inode->i_sem); + status = nfs_wb_all(inode); + up(&inode->i_sem); + if (status == 0) + status = filemap_fdatawait(filp->f_mapping); + } + if (status < 0) + return status; + + lock_kernel(); + status = NFS_PROTO(inode)->lock(filp, cmd, fl); + /* If we were signalled we still need to ensure that + * we clean up any state on the server. We therefore + * record the lock call as having succeeded in order to + * ensure that locks_remove_posix() cleans it out when + * the process exits. + */ + if (status == -EINTR || status == -ERESTARTSYS) + posix_lock_file(filp, fl); + unlock_kernel(); + if (status < 0) + return status; + /* + * Make sure we clear the cache whenever we try to get the lock. + * This makes locking act as a cache coherency point. + */ + filemap_fdatawrite(filp->f_mapping); + down(&inode->i_sem); + nfs_wb_all(inode); /* we may have slept */ + up(&inode->i_sem); + filemap_fdatawait(filp->f_mapping); + nfs_zap_caches(inode); + return 0; +} + /* * Lock a (portion of) a file */ @@ -309,8 +399,6 @@ int nfs_lock(struct file *filp, int cmd, struct file_lock *fl) { struct inode * inode = filp->f_mapping->host; - int status = 0; - int status2; dprintk("NFS: nfs_lock(f=%s/%ld, t=%x, fl=%x, r=%Ld:%Ld)\n", inode->i_sb->s_id, inode->i_ino, @@ -328,8 +416,8 @@ nfs_lock(struct file *filp, int cmd, str /* Fake OK code if mounted without NLM support */ if (NFS_SERVER(inode)->flags & NFS_MOUNT_NONLM) { if (IS_GETLK(cmd)) - status = LOCK_USE_CLNT; - goto out_ok; + return LOCK_USE_CLNT; + return 0; } } @@ -340,45 +428,12 @@ nfs_lock(struct file *filp, int cmd, str * Not sure whether that would be unique, though, or whether * that would break in other places. */ - if (!fl->fl_owner || !(fl->fl_flags & FL_POSIX)) + if (!(fl->fl_flags & FL_POSIX)) return -ENOLCK; - /* - * Flush all pending writes before doing anything - * with locks.. 
- */ - status = filemap_fdatawrite(filp->f_mapping); - down(&inode->i_sem); - status2 = nfs_wb_all(inode); - if (!status) - status = status2; - up(&inode->i_sem); - status2 = filemap_fdatawait(filp->f_mapping); - if (!status) - status = status2; - if (status < 0) - return status; - - lock_kernel(); - status = NFS_PROTO(inode)->lock(filp, cmd, fl); - unlock_kernel(); - if (status < 0) - return status; - - status = 0; - - /* - * Make sure we clear the cache whenever we try to get the lock. - * This makes locking act as a cache coherency point. - */ - out_ok: - if ((IS_SETLK(cmd) || IS_SETLKW(cmd)) && fl->fl_type != F_UNLCK) { - filemap_fdatawrite(filp->f_mapping); - down(&inode->i_sem); - nfs_wb_all(inode); /* we may have slept */ - up(&inode->i_sem); - filemap_fdatawait(filp->f_mapping); - nfs_zap_caches(inode); - } - return status; + if (IS_GETLK(cmd)) + return do_getlk(filp, cmd, fl); + if (fl->fl_type == F_UNLCK) + return do_unlk(filp, cmd, fl); + return do_setlk(filp, cmd, fl); } diff -u --recursive --new-file --show-c-function linux-2.6.7-rc3/fs/nfs/inode.c linux-2.6.7-17-delegation_cache/fs/nfs/inode.c --- linux-2.6.7-rc3/fs/nfs/inode.c 2004-06-10 00:23:40.000000000 -0400 +++ linux-2.6.7-17-delegation_cache/fs/nfs/inode.c 2004-06-10 01:05:34.000000000 -0400 @@ -39,6 +39,8 @@ #include #include +#include "delegation.h" + #define NFSDBG_FACILITY NFSDBG_VFS #define NFS_PARANOIA 1 @@ -123,8 +125,9 @@ nfs_delete_inode(struct inode * inode) { dprintk("NFS: delete_inode(%s/%ld)\n", inode->i_sb->s_id, inode->i_ino); + nfs_wb_all(inode); /* - * The following can never actually happen... + * The following should never happen... 
*/ if (nfs_have_writebacks(inode)) { printk(KERN_ERR "nfs_delete_inode: inode %ld has pending RPC requests\n", inode->i_ino); @@ -141,10 +144,10 @@ static void nfs_clear_inode(struct inode *inode) { struct nfs_inode *nfsi = NFS_I(inode); - struct rpc_cred *cred = nfsi->mm_cred; + struct rpc_cred *cred; - if (cred) - put_rpccred(cred); + nfs_wb_all(inode); + BUG_ON (!list_empty(&nfsi->open_files)); cred = nfsi->cache_access.cred; if (cred) put_rpccred(cred); @@ -859,53 +862,114 @@ int nfs_getattr(struct vfsmount *mnt, st return err; } +struct nfs_open_context *alloc_nfs_open_context(struct dentry *dentry, struct rpc_cred *cred) +{ + struct nfs_open_context *ctx; + + ctx = (struct nfs_open_context *)kmalloc(sizeof(*ctx), GFP_KERNEL); + if (ctx != NULL) { + atomic_set(&ctx->count, 1); + ctx->dentry = dget(dentry); + ctx->cred = get_rpccred(cred); + ctx->state = NULL; + ctx->pid = current->tgid; + ctx->error = 0; + init_waitqueue_head(&ctx->waitq); + } + return ctx; +} + +struct nfs_open_context *get_nfs_open_context(struct nfs_open_context *ctx) +{ + if (ctx != NULL) + atomic_inc(&ctx->count); + return ctx; +} + +void put_nfs_open_context(struct nfs_open_context *ctx) +{ + if (atomic_dec_and_test(&ctx->count)) { + if (ctx->state != NULL) + nfs4_close_state(ctx->state, ctx->mode); + if (ctx->cred != NULL) + put_rpccred(ctx->cred); + dput(ctx->dentry); + kfree(ctx); + } +} + /* * Ensure that mmap has a recent RPC credential for use when writing out * shared pages */ -void -nfs_set_mmcred(struct inode *inode, struct rpc_cred *cred) +void nfs_file_set_open_context(struct file *filp, struct nfs_open_context *ctx) { - struct rpc_cred **p = &NFS_I(inode)->mm_cred, - *oldcred = *p; + struct inode *inode = filp->f_dentry->d_inode; + struct nfs_inode *nfsi = NFS_I(inode); - *p = get_rpccred(cred); - if (oldcred) - put_rpccred(oldcred); + filp->private_data = get_nfs_open_context(ctx); + spin_lock(&inode->i_lock); + list_add(&ctx->open_files, &nfsi->open_files); + 
spin_unlock(&inode->i_lock); +} + +struct nfs_open_context *nfs_find_open_context(struct inode *inode, int mode) +{ + struct nfs_inode *nfsi = NFS_I(inode); + struct nfs_open_context *pos, *ctx = NULL; + + spin_lock(&inode->i_lock); + list_for_each_entry(pos, &nfsi->open_files, open_files) { + if ((pos->mode & mode) == mode) { + ctx = get_nfs_open_context(pos); + break; + } + } + spin_unlock(&inode->i_lock); + return ctx; +} + +void nfs_file_clear_open_context(struct file *filp) +{ + struct inode *inode = filp->f_dentry->d_inode; + struct nfs_open_context *ctx = (struct nfs_open_context *)filp->private_data; + + if (ctx) { + filp->private_data = NULL; + spin_lock(&inode->i_lock); + list_del(&ctx->open_files); + spin_unlock(&inode->i_lock); + put_nfs_open_context(ctx); + } } /* - * These are probably going to contain hooks for - * allocating and releasing RPC credentials for - * the file. I'll have to think about Tronds patch - * a bit more.. + * These allocate and release file read/write context information. 
*/ int nfs_open(struct inode *inode, struct file *filp) { - struct rpc_auth *auth; + struct nfs_open_context *ctx; struct rpc_cred *cred; - auth = NFS_CLIENT(inode)->cl_auth; - cred = rpcauth_lookupcred(auth, 0); - filp->private_data = cred; - if ((filp->f_mode & FMODE_WRITE) != 0) { - nfs_set_mmcred(inode, cred); + if ((cred = rpcauth_lookupcred(NFS_CLIENT(inode)->cl_auth, 0)) == NULL) + return -ENOMEM; + ctx = alloc_nfs_open_context(filp->f_dentry, cred); + put_rpccred(cred); + if (ctx == NULL) + return -ENOMEM; + ctx->mode = filp->f_mode; + nfs_file_set_open_context(filp, ctx); + put_nfs_open_context(ctx); + if ((filp->f_mode & FMODE_WRITE) != 0) nfs_begin_data_update(inode); - } return 0; } int nfs_release(struct inode *inode, struct file *filp) { - struct rpc_cred *cred; - - lock_kernel(); if ((filp->f_mode & FMODE_WRITE) != 0) nfs_end_data_update(inode); - cred = nfs_file_cred(filp); - if (cred) - put_rpccred(cred); - unlock_kernel(); + nfs_file_clear_open_context(filp); return 0; } @@ -1002,6 +1066,30 @@ out: return status; } +int nfs_attribute_timeout(struct inode *inode) +{ + struct nfs_inode *nfsi = NFS_I(inode); + + if (nfs_have_delegation(inode, FMODE_READ)) + return 0; + return time_after(jiffies, nfsi->read_cache_jiffies+nfsi->attrtimeo); +} + +/** + * nfs_revalidate_inode - Revalidate the inode attributes + * @server - pointer to nfs_server struct + * @inode - pointer to inode struct + * + * Updates inode attribute information by retrieving the data from the server. + */ +int nfs_revalidate_inode(struct nfs_server *server, struct inode *inode) +{ + if (!(NFS_FLAGS(inode) & (NFS_INO_INVALID_ATTR|NFS_INO_INVALID_DATA)) + && !nfs_attribute_timeout(inode)) + return NFS_STALE(inode) ? 
-ESTALE : 0; + return __nfs_revalidate_inode(server, inode); +} + /** * nfs_begin_data_update * @inode - pointer to inode @@ -1023,11 +1111,13 @@ void nfs_end_data_update(struct inode *i { struct nfs_inode *nfsi = NFS_I(inode); - /* Mark the attribute cache for revalidation */ - nfsi->flags |= NFS_INO_INVALID_ATTR; - /* Directories and symlinks: invalidate page cache too */ - if (S_ISDIR(inode->i_mode) || S_ISLNK(inode->i_mode)) - nfsi->flags |= NFS_INO_INVALID_DATA; + if (!nfs_have_delegation(inode, FMODE_READ)) { + /* Mark the attribute cache for revalidation */ + nfsi->flags |= NFS_INO_INVALID_ATTR; + /* Directories and symlinks: invalidate page cache too */ + if (S_ISDIR(inode->i_mode) || S_ISLNK(inode->i_mode)) + nfsi->flags |= NFS_INO_INVALID_DATA; + } nfsi->cache_change_attribute ++; atomic_dec(&nfsi->data_updates); } @@ -1068,6 +1158,10 @@ int nfs_refresh_inode(struct inode *inod loff_t cur_size, new_isize; int data_unstable; + /* Do we hold a delegation? */ + if (nfs_have_delegation(inode, FMODE_READ)) + return 0; + /* Are we in the process of updating data on the server? */ data_unstable = nfs_caches_unstable(inode); @@ -1265,7 +1359,8 @@ static int nfs_update_inode(struct inode if (!(S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode) || S_ISLNK(inode->i_mode))) invalid &= ~NFS_INO_INVALID_DATA; - nfsi->flags |= invalid; + if (!nfs_have_delegation(inode, FMODE_READ)) + nfsi->flags |= invalid; return 0; out_changed: @@ -1402,6 +1497,7 @@ static struct file_system_type nfs_fs_ty static void nfs4_clear_inode(struct inode *); + static struct super_operations nfs4_sops = { .alloc_inode = nfs_alloc_inode, .destroy_inode = nfs_destroy_inode, @@ -1423,6 +1519,11 @@ static void nfs4_clear_inode(struct inod { struct nfs_inode *nfsi = NFS_I(inode); + /* If we are holding a delegation, return it! 
*/ + nfs_inode_return_delegation(inode); + /* First call standard NFS clear_inode() code */ + nfs_clear_inode(inode); + /* Now clear out any remaining state */ while (!list_empty(&nfsi->open_states)) { struct nfs4_state *state; @@ -1437,8 +1538,6 @@ static void nfs4_clear_inode(struct inod BUG_ON(atomic_read(&state->count) != 1); nfs4_close_state(state, state->state); } - /* Now call standard NFS clear_inode() code */ - nfs_clear_inode(inode); } @@ -1709,17 +1808,25 @@ out_free: return s; } +static void nfs4_kill_super(struct super_block *sb) +{ + nfs_return_all_delegations(sb); + nfs_kill_super(sb); +} + static struct file_system_type nfs4_fs_type = { .owner = THIS_MODULE, .name = "nfs4", .get_sb = nfs4_get_sb, - .kill_sb = nfs_kill_super, + .kill_sb = nfs4_kill_super, .fs_flags = FS_ODD_RENAME|FS_REVAL_DOT|FS_BINARY_MOUNTDATA, }; #define nfs4_zero_state(nfsi) \ do { \ INIT_LIST_HEAD(&(nfsi)->open_states); \ + INIT_LIST_HEAD(&(nfsi)->delegation.list); \ + nfsi->delegation.type = 0; \ } while(0) #define register_nfs4fs() register_filesystem(&nfs4_fs_type) #define unregister_nfs4fs() unregister_filesystem(&nfs4_fs_type) @@ -1746,7 +1853,6 @@ static struct inode *nfs_alloc_inode(str if (!nfsi) return NULL; nfsi->flags = 0; - nfsi->mm_cred = NULL; nfs4_zero_state(nfsi); return &nfsi->vfs_inode; } @@ -1765,6 +1871,7 @@ static void init_once(void * foo, kmem_c inode_init_once(&nfsi->vfs_inode); INIT_LIST_HEAD(&nfsi->dirty); INIT_LIST_HEAD(&nfsi->commit); + INIT_LIST_HEAD(&nfsi->open_files); INIT_RADIX_TREE(&nfsi->nfs_page_tree, GFP_ATOMIC); atomic_set(&nfsi->data_updates, 0); nfsi->ndirty = 0; diff -u --recursive --new-file --show-c-function linux-2.6.7-rc3/fs/nfs/Makefile linux-2.6.7-17-delegation_cache/fs/nfs/Makefile --- linux-2.6.7-rc3/fs/nfs/Makefile 2004-06-10 00:23:07.000000000 -0400 +++ linux-2.6.7-17-delegation_cache/fs/nfs/Makefile 2004-06-10 01:04:59.000000000 -0400 @@ -9,6 +9,7 @@ nfs-y := dir.o file.o inode.o nfs2xdr nfs-$(CONFIG_ROOT_NFS) += nfsroot.o 
mount_clnt.o nfs-$(CONFIG_NFS_V3) += nfs3proc.o nfs3xdr.o nfs-$(CONFIG_NFS_V4) += nfs4proc.o nfs4xdr.o nfs4state.o nfs4renewd.o \ - idmap.o + delegation.o idmap.o \ + callback.o callback_xdr.o callback_proc.o nfs-$(CONFIG_NFS_DIRECTIO) += direct.o nfs-objs := $(nfs-y) diff -u --recursive --new-file --show-c-function linux-2.6.7-rc3/fs/nfs/nfs3proc.c linux-2.6.7-17-delegation_cache/fs/nfs/nfs3proc.c --- linux-2.6.7-rc3/fs/nfs/nfs3proc.c 2004-06-10 00:23:19.000000000 -0400 +++ linux-2.6.7-17-delegation_cache/fs/nfs/nfs3proc.c 2004-06-10 01:04:44.000000000 -0400 @@ -68,18 +68,6 @@ nfs3_async_handle_jukebox(struct rpc_tas return 1; } -static struct rpc_cred * -nfs_cred(struct inode *inode, struct file *filp) -{ - struct rpc_cred *cred = NULL; - - if (filp) - cred = (struct rpc_cred *)filp->private_data; - if (!cred) - cred = NFS_I(inode)->mm_cred; - return cred; -} - /* * Bare-bones access to getattr: this is for nfs_read_super. */ @@ -227,8 +215,7 @@ nfs3_proc_readlink(struct inode *inode, return status; } -static int -nfs3_proc_read(struct nfs_read_data *rdata, struct file *filp) +static int nfs3_proc_read(struct nfs_read_data *rdata) { int flags = rdata->flags; struct inode * inode = rdata->inode; @@ -237,13 +224,13 @@ nfs3_proc_read(struct nfs_read_data *rda .rpc_proc = &nfs3_procedures[NFS3PROC_READ], .rpc_argp = &rdata->args, .rpc_resp = &rdata->res, + .rpc_cred = rdata->cred, }; int status; dprintk("NFS call read %d @ %Ld\n", rdata->args.count, (long long) rdata->args.offset); fattr->valid = 0; - msg.rpc_cred = nfs_cred(inode, filp); status = rpc_call_sync(NFS_CLIENT(inode), &msg, flags); if (status >= 0) nfs_refresh_inode(inode, fattr); @@ -251,8 +238,7 @@ nfs3_proc_read(struct nfs_read_data *rda return status; } -static int -nfs3_proc_write(struct nfs_write_data *wdata, struct file *filp) +static int nfs3_proc_write(struct nfs_write_data *wdata) { int rpcflags = wdata->flags; struct inode * inode = wdata->inode; @@ -261,13 +247,13 @@ nfs3_proc_write(struct 
nfs_write_data *w .rpc_proc = &nfs3_procedures[NFS3PROC_WRITE], .rpc_argp = &wdata->args, .rpc_resp = &wdata->res, + .rpc_cred = wdata->cred, }; int status; dprintk("NFS call write %d @ %Ld\n", wdata->args.count, (long long) wdata->args.offset); fattr->valid = 0; - msg.rpc_cred = nfs_cred(inode, filp); status = rpc_call_sync(NFS_CLIENT(inode), &msg, rpcflags); if (status >= 0) nfs_refresh_inode(inode, fattr); @@ -275,8 +261,7 @@ nfs3_proc_write(struct nfs_write_data *w return status < 0? status : wdata->res.count; } -static int -nfs3_proc_commit(struct nfs_write_data *cdata, struct file *filp) +static int nfs3_proc_commit(struct nfs_write_data *cdata) { struct inode * inode = cdata->inode; struct nfs_fattr * fattr = cdata->res.fattr; @@ -284,13 +269,13 @@ nfs3_proc_commit(struct nfs_write_data * .rpc_proc = &nfs3_procedures[NFS3PROC_COMMIT], .rpc_argp = &cdata->args, .rpc_resp = &cdata->res, + .rpc_cred = cdata->cred, }; int status; dprintk("NFS call commit %d @ %Ld\n", cdata->args.count, (long long) cdata->args.offset); fattr->valid = 0; - msg.rpc_cred = nfs_cred(inode, filp); status = rpc_call_sync(NFS_CLIENT(inode), &msg, 0); if (status >= 0) nfs_refresh_inode(inode, fattr); @@ -832,27 +817,6 @@ nfs3_proc_commit_setup(struct nfs_write_ rpc_call_setup(task, &msg, 0); } -/* - * Set up the nfspage struct with the right credentials - */ -void -nfs3_request_init(struct nfs_page *req, struct file *filp) -{ - req->wb_cred = get_rpccred(nfs_cred(req->wb_inode, filp)); -} - -static int -nfs3_request_compatible(struct nfs_page *req, struct file *filp, struct page *page) -{ - if (req->wb_file != filp) - return 0; - if (req->wb_page != page) - return 0; - if (req->wb_cred != nfs_file_cred(filp)) - return 0; - return 1; -} - static int nfs3_proc_lock(struct file *filp, int cmd, struct file_lock *fl) { @@ -892,7 +856,5 @@ struct nfs_rpc_ops nfs_v3_clientops = { .commit_setup = nfs3_proc_commit_setup, .file_open = nfs_open, .file_release = nfs_release, - .request_init = 
nfs3_request_init, - .request_compatible = nfs3_request_compatible, .lock = nfs3_proc_lock, }; diff -u --recursive --new-file --show-c-function linux-2.6.7-rc3/fs/nfs/nfs4proc.c linux-2.6.7-17-delegation_cache/fs/nfs/nfs4proc.c --- linux-2.6.7-rc3/fs/nfs/nfs4proc.c 2004-06-10 00:23:36.000000000 -0400 +++ linux-2.6.7-17-delegation_cache/fs/nfs/nfs4proc.c 2004-06-10 01:05:28.000000000 -0400 @@ -47,6 +47,8 @@ #include #include +#include "delegation.h" + #define NFSDBG_FACILITY NFSDBG_PROC #define NFS4_POLL_RETRY_TIME (15*HZ) @@ -189,29 +191,28 @@ static void update_changeattr(struct ino * reclaim state on the server after a reboot. * Assumes caller is holding the sp->so_sem */ -int -nfs4_open_reclaim(struct nfs4_state_owner *sp, struct nfs4_state *state) +static int _nfs4_open_reclaim(struct nfs4_state_owner *sp, struct nfs4_state *state) { struct inode *inode = state->inode; + struct nfs_inode *nfsi = NFS_I(inode); struct nfs_server *server = NFS_SERVER(inode); - struct nfs_fattr fattr = { - .valid = 0, - }; - struct nfs_open_reclaimargs o_arg = { + struct nfs_openargs o_arg = { .fh = NFS_FH(inode), .seqid = sp->so_seqid, .id = sp->so_id, - .share_access = state->state, + .open_flags = state->state, .clientid = server->nfs4_state->cl_clientid, .claim = NFS4_OPEN_CLAIM_PREVIOUS, + .u = { + .delegation_type = nfsi->delegation.type, + }, .bitmask = server->attr_bitmask, }; struct nfs_openres o_res = { - .f_attr = &fattr, .server = server, /* Grrr */ }; struct rpc_message msg = { - .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_OPEN_RECLAIM], + .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_OPEN_NOATTR], .rpc_argp = &o_arg, .rpc_resp = &o_res, .rpc_cred = sp->so_cred, @@ -220,14 +221,195 @@ nfs4_open_reclaim(struct nfs4_state_owne status = rpc_call_sync(server->client, &msg, 0); nfs4_increment_seqid(status, sp); - if (status == 0) + if (status == 0) { memcpy(&state->stateid, &o_res.stateid, sizeof(state->stateid)); - /* Update the inode attributes */ - nfs_refresh_inode(inode, 
&fattr); + if (o_res.delegation_type != 0) { + nfs_inode_set_delegation(inode, &o_res, sp->so_generation); + /* Did the server issue an immediate delegation recall? */ + if (o_res.do_recall) + nfs_async_inode_return_delegation(server->nfs4_state, inode); + } else + nfs_inode_clear_delegation(inode); + } + clear_bit(NFS_DELEGATED_STATE, &state->flags); + /* Ensure we update the inode attributes */ + NFS_CACHEINV(inode); + return status; +} + +int nfs4_open_reclaim(struct nfs4_state_owner *sp, struct nfs4_state *state) +{ + struct inode *inode = state->inode; + struct nfs_delegation *delegation = &NFS_I(inode)->delegation; + + if (!nfs_have_delegation(inode, state->state) + || sp->so_generation != delegation->generation) + return _nfs4_open_reclaim(sp, state); + memcpy(&state->stateid.data, delegation->stateid.data, + sizeof(state->stateid.data)); + set_bit(NFS_DELEGATED_STATE, &state->flags); + return 0; +} + +static int _nfs4_open_delegation_recall(struct dentry *dentry, struct nfs4_state *state) +{ + struct nfs4_state_owner *sp = state->owner; + struct inode *inode = dentry->d_inode; + struct nfs_server *server = NFS_SERVER(inode); + struct dentry *parent = dget_parent(dentry); + struct nfs_openargs arg = { + .fh = NFS_FH(parent->d_inode), + .clientid = server->nfs4_state->cl_clientid, + .name = &dentry->d_name, + .id = sp->so_id, + .server = server, + .bitmask = server->attr_bitmask, + .claim = NFS4_OPEN_CLAIM_DELEGATE_CUR, + }; + struct nfs_openres res = { + .server = server, + }; + struct rpc_message msg = { + .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_OPEN_NOATTR], + .rpc_argp = &arg, + .rpc_resp = &res, + .rpc_cred = sp->so_cred, + }; + int status = 0; + + down(&sp->so_sema); + if (!test_bit(NFS_DELEGATED_STATE, &state->flags)) + goto out; + if (state->state == 0) + goto out; + arg.seqid = sp->so_seqid; + arg.open_flags = state->state; + memcpy(arg.u.delegation.data, state->stateid.data, sizeof(arg.u.delegation.data)); + status = rpc_call_sync(server->client, 
&msg, 0); + nfs4_increment_seqid(status, sp); + if (status >= 0) { + memcpy(state->stateid.data, res.stateid.data, + sizeof(state->stateid.data)); + clear_bit(NFS_DELEGATED_STATE, &state->flags); + } +out: + up(&sp->so_sema); + dput(parent); + return status; +} + +int nfs4_open_delegation_recall(struct dentry *dentry, struct nfs4_state *state) +{ + int status; + + for(;;) { + status = _nfs4_open_delegation_recall(dentry, state); + if (status == 0) + break; + status = nfs4_handle_error(NFS_SERVER(dentry->d_inode), status); + if (status < 0) + break; + } + return status; +} + +static int nfs4_proc_open_confirm(struct rpc_clnt *clnt, const struct nfs_fh *fh, struct nfs4_state_owner *sp, nfs4_stateid *stateid) +{ + struct nfs_open_confirmargs arg = { + .fh = fh, + .seqid = sp->so_seqid, + .stateid = *stateid, + }; + struct nfs_open_confirmres res; + struct rpc_message msg = { + .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_OPEN_CONFIRM], + .rpc_argp = &arg, + .rpc_resp = &res, + .rpc_cred = sp->so_cred, + }; + int status; + + status = rpc_call_sync(clnt, &msg, 0); + nfs4_increment_seqid(status, sp); + if (status >= 0) + memcpy(stateid, &res.stateid, sizeof(*stateid)); return status; } /* + * Returns an nfs4_state + an extra reference to the inode + */ +struct nfs4_state *nfs4_open_delegated(struct inode *inode, int flags, struct rpc_cred *cred) +{ + struct nfs_delegation *delegation = &NFS_I(inode)->delegation; + struct nfs_server *server = NFS_SERVER(inode); + struct nfs4_state_owner *sp; + struct nfs4_state *state = NULL; + int open_flags = flags & (FMODE_READ|FMODE_WRITE); + int mask = 0; + int err = 0; + + if (!nfs_have_delegation(inode, open_flags)) + return NULL; + if (!(sp = nfs4_get_state_owner(server, cred))) { + dprintk("%s: nfs4_get_state_owner failed!\n", __FUNCTION__); + return ERR_PTR(-ENOMEM); + } + down(&sp->so_sema); + state = nfs4_get_open_state(inode, sp); + if (state == NULL) { + state = ERR_PTR(-ENOMEM); + goto out_up; + } + if ((state->state & 
open_flags) == open_flags) { + spin_lock(&inode->i_lock); + if (open_flags & FMODE_READ) + state->nreaders++; + if (open_flags & FMODE_WRITE) + state->nwriters++; + spin_unlock(&inode->i_lock); + goto out_up; + } else if (state->state != 0) + goto discard_state; + if (flags & FMODE_READ) + mask |= MAY_READ; + if (flags & FMODE_WRITE) + mask |= MAY_WRITE; + if (flags & O_APPEND) + mask |= MAY_APPEND; + err = nfs_check_access(inode, mask, cred); + if (err != 0) + goto discard_state; + spin_lock(&inode->i_lock); + if (!nfs_have_delegation(inode, open_flags)) { + spin_unlock(&inode->i_lock); + goto discard_state; + } + memcpy(state->stateid.data, delegation->stateid.data, + sizeof(state->stateid.data)); + state->state |= open_flags; + if (open_flags & FMODE_READ) + state->nreaders++; + if (open_flags & FMODE_WRITE) + state->nwriters++; + set_bit(NFS_DELEGATED_STATE, &state->flags); + spin_unlock(&inode->i_lock); +out_up: + up(&sp->so_sema); +out: + nfs4_put_state_owner(sp); + /* For compatibility with nfs4_do_open() */ + if (state != NULL && !IS_ERR(state)) + igrab(inode); + return state; +discard_state: + up(&sp->so_sema); + nfs4_put_open_state(state); + state = ERR_PTR(err); + goto out; +} + +/* * Returns an nfs4_state + an referenced inode */ struct nfs4_state * @@ -236,6 +418,7 @@ nfs4_do_open(struct inode *dir, struct q struct nfs4_state_owner *sp; struct nfs4_state *state = NULL; struct nfs_server *server = NFS_SERVER(dir); + struct nfs4_client *clp = server->nfs4_state; struct inode *inode = NULL; int status; struct nfs_fattr f_attr = { @@ -243,12 +426,11 @@ nfs4_do_open(struct inode *dir, struct q }; struct nfs_openargs o_arg = { .fh = NFS_FH(dir), - .share_access = flags & (FMODE_READ|FMODE_WRITE), - .opentype = (flags & O_CREAT) ? NFS4_OPEN_CREATE : NFS4_OPEN_NOCREATE, - .createmode = (flags & O_EXCL) ? 
NFS4_CREATE_EXCLUSIVE : NFS4_CREATE_UNCHECKED, + .open_flags = flags, .name = name, .server = server, .bitmask = server->attr_bitmask, + .claim = NFS4_OPEN_CLAIM_NULL, }; struct nfs_openres o_res = { .f_attr = &f_attr, @@ -260,31 +442,35 @@ nfs4_do_open(struct inode *dir, struct q .rpc_resp = &o_res, .rpc_cred = cred, }; + if (flags & O_EXCL) { + u32 *p = (u32 *) o_arg.u.verifier.data; + p[0] = jiffies; + p[1] = current->pid; + } else + o_arg.u.attrs = sattr; retry: status = -ENOMEM; - if (!(sp = nfs4_get_state_owner(NFS_SERVER(dir), cred))) { + if (!(sp = nfs4_get_state_owner(server, cred))) { dprintk("nfs4_do_open: nfs4_get_state_owner failed!\n"); goto out; } - if (o_arg.createmode & NFS4_CREATE_EXCLUSIVE){ - u32 *p = (u32 *) o_arg.u.verifier.data; - p[0] = jiffies; - p[1] = current->pid; - } else if (o_arg.createmode == NFS4_CREATE_UNCHECKED) { - o_arg.u.attrs = sattr; - } /* Serialization for the sequence id */ down(&sp->so_sema); o_arg.seqid = sp->so_seqid; o_arg.id = sp->so_id; - o_arg.clientid = NFS_SERVER(dir)->nfs4_state->cl_clientid, + o_arg.clientid = clp->cl_clientid; status = rpc_call_sync(server->client, &msg, 0); nfs4_increment_seqid(status, sp); if (status) goto out_up; update_changeattr(dir, &o_res.cinfo); + if(o_res.rflags & NFS4_OPEN_RESULT_CONFIRM) { + status = nfs4_proc_open_confirm(server->client, &o_res.fh, sp, &o_res.stateid); + if (status) + goto out_up; + } status = -ENOMEM; inode = nfs_fhget(dir->i_sb, &o_res.fh, &f_attr); @@ -293,28 +479,7 @@ retry: state = nfs4_get_open_state(inode, sp); if (!state) goto out_up; - - if(o_res.rflags & NFS4_OPEN_RESULT_CONFIRM) { - struct nfs_open_confirmargs oc_arg = { - .fh = &o_res.fh, - .seqid = sp->so_seqid, - }; - struct nfs_open_confirmres oc_res; - struct rpc_message msg = { - .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_OPEN_CONFIRM], - .rpc_argp = &oc_arg, - .rpc_resp = &oc_res, - .rpc_cred = cred, - }; - - memcpy(&oc_arg.stateid, &o_res.stateid, sizeof(oc_arg.stateid)); - status = 
rpc_call_sync(server->client, &msg, 0); - nfs4_increment_seqid(status, sp); - if (status) - goto out_up; - memcpy(&state->stateid, &oc_res.stateid, sizeof(state->stateid)); - } else - memcpy(&state->stateid, &o_res.stateid, sizeof(state->stateid)); + memcpy(&state->stateid, &o_res.stateid, sizeof(state->stateid)); spin_lock(&inode->i_lock); if (flags & FMODE_READ) state->nreaders++; @@ -322,7 +487,8 @@ retry: state->nwriters++; state->state |= flags & (FMODE_READ|FMODE_WRITE); spin_unlock(&inode->i_lock); - + if (o_res.delegation_type != 0) + nfs_inode_set_delegation(inode, &o_res, sp->so_generation); up(&sp->so_sema); nfs4_put_state_owner(sp); return state; @@ -449,7 +615,7 @@ nfs4_do_downgrade(struct inode *inode, s struct nfs_closeargs arg = { .fh = NFS_FH(inode), .seqid = sp->so_seqid, - .share_access = mode, + .open_flags = mode, }; struct nfs_closeres res; struct rpc_message msg = { @@ -500,7 +666,9 @@ nfs4_open_revalidate(struct inode *dir, struct inode *inode; cred = rpcauth_lookupcred(NFS_SERVER(dir)->client->cl_auth, 0); - state = nfs4_do_open(dir, &dentry->d_name, openflags, NULL, cred); + state = nfs4_open_delegated(dentry->d_inode, openflags, cred); + if (state == NULL) + state = nfs4_do_open(dir, &dentry->d_name, openflags, NULL, cred); put_rpccred(cred); if (state == ERR_PTR(-ENOENT) && dentry->d_inode == 0) return 1; @@ -678,9 +846,13 @@ nfs4_proc_setattr(struct dentry *dentry, if (size_change) { struct rpc_cred *cred = rpcauth_lookupcred(NFS_SERVER(inode)->client->cl_auth, 0); state = nfs4_find_state(inode, cred, FMODE_WRITE); - if (!state) { - state = nfs4_do_open(dentry->d_parent->d_inode, - &dentry->d_name, FMODE_WRITE, NULL, cred); + if (state == NULL) { + state = nfs4_open_delegated(dentry->d_inode, + FMODE_WRITE, cred); + if (state == NULL) + state = nfs4_do_open(dentry->d_parent->d_inode, + &dentry->d_name, FMODE_WRITE, + NULL, cred); need_iput = 1; } put_rpccred(cred); @@ -816,8 +988,7 @@ static int nfs4_proc_readlink(struct ino return 
nfs4_map_errors(rpc_call_sync(NFS_CLIENT(inode), &msg, 0)); } -static int -nfs4_proc_read(struct nfs_read_data *rdata, struct file *filp) +static int nfs4_proc_read(struct nfs_read_data *rdata) { int flags = rdata->flags; struct inode *inode = rdata->inode; @@ -827,6 +998,7 @@ nfs4_proc_read(struct nfs_read_data *rda .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_READ], .rpc_argp = &rdata->args, .rpc_resp = &rdata->res, + .rpc_cred = rdata->cred, }; unsigned long timestamp = jiffies; int status; @@ -834,19 +1006,6 @@ nfs4_proc_read(struct nfs_read_data *rda dprintk("NFS call read %d @ %Ld\n", rdata->args.count, (long long) rdata->args.offset); - /* - * Try first to use O_RDONLY, then O_RDWR stateid. - */ - if (filp) { - struct nfs4_state *state; - state = (struct nfs4_state *)filp->private_data; - rdata->args.state = state; - msg.rpc_cred = state->owner->so_cred; - } else { - rdata->args.state = NULL; - msg.rpc_cred = NFS_I(inode)->mm_cred; - } - fattr->valid = 0; status = rpc_call_sync(server->client, &msg, flags); if (!status) @@ -855,8 +1014,7 @@ nfs4_proc_read(struct nfs_read_data *rda return nfs4_map_errors(status); } -static int -nfs4_proc_write(struct nfs_write_data *wdata, struct file *filp) +static int nfs4_proc_write(struct nfs_write_data *wdata) { int rpcflags = wdata->flags; struct inode *inode = wdata->inode; @@ -866,33 +1024,20 @@ nfs4_proc_write(struct nfs_write_data *w .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_WRITE], .rpc_argp = &wdata->args, .rpc_resp = &wdata->res, + .rpc_cred = wdata->cred, }; int status; dprintk("NFS call write %d @ %Ld\n", wdata->args.count, (long long) wdata->args.offset); - /* - * Try first to use O_WRONLY, then O_RDWR stateid. 
- */ - if (filp) { - struct nfs4_state *state; - state = (struct nfs4_state *)filp->private_data; - wdata->args.state = state; - msg.rpc_cred = state->owner->so_cred; - } else { - wdata->args.state = NULL; - msg.rpc_cred = NFS_I(inode)->mm_cred; - } - fattr->valid = 0; status = rpc_call_sync(server->client, &msg, rpcflags); dprintk("NFS reply write: %d\n", status); return nfs4_map_errors(status); } -static int -nfs4_proc_commit(struct nfs_write_data *cdata, struct file *filp) +static int nfs4_proc_commit(struct nfs_write_data *cdata) { struct inode *inode = cdata->inode; struct nfs_fattr *fattr = cdata->res.fattr; @@ -901,20 +1046,13 @@ nfs4_proc_commit(struct nfs_write_data * .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_COMMIT], .rpc_argp = &cdata->args, .rpc_resp = &cdata->res, + .rpc_cred = cdata->cred, }; int status; dprintk("NFS call commit %d @ %Ld\n", cdata->args.count, (long long) cdata->args.offset); - /* - * Try first to use O_WRONLY, then O_RDWR stateid. - */ - if (filp) - msg.rpc_cred = ((struct nfs4_state *)filp->private_data)->owner->so_cred; - else - msg.rpc_cred = NFS_I(inode)->mm_cred; - fattr->valid = 0; status = rpc_call_sync(server->client, &msg, 0); dprintk("NFS reply commit: %d\n", status); @@ -1467,8 +1605,10 @@ static int nfs4_proc_file_open(struct inode *inode, struct file *filp) { struct dentry *dentry = filp->f_dentry; - struct nfs4_state *state; + struct nfs_open_context *ctx; + struct nfs4_state *state = NULL; struct rpc_cred *cred; + int status = -ENOMEM; dprintk("nfs4_proc_file_open: starting on (%.*s/%.*s)\n", (int)dentry->d_parent->d_name.len, @@ -1478,21 +1618,28 @@ nfs4_proc_file_open(struct inode *inode, /* Find our open stateid */ cred = rpcauth_lookupcred(NFS_SERVER(inode)->client->cl_auth, 0); - state = nfs4_find_state(inode, cred, filp->f_mode); + if (unlikely(cred == NULL)) + return -ENOMEM; + ctx = alloc_nfs_open_context(dentry, cred); put_rpccred(cred); - if (state == NULL) { - printk(KERN_WARNING "NFS: v4 raced in function 
%s\n", __FUNCTION__); - return -EIO; /* ERACE actually */ - } + if (unlikely(ctx == NULL)) + return -ENOMEM; + status = -EIO; /* ERACE actually */ + state = nfs4_find_state(inode, cred, filp->f_mode); + if (unlikely(state == NULL)) + goto no_state; + ctx->state = state; nfs4_close_state(state, filp->f_mode); - if (filp->f_mode & FMODE_WRITE) { - lock_kernel(); - nfs_set_mmcred(inode, state->owner->so_cred); + ctx->mode = filp->f_mode; + nfs_file_set_open_context(filp, ctx); + put_nfs_open_context(ctx); + if (filp->f_mode & FMODE_WRITE) nfs_begin_data_update(inode); - unlock_kernel(); - } - filp->private_data = state; return 0; +no_state: + printk(KERN_WARNING "NFS: v4 raced in function %s\n", __FUNCTION__); + put_nfs_open_context(ctx); + return status; } /* @@ -1501,37 +1648,12 @@ nfs4_proc_file_open(struct inode *inode, static int nfs4_proc_file_release(struct inode *inode, struct file *filp) { - struct nfs4_state *state = (struct nfs4_state *)filp->private_data; - - if (state) - nfs4_close_state(state, filp->f_mode); - if (filp->f_mode & FMODE_WRITE) { - lock_kernel(); + if (filp->f_mode & FMODE_WRITE) nfs_end_data_update(inode); - unlock_kernel(); - } + nfs_file_clear_open_context(filp); return 0; } -/* - * Set up the nfspage struct with the right state info and credentials - */ -static void -nfs4_request_init(struct nfs_page *req, struct file *filp) -{ - struct nfs4_state *state; - - if (!filp) { - req->wb_cred = get_rpccred(NFS_I(req->wb_inode)->mm_cred); - req->wb_state = NULL; - return; - } - state = (struct nfs4_state *)filp->private_data; - req->wb_state = state; - req->wb_cred = get_rpccred(state->owner->so_cred); - req->wb_lockowner = current->files; -} - static int nfs4_async_handle_error(struct rpc_task *task, struct nfs_server *server) { @@ -1639,35 +1761,15 @@ nfs4_handle_error(struct nfs_server *ser return nfs4_map_errors(ret); } - -static int -nfs4_request_compatible(struct nfs_page *req, struct file *filp, struct page *page) +int 
nfs4_proc_setclientid(struct nfs4_client *clp, u32 program, unsigned short port) { - struct nfs4_state *state = NULL; - struct rpc_cred *cred = NULL; - - if (req->wb_file != filp) - return 0; - if (req->wb_page != page) - return 0; - state = (struct nfs4_state *)filp->private_data; - if (req->wb_state != state) - return 0; - if (req->wb_lockowner != current->files) - return 0; - cred = state->owner->so_cred; - if (req->wb_cred != cred) - return 0; - return 1; -} - -int -nfs4_proc_setclientid(struct nfs4_client *clp, - u32 program, unsigned short port) -{ - u32 *p; - struct nfs4_setclientid setclientid; - struct timespec tv; + static nfs4_verifier sc_verifier; + static int initialized; + + struct nfs4_setclientid setclientid = { + .sc_verifier = &sc_verifier, + .sc_prog = program, + }; struct rpc_message msg = { .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_SETCLIENTID], .rpc_argp = &setclientid, @@ -1675,15 +1777,24 @@ nfs4_proc_setclientid(struct nfs4_client .rpc_cred = clp->cl_cred, }; - tv = CURRENT_TIME; - p = (u32*)setclientid.sc_verifier.data; - *p++ = (u32)tv.tv_sec; - *p = (u32)tv.tv_nsec; - setclientid.sc_name = clp->cl_ipaddr; - sprintf(setclientid.sc_netid, "tcp"); - sprintf(setclientid.sc_uaddr, "%s.%d.%d", clp->cl_ipaddr, port >> 8, port & 255); - setclientid.sc_prog = htonl(program); - setclientid.sc_cb_ident = 0; + if (!initialized) { + struct timespec boot_time; + u32 *p; + + initialized = 1; + boot_time = CURRENT_TIME; + p = (u32*)sc_verifier.data; + *p++ = htonl((u32)boot_time.tv_sec); + *p = htonl((u32)boot_time.tv_nsec); + } + setclientid.sc_name_len = scnprintf(setclientid.sc_name, + sizeof(setclientid.sc_name), "%s/%u.%u.%u.%u", + clp->cl_ipaddr, NIPQUAD(clp->cl_addr.s_addr)); + setclientid.sc_netid_len = scnprintf(setclientid.sc_netid, + sizeof(setclientid.sc_netid), "tcp"); + setclientid.sc_uaddr_len = scnprintf(setclientid.sc_uaddr, + sizeof(setclientid.sc_uaddr), "%s.%d.%d", + clp->cl_ipaddr, port >> 8, port & 255); return 
rpc_call_sync(clp->cl_rpcclient, &msg, 0); } @@ -1712,6 +1823,22 @@ nfs4_proc_setclientid_confirm(struct nfs return status; } +int nfs4_proc_delegreturn(struct inode *inode) +{ + struct nfs4_client *clp = NFS_SERVER(inode)->nfs4_state; + struct nfs4_delegreturnargs args = { + .fhandle = NFS_FH(inode), + .stateid = &NFS_I(inode)->delegation.stateid, + }; + struct rpc_message msg = { + .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_DELEGRETURN], + .rpc_argp = &args, + .rpc_cred = clp->cl_cred, + }; + + return nfs4_map_errors(rpc_call_sync(clp->cl_rpcclient, &msg, 0)); +} + #define NFS4_LOCK_MINTIMEOUT (1 * HZ) #define NFS4_LOCK_MAXTIMEOUT (30 * HZ) @@ -1780,7 +1907,7 @@ nfs4_proc_getlk(struct nfs4_state *state nlo.clientid = clp->cl_clientid; down(&state->lock_sema); - lsp = nfs4_find_lock_state(state, request->fl_owner); + lsp = nfs4_find_lock_state(state, request->fl_pid); if (lsp) nlo.id = lsp->ls_id; else { @@ -1839,7 +1966,7 @@ nfs4_proc_unlck(struct nfs4_state *state int status = 0; down(&state->lock_sema); - lsp = nfs4_find_lock_state(state, request->fl_owner); + lsp = nfs4_find_lock_state(state, request->fl_pid); if (!lsp) goto out; luargs.seqid = lsp->ls_seqid; @@ -1886,7 +2013,7 @@ nfs4_proc_setlk(struct nfs4_state *state int status; down(&state->lock_sema); - lsp = nfs4_find_lock_state(state, request->fl_owner); + lsp = nfs4_find_lock_state(state, request->fl_pid); if (lsp == NULL) { struct nfs4_state_owner *owner = state->owner; struct nfs_open_to_lock otl = { @@ -1895,7 +2022,7 @@ nfs4_proc_setlk(struct nfs4_state *state }, }; status = -ENOMEM; - lsp = nfs4_alloc_lock_state(state, request->fl_owner); + lsp = nfs4_alloc_lock_state(state, request->fl_pid); if (!lsp) goto out; otl.lock_seqid = lsp->ls_seqid; @@ -1938,13 +2065,14 @@ out: static int nfs4_proc_lock(struct file *filp, int cmd, struct file_lock *request) { + struct nfs_open_context *ctx; struct nfs4_state *state; unsigned long timeout = NFS4_LOCK_MINTIMEOUT; int status; /* verify open state */ - 
state = (struct nfs4_state *)filp->private_data; - BUG_ON(!state); + ctx = (struct nfs_open_context *)filp->private_data; + state = ctx->state; if (request->fl_start < 0 || request->fl_end < 0) return -EINVAL; @@ -2004,8 +2132,6 @@ struct nfs_rpc_ops nfs_v4_clientops = { .commit_setup = nfs4_proc_commit_setup, .file_open = nfs4_proc_file_open, .file_release = nfs4_proc_file_release, - .request_init = nfs4_request_init, - .request_compatible = nfs4_request_compatible, .lock = nfs4_proc_lock, }; diff -u --recursive --new-file --show-c-function linux-2.6.7-rc3/fs/nfs/nfs4state.c linux-2.6.7-17-delegation_cache/fs/nfs/nfs4state.c --- linux-2.6.7-rc3/fs/nfs/nfs4state.c 2004-06-10 00:23:04.000000000 -0400 +++ linux-2.6.7-17-delegation_cache/fs/nfs/nfs4state.c 2004-06-10 01:05:23.000000000 -0400 @@ -45,6 +45,8 @@ #include #include +#include "callback.h" + #define OPENOWNER_POOL_SIZE 8 static spinlock_t state_spinlock = SPIN_LOCK_UNLOCKED; @@ -93,21 +95,26 @@ nfs4_alloc_client(struct in_addr *addr) { struct nfs4_client *clp; - if ((clp = kmalloc(sizeof(*clp), GFP_KERNEL))) { - memset(clp, 0, sizeof(*clp)); - memcpy(&clp->cl_addr, addr, sizeof(clp->cl_addr)); - init_rwsem(&clp->cl_sem); - INIT_LIST_HEAD(&clp->cl_state_owners); - INIT_LIST_HEAD(&clp->cl_unused); - spin_lock_init(&clp->cl_lock); - atomic_set(&clp->cl_count, 1); - INIT_WORK(&clp->cl_recoverd, nfs4_recover_state, clp); - INIT_WORK(&clp->cl_renewd, nfs4_renew_state, clp); - INIT_LIST_HEAD(&clp->cl_superblocks); - init_waitqueue_head(&clp->cl_waitq); - rpc_init_wait_queue(&clp->cl_rpcwaitq, "NFS4 client"); - clp->cl_state = 1 << NFS4CLNT_NEW; + if (nfs_callback_up() < 0) + return NULL; + if ((clp = kmalloc(sizeof(*clp), GFP_KERNEL)) == NULL) { + nfs_callback_down(); + return NULL; } + memset(clp, 0, sizeof(*clp)); + memcpy(&clp->cl_addr, addr, sizeof(clp->cl_addr)); + init_rwsem(&clp->cl_sem); + INIT_LIST_HEAD(&clp->cl_delegations); + INIT_LIST_HEAD(&clp->cl_state_owners); + INIT_LIST_HEAD(&clp->cl_unused); + 
spin_lock_init(&clp->cl_lock); + atomic_set(&clp->cl_count, 1); + INIT_WORK(&clp->cl_recoverd, nfs4_recover_state, clp); + INIT_WORK(&clp->cl_renewd, nfs4_renew_state, clp); + INIT_LIST_HEAD(&clp->cl_superblocks); + init_waitqueue_head(&clp->cl_waitq); + rpc_init_wait_queue(&clp->cl_rpcwaitq, "NFS4 client"); + clp->cl_state = 1 << NFS4CLNT_NEW; return clp; } @@ -130,25 +137,52 @@ nfs4_free_client(struct nfs4_client *clp if (clp->cl_rpcclient) rpc_shutdown_client(clp->cl_rpcclient); kfree(clp); + nfs_callback_down(); +} + +static struct nfs4_client *__nfs4_find_client(struct in_addr *addr) +{ + struct nfs4_client *clp; + list_for_each_entry(clp, &nfs4_clientid_list, cl_servers) { + if (memcmp(&clp->cl_addr, addr, sizeof(clp->cl_addr)) == 0) { + atomic_inc(&clp->cl_count); + return clp; + } + } + return NULL; +} + +struct nfs4_client *nfs4_find_client(struct in_addr *addr) +{ + struct nfs4_client *clp; + spin_lock(&state_spinlock); + clp = __nfs4_find_client(addr); + spin_unlock(&state_spinlock); + return clp; } struct nfs4_client * nfs4_get_client(struct in_addr *addr) { - struct nfs4_client *new, *clp = NULL; + struct nfs4_client *clp, *new = NULL; - new = nfs4_alloc_client(addr); spin_lock(&state_spinlock); - list_for_each_entry(clp, &nfs4_clientid_list, cl_servers) { - if (memcmp(&clp->cl_addr, addr, sizeof(clp->cl_addr)) == 0) - goto found; + for(;;) { + clp = __nfs4_find_client(addr); + if (clp != NULL) + break; + clp = new; + if (clp != NULL) { + list_add(&clp->cl_servers, &nfs4_clientid_list); + new = NULL; + break; + } + spin_unlock(&state_spinlock); + new = nfs4_alloc_client(addr); + spin_lock(&state_spinlock); + if (new == NULL) + break; } - if (new) - list_add(&new->cl_servers, &nfs4_clientid_list); - spin_unlock(&state_spinlock); - return new; -found: - atomic_inc(&clp->cl_count); spin_unlock(&state_spinlock); if (new) nfs4_free_client(new); @@ -428,6 +462,8 @@ __nfs4_put_open_state(struct nfs4_state list_del(&state->open_states); if (state->state != 0) 
{ do { + if (test_bit(NFS_DELEGATED_STATE, &state->flags)) + break; status = nfs4_do_close(inode, state); if (!status) break; @@ -470,6 +506,8 @@ nfs4_close_state(struct nfs4_state *stat newstate = 0; if (state->state == 0) break; + if (test_bit(NFS_DELEGATED_STATE, &state->flags)) + break; if (state->nreaders) newstate |= FMODE_READ; if (state->nwriters) @@ -496,11 +534,11 @@ nfs4_close_state(struct nfs4_state *stat * that is compatible with current->files */ static struct nfs4_lock_state * -__nfs4_find_lock_state(struct nfs4_state *state, fl_owner_t fl_owner) +__nfs4_find_lock_state(struct nfs4_state *state, unsigned int pid) { struct nfs4_lock_state *pos; list_for_each_entry(pos, &state->lock_states, ls_locks) { - if (pos->ls_owner != fl_owner) + if (pos->ls_pid != pid) continue; atomic_inc(&pos->ls_count); return pos; @@ -509,11 +547,11 @@ __nfs4_find_lock_state(struct nfs4_state } struct nfs4_lock_state * -nfs4_find_lock_state(struct nfs4_state *state, fl_owner_t fl_owner) +nfs4_find_lock_state(struct nfs4_state *state, unsigned int pid) { struct nfs4_lock_state *lsp; read_lock(&state->state_lock); - lsp = __nfs4_find_lock_state(state, fl_owner); + lsp = __nfs4_find_lock_state(state, pid); read_unlock(&state->state_lock); return lsp; } @@ -525,7 +563,7 @@ nfs4_find_lock_state(struct nfs4_state * * The caller must be holding state->lock_sema */ struct nfs4_lock_state * -nfs4_alloc_lock_state(struct nfs4_state *state, fl_owner_t fl_owner) +nfs4_alloc_lock_state(struct nfs4_state *state, unsigned int pid) { struct nfs4_lock_state *lsp; struct nfs4_client *clp = state->owner->so_client; @@ -537,7 +575,7 @@ nfs4_alloc_lock_state(struct nfs4_state lsp->ls_id = -1; memset(lsp->ls_stateid.data, 0, sizeof(lsp->ls_stateid.data)); atomic_set(&lsp->ls_count, 1); - lsp->ls_owner = fl_owner; + lsp->ls_pid = pid; lsp->ls_parent = state; INIT_LIST_HEAD(&lsp->ls_locks); spin_lock(&clp->cl_lock); @@ -551,12 +589,12 @@ nfs4_alloc_lock_state(struct nfs4_state * requests. 
*/ void -nfs4_copy_stateid(nfs4_stateid *dst, struct nfs4_state *state, fl_owner_t fl_owner) +nfs4_copy_stateid(nfs4_stateid *dst, struct nfs4_state *state, unsigned int pid) { if (test_bit(LK_STATE_IN_USE, &state->flags)) { struct nfs4_lock_state *lsp; - lsp = nfs4_find_lock_state(state, fl_owner); + lsp = nfs4_find_lock_state(state, pid); if (lsp) { memcpy(dst, &lsp->ls_stateid, sizeof(*dst)); nfs4_put_lock_state(lsp); @@ -628,7 +666,7 @@ nfs4_notify_unlck(struct inode *inode, s for (fl = inode->i_flock; fl != NULL; fl = fl->fl_next) { if (!(fl->fl_flags & FL_POSIX)) continue; - if (fl->fl_owner != lsp->ls_owner) + if (fl->fl_pid != lsp->ls_pid) continue; /* Exit if we find at least one lock which is not consumed */ if (nfs4_check_unlock(fl,request) == 0) @@ -786,7 +824,7 @@ reclaimer(void *ptr) goto out; } } - status = nfs4_proc_setclientid(clp, 0, 0); + status = nfs4_proc_setclientid(clp, NFS4_CALLBACK, nfs_callback_tcpport); if (status) goto out_error; status = nfs4_proc_setclientid_confirm(clp); diff -u --recursive --new-file --show-c-function linux-2.6.7-rc3/fs/nfs/nfs4xdr.c linux-2.6.7-17-delegation_cache/fs/nfs/nfs4xdr.c --- linux-2.6.7-rc3/fs/nfs/nfs4xdr.c 2004-06-10 00:24:00.000000000 -0400 +++ linux-2.6.7-17-delegation_cache/fs/nfs/nfs4xdr.c 2004-06-10 01:05:13.000000000 -0400 @@ -122,6 +122,8 @@ static int nfs_stat_to_errno(int); 2 + 2 * nfs4_name_maxsz + \ nfs4_fattr_bitmap_maxsz) #define decode_create_maxsz (op_decode_hdr_maxsz + 8) +#define encode_delegreturn_maxsz (op_encode_hdr_maxsz + 4) +#define decode_delegreturn_maxsz (op_decode_hdr_maxsz) #define NFS4_enc_compound_sz (1024) /* XXX: large enough? */ #define NFS4_dec_compound_sz (1024) /* XXX: large enough? 
*/ #define NFS4_enc_read_sz (compound_encode_hdr_maxsz + \ @@ -172,16 +174,14 @@ static int nfs_stat_to_errno(int); #define NFS4_dec_open_confirm_sz (compound_decode_hdr_maxsz + \ decode_putfh_maxsz + \ op_decode_hdr_maxsz + 4) -#define NFS4_enc_open_reclaim_sz (compound_encode_hdr_maxsz + \ +#define NFS4_enc_open_noattr_sz (compound_encode_hdr_maxsz + \ encode_putfh_maxsz + \ op_encode_hdr_maxsz + \ - 11 + \ - encode_getattr_maxsz) -#define NFS4_dec_open_reclaim_sz (compound_decode_hdr_maxsz + \ + 11) +#define NFS4_dec_open_noattr_sz (compound_decode_hdr_maxsz + \ decode_putfh_maxsz + \ op_decode_hdr_maxsz + \ - 4 + 5 + 2 + 3 + \ - decode_getattr_maxsz) + 4 + 5 + 2 + 3) #define NFS4_enc_open_downgrade_sz \ (compound_encode_hdr_maxsz + \ encode_putfh_maxsz + \ @@ -339,6 +339,11 @@ static int nfs_stat_to_errno(int); encode_getattr_maxsz) #define NFS4_dec_server_caps_sz (compound_decode_hdr_maxsz + \ decode_getattr_maxsz) +#define NFS4_enc_delegreturn_sz (compound_encode_hdr_maxsz + \ + encode_putfh_maxsz + \ + encode_delegreturn_maxsz) +#define NFS4_dec_delegreturn_sz (compound_decode_hdr_maxsz + \ + decode_delegreturn_maxsz) static struct { unsigned int mode; @@ -388,6 +393,15 @@ struct compound_hdr { BUG_ON(!p); \ } while (0) +static void encode_string(struct xdr_stream *xdr, unsigned int len, const char *str) +{ + uint32_t *p; + + p = xdr_reserve_space(xdr, 4 + len); + BUG_ON(p == NULL); + xdr_encode_opaque(p, str, len); +} + static int encode_compound_hdr(struct xdr_stream *xdr, struct compound_hdr *hdr) { uint32_t *p; @@ -402,6 +416,15 @@ static int encode_compound_hdr(struct xd return 0; } +static void encode_nfs4_verifier(struct xdr_stream *xdr, const nfs4_verifier *verf) +{ + uint32_t *p; + + p = xdr_reserve_space(xdr, NFS4_VERIFIER_SIZE); + BUG_ON(p == NULL); + xdr_encode_opaque_fixed(p, verf->data, NFS4_VERIFIER_SIZE); +} + static int encode_attrs(struct xdr_stream *xdr, const struct iattr *iap, const struct nfs_server *server) { char 
owner_name[IDMAP_NAMESZ]; @@ -742,19 +765,12 @@ static int encode_lookup(struct xdr_stre return 0; } -static int encode_open(struct xdr_stream *xdr, const struct nfs_openargs *arg) +static void encode_share_access(struct xdr_stream *xdr, int open_flags) { - int status; uint32_t *p; - /* - * opcode 4, seqid 4, share_access 4, share_deny 4, clientid 8, ownerlen 4, - * owner 4, opentype 4 = 36 - */ - RESERVE_SPACE(36); - WRITE32(OP_OPEN); - WRITE32(arg->seqid); - switch (arg->share_access) { + RESERVE_SPACE(8); + switch (open_flags & (FMODE_READ|FMODE_WRITE)) { case FMODE_READ: WRITE32(NFS4_SHARE_ACCESS_READ); break; @@ -767,84 +783,135 @@ static int encode_open(struct xdr_stream default: BUG(); } - WRITE32(0); /* for linux, share_deny = 0 always */ + WRITE32(0); /* for linux, share_deny = 0 always */ +} + +static inline void encode_openhdr(struct xdr_stream *xdr, const struct nfs_openargs *arg) +{ + uint32_t *p; + /* + * opcode 4, seqid 4, share_access 4, share_deny 4, clientid 8, ownerlen 4, + * owner 4 = 32 + */ + RESERVE_SPACE(8); + WRITE32(OP_OPEN); + WRITE32(arg->seqid); + encode_share_access(xdr, arg->open_flags); + RESERVE_SPACE(16); WRITE64(arg->clientid); WRITE32(4); WRITE32(arg->id); - WRITE32(arg->opentype); +} - if (arg->opentype == NFS4_OPEN_CREATE) { - if (arg->createmode == NFS4_CREATE_EXCLUSIVE) { - RESERVE_SPACE(12); - WRITE32(arg->createmode); - WRITEMEM(arg->u.verifier.data, sizeof(arg->u.verifier.data)); - } - else if (arg->u.attrs) { - RESERVE_SPACE(4); - WRITE32(arg->createmode); - if ((status = encode_attrs(xdr, arg->u.attrs, arg->server))) - return status; - } - else { - RESERVE_SPACE(12); - WRITE32(arg->createmode); - WRITE32(0); - WRITE32(0); - } +static inline void encode_createmode(struct xdr_stream *xdr, const struct nfs_openargs *arg) +{ + uint32_t *p; + + RESERVE_SPACE(4); + switch(arg->open_flags & O_EXCL) { + case 0: + WRITE32(NFS4_CREATE_UNCHECKED); + encode_attrs(xdr, arg->u.attrs, arg->server); + break; + default: +
WRITE32(NFS4_CREATE_EXCLUSIVE); + encode_nfs4_verifier(xdr, &arg->u.verifier); } +} - RESERVE_SPACE(8 + arg->name->len); - WRITE32(NFS4_OPEN_CLAIM_NULL); - WRITE32(arg->name->len); - WRITEMEM(arg->name->name, arg->name->len); +static void encode_opentype(struct xdr_stream *xdr, const struct nfs_openargs *arg) +{ + uint32_t *p; - return 0; + RESERVE_SPACE(4); + switch (arg->open_flags & O_CREAT) { + case 0: + WRITE32(NFS4_OPEN_NOCREATE); + break; + default: + BUG_ON(arg->claim != NFS4_OPEN_CLAIM_NULL); + WRITE32(NFS4_OPEN_CREATE); + encode_createmode(xdr, arg); + } } -static int encode_open_confirm(struct xdr_stream *xdr, const struct nfs_open_confirmargs *arg) +static inline void encode_delegation_type(struct xdr_stream *xdr, int delegation_type) { uint32_t *p; - RESERVE_SPACE(8+sizeof(arg->stateid.data)); - WRITE32(OP_OPEN_CONFIRM); - WRITEMEM(arg->stateid.data, sizeof(arg->stateid.data)); - WRITE32(arg->seqid); + RESERVE_SPACE(4); + switch (delegation_type) { + case 0: + WRITE32(NFS4_OPEN_DELEGATE_NONE); + break; + case FMODE_READ: + WRITE32(NFS4_OPEN_DELEGATE_READ); + break; + case FMODE_WRITE|FMODE_READ: + WRITE32(NFS4_OPEN_DELEGATE_WRITE); + break; + default: + BUG(); + } +} - return 0; +static inline void encode_claim_null(struct xdr_stream *xdr, const struct qstr *name) +{ + uint32_t *p; + + RESERVE_SPACE(4); + WRITE32(NFS4_OPEN_CLAIM_NULL); + encode_string(xdr, name->len, name->name); } +static inline void encode_claim_previous(struct xdr_stream *xdr, int type) +{ + uint32_t *p; + + RESERVE_SPACE(4); + WRITE32(NFS4_OPEN_CLAIM_PREVIOUS); + encode_delegation_type(xdr, type); +} -static int encode_open_reclaim(struct xdr_stream *xdr, const struct nfs_open_reclaimargs *arg) +static inline void encode_claim_delegate_cur(struct xdr_stream *xdr, const struct qstr *name, const nfs4_stateid *stateid) { uint32_t *p; - /* - * opcode 4, seqid 4, share_access 4, share_deny 4, clientid 8, ownerlen 4, - * owner 4, opentype 4, claim 4, delegation_type 4 = 44 - */ -
RESERVE_SPACE(44); - WRITE32(OP_OPEN); - WRITE32(arg->seqid); - switch (arg->share_access) { - case FMODE_READ: - WRITE32(NFS4_SHARE_ACCESS_READ); - break; - case FMODE_WRITE: - WRITE32(NFS4_SHARE_ACCESS_WRITE); + RESERVE_SPACE(4+sizeof(stateid->data)); + WRITE32(NFS4_OPEN_CLAIM_DELEGATE_CUR); + WRITEMEM(stateid->data, sizeof(stateid->data)); + encode_string(xdr, name->len, name->name); +} + +static int encode_open(struct xdr_stream *xdr, const struct nfs_openargs *arg) +{ + encode_openhdr(xdr, arg); + encode_opentype(xdr, arg); + switch (arg->claim) { + case NFS4_OPEN_CLAIM_NULL: + encode_claim_null(xdr, arg->name); break; - case FMODE_READ|FMODE_WRITE: - WRITE32(NFS4_SHARE_ACCESS_BOTH); + case NFS4_OPEN_CLAIM_PREVIOUS: + encode_claim_previous(xdr, arg->u.delegation_type); break; + case NFS4_OPEN_CLAIM_DELEGATE_CUR: + encode_claim_delegate_cur(xdr, arg->name, &arg->u.delegation); + break; default: BUG(); } - WRITE32(0); /* for linux, share_deny = 0 always */ - WRITE64(arg->clientid); - WRITE32(4); - WRITE32(arg->id); - WRITE32(NFS4_OPEN_NOCREATE); - WRITE32(NFS4_OPEN_CLAIM_PREVIOUS); - WRITE32(NFS4_OPEN_DELEGATE_NONE); + return 0; +} + +static int encode_open_confirm(struct xdr_stream *xdr, const struct nfs_open_confirmargs *arg) +{ + uint32_t *p; + + RESERVE_SPACE(8+sizeof(arg->stateid.data)); + WRITE32(OP_OPEN_CONFIRM); + WRITEMEM(arg->stateid.data, sizeof(arg->stateid.data)); + WRITE32(arg->seqid); + return 0; } @@ -852,14 +918,11 @@ static int encode_open_downgrade(struct { uint32_t *p; - RESERVE_SPACE(16+sizeof(arg->stateid.data)); + RESERVE_SPACE(8+sizeof(arg->stateid.data)); WRITE32(OP_OPEN_DOWNGRADE); WRITEMEM(arg->stateid.data, sizeof(arg->stateid.data)); WRITE32(arg->seqid); - WRITE32(arg->share_access); - /* No deny modes */ - WRITE32(0); - + encode_share_access(xdr, arg->open_flags); return 0; } @@ -887,15 +950,15 @@ static int encode_putrootfh(struct xdr_s return 0; } -static void encode_stateid(struct xdr_stream *xdr, struct nfs4_state *state, fl_owner_t
lockowner) +static void encode_stateid(struct xdr_stream *xdr, const struct nfs_open_context *ctx) { extern nfs4_stateid zero_stateid; nfs4_stateid stateid; uint32_t *p; RESERVE_SPACE(16); - if (state != NULL) { - nfs4_copy_stateid(&stateid, state, lockowner); + if (ctx->state != NULL) { + nfs4_copy_stateid(&stateid, ctx->state, ctx->pid); WRITEMEM(stateid.data, sizeof(stateid.data)); } else WRITEMEM(zero_stateid.data, sizeof(zero_stateid.data)); @@ -908,7 +971,7 @@ static int encode_read(struct xdr_stream RESERVE_SPACE(4); WRITE32(OP_READ); - encode_stateid(xdr, args->state, args->lockowner); + encode_stateid(xdr, args->context); RESERVE_SPACE(12); WRITE64(args->offset); @@ -1031,26 +1094,18 @@ static int encode_setattr(struct xdr_str static int encode_setclientid(struct xdr_stream *xdr, const struct nfs4_setclientid *setclientid) { - uint32_t total_len; - uint32_t len1, len2, len3; uint32_t *p; - len1 = strlen(setclientid->sc_name); - len2 = strlen(setclientid->sc_netid); - len3 = strlen(setclientid->sc_uaddr); - total_len = XDR_QUADLEN(len1) + XDR_QUADLEN(len2) + XDR_QUADLEN(len3); - total_len = (total_len << 2) + 24 + sizeof(setclientid->sc_verifier.data); - - RESERVE_SPACE(total_len); + RESERVE_SPACE(4 + sizeof(setclientid->sc_verifier->data)); WRITE32(OP_SETCLIENTID); - WRITEMEM(setclientid->sc_verifier.data, sizeof(setclientid->sc_verifier.data)); - WRITE32(len1); - WRITEMEM(setclientid->sc_name, len1); + WRITEMEM(setclientid->sc_verifier->data, sizeof(setclientid->sc_verifier->data)); + + encode_string(xdr, setclientid->sc_name_len, setclientid->sc_name); + RESERVE_SPACE(4); WRITE32(setclientid->sc_prog); - WRITE32(len2); - WRITEMEM(setclientid->sc_netid, len2); - WRITE32(len3); - WRITEMEM(setclientid->sc_uaddr, len3); + encode_string(xdr, setclientid->sc_netid_len, setclientid->sc_netid); + encode_string(xdr, setclientid->sc_uaddr_len, setclientid->sc_uaddr); + RESERVE_SPACE(4); WRITE32(setclientid->sc_cb_ident); return 0; @@ -1075,7 +1130,7 @@ static int 
encode_write(struct xdr_strea RESERVE_SPACE(4); WRITE32(OP_WRITE); - encode_stateid(xdr, args->state, args->lockowner); + encode_stateid(xdr, args->context); RESERVE_SPACE(16); WRITE64(args->offset); @@ -1086,6 +1141,18 @@ static int encode_write(struct xdr_strea return 0; } + +static int encode_delegreturn(struct xdr_stream *xdr, const nfs4_stateid *stateid) +{ + uint32_t *p; + + RESERVE_SPACE(20); + + WRITE32(OP_DELEGRETURN); + WRITEMEM(stateid->data, sizeof(stateid->data)); + return 0; + +} /* * END OF "GENERIC" ENCODE ROUTINES. */ @@ -1331,13 +1398,13 @@ out: } /* - * Encode an OPEN request + * Encode an OPEN request with no attributes. */ -static int nfs4_xdr_enc_open_reclaim(struct rpc_rqst *req, uint32_t *p, struct nfs_open_reclaimargs *args) +static int nfs4_xdr_enc_open_noattr(struct rpc_rqst *req, uint32_t *p, struct nfs_openargs *args) { struct xdr_stream xdr; struct compound_hdr hdr = { - .nops = 3, + .nops = 2, }; int status; @@ -1346,10 +1413,7 @@ static int nfs4_xdr_enc_open_reclaim(str status = encode_putfh(&xdr, args->fh); if (status) goto out; - status = encode_open_reclaim(&xdr, args); - if (status) - goto out; - status = encode_getfattr(&xdr, args->bitmask); + status = encode_open(&xdr, args); out: return status; } @@ -1716,6 +1780,24 @@ static int nfs4_xdr_enc_setclientid_conf } /* + * DELEGRETURN request + */ +static int nfs4_xdr_enc_delegreturn(struct rpc_rqst *req, uint32_t *p, const struct nfs4_delegreturnargs *args) +{ + struct xdr_stream xdr; + struct compound_hdr hdr = { + .nops = 2, + }; + int status; + + xdr_init_encode(&xdr, &req->rq_snd_buf, p); + encode_compound_hdr(&xdr, &hdr); + if ((status = encode_putfh(&xdr, args->fhandle)) == 0) + status = encode_delegreturn(&xdr, args->stateid); + return status; +} + +/* * START OF "GENERIC" DECODE ROUTINES. 
* These may look a little ugly since they are imported from a "generic" * set of XDR encode/decode routines which are intended to be shared by @@ -1749,6 +1831,17 @@ static int nfs4_xdr_enc_setclientid_conf } \ } while (0) +static int decode_opaque_inline(struct xdr_stream *xdr, uint32_t *len, char **string) +{ + uint32_t *p; + + READ_BUF(4); + READ32(*len); + READ_BUF(*len); + *string = (char *)p; + return 0; +} + static int decode_compound_hdr(struct xdr_stream *xdr, struct compound_hdr *hdr) { uint32_t *p; @@ -1785,6 +1878,17 @@ static int decode_op_hdr(struct xdr_stre return 0; } +/* Dummy routine */ +static int decode_ace(struct xdr_stream *xdr, void *ace, struct nfs4_client *clp) +{ + uint32_t *p; + uint32_t strlen; + char *str; + + READ_BUF(12); + return decode_opaque_inline(xdr, &strlen, &str); +} + static int decode_attr_bitmap(struct xdr_stream *xdr, uint32_t *bitmap) { uint32_t bmlen, *p; @@ -2717,10 +2821,56 @@ static int decode_lookup(struct xdr_stre return decode_op_hdr(xdr, OP_LOOKUP); } +/* This is too sick! 
*/ +static int decode_space_limit(struct xdr_stream *xdr, u64 *maxsize) +{ + uint32_t *p; + uint32_t limit_type, nblocks, blocksize; + + READ_BUF(12); + READ32(limit_type); + switch (limit_type) { + case 1: + READ64(*maxsize); + break; + case 2: + READ32(nblocks); + READ32(blocksize); + *maxsize = (uint64_t)nblocks * (uint64_t)blocksize; + } + return 0; +} + +static int decode_delegation(struct xdr_stream *xdr, struct nfs_openres *res) +{ + uint32_t *p; + uint32_t delegation_type; + + READ_BUF(4); + READ32(delegation_type); + if (delegation_type == NFS4_OPEN_DELEGATE_NONE) { + res->delegation_type = 0; + return 0; + } + READ_BUF(20); + COPYMEM(res->delegation.data, sizeof(res->delegation.data)); + READ32(res->do_recall); + switch (delegation_type) { + case NFS4_OPEN_DELEGATE_READ: + res->delegation_type = FMODE_READ; + break; + case NFS4_OPEN_DELEGATE_WRITE: + res->delegation_type = FMODE_WRITE|FMODE_READ; + if (decode_space_limit(xdr, &res->maxsize) < 0) + return -EIO; + } + return decode_ace(xdr, NULL, res->server->nfs4_state); +} + static int decode_open(struct xdr_stream *xdr, struct nfs_openres *res) { uint32_t *p; - uint32_t bmlen, delegation_type; + uint32_t bmlen; int status; status = decode_op_hdr(xdr, OP_OPEN); @@ -2737,11 +2887,9 @@ static int decode_open(struct xdr_stream if (bmlen > 10) goto xdr_error; - READ_BUF((bmlen << 2) + 4); + READ_BUF(bmlen << 2); p += bmlen; - READ32(delegation_type); - if (delegation_type == NFS4_OPEN_DELEGATE_NONE) - return 0; + return decode_delegation(xdr, res); xdr_error: printk(KERN_NOTICE "%s: xdr error!\n", __FUNCTION__); return -EIO; @@ -3048,6 +3196,11 @@ static int decode_write(struct xdr_strea return 0; } +static int decode_delegreturn(struct xdr_stream *xdr) +{ + return decode_op_hdr(xdr, OP_DELEGRETURN); +} + /* * Decode OPEN_DOWNGRADE response */ @@ -3314,9 +3467,9 @@ out: } /* - * Decode OPEN_RECLAIM response + * Decode OPEN response */ -static int nfs4_xdr_dec_open_reclaim(struct rpc_rqst *rqstp, uint32_t *p, 
struct nfs_openres *res) +static int nfs4_xdr_dec_open_noattr(struct rpc_rqst *rqstp, uint32_t *p, struct nfs_openres *res) { struct xdr_stream xdr; struct compound_hdr hdr; @@ -3330,9 +3483,6 @@ static int nfs4_xdr_dec_open_reclaim(str if (status) goto out; status = decode_open(&xdr, res); - if (status) - goto out; - status = decode_getfattr(&xdr, res->f_attr, res->server); out: return status; } @@ -3665,6 +3815,25 @@ static int nfs4_xdr_dec_setclientid_conf return status; } +/* + * DELEGRETURN request + */ +static int nfs4_xdr_dec_delegreturn(struct rpc_rqst *rqstp, uint32_t *p, void *dummy) +{ + struct xdr_stream xdr; + struct compound_hdr hdr; + int status; + + xdr_init_decode(&xdr, &rqstp->rq_rcv_buf, p); + status = decode_compound_hdr(&xdr, &hdr); + if (status == 0) { + status = decode_putfh(&xdr); + if (status == 0) + status = decode_delegreturn(&xdr); + } + return status; +} + uint32_t *nfs4_decode_dirent(uint32_t *p, struct nfs_entry *entry, int plus) { uint32_t len; @@ -3786,7 +3955,7 @@ struct rpc_procinfo nfs4_procedures[] = PROC(COMMIT, enc_commit, dec_commit), PROC(OPEN, enc_open, dec_open), PROC(OPEN_CONFIRM, enc_open_confirm, dec_open_confirm), - PROC(OPEN_RECLAIM, enc_open_reclaim, dec_open_reclaim), + PROC(OPEN_NOATTR, enc_open_noattr, dec_open_noattr), PROC(OPEN_DOWNGRADE, enc_open_downgrade, dec_open_downgrade), PROC(CLOSE, enc_close, dec_close), PROC(SETATTR, enc_setattr, dec_setattr), @@ -3810,6 +3979,7 @@ struct rpc_procinfo nfs4_procedures[] = PROC(READLINK, enc_readlink, dec_readlink), PROC(READDIR, enc_readdir, dec_readdir), PROC(SERVER_CAPS, enc_server_caps, dec_server_caps), + PROC(DELEGRETURN, enc_delegreturn, dec_delegreturn), }; struct rpc_version nfs_version4 = { diff -u --recursive --new-file --show-c-function linux-2.6.7-rc3/fs/nfs/pagelist.c linux-2.6.7-17-delegation_cache/fs/nfs/pagelist.c --- linux-2.6.7-rc3/fs/nfs/pagelist.c 2004-06-10 00:23:50.000000000 -0400 +++ linux-2.6.7-17-delegation_cache/fs/nfs/pagelist.c 2004-06-10 
01:04:44.000000000 -0400 @@ -36,7 +36,6 @@ nfs_page_alloc(void) if (p) { memset(p, 0, sizeof(*p)); INIT_LIST_HEAD(&p->wb_list); - init_waitqueue_head(&p->wb_wait); } return p; } @@ -62,7 +61,7 @@ nfs_page_free(struct nfs_page *p) * User should ensure it is safe to sleep in this function. */ struct nfs_page * -nfs_create_request(struct file *file, struct inode *inode, +nfs_create_request(struct nfs_open_context *ctx, struct inode *inode, struct page *page, unsigned int offset, unsigned int count) { @@ -94,33 +93,38 @@ nfs_create_request(struct file *file, st req->wb_offset = offset; req->wb_pgbase = offset; req->wb_bytes = count; - req->wb_inode = inode; req->wb_count = 1; - server->rpc_ops->request_init(req, file); + req->wb_context = get_nfs_open_context(ctx); return req; } /** + * nfs_unlock_request - Unlock request and wake up sleepers. + * @req: + */ +void nfs_unlock_request(struct nfs_page *req) +{ + if (!NFS_WBACK_BUSY(req)) { + printk(KERN_ERR "NFS: Invalid unlock attempted\n"); + BUG(); + } + smp_mb__before_clear_bit(); + clear_bit(PG_BUSY, &req->wb_flags); + smp_mb__after_clear_bit(); + wake_up_all(&req->wb_context->waitq); + nfs_release_request(req); +} + +/** * nfs_clear_request - Free up all resources allocated to the request * @req: * - * Release all resources associated with a write request after it + * Release page resources associated with a write request after it * has completed. 
*/ void nfs_clear_request(struct nfs_page *req) { - if (req->wb_state) - req->wb_state = NULL; - /* Release struct file or cached credential */ - if (req->wb_file) { - fput(req->wb_file); - req->wb_file = NULL; - } - if (req->wb_cred) { - put_rpccred(req->wb_cred); - req->wb_cred = NULL; - } if (req->wb_page) { page_cache_release(req->wb_page); req->wb_page = NULL; @@ -151,6 +155,7 @@ nfs_release_request(struct nfs_page *req /* Release struct file or cached credential */ nfs_clear_request(req); + put_nfs_open_context(req->wb_context); nfs_page_free(req); } @@ -194,12 +199,12 @@ nfs_list_add_request(struct nfs_page *re int nfs_wait_on_request(struct nfs_page *req) { - struct inode *inode = req->wb_inode; + struct inode *inode = req->wb_context->dentry->d_inode; struct rpc_clnt *clnt = NFS_CLIENT(inode); if (!NFS_WBACK_BUSY(req)) return 0; - return nfs_wait_event(clnt, req->wb_wait, !NFS_WBACK_BUSY(req)); + return nfs_wait_event(clnt, req->wb_context->waitq, !NFS_WBACK_BUSY(req)); } /** @@ -224,7 +229,11 @@ nfs_coalesce_requests(struct list_head * req = nfs_list_entry(head->next); if (prev) { - if (req->wb_cred != prev->wb_cred) + if (req->wb_context->cred != prev->wb_context->cred) + break; + if (req->wb_context->pid != prev->wb_context->pid) + break; + if (req->wb_context->state != prev->wb_context->state) break; if (req->wb_index != (prev->wb_index + 1)) break; diff -u --recursive --new-file --show-c-function linux-2.6.7-rc3/fs/nfs/proc.c linux-2.6.7-17-delegation_cache/fs/nfs/proc.c --- linux-2.6.7-rc3/fs/nfs/proc.c 2004-06-10 00:23:51.000000000 -0400 +++ linux-2.6.7-17-delegation_cache/fs/nfs/proc.c 2004-06-10 01:04:44.000000000 -0400 @@ -49,18 +49,6 @@ extern struct rpc_procinfo nfs_procedures[]; -static struct rpc_cred * -nfs_cred(struct inode *inode, struct file *filp) -{ - struct rpc_cred *cred = NULL; - - if (filp) - cred = (struct rpc_cred *)filp->private_data; - if (!cred) - cred = NFS_I(inode)->mm_cred; - return cred; -} - /* * Bare-bones access to 
getattr: this is for nfs_read_super. */ @@ -167,8 +155,7 @@ nfs_proc_readlink(struct inode *inode, s return status; } -static int -nfs_proc_read(struct nfs_read_data *rdata, struct file *filp) +static int nfs_proc_read(struct nfs_read_data *rdata) { int flags = rdata->flags; struct inode * inode = rdata->inode; @@ -177,15 +164,14 @@ nfs_proc_read(struct nfs_read_data *rdat .rpc_proc = &nfs_procedures[NFSPROC_READ], .rpc_argp = &rdata->args, .rpc_resp = &rdata->res, + .rpc_cred = rdata->cred, }; int status; dprintk("NFS call read %d @ %Ld\n", rdata->args.count, (long long) rdata->args.offset); fattr->valid = 0; - msg.rpc_cred = nfs_cred(inode, filp); status = rpc_call_sync(NFS_CLIENT(inode), &msg, flags); - if (status >= 0) { nfs_refresh_inode(inode, fattr); /* Emulate the eof flag, which isn't normally needed in NFSv2 @@ -198,8 +184,7 @@ nfs_proc_read(struct nfs_read_data *rdat return status; } -static int -nfs_proc_write(struct nfs_write_data *wdata, struct file *filp) +static int nfs_proc_write(struct nfs_write_data *wdata) { int flags = wdata->flags; struct inode * inode = wdata->inode; @@ -208,13 +193,13 @@ nfs_proc_write(struct nfs_write_data *wd .rpc_proc = &nfs_procedures[NFSPROC_WRITE], .rpc_argp = &wdata->args, .rpc_resp = &wdata->res, + .rpc_cred = wdata->cred, }; int status; dprintk("NFS call write %d @ %Ld\n", wdata->args.count, (long long) wdata->args.offset); fattr->valid = 0; - msg.rpc_cred = nfs_cred(inode, filp); status = rpc_call_sync(NFS_CLIENT(inode), &msg, flags); if (status >= 0) { nfs_refresh_inode(inode, fattr); @@ -619,27 +604,6 @@ nfs_proc_commit_setup(struct nfs_write_d BUG(); } -/* - * Set up the nfspage struct with the right credentials - */ -static void -nfs_request_init(struct nfs_page *req, struct file *filp) -{ - req->wb_cred = get_rpccred(nfs_cred(req->wb_inode, filp)); -} - -static int -nfs_request_compatible(struct nfs_page *req, struct file *filp, struct page *page) -{ - if (req->wb_file != filp) - return 0; - if (req->wb_page
!= page) - return 0; - if (req->wb_cred != nfs_file_cred(filp)) - return 0; - return 1; -} - static int nfs_proc_lock(struct file *filp, int cmd, struct file_lock *fl) { @@ -680,7 +644,5 @@ struct nfs_rpc_ops nfs_v2_clientops = { .commit_setup = nfs_proc_commit_setup, .file_open = nfs_open, .file_release = nfs_release, - .request_init = nfs_request_init, - .request_compatible = nfs_request_compatible, .lock = nfs_proc_lock, }; diff -u --recursive --new-file --show-c-function linux-2.6.7-rc3/fs/nfs/read.c linux-2.6.7-17-delegation_cache/fs/nfs/read.c --- linux-2.6.7-rc3/fs/nfs/read.c 2004-06-10 00:23:02.000000000 -0400 +++ linux-2.6.7-17-delegation_cache/fs/nfs/read.c 2004-06-10 01:04:48.000000000 -0400 @@ -91,8 +91,8 @@ int nfs_return_empty_page(struct page *p /* * Read a page synchronously. */ -static int -nfs_readpage_sync(struct file *file, struct inode *inode, struct page *page) +static int nfs_readpage_sync(struct nfs_open_context *ctx, struct inode *inode, + struct page *page) { unsigned int rsize = NFS_SERVER(inode)->rsize; unsigned int count = PAGE_CACHE_SIZE; @@ -105,10 +105,11 @@ nfs_readpage_sync(struct file *file, str memset(rdata, 0, sizeof(*rdata)); rdata->flags = (IS_SWAPFILE(inode)? 
NFS_RPC_SWAPFLAGS : 0); + rdata->cred = ctx->cred; rdata->inode = inode; INIT_LIST_HEAD(&rdata->pages); rdata->args.fh = NFS_FH(inode); - rdata->args.lockowner = current->files; + rdata->args.context = ctx; rdata->args.pages = &page; rdata->args.pgbase = 0UL; rdata->args.count = rsize; @@ -134,7 +135,7 @@ nfs_readpage_sync(struct file *file, str rdata->args.count); lock_kernel(); - result = NFS_PROTO(inode)->read(rdata, file); + result = NFS_PROTO(inode)->read(rdata); unlock_kernel(); /* @@ -169,8 +170,8 @@ io_error: return result; } -static int -nfs_readpage_async(struct file *file, struct inode *inode, struct page *page) +static int nfs_readpage_async(struct nfs_open_context *ctx, struct inode *inode, + struct page *page) { LIST_HEAD(one_request); struct nfs_page *new; @@ -179,7 +180,7 @@ nfs_readpage_async(struct file *file, st len = nfs_page_length(inode, page); if (len == 0) return nfs_return_empty_page(page); - new = nfs_create_request(file, inode, page, 0, len); + new = nfs_create_request(ctx, inode, page, 0, len); if (IS_ERR(new)) { unlock_page(page); return PTR_ERR(new); @@ -202,8 +203,8 @@ static void nfs_readpage_release(struct nfs_unlock_request(req); dprintk("NFS: read done (%s/%Ld %d@%Ld)\n", - req->wb_inode->i_sb->s_id, - (long long)NFS_FILEID(req->wb_inode), + req->wb_context->dentry->d_inode->i_sb->s_id, + (long long)NFS_FILEID(req->wb_context->dentry->d_inode), req->wb_bytes, (long long)req_offset(req)); } @@ -217,16 +218,15 @@ static void nfs_read_rpcsetup(struct nfs struct inode *inode; data->req = req; - data->inode = inode = req->wb_inode; - data->cred = req->wb_cred; + data->inode = inode = req->wb_context->dentry->d_inode; + data->cred = req->wb_context->cred; data->args.fh = NFS_FH(inode); data->args.offset = req_offset(req) + offset; data->args.pgbase = req->wb_pgbase + offset; data->args.pages = data->pagevec; data->args.count = count; - data->args.lockowner = req->wb_lockowner; - data->args.state = req->wb_state; + data->args.context = 
req->wb_context; data->res.fattr = &data->fattr; data->res.count = count; @@ -396,7 +396,7 @@ nfs_pagein_list(struct list_head *head, while (!list_empty(head)) { pages += nfs_coalesce_requests(head, &one_request, rpages); req = nfs_list_entry(one_request.next); - error = nfs_pagein_one(&one_request, req->wb_inode); + error = nfs_pagein_one(&one_request, req->wb_context->dentry->d_inode); if (error < 0) break; } @@ -500,9 +500,9 @@ void nfs_readpage_result(struct rpc_task * - The error flag is set for this page. This happens only when a * previous async read operation failed. */ -int -nfs_readpage(struct file *file, struct page *page) +int nfs_readpage(struct file *file, struct page *page) { + struct nfs_open_context *ctx; struct inode *inode = page->mapping->host; int error; @@ -519,25 +519,34 @@ nfs_readpage(struct file *file, struct p if (error) goto out_error; + if (file == NULL) { + error = -EBADF; + ctx = nfs_find_open_context(inode, FMODE_READ); + if (ctx == NULL) + goto out_error; + } else + ctx = get_nfs_open_context((struct nfs_open_context *) + file->private_data); if (!IS_SYNC(inode)) { - error = nfs_readpage_async(file, inode, page); + error = nfs_readpage_async(ctx, inode, page); goto out; } - error = nfs_readpage_sync(file, inode, page); + error = nfs_readpage_sync(ctx, inode, page); if (error < 0 && IS_SWAPFILE(inode)) printk("Aiee..
nfs swap-in of page failed!\n"); out: + put_nfs_open_context(ctx); return error; out_error: unlock_page(page); - goto out; + return error; } struct nfs_readdesc { struct list_head *head; - struct file *filp; + struct nfs_open_context *ctx; }; static int @@ -552,7 +560,7 @@ readpage_async_filler(void *data, struct len = nfs_page_length(inode, page); if (len == 0) return nfs_return_empty_page(page); - new = nfs_create_request(desc->filp, inode, page, 0, len); + new = nfs_create_request(desc->ctx, inode, page, 0, len); if (IS_ERR(new)) { SetPageError(page); unlock_page(page); @@ -565,13 +573,11 @@ readpage_async_filler(void *data, struct return 0; } -int -nfs_readpages(struct file *filp, struct address_space *mapping, +int nfs_readpages(struct file *filp, struct address_space *mapping, struct list_head *pages, unsigned nr_pages) { LIST_HEAD(head); struct nfs_readdesc desc = { - .filp = filp, .head = &head, }; struct inode *inode = mapping->host; @@ -583,12 +589,20 @@ nfs_readpages(struct file *filp, struct (long long)NFS_FILEID(inode), nr_pages); + if (filp == NULL) { + desc.ctx = nfs_find_open_context(inode, FMODE_READ); + if (desc.ctx == NULL) + return -EBADF; + } else + desc.ctx = get_nfs_open_context((struct nfs_open_context *) + filp->private_data); ret = read_cache_pages(mapping, pages, readpage_async_filler, &desc); if (!list_empty(&head)) { int err = nfs_pagein_list(&head, server->rpages); if (!ret) ret = err; } + put_nfs_open_context(desc.ctx); return ret; } diff -u --recursive --new-file --show-c-function linux-2.6.7-rc3/fs/nfs/unlink.c linux-2.6.7-17-delegation_cache/fs/nfs/unlink.c --- linux-2.6.7-rc3/fs/nfs/unlink.c 2004-06-10 00:23:51.000000000 -0400 +++ linux-2.6.7-17-delegation_cache/fs/nfs/unlink.c 2004-06-10 01:04:35.000000000 -0400 @@ -215,7 +215,6 @@ nfs_complete_unlink(struct dentry *dentr spin_lock(&dentry->d_lock); dentry->d_flags &= ~DCACHE_NFSFS_RENAMED; spin_unlock(&dentry->d_lock); - if (data->task.tk_rpcwait == &nfs_delete_queue) - 
rpc_wake_up_task(&data->task); + rpc_wake_up_task(&data->task); nfs_put_unlinkdata(data); } diff -u --recursive --new-file --show-c-function linux-2.6.7-rc3/fs/nfs/write.c linux-2.6.7-17-delegation_cache/fs/nfs/write.c --- linux-2.6.7-rc3/fs/nfs/write.c 2004-06-10 00:23:36.000000000 -0400 +++ linux-2.6.7-17-delegation_cache/fs/nfs/write.c 2004-06-10 01:05:34.000000000 -0400 @@ -63,6 +63,8 @@ #include #include +#include "delegation.h" + #define NFSDBG_FACILITY NFSDBG_PAGECACHE #define MIN_POOL_WRITE (32) @@ -71,7 +73,8 @@ /* * Local function declarations */ -static struct nfs_page * nfs_update_request(struct file*, struct inode *, +static struct nfs_page * nfs_update_request(struct nfs_open_context*, + struct inode *, struct page *, unsigned int, unsigned int); static void nfs_writeback_done_partial(struct nfs_write_data *, int); @@ -173,7 +176,7 @@ static void nfs_mark_uptodate(struct pag * Write a page synchronously. * Offset is the data offset within the page. */ -static int nfs_writepage_sync(struct file *file, struct inode *inode, +static int nfs_writepage_sync(struct nfs_open_context *ctx, struct inode *inode, struct page *page, unsigned int offset, unsigned int count, int how) { @@ -187,9 +190,10 @@ static int nfs_writepage_sync(struct fil memset(wdata, 0, sizeof(*wdata)); wdata->flags = how; + wdata->cred = ctx->cred; wdata->inode = inode; wdata->args.fh = NFS_FH(inode); - wdata->args.lockowner = current->files; + wdata->args.context = ctx; wdata->args.pages = &page; wdata->args.stable = NFS_FILE_SYNC; wdata->args.pgbase = offset; @@ -208,7 +212,7 @@ static int nfs_writepage_sync(struct fil wdata->args.count = count; wdata->args.offset = page_offset(page) + wdata->args.pgbase; - result = NFS_PROTO(inode)->write(wdata, file); + result = NFS_PROTO(inode)->write(wdata); if (result < 0) { /* Must mark the page invalid after I/O error */ @@ -241,13 +245,14 @@ io_error: return written ? 
written : result; } -static int nfs_writepage_async(struct file *file, struct inode *inode, - struct page *page, unsigned int offset, unsigned int count) +static int nfs_writepage_async(struct nfs_open_context *ctx, + struct inode *inode, struct page *page, + unsigned int offset, unsigned int count) { struct nfs_page *req; int status; - req = nfs_update_request(file, inode, page, offset, count); + req = nfs_update_request(ctx, inode, page, offset, count); status = (IS_ERR(req)) ? PTR_ERR(req) : 0; if (status < 0) goto out; @@ -274,6 +279,7 @@ static int wb_priority(struct writeback_ */ int nfs_writepage(struct page *page, struct writeback_control *wbc) { + struct nfs_open_context *ctx; struct inode *inode = page->mapping->host; unsigned long end_index; unsigned offset = PAGE_CACHE_SIZE; @@ -308,17 +314,18 @@ int nfs_writepage(struct page *page, str if (page->index >= end_index+1 || !offset) goto out; do_it: + ctx = nfs_find_open_context(inode, FMODE_WRITE); + if (ctx == NULL) { + err = -EBADF; + goto out; + } lock_kernel(); if (!IS_SYNC(inode) && inode_referenced) { - err = nfs_writepage_async(NULL, inode, page, 0, offset); - if (err >= 0) { + err = nfs_writepage_async(ctx, inode, page, 0, offset); + if (err > 0) err = 0; - if (wbc->for_reclaim) - err = WRITEPAGE_ACTIVATE; - } } else { - err = nfs_writepage_sync(NULL, inode, page, 0, - offset, priority); + err = nfs_writepage_sync(ctx, inode, page, 0, offset, priority); if (err >= 0) { if (err != offset) redirty_page_for_writepage(wbc, page); @@ -326,9 +333,9 @@ do_it: } } unlock_kernel(); + put_nfs_open_context(ctx); out: - if (err != WRITEPAGE_ACTIVATE) - unlock_page(page); + unlock_page(page); if (inode_referenced) iput(inode); return err; @@ -375,8 +382,7 @@ out: /* * Insert a write request into an inode */ -static inline int -nfs_inode_add_request(struct inode *inode, struct nfs_page *req) +static int nfs_inode_add_request(struct inode *inode, struct nfs_page *req) { struct nfs_inode *nfsi = NFS_I(inode); int 
error; @@ -388,6 +394,8 @@ nfs_inode_add_request(struct inode *inod if (!nfsi->npages) { igrab(inode); nfs_begin_data_update(inode); + if (nfs_have_delegation(inode, FMODE_WRITE)) + nfsi->change_attr++; } nfsi->npages++; req->wb_count++; @@ -405,7 +413,7 @@ nfs_inode_remove_request(struct nfs_page BUG_ON (!NFS_WBACK_BUSY(req)); spin_lock(&nfs_wreq_lock); - inode = req->wb_inode; + inode = req->wb_context->dentry->d_inode; nfsi = NFS_I(inode); radix_tree_delete(&nfsi->nfs_page_tree, req->wb_index); nfsi->npages--; @@ -451,7 +459,7 @@ nfs_find_request(struct inode *inode, un static void nfs_mark_request_dirty(struct nfs_page *req) { - struct inode *inode = req->wb_inode; + struct inode *inode = req->wb_context->dentry->d_inode; struct nfs_inode *nfsi = NFS_I(inode); spin_lock(&nfs_wreq_lock); @@ -468,7 +476,7 @@ nfs_mark_request_dirty(struct nfs_page * static inline int nfs_dirty_request(struct nfs_page *req) { - struct nfs_inode *nfsi = NFS_I(req->wb_inode); + struct nfs_inode *nfsi = NFS_I(req->wb_context->dentry->d_inode); return !list_empty(&req->wb_list) && req->wb_list_head == &nfsi->dirty; } @@ -479,7 +487,7 @@ nfs_dirty_request(struct nfs_page *req) static void nfs_mark_request_commit(struct nfs_page *req) { - struct inode *inode = req->wb_inode; + struct inode *inode = req->wb_context->dentry->d_inode; struct nfs_inode *nfsi = NFS_I(inode); spin_lock(&nfs_wreq_lock); @@ -620,9 +628,9 @@ static int nfs_wait_on_write_congestion( * * Note: Should always be called with the Page Lock held! 
*/ -static struct nfs_page * -nfs_update_request(struct file* file, struct inode *inode, struct page *page, - unsigned int offset, unsigned int bytes) +static struct nfs_page * nfs_update_request(struct nfs_open_context* ctx, + struct inode *inode, struct page *page, + unsigned int offset, unsigned int bytes) { struct nfs_server *server = NFS_SERVER(inode); struct nfs_page *req, *new = NULL; @@ -669,13 +677,9 @@ nfs_update_request(struct file* file, st } spin_unlock(&nfs_wreq_lock); - new = nfs_create_request(file, inode, page, offset, bytes); + new = nfs_create_request(ctx, inode, page, offset, bytes); if (IS_ERR(new)) return new; - if (file) { - new->wb_file = file; - get_file(file); - } } /* We have a request for our page. @@ -685,7 +689,7 @@ nfs_update_request(struct file* file, st * request. */ rqend = req->wb_offset + req->wb_bytes; - if (req->wb_file != file + if (req->wb_context != ctx || req->wb_page != page || !nfs_dirty_request(req) || offset > rqend || end < req->wb_offset) { @@ -706,9 +710,9 @@ nfs_update_request(struct file* file, st return req; } -int -nfs_flush_incompatible(struct file *file, struct page *page) +int nfs_flush_incompatible(struct file *file, struct page *page) { + struct nfs_open_context *ctx = (struct nfs_open_context *)file->private_data; struct inode *inode = page->mapping->host; struct nfs_page *req; int status = 0; @@ -722,7 +726,7 @@ nfs_flush_incompatible(struct file *file */ req = nfs_find_request(inode, page->index); if (req) { - if (!NFS_PROTO(inode)->request_compatible(req, file, page)) + if (req->wb_page != page || ctx != req->wb_context) status = nfs_wb_page(inode, page); nfs_release_request(req); } @@ -738,6 +742,7 @@ nfs_flush_incompatible(struct file *file int nfs_updatepage(struct file *file, struct page *page, unsigned int offset, unsigned int count) { + struct nfs_open_context *ctx = (struct nfs_open_context *)file->private_data; struct dentry *dentry = file->f_dentry; struct inode *inode = page->mapping->host; 
struct nfs_page *req; @@ -748,7 +753,7 @@ int nfs_updatepage(struct file *file, st count, (long long)(page_offset(page) +offset)); if (IS_SYNC(inode)) { - status = nfs_writepage_sync(file, inode, page, offset, count, 0); + status = nfs_writepage_sync(ctx, inode, page, offset, count, 0); if (status > 0) { if (offset == 0 && status == PAGE_CACHE_SIZE) SetPageUptodate(page); @@ -785,7 +790,7 @@ int nfs_updatepage(struct file *file, st * it out now. */ do { - req = nfs_update_request(file, inode, page, offset, count); + req = nfs_update_request(ctx, inode, page, offset, count); status = (IS_ERR(req)) ? PTR_ERR(req) : 0; if (status != -EBUSY) break; @@ -861,16 +866,15 @@ static void nfs_write_rpcsetup(struct nf * NB: take care not to mess about with data->commit et al. */ data->req = req; - data->inode = inode = req->wb_inode; - data->cred = req->wb_cred; + data->inode = inode = req->wb_context->dentry->d_inode; + data->cred = req->wb_context->cred; data->args.fh = NFS_FH(inode); data->args.offset = req_offset(req) + offset; data->args.pgbase = req->wb_pgbase + offset; data->args.pages = data->pagevec; data->args.count = count; - data->args.lockowner = req->wb_lockowner; - data->args.state = req->wb_state; + data->args.context = req->wb_context; data->res.fattr = &data->fattr; data->res.count = count; @@ -1030,7 +1034,7 @@ nfs_flush_list(struct list_head *head, i while (!list_empty(head)) { pages += nfs_coalesce_requests(head, &one_request, wpages); req = nfs_list_entry(one_request.next); - error = nfs_flush_one(&one_request, req->wb_inode, how); + error = nfs_flush_one(&one_request, req->wb_context->dentry->d_inode, how); if (error < 0) break; } @@ -1055,16 +1059,15 @@ static void nfs_writeback_done_partial(s struct page *page = req->wb_page; dprintk("NFS: write (%s/%Ld %d@%Ld)", - req->wb_inode->i_sb->s_id, - (long long)NFS_FILEID(req->wb_inode), + req->wb_context->dentry->d_inode->i_sb->s_id, + (long long)NFS_FILEID(req->wb_context->dentry->d_inode), req->wb_bytes, 
(long long)req_offset(req)); if (status < 0) { ClearPageUptodate(page); SetPageError(page); - if (req->wb_file) - req->wb_file->f_error = status; + req->wb_context->error = status; dprintk(", error = %d\n", status); } else { #if defined(CONFIG_NFS_V3) || defined(CONFIG_NFS_V4) @@ -1105,16 +1108,15 @@ static void nfs_writeback_done_full(stru page = req->wb_page; dprintk("NFS: write (%s/%Ld %d@%Ld)", - req->wb_inode->i_sb->s_id, - (long long)NFS_FILEID(req->wb_inode), + req->wb_context->dentry->d_inode->i_sb->s_id, + (long long)NFS_FILEID(req->wb_context->dentry->d_inode), req->wb_bytes, (long long)req_offset(req)); if (status < 0) { ClearPageUptodate(page); SetPageError(page); - if (req->wb_file) - req->wb_file->f_error = status; + req->wb_context->error = status; end_page_writeback(page); nfs_inode_remove_request(req); dprintk(", error = %d\n", status); @@ -1233,7 +1235,7 @@ static void nfs_commit_rpcsetup(struct l list_splice_init(head, &data->pages); first = nfs_list_entry(data->pages.next); last = nfs_list_entry(data->pages.prev); - inode = first->wb_inode; + inode = first->wb_context->dentry->d_inode; /* * Determine the offset range of requests in the COMMIT call. 
@@ -1247,7 +1249,7 @@ static void nfs_commit_rpcsetup(struct l len = 0; data->inode = inode; - data->cred = first->wb_cred; + data->cred = first->wb_context->cred; data->args.fh = NFS_FH(data->inode); data->args.offset = start; @@ -1314,13 +1316,12 @@ nfs_commit_done(struct rpc_task *task) nfs_list_remove_request(req); dprintk("NFS: commit (%s/%Ld %d@%Ld)", - req->wb_inode->i_sb->s_id, - (long long)NFS_FILEID(req->wb_inode), + req->wb_context->dentry->d_inode->i_sb->s_id, + (long long)NFS_FILEID(req->wb_context->dentry->d_inode), req->wb_bytes, (long long)req_offset(req)); if (task->tk_status < 0) { - if (req->wb_file) - req->wb_file->f_error = task->tk_status; + req->wb_context->error = task->tk_status; nfs_inode_remove_request(req); dprintk(", error = %d\n", task->tk_status); goto next; diff -u --recursive --new-file --show-c-function linux-2.6.7-rc3/fs/open.c linux-2.6.7-17-delegation_cache/fs/open.c --- linux-2.6.7-rc3/fs/open.c 2004-06-10 00:23:04.000000000 -0400 +++ linux-2.6.7-17-delegation_cache/fs/open.c 2004-06-10 01:04:21.000000000 -0400 @@ -1007,7 +1007,7 @@ int filp_close(struct file *filp, fl_own } dnotify_flush(filp, id); - locks_remove_posix(filp, id); + locks_remove_posix(filp, 0); fput(filp); return retval; } diff -u --recursive --new-file --show-c-function linux-2.6.7-rc3/include/linux/fs.h linux-2.6.7-17-delegation_cache/include/linux/fs.h --- linux-2.6.7-rc3/include/linux/fs.h 2004-06-10 00:23:14.000000000 -0400 +++ linux-2.6.7-17-delegation_cache/include/linux/fs.h 2004-06-10 01:04:21.000000000 -0400 @@ -625,7 +625,7 @@ struct file_lock { struct file_lock *fl_next; /* singly linked list for this inode */ struct list_head fl_link; /* doubly linked list of all locks */ struct list_head fl_block; /* circular list of blocked processes */ - fl_owner_t fl_owner; + fl_owner_t fl_owner; /* 0 if lock owned by a local process */ unsigned int fl_pid; wait_queue_head_t fl_wait; struct file *fl_file; diff -u --recursive --new-file --show-c-function 
linux-2.6.7-rc3/include/linux/nfs4.h linux-2.6.7-17-delegation_cache/include/linux/nfs4.h --- linux-2.6.7-rc3/include/linux/nfs4.h 2004-06-10 00:23:19.000000000 -0400 +++ linux-2.6.7-17-delegation_cache/include/linux/nfs4.h 2004-06-10 01:05:13.000000000 -0400 @@ -297,7 +297,7 @@ enum { NFSPROC4_CLNT_COMMIT, NFSPROC4_CLNT_OPEN, NFSPROC4_CLNT_OPEN_CONFIRM, - NFSPROC4_CLNT_OPEN_RECLAIM, + NFSPROC4_CLNT_OPEN_NOATTR, NFSPROC4_CLNT_OPEN_DOWNGRADE, NFSPROC4_CLNT_CLOSE, NFSPROC4_CLNT_SETATTR, @@ -321,6 +321,7 @@ enum { NFSPROC4_CLNT_READLINK, NFSPROC4_CLNT_READDIR, NFSPROC4_CLNT_SERVER_CAPS, + NFSPROC4_CLNT_DELEGRETURN, }; #endif diff -u --recursive --new-file --show-c-function linux-2.6.7-rc3/include/linux/nfs_fs.h linux-2.6.7-17-delegation_cache/include/linux/nfs_fs.h --- linux-2.6.7-rc3/include/linux/nfs_fs.h 2004-06-10 00:23:15.000000000 -0400 +++ linux-2.6.7-17-delegation_cache/include/linux/nfs_fs.h 2004-06-10 01:05:34.000000000 -0400 @@ -84,6 +84,32 @@ struct nfs_access_cache { int err; }; +struct nfs4_state; +struct nfs_open_context { + atomic_t count; + struct dentry *dentry; + struct rpc_cred *cred; + struct nfs4_state *state; + unsigned int pid; + int mode; + int error; + + struct list_head open_files; + wait_queue_head_t waitq; +}; + +/* + * NFSv4 delegation + */ +struct nfs_delegation { + struct list_head list; + struct list_head states; + nfs4_stateid stateid; + int type; + long generation; + loff_t maxsize; +}; + /* * nfs fs inode data in memory */ @@ -156,14 +182,15 @@ struct nfs_inode { ncommit, npages; - /* Credentials for shared mmap */ - struct rpc_cred *mm_cred; + /* Open contexts for shared mmap writes */ + struct list_head open_files; wait_queue_head_t nfs_i_wait; #ifdef CONFIG_NFS_V4 /* NFSv4 state */ struct list_head open_states; + struct nfs_delegation delegation; #endif /* CONFIG_NFS_V4*/ struct inode vfs_inode; @@ -259,6 +286,18 @@ static inline int nfs_verify_change_attr && chattr == NFS_I(inode)->cache_change_attribute; } +/** + * 
nfs_compare_fh - compare two filehandles for equality + * @fh1 - pointer to first filehandle + * @fh2 - pointer to second filehandle + */ +static inline int nfs_compare_fh(const struct nfs_fh *fh1, const struct nfs_fh *fh2) +{ + if (fh1->size == fh2->size) + return memcmp(fh1->data, fh2->data, fh1->size); + return (fh1->size > fh2->size) ? 1 : -1; +} + /* * linux/fs/nfs/inode.c */ @@ -268,9 +307,11 @@ extern struct inode *nfs_fhget(struct su extern int nfs_refresh_inode(struct inode *, struct nfs_fattr *); extern int nfs_getattr(struct vfsmount *, struct dentry *, struct kstat *); extern int nfs_permission(struct inode *, int, struct nameidata *); -extern void nfs_set_mmcred(struct inode *, struct rpc_cred *); +extern int nfs_check_access(struct inode *, int, struct rpc_cred *); extern int nfs_open(struct inode *, struct file *); extern int nfs_release(struct inode *, struct file *); +extern int nfs_attribute_timeout(struct inode *inode); +extern int nfs_revalidate_inode(struct nfs_server *server, struct inode *inode); extern int __nfs_revalidate_inode(struct nfs_server *, struct inode *); extern int nfs_setattr(struct dentry *, struct iattr *); extern void nfs_begin_attr_update(struct inode *); @@ -278,6 +319,12 @@ extern void nfs_end_attr_update(struct i extern void nfs_begin_data_update(struct inode *); extern void nfs_end_data_update(struct inode *); extern void nfs_end_data_update_defer(struct inode *); +extern struct nfs_open_context *alloc_nfs_open_context(struct dentry *dentry, struct rpc_cred *cred); +extern struct nfs_open_context *get_nfs_open_context(struct nfs_open_context *ctx); +extern void put_nfs_open_context(struct nfs_open_context *ctx); +extern void nfs_file_set_open_context(struct file *filp, struct nfs_open_context *ctx); +extern struct nfs_open_context *nfs_find_open_context(struct inode *inode, int mode); +extern void nfs_file_clear_open_context(struct file *filp); /* linux/net/ipv4/ipconfig.c: trims ip addr off front of name, too. 
*/ extern u32 root_nfs_parse_addr(char *name); /*__init*/ @@ -289,16 +336,15 @@ extern struct inode_operations nfs_file_ extern struct file_operations nfs_file_operations; extern struct address_space_operations nfs_file_aops; -static __inline__ struct rpc_cred * -nfs_file_cred(struct file *file) +static inline struct rpc_cred *nfs_file_cred(struct file *file) { - struct rpc_cred *cred = NULL; - if (file) - cred = (struct rpc_cred *)file->private_data; -#ifdef RPC_DEBUG - BUG_ON(cred && cred->cr_magic != RPCAUTH_CRED_MAGIC); -#endif - return cred; + if (file != NULL) { + struct nfs_open_context *ctx; + + ctx = (struct nfs_open_context*)file->private_data; + return ctx->cred; + } + return NULL; } /* @@ -418,28 +464,6 @@ extern int nfsroot_mount(struct sockadd * inline functions */ -static inline int nfs_attribute_timeout(struct inode *inode) -{ - struct nfs_inode *nfsi = NFS_I(inode); - - return time_after(jiffies, nfsi->read_cache_jiffies+nfsi->attrtimeo); -} - -/** - * nfs_revalidate_inode - Revalidate the inode attributes - * @server - pointer to nfs_server struct - * @inode - pointer to inode struct - * - * Updates inode attribute information by retrieving the data from the server. - */ -static inline int nfs_revalidate_inode(struct nfs_server *server, struct inode *inode) -{ - if (!(NFS_FLAGS(inode) & (NFS_INO_INVALID_ATTR|NFS_INO_INVALID_DATA)) - && !nfs_attribute_timeout(inode)) - return NFS_STALE(inode) ? 
-ESTALE : 0; - return __nfs_revalidate_inode(server, inode); -} - static inline loff_t nfs_size_to_loff_t(__u64 size) { @@ -530,6 +554,7 @@ struct nfs4_client { */ struct rw_semaphore cl_sem; + struct list_head cl_delegations; struct list_head cl_state_owners; struct list_head cl_unused; int cl_nunused; @@ -595,7 +620,7 @@ struct nfs4_state_owner { struct nfs4_lock_state { struct list_head ls_locks; /* Other lock stateids */ - fl_owner_t ls_owner; /* POSIX lock owner */ + unsigned int ls_pid; /* pid of owner process */ struct nfs4_state * ls_parent; /* Parent nfs4_state */ u32 ls_seqid; u32 ls_id; @@ -606,6 +631,7 @@ struct nfs4_lock_state { /* bits for nfs4_state->flags */ enum { LK_STATE_IN_USE, + NFS_DELEGATED_STATE, }; struct nfs4_state { @@ -654,6 +680,7 @@ extern void init_nfsv4_state(struct nfs_ extern void destroy_nfsv4_state(struct nfs_server *); extern struct nfs4_client *nfs4_get_client(struct in_addr *); extern void nfs4_put_client(struct nfs4_client *clp); +extern struct nfs4_client *nfs4_find_client(struct in_addr *); extern u32 nfs4_alloc_lockowner_id(struct nfs4_client *); extern struct nfs4_state_owner * nfs4_get_state_owner(struct nfs_server *, struct rpc_cred *); @@ -665,13 +692,13 @@ extern struct nfs4_state *nfs4_find_stat extern void nfs4_increment_seqid(int status, struct nfs4_state_owner *sp); extern int nfs4_handle_error(struct nfs_server *, int); extern void nfs4_schedule_state_recovery(struct nfs4_client *); -extern struct nfs4_lock_state *nfs4_find_lock_state(struct nfs4_state *state, fl_owner_t); -extern struct nfs4_lock_state *nfs4_alloc_lock_state(struct nfs4_state *state, fl_owner_t); +extern struct nfs4_lock_state *nfs4_find_lock_state(struct nfs4_state *state, unsigned int pid); +extern struct nfs4_lock_state *nfs4_alloc_lock_state(struct nfs4_state *state, unsigned int pid); extern void nfs4_put_lock_state(struct nfs4_lock_state *state); extern void nfs4_increment_lock_seqid(int status, struct nfs4_lock_state *ls); extern void 
nfs4_notify_setlk(struct inode *, struct file_lock *, struct nfs4_lock_state *); extern void nfs4_notify_unlck(struct inode *, struct file_lock *, struct nfs4_lock_state *); -extern void nfs4_copy_stateid(nfs4_stateid *, struct nfs4_state *, fl_owner_t); +extern void nfs4_copy_stateid(nfs4_stateid *, struct nfs4_state *, unsigned int pid); @@ -697,6 +724,7 @@ struct nfs4_mount_data; #define NFSDBG_XDR 0x0020 #define NFSDBG_FILE 0x0040 #define NFSDBG_ROOT 0x0080 +#define NFSDBG_CALLBACK 0x0100 #define NFSDBG_ALL 0xFFFF #ifdef __KERNEL__ diff -u --recursive --new-file --show-c-function linux-2.6.7-rc3/include/linux/nfs_page.h linux-2.6.7-17-delegation_cache/include/linux/nfs_page.h --- linux-2.6.7-rc3/include/linux/nfs_page.h 2004-06-10 00:23:06.000000000 -0400 +++ linux-2.6.7-17-delegation_cache/include/linux/nfs_page.h 2004-06-10 01:04:44.000000000 -0400 @@ -29,14 +29,9 @@ struct nfs_page { struct list_head wb_list, /* Defines state of page: */ *wb_list_head; /* read/write/commit */ - struct file *wb_file; - fl_owner_t wb_lockowner; - struct inode *wb_inode; - struct rpc_cred *wb_cred; - struct nfs4_state *wb_state; struct page *wb_page; /* page to read in/write out */ + struct nfs_open_context *wb_context; /* File state context info */ atomic_t wb_complete; /* i/os we're waiting for */ - wait_queue_head_t wb_wait; /* wait queue */ unsigned long wb_index; /* Offset >> PAGE_CACHE_SHIFT */ unsigned int wb_offset, /* Offset & ~PAGE_CACHE_MASK */ wb_pgbase, /* Start of page data */ @@ -50,9 +45,11 @@ struct nfs_page { #define NFS_NEED_COMMIT(req) (test_bit(PG_NEED_COMMIT,&(req)->wb_flags)) #define NFS_NEED_RESCHED(req) (test_bit(PG_NEED_RESCHED,&(req)->wb_flags)) -extern struct nfs_page *nfs_create_request(struct file *, struct inode *, - struct page *, - unsigned int, unsigned int); +extern struct nfs_page *nfs_create_request(struct nfs_open_context *ctx, + struct inode *inode, + struct page *page, + unsigned int offset, + unsigned int count); extern void 
nfs_clear_request(struct nfs_page *req); extern void nfs_release_request(struct nfs_page *req); @@ -64,6 +61,7 @@ extern int nfs_scan_list(struct list_hea extern int nfs_coalesce_requests(struct list_head *, struct list_head *, unsigned int); extern int nfs_wait_on_request(struct nfs_page *); +extern void nfs_unlock_request(struct nfs_page *req); extern spinlock_t nfs_wreq_lock; @@ -90,19 +88,6 @@ nfs_lock_request(struct nfs_page *req) return 1; } -static inline void -nfs_unlock_request(struct nfs_page *req) -{ - if (!NFS_WBACK_BUSY(req)) { - printk(KERN_ERR "NFS: Invalid unlock attempted\n"); - BUG(); - } - smp_mb__before_clear_bit(); - clear_bit(PG_BUSY, &req->wb_flags); - smp_mb__after_clear_bit(); - wake_up_all(&req->wb_wait); - nfs_release_request(req); -} /** * nfs_list_remove_request - Remove a request from its wb_list diff -u --recursive --new-file --show-c-function linux-2.6.7-rc3/include/linux/nfs_xdr.h linux-2.6.7-17-delegation_cache/include/linux/nfs_xdr.h --- linux-2.6.7-rc3/include/linux/nfs_xdr.h 2004-06-10 00:23:43.000000000 -0400 +++ linux-2.6.7-17-delegation_cache/include/linux/nfs_xdr.h 2004-06-10 01:05:04.000000000 -0400 @@ -99,20 +99,21 @@ struct nfs4_change_info { * Arguments to the open call. 
*/ struct nfs_openargs { - struct nfs_fh * fh; + const struct nfs_fh * fh; __u32 seqid; - __u32 share_access; + int open_flags; __u64 clientid; __u32 id; - __u32 opentype; - __u32 createmode; union { struct iattr * attrs; /* UNCHECKED, GUARDED */ nfs4_verifier verifier; /* EXCLUSIVE */ + nfs4_stateid delegation; /* CLAIM_DELEGATE_CUR */ + int delegation_type; /* CLAIM_PREVIOUS */ } u; const struct qstr * name; const struct nfs_server *server; /* Needed for ID mapping */ const u32 * bitmask; + __u32 claim; }; struct nfs_openres { @@ -122,13 +123,17 @@ struct nfs_openres { __u32 rflags; struct nfs_fattr * f_attr; const struct nfs_server *server; + int delegation_type; + nfs4_stateid delegation; + __u32 do_recall; + __u64 maxsize; }; /* * Arguments to the open_confirm call. */ struct nfs_open_confirmargs { - struct nfs_fh * fh; + const struct nfs_fh * fh; nfs4_stateid stateid; __u32 seqid; }; @@ -138,26 +143,13 @@ struct nfs_open_confirmres { }; /* - * Arguments to the open_reclaim call. - */ -struct nfs_open_reclaimargs { - struct nfs_fh * fh; - __u64 clientid; - __u32 seqid; - __u32 id; - __u32 share_access; - __u32 claim; - const __u32 * bitmask; -}; - -/* * Arguments to the close call. */ struct nfs_closeargs { struct nfs_fh * fh; nfs4_stateid stateid; __u32 seqid; - __u32 share_access; + int open_flags; }; struct nfs_closeres { @@ -224,6 +216,11 @@ struct nfs_lockres { const struct nfs_server * server; }; +struct nfs4_delegreturnargs { + const struct nfs_fh *fhandle; + const nfs4_stateid *stateid; +}; + /* * Arguments to the read call. 
*/ @@ -235,8 +232,7 @@ struct nfs_lockres { struct nfs_readargs { struct nfs_fh * fh; - fl_owner_t lockowner; - struct nfs4_state * state; + struct nfs_open_context *context; __u64 offset; __u32 count; unsigned int pgbase; @@ -259,8 +255,7 @@ struct nfs_readres { struct nfs_writeargs { struct nfs_fh * fh; - fl_owner_t lockowner; - struct nfs4_state * state; + struct nfs_open_context *context; __u64 offset; __u32 count; enum nfs3_stable_how stable; @@ -597,13 +592,15 @@ struct nfs4_rename_res { }; struct nfs4_setclientid { - nfs4_verifier sc_verifier; /* request */ - char * sc_name; /* request */ + const nfs4_verifier * sc_verifier; /* request */ + unsigned int sc_name_len; + char sc_name[32]; /* request */ u32 sc_prog; /* request */ + unsigned int sc_netid_len; char sc_netid[4]; /* request */ + unsigned int sc_uaddr_len; char sc_uaddr[24]; /* request */ u32 sc_cb_ident; /* request */ - struct nfs4_client * sc_state; /* response */ }; struct nfs4_statfs_arg { @@ -674,9 +671,9 @@ struct nfs_rpc_ops { struct nfs_fh *, struct nfs_fattr *); int (*access) (struct inode *, struct rpc_cred *, int); int (*readlink)(struct inode *, struct page *); - int (*read) (struct nfs_read_data *, struct file *); - int (*write) (struct nfs_write_data *, struct file *); - int (*commit) (struct nfs_write_data *, struct file *); + int (*read) (struct nfs_read_data *); + int (*write) (struct nfs_write_data *); + int (*commit) (struct nfs_write_data *); struct inode * (*create) (struct inode *, struct qstr *, struct iattr *, int); int (*remove) (struct inode *, struct qstr *); @@ -708,8 +705,6 @@ struct nfs_rpc_ops { void (*commit_setup) (struct nfs_write_data *, int how); int (*file_open) (struct inode *, struct file *); int (*file_release) (struct inode *, struct file *); - void (*request_init)(struct nfs_page *, struct file *); - int (*request_compatible)(struct nfs_page *, struct file *, struct page *); int (*lock)(struct file *, int, struct file_lock *); }; diff -u --recursive 
--new-file --show-c-function linux-2.6.7-rc3/include/linux/sunrpc/sched.h linux-2.6.7-17-delegation_cache/include/linux/sunrpc/sched.h --- linux-2.6.7-rc3/include/linux/sunrpc/sched.h 2004-06-10 00:23:27.000000000 -0400 +++ linux-2.6.7-17-delegation_cache/include/linux/sunrpc/sched.h 2004-06-10 01:04:39.000000000 -0400 @@ -11,7 +11,9 @@ #include #include +#include #include +#include #include /* @@ -25,11 +27,18 @@ struct rpc_message { struct rpc_cred * rpc_cred; /* Credentials */ }; +struct rpc_wait_queue; +struct rpc_wait { + struct list_head list; /* wait queue links */ + struct list_head links; /* Links to related tasks */ + wait_queue_head_t waitq; /* sync: sleep on this q */ + struct rpc_wait_queue * rpc_waitq; /* RPC wait queue we're on */ +}; + /* * This is the RPC task struct */ struct rpc_task { - struct list_head tk_list; /* wait queue links */ #ifdef RPC_DEBUG unsigned long tk_magic; /* 0xf00baa */ #endif @@ -37,7 +46,6 @@ struct rpc_task { struct rpc_clnt * tk_client; /* RPC client */ struct rpc_rqst * tk_rqstp; /* RPC request */ int tk_status; /* result of last operation */ - struct rpc_wait_queue * tk_rpcwait; /* RPC wait queue we're on */ /* * RPC call state @@ -70,13 +78,18 @@ struct rpc_task { * you have a pathological interest in kernel oopses. 
*/ struct timer_list tk_timer; /* kernel timer */ - wait_queue_head_t tk_wait; /* sync: sleep on this q */ unsigned long tk_timeout; /* timeout for rpc_sleep() */ unsigned short tk_flags; /* misc flags */ unsigned char tk_active : 1;/* Task has been activated */ unsigned char tk_priority : 2;/* Task priority */ unsigned long tk_runstate; /* Task run status */ - struct list_head tk_links; /* links to related tasks */ + struct workqueue_struct *tk_workqueue; /* Normally rpciod, but could + * be any workqueue + */ + union { + struct work_struct tk_work; /* Async task work queue */ + struct rpc_wait tk_wait; /* RPC wait */ + } u; #ifdef RPC_DEBUG unsigned short tk_pid; /* debugging aid */ #endif @@ -87,11 +100,11 @@ struct rpc_task { /* support walking a list of tasks on a wait queue */ #define task_for_each(task, pos, head) \ list_for_each(pos, head) \ - if ((task=list_entry(pos, struct rpc_task, tk_list)),1) + if ((task=list_entry(pos, struct rpc_task, u.tk_wait.list)),1) #define task_for_first(task, head) \ if (!list_empty(head) && \ - ((task=list_entry((head)->next, struct rpc_task, tk_list)),1)) + ((task=list_entry((head)->next, struct rpc_task, u.tk_wait.list)),1)) /* .. 
and walking list of all tasks */ #define alltask_for_each(task, pos, head) \ @@ -124,22 +137,24 @@ typedef void (*rpc_action)(struct rpc_ #define RPC_DO_CALLBACK(t) ((t)->tk_callback != NULL) #define RPC_IS_SOFT(t) ((t)->tk_flags & RPC_TASK_SOFT) -#define RPC_TASK_SLEEPING 0 -#define RPC_TASK_RUNNING 1 -#define RPC_IS_SLEEPING(t) (test_bit(RPC_TASK_SLEEPING, &(t)->tk_runstate)) -#define RPC_IS_RUNNING(t) (test_bit(RPC_TASK_RUNNING, &(t)->tk_runstate)) +#define RPC_TASK_RUNNING 0 +#define RPC_TASK_QUEUED 1 +#define RPC_IS_RUNNING(t) (test_bit(RPC_TASK_RUNNING, &(t)->tk_runstate)) #define rpc_set_running(t) (set_bit(RPC_TASK_RUNNING, &(t)->tk_runstate)) -#define rpc_clear_running(t) (clear_bit(RPC_TASK_RUNNING, &(t)->tk_runstate)) - -#define rpc_set_sleeping(t) (set_bit(RPC_TASK_SLEEPING, &(t)->tk_runstate)) - -#define rpc_clear_sleeping(t) \ +#define rpc_test_and_set_running(t) \ + (test_and_set_bit(RPC_TASK_RUNNING, &(t)->tk_runstate)) +#define rpc_clear_running(t) \ do { \ smp_mb__before_clear_bit(); \ - clear_bit(RPC_TASK_SLEEPING, &(t)->tk_runstate); \ + clear_bit(RPC_TASK_RUNNING, &(t)->tk_runstate); \ smp_mb__after_clear_bit(); \ - } while(0) + } while (0) + +#define RPC_IS_QUEUED(t) (test_bit(RPC_TASK_QUEUED, &(t)->tk_runstate)) +#define rpc_set_queued(t) (set_bit(RPC_TASK_QUEUED, &(t)->tk_runstate)) +#define rpc_test_and_clear_queued(t) \ + (test_and_clear_bit(RPC_TASK_QUEUED, &(t)->tk_runstate)) /* * Task priorities. 
@@ -155,6 +170,7 @@ typedef void (*rpc_action)(struct rpc_ * RPC synchronization objects */ struct rpc_wait_queue { + spinlock_t lock; struct list_head tasks[RPC_NR_PRIORITY]; /* task queue for each priority level */ unsigned long cookie; /* cookie of last task serviced */ unsigned char maxpriority; /* maximum priority (0 if queue is not a priority queue) */ @@ -175,6 +191,7 @@ struct rpc_wait_queue { #ifndef RPC_DEBUG # define RPC_WAITQ_INIT(var,qname) { \ + .lock = SPIN_LOCK_UNLOCKED, \ .tasks = { \ [0] = LIST_HEAD_INIT(var.tasks[0]), \ [1] = LIST_HEAD_INIT(var.tasks[1]), \ @@ -183,6 +200,7 @@ struct rpc_wait_queue { } #else # define RPC_WAITQ_INIT(var,qname) { \ + .lock = SPIN_LOCK_UNLOCKED, \ .tasks = { \ [0] = LIST_HEAD_INIT(var.tasks[0]), \ [1] = LIST_HEAD_INIT(var.tasks[1]), \ @@ -207,13 +225,10 @@ void rpc_killall_tasks(struct rpc_clnt int rpc_execute(struct rpc_task *); void rpc_run_child(struct rpc_task *parent, struct rpc_task *child, rpc_action action); -int rpc_add_wait_queue(struct rpc_wait_queue *, struct rpc_task *); -void rpc_remove_wait_queue(struct rpc_task *); void rpc_init_priority_wait_queue(struct rpc_wait_queue *, const char *); void rpc_init_wait_queue(struct rpc_wait_queue *, const char *); void rpc_sleep_on(struct rpc_wait_queue *, struct rpc_task *, rpc_action action, rpc_action timer); -void rpc_add_timer(struct rpc_task *, rpc_action); void rpc_wake_up_task(struct rpc_task *); void rpc_wake_up(struct rpc_wait_queue *); struct rpc_task *rpc_wake_up_next(struct rpc_wait_queue *); diff -u --recursive --new-file --show-c-function linux-2.6.7-rc3/include/linux/sunrpc/svc.h linux-2.6.7-17-delegation_cache/include/linux/sunrpc/svc.h --- linux-2.6.7-rc3/include/linux/sunrpc/svc.h 2004-06-10 00:23:20.000000000 -0400 +++ linux-2.6.7-17-delegation_cache/include/linux/sunrpc/svc.h 2004-06-10 01:04:59.000000000 -0400 @@ -87,6 +87,14 @@ static inline u32 svc_getu32(struct iove iov->iov_len -= sizeof(u32); return val; } + +static inline void 
svc_ungetu32(struct iovec *iov) +{ + u32 *vp = (u32 *)iov->iov_base; + iov->iov_base = (void *)(vp - 1); + iov->iov_len += sizeof(*vp); +} + static inline void svc_putu32(struct iovec *iov, u32 val) { u32 *vp = iov->iov_base + iov->iov_len; @@ -243,6 +251,8 @@ struct svc_program { char * pg_name; /* service name */ char * pg_class; /* class name: services sharing authentication */ struct svc_stat * pg_stats; /* rpc statistics */ + /* Override authentication. NULL means use default */ + int (*pg_authenticate)(struct svc_rqst *, u32 *); }; /* diff -u --recursive --new-file --show-c-function linux-2.6.7-rc3/net/sunrpc/clnt.c linux-2.6.7-17-delegation_cache/net/sunrpc/clnt.c --- linux-2.6.7-rc3/net/sunrpc/clnt.c 2004-06-10 00:23:16.000000000 -0400 +++ linux-2.6.7-17-delegation_cache/net/sunrpc/clnt.c 2004-06-10 01:04:35.000000000 -0400 @@ -351,7 +351,9 @@ int rpc_call_sync(struct rpc_clnt *clnt, rpc_clnt_sigmask(clnt, &oldset); /* Create/initialize a new RPC task */ - rpc_init_task(task, clnt, NULL, flags); + task = rpc_new_task(clnt, NULL, flags); + if (task == NULL) + return -ENOMEM; rpc_call_setup(task, msg, 0); /* Set up the call info struct and execute the task */ diff -u --recursive --new-file --show-c-function linux-2.6.7-rc3/net/sunrpc/sched.c linux-2.6.7-17-delegation_cache/net/sunrpc/sched.c --- linux-2.6.7-rc3/net/sunrpc/sched.c 2004-06-10 00:23:20.000000000 -0400 +++ linux-2.6.7-17-delegation_cache/net/sunrpc/sched.c 2004-06-10 01:04:39.000000000 -0400 @@ -41,13 +41,7 @@ static mempool_t *rpc_buffer_mempool; static void __rpc_default_timer(struct rpc_task *task); static void rpciod_killall(void); - -/* - * When an asynchronous RPC task is activated within a bottom half - * handler, or while executing another RPC task, it is put on - * schedq, and rpciod is woken up. - */ -static RPC_WAITQ(schedq, "schedq"); +static void rpc_async_schedule(void *); /* * RPC tasks that create another task (e.g. 
for contacting the portmapper) @@ -68,26 +62,18 @@ static LIST_HEAD(all_tasks); /* * rpciod-related stuff */ -static DECLARE_WAIT_QUEUE_HEAD(rpciod_idle); -static DECLARE_COMPLETION(rpciod_killer); static DECLARE_MUTEX(rpciod_sema); static unsigned int rpciod_users; -static pid_t rpciod_pid; -static int rpc_inhibit; +static struct workqueue_struct *rpciod_workqueue; /* - * Spinlock for wait queues. Access to the latter also has to be - * interrupt-safe in order to allow timers to wake up sleeping tasks. - */ -static spinlock_t rpc_queue_lock = SPIN_LOCK_UNLOCKED; -/* * Spinlock for other critical sections of code. */ static spinlock_t rpc_sched_lock = SPIN_LOCK_UNLOCKED; /* * Disable the timer for a given RPC task. Should be called with - * rpc_queue_lock and bh_disabled in order to avoid races within + * queue->lock and bh_disabled in order to avoid races within * rpc_run_timer(). */ static inline void @@ -105,16 +91,13 @@ __rpc_disable_timer(struct rpc_task *tas * without calling del_timer_sync(). The latter could cause a * deadlock if called while we're holding spinlocks... */ -static void -rpc_run_timer(struct rpc_task *task) +static void rpc_run_timer(struct rpc_task *task) { void (*callback)(struct rpc_task *); - spin_lock_bh(&rpc_queue_lock); callback = task->tk_timeout_fn; task->tk_timeout_fn = NULL; - spin_unlock_bh(&rpc_queue_lock); - if (callback) { + if (callback && RPC_IS_QUEUED(task)) { dprintk("RPC: %4d running timer\n", task->tk_pid); callback(task); } @@ -140,19 +123,8 @@ __rpc_add_timer(struct rpc_task *task, r } /* - * Set up a timer for an already sleeping task. - */ -void rpc_add_timer(struct rpc_task *task, rpc_action timer) -{ - spin_lock_bh(&rpc_queue_lock); - if (!RPC_IS_RUNNING(task)) - __rpc_add_timer(task, timer); - spin_unlock_bh(&rpc_queue_lock); -} - -/* * Delete any timer for the current task. Because we use del_timer_sync(), - * this function should never be called while holding rpc_queue_lock. 
+ * this function should never be called while holding queue->lock. */ static inline void rpc_delete_timer(struct rpc_task *task) @@ -169,16 +141,17 @@ static void __rpc_add_wait_queue_priorit struct list_head *q; struct rpc_task *t; + INIT_LIST_HEAD(&task->u.tk_wait.links); q = &queue->tasks[task->tk_priority]; if (unlikely(task->tk_priority > queue->maxpriority)) q = &queue->tasks[queue->maxpriority]; - list_for_each_entry(t, q, tk_list) { + list_for_each_entry(t, q, u.tk_wait.list) { if (t->tk_cookie == task->tk_cookie) { - list_add_tail(&task->tk_list, &t->tk_links); + list_add_tail(&task->u.tk_wait.list, &t->u.tk_wait.links); return; } } - list_add_tail(&task->tk_list, q); + list_add_tail(&task->u.tk_wait.list, q); } /* @@ -189,37 +162,21 @@ static void __rpc_add_wait_queue_priorit * improve overall performance. * Everyone else gets appended to the queue to ensure proper FIFO behavior. */ -static int __rpc_add_wait_queue(struct rpc_wait_queue *queue, struct rpc_task *task) +static void __rpc_add_wait_queue(struct rpc_wait_queue *queue, struct rpc_task *task) { - if (task->tk_rpcwait == queue) - return 0; + BUG_ON (RPC_IS_QUEUED(task)); - if (task->tk_rpcwait) { - printk(KERN_WARNING "RPC: doubly enqueued task!\n"); - return -EWOULDBLOCK; - } if (RPC_IS_PRIORITY(queue)) __rpc_add_wait_queue_priority(queue, task); else if (RPC_IS_SWAPPER(task)) - list_add(&task->tk_list, &queue->tasks[0]); + list_add(&task->u.tk_wait.list, &queue->tasks[0]); else - list_add_tail(&task->tk_list, &queue->tasks[0]); - task->tk_rpcwait = queue; + list_add_tail(&task->u.tk_wait.list, &queue->tasks[0]); + task->u.tk_wait.rpc_waitq = queue; + rpc_set_queued(task); dprintk("RPC: %4d added to queue %p \"%s\"\n", task->tk_pid, queue, rpc_qname(queue)); - - return 0; -} - -int rpc_add_wait_queue(struct rpc_wait_queue *q, struct rpc_task *task) -{ - int result; - - spin_lock_bh(&rpc_queue_lock); - result = __rpc_add_wait_queue(q, task); - spin_unlock_bh(&rpc_queue_lock); - return result; } 
/* @@ -229,12 +186,12 @@ static void __rpc_remove_wait_queue_prio { struct rpc_task *t; - if (!list_empty(&task->tk_links)) { - t = list_entry(task->tk_links.next, struct rpc_task, tk_list); - list_move(&t->tk_list, &task->tk_list); - list_splice_init(&task->tk_links, &t->tk_links); + if (!list_empty(&task->u.tk_wait.links)) { + t = list_entry(task->u.tk_wait.links.next, struct rpc_task, u.tk_wait.list); + list_move(&t->u.tk_wait.list, &task->u.tk_wait.list); + list_splice_init(&task->u.tk_wait.links, &t->u.tk_wait.links); } - list_del(&task->tk_list); + list_del(&task->u.tk_wait.list); } /* @@ -243,31 +200,17 @@ static void __rpc_remove_wait_queue_prio */ static void __rpc_remove_wait_queue(struct rpc_task *task) { - struct rpc_wait_queue *queue = task->tk_rpcwait; - - if (!queue) - return; + struct rpc_wait_queue *queue; + queue = task->u.tk_wait.rpc_waitq; if (RPC_IS_PRIORITY(queue)) __rpc_remove_wait_queue_priority(task); else - list_del(&task->tk_list); - task->tk_rpcwait = NULL; - + list_del(&task->u.tk_wait.list); dprintk("RPC: %4d removed from queue %p \"%s\"\n", task->tk_pid, queue, rpc_qname(queue)); } -void -rpc_remove_wait_queue(struct rpc_task *task) -{ - if (!task->tk_rpcwait) - return; - spin_lock_bh(&rpc_queue_lock); - __rpc_remove_wait_queue(task); - spin_unlock_bh(&rpc_queue_lock); -} - static inline void rpc_set_waitqueue_priority(struct rpc_wait_queue *queue, int priority) { queue->priority = priority; @@ -290,6 +233,7 @@ static void __rpc_init_priority_wait_que { int i; + spin_lock_init(&queue->lock); for (i = 0; i < ARRAY_SIZE(queue->tasks); i++) INIT_LIST_HEAD(&queue->tasks[i]); queue->maxpriority = maxprio; @@ -316,34 +260,27 @@ EXPORT_SYMBOL(rpc_init_wait_queue); * Note: If the task is ASYNC, this must be called with * the spinlock held to protect the wait queue operation. 
*/ -static inline void -rpc_make_runnable(struct rpc_task *task) +static void rpc_make_runnable(struct rpc_task *task) { - if (task->tk_timeout_fn) { - printk(KERN_ERR "RPC: task w/ running timer in rpc_make_runnable!!\n"); + if (rpc_test_and_set_running(task)) return; - } - rpc_set_running(task); + BUG_ON(task->tk_timeout_fn); if (RPC_IS_ASYNC(task)) { - if (RPC_IS_SLEEPING(task)) { - int status; - status = __rpc_add_wait_queue(&schedq, task); - if (status < 0) { - printk(KERN_WARNING "RPC: failed to add task to queue: error: %d!\n", status); - task->tk_status = status; - return; - } - rpc_clear_sleeping(task); - wake_up(&rpciod_idle); + int status; + + INIT_WORK(&task->u.tk_work, rpc_async_schedule, (void *)task); + status = queue_work(task->tk_workqueue, &task->u.tk_work); + if (status < 0) { + printk(KERN_WARNING "RPC: failed to add task to queue: error: %d!\n", status); + task->tk_status = status; + return; } - } else { - rpc_clear_sleeping(task); - wake_up(&task->tk_wait); - } + } else + wake_up(&task->u.tk_wait.waitq); } /* - * Place a newly initialized task on the schedq. + * Place a newly initialized task on the workqueue. */ static inline void rpc_schedule_run(struct rpc_task *task) @@ -352,33 +289,18 @@ rpc_schedule_run(struct rpc_task *task) if (RPC_IS_ACTIVATED(task)) return; task->tk_active = 1; - rpc_set_sleeping(task); rpc_make_runnable(task); } /* - * For other people who may need to wake the I/O daemon - * but should (for now) know nothing about its innards - */ -void rpciod_wake_up(void) -{ - if(rpciod_pid==0) - printk(KERN_ERR "rpciod: wot no daemon?\n"); - wake_up(&rpciod_idle); -} - -/* * Prepare for sleeping on a wait queue. * By always appending tasks to the list we ensure FIFO behavior. * NB: An RPC task will only receive interrupt-driven events as long * as it's on a wait queue. 
*/ -static void -__rpc_sleep_on(struct rpc_wait_queue *q, struct rpc_task *task, +static void __rpc_sleep_on(struct rpc_wait_queue *q, struct rpc_task *task, rpc_action action, rpc_action timer) { - int status; - dprintk("RPC: %4d sleep_on(queue \"%s\" time %ld)\n", task->tk_pid, rpc_qname(q), jiffies); @@ -388,49 +310,36 @@ __rpc_sleep_on(struct rpc_wait_queue *q, } /* Mark the task as being activated if so needed */ - if (!RPC_IS_ACTIVATED(task)) { + if (!RPC_IS_ACTIVATED(task)) task->tk_active = 1; - rpc_set_sleeping(task); - } - status = __rpc_add_wait_queue(q, task); - if (status) { - printk(KERN_WARNING "RPC: failed to add task to queue: error: %d!\n", status); - task->tk_status = status; - } else { - rpc_clear_running(task); - if (task->tk_callback) { - dprintk(KERN_ERR "RPC: %4d overwrites an active callback\n", task->tk_pid); - BUG(); - } - task->tk_callback = action; - __rpc_add_timer(task, timer); - } + __rpc_add_wait_queue(q, task); + + BUG_ON(task->tk_callback != NULL); + task->tk_callback = action; + __rpc_add_timer(task, timer); } -void -rpc_sleep_on(struct rpc_wait_queue *q, struct rpc_task *task, +void rpc_sleep_on(struct rpc_wait_queue *q, struct rpc_task *task, rpc_action action, rpc_action timer) { /* * Protect the queue operations. */ - spin_lock_bh(&rpc_queue_lock); + spin_lock_bh(&q->lock); __rpc_sleep_on(q, task, action, timer); - spin_unlock_bh(&rpc_queue_lock); + spin_unlock_bh(&q->lock); } /** - * __rpc_wake_up_task - wake up a single rpc_task + * __rpc_do_wake_up_task - wake up a single rpc_task * @task: task to be woken up * - * Caller must hold rpc_queue_lock + * Caller must hold queue->lock, and have cleared the task queued flag. 
*/ -static void -__rpc_wake_up_task(struct rpc_task *task) +static void __rpc_do_wake_up_task(struct rpc_task *task) { - dprintk("RPC: %4d __rpc_wake_up_task (now %ld inh %d)\n", - task->tk_pid, jiffies, rpc_inhibit); + dprintk("RPC: %4d __rpc_wake_up_task (now %ld)\n", task->tk_pid, jiffies); #ifdef RPC_DEBUG if (task->tk_magic != 0xf00baa) { @@ -445,12 +354,9 @@ __rpc_wake_up_task(struct rpc_task *task printk(KERN_ERR "RPC: Inactive task (%p) being woken up!\n", task); return; } - if (RPC_IS_RUNNING(task)) - return; __rpc_disable_timer(task); - if (task->tk_rpcwait != &schedq) - __rpc_remove_wait_queue(task); + __rpc_remove_wait_queue(task); rpc_make_runnable(task); @@ -458,6 +364,15 @@ __rpc_wake_up_task(struct rpc_task *task } /* + * Wake up the specified task + */ +static void __rpc_wake_up_task(struct rpc_task *task) +{ + if (rpc_test_and_clear_queued(task)) + __rpc_do_wake_up_task(task); +} + +/* * Default timeout handler if none specified by user */ static void @@ -471,14 +386,15 @@ __rpc_default_timer(struct rpc_task *tas /* * Wake up the specified task */ -void -rpc_wake_up_task(struct rpc_task *task) +void rpc_wake_up_task(struct rpc_task *task) { - if (RPC_IS_RUNNING(task)) - return; - spin_lock_bh(&rpc_queue_lock); - __rpc_wake_up_task(task); - spin_unlock_bh(&rpc_queue_lock); + if (rpc_test_and_clear_queued(task)) { + struct rpc_wait_queue *queue = task->u.tk_wait.rpc_waitq; + + spin_lock_bh(&queue->lock); + __rpc_do_wake_up_task(task); + spin_unlock_bh(&queue->lock); + } } /* @@ -494,11 +410,11 @@ static struct rpc_task * __rpc_wake_up_n */ q = &queue->tasks[queue->priority]; if (!list_empty(q)) { - task = list_entry(q->next, struct rpc_task, tk_list); + task = list_entry(q->next, struct rpc_task, u.tk_wait.list); if (queue->cookie == task->tk_cookie) { if (--queue->nr) goto out; - list_move_tail(&task->tk_list, q); + list_move_tail(&task->u.tk_wait.list, q); } /* * Check if we need to switch queues. 
@@ -516,7 +432,7 @@ static struct rpc_task * __rpc_wake_up_n else q = q - 1; if (!list_empty(q)) { - task = list_entry(q->next, struct rpc_task, tk_list); + task = list_entry(q->next, struct rpc_task, u.tk_wait.list); goto new_queue; } } while (q != &queue->tasks[queue->priority]); @@ -541,14 +457,14 @@ struct rpc_task * rpc_wake_up_next(struc struct rpc_task *task = NULL; dprintk("RPC: wake_up_next(%p \"%s\")\n", queue, rpc_qname(queue)); - spin_lock_bh(&rpc_queue_lock); + spin_lock_bh(&queue->lock); if (RPC_IS_PRIORITY(queue)) task = __rpc_wake_up_next_priority(queue); else { task_for_first(task, &queue->tasks[0]) __rpc_wake_up_task(task); } - spin_unlock_bh(&rpc_queue_lock); + spin_unlock_bh(&queue->lock); return task; } @@ -557,25 +473,25 @@ struct rpc_task * rpc_wake_up_next(struc * rpc_wake_up - wake up all rpc_tasks * @queue: rpc_wait_queue on which the tasks are sleeping * - * Grabs rpc_queue_lock + * Grabs queue->lock */ void rpc_wake_up(struct rpc_wait_queue *queue) { struct rpc_task *task; struct list_head *head; - spin_lock_bh(&rpc_queue_lock); + spin_lock_bh(&queue->lock); head = &queue->tasks[queue->maxpriority]; for (;;) { while (!list_empty(head)) { - task = list_entry(head->next, struct rpc_task, tk_list); + task = list_entry(head->next, struct rpc_task, u.tk_wait.list); __rpc_wake_up_task(task); } if (head == &queue->tasks[0]) break; head--; } - spin_unlock_bh(&rpc_queue_lock); + spin_unlock_bh(&queue->lock); } /** @@ -583,18 +499,18 @@ void rpc_wake_up(struct rpc_wait_queue * * @queue: rpc_wait_queue on which the tasks are sleeping * @status: status value to set * - * Grabs rpc_queue_lock + * Grabs queue->lock */ void rpc_wake_up_status(struct rpc_wait_queue *queue, int status) { struct list_head *head; struct rpc_task *task; - spin_lock_bh(&rpc_queue_lock); + spin_lock_bh(&queue->lock); head = &queue->tasks[queue->maxpriority]; for (;;) { while (!list_empty(head)) { - task = list_entry(head->next, struct rpc_task, tk_list); + task = 
list_entry(head->next, struct rpc_task, u.tk_wait.list); task->tk_status = status; __rpc_wake_up_task(task); } @@ -602,7 +518,7 @@ void rpc_wake_up_status(struct rpc_wait_ break; head--; } - spin_unlock_bh(&rpc_queue_lock); + spin_unlock_bh(&queue->lock); } /* @@ -626,18 +542,14 @@ __rpc_atrun(struct rpc_task *task) /* * This is the RPC `scheduler' (or rather, the finite state machine). */ -static int -__rpc_execute(struct rpc_task *task) +static int __rpc_execute(struct rpc_task *task) { int status = 0; dprintk("RPC: %4d rpc_execute flgs %x\n", task->tk_pid, task->tk_flags); - if (!RPC_IS_RUNNING(task)) { - printk(KERN_WARNING "RPC: rpc_execute called for sleeping task!!\n"); - return 0; - } + BUG_ON(RPC_IS_QUEUED(task)); restarted: while (1) { @@ -657,7 +569,9 @@ __rpc_execute(struct rpc_task *task) */ save_callback=task->tk_callback; task->tk_callback=NULL; + lock_kernel(); save_callback(task); + unlock_kernel(); } /* @@ -665,43 +579,41 @@ __rpc_execute(struct rpc_task *task) * tk_action may be NULL when the task has been killed * by someone else. */ - if (RPC_IS_RUNNING(task)) { + if (!RPC_IS_QUEUED(task)) { /* * Garbage collection of pending timers... */ rpc_delete_timer(task); if (!task->tk_action) break; + lock_kernel(); task->tk_action(task); - /* micro-optimization to avoid spinlock */ - if (RPC_IS_RUNNING(task)) - continue; + unlock_kernel(); } /* - * Check whether task is sleeping. + * Lockless check for whether task is sleeping or not. */ - spin_lock_bh(&rpc_queue_lock); - if (!RPC_IS_RUNNING(task)) { - rpc_set_sleeping(task); - if (RPC_IS_ASYNC(task)) { - spin_unlock_bh(&rpc_queue_lock); + if (!RPC_IS_QUEUED(task)) + continue; + if (RPC_IS_ASYNC(task)) { + rpc_clear_running(task); + /* Careful! we may have raced... 
*/ + if (RPC_IS_QUEUED(task)) return 0; - } + if (rpc_test_and_set_running(task)) + return 0; + continue; } - spin_unlock_bh(&rpc_queue_lock); - if (!RPC_IS_SLEEPING(task)) - continue; + init_waitqueue_head(&task->u.tk_wait.waitq); + rpc_clear_running(task); /* sync task: sleep here */ dprintk("RPC: %4d sync task going to sleep\n", task->tk_pid); - if (current->pid == rpciod_pid) - printk(KERN_ERR "RPC: rpciod waiting on sync task!\n"); - if (!task->tk_client->cl_intr) { - __wait_event(task->tk_wait, !RPC_IS_SLEEPING(task)); + __wait_event(task->u.tk_wait.waitq, RPC_IS_RUNNING(task)); } else { - __wait_event_interruptible(task->tk_wait, !RPC_IS_SLEEPING(task), status); + __wait_event_interruptible(task->u.tk_wait.waitq, RPC_IS_RUNNING(task), status); /* * When a sync task receives a signal, it exits with * -ERESTARTSYS. In order to catch any callbacks that @@ -719,7 +631,9 @@ __rpc_execute(struct rpc_task *task) } if (task->tk_exit) { + lock_kernel(); task->tk_exit(task); + unlock_kernel(); /* If tk_action is non-null, the user wants us to restart */ if (task->tk_action) { if (!RPC_ASSASSINATED(task)) { @@ -738,7 +652,6 @@ __rpc_execute(struct rpc_task *task) /* Release all resources associated with the task */ rpc_release_task(task); - return status; } @@ -754,57 +667,16 @@ __rpc_execute(struct rpc_task *task) int rpc_execute(struct rpc_task *task) { - int status = -EIO; - if (rpc_inhibit) { - printk(KERN_INFO "RPC: execution inhibited!\n"); - goto out_release; - } - - status = -EWOULDBLOCK; - if (task->tk_active) { - printk(KERN_ERR "RPC: active task was run twice!\n"); - goto out_err; - } + BUG_ON(task->tk_active); task->tk_active = 1; rpc_set_running(task); return __rpc_execute(task); - out_release: - rpc_release_task(task); - out_err: - return status; } -/* - * This is our own little scheduler for async RPC tasks. 
- */ -static void -__rpc_schedule(void) +static void rpc_async_schedule(void *arg) { - struct rpc_task *task; - int count = 0; - - dprintk("RPC: rpc_schedule enter\n"); - while (1) { - - task_for_first(task, &schedq.tasks[0]) { - __rpc_remove_wait_queue(task); - spin_unlock_bh(&rpc_queue_lock); - - __rpc_execute(task); - spin_lock_bh(&rpc_queue_lock); - } else { - break; - } - - if (++count >= 200 || need_resched()) { - count = 0; - spin_unlock_bh(&rpc_queue_lock); - schedule(); - spin_lock_bh(&rpc_queue_lock); - } - } - dprintk("RPC: rpc_schedule leave\n"); + __rpc_execute((struct rpc_task *)arg); } /* @@ -862,7 +734,6 @@ void rpc_init_task(struct rpc_task *task task->tk_client = clnt; task->tk_flags = flags; task->tk_exit = callback; - init_waitqueue_head(&task->tk_wait); if (current->uid != current->fsuid || current->gid != current->fsgid) task->tk_flags |= RPC_TASK_SETUID; @@ -873,7 +744,9 @@ void rpc_init_task(struct rpc_task *task task->tk_priority = RPC_PRIORITY_NORMAL; task->tk_cookie = (unsigned long)current; - INIT_LIST_HEAD(&task->tk_links); + + /* Initialize workqueue for async tasks */ + task->tk_workqueue = rpciod_workqueue; /* Add to global list of all tasks */ spin_lock(&rpc_sched_lock); @@ -942,8 +815,7 @@ cleanup: goto out; } -void -rpc_release_task(struct rpc_task *task) +void rpc_release_task(struct rpc_task *task) { dprintk("RPC: %4d release task\n", task->tk_pid); @@ -961,19 +833,9 @@ rpc_release_task(struct rpc_task *task) list_del(&task->tk_task); spin_unlock(&rpc_sched_lock); - /* Protect the execution below. 
*/ - spin_lock_bh(&rpc_queue_lock); - - /* Disable timer to prevent zombie wakeup */ - __rpc_disable_timer(task); - - /* Remove from any wait queue we're still on */ - __rpc_remove_wait_queue(task); - + BUG_ON (rpc_test_and_clear_queued(task)); task->tk_active = 0; - spin_unlock_bh(&rpc_queue_lock); - /* Synchronously delete any running timer */ rpc_delete_timer(task); @@ -1003,10 +865,9 @@ rpc_release_task(struct rpc_task *task) * queue 'childq'. If so returns a pointer to the parent. * Upon failure returns NULL. * - * Caller must hold rpc_queue_lock + * Caller must hold childq.lock */ -static inline struct rpc_task * -rpc_find_parent(struct rpc_task *child) +static inline struct rpc_task *rpc_find_parent(struct rpc_task *child) { struct rpc_task *task, *parent; struct list_head *le; @@ -1019,17 +880,16 @@ rpc_find_parent(struct rpc_task *child) return NULL; } -static void -rpc_child_exit(struct rpc_task *child) +static void rpc_child_exit(struct rpc_task *child) { struct rpc_task *parent; - spin_lock_bh(&rpc_queue_lock); + spin_lock_bh(&childq.lock); if ((parent = rpc_find_parent(child)) != NULL) { parent->tk_status = child->tk_status; __rpc_wake_up_task(parent); } - spin_unlock_bh(&rpc_queue_lock); + spin_unlock_bh(&childq.lock); } /* @@ -1052,22 +912,20 @@ fail: return NULL; } -void -rpc_run_child(struct rpc_task *task, struct rpc_task *child, rpc_action func) +void rpc_run_child(struct rpc_task *task, struct rpc_task *child, rpc_action func) { - spin_lock_bh(&rpc_queue_lock); + spin_lock_bh(&childq.lock); /* N.B. Is it possible for the child to have already finished? */ __rpc_sleep_on(&childq, task, func, NULL); rpc_schedule_run(child); - spin_unlock_bh(&rpc_queue_lock); + spin_unlock_bh(&childq.lock); } /* * Kill all tasks for the given client. * XXX: kill their descendants as well? 
*/ -void -rpc_killall_tasks(struct rpc_clnt *clnt) +void rpc_killall_tasks(struct rpc_clnt *clnt) { struct rpc_task *rovr; struct list_head *le; @@ -1089,93 +947,14 @@ rpc_killall_tasks(struct rpc_clnt *clnt) static DECLARE_MUTEX_LOCKED(rpciod_running); -static inline int -rpciod_task_pending(void) -{ - return !list_empty(&schedq.tasks[0]); -} - - -/* - * This is the rpciod kernel thread - */ -static int -rpciod(void *ptr) -{ - int rounds = 0; - - lock_kernel(); - /* - * Let our maker know we're running ... - */ - rpciod_pid = current->pid; - up(&rpciod_running); - - daemonize("rpciod"); - allow_signal(SIGKILL); - - dprintk("RPC: rpciod starting (pid %d)\n", rpciod_pid); - spin_lock_bh(&rpc_queue_lock); - while (rpciod_users) { - DEFINE_WAIT(wait); - if (signalled()) { - spin_unlock_bh(&rpc_queue_lock); - rpciod_killall(); - flush_signals(current); - spin_lock_bh(&rpc_queue_lock); - } - __rpc_schedule(); - if (current->flags & PF_FREEZE) { - spin_unlock_bh(&rpc_queue_lock); - refrigerator(PF_FREEZE); - spin_lock_bh(&rpc_queue_lock); - } - - if (++rounds >= 64) { /* safeguard */ - spin_unlock_bh(&rpc_queue_lock); - schedule(); - rounds = 0; - spin_lock_bh(&rpc_queue_lock); - } - - dprintk("RPC: rpciod back to sleep\n"); - prepare_to_wait(&rpciod_idle, &wait, TASK_INTERRUPTIBLE); - if (!rpciod_task_pending() && !signalled()) { - spin_unlock_bh(&rpc_queue_lock); - schedule(); - rounds = 0; - spin_lock_bh(&rpc_queue_lock); - } - finish_wait(&rpciod_idle, &wait); - dprintk("RPC: switch to rpciod\n"); - } - spin_unlock_bh(&rpc_queue_lock); - - dprintk("RPC: rpciod shutdown commences\n"); - if (!list_empty(&all_tasks)) { - printk(KERN_ERR "rpciod: active tasks at shutdown?!\n"); - rpciod_killall(); - } - - dprintk("RPC: rpciod exiting\n"); - unlock_kernel(); - - rpciod_pid = 0; - complete_and_exit(&rpciod_killer, 0); - return 0; -} - -static void -rpciod_killall(void) +static void rpciod_killall(void) { unsigned long flags; while (!list_empty(&all_tasks)) { 
clear_thread_flag(TIF_SIGPENDING); rpc_killall_tasks(NULL); - spin_lock_bh(&rpc_queue_lock); - __rpc_schedule(); - spin_unlock_bh(&rpc_queue_lock); + flush_workqueue(rpciod_workqueue); if (!list_empty(&all_tasks)) { dprintk("rpciod_killall: waiting for tasks to exit\n"); yield(); @@ -1193,28 +972,30 @@ rpciod_killall(void) int rpciod_up(void) { + struct workqueue_struct *wq; int error = 0; down(&rpciod_sema); - dprintk("rpciod_up: pid %d, users %d\n", rpciod_pid, rpciod_users); + dprintk("rpciod_up: users %d\n", rpciod_users); rpciod_users++; - if (rpciod_pid) + if (rpciod_workqueue) goto out; /* * If there's no pid, we should be the first user. */ if (rpciod_users > 1) - printk(KERN_WARNING "rpciod_up: no pid, %d users??\n", rpciod_users); + printk(KERN_WARNING "rpciod_up: no workqueue, %d users??\n", rpciod_users); /* * Create the rpciod thread and wait for it to start. */ - error = kernel_thread(rpciod, NULL, 0); - if (error < 0) { - printk(KERN_WARNING "rpciod_up: create thread failed, error=%d\n", error); + error = -ENOMEM; + wq = create_workqueue("rpciod"); + if (wq == NULL) { + printk(KERN_WARNING "rpciod_up: create workqueue failed, error=%d\n", error); rpciod_users--; goto out; } - down(&rpciod_running); + rpciod_workqueue = wq; error = 0; out: up(&rpciod_sema); @@ -1225,20 +1006,21 @@ void rpciod_down(void) { down(&rpciod_sema); - dprintk("rpciod_down pid %d sema %d\n", rpciod_pid, rpciod_users); + dprintk("rpciod_down sema %d\n", rpciod_users); if (rpciod_users) { if (--rpciod_users) goto out; } else - printk(KERN_WARNING "rpciod_down: pid=%d, no users??\n", rpciod_pid); + printk(KERN_WARNING "rpciod_down: no users??\n"); - if (!rpciod_pid) { + if (!rpciod_workqueue) { dprintk("rpciod_down: Nothing to do!\n"); goto out; } + rpciod_killall(); - kill_proc(rpciod_pid, SIGKILL, 1); - wait_for_completion(&rpciod_killer); + destroy_workqueue(rpciod_workqueue); + rpciod_workqueue = NULL; out: up(&rpciod_sema); } @@ -1256,7 +1038,12 @@ void rpc_show_tasks(void) 
} printk("-pid- proc flgs status -client- -prog- --rqstp- -timeout " "-rpcwait -action- --exit--\n"); - alltask_for_each(t, le, &all_tasks) + alltask_for_each(t, le, &all_tasks) { + const char *rpc_waitq = "none"; + + if (RPC_IS_QUEUED(t)) + rpc_waitq = rpc_qname(t->u.tk_wait.rpc_waitq); + printk("%05d %04d %04x %06d %8p %6d %8p %08ld %8s %8p %8p\n", t->tk_pid, (t->tk_msg.rpc_proc ? t->tk_msg.rpc_proc->p_proc : -1), @@ -1264,8 +1051,9 @@ void rpc_show_tasks(void) t->tk_client, (t->tk_client ? t->tk_client->cl_prog : 0), t->tk_rqstp, t->tk_timeout, - rpc_qname(t->tk_rpcwait), + rpc_waitq, t->tk_action, t->tk_exit); + } spin_unlock(&rpc_sched_lock); } #endif diff -u --recursive --new-file --show-c-function linux-2.6.7-rc3/net/sunrpc/sunrpc_syms.c linux-2.6.7-17-delegation_cache/net/sunrpc/sunrpc_syms.c --- linux-2.6.7-rc3/net/sunrpc/sunrpc_syms.c 2004-06-10 00:23:47.000000000 -0400 +++ linux-2.6.7-17-delegation_cache/net/sunrpc/sunrpc_syms.c 2004-06-10 01:04:59.000000000 -0400 @@ -89,6 +89,7 @@ EXPORT_SYMBOL(svc_makesock); EXPORT_SYMBOL(svc_reserve); EXPORT_SYMBOL(svc_auth_register); EXPORT_SYMBOL(auth_domain_lookup); +EXPORT_SYMBOL(svc_authenticate); /* RPC statistics */ #ifdef CONFIG_PROC_FS diff -u --recursive --new-file --show-c-function linux-2.6.7-rc3/net/sunrpc/svc.c linux-2.6.7-17-delegation_cache/net/sunrpc/svc.c --- linux-2.6.7-rc3/net/sunrpc/svc.c 2004-06-10 00:23:51.000000000 -0400 +++ linux-2.6.7-17-delegation_cache/net/sunrpc/svc.c 2004-06-10 01:04:59.000000000 -0400 @@ -263,6 +263,7 @@ svc_process(struct svc_serv *serv, struc u32 *statp; u32 dir, prog, vers, proc, auth_stat, rpc_stat; + int auth_res; rpc_stat = rpc_success; @@ -304,12 +305,17 @@ svc_process(struct svc_serv *serv, struc rqstp->rq_vers = vers = ntohl(svc_getu32(argv)); /* version number */ rqstp->rq_proc = proc = ntohl(svc_getu32(argv)); /* procedure number */ + progp = serv->sv_program; /* * Decode auth data, and add verifier to reply buffer. 
* We do this before anything else in order to get a decent * auth verifier. */ - switch (svc_authenticate(rqstp, &auth_stat)) { + if (progp->pg_authenticate != NULL) + auth_res = progp->pg_authenticate(rqstp, &auth_stat); + else + auth_res = svc_authenticate(rqstp, &auth_stat); + switch (auth_res) { case SVC_OK: break; case SVC_GARBAGE: @@ -326,7 +332,6 @@ svc_process(struct svc_serv *serv, struc goto sendit; } - progp = serv->sv_program; if (prog != progp->pg_prog) goto err_bad_prog; diff -u --recursive --new-file --show-c-function linux-2.6.7-rc3/net/sunrpc/xprt.c linux-2.6.7-17-delegation_cache/net/sunrpc/xprt.c --- linux-2.6.7-rc3/net/sunrpc/xprt.c 2004-06-10 00:23:27.000000000 -0400 +++ linux-2.6.7-17-delegation_cache/net/sunrpc/xprt.c 2004-06-10 01:04:35.000000000 -0400 @@ -1099,7 +1099,7 @@ xprt_write_space(struct sock *sk) goto out; spin_lock_bh(&xprt->sock_lock); - if (xprt->snd_task && xprt->snd_task->tk_rpcwait == &xprt->pending) + if (xprt->snd_task) rpc_wake_up_task(xprt->snd_task); spin_unlock_bh(&xprt->sock_lock); out: