[GIT] NFS client updates for 2.6.22...

Trond Myklebust trond.myklebust at fys.uio.no
Tue Jul 10 23:44:46 EDT 2007


Hi Linus,

Please pull from the repository at:

   git pull git://git.linux-nfs.org/pub/linux/nfs-2.6.git

This will update the following files through the appended changesets.

  Cheers,
    Trond

----
 fs/lockd/host.c                      |   39 +-
 fs/lockd/mon.c                       |    2 +-
 fs/lockd/svc.c                       |    6 -
 fs/nfs/Makefile                      |    4 +-
 fs/nfs/client.c                      |   28 +-
 fs/nfs/delegation.c                  |  186 ++++---
 fs/nfs/delegation.h                  |   26 +-
 fs/nfs/dir.c                         |   16 +-
 fs/nfs/direct.c                      |   34 +-
 fs/nfs/inode.c                       |   73 +--
 fs/nfs/internal.h                    |    4 +-
 fs/nfs/mount_clnt.c                  |  169 +++---
 fs/nfs/nfs2xdr.c                     |    6 +-
 fs/nfs/nfs3proc.c                    |    4 +-
 fs/nfs/nfs3xdr.c                     |    8 +-
 fs/nfs/nfs4_fs.h                     |   40 +-
 fs/nfs/nfs4proc.c                    |  760 +++++++++++++---------
 fs/nfs/nfs4state.c                   |  310 ++++++---
 fs/nfs/nfs4xdr.c                     |  126 +++--
 fs/nfs/nfsroot.c                     |    5 +-
 fs/nfs/pagelist.c                    |   60 +-
 fs/nfs/read.c                        |   40 +-
 fs/nfs/super.c                       | 1189 +++++++++++++++++++++++++++++-----
 fs/nfs/write.c                       |  149 +++--
 fs/nfsd/nfs4callback.c               |   18 +-
 fs/nfsd/nfs4state.c                  |    1 -
 include/linux/lockd/lockd.h          |    1 +
 include/linux/nfs4.h                 |    1 +
 include/linux/nfs4_mount.h           |    3 +-
 include/linux/nfs_fs.h               |   28 +-
 include/linux/nfs_fs_sb.h            |    8 +-
 include/linux/nfs_mount.h            |    3 +-
 include/linux/nfs_page.h             |   25 +-
 include/linux/nfs_xdr.h              |    5 +-
 include/linux/sunrpc/auth.h          |   48 +-
 include/linux/sunrpc/auth_gss.h      |    6 +-
 include/linux/sunrpc/clnt.h          |   33 +-
 include/linux/sunrpc/gss_api.h       |    2 +-
 include/linux/sunrpc/rpc_pipe_fs.h   |    2 +
 include/linux/sunrpc/sched.h         |    6 -
 include/linux/sunrpc/svcsock.h       |    1 +
 include/linux/sunrpc/xprt.h          |   16 +-
 net/sunrpc/auth.c                    |  370 ++++++++----
 net/sunrpc/auth_gss/auth_gss.c       |  349 +++++++----
 net/sunrpc/auth_gss/gss_krb5_mech.c  |    2 +-
 net/sunrpc/auth_gss/gss_spkm3_mech.c |    2 +-
 net/sunrpc/auth_null.c               |   10 +-
 net/sunrpc/auth_unix.c               |   54 +-
 net/sunrpc/clnt.c                    |  371 +++++++----
 net/sunrpc/rpc_pipe.c                |   80 ++-
 net/sunrpc/rpcb_clnt.c               |   65 +-
 net/sunrpc/sched.c                   |  209 ++-----
 net/sunrpc/sunrpc_syms.c             |    8 +-
 net/sunrpc/svcsock.c                 |   20 +
 net/sunrpc/xprt.c                    |   19 +-
 net/sunrpc/xprtsock.c                |   81 ++--
 56 files changed, 3321 insertions(+), 1810 deletions(-)

Chuck Lever (22):
      NFS: Clean ups in fs/nfs/direct.c
      NFS: Clean up nfs_size_to_loff_t()
      NFS: Clean-up: use correct type when converting NFS blocks to local blocks
      NFS: Clean-up: Define macros for maximum host and export path name lengths
      NFS: Clean-up: Replace nfs_copy_user_string with strndup_user
      NFS: Clean up error handling in nfs_get_sb
      NFS: Clean-up: fix a compiler warning in fs/nfs/super.c
      NFS: Clean-up: Refactor IP address sanity checks in NFS client
      NFS: Add a new NFS debugging flag just for mount processing
      NFS: Clean up nfs_validate_mount_data
      NFS: Introduce nfs4_validate_mount_options
      SUNRPC: Allow rpcbind requests to be interrupted by a signal.
      SUNRPC: Rename rpcb_getport_external routine
      SUNRPC: Rename rpcb_getport to be consistent with new rpcb_getport_sync name
      SUNRPC: Add a convenient default for the hostname when calling rpc_create()
      NFS: Remake nfsroot_mount as a permanent part of NFS client
      NFS: Clean up in-kernel NFS mount
      NFS: Improve debugging output in NFS in-kernel mount client
      NFS: Add enums and match tables for mount option parsing
      NFS: Introduce generic mount client API
      NFS: Add final pieces to support in-kernel mount option parsing
      NFS: Add support for mounting NFSv4 file systems with string options

Frank Filz (1):
      NFSv4: Make sure unlock is really an unlock when cancelling a lock

Frank van Maarseveen (4):
      SUNRPC: cleanup transport creation argument passing
      SUNRPC server: record the destination address of a request
      SUNRPC client: add interface for binding to a local address
      NLM: fix source address of callback to client

J. Bruce Fields (1):
      sunrpc: drop BKL around wrap and unwrap

Jeff Layton (2):
      NFS: Use GFP_HIGHUSER for page allocation in nfs_symlink()
      NFS4: on a O_EXCL OPEN make sure SETATTR sets the fields holding the verifier

Trond Myklebust (92):
      NFS: Don't fail an O_DIRECT read/write if get_user_pages() returns pages
      NFS: Don't mark a written page as uptodate until it is on disk
      NFS: Minor read optimisation...
      NFS: Replace vfsmount and dentry in nfs_open_context with struct path
      NFSv4: Cleanup: pass the nfs_open_context to open recovery code
      NFSv4: Ensure asynchronous open() calls always pin the mountpoint
      NFSv4: Ensure that nfs4_do_close() doesn't race with umount
      NFSv4: Clean up nfs4_call_async()
      NFS: Fix nfs_reval_fsid()
      NFS: Re-enable forced umounts
      NFS: nfs3_proc_create() should use nfs_post_op_update_inode()
      NFS: Fix an Oops in the nfs_access_cache_shrinker()
      NFS: Convert struct nfs_page to use krefs
      NFS cleanup: Rename NFS_PAGE_TAG_WRITEBACK to NFS_PAGE_TAG_LOCKED
      NFS cleanup: speed up nfs_scan_commit using radix tree tags
      NFS: Remove the redundant 'dirty' and 'commit' lists from nfs_inode
      NFS: Prevent integer overflow in nfs_scan_list()
      NFS: reduce latency by using conditional rescheduling in nfs_scan_list
      NFS: Convert struct nfs_open_context to use a kref
      NFSv4: Convert struct nfs4_opendata to use struct kref
      SUNRPC: Move rpc_task->tk_task list into struct rpc_clnt
      SUNRPC: Add a per-rpc_clnt spinlock
      SUNRPC: Clean up tk_pid allocation and make it lockless
      SUNRPC: Convert rpc_clnt->cl_users to a kref
      SUNRPC: Kill rpc_clnt->cl_dead
      SUNRPC: Kill rpc_clnt->cl_oneshot
      SUNRPC: Make rpc_clone take a reference instead of using cl_count
      SUNRPC: Remove rpc_clnt->cl_count
      SUNRPC: Don't create an rpc_pipefs directory before rpc_clone is initialised
      SUNRPC: Optimise rpciod_up()
      SUNRPC: Make create_client() take a reference to the rpciod workqueue
      SUNRPC: Remove redundant calls to rpciod_up()/rpciod_down()
      SUNRPC: Move rpc_register_client and friends into net/sunrpc/clnt.c
      SUNRPC: clean up rpc_call_async/rpc_call_sync/rpc_run_task
      SUNRPC: fix hang due to eventd deadlock...
      SUNRPC: Add a backpointer from the struct rpc_cred to the rpc_auth
      SUNRPC: Remove the gss_auth spinlock
      SUNRPC: Always match an upcall message in gss_pipe_downcall()
      SUNRPC: Add a downcall queue to struct rpc_inode
      SUNRPC: Enable non-exclusive create in rpc_mkpipe()
      SUNRPC: Clean up rpc_pipefs.
      SUNRPC: Add a field to track the number of kernel users of an rpc_pipe
      SUNRPC: Fix a memory leak in the auth credcache code
      SUNRPC: Fix a typo in unx_create()
      SUNRPC: Fix a memory leak in gss_create()
      SUNRPC: Fix races in rpcauth_create
      SUNRPC: Make rpc_ping() static
      SUNRPC: Add the helper function 'rpc_call_null()'
      SUNRPC: Rename rpcauth_destroy() to rpcauth_release()
      SUNRPC: Mark auth and cred operation tables as constant.
      SUNRPC: Clean up rpc credential initialisation
      SUNRPC: replace casts in auth_unix.c with container_of()
      SUNRPC: Enforce atomic updates of rpc_cred->cr_flags
      SUNRPC: cleanup rpc credential cache garbage collection
      SUNRPC: Convert the credcache lookup code to use RCU
      SUNRPC: Give credential cache a local spinlock
      SUNRPC: Convert the credential garbage collector into a shrinker callback
      SUNRPC: Convert gss_ctx_lock to an RCU lock
      SUNRPC: Allow rpc_auth to run clean up before the rpc_client is destroyed
      SUNRPC: Remove the tk_auth macro...
      SUNRPC: Ensure that the struct gss_auth lifetime exceeds the credential's
      SUNRPC: Ensure RPCSEC_GSS destroys the security context when freeing a cred
      SUNRPC: Suppress some noisy and unnecessary printk() calls in call_verify()
      NFSv4: Clean up _nfs4_proc_lookup() vs _nfs4_proc_lookupfh()
      NFS: Replace NFS_I(inode)->req_lock with inode->i_lock
      NFSv4: Fix a credential reference leak in nfs4_get_state_owner()
      NFSv4: Don't reuse expired nfs4_state_owner structs
      NFSv4: Fix the NFSv4 owner and owner_group size estimates
      NFSv4: Fix the underestimate of NFSv4 open request size
      NFSv4: Fix underestimate of NFSv4 lookup request size
      NFSv4: nfs_increment_open_seqid should not return a value
      NFSv4: Reduce the chances of an open_owner identifier collision
      NFSv4: Fix atomic open for execute...
      NFSv4: Fix a bug in __nfs4_find_state_byowner
      NFSv4: set the delegation in nfs4_opendata_to_nfs4_state
      NFSv4: Fix up a bug in nfs4_open_recover()
      NFSv4: Support recalling delegations by stateid
      NFSv4: Support recalling delegations by stateid part 2
      NFSv4: Use RCU to protect delegations
      NFSv4: Defer inode revalidation when setting up a delegation
      NFSv4: Clean up confirmation of sequence ids...
      NFSv4: Always use the delegation if we have one
      NFSv4: Improve the debugging of bad sequence id errors...
      NFSv4: Allow nfs4_opendata_to_nfs4_state to return errors.
      NFSv4: Clean up _nfs4_proc_open()
      NFSv4: Check for the existence of a delegation in nfs4_open_prepare()
      NFSv4: Don't call OPEN if we already have an open stateid for a file
      NFSv4: Clean up the callers of nfs4_open_recover_helper()
      NFSv4: Fix up stateid locking...
      NFS: Add the mount option "nosharecache"
      NFS: Error when mounting the same filesystem with different options
      NFSv4: Make the NFS state model work with the nosharedcache mount option


diff --git a/fs/lockd/host.c b/fs/lockd/host.c
index 96070bf..572601e 100644
--- a/fs/lockd/host.c
+++ b/fs/lockd/host.c
@@ -44,9 +44,8 @@ static struct nsm_handle *	nsm_find(const struct sockaddr_in *sin,
  */
 static struct nlm_host *
 nlm_lookup_host(int server, const struct sockaddr_in *sin,
-					int proto, int version,
-					const char *hostname,
-					int hostname_len)
+		int proto, int version, const char *hostname,
+		int hostname_len, const struct sockaddr_in *ssin)
 {
 	struct hlist_head *chain;
 	struct hlist_node *pos;
@@ -54,7 +53,9 @@ nlm_lookup_host(int server, const struct sockaddr_in *sin,
 	struct nsm_handle *nsm = NULL;
 	int		hash;
 
-	dprintk("lockd: nlm_lookup_host(%u.%u.%u.%u, p=%d, v=%d, my role=%s, name=%.*s)\n",
+	dprintk("lockd: nlm_lookup_host("NIPQUAD_FMT"->"NIPQUAD_FMT
+			", p=%d, v=%d, my role=%s, name=%.*s)\n",
+			NIPQUAD(ssin->sin_addr.s_addr),
 			NIPQUAD(sin->sin_addr.s_addr), proto, version,
 			server? "server" : "client",
 			hostname_len,
@@ -91,6 +92,8 @@ nlm_lookup_host(int server, const struct sockaddr_in *sin,
 			continue;
 		if (host->h_server != server)
 			continue;
+		if (!nlm_cmp_addr(&host->h_saddr, ssin))
+			continue;
 
 		/* Move to head of hash chain. */
 		hlist_del(&host->h_hash);
@@ -118,6 +121,7 @@ nlm_lookup_host(int server, const struct sockaddr_in *sin,
 	host->h_name	   = nsm->sm_name;
 	host->h_addr       = *sin;
 	host->h_addr.sin_port = 0;	/* ouch! */
+	host->h_saddr	   = *ssin;
 	host->h_version    = version;
 	host->h_proto      = proto;
 	host->h_rpcclnt    = NULL;
@@ -161,15 +165,9 @@ nlm_destroy_host(struct nlm_host *host)
 	 */
 	nsm_unmonitor(host);
 
-	if ((clnt = host->h_rpcclnt) != NULL) {
-		if (atomic_read(&clnt->cl_users)) {
-			printk(KERN_WARNING
-				"lockd: active RPC handle\n");
-			clnt->cl_dead = 1;
-		} else {
-			rpc_destroy_client(host->h_rpcclnt);
-		}
-	}
+	clnt = host->h_rpcclnt;
+	if (clnt != NULL)
+		rpc_shutdown_client(clnt);
 	kfree(host);
 }
 
@@ -180,8 +178,10 @@ struct nlm_host *
 nlmclnt_lookup_host(const struct sockaddr_in *sin, int proto, int version,
 			const char *hostname, int hostname_len)
 {
+	struct sockaddr_in ssin = {0};
+
 	return nlm_lookup_host(0, sin, proto, version,
-			       hostname, hostname_len);
+			       hostname, hostname_len, &ssin);
 }
 
 /*
@@ -191,9 +191,12 @@ struct nlm_host *
 nlmsvc_lookup_host(struct svc_rqst *rqstp,
 			const char *hostname, int hostname_len)
 {
+	struct sockaddr_in ssin = {0};
+
+	ssin.sin_addr = rqstp->rq_daddr.addr;
 	return nlm_lookup_host(1, svc_addr_in(rqstp),
 			       rqstp->rq_prot, rqstp->rq_vers,
-			       hostname, hostname_len);
+			       hostname, hostname_len, &ssin);
 }
 
 /*
@@ -204,8 +207,9 @@ nlm_bind_host(struct nlm_host *host)
 {
 	struct rpc_clnt	*clnt;
 
-	dprintk("lockd: nlm_bind_host(%08x)\n",
-			(unsigned)ntohl(host->h_addr.sin_addr.s_addr));
+	dprintk("lockd: nlm_bind_host("NIPQUAD_FMT"->"NIPQUAD_FMT")\n",
+			NIPQUAD(host->h_saddr.sin_addr),
+			NIPQUAD(host->h_addr.sin_addr));
 
 	/* Lock host handle */
 	mutex_lock(&host->h_mutex);
@@ -232,6 +236,7 @@ nlm_bind_host(struct nlm_host *host)
 			.protocol	= host->h_proto,
 			.address	= (struct sockaddr *)&host->h_addr,
 			.addrsize	= sizeof(host->h_addr),
+			.saddress	= (struct sockaddr *)&host->h_saddr,
 			.timeout	= &timeparms,
 			.servername	= host->h_name,
 			.program	= &nlm_program,
diff --git a/fs/lockd/mon.c b/fs/lockd/mon.c
index 2102e2d..3353ed8 100644
--- a/fs/lockd/mon.c
+++ b/fs/lockd/mon.c
@@ -61,6 +61,7 @@ nsm_mon_unmon(struct nsm_handle *nsm, u32 proc, struct nsm_res *res)
 			status);
 	else
 		status = 0;
+	rpc_shutdown_client(clnt);
  out:
 	return status;
 }
@@ -138,7 +139,6 @@ nsm_create(void)
 		.program	= &nsm_program,
 		.version	= SM_VERSION,
 		.authflavor	= RPC_AUTH_NULL,
-		.flags		= (RPC_CLNT_CREATE_ONESHOT),
 	};
 
 	return rpc_create(&args);
diff --git a/fs/lockd/svc.c b/fs/lockd/svc.c
index 126b1bf..2680932 100644
--- a/fs/lockd/svc.c
+++ b/fs/lockd/svc.c
@@ -123,9 +123,6 @@ lockd(struct svc_rqst *rqstp)
 	/* Process request with signals blocked, but allow SIGKILL.  */
 	allow_signal(SIGKILL);
 
-	/* kick rpciod */
-	rpciod_up();
-
 	dprintk("NFS locking service started (ver " LOCKD_VERSION ").\n");
 
 	if (!nlm_timeout)
@@ -202,9 +199,6 @@ lockd(struct svc_rqst *rqstp)
 	/* Exit the RPC thread */
 	svc_exit_thread(rqstp);
 
-	/* release rpciod */
-	rpciod_down();
-
 	/* Release module */
 	unlock_kernel();
 	module_put_and_exit(0);
diff --git a/fs/nfs/Makefile b/fs/nfs/Makefile
index f4580b4..b55cb23 100644
--- a/fs/nfs/Makefile
+++ b/fs/nfs/Makefile
@@ -6,8 +6,8 @@ obj-$(CONFIG_NFS_FS) += nfs.o
 
 nfs-y 			:= client.o dir.o file.o getroot.o inode.o super.o nfs2xdr.o \
 			   pagelist.o proc.o read.o symlink.o unlink.o \
-			   write.o namespace.o
-nfs-$(CONFIG_ROOT_NFS)	+= nfsroot.o mount_clnt.o      
+			   write.o namespace.o mount_clnt.o
+nfs-$(CONFIG_ROOT_NFS)	+= nfsroot.o
 nfs-$(CONFIG_NFS_V3)	+= nfs3proc.o nfs3xdr.o
 nfs-$(CONFIG_NFS_V3_ACL)	+= nfs3acl.o
 nfs-$(CONFIG_NFS_V4)	+= nfs4proc.o nfs4xdr.o nfs4state.o nfs4renewd.o \
diff --git a/fs/nfs/client.c b/fs/nfs/client.c
index 881fa49..ccb4550 100644
--- a/fs/nfs/client.c
+++ b/fs/nfs/client.c
@@ -102,19 +102,10 @@ static struct nfs_client *nfs_alloc_client(const char *hostname,
 					   int nfsversion)
 {
 	struct nfs_client *clp;
-	int error;
 
 	if ((clp = kzalloc(sizeof(*clp), GFP_KERNEL)) == NULL)
 		goto error_0;
 
-	error = rpciod_up();
-	if (error < 0) {
-		dprintk("%s: couldn't start rpciod! Error = %d\n",
-				__FUNCTION__, error);
-		goto error_1;
-	}
-	__set_bit(NFS_CS_RPCIOD, &clp->cl_res_state);
-
 	if (nfsversion == 4) {
 		if (nfs_callback_up() < 0)
 			goto error_2;
@@ -139,8 +130,6 @@ static struct nfs_client *nfs_alloc_client(const char *hostname,
 #ifdef CONFIG_NFS_V4
 	init_rwsem(&clp->cl_sem);
 	INIT_LIST_HEAD(&clp->cl_delegations);
-	INIT_LIST_HEAD(&clp->cl_state_owners);
-	INIT_LIST_HEAD(&clp->cl_unused);
 	spin_lock_init(&clp->cl_lock);
 	INIT_DELAYED_WORK(&clp->cl_renewd, nfs4_renew_state);
 	rpc_init_wait_queue(&clp->cl_rpcwaitq, "NFS client");
@@ -154,9 +143,6 @@ error_3:
 	if (__test_and_clear_bit(NFS_CS_CALLBACK, &clp->cl_res_state))
 		nfs_callback_down();
 error_2:
-	rpciod_down();
-	__clear_bit(NFS_CS_RPCIOD, &clp->cl_res_state);
-error_1:
 	kfree(clp);
 error_0:
 	return NULL;
@@ -167,16 +153,7 @@ static void nfs4_shutdown_client(struct nfs_client *clp)
 #ifdef CONFIG_NFS_V4
 	if (__test_and_clear_bit(NFS_CS_RENEWD, &clp->cl_res_state))
 		nfs4_kill_renewd(clp);
-	while (!list_empty(&clp->cl_unused)) {
-		struct nfs4_state_owner *sp;
-
-		sp = list_entry(clp->cl_unused.next,
-				struct nfs4_state_owner,
-				so_list);
-		list_del(&sp->so_list);
-		kfree(sp);
-	}
-	BUG_ON(!list_empty(&clp->cl_state_owners));
+	BUG_ON(!RB_EMPTY_ROOT(&clp->cl_state_owners));
 	if (__test_and_clear_bit(NFS_CS_IDMAP, &clp->cl_res_state))
 		nfs_idmap_delete(clp);
 #endif
@@ -198,9 +175,6 @@ static void nfs_free_client(struct nfs_client *clp)
 	if (__test_and_clear_bit(NFS_CS_CALLBACK, &clp->cl_res_state))
 		nfs_callback_down();
 
-	if (__test_and_clear_bit(NFS_CS_RPCIOD, &clp->cl_res_state))
-		rpciod_down();
-
 	kfree(clp->cl_hostname);
 	kfree(clp);
 
diff --git a/fs/nfs/delegation.c b/fs/nfs/delegation.c
index 7f37d1b..20ac403 100644
--- a/fs/nfs/delegation.c
+++ b/fs/nfs/delegation.c
@@ -27,6 +27,13 @@ static void nfs_free_delegation(struct nfs_delegation *delegation)
 	kfree(delegation);
 }
 
+static void nfs_free_delegation_callback(struct rcu_head *head)
+{
+	struct nfs_delegation *delegation = container_of(head, struct nfs_delegation, rcu);
+
+	nfs_free_delegation(delegation);
+}
+
 static int nfs_delegation_claim_locks(struct nfs_open_context *ctx, struct nfs4_state *state)
 {
 	struct inode *inode = state->inode;
@@ -57,7 +64,7 @@ out_err:
 	return status;
 }
 
-static void nfs_delegation_claim_opens(struct inode *inode)
+static void nfs_delegation_claim_opens(struct inode *inode, const nfs4_stateid *stateid)
 {
 	struct nfs_inode *nfsi = NFS_I(inode);
 	struct nfs_open_context *ctx;
@@ -72,9 +79,11 @@ again:
 			continue;
 		if (!test_bit(NFS_DELEGATED_STATE, &state->flags))
 			continue;
+		if (memcmp(state->stateid.data, stateid->data, sizeof(state->stateid.data)) != 0)
+			continue;
 		get_nfs_open_context(ctx);
 		spin_unlock(&inode->i_lock);
-		err = nfs4_open_delegation_recall(ctx->dentry, state);
+		err = nfs4_open_delegation_recall(ctx, state, stateid);
 		if (err >= 0)
 			err = nfs_delegation_claim_locks(ctx, state);
 		put_nfs_open_context(ctx);
@@ -115,10 +124,6 @@ int nfs_inode_set_delegation(struct inode *inode, struct rpc_cred *cred, struct
 	struct nfs_delegation *delegation;
 	int status = 0;
 
-	/* Ensure we first revalidate the attributes and page cache! */
-	if ((nfsi->cache_validity & (NFS_INO_REVAL_PAGECACHE|NFS_INO_INVALID_ATTR)))
-		__nfs_revalidate_inode(NFS_SERVER(inode), inode);
-
 	delegation = kmalloc(sizeof(*delegation), GFP_KERNEL);
 	if (delegation == NULL)
 		return -ENOMEM;
@@ -131,10 +136,10 @@ int nfs_inode_set_delegation(struct inode *inode, struct rpc_cred *cred, struct
 	delegation->inode = inode;
 
 	spin_lock(&clp->cl_lock);
-	if (nfsi->delegation == NULL) {
-		list_add(&delegation->super_list, &clp->cl_delegations);
-		nfsi->delegation = delegation;
+	if (rcu_dereference(nfsi->delegation) == NULL) {
+		list_add_rcu(&delegation->super_list, &clp->cl_delegations);
 		nfsi->delegation_state = delegation->type;
+		rcu_assign_pointer(nfsi->delegation, delegation);
 		delegation = NULL;
 	} else {
 		if (memcmp(&delegation->stateid, &nfsi->delegation->stateid,
@@ -145,6 +150,12 @@ int nfs_inode_set_delegation(struct inode *inode, struct rpc_cred *cred, struct
 			status = -EIO;
 		}
 	}
+
+	/* Ensure we revalidate the attributes and page cache! */
+	spin_lock(&inode->i_lock);
+	nfsi->cache_validity |= NFS_INO_REVAL_FORCED;
+	spin_unlock(&inode->i_lock);
+
 	spin_unlock(&clp->cl_lock);
 	kfree(delegation);
 	return status;
@@ -155,7 +166,7 @@ static int nfs_do_return_delegation(struct inode *inode, struct nfs_delegation *
 	int res = 0;
 
 	res = nfs4_proc_delegreturn(inode, delegation->cred, &delegation->stateid);
-	nfs_free_delegation(delegation);
+	call_rcu(&delegation->rcu, nfs_free_delegation_callback);
 	return res;
 }
 
@@ -170,33 +181,55 @@ static void nfs_msync_inode(struct inode *inode)
 /*
  * Basic procedure for returning a delegation to the server
  */
-int __nfs_inode_return_delegation(struct inode *inode)
+static int __nfs_inode_return_delegation(struct inode *inode, struct nfs_delegation *delegation)
 {
 	struct nfs_client *clp = NFS_SERVER(inode)->nfs_client;
 	struct nfs_inode *nfsi = NFS_I(inode);
-	struct nfs_delegation *delegation;
-	int res = 0;
 
 	nfs_msync_inode(inode);
 	down_read(&clp->cl_sem);
 	/* Guard against new delegated open calls */
 	down_write(&nfsi->rwsem);
-	spin_lock(&clp->cl_lock);
-	delegation = nfsi->delegation;
-	if (delegation != NULL) {
-		list_del_init(&delegation->super_list);
-		nfsi->delegation = NULL;
-		nfsi->delegation_state = 0;
-	}
-	spin_unlock(&clp->cl_lock);
-	nfs_delegation_claim_opens(inode);
+	nfs_delegation_claim_opens(inode, &delegation->stateid);
 	up_write(&nfsi->rwsem);
 	up_read(&clp->cl_sem);
 	nfs_msync_inode(inode);
 
-	if (delegation != NULL)
-		res = nfs_do_return_delegation(inode, delegation);
-	return res;
+	return nfs_do_return_delegation(inode, delegation);
+}
+
+static struct nfs_delegation *nfs_detach_delegation_locked(struct nfs_inode *nfsi, const nfs4_stateid *stateid)
+{
+	struct nfs_delegation *delegation = rcu_dereference(nfsi->delegation);
+
+	if (delegation == NULL)
+		goto nomatch;
+	if (stateid != NULL && memcmp(delegation->stateid.data, stateid->data,
+				sizeof(delegation->stateid.data)) != 0)
+		goto nomatch;
+	list_del_rcu(&delegation->super_list);
+	nfsi->delegation_state = 0;
+	rcu_assign_pointer(nfsi->delegation, NULL);
+	return delegation;
+nomatch:
+	return NULL;
+}
+
+int nfs_inode_return_delegation(struct inode *inode)
+{
+	struct nfs_client *clp = NFS_SERVER(inode)->nfs_client;
+	struct nfs_inode *nfsi = NFS_I(inode);
+	struct nfs_delegation *delegation;
+	int err = 0;
+
+	if (rcu_dereference(nfsi->delegation) != NULL) {
+		spin_lock(&clp->cl_lock);
+		delegation = nfs_detach_delegation_locked(nfsi, NULL);
+		spin_unlock(&clp->cl_lock);
+		if (delegation != NULL)
+			err = __nfs_inode_return_delegation(inode, delegation);
+	}
+	return err;
 }
 
 /*
@@ -211,19 +244,23 @@ void nfs_return_all_delegations(struct super_block *sb)
 	if (clp == NULL)
 		return;
 restart:
-	spin_lock(&clp->cl_lock);
-	list_for_each_entry(delegation, &clp->cl_delegations, super_list) {
+	rcu_read_lock();
+	list_for_each_entry_rcu(delegation, &clp->cl_delegations, super_list) {
 		if (delegation->inode->i_sb != sb)
 			continue;
 		inode = igrab(delegation->inode);
 		if (inode == NULL)
 			continue;
+		spin_lock(&clp->cl_lock);
+		delegation = nfs_detach_delegation_locked(NFS_I(inode), NULL);
 		spin_unlock(&clp->cl_lock);
-		nfs_inode_return_delegation(inode);
+		rcu_read_unlock();
+		if (delegation != NULL)
+			__nfs_inode_return_delegation(inode, delegation);
 		iput(inode);
 		goto restart;
 	}
-	spin_unlock(&clp->cl_lock);
+	rcu_read_unlock();
 }
 
 static int nfs_do_expire_all_delegations(void *ptr)
@@ -234,22 +271,26 @@ static int nfs_do_expire_all_delegations(void *ptr)
 
 	allow_signal(SIGKILL);
 restart:
-	spin_lock(&clp->cl_lock);
 	if (test_bit(NFS4CLNT_STATE_RECOVER, &clp->cl_state) != 0)
 		goto out;
 	if (test_bit(NFS4CLNT_LEASE_EXPIRED, &clp->cl_state) == 0)
 		goto out;
-	list_for_each_entry(delegation, &clp->cl_delegations, super_list) {
+	rcu_read_lock();
+	list_for_each_entry_rcu(delegation, &clp->cl_delegations, super_list) {
 		inode = igrab(delegation->inode);
 		if (inode == NULL)
 			continue;
+		spin_lock(&clp->cl_lock);
+		delegation = nfs_detach_delegation_locked(NFS_I(inode), NULL);
 		spin_unlock(&clp->cl_lock);
-		nfs_inode_return_delegation(inode);
+		rcu_read_unlock();
+		if (delegation)
+			__nfs_inode_return_delegation(inode, delegation);
 		iput(inode);
 		goto restart;
 	}
+	rcu_read_unlock();
 out:
-	spin_unlock(&clp->cl_lock);
 	nfs_put_client(clp);
 	module_put_and_exit(0);
 }
@@ -280,17 +321,21 @@ void nfs_handle_cb_pathdown(struct nfs_client *clp)
 	if (clp == NULL)
 		return;
 restart:
-	spin_lock(&clp->cl_lock);
-	list_for_each_entry(delegation, &clp->cl_delegations, super_list) {
+	rcu_read_lock();
+	list_for_each_entry_rcu(delegation, &clp->cl_delegations, super_list) {
 		inode = igrab(delegation->inode);
 		if (inode == NULL)
 			continue;
+		spin_lock(&clp->cl_lock);
+		delegation = nfs_detach_delegation_locked(NFS_I(inode), NULL);
 		spin_unlock(&clp->cl_lock);
-		nfs_inode_return_delegation(inode);
+		rcu_read_unlock();
+		if (delegation != NULL)
+			__nfs_inode_return_delegation(inode, delegation);
 		iput(inode);
 		goto restart;
 	}
-	spin_unlock(&clp->cl_lock);
+	rcu_read_unlock();
 }
 
 struct recall_threadargs {
@@ -316,21 +361,14 @@ static int recall_thread(void *data)
 	down_read(&clp->cl_sem);
 	down_write(&nfsi->rwsem);
 	spin_lock(&clp->cl_lock);
-	delegation = nfsi->delegation;
-	if (delegation != NULL && memcmp(delegation->stateid.data,
-				args->stateid->data,
-				sizeof(delegation->stateid.data)) == 0) {
-		list_del_init(&delegation->super_list);
-		nfsi->delegation = NULL;
-		nfsi->delegation_state = 0;
+	delegation = nfs_detach_delegation_locked(nfsi, args->stateid);
+	if (delegation != NULL)
 		args->result = 0;
-	} else {
-		delegation = NULL;
+	else
 		args->result = -ENOENT;
-	}
 	spin_unlock(&clp->cl_lock);
 	complete(&args->started);
-	nfs_delegation_claim_opens(inode);
+	nfs_delegation_claim_opens(inode, args->stateid);
 	up_write(&nfsi->rwsem);
 	up_read(&clp->cl_sem);
 	nfs_msync_inode(inode);
@@ -371,14 +409,14 @@ struct inode *nfs_delegation_find_inode(struct nfs_client *clp, const struct nfs
 {
 	struct nfs_delegation *delegation;
 	struct inode *res = NULL;
-	spin_lock(&clp->cl_lock);
-	list_for_each_entry(delegation, &clp->cl_delegations, super_list) {
+	rcu_read_lock();
+	list_for_each_entry_rcu(delegation, &clp->cl_delegations, super_list) {
 		if (nfs_compare_fh(fhandle, &NFS_I(delegation->inode)->fh) == 0) {
 			res = igrab(delegation->inode);
 			break;
 		}
 	}
-	spin_unlock(&clp->cl_lock);
+	rcu_read_unlock();
 	return res;
 }
 
@@ -388,10 +426,10 @@ struct inode *nfs_delegation_find_inode(struct nfs_client *clp, const struct nfs
 void nfs_delegation_mark_reclaim(struct nfs_client *clp)
 {
 	struct nfs_delegation *delegation;
-	spin_lock(&clp->cl_lock);
-	list_for_each_entry(delegation, &clp->cl_delegations, super_list)
+	rcu_read_lock();
+	list_for_each_entry_rcu(delegation, &clp->cl_delegations, super_list)
 		delegation->flags |= NFS_DELEGATION_NEED_RECLAIM;
-	spin_unlock(&clp->cl_lock);
+	rcu_read_unlock();
 }
 
 /*
@@ -399,39 +437,35 @@ void nfs_delegation_mark_reclaim(struct nfs_client *clp)
  */
 void nfs_delegation_reap_unclaimed(struct nfs_client *clp)
 {
-	struct nfs_delegation *delegation, *n;
-	LIST_HEAD(head);
-	spin_lock(&clp->cl_lock);
-	list_for_each_entry_safe(delegation, n, &clp->cl_delegations, super_list) {
+	struct nfs_delegation *delegation;
+restart:
+	rcu_read_lock();
+	list_for_each_entry_rcu(delegation, &clp->cl_delegations, super_list) {
 		if ((delegation->flags & NFS_DELEGATION_NEED_RECLAIM) == 0)
 			continue;
-		list_move(&delegation->super_list, &head);
-		NFS_I(delegation->inode)->delegation = NULL;
-		NFS_I(delegation->inode)->delegation_state = 0;
-	}
-	spin_unlock(&clp->cl_lock);
-	while(!list_empty(&head)) {
-		delegation = list_entry(head.next, struct nfs_delegation, super_list);
-		list_del(&delegation->super_list);
-		nfs_free_delegation(delegation);
+		spin_lock(&clp->cl_lock);
+		delegation = nfs_detach_delegation_locked(NFS_I(delegation->inode), NULL);
+		spin_unlock(&clp->cl_lock);
+		rcu_read_unlock();
+		if (delegation != NULL)
+			call_rcu(&delegation->rcu, nfs_free_delegation_callback);
+		goto restart;
 	}
+	rcu_read_unlock();
 }
 
 int nfs4_copy_delegation_stateid(nfs4_stateid *dst, struct inode *inode)
 {
-	struct nfs_client *clp = NFS_SERVER(inode)->nfs_client;
 	struct nfs_inode *nfsi = NFS_I(inode);
 	struct nfs_delegation *delegation;
-	int res = 0;
+	int ret = 0;
 
-	if (nfsi->delegation_state == 0)
-		return 0;
-	spin_lock(&clp->cl_lock);
-	delegation = nfsi->delegation;
+	rcu_read_lock();
+	delegation = rcu_dereference(nfsi->delegation);
 	if (delegation != NULL) {
 		memcpy(dst->data, delegation->stateid.data, sizeof(dst->data));
-		res = 1;
+		ret = 1;
 	}
-	spin_unlock(&clp->cl_lock);
-	return res;
+	rcu_read_unlock();
+	return ret;
 }
diff --git a/fs/nfs/delegation.h b/fs/nfs/delegation.h
index 2cfd4b2..5874ce7 100644
--- a/fs/nfs/delegation.h
+++ b/fs/nfs/delegation.h
@@ -22,11 +22,12 @@ struct nfs_delegation {
 	long flags;
 	loff_t maxsize;
 	__u64 change_attr;
+	struct rcu_head rcu;
 };
 
 int nfs_inode_set_delegation(struct inode *inode, struct rpc_cred *cred, struct nfs_openres *res);
 void nfs_inode_reclaim_delegation(struct inode *inode, struct rpc_cred *cred, struct nfs_openres *res);
-int __nfs_inode_return_delegation(struct inode *inode);
+int nfs_inode_return_delegation(struct inode *inode);
 int nfs_async_inode_return_delegation(struct inode *inode, const nfs4_stateid *stateid);
 
 struct inode *nfs_delegation_find_inode(struct nfs_client *clp, const struct nfs_fh *fhandle);
@@ -39,27 +40,24 @@ void nfs_delegation_reap_unclaimed(struct nfs_client *clp);
 
 /* NFSv4 delegation-related procedures */
 int nfs4_proc_delegreturn(struct inode *inode, struct rpc_cred *cred, const nfs4_stateid *stateid);
-int nfs4_open_delegation_recall(struct dentry *dentry, struct nfs4_state *state);
+int nfs4_open_delegation_recall(struct nfs_open_context *ctx, struct nfs4_state *state, const nfs4_stateid *stateid);
 int nfs4_lock_delegation_recall(struct nfs4_state *state, struct file_lock *fl);
 int nfs4_copy_delegation_stateid(nfs4_stateid *dst, struct inode *inode);
 
 static inline int nfs_have_delegation(struct inode *inode, int flags)
 {
+	struct nfs_delegation *delegation;
+	int ret = 0;
+
 	flags &= FMODE_READ|FMODE_WRITE;
-	smp_rmb();
-	if ((NFS_I(inode)->delegation_state & flags) == flags)
-		return 1;
-	return 0;
+	rcu_read_lock();
+	delegation = rcu_dereference(NFS_I(inode)->delegation);
+	if (delegation != NULL && (delegation->type & flags) == flags)
+		ret = 1;
+	rcu_read_unlock();
+	return ret;
 }
 
-static inline int nfs_inode_return_delegation(struct inode *inode)
-{
-	int err = 0;
-
-	if (NFS_I(inode)->delegation != NULL)
-		err = __nfs_inode_return_delegation(inode);
-	return err;
-}
 #else
 static inline int nfs_have_delegation(struct inode *inode, int flags)
 {
diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c
index c27258b..322141f 100644
--- a/fs/nfs/dir.c
+++ b/fs/nfs/dir.c
@@ -897,14 +897,13 @@ int nfs_is_exclusive_create(struct inode *dir, struct nameidata *nd)
 	return (nd->intent.open.flags & O_EXCL) != 0;
 }
 
-static inline int nfs_reval_fsid(struct vfsmount *mnt, struct inode *dir,
-				 struct nfs_fh *fh, struct nfs_fattr *fattr)
+static inline int nfs_reval_fsid(struct inode *dir, const struct nfs_fattr *fattr)
 {
 	struct nfs_server *server = NFS_SERVER(dir);
 
 	if (!nfs_fsid_equal(&server->fsid, &fattr->fsid))
-		/* Revalidate fsid on root dir */
-		return __nfs_revalidate_inode(server, mnt->mnt_root->d_inode);
+		/* Revalidate fsid using the parent directory */
+		return __nfs_revalidate_inode(server, dir);
 	return 0;
 }
 
@@ -946,7 +945,7 @@ static struct dentry *nfs_lookup(struct inode *dir, struct dentry * dentry, stru
 		res = ERR_PTR(error);
 		goto out_unlock;
 	}
-	error = nfs_reval_fsid(nd->mnt, dir, &fhandle, &fattr);
+	error = nfs_reval_fsid(dir, &fattr);
 	if (error < 0) {
 		res = ERR_PTR(error);
 		goto out_unlock;
@@ -1244,7 +1243,7 @@ static int nfs_create(struct inode *dir, struct dentry *dentry, int mode,
 	attr.ia_mode = mode;
 	attr.ia_valid = ATTR_MODE;
 
-	if (nd && (nd->flags & LOOKUP_CREATE))
+	if ((nd->flags & LOOKUP_CREATE) != 0)
 		open_flags = nd->intent.open.flags;
 
 	lock_kernel();
@@ -1535,7 +1534,7 @@ static int nfs_symlink(struct inode *dir, struct dentry *dentry, const char *sym
 
 	lock_kernel();
 
-	page = alloc_page(GFP_KERNEL);
+	page = alloc_page(GFP_HIGHUSER);
 	if (!page) {
 		unlock_kernel();
 		return -ENOMEM;
@@ -1744,8 +1743,8 @@ int nfs_access_cache_shrinker(int nr_to_scan, gfp_t gfp_mask)
 	struct nfs_inode *nfsi;
 	struct nfs_access_entry *cache;
 
-	spin_lock(&nfs_access_lru_lock);
 restart:
+	spin_lock(&nfs_access_lru_lock);
 	list_for_each_entry(nfsi, &nfs_access_lru_list, access_cache_inode_lru) {
 		struct inode *inode;
 
@@ -1770,6 +1769,7 @@ remove_lru_entry:
 			clear_bit(NFS_INO_ACL_LRU_SET, &nfsi->flags);
 		}
 		spin_unlock(&inode->i_lock);
+		spin_unlock(&nfs_access_lru_lock);
 		iput(inode);
 		goto restart;
 	}
diff --git a/fs/nfs/direct.c b/fs/nfs/direct.c
index 00eee87..a5c82b6 100644
--- a/fs/nfs/direct.c
+++ b/fs/nfs/direct.c
@@ -266,7 +266,7 @@ static const struct rpc_call_ops nfs_read_direct_ops = {
 static ssize_t nfs_direct_read_schedule(struct nfs_direct_req *dreq, unsigned long user_addr, size_t count, loff_t pos)
 {
 	struct nfs_open_context *ctx = dreq->ctx;
-	struct inode *inode = ctx->dentry->d_inode;
+	struct inode *inode = ctx->path.dentry->d_inode;
 	size_t rsize = NFS_SERVER(inode)->rsize;
 	unsigned int pgbase;
 	int result;
@@ -295,9 +295,14 @@ static ssize_t nfs_direct_read_schedule(struct nfs_direct_req *dreq, unsigned lo
 			break;
 		}
 		if ((unsigned)result < data->npages) {
-			nfs_direct_release_pages(data->pagevec, result);
-			nfs_readdata_release(data);
-			break;
+			bytes = result * PAGE_SIZE;
+			if (bytes <= pgbase) {
+				nfs_direct_release_pages(data->pagevec, result);
+				nfs_readdata_release(data);
+				break;
+			}
+			bytes -= pgbase;
+			data->npages = result;
 		}
 
 		get_dreq(dreq);
@@ -601,7 +606,7 @@ static const struct rpc_call_ops nfs_write_direct_ops = {
 static ssize_t nfs_direct_write_schedule(struct nfs_direct_req *dreq, unsigned long user_addr, size_t count, loff_t pos, int sync)
 {
 	struct nfs_open_context *ctx = dreq->ctx;
-	struct inode *inode = ctx->dentry->d_inode;
+	struct inode *inode = ctx->path.dentry->d_inode;
 	size_t wsize = NFS_SERVER(inode)->wsize;
 	unsigned int pgbase;
 	int result;
@@ -630,9 +635,14 @@ static ssize_t nfs_direct_write_schedule(struct nfs_direct_req *dreq, unsigned l
 			break;
 		}
 		if ((unsigned)result < data->npages) {
-			nfs_direct_release_pages(data->pagevec, result);
-			nfs_writedata_release(data);
-			break;
+			bytes = result * PAGE_SIZE;
+			if (bytes <= pgbase) {
+				nfs_direct_release_pages(data->pagevec, result);
+				nfs_writedata_release(data);
+				break;
+			}
+			bytes -= pgbase;
+			data->npages = result;
 		}
 
 		get_dreq(dreq);
@@ -763,10 +773,8 @@ ssize_t nfs_file_direct_read(struct kiocb *iocb, const struct iovec *iov,
 		(unsigned long) count, (long long) pos);
 
 	if (nr_segs != 1)
-		return -EINVAL;
-
-	if (count < 0)
 		goto out;
+
 	retval = -EFAULT;
 	if (!access_ok(VERIFY_WRITE, buf, count))
 		goto out;
@@ -814,7 +822,7 @@ out:
 ssize_t nfs_file_direct_write(struct kiocb *iocb, const struct iovec *iov,
 				unsigned long nr_segs, loff_t pos)
 {
-	ssize_t retval;
+	ssize_t retval = -EINVAL;
 	struct file *file = iocb->ki_filp;
 	struct address_space *mapping = file->f_mapping;
 	/* XXX: temporary */
@@ -827,7 +835,7 @@ ssize_t nfs_file_direct_write(struct kiocb *iocb, const struct iovec *iov,
 		(unsigned long) count, (long long) pos);
 
 	if (nr_segs != 1)
-		return -EINVAL;
+		goto out;
 
 	retval = generic_write_checks(file, &pos, &count, 0);
 	if (retval)
diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c
index bd9f5a8..3d9fccf 100644
--- a/fs/nfs/inode.c
+++ b/fs/nfs/inode.c
@@ -461,14 +461,14 @@ static struct nfs_open_context *alloc_nfs_open_context(struct vfsmount *mnt, str
 
 	ctx = kmalloc(sizeof(*ctx), GFP_KERNEL);
 	if (ctx != NULL) {
-		atomic_set(&ctx->count, 1);
-		ctx->dentry = dget(dentry);
-		ctx->vfsmnt = mntget(mnt);
+		ctx->path.dentry = dget(dentry);
+		ctx->path.mnt = mntget(mnt);
 		ctx->cred = get_rpccred(cred);
 		ctx->state = NULL;
 		ctx->lockowner = current->files;
 		ctx->error = 0;
 		ctx->dir_cookie = 0;
+		kref_init(&ctx->kref);
 	}
 	return ctx;
 }
@@ -476,27 +476,33 @@ static struct nfs_open_context *alloc_nfs_open_context(struct vfsmount *mnt, str
 struct nfs_open_context *get_nfs_open_context(struct nfs_open_context *ctx)
 {
 	if (ctx != NULL)
-		atomic_inc(&ctx->count);
+		kref_get(&ctx->kref);
 	return ctx;
 }
 
-void put_nfs_open_context(struct nfs_open_context *ctx)
+static void nfs_free_open_context(struct kref *kref)
 {
-	if (atomic_dec_and_test(&ctx->count)) {
-		if (!list_empty(&ctx->list)) {
-			struct inode *inode = ctx->dentry->d_inode;
-			spin_lock(&inode->i_lock);
-			list_del(&ctx->list);
-			spin_unlock(&inode->i_lock);
-		}
-		if (ctx->state != NULL)
-			nfs4_close_state(ctx->state, ctx->mode);
-		if (ctx->cred != NULL)
-			put_rpccred(ctx->cred);
-		dput(ctx->dentry);
-		mntput(ctx->vfsmnt);
-		kfree(ctx);
+	struct nfs_open_context *ctx = container_of(kref,
+			struct nfs_open_context, kref);
+
+	if (!list_empty(&ctx->list)) {
+		struct inode *inode = ctx->path.dentry->d_inode;
+		spin_lock(&inode->i_lock);
+		list_del(&ctx->list);
+		spin_unlock(&inode->i_lock);
 	}
+	if (ctx->state != NULL)
+		nfs4_close_state(&ctx->path, ctx->state, ctx->mode);
+	if (ctx->cred != NULL)
+		put_rpccred(ctx->cred);
+	dput(ctx->path.dentry);
+	mntput(ctx->path.mnt);
+	kfree(ctx);
+}
+
+void put_nfs_open_context(struct nfs_open_context *ctx)
+{
+	kref_put(&ctx->kref, nfs_free_open_context);
 }
 
 /*
@@ -961,8 +967,8 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr)
 		goto out_changed;
 
 	server = NFS_SERVER(inode);
-	/* Update the fsid if and only if this is the root directory */
-	if (inode == inode->i_sb->s_root->d_inode
+	/* Update the fsid? */
+	if (S_ISDIR(inode->i_mode)
 			&& !nfs_fsid_equal(&server->fsid, &fattr->fsid))
 		server->fsid = fattr->fsid;
 
@@ -1066,8 +1072,10 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr)
 		invalid &= ~NFS_INO_INVALID_DATA;
 	if (data_stable)
 		invalid &= ~(NFS_INO_INVALID_ATTR|NFS_INO_INVALID_ATIME|NFS_INO_REVAL_PAGECACHE);
-	if (!nfs_have_delegation(inode, FMODE_READ))
+	if (!nfs_have_delegation(inode, FMODE_READ) ||
+			(nfsi->cache_validity & NFS_INO_REVAL_FORCED))
 		nfsi->cache_validity |= invalid;
+	nfsi->cache_validity &= ~NFS_INO_REVAL_FORCED;
 
 	return 0;
  out_changed:
@@ -1103,27 +1111,10 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr)
  */
 void nfs4_clear_inode(struct inode *inode)
 {
-	struct nfs_inode *nfsi = NFS_I(inode);
-
 	/* If we are holding a delegation, return it! */
 	nfs_inode_return_delegation(inode);
 	/* First call standard NFS clear_inode() code */
 	nfs_clear_inode(inode);
-	/* Now clear out any remaining state */
-	while (!list_empty(&nfsi->open_states)) {
-		struct nfs4_state *state;
-		
-		state = list_entry(nfsi->open_states.next,
-				struct nfs4_state,
-				inode_states);
-		dprintk("%s(%s/%Ld): found unclaimed NFSv4 state %p\n",
-				__FUNCTION__,
-				inode->i_sb->s_id,
-				(long long)NFS_FILEID(inode),
-				state);
-		BUG_ON(atomic_read(&state->count) != 1);
-		nfs4_close_state(state, state->state);
-	}
 }
 #endif
 
@@ -1165,15 +1156,11 @@ static void init_once(void * foo, struct kmem_cache * cachep, unsigned long flag
 	struct nfs_inode *nfsi = (struct nfs_inode *) foo;
 
 	inode_init_once(&nfsi->vfs_inode);
-	spin_lock_init(&nfsi->req_lock);
-	INIT_LIST_HEAD(&nfsi->dirty);
-	INIT_LIST_HEAD(&nfsi->commit);
 	INIT_LIST_HEAD(&nfsi->open_files);
 	INIT_LIST_HEAD(&nfsi->access_cache_entry_lru);
 	INIT_LIST_HEAD(&nfsi->access_cache_inode_lru);
 	INIT_RADIX_TREE(&nfsi->nfs_page_tree, GFP_ATOMIC);
 	atomic_set(&nfsi->data_updates, 0);
-	nfsi->ndirty = 0;
 	nfsi->ncommit = 0;
 	nfsi->npages = 0;
 	nfs4_init_once(nfsi);
diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h
index ad2b40d..76cf55d 100644
--- a/fs/nfs/internal.h
+++ b/fs/nfs/internal.h
@@ -183,9 +183,9 @@ unsigned long nfs_block_bits(unsigned long bsize, unsigned char *nrbitsp)
 /*
  * Calculate the number of 512byte blocks used.
  */
-static inline unsigned long nfs_calc_block_size(u64 tsize)
+static inline blkcnt_t nfs_calc_block_size(u64 tsize)
 {
-	loff_t used = (tsize + 511) >> 9;
+	blkcnt_t used = (tsize + 511) >> 9;
 	return (used > ULONG_MAX) ? ULONG_MAX : used;
 }
 
diff --git a/fs/nfs/mount_clnt.c b/fs/nfs/mount_clnt.c
index ca5a266..8afd9f7 100644
--- a/fs/nfs/mount_clnt.c
+++ b/fs/nfs/mount_clnt.c
@@ -1,7 +1,5 @@
 /*
- * linux/fs/nfs/mount_clnt.c
- *
- * MOUNT client to support NFSroot.
+ * In-kernel MOUNT protocol client
  *
  * Copyright (C) 1997, Olaf Kirch <okir at monad.swb.de>
  */
@@ -18,33 +16,31 @@
 #include <linux/nfs_fs.h>
 
 #ifdef RPC_DEBUG
-# define NFSDBG_FACILITY	NFSDBG_ROOT
+# define NFSDBG_FACILITY	NFSDBG_MOUNT
 #endif
 
-/*
-#define MOUNT_PROGRAM		100005
-#define MOUNT_VERSION		1
-#define MOUNT_MNT		1
-#define MOUNT_UMNT		3
- */
-
-static struct rpc_clnt *	mnt_create(char *, struct sockaddr_in *,
-								int, int);
 static struct rpc_program	mnt_program;
 
 struct mnt_fhstatus {
-	unsigned int		status;
-	struct nfs_fh *		fh;
+	u32 status;
+	struct nfs_fh *fh;
 };
 
-/*
- * Obtain an NFS file handle for the given host and path
+/**
+ * nfs_mount - Obtain an NFS file handle for the given host and path
+ * @addr: pointer to server's address
+ * @len: size of server's address
+ * @hostname: name of server host, or NULL
+ * @path: pointer to string containing export path to mount
+ * @version: mount version to use for this request
+ * @protocol: transport protocol to use for this request
+ * @fh: pointer to location to place returned file handle
+ *
+ * Uses default timeout parameters specified by underlying transport.
  */
-int
-nfsroot_mount(struct sockaddr_in *addr, char *path, struct nfs_fh *fh,
-		int version, int protocol)
+int nfs_mount(struct sockaddr *addr, size_t len, char *hostname, char *path,
+	      int version, int protocol, struct nfs_fh *fh)
 {
-	struct rpc_clnt		*mnt_clnt;
 	struct mnt_fhstatus	result = {
 		.fh		= fh
 	};
@@ -52,16 +48,25 @@ nfsroot_mount(struct sockaddr_in *addr, char *path, struct nfs_fh *fh,
 		.rpc_argp	= path,
 		.rpc_resp	= &result,
 	};
-	char			hostname[32];
+	struct rpc_create_args args = {
+		.protocol	= protocol,
+		.address	= addr,
+		.addrsize	= len,
+		.servername	= hostname,
+		.program	= &mnt_program,
+		.version	= version,
+		.authflavor	= RPC_AUTH_UNIX,
+		.flags		= RPC_CLNT_CREATE_INTR,
+	};
+	struct rpc_clnt		*mnt_clnt;
 	int			status;
 
-	dprintk("NFS:      nfs_mount(%08x:%s)\n",
-			(unsigned)ntohl(addr->sin_addr.s_addr), path);
+	dprintk("NFS: sending MNT request for %s:%s\n",
+		(hostname ? hostname : "server"), path);
 
-	sprintf(hostname, "%u.%u.%u.%u", NIPQUAD(addr->sin_addr.s_addr));
-	mnt_clnt = mnt_create(hostname, addr, version, protocol);
+	mnt_clnt = rpc_create(&args);
 	if (IS_ERR(mnt_clnt))
-		return PTR_ERR(mnt_clnt);
+		goto out_clnt_err;
 
 	if (version == NFS_MNT3_VERSION)
 		msg.rpc_proc = &mnt_clnt->cl_procinfo[MOUNTPROC3_MNT];
@@ -69,33 +74,39 @@ nfsroot_mount(struct sockaddr_in *addr, char *path, struct nfs_fh *fh,
 		msg.rpc_proc = &mnt_clnt->cl_procinfo[MNTPROC_MNT];
 
 	status = rpc_call_sync(mnt_clnt, &msg, 0);
-	return status < 0? status : (result.status? -EACCES : 0);
-}
+	rpc_shutdown_client(mnt_clnt);
 
-static struct rpc_clnt *
-mnt_create(char *hostname, struct sockaddr_in *srvaddr, int version,
-		int protocol)
-{
-	struct rpc_create_args args = {
-		.protocol	= protocol,
-		.address	= (struct sockaddr *)srvaddr,
-		.addrsize	= sizeof(*srvaddr),
-		.servername	= hostname,
-		.program	= &mnt_program,
-		.version	= version,
-		.authflavor	= RPC_AUTH_UNIX,
-		.flags		= (RPC_CLNT_CREATE_ONESHOT |
-				   RPC_CLNT_CREATE_INTR),
-	};
+	if (status < 0)
+		goto out_call_err;
+	if (result.status != 0)
+		goto out_mnt_err;
+
+	dprintk("NFS: MNT request succeeded\n");
+	status = 0;
+
+out:
+	return status;
+
+out_clnt_err:
+	status = PTR_ERR(mnt_clnt);
+	dprintk("NFS: failed to create RPC client, status=%d\n", status);
+	goto out;
+
+out_call_err:
+	dprintk("NFS: failed to start MNT request, status=%d\n", status);
+	goto out;
 
-	return rpc_create(&args);
+out_mnt_err:
+	dprintk("NFS: MNT server returned result %d\n", result.status);
+	status = -EACCES;
+	goto out;
 }
 
 /*
  * XDR encode/decode functions for MOUNT
  */
-static int
-xdr_encode_dirpath(struct rpc_rqst *req, __be32 *p, const char *path)
+static int xdr_encode_dirpath(struct rpc_rqst *req, __be32 *p,
+			      const char *path)
 {
 	p = xdr_encode_string(p, path);
 
@@ -103,8 +114,8 @@ xdr_encode_dirpath(struct rpc_rqst *req, __be32 *p, const char *path)
 	return 0;
 }
 
-static int
-xdr_decode_fhstatus(struct rpc_rqst *req, __be32 *p, struct mnt_fhstatus *res)
+static int xdr_decode_fhstatus(struct rpc_rqst *req, __be32 *p,
+			       struct mnt_fhstatus *res)
 {
 	struct nfs_fh *fh = res->fh;
 
@@ -115,8 +126,8 @@ xdr_decode_fhstatus(struct rpc_rqst *req, __be32 *p, struct mnt_fhstatus *res)
 	return 0;
 }
 
-static int
-xdr_decode_fhstatus3(struct rpc_rqst *req, __be32 *p, struct mnt_fhstatus *res)
+static int xdr_decode_fhstatus3(struct rpc_rqst *req, __be32 *p,
+				struct mnt_fhstatus *res)
 {
 	struct nfs_fh *fh = res->fh;
 
@@ -135,53 +146,53 @@ xdr_decode_fhstatus3(struct rpc_rqst *req, __be32 *p, struct mnt_fhstatus *res)
 #define MNT_fhstatus_sz		(1 + 8)
 #define MNT_fhstatus3_sz	(1 + 16)
 
-static struct rpc_procinfo	mnt_procedures[] = {
-[MNTPROC_MNT] = {
-	  .p_proc		= MNTPROC_MNT,
-	  .p_encode		= (kxdrproc_t) xdr_encode_dirpath,	
-	  .p_decode		= (kxdrproc_t) xdr_decode_fhstatus,
-	  .p_arglen		= MNT_dirpath_sz,
-	  .p_replen		= MNT_fhstatus_sz,
-	  .p_statidx		= MNTPROC_MNT,
-	  .p_name		= "MOUNT",
+static struct rpc_procinfo mnt_procedures[] = {
+	[MNTPROC_MNT] = {
+		.p_proc		= MNTPROC_MNT,
+		.p_encode	= (kxdrproc_t) xdr_encode_dirpath,
+		.p_decode	= (kxdrproc_t) xdr_decode_fhstatus,
+		.p_arglen	= MNT_dirpath_sz,
+		.p_replen	= MNT_fhstatus_sz,
+		.p_statidx	= MNTPROC_MNT,
+		.p_name		= "MOUNT",
 	},
 };
 
 static struct rpc_procinfo mnt3_procedures[] = {
-[MOUNTPROC3_MNT] = {
-	  .p_proc		= MOUNTPROC3_MNT,
-	  .p_encode		= (kxdrproc_t) xdr_encode_dirpath,
-	  .p_decode		= (kxdrproc_t) xdr_decode_fhstatus3,
-	  .p_arglen		= MNT_dirpath_sz,
-	  .p_replen		= MNT_fhstatus3_sz,
-	  .p_statidx		= MOUNTPROC3_MNT,
-	  .p_name		= "MOUNT",
+	[MOUNTPROC3_MNT] = {
+		.p_proc		= MOUNTPROC3_MNT,
+		.p_encode	= (kxdrproc_t) xdr_encode_dirpath,
+		.p_decode	= (kxdrproc_t) xdr_decode_fhstatus3,
+		.p_arglen	= MNT_dirpath_sz,
+		.p_replen	= MNT_fhstatus3_sz,
+		.p_statidx	= MOUNTPROC3_MNT,
+		.p_name		= "MOUNT",
 	},
 };
 
 
-static struct rpc_version	mnt_version1 = {
-		.number		= 1,
-		.nrprocs 	= 2,
-		.procs 		= mnt_procedures
+static struct rpc_version mnt_version1 = {
+	.number		= 1,
+	.nrprocs	= 2,
+	.procs		= mnt_procedures,
 };
 
-static struct rpc_version       mnt_version3 = {
-		.number		= 3,
-		.nrprocs	= 2,
-		.procs		= mnt3_procedures
+static struct rpc_version mnt_version3 = {
+	.number		= 3,
+	.nrprocs	= 2,
+	.procs		= mnt3_procedures,
 };
 
-static struct rpc_version *	mnt_version[] = {
+static struct rpc_version *mnt_version[] = {
 	NULL,
 	&mnt_version1,
 	NULL,
 	&mnt_version3,
 };
 
-static struct rpc_stat		mnt_stats;
+static struct rpc_stat mnt_stats;
 
-static struct rpc_program	mnt_program = {
+static struct rpc_program mnt_program = {
 	.name		= "mount",
 	.number		= NFS_MNT_PROGRAM,
 	.nrvers		= ARRAY_SIZE(mnt_version),
diff --git a/fs/nfs/nfs2xdr.c b/fs/nfs/nfs2xdr.c
index cd3ca7b..7fcc78f 100644
--- a/fs/nfs/nfs2xdr.c
+++ b/fs/nfs/nfs2xdr.c
@@ -223,7 +223,7 @@ nfs_xdr_diropargs(struct rpc_rqst *req, __be32 *p, struct nfs_diropargs *args)
 static int
 nfs_xdr_readargs(struct rpc_rqst *req, __be32 *p, struct nfs_readargs *args)
 {
-	struct rpc_auth	*auth = req->rq_task->tk_auth;
+	struct rpc_auth	*auth = req->rq_task->tk_msg.rpc_cred->cr_auth;
 	unsigned int replen;
 	u32 offset = (u32)args->offset;
 	u32 count = args->count;
@@ -380,7 +380,7 @@ static int
 nfs_xdr_readdirargs(struct rpc_rqst *req, __be32 *p, struct nfs_readdirargs *args)
 {
 	struct rpc_task	*task = req->rq_task;
-	struct rpc_auth	*auth = task->tk_auth;
+	struct rpc_auth	*auth = task->tk_msg.rpc_cred->cr_auth;
 	unsigned int replen;
 	u32 count = args->count;
 
@@ -541,7 +541,7 @@ nfs_xdr_diropres(struct rpc_rqst *req, __be32 *p, struct nfs_diropok *res)
 static int
 nfs_xdr_readlinkargs(struct rpc_rqst *req, __be32 *p, struct nfs_readlinkargs *args)
 {
-	struct rpc_auth *auth = req->rq_task->tk_auth;
+	struct rpc_auth	*auth = req->rq_task->tk_msg.rpc_cred->cr_auth;
 	unsigned int replen;
 
 	p = xdr_encode_fhandle(p, args->fh);
diff --git a/fs/nfs/nfs3proc.c b/fs/nfs/nfs3proc.c
index 45268d6..814d886 100644
--- a/fs/nfs/nfs3proc.c
+++ b/fs/nfs/nfs3proc.c
@@ -335,9 +335,7 @@ again:
 		 * not sure this buys us anything (and I'd have
 		 * to revamp the NFSv3 XDR code) */
 		status = nfs3_proc_setattr(dentry, &fattr, sattr);
-		if (status == 0)
-			nfs_setattr_update_inode(dentry->d_inode, sattr);
-		nfs_refresh_inode(dentry->d_inode, &fattr);
+		nfs_post_op_update_inode(dentry->d_inode, &fattr);
 		dprintk("NFS reply setattr (post-create): %d\n", status);
 	}
 	if (status != 0)
diff --git a/fs/nfs/nfs3xdr.c b/fs/nfs/nfs3xdr.c
index b51df8e..b4647a2 100644
--- a/fs/nfs/nfs3xdr.c
+++ b/fs/nfs/nfs3xdr.c
@@ -319,7 +319,7 @@ nfs3_xdr_accessargs(struct rpc_rqst *req, __be32 *p, struct nfs3_accessargs *arg
 static int
 nfs3_xdr_readargs(struct rpc_rqst *req, __be32 *p, struct nfs_readargs *args)
 {
-	struct rpc_auth	*auth = req->rq_task->tk_auth;
+	struct rpc_auth	*auth = req->rq_task->tk_msg.rpc_cred->cr_auth;
 	unsigned int replen;
 	u32 count = args->count;
 
@@ -458,7 +458,7 @@ nfs3_xdr_linkargs(struct rpc_rqst *req, __be32 *p, struct nfs3_linkargs *args)
 static int
 nfs3_xdr_readdirargs(struct rpc_rqst *req, __be32 *p, struct nfs3_readdirargs *args)
 {
-	struct rpc_auth	*auth = req->rq_task->tk_auth;
+	struct rpc_auth	*auth = req->rq_task->tk_msg.rpc_cred->cr_auth;
 	unsigned int replen;
 	u32 count = args->count;
 
@@ -643,7 +643,7 @@ static int
 nfs3_xdr_getaclargs(struct rpc_rqst *req, __be32 *p,
 		    struct nfs3_getaclargs *args)
 {
-	struct rpc_auth *auth = req->rq_task->tk_auth;
+	struct rpc_auth	*auth = req->rq_task->tk_msg.rpc_cred->cr_auth;
 	unsigned int replen;
 
 	p = xdr_encode_fhandle(p, args->fh);
@@ -773,7 +773,7 @@ nfs3_xdr_accessres(struct rpc_rqst *req, __be32 *p, struct nfs3_accessres *res)
 static int
 nfs3_xdr_readlinkargs(struct rpc_rqst *req, __be32 *p, struct nfs3_readlinkargs *args)
 {
-	struct rpc_auth *auth = req->rq_task->tk_auth;
+	struct rpc_auth	*auth = req->rq_task->tk_msg.rpc_cred->cr_auth;
 	unsigned int replen;
 
 	p = xdr_encode_fhandle(p, args->fh);
diff --git a/fs/nfs/nfs4_fs.h b/fs/nfs/nfs4_fs.h
index cf3a17e..6c028e7 100644
--- a/fs/nfs/nfs4_fs.h
+++ b/fs/nfs/nfs4_fs.h
@@ -70,19 +70,26 @@ static inline void nfs_confirm_seqid(struct nfs_seqid_counter *seqid, int status
 		seqid->flags |= NFS_SEQID_CONFIRMED;
 }
 
+struct nfs_unique_id {
+	struct rb_node rb_node;
+	__u64 id;
+};
+
 /*
  * NFS4 state_owners and lock_owners are simply labels for ordered
  * sequences of RPC calls. Their sole purpose is to provide once-only
  * semantics by allowing the server to identify replayed requests.
  */
 struct nfs4_state_owner {
-	spinlock_t	     so_lock;
-	struct list_head     so_list;	 /* per-clientid list of state_owners */
+	struct nfs_unique_id so_owner_id;
 	struct nfs_client    *so_client;
-	u32                  so_id;      /* 32-bit identifier, unique */
-	atomic_t	     so_count;
+	struct nfs_server    *so_server;
+	struct rb_node	     so_client_node;
 
 	struct rpc_cred	     *so_cred;	 /* Associated cred */
+
+	spinlock_t	     so_lock;
+	atomic_t	     so_count;
 	struct list_head     so_states;
 	struct list_head     so_delegations;
 	struct nfs_seqid_counter so_seqid;
@@ -108,7 +115,7 @@ struct nfs4_lock_state {
 #define NFS_LOCK_INITIALIZED 1
 	int			ls_flags;
 	struct nfs_seqid_counter	ls_seqid;
-	u32			ls_id;
+	struct nfs_unique_id	ls_id;
 	nfs4_stateid		ls_stateid;
 	atomic_t		ls_count;
 };
@@ -116,7 +123,10 @@ struct nfs4_lock_state {
 /* bits for nfs4_state->flags */
 enum {
 	LK_STATE_IN_USE,
-	NFS_DELEGATED_STATE,
+	NFS_DELEGATED_STATE,		/* Current stateid is delegation */
+	NFS_O_RDONLY_STATE,		/* OPEN stateid has read-only state */
+	NFS_O_WRONLY_STATE,		/* OPEN stateid has write-only state */
+	NFS_O_RDWR_STATE,		/* OPEN stateid has read/write state */
 };
 
 struct nfs4_state {
@@ -130,11 +140,14 @@ struct nfs4_state {
 	unsigned long flags;		/* Do we hold any locks? */
 	spinlock_t state_lock;		/* Protects the lock_states list */
 
-	nfs4_stateid stateid;
+	seqlock_t seqlock;		/* Protects the stateid/open_stateid */
+	nfs4_stateid stateid;		/* Current stateid: may be delegation */
+	nfs4_stateid open_stateid;	/* OPEN stateid */
 
-	unsigned int n_rdonly;
-	unsigned int n_wronly;
-	unsigned int n_rdwr;
+	/* The following 3 fields are protected by owner->so_lock */
+	unsigned int n_rdonly;		/* Number of read-only references */
+	unsigned int n_wronly;		/* Number of write-only references */
+	unsigned int n_rdwr;		/* Number of read/write references */
 	int state;			/* State on the server (R,W, or RW) */
 	atomic_t count;
 };
@@ -165,7 +178,7 @@ extern int nfs4_proc_setclientid(struct nfs_client *, u32, unsigned short, struc
 extern int nfs4_proc_setclientid_confirm(struct nfs_client *, struct rpc_cred *);
 extern int nfs4_proc_async_renew(struct nfs_client *, struct rpc_cred *);
 extern int nfs4_proc_renew(struct nfs_client *, struct rpc_cred *);
-extern int nfs4_do_close(struct inode *inode, struct nfs4_state *state);
+extern int nfs4_do_close(struct path *path, struct nfs4_state *state);
 extern struct dentry *nfs4_atomic_open(struct inode *, struct dentry *, struct nameidata *);
 extern int nfs4_open_revalidate(struct inode *, struct dentry *, int, struct nameidata *);
 extern int nfs4_server_capabilities(struct nfs_server *server, struct nfs_fh *fhandle);
@@ -189,14 +202,13 @@ extern void nfs4_renew_state(struct work_struct *);
 
 /* nfs4state.c */
 struct rpc_cred *nfs4_get_renew_cred(struct nfs_client *clp);
-extern u32 nfs4_alloc_lockowner_id(struct nfs_client *);
 
 extern struct nfs4_state_owner * nfs4_get_state_owner(struct nfs_server *, struct rpc_cred *);
 extern void nfs4_put_state_owner(struct nfs4_state_owner *);
 extern void nfs4_drop_state_owner(struct nfs4_state_owner *);
 extern struct nfs4_state * nfs4_get_open_state(struct inode *, struct nfs4_state_owner *);
 extern void nfs4_put_open_state(struct nfs4_state *);
-extern void nfs4_close_state(struct nfs4_state *, mode_t);
+extern void nfs4_close_state(struct path *, struct nfs4_state *, mode_t);
 extern void nfs4_state_set_mode_locked(struct nfs4_state *, mode_t);
 extern void nfs4_schedule_state_recovery(struct nfs_client *);
 extern void nfs4_put_lock_state(struct nfs4_lock_state *lsp);
@@ -222,7 +234,7 @@ extern struct svc_version nfs4_callback_version1;
 
 #else
 
-#define nfs4_close_state(a, b) do { } while (0)
+#define nfs4_close_state(a, b, c) do { } while (0)
 
 #endif /* CONFIG_NFS_V4 */
 #endif /* __LINUX_FS_NFS_NFS4_FS.H */
diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
index 648e0ac..fee2da8 100644
--- a/fs/nfs/nfs4proc.c
+++ b/fs/nfs/nfs4proc.c
@@ -65,6 +65,7 @@ static int nfs4_async_handle_error(struct rpc_task *, const struct nfs_server *)
 static int _nfs4_proc_access(struct inode *inode, struct nfs_access_entry *entry);
 static int nfs4_handle_exception(const struct nfs_server *server, int errorcode, struct nfs4_exception *exception);
 static int nfs4_wait_clnt_recover(struct rpc_clnt *clnt, struct nfs_client *clp);
+static int _nfs4_do_access(struct inode *inode, struct rpc_cred *cred, int openflags);
 
 /* Prevent leaks of NFSv4 errors into userland */
 int nfs4_map_errors(int err)
@@ -214,27 +215,39 @@ static void update_changeattr(struct inode *dir, struct nfs4_change_info *cinfo)
 }
 
 struct nfs4_opendata {
-	atomic_t count;
+	struct kref kref;
 	struct nfs_openargs o_arg;
 	struct nfs_openres o_res;
 	struct nfs_open_confirmargs c_arg;
 	struct nfs_open_confirmres c_res;
 	struct nfs_fattr f_attr;
 	struct nfs_fattr dir_attr;
-	struct dentry *dentry;
+	struct path path;
 	struct dentry *dir;
 	struct nfs4_state_owner *owner;
+	struct nfs4_state *state;
 	struct iattr attrs;
 	unsigned long timestamp;
+	unsigned int rpc_done : 1;
 	int rpc_status;
 	int cancelled;
 };
 
-static struct nfs4_opendata *nfs4_opendata_alloc(struct dentry *dentry,
+
+static void nfs4_init_opendata_res(struct nfs4_opendata *p)
+{
+	p->o_res.f_attr = &p->f_attr;
+	p->o_res.dir_attr = &p->dir_attr;
+	p->o_res.server = p->o_arg.server;
+	nfs_fattr_init(&p->f_attr);
+	nfs_fattr_init(&p->dir_attr);
+}
+
+static struct nfs4_opendata *nfs4_opendata_alloc(struct path *path,
 		struct nfs4_state_owner *sp, int flags,
 		const struct iattr *attrs)
 {
-	struct dentry *parent = dget_parent(dentry);
+	struct dentry *parent = dget_parent(path->dentry);
 	struct inode *dir = parent->d_inode;
 	struct nfs_server *server = NFS_SERVER(dir);
 	struct nfs4_opendata *p;
@@ -245,24 +258,19 @@ static struct nfs4_opendata *nfs4_opendata_alloc(struct dentry *dentry,
 	p->o_arg.seqid = nfs_alloc_seqid(&sp->so_seqid);
 	if (p->o_arg.seqid == NULL)
 		goto err_free;
-	atomic_set(&p->count, 1);
-	p->dentry = dget(dentry);
+	p->path.mnt = mntget(path->mnt);
+	p->path.dentry = dget(path->dentry);
 	p->dir = parent;
 	p->owner = sp;
 	atomic_inc(&sp->so_count);
 	p->o_arg.fh = NFS_FH(dir);
 	p->o_arg.open_flags = flags,
 	p->o_arg.clientid = server->nfs_client->cl_clientid;
-	p->o_arg.id = sp->so_id;
-	p->o_arg.name = &dentry->d_name;
+	p->o_arg.id = sp->so_owner_id.id;
+	p->o_arg.name = &p->path.dentry->d_name;
 	p->o_arg.server = server;
 	p->o_arg.bitmask = server->attr_bitmask;
 	p->o_arg.claim = NFS4_OPEN_CLAIM_NULL;
-	p->o_res.f_attr = &p->f_attr;
-	p->o_res.dir_attr = &p->dir_attr;
-	p->o_res.server = server;
-	nfs_fattr_init(&p->f_attr);
-	nfs_fattr_init(&p->dir_attr);
 	if (flags & O_EXCL) {
 		u32 *s = (u32 *) p->o_arg.u.verifier.data;
 		s[0] = jiffies;
@@ -274,6 +282,8 @@ static struct nfs4_opendata *nfs4_opendata_alloc(struct dentry *dentry,
 	p->c_arg.fh = &p->o_res.fh;
 	p->c_arg.stateid = &p->o_res.stateid;
 	p->c_arg.seqid = p->o_arg.seqid;
+	nfs4_init_opendata_res(p);
+	kref_init(&p->kref);
 	return p;
 err_free:
 	kfree(p);
@@ -282,27 +292,25 @@ err:
 	return NULL;
 }
 
-static void nfs4_opendata_free(struct nfs4_opendata *p)
+static void nfs4_opendata_free(struct kref *kref)
 {
-	if (p != NULL && atomic_dec_and_test(&p->count)) {
-		nfs_free_seqid(p->o_arg.seqid);
-		nfs4_put_state_owner(p->owner);
-		dput(p->dir);
-		dput(p->dentry);
-		kfree(p);
-	}
+	struct nfs4_opendata *p = container_of(kref,
+			struct nfs4_opendata, kref);
+
+	nfs_free_seqid(p->o_arg.seqid);
+	if (p->state != NULL)
+		nfs4_put_open_state(p->state);
+	nfs4_put_state_owner(p->owner);
+	dput(p->dir);
+	dput(p->path.dentry);
+	mntput(p->path.mnt);
+	kfree(p);
 }
 
-/* Helper for asynchronous RPC calls */
-static int nfs4_call_async(struct rpc_clnt *clnt,
-		const struct rpc_call_ops *tk_ops, void *calldata)
+static void nfs4_opendata_put(struct nfs4_opendata *p)
 {
-	struct rpc_task *task;
-
-	if (!(task = rpc_new_task(clnt, RPC_TASK_ASYNC, tk_ops, calldata)))
-		return -ENOMEM;
-	rpc_execute(task);
-	return 0;
+	if (p != NULL)
+		kref_put(&p->kref, nfs4_opendata_free);
 }
 
 static int nfs4_wait_for_completion_rpc_task(struct rpc_task *task)
@@ -316,7 +324,34 @@ static int nfs4_wait_for_completion_rpc_task(struct rpc_task *task)
 	return ret;
 }
 
-static inline void update_open_stateflags(struct nfs4_state *state, mode_t open_flags)
+static int can_open_cached(struct nfs4_state *state, int mode)
+{
+	int ret = 0;
+	switch (mode & (FMODE_READ|FMODE_WRITE|O_EXCL)) {
+		case FMODE_READ:
+			ret |= test_bit(NFS_O_RDONLY_STATE, &state->flags) != 0;
+			ret |= test_bit(NFS_O_RDWR_STATE, &state->flags) != 0;
+			break;
+		case FMODE_WRITE:
+			ret |= test_bit(NFS_O_WRONLY_STATE, &state->flags) != 0;
+			ret |= test_bit(NFS_O_RDWR_STATE, &state->flags) != 0;
+			break;
+		case FMODE_READ|FMODE_WRITE:
+			ret |= test_bit(NFS_O_RDWR_STATE, &state->flags) != 0;
+	}
+	return ret;
+}
+
+static int can_open_delegated(struct nfs_delegation *delegation, mode_t open_flags)
+{
+	if ((delegation->type & open_flags) != open_flags)
+		return 0;
+	if (delegation->flags & NFS_DELEGATION_NEED_RECLAIM)
+		return 0;
+	return 1;
+}
+
+static void update_open_stateflags(struct nfs4_state *state, mode_t open_flags)
 {
 	switch (open_flags) {
 		case FMODE_WRITE:
@@ -328,41 +363,176 @@ static inline void update_open_stateflags(struct nfs4_state *state, mode_t open_
 		case FMODE_READ|FMODE_WRITE:
 			state->n_rdwr++;
 	}
+	nfs4_state_set_mode_locked(state, state->state | open_flags);
 }
 
-static void update_open_stateid(struct nfs4_state *state, nfs4_stateid *stateid, int open_flags)
+static void nfs_set_open_stateid_locked(struct nfs4_state *state, nfs4_stateid *stateid, int open_flags)
 {
-	struct inode *inode = state->inode;
+	if (test_bit(NFS_DELEGATED_STATE, &state->flags) == 0)
+		memcpy(state->stateid.data, stateid->data, sizeof(state->stateid.data));
+	memcpy(state->open_stateid.data, stateid->data, sizeof(state->open_stateid.data));
+	switch (open_flags) {
+		case FMODE_READ:
+			set_bit(NFS_O_RDONLY_STATE, &state->flags);
+			break;
+		case FMODE_WRITE:
+			set_bit(NFS_O_WRONLY_STATE, &state->flags);
+			break;
+		case FMODE_READ|FMODE_WRITE:
+			set_bit(NFS_O_RDWR_STATE, &state->flags);
+	}
+}
+
+static void nfs_set_open_stateid(struct nfs4_state *state, nfs4_stateid *stateid, int open_flags)
+{
+	write_seqlock(&state->seqlock);
+	nfs_set_open_stateid_locked(state, stateid, open_flags);
+	write_sequnlock(&state->seqlock);
+}
 
+static void update_open_stateid(struct nfs4_state *state, nfs4_stateid *open_stateid, nfs4_stateid *deleg_stateid, int open_flags)
+{
 	open_flags &= (FMODE_READ|FMODE_WRITE);
-	/* Protect against nfs4_find_state_byowner() */
+	/*
+	 * Protect the call to nfs4_state_set_mode_locked and
+	 * serialise the stateid update
+	 */
+	write_seqlock(&state->seqlock);
+	if (deleg_stateid != NULL) {
+		memcpy(state->stateid.data, deleg_stateid->data, sizeof(state->stateid.data));
+		set_bit(NFS_DELEGATED_STATE, &state->flags);
+	}
+	if (open_stateid != NULL)
+		nfs_set_open_stateid_locked(state, open_stateid, open_flags);
+	write_sequnlock(&state->seqlock);
 	spin_lock(&state->owner->so_lock);
-	spin_lock(&inode->i_lock);
-	memcpy(&state->stateid, stateid, sizeof(state->stateid));
 	update_open_stateflags(state, open_flags);
-	nfs4_state_set_mode_locked(state, state->state | open_flags);
-	spin_unlock(&inode->i_lock);
 	spin_unlock(&state->owner->so_lock);
 }
 
+static void nfs4_return_incompatible_delegation(struct inode *inode, mode_t open_flags)
+{
+	struct nfs_delegation *delegation;
+
+	rcu_read_lock();
+	delegation = rcu_dereference(NFS_I(inode)->delegation);
+	if (delegation == NULL || (delegation->type & open_flags) == open_flags) {
+		rcu_read_unlock();
+		return;
+	}
+	rcu_read_unlock();
+	nfs_inode_return_delegation(inode);
+}
+
+static struct nfs4_state *nfs4_try_open_cached(struct nfs4_opendata *opendata)
+{
+	struct nfs4_state *state = opendata->state;
+	struct nfs_inode *nfsi = NFS_I(state->inode);
+	struct nfs_delegation *delegation;
+	int open_mode = opendata->o_arg.open_flags & (FMODE_READ|FMODE_WRITE|O_EXCL);
+	nfs4_stateid stateid;
+	int ret = -EAGAIN;
+
+	rcu_read_lock();
+	delegation = rcu_dereference(nfsi->delegation);
+	for (;;) {
+		if (can_open_cached(state, open_mode)) {
+			spin_lock(&state->owner->so_lock);
+			if (can_open_cached(state, open_mode)) {
+				update_open_stateflags(state, open_mode);
+				spin_unlock(&state->owner->so_lock);
+				rcu_read_unlock();
+				goto out_return_state;
+			}
+			spin_unlock(&state->owner->so_lock);
+		}
+		if (delegation == NULL)
+			break;
+		if (!can_open_delegated(delegation, open_mode))
+			break;
+		/* Save the delegation */
+		memcpy(stateid.data, delegation->stateid.data, sizeof(stateid.data));
+		rcu_read_unlock();
+		lock_kernel();
+		ret = _nfs4_do_access(state->inode, state->owner->so_cred, open_mode);
+		unlock_kernel();
+		if (ret != 0)
+			goto out;
+		ret = -EAGAIN;
+		rcu_read_lock();
+		delegation = rcu_dereference(nfsi->delegation);
+		/* If no delegation, try a cached open */
+		if (delegation == NULL)
+			continue;
+		/* Is the delegation still valid? */
+		if (memcmp(stateid.data, delegation->stateid.data, sizeof(stateid.data)) != 0)
+			continue;
+		rcu_read_unlock();
+		update_open_stateid(state, NULL, &stateid, open_mode);
+		goto out_return_state;
+	}
+	rcu_read_unlock();
+out:
+	return ERR_PTR(ret);
+out_return_state:
+	atomic_inc(&state->count);
+	return state;
+}
+
 static struct nfs4_state *nfs4_opendata_to_nfs4_state(struct nfs4_opendata *data)
 {
 	struct inode *inode;
 	struct nfs4_state *state = NULL;
+	struct nfs_delegation *delegation;
+	nfs4_stateid *deleg_stateid = NULL;
+	int ret;
 
-	if (!(data->f_attr.valid & NFS_ATTR_FATTR))
+	if (!data->rpc_done) {
+		state = nfs4_try_open_cached(data);
 		goto out;
+	}
+
+	ret = -EAGAIN;
+	if (!(data->f_attr.valid & NFS_ATTR_FATTR))
+		goto err;
 	inode = nfs_fhget(data->dir->d_sb, &data->o_res.fh, &data->f_attr);
+	ret = PTR_ERR(inode);
 	if (IS_ERR(inode))
-		goto out;
+		goto err;
+	ret = -ENOMEM;
 	state = nfs4_get_open_state(inode, data->owner);
 	if (state == NULL)
-		goto put_inode;
-	update_open_stateid(state, &data->o_res.stateid, data->o_arg.open_flags);
-put_inode:
+		goto err_put_inode;
+	if (data->o_res.delegation_type != 0) {
+		int delegation_flags = 0;
+
+		rcu_read_lock();
+		delegation = rcu_dereference(NFS_I(inode)->delegation);
+		if (delegation)
+			delegation_flags = delegation->flags;
+		rcu_read_unlock();
+		if (!(delegation_flags & NFS_DELEGATION_NEED_RECLAIM))
+			nfs_inode_set_delegation(state->inode,
+					data->owner->so_cred,
+					&data->o_res);
+		else
+			nfs_inode_reclaim_delegation(state->inode,
+					data->owner->so_cred,
+					&data->o_res);
+	}
+	rcu_read_lock();
+	delegation = rcu_dereference(NFS_I(inode)->delegation);
+	if (delegation != NULL)
+		deleg_stateid = &delegation->stateid;
+	update_open_stateid(state, &data->o_res.stateid, deleg_stateid, data->o_arg.open_flags);
+	rcu_read_unlock();
 	iput(inode);
 out:
 	return state;
+err_put_inode:
+	iput(inode);
+err:
+	return ERR_PTR(ret);
 }
 
 static struct nfs_open_context *nfs4_state_find_open_context(struct nfs4_state *state)
@@ -382,79 +552,66 @@ static struct nfs_open_context *nfs4_state_find_open_context(struct nfs4_state *
 	return ERR_PTR(-ENOENT);
 }
 
-static int nfs4_open_recover_helper(struct nfs4_opendata *opendata, mode_t openflags, nfs4_stateid *stateid)
+static int nfs4_open_recover_helper(struct nfs4_opendata *opendata, mode_t openflags, struct nfs4_state **res)
 {
+	struct nfs4_state *newstate;
 	int ret;
 
 	opendata->o_arg.open_flags = openflags;
+	memset(&opendata->o_res, 0, sizeof(opendata->o_res));
+	memset(&opendata->c_res, 0, sizeof(opendata->c_res));
+	nfs4_init_opendata_res(opendata);
 	ret = _nfs4_proc_open(opendata);
 	if (ret != 0)
 		return ret; 
-	memcpy(stateid->data, opendata->o_res.stateid.data,
-			sizeof(stateid->data));
+	newstate = nfs4_opendata_to_nfs4_state(opendata);
+	if (IS_ERR(newstate))
+		return PTR_ERR(newstate);
+	nfs4_close_state(&opendata->path, newstate, openflags);
+	*res = newstate;
 	return 0;
 }
 
 static int nfs4_open_recover(struct nfs4_opendata *opendata, struct nfs4_state *state)
 {
-	nfs4_stateid stateid;
 	struct nfs4_state *newstate;
-	int mode = 0;
-	int delegation = 0;
 	int ret;
 
 	/* memory barrier prior to reading state->n_* */
+	clear_bit(NFS_DELEGATED_STATE, &state->flags);
 	smp_rmb();
 	if (state->n_rdwr != 0) {
-		ret = nfs4_open_recover_helper(opendata, FMODE_READ|FMODE_WRITE, &stateid);
+		ret = nfs4_open_recover_helper(opendata, FMODE_READ|FMODE_WRITE, &newstate);
 		if (ret != 0)
 			return ret;
-		mode |= FMODE_READ|FMODE_WRITE;
-		if (opendata->o_res.delegation_type != 0)
-			delegation = opendata->o_res.delegation_type;
-		smp_rmb();
+		if (newstate != state)
+			return -ESTALE;
 	}
 	if (state->n_wronly != 0) {
-		ret = nfs4_open_recover_helper(opendata, FMODE_WRITE, &stateid);
+		ret = nfs4_open_recover_helper(opendata, FMODE_WRITE, &newstate);
 		if (ret != 0)
 			return ret;
-		mode |= FMODE_WRITE;
-		if (opendata->o_res.delegation_type != 0)
-			delegation = opendata->o_res.delegation_type;
-		smp_rmb();
+		if (newstate != state)
+			return -ESTALE;
 	}
 	if (state->n_rdonly != 0) {
-		ret = nfs4_open_recover_helper(opendata, FMODE_READ, &stateid);
+		ret = nfs4_open_recover_helper(opendata, FMODE_READ, &newstate);
 		if (ret != 0)
 			return ret;
-		mode |= FMODE_READ;
+		if (newstate != state)
+			return -ESTALE;
 	}
-	clear_bit(NFS_DELEGATED_STATE, &state->flags);
-	if (mode == 0)
-		return 0;
-	if (opendata->o_res.delegation_type == 0)
-		opendata->o_res.delegation_type = delegation;
-	opendata->o_arg.open_flags |= mode;
-	newstate = nfs4_opendata_to_nfs4_state(opendata);
-	if (newstate != NULL) {
-		if (opendata->o_res.delegation_type != 0) {
-			struct nfs_inode *nfsi = NFS_I(newstate->inode);
-			int delegation_flags = 0;
-			if (nfsi->delegation)
-				delegation_flags = nfsi->delegation->flags;
-			if (!(delegation_flags & NFS_DELEGATION_NEED_RECLAIM))
-				nfs_inode_set_delegation(newstate->inode,
-						opendata->owner->so_cred,
-						&opendata->o_res);
-			else
-				nfs_inode_reclaim_delegation(newstate->inode,
-						opendata->owner->so_cred,
-						&opendata->o_res);
-		}
-		nfs4_close_state(newstate, opendata->o_arg.open_flags);
+	/*
+	 * We may have performed cached opens for all three recoveries.
+	 * Check if we need to update the current stateid.
+	 */
+	if (test_bit(NFS_DELEGATED_STATE, &state->flags) == 0 &&
+	    memcmp(state->stateid.data, state->open_stateid.data, sizeof(state->stateid.data)) != 0) {
+		write_seqlock(&state->seqlock);
+		if (test_bit(NFS_DELEGATED_STATE, &state->flags) == 0)
+			memcpy(state->stateid.data, state->open_stateid.data, sizeof(state->stateid.data));
+		write_sequnlock(&state->seqlock);
 	}
-	if (newstate != state)
-		return -ESTALE;
 	return 0;
 }
 
@@ -462,41 +619,37 @@ static int nfs4_open_recover(struct nfs4_opendata *opendata, struct nfs4_state *
  * OPEN_RECLAIM:
  * 	reclaim state on the server after a reboot.
  */
-static int _nfs4_do_open_reclaim(struct nfs4_state_owner *sp, struct nfs4_state *state, struct dentry *dentry)
+static int _nfs4_do_open_reclaim(struct nfs_open_context *ctx, struct nfs4_state *state)
 {
-	struct nfs_delegation *delegation = NFS_I(state->inode)->delegation;
+	struct nfs_delegation *delegation;
 	struct nfs4_opendata *opendata;
 	int delegation_type = 0;
 	int status;
 
-	if (delegation != NULL) {
-		if (!(delegation->flags & NFS_DELEGATION_NEED_RECLAIM)) {
-			memcpy(&state->stateid, &delegation->stateid,
-					sizeof(state->stateid));
-			set_bit(NFS_DELEGATED_STATE, &state->flags);
-			return 0;
-		}
-		delegation_type = delegation->type;
-	}
-	opendata = nfs4_opendata_alloc(dentry, sp, 0, NULL);
+	opendata = nfs4_opendata_alloc(&ctx->path, state->owner, 0, NULL);
 	if (opendata == NULL)
 		return -ENOMEM;
 	opendata->o_arg.claim = NFS4_OPEN_CLAIM_PREVIOUS;
 	opendata->o_arg.fh = NFS_FH(state->inode);
 	nfs_copy_fh(&opendata->o_res.fh, opendata->o_arg.fh);
+	rcu_read_lock();
+	delegation = rcu_dereference(NFS_I(state->inode)->delegation);
+	if (delegation != NULL && (delegation->flags & NFS_DELEGATION_NEED_RECLAIM) != 0)
+		delegation_type = delegation->type;	/* the delegation's type, not its state flags */
+	rcu_read_unlock();
 	opendata->o_arg.u.delegation_type = delegation_type;
 	status = nfs4_open_recover(opendata, state);
-	nfs4_opendata_free(opendata);
+	nfs4_opendata_put(opendata);
 	return status;
 }
 
-static int nfs4_do_open_reclaim(struct nfs4_state_owner *sp, struct nfs4_state *state, struct dentry *dentry)
+static int nfs4_do_open_reclaim(struct nfs_open_context *ctx, struct nfs4_state *state)
 {
 	struct nfs_server *server = NFS_SERVER(state->inode);
 	struct nfs4_exception exception = { };
 	int err;
 	do {
-		err = _nfs4_do_open_reclaim(sp, state, dentry);
+		err = _nfs4_do_open_reclaim(ctx, state);
 		if (err != -NFS4ERR_DELAY)
 			break;
 		nfs4_handle_exception(server, err, &exception);
@@ -512,37 +665,35 @@ static int nfs4_open_reclaim(struct nfs4_state_owner *sp, struct nfs4_state *sta
 	ctx = nfs4_state_find_open_context(state);
 	if (IS_ERR(ctx))
 		return PTR_ERR(ctx);
-	ret = nfs4_do_open_reclaim(sp, state, ctx->dentry);
+	ret = nfs4_do_open_reclaim(ctx, state);
 	put_nfs_open_context(ctx);
 	return ret;
 }
 
-static int _nfs4_open_delegation_recall(struct dentry *dentry, struct nfs4_state *state)
+static int _nfs4_open_delegation_recall(struct nfs_open_context *ctx, struct nfs4_state *state, const nfs4_stateid *stateid)
 {
 	struct nfs4_state_owner  *sp  = state->owner;
 	struct nfs4_opendata *opendata;
 	int ret;
 
-	if (!test_bit(NFS_DELEGATED_STATE, &state->flags))
-		return 0;
-	opendata = nfs4_opendata_alloc(dentry, sp, 0, NULL);
+	opendata = nfs4_opendata_alloc(&ctx->path, sp, 0, NULL);
 	if (opendata == NULL)
 		return -ENOMEM;
 	opendata->o_arg.claim = NFS4_OPEN_CLAIM_DELEGATE_CUR;
-	memcpy(opendata->o_arg.u.delegation.data, state->stateid.data,
+	memcpy(opendata->o_arg.u.delegation.data, stateid->data,
 			sizeof(opendata->o_arg.u.delegation.data));
 	ret = nfs4_open_recover(opendata, state);
-	nfs4_opendata_free(opendata);
+	nfs4_opendata_put(opendata);
 	return ret;
 }
 
-int nfs4_open_delegation_recall(struct dentry *dentry, struct nfs4_state *state)
+int nfs4_open_delegation_recall(struct nfs_open_context *ctx, struct nfs4_state *state, const nfs4_stateid *stateid)
 {
 	struct nfs4_exception exception = { };
-	struct nfs_server *server = NFS_SERVER(dentry->d_inode);
+	struct nfs_server *server = NFS_SERVER(state->inode);
 	int err;
 	do {
-		err = _nfs4_open_delegation_recall(dentry, state);
+		err = _nfs4_open_delegation_recall(ctx, state, stateid);
 		switch (err) {
 			case 0:
 				return err;
@@ -582,9 +733,10 @@ static void nfs4_open_confirm_done(struct rpc_task *task, void *calldata)
 		memcpy(data->o_res.stateid.data, data->c_res.stateid.data,
 				sizeof(data->o_res.stateid.data));
 		renew_lease(data->o_res.server, data->timestamp);
+		data->rpc_done = 1;
 	}
-	nfs_increment_open_seqid(data->rpc_status, data->c_arg.seqid);
 	nfs_confirm_seqid(&data->owner->so_seqid, data->rpc_status);
+	nfs_increment_open_seqid(data->rpc_status, data->c_arg.seqid);
 }
 
 static void nfs4_open_confirm_release(void *calldata)
@@ -596,14 +748,14 @@ static void nfs4_open_confirm_release(void *calldata)
 	if (data->cancelled == 0)
 		goto out_free;
 	/* In case of error, no cleanup! */
-	if (data->rpc_status != 0)
+	if (!data->rpc_done)
 		goto out_free;
 	nfs_confirm_seqid(&data->owner->so_seqid, 0);
 	state = nfs4_opendata_to_nfs4_state(data);
-	if (state != NULL)
-		nfs4_close_state(state, data->o_arg.open_flags);
+	if (!IS_ERR(state))
+		nfs4_close_state(&data->path, state, data->o_arg.open_flags);
 out_free:
-	nfs4_opendata_free(data);
+	nfs4_opendata_put(data);
 }
 
 static const struct rpc_call_ops nfs4_open_confirm_ops = {
@@ -621,12 +773,9 @@ static int _nfs4_proc_open_confirm(struct nfs4_opendata *data)
 	struct rpc_task *task;
 	int status;
 
-	atomic_inc(&data->count);
-	/*
-	 * If rpc_run_task() ends up calling ->rpc_release(), we
-	 * want to ensure that it takes the 'error' code path.
-	 */
-	data->rpc_status = -ENOMEM;
+	kref_get(&data->kref);
+	data->rpc_done = 0;
+	data->rpc_status = 0;
 	task = rpc_run_task(server->client, RPC_TASK_ASYNC, &nfs4_open_confirm_ops, data);
 	if (IS_ERR(task))
 		return PTR_ERR(task);
@@ -653,13 +802,35 @@ static void nfs4_open_prepare(struct rpc_task *task, void *calldata)
 	
 	if (nfs_wait_on_sequence(data->o_arg.seqid, task) != 0)
 		return;
+	/*
+	 * Check if we still need to send an OPEN call, or if we can use
+	 * a delegation instead.
+	 */
+	if (data->state != NULL) {
+		struct nfs_delegation *delegation;
+
+		if (can_open_cached(data->state, data->o_arg.open_flags & (FMODE_READ|FMODE_WRITE|O_EXCL)))
+			goto out_no_action;
+		rcu_read_lock();
+		delegation = rcu_dereference(NFS_I(data->state->inode)->delegation);
+		if (delegation != NULL &&
+		   (delegation->flags & NFS_DELEGATION_NEED_RECLAIM) == 0) {
+			rcu_read_unlock();
+			goto out_no_action;
+		}
+		rcu_read_unlock();
+	}
 	/* Update sequence id. */
-	data->o_arg.id = sp->so_id;
+	data->o_arg.id = sp->so_owner_id.id;
 	data->o_arg.clientid = sp->so_client->cl_clientid;
 	if (data->o_arg.claim == NFS4_OPEN_CLAIM_PREVIOUS)
 		msg.rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_OPEN_NOATTR];
 	data->timestamp = jiffies;
 	rpc_call_setup(task, &msg, 0);
+	return;
+out_no_action:
+	task->tk_action = NULL;
+
 }
 
 static void nfs4_open_done(struct rpc_task *task, void *calldata)
@@ -683,8 +854,11 @@ static void nfs4_open_done(struct rpc_task *task, void *calldata)
 				data->rpc_status = -ENOTDIR;
 		}
 		renew_lease(data->o_res.server, data->timestamp);
+		if (!(data->o_res.rflags & NFS4_OPEN_RESULT_CONFIRM))
+			nfs_confirm_seqid(&data->owner->so_seqid, 0);
 	}
 	nfs_increment_open_seqid(data->rpc_status, data->o_arg.seqid);
+	data->rpc_done = 1;
 }
 
 static void nfs4_open_release(void *calldata)
@@ -696,17 +870,17 @@ static void nfs4_open_release(void *calldata)
 	if (data->cancelled == 0)
 		goto out_free;
 	/* In case of error, no cleanup! */
-	if (data->rpc_status != 0)
+	if (data->rpc_status != 0 || !data->rpc_done)
 		goto out_free;
 	/* In case we need an open_confirm, no cleanup! */
 	if (data->o_res.rflags & NFS4_OPEN_RESULT_CONFIRM)
 		goto out_free;
 	nfs_confirm_seqid(&data->owner->so_seqid, 0);
 	state = nfs4_opendata_to_nfs4_state(data);
-	if (state != NULL)
-		nfs4_close_state(state, data->o_arg.open_flags);
+	if (!IS_ERR(state))
+		nfs4_close_state(&data->path, state, data->o_arg.open_flags);
 out_free:
-	nfs4_opendata_free(data);
+	nfs4_opendata_put(data);
 }
 
 static const struct rpc_call_ops nfs4_open_ops = {
@@ -727,12 +901,10 @@ static int _nfs4_proc_open(struct nfs4_opendata *data)
 	struct rpc_task *task;
 	int status;
 
-	atomic_inc(&data->count);
-	/*
-	 * If rpc_run_task() ends up calling ->rpc_release(), we
-	 * want to ensure that it takes the 'error' code path.
-	 */
-	data->rpc_status = -ENOMEM;
+	kref_get(&data->kref);
+	data->rpc_done = 0;
+	data->rpc_status = 0;
+	data->cancelled = 0;
 	task = rpc_run_task(server->client, RPC_TASK_ASYNC, &nfs4_open_ops, data);
 	if (IS_ERR(task))
 		return PTR_ERR(task);
@@ -743,7 +915,7 @@ static int _nfs4_proc_open(struct nfs4_opendata *data)
 	} else
 		status = data->rpc_status;
 	rpc_put_task(task);
-	if (status != 0)
+	if (status != 0 || !data->rpc_done)
 		return status;
 
 	if (o_arg->open_flags & O_CREAT) {
@@ -756,7 +928,6 @@ static int _nfs4_proc_open(struct nfs4_opendata *data)
 		if (status != 0)
 			return status;
 	}
-	nfs_confirm_seqid(&data->owner->so_seqid, 0);
 	if (!(o_res->f_attr->valid & NFS_ATTR_FATTR))
 		return server->nfs_client->rpc_ops->getattr(server, &o_res->fh, o_res->f_attr);
 	return 0;
@@ -772,6 +943,8 @@ static int _nfs4_do_access(struct inode *inode, struct rpc_cred *cred, int openf
 		mask |= MAY_READ;
 	if (openflags & FMODE_WRITE)
 		mask |= MAY_WRITE;
+	if (openflags & FMODE_EXEC)
+		mask |= MAY_EXEC;
 	status = nfs_access_get_cached(inode, cred, &cache);
 	if (status == 0)
 		goto out;
@@ -811,43 +984,32 @@ static int nfs4_recover_expired_lease(struct nfs_server *server)
  * 	reclaim state on the server after a network partition.
  * 	Assumes caller holds the appropriate lock
  */
-static int _nfs4_open_expired(struct nfs4_state_owner *sp, struct nfs4_state *state, struct dentry *dentry)
+static int _nfs4_open_expired(struct nfs_open_context *ctx, struct nfs4_state *state)
 {
-	struct inode *inode = state->inode;
-	struct nfs_delegation *delegation = NFS_I(inode)->delegation;
 	struct nfs4_opendata *opendata;
-	int openflags = state->state & (FMODE_READ|FMODE_WRITE);
 	int ret;
 
-	if (delegation != NULL && !(delegation->flags & NFS_DELEGATION_NEED_RECLAIM)) {
-		ret = _nfs4_do_access(inode, sp->so_cred, openflags);
-		if (ret < 0)
-			return ret;
-		memcpy(&state->stateid, &delegation->stateid, sizeof(state->stateid));
-		set_bit(NFS_DELEGATED_STATE, &state->flags);
-		return 0;
-	}
-	opendata = nfs4_opendata_alloc(dentry, sp, openflags, NULL);
+	opendata = nfs4_opendata_alloc(&ctx->path, state->owner, 0, NULL);
 	if (opendata == NULL)
 		return -ENOMEM;
 	ret = nfs4_open_recover(opendata, state);
 	if (ret == -ESTALE) {
 		/* Invalidate the state owner so we don't ever use it again */
-		nfs4_drop_state_owner(sp);
-		d_drop(dentry);
+		nfs4_drop_state_owner(state->owner);
+		d_drop(ctx->path.dentry);
 	}
-	nfs4_opendata_free(opendata);
+	nfs4_opendata_put(opendata);
 	return ret;
 }
 
-static inline int nfs4_do_open_expired(struct nfs4_state_owner *sp, struct nfs4_state *state, struct dentry *dentry)
+static inline int nfs4_do_open_expired(struct nfs_open_context *ctx, struct nfs4_state *state)
 {
-	struct nfs_server *server = NFS_SERVER(dentry->d_inode);
+	struct nfs_server *server = NFS_SERVER(state->inode);
 	struct nfs4_exception exception = { };
 	int err;
 
 	do {
-		err = _nfs4_open_expired(sp, state, dentry);
+		err = _nfs4_open_expired(ctx, state);
 		if (err == -NFS4ERR_DELAY)
 			nfs4_handle_exception(server, err, &exception);
 	} while (exception.retry);
@@ -862,107 +1024,38 @@ static int nfs4_open_expired(struct nfs4_state_owner *sp, struct nfs4_state *sta
 	ctx = nfs4_state_find_open_context(state);
 	if (IS_ERR(ctx))
 		return PTR_ERR(ctx);
-	ret = nfs4_do_open_expired(sp, state, ctx->dentry);
+	ret = nfs4_do_open_expired(ctx, state);
 	put_nfs_open_context(ctx);
 	return ret;
 }
 
 /*
- * Returns a referenced nfs4_state if there is an open delegation on the file
+ * on an EXCLUSIVE create, the server should send back a bitmask with FATTR4-*
+ * fields corresponding to attributes that were used to store the verifier.
+ * Make sure we clobber those fields in the later setattr call
  */
-static int _nfs4_open_delegated(struct inode *inode, int flags, struct rpc_cred *cred, struct nfs4_state **res)
-{
-	struct nfs_delegation *delegation;
-	struct nfs_server *server = NFS_SERVER(inode);
-	struct nfs_client *clp = server->nfs_client;
-	struct nfs_inode *nfsi = NFS_I(inode);
-	struct nfs4_state_owner *sp = NULL;
-	struct nfs4_state *state = NULL;
-	int open_flags = flags & (FMODE_READ|FMODE_WRITE);
-	int err;
-
-	err = -ENOMEM;
-	if (!(sp = nfs4_get_state_owner(server, cred))) {
-		dprintk("%s: nfs4_get_state_owner failed!\n", __FUNCTION__);
-		return err;
-	}
-	err = nfs4_recover_expired_lease(server);
-	if (err != 0)
-		goto out_put_state_owner;
-	/* Protect against reboot recovery - NOTE ORDER! */
-	down_read(&clp->cl_sem);
-	/* Protect against delegation recall */
-	down_read(&nfsi->rwsem);
-	delegation = NFS_I(inode)->delegation;
-	err = -ENOENT;
-	if (delegation == NULL || (delegation->type & open_flags) != open_flags)
-		goto out_err;
-	err = -ENOMEM;
-	state = nfs4_get_open_state(inode, sp);
-	if (state == NULL)
-		goto out_err;
-
-	err = -ENOENT;
-	if ((state->state & open_flags) == open_flags) {
-		spin_lock(&inode->i_lock);
-		update_open_stateflags(state, open_flags);
-		spin_unlock(&inode->i_lock);
-		goto out_ok;
-	} else if (state->state != 0)
-		goto out_put_open_state;
-
-	lock_kernel();
-	err = _nfs4_do_access(inode, cred, open_flags);
-	unlock_kernel();
-	if (err != 0)
-		goto out_put_open_state;
-	set_bit(NFS_DELEGATED_STATE, &state->flags);
-	update_open_stateid(state, &delegation->stateid, open_flags);
-out_ok:
-	nfs4_put_state_owner(sp);
-	up_read(&nfsi->rwsem);
-	up_read(&clp->cl_sem);
-	*res = state;
-	return 0;
-out_put_open_state:
-	nfs4_put_open_state(state);
-out_err:
-	up_read(&nfsi->rwsem);
-	up_read(&clp->cl_sem);
-	if (err != -EACCES)
-		nfs_inode_return_delegation(inode);
-out_put_state_owner:
-	nfs4_put_state_owner(sp);
-	return err;
-}
-
-static struct nfs4_state *nfs4_open_delegated(struct inode *inode, int flags, struct rpc_cred *cred)
+static inline void nfs4_exclusive_attrset(struct nfs4_opendata *opendata, struct iattr *sattr)
 {
-	struct nfs4_exception exception = { };
-	struct nfs4_state *res = ERR_PTR(-EIO);
-	int err;
+	if ((opendata->o_res.attrset[1] & FATTR4_WORD1_TIME_ACCESS) &&
+	    !(sattr->ia_valid & ATTR_ATIME_SET))
+		sattr->ia_valid |= ATTR_ATIME;
 
-	do {
-		err = _nfs4_open_delegated(inode, flags, cred, &res);
-		if (err == 0)
-			break;
-		res = ERR_PTR(nfs4_handle_exception(NFS_SERVER(inode),
-					err, &exception));
-	} while (exception.retry);
-	return res;
+	if ((opendata->o_res.attrset[1] & FATTR4_WORD1_TIME_MODIFY) &&
+	    !(sattr->ia_valid & ATTR_MTIME_SET))
+		sattr->ia_valid |= ATTR_MTIME;
 }
 
 /*
  * Returns a referenced nfs4_state
  */
-static int _nfs4_do_open(struct inode *dir, struct dentry *dentry, int flags, struct iattr *sattr, struct rpc_cred *cred, struct nfs4_state **res)
+static int _nfs4_do_open(struct inode *dir, struct path *path, int flags, struct iattr *sattr, struct rpc_cred *cred, struct nfs4_state **res)
 {
 	struct nfs4_state_owner  *sp;
 	struct nfs4_state     *state = NULL;
 	struct nfs_server       *server = NFS_SERVER(dir);
 	struct nfs_client *clp = server->nfs_client;
 	struct nfs4_opendata *opendata;
-	int                     status;
+	int status;
 
 	/* Protect against reboot recovery conflicts */
 	status = -ENOMEM;
@@ -973,29 +1066,35 @@ static int _nfs4_do_open(struct inode *dir, struct dentry *dentry, int flags, st
 	status = nfs4_recover_expired_lease(server);
 	if (status != 0)
 		goto err_put_state_owner;
+	if (path->dentry->d_inode != NULL)
+		nfs4_return_incompatible_delegation(path->dentry->d_inode, flags & (FMODE_READ|FMODE_WRITE));
 	down_read(&clp->cl_sem);
 	status = -ENOMEM;
-	opendata = nfs4_opendata_alloc(dentry, sp, flags, sattr);
+	opendata = nfs4_opendata_alloc(path, sp, flags, sattr);
 	if (opendata == NULL)
 		goto err_release_rwsem;
 
+	if (path->dentry->d_inode != NULL)
+		opendata->state = nfs4_get_open_state(path->dentry->d_inode, sp);
+
 	status = _nfs4_proc_open(opendata);
 	if (status != 0)
-		goto err_opendata_free;
+		goto err_opendata_put;
+
+	if (opendata->o_arg.open_flags & O_EXCL)
+		nfs4_exclusive_attrset(opendata, sattr);
 
-	status = -ENOMEM;
 	state = nfs4_opendata_to_nfs4_state(opendata);
-	if (state == NULL)
-		goto err_opendata_free;
-	if (opendata->o_res.delegation_type != 0)
-		nfs_inode_set_delegation(state->inode, cred, &opendata->o_res);
-	nfs4_opendata_free(opendata);
+	status = PTR_ERR(state);
+	if (IS_ERR(state))
+		goto err_opendata_put;
+	nfs4_opendata_put(opendata);
 	nfs4_put_state_owner(sp);
 	up_read(&clp->cl_sem);
 	*res = state;
 	return 0;
-err_opendata_free:
-	nfs4_opendata_free(opendata);
+err_opendata_put:
+	nfs4_opendata_put(opendata);
 err_release_rwsem:
 	up_read(&clp->cl_sem);
 err_put_state_owner:
@@ -1006,14 +1105,14 @@ out_err:
 }
 
 
-static struct nfs4_state *nfs4_do_open(struct inode *dir, struct dentry *dentry, int flags, struct iattr *sattr, struct rpc_cred *cred)
+static struct nfs4_state *nfs4_do_open(struct inode *dir, struct path *path, int flags, struct iattr *sattr, struct rpc_cred *cred)
 {
 	struct nfs4_exception exception = { };
 	struct nfs4_state *res;
 	int status;
 
 	do {
-		status = _nfs4_do_open(dir, dentry, flags, sattr, cred, &res);
+		status = _nfs4_do_open(dir, path, flags, sattr, cred, &res);
 		if (status == 0)
 			break;
 		/* NOTE: BAD_SEQID means the server and client disagree about the
@@ -1028,7 +1127,9 @@ static struct nfs4_state *nfs4_do_open(struct inode *dir, struct dentry *dentry,
 		 * the user though...
 		 */
 		if (status == -NFS4ERR_BAD_SEQID) {
-			printk(KERN_WARNING "NFS: v4 server returned a bad sequence-id error!\n");
+			printk(KERN_WARNING "NFS: v4 server %s "
+					" returned a bad sequence-id error!\n",
+					NFS_SERVER(dir)->nfs_client->cl_hostname);
 			exception.retry = 1;
 			continue;
 		}
@@ -1042,6 +1143,11 @@ static struct nfs4_state *nfs4_do_open(struct inode *dir, struct dentry *dentry,
 			exception.retry = 1;
 			continue;
 		}
+		if (status == -EAGAIN) {
+			/* We must have found a delegation */
+			exception.retry = 1;
+			continue;
+		}
 		res = ERR_PTR(nfs4_handle_exception(NFS_SERVER(dir),
 					status, &exception));
 	} while (exception.retry);
@@ -1101,6 +1207,7 @@ static int nfs4_do_setattr(struct inode *inode, struct nfs_fattr *fattr,
 }
 
 struct nfs4_closedata {
+	struct path path;
 	struct inode *inode;
 	struct nfs4_state *state;
 	struct nfs_closeargs arg;
@@ -1117,6 +1224,8 @@ static void nfs4_free_closedata(void *data)
 	nfs4_put_open_state(calldata->state);
 	nfs_free_seqid(calldata->arg.seqid);
 	nfs4_put_state_owner(sp);
+	dput(calldata->path.dentry);
+	mntput(calldata->path.mnt);
 	kfree(calldata);
 }
 
@@ -1134,8 +1243,7 @@ static void nfs4_close_done(struct rpc_task *task, void *data)
 	nfs_increment_open_seqid(task->tk_status, calldata->arg.seqid);
 	switch (task->tk_status) {
 		case 0:
-			memcpy(&state->stateid, &calldata->res.stateid,
-					sizeof(state->stateid));
+			nfs_set_open_stateid(state, &calldata->res.stateid, calldata->arg.open_flags);
 			renew_lease(server, calldata->timestamp);
 			break;
 		case -NFS4ERR_STALE_STATEID:
@@ -1160,26 +1268,30 @@ static void nfs4_close_prepare(struct rpc_task *task, void *data)
 		.rpc_resp = &calldata->res,
 		.rpc_cred = state->owner->so_cred,
 	};
-	int mode = 0, old_mode;
+	int clear_rd, clear_wr, clear_rdwr;
+	int mode;
 
 	if (nfs_wait_on_sequence(calldata->arg.seqid, task) != 0)
 		return;
-	/* Recalculate the new open mode in case someone reopened the file
-	 * while we were waiting in line to be scheduled.
-	 */
+
+	mode = FMODE_READ|FMODE_WRITE;
+	clear_rd = clear_wr = clear_rdwr = 0;
 	spin_lock(&state->owner->so_lock);
-	spin_lock(&calldata->inode->i_lock);
-	mode = old_mode = state->state;
+	/* Calculate the change in open mode */
 	if (state->n_rdwr == 0) {
-		if (state->n_rdonly == 0)
+		if (state->n_rdonly == 0) {
 			mode &= ~FMODE_READ;
-		if (state->n_wronly == 0)
+			clear_rd |= test_and_clear_bit(NFS_O_RDONLY_STATE, &state->flags);
+			clear_rdwr |= test_and_clear_bit(NFS_O_RDWR_STATE, &state->flags);
+		}
+		if (state->n_wronly == 0) {
 			mode &= ~FMODE_WRITE;
+			clear_wr |= test_and_clear_bit(NFS_O_WRONLY_STATE, &state->flags);
+			clear_rdwr |= test_and_clear_bit(NFS_O_RDWR_STATE, &state->flags);
+		}
 	}
-	nfs4_state_set_mode_locked(state, mode);
-	spin_unlock(&calldata->inode->i_lock);
 	spin_unlock(&state->owner->so_lock);
-	if (mode == old_mode || test_bit(NFS_DELEGATED_STATE, &state->flags)) {
+	if (!clear_rd && !clear_wr && !clear_rdwr) {
 		/* Note: exit _without_ calling nfs4_close_done */
 		task->tk_action = NULL;
 		return;
@@ -1209,19 +1321,21 @@ static const struct rpc_call_ops nfs4_close_ops = {
  *
  * NOTE: Caller must be holding the sp->so_owner semaphore!
  */
-int nfs4_do_close(struct inode *inode, struct nfs4_state *state) 
+int nfs4_do_close(struct path *path, struct nfs4_state *state)
 {
-	struct nfs_server *server = NFS_SERVER(inode);
+	struct nfs_server *server = NFS_SERVER(state->inode);
 	struct nfs4_closedata *calldata;
+	struct nfs4_state_owner *sp = state->owner;
+	struct rpc_task *task;
 	int status = -ENOMEM;
 
 	calldata = kmalloc(sizeof(*calldata), GFP_KERNEL);
 	if (calldata == NULL)
 		goto out;
-	calldata->inode = inode;
+	calldata->inode = state->inode;
 	calldata->state = state;
-	calldata->arg.fh = NFS_FH(inode);
-	calldata->arg.stateid = &state->stateid;
+	calldata->arg.fh = NFS_FH(state->inode);
+	calldata->arg.stateid = &state->open_stateid;
 	/* Serialization for the sequence id */
 	calldata->arg.seqid = nfs_alloc_seqid(&state->owner->so_seqid);
 	if (calldata->arg.seqid == NULL)
@@ -1229,36 +1343,55 @@ int nfs4_do_close(struct inode *inode, struct nfs4_state *state)
 	calldata->arg.bitmask = server->attr_bitmask;
 	calldata->res.fattr = &calldata->fattr;
 	calldata->res.server = server;
+	calldata->path.mnt = mntget(path->mnt);
+	calldata->path.dentry = dget(path->dentry);
 
-	status = nfs4_call_async(server->client, &nfs4_close_ops, calldata);
-	if (status == 0)
-		goto out;
-
-	nfs_free_seqid(calldata->arg.seqid);
+	task = rpc_run_task(server->client, RPC_TASK_ASYNC, &nfs4_close_ops, calldata);
+	if (IS_ERR(task))
+		return PTR_ERR(task);
+	rpc_put_task(task);
+	return 0;
 out_free_calldata:
 	kfree(calldata);
 out:
+	nfs4_put_open_state(state);
+	nfs4_put_state_owner(sp);
 	return status;
 }
 
-static int nfs4_intent_set_file(struct nameidata *nd, struct dentry *dentry, struct nfs4_state *state)
+static int nfs4_intent_set_file(struct nameidata *nd, struct path *path, struct nfs4_state *state)
 {
 	struct file *filp;
+	int ret;
 
-	filp = lookup_instantiate_filp(nd, dentry, NULL);
+	/* If the open_intent is for execute, we have an extra check to make */
+	if (nd->intent.open.flags & FMODE_EXEC) {
+		ret = _nfs4_do_access(state->inode,
+				state->owner->so_cred,
+				nd->intent.open.flags);
+		if (ret < 0)
+			goto out_close;
+	}
+	filp = lookup_instantiate_filp(nd, path->dentry, NULL);
 	if (!IS_ERR(filp)) {
 		struct nfs_open_context *ctx;
 		ctx = (struct nfs_open_context *)filp->private_data;
 		ctx->state = state;
 		return 0;
 	}
-	nfs4_close_state(state, nd->intent.open.flags);
-	return PTR_ERR(filp);
+	ret = PTR_ERR(filp);
+out_close:
+	nfs4_close_state(path, state, nd->intent.open.flags);
+	return ret;
 }
 
 struct dentry *
 nfs4_atomic_open(struct inode *dir, struct dentry *dentry, struct nameidata *nd)
 {
+	struct path path = {
+		.mnt = nd->mnt,
+		.dentry = dentry,
+	};
 	struct iattr attr;
 	struct rpc_cred *cred;
 	struct nfs4_state *state;
@@ -1277,7 +1410,7 @@ nfs4_atomic_open(struct inode *dir, struct dentry *dentry, struct nameidata *nd)
 	cred = rpcauth_lookupcred(NFS_CLIENT(dir)->cl_auth, 0);
 	if (IS_ERR(cred))
 		return (struct dentry *)cred;
-	state = nfs4_do_open(dir, dentry, nd->intent.open.flags, &attr, cred);
+	state = nfs4_do_open(dir, &path, nd->intent.open.flags, &attr, cred);
 	put_rpccred(cred);
 	if (IS_ERR(state)) {
 		if (PTR_ERR(state) == -ENOENT)
@@ -1287,22 +1420,24 @@ nfs4_atomic_open(struct inode *dir, struct dentry *dentry, struct nameidata *nd)
 	res = d_add_unique(dentry, igrab(state->inode));
 	if (res != NULL)
-		dentry = res;
-	nfs4_intent_set_file(nd, dentry, state);
+		path.dentry = res;	/* hand on the dentry d_add_unique() returned */
+	nfs4_intent_set_file(nd, &path, state);
 	return res;
 }
 
 int
 nfs4_open_revalidate(struct inode *dir, struct dentry *dentry, int openflags, struct nameidata *nd)
 {
+	struct path path = {
+		.mnt = nd->mnt,
+		.dentry = dentry,
+	};
 	struct rpc_cred *cred;
 	struct nfs4_state *state;
 
 	cred = rpcauth_lookupcred(NFS_CLIENT(dir)->cl_auth, 0);
 	if (IS_ERR(cred))
 		return PTR_ERR(cred);
-	state = nfs4_open_delegated(dentry->d_inode, openflags, cred);
-	if (IS_ERR(state))
-		state = nfs4_do_open(dir, dentry, openflags, NULL, cred);
+	state = nfs4_do_open(dir, &path, openflags, NULL, cred);
 	put_rpccred(cred);
 	if (IS_ERR(state)) {
 		switch (PTR_ERR(state)) {
@@ -1318,10 +1453,10 @@ nfs4_open_revalidate(struct inode *dir, struct dentry *dentry, int openflags, st
 		}
 	}
 	if (state->inode == dentry->d_inode) {
-		nfs4_intent_set_file(nd, dentry, state);
+		nfs4_intent_set_file(nd, &path, state);
 		return 1;
 	}
-	nfs4_close_state(state, openflags);
+	nfs4_close_state(&path, state, openflags);
 out_drop:
 	d_drop(dentry);
 	return 0;
@@ -1559,8 +1694,6 @@ static int _nfs4_proc_lookupfh(struct nfs_server *server, struct nfs_fh *dirfh,
 	dprintk("NFS call  lookupfh %s\n", name->name);
 	status = rpc_call_sync(server->client, &msg, 0);
 	dprintk("NFS reply lookupfh: %d\n", status);
-	if (status == -NFS4ERR_MOVED)
-		status = -EREMOTE;
 	return status;
 }
 
@@ -1571,10 +1704,13 @@ static int nfs4_proc_lookupfh(struct nfs_server *server, struct nfs_fh *dirfh,
 	struct nfs4_exception exception = { };
 	int err;
 	do {
-		err = nfs4_handle_exception(server,
-				_nfs4_proc_lookupfh(server, dirfh, name,
-						    fhandle, fattr),
-				&exception);
+		err = _nfs4_proc_lookupfh(server, dirfh, name, fhandle, fattr);
+		/* FIXME: !!!! */
+		if (err == -NFS4ERR_MOVED) {
+			err = -EREMOTE;
+			break;
+		}
+		err = nfs4_handle_exception(server, err, &exception);
 	} while (exception.retry);
 	return err;
 }
@@ -1582,28 +1718,10 @@ static int nfs4_proc_lookupfh(struct nfs_server *server, struct nfs_fh *dirfh,
 static int _nfs4_proc_lookup(struct inode *dir, struct qstr *name,
 		struct nfs_fh *fhandle, struct nfs_fattr *fattr)
 {
-	int		       status;
-	struct nfs_server *server = NFS_SERVER(dir);
-	struct nfs4_lookup_arg args = {
-		.bitmask = server->attr_bitmask,
-		.dir_fh = NFS_FH(dir),
-		.name = name,
-	};
-	struct nfs4_lookup_res res = {
-		.server = server,
-		.fattr = fattr,
-		.fh = fhandle,
-	};
-	struct rpc_message msg = {
-		.rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_LOOKUP],
-		.rpc_argp = &args,
-		.rpc_resp = &res,
-	};
-	
-	nfs_fattr_init(fattr);
+	int status;
 	
 	dprintk("NFS call  lookup %s\n", name->name);
-	status = rpc_call_sync(NFS_CLIENT(dir), &msg, 0);
+	status = _nfs4_proc_lookupfh(NFS_SERVER(dir), NFS_FH(dir), name, fhandle, fattr);
 	if (status == -NFS4ERR_MOVED)
 		status = nfs4_get_referral(dir, name, fattr, fhandle);
 	dprintk("NFS reply lookup: %d\n", status);
@@ -1752,6 +1870,10 @@ static int
 nfs4_proc_create(struct inode *dir, struct dentry *dentry, struct iattr *sattr,
                  int flags, struct nameidata *nd)
 {
+	struct path path = {
+		.mnt = nd->mnt,
+		.dentry = dentry,
+	};
 	struct nfs4_state *state;
 	struct rpc_cred *cred;
 	int status = 0;
@@ -1761,7 +1883,7 @@ nfs4_proc_create(struct inode *dir, struct dentry *dentry, struct iattr *sattr,
 		status = PTR_ERR(cred);
 		goto out;
 	}
-	state = nfs4_do_open(dir, dentry, flags, sattr, cred);
+	state = nfs4_do_open(dir, &path, flags, sattr, cred);
 	put_rpccred(cred);
 	if (IS_ERR(state)) {
 		status = PTR_ERR(state);
@@ -1773,11 +1895,12 @@ nfs4_proc_create(struct inode *dir, struct dentry *dentry, struct iattr *sattr,
 		status = nfs4_do_setattr(state->inode, &fattr, sattr, state);
 		if (status == 0)
 			nfs_setattr_update_inode(state->inode, sattr);
+		nfs_post_op_update_inode(state->inode, &fattr);
 	}
-	if (status == 0 && nd != NULL && (nd->flags & LOOKUP_OPEN))
-		status = nfs4_intent_set_file(nd, dentry, state);
+	if (status == 0 && (nd->flags & LOOKUP_OPEN) != 0)
+		status = nfs4_intent_set_file(nd, &path, state);
 	else
-		nfs4_close_state(state, flags);
+		nfs4_close_state(&path, state, flags);
 out:
 	return status;
 }
@@ -3008,7 +3131,7 @@ static int _nfs4_proc_getlk(struct nfs4_state *state, int cmd, struct file_lock
 	if (status != 0)
 		goto out;
 	lsp = request->fl_u.nfs4_fl.owner;
-	arg.lock_owner.id = lsp->ls_id; 
+	arg.lock_owner.id = lsp->ls_id.id;
 	status = rpc_call_sync(server->client, &msg, 0);
 	switch (status) {
 		case 0:
@@ -3152,6 +3275,11 @@ static struct rpc_task *nfs4_do_unlck(struct file_lock *fl,
 {
 	struct nfs4_unlockdata *data;
 
+	/* Ensure this is an unlock - when canceling a lock, the
+	 * canceled lock is passed in, and it won't be an unlock.
+	 */
+	fl->fl_type = F_UNLCK;
+
 	data = nfs4_alloc_unlockdata(fl, ctx, lsp, seqid);
 	if (data == NULL) {
 		nfs_free_seqid(seqid);
@@ -3222,7 +3350,7 @@ static struct nfs4_lockdata *nfs4_alloc_lockdata(struct file_lock *fl,
 		goto out_free;
 	p->arg.lock_stateid = &lsp->ls_stateid;
 	p->arg.lock_owner.clientid = server->nfs_client->cl_clientid;
-	p->arg.lock_owner.id = lsp->ls_id;
+	p->arg.lock_owner.id = lsp->ls_id.id;
 	p->lsp = lsp;
 	atomic_inc(&lsp->ls_count);
 	p->ctx = get_nfs_open_context(ctx);
@@ -3285,7 +3413,7 @@ static void nfs4_lock_done(struct rpc_task *task, void *calldata)
 		memcpy(data->lsp->ls_stateid.data, data->res.stateid.data,
 					sizeof(data->lsp->ls_stateid.data));
 		data->lsp->ls_flags |= NFS_LOCK_INITIALIZED;
-		renew_lease(NFS_SERVER(data->ctx->dentry->d_inode), data->timestamp);
+		renew_lease(NFS_SERVER(data->ctx->path.dentry->d_inode), data->timestamp);
 	}
 	nfs_increment_lock_seqid(data->rpc_status, data->arg.lock_seqid);
 out:
diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c
index 8ed79d5..e9662ba 100644
--- a/fs/nfs/nfs4state.c
+++ b/fs/nfs/nfs4state.c
@@ -38,12 +38,14 @@
  * subsequent patch.
  */
 
+#include <linux/kernel.h>
 #include <linux/slab.h>
 #include <linux/smp_lock.h>
 #include <linux/nfs_fs.h>
 #include <linux/nfs_idmap.h>
 #include <linux/kthread.h>
 #include <linux/module.h>
+#include <linux/random.h>
 #include <linux/workqueue.h>
 #include <linux/bitops.h>
 
@@ -69,33 +71,14 @@ static int nfs4_init_client(struct nfs_client *clp, struct rpc_cred *cred)
 	return status;
 }
 
-u32
-nfs4_alloc_lockowner_id(struct nfs_client *clp)
-{
-	return clp->cl_lockowner_id ++;
-}
-
-static struct nfs4_state_owner *
-nfs4_client_grab_unused(struct nfs_client *clp, struct rpc_cred *cred)
-{
-	struct nfs4_state_owner *sp = NULL;
-
-	if (!list_empty(&clp->cl_unused)) {
-		sp = list_entry(clp->cl_unused.next, struct nfs4_state_owner, so_list);
-		atomic_inc(&sp->so_count);
-		sp->so_cred = cred;
-		list_move(&sp->so_list, &clp->cl_state_owners);
-		clp->cl_nunused--;
-	}
-	return sp;
-}
-
 struct rpc_cred *nfs4_get_renew_cred(struct nfs_client *clp)
 {
 	struct nfs4_state_owner *sp;
+	struct rb_node *pos;
 	struct rpc_cred *cred = NULL;
 
-	list_for_each_entry(sp, &clp->cl_state_owners, so_list) {
+	for (pos = rb_first(&clp->cl_state_owners); pos != NULL; pos = rb_next(pos)) {
+		sp = rb_entry(pos, struct nfs4_state_owner, so_client_node);
 		if (list_empty(&sp->so_states))
 			continue;
 		cred = get_rpccred(sp->so_cred);
@@ -107,32 +90,146 @@ struct rpc_cred *nfs4_get_renew_cred(struct nfs_client *clp)
 static struct rpc_cred *nfs4_get_setclientid_cred(struct nfs_client *clp)
 {
 	struct nfs4_state_owner *sp;
+	struct rb_node *pos;
 
-	if (!list_empty(&clp->cl_state_owners)) {
-		sp = list_entry(clp->cl_state_owners.next,
-				struct nfs4_state_owner, so_list);
+	pos = rb_first(&clp->cl_state_owners);
+	if (pos != NULL) {
+		sp = rb_entry(pos, struct nfs4_state_owner, so_client_node);
 		return get_rpccred(sp->so_cred);
 	}
 	return NULL;
 }
 
+static void nfs_alloc_unique_id(struct rb_root *root, struct nfs_unique_id *new,
+		__u64 minval, int maxbits)
+{
+	struct rb_node **p, *parent;
+	struct nfs_unique_id *pos;
+	__u64 mask = ~0ULL;
+
+	if (maxbits < 64)
+		mask = (1ULL << maxbits) - 1ULL;
+
+	/* Ensure distribution is more or less flat */
+	get_random_bytes(&new->id, sizeof(new->id));
+	new->id &= mask;
+	if (new->id < minval)
+		new->id += minval;
+retry:
+	p = &root->rb_node;
+	parent = NULL;
+
+	while (*p != NULL) {
+		parent = *p;
+		pos = rb_entry(parent, struct nfs_unique_id, rb_node);
+
+		if (new->id < pos->id)
+			p = &(*p)->rb_left;
+		else if (new->id > pos->id)
+			p = &(*p)->rb_right;
+		else
+			goto id_exists;
+	}
+	rb_link_node(&new->rb_node, parent, p);
+	rb_insert_color(&new->rb_node, root);
+	return;
+id_exists:
+	for (;;) {
+		new->id++;
+		if (new->id < minval || (new->id & mask) != new->id) {
+			new->id = minval;
+			break;
+		}
+		parent = rb_next(parent);
+		if (parent == NULL)
+			break;
+		pos = rb_entry(parent, struct nfs_unique_id, rb_node);
+		if (new->id < pos->id)
+			break;
+	}
+	goto retry;
+}
+
+static void nfs_free_unique_id(struct rb_root *root, struct nfs_unique_id *id)
+{
+	rb_erase(&id->rb_node, root);
+}
+
 static struct nfs4_state_owner *
-nfs4_find_state_owner(struct nfs_client *clp, struct rpc_cred *cred)
+nfs4_find_state_owner(struct nfs_server *server, struct rpc_cred *cred)
 {
+	struct nfs_client *clp = server->nfs_client;
+	struct rb_node **p = &clp->cl_state_owners.rb_node,
+		       *parent = NULL;
 	struct nfs4_state_owner *sp, *res = NULL;
 
-	list_for_each_entry(sp, &clp->cl_state_owners, so_list) {
-		if (sp->so_cred != cred)
+	while (*p != NULL) {
+		parent = *p;
+		sp = rb_entry(parent, struct nfs4_state_owner, so_client_node);
+
+		if (server < sp->so_server) {
+			p = &parent->rb_left;
 			continue;
-		atomic_inc(&sp->so_count);
-		/* Move to the head of the list */
-		list_move(&sp->so_list, &clp->cl_state_owners);
-		res = sp;
-		break;
+		}
+		if (server > sp->so_server) {
+			p = &parent->rb_right;
+			continue;
+		}
+		if (cred < sp->so_cred)
+			p = &parent->rb_left;
+		else if (cred > sp->so_cred)
+			p = &parent->rb_right;
+		else {
+			atomic_inc(&sp->so_count);
+			res = sp;
+			break;
+		}
 	}
 	return res;
 }
 
+static struct nfs4_state_owner *
+nfs4_insert_state_owner(struct nfs_client *clp, struct nfs4_state_owner *new)
+{
+	struct rb_node **p = &clp->cl_state_owners.rb_node,
+		       *parent = NULL;
+	struct nfs4_state_owner *sp;
+
+	while (*p != NULL) {
+		parent = *p;
+		sp = rb_entry(parent, struct nfs4_state_owner, so_client_node);
+
+		if (new->so_server < sp->so_server) {
+			p = &parent->rb_left;
+			continue;
+		}
+		if (new->so_server > sp->so_server) {
+			p = &parent->rb_right;
+			continue;
+		}
+		if (new->so_cred < sp->so_cred)
+			p = &parent->rb_left;
+		else if (new->so_cred > sp->so_cred)
+			p = &parent->rb_right;
+		else {
+			atomic_inc(&sp->so_count);
+			return sp;
+		}
+	}
+	nfs_alloc_unique_id(&clp->cl_openowner_id, &new->so_owner_id, 1, 64);
+	rb_link_node(&new->so_client_node, parent, p);
+	rb_insert_color(&new->so_client_node, &clp->cl_state_owners);
+	return new;
+}
+
+static void
+nfs4_remove_state_owner(struct nfs_client *clp, struct nfs4_state_owner *sp)
+{
+	if (!RB_EMPTY_NODE(&sp->so_client_node))
+		rb_erase(&sp->so_client_node, &clp->cl_state_owners);
+	nfs_free_unique_id(&clp->cl_openowner_id, &sp->so_owner_id);
+}
+
 /*
  * nfs4_alloc_state_owner(): this is called on the OPEN or CREATE path to
  * create a new state_owner.
@@ -160,10 +257,14 @@ nfs4_alloc_state_owner(void)
 void
 nfs4_drop_state_owner(struct nfs4_state_owner *sp)
 {
-	struct nfs_client *clp = sp->so_client;
-	spin_lock(&clp->cl_lock);
-	list_del_init(&sp->so_list);
-	spin_unlock(&clp->cl_lock);
+	if (!RB_EMPTY_NODE(&sp->so_client_node)) {
+		struct nfs_client *clp = sp->so_client;
+
+		spin_lock(&clp->cl_lock);
+		rb_erase(&sp->so_client_node, &clp->cl_state_owners);
+		RB_CLEAR_NODE(&sp->so_client_node);
+		spin_unlock(&clp->cl_lock);
+	}
 }
 
 /*
@@ -175,26 +276,25 @@ struct nfs4_state_owner *nfs4_get_state_owner(struct nfs_server *server, struct
 	struct nfs_client *clp = server->nfs_client;
 	struct nfs4_state_owner *sp, *new;
 
-	get_rpccred(cred);
-	new = nfs4_alloc_state_owner();
 	spin_lock(&clp->cl_lock);
-	sp = nfs4_find_state_owner(clp, cred);
-	if (sp == NULL)
-		sp = nfs4_client_grab_unused(clp, cred);
-	if (sp == NULL && new != NULL) {
-		list_add(&new->so_list, &clp->cl_state_owners);
-		new->so_client = clp;
-		new->so_id = nfs4_alloc_lockowner_id(clp);
-		new->so_cred = cred;
-		sp = new;
-		new = NULL;
-	}
+	sp = nfs4_find_state_owner(server, cred);
 	spin_unlock(&clp->cl_lock);
-	kfree(new);
 	if (sp != NULL)
 		return sp;
-	put_rpccred(cred);
-	return NULL;
+	new = nfs4_alloc_state_owner();
+	if (new == NULL)
+		return NULL;
+	new->so_client = clp;
+	new->so_server = server;
+	new->so_cred = cred;
+	spin_lock(&clp->cl_lock);
+	sp = nfs4_insert_state_owner(clp, new);
+	spin_unlock(&clp->cl_lock);
+	if (sp == new)
+		get_rpccred(cred);
+	else
+		kfree(new);
+	return sp;
 }
 
 /*
@@ -208,18 +308,7 @@ void nfs4_put_state_owner(struct nfs4_state_owner *sp)
 
 	if (!atomic_dec_and_lock(&sp->so_count, &clp->cl_lock))
 		return;
-	if (clp->cl_nunused >= OPENOWNER_POOL_SIZE)
-		goto out_free;
-	if (list_empty(&sp->so_list))
-		goto out_free;
-	list_move(&sp->so_list, &clp->cl_unused);
-	clp->cl_nunused++;
-	spin_unlock(&clp->cl_lock);
-	put_rpccred(cred);
-	cred = NULL;
-	return;
-out_free:
-	list_del(&sp->so_list);
+	nfs4_remove_state_owner(clp, sp);
 	spin_unlock(&clp->cl_lock);
 	put_rpccred(cred);
 	kfree(sp);
@@ -236,6 +325,7 @@ nfs4_alloc_open_state(void)
 	atomic_set(&state->count, 1);
 	INIT_LIST_HEAD(&state->lock_states);
 	spin_lock_init(&state->state_lock);
+	seqlock_init(&state->seqlock);
 	return state;
 }
 
@@ -263,13 +353,10 @@ __nfs4_find_state_byowner(struct inode *inode, struct nfs4_state_owner *owner)
 	struct nfs4_state *state;
 
 	list_for_each_entry(state, &nfsi->open_states, inode_states) {
-		/* Is this in the process of being freed? */
-		if (state->state == 0)
+		if (state->owner != owner)
 			continue;
-		if (state->owner == owner) {
-			atomic_inc(&state->count);
+		if (atomic_inc_not_zero(&state->count))
 			return state;
-		}
 	}
 	return NULL;
 }
@@ -341,16 +428,15 @@ void nfs4_put_open_state(struct nfs4_state *state)
 /*
  * Close the current file.
  */
-void nfs4_close_state(struct nfs4_state *state, mode_t mode)
+void nfs4_close_state(struct path *path, struct nfs4_state *state, mode_t mode)
 {
-	struct inode *inode = state->inode;
 	struct nfs4_state_owner *owner = state->owner;
-	int oldstate, newstate = 0;
+	int call_close = 0;
+	int newstate;
 
 	atomic_inc(&owner->so_count);
 	/* Protect against nfs4_find_state() */
 	spin_lock(&owner->so_lock);
-	spin_lock(&inode->i_lock);
 	switch (mode & (FMODE_READ | FMODE_WRITE)) {
 		case FMODE_READ:
 			state->n_rdonly--;
@@ -361,24 +447,29 @@ void nfs4_close_state(struct nfs4_state *state, mode_t mode)
 		case FMODE_READ|FMODE_WRITE:
 			state->n_rdwr--;
 	}
-	oldstate = newstate = state->state;
+	newstate = FMODE_READ|FMODE_WRITE;
 	if (state->n_rdwr == 0) {
-		if (state->n_rdonly == 0)
+		if (state->n_rdonly == 0) {
 			newstate &= ~FMODE_READ;
-		if (state->n_wronly == 0)
+			call_close |= test_bit(NFS_O_RDONLY_STATE, &state->flags);
+			call_close |= test_bit(NFS_O_RDWR_STATE, &state->flags);
+		}
+		if (state->n_wronly == 0) {
 			newstate &= ~FMODE_WRITE;
+			call_close |= test_bit(NFS_O_WRONLY_STATE, &state->flags);
+			call_close |= test_bit(NFS_O_RDWR_STATE, &state->flags);
+		}
+		if (newstate == 0)
+			clear_bit(NFS_DELEGATED_STATE, &state->flags);
 	}
-	if (test_bit(NFS_DELEGATED_STATE, &state->flags)) {
-		nfs4_state_set_mode_locked(state, newstate);
-		oldstate = newstate;
-	}
-	spin_unlock(&inode->i_lock);
+	nfs4_state_set_mode_locked(state, newstate);
 	spin_unlock(&owner->so_lock);
 
-	if (oldstate != newstate && nfs4_do_close(inode, state) == 0)
-		return;
-	nfs4_put_open_state(state);
-	nfs4_put_state_owner(owner);
+	if (!call_close) {
+		nfs4_put_open_state(state);
+		nfs4_put_state_owner(owner);
+	} else
+		nfs4_do_close(path, state);
 }
 
 /*
@@ -415,12 +506,22 @@ static struct nfs4_lock_state *nfs4_alloc_lock_state(struct nfs4_state *state, f
 	atomic_set(&lsp->ls_count, 1);
 	lsp->ls_owner = fl_owner;
 	spin_lock(&clp->cl_lock);
-	lsp->ls_id = nfs4_alloc_lockowner_id(clp);
+	nfs_alloc_unique_id(&clp->cl_lockowner_id, &lsp->ls_id, 1, 64);
 	spin_unlock(&clp->cl_lock);
 	INIT_LIST_HEAD(&lsp->ls_locks);
 	return lsp;
 }
 
+static void nfs4_free_lock_state(struct nfs4_lock_state *lsp)
+{
+	struct nfs_client *clp = lsp->ls_state->owner->so_client;
+
+	spin_lock(&clp->cl_lock);
+	nfs_free_unique_id(&clp->cl_lockowner_id, &lsp->ls_id);
+	spin_unlock(&clp->cl_lock);
+	kfree(lsp);
+}
+
 /*
  * Return a compatible lock_state. If no initialized lock_state structure
  * exists, return an uninitialized one.
@@ -450,7 +551,8 @@ static struct nfs4_lock_state *nfs4_get_lock_state(struct nfs4_state *state, fl_
 			return NULL;
 	}
 	spin_unlock(&state->state_lock);
-	kfree(new);
+	if (new != NULL)
+		nfs4_free_lock_state(new);
 	return lsp;
 }
 
@@ -471,7 +573,7 @@ void nfs4_put_lock_state(struct nfs4_lock_state *lsp)
 	if (list_empty(&state->lock_states))
 		clear_bit(LK_STATE_IN_USE, &state->flags);
 	spin_unlock(&state->state_lock);
-	kfree(lsp);
+	nfs4_free_lock_state(lsp);
 }
 
 static void nfs4_fl_copy_lock(struct file_lock *dst, struct file_lock *src)
@@ -513,8 +615,12 @@ int nfs4_set_lock_state(struct nfs4_state *state, struct file_lock *fl)
 void nfs4_copy_stateid(nfs4_stateid *dst, struct nfs4_state *state, fl_owner_t fl_owner)
 {
 	struct nfs4_lock_state *lsp;
+	int seq;
 
-	memcpy(dst, &state->stateid, sizeof(*dst));
+	do {
+		seq = read_seqbegin(&state->seqlock);
+		memcpy(dst, &state->stateid, sizeof(*dst));
+	} while (read_seqretry(&state->seqlock, seq));
 	if (test_bit(LK_STATE_IN_USE, &state->flags) == 0)
 		return;
 
@@ -557,12 +663,18 @@ void nfs_free_seqid(struct nfs_seqid *seqid)
  * failed with a seqid incrementing error -
  * see comments nfs_fs.h:seqid_mutating_error()
  */
-static inline void nfs_increment_seqid(int status, struct nfs_seqid *seqid)
+static void nfs_increment_seqid(int status, struct nfs_seqid *seqid)
 {
 	switch (status) {
 		case 0:
 			break;
 		case -NFS4ERR_BAD_SEQID:
+			if (seqid->sequence->flags & NFS_SEQID_CONFIRMED)
+				return;
+			printk(KERN_WARNING "NFS: v4 server returned a bad"
+					" sequence-id error on an"
+					" unconfirmed sequence %p!\n",
+					seqid->sequence);	/* fall through */
 		case -NFS4ERR_STALE_CLIENTID:
 		case -NFS4ERR_STALE_STATEID:
 		case -NFS4ERR_BAD_STATEID:
@@ -586,7 +698,7 @@ void nfs_increment_open_seqid(int status, struct nfs_seqid *seqid)
 				struct nfs4_state_owner, so_seqid);
 		nfs4_drop_state_owner(sp);
 	}
-	return nfs_increment_seqid(status, seqid);
+	nfs_increment_seqid(status, seqid);
 }
 
 /*
@@ -596,7 +708,7 @@ void nfs_increment_open_seqid(int status, struct nfs_seqid *seqid)
  */
 void nfs_increment_lock_seqid(int status, struct nfs_seqid *seqid)
 {
-	return nfs_increment_seqid(status, seqid);
+	nfs_increment_seqid(status, seqid);
 }
 
 int nfs_wait_on_sequence(struct nfs_seqid *seqid, struct rpc_task *task)
@@ -748,15 +860,21 @@ out_err:
 static void nfs4_state_mark_reclaim(struct nfs_client *clp)
 {
 	struct nfs4_state_owner *sp;
+	struct rb_node *pos;
 	struct nfs4_state *state;
 	struct nfs4_lock_state *lock;
 
 	/* Reset all sequence ids to zero */
-	list_for_each_entry(sp, &clp->cl_state_owners, so_list) {
+	for (pos = rb_first(&clp->cl_state_owners); pos != NULL; pos = rb_next(pos)) {
+		sp = rb_entry(pos, struct nfs4_state_owner, so_client_node);
 		sp->so_seqid.counter = 0;
 		sp->so_seqid.flags = 0;
 		spin_lock(&sp->so_lock);
 		list_for_each_entry(state, &sp->so_states, open_states) {
+			clear_bit(NFS_DELEGATED_STATE, &state->flags);
+			clear_bit(NFS_O_RDONLY_STATE, &state->flags);
+			clear_bit(NFS_O_WRONLY_STATE, &state->flags);
+			clear_bit(NFS_O_RDWR_STATE, &state->flags);
 			list_for_each_entry(lock, &state->lock_states, ls_locks) {
 				lock->ls_seqid.counter = 0;
 				lock->ls_seqid.flags = 0;
@@ -771,6 +889,7 @@ static int reclaimer(void *ptr)
 {
 	struct nfs_client *clp = ptr;
 	struct nfs4_state_owner *sp;
+	struct rb_node *pos;
 	struct nfs4_state_recovery_ops *ops;
 	struct rpc_cred *cred;
 	int status = 0;
@@ -816,7 +935,8 @@ restart_loop:
 	/* Mark all delegations for reclaim */
 	nfs_delegation_mark_reclaim(clp);
 	/* Note: list is protected by exclusive lock on cl->cl_sem */
-	list_for_each_entry(sp, &clp->cl_state_owners, so_list) {
+	for (pos = rb_first(&clp->cl_state_owners); pos != NULL; pos = rb_next(pos)) {
+		sp = rb_entry(pos, struct nfs4_state_owner, so_client_node);
 		status = nfs4_reclaim_open_state(ops, sp);
 		if (status < 0) {
 			if (status == -NFS4ERR_NO_GRACE) {
diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c
index 8003c91..c087384 100644
--- a/fs/nfs/nfs4xdr.c
+++ b/fs/nfs/nfs4xdr.c
@@ -68,9 +68,10 @@ static int nfs4_stat_to_errno(int);
 #endif
 
 /* lock,open owner id: 
- * we currently use size 1 (u32) out of (NFS4_OPAQUE_LIMIT  >> 2)
+ * we currently use size 2 (u64) out of (NFS4_OPAQUE_LIMIT  >> 2)
  */
-#define owner_id_maxsz          (1 + 1)
+#define open_owner_id_maxsz	(1 + 4)
+#define lock_owner_id_maxsz	(1 + 4)
 #define compound_encode_hdr_maxsz	(3 + (NFS4_MAXTAGLEN >> 2))
 #define compound_decode_hdr_maxsz	(3 + (NFS4_MAXTAGLEN >> 2))
 #define op_encode_hdr_maxsz	(1)
@@ -87,9 +88,11 @@ static int nfs4_stat_to_errno(int);
 #define encode_getattr_maxsz    (op_encode_hdr_maxsz + nfs4_fattr_bitmap_maxsz)
 #define nfs4_name_maxsz		(1 + ((3 + NFS4_MAXNAMLEN) >> 2))
 #define nfs4_path_maxsz		(1 + ((3 + NFS4_MAXPATHLEN) >> 2))
+#define nfs4_owner_maxsz	(1 + XDR_QUADLEN(IDMAP_NAMESZ))
+#define nfs4_group_maxsz	(1 + XDR_QUADLEN(IDMAP_NAMESZ))
 /* This is based on getfattr, which uses the most attributes: */
 #define nfs4_fattr_value_maxsz	(1 + (1 + 2 + 2 + 4 + 2 + 1 + 1 + 2 + 2 + \
-				3 + 3 + 3 + 2 * nfs4_name_maxsz))
+				3 + 3 + 3 + nfs4_owner_maxsz + nfs4_group_maxsz))
 #define nfs4_fattr_maxsz	(nfs4_fattr_bitmap_maxsz + \
 				nfs4_fattr_value_maxsz)
 #define decode_getattr_maxsz    (op_decode_hdr_maxsz + nfs4_fattr_maxsz)
@@ -116,8 +119,27 @@ static int nfs4_stat_to_errno(int);
 				3 + (NFS4_VERIFIER_SIZE >> 2))
 #define decode_setclientid_confirm_maxsz \
 				(op_decode_hdr_maxsz)
-#define encode_lookup_maxsz	(op_encode_hdr_maxsz + \
-				1 + ((3 + NFS4_FHSIZE) >> 2))
+#define encode_lookup_maxsz	(op_encode_hdr_maxsz + nfs4_name_maxsz)
+#define decode_lookup_maxsz	(op_decode_hdr_maxsz)
+#define encode_share_access_maxsz \
+				(2)
+#define encode_createmode_maxsz	(1 + nfs4_fattr_maxsz)
+#define encode_opentype_maxsz	(1 + encode_createmode_maxsz)
+#define encode_claim_null_maxsz	(1 + nfs4_name_maxsz)
+#define encode_open_maxsz	(op_encode_hdr_maxsz + \
+				2 + encode_share_access_maxsz + 2 + \
+				open_owner_id_maxsz + \
+				encode_opentype_maxsz + \
+				encode_claim_null_maxsz)
+#define decode_ace_maxsz	(3 + nfs4_owner_maxsz)
+#define decode_delegation_maxsz	(1 + XDR_QUADLEN(NFS4_STATEID_SIZE) + 1 + \
+				decode_ace_maxsz)
+#define decode_change_info_maxsz	(5)
+#define decode_open_maxsz	(op_decode_hdr_maxsz + \
+				XDR_QUADLEN(NFS4_STATEID_SIZE) + \
+				decode_change_info_maxsz + 1 + \
+				nfs4_fattr_bitmap_maxsz + \
+				decode_delegation_maxsz)
 #define encode_remove_maxsz	(op_encode_hdr_maxsz + \
 				nfs4_name_maxsz)
 #define encode_rename_maxsz	(op_encode_hdr_maxsz + \
@@ -134,9 +156,15 @@ static int nfs4_stat_to_errno(int);
 #define encode_create_maxsz	(op_encode_hdr_maxsz + \
 				2 + nfs4_name_maxsz + \
 				nfs4_fattr_maxsz)
-#define decode_create_maxsz	(op_decode_hdr_maxsz + 8)
+#define decode_create_maxsz	(op_decode_hdr_maxsz + \
+				decode_change_info_maxsz + \
+				nfs4_fattr_bitmap_maxsz)
 #define encode_delegreturn_maxsz (op_encode_hdr_maxsz + 4)
 #define decode_delegreturn_maxsz (op_decode_hdr_maxsz)
+#define encode_fs_locations_maxsz \
+				(encode_getattr_maxsz)
+#define decode_fs_locations_maxsz \
+				(0)
 #define NFS4_enc_compound_sz	(1024)  /* XXX: large enough? */
 #define NFS4_dec_compound_sz	(1024)  /* XXX: large enough? */
 #define NFS4_enc_read_sz	(compound_encode_hdr_maxsz + \
@@ -174,16 +202,21 @@ static int nfs4_stat_to_errno(int);
 				op_decode_hdr_maxsz + 2 + \
 				decode_getattr_maxsz)
 #define NFS4_enc_open_sz        (compound_encode_hdr_maxsz + \
-                                encode_putfh_maxsz + \
-                                op_encode_hdr_maxsz + \
-                                13 + 3 + 2 + 64 + \
-                                encode_getattr_maxsz + \
-                                encode_getfh_maxsz)
+				encode_putfh_maxsz + \
+				encode_savefh_maxsz + \
+				encode_open_maxsz + \
+				encode_getfh_maxsz + \
+				encode_getattr_maxsz + \
+				encode_restorefh_maxsz + \
+				encode_getattr_maxsz)
 #define NFS4_dec_open_sz        (compound_decode_hdr_maxsz + \
-                                decode_putfh_maxsz + \
-                                op_decode_hdr_maxsz + 4 + 5 + 2 + 3 + \
-                                decode_getattr_maxsz + \
-                                decode_getfh_maxsz)
+				decode_putfh_maxsz + \
+				decode_savefh_maxsz + \
+				decode_open_maxsz + \
+				decode_getfh_maxsz + \
+				decode_getattr_maxsz + \
+				decode_restorefh_maxsz + \
+				decode_getattr_maxsz)
 #define NFS4_enc_open_confirm_sz      \
                                 (compound_encode_hdr_maxsz + \
                                 encode_putfh_maxsz + \
@@ -193,12 +226,12 @@ static int nfs4_stat_to_errno(int);
                                         op_decode_hdr_maxsz + 4)
 #define NFS4_enc_open_noattr_sz	(compound_encode_hdr_maxsz + \
 					encode_putfh_maxsz + \
-					op_encode_hdr_maxsz + \
-					11)
+					encode_open_maxsz + \
+					encode_getattr_maxsz)
 #define NFS4_dec_open_noattr_sz	(compound_decode_hdr_maxsz + \
 					decode_putfh_maxsz + \
-					op_decode_hdr_maxsz + \
-					4 + 5 + 2 + 3)
+					decode_open_maxsz + \
+					decode_getattr_maxsz)
 #define NFS4_enc_open_downgrade_sz \
 				(compound_encode_hdr_maxsz + \
                                 encode_putfh_maxsz + \
@@ -256,19 +289,19 @@ static int nfs4_stat_to_errno(int);
 				op_encode_hdr_maxsz + \
 				1 + 1 + 2 + 2 + \
 				1 + 4 + 1 + 2 + \
-				owner_id_maxsz)
+				lock_owner_id_maxsz)
 #define NFS4_dec_lock_sz        (compound_decode_hdr_maxsz + \
 				decode_putfh_maxsz + \
 				decode_getattr_maxsz + \
 				op_decode_hdr_maxsz + \
 				2 + 2 + 1 + 2 + \
-				owner_id_maxsz)
+				lock_owner_id_maxsz)
 #define NFS4_enc_lockt_sz       (compound_encode_hdr_maxsz + \
 				encode_putfh_maxsz + \
 				encode_getattr_maxsz + \
 				op_encode_hdr_maxsz + \
 				1 + 2 + 2 + 2 + \
-				owner_id_maxsz)
+				lock_owner_id_maxsz)
 #define NFS4_dec_lockt_sz       (NFS4_dec_lock_sz)
 #define NFS4_enc_locku_sz       (compound_encode_hdr_maxsz + \
 				encode_putfh_maxsz + \
@@ -298,7 +331,7 @@ static int nfs4_stat_to_errno(int);
 				encode_getfh_maxsz)
 #define NFS4_dec_lookup_sz	(compound_decode_hdr_maxsz + \
 				decode_putfh_maxsz + \
-				op_decode_hdr_maxsz + \
+				decode_lookup_maxsz + \
 				decode_getattr_maxsz + \
 				decode_getfh_maxsz)
 #define NFS4_enc_lookup_root_sz (compound_encode_hdr_maxsz + \
@@ -417,12 +450,13 @@ static int nfs4_stat_to_errno(int);
 #define NFS4_enc_fs_locations_sz \
 				(compound_encode_hdr_maxsz + \
 				 encode_putfh_maxsz + \
-				 encode_getattr_maxsz)
+				 encode_lookup_maxsz + \
+				 encode_fs_locations_maxsz)
 #define NFS4_dec_fs_locations_sz \
 				(compound_decode_hdr_maxsz + \
 				 decode_putfh_maxsz + \
-				 op_decode_hdr_maxsz + \
-				 nfs4_fattr_bitmap_maxsz)
+				 decode_lookup_maxsz + \
+				 decode_fs_locations_maxsz)
 
 static struct {
 	unsigned int	mode;
@@ -793,13 +827,14 @